1 /* $Id: cqltransform.c,v 1.24 2006-10-05 16:12:23 adam Exp $
2 Copyright (C) 1995-2005, Index Data ApS
5 This file is part of the YAZ toolkit.
11 * \file cqltransform.c
12 * \brief Implements CQL transform (CQL to RPN conversion).
14 * Evaluation order of rules:
29 #include <yaz/xmalloc.h>
30 #include <yaz/diagsrw.h>
32 struct cql_prop_entry {
35 struct cql_prop_entry *next;
38 struct cql_transform_t_ {
39 struct cql_prop_entry *entry;
44 cql_transform_t cql_transform_open_FILE(FILE *f)
47 cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct));
48 struct cql_prop_entry **pp = &ct->entry;
52 while (fgets(line, sizeof(line)-1, f))
54 const char *cp_value_start;
55 const char *cp_value_end;
56 const char *cp_pattern_end;
57 const char *cp = line;
58 while (*cp && !strchr(" \t=\r\n#", *cp))
63 while (*cp && strchr(" \t\r\n", *cp))
68 while (*cp && strchr(" \t\r\n", *cp))
71 if (!(cp_value_end = strchr(cp, '#')))
72 cp_value_end = strlen(line) + line;
74 if (cp_value_end != cp_value_start &&
75 strchr(" \t\r\n", cp_value_end[-1]))
77 *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp));
78 (*pp)->pattern = (char *) xmalloc (cp_pattern_end - line + 1);
79 memcpy ((*pp)->pattern, line, cp_pattern_end - line);
80 (*pp)->pattern[cp_pattern_end-line] = 0;
82 (*pp)->value = (char *) xmalloc (cp_value_end - cp_value_start + 1);
83 if (cp_value_start != cp_value_end)
84 memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
85 (*pp)->value[cp_value_end - cp_value_start] = 0;
92 void cql_transform_close(cql_transform_t ct)
94 struct cql_prop_entry *pe;
100 struct cql_prop_entry *pe_next = pe->next;
111 cql_transform_t cql_transform_open_fname(const char *fname)
114 FILE *f = fopen(fname, "r");
117 ct = cql_transform_open_FILE(f);
122 static const char *cql_lookup_property(cql_transform_t ct,
123 const char *pat1, const char *pat2,
127 struct cql_prop_entry *e;
129 if (pat1 && pat2 && pat3)
130 sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
131 else if (pat1 && pat2)
132 sprintf (pattern, "%.39s.%.39s", pat1, pat2);
133 else if (pat1 && pat3)
134 sprintf (pattern, "%.39s.%.39s", pat1, pat3);
136 sprintf (pattern, "%.39s", pat1);
140 for (e = ct->entry; e; e = e->next)
142 if (!cql_strcmp(e->pattern, pattern))
148 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
149 const char *uri, const char *val, const char *default_val,
150 void (*pr)(const char *buf, void *client_data),
155 const char *eval = val ? val : default_val;
156 const char *prefix = 0;
160 struct cql_prop_entry *e;
162 for (e = ct->entry; e; e = e->next)
163 if (!memcmp(e->pattern, "set.", 4) && e->value &&
164 !strcmp(e->value, uri))
166 prefix = e->pattern+4;
169 /* must have a prefix now - if not it's an error */
175 res = cql_lookup_property(ct, category, prefix, eval);
177 res = cql_lookup_property(ct, category, prefix, "*");
183 const char *cp0 = res, *cp1;
184 while ((cp1 = strchr(cp0, '=')))
186 while (*cp1 && *cp1 != ' ')
188 if (cp1 - cp0 >= sizeof(buf))
190 memcpy (buf, cp0, cp1 - cp0);
192 (*pr)("@attr ", client_data);
193 (*pr)(buf, client_data);
194 (*pr)(" ", client_data);
202 if (errcode && !ct->error)
206 ct->addinfo = xstrdup(val);
213 int cql_pr_attr(cql_transform_t ct, const char *category,
214 const char *val, const char *default_val,
215 void (*pr)(const char *buf, void *client_data),
219 return cql_pr_attr_uri(ct, category, 0 /* uri */,
220 val, default_val, pr, client_data, errcode);
224 static void cql_pr_int (int val,
225 void (*pr)(const char *buf, void *client_data),
228 char buf[21]; /* enough characters to 2^64 */
229 sprintf(buf, "%d", val);
230 (*pr)(buf, client_data);
231 (*pr)(" ", client_data);
235 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
236 void (*pr)(const char *buf, void *client_data),
240 int distance; /* to be filled in later depending on unit */
241 int distance_defined = 0;
243 int proxrel = 2; /* less than or equal */
244 int unit = 2; /* word */
247 char *name = mods->u.st.index;
248 char *term = mods->u.st.term;
249 char *relation = mods->u.st.relation;
251 if (!strcmp(name, "distance")) {
252 distance = strtol(term, (char**) 0, 0);
253 distance_defined = 1;
254 if (!strcmp(relation, "=")) {
256 } else if (!strcmp(relation, ">")) {
258 } else if (!strcmp(relation, "<")) {
260 } else if (!strcmp(relation, ">=")) {
262 } else if (!strcmp(relation, "<=")) {
264 } else if (!strcmp(relation, "<>")) {
267 ct->error = 40; /* Unsupported proximity relation */
268 ct->addinfo = xstrdup(relation);
271 } else if (!strcmp(name, "ordered")) {
273 } else if (!strcmp(name, "unordered")) {
275 } else if (!strcmp(name, "unit")) {
276 if (!strcmp(term, "word")) {
278 } else if (!strcmp(term, "sentence")) {
280 } else if (!strcmp(term, "paragraph")) {
282 } else if (!strcmp(term, "element")) {
285 ct->error = 42; /* Unsupported proximity unit */
286 ct->addinfo = xstrdup(term);
290 ct->error = 46; /* Unsupported boolean modifier */
291 ct->addinfo = xstrdup(name);
295 mods = mods->u.st.modifiers;
298 if (!distance_defined)
299 distance = (unit == 2) ? 1 : 0;
301 cql_pr_int(exclusion, pr, client_data);
302 cql_pr_int(distance, pr, client_data);
303 cql_pr_int(ordered, pr, client_data);
304 cql_pr_int(proxrel, pr, client_data);
305 (*pr)("k ", client_data);
306 cql_pr_int(unit, pr, client_data);
311 /* Returns location of first wildcard character in the `length'
312 * characters starting at `term', or a null pointer if there are
313 * none -- like memchr().
315 static const char *wcchar(const char *term, int length)
317 const char *best = 0;
321 for (whichp = "*?"; *whichp != '\0'; whichp++) {
322 current = (const char *) memchr(term, *whichp, length);
323 if (current != 0 && (best == 0 || current < best))
331 void emit_term(cql_transform_t ct,
333 const char *term, int length,
334 void (*pr)(const char *buf, void *client_data),
338 const char *ns = cn->u.st.index_uri;
340 assert(cn->which == CQL_NODE_ST);
344 if (length > 1 && term[0] == '^' && term[length-1] == '^')
346 cql_pr_attr(ct, "position", "firstAndLast", 0,
347 pr, client_data, 32);
351 else if (term[0] == '^')
353 cql_pr_attr(ct, "position", "first", 0,
354 pr, client_data, 32);
358 else if (term[length-1] == '^')
360 cql_pr_attr(ct, "position", "last", 0,
361 pr, client_data, 32);
366 cql_pr_attr(ct, "position", "any", 0,
367 pr, client_data, 32);
373 /* Check for well-known globbing patterns that represent
374 * simple truncation attributes as expected by, for example,
375 * Bath-compliant servers. If we find such a pattern but
376 * there's no mapping for it, that's fine: we just use a
377 * general pattern-matching attribute.
379 if (length > 1 && term[0] == '*' && term[length-1] == '*' &&
380 wcchar(term+1, length-2) == 0 &&
381 cql_pr_attr(ct, "truncation", "both", 0,
382 pr, client_data, 0)) {
386 else if (term[0] == '*' &&
387 wcchar(term+1, length-1) == 0 &&
388 cql_pr_attr(ct, "truncation", "left", 0,
389 pr, client_data, 0)) {
393 else if (term[length-1] == '*' &&
394 wcchar(term, length-1) == 0 &&
395 cql_pr_attr(ct, "truncation", "right", 0,
396 pr, client_data, 0)) {
399 else if (wcchar(term, length))
401 /* We have one or more wildcard characters, but not in a
402 * way that can be dealt with using only the standard
403 * left-, right- and both-truncation attributes. We need
404 * to translate the pattern into a Z39.58-type pattern,
405 * which has been supported in BIB-1 since 1996. If
406 * there's no configuration element for "truncation.z3958"
407 * we indicate this as error 28 "Masking character not
412 cql_pr_attr(ct, "truncation", "z3958", 0,
413 pr, client_data, 28);
414 mem = (char *) xmalloc(length+1);
415 for (i = 0; i < length; i++) {
416 if (term[i] == '*') mem[i] = '?';
417 else if (term[i] == '?') mem[i] = '#';
418 else mem[i] = term[i];
424 /* No masking characters. Use "truncation.none" if given. */
425 cql_pr_attr(ct, "truncation", "none", 0,
430 cql_pr_attr_uri(ct, "index", ns,
431 cn->u.st.index, "serverChoice",
432 pr, client_data, 16);
434 if (cn->u.st.modifiers)
436 struct cql_node *mod = cn->u.st.modifiers;
437 for (; mod; mod = mod->u.st.modifiers)
439 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
440 pr, client_data, 20);
444 (*pr)("\"", client_data);
445 for (i = 0; i<length; i++)
447 /* pr(int) each character */
453 /* do we have to escape this char? */
461 (*pr)(cp, client_data);
463 (*pr)("\" ", client_data);
466 void emit_wordlist(cql_transform_t ct,
468 void (*pr)(const char *buf, void *client_data),
472 const char *cp0 = cn->u.st.term;
474 const char *last_term = 0;
480 cp1 = strchr(cp0, ' ');
483 (*pr)("@", client_data);
484 (*pr)(op, client_data);
485 (*pr)(" ", client_data);
486 emit_term(ct, cn, last_term, last_length, pr, client_data);
490 last_length = cp1 - cp0;
492 last_length = strlen(cp0);
496 emit_term(ct, cn, last_term, last_length, pr, client_data);
499 void cql_transform_r(cql_transform_t ct,
501 void (*pr)(const char *buf, void *client_data),
505 struct cql_node *mods;
512 ns = cn->u.st.index_uri;
515 if (!strcmp(ns, cql_uri())
516 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
518 (*pr)("@set \"", client_data);
519 (*pr)(cn->u.st.term, client_data);
520 (*pr)("\" ", client_data);
532 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
533 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "="))
534 cql_pr_attr(ct, "relation", "eq", "scr",
535 pr, client_data, 19);
536 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "<="))
537 cql_pr_attr(ct, "relation", "le", "scr",
538 pr, client_data, 19);
539 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, ">="))
540 cql_pr_attr(ct, "relation", "ge", "scr",
541 pr, client_data, 19);
543 cql_pr_attr(ct, "relation", cn->u.st.relation, "eq",
544 pr, client_data, 19);
545 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
546 pr, client_data, 24);
547 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
549 emit_wordlist(ct, cn, pr, client_data, "and");
551 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
553 emit_wordlist(ct, cn, pr, client_data, "or");
557 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
562 (*pr)("@", client_data);
563 (*pr)(cn->u.boolean.value, client_data);
564 (*pr)(" ", client_data);
565 mods = cn->u.boolean.modifiers;
566 if (!strcmp(cn->u.boolean.value, "prox")) {
567 if (!cql_pr_prox(ct, mods, pr, client_data))
570 /* Boolean modifiers other than on proximity not supported */
571 ct->error = 46; /* SRW diag: "Unsupported boolean modifier" */
572 ct->addinfo = xstrdup(mods->u.st.index);
576 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
577 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
581 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
586 int cql_transform(cql_transform_t ct,
588 void (*pr)(const char *buf, void *client_data),
591 struct cql_prop_entry *e;
592 NMEM nmem = nmem_create();
599 for (e = ct->entry; e ; e = e->next)
601 if (!cql_strncmp(e->pattern, "set.", 4))
602 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
603 else if (!cql_strcmp(e->pattern, "set"))
604 cql_apply_prefix(nmem, cn, 0, e->value);
606 cql_transform_r (ct, cn, pr, client_data);
612 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
614 return cql_transform(ct, cn, cql_fputs, f);
617 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
620 struct cql_buf_write_info info;
626 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
628 /* Attempt to write past end of buffer. For some reason, this
629 SRW diagnostic is deprecated, but it's so perfect for our
630 purposes that it would be stupid not to use it. */
632 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
633 sprintf(numbuf, "%ld", (long) info.max);
634 ct->addinfo = xstrdup(numbuf);
638 info.buf[info.off] = '\0';
642 int cql_transform_error(cql_transform_t ct, const char **addinfo)
644 *addinfo = ct->addinfo;
650 * indent-tabs-mode: nil
652 * vim: shiftwidth=4 tabstop=8 expandtab