1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2008 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements CQL transform (CQL to RPN conversion).
10 * Evaluation order of rules:
25 #include <yaz/xmalloc.h>
26 #include <yaz/diagsrw.h>
/* Node of the singly linked property list. Code in this file also reads
 * and writes string members named `pattern' and `value' on each node. */
28 struct cql_prop_entry {
/* Link to the following node; list walks in this file stop at a null link. */
31 struct cql_prop_entry *next;
/* Transform state. Functions in this file reach it through
 * cql_transform_t handles and also use ct->error and ct->addinfo,
 * presumably further members of this struct. */
34 struct cql_transform_t_ {
/* Head of the property list. */
35 struct cql_prop_entry *entry;
/* Builds a transform handle from the property text read from `f'.
 * Input is consumed one line at a time with fgets(); from each accepted
 * line a pattern token and a value string are cut out and stored, as
 * freshly allocated NUL-terminated copies, in a new list entry. */
40 cql_transform_t cql_transform_open_FILE(FILE *f)
43 cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct));
/* pp addresses the link slot that receives the next new entry; it
 * starts out at the list head. */
44 struct cql_prop_entry **pp = &ct->entry;
/* fgets() is handed one byte less than the size of `line'. */
48 while (fgets(line, sizeof(line)-1, f))
50 const char *cp_value_start;
51 const char *cp_value_end;
52 const char *cp_pattern_start;
53 const char *cp_pattern_end;
54 const char *cp = line;
/* Skip leading blanks and tabs. */
56 while (*cp && strchr(" \t", *cp))
58 cp_pattern_start = cp;
/* The pattern token runs up to whitespace, '=' or '#'. */
60 while (*cp && !strchr(" \t\r\n=#", *cp))
/* Nothing was consumed, so this line carries no pattern token. */
63 if (cp == cp_pattern_start)
/* Skip blanks and tabs that follow the pattern token. */
65 while (*cp && strchr(" \t", *cp))
/* The partially built handle is released here.
 * NOTE(review): presumably the reaction to a malformed line -- confirm
 * the guarding condition and what is returned afterwards. */
70 cql_transform_close(ct);
/* Skip whitespace, line-ending characters included, ahead of the value. */
74 while (*cp && strchr(" \t\r\n", *cp))
/* A '#' in the remainder of the line is looked up as the value's end. */
77 cp_value_end = strchr(cp, '#');
/* Alternative end position: the NUL terminator of the line. */
79 cp_value_end = strlen(line) + line;
/* Tests whether a non-empty value ends in a whitespace character. */
81 if (cp_value_end != cp_value_start &&
82 strchr(" \t\r\n", cp_value_end[-1]))
/* Create the new entry in the current link slot. */
84 *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp));
/* Copy [cp_pattern_start, cp_pattern_end) and terminate it. */
85 (*pp)->pattern = (char *) xmalloc(cp_pattern_end-cp_pattern_start + 1);
86 memcpy ((*pp)->pattern, cp_pattern_start,
87 cp_pattern_end-cp_pattern_start);
88 (*pp)->pattern[cp_pattern_end-cp_pattern_start] = '\0';
/* Same for the value; the memcpy is skipped when the value is empty. */
90 (*pp)->value = (char *) xmalloc (cp_value_end-cp_value_start + 1);
91 if (cp_value_start != cp_value_end)
92 memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
93 (*pp)->value[cp_value_end - cp_value_start] = '\0';
/* Closes a transform handle.
 * NOTE(review): only the list-walking variables are visible here;
 * presumably every entry and then the handle itself are freed -- confirm. */
100 void cql_transform_close(cql_transform_t ct)
/* Cursor over the entry list. */
102 struct cql_prop_entry *pe;
/* Fetch the successor first so the walk can continue after `pe' has
 * been dealt with. */
108 struct cql_prop_entry *pe_next = pe->next;
/* Convenience wrapper: opens `fname' in text read mode and hands the
 * stream to cql_transform_open_FILE().
 * NOTE(review): confirm that a failed fopen() is checked before the
 * stream is used and that the stream is closed afterwards. */
119 cql_transform_t cql_transform_open_fname(const char *fname)
122 FILE *f = fopen(fname, "r");
125 ct = cql_transform_open_FILE(f);
/* Finds a property by a dotted key assembled from up to three
 * components (pat1, pat2, pat3). Each component contributes at most 39
 * characters because of the "%.39s" precision. The entry list is then
 * scanned from the head and every pattern is compared with cql_strcmp().
 * Callers in this file treat a null result as "no such property".
 * NOTE(review): sprintf() is unbounded apart from the precisions, so
 * `pattern' must hold 3*39 characters + 2 dots + NUL = 120 bytes --
 * confirm against its declaration. */
130 static const char *cql_lookup_property(cql_transform_t ct,
131 const char *pat1, const char *pat2,
135 struct cql_prop_entry *e;
/* Key forms: "pat1.pat2.pat3", "pat1.pat2", "pat1.pat3" or just "pat1". */
137 if (pat1 && pat2 && pat3)
138 sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
139 else if (pat1 && pat2)
140 sprintf (pattern, "%.39s.%.39s", pat1, pat2);
141 else if (pat1 && pat3)
142 sprintf (pattern, "%.39s.%.39s", pat1, pat3);
144 sprintf (pattern, "%.39s", pat1);
/* Linear search over the entries. */
148 for (e = ct->entry; e; e = e->next)
150 if (!cql_strcmp(e->pattern, pattern))
/* Emits, through `pr', the attribute specification configured for a
 * CQL item.
 *   category    - first key component, e.g. "relation", "index"
 *   uri         - when it equals the value of a "set.<prefix>" entry,
 *                 <prefix> becomes the second key component
 *   val         - item value; default_val is used instead when val is null
 *   pr          - output callback, invoked with client_data
 *   errcode     - stored in ct->error when nonzero and no error has
 *                 been recorded yet (the surrounding condition is not
 *                 fully visible here)
 * Callers in this file test the int result for truth; presumably it is
 * nonzero when a mapping was found and written -- confirm. */
156 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
157 const char *uri, const char *val, const char *default_val,
158 void (*pr)(const char *buf, void *client_data),
163 const char *eval = val ? val : default_val;
164 const char *prefix = 0;
168 struct cql_prop_entry *e;
/* Search the "set.<prefix>" entries for one whose value is `uri'.
 * NOTE(review): memcmp() inspects 4 bytes even when the pattern string
 * is shorter than that; cql_transform() uses cql_strncmp() for the same
 * test -- consider aligning. */
170 for (e = ct->entry; e; e = e->next)
171 if (!memcmp(e->pattern, "set.", 4) && e->value &&
172 !strcmp(e->value, uri))
/* The prefix is whatever follows "set." in the pattern. */
174 prefix = e->pattern+4;
177 /* must have a prefix now - if not it's an error */
/* Primary lookup: category[.prefix].eval */
183 res = cql_lookup_property(ct, category, prefix, eval);
184 /* we have some aliases for some relations unfortunately.. */
/* NOTE(review): `val' goes to strcmp() below although the fallback to
 * default_val above implies it may be null -- confirm a guard exists. */
185 if (!res && !prefix && !strcmp(category, "relation"))
187 if (!strcmp(val, "=="))
188 res = cql_lookup_property(ct, category, prefix, "exact");
189 if (!strcmp(val, "="))
190 res = cql_lookup_property(ct, category, prefix, "eq");
191 if (!strcmp(val, "<="))
192 res = cql_lookup_property(ct, category, prefix, "le");
193 if (!strcmp(val, ">="))
194 res = cql_lookup_property(ct, category, prefix, "ge");
/* Wildcard lookup: category[.prefix].* */
197 res = cql_lookup_property(ct, category, prefix, "*");
/* Walk the property value one "name=value" item at a time: find the
 * '=', extend to the next space or the end of the string, copy the
 * item into `buf' if it fits and write it after an "@attr " keyword. */
203 const char *cp0 = res, *cp1;
204 while ((cp1 = strchr(cp0, '=')))
207 while (*cp1 && *cp1 != ' ')
/* Items that would not fit into buf are not copied. */
209 if (cp1 - cp0 >= sizeof(buf))
211 memcpy (buf, cp0, cp1 - cp0);
213 (*pr)("@attr ", client_data);
/* The copied item is examined character by character. */
215 for (i = 0; buf[i]; i++)
218 (*pr)(eval, client_data);
224 (*pr)(tmp, client_data);
227 (*pr)(" ", client_data);
/* Only the first error is recorded; later ones leave ct->error alone. */
235 if (errcode && !ct->error)
/* NOTE(review): `val' may be null here as well -- confirm a guard
 * precedes this xstrdup(). */
239 ct->addinfo = xstrdup(val);
/* Same as cql_pr_attr_uri() but without a URI: 0 is passed for `uri'
 * and every other argument is forwarded unchanged. */
246 int cql_pr_attr(cql_transform_t ct, const char *category,
247 const char *val, const char *default_val,
248 void (*pr)(const char *buf, void *client_data),
252 return cql_pr_attr_uri(ct, category, 0 /* uri */,
253 val, default_val, pr, client_data, errcode);
/* Writes `val' in decimal through `pr', followed by a single space. */
257 static void cql_pr_int (int val,
258 void (*pr)(const char *buf, void *client_data),
261 char buf[21]; /* enough characters to 2^64 */
262 sprintf(buf, "%d", val);
263 (*pr)(buf, client_data);
/* Separator after the number. */
264 (*pr)(" ", client_data);
/* Translates the modifier chain of a proximity operator and writes the
 * resulting parameters through `pr' in this order: exclusion, distance,
 * ordered, relation, the literal "k", unit.
 * Recognised modifier names: "distance", "ordered", "unordered", "unit".
 * Failures are recorded in ct->error / ct->addinfo:
 *   40 unsupported proximity relation, 42 unsupported proximity unit,
 *   46 unsupported boolean modifier.
 * The caller in this file treats a zero result as failure.
 * NOTE(review): each error path stores a fresh xstrdup() in ct->addinfo;
 * confirm a previously stored string cannot be overwritten and leaked. */
268 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
269 void (*pr)(const char *buf, void *client_data),
273 int distance; /* to be filled in later depending on unit */
274 int distance_defined = 0;
276 int proxrel = 2; /* less than or equal */
277 int unit = 2; /* word */
/* Each modifier node supplies a name (index), a value (term) and a
 * relation. */
280 char *name = mods->u.st.index;
281 char *term = mods->u.st.term;
282 char *relation = mods->u.st.relation;
284 if (!strcmp(name, "distance")) {
/* Base 0: strtol() also accepts octal and hexadecimal notation; the
 * long result is narrowed to int and no end pointer is requested. */
285 distance = strtol(term, (char**) 0, 0);
286 distance_defined = 1;
/* Relations recognised for the distance modifier. */
287 if (!strcmp(relation, "=")) {
289 } else if (!strcmp(relation, ">")) {
291 } else if (!strcmp(relation, "<")) {
293 } else if (!strcmp(relation, ">=")) {
295 } else if (!strcmp(relation, "<=")) {
297 } else if (!strcmp(relation, "<>")) {
300 ct->error = 40; /* Unsupported proximity relation */
301 ct->addinfo = xstrdup(relation);
304 } else if (!strcmp(name, "ordered")) {
306 } else if (!strcmp(name, "unordered")) {
308 } else if (!strcmp(name, "unit")) {
/* Units recognised for the unit modifier. */
309 if (!strcmp(term, "word")) {
311 } else if (!strcmp(term, "sentence")) {
313 } else if (!strcmp(term, "paragraph")) {
315 } else if (!strcmp(term, "element")) {
318 ct->error = 42; /* Unsupported proximity unit */
319 ct->addinfo = xstrdup(term);
323 ct->error = 46; /* Unsupported boolean modifier */
324 ct->addinfo = xstrdup(name);
/* Advance to the next modifier in the chain. */
328 mods = mods->u.st.modifiers;
/* Without an explicit distance the default is 1 when the unit code is 2
 * (word) and 0 for every other unit. */
331 if (!distance_defined)
332 distance = (unit == 2) ? 1 : 0;
334 cql_pr_int(exclusion, pr, client_data);
335 cql_pr_int(distance, pr, client_data);
336 cql_pr_int(ordered, pr, client_data);
337 cql_pr_int(proxrel, pr, client_data);
338 (*pr)("k ", client_data);
339 cql_pr_int(unit, pr, client_data);
344 /* Returns location of first wildcard character in the `length'
345 * characters starting at `term', or a null pointer if there are
346 * none -- like memchr().
348 static const char *wcchar(int start, const char *term, int length)
/* term[-1] is only inspected when `start' is zero, i.e. when the caller
 * indicates that a preceding character exists; a character that follows
 * a backslash is not treated as a wildcard. */
352 if (start || term[-1] != '\\')
/* Wildcards are '*' and '?'. Note that strchr() also matches the
 * terminating NUL of its first argument, so a NUL byte in `term' would
 * satisfy this test as well. */
353 if (strchr("*?", *term))
363 /* ### checks for CQL relation-name rather than Type-1 attribute */
/* Walks the modifier chain of `cn' looking for a modifier whose index
 * equals `name' (exact strcmp() comparison). The caller in this file
 * uses the result as a boolean. */
364 static int has_modifier(struct cql_node *cn, const char *name) {
365 struct cql_node *mod;
366 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
367 if (!strcmp(mod->u.st.index, name))
/* Writes one search term, preceded by its attributes, through `pr'.
 *   cn            - search-term node supplying index, index URI and
 *                   modifiers (must be of kind CQL_NODE_ST)
 *   term, length  - the term text; it is not required to be NUL-terminated
 *                   since all accesses below are bounded by `length'
 * Steps, in order: position attribute derived from '^' anchors,
 * truncation attribute derived from '*' / '?' wildcards, index
 * attribute, one relationModifier attribute per modifier, and finally
 * the term itself inside double quotes. Anchor and wildcard processing
 * is skipped when the node carries a "regexp" modifier or the term is
 * empty. */
375 void emit_term(cql_transform_t ct,
377 const char *term, int length,
378 void (*pr)(const char *buf, void *client_data),
382 const char *ns = cn->u.st.index_uri;
383 int process_term = !has_modifier(cn, "regexp");
/* Note that cn has already been dereferenced above when this check runs. */
386 assert(cn->which == CQL_NODE_ST);
/* Anchors: '^' at both ends, at the start only, at the end only, or
 * none selects position "firstAndLast", "first", "last" or "any".
 * A missing mapping is reported with error code 32. */
388 if (process_term && length > 0)
390 if (length > 1 && term[0] == '^' && term[length-1] == '^')
392 cql_pr_attr(ct, "position", "firstAndLast", 0,
393 pr, client_data, 32);
397 else if (term[0] == '^')
399 cql_pr_attr(ct, "position", "first", 0,
400 pr, client_data, 32);
404 else if (term[length-1] == '^')
406 cql_pr_attr(ct, "position", "last", 0,
407 pr, client_data, 32);
412 cql_pr_attr(ct, "position", "any", 0,
413 pr, client_data, 32);
/* Wildcards: locate the first unescaped wildcard and, if there is one,
 * the next wildcard after it. Their positions decide between the
 * truncation kinds "both", "left", "right" and "z3958"; a term without
 * wildcards gets "none". */
417 if (process_term && length > 0)
419 const char *first_wc = wcchar(1, term, length);
420 const char *second_wc = first_wc ?
421 wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
423 /* Check for well-known globbing patterns that represent
424 * simple truncation attributes as expected by, for example,
425 * Bath-compliant server. If we find such a pattern but
426 * there's no mapping for it, that's fine: we just use a
427 * general pattern-matching attribute.
429 if (first_wc == term && second_wc == term + length-1
430 && *first_wc == '*' && *second_wc == '*'
431 && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
436 else if (first_wc == term && second_wc == 0 && *first_wc == '*'
437 && cql_pr_attr(ct, "truncation", "left", 0,
443 else if (first_wc == term + length-1 && second_wc == 0
445 && cql_pr_attr(ct, "truncation", "right", 0,
452 /* We have one or more wildcard characters, but not in a
453 * way that can be dealt with using only the standard
454 * left-, right- and both-truncation attributes. We need
455 * to translate the pattern into a Z39.58-type pattern,
456 * which has been supported in BIB-1 since 1996. If
457 * there's no configuration element for "truncation.z3958"
458 * we indicate this as error 28 "Masking character not
462 cql_pr_attr(ct, "truncation", "z3958", 0,
463 pr, client_data, 28);
464 z3958_mem = (char *) xmalloc(length+1);
465 for (i = 0; i < length; i++)
467 if (i > 0 && term[i-1] == '\\')
468 z3958_mem[i] = term[i];
469 else if (term[i] == '*')
471 else if (term[i] == '?')
474 z3958_mem[i] = term[i];
476 z3958_mem[length] = '\0';
480 /* No masking characters. Use "truncation.none" if given. */
481 cql_pr_attr(ct, "truncation", "none", 0,
/* Index attribute: looked up with the index URI, "serverChoice" being
 * the default index name; a missing mapping is error 16. */
486 cql_pr_attr_uri(ct, "index", ns,
487 cn->u.st.index, "serverChoice",
488 pr, client_data, 16);
/* One relationModifier attribute per modifier on the node; a missing
 * mapping is error 20. */
490 if (cn->u.st.modifiers)
492 struct cql_node *mod = cn->u.st.modifiers;
493 for (; mod; mod = mod->u.st.modifiers)
495 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
496 pr, client_data, 20);
/* The term itself, wrapped in double quotes and followed by a space. */
500 (*pr)("\"", client_data);
501 for (i = 0; i<length; i++)
503 /* pr(int) each character */
504 /* we do not need to deal with \-sequences because the
505 CQL and PQF terms have same \-format, bug #1988 */
510 (*pr)(buf, client_data);
512 (*pr)("\" ", client_data);
/* Writes the term of `cn' and the terms of all nodes on its extra_terms
 * chain, joined by the operator `op' in prefix form ("@op ").
 * Every term is emitted with `cn' as the attribute source, including
 * the terms that come from the extra_terms nodes. */
516 void emit_terms(cql_transform_t ct,
518 void (*pr)(const char *buf, void *client_data),
522 struct cql_node *ne = cn->u.st.extra_terms;
/* Operator prefix ahead of the first term. */
525 (*pr)("@", client_data);
526 (*pr)(op, client_data);
527 (*pr)(" ", client_data);
529 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
531 for (; ne; ne = ne->u.st.extra_terms)
/* Another operator prefix is needed only while further terms follow. */
533 if (ne->u.st.extra_terms)
535 (*pr)("@", client_data);
536 (*pr)(op, client_data);
537 (*pr)(" ", client_data);
539 emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term),
/* Splits the term of `cn' at space characters and writes the pieces as
 * separate terms joined by the operator `op' in prefix form. Emission
 * lags one word behind the scan: a word is written, preceded by "@op ",
 * once the following word has been found, and the final word is
 * written without an operator prefix. */
544 void emit_wordlist(cql_transform_t ct,
546 void (*pr)(const char *buf, void *client_data),
/* Scan position inside the term string. */
550 const char *cp0 = cn->u.st.term;
/* Most recently found word that has not been written yet. */
552 const char *last_term = 0;
558 cp1 = strchr(cp0, ' ');
561 (*pr)("@", client_data);
562 (*pr)(op, client_data);
563 (*pr)(" ", client_data);
564 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Word length: up to the space that was found ... */
568 last_length = cp1 - cp0;
/* ... or the rest of the string. */
570 last_length = strlen(cp0);
/* Flush the final pending word. */
574 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Recursive worker of the conversion: writes the prefix-notation query
 * for the CQL tree rooted at `cn' through `pr'.
 *  - search-term nodes: a "resultSet" index in the CQL namespace becomes
 *    an @set reference; anything else gets its attributes followed by
 *    the term(s)
 *  - boolean nodes: "@<operator> ", proximity parameters for "prox",
 *    then the left and right subtrees
 *  - any other node kind is reported on stderr as impossible */
577 void cql_transform_r(cql_transform_t ct,
579 void (*pr)(const char *buf, void *client_data),
583 struct cql_node *mods;
590 ns = cn->u.st.index_uri;
/* NOTE(review): confirm `ns' is checked for null before this strcmp(). */
593 if (!strcmp(ns, cql_uri())
594 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
/* NOTE(review): the term is written between double quotes exactly as
 * stored -- confirm that embedded quotes cannot occur or are escaped. */
596 (*pr)("@set \"", client_data);
597 (*pr)(cn->u.st.term, client_data);
598 (*pr)("\" ", client_data);
/* Attributes for an ordinary term: the "always" attributes, then the
 * relation (error 19 if unmapped) and the structure keyed on the
 * relation (error 24 if unmapped). */
610 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
611 cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data, 19);
612 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
613 pr, client_data, 24);
/* Relation "all" / "any": the term is split into words combined with
 * "and" / "or"; every other relation emits the node's terms with "and". */
614 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
616 emit_wordlist(ct, cn, pr, client_data, "and");
618 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
620 emit_wordlist(ct, cn, pr, client_data, "or");
624 emit_terms(ct, cn, pr, client_data, "and");
/* Boolean node: operator in prefix form. */
628 (*pr)("@", client_data);
629 (*pr)(cn->u.boolean.value, client_data);
630 (*pr)(" ", client_data);
631 mods = cn->u.boolean.modifiers;
632 if (!strcmp(cn->u.boolean.value, "prox")) {
633 if (!cql_pr_prox(ct, mods, pr, client_data))
636 /* Boolean modifiers other than on proximity not supported */
637 ct->error = 46; /* SRW diag: "Unsupported boolean modifier" */
638 ct->addinfo = xstrdup(mods->u.st.index);
/* Operands follow the operator: left subtree first, then right. */
642 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
643 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
647 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Entry point of the conversion. Before the tree is walked, every
 * "set.<prefix>" entry and the plain "set" entry of the configuration
 * is applied to the tree with cql_apply_prefix(); the conversion itself
 * is then carried out by cql_transform_r(), writing through `pr'. */
652 int cql_transform(cql_transform_t ct,
654 void (*pr)(const char *buf, void *client_data),
657 struct cql_prop_entry *e;
/* Memory handle passed to cql_apply_prefix(). */
658 NMEM nmem = nmem_create();
665 for (e = ct->entry; e ; e = e->next)
/* "set.<prefix>" binds <prefix> to the entry's value ... */
667 if (!cql_strncmp(e->pattern, "set.", 4))
668 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
/* ... whereas a bare "set" passes 0 as the prefix. */
669 else if (!cql_strcmp(e->pattern, "set"))
670 cql_apply_prefix(nmem, cn, 0, e->value);
672 cql_transform_r (ct, cn, pr, client_data);
/* Runs cql_transform() with cql_fputs as the output callback and the
 * stream `f' as its client data; the result is passed straight back. */
678 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
680 return cql_transform(ct, cn, cql_fputs, f);
/* Runs cql_transform() with cql_buf_write_handler as the output
 * callback and a cql_buf_write_info record as its client data. When the
 * output did not fit, ct->error becomes YAZ_SRW_TOO_MANY_CHARS_IN_QUERY
 * and ct->addinfo the decimal rendering of info.max; otherwise the text
 * in info.buf is NUL-terminated at info.off. */
683 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
686 struct cql_buf_write_info info;
692 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
694 /* Attempt to write past end of buffer. For some reason, this
695 SRW diagnostic is deprecated, but it's so perfect for our
696 purposes that it would be stupid not to use it. */
698 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
699 sprintf(numbuf, "%ld", (long) info.max);
700 ct->addinfo = xstrdup(numbuf);
/* Terminate the text produced so far. */
704 info.buf[info.off] = '\0';
/* Reports the outcome of the last conversion: the additional-info
 * string held in the handle is handed out through `addinfo'. The string
 * remains the handle's own pointer; it is not copied here.
 * NOTE(review): presumably the return value is ct->error -- confirm. */
708 int cql_transform_error(cql_transform_t ct, const char **addinfo)
710 *addinfo = ct->addinfo;
716 * indent-tabs-mode: nil
718 * vim: shiftwidth=4 tabstop=8 expandtab