1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2008 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements CQL transform (CQL to RPN conversion).
10 * Evaluation order of rules:
25 #include <yaz/xmalloc.h>
26 #include <yaz/diagsrw.h>
/* One node of the list of pattern/value pairs built by
 * cql_transform_open_FILE(); nodes are chained through `next'.
 * NOTE(review): the `pattern' and `value' members used elsewhere in this
 * file are declared on lines not visible in this view. */
28 struct cql_prop_entry {
31 struct cql_prop_entry *next;
/* State reached through a cql_transform_t handle. `entry' is the head of
 * the property list. NOTE(review): the `error' and `addinfo' members that
 * other functions in this file access are declared on lines not visible
 * in this view. */
34 struct cql_transform_t_ {
35 struct cql_prop_entry *entry;
/* Builds a transform handle from the property lines read from `f'.
 * Each line is split into a pattern and a value, and NUL-terminated
 * copies of both are stored in a newly allocated cql_prop_entry that is
 * linked in through `pp'.
 * NOTE(review): several lines of this function are not visible in this
 * view (among them the declaration of `line' and the return), so the
 * error path and the return value should be confirmed against the full
 * source. */
40 cql_transform_t cql_transform_open_FILE(FILE *f)
43 cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct));
/* pp addresses the link that receives the next entry; it starts at the
 * list head */
44 struct cql_prop_entry **pp = &ct->entry;
48 while (fgets(line, sizeof(line)-1, f))
50 const char *cp_value_start;
51 const char *cp_value_end;
52 const char *cp_pattern_start;
53 const char *cp_pattern_end;
54 const char *cp = line;
/* skip leading blanks and tabs */
56 while (*cp && strchr(" \t", *cp))
58 cp_pattern_start = cp;
/* the pattern runs up to whitespace, '=' or '#' */
60 while (*cp && !strchr(" \t\r\n=#", *cp))
/* nothing was consumed: the line holds no pattern */
63 if (cp == cp_pattern_start)
65 while (*cp && strchr(" \t", *cp))
/* the partially built handle is discarded on this path; the condition
 * guarding it is not visible here */
70 cql_transform_close(ct);
/* skip whitespace ahead of the value */
74 while (*cp && strchr(" \t\r\n", *cp))
/* the value ends at the first '#' or, presumably when none is found, at
 * the end of the line */
77 cp_value_end = strchr(cp, '#');
79 cp_value_end = strlen(line) + line;
/* test for trailing whitespace at the end of the value; the statement it
 * controls is not visible here */
81 if (cp_value_end != cp_value_start &&
82 strchr(" \t\r\n", cp_value_end[-1]))
/* allocate the entry and store NUL-terminated copies of both strings */
84 *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp));
85 (*pp)->pattern = (char *) xmalloc(cp_pattern_end-cp_pattern_start + 1);
86 memcpy ((*pp)->pattern, cp_pattern_start,
87 cp_pattern_end-cp_pattern_start);
88 (*pp)->pattern[cp_pattern_end-cp_pattern_start] = '\0';
90 (*pp)->value = (char *) xmalloc (cp_value_end-cp_value_start + 1);
/* memcpy is skipped for an empty value */
91 if (cp_value_start != cp_value_end)
92 memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
93 (*pp)->value[cp_value_end - cp_value_start] = '\0';
/* Tears down a handle; cql_transform_open_FILE() also calls this on one
 * of its own paths. Only part of the list walk is visible here.
 * NOTE(review): the statements that release each entry and the handle
 * itself are on lines not shown; confirm against the full source. */
100 void cql_transform_close(cql_transform_t ct)
102 struct cql_prop_entry *pe;
/* the successor is read before the current entry is dealt with, so the
 * walk does not depend on the current entry afterwards */
108 struct cql_prop_entry *pe_next = pe->next;
/* Opens `fname' for reading and hands the stream to
 * cql_transform_open_FILE().
 * NOTE(review): the handling of a failed fopen(), the closing of the
 * stream and the return are on lines not visible here. */
119 cql_transform_t cql_transform_open_fname(const char *fname)
122 FILE *f = fopen(fname, "r");
125 ct = cql_transform_open_FILE(f);
/* Looks up a property by a dotted name assembled from up to three parts
 * and searches the entry list for a pattern that cql_strcmp() reports as
 * equal. Every part is limited to 39 characters by the format string.
 * NOTE(review): the declaration of `pattern' and the return statements
 * are not visible here; callers treat the result as a string that is
 * null when nothing was found. */
130 static const char *cql_lookup_property(cql_transform_t ct,
131 const char *pat1, const char *pat2,
135 struct cql_prop_entry *e;
/* name forms, in order of preference: pat1.pat2.pat3, pat1.pat2,
 * pat1.pat3 and finally pat1 alone */
137 if (pat1 && pat2 && pat3)
138 sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
139 else if (pat1 && pat2)
140 sprintf (pattern, "%.39s.%.39s", pat1, pat2);
141 else if (pat1 && pat3)
142 sprintf (pattern, "%.39s.%.39s", pat1, pat3);
144 sprintf (pattern, "%.39s", pat1);
/* linear search over all entries */
148 for (e = ct->entry; e; e = e->next)
150 if (!cql_strcmp(e->pattern, pattern))
/* Looks up the property for `category' and a value, and writes the
 * resulting attribute text through the `pr' callback.
 * The value used for the lookup is `val', or `default_val' when `val' is
 * null. The "set.<prefix>" entry whose value equals `uri' supplies the
 * prefix part of the lookup name.
 * `errcode' is the diagnostic to record; it is only considered when it
 * is non-zero and ct->error is not already set.
 * NOTE(review): many guards and the return statements of this function
 * are on lines not visible here, so the exact success and failure return
 * values should be confirmed against the full source. */
156 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
157 const char *uri, const char *val, const char *default_val,
158 void (*pr)(const char *buf, void *client_data),
163 const char *eval = val ? val : default_val;
164 const char *prefix = 0;
168 struct cql_prop_entry *e;
/* find the context-set prefix that is mapped to `uri' */
170 for (e = ct->entry; e; e = e->next)
171 if (!memcmp(e->pattern, "set.", 4) && e->value &&
172 !strcmp(e->value, uri))
174 prefix = e->pattern+4;
177 /* must have a prefix now - if not it's an error */
183 res = cql_lookup_property(ct, category, prefix, eval);
184 /* we have some aliases for some relations unfortunately.. */
185 if (!res && !prefix && !strcmp(category, "relation"))
/* NOTE(review): these comparisons use `val', not `eval'; if `val' can be
 * null while `default_val' is not, strcmp would receive a null pointer.
 * Confirm against the callers and the guards not visible here. */
187 if (!strcmp(val, "=="))
188 res = cql_lookup_property(ct, category, prefix, "exact");
189 if (!strcmp(val, "="))
190 res = cql_lookup_property(ct, category, prefix, "eq");
191 if (!strcmp(val, "<="))
192 res = cql_lookup_property(ct, category, prefix, "le");
193 if (!strcmp(val, ">="))
194 res = cql_lookup_property(ct, category, prefix, "ge");
/* a further lookup under the name "*"; its guard is not visible here */
197 res = cql_lookup_property(ct, category, prefix, "*");
/* the property value is scanned for pieces that contain '=' and end at
 * a space or at the end of the string */
203 const char *cp0 = res, *cp1;
204 while ((cp1 = strchr(cp0, '=')))
207 while (*cp1 && *cp1 != ' ')
/* pieces that do not fit into `buf' are checked for here; the statement
 * controlled by this test is not visible */
209 if (cp1 - cp0 >= sizeof(buf))
211 memcpy (buf, cp0, cp1 - cp0);
213 (*pr)("@attr ", client_data);
215 for (i = 0; buf[i]; i++)
218 (*pr)(eval, client_data);
224 (*pr)(tmp, client_data);
227 (*pr)(" ", client_data);
/* record the diagnostic only when one was requested and none is pending */
235 if (errcode && !ct->error)
239 ct->addinfo = xstrdup(val);
/* Same as cql_pr_attr_uri() with a null `uri'; all other arguments are
 * passed through unchanged and the result is returned as is. */
246 int cql_pr_attr(cql_transform_t ct, const char *category,
247 const char *val, const char *default_val,
248 void (*pr)(const char *buf, void *client_data),
252 return cql_pr_attr_uri(ct, category, 0 /* uri */,
253 val, default_val, pr, client_data, errcode);
/* Writes `val' in decimal, followed by a single space, through the `pr'
 * callback. The callback is invoked twice: once for the digits and once
 * for the separator. */
257 static void cql_pr_int (int val,
258 void (*pr)(const char *buf, void *client_data),
261 char buf[21]; /* enough characters to 2^64 */
262 sprintf(buf, "%d", val);
263 (*pr)(buf, client_data);
264 (*pr)(" ", client_data);
/* Walks the modifier chain `mods' and writes the proximity parameters
 * through `pr' in this order: exclusion, distance, ordered, proxrel, the
 * literal "k", unit.
 * Modifier names handled: "distance" (with a relation), "ordered",
 * "unordered" and "unit" (word, sentence, paragraph, element). An
 * unrecognised relation, unit or modifier name sets ct->error to the
 * matching YAZ_SRW_* code and stores a copy of the offending string in
 * ct->addinfo.
 * NOTE(review): the return statements and the declarations of
 * `exclusion' and `ordered' are on lines not visible here; the caller
 * tests the result with `!', so a zero return presumably signals
 * failure. */
268 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
269 void (*pr)(const char *buf, void *client_data),
273 int distance; /* to be filled in later depending on unit */
274 int distance_defined = 0;
276 int proxrel = 2; /* less than or equal */
277 int unit = 2; /* word */
280 char *name = mods->u.st.index;
281 char *term = mods->u.st.term;
282 char *relation = mods->u.st.relation;
284 if (!strcmp(name, "distance")) {
/* base 0 lets strtol accept decimal, octal (leading 0) and hexadecimal
 * (leading 0x) input; conversion errors are not checked */
285 distance = strtol(term, (char**) 0, 0);
286 distance_defined = 1;
287 if (!strcmp(relation, "=")) {
289 } else if (!strcmp(relation, ">")) {
291 } else if (!strcmp(relation, "<")) {
293 } else if (!strcmp(relation, ">=")) {
295 } else if (!strcmp(relation, "<=")) {
297 } else if (!strcmp(relation, "<>")) {
/* NOTE(review): ct->addinfo is overwritten here and in the two error
 * branches below without a visible release of a previous value */
300 ct->error = YAZ_SRW_UNSUPP_PROX_RELATION;
301 ct->addinfo = xstrdup(relation);
304 } else if (!strcmp(name, "ordered")) {
306 } else if (!strcmp(name, "unordered")) {
308 } else if (!strcmp(name, "unit")) {
309 if (!strcmp(term, "word")) {
311 } else if (!strcmp(term, "sentence")) {
313 } else if (!strcmp(term, "paragraph")) {
315 } else if (!strcmp(term, "element")) {
318 ct->error = YAZ_SRW_UNSUPP_PROX_UNIT;
319 ct->addinfo = xstrdup(term);
323 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
324 ct->addinfo = xstrdup(name);
/* advance to the next modifier in the chain */
328 mods = mods->u.st.modifiers;
/* default distance when no "distance" modifier was given: 1 for the word
 * unit (code 2), 0 for every other unit */
331 if (!distance_defined)
332 distance = (unit == 2) ? 1 : 0;
334 cql_pr_int(exclusion, pr, client_data);
335 cql_pr_int(distance, pr, client_data);
336 cql_pr_int(ordered, pr, client_data);
337 cql_pr_int(proxrel, pr, client_data);
338 (*pr)("k ", client_data);
339 cql_pr_int(unit, pr, client_data);
344 /* Returns location of first wildcard character in the `length'
345 * characters starting at `term', or a null pointer if there are
346 * none -- like memchr().
348 static const char *wcchar(int start, const char *term, int length)
/* A character is only tested as a wildcard when it is not preceded by a
 * backslash. A non-zero `start' short-circuits the test so that term[-1]
 * is not read; presumably it marks `term' as the true beginning of the
 * string (the callers pass 1 for the start of a term and 0 for a
 * position inside it).
 * NOTE(review): strchr("*?", c) also succeeds for c == '\0', because the
 * terminating NUL counts as part of the searched string. */
352 if (start || term[-1] != '\\')
353 if (strchr("*?", *term))
363 /* ### checks for CQL relation-name rather than Type-1 attribute */
/* Scans the modifier chain hanging off `cn' for an entry whose index is
 * equal to `name' (exact strcmp comparison).
 * NOTE(review): the return statements are not visible here; the caller
 * negates the result, so it is presumably non-zero on a match. */
364 static int has_modifier(struct cql_node *cn, const char *name) {
365 struct cql_node *mod;
366 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
367 if (!strcmp(mod->u.st.index, name))
/* Writes one search term of node `cn' through `pr': first the position
 * attribute (derived from '^' anchors), then the truncation attribute
 * (derived from the wildcards '*' and '?'), then the index attribute and
 * one relationModifier attribute per modifier, and finally the term text
 * enclosed in double quotes.
 * `term' and `length' delimit the text; the text need not be
 * NUL-terminated at `length'. Anchor and wildcard processing is skipped
 * when the node carries a "regexp" modifier or the term is empty.
 * NOTE(review): a number of lines (declarations, adjustments of `term'
 * and `length', the per-character output) are not visible here. */
375 void emit_term(cql_transform_t ct,
377 const char *term, int length,
378 void (*pr)(const char *buf, void *client_data),
382 const char *ns = cn->u.st.index_uri;
383 int process_term = !has_modifier(cn, "regexp");
386 assert(cn->which == CQL_NODE_ST);
/* position attribute: '^' at both ends, at the start only, at the end
 * only, or no anchor at all */
388 if (process_term && length > 0)
390 if (length > 1 && term[0] == '^' && term[length-1] == '^')
392 cql_pr_attr(ct, "position", "firstAndLast", 0,
393 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
397 else if (term[0] == '^')
399 cql_pr_attr(ct, "position", "first", 0,
400 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
404 else if (term[length-1] == '^')
406 cql_pr_attr(ct, "position", "last", 0,
407 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
412 cql_pr_attr(ct, "position", "any", 0,
413 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
/* truncation attribute: locate the first two unescaped wildcards; the
 * second search starts right after the first hit */
417 if (process_term && length > 0)
419 const char *first_wc = wcchar(1, term, length);
420 const char *second_wc = first_wc ?
421 wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
423 /* Check for well-known globbing patterns that represent
424 * simple truncation attributes as expected by, for example,
425 * Bath-compliant server. If we find such a pattern but
426 * there's no mapping for it, that's fine: we just use a
427 * general pattern-matching attribute.
429 if (first_wc == term && second_wc == term + length-1
430 && *first_wc == '*' && *second_wc == '*'
431 && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
436 else if (first_wc == term && second_wc == 0 && *first_wc == '*'
437 && cql_pr_attr(ct, "truncation", "left", 0,
443 else if (first_wc == term + length-1 && second_wc == 0
445 && cql_pr_attr(ct, "truncation", "right", 0,
452 /* We have one or more wildcard characters, but not in a
453 * way that can be dealt with using only the standard
454 * left-, right- and both-truncation attributes. We need
455 * to translate the pattern into a Z39.58-type pattern,
456 * which has been supported in BIB-1 since 1996. If
457 * there's no configuration element for "truncation.z3958"
458 * we indicate this as error 28 "Masking character not
462 cql_pr_attr(ct, "truncation", "z3958", 0,
463 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
464 z3958_mem = (char *) xmalloc(length+1);
465 for (i = 0; i < length; i++)
467 if (i > 0 && term[i-1] == '\\')
468 z3958_mem[i] = term[i];
469 else if (term[i] == '*')
471 else if (term[i] == '?')
474 z3958_mem[i] = term[i];
476 z3958_mem[length] = '\0';
480 /* No masking characters. Use "truncation.none" if given. */
481 cql_pr_attr(ct, "truncation", "none", 0,
/* index attribute, resolved through the index URI of the node;
 * "serverChoice" is the default when the node has no index */
486 cql_pr_attr_uri(ct, "index", ns,
487 cn->u.st.index, "serverChoice",
488 pr, client_data, YAZ_SRW_UNSUPP_INDEX);
/* every modifier of the node is written as a relationModifier */
490 if (cn->u.st.modifiers)
492 struct cql_node *mod = cn->u.st.modifiers;
493 for (; mod; mod = mod->u.st.modifiers)
495 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
496 pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
/* the term itself, enclosed in double quotes and followed by a space */
500 (*pr)("\"", client_data);
501 for (i = 0; i<length; i++)
503 /* pr(int) each character */
504 /* we do not need to deal with \-sequences because the
505 CQL and PQF terms have same \-format, bug #1988 */
510 (*pr)(buf, client_data);
512 (*pr)("\" ", client_data);
/* Writes the term of `cn' followed by every term on its extra_terms
 * chain, each through emit_term(), with "@<op> " operators written ahead
 * of the terms they combine.
 * NOTE(review): the guard in front of the first operator and some
 * argument lines are not visible here. */
516 void emit_terms(cql_transform_t ct,
518 void (*pr)(const char *buf, void *client_data),
522 struct cql_node *ne = cn->u.st.extra_terms;
525 (*pr)("@", client_data);
526 (*pr)(op, client_data);
527 (*pr)(" ", client_data);
529 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
531 for (; ne; ne = ne->u.st.extra_terms)
/* another operator is only needed while more terms follow this one */
533 if (ne->u.st.extra_terms)
535 (*pr)("@", client_data);
536 (*pr)(op, client_data);
537 (*pr)(" ", client_data);
/* the extra term is written with `cn' as its node, so index and
 * modifiers are taken from `cn' rather than from `ne' */
539 emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term),
/* Splits the term of `cn' at space characters and writes the pieces
 * through emit_term(), combined with "@<op> " operators. The most
 * recently found piece is remembered in `last_term'/`last_length' and
 * written once it is known whether another piece follows; the final
 * piece is written after the scan.
 * NOTE(review): the loop header and several guards are on lines not
 * visible here, so the handling of repeated spaces and of an empty term
 * should be confirmed against the full source. */
544 void emit_wordlist(cql_transform_t ct,
546 void (*pr)(const char *buf, void *client_data),
550 const char *cp0 = cn->u.st.term;
552 const char *last_term = 0;
558 cp1 = strchr(cp0, ' ');
561 (*pr)("@", client_data);
562 (*pr)(op, client_data);
563 (*pr)(" ", client_data);
564 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* length of the current piece: up to the space when one was found,
 * otherwise the remainder of the string */
568 last_length = cp1 - cp0;
570 last_length = strlen(cp0);
574 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Recursive worker that writes the query for node `cn' through `pr'.
 * Search-term nodes: an index "resultSet" in the CQL context set is
 * written as @set "<term>"; otherwise the "always", "relation" and
 * "structure" attributes are written, followed by the term(s). The
 * relation "all" splits the term into words joined by "and", "any" joins
 * them by "or", and every other relation goes through emit_terms().
 * Boolean nodes: "@<operator> " is written, proximity modifiers are
 * handled for "prox", modifiers in another context are reported as
 * YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER, and both operands are processed
 * recursively, left first.
 * A node of any other kind is reported on stderr.
 * NOTE(review): the lines that select between these cases and several
 * guards are not visible here. */
577 void cql_transform_r(cql_transform_t ct,
579 void (*pr)(const char *buf, void *client_data),
583 struct cql_node *mods;
590 ns = cn->u.st.index_uri;
593 if (!strcmp(ns, cql_uri())
594 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
596 (*pr)("@set \"", client_data);
597 (*pr)(cn->u.st.term, client_data);
598 (*pr)("\" ", client_data);
606 ct->error = YAZ_SRW_UNSUPP_CONTEXT_SET;
/* "always" carries no error code, so a missing mapping is not reported */
610 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
611 cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data,
612 YAZ_SRW_UNSUPP_RELATION);
/* the structure attribute is looked up by the relation name as well */
613 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
614 pr, client_data, YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
615 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
617 emit_wordlist(ct, cn, pr, client_data, "and");
619 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
621 emit_wordlist(ct, cn, pr, client_data, "or");
625 emit_terms(ct, cn, pr, client_data, "and");
629 (*pr)("@", client_data);
630 (*pr)(cn->u.boolean.value, client_data);
631 (*pr)(" ", client_data);
632 mods = cn->u.boolean.modifiers;
633 if (!strcmp(cn->u.boolean.value, "prox")) {
634 if (!cql_pr_prox(ct, mods, pr, client_data))
637 /* Boolean modifiers other than on proximity not supported */
638 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
639 ct->addinfo = xstrdup(mods->u.st.index);
643 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
644 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
648 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Entry point of the conversion: applies the configured context-set
 * prefixes to the tree `cn' and then writes the query through `pr' by
 * calling cql_transform_r().
 * NOTE(review): the resetting of the error state, the release of `nmem'
 * and the return are on lines not visible here. */
653 int cql_transform(cql_transform_t ct,
655 void (*pr)(const char *buf, void *client_data),
658 struct cql_prop_entry *e;
659 NMEM nmem = nmem_create();
/* "set.<prefix>" entries bind a named prefix to their value; a plain
 * "set" entry is applied with a null prefix */
666 for (e = ct->entry; e ; e = e->next)
668 if (!cql_strncmp(e->pattern, "set.", 4))
669 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
670 else if (!cql_strcmp(e->pattern, "set"))
671 cql_apply_prefix(nmem, cn, 0, e->value);
673 cql_transform_r (ct, cn, pr, client_data);
/* Runs cql_transform() with cql_fputs as the output callback and `f' as
 * its client data, and returns the result of cql_transform(). */
679 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
681 return cql_transform(ct, cn, cql_fputs, f);
/* Runs cql_transform() with cql_buf_write_handler as the output callback
 * and a cql_buf_write_info as its client data. One path reports
 * YAZ_SRW_TOO_MANY_CHARS_IN_QUERY with info.max, formatted as a decimal
 * number, as additional information; another path NUL-terminates the
 * output at info.off.
 * NOTE(review): the remaining parameters, the set-up of `info' and the
 * conditions selecting between the two paths are on lines not visible
 * here. */
684 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
687 struct cql_buf_write_info info;
693 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
695 /* Attempt to write past end of buffer. For some reason, this
696 SRW diagnostic is deprecated, but it's so perfect for our
697 purposes that it would be stupid not to use it. */
699 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
700 sprintf(numbuf, "%ld", (long) info.max);
701 ct->addinfo = xstrdup(numbuf);
705 info.buf[info.off] = '\0';
/* Stores the handle's additional-information pointer in `*addinfo'. The
 * pointer is handed out as is; no copy is made here.
 * NOTE(review): the return statement is not visible; presumably the
 * error code kept in the handle is returned. */
709 int cql_transform_error(cql_transform_t ct, const char **addinfo)
711 *addinfo = ct->addinfo;
717 * indent-tabs-mode: nil
719 * vim: shiftwidth=4 tabstop=8 expandtab