1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2008 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements CQL transform (CQL to RPN conversion).
10 * Evaluation order of rules:
25 #include <yaz/xmalloc.h>
26 #include <yaz/diagsrw.h>
27 #include <yaz/tokenizer.h>
28 #include <yaz/wrbuf.h>
/* One node of the singly linked list of configured properties; `next`
 * links to the following node.  Code in this file also reads and writes
 * string members `pattern` and `value` on these nodes; their
 * declarations fall on lines that are not part of this excerpt. */
30 struct cql_prop_entry {
33 struct cql_prop_entry *next;
/* State behind a cql_transform_t handle: `entry` is the head of the
 * property list and `tok_cfg` is the tokenizer configuration used when a
 * properties stream is parsed.  Functions in this file also use members
 * `w`, `error` and `addinfo`; their declarations are not visible in this
 * excerpt. */
36 struct cql_transform_t_ {
37 struct cql_prop_entry *entry;
38 yaz_tok_cfg_t tok_cfg;
/* Allocates a transform handle with xmalloc() and equips it with a new
 * tokenizer configuration and a new WRBUF (ct->w).  Initialisation of the
 * remaining members and the return statement are on lines not shown
 * here. */
45 cql_transform_t cql_transform_create(void)
47 cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct));
48 ct->tok_cfg = yaz_tok_cfg_create();
49 ct->w = wrbuf_alloc();
/* Builds a transform handle from an already open properties stream `f`.
 *
 * The stream is read line by line with fgets() and each line is
 * tokenized with "=" configured as a single-character token.  When a
 * line yields a string token, a copy of that token is taken as the
 * entry's pattern; the following "[attset] type=value" groups are
 * accumulated in ct->w and a copy of that text becomes the entry's
 * value.
 *
 * On each visible failure path the token parser is destroyed and the
 * handle is closed with cql_transform_close().  What is returned on
 * those paths is not visible in this excerpt -- presumably a null
 * handle, TODO confirm.
 *
 * NOTE(review): `pattern` is an xstrdup() copy that is attached to an
 * entry only further down; confirm that the failure paths in between
 * free it, since cql_transform_close() cannot see it yet. */
56 cql_transform_t cql_transform_open_FILE(FILE *f)
58 cql_transform_t ct = cql_transform_create();
/* pp starts at the list-head slot; new entries are stored through it */
60 struct cql_prop_entry **pp = &ct->entry;
/* make "=" a token of its own so that type and value come apart */
62 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
64 while (fgets(line, sizeof(line)-1, f))
66 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line);
70 if (t == YAZ_TOK_STRING)
72 char * pattern = xstrdup(yaz_tok_parse_string(tp));
76 yaz_tok_parse_destroy(tp);
77 cql_transform_close(ct);
82 while (t == YAZ_TOK_STRING)
84 /* attset type=value OR type=value */
85 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
89 if (t == YAZ_TOK_STRING)
91 wrbuf_puts(ct->w, " ");
92 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
97 yaz_tok_parse_destroy(tp);
98 cql_transform_close(ct);
101 t = yaz_tok_move(tp);
102 if (t != YAZ_TOK_STRING) /* value */
104 yaz_tok_parse_destroy(tp);
105 cql_transform_close(ct);
/* append "=value" and a separating space to the text built in ct->w */
108 wrbuf_puts(ct->w, "=");
109 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
110 t = yaz_tok_move(tp);
111 wrbuf_puts(ct->w, " ");
/* store a newly allocated entry through pp; it takes over `pattern` and
   receives its own copy of the accumulated attribute text */
113 *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp));
114 (*pp)->pattern = pattern;
115 (*pp)->value = xstrdup(wrbuf_cstr(ct->w));
/* a line that starts with neither a string token nor EOF is rejected */
118 else if (t != YAZ_TOK_EOF)
120 yaz_tok_parse_destroy(tp);
121 cql_transform_close(ct);
124 yaz_tok_parse_destroy(tp);
/* Releases a transform handle.  The property list is walked with the
 * successor of each node saved in pe_next before moving on (the
 * statements that dispose of the node itself are not visible in this
 * excerpt); afterwards the tokenizer configuration and the WRBUF owned
 * by the handle are destroyed. */
130 void cql_transform_close(cql_transform_t ct)
132 struct cql_prop_entry *pe;
138 struct cql_prop_entry *pe_next = pe->next;
145 yaz_tok_cfg_destroy(ct->tok_cfg);
146 wrbuf_destroy(ct->w);
/* Opens the file `fname` for reading and builds a transform handle from
 * it with cql_transform_open_FILE().  Handling of a failed fopen(),
 * closing of the stream and the return statement are on lines not shown
 * here. */
150 cql_transform_t cql_transform_open_fname(const char *fname)
153 FILE *f = fopen(fname, "r");
156 ct = cql_transform_open_FILE(f);
/* Looks up a configured property by a composite name.
 *
 * The name is assembled in `pattern` from up to three parts joined with
 * '.', each part limited to 39 characters by the format string:
 *   pat1.pat2.pat3  when all three are given,
 *   pat1.pat2       when only pat1 and pat2 are given,
 *   pat1.pat3       when only pat1 and pat3 are given,
 *   pat1            in the remaining visible case (its guarding
 *                   condition is not part of this excerpt).
 * The property list is then scanned for an entry whose pattern compares
 * equal under cql_strcmp().  What is returned on a hit or a miss is not
 * visible here; callers in this file test the result for null. */
161 static const char *cql_lookup_property(cql_transform_t ct,
162 const char *pat1, const char *pat2,
166 struct cql_prop_entry *e;
168 if (pat1 && pat2 && pat3)
169 sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
170 else if (pat1 && pat2)
171 sprintf(pattern, "%.39s.%.39s", pat1, pat2);
172 else if (pat1 && pat3)
173 sprintf(pattern, "%.39s.%.39s", pat1, pat3);
175 sprintf(pattern, "%.39s", pat1);
179 for (e = ct->entry; e; e = e->next)
181 if (!cql_strcmp(e->pattern, pattern))
/* Emits, through the callback `pr`, the attribute text configured for a
 * property.
 *
 * ct           transform handle holding the property list
 * category     first component of the property name; callers in this
 *              file pass e.g. "index", "relation", "position"
 * uri          compared against the value of every "set.<prefix>" entry;
 *              on a match <prefix> becomes the second name component
 * val          value component; default_val is used when val is null
 * pr           output callback, invoked with client_data
 * errcode      consulted only when nonzero and ct->error is still unset;
 *              on that path ct->addinfo receives a copy of val (the
 *              remaining statements of that path are not shown here)
 *
 * When the first lookup fails, no prefix is in effect and category is
 * "relation", the symbolic relations "==", "=", "<=" and ">=" are
 * retried under the names "exact", "eq", "le" and "ge".  A lookup with
 * the value "*" is also performed; its guarding condition is not part
 * of this excerpt.
 *
 * The return value is not visible here; emit_term() uses it as a truth
 * value in its truncation tests.
 *
 * NOTE(review): the alias tests call strcmp() on `val`, not on `eval`;
 * confirm that val cannot be null when that branch is reached.
 * NOTE(review): memcmp() below always reads 4 bytes of e->pattern, while
 * cql_transform() uses cql_strncmp() for the same "set." test; confirm
 * every pattern is at least that long or that the over-read is
 * harmless. */
187 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
188 const char *uri, const char *val, const char *default_val,
189 void (*pr)(const char *buf, void *client_data),
194 const char *eval = val ? val : default_val;
195 const char *prefix = 0;
199 struct cql_prop_entry *e;
/* map the context-set URI back to the prefix configured for it */
201 for (e = ct->entry; e; e = e->next)
202 if (!memcmp(e->pattern, "set.", 4) && e->value &&
203 !strcmp(e->value, uri))
205 prefix = e->pattern+4;
208 /* must have a prefix now - if not it's an error */
214 res = cql_lookup_property(ct, category, prefix, eval);
215 /* we have some aliases for some relations unfortunately.. */
216 if (!res && !prefix && !strcmp(category, "relation"))
218 if (!strcmp(val, "=="))
219 res = cql_lookup_property(ct, category, prefix, "exact");
220 if (!strcmp(val, "="))
221 res = cql_lookup_property(ct, category, prefix, "eq");
222 if (!strcmp(val, "<="))
223 res = cql_lookup_property(ct, category, prefix, "le");
224 if (!strcmp(val, ">="))
225 res = cql_lookup_property(ct, category, prefix, "ge");
228 res = cql_lookup_property(ct, category, prefix, "*");
/* walk the configured text one '='-containing group at a time; each
   group ends at the next space or at the end of the string */
234 const char *cp0 = res, *cp1;
235 while ((cp1 = strchr(cp0, '=')))
238 while (*cp1 && *cp1 != ' ')
/* a group that does not fit into buf is not copied */
240 if (cp1 - cp0 >= sizeof(buf))
242 memcpy(buf, cp0, cp1 - cp0);
244 (*pr)("@attr ", client_data);
246 for (i = 0; buf[i]; i++)
249 (*pr)(eval, client_data);
255 (*pr)(tmp, client_data);
258 (*pr)(" ", client_data);
266 if (errcode && !ct->error)
270 ct->addinfo = xstrdup(val);
/* Convenience form of cql_pr_attr_uri() for properties that are not
 * qualified by a context-set URI: forwards all arguments and passes a
 * null uri, returning whatever cql_pr_attr_uri() returns. */
277 int cql_pr_attr(cql_transform_t ct, const char *category,
278 const char *val, const char *default_val,
279 void (*pr)(const char *buf, void *client_data),
283 return cql_pr_attr_uri(ct, category, 0 /* uri */,
284 val, default_val, pr, client_data, errcode);
/* Emits `val` formatted as a decimal integer, followed by a single
 * space, through the callback `pr`. */
288 static void cql_pr_int(int val,
289 void (*pr)(const char *buf, void *client_data),
292 char buf[21]; /* enough characters to 2^64 */
293 sprintf(buf, "%d", val);
294 (*pr)(buf, client_data);
295 (*pr)(" ", client_data);
/* Emits the operands of a proximity operator from the modifier list
 * `mods` of a prox boolean.
 *
 * The list is followed through u.st.modifiers.  For every modifier the
 * name (u.st.index), term and relation are read; the names handled are
 * "distance", "ordered", "unordered" and "unit":
 *   distance   term is converted with strtol() (base auto-detected) and
 *              the relation must be one of = > < >= <= <>
 *   unit       term must be one of word, sentence, paragraph, element
 * An unrecognised relation, unit or modifier name sets ct->error to
 * YAZ_SRW_UNSUPP_PROX_RELATION, YAZ_SRW_UNSUPP_PROX_UNIT or
 * YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER respectively and stores a copy of the
 * offending text in ct->addinfo.
 *
 * Without a distance modifier the distance defaults to 1 when the unit
 * is 2 (word) and to 0 otherwise.  Output order: exclusion, distance,
 * ordered, proxrel, the literal "k ", unit.
 *
 * The return statements are not visible in this excerpt;
 * cql_transform_r() treats a zero result as failure.
 *
 * NOTE(review): ct->addinfo is assigned without a visible release of a
 * previous value -- confirm against the lines not shown. */
299 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
300 void (*pr)(const char *buf, void *client_data),
304 int distance; /* to be filled in later depending on unit */
305 int distance_defined = 0;
307 int proxrel = 2; /* less than or equal */
308 int unit = 2; /* word */
312 const char *name = mods->u.st.index;
313 const char *term = mods->u.st.term;
314 const char *relation = mods->u.st.relation;
316 if (!strcmp(name, "distance")) {
317 distance = strtol(term, (char**) 0, 0);
318 distance_defined = 1;
319 if (!strcmp(relation, "="))
321 else if (!strcmp(relation, ">"))
323 else if (!strcmp(relation, "<"))
325 else if (!strcmp(relation, ">="))
327 else if (!strcmp(relation, "<="))
329 else if (!strcmp(relation, "<>"))
333 ct->error = YAZ_SRW_UNSUPP_PROX_RELATION;
334 ct->addinfo = xstrdup(relation);
338 else if (!strcmp(name, "ordered"))
340 else if (!strcmp(name, "unordered"))
342 else if (!strcmp(name, "unit"))
344 if (!strcmp(term, "word"))
346 else if (!strcmp(term, "sentence"))
348 else if (!strcmp(term, "paragraph"))
350 else if (!strcmp(term, "element"))
354 ct->error = YAZ_SRW_UNSUPP_PROX_UNIT;
355 ct->addinfo = xstrdup(term);
361 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
362 ct->addinfo = xstrdup(name);
/* advance to the next modifier in the chain */
365 mods = mods->u.st.modifiers;
/* the default distance depends on the unit, so it is settled only now */
368 if (!distance_defined)
369 distance = (unit == 2) ? 1 : 0;
371 cql_pr_int(exclusion, pr, client_data);
372 cql_pr_int(distance, pr, client_data);
373 cql_pr_int(ordered, pr, client_data);
374 cql_pr_int(proxrel, pr, client_data);
375 (*pr)("k ", client_data);
376 cql_pr_int(unit, pr, client_data);
381 /* Returns location of first wildcard character in the `length'
382 * characters starting at `term', or a null pointer if there are
383 * none -- like memchr().
/* A character is tested against the set "*?" only when `start` is
 * nonzero or the character before it is not a backslash; with `start`
 * nonzero the preceding character (term[-1]) is never read.  The loop
 * over `length` and the return statements are on lines not shown here.
 * NOTE(review): strchr() also matches the terminating NUL of "*?", so a
 * NUL inside the scanned range would count as a wildcard -- confirm
 * callers never include one. */
385 static const char *wcchar(int start, const char *term, int length)
389 if (start || term[-1] != '\\')
390 if (strchr("*?", *term))
/* Scans the modifier chain of `cn` (linked through u.st.modifiers) for a
 * modifier whose u.st.index equals `name` under strcmp().  The return
 * statements are not visible in this excerpt; emit_term() uses the
 * result as a truth value. */
400 /* ### checks for CQL relation-name rather than Type-1 attribute */
401 static int has_modifier(struct cql_node *cn, const char *name) {
402 struct cql_node *mod;
403 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
404 if (!strcmp(mod->u.st.index, name))
/* Emits one search term of the search-term node `cn` through `pr`.
 *
 * `term` and `length` delimit the term text, which need not be
 * NUL-terminated at `length` (emit_wordlist() passes slices).  Term
 * analysis is skipped when cn carries a "regexp" modifier.
 *
 * Steps visible in this excerpt, in order:
 *  1. Position attribute for a non-empty term: "firstAndLast" when the
 *     term is longer than one character and both ends are '^', "first"
 *     for a leading '^', "last" for a trailing '^', and "any" on the
 *     remaining visible path.
 *  2. Truncation attribute: up to two wildcard positions are located
 *     with wcchar().  A '*' at both ends selects "both", a single
 *     leading '*' selects "left", a single wildcard in last position
 *     selects "right" (one line of that test is not shown); each of
 *     these applies only if the matching property lookup succeeds.
 *     Other wildcard layouts use "z3958" and translate the term into a
 *     buffer of length+1 bytes obtained with xmalloc(); a term without
 *     wildcards uses "none".
 *  3. Index attribute looked up with cn's index_uri and index, with
 *     "serverChoice" as the default value.
 *  4. One "relationModifier" attribute per modifier of cn.
 *  5. The term itself, enclosed in double quotes and followed by a
 *     space.
 * Adjustments of term/length after anchors are recognised, and the
 * release of the z3958 buffer, are on lines not shown here. */
412 void emit_term(cql_transform_t ct,
414 const char *term, int length,
415 void (*pr)(const char *buf, void *client_data),
419 const char *ns = cn->u.st.index_uri;
420 int process_term = !has_modifier(cn, "regexp");
423 assert(cn->which == CQL_NODE_ST);
425 if (process_term && length > 0)
427 if (length > 1 && term[0] == '^' && term[length-1] == '^')
429 cql_pr_attr(ct, "position", "firstAndLast", 0,
430 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
434 else if (term[0] == '^')
436 cql_pr_attr(ct, "position", "first", 0,
437 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
441 else if (term[length-1] == '^')
443 cql_pr_attr(ct, "position", "last", 0,
444 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
449 cql_pr_attr(ct, "position", "any", 0,
450 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
454 if (process_term && length > 0)
456 const char *first_wc = wcchar(1, term, length);
457 const char *second_wc = first_wc ?
458 wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
460 /* Check for well-known globbing patterns that represent
461 * simple truncation attributes as expected by, for example,
462 * Bath-compliant server. If we find such a pattern but
463 * there's no mapping for it, that's fine: we just use a
464 * general pattern-matching attribute.
466 if (first_wc == term && second_wc == term + length-1
467 && *first_wc == '*' && *second_wc == '*'
468 && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
473 else if (first_wc == term && second_wc == 0 && *first_wc == '*'
474 && cql_pr_attr(ct, "truncation", "left", 0,
480 else if (first_wc == term + length-1 && second_wc == 0
482 && cql_pr_attr(ct, "truncation", "right", 0,
489 /* We have one or more wildcard characters, but not in a
490 * way that can be dealt with using only the standard
491 * left-, right- and both-truncation attributes. We need
492 * to translate the pattern into a Z39.58-type pattern,
493 * which has been supported in BIB-1 since 1996. If
494 * there's no configuration element for "truncation.z3958"
495 * we indicate this as error 28 "Masking character not
499 cql_pr_attr(ct, "truncation", "z3958", 0,
500 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
501 z3958_mem = (char *) xmalloc(length+1);
502 for (i = 0; i < length; i++)
504 if (i > 0 && term[i-1] == '\\')
505 z3958_mem[i] = term[i];
506 else if (term[i] == '*')
508 else if (term[i] == '?')
511 z3958_mem[i] = term[i];
513 z3958_mem[length] = '\0';
517 /* No masking characters. Use "truncation.none" if given. */
518 cql_pr_attr(ct, "truncation", "none", 0,
523 cql_pr_attr_uri(ct, "index", ns,
524 cn->u.st.index, "serverChoice",
525 pr, client_data, YAZ_SRW_UNSUPP_INDEX);
527 if (cn->u.st.modifiers)
529 struct cql_node *mod = cn->u.st.modifiers;
530 for (; mod; mod = mod->u.st.modifiers)
532 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
533 pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
537 (*pr)("\"", client_data);
538 for (i = 0; i<length; i++)
540 /* pr(int) each character */
541 /* we do not need to deal with \-sequences because the
542 CQL and PQF terms have same \-format, bug #1988 */
547 (*pr)(buf, client_data);
549 (*pr)("\" ", client_data);
/* Emits the term of `cn` followed by the term of every node on its
 * u.st.extra_terms chain, combined with the operator `op`.
 *
 * An operator prefix "@<op> " is written ahead of cn's own term (the
 * condition guarding it is not part of this excerpt) and again ahead of
 * every chained term that still has a successor.  Every term, including
 * the chained ones, is emitted with `cn` as the node argument of
 * emit_term(), so attributes are derived from cn rather than from the
 * chained node. */
553 void emit_terms(cql_transform_t ct,
555 void (*pr)(const char *buf, void *client_data),
559 struct cql_node *ne = cn->u.st.extra_terms;
562 (*pr)("@", client_data);
563 (*pr)(op, client_data);
564 (*pr)(" ", client_data);
566 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
568 for (; ne; ne = ne->u.st.extra_terms)
570 if (ne->u.st.extra_terms)
572 (*pr)("@", client_data);
573 (*pr)(op, client_data);
574 (*pr)(" ", client_data);
576 emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term),
/* Splits the term of `cn` at space characters and emits the words as
 * separate terms combined with the operator `op`.
 *
 * Emission runs one word behind the scan: the previously remembered word
 * (last_term, last_length) is written together with an "@<op> " prefix
 * inside the loop, and the final remembered word is written after the
 * loop without a prefix.  A word's length is the distance to the next
 * space, or strlen() of the remainder when no space follows.  The loop
 * header and the conditions around these statements are on lines not
 * shown here. */
581 void emit_wordlist(cql_transform_t ct,
583 void (*pr)(const char *buf, void *client_data),
587 const char *cp0 = cn->u.st.term;
589 const char *last_term = 0;
595 cp1 = strchr(cp0, ' ');
598 (*pr)("@", client_data);
599 (*pr)(op, client_data);
600 (*pr)(" ", client_data);
601 emit_term(ct, cn, last_term, last_length, pr, client_data);
605 last_length = cp1 - cp0;
607 last_length = strlen(cp0);
611 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Recursive worker that emits the tree rooted at `cn` through `pr`.
 * The dispatch on the node type is on lines not shown here; the visible
 * statements fall into three groups.
 *
 * Search-term nodes: when the index URI equals cql_uri() and the index
 * is "resultSet" (compared with cql_strcmp()), the term is emitted as
 * @set "<term>".  A path that records YAZ_SRW_UNSUPP_CONTEXT_SET is also
 * present.  On the visible path the "always", "relation" and "structure"
 * properties are emitted, and then the term(s): relation "all" splits
 * the term into words joined by "and", relation "any" joins them by
 * "or", and the remaining path uses emit_terms() with "and".
 *
 * Boolean nodes: "@<operator> " is emitted; for "prox" the modifiers go
 * to cql_pr_prox(), while a path for other operators records
 * YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER with the modifier name as addinfo.
 * The left and right subtrees are then processed recursively.
 *
 * Any other node type: a "Fatal:" message is printed to stderr. */
614 void cql_transform_r(cql_transform_t ct,
616 void (*pr)(const char *buf, void *client_data),
620 struct cql_node *mods;
627 ns = cn->u.st.index_uri;
630 if (!strcmp(ns, cql_uri())
631 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
633 (*pr)("@set \"", client_data);
634 (*pr)(cn->u.st.term, client_data);
635 (*pr)("\" ", client_data);
643 ct->error = YAZ_SRW_UNSUPP_CONTEXT_SET;
647 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
648 cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data,
649 YAZ_SRW_UNSUPP_RELATION);
/* the structure attribute is keyed by the relation name as well */
650 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
651 pr, client_data, YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
652 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
653 emit_wordlist(ct, cn, pr, client_data, "and");
654 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
655 emit_wordlist(ct, cn, pr, client_data, "or");
657 emit_terms(ct, cn, pr, client_data, "and");
660 (*pr)("@", client_data);
661 (*pr)(cn->u.boolean.value, client_data);
662 (*pr)(" ", client_data);
663 mods = cn->u.boolean.modifiers;
664 if (!strcmp(cn->u.boolean.value, "prox"))
666 if (!cql_pr_prox(ct, mods, pr, client_data))
671 /* Boolean modifiers other than on proximity not supported */
672 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
673 ct->addinfo = xstrdup(mods->u.st.index);
677 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
678 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
682 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Transforms the tree `cn` and emits the result through `pr`.
 *
 * Before emission every property whose pattern starts with "set." is
 * applied to the tree with cql_apply_prefix(), using the text after
 * "set." as the prefix and the property value as its second string
 * argument; a property named exactly "set" is applied with a null
 * prefix.  An NMEM created here is handed to cql_apply_prefix().  The
 * tree is then emitted with cql_transform_r().
 *
 * Resetting of the error state, destruction of the NMEM and the return
 * value are on lines not shown here. */
687 int cql_transform(cql_transform_t ct, struct cql_node *cn,
688 void (*pr)(const char *buf, void *client_data),
691 struct cql_prop_entry *e;
692 NMEM nmem = nmem_create();
698 for (e = ct->entry; e ; e = e->next)
700 if (!cql_strncmp(e->pattern, "set.", 4))
701 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
702 else if (!cql_strcmp(e->pattern, "set"))
703 cql_apply_prefix(nmem, cn, 0, e->value);
705 cql_transform_r(ct, cn, pr, client_data);
/* Transforms `cn` with cql_transform(), using cql_fputs as the output
 * callback and the stream `f` as its client data; returns the result of
 * cql_transform(). */
711 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
713 return cql_transform(ct, cn, cql_fputs, f);
/* Transforms `cn` with cql_transform(), collecting the output through
 * cql_buf_write_handler and a cql_buf_write_info record (its setup from
 * `out` and `max` is on lines not shown here).
 *
 * On the overflow path ct->error is set to
 * YAZ_SRW_TOO_MANY_CHARS_IN_QUERY and ct->addinfo to the decimal text
 * of info.max.  On the other visible path the collected text is
 * NUL-terminated at info.off.  The conditions selecting these paths and
 * the return statement are not part of this excerpt. */
716 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, char *out, int max)
718 struct cql_buf_write_info info;
724 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
726 /* Attempt to write past end of buffer. For some reason, this
727 SRW diagnostic is deprecated, but it's so perfect for our
728 purposes that it would be stupid not to use it. */
730 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
731 sprintf(numbuf, "%ld", (long) info.max);
732 ct->addinfo = xstrdup(numbuf);
736 info.buf[info.off] = '\0';
/* Reports the error state of the handle: stores ct->addinfo in *addinfo.
 * The pointer handed out is the handle's own string, not a copy.  The
 * return statement is not visible here -- presumably ct->error, TODO
 * confirm. */
740 int cql_transform_error(cql_transform_t ct, const char **addinfo)
742 *addinfo = ct->addinfo;
/* Sets the additional-info string of the handle to a private copy of
 * `addinfo`, or to null when `addinfo` is null.  The statements that use
 * the `error` argument, and any release of the previous string, are on
 * lines not shown here. */
746 void cql_transform_set_error(cql_transform_t ct, int error, const char *addinfo)
749 ct->addinfo = addinfo ? xstrdup(addinfo) : 0;
756 * indent-tabs-mode: nil
758 * vim: shiftwidth=4 tabstop=8 expandtab