1 /* $Id: cqltransform.c,v 1.30 2007-12-20 22:45:37 adam Exp $
2 Copyright (C) 1995-2007, Index Data ApS
5 This file is part of the YAZ toolkit.
11 * \file cqltransform.c
12 * \brief Implements CQL transform (CQL to RPN conversion).
14 * Evaluation order of rules:
29 #include <yaz/xmalloc.h>
30 #include <yaz/diagsrw.h>
32 struct cql_prop_entry {
35 struct cql_prop_entry *next;
38 struct cql_transform_t_ {
39 struct cql_prop_entry *entry;
44 cql_transform_t cql_transform_open_FILE(FILE *f)
47 cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct));
48 struct cql_prop_entry **pp = &ct->entry;
52 while (fgets(line, sizeof(line)-1, f))
54 const char *cp_value_start;
55 const char *cp_value_end;
56 const char *cp_pattern_start;
57 const char *cp_pattern_end;
58 const char *cp = line;
60 while (*cp && strchr(" \t", *cp))
62 cp_pattern_start = cp;
64 while (*cp && !strchr(" \t\r\n=#", *cp))
67 if (cp == cp_pattern_start)
69 while (*cp && strchr(" \t", *cp))
74 cql_transform_close(ct);
78 while (*cp && strchr(" \t\r\n", *cp))
81 cp_value_end = strchr(cp, '#');
83 cp_value_end = strlen(line) + line;
85 if (cp_value_end != cp_value_start &&
86 strchr(" \t\r\n", cp_value_end[-1]))
88 *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp));
89 (*pp)->pattern = (char *) xmalloc(cp_pattern_end-cp_pattern_start + 1);
90 memcpy ((*pp)->pattern, cp_pattern_start,
91 cp_pattern_end-cp_pattern_start);
92 (*pp)->pattern[cp_pattern_end-cp_pattern_start] = '\0';
94 (*pp)->value = (char *) xmalloc (cp_value_end-cp_value_start + 1);
95 if (cp_value_start != cp_value_end)
96 memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
97 (*pp)->value[cp_value_end - cp_value_start] = '\0';
104 void cql_transform_close(cql_transform_t ct)
106 struct cql_prop_entry *pe;
112 struct cql_prop_entry *pe_next = pe->next;
123 cql_transform_t cql_transform_open_fname(const char *fname)
126 FILE *f = fopen(fname, "r");
129 ct = cql_transform_open_FILE(f);
134 static const char *cql_lookup_property(cql_transform_t ct,
135 const char *pat1, const char *pat2,
139 struct cql_prop_entry *e;
141 if (pat1 && pat2 && pat3)
142 sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
143 else if (pat1 && pat2)
144 sprintf (pattern, "%.39s.%.39s", pat1, pat2);
145 else if (pat1 && pat3)
146 sprintf (pattern, "%.39s.%.39s", pat1, pat3);
148 sprintf (pattern, "%.39s", pat1);
152 for (e = ct->entry; e; e = e->next)
154 if (!cql_strcmp(e->pattern, pattern))
160 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
161 const char *uri, const char *val, const char *default_val,
162 void (*pr)(const char *buf, void *client_data),
167 const char *eval = val ? val : default_val;
168 const char *prefix = 0;
172 struct cql_prop_entry *e;
174 for (e = ct->entry; e; e = e->next)
175 if (!memcmp(e->pattern, "set.", 4) && e->value &&
176 !strcmp(e->value, uri))
178 prefix = e->pattern+4;
181 /* must have a prefix now - if not it's an error */
187 res = cql_lookup_property(ct, category, prefix, eval);
189 res = cql_lookup_property(ct, category, prefix, "*");
195 const char *cp0 = res, *cp1;
196 while ((cp1 = strchr(cp0, '=')))
199 while (*cp1 && *cp1 != ' ')
201 if (cp1 - cp0 >= sizeof(buf))
203 memcpy (buf, cp0, cp1 - cp0);
205 (*pr)("@attr ", client_data);
207 for (i = 0; buf[i]; i++)
210 (*pr)(eval, client_data);
216 (*pr)(tmp, client_data);
219 (*pr)(" ", client_data);
227 if (errcode && !ct->error)
231 ct->addinfo = xstrdup(val);
238 int cql_pr_attr(cql_transform_t ct, const char *category,
239 const char *val, const char *default_val,
240 void (*pr)(const char *buf, void *client_data),
244 return cql_pr_attr_uri(ct, category, 0 /* uri */,
245 val, default_val, pr, client_data, errcode);
249 static void cql_pr_int (int val,
250 void (*pr)(const char *buf, void *client_data),
253 char buf[21]; /* enough characters to 2^64 */
254 sprintf(buf, "%d", val);
255 (*pr)(buf, client_data);
256 (*pr)(" ", client_data);
260 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
261 void (*pr)(const char *buf, void *client_data),
265 int distance; /* to be filled in later depending on unit */
266 int distance_defined = 0;
268 int proxrel = 2; /* less than or equal */
269 int unit = 2; /* word */
272 char *name = mods->u.st.index;
273 char *term = mods->u.st.term;
274 char *relation = mods->u.st.relation;
276 if (!strcmp(name, "distance")) {
277 distance = strtol(term, (char**) 0, 0);
278 distance_defined = 1;
279 if (!strcmp(relation, "=")) {
281 } else if (!strcmp(relation, ">")) {
283 } else if (!strcmp(relation, "<")) {
285 } else if (!strcmp(relation, ">=")) {
287 } else if (!strcmp(relation, "<=")) {
289 } else if (!strcmp(relation, "<>")) {
292 ct->error = 40; /* Unsupported proximity relation */
293 ct->addinfo = xstrdup(relation);
296 } else if (!strcmp(name, "ordered")) {
298 } else if (!strcmp(name, "unordered")) {
300 } else if (!strcmp(name, "unit")) {
301 if (!strcmp(term, "word")) {
303 } else if (!strcmp(term, "sentence")) {
305 } else if (!strcmp(term, "paragraph")) {
307 } else if (!strcmp(term, "element")) {
310 ct->error = 42; /* Unsupported proximity unit */
311 ct->addinfo = xstrdup(term);
315 ct->error = 46; /* Unsupported boolean modifier */
316 ct->addinfo = xstrdup(name);
320 mods = mods->u.st.modifiers;
323 if (!distance_defined)
324 distance = (unit == 2) ? 1 : 0;
326 cql_pr_int(exclusion, pr, client_data);
327 cql_pr_int(distance, pr, client_data);
328 cql_pr_int(ordered, pr, client_data);
329 cql_pr_int(proxrel, pr, client_data);
330 (*pr)("k ", client_data);
331 cql_pr_int(unit, pr, client_data);
336 /* Returns location of first wildcard character in the `length'
337 * characters starting at `term', or a null pointer if there are
338 * none -- like memchr().
340 static const char *wcchar(int start, const char *term, int length)
344 if (start || term[-1] != '\\')
345 if (strchr("*?", *term))
355 /* ### checks for CQL relation-name rather than Type-1 attribute */
356 static int has_modifier(struct cql_node *cn, const char *name) {
357 struct cql_node *mod;
358 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
359 if (!strcmp(mod->u.st.index, name))
367 void emit_term(cql_transform_t ct,
369 const char *term, int length,
370 void (*pr)(const char *buf, void *client_data),
374 const char *ns = cn->u.st.index_uri;
375 int process_term = !has_modifier(cn, "regexp");
378 assert(cn->which == CQL_NODE_ST);
380 if (process_term && length > 0)
382 if (length > 1 && term[0] == '^' && term[length-1] == '^')
384 cql_pr_attr(ct, "position", "firstAndLast", 0,
385 pr, client_data, 32);
389 else if (term[0] == '^')
391 cql_pr_attr(ct, "position", "first", 0,
392 pr, client_data, 32);
396 else if (term[length-1] == '^')
398 cql_pr_attr(ct, "position", "last", 0,
399 pr, client_data, 32);
404 cql_pr_attr(ct, "position", "any", 0,
405 pr, client_data, 32);
409 if (process_term && length > 0)
411 const char *first_wc = wcchar(1, term, length);
412 const char *second_wc = first_wc ?
413 wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
415 /* Check for well-known globbing patterns that represent
416 * simple truncation attributes as expected by, for example,
417 * Bath-compliant servers. If we find such a pattern but
418 * there's no mapping for it, that's fine: we just use a
419 * general pattern-matching attribute.
421 if (first_wc == term && second_wc == term + length-1
422 && *first_wc == '*' && *second_wc == '*'
423 && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
428 else if (first_wc == term && second_wc == 0 && *first_wc == '*'
429 && cql_pr_attr(ct, "truncation", "left", 0,
435 else if (first_wc == term + length-1 && second_wc == 0
437 && cql_pr_attr(ct, "truncation", "right", 0,
444 /* We have one or more wildcard characters, but not in a
445 * way that can be dealt with using only the standard
446 * left-, right- and both-truncation attributes. We need
447 * to translate the pattern into a Z39.58-type pattern,
448 * which has been supported in BIB-1 since 1996. If
449 * there's no configuration element for "truncation.z3958"
450 * we indicate this as error 28 "Masking character not
454 cql_pr_attr(ct, "truncation", "z3958", 0,
455 pr, client_data, 28);
456 z3958_mem = (char *) xmalloc(length+1);
457 for (i = 0; i < length; i++)
459 if (i > 0 && term[i-1] == '\\')
460 z3958_mem[i] = term[i];
461 else if (term[i] == '*')
463 else if (term[i] == '?')
466 z3958_mem[i] = term[i];
468 z3958_mem[length] = '\0';
472 /* No masking characters. Use "truncation.none" if given. */
473 cql_pr_attr(ct, "truncation", "none", 0,
478 cql_pr_attr_uri(ct, "index", ns,
479 cn->u.st.index, "serverChoice",
480 pr, client_data, 16);
482 if (cn->u.st.modifiers)
484 struct cql_node *mod = cn->u.st.modifiers;
485 for (; mod; mod = mod->u.st.modifiers)
487 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
488 pr, client_data, 20);
492 (*pr)("\"", client_data);
493 for (i = 0; i<length; i++)
495 /* pr(int) each character */
496 /* we do not need to deal with \-sequences because the
497 CQL and PQF terms have same \-format, bug #1988 */
502 (*pr)(buf, client_data);
504 (*pr)("\" ", client_data);
508 void emit_wordlist(cql_transform_t ct,
510 void (*pr)(const char *buf, void *client_data),
514 const char *cp0 = cn->u.st.term;
516 const char *last_term = 0;
522 cp1 = strchr(cp0, ' ');
525 (*pr)("@", client_data);
526 (*pr)(op, client_data);
527 (*pr)(" ", client_data);
528 emit_term(ct, cn, last_term, last_length, pr, client_data);
532 last_length = cp1 - cp0;
534 last_length = strlen(cp0);
538 emit_term(ct, cn, last_term, last_length, pr, client_data);
541 void cql_transform_r(cql_transform_t ct,
543 void (*pr)(const char *buf, void *client_data),
547 struct cql_node *mods;
554 ns = cn->u.st.index_uri;
557 if (!strcmp(ns, cql_uri())
558 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
560 (*pr)("@set \"", client_data);
561 (*pr)(cn->u.st.term, client_data);
562 (*pr)("\" ", client_data);
574 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
575 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "="))
576 cql_pr_attr(ct, "relation", "eq", "scr",
577 pr, client_data, 19);
578 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "<="))
579 cql_pr_attr(ct, "relation", "le", "scr",
580 pr, client_data, 19);
581 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, ">="))
582 cql_pr_attr(ct, "relation", "ge", "scr",
583 pr, client_data, 19);
585 cql_pr_attr(ct, "relation", cn->u.st.relation, "eq",
586 pr, client_data, 19);
587 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
588 pr, client_data, 24);
589 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
591 emit_wordlist(ct, cn, pr, client_data, "and");
593 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
595 emit_wordlist(ct, cn, pr, client_data, "or");
599 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
604 (*pr)("@", client_data);
605 (*pr)(cn->u.boolean.value, client_data);
606 (*pr)(" ", client_data);
607 mods = cn->u.boolean.modifiers;
608 if (!strcmp(cn->u.boolean.value, "prox")) {
609 if (!cql_pr_prox(ct, mods, pr, client_data))
612 /* Boolean modifiers other than on proximity not supported */
613 ct->error = 46; /* SRW diag: "Unsupported boolean modifier" */
614 ct->addinfo = xstrdup(mods->u.st.index);
618 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
619 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
623 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
628 int cql_transform(cql_transform_t ct,
630 void (*pr)(const char *buf, void *client_data),
633 struct cql_prop_entry *e;
634 NMEM nmem = nmem_create();
641 for (e = ct->entry; e ; e = e->next)
643 if (!cql_strncmp(e->pattern, "set.", 4))
644 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
645 else if (!cql_strcmp(e->pattern, "set"))
646 cql_apply_prefix(nmem, cn, 0, e->value);
648 cql_transform_r (ct, cn, pr, client_data);
654 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
656 return cql_transform(ct, cn, cql_fputs, f);
659 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
662 struct cql_buf_write_info info;
668 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
670 /* Attempt to write past end of buffer. For some reason, this
671 SRW diagnostic is deprecated, but it's so perfect for our
672 purposes that it would be stupid not to use it. */
674 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
675 sprintf(numbuf, "%ld", (long) info.max);
676 ct->addinfo = xstrdup(numbuf);
680 info.buf[info.off] = '\0';
684 int cql_transform_error(cql_transform_t ct, const char **addinfo)
686 *addinfo = ct->addinfo;
692 * indent-tabs-mode: nil
694 * vim: shiftwidth=4 tabstop=8 expandtab