1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2011 Index Data
3 * See the file LICENSE for details.
7 * \brief Implements CQL transform (CQL to RPN conversion).
9 * Evaluation order of rules:
27 #include <yaz/rpn2cql.h>
28 #include <yaz/xmalloc.h>
29 #include <yaz/diagsrw.h>
30 #include <yaz/tokenizer.h>
31 #include <yaz/wrbuf.h>
32 #include <yaz/z-core.h>
33 #include <yaz/matchstr.h>
34 #include <yaz/oid_db.h>
/* One configuration entry. Entries form a singly linked list through `next`;
 * `attr_list` holds the attribute list built for the entry. The members
 * `pattern` and `value`, which are assigned where entries are created, are
 * not visible in this excerpt. */
37 struct cql_prop_entry {
40 Z_AttributeList attr_list;
41 struct cql_prop_entry *next;
/* State behind a cql_transform_t handle: the head of the entry list and the
 * tokenizer configuration used when parsing definitions. Other members used
 * in this file (w, nmem, error, addinfo) are not visible in this excerpt. */
44 struct cql_transform_t_ {
45 struct cql_prop_entry *entry;
46 yaz_tok_cfg_t tok_cfg;
/* Allocate a new transform handle with xmalloc() and create the tokenizer
 * configuration, the working WRBUF and the NMEM pool it uses. The same three
 * objects are destroyed again by cql_transform_close(). */
54 cql_transform_t cql_transform_create(void)
56 cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct));
57 ct->tok_cfg = yaz_tok_cfg_create();
58 ct->w = wrbuf_alloc();
62 ct->nmem = nmem_create();
/* Parse the remainder of one definition from tokenizer `tp` and, on success,
 * append a new entry for `pattern` to the list rooted at ct->entry.
 *
 * The tokens are read as a sequence of "attset type=value" or "type=value"
 * specifications. Each one becomes a Z_AttributeElement allocated from
 * ct->nmem (the local array holds at most 20 per definition), and its
 * textual form is accumulated in ct->w, which is copied into the entry's
 * `value` string.
 *
 * `ret` carries the status: 0 means OK, non-zero means failure. */
66 static int cql_transform_parse_tok_line(cql_transform_t ct,
71 Z_AttributeElement *ae[20];
72 int ret = 0; /* 0=OK, != 0 FAIL */
76 while (t == YAZ_TOK_STRING && ae_num < 20)
78 WRBUF type_str = wrbuf_alloc();
80 Z_AttributeElement *elem = 0;
81 const char *value_str = 0;
82 /* attset type=value OR type=value */
84 elem = (Z_AttributeElement *) nmem_malloc(ct->nmem, sizeof(*elem));
85 elem->attributeSet = 0;
87 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
88 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
92 wrbuf_destroy(type_str);
94 wrbuf_destroy(set_str);
/* A second string token follows: it is appended to ct->w after a space,
 * and set_str is resolved to an attribute-set OID allocated in ct->nmem. */
97 if (t == YAZ_TOK_STRING)
99 wrbuf_puts(ct->w, " ");
100 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
104 yaz_string_to_oid_nmem(yaz_oid_std(), CLASS_ATTSET,
105 wrbuf_cstr(set_str), ct->nmem);
107 type_str = wrbuf_alloc();
108 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
109 t = yaz_tok_move(tp);
/* The attribute type must scan as an integer; otherwise a warning is logged. */
111 elem->attributeType = nmem_intdup(ct->nmem, 0);
112 if (sscanf(wrbuf_cstr(type_str), ODR_INT_PRINTF, elem->attributeType)
115 wrbuf_destroy(type_str);
117 wrbuf_destroy(set_str);
118 yaz_log(YLOG_WARN, "Expected numeric attribute type");
123 wrbuf_destroy(type_str);
125 wrbuf_destroy(set_str);
/* NOTE(review): the message below repeats the word "after". */
129 yaz_log(YLOG_WARN, "Expected = after after attribute type");
133 t = yaz_tok_move(tp);
134 if (t != YAZ_TOK_STRING) /* value */
136 yaz_log(YLOG_WARN, "Missing attribute value");
140 value_str = yaz_tok_parse_string(tp);
/* A value starting with a digit is stored as a numeric attribute value
 * (converted with atoi); anything else is stored as a string.
 * NOTE(review): isdigit() expects a value in unsigned char range and
 * *value_str is a plain char -- consider a cast to unsigned char. */
141 if (isdigit(*value_str))
143 elem->which = Z_AttributeValue_numeric;
144 elem->value.numeric =
145 nmem_intdup(ct->nmem, atoi(value_str));
/* String value: wrapped in a complex attribute whose list has a single
 * string member and no semantic actions. */
149 Z_ComplexAttribute *ca = (Z_ComplexAttribute *)
150 nmem_malloc(ct->nmem, sizeof(*ca));
151 elem->which = Z_AttributeValue_complex;
152 elem->value.complex = ca;
154 ca->list = (Z_StringOrNumeric **)
155 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric *));
156 ca->list[0] = (Z_StringOrNumeric *)
157 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric));
158 ca->list[0]->which = Z_StringOrNumeric_string;
159 ca->list[0]->u.string = nmem_strdup(ct->nmem, value_str);
160 ca->num_semanticAction = 0;
161 ca->semanticAction = 0;
/* Append "=value " to the textual form and advance to the next token. */
163 wrbuf_puts(ct->w, "=");
164 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
165 t = yaz_tok_move(tp);
166 wrbuf_puts(ct->w, " ");
169 if (ret == 0) /* OK? */
/* Create the new entry: pattern and value are heap copies (xstrdup), the
 * attribute pointer array is copied into ct->nmem. */
171 struct cql_prop_entry **pp = &ct->entry;
174 *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp));
175 (*pp)->pattern = xstrdup(pattern);
176 (*pp)->value = xstrdup(wrbuf_cstr(ct->w));
178 (*pp)->attr_list.num_attributes = ae_num;
180 (*pp)->attr_list.attributes = 0;
183 (*pp)->attr_list.attributes = (Z_AttributeElement **)
184 nmem_malloc(ct->nmem,
185 ae_num * sizeof(Z_AttributeElement *));
186 memcpy((*pp)->attr_list.attributes, ae,
187 ae_num * sizeof(Z_AttributeElement *));
/* Print the new attribute list through an ODR printer that writes to the
 * YAZ log file. The condition guarding this dump is not visible here. */
193 ODR pr = odr_createmem(ODR_PRINT);
194 Z_AttributeList *alp = &(*pp)->attr_list;
195 odr_setprint(pr, yaz_log_file());
196 z_AttributeList(pr, &alp, 0, 0);
/* Define a single pattern: tokenize `value`, hand the tokens together with
 * `pattern` to cql_transform_parse_tok_line(), then destroy the parser.
 * NOTE(review): "=" is registered as a single-character token on the shared
 * tokenizer config after the parser has been created from it -- confirm the
 * parser observes configuration changes made after its creation. */
204 int cql_transform_define_pattern(cql_transform_t ct, const char *pattern,
208 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, value);
209 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
210 r = cql_transform_parse_tok_line(ct, pattern, tp);
211 yaz_tok_parse_destroy(tp);
/* Build a transform handle from an open stream, reading it line by line with
 * fgets(). For each line whose first token is a string, that token is copied
 * as the pattern name and the rest of the line is passed to
 * cql_transform_parse_tok_line(). On every failure path visible here the
 * line's parser is destroyed and the handle is closed. */
215 cql_transform_t cql_transform_open_FILE(FILE *f)
217 cql_transform_t ct = cql_transform_create();
220 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
/* NOTE(review): fgets() already reserves room for the terminating NUL, so
 * the "-1" only shortens the usable buffer by one byte. */
222 while (fgets(line, sizeof(line)-1, f))
224 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line);
227 t = yaz_tok_move(tp);
228 if (t == YAZ_TOK_STRING)
/* The pattern is an xstrdup() copy; its release is not visible in this
 * excerpt. */
230 char * pattern = xstrdup(yaz_tok_parse_string(tp));
231 t = yaz_tok_move(tp);
234 yaz_tok_parse_destroy(tp);
235 cql_transform_close(ct);
238 if (cql_transform_parse_tok_line(ct, pattern, tp))
240 yaz_tok_parse_destroy(tp);
241 cql_transform_close(ct);
/* A line whose first token is neither a string nor end-of-input. */
246 else if (t != YAZ_TOK_EOF)
248 yaz_tok_parse_destroy(tp);
249 cql_transform_close(ct);
252 yaz_tok_parse_destroy(tp);
/* Destroy a transform handle. The entry list is walked with each node's
 * `next` pointer saved before moving on; afterwards the tokenizer config,
 * the WRBUF and the NMEM pool owned by the handle are destroyed. */
257 void cql_transform_close(cql_transform_t ct)
259 struct cql_prop_entry *pe;
265 struct cql_prop_entry *pe_next = pe->next;
272 yaz_tok_cfg_destroy(ct->tok_cfg);
273 wrbuf_destroy(ct->w);
274 nmem_destroy(ct->nmem);
/* Open the file `fname` for reading and build a transform handle from its
 * contents via cql_transform_open_FILE(). */
278 cql_transform_t cql_transform_open_fname(const char *fname)
281 FILE *f = fopen(fname, "r");
284 ct = cql_transform_open_FILE(f);
/* NOTE(review): Z_AttributeElement is already used earlier in this file and
 * <yaz/z-core.h> is included, so this looks like a reference copy of the
 * declaration kept next to compare_attr() -- confirm whether it is actually
 * compiled (e.g. it may sit inside a disabled preprocessor region). */
290 struct Z_AttributeElement {
291 Z_AttributeSetId *attributeSet; /* OPT */
296 Z_ComplexAttribute *complex;
297 #define Z_AttributeValue_numeric 1
298 #define Z_AttributeValue_complex 2
/* Compare two attribute elements by their encoded form: each is encoded with
 * z_AttributeElement() into its own ODR_ENCODE stream, and the two resulting
 * buffers are compared with yaz_memcmp(). The comparison result is stored in
 * `ret`. */
303 static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b)
305 ODR odr_a = odr_createmem(ODR_ENCODE);
306 ODR odr_b = odr_createmem(ODR_ENCODE);
311 z_AttributeElement(odr_a, &a, 0, 0);
312 z_AttributeElement(odr_b, &b, 0, 0);
314 buf_a = odr_getbuf(odr_a, &len_a, 0);
315 buf_b = odr_getbuf(odr_b, &len_b, 0);
317 ret = yaz_memcmp(buf_a, buf_b, len_a, len_b);
/* Reverse lookup: scan the entries for one whose pattern starts with
 * `category` and whose attributes are all present in `attributes` (elements
 * are compared with compare_attr()). For a match, the returned pointer is
 * the part of the entry's pattern that follows the category prefix. */
324 const char *cql_lookup_reverse(cql_transform_t ct,
325 const char *category,
326 Z_AttributeList *attributes)
328 struct cql_prop_entry *e;
329 size_t clen = strlen(category);
330 for (e = ct->entry; e; e = e->next)
332 if (!strncmp(e->pattern, category, clen))
334 /* category matches.. See if attributes in pattern value
335 are all listed in actual attributes */
337 for (i = 0; i < e->attr_list.num_attributes; i++)
339 /* entry attribute */
340 Z_AttributeElement *e_ae = e->attr_list.attributes[i];
342 for (j = 0; j < attributes->num_attributes; j++)
344 /* actual attribute */
345 Z_AttributeElement *a_ae = attributes->attributes[j];
346 int r = compare_attr(e_ae, a_ae);
350 if (j == attributes->num_attributes)
351 break; /* i was not found at all.. try next pattern */
/* The outer loop ran to completion, so every entry attribute was found. */
354 if (i == e->attr_list.num_attributes)
355 return e->pattern + clen;
/* Look up an entry by dotted pattern name. The name is assembled from pat1
 * plus whichever of pat2 and pat3 are non-null, with each part limited to
 * 39 characters by the format string, and is then compared against every
 * entry's pattern with cql_strcmp(). */
361 static const char *cql_lookup_property(cql_transform_t ct,
362 const char *pat1, const char *pat2,
366 struct cql_prop_entry *e;
368 if (pat1 && pat2 && pat3)
369 sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
370 else if (pat1 && pat2)
371 sprintf(pattern, "%.39s.%.39s", pat1, pat2);
372 else if (pat1 && pat3)
373 sprintf(pattern, "%.39s.%.39s", pat1, pat3);
375 sprintf(pattern, "%.39s", pat1);
379 for (e = ct->entry; e; e = e->next)
381 if (!cql_strcmp(e->pattern, pattern))
/* Print the attribute specification configured for a category and value.
 *
 * `val`, or `default_val` when val is null, selects the property. A prefix
 * is taken from the "set.<prefix>" entry whose value equals `uri`. The
 * property is looked up for the selected value; for the "relation" category
 * without a prefix some alias names are tried as well, and a lookup with
 * the wildcard name is also present. A result is scanned for '=' separated
 * items and written through `pr` as "@attr " clauses.
 *
 * On the visible error path, when `errcode` is non-zero and no error has
 * been recorded yet, addinfo is set to a copy of `val`. */
387 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
388 const char *uri, const char *val, const char *default_val,
389 void (*pr)(const char *buf, void *client_data),
394 const char *eval = val ? val : default_val;
395 const char *prefix = 0;
399 struct cql_prop_entry *e;
401 for (e = ct->entry; e; e = e->next)
402 if (!memcmp(e->pattern, "set.", 4) && e->value &&
403 !strcmp(e->value, uri))
405 prefix = e->pattern+4;
408 /* must have a prefix now - if not it's an error */
414 res = cql_lookup_property(ct, category, prefix, eval);
415 /* we have some aliases for some relations unfortunately.. */
416 if (!res && !prefix && !strcmp(category, "relation"))
/* NOTE(review): `val` is passed to strcmp() here although the fallback to
 * default_val above implies it may be null -- confirm callers never reach
 * this branch with a null relation. */
418 if (!strcmp(val, "=="))
419 res = cql_lookup_property(ct, category, prefix, "exact");
420 if (!strcmp(val, "="))
421 res = cql_lookup_property(ct, category, prefix, "eq");
422 if (!strcmp(val, "<="))
423 res = cql_lookup_property(ct, category, prefix, "le");
424 if (!strcmp(val, ">="))
425 res = cql_lookup_property(ct, category, prefix, "ge");
428 res = cql_lookup_property(ct, category, prefix, "*");
/* Walk the result string: each '=' marks one item, which extends to the
 * next space or the end of the string and is copied into `buf` when it
 * fits. */
434 const char *cp0 = res, *cp1;
435 while ((cp1 = strchr(cp0, '=')))
438 while (*cp1 && *cp1 != ' ')
440 if (cp1 - cp0 >= (ptrdiff_t) sizeof(buf))
442 memcpy(buf, cp0, cp1 - cp0);
444 (*pr)("@attr ", client_data);
446 for (i = 0; buf[i]; i++)
449 (*pr)(eval, client_data);
455 (*pr)(tmp, client_data);
458 (*pr)(" ", client_data);
466 if (errcode && !ct->error)
470 ct->addinfo = xstrdup(val);
/* Convenience wrapper around cql_pr_attr_uri() that passes a null uri and
 * forwards all other arguments unchanged. */
477 int cql_pr_attr(cql_transform_t ct, const char *category,
478 const char *val, const char *default_val,
479 void (*pr)(const char *buf, void *client_data),
483 return cql_pr_attr_uri(ct, category, 0 /* uri */,
484 val, default_val, pr, client_data, errcode);
/* Write `val` in decimal through the `pr` callback, followed by a single
 * space. */
488 static void cql_pr_int(int val,
489 void (*pr)(const char *buf, void *client_data),
492 char buf[21]; /* enough characters to 2^64 */
493 sprintf(buf, "%d", val);
494 (*pr)(buf, client_data);
495 (*pr)(" ", client_data);
/* Translate the modifiers of a proximity operator into its numeric
 * parameters and print them through `pr`.
 *
 * The modifier chain is followed through u.st.modifiers. Recognised names
 * are "distance" (value parsed with strtol, relation selects the proximity
 * relation), "ordered", "unordered" and "unit" (word, sentence, paragraph,
 * element). An unsupported relation, unit or modifier name records the
 * matching YAZ_SRW_* error together with a copy of the offending text.
 *
 * Output order: exclusion, distance, ordered, relation, the literal "k ",
 * unit. When no distance was given it defaults to 1 for unit 2 (word) and
 * to 0 otherwise. */
499 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
500 void (*pr)(const char *buf, void *client_data),
504 int distance; /* to be filled in later depending on unit */
505 int distance_defined = 0;
507 int proxrel = 2; /* less than or equal */
508 int unit = 2; /* word */
512 const char *name = mods->u.st.index;
513 const char *term = mods->u.st.term;
514 const char *relation = mods->u.st.relation;
516 if (!strcmp(name, "distance")) {
517 distance = strtol(term, (char**) 0, 0);
518 distance_defined = 1;
519 if (!strcmp(relation, "="))
521 else if (!strcmp(relation, ">"))
523 else if (!strcmp(relation, "<"))
525 else if (!strcmp(relation, ">="))
527 else if (!strcmp(relation, "<="))
529 else if (!strcmp(relation, "<>"))
533 ct->error = YAZ_SRW_UNSUPP_PROX_RELATION;
534 ct->addinfo = xstrdup(relation);
538 else if (!strcmp(name, "ordered"))
540 else if (!strcmp(name, "unordered"))
542 else if (!strcmp(name, "unit"))
544 if (!strcmp(term, "word"))
546 else if (!strcmp(term, "sentence"))
548 else if (!strcmp(term, "paragraph"))
550 else if (!strcmp(term, "element"))
554 ct->error = YAZ_SRW_UNSUPP_PROX_UNIT;
555 ct->addinfo = xstrdup(term);
561 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
562 ct->addinfo = xstrdup(name);
565 mods = mods->u.st.modifiers;
568 if (!distance_defined)
569 distance = (unit == 2) ? 1 : 0;
571 cql_pr_int(exclusion, pr, client_data);
572 cql_pr_int(distance, pr, client_data);
573 cql_pr_int(ordered, pr, client_data);
574 cql_pr_int(proxrel, pr, client_data);
575 (*pr)("k ", client_data);
576 cql_pr_int(unit, pr, client_data);
581 /* Returns location of first wildcard character in the `length'
582 * characters starting at `term', or a null pointer if there are
583 * none -- like memchr().
/* `start` non-zero means there is no character before `term` to inspect, so
 * term[-1] is not read. Otherwise a character preceded by a backslash is
 * skipped as escaped. The wildcard characters are '*' and '?'.
 * NOTE(review): strchr() also matches the terminating NUL of its first
 * argument, so a NUL byte at *term would count as a hit -- confirm that
 * `length` never extends past the end of the string. */
585 static const char *wcchar(int start, const char *term, int length)
589 if (start || term[-1] != '\\')
590 if (strchr("*?", *term))
/* Walk the modifier chain of `cn` (linked through u.st.modifiers) and
 * compare each modifier's index with `name` using strcmp(). */
600 /* ### checks for CQL relation-name rather than Type-1 attribute */
601 static int has_modifier(struct cql_node *cn, const char *name) {
602 struct cql_node *mod;
603 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
604 if (!strcmp(mod->u.st.index, name))
/* Emit one search term through `pr`, preceded by its attributes.
 *
 * Unless the node carries a "regexp" modifier, the term is inspected for
 * '^' anchors, which select a "position" attribute (firstAndLast, first,
 * last or any), and for wildcard characters, which select a "truncation"
 * attribute (both, left, right, z3958 or none). The "index" attribute and
 * one "relationModifier" attribute per modifier are printed next, followed
 * by the term enclosed in double quotes. */
612 static void emit_term(cql_transform_t ct,
614 const char *term, int length,
615 void (*pr)(const char *buf, void *client_data),
619 const char *ns = cn->u.st.index_uri;
620 int process_term = !has_modifier(cn, "regexp");
623 assert(cn->which == CQL_NODE_ST);
625 if (process_term && length > 0)
627 if (length > 1 && term[0] == '^' && term[length-1] == '^')
629 cql_pr_attr(ct, "position", "firstAndLast", 0,
630 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
634 else if (term[0] == '^')
636 cql_pr_attr(ct, "position", "first", 0,
637 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
641 else if (term[length-1] == '^')
643 cql_pr_attr(ct, "position", "last", 0,
644 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
649 cql_pr_attr(ct, "position", "any", 0,
650 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
654 if (process_term && length > 0)
656 const char *first_wc = wcchar(1, term, length);
657 const char *second_wc = first_wc ?
658 wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
660 /* Check for well-known globbing patterns that represent
661 * simple truncation attributes as expected by, for example,
662 * Bath-compliant server. If we find such a pattern but
663 * there's no mapping for it, that's fine: we just use a
664 * general pattern-matching attribute.
666 if (first_wc == term && second_wc == term + length-1
667 && *first_wc == '*' && *second_wc == '*'
668 && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
673 else if (first_wc == term && second_wc == 0 && *first_wc == '*'
674 && cql_pr_attr(ct, "truncation", "left", 0,
680 else if (first_wc == term + length-1 && second_wc == 0
682 && cql_pr_attr(ct, "truncation", "right", 0,
689 /* We have one or more wildcard characters, but not in a
690 * way that can be dealt with using only the standard
691 * left-, right- and both-truncation attributes. We need
692 * to translate the pattern into a Z39.58-type pattern,
693 * which has been supported in BIB-1 since 1996. If
694 * there's no configuration element for "truncation.z3958"
695 * we indicate this as error 28 "Masking character not
699 cql_pr_attr(ct, "truncation", "z3958", 0,
700 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
701 z3958_mem = (char *) xmalloc(length+1);
702 for (i = 0; i < length; i++)
704 if (i > 0 && term[i-1] == '\\')
705 z3958_mem[i] = term[i];
706 else if (term[i] == '*')
708 else if (term[i] == '?')
711 z3958_mem[i] = term[i];
713 z3958_mem[length] = '\0';
717 /* No masking characters. Use "truncation.none" if given. */
718 cql_pr_attr(ct, "truncation", "none", 0,
/* Index attribute: "serverChoice" is passed as the default value, which
 * cql_pr_attr_uri() uses when the node's index is null. */
723 cql_pr_attr_uri(ct, "index", ns,
724 cn->u.st.index, "serverChoice",
725 pr, client_data, YAZ_SRW_UNSUPP_INDEX);
727 if (cn->u.st.modifiers)
729 struct cql_node *mod = cn->u.st.modifiers;
730 for (; mod; mod = mod->u.st.modifiers)
732 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
733 pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
737 (*pr)("\"", client_data);
738 for (i = 0; i<length; i++)
740 /* pr(int) each character */
741 /* we do not need to deal with \-sequences because the
742 CQL and PQF terms have same \-format, bug #1988 */
747 (*pr)(buf, client_data);
749 (*pr)("\" ", client_data);
/* Emit the node's own term followed by every term on its extra_terms chain,
 * each through emit_term() with `cn` supplying the attributes. The operator
 * is printed as "@<op> " ahead of the terms it joins; inside the loop this
 * happens only while a further extra term follows. */
753 static void emit_terms(cql_transform_t ct,
755 void (*pr)(const char *buf, void *client_data),
759 struct cql_node *ne = cn->u.st.extra_terms;
762 (*pr)("@", client_data);
763 (*pr)(op, client_data);
764 (*pr)(" ", client_data);
766 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
768 for (; ne; ne = ne->u.st.extra_terms)
770 if (ne->u.st.extra_terms)
772 (*pr)("@", client_data);
773 (*pr)(op, client_data);
774 (*pr)(" ", client_data);
776 emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term),
/* Split the node's term at space characters and emit each word as its own
 * term, joined by "@<op> ". Emission runs one word behind the scan: the
 * previously found word (last_term, last_length) is printed when the next
 * one is located, and the final word is printed after the scan. */
781 static void emit_wordlist(cql_transform_t ct,
783 void (*pr)(const char *buf, void *client_data),
787 const char *cp0 = cn->u.st.term;
789 const char *last_term = 0;
795 cp1 = strchr(cp0, ' ');
798 (*pr)("@", client_data);
799 (*pr)(op, client_data);
800 (*pr)(" ", client_data);
801 emit_term(ct, cn, last_term, last_length, pr, client_data);
805 last_length = cp1 - cp0;
807 last_length = strlen(cp0);
811 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Recursive worker that prints the query for node `cn` through `pr`.
 *
 * Search-term nodes: an index named "resultSet" in the CQL context set is
 * printed as @set "<term>". Otherwise the "always", "relation" and
 * "structure" attributes are printed and the term is emitted as a word
 * list joined with "and" (relation "all") or "or" (relation "any"), or via
 * emit_terms() with "and".
 *
 * Boolean nodes: "@<operator> " is printed; for "prox" the modifiers are
 * handled by cql_pr_prox(), while modifiers on other operators are recorded
 * as YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER. Both operands are then processed
 * recursively.
 *
 * Any other node type is reported on stderr as fatal. */
814 void cql_transform_r(cql_transform_t ct,
816 void (*pr)(const char *buf, void *client_data),
820 struct cql_node *mods;
827 ns = cn->u.st.index_uri;
830 if (!strcmp(ns, cql_uri())
831 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
833 (*pr)("@set \"", client_data);
834 (*pr)(cn->u.st.term, client_data);
835 (*pr)("\" ", client_data);
843 ct->error = YAZ_SRW_UNSUPP_CONTEXT_SET;
847 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
848 cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data,
849 YAZ_SRW_UNSUPP_RELATION);
850 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
851 pr, client_data, YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
852 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
853 emit_wordlist(ct, cn, pr, client_data, "and");
854 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
855 emit_wordlist(ct, cn, pr, client_data, "or");
857 emit_terms(ct, cn, pr, client_data, "and");
860 (*pr)("@", client_data);
861 (*pr)(cn->u.boolean.value, client_data);
862 (*pr)(" ", client_data);
863 mods = cn->u.boolean.modifiers;
864 if (!strcmp(cn->u.boolean.value, "prox"))
866 if (!cql_pr_prox(ct, mods, pr, client_data))
871 /* Boolean modifiers other than on proximity not supported */
872 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
873 ct->addinfo = xstrdup(mods->u.st.index);
877 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
878 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
882 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Transform the CQL tree `cn`, writing the result through `pr`.
 *
 * Before the tree is walked, every "set.<prefix>" entry is applied to it
 * with cql_apply_prefix() using that prefix and the entry's value, and a
 * plain "set" entry is applied with a null prefix. A temporary NMEM pool
 * is created for those calls. The output itself is produced by
 * cql_transform_r(). */
887 int cql_transform(cql_transform_t ct, struct cql_node *cn,
888 void (*pr)(const char *buf, void *client_data),
891 struct cql_prop_entry *e;
892 NMEM nmem = nmem_create();
898 for (e = ct->entry; e ; e = e->next)
900 if (!cql_strncmp(e->pattern, "set.", 4))
901 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
902 else if (!cql_strcmp(e->pattern, "set"))
903 cql_apply_prefix(nmem, cn, 0, e->value);
905 cql_transform_r(ct, cn, pr, client_data);
/* Transform `cn` and write the output to stream `f`: calls cql_transform()
 * with cql_fputs as the print callback and `f` as its client data. */
911 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
913 return cql_transform(ct, cn, cql_fputs, f);
/* Transform `cn` into the caller's buffer `out` of capacity `max`, using
 * cql_buf_write_handler with a cql_buf_write_info as the sink. If the
 * output does not fit, YAZ_SRW_TOO_MANY_CHARS_IN_QUERY is recorded with the
 * buffer limit (info.max) as addinfo; otherwise the buffer is
 * NUL-terminated at the current write offset. */
916 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, char *out, int max)
918 struct cql_buf_write_info info;
924 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
926 /* Attempt to write past end of buffer. For some reason, this
927 SRW diagnostic is deprecated, but it's so perfect for our
928 purposes that it would be stupid not to use it. */
930 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
931 sprintf(numbuf, "%ld", (long) info.max);
932 ct->addinfo = xstrdup(numbuf);
936 info.buf[info.off] = '\0';
/* Report the error state of the handle. The additional-information string
 * is handed back through `addinfo` as a pointer to the handle's own copy,
 * not a new allocation. */
940 int cql_transform_error(cql_transform_t ct, const char **addinfo)
942 *addinfo = ct->addinfo;
/* Set the error state of the handle. A non-null `addinfo` is duplicated with
 * xstrdup(); a null `addinfo` stores a null pointer. */
946 void cql_transform_set_error(cql_transform_t ct, int error, const char *addinfo)
949 ct->addinfo = addinfo ? xstrdup(addinfo) : 0;
956 * c-file-style: "Stroustrup"
957 * indent-tabs-mode: nil
959 * vim: shiftwidth=4 tabstop=8 expandtab