1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
7 * \brief Implements CQL transform (CQL to RPN conversion).
9 * Evaluation order of rules:
26 #include <yaz/rpn2cql.h>
27 #include <yaz/xmalloc.h>
28 #include <yaz/diagsrw.h>
29 #include <yaz/tokenizer.h>
30 #include <yaz/wrbuf.h>
31 #include <yaz/z-core.h>
32 #include <yaz/matchstr.h>
33 #include <yaz/oid_db.h>
/* One parsed property line, kept in a singly linked list via next.
 * attr_list holds the attribute elements parsed from the value side of
 * the line.  Other code in this file also reads members named pattern
 * and value on this struct; their declarations are on lines that are
 * not visible in this listing. */
36 struct cql_prop_entry {
39 Z_AttributeList attr_list;
40 struct cql_prop_entry *next;
/* State behind a cql_transform_t handle.
 * entry   - head of the linked list of parsed property entries.
 * tok_cfg - tokenizer configuration used when parsing property lines.
 * The functions in this file also use members addinfo and nmem; those
 * declarations are on lines not visible in this listing. */
43 struct cql_transform_t_ {
44 struct cql_prop_entry *entry;
45 yaz_tok_cfg_t tok_cfg;
/* Allocates a new transform handle with xmalloc and gives it a fresh
 * tokenizer configuration, an empty addinfo WRBUF and its own NMEM.
 * NOTE(review): initialisation of the remaining members and the return
 * statement are on lines not visible here. */
52 cql_transform_t cql_transform_create(void)
54 cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct));
55 ct->tok_cfg = yaz_tok_cfg_create();
57 ct->addinfo = wrbuf_alloc();
59 ct->nmem = nmem_create();
/* Parses the value side of one property line from tokenizer tp into a
 * list of attribute elements and, on success, stores a new property
 * entry for pattern on ct.
 *
 * Each attribute has the form  attset type=value  or  type=value
 * (see the comment inside the loop).  At most 20 attributes are
 * collected, bounded by the ae array and the loop condition.
 * Attribute elements and their payload are allocated from ct->nmem;
 * the entry itself and its pattern/value strings use xmalloc/xstrdup.
 * The text of every consumed token is also accumulated in w, which
 * becomes the entry value string.
 *
 * ret follows the convention noted at its declaration: 0 is OK, any
 * other value is failure.
 * NOTE(review): many lines of this function (braces, some conditions,
 * assignments to ret, the return) are not visible in this listing. */
63 static int cql_transform_parse_tok_line(cql_transform_t ct,
68 Z_AttributeElement *ae[20];
69 int ret = 0; /* 0=OK, != 0 FAIL */
71 WRBUF w = wrbuf_alloc();
/* One iteration per attribute; stops on a non-string token or when the
 * ae array is full. */
75 while (t == YAZ_TOK_STRING && ae_num < 20)
77 WRBUF type_str = wrbuf_alloc();
79 Z_AttributeElement *elem = 0;
80 const char *value_str = 0;
81 /* attset type=value OR type=value */
83 elem = (Z_AttributeElement *) nmem_malloc(ct->nmem, sizeof(*elem));
84 elem->attributeSet = 0;
86 wrbuf_puts(w, yaz_tok_parse_string(tp));
87 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
91 wrbuf_destroy(type_str);
93 wrbuf_destroy(set_str);
/* A second string token means the first one was an attribute set name:
 * set_str is resolved as a CLASS_ATTSET OID and type_str is rebuilt
 * from the current token.  The destination of the OID lookup is on a
 * line not visible here. */
96 if (t == YAZ_TOK_STRING)
99 wrbuf_puts(w, yaz_tok_parse_string(tp));
103 yaz_string_to_oid_nmem(yaz_oid_std(), CLASS_ATTSET,
104 wrbuf_cstr(set_str), ct->nmem);
106 type_str = wrbuf_alloc();
107 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
108 t = yaz_tok_move(tp);
/* The attribute type integer is allocated first and then filled in
 * place by sscanf. */
110 elem->attributeType = nmem_intdup(ct->nmem, 0);
111 if (sscanf(wrbuf_cstr(type_str), ODR_INT_PRINTF, elem->attributeType)
114 wrbuf_destroy(type_str);
116 wrbuf_destroy(set_str);
117 yaz_log(YLOG_WARN, "Expected numeric attribute type");
122 wrbuf_destroy(type_str);
124 wrbuf_destroy(set_str);
/* NOTE(review): the log text below repeats the word after. */
128 yaz_log(YLOG_WARN, "Expected = after after attribute type");
132 t = yaz_tok_move(tp);
133 if (t != YAZ_TOK_STRING) /* value */
135 yaz_log(YLOG_WARN, "Missing attribute value");
/* A value whose first character is a digit is stored as a numeric
 * attribute via atoi; the other visible branch stores it as a complex
 * attribute whose list holds one string. */
139 value_str = yaz_tok_parse_string(tp);
140 if (yaz_isdigit(*value_str))
142 elem->which = Z_AttributeValue_numeric;
143 elem->value.numeric =
144 nmem_intdup(ct->nmem, atoi(value_str));
148 Z_ComplexAttribute *ca = (Z_ComplexAttribute *)
149 nmem_malloc(ct->nmem, sizeof(*ca));
150 elem->which = Z_AttributeValue_complex;
151 elem->value.complex = ca;
153 ca->list = (Z_StringOrNumeric **)
154 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric *));
155 ca->list[0] = (Z_StringOrNumeric *)
156 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric));
157 ca->list[0]->which = Z_StringOrNumeric_string;
158 ca->list[0]->u.string = nmem_strdup(ct->nmem, value_str);
159 ca->num_semanticAction = 0;
160 ca->semanticAction = 0;
163 wrbuf_puts(w, yaz_tok_parse_string(tp));
164 t = yaz_tok_move(tp);
/* On success a new entry is created: pattern and the accumulated token
 * text are duplicated, and the collected element pointers are copied
 * from the stack array ae into an array owned by ct->nmem.
 * NOTE(review): how pp is advanced before the allocation, and the
 * condition that selects attributes = 0 versus the copy, are on lines
 * not visible here. */
168 if (ret == 0) /* OK? */
170 struct cql_prop_entry **pp = &ct->entry;
173 *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp));
174 (*pp)->pattern = xstrdup(pattern);
175 (*pp)->value = xstrdup(wrbuf_cstr(w));
177 (*pp)->attr_list.num_attributes = ae_num;
179 (*pp)->attr_list.attributes = 0;
182 (*pp)->attr_list.attributes = (Z_AttributeElement **)
183 nmem_malloc(ct->nmem,
184 ae_num * sizeof(Z_AttributeElement *));
185 memcpy((*pp)->attr_list.attributes, ae,
186 ae_num * sizeof(Z_AttributeElement *));
/* Prints the new attribute list through an ODR_PRINT stream directed at
 * the YAZ log file.  NOTE(review): presumably debug-only; the guard
 * around it is not visible here. */
192 ODR pr = odr_createmem(ODR_PRINT);
193 Z_AttributeList *alp = &(*pp)->attr_list;
194 odr_setprint(pr, yaz_log_file());
195 z_AttributeList(pr, &alp, 0, 0);
/* Defines a single property on ct from a pattern name and a value
 * string: tokenizes value and hands the token stream to
 * cql_transform_parse_tok_line, whose result is kept in r.
 * NOTE(review): the tokenizer config is told to treat = as a single
 * token after the parser has already been created from it; confirm the
 * parser observes configuration changes made after
 * yaz_tok_parse_buf. */
204 int cql_transform_define_pattern(cql_transform_t ct, const char *pattern,
208 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, value);
209 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
210 r = cql_transform_parse_tok_line(ct, pattern, tp);
211 yaz_tok_parse_destroy(tp);
/* Builds a transform handle from an already opened property file.
 * Reads f line by line; for each line whose first token is a string,
 * that token is duplicated as the pattern and the rest of the line is
 * parsed by cql_transform_parse_tok_line.  On each visible failure path
 * the line parser is destroyed and the handle is closed with
 * cql_transform_close.
 * NOTE(review): pattern is obtained with xstrdup; its release is not
 * on the visible lines, so confirm every exit path frees it.  The
 * return statements are likewise not visible. */
215 cql_transform_t cql_transform_open_FILE(FILE *f)
217 cql_transform_t ct = cql_transform_create();
220 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
/* fgets itself stores at most size-1 characters plus the terminator, so
 * the extra -1 only leaves one byte of line unused. */
222 while (fgets(line, sizeof(line)-1, f))
224 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line);
226 t = yaz_tok_move(tp);
227 if (t == YAZ_TOK_STRING)
229 char * pattern = xstrdup(yaz_tok_parse_string(tp));
230 t = yaz_tok_move(tp);
233 yaz_tok_parse_destroy(tp);
234 cql_transform_close(ct);
237 if (cql_transform_parse_tok_line(ct, pattern, tp))
239 yaz_tok_parse_destroy(tp);
240 cql_transform_close(ct);
245 else if (t != YAZ_TOK_EOF)
247 yaz_tok_parse_destroy(tp);
248 cql_transform_close(ct);
251 yaz_tok_parse_destroy(tp);
/* Releases a transform handle.  Walks the entry list, saving each next
 * pointer before the current entry is handled, then destroys the
 * addinfo buffer, the tokenizer configuration and the NMEM.
 * NOTE(review): the per-entry frees and the release of ct itself are on
 * lines not visible here. */
256 void cql_transform_close(cql_transform_t ct)
258 struct cql_prop_entry *pe;
264 struct cql_prop_entry *pe_next = pe->next;
270 wrbuf_destroy(ct->addinfo);
271 yaz_tok_cfg_destroy(ct->tok_cfg);
272 nmem_destroy(ct->nmem);
/* Opens the property file fname for reading in text mode and builds a
 * transform handle from it via cql_transform_open_FILE.
 * NOTE(review): the check for a failed fopen, the fclose and the return
 * are on lines not visible here. */
276 cql_transform_t cql_transform_open_fname(const char *fname)
279 FILE *f = fopen(fname, "r");
282 ct = cql_transform_open_FILE(f);
/* Shape of Z_AttributeElement as relied on by the code in this file:
 * an optional attribute set id and a value that is either numeric or
 * complex, tagged by the two Z_AttributeValue_ constants.
 * NOTE(review): this file also includes yaz/z-core.h, so this is
 * presumably a reference copy that is disabled by the preprocessor;
 * the surrounding directives are not visible here - confirm. */
288 struct Z_AttributeElement {
289 Z_AttributeSetId *attributeSet; /* OPT */
294 Z_ComplexAttribute *complex;
295 #define Z_AttributeValue_numeric 1
296 #define Z_AttributeValue_complex 2
/* Compares two attribute elements by value rather than by pointer:
 * each one is encoded with its own ODR_ENCODE stream and the two
 * encoded buffers are compared with yaz_memcmp, taking both lengths
 * into account.  The comparison result is stored in ret.
 * NOTE(review): destruction of the two ODR streams and the return are
 * on lines not visible here. */
301 static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b)
303 ODR odr_a = odr_createmem(ODR_ENCODE);
304 ODR odr_b = odr_createmem(ODR_ENCODE);
309 z_AttributeElement(odr_a, &a, 0, 0);
310 z_AttributeElement(odr_b, &b, 0, 0);
312 buf_a = odr_getbuf(odr_a, &len_a, 0);
313 buf_b = odr_getbuf(odr_b, &len_b, 0);
315 ret = yaz_memcmp(buf_a, buf_b, len_a, len_b);
/* Reverse lookup: finds the first property entry whose pattern starts
 * with category and whose attributes are all present in attributes.
 *
 * category is matched as a prefix (strncmp over its own length), and
 * on a match the function returns the part of the entry pattern that
 * follows that prefix.  Attribute equality is decided by compare_attr.
 * NOTE(review): the test on r inside the inner loop and the return for
 * the no-match case are on lines not visible here. */
322 const char *cql_lookup_reverse(cql_transform_t ct,
323 const char *category,
324 Z_AttributeList *attributes)
326 struct cql_prop_entry *e;
327 size_t clen = strlen(category);
328 for (e = ct->entry; e; e = e->next)
330 if (!strncmp(e->pattern, category, clen))
332 /* category matches.. See if attributes in pattern value
333 are all listed in actual attributes */
335 for (i = 0; i < e->attr_list.num_attributes; i++)
337 /* entry attribute */
338 Z_AttributeElement *e_ae = e->attr_list.attributes[i];
340 for (j = 0; j < attributes->num_attributes; j++)
342 /* actual attribute */
343 Z_AttributeElement *a_ae = attributes->attributes[j];
344 int r = compare_attr(e_ae, a_ae);
/* The inner loop running to its end means no actual attribute matched
 * this entry attribute. */
348 if (j == attributes->num_attributes)
349 break; /* i was not found at all.. try next pattern */
/* The outer loop running to its end means every entry attribute was
 * matched. */
352 if (i == e->attr_list.num_attributes)
353 return e->pattern + clen;
/* Looks up a property by a dotted key built from up to three parts.
 * The key is pat1.pat2.pat3, pat1.pat2, pat1.pat3 or pat1 alone,
 * depending on which of pat2 and pat3 are non-null.  Each part is
 * truncated to 39 characters by the format, so the longest key is
 * 3 * 39 + 2 separators + terminator = 120 bytes.
 * The entry list is then scanned for a pattern equal to the key under
 * cql_strcmp.
 * NOTE(review): the declaration of pattern is not visible; confirm it
 * holds at least 120 bytes.  The handling of a null pat1 and the
 * returned value are also on lines not visible here. */
359 static const char *cql_lookup_property(cql_transform_t ct,
360 const char *pat1, const char *pat2,
364 struct cql_prop_entry *e;
366 if (pat1 && pat2 && pat3)
367 sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
368 else if (pat1 && pat2)
369 sprintf(pattern, "%.39s.%.39s", pat1, pat2);
370 else if (pat1 && pat3)
371 sprintf(pattern, "%.39s.%.39s", pat1, pat3);
373 sprintf(pattern, "%.39s", pat1);
377 for (e = ct->entry; e; e = e->next)
379 if (!cql_strcmp(e->pattern, pattern))
/* Emits the attributes configured for one property through pr.
 *
 * The property is looked up as category.prefix.eval, where eval is val
 * or, when val is null, default_val, and prefix is taken from the set.
 * entry whose value equals uri.  For category relation with no prefix,
 * the symbols ==, =, <= and >= are retried under the names exact, eq,
 * le and ge.  A final lookup uses * as the value name.
 * When a property string is found it is split at each = sign and each
 * piece is written as an @attr item followed by a space.
 *
 * NOTE(review): the memcmp against set. always reads 4 bytes of
 * e->pattern, even for a shorter pattern; cql_transform_r uses
 * cql_strncmp for the same test.
 * NOTE(review): the alias comparisons and the final wrbuf_puts use val,
 * not eval; confirm val cannot be null on those paths.
 * NOTE(review): the uri guard around the set. scan, the conditions
 * around the error returns and several declarations are on lines not
 * visible here. */
385 int cql_pr_attr_uri(cql_transform_t ct, WRBUF addinfo, const char *category,
386 const char *uri, const char *val, const char *default_val,
387 void (*pr)(const char *buf, void *client_data),
392 const char *eval = val ? val : default_val;
393 const char *prefix = 0;
397 struct cql_prop_entry *e;
399 for (e = ct->entry; e; e = e->next)
400 if (!memcmp(e->pattern, "set.", 4) && e->value &&
401 !strcmp(e->value, uri))
403 prefix = e->pattern+4;
406 /* must have a prefix now - if not it's an error */
412 res = cql_lookup_property(ct, category, prefix, eval);
413 /* we have some aliases for some relations unfortunately.. */
414 if (!res && !prefix && !strcmp(category, "relation"))
416 if (!strcmp(val, "=="))
417 res = cql_lookup_property(ct, category, prefix, "exact");
418 if (!strcmp(val, "="))
419 res = cql_lookup_property(ct, category, prefix, "eq");
420 if (!strcmp(val, "<="))
421 res = cql_lookup_property(ct, category, prefix, "le");
422 if (!strcmp(val, ">="))
423 res = cql_lookup_property(ct, category, prefix, "ge");
426 res = cql_lookup_property(ct, category, prefix, "*");
/* cp0 marks the start of the current piece; cp1 is moved from the =
 * sign to the next space or the end of the string.  A piece that does
 * not fit in buf is detected by the size comparison before the copy. */
432 const char *cp0 = res, *cp1;
433 while ((cp1 = strchr(cp0, '=')))
436 while (*cp1 && *cp1 != ' ')
438 if (cp1 - cp0 >= (ptrdiff_t) sizeof(buf))
440 memcpy(buf, cp0, cp1 - cp0);
442 (*pr)("@attr ", client_data);
444 for (i = 0; buf[i]; i++)
447 (*pr)(eval, client_data);
453 (*pr)(tmp, client_data);
456 (*pr)(" ", client_data);
465 return 1; /* signal error, but do not set addinfo */
467 wrbuf_puts(addinfo, val);
/* Convenience wrapper around cql_pr_attr_uri that passes a null uri, so
 * no context set prefix is derived from a URI.  All other arguments,
 * including errcode, are forwarded unchanged and the result is
 * returned as is. */
471 int cql_pr_attr(cql_transform_t ct, WRBUF addinfo, const char *category,
472 const char *val, const char *default_val,
473 void (*pr)(const char *buf, void *client_data),
477 return cql_pr_attr_uri(ct, addinfo, category, 0 /* uri */,
478 val, default_val, pr, client_data, errcode);
/* Writes val in decimal, followed by a single space, through the pr
 * callback.  The value is formatted into a 21 byte stack buffer, which
 * is ample for any int. */
482 static void cql_pr_int(int val,
483 void (*pr)(const char *buf, void *client_data),
486 char buf[21]; /* enough characters to 2^64 */
487 sprintf(buf, "%d", val);
488 (*pr)(buf, client_data);
489 (*pr)(" ", client_data);
/* Emits the parameters of a proximity operator from its modifier list.
 *
 * Walks mods through the u.st.modifiers links.  Recognised modifier
 * names are distance, ordered, unordered and unit:
 *   distance - term is parsed with strtol (base auto-detected) and the
 *              relation must be one of =, >, <, >=, <=, <>; anything
 *              else puts the relation in addinfo and returns
 *              YAZ_SRW_UNSUPP_PROX_RELATION.
 *   unit     - term must be word, sentence, paragraph or element;
 *              anything else puts the term in addinfo and returns
 *              YAZ_SRW_UNSUPP_PROX_UNIT.
 * Any other modifier name puts the name in addinfo and returns
 * YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER.
 *
 * Defaults visible here: proxrel 2 (less than or equal) and unit 2
 * (word); a default distance of 1 is chosen for unit 2 and 0 otherwise.
 * Output order is exclusion, distance, ordered, proxrel, the literal k,
 * then unit.
 * NOTE(review): the values assigned in each branch, the condition
 * guarding the default distance and the return are on lines not
 * visible here. */
493 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
495 void (*pr)(const char *buf, void *client_data),
501 int proxrel = 2; /* less than or equal */
502 int unit = 2; /* word */
506 const char *name = mods->u.st.index;
507 const char *term = mods->u.st.term;
508 const char *relation = mods->u.st.relation;
510 if (!strcmp(name, "distance")) {
511 distance = strtol(term, (char**) 0, 0);
512 if (!strcmp(relation, "="))
514 else if (!strcmp(relation, ">"))
516 else if (!strcmp(relation, "<"))
518 else if (!strcmp(relation, ">="))
520 else if (!strcmp(relation, "<="))
522 else if (!strcmp(relation, "<>"))
526 wrbuf_puts(addinfo, relation);
527 return YAZ_SRW_UNSUPP_PROX_RELATION;
530 else if (!strcmp(name, "ordered"))
532 else if (!strcmp(name, "unordered"))
534 else if (!strcmp(name, "unit"))
536 if (!strcmp(term, "word"))
538 else if (!strcmp(term, "sentence"))
540 else if (!strcmp(term, "paragraph"))
542 else if (!strcmp(term, "element"))
546 wrbuf_puts(addinfo, term);
547 return YAZ_SRW_UNSUPP_PROX_UNIT;
552 wrbuf_puts(addinfo, name);
553 return YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
555 mods = mods->u.st.modifiers;
559 distance = (unit == 2) ? 1 : 0;
561 cql_pr_int(exclusion, pr, client_data);
562 cql_pr_int(distance, pr, client_data);
563 cql_pr_int(ordered, pr, client_data);
564 cql_pr_int(proxrel, pr, client_data);
565 (*pr)("k ", client_data);
566 cql_pr_int(unit, pr, client_data);
571 /* ### checks for CQL relation-name rather than Type-1 attribute */
/* Scans the modifier chain of search-term node cn for a modifier whose
 * index string equals name exactly (case-sensitive strcmp).
 * NOTE(review): the returned values are on lines not visible here. */
572 static int has_modifier(struct cql_node *cn, const char *name) {
573 struct cql_node *mod;
574 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
575 if (!strcmp(mod->u.st.index, name))
/* Emits one search term, preceded by its attributes, through pr.
 *
 * term need not be null-terminated: only the first length bytes are
 * used.  cn supplies the index, the index URI and the relation
 * modifiers.
 *
 * Visible stages, in order:
 *  1. Term handling mode is chosen from the regexp and unmasked
 *     modifiers, or from the presence of a truncation.cql property.
 *  2. Unless disabled, the term is scanned for anchoring and masking
 *     characters and matching position.* and truncation.* attributes
 *     are emitted; when no simple truncation attribute applies, the
 *     truncation.z3958 attribute is emitted instead.
 *  3. The index attribute is emitted, with serverChoice as the default
 *     index name and YAZ_SRW_UNSUPP_INDEX as the error code.
 *  4. One relationModifier attribute is emitted per modifier on cn.
 *  5. The term is written inside double quotes, with backslash escapes
 *     added so that quote and backslash characters, and in z3958 mode
 *     the # and ? characters, survive the later PQF parse.
 * NOTE(review): a large part of this function, including the
 * declarations of trunc, anchor and z3958_mode, the bodies of most
 * branches and the return, is on lines not visible here. */
582 static int emit_term(cql_transform_t ct,
583 struct cql_node *cn, WRBUF addinfo,
584 const char *term, int length,
585 void (*pr)(const char *buf, void *client_data),
589 const char *ns = cn->u.st.index_uri;
591 int process_term = 1;
593 if (has_modifier(cn, "regexp"))
595 else if (has_modifier(cn, "unmasked"))
597 else if (cql_lookup_property(ct, "truncation", 0, "cql"))
600 r = cql_pr_attr(ct, addinfo, "truncation", "cql", 0,
601 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
/* Only search-term nodes are valid here. */
605 assert(cn->which == CQL_NODE_ST);
608 { /* convert term via truncation.things */
611 for (i = 0; i < length; i++)
613 if (term[i] == '\\' && i < length - 1)
622 else if (i == length - 1)
628 else if (i == length - 1)
641 r = cql_pr_attr(ct, addinfo, "position", "firstAndLast", 0,
643 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
649 else if (anchor == 1)
651 r = cql_pr_attr(ct, addinfo, "position", "first", 0,
653 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
659 else if (anchor == 2)
661 r = cql_pr_attr(ct, addinfo, "position", "last", 0,
663 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
670 r = cql_pr_attr(ct, addinfo, "position", "any", 0,
672 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
678 if (trunc == 3 && !cql_pr_attr(ct, addinfo, "truncation",
679 "both", 0, pr, client_data, 0))
684 else if (trunc == 1 && !cql_pr_attr(ct, addinfo, "truncation",
685 "left", 0, pr, client_data, 0))
690 else if (trunc == 2 && !cql_pr_attr(ct, addinfo, "truncation",
691 "right", 0, pr, client_data, 0))
698 cql_pr_attr(ct, addinfo, "truncation", "none", 0,
703 r = cql_pr_attr(ct, addinfo, "truncation", "z3958", 0,
704 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
711 r = cql_pr_attr_uri(ct, addinfo, "index", ns,
712 cn->u.st.index, "serverChoice",
713 pr, client_data, YAZ_SRW_UNSUPP_INDEX);
717 if (cn->u.st.modifiers)
719 struct cql_node *mod = cn->u.st.modifiers;
720 for (; mod; mod = mod->u.st.modifiers)
722 r = cql_pr_attr(ct, addinfo,
723 "relationModifier", mod->u.st.index, 0,
724 pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
/* Opening quote of the emitted term. */
729 (*pr)("\"", client_data);
731 for (i = 0; i < length; i++)
733 char x[2]; /* temp buffer */
734 if (term[i] == '\\' && i < length - 1)
737 if (strchr("\"\\", term[i]))
738 pr("\\", client_data);
739 if (z3958_mode && strchr("#?", term[i]))
740 pr("\\\\", client_data); /* double \\ to survive PQF parse */
745 else if (z3958_mode && term[i] == '*')
747 pr("?", client_data);
748 if (i < length - 1 && yaz_isdigit(term[i+1]))
749 pr("\\\\", client_data); /* dbl \\ to survive PQF parse */
751 else if (z3958_mode && term[i] == '?')
753 pr("#", client_data);
758 pr("\\", client_data);
759 if (z3958_mode && strchr("#?", term[i]))
760 pr("\\\\", client_data); /* dbl \\ to survive PQF parse */
768 for (i = 0; i < length; i++)
/* Closing quote plus the separator space that follows every term. */
776 (*pr)("\" ", client_data);
/* Emits the term of cn and every term chained through extra_terms,
 * joined in prefix notation by the operator named op.
 * An operator token, written as @ followed by op and a space, is
 * emitted before a term whenever more terms follow it.  Each term is
 * written with emit_term, always using cn as the node that supplies
 * index and modifiers.  The loop stops at the first non-zero result.
 * NOTE(review): the condition guarding the first operator token and
 * the return are on lines not visible here. */
780 static int emit_terms(cql_transform_t ct, struct cql_node *cn,
782 void (*pr)(const char *buf, void *client_data),
786 struct cql_node *ne = cn->u.st.extra_terms;
790 (*pr)("@", client_data);
791 (*pr)(op, client_data);
792 (*pr)(" ", client_data);
794 r = emit_term(ct, cn, addinfo, cn->u.st.term, strlen(cn->u.st.term),
796 for (; !r && ne; ne = ne->u.st.extra_terms)
798 if (ne->u.st.extra_terms)
800 (*pr)("@", client_data);
801 (*pr)(op, client_data);
802 (*pr)(" ", client_data);
804 r = emit_term(ct, cn, addinfo, ne->u.st.term, strlen(ne->u.st.term),
/* Splits the term of cn at space characters and emits the words as
 * separate terms joined by the operator named op.
 * Emission lags one word behind the scan: the previous word
 * (last_term, last_length) is written, preceded by an operator token,
 * once a further word has been found, and the final word is written
 * after the loop without an operator.  Word length is the distance to
 * the next space, or strlen of the remainder for the last word.
 * NOTE(review): the loop header, the handling of consecutive spaces
 * and the return are on lines not visible here. */
810 static int emit_wordlist(cql_transform_t ct, struct cql_node *cn,
812 void (*pr)(const char *buf, void *client_data),
817 const char *cp0 = cn->u.st.term;
819 const char *last_term = 0;
825 cp1 = strchr(cp0, ' ');
828 (*pr)("@", client_data);
829 (*pr)(op, client_data);
830 (*pr)(" ", client_data);
831 r = emit_term(ct, cn, addinfo, last_term, last_length,
836 last_length = cp1 - cp0;
838 last_length = strlen(cp0);
842 r = emit_term(ct, cn, addinfo, last_term, last_length, pr, client_data);
/* Recursively emits the prefix-notation query for the CQL tree rooted
 * at cn, writing through pr and reporting problems via addinfo and the
 * returned SRW diagnostic code.
 *
 * Search-term nodes:
 *  - an index named resultSet in the CQL context set is written as an
 *    @set reference to the term;
 *  - a missing context set yields YAZ_SRW_UNSUPP_CONTEXT_SET;
 *  - otherwise the always, relation and structure attributes are
 *    emitted, then the term(s): relation all splits the term into
 *    words joined by and, relation any joins them by or, and any other
 *    relation goes through emit_terms with and.
 * Boolean nodes: the operator is written as @ plus its name; prox
 * takes its parameters from cql_pr_prox, while modifiers on any other
 * operator are rejected with YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER.  The left
 * and right subtrees are then emitted.
 * Sort nodes: only the wrapped search is emitted.
 * Any other node type is reported on stderr as fatal.
 *
 * NOTE(review): the result-set name is written between quotes without
 * any escaping; confirm a name containing a quote cannot reach here.
 * NOTE(review): the switch structure, the checks on r between steps
 * and the return are on lines not visible here. */
846 static int emit_node(cql_transform_t ct, struct cql_node *cn,
848 void (*pr)(const char *buf, void *client_data),
853 struct cql_node *mods;
860 ns = cn->u.st.index_uri;
863 if (!strcmp(ns, cql_uri())
864 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
866 (*pr)("@set \"", client_data);
867 (*pr)(cn->u.st.term, client_data);
868 (*pr)("\" ", client_data);
874 return YAZ_SRW_UNSUPP_CONTEXT_SET;
/* The always attribute is emitted without an error code and its result
 * is ignored. */
876 cql_pr_attr(ct, addinfo, "always", 0, 0, pr, client_data, 0);
877 r = cql_pr_attr(ct, addinfo, "relation", cn->u.st.relation, 0,
878 pr, client_data, YAZ_SRW_UNSUPP_RELATION);
881 r = cql_pr_attr(ct, addinfo, "structure", cn->u.st.relation, 0,
883 YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
886 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
887 r = emit_wordlist(ct, cn, addinfo, pr, client_data, "and");
888 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
889 r = emit_wordlist(ct, cn, addinfo, pr, client_data, "or");
891 r = emit_terms(ct, cn, addinfo, pr, client_data, "and");
894 (*pr)("@", client_data);
895 (*pr)(cn->u.boolean.value, client_data);
896 (*pr)(" ", client_data);
897 mods = cn->u.boolean.modifiers;
898 if (!strcmp(cn->u.boolean.value, "prox"))
900 r = cql_pr_prox(ct, mods, addinfo, pr, client_data);
906 /* Boolean modifiers other than on proximity not supported */
907 wrbuf_puts(addinfo, mods->u.st.index);
908 return YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
911 r = emit_node(ct, cn->u.boolean.left, addinfo, pr, client_data);
914 r = emit_node(ct, cn->u.boolean.right, addinfo, pr, client_data);
919 r = emit_node(ct, cn->u.sort.search, addinfo, pr, client_data);
922 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Transforms the CQL tree cn to prefix notation, writing through pr
 * and collecting diagnostic detail in the caller-supplied addinfo.
 * Before emission, every set.NAME property is applied to the tree as a
 * prefix-to-URI binding, and a bare set property is applied with a
 * null prefix.  A temporary NMEM is created for those bindings.
 * The tree is then emitted with emit_node and its result kept in r.
 * NOTE(review): destruction of the temporary NMEM and the return are
 * on lines not visible here. */
928 int cql_transform_r(cql_transform_t ct, struct cql_node *cn,
930 void (*pr)(const char *buf, void *client_data),
933 struct cql_prop_entry *e;
934 NMEM nmem = nmem_create();
937 for (e = ct->entry; e ; e = e->next)
939 if (!cql_strncmp(e->pattern, "set.", 4))
940 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
941 else if (!cql_strcmp(e->pattern, "set"))
942 cql_apply_prefix(nmem, cn, 0, e->value);
944 r = emit_node(ct, cn, addinfo, pr, client_data);
/* Transforms cn through pr like cql_transform_r, but records the
 * outcome on the handle: a temporary addinfo buffer is used for the
 * run, and its text together with the result code is stored on ct via
 * cql_transform_set_error before the buffer is destroyed.
 * NOTE(review): the return is on a line not visible here. */
949 int cql_transform(cql_transform_t ct, struct cql_node *cn,
950 void (*pr)(const char *buf, void *client_data),
953 WRBUF addinfo = wrbuf_alloc();
954 int r = cql_transform_r(ct, cn, addinfo, pr, client_data);
955 cql_transform_set_error(ct, r, wrbuf_cstr(addinfo));
956 wrbuf_destroy(addinfo);
/* Transforms cn and writes the result to the stdio stream f, using
 * cql_fputs as the output callback with f as its client data.  Returns
 * the result of cql_transform. */
960 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
962 return cql_transform(ct, cn, cql_fputs, f);
/* Transforms cn into a caller-supplied character buffer, using
 * cql_buf_write_handler with a cql_buf_write_info as the output sink.
 * When the output did not fit, the error on ct is replaced by
 * YAZ_SRW_TOO_MANY_CHARS_IN_QUERY with the buffer capacity, formatted
 * as a decimal number, as addinfo.  Otherwise the output is terminated
 * at info.off.
 * NOTE(review): the setup of info, the overflow test itself and the
 * return are on lines not visible here. */
965 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
968 struct cql_buf_write_info info;
974 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
976 /* Attempt to write past end of buffer. For some reason, this
977 SRW diagnostic is deprecated, but it's so perfect for our
978 purposes that it would be stupid not to use it. */
980 sprintf(numbuf, "%ld", (long) info.max);
981 cql_transform_set_error(ct, YAZ_SRW_TOO_MANY_CHARS_IN_QUERY, numbuf);
985 info.buf[info.off] = '\0';
/* Reports the diagnostic recorded on ct.  *addinfo is set to the stored
 * additional-information string, or to null when that string is empty.
 * The returned pointer refers to memory owned by ct.
 * NOTE(review): the returned error code is on a line not visible
 * here. */
989 int cql_transform_error(cql_transform_t ct, const char **addinfo)
991 *addinfo = wrbuf_len(ct->addinfo) ? wrbuf_cstr(ct->addinfo) : 0;
/* Records a diagnostic on ct: the stored additional-information buffer
 * is cleared and then filled with a copy of addinfo.
 * NOTE(review): the null check on addinfo and the store of the error
 * code are on lines not visible here. */
995 void cql_transform_set_error(cql_transform_t ct, int error, const char *addinfo)
997 wrbuf_rewind(ct->addinfo);
999 wrbuf_puts(ct->addinfo, addinfo);
1006 * c-file-style: "Stroustrup"
1007 * indent-tabs-mode: nil
1009 * vim: shiftwidth=4 tabstop=8 expandtab