1 /* $Id: cqltransform.c,v 1.22 2006-04-05 12:04:51 mike Exp $
2 Copyright (C) 1995-2005, Index Data ApS
5 This file is part of the YAZ toolkit.
11 * \file cqltransform.c
12 * \brief Implements CQL transform (CQL to RPN conversion).
18 #include <yaz/xmalloc.h>
19 #include <yaz/diagsrw.h>
21 struct cql_prop_entry {
24 struct cql_prop_entry *next;
27 struct cql_transform_t_ {
28 struct cql_prop_entry *entry;
33 cql_transform_t cql_transform_open_FILE(FILE *f)
36 cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct));
37 struct cql_prop_entry **pp = &ct->entry;
41 while (fgets(line, sizeof(line)-1, f))
43 const char *cp_value_start;
44 const char *cp_value_end;
45 const char *cp_pattern_end;
46 const char *cp = line;
47 while (*cp && !strchr(" \t=\r\n#", *cp))
52 while (*cp && strchr(" \t\r\n", *cp))
57 while (*cp && strchr(" \t\r\n", *cp))
60 if (!(cp_value_end = strchr(cp, '#')))
61 cp_value_end = strlen(line) + line;
63 if (cp_value_end != cp_value_start &&
64 strchr(" \t\r\n", cp_value_end[-1]))
66 *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp));
67 (*pp)->pattern = (char *) xmalloc (cp_pattern_end - line + 1);
68 memcpy ((*pp)->pattern, line, cp_pattern_end - line);
69 (*pp)->pattern[cp_pattern_end-line] = 0;
71 (*pp)->value = (char *) xmalloc (cp_value_end - cp_value_start + 1);
72 if (cp_value_start != cp_value_end)
73 memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
74 (*pp)->value[cp_value_end - cp_value_start] = 0;
81 void cql_transform_close(cql_transform_t ct)
83 struct cql_prop_entry *pe;
89 struct cql_prop_entry *pe_next = pe->next;
100 cql_transform_t cql_transform_open_fname(const char *fname)
103 FILE *f = fopen(fname, "r");
106 ct = cql_transform_open_FILE(f);
111 static const char *cql_lookup_property(cql_transform_t ct,
112 const char *pat1, const char *pat2,
116 struct cql_prop_entry *e;
118 if (pat1 && pat2 && pat3)
119 sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
120 else if (pat1 && pat2)
121 sprintf (pattern, "%.39s.%.39s", pat1, pat2);
122 else if (pat1 && pat3)
123 sprintf (pattern, "%.39s.%.39s", pat1, pat3);
125 sprintf (pattern, "%.39s", pat1);
129 for (e = ct->entry; e; e = e->next)
131 if (!cql_strcmp(e->pattern, pattern))
137 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
138 const char *uri, const char *val, const char *default_val,
139 void (*pr)(const char *buf, void *client_data),
144 const char *eval = val ? val : default_val;
145 const char *prefix = 0;
149 struct cql_prop_entry *e;
151 for (e = ct->entry; e; e = e->next)
152 if (!memcmp(e->pattern, "set.", 4) && e->value &&
153 !strcmp(e->value, uri))
155 prefix = e->pattern+4;
158 /* must have a prefix now - if not it's an error */
164 res = cql_lookup_property(ct, category, prefix, eval);
166 res = cql_lookup_property(ct, category, prefix, "*");
172 const char *cp0 = res, *cp1;
173 while ((cp1 = strchr(cp0, '=')))
175 while (*cp1 && *cp1 != ' ')
177 if (cp1 - cp0 >= sizeof(buf))
179 memcpy (buf, cp0, cp1 - cp0);
181 (*pr)("@attr ", client_data);
182 (*pr)(buf, client_data);
183 (*pr)(" ", client_data);
191 if (errcode && !ct->error)
195 ct->addinfo = xstrdup(val);
202 int cql_pr_attr(cql_transform_t ct, const char *category,
203 const char *val, const char *default_val,
204 void (*pr)(const char *buf, void *client_data),
208 return cql_pr_attr_uri(ct, category, 0 /* uri */,
209 val, default_val, pr, client_data, errcode);
213 static void cql_pr_int (int val,
214 void (*pr)(const char *buf, void *client_data),
217 char buf[21]; /* enough characters to 2^64 */
218 sprintf(buf, "%d", val);
219 (*pr)(buf, client_data);
220 (*pr)(" ", client_data);
224 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
225 void (*pr)(const char *buf, void *client_data),
229 int distance; /* to be filled in later depending on unit */
230 int distance_defined = 0;
232 int proxrel = 2; /* less than or equal */
233 int unit = 2; /* word */
236 char *name = mods->u.st.index;
237 char *term = mods->u.st.term;
238 char *relation = mods->u.st.relation;
240 if (!strcmp(name, "distance")) {
241 distance = strtol(term, (char**) 0, 0);
242 distance_defined = 1;
243 if (!strcmp(relation, "=")) {
245 } else if (!strcmp(relation, ">")) {
247 } else if (!strcmp(relation, "<")) {
249 } else if (!strcmp(relation, ">=")) {
251 } else if (!strcmp(relation, "<=")) {
253 } else if (!strcmp(relation, "<>")) {
256 ct->error = 40; /* Unsupported proximity relation */
257 ct->addinfo = xstrdup(relation);
260 } else if (!strcmp(name, "ordered")) {
262 } else if (!strcmp(name, "unordered")) {
264 } else if (!strcmp(name, "unit")) {
265 if (!strcmp(term, "word")) {
267 } else if (!strcmp(term, "sentence")) {
269 } else if (!strcmp(term, "paragraph")) {
271 } else if (!strcmp(term, "element")) {
274 ct->error = 42; /* Unsupported proximity unit */
275 ct->addinfo = xstrdup(term);
279 ct->error = 46; /* Unsupported boolean modifier */
280 ct->addinfo = xstrdup(name);
284 mods = mods->u.st.modifiers;
287 if (!distance_defined)
288 distance = (unit == 2) ? 1 : 0;
290 cql_pr_int(exclusion, pr, client_data);
291 cql_pr_int(distance, pr, client_data);
292 cql_pr_int(ordered, pr, client_data);
293 cql_pr_int(proxrel, pr, client_data);
294 (*pr)("k ", client_data);
295 cql_pr_int(unit, pr, client_data);
300 /* Returns location of first wildcard character in the `length'
301 * characters starting at `term', or a null pointer if there are
302 * none -- like memchr().
304 static const char *wcchar(const char *term, int length)
306 const char *best = 0;
310 for (whichp = "*?"; *whichp != '\0'; whichp++) {
311 current = (const char *) memchr(term, *whichp, length);
312 if (current != 0 && (best == 0 || current < best))
320 void emit_term(cql_transform_t ct,
321 const char *term, int length,
322 void (*pr)(const char *buf, void *client_data),
328 if (length > 1 && term[0] == '^' && term[length-1] == '^')
330 cql_pr_attr(ct, "position", "firstAndLast", 0,
331 pr, client_data, 32);
335 else if (term[0] == '^')
337 cql_pr_attr(ct, "position", "first", 0,
338 pr, client_data, 32);
342 else if (term[length-1] == '^')
344 cql_pr_attr(ct, "position", "last", 0,
345 pr, client_data, 32);
350 cql_pr_attr(ct, "position", "any", 0,
351 pr, client_data, 32);
357 /* Check for well-known globbing patterns that represent
358 * simple truncation attributes as expected by, for example,
359 * a Bath-compliant server. If we find such a pattern but
360 * there's no mapping for it, that's fine: we just use a
361 * general pattern-matching attribute.
363 if (length > 1 && term[0] == '*' && term[length-1] == '*' &&
364 wcchar(term+1, length-2) == 0 &&
365 cql_pr_attr(ct, "truncation", "both", 0,
366 pr, client_data, 0)) {
370 else if (term[0] == '*' &&
371 wcchar(term+1, length-1) == 0 &&
372 cql_pr_attr(ct, "truncation", "left", 0,
373 pr, client_data, 0)) {
377 else if (term[length-1] == '*' &&
378 wcchar(term, length-1) == 0 &&
379 cql_pr_attr(ct, "truncation", "right", 0,
380 pr, client_data, 0)) {
383 else if (wcchar(term, length))
385 /* We have one or more wildcard characters, but not in a
386 * way that can be dealt with using only the standard
387 * left-, right- and both-truncation attributes. We need
388 * to translate the pattern into a Z39.58-type pattern,
389 * which has been supported in BIB-1 since 1996. If
390 * there's no configuration element for "truncation.z3958"
391 * we indicate this as error 28 "Masking character not
396 cql_pr_attr(ct, "truncation", "z3958", 0,
397 pr, client_data, 28);
398 mem = (char *) xmalloc(length+1);
399 for (i = 0; i < length; i++) {
400 if (term[i] == '*') mem[i] = '?';
401 else if (term[i] == '?') mem[i] = '#';
402 else mem[i] = term[i];
408 /* No masking characters. Use "truncation.none" if given. */
409 cql_pr_attr(ct, "truncation", "none", 0,
414 (*pr)("\"", client_data);
415 for (i = 0; i<length; i++)
420 (*pr)(buf, client_data);
422 (*pr)("\" ", client_data);
425 void emit_wordlist(cql_transform_t ct,
427 void (*pr)(const char *buf, void *client_data),
431 const char *cp0 = cn->u.st.term;
433 const char *last_term = 0;
439 cp1 = strchr(cp0, ' ');
442 (*pr)("@", client_data);
443 (*pr)(op, client_data);
444 (*pr)(" ", client_data);
445 emit_term(ct, last_term, last_length, pr, client_data);
449 last_length = cp1 - cp0;
451 last_length = strlen(cp0);
455 emit_term(ct, last_term, last_length, pr, client_data);
458 void cql_transform_r(cql_transform_t ct,
460 void (*pr)(const char *buf, void *client_data),
464 struct cql_node *mods;
471 ns = cn->u.st.index_uri;
474 if (!strcmp(ns, cql_uri())
475 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
477 (*pr)("@set \"", client_data);
478 (*pr)(cn->u.st.term, client_data);
479 (*pr)("\" ", client_data);
491 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
492 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "="))
493 cql_pr_attr(ct, "relation", "eq", "scr",
494 pr, client_data, 19);
495 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "<="))
496 cql_pr_attr(ct, "relation", "le", "scr",
497 pr, client_data, 19);
498 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, ">="))
499 cql_pr_attr(ct, "relation", "ge", "scr",
500 pr, client_data, 19);
502 cql_pr_attr(ct, "relation", cn->u.st.relation, "eq",
503 pr, client_data, 19);
504 if (cn->u.st.modifiers)
506 struct cql_node *mod = cn->u.st.modifiers;
507 for (; mod; mod = mod->u.st.modifiers)
509 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
510 pr, client_data, 20);
513 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
514 pr, client_data, 24);
516 cql_pr_attr_uri(ct, "index", ns,
517 cn->u.st.index, "serverChoice",
518 pr, client_data, 16);
520 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
522 emit_wordlist(ct, cn, pr, client_data, "and");
524 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
526 emit_wordlist(ct, cn, pr, client_data, "or");
530 emit_term(ct, cn->u.st.term, strlen(cn->u.st.term),
535 (*pr)("@", client_data);
536 (*pr)(cn->u.boolean.value, client_data);
537 (*pr)(" ", client_data);
538 mods = cn->u.boolean.modifiers;
539 if (!strcmp(cn->u.boolean.value, "prox")) {
540 if (!cql_pr_prox(ct, mods, pr, client_data))
543 /* Boolean modifiers other than on proximity not supported */
544 ct->error = 46; /* SRW diag: "Unsupported boolean modifier" */
545 ct->addinfo = xstrdup(mods->u.st.index);
549 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
550 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
554 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
559 int cql_transform(cql_transform_t ct,
561 void (*pr)(const char *buf, void *client_data),
564 struct cql_prop_entry *e;
565 NMEM nmem = nmem_create();
572 for (e = ct->entry; e ; e = e->next)
574 if (!cql_strncmp(e->pattern, "set.", 4))
575 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
576 else if (!cql_strcmp(e->pattern, "set"))
577 cql_apply_prefix(nmem, cn, 0, e->value);
579 cql_transform_r (ct, cn, pr, client_data);
585 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
587 return cql_transform(ct, cn, cql_fputs, f);
590 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
593 struct cql_buf_write_info info;
599 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
601 /* Attempt to write past end of buffer. For some reason, this
602 SRW diagnostic is deprecated, but it's so perfect for our
603 purposes that it would be stupid not to use it. */
605 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
606 sprintf(numbuf, "%ld", (long) info.max);
607 ct->addinfo = xstrdup(numbuf);
611 info.buf[info.off] = '\0';
615 int cql_transform_error(cql_transform_t ct, const char **addinfo)
617 *addinfo = ct->addinfo;
623 * indent-tabs-mode: nil
625 * vim: shiftwidth=4 tabstop=8 expandtab