1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2013 Index Data
3 * See the file LICENSE for details.
7 * \brief Implements PQF parsing
17 #include <yaz/proto.h>
18 #include <yaz/oid_db.h>
19 #include <yaz/pquery.h>
/* State of one PQF parse. Only the two buffer pointers appear in this
   excerpt; other members (lex_buf, lex_len, query_look, error, ...) are
   referenced by the functions further down. */
21 struct yaz_pqf_parser {
/* start of the query text; assigned in pqf_parser_begin() */
22 const char *query_buf;
/* current scan position; yaz_pqf_error() reports query_ptr - query_buf */
23 const char *query_ptr;
/* Forward declaration: rpn_complex() calls rpn_structure() before the
   definition of rpn_structure() appears. */
35 static Z_RPNStructure *rpn_structure(struct yaz_pqf_parser *li, ODR o,
36 int num_attr, int max_attr,
37 Odr_int *attr_list, char **attr_clist,
/* Treats the current token as an attribute-set name and returns whatever
   yaz_string_to_oid_odr() yields for it in class CLASS_ATTSET, using ODR o.
   Callers treat a 0 result as "unknown attribute set". */
40 static Odr_oid *query_oid_getvalbyname(struct yaz_pqf_parser *li, ODR o)
/* the token plus its terminator must fit in the local buffer; the action
   taken for an over-long token is on a line omitted from this excerpt */
44 if (li->lex_len >= sizeof(buf)-1)
/* the token is copied by length and terminated explicitly before lookup */
46 memcpy(buf, li->lex_buf, li->lex_len);
47 buf[li->lex_len] = '\0';
48 return yaz_string_to_oid_odr(yaz_oid_std(), CLASS_ATTSET, buf, o);
/* Tests whether the current token, skipping its first `off` characters,
   matches the keyword src. `off` is presumably the third parameter (its
   declaration is not visible in this excerpt); callers pass 1 to skip the
   escape character and 0 for plain words. */
51 static int compare_term(struct yaz_pqf_parser *li, const char *src,
54 size_t len=strlen(src);
/* NOTE(review): the length test requires lex_len == len+off, yet memcmp
   compares only len-off bytes, so with off == 1 the last character of src
   is never checked (a token such as "@anx" would match "and"). Comparing
   len bytes looks like the intent -- confirm before changing. */
56 if (li->lex_len == len+off && !memcmp(li->lex_buf+off, src, len-off))
/* Scans the next token starting at li->query_ptr and classifies it.
   The statements that return the token codes sit on lines omitted from
   this excerpt. */
61 static int query_token(struct yaz_pqf_parser *li)
64 const char *sep_match;
/* alias for the parser's scan position, so advancing *qptr moves it */
65 const char **qptr = &li->query_ptr;
/* an opening separator selects the closing separator at the same index */
72 if ((sep_match = strchr(li->left_sep, **qptr)))
74 sep_char = li->right_sep[sep_match - li->left_sep];
/* escape character followed by a digit is singled out here; how it is
   handled is not visible in this excerpt */
79 if (**qptr == li->escape_char && yaz_isdigit((*qptr)[1]))
/* consume characters up to the closing separator or end of input */
85 while (**qptr && **qptr != sep_char)
/* backslash with a following character: presumably both stay in the
   token so that escape_string() can decode them later -- TODO confirm */
87 if (**qptr == '\\' && (*qptr)[1])
/* keywords are recognised only when the separator is a blank and the
   token begins with the escape character; compare_term() skips that
   first character (off == 1) */
97 if (sep_char == ' ' &&
98 li->lex_len >= 1 && li->lex_buf[0] == li->escape_char)
100 if (compare_term(li, "and", 1))
102 if (compare_term(li, "or", 1))
104 if (compare_term(li, "not", 1))
106 if (compare_term(li, "attr", 1))
108 if (compare_term(li, "set", 1))
110 if (compare_term(li, "attrset", 1))
112 if (compare_term(li, "prox", 1))
114 if (compare_term(li, "term", 1))
/* Reads the next token and caches its code in li->query_look, which the
   parse functions below switch on. Returns that same code. */
120 static int lex(struct yaz_pqf_parser *li)
122 return li->query_look = query_token(li);
/* Copies the len bytes at `in` to out_buf while decoding backslash
   escapes (the decoding itself is on lines omitted from this excerpt).
   Returns the number of bytes written. Callers allocate len+1 bytes for
   out_buf and add any terminator themselves. */
125 static int escape_string(char *out_buf, const char *in, int len)
/* a backslash starts an escape only while len is still positive;
   presumably len has already been decremented for the backslash itself */
130 if (*in == '\\' && len > 0)
/* number of bytes produced */
185 return out - out_buf;
/* Parses one attribute specification ("type=value", optionally preceded
   by an attribute-set name) into slot num_attr of the parallel arrays
   attr_list (type at 2*num_attr, numeric value at 2*num_attr+1),
   attr_clist (string value or 0) and attr_set. Callers test the result
   with `!`, so a zero return signals failure; li->error is set on the
   error paths visible here. */
188 static int p_query_parse_attr(struct yaz_pqf_parser *li, ODR o,
189 int num_attr, Odr_int *attr_list,
190 char **attr_clist, Odr_oid **attr_set)
/* an '=' found beyond lex_len belongs to a later token and does not
   count; in that case the current token is taken as an attribute-set name */
195 if (!(cp = strchr(li->lex_buf, '=')) ||
196 (size_t) (cp-li->lex_buf) > li->lex_len)
198 attr_set[num_attr] = query_oid_getvalbyname(li, o);
199 if (attr_set[num_attr] == 0)
201 li->error = YAZ_PQF_ERROR_ATTSET;
/* presumably reached when no token follows the set name -- the guarding
   test is not visible in this excerpt */
206 li->error = YAZ_PQF_ERROR_MISSING;
/* the token after the set name must itself contain '=' */
209 if (!(cp = strchr(li->lex_buf, '=')))
211 li->error = YAZ_PQF_ERROR_BADATTR;
/* no explicit set: reuse the previous slot's set, or 0; the condition
   choosing between the two is on an omitted line (presumably num_attr > 0) */
218 attr_set[num_attr] = attr_set[num_attr-1];
220 attr_set[num_attr] = 0;
/* the attribute type must start with a decimal digit */
222 if (*li->lex_buf < '0' || *li->lex_buf > '9')
224 li->error = YAZ_PQF_ERROR_BAD_INTEGER;
227 attr_list[2*num_attr] = odr_atoi(li->lex_buf);
230 /* inspect value .. and make it an integer if it appears to be */
231 for (i = cp - li->lex_buf; i < li->lex_len; i++)
232 if (li->lex_buf[i] < '0' || li->lex_buf[i] > '9')
/* string value: numeric slot is zeroed and the text is decoded into a
   fresh ODR buffer, terminated at the decoded length */
234 int len = li->lex_len - (cp - li->lex_buf);
235 attr_list[2*num_attr+1] = 0;
236 attr_clist[num_attr] = (char *) odr_malloc(o, len+1);
237 len = escape_string(attr_clist[num_attr], cp, len);
238 attr_clist[num_attr][len] = '\0';
/* numeric value: no string counterpart */
241 attr_list[2*num_attr+1] = odr_atoi(cp);
242 attr_clist[num_attr] = 0;
/* Builds a Z_AttributeList from the parallel arrays filled in by
   p_query_parse_attr(). Every allocation visible here goes through
   odr_malloc() on o. */
246 static Z_AttributeList *get_attributeList(ODR o,
247 int num_attr, Odr_int *attr_list,
248 char **attr_clist, Odr_oid **attr_set)
252 Z_AttributeElement **elements;
253 Z_AttributeList *attributes= (Z_AttributeList *)
254 odr_malloc(o, sizeof(*attributes));
255 attributes->num_attributes = num_attr;
/* presumably the empty-list case (the guarding test is not visible) */
258 attributes->attributes = (Z_AttributeElement**)odr_nullval();
261 elements = (Z_AttributeElement**)
262 odr_malloc(o, num_attr * sizeof(*elements));
/* elements point into attr_tmp rather than attr_list, so the list is
   copied into ODR memory first */
264 attr_tmp = (Odr_int *)odr_malloc(o, num_attr * 2 * sizeof(*attr_tmp));
265 memcpy(attr_tmp, attr_list, num_attr * 2 * sizeof(*attr_tmp));
/* walk the slots from last to first */
266 for (i = num_attr; --i >= 0; )
/* look for a later slot with the same attribute type; presumably such a
   duplicate causes slot i to be skipped -- the action is on an omitted line */
269 for (j = i+1; j<num_attr; j++)
270 if (attr_tmp[2*j] == attr_tmp[2*i])
275 (Z_AttributeElement*)odr_malloc(o,sizeof(**elements));
276 elements[k]->attributeType = &attr_tmp[2*i];
277 elements[k]->attributeSet = attr_set[i];
/* complex form: a single string entry and no semantic actions
   (presumably chosen when attr_clist[i] is set -- test not visible) */
281 elements[k]->which = Z_AttributeValue_complex;
282 elements[k]->value.complex = (Z_ComplexAttribute *)
283 odr_malloc(o, sizeof(Z_ComplexAttribute));
284 elements[k]->value.complex->num_list = 1;
285 elements[k]->value.complex->list =
286 (Z_StringOrNumeric **)
287 odr_malloc(o, 1 * sizeof(Z_StringOrNumeric *));
288 elements[k]->value.complex->list[0] =
289 (Z_StringOrNumeric *)
290 odr_malloc(o, sizeof(Z_StringOrNumeric));
291 elements[k]->value.complex->list[0]->which =
292 Z_StringOrNumeric_string;
293 elements[k]->value.complex->list[0]->u.string =
295 elements[k]->value.complex->semanticAction = 0;
296 elements[k]->value.complex->num_semanticAction = 0;
/* numeric form: points at the copied value */
300 elements[k]->which = Z_AttributeValue_numeric;
301 elements[k]->value.numeric = &attr_tmp[2*i+1];
/* k is the number of elements actually emitted */
305 attributes->num_attributes = k;
306 attributes->attributes = elements;
/* Creates a Z_Term of kind term_type from the len bytes at buf.
   The term and its data are allocated with odr_malloc() on o. The case
   labels for some branches are on lines omitted from this excerpt. */
310 Z_Term *z_Term_create(ODR o, int term_type, const char *buf, size_t len)
312 Z_Term *term = (Z_Term *)odr_malloc(o, sizeof(*term));
313 Odr_oct *term_octet = (Odr_oct *)odr_malloc(o, sizeof(*term_octet));
/* one extra byte so the copy can be terminated below */
314 term_octet->buf = (char *)odr_malloc(o, 1 + len);
315 memcpy(term_octet->buf, buf, len);
316 term_octet->len = len;
318 term_octet->size = len;
320 term_octet->buf[term_octet->len] = 0; /* null terminate */
/* general term: the octet string itself */
325 term->which = Z_Term_general;
326 term->u.general = term_octet;
/* character string: reuses the octet buffer */
328 case Z_Term_characterString:
329 term->which = Z_Term_characterString;
330 term->u.characterString = (char*) term_octet->buf;
331 /* null terminated above */
/* numeric: the terminated copy is parsed with odr_atoi() */
334 term->which = Z_Term_numeric;
335 term->u.numeric = odr_intdup(o, odr_atoi((const char*) term_octet->buf));
338 term->which = Z_Term_null;
339 term->u.null = odr_nullval();
/* external: the payload pointer is left 0 here */
341 case Z_Term_external:
342 term->which = Z_Term_external;
343 term->u.external = 0;
/* presumably the default branch: falls back to a null term */
346 term->which = Z_Term_null;
347 term->u.null = odr_nullval();
/* Builds a Z_AttributesPlusTerm from the current token and the given
   attribute list. The token is decoded with escape_string() and turned
   into a term of the parser's current term_type. */
353 static Z_AttributesPlusTerm *rpn_term_attributes(
354 struct yaz_pqf_parser *li, ODR o, Z_AttributeList *attributes)
/* lex_len+1 bytes: the size callers of escape_string() reserve for output */
356 char *es_str = odr_malloc(o, li->lex_len+1);
357 int es_len = escape_string(es_str, li->lex_buf, li->lex_len);
358 Z_Term *term = z_Term_create(o, li->term_type, es_str, es_len);
359 Z_AttributesPlusTerm *zapt = (Z_AttributesPlusTerm *)
360 odr_malloc(o, sizeof(*zapt));
363 zapt->attributes = attributes;
/* Convenience wrapper: converts the attribute arrays with
   get_attributeList() and passes the result to rpn_term_attributes(). */
367 static Z_AttributesPlusTerm *rpn_term(struct yaz_pqf_parser *li, ODR o,
368 int num_attr, Odr_int *attr_list,
369 char **attr_clist, Odr_oid **attr_set)
371 return rpn_term_attributes(li, o, get_attributeList(o, num_attr, attr_list, attr_clist, attr_set));
/* Parses a simple operand -- an attributes-plus-term or a result-set
   reference -- selected by li->query_look. The case labels are on lines
   omitted from this excerpt. */
374 static Z_Operand *rpn_simple(struct yaz_pqf_parser *li, ODR o,
375 int num_attr, Odr_int *attr_list,
381 zo = (Z_Operand *)odr_malloc(o, sizeof(*zo));
382 switch (li->query_look)
/* term operand, carrying the attributes collected so far */
385 zo->which = Z_Operand_APT;
386 if (!(zo->u.attributesPlusTerm =
387 rpn_term(li, o, num_attr, attr_list, attr_clist, attr_set)))
/* presumably: no token follows the result-set keyword (test not visible) */
395 li->error = YAZ_PQF_ERROR_MISSING;
/* result-set operand: the token is copied by length and terminated */
398 zo->which = Z_Operand_resultSetId;
399 zo->u.resultSetId = (char *)odr_malloc(o, li->lex_len+1);
400 memcpy(zo->u.resultSetId, li->lex_buf, li->lex_len);
401 zo->u.resultSetId[li->lex_len] = '\0';
405 /* we're only called if one of the above types is seen so
406 this shouldn't happen */
407 li->error = YAZ_PQF_ERROR_INTERNAL;
/* Parses the operands of a proximity operator into a new
   Z_ProximityOperator allocated on o. The visible assignments fill, in
   order: exclusion, distance, ordered, relationType, which, and the unit
   code. The tests guarding each YAZ_PQF_ERROR_MISSING assignment are on
   lines omitted from this excerpt (presumably "no further token"). */
413 static Z_ProximityOperator *rpn_proximity(struct yaz_pqf_parser *li, ODR o)
415 Z_ProximityOperator *p = (Z_ProximityOperator *)odr_malloc(o, sizeof(*p));
419 li->error = YAZ_PQF_ERROR_MISSING;
/* exclusion: '1' or '0' give a boolean; a token starting with 'v' or 'n'
   is also accepted (what it stores is not visible here) */
422 if (*li->lex_buf == '1')
423 p->exclusion = odr_booldup(o, 1);
424 else if (*li->lex_buf == '0')
425 p->exclusion = odr_booldup(o, 0);
426 else if (*li->lex_buf == 'v' || *li->lex_buf == 'n')
430 li->error = YAZ_PQF_ERROR_PROXIMITY;
436 li->error = YAZ_PQF_ERROR_MISSING;
/* distance: token must start with a digit */
439 if (*li->lex_buf >= '0' && *li->lex_buf <= '9')
440 p->distance = odr_intdup(o, odr_atoi(li->lex_buf));
443 li->error = YAZ_PQF_ERROR_BAD_INTEGER;
449 li->error = YAZ_PQF_ERROR_MISSING;
/* ordered flag: only '1' or '0' are accepted */
452 if (*li->lex_buf == '1')
453 p->ordered = odr_booldup(o, 1);
454 else if (*li->lex_buf == '0')
455 p->ordered = odr_booldup(o, 0);
458 li->error = YAZ_PQF_ERROR_PROXIMITY;
464 li->error = YAZ_PQF_ERROR_MISSING;
/* relation type: token must start with a digit */
467 if (*li->lex_buf >= '0' && *li->lex_buf <= '9')
468 p->relationType = odr_intdup(o, odr_atoi(li->lex_buf));
471 li->error = YAZ_PQF_ERROR_BAD_INTEGER;
477 li->error = YAZ_PQF_ERROR_MISSING;
/* unit-code kind: 'k' (known) or 'p' (private), otherwise the token is
   read as a number. NOTE(review): plain atoi() is used here while the
   other fields use odr_atoi() -- confirm this is intentional. */
480 if (*li->lex_buf == 'k')
481 p->which = Z_ProximityOperator_known;
482 else if (*li->lex_buf == 'p')
483 p->which = Z_ProximityOperator_private;
485 p->which = atoi(li->lex_buf);
/* anything other than the two defined kinds is rejected */
487 if (p->which != Z_ProximityOperator_known
488 && p->which != Z_ProximityOperator_private)
490 li->error = YAZ_PQF_ERROR_PROXIMITY;
496 li->error = YAZ_PQF_ERROR_MISSING;
/* unit code. NOTE(review): the value is stored through u.known regardless
   of p->which; verify that this is intended for the private kind too. */
499 if (*li->lex_buf >= '0' && *li->lex_buf <= '9')
500 p->u.known = odr_intdup(o, odr_atoi(li->lex_buf));
503 li->error = YAZ_PQF_ERROR_BAD_INTEGER;
/* Parses an operator node: the operator kind is chosen from
   li->query_look, then rpn_structure() is called twice for the operands.
   The case labels and the assignment targets of the two operand calls
   are on lines omitted from this excerpt. */
509 static Z_Complex *rpn_complex(struct yaz_pqf_parser *li, ODR o,
510 int num_attr, int max_attr,
511 Odr_int *attr_list, char **attr_clist,
517 zc = (Z_Complex *)odr_malloc(o, sizeof(*zc));
518 zo = (Z_Operator *)odr_malloc(o, sizeof(*zo));
520 switch (li->query_look)
/* and / or / and-not carry no payload, so they get the ODR null value */
523 zo->which = Z_Operator_and;
524 zo->u.op_and = odr_nullval();
527 zo->which = Z_Operator_or;
528 zo->u.op_or = odr_nullval();
531 zo->which = Z_Operator_and_not;
532 zo->u.and_not = odr_nullval();
/* proximity carries parameters parsed by rpn_proximity() */
535 zo->which = Z_Operator_prox;
536 zo->u.prox = rpn_proximity(li, o);
541 /* we're only called if one of the above types is seen so
542 this shouldn't happen */
543 li->error = YAZ_PQF_ERROR_INTERNAL;
/* first operand */
548 rpn_structure(li, o, num_attr, max_attr, attr_list,
549 attr_clist, attr_set)))
/* second operand */
552 rpn_structure(li, o, num_attr, max_attr, attr_list,
553 attr_clist, attr_set)))
/* Sets li->term_type from the current token, which names a term kind.
   The token is compared in full (off == 0). What happens for an
   unrecognised name is not visible in this excerpt. */
558 static void rpn_term_type(struct yaz_pqf_parser *li)
562 if (compare_term(li, "general", 0))
563 li->term_type = Z_Term_general;
564 else if (compare_term(li, "numeric", 0))
565 li->term_type = Z_Term_numeric;
566 else if (compare_term(li, "string", 0))
567 li->term_type = Z_Term_characterString;
568 else if (compare_term(li, "oid", 0))
569 li->term_type = Z_Term_oid;
570 else if (compare_term(li, "datetime", 0))
571 li->term_type = Z_Term_dateTime;
572 else if (compare_term(li, "null", 0))
573 li->term_type = Z_Term_null;
/* "range" selects an external term and records its external type */
575 else if (compare_term(li, "range", 0))
577 /* prepare for external: range search .. */
578 li->term_type = Z_Term_external;
579 li->external_type = VAL_MULTISRCH2;
/* Parses one RPN structure, dispatching on li->query_look: an operator
   yields a complex node, an operand a simple node, and attribute or
   term-type prefixes are processed before recursing. Most case labels and
   the lex() calls are on lines omitted from this excerpt. */
585 static Z_RPNStructure *rpn_structure(struct yaz_pqf_parser *li, ODR o,
586 int num_attr, int max_attr,
593 sz = (Z_RPNStructure *)odr_malloc(o, sizeof(*sz));
594 switch (li->query_look)
/* operator node */
600 sz->which = Z_RPNStructure_complex;
601 if (!(sz->u.complex =
602 rpn_complex(li, o, num_attr, max_attr, attr_list,
603 attr_clist, attr_set)))
/* operand node */
608 sz->which = Z_RPNStructure_simple;
610 rpn_simple(li, o, num_attr, attr_list,
611 attr_clist, attr_set)))
/* attribute prefix: presumably flagged when no specification follows */
618 li->error = YAZ_PQF_ERROR_MISSING;
/* the attribute arrays hold at most max_attr entries */
621 if (num_attr >= max_attr)
623 li->error = YAZ_PQF_ERROR_TOOMANY;
626 if (!p_query_parse_attr(li, o, num_attr, attr_list,
627 attr_clist, attr_set))
/* continue with the structure the attribute applies to; num_attr is
   presumably incremented on an omitted line before this call */
632 rpn_structure(li, o, num_attr, max_attr, attr_list,
633 attr_clist, attr_set);
/* presumably the term-type prefix path: recurse with the same attributes */
638 rpn_structure(li, o, num_attr, max_attr, attr_list,
639 attr_clist, attr_set);
640 case 0: /* operator/operand expected! */
641 li->error = YAZ_PQF_ERROR_MISSING;
/* Parses a complete PQF query into a Z_RPNQuery allocated on o: an
   optional top-level attribute set, then one RPN structure, then a check
   for trailing input. Sets li->error on the failure paths visible here. */
647 static Z_RPNQuery *p_query_rpn_mk(ODR o, struct yaz_pqf_parser *li)
/* 1024 = two Odr_int slots (type, value) for each of the 512 attributes
   allowed by the max_attr argument passed to rpn_structure() below */
650 Odr_int attr_array[1024];
651 char *attr_clist[512];
652 Odr_oid *attr_set[512];
653 Odr_oid *top_set = 0;
655 zq = (Z_RPNQuery *)odr_malloc(o, sizeof(*zq));
/* token code 'r': an explicit attribute-set name follows */
657 if (li->query_look == 'r')
660 top_set = query_oid_getvalbyname(li, o);
663 li->error = YAZ_PQF_ERROR_ATTSET;
/* otherwise the Bib-1 attribute set is used */
670 top_set = odr_oiddup(o, yaz_oid_attset_bib_1);
673 zq->attributeSetId = top_set;
675 if (!zq->attributeSetId)
677 li->error = YAZ_PQF_ERROR_ATTSET;
681 if (!(zq->RPNStructure = rpn_structure(li, o, 0, 512,
682 attr_array, attr_clist, attr_set)))
/* presumably: tokens remain after the structure (test not visible) */
686 li->error = YAZ_PQF_ERROR_EXTRA;
/* Points the parser at a new query text; both the start pointer and the
   scan position are set to buf. Other resets, if any, are on lines
   omitted from this excerpt. */
692 static void pqf_parser_begin(struct yaz_pqf_parser *li, const char *buf)
694 li->query_buf = li->query_ptr = buf;
/* One-shot entry point: parses qbuf as a PQF query using a parser state
   held in a local variable and returns the result of p_query_rpn_mk(). */
698 Z_RPNQuery *p_query_rpn(ODR o, const char *qbuf)
700 struct yaz_pqf_parser li;
/* defaults: closing separators } and ", escape character @, general terms */
704 li.right_sep = "}\"";
705 li.escape_char = '@';
706 li.term_type = Z_Term_general;
708 pqf_parser_begin(&li, qbuf);
709 return p_query_rpn_mk(o, &li);
/* Parses the leading part of a scan/facet specification: an optional
   attribute-set name, then attribute and term-type prefixes. The chosen
   set is stored in *attributeSetP and the collected attributes are
   returned as a Z_AttributeList. The loop construct and the declarations
   of num_attr and max_attr are on lines omitted from this excerpt. */
712 static Z_AttributeList *p_query_scan_attributes_mk(struct yaz_pqf_parser *li,
714 Odr_oid **attributeSetP)
/* two Odr_int slots (type, value) per attribute */
716 Odr_int attr_list[1024];
717 char *attr_clist[512];
718 Odr_oid *attr_set[512];
721 Odr_oid *top_set = 0;
/* token code 'r': an explicit attribute-set name follows */
724 if (li->query_look == 'r')
727 top_set = query_oid_getvalbyname(li, o);
730 li->error = YAZ_PQF_ERROR_ATTSET;
/* otherwise the Bib-1 attribute set is used */
737 top_set = odr_oiddup(o, yaz_oid_attset_bib_1);
739 *attributeSetP = top_set;
/* token code 'l': an attribute specification */
743 if (li->query_look == 'l')
748 li->error = YAZ_PQF_ERROR_MISSING;
751 if (num_attr >= max_attr)
753 li->error = YAZ_PQF_ERROR_TOOMANY;
756 if (!p_query_parse_attr(li, o, num_attr, attr_list,
757 attr_clist, attr_set))
/* token code 'y': presumably a term-type prefix, as in rpn_structure() */
762 else if (li->query_look == 'y')
770 return get_attributeList(o, num_attr, attr_list, attr_clist, attr_set);
/* Parses a scan specification: the attributes via
   p_query_scan_attributes_mk(), then one term, and flags any token left
   over afterwards as YAZ_PQF_ERROR_EXTRA. */
773 static Z_AttributesPlusTerm *p_query_scan_mk(struct yaz_pqf_parser *li,
775 Odr_oid **attributeSetP)
777 Z_AttributeList *attr_list = p_query_scan_attributes_mk(li, o, attributeSetP);
778 Z_AttributesPlusTerm *apt;
/* presumably: no term token is available (test not visible) */
782 li->error = YAZ_PQF_ERROR_MISSING;
785 apt = rpn_term_attributes(li, o, attr_list);
/* a non-zero token code here means input remains after the term */
789 if (li->query_look != 0)
791 li->error = YAZ_PQF_ERROR_EXTRA;
/* Allocates a parser with xmalloc() and applies the same defaults that
   p_query_rpn() uses: closing separators } and ", escape character @,
   general term type. */
797 YAZ_PQF_Parser yaz_pqf_create(void)
799 YAZ_PQF_Parser p = (YAZ_PQF_Parser) xmalloc(sizeof(*p));
803 p->right_sep = "}\"";
804 p->escape_char = '@';
805 p->term_type = Z_Term_general;
/* Counterpart of yaz_pqf_create(); parse_facet() calls it once it is done
   with its parser. The body is not visible in this excerpt. */
810 void yaz_pqf_destroy(YAZ_PQF_Parser p)
/* Parses qbuf as a PQF query with parser p: resets the scan position and
   returns the result of p_query_rpn_mk(), allocated on o. */
815 Z_RPNQuery *yaz_pqf_parse(YAZ_PQF_Parser p, ODR o, const char *qbuf)
819 pqf_parser_begin(p, qbuf);
820 return p_query_rpn_mk(o, p);
/* Parses qbuf as a scan specification with parser p and returns the
   result of p_query_scan_mk(); the attribute set is stored through
   attributeSetP. */
823 Z_AttributesPlusTerm *yaz_pqf_scan(YAZ_PQF_Parser p, ODR o,
824 Odr_oid **attributeSetP,
829 pqf_parser_begin(p, qbuf);
830 return p_query_scan_mk(p, o, attributeSetP);
/* Parses only the attribute part of qbuf with parser p and returns the
   result of p_query_scan_attributes_mk(); the attribute set is stored
   through attributeSetP. */
833 Z_AttributeList *yaz_pqf_scan_attribute_list(YAZ_PQF_Parser p, ODR o,
834 Odr_oid **attributeSetP,
839 pqf_parser_begin(p, qbuf);
840 return p_query_scan_attributes_mk(p, o, attributeSetP);
/* Parses one facet specification into a Z_FacetField allocated on odr.
   A temporary parser is created for the call and destroyed before the
   function ends. facet_field starts as 0; the return statement and some
   guarding tests are on lines omitted from this excerpt. */
843 static Z_FacetField* parse_facet(ODR odr, const char *facet)
845 YAZ_PQF_Parser pqf_parser = yaz_pqf_create();
846 struct yaz_pqf_parser *li = pqf_parser;
847 Odr_oid *attributeSetId;
848 Z_FacetField *facet_field = 0;
849 Z_AttributeList *attribute_list;
851 pqf_parser_begin(pqf_parser, facet);
852 attribute_list = p_query_scan_attributes_mk(li, odr, &attributeSetId);
855 facet_field = (Z_FacetField *) odr_malloc(odr, sizeof(*facet_field));
856 facet_field->attributes = attribute_list;
857 facet_field->num_terms = 0;
/* room for 10 term pointers; the loop below never stores more than that */
858 facet_field->terms = odr_malloc(odr, 10 * sizeof(*facet_field->terms));
/* token code 't': each term token is decoded and appended */
859 while (li->query_look == 't')
861 if (facet_field->num_terms < 10)
863 char *es_str = odr_malloc(odr, li->lex_len+1);
864 int es_len = escape_string(es_str, li->lex_buf, li->lex_len);
865 Z_Term *term = z_Term_create(odr, li->term_type, es_str, es_len);
867 facet_field->terms[facet_field->num_terms] =
868 (Z_FacetTerm *) odr_malloc(odr, sizeof(Z_FacetTerm));
869 facet_field->terms[facet_field->num_terms]->term = term;
870 facet_field->terms[facet_field->num_terms]->count =
872 facet_field->num_terms++;
/* the temporary parser is released here */
877 yaz_pqf_destroy(pqf_parser);
/* Parses a comma-separated list of facet specifications. qbuf is split
   on "," with nmem_strsplit() using the memory handle of o, and each
   piece is handed to parse_facet(). */
881 Z_FacetList *yaz_pqf_parse_facet_list(ODR o, const char *qbuf)
886 nmem_strsplit(odr_getmem(o), ",", qbuf, &darray, &num);
890 Z_FacetList *fl = (Z_FacetList*) odr_malloc(o, sizeof(*fl));
/* one element slot per split piece */
892 fl->elements = (Z_FacetField **)
893 odr_malloc(o, num * sizeof(*fl->elements));
894 for (i = 0; i < num; i++)
896 fl->elements[i] = parse_facet(o, darray[i]);
/* a piece that fails to parse is detected here; the action taken is on a
   line omitted from this excerpt */
897 if (!fl->elements[i])
/* Reports the outcome of the last parse: *msg receives a static
   description of the error code and *off the distance of the scan
   position from the start of the query. The switch operand and the
   return statement are on lines omitted from this excerpt (presumably
   the parser's error member is switched on and returned). */
906 int yaz_pqf_error(YAZ_PQF_Parser p, const char **msg, size_t *off)
910 case YAZ_PQF_ERROR_NONE:
911 *msg = "no error"; break;
912 case YAZ_PQF_ERROR_EXTRA:
913 *msg = "extra token"; break;
914 case YAZ_PQF_ERROR_MISSING:
915 *msg = "missing token"; break;
916 case YAZ_PQF_ERROR_ATTSET:
917 *msg = "unknown attribute set"; break;
918 case YAZ_PQF_ERROR_TOOMANY:
919 *msg = "too many attributes"; break;
920 case YAZ_PQF_ERROR_BADATTR:
921 *msg = "bad attribute specification"; break;
922 case YAZ_PQF_ERROR_INTERNAL:
923 *msg = "internal error"; break;
924 case YAZ_PQF_ERROR_PROXIMITY:
925 *msg = "proximity error"; break;
926 case YAZ_PQF_ERROR_BAD_INTEGER:
927 *msg = "bad integer"; break;
/* presumably the default branch for unlisted codes */
929 *msg = "unknown error"; break;
/* offset of the scan position within the query text */
931 *off = p->query_ptr - p->query_buf;
937 * c-file-style: "Stroustrup"
938 * indent-tabs-mode: nil
940 * vim: shiftwidth=4 tabstop=8 expandtab