1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2011 Index Data
3 * See the file LICENSE for details.
7 * \brief Implements PQF parsing
18 #include <yaz/proto.h>
19 #include <yaz/oid_db.h>
20 #include <yaz/pquery.h>
/* Parser state threaded through every PQF routine in this file.  Only the
   first two members are visible in this excerpt; the code below also reads
   lex_buf, lex_len, query_look, error, term_type, external_type, left_sep,
   right_sep and escape_char through this struct. */
22 struct yaz_pqf_parser {
/* Start of the query text (assigned together with query_ptr when a parse
   begins). */
23 const char *query_buf;
/* Read cursor into the query text; yaz_pqf_error() reports
   query_ptr - query_buf as the error offset. */
24 const char *query_ptr;
/* Forward declaration: rpn_complex() calls rpn_structure() before the
   definition of rpn_structure() appears further down in the file.  The
   remainder of the parameter list is not visible in this excerpt. */
36 static Z_RPNStructure *rpn_structure(struct yaz_pqf_parser *li, ODR o,
37 int num_attr, int max_attr,
38 Odr_int *attr_list, char **attr_clist,
/* Resolve the current lexeme as an attribute-set name.
 *
 * The lexeme (li->lex_buf, li->lex_len) is not NUL-terminated, so it is
 * copied into a local buffer and terminated before the lookup.
 *
 * Returns the value of yaz_string_to_oid_odr() for class CLASS_ATTSET;
 * callers in this file treat a 0 result as "unknown attribute set".
 */
41 static Odr_oid *query_oid_getvalbyname(struct yaz_pqf_parser *li, ODR o)
/* Guard against a lexeme that would not fit in buf with its terminator.
   NOTE(review): the statement executed when the guard fires is not visible
   in this excerpt - presumably an early return; confirm. */
45 if (li->lex_len >= sizeof(buf)-1)
47 memcpy(buf, li->lex_buf, li->lex_len);
48 buf[li->lex_len] = '\0';
49 return yaz_string_to_oid_odr(yaz_oid_std(), CLASS_ATTSET, buf, o);
/* Test whether the current lexeme, after skipping its first `off` bytes,
 * matches the keyword `src`.
 *
 * Callers in this file pass off == 1 for operator keywords (the lexeme then
 * starts with the escape character) and off == 0 for term-type names.
 *
 * NOTE(review): the lexeme length is required to be exactly len + off, but
 * memcmp() is given len - off bytes, so with off > 0 the last `off`
 * characters of `src` are never compared.  With off == 1 a lexeme such as
 * "@anx" would satisfy the test against "and".  The length argument looks
 * like it should be `len`; confirm and fix.
 */
52 static int compare_term(struct yaz_pqf_parser *li, const char *src,
55 size_t len=strlen(src);
57 if (li->lex_len == len+off && !memcmp(li->lex_buf+off, src, len-off))
/* Tokenizer: extracts the next lexeme from the query text at li->query_ptr
 * and classifies it.
 *
 * Visible behaviour in this excerpt:
 *  - if the current character is one of li->left_sep, the lexeme is
 *    delimited by the character at the same index in li->right_sep;
 *  - an escape character followed by a digit gets special handling (the
 *    handling itself is not visible here);
 *  - the lexeme is scanned up to the separator or the end of the string;
 *  - a space-delimited lexeme starting with the escape character is
 *    compared against the operator keywords listed below.
 *
 * NOTE(review): the token codes returned for each keyword are on lines that
 * are missing from this excerpt; see the `case` labels in the callers.
 */
62 static int query_token(struct yaz_pqf_parser *li)
65 const char *sep_match;
/* Alias for the read cursor so that advancing *qptr advances li->query_ptr. */
66 const char **qptr = &li->query_ptr;
73 if ((sep_match = strchr(li->left_sep, **qptr)))
/* left_sep and right_sep are parallel strings: same index, matching pair. */
75 sep_char = li->right_sep[sep_match - li->left_sep];
/* The cast to unsigned char keeps isdigit() well-defined for bytes >= 0x80. */
80 if (**qptr == li->escape_char && isdigit(((const unsigned char *) *qptr)[1]))
86 while (**qptr && **qptr != sep_char)
/* Keywords are only recognised in unquoted (space-delimited) lexemes. */
98 if (sep_char == ' ' &&
99 li->lex_len >= 1 && li->lex_buf[0] == li->escape_char)
101 if (compare_term(li, "and", 1))
103 if (compare_term(li, "or", 1))
105 if (compare_term(li, "not", 1))
107 if (compare_term(li, "attr", 1))
109 if (compare_term(li, "set", 1))
111 if (compare_term(li, "attrset", 1))
113 if (compare_term(li, "prox", 1))
115 if (compare_term(li, "term", 1))
/* Advance to the next token: stores the result of query_token() in
   li->query_look (the one-token lookahead the parser switches on) and
   returns that same value. */
121 static int lex(struct yaz_pqf_parser *li)
123 return li->query_look = query_token(li);
/* Copy `len` bytes from `in` to `out_buf`, processing backslash sequences.
 *
 * Returns the number of bytes written to out_buf.  Callers in this file use
 * that count to append their own NUL terminator, so out_buf must have room
 * for at least one byte more than is written.
 *
 * NOTE(review): the set of recognised escape sequences is on lines missing
 * from this excerpt.
 */
126 int escape_string(char *out_buf, const char *in, int len)
/* A backslash only starts an escape when at least one more byte remains. */
131 if (*in == '\\' && len > 0)
186 return out - out_buf;
/* Parse one attribute specification into slot `num_attr` of the caller's
 * parallel arrays.
 *
 *   attr_list[2*num_attr]     attribute type (integer)
 *   attr_list[2*num_attr+1]   numeric attribute value, or 0 for a string value
 *   attr_clist[num_attr]      string attribute value, or 0 for a numeric value
 *   attr_set[num_attr]        attribute set OID for this attribute, or 0
 *
 * On failure li->error is set to one of YAZ_PQF_ERROR_ATTSET, _MISSING,
 * _BADATTR or _BAD_INTEGER.  Callers test the return value with `!`, i.e.
 * they treat 0 as failure.
 */
189 int p_query_parse_attr(struct yaz_pqf_parser *li, ODR o,
190 int num_attr, Odr_int *attr_list,
191 char **attr_clist, Odr_oid **attr_set)
/* No '=' inside the current lexeme: the lexeme is taken as an attribute-set
   name.  The second test rejects a '=' that strchr() found beyond the end
   of the lexeme (lex_buf points into the full query text). */
196 if (!(cp = strchr(li->lex_buf, '=')) ||
197 (size_t) (cp-li->lex_buf) > li->lex_len)
199 attr_set[num_attr] = query_oid_getvalbyname(li, o);
200 if (attr_set[num_attr] == 0)
202 li->error = YAZ_PQF_ERROR_ATTSET;
207 li->error = YAZ_PQF_ERROR_MISSING;
/* After an attribute-set name, a type=value lexeme must be present. */
210 if (!(cp = strchr(li->lex_buf, '=')))
212 li->error = YAZ_PQF_ERROR_BADATTR;
/* No explicit set given: either reuse the previous slot's set or record
   none.  NOTE(review): the condition choosing between the two is not
   visible here - presumably num_attr > 0; confirm. */
219 attr_set[num_attr] = attr_set[num_attr-1];
221 attr_set[num_attr] = 0;
/* The attribute type must start with a decimal digit. */
223 if (*li->lex_buf < '0' || *li->lex_buf > '9')
225 li->error = YAZ_PQF_ERROR_BAD_INTEGER;
228 attr_list[2*num_attr] = odr_atoi(li->lex_buf);
231 /* inspect value .. and make it an integer if it appears to be */
232 for (i = cp - li->lex_buf; i < li->lex_len; i++)
233 if (li->lex_buf[i] < '0' || li->lex_buf[i] > '9')
/* A non-digit was found: store the value as an unescaped, NUL-terminated
   string allocated from the ODR stream. */
235 int len = li->lex_len - (cp - li->lex_buf);
236 attr_list[2*num_attr+1] = 0;
237 attr_clist[num_attr] = (char *) odr_malloc(o, len+1);
238 len = escape_string(attr_clist[num_attr], cp, len);
239 attr_clist[num_attr][len] = '\0';
/* All digits: store the value as an integer and mark the string slot unused. */
242 attr_list[2*num_attr+1] = odr_atoi(cp);
243 attr_clist[num_attr] = 0;
/* Build a Z_AttributeList from the parallel arrays filled in by
 * p_query_parse_attr().
 *
 *   o           ODR stream that owns every allocation made here
 *   num_attr    number of attribute slots in use
 *   attr_list   (type, numeric value) pairs, two entries per slot
 *   attr_clist  string value per slot, or 0 when the value is numeric
 *   attr_set    attribute set OID per slot
 *
 * The resulting list may hold fewer than num_attr elements: num_attributes
 * is finally set to the number of elements actually emitted (k).
 */
247 Z_AttributeList *get_attributeList(ODR o,
248 int num_attr, Odr_int *attr_list,
249 char **attr_clist, Odr_oid **attr_set)
253 Z_AttributeElement **elements;
254 Z_AttributeList *attributes= (Z_AttributeList *) odr_malloc(o, sizeof(*attributes));
255 attributes->num_attributes = num_attr;
/* NOTE(review): the condition guarding this assignment is not visible -
   presumably the empty-list case (num_attr == 0); confirm. */
257 attributes->attributes = (Z_AttributeElement**)odr_nullval();
260 elements = (Z_AttributeElement**) odr_malloc (o, num_attr * sizeof(*elements));
/* Copy the integers into ODR memory: the elements below store pointers into
   attr_tmp rather than into the caller's attr_list array. */
262 attr_tmp = (Odr_int *)odr_malloc(o, num_attr * 2 * sizeof(*attr_tmp));
263 memcpy(attr_tmp, attr_list, num_attr * 2 * sizeof(*attr_tmp));
/* Walk the slots from last to first. */
264 for (i = num_attr; --i >= 0; )
/* Look for a later slot with the same attribute type.  NOTE(review): the
   action taken on a match is not visible - presumably slot i is skipped so
   that the later specification wins; confirm. */
267 for (j = i+1; j<num_attr; j++)
268 if (attr_tmp[2*j] == attr_tmp[2*i])
273 (Z_AttributeElement*)odr_malloc(o,sizeof(**elements));
274 elements[k]->attributeType = &attr_tmp[2*i];
275 elements[k]->attributeSet = attr_set[i];
/* String-valued attribute: wrapped as a complex value holding a one-entry
   list with a single string. */
279 elements[k]->which = Z_AttributeValue_complex;
280 elements[k]->value.complex = (Z_ComplexAttribute *)
281 odr_malloc(o, sizeof(Z_ComplexAttribute));
282 elements[k]->value.complex->num_list = 1;
283 elements[k]->value.complex->list =
284 (Z_StringOrNumeric **)
285 odr_malloc(o, 1 * sizeof(Z_StringOrNumeric *));
286 elements[k]->value.complex->list[0] =
287 (Z_StringOrNumeric *)
288 odr_malloc(o, sizeof(Z_StringOrNumeric));
289 elements[k]->value.complex->list[0]->which =
290 Z_StringOrNumeric_string;
291 elements[k]->value.complex->list[0]->u.string =
293 elements[k]->value.complex->semanticAction = 0;
294 elements[k]->value.complex->num_semanticAction = 0;
/* Numeric-valued attribute: points at the copied integer. */
298 elements[k]->which = Z_AttributeValue_numeric;
299 elements[k]->value.numeric = &attr_tmp[2*i+1];
303 attributes->num_attributes = k;
304 attributes->attributes = elements;
/* Build a Z_AttributesPlusTerm from an already constructed attribute list
 * and the current lexeme, which supplies the term.
 *
 * The lexeme is unescaped into an ODR-allocated octet buffer; the term
 * representation is then chosen by li->term_type.  All memory comes from
 * the ODR stream `o`.
 *
 * NOTE(review): several `case` labels of the switch are missing from this
 * excerpt, so the label for some branches below can only be inferred from
 * the value assigned to term->which.
 */
308 static Z_AttributesPlusTerm *rpn_term_attributes(struct yaz_pqf_parser *li, ODR o, Z_AttributeList *attributes) {
309 Z_AttributesPlusTerm *zapt;
313 zapt = (Z_AttributesPlusTerm *)odr_malloc(o, sizeof(*zapt));
314 term = (Z_Term *)odr_malloc(o, sizeof(*term));
316 zapt->attributes = attributes;
318 term_octet = (Odr_oct *)odr_malloc(o, sizeof(*term_octet));
/* One byte more than the lexeme so the terminator written below always fits. */
319 term_octet->buf = (unsigned char *)odr_malloc(o, 1 + li->lex_len);
320 term_octet->size = term_octet->len =
321 escape_string((char *) (term_octet->buf), li->lex_buf, li->lex_len);
322 term_octet->buf[term_octet->size] = 0; /* null terminate */
324 switch (li->term_type)
327 term->which = Z_Term_general;
328 term->u.general = term_octet;
330 case Z_Term_characterString:
331 term->which = Z_Term_characterString;
332 term->u.characterString = (char*) term_octet->buf;
333 /* null terminated above */
336 term->which = Z_Term_numeric;
337 term->u.numeric = odr_intdup(o, odr_atoi((const char*) term_octet->buf));
340 term->which = Z_Term_null;
341 term->u.null = odr_nullval();
343 case Z_Term_external:
344 term->which = Z_Term_external;
/* No external value is built here; the pointer is left null. */
345 term->u.external = 0;
348 term->which = Z_Term_null;
349 term->u.null = odr_nullval();
/* Convenience wrapper: builds the attribute list from the parallel arrays
   with get_attributeList() and combines it with the current lexeme via
   rpn_term_attributes(). */
356 static Z_AttributesPlusTerm *rpn_term(struct yaz_pqf_parser *li, ODR o,
357 int num_attr, Odr_int *attr_list,
358 char **attr_clist, Odr_oid **attr_set)
360 return rpn_term_attributes(li, o, get_attributeList(o, num_attr, attr_list, attr_clist, attr_set));
/* Parse a simple operand, selected by the lookahead token li->query_look:
 * either an attributes-plus-term operand or a result-set reference.
 *
 * Sets li->error to YAZ_PQF_ERROR_MISSING or YAZ_PQF_ERROR_INTERNAL on the
 * failure paths visible below.  NOTE(review): the `case` labels and return
 * statements are missing from this excerpt.
 */
363 static Z_Operand *rpn_simple(struct yaz_pqf_parser *li, ODR o,
364 int num_attr, Odr_int *attr_list,
370 zo = (Z_Operand *)odr_malloc(o, sizeof(*zo));
371 switch (li->query_look)
374 zo->which = Z_Operand_APT;
375 if (!(zo->u.attributesPlusTerm =
376 rpn_term(li, o, num_attr, attr_list, attr_clist, attr_set)))
384 li->error = YAZ_PQF_ERROR_MISSING;
/* Result-set reference: the lexeme is copied as a NUL-terminated string. */
387 zo->which = Z_Operand_resultSetId;
388 zo->u.resultSetId = (char *)odr_malloc(o, li->lex_len+1);
389 memcpy(zo->u.resultSetId, li->lex_buf, li->lex_len);
390 zo->u.resultSetId[li->lex_len] = '\0';
394 /* we're only called if one of the above types is seen, so
395 this shouldn't happen */
396 li->error = YAZ_PQF_ERROR_INTERNAL;
/* Parse the parameters of a proximity operator into a newly allocated
 * Z_ProximityOperator.
 *
 * Fields are filled in this order: exclusion, distance, ordered,
 * relationType, which (known/private unit), and the unit code u.known.
 * Errors are reported through li->error as YAZ_PQF_ERROR_MISSING,
 * YAZ_PQF_ERROR_PROXIMITY or YAZ_PQF_ERROR_BAD_INTEGER.
 *
 * NOTE(review): the conditions guarding each YAZ_PQF_ERROR_MISSING
 * assignment are not visible - presumably a failed lex() before each
 * parameter; confirm.
 */
402 static Z_ProximityOperator *rpn_proximity(struct yaz_pqf_parser *li, ODR o)
404 Z_ProximityOperator *p = (Z_ProximityOperator *)odr_malloc(o, sizeof(*p));
408 li->error = YAZ_PQF_ERROR_MISSING;
/* exclusion: '1' or '0'; 'v' and 'n' are also accepted (the value stored
   for them is on a line missing from this excerpt). */
411 if (*li->lex_buf == '1')
412 p->exclusion = odr_booldup(o, 1);
413 else if (*li->lex_buf == '0')
414 p->exclusion = odr_booldup(o, 0);
415 else if (*li->lex_buf == 'v' || *li->lex_buf == 'n')
419 li->error = YAZ_PQF_ERROR_PROXIMITY;
425 li->error = YAZ_PQF_ERROR_MISSING;
/* distance: must start with a decimal digit. */
428 if (*li->lex_buf >= '0' && *li->lex_buf <= '9')
429 p->distance = odr_intdup(o, odr_atoi(li->lex_buf));
432 li->error = YAZ_PQF_ERROR_BAD_INTEGER;
438 li->error = YAZ_PQF_ERROR_MISSING;
/* ordered: '1' or '0'. */
441 if (*li->lex_buf == '1')
442 p->ordered = odr_booldup(o, 1);
443 else if (*li->lex_buf == '0')
444 p->ordered = odr_booldup(o, 0);
447 li->error = YAZ_PQF_ERROR_PROXIMITY;
453 li->error = YAZ_PQF_ERROR_MISSING;
/* relationType: must start with a decimal digit. */
456 if (*li->lex_buf >= '0' && *li->lex_buf <= '9')
457 p->relationType = odr_intdup(o, odr_atoi(li->lex_buf));
460 li->error = YAZ_PQF_ERROR_BAD_INTEGER;
466 li->error = YAZ_PQF_ERROR_MISSING;
/* which: 'k' (known) or 'p' (private); anything else is read as a number
   and validated below.
   NOTE(review): atoi() is used here while every other numeric field in this
   function uses odr_atoi() - confirm this is intentional. */
469 if (*li->lex_buf == 'k')
470 p->which = Z_ProximityOperator_known;
471 else if (*li->lex_buf == 'p')
472 p->which = Z_ProximityOperator_private;
474 p->which = atoi(li->lex_buf);
476 if (p->which != Z_ProximityOperator_known
477 && p->which != Z_ProximityOperator_private)
479 li->error = YAZ_PQF_ERROR_PROXIMITY;
485 li->error = YAZ_PQF_ERROR_MISSING;
/* unit code: must start with a decimal digit. */
488 if (*li->lex_buf >= '0' && *li->lex_buf <= '9')
489 p->u.known = odr_intdup(o, odr_atoi(li->lex_buf));
492 li->error = YAZ_PQF_ERROR_BAD_INTEGER;
/* Parse a complex (operator) node: the operator selected by the lookahead
 * token followed by two sub-structures, each parsed with rpn_structure().
 *
 * Supported operators: and, or, and-not and prox (whose parameters are
 * parsed by rpn_proximity()).  An unexpected lookahead sets li->error to
 * YAZ_PQF_ERROR_INTERNAL.
 *
 * NOTE(review): the `case` labels, the assignments receiving the two
 * rpn_structure() results and the return statements are missing from this
 * excerpt.
 */
498 static Z_Complex *rpn_complex(struct yaz_pqf_parser *li, ODR o,
499 int num_attr, int max_attr,
500 Odr_int *attr_list, char **attr_clist,
506 zc = (Z_Complex *)odr_malloc(o, sizeof(*zc));
507 zo = (Z_Operator *)odr_malloc(o, sizeof(*zo));
509 switch (li->query_look)
512 zo->which = Z_Operator_and;
513 zo->u.op_and = odr_nullval();
516 zo->which = Z_Operator_or;
517 zo->u.op_or = odr_nullval();
520 zo->which = Z_Operator_and_not;
521 zo->u.and_not = odr_nullval();
524 zo->which = Z_Operator_prox;
525 zo->u.prox = rpn_proximity(li, o);
530 /* we're only called if one of the above types is seen, so
531 this shouldn't happen */
532 li->error = YAZ_PQF_ERROR_INTERNAL;
/* First operand. */
537 rpn_structure(li, o, num_attr, max_attr, attr_list,
538 attr_clist, attr_set)))
/* Second operand. */
541 rpn_structure(li, o, num_attr, max_attr, attr_list,
542 attr_clist, attr_set)))
/* Set li->term_type from the current lexeme, which names the term
 * representation to use for subsequent terms: general, numeric, string,
 * oid, datetime or null.
 *
 * NOTE(review): a line is missing from this excerpt immediately before the
 * "range" branch; confirm whether that branch is actually compiled in
 * before relying on it.
 */
547 static void rpn_term_type(struct yaz_pqf_parser *li)
551 if (compare_term(li, "general", 0))
552 li->term_type = Z_Term_general;
553 else if (compare_term(li, "numeric", 0))
554 li->term_type = Z_Term_numeric;
555 else if (compare_term(li, "string", 0))
556 li->term_type = Z_Term_characterString;
557 else if (compare_term(li, "oid", 0))
558 li->term_type = Z_Term_oid;
559 else if (compare_term(li, "datetime", 0))
560 li->term_type = Z_Term_dateTime;
561 else if (compare_term(li, "null", 0))
562 li->term_type = Z_Term_null;
564 else if (compare_term(li, "range", 0))
566 /* prepare for external: range search .. */
567 li->term_type = Z_Term_external;
568 li->external_type = VAL_MULTISRCH2;
/* Recursive-descent entry point for one RPN structure, dispatching on the
 * lookahead token li->query_look:
 *
 *  - an operator token yields a complex node (rpn_complex());
 *  - an operand token yields a simple node (rpn_simple());
 *  - an attribute token parses one attribute into slot num_attr (failing
 *    with YAZ_PQF_ERROR_TOOMANY once num_attr reaches max_attr) and then
 *    recurses;
 *  - token 0 (end of input) fails with YAZ_PQF_ERROR_MISSING.
 *
 * NOTE(review): most `case` labels, the increment of num_attr before the
 * recursive calls, and the return statements are missing from this excerpt.
 */
574 static Z_RPNStructure *rpn_structure(struct yaz_pqf_parser *li, ODR o,
575 int num_attr, int max_attr,
582 sz = (Z_RPNStructure *)odr_malloc(o, sizeof(*sz));
583 switch (li->query_look)
589 sz->which = Z_RPNStructure_complex;
590 if (!(sz->u.complex =
591 rpn_complex(li, o, num_attr, max_attr, attr_list,
592 attr_clist, attr_set)))
597 sz->which = Z_RPNStructure_simple;
599 rpn_simple(li, o, num_attr, attr_list,
600 attr_clist, attr_set)))
607 li->error = YAZ_PQF_ERROR_MISSING;
/* Bounds check before writing slot num_attr of the caller's arrays. */
610 if (num_attr >= max_attr)
612 li->error = YAZ_PQF_ERROR_TOOMANY;
615 if (!p_query_parse_attr(li, o, num_attr, attr_list,
616 attr_clist, attr_set))
621 rpn_structure(li, o, num_attr, max_attr, attr_list,
622 attr_clist, attr_set);
627 rpn_structure(li, o, num_attr, max_attr, attr_list,
628 attr_clist, attr_set);
629 case 0: /* operator/operand expected! */
630 li->error = YAZ_PQF_ERROR_MISSING;
/* Parse a complete PQF query into a Z_RPNQuery allocated from `o`.
 *
 * An optional leading attribute-set token (lookahead 'r') selects the
 * query's top-level attribute set; otherwise Bib-1 is used.  The rest of
 * the input is parsed with rpn_structure(); trailing input is reported as
 * YAZ_PQF_ERROR_EXTRA.
 *
 * NOTE(review): the guarding conditions and return statements are missing
 * from this excerpt.
 */
636 static Z_RPNQuery *p_query_rpn_mk(ODR o, struct yaz_pqf_parser *li)
/* Scratch arrays for up to 512 attributes; attr_array holds two integers
   (type, value) per attribute, hence twice the size. */
639 Odr_int attr_array[1024];
640 char *attr_clist[512];
641 Odr_oid *attr_set[512];
642 Odr_oid *top_set = 0;
644 zq = (Z_RPNQuery *)odr_malloc(o, sizeof(*zq));
646 if (li->query_look == 'r')
649 top_set = query_oid_getvalbyname(li, o);
652 li->error = YAZ_PQF_ERROR_ATTSET;
/* Default attribute set when none is named in the query. */
659 top_set = odr_oiddup(o, yaz_oid_attset_bib_1);
662 zq->attributeSetId = top_set;
664 if (!zq->attributeSetId)
666 li->error = YAZ_PQF_ERROR_ATTSET;
/* 512 must match the capacity of the scratch arrays above. */
670 if (!(zq->RPNStructure = rpn_structure(li, o, 0, 512,
671 attr_array, attr_clist, attr_set)))
675 li->error = YAZ_PQF_ERROR_EXTRA;
/* Parse the PQF query string `qbuf` into a Z_RPNQuery using a temporary,
 * stack-allocated parser with the same settings that yaz_pqf_create()
 * applies ('@' escape character, general term type).
 *
 * Because the parser is local, its error code and offset are not available
 * to the caller after return; yaz_pqf_parse() takes a caller-owned parser
 * for use with yaz_pqf_error().
 */
681 Z_RPNQuery *p_query_rpn(ODR o, const char *qbuf)
683 struct yaz_pqf_parser li;
687 li.right_sep = "}\"";
688 li.escape_char = '@';
689 li.term_type = Z_Term_general;
690 li.query_buf = li.query_ptr = qbuf;
692 return p_query_rpn_mk(o, &li);
/* Parse the attribute part of a scan specification.
 *
 * An optional leading attribute-set token (lookahead 'r') selects the
 * attribute set, otherwise Bib-1 is used; the chosen OID is stored in
 * *attributeSetP.  Attribute tokens (lookahead 'l') are then collected
 * into local arrays and converted with get_attributeList().
 *
 * Errors are reported through li->error (ATTSET, MISSING, TOOMANY).
 *
 * NOTE(review): the loop structure, the handling of lookahead 'y' and the
 * declarations of num_attr / max_attr are missing from this excerpt.
 */
696 static Z_AttributeList *p_query_scan_attributes_mk(struct yaz_pqf_parser *li,
698 Odr_oid **attributeSetP)
/* attr_list holds two integers (type, value) per attribute. */
700 Odr_int attr_list[1024];
701 char *attr_clist[512];
702 Odr_oid *attr_set[512];
705 Odr_oid *top_set = 0;
708 if (li->query_look == 'r')
711 top_set = query_oid_getvalbyname(li, o);
714 li->error = YAZ_PQF_ERROR_ATTSET;
721 top_set = odr_oiddup(o, yaz_oid_attset_bib_1);
723 *attributeSetP = top_set;
727 if (li->query_look == 'l')
732 li->error = YAZ_PQF_ERROR_MISSING;
/* Bounds check before writing slot num_attr of the local arrays. */
735 if (num_attr >= max_attr)
737 li->error = YAZ_PQF_ERROR_TOOMANY;
740 if (!p_query_parse_attr(li, o, num_attr, attr_list,
741 attr_clist, attr_set))
746 else if (li->query_look == 'y')
754 return get_attributeList(o, num_attr, attr_list, attr_clist, attr_set);
/* Parse a full scan specification: attributes (via
 * p_query_scan_attributes_mk()) followed by the scan term, which is taken
 * from the current lexeme by rpn_term_attributes().
 *
 * Sets li->error to YAZ_PQF_ERROR_MISSING on the failure path visible
 * below, and to YAZ_PQF_ERROR_EXTRA when input remains after the term.
 * NOTE(review): the guarding conditions and return statements are missing
 * from this excerpt.
 */
757 static Z_AttributesPlusTerm *p_query_scan_mk(struct yaz_pqf_parser *li,
759 Odr_oid **attributeSetP)
761 Z_AttributeList *attr_list = p_query_scan_attributes_mk(li, o, attributeSetP);
762 Z_AttributesPlusTerm *apt;
766 li->error = YAZ_PQF_ERROR_MISSING;
769 apt = rpn_term_attributes(li, o, attr_list);
/* Anything other than end-of-input after the term is an error. */
773 if (li->query_look != 0)
775 li->error = YAZ_PQF_ERROR_EXTRA;
/* Allocate a PQF parser with xmalloc() and apply the default settings:
 * '@' as the escape character and general as the term type.
 *
 * The separator strings are string literals, so they are not owned by the
 * parser object.  Pair every call with yaz_pqf_destroy().
 */
781 YAZ_PQF_Parser yaz_pqf_create(void)
783 YAZ_PQF_Parser p = (YAZ_PQF_Parser) xmalloc(sizeof(*p));
787 p->right_sep = "}\"";
788 p->escape_char = '@';
789 p->term_type = Z_Term_general;
/* Counterpart of yaz_pqf_create().
   NOTE(review): the body is missing from this excerpt - presumably it
   releases the xmalloc()ed parser object; confirm. */
794 void yaz_pqf_destroy(YAZ_PQF_Parser p)
/* Parse the PQF query `qbuf` with parser `p`, returning the result of
 * p_query_rpn_mk(); the query tree is allocated from `o`.
 *
 * The parser keeps pointers into qbuf (query_buf / query_ptr), so qbuf
 * must remain valid for as long as yaz_pqf_error() may be called on p.
 */
799 Z_RPNQuery *yaz_pqf_parse(YAZ_PQF_Parser p, ODR o, const char *qbuf)
803 p->query_buf = p->query_ptr = qbuf;
805 return p_query_rpn_mk(o, p);
/* Parse a scan specification (attributes plus term) from `qbuf` with
 * parser `p`.  The attribute set OID is returned through attributeSetP and
 * the result of p_query_scan_mk() is returned directly.
 *
 * NOTE(review): the final parameter line (declaring qbuf) is missing from
 * this excerpt.
 */
808 Z_AttributesPlusTerm *yaz_pqf_scan(YAZ_PQF_Parser p, ODR o,
809 Odr_oid **attributeSetP,
814 p->query_buf = p->query_ptr = qbuf;
816 return p_query_scan_mk(p, o, attributeSetP);
/* Parse only the attribute list of a scan-style specification from `qbuf`
 * with parser `p` (no term).  The attribute set OID is returned through
 * attributeSetP and the result of p_query_scan_attributes_mk() is returned
 * directly.
 *
 * NOTE(review): the final parameter line (declaring qbuf) is missing from
 * this excerpt.
 */
819 Z_AttributeList *yaz_pqf_scan_attribute_list(YAZ_PQF_Parser p, ODR o,
820 Odr_oid **attributeSetP,
825 p->query_buf = p->query_ptr = qbuf;
827 return p_query_scan_attributes_mk(p, o, attributeSetP);
/* Parse one facet specification - the first `length` bytes of `facet` -
 * into a Z_FacetField that carries an attribute list and no terms.
 *
 * A temporary parser is created for the call and destroyed before return.
 * facet_field starts out as 0; callers check the result for null.
 *
 * NOTE(review): the condition guarding the allocation of facet_field is
 * missing from this excerpt - presumably a non-null attribute_list.
 */
830 static Z_FacetField* parse_facet(ODR odr, const char *facet, int length)
832 YAZ_PQF_Parser pqf_parser = yaz_pqf_create();
/* NUL-terminated ODR copy of just this facet's substring for the parser. */
833 char *buffer = odr_strdupn(odr, facet, length);
/* Receives the attribute set OID; not used further in the visible lines. */
834 Odr_oid *attributeSetId;
835 Z_FacetField *facet_field = 0;
836 Z_AttributeList *attribute_list =
837 yaz_pqf_scan_attribute_list(pqf_parser, odr, &attributeSetId, buffer);
841 facet_field = odr_malloc(odr, sizeof(*facet_field));
842 facet_field->attributes = attribute_list;
843 facet_field->num_terms = 0;
844 facet_field->terms = 0;
846 yaz_pqf_destroy(pqf_parser);
/* Character that separates facet specifications in a facet list string.
   NOTE(review): the macro name contains a lowercase 'l' (DElIMITER); it is
   spelled the same way at every use in this file, so any rename must update
   all of them together. */
850 #define FACET_DElIMITER ','
/* Scan the facet argument string for FACET_DElIMITER characters.
 *
 * yaz_pqf_parse_facet_list() uses the return value as the number of facet
 * elements to allocate and parse.
 *
 * NOTE(review): the counter update and the return statement are missing
 * from this excerpt - presumably the result is the number of delimiters
 * plus one for a non-empty argument; confirm.
 */
852 static int scan_facet_argument(const char *arg) {
854 int length = strlen(arg);
856 for (index = 0; index < length; index++) {
857 if (arg[index] == FACET_DElIMITER)
864 * yaz_pqf_parse_facet_list: Parses a comma-separated list of AttributeList(s) into a FacetList.
865 * It does not handle the optional facet term(s).
/* Split `facet` on FACET_DElIMITER and parse each piece with parse_facet()
 * into a Z_FacetList allocated from `odr`.
 *
 * facet_list starts out as 0 and is only allocated once
 * scan_facet_argument() has reported a non-zero element count.
 *
 * NOTE(review): the statement after the zero-count check, the loop's
 * advance of `index` / `facet`, and the handling of a piece that fails to
 * parse are missing from this excerpt.
 */
868 Z_FacetList *yaz_pqf_parse_facet_list(ODR odr, const char *facet) {
869 Z_FacetList *facet_list = 0;
870 Z_FacetField **elements;
872 int num_elements = scan_facet_argument(facet);
873 if (num_elements == 0)
875 facet_list = odr_malloc(odr, sizeof(*facet_list));
876 facet_list->num = num_elements;
877 elements = odr_malloc(odr, num_elements * sizeof(*elements));
878 facet_list->elements = elements;
/* `index` is not advanced in the for-header; see the note above. */
879 for (index = 0; index < num_elements;) {
880 const char *pos = strchr(facet, FACET_DElIMITER);
/* No further delimiter: the last piece runs to the end of the string. */
882 pos = facet + strlen(facet);
883 elements[index] = parse_facet(odr, (const char *) facet, (pos - facet));
884 if (elements[index]) {
889 facet_list->num = num_elements;
/* Report the outcome of the most recent parse on `p`.
 *
 *   msg  receives a pointer to a static, human-readable message
 *   off  receives the offset of the read cursor within the query text
 *        (query_ptr - query_buf), i.e. where parsing stopped
 *
 * NOTE(review): the switch expression and the return statement are missing
 * from this excerpt - presumably both use p->error; confirm.
 */
898 int yaz_pqf_error(YAZ_PQF_Parser p, const char **msg, size_t *off)
902 case YAZ_PQF_ERROR_NONE:
903 *msg = "no error"; break;
904 case YAZ_PQF_ERROR_EXTRA:
905 *msg = "extra token"; break;
906 case YAZ_PQF_ERROR_MISSING:
907 *msg = "missing token"; break;
908 case YAZ_PQF_ERROR_ATTSET:
909 *msg = "unknown attribute set"; break;
910 case YAZ_PQF_ERROR_TOOMANY:
911 *msg = "too many attributes"; break;
912 case YAZ_PQF_ERROR_BADATTR:
913 *msg = "bad attribute specification"; break;
914 case YAZ_PQF_ERROR_INTERNAL:
915 *msg = "internal error"; break;
916 case YAZ_PQF_ERROR_PROXIMITY:
917 *msg = "proximity error"; break;
918 case YAZ_PQF_ERROR_BAD_INTEGER:
919 *msg = "bad integer"; break;
/* Fallback for any code not listed above. */
921 *msg = "unknown error"; break;
923 *off = p->query_ptr - p->query_buf;
929 * c-file-style: "Stroustrup"
930 * indent-tabs-mode: nil
932 * vim: shiftwidth=4 tabstop=8 expandtab