1 /* $Id: rpnsearch.c,v 1.25 2007-12-03 13:04:04 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* log_level_set starts at 0; its use is not visible in this part of the file. */
40 static int log_level_set = 0;
/* yaz log level for "rpn" diagnostics; assigned from
   yaz_log_module_level("rpn") in add_isam_p(). */
41 static int log_level_rpn = 0;
/* NOTE(review): TERMSET_DISABLE is not referenced by name in the visible
   code - confirm where it is tested. */
43 #define TERMSET_DISABLE 1
/* Character-map callback handed to dict_grep_cmap() by
   rpn_char_map_prepare().  vp is cast to a struct rpn_char_map_info and the
   input is mapped through zebra_maps_input() using that structure's zm
   member. */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
/* the log calls below dump the first mapped string as hex bytes */
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Select the character-map callback that dictionary grep uses on reg->dict.
   When zebra_maps_is_icu(zm) holds, dict_grep_cmap() is called with null
   arguments; the other visible call registers rpn_char_map_handler with
   map_info as its client data. */
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
68 if (zebra_maps_is_icu(zm))
69 dict_grep_cmap(reg->dict, 0, 0);
71 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
85 const char *index_type;
/* Record one matching dictionary entry in the grep_info accumulator p.
   name - dictionary key; it begins with an ordinal in the encoding read by
          key_SU_decode(), followed by the term text.
   info - entry payload; its first byte must equal sizeof(ISAM_P) and the
          ISAM_P value follows it.
   The ISAM_P is appended to p->isam_p_buf, which is regrown to
   2*size + 100 entries whenever it is full. */
89 static int add_isam_p(const char *name, const char *info,
94 log_level_rpn = yaz_log_module_level("rpn");
97 /* we may have to stop this madness.. NOTE: -1 so that if
98 truncmax == trunxlimit we do *not* generate result sets */
99 if (p->isam_p_indx >= p->trunc_max - 1)
102 if (p->isam_p_indx == p->isam_p_size)
104 ISAM_P *new_isam_p_buf;
108 p->isam_p_size = 2*p->isam_p_size + 100;
109 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
113 memcpy(new_isam_p_buf, p->isam_p_buf,
114 p->isam_p_indx * sizeof(*p->isam_p_buf));
115 xfree(p->isam_p_buf);
117 p->isam_p_buf = new_isam_p_buf;
120 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* carry the existing term numbers over: the source has to be the old
   p->term_no array, not the ISAM_P buffer that was just replaced */
123 memcpy(new_term_no, p->term_no,
124 p->isam_p_indx * sizeof(*p->term_no));
127 p->term_no = new_term_no;
/* payload layout check: a length byte followed by the ISAM_P itself */
130 assert(*info == sizeof(*p->isam_p_buf));
131 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* the statements below decode the ordinal and the term text for logging and
   pass them to resultSetAddTerm() for p->termset */
136 char term_tmp[IT_MAX_WORD];
138 const char *index_name;
139 int len = key_SU_decode(&ord, (const unsigned char *) name);
141 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143 zebraExplain_lookup_ord(p->zh->reg->zei,
144 ord, 0 /* index_type */, &db, &index_name);
145 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
147 resultSetAddTerm(p->zh, p->termset, name[len], db,
148 index_name, term_tmp);
/* Callback passed to dict_lookup_grep(): forwards to add_isam_p() with the
   client pointer p cast to struct grep_info. */
154 static int grep_handle(char *name, const char *info, void *p)
156 return add_isam_p(name, info, (struct grep_info *) p);
/* Pre-scan of a term starting at *src.  ct1 and ct2, when non-null, are
   character sets that the current character is tested against with
   strchr(); other input is run through zebra_maps_input() and compared with
   CHR_SPACE.  Callers in this file treat a zero return as "no term here"
   (see the if (!term_pre(...)) tests in the term_1xx functions). */
159 static int term_pre(zebra_map_t zm, const char **src,
160 const char *ct1, const char *ct2, int first)
162 const char *s1, *s0 = *src;
165 /* skip white space */
168 if (ct1 && strchr(ct1, *s0))
170 if (ct2 && strchr(ct2, *s0))
173 map = zebra_maps_input(zm, &s1, strlen(s1), first);
174 if (**map != *CHR_SPACE)
/* Render in_buf[0..in_size) into out_buf as a sequence of "HH:c " items
   (hex value, then a character) for logging.
   out_size must be greater than 20 (asserted); once the text comes within
   20 bytes of out_size, ".." is appended. */
183 static void esc_str(char *out_buf, size_t out_size,
184 const char *in_buf, int in_size)
190 assert(out_size > 20);
192 for (k = 0; k<in_size; k++)
/* mask to 0..255 so bytes with the high bit set are not negative */
194 int c = in_buf[k] & 0xff;
/* bytes outside the printable ASCII range get separate treatment */
196 if (c < 32 || c > 126)
200 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
201 if (strlen(out_buf) > out_size-20)
203 strcat(out_buf, "..");
/* Characters that get a backslash prefix before being written into a
   dictionary regular expression (see the strchr(REGEX_CHARS, ...) tests). */
209 #define REGEX_CHARS " []()|.*+?!"
/* Add the non-space input span [start, end) to the term being built.
   The raw bytes are copied into dst_term at offset *dst_ptr.  For term_dict
   two forms are visible: the source characters themselves, with a backslash
   in front of any character found in REGEX_CHARS, and the mapped string
   map[0].  NOTE(review): the choice between the two presumably depends on
   q_map_match - confirm. */
211 static void add_non_space(const char *start, const char *end,
213 char *dst_term, int *dst_ptr,
214 const char **map, int q_map_match)
216 size_t sz = end - start;
217 memcpy(dst_term + *dst_ptr, start, sz);
223 if (strchr(REGEX_CHARS, *start))
224 wrbuf_putc(term_dict, '\\');
225 wrbuf_putc(term_dict, *start);
/* tmpbuf receives a printable dump of map[0] from esc_str() */
232 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
234 wrbuf_puts(term_dict, map[0]);
/* ICU variant of term_100(); term_100() delegates here when
   zebra_maps_is_icu(zm) is true.  The whole remaining input *src is given to
   zebra_map_tokenize(); *src is appended to dst_term and then advanced to
   its terminating NUL.  Token bytes are written to term_dict, with a
   backslash in front of any byte found in REGEX_CHARS. */
239 static int term_100_icu(zebra_map_t zm,
240 const char **src, WRBUF term_dict, int space_split,
244 const char *res_buf = 0;
246 int r = zebra_map_tokenize(zm, *src, strlen(*src),
249 yaz_log(YLOG_LOG, "term_100_icu r=%d", r);
/* NOTE(review): strcat() is unbounded here - confirm that dst_term is
   always large enough for the input */
251 strcat(dst_term, *src);
252 *src += strlen(*src);
257 for (i = 0; i < res_len; i++)
259 if (strchr(REGEX_CHARS, res_buf[i]))
260 wrbuf_putc(term_dict, '\\');
/* byte value 1 is the internal regexp escape char used elsewhere in this file */
262 wrbuf_putc(term_dict, 1);
263 wrbuf_putc(term_dict, res_buf[i]);
/* null input arguments: ask the tokenizer for a further result */
265 r = zebra_map_tokenize(zm, 0, 0, &res_buf, &res_len);
270 /* term_100: handle term, where trunc = none(no operators at all) */
/* Writes the regexp form of the term to term_dict and the plain text to
   dst_term.  ICU maps are delegated to term_100_icu().  Characters found in
   REGEX_CHARS are escaped with a backslash.  Mapped spaces are remembered
   (space_start/space_end) and written out only once a later non-space
   character is seen. */
271 static int term_100(zebra_map_t zm,
272 const char **src, WRBUF term_dict, int space_split,
280 const char *space_start = 0;
281 const char *space_end = 0;
283 if (zebra_maps_is_icu(zm))
284 return term_100_icu(zm, src, term_dict, space_split, dst_term);
286 if (!term_pre(zm, src, NULL, NULL, !space_split))
293 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
296 if (**map == *CHR_SPACE)
299 else /* complete subfield only. */
301 if (**map == *CHR_SPACE)
302 { /* save space mapping for later .. */
307 else if (space_start)
308 { /* reload last space */
309 while (space_start < space_end)
311 if (strchr(REGEX_CHARS, *space_start))
312 wrbuf_putc(term_dict, '\\');
313 dst_term[j++] = *space_start;
314 wrbuf_putc(term_dict, *space_start);
319 space_start = space_end = 0;
324 add_non_space(s1, s0, term_dict, dst_term, &j,
332 /* term_101: handle term, where trunc = Process # */
/* '#' is the only operator character passed to term_pre(); the visible
   operator branch writes ".*" to term_dict and copies the operator to
   dst_term.  Other input goes through zebra_maps_search() and
   add_non_space().  dst_term is NUL-terminated at the end. */
333 static int term_101(zebra_map_t zm,
334 const char **src, WRBUF term_dict, int space_split,
342 if (!term_pre(zm, src, "#", "#", !space_split))
350 wrbuf_puts(term_dict, ".*");
351 dst_term[j++] = *s0++;
357 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
358 if (space_split && **map == *CHR_SPACE)
362 add_non_space(s1, s0, term_dict, dst_term, &j,
366 dst_term[j++] = '\0';
371 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* errors may be NULL (term_102() calls it that way).  When it is non-null
   and the term begins with '+', a digit, '+' and at least one more
   character, the digit is stored in *errors.  Regexp operator characters
   are written to term_dict unchanged; other input goes through
   zebra_maps_search() and add_non_space(). */
372 static int term_103(zebra_map_t zm, const char **src,
373 WRBUF term_dict, int *errors, int space_split,
381 if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
384 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
385 isdigit(((const unsigned char *)s0)[1]))
387 *errors = s0[1] - '0';
394 if (strchr("^\\()[].*+?|-", *s0))
397 wrbuf_putc(term_dict, *s0);
405 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
406 if (space_split && **map == *CHR_SPACE)
410 add_non_space(s1, s0, term_dict, dst_term, &j,
420 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Same processing as term_103(), called with a NULL errors pointer. */
421 static int term_102(zebra_map_t zm, const char **src,
422 WRBUF term_dict, int space_split, char *dst_term)
424 return term_103(zm, src, term_dict, NULL, space_split, dst_term);
428 /* term_104: handle term, process ?, * and # */
/* The operator set passed to term_pre() is "?*#".  One operator may be
   followed by decimal digits, which are accumulated into limit and copied
   to dst_term.  The visible regexp fragments written to term_dict are ".?",
   ".*" and ".".  dst_term is NUL-terminated at the end. */
429 static int term_104(zebra_map_t zm, const char **src,
430 WRBUF term_dict, int space_split, char *dst_term)
437 if (!term_pre(zm, src, "?*#", "?*#", !space_split))
445 dst_term[j++] = *s0++;
446 if (*s0 >= '0' && *s0 <= '9')
449 while (*s0 >= '0' && *s0 <= '9')
451 limit = limit * 10 + (*s0 - '0');
452 dst_term[j++] = *s0++;
458 wrbuf_puts(term_dict, ".?");
463 wrbuf_puts(term_dict, ".*");
469 wrbuf_puts(term_dict, ".*");
470 dst_term[j++] = *s0++;
475 wrbuf_puts(term_dict, ".");
476 dst_term[j++] = *s0++;
482 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
483 if (space_split && **map == *CHR_SPACE)
487 add_non_space(s1, s0, term_dict, dst_term, &j,
491 dst_term[j++] = '\0';
496 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* The operator set passed to term_pre() is "*!"; the visible operator
   branches write ".*" and "." to term_dict.  string_term() passes
   right_truncate = 1 for truncation 105 and 0 for truncation 106.
   NOTE(review): the trailing ".*" is presumably written only when
   right_truncate is set - confirm. */
497 static int term_105(zebra_map_t zm, const char **src,
498 WRBUF term_dict, int space_split,
499 char *dst_term, int right_truncate)
506 if (!term_pre(zm, src, "*!", "*!", !space_split))
514 wrbuf_puts(term_dict, ".*");
515 dst_term[j++] = *s0++;
520 wrbuf_putc(term_dict, '.');
521 dst_term[j++] = *s0++;
527 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
528 if (space_split && **map == *CHR_SPACE)
532 add_non_space(s1, s0, term_dict, dst_term, &j,
537 wrbuf_puts(term_dict, ".*");
538 dst_term[j++] = '\0';
544 /* gen_regular_rel - generate regular expression from relation
545 * val: border value (inclusive)
546 * islt: 1 if <=; 0 if >=.
548 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
550 char dst_buf[20*5*20]; /* assuming enough for expansion */
/* The expression is assembled in dst with strcpy/strcat and appended to
   term_dict by the final wrbuf_puts().  Two opening forms are visible:
   "(-[0-9]+|(" accepts any negative number, "([0-9]+|-(" any non-negative
   one.  NOTE(review): the selection between them presumably depends on the
   sign of val and on islt - confirm. */
557 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
561 strcpy(dst, "(-[0-9]+|(");
569 strcpy(dst, "([0-9]+|-(");
/* decimal text of val; pos is advanced once per digit by the loop below */
580 sprintf(numstr, "%d", val);
581 for (w = strlen(numstr); --w >= 0; pos++)
600 strcpy(dst + dst_p, numstr);
601 dst_p = strlen(dst) - pos - 1;
629 for (i = 0; i<pos; i++)
642 /* match everything less than 10^(pos-1) */
644 for (i = 1; i<pos; i++)
645 strcat(dst, "[0-9]?");
649 /* match everything greater than 10^pos */
650 for (i = 0; i <= pos; i++)
651 strcat(dst, "[0-9]");
652 strcat(dst, "[0-9]*");
655 wrbuf_puts(term_dict, dst);
/* Copy one logical character of wsrc, starting at offset *indx, to term_p.
   A backslash at that offset is copied as well as a following character, so
   an escaped pair is not split.  NOTE(review): *indx is presumably advanced
   past what was copied - callers loop on it; confirm. */
658 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
660 const char *src = wrbuf_cstr(wsrc);
661 if (src[*indx] == '\\')
663 wrbuf_putc(term_p, src[*indx]);
666 wrbuf_putc(term_p, src[*indx]);
671 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
672 * ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
673 * >= abc ([b-].*|a[c-].*|ab[c-].*)
674 * ([^-a].*|a[^-b].*|ab[c-].*)
675 * < abc ([-0].*|a[-a].*|ab[-b].*)
676 * ([^a-].*|a[^b-].*|ab[^c-].*)
677 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
678 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Append to term_dict a regular expression for the term at *term_sub under
   the relation attribute (type 2) of zapt.  The term text is first collected
   in a temporary WRBUF (term_component) by term_100() and then expanded
   prefix by prefix into alternatives of the kind listed in the table above
   this function.  term_component is destroyed on every visible exit path.
   For an unsupported relation *error_code is set to
   YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE. */
680 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
681 const char **term_sub, WRBUF term_dict,
682 const Odr_oid *attributeSet,
683 zebra_map_t zm, int space_split, char *term_dst,
689 WRBUF term_component = wrbuf_alloc();
691 attr_init_APT(&relation, zapt, 2);
692 relation_value = attr_find(&relation, NULL);
695 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
696 switch (relation_value)
699 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
701 wrbuf_destroy(term_component);
704 yaz_log(log_level_rpn, "Relation <");
706 wrbuf_putc(term_dict, '(');
707 for (i = 0; i < wrbuf_len(term_component); )
712 wrbuf_putc(term_dict, '|');
714 string_rel_add_char(term_dict, term_component, &j);
716 wrbuf_putc(term_dict, '[');
718 wrbuf_putc(term_dict, '^');
/* byte 1 is the internal regexp escape; it precedes FIRST_IN_FIELD_CHAR */
720 wrbuf_putc(term_dict, 1);
721 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
723 string_rel_add_char(term_dict, term_component, &i);
724 wrbuf_putc(term_dict, '-');
726 wrbuf_putc(term_dict, ']');
727 wrbuf_putc(term_dict, '.');
728 wrbuf_putc(term_dict, '*');
730 wrbuf_putc(term_dict, ')');
733 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
735 wrbuf_destroy(term_component);
738 yaz_log(log_level_rpn, "Relation <=");
740 wrbuf_putc(term_dict, '(');
741 for (i = 0; i < wrbuf_len(term_component); )
746 string_rel_add_char(term_dict, term_component, &j);
747 wrbuf_putc(term_dict, '[');
749 wrbuf_putc(term_dict, '^');
751 wrbuf_putc(term_dict, 1);
752 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
754 string_rel_add_char(term_dict, term_component, &i);
755 wrbuf_putc(term_dict, '-');
757 wrbuf_putc(term_dict, ']');
758 wrbuf_putc(term_dict, '.');
759 wrbuf_putc(term_dict, '*');
761 wrbuf_putc(term_dict, '|');
/* final alternative: the complete term itself (the "or equal" part) */
763 for (i = 0; i < wrbuf_len(term_component); )
764 string_rel_add_char(term_dict, term_component, &i);
765 wrbuf_putc(term_dict, ')');
768 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
770 wrbuf_destroy(term_component);
773 yaz_log(log_level_rpn, "Relation >");
775 wrbuf_putc(term_dict, '(');
776 for (i = 0; i < wrbuf_len(term_component); )
781 string_rel_add_char(term_dict, term_component, &j);
782 wrbuf_putc(term_dict, '[');
784 wrbuf_putc(term_dict, '^');
785 wrbuf_putc(term_dict, '-');
786 string_rel_add_char(term_dict, term_component, &i);
788 wrbuf_putc(term_dict, ']');
789 wrbuf_putc(term_dict, '.');
790 wrbuf_putc(term_dict, '*');
792 wrbuf_putc(term_dict, '|');
/* final alternative: the complete term followed by at least one character */
794 for (i = 0; i < wrbuf_len(term_component); )
795 string_rel_add_char(term_dict, term_component, &i);
796 wrbuf_putc(term_dict, '.');
797 wrbuf_putc(term_dict, '+');
798 wrbuf_putc(term_dict, ')');
801 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
803 wrbuf_destroy(term_component);
806 yaz_log(log_level_rpn, "Relation >=");
808 wrbuf_putc(term_dict, '(');
809 for (i = 0; i < wrbuf_len(term_component); )
814 wrbuf_putc(term_dict, '|');
816 string_rel_add_char(term_dict, term_component, &j);
817 wrbuf_putc(term_dict, '[');
/* every position except the last uses the negated class; the last one uses
   an open-ended range that starts at the character itself */
819 if (i < wrbuf_len(term_component)-1)
821 wrbuf_putc(term_dict, '^');
822 wrbuf_putc(term_dict, '-');
823 string_rel_add_char(term_dict, term_component, &i);
827 string_rel_add_char(term_dict, term_component, &i);
828 wrbuf_putc(term_dict, '-');
830 wrbuf_putc(term_dict, ']');
831 wrbuf_putc(term_dict, '.');
832 wrbuf_putc(term_dict, '*');
834 wrbuf_putc(term_dict, ')');
841 yaz_log(log_level_rpn, "Relation =");
842 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
844 wrbuf_destroy(term_component);
/* equality: the collected term wrapped in parentheses */
847 wrbuf_puts(term_dict, "(");
848 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
849 wrbuf_puts(term_dict, ")");
852 yaz_log(log_level_rpn, "Relation always matches");
853 /* skip to end of term (we don't care what it is) */
854 while (**term_sub != '\0')
858 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
859 wrbuf_destroy(term_component);
862 wrbuf_destroy(term_component);
/* Forward declaration: string_term() is defined after term_trunc(), which
   calls it. */
866 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
867 const char **term_sub,
869 const Odr_oid *attributeSet, NMEM stream,
870 struct grep_info *grep_info,
871 const char *index_type, int complete_flag,
873 const char *xpath_use,
874 struct ord_list **ol);
/* Read the hits-limit attribute (type 11) and the term-reference-id
   attribute (type 10) from zapt.
   hits_limit_value - output: effective hit-count limit (rules below).
   term_ref_id_str  - output: reference id as a string; a numeric id >= 0 is
                      formatted into a 20-byte buffer taken from nmem. */
876 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
877 Z_AttributesPlusTerm *zapt,
878 zint *hits_limit_value,
879 const char **term_ref_id_str,
882 AttrType term_ref_id_attr;
883 AttrType hits_limit_attr;
886 attr_init_APT(&hits_limit_attr, zapt, 11);
887 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
889 attr_init_APT(&term_ref_id_attr, zapt, 10);
890 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
891 if (term_ref_id_int >= 0)
893 char *res = nmem_malloc(nmem, 20);
894 sprintf(res, "%d", term_ref_id_int);
895 *term_ref_id_str = res;
898 /* no limit given ? */
899 if (*hits_limit_value == -1)
901 if (*term_ref_id_str)
903 /* use global if term_ref is present */
904 *hits_limit_value = zh->approx_limit;
908 /* no counting if term_ref is not present */
909 *hits_limit_value = 0;
912 else if (*hits_limit_value == 0)
914 /* 0 is the same as global limit */
915 *hits_limit_value = zh->approx_limit;
917 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
918 *term_ref_id_str ? *term_ref_id_str : "none",
/* Build the result set for the next term of a query leaf.
   The hit limit and term reference id come from zebra_term_limits_APT().
   grep_info's position count is reset, string_term() collects the ISAM
   positions for the term at *term_sub, and rset_trunc() turns them into
   *rset.  The temporary dictionary regexp buffer is released as soon as
   string_term() returns. */
923 static ZEBRA_RES term_trunc(ZebraHandle zh,
924 Z_AttributesPlusTerm *zapt,
925 const char **term_sub,
926 const Odr_oid *attributeSet, NMEM stream,
927 struct grep_info *grep_info,
928 const char *index_type, int complete_flag,
930 const char *rank_type,
931 const char *xpath_use,
934 struct rset_key_control *kc)
938 zint hits_limit_value;
939 const char *term_ref_id_str = 0;
940 WRBUF term_dict = wrbuf_alloc();
943 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
945 grep_info->isam_p_indx = 0;
946 res = string_term(zh, zapt, term_sub, term_dict,
947 attributeSet, stream, grep_info,
948 index_type, complete_flag,
949 term_dst, xpath_use, &ol);
950 wrbuf_destroy(term_dict);
953 if (!*term_sub) /* no more terms ? */
955 yaz_log(log_level_rpn, "term: %s", term_dst);
956 *rset = rset_trunc(zh, grep_info->isam_p_buf,
957 grep_info->isam_p_indx, term_dst,
958 strlen(term_dst), rank_type, 1 /* preserve pos */,
959 zapt->term->which, rset_nmem,
960 kc, kc->scope, ol, index_type, hits_limit_value,
/* Collect the ISAM positions for one string term.
   The dictionary regular expression is built in term_dict: first the index
   ordinal (from zebra_apt_get_ord) with every byte preceded by the internal
   escape byte 1, inside parentheses; then a sub-expression chosen by the
   truncation attribute (type 5) of zapt.  dict_lookup_grep() runs the
   expression and grep_handle() stores each hit in grep_info.
   term_sub - in/out position in the query term text.
   term_dst - receives the plain text of the term.
   ol       - output list of the ordinals that were searched.
   An unsupported truncation value is reported through
   zebra_setError_zint() with YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE. */
967 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
968 const char **term_sub,
970 const Odr_oid *attributeSet, NMEM stream,
971 struct grep_info *grep_info,
972 const char *index_type, int complete_flag,
974 const char *xpath_use,
975 struct ord_list **ol)
979 int truncation_value;
981 struct rpn_char_map_info rcmi;
/* complete (phrase) fields are not split on spaces */
983 int space_split = complete_flag ? 0 : 1;
986 int max_pos, prefix_len = 0;
990 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
992 *ol = ord_list_create(stream);
994 rpn_char_map_prepare(zh->reg, zm, &rcmi);
995 attr_init_APT(&truncation, zapt, 5);
996 truncation_value = attr_find(&truncation, NULL);
997 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
999 termp = *term_sub; /* start of term for each database */
1001 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1002 attributeSet, &ord) != ZEBRA_OK)
1008 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1010 *ol = ord_list_append(stream, *ol, ord);
1011 ord_len = key_SU_encode(ord, ord_buf);
1013 wrbuf_putc(term_dict, '(');
1015 for (i = 0; i<ord_len; i++)
1017 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1018 wrbuf_putc(term_dict, ord_buf[i]);
1020 wrbuf_putc(term_dict, ')');
/* remember where the term part starts; used for the debug dump below */
1022 prefix_len = wrbuf_len(term_dict);
1024 switch (truncation_value)
1026 case -1: /* not specified */
1027 case 100: /* do not truncate */
1028 if (!string_relation(zh, zapt, &termp, term_dict,
1030 zm, space_split, term_dst,
1035 zebra_setError(zh, relation_error, 0);
1042 case 1: /* right truncation */
1043 wrbuf_putc(term_dict, '(');
1044 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1049 wrbuf_puts(term_dict, ".*)");
1051 case 2: /* left truncation */
1052 wrbuf_puts(term_dict, "(.*");
1053 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1058 wrbuf_putc(term_dict, ')');
1060 case 3: /* left&right truncation */
1061 wrbuf_puts(term_dict, "(.*");
1062 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1067 wrbuf_puts(term_dict, ".*)");
1069 case 101: /* process # in term */
1070 wrbuf_putc(term_dict, '(');
1071 if (!term_101(zm, &termp, term_dict, space_split, term_dst))
1076 wrbuf_puts(term_dict, ")");
1078 case 102: /* Regexp-1 */
1079 wrbuf_putc(term_dict, '(');
1080 if (!term_102(zm, &termp, term_dict, space_split, term_dst))
1085 wrbuf_putc(term_dict, ')');
1087 case 103: /* Regexp-2 */
1089 wrbuf_putc(term_dict, '(');
/* term_103() stores an error tolerance parsed from the term in regex_range;
   that value is handed to dict_lookup_grep() below */
1090 if (!term_103(zm, &termp, term_dict, &regex_range,
1091 space_split, term_dst))
1096 wrbuf_putc(term_dict, ')');
1098 case 104: /* process # and ! in term */
1099 wrbuf_putc(term_dict, '(');
1100 if (!term_104(zm, &termp, term_dict, space_split, term_dst))
1105 wrbuf_putc(term_dict, ')');
1107 case 105: /* process * and ! in term (right_truncate = 1) */
1108 wrbuf_putc(term_dict, '(');
1109 if (!term_105(zm, &termp, term_dict, space_split, term_dst, 1))
1114 wrbuf_putc(term_dict, ')');
1116 case 106: /* process * and ! in term (right_truncate = 0) */
1117 wrbuf_putc(term_dict, '(');
1118 if (!term_105(zm, &termp, term_dict, space_split, term_dst, 0))
1123 wrbuf_putc(term_dict, ')');
1126 zebra_setError_zint(zh,
1127 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* debug dumps of the expression (term part only, then the whole buffer) */
1134 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1135 esc_str(buf, sizeof(buf), input, strlen(input));
1138 WRBUF pr_wr = wrbuf_alloc();
1140 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1141 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1142 wrbuf_destroy(pr_wr);
1144 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1145 grep_info, &max_pos,
1146 ord_len /* number of "exact" chars */,
1149 zebra_set_partial_result(zh);
1151 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1153 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Release the buffers owned by grep_info: the term_no array and the ISAM_P
   buffer. */
1159 static void grep_info_delete(struct grep_info *grep_info)
1162 xfree(grep_info->term_no);
1164 xfree(grep_info->isam_p_buf);
/* Initialise grep_info for a search on index_type.
   trunc_max comes from the "truncmax" resource (default "10000") and may be
   overridden by attribute type 13 of zapt.  Attribute type 8 names a
   termset.  Two treatments of a termset attribute are visible below: an
   "unsupported" error, and creation of a result set under the given name.
   NOTE(review): which of the two is compiled presumably depends on
   TERMSET_DISABLE - confirm. */
1167 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1168 Z_AttributesPlusTerm *zapt,
1169 struct grep_info *grep_info,
1170 const char *index_type)
1173 grep_info->term_no = 0;
1175 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1176 grep_info->isam_p_size = 0;
1177 grep_info->isam_p_buf = NULL;
1179 grep_info->index_type = index_type;
1180 grep_info->termset = 0;
/* a per-query truncmax attribute (-1 = absent) replaces the resource value */
1186 attr_init_APT(&truncmax, zapt, 13);
1187 truncmax_value = attr_find(&truncmax, NULL);
1188 if (truncmax_value != -1)
1189 grep_info->trunc_max = truncmax_value;
1194 int termset_value_numeric;
1195 const char *termset_value_string;
1197 attr_init_APT(&termset, zapt, 8);
1198 termset_value_numeric =
1199 attr_find_ex(&termset, NULL, &termset_value_string);
1200 if (termset_value_numeric != -1)
1203 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1207 const char *termset_name = 0;
/* any value other than -2 is formatted as a decimal name; for -2 the string
   value of the attribute is used as the name */
1208 if (termset_value_numeric != -2)
1211 sprintf(resname, "%d", termset_value_numeric);
1212 termset_name = resname;
1215 termset_name = termset_value_string;
1216 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1217 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1218 if (!grep_info->termset)
1220 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1230 \brief Create result set(s) for list of terms
1231 \param zh Zebra Handle
1232 \param zapt Attributes Plus Term (RPN leaf)
1233 \param termz term as used in query but converted to UTF-8
1234 \param attributeSet default attribute set
1235 \param stream memory for result
1236 \param index_type register type ("w", "p",..)
1237 \param complete_flag whether it's phrases or not
1238 \param rank_type term flags for ranking
1239 \param xpath_use use attribute for X-Path (-1 for no X-path)
1240 \param rset_nmem memory for result sets
1241 \param result_sets output result set for each term in list (output)
1242 \param num_result_sets number of output result sets
1243 \param kc rset key control to be used for created result sets
/* Produce one result set per term of termz by calling term_trunc()
   repeatedly.  The output array *result_sets is taken from stream and
   regrown (old entries copied) whenever it is full.  If term_trunc() does
   not return ZEBRA_OK, the sets created so far are deleted and grep_info is
   released.  grep_info is also released on the normal path. */
1245 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1246 Z_AttributesPlusTerm *zapt,
1248 const Odr_oid *attributeSet,
1250 const char *index_type, int complete_flag,
1251 const char *rank_type,
1252 const char *xpath_use,
1254 RSET **result_sets, int *num_result_sets,
1255 struct rset_key_control *kc)
1257 char term_dst[IT_MAX_WORD+1];
1258 struct grep_info grep_info;
1259 const char *termp = termz;
1262 *num_result_sets = 0;
1264 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
/* grow the output array when every allocated slot is in use */
1270 if (alloc_sets == *num_result_sets)
1273 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1276 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1277 alloc_sets = alloc_sets + add;
1278 *result_sets = rnew;
1280 res = term_trunc(zh, zapt, &termp, attributeSet,
1282 index_type, complete_flag,
1283 term_dst, rank_type,
1284 xpath_use, rset_nmem,
1285 &(*result_sets)[*num_result_sets],
1287 if (res != ZEBRA_OK)
1290 for (i = 0; i < *num_result_sets; i++)
1291 rset_delete((*result_sets)[i]);
1292 grep_info_delete(&grep_info);
/* a null set from term_trunc() is tested before the count is advanced */
1295 if ((*result_sets)[*num_result_sets] == 0)
1297 (*num_result_sets)++;
1302 grep_info_delete(&grep_info);
/* Handle the position attribute (type 3) of zapt.
   Unsupported values, and maps for which zebra_maps_is_first_in_field(zm)
   is false, are reported with YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE.
   Otherwise a dictionary key is formed from the encoded index ordinal
   followed by FIRST_IN_FIELD_STR, looked up with dict_lookup(), and the
   ISAM_P stored there is used to create *rset. */
1306 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1307 Z_AttributesPlusTerm *zapt,
1308 const Odr_oid *attributeSet,
1309 const char *index_type,
1312 struct rset_key_control *kc)
1318 char term_dict[100];
1322 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1324 attr_init_APT(&position, zapt, 3);
1325 position_value = attr_find(&position, NULL);
1326 switch(position_value)
1335 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1341 if (!zebra_maps_is_first_in_field(zm))
1343 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1348 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1349 attributeSet, &ord) != ZEBRA_OK)
1353 ord_len = key_SU_encode(ord, ord_buf);
1354 memcpy(term_dict, ord_buf, ord_len);
1355 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1356 val = dict_lookup(zh->reg->dict, term_dict);
/* dictionary value layout: a length byte followed by the ISAM_P */
1359 assert(*val == sizeof(ISAM_P));
1360 memcpy(&isam_p, val+1, sizeof(isam_p));
1362 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
/* Phrase search: one result set per term from term_list_trunc(), combined
   with an ordered proximity operator of distance 1.  When there is at least
   one term set, rpn_search_APT_position() is consulted and a set returned
   in first_set is placed in front of the term sets in a new array.
   Result: a null set for zero sets, the set itself for one, otherwise the
   proximity set. */
1368 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1369 Z_AttributesPlusTerm *zapt,
1370 const char *termz_org,
1371 const Odr_oid *attributeSet,
1373 const char *index_type, int complete_flag,
1374 const char *rank_type,
1375 const char *xpath_use,
1378 struct rset_key_control *kc)
1380 RSET *result_sets = 0;
1381 int num_result_sets = 0;
1383 term_list_trunc(zh, zapt, termz_org, attributeSet,
1384 stream, index_type, complete_flag,
1385 rank_type, xpath_use,
1387 &result_sets, &num_result_sets, kc);
1389 if (res != ZEBRA_OK)
1392 if (num_result_sets > 0)
1395 res = rpn_search_APT_position(zh, zapt, attributeSet,
1397 rset_nmem, &first_set,
1399 if (res != ZEBRA_OK)
/* new array with one extra slot; the position set goes first */
1403 RSET *nsets = nmem_malloc(stream,
1404 sizeof(RSET) * (num_result_sets+1));
1405 nsets[0] = first_set;
1406 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1407 result_sets = nsets;
1411 if (num_result_sets == 0)
1412 *rset = rset_create_null(rset_nmem, kc, 0);
1413 else if (num_result_sets == 1)
1414 *rset = result_sets[0];
1416 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1417 num_result_sets, result_sets,
1418 1 /* ordered */, 0 /* exclusion */,
1419 3 /* relation */, 1 /* distance */);
/* OR-list search: one result set per term from term_list_trunc().  For each
   term set rpn_search_APT_position() is consulted; a returned first_set is
   paired with the term set in an ordered proximity set of distance 1.  If
   the position lookup fails, all term sets are deleted.
   Result: a null set for zero sets, the set itself for one, otherwise the
   OR of all sets. */
1425 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1426 Z_AttributesPlusTerm *zapt,
1427 const char *termz_org,
1428 const Odr_oid *attributeSet,
1430 const char *index_type,
1432 const char *rank_type,
1433 const char *xpath_use,
1436 struct rset_key_control *kc)
1438 RSET *result_sets = 0;
1439 int num_result_sets = 0;
1442 term_list_trunc(zh, zapt, termz_org, attributeSet,
1443 stream, index_type, complete_flag,
1444 rank_type, xpath_use,
1446 &result_sets, &num_result_sets, kc);
1447 if (res != ZEBRA_OK)
1450 for (i = 0; i<num_result_sets; i++)
1453 res = rpn_search_APT_position(zh, zapt, attributeSet,
1455 rset_nmem, &first_set,
1457 if (res != ZEBRA_OK)
1459 for (i = 0; i<num_result_sets; i++)
1460 rset_delete(result_sets[i]);
/* replace the term set by "position set followed by term set" */
1468 tmp_set[0] = first_set;
1469 tmp_set[1] = result_sets[i];
1471 result_sets[i] = rset_create_prox(
1472 rset_nmem, kc, kc->scope,
1474 1 /* ordered */, 0 /* exclusion */,
1475 3 /* relation */, 1 /* distance */);
1478 if (num_result_sets == 0)
1479 *rset = rset_create_null(rset_nmem, kc, 0);
1480 else if (num_result_sets == 1)
1481 *rset = result_sets[0];
1483 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1484 num_result_sets, result_sets);
/* AND-list search: same structure as rpn_search_APT_or_list(), but the
   per-term sets are combined with rset_create_and().
   Result: a null set for zero sets, the set itself for one, otherwise the
   AND of all sets. */
1490 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1491 Z_AttributesPlusTerm *zapt,
1492 const char *termz_org,
1493 const Odr_oid *attributeSet,
1495 const char *index_type,
1497 const char *rank_type,
1498 const char *xpath_use,
1501 struct rset_key_control *kc)
1503 RSET *result_sets = 0;
1504 int num_result_sets = 0;
1507 term_list_trunc(zh, zapt, termz_org, attributeSet,
1508 stream, index_type, complete_flag,
1509 rank_type, xpath_use,
1511 &result_sets, &num_result_sets,
1513 if (res != ZEBRA_OK)
1515 for (i = 0; i<num_result_sets; i++)
1518 res = rpn_search_APT_position(zh, zapt, attributeSet,
1520 rset_nmem, &first_set,
1522 if (res != ZEBRA_OK)
1524 for (i = 0; i<num_result_sets; i++)
1525 rset_delete(result_sets[i]);
/* replace the term set by "position set followed by term set" */
1533 tmp_set[0] = first_set;
1534 tmp_set[1] = result_sets[i];
1536 result_sets[i] = rset_create_prox(
1537 rset_nmem, kc, kc->scope,
1539 1 /* ordered */, 0 /* exclusion */,
1540 3 /* relation */, 1 /* distance */);
1545 if (num_result_sets == 0)
1546 *rset = rset_create_null(rset_nmem, kc, 0);
1547 else if (num_result_sets == 1)
1548 *rset = result_sets[0];
1550 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1551 num_result_sets, result_sets);
/* Append to term_dict a regular expression for a numeric term under the
   relation attribute (type 2) of zapt, then run it with dict_lookup_grep().
   The term is read by term_100() into a temporary WRBUF and converted with
   atoi().  Bounds handed to gen_regular_rel():
     <   value-1, islt=1        <=  value,   islt=1
     >=  value,   islt=0        >   value+1, islt=0
   Equality uses "(0*N)", which allows leading zeros.  For an unsupported
   relation *error_code is set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.  The
   temporary WRBUF is destroyed on every visible exit path. */
1557 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1558 const char **term_sub,
1560 const Odr_oid *attributeSet,
1561 struct grep_info *grep_info,
1571 WRBUF term_num = wrbuf_alloc();
1574 attr_init_APT(&relation, zapt, 2);
1575 relation_value = attr_find(&relation, NULL);
1577 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1579 switch (relation_value)
1582 yaz_log(log_level_rpn, "Relation <");
1583 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1585 wrbuf_destroy(term_num);
1588 term_value = atoi(wrbuf_cstr(term_num));
1589 gen_regular_rel(term_dict, term_value-1, 1);
1592 yaz_log(log_level_rpn, "Relation <=");
1593 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1595 wrbuf_destroy(term_num);
1598 term_value = atoi(wrbuf_cstr(term_num));
1599 gen_regular_rel(term_dict, term_value, 1);
1602 yaz_log(log_level_rpn, "Relation >=");
1603 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1605 wrbuf_destroy(term_num);
1608 term_value = atoi(wrbuf_cstr(term_num));
1609 gen_regular_rel(term_dict, term_value, 0);
1612 yaz_log(log_level_rpn, "Relation >");
1613 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1615 wrbuf_destroy(term_num);
1618 term_value = atoi(wrbuf_cstr(term_num));
1619 gen_regular_rel(term_dict, term_value+1, 0);
1623 yaz_log(log_level_rpn, "Relation =");
1624 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1626 wrbuf_destroy(term_num);
1629 term_value = atoi(wrbuf_cstr(term_num));
1630 wrbuf_printf(term_dict, "(0*%d)", term_value);
1633 /* term_tmp untouched.. */
1634 while (**term_sub != '\0')
1638 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1639 wrbuf_destroy(term_num);
1642 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1643 0, grep_info, max_pos, 0, grep_handle);
1646 zebra_set_partial_result(zh);
1648 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1649 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1650 wrbuf_destroy(term_num);
/* Collect the ISAM positions for one numeric term.  term_dict is rewound
   and given the escaped index ordinal in parentheses (each byte preceded by
   the escape byte 1); numeric_relation() then appends the numeric
   expression and performs the dictionary lookup.  A failure with
   relation_error set is reported through zebra_setError().  *ol receives
   the ordinals that were searched. */
1654 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1655 const char **term_sub,
1657 const Odr_oid *attributeSet, NMEM stream,
1658 struct grep_info *grep_info,
1659 const char *index_type, int complete_flag,
1661 const char *xpath_use,
1662 struct ord_list **ol)
1665 struct rpn_char_map_info rcmi;
1667 int relation_error = 0;
1668 int ord, ord_len, i;
1670 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1672 *ol = ord_list_create(stream);
1674 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1678 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1679 attributeSet, &ord) != ZEBRA_OK)
1684 wrbuf_rewind(term_dict);
1686 *ol = ord_list_append(stream, *ol, ord);
1688 ord_len = key_SU_encode(ord, ord_buf);
1690 wrbuf_putc(term_dict, '(');
1691 for (i = 0; i < ord_len; i++)
1693 wrbuf_putc(term_dict, 1);
1694 wrbuf_putc(term_dict, ord_buf[i]);
1696 wrbuf_putc(term_dict, ')');
1698 if (!numeric_relation(zh, zapt, &termp, term_dict,
1699 attributeSet, grep_info, &max_pos, zm,
1700 term_dst, &relation_error))
1704 zebra_setError(zh, relation_error, 0);
1711 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Numeric search over the terms of termz.  For each term a fresh WRBUF is
   allocated, numeric_term() collects the positions and rset_trunc() builds
   a result set (position information is not preserved: argument 0).  The
   result array is taken from stream and regrown when full.
   Result: a null set for zero sets, the set itself for one, otherwise the
   AND of all sets. */
1716 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1717 Z_AttributesPlusTerm *zapt,
1719 const Odr_oid *attributeSet,
1721 const char *index_type,
1723 const char *rank_type,
1724 const char *xpath_use,
1727 struct rset_key_control *kc)
1729 char term_dst[IT_MAX_WORD+1];
1730 const char *termp = termz;
1731 RSET *result_sets = 0;
1732 int num_result_sets = 0;
1734 struct grep_info grep_info;
1736 zint hits_limit_value;
1737 const char *term_ref_id_str = 0;
1739 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1742 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1743 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1747 struct ord_list *ol;
1748 WRBUF term_dict = wrbuf_alloc();
/* grow the result array when every allocated slot is in use */
1749 if (alloc_sets == num_result_sets)
1752 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1755 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1756 alloc_sets = alloc_sets + add;
1759 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1760 grep_info.isam_p_indx = 0;
1761 res = numeric_term(zh, zapt, &termp, term_dict,
1762 attributeSet, stream, &grep_info,
1763 index_type, complete_flag,
1764 term_dst, xpath_use, &ol);
1765 wrbuf_destroy(term_dict);
/* a failure or a null termp ends the per-term processing */
1766 if (res == ZEBRA_FAIL || termp == 0)
1768 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1769 result_sets[num_result_sets] =
1770 rset_trunc(zh, grep_info.isam_p_buf,
1771 grep_info.isam_p_indx, term_dst,
1772 strlen(term_dst), rank_type,
1773 0 /* preserve position */,
1774 zapt->term->which, rset_nmem,
1775 kc, kc->scope, ol, index_type,
1778 if (!result_sets[num_result_sets])
1784 grep_info_delete(&grep_info);
1786 if (res != ZEBRA_OK)
1788 if (num_result_sets == 0)
1789 *rset = rset_create_null(rset_nmem, kc, 0);
1790 else if (num_result_sets == 1)
1791 *rset = result_sets[0];
1793 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1794 num_result_sets, result_sets);
/* Search using the local strategy: the term is a record system number.
 *
 * termz is converted with atozint() into sysno and the record is fetched
 * with rec_get(). The visible lines set *rset either to a null set or to a
 * temporary set (created with the value of the setTmpDir resource) that is
 * opened for writing and receives one key.
 *
 * NOTE(review): the conditions selecting between the two outcomes, and the
 * construction of key, are not visible in this view.
 */
1800 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1801 Z_AttributesPlusTerm *zapt,
1803 const Odr_oid *attributeSet,
1805 const char *rank_type, NMEM rset_nmem,
1807 struct rset_key_control *kc)
1810 zint sysno = atozint(termz);
1814 rec = rec_get(zh->reg->records, sysno);
1822 *rset = rset_create_null(rset_nmem, kc, 0);
1828 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1829 res_get(zh->res, "setTmpDir"), 0);
1830 rsfd = rset_open(*rset, RSETF_WRITE);
1835 rset_write(rsfd, &key);
/* Turn an APT into a sort key specification instead of a search.
 *
 * A Z_SortKeySpec is built from the attribute list of the APT and stored in
 * sort_sequence->specs at the slot selected by the integer value of the
 * general term. The specs array is allocated on first use. All allocations
 * come from the stream NMEM. *rset is set to a null set.
 *
 * NOTE(review): some lines (locals, the failure path for non-general terms,
 * the action taken when the slot is out of range) are not visible here.
 */
1841 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1842 const Odr_oid *attributeSet, NMEM stream,
1843 Z_SortKeySpecList *sort_sequence,
1844 const char *rank_type,
1847 struct rset_key_control *kc)
1850 int sort_relation_value;
1851 AttrType sort_relation_type;
/* attribute type 7 supplies the sort relation value tested further down */
1856 attr_init_APT(&sort_relation_type, zapt, 7);
1857 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first use: allocate 10 spec slots and set every slot to 0 */
1859 if (!sort_sequence->specs)
1861 sort_sequence->num_specs = 10;
1862 sort_sequence->specs = (Z_SortKeySpec **)
1863 nmem_malloc(stream, sort_sequence->num_specs *
1864 sizeof(*sort_sequence->specs));
1865 for (i = 0; i<sort_sequence->num_specs; i++)
1866 sort_sequence->specs[i] = 0;
1868 if (zapt->term->which != Z_Term_general)
/* the text of the general term, parsed as an integer, selects the slot */
1871 i = atoi_n((char *) zapt->term->u.general->buf,
1872 zapt->term->u.general->len);
/* NOTE(review): only the upper bound of i is checked in the visible lines;
   confirm that atoi_n() cannot yield a negative slot index here */
1873 if (i >= sort_sequence->num_specs)
1875 sprintf(termz, "%d", i);
/* generic sort element keyed by sort attributes: a copy of attributeSet as
   the id and the attribute list of the APT as the list */
1877 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1878 sks->sortElement = (Z_SortElement *)
1879 nmem_malloc(stream, sizeof(*sks->sortElement));
1880 sks->sortElement->which = Z_SortElement_generic;
1881 sk = sks->sortElement->u.generic = (Z_SortKey *)
1882 nmem_malloc(stream, sizeof(*sk));
1883 sk->which = Z_SortKey_sortAttributes;
1884 sk->u.sortAttributes = (Z_SortAttributes *)
1885 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1887 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1888 sk->u.sortAttributes->list = zapt->attributes;
/* relation value 1 gives ascending and 2 gives descending; the third
   assignment (ascending) is presumably the fallback for other values */
1890 sks->sortRelation = (int *)
1891 nmem_malloc(stream, sizeof(*sks->sortRelation));
1892 if (sort_relation_value == 1)
1893 *sks->sortRelation = Z_SortKeySpec_ascending;
1894 else if (sort_relation_value == 2)
1895 *sks->sortRelation = Z_SortKeySpec_descending;
1897 *sks->sortRelation = Z_SortKeySpec_ascending;
1899 sks->caseSensitivity = (int *)
1900 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1901 *sks->caseSensitivity = 0;
1903 sks->which = Z_SortKeySpec_null;
1904 sks->u.null = odr_nullval ();
/* store the spec in its slot; the search result itself is a null set */
1905 sort_sequence->specs[i] = sks;
1906 *rset = rset_create_null(rset_nmem, kc, 0);
/* Check whether the use attribute of an APT is an X-Path expression and,
 * if so, parse it into location steps.
 *
 * The string form of attribute type 1 is fetched with attr_find_ex(). When
 * it is present and begins with a slash, the function returns the result of
 * zebra_parse_xpath_str() on it, which fills xpath with at most max steps.
 *
 * NOTE(review): the value returned when the string is missing or does not
 * start with a slash is not visible in this view.
 */
1911 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1912 const Odr_oid *attributeSet,
1913 struct xpath_location_step *xpath, int max,
1916 const Odr_oid *curAttributeSet = attributeSet;
1918 const char *use_string = 0;
/* attribute type 1 is looked up; only its string value is of interest */
1920 attr_init_APT(&use, zapt, 1);
1921 attr_find_ex(&use, &curAttributeSet, &use_string);
/* anything that is not a string starting with a slash is not an X-Path */
1923 if (!use_string || *use_string != '/')
1926 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Build a result set for all dictionary entries of one X-Path index that
 * match the regular expression term.
 *
 * The ordinal for (index_type, xpath_use) is looked up in the explain
 * database. The dictionary pattern is an opening parenthesis, each byte of
 * the encoded ordinal preceded by a byte with value 1, a closing
 * parenthesis, and then term itself. dict_lookup_grep() collects matches
 * into grep_info through grep_handle, and rset_trunc() turns them into the
 * returned RSET.
 *
 * A null set is returned when grep_info_prepare() fails and in one further
 * early-exit case whose condition is not visible in this view.
 */
1931 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1932 const char *index_type, const char *term,
1933 const char *xpath_use,
1935 struct rset_key_control *kc)
1937 struct grep_info grep_info;
1938 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1939 zinfo_index_category_index,
1940 index_type, xpath_use);
1941 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
1942 return rset_create_null(rset_nmem, kc, 0);
1945 return rset_create_null(rset_nmem, kc, 0);
1951 WRBUF term_dict = wrbuf_alloc();
1952 int ord_len = key_SU_encode(ord, ord_buf);
1953 int term_type = Z_Term_characterString;
1954 const char *flags = "void";
/* pattern prefix: the encoded ordinal inside parentheses, every byte
   preceded by a byte with value 1 (presumably an escape marker - confirm) */
1956 wrbuf_putc(term_dict, '(');
1957 for (i = 0; i<ord_len; i++)
1959 wrbuf_putc(term_dict, 1);
1960 wrbuf_putc(term_dict, ord_buf[i]);
1962 wrbuf_putc(term_dict, ')');
1963 wrbuf_puts(term_dict, term);
/* collect matching dictionary entries, then build the set from them */
1965 grep_info.isam_p_indx = 0;
1966 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
1967 &grep_info, &max_pos, 0, grep_handle);
1968 yaz_log(YLOG_DEBUG, "%s %d positions", term,
1969 grep_info.isam_p_indx);
1970 rset = rset_trunc(zh, grep_info.isam_p_buf,
1971 grep_info.isam_p_indx, term, strlen(term),
1972 flags, 1, term_type, rset_nmem,
1973 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
1974 0 /* term_ref_id_str */);
/* both helpers allocated in this function are released before returning */
1975 grep_info_delete(&grep_info);
1976 wrbuf_destroy(term_dict);
/* Restrict a result set to hits that lie inside the elements named by an
 * X-Path expression.
 *
 * For each level, counting down from xpath_len, a regular expression
 * xpath_rev is assembled from the step names in reverse order (the loop
 * runs i from level down to 1). Sets for the element begin and end tags
 * are obtained with xpath_trunc(), plus an attribute set when the step has
 * a relation predicate, and the current rset is wrapped with
 * rset_create_between(). A null incoming rset sets always_matches.
 *
 * NOTE(review): several lines of this function, including the delimiters
 * of the example comment below and parts of the loops, are not visible in
 * this view - confirm details against the full source.
 */
1982 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1983 NMEM stream, const char *rank_type, RSET rset,
1984 int xpath_len, struct xpath_location_step *xpath,
1987 struct rset_key_control *kc)
1990 int always_matches = rset ? 0 : 1;
1998 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
1999 for (i = 0; i<xpath_len; i++)
2001 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2013 a[@attr = value]/b[@other = othervalue]
2015 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2016 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2017 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2018 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2019 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2020 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* the dictionary grep is used here with no character map and no handler */
2024 dict_grep_cmap(zh->reg->dict, 0, 0);
2027 int level = xpath_len;
/* one pass per level, innermost level first */
2030 while (--level >= 0)
2032 WRBUF xpath_rev = wrbuf_alloc();
2034 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* assemble the reversed path pattern from step level down to step 1 */
2036 for (i = level; i >= 1; --i)
2038 const char *cp = xpath[i].part;
2044 wrbuf_puts(xpath_rev, "[^/]*");
2045 else if (*cp == ' ')
2046 wrbuf_puts(xpath_rev, "\001 ");
2048 wrbuf_putc(xpath_rev, *cp);
2050 /* wrbuf_putc does not null-terminate, but
2051 wrbuf_puts below ensures it does.. so xpath_rev
2052 is OK iff length is > 0 */
2054 wrbuf_puts(xpath_rev, "/");
2056 else if (i == 1) /* // case */
2057 wrbuf_puts(xpath_rev, ".*");
/* a relation predicate with a non-empty name yields an attribute set; the
   name is used from its second character on and, when a value is present,
   an equals sign and the value follow, with every character found in
   REGEX_CHARS preceded by a backslash */
2059 if (xpath[level].predicate &&
2060 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2061 xpath[level].predicate->u.relation.name[0])
2063 WRBUF wbuf = wrbuf_alloc();
2064 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2065 if (xpath[level].predicate->u.relation.value)
2067 const char *cp = xpath[level].predicate->u.relation.value;
2068 wrbuf_putc(wbuf, '=');
2072 if (strchr(REGEX_CHARS, *cp))
2073 wrbuf_putc(wbuf, '\\');
2074 wrbuf_putc(wbuf, *cp);
2078 rset_attr = xpath_trunc(
2079 zh, stream, "0", wrbuf_cstr(wbuf),
2080 ZEBRA_XPATH_ATTR_NAME,
2082 wrbuf_destroy(wbuf);
2088 wrbuf_destroy(xpath_rev);
2092 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2093 wrbuf_cstr(xpath_rev));
/* tag sets are only looked up when a non-empty pattern was built */
2094 if (wrbuf_len(xpath_rev))
2096 rset_start_tag = xpath_trunc(zh, stream, "0",
2097 wrbuf_cstr(xpath_rev),
2098 ZEBRA_XPATH_ELM_BEGIN,
2101 rset = rset_start_tag;
2104 rset_end_tag = xpath_trunc(zh, stream, "0",
2105 wrbuf_cstr(xpath_rev),
2106 ZEBRA_XPATH_ELM_END,
/* keep only what lies between the begin and end tag sets, optionally
   qualified by the attribute set */
2109 rset = rset_create_between(rset_nmem, kc, kc->scope,
2110 rset_start_tag, rset,
2111 rset_end_tag, rset_attr);
2114 wrbuf_destroy(xpath_rev);
/* Number of entries in the xpath_location_step array that
   rpn_search_database declares and passes to rpn_check_xpath as max */
2122 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT calls rpn_search_database before the
   definition of the latter appears in this file */
2124 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2125 Z_AttributesPlusTerm *zapt,
2126 const Odr_oid *attributeSet, NMEM stream,
2127 Z_SortKeySpecList *sort_sequence,
2130 struct rset_key_control *kc);
/* Run one APT search against every database named in basenames and merge
 * the per-database result sets into *rset.
 *
 * An array of num_bases sets is taken from the stream NMEM. For each
 * database, a non-zero return from zebraExplain_curDatabase() is reported
 * as YAZ_BIB1_DATABASE_UNAVAILABLE; otherwise rpn_search_database() fills
 * the matching array slot. When the result is not ZEBRA_OK the sets created
 * so far are deleted. With num_bases equal to 0 a null set is produced, and
 * an OR set is built from the array in the last visible branch.
 *
 * NOTE(review): the first branch of the final if/else chain and a few loop
 * lines are not visible in this view.
 */
2132 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2133 const Odr_oid *attributeSet, NMEM stream,
2134 Z_SortKeySpecList *sort_sequence,
2135 int num_bases, const char **basenames,
2138 struct rset_key_control *kc)
2140 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2141 ZEBRA_RES res = ZEBRA_OK;
/* one search per database; slot i of rsets receives the result */
2143 for (i = 0; i < num_bases; i++)
2146 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2148 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2153 res = rpn_search_database(zh, zapt, attributeSet, stream,
2155 rset_nmem, rsets+i, kc);
2156 if (res != ZEBRA_OK)
2159 if (res != ZEBRA_OK)
2160 { /* must clean up the already created sets */
2162 rset_delete(rsets[i]);
2169 else if (num_bases == 0)
2170 *rset = rset_create_null(rset_nmem, kc, 0);
2172 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* Search a single database for one APT.
 *
 * zebra_maps_attr() derives the index type, search type, rank type and the
 * complete and sort flags from the APT, and the term is converted into
 * termz. One visible path hands the APT to rpn_sort_spec(). The use
 * attribute is checked for an X-Path expression; when the relation
 * attribute has value 103 (alwaysmatches) the X-Path search runs without a
 * term set. Otherwise search_type selects the strategy: phrase, and-list,
 * or-list, local or numeric, with YAZ_BIB1_UNSUPP_SEARCH set in the
 * remaining visible branch. The function ends by returning the result of
 * rpn_search_xpath() on the set produced.
 *
 * NOTE(review): several conditions (for instance the one guarding the call
 * to rpn_sort_spec) are not visible in this view - confirm against the
 * full source.
 */
2178 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2179 Z_AttributesPlusTerm *zapt,
2180 const Odr_oid *attributeSet, NMEM stream,
2181 Z_SortKeySpecList *sort_sequence,
2184 struct rset_key_control *kc)
2186 ZEBRA_RES res = ZEBRA_OK;
2187 const char *index_type;
2188 char *search_type = NULL;
2189 char rank_type[128];
2192 char termz[IT_MAX_WORD+1];
2194 const char *xpath_use = 0;
2195 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2199 log_level_rpn = yaz_log_module_level("rpn");
/* derive the search parameters from the attributes of the APT */
2202 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2203 rank_type, &complete_flag, &sort_flag);
2205 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2206 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2207 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2208 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
/* termz receives the search term */
2210 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2214 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2215 rank_type, rset_nmem, rset, kc);
2216 /* consider if an X-Path query is used */
2217 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2218 xpath, MAX_XPATH_STEPS, stream);
/* the last location step decides which X-Path use attribute is searched */
2221 if (xpath[xpath_len-1].part[0] == '@')
2222 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2224 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* attribute type 2 is the relation attribute */
2231 attr_init_APT(&relation, zapt, 2);
2232 relation_value = attr_find(&relation, NULL);
2234 if (relation_value == 103) /* alwaysmatches */
2236 *rset = 0; /* signal no "term" set */
2237 return rpn_search_xpath(zh, stream, rank_type, *rset,
2238 xpath_len, xpath, rset_nmem, rset, kc);
2243 /* search using one of the various search type strategies
2244 termz is our UTF-8 search term
2245 attributeSet is top-level default attribute set
2246 stream is ODR for search
2247 reg_id is the register type
2248 complete_flag is 1 for complete subfield, 0 for incomplete
2249 xpath_use is use-attribute to be used for X-Path search, 0 for none
2251 if (!strcmp(search_type, "phrase"))
2253 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2254 index_type, complete_flag, rank_type,
2259 else if (!strcmp(search_type, "and-list"))
2261 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2262 index_type, complete_flag, rank_type,
2267 else if (!strcmp(search_type, "or-list"))
2269 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2270 index_type, complete_flag, rank_type,
2275 else if (!strcmp(search_type, "local"))
2277 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2278 rank_type, rset_nmem, rset, kc);
2280 else if (!strcmp(search_type, "numeric"))
2282 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2283 index_type, complete_flag, rank_type,
2290 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2293 if (res != ZEBRA_OK)
2297 return rpn_search_xpath(zh, stream, rank_type, *rset,
2298 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top calls rpn_search_structure before
   the definition of the latter appears in this file */
2301 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2302 const Odr_oid *attributeSet,
2303 NMEM stream, NMEM rset_nmem,
2304 Z_SortKeySpecList *sort_sequence,
2305 int num_bases, const char **basenames,
2306 RSET **result_sets, int *num_result_sets,
2307 Z_Operator *parent_op,
2308 struct rset_key_control *kc);
/* Walk an RPN query tree looking at attribute type 12 of its terms.
 *
 * Complex nodes recurse into s1 and then s2, each only while res is still
 * ZEBRA_OK. For a simple node holding an APT, attribute type 12 is looked
 * up with attr_find() into l.
 *
 * NOTE(review): the remaining parameters and what is done with l are not
 * visible in this view; judging by the names this is presumably a global
 * hits (approximation) limit handed back to the caller - confirm.
 */
2310 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2313 ZEBRA_RES res = ZEBRA_OK;
2314 if (zs->which == Z_RPNStructure_complex)
/* left operand first, then the right one, stopping at the first failure */
2316 if (res == ZEBRA_OK)
2317 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2319 if (res == ZEBRA_OK)
2320 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2323 else if (zs->which == Z_RPNStructure_simple)
2325 if (zs->u.simple->which == Z_Operand_APT)
2327 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2328 AttrType global_hits_limit_attr;
2331 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2333 l = attr_find(&global_hits_limit_attr, NULL);
/* Entry point for evaluating a whole RPN query tree.
 *
 * A key control object is created with zebra_key_control_create() and the
 * tree is evaluated by rpn_search_structure() with no parent operator.
 * When the result is not ZEBRA_OK every returned set is deleted. Otherwise
 * the code asserts that exactly one non-null set came back and stores it
 * in *result_set.
 *
 * NOTE(review): the last parameter line, the return statements and any
 * release of kc are not visible in this view.
 */
2341 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2342 const Odr_oid *attributeSet,
2343 NMEM stream, NMEM rset_nmem,
2344 Z_SortKeySpecList *sort_sequence,
2345 int num_bases, const char **basenames,
2348 RSET *result_sets = 0;
2349 int num_result_sets = 0;
2351 struct rset_key_control *kc = zebra_key_control_create(zh);
2353 res = rpn_search_structure(zh, zs, attributeSet,
2356 num_bases, basenames,
2357 &result_sets, &num_result_sets,
2358 0 /* no parent op */,
2360 if (res != ZEBRA_OK)
/* failure: drop whatever sets were produced */
2363 for (i = 0; i<num_result_sets; i++)
2364 rset_delete(result_sets[i]);
/* success: the top level must have been reduced to exactly one set */
2369 assert(num_result_sets == 1);
2370 assert(result_sets);
2371 assert(*result_sets);
2372 *result_set = *result_sets;
/* Recursively evaluate one node of an RPN query tree.
 *
 * The node result is handed back as an array (*result_sets) with
 * *num_result_sets entries, allocated from the stream NMEM.
 *
 * Complex node: both operands are evaluated with this node operator passed
 * down as parent_op. If either evaluation fails, the sets produced so far
 * are deleted. The two child arrays are concatenated. They are combined
 * into one set right away unless parent_op has the same operator kind and
 * that kind is AND or OR; in that case the concatenated list is left for
 * the parent to combine. Combination uses rset_create_and, rset_create_or,
 * rset_create_not or rset_create_prox; the remaining visible branch sets
 * YAZ_BIB1_OPERATOR_UNSUPP.
 *
 * Simple node: an APT is searched with rpn_search_APT(), a result set id
 * is resolved with resultSetRef(), and the remaining visible branch sets
 * YAZ_BIB1_UNSUPP_SEARCH. The single set becomes a one-element array.
 *
 * NOTE(review): return statements, some case labels and argument lines are
 * not visible in this view - confirm details against the full source.
 */
2378 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2379 const Odr_oid *attributeSet,
2380 NMEM stream, NMEM rset_nmem,
2381 Z_SortKeySpecList *sort_sequence,
2382 int num_bases, const char **basenames,
2383 RSET **result_sets, int *num_result_sets,
2384 Z_Operator *parent_op,
2385 struct rset_key_control *kc)
2387 *num_result_sets = 0;
2388 if (zs->which == Z_RPNStructure_complex)
2391 Z_Operator *zop = zs->u.complex->roperator;
2392 RSET *result_sets_l = 0;
2393 int num_result_sets_l = 0;
2394 RSET *result_sets_r = 0;
2395 int num_result_sets_r = 0;
/* left operand */
2397 res = rpn_search_structure(zh, zs->u.complex->s1,
2398 attributeSet, stream, rset_nmem,
2400 num_bases, basenames,
2401 &result_sets_l, &num_result_sets_l,
2403 if (res != ZEBRA_OK)
2406 for (i = 0; i<num_result_sets_l; i++)
2407 rset_delete(result_sets_l[i]);
/* right operand; on failure the sets of both sides are deleted */
2410 res = rpn_search_structure(zh, zs->u.complex->s2,
2411 attributeSet, stream, rset_nmem,
2413 num_bases, basenames,
2414 &result_sets_r, &num_result_sets_r,
2416 if (res != ZEBRA_OK)
2419 for (i = 0; i<num_result_sets_l; i++)
2420 rset_delete(result_sets_l[i]);
2421 for (i = 0; i<num_result_sets_r; i++)
2422 rset_delete(result_sets_r[i]);
2426 /* make a new list of result for all children */
2427 *num_result_sets = num_result_sets_l + num_result_sets_r;
2428 *result_sets = nmem_malloc(stream, *num_result_sets *
2429 sizeof(**result_sets));
2430 memcpy(*result_sets, result_sets_l,
2431 num_result_sets_l * sizeof(**result_sets));
2432 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2433 num_result_sets_r * sizeof(**result_sets));
2435 if (!parent_op || parent_op->which != zop->which
2436 || (zop->which != Z_Operator_and &&
2437 zop->which != Z_Operator_or))
2439 /* parent node different from this one (or non-present) */
2440 /* we must combine result sets now */
2444 case Z_Operator_and:
2445 rset = rset_create_and(rset_nmem, kc,
2447 *num_result_sets, *result_sets);
2450 rset = rset_create_or(rset_nmem, kc,
2451 kc->scope, 0, /* termid */
2452 *num_result_sets, *result_sets);
2454 case Z_Operator_and_not:
2455 rset = rset_create_not(rset_nmem, kc,
/* proximity: a unit that is not of the known kind, or a known unit other
   than word, is reported as YAZ_BIB1_UNSUPP_PROX_UNIT_CODE */
2460 case Z_Operator_prox:
2461 if (zop->u.prox->which != Z_ProximityOperator_known)
2464 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2468 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2470 zebra_setError_zint(zh,
2471 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2472 *zop->u.prox->u.known);
2477 rset = rset_create_prox(rset_nmem, kc,
2479 *num_result_sets, *result_sets,
2480 *zop->u.prox->ordered,
2481 (!zop->u.prox->exclusion ?
2482 0 : *zop->u.prox->exclusion),
2483 *zop->u.prox->relationType,
2484 *zop->u.prox->distance );
2488 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the concatenated list as the only result */
2491 *num_result_sets = 1;
2492 *result_sets = nmem_malloc(stream, *num_result_sets *
2493 sizeof(**result_sets));
2494 (*result_sets)[0] = rset;
2497 else if (zs->which == Z_RPNStructure_simple)
2502 if (zs->u.simple->which == Z_Operand_APT)
2504 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2505 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2506 attributeSet, stream, sort_sequence,
2507 num_bases, basenames, rset_nmem, &rset,
2509 if (res != ZEBRA_OK)
2512 else if (zs->u.simple->which == Z_Operand_resultSetId)
2514 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2515 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2519 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2520 zs->u.simple->u.resultSetId);
2527 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a leaf always yields a one-element result list */
2530 *num_result_sets = 1;
2531 *result_sets = nmem_malloc(stream, *num_result_sets *
2532 sizeof(**result_sets));
2533 (*result_sets)[0] = rset;
2537 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2548 * indent-tabs-mode: nil
2550 * vim: shiftwidth=4 tabstop=8 expandtab