1 /* $Id: zrpn.c,v 1.219 2006-06-22 15:44:44 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 struct rpn_char_map_info
46 static int log_level_set = 0;
47 static int log_level_rpn = 0;
49 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
51 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
52 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
56 const char *outp = *out;
57 yaz_log(YLOG_LOG, "---");
60 yaz_log(YLOG_LOG, "%02X", *outp);
68 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
69 struct rpn_char_map_info *map_info)
71 map_info->zm = reg->zebra_maps;
72 map_info->reg_type = reg_type;
73 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
90 void zebra_term_untrans(ZebraHandle zh, int reg_type,
91 char *dst, const char *src)
96 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
100 if (len < IT_MAX_WORD-1)
105 while (*cp && len < IT_MAX_WORD-1)
111 static void add_isam_p(const char *name, const char *info,
116 log_level_rpn = yaz_log_module_level("rpn");
119 if (p->isam_p_indx == p->isam_p_size)
121 ISAM_P *new_isam_p_buf;
125 p->isam_p_size = 2*p->isam_p_size + 100;
126 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
130 memcpy(new_isam_p_buf, p->isam_p_buf,
131 p->isam_p_indx * sizeof(*p->isam_p_buf));
132 xfree(p->isam_p_buf);
134 p->isam_p_buf = new_isam_p_buf;
137 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
140 memcpy(new_term_no, p->isam_p_buf,
141 p->isam_p_indx * sizeof(*p->term_no));
144 p->term_no = new_term_no;
147 assert(*info == sizeof(*p->isam_p_buf));
148 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
154 char term_tmp[IT_MAX_WORD];
156 const char *index_name;
157 int len = key_SU_decode (&ord, (const unsigned char *) name);
159 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len+1);
160 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
161 zebraExplain_lookup_ord(p->zh->reg->zei,
162 ord, 0 /* index_type */, &db, &index_name);
163 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
165 resultSetAddTerm(p->zh, p->termset, name[len], db,
166 index_name, term_tmp);
172 static int grep_handle(char *name, const char *info, void *p)
174 add_isam_p(name, info, (struct grep_info *) p);
178 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
179 const char *ct1, const char *ct2, int first)
181 const char *s1, *s0 = *src;
184 /* skip white space */
187 if (ct1 && strchr(ct1, *s0))
189 if (ct2 && strchr(ct2, *s0))
192 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
193 if (**map != *CHR_SPACE)
202 static void esc_str(char *out_buf, size_t out_size,
203 const char *in_buf, int in_size)
209 assert(out_size > 20);
211 for (k = 0; k<in_size; k++)
213 int c = in_buf[k] & 0xff;
215 if (c < 32 || c > 126)
219 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
220 if (strlen(out_buf) > out_size-20)
222 strcat(out_buf, "..");
228 #define REGEX_CHARS " []()|.*+?!"
230 /* term_100: handle term, where trunc = none(no operators at all) */
231 static int term_100(ZebraMaps zebra_maps, int reg_type,
232 const char **src, char *dst, int space_split,
240 const char *space_start = 0;
241 const char *space_end = 0;
243 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
250 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
254 if (**map == *CHR_SPACE)
257 else /* complete subfield only. */
259 if (**map == *CHR_SPACE)
260 { /* save space mapping for later .. */
265 else if (space_start)
266 { /* reload last space */
267 while (space_start < space_end)
269 if (strchr(REGEX_CHARS, *space_start))
271 dst_term[j++] = *space_start;
272 dst[i++] = *space_start++;
275 space_start = space_end = 0;
278 /* add non-space char */
279 memcpy(dst_term+j, s1, s0 - s1);
285 if (strchr(REGEX_CHARS, *s1))
293 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
295 strcpy(dst + i, map[0]);
305 /* term_101: handle term, where trunc = Process # */
306 static int term_101(ZebraMaps zebra_maps, int reg_type,
307 const char **src, char *dst, int space_split,
315 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
324 dst_term[j++] = *s0++;
330 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
332 if (space_split && **map == *CHR_SPACE)
335 /* add non-space char */
336 memcpy(dst_term+j, s1, s0 - s1);
342 if (strchr(REGEX_CHARS, *s1))
350 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
352 strcpy(dst + i, map[0]);
358 dst_term[j++] = '\0';
363 /* term_103: handle term, where trunc = re-2 (regular expressions) */
364 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
365 char *dst, int *errors, int space_split,
373 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
376 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
377 isdigit(((const unsigned char *)s0)[1]))
379 *errors = s0[1] - '0';
386 if (strchr("^\\()[].*+?|-", *s0))
395 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
397 if (space_split && **map == *CHR_SPACE)
400 /* add non-space char */
401 memcpy(dst_term+j, s1, s0 - s1);
407 if (strchr(REGEX_CHARS, *s1))
415 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
417 strcpy(dst + i, map[0]);
429 /* term_102: handle term, where trunc = re-1 (regular expressions) */
430 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
431 char *dst, int space_split, char *dst_term)
433 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
438 /* term_104: handle term, where trunc = Process #, * and ? */
439 static int term_104(ZebraMaps zebra_maps, int reg_type,
440 const char **src, char *dst, int space_split,
448 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
455 dst_term[j++] = *s0++;
456 if (*s0 >= '0' && *s0 <= '9')
459 while (*s0 >= '0' && *s0 <= '9')
461 limit = limit * 10 + (*s0 - '0');
462 dst_term[j++] = *s0++;
482 dst_term[j++] = *s0++;
487 dst_term[j++] = *s0++;
493 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
495 if (space_split && **map == *CHR_SPACE)
498 /* add non-space char */
499 memcpy(dst_term+j, s1, s0 - s1);
505 if (strchr(REGEX_CHARS, *s1))
513 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
515 strcpy(dst + i, map[0]);
521 dst_term[j++] = '\0';
526 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
527 static int term_105(ZebraMaps zebra_maps, int reg_type,
528 const char **src, char *dst, int space_split,
529 char *dst_term, int right_truncate)
536 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
545 dst_term[j++] = *s0++;
550 dst_term[j++] = *s0++;
556 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
558 if (space_split && **map == *CHR_SPACE)
561 /* add non-space char */
562 memcpy(dst_term+j, s1, s0 - s1);
568 if (strchr(REGEX_CHARS, *s1))
576 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
578 strcpy(dst + i, map[0]);
590 dst_term[j++] = '\0';
596 /* gen_regular_rel - generate regular expression from relation
597 * val: border value (inclusive)
598 * islt: 1 if <=; 0 if >=.
600 static void gen_regular_rel(char *dst, int val, int islt)
607 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
611 strcpy(dst, "(-[0-9]+|(");
619 strcpy(dst, "([0-9]+|-(");
631 sprintf(numstr, "%d", val);
632 for (w = strlen(numstr); --w >= 0; pos++)
651 strcpy(dst + dst_p, numstr);
652 dst_p = strlen(dst) - pos - 1;
680 for (i = 0; i<pos; i++)
693 /* match everything less than 10^(pos-1) */
695 for (i = 1; i<pos; i++)
696 strcat(dst, "[0-9]?");
700 /* match everything greater than 10^pos */
701 for (i = 0; i <= pos; i++)
702 strcat(dst, "[0-9]");
703 strcat(dst, "[0-9]*");
708 void string_rel_add_char(char **term_p, const char *src, int *indx)
710 if (src[*indx] == '\\')
711 *(*term_p)++ = src[(*indx)++];
712 *(*term_p)++ = src[(*indx)++];
716 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
717 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
718 * >= abc ([b-].*|a[c-].*|ab[c-].*)
719 * ([^-a].*|a[^-b].*|ab[c-].*)
720 * < abc ([-0].*|a[-a].*|ab[-b].*)
721 * ([^a-].*|a[^b-].*|ab[^c-].*)
722 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
723 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
725 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
726 const char **term_sub, char *term_dict,
727 oid_value attributeSet,
728 int reg_type, int space_split, char *term_dst,
734 char *term_tmp = term_dict + strlen(term_dict);
735 char term_component[2*IT_MAX_WORD+20];
737 attr_init_APT(&relation, zapt, 2);
738 relation_value = attr_find(&relation, NULL);
741 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
742 switch (relation_value)
745 if (!term_100(zh->reg->zebra_maps, reg_type,
746 term_sub, term_component,
747 space_split, term_dst))
749 yaz_log(log_level_rpn, "Relation <");
752 for (i = 0; term_component[i]; )
759 string_rel_add_char(&term_tmp, term_component, &j);
764 string_rel_add_char(&term_tmp, term_component, &i);
771 if ((term_tmp - term_dict) > IT_MAX_WORD)
778 if (!term_100(zh->reg->zebra_maps, reg_type,
779 term_sub, term_component,
780 space_split, term_dst))
782 yaz_log(log_level_rpn, "Relation <=");
785 for (i = 0; term_component[i]; )
790 string_rel_add_char(&term_tmp, term_component, &j);
794 string_rel_add_char(&term_tmp, term_component, &i);
803 if ((term_tmp - term_dict) > IT_MAX_WORD)
806 for (i = 0; term_component[i]; )
807 string_rel_add_char(&term_tmp, term_component, &i);
812 if (!term_100 (zh->reg->zebra_maps, reg_type,
813 term_sub, term_component, space_split, term_dst))
815 yaz_log(log_level_rpn, "Relation >");
818 for (i = 0; term_component[i];)
823 string_rel_add_char(&term_tmp, term_component, &j);
828 string_rel_add_char(&term_tmp, term_component, &i);
836 if ((term_tmp - term_dict) > IT_MAX_WORD)
839 for (i = 0; term_component[i];)
840 string_rel_add_char(&term_tmp, term_component, &i);
847 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
848 term_component, space_split, term_dst))
850 yaz_log(log_level_rpn, "Relation >=");
853 for (i = 0; term_component[i];)
860 string_rel_add_char(&term_tmp, term_component, &j);
863 if (term_component[i+1])
867 string_rel_add_char(&term_tmp, term_component, &i);
871 string_rel_add_char(&term_tmp, term_component, &i);
878 if ((term_tmp - term_dict) > IT_MAX_WORD)
890 yaz_log(log_level_rpn, "Relation =");
891 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
892 term_component, space_split, term_dst))
894 strcat(term_tmp, "(");
895 strcat(term_tmp, term_component);
896 strcat(term_tmp, ")");
899 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
905 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
906 const char **term_sub,
907 oid_value attributeSet, NMEM stream,
908 struct grep_info *grep_info,
909 int reg_type, int complete_flag,
910 int num_bases, char **basenames,
912 const char *xpath_use,
913 struct ord_list **ol);
915 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
916 Z_AttributesPlusTerm *zapt,
917 zint *hits_limit_value,
918 const char **term_ref_id_str,
921 AttrType term_ref_id_attr;
922 AttrType hits_limit_attr;
925 attr_init_APT(&hits_limit_attr, zapt, 9);
926 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
928 attr_init_APT(&term_ref_id_attr, zapt, 10);
929 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
930 if (term_ref_id_int >= 0)
932 char *res = nmem_malloc(nmem, 20);
933 sprintf(res, "%d", term_ref_id_int);
934 *term_ref_id_str = res;
937 /* no limit given ? */
938 if (*hits_limit_value == -1)
940 if (*term_ref_id_str)
942 /* use global if term_ref is present */
943 *hits_limit_value = zh->approx_limit;
947 /* no counting if term_ref is not present */
948 *hits_limit_value = 0;
951 else if (*hits_limit_value == 0)
953 /* 0 is the same as global limit */
954 *hits_limit_value = zh->approx_limit;
956 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
957 *term_ref_id_str ? *term_ref_id_str : "none",
962 static ZEBRA_RES term_trunc(ZebraHandle zh,
963 Z_AttributesPlusTerm *zapt,
964 const char **term_sub,
965 oid_value attributeSet, NMEM stream,
966 struct grep_info *grep_info,
967 int reg_type, int complete_flag,
968 int num_bases, char **basenames,
970 const char *rank_type,
971 const char *xpath_use,
974 struct rset_key_control *kc)
978 zint hits_limit_value;
979 const char *term_ref_id_str = 0;
982 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
984 grep_info->isam_p_indx = 0;
985 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
986 reg_type, complete_flag, num_bases, basenames,
987 term_dst, xpath_use, &ol);
990 if (!*term_sub) /* no more terms ? */
992 yaz_log(log_level_rpn, "term: %s", term_dst);
993 *rset = rset_trunc(zh, grep_info->isam_p_buf,
994 grep_info->isam_p_indx, term_dst,
995 strlen(term_dst), rank_type, 1 /* preserve pos */,
996 zapt->term->which, rset_nmem,
997 kc, kc->scope, ol, reg_type, hits_limit_value,
1004 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1005 const char **term_sub,
1006 oid_value attributeSet, NMEM stream,
1007 struct grep_info *grep_info,
1008 int reg_type, int complete_flag,
1009 int num_bases, char **basenames,
1011 const char *xpath_use,
1012 struct ord_list **ol)
1014 char term_dict[2*IT_MAX_WORD+4000];
1016 AttrType truncation;
1017 int truncation_value;
1018 oid_value curAttributeSet = attributeSet;
1020 struct rpn_char_map_info rcmi;
1021 int space_split = complete_flag ? 0 : 1;
1023 int bases_ok = 0; /* no of databases with OK attribute */
1025 *ol = ord_list_create(stream);
1027 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1028 attr_init_APT(&truncation, zapt, 5);
1029 truncation_value = attr_find(&truncation, NULL);
1030 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1032 for (base_no = 0; base_no < num_bases; base_no++)
1036 int regex_range = 0;
1038 int max_pos, prefix_len = 0;
1045 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1047 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1048 basenames[base_no]);
1052 if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
1053 reg_type, xpath_use,
1054 curAttributeSet, &ord)
1059 *ol = ord_list_append(stream, *ol, ord);
1062 term_dict[prefix_len++] = '|';
1064 term_dict[prefix_len++] = '(';
1066 ord_len = key_SU_encode (ord, ord_buf);
1067 for (i = 0; i<ord_len; i++)
1069 term_dict[prefix_len++] = 1;
1070 term_dict[prefix_len++] = ord_buf[i];
1072 if (ord_len > init_pos)
1079 term_dict[prefix_len++] = ')';
1080 term_dict[prefix_len] = '\0';
1082 switch (truncation_value)
1084 case -1: /* not specified */
1085 case 100: /* do not truncate */
1086 if (!string_relation (zh, zapt, &termp, term_dict,
1088 reg_type, space_split, term_dst,
1093 zebra_setError(zh, relation_error, 0);
1100 case 1: /* right truncation */
1101 term_dict[j++] = '(';
1102 if (!term_100(zh->reg->zebra_maps, reg_type,
1103 &termp, term_dict + j, space_split, term_dst))
1108 strcat(term_dict, ".*)");
1110 case 2: /* left truncation */
1111 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1112 if (!term_100(zh->reg->zebra_maps, reg_type,
1113 &termp, term_dict + j, space_split, term_dst))
1118 strcat(term_dict, ")");
1120 case 3: /* left&right truncation */
1121 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1122 if (!term_100(zh->reg->zebra_maps, reg_type,
1123 &termp, term_dict + j, space_split, term_dst))
1128 strcat(term_dict, ".*)");
1130 case 101: /* process # in term */
1131 term_dict[j++] = '(';
1132 if (!term_101(zh->reg->zebra_maps, reg_type,
1133 &termp, term_dict + j, space_split, term_dst))
1138 strcat(term_dict, ")");
1140 case 102: /* Regexp-1 */
1141 term_dict[j++] = '(';
1142 if (!term_102(zh->reg->zebra_maps, reg_type,
1143 &termp, term_dict + j, space_split, term_dst))
1148 strcat(term_dict, ")");
1150 case 103: /* Regexp-2 */
1152 term_dict[j++] = '(';
1153 if (!term_103(zh->reg->zebra_maps, reg_type,
1154 &termp, term_dict + j, &regex_range,
1155 space_split, term_dst))
1160 strcat(term_dict, ")");
1162 case 104: /* process #, * and ? in term */
1163 term_dict[j++] = '(';
1164 if (!term_104(zh->reg->zebra_maps, reg_type,
1165 &termp, term_dict + j, space_split, term_dst))
1170 strcat(term_dict, ")");
1172 case 105: /* process * and ! in term */
1173 term_dict[j++] = '(';
1174 if (!term_105(zh->reg->zebra_maps, reg_type,
1175 &termp, term_dict + j, space_split, term_dst, 1))
1180 strcat(term_dict, ")");
1182 case 106: /* process * and ! in term */
1183 term_dict[j++] = '(';
1184 if (!term_105(zh->reg->zebra_maps, reg_type,
1185 &termp, term_dict + j, space_split, term_dst, 0))
1190 strcat(term_dict, ")");
1193 zebra_setError_zint(zh,
1194 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1201 const char *input = term_dict + prefix_len;
1202 esc_str(buf, sizeof(buf), input, strlen(input));
1206 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1207 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1208 grep_info, &max_pos, init_pos,
1211 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1217 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1222 /* convert APT search term to UTF8 */
1223 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1227 Z_Term *term = zapt->term;
1229 switch (term->which)
1231 case Z_Term_general:
1232 if (zh->iconv_to_utf8 != 0)
1234 char *inbuf = (char *) term->u.general->buf;
1235 size_t inleft = term->u.general->len;
1236 char *outbuf = termz;
1237 size_t outleft = IT_MAX_WORD-1;
1240 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1242 if (ret == (size_t)(-1))
1244 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1247 YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1255 sizez = term->u.general->len;
1256 if (sizez > IT_MAX_WORD-1)
1257 sizez = IT_MAX_WORD-1;
1258 memcpy (termz, term->u.general->buf, sizez);
1259 termz[sizez] = '\0';
1262 case Z_Term_characterString:
1263 sizez = strlen(term->u.characterString);
1264 if (sizez > IT_MAX_WORD-1)
1265 sizez = IT_MAX_WORD-1;
1266 memcpy (termz, term->u.characterString, sizez);
1267 termz[sizez] = '\0';
1270 zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1276 /* convert APT SCAN term to internal cmap */
1277 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1278 char *termz, int reg_type)
1280 char termz0[IT_MAX_WORD];
1282 if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1283 return ZEBRA_FAIL; /* error */
1287 const char *cp = (const char *) termz0;
1288 const char *cp_end = cp + strlen(cp);
1291 const char *space_map = NULL;
1294 while ((len = (cp_end - cp)) > 0)
1296 map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1297 if (**map == *CHR_SPACE)
1302 for (src = space_map; *src; src++)
1305 for (src = *map; *src; src++)
1314 static void grep_info_delete(struct grep_info *grep_info)
1317 xfree(grep_info->term_no);
1319 xfree(grep_info->isam_p_buf);
1322 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1323 Z_AttributesPlusTerm *zapt,
1324 struct grep_info *grep_info,
1328 int termset_value_numeric;
1329 const char *termset_value_string;
1332 grep_info->term_no = 0;
1334 grep_info->isam_p_size = 0;
1335 grep_info->isam_p_buf = NULL;
1337 grep_info->reg_type = reg_type;
1338 grep_info->termset = 0;
1342 attr_init_APT(&termset, zapt, 8);
1343 termset_value_numeric =
1344 attr_find_ex(&termset, NULL, &termset_value_string);
1345 if (termset_value_numeric != -1)
1348 const char *termset_name = 0;
1349 if (termset_value_numeric != -2)
1352 sprintf(resname, "%d", termset_value_numeric);
1353 termset_name = resname;
1356 termset_name = termset_value_string;
1357 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1358 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1359 if (!grep_info->termset)
1361 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1369 \brief Create result set(s) for list of terms
1370 \param zh Zebra Handle
1371 \param termz term as used in query but converted to UTF-8
1372 \param attributeSet default attribute set
1373 \param stream memory for result
1374 \param reg_type register type ('w', 'p',..)
1375 \param complete_flag whether it's phrases or not
1376 \param rank_type term flags for ranking
1377 \param xpath_use use attribute for X-Path (-1 for no X-path)
1378 \param num_bases number of databases
1379 \param basenames array of databases
1380 \param rset_nmem memory for result sets
1381 \param result_sets output result set for each term in list (output)
1382 \param num_result_sets number of output result sets (output)
1383 \param kc rset key control to be used for created result sets
1385 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1386 Z_AttributesPlusTerm *zapt,
1388 oid_value attributeSet,
1390 int reg_type, int complete_flag,
1391 const char *rank_type,
1392 const char *xpath_use,
1393 int num_bases, char **basenames,
1395 RSET **result_sets, int *num_result_sets,
1396 struct rset_key_control *kc)
1398 char term_dst[IT_MAX_WORD+1];
1399 struct grep_info grep_info;
1400 const char *termp = termz;
1402 int empty_term = *termz ? 0 : 1;
1405 *num_result_sets = 0;
1407 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1413 if (alloc_sets == *num_result_sets)
1416 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1419 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1420 alloc_sets = alloc_sets + add;
1421 *result_sets = rnew;
1423 res = term_trunc(zh, zapt, &termp, attributeSet,
1425 reg_type, complete_flag,
1426 num_bases, basenames,
1427 term_dst, rank_type,
1428 xpath_use, rset_nmem,
1429 &(*result_sets)[*num_result_sets],
1431 if (res != ZEBRA_OK)
1434 for (i = 0; i < *num_result_sets; i++)
1435 rset_delete((*result_sets)[i]);
1436 grep_info_delete (&grep_info);
1439 if ((*result_sets)[*num_result_sets] == 0)
1441 (*num_result_sets)++;
1448 grep_info_delete(&grep_info);
1453 static ZEBRA_RES always_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1454 oid_value attributeSet, NMEM stream,
1455 struct grep_info *grep_info,
1456 int reg_type, int complete_flag,
1457 int num_bases, char **basenames,
1458 const char *xpath_use,
1459 struct ord_list **ol)
1461 char term_dict[2*IT_MAX_WORD+4000];
1463 struct rpn_char_map_info rcmi;
1465 int bases_ok = 0; /* no of databases with OK attribute */
1467 *ol = ord_list_create(stream);
1469 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1471 for (base_no = 0; base_no < num_bases; base_no++)
1474 int regex_range = 0;
1476 int max_pos, prefix_len = 0;
1480 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1482 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1483 basenames[base_no]);
1487 if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_alwaysmatches,
1488 reg_type, xpath_use,
1489 attributeSet, &ord) != ZEBRA_OK)
1491 yaz_log(YLOG_LOG, "Got ordinal value: %d", ord);
1492 *ol = ord_list_append(stream, *ol, ord);
1495 term_dict[prefix_len++] = '|';
1497 term_dict[prefix_len++] = '(';
1499 ord_len = key_SU_encode (ord, ord_buf);
1500 for (i = 0; i<ord_len; i++)
1502 term_dict[prefix_len++] = 1;
1503 term_dict[prefix_len++] = ord_buf[i];
1505 if (ord_len > init_pos)
1510 term_dict[prefix_len++] = ')';
1511 term_dict[prefix_len] = '\0';
1513 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1514 grep_info, &max_pos, init_pos,
1519 yaz_log(YLOG_LOG, "always_term: %d positions", grep_info->isam_p_indx);
1523 static ZEBRA_RES rpn_search_APT_alwaysmatches(ZebraHandle zh,
1524 Z_AttributesPlusTerm *zapt,
1525 const char *termz_org,
1526 oid_value attributeSet,
1528 int reg_type, int complete_flag,
1529 const char *rank_type,
1530 const char *xpath_use,
1531 int num_bases, char **basenames,
1534 struct rset_key_control *kc)
1536 const char *term_dst = "always";
1537 struct grep_info grep_info;
1538 zint hits_limit_value;
1539 const char *term_ref_id_str = 0;
1541 struct ord_list *ol;
1543 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1545 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1548 grep_info.isam_p_indx = 0;
1550 res = always_term(zh, zapt, attributeSet, stream, &grep_info,
1551 reg_type, complete_flag, num_bases, basenames,
1553 if (res == ZEBRA_OK)
1555 *rset = rset_trunc(zh, grep_info.isam_p_buf,
1556 grep_info.isam_p_indx, term_dst, strlen(term_dst),
1557 rank_type, 1 /* preserve pos */,
1558 zapt->term->which, rset_nmem,
1559 kc, kc->scope, ol, reg_type, hits_limit_value,
1564 grep_info_delete (&grep_info);
1568 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1569 Z_AttributesPlusTerm *zapt,
1570 const char *termz_org,
1571 oid_value attributeSet,
1573 int reg_type, int complete_flag,
1574 const char *rank_type,
1575 const char *xpath_use,
1576 int num_bases, char **basenames,
1579 struct rset_key_control *kc)
1581 RSET *result_sets = 0;
1582 int num_result_sets = 0;
1584 term_list_trunc(zh, zapt, termz_org, attributeSet,
1585 stream, reg_type, complete_flag,
1586 rank_type, xpath_use,
1587 num_bases, basenames,
1589 &result_sets, &num_result_sets, kc);
1590 if (res != ZEBRA_OK)
1592 if (num_result_sets == 0)
1593 *rset = rset_create_null(rset_nmem, kc, 0);
1594 else if (num_result_sets == 1)
1595 *rset = result_sets[0];
1597 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1598 num_result_sets, result_sets,
1599 1 /* ordered */, 0 /* exclusion */,
1600 3 /* relation */, 1 /* distance */);
1606 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1607 Z_AttributesPlusTerm *zapt,
1608 const char *termz_org,
1609 oid_value attributeSet,
1611 int reg_type, int complete_flag,
1612 const char *rank_type,
1613 const char *xpath_use,
1614 int num_bases, char **basenames,
1617 struct rset_key_control *kc)
1619 RSET *result_sets = 0;
1620 int num_result_sets = 0;
1622 term_list_trunc(zh, zapt, termz_org, attributeSet,
1623 stream, reg_type, complete_flag,
1624 rank_type, xpath_use,
1625 num_bases, basenames,
1627 &result_sets, &num_result_sets, kc);
1628 if (res != ZEBRA_OK)
1630 if (num_result_sets == 0)
1631 *rset = rset_create_null(rset_nmem, kc, 0);
1632 else if (num_result_sets == 1)
1633 *rset = result_sets[0];
1635 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1636 num_result_sets, result_sets);
1642 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1643 Z_AttributesPlusTerm *zapt,
1644 const char *termz_org,
1645 oid_value attributeSet,
1647 int reg_type, int complete_flag,
1648 const char *rank_type,
1649 const char *xpath_use,
1650 int num_bases, char **basenames,
1653 struct rset_key_control *kc)
1655 RSET *result_sets = 0;
1656 int num_result_sets = 0;
1658 term_list_trunc(zh, zapt, termz_org, attributeSet,
1659 stream, reg_type, complete_flag,
1660 rank_type, xpath_use,
1661 num_bases, basenames,
1663 &result_sets, &num_result_sets,
1665 if (res != ZEBRA_OK)
1667 if (num_result_sets == 0)
1668 *rset = rset_create_null(rset_nmem, kc, 0);
1669 else if (num_result_sets == 1)
1670 *rset = result_sets[0];
1672 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1673 num_result_sets, result_sets);
1679 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1680 const char **term_sub,
1682 oid_value attributeSet,
1683 struct grep_info *grep_info,
1693 char *term_tmp = term_dict + strlen(term_dict);
1696 attr_init_APT(&relation, zapt, 2);
1697 relation_value = attr_find(&relation, NULL);
1699 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1701 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1704 term_value = atoi (term_tmp);
1705 switch (relation_value)
1708 yaz_log(log_level_rpn, "Relation <");
1709 gen_regular_rel(term_tmp, term_value-1, 1);
1712 yaz_log(log_level_rpn, "Relation <=");
1713 gen_regular_rel(term_tmp, term_value, 1);
1716 yaz_log(log_level_rpn, "Relation >=");
1717 gen_regular_rel(term_tmp, term_value, 0);
1720 yaz_log(log_level_rpn, "Relation >");
1721 gen_regular_rel(term_tmp, term_value+1, 0);
1725 yaz_log(log_level_rpn, "Relation =");
1726 sprintf(term_tmp, "(0*%d)", term_value);
1729 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1732 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1733 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1736 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1737 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1741 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1742 const char **term_sub,
1743 oid_value attributeSet,
1744 struct grep_info *grep_info,
1745 int reg_type, int complete_flag,
1746 int num_bases, char **basenames,
1748 const char *xpath_use,
1751 char term_dict[2*IT_MAX_WORD+2];
1753 oid_value curAttributeSet = attributeSet;
1755 struct rpn_char_map_info rcmi;
1757 int bases_ok = 0; /* no of databases with OK attribute */
1759 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1761 for (base_no = 0; base_no < num_bases; base_no++)
1763 int max_pos, prefix_len = 0;
1764 int relation_error = 0;
1765 int ord, ord_len, i;
1770 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1772 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1773 basenames[base_no]);
1777 if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
1778 reg_type, xpath_use, curAttributeSet, &ord)
1785 term_dict[prefix_len++] = '|';
1787 term_dict[prefix_len++] = '(';
1789 ord_len = key_SU_encode (ord, ord_buf);
1790 for (i = 0; i < ord_len; i++)
1792 term_dict[prefix_len++] = 1;
1793 term_dict[prefix_len++] = ord_buf[i];
1796 term_dict[prefix_len++] = ')';
1797 term_dict[prefix_len] = '\0';
1798 if (!numeric_relation(zh, zapt, &termp, term_dict,
1799 attributeSet, grep_info, &max_pos, reg_type,
1800 term_dst, &relation_error))
1804 zebra_setError(zh, relation_error, 0);
1814 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Search handler for the "numeric" search type.
 *
 * A grep_info is prepared for reg_type and numeric_term() is run on the
 * input starting at termz (termp tracks the current position).  The ISAM
 * positions collected for a term are turned into a result set with
 * rset_trunc() and stored in result_sets, an array taken from `stream`
 * that is regrown (copy into a larger nmem block) whenever it is full.
 *
 * On completion *rset is
 *   - a null result set when no set was collected,
 *   - the set itself when exactly one was collected,
 *   - otherwise an AND (rset_create_and) over all collected sets.
 *
 * hits_limit_value / term_ref_id_str are obtained from term_limits_APT().
 */
1819 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1820 Z_AttributesPlusTerm *zapt,
1822 oid_value attributeSet,
1824 int reg_type, int complete_flag,
1825 const char *rank_type,
1826 const char *xpath_use,
1827 int num_bases, char **basenames,
1830 struct rset_key_control *kc)
1832 char term_dst[IT_MAX_WORD+1];
1833 const char *termp = termz;
1834 RSET *result_sets = 0;
1835 int num_result_sets = 0;
1837 struct grep_info grep_info;
1839 zint hits_limit_value;
1840 const char *term_ref_id_str = 0;
1842 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1844 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1845 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
/* result_sets is full: allocate a bigger array and carry the old
   entries over (nmem memory is not freed individually) */
1849 if (alloc_sets == num_result_sets)
1852 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1855 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1856 alloc_sets = alloc_sets + add;
1859 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start with an empty position list for this term */
1860 grep_info.isam_p_indx = 0;
1861 res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1862 reg_type, complete_flag, num_bases, basenames,
1863 term_dst, xpath_use,
/* failure, or numeric_term() cleared termp */
1865 if (res == ZEBRA_FAIL || termp == 0)
1867 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1868 result_sets[num_result_sets] =
1869 rset_trunc(zh, grep_info.isam_p_buf,
1870 grep_info.isam_p_indx, term_dst,
1871 strlen(term_dst), rank_type,
1872 0 /* preserve position */,
1873 zapt->term->which, rset_nmem,
1874 kc, kc->scope, 0, reg_type,
1877 if (!result_sets[num_result_sets])
1881 grep_info_delete(&grep_info);
/* release every set collected so far */
1885 for (i = 0; i<num_result_sets; i++)
1886 rset_delete(result_sets[i]);
/* The three outcomes are mutually exclusive.  The second test used to be
   a plain `if`, which let the zero-set case also reach the
   rset_create_and() call and overwrite the null set created here. */
1889 if (num_result_sets == 0)
1890 *rset = rset_create_null(rset_nmem, kc, 0);
1891 else if (num_result_sets == 1)
1892 *rset = result_sets[0];
1894 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1895 num_result_sets, result_sets);
/* Search handler for the "local" search type.
 *
 * Creates a temporary result set (files go to the directory named by the
 * "setTmpDir" resource), opens it for writing and writes a key into it.
 * The new set is returned through *rset.
 */
1901 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1902 Z_AttributesPlusTerm *zapt,
1904 oid_value attributeSet,
1906 const char *rank_type, NMEM rset_nmem,
1908 struct rset_key_control *kc)
1913 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1914 res_get (zh->res, "setTmpDir"),0 );
1915 rsfd = rset_open(*rset, RSETF_WRITE);
1923 rset_write (rsfd, &key);
/* Treat an APT as a sort specification instead of a search term.
 *
 * The general term is parsed as a decimal slot number i in
 * sort_sequence->specs (created here with 10 empty slots on first use).
 * A Z_SortKeySpec sorting on the APT's own attribute list is built from
 * `stream` memory and stored in specs[i].  Attribute type 7 selects the
 * direction: 2 means descending, every other value ascending.
 * *rset is set to a null result set.
 *
 * Only Z_Term_general terms are accepted.
 */
1928 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1929 oid_value attributeSet, NMEM stream,
1930 Z_SortKeySpecList *sort_sequence,
1931 const char *rank_type,
1934 struct rset_key_control *kc)
1937 int sort_relation_value;
1938 AttrType sort_relation_type;
1945 attr_init_APT(&sort_relation_type, zapt, 7);
1946 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* lazily create the spec table, all slots empty */
1948 if (!sort_sequence->specs)
1950 sort_sequence->num_specs = 10;
1951 sort_sequence->specs = (Z_SortKeySpec **)
1952 nmem_malloc(stream, sort_sequence->num_specs *
1953 sizeof(*sort_sequence->specs));
1954 for (i = 0; i<sort_sequence->num_specs; i++)
1955 sort_sequence->specs[i] = 0;
1957 if (zapt->term->which != Z_Term_general)
1960 i = atoi_n ((char *) zapt->term->u.general->buf,
1961 zapt->term->u.general->len);
/* The slot number comes straight from the query.  Negative values are
   handled like too-large ones so that specs[i] below can never be written
   outside the table. */
1962 if (i < 0 || i >= sort_sequence->num_specs)
1964 sprintf(termz, "%d", i);
/* resolve the attribute set to an OID for the sort attributes */
1966 oe.proto = PROTO_Z3950;
1967 oe.oclass = CLASS_ATTSET;
1968 oe.value = attributeSet;
1969 if (!oid_ent_to_oid (&oe, oid))
1972 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1973 sks->sortElement = (Z_SortElement *)
1974 nmem_malloc(stream, sizeof(*sks->sortElement));
1975 sks->sortElement->which = Z_SortElement_generic;
1976 sk = sks->sortElement->u.generic = (Z_SortKey *)
1977 nmem_malloc(stream, sizeof(*sk));
1978 sk->which = Z_SortKey_sortAttributes;
1979 sk->u.sortAttributes = (Z_SortAttributes *)
1980 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1982 sk->u.sortAttributes->id = oid;
1983 sk->u.sortAttributes->list = zapt->attributes;
1985 sks->sortRelation = (int *)
1986 nmem_malloc(stream, sizeof(*sks->sortRelation));
/* 1 = ascending, 2 = descending, anything else falls back to ascending */
1987 if (sort_relation_value == 1)
1988 *sks->sortRelation = Z_SortKeySpec_ascending;
1989 else if (sort_relation_value == 2)
1990 *sks->sortRelation = Z_SortKeySpec_descending;
1992 *sks->sortRelation = Z_SortKeySpec_ascending;
1994 sks->caseSensitivity = (int *)
1995 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1996 *sks->caseSensitivity = 0;
1998 sks->which = Z_SortKeySpec_null;
1999 sks->u.null = odr_nullval ();
2000 sort_sequence->specs[i] = sks;
2001 *rset = rset_create_null(rset_nmem, kc, 0);
/* Check whether the use attribute (type 1) of an APT is an X-Path.
 *
 * The string form of the use attribute is fetched with attr_find_ex().
 * Only a string starting with '/' is treated as an X-Path; it is then
 * parsed into at most `max` location steps in `xpath` and the result of
 * zebra_parse_xpath_str() is returned.
 * NOTE(review): callers test the result with `>= 0`, so a negative value
 * presumably signals "no X-Path" - confirm.
 */
2006 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2007 oid_value attributeSet,
2008 struct xpath_location_step *xpath, int max,
2011 oid_value curAttributeSet = attributeSet;
2013 const char *use_string = 0;
2015 attr_init_APT(&use, zapt, 1);
2016 attr_find_ex(&use, &curAttributeSet, &use_string);
/* no string-valued use attribute, or not an absolute path */
2018 if (!use_string || *use_string != '/')
2021 return zebra_parse_xpath_str(use_string, xpath, max, mem);
2026 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2027 int reg_type, const char *term,
2028 const char *xpath_use,
2030 struct rset_key_control *kc)
2033 struct grep_info grep_info;
2034 char term_dict[2048];
2037 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2038 zinfo_index_category_index,
2041 int ord_len, i, r, max_pos;
2042 int term_type = Z_Term_characterString;
2043 const char *flags = "void";
2045 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2046 return rset_create_null(rset_nmem, kc, 0);
2049 return rset_create_null(rset_nmem, kc, 0);
2051 term_dict[prefix_len++] = '|';
2053 term_dict[prefix_len++] = '(';
2055 ord_len = key_SU_encode (ord, ord_buf);
2056 for (i = 0; i<ord_len; i++)
2058 term_dict[prefix_len++] = 1;
2059 term_dict[prefix_len++] = ord_buf[i];
2061 term_dict[prefix_len++] = ')';
2062 strcpy(term_dict+prefix_len, term);
2064 grep_info.isam_p_indx = 0;
2065 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2066 &grep_info, &max_pos, 0, grep_handle);
2067 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2068 grep_info.isam_p_indx);
2069 rset = rset_trunc(zh, grep_info.isam_p_buf,
2070 grep_info.isam_p_indx, term, strlen(term),
2071 flags, 1, term_type,rset_nmem,
2072 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2073 0 /* term_ref_id_str */);
2074 grep_info_delete(&grep_info);
/* Restrict a term result set to the elements selected by an X-Path.
 *
 * `rset` is the set for the search term; a null `rset` means "always
 * matches" (always_matches is set to 1).  For every database and every
 * X-Path level, from the last step towards the first, a regular expression
 * for the reversed path is built in xpath_rev and looked up with
 * xpath_trunc() as element begin (ZEBRA_XPATH_ELM_BEGIN) and element end
 * (ZEBRA_XPATH_ELM_END).  The current set is then wrapped by
 * rset_create_between(), together with an optional attribute set derived
 * from a relation predicate on that step.
 */
2079 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2080 int num_bases, char **basenames,
2081 NMEM stream, const char *rank_type, RSET rset,
2082 int xpath_len, struct xpath_location_step *xpath,
2085 struct rset_key_control *kc)
2089 int always_matches = rset ? 0 : 1;
2097 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2098 for (i = 0; i<xpath_len; i++)
2100 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2112 a[@attr = value]/b[@other = othervalue]
2114 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2115 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2116 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2117 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2118 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2119 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2123 dict_grep_cmap (zh->reg->dict, 0, 0);
2125 for (base_no = 0; base_no < num_bases; base_no++)
2127 int level = xpath_len;
2130 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2132 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2133 basenames[base_no]);
/* walk the location steps from the innermost one outwards */
2137 while (--level >= 0)
2139 WRBUF xpath_rev = wrbuf_alloc();
2141 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* append steps level..1, i.e. the path in reverse order, "/" after each */
2143 for (i = level; i >= 1; --i)
2145 const char *cp = xpath[i].part;
2151 wrbuf_puts(xpath_rev, "[^/]*");
2152 else if (*cp == ' ')
2153 wrbuf_puts(xpath_rev, "\001 ");
2155 wrbuf_putc(xpath_rev, *cp);
2157 /* wrbuf_putc does not null-terminate , but
2158 wrbuf_puts below ensures it does.. so xpath_rev
2159 is OK iff length is > 0 */
2161 wrbuf_puts(xpath_rev, "/");
2163 else if (i == 1) /* // case */
2164 wrbuf_puts(xpath_rev, ".*");
/* relation predicate on this step: look up "<name>" or "<name>=<value>"
   (name without its first character) as an attribute-name term */
2166 if (xpath[level].predicate &&
2167 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2168 xpath[level].predicate->u.relation.name[0])
2170 WRBUF wbuf = wrbuf_alloc();
2171 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2172 if (xpath[level].predicate->u.relation.value)
2174 const char *cp = xpath[level].predicate->u.relation.value;
2175 wrbuf_putc(wbuf, '=');
/* the value is matched literally: escape regex meta characters */
2179 if (strchr(REGEX_CHARS, *cp))
2180 wrbuf_putc(wbuf, '\\');
2181 wrbuf_putc(wbuf, *cp);
/* appending "" null-terminates the buffer (see note on wrbuf_putc above) */
2185 wrbuf_puts(wbuf, "");
2186 rset_attr = xpath_trunc(
2187 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2189 wrbuf_free(wbuf, 1);
2195 wrbuf_free(xpath_rev, 1);
2199 yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level,
2200 wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
2201 if (wrbuf_len(xpath_rev))
2203 rset_start_tag = xpath_trunc(zh, stream, '0',
2204 wrbuf_buf(xpath_rev),
2205 ZEBRA_XPATH_ELM_BEGIN,
2208 rset = rset_start_tag;
2211 rset_end_tag = xpath_trunc(zh, stream, '0',
2212 wrbuf_buf(xpath_rev),
2213 ZEBRA_XPATH_ELM_END,
/* keep hits located between begin and end tag (and matching rset_attr) */
2216 rset = rset_create_between(rset_nmem, kc, kc->scope,
2217 rset_start_tag, rset,
2218 rset_end_tag, rset_attr);
2221 wrbuf_free(xpath_rev, 1);
/* upper bound on the number of X-Path location steps parsed per APT */
2229 #define MAX_XPATH_STEPS 10
/* Search for a single Attributes-Plus-Term operand.
 *
 * zebra_maps_attr() derives register id, search type, rank type and the
 * complete/sort flags from the APT's attributes, and the term is converted
 * to UTF-8 in termz.  A sort APT is delegated to rpn_sort_spec().
 * Otherwise the use attribute is checked for an X-Path and the search is
 * dispatched on search_type ("phrase", "and-list", "or-list", "local",
 * "numeric", "always"); any other type raises YAZ_BIB1_UNSUPP_SEARCH.
 * With an X-Path present the resulting set is passed on to
 * rpn_search_xpath().
 */
2231 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2232 oid_value attributeSet, NMEM stream,
2233 Z_SortKeySpecList *sort_sequence,
2234 int num_bases, char **basenames,
2237 struct rset_key_control *kc)
2239 ZEBRA_RES res = ZEBRA_OK;
2241 char *search_type = NULL;
2242 char rank_type[128];
2245 char termz[IT_MAX_WORD+1];
2247 const char *xpath_use = 0;
2248 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2252 log_level_rpn = yaz_log_module_level("rpn");
2255 zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2256 rank_type, &complete_flag, &sort_flag);
2258 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2259 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2260 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2261 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2263 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2267 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2268 rank_type, rset_nmem, rset, kc);
2269 /* consider if an X-Path query is used */
2270 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2271 xpath, MAX_XPATH_STEPS, stream);
2274 if (xpath[xpath_len-1].part[0] == '@')
2275 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2277 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2280 /* search using one of the various search type strategies
2281 termz is our UTF-8 search term
2282 attributeSet is top-level default attribute set
2283 stream is ODR for search
2284 reg_id is the register type
2285 complete_flag is 1 for complete subfield, 0 for incomplete
2286 xpath_use is use-attribute to be used for X-Path search, 0 for none
2288 if (!strcmp(search_type, "phrase"))
2290 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2291 reg_id, complete_flag, rank_type,
2293 num_bases, basenames, rset_nmem,
2296 else if (!strcmp(search_type, "and-list"))
2298 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2299 reg_id, complete_flag, rank_type,
2301 num_bases, basenames, rset_nmem,
2304 else if (!strcmp(search_type, "or-list"))
2306 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2307 reg_id, complete_flag, rank_type,
2309 num_bases, basenames, rset_nmem,
2312 else if (!strcmp(search_type, "local"))
2314 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2315 rank_type, rset_nmem, rset, kc);
2317 else if (!strcmp(search_type, "numeric"))
2319 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2320 reg_id, complete_flag, rank_type,
2322 num_bases, basenames, rset_nmem,
2325 else if (!strcmp(search_type, "always"))
2327 if (xpath_len >= 0) /* alwaysmatches and X-Path ? */
2329 *rset = 0; /* signal no "term" set */
2330 return rpn_search_xpath(zh, num_bases, basenames,
2331 stream, rank_type, *rset,
2332 xpath_len, xpath, rset_nmem, rset, kc);
2336 res = rpn_search_APT_alwaysmatches(zh, zapt, termz,
2337 attributeSet, stream,
2338 reg_id, complete_flag,
2341 num_bases, basenames, rset_nmem,
2347 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2350 if (res != ZEBRA_OK)
2354 return rpn_search_xpath(zh, num_bases, basenames,
2355 stream, rank_type, *rset,
2356 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top() calls rpn_search_structure()
   before its definition further down. */
2359 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2360 oid_value attributeSet,
2361 NMEM stream, NMEM rset_nmem,
2362 Z_SortKeySpecList *sort_sequence,
2363 int num_bases, char **basenames,
2364 RSET **result_sets, int *num_result_sets,
2365 Z_Operator *parent_op,
2366 struct rset_key_control *kc);
/* Entry point for evaluating a complete RPN query tree.
 *
 * A key control object is created for the handle and the tree rooted at
 * `zs` is evaluated by rpn_search_structure() with no parent operator.
 * If that fails, every result set produced so far is deleted.  On success
 * exactly one result set must have been produced (asserted); it is handed
 * back through *result_set.
 */
2368 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2369 oid_value attributeSet,
2370 NMEM stream, NMEM rset_nmem,
2371 Z_SortKeySpecList *sort_sequence,
2372 int num_bases, char **basenames,
2375 RSET *result_sets = 0;
2376 int num_result_sets = 0;
2378 struct rset_key_control *kc = zebra_key_control_create(zh);
2380 res = rpn_search_structure(zh, zs, attributeSet,
2383 num_bases, basenames,
2384 &result_sets, &num_result_sets,
2385 0 /* no parent op */,
2387 if (res != ZEBRA_OK)
/* clean up partial results before reporting the failure */
2390 for (i = 0; i<num_result_sets; i++)
2391 rset_delete(result_sets[i]);
/* with no parent operator the top node always combines its children */
2396 assert(num_result_sets == 1);
2397 assert(result_sets);
2398 assert(*result_sets);
2399 *result_set = *result_sets;
/* Recursively evaluate one node of an RPN query tree.
 *
 * Output is a list of result sets (*result_sets, *num_result_sets)
 * allocated from `stream`.
 *
 * Complex node: both operands are evaluated with this node's operator
 * passed as their parent_op, and their lists are concatenated.  When the
 * parent operator is absent, differs from this node's operator, or the
 * operator is neither AND nor OR, the list is combined right away into a
 * single set (and / or / not / prox).  Otherwise the concatenated list is
 * left for the caller to combine, so chains of the same AND/OR operator
 * end up in one multi-way set.
 *
 * Simple node: an APT is searched with rpn_search_APT(); a result set id
 * is resolved with resultSetRef().  Either way a one-element list is
 * produced.
 *
 * Unsupported node or operand kinds raise YAZ_BIB1_UNSUPP_SEARCH.
 * Proximity is limited to a known unit code equal to Z_ProxUnit_word.
 */
2405 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2406 oid_value attributeSet,
2407 NMEM stream, NMEM rset_nmem,
2408 Z_SortKeySpecList *sort_sequence,
2409 int num_bases, char **basenames,
2410 RSET **result_sets, int *num_result_sets,
2411 Z_Operator *parent_op,
2412 struct rset_key_control *kc)
2414 *num_result_sets = 0;
2415 if (zs->which == Z_RPNStructure_complex)
2418 Z_Operator *zop = zs->u.complex->roperator;
2419 RSET *result_sets_l = 0;
2420 int num_result_sets_l = 0;
2421 RSET *result_sets_r = 0;
2422 int num_result_sets_r = 0;
/* left operand */
2424 res = rpn_search_structure(zh, zs->u.complex->s1,
2425 attributeSet, stream, rset_nmem,
2427 num_bases, basenames,
2428 &result_sets_l, &num_result_sets_l,
2430 if (res != ZEBRA_OK)
2433 for (i = 0; i<num_result_sets_l; i++)
2434 rset_delete(result_sets_l[i]);
/* right operand; on failure the left operand's sets are released too */
2437 res = rpn_search_structure(zh, zs->u.complex->s2,
2438 attributeSet, stream, rset_nmem,
2440 num_bases, basenames,
2441 &result_sets_r, &num_result_sets_r,
2443 if (res != ZEBRA_OK)
2446 for (i = 0; i<num_result_sets_l; i++)
2447 rset_delete(result_sets_l[i]);
2448 for (i = 0; i<num_result_sets_r; i++)
2449 rset_delete(result_sets_r[i]);
2453 /* make a new list of result for all children */
2454 *num_result_sets = num_result_sets_l + num_result_sets_r;
2455 *result_sets = nmem_malloc(stream, *num_result_sets *
2456 sizeof(**result_sets));
2457 memcpy(*result_sets, result_sets_l,
2458 num_result_sets_l * sizeof(**result_sets));
2459 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2460 num_result_sets_r * sizeof(**result_sets));
2462 if (!parent_op || parent_op->which != zop->which
2463 || (zop->which != Z_Operator_and &&
2464 zop->which != Z_Operator_or))
2466 /* parent node different from this one (or non-present) */
2467 /* we must combine result sets now */
2471 case Z_Operator_and:
2472 rset = rset_create_and(rset_nmem, kc,
2474 *num_result_sets, *result_sets);
2477 rset = rset_create_or(rset_nmem, kc,
2478 kc->scope, 0, /* termid */
2479 *num_result_sets, *result_sets);
2481 case Z_Operator_and_not:
2482 rset = rset_create_not(rset_nmem, kc,
2487 case Z_Operator_prox:
/* only "known" proximity unit codes are handled */
2488 if (zop->u.prox->which != Z_ProximityOperator_known)
2491 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* ... and of those only the word unit */
2495 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2497 zebra_setError_zint(zh,
2498 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2499 *zop->u.prox->u.known);
2504 rset = rset_create_prox(rset_nmem, kc,
2506 *num_result_sets, *result_sets,
2507 *zop->u.prox->ordered,
2508 (!zop->u.prox->exclusion ?
2509 0 : *zop->u.prox->exclusion),
2510 *zop->u.prox->relationType,
2511 *zop->u.prox->distance );
2515 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the child list by a list holding only the combined set */
2518 *num_result_sets = 1;
2519 *result_sets = nmem_malloc(stream, *num_result_sets *
2520 sizeof(**result_sets));
2521 (*result_sets)[0] = rset;
2524 else if (zs->which == Z_RPNStructure_simple)
2529 if (zs->u.simple->which == Z_Operand_APT)
2531 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2532 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2533 attributeSet, stream, sort_sequence,
2534 num_bases, basenames, rset_nmem, &rset,
2536 if (res != ZEBRA_OK)
2539 else if (zs->u.simple->which == Z_Operand_resultSetId)
2541 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2542 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2546 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2547 zs->u.simple->u.resultSetId);
2554 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields exactly one set */
2557 *num_result_sets = 1;
2558 *result_sets = nmem_malloc(stream, *num_result_sets *
2559 sizeof(**result_sets));
2560 (*result_sets)[0] = rset;
2564 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* One dictionary entry seen during scan; scan_handle() fills in its
   `term` and `isam_p` members. */
2570 struct scan_info_entry {
/* entry table of the scan context, indexed by scan_handle() */
2576 struct scan_info_entry *list;
/* dict_scan() callback: record one dictionary entry in the scan context.
 *
 * `client` is the struct scan_info for the index being scanned.  Entries
 * whose name does not start with scan_info->prefix are singled out by the
 * memcmp() test.  For the others the slot is computed from `pos` as
 * after - pos + before; the name without the prefix is copied into ODR
 * memory and the ISAM pointer is copied from `info`.
 * `info` must carry its payload length in the first byte, and that length
 * must equal sizeof(ISAM_P) (asserted).
 */
2582 static int scan_handle (char *name, const char *info, int pos, void *client)
2584 int len_prefix, idx;
2585 struct scan_info *scan_info = (struct scan_info *) client;
2587 len_prefix = strlen(scan_info->prefix);
2588 if (memcmp (name, scan_info->prefix, len_prefix))
2591 idx = scan_info->after - pos + scan_info->before;
/* store the term with the prefix stripped */
2597 scan_info->list[idx].term = (char *)
2598 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2599 strcpy(scan_info->list[idx].term, name + len_prefix);
2600 assert (*info == sizeof(ISAM_P));
2601 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
/* Convert an index term back to display form in the client's character set.
 *
 * The term is first mapped back with zebra_term_untrans().  When the
 * handle has an iconv_from_utf8 converter, the text is run through
 * yaz_iconv() and a copy of the converted bytes is allocated from `stream`;
 * without a converter *dst is a plain nmem_strdup() of the untranslated
 * term.
 */
2605 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2606 char **dst, const char *src)
2608 char term_src[IT_MAX_WORD];
2609 char term_dst[IT_MAX_WORD];
2611 zebra_term_untrans (zh, reg_type, term_src, src);
2613 if (zh->iconv_from_utf8 != 0)
2616 char *inbuf = term_src;
2617 size_t inleft = strlen(term_src);
2618 char *outbuf = term_dst;
/* one byte of term_dst is kept out of the converter's reach */
2619 size_t outleft = sizeof(term_dst)-1;
2622 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2624 if (ret == (size_t)(-1))
/* number of bytes the converter produced */
2627 len = outbuf - term_dst;
2628 *dst = nmem_malloc(stream, len + 1);
2630 memcpy (*dst, term_dst, len);
2634 *dst = nmem_strdup(stream, term_src);
/* Count the hits of a result set, bounded by the handle's approx_limit.
 *
 * The set's hits_limit is set to zh->approx_limit, the set is opened for
 * reading and its keys are read one by one.  key.mem[0] is compared with
 * the previously seen value (psysno) to detect a change, and
 * rfd->counted_items is checked against the limit.
 * *count receives rset->hits_count.
 */
2637 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2643 yaz_log(YLOG_DEBUG, "count_set");
2645 rset->hits_limit = zh->approx_limit;
2648 rfd = rset_open(rset, RSETF_READ);
2649 while (rset_read(rfd, &key,0 /* never mind terms */))
/* key.mem[0] differs from the previous key */
2651 if (key.mem[0] != psysno)
2653 psysno = key.mem[0];
2654 if (rfd->counted_items >= rset->hits_limit)
2659 *count = rset->hits_count;
/* Scan: list index terms around the term of an APT.
 *
 * *position and *num_entries are read on entry (pos, num) and updated
 * before returning; *list is pointed at the first real entry of the
 * generated array, which is allocated from `stream`.
 *
 * Steps visible below:
 *  - attribute type 8 may name a result set (by number or by string);
 *    if present it replaces limit_set via resultSetRef();
 *  - zebra_maps_attr() determines index type and search type;
 *  - for each database (at most 32 ordinals in total) the index ordinal
 *    for the APT is collected in ords[];
 *  - the dictionary is scanned once per ordinal into scan_info_array;
 *  - the per-ordinal lists are merged, first for the terms after the scan
 *    term, then for the terms before it.  Equal terms from different
 *    ordinals are OR-ed, the result is AND-ed with limit_set when one is
 *    given, and the hit count from count_set() becomes the entry's
 *    `occurrences`.
 */
2662 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2663 oid_value attributeset,
2664 int num_bases, char **basenames,
2665 int *position, int *num_entries, ZebraScanEntry **list,
2666 int *is_partial, RSET limit_set, int return_zero)
2669 int pos = *position;
2670 int num = *num_entries;
2674 char termz[IT_MAX_WORD+20];
2675 struct scan_info *scan_info_array;
2676 ZebraScanEntry *glist;
2677 int ords[32], ord_no = 0;
2680 int bases_ok = 0; /* no of databases with OK attribute */
2681 int errCode = 0; /* err code (if any is not OK) */
2682 char *errString = 0; /* addinfo */
2684 unsigned index_type;
2685 char *search_type = NULL;
2686 char rank_type[128];
2689 NMEM rset_nmem = NULL;
2690 struct rset_key_control *kc = 0;
/* default to Bib-1 when the request carries no attribute set */
2695 if (attributeset == VAL_NONE)
2696 attributeset = VAL_BIB1;
2701 int termset_value_numeric;
2702 const char *termset_value_string;
2703 attr_init_APT(&termset, zapt, 8);
2704 termset_value_numeric =
2705 attr_find_ex(&termset, NULL, &termset_value_string);
/* -1: attribute not given; -2: given as a string rather than a number */
2706 if (termset_value_numeric != -1)
2709 const char *termset_name = 0;
2711 if (termset_value_numeric != -2)
2714 sprintf(resname, "%d", termset_value_numeric);
2715 termset_name = resname;
2718 termset_name = termset_value_string;
2720 limit_set = resultSetRef (zh, termset_name);
2724 yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2725 pos, num, attributeset);
2727 if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2728 rank_type, &complete_flag, &sort_flag))
2731 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
/* collect one index ordinal per database; ords[] holds at most 32 */
2734 for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2738 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2740 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2741 basenames[base_no]);
2746 if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
2747 index_type, 0, attributeset, &ord)
2752 ords[ord_no++] = ord;
2754 if (!bases_ok && errCode)
2756 zebra_setError(zh, errCode, errString);
2765 /* prepare dictionary scanning */
2777 yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2778 "after=%d before+after=%d",
2779 pos, num, before, after, before+after);
2780 scan_info_array = (struct scan_info *)
2781 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
/* one dictionary scan per ordinal, each into its own entry table */
2782 for (i = 0; i < ord_no; i++)
2784 int j, prefix_len = 0;
2785 int before_tmp = before, after_tmp = after;
2786 struct scan_info *scan_info = scan_info_array + i;
2787 struct rpn_char_map_info rcmi;
2789 rpn_char_map_prepare (zh->reg, index_type, &rcmi);
2791 scan_info->before = before;
2792 scan_info->after = after;
2793 scan_info->odr = stream;
2795 scan_info->list = (struct scan_info_entry *)
2796 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2797 for (j = 0; j<before+after; j++)
2798 scan_info->list[j].term = NULL;
/* dictionary key = encoded ordinal (the prefix) + translated scan term */
2800 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2801 termz[prefix_len] = 0;
2802 strcpy(scan_info->prefix, termz);
2804 if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) ==
2808 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2809 scan_info, scan_handle);
2811 glist = (ZebraScanEntry *)
2812 odr_malloc(stream, (before+after)*sizeof(*glist));
2814 rset_nmem = nmem_create();
2815 kc = zebra_key_control_create(zh);
2817 /* consider terms after main term */
2818 for (i = 0; i < ord_no; i++)
2822 for (i = 0; i<after; i++)
2825 const char *mterm = NULL;
2828 int lo = i + pos-1; /* offset in result list */
2830 /* find: j0 is the first of the minimal values */
2831 for (j = 0; j < ord_no; j++)
2833 if (ptr[j] < before+after && ptr[j] >= 0 &&
2834 (tst = scan_info_array[j].list[ptr[j]].term) &&
2835 (!mterm || strcmp (tst, mterm) < 0))
2842 break; /* no value found, stop */
2844 /* get result set for first one , but only if it's within bounds */
2847 /* get result set for first term */
2848 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2849 &glist[lo].term, mterm);
2850 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2851 glist[lo].term, strlen(glist[lo].term),
2852 NULL, 0, zapt->term->which, rset_nmem,
2853 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2854 0 /* term_ref_id_str */);
2856 ptr[j0]++; /* move index for this set .. */
2857 /* get result set for remaining scan terms */
2858 for (j = j0+1; j<ord_no; j++)
2860 if (ptr[j] < before+after && ptr[j] >= 0 &&
2861 (tst = scan_info_array[j].list[ptr[j]].term) &&
2862 !strcmp (tst, mterm))
2871 zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2873 strlen(glist[lo].term), NULL, 0,
2874 zapt->term->which,rset_nmem,
2875 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2876 0 /* term_ref_id_str */ );
2877 rset = rset_create_or(rset_nmem, kc,
2878 kc->scope, 0 /* termid */,
2887 /* merge with limit_set if given */
2892 rsets[1] = rset_dup(limit_set);
2894 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2897 count_set(zh, rset, &count);
2898 glist[lo].occurrences = count;
/* fewer terms than requested after the scan term: shrink the count */
2904 *num_entries -= (after-i);
2906 if (*num_entries < 0)
2909 nmem_destroy(rset_nmem);
2914 /* consider terms before main term */
2915 for (i = 0; i<ord_no; i++)
2918 for (i = 0; i<before; i++)
2921 const char *mterm = NULL;
2924 int lo = before-1-i; /* offset in result list */
/* going backwards: pick the greatest term; tables are read from the
   end (index before-1-ptr[j]) */
2927 for (j = 0; j <ord_no; j++)
2929 if (ptr[j] < before && ptr[j] >= 0 &&
2930 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2931 (!mterm || strcmp (tst, mterm) > 0))
2940 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2941 &glist[lo].term, mterm);
2944 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2945 glist[lo].term, strlen(glist[lo].term),
2946 NULL, 0, zapt->term->which, rset_nmem,
2947 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2948 0 /* term_ref_id_str */);
/* OR in the same term found under the remaining ordinals */
2952 for (j = j0+1; j<ord_no; j++)
2954 if (ptr[j] < before && ptr[j] >= 0 &&
2955 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2956 !strcmp (tst, mterm))
2961 rsets[1] = rset_trunc(
2963 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2965 strlen(glist[lo].term), NULL, 0,
2966 zapt->term->which, rset_nmem,
2967 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2968 0 /* term_ref_id_str */);
2969 rset = rset_create_or(rset_nmem, kc,
2970 kc->scope, 0 /* termid */, 2, rsets);
2979 rsets[1] = rset_dup(limit_set);
2981 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2983 count_set(zh, rset, &count);
2984 glist[lo].occurrences = count;
2988 nmem_destroy(rset_nmem);
2995 if (*num_entries <= 0)
3002 *list = glist + i; /* list is set to first 'real' entry */
3004 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3005 *position, *num_entries);
3012 * indent-tabs-mode: nil
3014 * vim: shiftwidth=4 tabstop=8 expandtab