1 /* $Id: zrpn.c,v 1.223 2006-07-03 10:52:48 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* Context passed (as void *) to rpn_char_map_handler.  The members used in
   this file are zm and reg_type; the member list itself is not visible in
   this view.  The declarations that follow it are the cached log level
   obtained from yaz_log_module_level("rpn"), a flag that is presumably its
   "already looked up" guard (confirm), and TERMSET_DISABLE, for which no
   use is visible in this view. */
40 struct rpn_char_map_info
46 static int log_level_set = 0;
47 static int log_level_rpn = 0;
49 #define TERMSET_DISABLE 1
/* Character-map callback installed with dict_grep_cmap().
   vp   - the struct rpn_char_map_info filled in by rpn_char_map_prepare()
   from - in/out pointer to the input text, len bytes are available
   The input is mapped with zebra_maps_input() using the zebra maps and
   register type stored in vp.  The result is presumably what is returned
   (return statement not visible in this view). */
51 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
53 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
54 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
/* logging of the mapped output as hex bytes; the enclosing condition and
   loop are not visible here */
58 const char *outp = *out;
59 yaz_log(YLOG_LOG, "---");
62 yaz_log(YLOG_LOG, "%02X", *outp);
/* Fill map_info with the register's zebra maps and the given reg_type, then
   install rpn_char_map_handler on reg->dict with map_info as its argument,
   so that dictionary grep maps characters for this register type. */
70 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
71 struct rpn_char_map_info *map_info)
73 map_info->zm = reg->zebra_maps;
74 map_info->reg_type = reg_type;
75 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Convert the internal term src into dst using the output mapping returned
   by zebra_maps_output() for zh->reg->zebra_maps.
   Both visible copy conditions keep the output length below IT_MAX_WORD-1;
   callers in this file pass a char[IT_MAX_WORD] buffer as dst. */
92 void zebra_term_untrans(ZebraHandle zh, int reg_type,
93 char *dst, const char *src)
98 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
/* bound check before a byte is stored (the store is not visible here) */
102 if (len < IT_MAX_WORD-1)
/* copy the mapped string while there is room left in dst */
107 while (*cp && len < IT_MAX_WORD-1)
/* add_isam_p: record one dictionary hit in the grep_info p.
     name - dictionary key of the hit; it starts with an encoded ordinal
            (read with key_SU_decode) followed by the term text
     info - length-prefixed ISAM position: info[0] must equal
            sizeof(ISAM_P) (asserted), the value itself starts at info+1
   When isam_p_buf is full its capacity becomes 2*size+100 and the existing
   entries are copied into the new buffer.  The term_no array is grown to
   the same capacity; its old contents must come from p->term_no, since
   isam_p_buf holds ISAM_P values, not ints.
   The decoded term is also logged and handed to resultSetAddTerm()
   (presumably only when p->termset is set; the guard is not visible). */
113 static void add_isam_p(const char *name, const char *info,
118 log_level_rpn = yaz_log_module_level("rpn");
/* buffer full: grow before storing the new position */
121 if (p->isam_p_indx == p->isam_p_size)
123 ISAM_P *new_isam_p_buf;
127 p->isam_p_size = 2*p->isam_p_size + 100;
128 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
132 memcpy(new_isam_p_buf, p->isam_p_buf,
133 p->isam_p_indx * sizeof(*p->isam_p_buf));
134 xfree(p->isam_p_buf);
136 p->isam_p_buf = new_isam_p_buf;
139 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* preserve the existing term numbers (source is the old term_no array) */
142 memcpy(new_term_no, p->term_no,
143 p->isam_p_indx * sizeof(*p->term_no));
146 p->term_no = new_term_no;
/* info is a one-byte length followed by the ISAM_P value */
149 assert(*info == sizeof(*p->isam_p_buf));
150 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
155 char term_tmp[IT_MAX_WORD];
157 const char *index_name;
/* len = number of bytes taken by the encoded ordinal at the start of name */
158 int len = key_SU_decode (&ord, (const unsigned char *) name);
160 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
161 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
162 zebraExplain_lookup_ord(p->zh->reg->zei,
163 ord, 0 /* index_type */, &db, &index_name);
164 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
166 resultSetAddTerm(p->zh, p->termset, name[len], db,
167 index_name, term_tmp);
/* Dictionary grep callback: treats p as a struct grep_info * and forwards
   the hit (name, info) to add_isam_p().  The return value is not visible in
   this view. */
172 static int grep_handle(char *name, const char *info, void *p)
174 add_isam_p(name, info, (struct grep_info *) p);
/* term_pre: skip leading white space in *src before a term is parsed.
     ct1, ct2 - optional character sets (may be NULL); the current character
                is tested against each with strchr() before it is mapped
     first    - passed on to zebra_maps_input()
   A character is treated as white space when its mapping starts with
   CHR_SPACE.  Callers test the result with '!' and give up when it is zero,
   so the result presumably tells whether any term text remains (the return
   statement is not visible in this view). */
178 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
179 const char *ct1, const char *ct2, int first)
181 const char *s1, *s0 = *src;
184 /* skip white space */
187 if (ct1 && strchr(ct1, *s0))
189 if (ct2 && strchr(ct2, *s0))
192 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
/* a mapping that is not CHR_SPACE means real term text starts here */
193 if (**map != *CHR_SPACE)
/* esc_str: build a human-readable dump of in_buf (in_size bytes) in out_buf
   for logging.  Each byte is appended as "%02X:%c " (hex value, then a
   character).  out_size must be greater than 20 (asserted).  Once the
   output grows past out_size-20 bytes ".." is appended; presumably the loop
   stops there (the break is not visible in this view). */
202 static void esc_str(char *out_buf, size_t out_size,
203 const char *in_buf, int in_size)
209 assert(out_size > 20);
211 for (k = 0; k<in_size; k++)
/* mask so that bytes >= 0x80 are not sign-extended */
213 int c = in_buf[k] & 0xff;
/* bytes outside 32..126 get different treatment for the %c field; the
   replacement character is chosen on a line not visible here */
215 if (c < 32 || c > 126)
219 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
220 if (strlen(out_buf) > out_size-20)
222 strcat(out_buf, "..");
/* Characters that the term_* routines test for with strchr(); presumably
   those that must be escaped in a generated regular expression (confirm). */
228 #define REGEX_CHARS " []()|.*+?!"
230 /* term_100: handle term, where trunc = none(no operators at all) */
/*   src         - in/out: position in the query text
     dst         - receives the mapped term (copied from map[0])
     dst_term    - receives the raw source characters of the term
     space_split - non-zero: a space-mapped character is tested for right
                   after the mapping; zero: "complete subfield" mode, where
                   spaces are saved and re-inserted before the next
                   non-space character
   Callers treat a zero result as "no term found". */
231 static int term_100(ZebraMaps zebra_maps, int reg_type,
232 const char **src, char *dst, int space_split,
/* start and end of the most recent run of space-mapped output */
240 const char *space_start = 0;
241 const char *space_end = 0;
243 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
250 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
254 if (**map == *CHR_SPACE)
257 else /* complete subfield only. */
259 if (**map == *CHR_SPACE)
260 { /* save space mapping for later .. */
265 else if (space_start)
266 { /* reload last space */
267 while (space_start < space_end)
269 if (strchr(REGEX_CHARS, *space_start))
271 dst_term[j++] = *space_start;
272 dst[i++] = *space_start++;
275 space_start = space_end = 0;
278 /* add non-space char */
279 memcpy(dst_term+j, s1, s0 - s1);
285 if (strchr(REGEX_CHARS, *s1))
/* debug dump of the mapped bytes, then append the mapping to dst */
293 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
295 strcpy(dst + i, map[0]);
305 /* term_101: handle term, where trunc = Process # */
/*   src         - in/out: position in the query text
     dst         - receives the generated pattern (mapped characters)
     dst_term    - receives the raw term text, NUL-terminated at the end
     space_split - non-zero: a space-mapped character is tested for
   '#' is given to term_pre() as both ct1 and ct2.  Callers treat a zero
   result as "no term found". */
306 static int term_101(ZebraMaps zebra_maps, int reg_type,
307 const char **src, char *dst, int space_split,
315 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
/* operator character: copied verbatim to dst_term (what is written to dst
   for it is not visible in this view) */
324 dst_term[j++] = *s0++;
330 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
332 if (space_split && **map == *CHR_SPACE)
335 /* add non-space char */
336 memcpy(dst_term+j, s1, s0 - s1);
342 if (strchr(REGEX_CHARS, *s1))
350 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
352 strcpy(dst + i, map[0]);
358 dst_term[j++] = '\0';
363 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/*   src         - in/out: position in the query text
     dst         - receives the generated pattern
     errors      - optional (term_102 passes NULL); when the term starts
                   with "+<digit>+" followed by more text, *errors is set to
                   that digit's value
     space_split - non-zero: a space-mapped character is tested for
   Callers treat a zero result as "no term found". */
364 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
365 char *dst, int *errors, int space_split,
373 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
/* leading "+N+" prefix: N is a single decimal digit */
376 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
377 isdigit(((const unsigned char *)s0)[1]))
379 *errors = s0[1] - '0';
/* regular expression operator characters are recognised here rather than
   being sent through the character map */
386 if (strchr("^\\()[].*+?|-", *s0))
395 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
397 if (space_split && **map == *CHR_SPACE)
400 /* add non-space char */
401 memcpy(dst_term+j, s1, s0 - s1);
407 if (strchr(REGEX_CHARS, *s1))
415 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
417 strcpy(dst + i, map[0]);
429 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Thin wrapper: same parsing as term_103, but with errors == NULL so no
   "+N+" error-count prefix is recognised. */
430 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
431 char *dst, int space_split, char *dst_term)
433 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
438 /* term_104: handle term, where trunc = Process # and ! */
/*   src         - in/out: position in the query text
     dst         - receives the generated pattern
     dst_term    - receives the raw term text, NUL-terminated at the end
     space_split - non-zero: a space-mapped character is tested for
   NOTE(review): the character set given to term_pre() is "?*#", which does
   not match the '!' named in the comment above - confirm which is intended.
   Callers treat a zero result as "no term found". */
439 static int term_104(ZebraMaps zebra_maps, int reg_type,
440 const char **src, char *dst, int space_split,
448 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
455 dst_term[j++] = *s0++;
/* an operator may be followed by a decimal count, accumulated in limit;
   the digits are copied to dst_term as well */
456 if (*s0 >= '0' && *s0 <= '9')
459 while (*s0 >= '0' && *s0 <= '9')
461 limit = limit * 10 + (*s0 - '0');
462 dst_term[j++] = *s0++;
482 dst_term[j++] = *s0++;
487 dst_term[j++] = *s0++;
493 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
495 if (space_split && **map == *CHR_SPACE)
498 /* add non-space char */
499 memcpy(dst_term+j, s1, s0 - s1);
505 if (strchr(REGEX_CHARS, *s1))
513 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
515 strcpy(dst + i, map[0]);
521 dst_term[j++] = '\0';
526 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/*   src            - in/out: position in the query text
     dst            - receives the generated pattern
     dst_term       - receives the raw term text, NUL-terminated at the end
     space_split    - non-zero: a space-mapped character is tested for
     right_truncate - string_term passes 1 for truncation 105 and 0 for 106;
                      its use is on lines not visible in this view
   Callers treat a zero result as "no term found". */
527 static int term_105(ZebraMaps zebra_maps, int reg_type,
528 const char **src, char *dst, int space_split,
529 char *dst_term, int right_truncate)
536 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
/* operator characters are copied verbatim to dst_term */
545 dst_term[j++] = *s0++;
550 dst_term[j++] = *s0++;
556 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
558 if (space_split && **map == *CHR_SPACE)
561 /* add non-space char */
562 memcpy(dst_term+j, s1, s0 - s1);
568 if (strchr(REGEX_CHARS, *s1))
576 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
578 strcpy(dst + i, map[0]);
590 dst_term[j++] = '\0';
596 /* gen_regular_rel - generate regular expression from relation
597 * val: border value (inclusive)
598 * islt: 1 if <=; 0 if >=.
/* dst receives the generated regular expression as a NUL-terminated string;
   no size is passed, so the caller must supply a buffer that is large
   enough. */
600 static void gen_regular_rel(char *dst, int val, int islt)
607 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
/* the expression starts with an alternative matching every number of the
   opposite sign; which of the two prefixes is used is decided on lines not
   visible in this view */
611 strcpy(dst, "(-[0-9]+|(");
619 strcpy(dst, "([0-9]+|-(");
/* work on the decimal digits of val, last digit first */
631 sprintf(numstr, "%d", val);
632 for (w = strlen(numstr); --w >= 0; pos++)
651 strcpy(dst + dst_p, numstr);
652 dst_p = strlen(dst) - pos - 1;
680 for (i = 0; i<pos; i++)
693 /* match everything less than 10^(pos-1) */
695 for (i = 1; i<pos; i++)
696 strcat(dst, "[0-9]?");
700 /* match everything greater than 10^pos */
701 for (i = 0; i <= pos; i++)
702 strcat(dst, "[0-9]");
703 strcat(dst, "[0-9]*");
/* Append the character at src[*indx] to *term_p, advancing both *term_p and
   *indx.  A backslash is copied together with the character that follows
   it, so an escaped pair is never split. */
708 void string_rel_add_char(char **term_p, const char *src, int *indx)
710 if (src[*indx] == '\\')
711 *(*term_p)++ = src[(*indx)++];
712 *(*term_p)++ = src[(*indx)++];
716 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
717 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
718 * >= abc ([b-].*|a[c-].*|ab[c-].*)
719 * ([^-a].*|a[^-b].*|ab[c-].*)
720 * < abc ([-0].*|a[-a].*|ab[-b].*)
721 * ([^a-].*|a[^b-].*|ab[^c-].*)
722 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
723 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* string_relation: append to term_dict a regular expression for the string
   relation requested by zapt (attribute type 2).
     term_sub   - in/out: position in the query text; the term is parsed
                  with term_100()
     term_dict  - dictionary expression built so far; new text is written
                  at its current end
     term_dst   - receives the raw term text (via term_100)
     error_code - set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for a relation
                  that is not supported
   The case labels are not visible in this view; the log messages identify
   the branches.  Callers treat a zero result as failure. */
725 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
726 const char **term_sub, char *term_dict,
727 oid_value attributeSet,
728 int reg_type, int space_split, char *term_dst,
/* write position: the current end of term_dict */
734 char *term_tmp = term_dict + strlen(term_dict);
735 char term_component[2*IT_MAX_WORD+20];
737 attr_init_APT(&relation, zapt, 2);
738 relation_value = attr_find(&relation, NULL);
741 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
742 switch (relation_value)
745 if (!term_100(zh->reg->zebra_maps, reg_type,
746 term_sub, term_component,
747 space_split, term_dst))
749 yaz_log(log_level_rpn, "Relation <");
/* one alternative is emitted per character position of the term */
752 for (i = 0; term_component[i]; )
759 string_rel_add_char(&term_tmp, term_component, &j);
764 string_rel_add_char(&term_tmp, term_component, &i);
/* guard against the expression growing beyond IT_MAX_WORD */
771 if ((term_tmp - term_dict) > IT_MAX_WORD)
778 if (!term_100(zh->reg->zebra_maps, reg_type,
779 term_sub, term_component,
780 space_split, term_dst))
782 yaz_log(log_level_rpn, "Relation <=");
785 for (i = 0; term_component[i]; )
790 string_rel_add_char(&term_tmp, term_component, &j);
794 string_rel_add_char(&term_tmp, term_component, &i);
803 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* finally the whole term itself is appended */
806 for (i = 0; term_component[i]; )
807 string_rel_add_char(&term_tmp, term_component, &i);
812 if (!term_100 (zh->reg->zebra_maps, reg_type,
813 term_sub, term_component, space_split, term_dst))
815 yaz_log(log_level_rpn, "Relation >");
818 for (i = 0; term_component[i];)
823 string_rel_add_char(&term_tmp, term_component, &j);
828 string_rel_add_char(&term_tmp, term_component, &i);
836 if ((term_tmp - term_dict) > IT_MAX_WORD)
839 for (i = 0; term_component[i];)
840 string_rel_add_char(&term_tmp, term_component, &i);
847 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
848 term_component, space_split, term_dst))
850 yaz_log(log_level_rpn, "Relation >=");
853 for (i = 0; term_component[i];)
860 string_rel_add_char(&term_tmp, term_component, &j);
/* the last character of the term is handled differently from the others */
863 if (term_component[i+1])
867 string_rel_add_char(&term_tmp, term_component, &i);
871 string_rel_add_char(&term_tmp, term_component, &i);
878 if ((term_tmp - term_dict) > IT_MAX_WORD)
889 yaz_log(log_level_rpn, "Relation =");
890 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
891 term_component, space_split, term_dst))
/* equality: the term is simply wrapped in parentheses */
893 strcat(term_tmp, "(");
894 strcat(term_tmp, term_component);
895 strcat(term_tmp, ")");
898 yaz_log(log_level_rpn, "Relation always matches");
899 /* skip to end of term (we don't care what it is) */
900 while (**term_sub != '\0')
904 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
/* Forward declaration: string_term is defined after term_trunc, which
   calls it. */
910 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
911 const char **term_sub,
912 oid_value attributeSet, NMEM stream,
913 struct grep_info *grep_info,
914 int reg_type, int complete_flag,
915 int num_bases, char **basenames,
917 const char *xpath_use,
918 struct ord_list **ol);
/* term_limits_APT: read the hits-limit (attribute type 9) and term
   reference id (attribute type 10) of zapt.
     hits_limit_value - output: effective hits limit (see below)
     term_ref_id_str  - output: reference id as a string; a numeric id is
                        formatted into memory taken from nmem
   Effective limit:
     attribute absent (-1), reference id present -> zh->approx_limit
     attribute absent (-1), no reference id      -> 0 (no counting)
     attribute value 0                           -> zh->approx_limit
     any other value                             -> used as given */
920 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
921 Z_AttributesPlusTerm *zapt,
922 zint *hits_limit_value,
923 const char **term_ref_id_str,
926 AttrType term_ref_id_attr;
927 AttrType hits_limit_attr;
930 attr_init_APT(&hits_limit_attr, zapt, 9);
931 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
933 attr_init_APT(&term_ref_id_attr, zapt, 10);
934 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
/* numeric reference id: convert to its decimal string form */
935 if (term_ref_id_int >= 0)
937 char *res = nmem_malloc(nmem, 20);
938 sprintf(res, "%d", term_ref_id_int);
939 *term_ref_id_str = res;
942 /* no limit given ? */
943 if (*hits_limit_value == -1)
945 if (*term_ref_id_str)
947 /* use global if term_ref is present */
948 *hits_limit_value = zh->approx_limit;
952 /* no counting if term_ref is not present */
953 *hits_limit_value = 0;
956 else if (*hits_limit_value == 0)
958 /* 0 is the same as global limit */
959 *hits_limit_value = zh->approx_limit;
961 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
962 *term_ref_id_str ? *term_ref_id_str : "none",
/* term_trunc: build the result set for the next term of the query text.
   Reads the term limits with term_limits_APT(), resets the grep_info hit
   index, lets string_term() collect the dictionary hits, and, unless
   *term_sub became NULL (no more terms), creates *rset with rset_trunc()
   from the collected ISAM positions.
     term_sub - in/out: position in the query text
     term_dst - receives the raw term text, also used as the rset term name */
967 static ZEBRA_RES term_trunc(ZebraHandle zh,
968 Z_AttributesPlusTerm *zapt,
969 const char **term_sub,
970 oid_value attributeSet, NMEM stream,
971 struct grep_info *grep_info,
972 int reg_type, int complete_flag,
973 int num_bases, char **basenames,
975 const char *rank_type,
976 const char *xpath_use,
979 struct rset_key_control *kc)
983 zint hits_limit_value;
984 const char *term_ref_id_str = 0;
987 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
/* start collecting hits for this term from scratch */
988 grep_info->isam_p_indx = 0;
989 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
990 reg_type, complete_flag, num_bases, basenames,
991 term_dst, xpath_use, &ol);
994 if (!*term_sub) /* no more terms ? */
996 yaz_log(log_level_rpn, "term: %s", term_dst);
997 *rset = rset_trunc(zh, grep_info->isam_p_buf,
998 grep_info->isam_p_indx, term_dst,
999 strlen(term_dst), rank_type, 1 /* preserve pos */,
1000 zapt->term->which, rset_nmem,
1001 kc, kc->scope, ol, reg_type, hits_limit_value,
/* string_term: look up the next term of the query text in the dictionary
   for every database in basenames and collect the hits in grep_info.
     term_sub      - in/out: position in the query text; each database
                     starts parsing from the same position
     complete_flag - non-zero disables splitting at spaces
     term_dst      - receives the raw term text
     ol            - output: list of the index ordinals that were searched
   For each database the dictionary expression starts with the encoded
   index ordinal in parentheses, every byte preceded by the escape byte 1.
   The term pattern is appended according to the truncation attribute
   (type 5) and the expression is passed to dict_lookup_grep(). */
1008 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1009 const char **term_sub,
1010 oid_value attributeSet, NMEM stream,
1011 struct grep_info *grep_info,
1012 int reg_type, int complete_flag,
1013 int num_bases, char **basenames,
1015 const char *xpath_use,
1016 struct ord_list **ol)
1018 char term_dict[2*IT_MAX_WORD+4000];
1020 AttrType truncation;
1021 int truncation_value;
1023 struct rpn_char_map_info rcmi;
1024 int space_split = complete_flag ? 0 : 1;
1026 int bases_ok = 0; /* no of databases with OK attribute */
1028 *ol = ord_list_create(stream);
1030 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1031 attr_init_APT(&truncation, zapt, 5);
1032 truncation_value = attr_find(&truncation, NULL);
1033 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1035 for (base_no = 0; base_no < num_bases; base_no++)
/* regex_range is set by term_103 from a "+N+" prefix and passed on to
   dict_lookup_grep */
1038 int regex_range = 0;
1039 int max_pos, prefix_len = 0;
1044 termp = *term_sub; /* start of term for each database */
1046 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1048 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1049 basenames[base_no]);
1053 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1054 attributeSet, &ord) != ZEBRA_OK)
1059 *ol = ord_list_append(stream, *ol, ord);
1060 ord_len = key_SU_encode (ord, ord_buf);
/* prefix: "(" escaped-ordinal-bytes ")" */
1062 term_dict[prefix_len++] = '(';
1063 for (i = 0; i<ord_len; i++)
1065 term_dict[prefix_len++] = 1; /* our internal regexp escape char */
1066 term_dict[prefix_len++] = ord_buf[i];
1068 term_dict[prefix_len++] = ')';
1069 term_dict[prefix_len] = '\0';
/* j is the write index into term_dict for the term pattern; presumably it
   starts at prefix_len (the assignment is not visible in this view) */
1071 switch (truncation_value)
1073 case -1: /* not specified */
1074 case 100: /* do not truncate */
1075 if (!string_relation(zh, zapt, &termp, term_dict,
1077 reg_type, space_split, term_dst,
1082 zebra_setError(zh, relation_error, 0);
1089 case 1: /* right truncation */
1090 term_dict[j++] = '(';
1091 if (!term_100(zh->reg->zebra_maps, reg_type,
1092 &termp, term_dict + j, space_split, term_dst))
1097 strcat(term_dict, ".*)");
1099 case 2: /* left truncation */
1100 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1101 if (!term_100(zh->reg->zebra_maps, reg_type,
1102 &termp, term_dict + j, space_split, term_dst))
1107 strcat(term_dict, ")");
1109 case 3: /* left&right truncation */
1110 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1111 if (!term_100(zh->reg->zebra_maps, reg_type,
1112 &termp, term_dict + j, space_split, term_dst))
1117 strcat(term_dict, ".*)");
1119 case 101: /* process # in term */
1120 term_dict[j++] = '(';
1121 if (!term_101(zh->reg->zebra_maps, reg_type,
1122 &termp, term_dict + j, space_split, term_dst))
1127 strcat(term_dict, ")");
1129 case 102: /* Regexp-1 */
1130 term_dict[j++] = '(';
1131 if (!term_102(zh->reg->zebra_maps, reg_type,
1132 &termp, term_dict + j, space_split, term_dst))
1137 strcat(term_dict, ")");
1139 case 103: /* Regexp-2 */
1141 term_dict[j++] = '(';
1142 if (!term_103(zh->reg->zebra_maps, reg_type,
1143 &termp, term_dict + j, &regex_range,
1144 space_split, term_dst))
1149 strcat(term_dict, ")");
1151 case 104: /* process # and ! in term */
1152 term_dict[j++] = '(';
1153 if (!term_104(zh->reg->zebra_maps, reg_type,
1154 &termp, term_dict + j, space_split, term_dst))
1159 strcat(term_dict, ")");
1161 case 105: /* process * and ! in term, right_truncate = 1 */
1162 term_dict[j++] = '(';
1163 if (!term_105(zh->reg->zebra_maps, reg_type,
1164 &termp, term_dict + j, space_split, term_dst, 1))
1169 strcat(term_dict, ")");
1171 case 106: /* process * and ! in term, right_truncate = 0 */
1172 term_dict[j++] = '(';
1173 if (!term_105(zh->reg->zebra_maps, reg_type,
1174 &termp, term_dict + j, space_split, term_dst, 0))
1179 strcat(term_dict, ")");
1182 zebra_setError_zint(zh,
1183 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* debug dump of the pattern part (after the ordinal prefix) */
1190 const char *input = term_dict + prefix_len;
1191 esc_str(buf, sizeof(buf), input, strlen(input));
1193 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1194 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1195 grep_info, &max_pos,
1196 ord_len /* number of "exact" chars */,
1199 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1204 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1209 /* convert APT search term to UTF8 */
/* The result is written to termz.  The visible copies are limited to
   IT_MAX_WORD-1 bytes plus a terminating NUL, so termz must have room for
   IT_MAX_WORD bytes.
     Z_Term_general         - converted with zh->iconv_to_utf8 when one is
                              configured, otherwise copied (truncated)
     Z_Term_characterString - copied (truncated)
     any other term type    - YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM is set */
1210 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1214 Z_Term *term = zapt->term;
1216 switch (term->which)
1218 case Z_Term_general:
1219 if (zh->iconv_to_utf8 != 0)
1221 char *inbuf = (char *) term->u.general->buf;
1222 size_t inleft = term->u.general->len;
1223 char *outbuf = termz;
1224 size_t outleft = IT_MAX_WORD-1;
1227 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
/* conversion failed: call yaz_iconv again with null arguments (presumably
   to reset the converter state - confirm) before reporting the error */
1229 if (ret == (size_t)(-1))
1231 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1234 YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
/* no converter configured: copy the bytes, silently truncating */
1242 sizez = term->u.general->len;
1243 if (sizez > IT_MAX_WORD-1)
1244 sizez = IT_MAX_WORD-1;
1245 memcpy (termz, term->u.general->buf, sizez);
1246 termz[sizez] = '\0';
1249 case Z_Term_characterString:
1250 sizez = strlen(term->u.characterString);
1251 if (sizez > IT_MAX_WORD-1)
1252 sizez = IT_MAX_WORD-1;
1253 memcpy (termz, term->u.characterString, sizez);
1254 termz[sizez] = '\0';
1257 zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1263 /* convert APT SCAN term to internal cmap */
/* The term is first converted to UTF-8 into a local buffer with
   zapt_term_to_utf8() (ZEBRA_FAIL is passed on), then mapped piece by piece
   with zebra_maps_input() for reg_type.  The mapped text is presumably
   written to termz (the stores are not visible in this view). */
1264 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1265 char *termz, int reg_type)
1267 char termz0[IT_MAX_WORD];
1269 if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1270 return ZEBRA_FAIL; /* error */
1274 const char *cp = (const char *) termz0;
1275 const char *cp_end = cp + strlen(cp);
/* a space mapping is remembered here instead of being emitted at once */
1278 const char *space_map = NULL;
1281 while ((len = (cp_end - cp)) > 0)
1283 map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1284 if (**map == *CHR_SPACE)
/* emit the remembered space mapping, then the current mapping */
1289 for (src = space_map; *src; src++)
1292 for (src = *map; *src; src++)
/* Release the term_no and isam_p_buf arrays owned by grep_info. */
1301 static void grep_info_delete(struct grep_info *grep_info)
1304 xfree(grep_info->term_no);
1306 xfree(grep_info->isam_p_buf);
/* grep_info_prepare: initialise grep_info (empty buffers, reg_type, no term
   set) and handle the term set attribute (type 8) of zapt.
   The name of a requested term set is either the numeric attribute value
   formatted as decimal or, for the value -2, the string value of the
   attribute.  The set is created with resultSetAdd(); failure is reported
   as YAZ_BIB1_ILLEGAL_RESULT_SET_NAME.  A branch reporting
   YAZ_BIB1_UNSUPP_SEARCH "termset" is also visible; its condition is not
   visible in this view. */
1309 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1310 Z_AttributesPlusTerm *zapt,
1311 struct grep_info *grep_info,
1315 int termset_value_numeric;
1316 const char *termset_value_string;
1319 grep_info->term_no = 0;
1321 grep_info->isam_p_size = 0;
1322 grep_info->isam_p_buf = NULL;
1324 grep_info->reg_type = reg_type;
1325 grep_info->termset = 0;
1328 attr_init_APT(&termset, zapt, 8);
1329 termset_value_numeric =
1330 attr_find_ex(&termset, NULL, &termset_value_string);
/* -1: attribute not present */
1331 if (termset_value_numeric != -1)
1334 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1338 const char *termset_name = 0;
/* -2: the attribute carries a string value */
1339 if (termset_value_numeric != -2)
1342 sprintf(resname, "%d", termset_value_numeric);
1343 termset_name = resname;
1346 termset_name = termset_value_string;
1347 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1348 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1349 if (!grep_info->termset)
1351 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1360 \brief Create result set(s) for list of terms
1361 \param zh Zebra Handle
1362 \param termz term as used in query but converted to UTF-8
1363 \param attributeSet default attribute set
1364 \param stream memory for result
1365 \param reg_type register type ('w', 'p',..)
1366 \param complete_flag whether it's phrases or not
1367 \param rank_type term flags for ranking
1368 \param xpath_use use attribute for X-Path (-1 for no X-path)
1369 \param num_bases number of databases
1370 \param basenames array of databases
1371 \param rset_nmem memory for result sets
1372 \param result_sets output result set for each term in list (output)
1373 \param num_result_sets number of output result sets (output)
1374 \param kc rset key control to be used for created result sets
/* Splits termz into terms and creates one result set per term with
   term_trunc().  The *result_sets array is taken from stream and re-created
   with extra room whenever it is full.  If a term fails, the result sets
   created so far are deleted and grep_info is released. */
1376 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1377 Z_AttributesPlusTerm *zapt,
1379 oid_value attributeSet,
1381 int reg_type, int complete_flag,
1382 const char *rank_type,
1383 const char *xpath_use,
1384 int num_bases, char **basenames,
1386 RSET **result_sets, int *num_result_sets,
1387 struct rset_key_control *kc)
1389 char term_dst[IT_MAX_WORD+1];
1390 struct grep_info grep_info;
1391 const char *termp = termz;
1394 *num_result_sets = 0;
1396 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
/* array full: allocate a larger one from stream and copy the entries;
   nmem memory is not freed individually */
1402 if (alloc_sets == *num_result_sets)
1405 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1408 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1409 alloc_sets = alloc_sets + add;
1410 *result_sets = rnew;
1412 res = term_trunc(zh, zapt, &termp, attributeSet,
1414 reg_type, complete_flag,
1415 num_bases, basenames,
1416 term_dst, rank_type,
1417 xpath_use, rset_nmem,
1418 &(*result_sets)[*num_result_sets],
/* failure: undo the result sets built for earlier terms */
1420 if (res != ZEBRA_OK)
1423 for (i = 0; i < *num_result_sets; i++)
1424 rset_delete((*result_sets)[i]);
1425 grep_info_delete (&grep_info);
/* no result set was produced for this slot (term_trunc creates none when
   there are no more terms) */
1428 if ((*result_sets)[*num_result_sets] == 0)
1430 (*num_result_sets)++;
1435 grep_info_delete(&grep_info);
/* Phrase search: one result set per term via term_list_trunc(), then
     no terms   -> null result set
     one term   -> that term's result set
     otherwise  -> proximity set over all terms (ordered, no exclusion,
                   relation 3, distance 1) */
1439 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1440 Z_AttributesPlusTerm *zapt,
1441 const char *termz_org,
1442 oid_value attributeSet,
1444 int reg_type, int complete_flag,
1445 const char *rank_type,
1446 const char *xpath_use,
1447 int num_bases, char **basenames,
1450 struct rset_key_control *kc)
1452 RSET *result_sets = 0;
1453 int num_result_sets = 0;
1455 term_list_trunc(zh, zapt, termz_org, attributeSet,
1456 stream, reg_type, complete_flag,
1457 rank_type, xpath_use,
1458 num_bases, basenames,
1460 &result_sets, &num_result_sets, kc);
1461 if (res != ZEBRA_OK)
1463 if (num_result_sets == 0)
1464 *rset = rset_create_null(rset_nmem, kc, 0);
1465 else if (num_result_sets == 1)
1466 *rset = result_sets[0];
1468 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1469 num_result_sets, result_sets,
1470 1 /* ordered */, 0 /* exclusion */,
1471 3 /* relation */, 1 /* distance */);
/* OR-list search: one result set per term via term_list_trunc(), then
     no terms   -> null result set
     one term   -> that term's result set
     otherwise  -> OR of all term result sets */
1477 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1478 Z_AttributesPlusTerm *zapt,
1479 const char *termz_org,
1480 oid_value attributeSet,
1482 int reg_type, int complete_flag,
1483 const char *rank_type,
1484 const char *xpath_use,
1485 int num_bases, char **basenames,
1488 struct rset_key_control *kc)
1490 RSET *result_sets = 0;
1491 int num_result_sets = 0;
1493 term_list_trunc(zh, zapt, termz_org, attributeSet,
1494 stream, reg_type, complete_flag,
1495 rank_type, xpath_use,
1496 num_bases, basenames,
1498 &result_sets, &num_result_sets, kc);
1499 if (res != ZEBRA_OK)
1501 if (num_result_sets == 0)
1502 *rset = rset_create_null(rset_nmem, kc, 0);
1503 else if (num_result_sets == 1)
1504 *rset = result_sets[0];
1506 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1507 num_result_sets, result_sets);
/* AND-list search: one result set per term via term_list_trunc(), then
     no terms   -> null result set
     one term   -> that term's result set
     otherwise  -> AND of all term result sets */
1513 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1514 Z_AttributesPlusTerm *zapt,
1515 const char *termz_org,
1516 oid_value attributeSet,
1518 int reg_type, int complete_flag,
1519 const char *rank_type,
1520 const char *xpath_use,
1521 int num_bases, char **basenames,
1524 struct rset_key_control *kc)
1526 RSET *result_sets = 0;
1527 int num_result_sets = 0;
1529 term_list_trunc(zh, zapt, termz_org, attributeSet,
1530 stream, reg_type, complete_flag,
1531 rank_type, xpath_use,
1532 num_bases, basenames,
1534 &result_sets, &num_result_sets,
1536 if (res != ZEBRA_OK)
1538 if (num_result_sets == 0)
1539 *rset = rset_create_null(rset_nmem, kc, 0);
1540 else if (num_result_sets == 1)
1541 *rset = result_sets[0];
1543 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1544 num_result_sets, result_sets);
/* numeric_relation: append to term_dict a regular expression for the
   numeric relation requested by zapt (attribute type 2) and run
   dict_lookup_grep() with it.
   The term is parsed with term_100() (space_split = 1) and converted with
   atoi().  Patterns, as identified by the log messages:
     <   gen_regular_rel(value-1, islt=1)
     <=  gen_regular_rel(value,   islt=1)
     >=  gen_regular_rel(value,   islt=0)
     >   gen_regular_rel(value+1, islt=0)
     =   "(0*<value>)", i.e. the number with any count of leading zeros
   An unsupported relation sets *error_code to
   YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.  Callers treat a zero result as
   failure. */
1550 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1551 const char **term_sub,
1553 oid_value attributeSet,
1554 struct grep_info *grep_info,
/* write position: the current end of term_dict */
1564 char *term_tmp = term_dict + strlen(term_dict);
1567 attr_init_APT(&relation, zapt, 2);
1568 relation_value = attr_find(&relation, NULL);
1570 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1572 switch (relation_value)
1575 yaz_log(log_level_rpn, "Relation <");
1576 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1579 term_value = atoi (term_tmp);
1580 gen_regular_rel(term_tmp, term_value-1, 1);
1583 yaz_log(log_level_rpn, "Relation <=");
1584 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1587 term_value = atoi (term_tmp);
1588 gen_regular_rel(term_tmp, term_value, 1);
1591 yaz_log(log_level_rpn, "Relation >=");
1592 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1595 term_value = atoi (term_tmp);
1596 gen_regular_rel(term_tmp, term_value, 0);
1599 yaz_log(log_level_rpn, "Relation >");
1600 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1603 term_value = atoi (term_tmp);
1604 gen_regular_rel(term_tmp, term_value+1, 0);
1608 yaz_log(log_level_rpn, "Relation =");
1609 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1612 term_value = atoi (term_tmp);
1613 sprintf(term_tmp, "(0*%d)", term_value);
1616 /* term_tmp untouched.. */
1617 while (**term_sub != '\0')
1621 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1624 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1625 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
/* NOTE(review): this warning text says "rel = gt" although the lookup is
   shared by all relations */
1628 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1629 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
/* numeric_term: numeric counterpart of string_term.  For every database in
   basenames the index ordinal is resolved, appended to *ol and encoded as
   the dictionary expression prefix (in parentheses, every byte preceded by
   the escape byte 1).  numeric_relation() then adds the pattern and does
   the lookup, collecting hits in grep_info.
     term_sub - in/out: position in the query text
     term_dst - receives the raw term text
     ol       - output: list of the index ordinals that were searched */
1633 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1634 const char **term_sub,
1635 oid_value attributeSet, NMEM stream,
1636 struct grep_info *grep_info,
1637 int reg_type, int complete_flag,
1638 int num_bases, char **basenames,
1640 const char *xpath_use,
1641 struct ord_list **ol)
1643 char term_dict[2*IT_MAX_WORD+2];
1646 struct rpn_char_map_info rcmi;
1648 int bases_ok = 0; /* no of databases with OK attribute */
1650 *ol = ord_list_create(stream);
1652 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1654 for (base_no = 0; base_no < num_bases; base_no++)
1656 int max_pos, prefix_len = 0;
1657 int relation_error = 0;
1658 int ord, ord_len, i;
1663 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1665 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1666 basenames[base_no]);
1670 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1671 attributeSet, &ord) != ZEBRA_OK)
1675 *ol = ord_list_append(stream, *ol, ord);
1677 ord_len = key_SU_encode (ord, ord_buf);
/* prefix: "(" escaped-ordinal-bytes ")" */
1679 term_dict[prefix_len++] = '(';
1680 for (i = 0; i < ord_len; i++)
1682 term_dict[prefix_len++] = 1;
1683 term_dict[prefix_len++] = ord_buf[i];
1685 term_dict[prefix_len++] = ')';
1686 term_dict[prefix_len] = '\0';
1688 if (!numeric_relation(zh, zapt, &termp, term_dict,
1689 attributeSet, grep_info, &max_pos, reg_type,
1690 term_dst, &relation_error))
/* report the error code supplied by numeric_relation */
1694 zebra_setError(zh, relation_error, 0);
1704 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Numeric search: splits termz into terms, creates one result set per term
   with numeric_term() followed by rset_trunc() (positions not preserved),
   then
     no terms   -> null result set
     one term   -> that term's result set
     otherwise  -> AND of all term result sets
   The result_sets array is taken from stream and re-created with extra room
   whenever it is full. */
1709 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1710 Z_AttributesPlusTerm *zapt,
1712 oid_value attributeSet,
1714 int reg_type, int complete_flag,
1715 const char *rank_type,
1716 const char *xpath_use,
1717 int num_bases, char **basenames,
1720 struct rset_key_control *kc)
1722 char term_dst[IT_MAX_WORD+1];
1723 const char *termp = termz;
1724 RSET *result_sets = 0;
1725 int num_result_sets = 0;
1727 struct grep_info grep_info;
1729 zint hits_limit_value;
1730 const char *term_ref_id_str = 0;
1732 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1734 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1735 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1739 struct ord_list *ol;
/* array full: allocate a larger one from stream and copy the entries */
1740 if (alloc_sets == num_result_sets)
1743 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1746 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1747 alloc_sets = alloc_sets + add;
1750 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start collecting hits for this term from scratch */
1751 grep_info.isam_p_indx = 0;
1752 res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1753 reg_type, complete_flag, num_bases, basenames,
1754 term_dst, xpath_use, &ol);
/* failure, or no more terms */
1755 if (res == ZEBRA_FAIL || termp == 0)
1757 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1758 result_sets[num_result_sets] =
1759 rset_trunc(zh, grep_info.isam_p_buf,
1760 grep_info.isam_p_indx, term_dst,
1761 strlen(term_dst), rank_type,
1762 0 /* preserve position */,
1763 zapt->term->which, rset_nmem,
1764 kc, kc->scope, ol, reg_type,
1767 if (!result_sets[num_result_sets])
1773 grep_info_delete(&grep_info);
1775 if (res != ZEBRA_OK)
1777 if (num_result_sets == 0)
1778 *rset = rset_create_null(rset_nmem, kc, 0);
1779 else if (num_result_sets == 1)
1780 *rset = result_sets[0];
1782 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1783 num_result_sets, result_sets);
/* Creates *rset as a temporary result set (temp directory taken from the
   "setTmpDir" resource), opens it for writing and writes a key to it.  How
   the key is derived from the term is not visible in this view. */
1789 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1790 Z_AttributesPlusTerm *zapt,
1792 oid_value attributeSet,
1794 const char *rank_type, NMEM rset_nmem,
1796 struct rset_key_control *kc)
1801 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1802 res_get (zh->res, "setTmpDir"),0 );
1803 rsfd = rset_open(*rset, RSETF_WRITE);
1811 rset_write (rsfd, &key);
/* rpn_sort_spec: turn a sort-attribute term of the query into an entry of
   sort_sequence.
   The term (which must be Z_Term_general) holds the decimal index of the
   sort key.  A Z_SortKeySpec is allocated from stream, pointing at the
   attribute list of zapt and the OID of the attribute set, and is stored
   in sort_sequence->specs[index].  The sort relation attribute (type 7)
   selects the direction: 2 means descending, any other value ascending.
   *rset becomes a null result set. */
1816 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1817 oid_value attributeSet, NMEM stream,
1818 Z_SortKeySpecList *sort_sequence,
1819 const char *rank_type,
1822 struct rset_key_control *kc)
1825 int sort_relation_value;
1826 AttrType sort_relation_type;
1833 attr_init_APT(&sort_relation_type, zapt, 7);
1834 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first use: create an array of 10 empty slots */
1836 if (!sort_sequence->specs)
1838 sort_sequence->num_specs = 10;
1839 sort_sequence->specs = (Z_SortKeySpec **)
1840 nmem_malloc(stream, sort_sequence->num_specs *
1841 sizeof(*sort_sequence->specs));
1842 for (i = 0; i<sort_sequence->num_specs; i++)
1843 sort_sequence->specs[i] = 0;
1845 if (zapt->term->which != Z_Term_general)
1848 i = atoi_n ((char *) zapt->term->u.general->buf,
1849 zapt->term->u.general->len);
/* NOTE(review): only the upper bound is checked on the visible lines; a
   negative index would reach specs[i] below unless it is rejected on a
   line that is not visible here - confirm */
1850 if (i >= sort_sequence->num_specs)
1852 sprintf(termz, "%d", i);
1854 oe.proto = PROTO_Z3950;
1855 oe.oclass = CLASS_ATTSET;
1856 oe.value = attributeSet;
1857 if (!oid_ent_to_oid (&oe, oid))
1860 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1861 sks->sortElement = (Z_SortElement *)
1862 nmem_malloc(stream, sizeof(*sks->sortElement));
1863 sks->sortElement->which = Z_SortElement_generic;
1864 sk = sks->sortElement->u.generic = (Z_SortKey *)
1865 nmem_malloc(stream, sizeof(*sk));
1866 sk->which = Z_SortKey_sortAttributes;
1867 sk->u.sortAttributes = (Z_SortAttributes *)
1868 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1870 sk->u.sortAttributes->id = oid;
1871 sk->u.sortAttributes->list = zapt->attributes;
1873 sks->sortRelation = (int *)
1874 nmem_malloc(stream, sizeof(*sks->sortRelation));
1875 if (sort_relation_value == 1)
1876 *sks->sortRelation = Z_SortKeySpec_ascending;
1877 else if (sort_relation_value == 2)
1878 *sks->sortRelation = Z_SortKeySpec_descending;
/* any other value falls back to ascending */
1880 *sks->sortRelation = Z_SortKeySpec_ascending;
1882 sks->caseSensitivity = (int *)
1883 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1884 *sks->caseSensitivity = 0;
1886 sks->which = Z_SortKeySpec_null;
1887 sks->u.null = odr_nullval ();
1888 sort_sequence->specs[i] = sks;
1889 *rset = rset_create_null(rset_nmem, kc, 0);
/*
 * rpn_check_xpath - decide whether the use attribute (type 1) of zapt is
 * an X-Path expression and, if so, parse it.
 *
 * The use attribute is fetched in string form.  When a string is present
 * and starts with '/', the result of
 * zebra_parse_xpath_str(use_string, xpath, max, mem) is returned; "xpath"
 * receives at most "max" location steps.
 * NOTE(review): the value returned when the string is absent or does not
 * start with '/' is not visible here; the caller stores the return value
 * in xpath_len.
 */
1894 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1895 oid_value attributeSet,
1896 struct xpath_location_step *xpath, int max,
1899 oid_value curAttributeSet = attributeSet;
1901 const char *use_string = 0;
1903 attr_init_APT(&use, zapt, 1);
1904 attr_find_ex(&use, &curAttributeSet, &use_string);
/* only a string-valued use attribute beginning with '/' is an X-Path */
1906 if (!use_string || *use_string != '/')
1909 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc - look up a regular-expression term in the dictionary for
 * one of the X-Path helper indexes and return the matching postings as a
 * result set.
 *
 * "xpath_use" names the index; its ordinal is obtained with
 * zebraExplain_lookup_attr_str().  The dictionary pattern is assembled in
 * term_dict as: a bracketed prefix holding the key_SU_encode()d ordinal
 * (every encoded byte preceded by a byte of value 1), followed by the
 * caller's "term" copied verbatim.  dict_lookup_grep() collects the
 * matching ISAM positions through grep_handle and rset_trunc() merges
 * them into the returned RSET.
 *
 * An empty (null) rset is returned when grep_info_prepare() fails, and
 * on a second early exit whose condition is not visible here.
 * NOTE(review): "term" is copied with strcpy into the 2048-byte
 * term_dict without a visible length check - confirm callers bound it.
 */
1914 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1915 int reg_type, const char *term,
1916 const char *xpath_use,
1918 struct rset_key_control *kc)
1921 struct grep_info grep_info;
1922 char term_dict[2048];
1925 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1926 zinfo_index_category_index,
1929 int ord_len, i, r, max_pos;
1930 int term_type = Z_Term_characterString;
1931 const char *flags = "void";
1933 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
1934 return rset_create_null(rset_nmem, kc, 0);
1937 return rset_create_null(rset_nmem, kc, 0);
/* NOTE(review): the condition choosing between '|' and '(' is not visible */
1939 term_dict[prefix_len++] = '|';
1941 term_dict[prefix_len++] = '(';
/* emit the encoded ordinal, each byte preceded by a byte of value 1 */
1943 ord_len = key_SU_encode (ord, ord_buf);
1944 for (i = 0; i<ord_len; i++)
1946 term_dict[prefix_len++] = 1;
1947 term_dict[prefix_len++] = ord_buf[i];
1949 term_dict[prefix_len++] = ')';
1950 strcpy(term_dict+prefix_len, term);
/* restart position collection, then grep the dictionary */
1952 grep_info.isam_p_indx = 0;
1953 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
1954 &grep_info, &max_pos, 0, grep_handle);
1955 yaz_log(YLOG_DEBUG, "%s %d positions", term,
1956 grep_info.isam_p_indx);
1957 rset = rset_trunc(zh, grep_info.isam_p_buf,
1958 grep_info.isam_p_indx, term, strlen(term),
1959 flags, 1, term_type,rset_nmem,
1960 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
1961 0 /* term_ref_id_str */);
1962 grep_info_delete(&grep_info);
/*
 * rpn_search_xpath - restrict a term result set to the elements selected
 * by a parsed X-Path (xpath[0 .. xpath_len-1]).
 *
 * "rset" is the result set for the search term, or 0 when there is no
 * term set; in that case always_matches is set to 1.
 * For every database, the location steps are processed from the
 * innermost (level xpath_len-1) outwards.  At each level a reversed path
 * expression (xpath_rev) is built from the step names, result sets for
 * the matching begin tags and end tags are fetched with xpath_trunc(),
 * and the current rset is wrapped with rset_create_between().  A relation
 * predicate on the step yields an additional attribute rset that is
 * handed to rset_create_between() as well.
 * NOTE(review): several parameters and the final return are not visible
 * here; the callers pass (.., xpath, rset_nmem, rset, kc).
 */
1967 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1968 int num_bases, char **basenames,
1969 NMEM stream, const char *rank_type, RSET rset,
1970 int xpath_len, struct xpath_location_step *xpath,
1973 struct rset_key_control *kc)
1977 int always_matches = rset ? 0 : 1;
1985 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
1986 for (i = 0; i<xpath_len; i++)
1988 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2000 a[@attr = value]/b[@other = othervalue]
2002 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2003 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2004 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2005 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2006 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2007 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* clear the dictionary character-map handler (both arguments are 0) */
2011 dict_grep_cmap (zh->reg->dict, 0, 0);
2013 for (base_no = 0; base_no < num_bases; base_no++)
2015 int level = xpath_len;
2018 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2020 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2021 basenames[base_no]);
/* walk the location steps from the last one back to the first */
2025 while (--level >= 0)
2027 WRBUF xpath_rev = wrbuf_alloc();
2029 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* append steps level, level-1, .., 1 so the path is stored reversed */
2031 for (i = level; i >= 1; --i)
2033 const char *cp = xpath[i].part;
2039 wrbuf_puts(xpath_rev, "[^/]*");
2040 else if (*cp == ' ')
2041 wrbuf_puts(xpath_rev, "\001 ");
2043 wrbuf_putc(xpath_rev, *cp);
2045 /* wrbuf_putc does not null-terminate , but
2046 wrbuf_puts below ensures it does.. so xpath_rev
2047 is OK iff length is > 0 */
2049 wrbuf_puts(xpath_rev, "/");
2051 else if (i == 1) /* // case */
2052 wrbuf_puts(xpath_rev, ".*");
/* relation predicate with a non-empty name: build "name[=value]" */
2054 if (xpath[level].predicate &&
2055 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2056 xpath[level].predicate->u.relation.name[0])
2058 WRBUF wbuf = wrbuf_alloc();
/* the first character of the relation name is skipped */
2059 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2060 if (xpath[level].predicate->u.relation.value)
2062 const char *cp = xpath[level].predicate->u.relation.value;
2063 wrbuf_putc(wbuf, '=');
/* backslash-escape characters listed in REGEX_CHARS */
2067 if (strchr(REGEX_CHARS, *cp))
2068 wrbuf_putc(wbuf, '\\');
2069 wrbuf_putc(wbuf, *cp);
2073 wrbuf_puts(wbuf, "");
2074 rset_attr = xpath_trunc(
2075 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2077 wrbuf_free(wbuf, 1);
2083 wrbuf_free(xpath_rev, 1);
2087 yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level,
2088 wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
/* an empty xpath_rev means there is nothing to look up at this level */
2089 if (wrbuf_len(xpath_rev))
2091 rset_start_tag = xpath_trunc(zh, stream, '0',
2092 wrbuf_buf(xpath_rev),
2093 ZEBRA_XPATH_ELM_BEGIN,
2096 rset = rset_start_tag;
2099 rset_end_tag = xpath_trunc(zh, stream, '0',
2100 wrbuf_buf(xpath_rev),
2101 ZEBRA_XPATH_ELM_END,
/* combine begin tag, current set, end tag and attribute set */
2104 rset = rset_create_between(rset_nmem, kc, kc->scope,
2105 rset_start_tag, rset,
2106 rset_end_tag, rset_attr);
2109 wrbuf_free(xpath_rev, 1);
/* upper bound on the number of location steps parsed from an X-Path */
2117 #define MAX_XPATH_STEPS 10

/*
 * rpn_search_APT - evaluate one attributes-plus-term operand.
 *
 * zebra_maps_attr() derives the register type (reg_id), the search type,
 * the rank type and the complete/sort flags from the attributes, and the
 * term is converted to UTF-8 in termz.  A sort request is delegated to
 * rpn_sort_spec().  Otherwise the use attribute is checked for an X-Path
 * with rpn_check_xpath(); when the last step starts with '@' the
 * attribute-cdata index is used, else the cdata index.  Relation
 * attribute (type 2) value 103 ("always matches") skips the term search
 * and calls rpn_search_xpath() with no term set.
 * The term search is dispatched on search_type: "phrase", "and-list",
 * "or-list", "local" or "numeric"; any other value is reported as
 * YAZ_BIB1_UNSUPP_SEARCH.  The resulting set is finally passed through
 * rpn_search_xpath().
 * NOTE(review): the conditions guarding the sort and X-Path branches are
 * not visible here.
 */
2119 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2120 oid_value attributeSet, NMEM stream,
2121 Z_SortKeySpecList *sort_sequence,
2122 int num_bases, char **basenames,
2125 struct rset_key_control *kc)
2127 ZEBRA_RES res = ZEBRA_OK;
2129 char *search_type = NULL;
2130 char rank_type[128];
2133 char termz[IT_MAX_WORD+1];
2135 const char *xpath_use = 0;
2136 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2140 log_level_rpn = yaz_log_module_level("rpn");
/* reg_id receives the register type; it is passed by address */
2143 zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2144 rank_type, &complete_flag, &sort_flag);
2146 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2147 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2148 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2149 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2151 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2155 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2156 rank_type, rset_nmem, rset, kc);
2157 /* consider if an X-Path query is used */
2158 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2159 xpath, MAX_XPATH_STEPS, stream);
2162 if (xpath[xpath_len-1].part[0] == '@')
2163 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2165 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2172 attr_init_APT(&relation, zapt, 2);
2173 relation_value = attr_find(&relation, NULL);
2175 if (relation_value == 103) /* alwaysmatches */
2177 *rset = 0; /* signal no "term" set */
2178 return rpn_search_xpath(zh, num_bases, basenames,
2179 stream, rank_type, *rset,
2180 xpath_len, xpath, rset_nmem, rset, kc);
2185 /* search using one of the various search type strategies
2186 termz is our UTF-8 search term
2187 attributeSet is top-level default attribute set
2188 stream is ODR for search
2189 reg_id is the register type
2190 complete_flag is 1 for complete subfield, 0 for incomplete
2191 xpath_use is use-attribute to be used for X-Path search, 0 for none
2193 if (!strcmp(search_type, "phrase"))
2195 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2196 reg_id, complete_flag, rank_type,
2198 num_bases, basenames, rset_nmem,
2201 else if (!strcmp(search_type, "and-list"))
2203 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2204 reg_id, complete_flag, rank_type,
2206 num_bases, basenames, rset_nmem,
2209 else if (!strcmp(search_type, "or-list"))
2211 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2212 reg_id, complete_flag, rank_type,
2214 num_bases, basenames, rset_nmem,
2217 else if (!strcmp(search_type, "local"))
2219 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2220 rank_type, rset_nmem, rset, kc);
2222 else if (!strcmp(search_type, "numeric"))
2224 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2225 reg_id, complete_flag, rank_type,
2227 num_bases, basenames, rset_nmem,
2232 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2235 if (res != ZEBRA_OK)
2239 return rpn_search_xpath(zh, num_bases, basenames,
2240 stream, rank_type, *rset,
2241 xpath_len, xpath, rset_nmem, rset, kc);
/*
 * Forward declaration: rpn_search_top() calls rpn_search_structure()
 * before its definition, and the function also calls itself recursively.
 */
2244 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2245 oid_value attributeSet,
2246 NMEM stream, NMEM rset_nmem,
2247 Z_SortKeySpecList *sort_sequence,
2248 int num_bases, char **basenames,
2249 RSET **result_sets, int *num_result_sets,
2250 Z_Operator *parent_op,
2251 struct rset_key_control *kc);
/*
 * rpn_search_top - entry point for evaluating a whole RPN query tree.
 *
 * Creates a key control object, evaluates the tree rooted at "zs" with
 * rpn_search_structure() (no parent operator) and expects exactly one
 * result set back, which is stored in *result_set.  When the evaluation
 * fails, every result set produced so far is deleted.
 * NOTE(review): the remaining parameters, the release of "kc" and the
 * return statements are not visible here.
 */
2253 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2254 oid_value attributeSet,
2255 NMEM stream, NMEM rset_nmem,
2256 Z_SortKeySpecList *sort_sequence,
2257 int num_bases, char **basenames,
2260 RSET *result_sets = 0;
2261 int num_result_sets = 0;
2263 struct rset_key_control *kc = zebra_key_control_create(zh);
2265 res = rpn_search_structure(zh, zs, attributeSet,
2268 num_bases, basenames,
2269 &result_sets, &num_result_sets,
2270 0 /* no parent op */,
/* failure: drop whatever partial result sets were handed back */
2272 if (res != ZEBRA_OK)
2275 for (i = 0; i<num_result_sets; i++)
2276 rset_delete(result_sets[i]);
/* with no parent operator the tree must collapse to a single set */
2281 assert(num_result_sets == 1);
2282 assert(result_sets);
2283 assert(*result_sets);
2284 *result_set = *result_sets;
/*
 * rpn_search_structure - recursively evaluate an RPN query node.
 *
 * On return *result_sets / *num_result_sets describe a list of result
 * sets allocated from "stream".
 *
 * Complex node: both operands (s1, s2) are evaluated recursively and
 * their lists are concatenated.  If this node has no parent operator,
 * or the parent operator differs from this one, or this operator is
 * neither AND nor OR, the concatenated list is combined immediately
 * into a single result set (and, or, and-not, or proximity) and the
 * returned list has exactly one entry.  Otherwise the concatenated list
 * is returned as is; presumably the parent with the same AND/OR operator
 * combines all operands in one go - confirm against the complete body.
 * Proximity supports only the known unit "word"; other units are
 * reported with YAZ_BIB1_UNSUPP_PROX_UNIT_CODE.  An unknown operator is
 * reported with YAZ_BIB1_OPERATOR_UNSUPP.
 *
 * Simple node: an APT operand is evaluated with rpn_search_APT(), a
 * result set id is resolved with resultSetRef(); any other operand and
 * any other node kind is reported with YAZ_BIB1_UNSUPP_SEARCH.
 *
 * When an operand fails, result sets collected so far are deleted.
 */
2290 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2291 oid_value attributeSet,
2292 NMEM stream, NMEM rset_nmem,
2293 Z_SortKeySpecList *sort_sequence,
2294 int num_bases, char **basenames,
2295 RSET **result_sets, int *num_result_sets,
2296 Z_Operator *parent_op,
2297 struct rset_key_control *kc)
2299 *num_result_sets = 0;
2300 if (zs->which == Z_RPNStructure_complex)
2303 Z_Operator *zop = zs->u.complex->roperator;
2304 RSET *result_sets_l = 0;
2305 int num_result_sets_l = 0;
2306 RSET *result_sets_r = 0;
2307 int num_result_sets_r = 0;
/* left operand */
2309 res = rpn_search_structure(zh, zs->u.complex->s1,
2310 attributeSet, stream, rset_nmem,
2312 num_bases, basenames,
2313 &result_sets_l, &num_result_sets_l,
2315 if (res != ZEBRA_OK)
2318 for (i = 0; i<num_result_sets_l; i++)
2319 rset_delete(result_sets_l[i]);
/* right operand */
2322 res = rpn_search_structure(zh, zs->u.complex->s2,
2323 attributeSet, stream, rset_nmem,
2325 num_bases, basenames,
2326 &result_sets_r, &num_result_sets_r,
/* right side failed: release both the left and the right sets */
2328 if (res != ZEBRA_OK)
2331 for (i = 0; i<num_result_sets_l; i++)
2332 rset_delete(result_sets_l[i]);
2333 for (i = 0; i<num_result_sets_r; i++)
2334 rset_delete(result_sets_r[i]);
2338 /* make a new list of result for all children */
2339 *num_result_sets = num_result_sets_l + num_result_sets_r;
2340 *result_sets = nmem_malloc(stream, *num_result_sets *
2341 sizeof(**result_sets));
2342 memcpy(*result_sets, result_sets_l,
2343 num_result_sets_l * sizeof(**result_sets));
2344 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2345 num_result_sets_r * sizeof(**result_sets));
2347 if (!parent_op || parent_op->which != zop->which
2348 || (zop->which != Z_Operator_and &&
2349 zop->which != Z_Operator_or))
2351 /* parent node different from this one (or non-present) */
2352 /* we must combine result sets now */
2356 case Z_Operator_and:
2357 rset = rset_create_and(rset_nmem, kc,
2359 *num_result_sets, *result_sets);
2362 rset = rset_create_or(rset_nmem, kc,
2363 kc->scope, 0, /* termid */
2364 *num_result_sets, *result_sets);
2366 case Z_Operator_and_not:
2367 rset = rset_create_not(rset_nmem, kc,
2372 case Z_Operator_prox:
/* only a "known" proximity unit is accepted .. */
2373 if (zop->u.prox->which != Z_ProximityOperator_known)
2376 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* .. and of the known units only "word" */
2380 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2382 zebra_setError_zint(zh,
2383 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2384 *zop->u.prox->u.known);
2389 rset = rset_create_prox(rset_nmem, kc,
2391 *num_result_sets, *result_sets,
2392 *zop->u.prox->ordered,
/* a missing exclusion flag is treated as 0 */
2393 (!zop->u.prox->exclusion ?
2394 0 : *zop->u.prox->exclusion),
2395 *zop->u.prox->relationType,
2396 *zop->u.prox->distance );
2400 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the operand list by the single combined set */
2403 *num_result_sets = 1;
2404 *result_sets = nmem_malloc(stream, *num_result_sets *
2405 sizeof(**result_sets));
2406 (*result_sets)[0] = rset;
2409 else if (zs->which == Z_RPNStructure_simple)
2414 if (zs->u.simple->which == Z_Operand_APT)
2416 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2417 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2418 attributeSet, stream, sort_sequence,
2419 num_bases, basenames, rset_nmem, &rset,
2421 if (res != ZEBRA_OK)
2424 else if (zs->u.simple->which == Z_Operand_resultSetId)
2426 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2427 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2431 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2432 zs->u.simple->u.resultSetId);
2439 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields a one-element list */
2442 *num_result_sets = 1;
2443 *result_sets = nmem_malloc(stream, *num_result_sets *
2444 sizeof(**result_sets));
2445 (*result_sets)[0] = rset;
2449 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* One scanned dictionary term; scan_handle() fills in its term and isam_p. */
2455 struct scan_info_entry {
/* Slot array of struct scan_info; rpn_scan() allocates before+after slots. */
2461 struct scan_info_entry *list;
/*
 * scan_handle - callback handed to dict_scan() by rpn_scan(); records
 * one dictionary entry in the scan_info passed as "client".
 *
 * name:  dictionary key; it is compared against scan_info->prefix and
 *        the part after the prefix is stored as the term.
 * info:  first byte must equal sizeof(ISAM_P) (asserted); the ISAM_P
 *        value itself follows from info+1.
 * pos:   position reported by the scan; the slot index is computed as
 *        after - pos + before.
 * The term copy is allocated from scan_info->odr.
 * NOTE(review): the return values and the handling of a prefix mismatch
 * are not visible here.
 */
2467 static int scan_handle (char *name, const char *info, int pos, void *client)
2469 int len_prefix, idx;
2470 struct scan_info *scan_info = (struct scan_info *) client;
2472 len_prefix = strlen(scan_info->prefix);
2473 if (memcmp (name, scan_info->prefix, len_prefix))
2476 idx = scan_info->after - pos + scan_info->before;
/* keep a private copy of the term without the index prefix */
2482 scan_info->list[idx].term = (char *)
2483 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2484 strcpy(scan_info->list[idx].term, name + len_prefix);
2485 assert (*info == sizeof(ISAM_P));
2486 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
/*
 * zebra_term_untrans_iconv - convert an index term back to display form
 * and store a copy, allocated from "stream", in *dst.
 *
 * The term is first mapped back with zebra_term_untrans() into
 * term_src.  If zh->iconv_from_utf8 is set, term_src is additionally
 * converted with yaz_iconv() into term_dst (at most sizeof(term_dst)-1
 * bytes of output) and the converted bytes are copied to *dst.
 * Without a converter *dst is a plain nmem_strdup() of term_src.
 * NOTE(review): the handling of a yaz_iconv() failure and the
 * termination of *dst are not visible here.
 */
2490 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2491 char **dst, const char *src)
2493 char term_src[IT_MAX_WORD];
2494 char term_dst[IT_MAX_WORD];
2496 zebra_term_untrans (zh, reg_type, term_src, src);
2498 if (zh->iconv_from_utf8 != 0)
2501 char *inbuf = term_src;
2502 size_t inleft = strlen(term_src);
2503 char *outbuf = term_dst;
/* one byte of term_dst is held back from the converter */
2504 size_t outleft = sizeof(term_dst)-1;
2507 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2509 if (ret == (size_t)(-1))
/* number of bytes the converter produced */
2512 len = outbuf - term_dst;
2513 *dst = nmem_malloc(stream, len + 1);
2515 memcpy (*dst, term_dst, len);
2519 *dst = nmem_strdup(stream, term_src);
/*
 * count_set - read through a result set and report its hit count.
 *
 * The set's hits_limit is set to zh->approx_limit, the set is opened
 * for reading and keys are read one by one; key.mem[0] is compared with
 * the previously seen value (psysno) to detect a change.  The loop also
 * tests rfd->counted_items against rset->hits_limit.  *count receives
 * rset->hits_count.
 * NOTE(review): the action taken once the limit is reached and the
 * closing/deletion of the set are not visible here.
 */
2522 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2528 yaz_log(YLOG_DEBUG, "count_set");
2530 rset->hits_limit = zh->approx_limit;
2533 rfd = rset_open(rset, RSETF_READ);
2534 while (rset_read(rfd, &key,0 /* never mind terms */))
2536 if (key.mem[0] != psysno)
2538 psysno = key.mem[0];
2539 if (rfd->counted_items >= rset->hits_limit)
2544 *count = rset->hits_count;
/* capacity of the ords[] and ptr[] arrays used by rpn_scan() */
2547 #define RPN_MAX_ORDS 32

/*
 * rpn_scan - scan the dictionary around a term and return the
 * neighbouring terms with their occurrence counts.
 *
 * In/out: *position and *num_entries describe the requested window and
 * are updated; *list is pointed at the first real entry of the result
 * array, which is allocated from "stream".
 * limit_set, when given (or when named through attribute type 8), is
 * ANDed with each term's result set before counting.
 *
 * Outline of the visible code:
 *  1. Default the attribute set to Bib-1 and resolve an optional
 *     term-set attribute (type 8) to a result set used as limit_set.
 *  2. Derive index type and search type with zebra_maps_attr().
 *  3. For every database collect the index ordinal of the attribute
 *     (at most RPN_MAX_ORDS ordinals).
 *  4. For every ordinal run dict_scan() with scan_handle(), filling a
 *     per-ordinal list of before+after slots.
 *  5. Merge the per-ordinal lists: terms after the scan point in
 *     ascending order, terms before it in descending order.  Equal
 *     terms from several ordinals are ORed together, and count_set()
 *     supplies the occurrence count of each merged term.
 * NOTE(review): the computation of before/after, the error returns and
 * the final status are not visible here.
 */
2549 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2550 oid_value attributeset,
2551 int num_bases, char **basenames,
2552 int *position, int *num_entries, ZebraScanEntry **list,
2553 int *is_partial, RSET limit_set, int return_zero)
2556 int pos = *position;
2557 int num = *num_entries;
2561 char termz[IT_MAX_WORD+20];
2562 struct scan_info *scan_info_array;
2563 ZebraScanEntry *glist;
2564 int ords[RPN_MAX_ORDS], ord_no = 0;
2565 int ptr[RPN_MAX_ORDS];
2567 unsigned index_type;
2568 char *search_type = NULL;
2569 char rank_type[128];
2572 NMEM rset_nmem = NULL;
2573 struct rset_key_control *kc = 0;
2578 if (attributeset == VAL_NONE)
2579 attributeset = VAL_BIB1;
2584 int termset_value_numeric;
2585 const char *termset_value_string;
/* attribute type 8 may name a result set that limits the scan */
2586 attr_init_APT(&termset, zapt, 8);
2587 termset_value_numeric =
2588 attr_find_ex(&termset, NULL, &termset_value_string);
2589 if (termset_value_numeric != -1)
2592 const char *termset_name = 0;
/* a value other than -2 is formatted as a number to form the set name */
2594 if (termset_value_numeric != -2)
2597 sprintf(resname, "%d", termset_value_numeric);
2598 termset_name = resname;
2601 termset_name = termset_value_string;
2603 limit_set = resultSetRef (zh, termset_name);
2607 yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2608 pos, num, attributeset);
2610 if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2611 rank_type, &complete_flag, &sort_flag))
2614 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
/* one index ordinal per database, capped at RPN_MAX_ORDS */
2617 for (base_no = 0; base_no < num_bases && ord_no < RPN_MAX_ORDS; base_no++)
2621 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2623 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2624 basenames[base_no]);
2628 if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeset, &ord)
2631 ords[ord_no++] = ord;
2638 /* prepare dictionary scanning */
2650 yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2651 "after=%d before+after=%d",
2652 pos, num, before, after, before+after);
2653 scan_info_array = (struct scan_info *)
2654 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
/* scan the dictionary once per ordinal */
2655 for (i = 0; i < ord_no; i++)
2657 int j, prefix_len = 0;
2658 int before_tmp = before, after_tmp = after;
2659 struct scan_info *scan_info = scan_info_array + i;
2660 struct rpn_char_map_info rcmi;
2662 rpn_char_map_prepare (zh->reg, index_type, &rcmi);
2664 scan_info->before = before;
2665 scan_info->after = after;
2666 scan_info->odr = stream;
/* every slot starts empty; scan_handle() fills the ones it visits */
2668 scan_info->list = (struct scan_info_entry *)
2669 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2670 for (j = 0; j<before+after; j++)
2671 scan_info->list[j].term = NULL;
/* the encoded ordinal is the key prefix shared by all terms of the index */
2673 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2674 termz[prefix_len] = 0;
2675 strcpy(scan_info->prefix, termz);
2677 if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) ==
2681 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2682 scan_info, scan_handle);
2684 glist = (ZebraScanEntry *)
2685 odr_malloc(stream, (before+after)*sizeof(*glist));
2687 rset_nmem = nmem_create();
2688 kc = zebra_key_control_create(zh);
2690 /* consider terms after main term */
2691 for (i = 0; i < ord_no; i++)
2695 for (i = 0; i<after; i++)
2698 const char *mterm = NULL;
2701 int lo = i + pos-1; /* offset in result list */
2703 /* find: j0 is the first of the minimal values */
2704 for (j = 0; j < ord_no; j++)
2706 if (ptr[j] < before+after && ptr[j] >= 0 &&
2707 (tst = scan_info_array[j].list[ptr[j]].term) &&
2708 (!mterm || strcmp (tst, mterm) < 0))
2715 break; /* no value found, stop */
2717 /* get result set for first one , but only if it's within bounds */
2720 /* get result set for first term */
2721 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2722 &glist[lo].term, mterm);
2723 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2724 glist[lo].term, strlen(glist[lo].term),
2725 NULL, 0, zapt->term->which, rset_nmem,
2726 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2727 0 /* term_ref_id_str */);
2729 ptr[j0]++; /* move index for this set .. */
2730 /* get result set for remaining scan terms */
2731 for (j = j0+1; j<ord_no; j++)
2733 if (ptr[j] < before+after && ptr[j] >= 0 &&
2734 (tst = scan_info_array[j].list[ptr[j]].term) &&
2735 !strcmp (tst, mterm))
2744 zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2746 strlen(glist[lo].term), NULL, 0,
2747 zapt->term->which,rset_nmem,
2748 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2749 0 /* term_ref_id_str */ );
2750 rset = rset_create_or(rset_nmem, kc,
2751 kc->scope, 0 /* termid */,
2760 /* merge with limit_set if given */
2765 rsets[1] = rset_dup(limit_set);
2767 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2770 count_set(zh, rset, &count);
2771 glist[lo].occurrences = count;
/* shrink the entry count by the number of "after" slots not filled */
2777 *num_entries -= (after-i);
2779 if (*num_entries < 0)
2782 nmem_destroy(rset_nmem);
2787 /* consider terms before main term */
2788 for (i = 0; i<ord_no; i++)
2791 for (i = 0; i<before; i++)
2794 const char *mterm = NULL;
2797 int lo = before-1-i; /* offset in result list */
/* pick the greatest remaining term; "before" slots are read backwards */
2800 for (j = 0; j <ord_no; j++)
2802 if (ptr[j] < before && ptr[j] >= 0 &&
2803 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2804 (!mterm || strcmp (tst, mterm) > 0))
2813 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2814 &glist[lo].term, mterm);
2817 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2818 glist[lo].term, strlen(glist[lo].term),
2819 NULL, 0, zapt->term->which, rset_nmem,
2820 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2821 0 /* term_ref_id_str */);
/* OR in the same term found under the remaining ordinals */
2825 for (j = j0+1; j<ord_no; j++)
2827 if (ptr[j] < before && ptr[j] >= 0 &&
2828 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2829 !strcmp (tst, mterm))
2834 rsets[1] = rset_trunc(
2836 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2838 strlen(glist[lo].term), NULL, 0,
2839 zapt->term->which, rset_nmem,
2840 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2841 0 /* term_ref_id_str */);
2842 rset = rset_create_or(rset_nmem, kc,
2843 kc->scope, 0 /* termid */, 2, rsets);
2852 rsets[1] = rset_dup(limit_set);
2854 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2856 count_set(zh, rset, &count);
2857 glist[lo].occurrences = count;
2861 nmem_destroy(rset_nmem);
2868 if (*num_entries <= 0)
2875 *list = glist + i; /* list is set to first 'real' entry */
2877 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2878 *position, *num_entries);
2885 * indent-tabs-mode: nil
2887 * vim: shiftwidth=4 tabstop=8 expandtab