1 /* $Id: rpnsearch.c,v 1.4 2006-12-20 14:19:21 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
43 #define TERMSET_DISABLE 1
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65 struct rpn_char_map_info *map_info)
67 map_info->zm = reg->zebra_maps;
68 map_info->reg_type = reg_type;
69 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
87 static int add_isam_p(const char *name, const char *info,
92 log_level_rpn = yaz_log_module_level("rpn");
95 /* we may have to stop this madness.. NOTE: -1 so that if
96 truncmax == trunxlimit we do *not* generate result sets */
97 if (p->isam_p_indx >= p->trunc_max - 1)
100 if (p->isam_p_indx == p->isam_p_size)
102 ISAM_P *new_isam_p_buf;
106 p->isam_p_size = 2*p->isam_p_size + 100;
107 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
111 memcpy(new_isam_p_buf, p->isam_p_buf,
112 p->isam_p_indx * sizeof(*p->isam_p_buf));
113 xfree(p->isam_p_buf);
115 p->isam_p_buf = new_isam_p_buf;
118 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121 memcpy(new_term_no, p->isam_p_buf,
122 p->isam_p_indx * sizeof(*p->term_no));
125 p->term_no = new_term_no;
128 assert(*info == sizeof(*p->isam_p_buf));
129 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
134 char term_tmp[IT_MAX_WORD];
136 const char *index_name;
137 int len = key_SU_decode (&ord, (const unsigned char *) name);
139 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
140 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141 zebraExplain_lookup_ord(p->zh->reg->zei,
142 ord, 0 /* index_type */, &db, &index_name);
143 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
145 resultSetAddTerm(p->zh, p->termset, name[len], db,
146 index_name, term_tmp);
152 static int grep_handle(char *name, const char *info, void *p)
154 return add_isam_p(name, info, (struct grep_info *) p);
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158 const char *ct1, const char *ct2, int first)
160 const char *s1, *s0 = *src;
163 /* skip white space */
166 if (ct1 && strchr(ct1, *s0))
168 if (ct2 && strchr(ct2, *s0))
171 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k<in_size; k++)
192 int c = in_buf[k] & 0xff;
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
207 #define REGEX_CHARS " []()|.*+?!"
209 /* term_100: handle term, where trunc = none(no operators at all) */
210 static int term_100(ZebraMaps zebra_maps, int reg_type,
211 const char **src, char *dst, int space_split,
219 const char *space_start = 0;
220 const char *space_end = 0;
222 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
229 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
233 if (**map == *CHR_SPACE)
236 else /* complete subfield only. */
238 if (**map == *CHR_SPACE)
239 { /* save space mapping for later .. */
244 else if (space_start)
245 { /* reload last space */
246 while (space_start < space_end)
248 if (strchr(REGEX_CHARS, *space_start))
250 dst_term[j++] = *space_start;
251 dst[i++] = *space_start++;
254 space_start = space_end = 0;
257 /* add non-space char */
258 memcpy(dst_term+j, s1, s0 - s1);
264 if (strchr(REGEX_CHARS, *s1))
272 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
274 strcpy(dst + i, map[0]);
284 /* term_101: handle term, where trunc = Process # */
285 static int term_101(ZebraMaps zebra_maps, int reg_type,
286 const char **src, char *dst, int space_split,
294 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
303 dst_term[j++] = *s0++;
309 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
311 if (space_split && **map == *CHR_SPACE)
314 /* add non-space char */
315 memcpy(dst_term+j, s1, s0 - s1);
321 if (strchr(REGEX_CHARS, *s1))
329 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
331 strcpy(dst + i, map[0]);
337 dst_term[j++] = '\0';
342 /* term_103: handle term, where trunc = re-2 (regular expressions) */
343 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
344 char *dst, int *errors, int space_split,
352 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
355 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
356 isdigit(((const unsigned char *)s0)[1]))
358 *errors = s0[1] - '0';
365 if (strchr("^\\()[].*+?|-", *s0))
374 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
376 if (space_split && **map == *CHR_SPACE)
379 /* add non-space char */
380 memcpy(dst_term+j, s1, s0 - s1);
386 if (strchr(REGEX_CHARS, *s1))
394 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
396 strcpy(dst + i, map[0]);
408 /* term_102: handle term, where trunc = re-1 (regular expressions) */
409 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
410 char *dst, int space_split, char *dst_term)
412 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
417 /* term_104: handle term, where trunc = Process # and ! */
418 static int term_104(ZebraMaps zebra_maps, int reg_type,
419 const char **src, char *dst, int space_split,
427 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
434 dst_term[j++] = *s0++;
435 if (*s0 >= '0' && *s0 <= '9')
438 while (*s0 >= '0' && *s0 <= '9')
440 limit = limit * 10 + (*s0 - '0');
441 dst_term[j++] = *s0++;
461 dst_term[j++] = *s0++;
466 dst_term[j++] = *s0++;
472 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
474 if (space_split && **map == *CHR_SPACE)
477 /* add non-space char */
478 memcpy(dst_term+j, s1, s0 - s1);
484 if (strchr(REGEX_CHARS, *s1))
492 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
494 strcpy(dst + i, map[0]);
500 dst_term[j++] = '\0';
505 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
506 static int term_105(ZebraMaps zebra_maps, int reg_type,
507 const char **src, char *dst, int space_split,
508 char *dst_term, int right_truncate)
515 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
524 dst_term[j++] = *s0++;
529 dst_term[j++] = *s0++;
535 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
537 if (space_split && **map == *CHR_SPACE)
540 /* add non-space char */
541 memcpy(dst_term+j, s1, s0 - s1);
547 if (strchr(REGEX_CHARS, *s1))
555 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
557 strcpy(dst + i, map[0]);
569 dst_term[j++] = '\0';
575 /* gen_regular_rel - generate regular expression from relation
576 * val: border value (inclusive)
577 * islt: 1 if <=; 0 if >=.
579 static void gen_regular_rel(char *dst, int val, int islt)
586 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
590 strcpy(dst, "(-[0-9]+|(");
598 strcpy(dst, "([0-9]+|-(");
610 sprintf(numstr, "%d", val);
611 for (w = strlen(numstr); --w >= 0; pos++)
630 strcpy(dst + dst_p, numstr);
631 dst_p = strlen(dst) - pos - 1;
659 for (i = 0; i<pos; i++)
672 /* match everything less than 10^(pos-1) */
674 for (i = 1; i<pos; i++)
675 strcat(dst, "[0-9]?");
679 /* match everything greater than 10^pos */
680 for (i = 0; i <= pos; i++)
681 strcat(dst, "[0-9]");
682 strcat(dst, "[0-9]*");
/*
 * string_rel_add_char - copy one logical character of a term.
 *
 * term_p: in/out cursor into the destination buffer; advanced past
 *         every byte written.
 * src:    NUL-terminated source string.
 * indx:   in/out offset into src; advanced past every byte consumed.
 *
 * A backslash followed by another character is treated as an escape
 * pair and both bytes are copied as a unit.  Any other character is
 * copied alone.
 *
 * A backslash that is the LAST character of src is copied alone: the
 * terminating NUL is not consumed, so *indx is left on the terminator
 * and callers that loop on src[*indx] stop there instead of reading
 * past the end of the string.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    /* copy the escape introducer only when it really escapes something */
    if (src[*indx] == '\\' && src[*indx + 1] != '\0')
        *(*term_p)++ = src[(*indx)++];
    *(*term_p)++ = src[(*indx)++];
}
695 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
696 * ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
697 * >= abc ([b-].*|a[c-].*|ab[c-].*)
698 * ([^-a].*|a[^-b].*|ab[c-].*)
699 * < abc ([-0].*|a[-a].*|ab[-b].*)
700 * ([^a-].*|a[^b-].*|ab[^c-].*)
701 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
702 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
704 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
705 const char **term_sub, char *term_dict,
706 oid_value attributeSet,
707 int reg_type, int space_split, char *term_dst,
713 char *term_tmp = term_dict + strlen(term_dict);
714 char term_component[2*IT_MAX_WORD+20];
716 attr_init_APT(&relation, zapt, 2);
717 relation_value = attr_find(&relation, NULL);
720 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
721 switch (relation_value)
724 if (!term_100(zh->reg->zebra_maps, reg_type,
725 term_sub, term_component,
726 space_split, term_dst))
728 yaz_log(log_level_rpn, "Relation <");
731 for (i = 0; term_component[i]; )
738 string_rel_add_char(&term_tmp, term_component, &j);
745 *term_tmp++ = FIRST_IN_FIELD_CHAR;
747 string_rel_add_char(&term_tmp, term_component, &i);
754 if ((term_tmp - term_dict) > IT_MAX_WORD)
759 yaz_log(YLOG_LOG, "term_dict=%s", term_dict);
762 if (!term_100(zh->reg->zebra_maps, reg_type,
763 term_sub, term_component,
764 space_split, term_dst))
766 yaz_log(log_level_rpn, "Relation <=");
769 for (i = 0; term_component[i]; )
774 string_rel_add_char(&term_tmp, term_component, &j);
780 *term_tmp++ = FIRST_IN_FIELD_CHAR;
782 string_rel_add_char(&term_tmp, term_component, &i);
791 if ((term_tmp - term_dict) > IT_MAX_WORD)
794 for (i = 0; term_component[i]; )
795 string_rel_add_char(&term_tmp, term_component, &i);
800 if (!term_100 (zh->reg->zebra_maps, reg_type,
801 term_sub, term_component, space_split, term_dst))
803 yaz_log(log_level_rpn, "Relation >");
806 for (i = 0; term_component[i];)
811 string_rel_add_char(&term_tmp, term_component, &j);
816 string_rel_add_char(&term_tmp, term_component, &i);
824 if ((term_tmp - term_dict) > IT_MAX_WORD)
827 for (i = 0; term_component[i];)
828 string_rel_add_char(&term_tmp, term_component, &i);
835 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
836 term_component, space_split, term_dst))
838 yaz_log(log_level_rpn, "Relation >=");
841 for (i = 0; term_component[i];)
848 string_rel_add_char(&term_tmp, term_component, &j);
851 if (term_component[i+1])
855 string_rel_add_char(&term_tmp, term_component, &i);
859 string_rel_add_char(&term_tmp, term_component, &i);
866 if ((term_tmp - term_dict) > IT_MAX_WORD)
877 yaz_log(log_level_rpn, "Relation =");
878 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
879 term_component, space_split, term_dst))
881 strcat(term_tmp, "(");
882 strcat(term_tmp, term_component);
883 strcat(term_tmp, ")");
886 yaz_log(log_level_rpn, "Relation always matches");
887 /* skip to end of term (we don't care what it is) */
888 while (**term_sub != '\0')
892 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
898 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
899 const char **term_sub,
900 oid_value attributeSet, NMEM stream,
901 struct grep_info *grep_info,
902 int reg_type, int complete_flag,
903 int num_bases, char **basenames,
905 const char *xpath_use,
906 struct ord_list **ol);
908 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
909 Z_AttributesPlusTerm *zapt,
910 zint *hits_limit_value,
911 const char **term_ref_id_str,
914 AttrType term_ref_id_attr;
915 AttrType hits_limit_attr;
918 attr_init_APT(&hits_limit_attr, zapt, 11);
919 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
921 attr_init_APT(&term_ref_id_attr, zapt, 10);
922 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
923 if (term_ref_id_int >= 0)
925 char *res = nmem_malloc(nmem, 20);
926 sprintf(res, "%d", term_ref_id_int);
927 *term_ref_id_str = res;
930 /* no limit given ? */
931 if (*hits_limit_value == -1)
933 if (*term_ref_id_str)
935 /* use global if term_ref is present */
936 *hits_limit_value = zh->approx_limit;
940 /* no counting if term_ref is not present */
941 *hits_limit_value = 0;
944 else if (*hits_limit_value == 0)
946 /* 0 is the same as global limit */
947 *hits_limit_value = zh->approx_limit;
949 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
950 *term_ref_id_str ? *term_ref_id_str : "none",
955 static ZEBRA_RES term_trunc(ZebraHandle zh,
956 Z_AttributesPlusTerm *zapt,
957 const char **term_sub,
958 oid_value attributeSet, NMEM stream,
959 struct grep_info *grep_info,
960 int reg_type, int complete_flag,
961 int num_bases, char **basenames,
963 const char *rank_type,
964 const char *xpath_use,
967 struct rset_key_control *kc)
971 zint hits_limit_value;
972 const char *term_ref_id_str = 0;
975 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
976 grep_info->isam_p_indx = 0;
977 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
978 reg_type, complete_flag, num_bases, basenames,
979 term_dst, xpath_use, &ol);
982 if (!*term_sub) /* no more terms ? */
984 yaz_log(log_level_rpn, "term: %s", term_dst);
985 *rset = rset_trunc(zh, grep_info->isam_p_buf,
986 grep_info->isam_p_indx, term_dst,
987 strlen(term_dst), rank_type, 1 /* preserve pos */,
988 zapt->term->which, rset_nmem,
989 kc, kc->scope, ol, reg_type, hits_limit_value,
996 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
997 const char **term_sub,
998 oid_value attributeSet, NMEM stream,
999 struct grep_info *grep_info,
1000 int reg_type, int complete_flag,
1001 int num_bases, char **basenames,
1003 const char *xpath_use,
1004 struct ord_list **ol)
1006 char term_dict[2*IT_MAX_WORD+4000];
1008 AttrType truncation;
1009 int truncation_value;
1011 struct rpn_char_map_info rcmi;
1012 int space_split = complete_flag ? 0 : 1;
1014 int bases_ok = 0; /* no of databases with OK attribute */
1016 *ol = ord_list_create(stream);
1018 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1019 attr_init_APT(&truncation, zapt, 5);
1020 truncation_value = attr_find(&truncation, NULL);
1021 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1023 for (base_no = 0; base_no < num_bases; base_no++)
1026 int regex_range = 0;
1027 int max_pos, prefix_len = 0;
1032 termp = *term_sub; /* start of term for each database */
1034 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1036 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1037 basenames[base_no]);
1041 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1042 attributeSet, &ord) != ZEBRA_OK)
1047 *ol = ord_list_append(stream, *ol, ord);
1048 ord_len = key_SU_encode (ord, ord_buf);
1050 term_dict[prefix_len++] = '(';
1051 for (i = 0; i<ord_len; i++)
1053 term_dict[prefix_len++] = 1; /* our internal regexp escape char */
1054 term_dict[prefix_len++] = ord_buf[i];
1056 term_dict[prefix_len++] = ')';
1057 term_dict[prefix_len] = '\0';
1059 switch (truncation_value)
1061 case -1: /* not specified */
1062 case 100: /* do not truncate */
1063 if (!string_relation(zh, zapt, &termp, term_dict,
1065 reg_type, space_split, term_dst,
1070 zebra_setError(zh, relation_error, 0);
1077 case 1: /* right truncation */
1078 term_dict[j++] = '(';
1079 if (!term_100(zh->reg->zebra_maps, reg_type,
1080 &termp, term_dict + j, space_split, term_dst))
1085 strcat(term_dict, ".*)");
1087 case 2: /* left truncation */
1088 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1089 if (!term_100(zh->reg->zebra_maps, reg_type,
1090 &termp, term_dict + j, space_split, term_dst))
1095 strcat(term_dict, ")");
1097 case 3: /* left&right truncation */
1098 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1099 if (!term_100(zh->reg->zebra_maps, reg_type,
1100 &termp, term_dict + j, space_split, term_dst))
1105 strcat(term_dict, ".*)");
1107 case 101: /* process # in term */
1108 term_dict[j++] = '(';
1109 if (!term_101(zh->reg->zebra_maps, reg_type,
1110 &termp, term_dict + j, space_split, term_dst))
1115 strcat(term_dict, ")");
1117 case 102: /* Regexp-1 */
1118 term_dict[j++] = '(';
1119 if (!term_102(zh->reg->zebra_maps, reg_type,
1120 &termp, term_dict + j, space_split, term_dst))
1125 strcat(term_dict, ")");
1127 case 103: /* Regexp-2 */
1129 term_dict[j++] = '(';
1130 if (!term_103(zh->reg->zebra_maps, reg_type,
1131 &termp, term_dict + j, &regex_range,
1132 space_split, term_dst))
1137 strcat(term_dict, ")");
1139 case 104: /* process # and ! in term */
1140 term_dict[j++] = '(';
1141 if (!term_104(zh->reg->zebra_maps, reg_type,
1142 &termp, term_dict + j, space_split, term_dst))
1147 strcat(term_dict, ")");
1149 case 105: /* process * and ! in term */
1150 term_dict[j++] = '(';
1151 if (!term_105(zh->reg->zebra_maps, reg_type,
1152 &termp, term_dict + j, space_split, term_dst, 1))
1157 strcat(term_dict, ")");
1159 case 106: /* process * and ! in term */
1160 term_dict[j++] = '(';
1161 if (!term_105(zh->reg->zebra_maps, reg_type,
1162 &termp, term_dict + j, space_split, term_dst, 0))
1167 strcat(term_dict, ")");
1170 zebra_setError_zint(zh,
1171 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1178 const char *input = term_dict + prefix_len;
1179 esc_str(buf, sizeof(buf), input, strlen(input));
1181 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1182 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1183 grep_info, &max_pos,
1184 ord_len /* number of "exact" chars */,
1187 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1192 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1198 static void grep_info_delete(struct grep_info *grep_info)
1201 xfree(grep_info->term_no);
1203 xfree(grep_info->isam_p_buf);
1206 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1207 Z_AttributesPlusTerm *zapt,
1208 struct grep_info *grep_info,
1212 int termset_value_numeric;
1213 const char *termset_value_string;
1216 grep_info->term_no = 0;
1218 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1219 grep_info->isam_p_size = 0;
1220 grep_info->isam_p_buf = NULL;
1222 grep_info->reg_type = reg_type;
1223 grep_info->termset = 0;
1226 attr_init_APT(&termset, zapt, 8);
1227 termset_value_numeric =
1228 attr_find_ex(&termset, NULL, &termset_value_string);
1229 if (termset_value_numeric != -1)
1232 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1236 const char *termset_name = 0;
1237 if (termset_value_numeric != -2)
1240 sprintf(resname, "%d", termset_value_numeric);
1241 termset_name = resname;
1244 termset_name = termset_value_string;
1245 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1246 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1247 if (!grep_info->termset)
1249 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1258 \brief Create result set(s) for list of terms
1259 \param zh Zebra Handle
1260 \param zapt Attributes Plus Term (RPN leaf)
1261 \param termz term as used in query but converted to UTF-8
1262 \param attributeSet default attribute set
1263 \param stream memory for result
1264 \param reg_type register type ('w', 'p',..)
1265 \param complete_flag whether it's phrases or not
1266 \param rank_type term flags for ranking
1267 \param xpath_use use attribute for X-Path (-1 for no X-path)
1268 \param num_bases number of databases
1269 \param basenames array of databases
1270 \param rset_nmem memory for result sets
1271 \param result_sets output result set for each term in list (output)
1272 \param num_result_sets number of output result sets
1273 \param kc rset key control to be used for created result sets
1275 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1276 Z_AttributesPlusTerm *zapt,
1278 oid_value attributeSet,
1280 int reg_type, int complete_flag,
1281 const char *rank_type,
1282 const char *xpath_use,
1283 int num_bases, char **basenames,
1285 RSET **result_sets, int *num_result_sets,
1286 struct rset_key_control *kc)
1288 char term_dst[IT_MAX_WORD+1];
1289 struct grep_info grep_info;
1290 const char *termp = termz;
1293 *num_result_sets = 0;
1295 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1301 if (alloc_sets == *num_result_sets)
1304 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1307 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1308 alloc_sets = alloc_sets + add;
1309 *result_sets = rnew;
1311 res = term_trunc(zh, zapt, &termp, attributeSet,
1313 reg_type, complete_flag,
1314 num_bases, basenames,
1315 term_dst, rank_type,
1316 xpath_use, rset_nmem,
1317 &(*result_sets)[*num_result_sets],
1319 if (res != ZEBRA_OK)
1322 for (i = 0; i < *num_result_sets; i++)
1323 rset_delete((*result_sets)[i]);
1324 grep_info_delete (&grep_info);
1327 if ((*result_sets)[*num_result_sets] == 0)
1329 (*num_result_sets)++;
1334 grep_info_delete(&grep_info);
1338 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1339 Z_AttributesPlusTerm *zapt,
1340 oid_value attributeSet,
1342 int num_bases, char **basenames,
1345 struct rset_key_control *kc)
1353 attr_init_APT(&position, zapt, 3);
1354 position_value = attr_find(&position, NULL);
1355 switch(position_value)
1364 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1369 if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1371 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1376 if (!zh->reg->isamb && !zh->reg->isamc)
1378 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1382 f_set = xmalloc(sizeof(RSET) * num_bases);
1383 for (base_no = 0; base_no < num_bases; base_no++)
1387 char term_dict[100];
1392 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1394 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1395 basenames[base_no]);
1399 if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1400 attributeSet, &ord) != ZEBRA_OK)
1403 ord_len = key_SU_encode (ord, ord_buf);
1404 memcpy(term_dict, ord_buf, ord_len);
1405 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1406 val = dict_lookup(zh->reg->dict, term_dict);
1409 assert(*val == sizeof(ISAM_P));
1410 memcpy(&isam_p, val+1, sizeof(isam_p));
1414 f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1415 zh->reg->isamb, isam_p, 0);
1416 else if (zh->reg->isamc)
1417 f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1418 zh->reg->isamc, isam_p, 0);
1422 *rset = rset_create_or(rset_nmem, kc, kc->scope,
1423 0 /* termid */, num_sets, f_set);
1429 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1430 Z_AttributesPlusTerm *zapt,
1431 const char *termz_org,
1432 oid_value attributeSet,
1434 int reg_type, int complete_flag,
1435 const char *rank_type,
1436 const char *xpath_use,
1437 int num_bases, char **basenames,
1440 struct rset_key_control *kc)
1442 RSET *result_sets = 0;
1443 int num_result_sets = 0;
1445 term_list_trunc(zh, zapt, termz_org, attributeSet,
1446 stream, reg_type, complete_flag,
1447 rank_type, xpath_use,
1448 num_bases, basenames,
1450 &result_sets, &num_result_sets, kc);
1452 if (res != ZEBRA_OK)
1455 if (num_result_sets > 0)
1458 res = rpn_search_APT_position(zh, zapt, attributeSet,
1460 num_bases, basenames,
1461 rset_nmem, &first_set,
1463 if (res != ZEBRA_OK)
1467 RSET *nsets = nmem_malloc(stream,
1468 sizeof(RSET) * (num_result_sets+1));
1469 nsets[0] = first_set;
1470 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1471 result_sets = nsets;
1475 if (num_result_sets == 0)
1476 *rset = rset_create_null(rset_nmem, kc, 0);
1477 else if (num_result_sets == 1)
1478 *rset = result_sets[0];
1480 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1481 num_result_sets, result_sets,
1482 1 /* ordered */, 0 /* exclusion */,
1483 3 /* relation */, 1 /* distance */);
1489 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1490 Z_AttributesPlusTerm *zapt,
1491 const char *termz_org,
1492 oid_value attributeSet,
1494 int reg_type, int complete_flag,
1495 const char *rank_type,
1496 const char *xpath_use,
1497 int num_bases, char **basenames,
1500 struct rset_key_control *kc)
1502 RSET *result_sets = 0;
1503 int num_result_sets = 0;
1506 term_list_trunc(zh, zapt, termz_org, attributeSet,
1507 stream, reg_type, complete_flag,
1508 rank_type, xpath_use,
1509 num_bases, basenames,
1511 &result_sets, &num_result_sets, kc);
1512 if (res != ZEBRA_OK)
1515 for (i = 0; i<num_result_sets; i++)
1518 res = rpn_search_APT_position(zh, zapt, attributeSet,
1520 num_bases, basenames,
1521 rset_nmem, &first_set,
1523 if (res != ZEBRA_OK)
1525 for (i = 0; i<num_result_sets; i++)
1526 rset_delete(result_sets[i]);
1534 tmp_set[0] = first_set;
1535 tmp_set[1] = result_sets[i];
1537 result_sets[i] = rset_create_prox(
1538 rset_nmem, kc, kc->scope,
1540 1 /* ordered */, 0 /* exclusion */,
1541 3 /* relation */, 1 /* distance */);
1544 if (num_result_sets == 0)
1545 *rset = rset_create_null(rset_nmem, kc, 0);
1546 else if (num_result_sets == 1)
1547 *rset = result_sets[0];
1549 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1550 num_result_sets, result_sets);
1556 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1557 Z_AttributesPlusTerm *zapt,
1558 const char *termz_org,
1559 oid_value attributeSet,
1561 int reg_type, int complete_flag,
1562 const char *rank_type,
1563 const char *xpath_use,
1564 int num_bases, char **basenames,
1567 struct rset_key_control *kc)
1569 RSET *result_sets = 0;
1570 int num_result_sets = 0;
1573 term_list_trunc(zh, zapt, termz_org, attributeSet,
1574 stream, reg_type, complete_flag,
1575 rank_type, xpath_use,
1576 num_bases, basenames,
1578 &result_sets, &num_result_sets,
1580 if (res != ZEBRA_OK)
1582 for (i = 0; i<num_result_sets; i++)
1585 res = rpn_search_APT_position(zh, zapt, attributeSet,
1587 num_bases, basenames,
1588 rset_nmem, &first_set,
1590 if (res != ZEBRA_OK)
1592 for (i = 0; i<num_result_sets; i++)
1593 rset_delete(result_sets[i]);
1601 tmp_set[0] = first_set;
1602 tmp_set[1] = result_sets[i];
1604 result_sets[i] = rset_create_prox(
1605 rset_nmem, kc, kc->scope,
1607 1 /* ordered */, 0 /* exclusion */,
1608 3 /* relation */, 1 /* distance */);
1613 if (num_result_sets == 0)
1614 *rset = rset_create_null(rset_nmem, kc, 0);
1615 else if (num_result_sets == 1)
1616 *rset = result_sets[0];
1618 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1619 num_result_sets, result_sets);
1625 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1626 const char **term_sub,
1628 oid_value attributeSet,
1629 struct grep_info *grep_info,
1639 char *term_tmp = term_dict + strlen(term_dict);
1642 attr_init_APT(&relation, zapt, 2);
1643 relation_value = attr_find(&relation, NULL);
1645 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1647 switch (relation_value)
1650 yaz_log(log_level_rpn, "Relation <");
1651 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1654 term_value = atoi (term_tmp);
1655 gen_regular_rel(term_tmp, term_value-1, 1);
1658 yaz_log(log_level_rpn, "Relation <=");
1659 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1662 term_value = atoi (term_tmp);
1663 gen_regular_rel(term_tmp, term_value, 1);
1666 yaz_log(log_level_rpn, "Relation >=");
1667 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1670 term_value = atoi (term_tmp);
1671 gen_regular_rel(term_tmp, term_value, 0);
1674 yaz_log(log_level_rpn, "Relation >");
1675 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1678 term_value = atoi (term_tmp);
1679 gen_regular_rel(term_tmp, term_value+1, 0);
1683 yaz_log(log_level_rpn, "Relation =");
1684 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1687 term_value = atoi (term_tmp);
1688 sprintf(term_tmp, "(0*%d)", term_value);
1691 /* term_tmp untouched.. */
1692 while (**term_sub != '\0')
1696 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1699 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1700 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1703 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1704 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1708 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1709 const char **term_sub,
1710 oid_value attributeSet, NMEM stream,
1711 struct grep_info *grep_info,
1712 int reg_type, int complete_flag,
1713 int num_bases, char **basenames,
1715 const char *xpath_use,
1716 struct ord_list **ol)
1718 char term_dict[2*IT_MAX_WORD+2];
1721 struct rpn_char_map_info rcmi;
1723 int bases_ok = 0; /* no of databases with OK attribute */
1725 *ol = ord_list_create(stream);
1727 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1729 for (base_no = 0; base_no < num_bases; base_no++)
1731 int max_pos, prefix_len = 0;
1732 int relation_error = 0;
1733 int ord, ord_len, i;
1738 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1740 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1741 basenames[base_no]);
1745 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1746 attributeSet, &ord) != ZEBRA_OK)
1750 *ol = ord_list_append(stream, *ol, ord);
1752 ord_len = key_SU_encode (ord, ord_buf);
1754 term_dict[prefix_len++] = '(';
1755 for (i = 0; i < ord_len; i++)
1757 term_dict[prefix_len++] = 1;
1758 term_dict[prefix_len++] = ord_buf[i];
1760 term_dict[prefix_len++] = ')';
1761 term_dict[prefix_len] = '\0';
1763 if (!numeric_relation(zh, zapt, &termp, term_dict,
1764 attributeSet, grep_info, &max_pos, reg_type,
1765 term_dst, &relation_error))
1769 zebra_setError(zh, relation_error, 0);
1779 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/*
 * rpn_search_APT_numeric - evaluate an attributes-plus-term using the
 * numeric search strategy.
 *
 * term_limits_APT() first fills hits_limit_value and term_ref_id_str from
 * the APT.  numeric_term() is then called with a cursor (termp) into termz;
 * the ISAM positions it leaves in grep_info are converted by rset_trunc()
 * into one entry of result_sets.  At the end the entries are folded into
 * *rset: a null set for zero entries, the entry itself for exactly one, and
 * rset_create_and() over all of them in the remaining case.
 *
 * NOTE(review): the looping construct and the statements executed when the
 * failure checks fire are not shown next to these lines - confirm
 * termination and cleanup against the complete function.
 */
1784 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1785 Z_AttributesPlusTerm *zapt,
1787 oid_value attributeSet,
1789 int reg_type, int complete_flag,
1790 const char *rank_type,
1791 const char *xpath_use,
1792 int num_bases, char **basenames,
1795 struct rset_key_control *kc)
1797 char term_dst[IT_MAX_WORD+1];
1798 const char *termp = termz;
1799 RSET *result_sets = 0;
1800 int num_result_sets = 0;
1802 struct grep_info grep_info;
1804 zint hits_limit_value;
1805 const char *term_ref_id_str = 0;
1807 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1809 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1810 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1814 struct ord_list *ol;
/* result_sets is full: a larger array is taken from the stream NMEM and
   the existing entries are copied into it */
1815 if (alloc_sets == num_result_sets)
1818 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1821 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1822 alloc_sets = alloc_sets + add;
1825 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* every term starts with an empty position list */
1826 grep_info.isam_p_indx = 0;
1827 res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1828 reg_type, complete_flag, num_bases, basenames,
1829 term_dst, xpath_use, &ol);
/* numeric_term() failed, or it left termp null */
1830 if (res == ZEBRA_FAIL || termp == 0)
1832 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
/* turn the collected positions into a result set for this term */
1833 result_sets[num_result_sets] =
1834 rset_trunc(zh, grep_info.isam_p_buf,
1835 grep_info.isam_p_indx, term_dst,
1836 strlen(term_dst), rank_type,
1837 0 /* preserve position */,
1838 zapt->term->which, rset_nmem,
1839 kc, kc->scope, ol, reg_type,
1842 if (!result_sets[num_result_sets])
1848 grep_info_delete(&grep_info);
1850 if (res != ZEBRA_OK)
/* fold the per-term sets into the single set handed back through rset */
1852 if (num_result_sets == 0)
1853 *rset = rset_create_null(rset_nmem, kc, 0);
1854 else if (num_result_sets == 1)
1855 *rset = result_sets[0];
1857 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1858 num_result_sets, result_sets);
/*
 * rpn_search_APT_local - build the result set for the "local" search type.
 *
 * A temporary result set is created (rset_create_temp() is given the value
 * of the "setTmpDir" resource), opened with RSETF_WRITE, and a key is
 * written to it with rset_write().
 *
 * NOTE(review): how `key` is populated and how the write handle is closed
 * is not shown next to these lines - confirm against the complete function.
 */
1864 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1865 Z_AttributesPlusTerm *zapt,
1867 oid_value attributeSet,
1869 const char *rank_type, NMEM rset_nmem,
1871 struct rset_key_control *kc)
1876 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1877 res_get (zh->res, "setTmpDir"),0 );
1878 rsfd = rset_open(*rset, RSETF_WRITE);
1886 rset_write (rsfd, &key);
/*
 * rpn_sort_spec - record a sort criterion carried by an APT.
 *
 * The APT's general term is parsed with atoi_n() and used as the slot index
 * into sort_sequence->specs.  A Z_SortKeySpec is allocated from stream whose
 * sort key is the APT's own attribute list under the OID obtained for
 * attributeSet, and it is stored in that slot.  The direction comes from
 * attribute type 7: value 1 gives ascending, value 2 gives descending, and
 * the last assignment below again selects ascending.  No records are
 * searched; *rset is set to a null result set.
 *
 * NOTE(review): only an upper-bound comparison of the slot index is visible
 * below; confirm that a negative value produced by atoi_n() cannot reach
 * the specs[i] store.
 */
1891 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1892 oid_value attributeSet, NMEM stream,
1893 Z_SortKeySpecList *sort_sequence,
1894 const char *rank_type,
1897 struct rset_key_control *kc)
1900 int sort_relation_value;
1901 AttrType sort_relation_type;
/* attribute type 7 carries the sort direction */
1908 attr_init_APT(&sort_relation_type, zapt, 7);
1909 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort key seen: allocate ten slots and clear them */
1911 if (!sort_sequence->specs)
1913 sort_sequence->num_specs = 10;
1914 sort_sequence->specs = (Z_SortKeySpec **)
1915 nmem_malloc(stream, sort_sequence->num_specs *
1916 sizeof(*sort_sequence->specs));
1917 for (i = 0; i<sort_sequence->num_specs; i++)
1918 sort_sequence->specs[i] = 0;
1920 if (zapt->term->which != Z_Term_general)
/* the term text is the slot number of this sort key */
1923 i = atoi_n ((char *) zapt->term->u.general->buf,
1924 zapt->term->u.general->len);
1925 if (i >= sort_sequence->num_specs)
1927 sprintf(termz, "%d", i);
/* translate the attribute set value into an OID for the sort attributes */
1929 oe.proto = PROTO_Z3950;
1930 oe.oclass = CLASS_ATTSET;
1931 oe.value = attributeSet;
1932 if (!oid_ent_to_oid (&oe, oid))
/* build the sort key spec; every piece is allocated from stream */
1935 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1936 sks->sortElement = (Z_SortElement *)
1937 nmem_malloc(stream, sizeof(*sks->sortElement));
1938 sks->sortElement->which = Z_SortElement_generic;
1939 sk = sks->sortElement->u.generic = (Z_SortKey *)
1940 nmem_malloc(stream, sizeof(*sk));
1941 sk->which = Z_SortKey_sortAttributes;
1942 sk->u.sortAttributes = (Z_SortAttributes *)
1943 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1945 sk->u.sortAttributes->id = oid;
1946 sk->u.sortAttributes->list = zapt->attributes;
1948 sks->sortRelation = (int *)
1949 nmem_malloc(stream, sizeof(*sks->sortRelation));
1950 if (sort_relation_value == 1)
1951 *sks->sortRelation = Z_SortKeySpec_ascending;
1952 else if (sort_relation_value == 2)
1953 *sks->sortRelation = Z_SortKeySpec_descending;
1955 *sks->sortRelation = Z_SortKeySpec_ascending;
1957 sks->caseSensitivity = (int *)
1958 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1959 *sks->caseSensitivity = 0;
1961 sks->which = Z_SortKeySpec_null;
1962 sks->u.null = odr_nullval ();
1963 sort_sequence->specs[i] = sks;
/* a sort key matches no records by itself */
1964 *rset = rset_create_null(rset_nmem, kc, 0);
/*
 * rpn_check_xpath - detect an XPath expression in the use attribute.
 *
 * The use attribute (type 1) of zapt is looked up in string form.  When that
 * string exists and begins with '/', it is handed to
 * zebra_parse_xpath_str(), which fills xpath (capacity max) using mem, and
 * that function's return value is returned.
 *
 * NOTE(review): the value returned when the use attribute is not an XPath
 * string is not shown next to these lines.
 */
1969 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1970 oid_value attributeSet,
1971 struct xpath_location_step *xpath, int max,
1974 oid_value curAttributeSet = attributeSet;
1976 const char *use_string = 0;
1978 attr_init_APT(&use, zapt, 1);
1979 attr_find_ex(&use, &curAttributeSet, &use_string);
/* only a string-valued use attribute starting with '/' counts as XPath */
1981 if (!use_string || *use_string != '/')
1984 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc - look up a regular expression in one of the XPath indexes
 * and return the matching positions as a result set.
 *
 * The index ordinal for xpath_use is obtained with
 * zebraExplain_lookup_attr_str().  term_dict is then assembled as
 * '(' <encoded ordinal> ')' followed by term, where every byte produced by
 * key_SU_encode() is preceded by a byte with value 1.  dict_lookup_grep()
 * collects the ISAM positions into grep_info and rset_trunc() builds the
 * set from them, using rank flags "void" and term type
 * Z_Term_characterString.  A null result set is returned when
 * grep_info_prepare() fails.
 *
 * NOTE(review): term is copied into the 2048-byte term_dict with strcpy()
 * and no length check is visible here - confirm callers bound its length.
 */
1989 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1990 int reg_type, const char *term,
1991 const char *xpath_use,
1993 struct rset_key_control *kc)
1996 struct grep_info grep_info;
1997 char term_dict[2048];
2000 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2001 zinfo_index_category_index,
2004 int ord_len, i, r, max_pos;
2005 int term_type = Z_Term_characterString;
2006 const char *flags = "void";
2008 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2009 return rset_create_null(rset_nmem, kc, 0);
2012 return rset_create_null(rset_nmem, kc, 0);
2014 term_dict[prefix_len++] = '|';
2016 term_dict[prefix_len++] = '(';
/* emit the encoded ordinal, each byte preceded by a byte of value 1 */
2018 ord_len = key_SU_encode (ord, ord_buf);
2019 for (i = 0; i<ord_len; i++)
2021 term_dict[prefix_len++] = 1;
2022 term_dict[prefix_len++] = ord_buf[i];
2024 term_dict[prefix_len++] = ')';
2025 strcpy(term_dict+prefix_len, term);
2027 grep_info.isam_p_indx = 0;
2028 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2029 &grep_info, &max_pos, 0, grep_handle);
2030 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2031 grep_info.isam_p_indx);
2032 rset = rset_trunc(zh, grep_info.isam_p_buf,
2033 grep_info.isam_p_indx, term, strlen(term),
2034 flags, 1, term_type,rset_nmem,
2035 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2036 0 /* term_ref_id_str */);
2037 grep_info_delete(&grep_info);
/*
 * rpn_search_xpath - restrict a term result set to an XPath location.
 *
 * rset is the result set for the search term; a null rset means the query
 * matches on structure alone (always_matches).  For every database in
 * basenames the location steps are processed from the last one towards the
 * first (while --level >= 0).  For each level:
 *   - xpath_rev receives a regular expression for the element path, built
 *     from step `level` down to step 1, each followed by "/";
 *   - when the step has a relation predicate with a non-empty name, the
 *     text name[+1] "=" value (value characters found in REGEX_CHARS are
 *     backslash-escaped) is looked up in the ZEBRA_XPATH_ATTR_NAME index,
 *     giving rset_attr;
 *   - the begin-tag and end-tag sets are looked up in the
 *     ZEBRA_XPATH_ELM_BEGIN / ZEBRA_XPATH_ELM_END indexes and
 *     rset_create_between() wraps the current set with them.
 * A database that zebraExplain_curDatabase() rejects raises
 * YAZ_BIB1_DATABASE_UNAVAILABLE.
 *
 * NOTE(review): several branch headers and the return statements of this
 * function are not shown next to these lines; the description covers only
 * the visible statements.
 */
2042 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2043 int num_bases, char **basenames,
2044 NMEM stream, const char *rank_type, RSET rset,
2045 int xpath_len, struct xpath_location_step *xpath,
2048 struct rset_key_control *kc)
2052 int always_matches = rset ? 0 : 1;
2060 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2061 for (i = 0; i<xpath_len; i++)
2063 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
/* How XPath expressions map onto nested range (between) operations:
2075 a[@attr = value]/b[@other = othervalue]
2077 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2078 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2079 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2080 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2081 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2082 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
*/
/* the dictionary grep is given a null map-info and a null handler here */
2086 dict_grep_cmap (zh->reg->dict, 0, 0);
2088 for (base_no = 0; base_no < num_bases; base_no++)
2090 int level = xpath_len;
2093 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2095 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2096 basenames[base_no]);
/* innermost location step first */
2100 while (--level >= 0)
2102 WRBUF xpath_rev = wrbuf_alloc();
2104 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2106 for (i = level; i >= 1; --i)
2108 const char *cp = xpath[i].part;
2114 wrbuf_puts(xpath_rev, "[^/]*");
2115 else if (*cp == ' ')
2116 wrbuf_puts(xpath_rev, "\001 ");
2118 wrbuf_putc(xpath_rev, *cp);
2120 /* wrbuf_putc does not null-terminate , but
2121 wrbuf_puts below ensures it does.. so xpath_rev
2122 is OK iff length is > 0 */
2124 wrbuf_puts(xpath_rev, "/");
2126 else if (i == 1) /* // case */
2127 wrbuf_puts(xpath_rev, ".*");
/* attribute predicate on this step: look up "name=value" */
2129 if (xpath[level].predicate &&
2130 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2131 xpath[level].predicate->u.relation.name[0])
2133 WRBUF wbuf = wrbuf_alloc();
2134 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2135 if (xpath[level].predicate->u.relation.value)
2137 const char *cp = xpath[level].predicate->u.relation.value;
2138 wrbuf_putc(wbuf, '=');
/* escape characters that are special in the dictionary regexp */
2142 if (strchr(REGEX_CHARS, *cp))
2143 wrbuf_putc(wbuf, '\\');
2144 wrbuf_putc(wbuf, *cp);
2148 wrbuf_puts(wbuf, "");
2149 rset_attr = xpath_trunc(
2150 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2152 wrbuf_free(wbuf, 1);
2158 wrbuf_free(xpath_rev, 1);
2162 yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level,
2163 wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
2164 if (wrbuf_len(xpath_rev))
2166 rset_start_tag = xpath_trunc(zh, stream, '0',
2167 wrbuf_buf(xpath_rev),
2168 ZEBRA_XPATH_ELM_BEGIN,
2171 rset = rset_start_tag;
2174 rset_end_tag = xpath_trunc(zh, stream, '0',
2175 wrbuf_buf(xpath_rev),
2176 ZEBRA_XPATH_ELM_END,
/* keep only hits lying between the begin tag and the end tag */
2179 rset = rset_create_between(rset_nmem, kc, kc->scope,
2180 rset_start_tag, rset,
2181 rset_end_tag, rset_attr);
2184 wrbuf_free(xpath_rev, 1);
/* capacity of the location-step array handed to rpn_check_xpath() */
2192 #define MAX_XPATH_STEPS 10

/*
 * rpn_search_APT - evaluate a single attributes-plus-term operand.
 *
 * zebra_maps_attr() derives the register id, search type, rank type,
 * complete flag and sort flag from the APT, and zapt_term_to_utf8() stores
 * the term in termz.  The visible paths are:
 *   - rpn_sort_spec() handles the operand when it is a sort key;
 *   - when the use attribute is an XPath (rpn_check_xpath()), xpath_use is
 *     set to ZEBRA_XPATH_ATTR_CDATA if the last step starts with '@' and to
 *     ZEBRA_XPATH_CDATA otherwise; relation value 103 (alwaysmatches) skips
 *     the term search and calls rpn_search_xpath() with a null term set;
 *   - otherwise search_type selects one of the strategies "phrase",
 *     "and-list", "or-list", "local" or "numeric"; YAZ_BIB1_UNSUPP_SEARCH is
 *     raised for anything else;
 *   - the resulting set is finally passed through rpn_search_xpath().
 *
 * NOTE(review): some branch headers and early returns are not shown next to
 * these lines; the description covers only the visible statements.
 */
2194 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2195 oid_value attributeSet, NMEM stream,
2196 Z_SortKeySpecList *sort_sequence,
2197 int num_bases, char **basenames,
2200 struct rset_key_control *kc)
2202 ZEBRA_RES res = ZEBRA_OK;
2204 char *search_type = NULL;
2205 char rank_type[128];
2208 char termz[IT_MAX_WORD+1];
2210 const char *xpath_use = 0;
2211 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2215 log_level_rpn = yaz_log_module_level("rpn");
2218 zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2219 rank_type, &complete_flag, &sort_flag);
2221 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2222 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2223 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2224 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2226 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2230 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2231 rank_type, rset_nmem, rset, kc);
2232 /* consider if an X-Path query is used */
2233 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2234 xpath, MAX_XPATH_STEPS, stream);
2237 if (xpath[xpath_len-1].part[0] == '@')
2238 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2240 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* attribute type 2 is the relation attribute */
2247 attr_init_APT(&relation, zapt, 2);
2248 relation_value = attr_find(&relation, NULL);
2250 if (relation_value == 103) /* alwaysmatches */
2252 *rset = 0; /* signal no "term" set */
2253 return rpn_search_xpath(zh, num_bases, basenames,
2254 stream, rank_type, *rset,
2255 xpath_len, xpath, rset_nmem, rset, kc);
2260 /* search using one of the various search type strategies
2261 termz is our UTF-8 search term
2262 attributeSet is top-level default attribute set
2263 stream is ODR for search
2264 reg_id is the register type
2265 complete_flag is 1 for complete subfield, 0 for incomplete
2266 xpath_use is use-attribute to be used for X-Path search, 0 for none
*/
2268 if (!strcmp(search_type, "phrase"))
2270 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2271 reg_id, complete_flag, rank_type,
2273 num_bases, basenames, rset_nmem,
2276 else if (!strcmp(search_type, "and-list"))
2278 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2279 reg_id, complete_flag, rank_type,
2281 num_bases, basenames, rset_nmem,
2284 else if (!strcmp(search_type, "or-list"))
2286 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2287 reg_id, complete_flag, rank_type,
2289 num_bases, basenames, rset_nmem,
2292 else if (!strcmp(search_type, "local"))
2294 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2295 rank_type, rset_nmem, rset, kc);
2297 else if (!strcmp(search_type, "numeric"))
2299 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2300 reg_id, complete_flag, rank_type,
2302 num_bases, basenames, rset_nmem,
2307 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2310 if (res != ZEBRA_OK)
/* apply the XPath restriction (if any) to the term result set */
2314 return rpn_search_xpath(zh, num_bases, basenames,
2315 stream, rank_type, *rset,
2316 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_structure() is defined further down in
   this file and calls itself for the operands of complex nodes. */
2319 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2320 oid_value attributeSet,
2321 NMEM stream, NMEM rset_nmem,
2322 Z_SortKeySpecList *sort_sequence,
2323 int num_bases, char **basenames,
2324 RSET **result_sets, int *num_result_sets,
2325 Z_Operator *parent_op,
2326 struct rset_key_control *kc);
/*
 * rpn_get_top_approx_limit - walk an RPN tree looking at attribute type 12
 * on its terms.
 *
 * Complex nodes are handled by recursing into s1 and then s2, each only
 * while res is still ZEBRA_OK.  For a simple node holding an
 * attributes-plus-term, attribute type 12 is looked up with attr_find()
 * into l.
 *
 * NOTE(review): what is done with l, and the remaining parameters of this
 * function, are not shown next to these lines.
 */
2328 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2331 ZEBRA_RES res = ZEBRA_OK;
2332 if (zs->which == Z_RPNStructure_complex)
/* left operand first, right operand only if the left succeeded */
2334 if (res == ZEBRA_OK)
2335 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2337 if (res == ZEBRA_OK)
2338 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2341 else if (zs->which == Z_RPNStructure_simple)
2343 if (zs->u.simple->which == Z_Operand_APT)
2345 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2346 AttrType global_hits_limit_attr;
2349 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2351 l = attr_find(&global_hits_limit_attr, NULL);
/*
 * rpn_search_top - evaluate a whole RPN query tree.
 *
 * A key control object is created with zebra_key_control_create() and the
 * tree is evaluated by rpn_search_structure() with no parent operator.
 * When that fails, every result set it handed back is deleted with
 * rset_delete().  On success exactly one non-null result set is expected
 * (enforced with assert) and it is stored in *result_set.
 *
 * NOTE(review): the release of kc and the return statements are not shown
 * next to these lines.
 */
2359 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2360 oid_value attributeSet,
2361 NMEM stream, NMEM rset_nmem,
2362 Z_SortKeySpecList *sort_sequence,
2363 int num_bases, char **basenames,
2366 RSET *result_sets = 0;
2367 int num_result_sets = 0;
2369 struct rset_key_control *kc = zebra_key_control_create(zh);
2371 res = rpn_search_structure(zh, zs, attributeSet,
2374 num_bases, basenames,
2375 &result_sets, &num_result_sets,
2376 0 /* no parent op */,
2378 if (res != ZEBRA_OK)
/* failure: drop whatever partial result sets were produced */
2381 for (i = 0; i<num_result_sets; i++)
2382 rset_delete(result_sets[i]);
/* the top of the tree must have been reduced to a single set */
2387 assert(num_result_sets == 1);
2388 assert(result_sets);
2389 assert(*result_sets);
2390 *result_set = *result_sets;
/*
 * rpn_search_structure - recursively evaluate one node of an RPN tree.
 *
 * Results are handed back as an array (*result_sets, *num_result_sets)
 * allocated from stream.
 *
 * Complex node: both operands are evaluated recursively; if either fails,
 * the result sets obtained so far are deleted.  The two operand arrays are
 * concatenated.  When there is no parent operator, the parent operator
 * differs from this node's operator, or this operator is neither AND nor
 * OR, the concatenated sets are combined at once into a single set with
 * rset_create_and / rset_create_or / rset_create_not / rset_create_prox.
 * Proximity is only accepted with a known unit equal to Z_ProxUnit_word;
 * other units raise YAZ_BIB1_UNSUPP_PROX_UNIT_CODE, and an unhandled
 * operator raises YAZ_BIB1_OPERATOR_UNSUPP.
 *
 * Simple node: an attributes-plus-term goes to rpn_search_APT(), a result
 * set id is resolved with resultSetRef() (a missing set raises
 * YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST); the single resulting set
 * is returned in a one-element array.  Any other operand or node kind
 * raises YAZ_BIB1_UNSUPP_SEARCH.
 *
 * NOTE(review): the switch header, several else branches and the return
 * statements are not shown next to these lines; the description covers
 * only the visible statements.
 */
2396 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2397 oid_value attributeSet,
2398 NMEM stream, NMEM rset_nmem,
2399 Z_SortKeySpecList *sort_sequence,
2400 int num_bases, char **basenames,
2401 RSET **result_sets, int *num_result_sets,
2402 Z_Operator *parent_op,
2403 struct rset_key_control *kc)
2405 *num_result_sets = 0;
2406 if (zs->which == Z_RPNStructure_complex)
2409 Z_Operator *zop = zs->u.complex->roperator;
2410 RSET *result_sets_l = 0;
2411 int num_result_sets_l = 0;
2412 RSET *result_sets_r = 0;
2413 int num_result_sets_r = 0;
/* left operand */
2415 res = rpn_search_structure(zh, zs->u.complex->s1,
2416 attributeSet, stream, rset_nmem,
2418 num_bases, basenames,
2419 &result_sets_l, &num_result_sets_l,
2421 if (res != ZEBRA_OK)
2424 for (i = 0; i<num_result_sets_l; i++)
2425 rset_delete(result_sets_l[i]);
/* right operand */
2428 res = rpn_search_structure(zh, zs->u.complex->s2,
2429 attributeSet, stream, rset_nmem,
2431 num_bases, basenames,
2432 &result_sets_r, &num_result_sets_r,
2434 if (res != ZEBRA_OK)
2437 for (i = 0; i<num_result_sets_l; i++)
2438 rset_delete(result_sets_l[i]);
2439 for (i = 0; i<num_result_sets_r; i++)
2440 rset_delete(result_sets_r[i]);
2444 /* make a new list of result for all children */
2445 *num_result_sets = num_result_sets_l + num_result_sets_r;
2446 *result_sets = nmem_malloc(stream, *num_result_sets *
2447 sizeof(**result_sets));
2448 memcpy(*result_sets, result_sets_l,
2449 num_result_sets_l * sizeof(**result_sets));
2450 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2451 num_result_sets_r * sizeof(**result_sets));
2453 if (!parent_op || parent_op->which != zop->which
2454 || (zop->which != Z_Operator_and &&
2455 zop->which != Z_Operator_or))
2457 /* parent node different from this one (or non-present) */
2458 /* we must combine result sets now */
2462 case Z_Operator_and:
2463 rset = rset_create_and(rset_nmem, kc,
2465 *num_result_sets, *result_sets);
2468 rset = rset_create_or(rset_nmem, kc,
2469 kc->scope, 0, /* termid */
2470 *num_result_sets, *result_sets);
2472 case Z_Operator_and_not:
2473 rset = rset_create_not(rset_nmem, kc,
2478 case Z_Operator_prox:
/* only the known proximity unit "word" is accepted */
2479 if (zop->u.prox->which != Z_ProximityOperator_known)
2482 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2486 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2488 zebra_setError_zint(zh,
2489 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2490 *zop->u.prox->u.known);
2495 rset = rset_create_prox(rset_nmem, kc,
2497 *num_result_sets, *result_sets,
2498 *zop->u.prox->ordered,
2499 (!zop->u.prox->exclusion ?
2500 0 : *zop->u.prox->exclusion),
2501 *zop->u.prox->relationType,
2502 *zop->u.prox->distance );
2506 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the operand list */
2509 *num_result_sets = 1;
2510 *result_sets = nmem_malloc(stream, *num_result_sets *
2511 sizeof(**result_sets));
2512 (*result_sets)[0] = rset;
2515 else if (zs->which == Z_RPNStructure_simple)
2520 if (zs->u.simple->which == Z_Operand_APT)
2522 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2523 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2524 attributeSet, stream, sort_sequence,
2525 num_bases, basenames, rset_nmem, &rset,
2527 if (res != ZEBRA_OK)
2530 else if (zs->u.simple->which == Z_Operand_resultSetId)
2532 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2533 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2537 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2538 zs->u.simple->u.resultSetId);
2545 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields exactly one result set */
2548 *num_result_sets = 1;
2549 *result_sets = nmem_malloc(stream, *num_result_sets *
2550 sizeof(**result_sets));
2551 (*result_sets)[0] = rset;
2555 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2566 * indent-tabs-mode: nil
2568 * vim: shiftwidth=4 tabstop=8 expandtab