1 /* $Id: zrpn.c,v 1.222 2006-07-03 10:43:43 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 struct rpn_char_map_info
/* Presumably a "log level already initialised" flag; no visible line reads
   or writes it, so confirm against the full file. */
46 static int log_level_set = 0;
/* Level passed to yaz_log() for RPN tracing in this file; add_isam_p assigns
   it from yaz_log_module_level("rpn"). */
47 static int log_level_rpn = 0;
/* Character-map callback: rpn_char_map_prepare registers it on the
   dictionary with dict_grep_cmap(), passing a struct rpn_char_map_info as
   vp. It maps the input at *from (len bytes) with zebra_maps_input() using
   that struct's zm and reg_type. The yaz_log() calls below print "---" and
   an output byte in hex; the surrounding control flow and the return
   statement are not part of this excerpt. */
51 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
53 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
54 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
58 const char *outp = *out;
59 yaz_log(YLOG_LOG, "---");
62 yaz_log(YLOG_LOG, "%02X", *outp);
/* Fill map_info with the register's zebra_maps and the given reg_type, then
   install rpn_char_map_handler on reg->dict via dict_grep_cmap() with
   map_info as the handler's argument. map_info is stored by pointer, so it
   presumably has to outlive the dictionary lookups that use it. */
70 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
71 struct rpn_char_map_info *map_info)
73 map_info->zm = reg->zebra_maps;
74 map_info->reg_type = reg_type;
75 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Build dst from the internal term src using zebra_maps_output() for the
   given reg_type. Both visible length tests keep the output length below
   IT_MAX_WORD-1; add_isam_p calls this with a destination buffer of
   IT_MAX_WORD bytes. No destination size is passed in. */
92 void zebra_term_untrans(ZebraHandle zh, int reg_type,
93 char *dst, const char *src)
98 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
102 if (len < IT_MAX_WORD-1)
107 while (*cp && len < IT_MAX_WORD-1)
/* Record one dictionary hit in a grep_info (used here as p; the third
   parameter is on a line not shown, grep_handle passes a struct grep_info *).
   name: dictionary key; it starts with an encoded ordinal followed by the
         term (see key_SU_decode below).
   info: info[0] is the payload size, which must equal sizeof(ISAM_P); the
         ISAM pointer itself starts at info+1.
   The ISAM pointer is appended at p->isam_p_indx, growing the buffers first
   when they are full. */
113 static void add_isam_p(const char *name, const char *info,
118 log_level_rpn = yaz_log_module_level("rpn");
/* Buffer full: allocate a larger one (2*size + 100 entries), copy the
   entries in use and release the old buffer. */
121 if (p->isam_p_indx == p->isam_p_size)
123 ISAM_P *new_isam_p_buf;
127 p->isam_p_size = 2*p->isam_p_size + 100;
128 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
132 memcpy(new_isam_p_buf, p->isam_p_buf,
133 p->isam_p_indx * sizeof(*p->isam_p_buf));
134 xfree(p->isam_p_buf);
136 p->isam_p_buf = new_isam_p_buf;
139 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the destination is the new term_no array but the source is
   p->isam_p_buf; this looks like it should copy from p->term_no. Confirm
   against the full file (the lines around it are not visible here). */
142 memcpy(new_term_no, p->isam_p_buf,
143 p->isam_p_indx * sizeof(*p->term_no));
146 p->term_no = new_term_no;
/* Sanity-check the payload size byte, then store the ISAM pointer. */
149 assert(*info == sizeof(*p->isam_p_buf));
150 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* Decode the ordinal prefix of name, convert the rest back to display form,
   log it, look up database and index name for the ordinal and add the term
   to p->termset. Any condition guarding this part is not visible here. */
156 char term_tmp[IT_MAX_WORD];
158 const char *index_name;
159 int len = key_SU_decode (&ord, (const unsigned char *) name);
161 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
162 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
163 zebraExplain_lookup_ord(p->zh->reg->zei,
164 ord, 0 /* index_type */, &db, &index_name);
165 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
167 resultSetAddTerm(p->zh, p->termset, name[len], db,
168 index_name, term_tmp);
/* Adapter with an opaque context pointer: forwards the hit (name, info) to
   add_isam_p, treating p as a struct grep_info *. The return statement is
   on a line not shown. */
174 static int grep_handle(char *name, const char *info, void *p)
176 add_isam_p(name, info, (struct grep_info *) p);
/* Skip leading white space in the term at *src.
   ct1, ct2: optional character sets (either may be NULL); the current
             character is tested against them with strchr() before it is
             mapped.
   first:    forwarded as the last argument of zebra_maps_input().
   A character is considered white space when its mapping equals CHR_SPACE.
   The actions taken on each test, the update of *src and the return value
   are on lines not shown; callers treat a zero return as "no term". */
180 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
181 const char *ct1, const char *ct2, int first)
183 const char *s1, *s0 = *src;
186 /* skip white space */
189 if (ct1 && strchr(ct1, *s0))
191 if (ct2 && strchr(ct2, *s0))
194 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
195 if (**map != *CHR_SPACE)
/* Render in_buf (in_size bytes) into out_buf as a run of "HH:c " items
   (hex value, then a character) for logging purposes.
   out_size must be larger than 20 (asserted). Bytes outside 32..126 are
   singled out; the substitute character assigned for them is on a line not
   shown. Once the text exceeds out_size-20 characters, ".." is appended
   (the loop exit that presumably follows is not visible). */
204 static void esc_str(char *out_buf, size_t out_size,
205 const char *in_buf, int in_size)
211 assert(out_size > 20);
213 for (k = 0; k<in_size; k++)
/* mask to 0..255 so that bytes >= 0x80 do not become negative */
215 int c = in_buf[k] & 0xff;
217 if (c < 32 || c > 126)
/* append to the current end of out_buf */
221 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
222 if (strlen(out_buf) > out_size-20)
224 strcat(out_buf, "..");
/* Characters the term_1xx functions test for with strchr() before copying a
   source character into the generated pattern (presumably so they can be
   escaped; the escaping statement itself is not visible in this excerpt). */
230 #define REGEX_CHARS " []()|.*+?!"
232 /* term_100: handle term, where trunc = none(no operators at all) */
/* src:         in/out cursor into the query text.
   dst:         receives the mapped form of the term (map[0] strings).
   dst_term:    receives the corresponding bytes of the original text.
   space_split: non-zero when a space ends the term; zero for "complete
                subfield" handling, where spaces are remembered and put back
                in front of the next non-space character.
   Callers treat a zero return as "no term found". Remaining parameters and
   the return statements are on lines not shown. */
233 static int term_100(ZebraMaps zebra_maps, int reg_type,
234 const char **src, char *dst, int space_split,
242 const char *space_start = 0;
243 const char *space_end = 0;
245 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
252 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
256 if (**map == *CHR_SPACE)
259 else /* complete subfield only. */
261 if (**map == *CHR_SPACE)
262 { /* save space mapping for later .. */
267 else if (space_start)
268 { /* reload last space */
269 while (space_start < space_end)
271 if (strchr(REGEX_CHARS, *space_start))
273 dst_term[j++] = *space_start;
274 dst[i++] = *space_start++;
277 space_start = space_end = 0;
280 /* add non-space char */
/* s1..s0 is the span of source text consumed by the mapping call */
281 memcpy(dst_term+j, s1, s0 - s1);
287 if (strchr(REGEX_CHARS, *s1))
295 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
297 strcpy(dst + i, map[0]);
307 /* term_101: handle term, where trunc = Process # */
/* Same buffer roles as term_100: dst receives the mapped pattern, dst_term
   the original text (NUL-terminated at the end). '#' is passed to term_pre
   as the special character set. Callers treat a zero return as "no term". */
308 static int term_101(ZebraMaps zebra_maps, int reg_type,
309 const char **src, char *dst, int space_split,
317 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
/* copies a source character verbatim (presumably the '#' operator; the
   enclosing test is not visible) */
326 dst_term[j++] = *s0++;
332 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
334 if (space_split && **map == *CHR_SPACE)
337 /* add non-space char */
338 memcpy(dst_term+j, s1, s0 - s1);
344 if (strchr(REGEX_CHARS, *s1))
352 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
354 strcpy(dst + i, map[0]);
360 dst_term[j++] = '\0';
365 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* errors: optional out-parameter (term_102 passes NULL). When non-NULL and
           the term starts with "+<digit>+" followed by at least one more
           character, *errors is set to the digit's value.
   dst receives the generated pattern, dst_term the original text. Characters
   in the operator set tested below are singled out from ordinary text, which
   goes through zebra_maps_search(). Callers treat a zero return as
   "no term". */
366 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
367 char *dst, int *errors, int space_split,
375 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
/* cast to unsigned char before isdigit() avoids negative char values */
378 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
379 isdigit(((const unsigned char *)s0)[1]))
381 *errors = s0[1] - '0';
388 if (strchr("^\\()[].*+?|-", *s0))
397 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
399 if (space_split && **map == *CHR_SPACE)
402 /* add non-space char */
403 memcpy(dst_term+j, s1, s0 - s1);
409 if (strchr(REGEX_CHARS, *s1))
417 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
419 strcpy(dst + i, map[0]);
431 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Thin wrapper around term_103 that passes NULL for the errors
   out-parameter, so no error count is extracted from the term. */
432 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
433 char *dst, int space_split, char *dst_term)
435 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
440 /* term_104: handle term, where trunc = Process # and ! */
/* NOTE(review): the character set handed to term_pre below is "?*#", which
   does not contain the '!' named in the comment above; confirm which
   operators this function really handles.
   dst receives the generated pattern, dst_term the original text
   (NUL-terminated at the end). Callers treat a zero return as "no term". */
441 static int term_104(ZebraMaps zebra_maps, int reg_type,
442 const char **src, char *dst, int space_split,
450 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
457 dst_term[j++] = *s0++;
/* optional decimal count following the operator character: accumulate it
   in limit while copying the digits to dst_term */
458 if (*s0 >= '0' && *s0 <= '9')
461 while (*s0 >= '0' && *s0 <= '9')
463 limit = limit * 10 + (*s0 - '0');
464 dst_term[j++] = *s0++;
484 dst_term[j++] = *s0++;
489 dst_term[j++] = *s0++;
495 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
497 if (space_split && **map == *CHR_SPACE)
500 /* add non-space char */
501 memcpy(dst_term+j, s1, s0 - s1);
507 if (strchr(REGEX_CHARS, *s1))
515 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
517 strcpy(dst + i, map[0]);
523 dst_term[j++] = '\0';
528 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* right_truncate: string_term passes 1 for truncation 105 and 0 for 106;
                   how it is applied is on lines not shown.
   dst receives the generated pattern, dst_term the original text
   (NUL-terminated at the end). '*' and '!' are passed to term_pre as the
   special character set. Callers treat a zero return as "no term". */
529 static int term_105(ZebraMaps zebra_maps, int reg_type,
530 const char **src, char *dst, int space_split,
531 char *dst_term, int right_truncate)
538 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
547 dst_term[j++] = *s0++;
552 dst_term[j++] = *s0++;
558 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
560 if (space_split && **map == *CHR_SPACE)
563 /* add non-space char */
564 memcpy(dst_term+j, s1, s0 - s1);
570 if (strchr(REGEX_CHARS, *s1))
578 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
580 strcpy(dst + i, map[0]);
592 dst_term[j++] = '\0';
598 /* gen_regular_rel - generate regular expression from relation
599 * val: border value (inclusive)
600 * islt: 1 if <=; 0 if >=.
/* Writes into dst a regular expression over decimal digit strings for a
   numeric bound: val is the border value, islt selects the direction.
   The pattern opens with either "(-[0-9]+|(" or "([0-9]+|-(" (the
   conditions choosing between them are not visible), continues with
   alternatives derived from the digits of val, and ends with a part matching
   numbers with fewer digits or with more digits. dst has no size parameter,
   so the caller must supply a sufficiently large buffer. */
602 static void gen_regular_rel(char *dst, int val, int islt)
609 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
613 strcpy(dst, "(-[0-9]+|(");
621 strcpy(dst, "([0-9]+|-(");
633 sprintf(numstr, "%d", val);
/* walk the digits of val from the last one to the first */
634 for (w = strlen(numstr); --w >= 0; pos++)
653 strcpy(dst + dst_p, numstr);
654 dst_p = strlen(dst) - pos - 1;
682 for (i = 0; i<pos; i++)
695 /* match everything less than 10^(pos-1) */
697 for (i = 1; i<pos; i++)
698 strcat(dst, "[0-9]?");
702 /* match everything greater than 10^pos */
703 for (i = 0; i <= pos; i++)
704 strcat(dst, "[0-9]");
705 strcat(dst, "[0-9]*");
/* Copy one logical character from src[*indx] to **term_p, advancing both
   the output pointer and the index. A backslash is copied together with the
   character following it, so an escaped character counts as one unit.
   No bounds are checked on either side. */
710 void string_rel_add_char(char **term_p, const char *src, int *indx)
712 if (src[*indx] == '\\')
713 *(*term_p)++ = src[(*indx)++];
714 *(*term_p)++ = src[(*indx)++];
718 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
719 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
720 * >= abc ([b-].*|a[c-].*|ab[c-].*)
721 * ([^-a].*|a[^-b].*|ab[c-].*)
722 * < abc ([-0].*|a[-a].*|ab[-b].*)
723 * ([^a-].*|a[^b-].*|ab[^c-].*)
724 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
725 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Append to term_dict a regular expression that implements the relation
   attribute (type 2) of zapt for the next term read from *term_sub.
   term_dict:  NUL-terminated pattern so far; output is written at its end.
   term_dst:   receives the original term text (filled in by term_100).
   error_code: set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for a relation that
               is not handled (parameter declared on a line not shown).
   Each relation case extracts the term with term_100 into term_component and
   then emits alternatives character by character with string_rel_add_char.
   string_term treats a zero return as failure and reports *error_code when
   it is set. */
727 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
728 const char **term_sub, char *term_dict,
729 oid_value attributeSet,
730 int reg_type, int space_split, char *term_dst,
/* write position: current end of the pattern in term_dict */
736 char *term_tmp = term_dict + strlen(term_dict);
737 char term_component[2*IT_MAX_WORD+20];
739 attr_init_APT(&relation, zapt, 2);
740 relation_value = attr_find(&relation, NULL);
743 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
744 switch (relation_value)
747 if (!term_100(zh->reg->zebra_maps, reg_type,
748 term_sub, term_component,
749 space_split, term_dst))
751 yaz_log(log_level_rpn, "Relation <");
754 for (i = 0; term_component[i]; )
761 string_rel_add_char(&term_tmp, term_component, &j);
766 string_rel_add_char(&term_tmp, term_component, &i);
/* size check on the generated pattern; the action taken is not visible */
773 if ((term_tmp - term_dict) > IT_MAX_WORD)
780 if (!term_100(zh->reg->zebra_maps, reg_type,
781 term_sub, term_component,
782 space_split, term_dst))
784 yaz_log(log_level_rpn, "Relation <=");
787 for (i = 0; term_component[i]; )
792 string_rel_add_char(&term_tmp, term_component, &j);
796 string_rel_add_char(&term_tmp, term_component, &i);
805 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* append the complete term once more */
808 for (i = 0; term_component[i]; )
809 string_rel_add_char(&term_tmp, term_component, &i);
814 if (!term_100 (zh->reg->zebra_maps, reg_type,
815 term_sub, term_component, space_split, term_dst))
817 yaz_log(log_level_rpn, "Relation >");
820 for (i = 0; term_component[i];)
825 string_rel_add_char(&term_tmp, term_component, &j);
830 string_rel_add_char(&term_tmp, term_component, &i);
838 if ((term_tmp - term_dict) > IT_MAX_WORD)
841 for (i = 0; term_component[i];)
842 string_rel_add_char(&term_tmp, term_component, &i);
849 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
850 term_component, space_split, term_dst))
852 yaz_log(log_level_rpn, "Relation >=");
855 for (i = 0; term_component[i];)
862 string_rel_add_char(&term_tmp, term_component, &j);
865 if (term_component[i+1])
869 string_rel_add_char(&term_tmp, term_component, &i);
873 string_rel_add_char(&term_tmp, term_component, &i);
880 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* equality: the term itself, wrapped in parentheses */
891 yaz_log(log_level_rpn, "Relation =");
892 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
893 term_component, space_split, term_dst))
895 strcat(term_tmp, "(");
896 strcat(term_tmp, term_component);
897 strcat(term_tmp, ")");
900 yaz_log(log_level_rpn, "Relation always matches");
901 /* skip to end of term (we don't care what it is) */
902 while (**term_sub != '\0')
906 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
/* Forward declaration: term_trunc calls string_term before its definition
   further down in this file. */
912 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
913 const char **term_sub,
914 oid_value attributeSet, NMEM stream,
915 struct grep_info *grep_info,
916 int reg_type, int complete_flag,
917 int num_bases, char **basenames,
919 const char *xpath_use,
920 struct ord_list **ol);
/* Read the hits-limit attribute (type 9) and the term reference id
   attribute (type 10) from zapt.
   hits_limit_value: out; resolved as described by the comments below.
   term_ref_id_str:  out; string form of the reference id. A numeric id >= 0
                     is formatted as decimal text into a 20-byte buffer
                     allocated from nmem (parameter declared on a line not
                     shown); otherwise attr_find_ex may have stored a string.
   The return statement is on a line not shown. */
922 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
923 Z_AttributesPlusTerm *zapt,
924 zint *hits_limit_value,
925 const char **term_ref_id_str,
928 AttrType term_ref_id_attr;
929 AttrType hits_limit_attr;
932 attr_init_APT(&hits_limit_attr, zapt, 9);
933 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
935 attr_init_APT(&term_ref_id_attr, zapt, 10);
936 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
937 if (term_ref_id_int >= 0)
939 char *res = nmem_malloc(nmem, 20);
940 sprintf(res, "%d", term_ref_id_int);
941 *term_ref_id_str = res;
944 /* no limit given ? */
945 if (*hits_limit_value == -1)
947 if (*term_ref_id_str)
949 /* use global if term_ref is present */
950 *hits_limit_value = zh->approx_limit;
954 /* no counting if term_ref is not present */
955 *hits_limit_value = 0;
958 else if (*hits_limit_value == 0)
960 /* 0 is the same as global limit */
961 *hits_limit_value = zh->approx_limit;
963 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
964 *term_ref_id_str ? *term_ref_id_str : "none",
/* Process the next term of *term_sub: resolve hit limit and term reference
   id with term_limits_APT, reset the ISAM pointer count in grep_info, let
   string_term collect the matching dictionary entries, and build *rset from
   them with rset_trunc (positions preserved). term_dst receives the term
   text. The handling of string_term's result and the return statements are
   on lines not shown. */
969 static ZEBRA_RES term_trunc(ZebraHandle zh,
970 Z_AttributesPlusTerm *zapt,
971 const char **term_sub,
972 oid_value attributeSet, NMEM stream,
973 struct grep_info *grep_info,
974 int reg_type, int complete_flag,
975 int num_bases, char **basenames,
977 const char *rank_type,
978 const char *xpath_use,
981 struct rset_key_control *kc)
985 zint hits_limit_value;
986 const char *term_ref_id_str = 0;
989 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
/* start with an empty hit list for this term */
990 grep_info->isam_p_indx = 0;
991 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
992 reg_type, complete_flag, num_bases, basenames,
993 term_dst, xpath_use, &ol);
/* tests the pointer stored in *term_sub, not the character it points to */
996 if (!*term_sub) /* no more terms ? */
998 yaz_log(log_level_rpn, "term: %s", term_dst);
999 *rset = rset_trunc(zh, grep_info->isam_p_buf,
1000 grep_info->isam_p_indx, term_dst,
1001 strlen(term_dst), rank_type, 1 /* preserve pos */,
1002 zapt->term->which, rset_nmem,
1003 kc, kc->scope, ol, reg_type, hits_limit_value,
/* Collect dictionary hits for the next term of *term_sub in every database
   listed in basenames.
   For each database the function selects it in the explain info, resolves
   the index ordinal for the APT (appending it to *ol), and builds a pattern
   in term_dict: a prefix consisting of '(' + the encoded ordinal bytes, each
   preceded by the escape byte 1, + ')', followed by a term expression chosen
   by the truncation attribute (type 5). The pattern is then run through
   dict_lookup_grep, which fills grep_info.
   term_dst receives the term text. complete_flag non-zero disables splitting
   at spaces. Error returns and the final return are on lines not shown. */
1010 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1011 const char **term_sub,
1012 oid_value attributeSet, NMEM stream,
1013 struct grep_info *grep_info,
1014 int reg_type, int complete_flag,
1015 int num_bases, char **basenames,
1017 const char *xpath_use,
1018 struct ord_list **ol)
1020 char term_dict[2*IT_MAX_WORD+4000];
1022 AttrType truncation;
1023 int truncation_value;
1025 struct rpn_char_map_info rcmi;
1026 int space_split = complete_flag ? 0 : 1;
1028 int bases_ok = 0; /* no of databases with OK attribute */
1030 *ol = ord_list_create(stream);
1032 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1033 attr_init_APT(&truncation, zapt, 5);
1034 truncation_value = attr_find(&truncation, NULL);
1035 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1037 for (base_no = 0; base_no < num_bases; base_no++)
1040 int regex_range = 0;
1041 int max_pos, prefix_len = 0;
1046 termp = *term_sub; /* start of term for each database */
1048 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1050 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1051 basenames[base_no]);
1055 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1056 attributeSet, &ord) != ZEBRA_OK)
1061 *ol = ord_list_append(stream, *ol, ord);
1062 ord_len = key_SU_encode (ord, ord_buf);
/* pattern prefix: "(" escaped-ordinal-bytes ")" */
1064 term_dict[prefix_len++] = '(';
1065 for (i = 0; i<ord_len; i++)
1067 term_dict[prefix_len++] = 1; /* our internal regexp escape char */
1068 term_dict[prefix_len++] = ord_buf[i];
1070 term_dict[prefix_len++] = ')';
1071 term_dict[prefix_len] = '\0';
1073 switch (truncation_value)
1075 case -1: /* not specified */
1076 case 100: /* do not truncate */
1077 if (!string_relation(zh, zapt, &termp, term_dict,
1079 reg_type, space_split, term_dst,
1084 zebra_setError(zh, relation_error, 0);
1091 case 1: /* right truncation */
1092 term_dict[j++] = '(';
1093 if (!term_100(zh->reg->zebra_maps, reg_type,
1094 &termp, term_dict + j, space_split, term_dst))
1099 strcat(term_dict, ".*)");
1101 case 2: /* left truncation */
1102 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1103 if (!term_100(zh->reg->zebra_maps, reg_type,
1104 &termp, term_dict + j, space_split, term_dst))
1109 strcat(term_dict, ")");
1111 case 3: /* left&right truncation */
1112 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1113 if (!term_100(zh->reg->zebra_maps, reg_type,
1114 &termp, term_dict + j, space_split, term_dst))
1119 strcat(term_dict, ".*)");
1121 case 101: /* process # in term */
1122 term_dict[j++] = '(';
1123 if (!term_101(zh->reg->zebra_maps, reg_type,
1124 &termp, term_dict + j, space_split, term_dst))
1129 strcat(term_dict, ")");
1131 case 102: /* Regexp-1 */
1132 term_dict[j++] = '(';
1133 if (!term_102(zh->reg->zebra_maps, reg_type,
1134 &termp, term_dict + j, space_split, term_dst))
1139 strcat(term_dict, ")");
1141 case 103: /* Regexp-2 */
1143 term_dict[j++] = '(';
1144 if (!term_103(zh->reg->zebra_maps, reg_type,
/* NOTE(review): the third argument on the next line looks mis-encoded in
   this copy of the file; it is presumably the address of regex_range (the
   int declared at the top of the loop body), matching term_103's int *errors
   parameter. Restore it from the pristine source. */
1145 &termp, term_dict + j, ®ex_range,
1146 space_split, term_dst))
1151 strcat(term_dict, ")");
1153 case 104: /* process # and ! in term */
1154 term_dict[j++] = '(';
1155 if (!term_104(zh->reg->zebra_maps, reg_type,
1156 &termp, term_dict + j, space_split, term_dst))
1161 strcat(term_dict, ")");
1163 case 105: /* process * and ! in term */
1164 term_dict[j++] = '(';
1165 if (!term_105(zh->reg->zebra_maps, reg_type,
1166 &termp, term_dict + j, space_split, term_dst, 1))
1171 strcat(term_dict, ")");
1173 case 106: /* process * and ! in term */
1174 term_dict[j++] = '(';
1175 if (!term_105(zh->reg->zebra_maps, reg_type,
1176 &termp, term_dict + j, space_split, term_dst, 0))
1181 strcat(term_dict, ")");
1184 zebra_setError_zint(zh,
1185 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* log the pattern without its ordinal prefix, then run the lookup */
1192 const char *input = term_dict + prefix_len;
1193 esc_str(buf, sizeof(buf), input, strlen(input));
1195 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1196 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1197 grep_info, &max_pos,
1198 ord_len /* number of "exact" chars */,
1201 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1206 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1211 /* convert APT search term to UTF8 */
/* Output goes to termz (parameter declared on a line not shown) as a
   NUL-terminated string; every visible path limits it to IT_MAX_WORD-1
   bytes, so termz needs room for IT_MAX_WORD bytes.
   - general term: converted with zh->iconv_to_utf8 when that converter is
     set, otherwise copied unchanged (truncated if too long);
   - characterString term: copied unchanged (truncated if too long);
   - the YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM error below presumably covers
     all other term kinds (its case label is not visible). */
1212 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1216 Z_Term *term = zapt->term;
1218 switch (term->which)
1220 case Z_Term_general:
1221 if (zh->iconv_to_utf8 != 0)
1223 char *inbuf = (char *) term->u.general->buf;
1224 size_t inleft = term->u.general->len;
1225 char *outbuf = termz;
1226 size_t outleft = IT_MAX_WORD-1;
1229 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1231 if (ret == (size_t)(-1))
/* call with null buffers after a failed conversion (presumably resets the
   converter state; confirm with the yaz_iconv documentation) */
1233 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1236 YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1244 sizez = term->u.general->len;
1245 if (sizez > IT_MAX_WORD-1)
1246 sizez = IT_MAX_WORD-1;
1247 memcpy (termz, term->u.general->buf, sizez);
1248 termz[sizez] = '\0';
1251 case Z_Term_characterString:
1252 sizez = strlen(term->u.characterString);
1253 if (sizez > IT_MAX_WORD-1)
1254 sizez = IT_MAX_WORD-1;
1255 memcpy (termz, term->u.characterString, sizez);
1256 termz[sizez] = '\0';
1259 zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1265 /* convert APT SCAN term to internal cmap */
/* First converts the APT term to UTF-8 in the local buffer termz0 (failure
   is propagated as ZEBRA_FAIL), then walks that text with
   zebra_maps_input() for reg_type. Mappings equal to CHR_SPACE are tracked
   through space_map and copied from there; other mappings are copied from
   *map. The copy statements writing to termz are on lines not shown. */
1266 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1267 char *termz, int reg_type)
1269 char termz0[IT_MAX_WORD];
1271 if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1272 return ZEBRA_FAIL; /* error */
1276 const char *cp = (const char *) termz0;
1277 const char *cp_end = cp + strlen(cp);
1280 const char *space_map = NULL;
/* loop while unconsumed input remains; zebra_maps_input advances cp */
1283 while ((len = (cp_end - cp)) > 0)
1285 map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1286 if (**map == *CHR_SPACE)
1291 for (src = space_map; *src; src++)
1294 for (src = *map; *src; src++)
/* Release the buffers held by grep_info: term_no and isam_p_buf, both via
   xfree(). The struct itself is not freed here. */
1303 static void grep_info_delete(struct grep_info *grep_info)
1306 xfree(grep_info->term_no);
1308 xfree(grep_info->isam_p_buf);
/* Initialise grep_info (empty buffers, given reg_type, no termset) and
   honour the termset attribute (type 8) of zapt: when attr_find_ex returns
   something other than -1, a result set is created with resultSetAdd and
   stored in grep_info->termset. Its name is the attribute's string value
   when the numeric result is -2, otherwise the numeric value formatted as
   decimal text. If the result set cannot be created,
   YAZ_BIB1_ILLEGAL_RESULT_SET_NAME is reported; callers compare the return
   value against ZEBRA_FAIL. */
1311 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1312 Z_AttributesPlusTerm *zapt,
1313 struct grep_info *grep_info,
1317 int termset_value_numeric;
1318 const char *termset_value_string;
1321 grep_info->term_no = 0;
1323 grep_info->isam_p_size = 0;
1324 grep_info->isam_p_buf = NULL;
1326 grep_info->reg_type = reg_type;
1327 grep_info->termset = 0;
1331 attr_init_APT(&termset, zapt, 8);
1332 termset_value_numeric =
1333 attr_find_ex(&termset, NULL, &termset_value_string);
1334 if (termset_value_numeric != -1)
1337 const char *termset_name = 0;
1338 if (termset_value_numeric != -2)
1341 sprintf(resname, "%d", termset_value_numeric);
1342 termset_name = resname;
1345 termset_name = termset_value_string;
1346 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1347 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1348 if (!grep_info->termset)
1350 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1358 \brief Create result set(s) for list of terms
1359 \param zh Zebra Handle
1360 \param termz term as used in query but converted to UTF-8
1361 \param attributeSet default attribute set
1362 \param stream memory for result
1363 \param reg_type register type ('w', 'p',..)
1364 \param complete_flag whether it's phrases or not
1365 \param rank_type term flags for ranking
1366 \param xpath_use use attribute for X-Path (-1 for no X-path)
1367 \param num_bases number of databases
1368 \param basenames array of databases
1369 \param rset_mem memory for result sets
1370 \param result_sets output result set for each term in list (output)
1371 \param number number of output result sets
1372 \param kc rset key control to be used for created result sets
/* Implementation notes: prepares a grep_info, then repeatedly calls
   term_trunc on the remaining text, storing each resulting RSET in
   *result_sets and counting it in *num_result_sets. The array is enlarged
   with nmem_malloc on stream when full (old entries copied across). If
   term_trunc fails, the sets created so far are deleted and grep_info is
   released. A null RSET from term_trunc is tested for right after the call
   (presumably ending the loop; the action is not visible). */
1374 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1375 Z_AttributesPlusTerm *zapt,
1377 oid_value attributeSet,
1379 int reg_type, int complete_flag,
1380 const char *rank_type,
1381 const char *xpath_use,
1382 int num_bases, char **basenames,
1384 RSET **result_sets, int *num_result_sets,
1385 struct rset_key_control *kc)
1387 char term_dst[IT_MAX_WORD+1];
1388 struct grep_info grep_info;
1389 const char *termp = termz;
1392 *num_result_sets = 0;
1394 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
/* array full: allocate a bigger one and carry over the existing entries */
1400 if (alloc_sets == *num_result_sets)
1403 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1406 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1407 alloc_sets = alloc_sets + add;
1408 *result_sets = rnew;
1410 res = term_trunc(zh, zapt, &termp, attributeSet,
1412 reg_type, complete_flag,
1413 num_bases, basenames,
1414 term_dst, rank_type,
1415 xpath_use, rset_nmem,
1416 &(*result_sets)[*num_result_sets],
/* failure: undo the result sets built so far */
1418 if (res != ZEBRA_OK)
1421 for (i = 0; i < *num_result_sets; i++)
1422 rset_delete((*result_sets)[i]);
1423 grep_info_delete (&grep_info);
1426 if ((*result_sets)[*num_result_sets] == 0)
1428 (*num_result_sets)++;
1433 grep_info_delete(&grep_info);
/* Phrase search: builds one result set per term with term_list_trunc and
   combines them into *rset: no sets gives a null rset, a single set is
   used as is, several sets are joined by rset_create_prox (ordered, no
   exclusion, relation 3, distance 1). A term_list_trunc failure is tested
   for; the return statements are on lines not shown. */
1437 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1438 Z_AttributesPlusTerm *zapt,
1439 const char *termz_org,
1440 oid_value attributeSet,
1442 int reg_type, int complete_flag,
1443 const char *rank_type,
1444 const char *xpath_use,
1445 int num_bases, char **basenames,
1448 struct rset_key_control *kc)
1450 RSET *result_sets = 0;
1451 int num_result_sets = 0;
1453 term_list_trunc(zh, zapt, termz_org, attributeSet,
1454 stream, reg_type, complete_flag,
1455 rank_type, xpath_use,
1456 num_bases, basenames,
1458 &result_sets, &num_result_sets, kc);
1459 if (res != ZEBRA_OK)
1461 if (num_result_sets == 0)
1462 *rset = rset_create_null(rset_nmem, kc, 0);
1463 else if (num_result_sets == 1)
1464 *rset = result_sets[0];
1466 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1467 num_result_sets, result_sets,
1468 1 /* ordered */, 0 /* exclusion */,
1469 3 /* relation */, 1 /* distance */);
/* Or-list search: builds one result set per term with term_list_trunc and
   combines them into *rset: no sets gives a null rset, a single set is
   used as is, several sets are joined by rset_create_or. A term_list_trunc
   failure is tested for; the return statements are on lines not shown. */
1475 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1476 Z_AttributesPlusTerm *zapt,
1477 const char *termz_org,
1478 oid_value attributeSet,
1480 int reg_type, int complete_flag,
1481 const char *rank_type,
1482 const char *xpath_use,
1483 int num_bases, char **basenames,
1486 struct rset_key_control *kc)
1488 RSET *result_sets = 0;
1489 int num_result_sets = 0;
1491 term_list_trunc(zh, zapt, termz_org, attributeSet,
1492 stream, reg_type, complete_flag,
1493 rank_type, xpath_use,
1494 num_bases, basenames,
1496 &result_sets, &num_result_sets, kc);
1497 if (res != ZEBRA_OK)
1499 if (num_result_sets == 0)
1500 *rset = rset_create_null(rset_nmem, kc, 0);
1501 else if (num_result_sets == 1)
1502 *rset = result_sets[0];
1504 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1505 num_result_sets, result_sets);
/* And-list search: builds one result set per term with term_list_trunc and
   combines them into *rset: no sets gives a null rset, a single set is
   used as is, several sets are joined by rset_create_and. A term_list_trunc
   failure is tested for; the return statements are on lines not shown. */
1511 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1512 Z_AttributesPlusTerm *zapt,
1513 const char *termz_org,
1514 oid_value attributeSet,
1516 int reg_type, int complete_flag,
1517 const char *rank_type,
1518 const char *xpath_use,
1519 int num_bases, char **basenames,
1522 struct rset_key_control *kc)
1524 RSET *result_sets = 0;
1525 int num_result_sets = 0;
1527 term_list_trunc(zh, zapt, termz_org, attributeSet,
1528 stream, reg_type, complete_flag,
1529 rank_type, xpath_use,
1530 num_bases, basenames,
1532 &result_sets, &num_result_sets,
1534 if (res != ZEBRA_OK)
1536 if (num_result_sets == 0)
1537 *rset = rset_create_null(rset_nmem, kc, 0);
1538 else if (num_result_sets == 1)
1539 *rset = result_sets[0];
1541 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1542 num_result_sets, result_sets);
/* Numeric counterpart of string_relation: reads the next term from
   *term_sub, turns the relation attribute (type 2) of zapt into a regular
   expression over digit strings appended to term_dict, and runs
   dict_lookup_grep with it (hits go to grep_info).
   In every relation case term_100 first writes the term text at term_tmp
   (the end of term_dict); atoi() converts it and the same location is then
   overwritten with the generated pattern:
     <   gen_regular_rel(value-1, islt=1)
     <=  gen_regular_rel(value,   islt=1)
     >=  gen_regular_rel(value,   islt=0)
     >   gen_regular_rel(value+1, islt=0)
     =   "(0*<value>)"
   An unhandled relation sets *error_code to
   YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE. Note that atoi() reports no errors,
   so term text that is not a number is not detected here. */
1548 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1549 const char **term_sub,
1551 oid_value attributeSet,
1552 struct grep_info *grep_info,
1562 char *term_tmp = term_dict + strlen(term_dict);
1565 attr_init_APT(&relation, zapt, 2);
1566 relation_value = attr_find(&relation, NULL);
1568 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1570 switch (relation_value)
1573 yaz_log(log_level_rpn, "Relation <");
1574 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1577 term_value = atoi (term_tmp);
1578 gen_regular_rel(term_tmp, term_value-1, 1);
1581 yaz_log(log_level_rpn, "Relation <=");
1582 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1585 term_value = atoi (term_tmp);
1586 gen_regular_rel(term_tmp, term_value, 1);
1589 yaz_log(log_level_rpn, "Relation >=");
1590 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1593 term_value = atoi (term_tmp);
1594 gen_regular_rel(term_tmp, term_value, 0);
1597 yaz_log(log_level_rpn, "Relation >");
1598 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1601 term_value = atoi (term_tmp);
1602 gen_regular_rel(term_tmp, term_value+1, 0);
1606 yaz_log(log_level_rpn, "Relation =");
1607 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1610 term_value = atoi (term_tmp);
/* leading zeros are accepted in front of the value */
1611 sprintf(term_tmp, "(0*%d)", term_value);
1614 /* term_tmp untouched.. */
1615 while (**term_sub != '\0')
1619 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1622 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1623 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1626 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1627 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
/* Collect dictionary hits for the next numeric term of *term_sub in every
   database listed in basenames. Per database: select it in the explain
   info (reporting YAZ_BIB1_DATABASE_UNAVAILABLE on failure), resolve the
   index ordinal for the APT and append it to *ol, build the pattern prefix
   '(' + encoded ordinal bytes, each preceded by the byte 1, + ')' in
   term_dict, and hand over to numeric_relation. A relation error is
   reported with zebra_setError. Return statements are on lines not shown. */
1631 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1632 const char **term_sub,
1633 oid_value attributeSet, NMEM stream,
1634 struct grep_info *grep_info,
1635 int reg_type, int complete_flag,
1636 int num_bases, char **basenames,
1638 const char *xpath_use,
1639 struct ord_list **ol)
1641 char term_dict[2*IT_MAX_WORD+2];
1644 struct rpn_char_map_info rcmi;
1646 int bases_ok = 0; /* no of databases with OK attribute */
1648 *ol = ord_list_create(stream);
1650 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1652 for (base_no = 0; base_no < num_bases; base_no++)
1654 int max_pos, prefix_len = 0;
1655 int relation_error = 0;
1656 int ord, ord_len, i;
1661 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1663 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1664 basenames[base_no]);
1668 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1669 attributeSet, &ord) != ZEBRA_OK)
1673 *ol = ord_list_append(stream, *ol, ord);
1675 ord_len = key_SU_encode (ord, ord_buf);
/* pattern prefix: "(" escaped-ordinal-bytes ")" */
1677 term_dict[prefix_len++] = '(';
1678 for (i = 0; i < ord_len; i++)
1680 term_dict[prefix_len++] = 1;
1681 term_dict[prefix_len++] = ord_buf[i];
1683 term_dict[prefix_len++] = ')';
1684 term_dict[prefix_len] = '\0';
1686 if (!numeric_relation(zh, zapt, &termp, term_dict,
1687 attributeSet, grep_info, &max_pos, reg_type,
1688 term_dst, &relation_error))
1692 zebra_setError(zh, relation_error, 0);
1702 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Numeric search over the terms in termz. For each term, numeric_term
   collects the matching ISAM pointers into grep_info and rset_trunc turns
   them into a result set (positions not preserved). The per-term sets are
   kept in an array allocated from stream and finally combined into *rset:
   no sets gives a null rset, a single set is used as is, several sets are
   joined by rset_create_and. grep_info is released before the result is
   assembled. Return statements are on lines not shown. */
1707 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1708 Z_AttributesPlusTerm *zapt,
1710 oid_value attributeSet,
1712 int reg_type, int complete_flag,
1713 const char *rank_type,
1714 const char *xpath_use,
1715 int num_bases, char **basenames,
1718 struct rset_key_control *kc)
1720 char term_dst[IT_MAX_WORD+1];
1721 const char *termp = termz;
1722 RSET *result_sets = 0;
1723 int num_result_sets = 0;
1725 struct grep_info grep_info;
1727 zint hits_limit_value;
1728 const char *term_ref_id_str = 0;
1730 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1732 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1733 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1737 struct ord_list *ol;
/* array full: allocate a bigger one and carry over the existing entries */
1738 if (alloc_sets == num_result_sets)
1741 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1744 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1745 alloc_sets = alloc_sets + add;
1748 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start with an empty hit list for this term */
1749 grep_info.isam_p_indx = 0;
1750 res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1751 reg_type, complete_flag, num_bases, basenames,
1752 term_dst, xpath_use, &ol);
1753 if (res == ZEBRA_FAIL || termp == 0)
1755 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1756 result_sets[num_result_sets] =
1757 rset_trunc(zh, grep_info.isam_p_buf,
1758 grep_info.isam_p_indx, term_dst,
1759 strlen(term_dst), rank_type,
1760 0 /* preserve position */,
1761 zapt->term->which, rset_nmem,
1762 kc, kc->scope, ol, reg_type,
1765 if (!result_sets[num_result_sets])
1771 grep_info_delete(&grep_info);
1773 if (res != ZEBRA_OK)
1775 if (num_result_sets == 0)
1776 *rset = rset_create_null(rset_nmem, kc, 0);
1777 else if (num_result_sets == 1)
1778 *rset = result_sets[0];
1780 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1781 num_result_sets, result_sets);
/* Creates *rset as a temporary result set (temporary directory taken from
   the "setTmpDir" resource), opens it for writing and writes a key to it.
   How the key is derived from the term, and the closing of the write handle,
   are on lines not shown. */
1787 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1788 Z_AttributesPlusTerm *zapt,
1790 oid_value attributeSet,
1792 const char *rank_type, NMEM rset_nmem,
1794 struct rset_key_control *kc)
1799 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1800 res_get (zh->res, "setTmpDir"),0 );
1801 rsfd = rset_open(*rset, RSETF_WRITE);
1809 rset_write (rsfd, &key);
/* Turn an APT that carries a sort attribute (type 7) into a Z_SortKeySpec
   and store it in sort_sequence->specs[i], where i is the number parsed
   from the general term. The specs array is created on first use with 10
   empty slots. The new spec uses the APT's attribute list together with the
   OID of the attribute set; relation value 1 means ascending, 2 descending,
   anything else ascending; case sensitivity is 0. All allocations come from
   stream. *rset is set to a null result set. Error and normal returns are on
   lines not shown. */
1814 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1815 oid_value attributeSet, NMEM stream,
1816 Z_SortKeySpecList *sort_sequence,
1817 const char *rank_type,
1820 struct rset_key_control *kc)
1823 int sort_relation_value;
1824 AttrType sort_relation_type;
1831 attr_init_APT(&sort_relation_type, zapt, 7);
1832 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* lazily allocate the spec slots */
1834 if (!sort_sequence->specs)
1836 sort_sequence->num_specs = 10;
1837 sort_sequence->specs = (Z_SortKeySpec **)
1838 nmem_malloc(stream, sort_sequence->num_specs *
1839 sizeof(*sort_sequence->specs));
1840 for (i = 0; i<sort_sequence->num_specs; i++)
1841 sort_sequence->specs[i] = 0;
1843 if (zapt->term->which != Z_Term_general)
/* NOTE(review): only the upper bound of i is checked in the visible lines;
   confirm that a negative value cannot reach the specs[i] store below. */
1846 i = atoi_n ((char *) zapt->term->u.general->buf,
1847 zapt->term->u.general->len);
1848 if (i >= sort_sequence->num_specs)
1850 sprintf(termz, "%d", i);
1852 oe.proto = PROTO_Z3950;
1853 oe.oclass = CLASS_ATTSET;
1854 oe.value = attributeSet;
1855 if (!oid_ent_to_oid (&oe, oid))
1858 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1859 sks->sortElement = (Z_SortElement *)
1860 nmem_malloc(stream, sizeof(*sks->sortElement));
1861 sks->sortElement->which = Z_SortElement_generic;
1862 sk = sks->sortElement->u.generic = (Z_SortKey *)
1863 nmem_malloc(stream, sizeof(*sk));
1864 sk->which = Z_SortKey_sortAttributes;
1865 sk->u.sortAttributes = (Z_SortAttributes *)
1866 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1868 sk->u.sortAttributes->id = oid;
1869 sk->u.sortAttributes->list = zapt->attributes;
1871 sks->sortRelation = (int *)
1872 nmem_malloc(stream, sizeof(*sks->sortRelation));
1873 if (sort_relation_value == 1)
1874 *sks->sortRelation = Z_SortKeySpec_ascending;
1875 else if (sort_relation_value == 2)
1876 *sks->sortRelation = Z_SortKeySpec_descending;
1878 *sks->sortRelation = Z_SortKeySpec_ascending;
1880 sks->caseSensitivity = (int *)
1881 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1882 *sks->caseSensitivity = 0;
1884 sks->which = Z_SortKeySpec_null;
1885 sks->u.null = odr_nullval ();
1886 sort_sequence->specs[i] = sks;
1887 *rset = rset_create_null(rset_nmem, kc, 0);
/* Check whether the APT's use attribute (type 1) is an X-Path expression.
 *
 * When the use attribute has a string value beginning with '/', it is
 * parsed by zebra_parse_xpath_str() into at most `max` location steps
 * stored in `xpath`, and that function's result is returned.
 *
 * NOTE(review): the value returned when the attribute has no string value
 * or does not start with '/' is not visible in this excerpt.
 */
1892 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1893 oid_value attributeSet,
1894 struct xpath_location_step *xpath, int max,
1897 oid_value curAttributeSet = attributeSet;
1899 const char *use_string = 0;
1901 attr_init_APT(&use, zapt, 1);
1902 attr_find_ex(&use, &curAttributeSet, &use_string);
1904 if (!use_string || *use_string != '/')
1907 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Look up the dictionary pattern `term` in one X-Path helper index and
 * return the matching postings as a result set.
 *
 * The dictionary pattern is assembled in term_dict as: an opening byte,
 * the encoded index ordinal (every byte preceded by a 0x01 byte), a
 * closing ')' and then `term` itself. The pattern is run through
 * dict_lookup_grep() and the collected ISAM positions are turned into an
 * RSET by rset_trunc() with the "void" rank flags.
 *
 * A null result set is returned when grep_info_prepare() fails, and in
 * one further case whose condition is not visible in this excerpt.
 *
 * NOTE(review): presumably the ordinal is looked up from reg_type and
 * xpath_use - the remaining lookup arguments are not visible here.
 * NOTE(review): `term` is copied into the 2048-byte term_dict with
 * strcpy() and no visible length check - confirm callers bound it.
 */
1912 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1913 int reg_type, const char *term,
1914 const char *xpath_use,
1916 struct rset_key_control *kc)
1919 struct grep_info grep_info;
1920 char term_dict[2048];
1923 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1924 zinfo_index_category_index,
1927 int ord_len, i, r, max_pos;
1928 int term_type = Z_Term_characterString;
1929 const char *flags = "void";
1931 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
1932 return rset_create_null(rset_nmem, kc, 0);
1935 return rset_create_null(rset_nmem, kc, 0);
1937 term_dict[prefix_len++] = '|';
1939 term_dict[prefix_len++] = '(';
/* emit the encoded ordinal, each byte preceded by a 0x01 byte */
1941 ord_len = key_SU_encode (ord, ord_buf);
1942 for (i = 0; i<ord_len; i++)
1944 term_dict[prefix_len++] = 1;
1945 term_dict[prefix_len++] = ord_buf[i];
1947 term_dict[prefix_len++] = ')';
1948 strcpy(term_dict+prefix_len, term);
1950 grep_info.isam_p_indx = 0;
1951 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
1952 &grep_info, &max_pos, 0, grep_handle);
1953 yaz_log(YLOG_DEBUG, "%s %d positions", term,
1954 grep_info.isam_p_indx);
1955 rset = rset_trunc(zh, grep_info.isam_p_buf,
1956 grep_info.isam_p_indx, term, strlen(term),
1957 flags, 1, term_type,rset_nmem,
1958 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
1959 0 /* term_ref_id_str */);
1960 grep_info_delete(&grep_info);
/* Restrict a term result set to the element/attribute context described
 * by an X-Path expression.
 *
 * `rset` is the result set of the term search; when it is 0 the search is
 * treated as "always matches" (always_matches is set to 1).
 *
 * For every database, the location steps are walked from the last one
 * towards the first. At each level the code:
 *   - builds xpath_rev, a dictionary pattern made from the step names in
 *     reverse order, each followed by "/"; "[^/]*" is emitted for what is
 *     presumably the '*' wildcard (the test is not visible), a space is
 *     emitted as the two bytes 0x01 ' ', and the "//" case appends a
 *     match-anything pattern;
 *   - if the step has a relation predicate, builds "name=value" (with
 *     REGEX_CHARS in the value backslash-escaped) and looks it up in the
 *     ZEBRA_XPATH_ATTR_NAME index to obtain rset_attr;
 *   - looks xpath_rev up in the ZEBRA_XPATH_ELM_BEGIN and
 *     ZEBRA_XPATH_ELM_END indexes and wraps the current rset with
 *     rset_create_between(start, rset, end, attr).
 *
 * An unavailable database is reported with
 * YAZ_BIB1_DATABASE_UNAVAILABLE.
 *
 * NOTE(review): the branch structure around the two wrbuf_free(xpath_rev)
 * calls and around "rset = rset_start_tag" is not visible in this
 * excerpt; the description above covers only the visible statements.
 */
1965 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1966 int num_bases, char **basenames,
1967 NMEM stream, const char *rank_type, RSET rset,
1968 int xpath_len, struct xpath_location_step *xpath,
1971 struct rset_key_control *kc)
1975 int always_matches = rset ? 0 : 1;
1983 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
1984 for (i = 0; i<xpath_len; i++)
1986 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
1998 a[@attr = value]/b[@other = othervalue]
2000 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2001 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2002 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2003 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2004 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2005 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2009 dict_grep_cmap (zh->reg->dict, 0, 0);
2011 for (base_no = 0; base_no < num_bases; base_no++)
2013 int level = xpath_len;
2016 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2018 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2019 basenames[base_no]);
2023 while (--level >= 0)
2025 WRBUF xpath_rev = wrbuf_alloc();
2027 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2029 for (i = level; i >= 1; --i)
2031 const char *cp = xpath[i].part;
2037 wrbuf_puts(xpath_rev, "[^/]*");
2038 else if (*cp == ' ')
2039 wrbuf_puts(xpath_rev, "\001 ");
2041 wrbuf_putc(xpath_rev, *cp);
2043 /* wrbuf_putc does not null-terminate , but
2044 wrbuf_puts below ensures it does.. so xpath_rev
2045 is OK iff length is > 0 */
2047 wrbuf_puts(xpath_rev, "/");
2049 else if (i == 1) /* // case */
2050 wrbuf_puts(xpath_rev, ".*");
2052 if (xpath[level].predicate &&
2053 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2054 xpath[level].predicate->u.relation.name[0])
2056 WRBUF wbuf = wrbuf_alloc();
2057 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2058 if (xpath[level].predicate->u.relation.value)
2060 const char *cp = xpath[level].predicate->u.relation.value;
2061 wrbuf_putc(wbuf, '=');
2065 if (strchr(REGEX_CHARS, *cp))
2066 wrbuf_putc(wbuf, '\\');
2067 wrbuf_putc(wbuf, *cp);
2071 wrbuf_puts(wbuf, "");
2072 rset_attr = xpath_trunc(
2073 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2075 wrbuf_free(wbuf, 1);
2081 wrbuf_free(xpath_rev, 1);
2085 yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level,
2086 wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
2087 if (wrbuf_len(xpath_rev))
2089 rset_start_tag = xpath_trunc(zh, stream, '0',
2090 wrbuf_buf(xpath_rev),
2091 ZEBRA_XPATH_ELM_BEGIN,
2094 rset = rset_start_tag;
2097 rset_end_tag = xpath_trunc(zh, stream, '0',
2098 wrbuf_buf(xpath_rev),
2099 ZEBRA_XPATH_ELM_END,
2102 rset = rset_create_between(rset_nmem, kc, kc->scope,
2103 rset_start_tag, rset,
2104 rset_end_tag, rset_attr);
2107 wrbuf_free(xpath_rev, 1);
/* Upper bound on the number of X-Path location steps parsed per APT. */
2115 #define MAX_XPATH_STEPS 10
/* Evaluate one Attributes-Plus-Term operand and return its result set.
 *
 * Visible flow:
 *   1. zebra_maps_attr() derives the register id, search type, rank type,
 *      complete flag and sort flag from the APT's attributes.
 *   2. The term is converted to UTF-8 into termz.
 *   3. A sort request is delegated to rpn_sort_spec() (NOTE(review): the
 *      guarding condition is not visible; presumably sort_flag).
 *   4. If the use attribute is an X-Path expression, xpath_use selects the
 *      attribute-cdata index when the last step starts with '@', else the
 *      cdata index. With relation 103 (alwaysmatches) there is no term
 *      search at all: *rset is cleared and rpn_search_xpath() is called
 *      directly.
 *   5. Otherwise the term is searched by the handler matching search_type
 *      ("phrase", "and-list", "or-list", "local", "numeric"); any other
 *      type raises YAZ_BIB1_UNSUPP_SEARCH.
 *   6. The handler's result set is finally passed through
 *      rpn_search_xpath().
 */
2117 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2118 oid_value attributeSet, NMEM stream,
2119 Z_SortKeySpecList *sort_sequence,
2120 int num_bases, char **basenames,
2123 struct rset_key_control *kc)
2125 ZEBRA_RES res = ZEBRA_OK;
2127 char *search_type = NULL;
2128 char rank_type[128];
2131 char termz[IT_MAX_WORD+1];
2133 const char *xpath_use = 0;
2134 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2138 log_level_rpn = yaz_log_module_level("rpn");
2141 zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2142 rank_type, &complete_flag, &sort_flag);
2144 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2145 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2146 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2147 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2149 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2153 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2154 rank_type, rset_nmem, rset, kc);
2155 /* consider if an X-Path query is used */
2156 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2157 xpath, MAX_XPATH_STEPS, stream);
2160 if (xpath[xpath_len-1].part[0] == '@')
2161 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2163 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2170 attr_init_APT(&relation, zapt, 2);
2171 relation_value = attr_find(&relation, NULL);
2173 if (relation_value == 103) /* alwaysmatches */
2175 *rset = 0; /* signal no "term" set */
2176 return rpn_search_xpath(zh, num_bases, basenames,
2177 stream, rank_type, *rset,
2178 xpath_len, xpath, rset_nmem, rset, kc);
2183 /* search using one of the various search type strategies
2184 termz is our UTF-8 search term
2185 attributeSet is top-level default attribute set
2186 stream is ODR for search
2187 reg_id is the register type
2188 complete_flag is 1 for complete subfield, 0 for incomplete
2189 xpath_use is use-attribute to be used for X-Path search, 0 for none */
2191 if (!strcmp(search_type, "phrase"))
2193 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2194 reg_id, complete_flag, rank_type,
2196 num_bases, basenames, rset_nmem,
2199 else if (!strcmp(search_type, "and-list"))
2201 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2202 reg_id, complete_flag, rank_type,
2204 num_bases, basenames, rset_nmem,
2207 else if (!strcmp(search_type, "or-list"))
2209 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2210 reg_id, complete_flag, rank_type,
2212 num_bases, basenames, rset_nmem,
2215 else if (!strcmp(search_type, "local"))
2217 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2218 rank_type, rset_nmem, rset, kc);
2220 else if (!strcmp(search_type, "numeric"))
2222 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2223 reg_id, complete_flag, rank_type,
2225 num_bases, basenames, rset_nmem,
2230 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2233 if (res != ZEBRA_OK)
2237 return rpn_search_xpath(zh, num_bases, basenames,
2238 stream, rank_type, *rset,
2239 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_structure() calls itself for the
 * sub-trees of a complex node and is called by rpn_search_top() before
 * its definition appears.
 */
2242 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2243 oid_value attributeSet,
2244 NMEM stream, NMEM rset_nmem,
2245 Z_SortKeySpecList *sort_sequence,
2246 int num_bases, char **basenames,
2247 RSET **result_sets, int *num_result_sets,
2248 Z_Operator *parent_op,
2249 struct rset_key_control *kc);
/* Top-level entry point for evaluating an RPN query tree.
 *
 * A key control is created for the search and the root node is evaluated
 * with rpn_search_structure(), passing no parent operator. If that call
 * does not return ZEBRA_OK, every result set it produced is deleted.
 * Otherwise the asserts require exactly one non-null result set, which is
 * stored in *result_set.
 *
 * NOTE(review): release of `kc` and the return statements are not visible
 * in this excerpt.
 */
2251 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2252 oid_value attributeSet,
2253 NMEM stream, NMEM rset_nmem,
2254 Z_SortKeySpecList *sort_sequence,
2255 int num_bases, char **basenames,
2258 RSET *result_sets = 0;
2259 int num_result_sets = 0;
2261 struct rset_key_control *kc = zebra_key_control_create(zh);
2263 res = rpn_search_structure(zh, zs, attributeSet,
2266 num_bases, basenames,
2267 &result_sets, &num_result_sets,
2268 0 /* no parent op */,
2270 if (res != ZEBRA_OK)
2273 for (i = 0; i<num_result_sets; i++)
2274 rset_delete(result_sets[i]);
2279 assert(num_result_sets == 1);
2280 assert(result_sets);
2281 assert(*result_sets);
2282 *result_set = *result_sets;
/* Recursively evaluate one node of an RPN query tree.
 *
 * Results are returned as a list: *result_sets (allocated on `stream`)
 * holding *num_result_sets entries. *num_result_sets is reset to 0 on
 * entry.
 *
 * Complex node: both operands (s1, s2) are evaluated recursively; if
 * either fails, the result sets collected so far are deleted. The two
 * child lists are concatenated. The list is left uncombined only when the
 * parent operator has the same kind as this node's operator and that kind
 * is AND or OR, so that the parent can build one n-ary set. In every
 * other case the list is combined here into a single set:
 *   and      -> rset_create_and
 *   or       -> rset_create_or
 *   and-not  -> rset_create_not
 *   prox     -> rset_create_prox; the visible checks report
 *               YAZ_BIB1_UNSUPP_PROX_UNIT_CODE unless the unit is a known
 *               code equal to Z_ProxUnit_word
 *   other    -> YAZ_BIB1_OPERATOR_UNSUPP
 *
 * Simple node: an APT operand is evaluated by rpn_search_APT(); a result
 * set id is resolved with resultSetRef(), with
 * YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST reported by the visible
 * error call; any other operand kind raises YAZ_BIB1_UNSUPP_SEARCH. The
 * single resulting set is returned as a one-element list.
 *
 * Any other node kind raises YAZ_BIB1_UNSUPP_SEARCH.
 *
 * NOTE(review): the parent_op argument passed to the recursive calls is
 * not visible in this excerpt; presumably it is this node's operator.
 */
2288 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2289 oid_value attributeSet,
2290 NMEM stream, NMEM rset_nmem,
2291 Z_SortKeySpecList *sort_sequence,
2292 int num_bases, char **basenames,
2293 RSET **result_sets, int *num_result_sets,
2294 Z_Operator *parent_op,
2295 struct rset_key_control *kc)
2297 *num_result_sets = 0;
2298 if (zs->which == Z_RPNStructure_complex)
2301 Z_Operator *zop = zs->u.complex->roperator;
2302 RSET *result_sets_l = 0;
2303 int num_result_sets_l = 0;
2304 RSET *result_sets_r = 0;
2305 int num_result_sets_r = 0;
2307 res = rpn_search_structure(zh, zs->u.complex->s1,
2308 attributeSet, stream, rset_nmem,
2310 num_bases, basenames,
2311 &result_sets_l, &num_result_sets_l,
2313 if (res != ZEBRA_OK)
2316 for (i = 0; i<num_result_sets_l; i++)
2317 rset_delete(result_sets_l[i]);
2320 res = rpn_search_structure(zh, zs->u.complex->s2,
2321 attributeSet, stream, rset_nmem,
2323 num_bases, basenames,
2324 &result_sets_r, &num_result_sets_r,
2326 if (res != ZEBRA_OK)
2329 for (i = 0; i<num_result_sets_l; i++)
2330 rset_delete(result_sets_l[i]);
2331 for (i = 0; i<num_result_sets_r; i++)
2332 rset_delete(result_sets_r[i]);
2336 /* make a new list of result for all children */
2337 *num_result_sets = num_result_sets_l + num_result_sets_r;
2338 *result_sets = nmem_malloc(stream, *num_result_sets *
2339 sizeof(**result_sets));
2340 memcpy(*result_sets, result_sets_l,
2341 num_result_sets_l * sizeof(**result_sets));
2342 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2343 num_result_sets_r * sizeof(**result_sets));
2345 if (!parent_op || parent_op->which != zop->which
2346 || (zop->which != Z_Operator_and &&
2347 zop->which != Z_Operator_or))
2349 /* parent node different from this one (or non-present) */
2350 /* we must combine result sets now */
2354 case Z_Operator_and:
2355 rset = rset_create_and(rset_nmem, kc,
2357 *num_result_sets, *result_sets);
2360 rset = rset_create_or(rset_nmem, kc,
2361 kc->scope, 0, /* termid */
2362 *num_result_sets, *result_sets);
2364 case Z_Operator_and_not:
2365 rset = rset_create_not(rset_nmem, kc,
2370 case Z_Operator_prox:
2371 if (zop->u.prox->which != Z_ProximityOperator_known)
2374 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2378 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2380 zebra_setError_zint(zh,
2381 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2382 *zop->u.prox->u.known);
2387 rset = rset_create_prox(rset_nmem, kc,
2389 *num_result_sets, *result_sets,
2390 *zop->u.prox->ordered,
2391 (!zop->u.prox->exclusion ?
2392 0 : *zop->u.prox->exclusion),
2393 *zop->u.prox->relationType,
2394 *zop->u.prox->distance );
2398 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2401 *num_result_sets = 1;
2402 *result_sets = nmem_malloc(stream, *num_result_sets *
2403 sizeof(**result_sets));
2404 (*result_sets)[0] = rset;
2407 else if (zs->which == Z_RPNStructure_simple)
2412 if (zs->u.simple->which == Z_Operand_APT)
2414 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2415 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2416 attributeSet, stream, sort_sequence,
2417 num_bases, basenames, rset_nmem, &rset,
2419 if (res != ZEBRA_OK)
2422 else if (zs->u.simple->which == Z_Operand_resultSetId)
2424 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2425 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2429 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2430 zs->u.simple->u.resultSetId);
2437 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2440 *num_result_sets = 1;
2441 *result_sets = nmem_malloc(stream, *num_result_sets *
2442 sizeof(**result_sets));
2443 (*result_sets)[0] = rset;
2447 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* Scan bookkeeping types. scan_handle() fills the `term` and `isam_p`
 * members of the scan_info_entry slots reached through scan_info's
 * `list`; scan_info also carries `before`, `after`, `odr` and `prefix`,
 * which rpn_scan() sets up.
 * NOTE(review): the member declarations other than `list` are not visible
 * in this excerpt.
 */
2453 struct scan_info_entry {
2459 struct scan_info_entry *list;
/* Callback handed to dict_scan() by rpn_scan(); records one dictionary
 * entry in the scan_info passed as `client`.
 *
 * `name` is compared against scan_info->prefix over the prefix length;
 * the action taken on a mismatch is not visible in this excerpt.
 * The slot index is after - pos + before. The part of `name` following
 * the prefix is copied into ODR memory as the slot's term, and the ISAM
 * pointer is copied from info+1 (info[0] is asserted to equal
 * sizeof(ISAM_P)).
 *
 * NOTE(review): the bounds handling of idx and the return values are not
 * visible in this excerpt.
 */
2465 static int scan_handle (char *name, const char *info, int pos, void *client)
2467 int len_prefix, idx;
2468 struct scan_info *scan_info = (struct scan_info *) client;
2470 len_prefix = strlen(scan_info->prefix);
2471 if (memcmp (name, scan_info->prefix, len_prefix))
2474 idx = scan_info->after - pos + scan_info->before;
2480 scan_info->list[idx].term = (char *)
2481 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2482 strcpy(scan_info->list[idx].term, name + len_prefix);
2483 assert (*info == sizeof(ISAM_P));
2484 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
/* Map a dictionary term back to display form and return it in *dst as a
 * string allocated on `stream`.
 *
 * `src` is first mapped by zebra_term_untrans() into term_src. If the
 * handle has an iconv_from_utf8 converter, term_src is run through
 * yaz_iconv() into term_dst and the converted bytes are copied into a
 * fresh buffer of len + 1 bytes; otherwise *dst is a copy of term_src.
 *
 * NOTE(review): the handling of a failed conversion and the terminating
 * NUL of the converted copy are not visible in this excerpt.
 */
2488 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2489 char **dst, const char *src)
2491 char term_src[IT_MAX_WORD];
2492 char term_dst[IT_MAX_WORD];
2494 zebra_term_untrans (zh, reg_type, term_src, src);
2496 if (zh->iconv_from_utf8 != 0)
2499 char *inbuf = term_src;
2500 size_t inleft = strlen(term_src);
2501 char *outbuf = term_dst;
/* one byte of term_dst is kept out of the conversion output */
2502 size_t outleft = sizeof(term_dst)-1;
2505 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2507 if (ret == (size_t)(-1))
2510 len = outbuf - term_dst;
2511 *dst = nmem_malloc(stream, len + 1);
2513 memcpy (*dst, term_dst, len);
2517 *dst = nmem_strdup(stream, term_src);
/* Read through `rset` and store its hit count in *count.
 *
 * The set's hits_limit is taken from zh->approx_limit. Keys are read in a
 * loop; a change in key.mem[0] (tracked in psysno) marks a new record,
 * and the reader's counted_items is compared against hits_limit at that
 * point. The value reported is rset->hits_count.
 *
 * NOTE(review): the statements executed on a new record and when the
 * limit is reached, and the closing of `rfd`, are not visible in this
 * excerpt.
 */
2520 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2526 yaz_log(YLOG_DEBUG, "count_set");
2528 rset->hits_limit = zh->approx_limit;
2531 rfd = rset_open(rset, RSETF_READ);
2532 while (rset_read(rfd, &key,0 /* never mind terms */))
2534 if (key.mem[0] != psysno)
2536 psysno = key.mem[0];
2537 if (rfd->counted_items >= rset->hits_limit)
2542 *count = rset->hits_count;
/* Maximum number of index ordinals (one per database) merged in a scan. */
2545 #define RPN_MAX_ORDS 32
/* Scan the term dictionary around the term of `zapt` over one or more
 * databases and return the neighbouring terms with occurrence counts.
 *
 * In/out: *position and *num_entries give the requested window on entry
 * and are updated to describe the entries returned. *list is set to the
 * first real entry of an ODR-allocated ZebraScanEntry array.
 *
 * Visible flow:
 *   - attributeset defaults to VAL_BIB1 when VAL_NONE is given;
 *   - attribute type 8 may name a result set (by number or by string),
 *     which then replaces limit_set;
 *   - zebra_maps_attr() determines the index type, failing with
 *     YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE;
 *   - one index ordinal is collected per database, up to RPN_MAX_ORDS;
 *     an unavailable database raises YAZ_BIB1_DATABASE_UNAVAILABLE;
 *   - for every ordinal, the dictionary is scanned with the ordinal as
 *     prefix and scan_handle() records before+after candidate terms;
 *   - terms after the main term: repeatedly pick the smallest term not
 *     yet consumed over all ordinals, build its result set with
 *     rset_trunc(), OR in the sets of other ordinals holding the same
 *     term, AND with limit_set if one is given, and count the hits with
 *     count_set();
 *   - terms before the main term: the same merge, picking the largest
 *     remaining term and filling the list backwards.
 *
 * NOTE(review): how before/after are derived from position/num, the
 * setting of *is_partial, the use of return_zero and the return values
 * are not visible in this excerpt.
 */
2547 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2548 oid_value attributeset,
2549 int num_bases, char **basenames,
2550 int *position, int *num_entries, ZebraScanEntry **list,
2551 int *is_partial, RSET limit_set, int return_zero)
2554 int pos = *position;
2555 int num = *num_entries;
2559 char termz[IT_MAX_WORD+20];
2560 struct scan_info *scan_info_array;
2561 ZebraScanEntry *glist;
2562 int ords[RPN_MAX_ORDS], ord_no = 0;
2563 int ptr[RPN_MAX_ORDS];
2565 unsigned index_type;
2566 char *search_type = NULL;
2567 char rank_type[128];
2570 NMEM rset_nmem = NULL;
2571 struct rset_key_control *kc = 0;
2576 if (attributeset == VAL_NONE)
2577 attributeset = VAL_BIB1;
2582 int termset_value_numeric;
2583 const char *termset_value_string;
2584 attr_init_APT(&termset, zapt, 8);
2585 termset_value_numeric =
2586 attr_find_ex(&termset, NULL, &termset_value_string);
2587 if (termset_value_numeric != -1)
2590 const char *termset_name = 0;
2592 if (termset_value_numeric != -2)
2595 sprintf(resname, "%d", termset_value_numeric);
2596 termset_name = resname;
2599 termset_name = termset_value_string;
2601 limit_set = resultSetRef (zh, termset_name);
2605 yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2606 pos, num, attributeset);
2608 if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2609 rank_type, &complete_flag, &sort_flag))
2612 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2615 for (base_no = 0; base_no < num_bases && ord_no < RPN_MAX_ORDS; base_no++)
2619 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2621 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2622 basenames[base_no]);
2626 if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeset, &ord)
2629 ords[ord_no++] = ord;
2636 /* prepare dictionary scanning */
2648 yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2649 "after=%d before+after=%d",
2650 pos, num, before, after, before+after);
2651 scan_info_array = (struct scan_info *)
2652 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2653 for (i = 0; i < ord_no; i++)
2655 int j, prefix_len = 0;
2656 int before_tmp = before, after_tmp = after;
2657 struct scan_info *scan_info = scan_info_array + i;
2658 struct rpn_char_map_info rcmi;
2660 rpn_char_map_prepare (zh->reg, index_type, &rcmi);
2662 scan_info->before = before;
2663 scan_info->after = after;
2664 scan_info->odr = stream;
2666 scan_info->list = (struct scan_info_entry *)
2667 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2668 for (j = 0; j<before+after; j++)
2669 scan_info->list[j].term = NULL;
2671 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2672 termz[prefix_len] = 0;
2673 strcpy(scan_info->prefix, termz);
2675 if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) ==
2679 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2680 scan_info, scan_handle);
2682 glist = (ZebraScanEntry *)
2683 odr_malloc(stream, (before+after)*sizeof(*glist));
2685 rset_nmem = nmem_create();
2686 kc = zebra_key_control_create(zh);
2688 /* consider terms after main term */
2689 for (i = 0; i < ord_no; i++)
2693 for (i = 0; i<after; i++)
2696 const char *mterm = NULL;
2699 int lo = i + pos-1; /* offset in result list */
2701 /* find: j0 is the first of the minimal values */
2702 for (j = 0; j < ord_no; j++)
2704 if (ptr[j] < before+after && ptr[j] >= 0 &&
2705 (tst = scan_info_array[j].list[ptr[j]].term) &&
2706 (!mterm || strcmp (tst, mterm) < 0))
2713 break; /* no value found, stop */
2715 /* get result set for first one , but only if it's within bounds */
2718 /* get result set for first term */
2719 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2720 &glist[lo].term, mterm);
2721 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2722 glist[lo].term, strlen(glist[lo].term),
2723 NULL, 0, zapt->term->which, rset_nmem,
2724 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2725 0 /* term_ref_id_str */);
2727 ptr[j0]++; /* move index for this set .. */
2728 /* get result set for remaining scan terms */
2729 for (j = j0+1; j<ord_no; j++)
2731 if (ptr[j] < before+after && ptr[j] >= 0 &&
2732 (tst = scan_info_array[j].list[ptr[j]].term) &&
2733 !strcmp (tst, mterm))
2742 zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2744 strlen(glist[lo].term), NULL, 0,
2745 zapt->term->which,rset_nmem,
2746 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2747 0 /* term_ref_id_str */ );
2748 rset = rset_create_or(rset_nmem, kc,
2749 kc->scope, 0 /* termid */,
2758 /* merge with limit_set if given */
2763 rsets[1] = rset_dup(limit_set);
2765 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2768 count_set(zh, rset, &count);
2769 glist[lo].occurrences = count;
2775 *num_entries -= (after-i);
2777 if (*num_entries < 0)
2780 nmem_destroy(rset_nmem);
2785 /* consider terms before main term */
2786 for (i = 0; i<ord_no; i++)
2789 for (i = 0; i<before; i++)
2792 const char *mterm = NULL;
2795 int lo = before-1-i; /* offset in result list */
2798 for (j = 0; j <ord_no; j++)
2800 if (ptr[j] < before && ptr[j] >= 0 &&
2801 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2802 (!mterm || strcmp (tst, mterm) > 0))
2811 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2812 &glist[lo].term, mterm);
2815 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2816 glist[lo].term, strlen(glist[lo].term),
2817 NULL, 0, zapt->term->which, rset_nmem,
2818 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2819 0 /* term_ref_id_str */);
2823 for (j = j0+1; j<ord_no; j++)
2825 if (ptr[j] < before && ptr[j] >= 0 &&
2826 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2827 !strcmp (tst, mterm))
2832 rsets[1] = rset_trunc(
2834 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2836 strlen(glist[lo].term), NULL, 0,
2837 zapt->term->which, rset_nmem,
2838 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2839 0 /* term_ref_id_str */);
2840 rset = rset_create_or(rset_nmem, kc,
2841 kc->scope, 0 /* termid */, 2, rsets);
2850 rsets[1] = rset_dup(limit_set);
2852 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2854 count_set(zh, rset, &count);
2855 glist[lo].occurrences = count;
2859 nmem_destroy(rset_nmem);
2866 if (*num_entries <= 0)
2873 *list = glist + i; /* list is set to first 'real' entry */
2875 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2876 *position, *num_entries);
2883 * indent-tabs-mode: nil
2885 * vim: shiftwidth=4 tabstop=8 expandtab