}
+/*
+ * Translate a regular-expression search term into a dictionary regular
+ * expression, using the tokenizer attached to zm for the literal parts.
+ *
+ * Leading spaces at *src are skipped.  The input is then split into runs
+ * of literal characters separated by characters from REGEX_CHARS "-".
+ * Each literal run is passed through zebra_map_tokenize_start() /
+ * zebra_map_tokenize_next(); the resulting key, with its trailing
+ * '\x01'-delimited sections removed, is appended to term_dict and the
+ * display form to display_term.  The separating characters themselves are
+ * copied unchanged to both buffers.
+ *
+ * zm            map that provides the tokenizer.
+ * src           in: start of the term; out: position where scanning
+ *               stopped (terminating NUL, or the splitting space).
+ * term_dict     buffer receiving the dictionary expression.
+ * space_split   when non-zero, an unconsumed space ends the term.
+ * display_term  buffer receiving the displayable form of the term.
+ *
+ * Returns the number of literal runs that produced a token.  When that
+ * number is non-zero, "\x01\x01.*" is appended to term_dict.
+ */
+static int term_102_icu(zebra_map_t zm,
+                        const char **src, WRBUF term_dict, int space_split,
+                        WRBUF display_term)
+{
+    int no_terms = 0;
+    const char *s0 = *src, *s1;
+
+    while (*s0 == ' ')
+        s0++;
+    s1 = s0;
+    for (;;)
+    {
+        /* End of input, or a space that terminates this term. */
+        int at_stop = (*s1 == '\0') || (*s1 == ' ' && space_split);
+
+        if (!at_stop && !strchr(REGEX_CHARS "-", *s1))
+        {
+            s1++; /* still inside a literal run */
+            continue;
+        }
+
+        /* Stop position or reserved character: flush the pending literal
+           run.  This is done for the space-split case as well, so that
+           the run in front of the space is not silently dropped. */
+        if (s0 != s1)
+        {
+            const char *res_buf = 0;
+            size_t res_len = 0;
+            const char *display_buf;
+            size_t display_len;
+
+            zebra_map_tokenize_start(zm, s0, s1 - s0);
+
+            if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
+                                        &display_buf, &display_len))
+            {
+                /* Cut the key at the second-to-last '\x01' byte, or at
+                   the only one if there is just one; a key without any
+                   '\x01' is kept whole.  The index never goes below
+                   zero, so the scan stays inside res_buf even for empty
+                   or short keys.
+                   NOTE(review): the '\x01' bytes presumably separate the
+                   levels of an ICU sort key -- confirm. */
+                size_t cut = res_len;
+                size_t seps = 0;
+                size_t i = res_len;
+
+                while (i > 0 && seps < 2)
+                {
+                    i--;
+                    if (res_buf[i] == '\x01')
+                    {
+                        cut = i;
+                        seps++;
+                    }
+                }
+                res_len = cut; /* reduce res_len */
+
+                for (i = 0; i < res_len; i++)
+                {
+                    /* Key bytes that collide with regex syntax are
+                       backslash-escaped. */
+                    if (strchr(REGEX_CHARS "\\", res_buf[i]))
+                        wrbuf_putc(term_dict, '\\');
+                    /* NOTE(review): '\x01' presumably marks the next
+                       byte as literal for the dictionary matcher --
+                       confirm. */
+                    if (res_buf[i] < 32)
+                        wrbuf_putc(term_dict, '\x01');
+
+                    wrbuf_putc(term_dict, res_buf[i]);
+                }
+                wrbuf_write(display_term, display_buf, display_len);
+
+                no_terms++;
+            }
+        }
+        if (at_stop)
+            break;
+
+        /* Copy the reserved character through unchanged. */
+        wrbuf_putc(term_dict, *s1);
+        wrbuf_putc(display_term, *s1);
+
+        s1++;
+        s0 = s1;
+    }
+    if (no_terms)
+        wrbuf_puts(term_dict, "\x01\x01.*");
+    *src = s1;
+    return no_terms;
+}
+
+
static int term_100_icu(zebra_map_t zm,
const char **src, WRBUF term_dict, int space_split,
WRBUF display_term,
int mode)
{
- int i;
+ size_t i;
const char *res_buf = 0;
size_t res_len = 0;
const char *display_buf;
size_t display_len;
+ const char *s0 = *src, *s1;
+
+ while (*s0 == ' ')
+ s0++;
+
+ if (*s0 == '\0')
+ return 0;
+
+ if (space_split)
+ {
+ s1 = s0;
+ while (*s1 && *s1 != ' ')
+ s1++;
+ }
+ else
+ s1 = s0 + strlen(s0);
+
+ *src = s1;
+
+ zebra_map_tokenize_start(zm, s0, s1 - s0);
+
if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
&display_buf, &display_len))
{
- *src += strlen(*src);
return 0;
}
wrbuf_write(display_term, display_buf, display_len);
if (strchr(REGEX_CHARS "\\", res_buf[i]))
wrbuf_putc(term_dict, '\\');
if (res_buf[i] < 32)
- wrbuf_putc(term_dict, 1);
-
+ wrbuf_putc(term_dict, '\x01');
+
wrbuf_putc(term_dict, res_buf[i]);
}
if (mode & 1)
wrbuf_puts(term_dict, ".*");
else if (mode)
wrbuf_puts(term_dict, "\x01\x01.*");
-
return 1;
}
return ZEBRA_OK;
}
break;
+ case 102:
+ if (!term_102_icu(zm, &termp, term_dict, space_split, display_term))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ break;
case 1: /* right truncation */
if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
{
struct rset_key_control *kc)
{
zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
- if (zebra_maps_is_icu(zm))
- zebra_map_tokenize_start(zm, termz, strlen(termz));
return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
stream, index_type, complete_flag,
rank_type, xpath_use,
return rset_create_null(rset_nmem, kc, 0);
else
{
- int i, r, max_pos;
+ int i, max_pos;
char ord_buf[32];
RSET rset;
WRBUF term_dict = wrbuf_alloc();
wrbuf_puts(term_dict, term);
grep_info.isam_p_indx = 0;
- r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
- &grep_info, &max_pos, 0, grep_handle);
+ dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
+ &grep_info, &max_pos, 0, grep_handle);
yaz_log(YLOG_DEBUG, "%s %d positions", term,
grep_info.isam_p_indx);
rset = rset_trunc(zh, grep_info.isam_p_buf,
"<gils>\n<title>My computer</title>\n</gils>\n",
"<gils>\n<title>My x computer</title>\n</gils>\n",
"<gils>\n<title>My computer x</title>\n</gils>\n" ,
- "<gils>\n<title>" char_ae "</title>\n</gils>\n" ,
+ "<gils>\n<title>" char_ae "rme</title>\n</gils>\n" ,
"<gils>\n<title>B" char_aring "d</title>\n"
"<abstract>זיהוי סדר הארועים בסיפור המרד הגדול מאת צביה בן-שלום 提示:直接点击数据库名称,将进入单库检索 Ngày xửa ngày xưa D.W. all wet</abstract>\n</gils>\n" ,
0} ;
YAZ_CHECK(tl_query(zh, "@attr 1=title computer", 3));
- YAZ_CHECK(tl_query(zh, "@attr 1=title @attr 5=1 comput", 3));
+ YAZ_CHECK(tl_query(zh, "@attr 5=1 @attr 1=title computer", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=1 @attr 1=title compute", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=1 @attr 1=title computee", 0));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=1 @attr 1=title co", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=2 @attr 1=title computer", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=2 @attr 1=title compute", 0));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=2 @attr 1=title er", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=3 @attr 1=title computer", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=3 @attr 1=title compute", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=3 @attr 1=title er", 4));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=3 @attr 1=title ompute", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=102 @attr 1=title com.*er", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=102 @attr 1=title cm.*er", 0));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=102 @attr 1=title com.*ër", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=102 @attr 1=title com?m.*er", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=102 @attr 1=title coy?m.*er", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=102 @attr 1=title co[m].*er", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=102 @attr 1=title co[mn].*er", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=102 @attr 1=title co[m-n].*er", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=102 @attr 1=title co[a-z].*er", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=102 @attr 1=title co[a-n].*er", 3));
+
+ YAZ_CHECK(tl_query(zh, "@attr 1=title com.*ër", 0));
+
+ YAZ_CHECK(tl_query(zh, "@attr 1=title @and @attr 5=102 com.*er x", 2));
+
+ YAZ_CHECK(tl_query(zh, "@attr 1=title @and x @attr 5=102 com.*er", 2));
YAZ_CHECK(tl_query(zh, "@attr 1=title .computer.", 3));
YAZ_CHECK(tl_query(zh, "@attr 1=title mY", 3));
- YAZ_CHECK(tl_query(zh, char_ae, 1));
- YAZ_CHECK(tl_query(zh, char_AE, 1));
+ YAZ_CHECK(tl_query(zh, char_ae "rme", 1));
+ YAZ_CHECK(tl_query(zh, char_AE "RME", 1));
YAZ_CHECK(tl_query(zh, "b" char_aring "d", 1));
YAZ_CHECK(tl_query(zh, "B" char_Aring "D", 1));
YAZ_CHECK(tl_query(zh, "b" char_aring1 "d", 1));
YAZ_CHECK(tl_query(zh, "B" char_Aring1 "D", 1));
+ YAZ_CHECK(tl_query(zh, "@attr 5=102 b" char_aring "d", 1));
+ YAZ_CHECK(tl_query(zh, "@attr 5=102 b.d", 1));
+
+ YAZ_CHECK(tl_query(zh, "@attr 5=102 " char_ae "rme", 1));
+ YAZ_CHECK(tl_query(zh, "@attr 5=102 " "..rme", 1));
+
/* Abstract searches . Chinese mostly */
YAZ_CHECK(tl_query(zh, "@attr 1=abstract בן", 1));
YAZ_CHECK(tl_query(zh, "@attr 1=abstract צביה", 1));
YAZ_CHECK(tl_query(zh, "@attr 1=abstract @attr 5=1 בס", 1));
YAZ_CHECK(tl_query(zh, "@attr 1=abstract @attr 5=1 ב", 1));
+ YAZ_CHECK(tl_query(zh, "@attr 1=abstract @attr 5=102 בן", 1));
+
/* phrase search */
YAZ_CHECK(tl_query(zh, "@attr 1=title {my computer}", 2));
YAZ_CHECK(tl_query(zh, "@attr 1=title @attr 6=1 {my computer}", 2));
/* scan */
{ /* word search */
- const char *ent[] = { char_ae, "B" char_aring "d", "computer",
+ const char *ent[] = { char_ae "rme", "B" char_aring "d", "computer",
"My", "x", 0 };
YAZ_CHECK(tl_scan(zh, "@attr 1=title 0", 1, 10, 1, 5, 1, ent));
}
}
{ /* phrase search */
- const char *ent[] = { char_ae, "B" char_aring "d", "My computer" };
+ const char *ent[] = { char_ae "rme", "B" char_aring "d", "My computer" };
YAZ_CHECK(tl_scan(zh, "@attr 1=title @attr 6=2 0", 1, 3, 1, 3, 0, ent));
}