-/* $Id: extract.c,v 1.268 2007-11-06 10:29:59 adam Exp $
+/* $Id: extract.c,v 1.269 2007-11-08 21:21:58 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
key.mem[i++] = p->seqno;
key.len = i;
- yaz_log(YLOG_LOG, "keys_write %.*s", (int) res_len, res_buf);
zebra_rec_keys_write(zh->reg->keys, res_buf, res_len, &key);
p->seqno++;
-/* $Id: rpnsearch.c,v 1.20 2007-11-01 14:10:03 adam Exp $
+/* $Id: rpnsearch.c,v 1.21 2007-11-08 21:21:58 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
struct rpn_char_map_info *map_info)
{
map_info->zm = zm;
- dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
+ if (zebra_maps_is_icu(zm))
+ dict_grep_cmap(reg->dict, 0, 0);
+ else
+ dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
}
#define TERM_COUNT
}
}
+/* term_100_icu: ICU counterpart of term_100, used when the map is an ICU map.
+ *
+ * Feeds the whole of *src to zebra_map_tokenize() and appends every token it
+ * yields to term_dict, back to back with no separator between tokens.
+ * Within a token, a byte found in REGEX_CHARS is preceded by a backslash and
+ * a byte comparing below 32 is preceded by a byte of value 1.
+ *
+ * zm          map handle passed through to zebra_map_tokenize()
+ * src         in/out: on return *src points at the terminating NUL of the
+ *             input, i.e. the whole string is consumed in one call
+ * term_dict   receives the escaped token bytes
+ * space_split accepted for signature parity with term_100; not referenced
+ *             in this function
+ * dst_term    the original input is appended here when at least one token
+ *             was produced
+ *
+ * Returns the number of tokens appended (0 when tokenization yields none).
+ *
+ * NOTE(review): strcat() into dst_term is unbounded; presumably the caller
+ * supplies a buffer large enough for the term -- confirm against callers.
+ * NOTE(review): res_buf is plain char, whose signedness is implementation
+ * defined; where char is signed, bytes >= 0x80 also satisfy "< 32" and get
+ * the 1-byte prefix -- confirm whether that is intended for these tokens.
+ */
+static int term_100_icu(zebra_map_t zm,
+ const char **src, WRBUF term_dict, int space_split,
+ char *dst_term)
+{
+ int no = 0; /* number of tokens written so far */
+ const char *res_buf = 0; /* current token, set by zebra_map_tokenize */
+ size_t res_len = 0; /* length of the current token in bytes */
+ int r = zebra_map_tokenize(zm, *src, strlen(*src),
+ &res_buf, &res_len); /* first call supplies the input buffer */
+
+ yaz_log(YLOG_LOG, "term_100_icu r=%d", r);
+ if (r) /* only record the term when tokenization produced something */
+ strcat(dst_term, *src);
+ *src += strlen(*src); /* consume the entire input regardless of r */
+ while (r)
+ {
+ int i; /* NOTE(review): int compared against size_t res_len below */
+ no++;
+ for (i = 0; i < res_len; i++)
+ {
+ if (strchr(REGEX_CHARS, res_buf[i])) /* byte listed in REGEX_CHARS: escape it */
+ wrbuf_putc(term_dict, '\\');
+ if (res_buf[i] < 32) /* low-valued byte: prefix with a byte of value 1 */
+ wrbuf_putc(term_dict, 1);
+ wrbuf_putc(term_dict, res_buf[i]);
+ }
+ r = zebra_map_tokenize(zm, 0, 0, &res_buf, &res_len); /* null input: presumably fetches the next token of the same buffer -- confirm */
+ }
+ return no;
+}
+
/* term_100: handle term, where trunc = none(no operators at all) */
static int term_100(zebra_map_t zm,
const char **src, WRBUF term_dict, int space_split,
const char *space_start = 0;
const char *space_end = 0;
+ if (zebra_maps_is_icu(zm))
+ return term_100_icu(zm, src, term_dict, space_split, dst_term);
+
if (!term_pre(zm, src, NULL, NULL, !space_split))
return 0;
s0 = *src;
char ord_buf[32];
int ord_len, i;
zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type);
-
+
*ol = ord_list_create(stream);
rpn_char_map_prepare(zh->reg, zm, &rcmi);
const char *input = wrbuf_cstr(term_dict) + prefix_len;
esc_str(buf, sizeof(buf), input, strlen(input));
}
- yaz_log(log_level_rpn, "dict_lookup_grep: %s",
- wrbuf_cstr(term_dict) + prefix_len);
+ {
+ WRBUF pr_wr = wrbuf_alloc();
+
+ wrbuf_verbose_str(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
+ yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
+ wrbuf_destroy(pr_wr);
+ }
r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
grep_info, &max_pos,
ord_len /* number of "exact" chars */,
# Zebra indexes as referred to from the *.abs-files.
-# $Id: t17.idx,v 1.3 2007-11-08 13:35:36 adam Exp $
+# $Id: t17.idx,v 1.4 2007-11-08 21:21:58 adam Exp $
#
# Traditional word index
firstinfield 1
# simplechain dummy
icuchain words-icu.xml
+debug 1
# Phrase index
# Used if completeness is 'complete {sub}field' (@attr 6=2, @attr 6=1)
-/* $Id: zebramap.c,v 1.70 2007-11-08 13:35:36 adam Exp $
+/* $Id: zebramap.c,v 1.71 2007-11-08 21:21:58 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
int first_in_field;
int type;
int use_chain;
+ int debug;
union {
struct {
int entry_size;
#if YAZ_HAVE_ICU
struct icu_chain *icu_chain;
#endif
- WRBUF simple_buf;
+ WRBUF input_str;
+ WRBUF print_str;
size_t simple_off;
struct zebra_map *next;
};
#if YAZ_HAVE_XML2
xmlFreeDoc(zm->doc);
#endif
- wrbuf_destroy(zm->simple_buf);
+ wrbuf_destroy(zm->input_str);
+ wrbuf_destroy(zm->print_str);
zm = zm->next;
}
wrbuf_destroy(zms->wrbuf_1);
zm->id = nmem_strdup(zms->nmem, index_type);
zm->maptab_name = 0;
zm->use_chain = 0;
+ zm->debug = 0;
zm->maptab = 0;
zm->type = map_type;
zm->completeness = 0;
#if YAZ_HAVE_XML2
zm->doc = 0;
#endif
- zm->simple_buf = wrbuf_alloc();
+ zm->input_str = wrbuf_alloc();
+ zm->print_str = wrbuf_alloc();
return zm;
}
return -1;
#endif
}
+ else if (!yaz_matchstr(argv[0], "debug") && argc == 2)
+ {
+ zm->debug = atoi(argv[1]);
+ }
else
{
yaz_log(YLOG_WARN, "%s:%d: Unrecognized directive '%s'",
static int tokenize_simple(zebra_map_t zm,
const char **result_buf, size_t *result_len)
{
- char *buf = wrbuf_buf(zm->simple_buf);
- size_t len = wrbuf_len(zm->simple_buf);
+ char *buf = wrbuf_buf(zm->input_str);
+ size_t len = wrbuf_len(zm->input_str);
size_t i = zm->simple_off;
size_t start;
if (buf)
{
- wrbuf_rewind(zm->simple_buf);
- wrbuf_write(zm->simple_buf, buf, len);
+ wrbuf_rewind(zm->input_str);
+ wrbuf_write(zm->input_str, buf, len);
zm->simple_off = 0;
}
UErrorCode status;
if (buf)
{
- yaz_log(YLOG_LOG, "assicn_cstr %s", wrbuf_cstr(zm->simple_buf));
+ if (zm->debug)
+ {
+ wrbuf_rewind(zm->print_str);
+ wrbuf_verbose_str(zm->print_str, wrbuf_buf(zm->input_str),
+ wrbuf_len(zm->input_str));
+
+ yaz_log(YLOG_LOG, "input %s",
+ wrbuf_cstr(zm->print_str));
+ }
icu_chain_assign_cstr(zm->icu_chain,
- wrbuf_cstr(zm->simple_buf),
+ wrbuf_cstr(zm->input_str),
&status);
assert(U_SUCCESS(status));
}
while (icu_chain_next_token(zm->icu_chain, &status))
{
assert(U_SUCCESS(status));
- *result_buf = icu_chain_token_norm(zm->icu_chain);
+ *result_buf = icu_chain_token_sortkey(zm->icu_chain);
assert(*result_buf);
- yaz_log(YLOG_LOG, "got result %s", *result_buf);
+
*result_len = strlen(*result_buf);
+
+ if (zm->debug)
+ {
+ wrbuf_rewind(zm->print_str);
+ wrbuf_verbose_str(zm->print_str, *result_buf, *result_len);
+ yaz_log(YLOG_LOG, "output %s", wrbuf_cstr(zm->print_str));
+ }
+
if (**result_buf != '\0')
return 1;
}