-## $Id: Makefile.am,v 1.68 2007-12-03 16:54:49 adam Exp $
+## $Id: Makefile.am,v 1.69 2007-12-13 11:09:20 adam Exp $
aux_libs = \
../rset/libidzebra-rset.la \
rank.h rank1.c ranksimilarity.c rankstatic.c \
records.c recindex.c recindex.h reckeys.c reckeys.h \
retrieve.c \
- rpnscan.c rpnsearch.c rpnfacet.c sortidx.c stream.c \
+ rpnscan.c rpnsearch.c sortidx.c stream.c \
update_path.c update_file.c trunc.c untrans.c isam_methods.c \
zaptterm.c zebraapi.c zinfo.c zinfo.h zsets.c key_block.c key_block.h \
check_res.c rset_isam.c
-/* $Id: extract.c,v 1.272 2007-12-10 17:06:08 adam Exp $
+/* $Id: extract.c,v 1.273 2007-12-13 11:09:20 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
}
+static void snippet_add_icu(RecWord *p, int ord, zebra_map_t zm)
+{ /* Passes p->term_buf (p->term_len bytes) to zebra_map_tokenize_start()
+   * and appends one entry to h->snippets for every result returned by
+   * zebra_map_tokenize_next(), tagged with ord and the current p->seqno.
+   * In the visible caller this is the path taken when
+   * zebra_maps_is_icu(zm) is true. */
+ struct snip_rec_info *h = p->extractCtrl->handle;
+
+ const char *res_buf = 0; /* filled in by the tokenizer but not used below */
+ size_t res_len = 0;
+
+ const char *display_buf = 0; /* the text that is stored in the snippet list */
+ size_t display_len = 0;
+
+ zebra_map_tokenize_start(zm, p->term_buf, p->term_len);
+ while (zebra_map_tokenize_next(zm, &res_buf, &res_len,
+ &display_buf, &display_len))
+ {
+ zebra_snippets_appendn(h->snippets, p->seqno, 0, ord,
+ display_buf, display_len);
+ p->seqno++; /* one sequence number per appended entry; p is modified in place */
+ }
+}
+
static void snippet_token_add(RecWord *p)
{
struct snip_rec_info *h = p->extractCtrl->handle;
int ch = zebraExplain_lookup_attr_str(
zei, zinfo_index_category_index, p->index_type, p->index_name);
- if (zebra_maps_is_complete(zm))
- snippet_add_complete_field(p, ch, zm);
+ if (zebra_maps_is_icu(zm))
+ snippet_add_icu(p, ch, zm);
else
- snippet_add_incomplete_field(p, ch, zm);
+ {
+ if (zebra_maps_is_complete(zm))
+ snippet_add_complete_field(p, ch, zm);
+ else
+ snippet_add_incomplete_field(p, ch, zm);
+ }
}
}
}
-ZEBRA_RES zebra_rec_keys_to_snippets(ZebraHandle zh,
+ZEBRA_RES zebra_rec_keys_to_snippets1(ZebraHandle zh,
zebra_rec_keys_t reckeys,
zebra_snippets *snippets)
{
\param p token data to be indexed
Call sequence:
- extract_token
- zebra_add_{in}_complete
+ extract_token_add
+ extract_add_{in}_complete
extract_add_string
extract_add_index_string
-/* $Id: index.h,v 1.212 2007-12-03 13:04:04 adam Exp $
+/* $Id: index.h,v 1.213 2007-12-13 11:09:20 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
void zebra_setError(ZebraHandle zh, int code, const char *addinfo);
void zebra_setError_zint(ZebraHandle zh, int code, zint i);
-void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream,
- const char *index_type,
- char **dst, const char *src);
+int zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream,
+ const char *index_type,
+ char **dst, const char *src);
ZEBRA_RES zebra_get_hit_vector(ZebraHandle zh, const char *setname, zint sysno);
-void zebra_term_untrans(ZebraHandle zh, const char *index_type,
- char *dst, const char *src);
+int zebra_term_untrans(ZebraHandle zh, const char *index_type,
+ char *dst, const char *src);
ZEBRA_RES zebra_apt_get_ord(ZebraHandle zh,
Z_AttributesPlusTerm *zapt,
const char **term_ref_id_str,
NMEM nmem);
-ZEBRA_RES rpn_facet(ZebraHandle zh, ODR stream,
- Z_AttributesPlusTerm *zapt,
- const Odr_oid *attributeset,
- int *position, int *num_entries,
- ZebraScanEntry **list,
- int *is_partial, const char *set_name);
-
ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh,
const char *setname,
zint recid,
+++ /dev/null
-/* $Id: rpnfacet.c,v 1.3 2007-11-05 11:20:39 adam Exp $
- Copyright (C) 1995-2007
- Index Data ApS
-
-This file is part of the Zebra server.
-
-Zebra is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*/
-
-#include <stdio.h>
-#include <assert.h>
-#if HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#include <ctype.h>
-
-#include <yaz/diagbib1.h>
-#include "index.h"
-#include <zebra_xpath.h>
-#include <yaz/wrbuf.h>
-#include <attrfind.h>
-#include <charmap.h>
-#include <rset.h>
-#include <yaz/oid_db.h>
-
-ZEBRA_RES rpn_facet(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
- const Odr_oid *attributeset,
- int *position, int *num_entries,
- ZebraScanEntry **list, int *is_partial,
- const char *set_name)
-{
- int ord;
- int use_sort_idx = 1;
- ZEBRA_RES res = zebra_attr_list_get_ord(zh,
- zapt->attributes,
- zinfo_index_category_sort,
- 0 /* index_type */,
- attributeset, &ord);
- if (res != ZEBRA_OK)
- return res;
- else if (use_sort_idx)
- {
- const char *index_type = 0;
- const char *db = 0;
- const char *string_index = 0;
- /* for each ord .. */
- /* check that sort idx exist for ord */
- /* sweep through result set and sort_idx at the same time */
- char *this_entry_buf = xmalloc(SORT_IDX_ENTRYSIZE);
- char *dst_buf = xmalloc(SORT_IDX_ENTRYSIZE);
- size_t sysno_mem_index = 0;
- RSET rset = resultSetRef(zh, set_name);
- zint p_this_sys = 0;
- RSFD rfd;
- TERMID termid;
- struct it_key key;
-
- if (zebraExplain_lookup_ord(zh->reg->zei,
- ord, &index_type, &db, &string_index))
- {
- yaz_log(YLOG_WARN, "zebraExplain_lookup_ord failed");
- }
-
- if (zh->m_staticrank)
- sysno_mem_index = 1;
-
- rfd = rset_open(rset, RSETF_READ);
- while (rset_read(rfd, &key, &termid))
- {
- zint sysno = key.mem[sysno_mem_index];
- if (sysno != p_this_sys)
- {
- p_this_sys = sysno;
- zebra_sort_sysno(zh->reg->sort_index, sysno);
- zebra_sort_type(zh->reg->sort_index, ord);
- zebra_sort_read(zh->reg->sort_index, this_entry_buf);
-
- zebra_term_untrans(zh, index_type, dst_buf, this_entry_buf);
- yaz_log(YLOG_LOG, "dst_buf=%s", dst_buf);
- }
- }
- rset_close(rfd);
- xfree(this_entry_buf);
- xfree(dst_buf);
- zebra_setError(zh, YAZ_BIB1_TEMPORARY_SYSTEM_ERROR, "facet not done1");
- return ZEBRA_FAIL;
- }
- else
- {
- int num = 100; /* to be customizable */
- int i;
-
- ZebraMetaRecord *meta = zebra_meta_records_create_range(
- zh, set_name, 0, num);
-
- for (i = 0; i < num; i++)
- {
- zint sysno = meta[i].sysno;
- Record rec = rec_get(zh->reg->records, sysno);
- if (!rec)
- {
- yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT,
- sysno);
- break;
- }
- else
- {
-
-
- rec_free(&rec);
- }
- }
- zebra_meta_records_destroy(zh, meta, num);
- zebra_setError(zh, YAZ_BIB1_TEMPORARY_SYSTEM_ERROR, "facet not done2");
- return ZEBRA_FAIL;
- }
-}
-
-/*
- * Local variables:
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- * vim: shiftwidth=4 tabstop=8 expandtab
- */
-
-/* $Id: rpnscan.c,v 1.23 2007-12-03 11:49:11 adam Exp $
+/* $Id: rpnscan.c,v 1.24 2007-12-13 11:09:20 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
char *termz, zebra_map_t zm)
{
- char termz0[IT_MAX_WORD];
+ char term_utf8[IT_MAX_WORD];
- if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
+ if (zapt_term_to_utf8(zh, zapt, term_utf8) == ZEBRA_FAIL)
return ZEBRA_FAIL; /* error */
+ else if (zebra_maps_is_icu(zm))
+ {
+ const char *res_buf;
+ size_t res_len;
+ zebra_map_tokenize_start(zm, term_utf8, strlen(term_utf8));
+
+ if (zebra_map_tokenize_next(zm, &res_buf, &res_len, 0, 0))
+ {
+ memcpy(termz, res_buf, res_len);
+ termz[res_len] = '\0';
+ }
+ else
+ termz[0] = '\0';
+ }
else
{
const char **map;
- const char *cp = (const char *) termz0;
+ const char *cp = (const char *) term_utf8;
const char *cp_end = cp + strlen(cp);
const char *src;
int i = 0;
if (pos != -1)
{
zint sysno;
- int code = -1;
- zebra_snippets *rec_snippets = zebra_snippets_create();
zebra_snippets *hit_snippets = zebra_snippets_create();
glist[pos].term = 0;
get_first_snippet_from_rset(zh, rset, hit_snippets, &sysno);
if (sysno)
- code = zebra_get_rec_snippets(zh, sysno, rec_snippets);
-
- if (code == 0)
{
- const struct zebra_snippet_word *w =
- zebra_snippets_lookup(rec_snippets, hit_snippets);
- if (w)
+ zebra_snippets *rec_snippets = zebra_snippets_create();
+ int code = zebra_get_rec_snippets(zh, sysno, rec_snippets);
+ if (code == 0)
{
- glist[pos].display_term = odr_strdup(stream, w->term);
+ const struct zebra_snippet_word *w =
+ zebra_snippets_lookup(rec_snippets, hit_snippets);
+ if (w)
+ {
+ glist[pos].display_term = odr_strdup(stream, w->term);
+ }
}
+ zebra_snippets_destroy(rec_snippets);
}
- if (!glist[pos].term)
- zebra_term_untrans_iconv(zh, stream->mem, index_type,
- &glist[pos].term, term);
+ if (zebra_term_untrans_iconv(zh, stream->mem, index_type,
+ &glist[pos].term, term))
+ {
+ /* untrans failed; fall back to display_term (which may itself be 0) */
+ glist[pos].term = glist[pos].display_term;
+ }
+
glist[pos].occurrences = count;
- zebra_snippets_destroy(rec_snippets);
zebra_snippets_destroy(hit_snippets);
}
rset_delete(rset);
zebra_setError(zh, YAZ_BIB1_TOO_MANY_DATABASES_SPECIFIED, 0);
return ZEBRA_FAIL;
}
- if (sort_flag)
- {
- return rpn_facet(zh, stream, zapt, attributeset, position, num_entries,
- list, is_partial, set_name);
- }
for (base_no = 0; base_no < num_bases; base_no++)
{
int ord;
-/* $Id: untrans.c,v 1.5 2007-10-31 16:56:14 adam Exp $
+/* $Id: untrans.c,v 1.6 2007-12-13 11:09:20 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
#include "index.h"
#include <charmap.h>
-void zebra_term_untrans(ZebraHandle zh, const char *index_type,
- char *dst, const char *src)
+int zebra_term_untrans(ZebraHandle zh, const char *index_type,
+ char *dst, const char *src) /* Writes to dst the strings that
+   * zebra_maps_output() yields for src, using the map looked up for
+   * index_type. dst is NUL-terminated and receives at most
+   * IT_MAX_WORD-1 bytes before the terminator, so it must have room
+   * for IT_MAX_WORD bytes.
+   * Returns 0 on success, -1 when zebra_maps_is_icu() is true for the
+   * map; in that case dst is not written at all. */
{
zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type);
- int len = 0;
- while (*src)
+ if (zebra_maps_is_icu(zm))
+ return -1; /* no conversion is attempted for ICU maps; caller must cope */
+ else
{
- const char *cp = zebra_maps_output(zm, &src);
- if (!cp)
- {
- if (len < IT_MAX_WORD-1)
- dst[len++] = *src;
- src++;
- }
- else
- while (*cp && len < IT_MAX_WORD-1)
- dst[len++] = *cp++;
+ int len = 0;
+ while (*src)
+ {
+ const char *cp = zebra_maps_output(zm, &src); /* presumably advances src on a match - confirm in charmap code */
+ if (!cp)
+ {
+ if (len < IT_MAX_WORD-1) /* output is silently truncated once dst is full */
+ dst[len++] = *src; /* nothing returned for this position: copy the byte unchanged */
+ src++;
+ }
+ else
+ while (*cp && len < IT_MAX_WORD-1)
+ dst[len++] = *cp++;
+ }
+ dst[len] = '\0';
}
- dst[len] = '\0';
+ return 0;
}
-void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream,
- const char *index_type,
- char **dst, const char *src)
+int zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream,
+ const char *index_type,
+ char **dst, const char *src)
{
char term_src[IT_MAX_WORD];
char term_dst[IT_MAX_WORD];
+ int r;
- zebra_term_untrans (zh, index_type, term_src, src);
+ r = zebra_term_untrans (zh, index_type, term_src, src);
+ if (r)
+ return r;
if (zh->iconv_from_utf8 != 0)
{
}
else
*dst = nmem_strdup(stream, term_src);
+ return 0;
}
-/* $Id: t17.c,v 1.8 2007-12-07 14:17:37 adam Exp $
+/* $Id: t17.c,v 1.9 2007-12-13 11:09:20 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
#include <yaz/test.h>
#include "testlib.h"
+#define char_ae "\xc3\xa6" /* UTF-8 bytes of U+00E6, lower-case ae ligature */
+#define char_AE "\xc3\x86" /* UTF-8 bytes of U+00C6, upper-case AE ligature */
+#define char_oslash "\xc3\xb8" /* UTF-8 bytes of U+00F8, lower-case o with stroke */
+#define char_Oslash "\xc3\x98" /* UTF-8 bytes of U+00D8, upper-case O with stroke */
+
const char *myrec[] = {
"<gils>\n<title>My computer</title>\n</gils>\n",
"<gils>\n<title>My x computer</title>\n</gils>\n",
"<gils>\n<title>My computer x</title>\n</gils>\n" ,
+
+ "<gils>\n<title>" char_ae "</title>\n</gils>\n" ,
0} ;
static void tst(int argc, char **argv)
YAZ_CHECK(tl_query(zh, "@attr 1=title my", 3));
+ YAZ_CHECK(tl_query(zh, "@attr 1=title mY", 3));
+
+ YAZ_CHECK(tl_query(zh, char_ae, 1));
+#if 0
+ YAZ_CHECK(tl_query(zh, char_AE, 1));
+#endif
+
/* phrase search */
YAZ_CHECK(tl_query(zh, "@attr 1=title {my computer}", 2));
YAZ_CHECK(tl_query(zh, "@attr 1=title @attr 6=1 {my computer}", 2));
/* complete-subfield search */
YAZ_CHECK(tl_query(zh, "@attr 1=title @attr 6=2 {my computer}", 1));
YAZ_CHECK(tl_query(zh, "@attr 1=title @attr 6=2 {my}", 0));
-
+
+ /* scan */
+
YAZ_CHECK(tl_close_down(zh, zs));
#endif
}
# Zebra indexes as referred to from the *.abs-files.
-# $Id: t17.idx,v 1.5 2007-12-07 14:17:37 adam Exp $
+# $Id: t17.idx,v 1.6 2007-12-13 11:09:20 adam Exp $
#
# Traditional word index
firstinfield 1
# simplechain dummy
icuchain words-icu.xml
-debug 1
+# debug 1
# Phrase index
# Used if completeness is 'complete {sub}field' (@attr 6=2, @attr 6=1)
# and structure is word/phrase/word-list/free-form-text/document-text
index p
completeness 1
-charmap phrases-icu.xml
+icuchain phrases-icu.xml
+# debug 1
# Sort register
sort s
# Zebra makefile for MS NMAKE
-# $Id: makefile,v 1.71 2007-12-03 17:16:48 adam Exp $
+# $Id: makefile,v 1.72 2007-12-13 11:09:20 adam Exp $
###########################################################
############### Parameters
$(OBJDIR)\regxread.obj \
$(OBJDIR)\res.obj \
$(OBJDIR)\retrieve.obj \
- $(OBJDIR)\rpnfacet.obj \
$(OBJDIR)\rpnscan.obj \
$(OBJDIR)\rpnsearch.obj \
$(OBJDIR)\rsbetween.obj \