-# $Id: Makefile.am,v 1.28 2007-10-25 09:22:36 adam Exp $
+# $Id: Makefile.am,v 1.29 2007-11-06 10:29:58 adam Exp $
noinst_HEADERS = bset.h charmap.h \
direntz.h passwddb.h dfa.h zebra_xpath.h d1_absyn.h \
rset.h dfaset.h sortidx.h zebra-lock.h attrfind.h zebramap.h \
- it_key.h su_codec.h index_types.h
+ it_key.h su_codec.h
SUBDIRS = idzebra
+++ /dev/null
-/* $Id: index_types.h,v 1.2 2007-10-25 19:25:00 adam Exp $
- Copyright (C) 1995-2007
- Index Data ApS
-
-This file is part of the Zebra server.
-
-Zebra is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*/
-
-/**
- \file
- \brief Definitions for Zebra's index types
-*/
-
-#ifndef ZEBRA_INDEX_TYPES_H
-#define ZEBRA_INDEX_TYPES_H
-
-#include <yaz/yconfig.h>
-#include <yaz/xmltypes.h>
-
-YAZ_BEGIN_CDECL
-
-/** \brief zebra index types handle (ptr) */
-typedef struct zebra_index_types_s *zebra_index_types_t;
-
-/** \brief zebra index type handle (ptr) */
-typedef struct zebra_index_type_s *zebra_index_type_t;
-
-/** \brief creates index types handler/object from file
- \param fname filename
- \returns handle (NULL if unsuccessful)
-
- Config file format:
- \verbatim
- <indextypes>
- <indextype id="*:w" position="1" alwaysmatches="1" firstinfield="1"
- locale="en">
- <!-- conversion rules for words -->
- </indextype>
- <indextype id="*:p" position="0" alwaysmatches="0" firstinfield="0"
- locale="en">
- <!-- conversion rules for phrase -->
- </indextype>
- <indextype id="*:s" sort="1"
- locale="en">
- <!-- conversion rules for phrase -->
- </indextype>
- </indextypes>
- \endverbatim
- */
-zebra_index_types_t zebra_index_types_create(const char *fname);
-
-/** \brief destroys index rules object
- \param types handle
- */
-void zebra_index_types_destroy(zebra_index_types_t types);
-
-
-/** \brief creates index types handler/object from xml Doc
- \param doc Libxml2 document
- \returns handle (NULL if unsuccessful)
-
- Similar to zebra_index_types_create
-*/
-zebra_index_types_t zebra_index_types_create_doc(xmlDocPtr doc);
-
-
-/** \brief lookup of index type
- \param types types
- \param id id to search for
- \returns pattern ID
-*/
-const char *zebra_index_type_lookup_str(zebra_index_types_t types,
- const char *id);
-
-
-/** \brief get index type of a given ID
- \param types types
- \param id ID to search for
- \returns index type handle
-*/
-zebra_index_type_t zebra_index_type_get(zebra_index_types_t types,
- const char *id);
-
-/** \brief check whether index type is of type 'index'
- \param type index type
- \retval 1 YES
- \retval 0 NO
-*/
-int zebra_index_type_is_index(zebra_index_type_t type);
-
-/** \brief check whether index type is of type 'sort'
- \param type index type
- \retval 1 YES
- \retval 0 NO
-*/
-int zebra_index_type_is_sort(zebra_index_type_t type);
-
-/** \brief check whether index type is of type 'staticrank'
- \param type index type
- \retval 1 YES
- \retval 0 NO
-*/
-int zebra_index_type_is_staticrank(zebra_index_type_t type);
-
-
-/** \brief tokenize a term for an index type
- \param type index type
- \param buf term buffer (pass 0 to continue with previous buf)
- \param len term length
- \param result_buf resulting token buffer
- \param result_len resulting token length
- \retval 1 token read and result is in result_buf
- \retval 0 no token read (no more tokens in buf)
-*/
-int zebra_index_type_tokenize(zebra_index_type_t type,
- const char *buf, size_t len,
- const char **result_buf, size_t *result_len);
-
-YAZ_END_CDECL
-
-#endif
-/*
- * Local variables:
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- * vim: shiftwidth=4 tabstop=8 expandtab
- */
-
-/* $Id: zebramap.h,v 1.26 2007-10-31 16:56:13 adam Exp $
+/* $Id: zebramap.h,v 1.27 2007-11-06 10:29:58 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
int zebra_maps_is_positioned(zebra_map_t zm);
YAZ_EXPORT
+int zebra_maps_is_icu(zebra_map_t zm);
+
+YAZ_EXPORT
int zebra_maps_is_first_in_field(zebra_map_t zm);
YAZ_EXPORT
YAZ_EXPORT
zebra_map_t zebra_map_get_or_add(zebra_maps_t zms, const char *id);
+
+/** \brief tokenize a term using the chain configured for a map
+    \param zm zebra map (must have a chain enabled)
+    \param buf term buffer (pass 0 to continue with previous buf)
+    \param len term length
+    \param result_buf resulting token buffer
+    \param result_len resulting token length
+    \retval 1 token read and result is in result_buf
+    \retval 0 no token read (no more tokens in buf)
+*/
+YAZ_EXPORT
+int zebra_map_tokenize(zebra_map_t zm,
+                       const char *buf, size_t len,
+                       const char **result_buf, size_t *result_len);
+
YAZ_END_CDECL
#endif
-/* $Id: check_res.c,v 1.7 2007-10-29 09:25:40 adam Exp $
+/* $Id: check_res.c,v 1.8 2007-11-06 10:29:59 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
res_add(v, "threads", "");
res_add(v, "trunclimit", "");
res_add(v, "truncmax", "");
- res_add(v, "indextypes", "");
res_add(v, "database", "p");
res_add(v, "explainDatabase", "p");
res_add(v, "fileVerboseLimit", "p");
-/* $Id: extract.c,v 1.267 2007-10-31 16:56:14 adam Exp $
+/* $Id: extract.c,v 1.268 2007-11-06 10:29:59 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
int cmd, zebra_rec_keys_t skp);
static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid);
static void extract_token_add(RecWord *p);
-static void extract_token_add2(RecWord *p);
static void check_log_limit(ZebraHandle zh)
{
stream->endf(stream, &null_offset);;
extractCtrl.init = extract_init;
- if (zh->reg->index_types)
- {
- extractCtrl.tokenAdd = extract_token_add2;
- }
- else
- {
- extractCtrl.tokenAdd = extract_token_add;
- }
+ extractCtrl.tokenAdd = extract_token_add;
extractCtrl.schemaAdd = extract_schema_add;
extractCtrl.dh = zh->reg->dh;
extractCtrl.handle = zh;
extract_add_string(p, zm, buf, i);
}
-static void extract_token_add2_index(ZebraHandle zh, zebra_index_type_t type,
- RecWord *p)
+static void extract_add_icu(RecWord *p, zebra_map_t zm)
{
struct it_key key;
const char *res_buf = 0;
size_t res_len = 0;
- int r = zebra_index_type_tokenize(type, p->term_buf, p->term_len,
- &res_buf, &res_len);
+ ZebraHandle zh = p->extractCtrl->handle;
+ int r = zebra_map_tokenize(zm, p->term_buf, p->term_len,
+ &res_buf, &res_len);
int cat = zinfo_index_category_index;
int ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, p->index_type, p->index_name);
if (ch < 0)
zebra_rec_keys_write(zh->reg->keys, res_buf, res_len, &key);
p->seqno++;
- r = zebra_index_type_tokenize(type, 0, 0, &res_buf, &res_len);
+ r = zebra_map_tokenize(zm, 0, 0, &res_buf, &res_len);
}
}
-static void extract_token_add2(RecWord *p)
-{
- ZebraHandle zh = p->extractCtrl->handle;
- zebra_index_type_t type = zebra_index_type_get(zh->reg->index_types, p->index_type);
- if (type)
- {
- if (zebra_index_type_is_index(type))
- {
- extract_token_add2_index(zh, type, p);
- }
- else if (zebra_index_type_is_sort(type))
- {
- ;
-
- }
- }
-}
/** \brief top-level indexing handler for recctrl system
\param p token data to be indexed
}
if ((wrbuf = zebra_replace(zm, 0, p->term_buf, p->term_len)))
{
- p->term_buf = wrbuf_buf(wrbuf);
- p->term_len = wrbuf_len(wrbuf);
+ p->term_buf = wrbuf_buf(wrbuf);
+ p->term_len = wrbuf_len(wrbuf);
+ }
+ if (zebra_maps_is_icu(zm))
+ {
+ extract_add_icu(p, zm);
}
- if (zebra_maps_is_complete(zm))
- extract_add_complete_field(p, zm);
else
- extract_add_incomplete_field(p, zm);
+ {
+ if (zebra_maps_is_complete(zm))
+ extract_add_complete_field(p, zm);
+ else
+ extract_add_incomplete_field(p, zm);
+ }
}
static void extract_set_store_data_cb(struct recExtractCtrl *p,
-/* $Id: index.h,v 1.208 2007-11-05 11:20:39 adam Exp $
+/* $Id: index.h,v 1.209 2007-11-06 10:29:59 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
#include <idzebra/util.h>
#include <idzebra/flock.h>
#include <sortidx.h>
-#include <index_types.h>
#if HAVE_SYS_TIMES_H
#include <sys/times.h>
#endif
char *server_path_prefix;
data1_handle dh;
- zebra_index_types_t index_types;
zebra_maps_t zebra_maps;
ZebraRankClass rank_classes;
RecTypes recTypes;
-/* $Id: zebraapi.c,v 1.263 2007-11-05 11:20:39 adam Exp $
+/* $Id: zebraapi.c,v 1.264 2007-11-06 10:29:59 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
data1_set_tabroot (reg->dh, reg_path);
reg->recTypes = recTypes_init (zs->record_classes, reg->dh);
- reg->index_types = 0;
reg->zebra_maps =
zebra_maps_open(res, reg_path, profilePath);
if (!reg->zebra_maps)
record_compression = REC_COMPRESS_BZIP2;
{
- const char *index_types_fname = res_get(res, "indextypes");
- if (index_types_fname)
- {
- char tmp_full_name[1024];
-
- if (!yaz_filepath_resolve(index_types_fname,
- profilePath,
- reg_path,
- tmp_full_name))
- {
- yaz_log(YLOG_WARN, "Could not find %s", index_types_fname);
- ret = ZEBRA_FAIL;
- }
- else
- {
- reg->index_types = zebra_index_types_create(
- tmp_full_name);
- yaz_log(YLOG_LOG, "zebra_index_types_create returned %p",
- reg->index_types);
- }
- }
-
- }
- {
const char *index_fname = res_get_def(res, "index", "default.idx");
if (index_fname && *index_fname)
{
recTypes_destroy (reg->recTypes);
zebra_maps_close (reg->zebra_maps);
- zebra_index_types_destroy(reg->index_types);
zebraRankDestroy (reg);
bfs_destroy (reg->bfs);
data1_destroy (reg->dh);
-# $Id: Makefile.am,v 1.41 2007-10-29 13:43:58 adam Exp $
+# $Id: Makefile.am,v 1.42 2007-11-06 10:29:59 adam Exp $
noinst_PROGRAMS = testclient
testclient_SOURCES = testclient.c
TESTS = $(check_PROGRAMS)
EXTRA_DIST=zebra.cfg zebra6.cfg zebra8.cfg zebra10.cfg zebra15.cfg safari.cfg \
- t10.att t10.abs zebra17.cfg indextypes17.xml
+ t10.att t10.abs zebra17.cfg t17.idx
noinst_LIBRARIES = libtestlib.a
+++ /dev/null
-<indextypes>
- <indextype id="w:el" position="1" alwaysmatches="1" firstinfield="1"
- locale="el">
- <simple/>
- </indextype>
- <indextype id="w" position="1" alwaysmatches="1" firstinfield="1"
- locale="en">
- <simple/>
- </indextype>
- <indextype id="p" position="0" alwaysmatches="0" firstinfield="0"
- locale="en">
- <simple/>
- </indextype>
- <indextype id="s" sort="1"
- locale="en">
- <simple/>
- </indextype>
-</indextypes>
-/* $Id: t17.c,v 1.1 2007-10-29 13:43:58 adam Exp $
+/* $Id: t17.c,v 1.2 2007-11-06 10:29:59 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
YAZ_CHECK(tl_query(zh, "@attr 1=title notfound", 0));
/* we should get 3 hits. But 0 for now */
-#if 0
-
+#if 1
YAZ_CHECK(tl_query(zh, "@attr 1=title title", 3));
#else
YAZ_CHECK(tl_query(zh, "@attr 1=title title", 0));
--- /dev/null
+# Zebra indexes as referred to from the *.abs-files.
+# $Id: t17.idx,v 1.1 2007-11-06 10:30:00 adam Exp $
+#
+
+# Traditional word index
+# Used if completeness is 'incomplete field' (@attr 6=1) and
+# structure is word/phrase/word-list/free-form-text/document-text
+index w
+completeness 0
+position 1
+alwaysmatches 1
+firstinfield 1
+simplechain dummy
+
+# Phrase index
+# Used if completeness is 'complete {sub}field' (@attr 6=2, @attr 6=3)
+# and structure is word/phrase/word-list/free-form-text/document-text
+index p
+completeness 1
+charmap string.chr
+
+# URX (URL) index
+# Used if structure=urx (@attr 4=104)
+index u
+completeness 0
+charmap urx.chr
+
+# Numeric index
+# Used if structure=numeric (@attr 4=109)
+index n
+completeness 0
+charmap numeric.chr
+
+# Null map index (no mapping at all)
+# Used if structure=key (@attr 4=3)
+index 0
+completeness 0
+position 1
+charmap @
+
+# Year
+# Used if structure=year (@attr 4=4)
+index y
+completeness 0
+charmap @
+
+# Date
+# Used if structure=date (@attr 4=5)
+index d
+completeness 0
+charmap @
+
+# Sort register
+sort s
+completeness 1
+charmap string.chr
+
+# Staticrank (uncomment to enable)
+#staticrank r
-# $Id: zebra17.cfg,v 1.1 2007-10-29 13:43:58 adam Exp $
+# $Id: zebra17.cfg,v 1.2 2007-11-06 10:30:00 adam Exp $
profilepath: ${srcdir:-.}:${srcdir:-.}/../../tab
attset: bib1.att
recordType: grs.sgml
-indextypes: indextypes17.xml
+index: t17.idx
isam: b
tstlockscope
tstpass
tstres
-tst_index_types
-## $Id: Makefile.am,v 1.36 2007-10-25 09:22:36 adam Exp $
+## $Id: Makefile.am,v 1.37 2007-11-06 10:30:00 adam Exp $
noinst_LTLIBRARIES = libidzebra-util.la
-check_PROGRAMS = tstcharmap tstflock tstlockscope tstpass tstres \
- tst_index_types
+check_PROGRAMS = tstcharmap tstflock tstlockscope tstpass tstres
TESTS = $(check_PROGRAMS)
libidzebra_util_la_SOURCES = version.c zint.c res.c charmap.c zebramap.c \
passwddb.c zebra-lock.c dirent.c xpath.c atoi_zn.c snippet.c flock.c \
- attrfind.c exit.c it_key.c su_codec.c index_types.c
+ attrfind.c exit.c it_key.c su_codec.c
tstpass_SOURCES = tstpass.c
tstres_SOURCES = tstres.c
-tst_index_types_SOURCES = tst_index_types.c
-
clean-local:
-rm -rf *.LCK
-rm -rf *.log
+++ /dev/null
-/* $Id: index_types.c,v 1.3 2007-10-29 08:20:16 adam Exp $
- Copyright (C) 1995-2007
- Index Data ApS
-
- This file is part of the Zebra server.
-
- Zebra is free software; you can redistribute it and/or modify it under
- the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 2, or (at your option) any later
- version.
-
- Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- for more details.
-
- You should have received a copy of the GNU General Public License
- along with Zebra; see the file LICENSE.zebra. If not, write to the
- Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA.
-*/
-
-/**
- \file
- \brief Implementation of Zebra's index types system
-*/
-
-#include <assert.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <string.h>
-
-#include "index_types.h"
-#if HAVE_ICU
-#include <yaz/icu_I18N.h>
-#endif
-#include <yaz/match_glob.h>
-#include <yaz/xmalloc.h>
-#include <yaz/wrbuf.h>
-#include <yaz/log.h>
-
-struct zebra_index_types_s {
-#if YAZ_HAVE_XML2
- zebra_index_type_t rules;
- xmlDocPtr doc;
-#endif
-};
-
-#if YAZ_HAVE_XML2
-struct zebra_index_type_s {
- const xmlNode *ptr;
- const char *id;
- const char *locale;
- const char *position;
- const char *alwaysmatches;
- const char *firstinfield;
- int sort_flag;
- int index_flag;
- int staticrank_flag;
- int simple_chain;
-#if HAVE_ICU
- struct icu_chain *chain;
-#endif
- zebra_index_type_t next;
- WRBUF simple_buf;
- size_t simple_off;
-};
-
-static void index_type_destroy(zebra_index_type_t t);
-
-zebra_index_type_t parse_index_type(const xmlNode *ptr)
-{
- struct _xmlAttr *attr;
- struct zebra_index_type_s *rule;
-
- rule = xmalloc(sizeof(*rule));
- rule->next = 0;
-#if HAVE_ICU
- rule->chain = 0;
-#endif
- rule->ptr = ptr;
- rule->locale = 0;
- rule->id = 0;
- rule->position = 0;
- rule->alwaysmatches = 0;
- rule->firstinfield = 0;
- rule->sort_flag = 0;
- rule->index_flag = 1;
- rule->staticrank_flag = 0;
- rule->simple_chain = 0;
- rule->simple_buf = wrbuf_alloc();
- for (attr = ptr->properties; attr; attr = attr->next)
- {
- if (attr->children && attr->children->type == XML_TEXT_NODE)
- {
- if (!strcmp((const char *) attr->name, "id"))
- rule->id = (const char *) attr->children->content;
- else if (!strcmp((const char *) attr->name, "locale"))
- rule->locale = (const char *) attr->children->content;
- else if (!strcmp((const char *) attr->name, "position"))
- rule->position = (const char *) attr->children->content;
- else if (!strcmp((const char *) attr->name, "alwaysmatches"))
- rule->alwaysmatches = (const char *) attr->children->content;
- else if (!strcmp((const char *) attr->name, "firstinfield"))
- rule->firstinfield = (const char *) attr->children->content;
- else if (!strcmp((const char *) attr->name, "index"))
- {
- const char *v = (const char *) attr->children->content;
- if (v)
- rule->index_flag = *v == '1';
- }
- else if (!strcmp((const char *) attr->name, "sort"))
- {
- const char *v = (const char *) attr->children->content;
- if (v)
- rule->sort_flag = *v == '1';
- }
- else if (!strcmp((const char *) attr->name, "staticrank"))
- {
- const char *v = (const char *) attr->children->content;
- if (v)
- rule->staticrank_flag = *v == '1';
- }
- else
- {
- yaz_log(YLOG_WARN, "Unsupport attribute '%s' for indextype",
- attr->name);
- index_type_destroy(rule);
- return 0;
- }
- }
- }
- ptr = ptr->children;
- while (ptr && ptr->type != XML_ELEMENT_NODE)
- ptr = ptr->next;
- if (!ptr)
- {
- yaz_log(YLOG_WARN, "Missing rules for indexrule");
- index_type_destroy(rule);
- rule = 0;
- }
- else if (!strcmp((const char *) ptr->name, "icu_chain"))
- {
-#if HAVE_ICU
- UErrorCode status;
- rule->chain = icu_chain_xml_config(ptr,
- rule->locale,
- rule->sort_flag,
- &status);
- if (!rule->chain)
- {
- index_type_destroy(rule);
- rule = 0;
- }
-#else
- yaz_log(YLOG_WARN, "ICU unsupported (must be part of YAZ)");
- xfree(rule);
- rule = 0;
-#endif
- }
- else if (!strcmp((const char *) ptr->name, "simple"))
- {
- rule->simple_chain = 1;
- }
- else
- {
- yaz_log(YLOG_WARN, "Unsupported mapping %s for indexrule", ptr->name);
- index_type_destroy(rule);
- rule = 0;
- }
- return rule;
-}
-/* YAZ_HAVE_XML2 */
-#endif
-
-zebra_index_types_t zebra_index_types_create(const char *fname)
-{
- xmlDocPtr doc = xmlParseFile(fname);
- if (!doc)
- return 0;
- return zebra_index_types_create_doc(doc);
-}
-
-zebra_index_types_t zebra_index_types_create_doc(xmlDocPtr doc)
-{
-#if YAZ_HAVE_XML2
- zebra_index_types_t r = xmalloc(sizeof(*r));
- zebra_index_type_t *rp = &r->rules;
- const xmlNode *top = xmlDocGetRootElement(doc);
-
- r->doc = doc;
- *rp = 0;
- if (top && top->type == XML_ELEMENT_NODE
- && !strcmp((const char *) top->name, "indextypes"))
- {
- const xmlNode *ptr = top->children;
- for (; ptr; ptr = ptr->next)
- {
- if (ptr->type == XML_ELEMENT_NODE
- && !strcmp((const char *) ptr->name, "indextype"))
- {
- *rp = parse_index_type(ptr);
- if (!*rp)
- {
- zebra_index_types_destroy(r);
- return 0;
- }
- rp = &(*rp)->next;
- }
- }
- }
- else
- {
- zebra_index_types_destroy(r);
- r = 0;
- }
- return r;
-#else
- yaz_log(YLOG_WARN, "XML unsupported. Cannot read index rules");
- return 0;
-/* YAZ_HAVE_XML2 */
-#endif
-}
-
-static void index_type_destroy(zebra_index_type_t t)
-{
- if (t)
- {
-#if HAVE_ICU
- if (t->chain)
- icu_chain_destroy(t->chain);
-#endif
- wrbuf_destroy(t->simple_buf);
- xfree(t);
- }
-}
-
-void zebra_index_types_destroy(zebra_index_types_t r)
-{
- if (r)
- {
-#if YAZ_HAVE_XML2
- zebra_index_type_t rule;
- while (r->rules)
- {
- rule = r->rules;
- r->rules = rule->next;
- index_type_destroy(rule);
- }
- xmlFreeDoc(r->doc);
-
-#endif
- xfree(r);
- }
-}
-
-zebra_index_type_t zebra_index_type_get(zebra_index_types_t types,
- const char *id)
-{
-#if YAZ_HAVE_XML2
- zebra_index_type_t rule = types->rules;
-
- while (rule && !yaz_match_glob(rule->id, id))
- rule = rule->next;
- return rule;
-#endif
- return 0;
-}
-
-const char *zebra_index_type_lookup_str(zebra_index_types_t types,
- const char *id)
-{
- zebra_index_type_t t = zebra_index_type_get(types, id);
- if (t)
- return t->id;
- return 0;
-}
-
-int zebra_index_type_is_index(zebra_index_type_t type)
-{
- return type->index_flag;
-}
-
-int zebra_index_type_is_sort(zebra_index_type_t type)
-{
- return type->sort_flag;
-}
-
-int zebra_index_type_is_staticrank(zebra_index_type_t type)
-{
- return type->staticrank_flag;
-}
-
-#define SE_CHARS ";,.()-/?<> \r\n\t"
-
-int tokenize_simple(zebra_index_type_t type,
- const char **result_buf, size_t *result_len)
-{
- char *buf = wrbuf_buf(type->simple_buf);
- size_t len = wrbuf_len(type->simple_buf);
- size_t i = type->simple_off;
- size_t start;
-
- while (i < len && strchr(SE_CHARS, buf[i]))
- i++;
- start = i;
- while (i < len && !strchr(SE_CHARS, buf[i]))
- {
- if (buf[i] > 32 && buf[i] < 127)
- buf[i] = tolower(buf[i]);
- i++;
- }
-
- type->simple_off = i;
- if (start != i)
- {
- *result_buf = buf + start;
- *result_len = i - start;
- return 1;
- }
- return 0;
- }
-
-int zebra_index_type_tokenize(zebra_index_type_t type,
- const char *buf, size_t len,
- const char **result_buf, size_t *result_len)
-{
- if (type->simple_chain)
- {
- if (buf)
- {
- wrbuf_rewind(type->simple_buf);
- wrbuf_write(type->simple_buf, buf, len);
- type->simple_off = 0;
- }
- return tokenize_simple(type, result_buf, result_len);
- }
- return 0;
-}
-
-/*
- * Local variables:
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- * vim: shiftwidth=4 tabstop=8 expandtab
- */
-
+++ /dev/null
-/* $Id: tst_index_types.c,v 1.3 2007-10-25 19:25:00 adam Exp $
- Copyright (C) 1995-2007
- Index Data ApS
-
-This file is part of the Zebra server.
-
-Zebra is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*/
-
-#include <yaz/test.h>
-#include <index_types.h>
-#include <stdlib.h>
-#include <string.h>
-
-const char *xml_str =
-" <indextypes>"
-" <indextype id=\"*:w:el\" position=\"1\" alwaysmatches=\"1\" firstinfield=\"1\"\n"
-" locale=\"el\">\n"
-" <simple/>\n"
-" </indextype>\n"
-" <indextype id=\"*:w\" position=\"1\" alwaysmatches=\"1\" firstinfield=\"1\"\n"
-" locale=\"en\">\n"
-" <simple/>\n"
-" </indextype>\n"
-" <indextype id=\"*:p\" position=\"0\" alwaysmatches=\"0\" firstinfield=\"0\"\n"
-" locale=\"en\">\n"
-" <simple/>\n"
-" </indextype>\n"
-" <indextype id=\"*:s\" sort=\"1\" \n"
-" locale=\"en\">\n"
-" <simple/>\n"
-" </indextype>\n"
-" </indextypes>\n"
-;
-
-int compare_lookup(zebra_index_types_t r, const char *id,
- const char *expected_id)
-{
- const char *got_id = zebra_index_type_lookup_str(r, id);
- if (!got_id && !expected_id)
- return 1; /* none expected */
-
- if (got_id && expected_id && !strcmp(got_id, expected_id))
- return 1;
- return 0;
-}
-
-void tst1(void)
-{
-#if YAZ_HAVE_XML2
- xmlDocPtr doc = xmlParseMemory(xml_str, strlen(xml_str));
-
- YAZ_CHECK(doc);
- if (doc)
- {
- zebra_index_types_t rules = zebra_index_types_create_doc(doc);
- zebra_index_type_t type;
- YAZ_CHECK(rules);
-
- if (!rules)
- return ;
-
- YAZ_CHECK(compare_lookup(rules, "title:s", "*:s"));
- YAZ_CHECK(compare_lookup(rules, "title:sx", 0));
- YAZ_CHECK(compare_lookup(rules, "title:Sx", 0));
- YAZ_CHECK(compare_lookup(rules, "any:w", "*:w"));
- YAZ_CHECK(compare_lookup(rules, "any:w:en", 0));
- YAZ_CHECK(compare_lookup(rules, "any:w:el", "*:w:el"));
-
- {
- int i, iter = 3333;
- for (i = 0; i < iter; i++)
- {
- compare_lookup(rules, "title:s", "*:s");
- compare_lookup(rules, "title:sx", 0);
- compare_lookup(rules, "title:Sx", 0);
- }
- }
-
- type = zebra_index_type_get(rules, "any:w");
- YAZ_CHECK(type);
- if (type)
- {
- const char *buf = " How are you?";
- size_t len = strlen(buf);
- int r = 1;
-
- if (r)
- {
- const char *result_buf = 0;
- size_t result_len = 0;
- r = zebra_index_type_tokenize(type, buf, len,
- &result_buf, &result_len);
- YAZ_CHECK_EQ(r, 1);
- YAZ_CHECK(result_len == 3 &&
- !memcmp(result_buf, "how", result_len));
- }
-
- if (r)
- {
- const char *result_buf = 0;
- size_t result_len = 0;
- r = zebra_index_type_tokenize(type, 0, 0,
- &result_buf, &result_len);
- YAZ_CHECK_EQ(r, 1);
- YAZ_CHECK(result_len == 3 &&
- !memcmp(result_buf, "are", result_len));
- }
-
- if (r)
- {
- const char *result_buf = 0;
- size_t result_len = 0;
- r = zebra_index_type_tokenize(type, 0, 0,
- &result_buf, &result_len);
- YAZ_CHECK_EQ(r, 1);
- YAZ_CHECK(result_len == 3 &&
- !memcmp(result_buf, "you", result_len));
- }
-
- if (r)
- {
- const char *result_buf = 0;
- size_t result_len = 0;
- r = zebra_index_type_tokenize(type, 0, 0,
- &result_buf, &result_len);
- YAZ_CHECK_EQ(r, 0);
- }
- }
- zebra_index_types_destroy(rules);
- }
-#else
- zebra_index_types_t rules = zebra_index_types_create_doc(doc);
- YAZ_CHECK(!rules);
-#endif
-}
-
-int main(int argc, char **argv)
-{
- YAZ_CHECK_INIT(argc, argv);
- YAZ_CHECK_LOG();
-
- tst1();
-
- YAZ_CHECK_TERM;
-}
-
-/*
- * Local variables:
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- * vim: shiftwidth=4 tabstop=8 expandtab
- */
-
-/* $Id: zebramap.c,v 1.64 2007-11-05 13:58:01 adam Exp $
+/* $Id: zebramap.c,v 1.65 2007-11-06 10:30:02 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
int alwaysmatches;
int first_in_field;
int type;
+ int use_chain;
union {
struct {
int entry_size;
#if HAVE_ICU
struct icu_chain *icu_chain;
#endif
+ WRBUF simple_buf;
+ size_t simple_off;
struct zebra_map *next;
};
#if YAZ_HAVE_XML2
xmlFreeDoc(zm->doc);
#endif
+ wrbuf_destroy(zm->simple_buf);
zm = zm->next;
}
wrbuf_destroy(zms->wrbuf_1);
zm->zebra_maps = zms;
zm->id = nmem_strdup(zms->nmem, index_type);
zm->maptab_name = 0;
+ zm->use_chain = 0;
zm->locale = 0;
zm->maptab = 0;
zm->type = map_type;
#if YAZ_HAVE_XML2
zm->doc = 0;
#endif
+ zm->simple_buf = wrbuf_alloc();
return zm;
}
{
zm->locale = nmem_strdup(zms->nmem, argv[1]);
}
+ else if (!yaz_matchstr(argv[0], "simplechain"))
+ {
+ zm->use_chain = 1;
+ zm->icu_chain = 0;
+ }
else if (!yaz_matchstr(argv[0], "icuchain"))
{
#if YAZ_HAVE_XML2
+ if (!zm->locale)
+ {
+ yaz_log(YLOG_WARN, "%s:%d: locale required before icuchain",
+ fname, lineno);
+ return -1;
+ }
zm->doc = xmlParseFile(argv[1]);
if (!zm->doc)
{
yaz_log(YLOG_WARN, "%s:%d: Failed to load ICU chain %s",
fname, lineno, argv[1]);
}
+ zm->use_chain = 1;
#else
yaz_log(YLOG_WARN, "%s:%d: ICU support unavailable",
fname, lineno);
return zm->zebra_maps->wrbuf_1;
}
+#define SE_CHARS ";,.()-/?<> \r\n\t"
+
+/** \brief fetch the next token from a map's simple-chain buffer
+    \param zm zebra map; scanning resumes at zm->simple_off
+    \param result_buf set to the start of the token (points into the
+           map's simple_buf; not a copy)
+    \param result_len set to the token length in bytes
+    \retval 1 token found; result_buf / result_len are set
+    \retval 0 no more tokens; result_buf / result_len are untouched
+
+    Tokens are runs of bytes not listed in SE_CHARS. Bytes in the
+    range 33..126 are lower-cased in place in the buffer.
+*/
+static int tokenize_simple(zebra_map_t zm,
+                           const char **result_buf, size_t *result_len)
+{
+    char *data = wrbuf_buf(zm->simple_buf);
+    const size_t size = wrbuf_len(zm->simple_buf);
+    size_t pos = zm->simple_off;
+    size_t tok_start;
+
+    /* skip leading separators */
+    for (; pos < size; pos++)
+    {
+        if (!strchr(SE_CHARS, data[pos]))
+            break;
+    }
+    tok_start = pos;
+
+    /* consume the token, folding printable ASCII to lower case */
+    for (; pos < size; pos++)
+    {
+        if (strchr(SE_CHARS, data[pos]))
+            break;
+        if (data[pos] > 32 && data[pos] < 127)
+            data[pos] = tolower(data[pos]);
+    }
+
+    /* remember where to resume on the next call */
+    zm->simple_off = pos;
+
+    if (pos == tok_start)
+        return 0;
+
+    *result_buf = data + tok_start;
+    *result_len = pos - tok_start;
+    return 1;
+}
+
+/** \brief tokenize a term using the chain configured for a map
+    \param zm zebra map; must have use_chain set (asserted)
+    \param buf term buffer (pass 0 to continue with the previous buffer)
+    \param len term length
+    \param result_buf resulting token buffer
+    \param result_len resulting token length
+    \retval 1 token read and result is in result_buf
+    \retval 0 no token read
+
+    When buf is non-zero it is copied into the map's own buffer and
+    scanning restarts from the beginning; result_buf then points into
+    that copy.
+*/
+int zebra_map_tokenize(zebra_map_t zm,
+                       const char *buf, size_t len,
+                       const char **result_buf, size_t *result_len)
+{
+    assert(zm->use_chain);
+#if HAVE_ICU
+    /* The icu_chain member only exists in ICU builds, so it must not be
+       referenced outside this guard. A map with an ICU chain yields no
+       tokens from this function. */
+    if (zm->icu_chain)
+        return 0;
+#endif
+    if (buf)
+    {
+        /* new term: load it and restart scanning at offset 0 */
+        wrbuf_rewind(zm->simple_buf);
+        wrbuf_write(zm->simple_buf, buf, len);
+        zm->simple_off = 0;
+    }
+    return tokenize_simple(zm, result_buf, result_len);
+}
+
+/** \brief check whether a map is handled by the chain tokenizer
+    \param zm zebra map
+    \returns the map's use_chain flag when built with ICU support;
+             always 0 otherwise
+*/
+int zebra_maps_is_icu(zebra_map_t zm)
+{
+    int chained = 0;
+#if HAVE_ICU
+    chained = zm->use_chain;
+#endif
+    return chained;
+}
+
+
/*
* Local variables:
* c-basic-offset: 4