1 /* $Id: index_types.c,v 1.2 2007-10-25 19:25:00 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 \brief Implementation of Zebra's index types system
33 #include "index_types.h"
34 #include <yaz/icu_I18N.h>
35 #include <yaz/match_glob.h>
36 #include <yaz/xmalloc.h>
37 #include <yaz/wrbuf.h>
/* Handle for a whole set of index type rules. */
40 struct zebra_index_types_s {
/* Head of the rule list; rules are chained through their `next` member. */
42 zebra_index_type_t rules;
/* One index type rule, filled in by parse_index_type(). */
48 struct zebra_index_type_s {
/* Text of the "alwaysmatches" attribute, or 0 when the attribute is absent.
   The pointer refers to the XML attribute content; it is not a copy. */
53 const char *alwaysmatches;
/* Text of the "firstinfield" attribute, or 0 when the attribute is absent.
   The pointer refers to the XML attribute content; it is not a copy. */
54 const char *firstinfield;
/* ICU chain produced by icu_chain_xml_config() for an <icu_chain> element;
   released with icu_chain_destroy() in index_type_destroy(). */
60 struct icu_chain *chain;
/* Next rule in the list owned by struct zebra_index_types_s. */
62 zebra_index_type_t next;
/* Forward declaration: parse_index_type() calls this on its error paths,
   ahead of the definition further down in this file. */
67 static void index_type_destroy(zebra_index_type_t t);
/* Builds one index type rule from an XML element node.

   Attributes understood:
     id, locale, position, alwaysmatches, firstinfield
         - stored as pointers to the attribute text (not copied)
     index, sort, staticrank
         - boolean flags, true only when the value starts with '1'
   Any other attribute is logged as a warning and the rule is destroyed.

   After the attributes, the first element node found selects the mapping:
     <icu_chain> - configure an ICU chain with icu_chain_xml_config()
     <simple>    - use the built-in simple tokenizer
   A missing or unrecognised mapping element is logged and the rule is
   destroyed.

   NOTE(review): every error path calls index_type_destroy(rule); presumably
   the function then returns 0 and otherwise returns the new rule - confirm
   against the return statements. */
69 zebra_index_type_t parse_index_type(const xmlNode *ptr)
71 struct _xmlAttr *attr;
72 struct zebra_index_type_s *rule;
74 rule = xmalloc(sizeof(*rule));
/* Optional attributes default to "absent" / "off". */
83 rule->alwaysmatches = 0;
84 rule->firstinfield = 0;
87 rule->staticrank_flag = 0;
88 rule->simple_chain = 0;
/* Work buffer for the simple tokenizer; zebra_index_type_tokenize() copies
   the text to be tokenized into it. */
89 rule->simple_buf = wrbuf_alloc();
/* Walk all attributes of the element. */
90 for (attr = ptr->properties; attr; attr = attr->next)
/* Only attributes whose first child is a text node are interpreted. */
92 if (attr->children && attr->children->type == XML_TEXT_NODE)
94 if (!strcmp((const char *) attr->name, "id"))
95 rule->id = (const char *) attr->children->content;
96 else if (!strcmp((const char *) attr->name, "locale"))
97 rule->locale = (const char *) attr->children->content;
98 else if (!strcmp((const char *) attr->name, "position"))
99 rule->position = (const char *) attr->children->content;
100 else if (!strcmp((const char *) attr->name, "alwaysmatches"))
101 rule->alwaysmatches = (const char *) attr->children->content;
102 else if (!strcmp((const char *) attr->name, "firstinfield"))
103 rule->firstinfield = (const char *) attr->children->content;
104 else if (!strcmp((const char *) attr->name, "index"))
106 const char *v = (const char *) attr->children->content;
/* Flag is on only if the value's first character is '1'. */
108 rule->index_flag = *v == '1';
110 else if (!strcmp((const char *) attr->name, "sort"))
112 const char *v = (const char *) attr->children->content;
114 rule->sort_flag = *v == '1';
116 else if (!strcmp((const char *) attr->name, "staticrank"))
118 const char *v = (const char *) attr->children->content;
120 rule->staticrank_flag = *v == '1';
/* Unknown attribute: warn and discard the partially built rule. */
124 yaz_log(YLOG_WARN, "Unsupport attribute '%s' for indextype",
126 index_type_destroy(rule);
/* Advance to the first element node, skipping any other node types. */
132 while (ptr && ptr->type != XML_ELEMENT_NODE)
/* No element node found: the rule has no mapping definition. */
136 yaz_log(YLOG_WARN, "Missing rules for indexrule");
137 index_type_destroy(rule);
140 else if (!strcmp((const char *) ptr->name, "icu_chain"))
144 rule->chain = icu_chain_xml_config(ptr,
150 index_type_destroy(rule);
/* NOTE(review): presumably this warning is the branch compiled when YAZ
   lacks ICU support - confirm against the surrounding preprocessor
   conditionals. */
154 yaz_log(YLOG_WARN, "ICU unsupported (must be part of YAZ)");
159 else if (!strcmp((const char *) ptr->name, "simple"))
/* Select the built-in tokenizer implemented by tokenize_simple(). */
161 rule->simple_chain = 1;
/* Any other element name is not a known mapping. */
165 yaz_log(YLOG_WARN, "Unsupported mapping %s for indexrule", ptr->name);
166 index_type_destroy(rule);
/* Creates index types from the XML file `fname`.

   The file is parsed with xmlParseFile() and the resulting document is
   handed to zebra_index_types_create_doc(), whose result is returned.

   NOTE(review): what happens when xmlParseFile() fails cannot be seen in
   the lines here - confirm that a null document is rejected before the
   call below. */
174 zebra_index_types_t zebra_index_types_create(const char *fname)
176 xmlDocPtr doc = xmlParseFile(fname);
179 return zebra_index_types_create_doc(doc);
/* Creates index types from an already parsed XML document.

   The root element must be named "indextypes". Each child element named
   "indextype" is converted to a rule with parse_index_type() and stored in
   the rule list. On failure the partially built object is released with
   zebra_index_types_destroy().

   NOTE(review): the return values (presumably the new object on success and
   0 on failure) are not visible in the lines here - confirm. */
182 zebra_index_types_t zebra_index_types_create_doc(xmlDocPtr doc)
185 zebra_index_types_t r = xmalloc(sizeof(*r));
/* rp addresses the link that receives the next parsed rule; it starts at
   the list head. */
186 zebra_index_type_t *rp = &r->rules;
187 const xmlNode *top = xmlDocGetRootElement(doc);
/* Accept only a document whose root element is <indextypes>. */
191 if (top && top->type == XML_ELEMENT_NODE
192 && !strcmp((const char *) top->name, "indextypes"))
194 const xmlNode *ptr = top->children;
195 for (; ptr; ptr = ptr->next)
/* Children that are not <indextype> elements are ignored by this test. */
197 if (ptr->type == XML_ELEMENT_NODE
198 && !strcmp((const char *) ptr->name, "indextype"))
200 *rp = parse_index_type(ptr);
/* NOTE(review): presumably reached when parse_index_type() failed -
   confirm the guarding condition. */
203 zebra_index_types_destroy(r);
212 zebra_index_types_destroy(r);
/* NOTE(review): presumably the branch compiled when XML support is
   unavailable - confirm against the preprocessor conditionals. */
217 yaz_log(YLOG_WARN, "XML unsupported. Cannot read index rules");
/* Releases the resources held by a single rule: its ICU chain and the work
   buffer of the simple tokenizer.

   NOTE(review): whether `t` itself is freed here, and whether a null `t` is
   tolerated, is not visible in these lines - confirm. */
223 static void index_type_destroy(zebra_index_type_t t)
229 icu_chain_destroy(t->chain);
231 wrbuf_destroy(t->simple_buf);
/* Destroys an index types object together with all of its rules. */
236 void zebra_index_types_destroy(zebra_index_types_t r)
241 zebra_index_type_t rule;
/* Unlink the current rule from the head of the list before destroying it,
   so the list head always refers to a rule that is still alive. */
245 r->rules = rule->next;
246 index_type_destroy(rule);
/* Looks up the rule that applies to `id`.

   The rule list is searched from its head; the search stops at the first
   rule for which yaz_match_glob(rule->id, id) reports a match. */
255 zebra_index_type_t zebra_index_type_get(zebra_index_types_t types,
259 zebra_index_type_t rule = types->rules;
/* rule->id is passed as the first argument of yaz_match_glob(), i.e. the
   id stored in the configuration acts as the pattern
   (NOTE(review): confirm the argument order against the YAZ header). */
261 while (rule && !yaz_match_glob(rule->id, id))
/* Returns a string associated with the index type selected by `id`.
   The type is resolved with zebra_index_type_get().

   NOTE(review): which member is returned, and the result when no rule
   matches, are not visible in these lines - confirm. */
268 const char *zebra_index_type_lookup_str(zebra_index_types_t types,
271 zebra_index_type_t t = zebra_index_type_get(types, id);
/* Returns the rule's index flag, which parse_index_type() sets from the
   "index" attribute (true when its value starts with '1'). */
277 int zebra_index_type_is_index(zebra_index_type_t type)
279 return type->index_flag;
/* Returns the rule's sort flag, which parse_index_type() sets from the
   "sort" attribute (true when its value starts with '1'). */
282 int zebra_index_type_is_sort(zebra_index_type_t type)
284 return type->sort_flag;
/* Returns the rule's static rank flag, which parse_index_type() sets from
   the "staticrank" attribute (true when its value starts with '1'; 0 when
   the attribute is absent). */
287 int zebra_index_type_is_staticrank(zebra_index_type_t type)
289 return type->staticrank_flag;
/* Characters that separate tokens for the simple tokenizer. */
292 #define SE_CHARS ";,.()-/?<> \r\n\t"
/* Extracts the next token from the rule's work buffer.

   Scanning resumes at type->simple_off. Leading separator characters are
   skipped, then characters are consumed up to the next separator or the end
   of the buffer. ASCII characters in the range 33..126 are lower-cased in
   place while scanning. The resume offset is stored back in the rule.

   result_buf receives a pointer into type->simple_buf (no copy is made);
   result_len receives the token length in bytes.

   NOTE(review): the return value (presumably non-zero when a token was
   produced and 0 at end of input) is not visible in these lines - confirm. */
294 int tokenize_simple(zebra_index_type_t type,
295 const char **result_buf, size_t *result_len)
297 char *buf = wrbuf_buf(type->simple_buf);
298 size_t len = wrbuf_len(type->simple_buf);
299 size_t i = type->simple_off;
/* Skip separators. strchr() also matches the terminating NUL of SE_CHARS,
   so a zero byte in the buffer is treated as a separator as well. */
302 while (i < len && strchr(SE_CHARS, buf[i]))
/* Consume the token itself. */
305 while (i < len && !strchr(SE_CHARS, buf[i]))
/* The range test keeps tolower() away from bytes outside printable ASCII. */
307 if (buf[i] > 32 && buf[i] < 127)
308 buf[i] = tolower(buf[i]);
/* Remember where the next call has to continue. */
312 type->simple_off = i;
315 *result_buf = buf + start;
316 *result_len = i - start;
/* Tokenizes text according to an index type rule.

   For rules that use the simple chain, the `len` bytes at `buf` are copied
   into the rule's work buffer, the scan offset is reset, and the next token
   is obtained from tokenize_simple(); its result is returned.

   NOTE(review): presumably the buffer is reloaded only when a new `buf` is
   supplied, so that repeated calls step through the same text - confirm the
   guarding condition. The behavior for rules that do not use the simple
   chain is not visible in these lines. */
322 int zebra_index_type_tokenize(zebra_index_type_t type,
323 const char *buf, size_t len,
324 const char **result_buf, size_t *result_len)
326 if (type->simple_chain)
/* Replace the previous contents of the work buffer with the new text. */
330 wrbuf_rewind(type->simple_buf);
331 wrbuf_write(type->simple_buf, buf, len);
332 type->simple_off = 0;
334 return tokenize_simple(type, result_buf, result_len);
342 * indent-tabs-mode: nil
344 * vim: shiftwidth=4 tabstop=8 expandtab