-# $Id: Makefile.am,v 1.25 2006-11-21 14:32:38 adam Exp $
+# $Id: Makefile.am,v 1.26 2007-10-23 12:26:25 adam Exp $
noinst_HEADERS = bset.h charmap.h \
direntz.h passwddb.h dfa.h zebra_xpath.h d1_absyn.h \
rset.h dfaset.h sortidx.h zebra-lock.h attrfind.h zebramap.h \
- it_key.h su_codec.h
+ it_key.h su_codec.h index_rules.h rob_regexp.h
SUBDIRS = idzebra
--- /dev/null
+/* $Id: index_rules.h,v 1.1 2007-10-23 12:26:25 adam Exp $
+ Copyright (C) 1995-2007
+ Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+/**
+ \file index_rules.h
+ \brief Definitions for Zebra's index rules system
+*/
+
+#ifndef ZEBRA_INDEX_RULES_H
+#define ZEBRA_INDEX_RULES_H
+
+#include <yaz/yconfig.h>
+#include <yaz/xmltypes.h>
+
+YAZ_BEGIN_CDECL
+
+/** \brief opaque handle to a set of index rules; the struct itself is
+    not defined in this header */
+typedef struct zebra_index_rules_s *zebra_index_rules_t;
+
+/** \brief creates index rules handler/object from file
+    \param fname filename
+    \returns handle (NULL if unsuccessful)
+
+    Config file format:
+    \verbatim
+    <indexrules>
+      <indexrule id="^.*:w$" position="1" alwaysmatches="1" firstinfield="1"
+       locale="en">
+        <!-- conversion rules for words -->
+      </indexrule>
+      <indexrule id="^.*:p$" position="0" alwaysmatches="0" firstinfield="0"
+       locale="en">
+        <!-- conversion rules for phrase -->
+      </indexrule>
+      <indexrule id="^.*:s$" sort="1"
+       locale="en">
+        <!-- conversion rules for sort -->
+      </indexrule>
+    </indexrules>
+    \endverbatim
+
+    The id attribute is a regular expression as understood by
+    zebra_rob_regexp (see rob_regexp.h); it is not a glob pattern.
+ */
+zebra_index_rules_t zebra_index_rules_create(const char *fname);
+
+/** \brief destroys index rules object
+ \param r handle
+ */
+void zebra_index_rules_destroy(zebra_index_rules_t r);
+
+
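+/** \brief creates index rules handler/object from xml Doc
+    \param doc XML document holding the index rules
+    \returns handle (NULL if unsuccessful)
+
+    Similar to zebra_index_rules_create, but takes an already parsed
+    document. When built with XML support the document is freed by
+    zebra_index_rules_destroy, and also when creation fails, so the
+    caller must not free it.
+*/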
+zebra_index_rules_t zebra_index_rules_create_doc(xmlDocPtr doc);
+
+
+/** \brief lookup of indexrule
+    \param r rules
+    \param id id to search for
+    \returns pattern ID: the id attribute of the first indexrule whose
+    pattern matches id; NULL if no rule matches
+*/
+const char *zebra_index_rule_lookup_str(zebra_index_rules_t r, const char *id);
+
+YAZ_END_CDECL
+
+#endif
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
--- /dev/null
+/* $Id: rob_regexp.h,v 1.1 2007-10-23 12:26:25 adam Exp $
+ Copyright (C) 1995-2007
+ Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+/**
+ \file rob_regexp.h
+ \brief Rob Pike's regular expression matcher
+*/
+
+#ifndef ZEBRA_ROB_REGEXP_H
+#define ZEBRA_ROB_REGEXP_H
+
+#include <yaz/yconfig.h>
+
+YAZ_BEGIN_CDECL
+
+/** \brief matches text against a regular expression
+    \param regexp regular expression
+    \param text text to be searched
+    \retval 1 text matches
+    \retval 0 text does not match
+
+    Supported constructs: c (literal character), . (any single character),
+    c* (zero or more of the preceding character), ^ (at the start of
+    regexp: anchor at beginning of text) and $ (at the end of regexp:
+    anchor at end of text). Without a leading ^ the expression may match
+    anywhere in text.
+*/
+int zebra_rob_regexp(const char *regexp, const char *text);
+
+YAZ_END_CDECL
+
+#endif
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
-## $Id: Makefile.am,v 1.33 2007-08-27 17:22:22 adam Exp $
+## $Id: Makefile.am,v 1.34 2007-10-23 12:26:26 adam Exp $
noinst_LTLIBRARIES = libidzebra-util.la
-check_PROGRAMS = tstcharmap tstflock tstlockscope tstpass tstres
+check_PROGRAMS = tstcharmap tstflock tstlockscope tstpass tstres \
+ tst_index_rules
TESTS = $(check_PROGRAMS)
libidzebra_util_la_SOURCES = version.c zint.c res.c charmap.c zebramap.c \
passwddb.c zebra-lock.c dirent.c xpath.c atoi_zn.c snippet.c flock.c \
- attrfind.c exit.c it_key.c su_codec.c
+ attrfind.c exit.c it_key.c su_codec.c index_rules.c rob_regexp.c
tstpass_SOURCES = tstpass.c
tstres_SOURCES = tstres.c
+tst_index_rules_SOURCES = tst_index_rules.c
+
clean-local:
-rm -rf *.LCK
-rm -rf *.log
--- /dev/null
+/* $Id: index_rules.c,v 1.1 2007-10-23 12:26:26 adam Exp $
+ Copyright (C) 1995-2007
+ Index Data ApS
+
+ This file is part of the Zebra server.
+
+ Zebra is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2, or (at your option) any later
+ version.
+
+ Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Zebra; see the file LICENSE.zebra. If not, write to the
+ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA.
+*/
+
+#include <assert.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+
+#include "index_rules.h"
+#include "rob_regexp.h"
+#include <yaz/xmalloc.h>
+#include <yaz/wrbuf.h>
+#include <yaz/log.h>
+
+/* Index rules object; this is what zebra_index_rules_t points to. */
+struct zebra_index_rules_s {
+ /* id most recently passed to zebra_index_rule_lookup_str on a cache miss */
+ WRBUF last_id;
+#if YAZ_HAVE_XML2
+ /* head of the singly linked list of rules, in document order */
+ struct zebra_index_rule *rules;
+ /* rule found by the most recent uncached lookup; 0 if none matched */
+ struct zebra_index_rule *last_rule_match;
+ /* XML document the rules were read from; freed on destroy */
+ xmlDocPtr doc;
+#endif
+};
+
+#if YAZ_HAVE_XML2
+/* One indexrule element. The string members are not copies: they point
+   at attribute values inside the XML document and are 0 when the
+   attribute is absent. */
+struct zebra_index_rule {
+ /* the indexrule element node itself */
+ const xmlNode *ptr;
+ /* pattern that lookup matches ids against */
+ const char *id;
+ const char *locale;
+ const char *position;
+ const char *alwaysmatches;
+ const char *firstinfield;
+ const char *sort;
+ /* next rule in the list; 0 for the last one */
+ struct zebra_index_rule *next;
+};
+
+/** \brief builds a rule from an indexrule XML element
+    \param ptr the indexrule element node
+    \returns newly allocated rule (release with xfree); 0 on failure
+
+    The attribute values are not copied; the rule's string members point
+    into the XML document that ptr belongs to.
+
+    Fails, after logging a warning, if the element carries an attribute
+    other than id, locale, position, alwaysmatches, firstinfield or sort,
+    or if it has no (non-empty) id. Lookup matches on id, so a rule
+    without one could never be selected and would hand a null pattern to
+    the matcher.
+*/
+static struct zebra_index_rule *parse_index_rule(const xmlNode *ptr)
+{
+    const struct _xmlAttr *attr;
+    struct zebra_index_rule *rule = xmalloc(sizeof(*rule));
+
+    rule->next = 0;
+    rule->ptr = ptr;
+    rule->locale = 0;
+    rule->id = 0;
+    rule->position = 0;
+    rule->alwaysmatches = 0;
+    rule->firstinfield = 0;
+    rule->sort = 0;
+    for (attr = ptr->properties; attr; attr = attr->next)
+    {
+        const char *name = (const char *) attr->name;
+        const char *value;
+
+        /* attributes whose first child is not a text node are skipped */
+        if (!attr->children || attr->children->type != XML_TEXT_NODE)
+            continue;
+        value = (const char *) attr->children->content;
+
+        if (!strcmp(name, "id"))
+            rule->id = value;
+        else if (!strcmp(name, "locale"))
+            rule->locale = value;
+        else if (!strcmp(name, "position"))
+            rule->position = value;
+        else if (!strcmp(name, "alwaysmatches"))
+            rule->alwaysmatches = value;
+        else if (!strcmp(name, "firstinfield"))
+            rule->firstinfield = value;
+        else if (!strcmp(name, "sort"))
+            rule->sort = value;
+        else
+        {
+            yaz_log(YLOG_WARN, "Unsupport attribute '%s' for indexrule",
+                    name);
+            xfree(rule);
+            return 0;
+        }
+    }
+    if (!rule->id)
+    {
+        yaz_log(YLOG_WARN, "Missing attribute 'id' for indexrule");
+        xfree(rule);
+        return 0;
+    }
+    return rule;
+}
+/* YAZ_HAVE_XML2 */
+#endif
+
+/* Parses the XML file fname and builds an index rules object from it.
+   Returns 0 if the file cannot be parsed, if the rules cannot be built
+   from it, or if YAZ was built without XML support (a warning is logged
+   in the last case). */
+zebra_index_rules_t zebra_index_rules_create(const char *fname)
+{
+#if YAZ_HAVE_XML2
+    xmlDocPtr doc = xmlParseFile(fname);
+
+    if (!doc)
+        return 0;
+    /* zebra_index_rules_create_doc releases doc itself when it fails,
+       so there is nothing to clean up here */
+    return zebra_index_rules_create_doc(doc);
+#else
+    yaz_log(YLOG_WARN, "Cannot read index rules %s because YAZ is without XML "
+            "support", fname);
+    return 0;
+/* YAZ_HAVE_XML2 */
+#endif
+}
+
+/* Builds an index rules object from a parsed XML document whose root
+   element is <indexrules>; one rule is created for each <indexrule>
+   child element, in document order.
+
+   With XML support the object takes over doc: it is freed by
+   zebra_index_rules_destroy, which is also called on every failure path
+   below. Returns 0 if the root element is not <indexrules>, if any
+   <indexrule> is rejected by parse_index_rule, or if YAZ was built
+   without XML support. */
+zebra_index_rules_t zebra_index_rules_create_doc(xmlDocPtr doc)
+{
+#if YAZ_HAVE_XML2
+    zebra_index_rules_t r = xmalloc(sizeof(*r));
+    struct zebra_index_rule **rp = &r->rules;
+    const xmlNode *top = xmlDocGetRootElement(doc);
+    const xmlNode *ptr;
+
+    r->doc = doc;
+    r->last_rule_match = 0;
+    r->last_id = wrbuf_alloc();
+    *rp = 0; /* keeps the list terminated so destroy is safe at any point */
+
+    if (!top || top->type != XML_ELEMENT_NODE
+        || strcmp((const char *) top->name, "indexrules"))
+    {
+        zebra_index_rules_destroy(r);
+        return 0;
+    }
+    for (ptr = top->children; ptr; ptr = ptr->next)
+    {
+        if (ptr->type != XML_ELEMENT_NODE
+            || strcmp((const char *) ptr->name, "indexrule"))
+            continue;
+        *rp = parse_index_rule(ptr);
+        if (!*rp)
+        {
+            zebra_index_rules_destroy(r);
+            return 0;
+        }
+        rp = &(*rp)->next;
+    }
+    return r;
+#else
+    /* no file name is known here, so the message cannot include one */
+    (void) doc;
+    yaz_log(YLOG_WARN, "Cannot read index rules because YAZ is without XML "
+            "support");
+    return 0;
+/* YAZ_HAVE_XML2 */
+#endif
+}
+
+/* Releases an index rules object: every rule in the list, the XML
+   document the rules point into, the last_id buffer and the object
+   itself. A null handle is accepted and ignored, so the result of a
+   failed create call can be passed in directly. */
+void zebra_index_rules_destroy(zebra_index_rules_t r)
+{
+    if (!r)
+        return;
+#if YAZ_HAVE_XML2
+    while (r->rules)
+    {
+        struct zebra_index_rule *rule = r->rules;
+
+        r->rules = rule->next;
+        xfree(rule);
+    }
+    /* rules only borrowed strings from the document; free it last */
+    xmlFreeDoc(r->doc);
+#endif
+    wrbuf_destroy(r->last_id);
+    xfree(r);
+}
+
+/* Returns the id attribute (pattern) of the first rule, in list order,
+   whose pattern matches id according to zebra_rob_regexp; 0 if no rule
+   matches or YAZ was built without XML support.
+
+   The returned string belongs to the rules object. The most recent
+   successful lookup is remembered, so repeating the same id straight
+   away is answered without rescanning the rules; failed lookups are not
+   remembered. */
+const char *zebra_index_rule_lookup_str(zebra_index_rules_t r, const char *id)
+{
+#if YAZ_HAVE_XML2
+    struct zebra_index_rule *rule;
+
+    if (r->last_rule_match && !strcmp(wrbuf_cstr(r->last_id), id))
+        return r->last_rule_match->id;
+
+    wrbuf_rewind(r->last_id);
+    wrbuf_puts(r->last_id, id);
+    for (rule = r->rules; rule; rule = rule->next)
+    {
+        /* a rule without an id has no pattern; never hand a null
+           pointer to the matcher */
+        if (rule->id && zebra_rob_regexp(rule->id, id))
+            break;
+    }
+    r->last_rule_match = rule;
+    if (rule)
+        return rule->id;
+#endif
+    return 0;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
--- /dev/null
+/* $Id: rob_regexp.c,v 1.1 2007-10-23 12:26:26 adam Exp $
+ Copyright (C) 1995-2007
+ Index Data ApS
+
+ This file is part of the Zebra server.
+
+ Zebra is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2, or (at your option) any later
+ version.
+
+ Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Zebra; see the file LICENSE.zebra. If not, write to the
+ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA.
+*/
+
+/**
+ \file rob_regexp.c
+ \brief Rob Pike's regular expression matcher
+
+ Taken verbatim from "Beautiful Code" and ANSIfied a bit.
+ */
+
+
+#include <assert.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+
+#include "rob_regexp.h"
+#include <yaz/xmalloc.h>
+#include <yaz/wrbuf.h>
+#include <yaz/log.h>
+
+static int matchhere(const char *regexp, const char *text);
+
+/* matchstar: does c*regexp match at the beginning of text?
+   Tries the shortest run of c first and extends it one character at a
+   time; c == '.' lets the run consume any character. */
+static int matchstar(int c, const char *regexp, const char *text)
+{
+    for (;;)
+    {
+        if (matchhere(regexp, text))
+            return 1;
+        if (*text == '\0')
+            return 0;
+        if (*text != c && c != '.')
+            return 0;
+        text++;
+    }
+}
+
+/* matchhere: does regexp match at the beginning of text?
+   An exhausted regexp matches anything that is left of text. */
+static int matchhere(const char *regexp, const char *text)
+{
+    while (regexp[0] != '\0')
+    {
+        if (regexp[1] == '*')
+            return matchstar(regexp[0], regexp + 2, text);
+        if (regexp[0] == '$' && regexp[1] == '\0')
+            return *text == '\0';
+        if (*text == '\0')
+            return 0;
+        if (regexp[0] != '.' && regexp[0] != *text)
+            return 0;
+        /* one character consumed on each side; carry on with the rest */
+        regexp++;
+        text++;
+    }
+    return 1;
+}
+
+/* zebra_rob_regexp: returns 1 if regexp matches text, 0 otherwise.
+   A leading '^' anchors the match at the start of text; without it
+   every start position is tried, including the empty tail. */
+int zebra_rob_regexp(const char *regexp, const char *text)
+{
+    const char *start;
+
+    if (regexp[0] == '^')
+        return matchhere(regexp + 1, text);
+    for (start = text; ; start++)
+    {
+        if (matchhere(regexp, start))
+            return 1;
+        if (*start == '\0')
+            return 0;
+    }
+}
+
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
--- /dev/null
+/* $Id: tst_index_rules.c,v 1.1 2007-10-23 12:26:26 adam Exp $
+ Copyright (C) 1995-2007
+ Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+#include <charmap.h>
+#include <yaz/test.h>
+#include <index_rules.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Index rules document used by tst1. The rules appear in this order:
+   "^.*:w:el$", "^.*:w$", "^.*:p$", "^.*:s$"; lookup returns the first
+   rule whose pattern matches. */
+const char *xml_str =
+" <indexrules>"
+" <indexrule id=\"^.*:w:el$\" position=\"1\" alwaysmatches=\"1\" firstinfield=\"1\"\n"
+" locale=\"el\">\n"
+" <!-- conversion rules for words -->\n"
+" </indexrule>\n"
+" <indexrule id=\"^.*:w$\" position=\"1\" alwaysmatches=\"1\" firstinfield=\"1\"\n"
+" locale=\"en\">\n"
+" <!-- conversion rules for words -->\n"
+" </indexrule>\n"
+" <indexrule id=\"^.*:p$\" position=\"0\" alwaysmatches=\"0\" firstinfield=\"0\"\n"
+" locale=\"en\">\n"
+" <!-- conversion rules for phrase -->\n"
+" </indexrule>\n"
+" <indexrule id=\"^.*:s$\" sort=\"1\" \n"
+" locale=\"en\">\n"
+" <!-- conversion rules for phrase -->\n"
+" </indexrule>\n"
+" </indexrules>\n"
+;
+
+/* Looks up id in r and compares the outcome with expected_id.
+   Returns 1 when both are null (no match, none expected) or when both
+   are the same string; returns 0 otherwise. */
+int compare_lookup(zebra_index_rules_t r, const char *id,
+                   const char *expected_id)
+{
+    const char *actual = zebra_index_rule_lookup_str(r, id);
+
+    if (!actual || !expected_id)
+        return !actual && !expected_id;
+    return strcmp(actual, expected_id) == 0;
+}
+
+/* Builds the rules from xml_str and checks lookups for ids that match
+   one rule, several rules (the first one wins) and no rule at all.
+   The whole body is compiled only with XML support, because it calls
+   libxml2 directly. */
+void tst1(void)
+{
+#if YAZ_HAVE_XML2
+    xmlDocPtr doc = xmlParseMemory(xml_str, (int) strlen(xml_str));
+    zebra_index_rules_t rules;
+    const int iter = 3333;
+    int failures = 0;
+    int i;
+
+    YAZ_CHECK(doc);
+    if (!doc)
+        return;
+
+    /* doc now belongs to the rules object, also if creation fails */
+    rules = zebra_index_rules_create_doc(doc);
+    YAZ_CHECK(rules);
+    if (!rules)
+        return;
+
+    YAZ_CHECK(compare_lookup(rules, "title:s", "^.*:s$"));
+    YAZ_CHECK(compare_lookup(rules, "title:sx", 0));
+    YAZ_CHECK(compare_lookup(rules, "title:Sx", 0));
+    YAZ_CHECK(compare_lookup(rules, "any:w", "^.*:w$"));
+    YAZ_CHECK(compare_lookup(rules, "any:w:en", 0));
+    YAZ_CHECK(compare_lookup(rules, "any:w:el", "^.*:w:el$"));
+    /* same id again: answered from the remembered last match */
+    YAZ_CHECK(compare_lookup(rules, "any:w:el", "^.*:w:el$"));
+
+    /* many alternating lookups; count mismatches instead of issuing one
+       YAZ_CHECK per call, so the results are verified without flooding
+       the test log */
+    for (i = 0; i < iter; i++)
+    {
+        if (!compare_lookup(rules, "title:s", "^.*:s$"))
+            failures++;
+        if (!compare_lookup(rules, "title:sx", 0))
+            failures++;
+        if (!compare_lookup(rules, "title:Sx", 0))
+            failures++;
+    }
+    YAZ_CHECK(failures == 0);
+
+    zebra_index_rules_destroy(rules);
+/* YAZ_HAVE_XML2 */
+#endif
+}
+
+/* Test driver: runs tst1 between the YAZ test set-up and tear-down
+   macros. There is no explicit return; presumably YAZ_CHECK_TERM
+   supplies the exit status (confirm in yaz/test.h). */
+int main(int argc, char **argv)
+{
+ YAZ_CHECK_INIT(argc, argv);
+ YAZ_CHECK_LOG();
+
+ tst1();
+
+ YAZ_CHECK_TERM;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+