charmap test.
-/* $Id: charmap.h,v 1.13 2005-06-15 18:52:49 adam Exp $
+/* $Id: charmap.h,v 1.14 2005-06-15 21:31:45 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
typedef struct chrmaptab_info *chrmaptab;
YAZ_EXPORT chrmaptab chrmaptab_create(const char *tabpath, const char *name,
- int map_only, const char *tabroot);
+ const char *tabroot);
YAZ_EXPORT void chrmaptab_destroy (chrmaptab tab);
YAZ_EXPORT const char **chr_map_input(chrmaptab t, const char **from, int len, int first);
Makefile
Makefile.in
passtest
+tstcharmap
idzebra-config
*.lo
*.la
+*.log
-## $Id: Makefile.am,v 1.14 2005-06-07 11:36:42 adam Exp $
+## $Id: Makefile.am,v 1.15 2005-06-15 21:31:45 adam Exp $
lib_LTLIBRARIES = libidzebra-util.la
noinst_PROGRAMS = passtest
+check_PROGRAMS = tstcharmap
+
+TESTS = $(check_PROGRAMS)
+
bin_SCRIPTS = idzebra-config
-EXTRA_DIST = zebrasrv.rh
+EXTRA_DIST = zebrasrv.rh tstcharmap.chr
DISTCLEANFILES = idzebra-config
zebra-lock.c dirent.c xpath.c atoi_zn.c snippet.c
passtest_SOURCES = passtest.c
+
+tstcharmap_SOURCES = tstcharmap.c
-/* $Id: charmap.c,v 1.37 2005-06-14 12:42:49 adam Exp $
+/* $Id: charmap.c,v 1.38 2005-06-15 21:31:45 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
return 0;
}
-chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only,
- const char *tabroot)
+chrmaptab chrmaptab_create(const char *tabpath, const char *name,
+ const char *tabroot)
{
FILE *f;
char line[512], *argv[50];
unsigned endian = 31;
const char *ucs4_native = "UCS-4";
- if (*(char*) &endian == 31) /* little endian? */
- ucs4_native = "UCS-4LE";
-
- t_utf8 = yaz_iconv_open ("UTF-8", ucs4_native);
-
yaz_log (YLOG_DEBUG, "maptab %s open", name);
if (!(f = yaz_fopen(tabpath, name, "r", tabroot)))
{
yaz_log(YLOG_WARN|YLOG_ERRNO, "%s", name);
return 0;
}
+
+ if (*(char*) &endian == 31) /* little endian? */
+ ucs4_native = "UCS-4LE";
+
+ t_utf8 = yaz_iconv_open ("UTF-8", ucs4_native);
+
nmem = nmem_create ();
res = (chrmaptab) nmem_malloc(nmem, sizeof(*res));
res->nmem = nmem;
res->input->children[i]->target = (unsigned char **)
nmem_malloc (res->nmem, 2 * sizeof(unsigned char *));
res->input->children[i]->target[1] = 0;
- if (map_only)
- {
- res->input->children[i]->target[0] = (unsigned char *)
- nmem_malloc (res->nmem, 2 * sizeof(unsigned char));
- res->input->children[i]->target[0][0] = i;
- res->input->children[i]->target[0][1] = 0;
- }
- else
- res->input->children[i]->target[0] = (unsigned char*) CHR_UNKNOWN;
+ res->input->children[i]->target[0] = (unsigned char*) CHR_UNKNOWN;
}
res->q_input = (chr_t_entry *)
nmem_malloc(res->nmem, sizeof(*res->q_input));
res->base_uppercase = 0;
while (!errors && (argc = readconf_line(f, &lineno, line, 512, argv, 50)))
- if (!map_only && !yaz_matchstr(argv[0], "lowercase"))
+ if (!yaz_matchstr(argv[0], "lowercase"))
{
if (argc != 2)
{
res->output[(int) *CHR_UNKNOWN + num] = (unsigned char*) "@";
num = (int) *CHR_BASE;
}
- else if (!map_only && !yaz_matchstr(argv[0], "uppercase"))
+ else if (!yaz_matchstr(argv[0], "uppercase"))
{
if (!res->base_uppercase)
{
++errors;
}
}
- else if (!map_only && !yaz_matchstr(argv[0], "space"))
+ else if (!yaz_matchstr(argv[0], "space"))
{
if (argc != 2)
{
++errors;
}
}
- else if (!map_only && !yaz_matchstr(argv[0], "cut"))
+ else if (!yaz_matchstr(argv[0], "cut"))
{
if (argc != 2)
{
--- /dev/null
+/* $Id: tstcharmap.c,v 1.1 2005-06-15 21:31:45 adam Exp $
+ Copyright (C) 1995-2005
+ Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <charmap.h>
+
+void tst1()
+{
+ /* Open the existing map tstcharmap.chr, passing 0 for both tabpath
+  * and tabroot. Creation must yield a non-null table, which is then
+  * released with chrmaptab_destroy(). */
+ chrmaptab tab = chrmaptab_create(0 /* tabpath */,
+ "tstcharmap.chr" /* file */,
+ 0 /* tabroot */ );
+ assert(tab);
+
+ chrmaptab_destroy(tab);
+}
+
+void tst2()
+{
+ /* Open the non-existing map nonexist.chr. chrmaptab_create() returns 0
+  * when the file cannot be opened, so the result must be null and there
+  * is nothing to destroy. */
+ chrmaptab tab = chrmaptab_create(0 /* tabpath */,
+ "nonexist.chr" /* file */,
+ 0 /* tabroot */ );
+ assert(!tab);
+}
+
+/* Test driver: directs YAZ logging to "<argv[0]>.log", then runs the
+ * charmap tests in order. A failing assert() aborts the program, so
+ * reaching exit(0) means every test passed. */
+int main(int argc, char **argv)
+{
+ char logname[2048];
+ /* snprintf bounds the write: an over-long program path is truncated
+  * instead of overflowing logname. */
+ snprintf(logname, sizeof(logname), "%s.log", argv[0]);
+ yaz_log_init_file(logname);
+
+ tst1();
+ tst2();
+
+ exit(0);
+}
+
--- /dev/null
+# Generic character map.
+#
+# $Id: tstcharmap.chr,v 1.1 2005-06-15 21:31:45 adam Exp $
+
+# Define the basic value-set. *Beware* of changing this without re-indexing
+# your databases.
+
+lowercase {0-9}{a-y}üzæäøöå
+uppercase {0-9}{A-Y}ÜZÆÄØÖÅ
+
+# Breaking characters
+
+space {\001-\040}!"#$%&'\()*+,-./:;<=>?@\[\\]^_`\{|}~
+
+# Characters to be considered equivalent for searching purposes.
+
+# equivalent æä(ae)
+# equivalent øö(oe)
+# equivalent å(aa)
+# equivalent uü
+
+# Supplemental mappings
+
+#map (&auml;) ä
+#map (&aelig;) æ
+#map (&oslash;) ø
+#map (&aring;) å
+#map (&ouml;) ö
+#map (&Auml;) Ä
+#map (&Aelig;) Æ
+#map (&Oslash;) Ø
+#map (&Aring;) Å
+#map (&Ouml;) Ö
+
+#map éÉ e
+#map á a
+#map ó o
+#map í i
+
+#map (Aa) (AA)
+
+#map (aa) a
-/* $Id: zebramap.c,v 1.41 2005-06-14 12:42:49 adam Exp $
+/* $Id: zebramap.c,v 1.42 2005-06-15 21:31:45 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
if (!zm->maptab_name || !yaz_matchstr (zm->maptab_name, "@"))
return NULL;
if (!(zm->maptab = chrmaptab_create (zms->tabpath,
- zm->maptab_name, 0,
+ zm->maptab_name,
zms->tabroot)))
yaz_log(YLOG_WARN, "Failed to read character table %s",
zm->maptab_name);