-/*
- * Copyright (C) 1995-2002, Index Data
- * All rights reserved.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Id: zrpn.c,v 1.119 2002-08-02 10:07:48 adam Exp $
- */
+/* $Id: zrpn.c,v 1.125 2002-10-03 10:16:23 adam Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+ Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+
#include <stdio.h>
#include <assert.h>
#ifdef WIN32
+/* Character-map callback: vp is cast to a struct rpn_char_map_info and its
+ * zm and reg_type members are passed, together with from and len, to
+ * zebra_maps_input(); that function's result is returned unchanged.
+ */
static const char **rpn_char_map_handler (void *vp, const char **from, int len)
{
struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
- return zebra_maps_input (p->zm, p->reg_type, from, len);
+ const char **out = zebra_maps_input (p->zm, p->reg_type, from, len);
+#if 0
+ /* disabled debug aid: log every byte of the first mapped string in hex */
+ if (out && *out)
+ {
+     const char *outp = *out;
+     yaz_log (LOG_LOG, "---");
+     while (*outp)
+     {
+         /* cast so bytes >= 0x80 are not sign-extended (and printed as
+            FFFFFFxx) on platforms where plain char is signed */
+         yaz_log (LOG_LOG, "%02X", (unsigned char) *outp);
+         outp++;
+     }
+ }
+#endif
+ return out;
}
static void rpn_char_map_prepare (struct zebra_register *reg, int reg_type,
+/* Map the internal term src back to its external form in dst.
+ *
+ * Each pass asks zebra_maps_output() for the replacement of the text at
+ * src (it is handed &src and presumably advances it); when it returns
+ * NULL the byte at src is copied as is.  At most IT_MAX_WORD-1 bytes are
+ * stored, followed by a terminating NUL, so dst must have room for
+ * IT_MAX_WORD bytes; longer results are truncated.
+ */
static void term_untrans (ZebraHandle zh, int reg_type,
char *dst, const char *src)
{
+ int len = 0;
while (*src)
{
const char *cp = zebra_maps_output (zh->reg->zebra_maps,
reg_type, &src);
- if (!cp)
- *dst++ = *src++;
+ if (!cp)
+ {
+     /* no mapping: keep the raw byte if it still fits, but always
+        consume it, so that a full dst can neither stall the loop nor
+        fall into the else-branch with cp == NULL */
+     if (len < IT_MAX_WORD-1)
+         dst[len++] = *src;
+     src++;
+ }
else
- while (*cp)
- *dst++ = *cp++;
+ while (*cp && len < IT_MAX_WORD-1)
+ dst[len++] = *cp++;
}
- *dst = '\0';
+ dst[len] = '\0';
}
static void add_isam_p (const char *name, const char *info,
{
const char *db;
int set, use;
- char term_tmp[512];
+ char term_tmp[IT_MAX_WORD];
int su_code = 0;
int len = key_SU_decode (&su_code, name);
return *s0;
}
-#define REGEX_CHARS "[]()|.*+!"
+#define REGEX_CHARS " []()|.*+?!"
/* term_100: handle term, where trunc=none (no operators at all) */
static int term_100 (ZebraMaps zebra_maps, int reg_type,
attributeSet,
reg_type, space_split, term_dst))
return 0;
- logf (LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len);
+ logf (LOG_LOG, "dict_lookup_grep: %s", term_dict+prefix_len);
r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
grep_info, &max_pos, 0, grep_handle);
if (r)
- logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
+ logf (LOG_WARN, "dict_lookup_grep fail %d", r);
break;
case 1: /* right truncation */
term_dict[j++] = '(';
}
-static int trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
- char *termz)
+/* convert APT search term to UTF8 */
+static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+ char *termz)
{
size_t sizez;
Z_Term *term = zapt->term;
switch (term->which)
{
case Z_Term_general:
-#if HAVE_ICONV_H
- if (zh->iconv_to_utf8 != (iconv_t)(-1))
+ if (zh->iconv_to_utf8 != 0)
{
char *inbuf = term->u.general->buf;
size_t inleft = term->u.general->len;
size_t outleft = IT_MAX_WORD-1;
size_t ret;
- yaz_log (LOG_DEBUG, "converting general from ISO-8859-1");
- ret = iconv(zh->iconv_to_utf8, &inbuf, &inleft,
+ ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
&outbuf, &outleft);
if (ret == (size_t)(-1))
{
- ret = iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
+ ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
zh->errCode = 125;
return -1;
}
*outbuf = 0;
- return 0;
}
-#endif
- sizez = term->u.general->len;
- if (sizez > IT_MAX_WORD-1)
- sizez = IT_MAX_WORD-1;
- memcpy (termz, term->u.general->buf, sizez);
- termz[sizez] = '\0';
+ else
+ {
+ sizez = term->u.general->len;
+ if (sizez > IT_MAX_WORD-1)
+ sizez = IT_MAX_WORD-1;
+ memcpy (termz, term->u.general->buf, sizez);
+ termz[sizez] = '\0';
+ }
break;
case Z_Term_characterString:
sizez = strlen(term->u.characterString);
break;
default:
zh->errCode = 124;
+ return -1;
}
return 0;
}
-static void trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
- char *termz, int reg_type)
+/* convert APT SCAN term to internal cmap
+ *
+ * The term is first converted with zapt_term_to_utf8() into a local
+ * buffer; if that call reports an error, -1 is returned and termz is
+ * left unwritten.  Otherwise the converted text is passed through
+ * zebra_maps_input() piece by piece, the mapped output is appended to
+ * termz, termz is NUL-terminated and 0 is returned.
+ *
+ * A mapping whose first character equals that of CHR_SPACE is not
+ * written at once: only the most recent one is remembered, and it is
+ * emitted just before the next non-space mapping, and only if some
+ * output already exists.  Leading and trailing space mappings are
+ * therefore dropped and a run of them yields a single one.
+ *
+ * NOTE(review): writes to termz are not bounds-checked here, and the
+ * value returned by zebra_maps_input() is dereferenced without a NULL
+ * check; the loop also relies on that call advancing cp.  Confirm the
+ * caller's buffer size and the helper's contract.
+ */
+static int trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+ char *termz, int reg_type)
{
- Z_Term *term = zapt->term;
- const char **map;
- const char *cp = (const char *) term->u.general->buf;
- const char *cp_end = cp + term->u.general->len;
- const char *src;
- int i = 0;
- const char *space_map = NULL;
- int len;
-
- while ((len = (cp_end - cp)) > 0)
+ char termz0[IT_MAX_WORD]; /* term as produced by zapt_term_to_utf8() */
+
+ if (zapt_term_to_utf8(zh, zapt, termz0))
+ return -1; /* error */
+ else
{
- map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len);
- if (**map == *CHR_SPACE)
- space_map = *map;
- else
+ const char **map;
+ const char *cp = (const char *) termz0;
+ const char *cp_end = cp + strlen(cp);
+ const char *src;
+ int i = 0; /* number of bytes written to termz so far */
+ const char *space_map = NULL; /* pending space mapping, if any */
+ int len;
+
+ while ((len = (cp_end - cp)) > 0)
{
- if (i && space_map)
- for (src = space_map; *src; src++)
+ map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len);
+ if (**map == *CHR_SPACE)
+ space_map = *map; /* defer: written only if more output follows */
+ else
+ {
+ if (i && space_map)
+ for (src = space_map; *src; src++)
+ termz[i++] = *src;
+ space_map = NULL;
+ for (src = *map; *src; src++)
termz[i++] = *src;
- space_map = NULL;
- for (src = *map; *src; src++)
- termz[i++] = *src;
+ }
}
+ termz[i] = '\0';
}
- termz[i] = '\0';
+ return 0;
}
static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no,
int prefix_len = 0;
int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
int ord_len, i, r, max_pos;
+ int term_type = Z_Term_characterString;
+ const char *flags = "void";
if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
- return 0;
+ {
+ rset_null_parms parms;
+
+ parms.rset_term = rset_term_create (term, strlen(term),
+ flags, term_type);
+ parms.rset_term->nn = 0;
+ return rset_create (rset_kind_null, &parms);
+ }
if (ord < 0)
- return 0;
+ {
+ rset_null_parms parms;
+
+ parms.rset_term = rset_term_create (term, strlen(term),
+ flags, term_type);
+ parms.rset_term->nn = 0;
+ return rset_create (rset_kind_null, &parms);
+ }
if (prefix_len)
term_dict[prefix_len++] = '|';
else
grep_info.isam_p_indx);
rset = rset_trunc (zh, grep_info.isam_p_buf,
grep_info.isam_p_indx, term, strlen(term),
- "void", 1, Z_Term_characterString);
+ flags, 1, term_type);
grep_info_delete (&grep_info);
return rset;
}
rset_end_tag = xpath_trunc(zh, stream,
'0', xpath_rev, 2, curAttributeSet);
-
+
parms.key_size = sizeof(struct it_key);
parms.cmp = key_compare_it;
parms.rset_l = rset_start_tag;
logf (LOG_DEBUG, "search_type=%s", search_type);
logf (LOG_DEBUG, "rank_type=%s", rank_type);
- if (trans_term (zh, zapt, termz))
+ if (zapt_term_to_utf8(zh, zapt, termz))
return 0;
if (sort_flag)
static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
char **dst, const char *src)
{
- char term_dst[1024];
-
- term_untrans (zh, reg_type, term_dst, src);
+ char term_src[IT_MAX_WORD]; /* src after term_untrans() */
+ char term_dst[IT_MAX_WORD]; /* yaz_iconv() output; used only when
+                                zh->iconv_from_utf8 is non-zero */
- *dst = (char *) nmem_malloc (stream, strlen(term_dst)+1);
- strcpy (*dst, term_dst);
+ term_untrans (zh, reg_type, term_src, src); /* fills term_src; the final
+                                                string is handed back in *dst,
+                                                allocated from stream */
+
+ if (zh->iconv_from_utf8 != 0)
+ {
+ int len;
+ char *inbuf = term_src;
+ size_t inleft = strlen(term_src);
+ char *outbuf = term_dst;
+ size_t outleft = sizeof(term_dst)-1;
+ size_t ret;
+
+ ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
+ &outbuf, &outleft);
+ if (ret == (size_t)(-1))
+ len = 0; /* conversion failed: *dst becomes the empty string.
+             NOTE(review): zapt_term_to_utf8() resets its converter
+             after a failure; no reset is done here - confirm that
+             is intended */
+ else
+ len = outbuf - term_dst; /* assumes yaz_iconv() advanced outbuf past
+                             the bytes it wrote - TODO confirm */
+ *dst = nmem_malloc (stream, len + 1);
+ if (len > 0)
+ memcpy (*dst, term_dst, len);
+ (*dst)[len] = '\0';
+ }
+ else
+ *dst = nmem_strdup (stream, term_src); /* no converter: copy term_src unchanged */
}
static void count_set (RSET r, int *count)
termz[prefix_len] = 0;
strcpy (scan_info->prefix, termz);
- trans_scan_term (zh, zapt, termz+prefix_len, reg_id);
+ if (trans_scan_term (zh, zapt, termz+prefix_len, reg_id))
+ return ;
dict_scan (zh->reg->dict, termz, &before_tmp, &after_tmp,
scan_info, scan_handle);