-/* $Id: zrpn.c,v 1.198 2005-06-09 10:39:53 adam Exp $
+/* $Id: zrpn.c,v 1.199 2005-06-14 12:42:48 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
return ZEBRA_OK;
}
/**
   \brief Normalize a query term before truncation handling.

   Diff note: every line of this function carries a '-' marker, i.e. the
   change shown here deletes it.

   Looks up attribute type 5 on zapt (kept in the local "truncation") and
   maps its value to an exclusion list that is passed to zebra_replace():
     101        -> "#"
     104        -> "!#"
     105        -> "!*"
     102, 103   -> no list (0); zebra_replace() is not called at all
     otherwise  -> "" (empty list)

   \param zh      Zebra handle; zh->reg->zebra_maps is handed to zebra_replace()
   \param zapt    attributes-plus-term the attribute is read from
   \param termz   NUL-terminated input term (strlen() is taken of it)
   \param stream  NMEM the returned string is allocated from
   \param reg_id  register id forwarded to zebra_replace()
   \return a NUL-terminated string allocated from stream: a plain copy of
           termz when zebra_replace() was skipped or returned 0, otherwise
           a copy of the returned WRBUF contents
*/
-char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
- const char *termz, NMEM stream, unsigned reg_id)
-{
- WRBUF wrbuf = 0;
- AttrType truncation;
- int truncation_value;
- char *ex_list = 0;
-
- attr_init(&truncation, zapt, 5);
- truncation_value = attr_find(&truncation, NULL);
-
- switch (truncation_value)
- {
- default:
- ex_list = "";
- break;
- case 101:
- ex_list = "#";
- break;
- case 102:
- case 103:
- ex_list = 0;
- break;
- case 104:
- ex_list = "!#";
- break;
- case 105:
- ex_list = "!*";
- break;
- }
- if (ex_list)
- wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
- termz, strlen(termz));
- if (!wrbuf)
- return nmem_strdup(stream, termz);
- else
- {
- char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
- memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
- buf[wrbuf_len(wrbuf)] = '\0';
- return buf;
- }
-}
-
static void grep_info_delete(struct grep_info *grep_info)
{
#ifdef TERM_COUNT
/**
\brief Create result set(s) for list of terms
\param zh Zebra Handle
- \param termz_org term as used in query but converted to UTF-8
+ \param termz term as used in query but converted to UTF-8
\param attributeSet default attribute set
\param stream memory for result
\param reg_type register type ('w', 'p',..)
*/
static ZEBRA_RES term_list_trunc(ZebraHandle zh,
Z_AttributesPlusTerm *zapt,
- const char *termz_org,
+ const char *termz,
oid_value attributeSet,
NMEM stream,
int reg_type, int complete_flag,
{
char term_dst[IT_MAX_WORD+1];
struct grep_info grep_info;
- char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
const char *termp = termz;
int alloc_sets = 0;
-/* $Id: charmap.c,v 1.36 2005-03-11 17:56:36 adam Exp $
+/* $Id: charmap.c,v 1.37 2005-06-14 12:42:49 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
return (const char*) maptab->output[c];
}
/**
   \brief Read one (possibly backslash-escaped) character and advance *s.

   Diff note: every line of this function carries a '-' marker, i.e. the
   change shown here deletes it.

   When **s is a backslash the character after it selects the result:
     backslash, r, n, t  -> the usual C control characters
     s                   -> a space
     x                   -> value parsed by sscanf with "x%2x"
     0..9                -> value parsed by sscanf with "%3o"
     anything else       -> that character itself
   When **s is not a backslash the character is returned unchanged.

   NOTE(review): the x and digit cases always advance *s by 3, regardless
   of how many characters sscanf actually consumed, and the sscanf return
   value is not checked.
   NOTE(review): a backslash that is the last character of the string
   yields c == 0, takes the default branch and advances *s past the
   terminating NUL.

   \param s  in/out cursor into a NUL-terminated string
   \return the decoded character, truncated to unsigned char
*/
-unsigned char zebra_prim(char **s)
-{
- unsigned char c;
- unsigned int i = 0;
-
- yaz_log (YLOG_DEBUG, "prim %.3s", *s);
- if (**s == '\\')
- {
- (*s)++;
- c = **s;
- switch (c)
- {
- case '\\': c = '\\'; (*s)++; break;
- case 'r': c = '\r'; (*s)++; break;
- case 'n': c = '\n'; (*s)++; break;
- case 't': c = '\t'; (*s)++; break;
- case 's': c = ' '; (*s)++; break;
- case 'x': sscanf(*s, "x%2x", &i); c = i; *s += 3; break;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- sscanf(*s, "%3o", &i);
- c = i;
- *s += 3;
- break;
- default:
- (*s)++;
- }
- }
- else
- {
- c = **s;
- ++(*s);
- }
- return c;
-}
-
static int zebra_ucs4_strlen(ucs4_t *s)
{
int i = 0;
char fmtstr[8];
yaz_log (YLOG_DEBUG, "prim_w %.3s", (char *) *s);
- if (**s == '\\')
+ if (**s == '\\' && 1[*s])
{
(*s)++;
c = **s;
(*fun)(str, data, num ? (*num)++ : 0);
}
break;
- case '[': s++; abort(); break;
case '(':
++s;
s0 = s; i = 0;
-/* $Id: zebramap.c,v 1.40 2005-03-11 17:56:36 adam Exp $
+/* $Id: zebramap.c,v 1.41 2005-06-14 12:42:49 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
#define ZEBRA_REPLACE_ANY 300
/**
   \brief One "replace" rule of a zebra_map, kept in a singly linked list.

   Diff note: every line of this struct carries a '-' marker, i.e. the
   change shown here deletes it.

   token_from  zero-terminated int array holding the pattern; an element
               may be ZEBRA_REPLACE_ANY (filled in for '.' by the
               "replace" directive parser)
   token_to    NUL-terminated replacement text, or 0 when the directive
               gave no replacement argument
   token_min   not referenced anywhere in the code visible here
   next        next rule; the list head is zebra_map.replace_tokens
*/
-struct zm_token {
- int *token_from;
- char *token_to;
- int token_min;
- struct zm_token *next;
-};
-};
-
struct zebra_map {
unsigned reg_id;
int completeness;
chrmaptab maptab;
const char *maptab_name;
struct zebra_map *next;
- struct zm_token *replace_tokens;
};
struct zebra_maps {
char temp_map_str[2];
const char *temp_map_ptr[2];
struct zebra_map **lookup_array;
- WRBUF wrbuf_1, wrbuf_2;
+ WRBUF wrbuf_1;
};
void zebra_maps_close (ZebraMaps zms)
zm = zm->next;
}
wrbuf_free (zms->wrbuf_1, 1);
- wrbuf_free (zms->wrbuf_2, 1);
nmem_destroy (zms->nmem);
xfree (zms);
}
(*zm)->type = ZEBRA_MAP_TYPE_INDEX;
(*zm)->completeness = 0;
(*zm)->positioned = 1;
- (*zm)->replace_tokens = 0;
}
else if (!yaz_matchstr (argv[0], "sort") && argc == 2)
{
(*zm)->maptab = NULL;
(*zm)->completeness = 0;
(*zm)->positioned = 0;
- (*zm)->replace_tokens = 0;
}
else if (zm && !yaz_matchstr (argv[0], "charmap") && argc == 2)
{
if ((*zm)->type == ZEBRA_MAP_TYPE_SORT)
(*zm)->u.sort.entry_size = atoi (argv[1]);
}
- else if (zm && !yaz_matchstr (argv[0], "replace") && argc >= 2)
- {
- struct zm_token *token = nmem_malloc (zms->nmem, sizeof(*token));
- token->next = (*zm)->replace_tokens;
- (*zm)->replace_tokens = token;
-#if 0
- yaz_log (YLOG_LOG, "replace %s", argv[1]);
-#endif
- token->token_from = 0;
- if (argc >= 2)
- {
- char *cp = argv[1];
- int *dp = token->token_from = (int *)
- nmem_malloc (zms->nmem, (1+strlen(cp))*sizeof(int));
- while (*cp)
- if (*cp == '$')
- {
- *dp++ = ' ';
- cp++;
- }
- else if (*cp == '.')
- {
- *dp++ = ZEBRA_REPLACE_ANY;
- cp++;
- }
- else
- {
- *dp++ = zebra_prim(&cp);
-#if 0
- yaz_log (YLOG_LOG, " char %2X %c", dp[-1], dp[-1]);
-#endif
- }
- *dp = '\0';
- }
- if (argc >= 3)
- {
- char *cp = argv[2];
- char *dp = token->token_to =
- nmem_malloc (zms->nmem, strlen(cp)+1);
- while (*cp)
- if (*cp == '$')
- {
- *dp++ = ' ';
- cp++;
- }
- else
- *dp++ = zebra_prim(&cp);
- *dp = '\0';
- }
- else
- token->token_to = 0;
- }
}
if (zm)
(*zm)->next = NULL;
zebra_map_read (zms, "default.idx");
zms->wrbuf_1 = wrbuf_alloc();
- zms->wrbuf_2 = wrbuf_alloc();
return zms;
}
zm->type = ZEBRA_MAP_TYPE_INDEX;
zm->completeness = 0;
zm->next = zms->map_list;
- zm->replace_tokens = 0;
zms->map_list = zm->next;
zms->lookup_array[zm->reg_id & 255] = zm;
return 0;
}
-int zebra_replace_sub(ZebraMaps zms, unsigned reg_id, const char *ex_list,
- const char *input_str, int input_len, WRBUF wrbuf);
-
/**
   \brief Copy a term into the map set's scratch buffer and return it.

   Diff note: the '-' lines below remove the zebra_map lookup and the
   loop that alternated zebra_replace_sub() between wrbuf_1 and wrbuf_2
   until a pass made no replacement. With the single '+' line applied the
   function only rewinds zms->wrbuf_1, writes input_len bytes of
   input_str into it and returns it; reg_id and ex_list are then unused.

   \param zms        map set owning the scratch buffer
   \param reg_id     register id (only read by the removed lines)
   \param ex_list    exclusion list (only read by the removed lines)
   \param input_str  bytes to copy
   \param input_len  number of bytes to copy from input_str
   \return zms->wrbuf_1; it is rewound on every call, so the contents are
           overwritten by the next zebra_replace() on the same zms
*/
WRBUF zebra_replace(ZebraMaps zms, unsigned reg_id, const char *ex_list,
const char *input_str, int input_len)
{
- struct zebra_map *zm = zebra_map_get (zms, reg_id);
-
wrbuf_rewind(zms->wrbuf_1);
wrbuf_write(zms->wrbuf_1, input_str, input_len);
- if (!zm || !zm->replace_tokens)
- return zms->wrbuf_1;
-
-#if 0
- yaz_log (YLOG_LOG, "in:%.*s:", wrbuf_len(zms->wrbuf_1),
- wrbuf_buf(zms->wrbuf_1));
-#endif
- for (;;)
- {
- if (!zebra_replace_sub(zms, reg_id, ex_list, wrbuf_buf(zms->wrbuf_1),
- wrbuf_len(zms->wrbuf_1), zms->wrbuf_2))
- return zms->wrbuf_2;
- if (!zebra_replace_sub(zms, reg_id, ex_list, wrbuf_buf(zms->wrbuf_2),
- wrbuf_len(zms->wrbuf_2), zms->wrbuf_1))
- return zms->wrbuf_1;
- }
- return 0;
+ return zms->wrbuf_1;
}
/**
   \brief Run one pass of the map's replace rules over an input string.

   Diff note: every line of this function carries a '-' marker, i.e. the
   change shown here deletes it.

   The scan position i runs from -1 up to and including input_len;
   positions outside [0, input_len) read as a space, so a space in a
   pattern can also match the start or end of the input. At each position
   the rules in zm->replace_tokens are tried in list order until one
   matches completely (its token_from reaches the terminating 0):
     - a rule is abandoned as soon as one of its pattern elements occurs
       in ex_list (when ex_list is non-NULL);
     - ZEBRA_REPLACE_ANY matches any input byte except a space and that
       byte is copied to the output;
     - any other element must equal the input byte; on the first such
       literal match token_to (if any) is appended once.
   On a match the collected replacement is written to wrbuf and i skips
   the matched length; otherwise the input byte at i (if in range) is
   copied and i advances by one.

   NOTE(review): replace_string is a fixed 128-byte array and
   replace_out is never checked against that size.
   NOTE(review): zm is dereferenced without a NULL check; the removed
   caller zebra_replace() tested !zm before calling.

   \param zms        map set used to look up the zebra_map for reg_id
   \param reg_id     register id
   \param ex_list    pattern elements that disable a rule, or NULL
   \param input_str  input bytes
   \param input_len  number of input bytes
   \param wrbuf      output buffer; rewound first, then filled
   \return number of replacements made in this pass (0 means the output
           equals the input)
*/
-int zebra_replace_sub(ZebraMaps zms, unsigned reg_id, const char *ex_list,
- const char *input_str, int input_len, WRBUF wrbuf)
-{
- int i = -1;
- int no_replaces = 0;
- struct zebra_map *zm = zebra_map_get (zms, reg_id);
-
- wrbuf_rewind(wrbuf);
- for (i = -1; i <= input_len; )
- {
- struct zm_token *token;
- char replace_string[128];
- int replace_out = 0;
- int replace_in = 0;
-
- for (token = zm->replace_tokens; !replace_in && token;
- token = token->next)
- {
- int j = 0;
- int replace_done = 0;
- replace_out = 0;
- for (;; j++)
- {
- int c;
- if (!token->token_from[j])
- {
- replace_in = j;
- break;
- }
- if (ex_list && strchr (ex_list, token->token_from[j]))
- break;
- if (i+j < 0 || j+i >= input_len)
- c = ' ';
- else
- c = input_str[j+i] & 255;
- if (token->token_from[j] == ZEBRA_REPLACE_ANY)
- {
- if (c == ' ')
- break;
- replace_string[replace_out++] = c;
- }
- else
- {
- if (c != token->token_from[j])
- {
- break;
- }
- if (!replace_done)
- {
- const char *cp = token->token_to;
- replace_done = 1;
- for (; cp && *cp; cp++)
- replace_string[replace_out++] = *cp;
- }
- }
- }
- }
- if (!replace_in)
- {
- if (i >= 0 && i < input_len)
- wrbuf_putc(wrbuf, input_str[i]);
- i++;
- }
- else
- {
- no_replaces++;
- if (replace_out)
- wrbuf_write(wrbuf, replace_string, replace_out);
- i += replace_in;
- }
- }
-#if 0
- yaz_log (YLOG_LOG, "out:%.*s:", wrbuf_len(wrbuf), wrbuf_buf(wrbuf));
-#endif
- return no_replaces;
-}