From 25aa9d21203318cca1927f906ab4f7370e43a5fe Mon Sep 17 00:00:00 2001
From: Adam Dickmeiss
Date: Mon, 22 Jan 2007 18:15:02 +0000
Subject: [PATCH] Staticrank indexing is now an index register type defined in
 default.idx via directive 'staticrank'. The 'staticrank' directive for grs
 is no longer supported (was only implemented for Zebra 2.0.8).

---
 NEWS               |    4 ++++
 data1/d1_absyn.c   |   20 +-------------------
 include/d1_absyn.h |    3 +--
 include/zebramap.h |    8 +++++++-
 index/extract.c    |   28 ++++++++++++++++++++++----
 index/recgrs.c     |   25 +------------------------
 tab/default.idx    |    4 +++-
 util/zebramap.c    |   55 +++++++++++++++++++++++++++++++++++++++++++++++++---
 8 files changed, 93 insertions(+), 54 deletions(-)

diff --git a/NEWS b/NEWS
index f2ce8cf..b44ef7a 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,7 @@
+Staticrank indexing is now an index register type defined in default.idx
+via directive 'staticrank'. The 'staticrank' directive for grs is no longer
+supported (was only implemented for Zebra 2.0.8).
+
 For searches, allow truncmax value to be controlled with attribute 13.
 If given, that overrides the value of 'truncmax'.
 
diff --git a/data1/d1_absyn.c b/data1/d1_absyn.c
index 4bf44b1..4af2713 100644
--- a/data1/d1_absyn.c
+++ b/data1/d1_absyn.c
@@ -1,4 +1,4 @@
-/* $Id: d1_absyn.c,v 1.33 2007-01-15 15:10:14 adam Exp $
+/* $Id: d1_absyn.c,v 1.34 2007-01-22 18:15:02 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -703,7 +703,6 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
     res->reference = VAL_NONE;
     res->tagset = 0;
     res->encoding = 0;
-    res->staticrank = 0;
     res->xpath_indexing = (f ?
                            DATA1_XPATH_INDEXING_DISABLE : default_xpath);
     res->systags = 0;
@@ -1183,18 +1182,6 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
             (*systagsp)->value = nmem_strdup(data1_nmem_get(dh), argv[2]);
             systagsp = &(*systagsp)->next;
         }
-        else if (!strcmp(cmd, "staticrank"))
-        {
-            if (argc != 2)
-            {
-                yaz_log(YLOG_WARN, "%s:%d: Bad # or args for staticrank",
-                        file, lineno);
-            }
-            else
-            {
-                res->staticrank = nmem_strdup(data1_nmem_get(dh), argv[1]);
-            }
-        }
         else
         {
             yaz_log(YLOG_WARN, "%s:%d: Unknown directive '%s'", file,
@@ -1216,11 +1203,6 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
     return res;
 }
 
-YAZ_EXPORT const char *data1_absyn_get_staticrank(data1_absyn *absyn)
-{
-    return absyn ? absyn->staticrank : 0;
-}
-
 /*
  * Local variables:
  * c-basic-offset: 4
diff --git a/include/d1_absyn.h b/include/d1_absyn.h
index 2c8a581..ff85ba6 100644
--- a/include/d1_absyn.h
+++ b/include/d1_absyn.h
@@ -1,4 +1,4 @@
-/* $Id: d1_absyn.h,v 1.10 2007-01-15 20:08:24 adam Exp $
+/* $Id: d1_absyn.h,v 1.11 2007-01-22 18:15:03 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -61,7 +61,6 @@ struct data1_absyn
     struct data1_xpelement *xp_elements; /* pop */
     struct data1_systag *systags;
     char *encoding;
-    char *staticrank;
     enum DATA1_XPATH_INDEXING xpath_indexing;
 };
 
diff --git a/include/zebramap.h b/include/zebramap.h
index d62c70c..dc244ae 100644
--- a/include/zebramap.h
+++ b/include/zebramap.h
@@ -1,4 +1,4 @@
-/* $Id: zebramap.h,v 1.21 2007-01-15 20:08:24 adam Exp $
+/* $Id: zebramap.h,v 1.22 2007-01-22 18:15:03 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -66,6 +66,12 @@ YAZ_EXPORT
 int zebra_maps_is_sort (ZebraMaps zms, unsigned reg_id);
 
 YAZ_EXPORT
+int zebra_maps_is_index (ZebraMaps zms, unsigned reg_id);
+
+YAZ_EXPORT
+int zebra_maps_is_staticrank (ZebraMaps zms, unsigned reg_id);
+
+YAZ_EXPORT
 int zebra_maps_is_alwaysmatches (ZebraMaps zms, unsigned reg_id);
 
 YAZ_EXPORT
diff --git a/index/extract.c b/index/extract.c
index 526a05f..fb576a2 100644
--- a/index/extract.c
+++ b/index/extract.c
@@ -1,4 +1,4 @@
-/* $Id: extract.c,v 1.247 2007-01-15 15:10:16 adam Exp $
+/* $Id: extract.c,v 1.248 2007-01-22 18:15:03 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -1158,6 +1158,20 @@ static void extract_add_sort_string(RecWord *p, const char *str, int length)
     zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
 }
 
+static void extract_add_staticrank_string(RecWord *p,
+                                          const char *str, int length)
+{
+    char valz[40];
+    struct recExtractCtrl *ctrl = p->extractCtrl;
+
+    if (length > sizeof(valz)-1)
+        length = sizeof(valz)-1;
+
+    memcpy(valz, str, length);
+    valz[length] = '\0';
+    ctrl->staticrank = atozint(valz);
+}
+
 static void extract_add_string(RecWord *p, const char *string, int length)
 {
     ZebraHandle zh = p->extractCtrl->handle;
@@ -1166,9 +1180,7 @@ static void extract_add_string(RecWord *p, const char *string, int length)
     if (!p->index_name)
         return;
 
-    if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type))
-        extract_add_sort_string(p, string, length);
-    else
+    if (zebra_maps_is_index(zh->reg->zebra_maps, p->index_type))
     {
         extract_add_index_string(p, zinfo_index_category_index,
                                  string, length);
@@ -1182,6 +1194,14 @@ static void extract_add_string(RecWord *p, const char *string, int length)
                 &word, zinfo_index_category_alwaysmatches, "", 0);
         }
     }
+    else if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type))
+    {
+        extract_add_sort_string(p, string, length);
+    }
+    else if (zebra_maps_is_staticrank(zh->reg->zebra_maps, p->index_type))
+    {
+        extract_add_staticrank_string(p, string, length);
+    }
 }
 
 static void extract_add_incomplete_field(RecWord *p)
diff --git a/index/recgrs.c b/index/recgrs.c
index c85b635..d665864 100644
--- a/index/recgrs.c
+++ b/index/recgrs.c
@@ -1,4 +1,4 @@
-/* $Id: recgrs.c,v 1.13 2007-01-15 15:10:17 adam Exp $
+/* $Id: recgrs.c,v 1.14 2007-01-22 18:15:03 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -525,25 +525,6 @@ static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
 }
 
 
-static void index_staticrank(struct recExtractCtrl *p,
-                             RecWord *wrd,
-                             data1_absyn *absyn)
-{
-    const char *staticrank_index = data1_absyn_get_staticrank(absyn);
-
-    if (staticrank_index && !strcmp(wrd->index_name, staticrank_index))
-    {
-        char valz[20];
-        size_t len = wrd->term_len;
-
-        if (len > sizeof(valz)-1)
-            len = sizeof(valz)-1;
-        memcpy(valz, wrd->term_buf, len);
-        valz[len] = '\0';
-        p->staticrank = atozint(valz);
-    }
-}
-
 static void index_xpath(struct source_parser *sp, data1_node *n,
                         struct recExtractCtrl *p,
                         int level, RecWord *wrd,
@@ -607,7 +588,6 @@ static void index_xpath(struct source_parser *sp, data1_node *n,
                         else
                         {
                             (*p->tokenAdd)(&wrd_tl);
-                            index_staticrank(p, &wrd_tl, n->root->u.root.absyn);
                         }
                         if (wrd_tl.seqno > max_seqno)
                             max_seqno = wrd_tl.seqno;
@@ -724,8 +704,6 @@ static void index_xpath(struct source_parser *sp, data1_node *n,
                             wrd->term_buf = xp->value;
                             wrd->term_len = strlen(xp->value);
                             (*p->tokenAdd)(wrd);
-                            index_staticrank(p, wrd,
-                                             n->root->u.root.absyn);
                         }
                     }
                 }
@@ -793,7 +771,6 @@ static void index_termlist (struct source_parser *sp, data1_node *par,
                 {
                     wrd->index_type = *tlist->structure;
                     wrd->index_name = tlist->index_name;
-                    index_staticrank(p, wrd, n->root->u.root.absyn);
                     (*p->tokenAdd)(wrd);
                 }
             }
diff --git a/tab/default.idx b/tab/default.idx
index 96f128f..a2b64bf 100644
--- a/tab/default.idx
+++ b/tab/default.idx
@@ -1,5 +1,5 @@
 # Zebra indexes as referred to from the *.abs-files.
-# $Id: default.idx,v 1.14 2006-09-08 14:40:56 adam Exp $
+# $Id: default.idx,v 1.15 2007-01-22 18:15:04 adam Exp $
 #
 
 # Traditional word index
@@ -55,3 +55,5 @@ sort s
 completeness 1
 charmap string.chr
 
+# Staticrank (uncomment to enable)
+#staticrank r
diff --git a/util/zebramap.c b/util/zebramap.c
index 2a836b0..dfec14c 100644
--- a/util/zebramap.c
+++ b/util/zebramap.c
@@ -1,4 +1,4 @@
-/* $Id: zebramap.c,v 1.55 2007-01-15 15:10:26 adam Exp $
+/* $Id: zebramap.c,v 1.56 2007-01-22 18:15:04 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -32,6 +32,7 @@
 
 #define ZEBRA_MAP_TYPE_SORT 1
 #define ZEBRA_MAP_TYPE_INDEX 2
+#define ZEBRA_MAP_TYPE_STATICRANK 3
 
 #define ZEBRA_REPLACE_ANY 300
 
@@ -147,6 +148,23 @@ ZEBRA_RES zebra_maps_read_file(ZebraMaps zms, const char *fname)
             (*zm)->first_in_field = 0;
             zms->no_maps++;
         }
+        else if (!yaz_matchstr(argv[0], "staticrank") && argc == 2)
+        {
+            if (!zm)
+                zm = &zms->map_list;
+            else
+                zm = &(*zm)->next;
+            *zm = (struct zebra_map *) nmem_malloc(zms->nmem, sizeof(**zm));
+            (*zm)->reg_id = argv[1][0];
+            (*zm)->maptab_name = NULL;
+            (*zm)->type = ZEBRA_MAP_TYPE_STATICRANK;
+            (*zm)->maptab = NULL;
+            (*zm)->completeness = 1;
+            (*zm)->positioned = 0;
+            (*zm)->alwaysmatches = 0;
+            (*zm)->first_in_field = 0;
+            zms->no_maps++;
+        }
         else if (!zm)
         {
             yaz_log(YLOG_WARN, "%s:%d: Missing sort/index before '%s'",
@@ -155,7 +173,15 @@ ZEBRA_RES zebra_maps_read_file(ZebraMaps zms, const char *fname)
         }
         else if (!yaz_matchstr(argv[0], "charmap") && argc == 2)
         {
-            (*zm)->maptab_name = nmem_strdup(zms->nmem, argv[1]);
+            if ((*zm)->type != ZEBRA_MAP_TYPE_STATICRANK)
+                (*zm)->maptab_name = nmem_strdup(zms->nmem, argv[1]);
+            else
+            {
+                yaz_log(YLOG_WARN|YLOG_FATAL, "%s:%d: charmap for "
+                        "staticrank is invalid", fname, lineno);
+                yaz_log(YLOG_LOG, "Type is %d", (*zm)->type);
+                failures++;
+            }
         }
         else if (!yaz_matchstr(argv[0], "completeness") && argc == 2)
         {
@@ -167,7 +193,14 @@ ZEBRA_RES zebra_maps_read_file(ZebraMaps zms, const char *fname)
         }
         else if (!yaz_matchstr(argv[0], "alwaysmatches") && argc == 2)
         {
-            (*zm)->alwaysmatches = atoi(argv[1]);
+            if ((*zm)->type != ZEBRA_MAP_TYPE_STATICRANK)
+                (*zm)->alwaysmatches = atoi(argv[1]);
+            else
+            {
+                yaz_log(YLOG_WARN|YLOG_FATAL, "%s:%d: alwaysmatches for "
+                        "staticrank is invalid", fname, lineno);
+                failures++;
+            }
         }
         else if (!yaz_matchstr(argv[0], "firstinfield") && argc == 2)
         {
@@ -336,6 +369,22 @@ int zebra_maps_is_positioned(ZebraMaps zms, unsigned reg_id)
         return zm->positioned;
     return 0;
 }
+
+int zebra_maps_is_index(ZebraMaps zms, unsigned reg_id)
+{
+    struct zebra_map *zm = zebra_map_get(zms, reg_id);
+    if (zm)
+        return zm->type == ZEBRA_MAP_TYPE_INDEX;
+    return 0;
+}
+
+int zebra_maps_is_staticrank(ZebraMaps zms, unsigned reg_id)
+{
+    struct zebra_map *zm = zebra_map_get(zms, reg_id);
+    if (zm)
+        return zm->type == ZEBRA_MAP_TYPE_STATICRANK;
+    return 0;
+}
 
 int zebra_maps_is_sort(ZebraMaps zms, unsigned reg_id)
 {
-- 
1.7.10.4