From 161aa7805930f6b91ccea027e4afa4ccce41c379 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 18 Dec 2006 23:40:06 +0000 Subject: [PATCH] Implemented sorting via the ISAMB system. To enable, use sortindex:i in zebra.cfg. The sort keys can also be fetched using zebra::sort:field . --- NEWS | 3 + examples/gils/zebra.cfg | 4 +- include/idzebra/isamb.h | 13 +- include/sortidx.h | 20 ++- index/check_res.c | 3 +- index/extract.c | 18 ++- index/index.h | 4 +- index/retrieve.c | 141 +++++++++++++++--- index/sortidx.c | 379 ++++++++++++++++++++++++++++++++++++++++------- index/zebraapi.c | 24 ++- index/zsets.c | 8 +- isamb/isamb.c | 79 +++++++--- 12 files changed, 583 insertions(+), 113 deletions(-) diff --git a/NEWS b/NEWS index e7c16ba..ffcb297 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,6 @@ +Implemented sorting via the ISAMB system. To enable, use sortindex:i +in zebra.cfg. The sort keys can also be fetched using zebra::sort:field . + Added support for specification of approximative limits for whole query. This is specified as attribute type 12. Semantics is the same as estimatehits in zebra.cfg. diff --git a/examples/gils/zebra.cfg b/examples/gils/zebra.cfg index 7fc2886..4cb52d4 100644 --- a/examples/gils/zebra.cfg +++ b/examples/gils/zebra.cfg @@ -1,5 +1,5 @@ # Simple Zebra configuration file -# $Id: zebra.cfg,v 1.14 2006-08-22 13:39:23 adam Exp $ +# $Id: zebra.cfg,v 1.15 2006-12-18 23:40:06 adam Exp $ # # Where the schema files, attribute files, etc are located. 
profilePath: .:../../tab @@ -20,6 +20,8 @@ recordId: (bib-1,title) storedata: 1 +sortindex: i + modulePath: ../../index/.libs #shadow: shadow:100M # register: register:100M diff --git a/include/idzebra/isamb.h b/include/idzebra/isamb.h index 183e13b..d42a1ef 100644 --- a/include/idzebra/isamb.h +++ b/include/idzebra/isamb.h @@ -1,4 +1,4 @@ -/* $Id: isamb.h,v 1.8 2006-12-12 13:46:42 adam Exp $ +/* $Id: isamb.h,v 1.9 2006-12-18 23:40:07 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -36,6 +36,10 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, int cache); YAZ_EXPORT +ISAMB isamb_open2(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, + int cache, int no_cat, int *sizes, int use_root_ptr); + +YAZ_EXPORT void isamb_close(ISAMB isamb); YAZ_EXPORT @@ -84,6 +88,13 @@ void isamb_set_int_count(ISAMB b, int v); YAZ_EXPORT void isamb_set_cache_size(ISAMB b, int sz); +YAZ_EXPORT +zint isamb_get_root_ptr(ISAMB b); + +YAZ_EXPORT +void isamb_set_root_ptr(ISAMB b, zint root_ptr); + + YAZ_END_CDECL #endif diff --git a/include/sortidx.h b/include/sortidx.h index 6ee16c4..cf3ae64 100644 --- a/include/sortidx.h +++ b/include/sortidx.h @@ -1,4 +1,4 @@ -/* $Id: sortidx.h,v 1.11 2006-11-21 22:17:49 adam Exp $ +/* $Id: sortidx.h,v 1.12 2006-12-18 23:40:06 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -31,14 +31,18 @@ YAZ_BEGIN_CDECL #define SORT_IDX_ENTRYSIZE 64 -typedef struct sortIdx *SortIdx; +typedef struct zebra_sort_index *zebra_sort_index_t; -SortIdx sortIdx_open(BFiles bfs, int write_flag); -void sortIdx_close(SortIdx si); -int sortIdx_type(SortIdx si, int type); -void sortIdx_sysno(SortIdx si, zint sysno); -void sortIdx_add(SortIdx si, const char *buf, int len); -void sortIdx_read(SortIdx si, char *buf); +#define ZEBRA_SORT_TYPE_FLAT 1 +#define ZEBRA_SORT_TYPE_ISAMB 2 + +zebra_sort_index_t zebra_sort_open(BFiles bfs, int write_flag, int sort_type); +void zebra_sort_close(zebra_sort_index_t si); +int 
zebra_sort_type(zebra_sort_index_t si, int type); +void zebra_sort_sysno(zebra_sort_index_t si, zint sysno); +void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len); +void zebra_sort_delete(zebra_sort_index_t si); +void zebra_sort_read(zebra_sort_index_t si, char *buf); YAZ_END_CDECL diff --git a/index/check_res.c b/index/check_res.c index 0e7091d..33def9d 100644 --- a/index/check_res.c +++ b/index/check_res.c @@ -1,4 +1,4 @@ -/* $Id: check_res.c,v 1.2 2006-12-06 10:26:54 adam Exp $ +/* $Id: check_res.c,v 1.3 2006-12-18 23:40:07 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -57,6 +57,7 @@ int zebra_check_res(Res res) res_add(v, "shadow", ""); res_add(v, "segment", ""); res_add(v, "setTmpDir", ""); + res_add(v, "sortindex", ""); res_add(v, "staticrank", ""); res_add(v, "threads", ""); res_add(v, "database", "p"); diff --git a/index/extract.c b/index/extract.c index 665640e..614c0f1 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.245 2006-12-11 17:08:03 adam Exp $ +/* $Id: extract.c,v 1.246 2006-12-18 23:40:07 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -1361,24 +1361,30 @@ static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid) void extract_flush_sort_keys(ZebraHandle zh, zint sysno, int cmd, zebra_rec_keys_t reckeys) { +#if 0 + yaz_log(YLOG_LOG, "extract_flush_sort_keys cmd=%d sysno=" ZINT_FORMAT, + cmd, sysno); + extract_rec_keys_log(zh, cmd, reckeys, YLOG_LOG); +#endif + if (zebra_rec_keys_rewind(reckeys)) { - SortIdx sortIdx = zh->reg->sortIdx; + zebra_sort_index_t si = zh->reg->sort_index; size_t slen; const char *str; struct it_key key_in; - sortIdx_sysno (sortIdx, sysno); + zebra_sort_sysno(si, sysno); while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) { int ord = CAST_ZINT_TO_INT(key_in.mem[0]); - sortIdx_type(sortIdx, ord); + zebra_sort_type(si, ord); if (cmd == 1) - sortIdx_add(sortIdx, str, slen); + zebra_sort_add(si, str, slen); else - sortIdx_add(sortIdx, "", 
1); + zebra_sort_delete(si); } } } diff --git a/index/index.h b/index/index.h index 5e6cd48..46ebddf 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.186 2006-12-05 14:06:29 adam Exp $ +/* $Id: index.h,v 1.187 2006-12-18 23:40:07 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -134,7 +134,7 @@ struct zebra_register { ISAMB isamb; Dict dict; Dict matchDict; - SortIdx sortIdx; + zebra_sort_index_t sort_index; int registerState; /* 0 (no commit pages), 1 (use commit pages) */ time_t registerChange; BFiles bfs; diff --git a/index/retrieve.c b/index/retrieve.c index d75e937..1afe24f 100644 --- a/index/retrieve.c +++ b/index/retrieve.c @@ -1,4 +1,4 @@ -/* $Id: retrieve.c,v 1.60 2006-11-29 09:01:53 marc Exp $ +/* $Id: retrieve.c,v 1.61 2006-12-18 23:40:07 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -75,8 +75,8 @@ static int zebra_create_record_stream(ZebraHandle zh, static int parse_zebra_elem(const char *elem, - const char **index, size_t *index_len, - const char **type, size_t *type_len) + const char **index, size_t *index_len, + const char **type, size_t *type_len) { *index = 0; *index_len = 0; @@ -119,6 +119,103 @@ static int parse_zebra_elem(const char *elem, } +int zebra_special_sort_fetch(ZebraHandle zh, zint sysno, ODR odr, + const char *elemsetname, + oid_value input_format, + oid_value *output_format, + char **rec_bufp, int *rec_lenp) +{ + const char *retrieval_index; + size_t retrieval_index_len; + const char *retrieval_type; + size_t retrieval_type_len; + char retrieval_index_cstr[256]; + int ord; + + /* only accept XML and SUTRS requests */ + if (input_format != VAL_TEXT_XML && input_format != VAL_SUTRS) + { + yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s", + elemsetname); + *output_format = VAL_NONE; + return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST; + } + + if (!parse_zebra_elem(elemsetname, + &retrieval_index, &retrieval_index_len, + &retrieval_type, &retrieval_type_len)) + { + 
return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_; + } + + if (retrieval_type_len != 0 && retrieval_type_len != 1) + { + return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_; + } + if (!retrieval_index_len || + retrieval_index_len >= sizeof(retrieval_index_cstr)-1) + { + return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_; + } + + memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len); + retrieval_index_cstr[retrieval_index_len] = '\0'; + + ord = zebraExplain_lookup_attr_str(zh->reg->zei, + zinfo_index_category_sort, + (retrieval_type_len == 0 ? -1 : + retrieval_type[0]), + retrieval_index_cstr); + if (ord == -1) + return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_; + else + { + char dst_buf[IT_MAX_WORD]; + char str[IT_MAX_WORD]; + int index_type; + const char *db = 0; + const char *string_index = 0; + WRBUF wrbuf = wrbuf_alloc(); + + zebra_sort_sysno(zh->reg->sort_index, sysno); + zebra_sort_type(zh->reg->sort_index, ord); + zebra_sort_read(zh->reg->sort_index, str); + + zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db, &string_index); + + zebra_term_untrans(zh, index_type, dst_buf, str); + + + if (input_format == VAL_TEXT_XML) + { + *output_format = VAL_TEXT_XML; + wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR + " sysno=\"" ZINT_FORMAT "\"" + " set=\"zebra::sort%s/\">\n", + sysno, elemsetname); + + wrbuf_printf(wrbuf, " ", index_type); + wrbuf_xmlputs(wrbuf, dst_buf); + wrbuf_printf(wrbuf, "\n"); + wrbuf_printf(wrbuf, "\n"); + } + else if (input_format == VAL_SUTRS) + { + *output_format = VAL_SUTRS; + + wrbuf_printf(wrbuf, "%s %c %s\n", string_index, index_type, + dst_buf); + } + *rec_lenp = wrbuf_len(wrbuf); + *rec_bufp = odr_malloc(odr, *rec_lenp); + memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp); + wrbuf_free(wrbuf, 1); + return 0; + } +} + int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr, Record rec, const char *elemsetname, @@ -137,8 +234,8 @@ int 
zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr, /* *rec_lenp = 0; */ /* only accept XML and SUTRS requests */ - if (input_format != VAL_TEXT_XML - && input_format != VAL_SUTRS){ + if (input_format != VAL_TEXT_XML && input_format != VAL_SUTRS) + { yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s", elemsetname); *output_format = VAL_NONE; @@ -159,7 +256,7 @@ int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr, { char retrieval_index_cstr[256]; - if (retrieval_index_len < sizeof(retrieval_index_cstr) -1) + if (retrieval_index_len < sizeof(retrieval_index_cstr) -1) { memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len); retrieval_index_cstr[retrieval_index_len] = '\0'; @@ -194,7 +291,8 @@ int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr, else if (input_format == VAL_SUTRS) *output_format = VAL_SUTRS; - while(zebra_rec_keys_read(keys, &str, &slen, &key_in)){ + while (zebra_rec_keys_read(keys, &str, &slen, &key_in)) + { int i; int ord = CAST_ZINT_TO_INT(key_in.mem[0]); int index_type; @@ -212,17 +310,16 @@ int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr, if (retrieval_index == 0 || (string_index_len == retrieval_index_len && !memcmp(string_index, retrieval_index, - string_index_len))){ - + string_index_len))) + { /* process only if type is not defined, or is matching */ if (retrieval_type == 0 || (retrieval_type_len == 1 - && retrieval_type[0] == index_type)){ - - + && retrieval_type[0] == index_type)) + { zebra_term_untrans(zh, index_type, dst_buf, str); - if (strlen(dst_buf)){ - + if (strlen(dst_buf)) + { if (input_format == VAL_TEXT_XML){ wrbuf_printf(wrbuf, " \n"); - } + } *rec_lenp = wrbuf_len(wrbuf); *rec_bufp = odr_malloc(odr, *rec_lenp); memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp); @@ -335,6 +432,16 @@ int zebra_special_fetch(ZebraHandle zh, zint sysno, int score, ODR odr, return ret; } + /* processing special elementsetnames zebra::sort:: */ + if (elemsetname && 0 == 
strncmp(elemsetname, "sort", 4)) + { + return zebra_special_sort_fetch(zh, sysno, odr, + elemsetname + 4, + input_format, output_format, + rec_bufp, rec_lenp); + } + + /* fetching binary record up for all other display elementsets */ rec = rec_get(zh->reg->records, sysno); if (!rec) @@ -424,8 +531,8 @@ int zebra_special_fetch(ZebraHandle zh, zint sysno, int score, ODR odr, } /* processing special elementsetnames zebra::index:: */ - if (elemsetname && 0 == strncmp(elemsetname, "index", 5)){ - + if (elemsetname && 0 == strncmp(elemsetname, "index", 5)) + { int ret = zebra_special_index_fetch(zh, sysno, odr, rec, elemsetname + 5, input_format, output_format, diff --git a/index/sortidx.c b/index/sortidx.c index 08b16e6..464bfaa 100644 --- a/index/sortidx.c +++ b/index/sortidx.c @@ -1,4 +1,4 @@ -/* $Id: sortidx.c,v 1.19 2006-11-21 22:17:49 adam Exp $ +/* $Id: sortidx.c,v 1.20 2006-12-18 23:40:07 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -26,126 +26,405 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include +#include #include #include #include "recindex.h" +#define SORT_MAX_TERM 127 + #define SORT_IDX_BLOCKSIZE 64 +struct sort_term { + zint sysno; + char term[SORT_MAX_TERM]; +}; + + +static void sort_term_log_item(int level, const void *b, const char *txt) +{ + struct sort_term a1; + + memcpy(&a1, b, sizeof(a1)); + + yaz_log(level, "%s " ZINT_FORMAT " %s", txt, a1.sysno, a1.term); +} + +int sort_term_compare(const void *a, const void *b) +{ + struct sort_term a1, b1; + + memcpy(&a1, a, sizeof(a1)); + memcpy(&b1, b, sizeof(b1)); + + if (a1.sysno > b1.sysno) + return 1; + else if (a1.sysno < b1.sysno) + return -1; + return 0; +} + +void *sort_term_code_start(void) +{ + return 0; +} + +void sort_term_encode(void *p, char **dst, const char **src) +{ + struct sort_term a1; + + memcpy(&a1, *src, sizeof(a1)); + *src += sizeof(a1); + + zebra_zint_encode(dst, a1.sysno); /* encode record id */ + strcpy(*dst, a1.term); /* then sort 
term, 0 terminated */ + *dst += strlen(a1.term) + 1; +} + +void sort_term_decode(void *p, char **dst, const char **src) +{ + struct sort_term a1; + + zebra_zint_decode(src, &a1.sysno); + + strcpy(a1.term, *src); + *src += strlen(a1.term) + 1; + + memcpy(*dst, &a1, sizeof(a1)); + *dst += sizeof(a1); +} + +void sort_term_code_reset(void *p) +{ +} + +void sort_term_code_stop(void *p) +{ +} + + +struct sort_term_stream { + int no; + int insert_flag; + struct sort_term st; +}; + +int sort_term_code_read(void *vp, char **dst, int *insertMode) +{ + struct sort_term_stream *s = (struct sort_term_stream *) vp; + + if (s->no == 0) + return 0; + + (s->no)--; + + *insertMode = s->insert_flag; + memcpy(*dst, &s->st, sizeof(s->st)); + *dst += sizeof(s->st); + return 1; +} + + struct sortFileHead { zint sysno_max; }; struct sortFile { - int type; - BFile bf; + int id; + union { + BFile bf; + ISAMB isamb; + } u; + ISAM_P isam_p; + ISAMB_PP isam_pp; struct sortFile *next; struct sortFileHead head; + int no_inserted; + int no_deleted; }; -struct sortIdx { +struct zebra_sort_index { BFiles bfs; int write_flag; zint sysno; + int type; char *entry_buf; struct sortFile *current_file; struct sortFile *files; }; -SortIdx sortIdx_open (BFiles bfs, int write_flag) +zebra_sort_index_t zebra_sort_open(BFiles bfs, int write_flag, int type) { - SortIdx si = (SortIdx) xmalloc (sizeof(*si)); + zebra_sort_index_t si = (zebra_sort_index_t) xmalloc(sizeof(*si)); si->bfs = bfs; si->write_flag = write_flag; si->current_file = NULL; si->files = NULL; - si->entry_buf = (char *) xmalloc (SORT_IDX_ENTRYSIZE); + si->type = type; + si->entry_buf = (char *) xmalloc(SORT_IDX_ENTRYSIZE); return si; } -void sortIdx_close (SortIdx si) +void zebra_sort_close(zebra_sort_index_t si) { struct sortFile *sf = si->files; while (sf) { struct sortFile *sf_next = sf->next; - if (sf->bf) - bf_close (sf->bf); - xfree (sf); + switch(si->type) + { + case ZEBRA_SORT_TYPE_FLAT: + bf_close(sf->u.bf); + break; + case 
ZEBRA_SORT_TYPE_ISAMB: + if (sf->isam_pp) + isamb_pp_close(sf->isam_pp); + isamb_set_root_ptr(sf->u.isamb, sf->isam_p); + isamb_close(sf->u.isamb); + break; + } + xfree(sf); sf = sf_next; } - xfree (si->entry_buf); - xfree (si); + xfree(si->entry_buf); + xfree(si); } -int sortIdx_type (SortIdx si, int type) +int zebra_sort_type(zebra_sort_index_t si, int id) { + int isam_block_size = 4096; + ISAMC_M method; char fname[80]; struct sortFile *sf; - if (si->current_file && si->current_file->type == type) + if (si->current_file && si->current_file->id == id) return 0; for (sf = si->files; sf; sf = sf->next) - if (sf->type == type) + if (sf->id == id) { si->current_file = sf; return 0; } - sf = (struct sortFile *) xmalloc (sizeof(*sf)); - sf->type = type; - sf->bf = NULL; - sprintf (fname, "sort%d", type); - yaz_log (YLOG_DEBUG, "sort idx %s wr=%d", fname, si->write_flag); - sf->bf = bf_open (si->bfs, fname, SORT_IDX_BLOCKSIZE, si->write_flag); - if (!sf->bf) - { - xfree (sf); - return -1; - } - if (!bf_read (sf->bf, 0, 0, sizeof(sf->head), &sf->head)) + sf = (struct sortFile *) xmalloc(sizeof(*sf)); + sf->id = id; + + method.compare_item = sort_term_compare; + method.log_item = sort_term_log_item; + method.codec.start = sort_term_code_start; + method.codec.encode = sort_term_encode; + method.codec.decode = sort_term_decode; + method.codec.reset = sort_term_code_reset; + method.codec.stop = sort_term_code_stop; + + switch(si->type) { - sf->head.sysno_max = 0; - if (!si->write_flag) + case ZEBRA_SORT_TYPE_FLAT: + sf->u.bf = NULL; + sprintf(fname, "sort%d", id); + yaz_log(YLOG_DEBUG, "sort idx %s wr=%d", fname, si->write_flag); + sf->u.bf = bf_open(si->bfs, fname, SORT_IDX_BLOCKSIZE, si->write_flag); + if (!sf->u.bf) + { + xfree(sf); + return -1; + } + if (!bf_read(sf->u.bf, 0, 0, sizeof(sf->head), &sf->head)) + { + sf->head.sysno_max = 0; + if (!si->write_flag) + { + bf_close(sf->u.bf); + xfree(sf); + return -1; + } + } + break; + case ZEBRA_SORT_TYPE_ISAMB: + 
sprintf(fname, "sortb%d", id); + + sf->u.isamb = isamb_open2(si->bfs, fname, si->write_flag, &method, + /* cache */ 0, + /* no_cat */ 1, &isam_block_size, + /* use_root_ptr */ 1); + if (!sf->u.isamb) + { + xfree(sf); + return -1; + } + else { - bf_close (sf->bf); - xfree (sf); - return -1; + sf->isam_p = isamb_get_root_ptr(sf->u.isamb); + sf->isam_pp = 0; } + break; } + sf->no_inserted = 0; + sf->no_deleted = 0; sf->next = si->files; si->current_file = si->files = sf; return 0; } -void sortIdx_sysno(SortIdx si, zint sysno) +void zebra_sort_sysno(zebra_sort_index_t si, zint sysno) { - si->sysno = rec_sysno_to_int(sysno); + struct sortFile *sf = si->current_file; + zint new_sysno = rec_sysno_to_int(sysno); + + for (sf = si->files; sf; sf = sf->next) + { + sf->no_inserted = 0; + sf->no_deleted = 0; + if (new_sysno < si->sysno && sf->isam_pp) + { + isamb_pp_close(sf->isam_pp); + sf->isam_pp = 0; + } + } + si->sysno = new_sysno; } -void sortIdx_add(SortIdx si, const char *buf, int len) + +void zebra_sort_delete(zebra_sort_index_t si) { - if (!si->current_file || !si->current_file->bf) - return; - if (len > SORT_IDX_ENTRYSIZE) + struct sortFile *sf = si->current_file; + + if (!sf || !sf->u.bf) + return; + switch(si->type) { - len = SORT_IDX_ENTRYSIZE; - memcpy (si->entry_buf, buf, len); + case ZEBRA_SORT_TYPE_FLAT: + zebra_sort_add(si, "", 0); + break; + case ZEBRA_SORT_TYPE_ISAMB: + assert(sf->u.isamb); + if (sf->no_deleted == 0) + { + struct sort_term_stream s; + ISAMC_I isamc_i; + + s.st.sysno = si->sysno; + s.st.term[0] = '\0'; + + s.no = 1; + s.insert_flag = 0; + isamc_i.clientData = &s; + isamc_i.read_item = sort_term_code_read; + + isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i); + sf->no_deleted++; + } + break; } - else +} + +void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len) +{ + struct sortFile *sf = si->current_file; + + if (!sf || !sf->u.bf) + return; + switch(si->type) { - memcpy (si->entry_buf, buf, len); - memset (si->entry_buf+len, 0, 
SORT_IDX_ENTRYSIZE-len); + case ZEBRA_SORT_TYPE_FLAT: + if (len > SORT_IDX_ENTRYSIZE) + { + len = SORT_IDX_ENTRYSIZE; + memcpy(si->entry_buf, buf, len); + } + else + { + memcpy(si->entry_buf, buf, len); + memset(si->entry_buf+len, 0, SORT_IDX_ENTRYSIZE-len); + } + bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf); + break; + case ZEBRA_SORT_TYPE_ISAMB: + assert(sf->u.isamb); + if (sf->no_inserted == 0) + { + struct sort_term_stream s; + ISAMC_I isamc_i; + + s.st.sysno = si->sysno; + if (len >= SORT_MAX_TERM) + len = SORT_MAX_TERM-1; + memcpy(s.st.term, buf, len); + s.st.term[len] = '\0'; + s.no = 1; + s.insert_flag = 1; + isamc_i.clientData = &s; + isamc_i.read_item = sort_term_code_read; + + isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i); + sf->no_inserted++; + } + break; } - bf_write (si->current_file->bf, si->sysno+1, 0, 0, si->entry_buf); } -void sortIdx_read (SortIdx si, char *buf) +void zebra_sort_read(zebra_sort_index_t si, char *buf) { int r; + struct sortFile *sf = si->current_file; + + assert(sf); + + switch(si->type) + { + case ZEBRA_SORT_TYPE_FLAT: + r = bf_read(sf->u.bf, si->sysno+1, 0, 0, buf); + if (!r) + memset(buf, 0, SORT_IDX_ENTRYSIZE); + break; + case ZEBRA_SORT_TYPE_ISAMB: + memset(buf, 0, SORT_IDX_ENTRYSIZE); + assert(sf->u.bf); + if (sf->u.bf) + { + struct sort_term st, st_untilbuf; + + st.sysno = 99999; + if (!sf->isam_pp) + { + yaz_log(YLOG_LOG, "isamb_pp_open " ZINT_FORMAT, sf->isam_p); + sf->isam_pp = isamb_pp_open(sf->u.isamb, sf->isam_p, 1); + } + if (!sf->isam_pp) + return; - assert(si->current_file); - r = bf_read (si->current_file->bf, si->sysno+1, 0, 0, buf); - if (!r) - memset (buf, 0, SORT_IDX_ENTRYSIZE); +#if 0 + while (1) + { + r = isamb_pp_read(sf->isam_pp, &st); + if (!r) + break; + if (st.sysno == si->sysno) + break; + yaz_log(YLOG_LOG, "Received sysno=" ZINT_FORMAT " looking for " + ZINT_FORMAT, st.sysno, si->sysno); + } +#else + st_untilbuf.sysno = si->sysno; + st_untilbuf.term[0] = '\0'; + r = 
isamb_pp_forward(sf->isam_pp, &st, &st_untilbuf); + if (!r) + return; +#endif + if (r) + { + if (st.sysno != si->sysno) + { + yaz_log(YLOG_LOG, "Received sysno=" ZINT_FORMAT " looking for " + ZINT_FORMAT, st.sysno, si->sysno); + return; + } + if (strlen(st.term) < SORT_IDX_ENTRYSIZE) + strcpy(buf, st.term); + else + memcpy(buf, st.term, SORT_IDX_ENTRYSIZE); + } + } + break; + } } /* * Local variables: diff --git a/index/zebraapi.c b/index/zebraapi.c index 7b14661..9905413 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -1,4 +1,4 @@ -/* $Id: zebraapi.c,v 1.237 2006-12-05 14:06:29 adam Exp $ +/* $Id: zebraapi.c,v 1.238 2006-12-18 23:40:07 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -314,6 +314,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, const char *recordCompression = 0; const char *profilePath; char cwd[1024]; + int sort_type = ZEBRA_SORT_TYPE_FLAT; ZEBRA_RES ret = ZEBRA_OK; ASSERTZS; @@ -385,7 +386,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, reg->records = 0; reg->dict = 0; - reg->sortIdx = 0; + reg->sort_index = 0; reg->isams = 0; reg->matchDict = 0; reg->isamc = 0; @@ -427,9 +428,22 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, yaz_log (YLOG_WARN, "dict_open failed"); ret = ZEBRA_FAIL; } - if (!(reg->sortIdx = sortIdx_open (reg->bfs, rw))) + + + if (res_get_match (res, "sortindex", "f", "f")) + sort_type = ZEBRA_SORT_TYPE_FLAT; + else if (res_get_match (res, "sortindex", "i", "f")) + sort_type = ZEBRA_SORT_TYPE_ISAMB; + else + { + yaz_log (YLOG_WARN, "bad_value for 'sort:'"); + ret = ZEBRA_FAIL; + } + + + if (!(reg->sort_index = zebra_sort_open(reg->bfs, rw, sort_type))) { - yaz_log (YLOG_WARN, "sortIdx_open failed"); + yaz_log (YLOG_WARN, "zebra_sort_open failed"); ret = ZEBRA_FAIL; } if (res_get_match (res, "isam", "s", ISAM_DEFAULT)) @@ -540,7 +554,7 @@ static void zebra_register_close(ZebraService zs, struct zebra_register *reg) 
dict_close (reg->dict); if (reg->matchDict) dict_close (reg->matchDict); - sortIdx_close (reg->sortIdx); + zebra_sort_close(reg->sort_index); if (reg->isams) isams_close (reg->isams); if (reg->isamc) diff --git a/index/zsets.c b/index/zsets.c index 5c02dd4..a20056b 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.113 2006-11-30 10:33:19 adam Exp $ +/* $Id: zsets.c,v 1.114 2006-12-18 23:40:08 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -552,15 +552,15 @@ void resultSetInsertSort(ZebraHandle zh, ZebraSet sset, struct zset_sort_info *sort_info = sset->sort_info; int i, j; - sortIdx_sysno (zh->reg->sortIdx, sysno); + zebra_sort_sysno(zh->reg->sort_index, sysno); for (i = 0; ireg->sortIdx, criteria[i].ord); - sortIdx_read(zh->reg->sortIdx, this_entry_buf); + zebra_sort_type(zh->reg->sort_index, criteria[i].ord); + zebra_sort_read(zh->reg->sort_index, this_entry_buf); } } i = sort_info->num_entries; diff --git a/isamb/isamb.c b/isamb/isamb.c index 3e85ee1..7cd0ea9 100644 --- a/isamb/isamb.c +++ b/isamb/isamb.c @@ -1,4 +1,4 @@ -/* $Id: isamb.c,v 1.88 2006-12-12 13:46:41 adam Exp $ +/* $Id: isamb.c,v 1.89 2006-12-18 23:40:08 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -33,7 +33,8 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #define ISAMB_MAJOR_VERSION 3 -#define ISAMB_MINOR_VERSION 0 +#define ISAMB_MINOR_VERSION_NO_ROOT 0 +#define ISAMB_MINOR_VERSION_WITH_ROOT 1 struct ISAMB_head { zint first_block; @@ -104,6 +105,8 @@ struct ISAMB_s { zint number_of_leaf_splits; int enable_int_count; /* whether we count nodes (or not) */ int cache_size; /* size of blocks to cache (if cache=1) */ + int minor_version; + zint root_ptr; }; struct ISAMB_block { @@ -195,16 +198,18 @@ void isamb_set_cache_size(ISAMB b, int v) b->cache_size = v; } -ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, - int cache) +ISAMB isamb_open2(BFiles bfs, const char *name, int writeflag, ISAMC_M 
*method, + int cache, int no_cat, int *sizes, int use_root_ptr) { ISAMB isamb = xmalloc(sizeof(*isamb)); - int i, b_size = ISAMB_MIN_SIZE; + int i; + + assert(no_cat <= CAT_MAX); isamb->bfs = bfs; isamb->method = (ISAMC_M *) xmalloc(sizeof(*method)); memcpy(isamb->method, method, sizeof(*method)); - isamb->no_cat = CAT_NO; + isamb->no_cat = no_cat; isamb->log_io = 0; isamb->log_freelist = 0; isamb->cache = cache; @@ -215,6 +220,13 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, isamb->enable_int_count = 1; isamb->cache_size = 40; + if (use_root_ptr) + isamb->minor_version = ISAMB_MINOR_VERSION_WITH_ROOT; + else + isamb->minor_version = ISAMB_MINOR_VERSION_NO_ROOT; + + isamb->root_ptr = 0; + for (i = 0; iskipped_nodes[i] = isamb->accessed_nodes[i] = 0; @@ -245,7 +257,7 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, isamb->file[i].bf = bf_open(bfs, fname, ISAMB_CACHE_ENTRY_SIZE, writeflag); else - isamb->file[i].bf = bf_open(bfs, fname, b_size, writeflag); + isamb->file[i].bf = bf_open(bfs, fname, sizes[i], writeflag); if (!isamb->file[i].bf) { @@ -254,12 +266,12 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, } /* fill-in default values (for empty isamb) */ - isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1; + isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/sizes[i]+1; isamb->file[i].head.last_block = isamb->file[i].head.first_block; - isamb->file[i].head.block_size = b_size; - assert(b_size <= ISAMB_CACHE_ENTRY_SIZE); + isamb->file[i].head.block_size = sizes[i]; + assert(sizes[i] <= ISAMB_CACHE_ENTRY_SIZE); #if ISAMB_PTR_CODEC - if (i == isamb->no_cat-1 || b_size > 128) + if (i == isamb->no_cat-1 || sizes[i] > 128) isamb->file[i].head.block_offset = 8; else isamb->file[i].head.block_offset = 4; @@ -267,7 +279,7 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, isamb->file[i].head.block_offset = 11; #endif 
isamb->file[i].head.block_max = - b_size - isamb->file[i].head.block_offset; + sizes[i] - isamb->file[i].head.block_offset; isamb->file[i].head.free_list = 0; if (bf_read(isamb->file[i].bf, 0, 0, 0, hbuf)) { @@ -292,10 +304,10 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, fname, major, ISAMB_MAJOR_VERSION); return 0; } - for (left = len - b_size; left > 0; left = left - b_size) + for (left = len - sizes[i]; left > 0; left = left - sizes[i]) { pos++; - if (!bf_read(isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size)) + if (!bf_read(isamb->file[i].bf, pos, 0, 0, hbuf + pos*sizes[i])) { yaz_log(YLOG_WARN, "truncated isamb header for " "file=%s len=%d pos=%d", @@ -311,11 +323,12 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, decode_ptr(&src, &zint_tmp); isamb->file[i].head.block_max = (int) zint_tmp; decode_ptr(&src, &isamb->file[i].head.free_list); + if (isamb->minor_version >= ISAMB_MINOR_VERSION_WITH_ROOT) + decode_ptr(&src, &isamb->root_ptr); } assert (isamb->file[i].head.block_size >= isamb->file[i].head.block_offset); isamb->file[i].head_dirty = 0; - assert(isamb->file[i].head.block_size == b_size); - b_size = b_size * ISAMB_FAC_SIZE; + assert(isamb->file[i].head.block_size == sizes[i]); } #if ISAMB_DEBUG yaz_log(YLOG_WARN, "isamb debug enabled. 
Things will be slower than usual"); @@ -323,6 +336,21 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, return isamb; } +ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, + int cache) +{ + int sizes[CAT_NO]; + int i, b_size = ISAMB_MIN_SIZE; + + for (i = 0; ifile[cat].cache_entries) @@ -429,7 +457,6 @@ void isamb_close (ISAMB isamb) { char hbuf[DST_BUF_SIZE]; int major = ISAMB_MAJOR_VERSION; - int minor = ISAMB_MINOR_VERSION; int len = 16; char *dst = hbuf + 16; int pos = 0, left; @@ -440,12 +467,17 @@ void isamb_close (ISAMB isamb) encode_ptr(&dst, isamb->file[i].head.block_size); encode_ptr(&dst, isamb->file[i].head.block_max); encode_ptr(&dst, isamb->file[i].head.free_list); + + if (isamb->minor_version >= ISAMB_MINOR_VERSION_WITH_ROOT) + encode_ptr(&dst, isamb->root_ptr); + memset(dst, '\0', b_size); /* ensure no random bytes are written */ len = dst - hbuf; /* print exactly 16 bytes (including trailing 0) */ - sprintf(hbuf, "isamb%02d %02d %02d\r\n", major, minor, len); + sprintf(hbuf, "isamb%02d %02d %02d\r\n", major, + isamb->minor_version, len); bf_write(isamb->file[i].bf, pos, 0, 0, hbuf); @@ -2009,6 +2041,17 @@ zint isamb_get_leaf_splits(ISAMB b) return b->number_of_leaf_splits; } +zint isamb_get_root_ptr(ISAMB b) +{ + return b->root_ptr; +} + +void isamb_set_root_ptr(ISAMB b, zint root_ptr) +{ + b->root_ptr = root_ptr; +} + + /* * Local variables: * c-basic-offset: 4 -- 1.7.10.4