in zebra.cfg. The sort keys can also be fetched using zebra::sort:field .
+Implemented sorting via the ISAMB system. To enable, use sortindex:i
+in zebra.cfg. The sort keys can also be fetched using zebra::sort:field.
+
Added support for specification of approximative limits for whole query.
This is specified as attribute type 12. Semantics is the same as
estimatehits in zebra.cfg.
# Simple Zebra configuration file
-# $Id: zebra.cfg,v 1.14 2006-08-22 13:39:23 adam Exp $
+# $Id: zebra.cfg,v 1.15 2006-12-18 23:40:06 adam Exp $
#
# Where the schema files, attribute files, etc are located.
profilePath: .:../../tab
storedata: 1
+sortindex: i
+
modulePath: ../../index/.libs
#shadow: shadow:100M
# register: register:100M
-/* $Id: isamb.h,v 1.8 2006-12-12 13:46:42 adam Exp $
+/* $Id: isamb.h,v 1.9 2006-12-18 23:40:07 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
int cache);
YAZ_EXPORT
+ISAMB isamb_open2(BFiles bfs, const char *name, int writeflag, ISAMC_M *method,
+ int cache, int no_cat, int *sizes, int use_root_ptr); /* variant of isamb_open where the caller chooses the number of block
+   categories (no_cat), the block size of each category (sizes[0..no_cat-1])
+   and, with use_root_ptr != 0, that a root pointer is kept in the file header */
+
+YAZ_EXPORT
void isamb_close(ISAMB isamb);
YAZ_EXPORT
YAZ_EXPORT
void isamb_set_cache_size(ISAMB b, int sz);
+YAZ_EXPORT
+zint isamb_get_root_ptr(ISAMB b); /* returns the root pointer currently held in the handle */
+
+YAZ_EXPORT
+void isamb_set_root_ptr(ISAMB b, zint root_ptr); /* stores root_ptr in the handle */
+
+
YAZ_END_CDECL
#endif
-/* $Id: sortidx.h,v 1.11 2006-11-21 22:17:49 adam Exp $
+/* $Id: sortidx.h,v 1.12 2006-12-18 23:40:06 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
#define SORT_IDX_ENTRYSIZE 64
-typedef struct sortIdx *SortIdx;
+typedef struct zebra_sort_index *zebra_sort_index_t;
-SortIdx sortIdx_open(BFiles bfs, int write_flag);
-void sortIdx_close(SortIdx si);
-int sortIdx_type(SortIdx si, int type);
-void sortIdx_sysno(SortIdx si, zint sysno);
-void sortIdx_add(SortIdx si, const char *buf, int len);
-void sortIdx_read(SortIdx si, char *buf);
+#define ZEBRA_SORT_TYPE_FLAT 1 /* each sort register is a flat block file named sort<id> */
+#define ZEBRA_SORT_TYPE_ISAMB 2 /* each sort register is an ISAMB file named sortb<id> */
+
+zebra_sort_index_t zebra_sort_open(BFiles bfs, int write_flag, int sort_type); /* sort_type is one of ZEBRA_SORT_TYPE_*; no register file is opened yet */
+void zebra_sort_close(zebra_sort_index_t si); /* closes every register opened so far and frees si */
+int zebra_sort_type(zebra_sort_index_t si, int type); /* selects sort register number type, opening it on first use; returns 0 on success, -1 if it cannot be opened */
+void zebra_sort_sysno(zebra_sort_index_t si, zint sysno); /* selects the record that add/delete/read operate on */
+void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len); /* stores the len bytes at buf as the sort key of the selected record; over-long keys are truncated */
+void zebra_sort_delete(zebra_sort_index_t si); /* clears the sort key of the selected record in the selected register */
+void zebra_sort_read(zebra_sort_index_t si, char *buf); /* copies the stored key into buf, which must hold SORT_IDX_ENTRYSIZE bytes; not NUL-terminated when the key fills the whole entry */
YAZ_END_CDECL
-/* $Id: check_res.c,v 1.2 2006-12-06 10:26:54 adam Exp $
+/* $Id: check_res.c,v 1.3 2006-12-18 23:40:07 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
res_add(v, "shadow", "");
res_add(v, "segment", "");
res_add(v, "setTmpDir", "");
+ res_add(v, "sortindex", "");
res_add(v, "staticrank", "");
res_add(v, "threads", "");
res_add(v, "database", "p");
-/* $Id: extract.c,v 1.245 2006-12-11 17:08:03 adam Exp $
+/* $Id: extract.c,v 1.246 2006-12-18 23:40:07 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
void extract_flush_sort_keys(ZebraHandle zh, zint sysno,
int cmd, zebra_rec_keys_t reckeys)
{
+#if 0
+ yaz_log(YLOG_LOG, "extract_flush_sort_keys cmd=%d sysno=" ZINT_FORMAT,
+ cmd, sysno);
+ extract_rec_keys_log(zh, cmd, reckeys, YLOG_LOG);
+#endif
+
if (zebra_rec_keys_rewind(reckeys))
{
- SortIdx sortIdx = zh->reg->sortIdx;
+ zebra_sort_index_t si = zh->reg->sort_index;
size_t slen;
const char *str;
struct it_key key_in;
- sortIdx_sysno (sortIdx, sysno);
+ zebra_sort_sysno(si, sysno);
while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
{
int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
- sortIdx_type(sortIdx, ord);
+ zebra_sort_type(si, ord);
if (cmd == 1)
- sortIdx_add(sortIdx, str, slen);
+ zebra_sort_add(si, str, slen);
else
- sortIdx_add(sortIdx, "", 1);
+ zebra_sort_delete(si);
}
}
}
-/* $Id: index.h,v 1.186 2006-12-05 14:06:29 adam Exp $
+/* $Id: index.h,v 1.187 2006-12-18 23:40:07 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
ISAMB isamb;
Dict dict;
Dict matchDict;
- SortIdx sortIdx;
+ zebra_sort_index_t sort_index;
int registerState; /* 0 (no commit pages), 1 (use commit pages) */
time_t registerChange;
BFiles bfs;
-/* $Id: retrieve.c,v 1.60 2006-11-29 09:01:53 marc Exp $
+/* $Id: retrieve.c,v 1.61 2006-12-18 23:40:07 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
static int parse_zebra_elem(const char *elem,
- const char **index, size_t *index_len,
- const char **type, size_t *type_len)
+ const char **index, size_t *index_len,
+ const char **type, size_t *type_len)
{
*index = 0;
*index_len = 0;
}
+/* Fetch the sort key stored for one record (element sets zebra::sort...).
+ *
+ * elemsetname is the part of the element set name that follows
+ * "zebra::sort".  parse_zebra_elem() splits it into an index name and an
+ * optional one-character index type, which are then looked up among the
+ * sort indexes.
+ *
+ * Only VAL_TEXT_XML and VAL_SUTRS are accepted as input_format; the same
+ * format is reported back in *output_format.
+ *
+ * Returns 0 on success, with the rendered record in *rec_bufp (allocated
+ * from odr) and its length in *rec_lenp, or a Bib-1 diagnostic code.
+ */
+int zebra_special_sort_fetch(ZebraHandle zh, zint sysno, ODR odr,
+                             const char *elemsetname,
+                             oid_value input_format,
+                             oid_value *output_format,
+                             char **rec_bufp, int *rec_lenp)
+{
+    const char *retrieval_index;
+    size_t retrieval_index_len;
+    const char *retrieval_type;
+    size_t retrieval_type_len;
+    char retrieval_index_cstr[256];
+    char dst_buf[IT_MAX_WORD];
+    char str[IT_MAX_WORD];
+    int index_type;
+    const char *db = 0;
+    const char *string_index = 0;
+    WRBUF wrbuf;
+    int ord;
+
+    /* only accept XML and SUTRS requests */
+    if (input_format != VAL_TEXT_XML && input_format != VAL_SUTRS)
+    {
+        yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s",
+                elemsetname);
+        *output_format = VAL_NONE;
+        return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
+    }
+
+    if (!parse_zebra_elem(elemsetname,
+                          &retrieval_index, &retrieval_index_len,
+                          &retrieval_type, &retrieval_type_len))
+        return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
+
+    /* the index type, when given, is exactly one character */
+    if (retrieval_type_len != 0 && retrieval_type_len != 1)
+        return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
+
+    /* an index name is mandatory and must fit the local buffer */
+    if (!retrieval_index_len ||
+        retrieval_index_len >= sizeof(retrieval_index_cstr)-1)
+        return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
+
+    memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len);
+    retrieval_index_cstr[retrieval_index_len] = '\0';
+
+    ord = zebraExplain_lookup_attr_str(zh->reg->zei,
+                                       zinfo_index_category_sort,
+                                       (retrieval_type_len == 0 ? -1 :
+                                        retrieval_type[0]),
+                                       retrieval_index_cstr);
+    if (ord == -1)
+        return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
+
+    zebra_sort_sysno(zh->reg->sort_index, sysno);
+    /* If the register cannot be opened, no (or the wrong) register is
+       current and zebra_sort_read() must not be called: it asserts that a
+       register is selected. */
+    if (zebra_sort_type(zh->reg->sort_index, ord) != 0)
+        return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
+
+    /* zebra_sort_read() may fill SORT_IDX_ENTRYSIZE bytes without a
+       terminating NUL; clearing first keeps str a C string
+       (assumes IT_MAX_WORD > SORT_IDX_ENTRYSIZE - TODO confirm). */
+    memset(str, 0, sizeof(str));
+    zebra_sort_read(zh->reg->sort_index, str);
+
+    zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db, &string_index);
+
+    zebra_term_untrans(zh, index_type, dst_buf, str);
+
+    /* allocated only now so that the error returns above cannot leak it */
+    wrbuf = wrbuf_alloc();
+    if (input_format == VAL_TEXT_XML)
+    {
+        *output_format = VAL_TEXT_XML;
+        wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
+                     " sysno=\"" ZINT_FORMAT "\""
+                     " set=\"zebra::sort%s/\">\n",
+                     sysno, elemsetname);
+
+        wrbuf_printf(wrbuf, "  <sort name=\"%s\"",
+                     string_index);
+        wrbuf_printf(wrbuf, " type=\"%c\">", index_type);
+        wrbuf_xmlputs(wrbuf, dst_buf);
+        wrbuf_printf(wrbuf, "</sort>\n");
+        wrbuf_printf(wrbuf, "</record>\n");
+    }
+    else if (input_format == VAL_SUTRS)
+    {
+        *output_format = VAL_SUTRS;
+
+        wrbuf_printf(wrbuf, "%s %c %s\n", string_index, index_type,
+                     dst_buf);
+    }
+    /* hand the caller a copy owned by odr, then drop the work buffer */
+    *rec_lenp = wrbuf_len(wrbuf);
+    *rec_bufp = odr_malloc(odr, *rec_lenp);
+    memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
+    wrbuf_free(wrbuf, 1);
+    return 0;
+}
+
int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr,
Record rec,
const char *elemsetname,
/* *rec_lenp = 0; */
/* only accept XML and SUTRS requests */
- if (input_format != VAL_TEXT_XML
- && input_format != VAL_SUTRS){
+ if (input_format != VAL_TEXT_XML && input_format != VAL_SUTRS)
+ {
yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s",
elemsetname);
*output_format = VAL_NONE;
{
char retrieval_index_cstr[256];
- if (retrieval_index_len < sizeof(retrieval_index_cstr) -1)
+ if (retrieval_index_len < sizeof(retrieval_index_cstr) -1)
{
memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len);
retrieval_index_cstr[retrieval_index_len] = '\0';
else if (input_format == VAL_SUTRS)
*output_format = VAL_SUTRS;
- while(zebra_rec_keys_read(keys, &str, &slen, &key_in)){
+ while (zebra_rec_keys_read(keys, &str, &slen, &key_in))
+ {
int i;
int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
int index_type;
if (retrieval_index == 0
|| (string_index_len == retrieval_index_len
&& !memcmp(string_index, retrieval_index,
- string_index_len))){
-
+ string_index_len)))
+ {
/* process only if type is not defined, or is matching */
if (retrieval_type == 0
|| (retrieval_type_len == 1
- && retrieval_type[0] == index_type)){
-
-
+ && retrieval_type[0] == index_type))
+ {
zebra_term_untrans(zh, index_type, dst_buf, str);
- if (strlen(dst_buf)){
-
+ if (strlen(dst_buf))
+ {
if (input_format == VAL_TEXT_XML){
wrbuf_printf(wrbuf, " <index name=\"%s\"",
string_index);
}
if (input_format == VAL_TEXT_XML)
wrbuf_printf(wrbuf, "</record>\n");
- }
+ }
*rec_lenp = wrbuf_len(wrbuf);
*rec_bufp = odr_malloc(odr, *rec_lenp);
memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
return ret;
}
+ /* processing special elementsetnames zebra::sort:: */
+ if (elemsetname && 0 == strncmp(elemsetname, "sort", 4))
+ {
+ return zebra_special_sort_fetch(zh, sysno, odr,
+ elemsetname + 4,
+ input_format, output_format,
+ rec_bufp, rec_lenp);
+ }
+
+
/* fetching binary record up for all other display elementsets */
rec = rec_get(zh->reg->records, sysno);
if (!rec)
}
/* processing special elementsetnames zebra::index:: */
- if (elemsetname && 0 == strncmp(elemsetname, "index", 5)){
-
+ if (elemsetname && 0 == strncmp(elemsetname, "index", 5))
+ {
int ret = zebra_special_index_fetch(zh, sysno, odr, rec,
elemsetname + 5,
input_format, output_format,
-/* $Id: sortidx.c,v 1.19 2006-11-21 22:17:49 adam Exp $
+/* $Id: sortidx.c,v 1.20 2006-12-18 23:40:07 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
#include <yaz/log.h>
#include <yaz/xmalloc.h>
+#include <idzebra/isamb.h>
#include <idzebra/bfile.h>
#include <sortidx.h>
#include "recindex.h"
+#define SORT_MAX_TERM 127
+
#define SORT_IDX_BLOCKSIZE 64
+struct sort_term {
+ zint sysno; /* record id; the only field sort_term_compare() looks at */
+ char term[SORT_MAX_TERM]; /* sort key, handled as a NUL-terminated string by the codec */
+};
+
+
+/* Log one stored item as "<txt> <sysno> <term>" at the given log level. */
+static void sort_term_log_item(int level, const void *b, const char *txt)
+{
+    struct sort_term item;
+
+    /* copy into a local so the fields are read from a properly typed object */
+    memcpy(&item, b, sizeof(item));
+    yaz_log(level, "%s " ZINT_FORMAT " %s", txt, item.sysno, item.term);
+}
+
+/* Order two items by record id only; the term text plays no part.
+ * Returns -1, 0 or 1. */
+int sort_term_compare(const void *a, const void *b)
+{
+    struct sort_term lhs;
+    struct sort_term rhs;
+
+    memcpy(&lhs, a, sizeof(lhs));
+    memcpy(&rhs, b, sizeof(rhs));
+
+    if (lhs.sysno < rhs.sysno)
+        return -1;
+    return lhs.sysno > rhs.sysno ? 1 : 0;
+}
+
+/* Codec start hook: this codec keeps no state, so the handle is null. */
+void *sort_term_code_start(void)
+{
+    void *no_state = 0;
+
+    return no_state;
+}
+
+/* Encode one struct sort_term from *src into *dst: the record id via
+ * zebra_zint_encode(), then the term including its terminating NUL.
+ * Both cursors are advanced past what was consumed / produced.
+ * p (codec state) is unused. */
+void sort_term_encode(void *p, char **dst, const char **src)
+{
+    struct sort_term item;
+    size_t term_size;
+
+    memcpy(&item, *src, sizeof(item));
+    *src += sizeof(item);
+
+    zebra_zint_encode(dst, item.sysno);
+
+    term_size = strlen(item.term) + 1;     /* text plus terminator */
+    memcpy(*dst, item.term, term_size);
+    *dst += term_size;
+}
+
+/* Decode one item from *src (record id, then NUL-terminated term) into a
+ * struct sort_term written at *dst.  Both cursors are advanced.
+ * p (codec state) is unused.
+ *
+ * The term is copied with a bound: a stored term longer than the term field
+ * (which zebra_sort_add never produces, but a damaged file could contain)
+ * is truncated instead of overrunning the local struct.  The struct is
+ * zeroed first so no indeterminate bytes are copied out. */
+void sort_term_decode(void *p, char **dst, const char **src)
+{
+    struct sort_term item;
+    size_t stored_len;
+    size_t keep_len;
+
+    memset(&item, 0, sizeof(item));
+    zebra_zint_decode(src, &item.sysno);
+
+    stored_len = strlen(*src);
+    keep_len = stored_len < sizeof(item.term)
+        ? stored_len : sizeof(item.term) - 1;
+    memcpy(item.term, *src, keep_len);     /* terminator comes from memset */
+    *src += stored_len + 1;                /* always skip the whole stored term */
+
+    memcpy(*dst, &item, sizeof(item));
+    *dst += sizeof(item);
+}
+
+/* Codec reset hook: nothing to reset, the codec is stateless. */
+void sort_term_code_reset(void *p)
+{
+    (void) p;
+}
+
+/* Codec stop hook: nothing to release, the codec is stateless. */
+void sort_term_code_stop(void *p)
+{
+    (void) p;
+}
+
+
+struct sort_term_stream {
+ int no; /* items still to be handed out by sort_term_code_read() */
+ int insert_flag; /* reported as the insert mode of every item handed out */
+ struct sort_term st; /* the item itself */
+};
+
+/* Item reader used for merges: vp is a struct sort_term_stream.
+ * While items remain, copies the stream's item to *dst (advancing it),
+ * reports the stream's insert flag in *insertMode and returns 1.
+ * Returns 0, touching nothing, once the stream is exhausted. */
+int sort_term_code_read(void *vp, char **dst, int *insertMode)
+{
+    struct sort_term_stream *stream = vp;
+
+    if (stream->no != 0)
+    {
+        stream->no--;
+        *insertMode = stream->insert_flag;
+        memcpy(*dst, &stream->st, sizeof(stream->st));
+        *dst += sizeof(stream->st);
+        return 1;
+    }
+    return 0;
+}
+
+
struct sortFileHead {
zint sysno_max;
};
struct sortFile {
- int type;
- BFile bf;
+ int id; /* sort register number; also part of the file name */
+ union {
+ BFile bf; /* used when the index type is ZEBRA_SORT_TYPE_FLAT */
+ ISAMB isamb; /* used when the index type is ZEBRA_SORT_TYPE_ISAMB */
+ } u;
+ ISAM_P isam_p; /* ISAMB only: root position, fetched at open and passed by address to isamb_merge */
+ ISAMB_PP isam_pp; /* ISAMB only: read position opened lazily by zebra_sort_read, 0 when closed */
struct sortFile *next;
struct sortFileHead head;
+ int no_inserted; /* ISAMB inserts done for the current sysno; reset by zebra_sort_sysno */
+ int no_deleted; /* ISAMB deletes done for the current sysno; reset by zebra_sort_sysno */
};
-struct sortIdx {
+struct zebra_sort_index {
BFiles bfs;
int write_flag;
zint sysno;
+ int type; /* ZEBRA_SORT_TYPE_FLAT or ZEBRA_SORT_TYPE_ISAMB, as passed to zebra_sort_open */
char *entry_buf;
struct sortFile *current_file;
struct sortFile *files;
};
-SortIdx sortIdx_open (BFiles bfs, int write_flag)
+/* Create a sort index handle on bfs.  type is ZEBRA_SORT_TYPE_FLAT or
+ * ZEBRA_SORT_TYPE_ISAMB and applies to every register opened later with
+ * zebra_sort_type(); no file is opened here.  Release with
+ * zebra_sort_close(). */
+zebra_sort_index_t zebra_sort_open(BFiles bfs, int write_flag, int type)
{
- SortIdx si = (SortIdx) xmalloc (sizeof(*si));
+ zebra_sort_index_t si = (zebra_sort_index_t) xmalloc(sizeof(*si));
si->bfs = bfs;
si->write_flag = write_flag;
+ /* zebra_sort_sysno() compares against the previous sysno and add/read use
+ it directly, so give it a defined value from the start */
+ si->sysno = 0;
si->current_file = NULL;
si->files = NULL;
- si->entry_buf = (char *) xmalloc (SORT_IDX_ENTRYSIZE);
+ si->type = type;
+ si->entry_buf = (char *) xmalloc(SORT_IDX_ENTRYSIZE);
return si;
}
-void sortIdx_close (SortIdx si)
+void zebra_sort_close(zebra_sort_index_t si) /* close every register opened so far, then free the handle itself */
{
struct sortFile *sf = si->files;
while (sf)
{
struct sortFile *sf_next = sf->next;
- if (sf->bf)
- bf_close (sf->bf);
- xfree (sf);
+ switch(si->type)
+ {
+ case ZEBRA_SORT_TYPE_FLAT:
+ bf_close(sf->u.bf);
+ break;
+ case ZEBRA_SORT_TYPE_ISAMB:
+ if (sf->isam_pp) /* a read position may still be open from zebra_sort_read */
+ isamb_pp_close(sf->isam_pp);
+ isamb_set_root_ptr(sf->u.isamb, sf->isam_p); /* hand the root back to the ISAMB handle; isamb_merge receives it by address and may have updated it */
+ isamb_close(sf->u.isamb);
+ break;
+ }
+ xfree(sf);
sf = sf_next;
}
- xfree (si->entry_buf);
- xfree (si);
+ xfree(si->entry_buf);
+ xfree(si);
}
-int sortIdx_type (SortIdx si, int type)
+/* Select sort register id as the current one, opening it on first use.
+ * FLAT registers live in block file "sort<id>", ISAMB registers in
+ * "sortb<id>".  Returns 0 on success.  Returns -1 when the register cannot
+ * be opened (for FLAT also when it has no header and the index is
+ * read-only) or the index type is unknown; in that case the previously
+ * selected register, if any, stays current. */
+int zebra_sort_type(zebra_sort_index_t si, int id)
{
+ int isam_block_size = 4096;
+ ISAMC_M method;
char fname[80];
struct sortFile *sf;
- if (si->current_file && si->current_file->type == type)
+ if (si->current_file && si->current_file->id == id)
return 0;
for (sf = si->files; sf; sf = sf->next)
- if (sf->type == type)
+ if (sf->id == id)
{
si->current_file = sf;
return 0;
}
- sf = (struct sortFile *) xmalloc (sizeof(*sf));
- sf->type = type;
- sf->bf = NULL;
- sprintf (fname, "sort%d", type);
- yaz_log (YLOG_DEBUG, "sort idx %s wr=%d", fname, si->write_flag);
- sf->bf = bf_open (si->bfs, fname, SORT_IDX_BLOCKSIZE, si->write_flag);
- if (!sf->bf)
- {
- xfree (sf);
- return -1;
- }
- if (!bf_read (sf->bf, 0, 0, sizeof(sf->head), &sf->head))
+ sf = (struct sortFile *) xmalloc(sizeof(*sf));
+ sf->id = id;
+ /* zebra_sort_sysno() inspects isam_pp on every register whatever the
+ index type, so these must never be left as malloc garbage */
+ sf->isam_p = 0;
+ sf->isam_pp = 0;
+
+ method.compare_item = sort_term_compare;
+ method.log_item = sort_term_log_item;
+ method.codec.start = sort_term_code_start;
+ method.codec.encode = sort_term_encode;
+ method.codec.decode = sort_term_decode;
+ method.codec.reset = sort_term_code_reset;
+ method.codec.stop = sort_term_code_stop;
+
+ switch(si->type)
{
- sf->head.sysno_max = 0;
- if (!si->write_flag)
+ case ZEBRA_SORT_TYPE_FLAT:
+ sf->u.bf = NULL;
+ sprintf(fname, "sort%d", id);
+ yaz_log(YLOG_DEBUG, "sort idx %s wr=%d", fname, si->write_flag);
+ sf->u.bf = bf_open(si->bfs, fname, SORT_IDX_BLOCKSIZE, si->write_flag);
+ if (!sf->u.bf)
+ {
+ xfree(sf);
+ return -1;
+ }
+ if (!bf_read(sf->u.bf, 0, 0, sizeof(sf->head), &sf->head))
+ {
+ sf->head.sysno_max = 0;
+ if (!si->write_flag)
+ {
+ bf_close(sf->u.bf);
+ xfree(sf);
+ return -1;
+ }
+ }
+ break;
+ case ZEBRA_SORT_TYPE_ISAMB:
+ sprintf(fname, "sortb%d", id);
+
+ sf->u.isamb = isamb_open2(si->bfs, fname, si->write_flag, &method,
+ /* cache */ 0,
+ /* no_cat */ 1, &isam_block_size,
+ /* use_root_ptr */ 1);
+ if (!sf->u.isamb)
+ {
+ xfree(sf);
+ return -1;
+ }
+ else
{
- bf_close (sf->bf);
- xfree (sf);
- return -1;
+ sf->isam_p = isamb_get_root_ptr(sf->u.isamb);
+ sf->isam_pp = 0;
}
+ break;
+ default:
+ /* unknown index type: do not link a register with an unset handle */
+ xfree(sf);
+ return -1;
}
+ sf->no_inserted = 0;
+ sf->no_deleted = 0;
sf->next = si->files;
si->current_file = si->files = sf;
return 0;
}
-void sortIdx_sysno(SortIdx si, zint sysno)
+/* Select the record that the following add/delete/read calls operate on.
+ * The per-record insert/delete counters of every open register are reset.
+ * For ISAMB registers an open read position is closed unless the new sysno
+ * is strictly greater than the previous one: zebra_sort_read() only moves
+ * the position forward, so after reading a record the position is already
+ * past it and the same (or a lower) sysno could not be found again. */
+void zebra_sort_sysno(zebra_sort_index_t si, zint sysno)
{
- si->sysno = rec_sysno_to_int(sysno);
+ struct sortFile *sf;
+ zint new_sysno = rec_sysno_to_int(sysno);
+
+ for (sf = si->files; sf; sf = sf->next)
+ {
+ sf->no_inserted = 0;
+ sf->no_deleted = 0;
+ /* isam_pp is only meaningful for ISAMB registers */
+ if (si->type == ZEBRA_SORT_TYPE_ISAMB
+ && sf->isam_pp && new_sysno <= si->sysno)
+ {
+ isamb_pp_close(sf->isam_pp);
+ sf->isam_pp = 0;
+ }
+ }
+ si->sysno = new_sysno;
}
-void sortIdx_add(SortIdx si, const char *buf, int len)
+
+void zebra_sort_delete(zebra_sort_index_t si) /* drop the sort key of the current sysno from the current register; no-op when no register is selected */
{
- if (!si->current_file || !si->current_file->bf)
- return;
- if (len > SORT_IDX_ENTRYSIZE)
+ struct sortFile *sf = si->current_file;
+
+ if (!sf || !sf->u.bf) /* NOTE(review): tests u.bf for ISAMB registers too; works because both union members are pointers */
+ return;
+ switch(si->type)
{
- len = SORT_IDX_ENTRYSIZE;
- memcpy (si->entry_buf, buf, len);
+ case ZEBRA_SORT_TYPE_FLAT:
+ zebra_sort_add(si, "", 0); /* flat files have no delete: the slot is overwritten with an all-zero entry */
+ break;
+ case ZEBRA_SORT_TYPE_ISAMB:
+ assert(sf->u.isamb);
+ if (sf->no_deleted == 0) /* at most one delete per register for each selected sysno */
+ {
+ struct sort_term_stream s;
+ ISAMC_I isamc_i;
+
+ s.st.sysno = si->sysno;
+ s.st.term[0] = '\0'; /* the term is irrelevant: items are compared by sysno only */
+
+ s.no = 1;
+ s.insert_flag = 0; /* 0 = delete mode for the merge */
+ isamc_i.clientData = &s;
+ isamc_i.read_item = sort_term_code_read;
+
+ isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i);
+ sf->no_deleted++;
+ }
+ break;
}
- else
+}
+
+void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len) /* store the len bytes at buf as sort key of the current sysno in the current register; no-op when no register is selected */
+{
+ struct sortFile *sf = si->current_file;
+
+ if (!sf || !sf->u.bf) /* NOTE(review): tests u.bf for ISAMB registers too; works because both union members are pointers */
+ return;
+ switch(si->type)
{
- memcpy (si->entry_buf, buf, len);
- memset (si->entry_buf+len, 0, SORT_IDX_ENTRYSIZE-len);
+ case ZEBRA_SORT_TYPE_FLAT:
+ if (len > SORT_IDX_ENTRYSIZE) /* over-long key: keep the first SORT_IDX_ENTRYSIZE bytes, no terminator */
+ {
+ len = SORT_IDX_ENTRYSIZE;
+ memcpy(si->entry_buf, buf, len);
+ }
+ else
+ {
+ memcpy(si->entry_buf, buf, len);
+ memset(si->entry_buf+len, 0, SORT_IDX_ENTRYSIZE-len); /* zero-pad the rest of the entry */
+ }
+ bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf); /* entry for a record lives in block sysno+1; block 0 is where zebra_sort_type reads the head from */
+ break;
+ case ZEBRA_SORT_TYPE_ISAMB:
+ assert(sf->u.isamb);
+ if (sf->no_inserted == 0) /* only the first key per register is stored for each selected sysno */
+ {
+ struct sort_term_stream s;
+ ISAMC_I isamc_i;
+
+ s.st.sysno = si->sysno;
+ if (len >= SORT_MAX_TERM) /* truncate so the terminator still fits */
+ len = SORT_MAX_TERM-1;
+ memcpy(s.st.term, buf, len);
+ s.st.term[len] = '\0';
+ s.no = 1;
+ s.insert_flag = 1; /* 1 = insert mode for the merge */
+ isamc_i.clientData = &s;
+ isamc_i.read_item = sort_term_code_read;
+
+ isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i);
+ sf->no_inserted++;
+ }
+ break;
}
- bf_write (si->current_file->bf, si->sysno+1, 0, 0, si->entry_buf);
}
-void sortIdx_read (SortIdx si, char *buf)
+void zebra_sort_read(zebra_sort_index_t si, char *buf) /* copy the sort key of the current sysno from the current register into buf (SORT_IDX_ENTRYSIZE bytes); all zero when nothing is stored */
{
int r;
+ struct sortFile *sf = si->current_file;
+
+ assert(sf); /* a register must have been selected with zebra_sort_type first */
+
+ switch(si->type)
+ {
+ case ZEBRA_SORT_TYPE_FLAT:
+ r = bf_read(sf->u.bf, si->sysno+1, 0, 0, buf);
+ if (!r) /* block not present: report an all-zero key */
+ memset(buf, 0, SORT_IDX_ENTRYSIZE);
+ break;
+ case ZEBRA_SORT_TYPE_ISAMB:
+ memset(buf, 0, SORT_IDX_ENTRYSIZE); /* every early return below leaves this all-zero key */
+ assert(sf->u.bf);
+ if (sf->u.bf)
+ {
+ struct sort_term st, st_untilbuf;
+
+ st.sysno = 99999; /* NOTE(review): placeholder value; its purpose is not evident from this file */
+ if (!sf->isam_pp) /* the read position is opened lazily and kept across calls */
+ {
+ yaz_log(YLOG_LOG, "isamb_pp_open " ZINT_FORMAT, sf->isam_p);
+ sf->isam_pp = isamb_pp_open(sf->u.isamb, sf->isam_p, 1);
+ }
+ if (!sf->isam_pp)
+ return;
- assert(si->current_file);
- r = bf_read (si->current_file->bf, si->sysno+1, 0, 0, buf);
- if (!r)
- memset (buf, 0, SORT_IDX_ENTRYSIZE);
+#if 0
+ while (1)
+ {
+ r = isamb_pp_read(sf->isam_pp, &st);
+ if (!r)
+ break;
+ if (st.sysno == si->sysno)
+ break;
+ yaz_log(YLOG_LOG, "Received sysno=" ZINT_FORMAT " looking for "
+ ZINT_FORMAT, st.sysno, si->sysno);
+ }
+#else
+ st_untilbuf.sysno = si->sysno;
+ st_untilbuf.term[0] = '\0';
+ r = isamb_pp_forward(sf->isam_pp, &st, &st_untilbuf); /* presumably skips ahead to the first item not below st_untilbuf - confirm against isamb_pp_forward */
+ if (!r)
+ return;
+#endif
+ if (r)
+ {
+ if (st.sysno != si->sysno) /* landed on another record: no key stored for this sysno */
+ {
+ yaz_log(YLOG_LOG, "Received sysno=" ZINT_FORMAT " looking for "
+ ZINT_FORMAT, st.sysno, si->sysno);
+ return;
+ }
+ if (strlen(st.term) < SORT_IDX_ENTRYSIZE)
+ strcpy(buf, st.term);
+ else
+ memcpy(buf, st.term, SORT_IDX_ENTRYSIZE); /* long key: truncated to the entry size, no terminating NUL */
+ }
+ }
+ break;
+ }
}
/*
* Local variables:
-/* $Id: zebraapi.c,v 1.237 2006-12-05 14:06:29 adam Exp $
+/* $Id: zebraapi.c,v 1.238 2006-12-18 23:40:07 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
const char *recordCompression = 0;
const char *profilePath;
char cwd[1024];
+ int sort_type = ZEBRA_SORT_TYPE_FLAT;
ZEBRA_RES ret = ZEBRA_OK;
ASSERTZS;
reg->records = 0;
reg->dict = 0;
- reg->sortIdx = 0;
+ reg->sort_index = 0;
reg->isams = 0;
reg->matchDict = 0;
reg->isamc = 0;
yaz_log (YLOG_WARN, "dict_open failed");
ret = ZEBRA_FAIL;
}
- if (!(reg->sortIdx = sortIdx_open (reg->bfs, rw)))
+
+
+ if (res_get_match (res, "sortindex", "f", "f"))
+ sort_type = ZEBRA_SORT_TYPE_FLAT;
+ else if (res_get_match (res, "sortindex", "i", "f"))
+ sort_type = ZEBRA_SORT_TYPE_ISAMB;
+ else
+ {
+ yaz_log (YLOG_WARN, "bad_value for 'sort:'");
+ ret = ZEBRA_FAIL;
+ }
+
+
+ if (!(reg->sort_index = zebra_sort_open(reg->bfs, rw, sort_type)))
{
- yaz_log (YLOG_WARN, "sortIdx_open failed");
+ yaz_log (YLOG_WARN, "zebra_sort_open failed");
ret = ZEBRA_FAIL;
}
if (res_get_match (res, "isam", "s", ISAM_DEFAULT))
dict_close (reg->dict);
if (reg->matchDict)
dict_close (reg->matchDict);
- sortIdx_close (reg->sortIdx);
+ zebra_sort_close(reg->sort_index);
if (reg->isams)
isams_close (reg->isams);
if (reg->isamc)
-/* $Id: zsets.c,v 1.113 2006-11-30 10:33:19 adam Exp $
+/* $Id: zsets.c,v 1.114 2006-12-18 23:40:08 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
struct zset_sort_info *sort_info = sset->sort_info;
int i, j;
- sortIdx_sysno (zh->reg->sortIdx, sysno);
+ zebra_sort_sysno(zh->reg->sort_index, sysno);
for (i = 0; i<num_criteria; i++)
{
char *this_entry_buf = tmp_cmp_buf[i];
memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
if (criteria[i].ord != -1)
{
- sortIdx_type(zh->reg->sortIdx, criteria[i].ord);
- sortIdx_read(zh->reg->sortIdx, this_entry_buf);
+ zebra_sort_type(zh->reg->sort_index, criteria[i].ord);
+ zebra_sort_read(zh->reg->sort_index, this_entry_buf);
}
}
i = sort_info->num_entries;
-/* $Id: isamb.c,v 1.88 2006-12-12 13:46:41 adam Exp $
+/* $Id: isamb.c,v 1.89 2006-12-18 23:40:08 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
#define ISAMB_MAJOR_VERSION 3
-#define ISAMB_MINOR_VERSION 0
+#define ISAMB_MINOR_VERSION_NO_ROOT 0
+#define ISAMB_MINOR_VERSION_WITH_ROOT 1
struct ISAMB_head {
zint first_block;
zint number_of_leaf_splits;
int enable_int_count; /* whether we count nodes (or not) */
int cache_size; /* size of blocks to cache (if cache=1) */
+ int minor_version;
+ zint root_ptr;
};
struct ISAMB_block {
b->cache_size = v;
}
-ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method,
- int cache)
+ISAMB isamb_open2(BFiles bfs, const char *name, int writeflag, ISAMC_M *method,
+ int cache, int no_cat, int *sizes, int use_root_ptr)
{
ISAMB isamb = xmalloc(sizeof(*isamb));
- int i, b_size = ISAMB_MIN_SIZE;
+ int i;
+
+ assert(no_cat <= CAT_MAX);
isamb->bfs = bfs;
isamb->method = (ISAMC_M *) xmalloc(sizeof(*method));
memcpy(isamb->method, method, sizeof(*method));
- isamb->no_cat = CAT_NO;
+ isamb->no_cat = no_cat;
isamb->log_io = 0;
isamb->log_freelist = 0;
isamb->cache = cache;
isamb->enable_int_count = 1;
isamb->cache_size = 40;
+ if (use_root_ptr)
+ isamb->minor_version = ISAMB_MINOR_VERSION_WITH_ROOT;
+ else
+ isamb->minor_version = ISAMB_MINOR_VERSION_NO_ROOT;
+
+ isamb->root_ptr = 0;
+
for (i = 0; i<ISAMB_MAX_LEVEL; i++)
isamb->skipped_nodes[i] = isamb->accessed_nodes[i] = 0;
isamb->file[i].bf = bf_open(bfs, fname, ISAMB_CACHE_ENTRY_SIZE,
writeflag);
else
- isamb->file[i].bf = bf_open(bfs, fname, b_size, writeflag);
+ isamb->file[i].bf = bf_open(bfs, fname, sizes[i], writeflag);
if (!isamb->file[i].bf)
{
}
/* fill-in default values (for empty isamb) */
- isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1;
+ isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/sizes[i]+1;
isamb->file[i].head.last_block = isamb->file[i].head.first_block;
- isamb->file[i].head.block_size = b_size;
- assert(b_size <= ISAMB_CACHE_ENTRY_SIZE);
+ isamb->file[i].head.block_size = sizes[i];
+ assert(sizes[i] <= ISAMB_CACHE_ENTRY_SIZE);
#if ISAMB_PTR_CODEC
- if (i == isamb->no_cat-1 || b_size > 128)
+ if (i == isamb->no_cat-1 || sizes[i] > 128)
isamb->file[i].head.block_offset = 8;
else
isamb->file[i].head.block_offset = 4;
isamb->file[i].head.block_offset = 11;
#endif
isamb->file[i].head.block_max =
- b_size - isamb->file[i].head.block_offset;
+ sizes[i] - isamb->file[i].head.block_offset;
isamb->file[i].head.free_list = 0;
if (bf_read(isamb->file[i].bf, 0, 0, 0, hbuf))
{
fname, major, ISAMB_MAJOR_VERSION);
return 0;
}
- for (left = len - b_size; left > 0; left = left - b_size)
+ for (left = len - sizes[i]; left > 0; left = left - sizes[i])
{
pos++;
- if (!bf_read(isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size))
+ if (!bf_read(isamb->file[i].bf, pos, 0, 0, hbuf + pos*sizes[i]))
{
yaz_log(YLOG_WARN, "truncated isamb header for "
"file=%s len=%d pos=%d",
decode_ptr(&src, &zint_tmp);
isamb->file[i].head.block_max = (int) zint_tmp;
decode_ptr(&src, &isamb->file[i].head.free_list);
+ if (isamb->minor_version >= ISAMB_MINOR_VERSION_WITH_ROOT)
+ decode_ptr(&src, &isamb->root_ptr);
}
assert (isamb->file[i].head.block_size >= isamb->file[i].head.block_offset);
isamb->file[i].head_dirty = 0;
- assert(isamb->file[i].head.block_size == b_size);
- b_size = b_size * ISAMB_FAC_SIZE;
+ assert(isamb->file[i].head.block_size == sizes[i]);
}
#if ISAMB_DEBUG
yaz_log(YLOG_WARN, "isamb debug enabled. Things will be slower than usual");
return isamb;
}
+/* Open an ISAMB with the standard layout: CAT_NO block categories whose
+ * sizes start at ISAMB_MIN_SIZE and grow by ISAMB_FAC_SIZE per category,
+ * and no root pointer in the header.  Thin wrapper over isamb_open2(). */
+ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method,
+                 int cache)
+{
+    int sizes[CAT_NO];
+    int cat;
+    int block_size;
+
+    for (cat = 0, block_size = ISAMB_MIN_SIZE; cat < CAT_NO;
+         cat++, block_size = block_size * ISAMB_FAC_SIZE)
+        sizes[cat] = block_size;
+
+    return isamb_open2(bfs, name, writeflag, method, cache,
+                       CAT_NO, sizes, 0);
+}
+
static void flush_blocks (ISAMB b, int cat)
{
while (b->file[cat].cache_entries)
{
char hbuf[DST_BUF_SIZE];
int major = ISAMB_MAJOR_VERSION;
- int minor = ISAMB_MINOR_VERSION;
int len = 16;
char *dst = hbuf + 16;
int pos = 0, left;
encode_ptr(&dst, isamb->file[i].head.block_size);
encode_ptr(&dst, isamb->file[i].head.block_max);
encode_ptr(&dst, isamb->file[i].head.free_list);
+
+ if (isamb->minor_version >= ISAMB_MINOR_VERSION_WITH_ROOT)
+ encode_ptr(&dst, isamb->root_ptr);
+
memset(dst, '\0', b_size); /* ensure no random bytes are written */
len = dst - hbuf;
/* print exactly 16 bytes (including trailing 0) */
- sprintf(hbuf, "isamb%02d %02d %02d\r\n", major, minor, len);
+ sprintf(hbuf, "isamb%02d %02d %02d\r\n", major,
+ isamb->minor_version, len);
bf_write(isamb->file[i].bf, pos, 0, 0, hbuf);
return b->number_of_leaf_splits;
}
+/* Return the root pointer currently held in the ISAMB handle. */
+zint isamb_get_root_ptr(ISAMB b)
+{
+    zint root = b->root_ptr;
+
+    return root;
+}
+
+/* Replace the root pointer held in the ISAMB handle with root_ptr. */
+void isamb_set_root_ptr(ISAMB b, zint root_ptr)
+{
+    ISAMB handle = b;
+
+    handle->root_ptr = root_ptr;
+}
+
+
/*
* Local variables:
* c-basic-offset: 4