field/segment).
+Experimental segment facility (for matching of words within one
+field/segment).
+
--- 2.0.0 2006/08/14
New record filter (record type) 'alvis' which uses XSLT transformations
dnl Zebra, Index Data ApS, 1995-2006
-dnl $Id: configure.ac,v 1.26 2006-08-14 12:18:46 adam Exp $
+dnl $Id: configure.ac,v 1.27 2006-08-16 13:16:35 adam Exp $
dnl
AC_PREREQ(2.59)
-AC_INIT([idzebra],[2.0.0],[adam@indexdata.dk])
+AC_INIT([idzebra],[2.0.1],[adam@indexdata.dk])
AC_CONFIG_SRCDIR(configure.ac)
AC_CONFIG_AUX_DIR(config)
AM_INIT_AUTOMAKE([1.8])
-/* $Id: recctrl.h,v 1.27 2006-08-15 14:28:33 adam Exp $
+/* $Id: recctrl.h,v 1.28 2006-08-16 13:16:35 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
const char *term_buf;
int term_len;
zint seqno;
+ zint segment;
zint record_id;
zint section_id;
struct recExtractCtrl *extractCtrl;
-/* $Id: version.h,v 1.7 2006-08-14 10:40:14 adam Exp $
+/* $Id: version.h,v 1.8 2006-08-16 13:16:35 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
#ifndef ZEBRAVER
-#define ZEBRAVER "2.0.0"
+#define ZEBRAVER "2.0.1"
-#define ZEBRADATE "$Date: 2006-08-14 10:40:14 $"
+#define ZEBRADATE "$Date: 2006-08-16 13:16:35 $"
#endif
/*
-/* $Id: extract.c,v 1.226 2006-08-15 14:28:33 adam Exp $
+/* $Id: extract.c,v 1.227 2006-08-16 13:16:36 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
static void extract_set_store_data_prepare(struct recExtractCtrl *p);
/* Reset the RecWord w to its defaults: sequence number 1, index name
 * "any", record/section ids 0, and extractCtrl pointing back at p. */
-static void extract_init (struct recExtractCtrl *p, RecWord *w)
+static void extract_init(struct recExtractCtrl *p, RecWord *w)
{
w->seqno = 1;
w->index_name = "any";
w->extractCtrl = p;
w->record_id = 0;
w->section_id = 0;
+ w->segment = 0; /* member introduced by this change; default is segment 0 */
}
static void searchRecordKey(ZebraHandle zh,
zint seqno;
while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
{
- assert(key.len <= 4 && key.len > 2);
+ assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
seqno = key.mem[key.len-1];
while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
{
int ch = 0;
+ int i, j = 0;
struct it_key key_out;
- zint *keyp = key_out.mem;
- assert(key_in.len == 4);
+ assert(key_in.len >= 2);
+ assert(key_in.len <= IT_KEY_LEVEL_MAX);
/* check for buffer overflow */
if (zh->reg->key_buf_used + 1024 >
(zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] =
(char*)zh->reg->key_buf + zh->reg->key_buf_used;
+ /* key_in.mem[0] ord/ch */
+ /* key_in.mem[1] filter specified record ID */
+
/* encode the ordinal value (field/use/attribute) .. */
ch = CAST_ZINT_TO_INT(key_in.mem[0]);
zh->reg->key_buf_used +=
(long) staticrank);
staticrank = 0;
}
- *keyp++ = staticrank;
- key_out.len = 4;
+ key_out.mem[j++] = staticrank;
}
- else
- key_out.len = 3;
if (key_in.mem[1]) /* filter specified record ID */
- *keyp++ = key_in.mem[1];
+ key_out.mem[j++] = key_in.mem[1];
else
- *keyp++ = sysno;
- *keyp++ = key_in.mem[2]; /* section_id */
- *keyp++ = key_in.mem[3]; /* sequence .. */
-
+ key_out.mem[j++] = sysno;
+ for (i = 2; i < key_in.len; i++)
+ key_out.mem[j++] = key_in.mem[i];
+ key_out.len = j;
+
memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used,
&key_out, sizeof(key_out));
(zh->reg->key_buf_used) += sizeof(key_out);
zint seqno;
int index_type;
- assert(key.len <= 4 && key.len > 2);
+ assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
seqno = key.mem[key.len-1];
ord = CAST_ZINT_TO_INT(key.mem[0]);
int index_type;
int ord = CAST_ZINT_TO_INT(key.mem[0]);
const char *db = 0;
- assert(key.len <= 4 && key.len > 2);
+ assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db, 0);
struct it_key key;
ZebraHandle zh = p->extractCtrl->handle;
ZebraExplainInfo zei = zh->reg->zei;
- int ch;
-
- if (!p->index_name)
- return;
+ int ch, i;
ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name);
if (ch < 0)
ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name);
- key.len = 4;
- key.mem[0] = ch;
- key.mem[1] = p->record_id;
- key.mem[2] = p->section_id;
- key.mem[3] = p->seqno;
+ i = 0;
+ key.mem[i++] = ch;
+ key.mem[i++] = p->record_id;
+ key.mem[i++] = p->section_id;
-#if 0
- if (1)
- {
- char strz[80];
- int i;
-
- strz[0] = 0;
- for (i = 0; i<length && i < 20; i++)
- sprintf(strz+strlen(strz), "%02X", str[i] & 0xff);
- /* just for debugging .. */
- yaz_log(YLOG_LOG, "add: set=%d use=%d "
- "record_id=%lld section_id=%lld seqno=%lld %s",
- p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno,
- strz);
- }
-#endif
+ if (zh->m_segment_indexing)
+ key.mem[i++] = p->segment;
+ key.mem[i++] = p->seqno;
+ key.len = i;
zebra_rec_keys_write(zh->reg->keys, str, length, &key);
}
ZebraExplainInfo zei = zh->reg->zei;
int ch;
zinfo_index_category_t cat = zinfo_index_category_sort;
-
-
- if (!p->index_name)
- return;
ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name);
if (ch < 0)
ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name);
- key.len = 4;
+ key.len = 2;
key.mem[0] = ch;
key.mem[1] = p->record_id;
- key.mem[2] = p->section_id;
- key.mem[3] = p->seqno;
-#if 0
- if (1)
- {
- char strz[80];
- int i;
-
- strz[0] = 0;
- for (i = 0; i<length && i < 20; i++)
- sprintf(strz+strlen(strz), "%02X", str[i] & 0xff);
- /* just for debugging .. */
- yaz_log(YLOG_LOG, "add: set=%d use=%d "
- "record_id=%lld section_id=%lld seqno=%lld %s",
- p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno,
- strz);
- }
-#endif
zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
}
-static void extract_add_string (RecWord *p, const char *string, int length)
+static void extract_add_string(RecWord *p, const char *string, int length)
{
ZebraHandle zh = p->extractCtrl->handle;
assert (length > 0);
- if (zebra_maps_is_sort (zh->reg->zebra_maps, p->index_type))
- extract_add_sort_string (p, string, length);
+
+ if (!p->index_name)
+ return;
+
+ if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type))
+ extract_add_sort_string(p, string, length);
else
{
extract_add_index_string(p, zinfo_index_category_index,
-/* $Id: index.h,v 1.172 2006-08-15 14:28:34 adam Exp $
+/* $Id: index.h,v 1.173 2006-08-16 13:16:36 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
int shadow_enable;
int m_staticrank;
+ int m_segment_indexing;
zint records_inserted;
zint records_updated;
-/* $Id: kcompare.c,v 1.60 2006-08-14 10:40:15 adam Exp $
+/* $Id: kcompare.c,v 1.61 2006-08-16 13:16:36 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
int i, l = ((struct it_key *) p1)->len;
if (((struct it_key *) p2)->len > l)
l = ((struct it_key *) p2)->len;
- assert (l <= 4 && l > 0);
+ assert (l <= IT_KEY_LEVEL_MAX && l > 0);
for (i = 0; i < l; i++)
{
if (((struct it_key *) p1)->mem[i] != ((struct it_key *) p2)->mem[i])
l = i1.len;
if (i2.len > l)
l = i2.len;
- assert (l <= 4 && l > 0);
+ assert (l <= IT_KEY_LEVEL_MAX && l > 0);
for (i = 0; i < l; i++)
{
if (i1.mem[i] != i2.mem[i])
{
struct it_key k;
memcpy (&k, p, sizeof(k));
- return k.mem[k.len-1] / KEY_SEGMENT_SIZE;
+ return k.mem[k.len-2];
}
int key_qsort_compare (const void *p1, const void *p2)
{
int i;
key->len = 0;
- for (i = 0; i<IT_KEY_LEVEL_MAX; i++)
+ for (i = 0; i < IT_KEY_LEVEL_MAX; i++)
key->mem[i] = 0;
}
struct iscz1_code_info *p = (struct iscz1_code_info *) vp;
int i;
p->key.len = 0;
- for (i = 0; i< IT_KEY_LEVEL_MAX; i++)
+ for (i = 0; i < IT_KEY_LEVEL_MAX; i++)
p->key.mem[i] = 0;
}
/* deal with leader + delta encoding .. */
d = 0;
- assert(tkey.len > 0 && tkey.len <= 4);
+ assert(tkey.len > 0 && tkey.len <= IT_KEY_LEVEL_MAX);
for (i = 0; i < tkey.len; i++)
{
d = tkey.mem[i] - p->key.mem[i];
-/* $Id: kcontrol.c,v 1.4 2006-08-14 10:40:15 adam Exp $
+/* $Id: kcontrol.c,v 1.5 2006-08-16 13:16:36 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
kc->context = cp;
kc->key_size = sizeof(struct it_key);
- kc->scope = 2;
kc->cmp = key_compare_it;
kc->key_logdump_txt = key_logdump_txt;
kc->getseq = key_get_seq;
- res_val = zebra_get_resource(zh, "segment", 0);
- kc->get_segment = 0;
- if (res_val && atoi(res_val))
+
+ if (zh->m_segment_indexing)
{
+ kc->scope = 3; /* segment + seq is "same" record */
kc->get_segment = key_get_segment;
}
+ else
+ {
+ kc->scope = 2; /* seq is "same" record */
+ kc->get_segment = 0;
+ }
+
zebra_limit_for_rset(zh->m_limit,
&kc->filter_func,
&cp->filter_destroy,
-/* $Id: limit.c,v 1.8 2006-08-14 10:40:15 adam Exp $
+/* $Id: limit.c,v 1.9 2006-08-16 13:16:36 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
#if ZEBRA_LIMIT_DEBUG
yaz_log(YLOG_LOG, "zebra_limit_filter_cb zl=%p key->len=%d", zl, key->len);
#endif
- if (key->len != 3)
- return 1;
for (i = 0; zl->ids[i]; i++)
{
#if ZEBRA_LIMIT_DEBUG
-/* $Id: safari.c,v 1.2 2006-08-14 10:40:15 adam Exp $
+/* $Id: safari.c,v 1.3 2006-08-16 13:16:36 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
#include <idzebra/recctrl.h>
/* Per-filter state for the safari record filters. */
struct filter_info {
- char *sep;
+ int segments; /* non-zero: filter_extract parses an extra segment column on each line */
};
/* Two initializers that differ only in the segments flag they store:
 * filter_init sets it to 0, filter_init2 sets it to 1 (used by the
 * "safari2" record type). Both return a freshly xmalloc'ed filter_info. */
-static void *filter_init (Res res, RecType recType)
+static void *filter_init(Res res, RecType recType)
{
struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
- tinfo->sep = 0;
+ tinfo->segments = 0; /* lines are: record_id section_id seqno index term */
+ return tinfo;
+}
+
+static void *filter_init2(Res res, RecType recType)
+{
+ struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
+ tinfo->segments = 1; /* lines are: record_id section_id segment seqno index term */
return tinfo;
}
/* Release the filter_info handed in as clientData. The struct no longer
 * owns a separately allocated sep string, so only the struct is freed. */
static void filter_destroy(void *clientData)
{
struct filter_info *tinfo = clientData;
- xfree (tinfo->sep);
xfree (tinfo);
}
yaz_log(YLOG_LOG, "filter_extract off=%ld",
(long) (*fi->p->tellf)(fi->p->fh));
#endif
- xfree(tinfo->sep);
- tinfo->sep = 0;
(*p->init)(p, &recWord);
if (!fi_gets(fi, line, sizeof(line)-1))
#if 0
yaz_log(YLOG_LOG, "safari line: %s", line);
#endif
- if (sscanf(line, ZINT_FORMAT " " ZINT_FORMAT " " ZINT_FORMAT " %39s %n",
- &recWord.record_id, &recWord.section_id, &recWord.seqno,
- field, &nor) < 4)
- {
- yaz_log(YLOG_WARN, "Bad safari record line: %s", line);
- return RECCTRL_EXTRACT_ERROR_GENERIC;
- }
+ if (tinfo->segments)
+ {
+ if (sscanf(line, ZINT_FORMAT " " ZINT_FORMAT " " ZINT_FORMAT
+ ZINT_FORMAT " %39s %n",
+ &recWord.record_id, &recWord.section_id,
+ &recWord.segment,
+ &recWord.seqno,
+ field, &nor) < 5)
+ {
+ yaz_log(YLOG_WARN, "Bad safari record line: %s", line);
+ return RECCTRL_EXTRACT_ERROR_GENERIC;
+ }
+ }
+ else
+ {
+ if (sscanf(line, ZINT_FORMAT " " ZINT_FORMAT " " ZINT_FORMAT " %39s %n",
+ &recWord.record_id, &recWord.section_id, &recWord.seqno,
+ field, &nor) < 4)
+ {
+ yaz_log(YLOG_WARN, "Bad safari record line: %s", line);
+ return RECCTRL_EXTRACT_ERROR_GENERIC;
+ }
+ }
for (cp = line + nor; *cp == ' '; cp++)
;
recWord.index_name = field;
filter_retrieve
};
+static struct recType filter_type2 = { /* segment-aware variant of the safari filter */
+ 0,
+ "safari2", /* name selected with "recordType: safari2" in the config */
+ filter_init2, /* only difference from the plain filter: segments flag is set */
+ filter_config,
+ filter_destroy,
+ filter_extract,
+ filter_retrieve
+};
+
RecType
#ifdef IDZEBRA_STATIC_SAFARI
idzebra_filter_safari
[] = {
&filter_type,
+ &filter_type2,
0,
};
/*
-/* $Id: zebraapi.c,v 1.224 2006-08-14 10:40:15 adam Exp $
+/* $Id: zebraapi.c,v 1.225 2006-08-16 13:16:36 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
zh->shadow_enable = 1;
zh->m_staticrank = 0;
+ zh->m_segment_indexing = 0;
default_encoding = res_get_def(zh->session_res, "encoding", "ISO-8859-1");
if (res_get_int(zh->res, "staticrank", &zh->m_staticrank) == ZEBRA_OK)
yaz_log(YLOG_LOG, "static rank set and is %d", zh->m_staticrank);
}
+ if (zh->res)
+ {
+ if (res_get_int(zh->res, "segment", &zh->m_segment_indexing) ==
+ ZEBRA_OK)
+ yaz_log(YLOG_LOG, "segment indexing set and is %d",
+ zh->m_segment_indexing);
+ }
}
void map_basenames_func (void *vp, const char *name, const char *value)
-# $Id: safari.cfg,v 1.4 2006-07-04 14:10:32 adam Exp $
+# $Id: safari.cfg,v 1.5 2006-08-16 13:16:37 adam Exp $
profilepath: ${srcdir:-.}/../../tab
attset: bib1.att
-recordType: safari
+recordType: safari2
segment: 1024
-/* $Id: safari1.c,v 1.13 2006-08-14 10:40:22 adam Exp $
+/* $Id: safari1.c,v 1.14 2006-08-16 13:16:37 adam Exp $
Copyright (C) 1995-2006
Index Data ApS
{
"1234\n" /* ID first record */
- /* chunk owner seq idx term */
+ /* chunk owner segment seq idx term */
- "00024338 125060 1 any the\n"
- "00024338 125060 2 any art\n"
- "00024338 125060 3 any mand\n"
+ "00024338 125060 0 1 any the\n"
+ "00024338 125060 0 2 any art\n"
+ "00024338 125060 0 3 any mand\n"
,
"5678\n" /* other record - same owner id */
- "00024339 125060 1 any den\n"
- "00024339 125060 2 any gamle\n"
- "00024339 125060 3 any mand\n"
+ "00024339 125060 0 1 any den\n"
+ "00024339 125060 0 2 any gamle\n"
+ "00024339 125060 0 3 any mand\n"
,
"5678\n" /* same record chunk id as before .. */
- "00024339 125060 1 any the\n"
- "00024339 125060 2 any gamle\n"
- "00024339 125060 3 any mand\n"
+ "00024339 125060 0 1 any the\n"
+ "00024339 125060 0 2 any gamle\n"
+ "00024339 125060 0 3 any mand\n"
,
"1000\n" /* separate record */
- "00024339 125061 1 any the\n"
- "00024339 125061 2 any gamle\n"
- "00024339 125061 3 any mand\n"
+ "00024339 125061 0 1 any the\n"
+ "00024339 125061 0 2 any gamle\n"
+ "00024339 125061 0 3 any mand\n"
,
"1001\n" /* separate record */
- "00024340 125062 1 any the\n"
- "00024340 125062 1 any the\n" /* DUP KEY, bug #432 */
- "00024340 125062 2 any old\n"
- "00024340 125062 3 any mand\n"
+ "00024340 125062 0 1 any the\n"
+ "00024340 125062 0 1 any the\n" /* DUP KEY, bug #432 */
+ "00024340 125062 0 2 any old\n"
+ "00024340 125062 0 3 any mand\n"
,
"1002\n" /* segment testing record */
- "00024341 125062 1 title a\n"
- "00024341 125062 2 title b\n"
+ "00024341 125062 0 1 title a\n"
+ "00024341 125062 0 2 title b\n"
- "00024341 125062 1024 title b\n"
- "00024341 125062 1025 title c\n"
- "00024341 125062 1026 title d\n"
- "00024341 125062 1027 title e\n"
- "00024341 125062 1028 title f\n"
+ "00024341 125062 1 1024 title b\n"
+ "00024341 125062 1 1025 title c\n"
+ "00024341 125062 1 1026 title d\n"
+ "00024341 125062 1 1027 title e\n"
+ "00024341 125062 1 1028 title f\n"
- "00024341 125062 2048 title g\n"
- "00024341 125062 2049 title c\n"
+ "00024341 125062 2 2048 title g\n"
+ "00024341 125062 2 2049 title c\n"
,
0