-/* $Id: extract.c,v 1.197 2005-10-28 09:22:50 adam Exp $
+/* $Id: extract.c,v 1.198 2005-11-09 11:51:29 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
{
dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno);
}
+#if NATTR
+ extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
recordAttr->staticrank);
zh->records_inserted++;
yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T,
zh->m_record_type, fname, recordOffset);
recordAttr->staticrank = extractCtrl.staticrank;
+#if NATTR
+ extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
recordAttr->staticrank);
zh->records_updated++;
/* update sort keys */
xfree (rec->info[recInfo_sortKeys]);
+#if NATTR
+ zebra_rec_keys_get_buf(zh->reg->sortKeys,
+ &rec->info[recInfo_sortKeys],
+ &rec->size[recInfo_sortKeys]);
+#else
rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
zh->reg->sortKeys.buf = NULL;
zh->reg->sortKeys.buf_max = 0;
+#endif
/* save file size of original record */
zebraExplain_recordBytesIncrement (zh->reg->zei,
dict_insert (zh->reg->matchDict, matchStr,
sizeof(*sysno), sysno);
}
+#if NATTR
+ extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
#if 0
print_rec_keys(zh, zh->reg->keys);
rec->info[recInfo_delKeys],
rec->size[recInfo_delKeys],
0);
+#if NATTR
+ zebra_rec_keys_set_buf(sortKeys,
+ rec->info[recInfo_sortKeys],
+ rec->size[recInfo_sortKeys],
+ 0);
+#else
sortKeys.buf_used = rec->size[recInfo_sortKeys];
sortKeys.buf = rec->info[recInfo_sortKeys];
+#endif
#if NATTR
extract_flushSortKeys (zh, *sysno, 0, sortKeys);
yaz_log (YLOG_LOG, "update %s %s %ld", recordType,
pr_fname, (long) recordOffset);
recordAttr->staticrank = extractCtrl.staticrank;
+#if NATTR
+ extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
recordAttr->staticrank);
zh->records_updated++;
/* update sort keys */
xfree (rec->info[recInfo_sortKeys]);
+#if NATTR
+ zebra_rec_keys_get_buf(zh->reg->sortKeys,
+ &rec->info[recInfo_sortKeys],
+ &rec->size[recInfo_sortKeys]);
+#else
rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
zh->reg->sortKeys.buf = NULL;
zh->reg->sortKeys.buf_max = 0;
+#endif
/* save file size of original record */
zebraExplain_recordBytesIncrement (zh->reg->zei,
zebra_rec_keys_t delkeys = zebra_rec_keys_open();
#if NATTR
- zebra_rec_keys_t sortkeys = zzebra_rec_keys_open();
+ zebra_rec_keys_t sortkeys = zebra_rec_keys_open();
#else
struct sortKeys sortkeys;
#endif
&rec->size[recInfo_delKeys]);
xfree (rec->info[recInfo_sortKeys]);
+#if NATTR
+ zebra_rec_keys_get_buf(zh->reg->sortKeys,
+ &rec->info[recInfo_sortKeys],
+ &rec->size[recInfo_sortKeys]);
+#else
rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
zh->reg->sortKeys.buf = NULL;
zh->reg->sortKeys.buf_max = 0;
+#endif
return 0;
}
zh->reg->key_buf_used = 0;
}
-void extract_add_it_key (ZebraHandle zh,
- zebra_rec_keys_t *keys,
- int reg_type,
- const char *str, int slen, struct it_key *key)
-{
- zebra_rec_keys_write(*keys, reg_type, str, slen, key);
-}
-
ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh,
zebra_rec_keys_t reckeys,
zebra_snippets *snippets)
key.mem[3] = p->seqno;
#if 0
- /* just for debugging .. */
- yaz_log(YLOG_LOG, "add: set=%d use=%d "
- "record_id=%lld section_id=%lld seqno=%lld",
- p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno);
+ if (1)
+ {
+ char strz[80];
+ int i;
+
+ strz[0] = 0;
+ for (i = 0; i<length && i < 20; i++)
+ sprintf(strz+strlen(strz), "%02X", str[i] & 0xff);
+ /* just for debugging .. */
+ yaz_log(YLOG_LOG, "add: set=%d use=%d "
+ "record_id=%lld section_id=%lld seqno=%lld %s",
+ p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno,
+ strz);
+ }
#endif
- extract_add_it_key(p->extractCtrl->handle,
- &zh->reg->keys,
- p->index_type, str,
- length, &key);
+ zebra_rec_keys_write(zh->reg->keys, str, length, &key);
}
#if NATTR
key.mem[2] = p->section_id;
key.mem[3] = p->seqno;
- extract_add_it_key(p->extractCtrl->handle,
- &zh->reg->sortKeys,
- p->index_type, str,
- length, &key);
+ zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
}
#else
static void extract_add_sort_string (RecWord *p, const char *str, int length)
-/* $Id: reckeys.c,v 1.2 2005-11-09 08:27:28 adam Exp $
+/* $Id: reckeys.c,v 1.3 2005-11-09 11:51:29 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
#include <assert.h>
#include <ctype.h>
+#include <yaz/nmem.h>
#include "index.h"
#include "reckeys.h"
+struct zebra_rec_key_entry { /* one remembered (term bytes, it_key) pair; entries sharing a hash bucket are chained through next */
+ char *buf; /* private copy of the term bytes; length is given by len (no terminator is stored) */
+ size_t len; /* number of bytes in buf */
+ struct it_key key; /* key that was written together with the term bytes */
+ struct zebra_rec_key_entry *next; /* next entry in the same bucket chain, 0 at the end of the chain */
+};
+
struct zebra_rec_keys_t_ {
size_t buf_used;
size_t buf_max;
void *encode_handle;
void *decode_handle;
char owner_of_buffer;
+
+ NMEM nmem;
+ size_t hash_size;
+ struct zebra_rec_key_entry **entries;
};
+
+struct zebra_rec_key_entry **zebra_rec_keys_mk_hash(zebra_rec_keys_t p, /* maps a byte string to the address of its bucket head in p->entries */
+ const char *buf, /* bytes to hash */
+ size_t len) /* number of bytes of buf that take part in the hash */
+{
+ unsigned h = 0; /* running hash value; unsigned arithmetic wraps on overflow */
+ size_t i;
+ for (i = 0; i<len; i++)
+ h = h * 65509 + buf[i]; /* multiplicative hash; buf[i] is plain char, so bytes >= 0x80 may be sign-extended where char is signed */
+ return &p->entries[h % (unsigned) p->hash_size]; /* returns the slot address (not the entry) so a caller can walk or extend the chain; requires hash_size != 0 and entries allocated */
+}
+
+static void init_hash(zebra_rec_keys_t p) /* drops the current hash table and, when hash_size is non-zero, builds a new one with all buckets empty */
+{
+ p->entries = 0;
+ nmem_reset(p->nmem); /* NOTE(review): presumably releases everything previously allocated from p->nmem (old bucket array and entries) - confirm against the YAZ nmem documentation */
+ if (p->hash_size)
+ {
+ size_t i;
+ p->entries = nmem_malloc(p->nmem, p->hash_size * sizeof(*p->entries)); /* one bucket-head pointer per slot */
+ for (i = 0; i<p->hash_size; i++)
+ p->entries[i] = 0; /* each bucket starts as an empty chain */
+ }
+}
+
zebra_rec_keys_t zebra_rec_keys_open()
{
zebra_rec_keys_t p = xmalloc(sizeof(*p));
p->owner_of_buffer = 1;
p->encode_handle = iscz1_start();
p->decode_handle = iscz1_start();
+
+ p->nmem = nmem_create();
+ p->hash_size = 127;
+ p->entries = 0;
+
+ init_hash(p);
+
return p;
}
-
+
void zebra_rec_keys_set_buf(zebra_rec_keys_t p, char *buf, size_t sz,
int copy_buf)
{
iscz1_stop(p->encode_handle);
if (p->decode_handle)
iscz1_stop(p->decode_handle);
+ nmem_destroy(p->nmem);
xfree(p);
}
+int zebra_rec_keys_add_hash(zebra_rec_keys_t keys, /* remembers the pair (str, key) in the hash table unless an equal pair is already stored */
+ const char *str, size_t slen, /* term bytes and their length */
+ const struct it_key *key) /* key paired with the term; copied by value into the new entry */
+{ /* returns 0 when an equal pair was found (nothing is added), 1 when a new entry was appended */
+ struct zebra_rec_key_entry **kep = zebra_rec_keys_mk_hash(keys, str, slen); /* the bucket is selected from the term bytes only; key does not affect it */
+ while (*kep)
+ {
+ struct zebra_rec_key_entry *e = *kep;
+ if (slen == e->len && !memcmp(str, e->buf, slen) &&
+ !key_compare(key, &e->key)) /* duplicate: same length, same bytes, and key_compare yields 0 */
+ {
+ return 0;
+ }
+ kep = &(*kep)->next; /* step to the next link slot in the chain */
+ }
+ *kep = nmem_malloc(keys->nmem, sizeof(**kep)); /* kep points at the terminating null link here, so the new entry is appended at the tail */
+ (*kep)->next = 0;
+ (*kep)->len = slen;
+ memcpy(&(*kep)->key, key, sizeof(*key));
+ (*kep)->buf = nmem_malloc(keys->nmem, slen); /* entry keeps its own copy of the term bytes; no terminator is added */
+ memcpy((*kep)->buf, str, slen);
+ return 1;
+}
+
void zebra_rec_keys_write(zebra_rec_keys_t keys,
- int reg_type,
const char *str, size_t slen,
const struct it_key *key)
{
assert(keys->owner_of_buffer);
+ if (!zebra_rec_keys_add_hash(keys, str, slen, key))
+ return; /* key already there . Omit it */
if (keys->buf_used+1024 > keys->buf_max)
{
char *b = (char *) xmalloc (keys->buf_max += 128000);
iscz1_encode(keys->encode_handle, &dst, &src);
-#if REG_TYPE_PREFIX
- *dst++ = reg_type;
-#endif
memcpy (dst, str, slen);
dst += slen;
*dst++ = '\0';
keys->buf_used = 0;
iscz1_reset(keys->encode_handle);
+
+ init_hash(keys);
+
}
int zebra_rec_keys_rewind(zebra_rec_keys_t keys)
-/* $Id: zrpn.c,v 1.206 2005-11-02 11:43:26 adam Exp $
+/* $Id: zrpn.c,v 1.207 2005-11-09 11:51:30 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
attr_ok = 1;
term_dict[prefix_len++] = ')';
-#if REG_TYPE_PREFIX
- term_dict[prefix_len++] = 1;
- term_dict[prefix_len++] = reg_type;
- yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
-#endif
term_dict[prefix_len] = '\0';
j = prefix_len;
switch (truncation_value)
}
bases_ok++;
term_dict[prefix_len++] = ')';
-#if REG_TYPE_PREFIX
- term_dict[prefix_len++] = 1;
- term_dict[prefix_len++] = reg_type;
- yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
-#endif
term_dict[prefix_len] = '\0';
if (!numeric_relation(zh, zapt, &termp, term_dict,
attributeSet, grep_info, &max_pos, reg_type,
term_dict[prefix_len++] = ord_buf[i];
}
term_dict[prefix_len++] = ')';
-#if REG_TYPE_PREFIX
- term_dict[prefix_len++] = 1;
- term_dict[prefix_len++] = reg_type;
-#endif
strcpy(term_dict+prefix_len, term);
grep_info.isam_p_indx = 0;
scan_info->list[j].term = NULL;
prefix_len += key_SU_encode (ords[i], termz + prefix_len);
-#if REG_TYPE_PREFIX
- termz[prefix_len++] = reg_id;
-#endif
termz[prefix_len] = 0;
strcpy(scan_info->prefix, termz);