#define ZEBRA_SORT_TYPE_ISAMB 2
#define ZEBRA_SORT_TYPE_MULTI 3
+struct zebra_sort_ent {
+ int num; /* number of values appended to wrbuf (incremented once per value by the producer in this patch) */
+ WRBUF wrbuf; /* the values stored back to back, each followed by a '\0' byte */
+};
/** \brief creates sort handle
\param bfs block files handle
void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len);
+/** \brief adds the values of a sort entry to the current sort file
+ \param si sort index handle
+ \param ent sort entry; ent->wrbuf holds one or more values, each
+ terminated by a '\0' byte
+
+ For the flat and ISAMB sort types only the first value in the buffer
+ is stored; for the multi sort type the whole buffer is stored
+ (truncated when it exceeds the maximum size).
+
+ zebra_sort_type and zebra_sort_sysno must be called prior to this
+*/
+void zebra_sort_add_ent(zebra_sort_index_t si, struct zebra_sort_ent *ent);
+
+
/** \brief delete sort entry
\param si sort index handle
const char *str;
struct it_key key_in;
+#define USE_SORT_ENT 1
+#if USE_SORT_ENT
+ NMEM nmem = nmem_create();
+ struct sort_add_ent {
+ int ord;
+ int cmd;
+ struct sort_add_ent *next;
+ struct zebra_sort_ent sort_ent;
+ };
+ struct sort_add_ent *sort_ent_list = 0;
+#endif
zebra_sort_sysno(si, sysno);
+#if USE_SORT_ENT
+ while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
+ {
+ int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
+
+ struct sort_add_ent **e = &sort_ent_list;
+ while (*e && (*e)->ord != ord)
+ e = &(*e)->next;
+ if (!*e)
+ {
+ *e = nmem_malloc(nmem, sizeof(**e));
+ (*e)->next = 0;
+ (*e)->sort_ent.wrbuf = wrbuf_alloc();
+ (*e)->sort_ent.num = 0;
+ (*e)->ord = ord;
+ (*e)->cmd = cmd;
+ }
+
+ wrbuf_write((*e)->sort_ent.wrbuf, str, slen);
+ wrbuf_putc((*e)->sort_ent.wrbuf, '\0');
+ (*e)->sort_ent.num++;
+ }
+ if (sort_ent_list)
+ {
+ struct sort_add_ent *e = sort_ent_list;
+ for (; e; e = e->next)
+ {
+ zebra_sort_type(si, e->ord);
+ if (e->cmd == 1)
+ zebra_sort_add_ent(si, &e->sort_ent);
+ else
+ zebra_sort_delete(si);
+ wrbuf_destroy(e->sort_ent.wrbuf);
+ }
+ }
+ nmem_destroy(nmem);
+#else
while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
{
int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
else
zebra_sort_delete(si);
}
+#endif
}
}
#include <yaz/nmem.h>
#include <yaz/xmalloc.h>
-#define NEW 0
-
-#if NEW
-struct zebra_rec_word_entry {
- char *buf;
- size_t len;
- int ord;
- int max_seq;
- struct zebra_rec_word_entry *next;
- struct zebra_rec_key_entry *keys;
- struct zebra_rec_key_entry **last_key;
-};
-
-struct zebra_rec_key_entry {
- struct it_key key;
- struct zebra_rec_key_entry *next;
-};
-#else
struct zebra_rec_key_entry {
char *buf; /* term bytes for this entry - presumably len bytes, TODO confirm against zebra_rec_keys_add_hash */
size_t len; /* length associated with buf */
struct it_key key; /* index key stored with the term */
struct zebra_rec_key_entry *next; /* next entry in the list; presumably the chain of one hash slot in entries[] - TODO confirm */
};
-#endif
struct zebra_rec_keys_t_ {
size_t buf_used;
NMEM nmem;
size_t hash_size;
-#if NEW
- struct zebra_rec_word_entry **entries;
-#else
struct zebra_rec_key_entry **entries;
-#endif
};
-#if NEW
-struct zebra_rec_word_entry **zebra_rec_keys_mk_hash(zebra_rec_keys_t p,
- const char *buf,
- size_t len,
- int ord)
-{
- int i;
- unsigned h = ord;
-
- for (i = 0; i<len; i++)
- h = h * 65509 + buf[i];
- return &p->entries[h % (unsigned) p->hash_size];
-}
-#else
struct zebra_rec_key_entry **zebra_rec_keys_mk_hash(zebra_rec_keys_t p,
const char *buf,
size_t len,
#endif
return &p->entries[h % (unsigned) p->hash_size];
}
-#endif
static void init_hash(zebra_rec_keys_t p)
{
xfree(p);
}
-#if NEW
-void zebra_rec_keys_write(zebra_rec_keys_t keys,
- const char *str, size_t slen,
- const struct it_key *key)
-{
- char *dst;
- const char *src = (char*) key;
-
- struct zebra_rec_word_entry **wep;
- struct zebra_rec_key_entry **kep;
- int ord = key->mem[0];
- int seq = key->mem[key->len-1];
-
- assert(keys->owner_of_buffer);
-
- wep = zebra_rec_keys_mk_hash(keys, str, slen, ord);
-
- while (*wep)
- {
- struct zebra_rec_word_entry *e = *wep;
- if (ord == e->ord && slen == e->len && !memcmp(str, e->buf, slen))
- break;
- wep = &(*wep)->next;
- }
-
- if (!*wep)
- {
- *wep = nmem_malloc(keys->nmem, sizeof(**wep));
- (*wep)->buf = nmem_malloc(keys->nmem, slen);
- memcpy((*wep)->buf, str, slen);
- (*wep)->len = slen;
- (*wep)->ord = ord;
- (*wep)->next = 0;
- (*wep)->keys = 0;
- (*wep)->max_seq = 0;
- (*wep)->last_key = &(*wep)->keys;
- }
- if (seq > (*wep)->max_seq)
- kep = (*wep)->last_key;
- else
- {
- kep = &(*wep)->keys;
- while (*kep)
- {
- if (!key_compare(key, &(*kep)->key))
- return;
- kep = &(*kep)->next;
- }
- }
- *kep = nmem_malloc(keys->nmem, sizeof(**kep));
- (*kep)->next = 0;
- (*wep)->last_key = &(*kep)->next;
- memcpy(&(*kep)->key, key, sizeof(*key));
- if (seq > (*wep)->max_seq)
- {
- (*wep)->max_seq = seq;
- }
-}
-#else
int zebra_rec_keys_add_hash(zebra_rec_keys_t keys,
const char *str, size_t slen,
const struct it_key *key)
*dst++ = '\0';
keys->buf_used = dst - keys->buf;
}
-#endif
void zebra_rec_keys_reset(zebra_rec_keys_t keys)
{
assert(keys);
iscz1_reset(keys->decode_handle);
-#if NEW
- if (keys->buf_used == 0)
- {
- size_t i;
- for (i = 0; i<keys->hash_size; i++)
- {
- struct zebra_rec_word_entry *we = keys->entries[i];
- for (; we; we = we->next)
- {
- struct zebra_rec_key_entry *ke = we->keys;
- for (; ke; ke = ke->next)
- {
- const char *src = (char*) &ke->key;
- char *dst;
- if (keys->buf_used+1024 > keys->buf_max)
- {
- char *b = (char *) xmalloc (keys->buf_max += 128000);
- if (keys->buf_used > 0)
- memcpy (b, keys->buf, keys->buf_used);
- xfree (keys->buf);
- keys->buf = b;
- }
-
- dst = keys->buf + keys->buf_used;
-
- iscz1_encode(keys->encode_handle, &dst, &src);
-
- memcpy (dst, we->buf, we->len);
- dst += we->len;
- *dst++ = '\0';
- keys->buf_used = dst - keys->buf;
- }
- }
- }
- }
-#endif
keys->fetch_offset = 0;
if (keys->buf_used == 0)
}
}
+/** \brief adds the values of a sort entry to the current sort file
+    \param si sort index handle
+    \param ent sort entry; ent->wrbuf holds the values back to back,
+               each terminated by a '\0' byte
+
+    Returns without doing anything when there is no current sort file or
+    its file handle is unset. For ZEBRA_SORT_TYPE_FLAT and
+    ZEBRA_SORT_TYPE_ISAMB only the first value in the buffer is stored;
+    for ZEBRA_SORT_TYPE_MULTI the whole buffer is stored. Values that are
+    too long are silently truncated.
+*/
+void zebra_sort_add_ent(zebra_sort_index_t si, struct zebra_sort_ent *ent)
+{
+    struct sortFile *sf = si->current_file;
+    int len;
+
+    if (!sf || !sf->u.bf)
+        return;
+    switch(si->type)
+    {
+    case ZEBRA_SORT_TYPE_FLAT:
+        /* take first entry from wrbuf - itself is 0-terminated */
+        len = strlen(wrbuf_buf(ent->wrbuf));
+        if (len > SORT_IDX_ENTRYSIZE)
+            len = SORT_IDX_ENTRYSIZE;
+
+        memcpy(si->entry_buf, wrbuf_buf(ent->wrbuf), len);
+        /* zero-pad the remainder of the fixed-size entry. The guard must
+           compare against the full entry size: comparing against
+           SORT_IDX_ENTRYSIZE-len skipped the padding for any value longer
+           than half an entry and left stale bytes from the previous
+           contents of entry_buf in the written block. */
+        if (len < SORT_IDX_ENTRYSIZE)
+            memset(si->entry_buf+len, 0, SORT_IDX_ENTRYSIZE-len);
+        bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf);
+        break;
+    case ZEBRA_SORT_TYPE_ISAMB:
+        assert(sf->u.isamb);
+
+        assert(sf->no_inserted == 0);
+        if (sf->no_inserted == 0)
+        {
+            struct sort_term_stream s;
+            ISAMC_I isamc_i;
+            /* take first entry from wrbuf - itself is 0-terminated */
+            len = strlen(wrbuf_buf(ent->wrbuf));
+
+            s.st.sysno = si->sysno;
+            /* leave room for the terminating '\0' */
+            if (len >= SORT_MAX_TERM)
+                len = SORT_MAX_TERM-1;
+            memcpy(s.st.term, wrbuf_buf(ent->wrbuf), len);
+            s.st.term[len] = '\0';
+            s.st.length = len;
+            s.no = 1;
+            s.insert_flag = 1;
+            isamc_i.clientData = &s;
+            isamc_i.read_item = sort_term_code_read;
+
+            isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i);
+            sf->no_inserted++;
+        }
+        break;
+    case ZEBRA_SORT_TYPE_MULTI:
+        assert(sf->u.isamb);
+        /* only the first add for the current file/sysno is stored */
+        if (sf->no_inserted == 0)
+        {
+            struct sort_term_stream s;
+            ISAMC_I isamc_i;
+            /* store all values: the whole buffer, embedded '\0's included */
+            len = wrbuf_len(ent->wrbuf);
+
+            s.st.sysno = si->sysno;
+            if (len >= SORT_MAX_MULTI)
+                len = SORT_MAX_MULTI-1;
+            memcpy(s.st.term, wrbuf_buf(ent->wrbuf), len);
+            s.st.length = len;
+            s.no = 1;
+            s.insert_flag = 1;
+            isamc_i.clientData = &s;
+            isamc_i.read_item = sort_term_code_read;
+
+            isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i);
+            sf->no_inserted++;
+        }
+        break;
+    default:
+        /* unknown sort type: nothing to store */
+        break;
+    }
+}
+
void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len)
{
struct sortFile *sf = si->current_file;