2 * Copyright (c) 1995-1998, Index Data.
3 * See the file LICENSE for details.
5 * $Id: isamb.c,v 1.6 2002-04-17 09:03:38 adam Exp $
7 #include <yaz/xmalloc.h>
/* Byte offset of the first item inside a block buffer. The block code in
 * this file keeps the leaf flag in byte 0 and the block size in bytes 1-2,
 * so item data starts at offset 3. */
18 #define ISAMB_DATA_OFFSET 3
/* Size of the on-stack scratch buffers (dst_buf, fname) used in this file. */
20 #define DST_BUF_SIZE 4500
/* Size of the on-stack buffers that hold a single decoded item. */
21 #define DST_ITEM_MAX 256
26 struct ISAMB_head head;
33 struct ISAMB_file *file;
45 void *decodeClientData;
51 struct ISAMB_block **block;
/*
 * encode_ptr: store the integer `pos` at the write cursor and advance it.
 *
 *   dst  in/out: address of the write cursor; on return it points just past
 *        the sizeof(int) bytes that were written.
 *   pos  value to store.
 *
 * The value is copied with memcpy, so the destination needs no particular
 * alignment and the bytes are written in the host's native int layout.
 * The caller must guarantee at least sizeof(int) writable bytes at *dst.
 */
void encode_ptr (char **dst, int pos)
{
    memcpy (*dst, &pos, sizeof(pos));
    (*dst) += sizeof(pos);
}
/*
 * decode_ptr: read one integer from the read cursor and advance it.
 *
 *   src  in/out: address of the read cursor; on return it points just past
 *        the sizeof(int) bytes that were consumed.
 *   pos  out: receives the decoded value.
 *
 * The value is copied with memcpy, so the source needs no particular
 * alignment; the bytes are interpreted in the host's native int layout.
 * The caller must guarantee at least sizeof(int) readable bytes at *src.
 */
void decode_ptr (char **src, int *pos)
{
    memcpy (pos, *src, sizeof(*pos));
    (*src) += sizeof(*pos);
}
/* isamb_open: allocate an ISAMB handle and open one block file per category.
 * The file for category i is named "<name>-<i>"; bfs and writeflag are
 * passed through to bf_open. The method table is copied into a private
 * allocation owned by the handle.
 * NOTE(review): parts of this function (declarations of i and b_size, the
 * assignment of no_cat, the return statement) are not visible in this
 * excerpt. */
66 ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M method)
68 ISAMB isamb = xmalloc (sizeof(*isamb));
/* Keep a private copy of the caller's method struct. */
72 isamb->method = (ISAMC_M) xmalloc (sizeof(*method));
73 memcpy (isamb->method, method, sizeof(*method));
76 isamb->file = xmalloc (sizeof(*isamb->file) * isamb->no_cat);
77 for (i = 0; i<isamb->no_cat; i++)
79 char fname[DST_BUF_SIZE];
/* Default header values for this category; the bf_read below reads a
 * header into the same struct (any condition guarding it is not visible). */
80 isamb->file[i].head.first_block = 1;
81 isamb->file[i].head.last_block = 1;
82 isamb->file[i].head.block_size = b_size;
84 isamb->file[i].head_dirty = 0;
/* NOTE(review): sprintf is unbounded; fname holds DST_BUF_SIZE bytes, so an
 * over-long name would overflow it. */
85 sprintf (fname, "%s-%d", name, i);
/* Presumably the result is stored in isamb->file[i].bf (assignment line not
 * visible) - TODO confirm. */
87 bf_open (bfs, fname, isamb->file[i].head.block_size, writeflag);
89 bf_read (isamb->file[i].bf, 0, 0, sizeof(struct ISAMB_head),
90 &isamb->file[i].head);
/* isamb_close: for every category whose header is marked dirty, write the
 * header back with bf_write, then free the handle's private method copy.
 * NOTE(review): closing the block files and freeing the handle itself are
 * not visible in this excerpt. */
95 void isamb_close (ISAMB isamb)
98 for (i = 0; i<isamb->no_cat; i++)
100 if (isamb->file[i].head_dirty)
/* Same block/offset/size arguments that isamb_open passes to bf_read. */
101 bf_write (isamb->file[i].bf, 0, 0,
102 sizeof(struct ISAMB_head), &isamb->file[i].head);
105 xfree (isamb->method);
/* open_block: allocate an in-memory ISAMB_block and load the block addressed
 * by pos from the category's file.
 * pos / 4 is passed to bf_read as the block number; the category `cat` is
 * presumably taken from the low bits of pos (new_block builds pos as
 * block_no * 4 + cat), but that assignment is not visible here.
 * The block is released with close_block. */
109 struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos)
112 struct ISAMB_block *p;
115 p = xmalloc (sizeof(*p));
118 p->bytes = xmalloc (b->file[cat].head.block_size);
119 bf_read (b->file[cat].bf, pos/4, 0, 0, p->bytes);
/* Block header: byte 0 is the leaf flag, bytes 1-2 are the size (low byte
 * first).
 * NOTE(review): if bytes has plain (signed) char elements, values >= 0x80
 * would be sign-extended in this sum - confirm the field's type. */
120 p->leaf = p->bytes[0];
121 p->size = p->bytes[1] + 256 * p->bytes[2];
/* Read cursor starts at the first item, right after the 3-byte header. */
122 p->offset = ISAMB_DATA_OFFSET;
/* Per-block decoder state; released in close_block via code_stop. */
124 p->decodeClientData = (*b->method->code_start)(ISAMC_DECODE);
/* new_block: allocate a fresh, empty in-memory block in category cat.
 * The block number is taken from the category header's last_block counter,
 * which is post-incremented, so the header is marked dirty.
 * NOTE(review): storing the `leaf` argument, p->cat and the return
 * statement are not visible in this excerpt. */
128 struct ISAMB_block *new_block (ISAMB b, int leaf, int cat)
130 struct ISAMB_block *p;
133 p = xmalloc (sizeof(*p));
134 block_no = b->file[cat].head.last_block++;
/* Position encodes both values: block number times 4 plus the category. */
136 p->pos = block_no * 4 + cat;
138 b->file[cat].head_dirty = 1;
139 p->bytes = xmalloc (b->file[cat].head.block_size);
140 memset (p->bytes, 0, b->file[cat].head.block_size);
/* Empty block: size and read offset both sit right after the header. */
142 p->size = ISAMB_DATA_OFFSET;
144 p->offset = ISAMB_DATA_OFFSET;
145 p->decodeClientData = (*b->method->code_start)(ISAMC_DECODE);
/* close_block: store the header fields into the block buffer, write the
 * block to its category file and stop the block's decoder.
 * NOTE(review): any guard (e.g. a NULL or dirty check) and the freeing of
 * p->bytes / p are not visible in this excerpt. */
149 void close_block (ISAMB b, struct ISAMB_block *p)
/* Header layout mirrors open_block: leaf flag, then size low byte, then
 * size high byte. */
155 p->bytes[0] = p->leaf;
156 p->bytes[1] = p->size & 255;
157 p->bytes[2] = p->size >> 8;
/* p->pos / 4 is the block number, as in open_block. */
158 bf_write (b->file[p->cat].bf, p->pos/4, 0, 0, p->bytes);
160 (*b->method->code_stop)(ISAMC_DECODE, p->decodeClientData);
/* Forward declaration: insert_int calls insert_sub before insert_sub is
 * defined further down in this file. */
165 void insert_sub (ISAMB b, struct ISAMB_block *p, const void *new_item,
166 struct ISAMB_block **sp,
167 void *sub_item, int *sub_size);
/* insert_leaf: insert new_item into leaf block p.
 * The existing items are decoded from p->bytes and re-encoded, together
 * with new_item, into the scratch buffer dst_buf. If the result fits in the
 * category's block size it is copied back into p. Otherwise the first part
 * stays in p, the rest goes into a new leaf block returned through *sp, and
 * the item at the cut point is copied to sub_item / *sub_size.
 * NOTE(review): the loop and branch structure (how compare_item's result
 * selects where new_item goes, and how half2 is set) is only partly visible
 * in this excerpt. */
169 void insert_leaf (ISAMB b, struct ISAMB_block *p, const void *new_item,
170 struct ISAMB_block **sp,
171 void *sub_item, int *sub_size)
173 char dst_buf[DST_BUF_SIZE];
175 char *src = p->bytes + ISAMB_DATA_OFFSET;
176 char *endp = p->bytes + p->size;
/* c1 decodes the existing block contents, c2 encodes the new contents. */
177 void *c1 = (*b->method->code_start)(ISAMC_DECODE);
178 void *c2 = (*b->method->code_start)(ISAMC_ENCODE);
/* Split candidate: roughly the midpoint of the current block size, measured
 * in the output buffer. */
181 char *cut = dst_buf + p->size / 2;
182 char cut_item_buf[DST_ITEM_MAX];
183 int cut_item_size = 0;
187 char file_item_buf[DST_ITEM_MAX];
188 char *file_item = file_item_buf;
/* Decode the next stored item into file_item_buf. */
190 (*b->method->code_item)(ISAMC_DECODE, c1, &file_item, &src);
193 int d = (*b->method->compare_item)(file_item_buf, new_item);
196 char *item_ptr = (char*) new_item;
197 (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &item_ptr);
/* The first time the output passes the cut point, remember the position and
 * keep a raw copy of the item encoded there. */
207 if (!half1 && dst > cut)
209 half1 = dst; /* candidate for splitting */
211 file_item = file_item_buf;
212 (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &file_item);
214 cut_item_size = file_item - file_item_buf;
215 memcpy (cut_item_buf, file_item_buf, cut_item_size);
221 file_item = file_item_buf;
222 (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &file_item);
227 char *item_ptr = (char*) new_item;
228 (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &item_ptr);
232 p->size = dst - dst_buf + ISAMB_DATA_OFFSET;
/* Overflow: the re-encoded data no longer fits in one block, so split. */
233 if (p->size > b->file[p->cat].head.block_size)
236 char *cut_item = cut_item_buf;
/* First part (up to half1) stays in p. */
239 p->size = half1 - dst_buf + ISAMB_DATA_OFFSET;
240 memcpy (p->bytes+ISAMB_DATA_OFFSET, dst_buf, half1 - dst_buf);
/* Second part goes into a new leaf block of the same category. */
243 *sp = new_block (b, 1, p->cat);
/* Reset the encoder, then encode the cut item as the first item of the new
 * block before copying the remaining encoded bytes after it. */
245 (*b->method->code_reset)(c2);
247 first_dst = (*sp)->bytes + ISAMB_DATA_OFFSET;
249 (*b->method->code_item)(ISAMC_ENCODE, c2, &first_dst, &cut_item);
251 memcpy (first_dst, half2, dst - half2);
253 (*sp)->size = (first_dst - (char*) (*sp)->bytes) + (dst - half2);
/* Hand the cut item back to the caller. */
256 memcpy (sub_item, cut_item_buf, cut_item_size);
257 *sub_size = cut_item_size;
259 yaz_log (LOG_LOG, "l split %d / %d", p->size, (*sp)->size);
/* No split: the block must be non-empty and fit in one block. */
264 assert (p->size > ISAMB_DATA_OFFSET);
265 assert (p->size <= b->file[p->cat].head.block_size);
266 memcpy (p->bytes+ISAMB_DATA_OFFSET, dst_buf, dst - dst_buf);
269 (*b->method->code_stop)(ISAMC_DECODE, c1);
270 (*b->method->code_stop)(ISAMC_ENCODE, c2);
/* insert_int: insert new_item below internal (non-leaf) block p.
 * The block data is read as a child position followed by
 * (item length, item, child position) entries, via decode_ptr. The matching
 * child is opened and the insert is delegated to insert_sub. If the child
 * split (sub_p2 set), the separator item and the new child's position are
 * spliced into this block; if that overflows the block size, this block is
 * split too and the separator is returned in split_item / *split_size with
 * the new block in *sp.
 * NOTE(review): the loop and branch structure is only partly visible in this
 * excerpt; the notes below describe the visible statements only. */
273 void insert_int (ISAMB b, struct ISAMB_block *p, const void *new_item,
274 struct ISAMB_block **sp,
275 void *split_item, int *split_size)
277 char *startp = p->bytes + ISAMB_DATA_OFFSET;
279 char *endp = p->bytes + p->size;
281 struct ISAMB_block *sub_p1 = 0, *sub_p2 = 0;
282 char sub_item[DST_ITEM_MAX];
/* Leading child position. */
287 decode_ptr (&src, &pos);
/* Length of the next separator item, then compare it with new_item. */
292 decode_ptr (&src, &item_len);
293 d = (*b->method->compare_item)(src, new_item);
296 sub_p1 = open_block (b, pos);
298 insert_sub (b, sub_p1, new_item, &sub_p2,
299 sub_item, &sub_size);
303 decode_ptr (&src, &pos);
307 sub_p1 = open_block (b, pos);
309 insert_sub (b, sub_p1, new_item, &sub_p2,
310 sub_item, &sub_size);
314 char dst_buf[DST_BUF_SIZE];
/* NOTE(review): separator items are asserted to be shorter than 20 bytes,
 * although sub_item can hold DST_ITEM_MAX bytes. */
317 assert (sub_size < 20);
/* Rebuild the block in dst_buf: the data up to src, then the new separator
 * (length + bytes), then the new child's position, then the remainder. */
319 memcpy (dst, startp, src - startp);
323 encode_ptr (&dst, sub_size); /* sub length and item */
324 memcpy (dst, sub_item, sub_size);
327 encode_ptr (&dst, sub_p2->pos); /* pos */
329 if (endp - src) /* remaining data */
331 memcpy (dst, src, endp - src);
334 p->size = dst - dst_buf + ISAMB_DATA_OFFSET;
/* Fits: copy the rebuilt data back into the block. */
335 if (p->size <= b->file[p->cat].head.block_size)
337 memcpy (startp, dst_buf, dst - dst_buf);
/* Does not fit: walk the rebuilt entries to find a split point near half a
 * block. Presumably src and endp are re-pointed at dst_buf on lines not
 * visible here - TODO confirm. */
346 half = src + b->file[p->cat].head.block_size/2;
347 decode_ptr (&src, &pos);
350 decode_ptr (&src, split_size);
352 decode_ptr (&src, &pos);
/* Everything before the split item stays in p. */
354 p_new_size = src - dst_buf;
355 memcpy (p->bytes + ISAMB_DATA_OFFSET, dst_buf, p_new_size);
356 p_new_size += ISAMB_DATA_OFFSET;
/* The split item itself is handed up to the caller. */
358 decode_ptr (&src, split_size);
359 memcpy (split_item, src, *split_size);
/* The remainder moves into a new internal block of the same category. */
362 *sp = new_block (b, 0, p->cat);
363 (*sp)->size = endp - src;
364 memcpy ((*sp)->bytes+ISAMB_DATA_OFFSET, src, (*sp)->size);
365 (*sp)->size += ISAMB_DATA_OFFSET;
367 yaz_log (LOG_LOG, "i split %d -> %d %d",
368 p->size, p_new_size, (*sp)->size);
369 p->size = p_new_size;
/* Release the child blocks opened or created above. */
372 close_block (b, sub_p2);
374 close_block (b, sub_p1);
/* insert_sub: dispatch an insert to insert_leaf or insert_int, passing all
 * arguments through unchanged.
 * NOTE(review): the selecting condition is not visible in this excerpt;
 * presumably it tests whether p is a leaf - TODO confirm. */
377 void insert_sub (ISAMB b, struct ISAMB_block *p, const void *new_item,
378 struct ISAMB_block **sp,
379 void *sub_item, int *sub_size)
382 insert_leaf (b, p, new_item, sp, sub_item, sub_size);
384 insert_int (b, p, new_item, sp, sub_item, sub_size);
/* insert_flat: insert new_item into a single flat block addressed by *posp.
 * The existing items (if a block is open) are decoded and re-encoded
 * together with new_item into dst_buf. If the result no longer fits the
 * current block's size, the block is dropped and a new leaf block is
 * created in the first category whose block size can hold the data.
 * NOTE(review): the branch structure, the update of *posp and the return
 * value are not visible in this excerpt. */
387 int insert_flat (ISAMB b, const void *new_item, ISAMC_P *posp)
389 struct ISAMB_block *p = 0;
390 char *src = 0, *endp = 0;
391 char dst_buf[DST_BUF_SIZE], *dst = dst_buf;
/* c1 decodes the existing block contents, c2 encodes the new contents. */
394 void *c1 = (*b->method->code_start)(ISAMC_DECODE);
395 void *c2 = (*b->method->code_start)(ISAMC_ENCODE);
399 p = open_block (b, pos);
402 src = p->bytes + ISAMB_DATA_OFFSET;
403 endp = p->bytes + p->size;
/* Walk the existing items; the loop is skipped when no block was opened. */
406 while (p && src != endp)
408 char file_item_buf[DST_ITEM_MAX];
409 char *file_item = file_item_buf;
411 (*b->method->code_item)(ISAMC_DECODE, c1, &file_item, &src);
414 int d = (*b->method->compare_item)(file_item_buf, new_item);
417 char *item_ptr = (char*) new_item;
418 (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &item_ptr);
427 file_item = file_item_buf;
428 (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &file_item);
432 char *item_ptr = (char*) new_item;
433 (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &item_ptr);
438 new_size = dst - dst_buf + ISAMB_DATA_OFFSET;
/* The re-encoded data outgrew the current block. */
439 if (p && new_size > b->file[p->cat].head.block_size)
441 yaz_log (LOG_LOG, "resize %d -> %d", p->size, new_size);
443 /* delete it too!! */
444 p = 0; /* make a new one anyway */
447 { /* must create a new one */
/* Pick the first category whose block size can hold the data. */
449 for (i = 0; i < b->no_cat; i++)
450 if (new_size <= b->file[i].head.block_size)
452 p = new_block (b, 1, i);
454 memcpy (p->bytes+ISAMB_DATA_OFFSET, dst_buf, dst - dst_buf);
457 (*b->method->code_stop)(ISAMC_DECODE, c1);
458 (*b->method->code_stop)(ISAMC_ENCODE, c2);
/* isamb_insert_one: insert a single item into the structure rooted at pos
 * and yield the (possibly new) root position.
 * If the category bits of pos (pos & 3) are not the last category, the item
 * goes through insert_flat. Otherwise it is inserted into the tree via
 * insert_sub; when the root splits, a new internal root block is created
 * that holds: old root position, separator length, separator item, new
 * sibling position.
 * NOTE(review): the else/if structure, the close_block calls and the return
 * statement are not visible in this excerpt. */
463 int isamb_insert_one (ISAMB b, const void *item, ISAMC_P pos)
466 if ((pos & 3) != b->no_cat-1)
468 /* note if pos == 0 we go here too! */
470 insert_flat (b, item, &pos);
475 struct ISAMB_block *p = open_block (b, pos), *sp = 0;
476 char sub_item[DST_ITEM_MAX];
480 insert_sub (b, p, item, &sp, sub_item, &sub_size);
482 { /* increase level of tree by one */
483 struct ISAMB_block *p2 = new_block (b, 0, p->cat);
/* A new block's size is ISAMB_DATA_OFFSET, so dst starts right after the
 * block header. */
484 char *dst = p2->bytes + p2->size;
486 encode_ptr (&dst, p->pos);
487 assert (sub_size < 20);
488 encode_ptr (&dst, sub_size);
489 memcpy (dst, sub_item, sub_size);
491 encode_ptr (&dst, sp->pos);
493 p2->size = dst - (char*) p2->bytes;
494 pos = p2->pos; /* return new super page */
499 pos = p->pos; /* return current one (again) */
/* isamb_merge: read items from the data source one at a time and insert
 * each with isamb_insert_one, carrying the updated root position forward.
 * The loop runs while data->read_item returns non-zero.
 * NOTE(review): the reset of item_ptr between iterations, any use of i_mode
 * and the return statement are not visible in this excerpt. */
505 ISAMB_P isamb_merge (ISAMB b, ISAMB_P pos, ISAMC_I data)
508 char item_buf[DST_ITEM_MAX];
509 char *item_ptr = item_buf;
510 while ((*data->read_item)(data->clientData, &item_ptr, &i_mode))
513 pos = isamb_insert_one (b, item_buf, pos);
/* isamb_pp_open: create a read cursor (ISAMB_PP) positioned at pos.
 * Blocks are opened and stored in pp->block[], following the first child
 * position of each internal block; a block whose byte 0 is non-zero is
 * treated as a leaf.
 * NOTE(review): the loop structure, the initialisation of pp->level and
 * pp->isamb, and the return statement are not visible in this excerpt. */
518 ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos)
520 ISAMB_PP pp = xmalloc (sizeof(*pp));
/* NOTE(review): room for 10 block pointers; no bound check on pp->level is
 * visible in this excerpt. */
523 pp->block = xmalloc (10 * sizeof(*pp->block));
528 struct ISAMB_block *p = open_block (isamb, pos);
529 char *src = p->bytes + p->offset;
530 pp->block[pp->level] = p;
532 if (p->bytes[0]) /* leaf */
/* Internal block: read the first child position and remember how far this
 * block has been consumed. */
535 decode_ptr (&src, &pos);
536 p->offset = src - (char*) p->bytes;
/* Null entry after the last opened level. */
539 pp->block[pp->level+1] = 0;
/* isamb_pp_close: close every block held by the cursor, from level 0 up to
 * and including pp->level.
 * NOTE(review): freeing pp->block and pp itself is not visible in this
 * excerpt. */
543 void isamb_pp_close (ISAMB_PP pp)
548 for (i = 0; i <= pp->level; i++)
549 close_block (pp->isamb, pp->block[i]);
/* isamb_pp_read: decode the next item at the cursor.
 * When the current block is exhausted (offset == size), the cursor closes
 * it, moves to a block that must be internal, reads the next
 * (item length, child position) entry and opens blocks again until it
 * reaches one whose byte 0 is non-zero (a leaf). The next item is then
 * decoded from the leaf with the block's own decoder state.
 * NOTE(review): the changes to pp->level, the end-of-data return path, the
 * remaining code_item arguments (presumably buf and src) and the return
 * value are not visible in this excerpt. */
554 int isamb_pp_read (ISAMB_PP pp, void *buf)
558 struct ISAMB_block *p = pp->block[pp->level];
562 while (p->offset == p->size)
565 while (p->offset == p->size)
/* This level is fully consumed: release it and clear its slot. */
569 close_block (pp->isamb, pp->block[pp->level]);
570 pp->block[pp->level] = 0;
572 p = pp->block[pp->level];
573 assert (p->bytes[0] == 0); /* must be int */
575 src = p->bytes + p->offset;
/* Read the separator item's length, then the next child position. */
577 decode_ptr (&src, &item_len);
579 decode_ptr (&src, &pos);
581 p->offset = src - (char*) p->bytes;
/* Open the block at the decoded position and store it at the current
 * level. */
587 pp->block[pp->level] = p = open_block (pp->isamb, pos);
589 if (p->bytes[0]) /* leaf */
593 src = p->bytes + p->offset;
594 decode_ptr (&src, &pos);
595 p->offset = src - (char*) p->bytes;
/* At this point p must be a leaf with unread data. */
599 assert (p->offset < p->size);
600 assert (p->bytes[0]);
601 src = p->bytes + p->offset;
602 (*pp->isamb->method->code_item)(ISAMC_DECODE, p->decodeClientData,
604 p->offset = src - (char*) p->bytes;
608 int isamb_pp_num (ISAMB_PP pp)