return cat + firstpos * 8;
}
-char *hexdump(unsigned char *p, int len, char *buff) {
+static char *hexdump(unsigned char *p, int len, char *buff) {
static char localbuff[128];
char bytebuff[8];
if (!buff) buff=localbuff;
return buff;
}
+
+/* isamh - heikki's append-only isam
+ * missing:
+ * read the code from an existing block, to synch encoding
+ * append to single-block entries
+ * append to multi-block entries
+*/
+
ISAMC_P isamh_append (ISAMH is, ISAMH_P ipos, ISAMH_I data)
{
ISAMH_PP pp;
- char f_item[128];
- char *f_item_ptr=f_item;
- int fmore=1;
char i_item[128];
char *i_item_ptr;
- int i_more=1, i_mode, i;
-
- char *r_out_ptr;
+ int i_more=1, i_mode;
char codebuffer[128];
char *codeptr;
+ char *bufptr;
int codelen;
ISAMH_PP firstpp;
void *r_clientData; /* encode client data */
int newblock;
int newcat;
- int numKeys = 0;
+ int maxkeys;
int maxsize;
int retval;
if ( 0==ipos)
{ /* new block */
- pp->cat=0;
+ pp->cat=0; /* start small... */
pp->pos = isamh_alloc_block(is,pp->cat);
pp->size= pp->offset = ISAMH_BLOCK_OFFSET_1 ;
- logf(LOG_LOG,"isamh_append: starting with new block");
+ r_clientData = (*is->method->code_start)(ISAMH_ENCODE);
+ logf(LOG_LOG,"isamh_append: starting with new block %d",pp->pos);
}
else
{ /* existing block */
- if (firstpp->lastblock == firstpp->pos)
+ if (isamh_block(firstpp->lastblock) == firstpp->pos)
{ /* only one block, we have it already */
pp->offset=ISAMH_BLOCK_OFFSET_1;
- logf(LOG_LOG,"isamh_append: starting with one block");
+ logf(LOG_LOG,"isamh_append: starting with one block %d",pp->pos);
}
else
- { /* TODO: Read the last block (into what buffer?) */
+ {
+ logf(LOG_LOG,"isamh_append: starting with multiple blocks %d>%d>%d",
+ firstpp->pos,isamh_block(firstpp->next),isamh_block(firstpp->lastblock));
+ pp=isamh_pp_open(is,firstpp->lastblock);
+ /* dirty, but this can also read an N-block. Just clear the extra values */
+ pp->lastblock=0;
pp->offset=ISAMH_BLOCK_OFFSET_N;
- logf(LOG_LOG,"isamh_append: starting with multiple blocks");
} /* get last */
/* read pointers in it to synchronize the encoder ??!! */
+ r_clientData = (*is->method->code_start)(ISAMH_ENCODE);
+ logf(LOG_LOG,"isamh_append: scanning to end of block %d %d->%d",
+ pp->pos, pp->offset, pp->size);
codeptr=codebuffer;
- //while () {
- //}
+ while (pp->offset<pp->size) {
+ codeptr=codebuffer;
+ bufptr=pp->buf + pp->offset;
+ (*is->method->code_item)(ISAMH_DECODE, r_clientData, &codeptr, &bufptr);
+ codelen = bufptr - (pp->buf+pp->offset) ;
+ logf(LOG_LOG,"isamh_append: dec at %d %d/%d:%s",
+ pp->offset, codelen, codeptr-codebuffer,
+ hexdump(codebuffer,codeptr-codebuffer,0) );
+ pp->offset += codelen;
+ }
} /* existing block */
- r_clientData = (*is->method->code_start)(ISAMH_ENCODE);
i_item_ptr = i_item;
i_more = (*data->read_item)(data->clientData,&i_item_ptr,&i_mode);
maxsize = is->method->filecat[pp->cat].bsize;
while(i_more) {
- codeptr = codebuffer;
- i_item_ptr=i_item;
- (*is->method->code_item)(ISAMH_ENCODE, r_clientData, &codeptr, &i_item_ptr);
- codelen = codeptr-codebuffer;
+ if (i_mode)
+ { /* insert key; all delete keys are ignored for the time being... */
+ codeptr = codebuffer;
+ i_item_ptr=i_item;
+ (*is->method->code_item)(ISAMH_ENCODE, r_clientData, &codeptr, &i_item_ptr);
+ codelen = codeptr-codebuffer;
- assert( (codelen < 128) && (codelen>0));
+ assert( (codelen < 128) && (codelen>0));
- logf(LOG_LOG,"isamh_append: coded into %d:%s",
- codelen,hexdump(codebuffer,codelen,0));
-
-
- if ( pp->offset + codelen > maxsize )
- {
- logf(LOG_LOG,"isamh_append: need new block: %d > %d ",
- pp->offset + codelen, maxsize );
- newcat = pp->cat; /* TODO - grow that block some day... */
- newblock = isamh_alloc_block(is,newcat);
- pp->next = newblock;
- if (firstpp!=pp)
- { /* not first block, write to disk already now */
- isamh_buildlaterblock(pp);
- isamh_write_block(is,pp->cat,pp->pos,pp->buf);
- //if (cat != newcat)
- // realloc buf !!!!
- }
- else
- { /* we had only one block, allocate a second buffer */
- pp = (ISAMH_PP) xmalloc (sizeof(*pp));
- assert(pp);
- *pp = *firstpp; /* copy most fields directly over */
- pp->buf = (char *) xmalloc (is->method->filecat[newcat].bsize);
- }
- pp->cat = newcat;
- pp->pos = newblock;
- pp->size=pp->offset=ISAMH_BLOCK_OFFSET_N ;
- pp->next=0;
- logf(LOG_LOG,"isamh_append: got a new block %d",pp->pos);
-
- /* reset the encoding, and code again */
- (*is->method->code_reset)(r_clientData);
- codeptr = codebuffer;
- i_item_ptr=i_item;
- (*is->method->code_item)(ISAMH_ENCODE, r_clientData, &codeptr, &i_item_ptr);
- codelen = codeptr-codebuffer;
- logf(LOG_LOG,"isamh_append: coded again %d:%s",
- codelen,hexdump(codebuffer,codelen,0));
-
- } /* new block needed */
-
- /* ok, now we can write it */
- memcpy(&(pp->buf[pp->offset]), codebuffer, codelen);
- pp->offset += codelen;
- pp->size += codelen;
- firstpp->numKeys++;
+ logf(LOG_LOG,"isamh_append: coded into %d:%s",
+ codelen,hexdump(codebuffer,codelen,0));
+
+ if ( pp->offset + codelen > maxsize )
+ {
+ newcat = pp->cat;
+ maxkeys = is->method->filecat[pp->cat].mblocks; /* max keys */
+ logf(LOG_LOG,"isamh_append: need new block: %d > %d (k:%d/%d)",
+ pp->offset + codelen, maxsize, firstpp->numKeys,maxkeys );
+ if ( (maxkeys>0) && (firstpp->numKeys > maxkeys) )
+ { /* time to increase block size */
+ newcat++;
+ maxsize = is->method->filecat[newcat].bsize;
+ pp->buf=xrealloc(pp->buf,maxsize);
+ logf(LOG_LOG,"isamh_append: increased to cat %d ",newcat);
+ }
- /* and try to read the next element */
+ newblock = isamh_alloc_block(is,newcat);
+ pp->next = isamh_addr(newblock,newcat);
+ if (firstpp!=pp)
+ { /* not first block, write to disk already now */
+ isamh_buildlaterblock(pp);
+ isamh_write_block(is,pp->cat,pp->pos,pp->buf);
+ //if (cat != newcat)
+ // realloc buf !!!!
+ }
+ else
+ { /* we had only one block, allocate a second buffer */
+ pp = isamh_pp_open(is,0);
+ }
+ pp->cat = newcat;
+ pp->pos = newblock;
+
+ pp->size=pp->offset=ISAMH_BLOCK_OFFSET_N ;
+ pp->next=0;
+ pp->lastblock=0;
+ logf(LOG_LOG,"isamh_append: got a new block %d:%d",pp->cat,pp->pos);
+
+ /* reset the encoding, and code again */
+ (*is->method->code_reset)(r_clientData);
+ codeptr = codebuffer;
+ i_item_ptr=i_item;
+ (*is->method->code_item)(ISAMH_ENCODE, r_clientData, &codeptr, &i_item_ptr);
+ codelen = codeptr-codebuffer;
+ logf(LOG_LOG,"isamh_append: coded again %d:%s",
+ codelen,hexdump(codebuffer,codelen,0));
+
+ } /* new block needed */
+
+ /* ok, now we can write it */
+ memcpy(&(pp->buf[pp->offset]), codebuffer, codelen);
+ pp->offset += codelen;
+ pp->size += codelen;
+ firstpp->numKeys++;
+ } /* not a delete */
+
+ /* try to read the next element */
i_item_ptr = i_item;
i_more = (*data->read_item)(data->clientData,&i_item_ptr,&i_mode);
logf(LOG_LOG,"isamh_append 2: m=%d l=%d %s",
i_mode, i_item_ptr-i_item, hexdump(i_item,i_item_ptr-i_item,0));
-
- }
+
+ } /* while */
/* Write the last (partial) block, if needed. */
if (pp!=firstpp)
}
/* update first block and write it */
- firstpp->lastblock = pp->pos;
+ firstpp->lastblock = isamh_addr(pp->pos,pp->cat);
isamh_buildfirstblock(firstpp);
isamh_write_block(is,firstpp->cat,firstpp->pos,firstpp->buf);
/* release the second block, if we allocated one */
if ( firstpp != pp )
- {
- xfree(pp->buf);
- xfree(pp);
- }
-
- retval = firstpp->pos*8 + firstpp->cat;
+ isamh_pp_close(pp);
+
+ /* get the return value (before it disappears at close!) */
+ retval = isamh_addr(firstpp->pos,firstpp->cat);
isamh_pp_close(firstpp);
} /* isamh_append */
-ISAMC_P test_isamh_append (ISAMH is, ISAMH_P ipos, ISAMH_I data)
-/* test routines while fighting it */
-{
- /* ipos is always ==0, in my test, as I have no earlier base to insert */
- /* into. The key extractor calls this only once for each key to be inserted */
-
-
- ISAMH_PP pp;
- char f_item[128];
- char *f_item_ptr=f_item;
- int fmore=1;
-
- char i_item[128];
- char *i_item_ptr;
- int i_more=1, i_mode, i;
-
- pp = isamh_pp_open (is, ipos);
- logf (LOG_LOG, "isamh_append:scannig fmore loop (ipos=%d)",ipos);
- while (fmore)
- {
- f_item_ptr=f_item;
- fmore = isamh_read_item (pp,&f_item_ptr);
- logf (LOG_LOG, "isamh_append: fmore=%d len=%d",
- fmore, f_item_ptr-f_item);
- } /* while fmore */
-
- logf (LOG_LOG, "isamh_append:scannig imore loop");
-
- while(i_more) {
- i_item_ptr = i_item;
- i_more = (*data->read_item)(data->clientData,&i_item_ptr,&i_mode);
- logf(LOG_LOG,"isamh_append: mode=%d len=%d",i_mode, i_item_ptr-i_item);
- }
-
- isamh_pp_close(pp);
-} /* foo isamh_append */
-
-#ifdef SKIPOLDISAM
-
-ISAMC_P old_isamh_append (ISAMH is, ISAMH_P ipos, ISAMH_I data)
-{
-
- char i_item[128], *i_item_ptr;
- int i_more, i_mode, i;
-
- ISAMH_PP pp;
- char f_item[128], *f_item_ptr;
- int f_more;
- int last_dirty = 0;
- int debug = is->method->debug;
-
- struct isc_merge_block mb[200];
-
- int firstpos = 0;
- int cat = 0;
- char r_item_buf[128]; /* temporary result output */
- char *r_buf; /* block with resulting data */
- int r_offset = 0; /* current offset in r_buf */
- int ptr = 0; /* pointer */
- void *r_clientData; /* encode client data */
- int border;
- int numKeys = 0;
-
- r_clientData = (*is->method->code_start)(ISAMH_ENCODE);
- r_buf = is->merge_buf + 128;
-
- pp = isamh_pp_open (is, ipos);
- /* read first item from file. make sure f_more indicates no boundary */
- f_item_ptr = f_item;
- f_more = isamh_read_item (pp, &f_item_ptr);
- if (f_more > 0)
- f_more = 1;
- cat = pp->cat;
-
- if (debug > 1)
- logf (LOG_LOG, "isc: isamh_append begin %d %d %d", cat, pp->pos, ipos);
-
- /* read first item from i */
- i_item_ptr = i_item;
- i_more = (*data->read_item)(data->clientData, &i_item_ptr, &i_mode);
-
- mb[ptr].block = pp->pos; /* is zero if no block on disk */
- mb[ptr].dirty = 0;
- mb[ptr].offset = 0;
-
- border = is->method->filecat[cat].bsize;
- /* border = get_border (is, mb, ptr, cat, firstpos); */
- while (i_more || f_more)
- {
- char *r_item = r_item_buf;
- int cmp;
-
- if (f_more > 1)
- {
- /* block to block boundary in the original file. */
- f_more = 1;
- if (cat == pp->cat)
- {
- /* the resulting output is of the same category as the
- the original
- */
-
-#ifdef SKIPTHIS /* should not happen when just appending new records */
- if (r_offset <= mb[ptr].offset +is->method->filecat[cat].mfill)
-#else
- if (0)
-#endif
- {
- /* the resulting output block is too small/empty. Delete
- the original (if any)
- */
- if (debug > 3)
- logf (LOG_LOG, "isc: release A");
- if (mb[ptr].block)
- isamh_release_block (is, pp->cat, mb[ptr].block);
- mb[ptr].block = pp->pos;
- if (!mb[ptr].dirty)
- mb[ptr].dirty = 1;
- if (ptr > 0)
- mb[ptr-1].dirty = 1;
- }
- else
- {
-
- /* indicate new boundary based on the original file */
- mb[++ptr].block = pp->pos;
- mb[ptr].dirty = last_dirty;
- mb[ptr].offset = r_offset;
- if (debug > 3)
- logf (LOG_LOG, "isc: bound ptr=%d,offset=%d",
- ptr, r_offset);
- if (cat==is->max_cat && ptr >= is->method->max_blocks_mem)
- {
- /* We are dealing with block(s) of max size. Block(s)
- except 1 will be flushed.
- */
- if (debug > 2)
- logf (LOG_LOG, "isc: flush A %d sections", ptr);
- flush_blocks ((ISAMC)is, mb, ptr-1, r_buf, &firstpos, cat,
- 0, &pp->numKeys);
- mb[0].block = mb[ptr-1].block;
- mb[0].dirty = mb[ptr-1].dirty;
- memcpy (r_buf, r_buf + mb[ptr-1].offset,
- mb[ptr].offset - mb[ptr-1].offset);
- mb[0].offset = 0;
-
- mb[1].block = mb[ptr].block;
- mb[1].dirty = mb[ptr].dirty;
- mb[1].offset = mb[ptr].offset - mb[ptr-1].offset;
- ptr = 1;
- r_offset = mb[ptr].offset;
- }
- }
- }
- /*border = get_border (is, mb, ptr, cat, firstpos);*/
- border = is->method->filecat[cat].bsize;
- }
- last_dirty = 0;
- if (!f_more)
- cmp = -1;
- else if (!i_more)
- cmp = 1;
- else
- cmp = (*is->method->compare_item)(i_item, f_item);
- if (cmp == 0) /* insert i=f */
- {
- if (!i_mode) /* delete item? */
- {
- /* move i */
- i_item_ptr = i_item;
- i_more = (*data->read_item)(data->clientData, &i_item_ptr,
- &i_mode);
- /* is next input item the same as current except
- for the delete flag? */
- cmp = (*is->method->compare_item)(i_item, f_item);
- if (!cmp && i_mode) /* delete/insert nop? */
- {
- /* yes! insert as if it was an insert only */
- memcpy (r_item, i_item, i_item_ptr - i_item);
- i_item_ptr = i_item;
- i_more = (*data->read_item)(data->clientData, &i_item_ptr,
- &i_mode);
- }
- else
- {
- /* no! delete the item */
- r_item = NULL;
- last_dirty = 1;
- mb[ptr].dirty = 2;
- }
- }
- else
- {
- memcpy (r_item, f_item, f_item_ptr - f_item);
-
- /* move i */
- i_item_ptr = i_item;
- i_more = (*data->read_item)(data->clientData, &i_item_ptr,
- &i_mode);
- }
- /* move f */
- f_item_ptr = f_item;
- f_more = isamh_read_item (pp, &f_item_ptr);
- }
- else if (cmp > 0) /* insert f */
- {
- memcpy (r_item, f_item, f_item_ptr - f_item);
- /* move f */
- f_item_ptr = f_item;
- f_more = isamh_read_item (pp, &f_item_ptr);
- }
- else /* insert i */
- {
- if (!i_mode) /* delete item which isn't there? */
- {
- logf (LOG_FATAL, "Inconsistent register at offset %d",
- r_offset);
- abort ();
- }
- memcpy (r_item, i_item, i_item_ptr - i_item);
- mb[ptr].dirty = 2;
- last_dirty = 1;
- /* move i */
- i_item_ptr = i_item;
- i_more = (*data->read_item)(data->clientData, &i_item_ptr,
- &i_mode);
- }
- if (r_item) /* insert resulting item? */
- {
- char *r_out_ptr = r_buf + r_offset;
- int new_offset;
-
- (*is->method->code_item)(ISAMH_ENCODE, r_clientData,
- &r_out_ptr, &r_item);
- new_offset = r_out_ptr - r_buf;
-
- numKeys++;
-
- if (border < new_offset && border >= r_offset)
- {
- if (debug > 2)
- logf (LOG_LOG, "isc: border %d %d", ptr, border);
- /* Max size of current block category reached ...
- make new virtual block entry */
- mb[++ptr].block = 0;
- mb[ptr].dirty = 1;
- mb[ptr].offset = r_offset;
- if (cat == is->max_cat && ptr >= is->method->max_blocks_mem)
- {
- /* We are dealing with block(s) of max size. Block(s)
- except one will be flushed. Note: the block(s) are
- surely not the last one(s).
- */
- if (debug > 2)
- logf (LOG_LOG, "isc: flush B %d sections", ptr-1);
- flush_blocks ((ISAMC)is, mb, ptr-1, r_buf, &firstpos, cat,
- 0, &pp->numKeys);
- mb[0].block = mb[ptr-1].block;
- mb[0].dirty = mb[ptr-1].dirty;
- memcpy (r_buf, r_buf + mb[ptr-1].offset,
- mb[ptr].offset - mb[ptr-1].offset);
- mb[0].offset = 0;
-
- mb[1].block = mb[ptr].block;
- mb[1].dirty = mb[0].dirty;
- mb[1].offset = mb[ptr].offset - mb[ptr-1].offset;
- memcpy (r_buf + mb[1].offset, r_buf + r_offset,
- new_offset - r_offset);
- new_offset = (new_offset - r_offset) + mb[1].offset;
- ptr = 1;
- }
- border = is->method->filecat[cat].bsize;
- /* get_border (is, mb, ptr, cat, firstpos); */
- }
- r_offset = new_offset;
- }
-#ifdef SKIPTHIS /* categories are handled differently in isamH */
- /* to be implemented later... */
-
- if (cat < is->max_cat && ptr >= is->method->filecat[cat].mblocks)
- {
- /* Max number blocks in current category reached ->
- must switch to next category (with larger block size)
- */
- int j = 0;
-
- (is->files[cat].no_remap)++;
- /* delete all original block(s) read so far */
- for (i = 0; i < ptr; i++)
- if (mb[i].block)
- isamh_release_block (is, pp->cat, mb[i].block);
- /* also delete all block to be read in the future */
- pp->deleteFlag = 1;
-
- /* remap block offsets */
- assert (mb[j].offset == 0);
- cat++;
- mb[j].dirty = 1;
- mb[j].block = 0;
- mb[ptr].offset = r_offset;
- for (i = 1; i < ptr; i++)
- {
- int border = is->method->filecat[cat].ifill -
- ISAMH_BLOCK_OFFSET_1 + mb[j].offset;
- if (debug > 3)
- logf (LOG_LOG, "isc: remap %d border=%d", i, border);
- if (mb[i+1].offset > border && mb[i].offset <= border)
- {
- if (debug > 3)
- logf (LOG_LOG, "isc: to %d %d", j, mb[i].offset);
- mb[++j].dirty = 1;
- mb[j].block = 0;
- mb[j].offset = mb[i].offset;
- }
- }
- if (debug > 2)
- logf (LOG_LOG, "isc: remap from %d to %d sections to cat %d",
- ptr, j, cat);
- ptr = j;
- border = is->method->filecat[cat].bsize;
- /*border = get_border (is, mb, ptr, cat, firstpos);*/
- if (debug > 3)
- logf (LOG_LOG, "isc: border=%d r_offset=%d", border, r_offset);
- }
-#endif /* skipthis */
-
- }
- if (mb[ptr].offset < r_offset)
- { /* make the final boundary offset */
- mb[++ptr].dirty = 1;
- mb[ptr].block = 0;
- mb[ptr].offset = r_offset;
- }
- else
- { /* empty output. Release last block if any */
- if (cat == pp->cat && mb[ptr].block)
- {
- if (debug > 3)
- logf (LOG_LOG, "isc: release C");
- isamh_release_block (is, pp->cat, mb[ptr].block);
- mb[ptr].block = 0;
- if (ptr > 0)
- mb[ptr-1].dirty = 1;
- }
- }
-
- if (debug > 2)
- logf (LOG_LOG, "isc: flush C, %d sections", ptr);
-
- if (firstpos)
- {
- /* we have to patch initial block with num keys if that
- has changed */
- if (numKeys != isamh_pp_num (pp))
- {
- if (debug > 2)
- logf (LOG_LOG, "isc: patch num keys firstpos=%d num=%d",
- firstpos, numKeys);
- bf_write (is->files[cat].bf, firstpos, ISAMH_BLOCK_OFFSET_N,
- sizeof(numKeys), &numKeys);
- }
- }
- else if (ptr > 0)
- { /* we haven't flushed initial block yet and there surely are some
- blocks to flush. Make first block dirty if numKeys differ */
- if (numKeys != isamh_pp_num (pp))
- mb[0].dirty = 1;
- }
- /* flush rest of block(s) in r_buf */
- flush_blocks ((ISAMC)is, mb, ptr, r_buf, &firstpos, cat, 1, &numKeys);
-
- (*is->method->code_stop)(ISAMH_ENCODE, r_clientData);
- if (!firstpos)
- cat = 0;
- if (debug > 1)
- logf (LOG_LOG, "isc: isamh_append return %d %d", cat, firstpos);
- isamh_pp_close (pp);
- return cat + firstpos * 8;
-}
-#endif /* SKIPOLDISAM */
/*
* $Log: merge.c,v $
- * Revision 1.13 1999-07-06 09:37:05 heikki
+ * Revision 1.14 1999-07-06 16:30:20 heikki
+ * IsamH starts to work - at least it builds indexes. Cannot search yet...
+ *
+ * Revision 1.13 1999/07/06 09:37:05 heikki
* Working on isamh - not ready yet.
*
* Revision 1.12 1999/06/30 15:03:55 heikki