From b8e1425a7f78791afbf45dc11acd82808e455c65 Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Fri, 12 Jul 2002 18:12:21 +0000 Subject: [PATCH] Isam-D now stores small entries directly in the dictionary. Needs more tuning and cleaning... --- include/isamd.h | 25 ++++++++--- include/rsisamd.h | 6 ++- index/invstat.c | 33 +++++++++----- index/kinput.c | 54 ++++++++++++++++------- index/trunc.c | 10 +++-- isamc/isamd.c | 77 +++++++++++++++++++++------------ isamc/merge-d.c | 125 +++++++++++++++++++++++++++++++++++------------------ rset/rsisamd.c | 18 +++++--- 8 files changed, 235 insertions(+), 113 deletions(-) diff --git a/include/isamd.h b/include/isamd.h index 3228ad3..158d553 100644 --- a/include/isamd.h +++ b/include/isamd.h @@ -1,10 +1,13 @@ /* - * Copyright (c) 1995-2000, Index Data. + * Copyright (c) 1995-2002, Index Data. * See the file LICENSE for details. * - * IsamH is a simple ISAM that can only append to the end of the list. - * It will need a clean-up process occasionally... Code stolen from - * isamc... + * Isam-D, Heikki's second Isam. + * The first block of an isam entry contains deltas to the + * (possibly long) sequence of keys. Thus small changes consist + * only of adding deltas to a small list, and affect only one + * block. Occasionally the block gets full, and needs to be + * merged with the rest. * * Heikki Levanto * @@ -53,11 +56,19 @@ ISAMD_M isamd_getmethod (ISAMD_M me); ISAMD isamd_open (BFiles bfs, const char *name, int writeflag, ISAMD_M method); int isamd_close (ISAMD is); -ISAMD_P isamd_append (ISAMD is, ISAMD_P pos, ISAMD_I data); - /* corresponds to isc_merge */ +/*ISAMD_P isamd_append (ISAMD is, ISAMD_P pos, ISAMD_I data);*/ +int isamd_append (ISAMD is, char *dictentry, int dictlen, ISAMD_I data); + -ISAMD_PP isamd_pp_open (ISAMD is, ISAMD_P pos); +/* Shortcut: If the isam is relatively short, we store the */ +/* whole thing in the dictionary, and allocate no blocks at all! */ +#define ISAMD_MAX_DICT_LEN 16 + +/*ISAMD_PP isamd_pp_open (ISAMD is, const char *dictbuf);*/ +ISAMD_PP isamd_pp_open (ISAMD is, const char *dictbuf, int dictlen); +ISAMD_PP isamd_pp_create (ISAMD is, int cat); + void isamd_pp_close (ISAMD_PP pp); int isamd_read_item (ISAMD_PP pp, char **dst); int isamd_read_main_item (ISAMD_PP pp, char **dst); diff --git a/include/rsisamd.h b/include/rsisamd.h index 75cb2c1..990aaed 100644 --- a/include/rsisamd.h +++ b/include/rsisamd.h @@ -3,7 +3,7 @@ * All rights reserved. * Sebastian Hammer, Adam Dickmeiss, * - * $Id: rsisamd.h,v 1.1 2001-01-16 19:17:54 heikki Exp $ + * $Id: rsisamd.h,v 1.2 2002-07-12 18:12:22 heikki Exp $ */ #ifndef RSET_ISAMD_H @@ -21,7 +21,9 @@ extern const struct rset_control *rset_kind_isamd; typedef struct rset_isamd_parms { ISAMD is; - ISAMD_P pos; +/* ISAMD_P pos; */ + char dictentry[ISAMD_MAX_DICT_LEN+1]; + int dictlen; RSET_TERM rset_term; } rset_isamd_parms; diff --git a/index/invstat.c b/index/invstat.c index 27636ae..69973eb 100644 --- a/index/invstat.c +++ b/index/invstat.c @@ -68,9 +68,11 @@ static int inv_stat_handle (char *name, const char *info, int pos, stat_info->no_dict_entries++; stat_info->no_dict_bytes += strlen(name); - assert (*info == sizeof(ISAMS_P)); - memcpy (&isam_p, info+1, sizeof(ISAMS_P)); - + if (!stat_info->zh->reg->isamd) + { + assert (*info == sizeof(ISAMS_P)); + memcpy (&isam_p, info+1, sizeof(ISAMS_P)); + } if (stat_info->zh->reg->isams) { @@ -136,8 +138,9 @@ static int inv_stat_handle (char *name, const char *info, int pos, ISAMD_PP pp; int occurx = 0; struct it_key key; - - pp = isamd_pp_open (stat_info->zh->reg->isamd, isam_p); + /* printf("[%d: %d %d %d %d %d %d] ", */ + /* info[0], info[1], info[2], info[3], info[4], info[5], info[7]);*/ + pp = isamd_pp_open (stat_info->zh->reg->isamd, info+1, info[0]); occur = isamd_pp_num (pp); while (isamd_pp_read(pp, &key)) @@ -145,6 +148,7 @@ static int inv_stat_handle (char *name, const char *info, int pos, stat_info->cksum = stat_info->cksum * 65509 + key.sysno + 11 * key.seqno; occurx++; + /* printf("%d.%d ", key.sysno, key.seqno); */ /*!*/ if (-1==firstsys) { firstseq=key.seqno; @@ -158,17 +162,20 @@ static int inv_stat_handle (char *name, const char *info, int pos, key.sysno, key.seqno, occur,occurx, pp->offset); } + /* printf("\n"); */ /*!*/ +#ifdef SKIPTHIS if ( pp->is->method->debug >7 ) logf(LOG_LOG,"item %d=%d:%d says %d keys, counted %d", isam_p, isamd_type(isam_p), isamd_block(isam_p), occur, occurx); +#endif if (occurx != occur) logf(LOG_LOG,"Count error!!! read %d, counted %d", occur, occurx); assert (occurx == occur); - if ( is_singleton(isam_p) ) - stat_info->no_isam_entries[SINGLETON_TYPE] += occur; - else - stat_info->no_isam_entries[isamd_type(isam_p)] += occur; + i = pp->cat; + if (info[1]) + i=SINGLETON_TYPE; + stat_info->no_isam_entries[i] += occur; isamd_pp_close (pp); } if (stat_info->zh->reg->isamb) @@ -203,7 +210,7 @@ static int inv_stat_handle (char *name, const char *info, int pos, stat_info->isamb_levels[cat][level] ++; stat_info->no_isam_entries[cat] += occur; } - + i=0; while (occur > stat_info->isam_bounds[i] && stat_info->isam_bounds[i]) i++; ++(stat_info->isam_occurrences[i]); @@ -376,7 +383,11 @@ void zebra_register_statistics (ZebraHandle zh, int dumpdict) /* * * $Log: invstat.c,v $ - * Revision 1.31 2002-07-11 16:16:00 heikki + * Revision 1.32 2002-07-12 18:12:22 heikki + * Isam-D now stores small entries directly in the dictionary. + * Needs more tuning and cleaning... + * + * Revision 1.31 2002/07/11 16:16:00 heikki * Fixed a bug in isamd, failed to store a single key when its bits * did not fit into a singleton. * diff --git a/index/kinput.c b/index/kinput.c index 01a074c..9b7b8b2 100644 --- a/index/kinput.c +++ b/index/kinput.c @@ -3,7 +3,7 @@ * All rights reserved. * Sebastian Hammer, Adam Dickmeiss, Heikki Levanto * - * $Id: kinput.c,v 1.50 2002-04-23 13:39:10 adam Exp $ + * $Id: kinput.c,v 1.51 2002-07-12 18:12:22 heikki Exp $ * * Bugs * - Allocates a lot of memory for the merge process, but never releases it. @@ -528,35 +528,59 @@ int heap_inpd (struct heap_info *hi) while (hci.more) { char this_name[INP_NAME_MAX]; - ISAMD_P isamd_p, isamd_p2; char *dict_info; + char dictentry[ISAMD_MAX_DICT_LEN+1]; + char dictlen; strcpy (this_name, hci.cur_name); + + /* print_dict_item (hi->reg->zebra_maps, hci.cur_name); */ + /*!*/ /* FIXME: depend on isamd-debug */ + assert (hci.cur_name[1]); hi->no_diffs++; if ((dict_info = dict_lookup (hi->reg->dict, hci.cur_name))) { - memcpy (&isamd_p, dict_info+1, sizeof(ISAMD_P)); - isamd_p2 = isamd_append (hi->reg->isamd, isamd_p, isamd_i); - if (!isamd_p2) + dictlen=dict_info[0]; + memcpy (dictentry, dict_info+1, dictlen ); +#ifdef SKIPTHIS + logf(LOG_LOG,"dictentry before. len=%d: %d %d %d %d %d %d %d %d %d", + dictlen,dictentry[0], dictentry[1], dictentry[2], + dictentry[3], dictentry[4], dictentry[5], + dictentry[6], dictentry[7], dictentry[8]); /*!*/ +#endif + dictlen= isamd_append(hi->reg->isamd, dictentry, dictlen, isamd_i); + /* logf dictentry after */ + if (dictlen) { - hi->no_deletions++; - if (!dict_delete (hi->reg->dict, this_name)) - abort(); + hi->no_updates++; + if ( (dictlen!=dict_info[0]) || + (0!=memcmp(dictentry, dict_info+1, dictlen)) ) + { + dict_insert(hi->reg->dict, this_name, + dictlen,dictentry); + } } - else + else { - hi->no_updates++; - if (isamd_p2 != isamd_p) - dict_insert (hi->reg->dict, this_name, - sizeof(ISAMD_P), &isamd_p2); + hi->no_deletions++; + if (!dict_delete (hi->reg->dict, this_name)) + { + logf (LOG_FATAL, "dict_delete failed"); + abort(); + } } } else { - isamd_p = isamd_append (hi->reg->isamd, 0, isamd_i); + dictlen=0; + memset (dictentry, '\0', ISAMD_MAX_DICT_LEN); + dictlen= isamd_append(hi->reg->isamd, dictentry, dictlen, isamd_i); + /* logf dictentry first */ hi->no_insertions++; - dict_insert (hi->reg->dict, this_name, sizeof(ISAMD_P), &isamd_p); + if (dictlen) + dict_insert(hi->reg->dict, this_name, + dictlen,dictentry); } } xfree (isamd_i); diff --git a/index/trunc.c b/index/trunc.c index 8f4a42b..8d97ee0 100644 --- a/index/trunc.c +++ b/index/trunc.c @@ -3,7 +3,7 @@ * All rights reserved. * Sebastian Hammer, Adam Dickmeiss, Heikki Levanto * - * $Id: trunc.c,v 1.24 2002-04-16 22:31:42 adam Exp $ + * $Id: trunc.c,v 1.25 2002-07-12 18:12:22 heikki Exp $ * */ #include @@ -319,7 +319,9 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, key_compare_it); for (i = to-from; --i >= 0; ) { - ispt[i] = isamd_pp_open (zi->reg->isamd, isam_p[from+i]); + logf(LOG_FATAL, "isam_d does not (currently) support truncs"); + abort(); + /*ispt[i] = isamd_pp_open (zi->reg->isamd, isam_p[from+i]); */ if (isamd_pp_read (ispt[i], ti->tmpbuf)) heap_insert (ti, ti->tmpbuf, i); else @@ -578,7 +580,9 @@ RSET rset_trunc (ZebraHandle zi, ISAMS_P *isam_p, int no, { rset_isamd_parms parms; - parms.pos = *isam_p; + logf(LOG_FATAL, "isam_d does not (currently) support truncs"); + abort(); + /* parms.pos = *isam_p; */ parms.is = zi->reg->isamd; parms.rset_term = rset_term_create (term, length, flags); return rset_create (rset_kind_isamd, &parms); diff --git a/isamc/isamd.c b/isamc/isamd.c index f06cda0..46b22ba 100644 --- a/isamc/isamd.c +++ b/isamc/isamd.c @@ -1,7 +1,7 @@ /* * Copyright (c) 1995-1998, Index Data. * See the file LICENSE for details. - * $Id: isamd.c,v 1.21 2002-07-11 16:16:00 heikki Exp $ + * $Id: isamd.c,v 1.22 2002-07-12 18:12:21 heikki Exp $ * * Isamd - isam with diffs * Programmed by: Heikki Levanto @@ -557,9 +557,35 @@ void isamd_pp_close (ISAMD_PP pp) } +ISAMD_PP isamd_pp_create (ISAMD is, int cat) +/* creates a pp_buff without data in it. pos=0, cat as given */ +{ + ISAMD_PP pp = (ISAMD_PP) xmalloc (sizeof(*pp)); + int sz = is->method->filecat[is->max_cat].bsize; + + pp->numKeys = 0; + pp->buf = (char *) xmalloc (sz); + memset(pp->buf,'\0',sz); /* clear the buffer, for new blocks */ + + pp->next = 0; + pp->size = 0; + pp->offset = 0; + pp->is = is; + pp->diffs=0; + pp->diffbuf=0; + pp->diffinfo=0; + pp->decodeClientData = (*is->method->code_start)(ISAMD_DECODE); + pp->cat = cat; + pp->pos = 0; + is->no_op_new++; + return pp; + +} -ISAMD_PP isamd_pp_open (ISAMD is, ISAMD_P ipos) + +ISAMD_PP isamd_pp_open (ISAMD is, const char *dictbuf, int dictlen) { + ISAMD_P ipos; ISAMD_PP pp = (ISAMD_PP) xmalloc (sizeof(*pp)); char *src; int sz = is->method->filecat[is->max_cat].bsize; @@ -568,6 +594,7 @@ ISAMD_PP isamd_pp_open (ISAMD is, ISAMD_P ipos) char *c_ptr; /* for fake encoding the singlekey */ char *i_ptr; int ofs; + int dictnum; pp->numKeys = 0; src = pp->buf = (char *) xmalloc (sz); @@ -582,36 +609,24 @@ ISAMD_PP isamd_pp_open (ISAMD is, ISAMD_P ipos) pp->diffinfo=0; pp->decodeClientData = (*is->method->code_start)(ISAMD_DECODE); - if ( is_singleton(ipos) ) + dictnum=*dictbuf; // numkeys for internals, 0 for externals + + if (0==dictnum) + { + memcpy(&ipos, dictbuf+1, sizeof(ISAMD_P) ); + } + else /* dictionary block, fake a real one */ { pp->cat=0; pp->pos=0; if (is->method->debug > 5) - logf (LOG_LOG, "isamd_pp_open %p %d=%d:%d sz=%d n=%d=%d:%d", - pp, isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, pp->size, - pp->next, isamd_type(pp->next), isamd_block(pp->next) ); - singleton_decode(ipos, &singlekey ); - pp->offset=ISAMD_BLOCK_OFFSET_1; - pp->numKeys = 1; - ofs=pp->offset+sizeof(int); /* reserve length of diffsegment */ - singlekey.seqno = singlekey.seqno * 2 + 1; /* make an insert diff */ - c_ptr=&(pp->buf[ofs]); - i_ptr=(char*)(&singlekey); - (*is->method->code_item)(ISAMD_ENCODE, pp->decodeClientData, - &c_ptr, &i_ptr); - (*is->method->code_reset)(pp->decodeClientData); - ofs += c_ptr-&(pp->buf[ofs]); - memcpy( &(pp->buf[pp->offset]), &ofs, sizeof(int) ); - /* since we memset buf earlier, we already have a zero endmark! */ - pp->size = ofs; - if (is->method->debug > 5) - logf (LOG_LOG, "isamd_pp_open single %d=%x: %d.%d sz=%d", - ipos,ipos, - singlekey.sysno, singlekey.seqno/2, - pp->size ); + logf (LOG_LOG, "isamd_pp_open dict"); + pp->numKeys=(unsigned char) dictbuf[0]; + memcpy(pp->buf+ISAMD_BLOCK_OFFSET_1, dictbuf+1,dictlen-1); + pp->size=pp->offset=dictlen+ISAMD_BLOCK_OFFSET_1-1; is->no_op_single++; return pp; - } /* singleton */ + } /* dict block */ pp->cat = isamd_type(ipos); pp->pos = isamd_block(ipos); @@ -787,6 +802,8 @@ static char *hexdump(unsigned char *p, int len, char *buff) { } +#ifdef SKIPTHIS + /* needs different arguments, or something */ void isamd_pp_dump (ISAMD is, ISAMD_P ipos) { ISAMD_PP pp; @@ -849,9 +866,15 @@ void isamd_pp_dump (ISAMD is, ISAMD_P ipos) is->method->debug=olddebug; } /* dump */ +#endif + /* * $Log: isamd.c,v $ - * Revision 1.21 2002-07-11 16:16:00 heikki + * Revision 1.22 2002-07-12 18:12:21 heikki + * Isam-D now stores small entries directly in the dictionary. + * Needs more tuning and cleaning... + * + * Revision 1.21 2002/07/11 16:16:00 heikki * Fixed a bug in isamd, failed to store a single key when its bits * did not fit into a singleton. * diff --git a/isamc/merge-d.c b/isamc/merge-d.c index ba10b9e..f1494ee 100644 --- a/isamc/merge-d.c +++ b/isamc/merge-d.c @@ -3,7 +3,7 @@ * See the file LICENSE for details. * Heikki Levanto * - * $Id: merge-d.c,v 1.26 2002-07-11 16:16:00 heikki Exp $ + * $Id: merge-d.c,v 1.27 2002-07-12 18:12:21 heikki Exp $ * * bugs * sinleton-bit has to be in the high end, not low, so as not to confuse @@ -262,6 +262,7 @@ static void filter_backfill(FILTER F, struct it_key *k, int mode) int is_singleton(ISAMD_P ipos) { + return 0; /* no singletons any more */ return ( ipos != 0 ) && ( ipos & SINGLETON_BIT ); } @@ -269,6 +270,7 @@ int is_singleton(ISAMD_P ipos) int singleton_encode(struct it_key *k) /* encodes the key into one int. If it does not fit, returns 0 */ { + return 0; /* no more singletons */ if ( (k->sysno & DEC_MASK(DEC_SYSBITS) ) != k->sysno ) return 0; /* no room dor sysno */ if ( (k->seqno & DEC_MASK(DEC_SYSBITS) ) != k->seqno ) @@ -784,8 +786,9 @@ int isamd_read_item (ISAMD_PP pp, char **dst) static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ struct it_key *p_key, /* the data item that didn't fit*/ - /* ISAMD_I data) */ /* more input data comes here */ - FILTER filt) /* more input data arriving here */ + FILTER filt, /* more input data arriving here */ + char *dictentry, /* the thin in the dictionary */ + int dictlen) /* and its size */ { int diffidx; int killblk=0; @@ -794,7 +797,7 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ int r_more = 1; ISAMD_PP pp; ISAMD_PP readpp=firstpp; - int retval=0; + int retpos=0; int diffcat = firstpp->cat; /* keep the category of the diffblock even */ /* if it is going to be empty now. */ /* Alternative: Make it the minimal, and */ @@ -825,7 +828,6 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ r_ptr= (char *) &r_key; -/* r_more = isamd_read_item_merge( readpp, &r_ptr, p_key, data); */ r_more = isamd_read_item_merge( readpp, &r_ptr, p_key, filt); if (!r_more) { /* oops, all data has been deleted! what to do??? */ @@ -840,13 +842,14 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ /* set up the new blocks for simple writing */ - firstpp=isamd_pp_open(readpp->is,isamd_addr(0, diffcat)); + /* firstpp=isamd_pp_open(readpp->is,isamd_addr(0, diffcat)); */ + firstpp=isamd_pp_create(readpp->is, diffcat); firstpp->pos=isamd_alloc_block(firstpp->is,diffcat); if (readpp->is->method->debug >3) logf(LOG_LOG,"isamd_merge: allocated new firstpp %d=%d:%d", isamd_addr(firstpp->pos,firstpp->cat), firstpp->cat, firstpp->pos ); - pp=isamd_pp_open(readpp->is,isamd_addr(0,readpp->is->max_cat) ); + pp=isamd_pp_create(readpp->is,readpp->is->max_cat ); pp->offset=pp->size=ISAMD_BLOCK_OFFSET_N; while (r_more) @@ -899,10 +902,17 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ firstpp->size = firstpp->offset = ISAMD_BLOCK_OFFSET_1; /* nothing there */ memset(firstpp->buf,'\0',firstpp->is->method->filecat[firstpp->cat].bsize); save_first_pp(firstpp); - retval = isamd_addr(firstpp->pos, firstpp->cat); + retpos = isamd_addr(firstpp->pos, firstpp->cat); isamd_pp_close(firstpp); - return retval; + /* Create the dict entry */ + /*!*/ /* it could be this could go in the dict as well, if there's */ + /* been really many deletes. Somehow I suspect that is not the */ + /* case. FIXME: Collect statistics and see if needed */ + dictentry[0]=0; /* mark as a real isam */ + memcpy(dictentry+1, &retpos, sizeof(ISAMD_P)); + dictlen=sizeof(ISAMD_P)+1; + return dictlen; } /* merge */ @@ -917,10 +927,10 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ static int append_diffs( ISAMD is, - ISAMD_P ipos, - /*ISAMD_I data)*/ + char *dictentry, int dictlen, FILTER filt) { + ISAMD_P ipos; struct it_key i_key; /* one input item */ char *i_item = (char *) &i_key; /* same as chars */ char *i_ptr=i_item; @@ -936,26 +946,31 @@ static int append_diffs( char *c_ptr = codebuff; int codelen; int merge_rc; - int retval=0; + ISAMD_P retpos; + int dsize; - if (0==ipos) + if (0==dictlen) { - firstpp=isamd_pp_open(is, isamd_addr(0,0) ); + firstpp=isamd_pp_create(is, 0 ); firstpp->size=firstpp->offset=ISAMD_BLOCK_OFFSET_1; /* create in smallest category, will expand later */ ++(is->no_fbuilds); } else { - firstpp=isamd_pp_open(is, ipos); + firstpp=isamd_pp_open(is, dictentry, dictlen); + if (dictentry[0] ) + ipos=0; + else + memcpy(&ipos,dictentry+1,sizeof(ISAMD_P)); ++(is->no_appds); } if (is->method->debug >2) - logf(LOG_LOG,"isamd_appd: Start ipos=%d=%d:%d n=%d=%d:%d nk=%d", + logf(LOG_LOG,"isamd_appd: Start ipos=%d=%d:%d n=%d=%d:%d nk=%d sz=%d", ipos, isamd_type(ipos), isamd_block(ipos), firstpp->next, isamd_type(firstpp->next), isamd_block(firstpp->next), - firstpp->numKeys); + firstpp->numKeys, firstpp->size); maxsize = is->method->filecat[firstpp->cat].bsize; difflenidx = diffidx = firstpp->size; @@ -1013,7 +1028,7 @@ static int append_diffs( if (is->method->debug >9) //!!!!! logf(LOG_LOG,"isamd_appd: going to merge with m=%d %d.%d", i_mode, i_key.sysno, i_key.seqno); - merge_rc = merge (firstpp, &i_key, filt); + merge_rc = merge (firstpp, &i_key, filt, dictentry, dictlen); if (0!=merge_rc) return merge_rc; /* merge handled them all ! */ assert(!"merge returned zero ??"); @@ -1061,13 +1076,40 @@ static int append_diffs( while ( (difflenidx-diffidx<=sizeof(int)+1) && (difflenidxbuf[difflenidx++]='\0'; - if (0==firstpp->pos) /* need to (re)alloc the block */ - firstpp->pos = isamd_alloc_block(is, firstpp->cat); + if (firstpp->numKeys==0) + { + /* FIXME: Release blocks that may be allocated !!! */ + return 0; /* don't bother storing this! */ + } - retval = save_first_pp( firstpp ); - isamd_pp_close(firstpp); + dsize=diffidx-ISAMD_BLOCK_OFFSET_1; + /* logf(LOG_LOG,"!! nxt=%d diffidx=%d ds=%d", + firstpp->next, diffidx, dsize); */ + + if ( (0==firstpp->next) && (dsize numKeys < 128); + assert(firstpp->numKeys >0); + /* actually, 255 is good enough, but sign mismatches... */ + /* in real life, 4-5 is as much as we can hope for, as long */ + /* as ISAMD_MAX_DICT_LEN is reasonably small (8) */ + dictentry[0]=firstpp->numKeys; + memcpy(dictentry+1, firstpp->buf+ISAMD_BLOCK_OFFSET_1, dsize); + dictlen=dsize+1; + } + else + { + if (0==firstpp->pos) /* need to (re)alloc the block */ + firstpp->pos = isamd_alloc_block(is, firstpp->cat); + retpos = save_first_pp( firstpp ); + isamd_pp_close(firstpp); + dictentry[0]=0; /* mark as a real isam */ + memcpy(dictentry+1, &retpos, sizeof(ISAMD_P)); + dictlen=sizeof(ISAMD_P)+1; + } - return retval; + return dictlen; } /* append_diffs */ @@ -1077,24 +1119,23 @@ static int append_diffs( * isamd_append itself *************************************************************/ -ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) +int isamd_append (ISAMD is, char *dictentry, int dictlen, ISAMD_I data) +/*ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) */ { FILTER F = filter_open(is,data); - ISAMD_P rc=0; + int newlen=0; - int olddebug= is->method->debug; - if (ipos == 7320) - is->method->debug = 99; /*!*/ - if ( filter_isempty(F) ) /* can be, if del-ins of the same */ { if (is->method->debug >3) - logf(LOG_LOG,"isamd_appd: nothing to do for %d=",ipos); + logf(LOG_LOG,"isamd_appd: nothing to do "); filter_close(F); ++(is->no_non); - return ipos; /* without doing anything at all */ + return dictlen; /* without doing anything at all */ } +#ifdef SKIPTHIS + /* The old way to handle singletons */ if ( ( 0==ipos) && filter_only_one(F) ) { struct it_key k; @@ -1115,20 +1156,14 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) is->no_singles++; assert ( (rc==0) || is_singleton(rc) ); } - if ( 0==rc) /* either not single, or it did not fit */ - { - rc = append_diffs(is,ipos,F); - assert ( ! is_singleton(rc) ); - /* can happen if we run out of bits, so that block numbers overflow */ - /* to SINGLETON_BIT */ - } + newlen = append_diffs(is,ipos,F); +#endif + newlen = append_diffs(is,dictentry,dictlen,F); filter_close(F); if (is->method->debug >2) - logf(LOG_LOG,"isamd_appd: ret %d=%x (%d=%x)", - rc,rc,ipos,ipos); - is->method->debug=olddebug; /*!*/ - return rc; + logf(LOG_LOG,"isamd_appd: ret len=%d ", newlen); + return newlen; } /* isamd_append */ @@ -1139,7 +1174,11 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) /* * $Log: merge-d.c,v $ - * Revision 1.26 2002-07-11 16:16:00 heikki + * Revision 1.27 2002-07-12 18:12:21 heikki + * Isam-D now stores small entries directly in the dictionary. + * Needs more tuning and cleaning... + * + * Revision 1.26 2002/07/11 16:16:00 heikki * Fixed a bug in isamd, failed to store a single key when its bits * did not fit into a singleton. * diff --git a/rset/rsisamd.c b/rset/rsisamd.c index a66c5b1..b0e06ca 100644 --- a/rset/rsisamd.c +++ b/rset/rsisamd.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: rsisamd.c,v $ - * Revision 1.2 2002-04-05 08:46:26 adam + * Revision 1.3 2002-07-12 18:12:22 heikki + * Isam-D now stores small entries directly in the dictionary. + * Needs more tuning and cleaning... + * + * Revision 1.2 2002/04/05 08:46:26 adam * Zebra with full functionality * * Revision 1.1 2001/01/16 19:17:18 heikki @@ -50,8 +54,10 @@ struct rset_pp_info { }; struct rset_isamd_info { - ISAMD is; - ISAMD_P pos; + ISAMD is; + /* ISAMD_P pos; */ + char dictentry[ISAMD_MAX_DICT_LEN]; + int dictlen; struct rset_pp_info *ispt_list; }; @@ -63,7 +69,9 @@ static void *r_create(RSET ct, const struct rset_control *sel, void *parms) ct->flags |= RSET_FLAG_VOLATILE; info = (struct rset_isamd_info *) xmalloc (sizeof(*info)); info->is = pt->is; - info->pos = pt->pos; + /*info->pos = pt->pos;*/ + info->dictlen = pt->dictlen; + memcpy(info->dictentry, pt->dictentry, pt->dictlen); info->ispt_list = NULL; ct->no_rset_terms = 1; ct->rset_terms = (RSET_TERM *) xmalloc (sizeof(*ct->rset_terms)); @@ -85,7 +93,7 @@ RSFD r_open (RSET ct, int flag) ptinfo = (struct rset_pp_info *) xmalloc (sizeof(*ptinfo)); ptinfo->next = info->ispt_list; info->ispt_list = ptinfo; - ptinfo->pt = isamd_pp_open (info->is, info->pos); + ptinfo->pt = isamd_pp_open (info->is, info->dictentry, info->dictlen); ptinfo->info = info; if (ct->rset_terms[0]->nn < 0) ct->rset_terms[0]->nn = isamd_pp_num (ptinfo->pt); -- 1.7.10.4