From: Mike Taylor Date: Thu, 8 Oct 2009 20:22:32 +0000 (+0100) Subject: Merge branch 'master' of ssh://git.indexdata.com/home/git/pub/idzebra X-Git-Tag: v2.0.44~12 X-Git-Url: http://jsfdemo.indexdata.com/?a=commitdiff_plain;h=d50b29c14d00e25ad1a85dfb2b7571326324a22e;hp=7d746ecdedc82d5656c74900cf8310433183e25c;p=idzebra-moved-to-github.git Merge branch 'master' of ssh://git.indexdata.com/home/git/pub/idzebra --- diff --git a/NEWS b/NEWS index c18295b..b552f20 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,14 @@ +--- 2.0.42 2009/10/08 + +Proximity modifications. The optimized proximity now deals with more +cases, i.e. all prox of the form @prox 0 d 1 1,2,3 k 2 .. +That's exclusive==0, any distance, ordered (true), relation (<, =, <=), +known unit word. + +Fix bug WRT inconsistent registers, bug #3062. + +Fix install doc rule so it's compatible with Automake 1.11. + --- 2.0.41 2009/08/24 Fix typos and other errors in the documentation. Thanks to Galen diff --git a/configure.ac b/configure.ac index 8cf0f1e..0a8d6fb 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ dnl This file is part of the Zebra server. dnl Copyright (C) 1994-2009 Index Data dnl AC_PREREQ(2.60) -AC_INIT([idzebra],[2.0.41],[zebra-help@indexdata.dk]) +AC_INIT([idzebra],[2.0.42],[zebra-help@indexdata.dk]) AC_CONFIG_SRCDIR(configure.ac) AC_CONFIG_AUX_DIR(config) AM_INIT_AUTOMAKE([1.9]) diff --git a/debian/changelog b/debian/changelog index 532f693..f68d301 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +idzebra (2.0.42-1indexdata) unstable; urgency=low + + * Upstream. + + -- Adam Dickmeiss Thu, 08 Oct 2009 09:36:43 +0200 + idzebra (2.0.41-1indexdata) unstable; urgency=low * Upstream. 
diff --git a/index/kinput.c b/index/kinput.c index f5afa44..de1d563 100644 --- a/index/kinput.c +++ b/index/kinput.c @@ -185,7 +185,7 @@ int key_file_read(struct key_file *f, char *key) key[i++] = c; while ((c = key_file_getc(f))) { - if (i <= IT_MAX_WORD) + if (i < INP_NAME_MAX-2) key[i++] = c; } key[i++] = '\0'; @@ -360,7 +360,7 @@ static int heap_read_one(struct heap_info *hi, char *name, char *key) /* for debugging only */ void zebra_log_dict_entry(ZebraHandle zh, const char *s) { - char dst[IT_MAX_WORD+1]; + char dst[INP_NAME_MAX+1]; int ord; int len = key_SU_decode(&ord, (const unsigned char *) s); const char *index_type; diff --git a/rset/rsprox.c b/rset/rsprox.c index 6972828..fa35f38 100644 --- a/rset/rsprox.c +++ b/rset/rsprox.c @@ -29,13 +29,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #define RSET_DEBUG 0 #endif -static RSFD r_open (RSET ct, int flag); -static void r_close (RSFD rfd); -static void r_delete (RSET ct); +static RSFD r_open(RSET ct, int flag); +static void r_close(RSFD rfd); +static void r_delete(RSET ct); static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf); -static int r_read (RSFD rfd, void *buf, TERMID *term); -static int r_write (RSFD rfd, const void *buf); -static void r_pos (RSFD rfd, double *current, double *total); +static int r_read(RSFD rfd, void *buf, TERMID *term); +static int r_write(RSFD rfd, const void *buf); +static void r_pos(RSFD rfd, double *current, double *total); static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm); static const struct rset_control control = @@ -85,11 +85,11 @@ RSET rset_create_prox(NMEM nmem, struct rset_key_control *kcontrol, return rnew; } -static void r_delete (RSET ct) +static void r_delete(RSET ct) { } -static RSFD r_open (RSET ct, int flag) +static RSFD r_open(RSET ct, int flag) { RSFD rfd; struct rset_prox_rfd *p; @@ -103,10 +103,11 @@ static RSFD r_open (RSET ct, int flag) rfd = rfd_create_base(ct); if 
(rfd->priv) p = (struct rset_prox_rfd *)(rfd->priv); - else { + else + { p = (struct rset_prox_rfd *) nmem_malloc(ct->nmem,sizeof(*p)); rfd->priv = p; - p->more = nmem_malloc (ct->nmem,sizeof(*p->more) * ct->no_children); + p->more = nmem_malloc(ct->nmem,sizeof(*p->more) * ct->no_children); p->buf = nmem_malloc(ct->nmem,sizeof(*p->buf) * ct->no_children); p->terms = nmem_malloc(ct->nmem,sizeof(*p->terms) * ct->no_children); for (i = 0; i < ct->no_children; i++) @@ -118,22 +119,23 @@ static RSFD r_open (RSET ct, int flag) } yaz_log(YLOG_DEBUG,"rsprox (%s) open [%p] n=%d", ct->control->desc, rfd, ct->no_children); - - for (i = 0; i < ct->no_children; i++) { - p->rfd[i] = rset_open (ct->children[i], RSETF_READ); - p->more[i] = rset_read (p->rfd[i], p->buf[i], &p->terms[i]); + + for (i = 0; i < ct->no_children; i++) + { + p->rfd[i] = rset_open(ct->children[i], RSETF_READ); + p->more[i] = rset_read(p->rfd[i], p->buf[i], &p->terms[i]); } p->hits = 0; return rfd; } -static void r_close (RSFD rfd) +static void r_close(RSFD rfd) { RSET ct = rfd->rset; struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv); int i; - for (i = 0; i<ct->no_children; i++) + for (i = 0; i < ct->no_children; i++) rset_close(p->rfd[i]); } @@ -149,13 +151,12 @@ static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf) if (untilbuf) { /* it is enough to forward first one. Other will follow. 
*/ - if ( p->more[0] && /* was: cmp >=2 */ - ((kctrl->cmp)(untilbuf, p->buf[0]) >= rfd->rset->scope) ) + if (p->more[0] && /* was: cmp >=2 */ + ((kctrl->cmp)(untilbuf, p->buf[0]) >= rfd->rset->scope) ) p->more[0] = rset_forward(p->rfd[0], p->buf[0], &p->terms[0], untilbuf); } - if (info->ordered && info->relation == 3 && info->exclusion == 0 - && info->distance == 1) + if (info->ordered && info->relation <= 3 && info->exclusion == 0) { while (p->more[0]) { @@ -166,29 +167,34 @@ static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf) p->more[0] = 0; /* saves us a goto out of while loop. */ break; } - cmp = (*kctrl->cmp) (p->buf[i], p->buf[i-1]); - if (cmp >= rfd->rset->scope ) /* cmp>1 */ + cmp = (*kctrl->cmp)(p->buf[i], p->buf[i-1]); + if (cmp >= rfd->rset->scope) /* not same record */ { - p->more[i-1] = rset_forward (p->rfd[i-1], - p->buf[i-1], - &p->terms[i-1], - p->buf[i]); + p->more[i-1] = rset_forward(p->rfd[i-1], + p->buf[i-1], + &p->terms[i-1], + p->buf[i]); break; } - else if ( cmp>0 ) /* cmp == 1*/ + else if (cmp > 0) /* within record and ordered */ { - if ((*kctrl->getseq)(p->buf[i-1]) +1 != - (*kctrl->getseq)(p->buf[i])) - { /* FIXME - We need more flexible multilevel stuff */ - p->more[i-1] = rset_read ( p->rfd[i-1], p->buf[i-1], - &p->terms[i-1]); - break; - } + zint diff = (*kctrl->getseq)(p->buf[i]) - + (*kctrl->getseq)(p->buf[i-1]); + if (info->relation == 3 && diff == info->distance) + continue; + else if (info->relation == 2 && diff <= info->distance) + continue; + else if (info->relation == 1 && diff < info->distance) + continue; + + p->more[i-1] = rset_read(p->rfd[i-1], p->buf[i-1], + &p->terms[i-1]); + break; } - else + else /* within record - wrong order */ { - p->more[i] = rset_forward (p->rfd[i], - p->buf[i], &p->terms[i], p->buf[i-1]); + p->more[i] = rset_forward(p->rfd[i], p->buf[i], + &p->terms[i], p->buf[i-1]); break; } } @@ -210,19 +216,19 @@ static int r_forward(RSFD rfd, void *buf, TERMID *term, const void 
*untilbuf) { int cmp = (*kctrl->cmp)(p->buf[0], p->buf[1]); if ( cmp <= - rfd->rset->scope) /* cmp<-1*/ - p->more[0] = rset_forward (p->rfd[0], p->buf[0], - &p->terms[0],p->buf[1]); + p->more[0] = rset_forward(p->rfd[0], p->buf[0], + &p->terms[0],p->buf[1]); else if ( cmp >= rfd->rset->scope ) /* cmp>1 */ - p->more[1] = rset_forward (p->rfd[1], p->buf[1], - &p->terms[1],p->buf[0]); + p->more[1] = rset_forward(p->rfd[1], p->buf[1], + &p->terms[1],p->buf[0]); else { zint seqno[500]; /* FIXME - why 500 ?? */ int n = 0; seqno[n++] = (*kctrl->getseq)(p->buf[0]); - while ((p->more[0] = rset_read (p->rfd[0], - p->buf[0], &p->terms[0]))) + while ((p->more[0] = rset_read(p->rfd[0], + p->buf[0], &p->terms[0]))) { cmp = (*kctrl->cmp)(p->buf[0], p->buf[1]); if (cmp <= - rfd->rset->scope || cmp >= rfd->rset->scope) @@ -230,51 +236,60 @@ static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf) if (n < 500) seqno[n++] = (*kctrl->getseq)(p->buf[0]); } - for (i = 0; i<n; i++) + memcpy(buf, p->buf[1], kctrl->key_size); + if (term) + *term = p->terms[1]; + while (1) { - zint diff = (*kctrl->getseq)(p->buf[1]) - seqno[i]; - int excl = info->exclusion; - if (!info->ordered && diff < 0) - diff = -diff; - switch (info->relation) + for (i = 0; i < n; i++) { - case 1: /* < */ - if (diff < info->distance && diff >= 0) - excl = !excl; - break; - case 2: /* <= */ - if (diff <= info->distance && diff >= 0) - excl = !excl; - break; - case 3: /* == */ - if (diff == info->distance && diff >= 0) - excl = !excl; - break; - case 4: /* >= */ - if (diff >= info->distance && diff >= 0) - excl = !excl; - break; - case 5: /* > */ - if (diff > info->distance && diff >= 0) - excl = !excl; + zint diff = (*kctrl->getseq)(p->buf[1]) - seqno[i]; + int excl = info->exclusion; + if (!info->ordered && diff < 0) + diff = -diff; + switch (info->relation) + { + case 1: /* < */ + if (diff < info->distance && diff >= 0) + excl = !excl; + break; + case 2: /* <= */ + if (diff <= info->distance && diff >= 0) + excl = !excl; + 
break; + case 3: /* == */ + if (diff == info->distance && diff >= 0) + excl = !excl; + break; + case 4: /* >= */ + if (diff >= info->distance && diff >= 0) + excl = !excl; + break; + case 5: /* > */ + if (diff > info->distance && diff >= 0) + excl = !excl; + break; + case 6: /* != */ + if (diff != info->distance && diff >= 0) + excl = !excl; + break; + } + if (excl) + { + p->more[1] = rset_read( p->rfd[1], p->buf[1], + &p->terms[1]); + p->hits++; + return 1; + } + } + p->more[1] = rset_read(p->rfd[1], p->buf[1], &p->terms[1]); + if (!p->more[1]) break; - case 6: /* != */ - if (diff != info->distance && diff >= 0) - excl = !excl; + cmp = (*kctrl->cmp)(buf, p->buf[1]); + if (cmp <= - rfd->rset->scope || cmp >= rfd->rset->scope) break; - } - if (excl) - { - memcpy (buf, p->buf[1], kctrl->key_size); - if (term) - *term = p->terms[1]; - p->more[1] = rset_read ( p->rfd[1], p->buf[1], - &p->terms[1]); - p->hits++; - return 1; - } } - p->more[1] = rset_read (p->rfd[1], p->buf[1], &p->terms[1]); } } } @@ -282,18 +297,18 @@ static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf) } -static int r_read (RSFD rfd, void *buf, TERMID *term) +static int r_read(RSFD rfd, void *buf, TERMID *term) { return r_forward(rfd, buf, term, 0); } -static int r_write (RSFD rfd, const void *buf) +static int r_write(RSFD rfd, const void *buf) { yaz_log(YLOG_FATAL, "prox set type is read-only"); return -1; } -static void r_pos (RSFD rfd, double *current, double *total) +static void r_pos(RSFD rfd, double *current, double *total) { RSET ct = rfd->rset; struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv); @@ -307,30 +322,36 @@ static void r_pos (RSFD rfd, double *current, double *total) for (i = 0; i < ct->no_children; i++) { rset_pos(p->rfd[i], &cur, &tot); - if (tot>0) { + if (tot > 0) + { scur += cur; stot += tot; } } - if (tot <0) { /* nothing found */ + if (tot < 0) + { /* nothing found */ *current = -1; *total = -1; - } else if (tot < 1) { /* most likely tot==0 
*/ + } + else if (tot < 1) + { /* most likely tot==0 */ *current = 0; *total = 0; - } else { + } + else + { r = scur/stot; *current = (double) p->hits; - *total=*current/r ; + *total = *current/r ; } yaz_log(YLOG_DEBUG,"prox_pos: [%d] %0.1f/%0.1f= %0.4f ", - i,*current, *total, r); + i,*current, *total, r); } static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm) { int i; - for (i = 0; i<ct->no_children; i++) + for (i = 0; i < ct->no_children; i++) rset_getterms(ct->children[i], terms, maxterms, curterm); } diff --git a/test/api/test_search.c b/test/api/test_search.c index 80e75d3..7dae442 100644 --- a/test/api/test_search.c +++ b/test/api/test_search.c @@ -50,6 +50,7 @@ const char *myrec[] = { "\n", "\nMy title xa b c c c a y\n\n" , + "\ntesta1 a2 c a1 a2 a3\n\n" , "\n" " 2107-09-19 00:00:00\n" @@ -173,8 +174,8 @@ static void tst(int argc, char **argv) YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=5 title", 2)); /* always-matches relation */ - YAZ_CHECK(tl_query(zh, "@attr 1=_ALLRECORDS @attr 2=103 {ym}", 4)); - YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=103 {x my}", 3)); + YAZ_CHECK(tl_query(zh, "@attr 1=_ALLRECORDS @attr 2=103 {ym}", 5)); + YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=103 {x my}", 4)); YAZ_CHECK(tl_query_x(zh, "@attr 1=1 @attr 2=103 {x my}", 0, 114)); /* and searches */ @@ -246,6 +247,16 @@ static void tst(int argc, char **argv) /* exl=0 distance=1 order=1 relation=3 (=), known, unit=word */ YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 3 k 2 a b", 1)); + + /* exl=0 distance=1 order=1 relation=3 (=), known, unit=word */ + YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 3 k 2 c a", 1)); + /* exl=0 distance=1 order=1 relation=2 (<=), known, unit=word */ + YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 2 k 2 c a", 1)); + + /* exl=0 distance=1 order=1 relation=2 (<=), known, unit=word */ + YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 2 k 2 @prox 0 1 1 2 k 2 a1 a2 a3", 1)); + YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 3 k 2 
@prox 0 1 1 3 k 2 a1 a2 a3", 1)); + /* 3 term @prox test.. */ YAZ_CHECK(tl_query(zh, "@attr 1=1016 \"a b c\"", 1)); @@ -255,7 +266,7 @@ static void tst(int argc, char **argv) /* left associative (works fine) */ YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 2 k 2 @prox 0 1 1 2 k 2 a b c", 1)); - /* exl=0 distance=1 order=1 relation=3 (=), known, unit=word * + /* exl=0 distance=1 order=1 relation=3 (=), known, unit=word */ /* right associative (does not work, so zero hits) */ YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 3 k 2 a @prox 0 1 1 3 k 2 b c", 0)); /* left associative (works fine) */