1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2011 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 * \file rsmultiandor.c
23 * \brief This module implements the rsmulti_or and rsmulti_and result sets
25 * rsmultior is based on a heap, from which we find the next hit.
27 * rsmultiand is based on a simple array of rsets, and a linear
28 * search to find the record that exists in all of those rsets.
29 * To speed things up, the array is sorted so that the smallest
30 * rsets come first, they are most likely to have the hits furthest
31 * away, and thus forwarding to them makes the most sense.
43 #include <idzebra/util.h>
44 #include <idzebra/isamc.h>
47 static RSFD r_open_and(RSET ct, int flag);
48 static RSFD r_open_or(RSET ct, int flag);
49 static void r_close(RSFD rfd);
50 static void r_delete(RSET ct);
51 static int r_read_and(RSFD rfd, void *buf, TERMID *term);
52 static int r_read_or(RSFD rfd, void *buf, TERMID *term);
53 static int r_write(RSFD rfd, const void *buf);
54 static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
55 const void *untilbuf);
56 static int r_forward_or(RSFD rfd, void *buf, TERMID *term,
57 const void *untilbuf);
58 static void r_pos_and(RSFD rfd, double *current, double *total);
59 static void r_pos_or(RSFD rfd, double *current, double *total);
60 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
62 static const struct rset_control control_or =
75 static const struct rset_control control_and =
88 /* The heap structure:
89 * The rset contains a list or rsets we are ORing together
90 * The rfd contains a heap of heap-items, which contain
91 * a rfd opened to those rsets, and a buffer for one key.
92 * They also contain a ptr to the rset list in the rset
93 * itself, for practical reasons.
106 const struct rset_key_control *kctrl;
107 struct heap_item **heap; /* ptrs to the rfd */
109 typedef struct heap *HEAP;
112 struct rset_private {
118 struct heap_item *items; /* we alloc and free them here */
119 HEAP h; /* and move around here */
120 zint hits; /* returned so far */
121 int eof; /* seen the end of it */
122 int tailcount; /* how many items are tailing */
128 static int log_level = 0;
129 static int log_level_initialized = 0;
131 /* Heap functions ***********************/
133 static void heap_swap(HEAP h, int x, int y)
135 struct heap_item *swap;
137 h->heap[x] = h->heap[y];
141 static int heap_cmp(HEAP h, int x, int y)
143 return (*h->kctrl->cmp)(h->heap[x]->buf, h->heap[y]->buf);
146 static int heap_empty(HEAP h)
148 return 0 == h->heapnum;
151 /** \brief deletes the first item in the heap, and balances the rest
153 static void heap_delete(HEAP h)
155 int cur = 1, child = 2;
156 h->heap[1] = 0; /* been deleted */
157 heap_swap(h, 1, h->heapnum--);
158 while (child <= h->heapnum)
160 if (child < h->heapnum && heap_cmp(h, child, 1 + child) > 0)
162 if (heap_cmp(h,cur,child) > 0)
164 heap_swap(h, cur, child);
173 /** \brief puts item into heap.
174 The heap root element has changed value (to bigger)
175 Swap downwards until the heap is ordered again
177 static void heap_balance(HEAP h)
179 int cur = 1, child = 2;
180 while (child <= h->heapnum)
182 if (child < h->heapnum && heap_cmp(h, child, 1 + child) > 0)
184 if (heap_cmp(h,cur,child) > 0)
186 heap_swap(h, cur, child);
195 static void heap_insert(HEAP h, struct heap_item *hi)
199 cur = ++(h->heapnum);
200 assert(cur <= h->heapmax);
203 while (parent && (heap_cmp(h,parent,cur) > 0))
206 heap_swap(h, cur, parent);
213 HEAP heap_create(NMEM nmem, int size, const struct rset_key_control *kctrl)
215 HEAP h = (HEAP) nmem_malloc(nmem, sizeof(*h));
217 ++size; /* heap array starts at 1 */
221 h->heap = (struct heap_item**) nmem_malloc(nmem, size * sizeof(*h->heap));
222 h->heap[0] = 0; /* not used */
226 static void heap_clear( HEAP h)
232 static void heap_destroy(HEAP h)
234 /* nothing to delete, all is nmem'd, and will go away in due time */
237 /** \brief compare and items for quicksort
238 used in qsort to get the multi-and args in optimal order
239 that is, those with fewest occurrences first
241 int compare_ands(const void *x, const void *y)
242 { const struct heap_item *hx = x;
243 const struct heap_item *hy = y;
244 double cur, totx, toty;
245 rset_pos(hx->fd, &cur, &totx);
246 rset_pos(hy->fd, &cur, &toty);
247 if (totx > toty + 0.5)
249 if (totx < toty - 0.5)
251 return 0; /* return totx - toty, except for overflows and rounding */
254 static RSET rsmulti_andor_create(NMEM nmem,
255 struct rset_key_control *kcontrol,
256 int scope, TERMID termid,
257 int no_rsets, RSET* rsets,
258 const struct rset_control *ctrl)
260 RSET rnew = rset_create_base(ctrl, nmem, kcontrol, scope, termid,
262 struct rset_private *info;
263 if (!log_level_initialized)
265 log_level = yaz_log_module_level("rsmultiandor");
266 log_level_initialized = 1;
268 yaz_log(log_level, "rsmultiand_andor_create scope=%d", scope);
269 info = (struct rset_private *) nmem_malloc(rnew->nmem, sizeof(*info));
274 RSET rset_create_or(NMEM nmem, struct rset_key_control *kcontrol,
275 int scope, TERMID termid, int no_rsets, RSET* rsets)
277 return rsmulti_andor_create(nmem, kcontrol, scope, termid,
278 no_rsets, rsets, &control_or);
281 RSET rset_create_and(NMEM nmem, struct rset_key_control *kcontrol,
282 int scope, int no_rsets, RSET* rsets)
284 return rsmulti_andor_create(nmem, kcontrol, scope, 0,
285 no_rsets, rsets, &control_and);
288 static void r_delete(RSET ct)
292 static RSFD r_open_andor(RSET ct, int flag, int is_and)
295 struct rfd_private *p;
296 const struct rset_key_control *kctrl = ct->keycontrol;
299 if (flag & RSETF_WRITE)
301 yaz_log(YLOG_FATAL, "multiandor set type is read-only");
304 rfd = rfd_create_base(ct);
307 p = (struct rfd_private *)rfd->priv;
311 /* all other pointers shouls already be allocated, in right sizes! */
315 p = (struct rfd_private *) nmem_malloc(ct->nmem,sizeof(*p));
320 p->tailbits = nmem_malloc(ct->nmem, ct->no_children*sizeof(char) );
322 p->h = heap_create(ct->nmem, ct->no_children, kctrl);
323 p->items = (struct heap_item *)
324 nmem_malloc(ct->nmem, ct->no_children*sizeof(*p->items));
325 for (i = 0; i < ct->no_children; i++)
327 p->items[i].rset = ct->children[i];
328 p->items[i].buf = nmem_malloc(ct->nmem, kctrl->key_size);
336 { /* read the array and sort it */
337 for (i = 0; i < ct->no_children; i++)
339 p->items[i].fd = rset_open(ct->children[i], RSETF_READ);
340 if (!rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
344 qsort(p->items, ct->no_children, sizeof(p->items[0]), compare_ands);
347 { /* fill the heap for ORing */
348 for (i = 0; i < ct->no_children; i++)
350 p->items[i].fd = rset_open(ct->children[i],RSETF_READ);
351 if ( rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
352 heap_insert(p->h, &(p->items[i]));
358 static RSFD r_open_or(RSET ct, int flag)
360 return r_open_andor(ct, flag, 0);
363 static RSFD r_open_and(RSET ct, int flag)
365 return r_open_andor(ct, flag, 1);
368 static void r_close(RSFD rfd)
370 struct rfd_private *p=(struct rfd_private *)(rfd->priv);
375 for (i = 0; i < rfd->rset->no_children; i++)
377 rset_close(p->items[i].fd);
380 static int r_forward_or(RSFD rfd, void *buf,
381 TERMID *term, const void *untilbuf)
382 { /* while heap head behind untilbuf, forward it and rebalance heap */
383 struct rfd_private *p = rfd->priv;
384 const struct rset_key_control *kctrl = rfd->rset->keycontrol;
385 if (heap_empty(p->h))
387 while ((*kctrl->cmp)(p->h->heap[1]->buf,untilbuf) < -rfd->rset->scope )
389 if (rset_forward(p->h->heap[1]->fd, p->h->heap[1]->buf,
390 &p->h->heap[1]->term, untilbuf))
395 if (heap_empty(p->h))
400 return r_read_or(rfd, buf, term);
403 /** \brief reads one item key from an 'or' set
404 \param rfd set handle
405 \param buf resulting item buffer
406 \param term resulting term
408 \retval 1 item could be read
410 static int r_read_or(RSFD rfd, void *buf, TERMID *term)
412 RSET rset = rfd->rset;
413 struct rfd_private *mrfd = rfd->priv;
414 const struct rset_key_control *kctrl = rset->keycontrol;
415 struct heap_item *it;
417 if (heap_empty(mrfd->h))
419 it = mrfd->h->heap[1];
420 memcpy(buf, it->buf, kctrl->key_size);
429 rdres = rset_read(it->fd, it->buf, &it->term);
431 heap_balance(mrfd->h);
433 heap_delete(mrfd->h);
437 /** \brief reads one item key from an 'and' set
438 \param rfd set handle
439 \param buf resulting item buffer
440 \param term resulting term
442 \retval 1 item could be read
444 Has to return all hits where each item points to the
445 same sysno (scope), in order. Keep an extra key (hitkey)
446 as long as all records do not point to hitkey, forward
447 them, and update hitkey to be the highest seen so far.
448 (if any item eof's, mark eof, and return 0 thereafter)
449 Once a hit has been found, scan all items for the smallest
450 value. Mark all as being in the tail. Read next from that
451 item, and if not in the same record, clear its tail bit
453 static int r_read_and(RSFD rfd, void *buf, TERMID *term)
455 struct rfd_private *p = rfd->priv;
457 const struct rset_key_control *kctrl = ct->keycontrol;
463 { /* we are tailing, find lowest tail and return it */
467 for (i = 0; i < ct->no_children; i++)
473 (p->items[i].buf, p->items[mintail].buf);
479 if (kctrl->get_segment)
480 { /* segments enabled */
481 zint segment = kctrl->get_segment(p->items[i].buf);
482 /* store segment if not stored already */
483 if (!p->segment && segment)
484 p->segment = segment;
486 /* skip rest entirely if segments don't match */
487 if (p->segment && segment && p->segment != segment)
492 /* return the lowest tail */
493 memcpy(buf, p->items[mintail].buf, kctrl->key_size);
495 *term = p->items[mintail].term;
496 if (!rset_read(p->items[mintail].fd, p->items[mintail].buf,
497 &p->items[mintail].term))
499 p->eof = 1; /* game over, once tails have been returned */
500 p->tailbits[mintail] = 0;
506 cmp = (*kctrl->cmp)(p->items[mintail].buf,buf);
507 if (cmp >= rfd->rset->scope)
509 p->tailbits[mintail] = 0;
514 continue; /* skip again.. eventually tailcount will be 0 */
515 if (p->tailcount == 0)
519 /* not tailing, forward until all records match, and set up */
520 /* as tails. the earlier 'if' will then return the hits */
522 return 0; /* nothing more to see */
523 i = 1; /* assume items[0] is highest up */
524 while (i < ct->no_children)
526 int cmp = (*kctrl->cmp)(p->items[0].buf, p->items[i].buf);
527 if (cmp <= -rfd->rset->scope) { /* [0] was behind, forward it */
528 if (!rset_forward(p->items[0].fd, p->items[0].buf,
529 &p->items[0].term, p->items[i].buf))
531 p->eof = 1; /* game over */
534 i = 0; /* start forwarding from scratch */
536 else if (cmp >= rfd->rset->scope)
537 { /* [0] was ahead, forward i */
538 if (!rset_forward(p->items[i].fd, p->items[i].buf,
539 &p->items[i].term, p->items[0].buf))
541 p->eof = 1; /* game over */
548 /* if we get this far, all rsets are now within +- scope of [0] */
549 /* ergo, we have a hit. Mark them all as tailing, and let the */
550 /* upper 'if' return the hits in right order */
551 for (i = 0; i < ct->no_children; i++)
553 p->tailcount = ct->no_children;
560 static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
561 const void *untilbuf)
563 struct rfd_private *p = rfd->priv;
565 const struct rset_key_control *kctrl = ct->keycontrol;
570 for (i = 0; i < ct->no_children; i++)
572 cmp = (*kctrl->cmp)(p->items[i].buf,untilbuf);
573 if (cmp <= -rfd->rset->scope)
575 killtail = 1; /* we are moving to a different hit */
576 if (!rset_forward(p->items[i].fd, p->items[i].buf,
577 &p->items[i].term, untilbuf))
579 p->eof = 1; /* game over */
587 for (i = 0; i < ct->no_children; i++)
591 return r_read_and(rfd,buf,term);
594 static void r_pos_x(RSFD rfd, double *current, double *total, int and_op)
597 struct rfd_private *mrfd = (struct rfd_private *)(rfd->priv);
598 double ratio = and_op ? 0.0 : 1.0;
600 double sum_cur = 0.0;
601 double sum_tot = 0.0;
602 for (i = 0; i < ct->no_children; i++)
605 rset_pos(mrfd->items[i].fd, &cur, &tot);
607 yaz_log(log_level, "r_pos: %d %0.1f %0.1f", i, cur,tot);
612 double nratio = cur / tot;
620 sum_cur += (cur - 1);
624 if (!and_op && sum_tot > 0.0)
626 yaz_log(YLOG_LOG, "or op sum_cur=%0.1f sum_tot=%0.1f hits=%f", sum_cur, sum_tot, (double) mrfd->hits);
627 ratio = sum_cur / sum_tot;
629 if (ratio == 0.0 || ratio == 1.0)
630 { /* nothing there */
633 yaz_log(log_level, "r_pos: NULL %0.1f %0.1f", *current, *total);
637 *current = (double) (mrfd->hits);
638 *total = *current / ratio;
639 yaz_log(log_level, "r_pos: = %0.1f %0.1f", *current, *total);
643 static void r_pos_and(RSFD rfd, double *current, double *total)
645 r_pos_x(rfd, current, total, 1);
648 static void r_pos_or(RSFD rfd, double *current, double *total)
650 r_pos_x(rfd, current, total, 0);
653 static int r_write(RSFD rfd, const void *buf)
655 yaz_log(YLOG_FATAL, "multior set type is read-only");
659 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
662 rset_get_one_term(ct, terms, maxterms, curterm);
665 /* Special case: Some multi-ors have all terms pointing to the same
666 term. We do not want to duplicate those. Other multiors (and ands)
667 have different terms under them. Those we want.
669 int firstterm = *curterm;
671 for (i = 0; i < ct->no_children; i++)
673 rset_getterms(ct->children[i], terms, maxterms, curterm);
674 if (*curterm > firstterm + 1 && *curterm <= maxterms &&
675 terms[(*curterm) - 1] == terms[firstterm])
676 (*curterm)--; /* forget the term, seen that before */
685 * c-file-style: "Stroustrup"
686 * indent-tabs-mode: nil
688 * vim: shiftwidth=4 tabstop=8 expandtab