descent server API. System information stored as "SGML" records.
+Changed the way Zebra keeps its maintenance information. Records
+in "SGML" notation using an EXPLAIN schema are now used when
+appropriate.
+
+Bug fix: Index didn't handle update/insert/delete of the same
+record (i.e. same recordId) in one run (one invocation of zebraidx).
+Only the first occurrence of a record is considered.
+
+Most searches now return correct number of hits.
+
+New modular ranking system. Interested programmers are encouraged to
+inspect rank1.c and improve the algorithm.
+
Bug fix: Lock files weren't removed as they should when using NT.
Implemented Z39.50 Sort. Zebra's sort handler uses use attributes to
-# Copyright (C) 1994-1997, Index Data I/S
+# Copyright (C) 1994-1998, Index Data I/S
# All rights reserved.
# Sebastian Hammer, Adam Dickmeiss
-# $Id: Makefile,v 1.60 1997-09-17 12:19:05 adam Exp $
+# $Id: Makefile,v 1.61 1998-03-05 08:45:11 adam Exp $
SHELL=/bin/sh
MAKE=make
-Zebra TODO $Id: TODO,v 1.5 1998-01-29 13:40:27 adam Exp $
+Zebra TODO $Id: TODO,v 1.6 1998-03-05 08:45:11 adam Exp $
Make regx-filter thread safe.
-Better ranking in searches. Admin should be able specify initial
- weight to certain fields.
+Size of sort entries should be configurable.
+
+Use first field in sorting, i.e. author.
-Search result should hold information about hits for each term,
- especially when using ranked search.
+System number sorting.
-Admin should be able to specify set/attr when none is specified
- (currently bib1,use=any is used).
+Configurable default sorting criteria - used when query is not ranked.
+
+Configurable default search attributes.
+
+Better ranking in searches. Admin should be able to specify initial
+ weight to certain fields.
Explain support - including d1 to grs (d1_grs.c) in YAZ; Zebra
to auto-generate explain information depending on data1 system.
ISAMC optimization: indirect block with pointers to all blocks
in chain. The initial block should include the count as well.
+
+Create surrogate diagnostic when ONE record is missing instead of a
+non-surrogate diagnostic.
+
# Copyright (C) 1995-1998, Index Data I/S
# All rights reserved.
# Sebastian Hammer, Adam Dickmeiss
-# $Id: Makefile,v 1.48 1998-02-10 12:03:05 adam Exp $
+# $Id: Makefile,v 1.49 1998-03-05 08:45:11 adam Exp $
SHELL=/bin/sh
RANLIB=ranlib
zinfo.o invstat.o sortidx.o
O2 = kdump.o
O3 = zserver.o kcompare.o zrpn.o zsets.o attribute.o recindex.o \
- zlogs.o lockutil.o locksrv.o zinfo.o trunc.o sortidx.o
+ zlogs.o lockutil.o locksrv.o zinfo.o trunc.o sortidx.o rank1.o zebraapi.o \
+ retrieve.o
O4 = hlvl.o hlvltest.o kcompare.o
CPP=$(CC) -E
-all: $(TPROG1) $(TPROG3)
+all: $(TPROG1) $(TPROG2) $(TPROG3)
$(TPROG1): $(O1) ../lib/dict.a ../lib/isam.a ../lib/isamc.a ../lib/recctrl.a \
../lib/bfile.a ../lib/dfa.a ../lib/zebrautl.a $(YAZLIB)
/*
- * Copyright (C) 1994-1997, Index Data I/S
+ * Copyright (C) 1994-1998, Index Data I/S
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: attribute.c,v $
- * Revision 1.7 1997-10-29 12:05:01 adam
+ * Revision 1.8 1998-03-05 08:45:11 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.7 1997/10/29 12:05:01 adam
* Server produces diagnostic "Unsupported Attribute Set" when appropriate.
*
* Revision 1.6 1997/09/17 12:19:11 adam
static void att_loadset(void *p, const char *n, const char *name)
{
data1_attset *cnew;
- ZServerInfo *zi = p;
+ ZebraHandle zi = p;
if (!(cnew = data1_read_attset(zi->dh, (char*) name)))
{
zi->registered_sets = cnew;
}
-static void load_atts(ZServerInfo *zi)
+static void load_atts(ZebraHandle zi)
{
res_trav(zi->res, "attset", zi, att_loadset);
}
return 0;
}
-int att_getentbyatt(ZServerInfo *zi, attent *res, oid_value set, int att)
+int att_getentbyatt(ZebraHandle zi, attent *res, oid_value set, int att)
{
data1_att *r;
data1_attset *p;
load_atts(zi);
for (p = zi->registered_sets; p; p = p->next)
if (p->reference == set)
- break;;
+ break;
if (!p)
return -2;
if (!(r = getatt(p, att)))
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: extract.c,v $
- * Revision 1.79 1998-02-17 10:32:52 adam
+ * Revision 1.80 1998-03-05 08:45:11 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.79 1998/02/17 10:32:52 adam
* Fixed bug: binary files weren't opened with flag b on NT.
*
* Revision 1.78 1998/02/10 12:03:05 adam
static int records_deleted = 0;
static int records_processed = 0;
-static ZebTargetInfo *zti = NULL;
+static ZebraExplainInfo zti = NULL;
static void logRecord (int showFlag)
{
}
}
-int key_open (BFiles bfs, int mem, int rw)
+int key_open (BFiles bfs, int mem, int rw, data1_handle dh)
{
if (!mem)
mem = atoi(res_get_def (common_resource, "memMax", "4"))*1024*1024;
dict_close (matchDict);
return -1;
}
- zti = zebTargetInfo_open (records, rw);
+ zti = zebraExplain_open (records, dh, rw);
if (!zti)
{
rec_close (&records);
key_buf_used = 0;
}
-int key_close ()
+int key_close (int rw)
{
key_flush ();
xfree (key_buf);
-#if 1
- zebTargetInfo_close (zti, 1);
-#endif
+ if (rw)
+ zebraExplain_runNumberIncrement (zti, 1);
+ zebraExplain_close (zti, rw);
rec_close (&records);
dict_close (matchDict);
sortIdx_close (sortIdx);
int seqno = 0;
int off = 0;
- if (zebTargetInfo_curDatabase (zti, databaseName))
+ if (zebraExplain_curDatabase (zti, databaseName))
{
- if (zebTargetInfo_newDatabase (zti, databaseName))
+ if (zebraExplain_newDatabase (zti, databaseName))
abort ();
}
+ zebraExplain_recordCountIncrement (zti, cmd ? 1 : -1);
while (off < reckeys->buf_used)
{
const char *src = reckeys->buf + off;
++ptr_i;
key_buf[ptr_top-ptr_i] = (char*)key_buf + key_buf_used;
- ch = zebTargetInfo_lookupSU (zti, attrSet, attrUse);
+ ch = zebraExplain_lookupSU (zti, attrSet, attrUse);
if (ch < 0)
- ch = zebTargetInfo_addSU (zti, attrSet, attrUse);
+ ch = zebraExplain_addSU (zti, attrSet, attrUse);
assert (ch > 0);
((char*) key_buf) [key_buf_used++] = ch;
while (*src)
}
static const char **searchRecordKey (struct recKeys *reckeys,
- int attrSetS, int attrUseS)
+ int attrSetS, int attrUseS)
{
static const char *ws[32];
int off = 0;
char *subType)
{
struct recExtractCtrl extractCtrl;
+ RecordAttr *recordAttr;
int r;
char *matchStr;
SYSNO sysnotmp;
logf (LOG_LOG, "add %s %s %ld", rGroup->recordType,
fname, (long) recordOffset);
rec = rec_new (records);
+
*sysno = rec->sysno;
+ recordAttr = rec_init_attr (zti, rec);
+
if (matchStr)
{
dict_insert (matchDict, matchStr, sizeof(*sysno), sysno);
rec = rec_get (records, *sysno);
assert (rec);
+
+ recordAttr = rec_init_attr (zti, rec);
+
+ if (recordAttr->runNumber == zebraExplain_runNumberIncrement (zti, 0))
+ {
+ logf (LOG_LOG, "skipped %s %s %ld", rGroup->recordType,
+ fname, (long) recordOffset);
+ rec_rm (&rec);
+ logRecord (0);
+ return 1;
+ }
delkeys.buf_used = rec->size[recInfo_delKeys];
delkeys.buf = rec->info[recInfo_delKeys];
flushSortKeys (*sysno, 0);
dict_delete (matchDict, matchStr);
rec_del (records, &rec);
}
+ rec_rm (&rec);
logRecord (0);
return 1;
}
rec->size[recInfo_delKeys] = 0;
}
+ /* save file size of original record */
+ zebraExplain_recordBytesIncrement (zti, - recordAttr->recordSize);
+ recordAttr->recordSize = fi->file_moffset - recordOffset;
+ if (!recordAttr->recordSize)
+ recordAttr->recordSize = fi->file_max - recordOffset;
+ zebraExplain_recordBytesIncrement (zti, recordAttr->recordSize);
+
+ /* set run-number for this record */
+ recordAttr->runNumber = zebraExplain_runNumberIncrement (zti, 0);
+
/* update store data */
xfree (rec->info[recInfo_storeData]);
if (rGroup->flagStoreData == 1)
{
- int size = fi->file_moffset - recordOffset;
- if (!size)
- size = fi->file_max - recordOffset;
- rec->size[recInfo_storeData] = size;
- rec->info[recInfo_storeData] = xmalloc (size);
+ rec->size[recInfo_storeData] = recordAttr->recordSize;
+ rec->info[recInfo_storeData] = xmalloc (recordAttr->recordSize);
if (lseek (fi->fd, recordOffset, SEEK_SET) < 0)
{
logf (LOG_ERRNO|LOG_FATAL, "seek to %ld in %s", fname,
(long) recordOffset);
exit (1);
}
- if (read (fi->fd, rec->info[recInfo_storeData], size) < size)
+ if (read (fi->fd, rec->info[recInfo_storeData], recordAttr->recordSize)
+ < recordAttr->recordSize)
{
logf (LOG_ERRNO|LOG_FATAL, "read %d bytes of %s",
- fi->file_max, fname);
+ recordAttr->recordSize, fname);
exit (1);
}
}
rec_strdup (rGroup->databaseName, &rec->size[recInfo_databaseName]);
/* update offset */
- xfree (rec->info[recInfo_offset]);
-
- rec->size[recInfo_offset] = sizeof(recordOffset);
- rec->info[recInfo_offset] = xmalloc (sizeof(recordOffset));
- memcpy (rec->info[recInfo_offset], &recordOffset, sizeof(recordOffset));
+ recordAttr->recordOffset = recordOffset;
/* commit this record */
rec_put (records, &rec);
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: index.h,v $
- * Revision 1.56 1998-01-12 15:04:08 adam
+ * Revision 1.57 1998-03-05 08:45:12 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.56 1998/01/12 15:04:08 adam
* The test option (-s) only uses read-lock (and not write lock).
*
* Revision 1.55 1997/10/27 14:33:04 adam
void repositoryDelete (struct recordGroup *rGroup);
void repositoryShow (struct recordGroup *rGroup);
-int key_open (BFiles bfs, int mem, int rw);
-int key_close (void);
+int key_open (BFiles bfs, int mem, int rw, data1_handle);
+int key_close (int rw);
int key_compare (const void *p1, const void *p2);
int key_get_pos (const void *p);
int key_compare_it (const void *p1, const void *p2);
int zebra_lock_fd (ZebraLockHandle h);
void zebra_lock_prefix (Res res, char *dst);
-
extern Res common_resource;
/*
- * Copyright (C) 1994-1997, Index Data I/S
+ * Copyright (C) 1994-1998, Index Data I/S
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: kcompare.c,v $
- * Revision 1.22 1997-09-22 12:39:06 adam
+ * Revision 1.23 1998-03-05 08:45:12 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.22 1997/09/22 12:39:06 adam
* Added get_pos method for the ranked result sets.
*
* Revision 1.21 1997/09/17 12:19:13 adam
me->debug = atoi(res_get_def (res, "isamcDebug", "0"));
- logf (LOG_LOG, "ISAMC system active");
return me;
}
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: kdump.c,v $
- * Revision 1.14 1997-10-27 14:33:04 adam
+ * Revision 1.15 1998-03-05 08:45:12 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.14 1997/10/27 14:33:04 adam
* Moved towards generic character mapping depending on "structure"
* field in abstract syntax file. Fixed a few memory leaks. Fixed
* bug with negative integers when doing searches with relational
char *key_fname = NULL;
char key_string[IT_MAX_WORD];
char key_info[256];
+ ZebraMaps zm;
FILE *inf;
+ Res res = NULL;
struct it_key prevk;
- chrmaptab map = 0;
prevk.sysno = 0;
prevk.seqno = 0;
prog = *argv;
- while ((ret = options ("m:v:", argv, argc, &arg)) != -2)
+ while ((ret = options ("c:v:", argv, argc, &arg)) != -2)
{
if (ret == 0)
{
{
log_init (log_mask_str(arg), prog, NULL);
}
- else if (ret == 'm')
+ else if (ret == 'c')
{
- if (!(map = chrmaptab_create (NULL, arg, 0)))
- {
- logf(LOG_FATAL, "Failed to open maptab");
- exit(1);
+ if (!(res = res_open (arg)))
+ {
+ logf(LOG_FATAL, "Failed to open resource file %s", arg);
+ exit (1);
}
}
else
}
if (!key_fname)
{
- fprintf (stderr, "kdump [-m maptab -v log] file\n");
+ fprintf (stderr, "kdump [-c config] [-v log] file\n");
exit (1);
}
+ if (!res)
+ res = res_open ("zebra.cfg");
+ zm = zebra_maps_open (res);
if (!(inf = fopen (key_fname, "r")))
{
logf (LOG_FATAL|LOG_ERRNO, "fopen %s", key_fname);
struct it_key k;
int op;
char keybuf[IT_MAX_WORD+1];
+ char *to = keybuf;
+ const char *from = key_string;
+ int usedb_type = from[0];
+ int reg_type = from[1];
op = key_info[0];
memcpy (&k, 1+key_info, sizeof(k));
- if (map)
- {
- char *to = keybuf, *from = key_string;
- while (*from)
- {
- char *res = chr_map_output(map, from, 1);
- while (*res)
- *(to++) = *(res++);
- }
- *to = '\0';
+ from += 2;
+ while (*from)
+ {
+ const char *res = zebra_maps_output (zm, reg_type, &from);
+ while (*res)
+ *(to++) = *(res++);
}
- else
- strcpy(keybuf, key_string);
- printf ("%7d op=%d s=%-5d %s\n", k.sysno, op, k.seqno,
- keybuf);
+ *to = '\0';
+ printf ("%c %3d %c %7d %5d %s\n", reg_type, usedb_type, op ? 'i':'d',
+ k.sysno, k.seqno, keybuf);
}
+ zebra_maps_close (zm);
if (fclose (inf))
{
logf (LOG_FATAL|LOG_ERRNO, "fclose %s", key_fname);
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: kinput.c,v $
- * Revision 1.27 1998-02-17 10:32:52 adam
+ * Revision 1.28 1998-03-05 08:45:12 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.27 1998/02/17 10:32:52 adam
* Fixed bug: binary files weren't opened with flag b on NT.
*
* Revision 1.26 1998/01/29 13:39:13 adam
char *dict_info;
strcpy (this_name, hci.cur_name);
- logf (LOG_DEBUG, "inserting %s", 1+hci.cur_name);
assert (hci.cur_name[1]);
no_diffs++;
if ((dict_info = dict_lookup (hi->dict, hci.cur_name)))
if ((info = dict_lookup (hi->dict, cur_name)))
{
ISAM_P isam_p, isam_p2;
- logf (LOG_DEBUG, "updating %s", 1+cur_name);
memcpy (&isam_p, info+1, sizeof(ISAM_P));
isam_p2 = is_merge (hi->isam, isam_p, nmemb, key_buf);
if (!isam_p2)
else
{
ISAM_P isam_p;
- logf (LOG_DEBUG, "inserting %s", 1+cur_name);
no_insertions++;
isam_p = is_merge (hi->isam, 0, nmemb, key_buf);
dict_insert (hi->dict, cur_name, sizeof(ISAM_P), &isam_p);
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: locksrv.c,v $
- * Revision 1.10 1997-09-29 09:08:36 adam
+ * Revision 1.11 1998-03-05 08:45:12 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.10 1997/09/29 09:08:36 adam
* Revised locking system to be thread safe for the server.
*
* Revision 1.9 1997/09/25 14:54:43 adam
#include "zserver.h"
-int zebra_server_lock_init (ZServerInfo *zi)
+int zebra_server_lock_init (ZebraHandle zi)
{
char path_prefix[1024];
return 0;
}
-int zebra_server_lock_destroy (ZServerInfo *zi)
+int zebra_server_lock_destroy (ZebraHandle zi)
{
xfree (zi->server_path_prefix);
zebra_lock_destroy (zi->server_lock_cmt);
return 0;
}
-int zebra_server_lock (ZServerInfo *zi, int commitPhase)
+int zebra_server_lock (ZebraHandle zi, int commitPhase)
{
if (!zi->server_lock_cmt)
{
return 0;
}
-void zebra_server_unlock (ZServerInfo *zi, int commitPhase)
+void zebra_server_unlock (ZebraHandle zi, int commitPhase)
{
if (zi->server_lock_org == NULL)
return;
}
}
-int zebra_server_lock_get_state (ZServerInfo *zi, time_t *timep)
+int zebra_server_lock_get_state (ZebraHandle zi, time_t *timep)
{
char path[1024];
char buf[256];
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: main.c,v $
- * Revision 1.55 1998-01-26 10:37:34 adam
+ * Revision 1.56 1998-03-05 08:45:12 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.55 1998/01/26 10:37:34 adam
* Minor changes.
*
* Revision 1.54 1998/01/12 15:04:08 adam
bf_lockDir (rGroupDef.bfs,
res_get (common_resource, "lockDir"));
- rGroupDef.zebra_maps = zebra_maps_open (res_get(
- common_resource, "profilePath"), common_resource);
+ rGroupDef.zebra_maps = zebra_maps_open (common_resource);
}
if (!strcmp (arg, "update"))
cmd = 'u';
switch (cmd)
{
case 'u':
- if (!key_open (rGroup.bfs, mem_max, rGroup.flagRw))
+ if (!key_open (rGroup.bfs, mem_max, rGroup.flagRw,
+ rGroup.dh))
{
logf (LOG_LOG, "updating %s", rGroup.path);
repositoryUpdate (&rGroup);
- nsections = key_close ();
+ nsections = key_close (rGroup.flagRw);
}
break;
case 'U':
- if (!key_open (rGroup.bfs,mem_max, rGroup.flagRw))
+ if (!key_open (rGroup.bfs,mem_max, rGroup.flagRw,
+ rGroup.dh))
{
logf (LOG_LOG, "updating (pass 1) %s", rGroup.path);
repositoryUpdate (&rGroup);
- key_close ();
+ key_close (rGroup.flagRw);
}
nsections = 0;
break;
case 'd':
- if (!key_open (rGroup.bfs,mem_max, rGroup.flagRw))
+ if (!key_open (rGroup.bfs,mem_max, rGroup.flagRw,
+ rGroup.dh))
{
logf (LOG_LOG, "deleting %s", rGroup.path);
repositoryDelete (&rGroup);
- nsections = key_close ();
+ nsections = key_close (rGroup.flagRw);
}
break;
case 's':
--- /dev/null
+/*
+ * Copyright (C) 1998, Index Data I/S
+ * All rights reserved.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: rank1.c,v $
+ * Revision 1.1 1998-03-05 08:45:12 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#ifdef WINDOWS
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+#include "zserver.h"
+
+struct rank_class_info { /* class-level state: allocated by create, released by destroy */
+ int dummy; /* placeholder member; never read in the visible code */
+};
+
+struct rank_term_info { /* ranking state kept for one term of a result set */
+ int local_occur; /* incremented by add for each hit; zeroed again by calc */
+ int global_occur; /* copied from rset_terms[i]->nn in begin */
+ int global_inv; /* 32 - log2_int (global_occur), computed in begin */
+ int rank_flag; /* 1 when the term's flags string equals "rank", otherwise 0 */
+};
+
+struct rank_set_info { /* the "set handle" returned by begin */
+ int last_pos; /* seqno passed to the most recent add call */
+ int no_entries; /* number of elements in entries; taken from rset->no_rset_terms */
+ int no_rank_entries; /* how many entries have rank_flag set */
+ struct rank_term_info *entries; /* array allocated in begin, one element per term */
+};
+
+/*
+ * log2_int: Integer base-2 logarithm, rounded down. Counts how many
+ * times g can be halved before it drops to 1; both 0 and 1 give 0.
+ */
+static int log2_int (unsigned g)
+{
+    int bits;
+
+    for (bits = 0; g > 1; g >>= 1)
+        bits++;
+    return bits;
+}
+
+/*
+ * create: Builds the class-level state of the rank-1 handler and
+ * returns it as the class_handle. According to the call sequence
+ * outlined in this file it runs once, at server init.
+ */
+static void *create (ZebraHandle zh)
+{
+    struct rank_class_info *class_info;
+
+    class_info = xmalloc (sizeof(struct rank_class_info));
+    logf (LOG_DEBUG, "rank-1 create");
+    return class_info;
+}
+
+/*
+ * destroy: Tears down the rank-1 handler when it is no longer
+ * needed, i.e. when the server shuts down. class_handle is the
+ * pointer that create handed out; it is released here.
+ */
+static void destroy (ZebraHandle zh, void *class_handle)
+{
+    struct rank_class_info *class_info = class_handle;
+
+    logf (LOG_DEBUG, "rank-1 destroy");
+    xfree (class_info);
+}
+
+
+/*
+ * begin: Prepares beginning of "real" ranking. Called once for
+ * each result set. The returned handle is a "set handle" and
+ * will be used in each of the handlers below.
+ *
+ * zh and class_handle are not used by this implementation. rset
+ * supplies the term list: one rank_term_info is created per entry in
+ * rset->rset_terms, and a term takes part in ranking only when its
+ * flags string is exactly "rank".
+ */
+static void *begin (ZebraHandle zh, void *class_handle, RSET rset)
+{
+    struct rank_set_info *si = xmalloc (sizeof(*si));
+    int i;
+
+    logf (LOG_DEBUG, "rank-1 begin");
+    /* calc reads last_pos; xmalloc is not assumed to zero memory, so
+       give it a defined value in case no add call has happened yet */
+    si->last_pos = 0;
+    si->no_entries = rset->no_rset_terms;
+    si->no_rank_entries = 0;
+    si->entries = xmalloc (sizeof(*si->entries)*si->no_entries);
+    for (i = 0; i < si->no_entries; i++)
+    {
+        int g = rset->rset_terms[i]->nn;
+        /* larger global counts give a smaller weight */
+        int inv = 32 - log2_int (g);
+
+        if (!strcmp (rset->rset_terms[i]->flags, "rank"))
+        {
+            si->entries[i].rank_flag = 1;
+            (si->no_rank_entries)++;
+        }
+        else
+            si->entries[i].rank_flag = 0;
+        si->entries[i].local_occur = 0;
+        si->entries[i].global_occur = g;
+        si->entries[i].global_inv = inv;
+        logf (LOG_DEBUG, "-------- %d ------", inv);
+    }
+    return si;
+}
+
+/*
+ * end: Terminates ranking process. Called after a result set
+ * has been ranked. Releases everything begin allocated for the
+ * set: the per-term array first, then the set handle itself.
+ */
+static void end (ZebraHandle zh, void *set_handle)
+{
+    struct rank_set_info *si = set_handle;
+    logf (LOG_DEBUG, "rank-1 end");
+    /* entries is a separate allocation made in begin; freeing only si
+       would leak it once per ranked result set */
+    xfree (si->entries);
+    xfree (si);
+}
+
+/*
+ * add: Called for each word occurrence in a result set, so it is
+ * kept as cheap as possible. Records the position of the hit and
+ * bumps the per-document counter of the term it belongs to; calc
+ * later turns those counters into a score.
+ */
+static void add (void *set_handle, int seqno, int term_index)
+{
+    struct rank_set_info *set_info = set_handle;
+    struct rank_term_info *term = set_info->entries + term_index;
+
+    logf (LOG_DEBUG, "rank-1 add seqno=%d term_index=%d", seqno, term_index);
+    set_info->last_pos = seqno;
+    term->local_occur += 1;
+}
+
+/*
+ * calc: Called for each document in a result. Converts the
+ * occurrence counts collected by add since the previous calc into a
+ * score and clears those counts for the next document. The score is
+ * meant to lie between 0 and 1000. Returns -1 when the set has no
+ * terms flagged for ranking, i.e. no score can be obtained.
+ */
+static int calc (void *set_handle, int sysno)
+{
+    struct rank_set_info *si = set_handle;
+    int weighted = 0;
+    int dividend, divisor, i;
+
+    logf (LOG_DEBUG, "rank-1 calc sysno=%d", sysno);
+
+    if (si->no_rank_entries == 0)
+        return -1;
+    for (i = 0; i < si->no_entries; i++)
+    {
+        struct rank_term_info *term = &si->entries[i];
+        int occurrences = term->local_occur;
+
+        /* counts are per document: consume and reset in one pass */
+        term->local_occur = 0;
+        if (term->rank_flag && occurrences)
+            weighted += (2 + log2_int (occurrences)) * term->global_inv;
+    }
+    dividend = 60 * weighted;
+    divisor = si->no_rank_entries * log2_int (4 + si->last_pos);
+    logf (LOG_DEBUG, " dividend=%d", dividend);
+    logf (LOG_DEBUG, " divisor=%d", divisor);
+    return dividend / divisor;
+}
+
+/*
+ * Pseudo-meta code with sequence of calls as they occur in a
+ * server. Handlers are prefixed by --:
+ *
+ * server init
+ * -- create
+ * foreach search
+ * rank result set
+ * -- begin
+ * foreach record
+ * foreach word
+ * -- add
+ * -- calc
+ * -- end
+ * -- destroy
+ * server close
+ */
+
+static struct rank_control rank_control = { /* positional initializer; member order is fixed by the struct rank_control declaration, which is not visible here */
+ "rank-1", /* string identifying this ranking implementation */
+ create, /* handler: build class handle */
+ destroy, /* handler: release class handle */
+ begin, /* handler: start ranking one result set */
+ end, /* handler: finish ranking one result set */
+ calc, /* handler: produce the score for one document */
+ add, /* handler: register one word occurrence */
+};
+
+struct rank_control *rank1_class = &rank_control; /* non-static pointer through which other files reach the handler table */
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: recindex.h,v $
- * Revision 1.12 1998-01-12 15:04:08 adam
+ * Revision 1.13 1998-03-05 08:45:12 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.12 1998/01/12 15:04:08 adam
* The test option (-s) only uses read-lock (and not write lock).
*
* Revision 1.11 1997/09/17 12:19:16 adam
recInfo_delKeys,
recInfo_databaseName,
recInfo_storeData,
- recInfo_offset
+ recInfo_attr
};
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: recindxp.h,v $
- * Revision 1.3 1995-12-11 11:45:55 adam
+ * Revision 1.4 1998-03-05 08:45:12 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.3 1995/12/11 11:45:55 adam
* Removed commented code.
*
* Revision 1.2 1995/12/11 09:12:51 adam
char *index_fname;
BFile index_BFile;
-
char *data_fname[REC_BLOCK_TYPES];
BFile data_BFile[REC_BLOCK_TYPES];
--- /dev/null
+/*
+ * Copyright (C) 1995-1998, Index Data I/S
+ * All rights reserved.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: retrieve.c,v $
+ * Revision 1.1 1998-03-05 08:45:13 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ */
+
+#include <stdio.h>
+#include <assert.h>
+
+#include <fcntl.h>
+#ifdef WINDOWS
+#include <io.h>
+#include <process.h>
+#else
+#include <unistd.h>
+#endif
+
+#include <recctrl.h>
+#include "zserver.h"
+
+struct fetch_control { /* state shared by the record_ext_* and record_int_* read/seek/tell callbacks */
+ int record_offset; /* added to seek offsets and subtracted from tell results for the external file */
+ int record_int_pos; /* current read position inside record_int_buf */
+ char *record_int_buf; /* in-memory copy of the record (set from rec->info[recInfo_storeData]) */
+ int record_int_len; /* number of bytes available in record_int_buf */
+ int fd; /* descriptor of the external file; -1 when no file has been opened */
+};
+
+/*
+ * record_ext_read: read callback for records kept in an external
+ * file. fh is a struct fetch_control; the result of read(2) on its
+ * descriptor is passed straight back to the caller.
+ */
+static int record_ext_read (void *fh, char *buf, size_t count)
+{
+    int fd = ((struct fetch_control *) fh)->fd;
+
+    return read (fd, buf, count);
+}
+
+/*
+ * record_ext_seek: seek callback for records kept in an external
+ * file. offset is relative to the start of the record, so the
+ * record's own offset is added before seeking from the start of the
+ * file. Returns whatever lseek(2) returns.
+ */
+static off_t record_ext_seek (void *fh, off_t offset)
+{
+    struct fetch_control *ctl = fh;
+    off_t absolute = offset + ctl->record_offset;
+
+    return lseek (ctl->fd, absolute, SEEK_SET);
+}
+
+/*
+ * record_ext_tell: tell callback for records kept in an external
+ * file. Queries the current file position and converts it to a
+ * position relative to the start of the record.
+ */
+static off_t record_ext_tell (void *fh)
+{
+    struct fetch_control *ctl = fh;
+    off_t file_pos = lseek (ctl->fd, 0, SEEK_CUR);
+
+    return file_pos - ctl->record_offset;
+}
+
+/*
+ * record_int_seek: seek callback for records held in memory. Stores
+ * offset as the new read position and returns the stored value.
+ */
+static off_t record_int_seek (void *fh, off_t offset)
+{
+    struct fetch_control *ctl = fh;
+
+    ctl->record_int_pos = offset;
+    return (off_t) ctl->record_int_pos;
+}
+
+/*
+ * record_int_tell: tell callback for records held in memory.
+ * Reports the current read position inside the buffer.
+ */
+static off_t record_int_tell (void *fh)
+{
+    return (off_t) ((struct fetch_control *) fh)->record_int_pos;
+}
+
+/*
+ * record_int_read: read callback for records held in memory. Copies
+ * at most count bytes from the current position of the buffer into
+ * buf, advances the position, and returns the number of bytes
+ * copied. Returns 0 once the position has reached the end.
+ */
+static int record_int_read (void *fh, char *buf, size_t count)
+{
+    struct fetch_control *ctl = fh;
+    int remaining = ctl->record_int_len - ctl->record_int_pos;
+
+    if (remaining <= 0)
+        return 0;
+    /* never hand out more than the caller asked for */
+    if (count < (size_t) remaining)
+        remaining = count;
+    memcpy (buf, ctl->record_int_buf + ctl->record_int_pos, remaining);
+    ctl->record_int_pos += remaining;
+    return remaining;
+}
+
+/*
+ * zebra_record_fetch: Fetches record sysno and runs it through the
+ * retrieve handler of its record type.
+ *
+ *  zh            handle giving access to the record store and data1
+ *  sysno         system number of the record to fetch
+ *  score         passed on to the retrieve handler
+ *  stream        ODR stream; *basenamep is allocated from it
+ *  input_format  requested record syntax
+ *  comp          record composition passed on to the retrieve handler
+ *  output_format receives the syntax chosen by the retrieve handler
+ *  rec_bufp      receives the buffer produced by the retrieve handler
+ *  rec_lenp      receives the length reported by the retrieve handler
+ *  basenamep     receives a copy of the record's database name
+ *
+ * The record data comes from the stored copy when one exists
+ * (recInfo_storeData non-empty); otherwise the original file is
+ * opened and read starting at the record's saved offset.
+ *
+ * Returns the diagnostic set by the retrieve handler (0 if it set
+ * none), or 14 when the record cannot be read, its type is unknown
+ * or its file cannot be opened. The output parameters other than
+ * *basenamep are left untouched on those early failures.
+ */
+int zebra_record_fetch (ZebraHandle zh, int sysno, int score, ODR stream,
+                        oid_value input_format, Z_RecordComposition *comp,
+                        oid_value *output_format, char **rec_bufp,
+                        int *rec_lenp, char **basenamep)
+{
+    Record rec;
+    char *fname, *file_type, *basename;
+    RecType rt;
+    struct recRetrieveCtrl retrieveCtrl;
+    char subType[128];
+    struct fetch_control fc;
+    RecordAttr *recordAttr;
+
+    rec = rec_get (zh->records, sysno);
+    if (!rec)
+    {
+        logf (LOG_DEBUG, "rec_get fail on sysno=%d", sysno);
+        return 14;
+    }
+    recordAttr = rec_init_attr (zh->zei, rec);
+
+    file_type = rec->info[recInfo_fileType];
+    fname = rec->info[recInfo_filename];
+    basename = rec->info[recInfo_databaseName];
+    *basenamep = odr_malloc (stream, strlen(basename)+1);
+    strcpy (*basenamep, basename);
+
+    if (!(rt = recType_byName (file_type, subType)))
+    {
+        /* log first: file_type points into rec, which is released next */
+        logf (LOG_WARN, "Retrieve: Cannot handle type %s", file_type);
+        /* release the record here as on every other exit path */
+        rec_rm (&rec);
+        return 14;
+    }
+    logf (LOG_DEBUG, "retrieve localno=%d score=%d", sysno, score);
+    retrieveCtrl.fh = &fc;
+    fc.fd = -1;
+    if (rec->size[recInfo_storeData] > 0)
+    {
+        /* record body is stored with the record: read from memory */
+        retrieveCtrl.readf = record_int_read;
+        retrieveCtrl.seekf = record_int_seek;
+        retrieveCtrl.tellf = record_int_tell;
+        fc.record_int_len = rec->size[recInfo_storeData];
+        fc.record_int_buf = rec->info[recInfo_storeData];
+        fc.record_int_pos = 0;
+        logf (LOG_DEBUG, "Internal retrieve. %d bytes", fc.record_int_len);
+    }
+    else
+    {
+        /* no stored copy: go back to the original file */
+        if ((fc.fd = open (fname, O_BINARY|O_RDONLY)) == -1)
+        {
+            logf (LOG_WARN|LOG_ERRNO, "Retrieve fail; missing file: %s",
+                  fname);
+            rec_rm (&rec);
+            return 14;
+        }
+        fc.record_offset = recordAttr->recordOffset;
+
+        retrieveCtrl.readf = record_ext_read;
+        retrieveCtrl.seekf = record_ext_seek;
+        retrieveCtrl.tellf = record_ext_tell;
+
+        /* position the file at the start of the record */
+        record_ext_seek (retrieveCtrl.fh, 0);
+    }
+    retrieveCtrl.subType = subType;
+    retrieveCtrl.localno = sysno;
+    retrieveCtrl.score = score;
+    retrieveCtrl.recordSize = recordAttr->recordSize;
+    retrieveCtrl.odr = stream;
+    retrieveCtrl.input_format = retrieveCtrl.output_format = input_format;
+    retrieveCtrl.comp = comp;
+    retrieveCtrl.diagnostic = 0;
+    retrieveCtrl.dh = zh->dh;
+    (*rt->retrieve)(&retrieveCtrl);
+    *output_format = retrieveCtrl.output_format;
+    *rec_bufp = retrieveCtrl.rec_buf;
+    *rec_lenp = retrieveCtrl.rec_len;
+    if (fc.fd != -1)
+        close (fc.fd);
+    rec_rm (&rec);
+
+    return retrieveCtrl.diagnostic;
+}
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: trunc.c,v $
- * Revision 1.9 1998-01-12 15:04:09 adam
+ * Revision 1.10 1998-03-05 08:45:13 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.9 1998/01/12 15:04:09 adam
* The test option (-s) only uses read-lock (and not write lock).
*
* Revision 1.8 1997/10/31 12:34:27 adam
xfree (ti);
}
-static RSET rset_trunc_r (ZServerInfo *zi, ISAM_P *isam_p, int from, int to,
+static RSET rset_trunc_r (ZebraHandle zi, ISAM_P *isam_p, int from, int to,
int merge_chunk)
{
RSET result;
parms.key_size = sizeof(struct it_key);
parms.temp_path = res_get (zi->res, "setTmpDir");
result = rset_create (rset_kind_temp, &parms);
- result_rsfd = rset_open (result, RSETF_WRITE|RSETF_SORT_SYSNO);
+ result_rsfd = rset_open (result, RSETF_WRITE);
if (to - from > merge_chunk)
{
RSFD *rsfd;
RSET *rset;
+ int term_index;
int i, i_add = (to-from)/merge_chunk + 1;
struct trunc_info *ti;
int rscur = 0;
ti = heap_init (rscur, sizeof(struct it_key), key_compare_it);
for (i = rscur; --i >= 0; )
{
- rsfd[i] = rset_open (rset[i], RSETF_READ|RSETF_SORT_SYSNO);
- if (rset_read (rset[i], rsfd[i], ti->tmpbuf))
+ rsfd[i] = rset_open (rset[i], RSETF_READ);
+ if (rset_read (rset[i], rsfd[i], ti->tmpbuf, &term_index))
heap_insert (ti, ti->tmpbuf, i);
else
{
while (1)
{
- if (!rset_read (rset[n], rsfd[n], ti->tmpbuf))
+ if (!rset_read (rset[n], rsfd[n], ti->tmpbuf, &term_index))
{
heap_delete (ti);
rset_close (rset[n], rsfd[n]);
return isc_block (i1) - isc_block (i2);
}
-RSET rset_trunc (ZServerInfo *zi, ISAM_P *isam_p, int no)
+RSET rset_trunc (ZebraHandle zi, ISAM_P *isam_p, int no,
+ const char *term, int length, const char *flags)
{
logf (LOG_DEBUG, "rset_trunc no=%d", no);
if (zi->isam)
parms.pos = *isam_p;
parms.is = zi->isam;
+ parms.rset_term = rset_term_create (term, length, flags);
return rset_create (rset_kind_isam, &parms);
}
qsort (isam_p, no, sizeof(*isam_p), isam_trunc_cmp);
parms.pos = *isam_p;
parms.is = zi->isamc;
+ parms.rset_term = rset_term_create (term, length, flags);
return rset_create (rset_kind_isamc, &parms);
}
#if NEW_TRUNC
{
rset_m_or_parms parms;
- logf (LOG_LOG, "new_trunc");
parms.key_size = sizeof(struct it_key);
parms.cmp = key_compare_it;
parms.isc = zi->isamc;
parms.isam_positions = isam_p;
parms.no_isam_positions = no;
- parms.no_save_positions = 100;
+ parms.no_save_positions = 100000;
+ parms.rset_term = rset_term_create (term, length, flags);
return rset_create (rset_kind_m_or, &parms);
}
#endif
--- /dev/null
+/*
+ * Copyright (C) 1995-1998, Index Data I/S
+ * All rights reserved.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: zebraapi.c,v $
+ * Revision 1.1 1998-03-05 08:45:13 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ */
+
+#include <stdio.h>
+#ifdef WINDOWS
+#include <io.h>
+#include <process.h>
+#else
+#include <unistd.h>
+#endif
+
+#include "zserver.h"
+/*
+ * zebra_register_lock: take the server lock and make sure the register
+ * files are open in the state reported by zebra_server_lock_get_state.
+ * Returns 0 on success (including the case where nothing had to be
+ * reopened) and -1 when the dictionary, the sort index or the ISAM file
+ * could not be opened.
+ * NOTE(review): the -1 paths return after zebra_server_lock without a
+ * matching zebra_server_unlock, and the rec_open result is not checked.
+ */
+static int zebra_register_lock (ZebraHandle zh)
+{
+ time_t lastChange;
+ int state = zebra_server_lock_get_state(zh, &lastChange);
+
+ switch (state) /* collapse the reported state to 1 or 0 */
+ {
+ case 'c':
+ state = 1;
+ break;
+ default:
+ state = 0;
+ }
+ zebra_server_lock (zh, state);
+#if USE_TIMES
+ times (&zh->tms1);
+#endif
+ if (zh->registerState == state)
+ {
+ if (zh->registerChange >= lastChange)
+ return 0; /* same state, no newer change: keep the open files */
+ logf (LOG_LOG, "Register completely updated since last access");
+ }
+ else if (zh->registerState == -1)
+ logf (LOG_LOG, "Reading register using state %d pid=%ld", state,
+ (long) getpid());
+ else
+ logf (LOG_LOG, "Register has changed state from %d to %d",
+ zh->registerState, state);
+ zh->registerChange = lastChange;
+ if (zh->records) /* close whatever an earlier call opened */
+ {
+ zebraExplain_close (zh->zei, 0);
+ dict_close (zh->dict);
+ sortIdx_close (zh->sortIdx);
+ if (zh->isam)
+ is_close (zh->isam);
+ if (zh->isamc)
+ isc_close (zh->isamc);
+ rec_close (&zh->records);
+ }
+ bf_cache (zh->bfs, state ? res_get (zh->res, "shadow") : NULL); /* "shadow" resource is passed only in state 1 */
+ zh->registerState = state;
+ zh->records = rec_open (zh->bfs, 0);
+ if (!(zh->dict = dict_open (zh->bfs, FNAME_DICT, 40, 0)))
+ return -1;
+ if (!(zh->sortIdx = sortIdx_open (zh->bfs, 0)))
+ return -1;
+ zh->isam = NULL; /* exactly one of isam / isamc is opened below */
+ zh->isamc = NULL;
+ if (!res_get_match (zh->res, "isam", "i", NULL))
+ {
+ if (!(zh->isamc = isc_open (zh->bfs, FNAME_ISAMC,
+ 0, key_isamc_m(zh->res))))
+ return -1;
+
+ }
+ else
+ {
+ if (!(zh->isam = is_open (zh->bfs, FNAME_ISAM, key_compare, 0,
+ sizeof (struct it_key), zh->res)))
+ return -1;
+ }
+ zh->zei = zebraExplain_open (zh->records, zh->dh, 0);
+
+ return 0;
+}
+
+/*
+ * zebra_register_unlock: counterpart of zebra_register_lock.  Optionally
+ * logs the user/system time spent since the lock was taken, optionally
+ * sleeps for the number of seconds given by the "debugRequestWait"
+ * resource (not on WINDOWS), and releases the server lock unless the
+ * register state is still -1.
+ */
+static void zebra_register_unlock (ZebraHandle zh)
+{
+    /* -1: the "debugRequestWait" resource has not been consulted yet */
+    static int delay = -1;
+
+#if USE_TIMES
+    times (&zh->tms2);
+    logf (LOG_LOG, "user/system: %ld/%ld",
+          (long) (zh->tms2.tms_utime - zh->tms1.tms_utime),
+          (long) (zh->tms2.tms_stime - zh->tms1.tms_stime));
+#endif
+    if (delay == -1)
+    {
+        const char *setting = res_get (zh->res, "debugRequestWait");
+
+        delay = setting ? atoi (setting) : 0;
+    }
+#ifndef WINDOWS
+    if (delay > 0)
+        sleep (delay);
+#endif
+    if (zh->registerState == -1)
+        return;
+    zebra_server_unlock (zh, zh->registerState);
+}
+
+/*
+ * zebra_open: allocate a Zebra handle and initialise it from the
+ * resource file `configName'.  Returns NULL, after logging a warning,
+ * when the resources cannot be read.  The register files are not opened
+ * here: registerState is set to -1 so that the first call to
+ * zebra_register_lock opens them.  `host' is not used.
+ */
+ZebraHandle zebra_open (const char *host, const char *configName)
+{
+    ZebraHandle zh = xmalloc (sizeof(*zh));
+
+    if (!(zh->res = res_open (configName)))
+    {
+        logf (LOG_WARN, "Failed to read resources `%s'", configName);
+        xfree (zh);     /* the handle is never handed out: release it */
+        return NULL;
+    }
+    zebra_server_lock_init (zh);
+    zh->dh = data1_create ();
+    zh->bfs = bfs_create (res_get (zh->res, "register"));
+    bf_lockDir (zh->bfs, res_get (zh->res, "lockDir"));
+    data1_set_tabpath (zh->dh, res_get(zh->res, "profilePath"));
+    zh->sets = NULL;
+    zh->registerState = -1;  /* trigger open of registers! */
+    zh->registerChange = 0;
+
+    zh->records = NULL;
+    zh->registered_sets = NULL;
+    zh->zebra_maps = zebra_maps_open (zh->res);
+    zh->rank_classes = NULL;
+
+    zebraRankInstall (zh, rank1_class);
+    return zh;
+}
+
+/*
+ * zebra_close: shut down a handle obtained from zebra_open.  If the
+ * register is open, the result sets, explain info, dictionary, sort
+ * index, ISAM file and record file are closed and the register is
+ * unlocked; then the remaining members and the handle are released.
+ * A NULL handle (zebra_open's failure value) is accepted and ignored.
+ */
+void zebra_close (ZebraHandle zh)
+{
+    if (!zh)
+        return;
+    if (zh->records)
+    {
+        resultSetDestroy (zh);
+        zebraExplain_close (zh->zei, 0);
+        dict_close (zh->dict);
+        sortIdx_close (zh->sortIdx);
+        if (zh->isam)
+            is_close (zh->isam);
+        if (zh->isamc)
+            isc_close (zh->isamc);
+        rec_close (&zh->records);
+        zebra_register_unlock (zh);
+    }
+    zebra_maps_close (zh->zebra_maps);
+    zebraRankDestroy (zh);
+    bfs_destroy (zh->bfs);
+    data1_destroy (zh->dh);
+    zebra_server_lock_destroy (zh);
+
+    res_close (zh->res);    /* last: earlier steps may still read zh->res */
+    xfree (zh);
+}
+/*
+ * zebra_search_rpn: run rpn_search for `query' over the given databases
+ * between a zebra_register_lock / zebra_register_unlock pair.  errCode,
+ * errString and hits on the handle are reset before the search.
+ * NOTE(review): the return value of zebra_register_lock is ignored, so
+ * the search is attempted even when the register could not be opened.
+ */
+void zebra_search_rpn (ZebraHandle zh, ODR stream,
+ Z_RPNQuery *query, int num_bases, char **basenames,
+ const char *setname)
+{
+ zebra_register_lock (zh);
+ zh->errCode = 0;
+ zh->errString = NULL;
+ zh->hits = 0;
+ rpn_search (zh, stream, query, num_bases, basenames, setname);
+ zebra_register_unlock (zh);
+}
+
+/*
+ * zebra_records_retrieve: fetch num_recs records from result set
+ * `setname'.  recs[i].position is the requested position; format, buf,
+ * len and base of each entry are filled in by zebra_record_fetch.
+ * zh->errCode is set to 13 when the position set cannot be created or
+ * a position has no sysno; otherwise it receives the return value of
+ * zebra_record_fetch.
+ * NOTE(review): errCode is assigned per record, so a later record
+ * overwrites the status of an earlier one - confirm this is intended.
+ */
+void zebra_records_retrieve (ZebraHandle zh, ODR stream,
+                             const char *setname, Z_RecordComposition *comp,
+                             oid_value input_format, int num_recs,
+                             ZebraRetrievalRecord *recs)
+{
+    ZebraPosSet poset;
+    int i, *pos_array;
+
+    /* One slot per requested record.  The array used to be sized for a
+       single int, so the copy loop below wrote past the allocation as
+       soon as num_recs exceeded 1.  At least one slot is requested so
+       that xmalloc is never asked for zero bytes. */
+    pos_array = xmalloc ((num_recs > 0 ? num_recs : 1) * sizeof(*pos_array));
+    for (i = 0; i<num_recs; i++)
+        pos_array[i] = recs[i].position;
+
+    zebra_register_lock (zh);
+
+    poset = zebraPosSetCreate (zh, setname, num_recs, pos_array);
+    if (!poset)
+    {
+        logf (LOG_DEBUG, "zebraPosSetCreate error");
+        zh->errCode = 13;
+    }
+    else
+    {
+        for (i = 0; i<num_recs; i++)
+        {
+            if (!poset[i].sysno)
+            {
+                zh->errCode = 13;
+                logf (LOG_DEBUG, "Out of range. pos=%d", pos_array[i]);
+            }
+            else
+            {
+                zh->errCode =
+                    zebra_record_fetch (zh, poset[i].sysno, poset[i].score,
+                                        stream, input_format, comp,
+                                        &recs[i].format, &recs[i].buf,
+                                        &recs[i].len,
+                                        &recs[i].base);
+            }
+        }
+        zebraPosSetDestroy (zh, poset, num_recs);
+    }
+    zebra_register_unlock (zh);
+    xfree (pos_array);
+}
+/*
+ * zebra_scan: forward a scan request unchanged to rpn_scan, bracketed
+ * by zebra_register_lock / zebra_register_unlock.  All output arguments
+ * (position, num_entries, entries, is_partial) are handled by rpn_scan.
+ */
+void zebra_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
+ oid_value attributeset,
+ int num_bases, char **basenames,
+ int *position, int *num_entries, ZebraScanEntry **entries,
+ int *is_partial)
+{
+ zebra_register_lock (zh);
+ rpn_scan (zh, stream, zapt, attributeset,
+ num_bases, basenames, position,
+ num_entries, entries, is_partial);
+ zebra_register_unlock (zh);
+}
+/*
+ * zebra_sort: forward a sort request unchanged to resultSetSort,
+ * bracketed by zebra_register_lock / zebra_register_unlock.
+ */
+void zebra_sort (ZebraHandle zh, ODR stream,
+ int num_input_setnames, char **input_setnames,
+ char *output_setname, Z_SortKeySpecList *sort_sequence,
+ int *sort_status)
+{
+ zebra_register_lock (zh);
+ resultSetSort (zh, stream, num_input_setnames, input_setnames,
+ output_setname, sort_sequence, sort_status);
+ zebra_register_unlock (zh);
+}
+/* zebra_setDB: empty in this revision; the arguments are ignored. */
+void zebra_setDB (ZebraHandle zh, int num_bases, char **basenames)
+{
+
+}
+/* zebra_setRecordType: empty in this revision; the arguments are ignored. */
+void zebra_setRecordType (ZebraHandle zh, const char *type)
+{
+
+}
+/* zebra_setGroup: empty in this revision; the arguments are ignored. */
+void zebra_setGroup (ZebraHandle zh, const char *group)
+{
+
+}
+/* zebra_admin: empty in this revision; the arguments are ignored. */
+void zebra_admin (ZebraHandle zh, const char *command)
+{
+
+}
/*
- * Copyright (C) 1994-1997, Index Data I/S
+ * Copyright (C) 1994-1998, Index Data I/S
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: zinfo.c,v $
- * Revision 1.6 1998-02-17 10:29:27 adam
+ * Revision 1.7 1998-03-05 08:45:13 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.6 1998/02/17 10:29:27 adam
* Moved towards 'automatic' EXPLAIN database.
*
* Revision 1.5 1997/10/27 14:33:05 adam
#include <assert.h>
#include <string.h>
+#include <zebraver.h>
#include "zinfo.h"
+#define ZINFO_DEBUG 0
+
struct zebSUInfo {
int set;
int use;
struct zebDatabaseInfoB {
struct zebSUInfoB *SUInfo;
char *databaseName;
- int sysno;
- int readFlag;
- int dirty;
- struct zebDatabaseInfo info;
+ data1_node *data1_database;
+ int recordCount; /* records in db */
+ int recordBytes; /* size of records */
+ int sysno; /* sysno of database info */
+ int readFlag; /* 1: read is needed when referenced; 0 if not */
+ int dirty; /* 1: database is dirty: write is needed */
struct zebDatabaseInfoB *next;
};
-struct zebTargetInfo {
- int dictNum;
+struct zebraExplainAttset { /* node of the zei->attsets list (see trav_attset) */
+ char *name; /* copy of the data1_attset name */
+ int ordinal; /* copy of the data1_attset ordinal */
+ struct zebraExplainAttset *next; /* next list node, NULL at the end */
+};
+
+struct zebraExplainInfo {
+ int ordinalSU;
+ int runNumber;
int dirty;
Records records;
+ data1_handle dh;
+ struct zebraExplainAttset *attsets;
+ NMEM nmem;
+ data1_node *data1_target;
struct zebDatabaseInfoB *databaseInfo;
struct zebDatabaseInfoB *curDatabaseInfo;
};
-void zebTargetInfo_close (ZebTargetInfo *zti, int writeFlag)
+static data1_node *read_sgml_rec (data1_handle dh, NMEM nmem, Record rec)
+{ /* Hand the record's recInfo_storeData buffer to data1_read_sgml and
+ * return whatever that call returns. */
+ return data1_read_sgml (dh, nmem, rec->info[recInfo_storeData]);
+}
+/*
+ * data1_search_tag: walk the sibling list starting at `n' and return
+ * the first tag node whose name matches `tag' according to
+ * yaz_matchstr, or 0 when there is none (also when n is NULL).
+ * `dh' is not used.  Only siblings are examined, not children.
+ */
+static data1_node *data1_search_tag (data1_handle dh, data1_node *n,
+ const char *tag)
{
- struct zebDatabaseInfoB *zdi, *zdi1;
+ logf (LOG_DEBUG, "data1_search_tag %s", tag);
+ for (; n; n = n->next)
+ if (n->which == DATA1N_tag && n->u.tag.tag &&
+ !yaz_matchstr (tag, n->u.tag.tag))
+ {
+ logf (LOG_DEBUG, " found");
+ return n;
+ }
+ logf (LOG_DEBUG, " not found");
+ return 0;
+}
+/*
+ * data1_add_tag: create a new tag node named `tag' and append it as the
+ * last child of `at'.  The node's element is looked up with
+ * data1_getelementbytagname relative to the element of at's parent tag
+ * (as returned by get_parent_tag), if there is one.  Returns the node.
+ */
+static data1_node *data1_add_tag (data1_handle dh, data1_node *at,
+ const char *tag, NMEM nmem)
+{
+ data1_node *partag = get_parent_tag(dh, at);
+ data1_node *res = data1_mk_node (dh, nmem);
+ data1_element *e = NULL;
+
+ res->parent = at;
+ res->which = DATA1N_tag;
+ res->u.tag.tag = data1_insert_string (dh, res, nmem, tag);
+ res->u.tag.node_selected = 0;
+ res->u.tag.make_variantlist = 0;
+ res->u.tag.no_data_requested = 0;
+ res->u.tag.get_bytes = -1;
+
+ if (partag)
+ e = partag->u.tag.element;
+ res->u.tag.element =
+ data1_getelementbytagname (dh, at->root->u.root.absyn,
+ e, res->u.tag.tag);
+ res->root = at->root;
+ if (!at->child) /* first child */
+ at->child = res;
+ else
+ {
+ assert (at->last_child);
+ at->last_child->next = res;
+ }
+ at->last_child = res;
+ return res;
+}
+
+/*
+ * data1_make_tag: return the child of `at' named `tag'.  An existing
+ * child is reused after its own child list has been detached; otherwise
+ * a new tag node is appended with data1_add_tag.
+ */
+static data1_node *data1_make_tag (data1_handle dh, data1_node *at,
+                                   const char *tag, NMEM nmem)
+{
+    data1_node *existing = data1_search_tag (dh, at->child, tag);
+
+    if (!existing)
+        return data1_add_tag (dh, at, tag, nmem);
+    /* reuse the node, but start it out with no children */
+    existing->last_child = NULL;
+    existing->child = NULL;
+    return existing;
+}
+
+/*
+ * data1_add_tagdata_int: add a tagged data node `tag' below `at' (via
+ * data1_add_taggeddata) holding the decimal text of `num'.  The text is
+ * stored in the node's own lbuf.  Returns the data node, or 0 when
+ * data1_add_taggeddata yields none.
+ */
+static data1_node *data1_add_tagdata_int (data1_handle dh, data1_node *at,
+                                          const char *tag, int num,
+                                          NMEM nmem)
+{
+    data1_node *leaf = data1_add_taggeddata (dh, at->root, at, tag, nmem);
+
+    if (leaf)
+    {
+        leaf->u.data.what = DATA1I_num;
+        leaf->u.data.data = leaf->lbuf;
+        sprintf (leaf->u.data.data, "%d", num);
+        leaf->u.data.len = strlen (leaf->u.data.data);
+    }
+    return leaf;
+}
+
+/*
+ * data1_add_tagdata_text: add a tagged data node `tag' below `at' (via
+ * data1_add_taggeddata) holding a copy of the string `str'.  Returns
+ * the data node, or 0 when data1_add_taggeddata yields none.
+ */
+static data1_node *data1_add_tagdata_text (data1_handle dh, data1_node *at,
+                                           const char *tag, const char *str,
+                                           NMEM nmem)
+{
+    data1_node *node_data;
+
+    node_data = data1_add_taggeddata (dh, at->root, at, tag, nmem);
+    if (!node_data)
+        return 0;
+    node_data->u.data.what = DATA1I_text;
+    /* `str' is caller supplied (e.g. a database name) and of arbitrary
+       length, so it must not be strcpy'd blindly into the node's lbuf.
+       data1_insert_string, already used for tag names in data1_add_tag,
+       is used to store the copy instead. */
+    node_data->u.data.data = data1_insert_string (dh, node_data, nmem, str);
+    node_data->u.data.len = strlen (node_data->u.data.data);
+    return node_data;
+}
+
+static void zebraExplain_writeDatabase (ZebraExplainInfo zei,
+ struct zebDatabaseInfoB *zdi);
+static void zebraExplain_writeTarget (ZebraExplainInfo zei);
+/*
+ * zebraExplain_close: release an explain handle.  When writeFlag is
+ * non-zero, every database entry and then the target are first passed
+ * to zebraExplain_writeDatabase / zebraExplain_writeTarget (each of
+ * which returns at once unless its dirty flag is set).  Afterwards the
+ * database and set/use lists are freed, the NMEM pool is destroyed and
+ * zei itself is freed.
+ */
+void zebraExplain_close (ZebraExplainInfo zei, int writeFlag)
+{
+ struct zebDatabaseInfoB *zdi, *zdi_next;
+
+ logf (LOG_DEBUG, "zebraExplain_close wr=%d", writeFlag);
if (writeFlag)
{
- char p0[4096], *p = p0;
-
- memcpy (p, &zti->dictNum, sizeof(zti->dictNum));
- p += sizeof(zti->dictNum);
- for (zdi = zti->databaseInfo; zdi; zdi=zdi->next)
- {
- if (zdi->dirty)
- {
- char q0[4096], *q = q0;
- struct zebSUInfoB *zsui;
- Record drec;
- int no = 0;
-
- if (zdi->sysno)
- drec = rec_get (zti->records, zdi->sysno);
- else
- {
- drec = rec_new (zti->records);
-
- drec->info[recInfo_fileType] =
- rec_strdup ("grs.explain.databaseInfo",
- &drec->size[recInfo_fileType]);
-
- drec->info[recInfo_databaseName] =
- rec_strdup ("IR-Explain-1",
- &drec->size[recInfo_databaseName]);
- zdi->sysno = drec->sysno;
- }
- assert (drec);
- for (zsui = zdi->SUInfo; zsui; zsui=zsui->next)
- no++;
- memcpy (q, &zdi->info, sizeof(zdi->info));
- q += sizeof(zdi->info);
- memcpy (q, &no, sizeof(no));
- q += sizeof(no);
- for (zsui = zdi->SUInfo; zsui; zsui=zsui->next)
- {
- memcpy (q, &zsui->info, sizeof(zsui->info));
- q += sizeof(zsui->info);
- }
- xfree (drec->info[recInfo_storeData]);
- drec->size[recInfo_storeData] = q-q0;
- drec->info[recInfo_storeData] = xmalloc (drec->size[recInfo_storeData]);
- memcpy (drec->info[recInfo_storeData], q0, drec->size[recInfo_storeData]);
- rec_put (zti->records, &drec);
- }
- strcpy (p, zdi->databaseName);
- p += strlen(p)+1;
- memcpy (p, &zdi->sysno, sizeof(zdi->sysno));
- p += sizeof(zdi->sysno);
- }
- *p++ = '\0';
- if (zti->dirty)
- {
- Record grec = rec_get (zti->records, 1);
-
- assert (grec);
- xfree (grec->info[recInfo_storeData]);
- grec->size[recInfo_storeData] = p-p0;
- grec->info[recInfo_storeData] = xmalloc (grec->size[recInfo_storeData]);
- memcpy (grec->info[recInfo_storeData], p0, grec->size[recInfo_storeData]);
- rec_put (zti->records, &grec);
- }
+ /* write each database info record */
+ for (zdi = zei->databaseInfo; zdi; zdi = zdi->next)
+ zebraExplain_writeDatabase (zei, zdi);
+ zebraExplain_writeTarget (zei); /* after the databases: it stores each zdi->sysno */
}
- for (zdi = zti->databaseInfo; zdi; zdi = zdi1)
+ for (zdi = zei->databaseInfo; zdi; zdi = zdi_next)
{
- struct zebSUInfoB *zsui, *zsui1;
+ struct zebSUInfoB *zsui, *zsui_next;
- zdi1 = zdi->next;
- for (zsui = zdi->SUInfo; zsui; zsui = zsui1)
+ zdi_next = zdi->next; /* fetch before zdi is freed */
+ for (zsui = zdi->SUInfo; zsui; zsui = zsui_next)
{
- zsui1 = zsui->next;
+ zsui_next = zsui->next;
xfree (zsui);
}
- xfree (zdi->databaseName);
xfree (zdi);
}
- xfree (zti);
+ nmem_destroy (zei->nmem); /* databaseName strings are nmem allocations */
+ xfree (zei);
}
-ZebTargetInfo *zebTargetInfo_open (Records records, int writeFlag)
+
+/*
+ * zebraExplain_open: create an explain handle on top of `records'.
+ * If record 1 exists it is parsed as the target description and the
+ * database list, ordinalSU and runNumber are loaded from its zebraInfo
+ * section.  Otherwise ordinalSU/runNumber start at 1/0 and, when
+ * writeFlag is set, an initial target record is created and written.
+ * Finally zebraExplain_newDatabase is called for "IR-Explain-1" (a
+ * no-op returning -1 if that database is already listed).
+ * Malformed target records are caught by assertions.
+ */
+ZebraExplainInfo zebraExplain_open (Records records, data1_handle dh,
+ int writeFlag)
{
Record trec;
- ZebTargetInfo *zti;
- struct zebDatabaseInfoB **zdi;
-
- zti = xmalloc (sizeof(*zti));
- zti->dirty = 0;
- zti->curDatabaseInfo = NULL;
- zti->records = records;
+ ZebraExplainInfo zei;
+ struct zebDatabaseInfoB **zdip;
- zdi = &zti->databaseInfo;
-
+ logf (LOG_DEBUG, "zebraExplain_open wr=%d", writeFlag);
+ zei = xmalloc (sizeof(*zei));
+ zei->dirty = 0;
+ zei->curDatabaseInfo = NULL;
+ zei->records = records;
+ zei->nmem = nmem_create ();
+ zei->dh = dh;
+ zei->attsets = NULL;
+ /* not set on the "no record, read-only" path below: give it a value */
+ zei->data1_target = NULL;
+ zdip = &zei->databaseInfo;
trec = rec_get (records, 1);
+
if (trec)
{
- const char *p;
+ data1_node *node_tgtinfo, *node_zebra, *node_list, *np;
- p = trec->info[recInfo_storeData];
+ zei->data1_target = read_sgml_rec (zei->dh, zei->nmem, trec);
+ assert (zei->data1_target);
- memcpy (&zti->dictNum, p, sizeof(zti->dictNum));
- p += sizeof(zti->dictNum);
- while (*p)
- {
- *zdi = xmalloc (sizeof(**zdi));
- (*zdi)->SUInfo = NULL;
- (*zdi)->databaseName = xstrdup (p);
- p += strlen(p)+1;
- memcpy (&(*zdi)->sysno, p, sizeof((*zdi)->sysno));
- p += sizeof((*zdi)->sysno);
- (*zdi)->readFlag = 1;
- (*zdi)->dirty = 0;
- zdi = &(*zdi)->next;
- }
- assert (p - trec->info[recInfo_storeData] == trec->size[recInfo_storeData]-1);
+#if ZINFO_DEBUG
+ data1_pr_tree (zei->dh, zei->data1_target, stderr);
+#endif
+ node_tgtinfo = data1_search_tag (zei->dh, zei->data1_target->child,
+ "targetInfo");
+ assert (node_tgtinfo);
+ node_zebra = data1_search_tag (zei->dh, node_tgtinfo->child,
+ "zebraInfo");
+ assert (node_zebra);
+ node_list = data1_search_tag (zei->dh, node_zebra->child,
+ "databaseList");
+ assert (node_list);
+ for (np = node_list->child; np; np = np->next)
+ {
+ data1_node *node_name = NULL;
+ data1_node *node_id = NULL;
+ data1_node *np2;
+ if (np->which != DATA1N_tag || strcmp (np->u.tag.tag, "database"))
+ continue;
+ for (np2 = np->child; np2; np2 = np2->next)
+ {
+ if (np2->which != DATA1N_tag)
+ continue;
+ if (!strcmp (np2->u.tag.tag, "name"))
+ node_name = np2->child;
+ else if (!strcmp (np2->u.tag.tag, "id"))
+ node_id = np2->child;
+ }
+ assert (node_id && node_name);
+
+ *zdip = xmalloc (sizeof(**zdip));
+
+ (*zdip)->readFlag = 1;
+ (*zdip)->dirty = 0;
+ (*zdip)->data1_database = NULL;
+ (*zdip)->recordCount = 0;
+ (*zdip)->recordBytes = 0;
+ (*zdip)->SUInfo = NULL;
+
+ (*zdip)->databaseName = nmem_malloc (zei->nmem,
+ 1+node_name->u.data.len);
+ memcpy ((*zdip)->databaseName, node_name->u.data.data,
+ node_name->u.data.len);
+ (*zdip)->databaseName[node_name->u.data.len] = '\0';
+ (*zdip)->sysno = atoi_n (node_id->u.data.data,
+ node_id->u.data.len);
+ zdip = &(*zdip)->next;
+ }
+ np = data1_search_tag (zei->dh, node_zebra->child,
+ "ordinalSU");
+ assert (np); /* check before following ->child */
+ np = np->child;
+ assert (np && np->which == DATA1N_data);
+ zei->ordinalSU = atoi_n (np->u.data.data, np->u.data.len);
+
+ np = data1_search_tag (zei->dh, node_zebra->child,
+ "runNumber");
+ assert (np); /* check before following ->child */
+ np = np->child;
+ assert (np && np->which == DATA1N_data);
+ zei->runNumber = atoi_n (np->u.data.data, np->u.data.len);
}
else
{
- zti->dictNum = 1;
- if (writeFlag)
- {
- trec = rec_new (records);
+ zei->ordinalSU = 1;
+ zei->runNumber = 0;
+ if (writeFlag)
+ {
+ char *sgml_buf;
+ int sgml_len;
+ zei->data1_target =
+ data1_read_sgml (zei->dh, zei->nmem,
+ "<explain><targetInfo>targetInfo\n"
+ "<name>Zebra</>\n"
+ "<namedResultSets>1</>\n"
+ "<multipleDBSearch>1</>\n"
+ "<nicknames><name>Zebra</></>\n"
+ "</></>\n" );
+ /* write now because we want to be sure about the sysno */
+ trec = rec_new (records);
trec->info[recInfo_fileType] =
- rec_strdup ("grs.explain.targetInfo",
- &trec->size[recInfo_fileType]);
+ rec_strdup ("grs.sgml", &trec->size[recInfo_fileType]);
trec->info[recInfo_databaseName] =
- rec_strdup ("IR-Explain-1",
- &trec->size[recInfo_databaseName]);
- trec->info[recInfo_databaseName] = xstrdup ("IR-Explain-1");
- trec->info[recInfo_storeData] = xmalloc (1+sizeof(zti->dictNum));
- memcpy (trec->info[recInfo_storeData], &zti->dictNum, sizeof(zti->dictNum));
- trec->info[recInfo_storeData][sizeof(zti->dictNum)] = '\0';
- trec->size[recInfo_storeData] = sizeof(zti->dictNum)+1;
- rec_put (records, &trec);
- }
+ rec_strdup ("IR-Explain-1", &trec->size[recInfo_databaseName]);
+
+ sgml_buf = data1_nodetoidsgml(dh, zei->data1_target, 0, &sgml_len);
+ trec->info[recInfo_storeData] = xmalloc (sgml_len);
+ memcpy (trec->info[recInfo_storeData], sgml_buf, sgml_len);
+ trec->size[recInfo_storeData] = sgml_len;
+
+ rec_put (records, &trec);
+ }
}
- *zdi = NULL;
+ *zdip = NULL;
rec_rm (&trec);
- return zti;
+ zebraExplain_newDatabase (zei, "IR-Explain-1");
+ return zei;
}
-static void zebTargetInfo_readDatabase (ZebTargetInfo *zti,
- struct zebDatabaseInfoB *zdi)
+
+/*
+ * zebraExplain_readDatabase: load the stored description of database
+ * `zdi' (record zdi->sysno): the parsed tree, recordBytes, the set/use
+ * ordinal list and recordCount.  Clears zdi->readFlag when done.
+ * Malformed records are caught by assertions.
+ */
+static void zebraExplain_readDatabase (ZebraExplainInfo zei,
+ struct zebDatabaseInfoB *zdi)
{
- const char *p;
- struct zebSUInfoB **zsuip = &zdi->SUInfo;
- int i, no;
Record rec;
+ data1_node *node_dbinfo, *node_zebra, *node_list, *np;
+ struct zebSUInfoB **zsuip = &zdi->SUInfo;
+
+ assert (zdi->sysno);
+ rec = rec_get (zei->records, zdi->sysno);
+ assert (rec);
- rec = rec_get (zti->records, zdi->sysno);
- assert (rec);
- p = rec->info[recInfo_storeData];
- memcpy (&zdi->info, p, sizeof(zdi->info));
- p += sizeof(zdi->info);
- memcpy (&no, p, sizeof(no));
- p += sizeof(no);
- for (i = 0; i<no; i++)
+ zdi->data1_database = read_sgml_rec (zei->dh, zei->nmem, rec);
+ assert (zdi->data1_database);
+
+ node_dbinfo = data1_search_tag (zei->dh, zdi->data1_database->child,
+ "databaseInfo");
+ assert (node_dbinfo);
+
+ node_zebra = data1_search_tag (zei->dh, node_dbinfo->child,
+ "zebraInfo");
+ assert (node_zebra);
+ /* zebraExplain_writeDatabase stores recordBytes below zebraInfo, so
+ * it has to be looked up there and not directly below databaseInfo */
+ np = data1_search_tag (zei->dh, node_zebra->child,
+ "recordBytes");
+ if (np && np->child && np->child->which == DATA1N_data)
+ {
+ zdi->recordBytes = atoi_n (np->child->u.data.data,
+ np->child->u.data.len);
+ }
+ node_list = data1_search_tag (zei->dh, node_zebra->child,
+ "attrlist");
+ assert (node_list);
+ for (np = node_list->child; np; np = np->next)
{
+ data1_node *node_set = NULL;
+ data1_node *node_use = NULL;
+ data1_node *node_ordinal = NULL;
+ data1_node *np2;
+ if (np->which != DATA1N_tag || strcmp (np->u.tag.tag, "attr"))
+ continue;
+ for (np2 = np->child; np2; np2 = np2->next)
+ {
+ if (np2->which != DATA1N_tag || !np2->child ||
+ np2->child->which != DATA1N_data)
+ continue;
+ if (!strcmp (np2->u.tag.tag, "set"))
+ node_set = np2->child;
+ else if (!strcmp (np2->u.tag.tag, "use"))
+ node_use = np2->child;
+ else if (!strcmp (np2->u.tag.tag, "ordinal"))
+ node_ordinal = np2->child;
+ }
+ assert (node_set && node_use && node_ordinal);
+
*zsuip = xmalloc (sizeof(**zsuip));
- memcpy (&(*zsuip)->info, p, sizeof((*zsuip)->info));
- p += sizeof((*zsuip)->info);
+ (*zsuip)->info.set = atoi_n (node_set->u.data.data,
+ node_set->u.data.len);
+ (*zsuip)->info.use = atoi_n (node_use->u.data.data,
+ node_use->u.data.len);
+ (*zsuip)->info.ordinal = atoi_n (node_ordinal->u.data.data,
+ node_ordinal->u.data.len);
+ logf (LOG_DEBUG, "set=%d use=%d ordinal=%d",
+ (*zsuip)->info.set, (*zsuip)->info.use, (*zsuip)->info.ordinal);
zsuip = &(*zsuip)->next;
}
*zsuip = NULL;
+
+ /* np->child may be absent: test it before looking at ->which */
+ if ((np = data1_search_tag (zei->dh, node_dbinfo->child,
+ "recordCount")) &&
+ (np = data1_search_tag (zei->dh, np->child,
+ "recordCountActual")) &&
+ np->child && np->child->which == DATA1N_data)
+ {
+ zdi->recordCount = atoi_n (np->child->u.data.data,
+ np->child->u.data.len);
+ }
zdi->readFlag = 0;
rec_rm (&rec);
}
-int zebTargetInfo_curDatabase (ZebTargetInfo *zti, const char *database)
+int zebraExplain_curDatabase (ZebraExplainInfo zei, const char *database)
{
struct zebDatabaseInfoB *zdi;
- assert (zti);
- if (zti->curDatabaseInfo &&
- !strcmp (zti->curDatabaseInfo->databaseName, database))
+ assert (zei);
+ if (zei->curDatabaseInfo &&
+ !strcmp (zei->curDatabaseInfo->databaseName, database))
return 0;
- for (zdi = zti->databaseInfo; zdi; zdi=zdi->next)
+ for (zdi = zei->databaseInfo; zdi; zdi=zdi->next)
{
if (!strcmp (zdi->databaseName, database))
break;
if (!zdi)
return -1;
if (zdi->readFlag)
- zebTargetInfo_readDatabase (zti, zdi);
- zti->curDatabaseInfo = zdi;
+ zebraExplain_readDatabase (zei, zdi);
+ zei->curDatabaseInfo = zdi;
return 0;
}
-int zebTargetInfo_newDatabase (ZebTargetInfo *zti, const char *database)
+int zebraExplain_newDatabase (ZebraExplainInfo zei, const char *database) /*
+ * Register a database that is not yet listed.  Returns -1 without
+ * changing anything when `database' already exists; otherwise creates
+ * an entry with an initial databaseInfo tree, puts it first in the
+ * list, makes it the current database, marks both the entry and zei
+ * dirty and returns 0.
+ */
{
struct zebDatabaseInfoB *zdi;
+ data1_node *node_dbinfo;
- assert (zti);
- for (zdi = zti->databaseInfo; zdi; zdi=zdi->next)
+ assert (zei);
+ for (zdi = zei->databaseInfo; zdi; zdi=zdi->next)
{
if (!strcmp (zdi->databaseName, database))
break;
}
if (zdi)
return -1;
+ /* it's new really. make it */
zdi = xmalloc (sizeof(*zdi));
- zdi->next = zti->databaseInfo;
- zti->databaseInfo = zdi;
+ zdi->next = zei->databaseInfo;
+ zei->databaseInfo = zdi;
zdi->sysno = 0;
+ zdi->recordCount = 0;
+ zdi->recordBytes = 0;
zdi->readFlag = 0;
- zdi->databaseName = xstrdup (database);
+ zdi->databaseName = nmem_strdup (zei->nmem, database); /* freed with the nmem pool */
zdi->SUInfo = NULL;
+
+ assert (zei->dh);
+ assert (zei->nmem);
+
+ zdi->data1_database =
+ data1_read_sgml (zei->dh, zei->nmem,
+ "<explain><databaseInfo>databaseInfo\n"
+ "<userFee>0</>\n"
+ "<available>1</>\n"
+ "</></>\n");
+
+ node_dbinfo = data1_search_tag (zei->dh, zdi->data1_database->child,
+ "databaseInfo");
+ assert (node_dbinfo);
+
+ data1_add_tagdata_text (zei->dh, node_dbinfo, "name",
+ database, zei->nmem);
+
+#if ZINFO_DEBUG
+ data1_pr_tree (zei->dh, zdi->data1_database, stderr);
+#endif
zdi->dirty = 1;
- zti->dirty = 1;
- zti->curDatabaseInfo = zdi;
+ zei->dirty = 1;
+ zei->curDatabaseInfo = zdi;
return 0;
}
-int zebTargetInfo_lookupSU (ZebTargetInfo *zti, int set, int use)
+static void zebraExplain_writeDatabase (ZebraExplainInfo zei,
+ struct zebDatabaseInfoB *zdi) /*
+ * Store the description of database `zdi' as a "grs.sgml" record in
+ * database "IR-Explain-1".  Does nothing unless zdi->dirty is set.
+ * An existing record (zdi->sysno != 0) is reused; otherwise a new one
+ * is created and its sysno remembered in zdi.
+ * NOTE(review): neither the rec_get result nor node_dbinfo is checked
+ * before use, and zdi->dirty is not cleared afterwards.
+ */
{
+ char *sgml_buf;
+ int sgml_len;
+ Record drec;
+ data1_node *node_dbinfo, *node_list, *node_count, *node_zebra;
struct zebSUInfoB *zsui;
+
+ if (!zdi->dirty)
+ return;
+
+ if (zdi->sysno)
+ {
+ drec = rec_get (zei->records, zdi->sysno);
+ xfree (drec->info[recInfo_storeData]); /* replaced further down */
+ }
+ else
+ {
+ drec = rec_new (zei->records);
+ zdi->sysno = drec->sysno;
+
+ drec->info[recInfo_fileType] =
+ rec_strdup ("grs.sgml", &drec->size[recInfo_fileType]);
+ drec->info[recInfo_databaseName] =
+ rec_strdup ("IR-Explain-1",
+ &drec->size[recInfo_databaseName]);
+ }
+ assert (zdi->data1_database);
+ node_dbinfo = data1_search_tag (zei->dh, zdi->data1_database->child,
+ "databaseInfo");
+ /* record count */
+ node_count = data1_make_tag (zei->dh, node_dbinfo,
+ "recordCount", zei->nmem);
+ data1_add_tagdata_int (zei->dh, node_count, "recordCountActual",
+ zdi->recordCount, zei->nmem);
+
+ /* zebra info (private) */
+ node_zebra = data1_make_tag (zei->dh, node_dbinfo,
+ "zebraInfo", zei->nmem);
+ node_list = data1_make_tag (zei->dh, node_zebra,
+ "attrlist", zei->nmem);
+ for (zsui = zdi->SUInfo; zsui; zsui = zsui->next) /* one attr entry per set/use pair */
+ {
+ data1_node *node_attr;
+ node_attr = data1_add_tag (zei->dh, node_list,
+ "attr", zei->nmem);
+ data1_add_tagdata_int (zei->dh, node_attr, "set",
+ zsui->info.set, zei->nmem);
+ data1_add_tagdata_int (zei->dh, node_attr, "use",
+ zsui->info.use, zei->nmem);
+ data1_add_tagdata_int (zei->dh, node_attr, "ordinal",
+ zsui->info.ordinal, zei->nmem);
+ }
+ data1_add_tagdata_int (zei->dh, node_zebra,
+ "recordBytes", zdi->recordBytes, zei->nmem); /* stored below zebraInfo */
+ /* convert to "SGML" and write it */
+#if ZINFO_DEBUG
+ data1_pr_tree (zei->dh, zdi->data1_database, stderr);
+#endif
+ sgml_buf = data1_nodetoidsgml(zei->dh, zdi->data1_database,
+ 0, &sgml_len);
+ drec->info[recInfo_storeData] = xmalloc (sgml_len);
+ memcpy (drec->info[recInfo_storeData], sgml_buf, sgml_len);
+ drec->size[recInfo_storeData] = sgml_len;
+
+ rec_put (zei->records, &drec);
+}
+
+/*
+ * trav_attset: make sure attribute set `p_this' is present in the
+ * zei->attsets list (entries are identified by name), adding a copy of
+ * its name and ordinal when it is missing, then continue with
+ * p_this->children.  A NULL p_this ends the traversal.
+ */
+static void trav_attset (data1_handle dh, ZebraExplainInfo zei,
+                         data1_attset *p_this)
+{
+    struct zebraExplainAttset *p_reg;
+
+    if (!p_this)
+        return;
+    for (p_reg = zei->attsets; p_reg; p_reg = p_reg->next)
+        if (!strcmp (p_this->name, p_reg->name))
+            break;
+    /* p_reg is NULL exactly when the name was not found.  The test used
+       to be made on p_this, which cannot be NULL at this point, so no
+       attribute set was ever registered. */
+    if (!p_reg)
+    {
+        p_reg = nmem_malloc (zei->nmem, sizeof (*p_reg));
+        p_reg->name = nmem_strdup (zei->nmem, p_this->name);
+        p_reg->ordinal = p_this->ordinal;
+        p_reg->next = zei->attsets;
+        zei->attsets = p_reg;
+    }
+    trav_attset (dh, zei, p_this->children);
+}
- assert (zti->curDatabaseInfo);
- for (zsui = zti->curDatabaseInfo->SUInfo; zsui; zsui=zsui->next)
+static void trav_absyn (data1_handle dh, void *h, data1_absyn *a)
+{ /* Log the abstract syntax name and register its attribute set;
+ * `h' is the ZebraExplainInfo passed through as an opaque pointer. */
+ logf (LOG_LOG, "absyn %s", a->name);
+ trav_attset (dh, (ZebraExplainInfo) h, a->attset);
+}
+/*
+ * zebraExplain_writeTarget: rebuild the zebraInfo section of the target
+ * description (version, database list with name and sysno, ordinalSU,
+ * runNumber) and store the tree as record 1.  Does nothing unless
+ * zei->dirty is set.
+ * NOTE(review): the rec_get result is used without a check, and
+ * zei->dirty is not cleared afterwards.
+ */
+static void zebraExplain_writeTarget (ZebraExplainInfo zei)
+{
+ struct zebDatabaseInfoB *zdi;
+ data1_node *node_tgtinfo, *node_list, *node_zebra;
+ Record trec;
+ int sgml_len;
+ char *sgml_buf;
+
+ if (!zei->dirty)
+ return;
+
+ trec = rec_get (zei->records, 1);
+ xfree (trec->info[recInfo_storeData]); /* replaced further down */
+
+ node_tgtinfo = data1_search_tag (zei->dh, zei->data1_target->child,
+ "targetInfo");
+ assert (node_tgtinfo);
+
+ node_zebra = data1_make_tag (zei->dh, node_tgtinfo,
+ "zebraInfo", zei->nmem); /* an existing zebraInfo is emptied and refilled */
+ data1_add_tagdata_text (zei->dh, node_zebra, "version",
+ ZEBRAVER, zei->nmem);
+ node_list = data1_add_tag (zei->dh, node_zebra,
+ "databaseList", zei->nmem);
+ for (zdi = zei->databaseInfo; zdi; zdi = zdi->next)
+ {
+ data1_node *node_db;
+ node_db = data1_add_tag (zei->dh, node_list,
+ "database", zei->nmem);
+ data1_add_tagdata_text (zei->dh, node_db, "name",
+ zdi->databaseName, zei->nmem);
+ data1_add_tagdata_int (zei->dh, node_db, "id",
+ zdi->sysno, zei->nmem);
+ }
+ data1_add_tagdata_int (zei->dh, node_zebra, "ordinalSU",
+ zei->ordinalSU, zei->nmem);
+
+ data1_add_tagdata_int (zei->dh, node_zebra, "runNumber",
+ zei->runNumber, zei->nmem);
+
+ node_list = data1_add_tag (zei->dh, node_zebra,
+ "attsetList", zei->nmem); /* created empty; nothing below fills it */
+ /* convert to "SGML" and write it */
+#if ZINFO_DEBUG
+ data1_pr_tree (zei->dh, zei->data1_target, stderr);
+#endif
+ sgml_buf = data1_nodetoidsgml(zei->dh, zei->data1_target,
+ 0, &sgml_len);
+ trec->info[recInfo_storeData] = xmalloc (sgml_len);
+ memcpy (trec->info[recInfo_storeData], sgml_buf, sgml_len);
+ trec->size[recInfo_storeData] = sgml_len;
+
+ rec_put (zei->records, &trec);
+}
+/*
+ * zebraExplain_lookupSU: return the ordinal registered for the set/use
+ * pair in the current database, or -1 when the pair is not registered.
+ * A current database must have been selected (asserted).
+ */
+int zebraExplain_lookupSU (ZebraExplainInfo zei, int set, int use)
+{
+ struct zebSUInfoB *zsui;
+
+ assert (zei->curDatabaseInfo);
+ for (zsui = zei->curDatabaseInfo->SUInfo; zsui; zsui=zsui->next)
if (zsui->info.use == use && zsui->info.set == set)
return zsui->info.ordinal;
return -1;
}
-int zebTargetInfo_addSU (ZebTargetInfo *zti, int set, int use)
+int zebraExplain_addSU (ZebraExplainInfo zei, int set, int use) /*
+ * Register the set/use pair in the current database and give it the
+ * next ordinal (zei->ordinalSU, which is then incremented).  Returns
+ * the new ordinal, or -1 when the pair is already registered.  Marks
+ * the current database and zei dirty.
+ */
{
struct zebSUInfoB *zsui;
- assert (zti->curDatabaseInfo);
- for (zsui = zti->curDatabaseInfo->SUInfo; zsui; zsui=zsui->next)
+ assert (zei->curDatabaseInfo);
+ for (zsui = zei->curDatabaseInfo->SUInfo; zsui; zsui=zsui->next)
if (zsui->info.use == use && zsui->info.set == set)
return -1;
zsui = xmalloc (sizeof(*zsui));
- zsui->next = zti->curDatabaseInfo->SUInfo;
- zti->curDatabaseInfo->SUInfo = zsui;
- zti->curDatabaseInfo->dirty = 1;
- zti->dirty = 1;
+ zsui->next = zei->curDatabaseInfo->SUInfo;
+ zei->curDatabaseInfo->SUInfo = zsui;
+ zei->curDatabaseInfo->dirty = 1;
+ zei->dirty = 1;
zsui->info.set = set;
zsui->info.use = use;
- zsui->info.ordinal = (zti->dictNum)++;
+ zsui->info.ordinal = (zei->ordinalSU)++;
return zsui->info.ordinal;
}
-ZebDatabaseInfo *zebTargetInfo_getDB (ZebTargetInfo *zti)
+void zebraExplain_recordBytesIncrement (ZebraExplainInfo zei, int adjust_num)
+{ /* Add adjust_num to recordBytes of the current database and mark
+ * that database dirty.  A current database is required (asserted). */
+ assert (zei->curDatabaseInfo);
+
+ zei->curDatabaseInfo->recordBytes += adjust_num;
+ zei->curDatabaseInfo->dirty = 1;
+}
+/*
+ * zebraExplain_recordCountIncrement: add adjust_num to recordCount of
+ * the current database and mark that database dirty.  A current
+ * database is required (asserted).
+ */
+void zebraExplain_recordCountIncrement (ZebraExplainInfo zei, int adjust_num)
{
- assert (zti->curDatabaseInfo);
+ assert (zei->curDatabaseInfo);
- return &zti->curDatabaseInfo->info;
+ zei->curDatabaseInfo->recordCount += adjust_num;
+ zei->curDatabaseInfo->dirty = 1;
}
-void zebTargetInfo_setDB (ZebTargetInfo *zti, ZebDatabaseInfo *zdi)
+int zebraExplain_runNumberIncrement (ZebraExplainInfo zei, int adjust_num) /*
+ * Add adjust_num to zei->runNumber and return the new value.  zei is
+ * marked dirty only when adjust_num is non-zero, so passing 0 simply
+ * reads the current run number.
+ */
{
- assert (zti->curDatabaseInfo);
+ if (adjust_num)
+ zei->dirty = 1;
+ return zei->runNumber += adjust_num;
+}
- zti->curDatabaseInfo->dirty = 1;
- memcpy (&zti->curDatabaseInfo->info, zdi, sizeof(*zdi));
+RecordAttr *rec_init_attr (ZebraExplainInfo zei, Record rec)
+{ /* Return the RecordAttr stored in rec->info[recInfo_attr].  When the
+ * record has none yet, one is allocated, attached to the record and
+ * initialised: size and offset 0, runNumber taken from zei. */
+ RecordAttr *recordAttr;
+
+ if (rec->info[recInfo_attr])
+ return (RecordAttr *) rec->info[recInfo_attr];
+ recordAttr = xmalloc (sizeof(*recordAttr));
+ rec->info[recInfo_attr] = (char *) recordAttr;
+ rec->size[recInfo_attr] = sizeof(*recordAttr);
+
+ recordAttr->recordSize = 0;
+ recordAttr->recordOffset = 0;
+ recordAttr->runNumber = zei->runNumber;
+ return recordAttr;
}
/*
- * Copyright (C) 1994-1996, Index Data I/S
+ * Copyright (C) 1994-1998, Index Data I/S
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: zinfo.h,v $
- * Revision 1.2 1996-05-22 08:22:00 adam
+ * Revision 1.3 1998-03-05 08:45:13 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.2 1996/05/22 08:22:00 adam
* Added public ZebDatabaseInfo structure.
*
* Revision 1.1 1996/05/13 14:23:07 adam
* Work on compaction of set/use bytes in dictionary.
*
*/
+#ifndef ZINFO_H
+#define ZINFO_H
+#include <data1.h>
#include "recindex.h"
-typedef struct zebTargetInfo ZebTargetInfo;
-typedef struct zebDatabaseInfo {
- int noOfRecords;
-} ZebDatabaseInfo;
+typedef struct zebraExplainInfo *ZebraExplainInfo;
+typedef struct zebDatabaseInfo ZebDatabaseInfo;
+ZebraExplainInfo zebraExplain_open (Records records, data1_handle,
+ int writeFlag);
+void zebraExplain_close (ZebraExplainInfo zei, int writeFlag);
+int zebraExplain_curDatabase (ZebraExplainInfo zei, const char *database);
+int zebraExplain_newDatabase (ZebraExplainInfo zei, const char *database);
+int zebraExplain_lookupSU (ZebraExplainInfo zei, int set, int use);
+int zebraExplain_addSU (ZebraExplainInfo zei, int set, int use);
+void zebraExplain_recordCountIncrement (ZebraExplainInfo zei, int adjust_num);
+void zebraExplain_recordBytesIncrement (ZebraExplainInfo zei, int adjust_num);
+int zebraExplain_runNumberIncrement (ZebraExplainInfo zei, int adjust_num);
+
+typedef struct { /* per-record attributes kept in info[recInfo_attr] */
+ int recordSize; /* initialised to 0 by rec_init_attr */
+ int recordOffset; /* initialised to 0 by rec_init_attr */
+ int runNumber; /* zei->runNumber at the time rec_init_attr created it */
+} RecordAttr;
+RecordAttr *rec_init_attr (ZebraExplainInfo zei, Record rec);
-ZebTargetInfo *zebTargetInfo_open (Records records, int writeFlag);
-void zebTargetInfo_close (ZebTargetInfo *zti, int writeFlag);
-int zebTargetInfo_curDatabase (ZebTargetInfo *zti, const char *database);
-int zebTargetInfo_newDatabase (ZebTargetInfo *zti, const char *database);
-int zebTargetInfo_lookupSU (ZebTargetInfo *zti, int set, int use);
-int zebTargetInfo_addSU (ZebTargetInfo *zti, int set, int use);
-ZebDatabaseInfo *zebTargetInfo_getDB (ZebTargetInfo *zti);
-void zebTargetInfo_setDB (ZebTargetInfo *zti, ZebDatabaseInfo *zdi);
+#endif
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: zrpn.c,v $
- * Revision 1.74 1998-02-10 12:03:06 adam
+ * Revision 1.75 1998-03-05 08:45:13 adam
+ * New result set model and modular ranking system. Moved towards
+ * descent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.74 1998/02/10 12:03:06 adam
* Implemented Sort.
*
* Revision 1.73 1998/01/29 13:40:11 adam
#include <rstemp.h>
#include <rsnull.h>
#include <rsbool.h>
-#include <rsrel.h>
struct rpn_char_map_info {
ZebraMaps zm;
return zebra_maps_input (p->zm, p->reg_type, from, len);
}
-static void rpn_char_map_prepare (ZServerInfo *zi, int reg_type,
+static void rpn_char_map_prepare (ZebraHandle zh, int reg_type,
struct rpn_char_map_info *map_info)
{
- map_info->zm = zi->zebra_maps;
+ map_info->zm = zh->zebra_maps;
map_info->reg_type = reg_type;
- dict_grep_cmap (zi->dict, map_info, rpn_char_map_handler);
+ dict_grep_cmap (zh->dict, map_info, rpn_char_map_handler);
}
typedef struct {
#ifdef TERM_COUNT
int *term_no;
#endif
- ISAM_P *isam_p_buf;
+ ISAM_P *isam_p_buf;
int isam_p_size;
- int isam_p_indx;
+ int isam_p_indx;
+ ZebraHandle zh;
+ int reg_type;
};
-static void add_isam_p (const char *info, struct grep_info *p)
+/*
+ * term_untrans: rebuild a term in dst from the mapped string src.
+ *
+ * While *src is non-zero, zebra_maps_output() is called with the handle's
+ * zebra_maps, reg_type and the address of src; the string it returns is
+ * appended to dst. dst is NUL-terminated on return.
+ *
+ * NOTE(review): presumably zebra_maps_output() advances src on every call
+ * (otherwise this loop would not terminate) - confirm against its
+ * definition. No bound is applied to dst; the caller must supply a buffer
+ * large enough for the whole result.
+ */
+static void term_untrans (ZebraHandle zh, int reg_type,
+ char *dst, const char *src)
+{
+ while (*src)
+ {
+ const char *cp = zebra_maps_output (zh->zebra_maps, reg_type, &src);
+ while (*cp)
+ *dst++ = *cp++;
+ }
+ *dst = '\0';
+}
+
+static void add_isam_p (const char *name, const char *info,
+ struct grep_info *p)
{
+ char term_tmp[1024];
if (p->isam_p_indx == p->isam_p_size)
{
ISAM_P *new_isam_p_buf;
}
assert (*info == sizeof(*p->isam_p_buf));
memcpy (p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
+
+ term_untrans (p->zh, p->reg_type, term_tmp, name+2);
+ logf (LOG_DEBUG, "grep: %s", term_tmp);
+
(p->isam_p_indx)++;
}
static int grep_handle (char *name, const char *info, void *p)
{
- add_isam_p (info, p);
+ add_isam_p (name, info, p);
return 0;
}
}
static int term_100 (ZebraMaps zebra_maps, int reg_type,
- const char **src, char *dst, int space_split)
+ const char **src, char *dst, int space_split,
+ char *dst_term)
{
const char *s0, *s1;
const char **map;
int i = 0;
+ int j = 0;
if (!term_pre (zebra_maps, reg_type, src, NULL, NULL))
return 0;
{
if (!isalnum (*s1) && *s1 != '-')
dst[i++] = '\\';
+ dst_term[j++] = *s1;
dst[i++] = *s1++;
}
}
dst[i] = '\0';
+ dst_term[j] = '\0';
*src = s0;
return i;
}
static int term_101 (ZebraMaps zebra_maps, int reg_type,
- const char **src, char *dst, int space_split)
+ const char **src, char *dst, int space_split,
+ char *dst_term)
{
const char *s0, *s1;
const char **map;
int i = 0;
+ int j = 0;
if (!term_pre (zebra_maps, reg_type, src, "#", "#"))
return 0;
{
dst[i++] = '.';
dst[i++] = '*';
- s0++;
+ dst_term[j++] = *s0++;
}
else
{
{
if (!isalnum (*s1))
dst[i++] = '\\';
+ dst_term[j++] = *s1;
dst[i++] = *s1++;
}
}
}
dst[i] = '\0';
+ dst_term[j++] = '\0';
*src = s0;
return i;
}
static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src,
- char *dst, int *errors, int space_split)
+ char *dst, int *errors, int space_split,
+ char *dst_term)
{
int i = 0;
+ int j = 0;
const char *s0, *s1;
const char **map;
while (*s0)
{
if (strchr ("^\\()[].*+?|-", *s0))
+ {
+ dst_term[j++] = *s0;
dst[i++] = *s0++;
+ }
else
{
s1 = s0;
{
if (!isalnum (*s1))
dst[i++] = '\\';
+ dst_term[j++] = *s1;
dst[i++] = *s1++;
}
}
}
dst[i] = '\0';
+ dst_term[j] = '\0';
*src = s0;
return i;
}
static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
- char *dst, int space_split)
+ char *dst, int space_split, char *dst_term)
{
- return term_103 (zebra_maps, reg_type, src, dst, NULL, space_split);
+ return term_103 (zebra_maps, reg_type, src, dst, NULL, space_split,
+ dst_term);
}
/* gen_regular_rel - generate regular expression from relation
strcat (dst, "))");
}
-static int relational_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt,
+static int relational_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
const char **term_sub,
char *term_dict,
oid_value attributeSet,
struct grep_info *grep_info,
int *max_pos,
- int reg_type)
+ int reg_type,
+ char *term_dst)
{
AttrType relation;
int relation_value;
switch (relation_value)
{
case 1:
- if (!term_100 (zi->zebra_maps, reg_type, term_sub, term_tmp, 1))
+ if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1,
+ term_dst))
return 0;
term_value = atoi (term_tmp);
logf (LOG_DEBUG, "Relation <");
gen_regular_rel (term_tmp, term_value-1, 1);
break;
case 2:
- if (!term_100 (zi->zebra_maps, reg_type, term_sub, term_tmp, 1))
+ if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1,
+ term_dst))
return 0;
term_value = atoi (term_tmp);
logf (LOG_DEBUG, "Relation <=");
gen_regular_rel (term_tmp, term_value, 1);
break;
case 4:
- if (!term_100 (zi->zebra_maps, reg_type, term_sub, term_tmp, 1))
+ if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1,
+ term_dst))
return 0;
term_value = atoi (term_tmp);
logf (LOG_DEBUG, "Relation >=");
gen_regular_rel (term_tmp, term_value, 0);
break;
case 5:
- if (!term_100 (zi->zebra_maps, reg_type, term_sub, term_tmp, 1))
+ if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1,
+ term_dst))
return 0;
term_value = atoi (term_tmp);
logf (LOG_DEBUG, "Relation >");
return 0;
}
logf (LOG_DEBUG, "dict_lookup_grep: %s", term_tmp);
- r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info, max_pos,
+ r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, max_pos,
0, grep_handle);
if (r)
logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
return 1;
}
-static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt,
+static int field_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
const char **term_sub,
oid_value attributeSet, struct grep_info *grep_info,
int reg_type, int complete_flag,
- int num_bases, char **basenames)
+ int num_bases, char **basenames,
+ char *term_dst)
{
char term_dict[2*IT_MAX_WORD+2];
int j, r, base_no;
struct rpn_char_map_info rcmi;
int space_split = complete_flag ? 0 : 1;
- rpn_char_map_prepare (zi, reg_type, &rcmi);
+ rpn_char_map_prepare (zh, reg_type, &rcmi);
attr_init (&use, zapt, 1);
use_value = attr_find (&use, &curAttributeSet);
logf (LOG_DEBUG, "field_term, use value %d", use_value);
int max_pos, prefix_len = 0;
termp = *term_sub;
- if ((r=att_getentbyatt (zi, &attp, curAttributeSet, use_value)))
+ if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value)))
{
logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
curAttributeSet, use_value, r);
if (r == -1)
- zi->errCode = 114;
+ zh->errCode = 114;
else
- zi->errCode = 121;
+ zh->errCode = 121;
return -1;
}
- if (zebTargetInfo_curDatabase (zi->zti, basenames[base_no]))
+ if (zebraExplain_curDatabase (zh->zei, basenames[base_no]))
{
- zi->errCode = 109; /* Database unavailable */
- zi->errString = basenames[base_no];
+ zh->errCode = 109; /* Database unavailable */
+ zh->errString = basenames[base_no];
return -1;
}
for (local_attr = attp.local_attributes; local_attr;
{
int ord;
- ord = zebTargetInfo_lookupSU (zi->zti, attp.attset_ordinal,
+ ord = zebraExplain_lookupSU (zh->zei, attp.attset_ordinal,
local_attr->local);
if (ord < 0)
continue;
}
if (!prefix_len)
{
- zi->errCode = 114;
+ zh->errCode = 114;
return -1;
}
term_dict[prefix_len++] = ')';
term_dict[prefix_len++] = reg_type;
logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
term_dict[prefix_len] = '\0';
- if (!relational_term (zi, zapt, &termp, term_dict,
- attributeSet, grep_info, &max_pos, reg_type))
+ if (!relational_term (zh, zapt, &termp, term_dict,
+ attributeSet, grep_info, &max_pos, reg_type,
+ term_dst))
{
j = prefix_len;
switch (truncation_value)
case -1: /* not specified */
case 100: /* do not truncate */
term_dict[j++] = '(';
- if (!term_100 (zi->zebra_maps, reg_type,
- &termp, term_dict + j, space_split))
+ if (!term_100 (zh->zebra_maps, reg_type,
+ &termp, term_dict + j, space_split, term_dst))
return 0;
strcat (term_dict, ")");
- r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info,
+ r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info,
&max_pos, 0, grep_handle);
if (r)
logf (LOG_WARN, "dict_lookup_grep err, trunc=none:%d", r);
break;
case 1: /* right truncation */
term_dict[j++] = '(';
- if (!term_100 (zi->zebra_maps, reg_type,
- &termp, term_dict + j, space_split))
+ if (!term_100 (zh->zebra_maps, reg_type,
+ &termp, term_dict + j, space_split, term_dst))
return 0;
strcat (term_dict, ".*)");
- dict_lookup_grep (zi->dict, term_dict, 0, grep_info,
+ dict_lookup_grep (zh->dict, term_dict, 0, grep_info,
&max_pos, 0, grep_handle);
break;
case 2: /* left truncation */
case 3: /* left&right truncation */
- zi->errCode = 120;
+ zh->errCode = 120;
return -1;
case 101: /* process # in term */
term_dict[j++] = '(';
- if (!term_101 (zi->zebra_maps, reg_type,
- &termp, term_dict + j, space_split))
+ if (!term_101 (zh->zebra_maps, reg_type,
+ &termp, term_dict + j, space_split, term_dst))
return 0;
strcat (term_dict, ")");
- r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info,
+ r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info,
&max_pos, 0, grep_handle);
if (r)
logf (LOG_WARN, "dict_lookup_grep err, trunc=#: %d", r);
break;
case 102: /* Regexp-1 */
term_dict[j++] = '(';
- if (!term_102 (zi->zebra_maps, reg_type,
- &termp, term_dict + j, space_split))
+ if (!term_102 (zh->zebra_maps, reg_type,
+ &termp, term_dict + j, space_split, term_dst))
return 0;
strcat (term_dict, ")");
logf (LOG_DEBUG, "Regexp-1 tolerance=%d", r);
- r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info,
+ r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info,
&max_pos, 0, grep_handle);
if (r)
logf (LOG_WARN, "dict_lookup_grep err, trunc=regular: %d",
case 103: /* Regexp-2 */
r = 1;
term_dict[j++] = '(';
- if (!term_103 (zi->zebra_maps, reg_type,
- &termp, term_dict + j, &r, space_split))
+ if (!term_103 (zh->zebra_maps, reg_type,
+ &termp, term_dict + j, &r, space_split, term_dst))
return 0;
strcat (term_dict, ")");
logf (LOG_DEBUG, "Regexp-2 tolerance=%d", r);
- r = dict_lookup_grep (zi->dict, term_dict, r, grep_info,
+ r = dict_lookup_grep (zh->dict, term_dict, r, grep_info,
&max_pos, 2, grep_handle);
if (r)
logf (LOG_WARN, "dict_lookup_grep err, trunc=eregular: %d",
return 1;
}
-static void trans_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt,
+static void trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
char *termz)
{
size_t sizez;
termz[sizez] = '\0';
}
-static void trans_scan_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt,
+static void trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
char *termz, int reg_type)
{
Z_Term *term = zapt->term;
while ((len = (cp_end - cp)) > 0)
{
- map = zebra_maps_input (zi->zebra_maps, reg_type, &cp, len);
+ map = zebra_maps_input (zh->zebra_maps, reg_type, &cp, len);
if (**map == *CHR_SPACE)
space_map = *map;
else
termz[i] = '\0';
}
-static RSET rpn_search_APT_relevance (ZServerInfo *zi,
- Z_AttributesPlusTerm *zapt,
- oid_value attributeSet,
- int reg_type, int complete_flag,
- int num_bases, char **basenames)
-{
- rset_relevance_parms parms;
- char termz[IT_MAX_WORD+1];
- const char *termp = termz;
- struct grep_info grep_info;
- RSET result;
- int term_index = 0;
- int r;
-
- parms.key_size = sizeof(struct it_key);
- parms.max_rec = 200;
- parms.cmp = key_compare_it;
- parms.get_pos = key_get_pos;
- parms.is = zi->isam;
- parms.isc = zi->isamc;
- parms.no_terms = 0;
- parms.method = RSREL_METHOD_A;
-
- if (zapt->term->which != Z_Term_general)
- {
- zi->errCode = 124;
- return NULL;
- }
- trans_term (zi, zapt, termz);
-
-#ifdef TERM_COUNT
- grep_info.term_no = 0;
-#endif
- grep_info.isam_p_indx = 0;
- grep_info.isam_p_size = 0;
- grep_info.isam_p_buf = NULL;
- while (1)
- {
- r = field_term (zi, zapt, &termp, attributeSet, &grep_info,
- reg_type, complete_flag, num_bases, basenames);
- if (r <= 0)
- break;
-#ifdef TERM_COUNT
- for (; term_index < grep_info.isam_p_indx; term_index++)
- grep_info.term_no[term_index] = parms.no_terms;
- parms.no_terms++;
-#endif
- }
- parms.term_no = grep_info.term_no;
- parms.isam_positions = grep_info.isam_p_buf;
- parms.no_isam_positions = grep_info.isam_p_indx;
- if (grep_info.isam_p_indx > 0)
- result = rset_create (rset_kind_relevance, &parms);
- else
- result = rset_create (rset_kind_null, NULL);
-#ifdef TERM_COUNT
- xfree(grep_info.term_no);
-#endif
- xfree (grep_info.isam_p_buf);
- return result;
-}
-
-static RSET rpn_proximity (ZServerInfo *zi, RSET rset1, RSET rset2,
+static RSET rpn_proximity (ZebraHandle zh, RSET rset1, RSET rset2,
int ordered,
int exclusion, int relation, int distance)
{
RSFD rsfd_result;
RSET result;
rset_temp_parms parms;
+ int term_index;
- rsfd1 = rset_open (rset1, RSETF_READ|RSETF_SORT_SYSNO);
- more1 = rset_read (rset1, rsfd1, &buf1);
+ rsfd1 = rset_open (rset1, RSETF_READ);
+ more1 = rset_read (rset1, rsfd1, &buf1, &term_index);
- rsfd2 = rset_open (rset2, RSETF_READ|RSETF_SORT_SYSNO);
- more2 = rset_read (rset2, rsfd2, &buf2);
+ rsfd2 = rset_open (rset2, RSETF_READ);
+ more2 = rset_read (rset2, rsfd2, &buf2, &term_index);
parms.key_size = sizeof (struct it_key);
- parms.temp_path = res_get (zi->res, "setTmpDir");
+ parms.temp_path = res_get (zh->res, "setTmpDir");
result = rset_create (rset_kind_temp, &parms);
- rsfd_result = rset_open (result, RSETF_WRITE|RSETF_SORT_SYSNO);
+ rsfd_result = rset_open (result, RSETF_WRITE);
logf (LOG_DEBUG, "rpn_proximity excl=%d ord=%d rel=%d dis=%d",
exclusion, ordered, relation, distance);
{
int cmp = key_compare_it (&buf1, &buf2);
if (cmp < -1)
- more1 = rset_read (rset1, rsfd1, &buf1);
+ more1 = rset_read (rset1, rsfd1, &buf1, &term_index);
else if (cmp > 1)
- more2 = rset_read (rset2, rsfd2, &buf2);
+ more2 = rset_read (rset2, rsfd2, &buf2, &term_index);
else
{
int sysno = buf1.sysno;
int n = 0;
seqno[n++] = buf1.seqno;
- while ((more1 = rset_read (rset1, rsfd1, &buf1)) &&
+ while ((more1 = rset_read (rset1, rsfd1, &buf1, &term_index)) &&
sysno == buf1.sysno)
if (n < 500)
seqno[n++] = buf1.seqno;
if (excl)
rset_write (result, rsfd_result, &buf2);
}
- } while ((more2 = rset_read (rset2, rsfd2, &buf2)) &&
+ } while ((more2 = rset_read (rset2, rsfd2, &buf2, &term_index)) &&
sysno == buf2.sysno);
}
}
return result;
}
-static RSET rpn_prox (ZServerInfo *zi, RSET *rset, int rset_no)
+static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no)
{
int i;
RSFD *rsfd;
int *more;
struct it_key **buf;
- RSFD rsfd_result;
RSET result;
- rset_temp_parms parms;
+ char prox_term[1024];
+ int length_prox_term = 0;
+ int min_nn = 10000000;
+ int term_index;
+ const char *flags = NULL;
rsfd = xmalloc (sizeof(*rsfd)*rset_no);
more = xmalloc (sizeof(*more)*rset_no);
for (i = 0; i<rset_no; i++)
{
- buf[i] = xmalloc (sizeof(**buf));
- rsfd[i] = rset_open (rset[i], RSETF_READ|RSETF_SORT_SYSNO);
- if (!(more[i] = rset_read (rset[i], rsfd[i], buf[i])))
- {
- while (i >= 0)
- {
- rset_close (rset[i], rsfd[i]);
- xfree (buf[i]);
- --i;
- }
- xfree (rsfd);
- xfree (more);
- xfree (buf);
- return rset_create (rset_kind_null, NULL);
- }
+ int j;
+ buf[i] = xmalloc (sizeof(**buf));
+ rsfd[i] = rset_open (rset[i], RSETF_READ);
+ if (!(more[i] = rset_read (rset[i], rsfd[i], buf[i], &term_index)))
+ break;
+ for (j = 0; j<rset[i]->no_rset_terms; j++)
+ {
+ const char *nflags = rset[i]->rset_terms[j]->flags;
+ char *term = rset[i]->rset_terms[j]->name;
+ int lterm = strlen(term);
+ if (length_prox_term)
+ prox_term[length_prox_term++] = ' ';
+ strcpy (prox_term + length_prox_term, term);
+ length_prox_term += lterm;
+ if (min_nn > rset[i]->rset_terms[j]->nn)
+ min_nn = rset[i]->rset_terms[j]->nn;
+ flags = nflags;
+ }
}
- parms.key_size = sizeof (struct it_key);
- parms.temp_path = res_get (zi->res, "setTmpDir");
- result = rset_create (rset_kind_temp, &parms);
- rsfd_result = rset_open (result, RSETF_WRITE|RSETF_SORT_SYSNO);
-
- while (*more)
+ if (i != rset_no)
{
- for (i = 1; i<rset_no; i++)
- {
- int cmp;
-
- if (!more[i])
- {
- *more = 0;
- break;
- }
- cmp = key_compare_it (buf[i], buf[i-1]);
- if (cmp > 1)
- {
- more[i-1] = rset_read (rset[i-1], rsfd[i-1], buf[i-1]);
- break;
- }
- else if (cmp == 1)
- {
- if (buf[i-1]->seqno+1 != buf[i]->seqno)
- {
- more[i-1] = rset_read (rset[i-1], rsfd[i-1], buf[i-1]);
- break;
- }
- }
- else
- {
- more[i] = rset_read (rset[i], rsfd[i], buf[i]);
- break;
- }
- }
- if (i == rset_no)
- {
- rset_write (result, rsfd_result, buf[0]);
- more[0] = rset_read (*rset, *rsfd, *buf);
- }
+ rset_null_parms parms;
+
+ while (i >= 0)
+ {
+ rset_close (rset[i], rsfd[i]);
+ xfree (buf[i]);
+ --i;
+ }
+ parms.rset_term = rset_term_create (prox_term, -1, flags);
+ parms.rset_term->nn = 0;
+ result = rset_create (rset_kind_null, &parms);
}
-
- for (i = 0; i<rset_no; i++)
+ else
{
- rset_close (rset[i], rsfd[i]);
- xfree (buf[i]);
+ rset_temp_parms parms;
+ RSFD rsfd_result;
+
+ parms.rset_term = rset_term_create (prox_term, -1, flags);
+ parms.rset_term->nn = min_nn;
+ parms.key_size = sizeof (struct it_key);
+ parms.temp_path = res_get (zh->res, "setTmpDir");
+ result = rset_create (rset_kind_temp, &parms);
+ rsfd_result = rset_open (result, RSETF_WRITE);
+
+ while (*more)
+ {
+ for (i = 1; i<rset_no; i++)
+ {
+ int cmp;
+
+ if (!more[i])
+ {
+ *more = 0;
+ break;
+ }
+ cmp = key_compare_it (buf[i], buf[i-1]);
+ if (cmp > 1)
+ {
+ more[i-1] = rset_read (rset[i-1], rsfd[i-1],
+ buf[i-1], &term_index);
+ break;
+ }
+ else if (cmp == 1)
+ {
+ if (buf[i-1]->seqno+1 != buf[i]->seqno)
+ {
+ more[i-1] = rset_read (rset[i-1], rsfd[i-1],
+ buf[i-1], &term_index);
+ break;
+ }
+ }
+ else
+ {
+ more[i] = rset_read (rset[i], rsfd[i], buf[i],
+ &term_index);
+ break;
+ }
+ }
+ if (i == rset_no)
+ {
+ rset_write (result, rsfd_result, buf[0]);
+ more[0] = rset_read (*rset, *rsfd, *buf, &term_index);
+ }
+ }
+
+ for (i = 0; i<rset_no; i++)
+ {
+ rset_close (rset[i], rsfd[i]);
+ xfree (buf[i]);
+ }
+ rset_close (result, rsfd_result);
}
- rset_close (result, rsfd_result);
xfree (buf);
xfree (more);
xfree (rsfd);
return result;
}
-static RSET rpn_search_APT_phrase (ZServerInfo *zi,
+static RSET rpn_search_APT_phrase (ZebraHandle zh,
Z_AttributesPlusTerm *zapt,
+ const char *termz,
oid_value attributeSet,
int reg_type, int complete_flag,
- int num_bases, char **basenames)
+ const char *rank_type,
+ int num_bases, char **basenames)
{
- char termz[IT_MAX_WORD+1];
+ char term_dst[IT_MAX_WORD+1];
const char *termp = termz;
RSET rset[60], result;
int i, r, rset_no = 0;
struct grep_info grep_info;
- if (zapt->term->which != Z_Term_general)
- {
- zi->errCode = 124;
- return NULL;
- }
- trans_term (zi, zapt, termz);
-
#ifdef TERM_COUNT
grep_info.term_no = 0;
#endif
grep_info.isam_p_size = 0;
grep_info.isam_p_buf = NULL;
+ grep_info.zh = zh;
+ grep_info.reg_type = reg_type;
while (1)
{
logf (LOG_DEBUG, "APT_phrase termp=%s", termp);
grep_info.isam_p_indx = 0;
- r = field_term (zi, zapt, &termp, attributeSet, &grep_info,
- reg_type, complete_flag, num_bases, basenames);
+ r = field_term (zh, zapt, &termp, attributeSet, &grep_info,
+ reg_type, complete_flag, num_bases, basenames,
+ term_dst);
if (r < 1)
break;
- rset[rset_no] = rset_trunc (zi, grep_info.isam_p_buf,
- grep_info.isam_p_indx);
+ logf (LOG_DEBUG, "term: %s", term_dst);
+ rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
+ grep_info.isam_p_indx, term_dst,
+ strlen(term_dst), rank_type);
assert (rset[rset_no]);
if (++rset_no >= sizeof(rset)/sizeof(*rset))
break;
#endif
xfree (grep_info.isam_p_buf);
if (rset_no == 0)
- return rset_create (rset_kind_null, NULL);
+ {
+ rset_null_parms parms;
+
+ parms.rset_term = rset_term_create (term_dst, -1, rank_type);
+ return rset_create (rset_kind_null, &parms);
+ }
else if (rset_no == 1)
return (rset[0]);
- result = rpn_prox (zi, rset, rset_no);
+ result = rpn_prox (zh, rset, rset_no);
for (i = 0; i<rset_no; i++)
rset_delete (rset[i]);
return result;
}
-static RSET rpn_search_APT_local (ZServerInfo *zi, Z_AttributesPlusTerm *zapt,
- oid_value attributeSet)
+/*
+ * rpn_search_APT_or_list: search for each term found in termz and OR the
+ * per-term result sets together.
+ *
+ * field_term() is called repeatedly on termz; each successful call yields
+ * one term (copied to term_dst) whose ISAM positions are turned into a
+ * result set by rset_trunc(). At most 60 terms are handled (size of rset[]).
+ * If no term was produced, a null result set carrying a term entry built
+ * from term_dst and rank_type is returned. Otherwise the sets are chained
+ * left to right with rset_kind_or.
+ */
+static RSET rpn_search_APT_or_list (ZebraHandle zh,
+ Z_AttributesPlusTerm *zapt,
+ const char *termz,
+ oid_value attributeSet,
+ int reg_type, int complete_flag,
+ const char *rank_type,
+ int num_bases, char **basenames)
+{
+ char term_dst[IT_MAX_WORD+1];
+ const char *termp = termz;
+ RSET rset[60], result;
+ int i, r, rset_no = 0;
+ struct grep_info grep_info;
+
+ /* field_term() can return < 1 before it has stored anything in
+ * term_dst; keep the buffer a valid (empty) string so that the
+ * rset_term_create() call below never reads indeterminate data. */
+ term_dst[0] = '\0';
+
+#ifdef TERM_COUNT
+ grep_info.term_no = 0;
+#endif
+ grep_info.isam_p_size = 0;
+ grep_info.isam_p_buf = NULL;
+ grep_info.zh = zh;
+ grep_info.reg_type = reg_type;
+
+ while (1)
+ {
+ logf (LOG_DEBUG, "APT_or_list termp=%s", termp);
+ grep_info.isam_p_indx = 0;
+ r = field_term (zh, zapt, &termp, attributeSet, &grep_info,
+ reg_type, complete_flag, num_bases, basenames,
+ term_dst);
+ if (r < 1)
+ break;
+ logf (LOG_DEBUG, "term: %s", term_dst);
+ rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
+ grep_info.isam_p_indx, term_dst,
+ strlen(term_dst), rank_type);
+ assert (rset[rset_no]);
+ if (++rset_no >= sizeof(rset)/sizeof(*rset))
+ break;
+ }
+#ifdef TERM_COUNT
+ xfree(grep_info.term_no);
+#endif
+ xfree (grep_info.isam_p_buf);
+ if (rset_no == 0)
+ {
+ rset_null_parms parms;
+
+ parms.rset_term = rset_term_create (term_dst, -1, rank_type);
+ return rset_create (rset_kind_null, &parms);
+ }
+ result = rset[0];
+ for (i = 1; i<rset_no; i++)
+ {
+ rset_bool_parms bool_parms;
+
+ bool_parms.rset_l = result;
+ bool_parms.rset_r = rset[i];
+ bool_parms.key_size = sizeof(struct it_key);
+ bool_parms.cmp = key_compare_it;
+ result = rset_create (rset_kind_or, &bool_parms);
+ }
+ return result;
+}
+
+/*
+ * rpn_search_APT_and_list: search for each term found in termz and AND the
+ * per-term result sets together.
+ *
+ * field_term() is called repeatedly on termz; each successful call yields
+ * one term (copied to term_dst) whose ISAM positions are turned into a
+ * result set by rset_trunc(). At most 60 terms are handled (size of rset[]).
+ * If no term was produced, a null result set carrying a term entry built
+ * from term_dst and rank_type is returned. Otherwise the sets are chained
+ * left to right with rset_kind_and.
+ */
+static RSET rpn_search_APT_and_list (ZebraHandle zh,
+ Z_AttributesPlusTerm *zapt,
+ const char *termz,
+ oid_value attributeSet,
+ int reg_type, int complete_flag,
+ const char *rank_type,
+ int num_bases, char **basenames)
+{
+ char term_dst[IT_MAX_WORD+1];
+ const char *termp = termz;
+ RSET rset[60], result;
+ int i, r, rset_no = 0;
+ struct grep_info grep_info;
+
+ /* field_term() can return < 1 before it has stored anything in
+ * term_dst; keep the buffer a valid (empty) string so that the
+ * rset_term_create() call below never reads indeterminate data. */
+ term_dst[0] = '\0';
+
+#ifdef TERM_COUNT
+ grep_info.term_no = 0;
+#endif
+ grep_info.isam_p_size = 0;
+ grep_info.isam_p_buf = NULL;
+ grep_info.zh = zh;
+ grep_info.reg_type = reg_type;
+
+ while (1)
+ {
+ logf (LOG_DEBUG, "APT_and_list termp=%s", termp);
+ grep_info.isam_p_indx = 0;
+ r = field_term (zh, zapt, &termp, attributeSet, &grep_info,
+ reg_type, complete_flag, num_bases, basenames,
+ term_dst);
+ if (r < 1)
+ break;
+ logf (LOG_DEBUG, "term: %s", term_dst);
+ rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
+ grep_info.isam_p_indx, term_dst,
+ strlen(term_dst), rank_type);
+ assert (rset[rset_no]);
+ if (++rset_no >= sizeof(rset)/sizeof(*rset))
+ break;
+ }
+#ifdef TERM_COUNT
+ xfree(grep_info.term_no);
+#endif
+ xfree (grep_info.isam_p_buf);
+ if (rset_no == 0)
+ {
+ rset_null_parms parms;
+
+ parms.rset_term = rset_term_create (term_dst, -1, rank_type);
+ return rset_create (rset_kind_null, &parms);
+ }
+ result = rset[0];
+ for (i = 1; i<rset_no; i++)
+ {
+ rset_bool_parms bool_parms;
+
+ bool_parms.rset_l = result;
+ bool_parms.rset_r = rset[i];
+ bool_parms.key_size = sizeof(struct it_key);
+ bool_parms.cmp = key_compare_it;
+ result = rset_create (rset_kind_and, &bool_parms);
+ }
+ return result;
+}
+
+
+static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+ const char *termz,
+ oid_value attributeSet,
+ const char *rank_type)
{
RSET result;
RSFD rsfd;
struct it_key key;
rset_temp_parms parms;
- char termz[IT_MAX_WORD+1];
- if (zapt->term->which != Z_Term_general)
- {
- zi->errCode = 124;
- return NULL;
- }
+ parms.rset_term = rset_term_create (termz, -1, rank_type);
parms.key_size = sizeof (struct it_key);
- parms.temp_path = res_get (zi->res, "setTmpDir");
+ parms.temp_path = res_get (zh->res, "setTmpDir");
result = rset_create (rset_kind_temp, &parms);
- rsfd = rset_open (result, RSETF_WRITE|RSETF_SORT_SYSNO);
-
- trans_term (zi, zapt, termz);
+ rsfd = rset_open (result, RSETF_WRITE);
key.sysno = atoi (termz);
+ key.seqno = 1;
if (key.sysno <= 0)
key.sysno = 1;
rset_write (result, rsfd, &key);
return result;
}
-static RSET rpn_search_APT (ZServerInfo *zi, Z_AttributesPlusTerm *zapt,
+/*
+ * rpn_search_APT: evaluate one attributes-plus-term operand.
+ *
+ * zebra_maps_attr() supplies the register id, search type, rank type and
+ * complete flag for zapt. Only general terms are accepted (otherwise
+ * errCode 124 is set and NULL returned). The term is converted once with
+ * trans_term() and handed to the handler selected by search_type:
+ * "phrase", "and-list", "or-list" or "local". Any other search type sets
+ * errCode 118 and returns NULL.
+ */
+static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
oid_value attributeSet,
int num_bases, char **basenames)
{
- int reg_type;
+ unsigned reg_id;
char *search_type = NULL;
+ char *rank_type = NULL;
int complete_flag;
+ char termz[IT_MAX_WORD+1];
- zebra_maps_attr (zi->zebra_maps, zapt, &reg_type, &search_type,
- &complete_flag);
+ zebra_maps_attr (zh->zebra_maps, zapt, &reg_id, &search_type,
+ &rank_type, &complete_flag);
- logf (LOG_DEBUG, "reg_type=%c", reg_type);
+ logf (LOG_DEBUG, "reg_id=%c", reg_id);
logf (LOG_DEBUG, "complete_flag=%d", complete_flag);
logf (LOG_DEBUG, "search_type=%s", search_type);
+ logf (LOG_DEBUG, "rank_type=%s", rank_type);
+
+ if (zapt->term->which != Z_Term_general)
+ {
+ zh->errCode = 124;
+ return NULL;
+ }
+ trans_term (zh, zapt, termz);
+
if (!strcmp (search_type, "phrase"))
{
- return rpn_search_APT_phrase (zi, zapt, attributeSet,
- reg_type, complete_flag,
+ return rpn_search_APT_phrase (zh, zapt, termz, attributeSet,
+ reg_id, complete_flag, rank_type,
num_bases, basenames);
}
- else if (!strcmp (search_type, "ranked"))
+ else if (!strcmp (search_type, "and-list"))
+ {
+ return rpn_search_APT_and_list (zh, zapt, termz, attributeSet,
+ reg_id, complete_flag, rank_type,
+ num_bases, basenames);
+ }
+ else if (!strcmp (search_type, "or-list"))
{
- return rpn_search_APT_relevance (zi, zapt, attributeSet,
- reg_type, complete_flag,
- num_bases, basenames);
+ return rpn_search_APT_or_list (zh, zapt, termz, attributeSet,
+ reg_id, complete_flag, rank_type,
+ num_bases, basenames);
}
else if (!strcmp (search_type, "local"))
{
- return rpn_search_APT_local (zi, zapt, attributeSet);
+ return rpn_search_APT_local (zh, zapt, termz, attributeSet,
+ rank_type);
}
- zi->errCode = 118;
+ zh->errCode = 118;
return NULL;
}
-static RSET rpn_search_ref (ZServerInfo *zi, Z_ResultSetId *resultSetId)
-{
- ZServerSet *s;
-
- if (!(s = resultSetGet (zi, resultSetId)))
- return rset_create (rset_kind_null, NULL);
- return s->rset;
-}
-
-static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs,
+static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
oid_value attributeSet, ODR stream,
int num_bases, char **basenames)
{
{
Z_Operator *zop = zs->u.complex->roperator;
rset_bool_parms bool_parms;
- int soft = 0;
-
- bool_parms.rset_l = rpn_search_structure (zi, zs->u.complex->s1,
+ bool_parms.rset_l = rpn_search_structure (zh, zs->u.complex->s1,
attributeSet, stream,
num_bases, basenames);
if (bool_parms.rset_l == NULL)
return NULL;
- if (rset_is_ranked(bool_parms.rset_l))
- soft = 1;
- bool_parms.rset_r = rpn_search_structure (zi, zs->u.complex->s2,
+ bool_parms.rset_r = rpn_search_structure (zh, zs->u.complex->s2,
attributeSet, stream,
num_bases, basenames);
if (bool_parms.rset_r == NULL)
rset_delete (bool_parms.rset_l);
return NULL;
}
- if (rset_is_ranked(bool_parms.rset_r))
- soft = 1;
bool_parms.key_size = sizeof(struct it_key);
bool_parms.cmp = key_compare_it;
switch (zop->which)
{
case Z_Operator_and:
- r = rset_create (soft ? rset_kind_sand:rset_kind_and, &bool_parms);
+ r = rset_create (rset_kind_and, &bool_parms);
break;
case Z_Operator_or:
- r = rset_create (soft ? rset_kind_sor:rset_kind_or, &bool_parms);
+ r = rset_create (rset_kind_or, &bool_parms);
break;
case Z_Operator_and_not:
- r = rset_create (soft ? rset_kind_snot:rset_kind_not, &bool_parms);
+ r = rset_create (rset_kind_not, &bool_parms);
break;
case Z_Operator_prox:
if (zop->u.prox->which != Z_ProxCode_known)
{
- zi->errCode = 132;
+ zh->errCode = 132;
return NULL;
}
if (*zop->u.prox->proximityUnitCode != Z_ProxUnit_word)
{
char *val = odr_malloc (stream, 16);
- zi->errCode = 132;
- zi->errString = val;
+ zh->errCode = 132;
+ zh->errString = val;
sprintf (val, "%d", *zop->u.prox->proximityUnitCode);
return NULL;
}
- r = rpn_proximity (zi, bool_parms.rset_l, bool_parms.rset_r,
+ r = rpn_proximity (zh, bool_parms.rset_l, bool_parms.rset_r,
*zop->u.prox->ordered,
(!zop->u.prox->exclusion ? 0 :
*zop->u.prox->exclusion),
*zop->u.prox->distance);
break;
default:
- zi->errCode = 110;
+ zh->errCode = 110;
return NULL;
}
}
if (zs->u.simple->which == Z_Operand_APT)
{
logf (LOG_DEBUG, "rpn_search_APT");
- r = rpn_search_APT (zi, zs->u.simple->u.attributesPlusTerm,
+ r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
attributeSet, num_bases, basenames);
}
else if (zs->u.simple->which == Z_Operand_resultSetId)
{
logf (LOG_DEBUG, "rpn_search_ref");
- r = rpn_search_ref (zi, zs->u.simple->u.resultSetId);
+ r = resultSetRef (zh, zs->u.simple->u.resultSetId);
+ if (!r)
+ r = rset_create (rset_kind_null, NULL);
}
else
{
- zi->errCode = 3;
+ zh->errCode = 3;
return NULL;
}
}
else
{
- zi->errCode = 3;
+ zh->errCode = 3;
return NULL;
}
return r;
}
-void count_set_save (ZServerInfo *zi, RSET *r, int *count)
-{
- int psysno = 0;
- int kno = 0;
- struct it_key key;
- RSFD rfd, wfd;
- RSET w;
- rset_temp_parms parms;
- int maxResultSetSize = atoi (res_get_def (zi->res,
- "maxResultSetSize", "400"));
- logf (LOG_DEBUG, "count_set_save");
- *count = 0;
- parms.key_size = sizeof(struct it_key);
- parms.temp_path = res_get (zi->res, "setTmpDir");
- w = rset_create (rset_kind_temp, &parms);
- wfd = rset_open (w, RSETF_WRITE|RSETF_SORT_SYSNO);
- rfd = rset_open (*r, RSETF_READ|RSETF_SORT_SYSNO);
- while (rset_read (*r, rfd, &key))
- {
- if (key.sysno != psysno)
- {
- if (*count < maxResultSetSize)
- rset_write (w, wfd, &key);
- (*count)++;
- psysno = key.sysno;
- }
- kno++;
- }
- rset_close (*r, rfd);
- rset_delete (*r);
- rset_close (w, wfd);
- *r = w;
- logf (LOG_DEBUG, "%d keys, %d distinct sysnos", kno, *count);
-}
-
-static void count_set (RSET r, int *count)
+void rpn_search (ZebraHandle zh, ODR stream,
+ Z_RPNQuery *rpn, int num_bases, char **basenames,
+ const char *setname)
{
- int psysno = 0;
- int kno = 0;
- struct it_key key;
- RSFD rfd;
-
- logf (LOG_DEBUG, "count_set");
-
- *count = 0;
- rfd = rset_open (r, RSETF_READ|RSETF_SORT_SYSNO);
- while (rset_read (r, rfd, &key))
- {
- if (key.sysno != psysno)
- {
- psysno = key.sysno;
- (*count)++;
- }
- kno++;
- }
- rset_close (r, rfd);
- logf (LOG_DEBUG, "%d keys, %d distinct sysnos", kno, *count);
-}
-
-int rpn_search (ZServerInfo *zi, ODR stream,
- Z_RPNQuery *rpn, int num_bases, char **basenames,
- const char *setname, int *hits)
-{
- int i;
RSET rset;
oident *attrset;
oid_value attributeSet;
zlog_rpn (rpn);
- zi->errCode = 0;
- zi->errString = NULL;
+ zh->errCode = 0;
+ zh->errString = NULL;
+ zh->hits = 0;
attrset = oid_getentbyoid (rpn->attributeSetId);
attributeSet = attrset->value;
- rset = rpn_search_structure (zi, rpn->RPNStructure, attributeSet, stream,
+ rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet, stream,
num_bases, basenames);
if (!rset)
- return zi->errCode;
- if (rset_is_volatile(rset))
- count_set_save(zi, &rset, hits);
- else if ((i = rset_hits (rset)) >= 0)
- *hits = i;
- else
- count_set (rset, hits);
- resultSetAdd (zi, setname, 1, rset);
- if (zi->errCode)
- logf (LOG_DEBUG, "search error: %d", zi->errCode);
- return zi->errCode;
+ return;
+
+ resultSetAdd (zh, setname, 1, rset, &zh->hits);
+ if (zh->errCode)
+ logf (LOG_DEBUG, "search error: %d", zh->errCode);
}
struct scan_info_entry {
return 0;
}
-
-static void scan_term_untrans (ZServerInfo *zi, ODR stream, int reg_type,
+/*
+ * scan_term_untrans: convert the mapped term src back with term_untrans()
+ * and return a copy of the result, allocated with odr_malloc() on stream,
+ * through *dstp.
+ *
+ * NOTE(review): the intermediate buffer is a fixed 1024 bytes and
+ * term_untrans() applies no bound of its own - confirm that scan terms
+ * can never expand beyond that.
+ */
+static void scan_term_untrans (ZebraHandle zh, ODR stream, int reg_type,
char **dstp, const char *src)
-{
- char *dst = odr_malloc (stream, strlen(src)*2+1);
- *dstp = dst;
+{
+ char term_dst[1024];
+
+ term_untrans (zh, reg_type, term_dst, src);
+
+ /* Store the copy through the caller's out-parameter. */
+ *dstp = odr_malloc (stream, strlen(term_dst)+1);
+ strcpy (*dstp, term_dst);
+}
- while (*src)
+static void count_set (RSET r, int *count)
+{
+ int psysno = 0;
+ int kno = 0;
+ struct it_key key;
+ RSFD rfd;
+ int term_index;
+
+ logf (LOG_DEBUG, "count_set");
+
+ *count = 0;
+ rfd = rset_open (r, RSETF_READ);
+ while (rset_read (r, rfd, &key, &term_index))
{
- const char *cp = zebra_maps_output (zi->zebra_maps, reg_type, &src);
- while (*cp)
- *dst++ = *cp++;
+ if (key.sysno != psysno)
+ {
+ psysno = key.sysno;
+ (*count)++;
+ }
+ kno++;
}
- *dst = '\0';
+ rset_close (r, rfd);
+ logf (LOG_DEBUG, "%d keys, %d records", kno, *count);
}
-int rpn_scan (ZServerInfo *zi, ODR stream, Z_AttributesPlusTerm *zapt,
- oid_value attributeset,
- int num_bases, char **basenames,
- int *position, int *num_entries, struct scan_entry **list,
- int *status)
+void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
+ oid_value attributeset,
+ int num_bases, char **basenames,
+ int *position, int *num_entries, ZebraScanEntry **list,
+ int *is_partial)
{
int i;
int pos = *position;
AttrType use;
int use_value;
struct scan_info *scan_info_array;
- struct scan_entry *glist;
+ ZebraScanEntry *glist;
int ords[32], ord_no = 0;
int ptr[32];
- int reg_type;
+ unsigned reg_id;
char *search_type = NULL;
+ char *rank_type = NULL;
int complete_flag;
if (attributeset == VAL_NONE)
attr_init (&use, zapt, 1);
use_value = attr_find (&use, &attributeset);
- if (zebra_maps_attr (zi->zebra_maps, zapt, &reg_type, &search_type,
- &complete_flag))
+ if (zebra_maps_attr (zh->zebra_maps, zapt, &reg_id, &search_type,
+ &rank_type, &complete_flag))
{
- zi->errCode = 113;
- return zi->errCode;
+ zh->errCode = 113;
+ return ;
}
if (use_value == -1)
attent attp;
data1_local_attribute *local_attr;
- if ((r=att_getentbyatt (zi, &attp, attributeset, use_value)))
+ if ((r=att_getentbyatt (zh, &attp, attributeset, use_value)))
{
logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
attributeset, use_value);
if (r == -1)
- zi->errCode = 114;
+ zh->errCode = 114;
else
- zi->errCode = 121;
+ zh->errCode = 121;
}
- if (zebTargetInfo_curDatabase (zi->zti, basenames[base_no]))
+ if (zebraExplain_curDatabase (zh->zei, basenames[base_no]))
{
- zi->errString = basenames[base_no];
- return zi->errCode = 109; /* Database unavailable */
+ zh->errString = basenames[base_no];
+ zh->errCode = 109; /* Database unavailable */
+ return;
}
for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
local_attr = local_attr->next)
{
int ord;
- ord = zebTargetInfo_lookupSU (zi->zti, attp.attset_ordinal,
- local_attr->local);
+ ord = zebraExplain_lookupSU (zh->zei, attp.attset_ordinal,
+ local_attr->local);
if (ord > 0)
ords[ord_no++] = ord;
}
}
if (ord_no == 0)
- return zi->errCode = 113;
+ {
+ zh->errCode = 113;
+ return;
+ }
before = pos-1;
after = 1+num-pos;
scan_info_array = odr_malloc (stream, ord_no * sizeof(*scan_info_array));
struct scan_info *scan_info = scan_info_array + i;
struct rpn_char_map_info rcmi;
- rpn_char_map_prepare (zi, reg_type, &rcmi);
+ rpn_char_map_prepare (zh, reg_id, &rcmi);
scan_info->before = before;
scan_info->after = after;
for (j = 0; j<before+after; j++)
scan_info->list[j].term = NULL;
termz[prefix_len++] = ords[i];
- termz[prefix_len++] = reg_type;
+ termz[prefix_len++] = reg_id;
termz[prefix_len] = 0;
strcpy (scan_info->prefix, termz);
- trans_scan_term (zi, zapt, termz+prefix_len, reg_type);
+ trans_scan_term (zh, zapt, termz+prefix_len, reg_id);
- dict_scan (zi->dict, termz, &before_tmp, &after_tmp, scan_info,
+ dict_scan (zh->dict, termz, &before_tmp, &after_tmp, scan_info,
scan_handle);
}
glist = odr_malloc (stream, (before+after)*sizeof(*glist));
for (i = 0; i < ord_no; i++)
ptr[i] = before;
- *status = BEND_SCAN_SUCCESS;
+ *is_partial = 0;
for (i = 0; i<after; i++)
{
int j, j0 = -1;
}
if (j0 == -1)
break;
- scan_term_untrans (zi, stream, reg_type,
+ scan_term_untrans (zh, stream, reg_id,
&glist[i+before].term, mterm);
- rset = rset_trunc (zi, &scan_info_array[j0].list[ptr[j0]].isam_p, 1);
+ rset = rset_trunc (zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
+ glist[i+before].term, strlen(glist[i+before].term),
+ NULL);
ptr[j0]++;
for (j = j0+1; j<ord_no; j++)
RSET rset2;
rset2 =
- rset_trunc (zi, &scan_info_array[j].list[ptr[j]].isam_p, 1);
+ rset_trunc (zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
+ glist[i+before].term,
+ strlen(glist[i+before].term), NULL);
bool_parms.key_size = sizeof(struct it_key);
bool_parms.cmp = key_compare_it;
if (i < after)
{
*num_entries -= (after-i);
- *status = BEND_SCAN_PARTIAL;
+ *is_partial = 1;
}
for (i = 0; i<ord_no; i++)
if (j0 == -1)
break;
- scan_term_untrans (zi, stream, reg_type,
+ scan_term_untrans (zh, stream, reg_id,
&glist[before-1-i].term, mterm);
rset = rset_trunc
- (zi, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1);
+ (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
+ glist[before-1-i].term, strlen(glist[before-1-i].term),
+ NULL);
ptr[j0]++;
rset_bool_parms bool_parms;
RSET rset2;
- rset2 = rset_trunc (zi,
- &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1);
+ rset2 = rset_trunc (zh,
+ &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
+ glist[before-1-i].term,
+ strlen(glist[before-1-i].term), NULL);
bool_parms.key_size = sizeof(struct it_key);
bool_parms.cmp = key_compare_it;
i = before-i;
if (i)
{
- *status = BEND_SCAN_PARTIAL;
+ *is_partial = 1;
*position -= i;
*num_entries -= i;
}
logf (LOG_DEBUG, "position = %d, num_entries = %d",
*position, *num_entries);
- if (zi->errCode)
- logf (LOG_DEBUG, "scan error: %d", zi->errCode);
- return zi->errCode;
+ if (zh->errCode)
+ logf (LOG_DEBUG, "scan error: %d", zh->errCode);
}
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: zserver.c,v $
- * Revision 1.55 1998-02-10 12:03:06 adam
+ * Revision 1.56 1998-03-05 08:45:13 adam
+ * New result set model and modular ranking system. Moved towards
+ * decent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.55 1998/02/10 12:03:06 adam
* Implemented Sort.
*
* Revision 1.54 1998/01/29 13:39:13 adam
* Initial work on z39 server.
*
*/
+
#include <stdio.h>
#include <assert.h>
+#include <fcntl.h>
#ifdef WINDOWS
#include <io.h>
#include <process.h>
#else
#include <unistd.h>
#endif
-#include <fcntl.h>
#include <data1.h>
-#include <recctrl.h>
#include <dmalloc.h>
#include "zserver.h"
-static int register_lock (ZServerInfo *zi)
-{
- time_t lastChange;
- int state = zebra_server_lock_get_state(zi, &lastChange);
-
- switch (state)
- {
- case 'c':
- state = 1;
- break;
- default:
- state = 0;
- }
- zebra_server_lock (zi, state);
-#if USE_TIMES
- times (&zi->tms1);
-#endif
- if (zi->registerState == state)
- {
- if (zi->registerChange >= lastChange)
- return 0;
- logf (LOG_LOG, "Register completely updated since last access");
- }
- else if (zi->registerState == -1)
- logf (LOG_LOG, "Reading register using state %d pid=%ld", state,
- (long) getpid());
- else
- logf (LOG_LOG, "Register has changed state from %d to %d",
- zi->registerState, state);
- zi->registerChange = lastChange;
- if (zi->records)
- {
- zebTargetInfo_close (zi->zti, 0);
- dict_close (zi->dict);
- sortIdx_close (zi->sortIdx);
- if (zi->isam)
- is_close (zi->isam);
- if (zi->isamc)
- isc_close (zi->isamc);
- rec_close (&zi->records);
- }
- bf_cache (zi->bfs, state ? res_get (zi->res, "shadow") : NULL);
- zi->registerState = state;
- zi->records = rec_open (zi->bfs, 0);
- if (!(zi->dict = dict_open (zi->bfs, FNAME_DICT, 40, 0)))
- return -1;
- if (!(zi->sortIdx = sortIdx_open (zi->bfs, 0)))
- return -1;
- zi->isam = NULL;
- zi->isamc = NULL;
- if (!res_get_match (zi->res, "isam", "i", NULL))
- {
- if (!(zi->isamc = isc_open (zi->bfs, FNAME_ISAMC,
- 0, key_isamc_m(zi->res))))
- return -1;
-
- }
- else
- {
- if (!(zi->isam = is_open (zi->bfs, FNAME_ISAM, key_compare, 0,
- sizeof (struct it_key), zi->res)))
- return -1;
- }
- zi->zti = zebTargetInfo_open (zi->records, 0);
-
- return 0;
-}
-
-static void register_unlock (ZServerInfo *zi)
-{
- static int waitSec = -1;
-
-#if USE_TIMES
- times (&zi->tms2);
- logf (LOG_LOG, "user/system: %ld/%ld",
- (long) (zi->tms2.tms_utime - zi->tms1.tms_utime),
- (long) (zi->tms2.tms_stime - zi->tms1.tms_stime));
-#endif
- if (waitSec == -1)
- {
- char *s = res_get (zi->res, "debugRequestWait");
- if (s)
- waitSec = atoi (s);
- else
- waitSec = 0;
- }
-#ifdef WINDOWS
-#else
- if (waitSec > 0)
- sleep (waitSec);
-#endif
- if (zi->registerState != -1)
- zebra_server_unlock (zi, zi->registerState);
-}
-
static int bend_sort (void *handle, bend_sort_rr *rr);
bend_initresult *bend_init (bend_initrequest *q)
{
bend_initresult *r = odr_malloc (q->stream, sizeof(*r));
- ZServerInfo *zi = xmalloc (sizeof(*zi));
+ ZebraHandle zh;
struct statserv_options_block *sob;
r->errcode = 0;
r->errstring = 0;
- r->handle = zi;
q->bend_sort = bend_sort;
logf (LOG_DEBUG, "bend_init");
sob = statserv_getcontrol ();
- logf (LOG_LOG, "Reading resources from %s", sob->configname);
- if (!(zi->res = res_open (sob->configname)))
+ if (!(zh = zebra_open (NULL, sob->configname)))
{
- logf (LOG_FATAL, "Failed to read resources `%s'", sob->configname);
+ logf (LOG_FATAL, "Failed to open Zebra `%s'", sob->configname);
r->errcode = 1;
return r;
}
- zebra_server_lock_init (zi);
- zi->dh = data1_create ();
- zi->bfs = bfs_create (res_get (zi->res, "register"));
- bf_lockDir (zi->bfs, res_get (zi->res, "lockDir"));
- data1_set_tabpath (zi->dh, res_get(zi->res, "profilePath"));
- zi->sets = NULL;
- zi->registerState = -1; /* trigger open of registers! */
- zi->registerChange = 0;
-
- zi->records = NULL;
- zi->registered_sets = NULL;
- zi->zebra_maps = zebra_maps_open (res_get(zi->res, "profilePath"),
- zi->res);
+ r->handle = zh;
return r;
}
bend_searchresult *bend_search (void *handle, bend_searchrequest *q, int *fd)
{ /* Search handler.  The result is allocated on q->stream.  Type-1 and
   * type-101 queries are passed to zebra_search_rpn, after which the
   * error code, error string and hit count are copied from the Zebra
   * handle.  Any other query type yields errcode 107.  fd is not
   * used. */
- ZServerInfo *zi = handle;
+ ZebraHandle zh = handle;
bend_searchresult *r = odr_malloc (q->stream, sizeof(*r));
- r->errcode = 0;
- r->errstring = 0;
r->hits = 0;
-
- register_lock (zi);
- zi->errCode = 0;
- zi->errString = NULL;
-
+ r->errcode = 0;
+ r->errstring = NULL;
+
logf (LOG_LOG, "ResultSet '%s'", q->setname);
switch (q->query->which)
{
case Z_Query_type_1: case Z_Query_type_101:
- r->errcode = rpn_search (zi, q->stream, q->query->u.type_1,
- q->num_bases, q->basenames, q->setname,
- &r->hits);
- r->errstring = zi->errString;
+ zebra_search_rpn (zh, q->stream, q->query->u.type_1,
+ q->num_bases, q->basenames, q->setname);
+ r->errcode = zh->errCode; /* outcome is reported via the handle */
+ r->errstring = zh->errString;
+ r->hits = zh->hits;
break;
default:
r->errcode = 107;
}
- register_unlock (zi);
return r;
}
-struct fetch_control {
- int record_offset;
- int record_int_pos;
- char *record_int_buf;
- int record_int_len;
- int fd;
-};
-
-static int record_ext_read (void *fh, char *buf, size_t count)
-{
- struct fetch_control *fc = fh;
- return read (fc->fd, buf, count);
-}
-
-static off_t record_ext_seek (void *fh, off_t offset)
-{
- struct fetch_control *fc = fh;
- return lseek (fc->fd, offset + fc->record_offset, SEEK_SET);
-}
-
-static off_t record_ext_tell (void *fh)
-{
- struct fetch_control *fc = fh;
- return lseek (fc->fd, 0, SEEK_CUR) - fc->record_offset;
-}
-
-static off_t record_int_seek (void *fh, off_t offset)
-{
- struct fetch_control *fc = fh;
- return (off_t) (fc->record_int_pos = offset);
-}
-
-static off_t record_int_tell (void *fh)
-{
- struct fetch_control *fc = fh;
- return (off_t) fc->record_int_pos;
-}
-
-static int record_int_read (void *fh, char *buf, size_t count)
-{
- struct fetch_control *fc = fh;
- int l = fc->record_int_len - fc->record_int_pos;
- if (l <= 0)
- return 0;
- l = (l < count) ? l : count;
- memcpy (buf, fc->record_int_buf + fc->record_int_pos, l);
- fc->record_int_pos += l;
- return l;
-}
-
-static int record_fetch (ZServerInfo *zi, int sysno, int score, ODR stream,
- oid_value input_format, Z_RecordComposition *comp,
- oid_value *output_format, char **rec_bufp,
- int *rec_lenp, char **basenamep)
-{
- Record rec;
- char *fname, *file_type, *basename;
- RecType rt;
- struct recRetrieveCtrl retrieveCtrl;
- char subType[128];
- struct fetch_control fc;
-
- rec = rec_get (zi->records, sysno);
- if (!rec)
- {
- logf (LOG_DEBUG, "rec_get fail on sysno=%d", sysno);
- return 14;
- }
- file_type = rec->info[recInfo_fileType];
- fname = rec->info[recInfo_filename];
- basename = rec->info[recInfo_databaseName];
- *basenamep = odr_malloc (stream, strlen(basename)+1);
- strcpy (*basenamep, basename);
-
- if (!(rt = recType_byName (file_type, subType)))
- {
- logf (LOG_WARN, "Retrieve: Cannot handle type %s", file_type);
- return 14;
- }
- logf (LOG_DEBUG, "retrieve localno=%d score=%d", sysno, score);
- retrieveCtrl.fh = &fc;
- fc.fd = -1;
- if (rec->size[recInfo_storeData] > 0)
- {
- retrieveCtrl.readf = record_int_read;
- retrieveCtrl.seekf = record_int_seek;
- retrieveCtrl.tellf = record_int_tell;
- fc.record_int_len = rec->size[recInfo_storeData];
- fc.record_int_buf = rec->info[recInfo_storeData];
- fc.record_int_pos = 0;
- logf (LOG_DEBUG, "Internal retrieve. %d bytes", fc.record_int_len);
- }
- else
- {
- if ((fc.fd = open (fname, O_BINARY|O_RDONLY)) == -1)
- {
- logf (LOG_WARN|LOG_ERRNO, "Retrieve fail; missing file: %s",
- fname);
- rec_rm (&rec);
- return 14;
- }
- memcpy (&fc.record_offset, rec->info[recInfo_offset],
- sizeof(fc.record_offset));
-
- retrieveCtrl.readf = record_ext_read;
- retrieveCtrl.seekf = record_ext_seek;
- retrieveCtrl.tellf = record_ext_tell;
-
- record_ext_seek (retrieveCtrl.fh, 0);
- }
- retrieveCtrl.subType = subType;
- retrieveCtrl.localno = sysno;
- retrieveCtrl.score = score;
- retrieveCtrl.odr = stream;
- retrieveCtrl.input_format = retrieveCtrl.output_format = input_format;
- retrieveCtrl.comp = comp;
- retrieveCtrl.diagnostic = 0;
- retrieveCtrl.dh = zi->dh;
- (*rt->retrieve)(&retrieveCtrl);
- *output_format = retrieveCtrl.output_format;
- *rec_bufp = retrieveCtrl.rec_buf;
- *rec_lenp = retrieveCtrl.rec_len;
- if (fc.fd != -1)
- close (fc.fd);
- rec_rm (&rec);
-
- return retrieveCtrl.diagnostic;
-}
-
bend_fetchresult *bend_fetch (void *handle, bend_fetchrequest *q, int *num)
{
- ZServerInfo *zi = handle;
+ ZebraHandle zh = handle;
bend_fetchresult *r = odr_malloc (q->stream, sizeof(*r));
- int positions[2];
- ZServerSetSysno *records;
-
- register_lock (zi);
+ ZebraRetrievalRecord retrievalRecord;
- r->errstring = 0;
- r->last_in_set = 0;
- r->basename = "base";
-
- zi->errCode = 0;
+ retrievalRecord.position = q->number;
+
+ zebra_records_retrieve (zh, q->stream, q->setname, q->comp,
+ q->format, 1, &retrievalRecord);
- positions[0] = q->number;
- records = resultSetSysnoGet (zi, q->setname, 1, positions);
- if (!records)
+ if (zh->errCode)
{
- logf (LOG_DEBUG, "resultSetRecordGet, error");
- r->errcode = 13;
- register_unlock (zi);
- return r;
+ r->errcode = zh->errCode;
+ r->errstring = zh->errString;
}
- if (!records[0].sysno)
+ else
{
- r->errcode = 13;
- logf (LOG_DEBUG, "Out of range. pos=%d", q->number);
- register_unlock (zi);
- return r;
+ r->errcode = 0;
+ r->basename = retrievalRecord.base;
+ r->record = retrievalRecord.buf;
+ r->len = retrievalRecord.len;
+ r->format = retrievalRecord.format;
}
- r->errcode = record_fetch (zi, records[0].sysno,
- records[0].score, q->stream, q->format,
- q->comp, &r->format, &r->record, &r->len,
- &r->basename);
- resultSetSysnoDel (zi, records, 1);
- register_unlock (zi);
return r;
}
bend_deleteresult *bend_delete (void *handle, bend_deleterequest *q, int *num)
{ /* Does no work: always returns 0 (a null result pointer) and uses
   * none of its arguments. */
- ZServerInfo *zi = handle;
- register_lock (zi);
- register_unlock (zi);
return 0;
}
bend_scanresult *bend_scan (void *handle, bend_scanrequest *q, int *num)
{
- ZServerInfo *zi = handle;
+ ZebraScanEntry *entries;
+ ZebraHandle zh = handle;
bend_scanresult *r = odr_malloc (q->stream, sizeof(*r));
- int status;
-
- register_lock (zi);
- zi->errCode = 0;
- zi->errString = 0;
-
+ int is_partial, i;
+
r->term_position = q->term_position;
r->num_entries = q->num_entries;
- r->errcode = rpn_scan (zi, q->stream, q->term,
- q->attributeset,
- q->num_bases, q->basenames,
- &r->term_position,
- &r->num_entries, &r->entries, &status);
- r->errstring = zi->errString;
- r->status = status;
- register_unlock (zi);
+
+ r->entries = odr_malloc (q->stream, sizeof(*r->entries) * q->num_entries);
+ zebra_scan (zh, q->stream, q->term,
+ q->attributeset,
+ q->num_bases, q->basenames,
+ &r->term_position,
+ &r->num_entries, &entries, &is_partial);
+ if (is_partial)
+ r->status = BEND_SCAN_PARTIAL;
+ else
+ r->status = BEND_SCAN_SUCCESS;
+ for (i = 0; i < r->num_entries; i++)
+ {
+ r->entries[i].term = entries[i].term;
+ r->entries[i].occurrences = entries[i].occurrences;
+ }
+ r->errcode = zh->errCode;
+ r->errstring = zh->errString;
return r;
}
void bend_close (void *handle)
{ /* Session shutdown: the handle is passed to zebra_close. */
- ZServerInfo *zi = handle;
+ zebra_close ((ZebraHandle) handle);
+}
- if (zi->records)
- {
- resultSetDestroy (zi);
- zebTargetInfo_close (zi->zti, 0);
- dict_close (zi->dict);
- sortIdx_close (zi->sortIdx);
- if (zi->isam)
- is_close (zi->isam);
- if (zi->isamc)
- isc_close (zi->isamc);
- rec_close (&zi->records);
- register_unlock (zi);
- }
- zebra_maps_close (zi->zebra_maps);
- bfs_destroy (zi->bfs);
- data1_destroy (zi->dh);
- zebra_server_lock_destroy (zi);
+int bend_sort (void *handle, bend_sort_rr *rr) /* Sort handler: passes
+ * the request fields of rr to zebra_sort, then copies the error code
+ * and error string from the Zebra handle into rr.  Always returns 0;
+ * failures are reported through rr->errcode. */
+{
+ ZebraHandle zh = handle;
- res_close (zi->res);
- xfree (zi);
+ zebra_sort (zh, rr->stream, rr->num_input_setnames, rr->input_setnames,
+ rr->output_setname, rr->sort_sequence, &rr->sort_status);
+ rr->errcode = zh->errCode;
+ rr->errstring = zh->errString;
+ return 0;
}
#ifndef WINDOWS
}
#endif
-int bend_sort (void *handle, bend_sort_rr *rr)
-{
- ZServerInfo *zi = handle;
-
-#if 1
- register_lock (zi);
-
- resultSetSort (zi, rr);
-
- register_unlock (zi);
-#endif
- return 0;
-}
-
int main (int argc, char **argv)
{
struct statserv_options_block *sob;
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: zserver.h,v $
- * Revision 1.29 1998-02-10 12:03:06 adam
+ * Revision 1.30 1998-03-05 08:45:13 adam
+ * New result set model and modular ranking system. Moved towards
+ * decent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.29 1998/02/10 12:03:06 adam
* Implemented Sort.
*
* Revision 1.28 1998/01/29 13:40:11 adam
typedef struct {
int sysno;
int score;
-} ZServerSetSysno;
+} *ZebraPosSet;
-typedef struct ZServerSet_ {
- char *name;
- RSET rset;
- int size;
- struct zset_sort_info *sort_info;
- struct ZServerSet_ *next;
-} ZServerSet;
+typedef struct zebra_set *ZebraSet;
+
+typedef struct zebra_rank_class {
+ struct rank_control *control;
+ int init_flag;
+ void *class_handle;
+ struct zebra_rank_class *next;
+} *ZebraRankClass;
-typedef struct {
+struct zebra_info {
int registerState; /* 0 (no commit pages), 1 (use commit pages) */
time_t registerChange;
- ZServerSet *sets;
+ ZebraSet sets;
Dict dict;
SortIdx sortIdx;
ISAM isam;
ISAMC isamc;
Records records;
int errCode;
+ int hits;
char *errString;
- ZebTargetInfo *zti;
+ ZebraExplainInfo zei;
data1_handle dh;
data1_attset *registered_sets;
BFiles bfs;
struct tms tms2;
#endif
ZebraMaps zebra_maps;
-} ZServerInfo;
+ ZebraRankClass rank_classes;
+};
+
+typedef struct zebra_info *ZebraHandle;
+
+struct rank_control { /* Callback table describing one ranking
+ * method; zebraRankInstall takes a pointer to one of these.
+ * NOTE(review): the per-member notes below are inferred from the
+ * signatures and names only - confirm against rank1.c. */
+ char *name; /* presumably the key used by zebraRankLookup */
+ void *(*create)(ZebraHandle zh); /* presumably returns the class_handle */
+ void (*destroy)(ZebraHandle zh, void *class_handle);
+ void *(*begin)(ZebraHandle zh, void *class_handle, RSET rset); /* presumably returns the set_handle */
+ void (*end)(ZebraHandle zh, void *set_handle);
+ int (*calc)(void *set_handle, int sysno); /* presumably the score for sysno */
+ void (*add)(void *set_handle, int seqno, int term_index);
+};
+
+void rpn_search (ZebraHandle zh, ODR stream,
+ Z_RPNQuery *rpn, int num_bases, char **basenames,
+ const char *setname);
+
+
+typedef struct {
+ int occurrences;
+ char *term;
+} ZebraScanEntry;
-int rpn_search (ZServerInfo *zi, ODR stream,
- Z_RPNQuery *rpn, int num_bases, char **basenames,
- const char *setname, int *hits);
+void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
+ oid_value attributeset,
+ int num_bases, char **basenames,
+ int *position, int *num_entries, ZebraScanEntry **list,
+ int *is_partial);
-int rpn_scan (ZServerInfo *zi, ODR stream, Z_AttributesPlusTerm *zapt,
- oid_value attributeset,
- int num_bases, char **basenames,
- int *position, int *num_entries, struct scan_entry **list,
- int *status);
+RSET rset_trunc (ZebraHandle zh, ISAM_P *isam_p, int no,
+ const char *term, int length_term, const char *flags);
-RSET rset_trunc (ZServerInfo *zi, ISAM_P *isam_p, int no);
+ZebraSet resultSetAdd (ZebraHandle zh, const char *name,
+ int ov, RSET rset, int *hits);
+ZebraSet resultSetGet (ZebraHandle zh, const char *name);
+RSET resultSetRef (ZebraHandle zh, Z_ResultSetId *resultSetId);
+void resultSetDestroy (ZebraHandle zh);
-ZServerSet *resultSetAdd (ZServerInfo *zi, const char *name,
- int ov, RSET rset);
-ZServerSet *resultSetGet (ZServerInfo *zi, const char *name);
-void resultSetDestroy (ZServerInfo *zi);
+ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name,
+ int num, int *positions);
+void zebraPosSetDestroy (ZebraHandle zh, ZebraPosSet records, int num);
-ZServerSetSysno *resultSetSysnoGet (ZServerInfo *zi, const char *name,
- int num, int *positions);
-void resultSetSysnoDel (ZServerInfo *zi, ZServerSetSysno *records, int num);
+void resultSetSort (ZebraHandle zh, ODR stream,
+ int num_input_setnames, char **input_setnames,
+ char *output_setname, Z_SortKeySpecList *sort_sequence,
+ int *sort_status);
-int resultSetSort (ZServerInfo *zi, bend_sort_rr *rr);
+void zebra_sort (ZebraHandle zh, ODR stream,
+ int num_input_setnames, char **input_setnames,
+ char *output_setname, Z_SortKeySpecList *sort_sequence,
+ int *sort_status);
void zlog_rpn (Z_RPNQuery *rpn);
void zlog_scan (Z_AttributesPlusTerm *zapt, oid_value ast);
-int zebra_server_lock_init (ZServerInfo *zi);
-int zebra_server_lock_destroy (ZServerInfo *zi);
-int zebra_server_lock (ZServerInfo *zi, int lockCommit);
-void zebra_server_unlock (ZServerInfo *zi, int commitPhase);
-int zebra_server_lock_get_state (ZServerInfo *zi, time_t *timep);
+int zebra_server_lock_init (ZebraHandle zh);
+int zebra_server_lock_destroy (ZebraHandle zh);
+int zebra_server_lock (ZebraHandle zh, int lockCommit);
+void zebra_server_unlock (ZebraHandle zh, int commitPhase);
+int zebra_server_lock_get_state (ZebraHandle zh, time_t *timep);
typedef struct attent
{
data1_local_attribute *local_attributes;
} attent;
-int att_getentbyatt(ZServerInfo *zi, attent *res, oid_value set, int att);
+void zebraRankInstall (ZebraHandle zh, struct rank_control *ctrl);
+ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name);
+void zebraRankDestroy (ZebraHandle zh);
+
+int att_getentbyatt(ZebraHandle zh, attent *res, oid_value set, int att);
+
+extern struct rank_control *rank1_class;
+
+ZebraHandle zebra_open (const char *host, const char *configName);
+void zebra_search_rpn (ZebraHandle zh, ODR stream,
+ Z_RPNQuery *query, int num_bases, char **basenames,
+ const char *setname);
+
+typedef struct {
+ int position;
+ char *buf;
+ int len;
+ oid_value format;
+ char *base;
+} ZebraRetrievalRecord;
+
+void zebra_records_retrieve (ZebraHandle zh, ODR stream,
+ const char *setname, Z_RecordComposition *comp,
+ oid_value input_format,
+ int num_recs, ZebraRetrievalRecord *recs);
+
+int zebra_record_fetch (ZebraHandle zh, int sysno, int score, ODR stream,
+ oid_value input_format, Z_RecordComposition *comp,
+ oid_value *output_format, char **rec_bufp,
+ int *rec_lenp, char **basenamep);
+
+void zebra_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
+ oid_value attributeset,
+ int num_bases, char **basenames,
+ int *position, int *num_entries, ZebraScanEntry **list,
+ int *is_partial);
+
+void zebra_close (ZebraHandle zh);
+
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: zsets.c,v $
- * Revision 1.14 1998-02-10 16:39:15 adam
+ * Revision 1.15 1998-03-05 08:45:14 adam
+ * New result set model and modular ranking system. Moved towards
+ * decent server API. System information stored as "SGML" records.
+ *
+ * Revision 1.14 1998/02/10 16:39:15 adam
* Minor change.
*
* Revision 1.13 1998/02/10 12:03:06 adam
#define SORT_IDX_ENTRYSIZE 64
#define ZSET_SORT_MAX_LEVEL 3
+struct zebra_set { /* One named result set; the sets of a handle form
+ * a singly linked list headed by zh->sets. */
+ char *name; /* xmalloc'ed copy of the set name (see resultSetAdd) */
+ RSET rset; /* the result set; released with rset_delete */
+ int size; /* NOTE(review): not assigned by resultSetAdd - confirm where it is set */
+ struct zset_sort_info *sort_info; /* entry table allocated in resultSetAdd */
+ struct zebra_set *next; /* next set in the list, or NULL */
+};
+
struct zset_sort_entry {
int sysno;
+ int score;
char buf[ZSET_SORT_MAX_LEVEL][SORT_IDX_ENTRYSIZE];
};
struct zset_sort_entry **entries;
};
-void resultSetSortReset (struct zset_sort_info **si)
-{
- int i;
- if (!*si)
- return ;
- for (i = 0; i<(*si)->num_entries; i++)
- xfree ((*si)->entries[i]);
- xfree ((*si)->entries);
- xfree (*si);
- *si = NULL;
-}
+void resultSetRank (ZebraHandle zh, struct zset_sort_info *si,
+ RSET rset, int *hits);
-ZServerSet *resultSetAdd (ZServerInfo *zi, const char *name, int ov, RSET rset)
+ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov,
+ RSET rset, int *hits) /* Registers rset under name on zh.
+ * If a set with that name already exists: returns NULL when ov is 0;
+ * otherwise deletes the old rset, stores the new one and calls
+ * resultSetRank on the existing sort_info.
+ * If no such set exists: links a new set at the head of zh->sets,
+ * allocates a sort_info with 1000 entry slots and calls
+ * resultSetRank.
+ * hits is only passed on to resultSetRank.
+ * NOTE(review): sort_info->num_entries is not initialised here;
+ * presumably resultSetRank sets it - confirm. */
{
- ZServerSet *s;
+ ZebraSet s;
+ int i;
- for (s = zi->sets; s; s = s->next)
+ for (s = zh->sets; s; s = s->next)
if (!strcmp (s->name, name))
{
logf (LOG_DEBUG, "updating result set %s", name);
if (!ov)
return NULL; /* rset is left untouched on this path */
- resultSetSortReset (&s->sort_info);
rset_delete (s->rset);
s->rset = rset;
+ resultSetRank (zh, s->sort_info, rset, hits);
return s;
}
logf (LOG_DEBUG, "adding result set %s", name);
s = xmalloc (sizeof(*s));
- s->next = zi->sets;
- zi->sets = s;
+ s->next = zh->sets;
+ zh->sets = s;
s->name = xmalloc (strlen(name)+1);
strcpy (s->name, name);
s->rset = rset;
- s->sort_info = NULL;
+
+ s->sort_info = xmalloc (sizeof(*s->sort_info));
+ s->sort_info->max_entries = 1000;
+ s->sort_info->entries = xmalloc (sizeof(*s->sort_info->entries) *
+ s->sort_info->max_entries);
+ for (i = 0; i < s->sort_info->max_entries; i++) /* every slot gets its own entry */
+ s->sort_info->entries[i] = xmalloc (sizeof(**s->sort_info->entries));
+ resultSetRank (zh, s->sort_info, rset, hits);
return s;
}
-ZServerSet *resultSetGet (ZServerInfo *zi, const char *name)
+ZebraSet resultSetGet (ZebraHandle zh, const char *name)
{ /* Returns the first set in zh->sets whose name equals name
   * (strcmp), or NULL when there is none. */
- ZServerSet *s;
+ ZebraSet s;
- for (s = zi->sets; s; s = s->next)
+ for (s = zh->sets; s; s = s->next)
if (!strcmp (s->name, name))
return s;
return NULL;
}
-void resultSetDestroy (ZServerInfo *zi)
+/*
+ * resultSetDestroy: release every result set attached to zh and leave
+ * zh->sets empty.  For each set this frees all sort entries, the
+ * entry pointer array, the sort_info record, the rset (rset_delete),
+ * the name and the set itself.
+ */
+void resultSetDestroy (ZebraHandle zh)
{
- ZServerSet *s, *s1;
+ ZebraSet s, s1;
- for (s = zi->sets; s; s = s1)
+ for (s = zh->sets; s; s = s1)
{
+ int i;
s1 = s->next;
- resultSetSortReset (&s->sort_info);
+
+ for (i = 0; i < s->sort_info->max_entries; i++)
+ xfree (s->sort_info->entries[i]);
+ /* The pointer array is a separate allocation made in
+  * resultSetAdd and must be released as well. */
+ xfree (s->sort_info->entries);
+ xfree (s->sort_info);
+
rset_delete (s->rset);
xfree (s->name);
xfree (s);
}
- zi->sets = NULL;
+ zh->sets = NULL;
}
-ZServerSetSysno *resultSetSysnoGet (ZServerInfo *zi, const char *name,
- int num, int *positions)
+ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name,
+ int num, int *positions)
{
- ZServerSet *sset;
- ZServerSetSysno *sr;
+ ZebraSet sset;
+ ZebraPosSet sr;
RSET rset;
int i;
struct zset_sort_info *sort_info;
- if (!(sset = resultSetGet (zi, name)))
+ if (!(sset = resultSetGet (zh, name)))
return NULL;
if (!(rset = sset->rset))
return NULL;
{
logf (LOG_DEBUG, "got pos=%d (sorted)", position);
sr[i].sysno = sort_info->entries[position-1]->sysno;
+ sr[i].score = sort_info->entries[position-1]->score;
}
}
}
int position = 0;
int num_i = 0;
int psysno = 0;
+ int term_index;
RSFD rfd;
struct it_key key;
position = sort_info->num_entries;
while (num_i < num && positions[num_i] < position)
num_i++;
- rfd = rset_open (rset, RSETF_READ|RSETF_SORT_RANK);
- while (num_i < num && rset_read (rset, rfd, &key))
+ rfd = rset_open (rset, RSETF_READ);
+ while (num_i < num && rset_read (rset, rfd, &key, &term_index))
{
if (key.sysno != psysno)
{
{
sr[num_i].sysno = psysno;
logf (LOG_DEBUG, "got pos=%d (unsorted)", position);
- rset_score (rset, rfd, &sr[num_i].score);
+ sr[num_i].score = -1;
num_i++;
}
}
return sr;
}
-void resultSetSysnoDel (ZServerInfo *zi, ZServerSetSysno *records, int num)
+void zebraPosSetDestroy (ZebraHandle zh, ZebraPosSet records, int num)
{ /* Releases the records array with xfree; zh and num are not used. */
xfree (records);
}
int attrUse;
};
-void resultSetInsertSort (ZServerInfo *zi, ZServerSet *sset,
+void resultSetInsertSort (ZebraHandle zh, ZebraSet sset,
struct sortKey *criteria, int num_criteria,
int sysno)
{
struct zset_sort_info *sort_info = sset->sort_info;
int i, j;
- sortIdx_sysno (zi->sortIdx, sysno);
+ sortIdx_sysno (zh->sortIdx, sysno);
for (i = 0; i<num_criteria; i++)
{
- sortIdx_type (zi->sortIdx, criteria[i].attrUse);
- sortIdx_read (zi->sortIdx, this_entry.buf[i]);
+ sortIdx_type (zh->sortIdx, criteria[i].attrUse);
+ sortIdx_read (zh->sortIdx, this_entry.buf[i]);
}
i = sort_info->num_entries;
while (--i >= 0)
if (rel)
break;
}
- if (rel)
+ if (!rel)
+ break;
+ if (criteria[j].relation == 'D')
+ {
+ if (rel > 0)
+ break;
+ }
+ else if (criteria[j].relation == 'A')
{
- if (criteria[j].relation == 'D')
- if (rel > 0)
- break;
- if (criteria[j].relation == 'A')
- if (rel < 0)
- break;
+ if (rel < 0)
+ break;
}
}
j = sort_info->max_entries-1;
for (i = 0; i<num_criteria; i++)
memcpy (new_entry->buf[i], this_entry.buf[i], SORT_IDX_ENTRYSIZE);
new_entry->sysno = sysno;
+ new_entry->score = -1;
}
-
-int resultSetSort (ZServerInfo *zi, bend_sort_rr *rr)
+
+void resultSetInsertRank (ZebraHandle zh, struct zset_sort_info *sort_info,
+ int sysno, int score, int relation) /* Inserts (sysno, score) into
+ * the bounded entry table of sort_info.
+ * The table is scanned from its last used entry towards the first.
+ * With relation 'D' the scan stops at the first entry whose score is
+ * <= score; with 'A' at the first entry whose score is >= score;
+ * with any other relation it never stops early.  The new entry is
+ * placed right after the stopping point (at index 0 if the scan ran
+ * off the front).
+ * When the table is full, the entry in the last slot is recycled for
+ * the new data, or nothing is inserted if the stopping point is that
+ * last slot.  zh is not used. */
{
- ZServerSet *sset;
+ struct zset_sort_entry *new_entry = NULL;
+ int i, j;
+
+ i = sort_info->num_entries;
+ while (--i >= 0)
+ {
+ int rel = 0;
+
+ rel = score - sort_info->entries[i]->score;
+
+ if (relation == 'D')
+ {
+ if (rel >= 0)
+ break;
+ }
+ else if (relation == 'A')
+ {
+ if (rel <= 0)
+ break;
+ }
+ }
+ j = sort_info->max_entries-1;
+ if (i == j) /* stopped at the last slot of a full table: drop the new entry */
+ return;
+ ++i; /* i is now the insertion index */
+ new_entry = sort_info->entries[j]; /* reuse the struct held in the last slot */
+ while (j != i) /* shift entries i..j-1 one slot towards the end */
+ {
+ sort_info->entries[j] = sort_info->entries[j-1];
+ --j;
+ }
+ sort_info->entries[j] = new_entry;
+ assert (new_entry);
+ if (sort_info->num_entries != sort_info->max_entries)
+ (sort_info->num_entries)++;
+ new_entry->sysno = sysno;
+ new_entry->score = score;
+}
+
+void resultSetSort (ZebraHandle zh, ODR stream,
+ int num_input_setnames, char **input_setnames,
+ char *output_setname, Z_SortKeySpecList *sort_sequence,
+ int *sort_status)
+{
+ ZebraSet sset;
RSET rset;
int i, psysno = 0;
struct it_key key;
struct sortKey sort_criteria[3];
int num_criteria;
+ int term_index;
RSFD rfd;
- if (rr->num_input_setnames == 0)
+ if (num_input_setnames == 0)
{
- rr->errcode = 208;
- return 0;
+ zh->errCode = 208;
+ return ;
}
- if (rr->num_input_setnames > 1)
+ if (num_input_setnames > 1)
{
- rr->errcode = 230;
- return 0;
+ zh->errCode = 230;
+ return;
}
- sset = resultSetGet (zi, rr->input_setnames[0]);
+ sset = resultSetGet (zh, input_setnames[0]);
if (!sset)
{
- rr->errcode = 30;
- rr->errstring = rr->input_setnames[0];
- return 0;
+ zh->errCode = 30;
+ zh->errString = input_setnames[0];
+ return;
}
if (!(rset = sset->rset))
{
- rr->errcode = 30;
- rr->errstring = rr->input_setnames[0];
- return 0;
+ zh->errCode = 30;
+ zh->errString = input_setnames[0];
+ return;
}
- num_criteria = rr->sort_sequence->num_specs;
+ num_criteria = sort_sequence->num_specs;
if (num_criteria > 3)
num_criteria = 3;
for (i = 0; i < num_criteria; i++)
{
- Z_SortKeySpec *sks = rr->sort_sequence->specs[i];
+ Z_SortKeySpec *sks = sort_sequence->specs[i];
Z_SortKey *sk;
if (*sks->sortRelation == Z_SortRelation_ascending)
sort_criteria[i].relation = 'D';
else
{
- rr->errcode = 214;
- return 0;
+ zh->errCode = 214;
+ return;
}
if (sks->sortElement->which == Z_SortElement_databaseSpecific)
{
- rr->errcode = 210;
- return 0;
+ zh->errCode = 210;
+ return;
}
else if (sks->sortElement->which != Z_SortElement_generic)
{
- rr->errcode = 237;
- return 0;
+ zh->errCode = 237;
+ return;
}
sk = sks->sortElement->u.generic;
switch (sk->which)
{
case Z_SortKey_sortField:
logf (LOG_DEBUG, "Sort: key %d is of type sortField", i+1);
- rr->errcode = 207;
- return 0;
+ zh->errCode = 207;
+ return;
case Z_SortKey_elementSpec:
logf (LOG_DEBUG, "Sort: key %d is of type elementSpec", i+1);
- return 0;
+ zh->errCode = 207;
+ return;
case Z_SortKey_sortAttributes:
logf (LOG_DEBUG, "Sort: key %d is of type sortAttributes", i+1);
sort_criteria[i].attrUse =
- zebra_maps_sort (zi->zebra_maps, sk->u.sortAttributes);
+ zebra_maps_sort (zh->zebra_maps, sk->u.sortAttributes);
logf (LOG_DEBUG, "use value = %d", sort_criteria[i].attrUse);
if (sort_criteria[i].attrUse == -1)
{
- rr->errcode = 116;
- return 0;
+ zh->errCode = 116;
+ return;
}
- if (sortIdx_type (zi->sortIdx, sort_criteria[i].attrUse))
+ if (sortIdx_type (zh->sortIdx, sort_criteria[i].attrUse))
{
- rr->errcode = 207;
- return 0;
+ zh->errCode = 207;
+ return;
}
break;
}
}
- if (strcmp (rr->output_setname, rr->input_setnames[0]))
+ if (strcmp (output_setname, input_setnames[0]))
{
+ int hits;
rset = rset_dup (rset);
- sset = resultSetAdd (zi, rr->output_setname, 1, rset);
+ sset = resultSetAdd (zh, output_setname, 1, rset, &hits);
}
- resultSetSortReset (&sset->sort_info);
-
- sset->sort_info = xmalloc (sizeof(*sset->sort_info));
- sset->sort_info->max_entries = 100;
sset->sort_info->num_entries = 0;
- sset->sort_info->entries = xmalloc (sizeof(*sset->sort_info->entries) *
- sset->sort_info->max_entries);
- for (i = 0; i<sset->sort_info->max_entries; i++)
- sset->sort_info->entries[i] =
- xmalloc (sizeof(*sset->sort_info->entries[i]));
-
- rfd = rset_open (rset, RSETF_READ|RSETF_SORT_SYSNO);
- while (rset_read (rset, rfd, &key))
+ rfd = rset_open (rset, RSETF_READ);
+ while (rset_read (rset, rfd, &key, &term_index))
{
if (key.sysno != psysno)
{
psysno = key.sysno;
- resultSetInsertSort (zi, sset,
+ resultSetInsertSort (zh, sset,
sort_criteria, num_criteria, psysno);
}
}
rset_close (rset, rfd);
- rr->errcode = 0;
- rr->sort_status = Z_SortStatus_success;
+ zh->errCode = 0;
+ *sort_status = Z_SortStatus_success;
- return 0;
+ return;
+}
+
+/*
+ * resultSetRef: fetch the set identified by resultSetId via resultSetGet
+ * and return its rset member.  Returns NULL when resultSetGet finds no
+ * such set.
+ */
+RSET resultSetRef (ZebraHandle zh, Z_ResultSetId *resultSetId)
+{
+    ZebraSet found = resultSetGet (zh, resultSetId);
+
+    return found ? found->rset : NULL;
+}
+
+/*
+ * resultSetRank: read all keys of rset, score them with the ranking
+ * class named "rank-1" and insert the scored sysnos into sort_info.
+ *
+ *   zh         handle passed on to the ranking class handlers
+ *   sort_info  entry list to fill; its num_entries is reset to 0 first
+ *   rset       result set to read (opened with RSETF_READ, closed on exit)
+ *   hits       out: set to 0, then incremented for the first key and for
+ *              every later key whose sysno differs from the previous one
+ *
+ * For each key the class's add handler receives key.seqno and the term
+ * index; whenever key.sysno changes, and once more after the last key,
+ * the calc handler is asked for the score of the sysno just finished and
+ * that score is handed to resultSetInsertRank with relation 'A'.
+ *
+ * If no ranking class called "rank-1" is registered the function logs a
+ * warning, closes rset and returns with *hits == 0 and an empty entry
+ * list instead of dereferencing a NULL class.
+ */
+void resultSetRank (ZebraHandle zh, struct zset_sort_info *sort_info,
+                    RSET rset, int *hits)
+{
+    int kno = 0;
+    struct it_key key;
+    RSFD rfd;
+    int term_index, i;
+    ZebraRankClass rank_class;
+    struct rank_control *rc;
+
+    sort_info->num_entries = 0;
+    *hits = 0;
+    rfd = rset_open (rset, RSETF_READ);
+
+    logf (LOG_DEBUG, "resultSetRank");
+    for (i = 0; i < rset->no_rset_terms; i++)
+        logf (LOG_DEBUG, "term=\"%s\" cnt=%d type=%s",
+              rset->rset_terms[i]->name,
+              rset->rset_terms[i]->nn,
+              rset->rset_terms[i]->flags);
+
+    rank_class = zebraRankLookup (zh, "rank-1");
+    if (!rank_class)
+    {
+        /* zebraRankLookup returns NULL when the name is not registered */
+        logf (LOG_WARN, "resultSetRank: ranking class rank-1 not found");
+        rset_close (rset, rfd);
+        return;
+    }
+    rc = rank_class->control;
+
+    if (rset_read (rset, rfd, &key, &term_index))
+    {
+        int psysno = key.sysno;
+        int score;
+        void *handle =
+            (*rc->begin) (zh, rank_class->class_handle, rset);
+        (*hits)++;
+        do
+        {
+            kno++;
+            if (key.sysno != psysno)
+            {
+                /* sysno changed: score and store the one just finished */
+                score = (*rc->calc) (handle, psysno);
+
+                resultSetInsertRank (zh, sort_info, psysno, score, 'A');
+                (*hits)++;
+                psysno = key.sysno;
+            }
+            (*rc->add) (handle, key.seqno, term_index);
+        }
+        while (rset_read (rset, rfd, &key, &term_index));
+        /* the last sysno has not been scored inside the loop */
+        score = (*rc->calc) (handle, psysno);
+        resultSetInsertRank (zh, sort_info, psysno, score, 'A');
+        (*rc->end) (zh, handle);
+    }
+    rset_close (rset, rfd);
+    logf (LOG_DEBUG, "%d keys, %d distinct sysnos", kno, *hits);
}
+/*
+ * zebraRankLookup: search zh->rank_classes for the class whose control
+ * name equals name.
+ *
+ * The first time a class is returned (init_flag still 0) its create
+ * handler, when present, is called with zh and the result is stored in
+ * class_handle; init_flag is then set to 1 whether or not a create
+ * handler exists.
+ *
+ * Returns the matching class, or NULL when no class has that name.
+ */
+ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name)
+{
+    ZebraRankClass cls;
+
+    for (cls = zh->rank_classes; cls; cls = cls->next)
+        if (!strcmp (cls->control->name, name))
+            break;
+    if (!cls)
+        return NULL;
+    if (!cls->init_flag)
+    {
+        if (cls->control->create)
+            cls->class_handle = (*cls->control->create)(zh);
+        cls->init_flag = 1;
+    }
+    return cls;
+}
+
+/*
+ * zebraRankInstall: register a ranking class with zh.
+ *
+ * A private copy of *ctrl is made, including a duplicate of its name
+ * string, and the new class is pushed onto the front of
+ * zh->rank_classes.  The class starts with init_flag 0 and a NULL
+ * class_handle.
+ *
+ * class_handle is set to NULL here because zebraRankLookup sets
+ * init_flag even for a class without a create handler, and
+ * zebraRankDestroy passes class_handle to the destroy handler whenever
+ * init_flag is set; without this the handler could receive an
+ * uninitialised pointer (assuming xmalloc does not zero memory).
+ */
+void zebraRankInstall (ZebraHandle zh, struct rank_control *ctrl)
+{
+    ZebraRankClass p = xmalloc (sizeof(*p));
+    p->control = xmalloc (sizeof(*p->control));
+    memcpy (p->control, ctrl, sizeof(*p->control));
+    p->control->name = xstrdup (ctrl->name);
+    p->init_flag = 0;
+    p->class_handle = NULL;
+    p->next = zh->rank_classes;
+    zh->rank_classes = p;
+}
+
+/*
+ * zebraRankDestroy: release every ranking class registered with zh.
+ *
+ * For each class whose init_flag is set and whose control has a destroy
+ * handler, that handler is called with zh and the class_handle.  The
+ * copied name, the control copy and the class itself are then freed.
+ * zh->rank_classes is NULL afterwards.
+ */
+void zebraRankDestroy (ZebraHandle zh)
+{
+    ZebraRankClass cls, following;
+
+    for (cls = zh->rank_classes; cls; cls = following)
+    {
+        /* remember the successor before cls is freed */
+        following = cls->next;
+        if (cls->init_flag && cls->control->destroy)
+            (*cls->control->destroy)(zh, cls->class_handle);
+        xfree (cls->control->name);
+        xfree (cls->control);
+        xfree (cls);
+    }
+    zh->rank_classes = NULL;
+}