Added simple staticrank algorithm/example.
-## $Id: Makefile.am,v 1.31 2005-05-03 09:11:34 adam Exp $
+## $Id: Makefile.am,v 1.32 2005-08-19 09:21:34 adam Exp $
noinst_PROGRAMS = apitest kdump
libidzebra_api_la_SOURCES = dir.c dirs.c trav.c kinput.c kcompare.c \
attribute.c symtab.c recindex.c recstat.c lockutil.c \
zebraapi.c zinfo.c invstat.c sortidx.c compact.c zsets.c zrpn.c \
- rank1.c trunc.c retrieve.c extract.c \
+ rank1.c trunc.c retrieve.c extract.c rankstatic.c \
index.h recindex.h recindxp.h \
zinfo.h zserver.h zvrank.c limit.c kcontrol.c
-/* $Id: index.h,v 1.147 2005-08-18 12:50:17 adam Exp $
+/* $Id: index.h,v 1.148 2005-08-19 09:21:34 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
* int rssize; // number of records in result set (estimate?)
*/
void (*end)(struct zebra_register *reg, void *set_handle);
- int (*calc)(void *set_handle, zint sysno);
+ int (*calc)(void *set_handle, zint sysno, zint staticrank);
void (*add)(void *set_handle, int seqno, TERMID term);
};
int att_getentbyatt(ZebraHandle zh, attent *res, oid_value set, int att,
const char *sattr);
-extern struct rank_control *rank1_class;
-extern struct rank_control *rankzv_class;
-extern struct rank_control *rankliv_class;
+extern struct rank_control *rank_1_class;
+extern struct rank_control *rank_zv_class;
+extern struct rank_control *rank_static_class;
int zebra_record_fetch (ZebraHandle zh, SYSNO sysno, int score,
zebra_snippets *hit_snippet, ODR stream,
-/* $Id: rank1.c,v 1.25 2005-06-14 20:28:54 adam Exp $
+/* $Id: rank1.c,v 1.26 2005-08-19 09:21:34 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
#include <unistd.h>
#endif
-
#include "index.h"
-static int log_level=0;
-static int log_initialized=0;
+static int log_level = 0;
+static int log_initialized = 0;
struct rank_class_info {
int dummy;
if (!log_initialized)
{
- log_level=yaz_log_module_level("rank1");
- log_initialized=1;
+ log_level = yaz_log_module_level("rank1");
+ log_initialized = 1;
}
- yaz_log (log_level, "rank-1 create");
+ yaz_log(log_level, "rank-1 create");
return ci;
}
{
struct rank_class_info *ci = (struct rank_class_info *) class_handle;
- yaz_log (log_level, "rank-1 destroy");
+ yaz_log(log_level, "rank-1 destroy");
xfree (ci);
}
(struct rank_set_info *) nmem_malloc (nmem,sizeof(*si));
int i;
- yaz_log (log_level, "rank-1 begin");
+ yaz_log(log_level, "rank-1 begin");
si->no_entries = numterms;
si->no_rank_entries = 0;
si->nmem=nmem;
si->entries[i].rank_weight = atoi (cp+3);
else
si->entries[i].rank_weight = 34;
- yaz_log (log_level, " i=%d weight=%d g="ZINT_FORMAT, i,
+ yaz_log(log_level, " i=%d weight=%d g="ZINT_FORMAT, i,
si->entries[i].rank_weight, g);
(si->no_rank_entries)++;
}
si->entries[i].local_occur = 0; /* FIXME */
si->entries[i].global_occur = g;
si->entries[i].global_inv = 32 - log2_int (g);
- yaz_log (log_level, " global_inv = %d g = " ZINT_FORMAT,
+ yaz_log(log_level, " global_inv = %d g = " ZINT_FORMAT,
(int) (32-log2_int (g)), g);
- si->entries[i].term=terms[i];
+ si->entries[i].term = terms[i];
si->entries[i].term_index=i;
- terms[i]->rankpriv=&(si->entries[i]);
+ terms[i]->rankpriv = &(si->entries[i]);
}
return si;
}
*/
static void end (struct zebra_register *reg, void *set_handle)
{
- yaz_log (log_level, "rank-1 end");
+ yaz_log(log_level, "rank-1 end");
/* no need to free anything, they are in nmems */
}
assert(si);
if (!term)
{
- yaz_log (log_level, "rank-1 add NULL term");
+ yaz_log(log_level, "rank-1 add NULL term");
return;
}
ti= (struct rank_term_info *) term->rankpriv;
assert(ti);
si->last_pos = seqno;
ti->local_occur++;
- yaz_log (log_level, "rank-1 add seqno=%d term=%s count=%d",
+ yaz_log(log_level, "rank-1 add seqno=%d term=%s count=%d",
seqno, term->name,ti->local_occur);
}
* score should be between 0 and 1000. If score cannot be obtained
* -1 should be returned.
*/
-static int calc (void *set_handle, zint sysno)
+static int calc (void *set_handle, zint sysno, zint staticrank)
{
int i, lo, divisor, score = 0;
struct rank_set_info *si = (struct rank_set_info *) set_handle;
- if (!si->no_rank_entries)
- return -1;
+ if (!si->no_rank_entries)
+ return -1; /* ranking not enabled for any terms */
for (i = 0; i < si->no_entries; i++)
{
}
divisor = si->no_rank_entries * (8+log2_int (si->last_pos/si->no_entries));
score = score / divisor;
- yaz_log (log_level, "calc sysno=" ZINT_FORMAT " score=%d", sysno, score);
+ yaz_log(log_level, "calc sysno=" ZINT_FORMAT " score=%d", sysno, score);
if (score > 1000)
score = 1000;
/* reset the counts for the next term */
add,
};
-struct rank_control *rank1_class = &rank_control;
+struct rank_control *rank_1_class = &rank_control;
--- /dev/null
+/* $Id: rankstatic.c,v 1.1 2005-08-19 09:21:34 adam Exp $
+ Copyright (C) 1995-2005
+ Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include <assert.h>
+#ifdef WIN32
+#include <io.h>
+#endif
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "index.h"
+
+static int log_level = 0; /* yaz log level used by every handler in this file; assigned in create() */
+static int log_initialized = 0; /* set to 1 once log_level has been looked up */
+
+struct rank_set_info { /* per-result-set state; allocated in begin() and read in calc() */
+ int no_rank_entries; /* number of terms whose flags string begins with "rank," */
+};
+
+/*
+ * create: Creates/Initialises this rank handler. This routine is
+ * called exactly once. The routine returns the class_handle.
+ *
+ * On the first call the "rankstatic" log module level is looked up
+ * and cached in log_level. This handler keeps no class-level state,
+ * so the class_handle it returns is always 0. The zh argument is not
+ * used.
+ */
+static void *create (ZebraHandle zh)
+{
+ if (!log_initialized)
+ { /* look the log level up only once */
+ log_level = yaz_log_module_level("rankstatic");
+ log_initialized = 1;
+ }
+ yaz_log(log_level, "rank-static create");
+ return 0; /* no class handle for this method */
+}
+
+/*
+ * destroy: Destroys this rank handler. This routine is called
+ * when the handler is no longer needed - i.e. when the server
+ * dies. The class_handle was previously returned by create.
+ *
+ * create returns 0 and allocates nothing, so this routine only
+ * writes a log line; reg and class_handle are not used.
+ */
+static void destroy (struct zebra_register *reg, void *class_handle)
+{
+ yaz_log(log_level, "rank-static destroy");
+}
+
+
+/**
+ * begin: Prepares beginning of "real" ranking. Called once for
+ * each result set. The returned handle is a "set handle" and
+ * will be used in each of the handlers below.
+ *
+ * Allocates a struct rank_set_info from nmem and stores in it the
+ * number of entries in terms[0..numterms-1] whose flags string starts
+ * with "rank,". Each term's index, flags and name are logged.
+ * reg, class_handle and rset are not used by this implementation.
+ *
+ * NOTE(review): terms[i] and terms[i]->flags are dereferenced without
+ * a NULL check - confirm the caller never passes NULL entries.
+ */
+static void *begin (struct zebra_register *reg,
+ void *class_handle, RSET rset, NMEM nmem,
+ TERMID *terms, int numterms)
+{
+ struct rank_set_info *si =
+ (struct rank_set_info *) nmem_malloc (nmem, sizeof(*si));
+ int i;
+
+ yaz_log(log_level, "rank-static begin");
+ /* count how many terms are ranked (2=102 or similar) */
+ si->no_rank_entries = 0;
+ for (i = 0; i < numterms; i++)
+ {
+ yaz_log(log_level, "i=%d flags=%s '%s'", i,
+ terms[i]->flags, terms[i]->name );
+ if (!strncmp (terms[i]->flags, "rank,", 5)) /* flags begin with "rank," */
+ (si->no_rank_entries)++;
+ }
+ return si; /* handed back to add/calc/end as set_handle */
+}
+
+/*
+ * end: Terminates ranking process. Called after a result set
+ * has been ranked.
+ *
+ * This implementation only writes a log line. The set handle that
+ * begin obtained with nmem_malloc is not freed here; reg and
+ * set_handle are not used.
+ */
+static void end (struct zebra_register *reg, void *set_handle)
+{
+ yaz_log(log_level, "rank-static end");
+}
+
+
+/**
+ * add: Called for each word occurrence in a result set. This routine
+ * should be as fast as possible. This routine should "incrementally"
+ * update the score.
+ *
+ * In this handler the body is empty: calc derives its result from the
+ * staticrank argument only, so nothing is accumulated per occurrence
+ * and set_handle, seqno and term are not used.
+ */
+static void add (void *set_handle, int seqno, TERMID term)
+{
+}
+
+/*
+ * calc: Called for each document in a result. This handler should
+ * produce a score based on previous call(s) to the add handler. The
+ * score should be between 0 and 1000. If score cannot be obtained
+ * -1 should be returned.
+ *
+ * set_handle is the struct rank_set_info returned by begin; sysno is
+ * not used. Returns -1 when begin counted no "rank," terms, otherwise
+ * staticrank+10, implicitly converted from zint to int.
+ *
+ * NOTE(review): the returned value is not clamped to the 0..1000
+ * range described above, and the zint-to-int conversion is unchecked
+ * - confirm callers tolerate this.
+ */
+static int calc (void *set_handle, zint sysno, zint staticrank)
+{
+ struct rank_set_info *si = (struct rank_set_info *) set_handle;
+
+ if (!si->no_rank_entries)
+ return -1; /* ranking not enabled for any terms */
+
+ return staticrank+10; /* score depends on the static rank only */
+}
+
+/*
+ * Pseudo-meta code with sequence of calls as they occur in a
+ * server. Handlers are prefixed by --:
+ *
+ * server init
+ * -- create
+ * foreach search
+ * rank result set
+ * -- begin
+ * foreach record
+ * foreach word
+ * -- add
+ * -- calc
+ * -- end
+ * -- destroy
+ * server close
+ */
+
+static struct rank_control rank_control = { /* handler table for the static-rank method */
+ "rank-static", /* handler name; presumably what a "rank: rank-static" setting selects - confirm */
+ create,
+ destroy,
+ begin,
+ end,
+ calc,
+ add,
+};
+
+struct rank_control *rank_static_class = &rank_control; /* exported handle; declared extern in index.h and passed to zebraRankInstall() in zebraapi.c */
-/* $Id: zebraapi.c,v 1.182 2005-08-18 12:50:17 adam Exp $
+/* $Id: zebraapi.c,v 1.183 2005-08-19 09:21:34 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
reg->key_file_no = 0;
reg->ptr_i = 0;
- zebraRankInstall (reg, rank1_class);
- zebraRankInstall (reg, rankzv_class);
+ zebraRankInstall (reg, rank_1_class);
+ zebraRankInstall (reg, rank_zv_class);
+ zebraRankInstall (reg, rank_static_class);
recordCompression = res_get_def (res, "recordCompression", "none");
if (!strcmp (recordCompression, "none"))
-/* $Id: zsets.c,v 1.92 2005-08-18 19:20:38 adam Exp $
+/* $Id: zsets.c,v 1.93 2005-08-19 09:21:34 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
void *handle =
(*rc->begin) (zh->reg, rank_class->class_handle, rset, nmem,
terms, numTerms);
- zint psysno = 0;
+ zint psysno = 0; /* previous doc id / sys no */
+ zint pstaticrank = 0; /* previous static rank */
while (rset_read(rfd, &key, &termid))
{
zint this_sys = key.mem[sysno_mem_index];
+
zint seqno = key.mem[key.len-1];
kno++;
if (log_level_searchhits)
key_logdump_txt(log_level_searchhits, &key, termid->name);
- if (this_sys != psysno)
- {
+ if (this_sys != psysno)
+ { /* new record .. */
if (rfd->counted_items > rset->hits_limit)
break;
if (psysno)
- {
- score = (*rc->calc) (handle, psysno);
+ { /* only if we did have a previous record */
+ score = (*rc->calc) (handle, psysno, pstaticrank);
+ /* insert the hit. A=Ascending */
resultSetInsertRank (zh, sort_info, psysno, score, 'A');
count++;
}
psysno = this_sys;
+ if (zh->m_staticrank)
+ pstaticrank = key.mem[0];
}
(*rc->add) (handle, CAST_ZINT_TO_INT(seqno), termid);
}
+ /* no more items */
if (psysno)
- {
- score = (*rc->calc)(handle, psysno);
+ { /* we had - at least - one record */
+ score = (*rc->calc)(handle, psysno, pstaticrank);
+ /* insert the hit. A=Ascending */
resultSetInsertRank(zh, sort_info, psysno, score, 'A');
count++;
}
-/* $Id: zvrank.c,v 1.17 2005-05-24 11:27:19 adam Exp $
+/* $Id: zvrank.c,v 1.18 2005-08-19 09:21:34 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
* score should be between 0 and 1000. If score cannot be obtained
* -1 should be returned.
*/
-static int zv_calc (void *rsi, zint sysno)
+static int zv_calc (void *rsi, zint sysno, zint staticrank)
{
int i, veclen;
int score=0;
zv_add,
};
-struct rank_control *rankzv_class = &rank_control_vsm;
+struct rank_control *rank_zv_class = &rank_control_vsm;
-/* EOF */
staticrank: 1
+rank: rank-static