-## $Id: Makefile.am,v 1.37 2006-05-01 08:27:20 adam Exp $
+## $Id: Makefile.am,v 1.38 2006-05-03 09:31:25 marc Exp $
noinst_PROGRAMS = apitest kdump zebrasrv2 zebraidx2
lib_LTLIBRARIES = libidzebra-api.la libidzebra-all.la
libidzebra_all_la_SOURCES =
-libidzebra_api_la_SOURCES = dir.c dirs.c trav.c kinput.c kcompare.c \
- attribute.c symtab.c recindex.c recstat.c \
- zebraapi.c api_swig.c \
- zinfo.c invstat.c sortidx.c compact.c zsets.c zrpn.c \
- rank1.c trunc.c retrieve.c extract.c rankstatic.c \
- index.h recindex.h recindxp.h reckeys.c reckeys.h \
- zinfo.h zserver.h zvrank.c limit.c kcontrol.c orddict.c orddict.h
+libidzebra_api_la_SOURCES = \
+ api_swig.c attribute.c \
+ compact.c \
+ dir.c dirs.c \
+ extract.c \
+ index.h invstat.c \
+ kinput.c kcompare.c kcontrol.c \
+ limit.c \
+ orddict.c orddict.h \
+ rank.h rank1.c ranksimilarity.c rankstatic.c \
+ recindex.c recindex.h recindxp.h reckeys.c reckeys.h recstat.c retrieve.c \
+ sortidx.c symtab.c \
+ trav.c trunc.c \
+ zebraapi.c zinfo.c zinfo.h zserver.h zsets.c zrpn.c \
+ zvrank.c
bin_PROGRAMS = zebraidx zebrasrv zebrash
-/* $Id: index.h,v 1.158 2006-04-05 02:11:44 adam Exp $
+/* $Id: index.h,v 1.159 2006-05-03 09:31:26 marc Exp $
Copyright (C) 1995-2005
Index Data ApS
struct zebra_limit *m_limit;
};
-struct rank_control {
- char *name;
- void *(*create)(ZebraHandle zh);
- void (*destroy)(struct zebra_register *reg, void *class_handle);
- void *(*begin)(struct zebra_register *reg,
- void *class_handle, RSET rset, NMEM nmem,
- TERMID *terms, int numterms);
- /* ### Could add parameters to begin:
- * char *index; // author, title, etc.
- * int dbsize; // number of records in database
- * int rssize; // number of records in result set (estimate?)
- */
- void (*end)(struct zebra_register *reg, void *set_handle);
- int (*calc)(void *set_handle, zint sysno, zint staticrank,
- int *stop_flag);
- void (*add)(void *set_handle, int seqno, TERMID term);
-};
struct term_set_entry {
char *term;
data1_local_attribute *local_attributes;
} attent;
-void zebraRankInstall (struct zebra_register *reg, struct rank_control *ctrl);
-ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name);
-void zebraRankDestroy (struct zebra_register *reg);
-
int att_getentbyatt(ZebraHandle zh, attent *res, oid_value set, int att,
const char *sattr);
-extern struct rank_control *rank_1_class;
-extern struct rank_control *rank_zv_class;
-extern struct rank_control *rank_static_class;
-
int zebra_record_fetch (ZebraHandle zh, SYSNO sysno, int score,
zebra_snippets *hit_snippet, ODR stream,
oid_value input_format, Z_RecordComposition *comp,
--- /dev/null
+/* $Id: rank.h,v 1.1 2006-05-03 09:31:26 marc Exp $
+ Copyright (C) 1995-2005
+ Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+#ifndef RANK_H
+#define RANK_H
+
+#include <idzebra/api.h>
+
+YAZ_BEGIN_CDECL
+
+struct rank_control { /* callback table describing one ranking class */
+ char *name; /* class name, e.g. "rank-similarity" */
+ void *(*create)(ZebraHandle zh); /* set up the class; returns the class handle */
+ void (*destroy)(struct zebra_register *reg, void *class_handle); /* release the handle returned by create */
+ void *(*begin)(struct zebra_register *reg,
+ void *class_handle, RSET rset, NMEM nmem,
+ TERMID *terms, int numterms); /* start ranking one result set; returns the set handle */
+ /* ### Could add parameters to begin:
+ * char *index; // author, title, etc.
+ * int dbsize; // number of records in database
+ * int rssize; // number of records in result set (estimate?)
+ */
+ void (*end)(struct zebra_register *reg, void *set_handle); /* finish ranking of one result set */
+ int (*calc)(void *set_handle, zint sysno, zint staticrank,
+ int *stop_flag); /* score one record; -1 means no score could be obtained */
+ void (*add)(void *set_handle, int seqno, TERMID term); /* note one term occurrence at position seqno */
+};
+
+void zebraRankInstall (struct zebra_register *reg, struct rank_control *ctrl); /* called from zebraapi.c once per rank class during register setup */
+ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name); /* NOTE(review): presumably finds an installed class by name - confirm in the implementation */
+void zebraRankDestroy (struct zebra_register *reg); /* NOTE(review): presumably tears down the classes installed on reg - confirm */
+
+/* declaring externally defined rank class structures */
+/* remember to install rank classes in zebraapi.c as well!! */
+extern struct rank_control *rank_1_class; /* presumably defined in rank1.c - confirm */
+extern struct rank_control *rank_zv_class; /* presumably defined in zvrank.c - confirm */
+extern struct rank_control *rank_static_class; /* presumably defined in rankstatic.c - confirm */
+extern struct rank_control *rank_similarity_class; /* defined in ranksimilarity.c */
+
+
+
+YAZ_END_CDECL
+
+#endif
-/* $Id: rank1.c,v 1.27 2005-08-19 11:04:23 adam Exp $
+/* $Id: rank1.c,v 1.28 2006-05-03 09:31:26 marc Exp $
Copyright (C) 1995-2005
Index Data ApS
#endif
#include "index.h"
+#include "rank.h"
static int log_level = 0;
static int log_initialized = 0;
--- /dev/null
+/* $Id: ranksimilarity.c,v 1.1 2006-05-03 09:31:26 marc Exp $
+ Copyright (C) 1995-2005
+ Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+#ifdef WIN32
+#include <io.h>
+#endif
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "index.h"
+#include "rank.h"
+
+static int log_level = 0;
+static int log_initialized = 0;
+
+struct ranksimilarity_class_info { /* per-class state allocated by create() */
+ int dummy; /* placeholder member; not read anywhere in this file */
+};
+
+struct ranksimilarity_term_info { /* per-term statistics; one entry per term handed to begin() */
+ int local_occur; /* occurrences in the current record: bumped by add(), reset to 0 by calc() */
+ zint global_occur; /* NOTE(review): not read in this file; presumably total occurrences - confirm */
+ int global_inv; /* NOTE(review): not read in this file; presumably an inverse-frequency value - confirm */
+ int rank_flag; /* NOTE(review): not read in this file; presumably marks terms that take part in ranking - confirm */
+ int rank_weight; /* NOTE(review): not read in this file; presumably a per-term weight - confirm */
+ TERMID term; /* NOTE(review): not read in this file; presumably the term this entry belongs to - confirm */
+ int term_index; /* NOTE(review): not read in this file; presumably the term's position in the terms array - confirm */
+};
+
+struct ranksimilarity_set_info { /* per-result-set state; this is the set handle returned by begin() */
+ int last_pos; /* seqno passed to the most recent add() call with a non-NULL term */
+ int no_entries; /* number of terms passed to begin() */
+ int no_rank_entries; /* how many of those terms have flags starting with "rank," */
+ struct ranksimilarity_term_info *entries; /* array of no_entries per-term records, allocated from nmem */
+ NMEM nmem; /* memory handle that was passed to begin() */
+};
+
+
+/*
+ * create: Creates/initialises this rank handler. This routine is
+ * called exactly once. It resolves the log level for the
+ * "ranksimilarity" module on first use and returns the class_handle,
+ * which is later handed back to destroy().
+ */
+static void *create (ZebraHandle zh)
+{
+    struct ranksimilarity_class_info *ci =
+        (struct ranksimilarity_class_info *) xmalloc (sizeof(*ci));
+
+    if (!log_initialized)
+    {
+        log_level = yaz_log_module_level("ranksimilarity");
+        log_initialized = 1;
+    }
+    yaz_log(log_level, "create()");
+
+    /* no real per-class state yet; keep the placeholder well-defined */
+    ci->dummy = 0;
+
+    /* hand the allocation to the caller so destroy() can release it;
+       returning 0 here would leak the block allocated above */
+    return ci;
+}
+
+/*
+ * destroy: Tears down this rank handler once it is no longer
+ * needed, i.e. when the server shuts down. class_handle is the
+ * value previously returned by create and is released here.
+ */
+static void destroy (struct zebra_register *reg, void *class_handle)
+{
+    yaz_log(log_level, "destroy()");
+
+    /* the handle is the ranksimilarity_class_info block from create() */
+    xfree (class_handle);
+}
+
+
+/**
+ * begin: Prepares beginning of "real" ranking. Called once for
+ * each result set. Allocates the set info and one term-info entry
+ * per query term from nmem, logs every term together with the
+ * indexes (ords) it refers to, and attaches each entry to its term
+ * through terms[i]->rankpriv so add() can find it.
+ *
+ * Returns the "set handle" used by add, calc and end.
+ */
+static void *begin (struct zebra_register *reg,
+                    void *class_handle, RSET rset, NMEM nmem,
+                    TERMID *terms, int numterms)
+{
+    struct ranksimilarity_set_info *si =
+        (struct ranksimilarity_set_info *) nmem_malloc (nmem, sizeof(*si));
+    int i;
+
+    yaz_log(log_level, "begin()");
+
+    /* nothing zero-fills the memory obtained above, so every field
+       is given a defined value before add()/calc() can read it */
+    si->last_pos = 0;
+    si->no_entries = numterms;
+    si->no_rank_entries = 0; /* counts terms flagged "rank," below */
+    si->nmem = nmem;
+    si->entries = (struct ranksimilarity_term_info *)
+        nmem_malloc (si->nmem, sizeof(*si->entries)*numterms);
+
+    /* looping all terms in a specific field of query */
+    for (i = 0; i < numterms; i++)
+    {
+        struct ranksimilarity_term_info *ti = &(si->entries[i]);
+        struct ord_list *ol = terms[i]->ol;
+        int ranked = !strncmp (terms[i]->flags, "rank,", 5);
+
+        yaz_log(log_level, "begin() term i=%d flags=%s '%s'", i,
+                terms[i]->flags, terms[i]->name );
+
+        /* log each index this term is searched in */
+        for (; ol; ol = ol->next)
+        {
+            int index_type = 0;
+            const char *db = 0;
+            const char *string_index = 0;
+            int set = -1;
+            int use = -1;
+
+            zebraExplain_lookup_ord(reg->zei,
+                                    ol->ord, &index_type, &db, &set, &use,
+                                    &string_index);
+
+            if (string_index)
+                yaz_log(log_level, "begin() ord=%d index_type=%c db=%s str-index=%s",
+                        ol->ord, index_type, db, string_index);
+            else
+                yaz_log(log_level, "begin() ord=%d index_type=%c db=%s set=%d use=%d",
+                        ol->ord, index_type, db, set, use);
+        }
+
+        /* add() increments local_occur before calc() ever resets it,
+           so it must start from zero for the first record */
+        ti->local_occur = 0;
+        ti->global_occur = 0;
+        ti->global_inv = 0;
+        ti->rank_flag = ranked;
+        ti->rank_weight = 0;
+        ti->term = terms[i];
+        ti->term_index = i;
+
+        if (ranked)
+            (si->no_rank_entries)++;
+
+        /* let add() reach this entry through the term */
+        terms[i]->rankpriv = ti;
+    }
+    return si;
+}
+
+/*
+ * end: Terminates the ranking process. Called after a result set
+ * has been ranked. This handler only logs the call.
+ */
+static void end (struct zebra_register *reg, void *set_handle)
+{
+    (void) reg;
+    (void) set_handle;
+
+    yaz_log(log_level, "end()");
+}
+
+
+/**
+ * add: Called for each word occurrence in a result set. This routine
+ * should be as fast as possible and only updates counters: it
+ * remembers the position and bumps the per-record hit count of the
+ * term. Calls with a NULL term are ignored.
+ */
+static void add (void *set_handle, int seqno, TERMID term)
+{
+    struct ranksimilarity_set_info *set_info =
+        (struct ranksimilarity_set_info *) set_handle;
+
+    assert(set_info);
+    if (term)
+    {
+        /* entry attached to the term via rankpriv */
+        struct ranksimilarity_term_info *term_info =
+            (struct ranksimilarity_term_info *) term->rankpriv;
+
+        assert(term_info);
+        set_info->last_pos = seqno;
+        term_info->local_occur += 1;
+    }
+}
+
+/*
+ * calc: Called for each document in a result. This handler should
+ * produce a score based on previous call(s) to the add handler.
+ * Returns -1 when no term of the set is flagged for ranking.
+ *
+ * The intended contract is a score between 0 and 1000; the
+ * placeholder formula below does not follow it yet and simply
+ * returns INT_MAX - staticrank (staticrank 0 is highest, so the
+ * score is reversed). The zint result is narrowed to int on return.
+ *
+ * Setting *stop_flag = 1 would stop processing of the result set
+ * list; this implementation never sets it.
+ */
+static int calc (void *set_handle, zint sysno, zint staticrank,
+                 int *stop_flag)
+{
+    struct ranksimilarity_set_info *si
+        = (struct ranksimilarity_set_info *) set_handle;
+    int idx = 0;
+
+    yaz_log(log_level, "calc()");
+
+    /* ranking not enabled for any terms */
+    if (si->no_rank_entries == 0)
+        return -1;
+
+    /* clear the per-record hit counts so the next record starts fresh */
+    while (idx < si->no_entries)
+    {
+        si->entries[idx].local_occur = 0;
+        idx++;
+    }
+
+    /* here goes your formula to compute a scoring function; */
+    /* you may use all the gathered statistics here */
+    return INT_MAX - staticrank;
+}
+
+/*
+ * Pseudo-meta code with sequence of calls as they occur in a
+ * server. Handlers are prefixed by --:
+ *
+ * server init
+ * -- create
+ * foreach search
+ * rank result set
+ * -- begin
+ * foreach record
+ * foreach word
+ * -- add
+ * -- calc
+ * -- end
+ * -- destroy
+ * server close
+ */
+
+static struct rank_control rank_control = { /* positional initializer: order must match struct rank_control */
+ "rank-similarity", /* name */
+ create, /* create */
+ destroy, /* destroy */
+ begin, /* begin */
+ end, /* end */
+ calc, /* calc */
+ add, /* add */
+};
+
+struct rank_control *rank_similarity_class = &rank_control; /* exported handle, declared extern in rank.h */
-/* $Id: rankstatic.c,v 1.4 2006-03-30 09:52:15 adam Exp $
+/* $Id: rankstatic.c,v 1.5 2006-05-03 09:31:26 marc Exp $
Copyright (C) 1995-2005
Index Data ApS
#endif
#include "index.h"
+#include "rank.h"
static int log_level = 0;
static int log_initialized = 0;
-/* $Id: zebraapi.c,v 1.214 2006-04-25 19:37:21 adam Exp $
+/* $Id: zebraapi.c,v 1.215 2006-05-03 09:31:26 marc Exp $
Copyright (C) 1995-2005
Index Data ApS
#include <yaz/pquery.h>
#include <yaz/sortspec.h>
#include "index.h"
+#include "rank.h"
#include "orddict.h"
#include <charmap.h>
#include <idzebra/api.h>
reg->key_file_no = 0;
reg->ptr_i = 0;
+ /* installing rank classes */
zebraRankInstall (reg, rank_1_class);
zebraRankInstall (reg, rank_zv_class);
+ zebraRankInstall (reg, rank_similarity_class);
zebraRankInstall (reg, rank_static_class);
recordCompression = res_get_def (res, "recordCompression", "none");
-/* $Id: zsets.c,v 1.99 2006-01-26 22:17:16 adam Exp $
+/* $Id: zsets.c,v 1.100 2006-05-03 09:31:26 marc Exp $
Copyright (C) 1995-2005
Index Data ApS
#endif
#include "index.h"
+#include "rank.h"
#include <yaz/diagbib1.h>
#include <rset.h>
-/* $Id: zvrank.c,v 1.19 2005-08-19 11:04:23 adam Exp $
+/* $Id: zvrank.c,v 1.20 2006-05-03 09:31:26 marc Exp $
Copyright (C) 1995-2005
Index Data ApS
#endif
#include "index.h"
+#include "rank.h"
static int log_level = 0;
static int log_initialized = 0;