1 /* $Id: ranksimilarity.c,v 1.1 2006-05-03 09:31:26 marc Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
36 static int log_level = 0;
37 static int log_initialized = 0;
39 struct ranksimilarity_class_info {
43 struct ranksimilarity_term_info {
53 struct ranksimilarity_set_info {
57 struct ranksimilarity_term_info *entries;
63 * create: Creates/Initialises this rank handler. This routine is
64 * called exactly once. The routine returns the class_handle.
66 static void *create (ZebraHandle zh)
68 struct ranksimilarity_class_info *ci =
69 (struct ranksimilarity_class_info *) xmalloc (sizeof(*ci));
73 log_level = yaz_log_module_level("ranksimilarity");
76 yaz_log(log_level, "create()");
81 * destroy: Destroys this rank handler. This routine is called
82 * when the handler is no longer needed - i.e. when the server
83 * dies. The class_handle was previously returned by create.
85 static void destroy (struct zebra_register *reg, void *class_handle)
87 struct ranksimilarity_class_info *ci
88 = (struct ranksimilarity_class_info *) class_handle;
89 yaz_log(log_level, "destroy()");
95 * begin: Prepares beginning of "real" ranking. Called once for
96 * each result set. The returned handle is a "set handle" and
97 * will be used in each of the handlers below.
99 static void *begin (struct zebra_register *reg,
100 void *class_handle, RSET rset, NMEM nmem,
101 TERMID *terms, int numterms)
103 struct ranksimilarity_set_info *si =
104 (struct ranksimilarity_set_info *) nmem_malloc (nmem, sizeof(*si));
107 yaz_log(log_level, "begin()");
109 /* count how many terms are ranked (2=102 or similar) */
110 si->no_entries = numterms;
111 si->no_rank_entries = 0;
113 si->entries = (struct ranksimilarity_term_info *)
114 nmem_malloc (si->nmem, sizeof(*si->entries)*numterms);
116 /* looping all terms in a specific field of query */
117 for (i = 0; i < numterms; i++)
119 struct ord_list *ol = terms[i]->ol;
121 yaz_log(log_level, "begin() term i=%d flags=%s '%s'", i,
122 terms[i]->flags, terms[i]->name );
124 for (; ol; ol = ol->next)
128 const char *string_index = 0;
132 zebraExplain_lookup_ord(reg->zei,
133 ol->ord, &index_type, &db, &set, &use,
137 yaz_log(log_level, "begin() ord=%d index_type=%c db=%s str-index=%s",
138 ol->ord, index_type, db, string_index);
140 yaz_log(log_level, "begin() ord=%d index_type=%c db=%s set=%d use=%d",
141 ol->ord, index_type, db, set, use);
143 if (!strncmp (terms[i]->flags, "rank,", 5))
144 (si->no_rank_entries)++;
146 /* setting next entry in term */
147 terms[i]->rankpriv = &(si->entries[i]);
153 * end: Terminates ranking process. Called after a result set
156 static void end (struct zebra_register *reg, void *set_handle)
158 yaz_log(log_level, "end()");
163 * add: Called for each word occurrence in a result set. This routine
164 * should be as fast as possible. This routine should "incrementally"
167 static void add (void *set_handle, int seqno, TERMID term)
169 struct ranksimilarity_set_info *si = (struct ranksimilarity_set_info *) set_handle;
170 struct ranksimilarity_term_info *ti;
174 /* yaz_log(log_level, "add() NULL term"); */
179 ti= (struct ranksimilarity_term_info *) term->rankpriv;
181 si->last_pos = seqno;
183 /* yaz_log(log_level, "add() seqno=%d term=%s count=%d",
184 seqno, term->name,ti->local_occur); */
188 * calc: Called for each document in a result. This handler should
189 * produce a score based on previous call(s) to the add handler. The
190 * score should be between 0 and 1000. If score cannot be obtained
191 * -1 should be returned.
193 static int calc (void *set_handle, zint sysno, zint staticrank,
197 struct ranksimilarity_set_info *si
198 = (struct ranksimilarity_set_info *) set_handle;
200 yaz_log(log_level, "calc()");
202 if (!si->no_rank_entries)
203 return -1; /* ranking not enabled for any terms */
205 /* here you put in your own score function */
208 /* reset the per-term counts for the next document */
209 for (i = 0; i < si->no_entries; i++)
210 si->entries[i].local_occur = 0;
212 /* if we set *stop_flag = 1, we stop processing (of result set list) */
213 /* staticrank = 0 is highest, MAXINT lowest */
216 /* here goes your formula to compute a scoring function */
217 /* you may use all the gathered statistics here */
219 score = INT_MAX - staticrank; /* but score is reverse (logical) */
227 * Pseudo-meta code with sequence of calls as they occur in a
228 * server. Handlers are prefixed by --:
244 static struct rank_control rank_control = {
254 struct rank_control *rank_similarity_class = &rank_control;