2 * Copyright (C) 1994-1995, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.3 1995-09-11 15:23:40 adam
8 * More work on relevance search.
10 * Revision 1.2 1995/09/11 13:09:41 adam
11 * More work on relevance feedback.
13 * Revision 1.1 1995/09/08 14:52:42 adam
14 * Work on relevance feedback.
/*
 * Forward declarations of this file's private (static) result-set
 * operations: create, open, close, delete, rewind, count, read and write.
 * Presumably these are the entries of the `control` table defined just
 * below; the table's initializer is not part of this listing - confirm.
 */
27 static rset_control *r_create(const struct rset_control *sel, void *parms);
28 static RSFD r_open (rset_control *ct, int wflag);
29 static void r_close (RSFD rfd);
30 static void r_delete (rset_control *ct);
31 static void r_rewind (RSFD rfd);
32 static int r_count (rset_control *ct);
33 static int r_read (RSFD rfd, void *buf);
34 static int r_write (RSFD rfd, const void *buf);
/* File-private control descriptor for this result-set kind (initializer not shown in this listing). */
36 static const rset_control control =
/* Public handle for this result-set kind: the only externally visible name here, pointing at the descriptor above. */
50 const rset_control *rset_kind_relevance = &control;
/*
 * Per-result-set state, reached through the control's `buf` pointer.
 * Only two members are visible in this listing; the functions below also
 * use key_size, max_rec, no_rec, key_buf, score_buf and sort_idx on this
 * struct.
 */
52 struct rset_rel_info {
/* Key comparison callback; r_create copies it from the creation parameters. */
56 int (*cmp)(const void *p1, const void *p2);
/* Head of the singly linked list of open read descriptors (see r_open / r_close). */
61 struct rset_rel_rfd *rfd_list;
/*
 * The two members below presumably belong to struct rset_rel_rfd, the
 * per-open read descriptor, whose opening line is missing from this
 * listing - confirm against the full file. r_open and r_rewind also set a
 * `position` member on that descriptor.
 */
/* Next open descriptor in the owner's rfd_list. */
66 struct rset_rel_rfd *next;
/* Back-pointer to the owning result-set state. */
67 struct rset_rel_info *info;
/*
 * add_rec: offer one scored record to the bounded ranking kept in `info`.
 *   info  - ranking state; sort_idx[] holds storage-slot numbers in rank
 *           order, score_buf[] and key_buf hold the data of each slot.
 *   score - score of the candidate record.
 *   key   - candidate key; info->key_size bytes are copied from it.
 * Only part of the body is present in this listing (braces and some
 * statements are missing), so the notes below describe the visible
 * statements only.
 */
70 static void add_rec (struct rset_rel_info *info, double score, void *key)
/* Walk the ranks from 0 upward, looking up the storage slot of each. */
74 for (i = 0; i<info->no_rec; i++)
76 idx = info->sort_idx[i];
/* Rank i is the first whose stored score is >= the new score; presumably the scan stops here (statement not shown). */
77 if (score <= info->score_buf[idx])
/*
 * NOTE(review): if the shift-and-append below is the body of this test,
 * it runs when the set is already full, and the new slot number no_rec
 * equals max_rec - one past the arrays r_create sizes by max_rec.
 * A "still has room" test would read no_rec < max_rec. Confirm against
 * the full source.
 */
82 if (info->no_rec == info->max_rec)
/*
 * Shift every rank up by one. The loop ends with j == 0, so the new
 * storage slot (no_rec) is recorded at rank 0.
 * NOTE(review): the lower bound is 0, not the rank i found by the scan
 * above, so the scan result is not used for placement here - confirm
 * that this is intended.
 */
84 for (j = info->no_rec; j > 0; --j)
85 info->sort_idx[j] = info->sort_idx[j-1];
86 idx = info->sort_idx[j] = info->no_rec;
/* Other path: reuse the storage slot currently held by rank 0. */
91 idx = info->sort_idx[0];
/*
 * Move ranks 1..i down to 0..i-1 and put the reused slot at rank i
 * (j == i when the loop ends). Whether i is adjusted before this loop is
 * not visible in this listing.
 */
94 for (j = 0; j < i; ++j)
95 info->sort_idx[j] = info->sort_idx[j+1];
96 info->sort_idx[j] = idx;
/* Store the candidate's key bytes and score in the chosen storage slot. */
99 memcpy (info->key_buf + idx*info->key_size, key, info->key_size);
100 info->score_buf[idx] = score;
/*
 * relevance: read keys from the parms->no_isam_positions ISAM positions
 * listed in parms, compute one score per selected key, and hand each
 * (score, key) pair to add_rec so that `info` keeps a bounded ranking.
 *   info  - ranking state to fill; info->key_size gives the key width.
 *   parms - creation parameters: ISAM handle (is), position list,
 *           position count and key comparison callback (cmp).
 * The enclosing loop structure, the computation of `tf` and most cleanup
 * statements are missing from this listing; comments cover only what is
 * visible.
 */
103 static void relevance (struct rset_rel_info *info, rset_relevance_parms *parms)
113 logf (LOG_DEBUG, "relevance");
/* One entry per ISAM position in each of these arrays; isam_tmp_buf holds a single key. */
114 isam_buf = xmalloc (parms->no_isam_positions * sizeof(*isam_buf));
115 isam_r = xmalloc (sizeof (*isam_r) * parms->no_isam_positions);
116 isam_pt = xmalloc (sizeof (*isam_pt) * parms->no_isam_positions);
117 isam_tmp_buf = xmalloc (info->key_size);
118 max_tf = xmalloc (sizeof (*max_tf) * parms->no_isam_positions);
119 wgt = xmalloc (sizeof (*wgt) * parms->no_isam_positions);
/*
 * Per position: allocate a key buffer, open the position, record
 * is_numkeys() as max_tf, and read the first key. isam_r[i] keeps the
 * result of is_readkey and is tested as a loop condition further down.
 */
121 for (i = 0; i<parms->no_isam_positions; i++)
123 isam_buf[i] = xmalloc (info->key_size);
124 isam_pt[i] = is_position (parms->is, parms->isam_positions[i]);
125 max_tf [i] = is_numkeys (isam_pt[i]);
126 isam_r[i] = is_readkey (isam_pt[i], isam_buf[i]);
127 logf (LOG_DEBUG, "max tf %d = %d", i, max_tf[i]);
/* Accumulators for one scored key; initialisation is not visible in this listing. */
132 double length, score;
134 /* find min with lowest sysno */
135 for (i = 0; i<parms->no_isam_positions; i++)
/* Candidate i replaces the current minimum when none is chosen yet or cmp(candidate, minimum) < 1. */
137 (min < 0 || (*parms->cmp)(isam_buf[i], isam_buf[min]) < 1))
/* Keep a private copy of the selected key: the per-position buffers are overwritten by the reads below. */
141 memcpy (isam_tmp_buf, isam_buf[min], info->key_size);
142 /* calculate for all with those sysno */
144 for (i = 0; i<parms->no_isam_positions; i++)
149 r = (*parms->cmp)(isam_buf[i], isam_tmp_buf);
/* Keep reading from position i while reads succeed and cmp against the selected key is <= 1. */
160 isam_r[i] = is_readkey (isam_pt[i], isam_buf[i]);
161 } while (isam_r[i] &&
162 (*parms->cmp)(isam_buf[i], isam_tmp_buf) <= 1);
/*
 * Weight of position i: 0.5 plus half the ratio tf / max_tf[i]; the sum
 * of squared weights goes into `length`. `tf` is set in lines not shown
 * here (presumably the number of keys consumed above - confirm).
 */
163 wgt[i] = 0.5+tf*0.5/max_tf[i];
164 length += wgt[i] * wgt[i];
167 /* calculate relevance value */
168 length = sqrt (length);
/* Score is the sum over all positions of weight divided by the square-rooted length. */
170 for (i = 0; i<parms->no_isam_positions; i++)
171 score += wgt[i]/length;
/* Log the key and its score at LOG_LOG level. */
174 key_logdump (LOG_LOG, isam_tmp_buf);
175 logf (LOG_LOG, " %f", score);
177 /* if value is in the top score, then save it - don't emit yet */
178 add_rec (info, score, isam_tmp_buf);
/*
 * Cleanup: release every ISAM position handle and the temporary key.
 * NOTE(review): frees for the other arrays allocated at the top are not
 * visible in this listing - confirm they exist in the full file.
 */
180 for (i = 0; i<parms->no_isam_positions; i++)
182 is_pt_free (isam_pt[i]);
186 xfree (isam_tmp_buf);
/*
 * r_create: build a new result-set control of this kind.
 *   sel   - template control; sizeof(*sel) bytes are copied into the new one.
 *   parms - treated as rset_relevance_parms (key_size, max_rec, cmp and
 *           the ISAM inputs consumed by relevance()).
 * The ranking is computed here, during creation, by the call to
 * relevance(). The return statement is not visible in this listing;
 * presumably the new control is returned.
 */
193 static rset_control *r_create (const struct rset_control *sel, void *parms)
196 rset_relevance_parms *r_parms = parms;
197 struct rset_rel_info *info;
/* New control starts as a copy of the template, with private state attached through buf. */
199 newct = xmalloc(sizeof(*newct));
200 memcpy(newct, sel, sizeof(*sel));
201 newct->buf = xmalloc (sizeof(struct rset_rel_info));
/* Copy the sizing parameters; both must be greater than 1 or the asserts abort. */
204 info->key_size = r_parms->key_size;
205 assert (info->key_size > 1);
206 info->max_rec = r_parms->max_rec;
207 assert (info->max_rec > 1);
208 info->cmp = r_parms->cmp;
/* Storage for max_rec entries: key bytes, scores, and the rank-to-slot index. */
210 info->key_buf = xmalloc (info->key_size * info->max_rec);
211 info->score_buf = xmalloc (sizeof(*info->score_buf) * info->max_rec);
212 info->sort_idx = xmalloc (sizeof(*info->sort_idx) * info->max_rec);
/* No read descriptors are open yet. */
214 info->rfd_list = NULL;
/* Fill the ranking now. */
216 relevance (info, r_parms);
/*
 * r_open: open a read descriptor on the result set.
 *   ct    - control whose buf holds the rset_rel_info state.
 *   wflag - presumably non-zero requests write access, which triggers the
 *           fatal "read-only" log below; the test itself is not shown.
 * The return statement is not visible; presumably the new descriptor is
 * returned.
 */
220 static RSFD r_open (rset_control *ct, int wflag)
222 struct rset_rel_rfd *rfd;
223 struct rset_rel_info *info = ct->buf;
227 logf (LOG_FATAL, "relevance set type is read-only");
/* Allocate the descriptor and push it on the front of the open-descriptor list. */
230 rfd = xmalloc (sizeof(*rfd));
231 rfd->next = info->rfd_list;
232 info->rfd_list = rfd;
/* Start at no_rec, the same value r_rewind restores; r_read treats position <= 0 as its stop test. */
233 rfd->position = info->no_rec;
/*
 * r_close: close a read descriptor by unlinking it from its owner's
 * open-descriptor list. Logs a fatal message when the walk finds no
 * match. The match test and any release of the descriptor are not
 * visible in this listing.
 */
238 static void r_close (RSFD rfd)
240 struct rset_rel_info *info = ((struct rset_rel_rfd*)rfd)->info;
241 struct rset_rel_rfd **rfdp;
/* Walk with a pointer to each link so the matching node can be unlinked without tracking a predecessor. */
243 for (rfdp = &info->rfd_list; *rfdp; rfdp = &(*rfdp)->next)
/* Unlink: make the link that pointed at this node point at its successor. */
246 *rfdp = (*rfdp)->next;
250 logf (LOG_FATAL, "r_close but no rfd match!");
/*
 * r_delete: release the result set's storage. All read descriptors must
 * already be closed (asserted). Only the three ranking buffers are freed
 * in the visible lines; release of the info struct and of the control
 * itself is not shown in this listing.
 */
254 static void r_delete (rset_control *ct)
256 struct rset_rel_info *info = ct->buf;
/* Deleting with descriptors still open is a caller error. */
258 assert (info->rfd_list == NULL);
259 xfree (info->key_buf);
260 xfree (info->score_buf);
261 xfree (info->sort_idx);
/* r_rewind: reset a read descriptor to its starting position, no_rec - the same value r_open assigns. */
266 static void r_rewind (RSFD rfd)
268 struct rset_rel_rfd *p = rfd;
269 struct rset_rel_info *info = p->info;
271 p->position = info->no_rec;
/*
 * r_count: count operation for this result-set kind. Fetches the
 * rset_rel_info state from ct->buf; the returned expression is not
 * visible in this listing (presumably info->no_rec - confirm).
 */
274 static int r_count (rset_control *ct)
276 struct rset_rel_info *info = ct->buf;
/*
 * r_read: read operation on an open descriptor; the visible lines locate
 * the key for the descriptor's current position and log its score.
 *   rfd - descriptor from r_open.
 *   buf - presumably the destination of the key copy; the copy call is
 *         only partly visible in this listing.
 * Return statements are not visible.
 */
281 static int r_read (RSFD rfd, void *buf)
283 struct rset_rel_rfd *p = rfd;
284 struct rset_rel_info *info = p->info;
/* Nothing left once position reaches 0 (presumably returns an end indication here). */
286 if (p->position <= 0)
/*
 * NOTE(review): position starts at no_rec, so it must be decremented
 * before the lookups below; otherwise sort_idx[no_rec] is one past the
 * last filled rank. The decrement is presumably in a line missing from
 * this listing - confirm.
 */
289 logf (LOG_DEBUG, "score: %f",
290 info->score_buf[info->sort_idx[p->position]]);
/* Address of the key stored in the slot that the current rank maps to. */
292 info->key_buf + info->key_size * info->sort_idx[p->position],
/*
 * r_write: write operation; not supported by this result-set kind. Logs
 * a fatal "read-only" message; the return value is not visible in this
 * listing.
 */
297 static int r_write (RSFD rfd, const void *buf)
299 logf (LOG_FATAL, "relevance set type is read-only");