/* This file is part of Pazpar2.
- Copyright (C) 2006-2012 Index Data
+ Copyright (C) 2006-2013 Index Data
Pazpar2 is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
#include <math.h>
#include <stdlib.h>
+#include "pazpar2_config.h"
#include "relevance.h"
#include "session.h"
+#include "client.h"
#ifdef WIN32
#define log2(x) (log(x)/log(2))
return 0;
}
+/* Copy the tokens of `words` to w_snippet, wrapping every run of
+ * consecutive tokens whose normalized form equals the norm_str of an
+ * entry in r->entries in <match> ... </match>.
+ *
+ * r          relevance object; supplies the token iterator (r->prt) and
+ *            the list of query terms (r->entries)
+ * words      text to be tokenized and copied
+ * name       not used by this function
+ * w_snippet  buffer that the output is appended to
+ *
+ * Returns the number of <match> runs that were opened; 0 means that no
+ * token matched.
+ */
+int relevance_snippet(struct relevance *r,
+                      const char *words, const char *name,
+                      WRBUF w_snippet)
+{
+    int no = 0;
+    const char *norm_str;
+    int highlight = 0;
+
+    pp2_charset_token_first(r->prt, words, 0);
+    while ((norm_str = pp2_charset_token_next(r->prt)))
+    {
+        size_t org_start, org_len;
+        struct word_entry *entries = r->entries;
+
+        pp2_get_org(r->prt, &org_start, &org_len);
+        // After this loop, entries is non-null only when a term matched.
+        // An empty normalized token never matches.
+        for (; entries; entries = entries->next)
+        {
+            if (*norm_str && !strcmp(norm_str, entries->norm_str))
+                break;
+        }
+        if (entries)
+        {
+            // Open a new run only at the first matching token of the run
+            if (!highlight)
+            {
+                highlight = 1;
+                wrbuf_puts(w_snippet, "<match>");
+                no++;
+            }
+        }
+        else
+        {
+            // A non-matching token ends the current run, if any
+            if (highlight)
+            {
+                highlight = 0;
+                wrbuf_puts(w_snippet, "</match>");
+            }
+        }
+        // Emit the token's original text, located by org_start/org_len
+        wrbuf_xmlputs_n(w_snippet, words + org_start, org_len);
+    }
+    // Close a run that is still open when the input ends
+    if (highlight)
+        wrbuf_puts(w_snippet, "</match>");
+    if (no)
+    {
+        yaz_log(YLOG_DEBUG, "SNIPPET match: %s", wrbuf_cstr(w_snippet));
+    }
+    return no;
+}
+
+
void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
const char *words, const char *rank,
const char *name)
break;
}
}
+/* Set all r->vec_len entries of r->doc_frequency_vec to zero.
+ * A null r is accepted and ignored. */
+void relevance_clear(struct relevance *r)
+{
+    int term;
+
+    if (!r)
+        return;
+    for (term = 0; term < r->vec_len; term++)
+        r->doc_frequency_vec[term] = 0;
+}
struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
struct ccl_rpn_node *query,
{
NMEM nmem = nmem_create();
struct relevance *res = nmem_malloc(nmem, sizeof(*res));
- int i;
res->nmem = nmem;
res->entries = 0;
pull_terms(res, query);
res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int));
- for (i = 0; i < res->vec_len; i++)
- res->doc_frequency_vec[i] = 0;
// worker array
res->term_frequency_vec_tmp =
res->term_pos =
nmem_malloc(res->nmem, res->vec_len * sizeof(*res->term_pos));
+ relevance_clear(res);
return res;
}
}
// Prepare for a relevance-sorted read
-void relevance_prepare_read(struct relevance *rel, struct reclist *reclist)
+void relevance_prepare_read(struct relevance *rel, struct reclist *reclist,
+ enum conf_sortkey_type type)
{
int i;
float *idfvec = xmalloc(rel->vec_len * sizeof(float));
+ int n_clients = clients_count();
+ struct client * clients[n_clients];
+ yaz_log(YLOG_LOG,"round-robin: have %d clients", n_clients);
+ for (i = 0; i < n_clients; i++)
+ clients[i] = 0;
+
reclist_enter(reclist);
// Calculate document frequency vector for each term.
{
wrbuf_printf(w, "score = relevance(%d);\n", relevance);
}
+ // Experimental round-robin
+ // Overwrites the score calculated above, but I keep it there to
+ // get the log entries
+ if (type == Metadata_sortkey_relevance_h) {
+ struct record *record;
+ int thisclient = 0;
+ struct record *bestrecord = 0;
+ int nclust = 0;
+ for (record = rec->records; record; record = record->next) {
+ if ( bestrecord == 0 || bestrecord->position < record->position )
+ bestrecord = record;
+ nclust++;
+ }
+ while ( clients[thisclient] != 0
+ && clients[thisclient] != bestrecord->client )
+ thisclient++;
+ if ( clients[thisclient] == 0 )
+ {
+ yaz_log(YLOG_LOG,"round-robin: found new client at %d: p=%p\n", thisclient, bestrecord->client);
+ clients[thisclient] = bestrecord->client;
+ }
+ int tfrel = relevance;
+ relevance = -(bestrecord->position * n_clients + thisclient) ;
+ wrbuf_printf(w,"round-robin score: pos=%d client=%d ncl=%d tfscore=%d score=%d\n",
+ bestrecord->position, thisclient, nclust, tfrel, relevance );
+ yaz_log(YLOG_LOG,"round-robin score: pos=%d client=%d ncl=%d score=%d",
+ bestrecord->position, thisclient, nclust, relevance );
+ }
rec->relevance_score = relevance;
}
reclist_leave(reclist);