X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=src%2Frelevance.c;h=2e5411b3343bac4391b32048bf3632f4bb5f2b0c;hb=533ff0b399b62625ddd68e1a59cb34ef70795bf9;hp=d22598dbd21d4eb8ea5eaebfffdbaebc9b395754;hpb=63466e474f690a04fe0ebdaaac87914768977757;p=pazpar2-moved-to-github.git diff --git a/src/relevance.c b/src/relevance.c index d22598d..2e5411b 100644 --- a/src/relevance.c +++ b/src/relevance.c @@ -1,5 +1,5 @@ /* This file is part of Pazpar2. - Copyright (C) 2006-2012 Index Data + Copyright (C) 2006-2013 Index Data Pazpar2 is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -25,8 +25,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include +#include "pazpar2_config.h" #include "relevance.h" #include "session.h" +#include "client.h" #ifdef WIN32 #define log2(x) (log(x)/log(2)) @@ -83,6 +85,55 @@ static struct word_entry *word_entry_match(struct relevance *r, return 0; } +int relevance_snippet(struct relevance *r, + const char *words, const char *name, + WRBUF w_snippet) +{ + int no = 0; + const char *norm_str; + int highlight = 0; + + pp2_charset_token_first(r->prt, words, 0); + while ((norm_str = pp2_charset_token_next(r->prt))) + { + size_t org_start, org_len; + struct word_entry *entries = r->entries; + int i; + + pp2_get_org(r->prt, &org_start, &org_len); + for (; entries; entries = entries->next, i++) + { + if (*norm_str && !strcmp(norm_str, entries->norm_str)) + break; + } + if (entries) + { + if (!highlight) + { + highlight = 1; + wrbuf_puts(w_snippet, ""); + no++; + } + } + else + { + if (highlight) + { + highlight = 0; + wrbuf_puts(w_snippet, ""); + } + } + wrbuf_xmlputs_n(w_snippet, words + org_start, org_len); + } + if (highlight) + wrbuf_puts(w_snippet, ""); + if (no) + { + yaz_log(YLOG_DEBUG, "SNIPPET match: %s", wrbuf_cstr(w_snippet)); + } + return no; +} + void relevance_countwords(struct relevance *r, struct record_cluster *cluster, const char *words, const char *rank, const char *name) @@ -215,6 +266,15 @@ static void pull_terms(struct relevance *res, struct ccl_rpn_node *n) break; } } +void relevance_clear(struct relevance *r) +{ + if (r) + { + int i; + for (i = 0; i < r->vec_len; i++) + r->doc_frequency_vec[i] = 0; + } +} struct relevance *relevance_create_ccl(pp2_charset_fact_t pft, struct ccl_rpn_node *query, @@ -224,7 +284,6 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft, { NMEM nmem = nmem_create(); struct relevance *res = nmem_malloc(nmem, sizeof(*res)); - int i; res->nmem = nmem; res->entries = 0; @@ -238,8 +297,6 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft, pull_terms(res, query); res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int)); - for (i = 0; i < res->vec_len; i++) - res->doc_frequency_vec[i] = 0; // worker array res->term_frequency_vec_tmp = @@ -249,6 +306,7 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft, res->term_pos = nmem_malloc(res->nmem, res->vec_len * sizeof(*res->term_pos)); + relevance_clear(res); return res; } @@ -296,10 +354,17 @@ void relevance_donerecord(struct relevance *r, struct record_cluster *cluster) } // Prepare for a relevance-sorted read -void relevance_prepare_read(struct relevance *rel, struct reclist *reclist) +void relevance_prepare_read(struct relevance *rel, struct reclist *reclist, + enum conf_sortkey_type type) { int i; float *idfvec = xmalloc(rel->vec_len * sizeof(float)); + int n_clients = clients_count(); + struct client * clients[n_clients]; + yaz_log(YLOG_LOG,"round-robin: have %d clients", n_clients); + for (i = 0; i < n_clients; i++) + clients[i] = 0; + reclist_enter(reclist); // Calculate document frequency vector for each term. @@ -356,6 +421,34 @@ void relevance_prepare_read(struct relevance *rel, struct reclist *reclist) { wrbuf_printf(w, "score = relevance(%d);\n", relevance); } + // Experimental round-robin + // Overwrites the score calculated above, but I keep it there to + // get the log entries + if (type == Metadata_sortkey_relevance_h) { + struct record *record; + int thisclient = 0; + struct record *bestrecord = 0; + int nclust = 0; + for (record = rec->records; record; record = record->next) { + if ( bestrecord == 0 || bestrecord->position < record->position ) + bestrecord = record; + nclust++; + } + while ( clients[thisclient] != 0 + && clients[thisclient] != bestrecord->client ) + thisclient++; + if ( clients[thisclient] == 0 ) + { + yaz_log(YLOG_LOG,"round-robin: found new client at %d: p=%p\n", thisclient, bestrecord->client); + clients[thisclient] = bestrecord->client; + } + int tfrel = relevance; + relevance = -(bestrecord->position * n_clients + thisclient) ; + wrbuf_printf(w,"round-robin score: pos=%d client=%d ncl=%d tfscore=%d score=%d\n", + bestrecord->position, thisclient, nclust, tfrel, relevance ); + yaz_log(YLOG_LOG,"round-robin score: pos=%d client=%d ncl=%d score=%d", + bestrecord->position, thisclient, nclust, relevance ); + } rec->relevance_score = relevance; } reclist_leave(reclist);