X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=src%2Frelevance.c;h=bc841e4e5e18396ee988824eb57343ca545a0205;hb=aabc654ef7b6c5a6740e95b9580223726a86e43a;hp=4597c675803a90d32de66d493fa2b62ba644ee5a;hpb=1feb0a041e752e096256750c1ec0e8e00f1c30e5;p=pazpar2-moved-to-github.git diff --git a/src/relevance.c b/src/relevance.c index 4597c67..bc841e4 100644 --- a/src/relevance.c +++ b/src/relevance.c @@ -1,11 +1,15 @@ /* - * $Id: relevance.c,v 1.2 2006-12-20 22:18:33 adam Exp $ + * $Id: relevance.c,v 1.8 2007-01-15 04:34:28 quinn Exp $ */ #include #include #include +#if HAVE_CONFIG_H +#include +#endif + #include "relevance.h" #include "pazpar2.h" @@ -67,16 +71,16 @@ static void word_trie_addterm(NMEM nmem, struct word_trie *n, const char *term, #define raw_char(c) (((c) >= 'a' && (c) <= 'z') ? (c) - 'a' : -1) -static int word_trie_match(struct word_trie *t, const char *word, int len, int *skipped) +static int word_trie_match(struct word_trie *t, const char *word, int *skipped) { int c = raw_char(tolower(*word)); - if (!len) + if (!*word) return 0; - word++; len--; + word++; (*skipped)++; - if (!len || raw_char(*word) < 0) + if (!*word || raw_char(*word) < 0) { if (t->list[c].termno > 0) return t->list[c].termno; @@ -87,7 +91,7 @@ static int word_trie_match(struct word_trie *t, const char *word, int len, int * { if (t->list[c].child) { - return word_trie_match(t->list[c].child, word, len, skipped); + return word_trie_match(t->list[c].child, word, skipped); } else return 0; @@ -117,69 +121,63 @@ struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs) ; res->vec_len = ++i; res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int)); - bzero(res->doc_frequency_vec, res->vec_len * sizeof(int)); + memset(res->doc_frequency_vec, 0, res->vec_len * sizeof(int)); res->nmem = nmem; res->wt = build_word_trie(nmem, terms); return res; } -void relevance_newrec(struct relevance *r, struct record *rec) +void relevance_newrec(struct relevance *r, struct record_cluster *rec) { if (!rec->term_frequency_vec) { rec->term_frequency_vec = nmem_malloc(r->nmem, r->vec_len * sizeof(int)); - bzero(rec->term_frequency_vec, r->vec_len * sizeof(int)); + memset(rec->term_frequency_vec, 0, r->vec_len * sizeof(int)); } } // FIXME. The definition of a word is crude here.. should support // some form of localization mechanism? -void relevance_countwords(struct relevance *r, struct record *head, - const char *words, int len, int multiplier) +void relevance_countwords(struct relevance *r, struct record_cluster *cluster, + const char *words, int multiplier) { - while (len) + while (*words) { char c; int res; int skipped; - while (len && (c = raw_char(tolower(*words))) < 0) - { + while (*words && (c = raw_char(tolower(*words))) < 0) words++; - len--; - } - if (!len) + if (!*words) return; skipped = 0; - if ((res = word_trie_match(r->wt, words, len, &skipped))) + if ((res = word_trie_match(r->wt, words, &skipped))) { words += skipped; - len -= skipped; - head->term_frequency_vec[res] += multiplier; + cluster->term_frequency_vec[res] += multiplier; } else { - while (len && (c = raw_char(tolower(*words))) >= 0) - { + while (*words && (c = raw_char(tolower(*words))) >= 0) words++; - len--; - } } - head->term_frequency_vec[0]++; + cluster->term_frequency_vec[0]++; } } -void relevance_donerecord(struct relevance *r, struct record *head) +void relevance_donerecord(struct relevance *r, struct record_cluster *cluster) { int i; for (i = 1; i < r->vec_len; i++) - if (head->term_frequency_vec[i] > 0) + if (cluster->term_frequency_vec[i] > 0) r->doc_frequency_vec[i]++; r->doc_frequency_vec[0]++; } +#ifdef GAGA #ifdef FLOAT_REL static int comp(const void *p1, const void *p2) { @@ -197,13 +195,14 @@ static int comp(const void *p1, const void *p2) #else static int comp(const void *p1, const void *p2) { - struct record **r1 = (struct record **) p1; - struct record **r2 = (struct record **) p2; + struct record_cluster **r1 = (struct record_cluster **) p1; + struct record_cluster **r2 = (struct record_cluster **) p2; return (*r2)->relevance - (*r1)->relevance; } #endif +#endif -// Prepare for a relevance-sorted read of up to num entries +// Prepare for a relevance-sorted read void relevance_prepare_read(struct relevance *rel, struct reclist *reclist) { int i; @@ -221,7 +220,7 @@ void relevance_prepare_read(struct relevance *rel, struct reclist *reclist) for (i = 0; i < reclist->num_records; i++) { int t; - struct record *rec = reclist->flatlist[i]; + struct record_cluster *rec = reclist->flatlist[i]; float relevance; relevance = 0; for (t = 1; t < rel->vec_len; t++) @@ -234,7 +233,9 @@ void relevance_prepare_read(struct relevance *rel, struct reclist *reclist) } rec->relevance = (int) (relevance * 100000); } +#ifdef GAGA qsort(reclist->flatlist, reclist->num_records, sizeof(struct record*), comp); +#endif reclist->pointer = 0; xfree(idfvec); }