- char c;
- int res;
- int skipped;
- while (len && (c = raw_char(tolower(*words))) < 0)
- {
- words++;
- len--;
- }
- if (!len)
- return;
- skipped = 0;
- if ((res = word_trie_match(r->wt, words, len, &skipped)))
- {
- words += skipped;
- len -= skipped;
- head->term_frequency_vec[res] += multiplier;
- }
- else
- {
- while (len && (c = raw_char(tolower(*words))) >= 0)
- {
- words++;
- len--;
- }
- }
- head->term_frequency_vec[0]++;
+ pp2_charset_token_destroy((*rp)->prt);
+ *rp = 0;
+ }
+}
+
+void relevance_newrec(struct relevance *r, struct record_cluster *rec)
+{
+ if (!rec->term_frequency_vec)
+ {
+ int i;
+
+ // term frequency [1,..] . [0] is total length of all fields
+ rec->term_frequency_vec =
+ nmem_malloc(r->nmem,
+ r->vec_len * sizeof(*rec->term_frequency_vec));
+ for (i = 0; i < r->vec_len; i++)
+ rec->term_frequency_vec[i] = 0;
+
+ // term frequency divided by length of field [1,...]
+ rec->term_frequency_vecf =
+ nmem_malloc(r->nmem,
+ r->vec_len * sizeof(*rec->term_frequency_vecf));
+ for (i = 0; i < r->vec_len; i++)
+ rec->term_frequency_vecf[i] = 0.0;
+
+ // for relevance_countwords (so we don't have to xmalloc/xfree)
+ rec->term_frequency_vec_tmp =
+ nmem_malloc(r->nmem,
+ r->vec_len * sizeof(*rec->term_frequency_vec_tmp));