if they are number a part (all in order). Default is 0 (following
terms has no effect).
-Rank tweak: lead=number will increase mult by number if term is first
-term in field, number-1 if second, ... 0 if term is at offset
-number of more. Default value is 0 (position irrelevant).
+Rank tweak: lead=k will divide mult by 1 + log2(1+k*l), where k is
+the value given by lead and l is the offset from the beginning of the
+field at which the term occurs (l=0 for first term, l=1 for second
+term, ...). Default value of k is 0.0 (position has no effect).
Rank tweak: length=strategy. length="linear" if mult is to be divided
by length (existing, default behavior), length="log" if mult is to be
pp2_charset_token_t prt;
int rank_cluster;
int follow_boost;
- int lead_boost;
+ double lead_decay;
int length_divide;
NMEM nmem;
};
struct word_entry *next;
};
-static int word_entry_match(struct relevance *r, const char *norm_str,
- const char *rank, int *mult)
+static struct word_entry *word_entry_match(struct relevance *r,
+ const char *norm_str,
+ const char *rank, int *mult)
{
int i = 1;
struct word_entry *entries = r->entries;
{
e_follow->follow_boost = extra--;
}
- return entries->termno;
+ return entries;
}
entries->follow_boost = 0;
}
int *mult = r->term_frequency_vec_tmp;
const char *norm_str;
int i, length = 0;
- int lead_mult = r->lead_boost;
+ double lead_decay = r->lead_decay;
struct word_entry *e;
WRBUF w = cluster->relevance_explain1;
while ((norm_str = pp2_charset_token_next(r->prt)))
{
int local_mult = 0;
- int res = word_entry_match(r, norm_str, rank, &local_mult);
- if (res)
+ e = word_entry_match(r, norm_str, rank, &local_mult);
+ if (e)
{
+ int res = e->termno;
assert(res < r->vec_len);
- mult[res] += local_mult + lead_mult;
+ mult[res] += local_mult / (1 + log2(1 + lead_decay * length));
+ wrbuf_printf(w, "%s: mult[%d] += local_mult(%d) / (1+log2(1+lead_decay(%f) * length(%d)));\n", e->display_str, res, local_mult, lead_decay, length);
}
- if (lead_mult > 0)
- --lead_mult;
length++;
}
{
if (length == 0 || mult[i] == 0)
continue;
- wrbuf_printf(w, "%s: field=%s vecf[%d] += mult(%d)",
- e->display_str, name, i, mult[i]);
+ wrbuf_printf(w, "%s: field=%s vecf[%d] += mult[%d](%d)",
+ e->display_str, name, i, i, mult[i]);
switch (r->length_divide)
{
case 0:
struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
struct ccl_rpn_node *query,
int rank_cluster,
- int follow_boost, int lead_boost,
+ int follow_boost, double lead_decay,
int length_divide)
{
NMEM nmem = nmem_create();
res->vec_len = 1;
res->rank_cluster = rank_cluster;
res->follow_boost = follow_boost;
- res->lead_boost = lead_boost;
+ res->lead_decay = lead_decay;
res->length_divide = length_divide;
res->prt = pp2_charset_token_create(pft, "relevance");
struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
struct ccl_rpn_node *query,
int rank_cluster, int follow_boost,
- int lead_boost, int length_divide);
+ double lead_decay, int length_divide);
void relevance_destroy(struct relevance **rp);
void relevance_newrec(struct relevance *r, struct record_cluster *cluster);
void relevance_countwords(struct relevance *r, struct record_cluster *cluster,