+ {
+ // Store the log of doc_frequency_vec[0] over doc_frequency_vec[i];
+ // the float cast keeps the division from being done on integers.
+ idfvec[i] = log((float) rel->doc_frequency_vec[0] /
+ rel->doc_frequency_vec[i]);
+ // NOTE(review): the override below may be terribly wrong. It covers
+ // the case doc_frequency_vec[0] == doc_frequency_vec[i], where the
+ // log comes out as 0. Traditional TF-IDF may treat a word that
+ // occurs in every record as irrelevant, but that case is expected
+ // to show up a lot here, so any result below 0.0000001 (negative
+ // results included) is replaced with 1.
+ if (idfvec[i] < 0.0000001)
+ idfvec[i] = 1;
+ }