X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=src%2Fpazpar2.c;h=c24ec00ec07bc45b8036f91ceace1641cdc209c0;hb=82b7dfb4f633ed64ddb402d3cd81d391e6a936bc;hp=fc5ee382a1e6380407da577cfc2a190db9d834e6;hpb=c30942b8016d7f38d221a1ecdaebf25ecdd1805a;p=pazpar2-moved-to-github.git diff --git a/src/pazpar2.c b/src/pazpar2.c index fc5ee38..c24ec00 100644 --- a/src/pazpar2.c +++ b/src/pazpar2.c @@ -1,4 +1,4 @@ -/* $Id: pazpar2.c,v 1.28 2007-01-12 23:37:15 adam Exp $ */ +/* $Id: pazpar2.c,v 1.34 2007-01-15 20:01:53 quinn Exp $ */ #include #include @@ -286,10 +286,27 @@ static void do_searchResponse(IOCHAN i, Z_APDU *a) } } -char *normalize_mergekey(char *buf) +char *normalize_mergekey(char *buf, int skiparticle) { char *p = buf, *pout = buf; + if (skiparticle) + { + char firstword[64]; + char articles[] = "the den der die des an a "; // must end in space + + while (*p && !isalnum(*p)) + p++; + pout = firstword; + while (*p && *p != ' ' && pout - firstword < 62) + *(pout++) = tolower(*(p++)); + *(pout++) = ' '; + *(pout++) = '\0'; + if (!strstr(articles, firstword)) + p = buf; + pout = buf; + } + while (*p) { while (*p && !isalnum(*p)) @@ -302,7 +319,10 @@ char *normalize_mergekey(char *buf) p++; } if (buf != pout) - *pout = '\0'; + do { + *(pout--) = '\0'; + } + while (pout > buf && *pout == ' '); return buf; } @@ -347,6 +367,8 @@ static void add_facet(struct session *s, const char *type, const char *value) { int i; + if (!*value) + return; for (i = 0; i < s->num_termlists; i++) if (!strcmp(s->termlists[i].name, type)) break; @@ -423,7 +445,11 @@ static xmlDoc *normalize_record(struct client *cl, Z_External *rec) if (global_parameters.dump_records) { fprintf(stderr, "Record:\n----------------\n"); +#if LIBXML_VERSION >= 20600 xmlDocFormatDump(stderr, rdoc, 1); +#else + xmlDocDump(stderr, rdoc); +#endif } return rdoc; } @@ -488,7 +514,8 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) mergekey_norm = nmem_strdup(se->nmem, (char*) mergekey); xmlFree(mergekey); - normalize_mergekey(mergekey_norm); + normalize_mergekey(mergekey_norm, 0); + yaz_log(YLOG_LOG, "MK: '%s'", mergekey_norm); cluster = reclist_insert(se->reclist, res, mergekey_norm, &se->total_merged); if (!cluster) @@ -513,6 +540,7 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) if (!strcmp(n->name, "metadata")) { struct conf_metadata *md = 0; + struct conf_sortkey *sk = 0; struct record_metadata **wheretoput, *newm; int imeta; int first, last; @@ -524,6 +552,8 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) if (!strcmp(type, service->metadata[imeta].name)) { md = &service->metadata[imeta]; + if (md->sortkey_offset >= 0) + sk = &service->sortkeys[md->sortkey_offset]; break; } if (!md) @@ -543,7 +573,12 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) newm->next = 0; if (md->type == Metadata_type_generic) { + char *p; newm->data.text = nmem_strdup(se->nmem, value); + for (p = newm->data.text + strlen(newm->data.text) - 1; + p > newm->data.text && strchr(" ,/.", *p); p--) + *p = '\0'; + } else if (md->type == Metadata_type_year) { @@ -576,7 +611,19 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) { if (!*wheretoput || strlen(newm->data.text) > strlen((*wheretoput)->data.text)) - *wheretoput = newm; + { + *wheretoput = newm; + if (sk) + { + char *s = nmem_strdup(se->nmem, newm->data.text); + if (!cluster->sortkeys[md->sortkey_offset]) + cluster->sortkeys[md->sortkey_offset] = + nmem_malloc(se->nmem, sizeof(union data_types)); + normalize_mergekey(s, + (sk->type == Metadata_sortkey_skiparticle)); + cluster->sortkeys[md->sortkey_offset]->text = s; + } + } } else if (md->merge == Metadata_merge_all || md->merge == Metadata_merge_no) { @@ -589,16 +636,31 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) if (!*wheretoput) { *wheretoput = newm; - (*wheretoput)->data.year.year1 = first; - (*wheretoput)->data.year.year2 = last; + (*wheretoput)->data.number.min = first; + (*wheretoput)->data.number.max = last; + if (sk) + cluster->sortkeys[md->sortkey_offset] = &newm->data; } else { - if (first < (*wheretoput)->data.year.year1) - (*wheretoput)->data.year.year1 = first; - if (last > (*wheretoput)->data.year.year2) - (*wheretoput)->data.year.year2 = last; + if (first < (*wheretoput)->data.number.min) + (*wheretoput)->data.number.min = first; + if (last > (*wheretoput)->data.number.max) + (*wheretoput)->data.number.max = last; + if (sk) + { + union data_types *sdata = cluster->sortkeys[md->sortkey_offset]; + sdata->number.min = first; + sdata->number.max = last; + } + } +#ifdef GAGA + if (sk) + { + union data_types *sdata = cluster->sortkeys[md->sortkey_offset]; + yaz_log(YLOG_LOG, "SK range: %d-%d", sdata->number.min, sdata->number.max); } +#endif } else yaz_log(YLOG_WARN, "Don't know how to merge on element name %s", md->name); @@ -1317,16 +1379,24 @@ struct record_cluster *show_single(struct session *s, int id) return 0; } -struct record_cluster **show(struct session *s, int start, int *num, int *total, - int *sumhits, NMEM nmem_show) +struct record_cluster **show(struct session *s, struct reclist_sortparms *sp, int start, + int *num, int *total, int *sumhits, NMEM nmem_show) { struct record_cluster **recs = nmem_malloc(nmem_show, *num * sizeof(struct record_cluster *)); + struct reclist_sortparms *spp; int i; #if USE_TIMING yaz_timing_t t = yaz_timing_create(); #endif - relevance_prepare_read(s->relevance, s->reclist); + + for (spp = sp; spp; spp = spp->next) + if (spp->type == Metadata_sortkey_relevance) + { + relevance_prepare_read(s->relevance, s->reclist); + break; + } + reclist_sort(s->reclist, sp); *total = s->reclist->num_records; *sumhits = s->total_hits;