X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=src%2Fpazpar2.c;h=c24ec00ec07bc45b8036f91ceace1641cdc209c0;hb=82b7dfb4f633ed64ddb402d3cd81d391e6a936bc;hp=b5a0ee71719a7047a332ef2ca58028b7cc27d88c;hpb=7c0b91839755ff1090d9ae90f846de9c66f04f6a;p=pazpar2-moved-to-github.git diff --git a/src/pazpar2.c b/src/pazpar2.c index b5a0ee7..c24ec00 100644 --- a/src/pazpar2.c +++ b/src/pazpar2.c @@ -1,4 +1,4 @@ -/* $Id: pazpar2.c,v 1.22 2007-01-09 22:06:49 quinn Exp $ */; +/* $Id: pazpar2.c,v 1.34 2007-01-15 20:01:53 quinn Exp $ */ #include #include @@ -29,6 +29,8 @@ #include #endif +#include + #include "pazpar2.h" #include "eventl.h" #include "http.h" @@ -70,6 +72,8 @@ static char *client_states[] = { // Note: Some things in this structure will eventually move to configuration struct parameters global_parameters = { + "", + "", 0, 0, 30, @@ -282,10 +286,27 @@ static void do_searchResponse(IOCHAN i, Z_APDU *a) } } -char *normalize_mergekey(char *buf) +char *normalize_mergekey(char *buf, int skiparticle) { char *p = buf, *pout = buf; + if (skiparticle) + { + char firstword[64]; + char articles[] = "the den der die des an a "; // must end in space + + while (*p && !isalnum(*p)) + p++; + pout = firstword; + while (*p && *p != ' ' && pout - firstword < 62) + *(pout++) = tolower(*(p++)); + *(pout++) = ' '; + *(pout++) = '\0'; + if (!strstr(articles, firstword)) + p = buf; + pout = buf; + } + while (*p) { while (*p && !isalnum(*p)) @@ -298,7 +319,10 @@ char *normalize_mergekey(char *buf) p++; } if (buf != pout) - *pout = '\0'; + do { + *(pout--) = '\0'; + } + while (pout > buf && *pout == ' '); return buf; } @@ -343,6 +367,8 @@ static void add_facet(struct session *s, const char *type, const char *value) { int i; + if (!*value) + return; for (i = 0; i < s->num_termlists; i++) if (!strcmp(s->termlists[i].name, type)) break; @@ -419,7 +445,11 @@ static xmlDoc *normalize_record(struct client *cl, Z_External *rec) if (global_parameters.dump_records) { fprintf(stderr, "Record:\n----------------\n"); +#if LIBXML_VERSION >= 20600 xmlDocFormatDump(stderr, rdoc, 1); +#else + xmlDocDump(stderr, rdoc); +#endif } return rdoc; } @@ -480,11 +510,12 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) res->next = 0; res->metadata = nmem_malloc(se->nmem, sizeof(struct record_metadata*) * service->num_metadata); - bzero(res->metadata, sizeof(struct record_metadata*) * service->num_metadata); + memset(res->metadata, 0, sizeof(struct record_metadata*) * service->num_metadata); mergekey_norm = nmem_strdup(se->nmem, (char*) mergekey); xmlFree(mergekey); - normalize_mergekey(mergekey_norm); + normalize_mergekey(mergekey_norm, 0); + yaz_log(YLOG_LOG, "MK: '%s'", mergekey_norm); cluster = reclist_insert(se->reclist, res, mergekey_norm, &se->total_merged); if (!cluster) @@ -508,18 +539,21 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) continue; if (!strcmp(n->name, "metadata")) { - type = xmlGetProp(n, "type"); - value = xmlNodeListGetString(xdoc, n->children, 0); struct conf_metadata *md = 0; + struct conf_sortkey *sk = 0; struct record_metadata **wheretoput, *newm; int imeta; int first, last; + type = xmlGetProp(n, "type"); + value = xmlNodeListGetString(xdoc, n->children, 0); // First, find out what field we're looking at for (imeta = 0; imeta < service->num_metadata; imeta++) if (!strcmp(type, service->metadata[imeta].name)) { md = &service->metadata[imeta]; + if (md->sortkey_offset >= 0) + sk = &service->sortkeys[md->sortkey_offset]; break; } if (!md) @@ -539,7 +573,12 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) newm->next = 0; if (md->type == Metadata_type_generic) { + char *p; newm->data.text = nmem_strdup(se->nmem, value); + for (p = newm->data.text + strlen(newm->data.text) - 1; + p > newm->data.text && strchr(" ,/.", *p); p--) + *p = '\0'; + } else if (md->type == Metadata_type_year) { @@ -572,7 +611,19 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) { if (!*wheretoput || strlen(newm->data.text) > strlen((*wheretoput)->data.text)) - *wheretoput = newm; + { + *wheretoput = newm; + if (sk) + { + char *s = nmem_strdup(se->nmem, newm->data.text); + if (!cluster->sortkeys[md->sortkey_offset]) + cluster->sortkeys[md->sortkey_offset] = + nmem_malloc(se->nmem, sizeof(union data_types)); + normalize_mergekey(s, + (sk->type == Metadata_sortkey_skiparticle)); + cluster->sortkeys[md->sortkey_offset]->text = s; + } + } } else if (md->merge == Metadata_merge_all || md->merge == Metadata_merge_no) { @@ -585,16 +636,31 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) if (!*wheretoput) { *wheretoput = newm; - (*wheretoput)->data.year.year1 = first; - (*wheretoput)->data.year.year2 = last; + (*wheretoput)->data.number.min = first; + (*wheretoput)->data.number.max = last; + if (sk) + cluster->sortkeys[md->sortkey_offset] = &newm->data; } else { - if (first < (*wheretoput)->data.year.year1) - (*wheretoput)->data.year.year1 = first; - if (last > (*wheretoput)->data.year.year2) - (*wheretoput)->data.year.year2 = last; + if (first < (*wheretoput)->data.number.min) + (*wheretoput)->data.number.min = first; + if (last > (*wheretoput)->data.number.max) + (*wheretoput)->data.number.max = last; + if (sk) + { + union data_types *sdata = cluster->sortkeys[md->sortkey_offset]; + sdata->number.min = first; + sdata->number.max = last; + } } +#ifdef GAGA + if (sk) + { + union data_types *sdata = cluster->sortkeys[md->sortkey_offset]; + yaz_log(YLOG_LOG, "SK range: %d-%d", sdata->number.min, sdata->number.max); + } +#endif } else yaz_log(YLOG_WARN, "Don't know how to merge on element name %s", md->name); @@ -859,7 +925,8 @@ static struct connection *connection_create(struct client *cl) if (!(addr = cs_straddr(link, cl->database->host->ipport))) { - yaz_log(YLOG_WARN|YLOG_ERRNO, "Lookup of IP address failed?"); + yaz_log(YLOG_WARN|YLOG_ERRNO, "Lookup of IP address %s failed?", + cl->database->host->ipport); return 0; } @@ -1015,7 +1082,7 @@ void load_simpletargets(const char *fn) } assert(addrinfo->ai_family == PF_INET); memcpy(addrbuf, &((struct sockaddr_in*)addrinfo->ai_addr)->sin_addr.s_addr, 4); - sprintf(ipport, "%hhd.%hhd.%hhd.%hhd:%s", + sprintf(ipport, "%u.%u.%u.%u:%s", addrbuf[0], addrbuf[1], addrbuf[2], addrbuf[3], port); host->ipport = xstrdup(ipport); freeaddrinfo(addrinfo); @@ -1312,16 +1379,24 @@ struct record_cluster *show_single(struct session *s, int id) return 0; } -struct record_cluster **show(struct session *s, int start, int *num, int *total, - int *sumhits, NMEM nmem_show) +struct record_cluster **show(struct session *s, struct reclist_sortparms *sp, int start, + int *num, int *total, int *sumhits, NMEM nmem_show) { struct record_cluster **recs = nmem_malloc(nmem_show, *num * sizeof(struct record_cluster *)); + struct reclist_sortparms *spp; int i; #if USE_TIMING yaz_timing_t t = yaz_timing_create(); #endif - relevance_prepare_read(s->relevance, s->reclist); + + for (spp = sp; spp; spp = spp->next) + if (spp->type == Metadata_sortkey_relevance) + { + relevance_prepare_read(s->relevance, s->reclist); + break; + } + reclist_sort(s->reclist, sp); *total = s->reclist->num_records; *sumhits = s->total_hits; @@ -1359,7 +1434,7 @@ void statistics(struct session *se, struct statistics *stat) struct client *cl; int count = 0; - bzero(stat, sizeof(*stat)); + memset(stat, 0, sizeof(*stat)); for (cl = se->clients; cl; cl = cl->next) { if (!cl->connection) @@ -1394,13 +1469,55 @@ static CCL_bibset load_cclfile(const char *fn) return res; } +static void start_http_listener(void) +{ + char hp[128] = ""; + struct conf_server *ser = global_parameters.server; + + if (*global_parameters.listener_override) + strcpy(hp, global_parameters.listener_override); + else + { + strcpy(hp, ser->host ? ser->host : ""); + if (ser->port) + { + if (*hp) + strcat(hp, ":"); + sprintf(hp + strlen(hp), "%d", ser->port); + } + } + http_init(hp); +} + +static void start_proxy(void) +{ + char hp[128] = ""; + struct conf_server *ser = global_parameters.server; + + if (*global_parameters.proxy_override) + strcpy(hp, global_parameters.proxy_override); + else if (ser->proxy_host || ser->proxy_port) + { + strcpy(hp, ser->proxy_host ? ser->proxy_host : ""); + if (ser->proxy_port) + { + if (*hp) + strcat(hp, ":"); + sprintf(hp + strlen(hp), "%d", ser->proxy_port); + } + } + else + return; + + http_set_proxyaddr(hp); +} + int main(int argc, char **argv) { int ret; char *arg; - int setport = 0; - if (signal(SIGPIPE, SIG_IGN) < 0) + if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) yaz_log(YLOG_WARN|YLOG_ERRNO, "signal"); yaz_log_init(YLOG_DEFAULT_LEVEL, "pazpar2", 0); @@ -1413,14 +1530,13 @@ int main(int argc, char **argv) exit(1); break; case 'h': - http_init(arg); - setport++; + strcpy(global_parameters.listener_override, arg); break; case 'C': global_parameters.ccl_filter = load_cclfile(arg); break; case 'p': - http_set_proxyaddr(arg); + strcpy(global_parameters.proxy_override, arg); break; case 's': load_simpletargets(arg); @@ -1446,12 +1562,8 @@ int main(int argc, char **argv) } global_parameters.server = config->servers; - if (!setport) - { - fprintf(stderr, "Set command port with -h\n"); - exit(1); - } - + start_http_listener(); + start_proxy(); global_parameters.ccl_filter = load_cclfile("../etc/default.bib"); global_parameters.yaz_marc = yaz_marc_create(); yaz_marc_subfield_str(global_parameters.yaz_marc, "\t");