From: Adam Dickmeiss Date: Fri, 1 May 2009 13:04:23 +0000 (+0200) Subject: New mergekey facilities. Bug #2343 and bug #2800. X-Git-Tag: v1.1.0~26 X-Git-Url: http://jsfdemo.indexdata.com/cgi-bin?a=commitdiff_plain;h=a6b5e633581cbae0759c3b68faa7a0cd0197f790;p=pazpar2-moved-to-github.git New mergekey facilities. Bug #2343 and bug #2800. The mergekey (recid in display) is now generated by the following rules: the contents of attribute mergekey and all metadata elements with attribute mergekey=yes are combined into one resulting mergekey. If the resulting mergekey is empty, a unique mergekey is generated (which prevents merging). This allows a system with no merging enabled. It also allows mergekeys to be customized "per-service". --- diff --git a/doc/pazpar2_conf.xml b/doc/pazpar2_conf.xml index 931bc3f..5e8ef1a 100644 --- a/doc/pazpar2_conf.xml +++ b/doc/pazpar2_conf.xml @@ -302,6 +302,17 @@ + mergekey + + + If set to yes, the value of this + metadata element is appended to the resulting mergekey. + By default metadata is not part of a mergekey. 
+ + + + + setting diff --git a/src/logic.c b/src/logic.c index 4f811a4..02a133d 100644 --- a/src/logic.c +++ b/src/logic.c @@ -951,6 +951,106 @@ static struct record_metadata *record_metadata_init( return rec_md; } +const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no, + struct conf_service *service, NMEM nmem) +{ + char *mergekey_norm = 0; + xmlNode *root = xmlDocGetRootElement(doc); + WRBUF norm_wr = wrbuf_alloc(); + xmlNode *n; + + /* create mergekey based on mergekey attribute from XSL (if any) */ + xmlChar *mergekey = xmlGetProp(root, (xmlChar *) "mergekey"); + if (mergekey) + { + const char *norm_str; + pp2_relevance_token_t prt = + pp2_relevance_tokenize( + global_parameters.server->mergekey_pct, + (const char *) mergekey); + + while ((norm_str = pp2_relevance_token_next(prt))) + { + if (*norm_str) + { + if (wrbuf_len(norm_wr)) + wrbuf_puts(norm_wr, " "); + wrbuf_puts(norm_wr, norm_str); + } + } + pp2_relevance_token_destroy(prt); + xmlFree(mergekey); + } + /* append (if any) mergekey=yes metadata values */ + for (n = root->children; n; n = n->next) + { + if (n->type != XML_ELEMENT_NODE) + continue; + if (!strcmp((const char *) n->name, "metadata")) + { + struct conf_metadata *ser_md = 0; + int md_field_id = -1; + + xmlChar *type = xmlGetProp(n, (xmlChar *) "type"); + + if (!type) + continue; + + md_field_id + = conf_service_metadata_field_id(service, + (const char *) type); + if (md_field_id >= 0) + { + ser_md = &service->metadata[md_field_id]; + if (ser_md->mergekey == Metadata_mergekey_yes) + { + xmlChar *value = xmlNodeListGetString(doc, n->children, 1); + if (value) + { + const char *norm_str; + pp2_relevance_token_t prt = + pp2_relevance_tokenize( + global_parameters.server->mergekey_pct, + (const char *) value); + + while ((norm_str = pp2_relevance_token_next(prt))) + { + if (*norm_str) + { + if (wrbuf_len(norm_wr)) + wrbuf_puts(norm_wr, " "); + wrbuf_puts(norm_wr, norm_str); + } + } + xmlFree(value); + 
pp2_relevance_token_destroy(prt); + } + } + } + xmlFree(type); + } + } + + /* generate unique key if none is not generated already or is empty */ + if (wrbuf_len(norm_wr) == 0) + { + wrbuf_printf(norm_wr, "%s-%d", + client_get_database(cl)->database->url, record_no); + } + if (wrbuf_len(norm_wr) > 0) + mergekey_norm = nmem_strdup(nmem, wrbuf_cstr(norm_wr)); + wrbuf_destroy(norm_wr); + return mergekey_norm; +} + + + +/** \brief ingest XML record + \param cl client holds the result set for record + \param rec record buffer (0 terminated) + \param record_no record position (1, 2, ..) + \returns resulting record or NULL on failure +*/ struct record *ingest_record(struct client *cl, const char *rec, int record_no) { @@ -960,52 +1060,27 @@ struct record *ingest_record(struct client *cl, const char *rec, struct record *record; struct record_cluster *cluster; struct session *se = client_get_session(cl); - xmlChar *mergekey, *mergekey_norm; + const char *mergekey_norm; xmlChar *type = 0; xmlChar *value = 0; struct conf_service *service = global_parameters.server->service; - const char *norm_str = 0; - pp2_relevance_token_t prt = 0; - WRBUF norm_wr = 0; if (!xdoc) return 0; root = xmlDocGetRootElement(xdoc); - if (!(mergekey = xmlGetProp(root, (xmlChar *) "mergekey"))) + + mergekey_norm = get_mergekey(xdoc, cl, record_no, service, se->nmem); + if (!mergekey_norm) { - yaz_log(YLOG_WARN, "No mergekey found in record"); + yaz_log(YLOG_WARN, "Got no mergekey"); xmlFreeDoc(xdoc); return 0; } - record = record_create(se->nmem, service->num_metadata, service->num_sortkeys, cl, record_no); - prt = pp2_relevance_tokenize( - global_parameters.server->mergekey_pct, (const char *) mergekey); - - - norm_wr = wrbuf_alloc(); - - while ((norm_str = pp2_relevance_token_next(prt))) - { - if (*norm_str) - { - if (wrbuf_len(norm_wr)) - wrbuf_puts(norm_wr, " "); - wrbuf_puts(norm_wr, norm_str); - } - } - - mergekey_norm = (xmlChar *)nmem_strdup(se->nmem, wrbuf_cstr(norm_wr)); - 
wrbuf_destroy(norm_wr); - - pp2_relevance_token_destroy(prt); - - xmlFree(mergekey); - cluster = reclist_insert(se->reclist, global_parameters.server->service, record, (char *) mergekey_norm, @@ -1021,10 +1096,10 @@ struct record *ingest_record(struct client *cl, const char *rec, } relevance_newrec(se->relevance, cluster); - // now parsing XML record and adding data to cluster or record metadata for (n = root->children; n; n = n->next) { + pp2_relevance_token_t prt; if (type) xmlFree(type); if (value) diff --git a/src/pazpar2_config.c b/src/pazpar2_config.c index 43d5bcf..169de11 100644 --- a/src/pazpar2_config.c +++ b/src/pazpar2_config.c @@ -44,6 +44,7 @@ static char confdir[256] = "."; struct conf_config *config = 0; +static struct conf_metadata * conf_metadata_assign(NMEM nmem, struct conf_metadata * metadata, const char *name, @@ -53,7 +54,8 @@ struct conf_metadata * conf_metadata_assign(NMEM nmem, int brief, int termlist, int rank, - int sortkey_offset) + int sortkey_offset, + enum conf_metadata_mergekey mt) { if (!nmem || !metadata || !name) return 0; @@ -73,10 +75,12 @@ struct conf_metadata * conf_metadata_assign(NMEM nmem, metadata->termlist = termlist; metadata->rank = rank; metadata->sortkey_offset = sortkey_offset; + metadata->mergekey = mt; return metadata; } +static struct conf_sortkey * conf_sortkey_assign(NMEM nmem, struct conf_sortkey * sortkey, const char *name, @@ -127,7 +131,8 @@ struct conf_metadata* conf_service_add_metadata(NMEM nmem, int brief, int termlist, int rank, - int sortkey_offset) + int sortkey_offset, + enum conf_metadata_mergekey mt) { struct conf_metadata * md = 0; @@ -138,7 +143,8 @@ struct conf_metadata* conf_service_add_metadata(NMEM nmem, //md = &((service->metadata)[field_id]); md = service->metadata + field_id; md = conf_metadata_assign(nmem, md, name, type, merge, setting, - brief, termlist, rank, sortkey_offset); + brief, termlist, rank, sortkey_offset, + mt); return md; } @@ -239,11 +245,13 @@ static struct conf_service 
*parse_service(xmlNode *node) xmlChar *xml_termlist = xmlGetProp(n, (xmlChar *) "termlist"); xmlChar *xml_rank = xmlGetProp(n, (xmlChar *) "rank"); xmlChar *xml_setting = xmlGetProp(n, (xmlChar *) "setting"); + xmlChar *xml_mergekey = xmlGetProp(n, (xmlChar *) "mergekey"); enum conf_metadata_type type = Metadata_type_generic; enum conf_metadata_merge merge = Metadata_merge_no; enum conf_setting_type setting = Metadata_setting_no; enum conf_sortkey_type sk_type = Metadata_sortkey_relevance; + enum conf_metadata_mergekey mergekey_type = Metadata_mergekey_no; int brief = 0; int termlist = 0; int rank = 0; @@ -372,11 +380,18 @@ static struct conf_service *parse_service(xmlNode *node) else sortkey_offset = -1; + if (xml_mergekey && strcmp((const char *) xml_mergekey, "no")) + { + mergekey_type = Metadata_mergekey_yes; + } + + // metadata known, assign values conf_service_add_metadata(nmem, service, md_node, (const char *) xml_name, type, merge, setting, - brief, termlist, rank, sortkey_offset); + brief, termlist, rank, sortkey_offset, + mergekey_type); xmlFree(xml_name); xmlFree(xml_brief); diff --git a/src/pazpar2_config.h b/src/pazpar2_config.h index 233b557..1e71619 100644 --- a/src/pazpar2_config.h +++ b/src/pazpar2_config.h @@ -56,6 +56,11 @@ enum conf_setting_type { Metadata_setting_parameter // Expose value to normalization stylesheets }; +enum conf_metadata_mergekey { + Metadata_mergekey_no, + Metadata_mergekey_yes +}; + // Describes known metadata elements and how they are to be manipulated // An array of these structure provides a 'map' against which // discovered metadata elements are matched. It also governs storage, @@ -73,23 +78,11 @@ struct conf_metadata enum conf_metadata_type type; enum conf_metadata_merge merge; enum conf_setting_type setting; // Value is to be taken from session/db settings? 
+ enum conf_metadata_type mergekey; }; -struct conf_metadata * conf_metadata_assign(NMEM nmem, - struct conf_metadata * metadata, - const char *name, - enum conf_metadata_type type, - enum conf_metadata_merge merge, - enum conf_setting_type setting, - int brief, - int termlist, - int rank, - int sortkey_offset); - - - // Controls sorting struct conf_sortkey { @@ -97,13 +90,6 @@ struct conf_sortkey enum conf_sortkey_type type; }; -struct conf_sortkey * conf_sortkey_assign(NMEM nmem, - struct conf_sortkey * sortkey, - const char *name, - enum conf_sortkey_type type); - - - // It is conceivable that there will eventually be several 'services' // offered from one server, with separate configuration -- possibly // more than one services associated with the same port. For now, @@ -120,7 +106,6 @@ struct conf_service struct conf_service * conf_service_create(NMEM nmem, int num_metadata, int num_sortkeys); - struct conf_metadata* conf_service_add_metadata(NMEM nmem, struct conf_service *service, int field_id, @@ -131,7 +116,8 @@ struct conf_metadata* conf_service_add_metadata(NMEM nmem, int brief, int termlist, int rank, - int sortkey_offset); + int sortkey_offset, + enum conf_metadata_mergekey mt); struct conf_sortkey * conf_service_add_sortkey(NMEM nmem, struct conf_service *service, diff --git a/src/test_config.c b/src/test_config.c index c0b43d3..3d7e091 100644 --- a/src/test_config.c +++ b/src/test_config.c @@ -43,59 +43,68 @@ void test_conf_service(int argc, char **argv) // expected metadata failures YAZ_CHECK(!conf_service_add_metadata(0, service, 0, "dead_nmem", - Metadata_type_generic, - Metadata_merge_unique, - Metadata_setting_no, - 1, 1, 1, 0)); + Metadata_type_generic, + Metadata_merge_unique, + Metadata_setting_no, + 1, 1, 1, 0, + Metadata_mergekey_no)); YAZ_CHECK(!conf_service_add_metadata(nmem, 0, 0, "service_needed", Metadata_type_generic, Metadata_merge_unique, Metadata_setting_no, - 1, 1, 1, 0)); + 1, 1, 1, 0, + Metadata_mergekey_no)); 
YAZ_CHECK(!conf_service_add_metadata(nmem, service, -1, "out_of_bounds", Metadata_type_generic, Metadata_merge_unique, Metadata_setting_no, - 1, 1, 1, 0)); + 1, 1, 1, 0, + Metadata_mergekey_no)); YAZ_CHECK(!conf_service_add_metadata(nmem, service, 4, "out_of_bounds", Metadata_type_generic, Metadata_merge_unique, Metadata_setting_no, - 1, 1, 1, 0)); + 1, 1, 1, 0, + Metadata_mergekey_no)); YAZ_CHECK(!conf_service_add_metadata(nmem, service, 0, 0, //missing name Metadata_type_generic, Metadata_merge_unique, Metadata_setting_no, - 1, 1, 1, 0)); + 1, 1, 1, 0, + Metadata_mergekey_no)); // expected metadata sucesses YAZ_CHECK(conf_service_add_metadata(nmem, service, 0, "title", Metadata_type_generic, Metadata_merge_unique, Metadata_setting_no, - 1, 1, 1, 0)); + 1, 1, 1, 0, + Metadata_mergekey_no)); YAZ_CHECK(conf_service_add_metadata(nmem, service, 1, "author", Metadata_type_generic, Metadata_merge_longest, - Metadata_setting_no, - 1, 1, 1, 0)); + Metadata_setting_no, + 1, 1, 1, 0, + Metadata_mergekey_no)); YAZ_CHECK(conf_service_add_metadata(nmem, service, 2, "isbn", Metadata_type_number, Metadata_merge_no, - Metadata_setting_no, - 1, 1, 1, 0)); + Metadata_setting_no, + 1, 1, 1, 0, + Metadata_mergekey_no)); YAZ_CHECK(conf_service_add_metadata(nmem, service, 3, "year", Metadata_type_year, Metadata_merge_range, - Metadata_setting_no, - 1, 1, 1, 0)); + Metadata_setting_no, + 1, 1, 1, 0, + Metadata_mergekey_no)); // expected sortkey failures diff --git a/src/test_record.c b/src/test_record.c index 58da9e3..b55aa7c 100644 --- a/src/test_record.c +++ b/src/test_record.c @@ -56,30 +56,42 @@ void test_record(int argc, char **argv) service = conf_service_create(nmem, 4, 3); YAZ_CHECK(service); - YAZ_CHECK(conf_service_add_metadata(nmem, service, 0, "title", - Metadata_type_generic, Metadata_merge_unique, - Metadata_setting_no, 1, 1, 1, 0)); - - YAZ_CHECK(conf_service_add_metadata(nmem, service, 1, "author", - Metadata_type_generic, Metadata_merge_longest, - Metadata_setting_no,1, 
1, 1, 0)); - - YAZ_CHECK(conf_service_add_metadata(nmem, service, 2, "isbn", - Metadata_type_number, Metadata_merge_no, - Metadata_setting_no, 1, 1, 1, 0)); - - YAZ_CHECK(conf_service_add_metadata(nmem, service, 3, "year", - Metadata_type_year, Metadata_merge_range, - Metadata_setting_no, 1, 1, 1, 0)); - - YAZ_CHECK(conf_service_add_sortkey(nmem, service, 0, "relevance", - Metadata_sortkey_relevance)); - - YAZ_CHECK(conf_service_add_sortkey(nmem, service, 1, "title", - Metadata_sortkey_string)); + YAZ_CHECK(conf_service_add_metadata( + nmem, service, 0, "title", + Metadata_type_generic, Metadata_merge_unique, + Metadata_setting_no, 1, 1, 1, 0, + Metadata_mergekey_no + )); + + YAZ_CHECK(conf_service_add_metadata( + nmem, service, 1, "author", + Metadata_type_generic, Metadata_merge_longest, + Metadata_setting_no,1, 1, 1, 0, + Metadata_mergekey_no)); + + YAZ_CHECK(conf_service_add_metadata( + nmem, service, 2, "isbn", + Metadata_type_number, Metadata_merge_no, + Metadata_setting_no, 1, 1, 1, 0, + Metadata_mergekey_no)); + + YAZ_CHECK(conf_service_add_metadata( + nmem, service, 3, "year", + Metadata_type_year, Metadata_merge_range, + Metadata_setting_no, 1, 1, 1, 0, + Metadata_mergekey_no)); + + YAZ_CHECK(conf_service_add_sortkey( + nmem, service, 0, "relevance", + Metadata_sortkey_relevance)); + + YAZ_CHECK(conf_service_add_sortkey( + nmem, service, 1, "title", + Metadata_sortkey_string)); - YAZ_CHECK(conf_service_add_sortkey(nmem, service, 2, "year", - Metadata_sortkey_numeric)); + YAZ_CHECK(conf_service_add_sortkey( + nmem, service, 2, "year", + Metadata_sortkey_numeric));