</listitem>
</varlistentry>
+ <varlistentry><term>mergekey</term>
+ <listitem>
+ <para>
+ If set to <literal>yes</literal>, the normalized value of this
+ metadata element is appended to the resulting mergekey.
+ By default, a metadata element is not part of the mergekey.
+ </para>
+ </listitem>
+ </varlistentry>
+
+
<varlistentry><term>setting</term>
<listitem>
<para>
return rec_md;
}
+/** \brief append the normalized tokens of a string to a mergekey buffer
+    \param norm_wr buffer that receives the tokens, separated by one space
+    \param text string to tokenize (0 terminated)
+
+    The string is tokenized with pp2_relevance_tokenize using
+    global_parameters.server->mergekey_pct; empty tokens are skipped.
+*/
+static void mergekey_append_tokens(WRBUF norm_wr, const char *text)
+{
+    const char *norm_str;
+    pp2_relevance_token_t prt =
+        pp2_relevance_tokenize(global_parameters.server->mergekey_pct, text);
+
+    while ((norm_str = pp2_relevance_token_next(prt)))
+    {
+        if (*norm_str)
+        {
+            /* single space between tokens, none before the first */
+            if (wrbuf_len(norm_wr))
+                wrbuf_puts(norm_wr, " ");
+            wrbuf_puts(norm_wr, norm_str);
+        }
+    }
+    pp2_relevance_token_destroy(prt);
+}
+
+/** \brief build the normalized mergekey for a record
+    \param doc XML record document
+    \param cl client; its database URL is used for the fallback key
+    \param record_no record position, used for the fallback key
+    \param service service whose metadata configuration is consulted
+    \param nmem memory handle the returned string is allocated on
+    \returns mergekey string (nmem allocated), or 0 if the document has
+             no root element
+
+    The key is made of the tokens of the root element's "mergekey"
+    attribute (if present), followed by the tokens of every "metadata"
+    child element whose configured field has mergekey set to
+    Metadata_mergekey_yes. If that yields nothing, the key
+    "<database url>-<record_no>" is generated instead.
+*/
+const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no,
+                         struct conf_service *service, NMEM nmem)
+{
+    char *mergekey_norm = 0;
+    xmlNode *root = xmlDocGetRootElement(doc);
+    WRBUF norm_wr;
+    xmlNode *n;
+    xmlChar *mergekey;
+
+    /* a document without a root element offers nothing to build a key
+       from; report "no mergekey" instead of dereferencing a null node */
+    if (!root)
+        return 0;
+
+    norm_wr = wrbuf_alloc();
+
+    /* create mergekey based on mergekey attribute from XSL (if any) */
+    mergekey = xmlGetProp(root, (xmlChar *) "mergekey");
+    if (mergekey)
+    {
+        mergekey_append_tokens(norm_wr, (const char *) mergekey);
+        xmlFree(mergekey);
+    }
+
+    /* append (if any) mergekey=yes metadata values */
+    for (n = root->children; n; n = n->next)
+    {
+        xmlChar *type;
+        int md_field_id;
+
+        if (n->type != XML_ELEMENT_NODE)
+            continue;
+        if (strcmp((const char *) n->name, "metadata"))
+            continue;
+
+        type = xmlGetProp(n, (xmlChar *) "type");
+        if (!type)
+            continue;
+
+        md_field_id = conf_service_metadata_field_id(service,
+                                                     (const char *) type);
+        /* the type name is only needed for the lookup above */
+        xmlFree(type);
+
+        if (md_field_id >= 0
+            && service->metadata[md_field_id].mergekey == Metadata_mergekey_yes)
+        {
+            xmlChar *value = xmlNodeListGetString(doc, n->children, 1);
+            if (value)
+            {
+                mergekey_append_tokens(norm_wr, (const char *) value);
+                xmlFree(value);
+            }
+        }
+    }
+
+    /* generate a unique key if no key was built above (or it is empty) */
+    if (wrbuf_len(norm_wr) == 0)
+    {
+        wrbuf_printf(norm_wr, "%s-%d",
+                     client_get_database(cl)->database->url, record_no);
+    }
+    if (wrbuf_len(norm_wr) > 0)
+        mergekey_norm = nmem_strdup(nmem, wrbuf_cstr(norm_wr));
+    wrbuf_destroy(norm_wr);
+    return mergekey_norm;
+}
+
+
+
+/** \brief ingest XML record
+ \param cl client holds the result set for record
+ \param rec record buffer (0 terminated)
+ \param record_no record position (1, 2, ..)
+ \returns resulting record or NULL on failure
+*/
struct record *ingest_record(struct client *cl, const char *rec,
int record_no)
{
struct record *record;
struct record_cluster *cluster;
struct session *se = client_get_session(cl);
- xmlChar *mergekey, *mergekey_norm;
+ const char *mergekey_norm;
xmlChar *type = 0;
xmlChar *value = 0;
struct conf_service *service = global_parameters.server->service;
- const char *norm_str = 0;
- pp2_relevance_token_t prt = 0;
- WRBUF norm_wr = 0;
if (!xdoc)
return 0;
root = xmlDocGetRootElement(xdoc);
- if (!(mergekey = xmlGetProp(root, (xmlChar *) "mergekey")))
+
+ mergekey_norm = get_mergekey(xdoc, cl, record_no, service, se->nmem);
+ if (!mergekey_norm)
{
- yaz_log(YLOG_WARN, "No mergekey found in record");
+ yaz_log(YLOG_WARN, "Got no mergekey");
xmlFreeDoc(xdoc);
return 0;
}
-
record = record_create(se->nmem,
service->num_metadata, service->num_sortkeys, cl,
record_no);
- prt = pp2_relevance_tokenize(
- global_parameters.server->mergekey_pct, (const char *) mergekey);
-
-
- norm_wr = wrbuf_alloc();
-
- while ((norm_str = pp2_relevance_token_next(prt)))
- {
- if (*norm_str)
- {
- if (wrbuf_len(norm_wr))
- wrbuf_puts(norm_wr, " ");
- wrbuf_puts(norm_wr, norm_str);
- }
- }
-
- mergekey_norm = (xmlChar *)nmem_strdup(se->nmem, wrbuf_cstr(norm_wr));
- wrbuf_destroy(norm_wr);
-
- pp2_relevance_token_destroy(prt);
-
- xmlFree(mergekey);
-
cluster = reclist_insert(se->reclist,
global_parameters.server->service,
record, (char *) mergekey_norm,
}
relevance_newrec(se->relevance, cluster);
-
// now parsing XML record and adding data to cluster or record metadata
for (n = root->children; n; n = n->next)
{
+ pp2_relevance_token_t prt;
if (type)
xmlFree(type);
if (value)
struct conf_config *config = 0;
+static
struct conf_metadata * conf_metadata_assign(NMEM nmem,
struct conf_metadata * metadata,
const char *name,
int brief,
int termlist,
int rank,
- int sortkey_offset)
+ int sortkey_offset,
+ enum conf_metadata_mergekey mt)
{
if (!nmem || !metadata || !name)
return 0;
metadata->termlist = termlist;
metadata->rank = rank;
metadata->sortkey_offset = sortkey_offset;
+ metadata->mergekey = mt;
return metadata;
}
+static
struct conf_sortkey * conf_sortkey_assign(NMEM nmem,
struct conf_sortkey * sortkey,
const char *name,
int brief,
int termlist,
int rank,
- int sortkey_offset)
+ int sortkey_offset,
+ enum conf_metadata_mergekey mt)
{
struct conf_metadata * md = 0;
//md = &((service->metadata)[field_id]);
md = service->metadata + field_id;
md = conf_metadata_assign(nmem, md, name, type, merge, setting,
- brief, termlist, rank, sortkey_offset);
+ brief, termlist, rank, sortkey_offset,
+ mt);
return md;
}
xmlChar *xml_termlist = xmlGetProp(n, (xmlChar *) "termlist");
xmlChar *xml_rank = xmlGetProp(n, (xmlChar *) "rank");
xmlChar *xml_setting = xmlGetProp(n, (xmlChar *) "setting");
+ xmlChar *xml_mergekey = xmlGetProp(n, (xmlChar *) "mergekey");
enum conf_metadata_type type = Metadata_type_generic;
enum conf_metadata_merge merge = Metadata_merge_no;
enum conf_setting_type setting = Metadata_setting_no;
enum conf_sortkey_type sk_type = Metadata_sortkey_relevance;
+ enum conf_metadata_mergekey mergekey_type = Metadata_mergekey_no;
int brief = 0;
int termlist = 0;
int rank = 0;
else
sortkey_offset = -1;
+ if (xml_mergekey && strcmp((const char *) xml_mergekey, "no"))
+ {
+ mergekey_type = Metadata_mergekey_yes;
+ }
+
+
// metadata known, assign values
conf_service_add_metadata(nmem, service, md_node,
(const char *) xml_name,
type, merge, setting,
- brief, termlist, rank, sortkey_offset);
+ brief, termlist, rank, sortkey_offset,
+ mergekey_type);
xmlFree(xml_name);
xmlFree(xml_brief);
Metadata_setting_parameter // Expose value to normalization stylesheets
};
+enum conf_metadata_mergekey { // Does a metadata element contribute to the mergekey?
+ Metadata_mergekey_no, // value is not part of the mergekey (initial value used by the config parser)
+ Metadata_mergekey_yes // value is tokenized and appended to the mergekey
+};
+
// Describes known metadata elements and how they are to be manipulated
// An array of these structures provides a 'map' against which
// discovered metadata elements are matched. It also governs storage,
enum conf_metadata_type type;
enum conf_metadata_merge merge;
enum conf_setting_type setting; // Value is to be taken from session/db settings?
+    enum conf_metadata_mergekey mergekey; // Is the value appended to the record's mergekey?
};
-struct conf_metadata * conf_metadata_assign(NMEM nmem,
- struct conf_metadata * metadata,
- const char *name,
- enum conf_metadata_type type,
- enum conf_metadata_merge merge,
- enum conf_setting_type setting,
- int brief,
- int termlist,
- int rank,
- int sortkey_offset);
-
-
-
// Controls sorting
struct conf_sortkey
{
enum conf_sortkey_type type;
};
-struct conf_sortkey * conf_sortkey_assign(NMEM nmem,
- struct conf_sortkey * sortkey,
- const char *name,
- enum conf_sortkey_type type);
-
-
-
// It is conceivable that there will eventually be several 'services'
// offered from one server, with separate configuration -- possibly
// more than one service associated with the same port. For now,
struct conf_service * conf_service_create(NMEM nmem,
int num_metadata, int num_sortkeys);
-
struct conf_metadata* conf_service_add_metadata(NMEM nmem,
struct conf_service *service,
int field_id,
int brief,
int termlist,
int rank,
- int sortkey_offset);
+ int sortkey_offset,
+ enum conf_metadata_mergekey mt);
struct conf_sortkey * conf_service_add_sortkey(NMEM nmem,
struct conf_service *service,
// expected metadata failures
YAZ_CHECK(!conf_service_add_metadata(0, service, 0, "dead_nmem",
- Metadata_type_generic,
- Metadata_merge_unique,
- Metadata_setting_no,
- 1, 1, 1, 0));
+ Metadata_type_generic,
+ Metadata_merge_unique,
+ Metadata_setting_no,
+ 1, 1, 1, 0,
+ Metadata_mergekey_no));
YAZ_CHECK(!conf_service_add_metadata(nmem, 0, 0, "service_needed",
Metadata_type_generic,
Metadata_merge_unique,
Metadata_setting_no,
- 1, 1, 1, 0));
+ 1, 1, 1, 0,
+ Metadata_mergekey_no));
YAZ_CHECK(!conf_service_add_metadata(nmem, service, -1, "out_of_bounds",
Metadata_type_generic,
Metadata_merge_unique,
Metadata_setting_no,
- 1, 1, 1, 0));
+ 1, 1, 1, 0,
+ Metadata_mergekey_no));
YAZ_CHECK(!conf_service_add_metadata(nmem, service, 4, "out_of_bounds",
Metadata_type_generic,
Metadata_merge_unique,
Metadata_setting_no,
- 1, 1, 1, 0));
+ 1, 1, 1, 0,
+ Metadata_mergekey_no));
YAZ_CHECK(!conf_service_add_metadata(nmem, service, 0, 0, //missing name
Metadata_type_generic,
Metadata_merge_unique,
Metadata_setting_no,
- 1, 1, 1, 0));
+ 1, 1, 1, 0,
+ Metadata_mergekey_no));
// expected metadata successes
YAZ_CHECK(conf_service_add_metadata(nmem, service, 0, "title",
Metadata_type_generic,
Metadata_merge_unique,
Metadata_setting_no,
- 1, 1, 1, 0));
+ 1, 1, 1, 0,
+ Metadata_mergekey_no));
YAZ_CHECK(conf_service_add_metadata(nmem, service, 1, "author",
Metadata_type_generic,
Metadata_merge_longest,
- Metadata_setting_no,
- 1, 1, 1, 0));
+ Metadata_setting_no,
+ 1, 1, 1, 0,
+ Metadata_mergekey_no));
YAZ_CHECK(conf_service_add_metadata(nmem, service, 2, "isbn",
Metadata_type_number,
Metadata_merge_no,
- Metadata_setting_no,
- 1, 1, 1, 0));
+ Metadata_setting_no,
+ 1, 1, 1, 0,
+ Metadata_mergekey_no));
YAZ_CHECK(conf_service_add_metadata(nmem, service, 3, "year",
Metadata_type_year,
Metadata_merge_range,
- Metadata_setting_no,
- 1, 1, 1, 0));
+ Metadata_setting_no,
+ 1, 1, 1, 0,
+ Metadata_mergekey_no));
// expected sortkey failures
service = conf_service_create(nmem, 4, 3);
YAZ_CHECK(service);
- YAZ_CHECK(conf_service_add_metadata(nmem, service, 0, "title",
- Metadata_type_generic, Metadata_merge_unique,
- Metadata_setting_no, 1, 1, 1, 0));
-
- YAZ_CHECK(conf_service_add_metadata(nmem, service, 1, "author",
- Metadata_type_generic, Metadata_merge_longest,
- Metadata_setting_no,1, 1, 1, 0));
-
- YAZ_CHECK(conf_service_add_metadata(nmem, service, 2, "isbn",
- Metadata_type_number, Metadata_merge_no,
- Metadata_setting_no, 1, 1, 1, 0));
-
- YAZ_CHECK(conf_service_add_metadata(nmem, service, 3, "year",
- Metadata_type_year, Metadata_merge_range,
- Metadata_setting_no, 1, 1, 1, 0));
-
- YAZ_CHECK(conf_service_add_sortkey(nmem, service, 0, "relevance",
- Metadata_sortkey_relevance));
-
- YAZ_CHECK(conf_service_add_sortkey(nmem, service, 1, "title",
- Metadata_sortkey_string));
+ YAZ_CHECK(conf_service_add_metadata(
+ nmem, service, 0, "title",
+ Metadata_type_generic, Metadata_merge_unique,
+ Metadata_setting_no, 1, 1, 1, 0,
+ Metadata_mergekey_no
+ ));
+
+ YAZ_CHECK(conf_service_add_metadata(
+ nmem, service, 1, "author",
+ Metadata_type_generic, Metadata_merge_longest,
+ Metadata_setting_no,1, 1, 1, 0,
+ Metadata_mergekey_no));
+
+ YAZ_CHECK(conf_service_add_metadata(
+ nmem, service, 2, "isbn",
+ Metadata_type_number, Metadata_merge_no,
+ Metadata_setting_no, 1, 1, 1, 0,
+ Metadata_mergekey_no));
+
+ YAZ_CHECK(conf_service_add_metadata(
+ nmem, service, 3, "year",
+ Metadata_type_year, Metadata_merge_range,
+ Metadata_setting_no, 1, 1, 1, 0,
+ Metadata_mergekey_no));
+
+ YAZ_CHECK(conf_service_add_sortkey(
+ nmem, service, 0, "relevance",
+ Metadata_sortkey_relevance));
+
+ YAZ_CHECK(conf_service_add_sortkey(
+ nmem, service, 1, "title",
+ Metadata_sortkey_string));
- YAZ_CHECK(conf_service_add_sortkey(nmem, service, 2, "year",
- Metadata_sortkey_numeric));
+ YAZ_CHECK(conf_service_add_sortkey(
+ nmem, service, 2, "year",
+ Metadata_sortkey_numeric));