- if (i == s->num_termlists)
- {
- if (i == SESSION_MAX_TERMLISTS)
- {
- yaz_log(YLOG_FATAL, "Too many termlists");
- exit(1);
- }
- s->termlists[i].name = nmem_strdup(s->nmem, type);
- s->termlists[i].termlist = termlist_create(s->nmem, s->expected_maxrecs, 15);
- s->num_termlists = i + 1;
- }
- termlist_insert(s->termlists[i].termlist, value);
-}
-
-static xmlDoc *normalize_record(struct client *cl, Z_External *rec)
-{
- struct database_retrievalmap *m;
- struct database *db = cl->database->database;
- xmlNode *res;
- xmlDoc *rdoc;
-
- // First normalize to XML
- if (db->yaz_marc)
- {
- char *buf;
- int len;
- if (rec->which != Z_External_octet)
- {
- yaz_log(YLOG_WARN, "Unexpected external branch, probably BER %s",
- cl->database->database->url);
- return 0;
- }
- buf = (char*) rec->u.octet_aligned->buf;
- len = rec->u.octet_aligned->len;
- if (yaz_marc_read_iso2709(db->yaz_marc, buf, len) < 0)
- {
- yaz_log(YLOG_WARN, "Failed to decode MARC %s",
- cl->database->database->url);
- return 0;
- }
-
- yaz_marc_write_using_libxml2(db->yaz_marc, 1);
- if (yaz_marc_write_xml(db->yaz_marc, &res,
- "http://www.loc.gov/MARC21/slim", 0, 0) < 0)
- {
- yaz_log(YLOG_WARN, "Failed to encode as XML %s",
- cl->database->database->url);
- return 0;
- }
- rdoc = xmlNewDoc((xmlChar *) "1.0");
- xmlDocSetRootElement(rdoc, res);
-
- }
- else
- {
- yaz_log(YLOG_FATAL,
- "Unknown native_syntax in normalize_record from %s",
- cl->database->database->url);
- exit(1);
- }
-
- if (global_parameters.dump_records){
- fprintf(stderr,
- "Input Record (normalized) from %s\n----------------\n",
- cl->database->database->url);
-#if LIBXML_VERSION >= 20600
- xmlDocFormatDump(stderr, rdoc, 1);
-#else
- xmlDocDump(stderr, rdoc);
-#endif
- }
-
- for (m = db->map; m; m = m->next){
- xmlDoc *new = 0;
-
-#if 1
- {
- xmlNodePtr root = 0;
- new = xsltApplyStylesheet(m->stylesheet, rdoc, 0);
- root= xmlDocGetRootElement(new);
- if (!new || !root || !(root->children))
- {
- yaz_log(YLOG_WARN, "XSLT transformation failed from %s",
- cl->database->database->url);
- xmlFreeDoc(new);
- xmlFreeDoc(rdoc);
- return 0;
- }
- }
-#endif
-
-#if 0
- // do it another way to detect transformation errors right now
- // but does not seem to work either!
- {
- xsltTransformContextPtr ctxt;
- ctxt = xsltNewTransformContext(m->stylesheet, rdoc);
- new = xsltApplyStylesheetUser(m->stylesheet, rdoc, 0, 0, 0, ctxt);
- if ((ctxt->state == XSLT_STATE_ERROR) ||
- (ctxt->state == XSLT_STATE_STOPPED)){
- yaz_log(YLOG_WARN, "XSLT transformation failed from %s",
- cl->database->database->url);
- xmlFreeDoc(new);
- xmlFreeDoc(rdoc);
- return 0;
- }
- }
-#endif
-
- xmlFreeDoc(rdoc);
- rdoc = new;
- }
- if (global_parameters.dump_records)
- {
- fprintf(stderr, "Record from %s\n----------------\n",
- cl->database->database->url);
-#if LIBXML_VERSION >= 20600
- xmlDocFormatDump(stderr, rdoc, 1);
-#else
- xmlDocDump(stderr, rdoc);
-#endif
- }
- return rdoc;
-}
-
// Extract what appears to be years from buf, storing highest and
// lowest values.
//
// A "year" is a run of exactly four consecutive decimal digits; digit runs
// of any other length are skipped.  On return *first holds the smallest and
// *last the largest year found, and both are -1 when buf contains none.
//
// Returns *first, so a negative result means no year was found.
static int extract_years(const char *buf, int *first, int *last)
{
    *first = -1;
    *last = -1;
    while (*buf)
    {
        const char *e;
        int len = 0;

        // The <ctype.h> classifiers take an unsigned char value (or EOF);
        // plain char may be negative for bytes >= 0x80, e.g. in UTF-8 text.
        while (*buf && !isdigit((unsigned char) *buf))
            buf++;
        for (e = buf; isdigit((unsigned char) *e); e++)
            len++;
        if (len == 4)
        {
            // All four characters are known to be digits at this point.
            int value = (buf[0] - '0') * 1000 + (buf[1] - '0') * 100
                      + (buf[2] - '0') * 10 + (buf[3] - '0');

            if (*first < 0 || value < *first)
                *first = value;
            if (*last < 0 || value > *last)
                *last = value;
        }
        buf = e;
    }
    return *first;
}
-
-static struct record *ingest_record(struct client *cl, Z_External *rec)
-{
- xmlDoc *xdoc = normalize_record(cl, rec);
- xmlNode *root, *n;
- struct record *res;
- struct record_cluster *cluster;
- struct session *se = cl->session;
- xmlChar *mergekey, *mergekey_norm;
- xmlChar *type = 0;
- xmlChar *value = 0;
- struct conf_service *service = global_parameters.server->service;
-
- if (!xdoc)
- return 0;
-
- root = xmlDocGetRootElement(xdoc);
- if (!(mergekey = xmlGetProp(root, (xmlChar *) "mergekey")))
- {
- yaz_log(YLOG_WARN, "No mergekey found in record");
- xmlFreeDoc(xdoc);
- return 0;
- }
-
- res = nmem_malloc(se->nmem, sizeof(struct record));
- res->next = 0;
- res->client = cl;
- res->metadata = nmem_malloc(se->nmem,
- sizeof(struct record_metadata*) * service->num_metadata);
- memset(res->metadata, 0, sizeof(struct record_metadata*) * service->num_metadata);
-
- mergekey_norm = (xmlChar *) nmem_strdup(se->nmem, (char*) mergekey);
- xmlFree(mergekey);
- normalize_mergekey((char *) mergekey_norm, 0);
-
- cluster = reclist_insert(se->reclist, res, (char *) mergekey_norm,
- &se->total_merged);
- if (global_parameters.dump_records)
- yaz_log(YLOG_LOG, "Cluster id %d from %s (#%d)", cluster->recid,
- cl->database->database->url, cl->records);
- if (!cluster)
- {
- /* no room for record */
- xmlFreeDoc(xdoc);
- return 0;
- }
- relevance_newrec(se->relevance, cluster);
-
- for (n = root->children; n; n = n->next)
- {
- if (type)
- xmlFree(type);
- if (value)
- xmlFree(value);
- type = value = 0;
-
- if (n->type != XML_ELEMENT_NODE)
- continue;
- if (!strcmp((const char *) n->name, "metadata"))
- {
- struct conf_metadata *md = 0;
- struct conf_sortkey *sk = 0;
- struct record_metadata **wheretoput, *newm;
- int imeta;
- int first, last;
-
- type = xmlGetProp(n, (xmlChar *) "type");
- value = xmlNodeListGetString(xdoc, n->children, 0);
-
- if (!type || !value)
- continue;
-
- // First, find out what field we're looking at
- for (imeta = 0; imeta < service->num_metadata; imeta++)
- if (!strcmp((const char *) type, service->metadata[imeta].name))
- {
- md = &service->metadata[imeta];
- if (md->sortkey_offset >= 0)
- sk = &service->sortkeys[md->sortkey_offset];
- break;
- }
- if (!md)
- {
- yaz_log(YLOG_WARN, "Ignoring unknown metadata element: %s", type);
- continue;
- }
-
- // Find out where we are putting it
- if (md->merge == Metadata_merge_no)
- wheretoput = &res->metadata[imeta];
- else
- wheretoput = &cluster->metadata[imeta];
-
- // Put it there
- newm = nmem_malloc(se->nmem, sizeof(struct record_metadata));
- newm->next = 0;
- if (md->type == Metadata_type_generic)
- {
- char *p, *pe;
- for (p = (char *) value; *p && isspace(*p); p++)
- ;
- for (pe = p + strlen(p) - 1;
- pe > p && strchr(" ,/.:([", *pe); pe--)
- *pe = '\0';
- newm->data.text = nmem_strdup(se->nmem, p);
-
- }
- else if (md->type == Metadata_type_year)
- {
- if (extract_years((char *) value, &first, &last) < 0)
- continue;
- }
- else
- {
- yaz_log(YLOG_WARN, "Unknown type in metadata element %s", type);
- continue;
- }
- if (md->type == Metadata_type_year && md->merge != Metadata_merge_range)
- {
- yaz_log(YLOG_WARN, "Only range merging supported for years");
- continue;
- }
- if (md->merge == Metadata_merge_unique)
- {
- struct record_metadata *mnode;
- for (mnode = *wheretoput; mnode; mnode = mnode->next)
- if (!strcmp((const char *) mnode->data.text, newm->data.text))
- break;
- if (!mnode)
- {
- newm->next = *wheretoput;
- *wheretoput = newm;
- }
- }
- else if (md->merge == Metadata_merge_longest)
- {
- if (!*wheretoput ||
- strlen(newm->data.text) > strlen((*wheretoput)->data.text))
- {
- *wheretoput = newm;
- if (sk)
- {
- char *s = nmem_strdup(se->nmem, newm->data.text);
- if (!cluster->sortkeys[md->sortkey_offset])
- cluster->sortkeys[md->sortkey_offset] =
- nmem_malloc(se->nmem, sizeof(union data_types));
- normalize_mergekey(s,
- (sk->type == Metadata_sortkey_skiparticle));
- cluster->sortkeys[md->sortkey_offset]->text = s;
- }
- }
- }
- else if (md->merge == Metadata_merge_all || md->merge == Metadata_merge_no)
- {
- newm->next = *wheretoput;
- *wheretoput = newm;
- }
- else if (md->merge == Metadata_merge_range)
- {
- assert(md->type == Metadata_type_year);
- if (!*wheretoput)
- {
- *wheretoput = newm;
- (*wheretoput)->data.number.min = first;
- (*wheretoput)->data.number.max = last;
- if (sk)
- cluster->sortkeys[md->sortkey_offset] = &newm->data;
- }
- else
- {
- if (first < (*wheretoput)->data.number.min)
- (*wheretoput)->data.number.min = first;
- if (last > (*wheretoput)->data.number.max)
- (*wheretoput)->data.number.max = last;
- }
-#ifdef GAGA
- if (sk)
- {
- union data_types *sdata = cluster->sortkeys[md->sortkey_offset];
- yaz_log(YLOG_LOG, "SK range: %d-%d", sdata->number.min, sdata->number.max);
- }
-#endif
- }
- else
- yaz_log(YLOG_WARN, "Don't know how to merge on element name %s", md->name);
-
- if (md->rank)
- relevance_countwords(se->relevance, cluster,
- (char *) value, md->rank);
- if (md->termlist)
- {
- if (md->type == Metadata_type_year)
- {
- char year[64];
- sprintf(year, "%d", last);
- add_facet(se, (char *) type, year);
- if (first != last)
- {
- sprintf(year, "%d", first);
- add_facet(se, (char *) type, year);
- }
- }
- else
- add_facet(se, (char *) type, (char *) value);
- }
- xmlFree(type);
- xmlFree(value);
- type = value = 0;
- }
- else
- yaz_log(YLOG_WARN, "Unexpected element %s in internal record", n->name);
- }
- if (type)
- xmlFree(type);
- if (value)
- xmlFree(value);
-
- xmlFreeDoc(xdoc);
-
- relevance_donerecord(se->relevance, cluster);
- se->total_records++;
-
- return res;
-}
-
-// Retrieve first defined value for 'name' for given database.
-// Will be extended to take into account user associated with session
-char *session_setting_oneval(struct session_database *db, int offset)
-{
- if (!db->settings[offset])
- return "";
- return db->settings[offset]->value;
-}
-
-static void ingest_records(struct client *cl, Z_Records *r)
-{
-#if USE_TIMING
- yaz_timing_t t = yaz_timing_create();
-#endif
- struct record *rec;
- struct session *s = cl->session;
- Z_NamePlusRecordList *rlist;
- int i;
-
- if (r->which != Z_Records_DBOSD)
- return;
- rlist = r->u.databaseOrSurDiagnostics;
- for (i = 0; i < rlist->num_records; i++)
- {
- Z_NamePlusRecord *npr = rlist->records[i];
-
- cl->records++;
- if (npr->which != Z_NamePlusRecord_databaseRecord)
- {
- yaz_log(YLOG_WARN,
- "Unexpected record type, probably diagnostic %s",
- cl->database->database->url);
- continue;
- }
-
- rec = ingest_record(cl, npr->u.databaseRecord);
- if (!rec)
- continue;
- }
- if (s->watchlist[SESSION_WATCH_RECORDS].fun && rlist->num_records)
- session_alert_watch(s, SESSION_WATCH_RECORDS);
-
-#if USE_TIMING
- yaz_timing_stop(t);
- yaz_log(YLOG_LOG, "ingest_records %6.5f %3.2f %3.2f",
- yaz_timing_get_real(t), yaz_timing_get_user(t),
- yaz_timing_get_sys(t));
- yaz_timing_destroy(&t);