Because that's what this is about: sessions.
libpazpar2_a_SOURCES = pazpar2_config.c pazpar2_config.h eventl.c eventl.h \
http.c http_command.c http.h \
- logic.c pazpar2.h \
+ session.c session.h \
record.h record.c reclists.c reclists.h \
relevance.c relevance.h termlists.c termlists.h \
normalize7bit.h normalize7bit.c \
#include <yaz/timing.h>
#endif
-#include "pazpar2.h"
+#include "session.h"
#include "parameters.h"
#include "client.h"
#include "connection.h"
#include <yaz/comstack.h>
#include <yaz/tcpip.h>
#include "connection.h"
-#include "pazpar2.h"
+#include "session.h"
#include "host.h"
#include "client.h"
#include "settings.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <yaz/log.h>
+#include <yaz/nmem.h>
-#include "pazpar2.h"
+#include "session.h"
#include "host.h"
+#include "pazpar2_config.h"
#include "settings.h"
#include "http.h"
#include "zeerex.h"
#include <yaz/nmem.h>
#include <yaz/tcpip.h>
-#include "pazpar2.h"
+#include "session.h"
#include "connection.h"
#include "host.h"
#ifndef HOST_H
#define HOST_H
+#include <yaz/mutex.h>
+
/** \brief Represents a host (irrespective of databases) */
struct host {
char *hostport;
#include <yaz/nmem.h>
#include <yaz/mutex.h>
-#include "pazpar2.h"
+#include "session.h"
#include "http.h"
#define MAX_HTTP_HEADER 4096
#ifndef HTTP_H
#define HTTP_H
+#include <yaz/wrbuf.h>
+
#include "eventl.h"
// Generic I/O buffer
struct http_buf;
#include "eventl.h"
#include "parameters.h"
-#include "pazpar2.h"
+#include "session.h"
#include "http.h"
#include "settings.h"
#include "client.h"
+++ /dev/null
-/* This file is part of Pazpar2.
- Copyright (C) 2006-2010 Index Data
-
-Pazpar2 is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*/
-
-/** \file logic.c
- \brief high-level logic; mostly user sessions and settings
-*/
-
-#if HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#if HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
-#if HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#include <signal.h>
-#include <ctype.h>
-#include <assert.h>
-
-#include <yaz/marcdisp.h>
-#include <yaz/comstack.h>
-#include <yaz/tcpip.h>
-#include <yaz/proto.h>
-#include <yaz/readconf.h>
-#include <yaz/pquery.h>
-#include <yaz/otherinfo.h>
-#include <yaz/yaz-util.h>
-#include <yaz/nmem.h>
-#include <yaz/query-charset.h>
-#include <yaz/querytowrbuf.h>
-#include <yaz/oid_db.h>
-#include <yaz/snprintf.h>
-
-#define USE_TIMING 0
-#if USE_TIMING
-#include <yaz/timing.h>
-#endif
-
-#include "parameters.h"
-#include "pazpar2.h"
-#include "eventl.h"
-#include "http.h"
-#include "termlists.h"
-#include "reclists.h"
-#include "relevance.h"
-#include "database.h"
-#include "client.h"
-#include "settings.h"
-#include "normalize7bit.h"
-
-#define TERMLIST_HIGH_SCORE 25
-
-#define MAX_CHUNK 15
-
-// Note: Some things in this structure will eventually move to configuration
-struct parameters global_parameters =
-{
- 0, // dump_records
- 0 // debug_mode
-};
-
-static void log_xml_doc(xmlDoc *doc)
-{
- FILE *lf = yaz_log_file();
- xmlChar *result = 0;
- int len = 0;
-#if LIBXML_VERSION >= 20600
- xmlDocDumpFormatMemory(doc, &result, &len, 1);
-#else
- xmlDocDumpMemory(doc, &result, &len);
-#endif
- if (lf && len)
- {
- (void) fwrite(result, 1, len, lf);
- fprintf(lf, "\n");
- }
- xmlFree(result);
-}
-
-static void session_enter(struct session *s)
-{
- yaz_mutex_enter(s->mutex);
-}
-
-static void session_leave(struct session *s)
-{
- yaz_mutex_leave(s->mutex);
-}
-
-// Recursively traverse query structure to extract terms.
-void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num)
-{
- char **words;
- int numwords;
- int i;
-
- switch (n->kind)
- {
- case CCL_RPN_AND:
- case CCL_RPN_OR:
- case CCL_RPN_NOT:
- case CCL_RPN_PROX:
- pull_terms(nmem, n->u.p[0], termlist, num);
- pull_terms(nmem, n->u.p[1], termlist, num);
- break;
- case CCL_RPN_TERM:
- nmem_strsplit(nmem, " ", n->u.t.term, &words, &numwords);
- for (i = 0; i < numwords; i++)
- termlist[(*num)++] = words[i];
- break;
- default: // NOOP
- break;
- }
-}
-
-
-static void add_facet(struct session *s, const char *type, const char *value)
-{
- int i;
-
- if (!*value)
- return;
- for (i = 0; i < s->num_termlists; i++)
- if (!strcmp(s->termlists[i].name, type))
- break;
- if (i == s->num_termlists)
- {
- if (i == SESSION_MAX_TERMLISTS)
- {
- yaz_log(YLOG_FATAL, "Too many termlists");
- return;
- }
-
- s->termlists[i].name = nmem_strdup(s->nmem, type);
- s->termlists[i].termlist
- = termlist_create(s->nmem, TERMLIST_HIGH_SCORE);
- s->num_termlists = i + 1;
- }
- termlist_insert(s->termlists[i].termlist, value);
-}
-
-static xmlDoc *record_to_xml(struct session_database *sdb, const char *rec)
-{
- struct database *db = sdb->database;
- xmlDoc *rdoc = 0;
-
- rdoc = xmlParseMemory(rec, strlen(rec));
-
- if (!rdoc)
- {
- yaz_log(YLOG_FATAL, "Non-wellformed XML received from %s",
- db->url);
- return 0;
- }
-
- if (global_parameters.dump_records)
- {
- yaz_log(YLOG_LOG, "Un-normalized record from %s", db->url);
- log_xml_doc(rdoc);
- }
-
- return rdoc;
-}
-
-#define MAX_XSLT_ARGS 16
-
-// Add static values from session database settings if applicable
-static void insert_settings_parameters(struct session_database *sdb,
- struct conf_service *service,
- char **parms,
- NMEM nmem)
-{
- int i;
- int nparms = 0;
- int offset = 0;
-
- for (i = 0; i < service->num_metadata; i++)
- {
- struct conf_metadata *md = &service->metadata[i];
- int setting;
-
- if (md->setting == Metadata_setting_parameter &&
- (setting = settings_lookup_offset(service, md->name)) >= 0)
- {
- const char *val = session_setting_oneval(sdb, setting);
- if (val && nparms < MAX_XSLT_ARGS)
- {
- char *buf;
- int len = strlen(val);
- buf = nmem_malloc(nmem, len + 3);
- buf[0] = '\'';
- strcpy(buf + 1, val);
- buf[len+1] = '\'';
- buf[len+2] = '\0';
- parms[offset++] = md->name;
- parms[offset++] = buf;
- nparms++;
- }
- }
- }
- parms[offset] = 0;
-}
-
-// Add static values from session database settings if applicable
-static void insert_settings_values(struct session_database *sdb, xmlDoc *doc,
- struct conf_service *service)
-{
- int i;
-
- for (i = 0; i < service->num_metadata; i++)
- {
- struct conf_metadata *md = &service->metadata[i];
- int offset;
-
- if (md->setting == Metadata_setting_postproc &&
- (offset = settings_lookup_offset(service, md->name)) >= 0)
- {
- const char *val = session_setting_oneval(sdb, offset);
- if (val)
- {
- xmlNode *r = xmlDocGetRootElement(doc);
- xmlNode *n = xmlNewTextChild(r, 0, (xmlChar *) "metadata",
- (xmlChar *) val);
- xmlSetProp(n, (xmlChar *) "type", (xmlChar *) md->name);
- }
- }
- }
-}
-
-static xmlDoc *normalize_record(struct session_database *sdb,
- struct conf_service *service,
- const char *rec, NMEM nmem)
-{
- xmlDoc *rdoc = record_to_xml(sdb, rec);
-
- if (rdoc)
- {
- char *parms[MAX_XSLT_ARGS*2+1];
-
- insert_settings_parameters(sdb, service, parms, nmem);
-
- if (normalize_record_transform(sdb->map, &rdoc, (const char **)parms))
- {
- yaz_log(YLOG_WARN, "Normalize failed from %s", sdb->database->url);
- }
- else
- {
- insert_settings_values(sdb, rdoc, service);
-
- if (global_parameters.dump_records)
- {
- yaz_log(YLOG_LOG, "Normalized record from %s",
- sdb->database->url);
- log_xml_doc(rdoc);
- }
- }
- }
- return rdoc;
-}
-
-void session_settings_dump(struct session *se,
- struct session_database *db,
- WRBUF w)
-{
- if (db->settings)
- {
- int i, num = db->num_settings;
- for (i = 0; i < num; i++)
- {
- struct setting *s = db->settings[i];
- for (;s ; s = s->next)
- {
- wrbuf_puts(w, "<set name=\"");
- wrbuf_xmlputs(w, s->name);
- wrbuf_puts(w, "\" value=\"");
- wrbuf_xmlputs(w, s->value);
- wrbuf_puts(w, "\"/>");
- }
- if (db->settings[i])
- wrbuf_puts(w, "\n");
- }
- }
-}
-
-// Retrieve first defined value for 'name' for given database.
-// Will be extended to take into account user associated with session
-const char *session_setting_oneval(struct session_database *db, int offset)
-{
- if (offset >= db->num_settings || !db->settings[offset])
- return "";
- return db->settings[offset]->value;
-}
-
-// Prepare XSLT stylesheets for record normalization
-// Structures are allocated on the session_wide nmem to avoid having
-// to recompute this for every search. This would lead
-// to leaking if a single session was to repeatedly change the PZ_XSLT
-// setting. However, this is not a realistic use scenario.
-static int prepare_map(struct session *se, struct session_database *sdb)
-{
- const char *s;
-
- if (!sdb->settings)
- {
- yaz_log(YLOG_WARN, "No settings on %s", sdb->database->url);
- return -1;
- }
- if ((s = session_setting_oneval(sdb, PZ_XSLT)))
- {
- char auto_stylesheet[256];
-
- if (!strcmp(s, "auto"))
- {
- const char *request_syntax = session_setting_oneval(
- sdb, PZ_REQUESTSYNTAX);
- if (request_syntax)
- {
- char *cp;
- yaz_snprintf(auto_stylesheet, sizeof(auto_stylesheet),
- "%s.xsl", request_syntax);
- for (cp = auto_stylesheet; *cp; cp++)
- {
- /* deliberately only consider ASCII */
- if (*cp > 32 && *cp < 127)
- *cp = tolower(*cp);
- }
- s = auto_stylesheet;
- }
- else
- {
- yaz_log(YLOG_WARN, "No pz:requestsyntax for auto stylesheet");
- }
- }
- sdb->map = normalize_cache_get(se->normalize_cache,
- se->service, s);
- if (!sdb->map)
- return -1;
- }
- return 0;
-}
-
-// This analyzes settings and recomputes any supporting data structures
-// if necessary.
-static int prepare_session_database(struct session *se,
- struct session_database *sdb)
-{
- if (!sdb->settings)
- {
- yaz_log(YLOG_WARN,
- "No settings associated with %s", sdb->database->url);
- return -1;
- }
- if (sdb->settings[PZ_XSLT] && !sdb->map)
- {
- if (prepare_map(se, sdb) < 0)
- return -1;
- }
- return 0;
-}
-
-// called if watch should be removed because http_channel is to be destroyed
-static void session_watch_cancel(void *data, struct http_channel *c,
- void *data2)
-{
- struct session_watchentry *ent = data;
-
- ent->fun = 0;
- ent->data = 0;
- ent->obs = 0;
-}
-
-// set watch. Returns 0=OK, -1 if watch is already set
-int session_set_watch(struct session *s, int what,
- session_watchfun fun, void *data,
- struct http_channel *chan)
-{
- if (s->watchlist[what].fun)
- return -1;
- s->watchlist[what].fun = fun;
- s->watchlist[what].data = data;
- s->watchlist[what].obs = http_add_observer(chan, &s->watchlist[what],
- session_watch_cancel);
- return 0;
-}
-
-void session_alert_watch(struct session *s, int what)
-{
- if (s->watchlist[what].fun)
- {
- /* our watch is no longer associated with http_channel */
- void *data;
- session_watchfun fun;
-
- http_remove_observer(s->watchlist[what].obs);
- fun = s->watchlist[what].fun;
- data = s->watchlist[what].data;
-
- /* reset watch before fun is invoked - in case fun wants to set
- it again */
- s->watchlist[what].fun = 0;
- s->watchlist[what].data = 0;
- s->watchlist[what].obs = 0;
-
- fun(data);
- }
-}
-
-//callback for grep_databases
-static void select_targets_callback(void *context, struct session_database *db)
-{
- struct session *se = (struct session*) context;
- struct client *cl = client_create();
- client_set_database(cl, db);
- client_set_session(cl, se);
-}
-
-// Associates a set of clients with a session;
-// Note: Session-databases represent databases with per-session
-// setting overrides
-static int select_targets(struct session *se, const char *filter)
-{
- while (se->clients)
- client_destroy(se->clients);
-
- return session_grep_databases(se, filter, select_targets_callback);
-}
-
-int session_active_clients(struct session *s)
-{
- struct client *c;
- int res = 0;
-
- for (c = s->clients; c; c = client_next_in_session(c))
- if (client_is_active(c))
- res++;
-
- return res;
-}
-
-
-enum pazpar2_error_code search(struct session *se,
- const char *query,
- const char *startrecs, const char *maxrecs,
- const char *filter,
- const char **addinfo)
-{
- int live_channels = 0;
- int no_working = 0;
- int no_failed = 0;
- struct client *cl;
-
- yaz_log(YLOG_DEBUG, "Search");
-
- *addinfo = 0;
-
- session_enter(se);
- nmem_reset(se->nmem);
- se->relevance = 0;
- se->total_records = se->total_hits = se->total_merged = 0;
- reclist_destroy(se->reclist);
- se->reclist = 0;
- se->num_termlists = 0;
- live_channels = select_targets(se, filter);
- if (!live_channels)
- {
- session_leave(se);
- return PAZPAR2_NO_TARGETS;
- }
- se->reclist = reclist_create(se->nmem);
-
- for (cl = se->clients; cl; cl = client_next_in_session(cl))
- {
- if (maxrecs)
- client_set_maxrecs(cl, atoi(maxrecs));
- if (startrecs)
- client_set_startrecs(cl, atoi(startrecs));
- if (prepare_session_database(se, client_get_database(cl)) < 0)
- continue;
- // Parse query for target
- if (client_parse_query(cl, query) < 0)
- no_failed++;
- else
- {
- no_working++;
- if (client_prep_connection(cl, se->service->z3950_operation_timeout,
- se->service->z3950_session_timeout,
- se->service->server->iochan_man))
- client_start_search(cl);
- }
- }
- session_leave(se);
- if (no_working == 0)
- {
- if (no_failed > 0)
- {
- *addinfo = "query";
- return PAZPAR2_MALFORMED_PARAMETER_VALUE;
- }
- else
- return PAZPAR2_NO_TARGETS;
- }
- return PAZPAR2_NO_ERROR;
-}
-
-// Creates a new session_database object for a database
-static void session_init_databases_fun(void *context, struct database *db)
-{
- struct session *se = (struct session *) context;
- struct session_database *new = nmem_malloc(se->session_nmem, sizeof(*new));
- int i;
-
- new->database = db;
-
- new->map = 0;
- assert(db->settings);
- new->settings = nmem_malloc(se->session_nmem,
- sizeof(struct settings *) * db->num_settings);
- new->num_settings = db->num_settings;
- for (i = 0; i < db->num_settings; i++)
- {
- struct setting *setting = db->settings[i];
- new->settings[i] = setting;
- }
- new->next = se->databases;
- se->databases = new;
-}
-
-// Doesn't free memory associated with sdb -- nmem takes care of that
-static void session_database_destroy(struct session_database *sdb)
-{
- sdb->map = 0;
-}
-
-// Initialize session_database list -- this represents this session's view
-// of the database list -- subject to modification by the settings ws command
-void session_init_databases(struct session *se)
-{
- se->databases = 0;
- predef_grep_databases(se, se->service, session_init_databases_fun);
-}
-
-// Probably session_init_databases_fun should be refactored instead of
-// called here.
-static struct session_database *load_session_database(struct session *se,
- char *id)
-{
- struct database *db = new_database(id, se->session_nmem);
-
- resolve_database(se->service, db);
-
- session_init_databases_fun((void*) se, db);
-
- // New sdb is head of se->databases list
- return se->databases;
-}
-
-// Find an existing session database. If not found, load it
-static struct session_database *find_session_database(struct session *se,
- char *id)
-{
- struct session_database *sdb;
-
- for (sdb = se->databases; sdb; sdb = sdb->next)
- if (!strcmp(sdb->database->url, id))
- return sdb;
- return load_session_database(se, id);
-}
-
-// Apply a session override to a database
-void session_apply_setting(struct session *se, char *dbname, char *setting,
- char *value)
-{
- struct session_database *sdb = find_session_database(se, dbname);
- struct conf_service *service = se->service;
- struct setting *new = nmem_malloc(se->session_nmem, sizeof(*new));
- int offset = settings_create_offset(service, setting);
-
- expand_settings_array(&sdb->settings, &sdb->num_settings, offset,
- se->session_nmem);
- new->precedence = 0;
- new->target = dbname;
- new->name = setting;
- new->value = value;
- new->next = sdb->settings[offset];
- sdb->settings[offset] = new;
-
- // Force later recompute of settings-driven data structures
- // (happens when a search starts and client connections are prepared)
- switch (offset)
- {
- case PZ_XSLT:
- if (sdb->map)
- {
- sdb->map = 0;
- }
- break;
- }
-}
-
-void destroy_session(struct session *s)
-{
- struct session_database *sdb;
-
- while (s->clients)
- client_destroy(s->clients);
- for (sdb = s->databases; sdb; sdb = sdb->next)
- session_database_destroy(sdb);
- normalize_cache_destroy(s->normalize_cache);
- reclist_destroy(s->reclist);
- nmem_destroy(s->nmem);
- service_destroy(s->service);
- yaz_mutex_destroy(&s->mutex);
- wrbuf_destroy(s->wrbuf);
-}
-
-struct session *new_session(NMEM nmem, struct conf_service *service)
-{
- int i;
- struct session *session = nmem_malloc(nmem, sizeof(*session));
-
- yaz_log(YLOG_DEBUG, "New Pazpar2 session");
-
- session->service = service;
- session->relevance = 0;
- session->total_hits = 0;
- session->total_records = 0;
- session->number_of_warnings_unknown_elements = 0;
- session->number_of_warnings_unknown_metadata = 0;
- session->num_termlists = 0;
- session->reclist = 0;
- session->clients = 0;
- session->session_nmem = nmem;
- session->nmem = nmem_create();
- session->wrbuf = wrbuf_alloc();
- session->databases = 0;
- for (i = 0; i <= SESSION_WATCH_MAX; i++)
- {
- session->watchlist[i].data = 0;
- session->watchlist[i].fun = 0;
- }
- session->normalize_cache = normalize_cache_create();
- session->mutex = 0;
- yaz_mutex_create(&session->mutex);
-
- return session;
-}
-
-struct hitsbytarget *hitsbytarget(struct session *se, int *count, NMEM nmem)
-{
- struct hitsbytarget *res = 0;
- struct client *cl;
- size_t sz = 0;
-
- session_enter(se);
- for (cl = se->clients; cl; cl = client_next_in_session(cl))
- sz++;
-
- res = nmem_malloc(nmem, sizeof(*res) * sz);
- *count = 0;
- for (cl = se->clients; cl; cl = client_next_in_session(cl))
- {
- WRBUF w = wrbuf_alloc();
- const char *name = session_setting_oneval(client_get_database(cl),
- PZ_NAME);
-
- res[*count].id = client_get_database(cl)->database->url;
- res[*count].name = *name ? name : "Unknown";
- res[*count].hits = client_get_hits(cl);
- res[*count].records = client_get_num_records(cl);
- res[*count].diagnostic = client_get_diagnostic(cl);
- res[*count].state = client_get_state_str(cl);
- res[*count].connected = client_get_connection(cl) ? 1 : 0;
- session_settings_dump(se, client_get_database(cl), w);
- res[*count].settings_xml = w;
- (*count)++;
- }
- session_leave(se);
- return res;
-}
-
-struct termlist_score **termlist(struct session *s, const char *name, int *num)
-{
- int i;
- struct termlist_score **tl = 0;
-
- session_enter(s);
- for (i = 0; i < s->num_termlists; i++)
- if (!strcmp((const char *) s->termlists[i].name, name))
- {
- tl = termlist_highscore(s->termlists[i].termlist, num);
- break;
- }
- session_leave(s);
- return tl;
-}
-
-#ifdef MISSING_HEADERS
-void report_nmem_stats(void)
-{
- size_t in_use, is_free;
-
- nmem_get_memory_in_use(&in_use);
- nmem_get_memory_free(&is_free);
-
- yaz_log(YLOG_LOG, "nmem stat: use=%ld free=%ld",
- (long) in_use, (long) is_free);
-}
-#endif
-
-struct record_cluster *show_single_start(struct session *s, const char *id,
- struct record_cluster **prev_r,
- struct record_cluster **next_r)
-{
- struct record_cluster *r;
-
- session_enter(s);
- reclist_enter(s->reclist);
- *prev_r = 0;
- *next_r = 0;
- while ((r = reclist_read_record(s->reclist)))
- {
- if (!strcmp(r->recid, id))
- {
- *next_r = reclist_read_record(s->reclist);
- break;
- }
- *prev_r = r;
- }
- reclist_leave(s->reclist);
- if (!r)
- session_leave(s);
- return r;
-}
-
-void show_single_stop(struct session *s, struct record_cluster *rec)
-{
- session_leave(s);
-}
-
-struct record_cluster **show_range_start(struct session *s,
- struct reclist_sortparms *sp,
- int start, int *num, int *total, Odr_int *sumhits)
-{
- struct record_cluster **recs = nmem_malloc(s->nmem, *num
- * sizeof(struct record_cluster *));
- struct reclist_sortparms *spp;
- int i;
-#if USE_TIMING
- yaz_timing_t t = yaz_timing_create();
-#endif
-
- session_enter(s);
- if (!s->relevance)
- {
- *num = 0;
- *total = 0;
- *sumhits = 0;
- recs = 0;
- }
- else
- {
- for (spp = sp; spp; spp = spp->next)
- if (spp->type == Metadata_sortkey_relevance)
- {
- relevance_prepare_read(s->relevance, s->reclist);
- break;
- }
- reclist_sort(s->reclist, sp);
-
- reclist_enter(s->reclist);
- *total = reclist_get_num_records(s->reclist);
- *sumhits = s->total_hits;
-
- for (i = 0; i < start; i++)
- if (!reclist_read_record(s->reclist))
- {
- *num = 0;
- recs = 0;
- break;
- }
-
- for (i = 0; i < *num; i++)
- {
- struct record_cluster *r = reclist_read_record(s->reclist);
- if (!r)
- {
- *num = i;
- break;
- }
- recs[i] = r;
- }
- reclist_leave(s->reclist);
- }
-#if USE_TIMING
- yaz_timing_stop(t);
- yaz_log(YLOG_LOG, "show %6.5f %3.2f %3.2f",
- yaz_timing_get_real(t), yaz_timing_get_user(t),
- yaz_timing_get_sys(t));
- yaz_timing_destroy(&t);
-#endif
- return recs;
-}
-
-void show_range_stop(struct session *s, struct record_cluster **recs)
-{
- session_leave(s);
-}
-
-void statistics(struct session *se, struct statistics *stat)
-{
- struct client *cl;
- int count = 0;
-
- memset(stat, 0, sizeof(*stat));
- for (cl = se->clients; cl; cl = client_next_in_session(cl))
- {
- if (!client_get_connection(cl))
- stat->num_no_connection++;
- switch (client_get_state(cl))
- {
- case Client_Connecting: stat->num_connecting++; break;
- case Client_Working: stat->num_working++; break;
- case Client_Idle: stat->num_idle++; break;
- case Client_Failed: stat->num_failed++; break;
- case Client_Error: stat->num_error++; break;
- default: break;
- }
- count++;
- }
- stat->num_hits = se->total_hits;
- stat->num_records = se->total_records;
-
- stat->num_clients = count;
-}
-
-static struct record_metadata *record_metadata_init(
- NMEM nmem, const char *value, enum conf_metadata_type type,
- struct _xmlAttr *attr)
-{
- struct record_metadata *rec_md = record_metadata_create(nmem);
- struct record_metadata_attr **attrp = &rec_md->attributes;
-
- for (; attr; attr = attr->next)
- {
- if (attr->children && attr->children->content)
- {
- if (strcmp((const char *) attr->name, "type"))
- { /* skip the "type" attribute.. Its value is already part of
- the element in output (md-%s) and so repeating it here
- is redundant */
- *attrp = nmem_malloc(nmem, sizeof(**attrp));
- (*attrp)->name =
- nmem_strdup(nmem, (const char *) attr->name);
- (*attrp)->value =
- nmem_strdup(nmem, (const char *) attr->children->content);
- attrp = &(*attrp)->next;
- }
- }
- }
- *attrp = 0;
-
- if (type == Metadata_type_generic)
- {
- char *p = nmem_strdup(nmem, value);
-
- p = normalize7bit_generic(p, " ,/.:([");
-
- rec_md->data.text.disp = p;
- rec_md->data.text.sort = 0;
- }
- else if (type == Metadata_type_year || type == Metadata_type_date)
- {
- int first, last;
- int longdate = 0;
-
- if (type == Metadata_type_date)
- longdate = 1;
- if (extract7bit_dates((char *) value, &first, &last, longdate) < 0)
- return 0;
-
- rec_md->data.number.min = first;
- rec_md->data.number.max = last;
- }
- else
- return 0;
- return rec_md;
-}
-
-static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
- struct conf_service *service, WRBUF norm_wr)
-{
- xmlNode *n;
- int no_found = 0;
- for (n = root->children; n; n = n->next)
- {
- if (n->type != XML_ELEMENT_NODE)
- continue;
- if (!strcmp((const char *) n->name, "metadata"))
- {
- xmlChar *type = xmlGetProp(n, (xmlChar *) "type");
- if (!strcmp(name, (const char *) type))
- {
- xmlChar *value = xmlNodeListGetString(doc, n->children, 1);
- if (value)
- {
- const char *norm_str;
- pp2_relevance_token_t prt =
- pp2_relevance_tokenize(
- service->mergekey_pct,
- (const char *) value, 0);
-
- if (wrbuf_len(norm_wr) > 0)
- wrbuf_puts(norm_wr, " ");
- wrbuf_puts(norm_wr, name);
- while ((norm_str =
- pp2_relevance_token_next(prt)))
- {
- if (*norm_str)
- {
- wrbuf_puts(norm_wr, " ");
- wrbuf_puts(norm_wr, norm_str);
- }
- }
- xmlFree(value);
- pp2_relevance_token_destroy(prt);
- no_found++;
- }
- }
- xmlFree(type);
- }
- }
- return no_found;
-}
-
-static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no,
- struct conf_service *service, NMEM nmem)
-{
- char *mergekey_norm = 0;
- xmlNode *root = xmlDocGetRootElement(doc);
- WRBUF norm_wr = wrbuf_alloc();
-
- /* consider mergekey from XSL first */
- xmlChar *mergekey = xmlGetProp(root, (xmlChar *) "mergekey");
- if (mergekey)
- {
- const char *norm_str;
- pp2_relevance_token_t prt =
- pp2_relevance_tokenize(
- service->mergekey_pct,
- (const char *) mergekey, 0);
-
- while ((norm_str = pp2_relevance_token_next(prt)))
- {
- if (*norm_str)
- {
- if (wrbuf_len(norm_wr))
- wrbuf_puts(norm_wr, " ");
- wrbuf_puts(norm_wr, norm_str);
- }
- }
- pp2_relevance_token_destroy(prt);
- xmlFree(mergekey);
- }
- else
- {
- /* no mergekey defined in XSL. Look for mergekey metadata instead */
- int field_id;
- for (field_id = 0; field_id < service->num_metadata; field_id++)
- {
- struct conf_metadata *ser_md = &service->metadata[field_id];
- if (ser_md->mergekey != Metadata_mergekey_no)
- {
- int r = get_mergekey_from_doc(doc, root, ser_md->name,
- service, norm_wr);
- if (r == 0 && ser_md->mergekey == Metadata_mergekey_required)
- {
- /* no mergekey on this one and it is required..
- Generate unique key instead */
- wrbuf_rewind(norm_wr);
- break;
- }
- }
- }
- }
-
- /* generate unique key if none is not generated already or is empty */
- if (wrbuf_len(norm_wr) == 0)
- {
- wrbuf_printf(norm_wr, "%s-%d",
- client_get_database(cl)->database->url, record_no);
- }
- if (wrbuf_len(norm_wr) > 0)
- mergekey_norm = nmem_strdup(nmem, wrbuf_cstr(norm_wr));
- wrbuf_destroy(norm_wr);
- return mergekey_norm;
-}
-
-/** \brief see if metadata for pz:recordfilter exists
- \param root xml root element of normalized record
- \param sdb session database for client
- \retval 0 if there is no metadata for pz:recordfilter
- \retval 1 if there is metadata for pz:recordfilter
-
- If there is no pz:recordfilter defined, this function returns 1
- as well.
-*/
-
-static int check_record_filter(xmlNode *root, struct session_database *sdb)
-{
- int match = 0;
- xmlNode *n;
- const char *s;
- s = session_setting_oneval(sdb, PZ_RECORDFILTER);
-
- if (!s || !*s)
- return 1;
-
- for (n = root->children; n; n = n->next)
- {
- if (n->type != XML_ELEMENT_NODE)
- continue;
- if (!strcmp((const char *) n->name, "metadata"))
- {
- xmlChar *type = xmlGetProp(n, (xmlChar *) "type");
- if (type)
- {
- size_t len;
- const char *eq = strchr(s, '~');
- if (eq)
- len = eq - s;
- else
- len = strlen(s);
- if (len == strlen((const char *)type) &&
- !memcmp((const char *) type, s, len))
- {
- xmlChar *value = xmlNodeGetContent(n);
- if (value && *value)
- {
- if (!eq || strstr((const char *) value, eq+1))
- match = 1;
- }
- xmlFree(value);
- }
- xmlFree(type);
- }
- }
- }
- return match;
-}
-
-
-static int ingest_to_cluster(struct client *cl,
- xmlDoc *xdoc,
- xmlNode *root,
- int record_no,
- const char *mergekey_norm);
-
-/** \brief ingest XML record
- \param cl client holds the result set for record
- \param rec record buffer (0 terminated)
- \param record_no record position (1, 2, ..)
- \retval 0 OK
- \retval -1 failure
-*/
-int ingest_record(struct client *cl, const char *rec,
- int record_no, NMEM nmem)
-{
- struct session_database *sdb = client_get_database(cl);
- struct session *se = client_get_session(cl);
- struct conf_service *service = se->service;
- xmlDoc *xdoc = normalize_record(sdb, service, rec, nmem);
- xmlNode *root;
- const char *mergekey_norm;
- int ret;
-
- if (!xdoc)
- return -1;
-
- root = xmlDocGetRootElement(xdoc);
-
- if (!check_record_filter(root, sdb))
- {
- yaz_log(YLOG_WARN, "Filtered out record no %d from %s", record_no,
- sdb->database->url);
- xmlFreeDoc(xdoc);
- return -1;
- }
-
- mergekey_norm = get_mergekey(xdoc, cl, record_no, service, nmem);
- if (!mergekey_norm)
- {
- yaz_log(YLOG_WARN, "Got no mergekey");
- xmlFreeDoc(xdoc);
- return -1;
- }
- session_enter(se);
- ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekey_norm);
- session_leave(se);
-
- xmlFreeDoc(xdoc);
-
- return ret;
-}
-
-static int ingest_to_cluster(struct client *cl,
- xmlDoc *xdoc,
- xmlNode *root,
- int record_no,
- const char *mergekey_norm)
-{
- xmlNode *n;
- xmlChar *type = 0;
- xmlChar *value = 0;
- struct session_database *sdb = client_get_database(cl);
- struct session *se = client_get_session(cl);
- struct conf_service *service = se->service;
- struct record *record = record_create(se->nmem,
- service->num_metadata,
- service->num_sortkeys, cl,
- record_no);
- struct record_cluster *cluster = reclist_insert(se->reclist,
- service,
- record,
- mergekey_norm,
- &se->total_merged);
- if (!cluster)
- return -1;
- if (global_parameters.dump_records)
- yaz_log(YLOG_LOG, "Cluster id %s from %s (#%d)", cluster->recid,
- sdb->database->url, record_no);
- relevance_newrec(se->relevance, cluster);
-
- // now parsing XML record and adding data to cluster or record metadata
- for (n = root->children; n; n = n->next)
- {
- pp2_relevance_token_t prt;
- if (type)
- xmlFree(type);
- if (value)
- xmlFree(value);
- type = value = 0;
-
- if (n->type != XML_ELEMENT_NODE)
- continue;
- if (!strcmp((const char *) n->name, "metadata"))
- {
- struct conf_metadata *ser_md = 0;
- struct conf_sortkey *ser_sk = 0;
- struct record_metadata **wheretoput = 0;
- struct record_metadata *rec_md = 0;
- int md_field_id = -1;
- int sk_field_id = -1;
-
- type = xmlGetProp(n, (xmlChar *) "type");
- value = xmlNodeListGetString(xdoc, n->children, 1);
-
- if (!type || !value || !*value)
- continue;
-
- md_field_id
- = conf_service_metadata_field_id(service, (const char *) type);
- if (md_field_id < 0)
- {
- if (se->number_of_warnings_unknown_metadata == 0)
- {
- yaz_log(YLOG_WARN,
- "Ignoring unknown metadata element: %s", type);
- }
- se->number_of_warnings_unknown_metadata++;
- continue;
- }
-
- ser_md = &service->metadata[md_field_id];
-
- if (ser_md->sortkey_offset >= 0){
- sk_field_id = ser_md->sortkey_offset;
- ser_sk = &service->sortkeys[sk_field_id];
- }
-
- // non-merged metadata
- rec_md = record_metadata_init(se->nmem, (const char *) value,
- ser_md->type, n->properties);
- if (!rec_md)
- {
- yaz_log(YLOG_WARN, "bad metadata data '%s' for element '%s'",
- value, type);
- continue;
- }
- wheretoput = &record->metadata[md_field_id];
- while (*wheretoput)
- wheretoput = &(*wheretoput)->next;
- *wheretoput = rec_md;
-
- // merged metadata
- rec_md = record_metadata_init(se->nmem, (const char *) value,
- ser_md->type, 0);
- wheretoput = &cluster->metadata[md_field_id];
-
- // and polulate with data:
- // assign cluster or record based on merge action
- if (ser_md->merge == Metadata_merge_unique)
- {
- struct record_metadata *mnode;
- for (mnode = *wheretoput; mnode; mnode = mnode->next)
- if (!strcmp((const char *) mnode->data.text.disp,
- rec_md->data.text.disp))
- break;
- if (!mnode)
- {
- rec_md->next = *wheretoput;
- *wheretoput = rec_md;
- }
- }
- else if (ser_md->merge == Metadata_merge_longest)
- {
- if (!*wheretoput
- || strlen(rec_md->data.text.disp)
- > strlen((*wheretoput)->data.text.disp))
- {
- *wheretoput = rec_md;
- if (ser_sk)
- {
- const char *sort_str = 0;
- int skip_article =
- ser_sk->type == Metadata_sortkey_skiparticle;
-
- if (!cluster->sortkeys[sk_field_id])
- cluster->sortkeys[sk_field_id] =
- nmem_malloc(se->nmem,
- sizeof(union data_types));
-
- prt = pp2_relevance_tokenize(
- service->sort_pct,
- rec_md->data.text.disp, skip_article);
-
- pp2_relevance_token_next(prt);
-
- sort_str = pp2_get_sort(prt);
-
- cluster->sortkeys[sk_field_id]->text.disp =
- rec_md->data.text.disp;
- if (!sort_str)
- {
- sort_str = rec_md->data.text.disp;
- yaz_log(YLOG_WARN,
- "Could not make sortkey. Bug #1858");
- }
- cluster->sortkeys[sk_field_id]->text.sort =
- nmem_strdup(se->nmem, sort_str);
-#if 0
- yaz_log(YLOG_LOG, "text disp=%s",
- cluster->sortkeys[sk_field_id]->text.disp);
- yaz_log(YLOG_LOG, "text sort=%s",
- cluster->sortkeys[sk_field_id]->text.sort);
-#endif
- pp2_relevance_token_destroy(prt);
- }
- }
- }
- else if (ser_md->merge == Metadata_merge_all)
- {
- rec_md->next = *wheretoput;
- *wheretoput = rec_md;
- }
- else if (ser_md->merge == Metadata_merge_range)
- {
- if (!*wheretoput)
- {
- *wheretoput = rec_md;
- if (ser_sk)
- cluster->sortkeys[sk_field_id]
- = &rec_md->data;
- }
- else
- {
- int this_min = rec_md->data.number.min;
- int this_max = rec_md->data.number.max;
- if (this_min < (*wheretoput)->data.number.min)
- (*wheretoput)->data.number.min = this_min;
- if (this_max > (*wheretoput)->data.number.max)
- (*wheretoput)->data.number.max = this_max;
- }
- }
-
-
- // ranking of _all_ fields enabled ...
- if (ser_md->rank)
- relevance_countwords(se->relevance, cluster,
- (char *) value, ser_md->rank,
- ser_md->name);
-
- // construct facets ...
- if (ser_md->termlist)
- {
- if (ser_md->type == Metadata_type_year)
- {
- char year[64];
- sprintf(year, "%d", rec_md->data.number.max);
- add_facet(se, (char *) type, year);
- if (rec_md->data.number.max != rec_md->data.number.min)
- {
- sprintf(year, "%d", rec_md->data.number.min);
- add_facet(se, (char *) type, year);
- }
- }
- else
- add_facet(se, (char *) type, (char *) value);
- }
-
- // cleaning up
- xmlFree(type);
- xmlFree(value);
- type = value = 0;
- }
- else
- {
- if (se->number_of_warnings_unknown_elements == 0)
- yaz_log(YLOG_WARN,
- "Unexpected element in internal record: %s", n->name);
- se->number_of_warnings_unknown_elements++;
- }
- }
- if (type)
- xmlFree(type);
- if (value)
- xmlFree(value);
-
- relevance_donerecord(se->relevance, cluster);
- se->total_records++;
-
- return 0;
-}
-
-/*
- * Local variables:
- * c-basic-offset: 4
- * c-file-style: "Stroustrup"
- * indent-tabs-mode: nil
- * End:
- * vim: shiftwidth=4 tabstop=8 expandtab
- */
-
#include <assert.h>
#include "parameters.h"
-#include "pazpar2.h"
+#include "session.h"
#include <yaz/daemon.h>
#include <yaz/log.h>
#include <yaz/options.h>
+++ /dev/null
-/* This file is part of Pazpar2.
- Copyright (C) 2006-2010 Index Data
-
-Pazpar2 is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*/
-
-#ifndef PAZPAR2_H
-#define PAZPAR2_H
-
-#include <yaz/comstack.h>
-#include <yaz/pquery.h>
-#include <yaz/ccl.h>
-#include <yaz/yaz-ccl.h>
-
-#include "termlists.h"
-#include "reclists.h"
-#include "http.h"
-
-struct record;
-struct client;
-
-
-enum pazpar2_error_code {
- PAZPAR2_NO_ERROR = 0,
-
- PAZPAR2_NO_SESSION,
- PAZPAR2_MISSING_PARAMETER,
- PAZPAR2_MALFORMED_PARAMETER_VALUE,
- PAZPAR2_MALFORMED_PARAMETER_ENCODING,
- PAZPAR2_MALFORMED_SETTING,
- PAZPAR2_HITCOUNTS_FAILED,
- PAZPAR2_RECORD_MISSING,
- PAZPAR2_NO_TARGETS,
- PAZPAR2_CONFIG_TARGET,
- PAZPAR2_RECORD_FAIL,
- PAZPAR2_NOT_IMPLEMENTED,
- PAZPAR2_NO_SERVICE,
-
- PAZPAR2_LAST_ERROR
-};
-
-struct host;
-// Represents a (virtual) database on a host
-struct database {
- struct host *host;
- char *url;
- char **databases;
- int errors;
- struct zr_explain *explain;
- int num_settings;
- struct setting **settings;
- struct database *next;
-};
-
-
-// Represents a database as viewed from one session, possibly with settings overriden
-// for that session
-struct session_database
-{
- struct database *database;
- int num_settings;
- struct setting **settings;
- normalize_record_t map;
- struct session_database *next;
-};
-
-#define SESSION_WATCH_SHOW 0
-#define SESSION_WATCH_RECORD 1
-#define SESSION_WATCH_MAX 1
-
-#define SESSION_MAX_TERMLISTS 10
-
-typedef void (*session_watchfun)(void *data);
-
-struct named_termlist
-{
- char *name;
- struct termlist *termlist;
-};
-
-struct session_watchentry {
- void *data;
- http_channel_observer_t obs;
- session_watchfun fun;
-};
-
-// End-user session
-struct session {
- struct conf_service *service; /* service in use for this session */
- struct session_database *databases; // All databases, settings overriden
- struct client *clients; // Clients connected for current search
- NMEM session_nmem; // Nmem for session-permanent storage
- NMEM nmem; // Nmem for each operation (i.e. search, result set, etc)
- WRBUF wrbuf; // Wrbuf for scratch(i.e. search)
- int num_termlists;
- struct named_termlist termlists[SESSION_MAX_TERMLISTS];
- struct relevance *relevance;
- struct reclist *reclist;
- struct session_watchentry watchlist[SESSION_WATCH_MAX + 1];
- Odr_int total_hits;
- int total_records;
- int total_merged;
- int number_of_warnings_unknown_elements;
- int number_of_warnings_unknown_metadata;
- normalize_cache_t normalize_cache;
- YAZ_MUTEX mutex;
-};
-
-struct statistics {
- int num_clients;
- int num_no_connection;
- int num_connecting;
- int num_working;
- int num_idle;
- int num_failed;
- int num_error;
- Odr_int num_hits;
- int num_records;
-};
-
-struct hitsbytarget {
- char *id;
- const char *name;
- Odr_int hits;
- int diagnostic;
- int records;
- const char *state;
- int connected;
- WRBUF settings_xml;
-};
-
-struct hitsbytarget *hitsbytarget(struct session *s, int *count, NMEM nmem);
-struct session *new_session(NMEM nmem, struct conf_service *service);
-void destroy_session(struct session *s);
-void session_init_databases(struct session *s);
-int load_targets(struct session *s, const char *fn);
-void statistics(struct session *s, struct statistics *stat);
-enum pazpar2_error_code search(struct session *s, const char *query,
- const char *startrecs, const char *maxrecs,
- const char *filter, const char **addinfo);
-struct record_cluster **show_range_start(struct session *s,
- struct reclist_sortparms *sp,
- int start,
- int *num, int *total, Odr_int *sumhits);
-void show_range_stop(struct session *s, struct record_cluster **recs);
-
-struct record_cluster *show_single_start(struct session *s, const char *id,
- struct record_cluster **prev_r,
- struct record_cluster **next_r);
-void show_single_stop(struct session *s, struct record_cluster *rec);
-struct termlist_score **termlist(struct session *s, const char *name, int *num);
-int session_set_watch(struct session *s, int what, session_watchfun fun, void *data, struct http_channel *c);
-int session_active_clients(struct session *s);
-void session_apply_setting(struct session *se, char *dbname, char *setting, char *value);
-const char *session_setting_oneval(struct session_database *db, int offset);
-
-int host_getaddrinfo(struct host *host, iochan_man_t iochan_man);
-
-int ingest_record(struct client *cl, const char *rec, int record_no, NMEM nmem);
-void session_alert_watch(struct session *s, int what);
-void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num);
-
-#endif
-
-/*
- * Local variables:
- * c-basic-offset: 4
- * c-file-style: "Stroustrup"
- * indent-tabs-mode: nil
- * End:
- * vim: shiftwidth=4 tabstop=8 expandtab
- */
-
#include <yaz/yaz-util.h>
-#include "pazpar2.h"
+#include "session.h"
#include "reclists.h"
#include "jenkins_hash.h"
#include <stdlib.h>
#include "relevance.h"
-#include "pazpar2.h"
+#include "session.h"
struct relevance
{
--- /dev/null
+/* This file is part of Pazpar2.
+ Copyright (C) 2006-2010 Index Data
+
+Pazpar2 is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+/** \file session.c
+ \brief high-level logic; mostly user sessions and settings
+*/
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#if HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <signal.h>
+#include <ctype.h>
+#include <assert.h>
+
+#include <yaz/marcdisp.h>
+#include <yaz/comstack.h>
+#include <yaz/tcpip.h>
+#include <yaz/proto.h>
+#include <yaz/readconf.h>
+#include <yaz/pquery.h>
+#include <yaz/otherinfo.h>
+#include <yaz/yaz-util.h>
+#include <yaz/nmem.h>
+#include <yaz/query-charset.h>
+#include <yaz/querytowrbuf.h>
+#include <yaz/oid_db.h>
+#include <yaz/snprintf.h>
+
+#define USE_TIMING 0
+#if USE_TIMING
+#include <yaz/timing.h>
+#endif
+
+#include "parameters.h"
+#include "session.h"
+#include "eventl.h"
+#include "http.h"
+#include "termlists.h"
+#include "reclists.h"
+#include "relevance.h"
+#include "database.h"
+#include "client.h"
+#include "settings.h"
+#include "normalize7bit.h"
+
+#define TERMLIST_HIGH_SCORE 25
+
+#define MAX_CHUNK 15
+
+// Process-wide runtime switches, both initialised to 0 (off).
+// Note: Some things in this structure will eventually move to configuration
+struct parameters global_parameters =
+{
+ 0, // dump_records: when non-zero, records and cluster ids are written to the log
+ 0 // debug_mode
+};
+
+/* Serialize an XML document and append it, followed by a newline, to the
+   current YAZ log file. Nothing is written when there is no log file or
+   when the serialization is empty. The serialized buffer is always freed. */
+static void log_xml_doc(xmlDoc *doc)
+{
+    FILE *log_fp = yaz_log_file();
+    xmlChar *dump = 0;
+    int dump_len = 0;
+
+#if LIBXML_VERSION >= 20600
+    xmlDocDumpFormatMemory(doc, &dump, &dump_len, 1);
+#else
+    xmlDocDumpMemory(doc, &dump, &dump_len);
+#endif
+    if (log_fp != 0 && dump_len != 0)
+    {
+        (void) fwrite(dump, 1, dump_len, log_fp);
+        fputc('\n', log_fp);
+    }
+    xmlFree(dump);
+}
+
+// Acquire the per-session mutex (s->mutex). Pair with session_leave().
+static void session_enter(struct session *s)
+{
+ yaz_mutex_enter(s->mutex);
+}
+
+// Release the per-session mutex (s->mutex) taken by session_enter().
+static void session_leave(struct session *s)
+{
+ yaz_mutex_leave(s->mutex);
+}
+
+// Walk a CCL RPN tree and collect every search term, split on spaces.
+// Terms are appended to termlist starting at index *num, and *num is
+// advanced past the last one stored. The term strings come from
+// nmem_strsplit() on nmem. The caller must supply a termlist array that is
+// large enough; no bound is checked here.
+void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num)
+{
+    switch (n->kind)
+    {
+    case CCL_RPN_TERM:
+    {
+        char **tokens;
+        int ntokens;
+        int k = 0;
+
+        nmem_strsplit(nmem, " ", n->u.t.term, &tokens, &ntokens);
+        while (k < ntokens)
+        {
+            termlist[*num] = tokens[k];
+            (*num)++;
+            k++;
+        }
+        break;
+    }
+    case CCL_RPN_AND:
+    case CCL_RPN_OR:
+    case CCL_RPN_NOT:
+    case CCL_RPN_PROX:
+        // binary operators: descend left, then right
+        pull_terms(nmem, n->u.p[0], termlist, num);
+        pull_terms(nmem, n->u.p[1], termlist, num);
+        break;
+    default: // NOOP
+        break;
+    }
+}
+
+
+// Record one facet value for the termlist named 'type'.
+// Empty values are ignored. The termlist is created on s->nmem the first
+// time its name is seen; once SESSION_MAX_TERMLISTS lists exist, values for
+// further new names are logged and dropped.
+static void add_facet(struct session *s, const char *type, const char *value)
+{
+    int idx = 0;
+
+    if (*value == '\0')
+        return;
+
+    // linear search for an existing list with this name
+    while (idx < s->num_termlists && strcmp(s->termlists[idx].name, type) != 0)
+        idx++;
+
+    if (idx == s->num_termlists)
+    {
+        if (idx == SESSION_MAX_TERMLISTS)
+        {
+            yaz_log(YLOG_FATAL, "Too many termlists");
+            return;
+        }
+        s->termlists[idx].name = nmem_strdup(s->nmem, type);
+        s->termlists[idx].termlist =
+            termlist_create(s->nmem, TERMLIST_HIGH_SCORE);
+        s->num_termlists = idx + 1;
+    }
+    termlist_insert(s->termlists[idx].termlist, value);
+}
+
+// Parse a 0-terminated record buffer into an XML document.
+// Returns the parsed document, or 0 (after logging) when the buffer is not
+// well-formed XML. When global_parameters.dump_records is set, the parsed
+// document is also written to the log.
+static xmlDoc *record_to_xml(struct session_database *sdb, const char *rec)
+{
+    const char *origin = sdb->database->url;
+    xmlDoc *parsed = xmlParseMemory(rec, strlen(rec));
+
+    if (parsed == 0)
+    {
+        yaz_log(YLOG_FATAL, "Non-wellformed XML received from %s",
+                origin);
+        return 0;
+    }
+    if (global_parameters.dump_records)
+    {
+        yaz_log(YLOG_LOG, "Un-normalized record from %s", origin);
+        log_xml_doc(parsed);
+    }
+    return parsed;
+}
+
+#define MAX_XSLT_ARGS 16
+
+// Build the XSLT parameter list from session database settings.
+// For each service metadata entry flagged Metadata_setting_parameter that
+// has a settings offset, a (name, 'value') pair is appended to parms; the
+// value is wrapped in single quotes and allocated on nmem. At most
+// MAX_XSLT_ARGS pairs are written, followed by a terminating 0, so parms
+// must have room for MAX_XSLT_ARGS*2+1 entries.
+static void insert_settings_parameters(struct session_database *sdb,
+                                       struct conf_service *service,
+                                       char **parms,
+                                       NMEM nmem)
+{
+    int npairs = 0;
+    int i;
+
+    for (i = 0; i < service->num_metadata; i++)
+    {
+        struct conf_metadata *md = &service->metadata[i];
+        const char *val;
+        char *quoted;
+        size_t len;
+        int setting;
+
+        if (md->setting != Metadata_setting_parameter)
+            continue;
+        setting = settings_lookup_offset(service, md->name);
+        if (setting < 0)
+            continue;
+        val = session_setting_oneval(sdb, setting);
+        if (!val || npairs >= MAX_XSLT_ARGS)
+            continue;
+
+        // 'value' plus terminating NUL: len + 3 bytes
+        len = strlen(val);
+        quoted = nmem_malloc(nmem, len + 3);
+        quoted[0] = '\'';
+        memcpy(quoted + 1, val, len);
+        quoted[len + 1] = '\'';
+        quoted[len + 2] = '\0';
+
+        parms[2 * npairs] = md->name;
+        parms[2 * npairs + 1] = quoted;
+        npairs++;
+    }
+    parms[2 * npairs] = 0;
+}
+
+// Append settings-derived metadata to a normalized record.
+// For each service metadata entry flagged Metadata_setting_postproc that
+// has a settings offset, a <metadata type="NAME">VALUE</metadata> child is
+// added under the document's root element.
+static void insert_settings_values(struct session_database *sdb, xmlDoc *doc,
+                                   struct conf_service *service)
+{
+    int i;
+
+    for (i = 0; i < service->num_metadata; i++)
+    {
+        struct conf_metadata *md = &service->metadata[i];
+        const char *val;
+        xmlNode *root;
+        xmlNode *added;
+        int offset;
+
+        if (md->setting != Metadata_setting_postproc)
+            continue;
+        offset = settings_lookup_offset(service, md->name);
+        if (offset < 0)
+            continue;
+        val = session_setting_oneval(sdb, offset);
+        if (!val)
+            continue;
+
+        root = xmlDocGetRootElement(doc);
+        added = xmlNewTextChild(root, 0, (xmlChar *) "metadata",
+                                (xmlChar *) val);
+        xmlSetProp(added, (xmlChar *) "type", (xmlChar *) md->name);
+    }
+}
+
+// Parse a raw record and run it through the database's normalization map.
+// Returns 0 when the record cannot be parsed. Otherwise returns whatever
+// document pointer normalize_record_transform() leaves behind; a failed
+// transform is logged as a warning. On success the postproc settings are
+// appended, and the result is logged if dump_records is enabled.
+static xmlDoc *normalize_record(struct session_database *sdb,
+                                struct conf_service *service,
+                                const char *rec, NMEM nmem)
+{
+    char *xslt_args[MAX_XSLT_ARGS*2+1];
+    xmlDoc *doc = record_to_xml(sdb, rec);
+
+    if (!doc)
+        return 0;
+
+    insert_settings_parameters(sdb, service, xslt_args, nmem);
+
+    if (normalize_record_transform(sdb->map, &doc, (const char **) xslt_args))
+    {
+        yaz_log(YLOG_WARN, "Normalize failed from %s", sdb->database->url);
+        return doc;
+    }
+
+    insert_settings_values(sdb, doc, service);
+    if (global_parameters.dump_records)
+    {
+        yaz_log(YLOG_LOG, "Normalized record from %s",
+                sdb->database->url);
+        log_xml_doc(doc);
+    }
+    return doc;
+}
+
+// Write all settings of a session database to w as XML.
+// Each setting becomes <set name="..." value="..."/> with XML-escaped
+// attribute values. All entries chained at one settings offset go on one
+// line; a newline follows each non-empty offset. Nothing is written when
+// the database has no settings array.
+void session_settings_dump(struct session *se,
+                           struct session_database *db,
+                           WRBUF w)
+{
+    int idx;
+
+    if (!db->settings)
+        return;
+
+    for (idx = 0; idx < db->num_settings; idx++)
+    {
+        struct setting *cur = db->settings[idx];
+
+        while (cur)
+        {
+            wrbuf_puts(w, "<set name=\"");
+            wrbuf_xmlputs(w, cur->name);
+            wrbuf_puts(w, "\" value=\"");
+            wrbuf_xmlputs(w, cur->value);
+            wrbuf_puts(w, "\"/>");
+            cur = cur->next;
+        }
+        if (db->settings[idx])
+            wrbuf_puts(w, "\n");
+    }
+}
+
+// Retrieve the value of the first setting entry stored at 'offset' for the
+// given session database. Returns "" when the offset is at or beyond
+// num_settings or when nothing is stored there.
+// Will be extended to take into account user associated with session
+const char *session_setting_oneval(struct session_database *db, int offset)
+{
+    if (offset < db->num_settings && db->settings[offset])
+        return db->settings[offset]->value;
+    return "";
+}
+
+// Prepare XSLT stylesheets for record normalization
+// Structures are allocated on the session_wide nmem to avoid having
+// to recompute this for every search. This would lead
+// to leaking if a single session was to repeatedly change the PZ_XSLT
+// setting. However, this is not a realistic use scenario.
+//
+// Reads the PZ_XSLT setting of sdb and stores the corresponding entry of
+// se->normalize_cache in sdb->map.
+// Returns 0 on success, -1 when sdb has no settings array or when the
+// cache lookup yields no map.
+static int prepare_map(struct session *se, struct session_database *sdb)
+{
+ const char *s;
+
+ if (!sdb->settings)
+ {
+ yaz_log(YLOG_WARN, "No settings on %s", sdb->database->url);
+ return -1;
+ }
+ if ((s = session_setting_oneval(sdb, PZ_XSLT)))
+ {
+ // Must live until normalize_cache_get() below, since s may point at it
+ char auto_stylesheet[256];
+
+ // "auto": derive the stylesheet name from the request syntax,
+ // as "<requestsyntax>.xsl" in lower case
+ if (!strcmp(s, "auto"))
+ {
+ const char *request_syntax = session_setting_oneval(
+ sdb, PZ_REQUESTSYNTAX);
+ if (request_syntax)
+ {
+ char *cp;
+ yaz_snprintf(auto_stylesheet, sizeof(auto_stylesheet),
+ "%s.xsl", request_syntax);
+ for (cp = auto_stylesheet; *cp; cp++)
+ {
+ /* deliberately only consider ASCII */
+ if (*cp > 32 && *cp < 127)
+ *cp = tolower(*cp);
+ }
+ s = auto_stylesheet;
+ }
+ else
+ {
+ // NOTE(review): session_setting_oneval() returns "" rather than
+ // 0 for an unset setting, so this branch is presumably only
+ // reached when a stored value pointer is itself 0 -- confirm
+ yaz_log(YLOG_WARN, "No pz:requestsyntax for auto stylesheet");
+ }
+ }
+ sdb->map = normalize_cache_get(se->normalize_cache,
+ se->service, s);
+ if (!sdb->map)
+ return -1;
+ }
+ return 0;
+}
+
+// Make sure the settings-derived structures of a session database are
+// ready for use. Currently that is only the normalization map: it is
+// (re)built when a PZ_XSLT setting exists and no map is cached.
+// Returns 0 on success, -1 when the database has no settings or the map
+// cannot be prepared.
+static int prepare_session_database(struct session *se,
+                                    struct session_database *sdb)
+{
+    if (sdb->settings == 0)
+    {
+        yaz_log(YLOG_WARN,
+                "No settings associated with %s", sdb->database->url);
+        return -1;
+    }
+    // nothing to do: map already cached, or no stylesheet configured
+    if (sdb->map || !sdb->settings[PZ_XSLT])
+        return 0;
+
+    return prepare_map(se, sdb) < 0 ? -1 : 0;
+}
+
+// Observer callback: invoked when the http_channel a watch is bound to is
+// about to be destroyed. 'data' is the session_watchentry registered in
+// session_set_watch(); the entry is cleared so the slot can be reused.
+static void session_watch_cancel(void *data, struct http_channel *c,
+                                 void *data2)
+{
+    struct session_watchentry *slot = (struct session_watchentry *) data;
+
+    slot->obs = 0;
+    slot->data = 0;
+    slot->fun = 0;
+}
+
+// Install a watch of kind 'what' on the session and register an observer
+// on 'chan' so the watch is cancelled if the channel goes away.
+// Returns 0 on success, -1 if a watch of that kind is already set.
+// 'what' is used directly as an index into s->watchlist.
+int session_set_watch(struct session *s, int what,
+                      session_watchfun fun, void *data,
+                      struct http_channel *chan)
+{
+    struct session_watchentry *slot = &s->watchlist[what];
+
+    if (slot->fun)
+        return -1;
+
+    slot->fun = fun;
+    slot->data = data;
+    slot->obs = http_add_observer(chan, slot, session_watch_cancel);
+    return 0;
+}
+
+// Fire the watch of kind 'what', if one is set.
+// The observer is detached from its http_channel and the slot is cleared
+// before the callback runs, so the callback may install a new watch.
+void session_alert_watch(struct session *s, int what)
+{
+    struct session_watchentry *slot = &s->watchlist[what];
+    session_watchfun callback;
+    void *callback_data;
+
+    if (!slot->fun)
+        return;
+
+    /* our watch is no longer associated with http_channel */
+    http_remove_observer(slot->obs);
+    callback = slot->fun;
+    callback_data = slot->data;
+
+    /* reset watch before callback is invoked - in case it wants to set
+       it again */
+    slot->fun = 0;
+    slot->data = 0;
+    slot->obs = 0;
+
+    callback(callback_data);
+}
+
+// Callback for session_grep_databases(): create a client for a matching
+// session database and bind it to the session passed as 'context'.
+static void select_targets_callback(void *context, struct session_database *db)
+{
+    struct session *owner = context;
+    struct client *created = client_create();
+
+    client_set_database(created, db);
+    client_set_session(created, owner);
+}
+
+// Associates a set of clients with a session.
+// All existing clients of the session are destroyed first; then one client
+// is created for every session database matching 'filter'. Returns the
+// result of session_grep_databases().
+// Note: Session-databases represent databases with per-session
+// setting overrides
+static int select_targets(struct session *se, const char *filter)
+{
+    for (; se->clients; )
+        client_destroy(se->clients);
+
+    return session_grep_databases(se, filter, select_targets_callback);
+}
+
+// Count the clients of the session for which client_is_active() is true.
+int session_active_clients(struct session *s)
+{
+    struct client *cur = s->clients;
+    int active = 0;
+
+    while (cur)
+    {
+        if (client_is_active(cur))
+            active++;
+        cur = client_next_in_session(cur);
+    }
+    return active;
+}
+
+
+// Start a new search in the session.
+// Resets the per-operation state (se->nmem, record list, termlists and
+// counters), selects the targets matching 'filter' and, for each resulting
+// client, parses 'query' and starts the search if a connection could be
+// prepared.
+// startrecs / maxrecs are optional decimal strings (converted with atoi).
+// *addinfo is set to 0, or to "query" when the query is reported malformed.
+// Returns:
+//   PAZPAR2_NO_TARGETS                 no client selected, or none got as
+//                                      far as query parsing
+//   PAZPAR2_MALFORMED_PARAMETER_VALUE  no client is working and at least
+//                                      one failed to parse the query
+//   PAZPAR2_NO_ERROR                   at least one client is working
+enum pazpar2_error_code search(struct session *se,
+ const char *query,
+ const char *startrecs, const char *maxrecs,
+ const char *filter,
+ const char **addinfo)
+{
+ int live_channels = 0;
+ int no_working = 0;
+ int no_failed = 0;
+ struct client *cl;
+
+ yaz_log(YLOG_DEBUG, "Search");
+
+ *addinfo = 0;
+
+ // Everything below up to session_leave() runs under the session mutex
+ session_enter(se);
+ // Drop all per-operation allocations of the previous search
+ nmem_reset(se->nmem);
+ se->relevance = 0;
+ se->total_records = se->total_hits = se->total_merged = 0;
+ reclist_destroy(se->reclist);
+ se->reclist = 0;
+ se->num_termlists = 0;
+ live_channels = select_targets(se, filter);
+ if (!live_channels)
+ {
+ session_leave(se);
+ return PAZPAR2_NO_TARGETS;
+ }
+ se->reclist = reclist_create(se->nmem);
+
+ for (cl = se->clients; cl; cl = client_next_in_session(cl))
+ {
+ if (maxrecs)
+ client_set_maxrecs(cl, atoi(maxrecs));
+ if (startrecs)
+ client_set_startrecs(cl, atoi(startrecs));
+ // Clients whose database cannot be prepared are skipped and counted
+ // as neither working nor failed
+ if (prepare_session_database(se, client_get_database(cl)) < 0)
+ continue;
+ // Parse query for target
+ if (client_parse_query(cl, query) < 0)
+ no_failed++;
+ else
+ {
+ no_working++;
+ if (client_prep_connection(cl, se->service->z3950_operation_timeout,
+ se->service->z3950_session_timeout,
+ se->service->server->iochan_man))
+ client_start_search(cl);
+ }
+ }
+ session_leave(se);
+ if (no_working == 0)
+ {
+ if (no_failed > 0)
+ {
+ *addinfo = "query";
+ return PAZPAR2_MALFORMED_PARAMETER_VALUE;
+ }
+ else
+ return PAZPAR2_NO_TARGETS;
+ }
+ return PAZPAR2_NO_ERROR;
+}
+
+// Creates a new session_database object for a database and pushes it onto
+// the head of se->databases.
+// 'context' is the struct session. The new object and its settings array
+// are allocated on se->session_nmem. The array is a copy of db's array of
+// pointers: the setting entries themselves are shared with db, so session
+// overrides can later be chained in front without touching db.
+// db->settings must be non-null (asserted).
+static void session_init_databases_fun(void *context, struct database *db)
+{
+    struct session *se = (struct session *) context;
+    struct session_database *new = nmem_malloc(se->session_nmem, sizeof(*new));
+    int i;
+
+    new->database = db;
+    new->map = 0;
+
+    assert(db->settings);
+    // size from the pointer itself; the elements are struct setting *
+    new->settings = nmem_malloc(se->session_nmem,
+                                sizeof(*new->settings) * db->num_settings);
+    new->num_settings = db->num_settings;
+    for (i = 0; i < db->num_settings; i++)
+        new->settings[i] = db->settings[i];
+
+    new->next = se->databases;
+    se->databases = new;
+}
+
+// Detach a session database from its normalization map by clearing the
+// pointer.
+// Doesn't free memory associated with sdb -- nmem takes care of that
+static void session_database_destroy(struct session_database *sdb)
+{
+ sdb->map = 0;
+}
+
+// Initialize session_database list -- this represents this session's view
+// of the database list -- subject to modification by the settings ws command
+// The list is emptied first, then session_init_databases_fun is passed to
+// predef_grep_databases() as the callback for the session's service.
+void session_init_databases(struct session *se)
+{
+ se->databases = 0;
+ predef_grep_databases(se, se->service, session_init_databases_fun);
+}
+
+// Create a database for 'id' on the session nmem, resolve it against the
+// session's service and wrap it in a new session_database.
+// Returns the new session_database, which is the head of se->databases.
+// Probably session_init_databases_fun should be refactored instead of
+// called here.
+static struct session_database *load_session_database(struct session *se,
+                                                      char *id)
+{
+    struct database *loaded = new_database(id, se->session_nmem);
+
+    resolve_database(se->service, loaded);
+    session_init_databases_fun(se, loaded);
+
+    // session_init_databases_fun pushes onto the head of the list
+    return se->databases;
+}
+
+// Find the session database whose URL equals 'id'.
+// If the session has none, one is loaded via load_session_database().
+static struct session_database *find_session_database(struct session *se,
+                                                      char *id)
+{
+    struct session_database *cur = se->databases;
+
+    while (cur)
+    {
+        if (strcmp(cur->database->url, id) == 0)
+            return cur;
+        cur = cur->next;
+    }
+    return load_session_database(se, id);
+}
+
+// Apply a session override to a database.
+// A new setting entry (precedence 0) is allocated on the session nmem and
+// chained in front of any existing entries at the setting's offset, so it
+// is the one session_setting_oneval() returns. The dbname, setting and
+// value pointers are stored as given, not copied.
+void session_apply_setting(struct session *se, char *dbname, char *setting,
+                           char *value)
+{
+    struct session_database *sdb = find_session_database(se, dbname);
+    struct conf_service *service = se->service;
+    struct setting *override = nmem_malloc(se->session_nmem, sizeof(*override));
+    int offset = settings_create_offset(service, setting);
+
+    expand_settings_array(&sdb->settings, &sdb->num_settings, offset,
+                          se->session_nmem);
+
+    override->precedence = 0;
+    override->target = dbname;
+    override->name = setting;
+    override->value = value;
+    override->next = sdb->settings[offset];
+    sdb->settings[offset] = override;
+
+    // Force later recompute of settings-driven data structures
+    // (happens when a search starts and client connections are prepared)
+    if (offset == PZ_XSLT)
+        sdb->map = 0;
+}
+
+// Tear down a session: destroy its clients, detach the session databases
+// from their maps, and release the normalize cache, record list,
+// per-operation nmem, service reference, mutex and scratch wrbuf.
+// s->session_nmem is not destroyed here; new_session() allocates the
+// session struct itself from that nmem, which the caller supplied.
+void destroy_session(struct session *s)
+{
+ struct session_database *sdb;
+
+ // client_destroy is expected to unlink the client from s->clients,
+ // otherwise this loop would not terminate -- TODO confirm
+ while (s->clients)
+ client_destroy(s->clients);
+ for (sdb = s->databases; sdb; sdb = sdb->next)
+ session_database_destroy(sdb);
+ normalize_cache_destroy(s->normalize_cache);
+ reclist_destroy(s->reclist);
+ nmem_destroy(s->nmem);
+ service_destroy(s->service);
+ yaz_mutex_destroy(&s->mutex);
+ wrbuf_destroy(s->wrbuf);
+}
+
+// Create a new session for 'service'.
+// The session struct is allocated on 'nmem', which also becomes the
+// session-permanent nmem (session_nmem). A separate per-operation nmem, a
+// scratch wrbuf, a normalize cache and a mutex are created here and
+// released in destroy_session(). Every field is set explicitly because the
+// memory from nmem_malloc() is not assumed to be zeroed.
+struct session *new_session(NMEM nmem, struct conf_service *service)
+{
+    int i;
+    struct session *session = nmem_malloc(nmem, sizeof(*session));
+
+    yaz_log(YLOG_DEBUG, "New Pazpar2 session");
+
+    session->service = service;
+    session->relevance = 0;
+    session->total_hits = 0;
+    session->total_records = 0;
+    session->total_merged = 0;
+    session->number_of_warnings_unknown_elements = 0;
+    session->number_of_warnings_unknown_metadata = 0;
+    session->num_termlists = 0;
+    session->reclist = 0;
+    session->clients = 0;
+    session->session_nmem = nmem;
+    session->nmem = nmem_create();
+    session->wrbuf = wrbuf_alloc();
+    session->databases = 0;
+    for (i = 0; i <= SESSION_WATCH_MAX; i++)
+    {
+        session->watchlist[i].data = 0;
+        session->watchlist[i].fun = 0;
+        session->watchlist[i].obs = 0;
+    }
+    session->normalize_cache = normalize_cache_create();
+    session->mutex = 0;
+    yaz_mutex_create(&session->mutex);
+
+    return session;
+}
+
+// Build a per-target status report for the session.
+// Returns an array with one entry per client, allocated on 'nmem'; the
+// number of entries is stored in *count. Each entry's settings_xml is a
+// WRBUF allocated here with wrbuf_alloc() and handed to the caller.
+// The session mutex is held while the clients are inspected.
+struct hitsbytarget *hitsbytarget(struct session *se, int *count, NMEM nmem)
+{
+    struct hitsbytarget *report;
+    struct client *cl;
+    size_t nclients = 0;
+
+    session_enter(se);
+
+    // first pass: size the array
+    cl = se->clients;
+    while (cl)
+    {
+        nclients++;
+        cl = client_next_in_session(cl);
+    }
+    report = nmem_malloc(nmem, sizeof(*report) * nclients);
+
+    // second pass: fill one entry per client
+    *count = 0;
+    for (cl = se->clients; cl; cl = client_next_in_session(cl))
+    {
+        struct hitsbytarget *entry = &report[*count];
+        WRBUF settings = wrbuf_alloc();
+        const char *name = session_setting_oneval(client_get_database(cl),
+                                                  PZ_NAME);
+
+        entry->id = client_get_database(cl)->database->url;
+        if (*name)
+            entry->name = name;
+        else
+            entry->name = "Unknown";
+        entry->hits = client_get_hits(cl);
+        entry->records = client_get_num_records(cl);
+        entry->diagnostic = client_get_diagnostic(cl);
+        entry->state = client_get_state_str(cl);
+        entry->connected = client_get_connection(cl) ? 1 : 0;
+        session_settings_dump(se, client_get_database(cl), settings);
+        entry->settings_xml = settings;
+        (*count)++;
+    }
+    session_leave(se);
+    return report;
+}
+
+// Return the high-score entries of the session termlist called 'name'.
+// *num receives the entry count via termlist_highscore(). When no termlist
+// has that name, 0 is returned and *num is left untouched.
+// The lookup runs under the session mutex.
+struct termlist_score **termlist(struct session *s, const char *name, int *num)
+{
+    struct termlist_score **scores = 0;
+    int idx = 0;
+
+    session_enter(s);
+    while (idx < s->num_termlists
+           && strcmp((const char *) s->termlists[idx].name, name) != 0)
+        idx++;
+    if (idx < s->num_termlists)
+        scores = termlist_highscore(s->termlists[idx].termlist, num);
+    session_leave(s);
+
+    return scores;
+}
+
+#ifdef MISSING_HEADERS
+// Log the nmem memory statistics (bytes in use / bytes free).
+void report_nmem_stats(void)
+{
+    size_t used_bytes;
+    size_t free_bytes;
+
+    nmem_get_memory_in_use(&used_bytes);
+    nmem_get_memory_free(&free_bytes);
+
+    yaz_log(YLOG_LOG, "nmem stat: use=%ld free=%ld",
+            (long) used_bytes, (long) free_bytes);
+}
+#endif
+
+// Look up the record cluster whose recid equals 'id'.
+// *prev_r and *next_r receive the clusters read immediately before and
+// after it in the record list (0 if there is none).
+// Locking: when the cluster is found, the function returns with the
+// session mutex still held and the caller must release it with
+// show_single_stop(). When it is not found, the mutex is released here
+// and 0 is returned.
+struct record_cluster *show_single_start(struct session *s, const char *id,
+ struct record_cluster **prev_r,
+ struct record_cluster **next_r)
+{
+ struct record_cluster *r;
+
+ session_enter(s);
+ reclist_enter(s->reclist);
+ *prev_r = 0;
+ *next_r = 0;
+ while ((r = reclist_read_record(s->reclist)))
+ {
+ if (!strcmp(r->recid, id))
+ {
+ // one more read yields the successor (or 0 at end of list)
+ *next_r = reclist_read_record(s->reclist);
+ break;
+ }
+ *prev_r = r;
+ }
+ reclist_leave(s->reclist);
+ // not found: nothing for the caller to look at, so unlock right away
+ if (!r)
+ session_leave(s);
+ return r;
+}
+
+// Release the session mutex left held by a successful show_single_start().
+// 'rec' is not used.
+void show_single_stop(struct session *s, struct record_cluster *rec)
+{
+ session_leave(s);
+}
+
+// Fetch a window of the sorted record list.
+//   sp       sort parameters; relevance scores are prepared first when any
+//            entry sorts by relevance
+//   start    number of leading clusters to skip
+//   *num     in: maximum number of clusters wanted; out: number returned
+//   *total   out: number of clusters in the record list
+//   *sumhits out: total hit count of the session
+// Returns an array of *num clusters allocated on s->nmem, or 0 when there
+// is no relevance object or 'start' lies beyond the end of the list.
+// Locking: the session mutex is taken here and stays held on return in
+// every case; the caller must release it with show_range_stop().
+struct record_cluster **show_range_start(struct session *s,
+                                         struct reclist_sortparms *sp,
+                                         int start, int *num, int *total, Odr_int *sumhits)
+{
+    struct record_cluster **recs;
+    struct reclist_sortparms *spp;
+    int i;
+#if USE_TIMING
+    yaz_timing_t t = yaz_timing_create();
+#endif
+
+    session_enter(s);
+    // Allocate only after taking the lock: s->nmem is reset by search()
+    // under the same mutex, so using it unlocked would race with that reset.
+    recs = nmem_malloc(s->nmem, *num * sizeof(struct record_cluster *));
+    if (!s->relevance)
+    {
+        *num = 0;
+        *total = 0;
+        *sumhits = 0;
+        recs = 0;
+    }
+    else
+    {
+        for (spp = sp; spp; spp = spp->next)
+            if (spp->type == Metadata_sortkey_relevance)
+            {
+                relevance_prepare_read(s->relevance, s->reclist);
+                break;
+            }
+        reclist_sort(s->reclist, sp);
+
+        reclist_enter(s->reclist);
+        *total = reclist_get_num_records(s->reclist);
+        *sumhits = s->total_hits;
+
+        // skip the first 'start' clusters
+        for (i = 0; i < start; i++)
+            if (!reclist_read_record(s->reclist))
+            {
+                // *num = 0 also keeps the read loop below from running
+                *num = 0;
+                recs = 0;
+                break;
+            }
+
+        for (i = 0; i < *num; i++)
+        {
+            struct record_cluster *r = reclist_read_record(s->reclist);
+            if (!r)
+            {
+                *num = i;
+                break;
+            }
+            recs[i] = r;
+        }
+        reclist_leave(s->reclist);
+    }
+#if USE_TIMING
+    yaz_timing_stop(t);
+    yaz_log(YLOG_LOG, "show %6.5f %3.2f %3.2f",
+            yaz_timing_get_real(t), yaz_timing_get_user(t),
+            yaz_timing_get_sys(t));
+    yaz_timing_destroy(&t);
+#endif
+    return recs;
+}
+
+// Release the session mutex left held by show_range_start().
+// 'recs' is not used.
+void show_range_stop(struct session *s, struct record_cluster **recs)
+{
+ session_leave(s);
+}
+
+// Fill 'stat' with a snapshot of the session: client counts per state, the
+// number of clients without a connection, and the session's hit and
+// record totals. 'stat' is zeroed first. The session mutex is not taken.
+void statistics(struct session *se, struct statistics *stat)
+{
+    struct client *cur;
+    int nclients = 0;
+
+    memset(stat, 0, sizeof(*stat));
+
+    cur = se->clients;
+    while (cur)
+    {
+        if (!client_get_connection(cur))
+            stat->num_no_connection++;
+
+        switch (client_get_state(cur))
+        {
+        case Client_Connecting:
+            stat->num_connecting++;
+            break;
+        case Client_Working:
+            stat->num_working++;
+            break;
+        case Client_Idle:
+            stat->num_idle++;
+            break;
+        case Client_Failed:
+            stat->num_failed++;
+            break;
+        case Client_Error:
+            stat->num_error++;
+            break;
+        default:
+            break;
+        }
+        nclients++;
+        cur = client_next_in_session(cur);
+    }
+
+    stat->num_clients = nclients;
+    stat->num_hits = se->total_hits;
+    stat->num_records = se->total_records;
+}
+
+// Create a record_metadata object on 'nmem' from a metadata value.
+//   value  text content of the metadata element
+//   type   how to interpret the value
+//   attr   attribute list of the element; all attributes with content are
+//          copied, except "type"
+// Generic values are duplicated and passed through normalize7bit_generic();
+// year and date values are parsed into a min/max number range.
+// Returns 0 if a year/date value cannot be parsed or the type is not one
+// of generic, year or date.
+static struct record_metadata *record_metadata_init(
+    NMEM nmem, const char *value, enum conf_metadata_type type,
+    struct _xmlAttr *attr)
+{
+    struct record_metadata *md = record_metadata_create(nmem);
+    struct record_metadata_attr **tail = &md->attributes;
+
+    while (attr)
+    {
+        /* skip the "type" attribute.. Its value is already part of
+           the element in output (md-%s) and so repeating it here
+           is redundant */
+        if (attr->children && attr->children->content
+            && strcmp((const char *) attr->name, "type") != 0)
+        {
+            struct record_metadata_attr *copy = nmem_malloc(nmem, sizeof(*copy));
+
+            copy->name = nmem_strdup(nmem, (const char *) attr->name);
+            copy->value =
+                nmem_strdup(nmem, (const char *) attr->children->content);
+            *tail = copy;
+            tail = &copy->next;
+        }
+        attr = attr->next;
+    }
+    *tail = 0;
+
+    switch (type)
+    {
+    case Metadata_type_generic:
+    {
+        char *text = nmem_strdup(nmem, value);
+
+        md->data.text.disp = normalize7bit_generic(text, " ,/.:([");
+        md->data.text.sort = 0;
+        return md;
+    }
+    case Metadata_type_year:
+    case Metadata_type_date:
+    {
+        int first, last;
+        int longdate = (type == Metadata_type_date) ? 1 : 0;
+
+        if (extract7bit_dates((char *) value, &first, &last, longdate) < 0)
+            return 0;
+        md->data.number.min = first;
+        md->data.number.max = last;
+        return md;
+    }
+    default:
+        return 0;
+    }
+}
+
+// Append the normalized content of every <metadata type="NAME"> child of
+// 'root' to the merge key buffer 'norm_wr'.
+// For each matching element with text content, the field name is written
+// (preceded by a space if the buffer is non-empty), followed by each
+// non-empty token from the service's mergekey tokenizer.
+// <metadata> elements without a type attribute are skipped.
+// Returns the number of matching elements that contributed.
+static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
+                                 struct conf_service *service, WRBUF norm_wr)
+{
+    xmlNode *n;
+    int no_found = 0;
+    for (n = root->children; n; n = n->next)
+    {
+        xmlChar *type;
+
+        if (n->type != XML_ELEMENT_NODE)
+            continue;
+        if (strcmp((const char *) n->name, "metadata"))
+            continue;
+
+        // xmlGetProp returns 0 when the attribute is absent; it must not
+        // reach strcmp in that case
+        type = xmlGetProp(n, (xmlChar *) "type");
+        if (!type)
+            continue;
+
+        if (!strcmp(name, (const char *) type))
+        {
+            xmlChar *value = xmlNodeListGetString(doc, n->children, 1);
+            if (value)
+            {
+                const char *norm_str;
+                pp2_relevance_token_t prt =
+                    pp2_relevance_tokenize(
+                        service->mergekey_pct,
+                        (const char *) value, 0);
+
+                if (wrbuf_len(norm_wr) > 0)
+                    wrbuf_puts(norm_wr, " ");
+                wrbuf_puts(norm_wr, name);
+                while ((norm_str =
+                        pp2_relevance_token_next(prt)))
+                {
+                    if (*norm_str)
+                    {
+                        wrbuf_puts(norm_wr, " ");
+                        wrbuf_puts(norm_wr, norm_str);
+                    }
+                }
+                xmlFree(value);
+                pp2_relevance_token_destroy(prt);
+                no_found++;
+            }
+        }
+        xmlFree(type);
+    }
+    return no_found;
+}
+
+// Compute the normalized merge key of a record.
+// Sources, in order of preference:
+//  1. the "mergekey" attribute on the root element (set by the XSL),
+//     tokenized and joined with single spaces;
+//  2. the metadata fields the service marks as merge key fields; if a
+//     required field is missing from the record, this source is dropped;
+//  3. a unique key "<database url>-<record_no>" when the above produced
+//     nothing.
+// The returned string is allocated on 'nmem'.
+static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no,
+                                struct conf_service *service, NMEM nmem)
+{
+    char *result = 0;
+    xmlNode *root = xmlDocGetRootElement(doc);
+    WRBUF key = wrbuf_alloc();
+
+    /* consider mergekey from XSL first */
+    xmlChar *xsl_key = xmlGetProp(root, (xmlChar *) "mergekey");
+    if (xsl_key)
+    {
+        const char *tok;
+        pp2_relevance_token_t prt =
+            pp2_relevance_tokenize(service->mergekey_pct,
+                                   (const char *) xsl_key, 0);
+
+        for (tok = pp2_relevance_token_next(prt); tok;
+             tok = pp2_relevance_token_next(prt))
+        {
+            if (!*tok)
+                continue;
+            if (wrbuf_len(key))
+                wrbuf_puts(key, " ");
+            wrbuf_puts(key, tok);
+        }
+        pp2_relevance_token_destroy(prt);
+        xmlFree(xsl_key);
+    }
+    else
+    {
+        /* no mergekey defined in XSL. Look for mergekey metadata instead */
+        int idx = 0;
+
+        while (idx < service->num_metadata)
+        {
+            struct conf_metadata *md = &service->metadata[idx++];
+
+            if (md->mergekey == Metadata_mergekey_no)
+                continue;
+            if (get_mergekey_from_doc(doc, root, md->name, service, key) == 0
+                && md->mergekey == Metadata_mergekey_required)
+            {
+                /* a required field is absent from this record:
+                   fall back to a unique key */
+                wrbuf_rewind(key);
+                break;
+            }
+        }
+    }
+
+    /* generate a unique key if none has been generated or it is empty */
+    if (wrbuf_len(key) == 0)
+    {
+        wrbuf_printf(key, "%s-%d",
+                     client_get_database(cl)->database->url, record_no);
+    }
+    if (wrbuf_len(key) > 0)
+        result = nmem_strdup(nmem, wrbuf_cstr(key));
+    wrbuf_destroy(key);
+    return result;
+}
+
+/** \brief check a normalized record against the pz:recordfilter setting
+    \param root xml root element of normalized record
+    \param sdb session database for client
+    \retval 0 if the record does not satisfy pz:recordfilter
+    \retval 1 if the record satisfies pz:recordfilter
+
+    The filter has the form "name" or "name~text". A record satisfies it
+    when it has a metadata element of type "name" with non-empty content
+    which, in the second form, contains "text" as a substring.
+
+    If there is no pz:recordfilter defined, this function returns 1
+    as well.
+*/
+
+static int check_record_filter(xmlNode *root, struct session_database *sdb)
+{
+    const char *filter = session_setting_oneval(sdb, PZ_RECORDFILTER);
+    const char *tilde;
+    size_t name_len;
+    xmlNode *n;
+    int matched = 0;
+
+    if (!filter || !*filter)
+        return 1;
+
+    /* split the filter once: field name, optional '~' and substring */
+    tilde = strchr(filter, '~');
+    name_len = tilde ? (size_t) (tilde - filter) : strlen(filter);
+
+    for (n = root->children; n; n = n->next)
+    {
+        xmlChar *type;
+
+        if (n->type != XML_ELEMENT_NODE
+            || strcmp((const char *) n->name, "metadata") != 0)
+            continue;
+
+        type = xmlGetProp(n, (xmlChar *) "type");
+        if (!type)
+            continue;
+
+        if (name_len == strlen((const char *) type)
+            && !memcmp((const char *) type, filter, name_len))
+        {
+            xmlChar *value = xmlNodeGetContent(n);
+
+            if (value && *value
+                && (!tilde || strstr((const char *) value, tilde + 1)))
+                matched = 1;
+            xmlFree(value);
+        }
+        xmlFree(type);
+    }
+    return matched;
+}
+
+
+// Forward declaration: the definition follows ingest_record(), which calls
+// it while holding the session mutex.
+static int ingest_to_cluster(struct client *cl,
+ xmlDoc *xdoc,
+ xmlNode *root,
+ int record_no,
+ const char *mergekey_norm);
+
+/** \brief ingest XML record
+    \param cl client holds the result set for record
+    \param rec record buffer (0 terminated)
+    \param record_no record position (1, 2, ..)
+    \param nmem memory for XSLT parameters and the merge key
+    \retval 0 OK
+    \retval -1 failure
+
+    The record is normalized, checked against pz:recordfilter and given a
+    merge key without holding the session mutex; only the insertion into
+    the session's clusters runs under it. Failure covers: record cannot be
+    parsed, normalized document has no root element, record is filtered
+    out, no merge key, or cluster insertion fails.
+*/
+int ingest_record(struct client *cl, const char *rec,
+                  int record_no, NMEM nmem)
+{
+    struct session_database *sdb = client_get_database(cl);
+    struct session *se = client_get_session(cl);
+    struct conf_service *service = se->service;
+    xmlDoc *xdoc = normalize_record(sdb, service, rec, nmem);
+    xmlNode *root;
+    const char *mergekey_norm;
+    int ret;
+
+    if (!xdoc)
+        return -1;
+
+    root = xmlDocGetRootElement(xdoc);
+    if (!root)
+    {
+        /* everything below dereferences root */
+        yaz_log(YLOG_WARN, "No root element in record no %d from %s",
+                record_no, sdb->database->url);
+        xmlFreeDoc(xdoc);
+        return -1;
+    }
+
+    if (!check_record_filter(root, sdb))
+    {
+        yaz_log(YLOG_WARN, "Filtered out record no %d from %s", record_no,
+                sdb->database->url);
+        xmlFreeDoc(xdoc);
+        return -1;
+    }
+
+    mergekey_norm = get_mergekey(xdoc, cl, record_no, service, nmem);
+    if (!mergekey_norm)
+    {
+        yaz_log(YLOG_WARN, "Got no mergekey");
+        xmlFreeDoc(xdoc);
+        return -1;
+    }
+    session_enter(se);
+    ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekey_norm);
+    session_leave(se);
+
+    xmlFreeDoc(xdoc);
+
+    return ret;
+}
+
+static int ingest_to_cluster(struct client *cl,
+ xmlDoc *xdoc,
+ xmlNode *root,
+ int record_no,
+ const char *mergekey_norm)
+{ /* Adds one normalized record to the session's record list.
+   *
+   * A struct record is created for (cl, record_no) and passed to
+   * reclist_insert() together with mergekey_norm; the cluster returned
+   * by that call receives the merged metadata. Every "metadata" child
+   * element of root is then stored twice: once on the record itself and
+   * once on the cluster, the latter according to the merge rule
+   * configured for that field. Along the way relevance counting, sort
+   * keys and facets are updated for fields configured for them.
+   *
+   * Returns -1 if reclist_insert() yields no cluster, 0 otherwise.
+   * ingest_record() calls this between session_enter() and
+   * session_leave().
+   */
+ xmlNode *n;
+ xmlChar *type = 0;
+ xmlChar *value = 0;
+ struct session_database *sdb = client_get_database(cl);
+ struct session *se = client_get_session(cl);
+ struct conf_service *service = se->service;
+ struct record *record = record_create(se->nmem,
+ service->num_metadata,
+ service->num_sortkeys, cl,
+ record_no);
+ struct record_cluster *cluster = reclist_insert(se->reclist,
+ service,
+ record,
+ mergekey_norm,
+ &se->total_merged);
+ if (!cluster)
+ return -1;
+ if (global_parameters.dump_records)
+ yaz_log(YLOG_LOG, "Cluster id %s from %s (#%d)", cluster->recid,
+ sdb->database->url, record_no);
+ relevance_newrec(se->relevance, cluster);
+
+ // now parsing XML record and adding data to cluster or record metadata
+ for (n = root->children; n; n = n->next)
+ {
+ pp2_relevance_token_t prt;
+ if (type) // release strings left behind when the previous iteration left via continue
+ xmlFree(type);
+ if (value)
+ xmlFree(value);
+ type = value = 0;
+
+ if (n->type != XML_ELEMENT_NODE)
+ continue;
+ if (!strcmp((const char *) n->name, "metadata"))
+ {
+ struct conf_metadata *ser_md = 0;
+ struct conf_sortkey *ser_sk = 0;
+ struct record_metadata **wheretoput = 0;
+ struct record_metadata *rec_md = 0;
+ int md_field_id = -1;
+ int sk_field_id = -1;
+
+ type = xmlGetProp(n, (xmlChar *) "type");
+ value = xmlNodeListGetString(xdoc, n->children, 1);
+
+ if (!type || !value || !*value) // skip elements without a type attribute or without text
+ continue;
+
+ md_field_id
+ = conf_service_metadata_field_id(service, (const char *) type);
+ if (md_field_id < 0)
+ {
+ if (se->number_of_warnings_unknown_metadata == 0) // log the first occurrence only; later ones are just counted
+ {
+ yaz_log(YLOG_WARN,
+ "Ignoring unknown metadata element: %s", type);
+ }
+ se->number_of_warnings_unknown_metadata++;
+ continue;
+ }
+
+ ser_md = &service->metadata[md_field_id];
+
+ if (ser_md->sortkey_offset >= 0){ // this field also has a sort key configured
+ sk_field_id = ser_md->sortkey_offset;
+ ser_sk = &service->sortkeys[sk_field_id];
+ }
+
+ // non-merged metadata
+ rec_md = record_metadata_init(se->nmem, (const char *) value,
+ ser_md->type, n->properties);
+ if (!rec_md)
+ {
+ yaz_log(YLOG_WARN, "bad metadata data '%s' for element '%s'",
+ value, type);
+ continue;
+ }
+ wheretoput = &record->metadata[md_field_id];
+ while (*wheretoput) // walk to the end: record-level values are appended
+ wheretoput = &(*wheretoput)->next;
+ *wheretoput = rec_md;
+
+ // merged metadata
+ rec_md = record_metadata_init(se->nmem, (const char *) value,
+ ser_md->type, 0); // NOTE(review): not NULL-checked, unlike the call above; same value and type, so presumably it cannot fail here -- confirm
+ wheretoput = &cluster->metadata[md_field_id];
+
+ // and populate with data:
+ // assign cluster or record based on merge action
+ if (ser_md->merge == Metadata_merge_unique) // prepend unless a value with the same display text is already present
+ {
+ struct record_metadata *mnode;
+ for (mnode = *wheretoput; mnode; mnode = mnode->next)
+ if (!strcmp((const char *) mnode->data.text.disp,
+ rec_md->data.text.disp))
+ break;
+ if (!mnode)
+ {
+ rec_md->next = *wheretoput;
+ *wheretoput = rec_md;
+ }
+ }
+ else if (ser_md->merge == Metadata_merge_longest) // keep a single value: the one with the longest display text
+ {
+ if (!*wheretoput
+ || strlen(rec_md->data.text.disp)
+ > strlen((*wheretoput)->data.text.disp))
+ {
+ *wheretoput = rec_md;
+ if (ser_sk) // the sort key is recomputed whenever the kept value changes
+ {
+ const char *sort_str = 0;
+ int skip_article =
+ ser_sk->type == Metadata_sortkey_skiparticle;
+
+ if (!cluster->sortkeys[sk_field_id])
+ cluster->sortkeys[sk_field_id] =
+ nmem_malloc(se->nmem,
+ sizeof(union data_types));
+
+ prt = pp2_relevance_tokenize(
+ service->sort_pct,
+ rec_md->data.text.disp, skip_article);
+
+ pp2_relevance_token_next(prt);
+
+ sort_str = pp2_get_sort(prt);
+
+ cluster->sortkeys[sk_field_id]->text.disp =
+ rec_md->data.text.disp;
+ if (!sort_str) // fall back to the display text as sort text
+ {
+ sort_str = rec_md->data.text.disp;
+ yaz_log(YLOG_WARN,
+ "Could not make sortkey. Bug #1858");
+ }
+ cluster->sortkeys[sk_field_id]->text.sort =
+ nmem_strdup(se->nmem, sort_str);
+#if 0
+ yaz_log(YLOG_LOG, "text disp=%s",
+ cluster->sortkeys[sk_field_id]->text.disp);
+ yaz_log(YLOG_LOG, "text sort=%s",
+ cluster->sortkeys[sk_field_id]->text.sort);
+#endif
+ pp2_relevance_token_destroy(prt);
+ }
+ }
+ }
+ else if (ser_md->merge == Metadata_merge_all) // prepend every value, duplicates included
+ {
+ rec_md->next = *wheretoput;
+ *wheretoput = rec_md;
+ }
+ else if (ser_md->merge == Metadata_merge_range) // keep one entry and widen its min/max to cover each new value
+ {
+ if (!*wheretoput)
+ {
+ *wheretoput = rec_md;
+ if (ser_sk)
+ cluster->sortkeys[sk_field_id]
+ = &rec_md->data; // sort key points at the stored entry, so the widening below is visible through it
+ }
+ else
+ {
+ int this_min = rec_md->data.number.min;
+ int this_max = rec_md->data.number.max;
+ if (this_min < (*wheretoput)->data.number.min)
+ (*wheretoput)->data.number.min = this_min;
+ if (this_max > (*wheretoput)->data.number.max)
+ (*wheretoput)->data.number.max = this_max;
+ }
+ }
+
+
+ // ranking of _all_ fields enabled ...
+ if (ser_md->rank)
+ relevance_countwords(se->relevance, cluster,
+ (char *) value, ser_md->rank,
+ ser_md->name);
+
+ // construct facets ...
+ if (ser_md->termlist)
+ {
+ if (ser_md->type == Metadata_type_year) // year fields: facet on max, and on min too when they differ
+ {
+ char year[64];
+ sprintf(year, "%d", rec_md->data.number.max);
+ add_facet(se, (char *) type, year);
+ if (rec_md->data.number.max != rec_md->data.number.min)
+ {
+ sprintf(year, "%d", rec_md->data.number.min);
+ add_facet(se, (char *) type, year);
+ }
+ }
+ else
+ add_facet(se, (char *) type, (char *) value);
+ }
+
+ // cleaning up
+ xmlFree(type);
+ xmlFree(value);
+ type = value = 0;
+ }
+ else
+ {
+ if (se->number_of_warnings_unknown_elements == 0) // log the first occurrence only; later ones are just counted
+ yaz_log(YLOG_WARN,
+ "Unexpected element in internal record: %s", n->name);
+ se->number_of_warnings_unknown_elements++;
+ }
+ }
+ if (type) // the last iteration may have left via continue with these still set
+ xmlFree(type);
+ if (value)
+ xmlFree(value);
+
+ relevance_donerecord(se->relevance, cluster);
+ se->total_records++;
+
+ return 0;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
--- /dev/null
+/* This file is part of Pazpar2.
+ Copyright (C) 2006-2010 Index Data
+
+Pazpar2 is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+#ifndef PAZPAR2_SESSION_H
+#define PAZPAR2_SESSION_H
+
+#include <yaz/comstack.h>
+#include <yaz/pquery.h>
+#include <yaz/ccl.h>
+#include <yaz/yaz-ccl.h>
+
+#include "termlists.h"
+#include "reclists.h"
+#include "http.h"
+
+struct record;
+struct client;
+
+
+enum pazpar2_error_code { /* error codes; this is the return type of search() */
+ PAZPAR2_NO_ERROR = 0,
+
+ PAZPAR2_NO_SESSION,
+ PAZPAR2_MISSING_PARAMETER,
+ PAZPAR2_MALFORMED_PARAMETER_VALUE,
+ PAZPAR2_MALFORMED_PARAMETER_ENCODING,
+ PAZPAR2_MALFORMED_SETTING,
+ PAZPAR2_HITCOUNTS_FAILED,
+ PAZPAR2_RECORD_MISSING,
+ PAZPAR2_NO_TARGETS,
+ PAZPAR2_CONFIG_TARGET,
+ PAZPAR2_RECORD_FAIL,
+ PAZPAR2_NOT_IMPLEMENTED,
+ PAZPAR2_NO_SERVICE,
+
+ PAZPAR2_LAST_ERROR /* keep last: by position its value is one more than the final real code */
+};
+
+struct host;
+// Represents a (virtual) database on a host
+struct database {
+ struct host *host;
+ char *url; // used by ingest_record()/ingest_to_cluster() to name the record's origin in log messages
+ char **databases;
+ int errors;
+ struct zr_explain *explain;
+ int num_settings; // presumably the number of entries in settings -- TODO confirm
+ struct setting **settings;
+ struct database *next; // presumably links databases into a singly linked list -- TODO confirm
+};
+
+
+// Represents a database as viewed from one session, possibly with settings overridden
+// for that session
+struct session_database
+{
+ struct database *database; // the underlying database; its url is what the ingest code logs
+ int num_settings; // presumably the number of entries in settings -- TODO confirm
+ struct setting **settings; // NOTE(review): looks like the per-session values read via session_setting_oneval() -- confirm
+ normalize_record_t map;
+ struct session_database *next;
+};
+
+#define SESSION_WATCH_SHOW 0 /* watch kinds; presumably the "what" argument of session_set_watch()/session_alert_watch() -- confirm */
+#define SESSION_WATCH_RECORD 1
+#define SESSION_WATCH_MAX 1 /* highest watch kind: struct session sizes watchlist as SESSION_WATCH_MAX + 1 */
+
+#define SESSION_MAX_TERMLISTS 10 /* capacity of the termlists array in struct session */
+
+typedef void (*session_watchfun)(void *data); // callback type stored in struct session_watchentry and passed to session_set_watch()
+
+struct named_termlist // element type of the termlists array in struct session: a termlist together with its name
+{
+ char *name;
+ struct termlist *termlist;
+};
+
+struct session_watchentry { // element type of the watchlist array in struct session
+ void *data; // presumably the argument handed to fun when the watch fires -- TODO confirm
+ http_channel_observer_t obs;
+ session_watchfun fun;
+};
+
+// End-user session
+struct session {
+ struct conf_service *service; /* service in use for this session */
+ struct session_database *databases; // All databases, settings overridden
+ struct client *clients; // Clients connected for current search
+ NMEM session_nmem; // Nmem for session-permanent storage
+ NMEM nmem; // Nmem for each operation (i.e. search, result set, etc)
+ WRBUF wrbuf; // Wrbuf for scratch(i.e. search)
+ int num_termlists; // presumably the number of termlists entries in use -- TODO confirm
+ struct named_termlist termlists[SESSION_MAX_TERMLISTS];
+ struct relevance *relevance; // handed to the relevance_*() calls during ingest
+ struct reclist *reclist; // record list that ingested records are inserted into (reclist_insert)
+ struct session_watchentry watchlist[SESSION_WATCH_MAX + 1]; // presumably indexed by SESSION_WATCH_* -- TODO confirm
+ Odr_int total_hits;
+ int total_records; // incremented once per record by ingest_to_cluster()
+ int total_merged; // its address is passed to reclist_insert() during ingest
+ int number_of_warnings_unknown_elements; // unexpected child elements seen; only the first is logged
+ int number_of_warnings_unknown_metadata; // unknown metadata types seen; only the first is logged
+ normalize_cache_t normalize_cache;
+ YAZ_MUTEX mutex; // NOTE(review): presumably what session_enter()/session_leave() lock -- confirm in session.c
+};
+
+struct statistics { // counters passed to statistics(); presumably filled in by that call -- TODO confirm
+ int num_clients;
+ int num_no_connection;
+ int num_connecting;
+ int num_working;
+ int num_idle;
+ int num_failed;
+ int num_error;
+ Odr_int num_hits;
+ int num_records;
+};
+
+struct hitsbytarget { // element type of the result returned by hitsbytarget(); presumably one entry per target -- TODO confirm
+ char *id;
+ const char *name;
+ Odr_int hits;
+ int diagnostic;
+ int records;
+ const char *state;
+ int connected;
+ WRBUF settings_xml;
+};
+
+struct hitsbytarget *hitsbytarget(struct session *s, int *count, NMEM nmem); // NOTE(review): presumably *count receives the number of entries returned -- confirm
+struct session *new_session(NMEM nmem, struct conf_service *service);
+void destroy_session(struct session *s);
+void session_init_databases(struct session *s);
+int load_targets(struct session *s, const char *fn);
+void statistics(struct session *s, struct statistics *stat);
+enum pazpar2_error_code search(struct session *s, const char *query,
+ const char *startrecs, const char *maxrecs,
+ const char *filter, const char **addinfo); // NOTE(review): addinfo presumably receives extra detail for a non-zero error code -- confirm
+struct record_cluster **show_range_start(struct session *s,
+ struct reclist_sortparms *sp,
+ int start,
+ int *num, int *total, Odr_int *sumhits);
+void show_range_stop(struct session *s, struct record_cluster **recs); // NOTE(review): presumably must follow every show_range_start() -- confirm
+
+struct record_cluster *show_single_start(struct session *s, const char *id,
+ struct record_cluster **prev_r,
+ struct record_cluster **next_r);
+void show_single_stop(struct session *s, struct record_cluster *rec); // NOTE(review): presumably must follow every show_single_start() -- confirm
+struct termlist_score **termlist(struct session *s, const char *name, int *num);
+int session_set_watch(struct session *s, int what, session_watchfun fun, void *data, struct http_channel *c); // what: presumably one of SESSION_WATCH_* -- confirm
+int session_active_clients(struct session *s);
+void session_apply_setting(struct session *se, char *dbname, char *setting, char *value);
+const char *session_setting_oneval(struct session_database *db, int offset); // offset is a setting index such as PZ_RECORDFILTER; check_record_filter() treats a NULL or empty result as "not set"
+
+int host_getaddrinfo(struct host *host, iochan_man_t iochan_man);
+
+int ingest_record(struct client *cl, const char *rec, int record_no, NMEM nmem); // returns 0 when the record was ingested, -1 on failure
+void session_alert_watch(struct session *s, int what);
+void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num);
+
+#endif
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
#include <yaz/nmem.h>
#include <yaz/log.h>
-#include "pazpar2.h"
+#include "session.h"
#include "database.h"
#include "settings.h"
"$(OBJDIR)\http.obj" \
"$(OBJDIR)\eventl.obj" \
"$(OBJDIR)\http_command.obj" \
- "$(OBJDIR)\logic.obj" \
+ "$(OBJDIR)\session.obj" \
"$(OBJDIR)\record.obj" \
"$(OBJDIR)\reclists.obj" \
"$(OBJDIR)\relevance.obj" \