-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <sys/socket.h>
-#include <netdb.h>
-#include <signal.h>
-#include <ctype.h>
-#include <assert.h>
-
-#include <yaz/comstack.h>
-#include <yaz/tcpip.h>
-#include <yaz/proto.h>
-#include <yaz/readconf.h>
-#include <yaz/pquery.h>
-#include <yaz/yaz-util.h>
-#include <yaz/nmem.h>
-
-#include "pazpar2.h"
-#include "eventl.h"
-#include "command.h"
-#include "http.h"
-#include "termlists.h"
-#include "reclists.h"
-#include "relevance.h"
-
-#define PAZPAR2_VERSION "0.1"
-#define MAX_CHUNK 15
-
-static void client_fatal(struct client *cl);
-static void connection_destroy(struct connection *co);
-static int client_prep_connection(struct client *cl);
-static void ingest_records(struct client *cl, Z_Records *r);
-void session_alert_watch(struct session *s, int what);
-
-IOCHAN channel_list = 0; // Master list of connections we're handling events to
-
-static struct connection *connection_freelist = 0;
-static struct client *client_freelist = 0;
-
-static struct host *hosts = 0; // The hosts we know about
-static struct database *databases = 0; // The databases we know about
-
/*
 * Printable client state names, one per table slot.
 * NOTE(review): presumably indexed by the client state enum declared in
 * pazpar2.h -- confirm that the order here matches that enum.
 */
static char *client_states[] = {
    /* 0 */ "Client_Connecting",
    /* 1 */ "Client_Connected",
    /* 2 */ "Client_Idle",
    /* 3 */ "Client_Initializing",
    /* 4 */ "Client_Searching",
    /* 5 */ "Client_Presenting",
    /* 6 */ "Client_Error",
    /* 7 */ "Client_Failed",
    /* 8 */ "Client_Disconnected",
    /* 9 */ "Client_Stopped"
};
-
-struct parameters global_parameters =
-{
- 30,
- "81",
- "Index Data PazPar2 (MasterKey)",
- PAZPAR2_VERSION,
- 600, // 10 minutes
- 60,
- 100,
- MAX_CHUNK,
- 0,
- 0,
- 0,
- 0,
- 0
-};
-
-
-static int send_apdu(struct client *c, Z_APDU *a)
-{
- struct connection *co = c->connection;
- char *buf;
- int len, r;
-
- if (!z_APDU(global_parameters.odr_out, &a, 0, 0))
- {
- odr_perror(global_parameters.odr_out, "Encoding APDU");
- abort();
- }
- buf = odr_getbuf(global_parameters.odr_out, &len, 0);
- r = cs_put(co->link, buf, len);
- if (r < 0)
- {
- yaz_log(YLOG_WARN, "cs_put: %s", cs_errmsg(cs_errno(co->link)));
- return -1;
- }
- else if (r == 1)
- {
- fprintf(stderr, "cs_put incomplete (ParaZ does not handle that)\n");
- exit(1);
- }
- odr_reset(global_parameters.odr_out); /* release the APDU structure */
- co->state = Conn_Waiting;
- return 0;
-}
-
-
-static void send_init(IOCHAN i)
-{
- struct connection *co = iochan_getdata(i);
- struct client *cl = co->client;
- Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_initRequest);
-
- a->u.initRequest->implementationId = global_parameters.implementationId;
- a->u.initRequest->implementationName = global_parameters.implementationName;
- a->u.initRequest->implementationVersion =
- global_parameters.implementationVersion;
- ODR_MASK_SET(a->u.initRequest->options, Z_Options_search);
- ODR_MASK_SET(a->u.initRequest->options, Z_Options_present);
- ODR_MASK_SET(a->u.initRequest->options, Z_Options_namedResultSets);
-
- ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_1);
- ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_2);
- ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_3);
- if (send_apdu(cl, a) >= 0)
- {
- iochan_setflags(i, EVENT_INPUT);
- cl->state = Client_Initializing;
- }
- else
- cl->state = Client_Error;
- odr_reset(global_parameters.odr_out);
-}
-
-static void send_search(IOCHAN i)
-{
- struct connection *co = iochan_getdata(i);
- struct client *cl = co->client;
- struct session *se = cl->session;
- struct database *db = cl->database;
- Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_searchRequest);
- int ndb, cerror, cpos;
- char **databaselist;
- Z_Query *zquery;
- struct ccl_rpn_node *cn;
- int ssub = 0, lslb = 100000, mspn = 10;
-
- yaz_log(YLOG_DEBUG, "Sending search");
-
- cn = ccl_find_str(global_parameters.ccl_filter, se->query, &cerror, &cpos);
- if (!cn)
- return;
- a->u.searchRequest->query = zquery = odr_malloc(global_parameters.odr_out,
- sizeof(Z_Query));
- zquery->which = Z_Query_type_1;
- zquery->u.type_1 = ccl_rpn_query(global_parameters.odr_out, cn);
- ccl_rpn_delete(cn);
-
- for (ndb = 0; db->databases[ndb]; ndb++)
- ;
- databaselist = odr_malloc(global_parameters.odr_out, sizeof(char*) * ndb);
- for (ndb = 0; db->databases[ndb]; ndb++)
- databaselist[ndb] = db->databases[ndb];
-
- a->u.presentRequest->preferredRecordSyntax =
- yaz_oidval_to_z3950oid(global_parameters.odr_out,
- CLASS_RECSYN, VAL_USMARC);
- a->u.searchRequest->smallSetUpperBound = &ssub;
- a->u.searchRequest->largeSetLowerBound = &lslb;
- a->u.searchRequest->mediumSetPresentNumber = &mspn;
- a->u.searchRequest->resultSetName = "Default";
- a->u.searchRequest->databaseNames = databaselist;
- a->u.searchRequest->num_databaseNames = ndb;
-
- if (send_apdu(cl, a) >= 0)
- {
- iochan_setflags(i, EVENT_INPUT);
- cl->state = Client_Searching;
- cl->requestid = se->requestid;
- }
- else
- cl->state = Client_Error;
-
- odr_reset(global_parameters.odr_out);
-}
-
-static void send_present(IOCHAN i)
-{
- struct connection *co = iochan_getdata(i);
- struct client *cl = co->client;
- Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_presentRequest);
- int toget;
- int start = cl->records + 1;
-
- toget = global_parameters.chunk;
- if (toget > cl->hits - cl->records)
- toget = cl->hits - cl->records;
-
- yaz_log(YLOG_DEBUG, "Trying to present %d records\n", toget);
-
- a->u.presentRequest->resultSetStartPoint = &start;
- a->u.presentRequest->numberOfRecordsRequested = &toget;
-
- a->u.presentRequest->resultSetId = "Default";
-
- a->u.presentRequest->preferredRecordSyntax =
- yaz_oidval_to_z3950oid(global_parameters.odr_out,
- CLASS_RECSYN, VAL_USMARC);
-
- if (send_apdu(cl, a) >= 0)
- {
- iochan_setflags(i, EVENT_INPUT);
- cl->state = Client_Presenting;
- }
- else
- cl->state = Client_Error;
- odr_reset(global_parameters.odr_out);
-}
-
-static void do_initResponse(IOCHAN i, Z_APDU *a)
-{
- struct connection *co = iochan_getdata(i);
- struct client *cl = co->client;
- Z_InitResponse *r = a->u.initResponse;
-
- yaz_log(YLOG_DEBUG, "Received init response");
-
- if (*r->result)
- {
- cl->state = Client_Idle;
- }
- else
- cl->state = Client_Failed; // FIXME need to do something to the connection
-}
-
-static void do_searchResponse(IOCHAN i, Z_APDU *a)
-{
- struct connection *co = iochan_getdata(i);
- struct client *cl = co->client;
- struct session *se = cl->session;
- Z_SearchResponse *r = a->u.searchResponse;
-
- yaz_log(YLOG_DEBUG, "Searchresponse (status=%d)", *r->searchStatus);
-
- if (*r->searchStatus)
- {
- cl->hits = *r->resultCount;
- se->total_hits += cl->hits;
- if (r->presentStatus && !*r->presentStatus && r->records)
- {
- yaz_log(YLOG_DEBUG, "Records in search response");
- cl->records += *r->numberOfRecordsReturned;
- ingest_records(cl, r->records);
- }
- cl->state = Client_Idle;
- }
- else
- { /*"FAILED"*/
- cl->hits = 0;
- cl->state = Client_Error;
- if (r->records) {
- Z_Records *recs = r->records;
- if (recs->which == Z_Records_NSD)
- {
- yaz_log(YLOG_WARN, "Non-surrogate diagnostic");
- cl->diagnostic = *recs->u.nonSurrogateDiagnostic->condition;
- cl->state = Client_Error;
- }
- }
- }
-}
-
/*
 * Locate the first line of rec that begins with the tag `field`.
 *
 * rec is a newline-separated record dump.  A line only counts if it is
 * preceded by a newline, so a tag sitting at the very start of rec is never
 * matched; callers rely on this to step to the next occurrence by passing the
 * previous result back in as rec.
 *
 * Returns a pointer to the first character of the matching line (just past
 * the newline), or 0 when no line starts with the tag.  The tag may be of any
 * length; no fixed-size scratch buffer is involved.
 */
const char *find_field(const char *rec, const char *field)
{
    size_t taglen = strlen(field);
    const char *nl = rec;

    while ((nl = strchr(nl, '\n')))
    {
        nl++;   /* step onto the first character of the next line */
        if (strncmp(nl, field, taglen) == 0)
            return nl;
    }
    return 0;
}
-
/*
 * Find the value of subfield `subfield` on the line that starts at `field`.
 *
 * Subfields are introduced by a tab followed by the one-character subfield
 * code and at least one space.  Only the current line is searched: the scan
 * stops at the first newline or at the end of the string.
 *
 * Returns a pointer to the first character after the spaces that follow the
 * code, or 0 if the line has no such subfield.  For an empty subfield the
 * returned pointer sits on the terminating tab/newline of that subfield
 * rather than wandering into the following data.
 */
const char *find_subfield(const char *field, char subfield)
{
    const char *p = field;

    while (*p && *p != '\n')
    {
        /* Advance to the next subfield delimiter, never past NUL. */
        while (*p && *p != '\n' && *p != '\t')
            p++;
        if (*p != '\t')
            break;
        p++;
        if (*p && *p == subfield)
        {
            p++;
            if (*p == ' ')
            {
                /* Skip blanks only; tab and newline end the value. */
                while (*p == ' ')
                    p++;
                return p;
            }
        }
    }
    return 0;
}
-
-// Extract 245 $a $b 100 $a
-char *extract_title(struct session *s, const char *rec)
-{
- const char *field, *subfield;
- char *e, *ef;
- unsigned char *obuf, *p;
-
- wrbuf_rewind(s->wrbuf);
-
- if (!(field = find_field(rec, "245")))
- return 0;
- if (!(subfield = find_subfield(field, 'a')))
- return 0;
- ef = index(subfield, '\n');
- if ((e = index(subfield, '\t')) && e < ef)
- ef = e;
- if (ef)
- {
- wrbuf_write(s->wrbuf, subfield, ef - subfield);
- if ((subfield = find_subfield(field, 'b')))
- {
- ef = index(subfield, '\n');
- if ((e = index(subfield, '\t')) && e < ef)
- ef = e;
- if (ef)
- {
- wrbuf_putc(s->wrbuf, ' ');
- wrbuf_write(s->wrbuf, subfield, ef - subfield);
- }
- }
- }
- if ((field = find_field(rec, "100")))
- {
- if ((subfield = find_subfield(field, 'a')))
- {
- ef = index(subfield, '\n');
- if ((e = index(subfield, '\t')) && e < ef)
- ef = e;
- if (ef)
- {
- wrbuf_puts(s->wrbuf, ", by ");
- wrbuf_write(s->wrbuf, subfield, ef - subfield);
- }
- }
- }
- wrbuf_putc(s->wrbuf, '\0');
- obuf = (unsigned char*) nmem_strdup(s->nmem, wrbuf_buf(s->wrbuf));
- for (p = obuf; *p; p++)
- if (*p == '&' || *p == '<' || *p > 122 || *p < ' ')
- *p = ' ';
- return (char*) obuf;
-}
-
-// Extract 245 $a $b 100 $a
-char *extract_mergekey(struct session *s, const char *rec)
-{
- const char *field, *subfield;
- char *e, *ef;
- char *out, *p, *pout;
-
- wrbuf_rewind(s->wrbuf);
-
- if (!(field = find_field(rec, "245")))
- return 0;
- if (!(subfield = find_subfield(field, 'a')))
- return 0;
- ef = index(subfield, '\n');
- if ((e = index(subfield, '\t')) && e < ef)
- ef = e;
- if (ef)
- {
- wrbuf_write(s->wrbuf, subfield, ef - subfield);
- if ((subfield = find_subfield(field, 'b')))
- {
- ef = index(subfield, '\n');
- if ((e = index(subfield, '\t')) && e < ef)
- ef = e;
- if (ef)
- {
- wrbuf_puts(s->wrbuf, " field ");
- wrbuf_write(s->wrbuf, subfield, ef - subfield);
- }
- }
- }
- if ((field = find_field(rec, "100")))
- {
- if ((subfield = find_subfield(field, 'a')))
- {
- ef = index(subfield, '\n');
- if ((e = index(subfield, '\t')) && e < ef)
- ef = e;
- if (ef)
- {
- wrbuf_puts(s->wrbuf, " field ");
- wrbuf_write(s->wrbuf, subfield, ef - subfield);
- }
- }
- }
- wrbuf_putc(s->wrbuf, '\0');
- p = wrbuf_buf(s->wrbuf);
- out = pout = nmem_malloc(s->nmem, strlen(p) + 1);
-
- while (*p)
- {
- while (isalnum(*p))
- *(pout++) = tolower(*(p++));
- while (*p && !isalnum(*p))
- p++;
- *(pout++) = ' ';
- }
- if (out != pout)
- *(--pout) = '\0';
-
- return out;
-}
-
-#ifdef RECHEAP
-static void push_record(struct session *s, struct record *r)
-{
- int p;
- assert(s->recheap_max + 1 < s->recheap_size);
-
- s->recheap[p = ++s->recheap_max] = r;
- while (p > 0)
- {
- int parent = (p - 1) >> 1;
- if (strcmp(s->recheap[p]->merge_key, s->recheap[parent]->merge_key) < 0)
- {
- struct record *tmp;
- tmp = s->recheap[parent];
- s->recheap[parent] = s->recheap[p];
- s->recheap[p] = tmp;
- p = parent;
- }
- else
- break;
- }
-}
-
-static struct record *top_record(struct session *s)
-{
- return s-> recheap_max >= 0 ? s->recheap[0] : 0;
-}
-
-static struct record *pop_record(struct session *s)
-{
- struct record *res;
- int p = 0;
- int lastnonleaf = (s->recheap_max - 1) >> 1;
-
- if (s->recheap_max < 0)
- return 0;
-
- res = s->recheap[0];
-
- s->recheap[p] = s->recheap[s->recheap_max--];
-
- while (p <= lastnonleaf)
- {
- int right = (p + 1) << 1;
- int left = right - 1;
- int min = left;
-
- if (right < s->recheap_max &&
- strcmp(s->recheap[right]->merge_key, s->recheap[left]->merge_key) < 0)
- min = right;
- if (strcmp(s->recheap[min]->merge_key, s->recheap[p]->merge_key) < 0)
- {
- struct record *tmp = s->recheap[min];
- s->recheap[min] = s->recheap[p];
- s->recheap[p] = tmp;
- p = min;
- }
- else
- break;
- }
- return res;
-}
-
-// Like pop_record but collapses identical (merge_key) records
-// The heap will contain multiple independent matching records and possibly
-// one cluster, created the last time the list was scanned
-static struct record *pop_mrecord(struct session *s)
-{
- struct record *this;
- struct record *next;
-
- if (!(this = pop_record(s)))
- return 0;
-
- // Collapse identical records
- while ((next = top_record(s)))
- {
- struct record *p, *tmpnext;
- if (strcmp(this->merge_key, next->merge_key))
- break;
- // Absorb record (and clustersiblings) into a supercluster
- for (p = next; p; p = tmpnext) {
- tmpnext = p->next_cluster;
- p->next_cluster = this->next_cluster;
- this->next_cluster = p;
- }
-
- pop_record(s);
- }
- return this;
-}
-
-// Reads records in sort order. Store records in top of heapspace until rewind is called.
-static struct record *read_recheap(struct session *s)
-{
- struct record *r = pop_mrecord(s);
-
- if (r)
- {
- if (s->recheap_scratch < 0)
- s->recheap_scratch = s->recheap_size;
- s->recheap[--s->recheap_scratch] = r;
- }
-
- return r;
-}
-
-// Return records to heap after read
-static void rewind_recheap(struct session *s)
-{
- while (s->recheap_scratch >= 0) {
- push_record(s, s->recheap[s->recheap_scratch++]);
- if (s->recheap_scratch >= s->recheap_size)
- s->recheap_scratch = -1;
- }
-}
-
-#endif
-
-// FIXME needs to be generalized. Should flexibly generate X lists per search
-static void extract_subject(struct session *s, const char *rec)
-{
- const char *field, *subfield;
-
- while ((field = find_field(rec, "650")))
- {
- rec = field;
- if ((subfield = find_subfield(field, 'a')))
- {
- char *e, *ef;
- char buf[1024];
- int len;
-
- ef = index(subfield, '\n');
- if (!ef)
- return;
- if ((e = index(subfield, '\t')) && e < ef)
- ef = e;
- while (ef > subfield && !isalpha(*(ef - 1)) && *(ef - 1) != ')')
- ef--;
- len = ef - subfield;
- assert(len < 1023);
- memcpy(buf, subfield, len);
- buf[len] = '\0';
- if (*buf)
- termlist_insert(s->termlist, buf);
- }
- }
-}
-
-static void pull_relevance_field(struct session *s, struct record *head, const char *rec,
- char *field, int mult)
-{
- const char *fb;
- while ((fb = find_field(rec, field)))
- {
- char *ffield = strchr(fb, '\t');
- if (!ffield)
- return;
- char *eol = strchr(ffield, '\n');
- if (!eol)
- return;
- relevance_countwords(s->relevance, head, ffield, eol - ffield, mult);
- rec = field + 1; // Crude way to cause a loop through repeating fields
- }
-}
-
-static void pull_relevance_keys(struct session *s, struct record *head, struct record *rec)
-{
- relevance_newrec(s->relevance, head);
- pull_relevance_field(s, head, rec->buf, "100", 2);
- pull_relevance_field(s, head, rec->buf, "245", 4);
- //pull_relevance_field(s, head, rec->buf, "530", 1);
- pull_relevance_field(s, head, rec->buf, "630", 1);
- pull_relevance_field(s, head, rec->buf, "650", 1);
- pull_relevance_field(s, head, rec->buf, "700", 1);
- relevance_donerecord(s->relevance, head);
-}
-
-static struct record *ingest_record(struct client *cl, char *buf, int len)
-{
- struct session *se = cl->session;
- struct record *res;
- struct record *head;
- const char *recbuf;
-
- wrbuf_rewind(se->wrbuf);
- yaz_marc_xml(global_parameters.yaz_marc, YAZ_MARC_LINE);
- if (yaz_marc_decode_wrbuf(global_parameters.yaz_marc, buf, len, se->wrbuf) < 0)
- {
- yaz_log(YLOG_WARN, "Failed to decode MARC record");
- return 0;
- }
- wrbuf_putc(se->wrbuf, '\0');
- recbuf = wrbuf_buf(se->wrbuf);
-
- res = nmem_malloc(se->nmem, sizeof(struct record));
- res->buf = nmem_strdup(se->nmem, recbuf);
-
- extract_subject(se, res->buf);
-
- res->title = extract_title(se, res->buf);
- res->merge_key = extract_mergekey(se, res->buf);
- if (!res->merge_key)
- return 0;
- res->client = cl;
- res->next_cluster = 0;
- res->target_offset = -1;
- res->term_frequency_vec = 0;
-
- head = reclist_insert(se->reclist, res);
-
- pull_relevance_keys(se, head, res);
-
- se->total_records++;
-
- return res;
-}
-
-static void ingest_records(struct client *cl, Z_Records *r)
-{
- struct record *rec;
- struct session *s = cl->session;
- Z_NamePlusRecordList *rlist;
- int i;
-
- if (r->which != Z_Records_DBOSD)
- return;
- rlist = r->u.databaseOrSurDiagnostics;
- for (i = 0; i < rlist->num_records; i++)
- {
- Z_NamePlusRecord *npr = rlist->records[i];
- Z_External *e;
- char *buf;
- int len;
-
- if (npr->which != Z_NamePlusRecord_databaseRecord)
- {
- yaz_log(YLOG_WARN, "Unexpected record type, probably diagnostic");
- continue;
- }
- e = npr->u.databaseRecord;
- if (e->which != Z_External_octet)
- {
- yaz_log(YLOG_WARN, "Unexpected external branch, probably BER");
- continue;
- }
- buf = (char*) e->u.octet_aligned->buf;
- len = e->u.octet_aligned->len;
-
- rec = ingest_record(cl, buf, len);
- if (!rec)
- continue;
- }
- if (s->watchlist[SESSION_WATCH_RECORDS].fun && rlist->num_records)
- session_alert_watch(s, SESSION_WATCH_RECORDS);
-}
-
-xsltStylesheetPtr load_stylesheet(const char *fname)
-{
- xsltStylesheetPtr ret;
- if (!(ret = xsltParseStylesheetFile((const xmlChar *) fname)))
- {
- yaz_log(YLOG_FATAL|YLOG_ERRNO, "Failed to load stylesheet %s", fname);
- exit(1);
- }
- return ret;
-}
-
-static void do_presentResponse(IOCHAN i, Z_APDU *a)
-{
- struct connection *co = iochan_getdata(i);
- struct client *cl = co->client;
- Z_PresentResponse *r = a->u.presentResponse;