Use strrchr rather than rindex (obsolete)
[pazpar2-moved-to-github.git] / src / pazpar2.c
index 1f91e6a..c2d7d94 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: pazpar2.c,v 1.8 2007-01-03 16:59:32 quinn Exp $ */;
+/* $Id: pazpar2.c,v 1.24 2007-01-10 10:15:04 adam Exp $ */
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -11,6 +11,7 @@
 #include <ctype.h>
 #include <assert.h>
 
+#include <yaz/marcdisp.h>
 #include <yaz/comstack.h>
 #include <yaz/tcpip.h>
 #include <yaz/proto.h>
 #include <yaz/yaz-util.h>
 #include <yaz/nmem.h>
 
+#if HAVE_CONFIG_H
+#include "cconfig.h"
+#endif
+
+#define USE_TIMING 0
+#if USE_TIMING
+#include <yaz/timing.h>
+#endif
+
 #include "pazpar2.h"
 #include "eventl.h"
-#include "command.h"
 #include "http.h"
 #include "termlists.h"
 #include "reclists.h"
 #include "relevance.h"
 #include "config.h"
 
-#define PAZPAR2_VERSION "0.1"
 #define MAX_CHUNK 15
 
 static void client_fatal(struct client *cl);
@@ -59,13 +67,15 @@ static char *client_states[] = {
     "Client_Stopped"
 };
 
+// Note: Some things in this structure will eventually move to configuration
 struct parameters global_parameters = 
 {
     0,
+    0,
     30,
     "81",
     "Index Data PazPar2 (MasterKey)",
-    PAZPAR2_VERSION,
+    VERSION,
     600, // 10 minutes
     60,
     100,
@@ -76,7 +86,6 @@ struct parameters global_parameters =
     0
 };
 
-
 static int send_apdu(struct client *c, Z_APDU *a)
 {
     struct connection *co = c->connection;
@@ -194,6 +203,8 @@ static void send_present(IOCHAN i)
     int start = cl->records + 1;
 
     toget = global_parameters.chunk;
+    if (toget > global_parameters.toget - cl->records)
+        toget = global_parameters.toget - cl->records;
     if (toget > cl->hits - cl->records)
        toget = cl->hits - cl->records;
 
@@ -349,6 +360,8 @@ static void add_facet(struct session *s, const char *type, const char *value)
     termlist_insert(s->termlists[i].termlist, value);
 }
 
+int yaz_marc_write_xml();
+
 static xmlDoc *normalize_record(struct client *cl, Z_External *rec)
 {
     struct conf_retrievalprofile *rprofile = cl->database->rprofile;
@@ -411,13 +424,46 @@ static xmlDoc *normalize_record(struct client *cl, Z_External *rec)
     return rdoc;
 }
 
+// Extract what appears to be years (runs of exactly four digits) from buf.
+// Stores the lowest value in *first and the highest in *last; both are -1
+// when no year is found. Returns *first, so a negative result means "none".
+// isdigit() takes unsigned char values; a negative plain char is undefined.
+static int extract_years(const char *buf, int *first, int *last)
+{
+    *first = -1;
+    *last = -1;
+    while (*buf)
+    {
+        const char *e;
+
+        while (*buf && !isdigit((unsigned char) *buf))
+            buf++;
+        for (e = buf; isdigit((unsigned char) *e); e++)
+            ;
+        if (e - buf == 4)
+        {
+            int value = atoi(buf);
+            if (*first < 0 || value < *first)
+                *first = value;
+            if (*last < 0 || value > *last)
+                *last = value;
+        }
+        buf = e;
+    }
+    return *first;
+}
+
 static struct record *ingest_record(struct client *cl, Z_External *rec)
 {
     xmlDoc *xdoc = normalize_record(cl, rec);
     xmlNode *root, *n;
-    struct record *res, *head;
+    struct record *res;
+    struct record_cluster *cluster;
     struct session *se = cl->session;
     xmlChar *mergekey, *mergekey_norm;
+    xmlChar *type;
+    xmlChar *value;
+    struct conf_service *service = global_parameters.server->service;
 
     if (!xdoc)
         return 0;
@@ -426,52 +472,148 @@ static struct record *ingest_record(struct client *cl, Z_External *rec)
     if (!(mergekey = xmlGetProp(root, "mergekey")))
     {
         yaz_log(YLOG_WARN, "No mergekey found in record");
+        xmlFreeDoc(xdoc);
         return 0;
     }
 
     res = nmem_malloc(se->nmem, sizeof(struct record));
-    res->next_cluster = 0;
-    res->target_offset = -1;
-    res->term_frequency_vec = 0;
-    res->title = "Unknown";
-    res->relevance = 0;
+    res->next = 0;
+    res->metadata = nmem_malloc(se->nmem,
+            sizeof(struct record_metadata*) * service->num_metadata);
+    memset(res->metadata, 0, sizeof(struct record_metadata*) * service->num_metadata);
 
     mergekey_norm = nmem_strdup(se->nmem, (char*) mergekey);
     xmlFree(mergekey);
-    res->merge_key = normalize_mergekey(mergekey_norm);
+    normalize_mergekey(mergekey_norm);
 
-    head = reclist_insert(se->reclist, res);
-    relevance_newrec(se->relevance, head);
+    cluster = reclist_insert(se->reclist, res, mergekey_norm, &se->total_merged);
+    if (!cluster)
+    {
+        /* no room for record */
+        xmlFreeDoc(xdoc);
+        return 0;
+    }
+    relevance_newrec(se->relevance, cluster);
 
+    type = value = 0;
     for (n = root->children; n; n = n->next)
     {
-        if (n->type != XML_ELEMENT_NODE)
-            continue;
-        if (!strcmp(n->name, "facet"))
-        {
-            xmlChar *type = xmlGetProp(n, "type");
-            xmlChar *value = xmlNodeListGetString(xdoc, n->children, 0);
-            add_facet(se, type, value);
-            relevance_countwords(se->relevance, head, value, 1);
+        if (type)
             xmlFree(type);
+        if (value)
             xmlFree(value);
-        }
-        else if (!strcmp(n->name, "metadata"))
+        type = value = 0;
+
+        if (n->type != XML_ELEMENT_NODE)
+            continue;
+        if (!strcmp(n->name, "metadata"))
         {
-            xmlChar *type = xmlGetProp(n, "type"), *value;
-            if (!strcmp(type, "title"))
-                res->title = nmem_strdup(se->nmem,
-                        value = xmlNodeListGetString(xdoc, n->children, 0));
+            type = xmlGetProp(n, "type");
+            value = xmlNodeListGetString(xdoc, n->children, 0);
+            struct conf_metadata *md = 0;
+            struct record_metadata **wheretoput, *newm;
+            int imeta;
+            int first, last;
+
+            // First, find out what field we're looking at
+            for (imeta = 0; imeta < service->num_metadata; imeta++)
+                if (!strcmp(type, service->metadata[imeta].name))
+                {
+                    md = &service->metadata[imeta];
+                    break;
+                }
+            if (!md)
+            {
+                yaz_log(YLOG_WARN, "Ignoring unknown metadata element: %s", type);
+                continue;
+            }
+
+            // Find out where we are putting it
+            if (md->merge == Metadata_merge_no)
+                wheretoput = &res->metadata[imeta];
+            else
+                wheretoput = &cluster->metadata[imeta];
+            
+            // Put it there
+            newm = nmem_malloc(se->nmem, sizeof(struct record_metadata));
+            newm->next = 0;
+            if (md->type == Metadata_type_generic)
+            {
+                newm->data.text = nmem_strdup(se->nmem, value);
+            }
+            else if (md->type == Metadata_type_year)
+            {
+                if (extract_years(value, &first, &last) < 0)
+                    continue;
+            }
+            else
+            {
+                yaz_log(YLOG_WARN, "Unknown type in metadata element %s", type);
+                continue;
+            }
+            if (md->type == Metadata_type_year && md->merge != Metadata_merge_range)
+            {
+                yaz_log(YLOG_WARN, "Only range merging supported for years");
+                continue;
+            }
+            if (md->merge == Metadata_merge_unique)
+            {
+                struct record_metadata *mnode;
+                for (mnode = *wheretoput; mnode; mnode = mnode->next)
+                    if (!strcmp(mnode->data.text, mnode->data.text))
+                        break;
+                if (!mnode)
+                {
+                    newm->next = *wheretoput;
+                    *wheretoput = newm;
+                }
+            }
+            else if (md->merge == Metadata_merge_longest)
+            {
+                if (!*wheretoput ||
+                        strlen(newm->data.text) > strlen((*wheretoput)->data.text))
+                *wheretoput = newm;
+            }
+            else if (md->merge == Metadata_merge_all || md->merge == Metadata_merge_no)
+            {
+                newm->next = *wheretoput;
+                *wheretoput = newm;
+            }
+            else if (md->merge == Metadata_merge_range)
+            {
+                assert(md->type == Metadata_type_year);
+                if (!*wheretoput)
+                {
+                    *wheretoput = newm;
+                    (*wheretoput)->data.year.year1 = first;
+                    (*wheretoput)->data.year.year2 = last;
+                }
+                else
+                {
+                    if (first < (*wheretoput)->data.year.year1)
+                        (*wheretoput)->data.year.year1 = first;
+                    if (last > (*wheretoput)->data.year.year2)
+                        (*wheretoput)->data.year.year2 = last;
+                }
+            }
+            else
+                yaz_log(YLOG_WARN, "Don't know how to merge on element name %s", md->name);
 
-            relevance_countwords(se->relevance, head, value, 4);
+            if (md->rank)
+                relevance_countwords(se->relevance, cluster, value, md->rank);
+            if (md->termlist)
+                add_facet(se, type, value);
             xmlFree(type);
             xmlFree(value);
+            type = value = 0;
         }
         else
             yaz_log(YLOG_WARN, "Unexpected element %s in internal record", n->name);
     }
 
-    relevance_donerecord(se->relevance, head);
+    xmlFreeDoc(xdoc);
+
+    relevance_donerecord(se->relevance, cluster);
     se->total_records++;
 
     return res;
@@ -479,6 +621,9 @@ static struct record *ingest_record(struct client *cl, Z_External *rec)
 
 static void ingest_records(struct client *cl, Z_Records *r)
 {
+#if USE_TIMING
+    yaz_timing_t t = yaz_timing_create();
+#endif
     struct record *rec;
     struct session *s = cl->session;
     Z_NamePlusRecordList *rlist;
@@ -503,6 +648,14 @@ static void ingest_records(struct client *cl, Z_Records *r)
     }
     if (s->watchlist[SESSION_WATCH_RECORDS].fun && rlist->num_records)
         session_alert_watch(s, SESSION_WATCH_RECORDS);
+
+#if USE_TIMING
+    yaz_timing_stop(t);
+    yaz_log(YLOG_LOG, "ingest_records %6.5f %3.2f %3.2f", 
+            yaz_timing_get_real(t), yaz_timing_get_user(t),
+            yaz_timing_get_sys(t));
+    yaz_timing_destroy(&t);
+#endif
 }
 
 static void do_presentResponse(IOCHAN i, Z_APDU *a)
@@ -1017,6 +1170,21 @@ int select_targets(struct session *se)
     return c;
 }
 
+// Count the clients of s that hold a connection and are in the Connecting,
+// Initializing, Searching or Presenting state.
+int session_active_clients(struct session *s)
+{
+    int active = 0;
+    struct client *cl;
+    for (cl = s->clients; cl; cl = cl->next)
+    {
+        int busy = cl->state == Client_Connecting || cl->state == Client_Initializing
+            || cl->state == Client_Searching || cl->state == Client_Presenting;
+        active += (cl->connection && busy);
+    }
+    return active;
+}
+
 char *search(struct session *se, char *query)
 {
     int live_channels = 0;
@@ -1044,7 +1212,7 @@ char *search(struct session *se, char *query)
         se->reclist = reclist_create(se->nmem, maxrecs);
         extract_terms(se->nmem, query, p);
         se->relevance = relevance_create(se->nmem, (const char **) p, maxrecs);
-        se->total_records = se->total_hits = 0;
+        se->total_records = se->total_hits = se->total_merged = 0;
         se->expected_maxrecs = maxrecs;
     }
     else
@@ -1120,9 +1288,7 @@ struct termlist_score **termlist(struct session *s, const char *name, int *num)
     return 0;
 }
 
-#ifdef REPORT_NMEM
-// conditional compilation by SH: This lead to a warning with currently installed
-// YAZ header files on us1
+#ifdef MISSING_HEADERS
 void report_nmem_stats(void)
 {
     size_t in_use, is_free;
@@ -1135,13 +1301,26 @@ void report_nmem_stats(void)
 }
 #endif
 
-struct record **show(struct session *s, int start, int *num, int *total,
+// Rewind the record list of s and return the first cluster whose recid
+// equals id, or 0 when reclist_read_record() runs out without a match.
+struct record_cluster *show_single(struct session *s, int id)
+{
+    struct record_cluster *hit;
+    for (reclist_rewind(s->reclist); (hit = reclist_read_record(s->reclist)); )
+        if (hit->recid == id)
+            break;
+    return hit;
+}
+
+struct record_cluster **show(struct session *s, int start, int *num, int *total,
                      int *sumhits, NMEM nmem_show)
 {
-    struct record **recs = nmem_malloc(nmem_show, *num 
-                                       * sizeof(struct record *));
+    struct record_cluster **recs = nmem_malloc(nmem_show, *num 
+                                       * sizeof(struct record_cluster *));
     int i;
-
+#if USE_TIMING    
+    yaz_timing_t t = yaz_timing_create();
+#endif
     relevance_prepare_read(s->relevance, s->reclist);
 
     *total = s->reclist->num_records;
@@ -1151,12 +1330,13 @@ struct record **show(struct session *s, int start, int *num, int *total,
         if (!reclist_read_record(s->reclist))
         {
             *num = 0;
-            return 0;
+            recs = 0;
+            break;
         }
 
     for (i = 0; i < *num; i++)
     {
-        struct record *r = reclist_read_record(s->reclist);
+        struct record_cluster *r = reclist_read_record(s->reclist);
         if (!r)
         {
             *num = i;
@@ -1164,6 +1344,13 @@ struct record **show(struct session *s, int start, int *num, int *total,
         }
         recs[i] = r;
     }
+#if USE_TIMING
+    yaz_timing_stop(t);
+    yaz_log(YLOG_LOG, "show %6.5f %3.2f %3.2f", 
+            yaz_timing_get_real(t), yaz_timing_get_user(t),
+            yaz_timing_get_sys(t));
+    yaz_timing_destroy(&t);
+#endif
     return recs;
 }
 
@@ -1172,7 +1359,7 @@ void statistics(struct session *se, struct statistics *stat)
     struct client *cl;
     int count = 0;
 
-    bzero(stat, sizeof(*stat));
+    memset(stat, 0, sizeof(*stat));
     for (cl = se->clients; cl; cl = cl->next)
     {
         if (!cl->connection)
@@ -1213,22 +1400,18 @@ int main(int argc, char **argv)
     char *arg;
     int setport = 0;
 
-    if (signal(SIGPIPE, SIG_IGN) < 0)
+    if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
         yaz_log(YLOG_WARN|YLOG_ERRNO, "signal");
 
     yaz_log_init(YLOG_DEFAULT_LEVEL, "pazpar2", 0);
 
-    while ((ret = options("f:x:c:h:p:C:s:d", argv, argc, &arg)) != -2)
+    while ((ret = options("f:x:h:p:C:s:d", argv, argc, &arg)) != -2)
     {
        switch (ret) {
             case 'f':
                 if (!read_config(arg))
                     exit(1);
                 break;
-           case 'c':
-               command_init(atoi(arg));
-                setport++;
-               break;
             case 'h':
                 http_init(arg);
                 setport++;
@@ -1249,7 +1432,6 @@ int main(int argc, char **argv)
                fprintf(stderr, "Usage: pazpar2\n"
                         "    -f configfile\n"
                         "    -h [host:]port          (REST protocol listener)\n"
-                        "    -c cmdport              (telnet-style)\n"
                         "    -C cclconfig\n"
                         "    -s simpletargetfile\n"
                         "    -p hostname[:portno]    (HTTP proxy)\n");
@@ -1257,9 +1439,16 @@ int main(int argc, char **argv)
        }
     }
 
+    if (!config)
+    {
+        yaz_log(YLOG_FATAL, "Load config with -f");
+        exit(1);
+    }
+    global_parameters.server = config->servers;
+
     if (!setport)
     {
-        fprintf(stderr, "Set command port with -h or -c\n");
+        fprintf(stderr, "Set command port with -h\n");
         exit(1);
     }