Use strrchr rather than rindex (obsolete)
[pazpar2-moved-to-github.git] / src / pazpar2.c
index 1537054..c2d7d94 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: pazpar2.c,v 1.12 2007-01-04 07:38:36 adam Exp $ */;
+/* $Id: pazpar2.c,v 1.24 2007-01-10 10:15:04 adam Exp $ */
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -11,6 +11,7 @@
 #include <ctype.h>
 #include <assert.h>
 
+#include <yaz/marcdisp.h>
 #include <yaz/comstack.h>
 #include <yaz/tcpip.h>
 #include <yaz/proto.h>
 #include <yaz/yaz-util.h>
 #include <yaz/nmem.h>
 
+#if HAVE_CONFIG_H
+#include "cconfig.h"
+#endif
+
 #define USE_TIMING 0
 #if USE_TIMING
 #include <yaz/timing.h>
 
 #include "pazpar2.h"
 #include "eventl.h"
-#include "command.h"
 #include "http.h"
 #include "termlists.h"
 #include "reclists.h"
 #include "relevance.h"
 #include "config.h"
 
-#define PAZPAR2_VERSION "0.1"
 #define MAX_CHUNK 15
 
 static void client_fatal(struct client *cl);
@@ -64,13 +67,15 @@ static char *client_states[] = {
     "Client_Stopped"
 };
 
+// Note: Some things in this structure will eventually move to configuration
 struct parameters global_parameters = 
 {
     0,
+    0,
     30,
     "81",
     "Index Data PazPar2 (MasterKey)",
-    PAZPAR2_VERSION,
+    VERSION,
     600, // 10 minutes
     60,
     100,
@@ -198,6 +203,8 @@ static void send_present(IOCHAN i)
     int start = cl->records + 1;
 
     toget = global_parameters.chunk;
+    if (toget > global_parameters.toget - cl->records)
+        toget = global_parameters.toget - cl->records;
     if (toget > cl->hits - cl->records)
        toget = cl->hits - cl->records;
 
@@ -353,6 +360,8 @@ static void add_facet(struct session *s, const char *type, const char *value)
     termlist_insert(s->termlists[i].termlist, value);
 }
 
+int yaz_marc_write_xml();
+
 static xmlDoc *normalize_record(struct client *cl, Z_External *rec)
 {
     struct conf_retrievalprofile *rprofile = cl->database->rprofile;
@@ -415,13 +424,46 @@ static xmlDoc *normalize_record(struct client *cl, Z_External *rec)
     return rdoc;
 }
 
+// Find runs of exactly four digits in buf and treat each as a year: *first gets
+// the lowest, *last the highest. Returns *first, which is -1 if none was found.
+static int extract_years(const char *buf, int *first, int *last)
+{
+    *first = -1;
+    *last = -1;
+    while (*buf)
+    {
+        const char *e;
+        int len;
+        // unsigned char casts: isdigit() is undefined for negative char values
+        while (*buf && !isdigit((unsigned char) *buf))
+            buf++;
+        len = 0;
+        for (e = buf; *e && isdigit((unsigned char) *e); e++)
+            len++;
+        if (len == 4)
+        {
+            int value = atoi(buf);
+            if (*first < 0 || value < *first)
+                *first = value;
+            if (*last < 0 || value > *last)
+                *last = value;
+        }
+        buf = e;
+    }
+    return *first;
+}
+
 static struct record *ingest_record(struct client *cl, Z_External *rec)
 {
     xmlDoc *xdoc = normalize_record(cl, rec);
     xmlNode *root, *n;
-    struct record *res, *head;
+    struct record *res;
+    struct record_cluster *cluster;
     struct session *se = cl->session;
     xmlChar *mergekey, *mergekey_norm;
+    xmlChar *type;
+    xmlChar *value;
+    struct conf_service *service = global_parameters.server->service;
 
     if (!xdoc)
         return 0;
@@ -430,53 +472,140 @@ static struct record *ingest_record(struct client *cl, Z_External *rec)
     if (!(mergekey = xmlGetProp(root, "mergekey")))
     {
         yaz_log(YLOG_WARN, "No mergekey found in record");
+        xmlFreeDoc(xdoc);
         return 0;
     }
 
     res = nmem_malloc(se->nmem, sizeof(struct record));
-    res->next_cluster = 0;
-    res->target_offset = -1;
-    res->term_frequency_vec = 0;
-    res->title = "Unknown";
-    res->relevance = 0;
+    res->next = 0;
+    res->metadata = nmem_malloc(se->nmem,
+            sizeof(struct record_metadata*) * service->num_metadata);
+    memset(res->metadata, 0, sizeof(struct record_metadata*) * service->num_metadata);
 
     mergekey_norm = nmem_strdup(se->nmem, (char*) mergekey);
     xmlFree(mergekey);
-    res->merge_key = normalize_mergekey(mergekey_norm);
+    normalize_mergekey(mergekey_norm);
 
-    head = reclist_insert(se->reclist, res);
-    relevance_newrec(se->relevance, head);
+    cluster = reclist_insert(se->reclist, res, mergekey_norm, &se->total_merged);
+    if (!cluster)
+    {
+        /* no room for record */
+        xmlFreeDoc(xdoc);
+        return 0;
+    }
+    relevance_newrec(se->relevance, cluster);
 
+    type = value = 0;
     for (n = root->children; n; n = n->next)
     {
+        if (type)
+            xmlFree(type);
+        if (value)
+            xmlFree(value);
+        type = value = 0;
+
         if (n->type != XML_ELEMENT_NODE)
             continue;
-        if (!strcmp(n->name, "facet"))
+        if (!strcmp(n->name, "metadata"))
         {
-            xmlChar *type = xmlGetProp(n, "type");
-            xmlChar *value = xmlNodeListGetString(xdoc, n->children, 0);
-            if (type && value)
+            type = xmlGetProp(n, "type");
+            value = xmlNodeListGetString(xdoc, n->children, 0);
+            struct conf_metadata *md = 0;
+            struct record_metadata **wheretoput, *newm;
+            int imeta;
+            int first, last;
+
+            // First, find out what field we're looking at
+            for (imeta = 0; imeta < service->num_metadata; imeta++)
+                if (!strcmp(type, service->metadata[imeta].name))
+                {
+                    md = &service->metadata[imeta];
+                    break;
+                }
+            if (!md)
             {
-                add_facet(se, type, value);
-                relevance_countwords(se->relevance, head, value, 1);
+                yaz_log(YLOG_WARN, "Ignoring unknown metadata element: %s", type);
+                continue;
             }
-            xmlFree(type);
-            xmlFree(value);
-        }
-        else if (!strcmp(n->name, "metadata"))
-        {
-            xmlChar *type = xmlGetProp(n, "type"), *value;
-            if (type && !strcmp(type, "title"))
+
+            // Find out where we are putting it
+            if (md->merge == Metadata_merge_no)
+                wheretoput = &res->metadata[imeta];
+            else
+                wheretoput = &cluster->metadata[imeta];
+            
+            // Put it there
+            newm = nmem_malloc(se->nmem, sizeof(struct record_metadata));
+            newm->next = 0;
+            if (md->type == Metadata_type_generic)
+            {
+                newm->data.text = nmem_strdup(se->nmem, value);
+            }
+            else if (md->type == Metadata_type_year)
+            {
+                if (extract_years(value, &first, &last) < 0)
+                    continue;
+            }
+            else
             {
-                xmlChar *value = xmlNodeListGetString(xdoc, n->children, 0);
-                if (value)
+                yaz_log(YLOG_WARN, "Unknown type in metadata element %s", type);
+                continue;
+            }
+            if (md->type == Metadata_type_year && md->merge != Metadata_merge_range)
+            {
+                yaz_log(YLOG_WARN, "Only range merging supported for years");
+                continue;
+            }
+            if (md->merge == Metadata_merge_unique)
+            {
+                struct record_metadata *mnode;
+                for (mnode = *wheretoput; mnode; mnode = mnode->next)
+                    if (!strcmp(mnode->data.text, mnode->data.text))
+                        break;
+                if (!mnode)
                 {
-                    res->title = nmem_strdup(se->nmem, value);
-                    relevance_countwords(se->relevance, head, value, 4);
-                    xmlFree(value);
+                    newm->next = *wheretoput;
+                    *wheretoput = newm;
                 }
             }
+            else if (md->merge == Metadata_merge_longest)
+            {
+                if (!*wheretoput ||
+                        strlen(newm->data.text) > strlen((*wheretoput)->data.text))
+                *wheretoput = newm;
+            }
+            else if (md->merge == Metadata_merge_all || md->merge == Metadata_merge_no)
+            {
+                newm->next = *wheretoput;
+                *wheretoput = newm;
+            }
+            else if (md->merge == Metadata_merge_range)
+            {
+                assert(md->type == Metadata_type_year);
+                if (!*wheretoput)
+                {
+                    *wheretoput = newm;
+                    (*wheretoput)->data.year.year1 = first;
+                    (*wheretoput)->data.year.year2 = last;
+                }
+                else
+                {
+                    if (first < (*wheretoput)->data.year.year1)
+                        (*wheretoput)->data.year.year1 = first;
+                    if (last > (*wheretoput)->data.year.year2)
+                        (*wheretoput)->data.year.year2 = last;
+                }
+            }
+            else
+                yaz_log(YLOG_WARN, "Don't know how to merge on element name %s", md->name);
+
+            if (md->rank)
+                relevance_countwords(se->relevance, cluster, value, md->rank);
+            if (md->termlist)
+                add_facet(se, type, value);
             xmlFree(type);
+            xmlFree(value);
+            type = value = 0;
         }
         else
             yaz_log(YLOG_WARN, "Unexpected element %s in internal record", n->name);
@@ -484,7 +613,7 @@ static struct record *ingest_record(struct client *cl, Z_External *rec)
 
     xmlFreeDoc(xdoc);
 
-    relevance_donerecord(se->relevance, head);
+    relevance_donerecord(se->relevance, cluster);
     se->total_records++;
 
     return res;
@@ -1083,7 +1212,7 @@ char *search(struct session *se, char *query)
         se->reclist = reclist_create(se->nmem, maxrecs);
         extract_terms(se->nmem, query, p);
         se->relevance = relevance_create(se->nmem, (const char **) p, maxrecs);
-        se->total_records = se->total_hits = 0;
+        se->total_records = se->total_hits = se->total_merged = 0;
         se->expected_maxrecs = maxrecs;
     }
     else
@@ -1159,9 +1288,7 @@ struct termlist_score **termlist(struct session *s, const char *name, int *num)
     return 0;
 }
 
-#ifdef REPORT_NMEM
-// conditional compilation by SH: This lead to a warning with currently installed
-// YAZ header files on us1
+#ifdef MISSING_HEADERS
 void report_nmem_stats(void)
 {
     size_t in_use, is_free;
@@ -1174,11 +1301,22 @@ void report_nmem_stats(void)
 }
 #endif
 
-struct record **show(struct session *s, int start, int *num, int *total,
+// Rewind s->reclist and read clusters from it until one whose recid equals
+// id is found. Returns that cluster, or 0 when the list runs out first.
+struct record_cluster *show_single(struct session *s, int id)
+{
+    struct record_cluster *hit;
+    reclist_rewind(s->reclist);
+    for (hit = reclist_read_record(s->reclist); hit && hit->recid != id; )
+        hit = reclist_read_record(s->reclist);
+    return hit;
+}
+
+struct record_cluster **show(struct session *s, int start, int *num, int *total,
                      int *sumhits, NMEM nmem_show)
 {
-    struct record **recs = nmem_malloc(nmem_show, *num 
-                                       * sizeof(struct record *));
+    struct record_cluster **recs = nmem_malloc(nmem_show, *num 
+                                       * sizeof(struct record_cluster *));
     int i;
 #if USE_TIMING    
     yaz_timing_t t = yaz_timing_create();
@@ -1198,7 +1336,7 @@ struct record **show(struct session *s, int start, int *num, int *total,
 
     for (i = 0; i < *num; i++)
     {
-        struct record *r = reclist_read_record(s->reclist);
+        struct record_cluster *r = reclist_read_record(s->reclist);
         if (!r)
         {
             *num = i;
@@ -1221,7 +1359,7 @@ void statistics(struct session *se, struct statistics *stat)
     struct client *cl;
     int count = 0;
 
-    bzero(stat, sizeof(*stat));
+    memset(stat, 0, sizeof(*stat));
     for (cl = se->clients; cl; cl = cl->next)
     {
         if (!cl->connection)
@@ -1262,22 +1400,18 @@ int main(int argc, char **argv)
     char *arg;
     int setport = 0;
 
-    if (signal(SIGPIPE, SIG_IGN) < 0)
+    if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
         yaz_log(YLOG_WARN|YLOG_ERRNO, "signal");
 
     yaz_log_init(YLOG_DEFAULT_LEVEL, "pazpar2", 0);
 
-    while ((ret = options("f:x:c:h:p:C:s:d", argv, argc, &arg)) != -2)
+    while ((ret = options("f:x:h:p:C:s:d", argv, argc, &arg)) != -2)
     {
        switch (ret) {
             case 'f':
                 if (!read_config(arg))
                     exit(1);
                 break;
-           case 'c':
-               command_init(atoi(arg));
-                setport++;
-               break;
             case 'h':
                 http_init(arg);
                 setport++;
@@ -1298,7 +1432,6 @@ int main(int argc, char **argv)
                fprintf(stderr, "Usage: pazpar2\n"
                         "    -f configfile\n"
                         "    -h [host:]port          (REST protocol listener)\n"
-                        "    -c cmdport              (telnet-style)\n"
                         "    -C cclconfig\n"
                         "    -s simpletargetfile\n"
                         "    -p hostname[:portno]    (HTTP proxy)\n");
@@ -1306,9 +1439,16 @@ int main(int argc, char **argv)
        }
     }
 
+    if (!config)
+    {
+        yaz_log(YLOG_FATAL, "Load config with -f");
+        exit(1);
+    }
+    global_parameters.server = config->servers;
+
     if (!setport)
     {
-        fprintf(stderr, "Set command port with -h or -c\n");
+        fprintf(stderr, "Set command port with -h\n");
         exit(1);
     }