Version 1.3.29
[metaproxy-moved-to-github.git] / src / filter_zoom.cpp
index 6ca6015..34b3084 100644 (file)
@@ -85,13 +85,13 @@ namespace metaproxy_1 {
             friend class Impl;
             friend class Frontend;
             std::string zurl;
-            WRBUF m_apdu_wrbuf;
+            mp::wrbuf m_apdu_wrbuf;
             ZOOM_connection m_connection;
             ZOOM_resultset m_resultset;
             std::string m_frontend_database;
             SearchablePtr sptr;
             xsltStylesheetPtr xsp;
-            std::string content_session_id;
+            std::string cproxy_host;
             bool enable_cproxy;
             bool enable_explain;
             xmlDoc *explain_doc;
@@ -201,6 +201,7 @@ namespace metaproxy_1 {
             std::string file_path;
             std::string content_proxy_server;
             std::string content_tmp_file;
+            std::string content_config_file;
             bool apdu_log;
             CCL_bibset bibset;
             std::string element_transform;
@@ -263,7 +264,6 @@ void yf::Zoom::process(mp::Package &package) const
 
 yf::Zoom::Backend::Backend()
 {
-    m_apdu_wrbuf = wrbuf_alloc();
     m_connection = ZOOM_connection_create(0);
     ZOOM_connection_save_apdu_wrbuf(m_connection, m_apdu_wrbuf);
     m_resultset = 0;
@@ -281,7 +281,6 @@ yf::Zoom::Backend::~Backend()
         xmlFreeDoc(explain_doc);
     ZOOM_connection_destroy(m_connection);
     ZOOM_resultset_destroy(m_resultset);
-    wrbuf_destroy(m_apdu_wrbuf);
 }
 
 
@@ -322,7 +321,7 @@ void yf::Zoom::Backend::connect(std::string zurl,
                                 int *error, char **addinfo,
                                 ODR odr)
 {
-    ZOOM_connection_connect(m_connection, zurl.c_str(), 0);
+    ZOOM_connection_connect(m_connection, zurl.length() ? zurl.c_str() : 0, 0);
     get_zoom_error(error, addinfo, odr);
 }
 
@@ -368,7 +367,6 @@ yf::Zoom::Searchable::Searchable(CCL_bibset base)
     piggyback = true;
     use_turbomarc = true;
     sortStrategy = "embed";
-    urlRecipe = "${md-electronic-url}";
     ccl_bibset = ccl_qual_dup(base);
 }
 
@@ -625,7 +623,6 @@ void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only,
     std::string explain_xslt_fname;
     std::string record_xslt_fname;
 
-    content_tmp_file = "/tmp/cf.XXXXXX.p";
     if (path && *path)
     {
         file_path = path;
@@ -698,9 +695,19 @@ void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only,
             for (attr = ptr->properties; attr; attr = attr->next)
             {
                 if (!strcmp((const char *) attr->name, "server"))
+                {   // legacy attribute: still accepted, but point at config_file
+                    yaz_log(YLOG_WARN,
+                            "contentProxy's server attribute is deprecated");
+                    yaz_log(YLOG_LOG,
+                            "Specify config_file instead. For example:");
+                    yaz_log(YLOG_LOG,
+                            " config_file=\"/etc/cf-proxy/cproxy.cfg\"");
                     content_proxy_server = mp::xml::get_text(attr->children);
+                }
                 else if (!strcmp((const char *) attr->name, "tmp_file"))
                     content_tmp_file = mp::xml::get_text(attr->children);
+                else if (!strcmp((const char *) attr->name, "config_file"))
+                    content_config_file = mp::xml::get_text(attr->children);
                 else
                     throw mp::filter::FilterException(
                         "Bad attribute " + std::string((const char *)
@@ -823,16 +830,98 @@ bool yf::Zoom::Frontend::create_content_session(mp::Package &package,
 {
     if (b->sptr->contentConnector.length())
     {
-        char *fname = (char *) xmalloc(m_p->content_tmp_file.length() + 8);
-        strcpy(fname, m_p->content_tmp_file.c_str());
+        std::string proxyhostname;
+        std::string tmp_file;
+        bool legacy_format = false;
+
+        if (m_p->content_proxy_server.length())
+        {
+            proxyhostname = m_p->content_proxy_server;
+            legacy_format = true;
+        }
+            
+        if (m_p->content_tmp_file.length())
+            tmp_file = m_p->content_tmp_file;
+
+        if (m_p->content_config_file.length())
+        {
+            // The config file holds "key: value" lines ('#' starts a comment);
+            // a setting found here replaces the one taken from the attributes.
+            FILE *inf = fopen(m_p->content_config_file.c_str(), "r");
+            if (inf)
+            {
+                char buf[1024];
+                while (fgets(buf, sizeof(buf)-1, inf))
+                {
+                    buf[strcspn(buf, "#\n")] = '\0'; // cut comment and newline
+                    char *cp = strchr(buf, ':');
+                    if (cp)
+                    {
+                        char *cp1 = cp;
+                        while (cp1 != buf && cp1[-1] == ' ')
+                            cp1--;
+                        *cp1 = '\0';                   // buf is now the key
+                        cp += 1 + strspn(cp + 1, " "); // start of the value
+                        // trim blanks (left by an inline comment) and CR
+                        size_t len = strlen(cp);
+                        while (len && (cp[len-1] == ' ' || cp[len-1] == '\r'))
+                            cp[--len] = '\0';
+                        if (!strcmp(buf, "proxyhostname"))
+                            proxyhostname = cp;
+                        if (!strcmp(buf, "sessiondir") && *cp)
+                        {
+                            if (cp[len-1] == '/')
+                                cp[len-1] = '\0';
+                            tmp_file = std::string(cp) + "/cf.XXXXXX.p";
+                        }
+                    }
+                }
+                fclose(inf);
+            }
+            else
+            {
+                package.log("zoom", YLOG_WARN|YLOG_ERRNO,
+                            "unable to open content config %s",
+                            m_p->content_config_file.c_str());
+                *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
+                // size the buffer from the path that is actually formatted
+                *addinfo = (char *) odr_malloc(
+                    odr, 60 + m_p->content_config_file.length());
+                sprintf(*addinfo, "unable to open content config %s",
+                        m_p->content_config_file.c_str());
+                return false;
+            }
+        }
+
+        if (proxyhostname.length() == 0)
+        {
+            package.log("zoom", YLOG_WARN, "no proxyhostname");
+            return true;
+        }
+        if (tmp_file.length() == 0)
+        {
+            package.log("zoom", YLOG_WARN, "no tmp_file");
+            return true;
+        }
+
+        char *fname = xstrdup(tmp_file.c_str());
         char *xx = strstr(fname, "XXXXXX");
         if (!xx)
         {
-            xx = fname + strlen(fname);
-            strcat(fname, "XXXXXX");
+            package.log("zoom", YLOG_WARN, "bad tmp_file %s", tmp_file.c_str());
+            *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
+            *addinfo = (char *)  odr_malloc(odr, 60 + tmp_file.length());
+            sprintf(*addinfo, "bad format of content tmp_file: %s",
+                    tmp_file.c_str());
+            xfree(fname);
+            return false;
         }
         char tmp_char = xx[6];
         sprintf(xx, "%06d", ((unsigned) rand()) % 1000000);
+        if (legacy_format)
+            b->cproxy_host = std::string(xx) + "." + proxyhostname;
+        else
+            b->cproxy_host = proxyhostname + "/" + xx;
         xx[6] = tmp_char;
 
         FILE *file = fopen(fname, "w");
@@ -845,8 +934,7 @@ bool yf::Zoom::Frontend::create_content_session(mp::Package &package,
             xfree(fname);
             return false;
         }
-        b->content_session_id.assign(xx, 6);
-        WRBUF w = wrbuf_alloc();
+        mp::wrbuf w;
         wrbuf_puts(w, "#content_proxy\n");
         wrbuf_printf(w, "connector: %s\n", b->sptr->contentConnector.c_str());
         if (authentication.length())
@@ -856,11 +944,10 @@ bool yf::Zoom::Frontend::create_content_session(mp::Package &package,
         if (realm.length())
             wrbuf_printf(w, "realm: %s\n", realm.c_str());
 
-        fwrite(wrbuf_buf(w), 1, wrbuf_len(w), file);
+        fwrite(w.buf(), 1, w.len(), file);
         fclose(file);
         package.log("zoom", YLOG_LOG, "content file: %s", fname);
         xfree(fname);
-        wrbuf_destroy(w);
     }
     return true;
 }
@@ -873,7 +960,10 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
     std::list<BackendPtr>::const_iterator map_it;
     if (m_backend && !m_backend->enable_explain && 
         m_backend->m_frontend_database == database)
+    {
+        m_backend->connect("", error, addinfo, odr);
         return m_backend;
+    }
 
     std::string input_args;
     std::string torus_db;
@@ -1003,7 +1093,7 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
         if (!doc)
         {
             *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
-            *addinfo = odr_strdup(odr, database.c_str());
+            *addinfo = odr_strdup(odr, torus_db.c_str());
             BackendPtr b;
             return b;
         }
@@ -1036,7 +1126,7 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
     if (!sptr)
     {
         *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
-        *addinfo = odr_strdup(odr, database.c_str());
+        *addinfo = odr_strdup(odr, torus_db.c_str());
         BackendPtr b;
         return b;
     }
@@ -1242,19 +1332,20 @@ void yf::Zoom::Frontend::prepare_elements(BackendPtr b,
     const char *syntax_name = 0;
     
     if (preferredRecordSyntax &&
-        !oid_oidcmp(preferredRecordSyntax, yaz_oid_recsyn_xml)
-        && element_set_name)
+        !oid_oidcmp(preferredRecordSyntax, yaz_oid_recsyn_xml))
     {
-        if (!strcmp(element_set_name, m_p->element_transform.c_str()))
+        if (element_set_name &&
+            !strcmp(element_set_name, m_p->element_transform.c_str()))
         {
             enable_pz2_retrieval = true;
             enable_pz2_transform = true;
         }
-        else if (!strcmp(element_set_name, m_p->element_raw.c_str()))
+        else if (element_set_name && 
+                 !strcmp(element_set_name, m_p->element_raw.c_str()))
         {
             enable_pz2_retrieval = true;
         }
-        else
+        else if (m_p->record_xsp)
         {
             enable_pz2_retrieval = true;
             enable_pz2_transform = true;
@@ -1407,28 +1498,21 @@ Z_Records *yf::Zoom::Frontend::get_records(Package &package,
     {  // only return records if no error and at least one record
 
         const char *xsl_parms[3];
-        char cproxy_host[1024];
-
-        if (b->enable_cproxy && b->content_session_id.length())
+        mp::wrbuf cproxy_host;
+        
+        if (b->enable_cproxy && b->cproxy_host.length())
         {
-            sprintf(cproxy_host, "%s.%s/",
-                    b->content_session_id.c_str(),
-                    m_p->content_proxy_server.c_str());
-            
-            char *q_cproxy_host = (char *) 
-                odr_malloc(odr, strlen(cproxy_host) + 3);
-            strcpy(q_cproxy_host, "\"");
-            strcat(q_cproxy_host, cproxy_host);
-            strcat(q_cproxy_host, "\"");
+            wrbuf_puts(cproxy_host, "\"");
+            wrbuf_puts(cproxy_host, b->cproxy_host.c_str());
+            wrbuf_puts(cproxy_host, "/\"");
 
             xsl_parms[0] = "cproxyhost";
-            xsl_parms[1] = q_cproxy_host;
+            xsl_parms[1] = wrbuf_cstr(cproxy_host);
             xsl_parms[2] = 0;
         }
         else
         {
             xsl_parms[0] = 0;
-            *cproxy_host = '\0';
         }
 
         char *odr_database = odr_strdup(odr,
@@ -1505,9 +1589,30 @@ Z_Records *yf::Zoom::Frontend::get_records(Package &package,
                     }
                     else
                     { 
+                        // first stage XSLT - per target
                         xsltStylesheetPtr xsp = b->xsp;
                         xmlDoc *rec_res = xsltApplyStylesheet(xsp, rec_doc,
                                                               xsl_parms);
+                        // append <metadata type="generated-url"> from URL recipe
+                        if (rec_res)
+                        {
+                            std::string res =
+                                mp::xml::url_recipe_handle(rec_res,
+                                                           b->sptr->urlRecipe);
+                            xmlNode *ptr = xmlDocGetRootElement(rec_res);
+                            while (ptr && ptr->type != XML_ELEMENT_NODE)
+                                ptr = ptr->next;
+                            if (ptr && res.length()) // need a root to attach to
+                            {
+                                xmlNode *c =
+                                    xmlNewChild(ptr, 0, BAD_CAST "metadata", 0);
+                                xmlNewProp(c, BAD_CAST "type", BAD_CAST
+                                           "generated-url");
+                                xmlNode * t = xmlNewText(BAD_CAST res.c_str());
+                                xmlAddChild(c, t);
+                            }
+                        }
+                        // second stage XSLT - common
                         if (rec_res && m_p->record_xsp &&
                             enable_record_transform)
                         {
@@ -1518,12 +1623,12 @@ Z_Records *yf::Zoom::Frontend::get_records(Package &package,
                                                           xsl_parms);
                             xmlFreeDoc(tmp_doc);
                         }
+                        // get result out of it
                         if (rec_res)
                         {
                             xsltSaveResultToString(&xmlrec_buf, &rec_len,
                                                    rec_res, xsp);
                             rec_buf = (const char *) xmlrec_buf;
-                            package.log("zoom", YLOG_LOG, "xslt successful");
                             package.log_write(rec_buf, rec_len);
 
                             xmlFreeDoc(rec_res);
@@ -1544,37 +1649,6 @@ Z_Records *yf::Zoom::Frontend::get_records(Package &package,
                     }
                 }
 
-                if (rec_buf)
-                {
-                    xmlDoc *doc = xmlParseMemory(rec_buf, rec_len);
-                    std::string res = 
-                        mp::xml::url_recipe_handle(doc, b->sptr->urlRecipe);
-                    if (res.length() && *cproxy_host)
-                    {
-                        size_t off = res.find_first_of("://");
-                        if (off != std::string::npos)
-                            res.insert(off + 3, cproxy_host);
-                    }
-                    if (res.length())
-                    {
-                        xmlNode *ptr = xmlDocGetRootElement(doc);
-                        while (ptr && ptr->type != XML_ELEMENT_NODE)
-                            ptr = ptr->next;
-                        xmlNode *c = 
-                            xmlNewChild(ptr, 0, BAD_CAST "metadata", 0);
-                        xmlNewProp(c, BAD_CAST "type", BAD_CAST
-                                   "generated-url");
-                        xmlNode * t = xmlNewText(BAD_CAST res.c_str());
-                        xmlAddChild(c, t);
-
-                        if (xmlrec_buf)
-                            xmlFree(xmlrec_buf);
-
-                        xmlDocDumpMemory(doc, &xmlrec_buf, &rec_len);
-                        rec_buf = (const char *) xmlrec_buf;
-                    }
-                    xmlFreeDoc(doc);
-                }
                 if (!npr)
                 {
                     if (!rec_buf)
@@ -1797,20 +1871,18 @@ next_proxy:
 
     Odr_int hits = 0;
     Z_Query *query = sr->query;
-    WRBUF ccl_wrbuf = 0;
-    WRBUF pqf_wrbuf = 0;
+    mp::wrbuf ccl_wrbuf;
+    mp::wrbuf pqf_wrbuf;
     std::string sortkeys;
 
     if (query->which == Z_Query_type_1 || query->which == Z_Query_type_101)
     {
         // RPN
-        pqf_wrbuf = wrbuf_alloc();
         yaz_rpnquery_to_wrbuf(pqf_wrbuf, query->u.type_1);
     }
     else if (query->which == Z_Query_type_2)
     {
         // CCL
-        ccl_wrbuf = wrbuf_alloc();
         wrbuf_write(ccl_wrbuf, (const char *) query->u.type_2->buf,
                     query->u.type_2->len);
     }
@@ -1863,7 +1935,7 @@ next_proxy:
             return;
         }
 
-        WRBUF sru_sortkeys_wrbuf = wrbuf_alloc();
+        mp::wrbuf sru_sortkeys_wrbuf;
         if (cql_sortby_to_sortkeys(cn, wrbuf_vp_puts, sru_sortkeys_wrbuf))
         {
             error = YAZ_BIB1_ILLEGAL_SORT_RELATION;
@@ -1872,16 +1944,12 @@ next_proxy:
             log_diagnostic(package, error, addinfo);
             apdu_res = odr.create_searchResponse(apdu_req, error, addinfo);
             package.response() = apdu_res;
-            wrbuf_destroy(sru_sortkeys_wrbuf);
             cql_parser_destroy(cp);
             return;
         }
-        WRBUF sort_spec_wrbuf = wrbuf_alloc();
+        mp::wrbuf sort_spec_wrbuf;
         yaz_srw_sortkeys_to_sort_spec(wrbuf_cstr(sru_sortkeys_wrbuf),
                                       sort_spec_wrbuf);
-        wrbuf_destroy(sru_sortkeys_wrbuf);
-
-        ccl_wrbuf = wrbuf_alloc();
         wrbuf_puts(ccl_wrbuf, ccl_buf);
         
         yaz_tok_cfg_t tc = yaz_tok_cfg_create();
@@ -1916,8 +1984,6 @@ next_proxy:
             }
         }
         yaz_tok_parse_destroy(tp);
-        wrbuf_destroy(sort_spec_wrbuf);
-
         cql_parser_destroy(cp);
     }
     else
@@ -1930,19 +1996,18 @@ next_proxy:
         return;
     }
 
-    if (ccl_wrbuf)
+    if (ccl_wrbuf.len())
     {
         // CCL to PQF
-        assert(pqf_wrbuf == 0);
+        assert(pqf_wrbuf.len() == 0);
         int cerror, cpos;
         struct ccl_rpn_node *cn;
         package.log("zoom", YLOG_LOG, "CCL: %s", wrbuf_cstr(ccl_wrbuf));
         cn = ccl_find_str(b->sptr->ccl_bibset, wrbuf_cstr(ccl_wrbuf),
                           &cerror, &cpos);
-        wrbuf_destroy(ccl_wrbuf);
         if (!cn)
         {
-            char *addinfo = odr_strdup(odr, ccl_err_msg(cerror));
+            char *addinfo = odr_strdup_null(odr, ccl_err_msg(cerror));
             error = YAZ_BIB1_MALFORMED_QUERY;
 
             switch (cerror)
@@ -1965,13 +2030,12 @@ next_proxy:
             package.response() = apdu_res;
             return;
         }
-        pqf_wrbuf = wrbuf_alloc();
         ccl_pquery(pqf_wrbuf, cn);
         package.log("zoom", YLOG_LOG, "RPN: %s", wrbuf_cstr(pqf_wrbuf));
         ccl_rpn_delete(cn);
     }
     
-    assert(pqf_wrbuf);
+    assert(pqf_wrbuf.len());
 
     ZOOM_query q = ZOOM_query_create();
     ZOOM_query_sortby2(q, b->sptr->sortStrategy.c_str(), sortkeys.c_str());
@@ -1981,7 +2045,7 @@ next_proxy:
         int status = 0;
         Z_RPNQuery *zquery;
         zquery = p_query_rpn(odr, wrbuf_cstr(pqf_wrbuf));
-        WRBUF wrb = wrbuf_alloc();
+        mp::wrbuf wrb;
             
         if (!strcmp(b->get_option("sru"), "solr"))
         {
@@ -2007,8 +2071,6 @@ next_proxy:
         }
         ZOOM_query_destroy(q);
         
-        wrbuf_destroy(wrb);
-        wrbuf_destroy(pqf_wrbuf);
         if (status)
         {
             error = YAZ_BIB1_MALFORMED_QUERY;
@@ -2025,7 +2087,6 @@ next_proxy:
         package.log("zoom", YLOG_LOG, "search PQF: %s", wrbuf_cstr(pqf_wrbuf));
         b->search(q, &hits, &error, &addinfo, odr);
         ZOOM_query_destroy(q);
-        wrbuf_destroy(pqf_wrbuf);
     }
 
     if (error && proxy_step)