New utility yaz_url: fetches HTTP content
author Adam Dickmeiss <adam@indexdata.dk>
Wed, 10 Aug 2011 13:03:53 +0000 (15:03 +0200)
committer Adam Dickmeiss <adam@indexdata.dk>
Wed, 10 Aug 2011 13:03:53 +0000 (15:03 +0200)
This is a simple wrapper around COMSTACK and HTTP utilities; deals
with proxy'ing.

include/yaz/Makefile.am
include/yaz/url.h [new file with mode: 0644]
src/Makefile.am
src/url.c [new file with mode: 0644]
util/Makefile.am
util/yaz-url.c [new file with mode: 0644]
win/makefile

index 823a8dd..d391988 100644 (file)
@@ -24,7 +24,7 @@ pkginclude_HEADERS= backend.h base64.h \
  zes-psched.h zes-admin.h zes-pset.h zes-update.h zes-update0.h \
  zoom.h z-charneg.h charneg.h soap.h srw.h zgdu.h matchstr.h json.h \
  file_glob.h dirent.h thread_id.h gettimeofday.h shptr.h thread_create.h \
- spipe.h stemmer.h
+ spipe.h stemmer.h url.h
 
 EXTRA_DIST = yaz-version.h.in
 
diff --git a/include/yaz/url.h b/include/yaz/url.h
new file mode 100644 (file)
index 0000000..ac26727
--- /dev/null
@@ -0,0 +1,87 @@
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2011 Index Data.
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Index Data nor the names of its contributors
+ *       may be used to endorse or promote products derived from this
+ *       software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file url.h
+ * \brief Fetch URL utility
+ */
+
+#ifndef YAZ_URL_H
+#define YAZ_URL_H
+
+#include <yaz/zgdu.h>
+
+YAZ_BEGIN_CDECL
+
+/** \brief handle for our URL fetcher */
+typedef struct yaz_url *yaz_url_t;
+
+/** \brief creates a URL fetcher handle
+    \returns handle
+*/
+YAZ_EXPORT yaz_url_t yaz_url_create(void);
+
+/** \brief destroys a URL fetcher
+    \param p handle
+    
+    Note: OK to pass NULL as p
+*/
+YAZ_EXPORT void yaz_url_destroy(yaz_url_t p);
+
+/** \brief sets proxy for URL fetcher
+    \param p handle
+    \param proxy proxy address, e.g. "localhost:3128"
+
+    Passing a proxy of NULL disables proxy use.
+*/
+YAZ_EXPORT void yaz_url_set_proxy(yaz_url_t p, const char *proxy);
+
+/** \brief executes the actual HTTP request (including redirects, etc)
+    \param p handle
+    \param uri URL
+    \param method HTTP method
+    \param headers HTTP headers to be used (NULL for no custom headers)
+    \param buf content buffer for HTTP request, NULL for empty content
+    \param len content length for HTTP request
+    \returns HTTP response; NULL on ERROR.
+*/
+YAZ_EXPORT Z_HTTP_Response *yaz_url_exec(yaz_url_t p, const char *uri,
+                                         const char *method,
+                                         Z_HTTP_Header *headers,
+                                         const char *buf, size_t len);
+YAZ_END_CDECL
+
+#endif
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
index 8f66427..b7d4369 100644 (file)
@@ -107,7 +107,7 @@ libyaz_la_SOURCES=base64.c version.c options.c log.c \
   iconv_encode_marc8.c iconv_encode_iso_8859_1.c iconv_encode_wchar.c \
   iconv_decode_marc8.c iconv_decode_iso5426.c iconv_decode_danmarc.c sc.c \
   json.c xml_include.c file_glob.c dirent.c mutex-p.h mutex.c condvar.c \
-  thread_id.c gettimeofday.c thread_create.c spipe.c
+  thread_id.c gettimeofday.c thread_create.c spipe.c url.c
 
 libyaz_la_LDFLAGS=-version-info $(YAZ_VERSION_INFO)
 
diff --git a/src/url.c b/src/url.c
new file mode 100644 (file)
index 0000000..eb0ed58
--- /dev/null
+++ b/src/url.c
@@ -0,0 +1,156 @@
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2011 Index Data
+ * See the file LICENSE for details.
+ */
+/**
+ * \file url.c
+ * \brief URL fetch utility
+ */
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <yaz/url.h>
+#include <yaz/comstack.h>
+#include <yaz/log.h>
+
+struct yaz_url { /* state behind the opaque yaz_url_t handle */
+    ODR odr_in; /* decode stream (ODR_DECODE); HTTP responses are decoded with it */
+    ODR odr_out; /* encode stream (ODR_ENCODE); HTTP requests are encoded with it */
+    char *proxy; /* xstrdup'ed proxy address, or 0 when no proxy is set */
+};
+
+/* Allocate a URL fetcher handle: one decode stream, one encode stream and
+   no proxy configured. */
+yaz_url_t yaz_url_create(void)
+{
+    yaz_url_t handle = xmalloc(sizeof(*handle));
+
+    handle->proxy = 0;
+    handle->odr_in = odr_createmem(ODR_DECODE);
+    handle->odr_out = odr_createmem(ODR_ENCODE);
+    return handle;
+}
+
+/* Release a URL fetcher handle together with both of its ODR streams and
+   its proxy string. Passing a NULL handle does nothing. */
+void yaz_url_destroy(yaz_url_t p)
+{
+    if (!p)
+        return;
+    odr_destroy(p->odr_in);
+    odr_destroy(p->odr_out);
+    xfree(p->proxy);
+    xfree(p);
+}
+
+/* Store a private copy of the proxy address in the handle, dropping any
+   previously stored one. A NULL or empty proxy leaves the handle without
+   a proxy. */
+void yaz_url_set_proxy(yaz_url_t p, const char *proxy)
+{
+    int have_proxy = proxy != 0 && *proxy != '\0';
+
+    xfree(p->proxy);
+    p->proxy = have_proxy ? xstrdup(proxy) : 0;
+}
+
+/* Perform an HTTP exchange for uri and follow redirects.
+ *
+ * p:       fetcher handle; whether a proxy is set selects both the request
+ *          form and the address that is connected to
+ * uri:     URL to fetch
+ * method:  HTTP method; copied into the request
+ * headers: caller supplied headers, linked after the generated ones (may be
+ *          NULL); the caller's list itself is not modified
+ * buf/len: request content; only used when buf is non-NULL and len non-zero
+ *
+ * Returns the decoded response, or 0 when encoding, address resolution,
+ * connecting, sending, receiving or decoding failed (each failure is logged
+ * with YLOG_WARN).
+ *
+ * The response is decoded with p->odr_in, so treat it as belonging to the
+ * handle. NOTE(review): the exact ODR lifetime rules are not visible in this
+ * file - confirm against the ODR documentation.
+ *
+ * A 301, 302 or 307 response that carries a Location header is followed
+ * with the same method, headers and content. At most 10 requests are issued
+ * per call; when that limit is hit the last redirect response is returned.
+ */
+Z_HTTP_Response *yaz_url_exec(yaz_url_t p, const char *uri,
+                              const char *method,
+                              Z_HTTP_Header *headers,
+                              const char *buf, size_t len)
+{
+    enum { max_requests = 10 }; /* first request plus followed redirects */
+    Z_HTTP_Response *res = 0;
+    int number_of_requests = 0;
+
+    /* The redirect path below resets the encode stream before building the
+       next request; do the same for the first request so that a handle can
+       be used for several calls. The decode stream is deliberately left
+       alone here: it may hold a response the caller is still using.
+       NOTE(review): assumes odr_reset() rewinds the encode buffer - confirm
+       against the ODR documentation. */
+    odr_reset(p->odr_out);
+
+    while (1)
+    {
+        void *add;
+        COMSTACK conn = 0;
+        int code;
+        struct Z_HTTP_Header **last_header_entry;
+        const char *location = 0;
+        Z_GDU *req_gdu = z_get_HTTP_Request_uri(p->odr_out, uri, 0,
+                                                p->proxy ? 1 : 0);
+        req_gdu->u.HTTP_Request->method = odr_strdup(p->odr_out, method);
+
+        res = 0;
+        /* find the end of the generated header list .. */
+        last_header_entry = &req_gdu->u.HTTP_Request->headers;
+        while (*last_header_entry)
+            last_header_entry = &(*last_header_entry)->next;
+        *last_header_entry = headers; /* .. and attach user headers */
+
+        if (buf && len)
+        {
+            req_gdu->u.HTTP_Request->content_buf = (char *) buf;
+            req_gdu->u.HTTP_Request->content_len = len;
+        }
+        if (!z_GDU(p->odr_out, &req_gdu, 0, 0))
+        {
+            yaz_log(YLOG_WARN, "Can not encode HTTP request URL:%s", uri);
+            return 0;
+        }
+        conn = cs_create_host_proxy(uri, 1, &add, p->proxy);
+        if (!conn)
+        {
+            yaz_log(YLOG_WARN, "Bad address for URL:%s", uri);
+        }
+        else if (cs_connect(conn, add) < 0)
+        {
+            yaz_log(YLOG_WARN, "Can not connect to URL:%s", uri);
+        }
+        else
+        {
+            /* distinct names: these used to shadow the buf/len parameters */
+            int out_len;
+            char *out_buf = odr_getbuf(p->odr_out, &out_len, 0);
+
+            if (cs_put(conn, out_buf, out_len) < 0)
+                yaz_log(YLOG_WARN, "cs_put failed URL:%s", uri);
+            else
+            {
+                char *netbuffer = 0;
+                int netlen = 0;
+                int cs_res = cs_get(conn, &netbuffer, &netlen);
+                if (cs_res <= 0)
+                {
+                    yaz_log(YLOG_WARN, "cs_get failed URL:%s", uri);
+                }
+                else
+                {
+                    Z_GDU *res_gdu;
+                    odr_setbuf(p->odr_in, netbuffer, cs_res, 0);
+                    if (!z_GDU(p->odr_in, &res_gdu, 0, 0)
+                        || res_gdu->which != Z_GDU_HTTP_Response)
+                    {
+                        yaz_log(YLOG_WARN, "HTTP decoding failed "
+                                "URL:%s", uri);
+                    }
+                    else
+                    {
+                        res = res_gdu->u.HTTP_Response;
+                    }
+                }
+                xfree(netbuffer);
+            }
+        }
+        if (conn)
+            cs_close(conn);
+        if (!res)
+            break;
+        code = res->code;
+        location = z_HTTP_header_lookup(res->headers, "Location");
+        if (++number_of_requests < max_requests &&
+            location && (code == 301 || code == 302 || code == 307))
+        {
+            /* location comes from the response decoded with odr_in, so it
+               is copied before that stream is reset */
+            odr_reset(p->odr_out);
+            uri = odr_strdup(p->odr_out, location);
+            odr_reset(p->odr_in);
+        }
+        else
+            break;
+    }
+    return res;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
index e2b519d..30029b3 100644 (file)
@@ -11,7 +11,7 @@ AM_CPPFLAGS=-I$(top_srcdir)/include $(XML2_CFLAGS) $(ICU_CPPFLAGS)
 
 bin_PROGRAMS = yaz-marcdump yaz-iconv yaz-illclient yaz-icu yaz-json-parse
 noinst_PROGRAMS = cclsh cql2pqf cql2xcql srwtst yaz-benchmark \
- yaz-xmlquery yaz-record-conv
+ yaz-xmlquery yaz-record-conv yaz-url
 
 # MARC dumper utility
 yaz_marcdump_SOURCES = marcdump.c
@@ -51,3 +51,7 @@ yaz_json_parse_LDADD = ../src/libyaz.la
 yaz_record_conv_SOURCES = yaz-record-conv.c
 yaz_record_conv_LDADD = ../src/libyaz.la
 
+yaz_url_SOURCES = yaz-url.c
+yaz_url_LDADD =../src/libyaz.la
+
+
diff --git a/util/yaz-url.c b/util/yaz-url.c
new file mode 100644 (file)
index 0000000..bdc912c
--- /dev/null
@@ -0,0 +1,136 @@
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2011 Index Data
+ * See the file LICENSE for details.
+ */
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <yaz/url.h>
+#include <yaz/options.h>
+#include <yaz/log.h>
+
+/* Print the command line summary on stdout and end the program with exit
+   code 1. Does not return. */
+static void usage(void)
+{
+    printf("yaz-url [options] url ..\n");
+    printf(" -H name=value       HTTP header\n");
+    printf(" -p file             POST content of file\n");
+    printf(" -u user/password    Basic HTTP auth\n");
+    printf(" -x proxy            HTTP proxy\n");
+    exit(1);
+}
+
+/* Read the whole of file fname into a freshly allocated buffer.
+ *
+ * fname: path of the file; it is opened in binary mode
+ * len:   receives the number of bytes read
+ *
+ * Returns the buffer, which the caller releases with xfree(). The content
+ * is not NUL-terminated. Any failure (open, seek, tell, read) is logged as
+ * fatal and ends the program with exit code 1.
+ */
+static char *get_file(const char *fname, size_t *len)
+{
+    char *buf = 0;
+    long fsize;
+    FILE *inf = fopen(fname, "rb");
+    if (!inf)
+    {
+        yaz_log(YLOG_FATAL|YLOG_ERRNO, "Could not open %s", fname);
+        exit(1);
+    }
+    if (fseek(inf, 0L, SEEK_END))
+    {
+        yaz_log(YLOG_FATAL|YLOG_ERRNO, "fseek of %s failed", fname);
+        exit(1);
+    }
+    fsize = ftell(inf);
+    if (fsize < 0)
+    {
+        /* unchecked, -1 would turn into a huge size_t allocation request */
+        yaz_log(YLOG_FATAL|YLOG_ERRNO, "ftell of %s failed", fname);
+        exit(1);
+    }
+    if (fseek(inf, 0L, SEEK_SET))
+    {
+        yaz_log(YLOG_FATAL|YLOG_ERRNO, "fseek of %s failed", fname);
+        exit(1);
+    }
+    *len = (size_t) fsize;
+    /* one spare byte, so that an empty file never asks for 0 bytes */
+    buf = xmalloc(*len + 1);
+    if (fread(buf, 1, *len, inf) != *len)
+    {
+        /* a short read would leave uninitialised bytes in the POST body */
+        yaz_log(YLOG_FATAL, "Could not read all of %s", fname);
+        exit(1);
+    }
+    fclose(inf);
+    return buf;
+}
+
+/* Command line front end for the URL fetcher.
+ *
+ * Arguments are handled strictly in the order given, so an option only
+ * affects the URLs that follow it:
+ *   -h             print usage and exit
+ *   -H name=value  add a request header (split at the first '=')
+ *   -p file        use the file content as request content; method is POST
+ *   -u user/pass   add Basic authentication (split at the first '/';
+ *                  without '/' the whole argument is the user name)
+ *   -x proxy       fetch through the given proxy
+ *   url            fetch it and write the response content to stdout
+ *
+ * Exit code is 1 when a -H option lacks '=', a fetch returned no response
+ * or writing to stdout failed; otherwise 0.
+ */
+int main(int argc, char **argv)
+{
+    int ret;
+    char *arg;
+    yaz_url_t p = yaz_url_create();
+    char *post_buf = 0;
+    size_t post_len = 0;
+    const char *method = "GET";
+    Z_HTTP_Response *http_response;
+    Z_HTTP_Header *http_headers = 0;
+    ODR odr = odr_createmem(ODR_ENCODE); /* owns header names/values */
+    int exit_code = 0;
+
+    while ((ret = options("hH:p:u:x:", argv, argc, &arg))
+           != YAZ_OPTIONS_EOF)
+    {
+        char *cp; /* position of the separator inside arg, if any */
+
+        switch (ret)
+        {
+        case 'h':
+            usage();
+            break;
+        case 'H':
+            cp = strchr(arg, '=');
+            if (!cp)
+            {
+                yaz_log(YLOG_FATAL, "bad header option (missing =): %s", arg);
+                exit_code = 1;
+            }
+            else
+            {
+                /* copy the part before '=' so it can be terminated */
+                char *name = odr_malloc(odr, 1 + cp - arg);
+                memcpy(name, arg, cp - arg);
+                name[cp - arg] = '\0';
+                z_HTTP_header_add(odr, &http_headers, name, cp + 1);
+            }
+            break;
+        case 'p':
+            xfree(post_buf); /* a later -p replaces an earlier one */
+            post_buf = get_file(arg, &post_len);
+            method = "POST";
+            break;
+        case 'u':
+            cp = strchr(arg, '/');
+            if (cp)
+            {
+                char *user = odr_malloc(odr, 1 + cp - arg);
+                memcpy(user, arg, cp - arg);
+                user[cp - arg] = '\0';
+                z_HTTP_header_add_basic_auth(odr, &http_headers, user,
+                                             cp + 1);
+            }
+            else
+                z_HTTP_header_add_basic_auth(odr, &http_headers, arg, 0);
+            break;
+        case 'x':
+            yaz_url_set_proxy(p, arg);
+            break;
+        case 0:
+            http_response = yaz_url_exec(p, arg, method, http_headers,
+                                         post_buf, post_len);
+            if (!http_response)
+                exit_code = 1;
+            else if (http_response->content_buf
+                     && http_response->content_len > 0)
+            {
+                /* only call fwrite when there is content, so that it never
+                   receives a NULL buffer */
+                size_t want = (size_t) http_response->content_len;
+                if (fwrite(http_response->content_buf, 1, want, stdout)
+                    != want)
+                {
+                    yaz_log(YLOG_WARN|YLOG_ERRNO, "write to stdout failed");
+                    exit_code = 1;
+                }
+            }
+            break;
+        default:
+            usage();
+        }
+    }
+    xfree(post_buf);
+    yaz_url_destroy(p);
+    odr_destroy(odr);
+    exit(exit_code);
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
index 3f8cc6f..51e6d1c 100644 (file)
@@ -42,7 +42,8 @@ BISON=bison
 
 default: all
 
-all: dirs generate dll sc_test client ztest yazicu zoomsh utilprog testprog iconv icu libxml2 libxslt
+all: dirs generate dll sc_test client ztest yazicu zoomsh utilprog \
+ testprog iconv icu libxml2 libxslt $(YAZ_URL)
 
 NSIS="c:\program files\nsis\makensis.exe"
 HHC="c:\program files\html help workshop\hhc.exe"
@@ -119,6 +120,7 @@ YAZ_COND_IMPLIB=$(LIBDIR)\yaz_cond4.lib
 
 CLIENT=$(BINDIR)\yaz-client.exe
 YAZ_ICU=$(BINDIR)\yaz-icu.exe
+YAZ_URL=$(BINDIR)\yaz-url.exe
 ZOOMSH=$(BINDIR)\zoomsh.exe
 ZTEST=$(BINDIR)\yaz-ztest.exe
 SC_TEST=$(BINDIR)\sc_test.exe
@@ -342,6 +344,7 @@ YAZ_CLIENT_OBJS= \
    $(OBJDIR)\fhistory.obj
 
 YAZ_ICU_OBJS= $(OBJDIR)\yaz-icu.obj 
+YAZ_URL_OBJS= $(OBJDIR)\yaz-url.obj 
 COND_DLL_OBJS= $(OBJDIR)\condvar.obj
 
 ZTEST_OBJS= \
@@ -540,7 +543,8 @@ MISC_OBJS= \
    $(OBJDIR)\xml_include.obj \
    $(OBJDIR)\file_glob.obj \
    $(OBJDIR)\thread_id.obj \
-   $(OBJDIR)\dirent.obj
+   $(OBJDIR)\dirent.obj \
+   $(OBJDIR)\url.obj
 
 Z3950_OBJS= \
    $(OBJDIR)\z-date.obj\
@@ -925,6 +929,9 @@ $(YAZ_ICU) : "$(BINDIR)" $(YAZ_ICU_OBJS) $(YAZ_ICU_DLL)
        $(LINK_PROGRAM) $(ICU_LIBS) $(YAZ_ICU_IMPLIB) $(YAZ_ICU_OBJS) /out:$@
        $(MT) -manifest $@.manifest -outputresource:$@;1
 
+$(YAZ_URL) : "$(BINDIR)" $(YAZ_URL_OBJS) $(YAZ_DLL)
+       $(LINK_PROGRAM) $(YAZ_URL_OBJS) /out:$@
+       $(MT) -manifest $@.manifest -outputresource:$@;1
 
 $(SC_TEST) : "$(BINDIR)" $(SC_TEST_OBJS) $(YAZ_DLL)
        $(LINK_PROGRAM) $(SC_TEST_OBJS) /out:$@