* Copyright (C) 2005-2006, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: record_conv.c,v 1.1 2006-05-02 20:47:45 adam Exp $
+ * $Id: record_conv.c,v 1.2 2006-05-03 13:04:46 adam Exp $
*/
/**
* \file record_conv.c
#include <config.h>
#endif
-#if HAVE_XML2
-#include <libxml/parser.h>
-#include <libxml/tree.h>
-#endif
-
#include <string.h>
-
+#include <yaz/yaz-iconv.h>
+#include <yaz/marcdisp.h>
#include <yaz/record_conv.h>
#include <yaz/wrbuf.h>
#include <yaz/xmalloc.h>
#include <yaz/nmem.h>
+#include <yaz/tpath.h>
+
+#if HAVE_XSLT
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <libxml/xinclude.h>
+#include <libxslt/xsltutils.h>
+#include <libxslt/transform.h>
/** \brief The internal structure for yaz_record_conv_t */
struct yaz_record_conv_struct {
/** string buffer for error messages */
WRBUF wr_error;
+
+ /** path for opening files */
+ char *path;
};
/** \brief tranformation types (rule types) */
enum YAZ_RECORD_CONV_RULE
{
YAZ_RECORD_CONV_RULE_XSLT,
- YAZ_RECORD_CONV_RULE_MARC_TO_XML,
- YAZ_RECORD_CONV_RULE_XML_TO_MARC
+ YAZ_RECORD_CONV_RULE_MARC
};
/** \brief tranformation info (rule info) */
enum YAZ_RECORD_CONV_RULE which;
union {
struct {
- const char *stylesheet;
+ xsltStylesheetPtr xsp;
+ int dummy;
} xslt;
struct {
- const char *charset;
- } marc_to_xml;
- struct {
- const char *charset;
- } xml_to_marc;
+ yaz_iconv_t iconv_t;
+ int input_format;
+ int output_format;
+ } marc;
} u;
struct yaz_record_conv_rule *next;
};
+/** reset rules+configuration */
+static void yaz_record_conv_reset(yaz_record_conv_t p)
+{
+ struct yaz_record_conv_rule *r;
+ for (r = p->rules; r; r = r->next)
+ {
+ if (r->which == YAZ_RECORD_CONV_RULE_MARC)
+ {
+ if (r->u.marc.iconv_t)
+ yaz_iconv_close(r->u.marc.iconv_t);
+ }
+ else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
+ {
+ xsltFreeStylesheet(r->u.xslt.xsp);
+ }
+ }
+ wrbuf_rewind(p->wr_error);
+ nmem_reset(p->nmem);
+
+ p->rules = 0;
+
+ p->rules_p = &p->rules;
+}
+
yaz_record_conv_t yaz_record_conv_create()
{
yaz_record_conv_t p = xmalloc(sizeof(*p));
p->nmem = nmem_create();
p->wr_error = wrbuf_alloc();
+ p->rules = 0;
+ p->path = 0;
+
+ yaz_record_conv_reset(p);
return p;
}
{
if (p)
{
+ yaz_record_conv_reset(p);
nmem_destroy(p->nmem);
wrbuf_free(p->wr_error, 1);
+ xfree(p->path);
xfree(p);
}
}
-#if HAVE_XML2
static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
enum YAZ_RECORD_CONV_RULE type)
{
return r;
}
-static void yaz_record_conv_reset(yaz_record_conv_t p)
-{
- wrbuf_rewind(p->wr_error);
- nmem_reset(p->nmem);
- p->rules = 0;
- p->rules_p = &p->rules;
-}
-
static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
{
struct _xmlAttr *attr;
return -1;
}
}
- if (stylesheet)
+ if (!stylesheet)
{
- struct yaz_record_conv_rule *r =
- add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
- r->u.xslt.stylesheet = nmem_strdup(p->nmem, stylesheet);
- return 0;
+ wrbuf_printf(p->wr_error, "Missing attribute 'stylesheet'");
+ return -1;
}
- wrbuf_printf(p->wr_error, "Missing attribute 'stylesheet'");
- return -1;
-}
-
-static int conv_marc_to_xml(yaz_record_conv_t p, const xmlNode *ptr)
-{
- struct _xmlAttr *attr;
- const char *charset = 0;
- struct yaz_record_conv_rule *r;
-
- for (attr = ptr->properties; attr; attr = attr->next)
+ else
{
- if (!xmlStrcmp(attr->name, BAD_CAST "charset") &&
- attr->children && attr->children->type == XML_TEXT_NODE)
- charset = (const char *) attr->children->content;
- else
+ char fullpath[1024];
+ xsltStylesheetPtr xsp;
+ if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
{
- wrbuf_printf(p->wr_error, "Bad attribute '%s'."
- "Expected charset.", attr->name);
+ wrbuf_printf(p->wr_error, "could not locate '%s'. Path=%s",
+ stylesheet, p->path);
+ return -1;
+ }
+ xsp = xsltParseStylesheetFile((xmlChar*) fullpath);
+ if (!xsp)
+ {
+ wrbuf_printf(p->wr_error, "xsltParseStylesheetFile failed'");
return -1;
}
+ else
+ {
+ struct yaz_record_conv_rule *r =
+ add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
+ r->u.xslt.xsp = xsp;
+ }
}
- r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC_TO_XML);
- if (charset)
- r->u.marc_to_xml.charset = nmem_strdup(p->nmem, charset);
- else
- r->u.marc_to_xml.charset = 0;
return 0;
}
-static int conv_xml_to_marc(yaz_record_conv_t p, const xmlNode *ptr)
+static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
{
struct _xmlAttr *attr;
- const char *charset = 0;
+ const char *input_charset = 0;
+ const char *output_charset = 0;
+ const char *input_format = 0;
+ const char *output_format = 0;
+ int input_format_mode = 0;
+ int output_format_mode = 0;
struct yaz_record_conv_rule *r;
+ yaz_iconv_t cd = 0;
for (attr = ptr->properties; attr; attr = attr->next)
{
- if (!xmlStrcmp(attr->name, BAD_CAST "charset") &&
+ if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
+ attr->children && attr->children->type == XML_TEXT_NODE)
+ input_charset = (const char *) attr->children->content;
+ else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
attr->children && attr->children->type == XML_TEXT_NODE)
- charset = (const char *) attr->children->content;
+ output_charset = (const char *) attr->children->content;
+ else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
+ attr->children && attr->children->type == XML_TEXT_NODE)
+ input_format = (const char *) attr->children->content;
+ else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
+ attr->children && attr->children->type == XML_TEXT_NODE)
+ output_format = (const char *) attr->children->content;
else
{
- wrbuf_printf(p->wr_error, "Bad attribute '%s'."
- "Expected charset.", attr->name);
+ wrbuf_printf(p->wr_error, "Bad attribute '%s'.", attr->name);
return -1;
}
}
- r = add_rule(p, YAZ_RECORD_CONV_RULE_XML_TO_MARC);
- if (charset)
- r->u.xml_to_marc.charset = nmem_strdup(p->nmem, charset);
+ if (!input_format)
+ {
+ wrbuf_printf(p->wr_error, "Attribute 'inputformat' required");
+ return -1;
+ }
+ else if (!strcmp(input_format, "marc"))
+ {
+ input_format_mode = YAZ_MARC_ISO2709;
+ }
+ else if (!strcmp(input_format, "xml"))
+ {
+ input_format_mode = YAZ_MARC_MARCXML;
+ /** Libxml2 generates UTF-8 encoding by default .
+ So we convert from UTF-8 to outputcharset (if defined)
+ */
+ if (!input_charset && output_charset)
+ input_charset = "utf-8";
+ }
+ else
+ {
+ wrbuf_printf(p->wr_error, "Bad inputformat: '%s'", input_format);
+ return -1;
+ }
+
+ if (!output_format)
+ {
+ wrbuf_printf(p->wr_error, "Attribute 'outputformat' required");
+ return -1;
+ }
+ else if (!strcmp(output_format, "line"))
+ {
+ output_format_mode = YAZ_MARC_LINE;
+ }
+ else if (!strcmp(output_format, "marcxml"))
+ {
+ output_format_mode = YAZ_MARC_MARCXML;
+ if (input_charset && !output_charset)
+ output_charset = "utf-8";
+ }
+ else if (!strcmp(output_format, "marc"))
+ {
+ output_format_mode = YAZ_MARC_ISO2709;
+ }
+ else if (!strcmp(output_format, "marcxchange"))
+ {
+ output_format_mode = YAZ_MARC_XCHANGE;
+ if (input_charset && !output_charset)
+ output_charset = "utf-8";
+ }
else
- r->u.xml_to_marc.charset = 0;
+ {
+ wrbuf_printf(p->wr_error, "Bad outputformat: '%s'", input_format);
+ return -1;
+ }
+ if (input_charset && output_charset)
+ {
+ cd = yaz_iconv_open(output_charset, input_charset);
+ if (!cd)
+ {
+ wrbuf_printf(p->wr_error, "Unsupported character set mamping"
+ " inputcharset=%s outputcharset=%s",
+ input_charset, output_charset);
+ return -1;
+ }
+ }
+ else if (input_charset)
+ {
+ wrbuf_printf(p->wr_error, "Attribute 'outputcharset' missing");
+ return -1;
+ }
+ else if (output_charset)
+ {
+ wrbuf_printf(p->wr_error, "Attribute 'inputcharset' missing");
+ return -1;
+ }
+ r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
+ r->u.marc.iconv_t = cd;
+
+ r->u.marc.input_format = input_format_mode;
+ r->u.marc.output_format = output_format_mode;
return 0;
}
-
int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v)
{
const xmlNode *ptr = ptr_v;
if (conv_xslt(p, ptr))
return -1;
}
- else if (!strcmp((const char *) ptr->name, "marc_to_xml"))
- {
- if (conv_marc_to_xml(p, ptr))
- return -1;
- }
- else if (!strcmp((const char *) ptr->name, "xml_to_marc"))
+ else if (!strcmp((const char *) ptr->name, "marc"))
{
- if (conv_xml_to_marc(p, ptr))
+ if (conv_marc(p, ptr))
return -1;
}
else
{
wrbuf_printf(p->wr_error, "Bad element '%s'."
- "Expected xslt, marc_to_xml,...", ptr->name);
+ "Expected marc, xslt, ..", ptr->name);
return -1;
}
}
return 0;
}
+int yaz_record_conv_record(yaz_record_conv_t p, const char *input_record,
+ WRBUF output_record)
+{
+ int ret = 0;
+ WRBUF record = output_record; /* pointer transfer */
+ struct yaz_record_conv_rule *r = p->rules;
+ wrbuf_rewind(p->wr_error);
+
+ wrbuf_puts(record, input_record);
+ for (; ret == 0 && r; r = r->next)
+ {
+ if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
+ {
+ xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
+ wrbuf_len(record));
+ if (!doc)
+ {
+ wrbuf_printf(p->wr_error, "xmlParseMemory failed");
+ ret = -1;
+ }
+ else
+ {
+ xmlDocPtr res = xsltApplyStylesheet(r->u.xslt.xsp, doc, 0);
+ if (res)
+ {
+ xmlChar *out_buf;
+ int out_len;
+ xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
+
+ wrbuf_rewind(record);
+ wrbuf_write(record, (const char *) out_buf, out_len);
+
+ xmlFree(out_buf);
+ xmlFreeDoc(res);
+ }
+ else
+ {
+ wrbuf_printf(p->wr_error, "xsltApplyStylesheet faailed");
+ ret = -1;
+ }
+ xmlFreeDoc(doc);
+ }
+ }
+ else if (r->which == YAZ_RECORD_CONV_RULE_MARC)
+ {
+ yaz_marc_t mt = yaz_marc_create();
+
+ yaz_marc_xml(mt, r->u.marc.output_format);
+
+ if (r->u.marc.iconv_t)
+ yaz_marc_iconv(mt, r->u.marc.iconv_t);
+ if (r->u.marc.input_format == YAZ_MARC_ISO2709)
+ {
+ int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
+ wrbuf_len(record));
+ if (sz > 0)
+ ret = 0;
+ else
+ ret = -1;
+ }
+ else if (r->u.marc.input_format == YAZ_MARC_MARCXML)
+ {
+ xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
+ wrbuf_len(record));
+ if (!doc)
+ {
+ wrbuf_printf(p->wr_error, "xmlParseMemory failed");
+ ret = -1;
+ }
+ else
+ {
+ ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
+ if (ret)
+ wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
+ }
+ xmlFreeDoc(doc);
+ }
+ else
+ {
+ wrbuf_printf(p->wr_error, "unsupported input format");
+ ret = -1;
+ }
+ if (ret == 0)
+ {
+ wrbuf_rewind(record);
+ ret = yaz_marc_write_mode(mt, record);
+ if (ret)
+ wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
+ }
+ yaz_marc_destroy(mt);
+ }
+ }
+ return ret;
+}
+
#else
-/* HAVE_XML2 */
+/* !HAVE_XSLT */
int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v)
{
wrbuf_rewind(p->wr_error);
- wrbuf_printf(p->wr_error, "No XML support for yaz_record_conv");
+ wrbuf_printf(p->wr_error, "No XML support: yaz_record_conv_configure");
+ return -1;
+}
+
+int yaz_record_conv_record(yaz_record_conv_t p, const char *input_record,
+ WRBUF output_record);
+{
+ wrbuf_rewind(p->wr_error);
+ wrbuf_printf(p->wr_error, "No XML support: yaz_record_conv_record");
return -1;
}
return wrbuf_buf(p->wr_error);
}
+void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
+{
+ if (p)
+ {
+ xfree(p->path);
+ p->path = 0;
+ if (path)
+ p->path = xstrdup(path);
+ }
+}
+
/*
* Local variables:
* c-basic-offset: 4
* Copyright (C) 2005-2006, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: tst_record_conv.c,v 1.1 2006-05-02 20:47:46 adam Exp $
+ * $Id: tst_record_conv.c,v 1.2 2006-05-03 13:04:46 adam Exp $
*
*/
#include <yaz/record_conv.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
-yaz_record_conv_t conv_from_xml(const char *xmlstring, WRBUF w)
+yaz_record_conv_t conv_configure(const char *xmlstring, WRBUF w)
{
xmlDocPtr doc = xmlParseMemory(xmlstring, strlen(xmlstring));
if (!doc)
{
xmlNodePtr ptr = xmlDocGetRootElement(doc);
yaz_record_conv_t p = yaz_record_conv_create();
-
+
+ if (p)
+ {
+ const char *srcdir = getenv("srcdir");
+ if (srcdir)
+ yaz_record_conv_set_path(p, srcdir);
+ }
if (!ptr)
{
wrbuf_printf(w, "xmlDocGetRootElement");
}
}
-int conv_from_xml_compare(const char *xmlstring, const char *expect_error,
- yaz_record_conv_t *pt)
+int conv_configure_test(const char *xmlstring, const char *expect_error,
+ yaz_record_conv_t *pt)
{
WRBUF w = wrbuf_alloc();
int ret;
- yaz_record_conv_t p = conv_from_xml(xmlstring, w);
+ yaz_record_conv_t p = conv_configure(xmlstring, w);
if (!p)
{
if (expect_error && !strcmp(wrbuf_buf(w), expect_error))
ret = 1;
else
+ {
ret = 0;
+ printf("%.*s\n", wrbuf_len(w), wrbuf_buf(w));
+ }
}
else
{
return ret;
}
-static void tst()
+static void tst_configure()
+{
+ YAZ_CHECK(conv_configure_test("<bad", "xmlParseMemory", 0));
+ YAZ_CHECK(conv_configure_test("<bad/>", "Missing 'convert' element", 0));
+ YAZ_CHECK(conv_configure_test("<convert/>", 0, 0));
+ YAZ_CHECK(conv_configure_test("<convert><bad/></convert>",
+ "Bad element 'bad'."
+ "Expected marc, xslt, ..", 0));
+ YAZ_CHECK(conv_configure_test("<convert>"
+ "<xslt stylesheet=\"tst_record_conv.xsl\"/>"
+ "<marc"
+ " inputcharset=\"marc-8\""
+ " outputcharset=\"marc-8\""
+ "/>"
+ "</convert>",
+ "Attribute 'inputformat' required", 0));
+ YAZ_CHECK(conv_configure_test("<convert>"
+ "<xslt stylesheet=\"tst_record_conv.xsl\"/>"
+ "<marc"
+ " inputcharset=\"utf-8\""
+ " outputcharset=\"marc-8\""
+ " inputformat=\"xml\""
+ " outputformat=\"marc\""
+ "/>"
+ "</convert>",
+ 0, 0));
+}
+
+static int conv_convert_test(yaz_record_conv_t p,
+ const char *input_record,
+ const char *output_expect_record)
+{
+ int ret = 0;
+ if (!p)
+ {
+ YAZ_CHECK(ret);
+ }
+ else
+ {
+ WRBUF output_record = wrbuf_alloc();
+ int r = yaz_record_conv_record(p, input_record, output_record);
+ if (r)
+ {
+ if (output_expect_record)
+ {
+ printf("yaz_record_conv error=%s\n",
+ yaz_record_conv_get_error(p));
+ ret = 0;
+ }
+ else
+ ret = 1;
+ }
+ else
+ {
+ if (!output_expect_record)
+ {
+ ret = 0;
+ }
+ else if (strlen(output_expect_record) != wrbuf_len(output_record))
+ {
+ int expect_len = strlen(output_expect_record);
+ ret = 0;
+ printf("output_record expect-len=%d got-len=%d\n", expect_len,
+ wrbuf_len(output_record));
+ printf("got-output_record = %.*s\n",
+ wrbuf_len(output_record), wrbuf_buf(output_record));
+ printf("output_expect_record = %s\n",
+ output_expect_record);
+ }
+ else if (memcmp(output_expect_record, wrbuf_buf(output_record),
+ strlen(output_expect_record)))
+ {
+ ret = 0;
+ printf("got-output_record = %.*s\n",
+ wrbuf_len(output_record), wrbuf_buf(output_record));
+ printf("output_expect_record = %s\n",
+ output_expect_record);
+ }
+ else
+ {
+ ret = 1;
+ }
+ }
+ wrbuf_free(output_record, 1);
+ }
+ return ret;
+}
+
+static void tst_convert()
{
- YAZ_CHECK(conv_from_xml_compare("<bad", "xmlParseMemory", 0));
- YAZ_CHECK(conv_from_xml_compare("<bad/>", "Missing 'convert' element", 0));
- YAZ_CHECK(conv_from_xml_compare("<convert/>", 0, 0));
- YAZ_CHECK(conv_from_xml_compare("<convert><bad/></convert>",
- "Bad element 'bad'."
- "Expected xslt, marc_to_xml,...", 0));
- YAZ_CHECK(conv_from_xml_compare("<convert>"
- "<xslt stylesheet=\"x.xsl\"/>"
- "<marc_to_xml charset=\"marc-8\"/>"
- "</convert>",
- 0, 0));
+ yaz_record_conv_t p = 0;
+ const char *marcxml_rec =
+ "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
+ " <leader>00080nam a22000498a 4500</leader>\n"
+ " <controlfield tag=\"001\"> 11224466 </controlfield>\n"
+ " <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
+ " <subfield code=\"a\"> 11224466 </subfield>\n"
+ " </datafield>\n"
+ "</record>\n";
+ const char *iso2709_rec =
+ "\x30\x30\x30\x38\x30\x6E\x61\x6D\x20\x61\x32\x32\x30\x30\x30\x34"
+ "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
+ "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x37\x30\x30\x30\x31\x33"
+ "\x1E\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x20\x20"
+ "\x1F\x61\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x1D";
+
+ YAZ_CHECK(conv_configure_test("<convert>"
+ "<marc"
+ " inputcharset=\"utf-8\""
+ " outputcharset=\"marc-8\""
+ " inputformat=\"xml\""
+ " outputformat=\"marc\""
+ "/>"
+ "</convert>",
+ 0, &p));
+ YAZ_CHECK(conv_convert_test(p, marcxml_rec, iso2709_rec));
+
+ YAZ_CHECK(conv_configure_test("<convert>"
+ "<marc"
+ " outputcharset=\"utf-8\""
+ " inputcharset=\"marc-8\""
+ " outputformat=\"marcxml\""
+ " inputformat=\"marc\""
+ "/>"
+ "</convert>",
+ 0, &p));
+ YAZ_CHECK(conv_convert_test(p, iso2709_rec, marcxml_rec));
+
+
+ YAZ_CHECK(conv_configure_test("<convert>"
+ "<xslt stylesheet=\"tst_record_conv.xsl\"/>"
+ "<xslt stylesheet=\"tst_record_conv.xsl\"/>"
+ "<marc"
+ " inputcharset=\"utf-8\""
+ " outputcharset=\"marc-8\""
+ " inputformat=\"xml\""
+ " outputformat=\"marc\""
+ "/>"
+ "<marc"
+ " outputcharset=\"utf-8\""
+ " inputcharset=\"marc-8\""
+ " outputformat=\"marcxml\""
+ " inputformat=\"marc\""
+ "/>"
+ "</convert>",
+ 0, &p));
+ YAZ_CHECK(conv_convert_test(p, marcxml_rec, marcxml_rec));
+
+
+
+ YAZ_CHECK(conv_configure_test("<convert>"
+ "<xslt stylesheet=\"tst_record_conv.xsl\"/>"
+ "<xslt stylesheet=\"tst_record_conv.xsl\"/>"
+ "<marc"
+ " outputcharset=\"marc-8\""
+ " inputformat=\"xml\""
+ " outputformat=\"marc\""
+ "/>"
+ "<marc"
+ " inputcharset=\"marc-8\""
+ " outputformat=\"marcxml\""
+ " inputformat=\"marc\""
+ "/>"
+ "</convert>",
+ 0, &p));
+ YAZ_CHECK(conv_convert_test(p, marcxml_rec, marcxml_rec));
+
}
+
#endif
int main(int argc, char **argv)
{
YAZ_CHECK_INIT(argc, argv);
#if HAVE_XML2
- tst();
+ tst_configure();
+ tst_convert();
#endif
YAZ_CHECK_TERM;
}