2 * Copyright (C) 2005-2007, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: record_conv.c,v 1.16 2007/05/06 20:12:20 adam Exp $
9 * \brief Record Conversions utility
17 #include <yaz/yaz-iconv.h>
18 #include <yaz/marcdisp.h>
19 #include <yaz/record_conv.h>
20 #include <yaz/wrbuf.h>
21 #include <yaz/xmalloc.h>
23 #include <yaz/tpath.h>
26 #include <libxml/parser.h>
27 #include <libxml/tree.h>
28 #include <libxml/xinclude.h>
30 #include <libxslt/xsltutils.h>
31 #include <libxslt/transform.h>
34 #include <libexslt/exslt.h>
37 /** \brief The internal structure behind the yaz_record_conv_t handle */
38 struct yaz_record_conv_struct {
39 /** \brief memory for configuration */
42 /** \brief head of the chain of conversion rules (allocated using NMEM);
    rules are linked through their next member */
43 struct yaz_record_conv_rule *rules;
45 /** \brief pointer to the last next-pointer in the chain, i.e. the slot
    that add_rule advances; reset makes it point at rules again */
46 struct yaz_record_conv_rule **rules_p;
48 /** \brief string buffer for error messages */
51 /** \brief path for opening files */
55 /** \brief transformation types (rule types); stored in the which member
    of a rule and tested when a rule is released or applied */
56 enum YAZ_RECORD_CONV_RULE
/* rule that applies an XSLT stylesheet (handled via u.xslt) */
58 YAZ_RECORD_CONV_RULE_XSLT,
/* rule that converts a MARC record (handled via u.marc) */
59 YAZ_RECORD_CONV_RULE_MARC
63 /** \brief transformation info (rule info); one node per configured
    conversion step, chained through next */
64 struct yaz_record_conv_rule {
/* rule type; the rest of the file checks it before touching u.marc or u.xslt */
65 enum YAZ_RECORD_CONV_RULE which;
/* parsed stylesheet of an XSLT rule (accessed as u.xslt.xsp elsewhere in this file) */
69 xsltStylesheetPtr xsp;
/* following rule in the chain, or null at the end (loops stop on a null next) */
78 struct yaz_record_conv_rule *next;
81 /** \brief reset rules+configuration

    Walks the rule chain and releases what each rule owns, clears the
    error buffer and points the append slot back at the head of the chain.
    \param p record conversion handle
*/
82 static void yaz_record_conv_reset(yaz_record_conv_t p)
85 struct yaz_record_conv_rule *r;
/* visit every rule currently in the chain */
86 for (r = p->rules; r; r = r->next)
88 if (r->which == YAZ_RECORD_CONV_RULE_MARC)
/* a MARC rule only has an iconv handle when a charset mapping was configured */
90 if (r->u.marc.iconv_t)
91 yaz_iconv_close(r->u.marc.iconv_t);
94 else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
/* an XSLT rule owns its parsed stylesheet */
96 xsltFreeStylesheet(r->u.xslt.xsp);
/* drop any error text left by an earlier call */
100 wrbuf_rewind(p->wr_error);
/* the next add_rule call links its rule at the head again */
105 p->rules_p = &p->rules;
/** \brief creates a record conversion handle

    Allocates the handle with xmalloc and gives it an NMEM block and an
    error buffer before running the common reset routine.
    NOTE(review): the reset routine reads p->rules, so rules must be
    initialised before that call - confirm in the full function body.
*/
108 yaz_record_conv_t yaz_record_conv_create()
110 yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
/* NMEM block that add_rule later allocates rules from */
111 p->nmem = nmem_create();
/* buffer read back through yaz_record_conv_get_error */
112 p->wr_error = wrbuf_alloc();
/* bring rule chain and error buffer into their start state */
119 yaz_record_conv_reset(p);
/** \brief destroys a record conversion handle

    Releases per-rule resources through the reset routine, then the NMEM
    block and the error buffer that the create function set up.
    \param p record conversion handle
*/
123 void yaz_record_conv_destroy(yaz_record_conv_t p)
/* closes iconv handles and frees stylesheets held by the rules */
127 yaz_record_conv_reset(p);
128 nmem_destroy(p->nmem);
129 wrbuf_destroy(p->wr_error);
135 /** \brief adds a rule

    Allocates a rule from the NMEM block of the handle and advances the
    append slot so that the following rule is linked after this one.
    \param p record conversion handle
    \param type rule type for the new rule
*/
136 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
137 enum YAZ_RECORD_CONV_RULE type)
/* NMEM allocation: the rule struct itself is not freed one by one in reset */
139 struct yaz_record_conv_rule *r = (struct yaz_record_conv_rule *)
140 nmem_malloc(p->nmem, sizeof(*r));
/* the next rule added will be stored in the next member of this one */
144 p->rules_p = &r->next;
148 /** \brief parse 'xslt' conversion node

    Reads the stylesheet attribute of the element, resolves the file name
    against the configured path, parses the stylesheet and appends an XSLT
    rule. Problems are described in the error buffer of the handle.
    \param p record conversion handle
    \param ptr the xslt element
*/
149 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
152 struct _xmlAttr *attr;
153 const char *stylesheet = 0;
/* scan the attributes of the element; only stylesheet with a text value is accepted */
155 for (attr = ptr->properties; attr; attr = attr->next)
157 if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
158 attr->children && attr->children->type == XML_TEXT_NODE)
159 stylesheet = (const char *) attr->children->content;
/* NOTE(review): the two adjacent literals below concatenate with no space
   between the closing quote mark and the word Expected */
162 wrbuf_printf(p->wr_error, "Bad attribute '%s'"
163 "Expected stylesheet.", attr->name);
/* message for an element that carries no stylesheet attribute */
169 wrbuf_printf(p->wr_error, "Element <xslt>: "
170 "attribute 'stylesheet' expected");
176 xsltStylesheetPtr xsp;
/* resolve the stylesheet name against p->path; the result goes to fullpath */
177 if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
/* NOTE(review): fullpath is printed here although the resolve call just
   reported failure - confirm it holds a defined string on this path */
179 wrbuf_printf(p->wr_error, "Element <xslt stylesheet=\"%s\"/>:"
180 " could not locate stylesheet '%s' with path '%s'",
181 stylesheet, fullpath, p->path);
/* parse the resolved file into a stylesheet object */
184 xsp = xsltParseStylesheetFile((xmlChar*) fullpath);
187 wrbuf_printf(p->wr_error, "Element <xslt stylesheet=\"%s\"/>:"
188 " parsing stylesheet '%s' with path '%s' failed,"
192 " EXSLT not supported",
194 stylesheet, fullpath, p->path);
/* stylesheet parsed: append an XSLT rule to the chain */
199 struct yaz_record_conv_rule *r =
200 add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
/* NOTE(review): looks like the fallback for builds without XSLT support -
   confirm against the surrounding preprocessor guards */
206 wrbuf_printf(p->wr_error, "xslt unsupported."
207 " YAZ compiled without XSLT support");
212 /** \brief parse 'marc' conversion node

    Collects the attributes inputformat, outputformat, inputcharset and
    outputcharset of the element and turns them into a MARC rule.

    Input formats recognised: marc (YAZ_MARC_ISO2709) and xml
    (YAZ_MARC_MARCXML). Output formats recognised: line, marcxml, marc and
    marcxchange. For xml input, and for marcxml or marcxchange output, a
    missing charset on that side is filled in as utf-8 when the charset on
    the other side was given.

    When both charsets are known an iconv handle is opened with
    yaz_iconv_open(output_charset, input_charset); when only one of them is
    known an error message naming the missing attribute is written. The
    iconv handle and both format modes are stored in the rule that is
    appended last. Problems are described in the error buffer of the handle.
    \param p record conversion handle
    \param ptr the marc element
*/
213 static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
215 struct _xmlAttr *attr;
216 const char *input_charset = 0;
217 const char *output_charset = 0;
218 const char *input_format = 0;
219 const char *output_format = 0;
220 int input_format_mode = 0;
221 int output_format_mode = 0;
222 struct yaz_record_conv_rule *r;
/* pick up the four known attributes; each needs a text node as value */
225 for (attr = ptr->properties; attr; attr = attr->next)
227 if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
228 attr->children && attr->children->type == XML_TEXT_NODE)
229 input_charset = (const char *) attr->children->content;
230 else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
231 attr->children && attr->children->type == XML_TEXT_NODE)
232 output_charset = (const char *) attr->children->content;
233 else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
234 attr->children && attr->children->type == XML_TEXT_NODE)
235 input_format = (const char *) attr->children->content;
236 else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
237 attr->children && attr->children->type == XML_TEXT_NODE)
238 output_format = (const char *) attr->children->content;
/* NOTE(review): the first two literals below concatenate with no space
   between the word attributes and the quoted name inputformat */
241 wrbuf_printf(p->wr_error, "Element <marc>: expected attributes"
242 "'inputformat', 'inputcharset', 'outputformat' or"
243 " 'outputcharset', got attribute '%s'",
/* inputformat is mandatory */
250 wrbuf_printf(p->wr_error, "Element <marc>: "
251 "attribute 'inputformat' required");
/* map the inputformat text to a yaz_marc format constant */
254 else if (!strcmp(input_format, "marc"))
256 input_format_mode = YAZ_MARC_ISO2709;
258 else if (!strcmp(input_format, "xml"))
260 input_format_mode = YAZ_MARC_MARCXML;
261 /** Libxml2 generates UTF-8 encoding by default .
262 So we convert from UTF-8 to outputcharset (if defined)
264 if (!input_charset && output_charset)
265 input_charset = "utf-8";
269 wrbuf_printf(p->wr_error, "Element <marc inputformat='%s'>: "
270 " Unsupported input format"
271 " defined by attribute value",
278 wrbuf_printf(p->wr_error,
279 "Element <marc>: attribute 'outputformat' required");
282 else if (!strcmp(output_format, "line"))
284 output_format_mode = YAZ_MARC_LINE;
286 else if (!strcmp(output_format, "marcxml"))
288 output_format_mode = YAZ_MARC_MARCXML;
289 if (input_charset && !output_charset)
290 output_charset = "utf-8";
292 else if (!strcmp(output_format, "marc"))
294 output_format_mode = YAZ_MARC_ISO2709;
296 else if (!strcmp(output_format, "marcxchange"))
298 output_format_mode = YAZ_MARC_XCHANGE;
299 if (input_charset && !output_charset)
300 output_charset = "utf-8";
304 wrbuf_printf(p->wr_error, "Element <marc outputformat='%s'>: "
305 " Unsupported output format"
306 " defined by attribute value",
310 if (input_charset && output_charset)
312 cd = yaz_iconv_open(output_charset, input_charset);
315 wrbuf_printf(p->wr_error,
316 "Element <marc inputcharset='%s' outputcharset='%s'>:"
317 " Unsupported character set mapping"
318 " defined by attribute values",
319 input_charset, output_charset);
323 else if (input_charset)
325 wrbuf_printf(p->wr_error, "Element <marc>: "
326 "attribute 'outputcharset' missing");
329 else if (output_charset)
331 wrbuf_printf(p->wr_error, "Element <marc>: "
332 "attribute 'inputcharset' missing");
335 r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
336 r->u.marc.iconv_t = cd;
338 r->u.marc.input_format = input_format_mode;
339 r->u.marc.output_format = output_format_mode;
/** \brief configures the handle from an XML element

    Discards the current rules, then inspects the child nodes of ptr:
    an xslt element is handed to conv_xslt and a marc element to conv_marc.
    An error message is written for other element names.
    \param p record conversion handle
    \param ptr element whose children describe the conversion steps
*/
343 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
/* start from an empty rule chain and an empty error buffer */
345 yaz_record_conv_reset(p);
347 /* parsing element children */
348 for (ptr = ptr->children; ptr; ptr = ptr->next)
/* node types other than elements get separate treatment here */
350 if (ptr->type != XML_ELEMENT_NODE)
352 if (!strcmp((const char *) ptr->name, "xslt"))
/* presumably a non-zero result means the helper failed - TODO confirm */
354 if (conv_xslt(p, ptr))
357 else if (!strcmp((const char *) ptr->name, "marc"))
359 if (conv_marc(p, ptr))
/* message for an element that is neither marc nor xslt */
364 wrbuf_printf(p->wr_error, "Element <backend>: expected "
365 "<marc> or <xslt> element, got <%s>"
/** \brief runs the configured conversion rules over one record

    The input bytes are copied into the output buffer, and every rule then
    reads that buffer and replaces its content with the converted form, so
    the result of one rule is the input of the next. Processing stops at
    the first rule that leaves ret non-zero; a description is then
    available in the error buffer of the handle.
    \param p record conversion handle
    \param input_record_buf start of the input record
    \param input_record_len length of the input record in bytes
*/
373 int yaz_record_conv_record(yaz_record_conv_t p,
374 const char *input_record_buf,
375 size_t input_record_len,
379 WRBUF record = output_record; /* pointer transfer */
380 struct yaz_record_conv_rule *r = p->rules;
/* forget error text from earlier calls */
381 wrbuf_rewind(p->wr_error);
/* seed the working buffer with the caller input */
383 wrbuf_write(record, input_record_buf, input_record_len);
/* apply the rules in chain order while no step has failed */
384 for (; ret == 0 && r; r = r->next)
386 if (r->which == YAZ_RECORD_CONV_RULE_MARC)
/* a fresh yaz_marc handle per rule and record, destroyed further down */
388 yaz_marc_t mt = yaz_marc_create();
/* hand the output format of the rule to the MARC handle */
390 yaz_marc_xml(mt, r->u.marc.output_format);
/* attach the charset conversion only when one was configured */
392 if (r->u.marc.iconv_t)
393 yaz_marc_iconv(mt, r->u.marc.iconv_t);
394 if (r->u.marc.input_format == YAZ_MARC_ISO2709)
/* read the record from the working buffer as ISO2709 */
396 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
403 else if (r->u.marc.input_format == YAZ_MARC_MARCXML)
/* parse the working buffer as XML, then read MARC from the root element */
405 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
409 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
414 ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
416 wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
422 wrbuf_printf(p->wr_error, "unsupported input format");
/* replace the buffer content with the record written in the output format */
427 wrbuf_rewind(record);
428 ret = yaz_marc_write_mode(mt, record);
430 wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
432 yaz_marc_destroy(mt);
435 else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
/* the working buffer must parse as XML before the stylesheet can run */
437 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
441 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
/* run the stylesheet of the rule; the third argument 0 passes no parameters */
446 xmlDocPtr res = xsltApplyStylesheet(r->u.xslt.xsp, doc, 0);
449 xmlChar *out_buf = 0;
/* two serialisers appear below; YAZ_HAVE_XSLTSAVERESULTTOSTRING guards the first */
452 #if YAZ_HAVE_XSLTSAVERESULTTOSTRING
453 xsltSaveResultToString(&out_buf, &out_len, res,
456 xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
460 wrbuf_printf(p->wr_error,
461 "xsltSaveResultToString failed");
/* replace the buffer content with the serialised transformation result */
466 wrbuf_rewind(record);
467 wrbuf_write(record, (const char *) out_buf, out_len);
475 wrbuf_printf(p->wr_error, "xsltApplyStylesheet failed");
/** \brief returns the content of the error buffer of the handle

    The buffer is cleared by the reset routine and at the start of
    yaz_record_conv_record, and filled by the configuration and
    conversion code when something goes wrong.
    \param p record conversion handle
    \returns the error buffer content as a C string
*/
486 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
488 return wrbuf_cstr(p->wr_error);
/** \brief sets the path used when stylesheet files are resolved

    The handle keeps its own copy of the string, made with xstrdup.
    conv_xslt passes this path to yaz_filepath_resolve.
    NOTE(review): confirm that a previously stored path is released
    before the assignment below.
    \param p record conversion handle
    \param path file search path
*/
491 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
496 p->path = xstrdup(path);
503 * indent-tabs-mode: nil
505 * vim: shiftwidth=4 tabstop=8 expandtab