1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2012 Index Data
3 * See the file LICENSE for details.
7 * \brief Record Conversions utility
15 #include <yaz/yaz-iconv.h>
16 #include <yaz/marcdisp.h>
17 #include <yaz/record_conv.h>
18 #include <yaz/wrbuf.h>
19 #include <yaz/xmalloc.h>
21 #include <yaz/tpath.h>
22 #include <yaz/z-opac.h>
25 #include <libxml/parser.h>
26 #include <libxml/tree.h>
27 #include <libxml/xinclude.h>
29 #include <libxslt/xsltutils.h>
30 #include <libxslt/transform.h>
33 #include <libexslt/exslt.h>
36 /** \brief The internal structure for yaz_record_conv_t */
37 struct yaz_record_conv_struct {
38 /** \brief memory for configuration */
41 /** \brief conversion rules (allocated using NMEM)
 *
 * Head of a singly linked chain; each rule points to its successor
 * through its next member.
 */
42 struct yaz_record_conv_rule *rules;
44 /** \brief pointer to last conversion rule pointer in chain
 *
 * Set to &rules on reset and moved to &r->next when the configure
 * function adds rule r.
 */
45 struct yaz_record_conv_rule **rules_p;
47 /** \brief string buffer for error messages */
50 /** \brief path for opening files */
53 /** \brief handlers
 *
 * List of registered conversion types, walked through their next
 * member; entries are added by yaz_record_conv_add_type.
 */
54 struct yaz_record_conv_type *types;
/* NOTE(review): the struct header for these members is not visible here;
   they are accessed through struct marc_info pointers elsewhere in the
   file, so they presumably belong to struct marc_info - confirm. */
/* Character set names; passed to yaz_iconv_open as (output, input). */
59 const char *input_charset;
60 const char *output_charset;
/* Record format selectors holding YAZ_MARC_* constants. */
61 int input_format_mode;
62 int output_format_mode;
65 /** \brief transformation info (rule info) */
66 struct yaz_record_conv_rule {
/* Conversion type whose convert/destroy handlers operate on this rule. */
67 struct yaz_record_conv_type *type;
/* Next rule in the chain; the chain is walked until this is null. */
69 struct yaz_record_conv_rule *next;
72 /** \brief reset rules+configuration
 *
 * Calls the destroy handler of the owning type on the info of every
 * rule in the chain that starts at p->rules, rewinds the error buffer
 * and points rules_p back at p->rules, the head pointer of the chain.
 */
73 static void yaz_record_conv_reset(yaz_record_conv_t p)
76 struct yaz_record_conv_rule *r;
77 for (r = p->rules; r; r = r->next)
79 r->type->destroy(r->info);
81 wrbuf_rewind(p->wr_error);
86 p->rules_p = &p->rules;
/** \brief registers a conversion type with a converter
 *
 * The caller's struct is copied member-for-member (shallow copy) into a
 * node obtained from xmalloc, so \a type may be a short-lived object;
 * the create function passes the address of a local variable.
 *
 * \param p converter receiving the type
 * \param type type description to copy
 */
89 void yaz_record_conv_add_type(yaz_record_conv_t p,
90 struct yaz_record_conv_type *type)
/* tp addresses the list slot that receives the new node; the statements
   that position it within the list are not visible here. */
92 struct yaz_record_conv_type **tp = &p->types;
95 *tp = xmalloc(sizeof(*type));
96 memcpy(*tp, type, sizeof(*type));
/** \brief destroys a converter
 *
 * Resets the rules first (so each rule's destroy handler runs), then
 * destroys the NMEM and the error buffer, and finally walks the list of
 * registered types.
 *
 * \param p converter to destroy
 */
100 void yaz_record_conv_destroy(yaz_record_conv_t p)
104 struct yaz_record_conv_type *t = p->types;
106 yaz_record_conv_reset(p);
107 nmem_destroy(p->nmem);
108 wrbuf_destroy(p->wr_error);
/* The successor is read before the current node is dealt with;
   presumably the node is released right after - that statement is not
   visible here. */
112 struct yaz_record_conv_type *t_next = t->next;
/** \brief constructs the info for an <xslt> rule
 *
 * Only elements named "xslt" are handled. The attribute 'stylesheet'
 * names the stylesheet file; it is resolved against \a path with
 * yaz_filepath_resolve, parsed with xmlParseFile, and a copy of the
 * parsed document is compiled with xsltParseStylesheetDoc. The compiled
 * stylesheet is released again with xsltFreeStylesheet at the end of
 * the visible code; convert_xslt and destroy_xslt treat the rule info
 * as an xmlDocPtr, so presumably the parsed document is what gets
 * returned (the return statements are not visible here).
 *
 * Every diagnostic is written to \a wr_error.
 *
 * \param p converter (not referenced in the visible part)
 * \param ptr candidate configuration element
 * \param path search path for the stylesheet; may be reported in errors
 * \param wr_error buffer receiving the error text
 */
122 static void *construct_xslt(yaz_record_conv_t p, const xmlNode *ptr,
123 const char *path, WRBUF wr_error)
125 struct _xmlAttr *attr;
126 const char *stylesheet = 0;
128 if (strcmp((const char *) ptr->name, "xslt"))
131 for (attr = ptr->properties; attr; attr = attr->next)
133 if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
134 attr->children && attr->children->type == XML_TEXT_NODE)
135 stylesheet = (const char *) attr->children->content;
/* The two literals are concatenated by the compiler; the first one must
   end with a separator so the message does not read
   "...'name'Expected stylesheet.". */
138 wrbuf_printf(wr_error, "Bad attribute '%s'. "
139 "Expected stylesheet.", attr->name);
145 wrbuf_printf(wr_error, "Element <xslt>: "
146 "attribute 'stylesheet' expected");
152 xsltStylesheetPtr xsp;
154 if (!yaz_filepath_resolve(stylesheet, path, 0, fullpath))
156 wrbuf_printf(wr_error, "Element <xslt stylesheet=\"%s\"/>:"
157 " could not locate stylesheet '%s'",
158 stylesheet, stylesheet);
160 wrbuf_printf(wr_error, " with path '%s'", path);
164 xsp_doc = xmlParseFile(fullpath);
167 wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
168 " xml parse failed: %s", stylesheet, fullpath);
170 wrbuf_printf(wr_error, " with path '%s'", path);
173 /* need to copy this before passing it to the processor. It will
174 be encapsulated in the xsp and destroyed by xsltFreeStylesheet */
175 xsp = xsltParseStylesheetDoc(xmlCopyDoc(xsp_doc, 1));
178 wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
179 " xslt parse failed: %s", stylesheet, fullpath);
181 wrbuf_printf(wr_error, " with path '%s'", path);
182 wrbuf_printf(wr_error, " ("
187 "EXSLT not supported"
195 xsltFreeStylesheet(xsp);
/** \brief applies an XSLT rule to a record
 *
 * The record buffer is parsed as XML, the stylesheet is applied and, on
 * success, the record buffer is rewound and overwritten with the
 * serialized result. Failures are reported in \a wr_error.
 *
 * \param info rule info; used as the stylesheet document (xmlDocPtr)
 * \param record record to convert, replaced in place
 * \param wr_error buffer receiving the error text
 */
202 static int convert_xslt(void *info, WRBUF record, WRBUF wr_error)
205 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
209 wrbuf_printf(wr_error, "xmlParseMemory failed");
/* The stylesheet document is copied and compiled for each call: the
   compiled stylesheet takes the document with it when freed (see the
   note on xsltFreeStylesheet below), so the rule's own document must
   stay untouched. */
214 xmlDocPtr xsp_doc = xmlCopyDoc((xmlDocPtr) info, 1);
215 xsltStylesheetPtr xsp = xsltParseStylesheetDoc(xsp_doc);
216 xmlDocPtr res = xsltApplyStylesheet(xsp, doc, 0);
219 xmlChar *out_buf = 0;
/* Two serializers: xsltSaveResultToString when the build defines
   HAVE_XSLTSAVERESULTTOSTRING, xmlDocDumpFormatMemory otherwise
   (the preprocessor branch lines in between are not visible here). */
222 #if HAVE_XSLTSAVERESULTTOSTRING
223 xsltSaveResultToString(&out_buf, &out_len, res, xsp);
225 xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
229 wrbuf_printf(wr_error,
230 "xsltSaveResultToString failed");
235 wrbuf_rewind(record);
236 wrbuf_write(record, (const char *) out_buf, out_len);
244 wrbuf_printf(wr_error, "xsltApplyStylesheet failed");
248 xsltFreeStylesheet(xsp); /* frees xsp_doc too */
/** \brief releases the info of an XSLT rule
 *
 * \param info rule info, interpreted as the stylesheet document
 *
 * NOTE(review): the statement that frees the document is not visible
 * here.
 */
253 static void destroy_xslt(void *info)
257 xmlDocPtr xsp_doc = info;
/** \brief constructs the info for a <marc> rule
 *
 * Only elements named "marc" are handled. The attributes inputformat,
 * outputformat, inputcharset and outputcharset of \a ptr are read into
 * a marc_info allocated from an NMEM created here. Every diagnostic is
 * written to \a wr_error (two messages used to go to p->wr_error
 * directly; they now use the parameter like all the others).
 *
 * - inputformat: "marc" (YAZ_MARC_ISO2709) or "xml" (YAZ_MARC_MARCXML).
 * - outputformat: "line", "marcxml", "turbomarc", "marc" or
 *   "marcxchange".
 * - With "xml" input and only an output charset given, the input
 *   charset defaults to "utf-8". With marcxml, turbomarc or marcxchange
 *   output and only an input charset given, the output charset defaults
 *   to "utf-8".
 * - When both charsets are set the pair is probed with yaz_iconv_open;
 *   a charset left without its counterpart is reported as an error.
 * - The charset names are finally duplicated into the converter's NMEM.
 *
 * \param p converter; its NMEM receives the copies of the charset names
 * \param ptr candidate configuration element
 * \param path not referenced in the visible part of this function
 * \param wr_error buffer receiving the error text
 */
266 static void *construct_marc(yaz_record_conv_t p, const xmlNode *ptr,
267 const char *path, WRBUF wr_error)
269 NMEM nmem = nmem_create();
270 struct marc_info *info = nmem_malloc(nmem, sizeof(*info));
271 struct _xmlAttr *attr;
272 const char *input_format = 0;
273 const char *output_format = 0;
275 if (strcmp((const char *) ptr->name, "marc"))
282 info->input_charset = 0;
283 info->output_charset = 0;
284 info->input_format_mode = 0;
285 info->output_format_mode = 0;
287 for (attr = ptr->properties; attr; attr = attr->next)
289 if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
290 attr->children && attr->children->type == XML_TEXT_NODE)
291 info->input_charset = (const char *) attr->children->content;
292 else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
293 attr->children && attr->children->type == XML_TEXT_NODE)
294 info->output_charset = (const char *) attr->children->content;
295 else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
296 attr->children && attr->children->type == XML_TEXT_NODE)
297 input_format = (const char *) attr->children->content;
298 else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
299 attr->children && attr->children->type == XML_TEXT_NODE)
300 output_format = (const char *) attr->children->content;
/* Adjacent literals are concatenated by the compiler; the second one
   starts with a space so the text does not read
   "attributes'inputformat'". */
303 wrbuf_printf(wr_error, "Element <marc>: expected attributes"
304 " 'inputformat', 'inputcharset', 'outputformat' or"
305 " 'outputcharset', got attribute '%s'",
307 nmem_destroy(info->nmem);
313 wrbuf_printf(wr_error, "Element <marc>: "
314 "attribute 'inputformat' required");
315 nmem_destroy(info->nmem);
318 else if (!strcmp(input_format, "marc"))
320 info->input_format_mode = YAZ_MARC_ISO2709;
322 else if (!strcmp(input_format, "xml"))
324 info->input_format_mode = YAZ_MARC_MARCXML;
325 /** Libxml2 generates UTF-8 encoding by default .
326 So we convert from UTF-8 to outputcharset (if defined)
328 if (!info->input_charset && info->output_charset)
329 info->input_charset = "utf-8";
333 wrbuf_printf(wr_error, "Element <marc inputformat='%s'>: "
334 " Unsupported input format"
335 " defined by attribute value",
337 nmem_destroy(info->nmem);
343 wrbuf_printf(wr_error,
344 "Element <marc>: attribute 'outputformat' required");
345 nmem_destroy(info->nmem);
348 else if (!strcmp(output_format, "line"))
350 info->output_format_mode = YAZ_MARC_LINE;
352 else if (!strcmp(output_format, "marcxml"))
354 info->output_format_mode = YAZ_MARC_MARCXML;
355 if (info->input_charset && !info->output_charset)
356 info->output_charset = "utf-8";
358 else if (!strcmp(output_format, "turbomarc"))
360 info->output_format_mode = YAZ_MARC_TURBOMARC;
361 if (info->input_charset && !info->output_charset)
362 info->output_charset = "utf-8";
364 else if (!strcmp(output_format, "marc"))
366 info->output_format_mode = YAZ_MARC_ISO2709;
368 else if (!strcmp(output_format, "marcxchange"))
370 info->output_format_mode = YAZ_MARC_XCHANGE;
371 if (info->input_charset && !info->output_charset)
372 info->output_charset = "utf-8";
376 wrbuf_printf(wr_error, "Element <marc outputformat='%s'>: "
377 " Unsupported output format"
378 " defined by attribute value",
380 nmem_destroy(info->nmem);
383 if (info->input_charset && info->output_charset)
385 yaz_iconv_t cd = yaz_iconv_open(info->output_charset,
386 info->input_charset);
389 wrbuf_printf(wr_error,
390 "Element <marc inputcharset='%s' outputcharset='%s'>:"
391 " Unsupported character set mapping"
392 " defined by attribute values",
393 info->input_charset, info->output_charset);
394 nmem_destroy(info->nmem);
399 else if (info->input_charset)
401 wrbuf_printf(wr_error, "Element <marc>: "
402 "attribute 'outputcharset' missing");
403 nmem_destroy(info->nmem);
406 else if (info->output_charset)
408 wrbuf_printf(wr_error, "Element <marc>: "
409 "attribute 'inputcharset' missing");
410 nmem_destroy(info->nmem);
413 info->input_charset = nmem_strdup(p->nmem, info->input_charset);
414 info->output_charset = nmem_strdup(p->nmem, info->output_charset);
/** \brief applies a MARC rule to a record
 *
 * Decodes the record according to the rule's input format, then rewinds
 * the record buffer and writes the record back in the rule's output
 * format. Failures are reported in \a wr_error.
 *
 * \param info rule info (struct marc_info)
 * \param record record to convert, replaced in place
 * \param wr_error buffer receiving the error text
 */
418 static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
420 struct marc_info *mi = info;
/* A character set converter is opened for every call, from the input
   charset to the output charset (argument order is output, input). */
423 yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, mi->input_charset);
424 yaz_marc_t mt = yaz_marc_create();
/* Select the output format and attach the converter to the handle. */
426 yaz_marc_xml(mt, mi->output_format_mode);
429 yaz_marc_iconv(mt, cd);
430 if (mi->input_format_mode == YAZ_MARC_ISO2709)
432 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
/* NOTE(review): construct_marc only ever stores YAZ_MARC_ISO2709 or
   YAZ_MARC_MARCXML as input mode, so the TURBOMARC alternative is not
   reachable through the configuration code visible in this file. */
439 else if (mi->input_format_mode == YAZ_MARC_MARCXML ||
440 mi->input_format_mode == YAZ_MARC_TURBOMARC)
442 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
446 wrbuf_printf(wr_error, "xmlParseMemory failed");
451 ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
453 wrbuf_printf(wr_error, "yaz_marc_read_xml failed");
459 wrbuf_printf(wr_error, "unsupported input format");
/* The input has been decoded into mt; reuse the same buffer for output. */
464 wrbuf_rewind(record);
465 ret = yaz_marc_write_mode(mt, record);
467 wrbuf_printf(wr_error, "yaz_marc_write_mode failed");
471 yaz_marc_destroy(mt);
/** \brief releases the info of a MARC rule
 *
 * Destroys the NMEM stored in the info. construct_marc allocates the
 * marc_info itself from an NMEM it creates; assuming that is the handle
 * kept in mi->nmem, this releases the info as well - TODO confirm, the
 * assignment is not visible here.
 *
 * \param info rule info (struct marc_info)
 */
475 static void destroy_marc(void *info)
477 struct marc_info *mi = info;
479 nmem_destroy(mi->nmem);
/** \brief configures a converter from an XML element
 *
 * Any previous configuration is reset first. Each element child of
 * \a ptr is offered to the registered types in list order through their
 * construct handler. A type either returns an info object, reports an
 * error in p->wr_error, or does neither, meaning the element is not
 * its own. If nothing was reported for an element, a generic "expected
 * <marc> or <xslt>" message is written. For a constructed info a rule is
 * allocated from the converter's NMEM and rules_p is advanced to the new
 * rule's next pointer.
 *
 * \param p converter to configure
 * \param ptr element whose children describe the rules
 */
482 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
484 yaz_record_conv_reset(p);
486 /* parsing element children */
487 for (ptr = ptr->children; ptr; ptr = ptr->next)
489 struct yaz_record_conv_type *t;
490 struct yaz_record_conv_rule *r;
/* Only element nodes are considered. */
492 if (ptr->type != XML_ELEMENT_NODE)
494 for (t = p->types; t; t = t->next)
/* Clear the buffer so its length tells whether this type reported an
   error for this element. */
496 wrbuf_rewind(p->wr_error);
497 info = t->construct(p, ptr, p->path, p->wr_error);
499 if (info || wrbuf_len(p->wr_error))
501 /* info == 0 and no error reported, i.e. not handled by it */
505 if (wrbuf_len(p->wr_error) == 0)
506 wrbuf_printf(p->wr_error, "Element <backend>: expected "
507 "<marc> or <xslt> element, got <%s>"
511 r = (struct yaz_record_conv_rule *) nmem_malloc(p->nmem, sizeof(*r));
516 p->rules_p = &r->next;
/** \brief runs a chain of rules over a record
 *
 * The error buffer is rewound, the input bytes are written to the
 * output buffer, and every rule from \a r onwards converts that buffer
 * in place. The loop stops at the first rule whose convert handler
 * returns non-zero.
 *
 * \param p converter; supplies the error buffer
 * \param r first rule to apply; may be null, in which case no rule runs
 * \param input_record_buf input record bytes
 * \param input_record_len number of input bytes
 */
521 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
522 struct yaz_record_conv_rule *r,
523 const char *input_record_buf,
524 size_t input_record_len,
528 WRBUF record = output_record; /* pointer transfer */
529 wrbuf_rewind(p->wr_error);
531 wrbuf_write(record, input_record_buf, input_record_len);
532 for (; ret == 0 && r; r = r->next)
533 ret = r->type->convert(r->info, record, p->wr_error);
/** \brief converts an OPAC record
 *
 * Requires the first configured rule to be a MARC rule (identified by
 * its type's construct handler being construct_marc); otherwise the
 * result is -1. The OPAC record is decoded into a temporary buffer
 * using that rule's output format and character sets, and the buffer is
 * then passed to yaz_record_conv_record_rule.
 *
 * \param p converter
 * \param input_record OPAC record to convert
 */
537 int yaz_record_conv_opac_record(yaz_record_conv_t p,
538 Z_OPACRecord *input_record,
542 struct yaz_record_conv_rule *r = p->rules;
543 if (!r || r->type->construct != construct_marc)
544 ret = -1; /* no marc rule so we can't do OPAC */
/* Safe to read the info as marc_info: the rule was built by
   construct_marc. */
547 struct marc_info *mi = r->info;
549 WRBUF res = wrbuf_alloc();
550 yaz_marc_t mt = yaz_marc_create();
551 yaz_iconv_t cd = yaz_iconv_open(mi->output_charset,
554 wrbuf_rewind(p->wr_error);
555 yaz_marc_xml(mt, mi->output_format_mode);
557 yaz_marc_iconv(mt, cd);
559 yaz_opac_decode_wrbuf(mt, input_record, res);
/* NOTE(review): the rule argument of this call is not visible here. */
562 ret = yaz_record_conv_record_rule(p,
564 wrbuf_buf(res), wrbuf_len(res),
567 yaz_marc_destroy(mt);
/** \brief converts a record with all configured rules
 *
 * Delegates to yaz_record_conv_record_rule, starting at the first rule
 * of the converter, and returns its result.
 *
 * \param p converter
 * \param input_record_buf input record bytes
 * \param input_record_len number of input bytes
 */
575 int yaz_record_conv_record(yaz_record_conv_t p,
576 const char *input_record_buf,
577 size_t input_record_len,
580 return yaz_record_conv_record_rule(p, p->rules,
582 input_record_len, output_record);
/** \brief returns the current content of the converter's error buffer
 *
 * The string is the buffer's own storage as returned by wrbuf_cstr,
 * not a copy made for the caller.
 *
 * \param p converter
 */
585 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
587 return wrbuf_cstr(p->wr_error);
/** \brief sets the path used for opening files
 *
 * The converter stores its own copy of \a path (made with xstrdup).
 * NOTE(review): handling of a previous path or of a null argument is
 * not visible here.
 *
 * \param p converter
 * \param path path to copy
 */
590 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
595 p->path = xstrdup(path);
/** \brief creates a converter
 *
 * Allocates the converter with xmalloc, gives it a fresh NMEM and error
 * buffer, and registers the two built-in conversion types: first marc,
 * then xslt. Each type is described in a local struct that
 * yaz_record_conv_add_type copies.
 */
598 yaz_record_conv_t yaz_record_conv_create()
600 yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
601 p->nmem = nmem_create();
602 p->wr_error = wrbuf_alloc();
610 { /* register marc */
611 struct yaz_record_conv_type t;
613 t.construct = construct_marc;
614 t.convert = convert_marc;
615 t.destroy = destroy_marc;
617 yaz_record_conv_add_type(p, &t);
620 { /* register xslt */
621 struct yaz_record_conv_type t;
623 t.construct = construct_xslt;
624 t.convert = convert_xslt;
625 t.destroy = destroy_xslt;
627 yaz_record_conv_add_type(p, &t);
639 * c-file-style: "Stroustrup"
640 * indent-tabs-mode: nil
642 * vim: shiftwidth=4 tabstop=8 expandtab