1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
35 enum yaz_collection_state {
41 /** \brief node types for yaz_marc_node */
42 enum YAZ_MARC_NODE_TYPE
45 YAZ_MARC_CONTROLFIELD,
50 /** \brief represents a data field */
51 struct yaz_marc_datafield {
54 struct yaz_marc_subfield *subfields;
57 /** \brief represents a control field */
58 struct yaz_marc_controlfield {
63 /** \brief a comment node */
64 struct yaz_marc_comment {
68 /** \brief MARC node */
69 struct yaz_marc_node {
70 enum YAZ_MARC_NODE_TYPE which;
72 struct yaz_marc_datafield datafield;
73 struct yaz_marc_controlfield controlfield;
77 struct yaz_marc_node *next;
80 /** \brief represents a subfield */
81 struct yaz_marc_subfield {
83 struct yaz_marc_subfield *next;
86 /** \brief the internals of a yaz_marc_t handle */
93 int write_using_libxml2;
94 enum yaz_collection_state enable_collection;
99 struct yaz_marc_node *nodes;
100 struct yaz_marc_node **nodes_pp;
101 struct yaz_marc_subfield **subfield_pp;
/** \brief allocates a MARC handle and gives it its initial settings
    The settings visible here: line-format output, libxml2-based writing
    off, collection wrapping off, " $" as subfield_str and a newline as
    endline_str. The handle also gets its own WRBUF (m_wr) and NMEM. */
104 yaz_marc_t yaz_marc_create(void)
106 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
107 mt->output_format = YAZ_MARC_LINE;
109 mt->write_using_libxml2 = 0;
110 mt->enable_collection = no_collection;
111 mt->m_wr = wrbuf_alloc();
114 strcpy(mt->subfield_str, " $");
115 strcpy(mt->endline_str, "\n");
117 mt->nmem = nmem_create();
/** \brief releases resources owned by a MARC handle
    Visible here: the NMEM, the internal WRBUF (m_wr) and the leader spec
    string are released. */
122 void yaz_marc_destroy(yaz_marc_t mt)
126 nmem_destroy(mt->nmem);
127 wrbuf_destroy(mt->m_wr);
128 xfree(mt->leader_spec);
132 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
137 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
139 wrbuf_iconv_reset(wr, mt->iconv_cd);
142 static int marc_exec_leader(const char *leader_spec, char *leader,
/** \brief allocates a new node from the handle's NMEM
    nodes_pp is left pointing at the new node's next link, so the node
    added after this one can be attached there. */
146 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
148 struct yaz_marc_node *n = (struct yaz_marc_node *)
149 nmem_malloc(mt->nmem, sizeof(*n));
152 mt->nodes_pp = &n->next;
157 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
158 const xmlNode *ptr_data)
160 struct yaz_marc_node *n = yaz_marc_add_node(mt);
161 n->which = YAZ_MARC_CONTROLFIELD;
162 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
163 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
166 void yaz_marc_add_controlfield_turbo_xml(yaz_marc_t mt, char *tag,
167 const xmlNode *ptr_data)
169 struct yaz_marc_node *n = yaz_marc_add_node(mt);
170 n->which = YAZ_MARC_CONTROLFIELD;
171 n->u.controlfield.tag = tag;
172 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief adds a comment node to the record
    The text is duplicated into the handle's NMEM, so the node does not keep
    a pointer to the caller's buffer. */
178 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
180 struct yaz_marc_node *n = yaz_marc_add_node(mt);
181 n->which = YAZ_MARC_COMMENT;
182 n->u.comment = nmem_strdup(mt->nmem, comment);
/** \brief printf-style variant of yaz_marc_add_comment
    The message is formatted into a local buffer (limited to
    sizeof(buf)-1) and then added as a comment node. */
185 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
191 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
192 yaz_marc_add_comment(mt, buf);
196 int yaz_marc_get_debug(yaz_marc_t mt)
/** \brief adds a leader node holding a copy of the given leader
    leader_len bytes are copied into the handle's NMEM; marc_exec_leader is
    then run on that copy with the handle's leader_spec. */
201 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
203 struct yaz_marc_node *n = yaz_marc_add_node(mt);
204 n->which = YAZ_MARC_LEADER;
205 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
/* the result of marc_exec_leader is not checked here */
206 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
/** \brief adds a control field node with copies of tag and data
    tag is copied with nmem_strdup; data is copied with nmem_strdupn using
    data_len. The trailing part builds a "controlfield:" comment node holding
    a hex dump of at most the first 16 data bytes, optionally followed by
    " .." (the condition for that is not visible in this listing).
    NOTE(review): the dump is presumably only produced when debugging is
    enabled - confirm against the elided guard. */
209 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
210 const char *data, size_t data_len)
212 struct yaz_marc_node *n = yaz_marc_add_node(mt);
213 n->which = YAZ_MARC_CONTROLFIELD;
214 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
215 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
221 sprintf(msg, "controlfield:");
222 for (i = 0; i < 16 && i < data_len; i++)
/* "& 0xff" keeps the value in 0..255, so each byte prints as two hex digits */
223 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
225 sprintf(msg + strlen(msg), " ..");
226 yaz_marc_add_comment(mt, msg);
/** \brief adds a data field node with copies of tag and indicator
    The tag is copied with nmem_strdup and indicator_len bytes of the
    indicator with nmem_strdupn. The new field starts without subfields. */
230 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
231 const char *indicator, size_t indicator_len)
233 struct yaz_marc_node *n = yaz_marc_add_node(mt);
234 n->which = YAZ_MARC_DATAFIELD;
235 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
236 n->u.datafield.indicator =
237 nmem_strdupn(mt->nmem, indicator, indicator_len);
238 n->u.datafield.subfields = 0;
240 /* point subfield_pp at this field's (empty) subfield list, so that
   yaz_marc_add_subfield attaches the next subfield to this field */
241 mt->subfield_pp = &n->u.datafield.subfields;
244 // Appends code_data to buffer as part of an XML element name when it consists only of ASCII letters and digits.
245 // Otherwise, if attribute_name is not null, the value is appended as an attribute instead: attribute_name="value".
246 // If the value is not plain and attribute_name is null, nothing is written; the stated intent is to return a non-zero value (value could not be handled).
248 int element_name_append_attribute_value(yaz_marc_t mt, WRBUF buffer, const char *attribute_name, char *code_data, size_t code_len) {
249 // TODO Map special codes to something possible for XML ELEMENT names
// Scan for any character outside 0-9, a-z, A-Z; presumably this sets `encode` (the assignment is not visible in this listing).
253 for (index = 0; index < code_len; index++) {
254 if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
255 (code_data[index] >= 'a' && code_data[index] <= 'z') ||
256 (code_data[index] >= 'A' && code_data[index] <= 'Z')))
// Attribute form: open the attribute before the value ...
260 if (encode && attribute_name)
261 wrbuf_printf(buffer, " %s=\"", attribute_name);
// The value itself is written for plain values, or whenever an attribute name is available.
263 if (!encode || attribute_name)
264 wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len);
// ... and close the attribute after it.
265 if (encode && attribute_name)
266 wrbuf_printf(buffer, "\"");
267 // return error if we couldn't handle it.
// NOTE(review): the `if` below ends in `;`, so its body is an empty statement and the statement that follows it runs unconditionally.
// The semicolon looks unintended; confirm against the return statements that follow.
268 if (encode && !attribute_name);
274 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
275 const char *indicator, size_t indicator_len)
277 struct yaz_marc_node *n = yaz_marc_add_node(mt);
278 n->which = YAZ_MARC_DATAFIELD;
279 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
280 n->u.datafield.indicator =
281 nmem_strdupn(mt->nmem, indicator, indicator_len);
282 n->u.datafield.subfields = 0;
284 /* make subfield_pp the current (last one) */
285 mt->subfield_pp = &n->u.datafield.subfields;
288 void yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value, char *indicators)
290 struct yaz_marc_node *n = yaz_marc_add_node(mt);
291 n->which = YAZ_MARC_DATAFIELD;
292 n->u.datafield.tag = tag_value;
293 n->u.datafield.indicator = indicators;
294 n->u.datafield.subfields = 0;
296 // make subfield_pp the current (last one)
297 mt->subfield_pp = &n->u.datafield.subfields;
300 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
302 n->u.datafield.indicator = indicator;
/** \brief appends a subfield to the data field most recently added
    code_data holds the subfield code followed by its data; code_data_len
    bytes are copied into the handle's NMEM. The first part builds a
    "subfield:" comment node with a hex dump of at most the first 16 bytes.
    NOTE(review): that dump is presumably only produced when debugging is
    enabled - the guard is not visible in this listing. */
307 void yaz_marc_add_subfield(yaz_marc_t mt,
308 const char *code_data, size_t code_data_len)
315 sprintf(msg, "subfield:");
316 for (i = 0; i < 16 && i < code_data_len; i++)
317 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
/* mark the dump as truncated when more than 16 bytes were given */
318 if (i < code_data_len)
319 sprintf(msg + strlen(msg), " ..");
320 yaz_marc_add_comment(mt, msg);
325 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
326 nmem_malloc(mt->nmem, sizeof(*n));
327 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
329 /* link the new subfield in at subfield_pp, then move subfield_pp to its
   next pointer so the following subfield is appended after this one */
330 *mt->subfield_pp = n;
331 mt->subfield_pp = &n->next;
335 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
336 int *indicator_length,
337 int *identifier_length,
339 int *length_data_entry,
340 int *length_starting,
341 int *length_implementation)
345 memcpy(leader, leader_c, 24);
347 if (!atoi_n_check(leader+10, 1, indicator_length))
350 "Indicator length at offset 10 should hold a digit."
353 *indicator_length = 2;
355 if (!atoi_n_check(leader+11, 1, identifier_length))
358 "Identifier length at offset 11 should hold a digit."
361 *identifier_length = 2;
363 if (!atoi_n_check(leader+12, 5, base_address))
366 "Base address at offsets 12..16 should hold a number."
370 if (!atoi_n_check(leader+20, 1, length_data_entry))
373 "Length data entry at offset 20 should hold a digit."
375 *length_data_entry = 4;
378 if (!atoi_n_check(leader+21, 1, length_starting))
381 "Length starting at offset 21 should hold a digit."
383 *length_starting = 5;
386 if (!atoi_n_check(leader+22, 1, length_implementation))
389 "Length implementation at offset 22 should hold a digit."
391 *length_implementation = 0;
397 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
398 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
399 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
400 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
401 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
402 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
404 yaz_marc_add_leader(mt, leader, 24);
/** \brief sets the string written before each subfield in line output
    s is copied into the handle's fixed-size subfield_str; longer input is
    truncated and the result is always NUL-terminated. */
407 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
409 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
410 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/** \brief sets the end-of-line string used by the line writer
    s is copied into the handle's fixed-size endline_str; longer input is
    truncated and the result is always NUL-terminated. */
413 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
415 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
416 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
419 /* Try to guess how many bytes the identifier (subfield code) really is.
   Feeds the first 1, 2, 3 and 4 bytes of buf to yaz_iconv in turn and
   returns the first length that converts without error; returns 1 when no
   length works. */
420 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
425 for (i = 1; i<5; i++)
428 size_t outbytesleft = sizeof(outbuf);
430 const char *inp = buf;
432 size_t inbytesleft = i;
433 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
434 &outp, &outbytesleft);
/* (size_t)(-1) is the error result of the conversion call */
435 if (r != (size_t) (-1))
436 return i; /* got a complete sequence */
438 return 1; /* giving up */
/* NOTE(review): presumably reached when the handle has no iconv
   descriptor - the guarding condition is not visible in this listing */
440 return 1; /* we don't know */
443 void yaz_marc_reset(yaz_marc_t mt)
445 nmem_reset(mt->nmem);
447 mt->nodes_pp = &mt->nodes;
451 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
453 struct yaz_marc_node *n;
454 int identifier_length;
455 const char *leader = 0;
457 for (n = mt->nodes; n; n = n->next)
458 if (n->which == YAZ_MARC_LEADER)
460 leader = n->u.leader;
466 if (!atoi_n_check(leader+11, 1, &identifier_length))
469 for (n = mt->nodes; n; n = n->next)
473 case YAZ_MARC_COMMENT:
474 wrbuf_iconv_write(wr, mt->iconv_cd,
475 n->u.comment, strlen(n->u.comment));
476 wrbuf_puts(wr, "\n");
/** \brief returns the number of bytes that make up the subfield code
    at the start of data */
485 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
486 int identifier_length)
488 /* If the identifier length is 2 (most MARCs) or less (probably an error),
489 the code is nominally a single character. However, multibyte codes have
490 been seen, so in that case measure how big the code really is. */
/* identifier length counts the delimiter too, hence the "- 1" */
491 if (identifier_length > 2)
492 return identifier_length - 1;
494 return cdata_one_character(mt, data);
497 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
499 struct yaz_marc_node *n;
500 int identifier_length;
501 const char *leader = 0;
503 for (n = mt->nodes; n; n = n->next)
504 if (n->which == YAZ_MARC_LEADER)
506 leader = n->u.leader;
512 if (!atoi_n_check(leader+11, 1, &identifier_length))
515 for (n = mt->nodes; n; n = n->next)
517 struct yaz_marc_subfield *s;
520 case YAZ_MARC_DATAFIELD:
521 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
522 n->u.datafield.indicator);
523 for (s = n->u.datafield.subfields; s; s = s->next)
525 size_t using_code_len = get_subfield_len(mt, s->code_data,
528 wrbuf_puts (wr, mt->subfield_str);
529 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
531 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
532 wrbuf_iconv_puts(wr, mt->iconv_cd,
533 s->code_data + using_code_len);
534 marc_iconv_reset(mt, wr);
536 wrbuf_puts (wr, mt->endline_str);
538 case YAZ_MARC_CONTROLFIELD:
539 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
540 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
541 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
542 marc_iconv_reset(mt, wr);
543 wrbuf_puts (wr, mt->endline_str);
545 case YAZ_MARC_COMMENT:
547 wrbuf_iconv_write(wr, mt->iconv_cd,
548 n->u.comment, strlen(n->u.comment));
549 marc_iconv_reset(mt, wr);
550 wrbuf_puts(wr, ")\n");
552 case YAZ_MARC_LEADER:
553 wrbuf_printf(wr, "%s\n", n->u.leader);
556 wrbuf_puts(wr, "\n");
/** \brief writes the closing collection tag, if one is due
    Output is only produced when enable_collection is collection_second,
    which is the state the XML writers in this file set after emitting the
    opening collection tag. For the MARCXML, Turbo MARCXML and MarcXchange
    output formats "</collection>" is then written to wr. */
560 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
562 if (mt->enable_collection == collection_second)
564 switch(mt->output_format)
566 case YAZ_MARC_MARCXML:
567 case YAZ_MARC_TMARCXML:
568 wrbuf_printf(wr, "</collection>\n");
570 case YAZ_MARC_XCHANGE:
571 wrbuf_printf(wr, "</collection>\n");
578 void yaz_marc_enable_collection(yaz_marc_t mt)
580 mt->enable_collection = collection_first;
/** \brief writes the current record to wr in the handle's output format
    Dispatches on mt->output_format to the matching writer and returns that
    writer's result. Some case labels are not visible in this listing. */
583 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
585 switch(mt->output_format)
588 return yaz_marc_write_line(mt, wr);
/* MARCXML and Turbo MARCXML share one writer; it checks the format itself */
589 case YAZ_MARC_MARCXML:
590 case YAZ_MARC_TMARCXML:
591 return yaz_marc_write_marcxml(mt, wr);
592 case YAZ_MARC_XCHANGE:
593 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
594 case YAZ_MARC_ISO2709:
595 return yaz_marc_write_iso2709(mt, wr);
597 return yaz_marc_write_check(mt, wr);
602 const char *collection_name[2] = { "collection", "collection"};
603 const char *record_name[2] = { "record", "r"};
604 const char *leader_name[2] = { "leader", "l"};
605 const char *controlfield_name[2]= { "controlfield", "c"};
606 const char *datafield_name[2] = { "datafield", "d"};
607 const char *indicator_name[2] = { "ind", "i"};
608 const char *subfield_name[2] = { "subfield", "s"};
611 /** \brief common MARC XML/Xchange writer
613 \param wr WRBUF output
614 \param ns XMLNS for the elements
615 \param format record format (e.g. "MARC21")
616 \param type record type (e.g. "Bibliographic")
Element names are taken from the name tables above (record_name,
datafield_name, ...) using the turbo flag as index; the flag is 1 when the
write format is YAZ_MARC_TMARCXML and 0 otherwise.
618 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
623 struct yaz_marc_node *n;
624 int identifier_length;
625 const char *leader = 0;
627 int turbo = yaz_marc_get_write_format(mt) == YAZ_MARC_TMARCXML;
// Locate the leader; the identifier length at leader offset 11 is needed for splitting subfield code from data.
629 for (n = mt->nodes; n; n = n->next)
630 if (n->which == YAZ_MARC_LEADER)
632 leader = n->u.leader;
638 if (!atoi_n_check(leader+11, 1, &identifier_length))
641 if (mt->enable_collection != no_collection)
// The collection start tag is written once; the state then moves to collection_second.
643 if (mt->enable_collection == collection_first) {
644 wrbuf_printf(wr, "<%s xmlns=\"%s\">\n", collection_name[turbo], ns);
645 mt->enable_collection = collection_second;
647 wrbuf_printf(wr, "<%s", record_name[turbo]);
651 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
654 wrbuf_printf(wr, " format=\"%.80s\"", format);
656 wrbuf_printf(wr, " type=\"%.80s\"", type);
657 wrbuf_printf(wr, ">\n");
658 for (n = mt->nodes; n; n = n->next)
660 struct yaz_marc_subfield *s;
664 case YAZ_MARC_DATAFIELD:
666 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
668 wrbuf_printf(wr, " tag=\"");
669 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
670 strlen(n->u.datafield.tag));
672 wrbuf_printf(wr, "\"");
673 if (n->u.datafield.indicator)
// One attribute per indicator character, numbered from 1.
676 for (i = 0; n->u.datafield.indicator[i]; i++)
678 wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
679 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
680 n->u.datafield.indicator+i, 1);
681 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
684 wrbuf_printf(wr, ">\n");
685 for (s = n->u.datafield.subfields; s; s = s->next)
687 size_t using_code_len = get_subfield_len(mt, s->code_data,
689 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
691 wrbuf_printf(wr, " code=\"");
692 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
693 s->code_data, using_code_len);
694 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
696 element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
699 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
700 s->code_data + using_code_len,
701 strlen(s->code_data + using_code_len));
702 marc_iconv_reset(mt, wr);
703 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
705 element_name_append_attribute_value(mt, wr, 0, s->code_data, using_code_len);
706 wrbuf_puts(wr, ">\n");
708 wrbuf_printf(wr, " </%s", datafield_name[turbo]);
711 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
712 strlen(n->u.datafield.tag));
// NOTE(review): the format string below has no conversion specifier, so the datafield_name[turbo] argument is never used.
// It looks like a leftover; confirm and drop the extra argument.
713 wrbuf_printf(wr, ">\n", datafield_name[turbo]);
715 case YAZ_MARC_CONTROLFIELD:
716 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
718 wrbuf_printf(wr, " tag=\"");
719 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
720 strlen(n->u.controlfield.tag));
721 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
724 //TODO convert special
725 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
726 strlen(n->u.controlfield.tag));
727 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
729 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
730 n->u.controlfield.data,
731 strlen(n->u.controlfield.data));
732 marc_iconv_reset(mt, wr);
733 wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
734 //TODO convert special
736 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
737 strlen(n->u.controlfield.tag));
738 wrbuf_puts(wr, ">\n");
740 case YAZ_MARC_COMMENT:
741 wrbuf_printf(wr, "<!-- ");
// NOTE(review): the comment text is written with wrbuf_puts, i.e. without the cdata escaping and iconv conversion used for
// the other content in this function. Presumably acceptable for generated comments; verify that it cannot contain "--".
742 wrbuf_puts(wr, n->u.comment);
743 wrbuf_printf(wr, " -->\n");
745 case YAZ_MARC_LEADER:
746 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
747 wrbuf_iconv_write_cdata(wr,
748 0 , /* no charset conversion for leader */
749 n->u.leader, strlen(n->u.leader));
750 wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
753 wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
757 static int yaz_marc_write_marcxml_ns2(yaz_marc_t mt, WRBUF wr,
762 struct yaz_marc_node *n;
763 int identifier_length;
764 const char *leader = 0;
766 for (n = mt->nodes; n; n = n->next)
767 if (n->which == YAZ_MARC_LEADER)
769 leader = n->u.leader;
775 if (!atoi_n_check(leader+11, 1, &identifier_length))
778 if (mt->enable_collection != no_collection)
780 if (mt->enable_collection == collection_first)
781 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
782 mt->enable_collection = collection_second;
783 wrbuf_printf(wr, "<record");
787 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
790 wrbuf_printf(wr, " format=\"%.80s\"", format);
792 wrbuf_printf(wr, " type=\"%.80s\"", type);
793 wrbuf_printf(wr, ">\n");
794 for (n = mt->nodes; n; n = n->next)
796 struct yaz_marc_subfield *s;
800 case YAZ_MARC_DATAFIELD:
801 wrbuf_printf(wr, " <datafield tag=\"");
802 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
803 strlen(n->u.datafield.tag));
804 wrbuf_printf(wr, "\"");
805 if (n->u.datafield.indicator)
808 for (i = 0; n->u.datafield.indicator[i]; i++)
810 wrbuf_printf(wr, " ind%d=\"", i+1);
811 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
812 n->u.datafield.indicator+i, 1);
813 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
816 wrbuf_printf(wr, ">\n");
817 for (s = n->u.datafield.subfields; s; s = s->next)
819 size_t using_code_len = get_subfield_len(mt, s->code_data,
821 wrbuf_iconv_puts(wr, mt->iconv_cd, " <subfield code=\"");
822 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
823 s->code_data, using_code_len);
824 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
825 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
826 s->code_data + using_code_len,
827 strlen(s->code_data + using_code_len));
828 marc_iconv_reset(mt, wr);
829 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
830 wrbuf_puts(wr, "\n");
832 wrbuf_printf(wr, " </datafield>\n");
834 case YAZ_MARC_CONTROLFIELD:
835 wrbuf_printf(wr, " <controlfield tag=\"");
836 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
837 strlen(n->u.controlfield.tag));
838 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
839 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
840 n->u.controlfield.data,
841 strlen(n->u.controlfield.data));
843 marc_iconv_reset(mt, wr);
844 wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
845 wrbuf_puts(wr, "\n");
847 case YAZ_MARC_COMMENT:
848 wrbuf_printf(wr, "<!-- ");
849 wrbuf_puts(wr, n->u.comment);
850 wrbuf_printf(wr, " -->\n");
852 case YAZ_MARC_LEADER:
853 wrbuf_printf(wr, " <leader>");
854 wrbuf_iconv_write_cdata(wr,
855 0 /* no charset conversion for leader */,
856 n->u.leader, strlen(n->u.leader));
857 wrbuf_printf(wr, "</leader>\n");
860 wrbuf_puts(wr, "</record>\n");
865 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
870 if (mt->write_using_libxml2)
876 if (yaz_marc_get_write_format(mt) == YAZ_MARC_MARCXML)
877 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
878 else // Check for Turbo XML
879 ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type);
883 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
886 xmlDocSetRootElement(doc, root_ptr);
887 xmlDocDumpMemory(doc, &buf_out, &len_out);
889 wrbuf_write(wr, (const char *) buf_out, len_out);
900 return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
/** \brief writes the record as MARCXML or Turbo MARCXML
    Selects the XML namespace from the handle's output format and passes it
    on to yaz_marc_write_marcxml_ns. */
903 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
905 /* set leader 09 to 'a' for UNICODE */
906 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
/* only done when the caller has not supplied a leader spec of its own */
907 if (!mt->leader_spec)
908 yaz_marc_modify_leader(mt, 9, "a");
/* default is the MARC21 slim namespace; Turbo MARCXML has its own */
909 char *name_space = "http://www.loc.gov/MARC21/slim";
910 if (mt->output_format == YAZ_MARC_TMARCXML)
911 name_space = "http://www.indexdata.com/MARC21/turboxml";
912 return yaz_marc_write_marcxml_ns(mt, wr, name_space,
916 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
920 return yaz_marc_write_marcxml_ns(mt, wr,
921 "info:lc/xmlns/marcxchange-v1",
927 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length)
930 struct yaz_marc_subfield *s;
931 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
933 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
934 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
937 //TODO consider if safe
940 strncpy(field + 1, n->u.datafield.tag, 3);
942 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
944 if (n->u.datafield.indicator)
947 for (i = 0; n->u.datafield.indicator[i]; i++)
952 ind_val[0] = n->u.datafield.indicator[i];
954 sprintf(ind_str, "%s%d", indicator_name[turbo], i+1);
955 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
958 WRBUF subfield_name = wrbuf_alloc();
959 for (s = n->u.datafield.subfields; s; s = s->next)
961 xmlNode *ptr_subfield;
962 size_t using_code_len = get_subfield_len(mt, s->code_data,
964 wrbuf_rewind(wr_cdata);
965 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
966 marc_iconv_reset(mt, wr_cdata);
969 ptr_subfield = xmlNewTextChild(
971 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
972 wrbuf_rewind(wr_cdata);
973 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
974 xmlNewProp(ptr_subfield, BAD_CAST "code",
975 BAD_CAST wrbuf_cstr(wr_cdata));
977 else { // Turbo format
978 wrbuf_rewind(subfield_name);
979 wrbuf_puts(subfield_name, "s");
980 int encoding = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len);
981 ptr_subfield = xmlNewTextChild(ptr, ns_record,
982 BAD_CAST wrbuf_cstr(subfield_name),
983 BAD_CAST wrbuf_cstr(wr_cdata));
985 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
986 xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata));
990 wrbuf_destroy(subfield_name);
993 int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
998 struct yaz_marc_node *n;
999 int identifier_length;
1000 const char *leader = 0;
1001 xmlNode *record_ptr;
1004 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
1005 for (n = mt->nodes; n; n = n->next)
1006 if (n->which == YAZ_MARC_LEADER)
1008 leader = n->u.leader;
1014 if (!atoi_n_check(leader+11, 1, &identifier_length))
1017 wr_cdata = wrbuf_alloc();
1019 record_ptr = xmlNewNode(0, BAD_CAST "r");
1020 *root_ptr = record_ptr;
1022 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1023 xmlSetNs(record_ptr, ns_record);
1026 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1028 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1029 for (n = mt->nodes; n; n = n->next)
1031 struct yaz_marc_subfield *s;
1036 case YAZ_MARC_DATAFIELD:
1037 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
1039 case YAZ_MARC_CONTROLFIELD:
1040 wrbuf_rewind(wr_cdata);
1041 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1042 marc_iconv_reset(mt, wr_cdata);
1045 ptr = xmlNewTextChild(record_ptr, ns_record,
1046 BAD_CAST "controlfield",
1047 BAD_CAST wrbuf_cstr(wr_cdata));
1048 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1051 // TODO required iconv?
1054 strncpy(field + 1, n->u.controlfield.tag, 3);
1056 ptr = xmlNewTextChild(record_ptr, ns_record,
1058 BAD_CAST wrbuf_cstr(wr_cdata));
1062 case YAZ_MARC_COMMENT:
1063 ptr = xmlNewComment(BAD_CAST n->u.comment);
1064 xmlAddChild(record_ptr, ptr);
1066 case YAZ_MARC_LEADER:
1068 char *field = "leader";
1071 xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
1072 BAD_CAST n->u.leader);
1077 wrbuf_destroy(wr_cdata);
1082 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
1087 struct yaz_marc_node *n;
1088 int identifier_length;
1089 const char *leader = 0;
1090 xmlNode *record_ptr;
1094 for (n = mt->nodes; n; n = n->next)
1095 if (n->which == YAZ_MARC_LEADER)
1097 leader = n->u.leader;
1103 if (!atoi_n_check(leader+11, 1, &identifier_length))
1106 wr_cdata = wrbuf_alloc();
1108 record_ptr = xmlNewNode(0, BAD_CAST "record");
1109 *root_ptr = record_ptr;
1111 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1112 xmlSetNs(record_ptr, ns_record);
1115 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1117 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1118 for (n = mt->nodes; n; n = n->next)
1120 struct yaz_marc_subfield *s;
1125 case YAZ_MARC_DATAFIELD:
1126 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1127 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
1128 if (n->u.datafield.indicator)
1131 for (i = 0; n->u.datafield.indicator[i]; i++)
1136 sprintf(ind_str, "ind%d", i+1);
1137 ind_val[0] = n->u.datafield.indicator[i];
1139 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1142 for (s = n->u.datafield.subfields; s; s = s->next)
1144 xmlNode *ptr_subfield;
1145 size_t using_code_len = get_subfield_len(mt, s->code_data,
1147 wrbuf_rewind(wr_cdata);
1148 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1149 s->code_data + using_code_len);
1150 marc_iconv_reset(mt, wr_cdata);
1151 ptr_subfield = xmlNewTextChild(
1153 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
1155 wrbuf_rewind(wr_cdata);
1156 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1157 s->code_data, using_code_len);
1158 xmlNewProp(ptr_subfield, BAD_CAST "code",
1159 BAD_CAST wrbuf_cstr(wr_cdata));
1162 case YAZ_MARC_CONTROLFIELD:
1163 wrbuf_rewind(wr_cdata);
1164 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1165 marc_iconv_reset(mt, wr_cdata);
1167 ptr = xmlNewTextChild(record_ptr, ns_record,
1168 BAD_CAST "controlfield",
1169 BAD_CAST wrbuf_cstr(wr_cdata));
1171 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1173 case YAZ_MARC_COMMENT:
1174 ptr = xmlNewComment(BAD_CAST n->u.comment);
1175 xmlAddChild(record_ptr, ptr);
1177 case YAZ_MARC_LEADER:
1178 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1179 BAD_CAST n->u.leader);
1183 wrbuf_destroy(wr_cdata);
1192 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1194 struct yaz_marc_node *n;
1195 int indicator_length;
1196 int identifier_length;
1197 int length_data_entry;
1198 int length_starting;
1199 int length_implementation;
1200 int data_offset = 0;
1201 const char *leader = 0;
1202 WRBUF wr_dir, wr_head, wr_data_tmp;
1205 for (n = mt->nodes; n; n = n->next)
1206 if (n->which == YAZ_MARC_LEADER)
1207 leader = n->u.leader;
1211 if (!atoi_n_check(leader+10, 1, &indicator_length))
1213 if (!atoi_n_check(leader+11, 1, &identifier_length))
1215 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1217 if (!atoi_n_check(leader+21, 1, &length_starting))
1219 if (!atoi_n_check(leader+22, 1, &length_implementation))
1222 wr_data_tmp = wrbuf_alloc();
1223 wr_dir = wrbuf_alloc();
1224 for (n = mt->nodes; n; n = n->next)
1226 int data_length = 0;
1227 struct yaz_marc_subfield *s;
1231 case YAZ_MARC_DATAFIELD:
1232 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1233 data_length += indicator_length;
1234 wrbuf_rewind(wr_data_tmp);
1235 for (s = n->u.datafield.subfields; s; s = s->next)
1237 /* write dummy IDFS + content */
1238 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1239 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1240 marc_iconv_reset(mt, wr_data_tmp);
1242 /* write dummy FS (makes MARC-8 to become ASCII) */
1243 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1244 marc_iconv_reset(mt, wr_data_tmp);
1245 data_length += wrbuf_len(wr_data_tmp);
1247 case YAZ_MARC_CONTROLFIELD:
1248 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1250 wrbuf_rewind(wr_data_tmp);
1251 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1252 n->u.controlfield.data);
1253 marc_iconv_reset(mt, wr_data_tmp);
1254 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1255 marc_iconv_reset(mt, wr_data_tmp);
1256 data_length += wrbuf_len(wr_data_tmp);
1258 case YAZ_MARC_COMMENT:
1260 case YAZ_MARC_LEADER:
1265 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1266 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1267 data_offset += data_length;
1270 /* mark end of directory */
1271 wrbuf_putc(wr_dir, ISO2709_FS);
1273 /* base address of data (comes after leader+directory) */
1274 base_address = 24 + wrbuf_len(wr_dir);
1276 wr_head = wrbuf_alloc();
1278 /* write record length */
1279 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1280 /* from "original" leader */
1281 wrbuf_write(wr_head, leader+5, 7);
1282 /* base address of data */
1283 wrbuf_printf(wr_head, "%05d", base_address);
1284 /* from "original" leader */
1285 wrbuf_write(wr_head, leader+17, 7);
1287 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1288 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1289 wrbuf_destroy(wr_head);
1290 wrbuf_destroy(wr_dir);
1291 wrbuf_destroy(wr_data_tmp);
1293 for (n = mt->nodes; n; n = n->next)
1295 struct yaz_marc_subfield *s;
1299 case YAZ_MARC_DATAFIELD:
1300 wrbuf_printf(wr, "%.*s", indicator_length,
1301 n->u.datafield.indicator);
1302 for (s = n->u.datafield.subfields; s; s = s->next)
1304 wrbuf_putc(wr, ISO2709_IDFS);
1305 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1306 marc_iconv_reset(mt, wr);
1308 wrbuf_putc(wr, ISO2709_FS);
1310 case YAZ_MARC_CONTROLFIELD:
1311 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1312 marc_iconv_reset(mt, wr);
1313 wrbuf_putc(wr, ISO2709_FS);
1315 case YAZ_MARC_COMMENT:
1317 case YAZ_MARC_LEADER:
1321 wrbuf_printf(wr, "%c", ISO2709_RS);
/** \brief decodes an ISO2709 buffer and writes it in the current output format
    The record in buf (bsize bytes) is parsed with yaz_marc_read_iso2709 and
    then written to wr with yaz_marc_write_mode. Returns -1 on error,
    otherwise the value returned by the reader (a length greater than 0). */
1326 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1328 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1331 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1333 return -1; /* error */
1334 return r; /* OK, return length > 0 */
1337 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1338 const char **result, size_t *rsize)
1342 wrbuf_rewind(mt->m_wr);
1343 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1345 *result = wrbuf_cstr(mt->m_wr);
1347 *rsize = wrbuf_len(mt->m_wr);
1351 void yaz_marc_set_read_format(yaz_marc_t mt, int format)
1354 mt->input_format = format;
1357 int yaz_marc_get_read_format(yaz_marc_t mt)
1360 return mt->input_format;
1365 void yaz_marc_set_write_format(yaz_marc_t mt, int format)
1368 mt->output_format = format;
1372 int yaz_marc_get_write_format(yaz_marc_t mt)
1375 return mt->output_format;
1381 * Deprecated, use yaz_marc_set_write_format
1383 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1385 yaz_marc_set_write_format(mt, xmlmode);
1390 void yaz_marc_debug(yaz_marc_t mt, int level)
1396 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1401 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1403 return mt->iconv_cd;
/** \brief overwrites part of the record's leader in place
    The bytes of str (without its terminating NUL) are copied to the leader
    starting at offset off. */
1406 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1408 struct yaz_marc_node *n;
1410 for (n = mt->nodes; n; n = n->next)
1411 if (n->which == YAZ_MARC_LEADER)
1413 leader = n->u.leader;
/* NOTE(review): no check that off + strlen(str) stays within the leader is
   visible here; callers presumably must keep the write inside it - verify */
1414 memcpy(leader+off, str, strlen(str));
1419 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1421 xfree(mt->leader_spec);
1422 mt->leader_spec = 0;
1425 char dummy_leader[24];
1426 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1428 mt->leader_spec = xstrdup(leader_spec);
1433 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1435 const char *cp = leader_spec;
1440 int no_read = 0, no = 0;
1442 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1443 if (no < 2 || no_read < 3)
1445 if (pos < 0 || (size_t) pos >= size)
1450 const char *vp = strchr(val+1, '\'');
1456 if (len + pos > size)
1458 memcpy(leader + pos, val+1, len);
1460 else if (*val >= '0' && *val <= '9')
/** \brief maps a format name to the corresponding YAZ_MARC_* mode
    Recognised names: "marc" (ISO2709), "marcxml", "tmarcxml", "marcxchange"
    and "line". The comparisons are exact and case-sensitive (strcmp).
    NOTE(review): the value used for unknown names is set on a line not
    visible in this listing. */
1476 int yaz_marc_decode_formatstr(const char *arg)
1479 if (!strcmp(arg, "marc"))
1480 mode = YAZ_MARC_ISO2709;
1481 if (!strcmp(arg, "marcxml"))
1482 mode = YAZ_MARC_MARCXML;
1483 if (!strcmp(arg, "tmarcxml"))
1484 mode = YAZ_MARC_TMARCXML;
1485 if (!strcmp(arg, "marcxchange"))
1486 mode = YAZ_MARC_XCHANGE;
1487 if (!strcmp(arg, "line"))
1488 mode = YAZ_MARC_LINE;
1492 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1494 mt->write_using_libxml2 = enable;
1497 int yaz_marc_is_turbo_format(yaz_marc_t mt)
1499 return mt->output_format == YAZ_MARC_TMARCXML;
1506 * c-file-style: "Stroustrup"
1507 * indent-tabs-mode: nil
1509 * vim: shiftwidth=4 tabstop=8 expandtab