1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2013 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
23 #include <yaz/marcdisp.h>
24 #include <yaz/wrbuf.h>
25 #include <yaz/yaz-util.h>
26 #include <yaz/nmem_xml.h>
27 #include <yaz/snprintf.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
/* State of the optional collection wrapper used by the XML writers.
   The enumerators are outside this view; this file uses no_collection,
   collection_first and collection_second. */
34 enum yaz_collection_state {
40 /** \brief node types for yaz_marc_node */
/* Stored in the which member of yaz_marc_node. Besides the enumerator shown
   here, this file also uses YAZ_MARC_DATAFIELD, YAZ_MARC_COMMENT and
   YAZ_MARC_LEADER. */
41 enum YAZ_MARC_NODE_TYPE
44 YAZ_MARC_CONTROLFIELD,
49 /** \brief represents a data field */
/* The tag and indicator members are filled in by the yaz_marc_add_datafield
   family of functions in this file. */
50 struct yaz_marc_datafield {
/* head of the singly linked list of subfields; 0 when there are none */
53 struct yaz_marc_subfield *subfields;
56 /** \brief represents a control field */
/* Code in this file reads and writes the tag and data members, both as
   C strings. The member declarations are outside this view. */
57 struct yaz_marc_controlfield {
62 /** \brief a comment node */
/* NOTE(review): comment nodes in this file keep their text directly in
   u.comment, which is used as a C string; confirm whether this struct is
   still referenced anywhere. */
63 struct yaz_marc_comment {
67 /** \brief MARC node */
/* One element of the node list of a record: a leader, a control field,
   a data field or a comment. */
68 struct yaz_marc_node {
/* tells which kind of node this is; the payload is accessed as n->u.<member> */
69 enum YAZ_MARC_NODE_TYPE which;
71 struct yaz_marc_datafield datafield;
72 struct yaz_marc_controlfield controlfield;
/* next node of the record, or 0 for the last one */
76 struct yaz_marc_node *next;
79 /** \brief represents a subfield */
/* The code_data member (declared outside this view) holds the subfield code
   immediately followed by the subfield data in a single string; writers
   split it with get_subfield_len. */
80 struct yaz_marc_subfield {
/* next subfield of the same data field, or 0 for the last one */
82 struct yaz_marc_subfield *next;
85 /** \brief the internals of a yaz_marc_t handle */
/* non-zero: XML output is built as a libxml2 tree and then dumped */
91 int write_using_libxml2;
/* whether a collection wrapper is wanted and whether it was already opened */
92 enum yaz_collection_state enable_collection;
/* head of the node list of the current record */
97 struct yaz_marc_node *nodes;
/* append position for the node list (see yaz_marc_add_node) */
98 struct yaz_marc_node **nodes_pp;
/* append position for subfields of the most recently added data field */
99 struct yaz_marc_subfield **subfield_pp;
/** \brief creates a MARC handle with default settings
    Defaults visible here: line output format, libxml2 writing disabled,
    no collection wrapper, subfield prefix of blank plus dollar sign and a
    newline as end-of-line string. The handle gets its own WRBUF (m_wr)
    and NMEM (nmem).
*/
102 yaz_marc_t yaz_marc_create(void)
104 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
105 mt->output_format = YAZ_MARC_LINE;
107 mt->write_using_libxml2 = 0;
108 mt->enable_collection = no_collection;
109 mt->m_wr = wrbuf_alloc();
112 strcpy(mt->subfield_str, " $");
113 strcpy(mt->endline_str, "\n");
/* nodes, subfields and their strings are allocated from this NMEM */
115 mt->nmem = nmem_create();
/** \brief destroys a MARC handle
    Releases the NMEM, the internal WRBUF and the leader spec string.
    NOTE(review): the release of the handle itself is outside this view.
*/
120 void yaz_marc_destroy(yaz_marc_t mt)
124 nmem_destroy(mt->nmem);
125 wrbuf_destroy(mt->m_wr);
126 xfree(mt->leader_spec);
/* Accessor for the NMEM of the handle.
   NOTE(review): the body is outside this view; presumably it returns
   mt->nmem - confirm. */
130 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
/* Resets the conversion state on wr using the iconv handle of mt. The
   writers call this after each converted field or subfield. */
135 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
137 wrbuf_iconv_reset(wr, mt->iconv_cd);
/* Forward declarations (parameter lists continue on lines outside this
   view); both functions are defined further down in this file. */
140 static int marc_exec_leader(const char *leader_spec, char *leader,
143 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
/** \brief allocates a new node from the NMEM of the handle
    nodes_pp is moved to the next pointer of the new node, so the following
    node is appended after this one.
    NOTE(review): the lines that link the node into the list and return it
    are outside this view.
*/
149 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
151 struct yaz_marc_node *n = (struct yaz_marc_node *)
152 nmem_malloc(mt->nmem, sizeof(*n));
155 mt->nodes_pp = &n->next;
/** \brief adds a control field with tag and data taken from libxml2 nodes
    Both strings are produced by nmem_text_node_cdata using the NMEM of the
    handle.
*/
160 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
161 const xmlNode *ptr_data)
163 struct yaz_marc_node *n = yaz_marc_add_node(mt);
164 n->which = YAZ_MARC_CONTROLFIELD;
165 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
166 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief adds a control field with a ready-made tag and data from a libxml2 node
    The tag pointer is stored as given, not copied, so it must stay valid
    for as long as the node is used. The data is produced by
    nmem_text_node_cdata using the NMEM of the handle.
*/
169 void yaz_marc_add_controlfield_xml2(yaz_marc_t mt, char *tag,
170 const xmlNode *ptr_data)
172 struct yaz_marc_node *n = yaz_marc_add_node(mt);
173 n->which = YAZ_MARC_CONTROLFIELD;
174 n->u.controlfield.tag = tag;
175 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief adds a comment node holding a copy of the given text
    The copy is allocated from the NMEM of the handle.
*/
181 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
183 struct yaz_marc_node *n = yaz_marc_add_node(mt);
184 n->which = YAZ_MARC_COMMENT;
185 n->u.comment = nmem_strdup(mt->nmem, comment);
/** \brief adds a comment node from a printf-style format
    The text is formatted into a local buffer (declared outside this view)
    and then passed to yaz_marc_add_comment, which stores its own copy.
*/
188 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
194 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
195 yaz_marc_add_comment(mt, buf);
/* Accessor for the debug setting of the handle.
   NOTE(review): the body is outside this view - confirm what is returned. */
199 int yaz_marc_get_debug(yaz_marc_t mt)
/** \brief adds a leader node
    The first leader_len bytes of leader are copied into the NMEM of the
    handle; the leader spec of the handle is then applied to the copy via
    marc_exec_leader.
*/
204 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
206 struct yaz_marc_node *n = yaz_marc_add_node(mt);
207 n->which = YAZ_MARC_LEADER;
208 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
209 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
/** \brief adds a control field node with copies of tag and data
    data need not be 0-terminated; data_len bytes are copied.
    The second half builds a comment node with a hex dump of at most the
    first 16 data bytes.
    NOTE(review): the condition guarding the dump and the condition for the
    trailing dots are outside this view.
*/
212 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
213 const char *data, size_t data_len)
215 struct yaz_marc_node *n = yaz_marc_add_node(mt);
216 n->which = YAZ_MARC_CONTROLFIELD;
217 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
218 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
224 sprintf(msg, "controlfield:");
/* each byte is masked with 0xff so it prints as two hex digits */
225 for (i = 0; i < 16 && i < data_len; i++)
226 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
228 sprintf(msg + strlen(msg), " ..");
229 yaz_marc_add_comment(mt, msg);
/** \brief adds a data field node with copies of tag and indicator
    indicator need not be 0-terminated; indicator_len bytes are copied.
    The new field starts without subfields and becomes the target of
    subsequent yaz_marc_add_subfield calls.
*/
233 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
234 const char *indicator, size_t indicator_len)
236 struct yaz_marc_node *n = yaz_marc_add_node(mt);
237 n->which = YAZ_MARC_DATAFIELD;
238 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
239 n->u.datafield.indicator =
240 nmem_strdupn(mt->nmem, indicator, indicator_len);
241 n->u.datafield.subfields = 0;
243 /* make subfield_pp the current (last one) */
244 mt->subfield_pp = &n->u.datafield.subfields;
247 /** \brief adds an attribute value to the element name if it is plain chars
249 If not, and if the attribute name is not null, it will append an
250 attribute element with the value; if the attribute name is null it will
251 return a non-zero value meaning it couldn't handle the value.
253 static int element_name_append_attribute_value(
254 yaz_marc_t mt, WRBUF buffer,
255 const char *attribute_name, char *code_data, size_t code_len)
257 /* TODO Map special codes to something possible for XML ELEMENT names */
/* look for any character outside 0-9, a-z and A-Z.
   NOTE(review): the encode flag tested below is set on lines outside this
   view; presumably it is set when such a character is found - confirm. */
262 for (index = 0; index < code_len; index++)
264 if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
265 (code_data[index] >= 'a' && code_data[index] <= 'z') ||
266 (code_data[index] >= 'A' && code_data[index] <= 'Z')))
269 /* Add as attribute */
270 if (encode && attribute_name)
271 wrbuf_printf(buffer, " %s=\"", attribute_name);
/* the value is written unless encoding is needed and no attribute name
   was supplied */
273 if (!encode || attribute_name)
274 wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len);
278 if (encode && attribute_name)
279 wrbuf_printf(buffer, "\""); /* return error if we couldn't handle it.*/
/** \brief adds a data field whose tag is taken from a libxml2 node
    The tag is produced by nmem_text_node_cdata; indicator_len bytes of
    indicator are copied. The new field starts without subfields and becomes
    the target of subsequent yaz_marc_add_subfield calls.
*/
284 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
285 const char *indicator, size_t indicator_len)
287 struct yaz_marc_node *n = yaz_marc_add_node(mt);
288 n->which = YAZ_MARC_DATAFIELD;
289 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
290 n->u.datafield.indicator =
291 nmem_strdupn(mt->nmem, indicator, indicator_len);
292 n->u.datafield.subfields = 0;
294 /* make subfield_pp the current (last one) */
295 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief adds a data field using ready-made tag and indicator strings
    Both pointers are stored as given, not copied, so they must stay valid
    for as long as the node is used. The new field starts without subfields
    and becomes the target of subsequent yaz_marc_add_subfield calls.
*/
298 void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicators)
300 struct yaz_marc_node *n = yaz_marc_add_node(mt);
301 n->which = YAZ_MARC_DATAFIELD;
302 n->u.datafield.tag = tag_value;
303 n->u.datafield.indicator = indicators;
304 n->u.datafield.subfields = 0;
306 /* make subfield_pp the current (last one) */
307 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief replaces the indicator string of a data field node
    The pointer is stored as given, not copied. The caller must pass a node
    whose type is YAZ_MARC_DATAFIELD; nothing visible here checks that.
*/
310 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
312 n->u.datafield.indicator = indicator;
/** \brief appends a subfield to the most recently added data field
    code_data holds the subfield code followed by the data and need not be
    0-terminated; code_data_len bytes are copied.
    The first half builds a comment node with a hex dump of at most the
    first 16 bytes, with trailing dots when there are more.
    NOTE(review): the condition guarding the dump is outside this view.
*/
317 void yaz_marc_add_subfield(yaz_marc_t mt,
318 const char *code_data, size_t code_data_len)
325 sprintf(msg, "subfield:");
326 for (i = 0; i < 16 && i < code_data_len; i++)
327 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
328 if (i < code_data_len)
329 sprintf(msg + strlen(msg), " ..");
330 yaz_marc_add_comment(mt, msg);
335 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
336 nmem_malloc(mt->nmem, sizeof(*n));
337 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
339 /* mark subfield_pp to point to this one, so we append here next */
340 *mt->subfield_pp = n;
341 mt->subfield_pp = &n->next;
/* Replaces the leader character at offset with ch_default when it is below
   blank or above 127. The message fragment below is an argument of a call
   whose first line is outside this view.
   NOTE(review): where plain char is signed the test against 127 can never
   be true; bytes with the high bit set are then caught by the test against
   blank instead. */
345 static void check_ascii(yaz_marc_t mt, char *leader, int offset,
348 if (leader[offset] < ' ' || leader[offset] > 127)
351 "Leader character at offset %d is non-ASCII. "
352 "Setting value to '%c'", offset, ch_default);
353 leader[offset] = ch_default;
/** \brief validates a 24-byte leader, reports the parsed lengths and adds a leader node
    Works on a local copy of the 24 bytes at leader_c. Non-ASCII characters
    at offsets 5-9, 17-19 and 23 are replaced via check_ascii. The numeric
    positions are parsed with atoi_n_check; when a value is missing or out
    of range a comment node is added and a fallback is used:
    indicator length 2, identifier length 2, length data entry 4,
    length starting 5, length implementation 0.
    Results are returned through the pointer parameters.
    NOTE(review): the base_address parameter is used below but its
    declaration line, and the line assigning its fallback, are outside this
    view.
*/
357 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
358 int *indicator_length,
359 int *identifier_length,
361 int *length_data_entry,
362 int *length_starting,
363 int *length_implementation)
367 memcpy(leader, leader_c, 24);
369 check_ascii(mt, leader, 5, 'a');
370 check_ascii(mt, leader, 6, 'a');
371 check_ascii(mt, leader, 7, 'a');
372 check_ascii(mt, leader, 8, '#');
373 check_ascii(mt, leader, 9, '#');
374 if (!atoi_n_check(leader+10, 1, indicator_length) || *indicator_length == 0)
376 yaz_marc_cprintf(mt, "Indicator length at offset 10 should"
377 " hold a number 1-9. Assuming 2");
379 *indicator_length = 2;
381 if (!atoi_n_check(leader+11, 1, identifier_length) || *identifier_length == 0)
/* NOTE(review): the two literal pieces below join with a double space */
383 yaz_marc_cprintf(mt, "Identifier length at offset 11 should "
384 " hold a number 1-9. Assuming 2");
386 *identifier_length = 2;
388 if (!atoi_n_check(leader+12, 5, base_address))
390 yaz_marc_cprintf(mt, "Base address at offsets 12..16 should"
391 " hold a number. Assuming 0");
394 check_ascii(mt, leader, 17, '#');
395 check_ascii(mt, leader, 18, '#');
396 check_ascii(mt, leader, 19, '#');
397 if (!atoi_n_check(leader+20, 1, length_data_entry) ||
398 *length_data_entry < 3)
400 yaz_marc_cprintf(mt, "Length data entry at offset 20 should"
401 " hold a number 3-9. Assuming 4");
402 *length_data_entry = 4;
405 if (!atoi_n_check(leader+21, 1, length_starting) || *length_starting < 4)
407 yaz_marc_cprintf(mt, "Length starting at offset 21 should"
408 " hold a number 4-9. Assuming 5");
409 *length_starting = 5;
412 if (!atoi_n_check(leader+22, 1, length_implementation))
414 yaz_marc_cprintf(mt, "Length implementation at offset 22 should"
415 " hold a number. Assuming 0");
416 *length_implementation = 0;
419 check_ascii(mt, leader, 23, '0');
/* summary of the parsed values, added as comment nodes.
   NOTE(review): the condition guarding this summary is outside this view. */
423 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
424 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
425 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
426 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
427 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
428 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
/* the possibly corrected local copy is what ends up in the leader node */
430 yaz_marc_add_leader(mt, leader, 24);
/** \brief sets the string written before each subfield in line format
    The value is copied into a fixed-size member of the handle and is
    truncated if too long; the result is always 0-terminated.
*/
433 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
435 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
436 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/** \brief sets the string written at the end of each field in line format
    The value is copied into a fixed-size member of the handle and is
    truncated if too long; the result is always 0-terminated.
*/
439 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
441 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
442 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
445 /* try to guess how many bytes the identifier really is! */
/* Two strategies are visible: one feeds 1 to 4 leading bytes of buf to the
   iconv handle of mt and returns the first length that converts without
   error; the other decodes one UTF-8 character with yaz_read_UTF8_char.
   Both fall back to 1.
   NOTE(review): the condition choosing between the strategies, and the
   value returned on UTF-8 success, are outside this view. */
446 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
451 for (i = 1; i<5; i++)
454 size_t outbytesleft = sizeof(outbuf);
456 const char *inp = buf;
458 size_t inbytesleft = i;
459 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
460 &outp, &outbytesleft);
/* second call passes no input; presumably flushes or resets the converter
   state before the next attempt - confirm */
461 yaz_iconv(mt->iconv_cd, 0, 0, &outp, &outbytesleft);
462 if (r != (size_t) (-1))
463 return i; /* got a complete sequence */
465 return 1; /* giving up */
471 (void) yaz_read_UTF8_char((const unsigned char *) buf, strlen(buf),
473 if (error == 0 && no_read > 0)
476 return 1; /* we don't know */
/** \brief clears the current record
    Resets the NMEM of the handle, which invalidates all nodes and strings
    allocated from it, and points the append position back at the list head.
*/
479 void yaz_marc_reset(yaz_marc_t mt)
481 nmem_reset(mt->nmem);
483 mt->nodes_pp = &mt->nodes;
/** \brief writes only the comment nodes of the record to wr
    The leader is located and its identifier length at offset 11 is parsed
    first; each comment is then written through the iconv handle, followed
    by a newline.
    NOTE(review): return values and the handling of a missing leader are
    outside this view.
*/
487 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
489 struct yaz_marc_node *n;
490 int identifier_length;
491 const char *leader = 0;
493 for (n = mt->nodes; n; n = n->next)
494 if (n->which == YAZ_MARC_LEADER)
496 leader = n->u.leader;
502 if (!atoi_n_check(leader+11, 1, &identifier_length))
505 for (n = mt->nodes; n; n = n->next)
509 case YAZ_MARC_COMMENT:
510 wrbuf_iconv_write(wr, mt->iconv_cd,
511 n->u.comment, strlen(n->u.comment));
512 wrbuf_puts(wr, "\n");
/* Returns the number of leading bytes of data that form the subfield code.
   For identifier lengths above 2 this is identifier_length - 1; otherwise
   the length of one character is probed with cdata_one_character. */
521 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
522 int identifier_length)
524 /* if identifier length is 2 (most MARCs) or less (probably an error),
525 the code is a single character .. However we've
526 seen multibyte codes, so see how big it really is */
527 if (identifier_length > 2)
528 return identifier_length - 1;
530 return cdata_one_character(mt, data);
/** \brief writes the record to wr in line format
    One output line per node: data fields as tag, indicators and then each
    subfield as subfield_str, code, blank, data; control fields as tag,
    blank, data; comments in parentheses; the leader on its own line.
    Field lines end with endline_str.
    NOTE(review): return values and the handling of a missing leader are
    outside this view.
*/
533 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
535 struct yaz_marc_node *n;
536 int identifier_length;
537 const char *leader = 0;
/* the identifier length from leader offset 11 decides how subfield codes
   are split from their data */
539 for (n = mt->nodes; n; n = n->next)
540 if (n->which == YAZ_MARC_LEADER)
542 leader = n->u.leader;
548 if (!atoi_n_check(leader+11, 1, &identifier_length))
551 for (n = mt->nodes; n; n = n->next)
553 struct yaz_marc_subfield *s;
556 case YAZ_MARC_DATAFIELD:
/* tag and indicators are written without character set conversion */
557 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
558 n->u.datafield.indicator);
559 for (s = n->u.datafield.subfields; s; s = s->next)
561 size_t using_code_len = get_subfield_len(mt, s->code_data,
564 wrbuf_puts (wr, mt->subfield_str);
565 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
567 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
568 wrbuf_iconv_puts(wr, mt->iconv_cd,
569 s->code_data + using_code_len);
570 marc_iconv_reset(mt, wr);
572 wrbuf_puts (wr, mt->endline_str);
574 case YAZ_MARC_CONTROLFIELD:
575 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
576 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
577 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
578 marc_iconv_reset(mt, wr);
579 wrbuf_puts (wr, mt->endline_str);
581 case YAZ_MARC_COMMENT:
583 wrbuf_iconv_write(wr, mt->iconv_cd,
584 n->u.comment, strlen(n->u.comment));
585 marc_iconv_reset(mt, wr);
586 wrbuf_puts(wr, ")\n");
588 case YAZ_MARC_LEADER:
589 wrbuf_printf(wr, "%s\n", n->u.leader);
/* blank line after the record */
592 wrbuf_puts(wr, "\n");
/** \brief closes the collection wrapper if one was opened
    Only acts when the collection state is collection_second, which the XML
    writer sets after emitting the opening collection tag. The same closing
    tag is written for MARCXML, TurboMARC and MarcXchange output.
*/
596 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
598 if (mt->enable_collection == collection_second)
600 switch(mt->output_format)
602 case YAZ_MARC_MARCXML:
603 case YAZ_MARC_TURBOMARC:
604 wrbuf_printf(wr, "</collection>\n");
606 case YAZ_MARC_XCHANGE:
607 wrbuf_printf(wr, "</collection>\n");
/** \brief requests that XML records be wrapped in a collection element
    Sets the state to collection_first, so the next XML record written
    opens the wrapper.
*/
614 void yaz_marc_enable_collection(yaz_marc_t mt)
616 mt->enable_collection = collection_first;
/** \brief writes the record in the output format selected on the handle
    Dispatches on output_format to the matching writer and returns its
    result.
    NOTE(review): the case labels for the line writer and for the check
    writer are outside this view.
*/
619 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
621 switch(mt->output_format)
624 return yaz_marc_write_line(mt, wr);
625 case YAZ_MARC_MARCXML:
626 return yaz_marc_write_marcxml(mt, wr);
627 case YAZ_MARC_TURBOMARC:
628 return yaz_marc_write_turbomarc(mt, wr);
629 case YAZ_MARC_XCHANGE:
630 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
631 case YAZ_MARC_ISO2709:
632 return yaz_marc_write_iso2709(mt, wr);
634 return yaz_marc_write_check(mt, wr);
/* XML element and attribute names, indexed by the turbo flag of the WRBUF
   writer: index 0 holds the long MARCXML / MarcXchange names, index 1 the
   short TurboMARC names. */
639 static const char *record_name[2] = { "record", "r"};
640 static const char *leader_name[2] = { "leader", "l"};
641 static const char *controlfield_name[2] = { "controlfield", "c"};
642 static const char *datafield_name[2] = { "datafield", "d"};
/* prefix only; the writers append the indicator number starting at 1 */
643 static const char *indicator_name[2] = { "ind", "i"};
644 static const char *subfield_name[2] = { "subfield", "s"};
646 /** \brief common MARC XML/Xchange/turbomarc writer
648 \param wr WRBUF output
649 \param ns XMLNS for the elements
650 \param format record format (e.g. "MARC21")
651 \param type record type (e.g. "Bibliographic")
652 \param turbo =1 for turbomarc
656 static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
662 struct yaz_marc_node *n;
663 int identifier_length;
664 const char *leader = 0;
/* locate the leader; its offset 11 gives the identifier length used to
   split subfield codes from data */
666 for (n = mt->nodes; n; n = n->next)
667 if (n->which == YAZ_MARC_LEADER)
669 leader = n->u.leader;
675 if (!atoi_n_check(leader+11, 1, &identifier_length))
678 if (mt->enable_collection != no_collection)
/* first record of a collection: open the wrapper, which carries the
   namespace, and remember that it is open */
680 if (mt->enable_collection == collection_first)
682 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
683 mt->enable_collection = collection_second;
685 wrbuf_printf(wr, "<%s", record_name[turbo]);
/* no wrapper: the record element carries the namespace itself */
689 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
/* format and type are cut off at 80 characters by the format string */
692 wrbuf_printf(wr, " format=\"%.80s\"", format);
694 wrbuf_printf(wr, " type=\"%.80s\"", type);
695 wrbuf_printf(wr, ">\n");
696 for (n = mt->nodes; n; n = n->next)
698 struct yaz_marc_subfield *s;
702 case YAZ_MARC_DATAFIELD:
/* NOTE(review): throughout this case and the next, the tag or code is
   written either as an attribute or appended to the element name; the
   conditions choosing between the two forms are outside this view */
704 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
706 wrbuf_printf(wr, " tag=\"");
707 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
708 strlen(n->u.datafield.tag));
710 wrbuf_printf(wr, "\"");
711 if (n->u.datafield.indicator)
/* one attribute per indicator character, numbered from 1 */
714 for (i = 0; n->u.datafield.indicator[i]; i++)
716 wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
717 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
718 n->u.datafield.indicator+i, 1);
719 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
722 wrbuf_printf(wr, ">\n");
723 for (s = n->u.datafield.subfields; s; s = s->next)
725 size_t using_code_len = get_subfield_len(mt, s->code_data,
727 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
730 wrbuf_printf(wr, " code=\"");
731 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
732 s->code_data, using_code_len);
733 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
737 element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
/* subfield data is everything after the code */
740 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
741 s->code_data + using_code_len,
742 strlen(s->code_data + using_code_len));
743 marc_iconv_reset(mt, wr);
744 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
746 element_name_append_attribute_value(mt, wr, 0, s->code_data, using_code_len);
747 wrbuf_puts(wr, ">\n");
749 wrbuf_printf(wr, " </%s", datafield_name[turbo]);
752 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
753 strlen(n->u.datafield.tag));
754 wrbuf_printf(wr, ">\n");
756 case YAZ_MARC_CONTROLFIELD:
757 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
760 wrbuf_printf(wr, " tag=\"");
761 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
762 strlen(n->u.controlfield.tag));
763 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
767 /* TODO convert special */
768 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
769 strlen(n->u.controlfield.tag));
770 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
772 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
773 n->u.controlfield.data,
774 strlen(n->u.controlfield.data));
775 marc_iconv_reset(mt, wr);
776 wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
777 /* TODO convert special */
779 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
780 strlen(n->u.controlfield.tag));
781 wrbuf_puts(wr, ">\n");
783 case YAZ_MARC_COMMENT:
/* NOTE(review): the comment text is written as-is, without conversion or
   escaping; a double hyphen in it would yield an invalid XML comment */
784 wrbuf_printf(wr, "<!-- ");
785 wrbuf_puts(wr, n->u.comment);
786 wrbuf_printf(wr, " -->\n");
788 case YAZ_MARC_LEADER:
789 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
790 wrbuf_iconv_write_cdata(wr,
791 0 , /* no charset conversion for leader */
792 n->u.leader, strlen(n->u.leader));
793 wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
796 wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
/* Common entry point of the XML writers. When write_using_libxml2 is set
   the record is built as a libxml2 tree, by yaz_marc_write_xml or
   yaz_marc_write_xml_turbo_xml, placed in a new document, dumped to memory
   and copied into wr. Otherwise the WRBUF writer is used.
   NOTE(review): the choice between the two tree builders, error handling
   and the release of the libxml2 objects are outside this view. */
800 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
806 if (mt->write_using_libxml2)
813 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
815 ret = yaz_marc_write_xml_turbo_xml(mt, &root_ptr, ns, format, type);
819 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
822 xmlDocSetRootElement(doc, root_ptr);
823 xmlDocDumpMemory(doc, &buf_out, &len_out);
825 wrbuf_write(wr, (const char *) buf_out, len_out);
836 return yaz_marc_write_marcxml_wrbuf(mt, wr, ns, format, type, turbo);
/** \brief writes the record as MARCXML in the MARC21 slim namespace
    Unless a leader spec is set on the handle, leader position 9 is first
    set to the letter a.
    NOTE(review): the remaining arguments of the call are outside this view.
*/
839 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
841 /* set leader 09 to 'a' for UNICODE */
842 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
843 if (!mt->leader_spec)
844 yaz_marc_modify_leader(mt, 9, "a");
845 return yaz_marc_write_marcxml_ns(mt, wr,
846 "http://www.loc.gov/MARC21/slim",
/** \brief writes the record as TurboMARC
    Unless a leader spec is set on the handle, leader position 9 is first
    set to the letter a. No format or type attribute is passed, and the
    last argument selects the turbo form.
*/
850 int yaz_marc_write_turbomarc(yaz_marc_t mt, WRBUF wr)
852 /* set leader 09 to 'a' for UNICODE */
853 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
854 if (!mt->leader_spec)
855 yaz_marc_modify_leader(mt, 9, "a");
856 return yaz_marc_write_marcxml_ns(mt, wr,
857 "http://www.indexdata.com/turbomarc", 0, 0, 1);
/** \brief writes the record as MarcXchange in the marcxchange-v1 namespace
    NOTE(review): the remaining parameters and call arguments are outside
    this view; yaz_marc_write_mode calls this with 0 for both format and
    type.
*/
860 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
864 return yaz_marc_write_marcxml_ns(mt, wr,
865 "info:lc/xmlns/marcxchange-v1",
/* Adds one data field to a TurboMARC libxml2 tree. The element name is
   built in a local buffer from a prefix plus at most 3 characters of the
   tag; indicators become attributes named with the short indicator prefix
   and a number from 1. Each subfield becomes a text child whose name starts
   with the letter s, with the code appended to the name when it is plain
   alphanumeric.
   NOTE(review): the setup of the field buffer, the record_ptr parameter
   and the condition on not_written before the code attribute is added are
   outside this view. */
871 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n,
873 xmlNsPtr ns_record, WRBUF wr_cdata,
874 int identifier_length)
877 struct yaz_marc_subfield *s;
878 WRBUF subfield_name = wrbuf_alloc();
880 /* TODO consider if safe */
883 strncpy(field + 1, n->u.datafield.tag, 3);
885 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
887 if (n->u.datafield.indicator)
890 for (i = 0; n->u.datafield.indicator[i]; i++)
895 ind_val[0] = n->u.datafield.indicator[i];
897 sprintf(ind_str, "%s%d", indicator_name[1], i+1);
898 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
901 for (s = n->u.datafield.subfields; s; s = s->next)
904 xmlNode *ptr_subfield;
905 size_t using_code_len = get_subfield_len(mt, s->code_data,
/* wr_cdata is reused as scratch space: first for the converted data */
907 wrbuf_rewind(wr_cdata);
908 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
909 marc_iconv_reset(mt, wr_cdata);
911 wrbuf_rewind(subfield_name);
912 wrbuf_puts(subfield_name, "s");
913 not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0;
914 ptr_subfield = xmlNewTextChild(ptr, ns_record,
915 BAD_CAST wrbuf_cstr(subfield_name),
916 BAD_CAST wrbuf_cstr(wr_cdata));
919 /* Generate code attribute value and add */
920 wrbuf_rewind(wr_cdata);
921 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
922 xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata));
925 wrbuf_destroy(subfield_name);
/* Builds a TurboMARC libxml2 tree for the record and returns its root
   element through root_ptr. The root is named r and gets the namespace ns
   plus optional format and type attributes. Data fields are delegated to
   add_marc_datafield_turbo_xml; control fields become text children named
   from a local buffer holding a prefix plus at most 3 tag characters;
   comments become XML comments; the leader becomes a text child named l.
   NOTE(review): return values, the setup of the field buffer and the
   conditions on format and type are outside this view. */
928 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
933 struct yaz_marc_node *n;
934 int identifier_length;
935 const char *leader = 0;
940 for (n = mt->nodes; n; n = n->next)
941 if (n->which == YAZ_MARC_LEADER)
943 leader = n->u.leader;
949 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* scratch buffer for character set conversion; destroyed at the end */
952 wr_cdata = wrbuf_alloc();
954 record_ptr = xmlNewNode(0, BAD_CAST "r");
955 *root_ptr = record_ptr;
957 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
958 xmlSetNs(record_ptr, ns_record);
961 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
963 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
964 for (n = mt->nodes; n; n = n->next)
974 case YAZ_MARC_DATAFIELD:
975 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
977 case YAZ_MARC_CONTROLFIELD:
978 wrbuf_rewind(wr_cdata);
979 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
980 marc_iconv_reset(mt, wr_cdata);
982 strncpy(field + 1, n->u.controlfield.tag, 3);
983 ptr = xmlNewTextChild(record_ptr, ns_record,
985 BAD_CAST wrbuf_cstr(wr_cdata));
987 case YAZ_MARC_COMMENT:
988 ptr = xmlNewComment(BAD_CAST n->u.comment);
989 xmlAddChild(record_ptr, ptr);
991 case YAZ_MARC_LEADER:
992 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "l",
993 BAD_CAST n->u.leader);
997 wrbuf_destroy(wr_cdata);
/** \brief builds a MARCXML style libxml2 tree for the record
    The root element is named record, gets the namespace ns plus optional
    format and type attributes, and is returned through root_ptr. Data
    fields, subfields, control fields and the leader become elements with
    the long MARCXML names; comments become XML comments.
    NOTE(review): return values, the remaining parameters and the
    conditions on format and type are outside this view.
*/
1002 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
1007 struct yaz_marc_node *n;
1008 int identifier_length;
1009 const char *leader = 0;
1010 xmlNode *record_ptr;
1014 for (n = mt->nodes; n; n = n->next)
1015 if (n->which == YAZ_MARC_LEADER)
1017 leader = n->u.leader;
1023 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* scratch buffer for character set conversion; destroyed at the end */
1026 wr_cdata = wrbuf_alloc();
1028 record_ptr = xmlNewNode(0, BAD_CAST "record");
1029 *root_ptr = record_ptr;
1031 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1032 xmlSetNs(record_ptr, ns_record);
1035 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1037 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1038 for (n = mt->nodes; n; n = n->next)
1040 struct yaz_marc_subfield *s;
1045 case YAZ_MARC_DATAFIELD:
1046 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1047 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
1048 if (n->u.datafield.indicator)
/* one attribute per indicator character: ind1, ind2 and so on */
1051 for (i = 0; n->u.datafield.indicator[i]; i++)
1056 sprintf(ind_str, "ind%d", i+1);
1057 ind_val[0] = n->u.datafield.indicator[i];
1059 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1062 for (s = n->u.datafield.subfields; s; s = s->next)
1064 xmlNode *ptr_subfield;
1065 size_t using_code_len = get_subfield_len(mt, s->code_data,
/* wr_cdata holds the converted data first, then the converted code */
1067 wrbuf_rewind(wr_cdata);
1068 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1069 s->code_data + using_code_len);
1070 marc_iconv_reset(mt, wr_cdata);
1071 ptr_subfield = xmlNewTextChild(
1073 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
1075 wrbuf_rewind(wr_cdata);
1076 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1077 s->code_data, using_code_len);
1078 xmlNewProp(ptr_subfield, BAD_CAST "code",
1079 BAD_CAST wrbuf_cstr(wr_cdata));
1082 case YAZ_MARC_CONTROLFIELD:
1083 wrbuf_rewind(wr_cdata);
1084 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1085 marc_iconv_reset(mt, wr_cdata);
1087 ptr = xmlNewTextChild(record_ptr, ns_record,
1088 BAD_CAST "controlfield",
1089 BAD_CAST wrbuf_cstr(wr_cdata));
1091 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1093 case YAZ_MARC_COMMENT:
1094 ptr = xmlNewComment(BAD_CAST n->u.comment);
1095 xmlAddChild(record_ptr, ptr);
1097 case YAZ_MARC_LEADER:
1098 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1099 BAD_CAST n->u.leader);
1103 wrbuf_destroy(wr_cdata);
/** \brief writes the record to wr in ISO2709 format
    Works in two passes over the nodes. The first pass converts every field
    into a scratch buffer only to measure its length and builds the
    directory (tag, length, offset per field). The second pass writes the
    leader, the directory and then the real field data with ISO2709
    separators. Comment and leader nodes contribute no field data.
    NOTE(review): return values, the handling of a missing leader and the
    declaration of base_address are outside this view.
*/
1109 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1111 struct yaz_marc_node *n;
1112 int indicator_length;
1113 int identifier_length;
1114 int length_data_entry;
1115 int length_starting;
1116 int length_implementation;
1117 int data_offset = 0;
1118 const char *leader = 0;
1119 WRBUF wr_dir, wr_head, wr_data_tmp;
/* no break is visible here, so the last leader node in the list wins */
1122 for (n = mt->nodes; n; n = n->next)
1123 if (n->which == YAZ_MARC_LEADER)
1124 leader = n->u.leader;
/* single-digit layout values from leader offsets 10, 11, 20, 21 and 22 */
1128 if (!atoi_n_check(leader+10, 1, &indicator_length))
1130 if (!atoi_n_check(leader+11, 1, &identifier_length))
1132 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1134 if (!atoi_n_check(leader+21, 1, &length_starting))
1136 if (!atoi_n_check(leader+22, 1, &length_implementation))
1139 wr_data_tmp = wrbuf_alloc();
1140 wr_dir = wrbuf_alloc();
/* pass 1: measure each field and build the directory */
1141 for (n = mt->nodes; n; n = n->next)
1143 int data_length = 0;
1144 struct yaz_marc_subfield *s;
1148 case YAZ_MARC_DATAFIELD:
/* directory tags are cut off at 3 characters */
1149 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1150 data_length += indicator_length;
1151 wrbuf_rewind(wr_data_tmp);
1152 for (s = n->u.datafield.subfields; s; s = s->next)
1154 /* write dummy IDFS + content */
1155 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1156 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1157 marc_iconv_reset(mt, wr_data_tmp);
1159 /* write dummy FS (makes MARC-8 to become ASCII) */
1160 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1161 marc_iconv_reset(mt, wr_data_tmp);
1162 data_length += wrbuf_len(wr_data_tmp);
1164 case YAZ_MARC_CONTROLFIELD:
1165 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1167 wrbuf_rewind(wr_data_tmp);
1168 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1169 n->u.controlfield.data);
1170 marc_iconv_reset(mt, wr_data_tmp);
1171 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1172 marc_iconv_reset(mt, wr_data_tmp);
1173 data_length += wrbuf_len(wr_data_tmp);
1175 case YAZ_MARC_COMMENT:
1177 case YAZ_MARC_LEADER:
/* directory entry: zero-padded length and starting offset, with widths
   taken from the leader */
1182 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1183 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1184 data_offset += data_length;
1187 /* mark end of directory */
1188 wrbuf_putc(wr_dir, ISO2709_FS);
1190 /* base address of data (comes after leader+directory) */
1191 base_address = 24 + wrbuf_len(wr_dir);
1193 wr_head = wrbuf_alloc();
1195 /* write record length */
1196 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1197 /* from "original" leader */
1198 wrbuf_write(wr_head, leader+5, 7);
1199 /* base address of data */
1200 wrbuf_printf(wr_head, "%05d", base_address);
1201 /* from "original" leader */
1202 wrbuf_write(wr_head, leader+17, 7);
/* exactly 24 bytes of the rebuilt leader are emitted */
1204 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1205 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1206 wrbuf_destroy(wr_head);
1207 wrbuf_destroy(wr_dir);
1208 wrbuf_destroy(wr_data_tmp);
/* pass 2: write the field data with the real separators */
1210 for (n = mt->nodes; n; n = n->next)
1212 struct yaz_marc_subfield *s;
1216 case YAZ_MARC_DATAFIELD:
1217 wrbuf_write(wr, n->u.datafield.indicator, indicator_length);
1218 for (s = n->u.datafield.subfields; s; s = s->next)
1220 wrbuf_putc(wr, ISO2709_IDFS);
1221 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1222 marc_iconv_reset(mt, wr);
1224 wrbuf_putc(wr, ISO2709_FS);
1226 case YAZ_MARC_CONTROLFIELD:
1227 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1228 marc_iconv_reset(mt, wr);
1229 wrbuf_putc(wr, ISO2709_FS);
1231 case YAZ_MARC_COMMENT:
1233 case YAZ_MARC_LEADER:
/* record terminator */
1237 wrbuf_printf(wr, "%c", ISO2709_RS);
/** \brief decodes an ISO2709 buffer and writes it to wr in the selected format
    Reads the record with yaz_marc_read_iso2709, then writes it with
    yaz_marc_write_mode. Returns the length reported by the reader on
    success and -1 on error.
    NOTE(review): the checks on r and s are outside this view.
*/
1242 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1244 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1247 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1249 return -1; /* error */
1250 return r; /* OK, return length > 0 */
/** \brief decodes an ISO2709 buffer into the internal WRBUF of the handle
    The internal buffer is rewound first, so result and rsize refer to
    storage owned by the handle that is overwritten by the next call.
    NOTE(review): the conditions guarding the two output assignments and
    the return statement are outside this view.
*/
1253 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1254 const char **result, size_t *rsize)
1258 wrbuf_rewind(mt->m_wr);
1259 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1261 *result = wrbuf_cstr(mt->m_wr);
1263 *rsize = wrbuf_len(mt->m_wr);
/** \brief selects the output format used by yaz_marc_write_mode
    xmlmode is stored as given; expected values are the YAZ_MARC_* format
    constants.
*/
1267 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1269 mt->output_format = xmlmode;
/* Sets the debug level of the handle.
   NOTE(review): the body is outside this view - confirm which member is
   written. */
1272 void yaz_marc_debug(yaz_marc_t mt, int level)
/* Sets the character set conversion handle used by the writers.
   NOTE(review): the body is outside this view; presumably it stores cd in
   mt->iconv_cd - confirm. */
1278 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/** \brief returns the character set conversion handle of mt */
1283 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1285 return mt->iconv_cd;
/** \brief overwrites part of the leader of the current record
    Copies the characters of str, without its terminator, into each leader
    node starting at offset off.
    NOTE(review): no check that off plus the length of str stays within the
    leader is visible here - confirm that callers guarantee it.
*/
1288 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1290 struct yaz_marc_node *n;
1292 for (n = mt->nodes; n; n = n->next)
1293 if (n->which == YAZ_MARC_LEADER)
1295 leader = n->u.leader;
1296 memcpy(leader+off, str, strlen(str));
/** \brief sets or clears the leader spec applied to every added leader
    Any previous spec is freed first. A new spec is test-run against a
    24-byte dummy leader with marc_exec_leader before a copy is stored.
    NOTE(review): the condition on leader_spec and the return values are
    outside this view.
*/
1301 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1303 xfree(mt->leader_spec);
1304 mt->leader_spec = 0;
1307 char dummy_leader[24];
1308 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1310 mt->leader_spec = xstrdup(leader_spec);
/* Applies a leader spec to leader, a buffer of size bytes. Each entry is
   parsed as a position, an equals sign and a value of at most 20 characters
   that ends at a comma. The position must lie inside the buffer. A value
   in single quotes is copied to the position after a bounds check; a value
   starting with a digit is handled by a branch whose body is outside this
   view.
   NOTE(review): the loop structure and return values are outside this
   view. */
1315 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1317 const char *cp = leader_spec;
1322 int no_read = 0, no = 0;
1324 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1325 if (no < 2 || no_read < 3)
1327 if (pos < 0 || (size_t) pos >= size)
/* look for the closing quote after the opening one */
1332 const char *vp = strchr(val+1, '\'');
1338 if (len + pos > size)
1340 memcpy(leader + pos, val+1, len);
1342 else if (*val >= '0' && *val <= '9')
/** \brief maps a format name to the matching YAZ_MARC_* constant
    Recognized names: marc, marcxml, turbomarc, marcxchange and line. The
    comparisons are independent and case sensitive.
    NOTE(review): the initial value of mode and the return statement are
    outside this view.
*/
1358 int yaz_marc_decode_formatstr(const char *arg)
1361 if (!strcmp(arg, "marc"))
1362 mode = YAZ_MARC_ISO2709;
1363 if (!strcmp(arg, "marcxml"))
1364 mode = YAZ_MARC_MARCXML;
1365 if (!strcmp(arg, "turbomarc"))
1366 mode = YAZ_MARC_TURBOMARC;
1367 if (!strcmp(arg, "marcxchange"))
1368 mode = YAZ_MARC_XCHANGE;
1369 if (!strcmp(arg, "line"))
1370 mode = YAZ_MARC_LINE;
/** \brief chooses whether XML output is produced through libxml2
    A non-zero enable makes the XML writers build a libxml2 tree; zero
    selects the WRBUF writer.
*/
1374 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1376 mt->write_using_libxml2 = enable;
1382 * c-file-style: "Stroustrup"
1383 * indent-tabs-mode: nil
1385 * vim: shiftwidth=4 tabstop=8 expandtab