1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2013 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
23 #include <yaz/marcdisp.h>
24 #include <yaz/wrbuf.h>
25 #include <yaz/yaz-util.h>
26 #include <yaz/nmem_xml.h>
27 #include <yaz/snprintf.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
34 enum yaz_collection_state {
40 /** \brief node types for yaz_marc_node */
41 enum YAZ_MARC_NODE_TYPE
/* Stored in yaz_marc_node.which. The writers in this file switch on
   YAZ_MARC_DATAFIELD, YAZ_MARC_CONTROLFIELD, YAZ_MARC_COMMENT and
   YAZ_MARC_LEADER. */
44 YAZ_MARC_CONTROLFIELD,
49 /** \brief represents a data field */
50 struct yaz_marc_datafield {
/* head of this field's subfield list; walked through yaz_marc_subfield.next */
53 struct yaz_marc_subfield *subfields;
56 /** \brief represents a control field */
/* This file reads and writes the members tag and data of this struct
   (see yaz_marc_add_controlfield). */
57 struct yaz_marc_controlfield {
62 /** \brief a comment node */
/* NOTE(review): nodes in this file keep their comment as a plain string in
   n->u.comment (see yaz_marc_add_comment); no use of this struct is visible
   here - confirm whether it is still needed. */
63 struct yaz_marc_comment {
67 /** \brief MARC node */
68 struct yaz_marc_node {
/* discriminator: tells which member of u holds this node's payload */
69 enum YAZ_MARC_NODE_TYPE which;
71 struct yaz_marc_datafield datafield;
72 struct yaz_marc_controlfield controlfield;
/* next node of the record; the writers walk the list until next is null */
76 struct yaz_marc_node *next;
79 /** \brief represents a subfield */
80 struct yaz_marc_subfield {
/* next subfield of the same data field; the writers stop at a null pointer */
82 struct yaz_marc_subfield *next;
85 /** \brief the internals of a yaz_marc_t handle */
/* non-zero: yaz_marc_write_marcxml_ns builds the XML through libxml2 */
91 int write_using_libxml2;
/* collection wrapper state: no_collection, collection_first (wrapper not
   yet opened) or collection_second (wrapper opened, trailer pending) */
92 enum yaz_collection_state enable_collection;
/* head of the node list of the current record */
97 struct yaz_marc_node *nodes;
/* address of the link that yaz_marc_add_node advances when appending */
98 struct yaz_marc_node **nodes_pp;
/* address of the link where yaz_marc_add_subfield appends the next subfield */
99 struct yaz_marc_subfield **subfield_pp;
/** \brief creates a MARC handle with default settings

    Allocates the handle with xmalloc and gives it its own WRBUF (m_wr)
    and NMEM (nmem). Defaults set here: YAZ_MARC_LINE output, libxml2
    writing disabled, no collection wrapper, subfield prefix " $" and
    end-of-line string "\n".
*/
102 yaz_marc_t yaz_marc_create(void)
104 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
105 mt->output_format = YAZ_MARC_LINE;
107 mt->write_using_libxml2 = 0;
108 mt->enable_collection = no_collection;
109 mt->m_wr = wrbuf_alloc();
112 strcpy(mt->subfield_str, " $");
113 strcpy(mt->endline_str, "\n");
115 mt->nmem = nmem_create();
/** \brief releases the resources owned by a MARC handle

    Destroys the handle's NMEM (and with it all nodes allocated from it),
    its WRBUF and the leader_spec string.
*/
120 void yaz_marc_destroy(yaz_marc_t mt)
124 nmem_destroy(mt->nmem);
125 wrbuf_destroy(mt->m_wr);
126 xfree(mt->leader_spec);
130 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
/** \brief calls wrbuf_iconv_reset on wr with the handle's iconv descriptor

    The writers call this after each converted field or subfield.
*/
135 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
137 wrbuf_iconv_reset(wr, mt->iconv_cd);
140 static int marc_exec_leader(const char *leader_spec, char *leader,
143 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
/** \brief allocates a new node from the handle's NMEM

    The node lives until the NMEM is reset or destroyed.
*/
149 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
151 struct yaz_marc_node *n = (struct yaz_marc_node *)
152 nmem_malloc(mt->nmem, sizeof(*n));
/* the next node will be linked in after this one */
155 mt->nodes_pp = &n->next;
/** \brief adds a control field whose tag and data come from XML nodes

    \param mt handle
    \param ptr_tag XML node supplying the tag text
    \param ptr_data XML node supplying the field data

    Both strings are produced by nmem_text_node_cdata using the handle's NMEM.
*/
160 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
161 const xmlNode *ptr_data)
163 struct yaz_marc_node *n = yaz_marc_add_node(mt);
164 n->which = YAZ_MARC_CONTROLFIELD;
165 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
166 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief adds a control field with a caller-supplied tag and XML data

    \param mt handle
    \param tag tag string; the pointer is stored as is, NOT copied, so it
               must stay valid for as long as the node is used
    \param ptr_data XML node supplying the field data (copied into the
                    handle's NMEM by nmem_text_node_cdata)
*/
169 void yaz_marc_add_controlfield_xml2(yaz_marc_t mt, char *tag,
170 const xmlNode *ptr_data)
172 struct yaz_marc_node *n = yaz_marc_add_node(mt);
173 n->which = YAZ_MARC_CONTROLFIELD;
174 n->u.controlfield.tag = tag;
175 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief appends a comment node holding a copy of the given string

    \param mt handle
    \param comment text; duplicated into the handle's NMEM
*/
181 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
183 struct yaz_marc_node *n = yaz_marc_add_node(mt);
184 n->which = YAZ_MARC_COMMENT;
185 n->u.comment = nmem_strdup(mt->nmem, comment);
/** \brief printf-style variant of yaz_marc_add_comment

    \param mt handle
    \param fmt format string, followed by its arguments

    The text is formatted into a local buffer, limited to sizeof(buf)-1,
    and then added as a comment node.
*/
188 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
194 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
195 yaz_marc_add_comment(mt, buf);
199 int yaz_marc_get_debug(yaz_marc_t mt)
/** \brief appends a leader node

    \param mt handle
    \param leader leader bytes
    \param leader_len number of bytes to copy from leader

    The leader is copied into the handle's NMEM and the handle's
    leader_spec is then applied to the copy. The result of
    marc_exec_leader is not checked here.
*/
204 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
206 struct yaz_marc_node *n = yaz_marc_add_node(mt);
207 n->which = YAZ_MARC_LEADER;
208 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
209 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
/** \brief appends a control field node

    \param mt handle
    \param tag field tag (copied)
    \param data field content
    \param data_len number of bytes to copy from data
*/
212 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
213 const char *data, size_t data_len)
215 struct yaz_marc_node *n = yaz_marc_add_node(mt);
216 n->which = YAZ_MARC_CONTROLFIELD;
217 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
218 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
/* diagnostic comment: hex dump of at most the first 16 data bytes */
224 sprintf(msg, "controlfield:");
225 for (i = 0; i < 16 && i < data_len; i++)
226 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
228 sprintf(msg + strlen(msg), " ..");
229 yaz_marc_add_comment(mt, msg);
/** \brief appends a data field node with an empty subfield list

    \param mt handle
    \param tag field tag (copied)
    \param indicator indicator characters
    \param indicator_len number of bytes to copy from indicator

    Subfields added afterwards with yaz_marc_add_subfield are attached
    to this field.
*/
233 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
234 const char *indicator, size_t indicator_len)
236 struct yaz_marc_node *n = yaz_marc_add_node(mt);
237 n->which = YAZ_MARC_DATAFIELD;
238 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
239 n->u.datafield.indicator =
240 nmem_strdupn(mt->nmem, indicator, indicator_len);
241 n->u.datafield.subfields = 0;
243 /* make subfield_pp the current (last one) */
244 mt->subfield_pp = &n->u.datafield.subfields;
247 /** \brief adds an attribute value to the element name if it is plain chars
249 If not, and if the attribute name is not null, it will append an
250 attribute element with the value. If the attribute name is null, it will
251 return a non-zero value, meaning it couldn't handle the value.
253 static int element_name_append_attribute_value(
254 yaz_marc_t mt, WRBUF buffer,
255 const char *attribute_name, char *code_data, size_t code_len)
257 /* TODO Map special codes to something possible for XML ELEMENT names */
/* scan the code: anything outside 0-9, a-z, A-Z cannot go into an
   element name */
262 for (index = 0; index < code_len; index++)
264 if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
265 (code_data[index] >= 'a' && code_data[index] <= 'z') ||
266 (code_data[index] >= 'A' && code_data[index] <= 'Z')))
269 /* Add as attribute */
270 if (encode && attribute_name)
271 wrbuf_printf(buffer, " %s=\"", attribute_name);
/* the value is written either directly after the element name (plain
   code) or inside the attribute opened above; with a non-plain code and
   no attribute name nothing is written */
273 if (!encode || attribute_name)
274 wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len);
278 if (encode && attribute_name)
279 wrbuf_printf(buffer, "\""); /* return error if we couldn't handle it.*/
/** \brief appends a data field node whose tag comes from an XML node

    \param mt handle
    \param ptr_tag XML node supplying the tag text
    \param indicator indicator characters
    \param indicator_len number of bytes to copy from indicator

    Tag and indicator are stored in the handle's NMEM; the subfield list
    starts empty.
*/
284 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
285 const char *indicator, size_t indicator_len)
287 struct yaz_marc_node *n = yaz_marc_add_node(mt);
288 n->which = YAZ_MARC_DATAFIELD;
289 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
290 n->u.datafield.indicator =
291 nmem_strdupn(mt->nmem, indicator, indicator_len);
292 n->u.datafield.subfields = 0;
294 /* make subfield_pp the current (last one) */
295 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief appends a data field node using caller-owned strings

    \param mt handle
    \param tag_value field tag
    \param indicators indicator characters, may be null (the writers test
                      the indicator pointer before using it)

    Neither pointer is copied; both must stay valid for as long as the
    node is used.
*/
298 void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicators)
300 struct yaz_marc_node *n = yaz_marc_add_node(mt);
301 n->which = YAZ_MARC_DATAFIELD;
302 n->u.datafield.tag = tag_value;
303 n->u.datafield.indicator = indicators;
304 n->u.datafield.subfields = 0;
306 /* make subfield_pp the current (last one) */
307 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief replaces the indicator string of a data field node

    \param n node; it is accessed as a data field without checking n->which
    \param indicator new indicator string; the pointer is stored, not copied
*/
310 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
312 n->u.datafield.indicator = indicator;
/** \brief appends a subfield to the most recently added data field

    \param mt handle
    \param code_data subfield code immediately followed by its data
    \param code_data_len number of bytes to copy from code_data

    The subfield is attached through mt->subfield_pp, which is set up by
    the yaz_marc_add_datafield* functions.
*/
317 void yaz_marc_add_subfield(yaz_marc_t mt,
318 const char *code_data, size_t code_data_len)
/* diagnostic comment: hex dump of at most the first 16 bytes, followed
   by " .." when the input was longer */
325 sprintf(msg, "subfield:");
326 for (i = 0; i < 16 && i < code_data_len; i++)
327 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
328 if (i < code_data_len)
329 sprintf(msg + strlen(msg), " ..");
330 yaz_marc_add_comment(mt, msg);
335 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
336 nmem_malloc(mt->nmem, sizeof(*n));
337 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
339 /* mark subfield_pp to point to this one, so we append here next */
340 *mt->subfield_pp = n;
341 mt->subfield_pp = &n->next;
/** \brief replaces a leader character outside the accepted range

    A character below ' ' or above 127 at leader[offset] is overwritten
    with ch_default, and a message naming the offset and the replacement
    is produced.

    NOTE(review): where plain char is signed, "leader[offset] > 127" can
    never be true; bytes 0x80..0xFF are then negative and are caught by
    the "< ' '" test instead. The outcome is the same, but the intent
    would be clearer with an unsigned char comparison.
*/
345 static void check_ascii(yaz_marc_t mt, char *leader, int offset,
348 if (leader[offset] < ' ' || leader[offset] > 127)
351 "Leader character at offset %d is non-ASCII. "
352 "Setting value to '%c'", offset, ch_default);
353 leader[offset] = ch_default;
/** \brief validates a 24-byte leader, extracts its numeric fields and
    adds it to the record

    \param mt handle
    \param leader_c leader; 24 bytes are read from it
    \param indicator_length receives digit at offset 10 (2 on error)
    \param identifier_length receives digit at offset 11 (2 on error)
    \param length_data_entry receives digit at offset 20 (4 on error)
    \param length_starting receives digit at offset 21 (5 on error)
    \param length_implementation receives digit at offset 22 (0 on error)

    The base address is parsed from offsets 12..16. Character positions
    5-9, 17-19 and 23 are passed through check_ascii.
*/
357 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
358 int *indicator_length,
359 int *identifier_length,
361 int *length_data_entry,
362 int *length_starting,
363 int *length_implementation)
/* work on a private copy so that bad characters can be replaced */
367 memcpy(leader, leader_c, 24);
369 check_ascii(mt, leader, 5, 'a');
370 check_ascii(mt, leader, 6, 'a');
371 check_ascii(mt, leader, 7, 'a');
372 check_ascii(mt, leader, 8, '#');
373 check_ascii(mt, leader, 9, '#');
374 if (!atoi_n_check(leader+10, 1, indicator_length))
377 "Indicator length at offset 10 should hold a digit."
380 *indicator_length = 2;
382 if (!atoi_n_check(leader+11, 1, identifier_length))
385 "Identifier length at offset 11 should hold a digit."
388 *identifier_length = 2;
390 if (!atoi_n_check(leader+12, 5, base_address))
393 "Base address at offsets 12..16 should hold a number."
397 check_ascii(mt, leader, 17, '#');
398 check_ascii(mt, leader, 18, '#');
399 check_ascii(mt, leader, 19, '#');
400 if (!atoi_n_check(leader+20, 1, length_data_entry))
403 "Length data entry at offset 20 should hold a digit."
405 *length_data_entry = 4;
408 if (!atoi_n_check(leader+21, 1, length_starting))
411 "Length starting at offset 21 should hold a digit."
413 *length_starting = 5;
416 if (!atoi_n_check(leader+22, 1, length_implementation))
419 "Length implementation at offset 22 should hold a digit."
421 *length_implementation = 0;
424 check_ascii(mt, leader, 23, '0');
/* report the extracted values as comment nodes */
428 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
429 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
430 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
431 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
432 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
433 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
/* store the (possibly corrected) copy, not the caller's buffer */
435 yaz_marc_add_leader(mt, leader, 24);
/** \brief sets the string written before each subfield in line mode

    \param mt handle
    \param s new prefix; silently truncated to fit mt->subfield_str
*/
438 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
440 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
441 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/** \brief sets the string written after each field in line mode

    \param mt handle
    \param s new end-of-line string; silently truncated to fit
             mt->endline_str
*/
444 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
446 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
447 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
450 /* try to guess how many bytes the identifier really is! */
/* Feeds the first 1, 2, 3 and then 4 bytes of buf to yaz_iconv and
   returns the first length for which the conversion does not report an
   error; falls back to 1. */
451 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
456 for (i = 1; i<5; i++)
459 size_t outbytesleft = sizeof(outbuf);
461 const char *inp = buf;
463 size_t inbytesleft = i;
464 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
465 &outp, &outbytesleft);
/* second call with a null input - presumably flushes/resets the
   converter state between attempts; confirm against the yaz_iconv docs */
466 yaz_iconv(mt->iconv_cd, 0, 0, &outp, &outbytesleft);
467 if (r != (size_t) (-1))
468 return i; /* got a complete sequence */
470 return 1; /* giving up */
472 return 1; /* we don't know */
/** \brief discards the current record

    Resets the handle's NMEM, which backs all nodes and the strings
    copied into them, and points nodes_pp back at the list head so the
    next node added becomes the first.
*/
475 void yaz_marc_reset(yaz_marc_t mt)
477 nmem_reset(mt->nmem);
479 mt->nodes_pp = &mt->nodes;
/** \brief writes only the comment nodes of the record, one per line

    \param mt handle
    \param wr WRBUF output
*/
483 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
485 struct yaz_marc_node *n;
486 int identifier_length;
487 const char *leader = 0;
/* locate the leader node */
489 for (n = mt->nodes; n; n = n->next)
490 if (n->which == YAZ_MARC_LEADER)
492 leader = n->u.leader;
/* identifier length is the single digit at leader offset 11 */
498 if (!atoi_n_check(leader+11, 1, &identifier_length))
501 for (n = mt->nodes; n; n = n->next)
505 case YAZ_MARC_COMMENT:
506 wrbuf_iconv_write(wr, mt->iconv_cd,
507 n->u.comment, strlen(n->u.comment));
508 wrbuf_puts(wr, "\n");
/** \brief returns the number of bytes taken by a subfield code

    \param mt handle (its iconv descriptor is used for the guess)
    \param data subfield code followed by subfield data
    \param identifier_length identifier length from the leader
*/
517 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
518 int identifier_length)
520 /* if identifier length is 2 (most MARCs) or less (probably an error),
521 the code is a single character .. However we've
522 seen multibyte codes, so see how big it really is */
523 if (identifier_length > 2)
524 return identifier_length - 1;
526 return cdata_one_character(mt, data);
/** \brief writes the record in line format

    \param mt handle
    \param wr WRBUF output

    One output line per node: the leader as is, control fields as
    "tag data", data fields as "tag indicators" followed by each
    subfield as subfield_str, code, a space and the data. Fields end
    with endline_str. Field content goes through the handle's iconv
    descriptor; tags, indicators and the leader are written unconverted.
*/
529 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
531 struct yaz_marc_node *n;
532 int identifier_length;
533 const char *leader = 0;
/* locate the leader node */
535 for (n = mt->nodes; n; n = n->next)
536 if (n->which == YAZ_MARC_LEADER)
538 leader = n->u.leader;
/* identifier length is the single digit at leader offset 11 */
544 if (!atoi_n_check(leader+11, 1, &identifier_length))
547 for (n = mt->nodes; n; n = n->next)
549 struct yaz_marc_subfield *s;
552 case YAZ_MARC_DATAFIELD:
553 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
554 n->u.datafield.indicator);
555 for (s = n->u.datafield.subfields; s; s = s->next)
/* split code_data into the subfield code and the data that follows it */
557 size_t using_code_len = get_subfield_len(mt, s->code_data,
560 wrbuf_puts (wr, mt->subfield_str);
561 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
563 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
564 wrbuf_iconv_puts(wr, mt->iconv_cd,
565 s->code_data + using_code_len);
566 marc_iconv_reset(mt, wr);
568 wrbuf_puts (wr, mt->endline_str);
570 case YAZ_MARC_CONTROLFIELD:
571 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
572 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
573 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
574 marc_iconv_reset(mt, wr);
575 wrbuf_puts (wr, mt->endline_str);
577 case YAZ_MARC_COMMENT:
579 wrbuf_iconv_write(wr, mt->iconv_cd,
580 n->u.comment, strlen(n->u.comment));
581 marc_iconv_reset(mt, wr);
582 wrbuf_puts(wr, ")\n");
584 case YAZ_MARC_LEADER:
585 wrbuf_printf(wr, "%s\n", n->u.leader);
/* a blank line ends the record */
588 wrbuf_puts(wr, "\n");
/** \brief closes the collection element if one has been opened

    \param mt handle
    \param wr WRBUF output

    Writes "</collection>\n" for the MARCXML, TurboMARC and MarcXchange
    output formats, but only in state collection_second, i.e. after
    yaz_marc_write_marcxml_wrbuf has written the opening tag.
*/
592 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
594 if (mt->enable_collection == collection_second)
596 switch(mt->output_format)
598 case YAZ_MARC_MARCXML:
599 case YAZ_MARC_TURBOMARC:
600 wrbuf_printf(wr, "</collection>\n");
602 case YAZ_MARC_XCHANGE:
603 wrbuf_printf(wr, "</collection>\n");
/** \brief requests a collection wrapper around the XML records written

    Sets the state to collection_first; yaz_marc_write_marcxml_wrbuf
    writes the opening collection tag in that state.
*/
610 void yaz_marc_enable_collection(yaz_marc_t mt)
612 mt->enable_collection = collection_first;
/** \brief writes the record in the handle's current output format

    \param mt handle
    \param wr WRBUF output

    Dispatches on mt->output_format to the matching writer and returns
    that writer's result.
*/
615 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
617 switch(mt->output_format)
620 return yaz_marc_write_line(mt, wr);
621 case YAZ_MARC_MARCXML:
622 return yaz_marc_write_marcxml(mt, wr);
623 case YAZ_MARC_TURBOMARC:
624 return yaz_marc_write_turbomarc(mt, wr);
625 case YAZ_MARC_XCHANGE:
626 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
627 case YAZ_MARC_ISO2709:
628 return yaz_marc_write_iso2709(mt, wr);
630 return yaz_marc_write_check(mt, wr);
/* XML element and attribute names, indexed by the "turbo" flag:
   [0] is the long name, [1] the short TurboMARC name. indicator_name is
   a prefix; the writers append the indicator number (ind1, i1, ...). */
635 static const char *record_name[2] = { "record", "r"};
636 static const char *leader_name[2] = { "leader", "l"};
637 static const char *controlfield_name[2] = { "controlfield", "c"};
638 static const char *datafield_name[2] = { "datafield", "d"};
639 static const char *indicator_name[2] = { "ind", "i"};
640 static const char *subfield_name[2] = { "subfield", "s"};
642 /** \brief common MARC XML/Xchange/turbomarc writer
644 \param wr WRBUF output
645 \param ns XMLNS for the elements
646 \param format record format (e.g. "MARC21")
647 \param type record type (e.g. "Bibliographic")
648 \param turbo =1 for turbomarc
652 static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
658 struct yaz_marc_node *n;
659 int identifier_length;
660 const char *leader = 0;
662 for (n = mt->nodes; n; n = n->next)
663 if (n->which == YAZ_MARC_LEADER)
665 leader = n->u.leader;
671 if (!atoi_n_check(leader+11, 1, &identifier_length))
674 if (mt->enable_collection != no_collection)
676 if (mt->enable_collection == collection_first)
678 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
679 mt->enable_collection = collection_second;
681 wrbuf_printf(wr, "<%s", record_name[turbo]);
685 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
688 wrbuf_printf(wr, " format=\"%.80s\"", format);
690 wrbuf_printf(wr, " type=\"%.80s\"", type);
691 wrbuf_printf(wr, ">\n");
692 for (n = mt->nodes; n; n = n->next)
694 struct yaz_marc_subfield *s;
698 case YAZ_MARC_DATAFIELD:
700 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
702 wrbuf_printf(wr, " tag=\"");
703 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
704 strlen(n->u.datafield.tag));
706 wrbuf_printf(wr, "\"");
707 if (n->u.datafield.indicator)
710 for (i = 0; n->u.datafield.indicator[i]; i++)
712 wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
713 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
714 n->u.datafield.indicator+i, 1);
715 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
718 wrbuf_printf(wr, ">\n");
719 for (s = n->u.datafield.subfields; s; s = s->next)
721 size_t using_code_len = get_subfield_len(mt, s->code_data,
723 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
726 wrbuf_printf(wr, " code=\"");
727 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
728 s->code_data, using_code_len);
729 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
733 element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
736 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
737 s->code_data + using_code_len,
738 strlen(s->code_data + using_code_len));
739 marc_iconv_reset(mt, wr);
740 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
742 element_name_append_attribute_value(mt, wr, 0, s->code_data, using_code_len);
743 wrbuf_puts(wr, ">\n");
745 wrbuf_printf(wr, " </%s", datafield_name[turbo]);
748 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
749 strlen(n->u.datafield.tag));
750 wrbuf_printf(wr, ">\n");
752 case YAZ_MARC_CONTROLFIELD:
753 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
756 wrbuf_printf(wr, " tag=\"");
757 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
758 strlen(n->u.controlfield.tag));
759 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
763 /* TODO convert special */
764 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
765 strlen(n->u.controlfield.tag));
766 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
/* control field content, written as XML cdata through iconv */
768 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
769 n->u.controlfield.data,
770 strlen(n->u.controlfield.data));
771 marc_iconv_reset(mt, wr);
772 wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
773 /* TODO convert special */
775 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
776 strlen(n->u.controlfield.tag));
777 wrbuf_puts(wr, ">\n");
779 case YAZ_MARC_COMMENT:
/* NOTE(review): the comment text is written verbatim, without XML
   escaping and without iconv conversion. A comment containing "--"
   would make the output ill-formed XML - confirm that callers never
   produce such text. */
780 wrbuf_printf(wr, "<!-- ");
781 wrbuf_puts(wr, n->u.comment);
782 wrbuf_printf(wr, " -->\n");
784 case YAZ_MARC_LEADER:
785 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
786 wrbuf_iconv_write_cdata(wr,
787 0 , /* no charset conversion for leader */
788 n->u.leader, strlen(n->u.leader));
789 wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
/* close the record element opened before the node loop */
792 wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
/** \brief writes the record as XML, through libxml2 or directly

    When mt->write_using_libxml2 is set, a node tree is built with
    yaz_marc_write_xml or yaz_marc_write_xml_turbo_xml, wrapped in an
    XML 1.0 document, serialised with xmlDocDumpMemory and appended to
    wr. Otherwise the text writer yaz_marc_write_marcxml_wrbuf is used.
*/
796 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
802 if (mt->write_using_libxml2)
809 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
811 ret = yaz_marc_write_xml_turbo_xml(mt, &root_ptr, ns, format, type);
815 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
818 xmlDocSetRootElement(doc, root_ptr);
819 xmlDocDumpMemory(doc, &buf_out, &len_out);
821 wrbuf_write(wr, (const char *) buf_out, len_out);
832 return yaz_marc_write_marcxml_wrbuf(mt, wr, ns, format, type, turbo);
/** \brief writes the record as MARCXML (MARC21 slim namespace)

    \param mt handle
    \param wr WRBUF output

    Unless the caller has set a leader spec, leader position 9 is forced
    to 'a' before writing; this modifies the stored leader node.
*/
835 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
837 /* set leader 09 to 'a' for UNICODE */
838 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
839 if (!mt->leader_spec)
840 yaz_marc_modify_leader(mt, 9, "a");
841 return yaz_marc_write_marcxml_ns(mt, wr,
842 "http://www.loc.gov/MARC21/slim",
/** \brief writes the record as TurboMARC

    \param mt handle
    \param wr WRBUF output

    Unless the caller has set a leader spec, leader position 9 is forced
    to 'a' before writing; this modifies the stored leader node. No
    format or type attribute is passed, and the turbo flag is 1.
*/
846 int yaz_marc_write_turbomarc(yaz_marc_t mt, WRBUF wr)
848 /* set leader 09 to 'a' for UNICODE */
849 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
850 if (!mt->leader_spec)
851 yaz_marc_modify_leader(mt, 9, "a");
852 return yaz_marc_write_marcxml_ns(mt, wr,
853 "http://www.indexdata.com/turbomarc", 0, 0, 1);
/** \brief writes the record as MarcXchange

    Uses the "info:lc/xmlns/marcxchange-v1" namespace. Unlike the
    MARCXML and TurboMARC writers, no leader modification is visible
    here.
*/
856 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
860 return yaz_marc_write_marcxml_ns(mt, wr,
861 "info:lc/xmlns/marcxchange-v1",
/** \brief adds one data field to a TurboMARC libxml2 tree

    \param mt handle
    \param n data field node
    \param ns_record namespace for the created elements
    \param wr_cdata scratch WRBUF, rewound and reused for each value
    \param identifier_length identifier length from the leader

    NOTE(review): this helper has external linkage (no "static") -
    confirm whether that is intended.
*/
867 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n,
869 xmlNsPtr ns_record, WRBUF wr_cdata,
870 int identifier_length)
873 struct yaz_marc_subfield *s;
874 WRBUF subfield_name = wrbuf_alloc();
876 /* TODO consider if safe */
/* element name: at most 3 tag characters are copied after the first
   byte of field */
879 strncpy(field + 1, n->u.datafield.tag, 3);
881 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
883 if (n->u.datafield.indicator)
/* one attribute per indicator character: i1, i2, ... */
886 for (i = 0; n->u.datafield.indicator[i]; i++)
891 ind_val[0] = n->u.datafield.indicator[i];
893 sprintf(ind_str, "%s%d", indicator_name[1], i+1);
894 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
897 for (s = n->u.datafield.subfields; s; s = s->next)
900 xmlNode *ptr_subfield;
901 size_t using_code_len = get_subfield_len(mt, s->code_data,
/* subfield data, converted through iconv */
903 wrbuf_rewind(wr_cdata);
904 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
905 marc_iconv_reset(mt, wr_cdata);
/* element name is "s" followed by the code when the code is
   alphanumeric; otherwise not_written becomes true */
907 wrbuf_rewind(subfield_name);
908 wrbuf_puts(subfield_name, "s");
909 not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0;
910 ptr_subfield = xmlNewTextChild(ptr, ns_record,
911 BAD_CAST wrbuf_cstr(subfield_name),
912 BAD_CAST wrbuf_cstr(wr_cdata));
915 /* Generate code attribute value and add */
916 wrbuf_rewind(wr_cdata);
917 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
918 xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata));
921 wrbuf_destroy(subfield_name);
/** \brief builds a TurboMARC libxml2 tree for the record

    \param mt handle
    \param root_ptr receives the created root element "r"

    The caller owns the returned tree (yaz_marc_write_marcxml_ns
    attaches it to a document).
*/
924 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
929 struct yaz_marc_node *n;
930 int identifier_length;
931 const char *leader = 0;
/* locate the leader node */
936 for (n = mt->nodes; n; n = n->next)
937 if (n->which == YAZ_MARC_LEADER)
939 leader = n->u.leader;
/* identifier length is the single digit at leader offset 11 */
945 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* scratch buffer for iconv-converted values; destroyed after the loop */
948 wr_cdata = wrbuf_alloc();
950 record_ptr = xmlNewNode(0, BAD_CAST "r");
951 *root_ptr = record_ptr;
953 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
954 xmlSetNs(record_ptr, ns_record);
957 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
959 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
960 for (n = mt->nodes; n; n = n->next)
970 case YAZ_MARC_DATAFIELD:
971 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
973 case YAZ_MARC_CONTROLFIELD:
974 wrbuf_rewind(wr_cdata);
975 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
976 marc_iconv_reset(mt, wr_cdata);
/* element name: at most 3 tag characters are copied after the first
   byte of field */
978 strncpy(field + 1, n->u.controlfield.tag, 3);
979 ptr = xmlNewTextChild(record_ptr, ns_record,
981 BAD_CAST wrbuf_cstr(wr_cdata));
983 case YAZ_MARC_COMMENT:
984 ptr = xmlNewComment(BAD_CAST n->u.comment);
985 xmlAddChild(record_ptr, ptr);
987 case YAZ_MARC_LEADER:
/* the leader is passed to libxml2 without iconv conversion */
988 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "l",
989 BAD_CAST n->u.leader);
993 wrbuf_destroy(wr_cdata);
/** \brief builds a MARCXML/MarcXchange libxml2 tree for the record

    \param mt handle
    \param root_ptr receives the created root element "record"

    Field and subfield content is converted through the handle's iconv
    descriptor; tags, indicators, comments and the leader are passed to
    libxml2 as they are.
*/
998 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
1003 struct yaz_marc_node *n;
1004 int identifier_length;
1005 const char *leader = 0;
1006 xmlNode *record_ptr;
/* locate the leader node */
1010 for (n = mt->nodes; n; n = n->next)
1011 if (n->which == YAZ_MARC_LEADER)
1013 leader = n->u.leader;
/* identifier length is the single digit at leader offset 11 */
1019 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* scratch buffer for iconv-converted values; destroyed after the loop */
1022 wr_cdata = wrbuf_alloc();
1024 record_ptr = xmlNewNode(0, BAD_CAST "record");
1025 *root_ptr = record_ptr;
1027 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1028 xmlSetNs(record_ptr, ns_record);
1031 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1033 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1034 for (n = mt->nodes; n; n = n->next)
1036 struct yaz_marc_subfield *s;
1041 case YAZ_MARC_DATAFIELD:
1042 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1043 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
1044 if (n->u.datafield.indicator)
/* one attribute per indicator character: ind1, ind2, ... */
1047 for (i = 0; n->u.datafield.indicator[i]; i++)
1052 sprintf(ind_str, "ind%d", i+1);
1053 ind_val[0] = n->u.datafield.indicator[i];
1055 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1058 for (s = n->u.datafield.subfields; s; s = s->next)
1060 xmlNode *ptr_subfield;
1061 size_t using_code_len = get_subfield_len(mt, s->code_data,
/* subfield data first ... */
1063 wrbuf_rewind(wr_cdata);
1064 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1065 s->code_data + using_code_len);
1066 marc_iconv_reset(mt, wr_cdata);
1067 ptr_subfield = xmlNewTextChild(
1069 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
/* ... then the code attribute, reusing the same scratch buffer */
1071 wrbuf_rewind(wr_cdata);
1072 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1073 s->code_data, using_code_len);
1074 xmlNewProp(ptr_subfield, BAD_CAST "code",
1075 BAD_CAST wrbuf_cstr(wr_cdata));
1078 case YAZ_MARC_CONTROLFIELD:
1079 wrbuf_rewind(wr_cdata);
1080 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1081 marc_iconv_reset(mt, wr_cdata);
1083 ptr = xmlNewTextChild(record_ptr, ns_record,
1084 BAD_CAST "controlfield",
1085 BAD_CAST wrbuf_cstr(wr_cdata));
1087 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1089 case YAZ_MARC_COMMENT:
1090 ptr = xmlNewComment(BAD_CAST n->u.comment);
1091 xmlAddChild(record_ptr, ptr);
1093 case YAZ_MARC_LEADER:
1094 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1095 BAD_CAST n->u.leader);
1099 wrbuf_destroy(wr_cdata);
/** \brief writes the record in ISO 2709 format

    \param mt handle
    \param wr WRBUF output

    Works in two passes over the node list. Pass 1 converts every field
    into a scratch buffer only to measure its length, and builds the
    directory (tag, length, offset). Pass 2 writes the field data to wr.
    A fresh 24-byte leader (record length, base address, remaining
    positions copied from the stored leader) and the directory are
    written in between. Comment and leader nodes produce no directory
    entry and no field data.
*/
1105 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1107 struct yaz_marc_node *n;
1108 int indicator_length;
1109 int identifier_length;
1110 int length_data_entry;
1111 int length_starting;
1112 int length_implementation;
1113 int data_offset = 0;
1114 const char *leader = 0;
1115 WRBUF wr_dir, wr_head, wr_data_tmp;
/* no break visible in this loop: the last leader node found is used */
1118 for (n = mt->nodes; n; n = n->next)
1119 if (n->which == YAZ_MARC_LEADER)
1120 leader = n->u.leader;
/* single-digit layout parameters from leader offsets 10, 11, 20, 21, 22 */
1124 if (!atoi_n_check(leader+10, 1, &indicator_length))
1126 if (!atoi_n_check(leader+11, 1, &identifier_length))
1128 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1130 if (!atoi_n_check(leader+21, 1, &length_starting))
1132 if (!atoi_n_check(leader+22, 1, &length_implementation))
1135 wr_data_tmp = wrbuf_alloc();
1136 wr_dir = wrbuf_alloc();
/* pass 1: measure each field and build the directory */
1137 for (n = mt->nodes; n; n = n->next)
1139 int data_length = 0;
1140 struct yaz_marc_subfield *s;
1144 case YAZ_MARC_DATAFIELD:
/* NOTE(review): "%.3s" writes fewer than 3 characters for a shorter
   tag, which would misalign the fixed-width directory - confirm tags
   are always at least 3 characters. */
1145 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1146 data_length += indicator_length;
1147 wrbuf_rewind(wr_data_tmp);
1148 for (s = n->u.datafield.subfields; s; s = s->next)
1150 /* write dummy IDFS + content */
1151 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1152 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1153 marc_iconv_reset(mt, wr_data_tmp);
1155 /* write dummy FS (makes MARC-8 to become ASCII) */
1156 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1157 marc_iconv_reset(mt, wr_data_tmp);
1158 data_length += wrbuf_len(wr_data_tmp);
1160 case YAZ_MARC_CONTROLFIELD:
1161 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1163 wrbuf_rewind(wr_data_tmp);
1164 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1165 n->u.controlfield.data);
1166 marc_iconv_reset(mt, wr_data_tmp);
1167 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1168 marc_iconv_reset(mt, wr_data_tmp);
1169 data_length += wrbuf_len(wr_data_tmp);
1171 case YAZ_MARC_COMMENT:
1173 case YAZ_MARC_LEADER:
/* NOTE(review): "%0*d" pads but never truncates. A data_length or
   data_offset with more digits than length_data_entry /
   length_starting makes the directory entry wider than the leader
   declares - confirm oversized fields are rejected elsewhere. */
1178 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1179 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1180 data_offset += data_length;
1183 /* mark end of directory */
1184 wrbuf_putc(wr_dir, ISO2709_FS);
1186 /* base address of data (comes after leader+directory) */
1187 base_address = 24 + wrbuf_len(wr_dir);
1189 wr_head = wrbuf_alloc();
1191 /* write record length */
1192 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1193 /* from "original" leader */
1194 wrbuf_write(wr_head, leader+5, 7);
1195 /* base address of data */
1196 wrbuf_printf(wr_head, "%05d", base_address);
1197 /* from "original" leader */
1198 wrbuf_write(wr_head, leader+17, 7);
/* exactly 24 leader bytes are emitted, whatever wr_head holds */
1200 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1201 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1202 wrbuf_destroy(wr_head);
1203 wrbuf_destroy(wr_dir);
1204 wrbuf_destroy(wr_data_tmp);
/* pass 2: write the field data itself */
1206 for (n = mt->nodes; n; n = n->next)
1208 struct yaz_marc_subfield *s;
1212 case YAZ_MARC_DATAFIELD:
/* NOTE(review): copies indicator_length bytes regardless of how long
   the stored indicator string is (and without a null check) - confirm
   every data field carries at least indicator_length indicator bytes. */
1213 wrbuf_write(wr, n->u.datafield.indicator, indicator_length);
1214 for (s = n->u.datafield.subfields; s; s = s->next)
1216 wrbuf_putc(wr, ISO2709_IDFS);
1217 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1218 marc_iconv_reset(mt, wr);
1220 wrbuf_putc(wr, ISO2709_FS);
1222 case YAZ_MARC_CONTROLFIELD:
1223 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1224 marc_iconv_reset(mt, wr);
1225 wrbuf_putc(wr, ISO2709_FS);
1227 case YAZ_MARC_COMMENT:
1229 case YAZ_MARC_LEADER:
/* record terminator */
1233 wrbuf_printf(wr, "%c", ISO2709_RS);
/** \brief decodes an ISO 2709 buffer and writes it in the current
    output format

    \param mt handle
    \param buf input buffer
    \param bsize size of the input buffer
    \param wr WRBUF output

    Reads the record with yaz_marc_read_iso2709, then writes it with
    yaz_marc_write_mode.
*/
1238 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1240 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1243 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1245 return -1; /* error */
1246 return r; /* OK, return length > 0 */
/** \brief decodes an ISO 2709 buffer into the handle's internal WRBUF

    \param mt handle
    \param buf input buffer
    \param bsize size of the input buffer
    \param result receives a pointer to the converted text
    \param rsize receives the length of the converted text

    *result points into the handle's own WRBUF (m_wr), which is rewound
    at the start of every call: the text is valid only until the next
    call on the same handle and must not be freed by the caller.
*/
1249 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1250 const char **result, size_t *rsize)
1254 wrbuf_rewind(mt->m_wr);
1255 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1257 *result = wrbuf_cstr(mt->m_wr);
1259 *rsize = wrbuf_len(mt->m_wr);
/** \brief selects the output format used by yaz_marc_write_mode

    \param mt handle
    \param xmlmode one of the YAZ_MARC_* output format constants; despite
                   the name, non-XML formats such as YAZ_MARC_LINE and
                   YAZ_MARC_ISO2709 are handled by yaz_marc_write_mode too
*/
1263 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1265 mt->output_format = xmlmode;
1268 void yaz_marc_debug(yaz_marc_t mt, int level)
1274 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/** \brief returns the iconv descriptor currently used by the handle */
1279 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1281 return mt->iconv_cd;
/** \brief overwrites part of the stored leader

    \param mt handle
    \param off byte offset into the leader
    \param str replacement text; strlen(str) bytes are copied, without
               the terminating null

    NOTE(review): there is no bounds check. The leader buffer holds
    only the bytes copied by yaz_marc_add_leader (24 when it comes from
    yaz_marc_set_leader), so off + strlen(str) beyond that writes past
    the allocation. Callers in this file pass (9, "a"), which is safe;
    consider rejecting out-of-range requests.
*/
1284 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1286 struct yaz_marc_node *n;
1288 for (n = mt->nodes; n; n = n->next)
1289 if (n->which == YAZ_MARC_LEADER)
1291 leader = n->u.leader;
1292 memcpy(leader+off, str, strlen(str));
/** \brief sets the leader modification spec applied to leaders added
    later

    \param mt handle
    \param leader_spec spec string in the form accepted by
                       marc_exec_leader

    Any previous spec is freed first. The new spec is validated by
    applying it to a scratch 24-byte leader, then stored as a private
    copy.
*/
1297 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1299 xfree(mt->leader_spec);
1300 mt->leader_spec = 0;
1303 char dummy_leader[24];
1304 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1306 mt->leader_spec = xstrdup(leader_spec);
/** \brief applies a leader spec to a leader buffer

    \param leader_spec items of the form pos=value; an item's value ends
                       at the next comma
    \param leader buffer to modify
    \param size size of the leader buffer in bytes

    A value in single quotes is copied to leader+pos; a value starting
    with a digit is handled by a separate branch.
*/
1311 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1313 const char *cp = leader_spec;
1318 int no_read = 0, no = 0;
/* pos, '=', then up to 20 characters of value; %n records how much of
   cp was consumed */
1320 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1321 if (no < 2 || no_read < 3)
/* position must lie inside the leader buffer */
1323 if (pos < 0 || (size_t) pos >= size)
/* quoted value: find the closing quote */
1328 const char *vp = strchr(val+1, '\'');
/* the replacement must fit inside the leader buffer */
1334 if (len + pos > size)
1336 memcpy(leader + pos, val+1, len);
1338 else if (*val >= '0' && *val <= '9')
/** \brief maps an output format name to its YAZ_MARC_* constant

    \param arg "marc", "marcxml", "turbomarc", "marcxchange" or "line"

    The comparison is exact and case-sensitive. The tests are
    independent "if" statements, so every name is compared on each call.
*/
1354 int yaz_marc_decode_formatstr(const char *arg)
1357 if (!strcmp(arg, "marc"))
1358 mode = YAZ_MARC_ISO2709;
1359 if (!strcmp(arg, "marcxml"))
1360 mode = YAZ_MARC_MARCXML;
1361 if (!strcmp(arg, "turbomarc"))
1362 mode = YAZ_MARC_TURBOMARC;
1363 if (!strcmp(arg, "marcxchange"))
1364 mode = YAZ_MARC_XCHANGE;
1365 if (!strcmp(arg, "line"))
1366 mode = YAZ_MARC_LINE;
/** \brief chooses between the libxml2 and the text-based XML writers

    \param mt handle
    \param enable non-zero: yaz_marc_write_marcxml_ns builds the output
                  through libxml2; zero: it writes XML text directly
*/
1370 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1372 mt->write_using_libxml2 = enable;
1378 * c-file-style: "Stroustrup"
1379 * indent-tabs-mode: nil
1381 * vim: shiftwidth=4 tabstop=8 expandtab