1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2011 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
35 enum yaz_collection_state {
41 /** \brief node types for yaz_marc_node */
42 enum YAZ_MARC_NODE_TYPE
45 YAZ_MARC_CONTROLFIELD,
50 /** \brief represents a data field */
51 struct yaz_marc_datafield {
54 struct yaz_marc_subfield *subfields;
57 /** \brief represents a control field */
58 struct yaz_marc_controlfield {
63 /** \brief a comment node */
64 struct yaz_marc_comment {
68 /** \brief MARC node */
69 struct yaz_marc_node {
70 enum YAZ_MARC_NODE_TYPE which;
72 struct yaz_marc_datafield datafield;
73 struct yaz_marc_controlfield controlfield;
77 struct yaz_marc_node *next;
80 /** \brief represents a subfield */
81 struct yaz_marc_subfield {
83 struct yaz_marc_subfield *next;
86 /** \brief the internals of a yaz_marc_t handle */
92 int write_using_libxml2;
93 enum yaz_collection_state enable_collection;
98 struct yaz_marc_node *nodes;
99 struct yaz_marc_node **nodes_pp;
100 struct yaz_marc_subfield **subfield_pp;
/** \brief creates a MARC handle initialised with default settings
 *
 * Defaults set by the visible lines: line-mode output (YAZ_MARC_LINE),
 * libxml2-based writing disabled, no collection wrapper, subfield separator
 * " $" and a newline as end-of-line string. A private WRBUF (m_wr) and an
 * NMEM pool (nmem) are allocated for the handle.
 */
103 yaz_marc_t yaz_marc_create(void)
105 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
106 mt->output_format = YAZ_MARC_LINE;
108 mt->write_using_libxml2 = 0;
109 mt->enable_collection = no_collection;
110 mt->m_wr = wrbuf_alloc();
/* separators used by the line-format writer */
113 strcpy(mt->subfield_str, " $");
114 strcpy(mt->endline_str, "\n");
116 mt->nmem = nmem_create();
/** \brief releases the resources owned by a MARC handle
 *
 * The visible clean-up destroys the NMEM pool and the internal WRBUF and
 * frees the leader_spec string.
 */
121 void yaz_marc_destroy(yaz_marc_t mt)
125 nmem_destroy(mt->nmem);
126 wrbuf_destroy(mt->m_wr);
127 xfree(mt->leader_spec);
131 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
/** \brief calls wrbuf_iconv_reset() on wr with the handle's iconv descriptor
 *
 * The writers in this file invoke it right after emitting a converted
 * field or subfield value.
 */
136 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
138 wrbuf_iconv_reset(wr, mt->iconv_cd);
141 static int marc_exec_leader(const char *leader_spec, char *leader,
144 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
/** \brief allocates a new node from the handle's NMEM pool
 *
 * The append cursor nodes_pp is moved to the new node's next link, so the
 * node created by the following call is chained after this one.
 */
150 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
152 struct yaz_marc_node *n = (struct yaz_marc_node *)
153 nmem_malloc(mt->nmem, sizeof(*n));
156 mt->nodes_pp = &n->next;
/** \brief appends a control field whose tag and data come from XML nodes
 *
 * Both strings are obtained with nmem_text_node_cdata() using the handle's
 * NMEM pool.
 */
161 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
162 const xmlNode *ptr_data)
164 struct yaz_marc_node *n = yaz_marc_add_node(mt);
165 n->which = YAZ_MARC_CONTROLFIELD;
166 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
167 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief appends a control field with a caller-supplied tag string
 *
 * Unlike yaz_marc_add_controlfield_xml() the tag is not read from an XML
 * node; only the data is extracted with nmem_text_node_cdata().
 */
170 void yaz_marc_add_controlfield_xml2(yaz_marc_t mt, char *tag,
171 const xmlNode *ptr_data)
173 struct yaz_marc_node *n = yaz_marc_add_node(mt);
174 n->which = YAZ_MARC_CONTROLFIELD;
/* the tag pointer is stored as-is (no copy is made here) */
175 n->u.controlfield.tag = tag;
176 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief appends a comment node holding a copy of the given string
 *
 * The text is duplicated into the handle's NMEM pool with nmem_strdup().
 */
182 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
184 struct yaz_marc_node *n = yaz_marc_add_node(mt);
185 n->which = YAZ_MARC_COMMENT;
186 n->u.comment = nmem_strdup(mt->nmem, comment);
/** \brief printf-style helper that records a formatted comment node
 *
 * The message is formatted into a local buffer with yaz_vsnprintf() and then
 * handed to yaz_marc_add_comment().
 */
189 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
/* output is bounded by the size of the local buffer */
195 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
196 yaz_marc_add_comment(mt, buf);
200 int yaz_marc_get_debug(yaz_marc_t mt)
/** \brief appends a leader node
 *
 * The first leader_len bytes of leader are copied into the handle's NMEM
 * pool; marc_exec_leader() is then run on the copy with the handle's
 * leader_spec.
 */
205 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
207 struct yaz_marc_node *n = yaz_marc_add_node(mt);
208 n->which = YAZ_MARC_LEADER;
209 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
/* the return value of marc_exec_leader() is not checked here */
210 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
/** \brief appends a control field from a tag string and a data buffer
 *
 * tag is copied with nmem_strdup(); the first data_len bytes of data are
 * copied with nmem_strdupn().
 */
213 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
214 const char *data, size_t data_len)
216 struct yaz_marc_node *n = yaz_marc_add_node(mt);
217 n->which = YAZ_MARC_CONTROLFIELD;
218 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
219 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
/* Diagnostic comment node: hex dump of at most the first 16 data bytes.
   NOTE(review): the condition guarding this section, and the one guarding
   the " .." marker, are not visible here - presumably debug mode and
   "more data than was dumped". */
225 sprintf(msg, "controlfield:");
226 for (i = 0; i < 16 && i < data_len; i++)
227 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
229 sprintf(msg + strlen(msg), " ..");
230 yaz_marc_add_comment(mt, msg);
/** \brief appends a data field with the given tag and indicator bytes
 *
 * The tag and the first indicator_len bytes of indicator are copied into the
 * handle's NMEM pool. The new field starts with an empty subfield list, and
 * subfield_pp is pointed at it so that yaz_marc_add_subfield() appends to
 * this field.
 */
234 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
235 const char *indicator, size_t indicator_len)
237 struct yaz_marc_node *n = yaz_marc_add_node(mt);
238 n->which = YAZ_MARC_DATAFIELD;
239 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
240 n->u.datafield.indicator =
241 nmem_strdupn(mt->nmem, indicator, indicator_len);
242 n->u.datafield.subfields = 0;
244 /* make subfield_pp the current (last one) */
245 mt->subfield_pp = &n->u.datafield.subfields;
248 // Magic function: appends a value to the element name if it consists of plain characters only.
249 // If it does not, and the attribute name is not null, it appends an attribute with that name holding the value.
250 // If the attribute name is null in that case, it returns a non-zero value, meaning it couldn't handle the value.
252 int element_name_append_attribute_value(yaz_marc_t mt, WRBUF buffer, const char *attribute_name, char *code_data, size_t code_len)
254 // TODO Map special codes to something possible for XML ELEMENT names
/* Scan for any character other than an ASCII digit or letter.
   NOTE(review): the statement executed on a match is not visible here;
   presumably it sets the `encode` flag tested below. */
259 for (index = 0; index < code_len; index++)
261 if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
262 (code_data[index] >= 'a' && code_data[index] <= 'z') ||
263 (code_data[index] >= 'A' && code_data[index] <= 'Z')))
/* open the attribute when the value cannot be part of the element name */
267 if (encode && attribute_name)
268 wrbuf_printf(buffer, " %s=\"", attribute_name);
/* write the value either directly after the element name or as the
   attribute value; nothing is written when encoding is needed but no
   attribute name was given */
270 if (!encode || attribute_name)
271 wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len);
/* close the attribute opened above */
275 if (encode && attribute_name)
276 wrbuf_printf(buffer, "\""); // return error if we couldn't handle it.
/** \brief appends a data field whose tag is taken from an XML node
 *
 * The tag is extracted with nmem_text_node_cdata(); the first indicator_len
 * bytes of indicator are copied into the handle's NMEM pool. The subfield
 * list starts empty and becomes the current append target.
 */
281 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
282 const char *indicator, size_t indicator_len)
284 struct yaz_marc_node *n = yaz_marc_add_node(mt);
285 n->which = YAZ_MARC_DATAFIELD;
286 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
287 n->u.datafield.indicator =
288 nmem_strdupn(mt->nmem, indicator, indicator_len);
289 n->u.datafield.subfields = 0;
291 /* make subfield_pp the current (last one) */
292 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief appends a data field using caller-supplied tag and indicator strings
 *
 * Both pointers are stored as-is (no copies are made here). The subfield
 * list starts empty and becomes the current append target.
 */
295 void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicators)
297 struct yaz_marc_node *n = yaz_marc_add_node(mt);
298 n->which = YAZ_MARC_DATAFIELD;
299 n->u.datafield.tag = tag_value;
300 n->u.datafield.indicator = indicators;
301 n->u.datafield.subfields = 0;
303 // make subfield_pp the current (last one)
304 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief replaces the indicator string of a data field node
 *
 * The pointer is stored as-is; n is accessed as a data field without
 * checking n->which.
 */
307 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
309 n->u.datafield.indicator = indicator;
/** \brief appends a subfield to the most recently added data field
 *
 * code_data holds the subfield code immediately followed by its data; the
 * first code_data_len bytes are copied into the handle's NMEM pool.
 */
314 void yaz_marc_add_subfield(yaz_marc_t mt,
315 const char *code_data, size_t code_data_len)
/* Diagnostic comment node: hex dump of at most the first 16 bytes, with
   " .." appended when more bytes follow.
   NOTE(review): the condition guarding this section is not visible here. */
322 sprintf(msg, "subfield:");
323 for (i = 0; i < 16 && i < code_data_len; i++)
324 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
325 if (i < code_data_len)
326 sprintf(msg + strlen(msg), " ..");
327 yaz_marc_add_comment(mt, msg);
332 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
333 nmem_malloc(mt->nmem, sizeof(*n));
334 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
336 /* mark subfield_pp to point to this one, so we append here next */
337 *mt->subfield_pp = n;
338 mt->subfield_pp = &n->next;
/** \brief parses a 24-byte leader, reports its numeric fields and stores it
 *
 * A local copy of the leader is made and the fields at offsets 10, 11,
 * 12..16, 20, 21 and 22 are parsed with atoi_n_check() into the output
 * parameters. The copy is finally added to the record with
 * yaz_marc_add_leader().
 *
 * NOTE(review): several lines are elided in this view. The assignments that
 * follow each diagnostic message (2, 2, 4, 5, 0) appear to be fallback values
 * used when a field does not parse - confirm against the full source.
 */
342 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
343 int *indicator_length,
344 int *identifier_length,
346 int *length_data_entry,
347 int *length_starting,
348 int *length_implementation)
352 memcpy(leader, leader_c, 24);
354 if (!atoi_n_check(leader+10, 1, indicator_length))
357 "Indicator length at offset 10 should hold a digit."
360 *indicator_length = 2;
362 if (!atoi_n_check(leader+11, 1, identifier_length))
365 "Identifier length at offset 11 should hold a digit."
368 *identifier_length = 2;
370 if (!atoi_n_check(leader+12, 5, base_address))
373 "Base address at offsets 12..16 should hold a number."
377 if (!atoi_n_check(leader+20, 1, length_data_entry))
380 "Length data entry at offset 20 should hold a digit."
382 *length_data_entry = 4;
385 if (!atoi_n_check(leader+21, 1, length_starting))
388 "Length starting at offset 21 should hold a digit."
390 *length_starting = 5;
393 if (!atoi_n_check(leader+22, 1, length_implementation))
396 "Length implementation at offset 22 should hold a digit."
398 *length_implementation = 0;
/* record the parsed values as comment nodes
   (NOTE(review): the guarding condition is not visible here) */
404 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
405 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
406 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
407 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
408 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
409 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
411 yaz_marc_add_leader(mt, leader, 24);
/** \brief sets the string the line-format writer emits before each subfield
 *
 * s is copied into the fixed-size subfield_str buffer; longer input is
 * truncated.
 */
414 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
416 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
417 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/** \brief sets the string the line-format writer emits after each field
 *
 * s is copied into the fixed-size endline_str buffer; longer input is
 * truncated.
 */
420 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
422 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
423 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
426 /* try to guess how many bytes the identifier really is! */
/** \brief returns the byte length of the first character in buf
 *
 * Prefixes of 1 to 4 bytes are fed to yaz_iconv(); the first length that
 * converts without error is returned. In all other visible paths the
 * function returns 1.
 * NOTE(review): the conditions selecting between the paths are elided in this
 * view (presumably whether an iconv descriptor is set).
 */
427 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
432 for (i = 1; i<5; i++)
435 size_t outbytesleft = sizeof(outbuf);
437 const char *inp = buf;
439 size_t inbytesleft = i;
440 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
441 &outp, &outbytesleft);
442 if (r != (size_t) (-1))
443 return i; /* got a complete sequence */
445 return 1; /* giving up */
447 return 1; /* we don't know */
/** \brief discards the current record so the handle can be reused
 *
 * The NMEM pool that backs all nodes is reset and the append cursor is
 * pointed back at the head of the node list.
 */
450 void yaz_marc_reset(yaz_marc_t mt)
452 nmem_reset(mt->nmem);
454 mt->nodes_pp = &mt->nodes;
/** \brief "check" output: writes only the comment nodes of the record
 *
 * The leader node is located and its identifier length (offset 11) is
 * parsed; afterwards each comment node is written to wr through the
 * handle's iconv descriptor, followed by a newline.
 * NOTE(review): return statements and the handling of a missing leader are
 * elided in this view.
 */
458 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
460 struct yaz_marc_node *n;
461 int identifier_length;
462 const char *leader = 0;
464 for (n = mt->nodes; n; n = n->next)
465 if (n->which == YAZ_MARC_LEADER)
467 leader = n->u.leader;
473 if (!atoi_n_check(leader+11, 1, &identifier_length))
476 for (n = mt->nodes; n; n = n->next)
480 case YAZ_MARC_COMMENT:
481 wrbuf_iconv_write(wr, mt->iconv_cd,
482 n->u.comment, strlen(n->u.comment));
483 wrbuf_puts(wr, "\n");
/** \brief returns the number of bytes occupied by the subfield code in data
 *
 * For identifier lengths above 2 the code length is identifier_length - 1;
 * otherwise the length of the first character of data is determined with
 * cdata_one_character().
 */
492 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
493 int identifier_length)
495 /* if identifier length is 2 (most MARCs) or less (probably an error),
496 the code is a single character .. However we've
497 seen multibyte codes, so see how big it really is */
498 if (identifier_length > 2)
499 return identifier_length - 1;
501 return cdata_one_character(mt, data);
/** \brief writes the record in line format to wr
 *
 * The leader node is located first and its identifier length (offset 11)
 * is parsed; it is needed to split each subfield into code and data. Every
 * node is then written according to its type, and a final newline ends the
 * record.
 * NOTE(review): return statements and the handling of a missing leader are
 * elided in this view.
 */
504 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
506 struct yaz_marc_node *n;
507 int identifier_length;
508 const char *leader = 0;
510 for (n = mt->nodes; n; n = n->next)
511 if (n->which == YAZ_MARC_LEADER)
513 leader = n->u.leader;
519 if (!atoi_n_check(leader+11, 1, &identifier_length))
522 for (n = mt->nodes; n; n = n->next)
524 struct yaz_marc_subfield *s;
/* data field: "tag indicators", then for each subfield the separator
   string, the code, a space and the data; ends with endline_str */
527 case YAZ_MARC_DATAFIELD:
528 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
529 n->u.datafield.indicator);
530 for (s = n->u.datafield.subfields; s; s = s->next)
532 size_t using_code_len = get_subfield_len(mt, s->code_data,
535 wrbuf_puts (wr, mt->subfield_str);
536 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
538 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
539 wrbuf_iconv_puts(wr, mt->iconv_cd,
540 s->code_data + using_code_len);
541 marc_iconv_reset(mt, wr);
543 wrbuf_puts (wr, mt->endline_str);
/* control field: "tag data" followed by endline_str */
545 case YAZ_MARC_CONTROLFIELD:
546 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
547 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
548 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
549 marc_iconv_reset(mt, wr);
550 wrbuf_puts (wr, mt->endline_str);
/* comment: converted text closed by ")" and a newline (the opening part is
   written on a line not visible here) */
552 case YAZ_MARC_COMMENT:
554 wrbuf_iconv_write(wr, mt->iconv_cd,
555 n->u.comment, strlen(n->u.comment));
556 marc_iconv_reset(mt, wr);
557 wrbuf_puts(wr, ")\n");
/* leader: written verbatim on its own line, without charset conversion */
559 case YAZ_MARC_LEADER:
560 wrbuf_printf(wr, "%s\n", n->u.leader);
563 wrbuf_puts(wr, "\n");
/** \brief writes the closing collection tag, if a collection was opened
 *
 * Only acts when enable_collection is collection_second, the state the
 * wrbuf-based XML writer enters after emitting the opening collection tag.
 * For the MARCXML, TurboMARC and MarcXchange output formats the closing
 * collection tag is written to wr.
 */
567 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
569 if (mt->enable_collection == collection_second)
571 switch(mt->output_format)
573 case YAZ_MARC_MARCXML:
574 case YAZ_MARC_TURBOMARC:
575 wrbuf_printf(wr, "</collection>\n");
577 case YAZ_MARC_XCHANGE:
578 wrbuf_printf(wr, "</collection>\n");
/** \brief requests that XML output be wrapped in a collection element
 *
 * Sets the state to collection_first, which makes the wrbuf-based XML writer
 * emit the opening collection tag before the next record.
 */
585 void yaz_marc_enable_collection(yaz_marc_t mt)
587 mt->enable_collection = collection_first;
/** \brief writes the record to wr in the handle's configured output format
 *
 * Dispatches on mt->output_format to the matching writer and returns that
 * writer's result.
 * NOTE(review): the case labels for the line and check writers are elided in
 * this view.
 */
590 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
592 switch(mt->output_format)
595 return yaz_marc_write_line(mt, wr);
596 case YAZ_MARC_MARCXML:
597 return yaz_marc_write_marcxml(mt, wr);
598 case YAZ_MARC_TURBOMARC:
599 return yaz_marc_write_turbomarc(mt, wr);
600 case YAZ_MARC_XCHANGE:
601 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
602 case YAZ_MARC_ISO2709:
603 return yaz_marc_write_iso2709(mt, wr);
605 return yaz_marc_write_check(mt, wr);
610 static const char *record_name[2] = { "record", "r"};
611 static const char *leader_name[2] = { "leader", "l"};
612 static const char *controlfield_name[2] = { "controlfield", "c"};
613 static const char *datafield_name[2] = { "datafield", "d"};
614 static const char *indicator_name[2] = { "ind", "i"};
615 static const char *subfield_name[2] = { "subfield", "s"};
617 /** \brief common MARC XML/Xchange/turbomarc writer
619 \param wr WRBUF output
620 \param ns XMLNS for the elements
621 \param format record format (e.g. "MARC21")
622 \param type record type (e.g. "Bibliographic")
623 \param turbo =1 for turbomarc
627 static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
633 struct yaz_marc_node *n;
634 int identifier_length;
635 const char *leader = 0;
637 for (n = mt->nodes; n; n = n->next)
638 if (n->which == YAZ_MARC_LEADER)
640 leader = n->u.leader;
646 if (!atoi_n_check(leader+11, 1, &identifier_length))
649 if (mt->enable_collection != no_collection)
651 if (mt->enable_collection == collection_first)
653 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
654 mt->enable_collection = collection_second;
656 wrbuf_printf(wr, "<%s", record_name[turbo]);
660 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
663 wrbuf_printf(wr, " format=\"%.80s\"", format);
665 wrbuf_printf(wr, " type=\"%.80s\"", type);
666 wrbuf_printf(wr, ">\n");
667 for (n = mt->nodes; n; n = n->next)
669 struct yaz_marc_subfield *s;
673 case YAZ_MARC_DATAFIELD:
675 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
677 wrbuf_printf(wr, " tag=\"");
678 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
679 strlen(n->u.datafield.tag));
681 wrbuf_printf(wr, "\"");
682 if (n->u.datafield.indicator)
685 for (i = 0; n->u.datafield.indicator[i]; i++)
687 wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
688 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
689 n->u.datafield.indicator+i, 1);
690 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
693 wrbuf_printf(wr, ">\n");
694 for (s = n->u.datafield.subfields; s; s = s->next)
696 size_t using_code_len = get_subfield_len(mt, s->code_data,
698 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
701 wrbuf_printf(wr, " code=\"");
702 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
703 s->code_data, using_code_len);
704 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
708 element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
711 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
712 s->code_data + using_code_len,
713 strlen(s->code_data + using_code_len));
714 marc_iconv_reset(mt, wr);
715 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
717 element_name_append_attribute_value(mt, wr, 0, s->code_data, using_code_len);
718 wrbuf_puts(wr, ">\n");
720 wrbuf_printf(wr, " </%s", datafield_name[turbo]);
723 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
724 strlen(n->u.datafield.tag));
725 wrbuf_printf(wr, ">\n");
727 case YAZ_MARC_CONTROLFIELD:
728 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
731 wrbuf_printf(wr, " tag=\"");
732 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
733 strlen(n->u.controlfield.tag));
734 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
738 //TODO convert special
739 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
740 strlen(n->u.controlfield.tag));
741 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
743 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
744 n->u.controlfield.data,
745 strlen(n->u.controlfield.data));
746 marc_iconv_reset(mt, wr);
747 wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
748 //TODO convert special
750 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
751 strlen(n->u.controlfield.tag));
752 wrbuf_puts(wr, ">\n");
754 case YAZ_MARC_COMMENT:
755 wrbuf_printf(wr, "<!-- ");
756 wrbuf_puts(wr, n->u.comment);
757 wrbuf_printf(wr, " -->\n");
759 case YAZ_MARC_LEADER:
760 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
761 wrbuf_iconv_write_cdata(wr,
762 0 , /* no charset conversion for leader */
763 n->u.leader, strlen(n->u.leader));
764 wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
767 wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
/** \brief writes an XML record either via libxml2 or directly to the WRBUF
 *
 * When write_using_libxml2 is set, a node tree is built with
 * yaz_marc_write_xml() or yaz_marc_write_xml_turbo_xml(), placed in a new
 * XML document, serialised with xmlDocDumpMemory() and copied to wr.
 * Otherwise the work is delegated to yaz_marc_write_marcxml_wrbuf().
 * NOTE(review): the choice between the two tree builders, error handling and
 * clean-up of the libxml2 objects are elided in this view.
 */
771 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
777 if (mt->write_using_libxml2)
784 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
786 ret = yaz_marc_write_xml_turbo_xml(mt, &root_ptr, ns, format, type);
790 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
793 xmlDocSetRootElement(doc, root_ptr);
794 xmlDocDumpMemory(doc, &buf_out, &len_out);
796 wrbuf_write(wr, (const char *) buf_out, len_out);
807 return yaz_marc_write_marcxml_wrbuf(mt, wr, ns, format, type, turbo);
/** \brief writes the record as MARCXML (namespace http://www.loc.gov/MARC21/slim)
 *
 * Unless a leader_spec has been configured, leader position 9 is first
 * overwritten with 'a'.
 */
810 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
812 /* set leader 09 to 'a' for UNICODE */
813 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
814 if (!mt->leader_spec)
815 yaz_marc_modify_leader(mt, 9, "a");
816 return yaz_marc_write_marcxml_ns(mt, wr,
817 "http://www.loc.gov/MARC21/slim",
/** \brief writes the record as TurboMARC (namespace http://www.indexdata.com/turbomarc)
 *
 * Unless a leader_spec has been configured, leader position 9 is first
 * overwritten with 'a'. No format or type attribute is passed and the turbo
 * flag is set.
 */
821 int yaz_marc_write_turbomarc(yaz_marc_t mt, WRBUF wr)
823 /* set leader 09 to 'a' for UNICODE */
824 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
825 if (!mt->leader_spec)
826 yaz_marc_modify_leader(mt, 9, "a");
827 return yaz_marc_write_marcxml_ns(mt, wr,
828 "http://www.indexdata.com/turbomarc", 0, 0, 1);
/** \brief writes the record as MarcXchange (namespace info:lc/xmlns/marcxchange-v1)
 *
 * Delegates to yaz_marc_write_marcxml_ns(); the remaining arguments are on
 * lines not visible in this view.
 */
831 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
835 return yaz_marc_write_marcxml_ns(mt, wr,
836 "info:lc/xmlns/marcxchange-v1",
842 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n,
844 xmlNsPtr ns_record, WRBUF wr_cdata,
845 int identifier_length)
848 struct yaz_marc_subfield *s;
849 WRBUF subfield_name = wrbuf_alloc();
851 //TODO consider if safe
854 strncpy(field + 1, n->u.datafield.tag, 3);
856 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
858 if (n->u.datafield.indicator)
861 for (i = 0; n->u.datafield.indicator[i]; i++)
866 ind_val[0] = n->u.datafield.indicator[i];
868 sprintf(ind_str, "%s%d", indicator_name[1], i+1);
869 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
872 for (s = n->u.datafield.subfields; s; s = s->next)
875 xmlNode *ptr_subfield;
876 size_t using_code_len = get_subfield_len(mt, s->code_data,
878 wrbuf_rewind(wr_cdata);
879 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
880 marc_iconv_reset(mt, wr_cdata);
882 wrbuf_rewind(subfield_name);
883 wrbuf_puts(subfield_name, "s");
884 not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0;
885 ptr_subfield = xmlNewTextChild(ptr, ns_record,
886 BAD_CAST wrbuf_cstr(subfield_name),
887 BAD_CAST wrbuf_cstr(wr_cdata));
890 // Generate code attribute value and add
891 wrbuf_rewind(wr_cdata);
892 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
893 xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata));
896 wrbuf_destroy(subfield_name);
899 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
904 struct yaz_marc_node *n;
905 int identifier_length;
906 const char *leader = 0;
911 for (n = mt->nodes; n; n = n->next)
912 if (n->which == YAZ_MARC_LEADER)
914 leader = n->u.leader;
920 if (!atoi_n_check(leader+11, 1, &identifier_length))
923 wr_cdata = wrbuf_alloc();
925 record_ptr = xmlNewNode(0, BAD_CAST "r");
926 *root_ptr = record_ptr;
928 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
929 xmlSetNs(record_ptr, ns_record);
932 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
934 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
935 for (n = mt->nodes; n; n = n->next)
945 case YAZ_MARC_DATAFIELD:
946 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
948 case YAZ_MARC_CONTROLFIELD:
949 wrbuf_rewind(wr_cdata);
950 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
951 marc_iconv_reset(mt, wr_cdata);
953 strncpy(field + 1, n->u.controlfield.tag, 3);
954 ptr = xmlNewTextChild(record_ptr, ns_record,
956 BAD_CAST wrbuf_cstr(wr_cdata));
958 case YAZ_MARC_COMMENT:
959 ptr = xmlNewComment(BAD_CAST n->u.comment);
960 xmlAddChild(record_ptr, ptr);
962 case YAZ_MARC_LEADER:
964 char *field = "leader";
966 xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
967 BAD_CAST n->u.leader);
972 wrbuf_destroy(wr_cdata);
977 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
982 struct yaz_marc_node *n;
983 int identifier_length;
984 const char *leader = 0;
989 for (n = mt->nodes; n; n = n->next)
990 if (n->which == YAZ_MARC_LEADER)
992 leader = n->u.leader;
998 if (!atoi_n_check(leader+11, 1, &identifier_length))
1001 wr_cdata = wrbuf_alloc();
1003 record_ptr = xmlNewNode(0, BAD_CAST "record");
1004 *root_ptr = record_ptr;
1006 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1007 xmlSetNs(record_ptr, ns_record);
1010 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1012 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1013 for (n = mt->nodes; n; n = n->next)
1015 struct yaz_marc_subfield *s;
1020 case YAZ_MARC_DATAFIELD:
1021 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1022 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
1023 if (n->u.datafield.indicator)
1026 for (i = 0; n->u.datafield.indicator[i]; i++)
1031 sprintf(ind_str, "ind%d", i+1);
1032 ind_val[0] = n->u.datafield.indicator[i];
1034 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1037 for (s = n->u.datafield.subfields; s; s = s->next)
1039 xmlNode *ptr_subfield;
1040 size_t using_code_len = get_subfield_len(mt, s->code_data,
1042 wrbuf_rewind(wr_cdata);
1043 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1044 s->code_data + using_code_len);
1045 marc_iconv_reset(mt, wr_cdata);
1046 ptr_subfield = xmlNewTextChild(
1048 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
1050 wrbuf_rewind(wr_cdata);
1051 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1052 s->code_data, using_code_len);
1053 xmlNewProp(ptr_subfield, BAD_CAST "code",
1054 BAD_CAST wrbuf_cstr(wr_cdata));
1057 case YAZ_MARC_CONTROLFIELD:
1058 wrbuf_rewind(wr_cdata);
1059 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1060 marc_iconv_reset(mt, wr_cdata);
1062 ptr = xmlNewTextChild(record_ptr, ns_record,
1063 BAD_CAST "controlfield",
1064 BAD_CAST wrbuf_cstr(wr_cdata));
1066 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1068 case YAZ_MARC_COMMENT:
1069 ptr = xmlNewComment(BAD_CAST n->u.comment);
1070 xmlAddChild(record_ptr, ptr);
1072 case YAZ_MARC_LEADER:
1073 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1074 BAD_CAST n->u.leader);
1078 wrbuf_destroy(wr_cdata);
1084 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1086 struct yaz_marc_node *n;
1087 int indicator_length;
1088 int identifier_length;
1089 int length_data_entry;
1090 int length_starting;
1091 int length_implementation;
1092 int data_offset = 0;
1093 const char *leader = 0;
1094 WRBUF wr_dir, wr_head, wr_data_tmp;
1097 for (n = mt->nodes; n; n = n->next)
1098 if (n->which == YAZ_MARC_LEADER)
1099 leader = n->u.leader;
1103 if (!atoi_n_check(leader+10, 1, &indicator_length))
1105 if (!atoi_n_check(leader+11, 1, &identifier_length))
1107 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1109 if (!atoi_n_check(leader+21, 1, &length_starting))
1111 if (!atoi_n_check(leader+22, 1, &length_implementation))
1114 wr_data_tmp = wrbuf_alloc();
1115 wr_dir = wrbuf_alloc();
1116 for (n = mt->nodes; n; n = n->next)
1118 int data_length = 0;
1119 struct yaz_marc_subfield *s;
1123 case YAZ_MARC_DATAFIELD:
1124 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1125 data_length += indicator_length;
1126 wrbuf_rewind(wr_data_tmp);
1127 for (s = n->u.datafield.subfields; s; s = s->next)
1129 /* write dummy IDFS + content */
1130 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1131 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1132 marc_iconv_reset(mt, wr_data_tmp);
1134 /* write dummy FS (makes MARC-8 to become ASCII) */
1135 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1136 marc_iconv_reset(mt, wr_data_tmp);
1137 data_length += wrbuf_len(wr_data_tmp);
1139 case YAZ_MARC_CONTROLFIELD:
1140 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1142 wrbuf_rewind(wr_data_tmp);
1143 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1144 n->u.controlfield.data);
1145 marc_iconv_reset(mt, wr_data_tmp);
1146 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1147 marc_iconv_reset(mt, wr_data_tmp);
1148 data_length += wrbuf_len(wr_data_tmp);
1150 case YAZ_MARC_COMMENT:
1152 case YAZ_MARC_LEADER:
1157 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1158 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1159 data_offset += data_length;
1162 /* mark end of directory */
1163 wrbuf_putc(wr_dir, ISO2709_FS);
1165 /* base address of data (comes after leader+directory) */
1166 base_address = 24 + wrbuf_len(wr_dir);
1168 wr_head = wrbuf_alloc();
1170 /* write record length */
1171 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1172 /* from "original" leader */
1173 wrbuf_write(wr_head, leader+5, 7);
1174 /* base address of data */
1175 wrbuf_printf(wr_head, "%05d", base_address);
1176 /* from "original" leader */
1177 wrbuf_write(wr_head, leader+17, 7);
1179 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1180 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1181 wrbuf_destroy(wr_head);
1182 wrbuf_destroy(wr_dir);
1183 wrbuf_destroy(wr_data_tmp);
1185 for (n = mt->nodes; n; n = n->next)
1187 struct yaz_marc_subfield *s;
1191 case YAZ_MARC_DATAFIELD:
1192 wrbuf_printf(wr, "%.*s", indicator_length,
1193 n->u.datafield.indicator);
1194 for (s = n->u.datafield.subfields; s; s = s->next)
1196 wrbuf_putc(wr, ISO2709_IDFS);
1197 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1198 marc_iconv_reset(mt, wr);
1200 wrbuf_putc(wr, ISO2709_FS);
1202 case YAZ_MARC_CONTROLFIELD:
1203 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1204 marc_iconv_reset(mt, wr);
1205 wrbuf_putc(wr, ISO2709_FS);
1207 case YAZ_MARC_COMMENT:
1209 case YAZ_MARC_LEADER:
1213 wrbuf_printf(wr, "%c", ISO2709_RS);
/** \brief decodes an ISO2709 buffer and writes it to wr in the configured format
 *
 * The buffer is parsed with yaz_marc_read_iso2709() and the resulting record
 * is written with yaz_marc_write_mode(). Per the inline comments the
 * function returns -1 on error and the length reported by the reader
 * otherwise.
 * NOTE(review): the conditions guarding the two returns are elided in this view.
 */
1218 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1220 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1223 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1225 return -1; /* error */
1226 return r; /* OK, return length > 0 */
/** \brief decodes an ISO2709 buffer into the handle's internal WRBUF
 *
 * The internal buffer m_wr is rewound and filled by yaz_marc_decode_wrbuf().
 * *result and *rsize are set to the buffer's C string and length; they refer
 * to memory owned by the handle.
 * NOTE(review): the guards around the two output assignments and the return
 * statement are elided in this view.
 */
1229 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1230 const char **result, size_t *rsize)
1234 wrbuf_rewind(mt->m_wr);
1235 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1237 *result = wrbuf_cstr(mt->m_wr);
1239 *rsize = wrbuf_len(mt->m_wr);
/** \brief selects the output format used by yaz_marc_write_mode()
 *
 * Despite the name, xmlmode is stored directly as the output format and so
 * may be any of the YAZ_MARC_* format values.
 */
1243 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1245 mt->output_format = xmlmode;
1248 void yaz_marc_debug(yaz_marc_t mt, int level)
1254 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/** \brief returns the iconv descriptor currently attached to the handle */
1259 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1261 return mt->iconv_cd;
/** \brief overwrites part of the record's leader in place
 *
 * The leader node is located and the bytes of str (without its terminating
 * NUL) are copied to offset off of the leader.
 * NOTE(review): no bounds check of off + strlen(str) against the leader
 * length is visible here; callers in this file pass offset 9 and "a".
 */
1264 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1266 struct yaz_marc_node *n;
1268 for (n = mt->nodes; n; n = n->next)
1269 if (n->which == YAZ_MARC_LEADER)
1271 leader = n->u.leader;
1272 memcpy(leader+off, str, strlen(str));
/** \brief installs a leader specification applied to leaders added later
 *
 * Any previous specification is freed first. The new one is checked by
 * running marc_exec_leader() against a scratch 24-byte leader, and a private
 * copy is stored with xstrdup().
 * NOTE(review): the surrounding conditions and return values are elided in
 * this view.
 */
1277 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1279 xfree(mt->leader_spec);
1280 mt->leader_spec = 0;
1283 char dummy_leader[24];
1284 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1286 mt->leader_spec = xstrdup(leader_spec);
/** \brief applies a leader specification to a leader buffer of the given size
 *
 * Entries have the form pos=value, where the value is read up to the next
 * comma (at most 20 characters). pos must lie within [0, size). For a quoted
 * value the text up to the closing quote is copied to leader + pos after
 * checking that it fits.
 * NOTE(review): loop structure, the numeric-value branch and all return
 * statements are elided in this view.
 */
1291 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1293 const char *cp = leader_spec;
1298 int no_read = 0, no = 0;
/* %n stores the number of characters consumed by this entry */
1300 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1301 if (no < 2 || no_read < 3)
1303 if (pos < 0 || (size_t) pos >= size)
/* look for the closing quote, skipping the first character of val */
1308 const char *vp = strchr(val+1, '\'');
1314 if (len + pos > size)
1316 memcpy(leader + pos, val+1, len);
1318 else if (*val >= '0' && *val <= '9')
/** \brief maps a format name to the corresponding YAZ_MARC_* constant
 *
 * Recognised names: "marc", "marcxml", "turbomarc", "marcxchange", "line".
 * The comparison is case-sensitive and arg must not be NULL.
 * NOTE(review): the initial value of mode (the result for an unknown name)
 * and the return statement are elided in this view.
 */
1334 int yaz_marc_decode_formatstr(const char *arg)
1337 if (!strcmp(arg, "marc"))
1338 mode = YAZ_MARC_ISO2709;
1339 if (!strcmp(arg, "marcxml"))
1340 mode = YAZ_MARC_MARCXML;
1341 if (!strcmp(arg, "turbomarc"))
1342 mode = YAZ_MARC_TURBOMARC;
1343 if (!strcmp(arg, "marcxchange"))
1344 mode = YAZ_MARC_XCHANGE;
1345 if (!strcmp(arg, "line"))
1346 mode = YAZ_MARC_LINE;
/** \brief selects whether XML output is produced through libxml2
 *
 * The flag is consulted by yaz_marc_write_marcxml_ns(); a non-zero value
 * selects the libxml2 tree writers instead of the direct WRBUF writer.
 */
1350 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1352 mt->write_using_libxml2 = enable;
1358 * c-file-style: "Stroustrup"
1359 * indent-tabs-mode: nil
1361 * vim: shiftwidth=4 tabstop=8 expandtab