1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
35 enum yaz_collection_state {
41 /** \brief node types for yaz_marc_node */
42 enum YAZ_MARC_NODE_TYPE
45 YAZ_MARC_CONTROLFIELD,
50 /** \brief represents a data field */
51 struct yaz_marc_datafield {
54 struct yaz_marc_subfield *subfields;
57 /** \brief represents a control field */
58 struct yaz_marc_controlfield {
63 /** \brief a comment node */
64 struct yaz_marc_comment {
68 /** \brief MARC node */
69 struct yaz_marc_node {
70 enum YAZ_MARC_NODE_TYPE which;
72 struct yaz_marc_datafield datafield;
73 struct yaz_marc_controlfield controlfield;
77 struct yaz_marc_node *next;
80 /** \brief represents a subfield */
81 struct yaz_marc_subfield {
83 struct yaz_marc_subfield *next;
86 /** \brief the internals of a yaz_marc_t handle */
92 int write_using_libxml2;
94 enum yaz_collection_state enable_collection;
99 struct yaz_marc_node *nodes;
100 struct yaz_marc_node **nodes_pp;
101 struct yaz_marc_subfield **subfield_pp;
/* Creates a MARC handle: allocates it with xmalloc and sets the defaults
   shown below.
   NOTE(review): this excerpt omits several lines of the function (other
   field initialisations and the return statement are not visible). */
104 yaz_marc_t yaz_marc_create(void)
106 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
/* the output format starts out as the line format */
107 mt->xml = YAZ_MARC_LINE;
/* libxml2-based writing and collection wrapping are both off initially */
109 mt->write_using_libxml2 = 0;
110 mt->enable_collection = no_collection;
/* WRBUF owned by the handle; yaz_marc_decode_buf stores its output here */
111 mt->m_wr = wrbuf_alloc();
/* default separators emitted by yaz_marc_write_line */
114 strcpy(mt->subfield_str, " $");
115 strcpy(mt->endline_str, "\n");
/* NMEM pool from which record nodes and their strings are allocated */
117 mt->nmem = nmem_create();
/* Releases what the handle owns: the NMEM pool, the m_wr WRBUF and the
   leader_spec string.
   NOTE(review): further lines of this function are not part of the excerpt. */
122 void yaz_marc_destroy(yaz_marc_t mt)
126 nmem_destroy(mt->nmem);
127 wrbuf_destroy(mt->m_wr);
128 xfree(mt->leader_spec);
132 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
/* Thin wrapper: calls wrbuf_iconv_reset on wr with the handle's iconv_cd.
   NOTE(review): any guard around the call is not visible in this excerpt. */
137 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
139 wrbuf_iconv_reset(wr, mt->iconv_cd);
142 static int marc_exec_leader(const char *leader_spec, char *leader,
/* Allocates a new yaz_marc_node from the handle's NMEM pool and moves
   mt->nodes_pp to the new node's next pointer.
   NOTE(review): the lines that link the node into the list and return it
   are not part of this excerpt - presumably the node is stored through the
   old nodes_pp first; confirm against the full file. */
146 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
148 struct yaz_marc_node *n = (struct yaz_marc_node *)
149 nmem_malloc(mt->nmem, sizeof(*n));
/* the next node added will be attached after this one */
152 mt->nodes_pp = &n->next;
/* Adds a control field node whose tag and data are taken from libxml2
   nodes. Both strings are produced by nmem_text_node_cdata using the
   handle's NMEM pool.
   \param mt       MARC handle
   \param ptr_tag  XML node passed to nmem_text_node_cdata for the tag
   \param ptr_data XML node passed to nmem_text_node_cdata for the data */
157 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
158 const xmlNode *ptr_data)
160 struct yaz_marc_node *n = yaz_marc_add_node(mt);
161 n->which = YAZ_MARC_CONTROLFIELD;
162 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
163 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/* Adds a comment node; the text is duplicated into the handle's NMEM pool
   with nmem_strdup, so the caller's buffer is not referenced afterwards. */
168 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
170 struct yaz_marc_node *n = yaz_marc_add_node(mt);
171 n->which = YAZ_MARC_COMMENT;
172 n->u.comment = nmem_strdup(mt->nmem, comment);
/* printf-style helper: formats the arguments into a local buffer and adds
   the result as a comment node.
   NOTE(review): the buffer declaration and va_list handling are not part
   of this excerpt. */
175 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
/* the formatter is given one byte less than the buffer size */
181 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
182 yaz_marc_add_comment(mt, buf);
186 int yaz_marc_get_debug(yaz_marc_t mt)
/* Adds a leader node. leader_len bytes of leader are copied into the NMEM
   pool, and the copy is then passed to marc_exec_leader together with the
   handle's leader_spec so that the specification can modify it. */
191 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
193 struct yaz_marc_node *n = yaz_marc_add_node(mt);
194 n->which = YAZ_MARC_LEADER;
195 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
196 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
/* Adds a control field node holding copies of tag and of the first
   data_len bytes of data. The sprintf lines below build a comment node
   with a hex dump of the data.
   NOTE(review): the condition that enables the dump is not part of this
   excerpt - presumably it depends on the debug setting; confirm. */
199 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
200 const char *data, size_t data_len)
202 struct yaz_marc_node *n = yaz_marc_add_node(mt);
203 n->which = YAZ_MARC_CONTROLFIELD;
204 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
205 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
211 sprintf(msg, "controlfield:");
/* dump at most 16 bytes; "& 0xff" keeps only the low 8 bits of each char */
212 for (i = 0; i < 16 && i < data_len; i++)
213 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
215 sprintf(msg + strlen(msg), " ..");
216 yaz_marc_add_comment(mt, msg);
/* Adds a data field node with copies of tag and of the first
   indicator_len bytes of indicator. The node starts with an empty subfield
   list, and mt->subfield_pp is pointed at it so that subsequent
   yaz_marc_add_subfield calls append to this field. */
220 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
221 const char *indicator, size_t indicator_len)
223 struct yaz_marc_node *n = yaz_marc_add_node(mt);
224 n->which = YAZ_MARC_DATAFIELD;
225 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
226 n->u.datafield.indicator =
227 nmem_strdupn(mt->nmem, indicator, indicator_len);
228 n->u.datafield.subfields = 0;
230 /* make subfield_pp the current (last one) */
231 mt->subfield_pp = &n->u.datafield.subfields;
/* Variant of yaz_marc_add_datafield where the tag comes from a libxml2
   node (via nmem_text_node_cdata). The indicator is copied for
   indicator_len bytes, the subfield list starts empty and mt->subfield_pp
   is pointed at it. */
235 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
236 const char *indicator, size_t indicator_len)
238 struct yaz_marc_node *n = yaz_marc_add_node(mt);
239 n->which = YAZ_MARC_DATAFIELD;
240 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
241 n->u.datafield.indicator =
242 nmem_strdupn(mt->nmem, indicator, indicator_len);
243 n->u.datafield.subfields = 0;
245 /* make subfield_pp the current (last one) */
246 mt->subfield_pp = &n->u.datafield.subfields;
/* Appends a subfield to the list that mt->subfield_pp currently points at.
   code_data holds the subfield code immediately followed by its data;
   code_data_len bytes are copied into the NMEM pool.
   NOTE(review): the condition that enables the hex-dump comment and the
   guard around the append are not part of this excerpt. */
250 void yaz_marc_add_subfield(yaz_marc_t mt,
251 const char *code_data, size_t code_data_len)
258 sprintf(msg, "subfield:");
/* dump at most 16 bytes as two-digit hex values */
259 for (i = 0; i < 16 && i < code_data_len; i++)
260 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
/* " .." marks that the dump was cut short */
261 if (i < code_data_len)
262 sprintf(msg + strlen(msg), " ..");
263 yaz_marc_add_comment(mt, msg);
268 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
269 nmem_malloc(mt->nmem, sizeof(*n));
270 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
272 /* mark subfield_pp to point to this one, so we append here next */
273 *mt->subfield_pp = n;
274 mt->subfield_pp = &n->next;
/* Parses the numeric fields of a 24-byte MARC leader into the output
   parameters and stores the leader as a node via yaz_marc_add_leader.
   Each field is read with atoi_n_check at a fixed offset of a local copy
   of the leader. The message strings and the assignments that follow each
   check appear to be the handling of an unparsable field, with these
   replacement values: indicator length 2, identifier length 2, length data
   entry 4, length starting 5, length implementation 0.
   NOTE(review): the calls that emit the messages, the braces and one
   parameter line are not part of this excerpt; confirm the exact flow
   against the full file. */
278 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
279 int *indicator_length,
280 int *identifier_length,
282 int *length_data_entry,
283 int *length_starting,
284 int *length_implementation)
/* work on a private copy of the 24 leader bytes */
288 memcpy(leader, leader_c, 24);
/* offset 10: indicator length, one digit */
290 if (!atoi_n_check(leader+10, 1, indicator_length))
293 "Indicator length at offset 10 should hold a digit."
296 *indicator_length = 2;
/* offset 11: identifier length, one digit */
298 if (!atoi_n_check(leader+11, 1, identifier_length))
301 "Identifier length at offset 11 should hold a digit."
304 *identifier_length = 2;
/* offsets 12..16: base address, five digits */
306 if (!atoi_n_check(leader+12, 5, base_address))
309 "Base address at offsets 12..16 should hold a number."
/* offset 20: length of the "length of field" directory part, one digit */
313 if (!atoi_n_check(leader+20, 1, length_data_entry))
316 "Length data entry at offset 20 should hold a digit."
318 *length_data_entry = 4;
/* offset 21: length of the "starting position" directory part, one digit */
321 if (!atoi_n_check(leader+21, 1, length_starting))
324 "Length starting at offset 21 should hold a digit."
326 *length_starting = 5;
/* offset 22: length of the implementation-defined part, one digit */
329 if (!atoi_n_check(leader+22, 1, length_implementation))
332 "Length implementation at offset 22 should hold a digit."
334 *length_implementation = 0;
/* comment nodes listing the values now held in the output parameters */
340 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
341 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
342 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
343 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
344 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
345 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
/* store the local leader copy as the record's leader node */
347 yaz_marc_add_leader(mt, leader, 24);
/* Sets the subfield separator string written by yaz_marc_write_line.
   The copy is limited to the size of mt->subfield_str minus one, and the
   last byte is set to NUL so the result is always terminated (longer
   input is truncated). */
350 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
352 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
353 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/* Sets the end-of-line string written by yaz_marc_write_line after each
   field. The copy is limited to the size of mt->endline_str minus one, and
   the last byte is set to NUL so the result is always terminated (longer
   input is truncated). */
356 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
358 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
359 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
/* Returns the number of bytes that make up the first character of buf.
   Prefixes of 1, 2, 3 and 4 bytes are fed to yaz_iconv in turn; the first
   length for which the conversion does not return (size_t)(-1) is
   returned. In all other visible cases the function returns 1.
   NOTE(review): the condition selecting between the two trailing
   "return 1" statements is not part of this excerpt. */
362 /* try to guess how many bytes the identifier really is! */
363 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
368 for (i = 1; i<5; i++)
371 size_t outbytesleft = sizeof(outbuf);
373 const char *inp = buf;
/* convert exactly the first i bytes */
375 size_t inbytesleft = i;
376 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
377 &outp, &outbytesleft);
378 if (r != (size_t) (-1))
379 return i; /* got a complete sequence */
381 return 1; /* giving up */
383 return 1; /* we don't know */
/* Discards the current record: resets the NMEM pool that backs the nodes
   and points nodes_pp back at the head of the node list.
   NOTE(review): other lines of this function are not part of the excerpt. */
386 void yaz_marc_reset(yaz_marc_t mt)
388 nmem_reset(mt->nmem);
390 mt->nodes_pp = &mt->nodes;
/* "Check" writer: locates the leader node, parses its identifier-length
   digit (offset 11) with atoi_n_check, then walks the nodes and writes
   each comment node to wr followed by a newline.
   NOTE(review): only the YAZ_MARC_COMMENT case and no return statements
   are visible in this excerpt. */
394 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
396 struct yaz_marc_node *n;
397 int identifier_length;
398 const char *leader = 0;
/* pass 1: find the leader node */
400 for (n = mt->nodes; n; n = n->next)
401 if (n->which == YAZ_MARC_LEADER)
403 leader = n->u.leader;
409 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* pass 2: emit the comments */
412 for (n = mt->nodes; n; n = n->next)
416 case YAZ_MARC_COMMENT:
417 wrbuf_iconv_write(wr, mt->iconv_cd,
418 n->u.comment, strlen(n->u.comment));
419 wrbuf_puts(wr, "\n");
/* Returns the number of bytes occupied by the subfield code at the start
   of data. For identifier_length above 2 this is identifier_length - 1;
   otherwise the length is probed with cdata_one_character. */
428 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
429 int identifier_length)
431 /* if identifier length is 2 (most MARCs) or less (probably an error),
432 the code is a single character .. However we've
433 seen multibyte codes, so see how big it really is */
434 if (identifier_length > 2)
435 return identifier_length - 1;
437 return cdata_one_character(mt, data);
/* Writes the record to wr in line format.
   The leader node is located first and its identifier-length digit
   (offset 11) is parsed; every node is then written according to its type.
   NOTE(review): the switch header, break/return statements and some
   continuation lines are not part of this excerpt. */
440 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
442 struct yaz_marc_node *n;
443 int identifier_length;
444 const char *leader = 0;
/* pass 1: find the leader node */
446 for (n = mt->nodes; n; n = n->next)
447 if (n->which == YAZ_MARC_LEADER)
449 leader = n->u.leader;
455 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* pass 2: emit every node */
458 for (n = mt->nodes; n; n = n->next)
460 struct yaz_marc_subfield *s;
463 case YAZ_MARC_DATAFIELD:
/* tag, a space, then the indicators */
464 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
465 n->u.datafield.indicator);
466 for (s = n->u.datafield.subfields; s; s = s->next)
/* the first using_code_len bytes of code_data are the subfield code,
   the remainder is the subfield data */
468 size_t using_code_len = get_subfield_len(mt, s->code_data,
/* separator, code, a space, then the data */
471 wrbuf_puts (wr, mt->subfield_str);
472 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
474 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
475 wrbuf_iconv_puts(wr, mt->iconv_cd,
476 s->code_data + using_code_len);
477 marc_iconv_reset(mt, wr);
479 wrbuf_puts (wr, mt->endline_str);
481 case YAZ_MARC_CONTROLFIELD:
/* tag, a space, then the field data */
482 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
483 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
484 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
485 marc_iconv_reset(mt, wr);
486 wrbuf_puts (wr, mt->endline_str);
488 case YAZ_MARC_COMMENT:
/* comment text followed by ")" and a newline; the statement writing the
   text before the comment is not part of this excerpt */
490 wrbuf_iconv_write(wr, mt->iconv_cd,
491 n->u.comment, strlen(n->u.comment));
492 marc_iconv_reset(mt, wr);
493 wrbuf_puts(wr, ")\n");
495 case YAZ_MARC_LEADER:
/* the leader is written as-is, without character set conversion */
496 wrbuf_printf(wr, "%s\n", n->u.leader);
/* a newline is written after the node loop */
499 wrbuf_puts(wr, "\n");
/* Writes the closing </collection> element to wr when collection output
   has reached the collection_second state, i.e. after
   yaz_marc_write_marcxml_ns1 has handled a record in collection mode.
   Both the MARCXML and the MARCXchange cases emit the same closing tag.
   NOTE(review): the switch header and return statements are not part of
   this excerpt. */
503 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
505 if (mt->enable_collection == collection_second)
509 case YAZ_MARC_MARCXML:
510 wrbuf_printf(wr, "</collection>\n");
512 case YAZ_MARC_XCHANGE:
513 wrbuf_printf(wr, "</collection>\n");
/* Turns on collection output by setting the state to collection_first.
   yaz_marc_write_marcxml_ns1 writes the opening <collection> element while
   the state is collection_first and then moves it to collection_second. */
520 void yaz_marc_enable_collection(yaz_marc_t mt)
522 mt->enable_collection = collection_first;
/* Writes the record to wr with the writer that matches the selected
   output format and returns that writer's result.
   NOTE(review): the switch expression and the labels for the line and
   check writers are not part of this excerpt - presumably the switch is
   on mt->xml; confirm. */
525 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
530 return yaz_marc_write_line(mt, wr);
/* MARCXML and turbo MARCXML share one entry point */
531 case YAZ_MARC_MARCXML:
532 case YAZ_MARC_TMARCXML:
533 return yaz_marc_write_marcxml(mt, wr);
534 case YAZ_MARC_XCHANGE:
535 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
536 case YAZ_MARC_ISO2709:
537 return yaz_marc_write_iso2709(mt, wr);
539 return yaz_marc_write_check(mt, wr);
544 /** \brief common MARC XML/Xchange writer
546 \param wr WRBUF output
547 \param ns XMLNS for the elements
548 \param format record format (e.g. "MARC21")
549 \param type record type (e.g. "Bibliographic")
/* WRBUF-based writer shared by MARCXML and MARCXchange output.
   Finds the leader, parses its identifier-length digit, writes the
   <record> start tag (preceded by <collection> for the first record in
   collection mode) and then one XML element per node.
   NOTE(review): parts of the signature, the switch header, break/return
   statements and the conditions around the format/type attributes are not
   part of this excerpt. */
551 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
556 struct yaz_marc_node *n;
557 int identifier_length;
558 const char *leader = 0;
/* pass 1: find the leader node */
560 for (n = mt->nodes; n; n = n->next)
561 if (n->which == YAZ_MARC_LEADER)
563 leader = n->u.leader;
569 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* collection mode: the namespace is declared on <collection>, which is
   written only while the state is collection_first; <record> is then
   written without an xmlns attribute */
572 if (mt->enable_collection != no_collection)
574 if (mt->enable_collection == collection_first)
575 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
576 mt->enable_collection = collection_second;
577 wrbuf_printf(wr, "<record");
/* this <record> form carries the namespace declaration itself */
581 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
/* optional attributes; %.80s limits each value to 80 characters */
584 wrbuf_printf(wr, " format=\"%.80s\"", format);
586 wrbuf_printf(wr, " type=\"%.80s\"", type);
587 wrbuf_printf(wr, ">\n");
/* pass 2: one element per node */
588 for (n = mt->nodes; n; n = n->next)
590 struct yaz_marc_subfield *s;
594 case YAZ_MARC_DATAFIELD:
595 wrbuf_printf(wr, " <datafield tag=\"");
596 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
597 strlen(n->u.datafield.tag));
598 wrbuf_printf(wr, "\"");
599 if (n->u.datafield.indicator)
/* one attribute per indicator character: ind1, ind2, ... */
602 for (i = 0; n->u.datafield.indicator[i]; i++)
604 wrbuf_printf(wr, " ind%d=\"", i+1);
605 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
606 n->u.datafield.indicator+i, 1);
607 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
610 wrbuf_printf(wr, ">\n");
611 for (s = n->u.datafield.subfields; s; s = s->next)
/* the first using_code_len bytes go into the code attribute, the rest of
   code_data becomes the element content */
613 size_t using_code_len = get_subfield_len(mt, s->code_data,
615 wrbuf_iconv_puts(wr, mt->iconv_cd, " <subfield code=\"");
616 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
617 s->code_data, using_code_len);
618 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
619 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
620 s->code_data + using_code_len,
621 strlen(s->code_data + using_code_len));
622 marc_iconv_reset(mt, wr);
623 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
624 wrbuf_puts(wr, "\n");
626 wrbuf_printf(wr, " </datafield>\n");
628 case YAZ_MARC_CONTROLFIELD:
629 wrbuf_printf(wr, " <controlfield tag=\"");
630 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
631 strlen(n->u.controlfield.tag));
632 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
633 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
634 n->u.controlfield.data,
635 strlen(n->u.controlfield.data));
637 marc_iconv_reset(mt, wr);
638 wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
639 wrbuf_puts(wr, "\n");
641 case YAZ_MARC_COMMENT:
/* the comment text is written with wrbuf_puts, i.e. without the cdata
   writer used for field content */
642 wrbuf_printf(wr, "<!-- ");
643 wrbuf_puts(wr, n->u.comment);
644 wrbuf_printf(wr, " -->\n");
646 case YAZ_MARC_LEADER:
647 wrbuf_printf(wr, " <leader>");
648 wrbuf_iconv_write_cdata(wr,
649 0 /* no charset conversion for leader */,
650 n->u.leader, strlen(n->u.leader));
651 wrbuf_printf(wr, "</leader>\n");
654 wrbuf_puts(wr, "</record>\n");
/* Writes the record as XML in namespace ns, choosing the implementation.
   With write_using_libxml2 set, a libxml2 tree is built (turbo or regular
   layout depending on mt->turbo_format), attached to a new document,
   serialised with xmlDocDumpMemory and copied into wr. Otherwise the
   WRBUF-based writer yaz_marc_write_marcxml_ns1 is used.
   NOTE(review): the rest of the signature, error handling and the release
   of the libxml2 document and buffer are not part of this excerpt. */
658 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
663 if (mt->write_using_libxml2)
669 if (!mt->turbo_format)
670 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
672 ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type);
676 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
679 xmlDocSetRootElement(doc, root_ptr);
680 xmlDocDumpMemory(doc, &buf_out, &len_out);
682 wrbuf_write(wr, (const char *) buf_out, len_out);
693 return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
/* Writes the record as MARCXML (namespace http://www.loc.gov/MARC21/slim).
   When no leader_spec has been set, leader position 9 is changed to 'a'
   before writing.
   NOTE(review): the remaining arguments of the final call are not part of
   this excerpt. */
696 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
698 /* set leader 09 to 'a' for UNICODE */
699 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
700 if (!mt->leader_spec)
701 yaz_marc_modify_leader(mt, 9, "a");
702 return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
/* Writes the record as MARCXchange by calling yaz_marc_write_marcxml_ns
   with the namespace info:lc/xmlns/marcxchange-v1.
   NOTE(review): the remaining parameters and call arguments are not part
   of this excerpt. */
706 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
710 return yaz_marc_write_marcxml_ns(mt, wr,
711 "info:lc/xmlns/marcxchange-v1",
/* Adds the libxml2 element for one data field node below record_ptr.
   Two layouts are visible: a <datafield tag="..."> element with
   <subfield code="..."> children, and the turbo layout where the element
   names are derived from the tag and the subfield code.
   wr_cdata is a caller-supplied scratch buffer used for converted text.
   NOTE(review): the if/else lines selecting between the layouts and the
   declarations of ptr, field, ind_str and ind_val are not part of this
   excerpt. */
717 void add_marc_datafield_xml2(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length)
720 struct yaz_marc_subfield *s;
721 int turbo = mt->turbo_format;
/* regular layout: generic element name, tag carried as an attribute */
723 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
724 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
727 //TODO consider if safe
/* turbo layout: up to 3 tag characters are copied after the first byte of
   field, and field is used as the element name */
730 strncpy(field + 1, n->u.datafield.tag, 3);
731 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
733 if (n->u.datafield.indicator)
/* one attribute per indicator character: ind1, ind2, ... */
736 for (i = 0; n->u.datafield.indicator[i]; i++)
741 sprintf(ind_str, "ind%d", i+1);
742 ind_val[0] = n->u.datafield.indicator[i];
744 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
/* scratch buffer for turbo subfield element names; freed at the end */
747 WRBUF subfield_name = wrbuf_alloc();
748 for (s = n->u.datafield.subfields; s; s = s->next)
750 xmlNode *ptr_subfield;
751 size_t using_code_len = get_subfield_len(mt, s->code_data,
/* convert the subfield data (everything after the code) into wr_cdata */
753 wrbuf_rewind(wr_cdata);
754 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
755 marc_iconv_reset(mt, wr_cdata);
758 ptr_subfield = xmlNewTextChild(
760 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
/* wr_cdata is reused to hold the converted subfield code */
761 wrbuf_rewind(wr_cdata);
762 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
763 xmlNewProp(ptr_subfield, BAD_CAST "code",
764 BAD_CAST wrbuf_cstr(wr_cdata));
766 else { // Turbo format
/* element name is "s" followed by the subfield code */
767 wrbuf_rewind(subfield_name);
768 wrbuf_puts(subfield_name, "s");
769 // TODO Map special codes to something possible for XML ELEMENT names
/* only codes whose first byte is an ASCII digit or letter are written */
770 if ((s->code_data[0] >= '0' && s->code_data[0] <= '9') ||
771 (s->code_data[0] >= 'a' && s->code_data[0] <= 'z') ||
772 (s->code_data[0] >= 'A' && s->code_data[0] <= 'Z'))
774 wrbuf_iconv_write(subfield_name, mt->iconv_cd,s->code_data, using_code_len);
775 ptr_subfield = xmlNewTextChild(ptr, ns_record,
776 BAD_CAST wrbuf_cstr(subfield_name),
777 BAD_CAST wrbuf_cstr(wr_cdata));
/* a warning is logged for subfields that are not written */
781 yaz_log(YLOG_WARN, "Dropping subfield: %s", s->code_data);
784 wrbuf_destroy(subfield_name);
/* Builds a libxml2 tree for the record and stores its root element in
   *root_ptr. Data fields are delegated to add_marc_datafield_xml2; control
   fields, comments and the leader are handled here.
   NOTE(review): the rest of the signature, the switch header, the
   conditions choosing between the regular and turbo control field forms,
   and the return statements are not part of this excerpt. */
787 int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
792 struct yaz_marc_node *n;
793 int identifier_length;
794 const char *leader = 0;
798 int turbo = mt->turbo_format;
/* pass 1: find the leader node */
799 for (n = mt->nodes; n; n = n->next)
800 if (n->which == YAZ_MARC_LEADER)
802 leader = n->u.leader;
808 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* scratch buffer for character-converted text; destroyed at the end */
811 wr_cdata = wrbuf_alloc();
/* root <record> element, placed in namespace ns */
813 record_ptr = xmlNewNode(0, BAD_CAST "record");
814 *root_ptr = record_ptr;
816 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
817 xmlSetNs(record_ptr, ns_record);
820 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
822 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
/* pass 2: one child per node */
823 for (n = mt->nodes; n; n = n->next)
825 struct yaz_marc_subfield *s;
830 case YAZ_MARC_DATAFIELD:
831 add_marc_datafield_xml2(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
833 case YAZ_MARC_CONTROLFIELD:
/* convert the field data into wr_cdata */
834 wrbuf_rewind(wr_cdata);
835 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
836 marc_iconv_reset(mt, wr_cdata);
/* regular form: <controlfield tag="..."> */
839 ptr = xmlNewTextChild(record_ptr, ns_record,
840 BAD_CAST "controlfield",
841 BAD_CAST wrbuf_cstr(wr_cdata));
842 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
845 // TODO required iconv?
/* turbo form: up to 3 tag characters are copied after the first byte of
   field */
848 strncpy(field + 1, n->u.controlfield.tag, 3);
849 ptr = xmlNewTextChild(record_ptr, ns_record,
851 BAD_CAST wrbuf_cstr(wr_cdata));
855 case YAZ_MARC_COMMENT:
856 ptr = xmlNewComment(BAD_CAST n->u.comment);
857 xmlAddChild(record_ptr, ptr);
859 case YAZ_MARC_LEADER:
861 char *field = "leader";
/* the leader text is passed to libxml2 without iconv conversion */
864 xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
865 BAD_CAST n->u.leader);
870 wrbuf_destroy(wr_cdata);
/* Builds a libxml2 tree for the record in the regular MARCXML layout and
   stores its root element in *root_ptr: <record> with <leader>,
   <controlfield tag="...">, <datafield tag="..." indN="..."> and
   <subfield code="..."> children, plus XML comments for comment nodes.
   NOTE(review): the rest of the signature, the switch header, the
   conditions around the format/type attributes and the return statements
   are not part of this excerpt. */
875 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
880 struct yaz_marc_node *n;
881 int identifier_length;
882 const char *leader = 0;
/* pass 1: find the leader node */
887 for (n = mt->nodes; n; n = n->next)
888 if (n->which == YAZ_MARC_LEADER)
890 leader = n->u.leader;
896 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* scratch buffer for character-converted text; destroyed at the end */
899 wr_cdata = wrbuf_alloc();
/* root <record> element, placed in namespace ns */
901 record_ptr = xmlNewNode(0, BAD_CAST "record");
902 *root_ptr = record_ptr;
904 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
905 xmlSetNs(record_ptr, ns_record);
908 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
910 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
/* pass 2: one child per node */
911 for (n = mt->nodes; n; n = n->next)
913 struct yaz_marc_subfield *s;
918 case YAZ_MARC_DATAFIELD:
919 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
920 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
921 if (n->u.datafield.indicator)
/* one attribute per indicator character: ind1, ind2, ... */
924 for (i = 0; n->u.datafield.indicator[i]; i++)
929 sprintf(ind_str, "ind%d", i+1);
930 ind_val[0] = n->u.datafield.indicator[i];
932 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
935 for (s = n->u.datafield.subfields; s; s = s->next)
937 xmlNode *ptr_subfield;
938 size_t using_code_len = get_subfield_len(mt, s->code_data,
/* subfield data (everything after the code) becomes the element text */
940 wrbuf_rewind(wr_cdata);
941 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
942 s->code_data + using_code_len);
943 marc_iconv_reset(mt, wr_cdata);
944 ptr_subfield = xmlNewTextChild(
946 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
/* wr_cdata is reused for the converted subfield code */
948 wrbuf_rewind(wr_cdata);
949 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
950 s->code_data, using_code_len);
951 xmlNewProp(ptr_subfield, BAD_CAST "code",
952 BAD_CAST wrbuf_cstr(wr_cdata));
955 case YAZ_MARC_CONTROLFIELD:
956 wrbuf_rewind(wr_cdata);
957 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
958 marc_iconv_reset(mt, wr_cdata);
960 ptr = xmlNewTextChild(record_ptr, ns_record,
961 BAD_CAST "controlfield",
962 BAD_CAST wrbuf_cstr(wr_cdata));
964 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
966 case YAZ_MARC_COMMENT:
967 ptr = xmlNewComment(BAD_CAST n->u.comment);
968 xmlAddChild(record_ptr, ptr);
970 case YAZ_MARC_LEADER:
/* the leader text is passed to libxml2 without iconv conversion */
971 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
972 BAD_CAST n->u.leader);
976 wrbuf_destroy(wr_cdata);
/* Writes the record to wr in ISO2709 form.
   Steps visible below:
   1. find the leader and parse the one-digit sizes at offsets 10, 11, 20,
      21 and 22;
   2. first pass over the fields: build the directory in wr_dir, measuring
      each field's converted length with the scratch buffer wr_data_tmp;
   3. write a 24-byte leader (new record length and base address, other
      bytes copied from the stored leader) followed by the directory;
   4. second pass: write the field data, then the record separator.
   NOTE(review): some declarations (for example data_offset and
   base_address), the switch headers and the return statements are not
   part of this excerpt. */
985 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
987 struct yaz_marc_node *n;
988 int indicator_length;
989 int identifier_length;
990 int length_data_entry;
992 int length_implementation;
994 const char *leader = 0;
995 WRBUF wr_dir, wr_head, wr_data_tmp;
998 for (n = mt->nodes; n; n = n->next)
999 if (n->which == YAZ_MARC_LEADER)
1000 leader = n->u.leader;
1004 if (!atoi_n_check(leader+10, 1, &indicator_length))
1006 if (!atoi_n_check(leader+11, 1, &identifier_length))
1008 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1010 if (!atoi_n_check(leader+21, 1, &length_starting))
1012 if (!atoi_n_check(leader+22, 1, &length_implementation))
1015 wr_data_tmp = wrbuf_alloc();
1016 wr_dir = wrbuf_alloc();
/* pass 1: directory entries; the data is converted into wr_data_tmp only
   so that its length after character conversion can be measured */
1017 for (n = mt->nodes; n; n = n->next)
1019 int data_length = 0;
1020 struct yaz_marc_subfield *s;
1024 case YAZ_MARC_DATAFIELD:
/* a directory entry starts with at most 3 characters of the tag */
1025 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1026 data_length += indicator_length;
1027 wrbuf_rewind(wr_data_tmp);
1028 for (s = n->u.datafield.subfields; s; s = s->next)
1030 /* write dummy IDFS + content */
1031 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1032 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1033 marc_iconv_reset(mt, wr_data_tmp);
1035 /* write dummy FS (makes MARC-8 to become ASCII) */
1036 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1037 marc_iconv_reset(mt, wr_data_tmp);
1038 data_length += wrbuf_len(wr_data_tmp);
1040 case YAZ_MARC_CONTROLFIELD:
1041 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1043 wrbuf_rewind(wr_data_tmp);
1044 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1045 n->u.controlfield.data);
1046 marc_iconv_reset(mt, wr_data_tmp);
1047 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1048 marc_iconv_reset(mt, wr_data_tmp);
1049 data_length += wrbuf_len(wr_data_tmp);
1051 case YAZ_MARC_COMMENT:
1053 case YAZ_MARC_LEADER:
/* field length and starting offset, zero-padded to the widths taken from
   the leader; data_offset then advances past this field */
1058 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1059 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1060 data_offset += data_length;
1063 /* mark end of directory */
1064 wrbuf_putc(wr_dir, ISO2709_FS);
1066 /* base address of data (comes after leader+directory) */
1067 base_address = 24 + wrbuf_len(wr_dir);
1069 wr_head = wrbuf_alloc();
/* the "+ 1" presumably accounts for the record separator written at the
   end of this function - confirm */
1071 /* write record length */
1072 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1073 /* from "original" leader */
1074 wrbuf_write(wr_head, leader+5, 7);
1075 /* base address of data */
1076 wrbuf_printf(wr_head, "%05d", base_address);
1077 /* from "original" leader */
1078 wrbuf_write(wr_head, leader+17, 7);
/* output: exactly 24 bytes of the rebuilt leader, then the directory */
1080 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1081 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1082 wrbuf_destroy(wr_head);
1083 wrbuf_destroy(wr_dir);
1084 wrbuf_destroy(wr_data_tmp);
/* pass 2: the field data itself, with the real ISO2709 separators */
1086 for (n = mt->nodes; n; n = n->next)
1088 struct yaz_marc_subfield *s;
1092 case YAZ_MARC_DATAFIELD:
/* at most indicator_length characters of the indicator string */
1093 wrbuf_printf(wr, "%.*s", indicator_length,
1094 n->u.datafield.indicator);
1095 for (s = n->u.datafield.subfields; s; s = s->next)
1097 wrbuf_putc(wr, ISO2709_IDFS);
1098 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1099 marc_iconv_reset(mt, wr);
1101 wrbuf_putc(wr, ISO2709_FS);
1103 case YAZ_MARC_CONTROLFIELD:
1104 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1105 marc_iconv_reset(mt, wr);
1106 wrbuf_putc(wr, ISO2709_FS);
1108 case YAZ_MARC_COMMENT:
1110 case YAZ_MARC_LEADER:
/* record separator closes the record */
1114 wrbuf_printf(wr, "%c", ISO2709_RS);
/* Decodes one ISO2709 record from buf (bsize bytes) and writes it to wr in
   the handle's selected output format. Per the comments below, the
   function returns the value of yaz_marc_read_iso2709 (a length > 0) on
   success and -1 on error.
   NOTE(review): the conditions guarding the two returns are not part of
   this excerpt. */
1119 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1121 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1124 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1126 return -1; /* error */
1127 return r; /* OK, return length > 0 */
/* Decodes a record into the handle's own WRBUF (m_wr) and exposes the
   result through *result and *rsize. The returned pointer refers to m_wr,
   which this function rewinds on every call, so the data is overwritten by
   the next call.
   NOTE(review): the guards around the two output assignments and the
   return statement are not part of this excerpt. */
1130 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1131 const char **result, size_t *rsize)
1135 wrbuf_rewind(mt->m_wr);
1136 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1138 *result = wrbuf_cstr(mt->m_wr);
1140 *rsize = wrbuf_len(mt->m_wr);
1144 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1150 void yaz_marc_debug(yaz_marc_t mt, int level)
1156 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/* Returns the iconv descriptor currently stored in the handle. */
1161 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1163 return mt->iconv_cd;
/* Overwrites part of the stored leader: strlen(str) bytes of str are
   copied to the leader node's text starting at offset off.
   NOTE(review): no check of off + strlen(str) against the leader length is
   visible in this excerpt; callers must keep the write inside the leader
   unless a check exists on a line not shown here. */
1166 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1168 struct yaz_marc_node *n;
/* search for the leader node */
1170 for (n = mt->nodes; n; n = n->next)
1171 if (n->which == YAZ_MARC_LEADER)
1173 leader = n->u.leader;
/* the NUL terminator of str is not copied */
1174 memcpy(leader+off, str, strlen(str));
/* Replaces the handle's leader specification. The previous specification
   is freed and cleared first; the new one is tried against a dummy 24-byte
   leader with marc_exec_leader before a copy is stored with xstrdup.
   NOTE(review): the guard on leader_spec, the handling of a failed
   validation and the return statements are not part of this excerpt. */
1179 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1181 xfree(mt->leader_spec);
1182 mt->leader_spec = 0;
1185 char dummy_leader[24];
1186 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1188 mt->leader_spec = xstrdup(leader_spec);
/* Applies a leader specification to leader, a buffer of size bytes.
   Each item is parsed by the sscanf below as pos=value, where value is up
   to 20 characters that are not ','. A value in single quotes is copied
   into the leader at pos; a value starting with a digit is handled by a
   branch whose body is not part of this excerpt.
   NOTE(review): the loop over items, the error returns after the checks
   and the final return value are not part of this excerpt. */
1193 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1195 const char *cp = leader_spec;
1200 int no_read = 0, no = 0;
/* %n stores the number of characters consumed in no_read */
1202 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
/* both conversions must succeed and at least 3 characters be consumed */
1203 if (no < 2 || no_read < 3)
/* pos must lie inside the leader */
1205 if (pos < 0 || (size_t) pos >= size)
/* quoted value: look for the closing quote after the opening one */
1210 const char *vp = strchr(val+1, '\'');
/* the copied text must not extend past the end of the leader */
1216 if (len + pos > size)
1218 memcpy(leader + pos, val+1, len);
1220 else if (*val >= '0' && *val <= '9')
/* Maps a format name to its YAZ_MARC_* constant:
   "marc" -> YAZ_MARC_ISO2709, "marcxml" -> YAZ_MARC_MARCXML,
   "tmarcxml" -> YAZ_MARC_TMARCXML, "marcxchange" -> YAZ_MARC_XCHANGE,
   "line" -> YAZ_MARC_LINE. The comparisons are exact (strcmp).
   NOTE(review): the initial value of mode (the result for an unknown name)
   and the return statement are not part of this excerpt. */
1236 int yaz_marc_decode_formatstr(const char *arg)
1239 if (!strcmp(arg, "marc"))
1240 mode = YAZ_MARC_ISO2709;
1241 if (!strcmp(arg, "marcxml"))
1242 mode = YAZ_MARC_MARCXML;
1243 if (!strcmp(arg, "tmarcxml"))
1244 mode = YAZ_MARC_TMARCXML;
1245 if (!strcmp(arg, "marcxchange"))
1246 mode = YAZ_MARC_XCHANGE;
1247 if (!strcmp(arg, "line"))
1248 mode = YAZ_MARC_LINE;
/* Stores the flag that yaz_marc_write_marcxml_ns tests to decide between
   the libxml2 tree writer (non-zero) and the WRBUF-based writer (zero). */
1252 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1254 mt->write_using_libxml2 = enable;
/* Stores the turbo_format flag. In yaz_marc_write_marcxml_ns a non-zero
   value selects yaz_marc_write_turbo_xml instead of yaz_marc_write_xml
   when libxml2 writing is enabled. */
1257 void yaz_marc_write_turbo_format(yaz_marc_t mt, int enable)
1259 mt->turbo_format = enable;
1266 * c-file-style: "Stroustrup"
1267 * indent-tabs-mode: nil
1269 * vim: shiftwidth=4 tabstop=8 expandtab