2 * Copyright (C) 1995-2006, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: marcdisp.c,v 1.28 2006-04-20 20:35:02 adam Exp $
10 * \brief Implements MARC conversion utilities
25 #include <yaz/marcdisp.h>
26 #include <yaz/wrbuf.h>
27 #include <yaz/yaz-util.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
34 static void yaz_marc_reset(yaz_marc_t mt);
36 /** \brief node types for yaz_marc_node */
37 enum YAZ_MARC_NODE_TYPE
40 YAZ_MARC_CONTROLFIELD,
45 /** \brief represents a data field */
46 struct yaz_marc_datafield {
49 struct yaz_marc_subfield *subfields;
52 /** \brief represents a control field */
53 struct yaz_marc_controlfield {
58 /** \brief a comment node */
59 struct yaz_marc_comment {
63 /** \brief MARC node */
64 struct yaz_marc_node {
65 enum YAZ_MARC_NODE_TYPE which;
67 struct yaz_marc_datafield datafield;
68 struct yaz_marc_controlfield controlfield;
72 struct yaz_marc_node *next;
75 /** \brief represents a subfield */
76 struct yaz_marc_subfield {
78 struct yaz_marc_subfield *next;
81 /** \brief the internals of a yaz_marc_t handle */
90 struct yaz_marc_node *nodes;
91 struct yaz_marc_node **nodes_pp;
92 struct yaz_marc_subfield **subfield_pp;
95 yaz_marc_t yaz_marc_create(void)
97 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
98 mt->xml = YAZ_MARC_LINE;
100 mt->m_wr = wrbuf_alloc();
102 strcpy(mt->subfield_str, " $");
103 strcpy(mt->endline_str, "\n");
105 mt->nmem = nmem_create();
110 void yaz_marc_destroy(yaz_marc_t mt)
114 nmem_destroy(mt->nmem);
115 wrbuf_free (mt->m_wr, 1);
119 struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
121 struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n));
124 mt->nodes_pp = &n->next;
128 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
130 struct yaz_marc_node *n = yaz_marc_add_node(mt);
131 n->which = YAZ_MARC_COMMENT;
132 n->u.comment = nmem_strdup(mt->nmem, comment);
136 static char *yaz_marc_get_xml_text(const xmlNode *ptr_cdata, NMEM nmem)
142 for (ptr = ptr_cdata; ptr; ptr = ptr->next)
143 if (ptr->type == XML_TEXT_NODE)
144 len += xmlStrlen(ptr->content);
145 cdata = (char *) nmem_malloc(nmem, len+1);
147 for (ptr = ptr_cdata; ptr; ptr = ptr->next)
148 if (ptr->type == XML_TEXT_NODE)
149 strcat(cdata, (const char *) ptr->content);
154 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
161 _vsnprintf(buf, sizeof(buf)-1, fmt, ap);
165 vsnprintf(buf, sizeof(buf), fmt, ap);
167 vsprintf(buf, fmt, ap);
171 yaz_marc_add_comment(mt, buf);
175 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
177 struct yaz_marc_node *n = yaz_marc_add_node(mt);
178 n->which = YAZ_MARC_LEADER;
179 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
182 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
183 const char *data, size_t data_len)
185 struct yaz_marc_node *n = yaz_marc_add_node(mt);
186 n->which = YAZ_MARC_CONTROLFIELD;
187 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
188 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
194 sprintf(msg, "controlfield:");
195 for (i = 0; i < 16 && i < data_len; i++)
196 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
198 sprintf(msg + strlen(msg), " ..");
199 yaz_marc_add_comment(mt, msg);
204 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
205 const xmlNode *ptr_data)
207 struct yaz_marc_node *n = yaz_marc_add_node(mt);
208 n->which = YAZ_MARC_CONTROLFIELD;
209 n->u.controlfield.tag = yaz_marc_get_xml_text(ptr_tag, mt->nmem);
210 n->u.controlfield.data = yaz_marc_get_xml_text(ptr_data, mt->nmem);
214 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
215 const char *indicator, size_t indicator_len)
217 struct yaz_marc_node *n = yaz_marc_add_node(mt);
218 n->which = YAZ_MARC_DATAFIELD;
219 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
220 n->u.datafield.indicator =
221 nmem_strdupn(mt->nmem, indicator, indicator_len);
222 n->u.datafield.subfields = 0;
224 /* make subfield_pp the current (last one) */
225 mt->subfield_pp = &n->u.datafield.subfields;
229 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
230 const char *indicator, size_t indicator_len)
232 struct yaz_marc_node *n = yaz_marc_add_node(mt);
233 n->which = YAZ_MARC_DATAFIELD;
234 n->u.datafield.tag = yaz_marc_get_xml_text(ptr_tag, mt->nmem);
235 n->u.datafield.indicator =
236 nmem_strdupn(mt->nmem, indicator, indicator_len);
237 n->u.datafield.subfields = 0;
239 /* make subfield_pp the current (last one) */
240 mt->subfield_pp = &n->u.datafield.subfields;
244 void yaz_marc_add_subfield(yaz_marc_t mt,
245 const char *code_data, size_t code_data_len)
252 sprintf(msg, "subfield:");
253 for (i = 0; i < 16 && i < code_data_len; i++)
254 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
255 if (i < code_data_len)
256 sprintf(msg + strlen(msg), " ..");
257 yaz_marc_add_comment(mt, msg);
262 struct yaz_marc_subfield *n = nmem_malloc(mt->nmem, sizeof(*n));
263 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
265 /* mark subfield_pp to point to this one, so we append here next */
266 *mt->subfield_pp = n;
267 mt->subfield_pp = &n->next;
271 static int atoi_n_check(const char *buf, int size, int *val)
273 if (!isdigit(*(const unsigned char *) buf))
275 *val = atoi_n(buf, size);
279 /** \brief reads the 24-byte MARC leader and checks its content
281 \param leader_c the 24-byte leader
282 \param indicator_length indicator length
283 \param identifier_length identifier length
284 \param base_address base address
285 \param length_data_entry number of characters used for the data length in a directory entry
286 \param length_starting number of characters used for the starting offset in a directory entry
287 \param length_implementation length of implementation defined data
289 static void yaz_marc_read_leader(yaz_marc_t mt, const char *leader_c,
290 int *indicator_length,
291 int *identifier_length,
293 int *length_data_entry,
294 int *length_starting,
295 int *length_implementation)
299 memcpy(leader, leader_c, 24);
301 if (!atoi_n_check(leader+10, 1, indicator_length))
304 "Indicator length at offset 10 should hold a digit."
307 *indicator_length = 2;
309 if (!atoi_n_check(leader+11, 1, identifier_length))
312 "Identifier length at offset 11 should hold a digit."
315 *identifier_length = 2;
317 if (!atoi_n_check(leader+12, 5, base_address))
320 "Base address at offsets 12..16 should hold a number."
324 if (!atoi_n_check(leader+20, 1, length_data_entry))
327 "Length data entry at offset 20 should hold a digit."
329 *length_data_entry = 4;
332 if (!atoi_n_check(leader+21, 1, length_starting))
335 "Length starting at offset 21 should hold a digit."
337 *length_starting = 5;
340 if (!atoi_n_check(leader+22, 1, length_implementation))
343 "Length implementation at offset 22 should hold a digit."
345 *length_implementation = 0;
351 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
352 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
353 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
354 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
355 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
356 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
358 yaz_marc_add_leader(mt, leader, 24);
361 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
363 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
364 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
367 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
369 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
370 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
373 /* try to guess how many bytes the subfield identifier (code) really occupies */
374 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
379 for (i = 1; i<5; i++)
382 size_t outbytesleft = sizeof(outbuf);
384 const char *inp = buf;
386 size_t inbytesleft = i;
387 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
388 &outp, &outbytesleft);
389 if (r != (size_t) (-1))
390 return i; /* got a complete sequence */
392 return 1; /* giving up */
394 return 1; /* we don't know */
397 static void yaz_marc_reset(yaz_marc_t mt)
399 nmem_reset(mt->nmem);
401 mt->nodes_pp = &mt->nodes;
405 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
407 struct yaz_marc_node *n;
408 int identifier_length;
409 const char *leader = 0;
411 for (n = mt->nodes; n; n = n->next)
412 if (n->which == YAZ_MARC_LEADER)
414 leader = n->u.leader;
420 if (!atoi_n_check(leader+11, 1, &identifier_length))
423 for (n = mt->nodes; n; n = n->next)
425 struct yaz_marc_subfield *s;
428 case YAZ_MARC_DATAFIELD:
429 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
430 n->u.datafield.indicator);
431 for (s = n->u.datafield.subfields; s; s = s->next)
433 /* if identifier length is 2 (most MARCs),
434 the code is a single character .. However we've
435 seen multibyte codes, so see how big it really is */
436 size_t using_code_len =
437 (identifier_length != 2) ? identifier_length - 1
439 cdata_one_character(mt, s->code_data);
441 wrbuf_puts (wr, mt->subfield_str);
442 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
444 wrbuf_printf(wr, " ");
445 wrbuf_iconv_puts(wr, mt->iconv_cd,
446 s->code_data + using_code_len);
448 wrbuf_puts (wr, mt->endline_str);
450 case YAZ_MARC_CONTROLFIELD:
451 wrbuf_printf(wr, "%s ", n->u.controlfield.tag);
452 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
453 wrbuf_puts (wr, mt->endline_str);
455 case YAZ_MARC_COMMENT:
457 wrbuf_iconv_write(wr, mt->iconv_cd,
458 n->u.comment, strlen(n->u.comment));
459 wrbuf_puts(wr, ")\n");
461 case YAZ_MARC_LEADER:
462 wrbuf_printf(wr, "%s\n", n->u.leader);
468 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
473 return yaz_marc_write_line(mt, wr);
474 case YAZ_MARC_MARCXML:
475 return yaz_marc_write_marcxml(mt, wr);
476 case YAZ_MARC_XCHANGE:
477 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
478 case YAZ_MARC_ISO2709:
479 return yaz_marc_write_iso2709(mt, wr);
484 /** \brief common MARC XML/Xchange writer
486 \param wr WRBUF output
487 \param ns XMLNS for the elements
488 \param format record format (e.g. "MARC21")
489 \param type record type (e.g. "Bibliographic")
491 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
496 struct yaz_marc_node *n;
497 int identifier_length;
498 const char *leader = 0;
500 for (n = mt->nodes; n; n = n->next)
501 if (n->which == YAZ_MARC_LEADER)
503 leader = n->u.leader;
509 if (!atoi_n_check(leader+11, 1, &identifier_length))
512 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
514 wrbuf_printf(wr, " format=\"%.80s\"", format);
516 wrbuf_printf(wr, " type=\"%.80s\"", type);
517 wrbuf_printf(wr, ">\n");
518 for (n = mt->nodes; n; n = n->next)
520 struct yaz_marc_subfield *s;
523 case YAZ_MARC_DATAFIELD:
524 wrbuf_printf(wr, " <datafield tag=\"");
525 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
526 strlen(n->u.datafield.tag));
527 wrbuf_printf(wr, "\"");
528 if (n->u.datafield.indicator)
531 for (i = 0; n->u.datafield.indicator[i]; i++)
533 wrbuf_printf(wr, " ind%d=\"", i+1);
534 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
535 n->u.datafield.indicator+i, 1);
536 wrbuf_printf(wr, "\"");
539 wrbuf_printf(wr, ">\n");
540 for (s = n->u.datafield.subfields; s; s = s->next)
542 /* if identifier length is 2 (most MARCs),
543 the code is a single character .. However we've
544 seen multibyte codes, so see how big it really is */
545 size_t using_code_len =
546 (identifier_length != 2) ? identifier_length - 1
548 cdata_one_character(mt, s->code_data);
550 wrbuf_puts(wr, " <subfield code=\"");
551 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
552 s->code_data, using_code_len);
553 wrbuf_puts(wr, "\">");
554 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
555 s->code_data + using_code_len,
556 strlen(s->code_data + using_code_len));
557 wrbuf_puts(wr, "</subfield>\n");
559 wrbuf_printf(wr, " </datafield>\n");
561 case YAZ_MARC_CONTROLFIELD:
562 wrbuf_printf(wr, " <controlfield tag=\"");
563 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
564 strlen(n->u.controlfield.tag));
565 wrbuf_printf(wr, "\">");
566 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
567 wrbuf_printf(wr, "</controlfield>\n");
569 case YAZ_MARC_COMMENT:
570 wrbuf_printf(wr, "<!-- %s -->\n", n->u.comment);
572 case YAZ_MARC_LEADER:
573 wrbuf_printf(wr, " <leader>");
574 wrbuf_iconv_write_cdata(wr,
575 0 /* no charset conversion for leader */,
576 n->u.leader, strlen(n->u.leader));
577 wrbuf_printf(wr, "</leader>\n");
580 wrbuf_puts(wr, "</record>\n");
584 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
586 yaz_marc_modify_leader(mt, 9, "a");
587 return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
591 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
595 return yaz_marc_write_marcxml_ns(mt, wr,
596 "http://www.bs.dk/standards/MarcXchange",
600 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
602 struct yaz_marc_node *n;
603 int indicator_length;
604 int identifier_length;
605 int length_data_entry;
607 int length_implementation;
609 const char *leader = 0;
610 WRBUF wr_dir, wr_head;
613 for (n = mt->nodes; n; n = n->next)
614 if (n->which == YAZ_MARC_LEADER)
615 leader = n->u.leader;
619 if (!atoi_n_check(leader+10, 1, &indicator_length))
621 if (!atoi_n_check(leader+11, 1, &identifier_length))
623 if (!atoi_n_check(leader+20, 1, &length_data_entry))
625 if (!atoi_n_check(leader+21, 1, &length_starting))
627 if (!atoi_n_check(leader+22, 1, &length_implementation))
630 wr_dir = wrbuf_alloc();
631 for (n = mt->nodes; n; n = n->next)
634 struct yaz_marc_subfield *s;
637 case YAZ_MARC_DATAFIELD:
638 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
639 data_length += indicator_length;
640 for (s = n->u.datafield.subfields; s; s = s->next)
641 data_length += 1+strlen(s->code_data);
644 case YAZ_MARC_CONTROLFIELD:
645 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
646 data_length += strlen(n->u.controlfield.data);
649 case YAZ_MARC_COMMENT:
651 case YAZ_MARC_LEADER:
656 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
657 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
658 data_offset += data_length;
661 /* mark end of directory */
662 wrbuf_putc(wr_dir, ISO2709_FS);
664 /* base address of data (comes after leader+directory) */
665 base_address = 24 + wrbuf_len(wr_dir);
667 wr_head = wrbuf_alloc();
669 /* write record length */
670 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
671 /* from "original" leader */
672 wrbuf_write(wr_head, leader+5, 7);
673 /* base address of data */
674 wrbuf_printf(wr_head, "%05d", base_address);
675 /* from "original" leader */
676 wrbuf_write(wr_head, leader+17, 7);
678 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
679 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
680 wrbuf_free(wr_head, 1);
681 wrbuf_free(wr_dir, 1);
683 for (n = mt->nodes; n; n = n->next)
685 struct yaz_marc_subfield *s;
688 case YAZ_MARC_DATAFIELD:
689 wrbuf_printf(wr, "%.*s", indicator_length,
690 n->u.datafield.indicator);
691 for (s = n->u.datafield.subfields; s; s = s->next)
692 wrbuf_printf(wr, "%c%s", ISO2709_IDFS, s->code_data);
693 wrbuf_printf(wr, "%c", ISO2709_FS);
695 case YAZ_MARC_CONTROLFIELD:
696 wrbuf_printf(wr, "%s%c", n->u.controlfield.data, ISO2709_FS);
698 case YAZ_MARC_COMMENT:
700 case YAZ_MARC_LEADER:
704 wrbuf_printf(wr, "%c", ISO2709_RS);
709 int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
711 for (; ptr; ptr = ptr->next)
713 if (ptr->type == XML_ELEMENT_NODE)
715 if (!strcmp((const char *) ptr->name, "subfield"))
717 size_t ctrl_data_len = 0;
718 char *ctrl_data_buf = 0;
719 const xmlNode *p = 0, *ptr_code = 0;
720 struct _xmlAttr *attr;
721 for (attr = ptr->properties; attr; attr = attr->next)
722 if (!strcmp((const char *)attr->name, "code"))
723 ptr_code = attr->children;
727 mt, "Bad attribute '%.80s' for 'subfield'",
734 mt, "Missing attribute 'code' for 'subfield'" );
737 if (ptr_code->type == XML_TEXT_NODE)
740 strlen((const char *)ptr_code->content);
745 mt, "Missing value for 'code' in 'subfield'" );
748 for (p = ptr->children; p ; p = p->next)
749 if (p->type == XML_TEXT_NODE)
750 ctrl_data_len += strlen((const char *)p->content);
751 ctrl_data_buf = nmem_malloc(mt->nmem, ctrl_data_len+1);
752 strcpy(ctrl_data_buf, (const char *)ptr_code->content);
753 for (p = ptr->children; p ; p = p->next)
754 if (p->type == XML_TEXT_NODE)
755 strcat(ctrl_data_buf, (const char *)p->content);
756 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
761 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
769 static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
771 int indicator_length;
772 int identifier_length;
774 int length_data_entry;
776 int length_implementation;
777 const char *leader = 0;
778 const xmlNode *ptr = *ptr_p;
780 for(; ptr; ptr = ptr->next)
781 if (ptr->type == XML_ELEMENT_NODE)
783 if (!strcmp((const char *) ptr->name, "leader"))
785 xmlNode *p = ptr->children;
786 for(; p; p = p->next)
787 if (p->type == XML_TEXT_NODE)
788 leader = (const char *) p->content;
794 mt, "Expected element 'leader', got '%.80s'", ptr->name);
800 yaz_marc_cprintf(mt, "Missing element 'leader'");
803 if (strlen(leader) != 24)
805 yaz_marc_cprintf(mt, "Bad length %d of leader data."
806 " Must have length of 24 characters", strlen(leader));
809 yaz_marc_read_leader(mt, leader,
815 &length_implementation);
820 static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
822 for(; ptr; ptr = ptr->next)
823 if (ptr->type == XML_ELEMENT_NODE)
825 if (!strcmp((const char *) ptr->name, "controlfield"))
827 const xmlNode *ptr_tag = 0;
828 struct _xmlAttr *attr;
829 for (attr = ptr->properties; attr; attr = attr->next)
830 if (!strcmp((const char *)attr->name, "tag"))
831 ptr_tag = attr->children;
835 mt, "Bad attribute '%.80s' for 'controlfield'",
842 mt, "Missing attribute 'tag' for 'controlfield'" );
845 yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
847 else if (!strcmp((const char *) ptr->name, "datafield"))
849 char indstr[11]; /* 0(unused), 1,....9, + zero term */
850 const xmlNode *ptr_tag = 0;
851 struct _xmlAttr *attr;
853 for (i = 0; i<11; i++)
855 for (attr = ptr->properties; attr; attr = attr->next)
856 if (!strcmp((const char *)attr->name, "tag"))
857 ptr_tag = attr->children;
858 else if (strlen((const char *)attr->name) == 4 &&
859 !memcmp(attr->name, "ind", 3))
861 int no = atoi((const char *)attr->name+3);
863 && attr->children->type == XML_TEXT_NODE)
864 indstr[no] = attr->children->content[0];
869 mt, "Bad attribute '%.80s' for 'datafield'",
876 mt, "Missing attribute 'tag' for 'datafield'" );
879 /* note that indstr[0] is unused so we use indstr[1..] */
880 yaz_marc_add_datafield_xml(mt, ptr_tag,
881 indstr+1, strlen(indstr+1));
883 if (yaz_marc_read_xml_subfields(mt, ptr->children))
889 "Expected element controlfield or datafield,"
890 " got %.80s", ptr->name);
897 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
899 const xmlNode *ptr = xmlnode;
900 for(; ptr; ptr = ptr->next)
901 if (ptr->type == XML_ELEMENT_NODE)
903 if (!strcmp((const char *) ptr->name, "record"))
908 mt, "Unknown element '%.80s' in MARC XML reader",
915 yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
918 /* ptr points to record node now */
920 if (yaz_marc_read_xml_leader(mt, &ptr))
922 return yaz_marc_read_xml_fields(mt, ptr->next);
925 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
931 int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
935 int indicator_length;
936 int identifier_length;
937 int end_of_directory;
939 int length_data_entry;
941 int length_implementation;
945 record_length = atoi_n (buf, 5);
946 if (record_length < 25)
948 yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
951 /* bail out if bsize is known and record_length is greater than that */
952 if (bsize != -1 && record_length > bsize)
954 yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
955 record_length, bsize);
959 yaz_marc_cprintf(mt, "Record length %5d", record_length);
961 yaz_marc_read_leader(mt, buf,
967 &length_implementation);
969 /* First pass. determine length of directory & base of data */
970 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
972 /* length of directory entry */
973 int l = 3 + length_data_entry + length_starting;
974 if (entry_p + l >= record_length)
976 yaz_marc_cprintf(mt, "Directory offset %d: end of record."
977 " Missing FS char", entry_p);
982 yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s",
983 entry_p, buf+entry_p);
985 /* Check for digits in length info */
987 if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
991 /* Not all digits, so stop directory scan */
992 yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
993 " length and/or length starting", entry_p);
996 entry_p += 3 + length_data_entry + length_starting;
998 end_of_directory = entry_p;
999 if (base_address != entry_p+1)
1001 yaz_marc_cprintf(mt, "Base address not at end of directory,"
1002 " base %d, end %d", base_address, entry_p+1);
1005 /* Second pass. Parse control fields and data fields */
1006 for (entry_p = 24; entry_p != end_of_directory; )
1013 int identifier_flag = 0;
1014 int entry_p0 = entry_p;
1016 memcpy (tag, buf+entry_p, 3);
1019 data_length = atoi_n(buf+entry_p, length_data_entry);
1020 entry_p += length_data_entry;
1021 data_offset = atoi_n(buf+entry_p, length_starting);
1022 entry_p += length_starting;
1023 i = data_offset + base_address;
1024 end_offset = i+data_length-1;
1026 if (data_length <= 0 || data_offset < 0)
1031 yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
1033 tag, entry_p0, data_length, data_offset);
1035 if (end_offset >= record_length)
1037 yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
1038 entry_p0, end_offset, record_length);
1042 if (memcmp (tag, "00", 2))
1043 identifier_flag = 1; /* if not 00X assume subfields */
1044 else if (indicator_length < 4 && indicator_length > 0)
1046 /* Danmarc 00X have subfields */
1047 if (buf[i + indicator_length] == ISO2709_IDFS)
1048 identifier_flag = 1;
1049 else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
1050 identifier_flag = 2;
1053 if (identifier_flag)
1056 i += identifier_flag-1;
1057 yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
1058 i += indicator_length;
1060 while (i < end_offset &&
1061 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1063 int code_offset = i+1;
1066 while (i < end_offset &&
1067 buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
1068 buf[i] != ISO2709_FS)
1070 yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
1077 while (i < end_offset &&
1078 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1080 yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
1084 yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
1087 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1089 yaz_marc_cprintf(mt, "No separator at end of field length=%d",
1093 return record_length;
1096 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1098 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1101 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1103 return -1; /* error */
1104 return r; /* OK, return length > 0 */
1107 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1108 char **result, int *rsize)
1112 wrbuf_rewind(mt->m_wr);
1113 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1115 *result = wrbuf_buf(mt->m_wr);
1117 *rsize = wrbuf_len(mt->m_wr);
1121 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1127 void yaz_marc_debug(yaz_marc_t mt, int level)
1133 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1138 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1140 struct yaz_marc_node *n;
1142 for (n = mt->nodes; n; n = n->next)
1143 if (n->which == YAZ_MARC_LEADER)
1145 leader = n->u.leader;
1146 memcpy(leader+off, str, strlen(str));
1152 int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
1154 yaz_marc_t mt = yaz_marc_create();
1159 r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr);
1160 yaz_marc_destroy(mt);
1165 int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize)
1167 return yaz_marc_decode(buf, wr, debug, bsize, 0);
1171 int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
1173 yaz_marc_t mt = yaz_marc_create();
1177 r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr);
1181 fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf);
1182 yaz_marc_destroy(mt);
1187 int marc_display_ex (const char *buf, FILE *outf, int debug)
1189 return marc_display_exl (buf, outf, debug, -1);
1193 int marc_display (const char *buf, FILE *outf)
1195 return marc_display_ex (buf, outf, 0);
1201 * indent-tabs-mode: nil
1203 * vim: shiftwidth=4 tabstop=8 expandtab