2 * Copyright (C) 1995-2006, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: marcdisp.c,v 1.26 2006-04-19 10:05:03 adam Exp $
10 * \brief Implements MARC conversion utilities
25 #include <yaz/marcdisp.h>
26 #include <yaz/wrbuf.h>
27 #include <yaz/yaz-util.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
34 static void yaz_marc_reset(yaz_marc_t mt);
36 /** \brief node types for yaz_marc_node */
37 enum YAZ_MARC_NODE_TYPE
40 YAZ_MARC_CONTROLFIELD,
45 /** \brief represents a data field */
46 struct yaz_marc_datafield {
49 struct yaz_marc_subfield *subfields;
52 /** \brief represents a control field */
53 struct yaz_marc_controlfield {
58 /** \brief a comment node */
59 struct yaz_marc_comment {
63 /** \brief MARC node */
64 struct yaz_marc_node {
65 enum YAZ_MARC_NODE_TYPE which;
67 struct yaz_marc_datafield datafield;
68 struct yaz_marc_controlfield controlfield;
72 struct yaz_marc_node *next;
75 /** \brief represents a subfield */
76 struct yaz_marc_subfield {
78 struct yaz_marc_subfield *next;
81 /** \brief the internals of a yaz_marc_t handle */
90 struct yaz_marc_node *nodes;
91 struct yaz_marc_node **nodes_pp;
92 struct yaz_marc_subfield **subfield_pp;
/** \brief creates a MARC handle
    Visible setup: the handle is obtained with xmalloc, the output mode is
    YAZ_MARC_LINE, m_wr gets a fresh WRBUF, the subfield prefix is " $",
    the line terminator is "\n" and nmem gets a new NMEM pool.
    NOTE(review): this listing omits some lines of the body (including the
    return statement), so further fields may be initialised too. */
95 yaz_marc_t yaz_marc_create(void)
97 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
98 mt->xml = YAZ_MARC_LINE;
100 mt->m_wr = wrbuf_alloc();
102 strcpy(mt->subfield_str, " $");
103 strcpy(mt->endline_str, "\n");
105 mt->nmem = nmem_create();
/** \brief destroys a MARC handle
    The visible lines destroy the handle's NMEM pool and free its WRBUF
    (m_wr). NOTE(review): releasing the handle itself is not visible in
    this listing - confirm against the full source. */
110 void yaz_marc_destroy(yaz_marc_t mt)
114 nmem_destroy(mt->nmem);
115 wrbuf_free (mt->m_wr, 1);
/** \brief allocates a new node from the handle's NMEM pool
    After the call mt->nodes_pp refers to the new node's next pointer, so
    the following node is appended behind this one.
    NOTE(review): the lines that link the node into the list and return it
    are not visible in this listing. */
119 struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
121 struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n));
124 mt->nodes_pp = &n->next;
/** \brief appends a comment node to the record
    \param mt handle
    \param comment zero-terminated text; a copy is made in mt->nmem */
128 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
130 struct yaz_marc_node *n = yaz_marc_add_node(mt);
131 n->which = YAZ_MARC_COMMENT;
132 n->u.comment = nmem_strdup(mt->nmem, comment);
/** \brief concatenates the text nodes of a libxml2 sibling list
    \param ptr_cdata first node of the sibling list
    \param nmem pool the result buffer is allocated from
    Only nodes of type XML_TEXT_NODE contribute; other node types are
    skipped. */
136 static char *yaz_marc_get_xml_text(const xmlNode *ptr_cdata, NMEM nmem)
/* pass 1: total length of all text nodes */
142 for (ptr = ptr_cdata; ptr; ptr = ptr->next)
143 if (ptr->type == XML_TEXT_NODE)
144 len += xmlStrlen(ptr->content);
/* one extra byte for the terminating zero */
145 cdata = (char *) nmem_malloc(nmem, len+1);
/* pass 2: append the text nodes.
   NOTE(review): strcat needs cdata to start out as an empty string; that
   initialisation is not visible in this listing. */
147 for (ptr = ptr_cdata; ptr; ptr = ptr->next)
148 if (ptr->type == XML_TEXT_NODE)
149 strcat(cdata, (const char *) ptr->content);
/** \brief adds a printf-style formatted comment node
    \param mt handle
    \param fmt printf-style format, followed by its arguments
    The text is formatted into a local buffer and handed to
    yaz_marc_add_comment. Three formatting calls are visible; presumably
    preprocessor conditionals (not visible in this listing) select one. */
154 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
161 _vsnprintf(buf, sizeof(buf)-1, fmt, ap);
165 vsnprintf(buf, sizeof(buf), fmt, ap);
/* NOTE(review): this variant puts no bound on what is written to buf */
167 vsprintf(buf, fmt, ap);
171 yaz_marc_add_comment(mt, buf);
/** \brief appends a leader node to the record
    \param mt handle
    \param leader leader data
    \param leader_len number of bytes of leader copied into mt->nmem */
175 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
177 struct yaz_marc_node *n = yaz_marc_add_node(mt);
178 n->which = YAZ_MARC_LEADER;
179 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
/** \brief appends a control field node to the record
    \param mt handle
    \param tag zero-terminated tag; copied into mt->nmem
    \param data field data
    \param data_len number of bytes of data copied into mt->nmem */
182 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
183 const char *data, size_t data_len)
185 struct yaz_marc_node *n = yaz_marc_add_node(mt);
186 n->which = YAZ_MARC_CONTROLFIELD;
187 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
188 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
/* Build a comment with a hex dump of at most the first 16 data bytes.
   NOTE(review): the condition guarding this dump (and the one guarding
   the " .." suffix) is not visible in this listing. */
194 sprintf(msg, "controlfield:");
195 for (i = 0; i < 16 && i < data_len; i++)
196 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
198 sprintf(msg + strlen(msg), " ..");
199 yaz_marc_add_comment(mt, msg);
/** \brief appends a control field node taken from libxml2 nodes
    \param mt handle
    \param ptr_tag node list whose text nodes form the tag
    \param ptr_data node list whose text nodes form the field data
    Both strings are built by yaz_marc_get_xml_text in mt->nmem. */
204 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
205 const xmlNode *ptr_data)
207 struct yaz_marc_node *n = yaz_marc_add_node(mt);
208 n->which = YAZ_MARC_CONTROLFIELD;
209 n->u.controlfield.tag = yaz_marc_get_xml_text(ptr_tag, mt->nmem);
210 n->u.controlfield.data = yaz_marc_get_xml_text(ptr_data, mt->nmem);
/** \brief appends a data field node (initially without subfields)
    \param mt handle
    \param tag zero-terminated tag; copied into mt->nmem
    \param indicator indicator data
    \param indicator_len number of indicator bytes copied into mt->nmem
    Subfields added afterwards with yaz_marc_add_subfield attach to this
    field, because mt->subfield_pp is pointed at its subfield list. */
214 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
215 const char *indicator, size_t indicator_len)
217 struct yaz_marc_node *n = yaz_marc_add_node(mt);
218 n->which = YAZ_MARC_DATAFIELD;
219 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
220 n->u.datafield.indicator =
221 nmem_strdupn(mt->nmem, indicator, indicator_len);
222 n->u.datafield.subfields = 0;
224 /* make subfield_pp the current (last one) */
225 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief appends a data field node whose tag comes from libxml2 nodes
    \param mt handle
    \param ptr_tag node list whose text nodes form the tag
    \param indicator indicator data
    \param indicator_len number of indicator bytes copied into mt->nmem
    Same as yaz_marc_add_datafield except that the tag is built with
    yaz_marc_get_xml_text. */
229 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
230 const char *indicator, size_t indicator_len)
232 struct yaz_marc_node *n = yaz_marc_add_node(mt);
233 n->which = YAZ_MARC_DATAFIELD;
234 n->u.datafield.tag = yaz_marc_get_xml_text(ptr_tag, mt->nmem);
235 n->u.datafield.indicator =
236 nmem_strdupn(mt->nmem, indicator, indicator_len);
237 n->u.datafield.subfields = 0;
239 /* make subfield_pp the current (last one) */
240 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief appends a subfield to the most recently added data field
    \param mt handle
    \param code_data subfield code immediately followed by its data
    \param code_data_len number of bytes of code_data copied into mt->nmem
    The subfield is stored through mt->subfield_pp, which the datafield
    functions set up. */
244 void yaz_marc_add_subfield(yaz_marc_t mt,
245 const char *code_data, size_t code_data_len)
/* Build a comment with a hex dump of at most the first 16 bytes; " .."
   marks a truncated dump.
   NOTE(review): the condition guarding this dump is not visible in this
   listing. */
252 sprintf(msg, "subfield:");
253 for (i = 0; i < 16 && i < code_data_len; i++)
254 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
255 if (i < code_data_len)
256 sprintf(msg + strlen(msg), " ..");
257 yaz_marc_add_comment(mt, msg);
262 struct yaz_marc_subfield *n = nmem_malloc(mt->nmem, sizeof(*n));
263 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
265 /* mark subfield_pp to point to this one, so we append here next */
266 *mt->subfield_pp = n;
267 mt->subfield_pp = &n->next;
/** \brief digit-checked wrapper around atoi_n
    \param buf input characters
    \param size number of characters passed to atoi_n
    \param val receives the converted value
    Only the first character is tested with isdigit. Callers in this file
    treat a zero result as failure; the return statements themselves are
    not visible in this listing. */
271 static int atoi_n_check(const char *buf, int size, int *val)
273 if (!isdigit(*(const unsigned char *) buf))
275 *val = atoi_n(buf, size);
279 /** \brief reads the MARC 24 bytes leader and checks content
281 \param leader_c the 24 byte leader
282 \param indicator_length indicator length
283 \param identifier_length identifier length
284 \param base_address base address
285 \param length_data_entry length of data entry
286 \param length_starting length of starting
287 \param length_implementation length of implementation defined data
289 static void yaz_marc_read_leader(yaz_marc_t mt, const char *leader_c,
290 int *indicator_length,
291 int *identifier_length,
293 int *length_data_entry,
294 int *length_starting,
295 int *length_implementation)
/* the checks below operate on a copy of the 24 leader bytes */
299 memcpy(leader, leader_c, 24);
/* Each leader position is validated with atoi_n_check. Where a literal
   assignment follows a check (2, 2, 4, 5 and 0 below) it appears to be the
   fallback for a failed check; the enclosing braces are not visible in
   this listing. */
301 if (!atoi_n_check(leader+10, 1, indicator_length))
304 "Indicator length at offset 10 should hold a digit."
307 *indicator_length = 2;
309 if (!atoi_n_check(leader+11, 1, identifier_length))
312 "Identifier length at offset 11 should hold a digit."
315 *identifier_length = 2;
317 if (!atoi_n_check(leader+12, 5, base_address))
320 "Base address at offsets 12..16 should hold a number."
324 if (!atoi_n_check(leader+20, 1, length_data_entry))
327 "Length data entry at offset 20 should hold a digit."
329 *length_data_entry = 4;
332 if (!atoi_n_check(leader+21, 1, length_starting))
335 "Length starting at offset 21 should hold a digit."
337 *length_starting = 5;
340 if (!atoi_n_check(leader+22, 1, length_implementation))
343 "Length implementation at offset 22 should hold a digit."
345 *length_implementation = 0;
/* report the values in use as comment nodes */
351 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
352 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
353 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
354 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
355 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
356 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
/* the local copy, not leader_c, becomes the record's leader node */
358 yaz_marc_add_leader(mt, leader, 24);
/** \brief sets the string written before each subfield in line mode
    \param mt handle
    \param s new prefix; truncated to fit mt->subfield_str and always
    zero-terminated */
361 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
363 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
364 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/** \brief sets the string written after each field in line mode
    \param mt handle
    \param s new terminator; truncated to fit mt->endline_str and always
    zero-terminated */
367 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
369 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
370 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
/** \brief writes character data to wr in the form the output mode needs
    \param mt handle (supplies the mode and the iconv handle)
    \param buf data to write
    \param len number of bytes in buf
    \param wr destination buffer
    ISO2709 and line mode use wrbuf_iconv_write; the final call uses
    wrbuf_iconv_write_cdata and is presumably the branch for the remaining
    modes (its else line is not visible in this listing). */
373 static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len, WRBUF wr)
375 if (mt->xml == YAZ_MARC_ISO2709)
376 wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
377 else if (mt->xml == YAZ_MARC_LINE)
378 wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
380 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, buf, len);
383 /* try to guess how many bytes the identifier really is! */
/* Tries prefixes of 1, 2, 3 and 4 bytes of buf with yaz_iconv and returns
   the first length that converts without error; otherwise returns 1.
   NOTE(review): the condition selecting between the two trailing returns
   is not visible in this listing. */
384 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
389 for (i = 1; i<5; i++)
392 size_t outbytesleft = sizeof(outbuf);
394 const char *inp = buf;
396 size_t inbytesleft = i;
397 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
398 &outp, &outbytesleft);
399 if (r != (size_t) (-1))
400 return i; /* got a complete sequence */
402 return 1; /* giving up */
404 return 1; /* we don't know */
/** \brief discards the nodes of the current record
    Resets the NMEM pool that all nodes are allocated from and points
    nodes_pp back at the list head, so the next node added becomes the
    first one. */
407 static void yaz_marc_reset(yaz_marc_t mt)
409 nmem_reset(mt->nmem);
411 mt->nodes_pp = &mt->nodes;
/** \brief writes the record in line format
    \param mt handle holding the record nodes
    \param wr destination buffer
    NOTE(review): the return statements are not visible in this listing;
    a caller in this file documents 0 for OK and -1 otherwise. */
415 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
417 struct yaz_marc_node *n;
418 int identifier_length;
419 const char *leader = 0;
/* locate the leader node; the identifier length is read from it */
421 for (n = mt->nodes; n; n = n->next)
422 if (n->which == YAZ_MARC_LEADER)
424 leader = n->u.leader;
430 if (!atoi_n_check(leader+11, 1, &identifier_length))
433 for (n = mt->nodes; n; n = n->next)
435 struct yaz_marc_subfield *s;
/* data field: tag, indicator, then each subfield as
   <subfield_str><code> <data>, then the line terminator */
438 case YAZ_MARC_DATAFIELD:
439 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
440 n->u.datafield.indicator);
441 for (s = n->u.datafield.subfields; s; s = s->next)
443 /* if identifier length is 2 (most MARCs),
444 the code is a single character .. However we've
445 seen multibyte codes, so see how big it really is */
446 size_t using_code_len =
447 (identifier_length != 2) ? identifier_length - 1
449 cdata_one_character(mt, s->code_data);
451 wrbuf_puts (wr, mt->subfield_str);
452 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
454 wrbuf_printf(wr, " ");
455 wrbuf_iconv_puts(wr, mt->iconv_cd,
456 s->code_data + using_code_len);
458 wrbuf_puts (wr, mt->endline_str);
/* control field: tag, data, line terminator */
460 case YAZ_MARC_CONTROLFIELD:
461 wrbuf_printf(wr, "%s ", n->u.controlfield.tag);
462 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
463 wrbuf_puts (wr, mt->endline_str);
/* comment: text followed by ")" and a newline */
465 case YAZ_MARC_COMMENT:
467 wrbuf_iconv_write(wr, mt->iconv_cd,
468 n->u.comment, strlen(n->u.comment));
469 wrbuf_puts(wr, ")\n");
/* leader: written as is, followed by a newline */
471 case YAZ_MARC_LEADER:
472 wrbuf_printf(wr, "%s\n", n->u.leader);
/** \brief writes the record using the writer for the selected output mode
    \param mt handle
    \param wr destination buffer
    \return result of the selected writer (line, MARCXML, MarcXchange or
    ISO2709). NOTE(review): the switch expression and the first case label
    are not visible in this listing. */
478 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
483 return yaz_marc_write_line(mt, wr);
484 case YAZ_MARC_MARCXML:
485 return yaz_marc_write_marcxml(mt, wr);
486 case YAZ_MARC_XCHANGE:
487 return yaz_marc_write_marcxchange(mt, wr);
488 case YAZ_MARC_ISO2709:
489 return yaz_marc_write_iso2709(mt, wr);
/** \brief writes the record as an XML <record> element in a given namespace
    \param mt handle holding the record nodes
    \param wr destination buffer
    The namespace URI (ns in the body) is written as the record's xmlns
    attribute; its parameter line is not visible in this listing, and
    neither are the return statements. */
494 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
497 struct yaz_marc_node *n;
498 int identifier_length;
499 const char *leader = 0;
/* locate the leader node; the identifier length is read from it */
501 for (n = mt->nodes; n; n = n->next)
502 if (n->which == YAZ_MARC_LEADER)
504 leader = n->u.leader;
510 if (!atoi_n_check(leader+11, 1, &identifier_length))
513 wrbuf_printf(wr, "<record xmlns=\"%s\">\n", ns);
514 for (n = mt->nodes; n; n = n->next)
516 struct yaz_marc_subfield *s;
519 case YAZ_MARC_DATAFIELD:
520 wrbuf_printf(wr, " <datafield tag=\"");
521 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
522 strlen(n->u.datafield.tag));
523 wrbuf_printf(wr, "\"");
524 if (n->u.datafield.indicator)
/* one attribute ind1, ind2, ... per indicator character */
527 for (i = 0; n->u.datafield.indicator[i]; i++)
529 wrbuf_printf(wr, " ind%d=\"", i+1);
530 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
531 n->u.datafield.indicator+i, 1);
532 wrbuf_printf(wr, "\"");
535 wrbuf_printf(wr, ">\n");
536 for (s = n->u.datafield.subfields; s; s = s->next)
538 /* if identifier length is 2 (most MARCs),
539 the code is a single character .. However we've
540 seen multibyte codes, so see how big it really is */
541 size_t using_code_len =
542 (identifier_length != 2) ? identifier_length - 1
544 cdata_one_character(mt, s->code_data);
/* the first using_code_len bytes are the code attribute, the rest is
   the element text */
546 wrbuf_puts(wr, " <subfield code=\"");
547 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
548 s->code_data, using_code_len);
549 wrbuf_puts(wr, "\">");
550 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
551 s->code_data + using_code_len,
552 strlen(s->code_data + using_code_len));
553 wrbuf_puts(wr, "</subfield>\n");
555 wrbuf_printf(wr, " </datafield>\n");
557 case YAZ_MARC_CONTROLFIELD:
558 wrbuf_printf(wr, " <controlfield tag=\"");
559 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
560 strlen(n->u.controlfield.tag));
561 wrbuf_printf(wr, "\">");
562 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
563 wrbuf_printf(wr, "</controlfield>\n");
565 case YAZ_MARC_COMMENT:
/* NOTE(review): the comment text is written verbatim, without escaping
   and without charset conversion */
566 wrbuf_printf(wr, "<!-- %s -->\n", n->u.comment);
568 case YAZ_MARC_LEADER:
569 wrbuf_printf(wr, " <leader>");
570 wrbuf_iconv_write_cdata(wr,
571 0 /* no charset conversion for leader */,
572 n->u.leader, strlen(n->u.leader));
573 wrbuf_printf(wr, "</leader>\n");
576 wrbuf_puts(wr, "</record>\n");
/** \brief writes the record as XML in the namespace
    http://www.loc.gov/MARC21/slim
    \param mt handle
    \param wr destination buffer
    \return result of yaz_marc_write_marcxml_ns */
580 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
582 return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim");
/** \brief writes the record as XML in the namespace
    http://www.bs.dk/standards/MarcXchange
    \param mt handle
    \param wr destination buffer
    \return result of yaz_marc_write_marcxml_ns */
585 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr)
587 return yaz_marc_write_marcxml_ns(mt, wr,
588 "http://www.bs.dk/standards/MarcXchange");
/** \brief writes the record in ISO2709 format
    \param mt handle holding the record nodes
    \param wr destination buffer
    The directory and a new 24-byte leader are built in temporary buffers
    (wr_dir, wr_head) and written first; the field data follows and the
    record ends with ISO2709_RS.
    NOTE(review): several declarations, the switch statements and all
    return statements are not visible in this listing. */
591 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
593 struct yaz_marc_node *n;
594 int indicator_length;
595 int identifier_length;
596 int length_data_entry;
598 int length_implementation;
600 const char *leader = 0;
601 WRBUF wr_dir, wr_head;
/* locate the leader node; the length digits below are read from it */
604 for (n = mt->nodes; n; n = n->next)
605 if (n->which == YAZ_MARC_LEADER)
606 leader = n->u.leader;
610 if (!atoi_n_check(leader+10, 1, &indicator_length))
612 if (!atoi_n_check(leader+11, 1, &identifier_length))
614 if (!atoi_n_check(leader+20, 1, &length_data_entry))
616 if (!atoi_n_check(leader+21, 1, &length_starting))
618 if (!atoi_n_check(leader+22, 1, &length_implementation))
/* pass 1: build the directory - at most 3 tag characters, then data
   length and data offset, zero-padded to the widths from the leader */
621 wr_dir = wrbuf_alloc();
622 for (n = mt->nodes; n; n = n->next)
625 struct yaz_marc_subfield *s;
628 case YAZ_MARC_DATAFIELD:
629 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
630 data_length += indicator_length;
/* one identifier separator byte plus code and data per subfield */
631 for (s = n->u.datafield.subfields; s; s = s->next)
632 data_length += 1+strlen(s->code_data);
635 case YAZ_MARC_CONTROLFIELD:
636 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
637 data_length += strlen(n->u.controlfield.data);
640 case YAZ_MARC_COMMENT:
642 case YAZ_MARC_LEADER:
647 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
648 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
649 data_offset += data_length;
652 /* mark end of directory */
653 wrbuf_putc(wr_dir, ISO2709_FS);
655 /* base address of data (comes after leader+directory) */
656 base_address = 24 + wrbuf_len(wr_dir);
658 wr_head = wrbuf_alloc();
660 /* write record length */
661 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
662 /* from "original" leader */
663 wrbuf_write(wr_head, leader+5, 7);
664 /* base address of data */
665 wrbuf_printf(wr_head, "%05d", base_address);
666 /* from "original" leader */
667 wrbuf_write(wr_head, leader+17, 7);
/* emit leader and directory, then release the temporary buffers */
669 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
670 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
671 wrbuf_free(wr_head, 1);
672 wrbuf_free(wr_dir, 1);
/* pass 2: field data in the same node order as the directory */
674 for (n = mt->nodes; n; n = n->next)
676 struct yaz_marc_subfield *s;
679 case YAZ_MARC_DATAFIELD:
680 wrbuf_printf(wr, "%.*s", indicator_length,
681 n->u.datafield.indicator);
682 for (s = n->u.datafield.subfields; s; s = s->next)
683 wrbuf_printf(wr, "%c%s", ISO2709_IDFS, s->code_data);
684 wrbuf_printf(wr, "%c", ISO2709_FS);
686 case YAZ_MARC_CONTROLFIELD:
687 wrbuf_printf(wr, "%s%c", n->u.controlfield.data, ISO2709_FS);
689 case YAZ_MARC_COMMENT:
691 case YAZ_MARC_LEADER:
/* record terminator */
695 wrbuf_printf(wr, "%c", ISO2709_RS);
/** \brief reads the 'subfield' elements of a data field from libxml2 nodes
    \param mt handle
    \param ptr first node of the sibling list to scan
    Only element nodes are examined. Each 'subfield' element becomes one
    subfield whose code_data is the value of its 'code' attribute followed
    by the element's text. Problems are reported as comment nodes.
    NOTE(review): the return statements are not visible in this listing. */
700 int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
702 for (; ptr; ptr = ptr->next)
704 if (ptr->type == XML_ELEMENT_NODE)
706 if (!strcmp((const char *) ptr->name, "subfield"))
708 size_t ctrl_data_len = 0;
709 char *ctrl_data_buf = 0;
710 const xmlNode *p = 0, *ptr_code = 0;
711 struct _xmlAttr *attr;
/* 'code' is the only attribute name the loop matches */
712 for (attr = ptr->properties; attr; attr = attr->next)
713 if (!strcmp((const char *)attr->name, "code"))
714 ptr_code = attr->children;
718 mt, "Bad attribute '%.80s' for 'subfield'",
725 mt, "Missing attribute 'code' for 'subfield'" );
728 if (ptr_code->type == XML_TEXT_NODE)
731 strlen((const char *)ptr_code->content);
736 mt, "Missing value for 'code' in 'subfield'" );
/* add the length of the element text, then build code + text in one
   buffer from mt->nmem */
739 for (p = ptr->children; p ; p = p->next)
740 if (p->type == XML_TEXT_NODE)
741 ctrl_data_len += strlen((const char *)p->content);
742 ctrl_data_buf = nmem_malloc(mt->nmem, ctrl_data_len+1);
743 strcpy(ctrl_data_buf, (const char *)ptr_code->content);
744 for (p = ptr->children; p ; p = p->next)
745 if (p->type == XML_TEXT_NODE)
746 strcat(ctrl_data_buf, (const char *)p->content);
747 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
752 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
/** \brief reads the 'leader' element of a record from libxml2 nodes
    \param mt handle
    \param ptr_p address of the node pointer at which scanning starts
    The leader text must be exactly 24 characters and is passed to
    yaz_marc_read_leader. Problems are reported as comment nodes.
    NOTE(review): the return statements and any update of *ptr_p are not
    visible in this listing. */
760 static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
762 int indicator_length;
763 int identifier_length;
765 int length_data_entry;
767 int length_implementation;
768 const char *leader = 0;
769 const xmlNode *ptr = *ptr_p;
771 for(; ptr; ptr = ptr->next)
772 if (ptr->type == XML_ELEMENT_NODE)
774 if (!strcmp((const char *) ptr->name, "leader"))
/* every text child overwrites leader, so the last one is used */
776 xmlNode *p = ptr->children;
777 for(; p; p = p->next)
778 if (p->type == XML_TEXT_NODE)
779 leader = (const char *) p->content;
785 mt, "Expected element 'leader', got '%.80s'", ptr->name);
791 yaz_marc_cprintf(mt, "Missing element 'leader'");
794 if (strlen(leader) != 24)
/* NOTE(review): strlen returns size_t but the format uses %d */
796 yaz_marc_cprintf(mt, "Bad length %d of leader data."
797 " Must have length of 24 characters", strlen(leader));
800 yaz_marc_read_leader(mt, leader,
806 &length_implementation);
/** \brief reads 'controlfield' and 'datafield' elements from libxml2 nodes
    \param mt handle
    \param ptr first node of the sibling list to scan
    Only element nodes are examined; problems are reported as comment
    nodes. NOTE(review): the return statements are not visible in this
    listing. */
811 static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
813 for(; ptr; ptr = ptr->next)
814 if (ptr->type == XML_ELEMENT_NODE)
816 if (!strcmp((const char *) ptr->name, "controlfield"))
818 const xmlNode *ptr_tag = 0;
819 struct _xmlAttr *attr;
/* 'tag' is the only attribute name the loop matches */
820 for (attr = ptr->properties; attr; attr = attr->next)
821 if (!strcmp((const char *)attr->name, "tag"))
822 ptr_tag = attr->children;
826 mt, "Bad attribute '%.80s' for 'controlfield'",
833 mt, "Missing attribute 'tag' for 'controlfield'" );
836 yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
838 else if (!strcmp((const char *) ptr->name, "datafield"))
840 char indstr[11]; /* 0(unused), 1,....9, + zero term */
841 const xmlNode *ptr_tag = 0;
842 struct _xmlAttr *attr;
844 for (i = 0; i<11; i++)
/* matches 'tag' and four-character attribute names starting with "ind";
   the trailing character selects the slot in indstr */
846 for (attr = ptr->properties; attr; attr = attr->next)
847 if (!strcmp((const char *)attr->name, "tag"))
848 ptr_tag = attr->children;
849 else if (strlen((const char *)attr->name) == 4 &&
850 !memcmp(attr->name, "ind", 3))
852 int no = atoi((const char *)attr->name+3);
854 && attr->children->type == XML_TEXT_NODE)
855 indstr[no] = attr->children->content[0];
860 mt, "Bad attribute '%.80s' for 'datafield'",
867 mt, "Missing attribute 'tag' for 'datafield'" );
870 /* note that indstr[0] is unused so we use indstr[1..] */
871 yaz_marc_add_datafield_xml(mt, ptr_tag,
872 indstr+1, strlen(indstr+1));
/* the subfields attach to the data field added just above */
874 if (yaz_marc_read_xml_subfields(mt, ptr->children))
880 "Expected element controlfield or datafield,"
881 " got %.80s", ptr->name);
/** \brief reads a MARC record from a libxml2 node list
    \param mt handle
    \param xmlnode a libxml2 xmlNode pointer, passed as const void *
    Looks for a 'record' element, reads the leader and then the fields.
    \return result of yaz_marc_read_xml_fields when the leader was read;
    the other return paths are not visible in this listing.
    NOTE(review): a second definition with the same signature follows;
    presumably preprocessor conditionals (not visible) select one. */
888 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
890 const xmlNode *ptr = xmlnode;
891 for(; ptr; ptr = ptr->next)
892 if (ptr->type == XML_ELEMENT_NODE)
894 if (!strcmp((const char *) ptr->name, "record"))
899 mt, "Unknown element '%.80s' in MARC XML reader",
906 yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
909 /* ptr points to record node now */
911 if (yaz_marc_read_xml_leader(mt, &ptr))
913 return yaz_marc_read_xml_fields(mt, ptr->next);
916 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
/** \brief parses an ISO2709 record into the handle's node list
    \param mt handle
    \param buf record data, starting with the 24 byte leader
    \param bsize size of buf, or -1 if it is not known
    \return the record length taken from the first 5 bytes of buf on the
    visible success path; the error returns are not visible in this
    listing. Problems are reported as comment nodes. */
922 int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
926 int indicator_length;
927 int identifier_length;
928 int end_of_directory;
930 int length_data_entry;
932 int length_implementation;
936 record_length = atoi_n (buf, 5);
937 if (record_length < 25)
/* NOTE(review): the message says "< 24" while the check is "< 25" */
939 yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
942 /* bail out if bsize is known and record_length exceeds it */
943 if (bsize != -1 && record_length > bsize)
/* NOTE(review): the message prints "record_length < bsize" although this
   branch is taken when record_length > bsize */
945 yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
946 record_length, bsize);
950 yaz_marc_cprintf(mt, "Record length %5d", record_length);
952 yaz_marc_read_leader(mt, buf,
958 &length_implementation);
960 /* First pass. determine length of directory & base of data */
961 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
963 /* length of directory entry */
964 int l = 3 + length_data_entry + length_starting;
965 if (entry_p + l >= record_length)
967 yaz_marc_cprintf(mt, "Directory offset %d: end of record."
968 " Missing FS char", entry_p);
973 yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s",
974 entry_p, buf+entry_p);
976 /* Check for digits in length info */
978 if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
982 /* Not all digits, so stop directory scan */
983 yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
984 " length and/or length starting", entry_p);
987 entry_p += 3 + length_data_entry + length_starting;
989 end_of_directory = entry_p;
990 if (base_address != entry_p+1)
992 yaz_marc_cprintf(mt, "Base address not at end of directory,"
993 " base %d, end %d", base_address, entry_p+1);
996 /* Second pass. parse control - and datafields */
997 for (entry_p = 24; entry_p != end_of_directory; )
1004 int identifier_flag = 0;
1005 int entry_p0 = entry_p;
/* directory entry layout: 3-byte tag, data length, data offset */
1007 memcpy (tag, buf+entry_p, 3);
1010 data_length = atoi_n(buf+entry_p, length_data_entry);
1011 entry_p += length_data_entry;
1012 data_offset = atoi_n(buf+entry_p, length_starting);
1013 entry_p += length_starting;
/* i is the position of the field data within buf */
1014 i = data_offset + base_address;
1015 end_offset = i+data_length-1;
1017 if (data_length <= 0 || data_offset < 0)
1022 yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
1024 tag, entry_p0, data_length, data_offset);
1026 if (end_offset >= record_length)
1028 yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
1029 entry_p0, end_offset, record_length);
1033 if (memcmp (tag, "00", 2))
1034 identifier_flag = 1; /* if not 00X assume subfields */
1035 else if (indicator_length < 4 && indicator_length > 0)
1037 /* Danmarc 00X have subfields */
1038 if (buf[i + indicator_length] == ISO2709_IDFS)
1039 identifier_flag = 1;
1040 else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
1041 identifier_flag = 2;
1044 if (identifier_flag)
/* data field: indicator first, then subfields until a separator or the
   end of the field */
1047 i += identifier_flag-1;
1048 yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
1049 i += indicator_length;
1051 while (i < end_offset &&
1052 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1054 int code_offset = i+1;
1057 while (i < end_offset &&
1058 buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
1059 buf[i] != ISO2709_FS)
1061 yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
/* control field: data runs up to a separator or the end of the field */
1068 while (i < end_offset &&
1069 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1071 yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
1075 yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
1078 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1080 yaz_marc_cprintf(mt, "No separator at end of field length=%d",
1084 return record_length;
/** \brief decodes an ISO2709 record and writes it in the selected mode
    \param mt handle
    \param buf ISO2709 record data
    \param bsize size of buf, or -1 if it is not known
    \param wr destination buffer
    \return the record length on success, -1 on error.
    NOTE(review): the conditions on r and s are not visible in this
    listing. */
1087 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1089 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1092 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1094 return -1; /* error */
1095 return r; /* OK, return length > 0 */
/** \brief decodes an ISO2709 record into the handle's own buffer
    \param mt handle
    \param buf ISO2709 record data
    \param bsize size of buf, or -1 if it is not known
    \param result receives a pointer into mt->m_wr
    \param rsize receives the number of bytes in mt->m_wr
    The buffer is rewound first, so a call replaces the previous result.
    NOTE(review): the conditions guarding the two assignments and the
    return statement are not visible in this listing. */
1098 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1099 char **result, int *rsize)
1103 wrbuf_rewind(mt->m_wr);
1104 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1106 *result = wrbuf_buf(mt->m_wr);
1108 *rsize = wrbuf_len(mt->m_wr);
1112 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1118 void yaz_marc_debug(yaz_marc_t mt, int level)
1124 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/** \brief decodes an ISO2709 record using a temporary handle
    \param buf ISO2709 record data
    \param wr destination buffer
    \param debug debug level
    \param bsize size of buf, or -1 if it is not known
    \param xml output mode
    NOTE(review): the lines that apply debug and xml to the handle, and
    the return statement, are not visible in this listing. */
1130 int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
1132 yaz_marc_t mt = yaz_marc_create();
1137 r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr);
1138 yaz_marc_destroy(mt);
/** \brief decodes an ISO2709 record into a WRBUF
    Calls yaz_marc_decode with 0 as the xml argument.
    \return result of yaz_marc_decode */
1143 int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize)
1145 return yaz_marc_decode(buf, wr, debug, bsize, 0);
/** \brief decodes an ISO2709 record and writes the result to a FILE
    \param buf ISO2709 record data
    \param outf output stream
    \param debug debug level
    \param bsize size of buf, or -1 if it is not known
    Uses a temporary handle and its internal buffer (m_wr).
    NOTE(review): the handling of debug, the condition guarding fwrite and
    the return statement are not visible in this listing. */
1149 int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
1151 yaz_marc_t mt = yaz_marc_create();
1155 r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr);
1159 fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf);
1160 yaz_marc_destroy(mt);
/** \brief like marc_display_exl, with the buffer size passed as -1
    \return result of marc_display_exl */
1165 int marc_display_ex (const char *buf, FILE *outf, int debug)
1167 return marc_display_exl (buf, outf, debug, -1);
/** \brief like marc_display_ex, with debug passed as 0
    \return result of marc_display_ex */
1171 int marc_display (const char *buf, FILE *outf)
1173 return marc_display_ex (buf, outf, 0);
1179 * indent-tabs-mode: nil
1181 * vim: shiftwidth=4 tabstop=8 expandtab