2 * Copyright (C) 1995-2006, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: marcdisp.c,v 1.31 2006-07-06 10:17:53 adam Exp $
10 * \brief Implements MARC conversion utilities
25 #include <yaz/marcdisp.h>
26 #include <yaz/wrbuf.h>
27 #include <yaz/yaz-util.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
34 static void yaz_marc_reset(yaz_marc_t mt);
/* Discriminator stored in the which member of struct yaz_marc_node.
   Besides YAZ_MARC_CONTROLFIELD this file also uses YAZ_MARC_DATAFIELD,
   YAZ_MARC_COMMENT and YAZ_MARC_LEADER. */
36 /** \brief node types for yaz_marc_node */
37 enum YAZ_MARC_NODE_TYPE
40 YAZ_MARC_CONTROLFIELD,
45 /** \brief represents a data field */
46 struct yaz_marc_datafield {
/* Head of the singly linked list of subfields. It is set to 0 when the
   data field is created and grown through the handle member subfield_pp.
   Code in this file also reads and writes the members tag and indicator. */
49 struct yaz_marc_subfield *subfields;
/* Code in this file stores two strings in a control field: tag and data. */
52 /** \brief represents a control field */
53 struct yaz_marc_controlfield {
/* NOTE(review): comment nodes are stored as a plain string through
   n->u.comment elsewhere in this file; confirm whether this struct is
   still referenced anywhere. */
58 /** \brief a comment node */
59 struct yaz_marc_comment {
/* One element of the record: a leader, a control field, a data field or a
   comment. Nodes form a singly linked list that starts at the nodes member
   of the handle. */
63 /** \brief MARC node */
64 struct yaz_marc_node {
/* tells which payload of the node is valid */
65 enum YAZ_MARC_NODE_TYPE which;
/* payload alternatives; accessed as n->u.datafield and n->u.controlfield
   (n->u.comment and n->u.leader are used as strings elsewhere in this file) */
67 struct yaz_marc_datafield datafield;
68 struct yaz_marc_controlfield controlfield;
/* following node in the record, 0 at the end of the list */
72 struct yaz_marc_node *next;
/* A subfield is kept as one string, code_data, that holds the subfield code
   immediately followed by the subfield data. */
75 /** \brief represents a subfield */
76 struct yaz_marc_subfield {
/* following subfield of the same data field, 0 at the end of the list */
78 struct yaz_marc_subfield *next;
81 /** \brief the internals of a yaz_marc_t handle */
/* first node of the current record */
90 struct yaz_marc_node *nodes;
/* address of the link that the next new node is stored through; reset to
   &nodes by yaz_marc_reset and advanced by yaz_marc_add_node */
91 struct yaz_marc_node **nodes_pp;
/* address of the link that the next new subfield is stored through; set by
   the add_datafield functions and advanced by yaz_marc_add_subfield */
92 struct yaz_marc_subfield **subfield_pp;
/** \brief allocates a MARC handle and gives it default settings

    The handle is obtained with xmalloc. The visible defaults are: line
    output mode, a fresh WRBUF for results, subfield separator " $",
    end of line string "\n" and a fresh NMEM for the record nodes.
*/
95 yaz_marc_t yaz_marc_create(void)
97 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
/* default output format is the line format */
98 mt->xml = YAZ_MARC_LINE;
/* buffer used by yaz_marc_decode_buf and marc_display_exl */
100 mt->m_wr = wrbuf_alloc();
102 strcpy(mt->subfield_str, " $");
103 strcpy(mt->endline_str, "\n");
/* all nodes, subfields and their strings are allocated from this NMEM */
105 mt->nmem = nmem_create();
/** \brief releases the resources owned by a MARC handle
    \param mt handle

    The visible code destroys the NMEM that holds the record nodes and
    frees the result WRBUF.
*/
110 void yaz_marc_destroy(yaz_marc_t mt)
114 nmem_destroy(mt->nmem);
115 wrbuf_free (mt->m_wr, 1);
/** \brief allocates a new node from the NMEM of the handle
    \param mt handle

    After the call nodes_pp refers to the next member of the new node, so
    the node added by a following call is linked after this one.
*/
119 struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
121 struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n));
/* remember where the following node has to be linked in */
124 mt->nodes_pp = &n->next;
/** \brief adds a comment node to the record
    \param mt handle
    \param comment text of the comment; a copy is made in the handle NMEM
*/
128 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
130 struct yaz_marc_node *n = yaz_marc_add_node(mt);
131 n->which = YAZ_MARC_COMMENT;
132 n->u.comment = nmem_strdup(mt->nmem, comment);
/** \brief adds a comment node whose text is built printf-style
    \param mt handle
    \param fmt printf format string, followed by its arguments

    The text is formatted into the local buffer buf and then handed to
    yaz_marc_add_comment.
*/
135 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
/* NOTE(review): the three formatting calls below look like build-dependent
   alternatives; the lines that select between them are not shown here.
   The vsprintf variant does not limit the write to the size of buf. */
142 _vsnprintf(buf, sizeof(buf)-1, fmt, ap);
146 vsnprintf(buf, sizeof(buf), fmt, ap);
148 vsprintf(buf, fmt, ap);
152 yaz_marc_add_comment(mt, buf);
/** \brief adds a leader node to the record
    \param mt handle
    \param leader leader data
    \param leader_len number of bytes of leader that are copied

    The copy is made with nmem_strdupn in the handle NMEM.
*/
156 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
158 struct yaz_marc_node *n = yaz_marc_add_node(mt);
159 n->which = YAZ_MARC_LEADER;
160 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
/** \brief adds a control field node to the record
    \param mt handle
    \param tag tag of the field (copied as a C string)
    \param data field data
    \param data_len number of bytes of data that are copied

    The second half of the function builds a comment that shows up to the
    first 16 data bytes in hexadecimal.
*/
163 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
164 const char *data, size_t data_len)
166 struct yaz_marc_node *n = yaz_marc_add_node(mt);
167 n->which = YAZ_MARC_CONTROLFIELD;
168 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
169 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
/* hex dump of at most 16 bytes, two hex digits per byte */
175 sprintf(msg, "controlfield:");
176 for (i = 0; i < 16 && i < data_len; i++)
177 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
179 sprintf(msg + strlen(msg), " ..");
180 yaz_marc_add_comment(mt, msg);
/** \brief adds a control field node whose content comes from XML nodes
    \param mt handle
    \param ptr_tag XML node that supplies the tag
    \param ptr_data XML node that supplies the field data

    Both strings are produced by nmem_text_node_cdata in the handle NMEM.
*/
185 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
186 const xmlNode *ptr_data)
188 struct yaz_marc_node *n = yaz_marc_add_node(mt);
189 n->which = YAZ_MARC_CONTROLFIELD;
190 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
191 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief adds a data field node with an empty subfield list
    \param mt handle
    \param tag tag of the field (copied as a C string)
    \param indicator indicator bytes
    \param indicator_len number of bytes of indicator that are copied

    Subfields added afterwards with yaz_marc_add_subfield are appended to
    this field, because subfield_pp is pointed at its subfield list.
*/
195 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
196 const char *indicator, size_t indicator_len)
198 struct yaz_marc_node *n = yaz_marc_add_node(mt);
199 n->which = YAZ_MARC_DATAFIELD;
200 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
201 n->u.datafield.indicator =
202 nmem_strdupn(mt->nmem, indicator, indicator_len);
203 n->u.datafield.subfields = 0;
205 /* make subfield_pp the current (last one) */
206 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief adds a data field node whose tag comes from an XML node
    \param mt handle
    \param ptr_tag XML node that supplies the tag
    \param indicator indicator bytes
    \param indicator_len number of bytes of indicator that are copied

    Apart from the source of the tag this mirrors yaz_marc_add_datafield:
    the subfield list starts empty and subfield_pp is pointed at it.
*/
210 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
211 const char *indicator, size_t indicator_len)
213 struct yaz_marc_node *n = yaz_marc_add_node(mt);
214 n->which = YAZ_MARC_DATAFIELD;
215 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
216 n->u.datafield.indicator =
217 nmem_strdupn(mt->nmem, indicator, indicator_len);
218 n->u.datafield.subfields = 0;
220 /* make subfield_pp the current (last one) */
221 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief appends a subfield to the data field added most recently
    \param mt handle
    \param code_data subfield code immediately followed by subfield data
    \param code_data_len number of bytes of code_data that are copied

    The first half builds a comment that shows up to the first 16 bytes in
    hexadecimal, followed by " .." when more bytes exist. The second half
    allocates the subfield and links it in through subfield_pp.
*/
225 void yaz_marc_add_subfield(yaz_marc_t mt,
226 const char *code_data, size_t code_data_len)
233 sprintf(msg, "subfield:");
234 for (i = 0; i < 16 && i < code_data_len; i++)
235 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
236 if (i < code_data_len)
237 sprintf(msg + strlen(msg), " ..");
238 yaz_marc_add_comment(mt, msg);
243 struct yaz_marc_subfield *n = nmem_malloc(mt->nmem, sizeof(*n));
244 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
246 /* mark subfield_pp to point to this one, so we append here next */
247 *mt->subfield_pp = n;
248 mt->subfield_pp = &n->next;
/** \brief converts a fixed-width, all-digit field to an integer
    \param buf  field to convert; it need not be NUL-terminated
    \param size number of bytes of buf that make up the field
    \param val  receives the value on success; left untouched on failure
    \retval 1 every one of the size bytes is a decimal digit
    \retval 0 size is not positive or some byte is not a decimal digit

    The whole field is validated, not only its first byte, so a multi-byte
    field such as the 5-byte base address is rejected when blanks or
    letters follow the first digit. The value is accumulated in the same
    pass, which is the plain decimal value of the validated digits.
*/
static int atoi_n_check(const char *buf, int size, int *val)
{
    /* isdigit requires a value representable as unsigned char */
    const unsigned char *p = (const unsigned char *) buf;
    int i;
    int v = 0;

    if (size <= 0)
        return 0;
    for (i = 0; i < size; i++)
    {
        if (!isdigit(p[i]))
            return 0;
        v = v * 10 + (p[i] - '0');
    }
    *val = v;
    return 1;
}
/* Parses the numeric fields of a 24-byte MARC leader and adds the leader
   to the record.

   leader_c points at the leader; 24 bytes are copied from it into a local
   buffer before anything is parsed. The output parameters receive the
   values found at leader offsets 10 (indicator length), 11 (identifier
   length), 12..16 (base address), 20 (length of data entry), 21 (length
   of starting) and 22 (length of implementation defined data). Each value
   is read with atoi_n_check. For a field that fails the check, the visible
   code carries a diagnostic text and a default: 2, 2, 4, 5 and 0 for the
   one-digit fields (no default for the base address is visible here). The
   yaz_marc_cprintf calls near the end record every parsed value as a
   comment node, and the last call adds the local leader copy as a leader
   node. */
260 /** \brief reads the MARC 24 bytes leader and checks content
262 \param leader of the 24 byte leader
263 \param indicator_length indicator length
264 \param identifier_length identifier length
265 \param base_address base address
266 \param length_data_entry length of data entry
267 \param length_starting length of starting
268 \param length_implementation length of implementation defined data
270 static void yaz_marc_read_leader(yaz_marc_t mt, const char *leader_c,
271 int *indicator_length,
272 int *identifier_length,
274 int *length_data_entry,
275 int *length_starting,
276 int *length_implementation)
280 memcpy(leader, leader_c, 24);
282 if (!atoi_n_check(leader+10, 1, indicator_length))
285 "Indicator length at offset 10 should hold a digit."
288 *indicator_length = 2;
290 if (!atoi_n_check(leader+11, 1, identifier_length))
293 "Identifier length at offset 11 should hold a digit."
296 *identifier_length = 2;
298 if (!atoi_n_check(leader+12, 5, base_address))
301 "Base address at offsets 12..16 should hold a number."
305 if (!atoi_n_check(leader+20, 1, length_data_entry))
308 "Length data entry at offset 20 should hold a digit."
310 *length_data_entry = 4;
313 if (!atoi_n_check(leader+21, 1, length_starting))
316 "Length starting at offset 21 should hold a digit."
318 *length_starting = 5;
321 if (!atoi_n_check(leader+22, 1, length_implementation))
324 "Length implementation at offset 22 should hold a digit."
326 *length_implementation = 0;
332 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
333 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
334 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
335 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
336 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
337 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
339 yaz_marc_add_leader(mt, leader, 24);
/** \brief sets the string written in front of each subfield in line mode
    \param mt handle
    \param s new separator; copied, and cut off if it does not fit

    At most sizeof(mt->subfield_str)-1 bytes are copied and the last byte
    of the destination is always set to NUL, so the result is terminated
    even when s is too long.
*/
342 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
344 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
345 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/** \brief sets the string written after each field in line mode
    \param mt handle
    \param s new end of line string; copied, and cut off if it does not fit

    At most sizeof(mt->endline_str)-1 bytes are copied and the last byte
    of the destination is always set to NUL, so the result is terminated
    even when s is too long.
*/
348 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
350 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
351 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
/* Returns the number of bytes that the first character of buf occupies in
   the source character set, as far as the iconv handle of mt can tell.
   Prefixes of 1 to 4 bytes are offered to yaz_iconv in turn; the first
   length that converts without an error result is returned. 1 is the
   fallback in both remaining cases. */
354 /* try to guess how many bytes the identifier really is! */
355 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
360 for (i = 1; i<5; i++)
363 size_t outbytesleft = sizeof(outbuf);
365 const char *inp = buf;
/* offer exactly i input bytes to the converter */
367 size_t inbytesleft = i;
368 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
369 &outp, &outbytesleft);
370 if (r != (size_t) (-1))
371 return i; /* got a complete sequence */
373 return 1; /* giving up */
375 return 1; /* we don't know */
/** \brief empties the record held by the handle
    \param mt handle

    The NMEM is reset, which gives up the memory of all nodes at once, and
    nodes_pp is pointed back at the head of the node list.
*/
378 static void yaz_marc_reset(yaz_marc_t mt)
380 nmem_reset(mt->nmem);
382 mt->nodes_pp = &mt->nodes;
/** \brief writes the record in the line format
    \param mt handle that holds the record
    \param wr WRBUF that receives the output

    The leader node is looked up first and its identifier length (leader
    offset 11) is parsed. Then every node is written according to its type:
    a data field as tag, indicator and its subfields, a control field as
    tag and data, a comment as its text, and the leader as is.
*/
386 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
388 struct yaz_marc_node *n;
389 int identifier_length;
390 const char *leader = 0;
/* locate the leader node */
392 for (n = mt->nodes; n; n = n->next)
393 if (n->which == YAZ_MARC_LEADER)
395 leader = n->u.leader;
401 if (!atoi_n_check(leader+11, 1, &identifier_length))
404 for (n = mt->nodes; n; n = n->next)
406 struct yaz_marc_subfield *s;
409 case YAZ_MARC_DATAFIELD:
410 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
411 n->u.datafield.indicator);
412 for (s = n->u.datafield.subfields; s; s = s->next)
414 /* if identifier length is 2 (most MARCs),
415 the code is a single character .. However we've
416 seen multibyte codes, so see how big it really is */
417 size_t using_code_len =
418 (identifier_length != 2) ? identifier_length - 1
420 cdata_one_character(mt, s->code_data);
/* separator, subfield code, a blank, then the subfield data */
422 wrbuf_puts (wr, mt->subfield_str);
423 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
425 wrbuf_printf(wr, " ");
426 wrbuf_iconv_puts(wr, mt->iconv_cd,
427 s->code_data + using_code_len);
429 wrbuf_puts (wr, mt->endline_str);
431 case YAZ_MARC_CONTROLFIELD:
432 wrbuf_printf(wr, "%s ", n->u.controlfield.tag);
433 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
434 wrbuf_puts (wr, mt->endline_str);
436 case YAZ_MARC_COMMENT:
438 wrbuf_iconv_write(wr, mt->iconv_cd,
439 n->u.comment, strlen(n->u.comment));
440 wrbuf_puts(wr, ")\n");
442 case YAZ_MARC_LEADER:
443 wrbuf_printf(wr, "%s\n", n->u.leader);
/** \brief writes the record in the output format selected for the handle
    \param mt handle that holds the record
    \param wr WRBUF that receives the output
    \returns the result of the writer that was selected

    Dispatches to the line, MARCXML, MarcXchange or ISO2709 writer.
    NOTE(review): the selecting expression is not shown here; presumably it
    is mt->xml - confirm.
*/
449 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
454 return yaz_marc_write_line(mt, wr);
455 case YAZ_MARC_MARCXML:
456 return yaz_marc_write_marcxml(mt, wr);
457 case YAZ_MARC_XCHANGE:
458 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
459 case YAZ_MARC_ISO2709:
460 return yaz_marc_write_iso2709(mt, wr);
/* Shared writer behind yaz_marc_write_marcxml and
   yaz_marc_write_marcxchange. It emits a record element in the namespace
   ns, with optional format and type attributes (each cut to 80 characters
   by the print format), and inside it one element per node: datafield
   with indN attributes and subfield children, controlfield, leader, and
   an XML comment for comment nodes. The leader is located first because
   its identifier length (leader offset 11) decides how many bytes of each
   subfield form the code attribute. */
465 /** \brief common MARC XML/Xchange writer
467 \param wr WRBUF output
468 \param ns XMLNS for the elements
469 \param format record format (e.g. "MARC21")
470 \param type record type (e.g. "Bibliographic")
472 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
477 struct yaz_marc_node *n;
478 int identifier_length;
479 const char *leader = 0;
481 for (n = mt->nodes; n; n = n->next)
482 if (n->which == YAZ_MARC_LEADER)
484 leader = n->u.leader;
490 if (!atoi_n_check(leader+11, 1, &identifier_length))
493 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
495 wrbuf_printf(wr, " format=\"%.80s\"", format);
497 wrbuf_printf(wr, " type=\"%.80s\"", type);
498 wrbuf_printf(wr, ">\n");
499 for (n = mt->nodes; n; n = n->next)
501 struct yaz_marc_subfield *s;
504 case YAZ_MARC_DATAFIELD:
505 wrbuf_printf(wr, " <datafield tag=\"");
506 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
507 strlen(n->u.datafield.tag));
508 wrbuf_printf(wr, "\"");
509 if (n->u.datafield.indicator)
512 for (i = 0; n->u.datafield.indicator[i]; i++)
514 wrbuf_printf(wr, " ind%d=\"", i+1);
515 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
516 n->u.datafield.indicator+i, 1);
517 wrbuf_printf(wr, "\"");
520 wrbuf_printf(wr, ">\n");
521 for (s = n->u.datafield.subfields; s; s = s->next)
523 /* if identifier length is 2 (most MARCs),
524 the code is a single character .. However we've
525 seen multibyte codes, so see how big it really is */
526 size_t using_code_len =
527 (identifier_length != 2) ? identifier_length - 1
529 cdata_one_character(mt, s->code_data);
/* the first using_code_len bytes become the code attribute, the
   remainder of code_data becomes the element content */
531 wrbuf_puts(wr, " <subfield code=\"");
532 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
533 s->code_data, using_code_len);
534 wrbuf_puts(wr, "\">");
535 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
536 s->code_data + using_code_len,
537 strlen(s->code_data + using_code_len));
538 wrbuf_puts(wr, "</subfield>\n");
540 wrbuf_printf(wr, " </datafield>\n");
542 case YAZ_MARC_CONTROLFIELD:
543 wrbuf_printf(wr, " <controlfield tag=\"");
544 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
545 strlen(n->u.controlfield.tag));
546 wrbuf_printf(wr, "\">");
/* NOTE(review): control field data is written with wrbuf_iconv_puts while
   the other values here go through wrbuf_iconv_write_cdata; confirm that
   the data cannot contain characters that need XML escaping. */
547 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
548 wrbuf_printf(wr, "</controlfield>\n");
550 case YAZ_MARC_COMMENT:
551 wrbuf_printf(wr, "<!-- ");
552 wrbuf_puts(wr, n->u.comment);
553 wrbuf_printf(wr, " -->\n");
555 case YAZ_MARC_LEADER:
556 wrbuf_printf(wr, " <leader>");
557 wrbuf_iconv_write_cdata(wr,
558 0 /* no charset conversion for leader */,
559 n->u.leader, strlen(n->u.leader));
560 wrbuf_printf(wr, "</leader>\n");
563 wrbuf_puts(wr, "</record>\n");
/** \brief writes the record as MARCXML
    \param mt handle that holds the record
    \param wr WRBUF that receives the output
    \returns the result of yaz_marc_write_marcxml_ns

    Leader position 9 is overwritten with the letter a before the record
    is written in the namespace http://www.loc.gov/MARC21/slim.
    NOTE(review): presumably this marks the record as Unicode encoded, as
    MARC 21 defines for leader position 9 - confirm.
*/
567 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
569 yaz_marc_modify_leader(mt, 9, "a");
570 return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
/** \brief writes the record as MarcXchange
    \param mt handle that holds the record
    \param wr WRBUF that receives the output
    \returns the result of yaz_marc_write_marcxml_ns

    The record is written in the namespace
    http://www.bs.dk/standards/MarcXchange. The caller in this file passes
    two further arguments, described there as format and type.
*/
574 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
578 return yaz_marc_write_marcxml_ns(mt, wr,
579 "http://www.bs.dk/standards/MarcXchange",
/** \brief writes the record in ISO2709 format
    \param mt handle that holds the record
    \param wr WRBUF that receives the output

    The function works in three steps. First the directory is built in
    wr_dir: one entry per field, made of the tag, the field length and the
    field start offset. Then a 24-byte leader is assembled in wr_head with
    a freshly computed record length and base address. Last the field data
    is written after leader and directory.
*/
583 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
585 struct yaz_marc_node *n;
586 int indicator_length;
587 int identifier_length;
588 int length_data_entry;
590 int length_implementation;
592 const char *leader = 0;
593 WRBUF wr_dir, wr_head;
/* no early exit in the visible loop, so the last leader node is used */
596 for (n = mt->nodes; n; n = n->next)
597 if (n->which == YAZ_MARC_LEADER)
598 leader = n->u.leader;
/* field widths are taken from the leader of the record */
602 if (!atoi_n_check(leader+10, 1, &indicator_length))
604 if (!atoi_n_check(leader+11, 1, &identifier_length))
606 if (!atoi_n_check(leader+20, 1, &length_data_entry))
608 if (!atoi_n_check(leader+21, 1, &length_starting))
610 if (!atoi_n_check(leader+22, 1, &length_implementation))
/* step 1: directory */
613 wr_dir = wrbuf_alloc();
614 for (n = mt->nodes; n; n = n->next)
617 struct yaz_marc_subfield *s;
620 case YAZ_MARC_DATAFIELD:
621 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
622 data_length += indicator_length;
623 for (s = n->u.datafield.subfields; s; s = s->next)
/* one extra byte per subfield for the ISO2709_IDFS delimiter */
624 data_length += 1+strlen(s->code_data);
627 case YAZ_MARC_CONTROLFIELD:
628 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
629 data_length += strlen(n->u.controlfield.data);
632 case YAZ_MARC_COMMENT:
634 case YAZ_MARC_LEADER:
/* length and offset are zero padded to the widths given by the leader */
639 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
640 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
641 data_offset += data_length;
644 /* mark end of directory */
645 wrbuf_putc(wr_dir, ISO2709_FS);
647 /* base address of data (comes after leader+directory) */
648 base_address = 24 + wrbuf_len(wr_dir);
/* step 2: leader with recomputed record length and base address */
650 wr_head = wrbuf_alloc();
652 /* write record length */
653 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
654 /* from "original" leader */
655 wrbuf_write(wr_head, leader+5, 7);
656 /* base address of data */
657 wrbuf_printf(wr_head, "%05d", base_address);
658 /* from "original" leader */
659 wrbuf_write(wr_head, leader+17, 7);
661 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
662 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
663 wrbuf_free(wr_head, 1);
664 wrbuf_free(wr_dir, 1);
/* step 3: field data, each field ended by ISO2709_FS */
666 for (n = mt->nodes; n; n = n->next)
668 struct yaz_marc_subfield *s;
671 case YAZ_MARC_DATAFIELD:
672 wrbuf_printf(wr, "%.*s", indicator_length,
673 n->u.datafield.indicator);
674 for (s = n->u.datafield.subfields; s; s = s->next)
676 wrbuf_printf(wr, "%c", ISO2709_IDFS);
677 wrbuf_puts(wr, s->code_data);
679 wrbuf_printf(wr, "%c", ISO2709_FS);
681 case YAZ_MARC_CONTROLFIELD:
682 wrbuf_puts(wr, n->u.controlfield.data);
683 wrbuf_printf(wr, "%c", ISO2709_FS);
685 case YAZ_MARC_COMMENT:
687 case YAZ_MARC_LEADER:
/* record terminator */
691 wrbuf_printf(wr, "%c", ISO2709_RS);
/** \brief reads the subfield elements of one data field from XML
    \param mt handle that receives the subfields
    \param ptr first child node of the datafield element

    Every element node in the sibling list must be named subfield and must
    carry a code attribute with a text value; otherwise a diagnostic
    comment is added. For a valid element the code and all text children
    are joined into one string, which is added with yaz_marc_add_subfield.
*/
696 int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
698 for (; ptr; ptr = ptr->next)
700 if (ptr->type == XML_ELEMENT_NODE)
702 if (!strcmp((const char *) ptr->name, "subfield"))
704 size_t ctrl_data_len = 0;
705 char *ctrl_data_buf = 0;
706 const xmlNode *p = 0, *ptr_code = 0;
707 struct _xmlAttr *attr;
/* look for the code attribute; other attributes are reported */
708 for (attr = ptr->properties; attr; attr = attr->next)
709 if (!strcmp((const char *)attr->name, "code"))
710 ptr_code = attr->children;
714 mt, "Bad attribute '%.80s' for 'subfield'",
721 mt, "Missing attribute 'code' for 'subfield'" );
724 if (ptr_code->type == XML_TEXT_NODE)
727 strlen((const char *)ptr_code->content);
732 mt, "Missing value for 'code' in 'subfield'" );
/* add the length of every text child, then allocate once */
735 for (p = ptr->children; p ; p = p->next)
736 if (p->type == XML_TEXT_NODE)
737 ctrl_data_len += strlen((const char *)p->content);
738 ctrl_data_buf = nmem_malloc(mt->nmem, ctrl_data_len+1);
/* code first, then the text children in document order */
739 strcpy(ctrl_data_buf, (const char *)ptr_code->content);
740 for (p = ptr->children; p ; p = p->next)
741 if (p->type == XML_TEXT_NODE)
742 strcat(ctrl_data_buf, (const char *)p->content);
743 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
748 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
/** \brief reads the leader element of a MARC XML record
    \param mt handle that receives the leader
    \param ptr_p address of the node pointer where the search starts

    The first element node has to be named leader; the content of its text
    child is taken as the leader. A missing element and a leader that is
    not exactly 24 characters long are reported as comments. A valid leader
    is handed to yaz_marc_read_leader.
*/
756 static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
758 int indicator_length;
759 int identifier_length;
761 int length_data_entry;
763 int length_implementation;
764 const char *leader = 0;
765 const xmlNode *ptr = *ptr_p;
767 for(; ptr; ptr = ptr->next)
768 if (ptr->type == XML_ELEMENT_NODE)
770 if (!strcmp((const char *) ptr->name, "leader"))
772 xmlNode *p = ptr->children;
773 for(; p; p = p->next)
774 if (p->type == XML_TEXT_NODE)
775 leader = (const char *) p->content;
781 mt, "Expected element 'leader', got '%.80s'", ptr->name);
787 yaz_marc_cprintf(mt, "Missing element 'leader'");
790 if (strlen(leader) != 24)
/* NOTE(review): strlen returns size_t but the format below uses %d; the
   argument needs a cast to int (or the format a size_t conversion). */
792 yaz_marc_cprintf(mt, "Bad length %d of leader data."
793 " Must have length of 24 characters", strlen(leader));
796 yaz_marc_read_leader(mt, leader,
802 &length_implementation);
/** \brief reads the controlfield and datafield elements of a record
    \param mt handle that receives the fields
    \param ptr first sibling node to examine

    A controlfield element needs a tag attribute and is added with its
    children as data. A datafield element needs a tag attribute and may
    carry ind1 to ind9 attributes; its children are read by
    yaz_marc_read_xml_subfields. Any other element is reported.
*/
807 static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
809 for(; ptr; ptr = ptr->next)
810 if (ptr->type == XML_ELEMENT_NODE)
812 if (!strcmp((const char *) ptr->name, "controlfield"))
814 const xmlNode *ptr_tag = 0;
815 struct _xmlAttr *attr;
816 for (attr = ptr->properties; attr; attr = attr->next)
817 if (!strcmp((const char *)attr->name, "tag"))
818 ptr_tag = attr->children;
822 mt, "Bad attribute '%.80s' for 'controlfield'",
829 mt, "Missing attribute 'tag' for 'controlfield'" );
832 yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
834 else if (!strcmp((const char *) ptr->name, "datafield"))
836 char indstr[11]; /* 0(unused), 1,....9, + zero term */
837 const xmlNode *ptr_tag = 0;
838 struct _xmlAttr *attr;
840 for (i = 0; i<11; i++)
842 for (attr = ptr->properties; attr; attr = attr->next)
843 if (!strcmp((const char *)attr->name, "tag"))
844 ptr_tag = attr->children;
/* attribute names of the form indN, with N a single character */
845 else if (strlen((const char *)attr->name) == 4 &&
846 !memcmp(attr->name, "ind", 3))
/* N selects the slot of indstr that receives the first character of the
   attribute value */
848 int no = atoi((const char *)attr->name+3);
850 && attr->children->type == XML_TEXT_NODE)
851 indstr[no] = attr->children->content[0];
856 mt, "Bad attribute '%.80s' for 'datafield'",
863 mt, "Missing attribute 'tag' for 'datafield'" );
866 /* note that indstr[0] is unused so we use indstr[1..] */
867 yaz_marc_add_datafield_xml(mt, ptr_tag,
868 indstr+1, strlen(indstr+1));
870 if (yaz_marc_read_xml_subfields(mt, ptr->children))
876 "Expected element controlfield or datafield,"
877 " got %.80s", ptr->name);
/** \brief reads a MARC XML record from a libxml2 node tree
    \param mt handle that receives the record
    \param xmlnode node (const xmlNode *) where the search for the record
    element starts
    \returns on the success path the result of yaz_marc_read_xml_fields

    The sibling list is searched for an element named record; other
    elements and a missing record element are reported as comments. The
    leader is read first, then the fields.
*/
884 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
886 const xmlNode *ptr = xmlnode;
887 for(; ptr; ptr = ptr->next)
888 if (ptr->type == XML_ELEMENT_NODE)
890 if (!strcmp((const char *) ptr->name, "record"))
895 mt, "Unknown element '%.80s' in MARC XML reader",
902 yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
905 /* ptr points to record node now */
907 if (yaz_marc_read_xml_leader(mt, &ptr))
909 return yaz_marc_read_xml_fields(mt, ptr->next);
/* Second definition with the same signature as the XML reader.
   NOTE(review): presumably the fallback that is compiled when libxml2
   support is not available; its body and the selecting preprocessor lines
   are not shown here - confirm. */
912 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
/** \brief reads one ISO2709 record into the handle
    \param mt handle that receives the record
    \param buf buffer that holds the record
    \param bsize size of buf, or -1 when the size is not known
    \returns the record length taken from the first five bytes of buf on
    the success path

    The record length and the leader are parsed first. A first pass over
    the directory finds its end, a second pass decodes every directory
    entry and adds the field it describes as a data field with subfields
    or as a control field. Problems found on the way are recorded as
    comment nodes.
*/
918 int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
922 int indicator_length;
923 int identifier_length;
924 int end_of_directory;
926 int length_data_entry;
928 int length_implementation;
932 record_length = atoi_n (buf, 5);
933 if (record_length < 25)
/* NOTE(review): the text says "< 24" while the condition tests "< 25". */
935 yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
938 /* bail out if bsize is known and record_length exceeds it */
939 if (bsize != -1 && record_length > bsize)
/* NOTE(review): the arguments are record_length, bsize, so the printed
   relation reads the wrong way round for this condition. */
941 yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
942 record_length, bsize);
946 yaz_marc_cprintf(mt, "Record length %5d", record_length);
948 yaz_marc_read_leader(mt, buf,
954 &length_implementation);
956 /* First pass. determine length of directory & base of data */
957 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
959 /* length of directory entry */
960 int l = 3 + length_data_entry + length_starting;
961 if (entry_p + l >= record_length)
963 yaz_marc_cprintf(mt, "Directory offset %d: end of record."
964 " Missing FS char", entry_p);
969 yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s",
970 entry_p, buf+entry_p);
972 /* Check for digits in length info */
974 if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
978 /* Not all digits, so stop directory scan */
979 yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
980 " length and/or length starting", entry_p);
983 entry_p += 3 + length_data_entry + length_starting;
985 end_of_directory = entry_p;
986 if (base_address != entry_p+1)
988 yaz_marc_cprintf(mt, "Base address not at end of directory,"
989 " base %d, end %d", base_address, entry_p+1);
992 /* Second pass. parse control - and datafields */
993 for (entry_p = 24; entry_p != end_of_directory; )
1000 int identifier_flag = 0;
1001 int entry_p0 = entry_p;
/* directory entry: 3-byte tag, data length, data start offset */
1003 memcpy (tag, buf+entry_p, 3);
1006 data_length = atoi_n(buf+entry_p, length_data_entry);
1007 entry_p += length_data_entry;
1008 data_offset = atoi_n(buf+entry_p, length_starting);
1009 entry_p += length_starting;
/* i is the absolute position of the field, end_offset that of its last
   byte */
1010 i = data_offset + base_address;
1011 end_offset = i+data_length-1;
1013 if (data_length <= 0 || data_offset < 0)
1018 yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
1020 tag, entry_p0, data_length, data_offset);
1022 if (end_offset >= record_length)
1024 yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
1025 entry_p0, end_offset, record_length);
/* identifier_flag stays 0 for a control field; 1 or 2 means data field */
1029 if (memcmp (tag, "00", 2))
1030 identifier_flag = 1; /* if not 00X assume subfields */
1031 else if (indicator_length < 4 && indicator_length > 0)
1033 /* Danmarc 00X have subfields */
1034 if (buf[i + indicator_length] == ISO2709_IDFS)
1035 identifier_flag = 1;
1036 else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
1037 identifier_flag = 2;
1040 if (identifier_flag)
/* with flag 2 the subfield delimiter was found one byte later, so one
   byte is skipped before the indicators are taken */
1043 i += identifier_flag-1;
1044 yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
1045 i += indicator_length;
1047 while (i < end_offset &&
1048 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1050 int code_offset = i+1;
1053 while (i < end_offset &&
1054 buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
1055 buf[i] != ISO2709_FS)
1057 yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
1064 while (i < end_offset &&
1065 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1067 yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
1071 yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
1074 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1076 yaz_marc_cprintf(mt, "No separator at end of field length=%d",
1080 return record_length;
/** \brief decodes an ISO2709 record and writes it in the selected format
    \param mt handle
    \param buf buffer that holds the ISO2709 record
    \param bsize size of buf, or -1 when the size is not known
    \param wr WRBUF that receives the output
    \returns the record length on success, -1 on error

    The record is read with yaz_marc_read_iso2709 and written with
    yaz_marc_write_mode.
*/
1083 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1085 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1088 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1090 return -1; /* error */
1091 return r; /* OK, return length > 0 */
/** \brief decodes an ISO2709 record into the buffer owned by the handle
    \param mt handle
    \param buf buffer that holds the ISO2709 record
    \param bsize size of buf, or -1 when the size is not known
    \param result receives a pointer to the converted record
    \param rsize receives the length of the converted record

    The output lives in the WRBUF of the handle, which is rewound at the
    start of every call; result therefore points into memory owned by mt
    and is overwritten by the next call.
*/
1094 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1095 char **result, int *rsize)
1099 wrbuf_rewind(mt->m_wr);
1100 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1102 *result = wrbuf_buf(mt->m_wr);
1104 *rsize = wrbuf_len(mt->m_wr);
/* NOTE(review): the body is not shown here; presumably it stores xmlmode
   in mt->xml, the member that yaz_marc_create sets to YAZ_MARC_LINE -
   confirm. */
1108 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
/* NOTE(review): the body is not shown here; presumably it stores the debug
   level in the handle - confirm. */
1114 void yaz_marc_debug(yaz_marc_t mt, int level)
/* NOTE(review): the body is not shown here; presumably it stores cd in
   mt->iconv_cd, the converter that the writers in this file pass to the
   wrbuf_iconv functions - confirm. */
1120 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/** \brief overwrites part of the leader of the record
    \param mt handle that holds the record
    \param off offset in the leader where the overwrite starts
    \param str bytes to store; its terminating NUL is not copied

    NOTE(review): off plus the length of str is not checked against the
    size of the leader string; the only caller in this file passes offset
    9 and a one character string.
*/
1125 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1127 struct yaz_marc_node *n;
1129 for (n = mt->nodes; n; n = n->next)
1130 if (n->which == YAZ_MARC_LEADER)
1132 leader = n->u.leader;
1133 memcpy(leader+off, str, strlen(str));
/** \brief decodes an ISO2709 record with a temporary handle
    \param buf buffer that holds the ISO2709 record
    \param wr WRBUF that receives the output
    \param debug debug level
    \param bsize size of buf, or -1 when the size is not known
    \param xml output format selector

    A handle is created for the call, used for yaz_marc_decode_wrbuf and
    destroyed again. How debug and xml are applied to the handle is not
    visible in the lines below.
*/
1139 int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
1141 yaz_marc_t mt = yaz_marc_create();
1146 r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr);
1147 yaz_marc_destroy(mt);
/** \brief decodes an ISO2709 record to a WRBUF
    \param buf buffer that holds the ISO2709 record
    \param wr WRBUF that receives the output
    \param debug debug level
    \param bsize size of buf, or -1 when the size is not known
    \returns the result of yaz_marc_decode

    Calls yaz_marc_decode with 0 as its last argument (xml).
*/
1152 int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize)
1154 return yaz_marc_decode(buf, wr, debug, bsize, 0);
/** \brief decodes an ISO2709 record and writes the result to a stream
    \param buf buffer that holds the ISO2709 record
    \param outf stream that receives the output
    \param debug debug level
    \param bsize size of buf, or -1 when the size is not known

    A temporary handle is created; the record is decoded into the WRBUF of
    that handle, the buffer content is written to outf with fwrite, and the
    handle is destroyed.
*/
1158 int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
1160 yaz_marc_t mt = yaz_marc_create();
1164 r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr);
1168 fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf);
1169 yaz_marc_destroy(mt);
/** \brief decodes an ISO2709 record of unknown buffer size to a stream
    \param buf buffer that holds the ISO2709 record
    \param outf stream that receives the output
    \param debug debug level
    \returns the result of marc_display_exl

    Calls marc_display_exl with a buffer size of -1, the value that the
    ISO2709 reader treats as size not known.
*/
1174 int marc_display_ex (const char *buf, FILE *outf, int debug)
1176 return marc_display_exl (buf, outf, debug, -1);
/** \brief decodes an ISO2709 record to a stream with debug level 0
    \param buf buffer that holds the ISO2709 record
    \param outf stream that receives the output
    \returns the result of marc_display_ex
*/
1180 int marc_display (const char *buf, FILE *outf)
1182 return marc_display_ex (buf, outf, 0);
1188 * indent-tabs-mode: nil
1190 * vim: shiftwidth=4 tabstop=8 expandtab