2 * Copyright (C) 1995-2006, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: marcdisp.c,v 1.29 2006-05-02 20:47:45 adam Exp $
10 * \brief Implements MARC conversion utilities
25 #include <yaz/marcdisp.h>
26 #include <yaz/wrbuf.h>
27 #include <yaz/yaz-util.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
34 static void yaz_marc_reset(yaz_marc_t mt);
36 /** \brief node types for yaz_marc_node */
37 enum YAZ_MARC_NODE_TYPE
40 YAZ_MARC_CONTROLFIELD,
45 /** \brief represents a data field */
46 struct yaz_marc_datafield {
49 struct yaz_marc_subfield *subfields;
52 /** \brief represents a control field */
53 struct yaz_marc_controlfield {
58 /** \brief a comment node */
59 struct yaz_marc_comment {
63 /** \brief MARC node */
64 struct yaz_marc_node {
65 enum YAZ_MARC_NODE_TYPE which;
67 struct yaz_marc_datafield datafield;
68 struct yaz_marc_controlfield controlfield;
72 struct yaz_marc_node *next;
75 /** \brief represents a subfield */
76 struct yaz_marc_subfield {
78 struct yaz_marc_subfield *next;
81 /** \brief the internals of a yaz_marc_t handle */
90 struct yaz_marc_node *nodes;
91 struct yaz_marc_node **nodes_pp;
92 struct yaz_marc_subfield **subfield_pp;
/** \brief creates a MARC handle
 *
 * Allocates the handle with xmalloc and sets the defaults visible here:
 * line-mode output (YAZ_MARC_LINE), a fresh WRBUF in m_wr, the subfield
 * prefix " $", the end-of-line string "\n" and a new NMEM in nmem.
 * The handle is released with yaz_marc_destroy().
 */
95 yaz_marc_t yaz_marc_create(void)
97 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
98 mt->xml = YAZ_MARC_LINE;
100 mt->m_wr = wrbuf_alloc();
102 strcpy(mt->subfield_str, " $");
103 strcpy(mt->endline_str, "\n");
105 mt->nmem = nmem_create();
/** \brief destroys a MARC handle
 *
 * Releases the handle's NMEM (which backs all nodes and strings allocated
 * through nmem_* in this file) and its internal WRBUF m_wr.
 */
110 void yaz_marc_destroy(yaz_marc_t mt)
114 nmem_destroy(mt->nmem);
115 wrbuf_free (mt->m_wr, 1);
/** \brief allocates a new node from the handle's NMEM
 *
 * After the call nodes_pp refers to the new node's next pointer, so the
 * following node is appended behind this one. The caller fills in the
 * node type and payload.
 */
119 struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
121 struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n));
124 mt->nodes_pp = &n->next;
/** \brief appends a comment node
 *
 * The comment text is duplicated into the handle's NMEM, so the caller's
 * buffer need not outlive the call.
 */
128 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
130 struct yaz_marc_node *n = yaz_marc_add_node(mt);
131 n->which = YAZ_MARC_COMMENT;
132 n->u.comment = nmem_strdup(mt->nmem, comment);
/** \brief formats a printf-style message and appends it as a comment node
 *
 * Three formatting variants are visible below; presumably one is selected
 * by preprocessor conditionals that are not shown in this excerpt.
 */
135 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
/* NOTE(review): Microsoft documents that _vsnprintf does not write a
   terminating NUL when the output is truncated; whether buf is terminated
   by other means is not visible here - confirm. */
142 _vsnprintf(buf, sizeof(buf)-1, fmt, ap);
146 vsnprintf(buf, sizeof(buf), fmt, ap);
/* NOTE(review): this variant places no bound on the formatted output. */
148 vsprintf(buf, fmt, ap);
152 yaz_marc_add_comment(mt, buf);
/** \brief appends a leader node
 *
 * Stores a copy of the first leader_len bytes of leader, made with
 * nmem_strdupn in the handle's NMEM.
 */
156 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
158 struct yaz_marc_node *n = yaz_marc_add_node(mt);
159 n->which = YAZ_MARC_LEADER;
160 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
/** \brief appends a control field node
 *
 * The tag is copied as a NUL-terminated string; data is copied with an
 * explicit length (data_len). The second half of the function builds a
 * diagnostic comment with a hex dump of at most the first 16 data bytes,
 * followed by " .." (the condition guarding that suffix and the one
 * enabling the dump are not shown in this excerpt).
 */
163 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
164 const char *data, size_t data_len)
166 struct yaz_marc_node *n = yaz_marc_add_node(mt);
167 n->which = YAZ_MARC_CONTROLFIELD;
168 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
169 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
175 sprintf(msg, "controlfield:");
176 for (i = 0; i < 16 && i < data_len; i++)
/* "& 0xff" keeps a possibly negative char from printing as a sign-extended value */
177 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
179 sprintf(msg + strlen(msg), " ..");
180 yaz_marc_add_comment(mt, msg);
/** \brief appends a control field node taken from libxml2 nodes
 *
 * Tag and data are both obtained with nmem_text_node_cdata from the given
 * XML nodes and stored in the handle's NMEM.
 */
185 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
186 const xmlNode *ptr_data)
188 struct yaz_marc_node *n = yaz_marc_add_node(mt);
189 n->which = YAZ_MARC_CONTROLFIELD;
190 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
191 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief appends a data field node with an empty subfield list
 *
 * The tag is copied as a NUL-terminated string and the indicator with an
 * explicit length. subfield_pp is pointed at the new field's subfield list
 * so that subsequent yaz_marc_add_subfield() calls attach to this field.
 */
195 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
196 const char *indicator, size_t indicator_len)
198 struct yaz_marc_node *n = yaz_marc_add_node(mt);
199 n->which = YAZ_MARC_DATAFIELD;
200 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
201 n->u.datafield.indicator =
202 nmem_strdupn(mt->nmem, indicator, indicator_len);
203 n->u.datafield.subfields = 0;
205 /* make subfield_pp the current (last one) */
206 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief appends a data field node whose tag comes from a libxml2 node
 *
 * Same as yaz_marc_add_datafield() except that the tag is obtained with
 * nmem_text_node_cdata from ptr_tag. The indicator is still passed as a
 * buffer with an explicit length.
 */
210 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
211 const char *indicator, size_t indicator_len)
213 struct yaz_marc_node *n = yaz_marc_add_node(mt);
214 n->which = YAZ_MARC_DATAFIELD;
215 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
216 n->u.datafield.indicator =
217 nmem_strdupn(mt->nmem, indicator, indicator_len);
218 n->u.datafield.subfields = 0;
220 /* make subfield_pp the current (last one) */
221 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief appends a subfield to the current data field
 *
 * code_data holds the subfield code immediately followed by the subfield
 * data; code_data_len is the combined length. The subfield is linked in
 * through subfield_pp, which yaz_marc_add_datafield() sets up.
 * The first part builds a diagnostic comment with a hex dump of at most
 * the first 16 bytes (the condition enabling it is not shown here).
 */
225 void yaz_marc_add_subfield(yaz_marc_t mt,
226 const char *code_data, size_t code_data_len)
233 sprintf(msg, "subfield:");
234 for (i = 0; i < 16 && i < code_data_len; i++)
235 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
/* mark the dump as truncated when more than 16 bytes were supplied */
236 if (i < code_data_len)
237 sprintf(msg + strlen(msg), " ..");
238 yaz_marc_add_comment(mt, msg);
243 struct yaz_marc_subfield *n = nmem_malloc(mt->nmem, sizeof(*n));
244 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
246 /* mark subfield_pp to point to this one, so we append here next */
247 *mt->subfield_pp = n;
248 mt->subfield_pp = &n->next;
/** \brief converts a fixed-width numeric field after a digit check
 *
 * Stores atoi_n(buf, size) in *val. Callers in this file treat a zero
 * result as "not a number" (the return statements are not shown here).
 *
 * NOTE(review): only the first byte of buf is tested with isdigit, yet
 * yaz_marc_read_leader passes size 5 for the base address; the remaining
 * bytes are left entirely to atoi_n - confirm that is intended.
 */
252 static int atoi_n_check(const char *buf, int size, int *val)
/* cast through unsigned char so isdigit never receives a negative value */
254 if (!isdigit(*(const unsigned char *) buf))
256 *val = atoi_n(buf, size);
260 /** \brief reads the 24-byte MARC leader and checks its content
262 \param leader_c the 24-byte leader
263 \param indicator_length indicator length
264 \param identifier_length identifier length
265 \param base_address base address
266 \param length_data_entry length of data entry
267 \param length_starting length of starting
268 \param length_implementation length of implementation defined data */
/* Works on a private 24-byte copy of leader_c. Each numeric leader
   position is read with atoi_n_check; for the positions at offsets
   10, 11, 20, 21 and 22 a diagnostic text and a fallback assignment
   (2, 2, 4, 5 and 0 respectively) are visible, presumably taken when the
   check fails. No fallback for the base address is visible in this excerpt.
   Finally the decoded values are reported through yaz_marc_cprintf
   (possibly under a condition not shown) and the leader copy is added as
   a leader node. */
270 static void yaz_marc_read_leader(yaz_marc_t mt, const char *leader_c,
271 int *indicator_length,
272 int *identifier_length,
274 int *length_data_entry,
275 int *length_starting,
276 int *length_implementation)
280 memcpy(leader, leader_c, 24);
282 if (!atoi_n_check(leader+10, 1, indicator_length))
285 "Indicator length at offset 10 should hold a digit."
288 *indicator_length = 2;
290 if (!atoi_n_check(leader+11, 1, identifier_length))
293 "Identifier length at offset 11 should hold a digit."
296 *identifier_length = 2;
/* the base address is the only multi-character field here (5 characters) */
298 if (!atoi_n_check(leader+12, 5, base_address))
301 "Base address at offsets 12..16 should hold a number."
305 if (!atoi_n_check(leader+20, 1, length_data_entry))
308 "Length data entry at offset 20 should hold a digit."
310 *length_data_entry = 4;
313 if (!atoi_n_check(leader+21, 1, length_starting))
316 "Length starting at offset 21 should hold a digit."
318 *length_starting = 5;
321 if (!atoi_n_check(leader+22, 1, length_implementation))
324 "Length implementation at offset 22 should hold a digit."
326 *length_implementation = 0;
332 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
333 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
334 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
335 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
336 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
337 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
339 yaz_marc_add_leader(mt, leader, 24);
/** \brief sets the string written before each subfield in line mode
 *
 * Copies at most sizeof(subfield_str)-1 characters of s and always
 * NUL-terminates, so a longer s is silently truncated.
 */
342 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
344 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
/* strncpy does not terminate when s fills the buffer, hence the explicit NUL */
345 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/** \brief sets the string written at the end of each field in line mode
 *
 * Copies at most sizeof(endline_str)-1 characters of s and always
 * NUL-terminates, so a longer s is silently truncated.
 */
348 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
350 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
/* strncpy does not terminate when s fills the buffer, hence the explicit NUL */
351 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
354 /* try to guess how many bytes the identifier really is! */
/* Feeds prefixes of buf with lengths 1 through 4 to yaz_iconv using the
   handle's conversion descriptor and returns the first length that
   converts without error. Returns 1 when no prefix converts and, per the
   final comment, when the length cannot be determined (the condition
   separating those paths is not shown in this excerpt). */
355 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
360 for (i = 1; i<5; i++)
363 size_t outbytesleft = sizeof(outbuf);
365 const char *inp = buf;
367 size_t inbytesleft = i;
368 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
369 &outp, &outbytesleft);
370 if (r != (size_t) (-1))
371 return i; /* got a complete sequence */
373 return 1; /* giving up */
375 return 1; /* we don't know */
/* Discards the current record: resets the handle's NMEM (which backs the
   nodes and strings allocated in this file) and points nodes_pp back at
   the head of the node list so the next node becomes the first one. */
378 static void yaz_marc_reset(yaz_marc_t mt)
380 nmem_reset(mt->nmem);
382 mt->nodes_pp = &mt->nodes;
/** \brief writes the record in the line-oriented text format
 *
 * Locates a leader node first (its position 11 supplies the identifier
 * length), then emits every node: data fields as "tag indicator" followed
 * by each subfield (subfield_str, code, a space, data) and endline_str;
 * control fields as "tag data" plus endline_str; comments and the leader
 * on lines of their own. Field and comment text goes through the handle's
 * iconv descriptor; tags, indicators and the leader are written as-is.
 */
386 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
388 struct yaz_marc_node *n;
389 int identifier_length;
390 const char *leader = 0;
392 for (n = mt->nodes; n; n = n->next)
393 if (n->which == YAZ_MARC_LEADER)
395 leader = n->u.leader;
401 if (!atoi_n_check(leader+11, 1, &identifier_length))
404 for (n = mt->nodes; n; n = n->next)
406 struct yaz_marc_subfield *s;
409 case YAZ_MARC_DATAFIELD:
410 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
411 n->u.datafield.indicator);
412 for (s = n->u.datafield.subfields; s; s = s->next)
414 /* if identifier length is 2 (most MARCs),
415 the code is a single character .. However we've
416 seen multibyte codes, so see how big it really is */
417 size_t using_code_len =
418 (identifier_length != 2) ? identifier_length - 1
420 cdata_one_character(mt, s->code_data);
422 wrbuf_puts (wr, mt->subfield_str);
423 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
425 wrbuf_printf(wr, " ");
/* everything after the code bytes is the subfield data */
426 wrbuf_iconv_puts(wr, mt->iconv_cd,
427 s->code_data + using_code_len);
429 wrbuf_puts (wr, mt->endline_str);
431 case YAZ_MARC_CONTROLFIELD:
432 wrbuf_printf(wr, "%s ", n->u.controlfield.tag);
433 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
434 wrbuf_puts (wr, mt->endline_str);
436 case YAZ_MARC_COMMENT:
438 wrbuf_iconv_write(wr, mt->iconv_cd,
439 n->u.comment, strlen(n->u.comment));
/* comments and the leader end with a literal newline, not endline_str */
440 wrbuf_puts(wr, ")\n");
442 case YAZ_MARC_LEADER:
443 wrbuf_printf(wr, "%s\n", n->u.leader);
/** \brief writes the record in the handle's configured output format
 *
 * Dispatches to the line, MARCXML, MarcXchange or ISO2709 writer and
 * returns that writer's result (the switch head and the first case label
 * are not shown in this excerpt).
 */
449 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
454 return yaz_marc_write_line(mt, wr);
455 case YAZ_MARC_MARCXML:
456 return yaz_marc_write_marcxml(mt, wr);
457 case YAZ_MARC_XCHANGE:
458 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
459 case YAZ_MARC_ISO2709:
460 return yaz_marc_write_iso2709(mt, wr);
465 /** \brief common MARC XML/Xchange writer
467 \param wr WRBUF output
468 \param ns XMLNS for the elements
469 \param format record format (e.g. "MARC21")
470 \param type record type (e.g. "Bibliographic") */
/* Emits one <record> element in namespace ns, with optional format and
   type attributes (each limited to 80 characters by the format string),
   followed by one child element per node: <datafield> with indN attributes
   and <subfield> children, <controlfield>, <leader>, and XML comments for
   comment nodes. The leader's position 11 supplies the identifier length
   used to split subfield code from subfield data. */
472 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
477 struct yaz_marc_node *n;
478 int identifier_length;
479 const char *leader = 0;
481 for (n = mt->nodes; n; n = n->next)
482 if (n->which == YAZ_MARC_LEADER)
484 leader = n->u.leader;
490 if (!atoi_n_check(leader+11, 1, &identifier_length))
493 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
495 wrbuf_printf(wr, " format=\"%.80s\"", format);
497 wrbuf_printf(wr, " type=\"%.80s\"", type);
498 wrbuf_printf(wr, ">\n");
499 for (n = mt->nodes; n; n = n->next)
501 struct yaz_marc_subfield *s;
504 case YAZ_MARC_DATAFIELD:
505 wrbuf_printf(wr, " <datafield tag=\"");
506 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
507 strlen(n->u.datafield.tag));
508 wrbuf_printf(wr, "\"");
509 if (n->u.datafield.indicator)
/* one indN attribute per indicator character, numbered from 1 */
512 for (i = 0; n->u.datafield.indicator[i]; i++)
514 wrbuf_printf(wr, " ind%d=\"", i+1);
515 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
516 n->u.datafield.indicator+i, 1);
517 wrbuf_printf(wr, "\"");
520 wrbuf_printf(wr, ">\n");
521 for (s = n->u.datafield.subfields; s; s = s->next)
523 /* if identifier length is 2 (most MARCs),
524 the code is a single character .. However we've
525 seen multibyte codes, so see how big it really is */
526 size_t using_code_len =
527 (identifier_length != 2) ? identifier_length - 1
529 cdata_one_character(mt, s->code_data);
531 wrbuf_puts(wr, " <subfield code=\"");
532 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
533 s->code_data, using_code_len);
534 wrbuf_puts(wr, "\">");
535 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
536 s->code_data + using_code_len,
537 strlen(s->code_data + using_code_len));
538 wrbuf_puts(wr, "</subfield>\n");
540 wrbuf_printf(wr, " </datafield>\n");
542 case YAZ_MARC_CONTROLFIELD:
543 wrbuf_printf(wr, " <controlfield tag=\"");
544 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
545 strlen(n->u.controlfield.tag));
546 wrbuf_printf(wr, "\">");
/* NOTE(review): control field data is written with wrbuf_iconv_puts while
   tags, indicators and subfield data use wrbuf_iconv_write_cdata. If the
   cdata variant is what performs XML escaping (its name suggests so -
   confirm), control field data containing markup characters is emitted
   unescaped here. */
547 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
548 wrbuf_printf(wr, "</controlfield>\n");
550 case YAZ_MARC_COMMENT:
/* NOTE(review): the comment text is inserted verbatim; XML does not allow
   a double hyphen inside a comment, so such text would make the output
   ill-formed. */
551 wrbuf_printf(wr, "<!-- %s -->\n", n->u.comment);
553 case YAZ_MARC_LEADER:
554 wrbuf_printf(wr, " <leader>");
555 wrbuf_iconv_write_cdata(wr,
556 0 /* no charset conversion for leader */,
557 n->u.leader, strlen(n->u.leader));
558 wrbuf_printf(wr, "</leader>\n");
561 wrbuf_puts(wr, "</record>\n");
/** \brief writes the record as MARCXML
 *
 * Overwrites leader position 9 with 'a' in the stored leader (see
 * yaz_marc_modify_leader, which edits the node in place) and then writes
 * the record in the http://www.loc.gov/MARC21/slim namespace.
 */
565 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
567 yaz_marc_modify_leader(mt, 9, "a");
568 return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
/** \brief writes the record as MarcXchange
 *
 * Delegates to the common XML writer with the
 * http://www.bs.dk/standards/MarcXchange namespace. Unlike
 * yaz_marc_write_marcxml, no leader modification is visible here.
 */
572 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
576 return yaz_marc_write_marcxml_ns(mt, wr,
577 "http://www.bs.dk/standards/MarcXchange",
/** \brief writes the record in ISO2709 format
 *
 * First pass: builds the directory in wr_dir (tag, data length, data
 * offset per field) while accumulating data offsets. The leader is then
 * rebuilt in wr_head from the computed record length and base address
 * plus bytes 5..11 and 17..23 of the stored leader. Second pass: writes
 * the field data, each field ended by ISO2709_FS, and the record ended
 * by ISO2709_RS. Comment and leader nodes contribute no field data.
 */
581 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
583 struct yaz_marc_node *n;
584 int indicator_length;
585 int identifier_length;
586 int length_data_entry;
588 int length_implementation;
590 const char *leader = 0;
591 WRBUF wr_dir, wr_head;
594 for (n = mt->nodes; n; n = n->next)
595 if (n->which == YAZ_MARC_LEADER)
596 leader = n->u.leader;
600 if (!atoi_n_check(leader+10, 1, &indicator_length))
602 if (!atoi_n_check(leader+11, 1, &identifier_length))
604 if (!atoi_n_check(leader+20, 1, &length_data_entry))
606 if (!atoi_n_check(leader+21, 1, &length_starting))
608 if (!atoi_n_check(leader+22, 1, &length_implementation))
611 wr_dir = wrbuf_alloc();
612 for (n = mt->nodes; n; n = n->next)
615 struct yaz_marc_subfield *s;
618 case YAZ_MARC_DATAFIELD:
/* "%.3s" caps the tag at three characters in the directory entry */
619 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
620 data_length += indicator_length;
621 for (s = n->u.datafield.subfields; s; s = s->next)
/* +1 accounts for the ISO2709_IDFS byte written before each subfield */
622 data_length += 1+strlen(s->code_data);
625 case YAZ_MARC_CONTROLFIELD:
626 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
627 data_length += strlen(n->u.controlfield.data);
630 case YAZ_MARC_COMMENT:
632 case YAZ_MARC_LEADER:
637 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
638 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
639 data_offset += data_length;
642 /* mark end of directory */
643 wrbuf_putc(wr_dir, ISO2709_FS);
645 /* base address of data (comes after leader+directory) */
646 base_address = 24 + wrbuf_len(wr_dir);
648 wr_head = wrbuf_alloc();
650 /* write record length */
/* NOTE(review): assuming wrbuf_printf follows printf semantics, "%05d" is
   a minimum width only; a value above 99999 yields more than five digits,
   while exactly 24 bytes of wr_head are copied out below. The "%0*d"
   directory fields above have the same property - confirm oversized
   records are rejected elsewhere. */
651 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
652 /* from "original" leader */
653 wrbuf_write(wr_head, leader+5, 7);
654 /* base address of data */
655 wrbuf_printf(wr_head, "%05d", base_address);
656 /* from "original" leader */
657 wrbuf_write(wr_head, leader+17, 7);
659 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
660 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
661 wrbuf_free(wr_head, 1);
662 wrbuf_free(wr_dir, 1);
664 for (n = mt->nodes; n; n = n->next)
666 struct yaz_marc_subfield *s;
669 case YAZ_MARC_DATAFIELD:
670 wrbuf_printf(wr, "%.*s", indicator_length,
671 n->u.datafield.indicator);
672 for (s = n->u.datafield.subfields; s; s = s->next)
673 wrbuf_printf(wr, "%c%s", ISO2709_IDFS, s->code_data);
674 wrbuf_printf(wr, "%c", ISO2709_FS);
676 case YAZ_MARC_CONTROLFIELD:
677 wrbuf_printf(wr, "%s%c", n->u.controlfield.data, ISO2709_FS);
679 case YAZ_MARC_COMMENT:
681 case YAZ_MARC_LEADER:
685 wrbuf_printf(wr, "%c", ISO2709_RS);
/** \brief reads the subfield elements of a datafield from libxml2 nodes
 *
 * Walks the sibling list starting at ptr. Each "subfield" element must
 * carry a "code" attribute with a text value; the code and the element's
 * text content are concatenated into one NMEM buffer and handed to
 * yaz_marc_add_subfield(). Other attributes or elements produce a
 * diagnostic comment. Return statements are not shown in this excerpt;
 * the caller treats a non-zero result as failure.
 */
690 int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
692 for (; ptr; ptr = ptr->next)
694 if (ptr->type == XML_ELEMENT_NODE)
696 if (!strcmp((const char *) ptr->name, "subfield"))
698 size_t ctrl_data_len = 0;
699 char *ctrl_data_buf = 0;
700 const xmlNode *p = 0, *ptr_code = 0;
701 struct _xmlAttr *attr;
702 for (attr = ptr->properties; attr; attr = attr->next)
703 if (!strcmp((const char *)attr->name, "code"))
704 ptr_code = attr->children;
708 mt, "Bad attribute '%.80s' for 'subfield'",
715 mt, "Missing attribute 'code' for 'subfield'" );
718 if (ptr_code->type == XML_TEXT_NODE)
721 strlen((const char *)ptr_code->content);
726 mt, "Missing value for 'code' in 'subfield'" );
/* first pass over the children: total the text lengths to size the buffer */
729 for (p = ptr->children; p ; p = p->next)
730 if (p->type == XML_TEXT_NODE)
731 ctrl_data_len += strlen((const char *)p->content);
732 ctrl_data_buf = nmem_malloc(mt->nmem, ctrl_data_len+1);
/* second pass: code first, then every text child appended behind it */
733 strcpy(ctrl_data_buf, (const char *)ptr_code->content);
734 for (p = ptr->children; p ; p = p->next)
735 if (p->type == XML_TEXT_NODE)
736 strcat(ctrl_data_buf, (const char *)p->content);
737 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
742 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
/* Scans the sibling list at *ptr_p for a "leader" element, takes the
   content of its text child as the leader, requires it to be exactly 24
   characters long and passes it to yaz_marc_read_leader. A missing leader,
   an unexpected element or a wrong length produces a diagnostic comment.
   Return statements and any update of *ptr_p are not shown in this
   excerpt; the caller treats a non-zero result as failure. */
750 static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
752 int indicator_length;
753 int identifier_length;
755 int length_data_entry;
757 int length_implementation;
758 const char *leader = 0;
759 const xmlNode *ptr = *ptr_p;
761 for(; ptr; ptr = ptr->next)
762 if (ptr->type == XML_ELEMENT_NODE)
764 if (!strcmp((const char *) ptr->name, "leader"))
766 xmlNode *p = ptr->children;
/* with several text children the last one is kept */
767 for(; p; p = p->next)
768 if (p->type == XML_TEXT_NODE)
769 leader = (const char *) p->content;
775 mt, "Expected element 'leader', got '%.80s'", ptr->name);
781 yaz_marc_cprintf(mt, "Missing element 'leader'");
784 if (strlen(leader) != 24)
/* NOTE(review): strlen() yields a size_t but the conversion is %d; since
   yaz_marc_cprintf forwards to the v*printf family, the argument type does
   not match the format on platforms where size_t is wider than int. */
786 yaz_marc_cprintf(mt, "Bad length %d of leader data."
787 " Must have length of 24 characters", strlen(leader));
790 yaz_marc_read_leader(mt, leader,
796 &length_implementation);
/* Walks the sibling list at ptr and converts each "controlfield" and
   "datafield" element into the corresponding node. Both require a "tag"
   attribute. For datafields the attributes named "ind" plus one more
   character supply the indicators, and the element's children are read
   with yaz_marc_read_xml_subfields. Unknown attributes or elements
   produce a diagnostic comment. Return statements are not shown here. */
801 static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
803 for(; ptr; ptr = ptr->next)
804 if (ptr->type == XML_ELEMENT_NODE)
806 if (!strcmp((const char *) ptr->name, "controlfield"))
808 const xmlNode *ptr_tag = 0;
809 struct _xmlAttr *attr;
810 for (attr = ptr->properties; attr; attr = attr->next)
811 if (!strcmp((const char *)attr->name, "tag"))
812 ptr_tag = attr->children;
816 mt, "Bad attribute '%.80s' for 'controlfield'",
823 mt, "Missing attribute 'tag' for 'controlfield'" );
826 yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
828 else if (!strcmp((const char *) ptr->name, "datafield"))
830 char indstr[11]; /* 0(unused), 1,....9, + zero term */
831 const xmlNode *ptr_tag = 0;
832 struct _xmlAttr *attr;
834 for (i = 0; i<11; i++)
836 for (attr = ptr->properties; attr; attr = attr->next)
837 if (!strcmp((const char *)attr->name, "tag"))
838 ptr_tag = attr->children;
839 else if (strlen((const char *)attr->name) == 4 &&
840 !memcmp(attr->name, "ind", 3))
/* the name is exactly four characters, so atoi sees a single character
   and the index stays within 0..9 */
842 int no = atoi((const char *)attr->name+3);
844 && attr->children->type == XML_TEXT_NODE)
/* only the first character of the attribute value is used */
845 indstr[no] = attr->children->content[0];
850 mt, "Bad attribute '%.80s' for 'datafield'",
857 mt, "Missing attribute 'tag' for 'datafield'" );
860 /* note that indstr[0] is unused so we use indstr[1..] */
/* NOTE(review): strlen(indstr+1) stops at the first unset slot, so
   presumably the array is zero-filled by the loop above (its body is not
   shown) and an ind2 without ind1 would be dropped - confirm. */
861 yaz_marc_add_datafield_xml(mt, ptr_tag,
862 indstr+1, strlen(indstr+1));
864 if (yaz_marc_read_xml_subfields(mt, ptr->children))
870 "Expected element controlfield or datafield,"
871 " got %.80s", ptr->name);
/** \brief reads a MARC record from a libxml2 node list
 *
 * xmlnode is used as a const xmlNode pointer. The sibling list is
 * searched for a "record" element; other elements, or the absence of a
 * record, produce a diagnostic comment. The leader is then read with
 * yaz_marc_read_xml_leader and the remaining siblings with
 * yaz_marc_read_xml_fields, whose result is returned. The descent from
 * the record element into its children is not shown in this excerpt.
 */
878 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
880 const xmlNode *ptr = xmlnode;
881 for(; ptr; ptr = ptr->next)
882 if (ptr->type == XML_ELEMENT_NODE)
884 if (!strcmp((const char *) ptr->name, "record"))
889 mt, "Unknown element '%.80s' in MARC XML reader",
896 yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
899 /* ptr points to record node now */
901 if (yaz_marc_read_xml_leader(mt, &ptr))
903 return yaz_marc_read_xml_fields(mt, ptr->next);
906 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
/** \brief parses an ISO2709 record into the handle's node list
 *
 * buf holds the record and bsize its size, or -1 when the size is unknown.
 * The record length comes from the first five characters, the remaining
 * layout from the leader. The directory is scanned twice: once to find
 * its end, once to create a control field or data field node per entry.
 * Problems are reported as comment nodes through yaz_marc_cprintf.
 * The final visible return yields record_length; the error returns are
 * not shown in this excerpt.
 */
912 int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
916 int indicator_length;
917 int identifier_length;
918 int end_of_directory;
920 int length_data_entry;
922 int length_implementation;
926 record_length = atoi_n (buf, 5);
927 if (record_length < 25)
/* NOTE(review): the message says "< 24" while the test rejects anything below 25. */
929 yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
932 /* bail out if bsize is known and record_length is greater than that */
933 if (bsize != -1 && record_length > bsize)
/* NOTE(review): the message prints record_length and bsize joined by '<',
   but it is issued when record_length > bsize. */
935 yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
936 record_length, bsize);
940 yaz_marc_cprintf(mt, "Record length %5d", record_length);
942 yaz_marc_read_leader(mt, buf,
948 &length_implementation);
950 /* First pass. determine length of directory & base of data */
951 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
953 /* length of directory entry */
954 int l = 3 + length_data_entry + length_starting;
955 if (entry_p + l >= record_length)
957 yaz_marc_cprintf(mt, "Directory offset %d: end of record."
958 " Missing FS char", entry_p);
963 yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s",
964 entry_p, buf+entry_p);
966 /* Check for digits in length info */
968 if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
972 /* Not all digits, so stop directory scan */
973 yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
974 " length and/or length starting", entry_p);
977 entry_p += 3 + length_data_entry + length_starting;
979 end_of_directory = entry_p;
/* a mismatch between the leader's base address and the directory end is reported */
980 if (base_address != entry_p+1)
982 yaz_marc_cprintf(mt, "Base address not at end of directory,"
983 " base %d, end %d", base_address, entry_p+1);
986 /* Second pass. parse control - and datafields */
987 for (entry_p = 24; entry_p != end_of_directory; )
994 int identifier_flag = 0;
995 int entry_p0 = entry_p;
997 memcpy (tag, buf+entry_p, 3);
1000 data_length = atoi_n(buf+entry_p, length_data_entry);
1001 entry_p += length_data_entry;
1002 data_offset = atoi_n(buf+entry_p, length_starting);
1003 entry_p += length_starting;
/* i is the absolute position of the field data; end_offset its last byte */
1004 i = data_offset + base_address;
1005 end_offset = i+data_length-1;
1007 if (data_length <= 0 || data_offset < 0)
1012 yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
1014 tag, entry_p0, data_length, data_offset);
1016 if (end_offset >= record_length)
1018 yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
1019 entry_p0, end_offset, record_length);
1023 if (memcmp (tag, "00", 2))
1024 identifier_flag = 1; /* if not 00X assume subfields */
1025 else if (indicator_length < 4 && indicator_length > 0)
1027 /* Danmarc 00X have subfields */
/* NOTE(review): these look-ahead reads at i + indicator_length (and one
   byte further) are not compared against end_offset in the lines shown. */
1028 if (buf[i + indicator_length] == ISO2709_IDFS)
1029 identifier_flag = 1;
1030 else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
1031 identifier_flag = 2;
1034 if (identifier_flag)
1037 i += identifier_flag-1;
1038 yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
1039 i += indicator_length;
1041 while (i < end_offset &&
1042 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
/* code_offset skips the byte at i, so the subfield starts at its code */
1044 int code_offset = i+1;
1047 while (i < end_offset &&
1048 buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
1049 buf[i] != ISO2709_FS)
1051 yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
1058 while (i < end_offset &&
1059 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1061 yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
1065 yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
1068 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1070 yaz_marc_cprintf(mt, "No separator at end of field length=%d",
1074 return record_length;
/** \brief decodes an ISO2709 record and writes it to a WRBUF
 *
 * Reads the record with yaz_marc_read_iso2709 and writes it to wr in the
 * handle's configured output format. Per the inline comments, returns the
 * record length (> 0) on success and -1 on error.
 */
1077 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1079 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1082 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1084 return -1; /* error */
1085 return r; /* OK, return length > 0 */
/** \brief decodes an ISO2709 record into the handle's internal buffer
 *
 * Rewinds the handle's WRBUF m_wr, decodes into it and exposes its buffer
 * and length through result and rsize. The memory therefore belongs to
 * the handle and is overwritten by the next call. The conditions guarding
 * the two output assignments are not shown in this excerpt.
 */
1088 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1089 char **result, int *rsize)
1093 wrbuf_rewind(mt->m_wr);
1094 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1096 *result = wrbuf_buf(mt->m_wr);
1098 *rsize = wrbuf_len(mt->m_wr);
1102 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1108 void yaz_marc_debug(yaz_marc_t mt, int level)
1114 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/** \brief overwrites part of the stored leader in place
 *
 * Copies the characters of str (without its terminating NUL) into the
 * leader node's data starting at offset off.
 *
 * NOTE(review): no check is visible that off + strlen(str) stays within
 * the leader (24 bytes when added by yaz_marc_read_leader); callers must
 * guarantee that.
 */
1119 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1121 struct yaz_marc_node *n;
1123 for (n = mt->nodes; n; n = n->next)
1124 if (n->which == YAZ_MARC_LEADER)
1126 leader = n->u.leader;
1127 memcpy(leader+off, str, strlen(str));
/** \brief one-shot decode of an ISO2709 record into a WRBUF
 *
 * Creates a temporary handle, decodes buf into wr with
 * yaz_marc_decode_wrbuf and destroys the handle again. How debug and xml
 * are applied to the handle is not shown in this excerpt.
 */
1133 int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
1135 yaz_marc_t mt = yaz_marc_create();
1140 r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr);
1141 yaz_marc_destroy(mt);
/** \brief convenience wrapper: yaz_marc_decode() with xml set to 0 */
1146 int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize)
1148 return yaz_marc_decode(buf, wr, debug, bsize, 0);
/** \brief decodes an ISO2709 record and writes the result to a FILE
 *
 * Uses a temporary handle: the record is decoded into the handle's own
 * WRBUF, whose content is then written to outf before the handle is
 * destroyed. The result of fwrite is not examined in the line shown.
 */
1152 int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
1154 yaz_marc_t mt = yaz_marc_create();
1158 r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr);
1162 fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf);
1163 yaz_marc_destroy(mt);
/** \brief convenience wrapper: marc_display_exl() with bsize -1 (buffer size unknown) */
1168 int marc_display_ex (const char *buf, FILE *outf, int debug)
1170 return marc_display_exl (buf, outf, debug, -1);
/** \brief convenience wrapper: marc_display_ex() with debug set to 0 */
1174 int marc_display (const char *buf, FILE *outf)
1176 return marc_display_ex (buf, outf, 0);
1182 * indent-tabs-mode: nil
1184 * vim: shiftwidth=4 tabstop=8 expandtab