2 * Copyright (C) 1995-2006, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: marcdisp.c,v 1.33 2006-08-28 12:34:40 adam Exp $
10 * \brief Implements MARC conversion utilities
25 #include <yaz/marcdisp.h>
26 #include <yaz/wrbuf.h>
27 #include <yaz/yaz-util.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
34 static void yaz_marc_reset(yaz_marc_t mt);
36 /** \brief node types for yaz_marc_node */
/* Discriminator stored in yaz_marc_node.which.
   NOTE(review): only YAZ_MARC_CONTROLFIELD is visible in this listing. The
   rest of the file also uses YAZ_MARC_DATAFIELD, YAZ_MARC_COMMENT and
   YAZ_MARC_LEADER, so those enumerators are presumably declared on lines
   that are missing here. */
37 enum YAZ_MARC_NODE_TYPE
40 YAZ_MARC_CONTROLFIELD,
45 /** \brief represents a data field */
/* Payload of a YAZ_MARC_DATAFIELD node. Code in this file accesses the
   members tag, indicator and subfields; only subfields (head of the linked
   list of subfields) is visible in this listing. */
46 struct yaz_marc_datafield {
49 struct yaz_marc_subfield *subfields;
52 /** \brief represents a control field */
/* Payload of a YAZ_MARC_CONTROLFIELD node. Code in this file accesses the
   members tag and data; the member declarations are not visible in this
   listing. */
53 struct yaz_marc_controlfield {
58 /** \brief a comment node */
/* NOTE(review): the members of this struct are not visible in this listing.
   Comment text is stored through n->u.comment elsewhere in the file. */
59 struct yaz_marc_comment {
63 /** \brief MARC node */
/* One element of the singly linked record list kept in the handle.
   "which" selects the active payload. The payloads are reached through a
   member named u elsewhere in this file (u.datafield, u.controlfield,
   u.comment, u.leader); presumably a union whose opening/closing lines are
   missing from this listing. */
64 struct yaz_marc_node {
65 enum YAZ_MARC_NODE_TYPE which;
67 struct yaz_marc_datafield datafield;
68 struct yaz_marc_controlfield controlfield;
/* next node in the record, or 0 for the last one */
72 struct yaz_marc_node *next;
75 /** \brief represents a subfield */
/* Subfields form a singly linked list hanging off a data field. The member
   code_data (subfield code immediately followed by its data) is used by the
   code in this file but its declaration is not visible in this listing. */
76 struct yaz_marc_subfield {
/* next subfield of the same data field, or 0 for the last one */
78 struct yaz_marc_subfield *next;
81 /** \brief the internals of a yaz_marc_t handle */
/* NOTE(review): the struct header and the other members used in this file
   (m_wr, nmem, xml, iconv_cd, subfield_str, endline_str, ...) are not
   visible in this listing. */
/* head of the node list for the current record */
90 struct yaz_marc_node *nodes;
/* address of the pointer where the next node is to be linked (tail of the
   node list); set to &mt->nodes on reset and to &n->next after each add */
91 struct yaz_marc_node **nodes_pp;
/* address of the pointer where the next subfield is to be linked; set when
   a data field is added and advanced on every added subfield */
92 struct yaz_marc_subfield **subfield_pp;
/** \brief creates a MARC handle
    Allocates the handle with xmalloc, selects YAZ_MARC_LINE as output mode,
    allocates the handle's own WRBUF (m_wr), sets the subfield prefix to
    " $" and the end-of-line string to "\n", and creates the NMEM pool that
    record nodes are allocated from.
    NOTE(review): several statements of this function, including its return
    statement, are not visible in this listing; presumably it returns mt. */
95 yaz_marc_t yaz_marc_create(void)
97 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
98 mt->xml = YAZ_MARC_LINE;
100 mt->m_wr = wrbuf_alloc();
102 strcpy(mt->subfield_str, " $");
103 strcpy(mt->endline_str, "\n");
105 mt->nmem = nmem_create();
/** \brief destroys a MARC handle
    \param mt handle
    Releases the NMEM pool holding the record nodes and frees the handle's
    WRBUF.
    NOTE(review): the remaining statements (presumably a null check on mt
    and the release of the handle itself) are not visible in this listing. */
110 void yaz_marc_destroy(yaz_marc_t mt)
114 nmem_destroy(mt->nmem);
115 wrbuf_free (mt->m_wr, 1);
/** \brief allocates a new node for the current record
    \param mt handle
    The node is allocated from the handle's NMEM pool, and nodes_pp is moved
    to the new node's next pointer so that the following node is appended
    after it.
    NOTE(review): the statements that link the node into the list and return
    it are not visible in this listing; callers use the return value as the
    new node. */
119 struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
121 struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n));
124 mt->nodes_pp = &n->next;
/** \brief appends a comment node to the current record
    \param mt handle
    \param comment zero-terminated comment text; a copy is made in the
    handle's NMEM pool */
128 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
130 struct yaz_marc_node *n = yaz_marc_add_node(mt);
131 n->which = YAZ_MARC_COMMENT;
132 n->u.comment = nmem_strdup(mt->nmem, comment);
/** \brief appends a printf-style formatted comment node
    \param mt handle
    \param fmt printf format string, followed by its arguments
    The text is formatted into a local buffer (buf) and handed to
    yaz_marc_add_comment.
    NOTE(review): three alternative formatting calls are visible below
    (_vsnprintf, vsnprintf, vsprintf); presumably exactly one is selected by
    preprocessor conditionals that are missing from this listing. The
    vsprintf variant performs no bounds checking on buf. */
135 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
142 _vsnprintf(buf, sizeof(buf)-1, fmt, ap);
146 vsnprintf(buf, sizeof(buf), fmt, ap);
148 vsprintf(buf, fmt, ap);
152 yaz_marc_add_comment(mt, buf);
/** \brief appends a leader node to the current record
    \param mt handle
    \param leader leader data (need not be zero-terminated)
    \param leader_len number of bytes of leader to copy
    The leader is copied into the handle's NMEM pool with nmem_strdupn. */
156 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
158 struct yaz_marc_node *n = yaz_marc_add_node(mt);
159 n->which = YAZ_MARC_LEADER;
160 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
/** \brief appends a control field node to the current record
    \param mt handle
    \param tag zero-terminated field tag (copied)
    \param data field data (need not be zero-terminated)
    \param data_len number of bytes of data to copy
    After the node is added, a comment node with a hex dump of at most the
    first 16 data bytes is built; " .." is appended to it.
    NOTE(review): the declarations of msg and i and the conditions guarding
    the dump and the " .." suffix are not visible in this listing;
    presumably the dump is only produced in debug mode. */
163 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
164 const char *data, size_t data_len)
166 struct yaz_marc_node *n = yaz_marc_add_node(mt);
167 n->which = YAZ_MARC_CONTROLFIELD;
168 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
169 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
175 sprintf(msg, "controlfield:");
/* "& 0xff" keeps the value in 0..255 even where char is signed */
176 for (i = 0; i < 16 && i < data_len; i++)
177 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
179 sprintf(msg + strlen(msg), " ..");
180 yaz_marc_add_comment(mt, msg);
/** \brief appends a control field node taken from libxml2 nodes
    \param mt handle
    \param ptr_tag node(s) holding the tag text
    \param ptr_data node(s) holding the field data text
    Both strings are produced by nmem_text_node_cdata using the handle's
    NMEM pool. */
185 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
186 const xmlNode *ptr_data)
188 struct yaz_marc_node *n = yaz_marc_add_node(mt);
189 n->which = YAZ_MARC_CONTROLFIELD;
190 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
191 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief appends a data field node (without subfields) to the record
    \param mt handle
    \param tag zero-terminated field tag (copied)
    \param indicator indicator data (need not be zero-terminated)
    \param indicator_len number of indicator bytes to copy
    The new field becomes the target of subsequent yaz_marc_add_subfield
    calls. */
195 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
196 const char *indicator, size_t indicator_len)
198 struct yaz_marc_node *n = yaz_marc_add_node(mt);
199 n->which = YAZ_MARC_DATAFIELD;
200 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
201 n->u.datafield.indicator =
202 nmem_strdupn(mt->nmem, indicator, indicator_len);
203 n->u.datafield.subfields = 0;
205 /* make subfield_pp the current (last one) */
206 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief appends a data field node whose tag comes from libxml2 nodes
    \param mt handle
    \param ptr_tag node(s) holding the tag text
    \param indicator indicator data (need not be zero-terminated)
    \param indicator_len number of indicator bytes to copy
    Same as yaz_marc_add_datafield except that the tag is produced by
    nmem_text_node_cdata. The new field becomes the target of subsequent
    yaz_marc_add_subfield calls. */
210 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
211 const char *indicator, size_t indicator_len)
213 struct yaz_marc_node *n = yaz_marc_add_node(mt);
214 n->which = YAZ_MARC_DATAFIELD;
215 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
216 n->u.datafield.indicator =
217 nmem_strdupn(mt->nmem, indicator, indicator_len);
218 n->u.datafield.subfields = 0;
220 /* make subfield_pp the current (last one) */
221 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief appends a subfield to the most recently added data field
    \param mt handle
    \param code_data subfield code immediately followed by the subfield data
    (need not be zero-terminated)
    \param code_data_len number of bytes of code_data to copy
    A comment node with a hex dump of at most the first 16 bytes is built
    first; " .." is appended when the input is longer than what was dumped.
    NOTE(review): the declarations of msg and i, the condition guarding the
    dump (presumably debug mode), the initialisation of n->next, and any
    check that subfield_pp is set are not visible in this listing. */
225 void yaz_marc_add_subfield(yaz_marc_t mt,
226 const char *code_data, size_t code_data_len)
233 sprintf(msg, "subfield:");
234 for (i = 0; i < 16 && i < code_data_len; i++)
235 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
236 if (i < code_data_len)
237 sprintf(msg + strlen(msg), " ..");
238 yaz_marc_add_comment(mt, msg);
243 struct yaz_marc_subfield *n = nmem_malloc(mt->nmem, sizeof(*n));
244 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
246 /* mark subfield_pp to point to this one, so we append here next */
247 *mt->subfield_pp = n;
248 mt->subfield_pp = &n->next;
/** \brief converts a fixed-width decimal field, checking that it is numeric
    \param buf start of the field
    \param size width of the field in characters
    \param val receives the converted value (via atoi_n)
    Callers treat a zero result as "not a number" and then fall back to a
    default; the return statements themselves are not visible in this
    listing.
    NOTE(review): only the first character of the field is tested with
    isdigit, so a multi-character field such as the 5-character base
    address is accepted when just its first character is a digit. */
252 static int atoi_n_check(const char *buf, int size, int *val)
254 if (!isdigit(*(const unsigned char *) buf))
256 *val = atoi_n(buf, size);
260 /** \brief reads the MARC 24 bytes leader and checks content
262 \param leader_c the 24 byte leader
263 \param indicator_length indicator length
264 \param identifier_length identifier length
265 \param base_address base address
266 \param length_data_entry length of data entry
267 \param length_starting length of starting
268 \param length_implementation length of implementation defined data
/* Works on a local 24-byte copy of leader_c. Each length field is parsed
   with atoi_n_check; for a field that does not parse, a diagnostic text is
   visible below and the output is set to a default: indicator length 2,
   identifier length 2, length of data entry 4, length of starting 5, length
   of implementation 0. The parsed values are reported as comment nodes and
   the (local copy of the) leader is added to the record as a leader node.
   NOTE(review): the base_address parameter, the declaration of the local
   leader buffer, the calls that emit the diagnostics, the default chosen for
   the base address, any repair of the local leader copy, and the condition
   guarding the six "..." report lines (presumably debug mode) are on lines
   missing from this listing. */
270 static void yaz_marc_read_leader(yaz_marc_t mt, const char *leader_c,
271 int *indicator_length,
272 int *identifier_length,
274 int *length_data_entry,
275 int *length_starting,
276 int *length_implementation)
280 memcpy(leader, leader_c, 24);
282 if (!atoi_n_check(leader+10, 1, indicator_length))
285 "Indicator length at offset 10 should hold a digit."
288 *indicator_length = 2;
290 if (!atoi_n_check(leader+11, 1, identifier_length))
293 "Identifier length at offset 11 should hold a digit."
296 *identifier_length = 2;
298 if (!atoi_n_check(leader+12, 5, base_address))
301 "Base address at offsets 12..16 should hold a number."
305 if (!atoi_n_check(leader+20, 1, length_data_entry))
308 "Length data entry at offset 20 should hold a digit."
310 *length_data_entry = 4;
313 if (!atoi_n_check(leader+21, 1, length_starting))
316 "Length starting at offset 21 should hold a digit."
318 *length_starting = 5;
321 if (!atoi_n_check(leader+22, 1, length_implementation))
324 "Length implementation at offset 22 should hold a digit."
326 *length_implementation = 0;
332 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
333 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
334 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
335 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
336 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
337 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
339 yaz_marc_add_leader(mt, leader, 24);
/** \brief sets the string written before each subfield in line mode
    \param mt handle
    \param s zero-terminated string; it is truncated to fit the handle's
    subfield_str buffer, which is always left zero-terminated */
342 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
344 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
345 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/** \brief sets the string written at the end of each field in line mode
    \param mt handle
    \param s zero-terminated string; it is truncated to fit the handle's
    endline_str buffer, which is always left zero-terminated */
348 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
350 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
351 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
354 /* try to guess how many bytes the identifier really is! */
/* Feeds the first 1, 2, 3 and then 4 bytes of buf to yaz_iconv using the
   handle's conversion descriptor and returns the first length for which the
   conversion does not report an error. Returns 1 when no length works.
   NOTE(review): the declarations of i, outbuf and outp, and the condition
   separating the two trailing "return 1" statements (presumably whether
   mt->iconv_cd is set) are not visible in this listing. */
355 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
360 for (i = 1; i<5; i++)
363 size_t outbytesleft = sizeof(outbuf);
365 const char *inp = buf;
367 size_t inbytesleft = i;
368 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
369 &outp, &outbytesleft);
370 if (r != (size_t) (-1))
371 return i; /* got a complete sequence */
373 return 1; /* giving up */
375 return 1; /* we don't know */
/* Discards the current record: resets the NMEM pool that all nodes are
   allocated from and points nodes_pp back at the list head so the next node
   added becomes the first one.
   NOTE(review): other statements of this function (presumably clearing
   mt->nodes and mt->subfield_pp) are not visible in this listing. */
378 static void yaz_marc_reset(yaz_marc_t mt)
380 nmem_reset(mt->nmem);
382 mt->nodes_pp = &mt->nodes;
/** \brief writes the current record in line format
    \param mt handle
    \param wr WRBUF receiving the output
    Finds the leader node, reads the identifier length from leader offset
    11, then writes every node of the record: data fields as tag, indicator
    and subfields (each preceded by the handle's subfield_str), control
    fields as tag and data, comments, and the leader.
    NOTE(review): the return statements, the handling of a record without a
    leader, the switch header and the break statements are not visible in
    this listing. */
386 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
388 struct yaz_marc_node *n;
389 int identifier_length;
390 const char *leader = 0;
392 for (n = mt->nodes; n; n = n->next)
393 if (n->which == YAZ_MARC_LEADER)
395 leader = n->u.leader;
401 if (!atoi_n_check(leader+11, 1, &identifier_length))
404 for (n = mt->nodes; n; n = n->next)
406 struct yaz_marc_subfield *s;
409 case YAZ_MARC_DATAFIELD:
410 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
411 n->u.datafield.indicator);
412 for (s = n->u.datafield.subfields; s; s = s->next)
414 /* if identifier length is 2 (most MARCs),
415 the code is a single character .. However we've
416 seen multibyte codes, so see how big it really is */
417 size_t using_code_len =
418 (identifier_length != 2) ? identifier_length - 1
420 cdata_one_character(mt, s->code_data);
422 wrbuf_puts (wr, mt->subfield_str);
/* subfield code first, then the data that follows it in code_data */
423 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
425 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
426 wrbuf_iconv_puts(wr, mt->iconv_cd,
427 s->code_data + using_code_len);
428 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
431 wrbuf_puts (wr, mt->endline_str);
433 case YAZ_MARC_CONTROLFIELD:
434 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
435 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
436 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
437 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
439 wrbuf_puts (wr, mt->endline_str);
441 case YAZ_MARC_COMMENT:
443 wrbuf_iconv_write(wr, mt->iconv_cd,
444 n->u.comment, strlen(n->u.comment));
445 wrbuf_puts(wr, ")\n");
447 case YAZ_MARC_LEADER:
448 wrbuf_printf(wr, "%s\n", n->u.leader);
/** \brief writes the current record in the handle's output mode
    \param mt handle
    \param wr WRBUF receiving the output
    \return the result of the selected writer
    Dispatches to the line, MARCXML, MarcXchange (without format and type
    attributes) or ISO2709 writer.
    NOTE(review): the switch header (presumably on mt->xml), the first case
    label and the fall-through return value are not visible in this
    listing. */
454 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
459 return yaz_marc_write_line(mt, wr);
460 case YAZ_MARC_MARCXML:
461 return yaz_marc_write_marcxml(mt, wr);
462 case YAZ_MARC_XCHANGE:
463 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
464 case YAZ_MARC_ISO2709:
465 return yaz_marc_write_iso2709(mt, wr);
470 /** \brief common MARC XML/Xchange writer
472 \param wr WRBUF output
473 \param ns XMLNS for the elements
474 \param format record format (e.g. "MARC21")
475 \param type record type (e.g. "Bibliographic")
477 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
482 struct yaz_marc_node *n;
483 int identifier_length;
484 const char *leader = 0;
486 for (n = mt->nodes; n; n = n->next)
487 if (n->which == YAZ_MARC_LEADER)
489 leader = n->u.leader;
495 if (!atoi_n_check(leader+11, 1, &identifier_length))
498 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
500 wrbuf_printf(wr, " format=\"%.80s\"", format);
502 wrbuf_printf(wr, " type=\"%.80s\"", type);
503 wrbuf_printf(wr, ">\n");
504 for (n = mt->nodes; n; n = n->next)
506 struct yaz_marc_subfield *s;
510 case YAZ_MARC_DATAFIELD:
511 wrbuf_printf(wr, " <datafield tag=\"");
512 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
513 strlen(n->u.datafield.tag));
514 wrbuf_printf(wr, "\"");
515 if (n->u.datafield.indicator)
518 for (i = 0; n->u.datafield.indicator[i]; i++)
520 wrbuf_printf(wr, " ind%d=\"", i+1);
521 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
522 n->u.datafield.indicator+i, 1);
523 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
526 wrbuf_printf(wr, ">\n");
527 for (s = n->u.datafield.subfields; s; s = s->next)
529 /* if identifier length is 2 (most MARCs),
530 the code is a single character .. However we've
531 seen multibyte codes, so see how big it really is */
532 size_t using_code_len =
533 (identifier_length != 2) ? identifier_length - 1
535 cdata_one_character(mt, s->code_data);
537 wrbuf_iconv_puts(wr, mt->iconv_cd, " <subfield code=\"");
538 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
539 s->code_data, using_code_len);
540 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
541 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
542 s->code_data + using_code_len,
543 strlen(s->code_data + using_code_len));
544 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
545 wrbuf_puts(wr, "\n");
547 wrbuf_printf(wr, " </datafield>\n");
549 case YAZ_MARC_CONTROLFIELD:
550 wrbuf_printf(wr, " <controlfield tag=\"");
551 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
552 strlen(n->u.controlfield.tag));
553 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
554 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
555 wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
556 wrbuf_puts(wr, "\n");
558 case YAZ_MARC_COMMENT:
559 wrbuf_printf(wr, "<!-- ");
560 wrbuf_puts(wr, n->u.comment);
561 wrbuf_printf(wr, " -->\n");
563 case YAZ_MARC_LEADER:
564 wrbuf_printf(wr, " <leader>");
565 wrbuf_iconv_write_cdata(wr,
566 0 /* no charset conversion for leader */,
567 n->u.leader, strlen(n->u.leader));
568 wrbuf_printf(wr, "</leader>\n");
571 wrbuf_puts(wr, "</record>\n");
/** \brief writes the current record as MARCXML
    \param mt handle
    \param wr WRBUF receiving the output
    \return the result of yaz_marc_write_marcxml_ns
    Overwrites leader offset 9 with "a" before writing, then writes the
    record in the http://www.loc.gov/MARC21/slim namespace.
    NOTE(review): the remaining arguments of the call are not visible in
    this listing. */
575 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
577 yaz_marc_modify_leader(mt, 9, "a");
578 return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
/** \brief writes the current record as MarcXchange
    \param mt handle
    \param wr WRBUF receiving the output
    \return the result of yaz_marc_write_marcxml_ns
    Writes the record in the http://www.bs.dk/standards/MarcXchange
    namespace.
    NOTE(review): the remaining parameters (the caller in
    yaz_marc_write_mode passes two further arguments described there as
    format and type) and the remaining call arguments are not visible in
    this listing. */
582 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
586 return yaz_marc_write_marcxml_ns(mt, wr,
587 "http://www.bs.dk/standards/MarcXchange",
/** \brief writes the current record in ISO2709 format
    \param mt handle
    \param wr WRBUF receiving the output
    Works in two passes over the node list. The first pass builds the
    directory (tag, data length, starting offset per field) in wr_dir and
    measures each field by converting it into the scratch buffer
    wr_data_tmp. The leader is then rebuilt in wr_head with a freshly
    computed record length and base address, keeping bytes 5..11 and 17..23
    of the original leader. The second pass writes the field data, and the
    record is closed with ISO2709_RS.
    NOTE(review): the return statements, the handling of a missing leader or
    non-numeric leader fields, the declarations and initialisation of
    data_length, data_offset, base_address and length_starting, the switch
    headers and the break statements are not visible in this listing. */
591 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
593 struct yaz_marc_node *n;
594 int indicator_length;
595 int identifier_length;
596 int length_data_entry;
598 int length_implementation;
600 const char *leader = 0;
601 WRBUF wr_dir, wr_head, wr_data_tmp;
604 for (n = mt->nodes; n; n = n->next)
605 if (n->which == YAZ_MARC_LEADER)
606 leader = n->u.leader;
/* field widths used for the directory come from the existing leader */
610 if (!atoi_n_check(leader+10, 1, &indicator_length))
612 if (!atoi_n_check(leader+11, 1, &identifier_length))
614 if (!atoi_n_check(leader+20, 1, &length_data_entry))
616 if (!atoi_n_check(leader+21, 1, &length_starting))
618 if (!atoi_n_check(leader+22, 1, &length_implementation))
621 wr_data_tmp = wrbuf_alloc();
622 wr_dir = wrbuf_alloc();
/* pass 1: build the directory and measure the converted field data */
623 for (n = mt->nodes; n; n = n->next)
626 struct yaz_marc_subfield *s;
630 case YAZ_MARC_DATAFIELD:
631 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
632 data_length += indicator_length;
633 wrbuf_rewind(wr_data_tmp);
634 for (s = n->u.datafield.subfields; s; s = s->next)
636 /* write dummy IDFS + content */
637 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
638 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
640 /* write dummy FS (makes MARC-8 to become ASCII) */
641 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
642 data_length += wrbuf_len(wr_data_tmp);
644 case YAZ_MARC_CONTROLFIELD:
645 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
647 wrbuf_rewind(wr_data_tmp);
648 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
649 n->u.controlfield.data);
650 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
651 data_length += wrbuf_len(wr_data_tmp);
653 case YAZ_MARC_COMMENT:
655 case YAZ_MARC_LEADER:
/* directory entry: zero-padded data length and starting offset, using the
   widths taken from the leader */
660 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
661 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
662 data_offset += data_length;
665 /* mark end of directory */
666 wrbuf_putc(wr_dir, ISO2709_FS);
668 /* base address of data (comes after leader+directory) */
669 base_address = 24 + wrbuf_len(wr_dir);
671 wr_head = wrbuf_alloc();
673 /* write record length */
674 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
675 /* from "original" leader */
676 wrbuf_write(wr_head, leader+5, 7);
677 /* base address of data */
678 wrbuf_printf(wr_head, "%05d", base_address);
679 /* from "original" leader */
680 wrbuf_write(wr_head, leader+17, 7);
682 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
683 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
684 wrbuf_free(wr_head, 1);
685 wrbuf_free(wr_dir, 1);
686 wrbuf_free(wr_data_tmp, 1);
/* pass 2: write the field data in the same order as the directory */
688 for (n = mt->nodes; n; n = n->next)
690 struct yaz_marc_subfield *s;
694 case YAZ_MARC_DATAFIELD:
695 wrbuf_printf(wr, "%.*s", indicator_length,
696 n->u.datafield.indicator);
697 for (s = n->u.datafield.subfields; s; s = s->next)
699 wrbuf_putc(wr, ISO2709_IDFS);
700 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
701 /* write dummy blank - makes MARC-8 to become ASCII */
702 wrbuf_iconv_putchar(wr, mt->iconv_cd, ' ');
705 wrbuf_putc(wr, ISO2709_FS);
707 case YAZ_MARC_CONTROLFIELD:
708 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
709 /* write dummy blank - makes MARC-8 to become ASCII */
710 wrbuf_iconv_putchar(wr, mt->iconv_cd, ' ');
712 wrbuf_putc(wr, ISO2709_FS);
714 case YAZ_MARC_COMMENT:
716 case YAZ_MARC_LEADER:
720 wrbuf_printf(wr, "%c", ISO2709_RS);
/** \brief reads the subfield elements of one datafield element
    \param mt handle
    \param ptr first child node of the datafield element
    For every element named "subfield", the text of its code attribute and
    the text of all its text children are concatenated into one buffer
    (code first, then data) allocated from the handle's NMEM pool, and the
    result is added with yaz_marc_add_subfield. Diagnostics are recorded as
    comment nodes.
    NOTE(review): the return statements and the conditions around the
    diagnostics are not visible in this listing; the caller treats a
    non-zero result as failure. */
725 int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
727 for (; ptr; ptr = ptr->next)
729 if (ptr->type == XML_ELEMENT_NODE)
731 if (!strcmp((const char *) ptr->name, "subfield"))
733 size_t ctrl_data_len = 0;
734 char *ctrl_data_buf = 0;
735 const xmlNode *p = 0, *ptr_code = 0;
736 struct _xmlAttr *attr;
737 for (attr = ptr->properties; attr; attr = attr->next)
738 if (!strcmp((const char *)attr->name, "code"))
739 ptr_code = attr->children;
743 mt, "Bad attribute '%.80s' for 'subfield'",
750 mt, "Missing attribute 'code' for 'subfield'" );
753 if (ptr_code->type == XML_TEXT_NODE)
756 strlen((const char *)ptr_code->content);
761 mt, "Missing value for 'code' in 'subfield'" );
/* first loop sizes the buffer, second loop fills it */
764 for (p = ptr->children; p ; p = p->next)
765 if (p->type == XML_TEXT_NODE)
766 ctrl_data_len += strlen((const char *)p->content);
767 ctrl_data_buf = nmem_malloc(mt->nmem, ctrl_data_len+1);
768 strcpy(ctrl_data_buf, (const char *)ptr_code->content);
769 for (p = ptr->children; p ; p = p->next)
770 if (p->type == XML_TEXT_NODE)
771 strcat(ctrl_data_buf, (const char *)p->content);
772 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
777 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
/* Locates the "leader" element starting at *ptr_p, takes its text content
   and, when that text is exactly 24 characters long, passes it to
   yaz_marc_read_leader. Diagnostics are recorded as comment nodes.
   The length in the "Bad length" diagnostic is cast to int because strlen
   returns size_t, which does not match the %d conversion.
   NOTE(review): the return statements, the update of *ptr_p, the
   declarations of base_address and length_starting and the middle arguments
   of the yaz_marc_read_leader call are on lines missing from this listing;
   the caller treats a non-zero result as failure. */
785 static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
787 int indicator_length;
788 int identifier_length;
790 int length_data_entry;
792 int length_implementation;
793 const char *leader = 0;
794 const xmlNode *ptr = *ptr_p;
796 for(; ptr; ptr = ptr->next)
797 if (ptr->type == XML_ELEMENT_NODE)
799 if (!strcmp((const char *) ptr->name, "leader"))
801 xmlNode *p = ptr->children;
802 for(; p; p = p->next)
803 if (p->type == XML_TEXT_NODE)
804 leader = (const char *) p->content;
810 mt, "Expected element 'leader', got '%.80s'", ptr->name);
816 yaz_marc_cprintf(mt, "Missing element 'leader'");
819 if (strlen(leader) != 24)
821 yaz_marc_cprintf(mt, "Bad length %d of leader data."
822 " Must have length of 24 characters", (int) strlen(leader));
825 yaz_marc_read_leader(mt, leader,
831 &length_implementation);
/* Reads the controlfield and datafield elements that follow the leader.
   A controlfield needs a tag attribute and is added from its tag and child
   nodes. For a datafield, the tag attribute and the indN attributes
   (4-character names starting with "ind") are collected; the first
   character of each indN value is stored at indstr[N], and the indicator
   string passed on is indstr+1 up to its first zero byte. The subfields are
   then read with yaz_marc_read_xml_subfields. Diagnostics are recorded as
   comment nodes.
   NOTE(review): the return statements, the initialisation of indstr, part
   of the condition guarding the indstr[no] store (including any range check
   on no) and the conditions around the diagnostics are not visible in this
   listing. */
836 static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
838 for(; ptr; ptr = ptr->next)
839 if (ptr->type == XML_ELEMENT_NODE)
841 if (!strcmp((const char *) ptr->name, "controlfield"))
843 const xmlNode *ptr_tag = 0;
844 struct _xmlAttr *attr;
845 for (attr = ptr->properties; attr; attr = attr->next)
846 if (!strcmp((const char *)attr->name, "tag"))
847 ptr_tag = attr->children;
851 mt, "Bad attribute '%.80s' for 'controlfield'",
858 mt, "Missing attribute 'tag' for 'controlfield'" );
861 yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
863 else if (!strcmp((const char *) ptr->name, "datafield"))
865 char indstr[11]; /* 0(unused), 1,....9, + zero term */
866 const xmlNode *ptr_tag = 0;
867 struct _xmlAttr *attr;
869 for (i = 0; i<11; i++)
871 for (attr = ptr->properties; attr; attr = attr->next)
872 if (!strcmp((const char *)attr->name, "tag"))
873 ptr_tag = attr->children;
874 else if (strlen((const char *)attr->name) == 4 &&
875 !memcmp(attr->name, "ind", 3))
/* the name is exactly 4 characters, so this parses one character */
877 int no = atoi((const char *)attr->name+3);
879 && attr->children->type == XML_TEXT_NODE)
880 indstr[no] = attr->children->content[0];
885 mt, "Bad attribute '%.80s' for 'datafield'",
892 mt, "Missing attribute 'tag' for 'datafield'" );
895 /* note that indstr[0] is unused so we use indstr[1..] */
896 yaz_marc_add_datafield_xml(mt, ptr_tag,
897 indstr+1, strlen(indstr+1));
899 if (yaz_marc_read_xml_subfields(mt, ptr->children))
905 "Expected element controlfield or datafield,"
906 " got %.80s", ptr->name);
/** \brief reads a MARCXML/MarcXchange record from a libxml2 node
    \param mt handle
    \param xmlnode first node to inspect (a const xmlNode pointer passed as
    const void pointer)
    \return the result of yaz_marc_read_xml_fields when the record and
    leader elements are found
    Searches for the "record" element, then reads the leader and the fields.
    Diagnostics are recorded as comment nodes.
    NOTE(review): the error return values, the step from the record element
    to its children and any reset of the handle are not visible in this
    listing. */
913 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
915 const xmlNode *ptr = xmlnode;
916 for(; ptr; ptr = ptr->next)
917 if (ptr->type == XML_ELEMENT_NODE)
919 if (!strcmp((const char *) ptr->name, "record"))
924 mt, "Unknown element '%.80s' in MARC XML reader",
931 yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
934 /* ptr points to record node now */
936 if (yaz_marc_read_xml_leader(mt, &ptr))
938 return yaz_marc_read_xml_fields(mt, ptr->next);
/* NOTE(review): second definition of yaz_marc_read_xml with the same
   signature; presumably the variant compiled when libxml2 support is not
   available. Its body and the preprocessor conditionals separating the two
   definitions are not visible in this listing. */
941 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
/** \brief reads an ISO2709 record into the handle's node list
    \param mt handle
    \param buf ISO2709 record
    \param bsize size of buf in bytes, or -1 when unknown
    \return the record length on success
    Parses the 5-digit record length and the leader, scans the directory
    once to find its end, then walks the directory again and adds a data
    field (with subfields) or a control field for each entry. Diagnostics
    are recorded as comment nodes.
    Two diagnostics are corrected relative to the earlier text: the "record
    length" message now names the limit that is actually tested (25), and
    the "larger than buffer" message prints the buffer size before the
    record length so that the values agree with its "<" wording.
    NOTE(review): the error return values, the declarations of several
    locals (record_length, entry_p, base_address, length_starting, tag, i,
    i0, data_length, data_offset, end_offset), the middle arguments of the
    yaz_marc_read_leader call, the bodies of the scanning loops, any reset
    of the handle and the conditions guarding some diagnostics are on lines
    missing from this listing. */
947 int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
951 int indicator_length;
952 int identifier_length;
953 int end_of_directory;
955 int length_data_entry;
957 int length_implementation;
961 record_length = atoi_n (buf, 5);
962 if (record_length < 25)
964 yaz_marc_cprintf(mt, "Record length %d < 25", record_length);
967 /* bail out if bsize is known and record_length is larger than that */
968 if (bsize != -1 && record_length > bsize)
970 yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
971 bsize, record_length);
975 yaz_marc_cprintf(mt, "Record length %5d", record_length);
977 yaz_marc_read_leader(mt, buf,
983 &length_implementation);
985 /* First pass. determine length of directory & base of data */
986 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
988 /* length of directory entry */
989 int l = 3 + length_data_entry + length_starting;
990 if (entry_p + l >= record_length)
992 yaz_marc_cprintf(mt, "Directory offset %d: end of record."
993 " Missing FS char", entry_p);
998 yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s",
999 entry_p, buf+entry_p);
1001 /* Check for digits in length info */
1003 if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
1007 /* Not all digits, so stop directory scan */
1008 yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
1009 " length and/or length starting", entry_p);
1012 entry_p += 3 + length_data_entry + length_starting;
1014 end_of_directory = entry_p;
1015 if (base_address != entry_p+1)
1017 yaz_marc_cprintf(mt, "Base address not at end of directory,"
1018 " base %d, end %d", base_address, entry_p+1);
1021 /* Second pass. parse control - and datafields */
1022 for (entry_p = 24; entry_p != end_of_directory; )
1029 int identifier_flag = 0;
1030 int entry_p0 = entry_p;
1032 memcpy (tag, buf+entry_p, 3);
1035 data_length = atoi_n(buf+entry_p, length_data_entry);
1036 entry_p += length_data_entry;
1037 data_offset = atoi_n(buf+entry_p, length_starting);
1038 entry_p += length_starting;
/* i is the absolute offset of the field data within buf */
1039 i = data_offset + base_address;
1040 end_offset = i+data_length-1;
1042 if (data_length <= 0 || data_offset < 0)
1047 yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
1049 tag, entry_p0, data_length, data_offset);
1051 if (end_offset >= record_length)
1053 yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
1054 entry_p0, end_offset, record_length);
1058 if (memcmp (tag, "00", 2))
1059 identifier_flag = 1; /* if not 00X assume subfields */
1060 else if (indicator_length < 4 && indicator_length > 0)
1062 /* Danmarc 00X have subfields */
1063 if (buf[i + indicator_length] == ISO2709_IDFS)
1064 identifier_flag = 1;
1065 else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
1066 identifier_flag = 2;
1069 if (identifier_flag)
1072 i += identifier_flag-1;
1073 yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
1074 i += indicator_length;
1076 while (i < end_offset &&
1077 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1079 int code_offset = i+1;
1082 while (i < end_offset &&
1083 buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
1084 buf[i] != ISO2709_FS)
1086 yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
1093 while (i < end_offset &&
1094 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1096 yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
1100 yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
1103 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1105 yaz_marc_cprintf(mt, "No separator at end of field length=%d",
1109 return record_length;
/** \brief decodes an ISO2709 record and writes it in the handle's mode
    \param mt handle
    \param buf ISO2709 record
    \param bsize size of buf in bytes, or -1 when unknown
    \param wr WRBUF receiving the output
    \return the value returned by yaz_marc_read_iso2709 on success, -1 on
    error
    NOTE(review): the conditions guarding the two return statements are not
    visible in this listing. */
1112 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1114 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1117 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1119 return -1; /* error */
1120 return r; /* OK, return length > 0 */
/** \brief decodes an ISO2709 record into the handle's own buffer
    \param mt handle
    \param buf ISO2709 record
    \param bsize size of buf in bytes, or -1 when unknown
    \param result receives a pointer to the converted record
    \param rsize receives the length of the converted record
    The output is stored in the handle's WRBUF (m_wr), which is rewound on
    every call, so *result points into memory owned by the handle and is
    overwritten by the next call.
    NOTE(review): the declaration of r, the conditions guarding the two
    stores (presumably null checks on result and rsize) and the return
    statement are not visible in this listing. */
1123 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1124 char **result, int *rsize)
1128 wrbuf_rewind(mt->m_wr);
1129 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1131 *result = wrbuf_buf(mt->m_wr);
1133 *rsize = wrbuf_len(mt->m_wr);
/* NOTE(review): the body is not visible in this listing. Presumably stores
   xmlmode as the handle's output mode (the xml member initialised to
   YAZ_MARC_LINE in yaz_marc_create); confirm against the full source. */
1137 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
/* NOTE(review): the body is not visible in this listing. Presumably stores
   level as the handle's debug level; confirm against the full source. */
1143 void yaz_marc_debug(yaz_marc_t mt, int level)
/* NOTE(review): the body is not visible in this listing. Presumably stores
   cd as the handle's character set conversion descriptor (the iconv_cd
   member used by the writers); confirm against the full source. */
1149 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/** \brief overwrites part of the leader of the current record
    \param mt handle
    \param off byte offset within the leader
    \param str zero-terminated replacement; strlen(str) bytes are copied,
    without the terminator
    Does nothing when the record has no leader node.
    NOTE(review): no check is made that off + strlen(str) stays within the
    leader. The declaration of the local leader pointer and whether the loop
    stops after the first leader node are not visible in this listing. */
1154 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1156 struct yaz_marc_node *n;
1158 for (n = mt->nodes; n; n = n->next)
1159 if (n->which == YAZ_MARC_LEADER)
1161 leader = n->u.leader;
1162 memcpy(leader+off, str, strlen(str));
/** \brief decodes an ISO2709 record using a temporary handle
    \param buf ISO2709 record
    \param wr WRBUF receiving the output
    \param debug debug level
    \param bsize size of buf in bytes, or -1 when unknown
    \param xml output mode
    Creates a handle, decodes buf into wr and destroys the handle again.
    NOTE(review): the declaration of r, the statements applying debug and
    xml to the handle, and the return statement are not visible in this
    listing. */
1168 int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
1170 yaz_marc_t mt = yaz_marc_create();
1175 r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr);
1176 yaz_marc_destroy(mt);
/** \brief decodes an ISO2709 record into a WRBUF
    \param buf ISO2709 record
    \param wr WRBUF receiving the output
    \param debug debug level
    \param bsize size of buf in bytes, or -1 when unknown
    \return the result of yaz_marc_decode called with xml set to 0 */
1181 int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize)
1183 return yaz_marc_decode(buf, wr, debug, bsize, 0);
/** \brief decodes an ISO2709 record and writes the result to a stream
    \param buf ISO2709 record
    \param outf output stream
    \param debug debug level
    \param bsize size of buf in bytes, or -1 when unknown
    Uses a temporary handle; the record is decoded into the handle's own
    WRBUF and that buffer is written to outf with fwrite.
    NOTE(review): the declaration of r, the statement applying debug, the
    handling of a missing outf, the condition guarding the fwrite and the
    return statement are not visible in this listing. The result of fwrite
    is not checked. */
1187 int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
1189 yaz_marc_t mt = yaz_marc_create();
1193 r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr);
1197 fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf);
1198 yaz_marc_destroy(mt);
/** \brief decodes an ISO2709 record of unknown buffer size to a stream
    \param buf ISO2709 record
    \param outf output stream
    \param debug debug level
    \return the result of marc_display_exl called with bsize set to -1 */
1203 int marc_display_ex (const char *buf, FILE *outf, int debug)
1205 return marc_display_exl (buf, outf, debug, -1);
/** \brief decodes an ISO2709 record to a stream without debug output
    \param buf ISO2709 record
    \param outf output stream
    \return the result of marc_display_ex called with debug set to 0 */
1209 int marc_display (const char *buf, FILE *outf)
1211 return marc_display_ex (buf, outf, 0);
1217 * indent-tabs-mode: nil
1219 * vim: shiftwidth=4 tabstop=8 expandtab