2 * Copyright (C) 1995-2005, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: marcdisp.c,v 1.25 2006-01-26 15:37:05 adam Exp $
10 * \brief Implements MARC display - and conversion utilities
20 #include <yaz/marcdisp.h>
21 #include <yaz/wrbuf.h>
22 #include <yaz/yaz-util.h>
/*
 * Allocate a new MARC handle with xmalloc and give it its initial state:
 * output mode YAZ_MARC_LINE, a freshly allocated WRBUF in m_wr, " $" as the
 * subfield prefix string and a single newline as the end-of-line string.
 *
 * NOTE(review): only part of this definition is visible in this listing;
 * any further field initialisation and the return of mt are presumably on
 * lines not shown - confirm against the full file.
 */
33 yaz_marc_t yaz_marc_create(void)
35 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
36 mt->xml = YAZ_MARC_LINE;
38 mt->m_wr = wrbuf_alloc();
/* Both literals are short constants copied into fields of the handle. */
40 strcpy(mt->subfield_str, " $");
41 strcpy(mt->endline_str, "\n");
45 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
47 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
48 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
51 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
53 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
54 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
/*
 * Tear down a handle: the WRBUF held in mt->m_wr is released with
 * wrbuf_free.
 *
 * NOTE(review): only part of this definition is visible in this listing;
 * a NULL check and the release of mt itself are presumably on lines not
 * shown - confirm against the full file.
 */
57 void yaz_marc_destroy(yaz_marc_t mt)
61 wrbuf_free (mt->m_wr, 1);
/*
 * Append len bytes of buf to wr, passing them through the handle's iconv
 * descriptor (mt->iconv_cd).
 *
 * ISO2709 and line output use wrbuf_iconv_write; the remaining call,
 * wrbuf_iconv_write_cdata, is the one used for the other output modes.
 * NOTE(review): the line between the last two calls is not visible in this
 * listing; it is presumably an `else` - confirm against the full file.
 */
65 static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len, WRBUF wr)
67 if (mt->xml == YAZ_MARC_ISO2709)
68 wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
69 else if (mt->xml == YAZ_MARC_LINE)
70 wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
72 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, buf, len);
75 /* try to guess how many bytes the identifier really is! */
/*
 * Returns a byte count for the character starting at buf. The visible code
 * feeds the first i bytes of buf to yaz_iconv using mt->iconv_cd and
 * returns i as soon as a conversion does not report an error; every other
 * visible path returns 1.
 *
 * NOTE(review): the declarations of i, outbuf and outp, and the loop or
 * conditions surrounding these statements, are not visible in this listing.
 */
76 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
84 size_t outbytesleft = sizeof(outbuf);
86 const char *inp = buf;
88 size_t inbytesleft = i;
89 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
90 &outp, &outbytesleft);
/* (size_t)(-1) is the value treated as a conversion failure here */
91 if (r != (size_t) (-1))
92 return i; /* got a complete sequence */
94 return 1; /* giving up */
96 return 1; /* we don't know */
/*
 * Checked variant of atoi_n: tests that the first byte of buf is a decimal
 * digit and stores atoi_n(buf, size) in *val.
 *
 * Callers in this file treat a zero result as "not a number" and fall back
 * to a default value.
 * NOTE(review): the return statements are not visible in this listing.
 * Only the first byte is tested by the visible code, even when size > 1.
 */
99 static int atoi_n_check(const char *buf, int size, int *val)
/* cast through unsigned char so isdigit never sees a negative value */
101 if (!isdigit(*(const unsigned char *) buf))
103 *val = atoi_n(buf, size);
/*
 * Decode one ISO2709 (MARC) record held in buf and append its rendering to
 * wr, in the output mode selected by mt->xml (line, ISO2709, simple XML,
 * OAI MARC, MARCXML or MarcXchange).
 *
 * mt    - handle; its xml mode, iconv_cd, subfield_str and endline_str
 *         fields are read here.
 * buf   - the record; the first 24 bytes are treated as the leader.
 * bsize - size of buf, or -1 when the size is not known.
 * wr    - output buffer; diagnostics are written to it as XML comments.
 *
 * The final visible statement returns record_length.
 * NOTE(review): many lines of this function (braces, case labels, several
 * conditions and the early-exit paths) are not visible in this listing, so
 * the comments below describe only what the visible statements establish.
 */
107 int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
111 int indicator_length;
112 int identifier_length;
113 int end_of_directory;
115 int length_data_entry;
117 int length_implementation;
/* Warnings about a malformed leader are emitted as XML comments; they are
 * enabled for all four XML output modes (the condition guarding the first
 * assignment below is not visible). */
119 int produce_warnings = 0;
122 produce_warnings = 1;
123 if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_OAIMARC
124 || mt->xml == YAZ_MARC_MARCXML || mt->xml == YAZ_MARC_XCHANGE)
125 produce_warnings = 1;
/* Leader offsets 0..4: total record length. */
127 record_length = atoi_n (buf, 5);
128 if (record_length < 25)
131 wrbuf_printf(wr, "<!-- Record length %d - aborting -->\n",
135 memcpy(lead, buf, 24); /* so we can modify the header for output */
137 /* bail out if bsize is known and record_length exceeds it */
138 if (bsize != -1 && record_length > bsize)
/* Parse the remaining leader fields. Each one falls back to the default
 * named in its warning text when the leader does not hold a digit there. */
140 if (!atoi_n_check(buf+10, 1, &indicator_length))
142 if (produce_warnings)
143 wrbuf_printf(wr, "<!-- Indicator length at offset 10 should hold a digit. Assuming 2 -->\n");
145 indicator_length = 2;
147 if (!atoi_n_check(buf+11, 1, &identifier_length))
149 if (produce_warnings)
150 wrbuf_printf(wr, "<!-- Identifier length at offset 11 should hold a digit. Assuming 2 -->\n");
152 identifier_length = 2;
154 if (!atoi_n_check(buf+12, 5, &base_address))
156 if (produce_warnings)
157 wrbuf_printf(wr, "<!-- Base address at offsets 12..16 should hold a number. Assuming 0 -->\n");
160 if (!atoi_n_check(buf+20, 1, &length_data_entry))
162 if (produce_warnings)
163 wrbuf_printf(wr, "<!-- Length data entry at offset 20 should hold a digit. Assuming 4 -->\n");
164 length_data_entry = 4;
167 if (!atoi_n_check(buf+21, 1, &length_starting))
169 if (produce_warnings)
170 wrbuf_printf(wr, "<!-- Length starting at offset 21 should hold a digit. Assuming 5 -->\n");
174 if (!atoi_n_check(buf+22, 1, &length_implementation))
176 if (produce_warnings)
177 wrbuf_printf(wr, "<!-- Length implementation at offset 22 should hold a digit. Assuming 0 -->\n");
178 length_implementation = 0;
/* Emit the record's opening markup for the selected output mode.
 * NOTE(review): str is filled with sprintf below; its declared size is not
 * visible here - confirm it is large enough for the longest format. */
182 if (mt->xml != YAZ_MARC_LINE)
188 case YAZ_MARC_ISO2709:
190 case YAZ_MARC_SIMPLEXML:
/* Leader bytes 5 and 6, then bytes 7..25 of buf, become attributes. */
191 wrbuf_puts (wr, "<iso2709\n");
192 sprintf (str, " RecordStatus=\"%c\"\n", buf[5]);
193 wrbuf_puts (wr, str);
194 sprintf (str, " TypeOfRecord=\"%c\"\n", buf[6]);
195 wrbuf_puts (wr, str);
196 for (i = 1; i<=19; i++)
198 sprintf (str, " ImplDefined%d=\"%c\"\n", i, buf[6+i]);
199 wrbuf_puts (wr, str);
201 wrbuf_puts (wr, ">\n");
/* NOTE(review): the namespace URI below spells "OIA" where the
 * schemaLocation two lines later spells "OAI"; this looks like a
 * transposition. It is runtime output, so confirm against the OAI MARC
 * schema before changing it. */
203 case YAZ_MARC_OAIMARC:
206 "<oai_marc xmlns=\"http://www.openarchives.org/OIA/oai_marc\""
208 " xmlns:xsi=\"http://www.w3.org/2000/10/XMLSchema-instance\""
210 " xsi:schemaLocation=\"http://www.openarchives.org/OAI/oai_marc.xsd\""
214 sprintf (str, " status=\"%c\" type=\"%c\" catForm=\"%c\">\n",
215 buf[5], buf[6], buf[7]);
216 wrbuf_puts (wr, str);
218 case YAZ_MARC_MARCXML:
221 "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
223 lead[9] = 'a'; /* set leader to signal unicode */
224 marc_cdata(mt, lead, 24, wr);
225 wrbuf_printf(wr, "</leader>\n");
/* MarcXchange writes the leader copy without altering byte 9. */
227 case YAZ_MARC_XCHANGE:
230 "<record xmlns=\"http://www.bs.dk/standards/MarcXchange\">\n"
232 marc_cdata(mt, lead, 24, wr);
233 wrbuf_printf(wr, "</leader>\n");
/* Dump of the parsed leader values, wrapped in one XML comment
 * (the condition enabling this dump is not visible here). */
241 wrbuf_puts (wr, "<!--\n");
242 sprintf (str, "Record length %5d\n", record_length);
243 wrbuf_puts (wr, str);
244 sprintf (str, "Indicator length %5d\n", indicator_length);
245 wrbuf_puts (wr, str);
246 sprintf (str, "Identifier length %5d\n", identifier_length);
247 wrbuf_puts (wr, str);
248 sprintf (str, "Base address %5d\n", base_address);
249 wrbuf_puts (wr, str);
250 sprintf (str, "Length data entry %5d\n", length_data_entry);
251 wrbuf_puts (wr, str);
252 sprintf (str, "Length starting %5d\n", length_starting);
253 wrbuf_puts (wr, str);
254 sprintf (str, "Length implementation %5d\n", length_implementation);
255 wrbuf_puts (wr, str);
256 wrbuf_puts (wr, "-->\n");
259 /* first pass. determine length of directory & base of data */
/* The directory starts right after the 24-byte leader and runs until a
 * field separator (ISO2709_FS) is found. */
260 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
262 /* length of directory entry */
/* 3 bytes of tag + data-length digits + starting-position digits */
263 int l = 3 + length_data_entry + length_starting;
/* an entry that would reach the end of the record means the FS is missing */
264 if (entry_p + l >= record_length)
266 wrbuf_printf (wr, "<!-- Directory offset %d: end of record. "
267 "Missing FS char -->\n", entry_p);
271 wrbuf_printf (wr, "<!-- Directory offset %d: Tag %.3s -->\n",
272 entry_p, buf+entry_p);
273 /* check for digits in length info */
275 if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
279 /* not all digits, so stop directory scan */
280 wrbuf_printf (wr, "<!-- Directory offset %d: Bad data for data "
281 "length and/or length starting -->\n", entry_p);
284 entry_p += 3 + length_data_entry + length_starting;
/* entry_p now marks where the directory scan stopped */
286 end_of_directory = entry_p;
/* the data area is expected to begin one byte past the directory end */
287 if (base_address != entry_p+1)
289 if (produce_warnings)
290 wrbuf_printf (wr,"<!-- Base address not at end of directory, "
291 "base %d, end %d -->\n", base_address, entry_p+1);
/* ISO2709 output: the leader and directory are rebuilt in scratch buffers,
 * using for each field the length reported by wrbuf_iconv_write (sz2)
 * in place of the length found in the input directory. */
293 if (mt->xml == YAZ_MARC_ISO2709)
295 WRBUF wr_head = wrbuf_alloc();
296 WRBUF wr_dir = wrbuf_alloc();
297 WRBUF wr_tmp = wrbuf_alloc();
300 /* second pass. create directory for ISO2709 output */
301 for (entry_p = 24; entry_p != end_of_directory; )
303 int data_length, data_offset, end_offset;
/* the 3-byte tag is copied unchanged into the new directory */
306 wrbuf_write(wr_dir, buf+entry_p, 3);
309 data_length = atoi_n (buf+entry_p, length_data_entry);
310 entry_p += length_data_entry;
311 data_offset = atoi_n (buf+entry_p, length_starting);
312 entry_p += length_starting;
/* i: absolute offset of the field's first byte; end_offset: its last byte */
313 i = data_offset + base_address;
314 end_offset = i+data_length-1;
316 if (data_length <= 0 || data_offset < 0 || end_offset >= record_length)
/* find the end of the field: stop at a separator or at end_offset */
319 while (i < end_offset &&
320 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
322 sz1 = 1+i - (data_offset + base_address);
/* convert into the scratch buffer only to obtain sz2; wr_tmp is rewound
 * immediately afterwards */
325 sz2 = wrbuf_iconv_write(wr_tmp, mt->iconv_cd,
326 buf + data_offset+base_address, sz1);
327 wrbuf_rewind(wr_tmp);
/* zero-padded to the same field widths as the input directory */
331 wrbuf_printf(wr_dir, "%0*d", length_data_entry, sz2);
332 wrbuf_printf(wr_dir, "%0*d", length_starting, data_p);
335 wrbuf_putc(wr_dir, ISO2709_FS);
/* New leader: 5-digit record length, 7 bytes from lead+5, 5-digit base
 * address, 7 bytes from lead+17 (24 bytes in total). */
336 wrbuf_printf(wr_head, "%05d", data_p+1 + base_address);
337 wrbuf_write(wr_head, lead+5, 7);
338 wrbuf_printf(wr_head, "%05d", base_address);
339 wrbuf_write(wr_head, lead+17, 7);
341 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
342 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
343 wrbuf_free(wr_head, 1);
344 wrbuf_free(wr_dir, 1);
345 wrbuf_free(wr_tmp, 1);
347 /* third pass. create data output */
348 for (entry_p = 24; entry_p != end_of_directory; )
/* identifier_flag: non-zero when the field is treated as having subfields */
355 int identifier_flag = 0;
/* remember where this entry starts, for use in diagnostics */
356 int entry_p0 = entry_p;
358 memcpy (tag, buf+entry_p, 3);
361 data_length = atoi_n (buf+entry_p, length_data_entry);
362 entry_p += length_data_entry;
363 data_offset = atoi_n (buf+entry_p, length_starting);
364 entry_p += length_starting;
365 i = data_offset + base_address;
366 end_offset = i+data_length-1;
368 if (data_length <= 0 || data_offset < 0)
373 wrbuf_printf(wr, "<!-- Directory offset %d: data-length %d, "
374 "data-offset %d -->\n",
375 entry_p0, data_length, data_offset);
377 if (end_offset >= record_length)
379 wrbuf_printf (wr,"<!-- Directory offset %d: Data out of bounds "
381 entry_p0, end_offset, record_length);
/* memcmp is non-zero when the tag does not begin with "00" */
385 if (memcmp (tag, "00", 2))
386 identifier_flag = 1; /* if not 00X assume subfields */
387 else if (indicator_length < 4 && indicator_length > 0)
389 /* Danmarc 00X have subfields */
/* look for a subfield delimiter right after the indicators, or one byte
 * later */
390 if (buf[i + indicator_length] == ISO2709_IDFS)
392 else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
398 wrbuf_printf(wr, "<!-- identifier_flag = %d -->\n",
/* Open the field in the selected output mode. */
405 wrbuf_puts (wr, tag);
406 wrbuf_puts (wr, " ");
408 case YAZ_MARC_SIMPLEXML:
409 wrbuf_printf (wr, "<field tag=\"");
410 marc_cdata(mt, tag, strlen(tag), wr);
411 wrbuf_printf(wr, "\"");
/* two element names are used per XML dialect; the conditions choosing
 * between them are not visible here */
413 case YAZ_MARC_OAIMARC:
415 wrbuf_printf (wr, " <varfield id=\"");
417 wrbuf_printf (wr, " <fixfield id=\"");
418 marc_cdata(mt, tag, strlen(tag), wr);
419 wrbuf_printf(wr, "\"");
421 case YAZ_MARC_MARCXML:
422 case YAZ_MARC_XCHANGE:
424 wrbuf_printf (wr, " <datafield tag=\"");
426 wrbuf_printf (wr, " <controlfield tag=\"");
427 marc_cdata(mt, tag, strlen(tag), wr);
428 wrbuf_printf(wr, "\"");
/* advance by identifier_flag-1 bytes, then emit indicator_length
 * indicator bytes, one per iteration */
433 i += identifier_flag-1;
434 for (j = 0; j<indicator_length; j++, i++)
438 case YAZ_MARC_ISO2709:
439 wrbuf_putc(wr, buf[i]);
442 wrbuf_putc(wr, buf[i]);
/* XML modes write each indicator as a numbered attribute (j+1) */
444 case YAZ_MARC_SIMPLEXML:
445 wrbuf_printf(wr, " Indicator%d=\"", j+1);
446 marc_cdata(mt, buf+i, 1, wr);
447 wrbuf_printf(wr, "\"");
449 case YAZ_MARC_OAIMARC:
450 wrbuf_printf(wr, " i%d=\"", j+1);
451 marc_cdata(mt, buf+i, 1, wr);
452 wrbuf_printf(wr, "\"");
454 case YAZ_MARC_MARCXML:
455 case YAZ_MARC_XCHANGE:
456 wrbuf_printf(wr, " ind%d=\"", j+1);
457 marc_cdata(mt, buf+i, 1, wr);
458 wrbuf_printf(wr, "\"");
/* close the opening tag of the field element in the XML modes */
462 if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_MARCXML
463 || mt->xml == YAZ_MARC_OAIMARC || mt->xml == YAZ_MARC_XCHANGE)
465 wrbuf_puts (wr, ">");
467 wrbuf_puts (wr, "\n");
/* Subfield loop: runs until a record/field separator or end_offset. */
471 while (i < end_offset &&
472 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
/* Subfield code length in bytes: identifier_length-1 by default; when
 * identifier_length is 2 it is taken from cdata_one_character instead. */
476 int sb_octet_length = identifier_length-1;
477 if (identifier_length == 2)
478 sb_octet_length = cdata_one_character(mt, buf+i);
/* ISO2709 output copies identifier_length bytes through iconv as-is */
483 case YAZ_MARC_ISO2709:
485 wrbuf_iconv_write(wr, mt->iconv_cd,
486 buf+i, identifier_length);
487 i += identifier_length;
/* configurable prefix, then the code, then one space */
490 wrbuf_puts (wr, mt->subfield_str);
491 marc_cdata(mt, buf+i, sb_octet_length, wr);
492 i = i+sb_octet_length;
493 wrbuf_putc (wr, ' ');
495 case YAZ_MARC_SIMPLEXML:
496 wrbuf_puts (wr, " <subfield code=\"");
497 marc_cdata(mt, buf+i, sb_octet_length, wr);
498 i = i+sb_octet_length;
499 wrbuf_puts (wr, "\">");
/* OAI MARC names the attribute "label" rather than "code" */
501 case YAZ_MARC_OAIMARC:
502 wrbuf_puts (wr, " <subfield label=\"");
503 marc_cdata(mt, buf+i, sb_octet_length, wr);
504 i = i+sb_octet_length;
505 wrbuf_puts (wr, "\">");
507 case YAZ_MARC_MARCXML:
508 case YAZ_MARC_XCHANGE:
509 wrbuf_puts (wr, " <subfield code=\"");
510 marc_cdata(mt, buf+i, sb_octet_length, wr);
511 i = i+sb_octet_length;
512 wrbuf_puts (wr, "\">");
/* scan to the next separator (RS, IDFS or FS) and write the subfield data
 * found between i0 and i */
516 while (i < end_offset &&
517 buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
518 buf[i] != ISO2709_FS)
520 marc_cdata(mt, buf + i0, i - i0, wr);
/* ISO2709 output also copies the terminating byte, unless it is the
 * delimiter that starts the next subfield */
522 if (mt->xml == YAZ_MARC_ISO2709 && buf[i] != ISO2709_IDFS)
523 marc_cdata(mt, buf + i, 1, wr);
525 if (mt->xml == YAZ_MARC_SIMPLEXML ||
526 mt->xml == YAZ_MARC_MARCXML ||
527 mt->xml == YAZ_MARC_XCHANGE ||
528 mt->xml == YAZ_MARC_OAIMARC)
529 wrbuf_puts (wr, "</subfield>\n");
/* Fields handled without subfields: write everything up to the separator */
535 while (i < end_offset &&
536 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
538 marc_cdata(mt, buf + i0, i - i0, wr);
539 if (mt->xml == YAZ_MARC_ISO2709)
540 marc_cdata(mt, buf + i, 1, wr);
542 if (mt->xml == YAZ_MARC_LINE)
543 wrbuf_puts (wr, mt->endline_str);
/* diagnostics for a separator that does not line up with the field length
 * (the condition guarding the first message is not visible here) */
545 wrbuf_printf(wr, "<!-- separator but not at end of field length=%d-->\n", data_length);
546 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
547 wrbuf_printf(wr, "<!-- no separator at end of field length=%d-->\n", data_length);
/* Close the field element in the XML modes. */
550 case YAZ_MARC_SIMPLEXML:
551 wrbuf_puts (wr, "</field>\n");
553 case YAZ_MARC_OAIMARC:
555 wrbuf_puts (wr, "</varfield>\n");
557 wrbuf_puts (wr, "</fixfield>\n");
559 case YAZ_MARC_MARCXML:
560 case YAZ_MARC_XCHANGE:
562 wrbuf_puts (wr, " </datafield>\n");
564 wrbuf_puts (wr, "</controlfield>\n");
/* Close the record: XML modes write their end tag, ISO2709 output writes
 * the record separator. */
573 case YAZ_MARC_SIMPLEXML:
574 wrbuf_puts (wr, "</iso2709>\n");
576 case YAZ_MARC_OAIMARC:
577 wrbuf_puts (wr, "</oai_marc>\n");
579 case YAZ_MARC_MARCXML:
580 case YAZ_MARC_XCHANGE:
581 wrbuf_puts (wr, "</record>\n");
583 case YAZ_MARC_ISO2709:
584 wrbuf_putc (wr, ISO2709_RS);
/* the length taken from the input leader, not the number of bytes written */
587 return record_length;
/*
 * Decode a record into the handle's own buffer (mt->m_wr) and hand the
 * caller a view of it: *result receives wrbuf_buf(mt->m_wr) and *rsize
 * receives wrbuf_len(mt->m_wr).
 *
 * The buffer is rewound at the start of every call, so the pointer stored
 * in *result refers to storage that a later call on the same handle reuses.
 * NOTE(review): the declaration of r, any guards around the two output
 * assignments and the return statement are not visible in this listing.
 */
590 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
591 char **result, int *rsize)
595 wrbuf_rewind(mt->m_wr);
596 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
598 *result = wrbuf_buf(mt->m_wr);
600 *rsize = wrbuf_len(mt->m_wr);
/*
 * NOTE(review): only the signature is visible in this listing. Presumably
 * stores xmlmode as the handle's output mode (the mt->xml field tested by
 * the decoder) - confirm against the full file.
 */
604 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
/*
 * NOTE(review): only the signature is visible in this listing. Presumably
 * stores level as the handle's debug setting - confirm against the full
 * file.
 */
610 void yaz_marc_debug(yaz_marc_t mt, int level)
/*
 * NOTE(review): only the signature is visible in this listing. Presumably
 * stores cd as the handle's conversion descriptor (the mt->iconv_cd field
 * used when writing data) - confirm against the full file.
 */
616 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/*
 * One-shot decode: creates a temporary handle, decodes buf into wr with
 * yaz_marc_decode_wrbuf and destroys the handle again.
 *
 * NOTE(review): the lines that apply debug and xml to the handle, the
 * declaration of r and the return statement are not visible in this
 * listing - confirm against the full file.
 */
622 int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
624 yaz_marc_t mt = yaz_marc_create();
629 r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr);
630 yaz_marc_destroy(mt);
635 int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize)
637 return yaz_marc_decode(buf, wr, debug, bsize, 0);
/*
 * Decode a record with a temporary handle and write the result to a stdio
 * stream: the record is decoded into the handle's own buffer (mt->m_wr),
 * that buffer is written to outf with fwrite, and the handle is destroyed.
 *
 * NOTE(review): the use of the debug argument, the declaration of r, any
 * conditions guarding the fwrite call and the return statement are not
 * visible in this listing. The result of fwrite is not checked by the
 * visible code.
 */
641 int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
643 yaz_marc_t mt = yaz_marc_create();
647 r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr);
651 fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf);
652 yaz_marc_destroy(mt);
/**
 * \brief Decode a MARC record to a stdio stream when the buffer size is
 *        not known.
 * \param buf   record to decode
 * \param outf  stream handed on to marc_display_exl
 * \param debug passed through to marc_display_exl
 * \return whatever marc_display_exl returns
 *
 * Passes -1 as bsize, the value the decoder treats as "size unknown".
 */
int marc_display_ex (const char *buf, FILE *outf, int debug)
{
    const int bsize_unknown = -1;

    return marc_display_exl(buf, outf, debug, bsize_unknown);
}
/**
 * \brief Decode a MARC record to a stdio stream with debug set to 0.
 * \param buf  record to decode
 * \param outf stream handed on to marc_display_ex
 * \return whatever marc_display_ex returns
 */
int marc_display (const char *buf, FILE *outf)
{
    const int debug_arg = 0;

    return marc_display_ex(buf, outf, debug_arg);
}
671 * indent-tabs-mode: nil
673 * vim: shiftwidth=4 tabstop=8 expandtab