1 /* $Id: marcread.c,v 1.24.2.3 2005-12-08 11:06:31 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
28 #include <yaz/yaz-util.h>
29 #include <yaz/marcdisp.h>
/* Debug switch, defined as 0 here.  NOTE(review): presumably gates the
 * LOG_LOG trace messages further down; the conditionals that would test it
 * are not visible in this listing -- confirm. */
35 #define MARCOMP_DEBUG 0
/*
 * grs_read_iso2709: read one ISO 2709 (MARC) record through the callbacks
 * in p (readf / tellf / endf) and build a data1 node tree from it using
 * p->dh and p->mem.
 *
 * marc_xml: grs_read_marcxml passes 1 and grs_read_marc passes 0.  The
 * statements below that create "record", "leader", "datafield",
 * "controlfield" and "subfield" elements, and the ones that name nodes
 * directly after the tag / indicator / subfield bytes, are presumably the
 * two alternatives selected by this flag.
 *
 * NOTE(review): several statements of this function (braces, returns and
 * some conditions) are not shown here; the comments describe only the
 * statements that are shown.
 */
37 static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml)
43 int identifier_length;
46 int length_data_entry;
48 int length_implementation;
53 data1_node *res_root, *res_top;
55 data1_marctab *marctab;
/* The first five bytes of a record hold its total length as ASCII digits. */
57 if ((*p->readf)(p->fh, buf, 5) != 5)
/* Resynchronise: while the first byte is not a digit, log it and pull one
 * more byte into buf[4].  NOTE(review): presumably buf is shifted left by
 * one in between; that statement is not visible. */
59 while (*buf < '0' || *buf > '9')
63 yaz_log(LOG_WARN, "MARC: Skipping bad byte %d (0x%02X)",
64 *buf & 0xff, *buf & 0xff);
68 if ((*p->readf)(p->fh, buf+4, 1) != 1)
71 record_length = atoi_n (buf, 5);
/* 24 bytes of leader plus at least one more byte are expected. */
72 if (record_length < 25)
74 logf (LOG_WARN, "MARC record length < 25, is %d", record_length);
77 /* read remaining part - attempt to read one byte further... */
78 read_bytes = (*p->readf)(p->fh, buf+5, record_length-4);
/* record_length-5 bytes are what the record itself still needs. */
79 if (read_bytes < record_length-5)
81 logf (LOG_WARN, "Couldn't read whole MARC record");
/* The extra byte was delivered: take the current offset and hand
 * offset - 1 to endf.  NOTE(review): presumably this marks where the next
 * record begins -- confirm against the endf contract. */
84 if (read_bytes == record_length - 4)
86 off_t cur_offset = (*p->tellf)(p->fh);
90 (*p->endf)(p->fh, cur_offset - 1);
/* Root of the result tree, created for the abstract syntax absynName. */
93 res_root = data1_mk_root (p->dh, p->mem, absynName);
96 yaz_log (LOG_WARN, "cannot read MARC without an abstract syntax");
/* Top element: a namespaced "record" with a "leader" child holding the
 * 24 leader bytes ... */
102 const char *attr[] = { "xmlns", "http://www.loc.gov/MARC21/slim", 0};
104 res_top = data1_mk_tag (p->dh, p->mem, "record", attr, res_root);
106 lead = data1_mk_tag(p->dh, p->mem, "leader", 0, res_top);
107 data1_mk_text_n(p->dh, p->mem, buf, 24, lead);
/* ... or an element named after the abstract syntax. */
110 res_top = data1_mk_tag (p->dh, p->mem, absynName, 0, res_root);
/* When the abstract syntax carries a MARC table, store the leader and two
 * substrings of it (offsets 6..9 and 17..19) there, NUL-terminated. */
112 if ((marctab = res_root->u.root.absyn->marc))
114 memcpy(marctab->leader, buf, 24);
115 memcpy(marctab->implementation_codes, buf+6, 4);
116 marctab->implementation_codes[4] = '\0';
117 memcpy(marctab->user_systems, buf+17, 3);
118 marctab->user_systems[3] = '\0';
/* Indicator and identifier lengths: a non-negative forced value from the
 * MARC table wins, otherwise leader bytes 10 and 11 are used. */
121 if (marctab && marctab->force_indicator_length >= 0)
122 indicator_length = marctab->force_indicator_length;
124 indicator_length = atoi_n (buf+10, 1);
125 if (marctab && marctab->force_identifier_length >= 0)
126 identifier_length = marctab->force_identifier_length;
128 identifier_length = atoi_n (buf+11, 1);
/* Leader bytes 12..16: base address of data.  Bytes 20, 21, 22: widths of
 * the length, starting-position and implementation parts of a directory
 * entry. */
129 base_address = atoi_n (buf+12, 5);
131 length_data_entry = atoi_n (buf+20, 1);
132 length_starting = atoi_n (buf+21, 1);
133 length_implementation = atoi_n (buf+22, 1);
/* First pass over the directory (starts at offset 24, ends at the field
 * separator): validate each entry and find where the directory stops. */
135 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
/* Size of one entry: 3 tag bytes plus the length and offset digits. */
137 int l = 3 + length_data_entry + length_starting;
138 if (entry_p + l >= record_length)
140 yaz_log(LOG_WARN, "MARC: Directory offset %d: end of record.",
144 /* check for digits in length info */
146 if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
150 /* not all digits, so stop directory scan */
151 yaz_log(LOG_LOG, "MARC: Bad directory");
154 entry_p += 3 + length_data_entry + length_starting;
156 end_of_directory = entry_p;
/* The data area is expected to start right after the directory's
 * terminating separator; a mismatch is only reported. */
157 if (base_address != entry_p+1)
159 yaz_log(LOG_WARN, "MARC: Base address does not follow directory");
/* Second pass: turn every directory entry into nodes under res_top. */
161 for (entry_p = 24; entry_p != end_of_directory; )
169 data1_node *parent = res_top;
171 memcpy (tag, buf+entry_p, 3);
178 res = data1_mk_tag_n (p->dh, p->mem, tag, 3, 0 /* attr */, parent);
/* NOTE(review): outf is declared outside the visible lines; the fprintf
 * calls in this function look like optional dump output -- confirm. */
181 fprintf (outf, "%s ", tag);
/* Decode the field's length and its offset relative to base_address. */
183 data_length = atoi_n (buf+entry_p, length_data_entry);
184 entry_p += length_data_entry;
185 data_offset = atoi_n (buf+entry_p, length_starting);
186 entry_p += length_starting;
187 i = data_offset + base_address;
188 end_offset = i+data_length-1;
/* Reject fields that are empty or fall outside the record buffer. */
190 if (data_length <= 0 || data_offset < 0 || end_offset >= record_length)
192 yaz_log(LOG_WARN, "MARC: Bad offsets in data. Skipping rest");
/* Tags that do not start with "00" carry indicators when
 * indicator_length is non-zero. */
196 if (memcmp (tag, "00", 2) && indicator_length)
198 /* generate indicator node */
201 const char *attr[10];
208 res = data1_mk_tag(p->dh, p->mem, "datafield", attr, res);
/* One "indN" attribute per indicator character (N is 1-based). */
210 for (j = 0; j<indicator_length; j++)
212 char str1[18], str2[2];
213 sprintf (str1, "ind%d", j+1);
220 data1_tag_add_attr (p->dh, p->mem, res, attr);
/* Alternative shape: a node whose name is the indicator bytes themselves. */
228 res = data1_mk_tag_n (p->dh, p->mem,
229 buf+i, indicator_length, 0 /* attr */, res);
231 for (j = 0; j<indicator_length; j++)
232 fprintf (outf, "%c", buf[j+i]);
/* Step over the indicator bytes to reach the field content. */
235 i += indicator_length;
241 const char *attr[10];
247 res = data1_mk_tag(p->dh, p->mem, "controlfield", attr, res);
251 /* traverse sub fields */
/* Note that buf[i] is read before i is compared with end_offset. */
253 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
255 if (memcmp (tag, "00", 2) && identifier_length)
/* Copy the identifier characters that follow the first identifier byte
 * into code, at most 8 of them. */
264 for (j = 1; j<identifier_length && j < 9; j++)
265 code[j-1] = buf[i+j];
270 res = data1_mk_tag(p->dh, p->mem, "subfield",
275 res = data1_mk_tag_n (p->dh, p->mem,
276 buf+i+1, identifier_length-1,
277 0 /* attr */, parent);
280 fprintf (outf, " $");
281 for (j = 1; j<identifier_length; j++)
282 fprintf (outf, "%c", buf[j+i]);
285 i += identifier_length;
/* Subfield data runs up to the next record, subfield or field separator,
 * or up to end_offset. */
287 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
288 buf[i] != ISO2709_FS && i < end_offset)
291 fprintf (outf, "%c", buf[i]);
/* Attach the bytes from i0 up to (not including) i as text of the
 * subfield node. */
295 data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, res);
301 fprintf (outf, "%c", buf[i]);
/* Same, but attached directly to parent. */
308 data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, parent);
311 fprintf (outf, "\n");
313 fprintf (outf, "-- separator but not at end of field\n");
314 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
315 fprintf (outf, "-- no separator at end of field\n");
322 * Locate some data under this node. This routine should handle variants
/*
 * get_data: return a pointer to character data found at or under node n
 * and store a length in *len.
 *
 * The visible statements handle a DATA1N_data node: *len is set from
 * n->u.data.len, leading and trailing d1_isspace() characters are scanned,
 * and either n->u.data.data + i or n->u.data.data is returned.
 * NOTE(review): which of the two returns is active, how *len is adjusted
 * for the skipped prefix, and how DATA1N_tag nodes are descended is not
 * visible here -- confirm.  Callers in this file pass the result to
 * wrbuf_puts, so they rely on it being NUL-terminated.
 */
325 static char *get_data(data1_node *n, int *len)
331 if (n->which == DATA1N_data)
333 *len = n->u.data.len;
335 /** Fixme: not delete leader/final whitespaces
336 ** in MARC field/subfield. It fixed in
337 ** data1/d1_marc.c too.
339 for (i = 0; i<*len; i++)
340 if (!d1_isspace(n->u.data.data[i]))
342 while (*len && d1_isspace(n->u.data.data[*len - 1]))
346 return n->u.data.data + i;
349 return n->u.data.data;
351 if (n->which == DATA1N_tag)
353 else if (n->which == DATA1N_data)
/*
 * lookup_subfield: walk the sibling chain that starts at node (via ->next)
 * and compare each node's tag name with name using yaz_matchstr.
 * NOTE(review): the return statements are not visible; presumably the first
 * matching node is returned and NULL when none matches.  The loop reads
 * u.tag.tag of every node without a visible check that it is a tag node.
 */
363 static data1_node *lookup_subfield(data1_node *node, const char *name)
367 for (p=node; p; p=p->next)
369 if (!yaz_matchstr(p->u.tag.tag, name))
/*
 * lookup_inline_subfield: walk the inline_subfield list that starts at pisf
 * (via ->next) and compare each entry's name with name using yaz_matchstr.
 * NOTE(review): the return statements are not visible; presumably the first
 * match is returned and NULL otherwise.
 */
375 static inline_subfield *lookup_inline_subfield(inline_subfield *pisf,
380 for (p=pisf; p; p=p->next)
382 if (!yaz_matchstr(p->name, name))
/*
 * cat_inline_subfield: append to buf the data of the in-line subfields in
 * pisf that are selected by the subfield specification list psf.
 *
 * Three kinds of specification entries are handled: a plain subfield
 * (MC_SF), a variant (MC_SFVARIANT) and a group (MC_SFGROUP); the last two
 * recurse on p->u.child.  The return value is an inline_subfield pointer
 * that recursive calls assign back to their position in the list.
 * NOTE(review): the exact value returned is not visible here.
 */
388 static inline_subfield *cat_inline_subfield(mc_subfield *psf, WRBUF buf,
389 inline_subfield *pisf)
/* Stops as soon as either the specification or the data list runs out. */
393 for (p = psf; p && pisf; p = p->next)
395 if (p->which == MC_SF)
397 inline_subfield *found = lookup_inline_subfield(pisf, p->name);
/* A prefix other than "_" is written, preceded by a blank. */
401 if (strcmp(p->prefix, "_"))
403 wrbuf_puts(buf, " ");
404 wrbuf_puts(buf, p->prefix);
/* interval.start == -1: take the whole value; otherwise take the
 * inclusive byte range start..end.  No bounds check against the length of
 * found->data is visible. */
406 if (p->interval.start == -1)
408 wrbuf_puts(buf, found->data);
412 wrbuf_write(buf, found->data+p->interval.start,
413 p->interval.end-p->interval.start+1);
/* A suffix other than "_" is written, followed by a blank. */
416 if (strcmp(p->suffix, "_"))
418 wrbuf_puts(buf, p->suffix);
419 wrbuf_puts(buf, " ");
422 logf(LOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name);
427 else if (p->which == MC_SFVARIANT)
429 inline_subfield *next;
432 next = cat_inline_subfield(p->u.child, buf, pisf);
438 else if (p->which == MC_SFGROUP)
/* NOTE(review): this loop iterates pp over the group's children but the
 * comparison below uses p->name, so every iteration tests the same name.
 * The matching loop in cat_subfield compares pp->name; this is probably
 * meant to as well -- confirm before changing. */
443 for (pp = p->u.child, found = 0; pp; pp = pp->next)
445 if (!yaz_matchstr(pisf->name, p->name))
/* Group output is wrapped in " (" and ") ". */
453 wrbuf_puts(buf, " (");
454 pisf = cat_inline_subfield(p->u.child, buf, pisf);
455 wrbuf_puts(buf, ") ");
/*
 * cat_inline_field: handle an in-line (embedded) field specification pf.
 * Starting at subfield, in-line field data is parsed from data1 subfield
 * nodes into an inline_field object and, when its name and indicators
 * match pf, its content is appended to buf.
 *
 * Does nothing useful when pf or subfield is NULL (guard below).
 */
462 static void cat_inline_field(mc_field *pf, WRBUF buf, data1_node *subfield)
464 if (!pf || !subfield)
470 inline_field *pif=NULL;
/* A subfield whose tag does not match "1" is stepped over.
 * NOTE(review): presumably subfield "1" introduces an in-line field. */
473 if (yaz_matchstr(subfield->u.tag.tag, "1"))
475 subfield = subfield->next;
480 pif = inline_mk_field();
/* Feed consecutive subfields to inline_parse; a negative result is
 * reported as a parse error and the inline field is destroyed. */
484 if ((i=inline_parse(pif, psubf->u.tag.tag, get_data(psubf, &len)))<0)
486 logf(LOG_WARN, "inline subfield ($%s): parse error",
488 inline_destroy_field(pif);
/* Keep collecting until the list ends or the next "1" subfield appears. */
492 } while (psubf && yaz_matchstr(psubf->u.tag.tag, "1"));
496 if (pif && !yaz_matchstr(pif->name, pf->name))
/* Specification without a subfield list: emit the data of the first
 * parsed subfield as it is. */
498 if (!pf->list && pif->list)
500 wrbuf_puts(buf, pif->list->data);
/* Blank indicators are normalised to '_' before comparison; a '.' in the
 * specification matches any indicator. */
510 ind1 = (pif->ind1[0] == ' ') ? '_':pif->ind1[0];
511 ind2 = (pif->ind2[0] == ' ') ? '_':pif->ind2[0];
513 if (((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) &&
514 ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0])))
516 cat_inline_subfield(pf->list, buf, pif->list);
519 add separator for inline fields
523 wrbuf_puts(buf, "\n");
528 logf(LOG_WARN, "In-line field %s missed -- indicators do not match", pif->name);
532 inline_destroy_field(pif);
535 logf(LOG_LOG, "cat_inline_field(): got buffer {%s}", buf->buf);
/*
 * cat_subfield: append to buf the data of the data1 subfield nodes,
 * starting at subfield, that are selected by the specification list psf.
 *
 * Mirrors cat_inline_subfield but works on data1 nodes: plain subfields
 * (MC_SF), variants (MC_SFVARIANT) and groups (MC_SFGROUP), the latter two
 * by recursion on p->u.child.  The returned node is assigned back to the
 * caller's position in the subfield list.
 * NOTE(review): the exact value returned is not visible here.
 */
539 static data1_node *cat_subfield(mc_subfield *psf, WRBUF buf,
540 data1_node *subfield)
/* Stops as soon as either the specification or the node list runs out. */
544 for (p = psf; p && subfield; p = p->next)
546 if (p->which == MC_SF)
548 data1_node *found = lookup_subfield(subfield, p->name);
/* A prefix other than "_" is written, preceded by a blank. */
554 if (strcmp(p->prefix, "_"))
556 wrbuf_puts(buf, " ");
557 wrbuf_puts(buf, p->prefix);
/* Either delegate to the in-line field handler, or copy the whole value
 * (interval.start == -1), or copy the inclusive byte range start..end.
 * No bounds check against len is visible for the range case. */
562 cat_inline_field(p->u.in_line, buf, found);
564 else if (p->interval.start == -1)
566 wrbuf_puts(buf, get_data(found, &len));
570 wrbuf_write(buf, get_data(found, &len)+p->interval.start,
571 p->interval.end-p->interval.start+1);
/* A suffix other than "_" is written, followed by a blank. */
574 if (strcmp(p->suffix, "_"))
576 wrbuf_puts(buf, p->suffix);
577 wrbuf_puts(buf, " ");
580 logf(LOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag);
/* Continue the search after the node that was just consumed. */
582 subfield = found->next;
585 else if (p->which == MC_SFVARIANT)
/* next == subfield means the recursive call did not advance. */
589 next = cat_subfield(p->u.child, buf, subfield);
590 if (next == subfield)
595 else if (p->which == MC_SFGROUP)
/* Check whether the current subfield is named by any member of the
 * group. */
600 for (pp = p->u.child, found = 0; pp; pp = pp->next)
602 if (!yaz_matchstr(subfield->u.tag.tag, pp->name))
/* Group output is wrapped in " (" and ") ". */
610 wrbuf_puts(buf, " (");
611 subfield = cat_subfield(p->u.child, buf, subfield);
612 wrbuf_puts(buf, ") ");
/*
 * cat_field: if the data1 node field matches the field specification pf
 * (tag name and indicators), append the selected content to buf.
 *
 * Returns a data1_node pointer that the caller assigns back to its field
 * cursor.  NOTE(review): the return statements are not visible here;
 * presumably the next field to examine is returned.  The parameter p is
 * not used by any visible statement.
 */
619 static data1_node *cat_field(struct grs_read_info *p, mc_field *pf,
620 WRBUF buf, data1_node *field)
622 data1_node *subfield;
/* A non-zero yaz_matchstr result means the tag name does not match. */
629 if (yaz_matchstr(field->u.tag.tag, pf->name))
632 subfield = field->child;
638 check subfield without indicators
/* No subfield list in the specification and a data node directly under
 * the field: copy the whole value or the inclusive range start..end. */
641 if (!pf->list && subfield->which == DATA1N_data)
645 if (pf->interval.start == -1)
647 wrbuf_puts(buf, get_data(field, &len));
651 wrbuf_write(buf, get_data(field, &len)+pf->interval.start,
652 pf->interval.end-pf->interval.start+1);
656 logf(LOG_LOG, "cat_field(): got buffer {%s}", buf->buf);
/* The child's tag name holds the two indicator characters; blanks are
 * normalised to '_' and a '.' in the specification matches anything. */
665 ind1 = (subfield->u.tag.tag[0] == ' ') ? '_':subfield->u.tag.tag[0];
666 ind2 = (subfield->u.tag.tag[1] == ' ') ? '_':subfield->u.tag.tag[1];
669 ((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) &&
670 ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0]))
674 logf(LOG_WARN, "Field %s missed -- does not match indicators", field->u.tag.tag);
/* Descend below the indicator node to the actual subfields. */
679 subfield = subfield->child;
684 cat_subfield(pf->list, buf, subfield);
687 logf(LOG_LOG, "cat_field(): got buffer {%s}", buf->buf);
/* is_empty: predicate over the C string s, returning int.
 * NOTE(review): the body is not visible here; presumably it reports
 * whether s contains nothing but blanks -- confirm. */
693 static int is_empty(char *s)
/*
 * parse_data1_tree: evaluate one MARC statement mc_stmnt against the data1
 * tree and add the resulting values as new elements named mc_stmnt under
 * the root's first child.
 *
 * The callers in this file pass only strings that matched "mc?", and the
 * statement is parsed from mc_stmnt+3, i.e. with the first three
 * characters skipped.
 */
705 static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt,
/* NOTE(review): marctab is dereferenced below without a visible NULL
 * check, while grs_read_iso2709 treats absyn->marc as optional. */
708 data1_marctab *marctab = root->u.root.absyn->marc;
709 data1_node *top = root->child;
715 c = mc_mk_context(mc_stmnt+3);
/* NOTE(review): presumably an early-exit path; its condition is not
 * visible. */
724 mc_destroy_context(c);
729 logf(LOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt);
/* "ldr" addresses the record leader saved in the MARC table rather than a
 * field of the tree. */
731 if (!yaz_matchstr(pf->name, "ldr"))
735 logf(LOG_LOG,"parse_data1_tree(): try LEADER from {%d} to {%d} positions",
736 pf->interval.start, pf->interval.end);
/* New element named after the statement, holding the inclusive leader
 * range start..end.  No range check against the leader size is visible. */
738 new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top);
739 data1_mk_text_n(p->dh, p->mem, marctab->leader+pf->interval.start,
740 pf->interval.end-pf->interval.start+1, new);
/* Otherwise look for fields whose tag name matches the statement. */
748 if (!yaz_matchstr(field->u.tag.tag, pf->name))
753 logf(LOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag);
758 field = cat_field(p, pf, buf, field);
/* The collected buffer is split on newlines; each piece becomes its own
 * element.  strtok modifies the buffer in place and keeps hidden static
 * state, so this is not reentrant. */
761 for (pb = strtok(pb, "\n"); pb; pb = strtok(NULL, "\n"))
765 new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top);
766 data1_mk_text_n(p->dh, p->mem, pb, strlen(pb), new);
/* Release the parsed field specification and the parser context. */
776 mc_destroy_field(pf);
777 mc_destroy_context(c);
/*
 * grs_read_marcxml: read one record with grs_read_iso2709 in marc_xml mode
 * (second argument 1), then run parse_data1_tree for every main element of
 * the abstract syntax whose tag is a string matching "mc?".
 * NOTE(review): the return statement and any NULL check on root are not
 * visible here; presumably root is returned.
 */
781 data1_node *grs_read_marcxml(struct grs_read_info *p)
783 data1_node *root = grs_read_iso2709(p, 1);
789 for (e=root->u.root.absyn->main_elements; e; e=e->next)
791 data1_tag *tag = e->tag;
/* Only string-valued tags can carry a MARC statement. */
793 if (tag && tag->which == DATA1T_string &&
794 !yaz_matchstr(tag->value.string, "mc?"))
795 parse_data1_tree(p, tag->value.string, root);
/*
 * grs_read_marc: read one record with grs_read_iso2709 in plain mode
 * (second argument 0), then run parse_data1_tree for every main element of
 * the abstract syntax whose tag is a string matching "mc?".
 * NOTE(review): the return statement and any NULL check on root are not
 * visible here; presumably root is returned.
 */
800 data1_node *grs_read_marc(struct grs_read_info *p)
802 data1_node *root = grs_read_iso2709(p, 0);
808 for (e=root->u.root.absyn->main_elements; e; e=e->next)
810 data1_tag *tag = e->tag;
/* Only string-valued tags can carry a MARC statement. */
812 if (tag && tag->which == DATA1T_string &&
813 !yaz_matchstr(tag->value.string, "mc?"))
814 parse_data1_tree(p, tag->value.string, root);
/* grs_init_marc: takes no arguments and returns an untyped pointer.
 * NOTE(review): the body is not visible here; presumably this is the init
 * hook of the record-type descriptors defined further down -- confirm. */
819 static void *grs_init_marc(void)
/* grs_destroy_marc: receives an untyped clientData pointer and returns
 * nothing.  NOTE(review): the body is not visible here; presumably the
 * counterpart of grs_init_marc -- confirm. */
824 static void grs_destroy_marc(void *clientData)
/* File-local record-type descriptor; recTypeGrs_marc publishes its
 * address.  NOTE(review): the initializer members are not visible here;
 * presumably they wire up grs_read_marc and the init/destroy hooks. */
828 static struct recTypeGrs marc_type = {
835 RecTypeGrs recTypeGrs_marc = &marc_type;
/* File-local record-type descriptor; recTypeGrs_marcxml publishes its
 * address.  NOTE(review): the initializer members are not visible here;
 * presumably they wire up grs_read_marcxml and the init/destroy hooks. */
837 static struct recTypeGrs marcxml_type = {
844 RecTypeGrs recTypeGrs_marcxml = &marcxml_type;