X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fmarcread.c;h=42096f0040cc3728cb5de7f4fe648f21c940b5ec;hb=668b136879471079ce03b26981a5fd1b3a210bc1;hp=f3214ff52574a66ba8699536800b3f033a5a4fe2;hpb=f578ebbcfe51125d91358a98a79ab8411f38933f;p=idzebra-moved-to-github.git diff --git a/recctrl/marcread.c b/recctrl/marcread.c index f3214ff..42096f0 100644 --- a/recctrl/marcread.c +++ b/recctrl/marcread.c @@ -1,5 +1,5 @@ -/* $Id: marcread.c,v 1.21 2003-08-21 10:29:00 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 +/* $Id: marcread.c,v 1.24.2.4 2006-08-14 10:39:16 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps This file is part of the Zebra server. @@ -15,9 +15,9 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Zebra; see the file LICENSE.zebra. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ #include @@ -42,6 +42,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) int indicator_length; int identifier_length; int base_address; + int end_of_directory; int length_data_entry; int length_starting; int length_implementation; @@ -55,6 +56,18 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) if ((*p->readf)(p->fh, buf, 5) != 5) return NULL; + while (*buf < '0' || *buf > '9') + { + int i; + + yaz_log(LOG_WARN, "MARC: Skipping bad byte %d (0x%02X)", + *buf & 0xff, *buf & 0xff); + for (i = 0; i<4; i++) + buf[i] = buf[i+1]; + + if ((*p->readf)(p->fh, buf+4, 1) != 1) + return NULL; + } record_length = atoi_n (buf, 5); if (record_length < 25) { @@ -113,16 +126,39 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) identifier_length = marctab->force_identifier_length; else identifier_length = atoi_n (buf+11, 1); - base_address = atoi_n (buf+12, 4); + base_address = atoi_n (buf+12, 5); length_data_entry = atoi_n (buf+20, 1); length_starting = atoi_n (buf+21, 1); length_implementation = atoi_n (buf+22, 1); for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) - entry_p += 3+length_data_entry+length_starting; - base_address = entry_p+1; - for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) + { + int l = 3 + length_data_entry + length_starting; + if (entry_p + l >= record_length) + { + yaz_log(LOG_WARN, "MARC: Directory offset %d: end of record.", + entry_p); + return 0; + } + /* check for digits in length info */ + while (--l >= 3) + if (!isdigit(*(const unsigned char *) (buf + entry_p+l))) + break; + if (l >= 3) + { + /* not all digits, so stop directory scan */ + yaz_log(LOG_LOG, "MARC: Bad directory"); + break; + } + entry_p += 3 + length_data_entry + length_starting; + } + end_of_directory = entry_p; + if (base_address != entry_p+1) + { + yaz_log(LOG_WARN, "MARC: Base address does not follow directory"); + } + for (entry_p = 24; entry_p != end_of_directory; ) { int data_length; int data_offset; @@ -151,6 +187,12 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) i = data_offset + base_address; end_offset = i+data_length-1; + if (data_length <= 0 || data_offset < 0 || end_offset >= record_length) + { + yaz_log(LOG_WARN, "MARC: Bad offsets in data. Skipping rest"); + break; + } + if (memcmp (tag, "00", 2) && indicator_length) { /* generate indicator node */ @@ -210,37 +252,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) i0 = i; while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset) { - - if (!memcmp(tag, "4", 1) && (!yaz_matchstr(absynName, "UNIMARC")|| - !yaz_matchstr(absynName, "RUSMARC"))) - { - int go = 1; - data1_node *res = - data1_mk_tag_n (p->dh, p->mem, - buf+i+1, identifier_length-1, - 0 /* attr */, parent); - i += identifier_length; - i0 = i; - do { - while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS && - buf[i] != ISO2709_FS && i < end_offset) - { - i++; - } - if (!memcmp(buf+i+1, "1", 1) && idh, p->mem, buf + i0, i - i0, res); - i0 = i; - } - else if (memcmp (tag, "00", 2) && identifier_length) + if (memcmp (tag, "00", 2) && identifier_length) { data1_node *res; if (marc_xml) @@ -305,6 +317,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) } return res_root; } + /* * Locate some data under this node. This routine should handle variants * prettily. @@ -317,9 +330,12 @@ static char *get_data(data1_node *n, int *len) { if (n->which == DATA1N_data) { - int i; *len = n->u.data.len; - + + /** Fixme: not delete leader/final whitespaces + ** in MARC field/subfield. It fixed in + ** data1/d1_marc.c too. + for (i = 0; i<*len; i++) if (!d1_isspace(n->u.data.data[i])) break; @@ -328,6 +344,9 @@ static char *get_data(data1_node *n, int *len) *len = *len - i; if (*len > 0) return n->u.data.data + i; + **/ + if (*len > 0) + return n->u.data.data; } if (n->which == DATA1N_tag) n = n->child; @@ -352,7 +371,9 @@ static data1_node *lookup_subfield(data1_node *node, const char *name) } return 0; } -static inline_subfield *lookup_inline_subfield(inline_subfield *pisf, const char *name) + +static inline_subfield *lookup_inline_subfield(inline_subfield *pisf, + const char *name) { inline_subfield *p; @@ -363,7 +384,9 @@ static inline_subfield *lookup_inline_subfield(inline_subfield *pisf, const char } return 0; } -static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_subfield *pisf) + +static inline_subfield *cat_inline_subfield(mc_subfield *psf, WRBUF buf, + inline_subfield *pisf) { mc_subfield *p; @@ -377,20 +400,23 @@ static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_ { if (strcmp(p->prefix, "_")) { - strcat(strcat(buf, " "), p->prefix); + wrbuf_puts(buf, " "); + wrbuf_puts(buf, p->prefix); } if (p->interval.start == -1) { - strcat(buf, found->data); + wrbuf_puts(buf, found->data); } else { - strncat(buf, found->data+p->interval.start, - p->interval.end-p->interval.start+1); + wrbuf_write(buf, found->data+p->interval.start, + p->interval.end-p->interval.start+1); + wrbuf_puts(buf, ""); } if (strcmp(p->suffix, "_")) { - strcat(strcat(buf, p->suffix), " "); + wrbuf_puts(buf, p->suffix); + wrbuf_puts(buf, " "); } #if MARCOMP_DEBUG logf(LOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name); @@ -424,30 +450,54 @@ static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_ } if (found) { - strcat(buf, " ("); + wrbuf_puts(buf, " ("); pisf = cat_inline_subfield(p->u.child, buf, pisf); - strcat(buf, ") "); + wrbuf_puts(buf, ") "); } } } return pisf; } -static void cat_inline_field(mc_field *pf, char *buf, data1_node *subfield) -{ - + +static void cat_inline_field(mc_field *pf, WRBUF buf, data1_node *subfield) +{ if (!pf || !subfield) return; - for (;subfield; subfield = subfield->next) + for (;subfield;) { int len; - inline_field *pif = inline_parse(get_data(subfield,&len)); + inline_field *pif=NULL; + data1_node *psubf; + + if (yaz_matchstr(subfield->u.tag.tag, "1")) + { + subfield = subfield->next; + continue; + } + + psubf = subfield; + pif = inline_mk_field(); + do + { + int i; + if ((i=inline_parse(pif, psubf->u.tag.tag, get_data(psubf, &len)))<0) + { + logf(LOG_WARN, "inline subfield ($%s): parse error", + psubf->u.tag.tag); + inline_destroy_field(pif); + return; + } + psubf = psubf->next; + } while (psubf && yaz_matchstr(psubf->u.tag.tag, "1")); + + subfield = psubf; if (pif && !yaz_matchstr(pif->name, pf->name)) { if (!pf->list && pif->list) { - strcat(buf, pif->list->data); + wrbuf_puts(buf, pif->list->data); } else { @@ -468,24 +518,26 @@ static void cat_inline_field(mc_field *pf, char *buf, data1_node *subfield) /* add separator for inline fields */ - if (strlen(buf)) + if (wrbuf_len(buf)) { - strcat(buf, "\n"); + wrbuf_puts(buf, "\n"); } } else { - logf(LOG_WARN, "In-line field %s missed -- indicators does not match", pif->name); + logf(LOG_WARN, "In-line field %s missed -- indicators do not match", pif->name); } } } inline_destroy_field(pif); } #if MARCOMP_DEBUG - logf(LOG_LOG, "cat_inline_field(): got buffer {%s}", buf); + logf(LOG_LOG, "cat_inline_field(): got buffer {%s}", buf->buf); #endif } -static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfield) + +static data1_node *cat_subfield(mc_subfield *psf, WRBUF buf, + data1_node *subfield) { mc_subfield *p; @@ -501,7 +553,8 @@ static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfiel if (strcmp(p->prefix, "_")) { - strcat(strcat(buf, " "), p->prefix); + wrbuf_puts(buf, " "); + wrbuf_puts(buf, p->prefix); } if (p->u.in_line) @@ -510,16 +563,18 @@ static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfiel } else if (p->interval.start == -1) { - strcat(buf, get_data(found, &len)); + wrbuf_puts(buf, get_data(found, &len)); } else { - strncat(buf, get_data(found, &len)+p->interval.start, + wrbuf_write(buf, get_data(found, &len)+p->interval.start, p->interval.end-p->interval.start+1); + wrbuf_puts(buf, ""); } if (strcmp(p->suffix, "_")) { - strcat(strcat(buf, p->suffix), " "); + wrbuf_puts(buf, p->suffix); + wrbuf_puts(buf, " "); } #if MARCOMP_DEBUG logf(LOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag); @@ -552,15 +607,17 @@ static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfiel } if (found) { - strcat(buf, " ("); + wrbuf_puts(buf, " ("); subfield = cat_subfield(p->u.child, buf, subfield); - strcat(buf, ") "); + wrbuf_puts(buf, ") "); } } } return subfield; } -static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, data1_node *field) + +static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, + WRBUF buf, data1_node *field) { data1_node *subfield; int ind1, ind2; @@ -587,15 +644,16 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, d if (pf->interval.start == -1) { - strcat(buf, get_data(field, &len)); + wrbuf_puts(buf, get_data(field, &len)); } else { - strncat(buf, get_data(field, &len)+pf->interval.start, - pf->interval.end-pf->interval.start+1); + wrbuf_write(buf, get_data(field, &len)+pf->interval.start, + pf->interval.end-pf->interval.start+1); + wrbuf_puts(buf, ""); } #if MARCOMP_DEBUG - logf(LOG_LOG, "cat_field(): got buffer {%s}", buf); + logf(LOG_LOG, "cat_field(): got buffer {%s}", buf->buf); #endif return field->next; } @@ -626,11 +684,12 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, d cat_subfield(pf->list, buf, subfield); #if MARCOMP_DEBUG - logf(LOG_LOG, "cat_field(): got buffer {%s}", buf); + logf(LOG_LOG, "cat_field(): got buffer {%s}", buf->buf); #endif return field->next; } + static int is_empty(char *s) { char *p = s; @@ -642,14 +701,16 @@ static int is_empty(char *s) } return 1; } -static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data1_node *root) + +static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, + data1_node *root) { data1_marctab *marctab = root->u.root.absyn->marc; data1_node *top = root->child; data1_node *field; mc_context *c; mc_field *pf; - char buf[1000000]; + WRBUF buf; c = mc_mk_context(mc_stmnt+3); @@ -663,6 +724,7 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data mc_destroy_context(c); return; } + buf = wrbuf_alloc(); #if MARCOMP_DEBUG logf(LOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt); #endif @@ -686,13 +748,16 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data if (!yaz_matchstr(field->u.tag.tag, pf->name)) { data1_node *new; - char *pb = buf; + char *pb; #if MARCOMP_DEBUG logf(LOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag); #endif - *buf = '\0'; + wrbuf_rewind(buf); + wrbuf_puts(buf, ""); + field = cat_field(p, pf, buf, field); + pb = wrbuf_buf(buf); for (pb = strtok(pb, "\n"); pb; pb = strtok(NULL, "\n")) { if (!is_empty(pb)) @@ -710,6 +775,7 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data } mc_destroy_field(pf); mc_destroy_context(c); + wrbuf_free(buf, 1); } data1_node *grs_read_marcxml(struct grs_read_info *p) @@ -731,7 +797,6 @@ data1_node *grs_read_marcxml(struct grs_read_info *p) return root; } - data1_node *grs_read_marc(struct grs_read_info *p) { data1_node *root = grs_read_iso2709(p, 0); @@ -750,6 +815,7 @@ data1_node *grs_read_marc(struct grs_read_info *p) } return root; } + static void *grs_init_marc(void) { return 0;