X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fmarcread.c;h=42096f0040cc3728cb5de7f4fe648f21c940b5ec;hb=668b136879471079ce03b26981a5fd1b3a210bc1;hp=86993736649018f1fef5ae29894bae1a9c99299e;hpb=df9a3a9811db8c6ad09feeb94d66b429d98d8fde;p=idzebra-moved-to-github.git diff --git a/recctrl/marcread.c b/recctrl/marcread.c index 8699373..42096f0 100644 --- a/recctrl/marcread.c +++ b/recctrl/marcread.c @@ -1,5 +1,5 @@ -/* $Id: marcread.c,v 1.23 2003-12-10 23:30:15 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 +/* $Id: marcread.c,v 1.24.2.4 2006-08-14 10:39:16 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps This file is part of the Zebra server. @@ -15,9 +15,9 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Zebra; see the file LICENSE.zebra. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ #include @@ -42,6 +42,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) int indicator_length; int identifier_length; int base_address; + int end_of_directory; int length_data_entry; int length_starting; int length_implementation; @@ -55,6 +56,18 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) if ((*p->readf)(p->fh, buf, 5) != 5) return NULL; + while (*buf < '0' || *buf > '9') + { + int i; + + yaz_log(LOG_WARN, "MARC: Skipping bad byte %d (0x%02X)", + *buf & 0xff, *buf & 0xff); + for (i = 0; i<4; i++) + buf[i] = buf[i+1]; + + if ((*p->readf)(p->fh, buf+4, 1) != 1) + return NULL; + } record_length = atoi_n (buf, 5); if (record_length < 25) { @@ -120,9 +133,32 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) length_implementation = atoi_n (buf+22, 1); for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) - entry_p += 3+length_data_entry+length_starting; - base_address = entry_p+1; - for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) + { + int l = 3 + length_data_entry + length_starting; + if (entry_p + l >= record_length) + { + yaz_log(LOG_WARN, "MARC: Directory offset %d: end of record.", + entry_p); + return 0; + } + /* check for digits in length info */ + while (--l >= 3) + if (!isdigit(*(const unsigned char *) (buf + entry_p+l))) + break; + if (l >= 3) + { + /* not all digits, so stop directory scan */ + yaz_log(LOG_LOG, "MARC: Bad directory"); + break; + } + entry_p += 3 + length_data_entry + length_starting; + } + end_of_directory = entry_p; + if (base_address != entry_p+1) + { + yaz_log(LOG_WARN, "MARC: Base address does not follow directory"); + } + for (entry_p = 24; entry_p != end_of_directory; ) { int data_length; int data_offset; @@ -151,6 +187,12 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) i = data_offset + base_address; end_offset = i+data_length-1; + if (data_length <= 0 || data_offset < 0 || end_offset >= record_length) + { + yaz_log(LOG_WARN, "MARC: Bad offsets in data. Skipping rest"); + break; + } + if (memcmp (tag, "00", 2) && indicator_length) { /* generate indicator node */ @@ -275,6 +317,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) } return res_root; } + /* * Locate some data under this node. This routine should handle variants * prettily. @@ -287,9 +330,12 @@ static char *get_data(data1_node *n, int *len) { if (n->which == DATA1N_data) { - int i; *len = n->u.data.len; - + + /** Fixme: not delete leader/final whitespaces + ** in MARC field/subfield. It fixed in + ** data1/d1_marc.c too. + for (i = 0; i<*len; i++) if (!d1_isspace(n->u.data.data[i])) break; @@ -298,6 +344,9 @@ static char *get_data(data1_node *n, int *len) *len = *len - i; if (*len > 0) return n->u.data.data + i; + **/ + if (*len > 0) + return n->u.data.data; } if (n->which == DATA1N_tag) n = n->child; @@ -310,6 +359,7 @@ static char *get_data(data1_node *n, int *len) *len = strlen(r); return r; } + static data1_node *lookup_subfield(data1_node *node, const char *name) { data1_node *p; @@ -321,7 +371,9 @@ static data1_node *lookup_subfield(data1_node *node, const char *name) } return 0; } -static inline_subfield *lookup_inline_subfield(inline_subfield *pisf, const char *name) + +static inline_subfield *lookup_inline_subfield(inline_subfield *pisf, + const char *name) { inline_subfield *p; @@ -332,7 +384,9 @@ static inline_subfield *lookup_inline_subfield(inline_subfield *pisf, const char } return 0; } -static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_subfield *pisf) + +static inline_subfield *cat_inline_subfield(mc_subfield *psf, WRBUF buf, + inline_subfield *pisf) { mc_subfield *p; @@ -346,20 +400,23 @@ static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_ { if (strcmp(p->prefix, "_")) { - strcat(strcat(buf, " "), p->prefix); + wrbuf_puts(buf, " "); + wrbuf_puts(buf, p->prefix); } if (p->interval.start == -1) { - strcat(buf, found->data); + wrbuf_puts(buf, found->data); } else { - strncat(buf, found->data+p->interval.start, - p->interval.end-p->interval.start+1); + wrbuf_write(buf, found->data+p->interval.start, + p->interval.end-p->interval.start+1); + wrbuf_puts(buf, ""); } if (strcmp(p->suffix, "_")) { - strcat(strcat(buf, p->suffix), " "); + wrbuf_puts(buf, p->suffix); + wrbuf_puts(buf, " "); } #if MARCOMP_DEBUG logf(LOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name); @@ -393,15 +450,16 @@ static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_ } if (found) { - strcat(buf, " ("); + wrbuf_puts(buf, " ("); pisf = cat_inline_subfield(p->u.child, buf, pisf); - strcat(buf, ") "); + wrbuf_puts(buf, ") "); } } } return pisf; } -static void cat_inline_field(mc_field *pf, char *buf, data1_node *subfield) + +static void cat_inline_field(mc_field *pf, WRBUF buf, data1_node *subfield) { if (!pf || !subfield) return; @@ -439,7 +497,7 @@ static void cat_inline_field(mc_field *pf, char *buf, data1_node *subfield) { if (!pf->list && pif->list) { - strcat(buf, pif->list->data); + wrbuf_puts(buf, pif->list->data); } else { @@ -460,24 +518,26 @@ static void cat_inline_field(mc_field *pf, char *buf, data1_node *subfield) /* add separator for inline fields */ - if (strlen(buf)) + if (wrbuf_len(buf)) { - strcat(buf, "\n"); + wrbuf_puts(buf, "\n"); } } else { - logf(LOG_WARN, "In-line field %s missed -- indicators does not match", pif->name); + logf(LOG_WARN, "In-line field %s missed -- indicators do not match", pif->name); } } } inline_destroy_field(pif); } #if MARCOMP_DEBUG - logf(LOG_LOG, "cat_inline_field(): got buffer {%s}", buf); + logf(LOG_LOG, "cat_inline_field(): got buffer {%s}", buf->buf); #endif } -static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfield) + +static data1_node *cat_subfield(mc_subfield *psf, WRBUF buf, + data1_node *subfield) { mc_subfield *p; @@ -493,7 +553,8 @@ static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfiel if (strcmp(p->prefix, "_")) { - strcat(strcat(buf, " "), p->prefix); + wrbuf_puts(buf, " "); + wrbuf_puts(buf, p->prefix); } if (p->u.in_line) @@ -502,16 +563,18 @@ static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfiel } else if (p->interval.start == -1) { - strcat(buf, get_data(found, &len)); + wrbuf_puts(buf, get_data(found, &len)); } else { - strncat(buf, get_data(found, &len)+p->interval.start, + wrbuf_write(buf, get_data(found, &len)+p->interval.start, p->interval.end-p->interval.start+1); + wrbuf_puts(buf, ""); } if (strcmp(p->suffix, "_")) { - strcat(strcat(buf, p->suffix), " "); + wrbuf_puts(buf, p->suffix); + wrbuf_puts(buf, " "); } #if MARCOMP_DEBUG logf(LOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag); @@ -544,15 +607,17 @@ static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfiel } if (found) { - strcat(buf, " ("); + wrbuf_puts(buf, " ("); subfield = cat_subfield(p->u.child, buf, subfield); - strcat(buf, ") "); + wrbuf_puts(buf, ") "); } } } return subfield; } -static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, data1_node *field) + +static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, + WRBUF buf, data1_node *field) { data1_node *subfield; int ind1, ind2; @@ -579,15 +644,16 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, d if (pf->interval.start == -1) { - strcat(buf, get_data(field, &len)); + wrbuf_puts(buf, get_data(field, &len)); } else { - strncat(buf, get_data(field, &len)+pf->interval.start, - pf->interval.end-pf->interval.start+1); + wrbuf_write(buf, get_data(field, &len)+pf->interval.start, + pf->interval.end-pf->interval.start+1); + wrbuf_puts(buf, ""); } #if MARCOMP_DEBUG - logf(LOG_LOG, "cat_field(): got buffer {%s}", buf); + logf(LOG_LOG, "cat_field(): got buffer {%s}", buf->buf); #endif return field->next; } @@ -618,11 +684,12 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, d cat_subfield(pf->list, buf, subfield); #if MARCOMP_DEBUG - logf(LOG_LOG, "cat_field(): got buffer {%s}", buf); + logf(LOG_LOG, "cat_field(): got buffer {%s}", buf->buf); #endif return field->next; } + static int is_empty(char *s) { char *p = s; @@ -634,14 +701,16 @@ static int is_empty(char *s) } return 1; } -static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data1_node *root) + +static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, + data1_node *root) { data1_marctab *marctab = root->u.root.absyn->marc; data1_node *top = root->child; data1_node *field; mc_context *c; mc_field *pf; - char buf[1000000]; + WRBUF buf; c = mc_mk_context(mc_stmnt+3); @@ -655,6 +724,7 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data mc_destroy_context(c); return; } + buf = wrbuf_alloc(); #if MARCOMP_DEBUG logf(LOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt); #endif @@ -678,13 +748,16 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data if (!yaz_matchstr(field->u.tag.tag, pf->name)) { data1_node *new; - char *pb = buf; + char *pb; #if MARCOMP_DEBUG logf(LOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag); #endif - *buf = '\0'; + wrbuf_rewind(buf); + wrbuf_puts(buf, ""); + field = cat_field(p, pf, buf, field); + pb = wrbuf_buf(buf); for (pb = strtok(pb, "\n"); pb; pb = strtok(NULL, "\n")) { if (!is_empty(pb)) @@ -702,6 +775,7 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data } mc_destroy_field(pf); mc_destroy_context(c); + wrbuf_free(buf, 1); } data1_node *grs_read_marcxml(struct grs_read_info *p) @@ -723,7 +797,6 @@ data1_node *grs_read_marcxml(struct grs_read_info *p) return root; } - data1_node *grs_read_marc(struct grs_read_info *p) { data1_node *root = grs_read_iso2709(p, 0); @@ -742,6 +815,7 @@ data1_node *grs_read_marc(struct grs_read_info *p) } return root; } + static void *grs_init_marc(void) { return 0;