X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fmarcread.c;h=42096f0040cc3728cb5de7f4fe648f21c940b5ec;hb=668b136879471079ce03b26981a5fd1b3a210bc1;hp=323e96f784abcc43b3f6df08059adff112619270;hpb=3985f957cc12ed8006b9b6aa024367c8596388d8;p=idzebra-moved-to-github.git diff --git a/recctrl/marcread.c b/recctrl/marcread.c index 323e96f..42096f0 100644 --- a/recctrl/marcread.c +++ b/recctrl/marcread.c @@ -1,4 +1,4 @@ -/* $Id: marcread.c,v 1.24 2004-06-16 22:12:30 adam Exp $ +/* $Id: marcread.c,v 1.24.2.4 2006-08-14 10:39:16 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -15,9 +15,9 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Zebra; see the file LICENSE.zebra. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ #include @@ -42,6 +42,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) int indicator_length; int identifier_length; int base_address; + int end_of_directory; int length_data_entry; int length_starting; int length_implementation; @@ -55,6 +56,18 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) if ((*p->readf)(p->fh, buf, 5) != 5) return NULL; + while (*buf < '0' || *buf > '9') + { + int i; + + yaz_log(LOG_WARN, "MARC: Skipping bad byte %d (0x%02X)", + *buf & 0xff, *buf & 0xff); + for (i = 0; i<4; i++) + buf[i] = buf[i+1]; + + if ((*p->readf)(p->fh, buf+4, 1) != 1) + return NULL; + } record_length = atoi_n (buf, 5); if (record_length < 25) { @@ -120,9 +133,32 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) length_implementation = atoi_n (buf+22, 1); for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) - entry_p += 3+length_data_entry+length_starting; - base_address = entry_p+1; - for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) + { + int l = 3 + length_data_entry + length_starting; + if (entry_p + l >= record_length) + { + yaz_log(LOG_WARN, "MARC: Directory offset %d: end of record.", + entry_p); + return 0; + } + /* check for digits in length info */ + while (--l >= 3) + if (!isdigit(*(const unsigned char *) (buf + entry_p+l))) + break; + if (l >= 3) + { + /* not all digits, so stop directory scan */ + yaz_log(LOG_LOG, "MARC: Bad directory"); + break; + } + entry_p += 3 + length_data_entry + length_starting; + } + end_of_directory = entry_p; + if (base_address != entry_p+1) + { + yaz_log(LOG_WARN, "MARC: Base address does not follow directory"); + } + for (entry_p = 24; entry_p != end_of_directory; ) { int data_length; int data_offset; @@ -151,6 +187,12 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) i = data_offset + base_address; end_offset = i+data_length-1; + if (data_length <= 0 || data_offset < 0 || end_offset >= record_length) + { + yaz_log(LOG_WARN, "MARC: Bad offsets in data. Skipping rest"); + break; + } + if (memcmp (tag, "00", 2) && indicator_length) { /* generate indicator node */ @@ -288,9 +330,12 @@ static char *get_data(data1_node *n, int *len) { if (n->which == DATA1N_data) { - int i; *len = n->u.data.len; - + + /** Fixme: not delete leader/final whitespaces + ** in MARC field/subfield. It fixed in + ** data1/d1_marc.c too. + for (i = 0; i<*len; i++) if (!d1_isspace(n->u.data.data[i])) break; @@ -299,6 +344,9 @@ static char *get_data(data1_node *n, int *len) *len = *len - i; if (*len > 0) return n->u.data.data + i; + **/ + if (*len > 0) + return n->u.data.data; } if (n->which == DATA1N_tag) n = n->child; @@ -362,7 +410,7 @@ static inline_subfield *cat_inline_subfield(mc_subfield *psf, WRBUF buf, else { wrbuf_write(buf, found->data+p->interval.start, - p->interval.end-p->interval.start); + p->interval.end-p->interval.start+1); wrbuf_puts(buf, ""); } if (strcmp(p->suffix, "_")) @@ -484,7 +532,7 @@ static void cat_inline_field(mc_field *pf, WRBUF buf, data1_node *subfield) inline_destroy_field(pif); } #if MARCOMP_DEBUG - logf(LOG_LOG, "cat_inline_field(): got buffer {%s}", buf); + logf(LOG_LOG, "cat_inline_field(): got buffer {%s}", buf->buf); #endif } @@ -520,7 +568,7 @@ static data1_node *cat_subfield(mc_subfield *psf, WRBUF buf, else { wrbuf_write(buf, get_data(found, &len)+p->interval.start, - p->interval.end-p->interval.start); + p->interval.end-p->interval.start+1); wrbuf_puts(buf, ""); } if (strcmp(p->suffix, "_")) @@ -601,11 +649,11 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, else { wrbuf_write(buf, get_data(field, &len)+pf->interval.start, - pf->interval.end-pf->interval.start); + pf->interval.end-pf->interval.start+1); wrbuf_puts(buf, ""); } #if MARCOMP_DEBUG - logf(LOG_LOG, "cat_field(): got buffer {%s}", buf); + logf(LOG_LOG, "cat_field(): got buffer {%s}", buf->buf); #endif return field->next; } @@ -636,7 +684,7 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, cat_subfield(pf->list, buf, subfield); #if MARCOMP_DEBUG - logf(LOG_LOG, "cat_field(): got buffer {%s}", buf); + logf(LOG_LOG, "cat_field(): got buffer {%s}", buf->buf); #endif return field->next;