* Copyright (C) 1995-2005, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: marcdisp.c,v 1.14 2005-02-02 23:07:56 adam Exp $
+ * $Id: marcdisp.c,v 1.21 2005-04-20 13:17:51 adam Exp $
*/
/**
wrbuf_iconv_write_cdata(wr, mt->iconv_cd, buf, len);
}
+static int atoi_n_check(const char *buf, int size, int *val) /* Checked wrapper around atoi_n: stores the parsed value in *val and returns 1, or returns 0 if the field is rejected. */
+{
+ if (!isdigit(*(const unsigned char *) buf)) /* unsigned char cast keeps the isdigit() argument non-negative. NOTE(review): only the first byte is tested, not all `size` bytes - confirm this is intended. */
+ return 0; /* rejected: *val is left unmodified, the caller supplies its own default */
+ *val = atoi_n(buf, size); /* atoi_n is defined outside this chunk; presumably converts at most `size` characters - TODO confirm */
+ return 1;
+}
+
int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
{
int entry_p;
int record_length;
int indicator_length;
int identifier_length;
+ int end_of_directory;
int base_address;
int length_data_entry;
int length_starting;
int length_implementation;
+ char lead[24];
+ int produce_warnings = 0;
- wrbuf_rewind(wr);
+ if (mt->debug)
+ produce_warnings = 1;
+ if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_OAIMARC
+ || mt->xml == YAZ_MARC_MARCXML || mt->xml == YAZ_MARC_XCHANGE)
+ produce_warnings = 1;
record_length = atoi_n (buf, 5);
if (record_length < 25)
{
if (mt->debug)
- {
- char str[40];
-
- sprintf (str, "Record length %d - aborting\n", record_length);
- wrbuf_puts (wr, str);
- }
+ wrbuf_printf(wr, "<!-- Record length %d - aborting -->\n",
+ record_length);
return -1;
}
+ memcpy(lead, buf, 24); /* so we can modify the header for output */
+
/* bail out if bsize is known and record_length exceeds it */
if (bsize != -1 && record_length > bsize)
return -1;
- if (isdigit(((const unsigned char *) buf)[10]))
- indicator_length = atoi_n (buf+10, 1);
- else
- indicator_length = 2;
- if (isdigit(((const unsigned char *) buf)[11]))
- identifier_length = atoi_n (buf+11, 1);
- else
+ if (!atoi_n_check(buf+10, 1, &indicator_length))
+ {
+ if (produce_warnings)
+ wrbuf_printf(wr, "<!-- Indicator length at offset 10 should hold a digit. Assuming 2 -->\n");
+ lead[10] = '2';
+ indicator_length = 2;
+ }
+ if (!atoi_n_check(buf+11, 1, &identifier_length))
+ {
+ if (produce_warnings)
+ wrbuf_printf(wr, "<!-- Identifier length at offset 11 should hold a digit. Assuming 2 -->\n");
+ lead[11] = '2';
identifier_length = 2;
- base_address = atoi_n (buf+12, 5);
-
- length_data_entry = atoi_n (buf+20, 1);
- if (buf[20] <= '0' || buf[20] >= '9')
+ }
+ if (!atoi_n_check(buf+12, 5, &base_address))
+ {
+ if (produce_warnings)
+ wrbuf_printf(wr, "<!-- Base address at offsets 12..16 should hold a number. Assuming 0 -->\n");
+ base_address = 0;
+ }
+ if (!atoi_n_check(buf+20, 1, &length_data_entry))
+ {
+ if (produce_warnings)
+ wrbuf_printf(wr, "<!-- Length data entry at offset 20 should hold a digit. Assuming 4 -->\n");
+ length_data_entry = 4;
+ lead[20] = '4';
+ }
+ if (!atoi_n_check(buf+21, 1, &length_starting))
{
- wrbuf_printf(wr, "<!-- Length data entry should hold a digit. Assuming 4 -->\n");
- length_data_entry = 4;
+ if (produce_warnings)
+ wrbuf_printf(wr, "<!-- Length starting at offset 21 should hold a digit. Assuming 5 -->\n");
+ length_starting = 5;
+ lead[21] = '5';
}
- length_starting = atoi_n (buf+21, 1);
- if (buf[21] <= '0' || buf[21] >= '9')
+ if (!atoi_n_check(buf+22, 1, &length_implementation))
{
- wrbuf_printf(wr, "<!-- Length starting should hold a digit. Assuming 5 -->\n");
- length_starting = 5;
+ if (produce_warnings)
+ wrbuf_printf(wr, "<!-- Length implementation at offset 22 should hold a digit. Assuming 0 -->\n");
+ length_implementation = 0;
+ lead[22] = '0';
}
- length_implementation = atoi_n (buf+22, 1);
if (mt->xml != YAZ_MARC_LINE)
{
wr,
"<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
" <leader>");
-#if 1
- marc_cdata(mt, buf, 9, wr);
- marc_cdata(mt, "a", 1, wr); /* set leader to signal unicode */
- marc_cdata(mt, buf+10, 14, wr);
-#else
- marc_cdata(mt, buf, 24, wr); /* leave header as is .. */
-#endif
+ lead[9] = 'a'; /* set leader to signal unicode */
+ marc_cdata(mt, lead, 24, wr);
+ wrbuf_printf(wr, "</leader>\n");
+ break;
+ case YAZ_MARC_XCHANGE:
+ wrbuf_printf(
+ wr,
+ "<record xmlns=\"http://www.bs.dk/standards/MarcXchange\">\n"
+ " <leader>");
+ marc_cdata(mt, lead, 24, wr);
wrbuf_printf(wr, "</leader>\n");
break;
}
{
char str[40];
- if (mt->xml)
- wrbuf_puts (wr, "<!--\n");
+ wrbuf_puts (wr, "<!--\n");
sprintf (str, "Record length %5d\n", record_length);
wrbuf_puts (wr, str);
sprintf (str, "Indicator length %5d\n", indicator_length);
wrbuf_puts (wr, str);
sprintf (str, "Length implementation %5d\n", length_implementation);
wrbuf_puts (wr, str);
- if (mt->xml)
- wrbuf_puts (wr, "-->\n");
+ wrbuf_puts (wr, "-->\n");
}
/* first pass. determine length of directory & base of data */
for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
{
- entry_p += 3+length_data_entry+length_starting;
- if (entry_p >= record_length)
+ /* length of directory entry */
+ int l = 3 + length_data_entry + length_starting;
+ if (entry_p + l >= record_length)
+ {
+ wrbuf_printf (wr, "<!-- Directory offset %d: end of record. "
+ "Missing FS char -->\n", entry_p);
return -1;
+ }
+ if (mt->debug)
+ wrbuf_printf (wr, "<!-- Directory offset %d: Tag %.3s -->\n",
+ entry_p, buf+entry_p);
+ /* check for digits in length info */
+ while (--l >= 3)
+ if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
+ break;
+ if (l >= 3)
+ {
+ /* not all digits, so stop directory scan */
+ wrbuf_printf (wr, "<!-- Directory offset %d: Bad data for data "
+ "length and/or length starting -->\n", entry_p);
+ break;
+ }
+ entry_p += 3 + length_data_entry + length_starting;
}
- if (mt->debug && base_address != entry_p+1)
+ end_of_directory = entry_p;
+ if (base_address != entry_p+1)
{
- wrbuf_printf (wr," <!-- base address not at end of directory "
- "base=%d end=%d -->\n", base_address, entry_p+1);
+ if (produce_warnings)
+ wrbuf_printf (wr,"<!-- Base address not at end of directory, "
+ "base %d, end %d -->\n", base_address, entry_p+1);
}
- base_address = entry_p+1;
-
if (mt->xml == YAZ_MARC_ISO2709)
{
WRBUF wr_head = wrbuf_alloc();
int data_p = 0;
/* second pass. create directory for ISO2709 output */
- for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
+ for (entry_p = 24; entry_p != end_of_directory; )
{
int data_length, data_offset, end_offset;
int i, sz1, sz2;
i = data_offset + base_address;
end_offset = i+data_length-1;
- while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS &&
- i < end_offset)
+ if (data_length <= 0 || data_offset < 0 || end_offset >= record_length)
+ return -1;
+
+ while (i < end_offset &&
+ buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
i++;
sz1 = 1+i - (data_offset + base_address);
if (mt->iconv_cd)
}
wrbuf_putc(wr_dir, ISO2709_FS);
wrbuf_printf(wr_head, "%05d", data_p+1 + base_address);
- wrbuf_write(wr_head, buf+5, 7);
+ wrbuf_write(wr_head, lead+5, 7);
wrbuf_printf(wr_head, "%05d", base_address);
- wrbuf_write(wr_head, buf+17, 7);
+ wrbuf_write(wr_head, lead+17, 7);
wrbuf_write(wr, wrbuf_buf(wr_head), 24);
wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
wrbuf_free(wr_tmp, 1);
}
/* third pass. create data output */
- for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
+ for (entry_p = 24; entry_p != end_of_directory; )
{
int data_length;
int data_offset;
int i, j;
char tag[4];
int identifier_flag = 0;
- int entry_p0;
+ int entry_p0 = entry_p;
memcpy (tag, buf+entry_p, 3);
entry_p += 3;
- entry_p0 = entry_p;
tag[3] = '\0';
data_length = atoi_n (buf+entry_p, length_data_entry);
entry_p += length_data_entry;
entry_p += length_starting;
i = data_offset + base_address;
end_offset = i+data_length-1;
+
+ if (data_length <= 0 || data_offset < 0)
+ break;
if (mt->debug)
{
- wrbuf_printf(wr, "<!-- offset=%d data dlength=%d doffset=%d -->\n",
+ wrbuf_printf(wr, "<!-- Directory offset %d: data-length %d, "
+ "data-offset %d -->\n",
entry_p0, data_length, data_offset);
}
+ if (end_offset >= record_length)
+ {
+ wrbuf_printf (wr,"<!-- Directory offset %d: Data out of bounds "
+ "%d >= %d -->\n",
+ entry_p0, end_offset, record_length);
+ break;
+ }
- if (indicator_length < 4 && indicator_length > 0)
+ if (memcmp (tag, "00", 2))
+ identifier_flag = 1; /* if not 00X assume subfields */
+ else if (indicator_length < 4 && indicator_length > 0)
{
+ /* Danmarc 00X have subfields */
if (buf[i + indicator_length] == ISO2709_IDFS)
identifier_flag = 1;
else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
identifier_flag = 2;
}
- else if (memcmp (tag, "00", 2))
- identifier_flag = 1;
-
+
+ if (mt->debug)
+ {
+ wrbuf_printf(wr, "<!-- identifier_flag = %d -->\n",
+ identifier_flag);
+ }
+
switch(mt->xml)
{
case YAZ_MARC_LINE:
- if (mt->debug)
- wrbuf_puts (wr, "Tag: ");
wrbuf_puts (wr, tag);
wrbuf_puts (wr, " ");
break;
wrbuf_printf(wr, "\"");
break;
case YAZ_MARC_MARCXML:
+ case YAZ_MARC_XCHANGE:
if (identifier_flag)
wrbuf_printf (wr, " <datafield tag=\"");
else
wrbuf_putc(wr, buf[i]);
break;
case YAZ_MARC_LINE:
- if (mt->debug)
- wrbuf_puts (wr, " Ind: ");
wrbuf_putc(wr, buf[i]);
break;
case YAZ_MARC_SIMPLEXML:
wrbuf_printf(wr, "\"");
break;
case YAZ_MARC_MARCXML:
+ case YAZ_MARC_XCHANGE:
wrbuf_printf(wr, " ind%d=\"", j+1);
marc_cdata(mt, buf+i, 1, wr);
wrbuf_printf(wr, "\"");
}
}
if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_MARCXML
- || mt->xml == YAZ_MARC_OAIMARC)
+ || mt->xml == YAZ_MARC_OAIMARC || mt->xml == YAZ_MARC_XCHANGE)
{
wrbuf_puts (wr, ">");
if (identifier_flag)
wrbuf_puts (wr, "\n");
}
- if (mt->xml == YAZ_MARC_LINE)
- {
- if (mt->debug)
- wrbuf_puts (wr, " Fields: ");
- }
if (identifier_flag)
{
- while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
+ while (i < end_offset &&
+ buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
{
int i0;
i++;
wrbuf_puts (wr, "\">");
break;
case YAZ_MARC_MARCXML:
+ case YAZ_MARC_XCHANGE:
wrbuf_puts (wr, " <subfield code=\"");
marc_cdata(mt, buf+i, identifier_length-1, wr);
i = i+identifier_length-1;
break;
}
i0 = i;
- while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
- buf[i] != ISO2709_FS && i < end_offset)
+ while (i < end_offset &&
+ buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
+ buf[i] != ISO2709_FS)
i++;
marc_cdata(mt, buf + i0, i - i0, wr);
if (mt->xml == YAZ_MARC_SIMPLEXML ||
mt->xml == YAZ_MARC_MARCXML ||
+ mt->xml == YAZ_MARC_XCHANGE ||
mt->xml == YAZ_MARC_OAIMARC)
wrbuf_puts (wr, "</subfield>\n");
}
else
{
int i0 = i;
- while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
+ while (i < end_offset &&
+ buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
i++;
marc_cdata(mt, buf + i0, i - i0, wr);
if (mt->xml == YAZ_MARC_ISO2709)
if (mt->xml == YAZ_MARC_LINE)
wrbuf_puts (wr, mt->endline_str);
if (i < end_offset)
- wrbuf_printf(wr, " <!-- separator but not at end of field length=%d-->\n", data_length);
+ wrbuf_printf(wr, "<!-- separator but not at end of field length=%d-->\n", data_length);
if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
- wrbuf_printf(wr, " <!-- no separator at end of field length=%d-->\n", data_length);
+ wrbuf_printf(wr, "<!-- no separator at end of field length=%d-->\n", data_length);
switch(mt->xml)
{
case YAZ_MARC_SIMPLEXML:
wrbuf_puts (wr, "</fixfield>\n");
break;
case YAZ_MARC_MARCXML:
+ case YAZ_MARC_XCHANGE:
if (identifier_flag)
wrbuf_puts (wr, " </datafield>\n");
else
wrbuf_puts (wr, "</oai_marc>\n");
break;
case YAZ_MARC_MARCXML:
+ case YAZ_MARC_XCHANGE:
wrbuf_puts (wr, "</record>\n");
break;
case YAZ_MARC_ISO2709:
char **result, int *rsize)
{
int r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
- if (r > 0)
- {
- if (result)
- *result = wrbuf_buf(mt->m_wr);
- if (rsize)
- *rsize = wrbuf_len(mt->m_wr);
- }
+ if (result)
+ *result = wrbuf_buf(mt->m_wr);
+ if (rsize)
+ *rsize = wrbuf_len(mt->m_wr);
return r;
}