From: Adam Dickmeiss Date: Fri, 15 Dec 2006 12:37:17 +0000 (+0000) Subject: Split MARC reader functions out of marcdisp.c. Prepare for MARC format X-Git-Tag: YAZ.2.1.42~5 X-Git-Url: http://jsfdemo.indexdata.com/cgi-bin?a=commitdiff_plain;h=54db174ec4113c44b05f3766dc6f8ad665968cfb;p=yaz-moved-to-github.git Split MARC reader functions out of marcdisp.c. Prepare for MARC format line reader. --- diff --git a/include/yaz/marcdisp.h b/include/yaz/marcdisp.h index 97fad09..1b8e818 100644 --- a/include/yaz/marcdisp.h +++ b/include/yaz/marcdisp.h @@ -24,7 +24,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* $Id: marcdisp.h,v 1.21 2006-12-13 11:25:17 adam Exp $ */ +/* $Id: marcdisp.h,v 1.22 2006-12-15 12:37:17 adam Exp $ */ /** * \file marcdisp.h @@ -38,6 +38,7 @@ #include #include +#include #include YAZ_BEGIN_CDECL @@ -147,6 +148,21 @@ YAZ_EXPORT int atoi_n(const char *buf, int len); YAZ_EXPORT int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize); +/** \brief read MARC lineformat from stream + \param mt handle + \param getbyte get one byte handler + \param ungetbyte unget one byte handler + \param client_data opaque data for handlers + + Parses MARC line record from stream + Returns > 0 for OK (same as length), -1=ERROR +*/ +YAZ_EXPORT +int yaz_marc_read_line(yaz_marc_t mt, + int (*getbyte)(void *client_data), + void (*ungetbyte)(int b, void *client_data), + void *client_data); + /** \brief parses MARCXML/MarcXchange record from xmlNode pointer \param mt handle \param ptr is a pointer to root xml node @@ -211,6 +227,113 @@ YAZ_EXPORT int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wrbuf); */ YAZ_EXPORT int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec); + + +/** \brief sets leader, validates it, and returns important values + \param mt handle + \param leader_c the 24 byte leader to be set + \param indicator_length indicator length (returned 
value) + \param identifier_length identifier length (returned value) + \param base_address base address (returned value) + \param length_data_entry length of data entry (returned value) + \param length_starting length of starting + \param length_implementation length of implementation defined data +*/ +YAZ_EXPORT +void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c, + int *indicator_length, + int *identifier_length, + int *base_address, + int *length_data_entry, + int *length_starting, + int *length_implementation); + + +/** \brief adds MARC comment string + \param mt handle + \param comment comment to be added +*/ +YAZ_EXPORT +void yaz_marc_add_comment(yaz_marc_t mt, char *comment); + +/** \brief adds MARC annotation - printf interface + \param mt handle + \param fmt printf format string +*/ +YAZ_EXPORT +void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...); + +/** \brief adds subfield to MARC structure + \param mt handle + \param code_data code data buffer + \param code_data_len length of code data +*/ +YAZ_EXPORT +void yaz_marc_add_subfield(yaz_marc_t mt, + const char *code_data, size_t code_data_len); + + +/** \brief adds controlfield to MARC structure + \param mt handle + \param tag (e.g. 
"001" + \param data value for this tag + \param data_len length of data +*/ +YAZ_EXPORT +void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag, + const char *data, size_t data_len); + + +/** \brief adds controlfield to MARC structure using xml Nodes + \param mt handle + \param ptr_tag value of tag (TEXT xmlNode) + \param ptr_data value of data (TEXT xmlNode) +*/ +YAZ_EXPORT +void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, + const xmlNode *ptr_data); + + +/** \brief adds datafield to MARC structure using strings + \param mt handle + \param tag value of tag as string + \param indicator indicator string + \param indicator_len length of indicator string +*/ +YAZ_EXPORT +void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag, + const char *indicator, size_t indicator_len); + +/** \brief adds datafield to MARC structure using xml Nodes + \param mt handle + \param ptr_tag value of tag (TEXT xmlNode) + \param indicator indicator string + \param indicator_len length of indicator string +*/ +YAZ_EXPORT +void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, + const char *indicator, size_t indicator_len); + + +/** \brief returns memory for MARC handle + \param mt handle + \retval NMEM handle for MARC system +*/ +YAZ_EXPORT +NMEM yaz_marc_get_nmem(yaz_marc_t mt); + +/** \brief clears memory and MARC record + \param mt handle +*/ +YAZ_EXPORT +void yaz_marc_reset(yaz_marc_t mt); + +/** \brief gets debug level for MARC system + \param mt handle +*/ +YAZ_EXPORT +int yaz_marc_get_debug(yaz_marc_t mt); + YAZ_END_CDECL #endif diff --git a/src/Makefile.am b/src/Makefile.am index 9d538f1..324cb1e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,6 +1,6 @@ ## This file is part of the YAZ toolkit. ## Copyright (C) 1994-2006, Index Data, All rights reserved. 
-## $Id: Makefile.am,v 1.45 2006-11-29 12:34:51 heikki Exp $ +## $Id: Makefile.am,v 1.46 2006-12-15 12:37:18 adam Exp $ YAZ_VERSION_INFO=2:1:0 @@ -49,7 +49,9 @@ diagsrw.c $(top_srcdir)/include/yaz/diagsrw.h: csvtosrw.tcl srw.csv diagsru_update.c $(top_srcdir)/include/yaz/diagsru_update.h: csvtosru_update.tcl sru_update.csv $(TCLSH) $(srcdir)/csvtosru_update.tcl $(srcdir) -libyaz_la_SOURCES=version.c options.c log.c marcdisp.c oid.c wrbuf.c \ +libyaz_la_SOURCES=version.c options.c log.c \ + marcdisp.c marc_read_xml.c marc_read_iso2709.c marc_read_line.c \ + oid.c wrbuf.c \ nmemsdup.c xmalloc.c readconf.c tpath.c nmem.c matchstr.c atoin.c \ siconv.c marc8.c marc8r.c \ odr_bool.c ber_bool.c ber_len.c ber_tag.c odr_util.c \ diff --git a/src/marc_read_iso2709.c b/src/marc_read_iso2709.c new file mode 100644 index 0000000..aee41b4 --- /dev/null +++ b/src/marc_read_iso2709.c @@ -0,0 +1,200 @@ +/* + * Copyright (C) 1995-2006, Index Data ApS + * See the file LICENSE for details. + * + * $Id: marc_read_iso2709.c,v 1.1 2006-12-15 12:37:18 adam Exp $ + */ + +/** + * \file marc_read_iso2709.c + * \brief Implements reading of MARC as ISO2709 + */ + +#if HAVE_CONFIG_H +#include +#endif + +#ifdef WIN32 +#include +#endif + +#include +#include +#include +#include +#include +#include + +int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize) +{ + int entry_p; + int record_length; + int indicator_length; + int identifier_length; + int end_of_directory; + int base_address; + int length_data_entry; + int length_starting; + int length_implementation; + + yaz_marc_reset(mt); + + record_length = atoi_n (buf, 5); + if (record_length < 25) + { + yaz_marc_cprintf(mt, "Record length %d < 24", record_length); + return -1; + } + /* bail out if bsize is known and record_length is greater than that */ + if (bsize != -1 && record_length > bsize) + { + yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d", + record_length, bsize); + return -1; + } + if 
(yaz_marc_get_debug(mt)) + yaz_marc_cprintf(mt, "Record length %5d", record_length); + + yaz_marc_set_leader(mt, buf, + &indicator_length, + &identifier_length, + &base_address, + &length_data_entry, + &length_starting, + &length_implementation); + + /* First pass. determine length of directory & base of data */ + for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) + { + /* length of directory entry */ + int l = 3 + length_data_entry + length_starting; + if (entry_p + l >= record_length) + { + yaz_marc_cprintf(mt, "Directory offset %d: end of record." + " Missing FS char", entry_p); + return -1; + } + if (yaz_marc_get_debug(mt)) + { + yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s", + entry_p, buf+entry_p); + } + /* Check for digits in length info */ + while (--l >= 3) + if (!isdigit(*(const unsigned char *) (buf + entry_p+l))) + break; + if (l >= 3) + { + /* Not all digits, so stop directory scan */ + yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data" + " length and/or length starting", entry_p); + break; + } + entry_p += 3 + length_data_entry + length_starting; + } + end_of_directory = entry_p; + if (base_address != entry_p+1) + { + yaz_marc_cprintf(mt, "Base address not at end of directory," + " base %d, end %d", base_address, entry_p+1); + } + + /* Second pass. parse control - and datafields */ + for (entry_p = 24; entry_p != end_of_directory; ) + { + int data_length; + int data_offset; + int end_offset; + int i; + char tag[4]; + int identifier_flag = 0; + int entry_p0 = entry_p; + + memcpy (tag, buf+entry_p, 3); + entry_p += 3; + tag[3] = '\0'; + data_length = atoi_n(buf+entry_p, length_data_entry); + entry_p += length_data_entry; + data_offset = atoi_n(buf+entry_p, length_starting); + entry_p += length_starting; + i = data_offset + base_address; + end_offset = i+data_length-1; + + if (data_length <= 0 || data_offset < 0) + break; + + if (yaz_marc_get_debug(mt)) + { + yaz_marc_cprintf(mt, "Tag: %s. 
Directory offset %d: data-length %d," + " data-offset %d", + tag, entry_p0, data_length, data_offset); + } + if (end_offset >= record_length) + { + yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d", + entry_p0, end_offset, record_length); + break; + } + + if (memcmp (tag, "00", 2)) + identifier_flag = 1; /* if not 00X assume subfields */ + else if (indicator_length < 4 && indicator_length > 0) + { + /* Danmarc 00X have subfields */ + if (buf[i + indicator_length] == ISO2709_IDFS) + identifier_flag = 1; + else if (buf[i + indicator_length + 1] == ISO2709_IDFS) + identifier_flag = 2; + } + + if (identifier_flag) + { + /* datafield */ + i += identifier_flag-1; + yaz_marc_add_datafield(mt, tag, buf+i, indicator_length); + i += indicator_length; + + while (i < end_offset && + buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) + { + int code_offset = i+1; + + i ++; + while (i < end_offset && + buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS && + buf[i] != ISO2709_FS) + i++; + yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset); + } + } + else + { + /* controlfield */ + int i0 = i; + while (i < end_offset && + buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) + i++; + yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0); + } + if (i < end_offset) + { + yaz_marc_cprintf(mt, "Separator but not at end of field length=%d", + data_length); + } + if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) + { + yaz_marc_cprintf(mt, "No separator at end of field length=%d", + data_length); + } + } + return record_length; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/src/marc_read_line.c b/src/marc_read_line.c new file mode 100644 index 0000000..c05af1b --- /dev/null +++ b/src/marc_read_line.c @@ -0,0 +1,46 @@ +/* + * Copyright (C) 1995-2006, Index Data ApS + * See the file LICENSE for details. 
+ * + * $Id: marc_read_line.c,v 1.1 2006-12-15 12:37:18 adam Exp $ + */ + +/** + * \file marc_read_line.c + * \brief Implements reading of MARC in line format + */ + +#if HAVE_CONFIG_H +#include +#endif + +#ifdef WIN32 +#include +#endif + +#include +#include +#include + +#include +#include +#include + +int yaz_marc_read_line(yaz_marc_t mt, + int (*getbyte)(void *client_data), + void (*ungetbyte)(int b, void *client_data), + void *client_data) +{ + yaz_marc_reset(mt); + + return -1; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/src/marc_read_xml.c b/src/marc_read_xml.c new file mode 100644 index 0000000..2642dd4 --- /dev/null +++ b/src/marc_read_xml.c @@ -0,0 +1,263 @@ +/* + * Copyright (C) 1995-2006, Index Data ApS + * See the file LICENSE for details. + * + * $Id: marc_read_xml.c,v 1.1 2006-12-15 12:37:18 adam Exp $ + */ + +/** + * \file marc_read_xml.c + * \brief Implements reading of MARC as XML + */ + +#if HAVE_CONFIG_H +#include +#endif + +#ifdef WIN32 +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#if YAZ_HAVE_XML2 +#include +#endif + +#if YAZ_HAVE_XML2 +int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr) +{ + NMEM nmem = yaz_marc_get_nmem(mt); + for (; ptr; ptr = ptr->next) + { + if (ptr->type == XML_ELEMENT_NODE) + { + if (!strcmp((const char *) ptr->name, "subfield")) + { + size_t ctrl_data_len = 0; + char *ctrl_data_buf = 0; + const xmlNode *p = 0, *ptr_code = 0; + struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + if (!strcmp((const char *)attr->name, "code")) + ptr_code = attr->children; + else + { + yaz_marc_cprintf( + mt, "Bad attribute '%.80s' for 'subfield'", + attr->name); + return -1; + } + if (!ptr_code) + { + yaz_marc_cprintf( + mt, "Missing attribute 'code' for 'subfield'" ); + return -1; + } + if (ptr_code->type == XML_TEXT_NODE) + { + ctrl_data_len = + 
strlen((const char *)ptr_code->content); + } + else + { + yaz_marc_cprintf( + mt, "Missing value for 'code' in 'subfield'" ); + return -1; + } + for (p = ptr->children; p ; p = p->next) + if (p->type == XML_TEXT_NODE) + ctrl_data_len += strlen((const char *)p->content); + ctrl_data_buf = nmem_malloc(nmem, ctrl_data_len+1); + strcpy(ctrl_data_buf, (const char *)ptr_code->content); + for (p = ptr->children; p ; p = p->next) + if (p->type == XML_TEXT_NODE) + strcat(ctrl_data_buf, (const char *)p->content); + yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len); + } + else + { + yaz_marc_cprintf( + mt, "Expected element 'subfield', got '%.80s'", ptr->name); + return -1; + } + } + } + return 0; +} + +static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p) +{ + int indicator_length; + int identifier_length; + int base_address; + int length_data_entry; + int length_starting; + int length_implementation; + const char *leader = 0; + const xmlNode *ptr = *ptr_p; + + for(; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE) + { + if (!strcmp((const char *) ptr->name, "leader")) + { + xmlNode *p = ptr->children; + for(; p; p = p->next) + if (p->type == XML_TEXT_NODE) + leader = (const char *) p->content; + break; + } + else + { + yaz_marc_cprintf( + mt, "Expected element 'leader', got '%.80s'", ptr->name); + return -1; + } + } + if (!leader) + { + yaz_marc_cprintf(mt, "Missing element 'leader'"); + return -1; + } + if (strlen(leader) != 24) + { + yaz_marc_cprintf(mt, "Bad length %d of leader data." 
+ " Must have length of 24 characters", strlen(leader)); + return -1; + } + yaz_marc_set_leader(mt, leader, + &indicator_length, + &identifier_length, + &base_address, + &length_data_entry, + &length_starting, + &length_implementation); + *ptr_p = ptr; + return 0; +} + +static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr) +{ + for(; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE) + { + if (!strcmp((const char *) ptr->name, "controlfield")) + { + const xmlNode *ptr_tag = 0; + struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + if (!strcmp((const char *)attr->name, "tag")) + ptr_tag = attr->children; + else + { + yaz_marc_cprintf( + mt, "Bad attribute '%.80s' for 'controlfield'", + attr->name); + return -1; + } + if (!ptr_tag) + { + yaz_marc_cprintf( + mt, "Missing attribute 'tag' for 'controlfield'" ); + return -1; + } + yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children); + } + else if (!strcmp((const char *) ptr->name, "datafield")) + { + char indstr[11]; /* 0(unused), 1,....9, + zero term */ + const xmlNode *ptr_tag = 0; + struct _xmlAttr *attr; + int i; + for (i = 0; i<11; i++) + indstr[i] = '\0'; + for (attr = ptr->properties; attr; attr = attr->next) + if (!strcmp((const char *)attr->name, "tag")) + ptr_tag = attr->children; + else if (strlen((const char *)attr->name) == 4 && + !memcmp(attr->name, "ind", 3)) + { + int no = atoi((const char *)attr->name+3); + if (attr->children + && attr->children->type == XML_TEXT_NODE) + indstr[no] = attr->children->content[0]; + } + else + { + yaz_marc_cprintf( + mt, "Bad attribute '%.80s' for 'datafield'", + attr->name); + return -1; + } + if (!ptr_tag) + { + yaz_marc_cprintf( + mt, "Missing attribute 'tag' for 'datafield'" ); + return -1; + } + /* note that indstr[0] is unused so we use indstr[1..] 
*/ + yaz_marc_add_datafield_xml(mt, ptr_tag, + indstr+1, strlen(indstr+1)); + + if (yaz_marc_read_xml_subfields(mt, ptr->children)) + return -1; + } + else + { + yaz_marc_cprintf(mt, + "Expected element controlfield or datafield," + " got %.80s", ptr->name); + return -1; + } + } + return 0; +} +#endif + +int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr) +{ +#if YAZ_HAVE_XML2 + for(; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE) + { + if (!strcmp((const char *) ptr->name, "record")) + break; + else + { + yaz_marc_cprintf( + mt, "Unknown element '%.80s' in MARC XML reader", + ptr->name); + return -1; + } + } + if (!ptr) + { + yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record"); + return -1; + } + /* ptr points to record node now */ + ptr = ptr->children; + if (yaz_marc_read_xml_leader(mt, &ptr)) + return -1; + return yaz_marc_read_xml_fields(mt, ptr->next); +#else + return -1; +#endif +} + + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/src/marcdisp.c b/src/marcdisp.c index 994c5a6..321cd72 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2006, Index Data ApS * See the file LICENSE for details. 
* - * $Id: marcdisp.c,v 1.37 2006-12-13 11:25:17 adam Exp $ + * $Id: marcdisp.c,v 1.38 2006-12-15 12:37:18 adam Exp $ */ /** @@ -32,8 +32,6 @@ #include #endif -static void yaz_marc_reset(yaz_marc_t mt); - /** \brief node types for yaz_marc_node */ enum YAZ_MARC_NODE_TYPE { @@ -120,11 +118,16 @@ void yaz_marc_destroy(yaz_marc_t mt) xfree(mt); } +NMEM yaz_marc_get_nmem(yaz_marc_t mt) +{ + return mt->nmem; +} + static int marc_exec_leader(const char *leader_spec, char *leader, size_t size); -struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt) +static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt) { struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n)); n->next = 0; @@ -133,6 +136,18 @@ struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt) return n; } +#if YAZ_HAVE_XML2 +void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, + const xmlNode *ptr_data) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_CONTROLFIELD; + n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem); + n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem); +} +#endif + + void yaz_marc_add_comment(yaz_marc_t mt, char *comment) { struct yaz_marc_node *n = yaz_marc_add_node(mt); @@ -161,6 +176,11 @@ void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...) 
va_end (ap); } +int yaz_marc_get_debug(yaz_marc_t mt) +{ + return mt->debug; +} + void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len) { struct yaz_marc_node *n = yaz_marc_add_node(mt); @@ -190,17 +210,6 @@ void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag, } } -#if YAZ_HAVE_XML2 -void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, - const xmlNode *ptr_data) -{ - struct yaz_marc_node *n = yaz_marc_add_node(mt); - n->which = YAZ_MARC_CONTROLFIELD; - n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem); - n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem); -} -#endif - void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag, const char *indicator, size_t indicator_len) { @@ -266,23 +275,13 @@ static int atoi_n_check(const char *buf, int size, int *val) return 1; } -/** \brief reads the MARC 24 bytes leader and checks content - \param mt handle - \param leader of the 24 byte leader - \param indicator_length indicator length - \param identifier_length identifier length - \param base_address base address - \param length_data_entry length of data entry - \param length_starting length of starting - \param length_implementation length of implementation defined data -*/ -static void yaz_marc_read_leader(yaz_marc_t mt, const char *leader_c, - int *indicator_length, - int *identifier_length, - int *base_address, - int *length_data_entry, - int *length_starting, - int *length_implementation) +void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c, + int *indicator_length, + int *identifier_length, + int *base_address, + int *length_data_entry, + int *length_starting, + int *length_implementation) { char leader[24]; @@ -384,7 +383,7 @@ static size_t cdata_one_character(yaz_marc_t mt, const char *buf) return 1; /* we don't know */ } -static void yaz_marc_reset(yaz_marc_t mt) +void yaz_marc_reset(yaz_marc_t mt) { nmem_reset(mt->nmem); mt->nodes = 0; @@ -768,391 +767,6 @@ int 
yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) return 0; } -#if YAZ_HAVE_XML2 -int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr) -{ - for (; ptr; ptr = ptr->next) - { - if (ptr->type == XML_ELEMENT_NODE) - { - if (!strcmp((const char *) ptr->name, "subfield")) - { - size_t ctrl_data_len = 0; - char *ctrl_data_buf = 0; - const xmlNode *p = 0, *ptr_code = 0; - struct _xmlAttr *attr; - for (attr = ptr->properties; attr; attr = attr->next) - if (!strcmp((const char *)attr->name, "code")) - ptr_code = attr->children; - else - { - yaz_marc_cprintf( - mt, "Bad attribute '%.80s' for 'subfield'", - attr->name); - return -1; - } - if (!ptr_code) - { - yaz_marc_cprintf( - mt, "Missing attribute 'code' for 'subfield'" ); - return -1; - } - if (ptr_code->type == XML_TEXT_NODE) - { - ctrl_data_len = - strlen((const char *)ptr_code->content); - } - else - { - yaz_marc_cprintf( - mt, "Missing value for 'code' in 'subfield'" ); - return -1; - } - for (p = ptr->children; p ; p = p->next) - if (p->type == XML_TEXT_NODE) - ctrl_data_len += strlen((const char *)p->content); - ctrl_data_buf = nmem_malloc(mt->nmem, ctrl_data_len+1); - strcpy(ctrl_data_buf, (const char *)ptr_code->content); - for (p = ptr->children; p ; p = p->next) - if (p->type == XML_TEXT_NODE) - strcat(ctrl_data_buf, (const char *)p->content); - yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len); - } - else - { - yaz_marc_cprintf( - mt, "Expected element 'subfield', got '%.80s'", ptr->name); - return -1; - } - } - } - return 0; -} - -static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p) -{ - int indicator_length; - int identifier_length; - int base_address; - int length_data_entry; - int length_starting; - int length_implementation; - const char *leader = 0; - const xmlNode *ptr = *ptr_p; - - for(; ptr; ptr = ptr->next) - if (ptr->type == XML_ELEMENT_NODE) - { - if (!strcmp((const char *) ptr->name, "leader")) - { - xmlNode *p = ptr->children; - for(; p; p = p->next) - if 
(p->type == XML_TEXT_NODE) - leader = (const char *) p->content; - break; - } - else - { - yaz_marc_cprintf( - mt, "Expected element 'leader', got '%.80s'", ptr->name); - return -1; - } - } - if (!leader) - { - yaz_marc_cprintf(mt, "Missing element 'leader'"); - return -1; - } - if (strlen(leader) != 24) - { - yaz_marc_cprintf(mt, "Bad length %d of leader data." - " Must have length of 24 characters", strlen(leader)); - return -1; - } - yaz_marc_read_leader(mt, leader, - &indicator_length, - &identifier_length, - &base_address, - &length_data_entry, - &length_starting, - &length_implementation); - *ptr_p = ptr; - return 0; -} - -static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr) -{ - for(; ptr; ptr = ptr->next) - if (ptr->type == XML_ELEMENT_NODE) - { - if (!strcmp((const char *) ptr->name, "controlfield")) - { - const xmlNode *ptr_tag = 0; - struct _xmlAttr *attr; - for (attr = ptr->properties; attr; attr = attr->next) - if (!strcmp((const char *)attr->name, "tag")) - ptr_tag = attr->children; - else - { - yaz_marc_cprintf( - mt, "Bad attribute '%.80s' for 'controlfield'", - attr->name); - return -1; - } - if (!ptr_tag) - { - yaz_marc_cprintf( - mt, "Missing attribute 'tag' for 'controlfield'" ); - return -1; - } - yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children); - } - else if (!strcmp((const char *) ptr->name, "datafield")) - { - char indstr[11]; /* 0(unused), 1,....9, + zero term */ - const xmlNode *ptr_tag = 0; - struct _xmlAttr *attr; - int i; - for (i = 0; i<11; i++) - indstr[i] = '\0'; - for (attr = ptr->properties; attr; attr = attr->next) - if (!strcmp((const char *)attr->name, "tag")) - ptr_tag = attr->children; - else if (strlen((const char *)attr->name) == 4 && - !memcmp(attr->name, "ind", 3)) - { - int no = atoi((const char *)attr->name+3); - if (attr->children - && attr->children->type == XML_TEXT_NODE) - indstr[no] = attr->children->content[0]; - } - else - { - yaz_marc_cprintf( - mt, "Bad attribute '%.80s' for 
'datafield'", - attr->name); - return -1; - } - if (!ptr_tag) - { - yaz_marc_cprintf( - mt, "Missing attribute 'tag' for 'datafield'" ); - return -1; - } - /* note that indstr[0] is unused so we use indstr[1..] */ - yaz_marc_add_datafield_xml(mt, ptr_tag, - indstr+1, strlen(indstr+1)); - - if (yaz_marc_read_xml_subfields(mt, ptr->children)) - return -1; - } - else - { - yaz_marc_cprintf(mt, - "Expected element controlfield or datafield," - " got %.80s", ptr->name); - return -1; - } - } - return 0; -} -#endif - -int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr) -{ -#if YAZ_HAVE_XML2 - for(; ptr; ptr = ptr->next) - if (ptr->type == XML_ELEMENT_NODE) - { - if (!strcmp((const char *) ptr->name, "record")) - break; - else - { - yaz_marc_cprintf( - mt, "Unknown element '%.80s' in MARC XML reader", - ptr->name); - return -1; - } - } - if (!ptr) - { - yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record"); - return -1; - } - /* ptr points to record node now */ - ptr = ptr->children; - if (yaz_marc_read_xml_leader(mt, &ptr)) - return -1; - return yaz_marc_read_xml_fields(mt, ptr->next); -#else - return -1; -#endif -} - -int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize) -{ - int entry_p; - int record_length; - int indicator_length; - int identifier_length; - int end_of_directory; - int base_address; - int length_data_entry; - int length_starting; - int length_implementation; - - yaz_marc_reset(mt); - - record_length = atoi_n (buf, 5); - if (record_length < 25) - { - yaz_marc_cprintf(mt, "Record length %d < 24", record_length); - return -1; - } - /* ballout if bsize is known and record_length is less than that */ - if (bsize != -1 && record_length > bsize) - { - yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d", - record_length, bsize); - return -1; - } - if (mt->debug) - yaz_marc_cprintf(mt, "Record length %5d", record_length); - - yaz_marc_read_leader(mt, buf, - &indicator_length, - &identifier_length, - 
&base_address, - &length_data_entry, - &length_starting, - &length_implementation); - - /* First pass. determine length of directory & base of data */ - for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) - { - /* length of directory entry */ - int l = 3 + length_data_entry + length_starting; - if (entry_p + l >= record_length) - { - yaz_marc_cprintf(mt, "Directory offset %d: end of record." - " Missing FS char", entry_p); - return -1; - } - if (mt->debug) - { - yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s", - entry_p, buf+entry_p); - } - /* Check for digits in length info */ - while (--l >= 3) - if (!isdigit(*(const unsigned char *) (buf + entry_p+l))) - break; - if (l >= 3) - { - /* Not all digits, so stop directory scan */ - yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data" - " length and/or length starting", entry_p); - break; - } - entry_p += 3 + length_data_entry + length_starting; - } - end_of_directory = entry_p; - if (base_address != entry_p+1) - { - yaz_marc_cprintf(mt, "Base address not at end of directory," - " base %d, end %d", base_address, entry_p+1); - } - - /* Second pass. parse control - and datafields */ - for (entry_p = 24; entry_p != end_of_directory; ) - { - int data_length; - int data_offset; - int end_offset; - int i; - char tag[4]; - int identifier_flag = 0; - int entry_p0 = entry_p; - - memcpy (tag, buf+entry_p, 3); - entry_p += 3; - tag[3] = '\0'; - data_length = atoi_n(buf+entry_p, length_data_entry); - entry_p += length_data_entry; - data_offset = atoi_n(buf+entry_p, length_starting); - entry_p += length_starting; - i = data_offset + base_address; - end_offset = i+data_length-1; - - if (data_length <= 0 || data_offset < 0) - break; - - if (mt->debug) - { - yaz_marc_cprintf(mt, "Tag: %s. 
Directory offset %d: data-length %d," - " data-offset %d", - tag, entry_p0, data_length, data_offset); - } - if (end_offset >= record_length) - { - yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d", - entry_p0, end_offset, record_length); - break; - } - - if (memcmp (tag, "00", 2)) - identifier_flag = 1; /* if not 00X assume subfields */ - else if (indicator_length < 4 && indicator_length > 0) - { - /* Danmarc 00X have subfields */ - if (buf[i + indicator_length] == ISO2709_IDFS) - identifier_flag = 1; - else if (buf[i + indicator_length + 1] == ISO2709_IDFS) - identifier_flag = 2; - } - - if (identifier_flag) - { - /* datafield */ - i += identifier_flag-1; - yaz_marc_add_datafield(mt, tag, buf+i, indicator_length); - i += indicator_length; - - while (i < end_offset && - buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) - { - int code_offset = i+1; - - i ++; - while (i < end_offset && - buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS && - buf[i] != ISO2709_FS) - i++; - yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset); - } - } - else - { - /* controlfield */ - int i0 = i; - while (i < end_offset && - buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) - i++; - yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0); - } - if (i < end_offset) - { - yaz_marc_cprintf(mt, "Separator but not at end of field length=%d", - data_length); - } - if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) - { - yaz_marc_cprintf(mt, "No separator at end of field length=%d", - data_length); - } - } - return record_length; -} int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) { @@ -1313,7 +927,6 @@ static int marc_exec_leader(const char *leader_spec, char *leader, size_t size) return 0; } - /* * Local variables: * c-basic-offset: 4 diff --git a/win/makefile b/win/makefile index b5fabec..2fb9914 100644 --- a/win/makefile +++ b/win/makefile @@ -1,6 +1,6 @@ # Copyright (C) 1994-2006, Index Data ApS # All rights reserved. 
-# $Id: makefile,v 1.121 2006-12-13 10:36:00 adam Exp $ +# $Id: makefile,v 1.122 2006-12-15 12:37:18 adam Exp $ # # Programmed by # Heikki Levanto & Adam Dickmeiss @@ -368,6 +368,9 @@ MISC_OBJS= \ $(OBJDIR)\atoin.obj \ $(OBJDIR)\log.obj \ $(OBJDIR)\marcdisp.obj \ + $(OBJDIR)\marc_read_xml.obj \ + $(OBJDIR)\marc_read_iso2709.obj \ + $(OBJDIR)\marc_read_line.obj \ $(OBJDIR)\nmem.obj \ $(OBJDIR)\nmemsdup.obj \ $(OBJDIR)\oid.obj \