1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
7 * \file marc_read_xml.c
8 * \brief Implements reading of MARC as XML
22 #include <yaz/marcdisp.h>
23 #include <yaz/wrbuf.h>
24 #include <yaz/yaz-util.h>
25 #include <yaz/nmem_xml.h>
28 #include <libxml/tree.h>
// Walks the sibling list that starts at ptr and, for every element node
// named subfield, hands one string to yaz_marc_add_subfield.
//   mt:  MARC handle; buffers built here come from its NMEM.
//   ptr: first node of the sibling list (followed through ptr->next).
// The string is the text of the code attribute immediately followed by the
// concatenated text children of the element.
32 int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
34 NMEM nmem = yaz_marc_get_nmem(mt);
35 for (; ptr; ptr = ptr->next)
37 if (ptr->type == XML_ELEMENT_NODE)
39 if (!strcmp((const char *) ptr->name, "subfield"))
41 size_t ctrl_data_len = 0;
42 char *ctrl_data_buf = 0;
43 const xmlNode *p = 0, *ptr_code = 0;
44 struct _xmlAttr *attr;
// Scan the attributes; ptr_code ends up as the child node of the code attribute.
45 for (attr = ptr->properties; attr; attr = attr->next)
46 if (!strcmp((const char *)attr->name, "code"))
47 ptr_code = attr->children;
51 mt, "Bad attribute '%.80s' for 'subfield'",
58 mt, "Missing attribute 'code' for 'subfield'" );
// The code value is only usable when the attribute child is a text node;
// its byte length seeds the total length.
61 if (ptr_code->type == XML_TEXT_NODE)
64 strlen((const char *)ptr_code->content);
69 mt, "Missing value for 'code' in 'subfield'" );
// Pass 1: add the byte length of every text child to the total.
72 for (p = ptr->children; p ; p = p->next)
73 if (p->type == XML_TEXT_NODE)
74 ctrl_data_len += strlen((const char *)p->content);
// +1 leaves room for the terminating NUL written by strcpy and strcat.
75 ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1);
76 strcpy(ctrl_data_buf, (const char *)ptr_code->content);
// Pass 2: append the same text children, in order, after the code.
77 for (p = ptr->children; p ; p = p->next)
78 if (p->type == XML_TEXT_NODE)
79 strcat(ctrl_data_buf, (const char *)p->content);
// The length passed on counts code bytes plus data bytes, without the NUL.
80 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
85 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
// Copies name into the caller-supplied tag_buffer.
//   name:       NUL-terminated source string.
//   tag_buffer: destination, declared with room for 5 bytes.
// NOTE(review): strcpy is unbounded, so the copy is only safe when name has
// at most 4 characters; presumably the length computed below guards it.
// Confirm the check before relying on this.
93 const char *tag_value_extract(const char *name, char tag_buffer[5])
95 size_t length = strlen(name);
98 strcpy(tag_buffer, name);
104 // Given an xmlNode ptr, extract a value from either the element name or a given attribute
//   ptr:            node whose name and attributes are inspected.
//   attribute_name: attribute to look up when the value is not taken from the name.
// Both visible return values are built with nmem-based helpers (nmem_strdup,
// nmem_text_node_cdata).
105 char *element_attribute_value_extract(const xmlNode *ptr,
106 const char *attribute_name,
109 const char *name = (const char *) ptr->name;
110 size_t length = strlen(name);
// Value taken from the element name: a copy of everything after its first
// character. NOTE(review): presumably chosen when the name is longer than
// one character; confirm against the length check.
113 return nmem_strdup(nmem, name+1);
114 // TODO Extract from attribute where matches attribute_name
// NOTE(review): the loop below already returns the text of the attribute
// whose name equals attribute_name, so the TODO above may be stale.
115 for (attr = ptr->properties; attr; attr = attr->next)
116 if (!strcmp((const char *)attr->name, attribute_name))
117 return nmem_text_node_cdata(attr->children, nmem);
// Subfield reader for the short element names: walks the sibling list that
// starts at ptr and hands one string per matching element to
// yaz_marc_add_subfield.
//   mt:  MARC handle; buffers built here come from its NMEM.
//   ptr: first node of the sibling list (followed through ptr->next).
// The string is the code value followed by the concatenated text children.
122 int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
124 for (; ptr; ptr = ptr->next)
126 if (ptr->type == XML_ELEMENT_NODE)
// Only the first character is compared, so any element name that begins
// with s matches.
128 if (!strncmp((const char *) ptr->name, "s", 1))
130 NMEM nmem = yaz_marc_get_nmem(mt);
132 size_t ctrl_data_len = 0;
133 char *ctrl_data_buf = 0;
// The code comes from element_attribute_value_extract, which looks at the
// element name and the code attribute.
134 const char *tag_value = element_attribute_value_extract(ptr, "code", nmem);
138 mt, "Missing 'code' value for 'subfield'" );
// The byte length of the code seeds the total length.
142 ctrl_data_len = strlen((const char *) tag_value);
143 // Add the byte length of every text child to the total
144 for (p = ptr->children; p ; p = p->next)
145 if (p->type == XML_TEXT_NODE)
146 ctrl_data_len += strlen((const char *)p->content);
147 // Allocate memory for the code value (1 character, which can be multi-byte), the data and the terminating NUL
148 ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1);
149 // Build a string with "<Code><data>"
150 strcpy(ctrl_data_buf, (const char *) tag_value);
151 for (p = ptr->children; p ; p = p->next)
152 if (p->type == XML_TEXT_NODE)
153 strcat(ctrl_data_buf, (const char *)p->content);
// The length passed on counts code bytes plus data bytes, without the NUL.
154 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
159 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
// Locates the leader element in the sibling list at *ptr_p, checks that its
// text is exactly 24 bytes long and passes it to yaz_marc_set_leader.
//   mt:    MARC handle that receives the leader.
//   ptr_p: address of the node pointer where the scan starts.
// The int locals only receive the values yaz_marc_set_leader writes back;
// nothing visible here reads them afterwards.
168 static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
170 int indicator_length;
171 int identifier_length;
173 int length_data_entry;
175 int length_implementation;
176 const char *leader = 0;
177 const xmlNode *ptr = *ptr_p;
179 for(; ptr; ptr = ptr->next)
180 if (ptr->type == XML_ELEMENT_NODE)
// Accepts the full name leader as well as any name that begins with l.
// The prefix test alone already covers the full name.
182 if ( !strcmp( (const char *) ptr->name, "leader") ||
183 (!strncmp((const char *) ptr->name, "l", 1) ))
// leader points straight at the content of a text child; no copy is made.
185 xmlNode *p = ptr->children;
186 for(; p; p = p->next)
187 if (p->type == XML_TEXT_NODE)
188 leader = (const char *) p->content;
194 mt, "Expected element 'leader', got '%.80s'", ptr->name);
199 yaz_marc_cprintf(mt, "Missing element 'leader'");
202 if (strlen(leader) != 24)
// NOTE(review): the %d conversion is paired with strlen(leader), which has
// type size_t. If yaz_marc_cprintf follows printf conventions the argument
// should be cast to int (or the conversion changed) - confirm.
204 yaz_marc_cprintf(mt, "Bad length %d of leader data."
205 " Must have length of 24 characters", strlen(leader));
208 yaz_marc_set_leader(mt, leader,
214 &length_implementation);
// Reads the field elements of a record with long element names: walks the
// sibling list at ptr and adds a control field for every controlfield
// element and a data field (followed by its subfields) for every datafield
// element.
//   mt:  MARC handle the fields are added to.
//   ptr: first node of the sibling list (followed through ptr->next).
219 static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
221 for(; ptr; ptr = ptr->next)
222 if (ptr->type == XML_ELEMENT_NODE)
224 if (!strcmp( (const char *) ptr->name, "controlfield"))
226 const xmlNode *ptr_tag = 0;
227 struct _xmlAttr *attr;
// ptr_tag ends up as the child node of the tag attribute.
228 for (attr = ptr->properties; attr; attr = attr->next)
229 if (!strcmp((const char *)attr->name, "tag"))
230 ptr_tag = attr->children;
234 mt, "Bad attribute '%.80s' for 'controlfield'",
241 mt, "Missing attribute 'tag' for 'controlfield'" );
// The tag node and the element children (the field data) are passed on as
// XML nodes.
244 yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
246 else if (!strcmp((const char *) ptr->name, "datafield"))
248 char indstr[11]; /* 0(unused), 1,....9, + zero term */
249 const xmlNode *ptr_tag = 0;
250 struct _xmlAttr *attr;
// NOTE(review): presumably this loop zero-fills indstr, which the
// strlen(indstr+1) call further down relies on - confirm.
252 for (i = 0; i<11; i++)
254 for (attr = ptr->properties; attr; attr = attr->next)
255 if (!strcmp((const char *)attr->name, "tag"))
256 ptr_tag = attr->children;
// Indicator attributes: exactly four characters, starting with ind.
257 else if (strlen((const char *)attr->name) == 4 &&
258 !memcmp(attr->name, "ind", 3))
// Only one character follows the prefix, so no is in 0..9 and stays inside
// indstr; a non-digit yields 0, which lands in the unused slot.
260 int no = atoi((const char *)attr->name+3);
262 && attr->children->type == XML_TEXT_NODE)
// Only the first byte of the attribute text is kept as the indicator.
263 indstr[no] = attr->children->content[0];
268 mt, "Bad attribute '%.80s' for 'datafield'",
274 mt, "Missing attribute 'tag' for 'datafield'" );
277 /* note that indstr[0] is unused so we use indstr[1..] */
// The indicator count passed on is the length of the run of set indicators
// starting at slot 1; a gap (for example slot 1 empty, slot 2 set) ends it.
278 yaz_marc_add_datafield_xml(mt, ptr_tag,
279 indstr+1, strlen(indstr+1));
// The children of the datafield element are read as its subfields.
281 if (yaz_marc_read_xml_subfields(mt, ptr->children))
287 "Expected element controlfield or datafield,"
288 " got %.80s", ptr->name);
// Reads the field elements of a record with short element names: walks the
// sibling list at ptr and adds a control field for every element whose name
// begins with c and a data field (followed by its subfields) for every
// element whose name begins with d.
//   mt:  MARC handle the fields are added to; its NMEM backs the buffers.
//   ptr: first node of the sibling list (followed through ptr->next).
296 static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
298 for(; ptr; ptr = ptr->next)
299 if (ptr->type == XML_ELEMENT_NODE)
// Only the first character of the element name is compared.
301 if (!strncmp( (const char *) ptr->name, "c", 1))
303 NMEM nmem = yaz_marc_get_nmem(mt);
// The tag comes from element_attribute_value_extract, which looks at the
// element name and the tag attribute.
304 char *tag_value = element_attribute_value_extract(ptr, "tag", nmem);
308 mt, "Missing attribute 'tag' for 'controlfield'" );
311 yaz_marc_add_controlfield_xml2(mt, tag_value, ptr->children);
313 else if (!strncmp((const char *) ptr->name, "d",1))
315 struct _xmlAttr *attr;
316 NMEM nmem = yaz_marc_get_nmem(mt);
// The indicator buffer is taken from the NMEM rather than the stack and is
// cleared by the loop that follows.
318 char *indstr = nmem_malloc(nmem, 11); /* 0(unused), 1,....9, + zero term */
320 for (index = 0; index < 11; index++)
321 indstr[index] = '\0';
322 tag_value = element_attribute_value_extract(ptr, "tag", nmem);
326 mt, "Missing attribute 'tag' for 'datafield'" );
// Indicator attributes: exactly two characters, the first being i.
329 for (attr = ptr->properties; attr; attr = attr->next)
330 if (strlen((const char *)attr->name) == 2 &&
331 attr->name[0] == 'i')
333 //extract indicator attribute from i#="Y" pattern
// Only one character follows the i, so no is in 0..9 and stays inside the
// 11-byte buffer; a non-digit yields 0, which lands in the unused slot.
334 int no = atoi((const char *)attr->name+1);
336 && attr->children->type == XML_TEXT_NODE)
// Only the first byte of the attribute text is kept as the indicator.
337 indstr[no] = attr->children->content[0];
342 mt, "Bad attribute '%.80s' for 'datafield'",
345 /* note that indstr[0] is unused so we use indstr[1..] */
346 yaz_marc_add_datafield_xml2(mt, tag_value, indstr+1);
// The children of the element are read as its subfields.
347 if (yaz_marc_read_turbo_xml_subfields(mt, ptr->children /*, indstr */))
353 "Expected element controlfield or datafield,"
354 " got %.80s", ptr->name);
// Entry point for reading a MARC record from an XML tree. Finds the record
// element in the sibling list at ptr, picks the format from its name, reads
// the leader and then dispatches to the matching field reader.
//   mt:  MARC handle that receives the record.
//   ptr: first node of the sibling list to search.
365 int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
370 for(; ptr; ptr = ptr->next)
371 if (ptr->type == XML_ELEMENT_NODE)
// Exact name matches here: record selects the long element names, r the
// short ones.
373 if (!strcmp((const char *) ptr->name, "record"))
375 format = YAZ_MARC_MARCXML;
378 else if (!strcmp((const char *) ptr->name, "r"))
380 format = YAZ_MARC_TURBOMARC;
386 mt, "Unknown element '%.80s' in MARC XML reader",
393 yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
396 /* ptr points to record node now */
// ptr is passed by address so the leader reader can move it.
398 if (yaz_marc_read_xml_leader(mt, &ptr))
// Fields are read starting at ptr->next. NOTE(review): presumably ptr is
// the leader element at this point, so the leader itself is skipped -
// confirm against the leader reader.
403 case YAZ_MARC_MARCXML:
404 return yaz_marc_read_xml_fields(mt, ptr->next);
405 case YAZ_MARC_TURBOMARC:
406 return yaz_marc_read_turbo_xml_fields(mt, ptr->next);
416 * c-file-style: "Stroustrup"
417 * indent-tabs-mode: nil
419 * vim: shiftwidth=4 tabstop=8 expandtab