Update source headers for 2008. Omit CVS ID keyword subst.
[yaz-moved-to-github.git] / src / marcdisp.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2008 Index Data
3  * See the file LICENSE for details.
4  */
5
6 /**
7  * \file marcdisp.c
8  * \brief Implements MARC conversion utilities
9  */
10
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <ctype.h>
23 #include <yaz/marcdisp.h>
24 #include <yaz/wrbuf.h>
25 #include <yaz/yaz-util.h>
26 #include <yaz/nmem_xml.h>
27 #include <yaz/snprintf.h>
28
29 #if YAZ_HAVE_XML2
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
32 #endif
33
/** \brief tracks wrapping of written records in a collection element */
enum yaz_collection_state {
    /* records are written stand-alone, each carrying its own xmlns */
    no_collection,
    /* collection requested; the opening collection tag is not yet written */
    collection_first,
    /* opening tag has been written; the trailer must close it */
    collection_second
};
39    
/** \brief node types for yaz_marc_node
 *
 * Selects which member of the union in struct yaz_marc_node is valid.
 */
enum YAZ_MARC_NODE_TYPE
{
    YAZ_MARC_DATAFIELD,     /* u.datafield: tag, indicator and subfields */
    YAZ_MARC_CONTROLFIELD,  /* u.controlfield: tag and data */
    YAZ_MARC_COMMENT,       /* u.comment: free text comment */
    YAZ_MARC_LEADER         /* u.leader: record leader string */
};
48
/** \brief represents a data field */
struct yaz_marc_datafield {
    /* field tag */
    char *tag;
    /* indicator characters, one per indicator position */
    char *indicator;
    /* singly linked list of subfields; 0 when the field has none */
    struct yaz_marc_subfield *subfields;
};
55
/** \brief represents a control field (tag plus data, no subfields) */
struct yaz_marc_controlfield {
    /* field tag */
    char *tag;
    /* field content as a NUL-terminated string */
    char *data;
};
61
/** \brief a comment node
 *
 * NOTE(review): struct yaz_marc_node stores its comment as a plain char *
 * and does not use this type in the code visible here.
 */
struct yaz_marc_comment {
    /* comment text */
    char *comment;
};
66
67 /** \brief MARC node */
68 struct yaz_marc_node {
69     enum YAZ_MARC_NODE_TYPE which;
70     union {
71         struct yaz_marc_datafield datafield;
72         struct yaz_marc_controlfield controlfield;
73         char *comment;
74         char *leader;
75     } u;
76     struct yaz_marc_node *next;
77 };
78
/** \brief represents a subfield */
struct yaz_marc_subfield {
    /* subfield code immediately followed by the subfield data */
    char *code_data;
    /* following subfield of the same data field; 0 for the last one */
    struct yaz_marc_subfield *next;
};
84
85 /** \brief the internals of a yaz_marc_t handle */
86 struct yaz_marc_t_ {
87     WRBUF m_wr;
88     NMEM nmem;
89     int xml;
90     int debug;
91     int write_using_libxml2;
92     enum yaz_collection_state enable_collection;
93     yaz_iconv_t iconv_cd;
94     char subfield_str[8];
95     char endline_str[8];
96     char *leader_spec;
97     struct yaz_marc_node *nodes;
98     struct yaz_marc_node **nodes_pp;
99     struct yaz_marc_subfield **subfield_pp;
100 };
101
102 yaz_marc_t yaz_marc_create(void)
103 {
104     yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
105     mt->xml = YAZ_MARC_LINE;
106     mt->debug = 0;
107     mt->write_using_libxml2 = 0;
108     mt->enable_collection = no_collection;
109     mt->m_wr = wrbuf_alloc();
110     mt->iconv_cd = 0;
111     mt->leader_spec = 0;
112     strcpy(mt->subfield_str, " $");
113     strcpy(mt->endline_str, "\n");
114
115     mt->nmem = nmem_create();
116     yaz_marc_reset(mt);
117     return mt;
118 }
119
120 void yaz_marc_destroy(yaz_marc_t mt)
121 {
122     if (!mt)
123         return ;
124     nmem_destroy(mt->nmem);
125     wrbuf_destroy(mt->m_wr);
126     xfree(mt->leader_spec);
127     xfree(mt);
128 }
129
130 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
131 {
132     return mt->nmem;
133 }
134
135 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
136 {
137     wrbuf_iconv_reset(wr, mt->iconv_cd);
138 }
139
140 static int marc_exec_leader(const char *leader_spec, char *leader,
141                             size_t size);
142
143
144 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
145 {
146     struct yaz_marc_node *n = (struct yaz_marc_node *)
147         nmem_malloc(mt->nmem, sizeof(*n));
148     n->next = 0;
149     *mt->nodes_pp = n;
150     mt->nodes_pp = &n->next;
151     return n;
152 }
153
#if YAZ_HAVE_XML2
/** \brief appends a control field whose tag and data come from XML nodes
 *  \param mt handle
 *  \param ptr_tag XML node passed to nmem_text_node_cdata to obtain the tag
 *  \param ptr_data XML node passed to nmem_text_node_cdata to obtain the data
 */
void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                   const xmlNode *ptr_data)
{
    struct yaz_marc_node *field = yaz_marc_add_node(mt);

    field->which = YAZ_MARC_CONTROLFIELD;
    field->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
    field->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
}
#endif
164
165
166 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
167 {
168     struct yaz_marc_node *n = yaz_marc_add_node(mt);
169     n->which = YAZ_MARC_COMMENT;
170     n->u.comment = nmem_strdup(mt->nmem, comment);
171 }
172
173 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
174 {
175     va_list ap;
176     char buf[200];
177
178     va_start(ap, fmt);
179     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
180     yaz_marc_add_comment(mt, buf);
181     va_end (ap);
182 }
183
184 int yaz_marc_get_debug(yaz_marc_t mt)
185 {
186     return mt->debug;
187 }
188
189 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
190 {
191     struct yaz_marc_node *n = yaz_marc_add_node(mt);
192     n->which = YAZ_MARC_LEADER;
193     n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
194     marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
195 }
196
197 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
198                                const char *data, size_t data_len)
199 {
200     struct yaz_marc_node *n = yaz_marc_add_node(mt);
201     n->which = YAZ_MARC_CONTROLFIELD;
202     n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
203     n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
204     if (mt->debug)
205     {
206         size_t i;
207         char msg[80];
208
209         sprintf(msg, "controlfield:");
210         for (i = 0; i < 16 && i < data_len; i++)
211             sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
212         if (i < data_len)
213             sprintf(msg + strlen(msg), " ..");
214         yaz_marc_add_comment(mt, msg);
215     }
216 }
217
218 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
219                             const char *indicator, size_t indicator_len)
220 {
221     struct yaz_marc_node *n = yaz_marc_add_node(mt);
222     n->which = YAZ_MARC_DATAFIELD;
223     n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
224     n->u.datafield.indicator =
225         nmem_strdupn(mt->nmem, indicator, indicator_len);
226     n->u.datafield.subfields = 0;
227
228     /* make subfield_pp the current (last one) */
229     mt->subfield_pp = &n->u.datafield.subfields;
230 }
231
#if YAZ_HAVE_XML2
/** \brief appends a data field whose tag comes from an XML node
 *  \param mt handle
 *  \param ptr_tag XML node passed to nmem_text_node_cdata to obtain the tag
 *  \param indicator indicator characters
 *  \param indicator_len number of bytes of indicator to copy
 *
 * Subsequent yaz_marc_add_subfield calls append to this field.
 */
void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                const char *indicator, size_t indicator_len)
{
    struct yaz_marc_node *field = yaz_marc_add_node(mt);
    struct yaz_marc_datafield *df = &field->u.datafield;

    field->which = YAZ_MARC_DATAFIELD;
    df->tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
    df->indicator = nmem_strdupn(mt->nmem, indicator, indicator_len);
    df->subfields = 0;

    /* subfields added from now on go to this field's (empty) list */
    mt->subfield_pp = &df->subfields;
}
#endif
247
248 void yaz_marc_add_subfield(yaz_marc_t mt,
249                            const char *code_data, size_t code_data_len)
250 {
251     if (mt->debug)
252     {
253         size_t i;
254         char msg[80];
255
256         sprintf(msg, "subfield:");
257         for (i = 0; i < 16 && i < code_data_len; i++)
258             sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
259         if (i < code_data_len)
260             sprintf(msg + strlen(msg), " ..");
261         yaz_marc_add_comment(mt, msg);
262     }
263
264     if (mt->subfield_pp)
265     {
266         struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
267             nmem_malloc(mt->nmem, sizeof(*n));
268         n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
269         n->next = 0;
270         /* mark subfield_pp to point to this one, so we append here next */
271         *mt->subfield_pp = n;
272         mt->subfield_pp = &n->next;
273     }
274 }
275
/** \brief converts a fixed-width digit sequence, verifying it first
 *  \param buf buffer holding the digits (need not be NUL-terminated)
 *  \param size number of bytes to inspect and convert
 *  \param val receives the result of atoi_n(buf, size) on success;
 *             left untouched on failure
 *  \retval 1 all size bytes are decimal digits
 *  \retval 0 a non-digit byte was found
 */
int atoi_n_check(const char *buf, int size, int *val)
{
    /* ctype functions need unsigned char values */
    const unsigned char *bytes = (const unsigned char *) buf;
    int pos = 0;

    while (pos < size)
    {
        if (!isdigit(bytes[pos]))
            return 0;
        pos++;
    }
    *val = atoi_n(buf, size);
    return 1;
}
285
286 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
287                          int *indicator_length,
288                          int *identifier_length,
289                          int *base_address,
290                          int *length_data_entry,
291                          int *length_starting,
292                          int *length_implementation)
293 {
294     char leader[24];
295
296     memcpy(leader, leader_c, 24);
297
298     if (!atoi_n_check(leader+10, 1, indicator_length))
299     {
300         yaz_marc_cprintf(mt, 
301                          "Indicator length at offset 10 should hold a digit."
302                          " Assuming 2");
303         leader[10] = '2';
304         *indicator_length = 2;
305     }
306     if (!atoi_n_check(leader+11, 1, identifier_length))
307     {
308         yaz_marc_cprintf(mt, 
309                          "Identifier length at offset 11 should hold a digit."
310                          " Assuming 2");
311         leader[11] = '2';
312         *identifier_length = 2;
313     }
314     if (!atoi_n_check(leader+12, 5, base_address))
315     {
316         yaz_marc_cprintf(mt, 
317                          "Base address at offsets 12..16 should hold a number."
318                          " Assuming 0");
319         *base_address = 0;
320     }
321     if (!atoi_n_check(leader+20, 1, length_data_entry))
322     {
323         yaz_marc_cprintf(mt, 
324                          "Length data entry at offset 20 should hold a digit."
325                          " Assuming 4");
326         *length_data_entry = 4;
327         leader[20] = '4';
328     }
329     if (!atoi_n_check(leader+21, 1, length_starting))
330     {
331         yaz_marc_cprintf(mt,
332                          "Length starting at offset 21 should hold a digit."
333                          " Assuming 5");
334         *length_starting = 5;
335         leader[21] = '5';
336     }
337     if (!atoi_n_check(leader+22, 1, length_implementation))
338     {
339         yaz_marc_cprintf(mt, 
340                          "Length implementation at offset 22 should hold a digit."
341                          " Assuming 0");
342         *length_implementation = 0;
343         leader[22] = '0';
344     }
345
346     if (mt->debug)
347     {
348         yaz_marc_cprintf(mt, "Indicator length      %5d", *indicator_length);
349         yaz_marc_cprintf(mt, "Identifier length     %5d", *identifier_length);
350         yaz_marc_cprintf(mt, "Base address          %5d", *base_address);
351         yaz_marc_cprintf(mt, "Length data entry     %5d", *length_data_entry);
352         yaz_marc_cprintf(mt, "Length starting       %5d", *length_starting);
353         yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
354     }
355     yaz_marc_add_leader(mt, leader, 24);
356 }
357
358 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
359 {
360     strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
361     mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
362 }
363
364 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
365 {
366     strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
367     mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
368 }
369
370 /* try to guess how many bytes the identifier really is! */
371 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
372 {
373     if (mt->iconv_cd)
374     {
375         size_t i;
376         for (i = 1; i<5; i++)
377         {
378             char outbuf[12];
379             size_t outbytesleft = sizeof(outbuf);
380             char *outp = outbuf;
381             const char *inp = buf;
382
383             size_t inbytesleft = i;
384             size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
385                                  &outp, &outbytesleft);
386             if (r != (size_t) (-1))
387                 return i;  /* got a complete sequence */
388         }
389         return 1; /* giving up */
390     }
391     return 1; /* we don't know */
392 }
393                               
394 void yaz_marc_reset(yaz_marc_t mt)
395 {
396     nmem_reset(mt->nmem);
397     mt->nodes = 0;
398     mt->nodes_pp = &mt->nodes;
399     mt->subfield_pp = 0;
400 }
401
402 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
403 {
404     struct yaz_marc_node *n;
405     int identifier_length;
406     const char *leader = 0;
407
408     for (n = mt->nodes; n; n = n->next)
409         if (n->which == YAZ_MARC_LEADER)
410         {
411             leader = n->u.leader;
412             break;
413         }
414     
415     if (!leader)
416         return -1;
417     if (!atoi_n_check(leader+11, 1, &identifier_length))
418         return -1;
419
420     for (n = mt->nodes; n; n = n->next)
421     {
422         switch(n->which)
423         {
424         case YAZ_MARC_COMMENT:
425             wrbuf_iconv_write(wr, mt->iconv_cd, 
426                               n->u.comment, strlen(n->u.comment));
427             wrbuf_puts(wr, ")\n");
428             break;
429         default:
430             break;
431         }
432     }
433     return 0;
434 }
435
436
437 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
438 {
439     struct yaz_marc_node *n;
440     int identifier_length;
441     const char *leader = 0;
442
443     for (n = mt->nodes; n; n = n->next)
444         if (n->which == YAZ_MARC_LEADER)
445         {
446             leader = n->u.leader;
447             break;
448         }
449     
450     if (!leader)
451         return -1;
452     if (!atoi_n_check(leader+11, 1, &identifier_length))
453         return -1;
454
455     for (n = mt->nodes; n; n = n->next)
456     {
457         struct yaz_marc_subfield *s;
458         switch(n->which)
459         {
460         case YAZ_MARC_DATAFIELD:
461             wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
462                          n->u.datafield.indicator);
463             for (s = n->u.datafield.subfields; s; s = s->next)
464             {
465                 /* if identifier length is 2 (most MARCs),
466                    the code is a single character .. However we've
467                    seen multibyte codes, so see how big it really is */
468                 size_t using_code_len = 
469                     (identifier_length != 2) ? identifier_length - 1
470                     :
471                     cdata_one_character(mt, s->code_data);
472                 
473                 wrbuf_puts (wr, mt->subfield_str); 
474                 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, 
475                                   using_code_len);
476                 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
477                 wrbuf_iconv_puts(wr, mt->iconv_cd, 
478                                  s->code_data + using_code_len);
479                 marc_iconv_reset(mt, wr);
480             }
481             wrbuf_puts (wr, mt->endline_str);
482             break;
483         case YAZ_MARC_CONTROLFIELD:
484             wrbuf_printf(wr, "%s", n->u.controlfield.tag);
485             wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
486             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
487             marc_iconv_reset(mt, wr);
488             wrbuf_puts (wr, mt->endline_str);
489             break;
490         case YAZ_MARC_COMMENT:
491             wrbuf_puts(wr, "(");
492             wrbuf_iconv_write(wr, mt->iconv_cd, 
493                               n->u.comment, strlen(n->u.comment));
494             marc_iconv_reset(mt, wr);
495             wrbuf_puts(wr, ")\n");
496             break;
497         case YAZ_MARC_LEADER:
498             wrbuf_printf(wr, "%s\n", n->u.leader);
499         }
500     }
501     wrbuf_puts(wr, "\n");
502     return 0;
503 }
504
505 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
506 {
507     if (mt->enable_collection == collection_second)
508     {
509         switch(mt->xml)
510         {
511         case YAZ_MARC_MARCXML:
512             wrbuf_printf(wr, "</collection>\n");
513             break;
514         case YAZ_MARC_XCHANGE:
515             wrbuf_printf(wr, "</collection>\n");
516             break;
517         }
518     }
519     return 0;
520 }
521
522 void yaz_marc_enable_collection(yaz_marc_t mt)
523 {
524     mt->enable_collection = collection_first;
525 }
526
527 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
528 {
529     switch(mt->xml)
530     {
531     case YAZ_MARC_LINE:
532         return yaz_marc_write_line(mt, wr);
533     case YAZ_MARC_MARCXML:
534         return yaz_marc_write_marcxml(mt, wr);
535     case YAZ_MARC_XCHANGE:
536         return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
537     case YAZ_MARC_ISO2709:
538         return yaz_marc_write_iso2709(mt, wr);
539     case YAZ_MARC_CHECK:
540         return yaz_marc_write_check(mt, wr);
541     }
542     return -1;
543 }
544
545 /** \brief common MARC XML/Xchange writer
546     \param mt handle
547     \param wr WRBUF output
548     \param ns XMLNS for the elements
549     \param format record format (e.g. "MARC21")
550     \param type record type (e.g. "Bibliographic")
551 */
552 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
553                                       const char *ns, 
554                                       const char *format,
555                                       const char *type)
556 {
557     struct yaz_marc_node *n;
558     int identifier_length;
559     const char *leader = 0;
560
561     for (n = mt->nodes; n; n = n->next)
562         if (n->which == YAZ_MARC_LEADER)
563         {
564             leader = n->u.leader;
565             break;
566         }
567     
568     if (!leader)
569         return -1;
570     if (!atoi_n_check(leader+11, 1, &identifier_length))
571         return -1;
572     
573     if (mt->enable_collection != no_collection)
574     {
575         if (mt->enable_collection == collection_first)
576             wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
577         mt->enable_collection = collection_second;
578         wrbuf_printf(wr, "<record");
579     }
580     else
581     {
582         wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
583     }
584     if (format)
585         wrbuf_printf(wr, " format=\"%.80s\"", format);
586     if (type)
587         wrbuf_printf(wr, " type=\"%.80s\"", type);
588     wrbuf_printf(wr, ">\n");
589     for (n = mt->nodes; n; n = n->next)
590     {
591         struct yaz_marc_subfield *s;
592
593         switch(n->which)
594         {
595         case YAZ_MARC_DATAFIELD:
596             wrbuf_printf(wr, "  <datafield tag=\"");
597             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
598                                     strlen(n->u.datafield.tag));
599             wrbuf_printf(wr, "\"");
600             if (n->u.datafield.indicator)
601             {
602                 int i;
603                 for (i = 0; n->u.datafield.indicator[i]; i++)
604                 {
605                     wrbuf_printf(wr, " ind%d=\"", i+1);
606                     wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
607                                           n->u.datafield.indicator+i, 1);
608                     wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
609                 }
610             }
611             wrbuf_printf(wr, ">\n");
612             for (s = n->u.datafield.subfields; s; s = s->next)
613             {
614                 /* if identifier length is 2 (most MARCs),
615                    the code is a single character .. However we've
616                    seen multibyte codes, so see how big it really is */
617                 size_t using_code_len = 
618                     (identifier_length != 2) ? identifier_length - 1
619                     :
620                     cdata_one_character(mt, s->code_data);
621                 
622                 wrbuf_iconv_puts(wr, mt->iconv_cd, "    <subfield code=\"");
623                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
624                                         s->code_data, using_code_len);
625                 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
626                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
627                                         s->code_data + using_code_len,
628                                         strlen(s->code_data + using_code_len));
629                 marc_iconv_reset(mt, wr);
630                 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
631                 wrbuf_puts(wr, "\n");
632             }
633             wrbuf_printf(wr, "  </datafield>\n");
634             break;
635         case YAZ_MARC_CONTROLFIELD:
636             wrbuf_printf(wr, "  <controlfield tag=\"");
637             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
638                                     strlen(n->u.controlfield.tag));
639             wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
640             wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
641                                     n->u.controlfield.data,
642                                     strlen(n->u.controlfield.data));
643
644             marc_iconv_reset(mt, wr);
645             wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
646             wrbuf_puts(wr, "\n");
647             break;
648         case YAZ_MARC_COMMENT:
649             wrbuf_printf(wr, "<!-- ");
650             wrbuf_puts(wr, n->u.comment);
651             wrbuf_printf(wr, " -->\n");
652             break;
653         case YAZ_MARC_LEADER:
654             wrbuf_printf(wr, "  <leader>");
655             wrbuf_iconv_write_cdata(wr, 
656                                     0 /* no charset conversion for leader */,
657                                     n->u.leader, strlen(n->u.leader));
658             wrbuf_printf(wr, "</leader>\n");
659         }
660     }
661     wrbuf_puts(wr, "</record>\n");
662     return 0;
663 }
664
665 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
666                                      const char *ns, 
667                                      const char *format,
668                                      const char *type)
669 {
670     if (mt->write_using_libxml2)
671     {
672 #if YAZ_HAVE_XML2
673         int ret;
674         xmlNode *root_ptr;
675
676         ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
677         if (ret == 0)
678         {
679             xmlChar *buf_out;
680             xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
681             int len_out;
682
683             xmlDocSetRootElement(doc, root_ptr);
684             xmlDocDumpMemory(doc, &buf_out, &len_out);
685
686             wrbuf_write(wr, (const char *) buf_out, len_out);
687             wrbuf_puts(wr, "");
688             xmlFree(buf_out);
689             xmlFreeDoc(doc);
690         }
691         return ret;
692 #else
693         return -1;
694 #endif
695     }
696     else
697         return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
698 }
699
700 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
701 {
702     /* set leader 09 to 'a' for UNICODE */
703     /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
704     if (!mt->leader_spec)
705         yaz_marc_modify_leader(mt, 9, "a");
706     return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
707                                      0, 0);
708 }
709
710 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
711                                const char *format,
712                                const char *type)
713 {
714     return yaz_marc_write_marcxml_ns(mt, wr,
715                                      "http://www.bs.dk/standards/MarcXchange",
716                                      0, 0);
717 }
718
719
720 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
721                        const char *ns, 
722                        const char *format,
723                        const char *type)
724 {
725 #if YAZ_HAVE_XML2
726     struct yaz_marc_node *n;
727     int identifier_length;
728     const char *leader = 0;
729     xmlNode *record_ptr;
730     xmlNsPtr ns_record;
731     WRBUF wr_cdata = 0;
732
733     for (n = mt->nodes; n; n = n->next)
734         if (n->which == YAZ_MARC_LEADER)
735         {
736             leader = n->u.leader;
737             break;
738         }
739     
740     if (!leader)
741         return -1;
742     if (!atoi_n_check(leader+11, 1, &identifier_length))
743         return -1;
744
745     wr_cdata = wrbuf_alloc();
746
747     record_ptr = xmlNewNode(0, BAD_CAST "record");
748     *root_ptr = record_ptr;
749
750     ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
751     xmlSetNs(record_ptr, ns_record);
752
753     if (format)
754         xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
755     if (type)
756         xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
757     for (n = mt->nodes; n; n = n->next)
758     {
759         struct yaz_marc_subfield *s;
760         xmlNode *ptr;
761
762         switch(n->which)
763         {
764         case YAZ_MARC_DATAFIELD:
765             ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
766             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
767             if (n->u.datafield.indicator)
768             {
769                 int i;
770                 for (i = 0; n->u.datafield.indicator[i]; i++)
771                 {
772                     char ind_str[6];
773                     char ind_val[2];
774
775                     sprintf(ind_str, "ind%d", i+1);
776                     ind_val[0] = n->u.datafield.indicator[i];
777                     ind_val[1] = '\0';
778                     xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
779                 }
780             }
781             for (s = n->u.datafield.subfields; s; s = s->next)
782             {
783                 xmlNode *ptr_subfield;
784                 /* if identifier length is 2 (most MARCs),
785                    the code is a single character .. However we've
786                    seen multibyte codes, so see how big it really is */
787                 size_t using_code_len = 
788                     (identifier_length != 2) ? identifier_length - 1
789                     :
790                     cdata_one_character(mt, s->code_data);
791
792                 wrbuf_rewind(wr_cdata);
793                 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
794                                  s->code_data + using_code_len);
795                 marc_iconv_reset(mt, wr_cdata);
796                 ptr_subfield = xmlNewTextChild(
797                     ptr, ns_record, 
798                     BAD_CAST "subfield",  BAD_CAST wrbuf_cstr(wr_cdata));
799
800                 wrbuf_rewind(wr_cdata);
801                 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
802                                   s->code_data, using_code_len);
803                 xmlNewProp(ptr_subfield, BAD_CAST "code",
804                            BAD_CAST wrbuf_cstr(wr_cdata));
805             }
806             break;
807         case YAZ_MARC_CONTROLFIELD:
808             wrbuf_rewind(wr_cdata);
809             wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
810             marc_iconv_reset(mt, wr_cdata);
811             
812             ptr = xmlNewTextChild(record_ptr, ns_record,
813                                   BAD_CAST "controlfield",
814                                   BAD_CAST wrbuf_cstr(wr_cdata));
815             
816             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
817             break;
818         case YAZ_MARC_COMMENT:
819             ptr = xmlNewComment(BAD_CAST n->u.comment);
820             xmlAddChild(record_ptr, ptr);
821             break;
822         case YAZ_MARC_LEADER:
823             xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
824                             BAD_CAST n->u.leader);
825             break;
826         }
827     }
828     wrbuf_destroy(wr_cdata);
829     return 0;
830 #else
831     return -1;
832 #endif
833 }
834
835 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
836 {
837     struct yaz_marc_node *n;
838     int indicator_length;
839     int identifier_length;
840     int length_data_entry;
841     int length_starting;
842     int length_implementation;
843     int data_offset = 0;
844     const char *leader = 0;
845     WRBUF wr_dir, wr_head, wr_data_tmp;
846     int base_address;
847     
848     for (n = mt->nodes; n; n = n->next)
849         if (n->which == YAZ_MARC_LEADER)
850             leader = n->u.leader;
851     
852     if (!leader)
853         return -1;
854     if (!atoi_n_check(leader+10, 1, &indicator_length))
855         return -1;
856     if (!atoi_n_check(leader+11, 1, &identifier_length))
857         return -1;
858     if (!atoi_n_check(leader+20, 1, &length_data_entry))
859         return -1;
860     if (!atoi_n_check(leader+21, 1, &length_starting))
861         return -1;
862     if (!atoi_n_check(leader+22, 1, &length_implementation))
863         return -1;
864
865     wr_data_tmp = wrbuf_alloc();
866     wr_dir = wrbuf_alloc();
867     for (n = mt->nodes; n; n = n->next)
868     {
869         int data_length = 0;
870         struct yaz_marc_subfield *s;
871
872         switch(n->which)
873         {
874         case YAZ_MARC_DATAFIELD:
875             wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
876             data_length += indicator_length;
877             wrbuf_rewind(wr_data_tmp);
878             for (s = n->u.datafield.subfields; s; s = s->next)
879             {
880                 /* write dummy IDFS + content */
881                 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
882                 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
883                 marc_iconv_reset(mt, wr_data_tmp);
884             }
885             /* write dummy FS (makes MARC-8 to become ASCII) */
886             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
887             marc_iconv_reset(mt, wr_data_tmp);
888             data_length += wrbuf_len(wr_data_tmp);
889             break;
890         case YAZ_MARC_CONTROLFIELD:
891             wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
892
893             wrbuf_rewind(wr_data_tmp);
894             wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, 
895                              n->u.controlfield.data);
896             marc_iconv_reset(mt, wr_data_tmp);
897             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
898             marc_iconv_reset(mt, wr_data_tmp);
899             data_length += wrbuf_len(wr_data_tmp);
900             break;
901         case YAZ_MARC_COMMENT:
902             break;
903         case YAZ_MARC_LEADER:
904             break;
905         }
906         if (data_length)
907         {
908             wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
909             wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
910             data_offset += data_length;
911         }
912     }
913     /* mark end of directory */
914     wrbuf_putc(wr_dir, ISO2709_FS);
915
916     /* base address of data (comes after leader+directory) */
917     base_address = 24 + wrbuf_len(wr_dir);
918
919     wr_head = wrbuf_alloc();
920
921     /* write record length */
922     wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
923     /* from "original" leader */
924     wrbuf_write(wr_head, leader+5, 7);
925     /* base address of data */
926     wrbuf_printf(wr_head, "%05d", base_address);
927     /* from "original" leader */
928     wrbuf_write(wr_head, leader+17, 7);
929     
930     wrbuf_write(wr, wrbuf_buf(wr_head), 24);
931     wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
932     wrbuf_destroy(wr_head);
933     wrbuf_destroy(wr_dir);
934     wrbuf_destroy(wr_data_tmp);
935
936     for (n = mt->nodes; n; n = n->next)
937     {
938         struct yaz_marc_subfield *s;
939
940         switch(n->which)
941         {
942         case YAZ_MARC_DATAFIELD:
943             wrbuf_printf(wr, "%.*s", indicator_length,
944                          n->u.datafield.indicator);
945             for (s = n->u.datafield.subfields; s; s = s->next)
946             {
947                 wrbuf_putc(wr, ISO2709_IDFS);
948                 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
949                 marc_iconv_reset(mt, wr);
950             }
951             wrbuf_putc(wr, ISO2709_FS);
952             break;
953         case YAZ_MARC_CONTROLFIELD:
954             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
955             marc_iconv_reset(mt, wr);
956             wrbuf_putc(wr, ISO2709_FS);
957             break;
958         case YAZ_MARC_COMMENT:
959             break;
960         case YAZ_MARC_LEADER:
961             break;
962         }
963     }
964     wrbuf_printf(wr, "%c", ISO2709_RS);
965     return 0;
966 }
967
968
969 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
970 {
971     int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
972     if (r <= 0)
973         return r;
974     s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
975     if (s != 0)
976         return -1; /* error */
977     return r; /* OK, return length > 0 */
978 }
979
980 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
981                          const char **result, size_t *rsize)
982 {
983     int r;
984
985     wrbuf_rewind(mt->m_wr);
986     r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
987     if (result)
988         *result = wrbuf_cstr(mt->m_wr);
989     if (rsize)
990         *rsize = wrbuf_len(mt->m_wr);
991     return r;
992 }
993
994 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
995 {
996     if (mt)
997         mt->xml = xmlmode;
998 }
999
1000 void yaz_marc_debug(yaz_marc_t mt, int level)
1001 {
1002     if (mt)
1003         mt->debug = level;
1004 }
1005
1006 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1007 {
1008     mt->iconv_cd = cd;
1009 }
1010
1011 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1012 {
1013     return mt->iconv_cd;
1014 }
1015
1016 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1017 {
1018     struct yaz_marc_node *n;
1019     char *leader = 0;
1020     for (n = mt->nodes; n; n = n->next)
1021         if (n->which == YAZ_MARC_LEADER)
1022         {
1023             leader = n->u.leader;
1024             memcpy(leader+off, str, strlen(str));
1025             break;
1026         }
1027 }
1028
1029 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1030 {
1031     xfree(mt->leader_spec);
1032     mt->leader_spec = 0;
1033     if (leader_spec)
1034     {
1035         char dummy_leader[24];
1036         if (marc_exec_leader(leader_spec, dummy_leader, 24))
1037             return -1;
1038         mt->leader_spec = xstrdup(leader_spec);
1039     }
1040     return 0;
1041 }
1042
/**
 * \brief applies a leader specification to a leader buffer
 * \param leader_spec comma-separated list of items of the form pos=value
 * \param leader buffer that is modified in place
 * \param size number of bytes available in leader
 * \retval 0 all items were applied
 * \retval -1 syntax error, position/length out of range or numeric
 *            value that does not fit in one byte
 *
 * Each item is a decimal position followed by '=' and a value. The
 * value is either a quoted string ('text'), which is copied to the
 * leader starting at the position, or a decimal number in the range
 * 0-255, which is stored as a single byte at the position. At most
 * 20 characters of a value are considered.
 *
 * Items are applied as they are parsed, so items preceding a faulty
 * one have already modified the leader when -1 is returned.
 */
static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
{
    const char *cp = leader_spec;
    while (cp)
    {
        char val[21];
        int pos;
        int no_read = 0, no = 0;

        /* position, '=', then up to 20 characters that are not ',' */
        no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
        if (no < 2 || no_read < 3)
            return -1;
        if (pos < 0 || (size_t) pos >= size)
            return -1;

        if (*val == '\'')
        {
            const char *vp = strchr(val+1, '\'');
            size_t len;
            
            if (!vp)
                return -1;
            len = (size_t) (vp - (val+1));
            /* pos < size holds here, so the subtraction cannot wrap */
            if (len > size - (size_t) pos)
                return -1;
            memcpy(leader + pos, val+1, len);
        }
        else if (*val >= '0' && *val <= '9')
        {
            /* strtol (unlike atoi) is well-defined on overflow: it
               saturates at LONG_MAX, which the range check rejects */
            long ch = strtol(val, 0, 10);
            if (ch > 255)
                return -1;
            leader[pos] = (char) ch;
        }
        else
            return -1;
        cp += no_read;
        if (*cp != ',')
            break;

        cp++;
    }
    return 0;
}
1085
1086 int yaz_marc_decode_formatstr(const char *arg)
1087 {
1088     int mode = -1; 
1089     if (!strcmp(arg, "marc"))
1090         mode = YAZ_MARC_ISO2709;
1091     if (!strcmp(arg, "marcxml"))
1092         mode = YAZ_MARC_MARCXML;
1093     if (!strcmp(arg, "marcxchange"))
1094         mode = YAZ_MARC_XCHANGE;
1095     if (!strcmp(arg, "line"))
1096         mode = YAZ_MARC_LINE;
1097     return mode;
1098 }
1099
1100 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1101 {
1102     mt->write_using_libxml2 = enable;
1103 }
1104
1105 /*
1106  * Local variables:
1107  * c-basic-offset: 4
1108  * indent-tabs-mode: nil
1109  * End:
1110  * vim: shiftwidth=4 tabstop=8 expandtab
1111  */
1112