a2a3250f4e2f45ee628852dc58a4177f23d9eeda
[yaz-moved-to-github.git] / src / marcdisp.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2008 Index Data
3  * See the file LICENSE for details.
4  */
5
6 /**
7  * \file marcdisp.c
8  * \brief Implements MARC conversion utilities
9  */
10
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <ctype.h>
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
29
30 #if YAZ_HAVE_XML2
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
33 #endif
34
/** \brief tracks whether a <collection> wrapper is written around records */
enum yaz_collection_state {
    no_collection,     /* records are written without a collection wrapper */
    collection_first,  /* wrapper enabled; opening <collection> not yet written */
    collection_second  /* opening tag written; the trailer closes it */
};
40    
/** \brief node types for yaz_marc_node
 *
 * Selects which member of the union in struct yaz_marc_node is valid.
 */
enum YAZ_MARC_NODE_TYPE
{
    YAZ_MARC_DATAFIELD,    /* u.datafield: tag, indicator and subfields */
    YAZ_MARC_CONTROLFIELD, /* u.controlfield: tag and data */
    YAZ_MARC_COMMENT,      /* u.comment: free-text comment */
    YAZ_MARC_LEADER        /* u.leader: record leader */
};
49
/** \brief represents a data field */
struct yaz_marc_datafield {
    char *tag;        /* field tag */
    char *indicator;  /* indicator characters; writers walk it one char at a
                         time up to the terminating NUL */
    struct yaz_marc_subfield *subfields; /* head of subfield list; 0 if none */
};
56
/** \brief represents a control field */
struct yaz_marc_controlfield {
    char *tag;   /* field tag */
    char *data;  /* field content as a NUL-terminated string */
};
62
/** \brief a comment node
 *
 * NOTE(review): the node union visible in this file stores comments as a
 * plain char * rather than through this struct -- confirm whether this
 * type is still used anywhere.
 */
struct yaz_marc_comment {
    char *comment;  /* comment text */
};
67
/** \brief MARC node
 *
 * One element of the singly linked list that makes up a record. The
 * 'which' member tells which union member is valid.
 */
struct yaz_marc_node {
    enum YAZ_MARC_NODE_TYPE which;  /* discriminator for u */
    union {
        struct yaz_marc_datafield datafield;        /* YAZ_MARC_DATAFIELD */
        struct yaz_marc_controlfield controlfield;  /* YAZ_MARC_CONTROLFIELD */
        char *comment;                              /* YAZ_MARC_COMMENT */
        char *leader;                               /* YAZ_MARC_LEADER */
    } u;
    struct yaz_marc_node *next;  /* next node in record; 0 at end of list */
};
79
/** \brief represents a subfield */
struct yaz_marc_subfield {
    char *code_data;  /* subfield code immediately followed by its data, held
                         in one NUL-terminated string; writers split it using
                         the identifier length */
    struct yaz_marc_subfield *next;  /* next subfield; 0 at end of list */
};
85
/** \brief the internals of a yaz_marc_t handle */
struct yaz_marc_t_ {
    WRBUF m_wr;               /* internal output buffer (yaz_marc_decode_buf) */
    NMEM nmem;                /* memory pool holding nodes and their strings */
    int xml;                  /* output mode, one of the YAZ_MARC_xxx values */
    int debug;                /* non-zero: add diagnostic comment nodes */
    int write_using_libxml2;  /* non-zero: build XML output via libxml2 */
    enum yaz_collection_state enable_collection; /* <collection> wrapper state */
    yaz_iconv_t iconv_cd;     /* character set conversion handle; 0 if unset */
    char subfield_str[8];     /* written before each subfield in line mode */
    char endline_str[8];      /* written after each field in line mode */
    char *leader_spec;        /* leader modification spec; freed with xfree */
    struct yaz_marc_node *nodes;            /* head of the node list */
    struct yaz_marc_node **nodes_pp;        /* where the next node is linked */
    struct yaz_marc_subfield **subfield_pp; /* where the next subfield is
                                               linked; 0 when there is no
                                               current data field */
};
102
103 yaz_marc_t yaz_marc_create(void)
104 {
105     yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
106     mt->xml = YAZ_MARC_LINE;
107     mt->debug = 0;
108     mt->write_using_libxml2 = 0;
109     mt->enable_collection = no_collection;
110     mt->m_wr = wrbuf_alloc();
111     mt->iconv_cd = 0;
112     mt->leader_spec = 0;
113     strcpy(mt->subfield_str, " $");
114     strcpy(mt->endline_str, "\n");
115
116     mt->nmem = nmem_create();
117     yaz_marc_reset(mt);
118     return mt;
119 }
120
121 void yaz_marc_destroy(yaz_marc_t mt)
122 {
123     if (!mt)
124         return ;
125     nmem_destroy(mt->nmem);
126     wrbuf_destroy(mt->m_wr);
127     xfree(mt->leader_spec);
128     xfree(mt);
129 }
130
131 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
132 {
133     return mt->nmem;
134 }
135
136 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
137 {
138     wrbuf_iconv_reset(wr, mt->iconv_cd);
139 }
140
/* Forward declaration; the definition lies outside the part of the file
   seen here. yaz_marc_add_leader() calls it with the handle's leader spec
   and the freshly copied leader. */
static int marc_exec_leader(const char *leader_spec, char *leader,
                            size_t size);
143
144
145 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
146 {
147     struct yaz_marc_node *n = (struct yaz_marc_node *)
148         nmem_malloc(mt->nmem, sizeof(*n));
149     n->next = 0;
150     *mt->nodes_pp = n;
151     mt->nodes_pp = &n->next;
152     return n;
153 }
154
#if YAZ_HAVE_XML2
/**
 * Appends a control field whose tag and data are taken from XML nodes;
 * both strings are copied into the handle's pool.
 */
void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                   const xmlNode *ptr_data)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);
    struct yaz_marc_controlfield *cf = &node->u.controlfield;

    node->which = YAZ_MARC_CONTROLFIELD;
    cf->tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
    cf->data = nmem_text_node_cdata(ptr_data, mt->nmem);
}
#endif
165
166
167 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
168 {
169     struct yaz_marc_node *n = yaz_marc_add_node(mt);
170     n->which = YAZ_MARC_COMMENT;
171     n->u.comment = nmem_strdup(mt->nmem, comment);
172 }
173
174 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
175 {
176     va_list ap;
177     char buf[200];
178
179     va_start(ap, fmt);
180     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
181     yaz_marc_add_comment(mt, buf);
182     va_end (ap);
183 }
184
185 int yaz_marc_get_debug(yaz_marc_t mt)
186 {
187     return mt->debug;
188 }
189
190 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
191 {
192     struct yaz_marc_node *n = yaz_marc_add_node(mt);
193     n->which = YAZ_MARC_LEADER;
194     n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
195     marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
196 }
197
198 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
199                                const char *data, size_t data_len)
200 {
201     struct yaz_marc_node *n = yaz_marc_add_node(mt);
202     n->which = YAZ_MARC_CONTROLFIELD;
203     n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
204     n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
205     if (mt->debug)
206     {
207         size_t i;
208         char msg[80];
209
210         sprintf(msg, "controlfield:");
211         for (i = 0; i < 16 && i < data_len; i++)
212             sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
213         if (i < data_len)
214             sprintf(msg + strlen(msg), " ..");
215         yaz_marc_add_comment(mt, msg);
216     }
217 }
218
219 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
220                             const char *indicator, size_t indicator_len)
221 {
222     struct yaz_marc_node *n = yaz_marc_add_node(mt);
223     n->which = YAZ_MARC_DATAFIELD;
224     n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
225     n->u.datafield.indicator =
226         nmem_strdupn(mt->nmem, indicator, indicator_len);
227     n->u.datafield.subfields = 0;
228
229     /* make subfield_pp the current (last one) */
230     mt->subfield_pp = &n->u.datafield.subfields;
231 }
232
#if YAZ_HAVE_XML2
/**
 * Same as yaz_marc_add_datafield(), except that the tag text is taken from
 * an XML node.
 */
void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                const char *indicator, size_t indicator_len)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);
    struct yaz_marc_datafield *df = &node->u.datafield;

    node->which = YAZ_MARC_DATAFIELD;
    df->tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
    df->indicator = nmem_strdupn(mt->nmem, indicator, indicator_len);
    df->subfields = 0;

    /* subsequent subfields are appended to this field's list */
    mt->subfield_pp = &df->subfields;
}
#endif
248
249 void yaz_marc_add_subfield(yaz_marc_t mt,
250                            const char *code_data, size_t code_data_len)
251 {
252     if (mt->debug)
253     {
254         size_t i;
255         char msg[80];
256
257         sprintf(msg, "subfield:");
258         for (i = 0; i < 16 && i < code_data_len; i++)
259             sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
260         if (i < code_data_len)
261             sprintf(msg + strlen(msg), " ..");
262         yaz_marc_add_comment(mt, msg);
263     }
264
265     if (mt->subfield_pp)
266     {
267         struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
268             nmem_malloc(mt->nmem, sizeof(*n));
269         n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
270         n->next = 0;
271         /* mark subfield_pp to point to this one, so we append here next */
272         *mt->subfield_pp = n;
273         mt->subfield_pp = &n->next;
274     }
275 }
276
/**
 * Converts the first 'size' bytes of buf with atoi_n(), but only if every
 * one of them is a decimal digit.
 *
 * Returns 1 and stores the number in *val on success; returns 0 and leaves
 * *val untouched otherwise.
 */
int atoi_n_check(const char *buf, int size, int *val)
{
    const unsigned char *bytes = (const unsigned char *) buf;
    int pos = 0;

    while (pos < size)
    {
        if (!isdigit(bytes[pos]))
            return 0;
        pos++;
    }
    *val = atoi_n(buf, size);
    return 1;
}
286
287 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
288                          int *indicator_length,
289                          int *identifier_length,
290                          int *base_address,
291                          int *length_data_entry,
292                          int *length_starting,
293                          int *length_implementation)
294 {
295     char leader[24];
296
297     memcpy(leader, leader_c, 24);
298
299     if (!atoi_n_check(leader+10, 1, indicator_length))
300     {
301         yaz_marc_cprintf(mt, 
302                          "Indicator length at offset 10 should hold a digit."
303                          " Assuming 2");
304         leader[10] = '2';
305         *indicator_length = 2;
306     }
307     if (!atoi_n_check(leader+11, 1, identifier_length))
308     {
309         yaz_marc_cprintf(mt, 
310                          "Identifier length at offset 11 should hold a digit."
311                          " Assuming 2");
312         leader[11] = '2';
313         *identifier_length = 2;
314     }
315     if (!atoi_n_check(leader+12, 5, base_address))
316     {
317         yaz_marc_cprintf(mt, 
318                          "Base address at offsets 12..16 should hold a number."
319                          " Assuming 0");
320         *base_address = 0;
321     }
322     if (!atoi_n_check(leader+20, 1, length_data_entry))
323     {
324         yaz_marc_cprintf(mt, 
325                          "Length data entry at offset 20 should hold a digit."
326                          " Assuming 4");
327         *length_data_entry = 4;
328         leader[20] = '4';
329     }
330     if (!atoi_n_check(leader+21, 1, length_starting))
331     {
332         yaz_marc_cprintf(mt,
333                          "Length starting at offset 21 should hold a digit."
334                          " Assuming 5");
335         *length_starting = 5;
336         leader[21] = '5';
337     }
338     if (!atoi_n_check(leader+22, 1, length_implementation))
339     {
340         yaz_marc_cprintf(mt, 
341                          "Length implementation at offset 22 should hold a digit."
342                          " Assuming 0");
343         *length_implementation = 0;
344         leader[22] = '0';
345     }
346
347     if (mt->debug)
348     {
349         yaz_marc_cprintf(mt, "Indicator length      %5d", *indicator_length);
350         yaz_marc_cprintf(mt, "Identifier length     %5d", *identifier_length);
351         yaz_marc_cprintf(mt, "Base address          %5d", *base_address);
352         yaz_marc_cprintf(mt, "Length data entry     %5d", *length_data_entry);
353         yaz_marc_cprintf(mt, "Length starting       %5d", *length_starting);
354         yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
355     }
356     yaz_marc_add_leader(mt, leader, 24);
357 }
358
359 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
360 {
361     strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
362     mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
363 }
364
365 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
366 {
367     strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
368     mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
369 }
370
371 /* try to guess how many bytes the identifier really is! */
372 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
373 {
374     if (mt->iconv_cd)
375     {
376         size_t i;
377         for (i = 1; i<5; i++)
378         {
379             char outbuf[12];
380             size_t outbytesleft = sizeof(outbuf);
381             char *outp = outbuf;
382             const char *inp = buf;
383
384             size_t inbytesleft = i;
385             size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
386                                  &outp, &outbytesleft);
387             if (r != (size_t) (-1))
388                 return i;  /* got a complete sequence */
389         }
390         return 1; /* giving up */
391     }
392     return 1; /* we don't know */
393 }
394                               
395 void yaz_marc_reset(yaz_marc_t mt)
396 {
397     nmem_reset(mt->nmem);
398     mt->nodes = 0;
399     mt->nodes_pp = &mt->nodes;
400     mt->subfield_pp = 0;
401 }
402
403 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
404 {
405     struct yaz_marc_node *n;
406     int identifier_length;
407     const char *leader = 0;
408
409     for (n = mt->nodes; n; n = n->next)
410         if (n->which == YAZ_MARC_LEADER)
411         {
412             leader = n->u.leader;
413             break;
414         }
415     
416     if (!leader)
417         return -1;
418     if (!atoi_n_check(leader+11, 1, &identifier_length))
419         return -1;
420
421     for (n = mt->nodes; n; n = n->next)
422     {
423         switch(n->which)
424         {
425         case YAZ_MARC_COMMENT:
426             wrbuf_iconv_write(wr, mt->iconv_cd, 
427                               n->u.comment, strlen(n->u.comment));
428             wrbuf_puts(wr, ")\n");
429             break;
430         default:
431             break;
432         }
433     }
434     return 0;
435 }
436
437 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
438                                int identifier_length)
439 {
440     /* if identifier length is 2 (most MARCs) or less (probably an error),
441        the code is a single character .. However we've
442        seen multibyte codes, so see how big it really is */
443     if (identifier_length > 2)
444         return identifier_length - 1;
445     else
446         return cdata_one_character(mt, data);
447 }
448
449 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
450 {
451     struct yaz_marc_node *n;
452     int identifier_length;
453     const char *leader = 0;
454
455     for (n = mt->nodes; n; n = n->next)
456         if (n->which == YAZ_MARC_LEADER)
457         {
458             leader = n->u.leader;
459             break;
460         }
461     
462     if (!leader)
463         return -1;
464     if (!atoi_n_check(leader+11, 1, &identifier_length))
465         return -1;
466
467     for (n = mt->nodes; n; n = n->next)
468     {
469         struct yaz_marc_subfield *s;
470         switch(n->which)
471         {
472         case YAZ_MARC_DATAFIELD:
473             wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
474                          n->u.datafield.indicator);
475             for (s = n->u.datafield.subfields; s; s = s->next)
476             {
477                 size_t using_code_len = get_subfield_len(mt, s->code_data,
478                                                          identifier_length);
479                 
480                 wrbuf_puts (wr, mt->subfield_str); 
481                 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, 
482                                   using_code_len);
483                 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
484                 wrbuf_iconv_puts(wr, mt->iconv_cd, 
485                                  s->code_data + using_code_len);
486                 marc_iconv_reset(mt, wr);
487             }
488             wrbuf_puts (wr, mt->endline_str);
489             break;
490         case YAZ_MARC_CONTROLFIELD:
491             wrbuf_printf(wr, "%s", n->u.controlfield.tag);
492             wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
493             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
494             marc_iconv_reset(mt, wr);
495             wrbuf_puts (wr, mt->endline_str);
496             break;
497         case YAZ_MARC_COMMENT:
498             wrbuf_puts(wr, "(");
499             wrbuf_iconv_write(wr, mt->iconv_cd, 
500                               n->u.comment, strlen(n->u.comment));
501             marc_iconv_reset(mt, wr);
502             wrbuf_puts(wr, ")\n");
503             break;
504         case YAZ_MARC_LEADER:
505             wrbuf_printf(wr, "%s\n", n->u.leader);
506         }
507     }
508     wrbuf_puts(wr, "\n");
509     return 0;
510 }
511
512 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
513 {
514     if (mt->enable_collection == collection_second)
515     {
516         switch(mt->xml)
517         {
518         case YAZ_MARC_MARCXML:
519             wrbuf_printf(wr, "</collection>\n");
520             break;
521         case YAZ_MARC_XCHANGE:
522             wrbuf_printf(wr, "</collection>\n");
523             break;
524         }
525     }
526     return 0;
527 }
528
529 void yaz_marc_enable_collection(yaz_marc_t mt)
530 {
531     mt->enable_collection = collection_first;
532 }
533
534 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
535 {
536     switch(mt->xml)
537     {
538     case YAZ_MARC_LINE:
539         return yaz_marc_write_line(mt, wr);
540     case YAZ_MARC_MARCXML:
541         return yaz_marc_write_marcxml(mt, wr);
542     case YAZ_MARC_XCHANGE:
543         return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
544     case YAZ_MARC_ISO2709:
545         return yaz_marc_write_iso2709(mt, wr);
546     case YAZ_MARC_CHECK:
547         return yaz_marc_write_check(mt, wr);
548     }
549     return -1;
550 }
551
/** \brief common MARC XML/Xchange writer
    \param mt handle
    \param wr WRBUF output
    \param ns XMLNS for the elements
    \param format record format (e.g. "MARC21")
    \param type record type (e.g. "Bibliographic")
    \retval 0 record written
    \retval -1 no leader node, or leader offset 11 is not a digit

    Writes the XML text directly into wr, without libxml2. When the
    collection wrapper is enabled, the first record written also emits the
    opening collection element, which then carries the namespace.
*/
static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
                                      const char *ns, 
                                      const char *format,
                                      const char *type)
{
    struct yaz_marc_node *n;
    int identifier_length;
    const char *leader = 0;

    /* the first leader node supplies the identifier length */
    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
        {
            leader = n->u.leader;
            break;
        }
    
    if (!leader)
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;
    
    if (mt->enable_collection != no_collection)
    {
        /* open the wrapper once; later records only add <record> */
        if (mt->enable_collection == collection_first)
            wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
        mt->enable_collection = collection_second;
        wrbuf_printf(wr, "<record");
    }
    else
    {
        wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
    }
    /* optional attributes; at most 80 characters of each are written */
    if (format)
        wrbuf_printf(wr, " format=\"%.80s\"", format);
    if (type)
        wrbuf_printf(wr, " type=\"%.80s\"", type);
    wrbuf_printf(wr, ">\n");
    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            wrbuf_printf(wr, "  <datafield tag=\"");
            wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
                                    strlen(n->u.datafield.tag));
            wrbuf_printf(wr, "\"");
            if (n->u.datafield.indicator)
            {
                /* one indN attribute per indicator character */
                int i;
                for (i = 0; n->u.datafield.indicator[i]; i++)
                {
                    wrbuf_printf(wr, " ind%d=\"", i+1);
                    wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                          n->u.datafield.indicator+i, 1);
                    wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
                }
            }
            wrbuf_printf(wr, ">\n");
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                /* split code_data into the code and the remaining data */
                size_t using_code_len = get_subfield_len(mt, s->code_data,
                                                         identifier_length);
                wrbuf_iconv_puts(wr, mt->iconv_cd, "    <subfield code=\"");
                wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                        s->code_data, using_code_len);
                wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
                wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                        s->code_data + using_code_len,
                                        strlen(s->code_data + using_code_len));
                marc_iconv_reset(mt, wr);
                wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
                wrbuf_puts(wr, "\n");
            }
            wrbuf_printf(wr, "  </datafield>\n");
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_printf(wr, "  <controlfield tag=\"");
            wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
                                    strlen(n->u.controlfield.tag));
            wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
            wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                    n->u.controlfield.data,
                                    strlen(n->u.controlfield.data));

            marc_iconv_reset(mt, wr);
            wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
            wrbuf_puts(wr, "\n");
            break;
        case YAZ_MARC_COMMENT:
            /* NOTE(review): the comment text is written as is, with no
               escaping -- confirm comments cannot contain "--" */
            wrbuf_printf(wr, "<!-- ");
            wrbuf_puts(wr, n->u.comment);
            wrbuf_printf(wr, " -->\n");
            break;
        case YAZ_MARC_LEADER:
            wrbuf_printf(wr, "  <leader>");
            wrbuf_iconv_write_cdata(wr, 
                                    0 /* no charset conversion for leader */,
                                    n->u.leader, strlen(n->u.leader));
            wrbuf_printf(wr, "</leader>\n");
        }
    }
    wrbuf_puts(wr, "</record>\n");
    return 0;
}
665
666 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
667                                      const char *ns, 
668                                      const char *format,
669                                      const char *type)
670 {
671     if (mt->write_using_libxml2)
672     {
673 #if YAZ_HAVE_XML2
674         int ret;
675         xmlNode *root_ptr;
676
677         ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
678         if (ret == 0)
679         {
680             xmlChar *buf_out;
681             xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
682             int len_out;
683
684             xmlDocSetRootElement(doc, root_ptr);
685             xmlDocDumpMemory(doc, &buf_out, &len_out);
686
687             wrbuf_write(wr, (const char *) buf_out, len_out);
688             wrbuf_puts(wr, "");
689             xmlFree(buf_out);
690             xmlFreeDoc(doc);
691         }
692         return ret;
693 #else
694         return -1;
695 #endif
696     }
697     else
698         return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
699 }
700
701 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
702 {
703     /* set leader 09 to 'a' for UNICODE */
704     /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
705     if (!mt->leader_spec)
706         yaz_marc_modify_leader(mt, 9, "a");
707     return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
708                                      0, 0);
709 }
710
711 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
712                                const char *format,
713                                const char *type)
714 {
715     return yaz_marc_write_marcxml_ns(mt, wr,
716                                      "http://www.bs.dk/standards/MarcXchange",
717                                      0, 0);
718 }
719
720
#if YAZ_HAVE_XML2
/**
 * Builds a libxml2 node tree for the record.
 *
 * \param mt handle
 * \param root_ptr receives the newly created record element
 * \param ns namespace URI set on the record element
 * \param format value for the format attribute, or 0 for none
 * \param type value for the type attribute, or 0 for none
 * \retval 0 tree built
 * \retval -1 no leader node, or leader offset 11 is not a digit; in this
 *            case *root_ptr is not assigned
 */
int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
                       const char *ns, 
                       const char *format,
                       const char *type)
{
    struct yaz_marc_node *n;
    int identifier_length;
    const char *leader = 0;
    xmlNode *record_ptr;
    xmlNsPtr ns_record;
    WRBUF wr_cdata = 0;

    /* the first leader node supplies the identifier length */
    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
        {
            leader = n->u.leader;
            break;
        }
    
    if (!leader)
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;

    /* scratch buffer reused for every converted string below */
    wr_cdata = wrbuf_alloc();

    record_ptr = xmlNewNode(0, BAD_CAST "record");
    *root_ptr = record_ptr;

    ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
    xmlSetNs(record_ptr, ns_record);

    if (format)
        xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
    if (type)
        xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;
        xmlNode *ptr;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
            xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
            if (n->u.datafield.indicator)
            {
                /* one indN attribute per indicator character */
                int i;
                for (i = 0; n->u.datafield.indicator[i]; i++)
                {
                    char ind_str[6];
                    char ind_val[2];

                    sprintf(ind_str, "ind%d", i+1);
                    ind_val[0] = n->u.datafield.indicator[i];
                    ind_val[1] = '\0';
                    xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
                }
            }
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                xmlNode *ptr_subfield;
                size_t using_code_len = get_subfield_len(mt, s->code_data,
                                                         identifier_length);
                /* element content: the data that follows the code */
                wrbuf_rewind(wr_cdata);
                wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
                                 s->code_data + using_code_len);
                marc_iconv_reset(mt, wr_cdata);
                ptr_subfield = xmlNewTextChild(
                    ptr, ns_record, 
                    BAD_CAST "subfield",  BAD_CAST wrbuf_cstr(wr_cdata));

                /* code attribute: the leading using_code_len bytes */
                wrbuf_rewind(wr_cdata);
                wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
                                  s->code_data, using_code_len);
                xmlNewProp(ptr_subfield, BAD_CAST "code",
                           BAD_CAST wrbuf_cstr(wr_cdata));
            }
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_rewind(wr_cdata);
            wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr_cdata);
            
            ptr = xmlNewTextChild(record_ptr, ns_record,
                                  BAD_CAST "controlfield",
                                  BAD_CAST wrbuf_cstr(wr_cdata));
            
            xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
            break;
        case YAZ_MARC_COMMENT:
            ptr = xmlNewComment(BAD_CAST n->u.comment);
            xmlAddChild(record_ptr, ptr);
            break;
        case YAZ_MARC_LEADER:
            /* the leader is passed on without character set conversion */
            xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
                            BAD_CAST n->u.leader);
            break;
        }
    }
    wrbuf_destroy(wr_cdata);
    return 0;
}
#endif
827
/**
 * Writes the record in ISO2709 format: a 24-byte leader, the directory,
 * then the field data, ended by a record separator.
 *
 * Works in two passes over the nodes. The first pass builds the directory
 * and measures each field after character set conversion; the second pass
 * writes the field data itself. Comment and leader nodes contribute no
 * directory entry and no data.
 *
 * Returns 0 on success, -1 if the record has no leader or if one of the
 * leader offsets 10, 11, 20, 21 or 22 is not a digit.
 */
int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
{
    struct yaz_marc_node *n;
    int indicator_length;
    int identifier_length;
    int length_data_entry;
    int length_starting;
    int length_implementation;
    int data_offset = 0;
    const char *leader = 0;
    WRBUF wr_dir, wr_head, wr_data_tmp;
    int base_address;
    
    /* no break here: if there are several leader nodes the last one wins */
    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
            leader = n->u.leader;
    
    if (!leader)
        return -1;
    if (!atoi_n_check(leader+10, 1, &indicator_length))
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;
    if (!atoi_n_check(leader+20, 1, &length_data_entry))
        return -1;
    if (!atoi_n_check(leader+21, 1, &length_starting))
        return -1;
    if (!atoi_n_check(leader+22, 1, &length_implementation))
        return -1;

    /* pass 1: build the directory; wr_data_tmp is a scratch buffer used
       only to measure the converted length of each field */
    wr_data_tmp = wrbuf_alloc();
    wr_dir = wrbuf_alloc();
    for (n = mt->nodes; n; n = n->next)
    {
        int data_length = 0;
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
            data_length += indicator_length;
            wrbuf_rewind(wr_data_tmp);
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                /* write dummy IDFS + content */
                wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
                wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
                marc_iconv_reset(mt, wr_data_tmp);
            }
            /* write dummy FS (makes MARC-8 to become ASCII) */
            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
            marc_iconv_reset(mt, wr_data_tmp);
            data_length += wrbuf_len(wr_data_tmp);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);

            wrbuf_rewind(wr_data_tmp);
            wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, 
                             n->u.controlfield.data);
            marc_iconv_reset(mt, wr_data_tmp);
            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
            marc_iconv_reset(mt, wr_data_tmp);
            data_length += wrbuf_len(wr_data_tmp);
            break;
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_LEADER:
            break;
        }
        if (data_length)
        {
            /* directory entry: field length and start offset, zero padded
               to the widths given in the leader */
            wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
            wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
            data_offset += data_length;
        }
    }
    /* mark end of directory */
    wrbuf_putc(wr_dir, ISO2709_FS);

    /* base address of data (comes after leader+directory) */
    base_address = 24 + wrbuf_len(wr_dir);

    wr_head = wrbuf_alloc();

    /* write record length */
    wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
    /* from "original" leader */
    wrbuf_write(wr_head, leader+5, 7);
    /* base address of data */
    wrbuf_printf(wr_head, "%05d", base_address);
    /* from "original" leader */
    wrbuf_write(wr_head, leader+17, 7);
    
    /* only the first 24 bytes of the rebuilt leader are emitted */
    wrbuf_write(wr, wrbuf_buf(wr_head), 24);
    wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
    wrbuf_destroy(wr_head);
    wrbuf_destroy(wr_dir);
    wrbuf_destroy(wr_data_tmp);

    /* pass 2: write the field data with the real separators */
    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            wrbuf_printf(wr, "%.*s", indicator_length,
                         n->u.datafield.indicator);
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                wrbuf_putc(wr, ISO2709_IDFS);
                wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
                marc_iconv_reset(mt, wr);
            }
            wrbuf_putc(wr, ISO2709_FS);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr);
            wrbuf_putc(wr, ISO2709_FS);
            break;
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_LEADER:
            break;
        }
    }
    wrbuf_printf(wr, "%c", ISO2709_RS);
    return 0;
}
960
961
962 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
963 {
964     int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
965     if (r <= 0)
966         return r;
967     s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
968     if (s != 0)
969         return -1; /* error */
970     return r; /* OK, return length > 0 */
971 }
972
973 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
974                          const char **result, size_t *rsize)
975 {
976     int r;
977
978     wrbuf_rewind(mt->m_wr);
979     r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
980     if (result)
981         *result = wrbuf_cstr(mt->m_wr);
982     if (rsize)
983         *rsize = wrbuf_len(mt->m_wr);
984     return r;
985 }
986
987 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
988 {
989     if (mt)
990         mt->xml = xmlmode;
991 }
992
993 void yaz_marc_debug(yaz_marc_t mt, int level)
994 {
995     if (mt)
996         mt->debug = level;
997 }
998
999 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1000 {
1001     mt->iconv_cd = cd;
1002 }
1003
1004 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1005 {
1006     return mt->iconv_cd;
1007 }
1008
1009 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1010 {
1011     struct yaz_marc_node *n;
1012     char *leader = 0;
1013     for (n = mt->nodes; n; n = n->next)
1014         if (n->which == YAZ_MARC_LEADER)
1015         {
1016             leader = n->u.leader;
1017             memcpy(leader+off, str, strlen(str));
1018             break;
1019         }
1020 }
1021
1022 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1023 {
1024     xfree(mt->leader_spec);
1025     mt->leader_spec = 0;
1026     if (leader_spec)
1027     {
1028         char dummy_leader[24];
1029         if (marc_exec_leader(leader_spec, dummy_leader, 24))
1030             return -1;
1031         mt->leader_spec = xstrdup(leader_spec);
1032     }
1033     return 0;
1034 }
1035
/**
 * \brief applies a leader specification to a leader buffer
 * \param leader_spec comma separated list of pos=value items (may be NULL)
 * \param leader buffer that is modified in place
 * \param size number of bytes available in leader
 * \retval 0 OK (also for a NULL specification)
 * \retval -1 syntax error or position/length outside the buffer
 *
 * Each item is either pos='text', which copies the text between the
 * quotes to leader+pos, or pos=number, which stores the number as a single
 * byte at leader[pos]. A value holds at most 20 characters. Items are
 * applied as they are parsed, so items preceding a bad one stay applied.
 */
static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
{
    const char *spec = leader_spec;

    if (!spec)
        return 0;

    for (;;)
    {
        char value[21];
        int offset;
        int consumed = 0;
        int fields;

        fields = sscanf(spec, "%d=%20[^,]%n", &offset, value, &consumed);
        /* need both position and value; shortest legal item is 3 chars */
        if (fields < 2 || consumed < 3)
            return -1;
        if (offset < 0 || (size_t) offset >= size)
            return -1;

        if (value[0] == '\'')
        {
            /* quoted text: copy what lies between the two quotes */
            const char *text = value + 1;
            const char *closing = strchr(text, '\'');
            size_t text_len;

            if (!closing)
                return -1;
            text_len = closing - text;
            if (text_len + offset > size)
                return -1;
            memcpy(leader + offset, text, text_len);
        }
        else if ('0' <= value[0] && value[0] <= '9')
        {
            /* numeric value: stored as one byte */
            int byte_value = atoi(value);
            leader[offset] = byte_value;
        }
        else
            return -1;

        spec += consumed;
        if (*spec != ',')
            return 0;
        spec++; /* skip separator, continue with next item */
    }
}
1078
1079 int yaz_marc_decode_formatstr(const char *arg)
1080 {
1081     int mode = -1; 
1082     if (!strcmp(arg, "marc"))
1083         mode = YAZ_MARC_ISO2709;
1084     if (!strcmp(arg, "marcxml"))
1085         mode = YAZ_MARC_MARCXML;
1086     if (!strcmp(arg, "marcxchange"))
1087         mode = YAZ_MARC_XCHANGE;
1088     if (!strcmp(arg, "line"))
1089         mode = YAZ_MARC_LINE;
1090     return mode;
1091 }
1092
1093 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1094 {
1095     mt->write_using_libxml2 = enable;
1096 }
1097
1098 /*
1099  * Local variables:
1100  * c-basic-offset: 4
1101  * indent-tabs-mode: nil
1102  * End:
1103  * vim: shiftwidth=4 tabstop=8 expandtab
1104  */
1105