1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
6 #define _FILE_OFFSET_BITS 64
13 #include <libxml/parser.h>
14 #include <libxml/tree.h>
15 #include <libxml/xpath.h>
16 #include <libxml/xpathInternals.h>
18 /* Libxml2 version < 2.6.15. xmlreader not reliable/present */
19 #if LIBXML_VERSION < 20615
20 #define USE_XMLREADER 0
22 #define USE_XMLREADER 1
26 #include <libxml/xmlreader.h>
44 #include <yaz/marcdisp.h>
46 #include <yaz/yaz-util.h>
47 #include <yaz/xmalloc.h>
48 #include <yaz/options.h>
49 #include <yaz/backtrace.h>
/* File-scope error counter ("no" presumably read as "number of"), starting
 * at zero.
 * NOTE(review): no read or update of it is visible in this excerpt, so
 * confirm what increments it and whether it feeds the exit status. */
61 static int no_errors = 0;
/* Print the command-line synopsis to stderr.
 *
 * prog: presumably the program name substituted for the leading %s of the
 *       format string.
 * NOTE(review): the argument list and the end of the fprintf call are not
 * visible in this excerpt, so whether the function also exits cannot be
 * told from here. */
63 static void usage(const char *prog)
65 fprintf(stderr, "Usage: %s [-i format] [-o format] [-f from] [-t to] "
66 "[-l pos=value] [-c cfile] [-s prefix] [-C size] [-n] "
67 "[-p] [-v] [-V] file...\n",
/* Print YAZ version information to stdout.
 *
 * The first line always shows the compile-time macros YAZ_VERSION and
 * YAZ_VERSION_SHA1. yaz_version() is asked to fill vstr and sha1_str; when
 * the SHA1 it reports differs from the compile-time one, a second
 * "YAZ DLL/SO" line prints the values it reported. */
71 static void show_version(void)
/* Fixed-size output buffers for yaz_version().
 * NOTE(review): the sizes (20 and 41) are assumed large enough for what
 * yaz_version() writes - confirm against its documentation. */
73 char vstr[20], sha1_str[41];
75 yaz_version(vstr, sha1_str);
76 printf("YAZ version: %s %s\n", YAZ_VERSION, YAZ_VERSION_SHA1);
/* strcmp() is non-zero when the two SHA1 strings differ. */
77 if (strcmp(sha1_str, YAZ_VERSION_SHA1))
78 printf("YAZ DLL/SO: %s %s\n", vstr, sha1_str);
/* Byte-reader callback: marcdump_read_line() hands it to
 * yaz_marc_read_line() together with a FILE pointer as client data, which
 * is cast back to FILE* here.
 * NOTE(review): the body after the cast is not visible in this excerpt;
 * presumably it returns the next byte of the stream - confirm, including
 * what is returned at end of file. */
82 static int getbyte_stream(void *client_data)
84 FILE *f = (FILE*) client_data;
/* Push-back callback paired with getbyte_stream(): marcdump_read_line()
 * hands both to yaz_marc_read_line() with the same FILE pointer as client
 * data, which is cast back to FILE* here.
 * NOTE(review): the body after the cast is not visible in this excerpt;
 * presumably it returns byte c to the stream - confirm. */
92 static void ungetbyte_stream(int c, void *client_data)
94 FILE *f = (FILE*) client_data;
/* Read records from fname with yaz_marc_read_line() and print each one to
 * stdout.
 *
 * mt:    MARC handle used for both reading and writing.
 * fname: path of the input file, opened in binary read mode.
 *
 * NOTE(review): the guard around the "cannot open" message, any early
 * return and any fclose(inf) are not visible in this excerpt - confirm the
 * stream is closed on every path. `prog` is not declared in the visible
 * lines; presumably a file-scope program name. */
101 static void marcdump_read_line(yaz_marc_t mt, const char *fname)
103 FILE *inf = fopen(fname, "rb");
106 fprintf(stderr, "%s: cannot open %s:%s\n",
107 prog, fname, strerror(errno));
/* Keep going for as long as yaz_marc_read_line() returns 0. */
111 while (yaz_marc_read_line(mt, getbyte_stream,
112 ungetbyte_stream, inf) == 0)
/* A fresh WRBUF per record: render into it, print it, then release it. */
114 WRBUF wrbuf = wrbuf_alloc();
115 yaz_marc_write_mode(mt, wrbuf);
116 fputs(wrbuf_cstr(wrbuf), stdout);
117 wrbuf_destroy(wrbuf);
/* Read fname as JSON, convert the parsed tree to a MARC record and print
 * it to stdout.
 *
 * mt:    MARC handle used for both reading and writing.
 * fname: path of the input file, opened in binary read mode.
 *
 * Failures are reported on stderr: open failure, JSON parse error (with
 * message and position) and JSON-to-MARC conversion failure.
 *
 * NOTE(review): several lines are not visible in this excerpt - the guard
 * around the open-failure message, the loop body that consumes each
 * character, and the release of w, the parsed tree and the stream. Confirm
 * all three are freed on every path. */
122 static void marcdump_read_json(yaz_marc_t mt, const char *fname)
124 FILE *inf = fopen(fname, "rb");
127 fprintf(stderr, "%s: cannot open %s:%s\n",
128 prog, fname, strerror(errno));
135 WRBUF w = wrbuf_alloc();
/* Consume the file one character at a time until EOF; presumably each
 * character is appended to w (the loop body is not visible here). */
139 while ((c = getc(inf)) != EOF)
/* Parse the text held in w; errmsg and errpos receive error details. */
141 n = json_parse2(wrbuf_cstr(w), &errmsg, &errpos);
144 int r = yaz_marc_read_json_node(mt, n);
/* w is reused as the output buffer.
 * NOTE(review): it still holds the JSON text unless it is reset on a line
 * not visible here - confirm. */
148 yaz_marc_write_mode(mt, w);
149 fputs(wrbuf_cstr(w), stdout);
154 fprintf(stderr, "%s: JSON MARC parsing failed ret=%d\n", fname,
160 fprintf(stderr, "%s: JSON parse error: %s . pos=%ld\n", fname,
161 errmsg, (long) errpos);
/* Read MARC records from the XML file fname and print each one to stdout.
 *
 * mt:    MARC handle used for both reading and writing.
 * fname: path of the XML input file.
 *
 * Two implementations appear below: a streaming one built on
 * xmlTextReader and a whole-document one built on xmlParseFile().
 * NOTE(review): the preprocessor lines choosing between them are not
 * visible in this excerpt; presumably they test USE_XMLREADER - confirm.
 *
 * In both, an element whose name is "record" or "r" is passed to
 * yaz_marc_read_xml() and the record is rendered with
 * yaz_marc_write_mode(). */
170 static void marcdump_read_xml(yaz_marc_t mt, const char *fname)
172 WRBUF wrbuf = wrbuf_alloc();
/* --- streaming variant --- */
174 xmlTextReaderPtr reader = xmlReaderForFile(fname, 0 /* encoding */,
/* xmlTextReaderRead() returning 1 means another node is available. */
180 while ((ret = xmlTextReaderRead(reader)) == 1)
182 int type = xmlTextReaderNodeType(reader);
183 if (type == XML_READER_TYPE_ELEMENT)
/* NOTE(review): libxml2 documents the string returned by
 * xmlTextReaderLocalName() as owned by the caller; no release of `name`
 * is visible in this excerpt - confirm it is freed. */
185 char *name = (char *) xmlTextReaderLocalName(reader);
186 if (!strcmp(name, "record") || !strcmp(name, "r"))
/* Expand the current element into a node tree so that it can be handed
 * to yaz_marc_read_xml(). */
188 xmlNodePtr ptr = xmlTextReaderExpand(reader);
190 int r = yaz_marc_read_xml(mt, ptr);
194 fprintf(stderr, "yaz_marc_read_xml failed\n");
/* Here the write result is captured and a warning is logged with it. */
198 int write_rc = yaz_marc_write_mode(mt, wrbuf);
201 yaz_log(YLOG_WARN, "yaz_marc_write_mode: "
202 "write error: %d", write_rc);
205 fputs(wrbuf_cstr(wrbuf), stdout);
212 xmlFreeTextReader(reader);
/* --- whole-document variant --- */
215 xmlDocPtr doc = xmlParseFile(fname);
/* Walk the sibling chain starting at the root element. */
218 xmlNodePtr ptr = xmlDocGetRootElement(doc);
219 for (; ptr; ptr = ptr->next)
221 if (ptr->type == XML_ELEMENT_NODE)
/* NOTE(review): what happens for a "collection" element is not visible
 * in this excerpt; presumably the walk descends into its children. */
223 if (!strcmp((const char *) ptr->name, "collection"))
228 if (!strcmp((const char *) ptr->name, "record") ||
229 !strcmp((const char *) ptr->name, "r"))
231 int r = yaz_marc_read_xml(mt, ptr);
235 fprintf(stderr, "yaz_marc_read_xml failed\n");
/* Unlike the streaming variant, this call's return value is not
 * captured. */
239 yaz_marc_write_mode(mt, wrbuf);
241 fputs(wrbuf_cstr(wrbuf), stdout);
/* Print whatever wrbuf holds at the end, then release it. */
250 fputs(wrbuf_cstr(wrbuf), stdout);
251 wrbuf_destroy(wrbuf);
/* Convert the records of one input file and write the result to stdout.
 *
 * fname:               input file.
 * from, to:            character set names, passed to
 *                      yaz_iconv_open(to, from); `from` is also passed to
 *                      yaz_marc_check_marc21_coding().
 * input_format:        selects the reader: MARCXML, TURBOMARC and XCHANGE
 *                      go to marcdump_read_xml(), LINE to
 *                      marcdump_read_line(), JSON to marcdump_read_json(),
 *                      and ISO2709 is handled by the loop in this function.
 * output_format:       passed to yaz_marc_xml().
 * write_using_libxml2: passed to yaz_marc_write_using_libxml2().
 * print_offset:        takes part in deciding whether the diagnostic
 *                      "<!-- ... -->" lines are printed.
 * split_fname,
 * split_chunk:         name prefix for split output files and the record
 *                      count at which a new split file is started.
 * verbose:             passed to yaz_marc_debug(); also takes part in
 *                      deciding whether diagnostics are printed.
 * cfile:               stream that receives a C array named marc_records.
 * leader_spec:         passed to yaz_marc_leader_spec().
 *
 * NOTE(review): this excerpt omits many lines (braces, guards, loop
 * headers, declarations), so the exact conditions under which individual
 * statements run cannot be told from here. The notes below describe only
 * what the visible statements do. */
255 static void dump(const char *fname, const char *from, const char *to,
256 int input_format, int output_format,
257 int write_using_libxml2,
258 int print_offset, const char *split_fname, int split_chunk,
259 int verbose, FILE *cfile, const char *leader_spec)
261 yaz_marc_t mt = yaz_marc_create();
/* A non-zero return from yaz_marc_leader_spec() is reported as a bad
 * leader spec and mt is destroyed. */
264 if (yaz_marc_leader_spec(mt, leader_spec))
266 fprintf(stderr, "bad leader spec: %s\n", leader_spec);
267 yaz_marc_destroy(mt);
/* Open the from -> to converter and attach it to mt. The message below is
 * the failure path; mt is destroyed there too. */
272 cd = yaz_iconv_open(to, from);
275 fprintf(stderr, "conversion from %s to %s "
276 "unsupported\n", from, to);
277 yaz_marc_destroy(mt);
280 yaz_marc_iconv(mt, cd);
/* Writer configuration. */
282 yaz_marc_enable_collection(mt);
283 yaz_marc_xml(mt, output_format);
284 yaz_marc_write_using_libxml2(mt, write_using_libxml2);
285 yaz_marc_debug(mt, verbose);
/* Dispatch on the input format. */
287 if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_TURBOMARC || input_format == YAZ_MARC_XCHANGE)
290 marcdump_read_xml(mt, fname);
293 else if (input_format == YAZ_MARC_LINE)
295 marcdump_read_line(mt, fname);
297 else if (input_format == YAZ_MARC_JSON)
299 marcdump_read_json(mt, fname);
301 else if (input_format == YAZ_MARC_ISO2709)
/* ISO2709 input is read directly in this function. */
303 FILE *inf = fopen(fname, "rb");
306 int split_file_no = -1;
309 fprintf(stderr, "%s: cannot open %s:%s\n",
310 prog, fname, strerror(errno));
/* Opening line of the C array written to cfile.
 * NOTE(review): cfile may be a null pointer when -c was not given or its
 * fopen failed; the guard is not visible in this excerpt - confirm. */
314 fprintf(cfile, "char *marc_records[] = {\n");
317 const char *result = 0;
/* Read the first 5 bytes of a record; they are later parsed as its length
 * with atoi_n(buf, 5). */
325 r = fread(buf, 1, 5, inf);
328 if (r == 0) /* normal EOF, all good */
330 if (print_offset && verbose)
332 printf("<!-- Extra %ld bytes at end of file -->\n",
/* Resynchronise: loop for as long as the first buffered byte is not an
 * ASCII digit. Each pass reports the bad byte and reads one more byte into
 * buf[4]; the body of the for loop is not visible here, presumably it
 * shifts buf[1..4] down by one position. */
337 while (*buf < '0' || *buf > '9')
/* The 5 header bytes have already been consumed, hence the -5. */
340 long off = ftell(inf) - 5;
341 printf("<!-- Skipping bad byte %d (0x%02X) at offset "
343 *buf & 0xff, *buf & 0xff,
345 for (i = 0; i<4; i++)
347 r = fread(buf+4, 1, 1, inf);
354 if (verbose || print_offset)
355 printf("<!-- End of file with data -->\n");
360 long off = ftell(inf) - 5;
361 printf("<!-- Record %d offset %ld (0x%lx) -->\n",
/* Record length from the 5-byte field; values outside 25..100000 are
 * reported as a bad length. */
364 len = atoi_n(buf, 5);
365 if (len < 25 || len > 100000)
367 long off = ftell(inf) - 5;
368 printf("<!-- Bad Length %ld read at offset %ld (%lx) -->\n",
369 (long)len, (long) off, (long) off);
/* Read the rest of the record after the 5-byte length field. The
 * computation of rlen is not visible in this excerpt. */
374 r = fread(buf + 5, 1, rlen, inf);
377 long off = ftell(inf);
378 printf("<!-- Premature EOF at offset %ld (%lx) -->\n",
379 (long) off, (long) off);
/* When the last byte is not ISO2709_RS, keep appending single bytes until
 * it is. The sizeof check stops this before buf is overrun.
 * NOTE(review): if len has a signed type, the comparison with
 * sizeof(buf)-2 is done in unsigned arithmetic - confirm len cannot be
 * negative here. */
383 while (buf[len-1] != ISO2709_RS)
385 if (len > sizeof(buf)-2)
390 r = fread(buf + len, 1, 1, inf);
397 printf("<!-- EOF while searching for RS -->\n");
/* Split output: a new file name is built whenever the record number is a
 * multiple of split_chunk.
 * NOTE(review): main() sets split_chunk from atoi(arg) with no validation
 * visible in this excerpt; a value of 0 would make the modulo below
 * undefined behaviour - confirm a guard exists. */
404 const char *mode = 0;
406 if ((marc_no % split_chunk) == 0)
/* NOTE(review): `fname` here has to be a local character buffer that
 * shadows the const parameter; its declaration and size are not visible.
 * "%.200s" caps the prefix at 200 characters and "%07d" adds at least 7
 * digits - confirm the buffer is large enough. */
413 sprintf(fname, "%.200s%07d", split_fname, split_file_no);
414 sf = fopen(fname, mode);
417 fprintf(stderr, "Could not open %s\n", fname);
/* A short write is reported.
 * NOTE(review): the message text reads "Could write"; it has probably
 * lost the word "not". */
422 if (fwrite(buf, 1, len, sf) != len)
424 fprintf(stderr, "Could write content to %s\n",
/* When yaz_marc_check_marc21_coding() returns non-zero for this record, a
 * second converter from "utf-8" to `to` is opened and installed for the
 * decode; afterwards it is closed and the original converter cd is
 * installed again. */
434 if (yaz_marc_check_marc21_coding(from, buf, 26))
436 cd1 = yaz_iconv_open(to, "utf-8");
438 yaz_marc_iconv(mt, cd1);
/* Decode the record held in buf; result and len_result receive the
 * rendered output. */
440 r = yaz_marc_decode_buf(mt, buf, -1, &result, &len_result);
444 yaz_iconv_close(cd1);
445 yaz_marc_iconv(mt, cd);
/* Write the rendered record to stdout only when decoding reported a
 * positive value and produced non-empty output. */
450 if (r > 0 && result && len_result)
452 if (fwrite(result, len_result, 1, stdout) != 1)
454 fprintf(stderr, "Write to stdout failed\n");
/* Emit the first r bytes as C string literal text on cfile. Bytes below 32
 * or above 126 are written as a two-digit hex escape inside a literal
 * segment of their own. A line is closed after every 16th byte
 * ((i & 15) == 15), except after the final byte, which gets the closing
 * quote below. The declaration of p is not visible in this excerpt. */
465 fprintf(cfile, "\n");
466 for (i = 0; i < r; i++)
469 fprintf(cfile, " \"");
470 if (p[i] < 32 || p[i] > 126)
471 fprintf(cfile, "\" \"\\x%02X\" \"", p[i] & 255);
475 if (i < r - 1 && (i & 15) == 15)
476 fprintf(cfile, "\"\n");
479 fprintf(cfile, "\"\n");
/* Closing line of the C array. */
486 fprintf(cfile, "};\n");
/* Render the output trailer via a temporary WRBUF and print it. */
490 WRBUF wrbuf = wrbuf_alloc();
491 yaz_marc_write_trailer(mt, wrbuf);
492 fputs(wrbuf_cstr(wrbuf), stdout);
493 wrbuf_destroy(wrbuf);
497 yaz_marc_destroy(mt);
/* Program entry point: parse the command line with options() and call
 * dump() for input files.
 *
 * Defaults visible here: input format YAZ_MARC_ISO2709, output format
 * YAZ_MARC_LINE, no character set names, no split prefix, no leader spec,
 * libxml2 writing off.
 *
 * NOTE(review): the switch and case labels of the option loop are not
 * visible in this excerpt, so which option letter drives each statement
 * below is inferred from the messages and from the usage text only. */
500 int main (int argc, char **argv)
503 int print_offset = 0;
507 int output_format = YAZ_MARC_LINE;
509 char *from = 0, *to = 0;
510 int input_format = YAZ_MARC_ISO2709;
512 const char *split_fname = 0;
513 const char *leader_spec = 0;
514 int write_using_libxml2 = 0;
/* Adopt the environment's LC_CTYPE locale, then take the locale's codeset
 * name as the value of `to`. */
517 setlocale(LC_CTYPE, "");
521 to = nl_langinfo(CODESET);
526 yaz_enable_panic_backtrace(prog);
/* options() is called until it returns -2. In the option string a letter
 * followed by ':' presumably takes an argument, delivered in arg. */
527 while ((r = options("i:o:C:npc:xOeXIf:t:s:l:Vv", argv, argc, &arg)) != -2)
/* Input format: decoded from its name; -1 means the name is unknown. */
533 input_format = yaz_marc_decode_formatstr(arg);
534 if (input_format == -1)
536 fprintf(stderr, "%s: bad input format: %s\n", prog, arg);
/* NOTE(review): presumably compiled only when Libxml2 is unavailable; the
 * preprocessor guard is not visible in this excerpt. */
541 if (input_format == YAZ_MARC_MARCXML
542 || input_format == YAZ_MARC_XCHANGE)
544 fprintf(stderr, "%s: Libxml2 support not enabled\n", prog);
550 /* dirty hack so we can make Libxml2 do the writing ..
552 if (strlen(arg) > 4 && strncmp(arg, "xml,", 4) == 0)
554 /* Only supported for Libxml2 2.6.0 or later */
555 #if LIBXML_VERSION >= 20600
557 write_using_libxml2 = 1;
559 fprintf(stderr, "%s: output using Libxml2 unsupported\n", prog);
/* Output format: decoded from its name; -1 means the name is unknown. */
563 output_format = yaz_marc_decode_formatstr(arg);
564 if (output_format == -1)
566 fprintf(stderr, "%s: bad output format: %s\n", prog, arg);
/* Open the stream later handed to dump() as cfile.
 * NOTE(review): no check of the fopen() result is visible in this
 * excerpt - confirm a null pointer is handled. */
582 cfile = fopen(arg, "w");
/* Retired options: each only prints a hint naming its replacement. */
585 fprintf(stderr, "%s: -x no longer supported. "
586 "Use -i marcxml instead\n", prog);
590 fprintf(stderr, "%s: OAI MARC no longer supported."
591 " Use MARCXML instead.\n", prog);
595 fprintf(stderr, "%s: -e no longer supported. "
596 "Use -o marcxchange instead\n", prog);
600 fprintf(stderr, "%s: -X no longer supported. "
601 "Use -o marcxml instead\n", prog);
605 fprintf(stderr, "%s: -I no longer supported. "
606 "Use -o marc instead\n", prog);
/* Selects the YAZ_MARC_CHECK output format; the option letter that leads
 * here is not visible in this excerpt. */
610 output_format = YAZ_MARC_CHECK;
/* NOTE(review): atoi() reports no errors, so non-numeric input yields 0.
 * dump() uses this value as the right-hand operand of '%', so 0 must be
 * rejected somewhere - no such check is visible in this excerpt. */
619 split_chunk = atoi(arg);
/* Process one input file with the settings gathered so far. */
622 dump(arg, from, to, input_format, output_format,
624 print_offset, split_fname, split_chunk,
625 verbose, cfile, leader_spec);
652 * c-file-style: "Stroustrup"
653 * indent-tabs-mode: nil
655 * vim: shiftwidth=4 tabstop=8 expandtab