1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
6 #define _FILE_OFFSET_BITS 64
13 #include <libxml/parser.h>
14 #include <libxml/tree.h>
15 #include <libxml/xpath.h>
16 #include <libxml/xpathInternals.h>
18 /* Libxml2 version < 2.6.15. xmlreader not reliable/present */
19 #if LIBXML_VERSION < 20615
20 #define USE_XMLREADER 0
22 #define USE_XMLREADER 1
26 #include <libxml/xmlreader.h>
44 #include <yaz/marcdisp.h>
45 #include <yaz/yaz-util.h>
46 #include <yaz/xmalloc.h>
47 #include <yaz/options.h>
/* Print the command-line synopsis to stderr.
 *
 * prog: program name; presumably the value substituted for the leading
 *       %s of the usage line (the fprintf argument list is not visible
 *       in this view - confirm).
 *
 * The synopsis lists the options -i, -o, -f, -t, -l, -c, -s, -C, -n, -p,
 * -v and -V followed by one or more file names.
 */
59 static void usage(const char *prog)
61 fprintf (stderr, "Usage: %s [-i format] [-o format] [-f from] [-t to] "
62 "[-l pos=value] [-c cfile] [-s prefix] [-C size] [-n] "
63 "[-p] [-v] [-V] file...\n",
/* Print YAZ version information to stdout.
 *
 * The first line printed carries the compile-time macros YAZ_VERSION and
 * YAZ_VERSION_SHA1. yaz_version() fills vstr and sha1_str; when sha1_str
 * differs from YAZ_VERSION_SHA1 a second line is printed with the values
 * that yaz_version() reported.
 */
67 static void show_version(void)
/* Buffers handed to yaz_version(): 20 bytes for the version string and
 * 41 bytes for the SHA1 string. */
69 char vstr[20], sha1_str[41];
71 yaz_version(vstr, sha1_str);
72 printf("YAZ version: %s %s\n", YAZ_VERSION, YAZ_VERSION_SHA1);
/* Only report the library values when the SHA1 strings do not match. */
73 if (strcmp(sha1_str, YAZ_VERSION_SHA1))
74 printf("YAZ DLL/SO: %s %s\n", vstr, sha1_str);
/* Byte-reader callback; passed to yaz_marc_read_line() together with a
 * FILE pointer as client data.
 *
 * client_data: opaque pointer that is cast back to FILE * here.
 *
 * Returns an int. NOTE(review): the statements that read from the stream
 * and produce the return value are not visible in this view - confirm
 * the end-of-file convention against the full function.
 */
78 static int getbyte_stream(void *client_data)
80 FILE *f = (FILE*) client_data;
/* Push-back callback; passed to yaz_marc_read_line() together with a
 * FILE pointer as client data.
 *
 * c:           the byte handed back by the caller.
 * client_data: opaque pointer that is cast back to FILE * here.
 *
 * NOTE(review): the statement that acts on c is not visible in this
 * view; presumably it returns c to the stream - confirm.
 */
88 static void ungetbyte_stream(int c, void *client_data)
90 FILE *f = (FILE*) client_data;
/* Read records from fname with yaz_marc_read_line() and write each one
 * to stdout.
 *
 * mt:    MARC handle used both for reading and for yaz_marc_write_mode().
 * fname: path of the input file; opened in binary read mode.
 *
 * The loop continues for as long as yaz_marc_read_line() returns 0. A
 * fresh WRBUF is allocated and destroyed for every record.
 *
 * NOTE(review): the guard around the cannot-open message and whatever
 * follows it (exit or return), as well as the closing of inf, are not
 * visible in this view - confirm.
 */
97 static void marcdump_read_line(yaz_marc_t mt, const char *fname)
99 FILE *inf = fopen(fname, "rb");
/* Failure report: prog is a name defined outside this function. */
102 fprintf (stderr, "%s: cannot open %s:%s\n",
103 prog, fname, strerror (errno));
/* The stream callbacks receive inf as their client data. */
107 while (yaz_marc_read_line(mt, getbyte_stream,
108 ungetbyte_stream, inf) == 0)
110 WRBUF wrbuf = wrbuf_alloc();
/* NOTE(review): the result of yaz_marc_write_mode() is not used here,
 * while marcdump_read_xml stores and logs it in one of its paths -
 * consider aligning the two. */
111 yaz_marc_write_mode(mt, wrbuf);
112 fputs(wrbuf_cstr(wrbuf), stdout);
113 wrbuf_destroy(wrbuf);
/* Read XML records from fname, convert each record element with
 * yaz_marc_read_xml() and write the result to stdout.
 *
 * mt:    MARC handle used for yaz_marc_read_xml() and
 *        yaz_marc_write_mode().
 * fname: path of the XML input file.
 *
 * Two reading strategies are visible:
 *   - a streaming one built on xmlReaderForFile() / xmlTextReaderRead(),
 *     which expands every element whose local name is record;
 *   - a whole-document one built on xmlParseFile(), which walks sibling
 *     nodes starting at the root element and handles elements named
 *     collection and record.
 * NOTE(review): presumably the USE_XMLREADER macro selects between the
 * two at compile time; the preprocessor conditionals are not visible in
 * this view - confirm.
 *
 * A single WRBUF is allocated on entry, flushed to stdout once more just
 * before the end, and then destroyed.
 */
119 static void marcdump_read_xml(yaz_marc_t mt, const char *fname)
121 WRBUF wrbuf = wrbuf_alloc();
/* Streaming path. */
123 xmlTextReaderPtr reader = xmlReaderForFile(fname, 0 /* encoding */,
/* xmlTextReaderRead() returning 1 keeps the loop going. */
129 while ((ret = xmlTextReaderRead(reader)) == 1)
131 int type = xmlTextReaderNodeType(reader);
132 if (type == XML_READER_TYPE_ELEMENT)
/* NOTE(review): libxml2 documents the result of xmlTextReaderLocalName()
 * as a string the caller must deallocate; no release of name is visible
 * in this view - confirm that it is freed. */
134 const char *name = (const char *)
135 xmlTextReaderLocalName(reader);
136 if (!strcmp(name, "record"))
/* Materialise the subtree of the current element so that it can be
 * handed to yaz_marc_read_xml() as a node. */
138 xmlNodePtr ptr = xmlTextReaderExpand(reader);
140 int r = yaz_marc_read_xml(mt, ptr);
142 fprintf(stderr, "yaz_marc_read_xml failed\n");
/* In this path the write result is kept and reported through yaz_log. */
145 int write_rc = yaz_marc_write_mode(mt, wrbuf);
147 yaz_log(YLOG_WARN, "yaz_marc_write_mode: write error: %d", write_rc);
149 fputs(wrbuf_cstr(wrbuf), stdout);
/* Whole-document path. */
157 xmlDocPtr doc = xmlParseFile(fname);
160 xmlNodePtr ptr = xmlDocGetRootElement(doc);
/* Iterate over the sibling chain that starts at ptr. */
161 for (; ptr; ptr = ptr->next)
163 if (ptr->type == XML_ELEMENT_NODE)
/* NOTE(review): the action taken for a collection element is not
 * visible; presumably the walk descends into its children - confirm. */
165 if (!strcmp((const char *) ptr->name, "collection"))
170 if (!strcmp((const char *) ptr->name, "record"))
172 int r = yaz_marc_read_xml(mt, ptr);
174 fprintf(stderr, "yaz_marc_read_xml failed\n");
/* NOTE(review): unlike the streaming path, the result of
 * yaz_marc_write_mode() is not used here - consider aligning the two. */
177 yaz_marc_write_mode(mt, wrbuf);
179 fputs(wrbuf_cstr(wrbuf), stdout);
/* Final flush of whatever wrbuf holds, then release it. */
188 fputs(wrbuf_cstr(wrbuf), stdout);
189 wrbuf_destroy(wrbuf);
/* Read the MARC records in one input file and write them to stdout in
 * the requested output format.
 *
 * Parameters:
 *   fname               path of the input file.
 *   from, to            character set names handed to
 *                       yaz_iconv_open(to, from).
 *   input_format        passed to yaz_marc_set_read_format() and used to
 *                       pick the reader below.
 *   output_format       passed to yaz_marc_set_write_format().
 *   write_using_libxml2 passed to yaz_marc_write_using_libxml2().
 *   print_offset        takes part in the conditions that emit the
 *                       offset and diagnostic comment lines.
 *   split_fname         prefix of the chunk file names built in the
 *                       split section.
 *   split_chunk         divisor in the marc_no modulo test of the split
 *                       section.
 *   verbose             passed to yaz_marc_debug(); also takes part in
 *                       the diagnostic conditions.
 *   cfile               stream that receives a C array definition named
 *                       marc_records with the bytes as \xNN escapes.
 *   leader_spec         passed to yaz_marc_leader_spec().
 *
 * Reader selection: MARCXML, TMARCXML and XCHANGE input goes to
 * marcdump_read_xml(); LINE input goes to marcdump_read_line(); ISO2709
 * input is read in this function with fread().
 *
 * NOTE(review): many lines of this function (braces, guard conditions,
 * declarations of buf, len, rlen, marc_no, sf and others) are not
 * visible in this view. The comments below describe only the visible
 * statements.
 */
193 static void dump(const char *fname, const char *from, const char *to,
194 int input_format, int output_format,
195 int write_using_libxml2,
196 int print_offset, const char *split_fname, int split_chunk,
197 int verbose, FILE *cfile, const char *leader_spec)
199 yaz_marc_t mt = yaz_marc_create();
/* A non-zero result from yaz_marc_leader_spec() is reported as a bad
 * leader spec and the handle is destroyed. Presumably this is guarded by
 * a check that leader_spec is set - confirm. */
202 if (yaz_marc_leader_spec(mt, leader_spec))
204 fprintf(stderr, "bad leader spec: %s\n", leader_spec);
205 yaz_marc_destroy(mt);
/* Character set conversion; note the argument order (to, from). */
210 cd = yaz_iconv_open(to, from);
213 fprintf(stderr, "conversion from %s to %s "
214 "unsupported\n", from, to);
215 yaz_marc_destroy(mt);
218 yaz_marc_iconv(mt, cd);
/* Configure the handle before any record is read. */
220 yaz_marc_enable_collection(mt);
221 yaz_marc_set_read_format(mt, input_format);
222 yaz_marc_set_write_format(mt, output_format);
223 yaz_marc_write_using_libxml2(mt, write_using_libxml2);
224 yaz_marc_debug(mt, verbose);
/* XML based input formats. */
226 if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_TMARCXML || input_format == YAZ_MARC_XCHANGE)
229 marcdump_read_xml(mt, fname);
/* Line based input format. */
232 else if (input_format == YAZ_MARC_LINE)
234 marcdump_read_line(mt, fname);
/* ISO2709 input: records are read from the file in this function. */
236 else if (input_format == YAZ_MARC_ISO2709)
238 FILE *inf = fopen(fname, "rb");
241 int split_file_no = -1;
/* Failure report: prog is a name defined outside this function. */
244 fprintf (stderr, "%s: cannot open %s:%s\n",
245 prog, fname, strerror (errno));
/* Opening line of the C array written to cfile. */
249 fprintf (cfile, "char *marc_records[] = {\n");
252 const char *result = 0;
/* Read the first 5 bytes of the next record; they are later parsed as
 * the record length. */
259 r = fread (buf, 1, 5, inf);
262 if (r && print_offset && verbose)
263 printf ("<!-- Extra %ld bytes at end of file -->\n",
/* Resynchronise: for as long as the first byte is not an ASCII digit,
 * report it and refill buf one byte at a time. NOTE(review): the body of
 * the for loop is not visible; presumably it shifts buf left by one
 * byte - confirm. */
267 while (*buf < '0' || *buf > '9')
/* NOTE(review): the file defines _FILE_OFFSET_BITS as 64 but offsets are
 * taken with ftell(), which returns long - confirm whether large offsets
 * matter here. */
270 long off = ftell(inf) - 5;
271 if (verbose || print_offset)
272 printf("<!-- Skipping bad byte %d (0x%02X) at offset "
274 *buf & 0xff, *buf & 0xff,
276 for (i = 0; i<4; i++)
278 r = fread(buf+4, 1, 1, inf);
284 if (verbose || print_offset)
285 printf ("<!-- End of file with data -->\n");
290 long off = ftell(inf) - 5;
291 printf ("<!-- Record %d offset %ld (0x%lx) -->\n",
/* The 5 bytes read above are parsed as a decimal length; values below 25
 * or above 100000 are reported as bad. */
294 len = atoi_n(buf, 5);
295 if (len < 25 || len > 100000)
297 long off = ftell(inf) - 5;
298 printf("Bad Length %ld read at offset %ld (%lx)\n",
299 (long)len, (long) off, (long) off);
/* Read the rest of the record after the 5 length bytes. The computation
 * of rlen is not visible in this view. */
303 r = fread (buf + 5, 1, rlen, inf);
/* Keep reading single bytes until the last byte of the record is the
 * ISO2709_RS separator. NOTE(review): the comparison below mixes len
 * with an unsigned sizeof expression; the declarations of len and buf
 * are not visible - confirm the types. */
306 while (buf[len-1] != ISO2709_RS)
308 if (len > sizeof(buf)-2)
310 r = fread (buf + len, 1, 1, inf);
/* Split section: raw records are written to chunk files. */
318 const char *mode = 0;
/* NOTE(review): modulo by split_chunk; a zero value would be a division
 * by zero. Any guard is outside this view - confirm. */
320 if ((marc_no % split_chunk) == 0)
/* NOTE(review): the first argument is written to, so fname here is
 * presumably a local buffer shadowing the const parameter; its size is
 * not visible - confirm it holds the 200 byte prefix, the number and the
 * terminator. */
327 sprintf(fname, "%.200s%07d", split_fname, split_file_no);
328 sf = fopen(fname, mode);
331 fprintf(stderr, "Could not open %s\n", fname);
336 if (fwrite(buf, 1, len, sf) != len)
/* NOTE(review): this message is printed when fwrite() wrote fewer bytes
 * than requested; the wording looks like it lost the word not. */
338 fprintf(stderr, "Could write content to %s\n",
/* Decode the record held in buf; -1 is passed as the buffer size
 * argument. result and len_result receive the converted output. */
346 r = yaz_marc_decode_buf(mt, buf, -1, &result, &len_result);
347 if (r > 0 && result && len_result)
/* The converted record is written to stdout as a single item. */
349 if (fwrite(result, len_result, 1, stdout) != 1)
351 fprintf(stderr, "Write to stdout failed\n");
/* C array output: the first r bytes of p are written as \xNN escapes
 * inside string literals, with a line break after every 16th byte.
 * NOTE(review): the declaration of p and the conditions around the
 * separator and the opening quote are not visible - confirm. */
360 fprintf (cfile, ",");
361 fprintf (cfile, "\n");
362 for (i = 0; i < r; i++)
365 fprintf (cfile, " \"");
366 fprintf (cfile, "\\x%02X", p[i] & 255);
368 if (i < r - 1 && (i & 15) == 15)
369 fprintf (cfile, "\"\n");
372 fprintf (cfile, "\"\n");
/* Closing line of the C array. */
379 fprintf (cfile, "};\n");
/* Emit the trailer produced by yaz_marc_write_trailer() to stdout. */
383 WRBUF wrbuf = wrbuf_alloc();
384 yaz_marc_write_trailer(mt, wrbuf);
385 fputs(wrbuf_cstr(wrbuf), stdout);
386 wrbuf_destroy(wrbuf);
390 yaz_marc_destroy(mt);
/* Program entry point: parse the command line with options() and call
 * dump() with the collected settings.
 *
 * Visible defaults: input format YAZ_MARC_ISO2709, output format
 * YAZ_MARC_LINE, no from/to character sets, no split file prefix, no
 * leader spec, print_offset and write_using_libxml2 both 0.
 *
 * Visible option handling:
 *   - the input format string is decoded with
 *     yaz_marc_decode_formatstr(); -1 is reported as a bad input format;
 *   - MARCXML or XCHANGE input is rejected with a message saying that
 *     Libxml2 support is not enabled (presumably only in builds without
 *     Libxml2 - the preprocessor conditionals are not visible);
 *   - an output format argument longer than 4 characters that starts
 *     with xml and a comma sets write_using_libxml2 to 1; the output
 *     format is then decoded with yaz_marc_decode_formatstr() and -1 is
 *     reported as a bad output format;
 *   - an argument is opened for writing as cfile;
 *   - the former options -x, -O, -e, -X and -I only print a message
 *     that they are no longer supported;
 *   - one option sets the output format to YAZ_MARC_CHECK;
 *   - an argument is converted with atoi() into split_chunk;
 *   - dump() is called with arg as the file name.
 *
 * NOTE(review): the case labels, the return statements and the
 * declarations of r, arg, verbose, cfile and split_chunk are not visible
 * in this view, so the mapping from option letter to action is not
 * stated here.
 * NOTE(review): atoi() gives no error indication, and split_chunk is
 * used as a divisor in dump() - consider validating it.
 * NOTE(review): no check of the fopen() result for cfile is visible -
 * confirm.
 */
393 int main (int argc, char **argv)
396 int print_offset = 0;
400 int output_format = YAZ_MARC_LINE;
402 char *from = 0, *to = 0;
403 int input_format = YAZ_MARC_ISO2709;
405 const char *split_fname = 0;
406 const char *leader_spec = 0;
407 int write_using_libxml2 = 0;
/* Adopt the character type settings of the environment, then take the
 * codeset name reported by nl_langinfo() as the default for to. */
410 setlocale(LC_CTYPE, "");
414 to = nl_langinfo(CODESET);
/* options() returning -2 ends the loop. */
419 while ((r = options("i:o:C:npc:xOeXIf:t:s:l:Vv", argv, argc, &arg)) != -2)
425 input_format = yaz_marc_decode_formatstr(arg);
426 if (input_format == -1)
428 fprintf(stderr, "%s: bad input format: %s\n", prog, arg);
433 if (input_format == YAZ_MARC_MARCXML
434 || input_format == YAZ_MARC_XCHANGE)
436 fprintf(stderr, "%s: Libxml2 support not enabled\n", prog);
442 /* dirty hack so we can make Libxml2 do the writing ..
444 if (strlen(arg) > 4 && strncmp(arg, "xml,", 4) == 0)
447 write_using_libxml2 = 1;
449 output_format = yaz_marc_decode_formatstr(arg);
450 if (output_format == -1)
452 fprintf(stderr, "%s: bad output format: %s\n", prog, arg);
468 cfile = fopen(arg, "w");
471 fprintf(stderr, "%s: -x no longer supported. "
472 "Use -i marcxml instead\n", prog);
476 fprintf(stderr, "%s: OAI MARC no longer supported."
477 " Use MARCXML instead.\n", prog);
481 fprintf(stderr, "%s: -e no longer supported. "
482 "Use -o marcxchange instead\n", prog);
486 fprintf(stderr, "%s: -X no longer supported. "
487 "Use -o marcxml instead\n", prog);
491 fprintf(stderr, "%s: -I no longer supported. "
492 "Use -o marc instead\n", prog);
496 output_format = YAZ_MARC_CHECK;
505 split_chunk = atoi(arg);
508 dump(arg, from, to, input_format, output_format,
510 print_offset, split_fname, split_chunk,
511 verbose, cfile, leader_spec);
536 * c-file-style: "Stroustrup"
537 * indent-tabs-mode: nil
539 * vim: shiftwidth=4 tabstop=8 expandtab