2 * Copyright (C) 1995-2007, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: marcdump.c,v 1.55 2007-12-18 21:13:06 adam Exp $
8 #define _FILE_OFFSET_BITS 64
15 #include <libxml/parser.h>
16 #include <libxml/tree.h>
17 #include <libxml/xpath.h>
18 #include <libxml/xpathInternals.h>
20 /* Libxml2 version < 2.6.15. xmlreader not reliable/present */
21 #if LIBXML_VERSION < 20615
22 #define USE_XMLREADER 0
24 #define USE_XMLREADER 1
28 #include <libxml/xmlreader.h>
46 #include <yaz/marcdisp.h>
47 #include <yaz/yaz-util.h>
48 #include <yaz/xmalloc.h>
49 #include <yaz/options.h>
/* Write the command-line synopsis (the list of accepted options followed by
   the file arguments) to stderr.
   NOTE(review): prog is presumably the value substituted for the leading %s;
   the argument line of this fprintf call is not shown here - confirm. */
61 static void usage(const char *prog)
63 fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] "
64 "[-i format] [-o format] "
65 "[-n] [-l pos=value] [-v] [-C chunk] [-s splitfname] [-p] file...\n",
/* Byte-source callback handed to yaz_marc_read_line() by
   marcdump_read_line().  client_data is the FILE stream the caller opened.
   NOTE(review): presumably returns the next byte of that stream - the
   reading statement is not shown here, confirm. */
69 static int getbyte_stream(void *client_data)
/* recover the stream from the opaque callback argument */
71 FILE *f = (FILE*) client_data;
/* Companion callback to getbyte_stream, also handed to yaz_marc_read_line().
   c is the byte concerned and client_data is the FILE stream.
   NOTE(review): presumably pushes c back onto the stream - the statement
   doing so is not shown here, confirm. */
79 static void ungetbyte_stream(int c, void *client_data)
/* recover the stream from the opaque callback argument */
81 FILE *f = (FILE*) client_data;
/* Read records from the file fname with yaz_marc_read_line() and print each
   one to stdout through yaz_marc_write_mode(), then print the trailer
   produced by yaz_marc_write_trailer().
   mt: MARC handle used for both reading and writing.
   fname: path of the input file; it is opened in binary read mode. */
88 static void marcdump_read_line(yaz_marc_t mt, const char *fname)
90 FILE *inf = fopen(fname, "rb");
/* diagnostic for a failed open; includes the system error text */
93 fprintf (stderr, "%s: cannot open %s:%s\n",
94 prog, fname, strerror (errno));
/* a return value of 0 from yaz_marc_read_line keeps the loop going;
   the stream callbacks read from inf */
98 while (yaz_marc_read_line(mt, getbyte_stream,
99 ungetbyte_stream, inf) == 0)
/* each record gets its own temporary WRBUF, printed and then released */
101 WRBUF wrbuf = wrbuf_alloc();
102 yaz_marc_write_mode(mt, wrbuf);
103 fputs(wrbuf_cstr(wrbuf), stdout);
104 wrbuf_destroy(wrbuf);
/* after the loop: emit whatever trailer the writer produces */
107 WRBUF wrbuf = wrbuf_alloc();
108 yaz_marc_write_trailer(mt, wrbuf);
109 fputs(wrbuf_cstr(wrbuf), stdout);
110 wrbuf_destroy(wrbuf);
/* Read "record" elements from the XML file fname, convert each with
   yaz_marc_read_xml() and print the result of yaz_marc_write_mode() to
   stdout; finally print the trailer from yaz_marc_write_trailer().
   mt: MARC handle used for both reading and writing.
   fname: path of the XML input file.
   Two implementations follow: one built on the libxml2 xmlTextReader API and
   one that parses the whole document with xmlParseFile().
   NOTE(review): presumably USE_XMLREADER selects between them - the
   preprocessor lines are not shown here, confirm. */
116 static void marcdump_read_xml(yaz_marc_t mt, const char *fname)
/* one output buffer shared by all records of this file */
118 WRBUF wrbuf = wrbuf_alloc();
/* --- xmlTextReader variant --- */
120 xmlTextReaderPtr reader = xmlReaderForFile(fname, 0 /* encoding */,
/* xmlTextReaderRead returning 1 keeps the loop going */
126 while ((ret = xmlTextReaderRead(reader)) == 1)
128 int type = xmlTextReaderNodeType(reader);
129 if (type == XML_READER_TYPE_ELEMENT)
131 const char *name = (const char *)
132 xmlTextReaderConstName(reader);
/* only elements named "record" are converted */
133 if (!strcmp(name, "record"))
/* expand the current element into a node that yaz_marc_read_xml can walk */
135 xmlNodePtr ptr = xmlTextReaderExpand(reader);
137 int r = yaz_marc_read_xml(mt, ptr);
139 fprintf(stderr, "yaz_marc_read_xml failed\n");
142 yaz_marc_write_mode(mt, wrbuf);
144 fputs(wrbuf_cstr(wrbuf), stdout);
150 yaz_marc_write_trailer(mt, wrbuf);
151 fputs(wrbuf_cstr(wrbuf), stdout);
/* --- whole-document variant --- */
154 xmlDocPtr doc = xmlParseFile(fname);
/* walk the root element and its following siblings */
157 xmlNodePtr ptr = xmlDocGetRootElement(doc);
158 for (; ptr; ptr = ptr->next)
160 if (ptr->type == XML_ELEMENT_NODE)
/* NOTE(review): a "collection" element presumably makes the walk descend
   to its children - that statement is not shown here, confirm */
162 if (!strcmp((const char *) ptr->name, "collection"))
167 if (!strcmp((const char *) ptr->name, "record"))
169 int r = yaz_marc_read_xml(mt, ptr);
171 fprintf(stderr, "yaz_marc_read_xml failed\n");
174 yaz_marc_write_mode(mt, wrbuf);
176 fputs(wrbuf_cstr(wrbuf), stdout);
185 yaz_marc_write_trailer(mt, wrbuf);
186 fputs(wrbuf_cstr(wrbuf), stdout);
187 wrbuf_destroy(wrbuf);
/* Read MARC records from the file fname and write them to stdout.
   fname: input file path.
   from, to: character set names passed to yaz_iconv_open(to, from); the
     resulting conversion handle is attached to the MARC handle.
   input_format: YAZ_MARC_MARCXML / YAZ_MARC_XCHANGE go to
     marcdump_read_xml(), YAZ_MARC_LINE goes to marcdump_read_line(),
     YAZ_MARC_ISO2709 is read and decoded in this function.
   output_format: passed to yaz_marc_xml().
   write_using_libxml2: passed to yaz_marc_write_using_libxml2().
   print_offset, verbose: enable the "<!-- ... -->" diagnostics printed to
     stdout; verbose is also passed to yaz_marc_debug().
   split_fname, split_chunk: raw ISO2709 records are written to files named
     split_fname followed by a 7-digit file number; split_chunk is the
     divisor applied to the record number when choosing the file.
   cfile: stream that receives the records as a C array named marc_records.
   leader_spec: passed to yaz_marc_leader_spec(); a rejected spec is
     reported on stderr. */
191 static void dump(const char *fname, const char *from, const char *to,
192 int input_format, int output_format,
193 int write_using_libxml2,
194 int print_offset, const char *split_fname, int split_chunk,
195 int verbose, FILE *cfile, const char *leader_spec)
197 yaz_marc_t mt = yaz_marc_create();
/* a non-zero return means the leader spec was not accepted */
200 if (yaz_marc_leader_spec(mt, leader_spec))
202 fprintf(stderr, "bad leader spec: %s\n", leader_spec);
203 yaz_marc_destroy(mt);
/* character set conversion: note the (to, from) argument order */
208 cd = yaz_iconv_open(to, from);
211 fprintf(stderr, "conversion from %s to %s "
212 "unsupported\n", from, to);
213 yaz_marc_destroy(mt);
216 yaz_marc_iconv(mt, cd);
/* configure the writer side of the handle */
218 yaz_marc_xml(mt, output_format);
219 yaz_marc_enable_collection(mt);
220 yaz_marc_write_using_libxml2(mt, write_using_libxml2);
221 yaz_marc_debug(mt, verbose);
/* dispatch on the input format */
223 if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_XCHANGE)
226 marcdump_read_xml(mt, fname);
229 else if (input_format == YAZ_MARC_LINE)
231 marcdump_read_line(mt, fname);
233 else if (input_format == YAZ_MARC_ISO2709)
/* ISO2709: records are read and framed by hand below */
235 FILE *inf = fopen(fname, "rb");
238 int split_file_no = -1;
241 fprintf (stderr, "%s: cannot open %s:%s\n",
242 prog, fname, strerror (errno));
/* opening line of the generated C array */
246 fprintf (cfile, "char *marc_records[] = {\n");
249 const char *result = 0;
/* the first 5 bytes of a record hold its length as decimal digits */
256 r = fread (buf, 1, 5, inf);
259 if (r && print_offset && verbose)
260 printf ("<!-- Extra %ld bytes at end of file -->\n",
/* resynchronise: while the first byte is not a digit, report it, shift the
   remaining bytes and read one more byte into the last position */
264 while (*buf < '0' || *buf > '9')
267 long off = ftell(inf) - 5;
268 if (verbose || print_offset)
269 printf("<!-- Skipping bad byte %d (0x%02X) at offset "
271 *buf & 0xff, *buf & 0xff,
273 for (i = 0; i<4; i++)
275 r = fread(buf+4, 1, 1, inf);
281 if (verbose || print_offset)
282 printf ("<!-- End of file with data -->\n");
/* offset of the record start: the 5 length bytes were already consumed */
287 long off = ftell(inf) - 5;
288 printf ("<!-- Record %d offset %ld (0x%lx) -->\n",
291 len = atoi_n(buf, 5);
/* lengths outside 25..100000 are reported as bad */
292 if (len < 25 || len > 100000)
294 long off = ftell(inf) - 5;
295 printf("Bad Length %ld read at offset %ld (%lx)\n",
296 (long)len, (long) off, (long) off);
/* read the rest of the record after the 5 length bytes */
300 r = fread (buf + 5, 1, rlen, inf);
/* the record should end with ISO2709_RS; otherwise keep appending single
   bytes, with the buffer size as the upper bound */
303 while (buf[len-1] != ISO2709_RS)
305 if (len > sizeof(buf)-2)
307 r = fread (buf + len, 1, 1, inf);
315 const char *mode = 0;
/* NOTE(review): split_chunk == 0 would make this modulo undefined; confirm
   that the caller never passes a zero chunk size */
317 if ((marc_no % split_chunk) == 0)
/* %.200s caps the prefix taken from split_fname.
   NOTE(review): fname here must be a writable local buffer shadowing the
   parameter - confirm it holds 200 + 7 digits + the terminating NUL */
324 sprintf(fname, "%.200s%07d", split_fname, split_file_no);
325 sf = fopen(fname, mode);
328 fprintf(stderr, "Could not open %s\n", fname);
/* fwrite returning fewer than len bytes means the raw record was not
   fully written: report the failed write */
333 if (fwrite(buf, 1, len, sf) != len)
335 fprintf(stderr, "Could not write content to %s\n",
/* decode the raw record; the decoded text is returned through result and
   len_result */
343 r = yaz_marc_decode_buf(mt, buf, -1, &result, &len_result);
346 fwrite (result, len_result, 1, stdout);
/* C array output: each byte becomes a \xHH escape, and the string literal
   is closed and restarted after every 16th byte */
353 fprintf (cfile, ",");
354 fprintf (cfile, "\n");
355 for (i = 0; i < r; i++)
358 fprintf (cfile, " \"");
359 fprintf (cfile, "\\x%02X", p[i] & 255);
361 if (i < r - 1 && (i & 15) == 15)
362 fprintf (cfile, "\"\n");
365 fprintf (cfile, "\"\n");
/* closing line of the generated C array */
372 fprintf (cfile, "};\n");
/* emit whatever trailer the writer produces */
376 WRBUF wrbuf = wrbuf_alloc();
377 yaz_marc_write_trailer(mt, wrbuf);
378 fputs(wrbuf_cstr(wrbuf), stdout);
379 wrbuf_destroy(wrbuf);
383 yaz_marc_destroy(mt);
/* Program entry point: parses the command line with options() and calls
   dump() with the collected settings.
   Initial settings visible here: input format YAZ_MARC_ISO2709, output
   format YAZ_MARC_LINE, no character set names, no split file name, no
   leader spec, and writing through Libxml2 switched off. */
386 int main (int argc, char **argv)
389 int print_offset = 0;
393 int output_format = YAZ_MARC_LINE;
395 char *from = 0, *to = 0;
396 int input_format = YAZ_MARC_ISO2709;
398 const char *split_fname = 0;
399 const char *leader_spec = 0;
400 int write_using_libxml2 = 0;
/* adopt the character classification of the user's locale */
403 setlocale(LC_CTYPE, "");
/* the locale's codeset becomes the target character set */
407 to = nl_langinfo(CODESET);
/* options() returning -2 ends the option loop */
412 while ((r = options("i:o:C:npvc:xOeXIf:t:s:l:", argv, argc, &arg)) != -2)
418 input_format = yaz_marc_decode_formatstr(arg);
419 if (input_format == -1)
421 fprintf(stderr, "%s: bad input format: %s\n", prog, arg);
/* NOTE(review): presumably only compiled when Libxml2 is unavailable - the
   preprocessor lines are not shown here, confirm */
426 if (input_format == YAZ_MARC_MARCXML
427 || input_format == YAZ_MARC_XCHANGE)
429 fprintf(stderr, "%s: Libxml2 support not enabled\n", prog);
435 /* dirty hack so we can make Libxml2 do the writing ..
437 if (strlen(arg) > 4 && strncmp(arg, "xml,", 4) == 0)
440 write_using_libxml2 = 1;
442 output_format = yaz_marc_decode_formatstr(arg);
443 if (output_format == -1)
445 fprintf(stderr, "%s: bad output format: %s\n", prog, arg);
461 cfile = fopen(arg, "w");
464 fprintf(stderr, "%s: -x no longer supported. "
465 "Use -i marcxml instead\n", prog);
469 fprintf(stderr, "%s: OAI MARC no longer supported."
470 " Use MARCXML instead.\n", prog);
474 fprintf(stderr, "%s: -e no longer supported. "
475 "Use -o marcxchange instead\n", prog);
479 fprintf(stderr, "%s: -X no longer supported. "
480 "Use -o marcxml instead\n", prog);
484 fprintf(stderr, "%s: -I no longer supported. "
485 "Use -o marc instead\n", prog);
489 output_format = YAZ_MARC_CHECK;
498 split_chunk = atoi(arg);
501 dump(arg, from, to, input_format, output_format,
503 print_offset, split_fname, split_chunk,
504 verbose, cfile, leader_spec);
526 * indent-tabs-mode: nil
528 * vim: shiftwidth=4 tabstop=8 expandtab