--- /dev/null
+/*
+ * Copyright (c) 1997-2002, Index Data
+ * See the file LICENSE for details.
+ *
+ * $Id: siconv.c,v 1.1 2002-08-27 14:02:13 adam Exp $
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+
+#if HAVE_ICONV_H
+#include <iconv.h>
+#endif
+
+#include <yaz/yaz-util.h>
+
+struct yaz_iconv_struct {
+ int my_errno;
+ unsigned long (*read_handle)(yaz_iconv_t cd, char **inbuf,
+ size_t *inbytesleft);
+ size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
+ char **outbuf, size_t *outbytesleft);
+#if HAVE_ICONV_H
+ iconv_t iconv_cd;
+#endif
+};
+
+
+static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd,
+ char **inbuf, size_t *inbytesleft)
+{
+ unsigned char *inp = *inbuf;
+ unsigned long x = 0;
+ x = inp[0];
+ (*inbytesleft)--;
+ inp++;
+ *inbuf = inp;
+ return x;
+}
+
+static unsigned long yaz_read_UTF8 (yaz_iconv_t cd,
+ char **inbuf, size_t *inbytesleft)
+{
+ unsigned char *inp = *inbuf;
+ unsigned long x = 0;
+ if (inp[0] <= 0x7f)
+ {
+ x = inp[0];
+
+ (*inbytesleft)--;
+ inp++;
+ }
+ else if (inp[0] <= 0xdf && *inbytesleft >= 2)
+ {
+ x = ((inp[0] & 0x1f) << 6) + (inp[1] & 0x3f);
+
+ (*inbytesleft) -= 2;
+ inp += 2;
+ }
+ else if (inp[0] <= 0xef && *inbytesleft >= 3)
+ {
+ x = ((inp[0] & 0x0f) << 12) +
+ ((inp[1] & 0x3f) << 6) + (inp[1] & 0x3f);
+
+ (*inbytesleft) -= 3;
+ inp += 3;
+ }
+ else if (inp[0] <= 0xef && *inbytesleft >= 4)
+ {
+ x = ((inp[0] & 0x07) << 18) +
+ ((inp[1] & 0x3f) << 12) + ((inp[2] & 0x3f) << 6) +
+ (inp[3] & 0x3f);
+
+ (*inbytesleft) -= 4;
+ inp += 4;
+ }
+ else
+ {
+ cd->my_errno = YAZ_ICONV_EINVAL;
+ }
+ *inbuf = inp;
+ return x;
+}
+
+static unsigned long yaz_read_UCS4 (yaz_iconv_t cd,
+ char **inbuf, size_t *inbytesleft)
+{
+ unsigned char *inp = *inbuf;
+ unsigned long x = 0;
+
+ if (*inbytesleft < 4)
+ {
+ cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
+ return 0;
+ }
+ memcpy (&x, inp, sizeof(x));
+ (*inbytesleft) -= 4;
+ inp += 4;
+ *inbuf = inp;
+ return x;
+}
+
+static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
+ char **outbuf, size_t *outbytesleft)
+{
+ unsigned char *outp = *outbuf;
+ if (x <= 0x7f && *outbytesleft >= 1)
+ {
+ *outp++ = x;
+ (*outbytesleft)--;
+ }
+ else if (x <= 0x7ff && *outbytesleft >= 2)
+ {
+ *outp++ = (x >> 6) | 0xc0;
+ *outp++ = (x & 0x3f) | 0x80;
+ (*outbytesleft) -= 2;
+ }
+ else if (x <= 0xffff && *outbytesleft >= 3)
+ {
+ *outp++ = (x >> 12) | 0xe0;
+ *outp++ = ((x >> 6) & 0x3f) | 0x80;
+ *outp++ = (x & 0x3f) | 0x80;
+ (*outbytesleft) -= 3;
+ }
+ else if (x <= 0x1fffff && *outbytesleft >= 4)
+ {
+ *outp++ = (x >> 18) | 0xf0;
+ *outp++ = ((x >> 12) & 0x3f) | 0x80;
+ *outp++ = ((x >> 6) & 0x3f) | 0x80;
+ *outp++ = (x & 0x3f) | 0x80;
+ (*outbytesleft) -= 4;
+ }
+ else if (x > 0x1fffff)
+ {
+ cd->my_errno = YAZ_ICONV_EILSEQ; /* invalid sequence */
+ return (size_t)(-1);
+ }
+ else
+ {
+ cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
+ return (size_t)(-1);
+ }
+ *outbuf = outp;
+ return 0;
+}
+
+static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
+ char **outbuf, size_t *outbytesleft)
+{
+ unsigned char *outp = *outbuf;
+ if (x > 255 || x < 1)
+ {
+ cd->my_errno = YAZ_ICONV_EILSEQ;
+ return (size_t) -1;
+ }
+ else if (*outbytesleft >= 1)
+ {
+ *outp++ = x;
+ (*outbytesleft)--;
+ }
+ else
+ {
+ cd->my_errno = YAZ_ICONV_E2BIG;
+ return (size_t)(-1);
+ }
+ *outbuf = outp;
+ return 0;
+}
+
+
+static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
+ char **outbuf, size_t *outbytesleft)
+{
+ unsigned char *outp = *outbuf;
+ if (x < 1 || x > 0x1fffff)
+ {
+ cd->my_errno = YAZ_ICONV_EILSEQ;
+ return (size_t)(-1);
+ }
+ else if (*outbytesleft >= 4)
+ {
+ memcpy (outp, &x, sizeof(x));
+ outp += 4;
+ (*outbytesleft) -= 4;
+ }
+ else
+ {
+ cd->my_errno = YAZ_ICONV_E2BIG;
+ return (size_t)(-1);
+ }
+ *outbuf = outp;
+ return 0;
+}
+
+yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
+{
+ yaz_iconv_t cd = xmalloc (sizeof(*cd));
+
+ cd->write_handle = 0;
+ cd->read_handle = 0;
+ cd->my_errno = YAZ_ICONV_UNKNOWN;
+
+ if (!strcmp(fromcode, "UTF-8"))
+ cd->read_handle = yaz_read_UTF8;
+ else if (!strcmp(fromcode, "ISO-8859-1"))
+ cd->read_handle = yaz_read_ISO8859_1;
+ else if (!strcmp(fromcode, "UCS-4"))
+ cd->read_handle = yaz_read_UCS4;
+
+
+ if (!strcmp(tocode, "UTF-8"))
+ cd->write_handle = yaz_write_UTF8;
+ else if (!strcmp (tocode, "ISO-8859-1"))
+ cd->write_handle = yaz_write_ISO8859_1;
+ else if (!strcmp (tocode, "UCS-4"))
+ cd->write_handle = yaz_write_UCS4;
+
+#if HAVE_ICONV_H
+ cd->iconv_cd = 0;
+ if (!cd->read_handle || !cd->write_handle)
+ {
+ cd->iconv_cd = iconv_open (tocode, fromcode);
+ if (cd->iconv_cd == (iconv_t) (-1))
+ {
+ xfree (cd);
+ return 0;
+ }
+ }
+#else
+ if (!cd->to_UCS4 || !cd->from_UCS4)
+ {
+ xfree (cd);
+ return 0;
+ }
+#endif
+ return cd;
+}
+
+size_t yaz_iconv (yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
+ char **outbuf, size_t *outbytesleft)
+{
+ char *inbuf0;
+ size_t r = 0;
+#if HAVE_ICONV_H
+ if (cd->iconv_cd)
+ {
+ size_t r =
+ iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
+ if (r == (size_t)(-1))
+ {
+ switch (errno)
+ {
+ case E2BIG:
+ cd->my_errno = YAZ_ICONV_E2BIG;
+ break;
+ case EINVAL:
+ cd->my_errno = YAZ_ICONV_EINVAL;
+ break;
+ case EILSEQ:
+ cd->my_errno = YAZ_ICONV_EILSEQ;
+ break;
+ default:
+ cd->my_errno = YAZ_ICONV_UNKNOWN;
+ }
+ }
+ return r;
+ }
+#endif
+ if (inbuf == 0 || *inbuf == 0)
+ return 0;
+ inbuf0 = *inbuf;
+ while (1)
+ {
+ unsigned long x;
+
+ if (*inbytesleft == 0)
+ {
+ r = *inbuf - inbuf0;
+ break;
+ }
+
+ x = (cd->read_handle)(cd, inbuf, inbytesleft);
+ if (x == 0)
+ {
+ r = (size_t)(-1);
+ break;
+ }
+ r = (cd->write_handle)(cd, x, outbuf, outbytesleft);
+ if (r)
+ break;
+ }
+ return r;
+}
+
+int yaz_iconv_error (yaz_iconv_t cd)
+{
+ return cd->my_errno;
+}
+
+int yaz_iconv_close (yaz_iconv_t cd)
+{
+#if HAVE_ICONV_H
+ if (cd->iconv_cd)
+ iconv_close (cd->iconv_cd);
+#endif
+ xfree (cd);
+ return 0;
+}
+
+
--- /dev/null
+/*
+ * Copyright (c) 1997-2002, Index Data
+ * See the file LICENSE for details.
+ *
+ * $Id: siconvtst.c,v 1.1 2002-08-27 14:02:13 adam Exp $
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+
+#include <yaz/yaz-util.h>
+
+#define CHUNK 8
+
+static void convert (FILE *inf, yaz_iconv_t cd)
+{
+ char inbuf0[CHUNK], *inbuf = inbuf0;
+ char outbuf0[CHUNK], *outbuf = outbuf0;
+ size_t outbytesleft = CHUNK;
+ size_t inbytesleft = CHUNK;
+
+ while (1)
+ {
+ size_t r = fread (inbuf, 1, inbytesleft, inf);
+ if (inbytesleft != r)
+ {
+ if (ferror(inf))
+ {
+ fprintf (stderr, "yaziconv: error reading file\n");
+ exit (6);
+ }
+ if (r == 0)
+ {
+ if (outbuf != outbuf0)
+ fwrite (outbuf0, 1, outbuf - outbuf0, stdout);
+ break;
+ }
+ }
+ r = yaz_iconv (cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+ if (r == (size_t)(-1))
+ {
+ if (yaz_iconv_error(cd) == YAZ_ICONV_EILSEQ)
+ {
+ fprintf (stderr, "invalid sequence\n");
+ return ;
+ }
+
+ if (yaz_iconv_error(cd) == EINVAL) /* incomplete input */
+ {
+ size_t i;
+ for (i = 0; i<inbytesleft; i++)
+ inbuf0[i] = inbuf[i];
+ inbytesleft = CHUNK - inbytesleft;
+ }
+ if (yaz_iconv_error(cd) == E2BIG) /* no more output space */
+ {
+ fwrite (outbuf0, 1, outbuf - outbuf0, stdout);
+ outbuf = outbuf0;
+ outbytesleft = CHUNK;
+ }
+ else
+ {
+ fprintf (stderr, "yaziconv: unknown error\n");
+ exit (7);
+ }
+ }
+ else
+ {
+ inbuf = inbuf0;
+ inbytesleft = CHUNK;
+ }
+ }
+}
+
+int main (int argc, char **argv)
+{
+ int ret;
+ char *from = 0;
+ char *to = 0;
+ char *arg;
+ yaz_iconv_t cd;
+ FILE *inf = stdin;
+
+ while ((ret = options ("f:t:", argv, argc, &arg)) != -2)
+ {
+ switch (ret)
+ {
+ case 0:
+ inf = fopen (arg, "rb");
+ if (!inf)
+ {
+ fprintf (stderr, "yaziconv: cannot open %s", arg);
+ exit (2);
+ }
+ break;
+ case 'f':
+ from = arg;
+ break;
+ case 't':
+ to = arg;
+ break;
+ default:
+ fprintf (stderr, "yaziconv: Usage\n"
+ "siconv -f encoding -t encoding [file]\n");
+ exit(1);
+ }
+ }
+ if (!to)
+ {
+ fprintf (stderr, "yaziconv: -t encoding missing\n");
+ exit (3);
+ }
+ if (!from)
+ {
+ fprintf (stderr, "yaziconv: -f encoding missing\n");
+ exit (4);
+ }
+ cd = yaz_iconv_open (to, from);
+ if (!cd)
+ {
+ fprintf (stderr, "yaziconv: unsupported encoding\n");
+ exit (5);
+ }
+
+
+ yaz_iconv_close (cd);
+}