2 * Copyright (c) 1997-2003, Index Data
3 * See the file LICENSE for details.
5 * $Id: siconv.c,v 1.9 2003-01-06 08:20:28 adam Exp $
8 /* mini iconv and wrapper for system iconv library (if present) */
25 #include <yaz/yaz-util.h>
27 unsigned long yaz_marc8_conv (unsigned char *inp, size_t inbytesleft,
/*
 * Conversion descriptor behind yaz_iconv_t.
 * The members visible here are the three handler pointers that
 * yaz_iconv_open installs and yaz_iconv calls. Other members used in
 * this file (my_errno, iconv_cd) are declared on lines not shown.
 */
30 struct yaz_iconv_struct {
/* Called by yaz_iconv on the input buffer; reports the number of bytes
 * it consumed through no_read. yaz_iconv_open sets it for UTF-8 input. */
33 size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
34 size_t inbytesleft, size_t *no_read);
/* Decodes one character from inbuf and returns its value; the number of
 * bytes consumed is reported through no_read. */
35 unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
36 size_t inbytesleft, size_t *no_read);
/* Encodes character x at *outbuf, updating *outbuf and *outbytesleft. */
37 size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
38 char **outbuf, size_t *outbytesleft);
/*
 * Reader for ISO-8859-1 input: the character value is simply the first
 * input byte.
 * NOTE(review): the lines that set *no_read and return are not shown in
 * this listing.
 */
44 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
45 size_t inbytesleft, size_t *no_read)
47 unsigned long x = inp[0];
/*
 * Init handler installed for UTF-8 input (see yaz_iconv_open).
 * From the visible lines: it can fail with YAZ_ICONV_EINVAL, and it
 * fails with YAZ_ICONV_EILSEQ when the second and third input bytes are
 * not 0xBB 0xBF.
 * NOTE(review): 0xBB 0xBF are the tail of the UTF-8 byte order mark
 * EF BB BF, so this presumably skips a leading BOM; the test of inp[0]
 * and the length check are on lines not shown - confirm.
 */
52 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
53 size_t inbytesleft, size_t *no_read)
/* presumably the "not enough input yet" case; the guarding condition is
 * not shown */
62 cd->my_errno = YAZ_ICONV_EINVAL;
65 if (inp[1] != 0xbb || inp[2] != 0xbf)
67 cd->my_errno = YAZ_ICONV_EILSEQ;
/*
 * Reader for UTF-8 input: decodes one character starting at inp[0].
 * The lead byte selects the sequence length (2 to 6 bytes in the visible
 * branches) and each branch also requires that many bytes of input.
 * Failures are reported through cd->my_errno (YAZ_ICONV_EILSEQ or
 * YAZ_ICONV_EINVAL).
 * NOTE(review): the first branch of the chain, the *no_read assignments
 * and the conditions guarding the EILSEQ assignments after each decode
 * are on lines not shown in this listing.
 */
74 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
75 size_t inbytesleft, size_t *no_read)
/* a byte in 0x80..0xbf or 0xfe..0xff cannot start a sequence
 * (lower values are handled by the branch not shown) */
84 else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
87 cd->my_errno = YAZ_ICONV_EILSEQ;
/* 2-byte sequence: 5 payload bits from the lead byte, 6 from the next */
89 else if (inp[0] <= 0xdf && inbytesleft >= 2)
91 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
97 cd->my_errno = YAZ_ICONV_EILSEQ;
/* 3-byte sequence: 4 payload bits from the lead byte */
100 else if (inp[0] <= 0xef && inbytesleft >= 3)
102 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
109 cd->my_errno = YAZ_ICONV_EILSEQ;
/* 4-byte sequence: 3 payload bits from the lead byte */
112 else if (inp[0] <= 0xf7 && inbytesleft >= 4)
114 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
115 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
121 cd->my_errno = YAZ_ICONV_EILSEQ;
/* 5-byte sequence: 2 payload bits from the lead byte */
124 else if (inp[0] <= 0xfb && inbytesleft >= 5)
126 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
127 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
134 cd->my_errno = YAZ_ICONV_EILSEQ;
/* 6-byte sequence: 1 payload bit from the lead byte */
137 else if (inp[0] <= 0xfd && inbytesleft >= 6)
139 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
140 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
141 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
147 cd->my_errno = YAZ_ICONV_EILSEQ;
/* presumably the fall-through when the lead byte is valid but fewer
 * bytes are available than the sequence needs */
153 cd->my_errno = YAZ_ICONV_EINVAL;
/*
 * Reader for UCS-4 input with the most significant byte first: the value
 * is assembled from inp[0] (bits 24-31) down to inp[3] (bits 0-7).
 * Incomplete input is reported as YAZ_ICONV_EINVAL.
 * NOTE(review): inp[0] is promoted to int before the shift, so a first
 * byte >= 0x80 shifts into the sign bit of a 32-bit int; casting each
 * byte to unsigned long before shifting would avoid that.
 */
158 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
159 size_t inbytesleft, size_t *no_read)
165 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
170 x = (inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
/*
 * Reader for UCS-4 input with the least significant byte first: the
 * value is assembled from inp[3] (bits 24-31) down to inp[0] (bits 0-7).
 * Incomplete input is reported as YAZ_ICONV_EINVAL.
 * NOTE(review): inp[3] is promoted to int before the shift, so a byte
 * >= 0x80 shifts into the sign bit of a 32-bit int; casting each byte to
 * unsigned long before shifting would avoid that.
 */
176 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
177 size_t inbytesleft, size_t *no_read)
183 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
188 x = (inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
/*
 * Reader for input that is an array of native wchar_t values.
 * Needs at least sizeof(wchar_t) bytes; otherwise YAZ_ICONV_EINVAL is
 * set. One wchar_t is copied out of the buffer and sizeof(wchar_t)
 * bytes are reported as consumed.
 */
195 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
196 size_t inbytesleft, size_t *no_read)
200 if (inbytesleft < sizeof(wchar_t))
202 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* copied with memcpy rather than read through a cast pointer, so inp
 * needs no particular alignment */
208 memcpy (&wch, inp, sizeof(wch));
210 *no_read = sizeof(wch);
/*
 * Reader for MARC-8 input: adapts yaz_marc8_conv to the read_handle
 * signature. The descriptor cd is not passed on.
 */
216 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
217 size_t inbytesleft, size_t *no_read)
219 return yaz_marc8_conv(inp, inbytesleft, no_read);
/*
 * Writer for UTF-8 output: encodes character x at *outbuf using 1 to 6
 * bytes, chosen by the magnitude of x, and reduces *outbytesleft by the
 * number of bytes written. Each branch also requires that many bytes of
 * output space; when no branch applies, YAZ_ICONV_E2BIG is set.
 * NOTE(review): the 6-byte branch places no upper bound on x.
 */
222 static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
223 char **outbuf, size_t *outbytesleft)
225 unsigned char *outp = (unsigned char *) *outbuf;
/* 1 byte: x fits in 7 bits */
226 if (x <= 0x7f && *outbytesleft >= 1)
228 *outp++ = (unsigned char) x;
/* 2 bytes: lead 110xxxxx, then one 10xxxxxx continuation byte */
231 else if (x <= 0x7ff && *outbytesleft >= 2)
233 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
234 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
235 (*outbytesleft) -= 2;
/* 3 bytes: lead 1110xxxx */
237 else if (x <= 0xffff && *outbytesleft >= 3)
239 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
240 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
241 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
242 (*outbytesleft) -= 3;
/* 4 bytes: lead 11110xxx */
244 else if (x <= 0x1fffff && *outbytesleft >= 4)
246 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
247 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
248 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
249 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
250 (*outbytesleft) -= 4;
/* 5 bytes: lead 111110xx */
252 else if (x <= 0x3ffffff && *outbytesleft >= 5)
254 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
255 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
256 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
257 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
258 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
259 (*outbytesleft) -= 5;
/* 6 bytes: lead 1111110x; taken for every remaining value of x */
261 else if (*outbytesleft >= 6)
263 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
264 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
265 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
266 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
267 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
268 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
269 (*outbytesleft) -= 6;
273 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
/* publish the advanced write position to the caller */
276 *outbuf = (char *) outp;
/*
 * Writer for ISO-8859-1 output: stores x as a single byte.
 * Values outside 1..255 are rejected with YAZ_ICONV_EILSEQ (note that
 * this includes 0); lack of output space gives YAZ_ICONV_E2BIG.
 */
280 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
281 char **outbuf, size_t *outbytesleft)
283 unsigned char *outp = (unsigned char *) *outbuf;
284 if (x > 255 || x < 1)
286 cd->my_errno = YAZ_ICONV_EILSEQ;
289 else if (*outbytesleft >= 1)
291 *outp++ = (unsigned char) x;
296 cd->my_errno = YAZ_ICONV_E2BIG;
/* publish the advanced write position to the caller */
299 *outbuf = (char *) outp;
304 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
305 char **outbuf, size_t *outbytesleft)
307 unsigned char *outp = (unsigned char *) *outbuf;
308 if (*outbytesleft >= 4)
310 *outp++ = (unsigned char) (x<<24);
311 *outp++ = (unsigned char) (x<<16);
312 *outp++ = (unsigned char) (x<<8);
313 *outp++ = (unsigned char) x;
314 (*outbytesleft) -= 4;
318 cd->my_errno = YAZ_ICONV_E2BIG;
321 *outbuf = (char *) outp;
325 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
326 char **outbuf, size_t *outbytesleft)
328 unsigned char *outp = (unsigned char *) *outbuf;
329 if (*outbytesleft >= 4)
331 *outp++ = (unsigned char) x;
332 *outp++ = (unsigned char) (x<<8);
333 *outp++ = (unsigned char) (x<<16);
334 *outp++ = (unsigned char) (x<<24);
335 (*outbytesleft) -= 4;
339 cd->my_errno = YAZ_ICONV_E2BIG;
342 *outbuf = (char *) outp;
/*
 * Writer for output that is an array of native wchar_t values.
 * Needs sizeof(wchar_t) bytes of space; otherwise YAZ_ICONV_E2BIG is
 * set. One wchar_t is copied into the buffer and *outbytesleft is
 * reduced by its size.
 * NOTE(review): the declaration and assignment of wch are on lines not
 * shown; presumably it holds x converted to wchar_t.
 */
347 static size_t yaz_write_wchar_t (yaz_iconv_t cd, unsigned long x,
348 char **outbuf, size_t *outbytesleft)
350 unsigned char *outp = (unsigned char *) *outbuf;
352 if (*outbytesleft >= sizeof(wchar_t))
/* memcpy keeps the store valid for any alignment of the output buffer */
355 memcpy(outp, &wch, sizeof(wch));
357 (*outbytesleft) -= sizeof(wch);
361 cd->my_errno = YAZ_ICONV_E2BIG;
/* publish the write position to the caller */
364 *outbuf = (char *) outp;
/*
 * Returns non-zero when both a read handler and a write handler are
 * installed in cd, i.e. the conversion is done by the handlers in this
 * file; returns 0 otherwise.
 */
369 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
371 return cd->read_handle && cd->write_handle;
/*
 * Creates a conversion descriptor for converting fromcode to tocode.
 *
 * The descriptor is allocated with xmalloc. fromcode and tocode are each
 * matched with yaz_matchstr against the built-in names (a zero result
 * selects the handler):
 *   input:  UTF8, ISO88591, UCS4, UCS4LE, MARC8, WCHAR_T
 *   output: UTF8, ISO88591, UCS4, UCS4LE, WCHAR_T
 * MARC8 has a reader only. When either handler is missing, the system
 * iconv_open is tried with the same names.
 * NOTE(review): preprocessor guards, the failure paths and the return
 * statements are on lines not shown in this listing.
 */
374 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
376 yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
378 cd->write_handle = 0;
381 cd->my_errno = YAZ_ICONV_UNKNOWN;
383 /* a useful hack: if fromcode has a leading '@',
384 the library will not use YAZ's own conversions .. */
385 if (fromcode[0] == '@')
/* pick the built-in reader for the input encoding */
389 if (!yaz_matchstr(fromcode, "UTF8"))
391 cd->read_handle = yaz_read_UTF8;
/* UTF-8 is the only input encoding that also gets an init handler */
392 cd->init_handle = yaz_init_UTF8;
394 else if (!yaz_matchstr(fromcode, "ISO88591"))
395 cd->read_handle = yaz_read_ISO8859_1;
396 else if (!yaz_matchstr(fromcode, "UCS4"))
397 cd->read_handle = yaz_read_UCS4;
398 else if (!yaz_matchstr(fromcode, "UCS4LE"))
399 cd->read_handle = yaz_read_UCS4LE;
400 else if (!yaz_matchstr(fromcode, "MARC8"))
401 cd->read_handle = yaz_read_marc8;
403 else if (!yaz_matchstr(fromcode, "WCHAR_T"))
404 cd->read_handle = yaz_read_wchar_t;
/* pick the built-in writer for the output encoding */
407 if (!yaz_matchstr(tocode, "UTF8"))
408 cd->write_handle = yaz_write_UTF8;
409 else if (!yaz_matchstr(tocode, "ISO88591"))
410 cd->write_handle = yaz_write_ISO8859_1;
411 else if (!yaz_matchstr (tocode, "UCS4"))
412 cd->write_handle = yaz_write_UCS4;
413 else if (!yaz_matchstr(tocode, "UCS4LE"))
414 cd->write_handle = yaz_write_UCS4LE;
416 else if (!yaz_matchstr(tocode, "WCHAR_T"))
417 cd->write_handle = yaz_write_wchar_t;
/* no complete built-in pair: hand the conversion to the system iconv */
422 if (!cd->read_handle || !cd->write_handle)
424 cd->iconv_cd = iconv_open (tocode, fromcode);
/* (iconv_t)(-1) is iconv_open's failure value */
425 if (cd->iconv_cd == (iconv_t) (-1))
/* same test again; presumably the path taken when the system iconv is
 * not compiled in - the surrounding preprocessor lines are not shown */
432 if (!cd->read_handle || !cd->write_handle)
/*
 * Converts input at *inbuf to output at *outbuf, with the same argument
 * layout as the system iconv().
 *
 * Two paths are visible: a call to the system iconv() on cd->iconv_cd,
 * whose failure is translated into a YAZ_ICONV_* code in cd->my_errno,
 * and a built-in path that calls cd->init_handle, then cd->read_handle
 * and cd->write_handle, reducing *inbytesleft by the bytes consumed.
 * NOTE(review): the conditions selecting between the paths, the loop
 * around the read/write step, the handling of handler failures and the
 * return statements are on lines not shown in this listing.
 */
442 size_t yaz_iconv (yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
443 char **outbuf, size_t *outbytesleft)
/* system iconv path */
451 iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
452 if (r == (size_t)(-1))
/* one of the following codes is stored; presumably chosen from errno
 * by a switch that is not shown */
457 cd->my_errno = YAZ_ICONV_E2BIG;
460 cd->my_errno = YAZ_ICONV_EINVAL;
463 cd->my_errno = YAZ_ICONV_EILSEQ;
466 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* built-in path: a null inbuf or *inbuf means there is no input to
 * convert */
472 if (inbuf == 0 || *inbuf == 0)
475 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* run the init handler on the input */
485 size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
486 *inbytesleft, &no_read);
489 if (cd->my_errno == YAZ_ICONV_EINVAL)
/* account for the bytes the init handler consumed */
494 *inbytesleft -= no_read;
/* all input consumed */
504 if (*inbytesleft == 0)
/* decode one character ... */
510 x = (cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
/* ... and encode it into the output buffer */
517 r = (cd->write_handle)(cd, x, outbuf, outbytesleft);
520 *inbytesleft -= no_read;
/*
 * Error query for a conversion descriptor.
 * NOTE(review): the body is not shown in this listing; presumably it
 * returns cd->my_errno (one of the YAZ_ICONV_* codes) - confirm.
 */
526 int yaz_iconv_error (yaz_iconv_t cd)
/*
 * Releases a conversion descriptor. The visible line closes the system
 * iconv handle held in cd->iconv_cd.
 * NOTE(review): the guard around iconv_close, the release of cd itself
 * and the return value are on lines not shown in this listing.
 */
531 int yaz_iconv_close (yaz_iconv_t cd)
535 iconv_close (cd->iconv_cd);