2 * Copyright (C) 1995-2005, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: siconv.c,v 1.14 2005-10-28 18:36:58 adam Exp $
9 * \brief Implements simple ICONV
11 * This implements an interface similar to that of iconv and
12 * is used by YAZ to interface with iconv (if present).
13 * For systems where iconv is not present, this layer
14 * provides a few important conversions: UTF-8, MARC-8, Latin-1.
32 #include <yaz/yaz-util.h>
/*
 * Prototypes for the nine MARC-8 conversion routines. They all share one
 * signature: input pointer, number of input bytes available, an
 * out-parameter receiving the number of bytes read, and an out-parameter
 * "combining" (the MARC-8 readers in this file test it as a flag).
 *
 * The MARC-8 readers in this file choose a routine from the current
 * escape mode:
 *   1: 'B' Basic ASCII, 'E' ANSEL, 's' ASCII
 *   2: 'g' Greek
 *   3: 'b' Subscripts
 *   4: 'p' Superscripts
 *   5: '2' Basic Hebrew
 *   6: 'N' Basic Cyrillic, 'Q' Extended Cyrillic
 *   7: '3' Basic Arabic, '4' Extended Arabic
 *   8: 'S' Greek
 *   9: '1' Chinese, Japanese, Korean (EACC)
 *
 * The definitions are not part of this file section.
 */
34 unsigned long yaz_marc8_1_conv (unsigned char *inp, size_t inbytesleft,
35 size_t *no_read, int *combining);
36 unsigned long yaz_marc8_2_conv (unsigned char *inp, size_t inbytesleft,
37 size_t *no_read, int *combining);
38 unsigned long yaz_marc8_3_conv (unsigned char *inp, size_t inbytesleft,
39 size_t *no_read, int *combining);
40 unsigned long yaz_marc8_4_conv (unsigned char *inp, size_t inbytesleft,
41 size_t *no_read, int *combining);
42 unsigned long yaz_marc8_5_conv (unsigned char *inp, size_t inbytesleft,
43 size_t *no_read, int *combining);
44 unsigned long yaz_marc8_6_conv (unsigned char *inp, size_t inbytesleft,
45 size_t *no_read, int *combining);
46 unsigned long yaz_marc8_7_conv (unsigned char *inp, size_t inbytesleft,
47 size_t *no_read, int *combining);
48 unsigned long yaz_marc8_8_conv (unsigned char *inp, size_t inbytesleft,
49 size_t *no_read, int *combining);
50 unsigned long yaz_marc8_9_conv (unsigned char *inp, size_t inbytesleft,
51 size_t *no_read, int *combining);
/*
 * State behind a yaz_iconv_t conversion descriptor.
 *
 * init_handle, read_handle and write_handle are the callbacks of the
 * built-in conversions; yaz_iconv_open assigns them and yaz_iconv calls
 * them. comb_x / comb_no_read form an 8-entry queue of characters and
 * their input byte counts used by one of the MARC-8 readers.
 * marc8_comb_no_read is the byte count belonging to a character the other
 * MARC-8 reader has saved for later. compose_char is a character held back
 * by the ISO-8859-1 writer while it waits for a possible combining mark.
 *
 * NOTE(review): the use of unget_x is not visible in this section;
 * presumably it holds a character that could not be written - confirm.
 * Other members used elsewhere in this file (for example my_errno,
 * marc8_esc_mode, comb_offset, comb_size, marc8_comb_x, no_read_x,
 * iconv_cd) are declared on lines not shown here.
 */
55 struct yaz_iconv_struct {
58 size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
59 size_t inbytesleft, size_t *no_read);
60 unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
61 size_t inbytesleft, size_t *no_read);
62 size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
63 char **outbuf, size_t *outbytesleft,
69 unsigned long comb_x[8];
70 size_t comb_no_read[8];
73 int marc8_comb_no_read;
76 unsigned long unget_x;
80 unsigned long compose_char;
/*
 * Read handler for ISO-8859-1 input: the character value is simply the
 * first input byte. The remainder of the body (byte count reporting and
 * return) is on lines not shown here.
 */
83 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
84 size_t inbytesleft, size_t *no_read)
86 unsigned long x = inp[0];
/*
 * Init handler installed together with the UTF-8 reader.
 *
 * The visible check compares inp[1] and inp[2] with 0xBB and 0xBF, the
 * second and third bytes of the UTF-8 byte order mark (EF BB BF), and sets
 * YAZ_ICONV_EILSEQ when they differ. YAZ_ICONV_EINVAL is set on a path
 * whose condition is not shown here (presumably too little input to
 * decide - confirm), as is the test on inp[0].
 */
91 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
92 size_t inbytesleft, size_t *no_read)
101 cd->my_errno = YAZ_ICONV_EINVAL;
104 if (inp[1] != 0xbb || inp[2] != 0xbf)
106 cd->my_errno = YAZ_ICONV_EILSEQ;
/*
 * Read handler for UTF-8 input: decodes one character starting at inp.
 *
 * The visible branches select the sequence length from the lead byte:
 *   lead <= 0xbf or lead >= 0xfe : YAZ_ICONV_EILSEQ (this follows an
 *                                  earlier branch that is not shown)
 *   lead <= 0xdf : 2 bytes     lead <= 0xef : 3 bytes
 *   lead <= 0xf7 : 4 bytes     lead <= 0xfb : 5 bytes
 *   lead <= 0xfd : 6 bytes
 * Each multi-byte branch also requires that many bytes of input; the final
 * assignment of YAZ_ICONV_EINVAL appears to be the fall-through for a valid
 * lead byte with too little input. Every multi-byte branch additionally
 * has a YAZ_ICONV_EILSEQ path whose condition is on lines not shown.
 *
 * NOTE(review): in the visible lines the trailing bytes are only masked
 * with 0x3f; no check that they lie in 0x80..0xbf is visible - confirm
 * whether malformed continuation bytes are rejected elsewhere.
 */
113 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
114 size_t inbytesleft, size_t *no_read)
123 else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
126 cd->my_errno = YAZ_ICONV_EILSEQ;
128 else if (inp[0] <= 0xdf && inbytesleft >= 2)
130 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
136 cd->my_errno = YAZ_ICONV_EILSEQ;
139 else if (inp[0] <= 0xef && inbytesleft >= 3)
141 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
148 cd->my_errno = YAZ_ICONV_EILSEQ;
151 else if (inp[0] <= 0xf7 && inbytesleft >= 4)
153 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
154 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
160 cd->my_errno = YAZ_ICONV_EILSEQ;
163 else if (inp[0] <= 0xfb && inbytesleft >= 5)
165 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
166 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
173 cd->my_errno = YAZ_ICONV_EILSEQ;
176 else if (inp[0] <= 0xfd && inbytesleft >= 6)
178 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
179 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
180 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
186 cd->my_errno = YAZ_ICONV_EILSEQ;
192 cd->my_errno = YAZ_ICONV_EINVAL;
/*
 * Read handler for UCS-4 input in big-endian byte order: the character is
 * assembled from four bytes with inp[0] as the most significant.
 * YAZ_ICONV_EINVAL marks incomplete input.
 *
 * NOTE(review): inp[0] is promoted to int before the shift, so
 * inp[0]<<24 with inp[0] >= 0x80 shifts into the sign bit, which is
 * undefined for signed int in C. Converting to unsigned long before
 * shifting would avoid that.
 */
197 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
198 size_t inbytesleft, size_t *no_read)
204 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
209 x = (inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
/*
 * Read handler for UCS-4 input in little-endian byte order: the character
 * is assembled from four bytes with inp[3] as the most significant.
 * YAZ_ICONV_EINVAL marks incomplete input.
 *
 * NOTE(review): inp[3] is promoted to int before the shift, so
 * inp[3]<<24 with inp[3] >= 0x80 shifts into the sign bit, which is
 * undefined for signed int in C. Converting to unsigned long before
 * shifting would avoid that.
 */
215 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
216 size_t inbytesleft, size_t *no_read)
222 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
227 x = (inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
/*
 * Read handler for input made of native wchar_t values.
 *
 * Fewer than sizeof(wchar_t) bytes of input is reported as
 * YAZ_ICONV_EINVAL. Otherwise the bytes are copied into a local wchar_t
 * with memcpy (so the input pointer need not be aligned) and
 * sizeof(wchar_t) bytes are reported as read.
 */
234 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
235 size_t inbytesleft, size_t *no_read)
239 if (inbytesleft < sizeof(wchar_t))
241 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
247 memcpy (&wch, inp, sizeof(wch));
249 *no_read = sizeof(wch);
/*
 * Forward declaration: the MARC-8 reader that follows calls
 * yaz_read_marc8_comb before its definition appears. The end of the
 * parameter list is on a line not shown here.
 */
257 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
258 size_t inbytesleft, size_t *no_read,
/*
 * Read handler for MARC-8 input, variant that uses the comb_x /
 * comb_no_read queue in the descriptor.
 *
 * While entries are pending (comb_offset < comb_size) the next character
 * and its byte count are taken from the queue; U+0361 and U+0360 get
 * special treatment as described by the comment in the body. Otherwise up
 * to 8 characters are read with yaz_read_marc8_comb and stored in the
 * queue together with their byte counts, the remaining input length being
 * reduced after each read.
 *
 * NOTE(review): a second function with this same name is defined further
 * down in the file; presumably a preprocessor conditional that is not
 * visible here selects one of the two - confirm.
 */
261 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
262 size_t inbytesleft, size_t *no_read)
265 if (cd->comb_offset < cd->comb_size)
267 *no_read = cd->comb_no_read[cd->comb_offset];
268 x = cd->comb_x[cd->comb_offset];
270 /* special case for double-diacritic combining characters,
271 INVERTED BREVE and DOUBLE TILDE.
272 We'll increment the no_read counter by 1, since we want to skip over
273 the processing of the closing ligature character
275 if (x == 0x0361 || x == 0x0360)
282 for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
285 x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
288 cd->comb_x[cd->comb_size] = x;
289 cd->comb_no_read[cd->comb_size] = *no_read;
291 inbytesleft = inbytesleft - *no_read;
/*
 * Reads one MARC-8 character and reports through comb whether it is a
 * combining character (comb is passed on to the conversion routine).
 *
 * Leading escape sequences are consumed first: after an ESC byte (27) any
 * of the intermediate characters "(,$!" are skipped and the following byte
 * becomes the new cd->marc8_esc_mode. Input ending inside an escape
 * sequence gives YAZ_ICONV_EINVAL. The bytes consumed by escape sequences
 * are added to *no_read.
 *
 * The character itself is converted by the yaz_marc8_N_conv routine that
 * belongs to the current escape mode; the YAZ_ICONV_EILSEQ assignment
 * appears to be the case for an unrecognised mode. The bytes the routine
 * consumed (no_read_sub) are added to *no_read.
 */
296 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
297 size_t inbytesleft, size_t *no_read,
301 while(inbytesleft >= 1 && inp[0] == 27)
303 size_t inbytesleft0 = inbytesleft;
306 while(inbytesleft > 0 && strchr("(,$!", *inp))
311 if (inbytesleft <= 0)
314 cd->my_errno = YAZ_ICONV_EINVAL;
317 cd->marc8_esc_mode = *inp++;
319 (*no_read) += inbytesleft0 - inbytesleft;
321 if (inbytesleft <= 0)
326 size_t no_read_sub = 0;
329 switch(cd->marc8_esc_mode)
331 case 'B': /* Basic ASCII */
332 case 'E': /* ANSEL */
333 case 's': /* ASCII */
334 x = yaz_marc8_1_conv(inp, inbytesleft, &no_read_sub, comb);
336 case 'g': /* Greek */
337 x = yaz_marc8_2_conv(inp, inbytesleft, &no_read_sub, comb);
339 case 'b': /* Subscripts */
340 x = yaz_marc8_3_conv(inp, inbytesleft, &no_read_sub, comb);
342 case 'p': /* Superscripts */
343 x = yaz_marc8_4_conv(inp, inbytesleft, &no_read_sub, comb);
345 case '2': /* Basic Hebrew */
346 x = yaz_marc8_5_conv(inp, inbytesleft, &no_read_sub, comb);
348 case 'N': /* Basic Cyrillic */
349 case 'Q': /* Extended Cyrillic */
350 x = yaz_marc8_6_conv(inp, inbytesleft, &no_read_sub, comb);
352 case '3': /* Basic Arabic */
353 case '4': /* Extended Arabic */
354 x = yaz_marc8_7_conv(inp, inbytesleft, &no_read_sub, comb);
356 case 'S': /* Greek */
357 x = yaz_marc8_8_conv(inp, inbytesleft, &no_read_sub, comb);
359 case '1': /* Chinese, Japanese, Korean (EACC) */
360 x = yaz_marc8_9_conv(inp, inbytesleft, &no_read_sub, comb);
364 cd->my_errno = YAZ_ICONV_EILSEQ;
367 *no_read += no_read_sub;
/*
 * Read handler for MARC-8 input, variant that swaps a combining character
 * with the character that follows it.
 *
 * A character saved by an earlier call (cd->marc8_comb_x) is handed out
 * first, together with its saved byte count, and the slot is cleared.
 * Otherwise escape sequences are consumed and the character is converted
 * by the routine for the current escape mode, in the same way as in
 * yaz_read_marc8_comb. When the converted character is flagged as
 * combining and nothing is saved yet, the next character is read by a
 * recursive call, the combining character is saved for the following call,
 * and the next character is returned instead.
 *
 * NOTE(review): another function with this same name is defined earlier in
 * the file; presumably a preprocessor conditional that is not visible here
 * selects one of the two - confirm.
 */
372 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
373 size_t inbytesleft, size_t *no_read)
375 if (cd->marc8_comb_x)
377 unsigned long x = cd->marc8_comb_x;
378 *no_read = cd->marc8_comb_no_read;
379 cd->marc8_comb_x = 0;
383 while(inbytesleft >= 1 && inp[0] == 27)
385 size_t inbytesleft0 = inbytesleft;
388 while(inbytesleft > 0 && strchr("(,$!", *inp))
393 if (inbytesleft <= 0)
396 cd->my_errno = YAZ_ICONV_EINVAL;
399 cd->marc8_esc_mode = *inp++;
401 (*no_read) += inbytesleft0 - inbytesleft;
403 if (inbytesleft <= 0)
409 size_t no_read_sub = 0;
411 switch(cd->marc8_esc_mode)
413 case 'B': /* Basic ASCII */
414 case 'E': /* ANSEL */
415 case 's': /* ASCII */
416 x = yaz_marc8_1_conv(inp, inbytesleft, &no_read_sub, &comb);
418 case 'g': /* Greek */
419 x = yaz_marc8_2_conv(inp, inbytesleft, &no_read_sub, &comb);
421 case 'b': /* Subscripts */
422 x = yaz_marc8_3_conv(inp, inbytesleft, &no_read_sub, &comb);
424 case 'p': /* Superscripts */
425 x = yaz_marc8_4_conv(inp, inbytesleft, &no_read_sub, &comb);
427 case '2': /* Basic Hebrew */
428 x = yaz_marc8_5_conv(inp, inbytesleft, &no_read_sub, &comb);
430 case 'N': /* Basic Cyrillic */
431 case 'Q': /* Extended Cyrillic */
432 x = yaz_marc8_6_conv(inp, inbytesleft, &no_read_sub, &comb);
434 case '3': /* Basic Arabic */
435 case '4': /* Extended Arabic */
436 x = yaz_marc8_7_conv(inp, inbytesleft, &no_read_sub, &comb);
438 case 'S': /* Greek */
439 x = yaz_marc8_8_conv(inp, inbytesleft, &no_read_sub, &comb);
441 case '1': /* Chinese, Japanese, Korean (EACC) */
442 x = yaz_marc8_9_conv(inp, inbytesleft, &no_read_sub, &comb);
446 cd->my_errno = YAZ_ICONV_EILSEQ;
/* NOTE(review): debug trace written to stdout; presumably guarded by a
   preprocessor conditional on lines not shown - confirm it is disabled
   in normal builds. */
450 printf ("esc mode=%c x=%04lX comb=%d\n", cd->marc8_esc_mode, x, comb);
452 *no_read += no_read_sub;
454 if (comb && cd->marc8_comb_x == 0)
457 unsigned long next_x;
459 /* read next char .. */
460 next_x = yaz_read_marc8(cd, inp + *no_read,
461 inbytesleft - *no_read, &tmp_read);
462 /* save this x for later .. */
463 cd->marc8_comb_x = x;
464 /* save next read for later .. */
465 cd->marc8_comb_no_read = tmp_read;
466 /* return next x - thereby swap */
/*
 * Write handler for UTF-8 output: encodes the character x at *outbuf.
 *
 * The form is chosen by the value of x and needs that many bytes of room:
 *   x <= 0x7f      : 1 byte      x <= 0x7ff     : 2 bytes
 *   x <= 0xffff    : 3 bytes     x <= 0x1fffff  : 4 bytes
 *   x <= 0x3ffffff : 5 bytes     anything larger: 6 bytes
 * The lead byte carries the length marker (0xc0, 0xe0, 0xf0, 0xf8, 0xfc)
 * and every following byte carries six payload bits under 0x80.
 * *outbytesleft is reduced by the number of bytes written and *outbuf is
 * advanced past them. When no form fits in the remaining space
 * cd->my_errno is set to YAZ_ICONV_E2BIG.
 */
474 static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
475 char **outbuf, size_t *outbytesleft,
478 unsigned char *outp = (unsigned char *) *outbuf;
479 if (x <= 0x7f && *outbytesleft >= 1)
481 *outp++ = (unsigned char) x;
484 else if (x <= 0x7ff && *outbytesleft >= 2)
486 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
487 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
488 (*outbytesleft) -= 2;
490 else if (x <= 0xffff && *outbytesleft >= 3)
492 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
493 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
494 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
495 (*outbytesleft) -= 3;
497 else if (x <= 0x1fffff && *outbytesleft >= 4)
499 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
500 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
501 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
502 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
503 (*outbytesleft) -= 4;
505 else if (x <= 0x3ffffff && *outbytesleft >= 5)
507 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
508 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
509 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
510 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
511 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
512 (*outbytesleft) -= 5;
514 else if (*outbytesleft >= 6)
516 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
517 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
518 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
519 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
520 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
521 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
522 (*outbytesleft) -= 6;
526 cd->my_errno = YAZ_ICONV_E2BIG; /* no room for output */
529 *outbuf = (char *) outp;
/*
 * Write handler for ISO-8859-1 output.
 *
 * Besides writing values 1..255 as single bytes, it recombines a base
 * letter followed by a combining mark into the single Latin-1 character,
 * using the table in the body (x1 = base letter, x2 = combining mark,
 * third value = resulting Latin-1 code).
 *
 * To make that possible a printable ASCII character (33..126) is not
 * written at once but held in cd->compose_char, unless `last` is set or a
 * character is already being held. When a character is held and the next
 * one arrives, the pair is looked up in the table. On a match the pair
 * becomes one character and the held slot is cleared. Otherwise
 * (apparently when no table entry matches) the held character is written
 * on its own, and the new character may be held in turn.
 *
 * Errors: YAZ_ICONV_EILSEQ for a value above 255 or below 1,
 * YAZ_ICONV_E2BIG when there is no room left in the output buffer.
 */
534 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
535 char **outbuf, size_t *outbytesleft,
538 /* list of two char unicode sequence that, when combined, are
539 equivalent to single unicode chars that can be represented in
541 Regular iconv on Linux at least does not seem to convert these,
542 but since MARC-8 to UTF-8 generates these composed sequence
543 we get a better chance of a successful MARC-8 -> ISO-8859-1
546 unsigned long x1, x2;
549 { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
550 { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
551 { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
552 { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
553 { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
554 { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
555 /* no need for 0xc6 LATIN CAPITAL LETTER AE */
556 { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
557 { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
558 { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
559 { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
560 { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
561 { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
562 { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
563 { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
564 { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
565 { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
566 { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
567 { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
568 { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
569 { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
570 { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
571 /* omitted: 0xd7 MULTIPLICATION SIGN */
572 /* omitted: 0xd8 LATIN CAPITAL LETTER O WITH STROKE */
573 { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
574 { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
575 { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
576 { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
577 { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
578 /* omitted: 0xde LATIN CAPITAL LETTER THORN */
579 /* omitted: 0xdf LATIN SMALL LETTER SHARP S */
580 { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
581 { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
582 { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
583 { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
584 { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
585 { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
586 /* omitted: 0xe6 LATIN SMALL LETTER AE */
587 { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
588 { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
589 { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
590 { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
591 { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
592 { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
593 { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
594 { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
595 { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
596 /* omitted: 0xf0 LATIN SMALL LETTER ETH */
597 { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
598 { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
599 { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
600 { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
601 { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
602 { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
603 /* omitted: 0xf7 DIVISION SIGN */
604 /* omitted: 0xf8 LATIN SMALL LETTER O WITH STROKE */
605 { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
606 { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
607 { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
608 { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
609 { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
610 /* omitted: 0xfe LATIN SMALL LETTER THORN */
611 { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
615 unsigned char *outp = (unsigned char *) *outbuf;
617 if (!last && x > 32 && x < 127 && cd->compose_char == 0)
619 cd->compose_char = x;
622 else if (cd->compose_char)
625 for (i = 0; comb[i].x1; i++)
626 if (cd->compose_char == comb[i].x1 && x == comb[i].x2)
633 if (*outbytesleft >= 1)
635 *outp++ = (unsigned char) cd->compose_char;
637 *outbuf = (char *) outp;
638 if (!last && x > 32 && x < 127)
640 cd->compose_char = x;
646 cd->my_errno = YAZ_ICONV_E2BIG;
650 /* compose_char and old x combined to one new char: x */
651 cd->compose_char = 0;
653 if (x > 255 || x < 1)
655 cd->my_errno = YAZ_ICONV_EILSEQ;
658 else if (*outbytesleft >= 1)
660 *outp++ = (unsigned char) x;
665 cd->my_errno = YAZ_ICONV_E2BIG;
668 *outbuf = (char *) outp;
/*
 * Write handler for UCS-4 output in big-endian byte order: stores x as
 * four bytes, most significant first, when at least four bytes of room
 * remain, then reduces *outbytesleft by 4 and advances *outbuf.
 * Too little room sets cd->my_errno to YAZ_ICONV_E2BIG.
 */
673 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
674 char **outbuf, size_t *outbytesleft,
677 unsigned char *outp = (unsigned char *) *outbuf;
678 if (*outbytesleft >= 4)
680 *outp++ = (unsigned char) (x>>24);
681 *outp++ = (unsigned char) (x>>16);
682 *outp++ = (unsigned char) (x>>8);
683 *outp++ = (unsigned char) x;
684 (*outbytesleft) -= 4;
688 cd->my_errno = YAZ_ICONV_E2BIG;
691 *outbuf = (char *) outp;
/*
 * Write handler for UCS-4 output in little-endian byte order: stores x as
 * four bytes, least significant first, when at least four bytes of room
 * remain, then reduces *outbytesleft by 4 and advances *outbuf.
 * Too little room sets cd->my_errno to YAZ_ICONV_E2BIG.
 */
695 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
696 char **outbuf, size_t *outbytesleft,
699 unsigned char *outp = (unsigned char *) *outbuf;
700 if (*outbytesleft >= 4)
702 *outp++ = (unsigned char) x;
703 *outp++ = (unsigned char) (x>>8);
704 *outp++ = (unsigned char) (x>>16);
705 *outp++ = (unsigned char) (x>>24);
706 (*outbytesleft) -= 4;
710 cd->my_errno = YAZ_ICONV_E2BIG;
713 *outbuf = (char *) outp;
/*
 * Write handler for output made of native wchar_t values.
 *
 * When at least sizeof(wchar_t) bytes of room remain, a local wchar_t is
 * copied into the output with memcpy (so the output pointer need not be
 * aligned) and *outbytesleft is reduced by its size; the assignment of x
 * to that local is on a line not shown. Too little room sets
 * cd->my_errno to YAZ_ICONV_E2BIG.
 */
718 static size_t yaz_write_wchar_t (yaz_iconv_t cd, unsigned long x,
719 char **outbuf, size_t *outbytesleft,
722 unsigned char *outp = (unsigned char *) *outbuf;
724 if (*outbytesleft >= sizeof(wchar_t))
727 memcpy(outp, &wch, sizeof(wch));
729 (*outbytesleft) -= sizeof(wch);
733 cd->my_errno = YAZ_ICONV_E2BIG;
736 *outbuf = (char *) outp;
/*
 * Tells whether the descriptor converts with the built-in handlers.
 * Returns non-zero only when both a read handler and a write handler are
 * set on cd, zero otherwise.
 */
741 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
743 return cd->read_handle && cd->write_handle;
/*
 * Opens a conversion descriptor that converts from `fromcode` to `tocode`.
 *
 * The descriptor is allocated with xmalloc and its state initialised:
 * error code YAZ_ICONV_UNKNOWN, MARC-8 escape mode 'B', empty combining
 * queue, no saved MARC-8 character and no held compose character.
 *
 * Built-in read handlers exist for UTF8 (which also gets an init handler),
 * ISO88591, UCS4, UCS4LE, MARC8 and WCHAR_T; built-in write handlers exist
 * for UTF8, ISO88591, UCS4, UCS4LE and WCHAR_T. Names are compared with
 * yaz_matchstr. When either side has no built-in handler the system
 * iconv_open is tried with the same two names.
 *
 * NOTE(review): fromcode is indexed without a visible NULL check. The
 * handling of an iconv_open failure and of the final handler check is on
 * lines not shown here.
 */
746 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
748 yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
750 cd->write_handle = 0;
753 cd->my_errno = YAZ_ICONV_UNKNOWN;
754 cd->marc8_esc_mode = 'B';
756 cd->comb_offset = cd->comb_size = 0;
758 cd->marc8_comb_x = 0;
760 cd->compose_char = 0;
762 /* a useful hack: if fromcode has a leading @,
763 the library will not use YAZ's own conversions .. */
764 if (fromcode[0] == '@')
768 if (!yaz_matchstr(fromcode, "UTF8"))
770 cd->read_handle = yaz_read_UTF8;
771 cd->init_handle = yaz_init_UTF8;
773 else if (!yaz_matchstr(fromcode, "ISO88591"))
774 cd->read_handle = yaz_read_ISO8859_1;
775 else if (!yaz_matchstr(fromcode, "UCS4"))
776 cd->read_handle = yaz_read_UCS4;
777 else if (!yaz_matchstr(fromcode, "UCS4LE"))
778 cd->read_handle = yaz_read_UCS4LE;
779 else if (!yaz_matchstr(fromcode, "MARC8"))
780 cd->read_handle = yaz_read_marc8;
782 else if (!yaz_matchstr(fromcode, "WCHAR_T"))
783 cd->read_handle = yaz_read_wchar_t;
786 if (!yaz_matchstr(tocode, "UTF8"))
787 cd->write_handle = yaz_write_UTF8;
788 else if (!yaz_matchstr(tocode, "ISO88591"))
789 cd->write_handle = yaz_write_ISO8859_1;
790 else if (!yaz_matchstr (tocode, "UCS4"))
791 cd->write_handle = yaz_write_UCS4;
792 else if (!yaz_matchstr(tocode, "UCS4LE"))
793 cd->write_handle = yaz_write_UCS4LE;
795 else if (!yaz_matchstr(tocode, "WCHAR_T"))
796 cd->write_handle = yaz_write_wchar_t;
801 if (!cd->read_handle || !cd->write_handle)
803 cd->iconv_cd = iconv_open (tocode, fromcode);
804 if (cd->iconv_cd == (iconv_t) (-1))
811 if (!cd->read_handle || !cd->write_handle)
/*
 * Converts characters from *inbuf to *outbuf; the parameter list mirrors
 * that of iconv(3).
 *
 * Two paths are visible:
 *  - System iconv: iconv is called on cd->iconv_cd and a result of
 *    (size_t)(-1) is translated into YAZ_ICONV_E2BIG, YAZ_ICONV_EINVAL,
 *    YAZ_ICONV_EILSEQ or YAZ_ICONV_UNKNOWN (the tests that choose between
 *    them are on lines not shown).
 *  - Built-in handlers: a null inbuf or *inbuf sets the error code to
 *    YAZ_ICONV_UNKNOWN. The init handler, when called, is run on the
 *    start of the input and the bytes it consumed are subtracted from
 *    *inbytesleft. Characters are then read with the read handler and
 *    passed to the write handler, whose last argument is 1 when the
 *    character uses up the remaining input and 0 otherwise. After a
 *    write the bytes read are subtracted from *inbytesleft.
 *
 * When a character cannot be written, its byte count is saved in
 * cd->no_read_x; the count is taken from there again on another path,
 * apparently so that a later call can retry the write - confirm against
 * the lines not shown.
 */
821 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
822 char **outbuf, size_t *outbytesleft)
830 iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
831 if (r == (size_t)(-1))
836 cd->my_errno = YAZ_ICONV_E2BIG;
839 cd->my_errno = YAZ_ICONV_EINVAL;
842 cd->my_errno = YAZ_ICONV_EILSEQ;
845 cd->my_errno = YAZ_ICONV_UNKNOWN;
851 if (inbuf == 0 || *inbuf == 0)
854 cd->my_errno = YAZ_ICONV_UNKNOWN;
864 size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
865 *inbytesleft, &no_read);
868 if (cd->my_errno == YAZ_ICONV_EINVAL)
873 *inbytesleft -= no_read;
885 if (*inbytesleft == 0)
892 x = (cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
903 no_read = cd->no_read_x;
907 r = (cd->write_handle)(cd, x, outbuf, outbytesleft,
908 (*inbytesleft - no_read) == 0 ? 1 : 0);
911 /* unable to write it. save it because read_handle cannot
914 cd->no_read_x = no_read;
919 *inbytesleft -= no_read;
/*
 * Error accessor for a conversion descriptor. The body is on lines not
 * shown here; presumably it returns the YAZ_ICONV_* code stored in
 * cd->my_errno - confirm.
 */
925 int yaz_iconv_error (yaz_iconv_t cd)
/*
 * Closes a conversion descriptor. The visible line closes the system
 * iconv handle held in cd->iconv_cd; the condition guarding that call,
 * the release of cd itself and the return value are on lines not shown.
 */
930 int yaz_iconv_close (yaz_iconv_t cd)
934 iconv_close (cd->iconv_cd);
944 * indent-tabs-mode: nil
946 * vim: shiftwidth=4 tabstop=8 expandtab