2 * Copyright (C) 1995-2007, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: siconv.c,v 1.47 2007-10-12 14:22:19 adam Exp $
9 * \brief Implements simple ICONV
11 * This implements an interface similar to that of iconv and
12 * is used by YAZ to interface with iconv (if present).
13 * For systems where iconv is not present, this layer
14 * provides a few important conversions: UTF-8, MARC-8, Latin-1.
17 * http://www.loc.gov/marc/specifications/speccharmarc8.html
37 #include <yaz/yaz-util.h>
/* Per-code-page MARC-8 decoders, called from yaz_read_marc8_comb.
   The hex suffix of each name is the ASCII code of the escape-mode
   character that selects the page there (0x42 = 'B', 0x67 = 'g',
   0x31 = '1', ...).  Each call reports the bytes consumed through
   *no_read and sets *combining.
   NOTE(review): a return value of 0 appears to mean "no match" --
   confirm against the table generator. */
39 unsigned long yaz_marc8_42_conv(unsigned char *inp, size_t inbytesleft,
40 size_t *no_read, int *combining);
41 unsigned long yaz_marc8_45_conv(unsigned char *inp, size_t inbytesleft,
42 size_t *no_read, int *combining);
43 unsigned long yaz_marc8_67_conv(unsigned char *inp, size_t inbytesleft,
44 size_t *no_read, int *combining);
45 unsigned long yaz_marc8_62_conv(unsigned char *inp, size_t inbytesleft,
46 size_t *no_read, int *combining);
47 unsigned long yaz_marc8_70_conv(unsigned char *inp, size_t inbytesleft,
48 size_t *no_read, int *combining);
49 unsigned long yaz_marc8_32_conv(unsigned char *inp, size_t inbytesleft,
50 size_t *no_read, int *combining);
51 unsigned long yaz_marc8_4E_conv(unsigned char *inp, size_t inbytesleft,
52 size_t *no_read, int *combining);
53 unsigned long yaz_marc8_51_conv(unsigned char *inp, size_t inbytesleft,
54 size_t *no_read, int *combining);
55 unsigned long yaz_marc8_33_conv(unsigned char *inp, size_t inbytesleft,
56 size_t *no_read, int *combining);
57 unsigned long yaz_marc8_34_conv(unsigned char *inp, size_t inbytesleft,
58 size_t *no_read, int *combining);
59 unsigned long yaz_marc8_53_conv(unsigned char *inp, size_t inbytesleft,
60 size_t *no_read, int *combining);
61 unsigned long yaz_marc8_31_conv(unsigned char *inp, size_t inbytesleft,
62 size_t *no_read, int *combining);
/* "r" variants of the tables above, one per code page.  lookup_marc8
   passes them a single character encoded as UTF-8 and tries them in
   turn to pick the MARC-8 page.
   NOTE(review): presumably the reverse (Unicode -> MARC-8) direction --
   confirm against the table generator. */
65 unsigned long yaz_marc8r_42_conv(unsigned char *inp, size_t inbytesleft,
66 size_t *no_read, int *combining);
67 unsigned long yaz_marc8r_45_conv(unsigned char *inp, size_t inbytesleft,
68 size_t *no_read, int *combining);
69 unsigned long yaz_marc8r_67_conv(unsigned char *inp, size_t inbytesleft,
70 size_t *no_read, int *combining);
71 unsigned long yaz_marc8r_62_conv(unsigned char *inp, size_t inbytesleft,
72 size_t *no_read, int *combining);
73 unsigned long yaz_marc8r_70_conv(unsigned char *inp, size_t inbytesleft,
74 size_t *no_read, int *combining);
75 unsigned long yaz_marc8r_32_conv(unsigned char *inp, size_t inbytesleft,
76 size_t *no_read, int *combining);
77 unsigned long yaz_marc8r_4E_conv(unsigned char *inp, size_t inbytesleft,
78 size_t *no_read, int *combining);
79 unsigned long yaz_marc8r_51_conv(unsigned char *inp, size_t inbytesleft,
80 size_t *no_read, int *combining);
81 unsigned long yaz_marc8r_33_conv(unsigned char *inp, size_t inbytesleft,
82 size_t *no_read, int *combining);
83 unsigned long yaz_marc8r_34_conv(unsigned char *inp, size_t inbytesleft,
84 size_t *no_read, int *combining);
85 unsigned long yaz_marc8r_53_conv(unsigned char *inp, size_t inbytesleft,
86 size_t *no_read, int *combining);
87 unsigned long yaz_marc8r_31_conv(unsigned char *inp, size_t inbytesleft,
88 size_t *no_read, int *combining);
92 struct yaz_iconv_struct {
95 size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
96 size_t inbytesleft, size_t *no_read);
97 unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
98 size_t inbytesleft, size_t *no_read);
99 size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
100 char **outbuf, size_t *outbytesleft);
101 size_t (*flush_handle)(yaz_iconv_t cd,
102 char **outbuf, size_t *outbytesleft);
107 unsigned long comb_x[8];
108 size_t comb_no_read[8];
110 unsigned long unget_x;
114 unsigned long compose_char;
116 unsigned long write_marc8_comb_ch[8];
117 size_t write_marc8_comb_no;
118 unsigned write_marc8_second_half_char;
119 unsigned long write_marc8_last;
120 const char *write_marc8_g0;
121 const char *write_marc8_g1;
125 unsigned long x1, x2;
128 { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
129 { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
130 { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
131 { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
132 { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
133 { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
134 /* no need for 0xc6 LATIN CAPITAL LETTER AE */
135 { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
136 { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
137 { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
138 { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
139 { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
140 { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
141 { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
142 { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
143 { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
144 { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
145 { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
146 { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
147 { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
148 { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
149 { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
150 /* omitted: 0xd7 MULTIPLICATION SIGN */
151 /* omitted: 0xd8 LATIN CAPITAL LETTER O WITH STROKE */
152 { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
153 { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
154 { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
155 { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
156 { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
157 /* omitted: 0xde LATIN CAPITAL LETTER THORN */
158 /* omitted: 0xdf LATIN SMALL LETTER SHARP S */
159 { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
160 { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
161 { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
162 { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
163 { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
164 { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
165 /* omitted: 0xe6 LATIN SMALL LETTER AE */
166 { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
167 { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
168 { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
169 { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
170 { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
171 { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
172 { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
173 { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
174 { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
175 /* omitted: 0xf0 LATIN SMALL LETTER ETH */
176 { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
177 { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
178 { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
179 { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
180 { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
181 { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
182 /* omitted: 0xf7 DIVISION SIGN */
183 /* omitted: 0xf8 LATIN SMALL LETTER O WITH STROKE */
184 { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
185 { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
186 { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
187 { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
188 { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
189 /* omitted: 0xfe LATIN SMALL LETTER THORN */
190 { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
195 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
196 size_t inbytesleft, size_t *no_read)
198 unsigned long x = inp[0];
204 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
205 size_t inbytesleft, size_t *no_read)
214 cd->my_errno = YAZ_ICONV_EINVAL;
217 if (inp[1] != 0xbb && inp[2] == 0xbf)
224 unsigned long yaz_read_UTF8_char(unsigned char *inp,
225 size_t inbytesleft, size_t *no_read,
230 *no_read = 0; /* by default */
236 else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
238 *error = YAZ_ICONV_EILSEQ;
240 else if (inp[0] <= 0xdf && inbytesleft >= 2)
242 if ((inp[1] & 0xc0) == 0x80)
244 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
248 *error = YAZ_ICONV_EILSEQ;
251 *error = YAZ_ICONV_EILSEQ;
253 else if (inp[0] <= 0xef && inbytesleft >= 3)
255 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
257 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
262 *error = YAZ_ICONV_EILSEQ;
265 *error = YAZ_ICONV_EILSEQ;
267 else if (inp[0] <= 0xf7 && inbytesleft >= 4)
269 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
270 && (inp[3] & 0xc0) == 0x80)
272 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
273 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
277 *error = YAZ_ICONV_EILSEQ;
280 *error = YAZ_ICONV_EILSEQ;
282 else if (inp[0] <= 0xfb && inbytesleft >= 5)
284 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
285 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
287 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
288 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
293 *error = YAZ_ICONV_EILSEQ;
296 *error = YAZ_ICONV_EILSEQ;
298 else if (inp[0] <= 0xfd && inbytesleft >= 6)
300 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
301 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
302 && (inp[5] & 0xc0) == 0x80)
304 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
305 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
306 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
310 *error = YAZ_ICONV_EILSEQ;
313 *error = YAZ_ICONV_EILSEQ;
316 *error = YAZ_ICONV_EINVAL; /* incomplete sentence */
321 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
322 size_t inbytesleft, size_t *no_read)
324 return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
327 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
328 size_t inbytesleft, size_t *no_read)
334 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
339 x = (inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
345 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
346 size_t inbytesleft, size_t *no_read)
352 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
357 x = (inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
364 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
365 size_t inbytesleft, size_t *no_read)
369 if (inbytesleft < sizeof(wchar_t))
371 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
377 memcpy (&wch, inp, sizeof(wch));
379 *no_read = sizeof(wch);
385 static unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
386 size_t inbytesleft, size_t *no_read)
393 while (inbytesleft > 0)
399 else if (*inp == 0xa3)
409 if (inbytesleft == 0)
411 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
416 case 0xe1: /* alpha small */
422 case 0xc1: /* alpha capital */
429 case 0xe2: /* Beta small */
432 case 0xc2: /* Beta capital */
436 case 0xe4: /* Gamma small */
439 case 0xc4: /* Gamma capital */
443 case 0xe5: /* Delta small */
446 case 0xc5: /* Delta capital */
449 case 0xe6: /* epsilon small */
455 case 0xc6: /* epsilon capital */
461 case 0xe9: /* Zeta small */
464 case 0xc9: /* Zeta capital */
467 case 0xea: /* Eta small */
473 case 0xca: /* Eta capital */
479 case 0xeb: /* Theta small */
482 case 0xcb: /* Theta capital */
485 case 0xec: /* Iota small */
497 case 0xcc: /* Iota capital */
506 case 0xed: /* Kappa small */
509 case 0xcd: /* Kappa capital */
512 case 0xee: /* Lambda small */
515 case 0xce: /* Lambda capital */
518 case 0xef: /* Mu small */
521 case 0xcf: /* Mu capital */
524 case 0xf0: /* Nu small */
527 case 0xd0: /* Nu capital */
530 case 0xf1: /* Xi small */
533 case 0xd1: /* Xi capital */
536 case 0xf2: /* Omicron small */
542 case 0xd2: /* Omicron capital */
548 case 0xf3: /* Pi small */
551 case 0xd3: /* Pi capital */
554 case 0xf5: /* Rho small */
557 case 0xd5: /* Rho capital */
560 case 0xf7: /* Sigma small (end of words) */
563 case 0xf6: /* Sigma small */
566 case 0xd6: /* Sigma capital */
569 case 0xf8: /* Tau small */
572 case 0xd8: /* Tau capital */
575 case 0xf9: /* Upsilon small */
587 case 0xd9: /* Upsilon capital */
596 case 0xfa: /* Phi small */
599 case 0xda: /* Phi capital */
602 case 0xfb: /* Chi small */
605 case 0xdb: /* Chi capital */
608 case 0xfc: /* Psi small */
611 case 0xdc: /* Psi capital */
614 case 0xfd: /* Omega small */
620 case 0xdd: /* Omega capital */
635 static size_t yaz_write_iso5428_1984(yaz_iconv_t cd, unsigned long x,
636 char **outbuf, size_t *outbytesleft)
639 unsigned char *out = (unsigned char*) *outbuf;
640 if (*outbytesleft < 3)
642 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
647 case 0x03ac : out[k++]=0xa2; out[k++]=0xe1; break;
648 case 0x03b1 : out[k++]=0xe1; break;
649 case 0x0386 : out[k++]=0xa2; out[k++]=0xc1; break;
650 case 0x0391 : out[k++]=0xc1; break;
651 case 0x03b2 : out[k++]=0xe2; break;
652 case 0x0392 : out[k++]=0xc2; break;
653 case 0x03b3 : out[k++]=0xe4; break;
654 case 0x0393 : out[k++]=0xc4; break;
655 case 0x03b4 : out[k++]=0xe5; break;
656 case 0x0394 : out[k++]=0xc5; break;
657 case 0x03ad : out[k++]=0xa2; out[k++]=0xe6; break;
658 case 0x03b5 : out[k++]=0xe6; break;
659 case 0x0388 : out[k++]=0xa2; out[k++]=0xc6; break;
660 case 0x0395 : out[k++]=0xc6; break;
661 case 0x03b6 : out[k++]=0xe9; break;
662 case 0x0396 : out[k++]=0xc9; break;
663 case 0x03ae : out[k++]=0xa2; out[k++]=0xea; break;
664 case 0x03b7 : out[k++]=0xea; break;
665 case 0x0389 : out[k++]=0xa2; out[k++]=0xca; break;
666 case 0x0397 : out[k++]=0xca; break;
667 case 0x03b8 : out[k++]=0xeb; break;
668 case 0x0398 : out[k++]=0xcb; break;
669 case 0x0390 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xec; break;
670 case 0x03af : out[k++]=0xa2; out[k++]=0xec; break;
671 case 0x03ca : out[k++]=0xa3; out[k++]=0xec; break;
672 case 0x03b9 : out[k++]=0xec; break;
673 case 0x038a : out[k++]=0xa2; out[k++]=0xcc; break;
674 case 0x03aa : out[k++]=0xa3; out[k++]=0xcc; break;
675 case 0x0399 : out[k++]=0xcc; break;
676 case 0x03ba : out[k++]=0xed; break;
677 case 0x039a : out[k++]=0xcd; break;
678 case 0x03bb : out[k++]=0xee; break;
679 case 0x039b : out[k++]=0xce; break;
680 case 0x03bc : out[k++]=0xef; break;
681 case 0x039c : out[k++]=0xcf; break;
682 case 0x03bd : out[k++]=0xf0; break;
683 case 0x039d : out[k++]=0xd0; break;
684 case 0x03be : out[k++]=0xf1; break;
685 case 0x039e : out[k++]=0xd1; break;
686 case 0x03cc : out[k++]=0xa2; out[k++]=0xf2; break;
687 case 0x03bf : out[k++]=0xf2; break;
688 case 0x038c : out[k++]=0xa2; out[k++]=0xd2; break;
689 case 0x039f : out[k++]=0xd2; break;
690 case 0x03c0 : out[k++]=0xf3; break;
691 case 0x03a0 : out[k++]=0xd3; break;
692 case 0x03c1 : out[k++]=0xf5; break;
693 case 0x03a1 : out[k++]=0xd5; break;
694 case 0x03c2 : out[k++]=0xf7; break;
695 case 0x03c3 : out[k++]=0xf6; break;
696 case 0x03a3 : out[k++]=0xd6; break;
697 case 0x03c4 : out[k++]=0xf8; break;
698 case 0x03a4 : out[k++]=0xd8; break;
699 case 0x03b0 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xf9; break;
700 case 0x03cd : out[k++]=0xa2; out[k++]=0xf9; break;
701 case 0x03cb : out[k++]=0xa3; out[k++]=0xf9; break;
702 case 0x03c5 : out[k++]=0xf9; break;
703 case 0x038e : out[k++]=0xa2; out[k++]=0xd9; break;
704 case 0x03ab : out[k++]=0xa3; out[k++]=0xd9; break;
705 case 0x03a5 : out[k++]=0xd9; break;
706 case 0x03c6 : out[k++]=0xfa; break;
707 case 0x03a6 : out[k++]=0xda; break;
708 case 0x03c7 : out[k++]=0xfb; break;
709 case 0x03a7 : out[k++]=0xdb; break;
710 case 0x03c8 : out[k++]=0xfc; break;
711 case 0x03a8 : out[k++]=0xdc; break;
712 case 0x03ce : out[k++]=0xa2; out[k++]=0xfd; break;
713 case 0x03c9 : out[k++]=0xfd; break;
714 case 0x038f : out[k++]=0xa2; out[k++]=0xdd; break;
715 case 0x03a9 : out[k++]=0xdd; break;
719 cd->my_errno = YAZ_ICONV_EILSEQ;
730 static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
731 size_t inbytesleft, size_t *no_read)
739 while (inbytesleft > 0)
745 else if (*inp == 0x9e)
749 else if (*inp == 0x9f)
759 if (inbytesleft == 0)
761 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
989 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
990 char **outbuf, size_t *outbytesleft)
993 unsigned char *out = (unsigned char*) *outbuf;
994 if (*outbytesleft < 3)
996 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
1001 case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
1002 case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
1003 case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
1004 case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
1005 case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
1006 case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
1007 case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
1008 case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
1009 case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
1010 case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
1011 case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
1012 case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
1013 case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
1014 case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
1015 case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
1016 case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
1017 case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
1018 case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
1019 case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
1020 case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
1021 case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
1022 case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
1023 case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
1024 case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
1025 case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
1026 case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
1027 case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
1028 case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
1029 case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
1030 case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
1031 case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
1032 case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
1033 case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
1034 case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
1035 case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
1036 case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
1037 case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
1038 case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
1039 case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
1040 case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
1041 case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
1042 case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
1043 case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
1044 case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
1045 case 0x03b1 : out[k++]=0x81; break;
1046 case 0x03b2 : out[k++]=0x82; break;
1047 case 0x03b3 : out[k++]=0x83; break;
1048 case 0x03b4 : out[k++]=0x84; break;
1049 case 0x03b5 : out[k++]=0x85; break;
1050 case 0x03b6 : out[k++]=0x86; break;
1051 case 0x03b7 : out[k++]=0x87; break;
1052 case 0x03b8 : out[k++]=0x88; break;
1053 case 0x03b9 : out[k++]=0x89; break;
1054 case 0x03ba : out[k++]=0x8a; break;
1055 case 0x03bb : out[k++]=0x8b; break;
1056 case 0x03bc : out[k++]=0x8c; break;
1057 case 0x03bd : out[k++]=0x8d; break;
1058 case 0x03be : out[k++]=0x8e; break;
1059 case 0x03bf : out[k++]=0x8f; break;
1060 case 0x03c0 : out[k++]=0x90; break;
1061 case 0x03c1 : out[k++]=0x91; break;
1062 case 0x03c2 : out[k++]=0x92; break;
1063 case 0x03c3 : out[k++]=0x93; break;
1064 case 0x03c4 : out[k++]=0x94; break;
1065 case 0x03c5 : out[k++]=0x95; break;
1066 case 0x03c6 : out[k++]=0x96; break;
1067 case 0x03c7 : out[k++]=0x96; break;
1068 case 0x03c8 : out[k++]=0x98; break;
1069 case 0x03c9 : out[k++]=0x99; break;
1073 cd->my_errno = YAZ_ICONV_EILSEQ;
1085 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
1086 size_t inbytesleft, size_t *no_read,
1089 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
1090 size_t inbytesleft, size_t *no_read)
1093 if (cd->comb_offset < cd->comb_size)
1095 *no_read = cd->comb_no_read[cd->comb_offset];
1096 x = cd->comb_x[cd->comb_offset];
1098 /* special case for double-diacritic combining characters,
1099 INVERTED BREVE and DOUBLE TILDE.
1100 We'll increment the no_read counter by 1, since we want to skip over
1101 the processing of the closing ligature character
1103 /* this code is no longer necessary.. our handlers code in
1104 yaz_marc8_?_conv (generated by charconv.tcl) now returns
1105 0 and no_read=1 when a sequence does not match the input.
1106 The SECOND HALFs in codetables.xml produce a non-existent
1107 entry in the conversion trie.. Hence when met, the input byte is
1108 skipped as it should (in yaz_iconv)
1111 if (x == 0x0361 || x == 0x0360)
1118 cd->comb_offset = 0;
1119 for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
1123 if (inbytesleft == 0 && cd->comb_size)
1125 cd->my_errno = YAZ_ICONV_EINVAL;
1130 x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
1133 cd->comb_x[cd->comb_size] = x;
1134 cd->comb_no_read[cd->comb_size] = *no_read;
1136 inbytesleft = inbytesleft - *no_read;
1141 static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
1142 size_t inbytesleft, size_t *no_read)
1144 unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
1145 if (x && cd->comb_size == 1)
1147 /* For MARC8s we try to get a Latin-1 page code out of it */
1149 for (i = 0; latin1_comb[i].x1; i++)
1150 if (cd->comb_x[0] == latin1_comb[i].x2 && x == latin1_comb[i].x1)
1152 *no_read += cd->comb_no_read[0];
1154 x = latin1_comb[i].y;
1161 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
1162 size_t inbytesleft, size_t *no_read,
1166 while(inbytesleft >= 1 && inp[0] == 27)
1168 size_t inbytesleft0 = inbytesleft;
1171 while(inbytesleft > 0 && strchr("(,$!)-", *inp))
1176 if (inbytesleft <= 0)
1179 cd->my_errno = YAZ_ICONV_EINVAL;
1182 cd->marc8_esc_mode = *inp++;
1184 (*no_read) += inbytesleft0 - inbytesleft;
1186 if (inbytesleft <= 0)
1188 else if (*inp == ' ')
1196 size_t no_read_sub = 0;
1199 switch(cd->marc8_esc_mode)
1201 case 'B': /* Basic ASCII */
1202 case 's': /* ASCII */
1203 case 'E': /* ANSEL */
1204 x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb);
1208 x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb);
1211 case 'g': /* Greek */
1212 x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb);
1214 case 'b': /* Subscripts */
1215 x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb);
1217 case 'p': /* Superscripts */
1218 x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb);
1220 case '2': /* Basic Hebrew */
1221 x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb);
1223 case 'N': /* Basic Cyrillic */
1224 x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1226 case 'Q': /* Extended Cyrillic */
1227 x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb);
1229 case '3': /* Basic Arabic */
1230 x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb);
1232 case '4': /* Extended Arabic */
1233 x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb);
1235 case 'S': /* Greek */
1236 x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb);
1238 case '1': /* Chinese, Japanese, Korean (EACC) */
1239 x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb);
1243 cd->my_errno = YAZ_ICONV_EILSEQ;
1246 *no_read += no_read_sub;
1251 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
1252 char **outbuf, size_t *outbytesleft)
1254 return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
1257 size_t yaz_write_UTF8_char(unsigned long x,
1258 char **outbuf, size_t *outbytesleft,
1261 unsigned char *outp = (unsigned char *) *outbuf;
1263 if (x <= 0x7f && *outbytesleft >= 1)
1265 *outp++ = (unsigned char) x;
1268 else if (x <= 0x7ff && *outbytesleft >= 2)
1270 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
1271 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1272 (*outbytesleft) -= 2;
1274 else if (x <= 0xffff && *outbytesleft >= 3)
1276 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
1277 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1278 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1279 (*outbytesleft) -= 3;
1281 else if (x <= 0x1fffff && *outbytesleft >= 4)
1283 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
1284 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1285 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1286 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1287 (*outbytesleft) -= 4;
1289 else if (x <= 0x3ffffff && *outbytesleft >= 5)
1291 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
1292 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1293 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1294 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1295 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1296 (*outbytesleft) -= 5;
1298 else if (*outbytesleft >= 6)
1300 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
1301 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
1302 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1303 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1304 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1305 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1306 (*outbytesleft) -= 6;
1310 *error = YAZ_ICONV_E2BIG; /* not room for output */
1311 return (size_t)(-1);
1313 *outbuf = (char *) outp;
1317 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
1318 char **outbuf, size_t *outbytesleft)
1320 /* list of two char unicode sequence that, when combined, are
1321 equivalent to single unicode chars that can be represented in
1323 Regular iconv on Linux at least does not seem to convert these,
1324 but since MARC-8 to UTF-8 generates these composed sequence
1325 we get a better chance of a successful MARC-8 -> ISO-8859-1
1327 unsigned char *outp = (unsigned char *) *outbuf;
1329 if (cd->compose_char)
1332 for (i = 0; latin1_comb[i].x1; i++)
1333 if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
1335 x = latin1_comb[i].y;
1338 if (*outbytesleft < 1)
1339 { /* no room. Retain compose_char and bail out */
1340 cd->my_errno = YAZ_ICONV_E2BIG;
1341 return (size_t)(-1);
1343 if (!latin1_comb[i].x1)
1344 { /* not found. Just write compose_char */
1345 *outp++ = (unsigned char) cd->compose_char;
1347 *outbuf = (char *) outp;
1349 /* compose_char used so reset it. x now holds current char */
1350 cd->compose_char = 0;
1353 if (x > 32 && x < 127 && cd->compose_char == 0)
1355 cd->compose_char = x;
1358 else if (x > 255 || x < 1)
1360 cd->my_errno = YAZ_ICONV_EILSEQ;
1363 else if (*outbytesleft < 1)
1365 cd->my_errno = YAZ_ICONV_E2BIG;
1366 return (size_t)(-1);
1368 *outp++ = (unsigned char) x;
1370 *outbuf = (char *) outp;
1374 static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
1375 char **outbuf, size_t *outbytesleft)
1377 if (cd->compose_char)
1379 unsigned char *outp = (unsigned char *) *outbuf;
1380 if (*outbytesleft < 1)
1382 cd->my_errno = YAZ_ICONV_E2BIG;
1383 return (size_t)(-1);
1385 *outp++ = (unsigned char) cd->compose_char;
1387 *outbuf = (char *) outp;
1388 cd->compose_char = 0;
1393 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
1394 char **outbuf, size_t *outbytesleft)
1396 unsigned char *outp = (unsigned char *) *outbuf;
1397 if (*outbytesleft >= 4)
1399 *outp++ = (unsigned char) (x>>24);
1400 *outp++ = (unsigned char) (x>>16);
1401 *outp++ = (unsigned char) (x>>8);
1402 *outp++ = (unsigned char) x;
1403 (*outbytesleft) -= 4;
1407 cd->my_errno = YAZ_ICONV_E2BIG;
1408 return (size_t)(-1);
1410 *outbuf = (char *) outp;
1414 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
1415 char **outbuf, size_t *outbytesleft)
1417 unsigned char *outp = (unsigned char *) *outbuf;
1418 if (*outbytesleft >= 4)
1420 *outp++ = (unsigned char) x;
1421 *outp++ = (unsigned char) (x>>8);
1422 *outp++ = (unsigned char) (x>>16);
1423 *outp++ = (unsigned char) (x>>24);
1424 (*outbytesleft) -= 4;
1428 cd->my_errno = YAZ_ICONV_E2BIG;
1429 return (size_t)(-1);
1431 *outbuf = (char *) outp;
1435 static unsigned long lookup_marc8(yaz_iconv_t cd,
1436 unsigned long x, int *comb,
1437 const char **page_chr)
1440 char *utf8_outbuf = utf8_buf;
1441 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
1443 r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
1444 if (r == (size_t)(-1))
1446 cd->my_errno = YAZ_ICONV_EILSEQ;
1452 size_t inbytesleft, no_read_sub = 0;
1455 *utf8_outbuf = '\0';
1456 inp = (unsigned char *) utf8_buf;
1457 inbytesleft = strlen(utf8_buf);
1459 x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb);
1462 *page_chr = ESC "(B";
1465 x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb);
1469 *page_chr = ESC "(B";
1471 /* this possibly solves bug #1778 */
1472 *page_chr = ESC ")!E";
1476 x = yaz_marc8r_67_conv(inp, inbytesleft, &no_read_sub, comb);
1479 *page_chr = ESC "g";
1482 x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb);
1485 *page_chr = ESC "b";
1488 x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb);
1491 *page_chr = ESC "p";
1494 x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb);
1497 *page_chr = ESC "(2";
1500 x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1503 *page_chr = ESC "(N";
1506 x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb);
1509 *page_chr = ESC "(Q";
1512 x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb);
1515 *page_chr = ESC "(3";
1518 x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb);
1521 *page_chr = ESC "(4";
1524 x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb);
1527 *page_chr = ESC "(S";
1530 x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb);
1533 *page_chr = ESC "$1";
1536 cd->my_errno = YAZ_ICONV_EILSEQ;
1541 static size_t flush_combos(yaz_iconv_t cd,
1542 char **outbuf, size_t *outbytesleft)
1544 unsigned long y = cd->write_marc8_last;
1547 size_t i, out_no = 0;
1552 byte = (unsigned char )((y>>16) & 0xff);
1554 out_buf[out_no++] = byte;
1555 byte = (unsigned char)((y>>8) & 0xff);
1557 out_buf[out_no++] = byte;
1558 byte = (unsigned char )(y & 0xff);
1560 out_buf[out_no++] = byte;
1562 if (out_no + cd->write_marc8_comb_no + 1 > *outbytesleft)
1564 cd->my_errno = YAZ_ICONV_E2BIG;
1565 return (size_t) (-1);
1568 for (i = 0; i < cd->write_marc8_comb_no; i++)
1570 /* all MARC-8 combined characters are simple bytes */
1571 byte = (unsigned char )(cd->write_marc8_comb_ch[i]);
1572 *(*outbuf)++ = byte;
1575 memcpy(*outbuf, out_buf, out_no);
1577 (*outbytesleft) -= out_no;
1578 if (cd->write_marc8_second_half_char)
1580 *(*outbuf)++ = cd->write_marc8_second_half_char;
1584 cd->write_marc8_last = 0;
1585 cd->write_marc8_comb_no = 0;
1586 cd->write_marc8_second_half_char = 0;
1590 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
1591 char **outbuf, size_t *outbytesleft,
1592 const char *page_chr)
1594 const char **old_page_chr = &cd->write_marc8_g0;
1596 /* are we going to a G1-set (such as ESC ")!E") */
1597 if (page_chr && page_chr[1] == ')')
1598 old_page_chr = &cd->write_marc8_g1;
1600 if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
1603 const char *page_out = page_chr;
1605 if (*outbytesleft < 8)
1607 cd->my_errno = YAZ_ICONV_E2BIG;
1609 return (size_t) (-1);
1614 if (!strcmp(*old_page_chr, ESC "p")
1615 || !strcmp(*old_page_chr, ESC "g")
1616 || !strcmp(*old_page_chr, ESC "b"))
1619 /* Technique 1 leave */
1620 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
1622 /* Must leave script + enter new page */
1623 plen = strlen(page_out);
1624 memcpy(*outbuf, page_out, plen);
1626 (*outbytesleft) -= plen;
1627 page_out = ESC "(B";
1631 *old_page_chr = page_chr;
1632 plen = strlen(page_out);
1633 memcpy(*outbuf, page_out, plen);
1635 (*outbytesleft) -= plen;
/** \brief Converts one character x to MARC-8 and queues it for output.
 *
 * lookup_marc8() supplies the MARC-8 value y and the character set
 * page_chr. The value is then either stored in the combining buffer
 * cd->write_marc8_comb_ch or becomes the new pending character
 * cd->write_marc8_last after the previous one has been flushed.
 */
1641 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
1642 char **outbuf, size_t *outbytesleft)
1645 const char *page_chr = 0;
/* NOTE(review): comb is filled in by lookup_marc8; presumably it flags
   a combining character - confirm against lookup_marc8. */
1646 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
1649 return (size_t) (-1);
1655 size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, page_chr);
/* Some inputs also set a second-half byte (0xEC, or 0xFB for x == 0x0360);
   flush_combos() writes that byte after the base character. */
1660 cd->write_marc8_second_half_char = 0xEC;
1661 else if (x == 0x0360)
1662 cd->write_marc8_second_half_char = 0xFB;
/* The combining buffer takes at most 6 values; this statement stores
   nothing once that limit is reached. */
1664 if (cd->write_marc8_comb_no < 6)
1665 cd->write_marc8_comb_ch[cd->write_marc8_comb_no++] = y;
/* Order matters: flush the previous pending character first, then switch
   character set, then remember y as the new pending character. */
1669 size_t r = flush_combos(cd, outbuf, outbytesleft);
1675 r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, page_chr);
1679 cd->write_marc8_last = y;
/** \brief Flush handler for MARC-8 output.
 *
 * Writes whatever flush_combos() still has pending, forgets the recorded
 * G1 set and finally switches the output back to the ESC "(B" set.
 * The return value is that of the final yaz_write_marc8_page_chr() call.
 */
1684 static size_t yaz_flush_marc8(yaz_iconv_t cd,
1685 char **outbuf, size_t *outbytesleft)
1687 size_t r = flush_combos(cd, outbuf, outbytesleft);
1690 cd->write_marc8_g1 = 0;
1691 return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, ESC "(B");
/** \brief Write handler for MARC-8 output.
 *
 * A character x that equals the y member of a latin1_comb entry is written
 * as the two characters x1 and x2 of that entry. Any other character is
 * handed directly to yaz_write_marc8_2().
 */
1694 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
1695 char **outbuf, size_t *outbytesleft)
/* latin1_comb is scanned up to the first entry whose x1 is zero. */
1698 for (i = 0; latin1_comb[i].x1; i++)
1700 if (x == latin1_comb[i].y)
1703 /* save the output pointers .. */
1704 char *outbuf0 = *outbuf;
1705 size_t outbytesleft0 = *outbytesleft;
/* NOTE(review): last_ch is an int, while flush_combos() reads
   write_marc8_last into an unsigned long - confirm no truncation. */
1706 int last_ch = cd->write_marc8_last;
1708 r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
1709 outbuf, outbytesleft);
1712 r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
1713 outbuf, outbytesleft);
/* If the second half did not fit, undo the effect of the first half so
   the caller can retry the whole character with a larger buffer. */
1714 if (r && cd->my_errno == YAZ_ICONV_E2BIG)
1716 /* not enough room. reset output to original values */
1718 *outbytesleft = outbytesleft0;
1719 cd->write_marc8_last = last_ch;
/* No decomposition applies: write x as it is. */
1724 return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
/** \brief Write handler for WCHAR_T output.
 *
 * Copies the raw object bytes of wch (sizeof(wch) bytes, via memcpy) to the
 * output and advances *outbuf accordingly. When fewer than sizeof(wchar_t)
 * bytes remain, cd->my_errno is set to YAZ_ICONV_E2BIG and (size_t)(-1)
 * is returned.
 * NOTE(review): wch is presumably a wchar_t holding x - confirm.
 */
1729 static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
1730 char **outbuf, size_t *outbytesleft)
1732 unsigned char *outp = (unsigned char *) *outbuf;
1734 if (*outbytesleft >= sizeof(wchar_t))
1737 memcpy(outp, &wch, sizeof(wch));
1738 outp += sizeof(wch);
1739 (*outbytesleft) -= sizeof(wch);
1743 cd->my_errno = YAZ_ICONV_E2BIG;
1744 return (size_t)(-1);
/* Publish the advanced write position to the caller. */
1746 *outbuf = (char *) outp;
/** \brief Tells whether cd converts with built-in handlers on both sides.
 *
 * Returns non-zero only when both cd->read_handle and cd->write_handle
 * are set.
 */
1751 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
1753 return cd->read_handle && cd->write_handle;
/** \brief Creates a conversion descriptor from fromcode to tocode.
 *
 * Built-in decoders and encoders are selected by comparing the code names
 * with yaz_matchstr(). When either side has no built-in handler, the
 * conversion is attempted through iconv_open(tocode, fromcode).
 */
1756 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
1758 yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
/* Start with no handlers and an unknown error state. */
1760 cd->write_handle = 0;
1761 cd->read_handle = 0;
1762 cd->init_handle = 0;
1763 cd->flush_handle = 0;
1764 cd->my_errno = YAZ_ICONV_UNKNOWN;
1766 /* a useful hack: if fromcode has leading @,
1767 the library not use YAZ's own conversions .. */
1768 if (fromcode[0] == '@')
/* Built-in decoders. UTF8 is the only one shown that also installs an
   init handler. */
1772 if (!yaz_matchstr(fromcode, "UTF8"))
1774 cd->read_handle = yaz_read_UTF8;
1775 cd->init_handle = yaz_init_UTF8;
1777 else if (!yaz_matchstr(fromcode, "ISO88591"))
1778 cd->read_handle = yaz_read_ISO8859_1;
1779 else if (!yaz_matchstr(fromcode, "UCS4"))
1780 cd->read_handle = yaz_read_UCS4;
1781 else if (!yaz_matchstr(fromcode, "UCS4LE"))
1782 cd->read_handle = yaz_read_UCS4LE;
1783 else if (!yaz_matchstr(fromcode, "MARC8"))
1784 cd->read_handle = yaz_read_marc8;
1785 else if (!yaz_matchstr(fromcode, "MARC8s"))
1786 cd->read_handle = yaz_read_marc8s;
1787 else if (!yaz_matchstr(fromcode, "advancegreek"))
1788 cd->read_handle = yaz_read_advancegreek;
1789 else if (!yaz_matchstr(fromcode, "iso54281984"))
1790 cd->read_handle = yaz_read_iso5428_1984;
1791 else if (!yaz_matchstr(fromcode, "iso5428:1984"))
1792 cd->read_handle = yaz_read_iso5428_1984;
1794 else if (!yaz_matchstr(fromcode, "WCHAR_T"))
1795 cd->read_handle = yaz_read_wchar_t;
/* Built-in encoders. ISO88591, MARC8 and MARC8s also install a flush
   handler; MARC8 and MARC8s share the same write and flush handlers. */
1798 if (!yaz_matchstr(tocode, "UTF8"))
1799 cd->write_handle = yaz_write_UTF8;
1800 else if (!yaz_matchstr(tocode, "ISO88591"))
1802 cd->write_handle = yaz_write_ISO8859_1;
1803 cd->flush_handle = yaz_flush_ISO8859_1;
1805 else if (!yaz_matchstr (tocode, "UCS4"))
1806 cd->write_handle = yaz_write_UCS4;
1807 else if (!yaz_matchstr(tocode, "UCS4LE"))
1808 cd->write_handle = yaz_write_UCS4LE;
1809 else if (!yaz_matchstr(tocode, "MARC8"))
1811 cd->write_handle = yaz_write_marc8;
1812 cd->flush_handle = yaz_flush_marc8;
1814 else if (!yaz_matchstr(tocode, "MARC8s"))
1816 cd->write_handle = yaz_write_marc8;
1817 cd->flush_handle = yaz_flush_marc8;
1819 else if (!yaz_matchstr(tocode, "advancegreek"))
1821 cd->write_handle = yaz_write_advancegreek;
1823 else if (!yaz_matchstr(tocode, "iso54281984"))
1825 cd->write_handle = yaz_write_iso5428_1984;
1827 else if (!yaz_matchstr(tocode, "iso5428:1984"))
1829 cd->write_handle = yaz_write_iso5428_1984;
1832 else if (!yaz_matchstr(tocode, "WCHAR_T"))
1833 cd->write_handle = yaz_write_wchar_t;
/* A handler is missing on at least one side: try the system iconv.
   iconv_open() signals failure with (iconv_t)(-1). */
1838 if (!cd->read_handle || !cd->write_handle)
1840 cd->iconv_cd = iconv_open (tocode, fromcode);
1841 if (cd->iconv_cd == (iconv_t) (-1))
1848 if (!cd->read_handle || !cd->write_handle)
/** \brief Converts input to output, in the manner of iconv().
 *
 * Either delegates to the system iconv() on cd->iconv_cd, or decodes with
 * cd->read_handle and encodes with cd->write_handle. Errors are reported
 * through cd->my_errno as YAZ_ICONV_* codes.
 */
1858 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
1859 char **outbuf, size_t *outbytesleft)
/* System iconv path: on failure, translate yaz_errno() into one of the
   YAZ_ICONV_* codes. */
1868 iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
1869 if (r == (size_t)(-1))
1871 switch (yaz_errno())
1874 cd->my_errno = YAZ_ICONV_E2BIG;
1877 cd->my_errno = YAZ_ICONV_EINVAL;
1880 cd->my_errno = YAZ_ICONV_EILSEQ;
1883 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* Reset of the error code and of the MARC-8 reader and writer state;
   the writer starts out in the ESC "(B" set with no G1 set recorded. */
1895 cd->my_errno = YAZ_ICONV_UNKNOWN;
1896 cd->marc8_esc_mode = 'B';
1898 cd->comb_offset = cd->comb_size = 0;
1899 cd->compose_char = 0;
1901 cd->write_marc8_comb_no = 0;
1902 cd->write_marc8_second_half_char = 0;
1903 cd->write_marc8_last = 0;
1904 cd->write_marc8_g0 = ESC "(B";
1905 cd->write_marc8_g1 = 0;
/* Optional init handler gets first look at the input and reports the
   number of bytes it consumed through no_read. */
1913 if (cd->init_handle && inbuf && *inbuf)
1916 size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
1917 *inbytesleft, &no_read);
1920 if (cd->my_errno == YAZ_ICONV_EINVAL)
1925 *inbytesleft -= no_read;
/* No input given: with an output buffer present, the saved character
   cd->unget_x is passed to the write handler and the flush handler,
   if any, is called. */
1931 if (!inbuf || !*inbuf)
1933 if (outbuf && *outbuf)
1936 r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
1937 if (cd->flush_handle)
1938 r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
1953 no_read = cd->no_read_x;
/* Main conversion: once the input is exhausted the result is the number
   of input bytes consumed; otherwise decode one character and encode it. */
1957 if (*inbytesleft == 0)
1959 r = *inbuf - inbuf0;
1962 x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
1972 r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
1975 /* unable to write it. save it because read_handle cannot
1977 if (cd->my_errno == YAZ_ICONV_E2BIG)
1980 cd->no_read_x = no_read;
1986 *inbytesleft -= no_read;
1987 (*inbuf) += no_read;
/** \brief Returns the error code stored in cd->my_errno.
 *
 * The conversion functions in this file store YAZ_ICONV_* values there.
 */
1992 int yaz_iconv_error (yaz_iconv_t cd)
1994 return cd->my_errno;
/** \brief Closes a conversion descriptor.
 *
 * Calls iconv_close() on cd->iconv_cd.
 * NOTE(review): presumably only done when a system iconv handle was
 * actually opened for cd - confirm.
 */
1997 int yaz_iconv_close (yaz_iconv_t cd)
2001 iconv_close (cd->iconv_cd);
2010 * indent-tabs-mode: nil
2012 * vim: shiftwidth=4 tabstop=8 expandtab