Improve display of MARC records with multi-byte subfield IDs YAZ-695
[yaz-moved-to-github.git] / src / siconv.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2013 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file
7  * \brief Implements simple ICONV
8  *
9  * This implements an interface similar to that of iconv and
10  * is used by YAZ to interface with iconv (if present).
11  * For systems where iconv is not present, this layer
12  * provides a few important conversions: UTF-8, MARC-8, Latin-1.
13  *
14  */
15
16 #if HAVE_CONFIG_H
17 #include <config.h>
18 #endif
19
20 #include <assert.h>
21 #include <errno.h>
22 #include <string.h>
23
24 #if HAVE_ICONV_H
25 #include <iconv.h>
26 #endif
27
28 #include <yaz/xmalloc.h>
29 #include <yaz/errno.h>
30 #include "iconv-p.h"
31
32 struct yaz_iconv_struct {
33     int my_errno;
34     int init_flag;
35 #if 0
36     size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
37                             size_t inbytesleft, size_t *no_read);
38     unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
39                                  size_t inbytesleft, size_t *no_read);
40 #endif
41     size_t no_read_x;
42     unsigned long unget_x;
43 #if HAVE_ICONV_H
44     iconv_t iconv_cd;
45 #endif
46     struct yaz_iconv_encoder_s encoder;
47     struct yaz_iconv_decoder_s decoder;
48 };
49
50
51 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
52 {
53     return cd->decoder.read_handle && cd->encoder.write_handle;
54 }
55
56
57 static int prepare_encoders(yaz_iconv_t cd, const char *tocode)
58 {
59     if (yaz_marc8_encoder(tocode, &cd->encoder))
60         return 1;
61     if (yaz_utf8_encoder(tocode, &cd->encoder))
62         return 1;
63     if (yaz_ucs4_encoder(tocode, &cd->encoder))
64         return 1;
65     if (yaz_iso_8859_1_encoder(tocode, &cd->encoder))
66         return 1;
67     if (yaz_iso_5428_encoder(tocode, &cd->encoder))
68         return 1;
69     if (yaz_advancegreek_encoder(tocode, &cd->encoder))
70         return 1;
71     if (yaz_wchar_encoder(tocode, &cd->encoder))
72         return 1;
73     return 0;
74 }
75
76 static int prepare_decoders(yaz_iconv_t cd, const char *tocode)
77 {
78     if (yaz_marc8_decoder(tocode, &cd->decoder))
79         return 1;
80     if (yaz_iso5426_decoder(tocode, &cd->decoder))
81         return 1;
82     if (yaz_utf8_decoder(tocode, &cd->decoder))
83         return 1;
84     if (yaz_ucs4_decoder(tocode, &cd->decoder))
85         return 1;
86     if (yaz_iso_8859_1_decoder(tocode, &cd->decoder))
87         return 1;
88     if (yaz_iso_5428_decoder(tocode, &cd->decoder))
89         return 1;
90     if (yaz_advancegreek_decoder(tocode, &cd->decoder))
91         return 1;
92     if (yaz_wchar_decoder(tocode, &cd->decoder))
93         return 1;
94     if (yaz_danmarc_decoder(tocode, &cd->decoder))
95         return 1;
96     return 0;
97 }
98
99 yaz_iconv_t yaz_iconv_open(const char *tocode, const char *fromcode)
100 {
101     yaz_iconv_t cd = (yaz_iconv_t) xmalloc(sizeof(*cd));
102
103     cd->encoder.data = 0;
104     cd->encoder.write_handle = 0;
105     cd->encoder.flush_handle = 0;
106     cd->encoder.init_handle = 0;
107     cd->encoder.destroy_handle = 0;
108
109     cd->decoder.data = 0;
110     cd->decoder.read_handle = 0;
111     cd->decoder.init_handle = 0;
112     cd->decoder.destroy_handle = 0;
113
114     cd->my_errno = YAZ_ICONV_UNKNOWN;
115
116     /* a useful hack: if fromcode has leading @,
117        the library not use YAZ's own conversions .. */
118     if (fromcode[0] == '@')
119         fromcode++;
120     else
121     {
122         prepare_encoders(cd, tocode);
123         prepare_decoders(cd, fromcode);
124     }
125     if (cd->decoder.read_handle && cd->encoder.write_handle)
126     {
127 #if HAVE_ICONV_H
128         cd->iconv_cd = (iconv_t) (-1);
129 #endif
130         ;
131     }
132     else
133     {
134 #if HAVE_ICONV_H
135         cd->iconv_cd = iconv_open(tocode, fromcode);
136         if (cd->iconv_cd == (iconv_t) (-1))
137         {
138             yaz_iconv_close(cd);
139             return 0;
140         }
141 #else
142         yaz_iconv_close(cd);
143         return 0;
144 #endif
145     }
146     cd->init_flag = 1;
147     return cd;
148 }
149
150 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
151                  char **outbuf, size_t *outbytesleft)
152 {
153     char *inbuf0 = 0;
154     size_t r = 0;
155
156 #if HAVE_ICONV_H
157     if (cd->iconv_cd != (iconv_t) (-1))
158     {
159         size_t r =
160             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
161         if (r == (size_t)(-1))
162         {
163             switch (yaz_errno())
164             {
165             case E2BIG:
166                 cd->my_errno = YAZ_ICONV_E2BIG;
167                 break;
168             case EINVAL:
169                 cd->my_errno = YAZ_ICONV_EINVAL;
170                 break;
171             case EILSEQ:
172                 cd->my_errno = YAZ_ICONV_EILSEQ;
173                 break;
174             default:
175                 cd->my_errno = YAZ_ICONV_UNKNOWN;
176             }
177         }
178         return r;
179     }
180 #endif
181
182     if (inbuf)
183         inbuf0 = *inbuf;
184
185     if (cd->init_flag)
186     {
187         cd->my_errno = YAZ_ICONV_UNKNOWN;
188
189         if (cd->encoder.init_handle)
190             (*cd->encoder.init_handle)(&cd->encoder);
191
192         cd->unget_x = 0;
193         cd->no_read_x = 0;
194
195         if (cd->decoder.init_handle)
196         {
197             size_t no_read = 0;
198             size_t r = (cd->decoder.init_handle)(
199                 cd, &cd->decoder,
200                 inbuf ? (unsigned char *) *inbuf : 0,
201                 inbytesleft ? *inbytesleft : 0,
202                 &no_read);
203             if (r)
204             {
205                 if (cd->my_errno == YAZ_ICONV_EINVAL)
206                     return r;
207                 cd->init_flag = 0;
208                 return r;
209             }
210             if (inbytesleft)
211                 *inbytesleft -= no_read;
212             if (inbuf)
213                 *inbuf += no_read;
214         }
215     }
216     cd->init_flag = 0;
217
218     if (!inbuf || !*inbuf)
219     {
220         if (outbuf && *outbuf)
221         {
222             if (cd->unget_x)
223                 r = (*cd->encoder.write_handle)(cd, &cd->encoder,
224                                                 cd->unget_x, outbuf, outbytesleft);
225             if (cd->encoder.flush_handle)
226                 r = (*cd->encoder.flush_handle)(cd, &cd->encoder,
227                                                 outbuf, outbytesleft);
228         }
229         if (r == 0)
230             cd->init_flag = 1;
231         cd->unget_x = 0;
232         return r;
233     }
234     while (1)
235     {
236         unsigned long x;
237         size_t no_read;
238
239         if (cd->unget_x)
240         {
241             x = cd->unget_x;
242             no_read = cd->no_read_x;
243         }
244         else
245         {
246             if (*inbytesleft == 0)
247             {
248                 r = *inbuf - inbuf0;
249                 break;
250             }
251             x = (*cd->decoder.read_handle)(
252                 cd, &cd->decoder,
253                 (unsigned char *) *inbuf, *inbytesleft, &no_read);
254             if (no_read == 0)
255             {
256                 r = (size_t)(-1);
257                 break;
258             }
259         }
260         if (x)
261         {
262             r = (*cd->encoder.write_handle)(cd, &cd->encoder,
263                                             x, outbuf, outbytesleft);
264             if (r)
265             {
266                 /* unable to write it. save it because read_handle cannot
267                    rewind .. */
268                 if (cd->my_errno == YAZ_ICONV_E2BIG)
269                 {
270                     cd->unget_x = x;
271                     cd->no_read_x = no_read;
272                     break;
273                 }
274             }
275             cd->unget_x = 0;
276         }
277         *inbytesleft -= no_read;
278         (*inbuf) += no_read;
279     }
280     return r;
281 }
282
283 int yaz_iconv_error(yaz_iconv_t cd)
284 {
285     return cd->my_errno;
286 }
287
288 int yaz_iconv_close(yaz_iconv_t cd)
289 {
290 #if HAVE_ICONV_H
291     if (cd->iconv_cd != (iconv_t) (-1))
292         iconv_close(cd->iconv_cd);
293 #endif
294     if (cd->encoder.destroy_handle)
295         (*cd->encoder.destroy_handle)(&cd->encoder);
296     if (cd->decoder.destroy_handle)
297         (*cd->decoder.destroy_handle)(&cd->decoder);
298     xfree(cd);
299     return 0;
300 }
301
302 void yaz_iconv_set_errno(yaz_iconv_t cd, int no)
303 {
304     cd->my_errno = no;
305 }
306
307 /*
308  * Local variables:
309  * c-basic-offset: 4
310  * c-file-style: "Stroustrup"
311  * indent-tabs-mode: nil
312  * End:
313  * vim: shiftwidth=4 tabstop=8 expandtab
314  */
315