From: Adam Dickmeiss Date: Wed, 19 Apr 2006 23:15:39 +0000 (+0000) Subject: Implemented yaz_iconv to support conversion to MARC-8 X-Git-Tag: YAZ.2.1.18~33 X-Git-Url: http://jsfdemo.indexdata.com/cgi-bin?a=commitdiff_plain;h=8626b7019b5d3d9c9594f20025e97d06a2d590fc;p=yaz-moved-to-github.git Implemented yaz_iconv to support conversion to MARC-8 --- diff --git a/NEWS b/NEWS index 6781a21..92c8c52 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,5 @@ +Utility yaz_iconv now supports conversion to MARC-8. + New facilities for the MARC module. The reading - and writing of content are separate methods for the yaz_marc_t handle. The following read functions are available: yaz_marc_read_iso2709 (Reads MARC in diff --git a/include/yaz/yaz-iconv.h b/include/yaz/yaz-iconv.h index 4e62904..b629e92 100644 --- a/include/yaz/yaz-iconv.h +++ b/include/yaz/yaz-iconv.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 1995-2005, Index Data ApS + * Copyright (C) 1995-2006, Index Data ApS * * Permission to use, copy, modify, distribute, and sell this software and * its documentation, in whole or in part, for any purpose, is hereby granted, @@ -23,7 +23,7 @@ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. * - * $Id: yaz-iconv.h,v 1.8 2005-06-25 15:46:03 adam Exp $ + * $Id: yaz-iconv.h,v 1.9 2006-04-19 23:15:39 adam Exp $ */ /** * \file yaz-iconv.h @@ -38,20 +38,31 @@ YAZ_BEGIN_CDECL +/** \brief yaz_iconv handle (similar to iconv_t) */ typedef struct yaz_iconv_struct *yaz_iconv_t; + +/** \brief error code: unknown */ #define YAZ_ICONV_UNKNOWN 1 +/** \brief error code: Not sufficient room for output buffer */ #define YAZ_ICONV_E2BIG 2 +/** \brief error code: Invalid sequence */ #define YAZ_ICONV_EILSEQ 3 +/** \brief error code: An incomplete multibyte sequence is in input buffer */ #define YAZ_ICONV_EINVAL 4 +/** \brief just like iconv_open(3) */ YAZ_EXPORT yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode); +/** \brief just like iconv(3) */ YAZ_EXPORT size_t yaz_iconv (yaz_iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); +/** \brief returns last error - like errno for iconv(3) */ YAZ_EXPORT int yaz_iconv_error (yaz_iconv_t cd); +/** \brief just like iconv_close(3) */ YAZ_EXPORT int yaz_iconv_close (yaz_iconv_t cd); +/** \brief tests whether conversion is handled by YAZ' iconv or system iconv */ YAZ_EXPORT int yaz_iconv_isbuiltin(yaz_iconv_t cd); YAZ_EXPORT int yaz_matchstr(const char *s1, const char *s2); diff --git a/src/Makefile.am b/src/Makefile.am index 090f5ff..5e239d0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,6 +1,6 @@ ## This file is part of the YAZ toolkit. -## Copyright (C) 1994-2005, Index Data, All rights reserved. -## $Id: Makefile.am,v 1.29 2006-03-15 13:32:05 adam Exp $ +## Copyright (C) 1994-2006, Index Data, All rights reserved. +## $Id: Makefile.am,v 1.30 2006-04-19 23:15:39 adam Exp $ YAZ_VERSION_INFO=2:0:0 @@ -29,10 +29,14 @@ AM_CPPFLAGS=-I$(top_srcdir)/include $(XML2_CFLAGS) $(SSL_CFLAGS) AM_YFLAGS=-p cql_ -# MARC8 conversion is generated from codetables.xml +# MARC8->UTF8 conversion is generated from codetables.xml $(srcdir)/marc8.c: charconv.tcl codetables.xml cd $(srcdir); ./charconv.tcl -p marc8 codetables.xml -o marc8.c +# UTF-8->MARC8 conversion is generated from codetables.xml +$(srcdir)/marc8r.c: charconv.tcl codetables.xml + cd $(srcdir); ./charconv.tcl -r -p marc8r codetables.xml -o marc8r.c + # Generate diagnostics from CSVs $(top_srcdir)/include/yaz/diagbib1.h $(srcdir)/diagbib1.c: csvtobib1.tcl bib1.csv cd $(srcdir); ./csvtobib1.tcl @@ -42,7 +46,7 @@ $(top_srcdir)/include/yaz/diagsrw.h $(srcdir)/diagsrw.c: csvtosrw.tcl srw.csv libyaz_la_SOURCES=version.c options.c log.c marcdisp.c oid.c wrbuf.c \ nmemsdup.c xmalloc.c readconf.c tpath.c nmem.c matchstr.c atoin.c \ - siconv.c marc8.c \ + siconv.c marc8.c marc8r.c \ odr_bool.c ber_bool.c ber_len.c ber_tag.c odr_util.c \ odr_null.c ber_null.c odr_int.c ber_int.c odr_tag.c odr_cons.c \ odr_seq.c odr_oct.c ber_oct.c odr_bit.c ber_bit.c odr_oid.c \ diff --git a/src/charconv.tcl b/src/charconv.tcl index 45f6e13..f43c815 100755 --- a/src/charconv.tcl +++ b/src/charconv.tcl @@ -2,7 +2,7 @@ # the next line restats using tclsh \ exec tclsh "$0" "$@" # -# $Id: charconv.tcl,v 1.11 2006-02-23 13:15:43 adam Exp $ +# $Id: charconv.tcl,v 1.12 2006-04-19 23:15:39 adam Exp $ proc usage {} { puts {charconv.tcl: [-p prefix] [-s split] [-o ofile] file ... } @@ -255,11 +255,12 @@ proc dump_trie {ofilehandle} { " } -proc readfile {fname ofilehandle prefix omits} { +proc readfile {fname ofilehandle prefix omits reverse} { global trie set marc_lines 0 set ucs_lines 0 + set utf_lines 0 set codename_lines 0 set lineno 0 set f [open $fname r] @@ -290,12 +291,21 @@ proc readfile {fname ofilehandle prefix omits} { } } elseif {[regexp {} $line s]} { if {[string length $ucs]} { - for {set i 0} {$i < [string length $marc]} {incr i 2} { - lappend hex [string range $marc $i [expr $i+1]] + if {$reverse} { + for {set i 0} {$i < [string length $utf]} {incr i 2} { + lappend hex [string range $utf $i [expr $i+1]] + } + # puts "ins_trie $hex $marc + ins_trie $hex $marc $combining $codename + unset hex + } else { + for {set i 0} {$i < [string length $marc]} {incr i 2} { + lappend hex [string range $marc $i [expr $i+1]] + } + # puts "ins_trie $hex $ucs" + ins_trie $hex $ucs $combining $codename + unset hex } - # puts "ins_trie $hex $ucs" - ins_trie $hex $ucs $combining $codename - unset hex } set marc {} set uni {} @@ -319,6 +329,8 @@ proc readfile {fname ofilehandle prefix omits} { set combining 1 } elseif {[regexp {([0-9A-Fa-f]*)} $line s ucs]} { incr ucs_lines + } elseif {[regexp {([0-9A-Fa-f]*)} $line s utf]} { + incr utf_lines } } close $f @@ -328,6 +340,7 @@ set verbose 0 set ifile {} set ofile out.c set prefix {c} +set reverse_map 0 # Parse command line set l [llength $argv] set i 0 @@ -362,6 +375,9 @@ while {$i < $l} { } lappend omits $arg } + -r { + set reverse_map 1 + } default { lappend ifiles $arg } @@ -377,7 +393,7 @@ set ofilehandle [open $ofile w] preamble_trie $ofilehandle foreach ifile $ifiles { - readfile $ifile $ofilehandle $prefix $omits + readfile $ifile $ofilehandle $prefix $omits $reverse_map } close $ofilehandle diff --git a/src/diag-entry.c b/src/diag-entry.c index eda593d..98b07b8 100644 --- a/src/diag-entry.c +++ b/src/diag-entry.c @@ -1,10 +1,19 @@ /* - * Copyright (C) 1995-2005, Index Data ApS + * Copyright (C) 1995-2006, Index Data ApS * See the file LICENSE for details. * - * $Id: diag-entry.c,v 1.2 2005-06-25 15:46:04 adam Exp $ + * $Id: diag-entry.c,v 1.3 2006-04-19 23:15:39 adam Exp $ */ +/** + * \file diag-entry.c + * \brief Diagnostic table lookup + */ + +#if HAVE_CONFIG_H +#include +#endif + #include "diag-entry.h" const char *yaz_diag_to_str(struct yaz_diag_entry *tab, int code) diff --git a/src/diag-entry.h b/src/diag-entry.h index 416ed1a..a6c98e1 100644 --- a/src/diag-entry.h +++ b/src/diag-entry.h @@ -1,8 +1,13 @@ /* - * Copyright (C) 1995-2005, Index Data ApS + * Copyright (C) 1995-2006, Index Data ApS * See the file LICENSE for details. * - * $Id: diag-entry.h,v 1.2 2005-06-25 15:46:04 adam Exp $ + * $Id: diag-entry.h,v 1.3 2006-04-19 23:15:39 adam Exp $ + */ + +/** + * \file diag-entry.h + * \brief Diagnostic table lookup header */ struct yaz_diag_entry { diff --git a/src/siconv.c b/src/siconv.c index 25af4cc..c0c70bf 100644 --- a/src/siconv.c +++ b/src/siconv.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 1995-2005, Index Data ApS + * Copyright (C) 1995-2006, Index Data ApS * See the file LICENSE for details. * - * $Id: siconv.c,v 1.18 2006-03-25 14:41:53 adam Exp $ + * $Id: siconv.c,v 1.19 2006-04-19 23:15:39 adam Exp $ */ /** * \file siconv.c @@ -11,7 +11,7 @@ * This implements an interface similar to that of iconv and * is used by YAZ to interface with iconv (if present). * For systems where iconv is not present, this layer - * provides a few important conversion: UTF-8, MARC-8, Latin-1. + * provides a few important conversions: UTF-8, MARC-8, Latin-1. */ #if HAVE_CONFIG_H @@ -31,25 +31,45 @@ #include -unsigned long yaz_marc8_1_conv (unsigned char *inp, size_t inbytesleft, - size_t *no_read, int *combining); -unsigned long yaz_marc8_2_conv (unsigned char *inp, size_t inbytesleft, +unsigned long yaz_marc8_1_conv(unsigned char *inp, size_t inbytesleft, + size_t *no_read, int *combining); +unsigned long yaz_marc8_2_conv(unsigned char *inp, size_t inbytesleft, + size_t *no_read, int *combining); +unsigned long yaz_marc8_3_conv(unsigned char *inp, size_t inbytesleft, + size_t *no_read, int *combining); +unsigned long yaz_marc8_4_conv(unsigned char *inp, size_t inbytesleft, + size_t *no_read, int *combining); +unsigned long yaz_marc8_5_conv(unsigned char *inp, size_t inbytesleft, + size_t *no_read, int *combining); +unsigned long yaz_marc8_6_conv(unsigned char *inp, size_t inbytesleft, + size_t *no_read, int *combining); +unsigned long yaz_marc8_7_conv(unsigned char *inp, size_t inbytesleft, + size_t *no_read, int *combining); +unsigned long yaz_marc8_8_conv(unsigned char *inp, size_t inbytesleft, + size_t *no_read, int *combining); +unsigned long yaz_marc8_9_conv(unsigned char *inp, size_t inbytesleft, + size_t *no_read, int *combining); + + +unsigned long yaz_marc8r_1_conv(unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining); -unsigned long yaz_marc8_3_conv (unsigned char *inp, size_t inbytesleft, +unsigned long yaz_marc8r_2_conv(unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining); -unsigned long yaz_marc8_4_conv (unsigned char *inp, size_t inbytesleft, +unsigned long yaz_marc8r_3_conv(unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining); -unsigned long yaz_marc8_5_conv (unsigned char *inp, size_t inbytesleft, +unsigned long yaz_marc8r_4_conv(unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining); -unsigned long yaz_marc8_6_conv (unsigned char *inp, size_t inbytesleft, +unsigned long yaz_marc8r_5_conv(unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining); -unsigned long yaz_marc8_7_conv (unsigned char *inp, size_t inbytesleft, +unsigned long yaz_marc8r_6_conv(unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining); -unsigned long yaz_marc8_8_conv (unsigned char *inp, size_t inbytesleft, +unsigned long yaz_marc8r_7_conv(unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining); -unsigned long yaz_marc8_9_conv (unsigned char *inp, size_t inbytesleft, +unsigned long yaz_marc8r_8_conv(unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining); - +unsigned long yaz_marc8r_9_conv(unsigned char *inp, size_t inbytesleft, + size_t *no_read, int *combining); + struct yaz_iconv_struct { int my_errno; int init_flag; @@ -72,6 +92,11 @@ struct yaz_iconv_struct { iconv_t iconv_cd; #endif unsigned long compose_char; + + unsigned long write_marc8_comb_ch[8]; + size_t write_marc8_comb_no; + unsigned long write_marc8_last; + const char *write_marc8_page_chr; }; static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp, @@ -95,12 +120,10 @@ static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp, cd->my_errno = YAZ_ICONV_EINVAL; return (size_t) -1; } - if (inp[1] != 0xbb || inp[2] != 0xbf) - { - cd->my_errno = YAZ_ICONV_EILSEQ; - return (size_t) -1; - } - *no_read = 3; + if (inp[1] != 0xbb && inp[2] == 0xbf) + *no_read = 3; + else + *no_read = 0; return 0; } @@ -446,7 +469,7 @@ static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x, static struct { unsigned long x1, x2; unsigned y; - } comb[] = { + } latin1_comb[] = { { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */ { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */ { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */ @@ -518,10 +541,10 @@ static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x, if (cd->compose_char) { int i; - for (i = 0; comb[i].x1; i++) - if (cd->compose_char == comb[i].x1 && x == comb[i].x2) + for (i = 0; latin1_comb[i].x1; i++) + if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2) { - x = comb[i].y; + x = latin1_comb[i].y; break; } if (*outbytesleft < 1) @@ -529,7 +552,7 @@ static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x, cd->my_errno = YAZ_ICONV_E2BIG; return (size_t)(-1); } - if (!comb[i].x1) + if (!latin1_comb[i].x1) { /* not found. Just write compose_char */ *outp++ = (unsigned char) cd->compose_char; (*outbytesleft)--; @@ -605,6 +628,193 @@ static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x, return 0; } +static unsigned long lookup_marc8(yaz_iconv_t cd, + unsigned long x, int *comb, + const char **page_chr) +{ + char utf8_buf[7]; + char *utf8_outbuf = utf8_buf; + size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r; + + r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft, 0); + if (r == (size_t)(-1)) + { + cd->my_errno = YAZ_ICONV_EILSEQ; + return 0; + } + else + { + unsigned char *inp; + size_t inbytesleft, no_read_sub = 0; + unsigned long x; + + *utf8_outbuf = '\0'; + inp = (unsigned char *) utf8_buf; + inbytesleft = strlen(utf8_buf); + + x = yaz_marc8r_1_conv(inp, inbytesleft, &no_read_sub, comb); + if (x) + { + *page_chr = "\033(B"; + return x; + } + x = yaz_marc8r_2_conv(inp, inbytesleft, &no_read_sub, comb); + if (x) + { + *page_chr = "\033g"; + return x; + } + x = yaz_marc8r_3_conv(inp, inbytesleft, &no_read_sub, comb); + if (x) + { + *page_chr = "\033b"; + return x; + } + x = yaz_marc8r_4_conv(inp, inbytesleft, &no_read_sub, comb); + if (x) + { + *page_chr = "\033p"; + return x; + } + x = yaz_marc8r_5_conv(inp, inbytesleft, &no_read_sub, comb); + if (x) + { + *page_chr = "\033(2"; + return x; + } + x = yaz_marc8r_6_conv(inp, inbytesleft, &no_read_sub, comb); + if (x) + { + *page_chr = "\033(N"; + return x; + } + x = yaz_marc8r_7_conv(inp, inbytesleft, &no_read_sub, comb); + if (x) + { + *page_chr = "\033(3"; + return x; + } + x = yaz_marc8r_8_conv(inp, inbytesleft, &no_read_sub, comb); + if (x) + { + *page_chr = "\033(S"; + return x; + } + x = yaz_marc8r_9_conv(inp, inbytesleft, &no_read_sub, comb); + if (x) + { + *page_chr = "\033(1"; + return x; + } + cd->my_errno = YAZ_ICONV_EILSEQ; + return x; + } +} + +static size_t flush_combos(yaz_iconv_t cd, + char **outbuf, size_t *outbytesleft) +{ + unsigned long y = cd->write_marc8_last; + unsigned char byte, second_half = 0; + char out_buf[10]; + size_t i, out_no = 0; + + if (!y) + return 0; + + byte = (y>>16) & 0xff; + if (byte) + out_buf[out_no++] = byte; + byte = (y>>8) & 0xff; + if (byte) + out_buf[out_no++] = byte; + byte = y & 0xff; + if (byte) + out_buf[out_no++] = byte; + + if (out_no + cd->write_marc8_comb_no + 1 > *outbytesleft) + { + cd->my_errno = YAZ_ICONV_E2BIG; + return (size_t) (-1); + } + + for (i = 0; i < cd->write_marc8_comb_no; i++) + { + byte = cd->write_marc8_comb_ch[i]; + if (byte == 0xEB) + second_half = 0xEC; + else if (byte == 0xFA) + second_half = 0xFB; + + *(*outbuf)++ = byte; + (*outbytesleft)--; + } + memcpy(*outbuf, out_buf, out_no); + *outbuf += out_no; + (*outbytesleft) -= out_no; + if (second_half) + { + *(*outbuf)++ = second_half; + (*outbytesleft)--; + } + + cd->write_marc8_last = 0; + cd->write_marc8_comb_no = 0; + return 0; +} + +static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x, + char **outbuf, size_t *outbytesleft, + int last) +{ + int comb = 0; + const char *page_chr = 0; + unsigned long y = lookup_marc8(cd, x, &comb, &page_chr); + + if (!y) + return (size_t) (-1); + + if (comb) + { + if (cd->write_marc8_comb_no < 6) + cd->write_marc8_comb_ch[cd->write_marc8_comb_no++] = y; + } + else + { + size_t r = flush_combos(cd, outbuf, outbytesleft); + if (r) + return r; + if (strcmp(page_chr, cd->write_marc8_page_chr)) + { + size_t plen = strlen(page_chr); + + if (*outbytesleft < plen) + { + cd->my_errno = YAZ_ICONV_E2BIG; + return (size_t) (-1); + } + memcpy(*outbuf, page_chr, plen); + (*outbuf) += plen; + (*outbytesleft) -= plen; + cd->write_marc8_page_chr = page_chr; + } + cd->write_marc8_last = y; + } + if (last) + { + size_t r = flush_combos(cd, outbuf, outbytesleft); + if (r) + { + if (comb) + cd->write_marc8_comb_no--; + else + cd->write_marc8_last = 0; + return r; + } + } + return 0; +} + #if HAVE_WCHAR_H static size_t yaz_write_wchar_t (yaz_iconv_t cd, unsigned long x, char **outbuf, size_t *outbytesleft, @@ -646,6 +856,10 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode) cd->comb_offset = cd->comb_size = 0; cd->compose_char = 0; + cd->write_marc8_comb_no = 0; + cd->write_marc8_last = 0; + cd->write_marc8_page_chr = "\033(B"; + /* a useful hack: if fromcode has leading @, the library not use YAZ's own conversions .. */ if (fromcode[0] == '@') @@ -678,6 +892,8 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode) cd->write_handle = yaz_write_UCS4; else if (!yaz_matchstr(tocode, "UCS4LE")) cd->write_handle = yaz_write_UCS4LE; + else if (!yaz_matchstr(tocode, "MARC8")) + cd->write_handle = yaz_write_marc8; #if HAVE_WCHAR_H else if (!yaz_matchstr(tocode, "WCHAR_T")) cd->write_handle = yaz_write_wchar_t; @@ -828,7 +1044,6 @@ int yaz_iconv_close (yaz_iconv_t cd) return 0; } - /* * Local variables: * c-basic-offset: 4 diff --git a/test/tsticonv.c b/test/tsticonv.c index 3892c89..fd8f2c8 100644 --- a/test/tsticonv.c +++ b/test/tsticonv.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2005, Index Data ApS * See the file LICENSE for details. * - * $Id: tsticonv.c,v 1.16 2006-03-25 14:42:16 adam Exp $ + * $Id: tsticonv.c,v 1.17 2006-04-19 23:15:40 adam Exp $ */ #if HAVE_CONFIG_H @@ -349,7 +349,6 @@ int utf8_check(unsigned c) return 1; } - static int tst_convert(yaz_iconv_t cd, const char *buf, const char *cmpbuf) { int ret = 0; @@ -380,11 +379,13 @@ static int tst_convert(yaz_iconv_t cd, const char *buf, const char *cmpbuf) return ret; } -static void tst_x() +static void tst_conversion_marc8_to_latin1() { yaz_iconv_t cd = yaz_iconv_open("ISO-8859-1", "MARC8"); YAZ_CHECK(cd); + if (!cd) + return; YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math")); @@ -406,11 +407,88 @@ static void tst_x() yaz_iconv_close(cd); } +static void tst_conversion_utf8_to_marc8() +{ + yaz_iconv_t cd = yaz_iconv_open("MARC8", "UTF-8"); + + YAZ_CHECK(cd); + if (!cd) + return; + + YAZ_CHECK(tst_convert(cd, "Cours ", "Cours ")); + + /** Pure ASCII. 11 characters (sizeof(outbuf)-1) */ + YAZ_CHECK(tst_convert(cd, "Cours de mat", "Cours de mat")); + + /** Pure ASCII. 12 characters (sizeof(outbuf)) */ + YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math")); + + /** Pure ASCII. 13 characters (sizeof(outbuf)) */ + YAZ_CHECK(tst_convert(cd, "Cours de math.", "Cours de math.")); + + /** UPPERCASE SCANDINAVIAN O */ + YAZ_CHECK(tst_convert(cd, "S\xc3\x98", "S\xa2")); + + /** ARING */ + YAZ_CHECK(tst_convert(cd, "A" "\xCC\x8A", "\xEA" "A")); + + /** A MACRON + UMLAUT, DIAERESIS */ + YAZ_CHECK(tst_convert(cd, "A" "\xCC\x84" "\xCC\x88", + "\xE5\xE8\x41")); + + /* Ligature spanning two characters */ + YAZ_CHECK(tst_convert(cd, + "\x74" "\xCD\xA1" "\x73", /* UTF-8 */ + "\xEB\x74\xEC\x73")); /* MARC-8 */ + + /* Double title spanning two characters */ + YAZ_CHECK(tst_convert(cd, + "\x74" "\xCD\xA0" "\x73", /* UTF-8 */ + "\xFA\x74\xFB\x73")); /* MARC-8 */ + + /** Ideographic question mark (Unicode FF1F) */ + YAZ_CHECK(tst_convert(cd, + "\xEF\xBC\x9F" "o", /* UTF-8 */ + "\033(1" "\x21\x2B\x3B" "\033(B" "o" )); + + yaz_iconv_close(cd); +} + + +static void tst_conversion_latin1_to_marc8() +{ + yaz_iconv_t cd = yaz_iconv_open("MARC8", "ISO-8859-1"); + + YAZ_CHECK(cd); + if (!cd) + return; + + YAZ_CHECK(tst_convert(cd, "Cours ", "Cours ")); + + /** Pure ASCII. 11 characters (sizeof(outbuf)-1) */ + YAZ_CHECK(tst_convert(cd, "Cours de mat", "Cours de mat")); + + /** Pure ASCII. 12 characters (sizeof(outbuf)) */ + YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math")); + + /** Pure ASCII. 13 characters (sizeof(outbuf)) */ + YAZ_CHECK(tst_convert(cd, "Cours de math.", "Cours de math.")); + + /** UPPERCASE SCANDINAVIAN O */ + YAZ_CHECK(tst_convert(cd, "SØ", "S\xa2")); + + yaz_iconv_close(cd); +} + int main (int argc, char **argv) { YAZ_CHECK_INIT(argc, argv); - tst_x(); + tst_conversion_marc8_to_latin1(); + + tst_conversion_utf8_to_marc8(); + + tst_conversion_latin1_to_marc8(); YAZ_CHECK(utf8_check(3)); YAZ_CHECK(utf8_check(127));