-/* $Id: test_icu_I18N.c,v 1.15 2007-05-11 06:48:32 adam Exp $
+/* $Id: test_icu_I18N.c,v 1.16 2007-05-11 08:27:29 marc Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
void test_icu_I18N_transliterator(int argc, char **argv)
{
+ // Exercises an ICU transliterator: opens one from rules8cstr, applies it to src8cstr in place, and prints the UTF-8 result.
+ UErrorCode status = U_ZERO_ERROR;
+ UParseError parse_error[256]; // NOTE(review): only element 0 is read below; a single UParseError looks sufficient - confirm
- /* setting up transliterator */
+ const char * rules8cstr = "[:Punctuation:] Any-Remove";
+ //const char * rules8cstr = "[:Control:] Any-Remove";
+ //const char * rules8cstr = "[:Decimal_Number:] Any-Remove\n";
+ //const char * rules8cstr = "[:Final_Punctuation:] Any-Remove";
+ //const char * rules8cstr = "Lower; [:^Letter:] Remove";
+ //const char * rules8cstr = "[:^Number:] Remove";
+ //const char * rules8cstr = "Lower;[[:WhiteSpace:][:Punctuation:]] Remove";
+ //const char * rules8cstr = "NFD; [:Nonspacing Mark:] Remove; NFC";
+ // The commented-out strings above are alternative values for rules8cstr; exactly one definition is active.
+
+
+ // Fixed sample input (UTF-8 C string) that the transliterator is applied to.
+ const char * src8cstr = "Genesis 1\n"
+ "The Beginning\n"
+ "1 In the beginning God created the heavens and the earth.\n"
+ "2 Now the earth was formless and empty, darkness was over "
+ "the surface of the deep, and the Spirit of God was hovering "
+ "over the waters.\n"
+ "3 And God said, Let there be light, and there was light.\n";
-#if 0
- UErrorCode status = U_ZERO_ERROR;
- UParseError parse_error[256];
- int32_t id_cap = 256;
- UChar id[256];
- id[0] = 0;
+ struct icu_buf_utf16 * rules16 = icu_buf_utf16_create(0);
+ struct icu_buf_utf16 * src16 = icu_buf_utf16_create(0);
+ struct icu_buf_utf16 * dest16 = icu_buf_utf16_create(0); // NOTE(review): dest16 is only created and destroyed in the lines shown here
+ struct icu_buf_utf8 * dest8 = icu_buf_utf8_create(0);
- trans = utrans_openU(id, id_len, UTRANS_FORWARD,
- 0, 0, parse_error, &status);
-
-
+ icu_utf16_from_utf8_cstr(rules16, rules8cstr, &status); // presumably fills rules16 with the UTF-16 form of rules8cstr (helper defined elsewhere)
+ icu_check_status(status);
+ // Same conversion step for the sample text, into src16.
+ icu_utf16_from_utf8_cstr(src16, src8cstr, &status);
+ icu_check_status(status);
+ // Open the transliterator: the string goes in the first (string, length) argument pair; the second pair is 0, 0.
+ UTransliterator * trans
+ = utrans_openU(rules16->utf16, rules16->utf16_len,
+ UTRANS_FORWARD,
+ 0, 0,
+ parse_error, &status);
+ // Alternative call kept for reference: same string passed in the second argument pair instead of the first.
+ //= utrans_openU(0, 0, UTRANS_FORWARD,
+ // rules16->utf16, rules16->utf16_len,
+ // parse_error, &status);
+ // On failure only the parse position and the offending string are printed; execution continues.
+ icu_check_status(status);
if(U_FAILURE(status)) {
- printf("Parse Error: line %d offset %d \n",
- parse_error->line, parse_error->offset);
+ printf("Parse Error: \n line %d offset %d \n '%s'\n",
+ parse_error->line, parse_error->offset,
+ rules8cstr);
}
+ // NOTE(review): trans is used below even if the failure branch above was taken; &(src16->utf16_len) is passed as two separate arguments - confirm both are intended.
+ utrans_transUChars (trans, src16->utf16, &(src16->utf16_len),
+ src16->utf16_cap,
+ 0, &(src16->utf16_len), &status);
+ // NOTE(review): status is not checked between the call above and the conversion below.
+ icu_utf16_to_utf8(dest8, src16, &status);
+ icu_check_status(status);
+ // src16 was modified in place above; dest8 receives its contents for printing.
+ printf("Transliterator:\n%s\n", dest8->utf8);
+
+ // Release the transliterator and every buffer created in this function.
+ utrans_close (trans);
+ icu_buf_utf16_destroy(rules16);
+ icu_buf_utf16_destroy(src16);
+ icu_buf_utf16_destroy(dest16);
+ icu_buf_utf8_destroy(dest8);
+
+ // NOTE(review): the #endif matching the #if 0 below is not visible in this excerpt.
+#if 0
+
icu_check_status(status);
}
+// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+
int test_icu_tokenizer(const char * locale, char action,
const char * src8cstr, int count)
{
-// CRAP to follow
-#if 0
-
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
-
-void test_icu_I18N_casemap_failures(int argc, char **argv)
-{
-
- size_t buf_cap = 128;
- char buf[buf_cap];
- size_t dest8_len = 0;
- NMEM nmem = nmem_create();
- char * dest8 = 0;
-
- const char * src8 = "A ReD fOx hunTS sQUirriLs";
- //size_t src8_len = strlen(src8);
-
- //printf("original string: '%s' (%d)\n", src8, (int) src8_len);
-
- // some calling error needs investigation
- dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
- src8, "en", 't');
- YAZ_CHECK(0 == dest8_len);
- //printf("icu_casemap 'en:t' '%s' (%d)\n", dest8, (int) dest8_len);
-
-
- // attention: does not fail even if no locale 'xy_zz' defined
- // it seems to default to english locale
- dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
- src8, "zz_abc", 'l');
- YAZ_CHECK(dest8_len);
- //printf("icu_casemap 'zz:l' '%s' (%d)\n", dest8, (int) dest8_len);
-
-
- // shall fail - no buf buffer defined
- dest8 = icu_casemap(nmem, 0, buf_cap, &dest8_len,
- src8, "en", 'l');
- YAZ_CHECK(0 == dest8_len);
- //printf("icu_casemap 'en:l' '%s' (%d)\n", dest8, (int) dest8_len);
-
- // shall fail - no buf_cap defined
- dest8 = icu_casemap(nmem, buf, 0, &dest8_len,
- src8, "en", 'l');
- YAZ_CHECK(0 == dest8_len);
- //printf("icu_casemap 'en:l' '%s' (%d)\n", dest8, (int) dest8_len);
-
- // shall fail - no action 'x' defined
- dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
- src8, "en", 'x');
- YAZ_CHECK(0 == dest8_len);
- //printf("icu_casemap 'en:x' '%s' (%d)\n", dest8, (int) dest8_len);
-
- nmem_destroy(nmem);
-}
-
-
-
-#endif
-
-
-
/*
* Local variables:
* c-basic-offset: 4