X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=src%2Ftest_icu_I18N.c;h=71c4da1535e388834ed4f8f6a4ba4962a6888975;hb=27cfb6d89ca9b02f63f8334b6b8e666cf7db2ff7;hp=c9d3e395b7dfa9c9d1a1c8409e7d7f104cdfdb2c;hpb=4aa097d555372d370f2485df38ddf93ecd327c59;p=pazpar2-moved-to-github.git diff --git a/src/test_icu_I18N.c b/src/test_icu_I18N.c index c9d3e39..71c4da1 100644 --- a/src/test_icu_I18N.c +++ b/src/test_icu_I18N.c @@ -1,4 +1,4 @@ -/* $Id: test_icu_I18N.c,v 1.10 2007-05-07 12:52:04 marc Exp $ +/* $Id: test_icu_I18N.c,v 1.12 2007-05-10 10:29:58 marc Exp $ Copyright (c) 2006-2007, Index Data. This file is part of Pazpar2. @@ -209,7 +209,7 @@ int test_icu_sortmap(const char * locale, int src_list_len, UCollator *coll = ucol_open(locale, &status); icu_check_status(status); - if(!U_SUCCESS(status)) + if(U_FAILURE(status)) return 0; // assigning display terms and sort keys using buf 8 and buf16 @@ -312,6 +312,108 @@ void test_icu_I18N_sortmap(int argc, char **argv) } +// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 + +void test_icu_I18N_normmap(int argc, char **argv) +{ + + +} + +int test_icu_tokenizer(const char * locale, char action, + const char * src8cstr, int count) +{ + int success = 1; + + UErrorCode status = U_ZERO_ERROR; + struct icu_buf_utf16 * src16 = icu_buf_utf16_create(0); + struct icu_buf_utf16 * tkn16 = icu_buf_utf16_create(0); + struct icu_buf_utf8 * tkn8 = icu_buf_utf8_create(0); + + //printf("Input: '%s'\n", src8cstr); + + // transforming to UTF16 + icu_utf16_from_utf8_cstr(src16, src8cstr, &status); + icu_check_status(status); + + // set up tokenizer + struct icu_tokenizer * tokenizer + = icu_tokenizer_create(locale, action, &status); + icu_check_status(status); + YAZ_CHECK(tokenizer); + + // attach text buffer to tokenizer + icu_tokenizer_attach(tokenizer, src16, &status); + icu_check_status(status); + YAZ_CHECK(tokenizer->bi); + + // perform work on tokens + //printf("Tokens: "); + while(icu_tokenizer_next_token(tokenizer, tkn16, &status)){ + icu_check_status(status); + + // converting to UTF8 + icu_utf16_to_utf8(tkn8, tkn16, &status); + + //printf("(%d)'%s' ", icu_tokenizer_token_id(tokenizer), tkn8->utf8); + + //printf("token %d %d %d %d '%s'\n", + // + // icu_tokenizer_token_start(tokenizer), + // icu_tokenizer_token_end(tokenizer), + // icu_tokenizer_token_length(tokenizer), + // tkn8->utf8); + } + //printf("\nTokens: %d\n", icu_tokenizer_token_count(tokenizer)); + + + if (count != icu_tokenizer_token_count(tokenizer)){ + success = 0; + printf("\nTokenizer '%s:%c' Error: \n", locale, action); + printf("Input: '%s'\n", src8cstr); + printf("Tokens: %d", icu_tokenizer_token_count(tokenizer)); + printf(", expected: %d\n", count); + } + + icu_tokenizer_destroy(tokenizer); + icu_buf_utf16_destroy(src16); + icu_buf_utf16_destroy(tkn16); + icu_buf_utf8_destroy(tkn8); + + return success; +} + + +// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 + +void test_icu_I18N_tokenizer(int argc, char **argv) +{ + + + const char * en_str + = "O Romeo, Romeo! wherefore art thou Romeo?"; + + YAZ_CHECK(test_icu_tokenizer("en", 's', en_str, 2)); + YAZ_CHECK(test_icu_tokenizer("en", 'l', en_str, 7)); + YAZ_CHECK(test_icu_tokenizer("en", 'w', en_str, 16)); + YAZ_CHECK(test_icu_tokenizer("en", 'c', en_str, 41)); + + + + const char * fr_str + = "O Romeo, Romeo! wherefore art thou Romeo?"; + + YAZ_CHECK(test_icu_tokenizer("fr", 's', fr_str, 2)); + YAZ_CHECK(test_icu_tokenizer("fr", 'l', fr_str, 7)); + YAZ_CHECK(test_icu_tokenizer("fr", 'w', fr_str, 16)); + YAZ_CHECK(test_icu_tokenizer("fr", 'c', fr_str, 41)); + +} + + + + + #endif // HAVE_ICU // DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 @@ -327,7 +429,9 @@ int main(int argc, char **argv) //test_icu_I18N_casemap_failures(argc, argv); test_icu_I18N_casemap(argc, argv); test_icu_I18N_sortmap(argc, argv); - + test_icu_I18N_normmap(argc, argv); + test_icu_I18N_tokenizer(argc, argv); + #else // HAVE_ICU printf("ICU unit tests omitted.\n"