X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=src%2Ftest_icu_I18N.c;h=6450c8a3dfb63e78beffbc594f0d56c12f986fc6;hb=401d3364f84085a8b665a78aad7864fe5eabe095;hp=6ae54ac9c5f62a4adc4c795fd9ee26b6226b65a9;hpb=b1b86ad23c4567066de98a476b2a2866f7d83d89;p=pazpar2-moved-to-github.git diff --git a/src/test_icu_I18N.c b/src/test_icu_I18N.c index 6ae54ac..6450c8a 100644 --- a/src/test_icu_I18N.c +++ b/src/test_icu_I18N.c @@ -1,4 +1,4 @@ -/* $Id: test_icu_I18N.c,v 1.24 2007-05-22 07:51:45 adam Exp $ +/* $Id: test_icu_I18N.c,v 1.27 2007-05-25 13:27:21 marc Exp $ Copyright (c) 2006-2007, Index Data. This file is part of Pazpar2. @@ -513,8 +513,8 @@ void test_icu_I18N_chain(int argc, char **argv) "" "" "" - "" - "" + "" + "" ""; @@ -549,15 +549,16 @@ void test_icu_I18N_chain(int argc, char **argv) step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap, (const uint8_t *) "l", &status); - step = icu_chain_insert_step(chain, ICU_chain_step_type_norm, + step = icu_chain_insert_step(chain, ICU_chain_step_type_index, (const uint8_t *)"", &status); -/* step = icu_chain_insert_step(chain, ICU_chain_step_type_sort, */ +/* step = icu_chain_insert_step(chain, ICU_chain_step_type_sortkey, */ /* (const uint8_t *)"", */ /* &status); */ #endif + xmlFreeDoc(doc); YAZ_CHECK(chain); YAZ_CHECK(icu_chain_assign_cstr(chain, en_str, &status)); @@ -571,11 +572,81 @@ void test_icu_I18N_chain(int argc, char **argv) YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 7); + + YAZ_CHECK(icu_chain_assign_cstr(chain, "what is this?", &status)); + + while (icu_chain_next_token(chain, &status)){ + printf("%d '%s' '%s'\n", + icu_chain_get_token_count(chain), + icu_chain_get_norm(chain), + icu_chain_get_display(chain)); + } + + + YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 3); + icu_chain_destroy(chain); - xmlFreeDoc(doc); } +void test_bug_1140(void) +{ + const char * en_str + = "O Romeo, Romeo! wherefore art thou\t Romeo?"; + + printf("ICU chain:\ninput: '%s'\n", en_str); + + UErrorCode status = U_ZERO_ERROR; + //struct icu_chain_step * step = 0; + struct icu_chain * chain = 0; + + const char * xml_str = "" + + /* if the first rule is normalize instead. Then it works */ +#if 0 + "" +#endif + "" + "" + "" + "" + "" + "" + ""; + + + xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str)); + xmlNode *xml_node = xmlDocGetRootElement(doc); + YAZ_CHECK(xml_node); + + chain = icu_chain_xml_config(xml_node, &status); + + xmlFreeDoc(doc); + YAZ_CHECK(chain); + + YAZ_CHECK(icu_chain_assign_cstr( + chain, "O Romeo, Romeo! wherefore art thou\t Romeo?", + &status)); + + while (icu_chain_next_token(chain, &status)) + ; + + YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 7); + + YAZ_CHECK(icu_chain_assign_cstr(chain, "what is this?", &status)); + + while (icu_chain_next_token(chain, &status)){ + printf("%d '%s' '%s'\n", + icu_chain_get_token_count(chain), + icu_chain_get_norm(chain), + icu_chain_get_display(chain)); + } + + /* we expect 'what' 'is' 'this', i.e. 3 tokens */ + YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 3); + + icu_chain_destroy(chain); +} #endif // HAVE_ICU @@ -595,6 +666,7 @@ int main(int argc, char **argv) test_icu_I18N_normalizer(argc, argv); test_icu_I18N_tokenizer(argc, argv); test_icu_I18N_chain(argc, argv); + test_bug_1140(); #else // HAVE_ICU