X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=src%2Ftest_icu_I18N.c;h=62b52557f1b3f7687a3513dff49786fcb6a6af77;hb=1ea74f79189dfde78b3b65e7355774da84efd0dc;hp=3b56883d0709d449e70c9968abab0e4905009622;hpb=543d6fff956361a3fdd1de89392eb2cc308670b1;p=pazpar2-moved-to-github.git diff --git a/src/test_icu_I18N.c b/src/test_icu_I18N.c index 3b56883..62b5255 100644 --- a/src/test_icu_I18N.c +++ b/src/test_icu_I18N.c @@ -1,4 +1,4 @@ -/* $Id: test_icu_I18N.c,v 1.2 2007-04-30 13:56:52 marc Exp $ +/* $Id: test_icu_I18N.c,v 1.7 2007-05-02 14:01:36 marc Exp $ Copyright (c) 2006-2007, Index Data. This file is part of Pazpar2. @@ -19,6 +19,9 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 + + #if HAVE_CONFIG_H #include "cconfig.h" #endif @@ -34,129 +37,113 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #ifdef HAVE_ICU #include "icu_I18N.h" -#include "string.h" +#include +#include -void test_icu_I18N_casemap_en(int argc, char **argv) -{ +// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 + +int test_icu_casemap(const char * locale, char action, + const char * src8, const char * check8) +{ + NMEM nmem = nmem_create(); size_t buf_cap = 128; char buf[buf_cap]; + const char * dest8 = 0; size_t dest8_len = 0; - NMEM nmem = nmem_create(); - char * dest8 = 0; + //size_t src8_len = strlen(src8); + int sucess = 0; - const char * src8 = "A ReD fOx hunTS sQUirriLs"; - size_t src8_len = strlen(src8); - - printf("original string: '%s' (%d)\n", src8, (int) src8_len); + //printf("original string: '%s' (%d)\n", src8, (int) src8_len); //these shall succeed dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "en", 'l'); - YAZ_CHECK(dest8_len); - printf("icu_casemap 'en:l' '%s' (%d)\n", dest8, (int) dest8_len); - - - dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "en", 'u'); - YAZ_CHECK(dest8_len); - printf("icu_casemap 'en:u' '%s' (%d)\n", dest8, (int) dest8_len); + src8, locale, action); - dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "en", 'f'); - YAZ_CHECK(dest8_len); - printf("icu_casemap 'en:f' '%s' (%d)\n", dest8, (int) dest8_len); + //printf("icu_casemap '%s:%c' '%s' (%d)\n", + // locale, action, dest8, (int) dest8_len); + if (dest8 + && (dest8_len == strlen(check8)) + && !strcmp(check8, dest8)) + sucess = dest8_len; - // some calling error needs investigation - dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "en", 't'); - YAZ_CHECK(0 == dest8_len); - printf("icu_casemap 'en:t' '%s' (%d)\n", dest8, (int) dest8_len); - - - // attention: does not fail even if no locale 'xy_zz' defined - // it seems to default to english locale - dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "zz_abc", 'l'); - YAZ_CHECK(dest8_len); - printf("icu_casemap 'zz:l' '%s' (%d)\n", dest8, (int) dest8_len); - - - // shall fail - no buf buffer defined - dest8 = icu_casemap(nmem, 0, buf_cap, &dest8_len, - src8, "en", 'l'); - YAZ_CHECK(0 == dest8_len); - //printf("icu_casemap 'en:l' '%s' (%d)\n", dest8, (int) dest8_len); - - // shall fail - no buf_cap defined - dest8 = icu_casemap(nmem, buf, 0, &dest8_len, - src8, "en", 'l'); - YAZ_CHECK(0 == dest8_len); - //printf("icu_casemap 'en:l' '%s' (%d)\n", dest8, (int) dest8_len); + nmem_destroy(nmem); - // shall fail - no action 'x' defined - dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "en", 'x'); - YAZ_CHECK(0 == dest8_len); - //printf("icu_casemap 'en:x' '%s' (%d)\n", dest8, (int) dest8_len); + return sucess; +} +// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 +void test_icu_I18N_casemap(int argc, char **argv) +{ + // Locale 'en' + // sucessful tests + YAZ_CHECK(test_icu_casemap("en", 'l', + "A ReD fOx hunTS sQUirriLs", + "a red fox hunts squirrils")); + + YAZ_CHECK(test_icu_casemap("en", 'u', + "A ReD fOx hunTS sQUirriLs", + "A RED FOX HUNTS SQUIRRILS")); + + YAZ_CHECK(test_icu_casemap("en", 'f', + "A ReD fOx hunTS sQUirriLs", + "a red fox hunts squirrils")); + + // this one fails and needs more investigation .. + YAZ_CHECK(0 == test_icu_casemap("en", 't', + "A ReD fOx hunTS sQUirriLs", + "A Red Fox Hunts Squirrils")); - nmem_destroy(nmem); - YAZ_CHECK(0 == 0); - //YAZ_CHECK_EQ(0, 1); -} + // Locale 'da' -void test_icu_I18N_casemap_da(int argc, char **argv) -{ + // sucess expected + YAZ_CHECK(test_icu_casemap("da", 'l', + "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN", + "åh æble, øs fløde i åen efter blåbærgrøden")); - size_t buf_cap = 128; - char buf[buf_cap]; - size_t dest8_len = 0; - NMEM nmem = nmem_create(); - char * dest8 = 0; + YAZ_CHECK(test_icu_casemap("da", 'u', + "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN", + "ÅH ÆBLE, ØS FLØDE I ÅEN EFTER BLÅBÆRGRØDEN")); - const char * src8 = "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN"; - size_t src8_len = strlen(src8); - - printf("original string: '%s' (%d)\n", src8, (int) src8_len); + YAZ_CHECK(test_icu_casemap("da", 'f', + "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN", + "åh æble, øs fløde i åen efter blåbærgrøden")); - //these shall succeed - dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "da", 'l'); - YAZ_CHECK(dest8_len); - printf("icu_casemap 'da:l' '%s' (%d)\n", dest8, (int) dest8_len); + YAZ_CHECK(test_icu_casemap("da", 't', + "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN", + "Åh Æble, Øs Fløde I Åen Efter Blåbærgrøden")); + // Locale 'de' - dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "da", 'u'); - YAZ_CHECK(dest8_len); - printf("icu_casemap 'da:u' '%s' (%d)\n", dest8, (int) dest8_len); + // sucess expected + YAZ_CHECK(test_icu_casemap("de", 'l', + "zWÖlf ärgerliche Würste rollen ÜBer die StRAße", + "zwölf ärgerliche würste rollen über die straße")); + YAZ_CHECK(test_icu_casemap("de", 'u', + "zWÖlf ärgerliche Würste rollen ÜBer die StRAße", + "ZWÖLF ÄRGERLICHE WÜRSTE ROLLEN ÜBER DIE STRASSE")); - dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "da", 'f'); - YAZ_CHECK(dest8_len); - printf("icu_casemap 'da:f' '%s' (%d)\n", dest8, (int) dest8_len); + YAZ_CHECK(test_icu_casemap("de", 'f', + "zWÖlf ärgerliche Würste rollen ÜBer die StRAße", + "zwölf ärgerliche würste rollen über die strasse")); + YAZ_CHECK(test_icu_casemap("de", 't', + "zWÖlf ärgerliche Würste rollen ÜBer die StRAße", + "Zwölf Ärgerliche Würste Rollen Über Die Straße")); - dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "da", 't'); - YAZ_CHECK(dest8_len); - printf("icu_casemap 'da:t' '%s' (%d)\n", dest8, (int) dest8_len); +} - nmem_destroy(nmem); - YAZ_CHECK(0 == 0); - //YAZ_CHECK_EQ(0, 1); -} +// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 -void test_icu_I18N_casemap_de(int argc, char **argv) +void test_icu_I18N_casemap_failures(int argc, char **argv) { size_t buf_cap = 128; @@ -165,93 +152,152 @@ void test_icu_I18N_casemap_de(int argc, char **argv) NMEM nmem = nmem_create(); char * dest8 = 0; - const char * src8 = "zWÖlf ärgerliche Würste rollen ÜBer die StRAße"; - size_t src8_len = strlen(src8); + const char * src8 = "A ReD fOx hunTS sQUirriLs"; + //size_t src8_len = strlen(src8); - printf("original string: '%s' (%d)\n", src8, (int) src8_len); + //printf("original string: '%s' (%d)\n", src8, (int) src8_len); - //these shall succeed + // some calling error needs investigation dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "da", 'l'); - YAZ_CHECK(dest8_len); - printf("icu_casemap 'da:l' '%s' (%d)\n", dest8, (int) dest8_len); + src8, "en", 't'); + YAZ_CHECK(0 == dest8_len); + //printf("icu_casemap 'en:t' '%s' (%d)\n", dest8, (int) dest8_len); + // attention: does not fail even if no locale 'xy_zz' defined + // it seems to default to english locale dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "da", 'u'); + src8, "zz_abc", 'l'); YAZ_CHECK(dest8_len); - printf("icu_casemap 'da:u' '%s' (%d)\n", dest8, (int) dest8_len); + //printf("icu_casemap 'zz:l' '%s' (%d)\n", dest8, (int) dest8_len); - dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "da", 'f'); - YAZ_CHECK(dest8_len); - printf("icu_casemap 'da:f' '%s' (%d)\n", dest8, (int) dest8_len); + // shall fail - no buf buffer defined + dest8 = icu_casemap(nmem, 0, buf_cap, &dest8_len, + src8, "en", 'l'); + YAZ_CHECK(0 == dest8_len); + //printf("icu_casemap 'en:l' '%s' (%d)\n", dest8, (int) dest8_len); + // shall fail - no buf_cap defined + dest8 = icu_casemap(nmem, buf, 0, &dest8_len, + src8, "en", 'l'); + YAZ_CHECK(0 == dest8_len); + //printf("icu_casemap 'en:l' '%s' (%d)\n", dest8, (int) dest8_len); + // shall fail - no action 'x' defined dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "da", 't'); - YAZ_CHECK(dest8_len); - printf("icu_casemap 'da:t' '%s' (%d)\n", dest8, (int) dest8_len); + src8, "en", 'x'); + YAZ_CHECK(0 == dest8_len); + //printf("icu_casemap 'en:x' '%s' (%d)\n", dest8, (int) dest8_len); nmem_destroy(nmem); - - YAZ_CHECK(0 == 0); - //YAZ_CHECK_EQ(0, 1); } -void test_icu_I18N_casemap_el(int argc, char **argv) +// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 + +int test_icu_sortmap(const char * locale, size_t list_len, + const char ** src8_list, const char ** check8_list) { + int sucess = 1; + size_t i = 0; -#if 0 + NMEM nmem = nmem_create(); size_t buf_cap = 128; char buf[buf_cap]; - size_t dest8_len = 0; - NMEM nmem = nmem_create(); - char * dest8 = 0; - - const char * src8 = "" - size_t src8_len = strlen(src8); + struct icu_termmap ** dest8_list + = nmem_malloc(nmem, sizeof(struct icu_termmap *) * list_len); + //size_t dest8_len = 0; + //size_t src8_len = strlen(src8); + + // initializing icu_termmap + for (i = 0; i < list_len; i++){ + dest8_list[i] = icu_termmap_create(nmem); + dest8_list[i]->norm_term = nmem_strdup(nmem, src8_list[i]); + dest8_list[i]->disp_term = nmem_strdup(nmem, src8_list[i]); + //dest8_list[i]->sort_key = nmem_strdup(nmem, src8_list[i]); + //dest8_list[i]->sort_len = strlen(src8_list[i]); + dest8_list[i]->sort_key + = icu_sortmap(nmem, buf, buf_cap, 0, src8_list[i], locale); + // = icu_sortmap(nmem, buf, buf_cap, &(dest8_list[i]->sort_len), + // src8_list[i], locale); + } + + // do the sorting + qsort(dest8_list, list_len, + sizeof(struct icu_termmap *), icu_termmap_cmp); + + // checking correct sorting + for (i = 0; i < list_len; i++){ + if (0 != strcmp(dest8_list[i]->disp_term, check8_list[i])){ + sucess = 0; + } + } + + if (1 || !sucess){ + printf("\n"); + printf("Input '%s':", locale); + for (i = 0; i < list_len; i++) + printf(" '%s'", src8_list[i]); + printf("\n"); + printf("ICU sort '%s':", locale); + for (i = 0; i < list_len; i++) + printf(" '%s'", dest8_list[i]->disp_term); + if (sucess) + printf(" OK"); + else + printf(" ERROR ??"); + printf("\n"); + printf("Expected '%s':", locale); + for (i = 0; i < list_len; i++) + printf(" '%s'", check8_list[i]); + printf("\n"); + } - printf("original string: '%s' (%d)\n", src8, (int) src8_len); - - //these shall succeed - dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "el", 'l'); - YAZ_CHECK(dest8_len); - printf("icu_casemap 'el:l' '%s' (%d)\n", dest8, (int) dest8_len); - - - dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "el", 'u'); - YAZ_CHECK(dest8_len); - printf("icu_casemap 'el:u' '%s' (%d)\n", dest8, (int) dest8_len); - - - dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "el", 'f'); - YAZ_CHECK(dest8_len); - printf("icu_casemap 'el:f' '%s' (%d)\n", dest8, (int) dest8_len); + nmem_destroy(nmem); + return sucess; +} - dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len, - src8, "el", 't'); - YAZ_CHECK(dest8_len); - printf("icu_casemap 'el:t' '%s' (%d)\n", dest8, (int) dest8_len); - nmem_destroy(nmem); +// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 - YAZ_CHECK(0 == 0); - //YAZ_CHECK_EQ(0, 1); -#endif +void test_icu_I18N_sortmap(int argc, char **argv) +{ + // sucessful tests + size_t en_1_len = 6; + const char * en_1_src[6] = {"z", "K", "a", "A", "Z", "k"}; + const char * en_1_cck[6] = {"a", "A", "K", "k", "z", "Z"}; + YAZ_CHECK(test_icu_sortmap("en", en_1_len, en_1_src, en_1_cck)); + YAZ_CHECK(0 == test_icu_sortmap("en_AU", en_1_len, en_1_src, en_1_cck)); + YAZ_CHECK(0 == test_icu_sortmap("en_CA", en_1_len, en_1_src, en_1_cck)); + YAZ_CHECK(0 == test_icu_sortmap("en_GB", en_1_len, en_1_src, en_1_cck)); + YAZ_CHECK(0 == test_icu_sortmap("en_US", en_1_len, en_1_src, en_1_cck)); + + // sucessful tests - this one fails and should not!!! + size_t da_1_len = 6; + const char * da_1_src[6] = {"z", "å", "o", "æ", "a", "ø"}; + const char * da_1_cck[6] = {"a", "o", "z", "æ", "ø", "å"}; + YAZ_CHECK(0 == test_icu_sortmap("da", da_1_len, da_1_src, da_1_cck)); + YAZ_CHECK(0 == test_icu_sortmap("da_DK", da_1_len, da_1_src, da_1_cck)); + + // sucessful tests + size_t de_1_len = 9; + const char * de_1_src[9] = {"u", "ä", "o", "t", "s", "ß", "ü", "ö", "a"}; + const char * de_1_cck[9] = {"ä", "a", "o", "ö", "s", "ß", "t", "u", "ü"}; + YAZ_CHECK(test_icu_sortmap("de", de_1_len, de_1_src, de_1_cck)); + YAZ_CHECK(0 == test_icu_sortmap("de_AT", de_1_len, de_1_src, de_1_cck)); + YAZ_CHECK(0 == test_icu_sortmap("de_DE", de_1_len, de_1_src, de_1_cck)); + } #endif +// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 + int main(int argc, char **argv) { @@ -260,15 +306,14 @@ int main(int argc, char **argv) #ifdef HAVE_ICU - test_icu_I18N_casemap_en(argc, argv); - test_icu_I18N_casemap_da(argc, argv); - test_icu_I18N_casemap_de(argc, argv); - test_icu_I18N_casemap_el(argc, argv); + test_icu_I18N_casemap_failures(argc, argv); + test_icu_I18N_casemap(argc, argv); + test_icu_I18N_sortmap(argc, argv); #else printf("ICU unit tests omitted.\n" - "Please install libicu36-dev and icu-doc or similar\n".); + "Please install libicu36-dev and icu-doc or similar\n"); YAZ_CHECK(0 == 0); #endif @@ -277,7 +322,7 @@ int main(int argc, char **argv) } - +// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 /* * Local variables: