X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=src%2Ftest_icu_I18N.c;h=6450c8a3dfb63e78beffbc594f0d56c12f986fc6;hb=0ff1a97b2a69905755b9adb24a474d30f1c52150;hp=886176583659a47c5fb93c80f07dfbd5f2ca596d;hpb=968c591d366c50df78a3dda223fdb3491cbcb6cd;p=pazpar2-moved-to-github.git
diff --git a/src/test_icu_I18N.c b/src/test_icu_I18N.c
index 8861765..6450c8a 100644
--- a/src/test_icu_I18N.c
+++ b/src/test_icu_I18N.c
@@ -1,4 +1,4 @@
-/* $Id: test_icu_I18N.c,v 1.22 2007-05-20 19:00:17 marc Exp $
+/* $Id: test_icu_I18N.c,v 1.27 2007-05-25 13:27:21 marc Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
@@ -498,14 +498,35 @@ void test_icu_I18N_tokenizer(int argc, char **argv)
void test_icu_I18N_chain(int argc, char **argv)
{
const char * en_str
- = "O Romeo, Romeo! wherefore art\nthou\tRomeo?";
+ = "O Romeo, Romeo! wherefore art thou\t Romeo?";
printf("ICU chain:\ninput: '%s'\n", en_str);
UErrorCode status = U_ZERO_ERROR;
- struct icu_chain_step * step = 0;
- struct icu_chain * chain
- = icu_chain_create((uint8_t *) "en:word", (uint8_t *) "en");
+ //struct icu_chain_step * step = 0;
+ struct icu_chain * chain = 0;
+
+
+ const char * xml_str = ""
+ ""
+ ""
+ ""
+ ""
+ ""
+ ""
+ ""
+ "";
+
+
+ xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
+ xmlNode *xml_node = xmlDocGetRootElement(doc);
+ YAZ_CHECK(xml_node);
+
+
+ chain = icu_chain_xml_config(xml_node, &status);
+
+#if 0
+ chain = icu_chain_create((uint8_t *) "en:word", (uint8_t *) "en");
step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize,
(const uint8_t *) "[:Control:] Any-Remove",
&status);
@@ -528,15 +549,17 @@ void test_icu_I18N_chain(int argc, char **argv)
step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap,
(const uint8_t *) "l",
&status);
- step = icu_chain_insert_step(chain, ICU_chain_step_type_norm,
+ step = icu_chain_insert_step(chain, ICU_chain_step_type_index,
(const uint8_t *)"",
&status);
-/* step = icu_chain_insert_step(chain, ICU_chain_step_type_sort, */
+/* step = icu_chain_insert_step(chain, ICU_chain_step_type_sortkey, */
/* (const uint8_t *)"", */
/* &status); */
+#endif
-
+ xmlFreeDoc(doc);
+ YAZ_CHECK(chain);
YAZ_CHECK(icu_chain_assign_cstr(chain, en_str, &status));
@@ -547,10 +570,83 @@ void test_icu_I18N_chain(int argc, char **argv)
icu_chain_get_display(chain));
}
+ YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 7);
+
+
+ YAZ_CHECK(icu_chain_assign_cstr(chain, "what is this?", &status));
+
+ while (icu_chain_next_token(chain, &status)){
+ printf("%d '%s' '%s'\n",
+ icu_chain_get_token_count(chain),
+ icu_chain_get_norm(chain),
+ icu_chain_get_display(chain));
+ }
+
+
+ YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 3);
+
icu_chain_destroy(chain);
}
+void test_bug_1140(void)
+{ /* Presumably a regression test for bug 1140 (going by the name):
+ * one chain built from an XML config is fed two inputs back to back,
+ * and the token count is checked after each input. */
+ const char * en_str
+ = "O Romeo, Romeo! wherefore art thou\t Romeo?";
+
+ printf("ICU chain:\ninput: '%s'\n", en_str);
+
+ UErrorCode status = U_ZERO_ERROR;
+ //struct icu_chain_step * step = 0;
+ struct icu_chain * chain = 0;
+
+ const char * xml_str = ""
+
+ /* If the first rule is normalize instead, then it works. The literal
+ * under #if 0 is presumably that alternative first rule.
+ * NOTE(review): every fragment of xml_str is an empty literal in this
+ * copy; the XML markup was probably lost in transit -- confirm against
+ * the repository before relying on the counts below. */
+#if 0
+ ""
+#endif
+ ""
+ ""
+ ""
+ ""
+ ""
+ ""
+ "";
+
+ /* Parse the config text and check that a root element exists. */
+ xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
+ xmlNode *xml_node = xmlDocGetRootElement(doc);
+ YAZ_CHECK(xml_node);
+
+ chain = icu_chain_xml_config(xml_node, &status);
+ /* The document is freed before the chain is used; presumably the chain
+ * keeps no pointers into it -- TODO confirm. */
+ xmlFreeDoc(doc);
+ YAZ_CHECK(chain);
+ /* First input: same text as en_str, written out again as a literal. */
+ YAZ_CHECK(icu_chain_assign_cstr(
+ chain, "O Romeo, Romeo! wherefore art thou\t Romeo?",
+ &status));
+ /* Consume every token; only the final count is checked. */
+ while (icu_chain_next_token(chain, &status))
+ ;
+
+ YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 7);
+ /* Second input, assigned to the same chain without rebuilding it. */
+ YAZ_CHECK(icu_chain_assign_cstr(chain, "what is this?", &status));
+ /* Print count, norm and display for each token as it is produced. */
+ while (icu_chain_next_token(chain, &status)){
+ printf("%d '%s' '%s'\n",
+ icu_chain_get_token_count(chain),
+ icu_chain_get_norm(chain),
+ icu_chain_get_display(chain));
+ }
+
+ /* We expect 'what' 'is' 'this', i.e. 3 tokens. */
+ YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 3);
+
+ icu_chain_destroy(chain);
+}
#endif // HAVE_ICU
@@ -570,6 +666,7 @@ int main(int argc, char **argv)
test_icu_I18N_normalizer(argc, argv);
test_icu_I18N_tokenizer(argc, argv);
test_icu_I18N_chain(argc, argv);
+ test_bug_1140();
#else // HAVE_ICU