X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=src%2Ftest_icu_I18N.c;h=6450c8a3dfb63e78beffbc594f0d56c12f986fc6;hb=76913a6235a6410a02e00486093b45f93a04d2f1;hp=4e976eb64aad7a954a33cbc425b61e7c51956f9e;hpb=b55d58436ec1d0b56ea2833ab8a711f8b9232ed7;p=pazpar2-moved-to-github.git
diff --git a/src/test_icu_I18N.c b/src/test_icu_I18N.c
index 4e976eb..6450c8a 100644
--- a/src/test_icu_I18N.c
+++ b/src/test_icu_I18N.c
@@ -1,4 +1,4 @@
-/* $Id: test_icu_I18N.c,v 1.23 2007-05-21 10:14:08 marc Exp $
+/* $Id: test_icu_I18N.c,v 1.27 2007-05-25 13:27:21 marc Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
@@ -513,8 +513,8 @@ void test_icu_I18N_chain(int argc, char **argv)
""
""
""
- ""
- ""
+ ""
+ ""
"";
@@ -549,15 +549,16 @@ void test_icu_I18N_chain(int argc, char **argv)
step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap,
(const uint8_t *) "l",
&status);
- step = icu_chain_insert_step(chain, ICU_chain_step_type_norm,
+ step = icu_chain_insert_step(chain, ICU_chain_step_type_index,
(const uint8_t *)"",
&status);
-/* step = icu_chain_insert_step(chain, ICU_chain_step_type_sort, */
+/* step = icu_chain_insert_step(chain, ICU_chain_step_type_sortkey, */
/* (const uint8_t *)"", */
/* &status); */
#endif
+ xmlFreeDoc(doc);
YAZ_CHECK(chain);
YAZ_CHECK(icu_chain_assign_cstr(chain, en_str, &status));
@@ -571,10 +572,81 @@ void test_icu_I18N_chain(int argc, char **argv)
YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 7);
+
+ YAZ_CHECK(icu_chain_assign_cstr(chain, "what is this?", &status));
+
+ while (icu_chain_next_token(chain, &status)){
+ printf("%d '%s' '%s'\n",
+ icu_chain_get_token_count(chain),
+ icu_chain_get_norm(chain),
+ icu_chain_get_display(chain));
+ }
+
+
+ YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 3);
+
icu_chain_destroy(chain);
}
+void test_bug_1140(void)
+{ /* Regression test named after bug 1140: builds an ICU chain from an XML
+   * configuration, tokenizes two inputs in turn with that one chain, and
+   * checks the token count after each input. */
+ const char * en_str
+ = "O Romeo, Romeo! wherefore art thou\t Romeo?";
+
+ printf("ICU chain:\ninput: '%s'\n", en_str); /* en_str is only echoed here; the chain is fed an identical literal below */
+
+ UErrorCode status = U_ZERO_ERROR;
+ //struct icu_chain_step * step = 0; (left disabled: no step handle is used in this function)
+ struct icu_chain * chain = 0;
+
+ const char * xml_str = ""
+
+ /* If the first rule is normalize instead, then it works.
+  * NOTE(review): every fragment of this literal is empty as shown, so
+  * xml_str is the empty string here; the XML markup was presumably lost
+  * when this diff was extracted -- confirm against the repository. */
+#if 0
+ "" /* disabled fragment; presumably the alternative first rule mentioned above */
+#endif
+ ""
+ ""
+ ""
+ ""
+ ""
+ ""
+ "";
+
+
+ xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str)); /* NOTE(review): doc itself is not checked, only the root node below */
+ xmlNode *xml_node = xmlDocGetRootElement(doc);
+ YAZ_CHECK(xml_node);
+
+ chain = icu_chain_xml_config(xml_node, &status); /* status is passed along but never inspected in this function */
+
+ xmlFreeDoc(doc); /* freed before the chain is used; presumably the chain keeps no pointers into the document -- TODO confirm */
+ YAZ_CHECK(chain);
+
+ YAZ_CHECK(icu_chain_assign_cstr(
+ chain, "O Romeo, Romeo! wherefore art thou\t Romeo?",
+ &status));
+
+ while (icu_chain_next_token(chain, &status))
+ ; /* empty body: consume every token, only the final count is checked */
+
+ YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 7); /* the input has 7 words */
+
+ YAZ_CHECK(icu_chain_assign_cstr(chain, "what is this?", &status)); /* second input assigned to the same chain */
+
+ while (icu_chain_next_token(chain, &status)){
+ printf("%d '%s' '%s'\n", /* NOTE(review): assumes an int count and C-string norm/display values -- confirm */
+ icu_chain_get_token_count(chain),
+ icu_chain_get_norm(chain),
+ icu_chain_get_display(chain));
+ }
+
+ /* We expect 'what' 'is' 'this', i.e. 3 tokens for the second input. */
+ YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 3);
+
+ icu_chain_destroy(chain);
+}
#endif // HAVE_ICU
@@ -594,6 +666,7 @@ int main(int argc, char **argv)
test_icu_I18N_normalizer(argc, argv);
test_icu_I18N_tokenizer(argc, argv);
test_icu_I18N_chain(argc, argv);
+ test_bug_1140();
#else // HAVE_ICU