-/* $Id: tst_icu_I18N.c,v 1.2 2007-10-22 17:32:07 adam Exp $
+/* $Id: tst_icu_I18N.c,v 1.4 2007-10-24 07:41:48 marc Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
xmlNode *xml_node = xmlDocGetRootElement(doc);
YAZ_CHECK(xml_node);
- printf("ICU chain:\ninput: '%s'\n", en_str);
+ // printf("ICU chain:\ninput: '%s'\n", en_str);
chain = icu_chain_xml_config(xml_node, &status);
YAZ_CHECK(icu_chain_assign_cstr(chain, en_str, &status));
while (icu_chain_next_token(chain, &status)){
- printf("%d '%s' '%s'\n",
- icu_chain_get_token_count(chain),
- icu_chain_get_norm(chain),
- icu_chain_get_display(chain));
+ ;
+ // printf("%d '%s' '%s'\n",
+ // icu_chain_get_token_count(chain),
+ // icu_chain_get_norm(chain),
+ // icu_chain_get_display(chain));
}
YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 7);
YAZ_CHECK(icu_chain_assign_cstr(chain, "what is this?", &status));
while (icu_chain_next_token(chain, &status)){
- printf("%d '%s' '%s'\n",
- icu_chain_get_token_count(chain),
- icu_chain_get_norm(chain),
- icu_chain_get_display(chain));
+ ;
+ //printf("%d '%s' '%s'\n",
+ // icu_chain_get_token_count(chain),
+ // icu_chain_get_norm(chain),
+ // icu_chain_get_display(chain));
}
chain, "O Romeo, Romeo! wherefore art thou\t Romeo?",
&status));
- while (icu_chain_next_token(chain, &status))
+ while (icu_chain_next_token(chain, &status)){
;
+ printf("%d '%s' '%s'\n",
+ icu_chain_get_token_count(chain),
+ icu_chain_get_norm(chain),
+ icu_chain_get_display(chain));
+
+ }
+
YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 7);
YAZ_CHECK(icu_chain_assign_cstr(chain, "what is this?", &status));
while (icu_chain_next_token(chain, &status)){
- printf("%d '%s' '%s'\n",
+ ;
+ printf("%d '%s' '%s'\n",
icu_chain_get_token_count(chain),
icu_chain_get_norm(chain),
icu_chain_get_display(chain));
icu_chain_destroy(chain);
}
+
+// Runs a single-space string through an ICU chain configured from inline XML and checks the token count.
+void test_chain_empty_token(void)
+{
+ UErrorCode status = U_ZERO_ERROR;
+ struct icu_chain * chain = 0;
+ // Chain configuration: two <normalize> rules ("Remove", then "Lower") and an <index/> element.
+ const char * xml_str = "<icu_chain id=\"en:word\" locale=\"el\">"
+ "<normalize rule=\"Remove\"/>"
+ "<normalize rule=\"Lower\"/>"
+ "<index/>"
+ "</icu_chain>";
+ // Parse the XML text and use its root element as the chain configuration node.
+ xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
+ xmlNode *xml_node = xmlDocGetRootElement(doc);
+ YAZ_CHECK(xml_node);
+ // Build the chain from the root node.
+ chain = icu_chain_xml_config(xml_node, &status);
+ // The document is freed before the chain is used; presumably the chain keeps no pointers into it -- TODO confirm.
+ xmlFreeDoc(doc);
+ YAZ_CHECK(chain);
+ // Input under test: a string literal containing only a space.
+ YAZ_CHECK(icu_chain_assign_cstr(
+ chain, " ",
+ &status));
+ // For each token, print the values of icu_chain_get_token_count/_get_norm/_get_display.
+ while (icu_chain_next_token(chain, &status)){
+ ; // empty statement, no effect; the printf below is still part of the loop body
+ printf("%d '%s' '%s'\n",
+ icu_chain_get_token_count(chain),
+ icu_chain_get_norm(chain),
+ icu_chain_get_display(chain));
+ }
+
+ // this should result in one token, namely the empty token '',
+ // but it has none, so the check below pins the current count of 0.
+ YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 0);
+
+ icu_chain_destroy(chain);
+}
+
#endif // HAVE_ICU
/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
test_icu_I18N_casemap(argc, argv);
test_icu_I18N_sortmap(argc, argv);
- test_icu_I18N_normalizer(argc, argv);
+ test_icu_I18N_normalizer(argc, argv);
test_icu_I18N_tokenizer(argc, argv);
test_icu_I18N_chain(argc, argv);
+ //test_chain_empty_token();
test_bug_1140();
#else /* HAVE_ICU */