2 gcc -I/usr/include/libxml2 -lxml2 -o icu-xml-convert icu-xml-convert.c
10 /* command line parameters */
11 static struct config_t {
22 void print_option_error(const struct config_t *p_config)
24 fprintf(stderr, "Calling error, valid options are :\n");
25 fprintf(stderr, "icu_chain_test\n"
26 " [-c (path/to/config/file.xml)]\n"
27 " [-p (c|l|t)] print available info \n"
28 " [-v] verbouse output\n"
33 void read_params(int argc, char **argv, struct config_t *p_config){
37 /* set default parameters */
38 p_config->conffile[0] = 0;
40 /* set up command line parameters */
42 while ((ret = options("c:p:v", argv, argc, &arg)) != -2)
47 strcpy(p_config->conffile, arg);
50 strcpy(p_config->print, arg);
54 p_config->verbatim = atoi(arg);
56 p_config->verbatim = 1;
59 print_option_error(p_config);
64 if (! strlen(p_config->conffile))
68 /* UConverter *conv; */
69 /* conv = ucnv_open("utf-8", &status); */
70 /* assert(U_SUCCESS(status)); */
73 /* = ucnv_toUChars(conv, ustr16, 1024, */
74 /* (const char *) *xstr8, strlen((const char *) *xstr8), */
79 /* ucnv_fromUChars(conv, */
80 /* (char *) *xstr8, strlen((const char *) *xstr8), */
81 /* ustr16, *ustr16_len, */
83 /* ucnv_close(conv); */
86 static void print_icu_converters(const struct config_t *p_config)
91 count = ucnv_countAvailable();
92 printf("Available ICU converters: %d\n", count);
96 printf("%s ", ucnv_getAvailableName(i));
99 printf("Default ICU Converter is: '%s'\n", ucnv_getDefaultName());
104 static void print_icu_transliterators(const struct config_t *p_config)
109 count = utrans_countAvailableIDs();
111 int32_t buf_cap = 128;
114 if (1 < p_config->verbatim){
115 printf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
116 printf("<icu>\n<transliterators actions=\"%d\">\n", count);
118 printf("Available ICU transliterators: %d\n", count);
122 utrans_getAvailableID(i, buf, buf_cap);
123 if (1 < p_config->verbatim)
124 printf("<transliterator action=\"%s\"/>\n", buf);
129 if (1 < p_config->verbatim){
130 printf("</transliterators>\n</icu>\n");
134 printf("\n\nUnicode Set Patterns:\n"
135 " Pattern Description\n"
136 " Ranges [a-z] The lower case letters a through z\n"
137 " Named Chars [abc123] The six characters a,b,c,1,2 and 3\n"
138 " String [abc{def}] chars a, b and c, and string 'def'\n"
139 " Categories [\\p{Letter}] Perl General Category 'Letter'.\n"
140 " Categories [:Letter:] Posix General Category 'Letter'.\n"
142 " Combination Example\n"
143 " Union [[:Greek:] [:letter:]]\n"
144 " Intersection [[:Greek:] & [:letter:]]\n"
145 " Set Complement [[:Greek:] - [:letter:]]\n"
146 " Complement [^[:Greek:] [:letter:]]\n"
148 "see: http://icu.sourceforge.net/userguide/unicodeSet.html\n"
151 " [:Punctuation:] Any-Remove\n"
152 " [:Cased-Letter:] Any-Upper\n"
153 " [:Control:] Any-Remove\n"
154 " [:Decimal_Number:] Any-Remove\n"
155 " [:Final_Punctuation:] Any-Remove\n"
156 " [:Georgian:] Any-Upper\n"
157 " [:Katakana:] Any-Remove\n"
158 " [:Arabic:] Any-Remove\n"
159 " [:Punctuation:] Remove\n"
160 " [[:Punctuation:]-[.,]] Remove\n"
161 " [:Line_Separator:] Any-Remove\n"
162 " [:Math_Symbol:] Any-Remove\n"
163 " Lower; [:^Letter:] Remove (word tokenization)\n"
164 " [:^Number:] Remove (numeric tokenization)\n"
165 " [:^Katagana:] Remove (remove everything except Katagana)\n"
166 " Lower;[[:WhiteSpace:][:Punctuation:]] Remove (word tokenization)\n"
167 " NFD; [:Nonspacing Mark:] Remove; NFC (removes accents from characters)\n"
168 " [A-Za-z]; Lower(); Latin-Katakana; Katakana-Hiragana (transforms latin and katagana to hiragana)\n"
169 " [[:separator:][:start punctuation:][:initial punctuation:]] Remove \n"
171 "see http://icu.sourceforge.net/userguide/Transform.html\n"
172 " http://www.unicode.org/Public/UNIDATA/UCD.html\n"
173 " http://icu.sourceforge.net/userguide/Transform.html\n"
174 " http://icu.sourceforge.net/userguide/TransformRule.html\n"
185 static void print_icu_xml_locales(const struct config_t *p_config)
189 UErrorCode status = U_ZERO_ERROR;
192 int32_t keyword_len = 0;
193 char keyword_str[128];
194 int32_t keyword_str_len = 0;
197 int32_t language_len = 0;
199 int32_t lang_str_len = 0;
202 int32_t script_len = 0;
203 char script_str[128];
204 int32_t script_str_len = 0;
207 int32_t location_len = 0;
208 char location_str[128];
209 int32_t location_str_len = 0;
212 int32_t variant_len = 0;
213 char variant_str[128];
214 int32_t variant_str_len = 0;
217 int32_t name_len = 0;
219 int32_t name_str_len = 0;
222 int32_t localname_len = 0;
223 char localname_str[128];
224 int32_t localname_str_len = 0;
226 count = uloc_countAvailable() ;
228 if (1 < p_config->verbatim){
229 printf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
230 printf("<icu>\n<locales count=\"%d\" default=\"%s\" collations=\"%d\">\n",
231 count, uloc_getDefault(), ucol_countAvailable());
238 = uloc_getDisplayKeyword(uloc_getAvailable(i), "en",
242 u_strToUTF8(keyword_str, 128, &keyword_str_len,
243 keyword, keyword_len,
248 = uloc_getDisplayLanguage(uloc_getAvailable(i), "en",
252 u_strToUTF8(lang_str, 128, &lang_str_len,
253 language, language_len,
258 = uloc_getDisplayScript(uloc_getAvailable(i), "en",
262 u_strToUTF8(script_str, 128, &script_str_len,
267 = uloc_getDisplayCountry(uloc_getAvailable(i), "en",
271 u_strToUTF8(location_str, 128, &location_str_len,
272 location, location_len,
276 = uloc_getDisplayVariant(uloc_getAvailable(i), "en",
280 u_strToUTF8(variant_str, 128, &variant_str_len,
281 variant, variant_len,
285 = uloc_getDisplayName(uloc_getAvailable(i), "en",
289 u_strToUTF8(name_str, 128, &name_str_len,
294 = uloc_getDisplayName(uloc_getAvailable(i), uloc_getAvailable(i),
298 u_strToUTF8(localname_str, 128, &localname_str_len,
299 localname, localname_len,
303 if (1 < p_config->verbatim){
305 printf(" xml:lang=\"%s\"", uloc_getAvailable(i));
306 /* printf(" locale=\"%s\"", uloc_getAvailable(i)); */
307 /* if (strlen(keyword_str)) */
308 /* printf(" keyword=\"%s\"", keyword_str); */
309 /* if (ucol_getAvailable(i)) */
310 /* printf(" collation=\"1\""); */
311 if (strlen(lang_str))
312 printf(" language=\"%s\"", lang_str);
313 if (strlen(script_str))
314 printf(" script=\"%s\"", script_str);
315 if (strlen(location_str))
316 printf(" location=\"%s\"", location_str);
317 if (strlen(variant_str))
318 printf(" variant=\"%s\"", variant_str);
319 if (strlen(name_str))
320 printf(" name=\"%s\"", name_str);
321 if (strlen(localname_str))
322 printf(" localname=\"%s\"", localname_str);
324 if (strlen(localname_str))
325 printf("%s", localname_str);
326 printf("</locale>\n");
328 else if (1 == p_config->verbatim){
329 printf("%s", uloc_getAvailable(i));
331 if (strlen(name_str))
332 printf("%s", name_str);
334 if (strlen(localname_str))
335 printf("%s", localname_str);
339 printf("%s ", uloc_getAvailable(i));
341 if (1 < p_config->verbatim)
342 printf("</locales>\n</icu>\n");
346 if(U_FAILURE(status)) {
347 fprintf(stderr, "ICU Error: %d %s\n", status, u_errorName(status));
354 int main(int argc, char **argv) {
356 //LIBXML_TEST_VERSION;
358 read_params(argc, argv, &config);
361 print_options(&config);
363 if ('c' == config.print[0])
364 print_icu_converters(&config);
366 if ('l' == config.print[0])
367 print_icu_xml_locales(&config);
369 if ('t' == config.print[0])
370 print_icu_transliterators(&config);
372 //xmlCleanupParser();