X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=src%2Ficu_chain_test.c;h=de137fd78f917869f2b8ca8f5996b409378194b7;hb=0ff1a97b2a69905755b9adb24a474d30f1c52150;hp=aed6af21621e1824316ca3373de60eb6f8155189;hpb=d3346ec4ab71b3555320e691d983e9baeee36e02;p=pazpar2-moved-to-github.git diff --git a/src/icu_chain_test.c b/src/icu_chain_test.c index aed6af2..de137fd 100644 --- a/src/icu_chain_test.c +++ b/src/icu_chain_test.c @@ -1,12 +1,28 @@ -/** - gcc -I/usr/include/libxml2 -lxml2 -o icu-xml-convert icu-xml-convert.c +/* $Id: icu_chain_test.c,v 1.7 2007-08-30 08:45:08 marc Exp $ + Copyright (c) 2006-2007, Index Data. + +This file is part of Pazpar2. + +Pazpar2 is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Pazpar2; see the file LICENSE. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ #if HAVE_CONFIG_H #include "cconfig.h" #endif -#define _GNU_SOURCE #include #include @@ -37,63 +53,71 @@ static struct config_t { void print_option_error(const struct config_t *p_config) { - fprintf(stderr, "Calling error, valid options are :\n"); - fprintf(stderr, "icu_chain_test\n" - " [-c (path/to/config/file.xml)]\n" - " [-p (a|c|l|t)] print ICU info \n" - " [-x] XML output\n" - "\n" - "Examples:\n" - "cat hugetextfile.txt | ./icu_chain_test -c config.xml \n" - "./icu_chain_test -p c\n" - "./icu_chain_test -p l -x\n" - "./icu_chain_test -p t -x\n" + fprintf(stderr, "Calling error, valid options are :\n"); + fprintf(stderr, "icu_chain_test\n" + " [-c (path/to/config/file.xml)]\n" + " [-p (a|c|l|t)] print ICU info \n" + " [-x] XML output\n" + "\n" + "Examples:\n" + "cat hugetextfile.txt | ./icu_chain_test -c config.xml \n" + "./icu_chain_test -p c\n" + "./icu_chain_test -p l -x\n" + "./icu_chain_test -p t -x\n" + "\n" + "Example ICU chain XML configuration file:\n" + "\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + "\n" ); - exit(1); + exit(1); } -void read_params(int argc, char **argv, struct config_t *p_config){ - char *arg; - int ret; - - /* set default parameters */ - p_config->conffile[0] = 0; - p_config->print[0] = 0; - p_config->xmloutput = 0; - p_config->chain = 0; - p_config->infile = stdin; - p_config->outfile = stdout; - - /* set up command line parameters */ - - while ((ret = options("c:p:x", argv, argc, &arg)) != -2) +void read_params(int argc, char **argv, struct config_t *p_config) +{ + char *arg; + int ret; + + /* set default parameters */ + p_config->conffile[0] = 0; + p_config->print[0] = 0; + p_config->xmloutput = 0; + p_config->chain = 0; + p_config->infile = stdin; + p_config->outfile = stdout; + + /* set up command line parameters */ + + while ((ret = options("c:p:x", argv, argc, &arg)) != -2) { - switch (ret) + switch (ret) { case 'c': - strcpy(p_config->conffile, arg); - break; + strcpy(p_config->conffile, arg); + break; case 'p': - strcpy(p_config->print, arg); - break; + strcpy(p_config->print, arg); + break; case 'x': p_config->xmloutput = 1; - break; + break; default: - print_option_error(p_config); + print_option_error(p_config); } } - //p_config->infile = fopen("/etc/passwd", "r"); - - - - if ((!strlen(p_config->conffile) - && !strlen(p_config->print)) - || !config.infile - || !config.outfile) - - print_option_error(p_config); + if ((!strlen(p_config->conffile) + && !strlen(p_config->print)) + || !config.infile + || !config.outfile) + + print_option_error(p_config); }; @@ -123,15 +147,17 @@ static void print_icu_converters(const struct config_t *p_config) count = ucnv_countAvailable(); if (p_config->xmloutput) fprintf(config.outfile, "\n", - count, ucnv_getDefaultName()); + count, ucnv_getDefaultName()); else { fprintf(config.outfile, "Available ICU converters: %d\n", count); - fprintf(config.outfile, "Default ICU Converter is: '%s'\n", ucnv_getDefaultName()); + fprintf(config.outfile, "Default ICU Converter is: '%s'\n", + ucnv_getDefaultName()); } for(i=0;ixmloutput) - fprintf(config.outfile, "\n", ucnv_getAvailableName(i)); + fprintf(config.outfile, "\n", + ucnv_getAvailableName(i)); else fprintf(config.outfile, "%s ", ucnv_getAvailableName(i)); } @@ -144,254 +170,254 @@ static void print_icu_converters(const struct config_t *p_config) static void print_icu_transliterators(const struct config_t *p_config) { - int32_t count; - int32_t i; - - count = utrans_countAvailableIDs(); - - int32_t buf_cap = 128; - char buf[buf_cap]; - - if (p_config->xmloutput) - fprintf(config.outfile, "\n", count); - else - fprintf(config.outfile, "Available ICU transliterators: %d\n", count); - - for(i=0;ixmloutput) + fprintf(config.outfile, "\n", count); + else + fprintf(config.outfile, "Available ICU transliterators: %d\n", count); + + for(i = 0; i xmloutput) - fprintf(config.outfile, "\n", buf); - else - fprintf(config.outfile, " %s", buf); + utrans_getAvailableID(i, buf, buf_cap); + if (p_config->xmloutput) + fprintf(config.outfile, "\n", buf); + else + fprintf(config.outfile, " %s", buf); } - - if (p_config->xmloutput){ - fprintf(config.outfile, "\n"); - } - else + + if (p_config->xmloutput){ + fprintf(config.outfile, "\n"); + } + else { - fprintf(config.outfile, "\n\nUnicode Set Patterns:\n" - " Pattern Description\n" - " Ranges [a-z] The lower case letters a through z\n" - " Named Chars [abc123] The six characters a,b,c,1,2 and 3\n" - " String [abc{def}] chars a, b and c, and string 'def'\n" - " Categories [\\p{Letter}] Perl General Category 'Letter'.\n" - " Categories [:Letter:] Posix General Category 'Letter'.\n" - "\n" - " Combination Example\n" - " Union [[:Greek:] [:letter:]]\n" - " Intersection [[:Greek:] & [:letter:]]\n" - " Set Complement [[:Greek:] - [:letter:]]\n" - " Complement [^[:Greek:] [:letter:]]\n" - "\n" + fprintf(config.outfile, "\n\nUnicode Set Patterns:\n" + " Pattern Description\n" + " Ranges [a-z] The lower case letters a through z\n" + " Named Chars [abc123] The six characters a,b,c,1,2 and 3\n" + " String [abc{def}] chars a, b and c, and string 'def'\n" + " Categories [\\p{Letter}] Perl General Category 'Letter'.\n" + " Categories [:Letter:] Posix General Category 'Letter'.\n" + "\n" + " Combination Example\n" + " Union [[:Greek:] [:letter:]]\n" + " Intersection [[:Greek:] & [:letter:]]\n" + " Set Complement [[:Greek:] - [:letter:]]\n" + " Complement [^[:Greek:] [:letter:]]\n" + "\n" "see: http://icu.sourceforge.net/userguide/unicodeSet.html\n" - "\n" - "Examples:\n" - " [:Punctuation:] Any-Remove\n" - " [:Cased-Letter:] Any-Upper\n" - " [:Control:] Any-Remove\n" - " [:Decimal_Number:] Any-Remove\n" - " [:Final_Punctuation:] Any-Remove\n" - " [:Georgian:] Any-Upper\n" - " [:Katakana:] Any-Remove\n" - " [:Arabic:] Any-Remove\n" - " [:Punctuation:] Remove\n" - " [[:Punctuation:]-[.,]] Remove\n" - " [:Line_Separator:] Any-Remove\n" - " [:Math_Symbol:] Any-Remove\n" - " Lower; [:^Letter:] Remove (word tokenization)\n" - " [:^Number:] Remove (numeric tokenization)\n" - " [:^Katagana:] Remove (remove everything except Katagana)\n" - " Lower;[[:WhiteSpace:][:Punctuation:]] Remove (word tokenization)\n" - " NFD; [:Nonspacing Mark:] Remove; NFC (removes accents from characters)\n" - " [A-Za-z]; Lower(); Latin-Katakana; Katakana-Hiragana (transforms latin and katagana to hiragana)\n" - " [[:separator:][:start punctuation:][:initial punctuation:]] Remove \n" - "\n" - "see http://icu.sourceforge.net/userguide/Transform.html\n" - " http://www.unicode.org/Public/UNIDATA/UCD.html\n" - " http://icu.sourceforge.net/userguide/Transform.html\n" - " http://icu.sourceforge.net/userguide/TransformRule.html\n" - ); - - - fprintf(config.outfile, "\n\n"); - + "\n" + "Examples:\n" + " [:Punctuation:] Any-Remove\n" + " [:Cased-Letter:] Any-Upper\n" + " [:Control:] Any-Remove\n" + " [:Decimal_Number:] Any-Remove\n" + " [:Final_Punctuation:] Any-Remove\n" + " [:Georgian:] Any-Upper\n" + " [:Katakana:] Any-Remove\n" + " [:Arabic:] Any-Remove\n" + " [:Punctuation:] Remove\n" + " [[:Punctuation:]-[.,]] Remove\n" + " [:Line_Separator:] Any-Remove\n" + " [:Math_Symbol:] Any-Remove\n" + " Lower; [:^Letter:] Remove (word tokenization)\n" + " [:^Number:] Remove (numeric tokenization)\n" + " [:^Katagana:] Remove (remove everything except Katagana)\n" + " Lower;[[:WhiteSpace:][:Punctuation:]] Remove (word tokenization)\n" + " NFD; [:Nonspacing Mark:] Remove; NFC (removes accents from characters)\n" + " [A-Za-z]; Lower(); Latin-Katakana; Katakana-Hiragana (transforms latin and katagana to hiragana)\n" + " [[:separator:][:start punctuation:][:initial punctuation:]] Remove \n" + "\n" + "see http://icu.sourceforge.net/userguide/Transform.html\n" + " http://www.unicode.org/Public/UNIDATA/UCD.html\n" + " http://icu.sourceforge.net/userguide/Transform.html\n" + " http://icu.sourceforge.net/userguide/TransformRule.html\n" + ); + + + fprintf(config.outfile, "\n\n"); + } } static void print_icu_xml_locales(const struct config_t *p_config) { - int32_t count; - int32_t i; - UErrorCode status = U_ZERO_ERROR; - - UChar keyword[64]; - int32_t keyword_len = 0; - char keyword_str[128]; - int32_t keyword_str_len = 0; - - UChar language[64]; - int32_t language_len = 0; - char lang_str[128]; - int32_t lang_str_len = 0; - - UChar script[64]; - int32_t script_len = 0; - char script_str[128]; - int32_t script_str_len = 0; - - UChar location[64]; - int32_t location_len = 0; - char location_str[128]; - int32_t location_str_len = 0; - - UChar variant[64]; - int32_t variant_len = 0; - char variant_str[128]; - int32_t variant_str_len = 0; - - UChar name[64]; - int32_t name_len = 0; - char name_str[128]; - int32_t name_str_len = 0; - - UChar localname[64]; - int32_t localname_len = 0; - char localname_str[128]; - int32_t localname_str_len = 0; - - count = uloc_countAvailable() ; - - if (p_config->xmloutput){ + int32_t count; + int32_t i; + UErrorCode status = U_ZERO_ERROR; + + UChar keyword[64]; + int32_t keyword_len = 0; + char keyword_str[128]; + int32_t keyword_str_len = 0; + + UChar language[64]; + int32_t language_len = 0; + char lang_str[128]; + int32_t lang_str_len = 0; + + UChar script[64]; + int32_t script_len = 0; + char script_str[128]; + int32_t script_str_len = 0; + + UChar location[64]; + int32_t location_len = 0; + char location_str[128]; + int32_t location_str_len = 0; + + UChar variant[64]; + int32_t variant_len = 0; + char variant_str[128]; + int32_t variant_str_len = 0; + + UChar name[64]; + int32_t name_len = 0; + char name_str[128]; + int32_t name_str_len = 0; + + UChar localname[64]; + int32_t localname_len = 0; + char localname_str[128]; + int32_t localname_str_len = 0; + + count = uloc_countAvailable() ; + + if (p_config->xmloutput){ - fprintf(config.outfile, "\n", - count, uloc_getDefault(), ucol_countAvailable()); - } + fprintf(config.outfile, "\n", + count, uloc_getDefault(), ucol_countAvailable()); + } - for(i=0;ixmloutput){ - fprintf(config.outfile, ""); - if (strlen(localname_str)) - fprintf(config.outfile, "%s", localname_str); - fprintf(config.outfile, "\n"); - } - else if (1 == p_config->xmloutput){ - fprintf(config.outfile, "%s", uloc_getAvailable(i)); - fprintf(config.outfile, " | "); - if (strlen(name_str)) - fprintf(config.outfile, "%s", name_str); - fprintf(config.outfile, " | "); - if (strlen(localname_str)) - fprintf(config.outfile, "%s", localname_str); - fprintf(config.outfile, "\n"); + language_len + = uloc_getDisplayLanguage(uloc_getAvailable(i), "en", + language, 64, + &status); + + u_strToUTF8(lang_str, 128, &lang_str_len, + language, language_len, + &status); + + + script_len + = uloc_getDisplayScript(uloc_getAvailable(i), "en", + script, 64, + &status); + + u_strToUTF8(script_str, 128, &script_str_len, + script, script_len, + &status); + + location_len + = uloc_getDisplayCountry(uloc_getAvailable(i), "en", + location, 64, + &status); + + u_strToUTF8(location_str, 128, &location_str_len, + location, location_len, + &status); + + variant_len + = uloc_getDisplayVariant(uloc_getAvailable(i), "en", + variant, 64, + &status); + + u_strToUTF8(variant_str, 128, &variant_str_len, + variant, variant_len, + &status); + + name_len + = uloc_getDisplayName(uloc_getAvailable(i), "en", + name, 64, + &status); + + u_strToUTF8(name_str, 128, &name_str_len, + name, name_len, + &status); + + localname_len + = uloc_getDisplayName(uloc_getAvailable(i), uloc_getAvailable(i), + localname, 64, + &status); + + u_strToUTF8(localname_str, 128, &localname_str_len, + localname, localname_len, + &status); + + + if (p_config->xmloutput){ + fprintf(config.outfile, ""); + if (strlen(localname_str)) + fprintf(config.outfile, "%s", localname_str); + fprintf(config.outfile, "\n"); + } + else if (1 == p_config->xmloutput){ + fprintf(config.outfile, "%s", uloc_getAvailable(i)); + fprintf(config.outfile, " | "); + if (strlen(name_str)) + fprintf(config.outfile, "%s", name_str); + fprintf(config.outfile, " | "); + if (strlen(localname_str)) + fprintf(config.outfile, "%s", localname_str); + fprintf(config.outfile, "\n"); + } + else + fprintf(config.outfile, "%s ", uloc_getAvailable(i)); } + if (p_config->xmloutput) + fprintf(config.outfile, "\n"); else - fprintf(config.outfile, "%s ", uloc_getAvailable(i)); - } - if (p_config->xmloutput) - fprintf(config.outfile, "\n"); - else - fprintf(config.outfile, "\n"); - - if(U_FAILURE(status)) { - fprintf(stderr, "ICU Error: %d %s\n", status, u_errorName(status)); - exit(status); - } + fprintf(config.outfile, "\n"); + + if(U_FAILURE(status)) { + fprintf(stderr, "ICU Error: %d %s\n", status, u_errorName(status)); + exit(status); + } } static void print_info(const struct config_t *p_config) { - if (p_config->xmloutput) - fprintf(config.outfile, "\n" - "\n"); + if (p_config->xmloutput) + fprintf(config.outfile, "\n" + "\n"); if ('c' == config.print[0]) print_icu_converters(&config); @@ -408,7 +434,7 @@ static void print_info(const struct config_t *p_config) if (p_config->xmloutput) fprintf(config.outfile, "\n"); - exit(0); + exit(0); }; @@ -427,12 +453,23 @@ static void process_text_file(const struct config_t *p_config) UErrorCode status = U_ZERO_ERROR; int success = 0; + if (! xml_node) { + printf("Could not parse XML config file '%s' \n", + config.conffile); + exit (1); + } + config.chain = icu_chain_xml_config(xml_node, &status); if (config.chain && U_SUCCESS(status)) success = 1; - + else { + printf("Could not set up ICU chain from config file '%s' \n", + config.conffile); + exit (1); + } + if (p_config->xmloutput) fprintf(config.outfile, "\n" @@ -469,7 +506,7 @@ static void process_text_file(const struct config_t *p_config) } - if (p_config->xmloutput) + if (p_config->xmloutput) fprintf(config.outfile, "\n" "\n");