X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=src%2Fcharsets.c;h=fc14257b536accec8de2fe653fb90465590b87c6;hb=6424430743ec2d233dfc8118313cfa4299432e2c;hp=ba5d426358b199f475124702070b21b2034e6fff;hpb=b94f122f7e74623aa67e1fa1d097f7627c087f5c;p=pazpar2-moved-to-github.git diff --git a/src/charsets.c b/src/charsets.c index ba5d426..fc14257 100644 --- a/src/charsets.c +++ b/src/charsets.c @@ -1,5 +1,5 @@ /* This file is part of Pazpar2. - Copyright (C) 2006-2011 Index Data + Copyright (C) 2006-2012 Index Data Pazpar2 is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -45,32 +45,32 @@ static pp2_charset_t pp2_charset_create_xml(xmlNode *xml_node); static pp2_charset_t pp2_charset_create(struct icu_chain * icu_chn); static pp2_charset_t pp2_charset_create_a_to_z(void); static void pp2_charset_destroy(pp2_charset_t pct); -static pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct); +static pp2_charset_token_t pp2_charset_tokenize(pp2_charset_t pct); /* charset handle */ struct pp2_charset_s { - const char *(*token_next_handler)(pp2_relevance_token_t prt); - const char *(*get_sort_handler)(pp2_relevance_token_t prt); - const char *(*get_display_handler)(pp2_relevance_token_t prt); + const char *(*token_next_handler)(pp2_charset_token_t prt); + const char *(*get_sort_handler)(pp2_charset_token_t prt); + const char *(*get_display_handler)(pp2_charset_token_t prt); #if YAZ_HAVE_ICU struct icu_chain * icu_chn; UErrorCode icu_sts; #endif }; -static const char *pp2_relevance_token_null(pp2_relevance_token_t prt); -static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt); -static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt); -static const char *pp2_get_display_ascii(pp2_relevance_token_t prt); +static const char *pp2_charset_token_null(pp2_charset_token_t prt); +static const char *pp2_charset_token_a_to_z(pp2_charset_token_t prt); +static const char *pp2_get_sort_ascii(pp2_charset_token_t prt); +static const char *pp2_get_display_ascii(pp2_charset_token_t prt); #if YAZ_HAVE_ICU -static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt); -static const char *pp2_get_sort_icu(pp2_relevance_token_t prt); -static const char *pp2_get_display_icu(pp2_relevance_token_t prt); +static const char *pp2_charset_token_icu(pp2_charset_token_t prt); +static const char *pp2_get_sort_icu(pp2_charset_token_t prt); +static const char *pp2_get_display_icu(pp2_charset_token_t prt); #endif /* tokenzier handle */ -struct pp2_relevance_token_s { +struct pp2_charset_token_s { const char *cp; /* unnormalized buffer we're tokenizing */ const char *last_cp; /* pointer to last token we're dealing with */ pp2_charset_t pct; /* our main charset handle (type+config) */ @@ -160,22 +160,26 @@ int pp2_charset_fact_define(pp2_charset_fact_t pft, { int r; pp2_charset_t pct; - xmlChar *id; + xmlChar *id = 0; assert(xml_node); pct = pp2_charset_create_xml(xml_node); if (!pct) return -1; - id = xmlGetProp(xml_node, (xmlChar*) "id"); - if (id) - default_id = (const char *) id; if (!default_id) { - pp2_charset_destroy(pct); - return -1; + id = xmlGetProp(xml_node, (xmlChar*) "id"); + if (!id) + { + yaz_log(YLOG_WARN, "Missing id for icu_chain"); + pp2_charset_destroy(pct); + return -1; + } + default_id = (const char *) id; } r = pp2_charset_fact_add(pft, pct, default_id); - xmlFree(id); + if (id) + xmlFree(id); return r; } @@ -217,7 +221,7 @@ pp2_charset_t pp2_charset_create_xml(xmlNode *xml_node) pp2_charset_t pp2_charset_create_a_to_z(void) { pp2_charset_t pct = pp2_charset_create(0); - pct->token_next_handler = pp2_relevance_token_a_to_z; + pct->token_next_handler = pp2_charset_token_a_to_z; return pct; } @@ -225,7 +229,7 @@ pp2_charset_t pp2_charset_create(struct icu_chain *icu_chn) { pp2_charset_t pct = xmalloc(sizeof(*pct)); - pct->token_next_handler = pp2_relevance_token_null; + pct->token_next_handler = pp2_charset_token_null; pct->get_sort_handler = pp2_get_sort_ascii; pct->get_display_handler = pp2_get_display_ascii; #if YAZ_HAVE_ICU @@ -234,7 +238,7 @@ pp2_charset_t pp2_charset_create(struct icu_chain *icu_chn) { pct->icu_chn = icu_chn; pct->icu_sts = U_ZERO_ERROR; - pct->token_next_handler = pp2_relevance_token_icu; + pct->token_next_handler = pp2_charset_token_icu; pct->get_sort_handler = pp2_get_sort_icu; pct->get_display_handler = pp2_get_display_icu; } @@ -250,19 +254,19 @@ void pp2_charset_destroy(pp2_charset_t pct) xfree(pct); } -pp2_relevance_token_t pp2_relevance_create(pp2_charset_fact_t pft, - const char *id) +pp2_charset_token_t pp2_charset_token_create(pp2_charset_fact_t pft, + const char *id) { struct pp2_charset_entry *pce; for (pce = pft->list; pce; pce = pce->next) if (!strcmp(id, pce->name)) - return pp2_relevance_tokenize(pce->pct); + return pp2_charset_tokenize(pce->pct); return 0; } -pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct) +pp2_charset_token_t pp2_charset_tokenize(pp2_charset_t pct) { - pp2_relevance_token_t prt = xmalloc(sizeof(*prt)); + pp2_charset_token_t prt = xmalloc(sizeof(*prt)); assert(pct); @@ -280,9 +284,8 @@ pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct) return prt; } -void pp2_relevance_first(pp2_relevance_token_t prt, - const char *buf, - int skip_article) +void pp2_charset_token_first(pp2_charset_token_t prt, + const char *buf, int skip_article) { if (skip_article) { @@ -312,7 +315,7 @@ void pp2_relevance_first(pp2_relevance_token_t prt, #endif // YAZ_HAVE_ICU } -void pp2_relevance_token_destroy(pp2_relevance_token_t prt) +void pp2_charset_token_destroy(pp2_charset_token_t prt) { assert(prt); #if YAZ_HAVE_ICU @@ -326,18 +329,18 @@ void pp2_relevance_token_destroy(pp2_relevance_token_t prt) xfree(prt); } -const char *pp2_relevance_token_next(pp2_relevance_token_t prt) +const char *pp2_charset_token_next(pp2_charset_token_t prt) { assert(prt); return (prt->pct->token_next_handler)(prt); } -const char *pp2_get_sort(pp2_relevance_token_t prt) +const char *pp2_get_sort(pp2_charset_token_t prt) { return prt->pct->get_sort_handler(prt); } -const char *pp2_get_display(pp2_relevance_token_t prt) +const char *pp2_get_display(pp2_charset_token_t prt) { return prt->pct->get_display_handler(prt); } @@ -346,7 +349,7 @@ const char *pp2_get_display(pp2_relevance_token_t prt) /* original tokenizer with our tokenize interface, but we add +1 to ensure no '\0' are in our string (except for EOF) */ -static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt) +static const char *pp2_charset_token_a_to_z(pp2_charset_token_t prt) { const char *cp = prt->cp; int c; @@ -373,7 +376,7 @@ static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt) return wrbuf_cstr(prt->norm_str); } -static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt) +static const char *pp2_get_sort_ascii(pp2_charset_token_t prt) { if (prt->last_cp == 0) return 0; @@ -390,7 +393,7 @@ static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt) } } -static const char *pp2_get_display_ascii(pp2_relevance_token_t prt) +static const char *pp2_get_display_ascii(pp2_charset_token_t prt) { if (prt->last_cp == 0) return 0; @@ -400,7 +403,7 @@ static const char *pp2_get_display_ascii(pp2_relevance_token_t prt) } } -static const char *pp2_relevance_token_null(pp2_relevance_token_t prt) +static const char *pp2_charset_token_null(pp2_charset_token_t prt) { const char *cp = prt->cp; @@ -412,7 +415,7 @@ static const char *pp2_relevance_token_null(pp2_relevance_token_t prt) } #if YAZ_HAVE_ICU -static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt) +static const char *pp2_charset_token_icu(pp2_charset_token_t prt) { if (icu_iter_next(prt->iter)) { @@ -421,12 +424,12 @@ static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt) return 0; } -static const char *pp2_get_sort_icu(pp2_relevance_token_t prt) +static const char *pp2_get_sort_icu(pp2_charset_token_t prt) { return icu_iter_get_sortkey(prt->iter); } -static const char *pp2_get_display_icu(pp2_relevance_token_t prt) +static const char *pp2_get_display_icu(pp2_charset_token_t prt) { return icu_iter_get_display(prt->iter); }