1 /* $Id: icu_I18N.h,v 1.12 2007-05-14 13:51:24 marc Exp $
2 Copyright (c) 2006-2007, Index Data.
4 This file is part of Pazpar2.
6 Pazpar2 is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with Pazpar2; see the file LICENSE. If not, write to the
18 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
30 #include <unicode/utypes.h> /* Basic ICU data types */
31 #include <unicode/uchar.h> /* char names */
33 //#include <unicode/ustdio.h>
34 #include <unicode/ucol.h>
35 //#include <unicode/ucnv.h> /* C Converter API */
36 //#include <unicode/ustring.h> /* some more string fcns*/
37 //#include <unicode/uloc.h>
38 #include <unicode/ubrk.h>
39 //#include <unicode/unistr.h>
40 #include <unicode/utrans.h>
44 // declared structs and functions
// Takes an ICU UErrorCode and returns an int result.
// NOTE(review): the return convention (which value means "no error") is
// not visible in this header -- confirm against the implementation.
46 int icu_check_status (UErrorCode status);
// Returns a pointer to a struct icu_buf_utf16 for the requested capacity.
// NOTE(review): presumably heap-allocated and to be released with
// icu_buf_utf16_destroy(); the unit of `capacity` (UTF-16 code units vs.
// bytes) and the return value on failure are not visible here -- confirm.
55 struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity);
56 struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
// Takes two UTF-16 buffers and returns a UTF-16 buffer pointer; going by
// the parameter names, content is copied from `src16` into `dest16`.
// NOTE(review): presumably the returned pointer is dest16 and dest16 is
// grown as needed; behaviour for NULL arguments is not visible -- confirm.
58 struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
59 struct icu_buf_utf16 * src16);
// Takes a UTF-16 buffer and returns nothing.
// NOTE(review): presumably releases a buffer obtained from
// icu_buf_utf16_create(); whether NULL is accepted is not visible -- confirm.
60 void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16);
// Returns a pointer to a struct icu_buf_utf8 for the requested capacity.
// NOTE(review): presumably heap-allocated and to be released with
// icu_buf_utf8_destroy(); the unit of `capacity` and the return value on
// failure are not visible here -- confirm.
71 struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity);
72 struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
// Takes a UTF-8 buffer and returns nothing.
// NOTE(review): presumably releases a buffer obtained from
// icu_buf_utf8_create(); whether NULL is accepted is not visible -- confirm.
74 void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8);
77 UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16,
78 struct icu_buf_utf8 * src8,
81 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
82 const char * src8cstr,
86 UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
87 struct icu_buf_utf16 * src16,
90 int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
91 struct icu_buf_utf16 * src16,
92 const char *locale, char action,
95 UErrorCode icu_sortkey8_from_utf16(UCollator *coll,
96 struct icu_buf_utf8 * dest8,
97 struct icu_buf_utf16 * src16,
105 struct icu_buf_utf16 * buf16;
110 // invariants that must always hold
113 // <= buf16->utf16_len
115 // 0 <= token_id <= token_count
118 struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
// Takes a tokenizer and returns nothing.
// NOTE(review): presumably releases a tokenizer obtained from
// icu_tokenizer_create(); whether it also releases an attached source
// buffer is not visible in this header -- confirm.
121 void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer);
// Takes a tokenizer, a UTF-16 source buffer and a pointer to an ICU
// UErrorCode; returns an int.
// NOTE(review): presumably binds `src16` as the text to be tokenized and
// reports ICU failures through `*status`; whether src16 is copied or merely
// referenced, and the meaning of the int result, are not visible -- confirm.
123 int icu_tokenizer_attach(struct icu_tokenizer * tokenizer,
124 struct icu_buf_utf16 * src16, UErrorCode *status);
126 int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer,
127 struct icu_buf_utf16 * tkn16,
// Read-style accessors: each takes a tokenizer and returns one int32_t.
// NOTE(review): going by the names, these report the current token's id,
// start offset, end offset and length, and the number of tokens seen so far;
// offset units (presumably UTF-16 code units into the attached buffer) and
// the result for a NULL tokenizer are not visible in this header -- confirm.
130 int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer);
131 int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer);
132 int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer);
133 int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer);
134 int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer);
138 struct icu_normalizer
141 struct icu_buf_utf16 * rules16;
142 UParseError parse_error[256];
143 UTransliterator * trans;
146 struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
// Takes a normalizer and returns nothing.
// NOTE(review): presumably releases a normalizer obtained from
// icu_normalizer_create(), including its rules buffer and transliterator;
// not visible in this header -- confirm.
150 void icu_normalizer_destroy(struct icu_normalizer * normalizer);
152 int icu_normalizer_normalize(struct icu_normalizer * normalizer,
153 struct icu_buf_utf16 * dest16,
154 struct icu_buf_utf16 * src16,
168 enum icu_chain_step_type {
169 ICU_chain_step_type_none, //
170 ICU_chain_step_type_display, // convert to utf8 display format
171 ICU_chain_step_type_norm, // convert to utf8 norm format
172 ICU_chain_step_type_sort, // convert to utf8 sort format
173 ICU_chain_step_type_charmap, // apply utf16 charmap
174 ICU_chain_step_type_normalize, // apply utf16 normalization
175 ICU_chain_step_type_tokenize // apply utf16 tokenization
180 struct icu_chain_step
182 // type and action object
183 enum icu_chain_step_type type;
185 struct icu_normalizer * normalizer;
186 struct icu_tokenizer * tokenizer;
188 // temporary post-action utf16 buffer
189 struct icu_buf_utf16 * buf16;
190 struct icu_chain_step * next;
196 uint8_t identifier[128];
199 // number of tokens returned so far
202 // utf8 output buffers
203 struct icu_buf_utf8 * display8;
204 struct icu_buf_utf8 * norm8;
205 struct icu_buf_utf8 * sort8;
207 // utf16 source buffer
208 struct icu_buf_utf16 * src16;
210 // linked list of chain steps
211 struct icu_chain_step * steps;
// Takes an identifier and a locale, both as const uint8_t pointers, and
// returns a pointer to a struct icu_chain.
// NOTE(review): this header shows a `uint8_t identifier[128]` field
// (apparently in struct icu_chain) -- confirm how identifiers of 128 bytes
// or more are handled. Presumably both arguments are NUL-terminated strings
// and the result is released with icu_chain_destroy(); confirm.
214 struct icu_chain * icu_chain_create(const uint8_t * identifier,
215 const uint8_t * locale);
// Takes a chain and returns nothing.
// NOTE(review): presumably releases a chain obtained from icu_chain_create()
// together with its steps and buffers; not visible in this header -- confirm.
217 void icu_chain_destroy(struct icu_chain * chain);
// Takes a chain, a step type (one of the enum icu_chain_step_type values
// declared in this header) and a rule given as a const uint8_t pointer;
// returns a pointer to a struct icu_chain_step.
// NOTE(review): presumably creates a step of the given type from `rule` and
// links it at the end of the chain's step list; how `rule` is interpreted
// per type, the return value on failure, and who owns the returned step are
// not visible here -- confirm.
219 struct icu_chain_step * icu_chain_append_step(struct icu_chain * chain,
220 enum icu_chain_step_type type,
221 const uint8_t * rule);
// Takes a chain step and returns nothing.
// NOTE(review): presumably releases the step and the objects it holds;
// whether it also follows the step's `next` link is not visible in this
// header -- confirm.
223 void icu_chain_step_destroy(struct icu_chain_step * step);
228 #endif // ICU_I18NL_H