2 * Copyright (C) 1995-2007, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: icu_I18N.c,v 1.22 2007-12-17 11:23:16 adam Exp $
14 #include <yaz/timing.h>
18 #include <yaz/xmalloc.h>
20 #include <yaz/icu_I18N.h>
28 #include <unicode/ustring.h> /* some more string fcns*/
29 #include <unicode/uchar.h> /* char names */
32 #include <unicode/ucol.h>
/* Inspects an ICU status code. When U_FAILURE(status) holds, a warning
   with the numeric code and its u_errorName() text is written to the YAZ
   log.
   NOTE(review): the return statements are not visible here - confirm
   which value signals success. */
35 int icu_check_status (UErrorCode status)
37 if (U_FAILURE(status))
39 yaz_log(YLOG_WARN, "ICU: %d %s\n", status, u_errorName(status));
/* Allocates a new icu_buf_utf16 descriptor with xmalloc, gives it an
   array of `capacity` UChars whose first element is set to 0, and
   records the capacity in utf16_cap.
   NOTE(review): utf16[0] is written right after the allocation and other
   code in this file calls icu_buf_utf16_create(0); confirm that the
   capacity == 0 case is guarded before these statements. */
48 struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity)
50 struct icu_buf_utf16 * buf16
51 = (struct icu_buf_utf16 *) xmalloc(sizeof(struct icu_buf_utf16));
58 buf16->utf16 = (UChar *) xmalloc(sizeof(UChar) * capacity);
59 buf16->utf16[0] = (UChar) 0;
60 buf16->utf16_cap = capacity;
/* Empties a UTF-16 buffer by writing a 0 terminator into its first
   element.
   NOTE(review): presumably utf16_len is reset and buf16 is returned as
   well - confirm. */
65 struct icu_buf_utf16 * icu_buf_utf16_clear(struct icu_buf_utf16 * buf16)
69 buf16->utf16[0] = (UChar) 0;
/* Changes the storage of buf16 to `capacity` UChars. A buffer whose
   utf16 pointer is still null gets fresh xmalloc storage; otherwise
   xrealloc is used. Afterwards the buffer is cleared through
   icu_buf_utf16_clear() and utf16_cap is updated, so callers must not
   rely on previous content surviving this call. */
75 struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
82 if (0 == buf16->utf16)
83 buf16->utf16 = (UChar *) xmalloc(sizeof(UChar) * capacity);
86 = (UChar *) xrealloc(buf16->utf16, sizeof(UChar) * capacity);
88 icu_buf_utf16_clear(buf16);
89 buf16->utf16_cap = capacity;
/* Copies the content of src16 into dest16. When dest16's capacity is
   smaller than src16's length, dest16 is first resized to twice that
   length. Exactly src16->utf16_len UChars are copied with u_strncpy and
   dest16->utf16_len is set to the same value. */
102 struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
103 struct icu_buf_utf16 * src16)
109 if (dest16->utf16_cap < src16->utf16_len)
110 icu_buf_utf16_resize(dest16, src16->utf16_len * 2);
112 u_strncpy(dest16->utf16, src16->utf16, src16->utf16_len);
113 dest16->utf16_len = src16->utf16_len;
/* Disposes of a UTF-16 buffer.
   NOTE(review): the body is not visible here; presumably it frees both
   the UChar storage and the descriptor - confirm. */
119 void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16)
/* Allocates a new icu_buf_utf8 descriptor with xmalloc, gives it
   `capacity` bytes of storage whose first byte is set to 0, and records
   the capacity in utf8_cap.
   NOTE(review): utf8[0] is written right after the allocation and other
   code in this file calls icu_buf_utf8_create(0); confirm that the
   capacity == 0 case is guarded before these statements. */
128 struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity)
130 struct icu_buf_utf8 * buf8
131 = (struct icu_buf_utf8 *) xmalloc(sizeof(struct icu_buf_utf8));
138 buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
139 buf8->utf8[0] = (uint8_t) 0;
140 buf8->utf8_cap = capacity;
/* Empties a UTF-8 buffer by writing a 0 byte into its first position.
   NOTE(review): presumably utf8_len is reset and buf8 is returned as
   well - confirm. */
146 struct icu_buf_utf8 * icu_buf_utf8_clear(struct icu_buf_utf8 * buf8)
150 buf8->utf8[0] = (uint8_t) 0;
/* Gives buf8 storage for `capacity` bytes, through xmalloc or xrealloc,
   and records the new capacity in utf8_cap.
   NOTE(review): the condition choosing between xmalloc and xrealloc is
   not visible here; presumably it mirrors icu_buf_utf16_resize (null
   pointer -> xmalloc) - confirm. */
157 struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
165 buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
168 = (uint8_t *) xrealloc(buf8->utf8, sizeof(uint8_t) * capacity);
170 buf8->utf8_cap = capacity;
/* Returns the content of src8 as a NUL-terminated C string that points
   into the buffer's own storage. A null or empty buffer is handled by
   the first check (its return value is not visible here). */
183 const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8)
185 if (!src8 || src8->utf8_len == 0)
/* content fills the whole buffer: grow it so the terminator fits */
188 if (src8->utf8_len == src8->utf8_cap)
189 src8 = icu_buf_utf8_resize(src8, src8->utf8_len * 2 + 1);
191 src8->utf8[src8->utf8_len] = '\0';
193 return (const char *) src8->utf8;
/* Disposes of a UTF-8 buffer.
   NOTE(review): the body is not visible here; presumably it frees both
   the byte storage and the descriptor - confirm. */
197 void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8)
/* Converts the UTF-8 content of src8 (utf8_len bytes) into dest16 with
   u_strFromUTF8. If ICU reports U_BUFFER_OVERFLOW_ERROR, dest16 is
   resized to twice utf16_len, *status is reset to U_ZERO_ERROR and the
   conversion is run a second time. dest16->utf16_len is updated only
   when the final status is a success and the length fits the capacity;
   the icu_buf_utf16_clear() call is presumably the else branch of that
   check. */
206 UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16,
207 struct icu_buf_utf8 * src8,
210 int32_t utf16_len = 0;
212 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
214 (const char *) src8->utf8, src8->utf8_len, status);
216 /* check for buffer overflow, resize and retry */
217 if (*status == U_BUFFER_OVERFLOW_ERROR)
219 icu_buf_utf16_resize(dest16, utf16_len * 2);
220 *status = U_ZERO_ERROR;
221 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
223 (const char *) src8->utf8, src8->utf8_len, status);
226 if (U_SUCCESS(*status)
227 && utf16_len <= dest16->utf16_cap)
228 dest16->utf16_len = utf16_len;
230 icu_buf_utf16_clear(dest16);
/* Converts a NUL-terminated UTF-8 C string into dest16. *status is reset
   to U_ZERO_ERROR on entry and the source length is taken with strlen().
   The conversion uses u_strFromUTF8; on U_BUFFER_OVERFLOW_ERROR dest16 is
   resized to twice utf16_len, *status is reset and the conversion is
   repeated. dest16->utf16_len is updated only when the final status is a
   success and the length fits the capacity; the icu_buf_utf16_clear()
   call is presumably the else branch of that check.
   NOTE(review): no NULL check on src8cstr is visible before strlen() -
   confirm that callers never pass NULL. */
237 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
238 const char * src8cstr,
241 size_t src8cstr_len = 0;
242 int32_t utf16_len = 0;
244 *status = U_ZERO_ERROR;
245 src8cstr_len = strlen(src8cstr);
247 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
249 src8cstr, src8cstr_len, status);
251 /* check for buffer overflow, resize and retry */
252 if (*status == U_BUFFER_OVERFLOW_ERROR)
254 icu_buf_utf16_resize(dest16, utf16_len * 2);
255 *status = U_ZERO_ERROR;
256 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
258 src8cstr, src8cstr_len, status);
261 if (U_SUCCESS(*status)
262 && utf16_len <= dest16->utf16_cap)
263 dest16->utf16_len = utf16_len;
265 icu_buf_utf16_clear(dest16);
/* Converts the UTF-16 content of src16 (utf16_len units) into dest8 with
   u_strToUTF8. On U_BUFFER_OVERFLOW_ERROR dest8 is resized to twice
   utf8_len, *status is reset to U_ZERO_ERROR and the conversion is
   repeated. dest8->utf8_len is updated only when the final status is a
   success and the length fits the capacity; the icu_buf_utf8_clear()
   call is presumably the else branch of that check. */
273 UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
274 struct icu_buf_utf16 * src16,
277 int32_t utf8_len = 0;
279 u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
281 src16->utf16, src16->utf16_len, status);
283 /* check for buffer overflow, resize and retry */
284 if (*status == U_BUFFER_OVERFLOW_ERROR)
286 icu_buf_utf8_resize(dest8, utf8_len * 2);
287 *status = U_ZERO_ERROR;
288 u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
290 src16->utf16, src16->utf16_len, status);
294 if (U_SUCCESS(*status)
295 && utf8_len <= dest8->utf8_cap)
296 dest8->utf8_len = utf8_len;
298 icu_buf_utf8_clear(dest8);
/* Allocates an icu_casemap object with xmalloc and stores the requested
   action character in it. The action is then checked in a switch; the
   icu_casemap_destroy() call is presumably the branch for an unsupported
   action (the case labels are not visible here - confirm). */
305 struct icu_casemap * icu_casemap_create(char action, UErrorCode *status)
307 struct icu_casemap * casemap
308 = (struct icu_casemap *) xmalloc(sizeof(struct icu_casemap));
309 casemap->action = action;
311 switch(casemap->action) {
322 icu_casemap_destroy(casemap);
/* Disposes of a casemap object.
   NOTE(review): the body is not visible here; presumably it frees the
   object - confirm. */
329 void icu_casemap_destroy(struct icu_casemap * casemap)
/* Applies the case mapping configured in `casemap` to src16, writing the
   result to dest16. The work is delegated to icu_utf16_casemap() with
   the object's stored action and the given locale; its return value is
   passed through. */
335 int icu_casemap_casemap(struct icu_casemap * casemap,
336 struct icu_buf_utf16 * dest16,
337 struct icu_buf_utf16 * src16,
344 return icu_utf16_casemap(dest16, src16, locale,
345 casemap->action, status);
/* Case-maps src16 into dest16 according to `action`, using one of
   u_strToLower, u_strToUpper, u_strToTitle or u_strFoldCase (folding
   uses U_FOLD_CASE_DEFAULT). The case labels selecting among them are
   not visible here. An unsupported action returns U_UNSUPPORTED_ERROR.
   An empty source makes dest16 empty. dest16 and src16 may be the same
   buffer, in which case the overflow retry is skipped. */
349 int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
350 struct icu_buf_utf16 * src16,
351 const char *locale, char action,
354 int32_t dest16_len = 0;
357 if (!src16->utf16_len){ /* guarding for empty source string */
359 dest16->utf16[0] = (UChar) 0;
360 dest16->utf16_len = 0;
/* first attempt with the current capacity of dest16 */
368 dest16_len = u_strToLower(dest16->utf16, dest16->utf16_cap,
369 src16->utf16, src16->utf16_len,
374 dest16_len = u_strToUpper(dest16->utf16, dest16->utf16_cap,
375 src16->utf16, src16->utf16_len,
380 dest16_len = u_strToTitle(dest16->utf16, dest16->utf16_cap,
381 src16->utf16, src16->utf16_len,
386 dest16_len = u_strFoldCase(dest16->utf16, dest16->utf16_cap,
387 src16->utf16, src16->utf16_len,
388 U_FOLD_CASE_DEFAULT, status);
392 return U_UNSUPPORTED_ERROR;
396 /* check for buffer overflow, resize and retry */
397 if (*status == U_BUFFER_OVERFLOW_ERROR
398 && dest16 != src16 /* do not resize if in-place conversion */
400 icu_buf_utf16_resize(dest16, dest16_len * 2);
401 *status = U_ZERO_ERROR;
/* second attempt, same dispatch as above, into the enlarged buffer */
407 dest16_len = u_strToLower(dest16->utf16, dest16->utf16_cap,
408 src16->utf16, src16->utf16_len,
413 dest16_len = u_strToUpper(dest16->utf16, dest16->utf16_cap,
414 src16->utf16, src16->utf16_len,
419 dest16_len = u_strToTitle(dest16->utf16, dest16->utf16_cap,
420 src16->utf16, src16->utf16_len,
425 dest16_len = u_strFoldCase(dest16->utf16, dest16->utf16_cap,
426 src16->utf16, src16->utf16_len,
427 U_FOLD_CASE_DEFAULT, status);
431 return U_UNSUPPORTED_ERROR;
/* accept the result only on success and when it fits the capacity;
   the two assignments after that presumably form the else branch that
   empties dest16 */
436 if (U_SUCCESS(*status)
437 && dest16_len <= dest16->utf16_cap)
438 dest16->utf16_len = dest16_len;
441 dest16->utf16[0] = (UChar) 0;
442 dest16->utf16_len = 0;
/* Computes the collation sort key of src16 with ucol_getSortKey and
   stores it in dest8. If the key length reported by ICU exceeds the
   capacity of dest8, dest8 is resized to twice that length and the key
   is computed again. On success dest8->utf8_len is set to the key
   length; the icu_buf_utf8_clear() call is presumably the else branch.
   NOTE(review): the second line of the success condition is not visible
   here - confirm what else it tests. */
450 void icu_sortkey8_from_utf16(UCollator *coll,
451 struct icu_buf_utf8 * dest8,
452 struct icu_buf_utf16 * src16,
456 int32_t sortkey_len = 0;
458 sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
459 dest8->utf8, dest8->utf8_cap);
461 /* check for buffer overflow, resize and retry */
462 if (sortkey_len > dest8->utf8_cap) {
463 icu_buf_utf8_resize(dest8, sortkey_len * 2);
464 sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
465 dest8->utf8, dest8->utf8_cap);
468 if (U_SUCCESS(*status)
470 dest8->utf8_len = sortkey_len;
472 icu_buf_utf8_clear(dest8);
/* Allocates a tokenizer for `locale`. The action character selects the
   kind of ICU break iterator opened with ubrk_open: line, sentence,
   word, character or title (the case labels mapping characters to kinds
   are not visible here). Any other action sets *status to
   U_UNSUPPORTED_ERROR. All token bookkeeping fields start at 0 and no
   text buffer is attached yet. When *status is not a success the
   tokenizer is destroyed again. */
477 struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
480 struct icu_tokenizer * tokenizer
481 = (struct icu_tokenizer *) xmalloc(sizeof(struct icu_tokenizer));
483 tokenizer->action = action;
485 tokenizer->buf16 = 0;
486 tokenizer->token_count = 0;
487 tokenizer->token_id = 0;
488 tokenizer->token_start = 0;
489 tokenizer->token_end = 0;
492 switch(tokenizer->action) {
495 tokenizer->bi = ubrk_open(UBRK_LINE, locale, 0, 0, status);
499 tokenizer->bi = ubrk_open(UBRK_SENTENCE, locale, 0, 0, status);
503 tokenizer->bi = ubrk_open(UBRK_WORD, locale, 0, 0, status);
507 tokenizer->bi = ubrk_open(UBRK_CHARACTER, locale, 0, 0, status);
511 tokenizer->bi = ubrk_open(UBRK_TITLE, locale, 0, 0, status);
514 *status = U_UNSUPPORTED_ERROR;
519 /* ICU error stuff is a very funny business */
520 if (U_SUCCESS(*status))
523 /* freeing if failed */
524 icu_tokenizer_destroy(tokenizer);
/* Releases a tokenizer; its ICU break iterator is closed with
   ubrk_close().
   NOTE(review): the null checks and the freeing of the tokenizer object
   itself are not visible here - confirm. */
528 void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer)
532 ubrk_close(tokenizer->bi);
/* Attaches the text in src16 to the tokenizer. The tokenizer, its break
   iterator and src16 must all be non-null (the value returned otherwise
   is not visible here). The tokenizer keeps a pointer to src16 rather
   than a copy, resets its token counters and positions to 0 and hands
   the text to the break iterator with ubrk_setText. A failing *status
   is checked afterwards. */
537 int icu_tokenizer_attach(struct icu_tokenizer * tokenizer,
538 struct icu_buf_utf16 * src16,
541 if (!tokenizer || !tokenizer->bi || !src16)
545 tokenizer->buf16 = src16;
546 tokenizer->token_count = 0;
547 tokenizer->token_id = 0;
548 tokenizer->token_start = 0;
549 tokenizer->token_end = 0;
551 ubrk_setText(tokenizer->bi, src16->utf16, src16->utf16_len, status);
554 if (U_FAILURE(*status))
/* Advances the tokenizer to the next break position of the attached
   text and copies the token between the previous and the new position
   into tkn16. The token starts at ubrk_first() on the first call and at
   the previous token end afterwards. UBRK_DONE from ubrk_next() is
   mapped to the end of the attached text. tkn16 is resized to twice the
   token length when its capacity is too small. According to the
   in-function comment the token length is returned. */
560 int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer,
561 struct icu_buf_utf16 * tkn16,
564 int32_t tkn_start = 0;
569 if (!tokenizer || !tokenizer->bi
570 || !tokenizer->buf16 || !tokenizer->buf16->utf16_len)
574 never change tokenizer->buf16 and keep always invariant
575 0 <= tokenizer->token_start
576 <= tokenizer->token_end
577 <= tokenizer->buf16->utf16_len
578 returns length of token
581 if (0 == tokenizer->token_end) /* first call */
582 tkn_start = ubrk_first(tokenizer->bi);
583 else /* successive calls */
584 tkn_start = tokenizer->token_end;
586 /* get next position */
587 tkn_end = ubrk_next(tokenizer->bi);
589 /* repairing invariant at end of ubrk, which is UBRK_DONE = -1 */
590 if (UBRK_DONE == tkn_end)
591 tkn_end = tokenizer->buf16->utf16_len;
593 /* copy out if everything is well */
594 if(U_FAILURE(*status))
597 /* everything OK, now update internal state */
598 tkn_len = tkn_end - tkn_start;
/* NOTE(review): the condition separating the increments from the
   token_id reset is not visible here - confirm */
601 tokenizer->token_count++;
602 tokenizer->token_id++;
604 tokenizer->token_id = 0;
606 tokenizer->token_start = tkn_start;
607 tokenizer->token_end = tkn_end;
610 /* copying into token buffer if it exists */
612 if (tkn16->utf16_cap < tkn_len)
613 icu_buf_utf16_resize(tkn16, (size_t) tkn_len * 2);
615 u_strncpy(tkn16->utf16, &(tokenizer->buf16->utf16)[tkn_start],
618 tkn16->utf16_len = tkn_len;
/* Returns the tokenizer's current token_id field. */
625 int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer)
627 return tokenizer->token_id;
/* Returns the start offset of the current token within the attached
   UTF-16 text. */
630 int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer)
632 return tokenizer->token_start;
/* Returns the end offset of the current token within the attached
   UTF-16 text. */
635 int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer)
637 return tokenizer->token_end;
/* Returns the length of the current token, computed as token_end minus
   token_start. */
640 int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer)
642 return (tokenizer->token_end - tokenizer->token_start);
/* Returns the tokenizer's token_count field, which is reset to 0 when a
   new text is attached. */
645 int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer)
647 return tokenizer->token_count;
/* Allocates a normalizer for the given transliteration rules. The rules
   C string is converted to UTF-16 into a freshly created rules16 buffer
   and a transliterator is opened from it with utrans_openU. Parse
   diagnostics go to normalizer->parse_error. The action character picks
   between two utrans_openU variants (their differing arguments and the
   case labels are not visible here). Any other action sets *status to
   U_UNSUPPORTED_ERROR. When *status is not a success the normalizer is
   destroyed again. */
652 struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
656 struct icu_normalizer * normalizer
657 = (struct icu_normalizer *) xmalloc(sizeof(struct icu_normalizer));
659 normalizer->action = action;
660 normalizer->trans = 0;
661 normalizer->rules16 = icu_buf_utf16_create(0);
662 icu_utf16_from_utf8_cstr(normalizer->rules16, rules, status);
664 switch(normalizer->action) {
668 = utrans_openU(normalizer->rules16->utf16,
669 normalizer->rules16->utf16_len,
672 normalizer->parse_error, status);
677 = utrans_openU(normalizer->rules16->utf16,
678 normalizer->rules16->utf16_len,
681 normalizer->parse_error, status);
684 *status = U_UNSUPPORTED_ERROR;
689 if (U_SUCCESS(*status))
692 /* freeing if failed */
693 icu_normalizer_destroy(normalizer);
/* Releases a normalizer: the rules16 buffer is destroyed and the
   transliterator is closed with utrans_close, each only when present.
   NOTE(review): the null check on `normalizer` and the freeing of the
   object itself are not visible here - confirm. */
698 void icu_normalizer_destroy(struct icu_normalizer * normalizer){
700 if (normalizer->rules16)
701 icu_buf_utf16_destroy(normalizer->rules16);
702 if (normalizer->trans)
703 utrans_close(normalizer->trans);
/* Runs the normalizer's transliterator over src16 and leaves the result
   in dest16. An empty source just clears dest16. Otherwise src16 is
   first copied into dest16 and utrans_transUChars then transforms dest16
   in place, updating dest16->utf16_len. On failure dest16 is cleared.
   The final return value is dest16->utf16_len. */
710 int icu_normalizer_normalize(struct icu_normalizer * normalizer,
711 struct icu_buf_utf16 * dest16,
712 struct icu_buf_utf16 * src16,
715 if (!normalizer || !normalizer->trans
720 if (!src16->utf16_len){ /* guarding for empty source string */
721 icu_buf_utf16_clear(dest16);
725 if (!icu_buf_utf16_copy(dest16, src16))
729 utrans_transUChars (normalizer->trans,
730 dest16->utf16, &(dest16->utf16_len),
732 0, &(src16->utf16_len), status);
734 if (U_FAILURE(*status))
735 icu_buf_utf16_clear(dest16);
737 return dest16->utf16_len;
/* Allocates one processing step for `chain` and creates the helper
   object its type needs: a casemap from the first rule character, a
   normalizer from the whole rule string (created with action 'f'), or a
   tokenizer for the chain's locale from the first rule character. A
   display step has its own case but no helper creation is visible for
   it. A null chain, a zero `type` or a null rule is rejected up front
   (the value returned in that case is not visible here). */
743 struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain,
744 enum icu_chain_step_type type,
745 const uint8_t * rule,
746 struct icu_buf_utf16 * buf16,
749 struct icu_chain_step * step = 0;
751 if(!chain || !type || !rule)
754 step = (struct icu_chain_step *) xmalloc(sizeof(struct icu_chain_step));
760 /* create auxiliary objects */
762 case ICU_chain_step_type_display:
764 case ICU_chain_step_type_casemap:
765 step->u.casemap = icu_casemap_create(rule[0], status);
767 case ICU_chain_step_type_normalize:
768 step->u.normalizer = icu_normalizer_create((char *) rule, 'f', status);
770 case ICU_chain_step_type_tokenize:
771 step->u.tokenizer = icu_tokenizer_create((char *) chain->locale,
772 (char) rule[0], status);
/* Destroys a step and, recursively through step->previous, all steps
   before it. For casemap, normalize and tokenize steps the type-specific
   helper object and the step's buf16 are destroyed. No cleanup is
   visible for display steps. */
782 void icu_chain_step_destroy(struct icu_chain_step * step){
787 icu_chain_step_destroy(step->previous);
790 case ICU_chain_step_type_display:
792 case ICU_chain_step_type_casemap:
793 icu_casemap_destroy(step->u.casemap);
794 icu_buf_utf16_destroy(step->buf16);
796 case ICU_chain_step_type_normalize:
797 icu_normalizer_destroy(step->u.normalizer);
798 icu_buf_utf16_destroy(step->buf16);
800 case ICU_chain_step_type_tokenize:
801 icu_tokenizer_destroy(step->u.tokenizer);
802 icu_buf_utf16_destroy(step->buf16);
/* Allocates a processing chain for `locale`. *status is reset to
   U_ZERO_ERROR, the locale string is duplicated with xstrdup and a
   collator for it is opened with ucol_open. A collator failure is
   checked (the handling is not visible here). The token count starts at
   0 and the display, norm and sort UTF-8 buffers plus the UTF-16 source
   buffer are created with capacity 0. */
812 struct icu_chain * icu_chain_create(const char *locale, int sort,
815 struct icu_chain * chain
816 = (struct icu_chain *) xmalloc(sizeof(struct icu_chain));
818 *status = U_ZERO_ERROR;
820 chain->locale = xstrdup(locale);
824 chain->coll = ucol_open((const char *) chain->locale, status);
826 if (U_FAILURE(*status))
829 chain->token_count = 0;
833 chain->display8 = icu_buf_utf8_create(0);
834 chain->norm8 = icu_buf_utf8_create(0);
835 chain->sort8 = icu_buf_utf8_create(0);
837 chain->src16 = icu_buf_utf16_create(0);
/* Releases everything a chain owns: the collator, the three UTF-8 output
   buffers, the UTF-16 source buffer, all steps (through the recursive
   icu_chain_step_destroy) and the duplicated locale string.
   NOTE(review): null guards and the freeing of the chain object itself
   are not visible here - confirm. */
845 void icu_chain_destroy(struct icu_chain * chain)
850 ucol_close(chain->coll);
852 icu_buf_utf8_destroy(chain->display8);
853 icu_buf_utf8_destroy(chain->norm8);
854 icu_buf_utf8_destroy(chain->sort8);
856 icu_buf_utf16_destroy(chain->src16);
858 icu_chain_step_destroy(chain->steps);
859 xfree(chain->locale);
/* Builds a chain from an XML element. The element's "locale" attribute
   is passed to icu_chain_create. Each child element then adds one step
   according to its name: "casemap", "transform" (a normalize step),
   "tokenize" or "display". The first three take their rule from the
   child's "rule" attribute; "display" uses an empty rule. Non-element
   children are skipped. If a step cannot be created or *status reports a
   failure, the chain is destroyed.
   NOTE(review): xmlGetProp returns newly allocated strings; no matching
   xmlFree is visible here - confirm the attribute values are released.
   NOTE(review): presumably a missing "rule" attribute yields a null
   xml_rule, which icu_chain_insert_step rejects - confirm. */
866 struct icu_chain * icu_chain_xml_config(const xmlNode *xml_node,
871 struct icu_chain * chain = 0;
873 *status = U_ZERO_ERROR;
875 if (!xml_node ||xml_node->type != XML_ELEMENT_NODE)
879 xmlChar * xml_locale = xmlGetProp((xmlNode *) xml_node,
880 (xmlChar *) "locale");
884 chain = icu_chain_create((const char *) xml_locale, sort, status);
892 for (node = xml_node->children; node; node = node->next)
895 struct icu_chain_step * step = 0;
897 if (node->type != XML_ELEMENT_NODE)
900 xml_rule = xmlGetProp(node, (xmlChar *) "rule");
902 if (!strcmp((const char *) node->name, "casemap"))
903 step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap,
904 (const uint8_t *) xml_rule, status);
905 else if (!strcmp((const char *) node->name, "transform"))
906 step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize,
907 (const uint8_t *) xml_rule, status);
908 else if (!strcmp((const char *) node->name, "tokenize"))
909 step = icu_chain_insert_step(chain, ICU_chain_step_type_tokenize,
910 (const uint8_t *) xml_rule, status);
911 else if (!strcmp((const char *) node->name, "display"))
912 step = icu_chain_insert_step(chain, ICU_chain_step_type_display,
913 (const uint8_t *) "", status);
915 if (!step || U_FAILURE(*status))
917 icu_chain_destroy(chain);
/* Creates a new step of the given type and links it in front of the
   chain's current steps through step->previous. The input buffer for the
   new step is the buf16 of the current head step when there is one,
   otherwise the chain's src16. Casemap, normalize and tokenize steps get
   a fresh UTF-16 output buffer of capacity 0; no buffer creation is
   visible for display steps. A null chain, a zero `type` or a null rule
   is rejected up front (the value returned is not visible here). */
928 struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain,
929 enum icu_chain_step_type type,
930 const uint8_t * rule,
933 struct icu_chain_step * step = 0;
934 struct icu_buf_utf16 * src16 = 0;
935 struct icu_buf_utf16 * buf16 = 0;
937 if (!chain || !type || !rule)
940 /* assign utf16 src buffers as needed */
941 if (chain->steps && chain->steps->buf16)
942 src16 = chain->steps->buf16;
943 else if (chain->src16)
944 src16 = chain->src16;
949 /* create utf16 destination buffers as needed, or */
952 case ICU_chain_step_type_display:
955 case ICU_chain_step_type_casemap:
956 buf16 = icu_buf_utf16_create(0);
958 case ICU_chain_step_type_normalize:
959 buf16 = icu_buf_utf16_create(0);
961 case ICU_chain_step_type_tokenize:
962 buf16 = icu_buf_utf16_create(0);
968 /* create actual chain step with this buffer */
969 step = icu_chain_step_create(chain, type, rule, buf16, status);
971 step->previous = chain->steps;
/* Produces the next token for `step`. Input comes from the previous
   step's buf16, advancing previous steps through recursive calls, or,
   for the first step, from chain->src16, which can be consumed only
   once. The work is dispatched on the step type: display converts the
   input to UTF-8 in chain->display8; casemap and normalize write into
   step->buf16; tokenize attaches the tokenizer to the input when a new
   input token is needed and then splits it into step->buf16 tokens.
   A null chain, chain->src16 or step, or a step with more_tokens cleared,
   is rejected up front (the value returned is not visible here). */
978 int icu_chain_step_next_token(struct icu_chain * chain,
979 struct icu_chain_step * step,
982 struct icu_buf_utf16 * src16 = 0;
983 int got_new_token = 0;
985 if (!chain || !chain->src16 || !step || !step->more_tokens)
988 /* assign utf16 src buffers as needed, advance in previous steps
989 tokens until non-zero token met, and setting stop condition */
993 src16 = step->previous->buf16;
994 /* tokens might be killed in previous steps, therefore looping */
996 while (step->need_new_token
997 && step->previous->more_tokens
1000 = icu_chain_step_next_token(chain, step->previous, status);
1003 { /* first step can only work once on chain->src16 input buffer */
1004 src16 = chain->src16;
1005 step->more_tokens = 0;
1012 /* stop if nothing to process */
1013 if (step->need_new_token && !got_new_token)
1015 step->more_tokens = 0;
1019 /* either an old token not finished yet, or a new token, thus
1020 perform the work, eventually put this step's output in
1021 step->buf16 or the chain's UTF8 output buffers */
1025 case ICU_chain_step_type_display:
1026 icu_utf16_to_utf8(chain->display8, src16, status);
1028 case ICU_chain_step_type_casemap:
1029 icu_casemap_casemap(step->u.casemap,
1030 step->buf16, src16, status,
1033 case ICU_chain_step_type_normalize:
1034 icu_normalizer_normalize(step->u.normalizer,
1035 step->buf16, src16, status);
1037 case ICU_chain_step_type_tokenize:
1038 /* attach to new src16 token only first time during splitting */
1039 if (step->need_new_token)
1041 icu_tokenizer_attach(step->u.tokenizer, src16, status);
1042 step->need_new_token = 0;
1045 /* splitting one src16 token into multiple buf16 tokens */
1047 = icu_tokenizer_next_token(step->u.tokenizer,
1048 step->buf16, status);
1050 /* make sure to get new previous token if this one had been used up
1051 by recursive call to _same_ step */
1053 if (!step->more_tokens)
1055 step->more_tokens = icu_chain_step_next_token(chain, step, status);
1056 return step->more_tokens; /* avoid one token count too much! */
1064 if (U_FAILURE(*status))
1067 /* if token disappeared into thin air, tell caller */
1068 /* if (!step->buf16->utf16_len && !step->more_tokens) */
/* Hands a new UTF-8 input string to the chain. The chain keeps the
   pointer itself, not a copy, so the string must stay valid while tokens
   are being read. The token count is reset, every step reached through
   stp->previous gets more_tokens and need_new_token set to 1, and the
   input is converted to UTF-16 in chain->src16 when the chain has steps
   or sorting is enabled. A conversion failure is checked afterwards (the
   value returned is not visible here). */
1075 int icu_chain_assign_cstr(struct icu_chain * chain,
1076 const char * src8cstr,
1079 struct icu_chain_step * stp = 0;
1081 if (!chain || !src8cstr)
1084 chain->src8cstr = src8cstr;
1088 /* clear token count */
1089 chain->token_count = 0;
1091 /* clear all steps stop states */
1094 stp->more_tokens = 1;
1095 stp->need_new_token = 1;
1096 stp = stp->previous;
1099 /* finally convert UTF8 to UTF16 string if needed */
1100 if (chain->steps || chain->sort)
1101 icu_utf16_from_utf8_cstr(chain->src16, chain->src8cstr, status);
1103 if (U_FAILURE(*status))
/* Advances the chain to its next token. *status is reset to
   U_ZERO_ERROR first. In the usual case the head step is asked for
   tokens until one is produced or it has no more. The token count is
   then incremented, the head step's buf16 is converted to UTF-8 in
   chain->norm8 and a sort key is written to chain->sort8. Both visible
   return statements return chain->token_count.
   NOTE(review): in the "no steps" special case the sort key call still
   reads chain->steps->buf16. If that branch is taken when chain->steps
   is null this dereferences a null pointer; chain->src16 looks like the
   intended source - confirm against the full function. */
1111 int icu_chain_next_token(struct icu_chain * chain,
1116 *status = U_ZERO_ERROR;
1121 /* special case with no steps - same as index type binary */
1124 if (chain->token_count)
1128 chain->token_count++;
1131 icu_sortkey8_from_utf16(chain->coll,
1132 chain->sort8, chain->steps->buf16,
1134 return chain->token_count;
1137 /* usual case, one or more icu chain steps existing */
1140 while(!got_token && chain->steps && chain->steps->more_tokens)
1141 got_token = icu_chain_step_next_token(chain, chain->steps, status);
1145 chain->token_count++;
1147 icu_utf16_to_utf8(chain->norm8, chain->steps->buf16, status);
1150 icu_sortkey8_from_utf16(chain->coll,
1151 chain->sort8, chain->steps->buf16,
1154 return chain->token_count;
/* Returns the chain's current token count.
   NOTE(review): a guard for a null chain may precede the return but is
   not visible here. */
1161 int icu_chain_token_number(struct icu_chain * chain)
1166 return chain->token_count;
/* Returns the display form of the current token as a C string taken
   from chain->display8, when that buffer exists (the value returned
   otherwise is not visible here). */
1170 const char * icu_chain_token_display(struct icu_chain * chain)
1172 if (chain->display8)
1173 return icu_buf_utf8_to_cstr(chain->display8);
/* Returns the normalized form of the current token: either the original
   input string (chain->src8cstr) or the content of chain->norm8 as a C
   string.
   NOTE(review): the condition choosing between the two is not visible
   here; presumably the raw input is used when the chain has no steps -
   confirm. */
1178 const char * icu_chain_token_norm(struct icu_chain * chain)
1181 return chain->src8cstr;
1184 return icu_buf_utf8_to_cstr(chain->norm8);
/* Returns the sort key of the current token as a C string taken from
   chain->sort8.
   NOTE(review): any guard preceding the return is not visible here. */
1189 const char * icu_chain_token_sortkey(struct icu_chain * chain)
1192 return icu_buf_utf8_to_cstr(chain->sort8);
/* Accessor for the chain's collator.
   NOTE(review): the body is not visible here; presumably it returns
   chain->coll - confirm. */
1197 const UCollator * icu_chain_get_coll(struct icu_chain * chain)
1202 #endif /* YAZ_HAVE_ICU */
1207 * indent-tabs-mode: nil
1209 * vim: shiftwidth=4 tabstop=8 expandtab