1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2009 Index Data
3 * See the file LICENSE for details.
17 #include <yaz/timing.h>
21 #include <yaz/xmalloc.h>
23 #include <yaz/icu_I18N.h>
31 #include <unicode/ustring.h> /* some more string fcns*/
32 #include <unicode/uchar.h> /* char names */
35 #include <unicode/ucol.h>
/* Inspects an ICU status code. When U_FAILURE(status) holds, a warning
   with the numeric code and its u_errorName() text is written through
   yaz_log. The return statements are not part of this excerpt. */
38 int icu_check_status (UErrorCode status)
40 if (U_FAILURE(status))
42 yaz_log(YLOG_WARN, "ICU: %d %s\n", status, u_errorName(status));
/* Allocates a struct icu_buf_utf16 with xmalloc and hands it back to the
   caller. The visible lines allocate room for `capacity` UChars, store a
   0 in the first slot and record the capacity. */
51 struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity)
53 struct icu_buf_utf16 * buf16
54 = (struct icu_buf_utf16 *) xmalloc(sizeof(struct icu_buf_utf16));
/* NOTE(review): callers in this file pass capacity 0; the guard that
   presumably skips the payload allocation in that case is not visible
   in this excerpt - confirm. */
61 buf16->utf16 = (UChar *) xmalloc(sizeof(UChar) * capacity);
62 buf16->utf16[0] = (UChar) 0;
63 buf16->utf16_cap = capacity;
/* Empties a UTF-16 buffer by writing 0 into its first code unit.
   NOTE(review): any NULL guard, length reset and the returned pointer
   are outside the visible lines - confirm. */
68 struct icu_buf_utf16 * icu_buf_utf16_clear(struct icu_buf_utf16 * buf16)
72 buf16->utf16[0] = (UChar) 0;
/* Changes the payload capacity of a UTF-16 buffer to `capacity` UChars.
   The visible lines clear the buffer after (re)allocation, so callers
   must refill it afterwards. */
78 struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
/* no payload yet: allocate fresh storage */
85 if (0 == buf16->utf16)
86 buf16->utf16 = (UChar *) xmalloc(sizeof(UChar) * capacity);
/* payload exists: grow/shrink it (the assignment target line is not
   visible in this excerpt) */
89 = (UChar *) xrealloc(buf16->utf16, sizeof(UChar) * capacity);
91 icu_buf_utf16_clear(buf16);
92 buf16->utf16_cap = capacity;
/* Copies the content of src16 into dest16. Exactly src16->utf16_len code
   units are copied with u_strncpy and the destination length is set to
   match; the visible code appends no terminator of its own. */
105 struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
106 struct icu_buf_utf16 * src16)
/* destination too small: grow it to twice the source length */
112 if (dest16->utf16_cap < src16->utf16_len)
113 icu_buf_utf16_resize(dest16, src16->utf16_len * 2);
115 u_strncpy(dest16->utf16, src16->utf16, src16->utf16_len);
116 dest16->utf16_len = src16->utf16_len;
/* Disposes of a UTF-16 buffer.
   NOTE(review): the body is not part of this excerpt; presumably it
   releases both the payload and the struct - confirm. */
122 void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16)
/* Allocates a struct icu_buf_utf8 with xmalloc and hands it back to the
   caller. The visible lines allocate `capacity` bytes for the payload,
   store a 0 byte in the first slot and record the capacity. */
131 struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity)
133 struct icu_buf_utf8 * buf8
134 = (struct icu_buf_utf8 *) xmalloc(sizeof(struct icu_buf_utf8));
/* NOTE(review): callers in this file pass capacity 0; the guard that
   presumably skips the payload allocation in that case is not visible
   in this excerpt - confirm. */
141 buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
142 buf8->utf8[0] = (uint8_t) 0;
143 buf8->utf8_cap = capacity;
/* Empties a UTF-8 buffer by writing a 0 byte into its first slot.
   NOTE(review): any NULL guard, length reset and the returned pointer
   are outside the visible lines - confirm. */
149 struct icu_buf_utf8 * icu_buf_utf8_clear(struct icu_buf_utf8 * buf8)
153 buf8->utf8[0] = (uint8_t) 0;
/* Changes the payload capacity of a UTF-8 buffer to `capacity` bytes.
   Unlike the UTF-16 counterpart, no clearing call is visible here. */
160 struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
/* fresh allocation (the condition selecting this path is not visible) */
168 buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
/* reallocation of existing storage (assignment target line not visible) */
171 = (uint8_t *) xrealloc(buf8->utf8, sizeof(uint8_t) * capacity);
173 buf8->utf8_cap = capacity;
/* Exposes the content of a UTF-8 buffer as a NUL-terminated C string.
   The returned pointer aliases the buffer's own storage, so it is only
   valid until the buffer is next modified. */
186 const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8)
/* NULL or empty buffer is handled up front (the value returned for it
   is not visible in this excerpt) */
188 if (!src8 || src8->utf8_len == 0)
/* buffer is completely full: make room for the terminator first */
191 if (src8->utf8_len == src8->utf8_cap)
192 src8 = icu_buf_utf8_resize(src8, src8->utf8_len * 2 + 1);
194 src8->utf8[src8->utf8_len] = '\0';
196 return (const char *) src8->utf8;
/* Disposes of a UTF-8 buffer.
   NOTE(review): the body is not part of this excerpt; presumably it
   releases both the payload and the struct - confirm. */
200 void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8)
/* Converts the UTF-8 bytes held in src8 into UTF-16 stored in dest16 by
   means of u_strFromUTF8, growing dest16 once if the first attempt
   reports a buffer overflow. */
209 UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16,
210 struct icu_buf_utf8 * src8,
/* presumably receives the converted length from u_strFromUTF8 (the
   argument line passing it is not visible in this excerpt) */
213 int32_t utf16_len = 0;
/* first attempt, limited to the current capacity of dest16 */
215 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
217 (const char *) src8->utf8, src8->utf8_len, status);
219 /* check for buffer overflow, resize and retry */
220 if (*status == U_BUFFER_OVERFLOW_ERROR)
/* grow to twice the reported length, reset the status, convert again */
222 icu_buf_utf16_resize(dest16, utf16_len * 2);
223 *status = U_ZERO_ERROR;
224 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
226 (const char *) src8->utf8, src8->utf8_len, status);
/* publish the new length only when conversion succeeded and it fits */
229 if (U_SUCCESS(*status)
230 && utf16_len <= dest16->utf16_cap)
231 dest16->utf16_len = utf16_len;
/* fallback path: leave dest16 empty */
233 icu_buf_utf16_clear(dest16);
/* Converts a NUL-terminated UTF-8 C string into UTF-16 stored in dest16
   by means of u_strFromUTF8, growing dest16 once if the first attempt
   reports a buffer overflow. The status is reset to U_ZERO_ERROR on
   entry. src8cstr is passed to strlen unchecked, so it must not be NULL. */
240 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
241 const char * src8cstr,
244 size_t src8cstr_len = 0;
/* presumably receives the converted length from u_strFromUTF8 (the
   argument line passing it is not visible in this excerpt) */
245 int32_t utf16_len = 0;
247 *status = U_ZERO_ERROR;
248 src8cstr_len = strlen(src8cstr);
/* first attempt, limited to the current capacity of dest16 */
250 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
252 src8cstr, src8cstr_len, status);
254 /* check for buffer overflow, resize and retry */
255 if (*status == U_BUFFER_OVERFLOW_ERROR)
/* grow to twice the reported length, reset the status, convert again */
257 icu_buf_utf16_resize(dest16, utf16_len * 2);
258 *status = U_ZERO_ERROR;
259 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
261 src8cstr, src8cstr_len, status);
/* publish the new length only when conversion succeeded and it fits */
264 if (U_SUCCESS(*status)
265 && utf16_len <= dest16->utf16_cap)
266 dest16->utf16_len = utf16_len;
/* fallback path: leave dest16 empty */
268 icu_buf_utf16_clear(dest16);
/* Converts the UTF-16 text held in src16 into UTF-8 stored in dest8 by
   means of u_strToUTF8, growing dest8 once if the first attempt reports
   a buffer overflow. */
276 UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
277 struct icu_buf_utf16 * src16,
/* presumably receives the converted length from u_strToUTF8 (the
   argument line passing it is not visible in this excerpt) */
280 int32_t utf8_len = 0;
/* first attempt, limited to the current capacity of dest8 */
282 u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
284 src16->utf16, src16->utf16_len, status);
286 /* check for buffer overflow, resize and retry */
287 if (*status == U_BUFFER_OVERFLOW_ERROR)
/* grow to twice the reported length, reset the status, convert again */
289 icu_buf_utf8_resize(dest8, utf8_len * 2);
290 *status = U_ZERO_ERROR;
291 u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
293 src16->utf16, src16->utf16_len, status);
/* publish the new length only when conversion succeeded and it fits */
297 if (U_SUCCESS(*status)
298 && utf8_len <= dest8->utf8_cap)
299 dest8->utf8_len = utf8_len;
/* fallback path: leave dest8 empty */
301 icu_buf_utf8_clear(dest8);
/* Allocates a casemap object with xmalloc and stores the requested
   action character in it. The action is then validated in a switch whose
   case labels are not visible in this excerpt; the object is destroyed
   again on the rejecting path. */
308 struct icu_casemap * icu_casemap_create(char action, UErrorCode *status)
310 struct icu_casemap * casemap
311 = (struct icu_casemap *) xmalloc(sizeof(struct icu_casemap));
312 casemap->action = action;
314 switch(casemap->action) {
/* rejected action: give the object back instead of returning it */
325 icu_casemap_destroy(casemap);
/* Disposes of a casemap object.
   NOTE(review): the body is not part of this excerpt - confirm what is
   released. */
332 void icu_casemap_destroy(struct icu_casemap * casemap)
/* Applies the case mapping configured in `casemap` to src16, writing the
   result to dest16. This is a thin wrapper: the work is delegated to
   icu_utf16_casemap with the stored action character, and its result is
   returned unchanged. */
338 int icu_casemap_casemap(struct icu_casemap * casemap,
339 struct icu_buf_utf16 * dest16,
340 struct icu_buf_utf16 * src16,
347 return icu_utf16_casemap(dest16, src16, locale,
348 casemap->action, status);
/* Case-maps the UTF-16 text in src16 into dest16 according to `action`.
   Depending on the action one of u_strToLower, u_strToUpper,
   u_strToTitle or u_strFoldCase is used (the case labels selecting them
   are not visible in this excerpt); any other action yields
   U_UNSUPPORTED_ERROR. On buffer overflow the destination is grown and
   the same mapping is run a second time, unless dest16 and src16 are the
   same buffer. */
352 int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
353 struct icu_buf_utf16 * src16,
354 const char *locale, char action,
/* length reported by the ICU mapping call */
357 int32_t dest16_len = 0;
360 if (!src16->utf16_len){ /* guarding for empty source string */
/* empty in, empty out */
362 dest16->utf16[0] = (UChar) 0;
363 dest16->utf16_len = 0;
/* first attempt, limited to the current capacity of dest16 */
371 dest16_len = u_strToLower(dest16->utf16, dest16->utf16_cap,
372 src16->utf16, src16->utf16_len,
377 dest16_len = u_strToUpper(dest16->utf16, dest16->utf16_cap,
378 src16->utf16, src16->utf16_len,
383 dest16_len = u_strToTitle(dest16->utf16, dest16->utf16_cap,
384 src16->utf16, src16->utf16_len,
389 dest16_len = u_strFoldCase(dest16->utf16, dest16->utf16_cap,
390 src16->utf16, src16->utf16_len,
391 U_FOLD_CASE_DEFAULT, status);
/* unknown action */
395 return U_UNSUPPORTED_ERROR;
399 /* check for buffer overflow, resize and retry */
400 if (*status == U_BUFFER_OVERFLOW_ERROR
401 && dest16 != src16 /* do not resize if in-place conversion */
/* resizing clears the buffer, which would wipe the source when both
   arguments are the same buffer - hence the guard above */
403 icu_buf_utf16_resize(dest16, dest16_len * 2);
404 *status = U_ZERO_ERROR;
/* second attempt with the enlarged destination */
410 dest16_len = u_strToLower(dest16->utf16, dest16->utf16_cap,
411 src16->utf16, src16->utf16_len,
416 dest16_len = u_strToUpper(dest16->utf16, dest16->utf16_cap,
417 src16->utf16, src16->utf16_len,
422 dest16_len = u_strToTitle(dest16->utf16, dest16->utf16_cap,
423 src16->utf16, src16->utf16_len,
428 dest16_len = u_strFoldCase(dest16->utf16, dest16->utf16_cap,
429 src16->utf16, src16->utf16_len,
430 U_FOLD_CASE_DEFAULT, status);
434 return U_UNSUPPORTED_ERROR;
/* publish the new length only when mapping succeeded and it fits */
439 if (U_SUCCESS(*status)
440 && dest16_len <= dest16->utf16_cap)
441 dest16->utf16_len = dest16_len;
/* fallback path: leave dest16 empty */
444 dest16->utf16[0] = (UChar) 0;
445 dest16->utf16_len = 0;
/* Computes the collation sort key of the UTF-16 text in src16 with the
   collator `coll` and stores the key bytes in dest8. ucol_getSortKey
   reports the number of bytes it needs, which drives the one-time
   resize-and-retry below. */
453 void icu_sortkey8_from_utf16(UCollator *coll,
454 struct icu_buf_utf8 * dest8,
455 struct icu_buf_utf16 * src16,
459 int32_t sortkey_len = 0;
/* first attempt, limited to the current capacity of dest8 */
461 sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
462 dest8->utf8, dest8->utf8_cap);
464 /* check for buffer overflow, resize and retry */
465 if (sortkey_len > dest8->utf8_cap) {
466 icu_buf_utf8_resize(dest8, sortkey_len * 2);
467 sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
468 dest8->utf8, dest8->utf8_cap);
/* publish the key length on success (a further condition on the
   following line is not visible in this excerpt) */
471 if (U_SUCCESS(*status)
473 dest8->utf8_len = sortkey_len;
/* fallback path: leave dest8 empty */
475 icu_buf_utf8_clear(dest8);
/* Allocates a tokenizer for `locale` and opens an ICU break iterator
   whose kind (line, sentence, word, character or title) is selected by
   `action`; the case labels are not visible in this excerpt. An
   unrecognised action sets U_UNSUPPORTED_ERROR. On failure the
   half-built tokenizer is destroyed again. */
480 struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
483 struct icu_tokenizer * tokenizer
484 = (struct icu_tokenizer *) xmalloc(sizeof(struct icu_tokenizer));
486 tokenizer->action = action;
/* no text attached yet; all positions and counters start at zero */
488 tokenizer->buf16 = 0;
489 tokenizer->token_count = 0;
490 tokenizer->token_id = 0;
491 tokenizer->token_start = 0;
492 tokenizer->token_end = 0;
495 switch(tokenizer->action) {
/* break iterators are opened without text (0, 0); the text is supplied
   later through icu_tokenizer_attach */
498 tokenizer->bi = ubrk_open(UBRK_LINE, locale, 0, 0, status);
502 tokenizer->bi = ubrk_open(UBRK_SENTENCE, locale, 0, 0, status);
506 tokenizer->bi = ubrk_open(UBRK_WORD, locale, 0, 0, status);
510 tokenizer->bi = ubrk_open(UBRK_CHARACTER, locale, 0, 0, status);
514 tokenizer->bi = ubrk_open(UBRK_TITLE, locale, 0, 0, status);
517 *status = U_UNSUPPORTED_ERROR;
522 /* ICU error stuff is a very funny business */
523 if (U_SUCCESS(*status))
526 /* freeing if failed */
527 icu_tokenizer_destroy(tokenizer);
/* Disposes of a tokenizer; the visible line closes its ICU break
   iterator. NOTE(review): guards and the release of the struct itself
   are outside this excerpt - confirm. */
531 void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer)
535 ubrk_close(tokenizer->bi);
/* Attaches the UTF-16 text in src16 to the tokenizer and rewinds all
   token bookkeeping, so that the next icu_tokenizer_next_token call
   starts from the beginning of that text. The tokenizer keeps a pointer
   to src16 rather than a copy. */
540 int icu_tokenizer_attach(struct icu_tokenizer * tokenizer,
541 struct icu_buf_utf16 * src16,
/* nothing to do without a tokenizer, an open break iterator and text */
544 if (!tokenizer || !tokenizer->bi || !src16)
548 tokenizer->buf16 = src16;
549 tokenizer->token_count = 0;
550 tokenizer->token_id = 0;
551 tokenizer->token_start = 0;
552 tokenizer->token_end = 0;
554 ubrk_setText(tokenizer->bi, src16->utf16, src16->utf16_len, status);
557 if (U_FAILURE(*status))
/* Extracts the next token from the text attached to the tokenizer.
   The token spans from the previous break position (or ubrk_first on the
   first call) to the position returned by ubrk_next. The tokenizer's
   start/end/count/id state is updated, and the token text is copied into
   tkn16, which is grown when too small. Several lines of this function,
   including its return statements, are not visible in this excerpt. */
563 int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer,
564 struct icu_buf_utf16 * tkn16,
567 int32_t tkn_start = 0;
/* requires an attached, non-empty text and an open break iterator */
572 if (!tokenizer || !tokenizer->bi
573 || !tokenizer->buf16 || !tokenizer->buf16->utf16_len)
577 never change tokenizer->buf16 and keep always invariant
578 0 <= tokenizer->token_start
579 <= tokenizer->token_end
580 <= tokenizer->buf16->utf16_len
581 returns length of token
584 if (0 == tokenizer->token_end) /* first call */
585 tkn_start = ubrk_first(tokenizer->bi);
586 else /* successive calls */
587 tkn_start = tokenizer->token_end;
589 /* get next position */
590 tkn_end = ubrk_next(tokenizer->bi);
592 /* repairing invariant at end of ubrk, which is UBRK_DONE = -1 */
593 if (UBRK_DONE == tkn_end)
594 tkn_end = tokenizer->buf16->utf16_len;
596 /* copy out if everything is well */
597 if(U_FAILURE(*status))
600 /* everything OK, now update internal state */
601 tkn_len = tkn_end - tkn_start;
/* the condition choosing between counting the token and resetting the
   id is not visible in this excerpt */
604 tokenizer->token_count++;
605 tokenizer->token_id++;
607 tokenizer->token_id = 0;
609 tokenizer->token_start = tkn_start;
610 tokenizer->token_end = tkn_end;
613 /* copying into token buffer if it exists */
/* grow the token buffer to twice the token length when it is too small */
615 if (tkn16->utf16_cap < tkn_len)
616 icu_buf_utf16_resize(tkn16, (size_t) tkn_len * 2);
618 u_strncpy(tkn16->utf16, &(tokenizer->buf16->utf16)[tkn_start],
621 tkn16->utf16_len = tkn_len;
628 int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer)
630 return tokenizer->token_id;
633 int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer)
635 return tokenizer->token_start;
638 int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer)
640 return tokenizer->token_end;
643 int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer)
645 return (tokenizer->token_end - tokenizer->token_start);
648 int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer)
650 return tokenizer->token_count;
/* Allocates a transform object and opens an ICU transliterator from
   `rules`. The rule text is first converted to UTF-16 and kept in
   transform->rules16. Which utrans_openU variant is used depends on
   `action` (case labels not visible in this excerpt); an unrecognised
   action sets U_UNSUPPORTED_ERROR. On failure the half-built object is
   destroyed again. */
655 struct icu_transform * icu_transform_create(const char *rules, char action,
659 struct icu_transform * transform
660 = (struct icu_transform *) xmalloc(sizeof(struct icu_transform));
662 transform->action = action;
663 transform->trans = 0;
/* start with an empty buffer; the conversion below grows it as needed */
664 transform->rules16 = icu_buf_utf16_create(0);
665 icu_utf16_from_utf8_cstr(transform->rules16, rules, status);
667 switch(transform->action)
/* parse problems are reported through transform->parse_error */
672 = utrans_openU(transform->rules16->utf16,
673 transform->rules16->utf16_len,
676 &transform->parse_error, status);
681 = utrans_openU(transform->rules16->utf16,
682 transform->rules16->utf16_len,
685 &transform->parse_error, status);
688 *status = U_UNSUPPORTED_ERROR;
693 if (U_SUCCESS(*status))
696 /* freeing if failed */
697 icu_transform_destroy(transform);
/* Disposes of a transform object: the UTF-16 rule buffer is destroyed
   and the ICU transliterator is closed, each only when present.
   NOTE(review): the release of the struct itself is outside this
   excerpt - confirm. */
702 void icu_transform_destroy(struct icu_transform * transform){
704 if (transform->rules16)
705 icu_buf_utf16_destroy(transform->rules16);
706 if (transform->trans)
707 utrans_close(transform->trans);
714 int icu_transform_trans(struct icu_transform * transform,
715 struct icu_buf_utf16 * dest16,
716 struct icu_buf_utf16 * src16,
719 if (!transform || !transform->trans
724 if (!src16->utf16_len){ /* guarding for empty source string */
725 icu_buf_utf16_clear(dest16);
729 if (!icu_buf_utf16_copy(dest16, src16))
733 utrans_transUChars (transform->trans,
734 dest16->utf16, &(dest16->utf16_len),
736 0, &(src16->utf16_len), status);
738 if (U_FAILURE(*status))
739 icu_buf_utf16_clear(dest16);
741 return dest16->utf16_len;
/* Allocates one processing step of an ICU chain and creates the helper
   object matching its type: a casemap (action taken from the first rule
   byte), a transform (always created with action 'f'), or a tokenizer
   (chain locale, action taken from the first rule byte). Display steps
   show no helper creation in the visible lines. */
747 struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain,
748 enum icu_chain_step_type type,
749 const uint8_t * rule,
750 struct icu_buf_utf16 * buf16,
753 struct icu_chain_step * step = 0;
/* all of chain, a non-zero type and a rule are mandatory */
755 if(!chain || !type || !rule)
758 step = (struct icu_chain_step *) xmalloc(sizeof(struct icu_chain_step));
764 /* create auxiliary objects */
766 case ICU_chain_step_type_display:
768 case ICU_chain_step_type_casemap:
769 step->u.casemap = icu_casemap_create(rule[0], status);
771 case ICU_chain_step_type_transform:
772 step->u.transform = icu_transform_create((char *) rule, 'f', status);
774 case ICU_chain_step_type_tokenize:
775 step->u.tokenizer = icu_tokenizer_create((char *) chain->locale,
776 (char) rule[0], status);
/* Disposes of a chain step and, recursively, of every step linked
   through `previous`. For casemap, transform and tokenize steps both the
   helper object and the step's UTF-16 buffer are destroyed; for display
   steps no cleanup is visible in this excerpt. */
786 void icu_chain_step_destroy(struct icu_chain_step * step){
/* tear down the rest of the list first */
791 icu_chain_step_destroy(step->previous);
794 case ICU_chain_step_type_display:
796 case ICU_chain_step_type_casemap:
797 icu_casemap_destroy(step->u.casemap);
798 icu_buf_utf16_destroy(step->buf16);
800 case ICU_chain_step_type_transform:
801 icu_transform_destroy(step->u.transform);
802 icu_buf_utf16_destroy(step->buf16);
804 case ICU_chain_step_type_tokenize:
805 icu_tokenizer_destroy(step->u.tokenizer);
806 icu_buf_utf16_destroy(step->buf16);
816 struct icu_chain * icu_chain_create(const char *locale, int sort,
819 struct icu_chain * chain
820 = (struct icu_chain *) xmalloc(sizeof(struct icu_chain));
822 *status = U_ZERO_ERROR;
824 chain->locale = xstrdup(locale);
828 chain->coll = ucol_open((const char *) chain->locale, status);
830 if (U_FAILURE(*status))
833 chain->token_count = 0;
837 chain->display8 = icu_buf_utf8_create(0);
838 chain->norm8 = icu_buf_utf8_create(0);
839 chain->sort8 = icu_buf_utf8_create(0);
841 chain->src16 = icu_buf_utf16_create(0);
/* Disposes of an ICU chain: closes the collator, destroys the three
   UTF-8 output buffers and the UTF-16 source buffer, destroys the list
   of steps and frees the locale copy. NOTE(review): guards and the
   release of the struct itself are outside this excerpt - confirm. */
849 void icu_chain_destroy(struct icu_chain * chain)
854 ucol_close(chain->coll);
856 icu_buf_utf8_destroy(chain->display8);
857 icu_buf_utf8_destroy(chain->norm8);
858 icu_buf_utf8_destroy(chain->sort8);
860 icu_buf_utf16_destroy(chain->src16);
/* destroys the newest step and, through it, all earlier ones */
862 icu_chain_step_destroy(chain->steps);
863 xfree(chain->locale);
/* Builds an ICU chain from an XML element. The element's "locale"
   attribute selects the locale. Each child element adds one step, taking
   its "rule" attribute as the rule:
     casemap, transform, tokenize  - step of the same name
     display                       - display step with an empty rule
     normalize                     - deprecated alias of transform (warns)
     index, sortkey                - ignored with a warning
   Any other element name, or an ICU failure while adding a step,
   destroys the chain built so far. */
870 struct icu_chain * icu_chain_xml_config(const xmlNode *xml_node,
875 struct icu_chain * chain = 0;
877 *status = U_ZERO_ERROR;
/* only element nodes can carry a chain configuration */
879 if (!xml_node ||xml_node->type != XML_ELEMENT_NODE)
883 xmlChar * xml_locale = xmlGetProp((xmlNode *) xml_node,
884 (xmlChar *) "locale");
888 chain = icu_chain_create((const char *) xml_locale, sort, status);
896 for (node = xml_node->children; node; node = node->next)
899 struct icu_chain_step * step = 0;
/* text, comments etc. between the step elements are skipped */
901 if (node->type != XML_ELEMENT_NODE)
904 xml_rule = xmlGetProp(node, (xmlChar *) "rule");
906 if (!strcmp((const char *) node->name, "casemap"))
907 step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap,
908 (const uint8_t *) xml_rule, status);
909 else if (!strcmp((const char *) node->name, "transform"))
910 step = icu_chain_insert_step(chain, ICU_chain_step_type_transform,
911 (const uint8_t *) xml_rule, status);
912 else if (!strcmp((const char *) node->name, "tokenize"))
913 step = icu_chain_insert_step(chain, ICU_chain_step_type_tokenize,
914 (const uint8_t *) xml_rule, status);
915 else if (!strcmp((const char *) node->name, "display"))
916 step = icu_chain_insert_step(chain, ICU_chain_step_type_display,
917 (const uint8_t *) "", status);
918 else if (!strcmp((const char *) node->name, "normalize"))
920 yaz_log(YLOG_WARN, "Element %s is deprecated. "
921 "Use transform instead", node->name);
922 step = icu_chain_insert_step(chain, ICU_chain_step_type_transform,
923 (const uint8_t *) xml_rule, status);
925 else if (!strcmp((const char *) node->name, "index")
926 || !strcmp((const char *) node->name, "sortkey"))
928 yaz_log(YLOG_WARN, "Element %s is no longer needed. "
929 "Remove it from the configuration", node->name);
/* unknown element: give up on the whole chain */
933 yaz_log(YLOG_WARN, "Unknown element %s", node->name);
934 icu_chain_destroy(chain);
/* a step was created but ICU reported an error: give up as well */
938 if (step && U_FAILURE(*status))
940 icu_chain_destroy(chain);
/* Creates a new step of the given type and links it in front of the
   chain's existing steps (the new step's `previous` points at the old
   list). Casemap, transform and tokenize steps each get a fresh, empty
   UTF-16 output buffer; the buffer handling of display steps is not
   visible in this excerpt. */
949 struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain,
950 enum icu_chain_step_type type,
951 const uint8_t * rule,
954 struct icu_chain_step * step = 0;
955 struct icu_buf_utf16 * src16 = 0;
956 struct icu_buf_utf16 * buf16 = 0;
/* all of chain, a non-zero type and a rule are mandatory */
958 if (!chain || !type || !rule)
961 /* assign utf16 src buffers as needed */
/* input is the output buffer of the newest existing step, or the chain's
   own source buffer when there is none */
962 if (chain->steps && chain->steps->buf16)
963 src16 = chain->steps->buf16;
964 else if (chain->src16)
965 src16 = chain->src16;
970 /* create utf16 destination buffers as needed, or */
973 case ICU_chain_step_type_display:
976 case ICU_chain_step_type_casemap:
977 buf16 = icu_buf_utf16_create(0);
979 case ICU_chain_step_type_transform:
980 buf16 = icu_buf_utf16_create(0);
982 case ICU_chain_step_type_tokenize:
983 buf16 = icu_buf_utf16_create(0);
989 /* create actual chain step with this buffer */
990 step = icu_chain_step_create(chain, type, rule, buf16, status);
992 step->previous = chain->steps;
/* Produces the next token of one chain step. A step reads its input from
   the output buffer of the previous step (pulling a new token from it
   recursively when needed) or, for the first step, from chain->src16.
   It then performs its own work: UTF-8 conversion for display, case
   mapping, transliteration, or splitting the input into several tokens.
   The more_tokens and need_new_token flags of the step carry the state
   between calls. Several lines of this function, including its return
   statements, are not visible in this excerpt. */
999 int icu_chain_step_next_token(struct icu_chain * chain,
1000 struct icu_chain_step * step,
1003 struct icu_buf_utf16 * src16 = 0;
1004 int got_new_token = 0;
/* an exhausted step has nothing more to deliver */
1006 if (!chain || !chain->src16 || !step || !step->more_tokens)
1009 /* assign utf16 src buffers as needed, advance in previous steps
1010 tokens until non-zero token met, and setting stop condition */
1014 src16 = step->previous->buf16;
1015 /* tokens might be killed in previous steps, therefore looping */
1017 while (step->need_new_token
1018 && step->previous->more_tokens
1021 = icu_chain_step_next_token(chain, step->previous, status);
1024 { /* first step can only work once on chain->src16 input buffer */
1025 src16 = chain->src16;
1026 step->more_tokens = 0;
1033 /* stop if nothing to process */
1034 if (step->need_new_token && !got_new_token)
1036 step->more_tokens = 0;
1040 /* either an old token not finished yet, or a new token, thus
1041 perform the work, eventually put this steps output in
1042 step->buf16 or the chains UTF8 output buffers */
1046 case ICU_chain_step_type_display:
1047 icu_utf16_to_utf8(chain->display8, src16, status);
1049 case ICU_chain_step_type_casemap:
1050 icu_casemap_casemap(step->u.casemap,
1051 step->buf16, src16, status,
1054 case ICU_chain_step_type_transform:
1055 icu_transform_trans(step->u.transform,
1056 step->buf16, src16, status);
1058 case ICU_chain_step_type_tokenize:
1059 /* attach to new src16 token only first time during splitting */
1060 if (step->need_new_token)
1062 icu_tokenizer_attach(step->u.tokenizer, src16, status);
1063 step->need_new_token = 0;
1066 /* splitting one src16 token into multiple buf16 tokens */
1068 = icu_tokenizer_next_token(step->u.tokenizer,
1069 step->buf16, status);
1071 /* make sure to get new previous token if this one had been used up
1072 by recursive call to _same_ step */
1074 if (!step->more_tokens)
1076 step->more_tokens = icu_chain_step_next_token(chain, step, status);
1077 return step->more_tokens; /* avoid one token count too much! */
1085 if (U_FAILURE(*status))
1088 /* if token disappeared into thin air, tell caller */
1089 /* if (!step->buf16->utf16_len && !step->more_tokens) */
/* Hands a new UTF-8 input string to the chain and rewinds it: the token
   count is cleared and every step is marked as having more tokens and
   needing a new input token. The chain stores the caller's pointer
   (no copy), so the string must stay valid while tokens are fetched. */
1096 int icu_chain_assign_cstr(struct icu_chain * chain,
1097 const char * src8cstr,
1100 struct icu_chain_step * stp = 0;
1102 if (!chain || !src8cstr)
1105 chain->src8cstr = src8cstr;
1109 /* clear token count */
1110 chain->token_count = 0;
1112 /* clear all steps stop states */
/* walks the step list through `previous` (loop header not visible) */
1115 stp->more_tokens = 1;
1116 stp->need_new_token = 1;
1117 stp = stp->previous;
1120 /* finally convert UTF8 to UTF16 string if needed */
/* a UTF-16 copy is only required when steps or sort keys will use it */
1121 if (chain->steps || chain->sort)
1122 icu_utf16_from_utf8_cstr(chain->src16, chain->src8cstr, status);
1124 if (U_FAILURE(*status))
1132 int icu_chain_next_token(struct icu_chain * chain,
1137 *status = U_ZERO_ERROR;
1142 /* special case with no steps - same as index type binary */
1145 if (chain->token_count)
1149 chain->token_count++;
1152 icu_sortkey8_from_utf16(chain->coll,
1153 chain->sort8, chain->steps->buf16,
1155 return chain->token_count;
1158 /* usual case, one or more icu chain steps existing */
1161 while(!got_token && chain->steps && chain->steps->more_tokens)
1162 got_token = icu_chain_step_next_token(chain, chain->steps, status);
1166 chain->token_count++;
1168 icu_utf16_to_utf8(chain->norm8, chain->steps->buf16, status);
1171 icu_sortkey8_from_utf16(chain->coll,
1172 chain->sort8, chain->steps->buf16,
1175 return chain->token_count;
/* Reports the running number of the current token, i.e. the chain's
   token_count. Lines between the signature and the return are not
   visible in this excerpt. */
1182 int icu_chain_token_number(struct icu_chain * chain)
1187 return chain->token_count;
/* Returns the display form of the current token as a C string taken
   from the chain's display8 buffer, when that buffer exists. The value
   returned otherwise is not visible in this excerpt. */
1191 const char * icu_chain_token_display(struct icu_chain * chain)
1193 if (chain->display8)
1194 return icu_buf_utf8_to_cstr(chain->display8);
/* Returns the normalized form of the current token. Two sources are
   visible: the untouched input string (chain->src8cstr) and the chain's
   norm8 buffer; the conditions choosing between them are not visible in
   this excerpt. */
1199 const char * icu_chain_token_norm(struct icu_chain * chain)
1202 return chain->src8cstr;
1205 return icu_buf_utf8_to_cstr(chain->norm8);
/* Returns the sort key of the current token as a C string taken from
   the chain's sort8 buffer. Lines between the signature and the return
   are not visible in this excerpt. */
1210 const char * icu_chain_token_sortkey(struct icu_chain * chain)
1213 return icu_buf_utf8_to_cstr(chain->sort8);
/* Gives access to a collator of the chain.
   NOTE(review): the body is not part of this excerpt; presumably it
   returns chain->coll - confirm. */
1218 const UCollator * icu_chain_get_coll(struct icu_chain * chain)
1223 #endif /* YAZ_HAVE_ICU */
1228 * c-file-style: "Stroustrup"
1229 * indent-tabs-mode: nil
1231 * vim: shiftwidth=4 tabstop=8 expandtab