1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2013 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements stemmer wrapper
17 #include <yaz/stemmer.h>
19 #include <yaz/xmalloc.h>
21 #include <libstemmer.h>
23 #include <unicode/ustring.h> /* some more string fcns*/
24 #include <unicode/uchar.h> /* char names */
/* Identifies which stemming back-end a stemmer instance uses; the value is
 * stored in the instance's `implementation` field and switched on by
 * yaz_stemmer_stem() and yaz_stemmer_destroy().
 * NOTE(review): the enumerator list is not visible in this excerpt. The code
 * below refers to yaz_snowball and yaz_no_operation, which are presumably
 * declared here -- confirm against the full file. */
26 enum stemmer_implementation {
/* NOTE(review): the fields the following comment refers to are not visible in
 * this excerpt. yaz_stemmer_clone() reads stemmer->locale and stemmer->rule,
 * so presumably those two fields are meant -- confirm against the full file. */
33 // Required for cloning.
/* Handle to the Snowball stemmer: obtained from sb_stemmer_new() in
 * yaz_stemmer_snowball_create() and released with sb_stemmer_delete() in
 * yaz_stemmer_destroy(). */
36 struct sb_stemmer *sb_stemmer;
/* Looks up the character encoding name for a stemmer.
 * The value returned is passed by yaz_stemmer_snowball_create() as the second
 * argument (character encoding) of sb_stemmer_new().
 * The body is not visible in this excerpt, so how the arguments are used
 * cannot be stated from here.
 * NOTE(review): the only visible caller passes its `locale` as the first
 * argument although the parameter is named `charenc` -- confirm which is
 * intended. */
39 const char* yaz_stemmer_lookup_charenc(const char *charenc, const char *rule) {
/* Looks up the stemming algorithm name for a locale/rule pair.
 * The value returned is passed by yaz_stemmer_snowball_create() as the first
 * argument (algorithm) of sb_stemmer_new().
 * The body is not visible in this excerpt, so the mapping itself cannot be
 * stated from here. */
43 const char* yaz_stemmer_lookup_algorithm(const char *locale, const char *rule) {
/* Creates a stemmer instance backed by a Snowball (libstemmer) stemmer.
 *
 * locale, rule: used to look up the algorithm and character encoding; both
 *               strings are duplicated with xstrdup() into the new instance.
 * status:       set to U_ILLEGAL_ARGUMENT_ERROR on the failure path.
 *
 * NOTE(review): several lines of this function (the failure condition, the
 * return statements and the braces) are not visible in this excerpt; the
 * comments below describe only the visible statements. */
47 yaz_stemmer_p yaz_stemmer_snowball_create(const char *locale, const char *rule, UErrorCode *status) {
/* NOTE(review): `locale` is passed for the parameter that
 * yaz_stemmer_lookup_charenc() names `charenc` -- confirm this is intended. */
48 const char *charenc = yaz_stemmer_lookup_charenc(locale, rule);
49 const char *algorithm = yaz_stemmer_lookup_algorithm(locale,rule);
50 struct sb_stemmer *stemmer = sb_stemmer_new(algorithm, charenc);
51 yaz_stemmer_p yaz_stemmer;
/* Failure path; the guarding condition is not visible, presumably it tests
 * whether sb_stemmer_new() returned no stemmer. */
53 *status = U_ILLEGAL_ARGUMENT_ERROR;
/* NOTE(review): the message below has no space between the first %s and
 * "rule", and "Showball" looks like a typo for "Snowball". Both are runtime
 * text and are left untouched here. */
54 yaz_log(YLOG_FATAL, "yaz_stemmer: Failed to create snowball stemmer from locale %srule %s. Showball: charenc %s algorithm %s ",
55 locale, rule, charenc, algorithm);
58 yaz_log(YLOG_DEBUG, "created snowball stemmer: algorithm %s charenc %s ", algorithm, charenc);
/* Build the wrapper: record the back-end, keep private copies of locale and
 * rule (read again by yaz_stemmer_clone(), freed by yaz_stemmer_destroy()),
 * and store the Snowball handle. */
59 yaz_stemmer = xmalloc(sizeof(*yaz_stemmer));
60 yaz_stemmer->implementation = yaz_snowball;
62 yaz_stemmer->locale = xstrdup(locale);
63 yaz_stemmer->rule = xstrdup(rule);
64 yaz_stemmer->sb_stemmer = stemmer;
/* NOTE(review): this repeats the debug message already logged above. */
65 yaz_log(YLOG_DEBUG, "created snowball stemmer: algorithm %s charenc %s ", algorithm, charenc);
/* Public constructor: creates a stemmer for the given locale and rule.
 * Resets *status to U_ZERO_ERROR, logs the request and delegates to
 * yaz_stemmer_snowball_create(), whose result is returned unchanged. */
69 yaz_stemmer_p yaz_stemmer_create(const char *locale, const char *rule, UErrorCode *status) {
70 *status = U_ZERO_ERROR;
71 // Dispatch logic will be required here once more algorithms are implemented.
72 yaz_log(YLOG_DEBUG, "create stemmer: locale %s rule %s ", locale, rule);
73 return yaz_stemmer_snowball_create(locale, rule, status);
/* Clones a stemmer by creating a new one from the locale and rule stored in
 * the given instance, and returns the result of yaz_stemmer_create().
 * NOTE(review): the lines between the declaration of `error` and the return
 * are not visible in this excerpt (presumably a guard for a missing stemmer
 * -- confirm). In the visible lines the creation status lands in the local
 * `error` and is not passed back to the caller. */
76 yaz_stemmer_p yaz_stemmer_clone(yaz_stemmer_p stemmer) {
77 UErrorCode error = U_ZERO_ERROR;
80 return yaz_stemmer_create(stemmer->locale, stemmer->rule, &error);
/* Stems the UTF-16 text in `src` and writes the result to `dst`.
 *
 * stemmer: instance whose `implementation` field selects the code path.
 * dst:     receives the stemmed text, or a copy of `src` on the copy paths.
 * src:     input text.
 * status:  ICU status, written by the UTF-16/UTF-8 conversion helpers.
 *
 * NOTE(review): the case labels, some conditions and the braces of this
 * function are not visible in this excerpt; the comments below describe only
 * the visible statements. */
83 void yaz_stemmer_stem(yaz_stemmer_p stemmer, struct icu_buf_utf16 *dst, struct icu_buf_utf16* src, UErrorCode *status)
85 switch(stemmer->implementation) {
/* Snowball path (case label not visible): the stemmer is fed UTF-8, so `src`
 * is first converted into a temporary UTF-8 buffer. */
87 struct icu_buf_utf8 *utf8_buf = icu_buf_utf8_create(0);
88 icu_utf16_to_utf8(utf8_buf, src, status);
/* NOTE(review): this exact comparison also rejects ICU warning codes;
 * U_SUCCESS(*status) is the usual test -- confirm whether warnings should be
 * accepted here. */
89 if (*status == U_ZERO_ERROR) {
90 const sb_symbol *cstr = (const sb_symbol*) icu_buf_utf8_to_cstr(utf8_buf);
91 const sb_symbol *sb_symbol = sb_stemmer_stem(stemmer->sb_stemmer, cstr, utf8_buf->utf8_len);
/* The condition guarding this copy is not visible; presumably it handles
 * sb_stemmer_stem() returning no result by passing `src` through unchanged. */
93 icu_buf_utf16_copy(dst, src);
/* Otherwise convert the stemmed UTF-8 string back to UTF-16 in `dst`. */
97 const char *cstr2 = (const char *) sb_symbol;
98 icu_utf16_from_utf8_cstr(dst, cstr2 , status);
100 yaz_log(YLOG_DEBUG, "stemming %s to %s ", cstr, cstr2);
/* Release the temporary UTF-8 buffer created above. */
104 icu_buf_utf8_destroy(utf8_buf);
108 case yaz_no_operation:
109 yaz_log(YLOG_DEBUG, "Stemmer (No operation) called");
111 // Default return the same as given.
112 icu_buf_utf16_copy(dst, src);
/* Releases the resources held by a stemmer instance: the Snowball handle
 * (via sb_stemmer_delete()) and the stored locale and rule strings (via
 * xfree()).
 * NOTE(review): the check announced by the "no stemmer" comment, the case
 * labels and any release of the instance itself are not visible in this
 * excerpt -- confirm against the full file. */
117 void yaz_stemmer_destroy(yaz_stemmer_p stemmer)
119 /* Handle no stemmer correctly */
123 switch (stemmer->implementation) {
/* Snowball path (case label not visible): free the libstemmer handle. */
125 sb_stemmer_delete(stemmer->sb_stemmer);
/* Free the copies made with xstrdup() in yaz_stemmer_snowball_create(). */
128 xfree(stemmer->locale);
129 xfree(stemmer->rule);
133 #endif /* YAZ_HAVE_ICU */