projects
/
pazpar2-moved-to-github.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Follow style of previous NEWS entries
[pazpar2-moved-to-github.git]
/
src
/
charsets.c
diff --git
a/src/charsets.c
b/src/charsets.c
index
dfc1015
..
7bbe102
100644
(file)
--- a/
src/charsets.c
+++ b/
src/charsets.c
@@
-1,5
+1,5
@@
/* This file is part of Pazpar2.
/* This file is part of Pazpar2.
- Copyright (C) 2006-2010 Index Data
+ Copyright (C) 2006-2011 Index Data
Pazpar2 is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Pazpar2 is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
@@
-38,12
+38,6
@@
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#if YAZ_HAVE_ICU
#include <yaz/icu.h>
#if YAZ_HAVE_ICU
#include <yaz/icu.h>
-
-#if YAZ_VERSIONL >= 0x40002
-/* YAZ 4.0.2 or later has icu_iter */
-#define ICU_ITER 1
-#endif
-
#endif
/* charset handle */
#endif
/* charset handle */
@@
-57,6
+51,7
@@
struct pp2_charset_s {
#endif
};
#endif
};
+static const char *pp2_relevance_token_null(pp2_relevance_token_t prt);
static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt);
static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt);
static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt);
static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt);
@@
-72,7
+67,7
@@
struct pp2_relevance_token_s {
pp2_charset_t pct; /* our main charset handle (type+config) */
WRBUF norm_str; /* normalized string we return (temporarily) */
WRBUF sort_str; /* sort string we return (temporarily) */
pp2_charset_t pct; /* our main charset handle (type+config) */
WRBUF norm_str; /* normalized string we return (temporarily) */
WRBUF sort_str; /* sort string we return (temporarily) */
-#if ICU_ITER
+#if YAZ_HAVE_ICU
yaz_icu_iter_t iter;
#endif
};
yaz_icu_iter_t iter;
#endif
};
@@
-115,11
+110,18
@@
void pp2_charset_incref(pp2_charset_t pct)
(pct->ref_count)++;
}
(pct->ref_count)++;
}
-pp2_charset_t pp2_charset_create(struct icu_chain * icu_chn)
+pp2_charset_t pp2_charset_create_a_to_z(void)
+{
+ pp2_charset_t pct = pp2_charset_create(0);
+ pct->token_next_handler = pp2_relevance_token_a_to_z;
+ return pct;
+}
+
+pp2_charset_t pp2_charset_create(struct icu_chain *icu_chn)
{
pp2_charset_t pct = xmalloc(sizeof(*pct));
{
pp2_charset_t pct = xmalloc(sizeof(*pct));
- pct->token_next_handler = pp2_relevance_token_a_to_z;
+ pct->token_next_handler = pp2_relevance_token_null;
pct->get_sort_handler = pp2_get_sort_ascii;
pct->ref_count = 1;
#if YAZ_HAVE_ICU
pct->get_sort_handler = pp2_get_sort_ascii;
pct->ref_count = 1;
#if YAZ_HAVE_ICU
@@
-151,14
+153,30
@@
void pp2_charset_destroy(pp2_charset_t pct)
}
}
}
}
-pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct,
- const char *buf,
- int skip_article)
+pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct)
{
pp2_relevance_token_t prt = xmalloc(sizeof(*prt));
assert(pct);
{
pp2_relevance_token_t prt = xmalloc(sizeof(*prt));
assert(pct);
+ prt->norm_str = wrbuf_alloc();
+ prt->sort_str = wrbuf_alloc();
+ prt->cp = 0;
+ prt->last_cp = 0;
+ prt->pct = pct;
+
+#if YAZ_HAVE_ICU
+ prt->iter = 0;
+ if (pct->icu_chn)
+ prt->iter = icu_iter_create(pct->icu_chn);
+#endif
+ return prt;
+}
+
+void pp2_relevance_first(pp2_relevance_token_t prt,
+ const char *buf,
+ int skip_article)
+{
if (skip_article)
{
const char *p = buf;
if (skip_article)
{
const char *p = buf;
@@
-176,39
+194,23
@@
pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct,
buf = p;
}
buf = p;
}
- prt->norm_str = wrbuf_alloc();
- prt->sort_str = wrbuf_alloc();
+ wrbuf_rewind(prt->norm_str);
+ wrbuf_rewind(prt->sort_str);
prt->cp = buf;
prt->last_cp = 0;
prt->cp = buf;
prt->last_cp = 0;
- prt->pct = pct;
#if YAZ_HAVE_ICU
#if YAZ_HAVE_ICU
-#if ICU_ITER
- prt->iter = 0;
-#endif
- if (pct->icu_chn)
+ if (prt->iter)
{
{
-#if ICU_ITER
- prt->iter = icu_iter_create(pct->icu_chn);
icu_iter_first(prt->iter, buf);
icu_iter_first(prt->iter, buf);
-#else
- int ok = 0;
- pct->icu_sts = U_ZERO_ERROR;
-
- ok = icu_chain_assign_cstr(pct->icu_chn, buf, &pct->icu_sts);
-#endif
- //printf("\nfield ok: %d '%s'\n", ok, buf);
- prt->pct = pct;
}
#endif // YAZ_HAVE_ICU
}
#endif // YAZ_HAVE_ICU
- return prt;
}
}
-
void pp2_relevance_token_destroy(pp2_relevance_token_t prt)
{
assert(prt);
void pp2_relevance_token_destroy(pp2_relevance_token_t prt)
{
assert(prt);
-#if ICU_ITER
+#if YAZ_HAVE_ICU
if (prt->iter)
icu_iter_destroy(prt->iter);
#endif
if (prt->iter)
icu_iter_destroy(prt->iter);
#endif
@@
-278,35
+280,30
@@
static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt)
}
}
}
}
+static const char *pp2_relevance_token_null(pp2_relevance_token_t prt)
+{
+ const char *cp = prt->cp;
+
+ prt->last_cp = *cp ? cp : 0;
+ while (*cp)
+ cp++;
+ prt->cp = cp;
+ return prt->last_cp;
+}
#if YAZ_HAVE_ICU
static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt)
{
#if YAZ_HAVE_ICU
static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt)
{
-#if ICU_ITER
if (icu_iter_next(prt->iter))
{
return icu_iter_get_norm(prt->iter);
}
if (icu_iter_next(prt->iter))
{
return icu_iter_get_norm(prt->iter);
}
-#else
- if (icu_chain_next_token(prt->pct->icu_chn, &prt->pct->icu_sts))
- {
- if (U_FAILURE(prt->pct->icu_sts))
- {
- return 0;
- }
- return icu_chain_token_norm(prt->pct->icu_chn);
- }
-#endif
return 0;
}
static const char *pp2_get_sort_icu(pp2_relevance_token_t prt)
{
return 0;
}
static const char *pp2_get_sort_icu(pp2_relevance_token_t prt)
{
-#if ICU_ITER
return icu_iter_get_sortkey(prt->iter);
return icu_iter_get_sortkey(prt->iter);
-#else
- return icu_chain_token_sortkey(prt->pct->icu_chn);
-#endif
}
#endif // YAZ_HAVE_ICU
}
#endif // YAZ_HAVE_ICU