1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2011 Index Data
3 * See the file LICENSE for details.
6 * \file ccl_stop_words.c
7 * \brief Removes stop words from terms in RPN tree
18 struct ccl_stop_info {
21 struct ccl_stop_info *next;
24 struct ccl_stop_words {
26 NMEM nmem; /* memory for removed items */
27 struct ccl_stop_info *removed_items;
30 static void append_removed_item(ccl_stop_words_t csw,
32 const char *t, size_t len)
34 struct ccl_stop_info *csi = (struct ccl_stop_info *)
35 nmem_malloc(csw->nmem, sizeof(*csi));
36 struct ccl_stop_info **csip = &csw->removed_items;
38 csi->qualname = nmem_strdup(csw->nmem, qname);
42 csi->term = (char *) nmem_malloc(csw->nmem, len+1);
43 memcpy(csi->term, t, len);
44 csi->term[len] = '\0';
48 csip = &(*csip)->next;
53 ccl_stop_words_t ccl_stop_words_create(void)
55 NMEM nmem = nmem_create();
56 ccl_stop_words_t csw = (ccl_stop_words_t) xmalloc(sizeof(*csw));
58 csw->removed_items = 0;
59 csw->blank_chars = xstrdup(" \r\n\t");
63 void ccl_stop_words_destroy(ccl_stop_words_t csw)
67 nmem_destroy(csw->nmem);
68 xfree(csw->blank_chars);
73 struct ccl_rpn_node *ccl_remove_stop_r(ccl_stop_words_t csw,
75 struct ccl_rpn_node *p)
77 struct ccl_rpn_node *left, *right;
84 left = ccl_remove_stop_r(csw, bibset, p->u.p[0]);
85 right = ccl_remove_stop_r(csw, bibset, p->u.p[1]);
88 /* we must delete our binary node and return child (if any) */
106 char *cp = p->u.t.term;
110 while (*cp && strchr(csw->blank_chars, *cp))
117 while (*cp && !strchr(csw->blank_chars, *cp))
121 size_t len = cp - cp0;
122 if (ccl_search_stop(bibset, p->u.t.qual,
125 append_removed_item(csw, p->u.t.qual,
127 while (*cp && strchr(csw->blank_chars, *cp))
129 memmove(cp0, cp, strlen(cp)+1);
138 /* chop right blanks .. and see if the term becomes empty */
139 if (p->u.t.term && csw->removed_items)
141 char *cp = p->u.t.term + strlen(p->u.t.term);
144 if (cp == p->u.t.term)
146 /* term is empty / blank */
150 if (!strchr(csw->blank_chars, cp[-1]))
162 int ccl_stop_words_tree(ccl_stop_words_t csw,
163 CCL_bibset bibset, struct ccl_rpn_node **t)
165 struct ccl_rpn_node *r;
167 /* remove list items */
168 nmem_reset(csw->nmem);
169 csw->removed_items = 0;
171 r = ccl_remove_stop_r(csw, bibset, *t);
173 if (csw->removed_items)
178 int ccl_stop_words_info(ccl_stop_words_t csw, int idx,
179 const char **qualname, const char **term)
181 struct ccl_stop_info *csi = csw->removed_items;
183 while (csi && i < idx)
190 *qualname = csi->qualname;
200 * c-file-style: "Stroustrup"
201 * indent-tabs-mode: nil
203 * vim: shiftwidth=4 tabstop=8 expandtab