1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2008 Index Data
3 * See the file LICENSE for details.
8 * \brief Simple tokenizer system.
16 #include <yaz/wrbuf.h>
17 #include <yaz/tokenizer.h>
/* Parser state for one tokenizing run. Only part of the definition is
 * visible in this excerpt; the remaining members are not shown. */
19 struct yaz_tok_parse {
/* Callback that supplies the next input byte; get_byte() invokes it with
 * the address of the parser's get_byte_data member. */
25 yaz_tok_get_byte_t get_byte_func;
/* NOTE(review): the enclosing struct header is not visible here. These two
 * members are accessed through a yaz_tok_cfg_t elsewhere in this file, so
 * they presumably belong to the tokenizer configuration -- confirm.
 * yaz_tok_move() pairs them by position: the character at index i of
 * quote_tokens_begin is closed by the character at index i of
 * quote_tokens_end. */
34 char *quote_tokens_begin;
35 char *quote_tokens_end;
/* Replaces the configuration's set of single-character tokens.
 * t:      configuration to modify.
 * simple: string whose characters each form a token on their own; it is
 *         copied, so the caller keeps ownership of its buffer. */
38 void yaz_tok_cfg_single_tokens(yaz_tok_cfg_t t, const char *simple)
/* Release the previous set before storing the private copy. */
40 xfree(t->single_tokens);
41 t->single_tokens = xstrdup(simple);
/* Allocates a tokenizer configuration and fills in the default character
 * classes. Every string member is a heap copy owned by the configuration.
 * NOTE(review): the ref_count initialisation and the return statement are
 * not visible in this excerpt. */
44 yaz_tok_cfg_t yaz_tok_cfg_create(void)
46 yaz_tok_cfg_t t = (yaz_tok_cfg_t) xmalloc(sizeof(*t));
/* Default separators: space, tab, carriage return and newline. */
47 t->white_space = xstrdup(" \t\r\n");
/* No single-character tokens until yaz_tok_cfg_single_tokens() is called. */
48 t->single_tokens = xstrdup("");
/* A double quote both opens and closes a quoted string by default. */
49 t->quote_tokens_begin = xstrdup("\"");
50 t->quote_tokens_end = xstrdup("\"");
/* Default comment introducer is '#'. */
51 t->comment = xstrdup("#");
/* Releases a tokenizer configuration. The string members are freed only
 * when ref_count is zero; yaz_tok_parse_create() increments ref_count for
 * each parser that uses the configuration.
 * NOTE(review): the statements before the test (presumably a ref_count
 * decrement) and after the last xfree shown here are not visible in this
 * excerpt -- confirm against the full file. */
56 void yaz_tok_cfg_destroy(yaz_tok_cfg_t t)
59 if (t->ref_count == 0)
61 xfree(t->white_space);
62 xfree(t->single_tokens);
63 xfree(t->quote_tokens_begin);
64 xfree(t->quote_tokens_end);
/* Byte source used by yaz_tok_parse_buf(): *vp is treated as a cursor
 * (const char *) into the caller's buffer.
 * NOTE(review): the lines that fetch the byte and return it are not visible
 * in this excerpt; presumably the cursor is advanced past the byte that is
 * returned -- confirm. */
70 static int read_buf(void **vp)
/* Load the current cursor position from the opaque slot. */
72 const char *cp = *(const char **) vp;
/* Store the cursor back so the next call continues from here. */
77 *(const char **)vp = cp;
/* Creates a parser that reads its input from the string buf.
 * t:   tokenizer configuration to use.
 * buf: input text; only the pointer is handed over (as the opaque data for
 *      read_buf), nothing is copied here.
 * Returns the result of yaz_tok_parse_create(). */
82 yaz_tok_parse_t yaz_tok_parse_buf(yaz_tok_cfg_t t, const char *buf)
84 return yaz_tok_parse_create(t, read_buf, (void *) buf);
/* Fetches the next input byte for the parser tp.
 * NOTE(review): the condition between the assert and the callback call is
 * not visible in this excerpt; presumably the pushed-back byte in
 * tp->unget_byte is consumed first and the callback is used only when none
 * is pending -- confirm. */
87 static int get_byte(yaz_tok_parse_t tp)
/* Start from the push-back slot. */
89 int ch = tp->unget_byte;
/* A byte source must have been installed by yaz_tok_parse_create(). */
90 assert(tp->get_byte_func);
/* The callback receives the address of its opaque data so it can update it. */
94 ch = tp->get_byte_func(&tp->get_byte_data);
/* Pushes the byte ch back onto the parser tp.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * stores ch in tp->unget_byte, which get_byte() reads -- confirm. */
98 static void unget_byte(yaz_tok_parse_t tp, int ch)
/* Creates a parser bound to configuration t that pulls bytes through the
 * callback h.
 * t: tokenizer configuration; its ref_count is incremented here.
 * h: byte-source callback, stored as get_byte_func.
 * NOTE(review): the third parameter's declaration line is not visible in
 * this excerpt; the body stores a value named vp as the callback's opaque
 * data. The assignment of tp->cfg and the return are also not visible. */
103 yaz_tok_parse_t yaz_tok_parse_create(yaz_tok_cfg_t t,
104 yaz_tok_get_byte_t h,
107 yaz_tok_parse_t tp = (yaz_tok_parse_t) xmalloc(sizeof(*tp));
/* The parser holds a counted reference on the configuration. */
110 tp->cfg->ref_count++;
111 tp->get_byte_func = h;
112 tp->get_byte_data = vp;
/* The look member starts out as YAZ_TOK_ERROR. */
114 tp->look = YAZ_TOK_ERROR;
/* Buffer that yaz_tok_move() fills with the text of string tokens. */
117 tp->wr_string = wrbuf_alloc();
/* Tears down the parser tp: passes its configuration to
 * yaz_tok_cfg_destroy() and destroys the token text buffer.
 * NOTE(review): the release of tp itself is not visible in this excerpt. */
122 void yaz_tok_parse_destroy(yaz_tok_parse_t tp)
124 yaz_tok_cfg_destroy(tp->cfg);
125 wrbuf_destroy(tp->wr_string);
/* Reads the next token from the parser tp, classifying it with the
 * character sets of the parser's configuration. The text of quoted and
 * unquoted strings is collected in tp->wr_string.
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (loop bodies that fetch the next byte, the end-of-input and
 * comment handling, the token code for unquoted strings, and the return
 * statement) -- the comments below describe only what is shown. */
129 int yaz_tok_move(yaz_tok_parse_t tp)
131 yaz_tok_cfg_t t = tp->cfg;
133 int ch = get_byte(tp);
135 /* skip white space */
/* The ch test comes first because strchr() also matches the terminating
 * NUL of its string argument when asked to find 0. */
136 while (ch && strchr(t->white_space, ch))
/* Byte is one of the configured comment characters. */
140 else if (strchr(t->comment, ch))
/* Byte is a single-character token: the token code is the character. */
142 else if ((cp = strchr(t->single_tokens, ch)))
143 ch = *cp; /* single token match */
144 else if ((cp = strchr(t->quote_tokens_begin, ch)))
145 { /* quoted string */
/* The closing quote is the character at the same index in
 * quote_tokens_end as the opening quote has in quote_tokens_begin. */
146 int end_ch = t->quote_tokens_end[cp - t->quote_tokens_begin];
/* Discard the previous token's text before collecting this one. */
148 wrbuf_rewind(tp->wr_string);
/* Collect bytes until the closing quote or a zero byte. */
149 while (ch && ch != end_ch)
150 wrbuf_putc(tp->wr_string, ch);
154 ch = YAZ_TOK_QSTRING;
157 { /* unquoted string */
158 wrbuf_rewind(tp->wr_string);
/* An unquoted string ends at a zero byte, white space, a single-character
 * token, or a comment character. */
159 while (ch && !strchr(t->white_space, ch)
160 && !strchr(t->single_tokens, ch)
161 && !strchr(t->comment, ch))
163 wrbuf_putc(tp->wr_string, ch);
/* Returns the contents of tp->wr_string, the buffer yaz_tok_move() fills
 * with string token text, as a C string. The pointer refers to that buffer
 * rather than to a copy. */
173 const char *yaz_tok_parse_string(yaz_tok_parse_t tp)
175 return wrbuf_cstr(tp->wr_string);
181 * indent-tabs-mode: nil
183 * vim: shiftwidth=4 tabstop=8 expandtab