1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2011 Index Data
3 * See the file LICENSE for details.
7 * \brief Simple tokenizer system.
17 #include <yaz/wrbuf.h>
18 #include <yaz/tokenizer.h>
/* Parser state. Only some members are visible in this excerpt; other code in
 * this file also reaches unget_byte, look, cfg, get_byte_data and wr_string
 * through a yaz_tok_parse_t. */
20 struct yaz_tok_parse {
/* Callback that supplies the next input byte; get_byte() invokes it with the
 * address of get_byte_data. */
26 yaz_tok_get_byte_t get_byte_func;
/* NOTE(review): the two members below are only ever accessed through a
 * yaz_tok_cfg_t in this file, so they presumably belong to the configuration
 * struct whose header is not shown here - confirm.
 * quote_tokens_begin holds the characters that open a quoted string;
 * quote_tokens_end holds, at the same index, the character that closes it. */
35 char *quote_tokens_begin;
36 char *quote_tokens_end;
/* Replace the set of single-character tokens of configuration t.
 *
 * t:      configuration to modify.
 * simple: string whose characters are each matched as a single token by
 *         yaz_tok_move(). It is copied with xstrdup(), so the caller keeps
 *         ownership of the argument.
 *
 * The previously stored set is released with xfree() before the copy is
 * stored. */
39 void yaz_tok_cfg_single_tokens(yaz_tok_cfg_t t, const char *simple)
41 xfree(t->single_tokens);
42 t->single_tokens = xstrdup(simple);
/* Allocate a tokenizer configuration and fill in default character sets.
 * Every set is a heap copy made with xstrdup(); yaz_tok_cfg_destroy()
 * releases them with xfree().
 * NOTE(review): the lines after the last assignment are missing from this
 * excerpt; presumably they initialise ref_count and return t - confirm. */
45 yaz_tok_cfg_t yaz_tok_cfg_create(void)
47 yaz_tok_cfg_t t = (yaz_tok_cfg_t) xmalloc(sizeof(*t));
/* default white space: blank, tab, carriage return, line feed */
48 t->white_space = xstrdup(" \t\r\n");
/* no single-character tokens until yaz_tok_cfg_single_tokens() is called */
49 t->single_tokens = xstrdup("");
/* a double quote both opens and closes a quoted string */
50 t->quote_tokens_begin = xstrdup("\"");
51 t->quote_tokens_end = xstrdup("\"");
/* '#' is the only character in the comment set */
52 t->comment = xstrdup("#");
/* Release configuration t.
 * The character-set strings are freed only when ref_count is zero at the test
 * below; yaz_tok_parse_create() increments that counter for each parser that
 * uses the configuration.
 * NOTE(review): the decrement of ref_count and the release of t->comment and
 * of t itself are not visible in this excerpt - confirm they exist. */
57 void yaz_tok_cfg_destroy(yaz_tok_cfg_t t)
/* nobody references the configuration any more: free its strings */
60 if (t->ref_count == 0)
62 xfree(t->white_space);
63 xfree(t->single_tokens);
64 xfree(t->quote_tokens_begin);
65 xfree(t->quote_tokens_end);
/* Byte source handed to yaz_tok_parse_create() by yaz_tok_parse_buf().
 *
 * vp: address of the callback data; *vp is treated as a pointer to const
 *     char, i.e. a cursor into the caller's buffer.
 *
 * Returns an int. The lines that read the byte, move the cursor and return
 * are missing from this excerpt. */
71 static int read_buf(void **vp)
/* load the current cursor from the callback data */
73 const char *cp = *(const char **) vp;
/* write the cursor back so the next call continues from cp */
78 *(const char **)vp = cp;
/* Create a parser for configuration t that takes its input from the string
 * buf, using read_buf() as the byte source.
 *
 * buf is passed on as the callback data without being copied.
 * NOTE(review): it therefore presumably has to stay valid for as long as the
 * parser is used - confirm. */
83 yaz_tok_parse_t yaz_tok_parse_buf(yaz_tok_cfg_t t, const char *buf)
85 return yaz_tok_parse_create(t, read_buf, (void *) buf);
/* Fetch the next input byte for parser tp.
 * ch starts out as the value held in tp->unget_byte; the condition that
 * decides between that value and a fresh byte from the callback is not
 * visible in this excerpt. */
88 static int get_byte(yaz_tok_parse_t tp)
90 int ch = tp->unget_byte;
/* a parser without a byte source is a programming error */
91 assert(tp->get_byte_func);
/* the callback receives the address of get_byte_data so that it can update
 * the data in place, as read_buf() does with its cursor */
95 ch = tp->get_byte_func(&tp->get_byte_data);
/* Push byte ch back onto parser tp.
 * NOTE(review): the body is missing from this excerpt; presumably it stores
 * ch in tp->unget_byte, which get_byte() reads first - confirm. */
99 static void unget_byte(yaz_tok_parse_t tp, int ch)
/* Create a parser bound to configuration t.
 *
 * t: configuration; its ref_count is incremented here, and
 *    yaz_tok_parse_destroy() later hands it to yaz_tok_cfg_destroy().
 * h: callback that returns the next input byte.
 * The third parameter's declaration line is not shown; the body refers to it
 * as vp and stores it as the data passed to h.
 *
 * Several lines, including the return, are missing from this excerpt. */
104 yaz_tok_parse_t yaz_tok_parse_create(yaz_tok_cfg_t t,
105 yaz_tok_get_byte_t h,
108 yaz_tok_parse_t tp = (yaz_tok_parse_t) xmalloc(sizeof(*tp));
/* take a reference on the configuration; tp->cfg is presumably assigned from
 * t on a line that is not shown - confirm */
111 tp->cfg->ref_count++;
112 tp->get_byte_func = h;
113 tp->get_byte_data = vp;
/* look starts out as YAZ_TOK_ERROR */
115 tp->look = YAZ_TOK_ERROR;
/* buffer in which yaz_tok_move() collects the text of string tokens */
118 tp->wr_string = wrbuf_alloc();
/* Destroy parser tp: hand its configuration to yaz_tok_cfg_destroy() and
 * destroy the token text buffer.
 * NOTE(review): the release of tp itself is not visible in this excerpt -
 * confirm it exists. */
123 void yaz_tok_parse_destroy(yaz_tok_parse_t tp)
125 yaz_tok_cfg_destroy(tp->cfg);
126 wrbuf_destroy(tp->wr_string);
/* Advance parser tp to the next token.
 *
 * After white space has been skipped, the current byte is classified against
 * the configuration's character sets in this order: comment set, single
 * tokens, opening quotes, and otherwise the start of an unquoted string.
 * The text of quoted and unquoted strings is collected in tp->wr_string and
 * can be read with yaz_tok_parse_string().
 *
 * Many lines of the body (loop bodies, the branch preceding the comment
 * test, the return) are missing from this excerpt, so the notes below cover
 * only what is visible. */
130 int yaz_tok_move(yaz_tok_parse_t tp)
132 yaz_tok_cfg_t t = tp->cfg;
134 int ch = get_byte(tp);
136 /* skip white space */
/* ch is tested first because strchr() also matches the terminating NUL */
137 while (ch && strchr(t->white_space, ch))
/* this else-branch pairs with a test that is not shown */
141 else if (strchr(t->comment, ch))
143 else if ((cp = strchr(t->single_tokens, ch)))
144 ch = *cp; /* single token match */
145 else if ((cp = strchr(t->quote_tokens_begin, ch)))
146 { /* quoted string */
/* the closing character sits at the same index as the opening quote */
147 int end_ch = t->quote_tokens_end[cp - t->quote_tokens_begin];
/* start with an empty text buffer */
149 wrbuf_rewind(tp->wr_string);
/* collect bytes until the closing character or a zero byte */
150 while (ch && ch != end_ch)
151 wrbuf_putc(tp->wr_string, ch);
/* token code for a quoted string */
155 ch = YAZ_TOK_QSTRING;
158 { /* unquoted string */
159 wrbuf_rewind(tp->wr_string);
/* an unquoted string ends at a zero byte, white space, a single token or a
 * character from the comment set */
160 while (ch && !strchr(t->white_space, ch)
161 && !strchr(t->single_tokens, ch)
162 && !strchr(t->comment, ch))
164 wrbuf_putc(tp->wr_string, ch);
/* Return the contents of the parser's token text buffer as a C string,
 * obtained with wrbuf_cstr(). This is the buffer in which yaz_tok_move()
 * collects the text of quoted and unquoted strings. */
174 const char *yaz_tok_parse_string(yaz_tok_parse_t tp)
176 return wrbuf_cstr(tp->wr_string);
182 * c-file-style: "Stroustrup"
183 * indent-tabs-mode: nil
185 * vim: shiftwidth=4 tabstop=8 expandtab