src/ccltoken.c

   1 /* This file is part of the YAZ toolkit.
   2  * Copyright (C) 1995-2010 Index Data
   3  * See the file LICENSE for details.
   4  */
   5 /**
   6  * \file ccltoken.c
   7  * \brief Implements CCL lexical analyzer (scanner)
   8  */
   9
  10 #include <string.h>
  11 #include <stdlib.h>
  12 #include <ctype.h>
  13
  14 #include "cclp.h"
  15
  16 /*
  17  * token_cmp: Compare token with keyword(s)
  18  * kw:     Keyword list. Each keyword is separated by space.
  19  * token:  CCL token.
  20  * return: 1 if token string matches one of the keywords in list;
  21  *         0 otherwise.
  22  */
  23 static int token_cmp(CCL_parser cclp, const char **kw, struct ccl_token *token)
  24 {
  25     const char **aliases;
  26     int case_sensitive = cclp->ccl_case_sensitive;
  27     int i;
  28
  29     aliases = ccl_qual_search_special(cclp->bibset, "case");
  30     if (aliases)
  31         case_sensitive = atoi(aliases[0]);
  32
  33     for (i = 0; kw[i]; i++)
  34     {
  35         if (token->len == strlen(kw[i]))
  36         {
  37             if (case_sensitive)
  38             {
  39                 if (!memcmp(kw[i], token->name, token->len))
  40                     return 1;
  41             }
  42             else
  43             {
  44                 if (!ccl_memicmp(kw[i], token->name, token->len))
  45                     return 1;
  46             }
  47         }
  48     }
  49     return 0;
  50 }
  51
  52 /*
  53  * ccl_tokenize: tokenize CCL command string.
  54  * return: CCL token list.
  55  */
  56 struct ccl_token *ccl_parser_tokenize(CCL_parser cclp, const char *command)
  57 {
  58     const char **aliases;
  59     const unsigned char *cp = (const unsigned char *) command;
  60     struct ccl_token *first = NULL;
  61     struct ccl_token *last = NULL;
  62     cclp->start_pos = command;
  63
  64     while (1)
  65     {
  66         const unsigned char *cp0 = cp;
  67         while (*cp && strchr(" \t\r\n", *cp))
  68             cp++;
  69         if (!first)
  70         {
  71             first = last = (struct ccl_token *)xmalloc(sizeof(*first));
  72             ccl_assert(first);
  73             last->prev = NULL;
  74         }
  75         else
  76         {
  77             last->next = (struct ccl_token *)xmalloc(sizeof(*first));
  78             ccl_assert(last->next);
  79             last->next->prev = last;
  80             last = last->next;
  81         }
  82         last->ws_prefix_buf = (const char *) cp0;
  83         last->ws_prefix_len = cp - cp0;
  84         last->next = NULL;
  85         last->name = (const char *) cp;
  86         last->len = 1;
  87         switch (*cp++)
  88         {
  89         case '\0':
  90             last->kind = CCL_TOK_EOL;
  91             return first;
  92         case '(':
  93             last->kind = CCL_TOK_LP;
  94             break;
  95         case ')':
  96             last->kind = CCL_TOK_RP;
  97             break;
  98         case ',':
  99             last->kind = CCL_TOK_COMMA;
 100             break;
 101         case '%':
 102         case '!':
 103             last->kind = CCL_TOK_PROX;
 104             while (isdigit(*cp))
 105             {
 106                 ++ last->len;
 107                 cp++;
 108             }
 109             break;
 110         case '>':
 111         case '<':
 112         case '=':
 113             if (*cp == '=' || *cp == '<' || *cp == '>')
 114             {
 115                 cp++;
 116                 last->kind = CCL_TOK_REL;
 117                 ++ last->len;
 118             }
 119             else if (cp[-1] == '=')
 120                 last->kind = CCL_TOK_EQ;
 121             else
 122                 last->kind = CCL_TOK_REL;
 123             break;
 124         case '\"':
 125             last->kind = CCL_TOK_TERM;
 126             last->name = (const char *) cp;
 127             last->len = 0;
 128             while (*cp && *cp != '\"')
 129             {
 130                 cp++;
 131                 ++ last->len;
 132             }
 133             if (*cp == '\"')
 134                 cp++;
 135             break;
 136         default:
 137             if (!strchr("(),%!><= \t\n\r", cp[-1]))
 138             {
 139                 while (*cp && !strchr("(),%!><= \t\n\r", *cp))
 140                 {
 141                     cp++;
 142                     ++ last->len;
 143                 }
 144             }
 145             last->kind = CCL_TOK_TERM;
 146
 147             aliases = ccl_qual_search_special(cclp->bibset, "and");
 148             if (!aliases)
 149                 aliases = cclp->ccl_token_and;
 150             if (token_cmp(cclp, aliases, last))
 151                 last->kind = CCL_TOK_AND;
 152
 153             aliases = ccl_qual_search_special(cclp->bibset, "or");
 154             if (!aliases)
 155                 aliases = cclp->ccl_token_or;
 156             if (token_cmp(cclp, aliases, last))
 157                 last->kind = CCL_TOK_OR;
 158
 159             aliases = ccl_qual_search_special(cclp->bibset, "not");
 160             if (!aliases)
 161                 aliases = cclp->ccl_token_not;
 162             if (token_cmp(cclp, aliases, last))
 163                 last->kind = CCL_TOK_NOT;
 164
 165             aliases = ccl_qual_search_special(cclp->bibset, "set");
 166             if (!aliases)
 167                 aliases = cclp->ccl_token_set;
 168
 169             if (token_cmp(cclp, aliases, last))
 170                 last->kind = CCL_TOK_SET;
 171         }
 172     }
 173     return first;
 174 }
 175
 176 struct ccl_token *ccl_token_add(struct ccl_token *at)
 177 {
 178     struct ccl_token *n = (struct ccl_token *)xmalloc(sizeof(*n));
 179     ccl_assert(n);
 180     n->next = at->next;
 181     n->prev = at;
 182     at->next = n;
 183     if (n->next)
 184         n->next->prev = n;
 185
 186     n->kind = CCL_TOK_TERM;
 187     n->name = 0;
 188     n->len = 0;
 189     n->ws_prefix_buf = 0;
 190     n->ws_prefix_len = 0;
 191     return n;
 192 }
 193
 194 /*
 195  * ccl_token_del: delete CCL tokens
 196  */
 197 void ccl_token_del(struct ccl_token *list)
 198 {
 199     struct ccl_token *list1;
 200
 201     while (list)
 202     {
 203         list1 = list->next;
 204         xfree(list);
 205         list = list1;
 206     }
 207 }
 208
 209 static const char **create_ar(const char *v1, const char *v2)
 210 {
 211     const char **a = (const char **) xmalloc(3 * sizeof(*a));
 212     a[0] = xstrdup(v1);
 213     if (v2)
 214     {
 215         a[1] = xstrdup(v2);
 216         a[2] = 0;
 217     }
 218     else
 219         a[1] = 0;
 220     return a;
 221 }
 222
 223 static void destroy_ar(const char **a)
 224 {
 225     if (a)
 226     {
 227         int i;
 228         for (i = 0; a[i]; i++)
 229             xfree((char *) a[i]);
 230         xfree((char **)a);
 231     }
 232 }
 233
 234 CCL_parser ccl_parser_create(CCL_bibset bibset)
 235 {
 236     CCL_parser p = (CCL_parser)xmalloc(sizeof(*p));
 237     if (!p)
 238         return p;
 239     p->look_token = NULL;
 240     p->error_code = 0;
 241     p->error_pos = NULL;
 242     p->bibset = bibset;
 243
 244     p->ccl_token_and = create_ar("and", 0);
 245     p->ccl_token_or = create_ar("or", 0);
 246     p->ccl_token_not = create_ar("not", "andnot");
 247     p->ccl_token_set = create_ar("set", 0);
 248     p->ccl_case_sensitive = 1;
 249
 250     return p;
 251 }
 252
 253 void ccl_parser_destroy(CCL_parser p)
 254 {
 255     if (!p)
 256         return;
 257     destroy_ar(p->ccl_token_and);
 258     destroy_ar(p->ccl_token_or);
 259     destroy_ar(p->ccl_token_not);
 260     destroy_ar(p->ccl_token_set);
 261     xfree(p);
 262 }
 263
 264 void ccl_parser_set_case(CCL_parser p, int case_sensitivity_flag)
 265 {
 266     if (p)
 267         p->ccl_case_sensitive = case_sensitivity_flag;
 268 }
 269
 270 int ccl_parser_get_error(CCL_parser cclp, int *pos)
 271 {
 272     if (pos && cclp->error_code)
 273         *pos = cclp->error_pos - cclp->start_pos;
 274     return cclp->error_code;
 275 }
 276
 277 /*
 278  * Local variables:
 279  * c-basic-offset: 4
 280  * c-file-style: "Stroustrup"
 281  * indent-tabs-mode: nil
 282  * End:
 283  * vim: shiftwidth=4 tabstop=8 expandtab
 284  */
 285