src/ccltoken.c

   1 /* This file is part of the YAZ toolkit.
   2  * Copyright (C) 1995-2013 Index Data
   3  * See the file LICENSE for details.
   4  */
   5 /**
   6  * \file ccltoken.c
   7  * \brief Implements CCL lexical analyzer (scanner)
   8  */
   9 #if HAVE_CONFIG_H
  10 #include <config.h>
  11 #endif
  12
  13 #include <string.h>
  14 #include <stdlib.h>
  15 #include <yaz/yaz-iconv.h>
  16 #include "cclp.h"
  17
  18 /*
  19  * token_cmp: Compare token with keyword(s)
  20  * kw:     Keyword list. Each keyword is separated by space.
  21  * token:  CCL token.
  22  * return: 1 if token string matches one of the keywords in list;
  23  *         0 otherwise.
  24  */
  25 static int token_cmp(CCL_parser cclp, const char **kw, struct ccl_token *token)
  26 {
  27     const char **aliases;
  28     int case_sensitive = cclp->ccl_case_sensitive;
  29     int i;
  30
  31     aliases = ccl_qual_search_special(cclp->bibset, "case");
  32     if (aliases)
  33         case_sensitive = atoi(aliases[0]);
  34
  35     for (i = 0; kw[i]; i++)
  36     {
  37         if (token->len == strlen(kw[i]))
  38         {
  39             if (case_sensitive)
  40             {
  41                 if (!memcmp(kw[i], token->name, token->len))
  42                     return 1;
  43             }
  44             else
  45             {
  46                 if (!ccl_memicmp(kw[i], token->name, token->len))
  47                     return 1;
  48             }
  49         }
  50     }
  51     return 0;
  52 }
  53
  54 /*
  55  * ccl_tokenize: tokenize CCL command string.
  56  * return: CCL token list.
  57  */
  58 struct ccl_token *ccl_parser_tokenize(CCL_parser cclp, const char *command)
  59 {
  60     const char **aliases;
  61     const unsigned char *cp = (const unsigned char *) command;
  62     struct ccl_token *first = NULL;
  63     struct ccl_token *last = NULL;
  64     cclp->start_pos = command;
  65
  66     while (1)
  67     {
  68         const unsigned char *cp0 = cp;
  69         while (*cp && strchr(" \t\r\n", *cp))
  70             cp++;
  71         if (!first)
  72         {
  73             first = last = (struct ccl_token *)xmalloc(sizeof(*first));
  74             ccl_assert(first);
  75             last->prev = NULL;
  76         }
  77         else
  78         {
  79             last->next = (struct ccl_token *)xmalloc(sizeof(*first));
  80             ccl_assert(last->next);
  81             last->next->prev = last;
  82             last = last->next;
  83         }
  84         last->ws_prefix_buf = (const char *) cp0;
  85         last->ws_prefix_len = cp - cp0;
  86         last->next = NULL;
  87         last->name = (const char *) cp;
  88         last->len = 1;
  89         switch (*cp++)
  90         {
  91         case '\0':
  92             last->kind = CCL_TOK_EOL;
  93             return first;
  94         case '(':
  95             last->kind = CCL_TOK_LP;
  96             break;
  97         case ')':
  98             last->kind = CCL_TOK_RP;
  99             break;
 100         case ',':
 101             last->kind = CCL_TOK_COMMA;
 102             break;
 103         case '%':
 104         case '!':
 105             last->kind = CCL_TOK_PROX;
 106             while (yaz_isdigit(*cp))
 107             {
 108                 ++ last->len;
 109                 cp++;
 110             }
 111             break;
 112         case '>':
 113         case '<':
 114         case '=':
 115             if (*cp == '=' || *cp == '<' || *cp == '>')
 116             {
 117                 cp++;
 118                 last->kind = CCL_TOK_REL;
 119                 ++ last->len;
 120             }
 121             else if (cp[-1] == '=')
 122                 last->kind = CCL_TOK_EQ;
 123             else
 124                 last->kind = CCL_TOK_REL;
 125             break;
 126         default:
 127             --cp;
 128             --last->len;
 129
 130             last->kind = CCL_TOK_TERM;
 131             last->name = (const char *) cp;
 132             while (*cp && !strchr("(),%!><= \t\n\r", *cp))
 133             {
 134                 if (*cp == '\\' && cp[1])
 135                 {
 136                     cp++;
 137                     ++ last->len;
 138                 }
 139                 else if (*cp == '"')
 140                 {
 141                     while (*cp)
 142                     {
 143                         cp++;
 144                         ++ last->len;
 145                         if (*cp == '\\' && cp[1])
 146                         {
 147                             cp++;
 148                             ++ last->len;
 149                         }
 150                         else if (*cp == '"')
 151                             break;
 152                     }
 153                 }
 154                 if (!*cp)
 155                     break;
 156                 cp++;
 157                 ++ last->len;
 158             }
 159             aliases = ccl_qual_search_special(cclp->bibset, "and");
 160             if (!aliases)
 161                 aliases = cclp->ccl_token_and;
 162             if (token_cmp(cclp, aliases, last))
 163                 last->kind = CCL_TOK_AND;
 164
 165             aliases = ccl_qual_search_special(cclp->bibset, "or");
 166             if (!aliases)
 167                 aliases = cclp->ccl_token_or;
 168             if (token_cmp(cclp, aliases, last))
 169                 last->kind = CCL_TOK_OR;
 170
 171             aliases = ccl_qual_search_special(cclp->bibset, "not");
 172             if (!aliases)
 173                 aliases = cclp->ccl_token_not;
 174             if (token_cmp(cclp, aliases, last))
 175                 last->kind = CCL_TOK_NOT;
 176
 177             aliases = ccl_qual_search_special(cclp->bibset, "set");
 178             if (!aliases)
 179                 aliases = cclp->ccl_token_set;
 180
 181             if (token_cmp(cclp, aliases, last))
 182                 last->kind = CCL_TOK_SET;
 183         }
 184     }
 185     return first;
 186 }
 187
 188 struct ccl_token *ccl_token_add(struct ccl_token *at)
 189 {
 190     struct ccl_token *n = (struct ccl_token *)xmalloc(sizeof(*n));
 191     ccl_assert(n);
 192     n->next = at->next;
 193     n->prev = at;
 194     at->next = n;
 195     if (n->next)
 196         n->next->prev = n;
 197
 198     n->kind = CCL_TOK_TERM;
 199     n->name = 0;
 200     n->len = 0;
 201     n->ws_prefix_buf = 0;
 202     n->ws_prefix_len = 0;
 203     return n;
 204 }
 205
 206 /*
 207  * ccl_token_del: delete CCL tokens
 208  */
 209 void ccl_token_del(struct ccl_token *list)
 210 {
 211     struct ccl_token *list1;
 212
 213     while (list)
 214     {
 215         list1 = list->next;
 216         xfree(list);
 217         list = list1;
 218     }
 219 }
 220
 221 static const char **create_ar(const char *v1, const char *v2)
 222 {
 223     const char **a = (const char **) xmalloc(3 * sizeof(*a));
 224     a[0] = xstrdup(v1);
 225     if (v2)
 226     {
 227         a[1] = xstrdup(v2);
 228         a[2] = 0;
 229     }
 230     else
 231         a[1] = 0;
 232     return a;
 233 }
 234
 235 static void destroy_ar(const char **a)
 236 {
 237     if (a)
 238     {
 239         int i;
 240         for (i = 0; a[i]; i++)
 241             xfree((char *) a[i]);
 242         xfree((char **)a);
 243     }
 244 }
 245
 246 CCL_parser ccl_parser_create(CCL_bibset bibset)
 247 {
 248     CCL_parser p = (CCL_parser)xmalloc(sizeof(*p));
 249     if (!p)
 250         return p;
 251     p->look_token = NULL;
 252     p->error_code = 0;
 253     p->error_pos = NULL;
 254     p->bibset = bibset;
 255
 256     p->ccl_token_and = create_ar("and", 0);
 257     p->ccl_token_or = create_ar("or", 0);
 258     p->ccl_token_not = create_ar("not", "andnot");
 259     p->ccl_token_set = create_ar("set", 0);
 260     p->ccl_case_sensitive = 1;
 261
 262     return p;
 263 }
 264
 265 void ccl_parser_destroy(CCL_parser p)
 266 {
 267     if (!p)
 268         return;
 269     destroy_ar(p->ccl_token_and);
 270     destroy_ar(p->ccl_token_or);
 271     destroy_ar(p->ccl_token_not);
 272     destroy_ar(p->ccl_token_set);
 273     xfree(p);
 274 }
 275
 276 void ccl_parser_set_case(CCL_parser p, int case_sensitivity_flag)
 277 {
 278     if (p)
 279         p->ccl_case_sensitive = case_sensitivity_flag;
 280 }
 281
 282 int ccl_parser_get_error(CCL_parser cclp, int *pos)
 283 {
 284     if (pos && cclp->error_code)
 285         *pos = cclp->error_pos - cclp->start_pos;
 286     return cclp->error_code;
 287 }
 288
 289 /*
 290  * Local variables:
 291  * c-basic-offset: 4
 292  * c-file-style: "Stroustrup"
 293  * indent-tabs-mode: nil
 294  * End:
 295  * vim: shiftwidth=4 tabstop=8 expandtab
 296  */
 297