X-Git-Url: http://jsfdemo.indexdata.com/?a=blobdiff_plain;f=dfa%2Fdfa.c;h=f55b69abb036372b5a2431cf7992a3ce41e79f80;hb=a6908b6d93de84a850a3e558a2e4c17f02c3d651;hp=5504f41de4082674efa23bfcf72bb885003cab67;hpb=239ec8f8cc9194097025c4b00d2ac0b0514dcf49;p=idzebra-moved-to-github.git diff --git a/dfa/dfa.c b/dfa/dfa.c index 5504f41..f55b69a 100644 --- a/dfa/dfa.c +++ b/dfa/dfa.c @@ -1,10 +1,23 @@ /* - * Copyright (C) 1994-1996, Index Data I/S + * Copyright (C) 1994-1997, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: dfa.c,v $ - * Revision 1.13 1996-06-17 14:24:08 adam + * Revision 1.17 1997-09-18 08:59:17 adam + * Extra generic handle for the character mapping routines. + * + * Revision 1.16 1997/09/05 15:29:57 adam + * Changed prototype for chr_map_input - added const. + * Added support for C++, headers uses extern "C" for public definitions. + * + * Revision 1.15 1997/02/10 10:19:20 adam + * Added facility for open character sets, eg [a-]. + * + * Revision 1.14 1996/10/29 13:57:22 adam + * Include of zebrautl.h instead of alexutil.h. + * + * Revision 1.13 1996/06/17 14:24:08 adam * Bug fix: read_charset didn't handle character mapping. * * Revision 1.12 1996/06/04 10:20:02 adam @@ -54,10 +67,12 @@ #include #include -#include +#include #include "dfap.h" #include "imalloc.h" +#define DFA_OPEN_RANGE 1 + #define CAT 16000 #define OR 16001 #define STAR 16002 @@ -410,10 +425,11 @@ static int read_charset (void) break; if (parse_info->cmap) { - char **mapto, mapfrom[2]; + const char **mapto; + char mapfrom[2]; const char *mcp = mapfrom; mapfrom[0] = ch0; - mapto = (*parse_info->cmap)(&mcp, 1); + mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1); assert (mapto); ch0 = mapto[0][0]; } @@ -421,25 +437,38 @@ static int read_charset (void) ch1 = nextchar_set (&esc1); if (!esc1 && ch1 == '-') { + int open_range = 0; if ((ch1 = nextchar_set (&esc1)) == 0) break; +#if DFA_OPEN_RANGE + if (!esc1 && ch1 == ']') + { + ch1 = 255; + open_range = 1; + } +#else if (!esc1 && ch1 == ']') { add_BSet (parse_info->charset, look_chars, '-'); break; } - if (parse_info->cmap) +#endif + if (!open_range && parse_info->cmap) { - char **mapto, mapfrom[2]; + const char **mapto; + char mapfrom[2]; const char *mcp = mapfrom; mapfrom[0] = ch1; - mapto = (*parse_info->cmap) (&mcp, 1); + mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1); assert (mapto); ch1 = mapto[0][0]; } for (i=ch0; ++i<=ch1;) add_BSet (parse_info->charset, look_chars, i); - ch0 = nextchar_set (&esc0); + if (!open_range) + ch0 = nextchar_set (&esc0); + else + break; } else { @@ -454,8 +483,8 @@ static int read_charset (void) static int map_l_char (void) { - char **mapto; - const char *cp0 = expr_ptr-1; + const char **mapto; + const char *cp0 = (const char *) (expr_ptr-1); int i = 0, len = strlen(cp0); if (cp0[0] == 1 && cp0[1]) @@ -467,10 +496,10 @@ static int map_l_char (void) if (!parse_info->cmap) return L_CHAR; - mapto = (*parse_info->cmap) (&cp0, len); + mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len); assert (mapto); - expr_ptr = cp0; + expr_ptr = (const unsigned char *) cp0; look_ch = mapto[i][0]; logf (LOG_DEBUG, "map from %c to %d", expr_ptr[-1], look_ch); return L_CHAR; @@ -1081,9 +1110,11 @@ struct DFA *dfa_init (void) return dfa; } -void dfa_set_cmap (struct DFA *dfa, char **(*cmap)(const char **from, int len)) +void dfa_set_cmap (struct DFA *dfa, void *vp, + const char **(*cmap)(void *vp, const char **from, int len)) { dfa->parse_info->cmap = cmap; + dfa->parse_info->cmap_data = vp; } int dfa_parse (struct DFA *dfa, const char **pattern)