1 /* $Id: dfa.c,v 1.40 2007-01-15 15:10:15 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 #include <idzebra/util.h>
43 struct Tnode *p[2]; /* CAT,OR,STAR,PLUS (left,right) */
44 short ch[2]; /* if ch[0] >= 0 then this Tnode represents */
45 /* a character in range [ch[0]..ch[1]] */
46 /* otherwise ch[0] represents */
47 /* accepting no (-acceptno) */
49 unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */
50 unsigned nullable : 1;
51 DFASet firstpos; /* first positions */
52 DFASet lastpos; /* last positions */
55 struct Tblock { /* Tnode bucket (block) */
56 struct Tblock *next; /* pointer to next bucket */
57 struct Tnode *tarray; /* array of nodes in bucket */
61 #define STATE_HASH 199
62 #define POSET_CHUNK 100
64 int debug_dfa_trav = 0;
65 int debug_dfa_tran = 0;
66 int debug_dfa_followpos = 0;
69 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info);
70 static struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info,
72 static void term_Tnode (struct DFA_parse *parse_info);
75 del_followpos (struct DFA_parse *parse_info),
76 init_pos (struct DFA_parse *parse_info),
77 del_pos (struct DFA_parse *parse_info),
78 mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
79 add_follow (struct DFA_parse *parse_info, DFASet lastpos, DFASet firstpos),
80 dfa_trav (struct DFA_parse *parse_info, struct Tnode *n),
81 init_followpos (struct DFA_parse *parse_info),
82 pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
83 pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas),
84 pr_followpos (struct DFA_parse *parse_info),
86 lex (struct DFA_parse *parse_info);
89 nextchar (struct DFA_parse *parse_info, int *esc),
90 read_charset (struct DFA_parse *parse_info);
93 *str_char (unsigned c);
110 static struct Tnode *expr_1 (struct DFA_parse *parse_info),
111 *expr_2 (struct DFA_parse *parse_info),
112 *expr_3 (struct DFA_parse *parse_info),
113 *expr_4 (struct DFA_parse *parse_info);
115 static struct Tnode *expr_1 (struct DFA_parse *parse_info)
117 struct Tnode *t1, *t2, *tn;
119 if (!(t1 = expr_2 (parse_info)))
121 while (parse_info->lookahead == L_ALT)
124 if (!(t2 = expr_2 (parse_info)))
127 tn = mk_Tnode (parse_info);
136 static struct Tnode *expr_2 (struct DFA_parse *parse_info)
138 struct Tnode *t1, *t2, *tn;
140 if (!(t1 = expr_3 (parse_info)))
142 while (parse_info->lookahead == L_WILD ||
143 parse_info->lookahead == L_ANYZ ||
144 parse_info->lookahead == L_ANY ||
145 parse_info->lookahead == L_LP ||
146 parse_info->lookahead == L_CHAR ||
147 parse_info->lookahead == L_CHARS)
149 if (!(t2 = expr_3 (parse_info)))
152 tn = mk_Tnode (parse_info);
161 static struct Tnode *expr_3 (struct DFA_parse *parse_info)
163 struct Tnode *t1, *tn;
165 if (!(t1 = expr_4 (parse_info)))
167 if (parse_info->lookahead == L_CLOS0)
170 tn = mk_Tnode (parse_info);
175 else if (parse_info->lookahead == L_CLOS1)
178 tn = mk_Tnode (parse_info);
183 else if (parse_info->lookahead == L_QUEST)
186 tn = mk_Tnode(parse_info);
189 tn->u.p[1] = mk_Tnode(parse_info);
190 tn->u.p[1]->pos = EPSILON;
196 static struct Tnode *expr_4 (struct DFA_parse *parse_info)
200 switch (parse_info->lookahead)
204 if (!(t1 = expr_1 (parse_info)))
206 if (parse_info->lookahead == L_RP)
212 t1 = mk_Tnode(parse_info);
213 t1->pos = ++parse_info->position;
214 t1->u.ch[1] = t1->u.ch[0] = parse_info->look_ch;
218 t1 = mk_Tnode_cset (parse_info, parse_info->look_chars);
222 t1 = mk_Tnode_cset (parse_info, parse_info->anyset);
226 t1 = mk_Tnode(parse_info);
228 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
229 t1->u.p[1] = mk_Tnode(parse_info);
230 t1->u.p[1]->pos = EPSILON;
234 t1 = mk_Tnode(parse_info);
236 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
244 static void do_parse (struct DFA_parse *parse_info, const char **s,
247 int start_anchor_flag = 0;
248 struct Tnode *t1, *t2, *tn;
250 parse_info->err_code = 0;
251 parse_info->expr_ptr = * (const unsigned char **) s;
253 parse_info->inside_string = 0;
255 if (parse_info->lookahead == L_START)
257 start_anchor_flag = 1;
260 if (parse_info->lookahead == L_END)
262 t1 = mk_Tnode (parse_info);
263 t1->pos = ++parse_info->position;
264 t1->u.ch[1] = t1->u.ch[0] = '\n';
269 t1 = expr_1 (parse_info);
270 if (t1 && parse_info->lookahead == L_END)
272 t2 = mk_Tnode (parse_info);
273 t2->pos = ++parse_info->position;
274 t2->u.ch[1] = t2->u.ch[0] = '\n';
276 tn = mk_Tnode (parse_info);
285 if (t1 && parse_info->lookahead == 0)
287 t2 = mk_Tnode(parse_info);
288 t2->pos = ++parse_info->position;
289 t2->u.ch[0] = -(++parse_info->rule);
290 t2->u.ch[1] = start_anchor_flag ? 0 : -(parse_info->rule);
292 *tnp = mk_Tnode(parse_info);
299 if (!parse_info->err_code)
301 if (parse_info->lookahead == L_RP)
302 parse_info->err_code = DFA_ERR_RP;
303 else if (parse_info->lookahead == L_LP)
304 parse_info->err_code = DFA_ERR_LP;
306 parse_info->err_code = DFA_ERR_SYNTAX;
309 *s = (const char *) parse_info->expr_ptr;
312 static int nextchar (struct DFA_parse *parse_info, int *esc)
315 if (*parse_info->expr_ptr == '\0')
317 else if (*parse_info->expr_ptr != '\\')
318 return *parse_info->expr_ptr++;
320 switch (*++parse_info->expr_ptr)
327 ++parse_info->expr_ptr;
330 ++parse_info->expr_ptr;
333 ++parse_info->expr_ptr;
336 ++parse_info->expr_ptr;
339 ++parse_info->expr_ptr;
342 return *parse_info->expr_ptr++;
346 static int nextchar_set (struct DFA_parse *parse_info, int *esc)
348 if (*parse_info->expr_ptr == ' ')
351 return *parse_info->expr_ptr++;
353 return nextchar (parse_info, esc);
356 static int read_charset (struct DFA_parse *parse_info)
358 int i, ch0, esc0, cc = 0;
359 parse_info->look_chars = mk_BSet (&parse_info->charset);
360 res_BSet (parse_info->charset, parse_info->look_chars);
362 ch0 = nextchar_set (parse_info, &esc0);
363 if (!esc0 && ch0 == '^')
366 ch0 = nextchar_set (parse_info, &esc0);
369 ch0 is last met character
375 if (!esc0 && ch0 == ']')
377 if (!esc0 && ch0 == '-')
382 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
388 ch0 = nextchar(parse_info, &esc0);
392 if (parse_info->cmap)
396 const char *mcp = mapfrom;
398 mapto = parse_info->cmap(parse_info->cmap_data, &mcp, 1);
403 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
404 ch1 = nextchar_set (parse_info, &esc1);
406 if (!esc1 && ch1 == '-')
409 if ((ch1 = nextchar_set (parse_info, &esc1)) == 0)
411 if (!esc1 && ch1 == ']')
418 ch1 = nextchar(parse_info, &esc1);
420 else if (parse_info->cmap)
424 const char *mcp = mapfrom;
426 mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1);
430 for (i = ch0; ++i <= ch1;)
431 add_BSet (parse_info->charset, parse_info->look_chars, i);
435 ch0 = nextchar_set (parse_info, &esc0);
444 com_BSet (parse_info->charset, parse_info->look_chars);
448 static int map_l_char (struct DFA_parse *parse_info)
451 const char *cp0 = (const char *) (parse_info->expr_ptr-1);
452 int i = 0, len = strlen(cp0);
454 if (cp0[0] == 1 && cp0[1])
456 parse_info->expr_ptr++;
457 parse_info->look_ch = ((unsigned char *) cp0)[1];
460 if (!parse_info->cmap)
463 mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len);
466 parse_info->expr_ptr = (const unsigned char *) cp0;
467 parse_info->look_ch = ((unsigned char **) mapto)[i][0];
468 yaz_log (YLOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch);
472 static int lex_sub(struct DFA_parse *parse_info)
475 while ((parse_info->look_ch = nextchar (parse_info, &esc)) != 0)
476 if (parse_info->look_ch == '\"')
479 return map_l_char (parse_info);
480 parse_info->inside_string = !parse_info->inside_string;
482 else if (esc || parse_info->inside_string)
483 return map_l_char (parse_info);
484 else if (parse_info->look_ch == '[')
485 return read_charset(parse_info);
489 for (cc = parse_info->charMap; *cc; cc += 2)
490 if (*cc == (int) (parse_info->look_ch))
493 --parse_info->expr_ptr;
496 return map_l_char (parse_info);
501 static void lex (struct DFA_parse *parse_info)
503 parse_info->lookahead = lex_sub (parse_info);
506 static const char *str_char (unsigned c)
510 if (c < 32 || c >= 127)
523 sprintf (s+1, "x%02x", c);
545 static void out_char (int c)
547 printf ("%s", str_char (c));
550 static void term_Tnode (struct DFA_parse *parse_info)
552 struct Tblock *t, *t1;
553 for (t = parse_info->start; (t1 = t);)
561 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info)
565 if (parse_info->use_Tnode == parse_info->max_Tnode)
567 tnew = (struct Tblock *) imalloc (sizeof(struct Tblock));
568 tnew->tarray = (struct Tnode *) imalloc (TADD*sizeof(struct Tnode));
571 if (parse_info->use_Tnode == 0)
572 parse_info->start = tnew;
574 parse_info->end->next = tnew;
575 parse_info->end = tnew;
577 parse_info->max_Tnode += TADD;
579 return parse_info->end->tarray+(parse_info->use_Tnode++ % TADD);
582 struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info, BSet charset)
584 struct Tnode *tn1, *tn0 = mk_Tnode(parse_info);
585 int ch1, ch0 = trav_BSet (parse_info->charset, charset, 0);
591 tn0->pos = ++parse_info->position;
592 ch1 = travi_BSet (parse_info->charset, charset, ch0+1) ;
594 tn0->u.ch[1] = parse_info->charset->size;
597 tn0->u.ch[1] = ch1-1;
598 while ((ch0=trav_BSet (parse_info->charset, charset, ch1)) != -1)
600 tn1 = mk_Tnode(parse_info);
604 tn1 = tn0->u.p[1] = mk_Tnode(parse_info);
606 tn1->pos = ++(parse_info->position);
607 if ((ch1 = travi_BSet (parse_info->charset, charset,
610 tn1->u.ch[1] = parse_info->charset->size;
613 tn1->u.ch[1] = ch1-1;
620 static void del_followpos (struct DFA_parse *parse_info)
622 ifree (parse_info->followpos);
625 static void init_pos (struct DFA_parse *parse_info)
627 parse_info->posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)
628 * (1+parse_info->position));
631 static void del_pos (struct DFA_parse *parse_info)
633 ifree (parse_info->posar);
636 static void add_follow (struct DFA_parse *parse_info,
637 DFASet lastpos, DFASet firstpos)
641 parse_info->followpos[lastpos->value] =
642 union_DFASet (parse_info->poset,
643 parse_info->followpos[lastpos->value], firstpos);
644 lastpos = lastpos->next;
648 static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n)
650 struct Tnode **posar = parse_info->posar;
651 DFASetType poset = parse_info->poset;
656 dfa_trav (parse_info, n->u.p[0]);
657 dfa_trav (parse_info, n->u.p[1]);
658 n->nullable = n->u.p[0]->nullable & n->u.p[1]->nullable;
659 n->firstpos = mk_DFASet (poset);
660 n->firstpos = union_DFASet (poset, n->firstpos, n->u.p[0]->firstpos);
661 if (n->u.p[0]->nullable)
662 n->firstpos = union_DFASet (poset, n->firstpos, n->u.p[1]->firstpos);
663 n->lastpos = mk_DFASet (poset);
664 n->lastpos = union_DFASet (poset, n->lastpos, n->u.p[1]->lastpos);
665 if (n->u.p[1]->nullable)
666 n->lastpos = union_DFASet (poset, n->lastpos, n->u.p[0]->lastpos);
668 add_follow (parse_info, n->u.p[0]->lastpos, n->u.p[1]->firstpos);
670 n->u.p[0]->firstpos = rm_DFASet (poset, n->u.p[0]->firstpos);
671 n->u.p[0]->lastpos = rm_DFASet (poset, n->u.p[0]->lastpos);
672 n->u.p[1]->firstpos = rm_DFASet (poset, n->u.p[1]->firstpos);
673 n->u.p[1]->lastpos = rm_DFASet (poset, n->u.p[1]->lastpos);
678 dfa_trav (parse_info, n->u.p[0]);
679 dfa_trav (parse_info, n->u.p[1]);
680 n->nullable = n->u.p[0]->nullable | n->u.p[1]->nullable;
682 n->firstpos = merge_DFASet (poset, n->u.p[0]->firstpos,
683 n->u.p[1]->firstpos);
684 n->lastpos = merge_DFASet (poset, n->u.p[0]->lastpos,
686 n->u.p[0]->firstpos = rm_DFASet (poset, n->u.p[0]->firstpos);
687 n->u.p[0]->lastpos = rm_DFASet (poset, n->u.p[0]->lastpos);
688 n->u.p[1]->firstpos = rm_DFASet (poset, n->u.p[1]->firstpos);
689 n->u.p[1]->lastpos = rm_DFASet (poset, n->u.p[1]->lastpos);
694 dfa_trav (parse_info, n->u.p[0]);
695 n->nullable = n->u.p[0]->nullable;
696 n->firstpos = n->u.p[0]->firstpos;
697 n->lastpos = n->u.p[0]->lastpos;
698 add_follow (parse_info, n->lastpos, n->firstpos);
703 dfa_trav (parse_info, n->u.p[0]);
705 n->firstpos = n->u.p[0]->firstpos;
706 n->lastpos = n->u.p[0]->lastpos;
707 add_follow (parse_info, n->lastpos, n->firstpos);
713 n->lastpos = mk_DFASet (poset);
714 n->firstpos = mk_DFASet (poset);
721 n->firstpos = mk_DFASet (poset);
722 n->firstpos = add_DFASet (poset, n->firstpos, n->pos);
723 n->lastpos = mk_DFASet (poset);
724 n->lastpos = add_DFASet (poset, n->lastpos, n->pos);
728 printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]);
729 else if (n->u.ch[1] > n->u.ch[0])
732 out_char (n->u.ch[0]);
733 if (n->u.ch[1] > n->u.ch[0]+1)
735 out_char (n->u.ch[1]);
739 out_char (n->u.ch[0]);
744 printf ("\n nullable : %c\n", n->nullable ? '1' : '0');
745 printf (" firstpos :");
746 pr_DFASet (poset, n->firstpos);
747 printf (" lastpos :");
748 pr_DFASet (poset, n->lastpos);
752 static void init_followpos (struct DFA_parse *parse_info)
756 parse_info->followpos = fa =
757 (DFASet *) imalloc (sizeof(DFASet) * (1+parse_info->position));
758 for (i = parse_info->position+1; --i >= 0; fa++)
759 *fa = mk_DFASet (parse_info->poset);
762 static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
769 struct DFA_state *dfa_from, *dfa_to;
770 struct Tnode **posar;
776 posar = parse_info->posar;
777 max_char = parse_info->charset->size;
778 pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1));
780 tran_set = cp_DFASet (parse_info->poset, parse_info->root->firstpos);
781 i = add_DFA_state (dfas, &tran_set, &dfa_from);
783 dfa_from->rule_no = 0;
784 poset = parse_info->poset;
785 followpos = parse_info->followpos;
786 while ((dfa_from = get_DFA_state (dfas)))
790 for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next)
791 if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char)
792 *pos_i++ = tran_set->value;
797 c = posar[tran_set->value]->u.ch[1];
802 dfa_from->rule_no = -i;
803 dfa_from->rule_nno = -j;
805 for (char_1 = 0; char_1 <= max_char; char_1++)
808 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
809 if (posar[i]->u.ch[1] >= char_1
810 && (c=posar[i]->u.ch[0]) < char_0)
818 if (char_0 > max_char)
823 tran_set = mk_DFASet (poset);
824 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
826 if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1)
827 char_1 = c - 1; /* forward chunk */
828 else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1)
829 char_1 = c; /* backward chunk */
830 if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0)
831 tran_set = union_DFASet (poset, tran_set, followpos[i]);
835 add_DFA_state (dfas, &tran_set, &dfa_to);
836 add_DFA_tran (dfas, dfa_from, char_0, char_1, dfa_to->no);
841 sort_DFA_states (dfas);
844 static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
847 struct DFA_tran *tran;
848 int prev_no, i, c, no;
850 for (no=0; no < dfas->no; ++no)
852 s = dfas->sortarray[no];
853 assert (s->no == no);
854 printf ("DFA state %d", no);
856 printf ("#%d", s->rule_no);
858 printf (" #%d", s->rule_nno);
861 pr_DFASet (parse_info->poset, s->set);
863 for (i=s->tran_no, tran=s->trans; --i >= 0; tran++)
865 if (prev_no != tran->to)
869 printf (" goto %d on [", tran->to);
872 for (c = tran->ch[0]; c <= tran->ch[1]; c++)
873 printf ("%s", str_char(c));
881 static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas)
885 printf ("%d/%d tree nodes used, %ld bytes each\n",
886 parse_info->use_Tnode, parse_info->max_Tnode, (long) sizeof (struct Tnode));
887 k = inf_BSetHandle (parse_info->charset, &i, &j);
888 printf ("%ld/%ld character sets, %ld bytes each\n",
889 i/k, j/k, (long) k*sizeof(BSetWord));
890 k = inf_DFASetType (parse_info->poset, &i, &j);
891 printf ("%ld/%ld poset items, %d bytes each\n", i, j, k);
892 printf ("%d DFA states\n", dfas->no);
895 static void pr_followpos (struct DFA_parse *parse_info)
897 struct Tnode **posar = parse_info->posar;
899 printf ("\nfollowsets:\n");
900 for (i=1; i <= parse_info->position; i++)
903 pr_DFASet (parse_info->poset, parse_info->followpos[i]);
906 if (posar[i]->u.ch[0] < 0)
907 printf ("#%d", -posar[i]->u.ch[0]);
908 else if (posar[i]->u.ch[1] > posar[i]->u.ch[0])
911 out_char (posar[i]->u.ch[0]);
912 if (posar[i]->u.ch[1] > posar[i]->u.ch[0]+1)
914 out_char (posar[i]->u.ch[1]);
918 out_char (posar[i]->u.ch[0]);
924 void dfa_parse_cmap_clean (struct DFA *d)
926 struct DFA_parse *dfa = d->parse_info;
931 dfa->charMapSize = 7;
932 dfa->charMap = (int *)
933 imalloc (dfa->charMapSize * sizeof(*dfa->charMap));
938 void dfa_parse_cmap_new (struct DFA *d, const int *cmap)
940 struct DFA_parse *dfa = d->parse_info;
945 for (cp = cmap; *cp; cp += 2)
947 size = cp - cmap + 1;
948 if (size > dfa->charMapSize)
951 ifree (dfa->charMap);
952 dfa->charMapSize = size;
953 dfa->charMap = (int *) imalloc (size * sizeof(*dfa->charMap));
955 memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));
958 void dfa_parse_cmap_del (struct DFA *d, int from)
960 struct DFA_parse *dfa = d->parse_info;
964 for (cc = dfa->charMap; *cc; cc += 2)
967 while ((cc[0] = cc[2]))
976 void dfa_parse_cmap_add (struct DFA *d, int from, int to)
978 struct DFA_parse *dfa = d->parse_info;
983 for (cc = dfa->charMap; *cc; cc += 2)
989 indx = cc - dfa->charMap;
990 size = dfa->charMapSize;
993 int *cn = (int *) imalloc ((size+16) * sizeof(*dfa->charMap));
994 memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap));
995 ifree (dfa->charMap);
997 dfa->charMapSize = size+16;
999 dfa->charMap[indx] = from;
1000 dfa->charMap[indx+1] = to;
1001 dfa->charMap[indx+2] = 0;
1004 void dfa_parse_cmap_thompson (struct DFA *d)
1006 static int thompson_chars[] =
1022 dfa_parse_cmap_new (d, thompson_chars);
1025 static struct DFA_parse *dfa_parse_init (void)
1027 struct DFA_parse *parse_info =
1028 (struct DFA_parse *) imalloc (sizeof (struct DFA_parse));
1030 parse_info->charset = mk_BSetHandle (255, 20);
1031 parse_info->position = 0;
1032 parse_info->rule = 0;
1033 parse_info->root = NULL;
1035 /* initialize the anyset which by default does not include \n */
1036 parse_info->anyset = mk_BSet (&parse_info->charset);
1037 res_BSet (parse_info->charset, parse_info->anyset);
1038 add_BSet (parse_info->charset, parse_info->anyset, '\n');
1039 com_BSet (parse_info->charset, parse_info->anyset);
1041 parse_info->use_Tnode = parse_info->max_Tnode = 0;
1042 parse_info->start = parse_info->end = NULL;
1043 parse_info->charMap = NULL;
1044 parse_info->charMapSize = 0;
1045 parse_info->cmap = NULL;
1049 static void rm_dfa_parse (struct DFA_parse **dfap)
1051 struct DFA_parse *parse_info = *dfap;
1052 assert (parse_info);
1053 term_Tnode(parse_info);
1054 rm_BSetHandle (&parse_info->charset);
1055 ifree (parse_info->charMap);
1060 static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk)
1062 struct DFA_states *dfas;
1063 struct DFA_parse *parse_info = dfap;
1065 assert (poset_chunk > 10);
1068 parse_info->poset = mk_DFASetType (poset_chunk);
1069 init_pos(parse_info);
1070 init_followpos(parse_info);
1071 assert (parse_info->root);
1072 dfa_trav (parse_info, parse_info->root);
1074 if (debug_dfa_followpos)
1075 pr_followpos(parse_info);
1076 init_DFA_states (&dfas, parse_info->poset, (int) (STATE_HASH));
1077 mk_dfa_tran (parse_info, dfas);
1080 pr_tran (parse_info, dfas);
1083 pr_verbose (parse_info, dfas);
1084 del_pos(parse_info);
1085 del_followpos(parse_info);
1086 rm_DFASetType (parse_info->poset);
1090 struct DFA *dfa_init (void)
1094 dfa = (struct DFA *) imalloc (sizeof(*dfa));
1095 dfa->parse_info = dfa_parse_init ();
1096 dfa->state_info = NULL;
1098 dfa_parse_cmap_thompson (dfa);
1102 void dfa_anyset_includes_nl(struct DFA *dfa)
1104 add_BSet (dfa->parse_info->charset, dfa->parse_info->anyset, '\n');
1107 void dfa_set_cmap (struct DFA *dfa, void *vp,
1108 const char **(*cmap)(void *vp, const char **from, int len))
1110 dfa->parse_info->cmap = cmap;
1111 dfa->parse_info->cmap_data = vp;
1114 int dfa_get_last_rule (struct DFA *dfa)
1116 return dfa->parse_info->rule;
1119 int dfa_parse (struct DFA *dfa, const char **pattern)
1122 struct DFA_parse *parse_info;
1125 assert (dfa->parse_info);
1126 parse_info = dfa->parse_info;
1128 do_parse (parse_info, pattern, &top);
1129 if (parse_info->err_code)
1130 return parse_info->err_code;
1131 if ( !dfa->parse_info->root )
1132 dfa->parse_info->root = top;
1137 n = mk_Tnode (parse_info);
1139 n->u.p[0] = dfa->parse_info->root;
1141 dfa->parse_info->root = n;
1146 void dfa_mkstate (struct DFA *dfa)
1150 dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK);
1151 dfa->no_states = dfa->state_info->no;
1152 dfa->states = dfa->state_info->sortarray;
1153 rm_dfa_parse (&dfa->parse_info);
1156 void dfa_delete (struct DFA **dfap)
1160 if ((*dfap)->parse_info)
1161 rm_dfa_parse (&(*dfap)->parse_info);
1162 if ((*dfap)->state_info)
1163 rm_DFA_states (&(*dfap)->state_info);
1170 * indent-tabs-mode: nil
1172 * vim: shiftwidth=4 tabstop=8 expandtab