# Copyright (C) 1994, Index Data I/S
# All rights reserved.
# Sebastian Hammer, Adam Dickmeiss
-# $Id: Makefile,v 1.2 1994-09-26 16:30:55 adam Exp $
+# $Id: Makefile,v 1.3 1994-09-27 16:31:17 adam Exp $
SHELL=/bin/sh
INCLUDE=-I../include
TPROG1=agrep
TPROG2=lexer
-CFLAGS=-g -Wall -pedantic
+TPROG3=grepper
+CFLAGS=-g -Wall
DEFS=$(INCLUDE) -DYACC -DYYDEBUG=1 -DMEMDEBUG=1
LIB=../lib/dfa.a
PO = regexp.o imalloc.o states.o set.o bset.o
all: $(LIB)
+alll: $(LIB) $(TPROG1) $(TPROG2) $(TPROG3)
+
$(TPROG1): $(TPROG1).o $(LIB)
$(CC) $(CFLAGS) -o $(TPROG1) $(TPROG1).o $(LIB) ../lib/util.a
$(TPROG2): $(TPROG2).o readfile.o $(LIB)
- $(CC) $(CFLAGS) -o $(TPROG2) $(TPROG2).o readfile.o \
- $(LIB) ../lib/util.a
+ $(CC) $(CFLAGS) -o $(TPROG2) $(TPROG2).o readfile.o $(LIB) ../lib/util.a
+
+$(TPROG3): $(TPROG3).o $(LIB)
+ $(CC) $(CFLAGS) -o $(TPROG3) $(TPROG3).o $(LIB) ../lib/util.a
$(LIB): $(PO)
rm -f $(LIB)
mv y.tab.o $*.o
clean:
- rm -f *.[oa] $(TPROG1) $(TPROG2) core mon.out gmon.out errlist y.tab.c
+ rm -f *.[oa] $(TPROG1) $(TPROG2) $(TPROG3) core mon.out gmon.out errlist y.tab*
depend: depend2
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: agrep.c,v $
- * Revision 1.2 1994-09-26 16:30:56 adam
+ * Revision 1.3 1994-09-27 16:31:18 adam
+ * First version of grepper: grep with error correction.
+ *
+ * Revision 1.2 1994/09/26 16:30:56 adam
* Minor changes. imalloc uses xmalloc now.
*
* Revision 1.1 1994/09/26 10:16:52 adam
static char *prog;
-void error( const char *format, ... )
+void error (const char *format, ...)
{
va_list argptr;
- va_start( argptr, format );
- fprintf( stderr, "%s error: ", prog );
- (void) vfprintf( stderr, format, argptr );
- putc( '\n', stderr );
- exit( 1 );
+ va_start (argptr, format);
+ fprintf (stderr, "%s error: ", prog);
+ (void) vfprintf (stderr, format, argptr);
+ putc ('\n', stderr);
+ exit (1);
}
#ifdef YYDEBUG
static int show_lines = 0;
-int agrep_options( argc, argv )
+int agrep_options (argc, argv)
int argc;
char **argv;
{
- while( --argc > 0 )
- if( **++argv == '-' )
- while( *++*argv )
+ while (--argc > 0)
+ if (**++argv == '-')
+ while (*++*argv)
{
- switch( **argv )
+ switch (**argv)
{
case 'V':
- fprintf( stderr, "%s: %s %s\n", prog, __DATE__, __TIME__ );
+ fprintf (stderr, "%s: %s %s\n", prog, __DATE__, __TIME__);
continue;
case 'v':
dfa_verbose = 1;
continue;
#endif
case 'd':
- switch( *++*argv )
+ switch (*++*argv)
{
case 's':
debug_dfa_tran = 1;
}
continue;
default:
- fprintf( stderr, "%s: unknown option `-%s'\n", prog, *argv );
+ fprintf (stderr, "%s: unknown option `-%s'\n", prog, *argv);
return 1;
}
break;
static char *inf_ptr, *inf_flsh;
static int inf_eof, line_no;
-static int inf_flush( fd )
+static int inf_flush (fd)
int fd;
{
char *p;
unsigned b, r;
r = (unsigned) (inf_buf+INF_BUF_SIZE - inf_ptr); /* no of `wrap' bytes */
- if( r )
- memcpy( inf_buf, inf_ptr, r );
+ if (r)
+ memcpy (inf_buf, inf_ptr, r);
inf_ptr = p = inf_buf + r;
b = INF_BUF_SIZE - r;
do
- if( (r = read( fd, p, b ) ) == (unsigned) -1 )
+ if ((r = read (fd, p, b)) == (unsigned) -1)
return -1;
- else if( r )
+ else if (r)
p += r;
else
{
inf_eof = 1;
break;
}
- while( (b -= r) > 0 );
- while( p != inf_buf && *--p != '\n' )
+ while ((b -= r) > 0);
+ while (p != inf_buf && *--p != '\n')
;
- while( p != inf_buf && *--p != '\n' )
+ while (p != inf_buf && *--p != '\n')
;
inf_flsh = p+1;
return 0;
}
-static char *prline( p )
+static char *prline (p)
char *p;
{
char *p0;
--p;
- while( p != inf_buf && p[-1] != '\n' )
+ while (p != inf_buf && p[-1] != '\n')
--p;
p0 = p;
- while( *p++ != '\n' )
+ while (*p++ != '\n')
;
p[-1] = '\0';
- if( show_lines )
- printf( "%5d:\t%s\n", line_no, p0 );
+ if (show_lines)
+ printf ("%5d:\t%s\n", line_no, p0);
else
- puts( p0 );
+ puts (p0);
p[-1] = '\n';
return p;
}
-static int go( fd, dfaar )
+static int go (fd, dfaar)
int fd;
DFA_state **dfaar;
{
int i;
unsigned char c;
- while( 1 )
+ while (1)
{
- for( c = *inf_ptr++, t=s->trans, i=s->tran_no; --i >= 0; t++ )
- if( c >= t->ch[0] && c <= t->ch[1] )
+ for (c = *inf_ptr++, t=s->trans, i=s->tran_no; --i >= 0; t++)
+ if (c >= t->ch[0] && c <= t->ch[1])
{
p = inf_ptr;
do
{
- if( (s = dfaar[t->to] )->rule_no )
+ if ((s = dfaar[t->to])->rule_no)
{
- inf_ptr = prline( inf_ptr );
+ inf_ptr = prline (inf_ptr);
c = '\n';
break;
}
- for( t=s->trans, i=s->tran_no; --i >= 0; t++ )
- if( (unsigned) *p >= t->ch[0]
- && (unsigned) *p <= t->ch[1] )
+ for (t=s->trans, i=s->tran_no; --i >= 0; t++)
+ if ((unsigned) *p >= t->ch[0]
+ && (unsigned) *p <= t->ch[1])
break;
p++;
- } while( i >= 0 );
+ } while (i >= 0);
s = dfaar[0];
break;
}
- if( c == '\n' )
+ if (c == '\n')
{
++line_no;
- if( inf_ptr == inf_flsh )
+ if (inf_ptr == inf_flsh)
{
- if( inf_eof )
+ if (inf_eof)
break;
++line_no;
- if( inf_flush( fd ) )
+ if (inf_flush (fd))
{
- fprintf( stderr, "%s: read error\n", prog );
+ fprintf (stderr, "%s: read error\n", prog);
return -1;
}
}
return 0;
}
-int agrep( dfas, fd )
+int agrep (dfas, fd)
DFA_states *dfas;
int fd;
{
- inf_buf = imalloc( sizeof(char)*INF_BUF_SIZE );
+ inf_buf = imalloc (sizeof(char)*INF_BUF_SIZE);
inf_eof = 0;
inf_ptr = inf_buf+INF_BUF_SIZE;
- inf_flush( fd );
+ inf_flush (fd);
line_no = 1;
- go( fd, dfas->sortarray);
+ go (fd, dfas->sortarray);
- ifree( inf_buf );
+ ifree (inf_buf);
return 0;
}
-int main( argc, argv )
+int main (argc, argv)
int argc;
char **argv;
{
alexdebug = 0;
#endif
#endif
- setbuf( stdout, outbuf );
- i = agrep_options( argc, argv );
- if( i )
+ setbuf (stdout, outbuf);
+ i = agrep_options (argc, argv);
+ if (i)
return i;
- while( --argc > 0 )
- if( **++argv != '-' && **argv )
- if( !pattern )
+ while (--argc > 0)
+ if (**++argv != '-' && **argv)
+ if (!pattern)
{
pattern = *argv;
- i = parse_dfa( dfa, &pattern, dfa_thompson_chars );
- if( i || *pattern )
+ i = parse_dfa (dfa, &pattern, dfa_thompson_chars);
+ if (i || *pattern)
{
- fprintf( stderr, "%s: illegal pattern\n", prog );
+ fprintf (stderr, "%s: illegal pattern\n", prog);
return 1;
}
dfa->root = dfa->top;
- dfas = mk_dfas( dfa, 200 );
- rm_dfa( &dfa );
+ dfas = mk_dfas (dfa, 200);
+ rm_dfa (&dfa);
}
else
{
++no;
- fd = open( *argv, O_RDONLY | O_BINARY);
- if( fd == -1 )
+ fd = open (*argv, O_RDONLY | O_BINARY);
+ if (fd == -1)
{
- fprintf( stderr, "%s: couldn't open `%s'\n", prog, *argv );
+ fprintf (stderr, "%s: couldn't open `%s'\n", prog, *argv);
return 1;
}
- i = agrep( dfas, fd );
- close( fd );
- if( i )
+ i = agrep (dfas, fd);
+ close (fd);
+ if (i)
return i;
}
- if( !no )
+ if (!no)
{
- fprintf( stderr, "%s: no files specified\n", prog );
+ fprintf (stderr, "%s: no files specified\n", prog);
return 2;
}
fflush(stdout);
--- /dev/null
+/*
+ * Copyright (C) 1994, Index Data I/S
+ * All rights reserved.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: grepper.c,v $
+ * Revision 1.1 1994-09-27 16:31:18 adam
+ * First version of grepper: grep with error correction.
+ *
+ */
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <assert.h>
+
+#include <util.h>
+#include <dfa.h>
+#include "imalloc.h"
+
+char *prog;
+static int show_line = 0;
+
+typedef unsigned MatchWord;
+#define WORD_BITS 32
+
+typedef struct {
+ int n; /* no of MatchWord needed */
+ int range; /* max no. of errors */
+ MatchWord *Sc; /* Mask Sc */
+} MatchContext;
+
+#define INFBUF_SIZE 16384
+
+static inline void set_bit (MatchContext *mc, MatchWord *m, int ch, int state)
+{
+ int off = state & (WORD_BITS-1);
+ int wno = state / WORD_BITS;
+
+ m[mc->n * ch + wno] |= 1<<off;
+}
+
+static inline void reset_bit (MatchContext *mc, MatchWord *m, int ch, int state)
+{
+ int off = state & (WORD_BITS-1);
+ int wno = state / WORD_BITS;
+
+ m[mc->n * ch + wno] &= ~(1<<off);
+}
+
+static inline MatchWord get_bit (MatchContext *mc, MatchWord *m, int ch, int state)
+{
+ int off = state & (WORD_BITS-1);
+ int wno = state / WORD_BITS;
+
+ return m[mc->n * ch + wno] & (1<<off);
+}
+
+static MatchContext *mk_MatchContext (DFA_states *dfas, int range)
+{
+ MatchContext *mc = imalloc (sizeof(*mc));
+ int i;
+
+ mc->n = (dfas->no+WORD_BITS) / WORD_BITS;
+ mc->range = range;
+ mc->Sc = icalloc (sizeof(*mc->Sc) * 256 * mc->n);
+
+ for (i=0; i<dfas->no; i++)
+ {
+ int j;
+ DFA_state *state = dfas->sortarray[i];
+
+ for (j=0; j<state->tran_no; j++)
+ {
+ int ch;
+ int ch0 = state->trans[j].ch[0];
+ int ch1 = state->trans[j].ch[1];
+ assert (ch0 >= 0 && ch1 >= 0);
+
+ for (ch = ch0; ch <= ch1; ch++)
+ set_bit (mc, mc->Sc, ch, i);
+ }
+ }
+ return mc;
+}
+
+static void mask_shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
+ DFA_states *dfas, int ch)
+{
+ int i, s;
+ Rdst[0] = 1;
+ for (i = 1; i<mc->n; i++)
+ Rdst[i] = 0;
+ for (s = 0; s<dfas->no; s++)
+ if (get_bit (mc, Rsrc, 0, s))
+ {
+ DFA_state *state = dfas->sortarray[s];
+ int i = state->tran_no;
+ while (--i >= 0)
+ if (ch >= state->trans[i].ch[0] && ch <= state->trans[i].ch[1])
+ set_bit (mc, Rdst, 0, state->trans[i].to);
+ }
+}
+
+static void shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
+ DFA_states *dfas)
+{
+ int i, s;
+ for (i = 0; i<mc->n; i++)
+ Rdst[i] = 0;
+ for (s = 0; s<dfas->no; s++)
+ if (get_bit (mc, Rsrc, 0, s))
+ {
+ DFA_state *state = dfas->sortarray[s];
+ int i = state->tran_no;
+ while (--i >= 0)
+ set_bit (mc, Rdst, 0, state->trans[i].to);
+ }
+}
+
+static void or (MatchContext *mc, MatchWord *Rdst,
+ MatchWord *Rsrc1, MatchWord *Rsrc2)
+{
+ int i;
+ for (i = 0; i<mc->n; i++)
+ Rdst[i] = Rsrc1[i] | Rsrc2[i];
+}
+
+
+static int go (MatchContext *mc, DFA_states *dfas, FILE *inf)
+{
+ MatchWord *Rj, *Rj1, *Rj_a, *Rj_b;
+ int s, d, ch;
+ int lineno = 1;
+ char *infbuf;
+ int inf_ptr = 1;
+ int no_match = 0;
+
+ infbuf = imalloc (INFBUF_SIZE);
+ infbuf[0] = '\n';
+ Rj = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
+ Rj1 = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
+ Rj_a = icalloc (mc->n * sizeof(*Rj));
+ Rj_b = icalloc (mc->n * sizeof(*Rj));
+
+ set_bit (mc, Rj, 0, 0);
+ for (d = 1; d<=mc->range; d++)
+ {
+ int s;
+ memcpy (Rj + mc->n * d, Rj + mc->n * (d-1), mc->n * sizeof(*Rj));
+ for (s = 0; s<dfas->no; s++)
+ {
+ if (get_bit (mc, Rj, d-1, s))
+ {
+ DFA_state *state = dfas->sortarray[s];
+ int i = state->tran_no;
+ while (--i >= 0)
+ set_bit (mc, Rj, d, state->trans[i].to);
+ }
+ }
+ }
+ while ((ch = getc (inf)) != EOF)
+ {
+ MatchWord *Rj_t;
+ if (ch == '\n')
+ {
+ if (no_match)
+ {
+ int i = inf_ptr;
+ if (show_line)
+ printf ("%5d:", lineno);
+ while (infbuf[i] != '\n')
+ {
+ if (--i < 0)
+ i = INFBUF_SIZE-1;
+ }
+ do
+ {
+ if (++i == INFBUF_SIZE)
+ i = 0;
+ putchar (infbuf[i]);
+ } while (infbuf[i] != '\n');
+ no_match = 0;
+ }
+ lineno++;
+ }
+ infbuf[inf_ptr++] = ch;
+ if (inf_ptr == INFBUF_SIZE)
+ inf_ptr = 0;
+ mask_shift (mc, Rj1, Rj, dfas, ch);
+ for (d = 1; d <= mc->range; d++)
+ {
+ mask_shift (mc, Rj_b, Rj+d*mc->n, dfas, ch);/* 1 */
+
+ shift (mc, Rj_a, Rj+(d-1)*mc->n, dfas); /* 2 */
+
+ or (mc, Rj_a, Rj_a, Rj_b); /* 1,2 */
+
+ shift (mc, Rj_b, Rj1+(d-1)*mc->n, dfas); /* 3 */
+
+ or (mc, Rj_a, Rj_a, Rj_b); /* 1,2,3 */
+
+ or (mc, Rj1+d*mc->n, Rj_a, Rj+(d-1)*mc->n); /* 1,2,3,4 */
+ }
+ for (s = 0; s<dfas->no; s++)
+ {
+ if (dfas->sortarray[s]->rule_no)
+ if (get_bit (mc, Rj1+mc->range*mc->n, 0, s))
+ no_match++;
+ }
+ for (d = 0; d <= mc->range; d++)
+ reset_bit (mc, Rj1+d*mc->n, 0, dfas->no);
+ Rj_t = Rj1;
+ Rj1 = Rj;
+ Rj = Rj_t;
+ }
+ ifree (Rj);
+ ifree (Rj1);
+ ifree (Rj_a);
+ ifree (Rj_b);
+ ifree (infbuf);
+ return 0;
+}
+
+static int grep_file (DFA_states *dfas, const char *fname, int range)
+{
+ FILE *inf;
+ MatchContext *mc;
+
+ if (fname)
+ {
+ inf = fopen (fname, "r");
+ if (!inf)
+ {
+ log (LOG_FATAL|LOG_ERRNO, "cannot open `%s'", fname);
+ exit (1);
+ }
+ }
+ else
+ inf = stdin;
+
+ mc = mk_MatchContext (dfas, range);
+
+ go (mc, dfas, inf);
+
+ if (fname)
+ fclose (inf);
+ return 0;
+}
+
+int main (int argc, char **argv)
+{
+ int ret;
+ int range = 0;
+ char *arg;
+ char *pattern = NULL;
+ DFA_states *dfas = NULL;
+ int no_files = 0;
+
+ prog = argv[0];
+ while ((ret = options ("nr:dsv:", argv, argc, &arg)) != -2)
+ {
+ if (ret == 0)
+ {
+ if (!pattern)
+ {
+ int i;
+ DFA *dfa = init_dfa();
+ pattern = arg;
+ i = parse_dfa (dfa, &pattern, dfa_thompson_chars);
+ if (i || *pattern)
+ {
+ fprintf (stderr, "%s: illegal pattern\n", prog);
+ return 1;
+ }
+ dfa->root = dfa->top;
+ dfas = mk_dfas (dfa, 200);
+ rm_dfa (&dfa);
+ }
+ else
+ {
+ no_files++;
+ grep_file (dfas, arg, range);
+ }
+ }
+ else if (ret == 'v')
+ {
+ log_init (atoi(arg), prog, NULL);
+ }
+ else if (ret == 's')
+ {
+ dfa_verbose = 1;
+ }
+ else if (ret == 'd')
+ {
+ debug_dfa_tran = 1;
+ debug_dfa_followpos = 1;
+ debug_dfa_trav = 1;
+ }
+ else if (ret == 'r')
+ {
+ range = atoi (arg);
+ }
+ else if (ret == 'n')
+ {
+ show_line = 1;
+ }
+ else
+ {
+ log (LOG_FATAL, "unknown option");
+ exit (1);
+ }
+ }
+ if (!pattern)
+ {
+ fprintf (stderr, "usage:\n "
+ " %s [-d] [-n] [-r n] [-s] [-v n] pattern file ..\n", prog);
+ exit (1);
+ }
+ else if (no_files == 0)
+ {
+ grep_file (dfas, NULL, range);
+ }
+ return 0;
+}
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: imalloc.c,v $
- * Revision 1.2 1994-09-26 16:30:56 adam
+ * Revision 1.3 1994-09-27 16:31:19 adam
+ * First version of grepper: grep with error correction.
+ *
+ * Revision 1.2 1994/09/26 16:30:56 adam
* Minor changes. imalloc uses xmalloc now.
*
* Revision 1.1 1994/09/26 10:16:54 adam
++alloc_calls;
return (void *)p;
#else
- void p = (void) xcalloc( size, 1 );
+ void *p = (void) xcalloc( size, 1 );
if( !p )
log (LOG_FATAL, "Out of memory (icalloc)" );
return p;
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: lexer.c,v $
- * Revision 1.1 1994-09-26 10:16:55 adam
+ * Revision 1.2 1994-09-27 16:31:20 adam
+ * First version of grepper: grep with error correction.
+ *
+ * Revision 1.1 1994/09/26 10:16:55 adam
* First version of dfa module in alex. This version uses yacc to parse
* regular expressions. This should be hand-made instead.
*
static char *prog;
-void error( const char *format, ... )
+void error (const char *format, ...)
{
va_list argptr;
- va_start( argptr, format );
- fprintf( stderr, "%s error: ", prog );
- (void) vfprintf( stderr, format, argptr );
- putc( '\n', stderr );
- exit( 1 );
+ va_start (argptr, format);
+ fprintf (stderr, "%s error: ", prog);
+ (void) vfprintf (stderr, format, argptr);
+ putc ('\n', stderr);
+ exit (1);
}
#ifdef YACC
#endif
int ccluse = 0;
-static int alex_options (int argc, char **argv);
-
-static int alex_options (int argc, char **argv)
+static int lexer_options (int argc, char **argv)
{
- while( --argc > 0 )
- if( **++argv == '-' )
- while( *++*argv )
+ while (--argc > 0)
+ if (**++argv == '-')
+ while (*++*argv)
{
- switch( **argv )
+ switch (**argv)
{
-#ifdef __STDC__
case 'V':
- fprintf( stderr, "%s: %s %s\n", prog, __DATE__, __TIME__ );
+ fprintf (stderr, "%s: %s %s\n", prog, __DATE__, __TIME__);
continue;
-#endif
case 'v':
dfa_verbose = 1;
continue;
ccluse = 1;
continue;
case 'd':
- switch( *++*argv )
+ switch (*++*argv)
{
case 's':
debug_dfa_tran = 1;
}
continue;
default:
- fprintf( stderr, "%s: unknown option `-%s'\n", prog, *argv );
+ fprintf (stderr, "%s: unknown option `-%s'\n",
+ prog, *argv);
return 1;
}
break;
return 0;
}
-int main (int argc, char **argv )
+int main (int argc, char **argv)
{
int i, no = 0;
DFA *dfa;
#else
alexdebug = 0;
#endif
- i = alex_options( argc, argv );
- if( i )
+ i = lexer_options (argc, argv);
+ if (i)
return i;
- if( argc < 2 )
+ if (argc < 2)
{
- fprintf( stderr, "%s: usage %s -cVvt -d[stf]\n", prog, prog );
+ fprintf (stderr, "usage\n %s [-c] [-V] [-v] [-t] [-d[stf]] file\n",
+ prog);
return 1;
}
- else while( --argc > 0 )
- if( **++argv != '-' && **argv )
+ else while (--argc > 0)
+ if (**++argv != '-' && **argv)
{
++no;
- i = read_file( *argv, &dfa );
- if( i )
+ i = read_file (*argv, &dfa);
+ if (i)
return i;
- dfas = mk_dfas( dfa, 2000 );
- rm_dfa( &dfa );
- rm_dfas( &dfas );
+ dfas = mk_dfas (dfa, 2000);
+ rm_dfa (&dfa);
+ rm_dfas (&dfas);
}
#ifdef MEMDEBUG
imemstat();
#endif
- if( !no )
+ if (!no)
{
- fprintf( stderr, "%s: no files specified\n", prog );
+ fprintf (stderr, "%s: no files specified\n", prog);
return 2;
}
return 0;