617 lines
15 KiB
Plaintext
617 lines
15 KiB
Plaintext
/*
|
|
* egrep -- print lines containing (or not containing) a regular expression
|
|
*
|
|
* status returns:
|
|
* 0 - ok, and some matches
|
|
* 1 - ok, but no matches
|
|
* 2 - some error
|
|
*/
|
|
%token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
|
|
%left OR
|
|
%left CHAR DOT CCL NCCL '('
|
|
%left CAT
|
|
%left STAR PLUS QUEST
|
|
|
|
%{
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
|
|
#define BLKSIZE 8192
|
|
#define MAXLIN 350
|
|
#define MAXPOS 4000
|
|
#define NCHARS 128
|
|
#define NSTATES 128
|
|
#define FINAL -1
|
|
char gotofn[NSTATES][NCHARS];
|
|
int state[NSTATES];
|
|
char out[NSTATES];
|
|
int line = 1;
|
|
int name[MAXLIN];
|
|
int left[MAXLIN];
|
|
int right[MAXLIN];
|
|
int parent[MAXLIN];
|
|
int foll[MAXLIN];
|
|
int positions[MAXPOS];
|
|
char chars[MAXLIN];
|
|
int nxtpos;
|
|
int nxtchar = 0;
|
|
int tmpstat[MAXLIN];
|
|
int initstat[MAXLIN];
|
|
int xstate;
|
|
int count;
|
|
int icount;
|
|
char *input;
|
|
FILE *exprfile;
|
|
|
|
long lnum;
|
|
int bflag;
|
|
int cflag;
|
|
int fflag;
|
|
int lflag;
|
|
int nflag;
|
|
int hflag = 1;
|
|
int sflag;
|
|
int vflag;
|
|
int retcode = 0;
|
|
int nfile;
|
|
long blkno;
|
|
long tln;
|
|
int nsucc;
|
|
|
|
int f;
|
|
char *fname;
|
|
%}
|
|
|
|
%%
|
|
s: t
|
|
={ unary(FINAL, $1);
|
|
line--;
|
|
}
|
|
;
|
|
t: b r
|
|
={ $$ = node(CAT, $1, $2); }
|
|
| OR b r OR
|
|
={ $$ = node(CAT, $2, $3); }
|
|
| OR b r
|
|
={ $$ = node(CAT, $2, $3); }
|
|
| b r OR
|
|
={ $$ = node(CAT, $1, $2); }
|
|
;
|
|
b:
|
|
={ $$ = enter(DOT);
|
|
$$ = unary(STAR, $$); }
|
|
;
|
|
r: CHAR
|
|
={ $$ = enter($1); }
|
|
| DOT
|
|
={ $$ = enter(DOT); }
|
|
| CCL
|
|
={ $$ = cclenter(CCL); }
|
|
| NCCL
|
|
={ $$ = cclenter(NCCL); }
|
|
;
|
|
|
|
r: r OR r
|
|
={ $$ = node(OR, $1, $3); }
|
|
| r r %prec CAT
|
|
={ $$ = node(CAT, $1, $2); }
|
|
| r STAR
|
|
={ $$ = unary(STAR, $1); }
|
|
| r PLUS
|
|
={ $$ = unary(PLUS, $1); }
|
|
| r QUEST
|
|
={ $$ = unary(QUEST, $1); }
|
|
| '(' r ')'
|
|
={ $$ = $2; }
|
|
| error
|
|
;
|
|
|
|
%%
|
|
yyerror(s) {
|
|
fprintf(stderr, "egrep: %s\n", s);
|
|
exit(2);
|
|
}
|
|
|
|
yylex() {
|
|
extern int yylval;
|
|
int cclcnt, x;
|
|
register char c, d;
|
|
switch(c = nextch()) {
|
|
case '$':
|
|
case '^': c = '\n';
|
|
goto defchar;
|
|
case '|': return (OR);
|
|
case '*': return (STAR);
|
|
case '+': return (PLUS);
|
|
case '?': return (QUEST);
|
|
case '(': return (c);
|
|
case ')': return (c);
|
|
case '.': return (DOT);
|
|
case '\0': return (0);
|
|
case '\n': return (OR);
|
|
case '[':
|
|
x = CCL;
|
|
cclcnt = 0;
|
|
count = nxtchar++;
|
|
if ((c = nextch()) == '^') {
|
|
x = NCCL;
|
|
c = nextch();
|
|
}
|
|
do {
|
|
if (c == '\0') synerror();
|
|
if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) {
|
|
if ((d = nextch()) != 0) {
|
|
c = chars[nxtchar-1];
|
|
while (c < d) {
|
|
if (nxtchar >= MAXLIN) overflo();
|
|
chars[nxtchar++] = ++c;
|
|
cclcnt++;
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
if (nxtchar >= MAXLIN) overflo();
|
|
chars[nxtchar++] = c;
|
|
cclcnt++;
|
|
} while ((c = nextch()) != ']');
|
|
chars[count] = cclcnt;
|
|
return (x);
|
|
case '\\':
|
|
if ((c = nextch()) == '\0') synerror();
|
|
defchar:
|
|
default: yylval = c; return (CHAR);
|
|
}
|
|
}
|
|
nextch() {
|
|
register char c;
|
|
if (fflag) {
|
|
if ((c = getc(exprfile)) == EOF) {
|
|
fclose(exprfile);
|
|
return(0);
|
|
}
|
|
}
|
|
else c = *input++;
|
|
return(c);
|
|
}
|
|
|
|
synerror() {
|
|
fprintf(stderr, "egrep: syntax error\n");
|
|
exit(2);
|
|
}
|
|
|
|
enter(x) int x; {
|
|
if(line >= MAXLIN) overflo();
|
|
name[line] = x;
|
|
left[line] = 0;
|
|
right[line] = 0;
|
|
return(line++);
|
|
}
|
|
|
|
cclenter(x) int x; {
|
|
register linno;
|
|
linno = enter(x);
|
|
right[linno] = count;
|
|
return (linno);
|
|
}
|
|
|
|
node(x, l, r) {
|
|
if(line >= MAXLIN) overflo();
|
|
name[line] = x;
|
|
left[line] = l;
|
|
right[line] = r;
|
|
parent[l] = line;
|
|
parent[r] = line;
|
|
return(line++);
|
|
}
|
|
|
|
unary(x, d) {
|
|
if(line >= MAXLIN) overflo();
|
|
name[line] = x;
|
|
left[line] = d;
|
|
right[line] = 0;
|
|
parent[d] = line;
|
|
return(line++);
|
|
}
|
|
overflo() {
|
|
fprintf(stderr, "egrep: regular expression too long\n");
|
|
exit(2);
|
|
}
|
|
|
|
cfoll(v) {
|
|
register i;
|
|
if (left[v] == 0) {
|
|
count = 0;
|
|
for (i=1; i<=line; i++) tmpstat[i] = 0;
|
|
follow(v);
|
|
add(foll, v);
|
|
}
|
|
else if (right[v] == 0) cfoll(left[v]);
|
|
else {
|
|
cfoll(left[v]);
|
|
cfoll(right[v]);
|
|
}
|
|
}
|
|
cgotofn() {
|
|
register c, i, k;
|
|
int n, s;
|
|
char symbol[NCHARS];
|
|
int j, nc, pc, pos;
|
|
int curpos, num;
|
|
int number, newpos;
|
|
count = 0;
|
|
for (n=3; n<=line; n++) tmpstat[n] = 0;
|
|
if (cstate(line-1)==0) {
|
|
tmpstat[line] = 1;
|
|
count++;
|
|
out[0] = 1;
|
|
}
|
|
for (n=3; n<=line; n++) initstat[n] = tmpstat[n];
|
|
count--; /*leave out position 1 */
|
|
icount = count;
|
|
tmpstat[1] = 0;
|
|
add(state, 0);
|
|
n = 0;
|
|
for (s=0; s<=n; s++) {
|
|
if (out[s] == 1) continue;
|
|
for (i=0; i<NCHARS; i++) symbol[i] = 0;
|
|
num = positions[state[s]];
|
|
count = icount;
|
|
for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
|
|
pos = state[s] + 1;
|
|
for (i=0; i<num; i++) {
|
|
curpos = positions[pos];
|
|
if ((c = name[curpos]) >= 0) {
|
|
if (c < NCHARS) symbol[c] = 1;
|
|
else if (c == DOT) {
|
|
for (k=0; k<NCHARS; k++)
|
|
if (k!='\n') symbol[k] = 1;
|
|
}
|
|
else if (c == CCL) {
|
|
nc = chars[right[curpos]];
|
|
pc = right[curpos] + 1;
|
|
for (k=0; k<nc; k++) symbol[chars[pc++]] = 1;
|
|
}
|
|
else if (c == NCCL) {
|
|
nc = chars[right[curpos]];
|
|
for (j = 0; j < NCHARS; j++) {
|
|
pc = right[curpos] + 1;
|
|
for (k = 0; k < nc; k++)
|
|
if (j==chars[pc++]) goto cont;
|
|
if (j!='\n') symbol[j] = 1;
|
|
cont:;
|
|
}
|
|
}
|
|
else printf("something's funny\n");
|
|
}
|
|
pos++;
|
|
}
|
|
for (c=0; c<NCHARS; c++) {
|
|
if (symbol[c] == 1) { /* nextstate(s,c) */
|
|
count = icount;
|
|
for (i=3; i <= line; i++) tmpstat[i] = initstat[i];
|
|
pos = state[s] + 1;
|
|
for (i=0; i<num; i++) {
|
|
curpos = positions[pos];
|
|
if ((k = name[curpos]) >= 0)
|
|
if (
|
|
(k == c)
|
|
| (k == DOT)
|
|
| (k == CCL && member(c, right[curpos], 1))
|
|
| (k == NCCL && member(c, right[curpos], 0))
|
|
) {
|
|
number = positions[foll[curpos]];
|
|
newpos = foll[curpos] + 1;
|
|
for (k=0; k<number; k++) {
|
|
if (tmpstat[positions[newpos]] != 1) {
|
|
tmpstat[positions[newpos]] = 1;
|
|
count++;
|
|
}
|
|
newpos++;
|
|
}
|
|
}
|
|
pos++;
|
|
} /* end nextstate */
|
|
if (notin(n)) {
|
|
if (n >= NSTATES) overflo();
|
|
add(state, ++n);
|
|
if (tmpstat[line] == 1) out[n] = 1;
|
|
gotofn[s][c] = n;
|
|
}
|
|
else {
|
|
gotofn[s][c] = xstate;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
cstate(v) {
|
|
register b;
|
|
if (left[v] == 0) {
|
|
if (tmpstat[v] != 1) {
|
|
tmpstat[v] = 1;
|
|
count++;
|
|
}
|
|
return(1);
|
|
}
|
|
else if (right[v] == 0) {
|
|
if (cstate(left[v]) == 0) return (0);
|
|
else if (name[v] == PLUS) return (1);
|
|
else return (0);
|
|
}
|
|
else if (name[v] == CAT) {
|
|
if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
|
|
else return (1);
|
|
}
|
|
else { /* name[v] == OR */
|
|
b = cstate(right[v]);
|
|
if (cstate(left[v]) == 0 || b == 0) return (0);
|
|
else return (1);
|
|
}
|
|
}
|
|
|
|
|
|
member(symb, set, torf) {
|
|
register i, num, pos;
|
|
num = chars[set];
|
|
pos = set + 1;
|
|
for (i=0; i<num; i++)
|
|
if (symb == chars[pos++]) return (torf);
|
|
return (!torf);
|
|
}
|
|
|
|
notin(n) {
|
|
register i, j, pos;
|
|
for (i=0; i<=n; i++) {
|
|
if (positions[state[i]] == count) {
|
|
pos = state[i] + 1;
|
|
for (j=0; j < count; j++)
|
|
if (tmpstat[positions[pos++]] != 1) goto nxt;
|
|
xstate = i;
|
|
return (0);
|
|
}
|
|
nxt: ;
|
|
}
|
|
return (1);
|
|
}
|
|
|
|
add(array, n) int *array; {
|
|
register i;
|
|
if (nxtpos + count > MAXPOS) overflo();
|
|
array[n] = nxtpos;
|
|
positions[nxtpos++] = count;
|
|
for (i=3; i <= line; i++) {
|
|
if (tmpstat[i] == 1) {
|
|
positions[nxtpos++] = i;
|
|
}
|
|
}
|
|
}
|
|
|
|
follow(v) int v; {
|
|
int p;
|
|
if (v == line) return;
|
|
p = parent[v];
|
|
switch(name[p]) {
|
|
case STAR:
|
|
case PLUS: cstate(v);
|
|
follow(p);
|
|
return;
|
|
|
|
case OR:
|
|
case QUEST: follow(p);
|
|
return;
|
|
|
|
case CAT: if (v == left[p]) {
|
|
if (cstate(right[p]) == 0) {
|
|
follow(p);
|
|
return;
|
|
}
|
|
}
|
|
else follow(p);
|
|
return;
|
|
case FINAL: if (tmpstat[line] != 1) {
|
|
tmpstat[line] = 1;
|
|
count++;
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
|
|
|
|
main(argc, argv)
|
|
char **argv;
|
|
{
|
|
while (--argc > 0 && (++argv)[0][0]=='-')
|
|
switch (argv[0][1]) {
|
|
|
|
case 's':
|
|
sflag++;
|
|
continue;
|
|
|
|
case 'h':
|
|
hflag = 0;
|
|
continue;
|
|
|
|
case 'b':
|
|
bflag++;
|
|
continue;
|
|
|
|
case 'c':
|
|
cflag++;
|
|
continue;
|
|
|
|
case 'e':
|
|
argc--;
|
|
argv++;
|
|
goto out;
|
|
|
|
case 'f':
|
|
fflag++;
|
|
continue;
|
|
|
|
case 'l':
|
|
lflag++;
|
|
continue;
|
|
|
|
case 'n':
|
|
nflag++;
|
|
continue;
|
|
|
|
case 'v':
|
|
vflag++;
|
|
continue;
|
|
|
|
default:
|
|
fprintf(stderr, "egrep: unknown flag\n");
|
|
continue;
|
|
}
|
|
out:
|
|
if (argc<=0)
|
|
exit(2);
|
|
if (fflag) {
|
|
fname = *argv;
|
|
exprfile = fopen(fname, "r");
|
|
if (exprfile == (FILE *)NULL) {
|
|
fprintf(stderr, "egrep: can't open %s\n", fname);
|
|
exit(2);
|
|
}
|
|
}
|
|
else input = *argv;
|
|
argc--;
|
|
argv++;
|
|
|
|
yyparse();
|
|
|
|
cfoll(line-1);
|
|
cgotofn();
|
|
nfile = argc;
|
|
if (argc<=0) {
|
|
if (lflag) exit(1);
|
|
execute(0);
|
|
}
|
|
else while (--argc >= 0) {
|
|
execute(*argv);
|
|
argv++;
|
|
}
|
|
exit(retcode != 0 ? retcode : nsucc == 0);
|
|
}
|
|
|
|
execute(file)
|
|
char *file;
|
|
{
|
|
register char *p;
|
|
register cstat;
|
|
register ccount;
|
|
static char *buf;
|
|
static int blksize;
|
|
struct stat stb;
|
|
char *nlp;
|
|
int istat;
|
|
if (file) {
|
|
if ((f = open(file, 0)) < 0) {
|
|
fprintf(stderr, "egrep: can't open %s\n", file);
|
|
retcode = 2;
|
|
return;
|
|
}
|
|
}
|
|
else f = 0;
|
|
if (buf == NULL) {
|
|
if (fstat(f, &stb) > 0 && stb.st_blksize > 0)
|
|
blksize = stb.st_blksize;
|
|
else
|
|
blksize = BLKSIZE;
|
|
buf = (char *)malloc(2*blksize);
|
|
if (buf == NULL) {
|
|
fprintf(stderr, "egrep: no memory for %s\n", file);
|
|
retcode = 2;
|
|
return;
|
|
}
|
|
}
|
|
ccount = 0;
|
|
lnum = 1;
|
|
tln = 0;
|
|
blkno = 0;
|
|
p = buf;
|
|
nlp = p;
|
|
if ((ccount = read(f,p,blksize))<=0) goto done;
|
|
istat = cstat = gotofn[0]['\n'];
|
|
if (out[cstat]) goto found;
|
|
for (;;) {
|
|
cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */
|
|
if (out[cstat]) {
|
|
found: for(;;) {
|
|
if (*p++ == '\n') {
|
|
if (vflag == 0) {
|
|
succeed: nsucc = 1;
|
|
if (cflag) tln++;
|
|
else if (sflag)
|
|
; /* ugh */
|
|
else if (lflag) {
|
|
printf("%s\n", file);
|
|
close(f);
|
|
return;
|
|
}
|
|
else {
|
|
if (nfile > 1 && hflag) printf("%s:", file);
|
|
if (bflag) printf("%ld:", blkno);
|
|
if (nflag) printf("%ld:", lnum);
|
|
if (p <= nlp) {
|
|
while (nlp < &buf[2*blksize]) putchar(*nlp++);
|
|
nlp = buf;
|
|
}
|
|
while (nlp < p) putchar(*nlp++);
|
|
}
|
|
}
|
|
lnum++;
|
|
nlp = p;
|
|
if ((out[(cstat=istat)]) == 0) goto brk2;
|
|
}
|
|
cfound:
|
|
if (--ccount <= 0) {
|
|
if (p <= &buf[blksize]) {
|
|
if ((ccount = read(f, p, blksize)) <= 0) goto done;
|
|
}
|
|
else if (p == &buf[2*blksize]) {
|
|
p = buf;
|
|
if ((ccount = read(f, p, blksize)) <= 0) goto done;
|
|
}
|
|
else {
|
|
if ((ccount = read(f, p, &buf[2*blksize]-p)) <= 0) goto done;
|
|
}
|
|
blkno += ccount / 512;
|
|
}
|
|
}
|
|
}
|
|
if (*p++ == '\n') {
|
|
if (vflag) goto succeed;
|
|
else {
|
|
lnum++;
|
|
nlp = p;
|
|
if (out[(cstat=istat)]) goto cfound;
|
|
}
|
|
}
|
|
brk2:
|
|
if (--ccount <= 0) {
|
|
if (p <= &buf[blksize]) {
|
|
if ((ccount = read(f, p, blksize)) <= 0) break;
|
|
}
|
|
else if (p == &buf[2*blksize]) {
|
|
p = buf;
|
|
if ((ccount = read(f, p, blksize)) <= 0) break;
|
|
}
|
|
else {
|
|
if ((ccount = read(f, p, &buf[2*blksize] - p)) <= 0) break;
|
|
}
|
|
blkno += ccount / 512;
|
|
}
|
|
}
|
|
done: close(f);
|
|
if (cflag) {
|
|
if (nfile > 1)
|
|
printf("%s:", file);
|
|
printf("%ld\n", tln);
|
|
}
|
|
}
|