1 files changed, 924 insertions, 0 deletions
diff --git a/usr.bin/grep/egrep/egrep.c b/usr.bin/grep/egrep/egrep.c
new file mode 100644
index 0000000..885965c
--- /dev/null
+++ b/usr.bin/grep/egrep/egrep.c
@@ -0,0 +1,924 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static char copyright[] =
+"@(#) Copyright (c) 1991, 1993\n\
+	The Regents of the University of California.  All rights reserved.\n";
+#endif /* not lint */
+
+#ifndef lint
+static char sccsid[] = "@(#)egrep.c	8.1 (Berkeley) 6/6/93";
+#endif /* not lint */
+
+/*
+     Hybrid Boyer/Moore/Gosper-assisted 'grep/egrep/fgrep' search, with delta0
+     table as in original paper (CACM, October, 1977).  No delta1 or delta2.
+     According to experiment (Horspool, Soft. Prac. Exp., 1982), delta2 is of
+     minimal practical value.  However, to improve for worst case input,
+     integrating the improved Galil strategies (Apostolico/Giancarlo, SIAM. J.
+     Comput., Feb. 1986) deserves consideration.
+
+     Method: 	extract longest metacharacter-free string from expression.
+		this is done using a side-effect from henry spencer's regcomp().
+		use boyer-moore to match such, then pass submatching lines
+		to either regexp() or standard 'egrep', depending on certain
+		criteria within execstrategy() below.  [this tradeoff is due
+		to the general slowness of the regexp() nondeterministic
+		machine on complex expressions, as well as the startup time
+		of standard 'egrep' on short files.]  alternatively, one may
+		change the vendor-supplied 'egrep' automaton to include
+		boyer-moore directly.  see accompanying writeup for discussion
+		of kanji expression treatment.
+
+		late addition:  apply trickbag for fast match of simple
+		alternations (sublinear, in common low-cardinality cases).
+		trap fgrep into this lair.
+
+		gnu additions:  -f, newline as |, \< and \> [in regexec()], more
+				comments.  inspire better dfa exec() strategy.
+				serious testing and help with special cases.
+
+     Algorithm amalgam summary:
+
+		dfa e?grep 		(aho/thompson)
+		ndfa regexp() 		(spencer/aho)
+		bmg			(boyer/moore/gosper)
+		"superimposed" bmg   	(jaw)
+		fgrep			(aho/corrasick)
+
+		sorry, but the knuth/morris/pratt machine, horspool's
+		"frequentist" code, and the rabin/karp matcher, however cute,
+		just don't cut it for this production.
+
+     James A. Woods				Copyright (c) 1986
+     NASA Ames Research Center
+*/
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <regexp.h>		/* must be henry spencer's version */
+#include <stdio.h>
+#include <ctype.h>
+#include "pathnames.h"
+
+#define	MIN(A, B)	((A) > (B) ? (B) : (A))
+
+#ifdef	SLOWSYS
+#define read	xread
+#endif
+
+#define BUFSIZE	8192		/* make higher for cray */
+#define PATSIZE 6000
+#define LARGE 	BUFSIZE + PATSIZE
+
+#define NALT	7		/* tied to scanf() size in alternate() */
+#define NMUSH	6		/* loosely relates to expected alt length */
+
+#define	FIRSTFEW	33 	/* Always do FIRSTFEW matches with regexec() */
+#define	PUNTPERCENT	10	/* After FIRSTFEW, if PUNTPERCENT of the input
+				 * was processed by regexp(), exec std egrep. */
+#define NL	'\n'
+#define	EOS	'\0'
+#define	NONASCII	0200	/* Bit mask for Kanji non-ascii chars */
+#define META	"\n^$.[]()?+*|\\"	/* egrep meta-characters */
+#define SS2	'\216'		/* EUC Katakana (or Chinese2) prefix */
+#define SS3	'\217'		/* EUC Kanji2 (or Chinese3) prefix */
+
+extern char *optarg;
+extern int optind;
+char *progname;
+
+int cflag, iflag, eflag, fflag, lflag, nflag;	/* SVID flags */
+int sflag, hflag;		/* v7, v8, bsd */
+
+int firstflag;			/* Stop at first match */
+int grepflag;			/* Called as "grep" */
+int fgrepflag;			/* Called as "fgrep" */
+int altflag;			/* Simple alternation in pattern */
+int boyonly;			/* No regexp needed -- all simple */
+int flushflag;
+int grepold, egrepold, fgrepold;
+
+int nalt;			/* Number of alternatives */
+int nsuccess;			/* 1 for match, 2 for error */
+int altmin;			/* Minimum length of all the alternate
+				 * strings */
+int firstfile;			/* argv index of first file argument */
+int patind;			/* argv index of pattern */
+long nmatch;			/* Number of matches in this file */
+long incount, counted;		/* Amount of input consumed */
+long rxcount;			/* Bytes of input processed by regexec() */
+int boyfound;			/* accumulated partial matches (tripped by
+				 * FIRSTFEW) */
+int prevmatch;			/* next three lines aid fast -n */
+long nline, prevnline;
+char *prevloc;
+
+regexp *rspencer;
+char *pattern;
+char *patboy;			/* Pattern for simple Boyer-Moore */
+char *patfile;			/* Filename containing pattern(s) */
+
+int delta0[256];		/* Boyer-Moore algorithm core */
+char cmap[256];			/* Usually 0-255, but if -i, maps upper to
+				 * lower case */
+char str[BUFSIZE + 2];
+int nleftover;
+char linetemp[BUFSIZE];
+char *altpat[NALT];		/* alternation component storage */
+int altlen[NALT];
+short altset[NMUSH + 1][256];
+char preamble[200];		/* match prefix (filename, line no.) */
+
+int fd;
+char *
+strchr(), *strrchr(), *strcpy(), *strncpy(), *strpbrk(), *malloc();
+char *
+grepxlat(), *fold(), *pfile(), *alternate(), *isolate();
+char *gotamatch(), *kanji(), *linesave(), *submatch();
+char **args;
+
+main(argc, argv)
+	int argc;
+	char *argv[];
+{
+	int c, oflag;
+	int errflag = 0;
+
+	args = argv;
+
+	if ((progname = strrchr(argv[0], '/')) != 0)
+		progname++;
+	else
+		progname = argv[0];
+	if (strcmp(progname, "grep") == 0)
+		grepflag++;
+	else if (strcmp(progname, "fgrep") == 0)
+		fgrepflag++;
+
+	oflag = 0;
+	while ((c = getopt(argc, argv, "bchie:f:lnosvwxy1")) != EOF) {
+		switch (c) {
+
+		case 'f':
+			fflag++;
+			patfile = optarg;
+			continue;
+		case 'b':
+		case 'v':
+			egrepold++;	/* boyer-moore of little help here */
+			continue;
+		case 'c':
+			cflag++;
+			continue;
+		case 'e':
+			eflag++;
+			pattern = optarg;
+			continue;
+		case 'h':
+			hflag++;
+			continue;
+		case 'o':
+			oflag++;
+			continue;
+		case '1':	/* Stop at very first match */
+			firstflag++;	/* spead freaks only */
+			continue;
+		case 'i':
+			iflag++;
+			continue;
+		case 'l':
+			lflag++;
+			continue;
+		case 'n':
+			nflag++;
+			continue;
+		case 's':
+			sflag++;
+			continue;
+		case 'w':
+		case 'y':
+			if (!grepflag)
+				errflag++;
+			grepold++;
+			continue;
+		case 'x':	/* needs more work, like -b above */
+			if (!fgrepflag)
+				errflag++;
+			fgrepold++;
+			continue;
+		case '?':
+			errflag++;
+		}
+	}
+	if (errflag || ((argc <= optind) && !fflag && !eflag)) {
+		if (grepflag)
+oops("usage: grep [-bchilnosvwy] [-e] pattern [file ...]");
+		else if (fgrepflag)
+oops("usage: fgrep [-bchilnosvx] {-f patfile | [-e] strings} [file ...]");
+		else		/* encourage SVID options, though we provide
+				 * others */
+oops("usage: egrep [-bchilnosv] {-f patfile | [-e] pattern} [file ...]");
+	}
+	if (fflag)
+		pattern = pfile(patfile);
+	else if (!eflag) {
+		patind = optind;
+		pattern = argv[optind++];
+	}
+
+	if (!oflag && (argc - optind) <= 1)	/* Filename invisible given < 2 files */
+		hflag++;
+	if (pattern[0] == EOS)
+		kernighan(argv);	/* same as it ever was */
+	/*
+	 * 'grep/egrep' merger -- "old" grep is called to handle: tagged
+	 * exprs \( \), word matches \< and \>, -w and -y options, char
+	 * classes with '-' at end (egrep bug?), and patterns beginning with
+	 * an asterisk (don't ask why). otherwise, characters meaningful to
+	 * 'egrep' but not to 'grep' are escaped; the entire expr is then
+	 * passed to 'egrep'. 
+	 */
+	if (grepflag && !grepold) {
+		if (strindex(pattern, "\\(") >= 0 ||
+		    strindex(pattern, "\\<") >= 0 ||
+		    strindex(pattern, "\\>") >= 0 ||
+		    strindex(pattern, "-]") >= 0 ||
+		    pattern[0] == '*')	/* grep bug */
+			grepold++;
+		else
+			pattern = grepxlat(pattern);
+	}
+	if (grepold || egrepold || fgrepold)
+		kernighan(argv);
+
+	if (iflag)
+		strcpy(pattern, fold(pattern));
+	/*
+	 * If the pattern is a plain string, just run boyer-moore. If it
+	 * consists of meta-free alternatives, run "superimposed" bmg.
+	 * Otherwise, find best string, and compile pattern for regexec(). 
+	 */
+	if (strpbrk(pattern, META) == NULL) {	/* do boyer-moore only */
+		boyonly++;
+		patboy = pattern;
+	} else {
+		if ((patboy = alternate(pattern)) != NULL)
+			boyonly++;
+		else {
+			if ((patboy = isolate(pattern)) == NULL)
+				kernighan(argv);	/* expr too involved */
+#ifndef NOKANJI
+			for (c = 0; pattern[c] != EOS; c++)
+				if (pattern[c] & NONASCII)	/* kanji + meta */
+					kernighan(argv);
+#endif
+			if ((rspencer = regcomp(pattern)) == NULL)
+				oops("regcomp failure");
+		}
+	}
+	gosper(patboy);		/* "pre-conditioning is wonderful"
+				 * -- v. strassen */
+
+	if ((firstfile = optind) >= argc) {
+		/* Grep standard input */
+		if (lflag)	/* We don't know its name! */
+			exit(1);
+		egsecute((char *) NULL);
+	} else {
+		while (optind < argc) {
+			egsecute(argv[optind]);
+			optind++;
+			if (firstflag && (nsuccess == 1))
+				break;
+		}
+	}
+	exit((nsuccess == 2) ? 2 : (nsuccess == 0));
+}
+
+char *
+pfile(pfname)			/* absorb expression from file */
+	char *pfname;
+{
+	int fd;
+	struct stat patstat;
+	static char *pat;
+
+	if ((fd = open(pfname, O_RDONLY, 0)) < 0)
+		oops("can't read pattern file");
+	if (fstat(fd, &patstat) != 0)
+		oops("can't stat pattern file");
+	if (patstat.st_size > PATSIZE) {
+		if (fgrepflag) {	/* defer to unix version */
+			fgrepold++;
+			return "dummy";
+		} else
+			oops("pattern file too big");
+	}
+	if ((pat = malloc((unsigned) patstat.st_size + 1)) == NULL)
+		oops("out of memory to read pattern file");
+	if (patstat.st_size != read(fd, pat, (int)patstat.st_size))
+		oops("error reading pattern file");
+	(void) close(fd);
+
+	pat[patstat.st_size] = EOS;
+	if (pat[patstat.st_size - 1] == NL)	/* NOP for egrep; helps grep */
+		pat[patstat.st_size - 1] = EOS;
+
+	if (nlcount(pat, &pat[patstat.st_size]) > NALT) {
+		if (fgrepflag)
+			fgrepold++;	/* "what's it all about, alfie?" */
+		else
+			egrepold++;
+	}
+	return (pat);
+}
+
+egsecute(file)
+	char *file;
+{
+	extern int errno;
+
+	if (file == NULL)
+		fd = 0;
+	else if ((fd = open(file, O_RDONLY, 0)) <= 0) {
+		fprintf(stderr,
+		    "%s: %s: %s\n", progname, file, strerror(errno));
+		nsuccess = 2;
+		return;
+	}
+	chimaera(file, patboy);
+
+	if (!boyonly && !flushflag && file != NULL)
+		flushmatches();
+	if (file != NULL)
+		close(fd);
+}
+
+chimaera(file, pat)		/* "reach out and boyer-moore search someone" */
+	char *file, *pat;	/* -- soon-to-be-popular bumper sticker */
+{
+	register char *k, *strend, *s;
+	register int j, count;
+	register int *deltazero = delta0;
+	int patlen = altmin;
+	char *t;
+
+	nleftover = boyfound = flushflag = 0;
+	nline = 1L;
+	prevmatch = 0;
+	nmatch = counted = rxcount = 0L;
+
+	while ((count = read(fd, str + nleftover, BUFSIZE - nleftover)) > 0) {
+
+		counted += count;
+		strend = linesave(str, count);
+
+		for (k = str + patlen - 1; k < strend;) {
+			/*
+			 * for a large class of patterns, upwards of 80% of
+			 * match time is spent on the next line.  we beat
+			 * existing microcode (vax 'matchc') this way. 
+			 */
+			while ((k += deltazero[*(unsigned char *) k]) < strend);
+			if (k < (str + LARGE))
+				break;
+			k -= LARGE;
+
+			if (altflag) {
+				/*
+				 * Parallel Boyer-Moore.  Check whether each
+				 * of the previous <altmin> chars COULD be
+				 * from one of the alternative strings. 
+				 */
+				s = k - 1;
+				j = altmin;
+				while (altset[--j][(unsigned char)
+					     cmap[*(unsigned char *) s--]]);
+				/*
+				 * quick test fails. in this life, compare
+				 * 'em all.  but, a "reverse trie" would
+				 * attenuate worst case (linear w/delta2?). 
+				 */
+				if (--j < 0) {
+					count = nalt - 1;
+					do {
+						s = k;
+						j = altlen[count];
+						t = altpat[count];
+
+						while
+							(cmap[*(unsigned char *) s--]
+							 == t[--j]);
+						if (j < 0)
+							break;
+					}
+					while (count--);
+				}
+			} else {
+				/* One string -- check it */
+				j = patlen - 1;
+				s = k - 1;
+				while (cmap[*(unsigned char *) s--] == pat[--j]);
+			}
+			/*
+			 * delta-less shortcut for literati. short shrift for
+			 * genetic engineers? 
+			 */
+			if (j >= 0) {
+				k++;	/* no match; restart next char */
+				continue;
+			}
+			k = submatch(file, pat, str, strend, k, count);
+			if (k == NULL)
+				return;
+		}
+		if (nflag) {
+			if (prevmatch)
+				nline = prevnline + nlcount(prevloc, k);
+			else
+				nline = nline + nlcount(str, k);
+			prevmatch = 0;
+		}
+		strncpy(str, linetemp, nleftover);
+	}
+	if (cflag) {
+		/* Bug from old grep: -c overrides -h.  We fix the bug. */
+		if (!hflag)
+			printf("%s:", file);
+		printf("%ld\n", nmatch);
+	}
+}
+
+char *
+linesave(str, count)		/* accumulate partial line at end of buffer */
+	char str[];
+	register int count;
+{
+	register int j;
+
+	count += nleftover;
+	if (count != BUFSIZE && fd != 0)
+		str[count++] = NL;	/* insurance for broken last line */
+	str[count] = EOS;
+	for (j = count - 1; str[j] != NL && j >= 0;)
+		j--;
+	/*
+	 * break up these lines: long line (> BUFSIZE), last line of file, or
+	 * short return from read(), as from tee(1) input 
+	 */
+	if (j < 0 && (count == (BUFSIZE - nleftover))) {
+		str[count++] = NL;
+		str[count] = EOS;
+		linetemp[0] = EOS;
+		nleftover = 0;
+		return (str + count);
+	} else {
+		nleftover = count - j - 1;
+		strncpy(linetemp, str + j + 1, nleftover);
+		return (str + j);
+	}
+}
+
+/*
+ * Process partial match. First check for mis-aligned Kanji, then match line
+ * against full compiled r.e. if statistics do not warrant handing off to
+ * standard egrep. 
+ */
+char *
+submatch(file, pat, str, strend, k, altindex)
+	char file[], pat[], str[];
+	register char *strend, *k;
+	int altindex;
+{
+	register char *s;
+	char *t, c;
+
+	t = k;
+	s = ((altflag) ? k - altlen[altindex] + 1 : k - altmin + 1);
+#ifndef NOKANJI
+	c = ((altflag) ? altpat[altindex][0] : pat[0]);
+	if (c & NONASCII)
+		if ((s = kanji(str, s, k)) == NULL)
+			return (++k);	/* reject false kanji */
+#endif
+	do;
+	while (*s != NL && --s >= str);
+	k = s + 1;		/* now at line start */
+
+	if (boyonly)
+		return (gotamatch(file, k));
+
+	incount = counted - (strend - k);
+	if (boyfound++ == FIRSTFEW)
+		execstrategy(file);
+
+	s = t;
+	do
+		rxcount++;
+	while (*s++ != NL);
+	*--s = EOS;
+	/*
+	 * "quick henry -- the flit" (after theodor geisel) 
+	 */
+	if (regexec(rspencer, ((iflag) ? fold(k) : k)) == 1) {
+		*s = NL;
+		if (gotamatch(file, k) == NULL)
+			return (NULL);
+	}
+	*s = NL;
+	return (s + 1);
+}
+
+#ifndef NOKANJI
+/*
+ * EUC code disambiguation -- scan backwards to first 7-bit code, while
+ * counting intervening 8-bit codes.  If odd, reject unaligned Kanji pattern. 
+ * SS2/3 checks are for intermixed Japanase Katakana or Kanji2. 
+ */
+char *
+kanji(str, s, k)
+	register char *str, *s, *k;
+{
+	register int j = 0;
+
+	for (s--; s >= str; s--) {
+		if (*s == SS2 || *s == SS3 || (*s & NONASCII) == 0)
+			break;
+		j++;
+	}
+#ifndef CHINESE
+	if (*s == SS2)
+		j -= 1;
+#endif  CHINESE
+	return ((j & 01) ? NULL : k);
+}
+#endif
+
+/*
+ * Compute "Boyer-Moore" delta table -- put skip distance in delta0[c] 
+ */
+gosper(pattern)
+	char *pattern;		/* ... HAKMEM lives ... */
+{
+	register int i, j;
+	unsigned char c;
+
+	/* Make one-string case look like simple alternatives case */
+	if (!altflag) {
+		nalt = 1;
+		altmin = altlen[0] = strlen(pattern);
+		altpat[0] = pattern;
+	}
+	/* For chars that aren't in any string, skip by string length. */
+	for (j = 0; j < 256; j++) {
+		delta0[j] = altmin;
+		cmap[j] = j;	/* Sneak in initialization of cmap */
+	}
+
+	/* For chars in a string, skip distance from char to end of string. */
+	/* (If char appears more than once, skip minimum distance.) */
+	for (i = 0; i < nalt; i++)
+		for (j = 0; j < altlen[i] - 1; j++) {
+			c = altpat[i][j];
+			delta0[c] = MIN(delta0[c], altlen[i] - j - 1);
+			if (iflag && islower((int) c))
+				delta0[toupper((int) c)] = delta0[c];
+		}
+
+	/* For last char of each string, fall out of search loop. */
+	for (i = 0; i < nalt; i++) {
+		c = altpat[i][altlen[i] - 1];
+		delta0[c] = LARGE;
+		if (iflag && islower((int) c))
+			delta0[toupper((int) c)] = LARGE;
+	}
+	if (iflag)
+		for (j = 'A'; j <= 'Z'; j++)
+			cmap[j] = tolower((int) j);
+}
+
+/*
+ * Print, count, or stop on full match. Result is either the location for
+ * continued search, or NULL to stop. 
+ */
+char *
+gotamatch(file, s)
+	register char *file, *s;
+{
+	char *savematch();
+	int squirrel = 0;	/* nonzero to squirrel away FIRSTFEW matches */
+
+	nmatch++;
+	nsuccess = 1;
+	if (!boyonly && boyfound <= FIRSTFEW && file != NULL)
+		squirrel = 1;
+
+	if (sflag)
+		return (NULL);	/* -s usurps all flags (unlike some versions) */
+	if (cflag) {		/* -c overrides -l, we guess */
+		do;
+		while (*s++ != NL);
+	} else if (lflag) {
+		puts(file);
+		return (NULL);
+	} else {
+		if (!hflag)
+			if (!squirrel)
+				printf("%s:", file);
+			else
+				(void)sprintf(preamble, "%s:", file);
+		if (nflag) {
+			if (prevmatch)
+				prevnline = prevnline + nlcount(prevloc, s);
+			else
+				prevnline = nline + nlcount(str, s);
+			prevmatch = 1;
+
+			if (!squirrel)
+				printf("%ld:", prevnline);
+			else
+				(void)sprintf(preamble + strlen(preamble),
+					"%ld:", prevnline);
+		}
+		if (!squirrel) {
+			do
+				putchar(*s);
+			while (*s++ != NL);
+		} else
+			s = savematch(s);
+
+		if (nflag)
+			prevloc = s - 1;
+	}
+	return ((firstflag && !cflag) ? NULL : s);
+}
+
+char *
+fold(line)
+	char *line;
+{
+	static char fline[BUFSIZE];
+	register char *s, *t = fline;
+
+	for (s = line; *s != EOS; s++)
+		*t++ = (isupper((int) *s) ? (char) tolower((int) *s) : *s);
+	*t = EOS;
+	return (fline);
+}
+
+strindex(s, t)			/* the easy way, as in K&P, p. 192 */
+	char *s, *t;
+{
+	int i, n;
+
+	n = strlen(t);
+	for (i = 0; s[i] != '\0'; i++)
+		if (strncmp(s + i, t, n) == 0)
+			return (i);
+	return (-1);
+}
+
+char *
+grepxlat(pattern)		/* grep pattern meta conversion */
+	char *pattern;
+{
+	register char *p, *s;
+	static char newpat[BUFSIZE];
+
+	for (s = newpat, p = pattern; *p != EOS;) {
+		if (*p == '\\') {	/* skip escapes ... */
+			*s++ = *p++;
+			if (*p)
+				*s++ = *p++;
+		} else if (*p == '[') {	/* ... and char classes */
+			while (*p != EOS && *p != ']')
+				*s++ = *p++;
+		} else if (strchr("+?|()", *p) != NULL) {
+			*s++ = '\\';	/* insert protection */
+			*s++ = *p++;
+		} else
+			*s++ = *p++;
+	}
+	*s = EOS;
+	grepflag = ((patind) ? 0 : 1);
+	return (newpat);
+}
+
+/*
+ * Test for simple alternation.  Result is NULL if it's not so simple, or is
+ * a pointer to the first string if it is. Warning:  sscanf size is a
+ * fixpoint, beyond which the speedup linearity starts to break down.  In the
+ * wake of the elegant aho/corrasick "trie"-based fgrep, generalizing
+ * altpat[] to arbitrary size is not useful. 
+ */
+char *
+alternate(regexpr)
+	char *regexpr;
+{
+	register int i, j;
+	register char *start, *stop;
+	unsigned char c;
+
+	if (fgrepflag && strchr(regexpr, '|'))
+			return (NULL);
+
+	/*
+	 * break pattern up into altpat array; delimit on newline, bar,
+	 * or EOS.  We know we won't overflow, we've already checked the
+	 * number of patterns we're going to find against NALT.
+	 * Also, set length of pattern and find minimum pattern length.
+	 */
+	nalt = 0;
+	altmin = NMUSH;
+	for (start = stop = regexpr;; ++stop)
+		if (!*stop || *stop == '|' || *stop == NL) {
+			altlen[nalt] = j = stop - start;
+			if (j < altmin)
+				altmin = j;
+			if (!(altpat[nalt] = malloc((u_int)(j + 1))))
+				oops("out of memory");
+			bcopy(start, altpat[nalt], j);
+			altpat[nalt][j] = EOS;
+			++nalt;
+			if (!*stop)
+				break;
+			if (nalt == NALT)
+				return(NULL);
+			if (*stop == NL)
+				*stop = '|';
+			start = stop + 1;
+		}
+	if (!fgrepflag) {
+		if (strchr(regexpr, '|') == NULL || regexpr[0] == '|')
+			return (NULL);
+		if (strpbrk(regexpr, "^$.[]()?+*\\") != NULL
+		    || strindex(regexpr, "||") >= 0)
+			return (NULL);
+	}
+
+	if (nalt > 1) {		/* build superimposed "pre-match" sets per
+				 * char */
+		altflag++;
+		for (j = 0; j < nalt; j++)
+			for (i = 0; i < altmin; i++) {
+				c = altpat[j][altlen[j] - altmin + i];
+				altset[i + 1][c] = 1;	/* offset for sentinel */
+			}
+	}
+	return (altpat[0]);
+}
+
+/*
+ * Grapple with the dfa (std egrep) vs. ndfa (regexp) tradeoff. Criteria to
+ * determine whether to use dfa-based egrep:  We do FIRSTFEW matches with
+ * regexec().  If Boyer-Moore up to now matched more than PUNTPERCENT
+ * of the input, the r.e. is likely to be underspecified, so do old *grep,
+ * which is faster on complex patterns than regexp().  At FIRSTFEW,
+ * dump the saved matches collected by savematch(). They are saved
+ * so that a "PUNT" can "rewind" to ignore them.  Stdin is problematic,
+ * since it's hard to rewind. 
+ */
+
+execstrategy(file)
+	char *file;
+{
+	int pctmatch;
+
+	pctmatch = (100 * rxcount) / incount;
+	if (pctmatch > PUNTPERCENT && file != NULL)
+		kernighan(args);
+	if (file != NULL)
+		flushmatches();
+}
+
+nlcount(bstart, bstop)		/* flail interval to totalize newlines. */
+	char *bstart, *bstop;
+{
+	register char *s = bstart;
+	register char *t = bstop;
+	register int count = 0;
+
+	do {			/* loop unroll for older architectures */
+		if (*t == NL)	/* ... ask ames!jaw for sample code */
+			count++;
+	} while (t-- > s);
+
+	return (count);
+}
+
+char *
+isolate(regexpr)		/* isolate longest metacharacter-free string */
+	char *regexpr;
+{
+	char *dummyexpr;
+
+	/*
+	 * We add (.)* because Henry's regcomp only figures regmust if it
+	 * sees a leading * pattern.  Foo! 
+	 */
+	dummyexpr = malloc((unsigned) strlen(regexpr) + 5);
+	(void)sprintf(dummyexpr, "(.)*%s", regexpr);
+	if ((rspencer = regcomp(dummyexpr)) == NULL)
+		kernighan(args);
+	return (rspencer->regmust);
+}
+
+char *matches[FIRSTFEW];
+static int mcount = 0;
+
+char *
+savematch(s)			/* horde matches during statistics gathering */
+	register char *s;
+{
+	char *p;
+	char *start = s;
+	int msize = 0;
+	int psize = strlen(preamble);
+
+	while (*s++ != NL)
+		msize++;
+	*--s = EOS;
+
+	p = malloc((unsigned) msize + 1 + psize);
+	strcpy(p, preamble);
+	strcpy(p + psize, start);
+	matches[mcount++] = p;
+
+	preamble[0] = 0;
+	*s = NL;
+	return (s);
+}
+
+flushmatches()
+{
+	int n;
+
+	flushflag = 1;
+	for (n = 0; n < mcount; n++)
+		printf("%s\n", matches[n]);
+	mcount = 0;
+}
+
+oops(message)
+	char *message;
+{
+	fprintf(stderr, "%s: %s\n", progname, message);
+	exit(2);
+}
+
+kernighan(args)			/* "let others do the hard part ..." */
+	char *args[];
+{
+	/*
+	 * We may have already run grep on some of the files; remove them
+	 * from the arg list we pass on.  Note that we can't delete them
+	 * totally because the number of file names affects the output
+	 * (automatic -h). 
+	 */
+	/* better would be fork/exec per punted file -- jaw */
+
+	while (firstfile && optind > firstfile)
+		args[firstfile++] = _PATH_DEVNULL;
+	if (patind)
+		args[patind] = pattern;
+	(void) fflush(stdout);
+
+	if (grepflag)
+		execvp(_PATH_GREPSTD, args), oops("can't exec old 'grep'");
+	else if (fgrepflag)
+		execvp(_PATH_FGREPSTD, args), oops("can't exec old 'fgrep'");
+	else
+		execvp(_PATH_EGREPSTD, args), oops("can't exec old 'egrep'");
+}