diff options
Diffstat (limited to 'usr.bin/grep/egrep/egrep.c')
-rw-r--r-- | usr.bin/grep/egrep/egrep.c | 924 |
1 files changed, 924 insertions, 0 deletions
diff --git a/usr.bin/grep/egrep/egrep.c b/usr.bin/grep/egrep/egrep.c new file mode 100644 index 0000000..885965c --- /dev/null +++ b/usr.bin/grep/egrep/egrep.c @@ -0,0 +1,924 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)egrep.c 8.1 (Berkeley) 6/6/93"; +#endif /* not lint */ + +/* + Hybrid Boyer/Moore/Gosper-assisted 'grep/egrep/fgrep' search, with delta0 + table as in original paper (CACM, October, 1977). No delta1 or delta2. + According to experiment (Horspool, Soft. Prac. Exp., 1982), delta2 is of + minimal practical value. However, to improve for worst case input, + integrating the improved Galil strategies (Apostolico/Giancarlo, SIAM. J. + Comput., Feb. 1986) deserves consideration. + + Method: extract longest metacharacter-free string from expression. + this is done using a side-effect from henry spencer's regcomp(). + use boyer-moore to match such, then pass submatching lines + to either regexp() or standard 'egrep', depending on certain + criteria within execstrategy() below. [this tradeoff is due + to the general slowness of the regexp() nondeterministic + machine on complex expressions, as well as the startup time + of standard 'egrep' on short files.] alternatively, one may + change the vendor-supplied 'egrep' automaton to include + boyer-moore directly. see accompanying writeup for discussion + of kanji expression treatment. + + late addition: apply trickbag for fast match of simple + alternations (sublinear, in common low-cardinality cases). + trap fgrep into this lair. + + gnu additions: -f, newline as |, \< and \> [in regexec()], more + comments. inspire better dfa exec() strategy. + serious testing and help with special cases. + + Algorithm amalgam summary: + + dfa e?grep (aho/thompson) + ndfa regexp() (spencer/aho) + bmg (boyer/moore/gosper) + "superimposed" bmg (jaw) + fgrep (aho/corrasick) + + sorry, but the knuth/morris/pratt machine, horspool's + "frequentist" code, and the rabin/karp matcher, however cute, + just don't cut it for this production. + + James A. Woods Copyright (c) 1986 + NASA Ames Research Center +*/ +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/file.h> +#include <regexp.h> /* must be henry spencer's version */ +#include <stdio.h> +#include <ctype.h> +#include "pathnames.h" + +#define MIN(A, B) ((A) > (B) ? (B) : (A)) + +#ifdef SLOWSYS +#define read xread +#endif + +#define BUFSIZE 8192 /* make higher for cray */ +#define PATSIZE 6000 +#define LARGE BUFSIZE + PATSIZE + +#define NALT 7 /* tied to scanf() size in alternate() */ +#define NMUSH 6 /* loosely relates to expected alt length */ + +#define FIRSTFEW 33 /* Always do FIRSTFEW matches with regexec() */ +#define PUNTPERCENT 10 /* After FIRSTFEW, if PUNTPERCENT of the input + * was processed by regexp(), exec std egrep. */ +#define NL '\n' +#define EOS '\0' +#define NONASCII 0200 /* Bit mask for Kanji non-ascii chars */ +#define META "\n^$.[]()?+*|\\" /* egrep meta-characters */ +#define SS2 '\216' /* EUC Katakana (or Chinese2) prefix */ +#define SS3 '\217' /* EUC Kanji2 (or Chinese3) prefix */ + +extern char *optarg; +extern int optind; +char *progname; + +int cflag, iflag, eflag, fflag, lflag, nflag; /* SVID flags */ +int sflag, hflag; /* v7, v8, bsd */ + +int firstflag; /* Stop at first match */ +int grepflag; /* Called as "grep" */ +int fgrepflag; /* Called as "fgrep" */ +int altflag; /* Simple alternation in pattern */ +int boyonly; /* No regexp needed -- all simple */ +int flushflag; +int grepold, egrepold, fgrepold; + +int nalt; /* Number of alternatives */ +int nsuccess; /* 1 for match, 2 for error */ +int altmin; /* Minimum length of all the alternate + * strings */ +int firstfile; /* argv index of first file argument */ +int patind; /* argv index of pattern */ +long nmatch; /* Number of matches in this file */ +long incount, counted; /* Amount of input consumed */ +long rxcount; /* Bytes of input processed by regexec() */ +int boyfound; /* accumulated partial matches (tripped by + * FIRSTFEW) */ +int prevmatch; /* next three lines aid fast -n */ +long nline, prevnline; +char *prevloc; + +regexp *rspencer; +char *pattern; +char *patboy; /* Pattern for simple Boyer-Moore */ +char *patfile; /* Filename containing pattern(s) */ + +int delta0[256]; /* Boyer-Moore algorithm core */ +char cmap[256]; /* Usually 0-255, but if -i, maps upper to + * lower case */ +char str[BUFSIZE + 2]; +int nleftover; +char linetemp[BUFSIZE]; +char *altpat[NALT]; /* alternation component storage */ +int altlen[NALT]; +short altset[NMUSH + 1][256]; +char preamble[200]; /* match prefix (filename, line no.) */ + +int fd; +char * +strchr(), *strrchr(), *strcpy(), *strncpy(), *strpbrk(), *malloc(); +char * +grepxlat(), *fold(), *pfile(), *alternate(), *isolate(); +char *gotamatch(), *kanji(), *linesave(), *submatch(); +char **args; + +main(argc, argv) + int argc; + char *argv[]; +{ + int c, oflag; + int errflag = 0; + + args = argv; + + if ((progname = strrchr(argv[0], '/')) != 0) + progname++; + else + progname = argv[0]; + if (strcmp(progname, "grep") == 0) + grepflag++; + else if (strcmp(progname, "fgrep") == 0) + fgrepflag++; + + oflag = 0; + while ((c = getopt(argc, argv, "bchie:f:lnosvwxy1")) != EOF) { + switch (c) { + + case 'f': + fflag++; + patfile = optarg; + continue; + case 'b': + case 'v': + egrepold++; /* boyer-moore of little help here */ + continue; + case 'c': + cflag++; + continue; + case 'e': + eflag++; + pattern = optarg; + continue; + case 'h': + hflag++; + continue; + case 'o': + oflag++; + continue; + case '1': /* Stop at very first match */ + firstflag++; /* spead freaks only */ + continue; + case 'i': + iflag++; + continue; + case 'l': + lflag++; + continue; + case 'n': + nflag++; + continue; + case 's': + sflag++; + continue; + case 'w': + case 'y': + if (!grepflag) + errflag++; + grepold++; + continue; + case 'x': /* needs more work, like -b above */ + if (!fgrepflag) + errflag++; + fgrepold++; + continue; + case '?': + errflag++; + } + } + if (errflag || ((argc <= optind) && !fflag && !eflag)) { + if (grepflag) +oops("usage: grep [-bchilnosvwy] [-e] pattern [file ...]"); + else if (fgrepflag) +oops("usage: fgrep [-bchilnosvx] {-f patfile | [-e] strings} [file ...]"); + else /* encourage SVID options, though we provide + * others */ +oops("usage: egrep [-bchilnosv] {-f patfile | [-e] pattern} [file ...]"); + } + if (fflag) + pattern = pfile(patfile); + else if (!eflag) { + patind = optind; + pattern = argv[optind++]; + } + + if (!oflag && (argc - optind) <= 1) /* Filename invisible given < 2 files */ + hflag++; + if (pattern[0] == EOS) + kernighan(argv); /* same as it ever was */ + /* + * 'grep/egrep' merger -- "old" grep is called to handle: tagged + * exprs \( \), word matches \< and \>, -w and -y options, char + * classes with '-' at end (egrep bug?), and patterns beginning with + * an asterisk (don't ask why). otherwise, characters meaningful to + * 'egrep' but not to 'grep' are escaped; the entire expr is then + * passed to 'egrep'. + */ + if (grepflag && !grepold) { + if (strindex(pattern, "\\(") >= 0 || + strindex(pattern, "\\<") >= 0 || + strindex(pattern, "\\>") >= 0 || + strindex(pattern, "-]") >= 0 || + pattern[0] == '*') /* grep bug */ + grepold++; + else + pattern = grepxlat(pattern); + } + if (grepold || egrepold || fgrepold) + kernighan(argv); + + if (iflag) + strcpy(pattern, fold(pattern)); + /* + * If the pattern is a plain string, just run boyer-moore. If it + * consists of meta-free alternatives, run "superimposed" bmg. + * Otherwise, find best string, and compile pattern for regexec(). + */ + if (strpbrk(pattern, META) == NULL) { /* do boyer-moore only */ + boyonly++; + patboy = pattern; + } else { + if ((patboy = alternate(pattern)) != NULL) + boyonly++; + else { + if ((patboy = isolate(pattern)) == NULL) + kernighan(argv); /* expr too involved */ +#ifndef NOKANJI + for (c = 0; pattern[c] != EOS; c++) + if (pattern[c] & NONASCII) /* kanji + meta */ + kernighan(argv); +#endif + if ((rspencer = regcomp(pattern)) == NULL) + oops("regcomp failure"); + } + } + gosper(patboy); /* "pre-conditioning is wonderful" + * -- v. strassen */ + + if ((firstfile = optind) >= argc) { + /* Grep standard input */ + if (lflag) /* We don't know its name! */ + exit(1); + egsecute((char *) NULL); + } else { + while (optind < argc) { + egsecute(argv[optind]); + optind++; + if (firstflag && (nsuccess == 1)) + break; + } + } + exit((nsuccess == 2) ? 2 : (nsuccess == 0)); +} + +char * +pfile(pfname) /* absorb expression from file */ + char *pfname; +{ + int fd; + struct stat patstat; + static char *pat; + + if ((fd = open(pfname, O_RDONLY, 0)) < 0) + oops("can't read pattern file"); + if (fstat(fd, &patstat) != 0) + oops("can't stat pattern file"); + if (patstat.st_size > PATSIZE) { + if (fgrepflag) { /* defer to unix version */ + fgrepold++; + return "dummy"; + } else + oops("pattern file too big"); + } + if ((pat = malloc((unsigned) patstat.st_size + 1)) == NULL) + oops("out of memory to read pattern file"); + if (patstat.st_size != read(fd, pat, (int)patstat.st_size)) + oops("error reading pattern file"); + (void) close(fd); + + pat[patstat.st_size] = EOS; + if (pat[patstat.st_size - 1] == NL) /* NOP for egrep; helps grep */ + pat[patstat.st_size - 1] = EOS; + + if (nlcount(pat, &pat[patstat.st_size]) > NALT) { + if (fgrepflag) + fgrepold++; /* "what's it all about, alfie?" */ + else + egrepold++; + } + return (pat); +} + +egsecute(file) + char *file; +{ + extern int errno; + + if (file == NULL) + fd = 0; + else if ((fd = open(file, O_RDONLY, 0)) <= 0) { + fprintf(stderr, + "%s: %s: %s\n", progname, file, strerror(errno)); + nsuccess = 2; + return; + } + chimaera(file, patboy); + + if (!boyonly && !flushflag && file != NULL) + flushmatches(); + if (file != NULL) + close(fd); +} + +chimaera(file, pat) /* "reach out and boyer-moore search someone" */ + char *file, *pat; /* -- soon-to-be-popular bumper sticker */ +{ + register char *k, *strend, *s; + register int j, count; + register int *deltazero = delta0; + int patlen = altmin; + char *t; + + nleftover = boyfound = flushflag = 0; + nline = 1L; + prevmatch = 0; + nmatch = counted = rxcount = 0L; + + while ((count = read(fd, str + nleftover, BUFSIZE - nleftover)) > 0) { + + counted += count; + strend = linesave(str, count); + + for (k = str + patlen - 1; k < strend;) { + /* + * for a large class of patterns, upwards of 80% of + * match time is spent on the next line. we beat + * existing microcode (vax 'matchc') this way. + */ + while ((k += deltazero[*(unsigned char *) k]) < strend); + if (k < (str + LARGE)) + break; + k -= LARGE; + + if (altflag) { + /* + * Parallel Boyer-Moore. Check whether each + * of the previous <altmin> chars COULD be + * from one of the alternative strings. + */ + s = k - 1; + j = altmin; + while (altset[--j][(unsigned char) + cmap[*(unsigned char *) s--]]); + /* + * quick test fails. in this life, compare + * 'em all. but, a "reverse trie" would + * attenuate worst case (linear w/delta2?). + */ + if (--j < 0) { + count = nalt - 1; + do { + s = k; + j = altlen[count]; + t = altpat[count]; + + while + (cmap[*(unsigned char *) s--] + == t[--j]); + if (j < 0) + break; + } + while (count--); + } + } else { + /* One string -- check it */ + j = patlen - 1; + s = k - 1; + while (cmap[*(unsigned char *) s--] == pat[--j]); + } + /* + * delta-less shortcut for literati. short shrift for + * genetic engineers? + */ + if (j >= 0) { + k++; /* no match; restart next char */ + continue; + } + k = submatch(file, pat, str, strend, k, count); + if (k == NULL) + return; + } + if (nflag) { + if (prevmatch) + nline = prevnline + nlcount(prevloc, k); + else + nline = nline + nlcount(str, k); + prevmatch = 0; + } + strncpy(str, linetemp, nleftover); + } + if (cflag) { + /* Bug from old grep: -c overrides -h. We fix the bug. */ + if (!hflag) + printf("%s:", file); + printf("%ld\n", nmatch); + } +} + +char * +linesave(str, count) /* accumulate partial line at end of buffer */ + char str[]; + register int count; +{ + register int j; + + count += nleftover; + if (count != BUFSIZE && fd != 0) + str[count++] = NL; /* insurance for broken last line */ + str[count] = EOS; + for (j = count - 1; str[j] != NL && j >= 0;) + j--; + /* + * break up these lines: long line (> BUFSIZE), last line of file, or + * short return from read(), as from tee(1) input + */ + if (j < 0 && (count == (BUFSIZE - nleftover))) { + str[count++] = NL; + str[count] = EOS; + linetemp[0] = EOS; + nleftover = 0; + return (str + count); + } else { + nleftover = count - j - 1; + strncpy(linetemp, str + j + 1, nleftover); + return (str + j); + } +} + +/* + * Process partial match. First check for mis-aligned Kanji, then match line + * against full compiled r.e. if statistics do not warrant handing off to + * standard egrep. + */ +char * +submatch(file, pat, str, strend, k, altindex) + char file[], pat[], str[]; + register char *strend, *k; + int altindex; +{ + register char *s; + char *t, c; + + t = k; + s = ((altflag) ? k - altlen[altindex] + 1 : k - altmin + 1); +#ifndef NOKANJI + c = ((altflag) ? altpat[altindex][0] : pat[0]); + if (c & NONASCII) + if ((s = kanji(str, s, k)) == NULL) + return (++k); /* reject false kanji */ +#endif + do; + while (*s != NL && --s >= str); + k = s + 1; /* now at line start */ + + if (boyonly) + return (gotamatch(file, k)); + + incount = counted - (strend - k); + if (boyfound++ == FIRSTFEW) + execstrategy(file); + + s = t; + do + rxcount++; + while (*s++ != NL); + *--s = EOS; + /* + * "quick henry -- the flit" (after theodor geisel) + */ + if (regexec(rspencer, ((iflag) ? fold(k) : k)) == 1) { + *s = NL; + if (gotamatch(file, k) == NULL) + return (NULL); + } + *s = NL; + return (s + 1); +} + +#ifndef NOKANJI +/* + * EUC code disambiguation -- scan backwards to first 7-bit code, while + * counting intervening 8-bit codes. If odd, reject unaligned Kanji pattern. + * SS2/3 checks are for intermixed Japanase Katakana or Kanji2. + */ +char * +kanji(str, s, k) + register char *str, *s, *k; +{ + register int j = 0; + + for (s--; s >= str; s--) { + if (*s == SS2 || *s == SS3 || (*s & NONASCII) == 0) + break; + j++; + } +#ifndef CHINESE + if (*s == SS2) + j -= 1; +#endif CHINESE + return ((j & 01) ? NULL : k); +} +#endif + +/* + * Compute "Boyer-Moore" delta table -- put skip distance in delta0[c] + */ +gosper(pattern) + char *pattern; /* ... HAKMEM lives ... */ +{ + register int i, j; + unsigned char c; + + /* Make one-string case look like simple alternatives case */ + if (!altflag) { + nalt = 1; + altmin = altlen[0] = strlen(pattern); + altpat[0] = pattern; + } + /* For chars that aren't in any string, skip by string length. */ + for (j = 0; j < 256; j++) { + delta0[j] = altmin; + cmap[j] = j; /* Sneak in initialization of cmap */ + } + + /* For chars in a string, skip distance from char to end of string. */ + /* (If char appears more than once, skip minimum distance.) */ + for (i = 0; i < nalt; i++) + for (j = 0; j < altlen[i] - 1; j++) { + c = altpat[i][j]; + delta0[c] = MIN(delta0[c], altlen[i] - j - 1); + if (iflag && islower((int) c)) + delta0[toupper((int) c)] = delta0[c]; + } + + /* For last char of each string, fall out of search loop. */ + for (i = 0; i < nalt; i++) { + c = altpat[i][altlen[i] - 1]; + delta0[c] = LARGE; + if (iflag && islower((int) c)) + delta0[toupper((int) c)] = LARGE; + } + if (iflag) + for (j = 'A'; j <= 'Z'; j++) + cmap[j] = tolower((int) j); +} + +/* + * Print, count, or stop on full match. Result is either the location for + * continued search, or NULL to stop. + */ +char * +gotamatch(file, s) + register char *file, *s; +{ + char *savematch(); + int squirrel = 0; /* nonzero to squirrel away FIRSTFEW matches */ + + nmatch++; + nsuccess = 1; + if (!boyonly && boyfound <= FIRSTFEW && file != NULL) + squirrel = 1; + + if (sflag) + return (NULL); /* -s usurps all flags (unlike some versions) */ + if (cflag) { /* -c overrides -l, we guess */ + do; + while (*s++ != NL); + } else if (lflag) { + puts(file); + return (NULL); + } else { + if (!hflag) + if (!squirrel) + printf("%s:", file); + else + (void)sprintf(preamble, "%s:", file); + if (nflag) { + if (prevmatch) + prevnline = prevnline + nlcount(prevloc, s); + else + prevnline = nline + nlcount(str, s); + prevmatch = 1; + + if (!squirrel) + printf("%ld:", prevnline); + else + (void)sprintf(preamble + strlen(preamble), + "%ld:", prevnline); + } + if (!squirrel) { + do + putchar(*s); + while (*s++ != NL); + } else + s = savematch(s); + + if (nflag) + prevloc = s - 1; + } + return ((firstflag && !cflag) ? NULL : s); +} + +char * +fold(line) + char *line; +{ + static char fline[BUFSIZE]; + register char *s, *t = fline; + + for (s = line; *s != EOS; s++) + *t++ = (isupper((int) *s) ? (char) tolower((int) *s) : *s); + *t = EOS; + return (fline); +} + +strindex(s, t) /* the easy way, as in K&P, p. 192 */ + char *s, *t; +{ + int i, n; + + n = strlen(t); + for (i = 0; s[i] != '\0'; i++) + if (strncmp(s + i, t, n) == 0) + return (i); + return (-1); +} + +char * +grepxlat(pattern) /* grep pattern meta conversion */ + char *pattern; +{ + register char *p, *s; + static char newpat[BUFSIZE]; + + for (s = newpat, p = pattern; *p != EOS;) { + if (*p == '\\') { /* skip escapes ... */ + *s++ = *p++; + if (*p) + *s++ = *p++; + } else if (*p == '[') { /* ... and char classes */ + while (*p != EOS && *p != ']') + *s++ = *p++; + } else if (strchr("+?|()", *p) != NULL) { + *s++ = '\\'; /* insert protection */ + *s++ = *p++; + } else + *s++ = *p++; + } + *s = EOS; + grepflag = ((patind) ? 0 : 1); + return (newpat); +} + +/* + * Test for simple alternation. Result is NULL if it's not so simple, or is + * a pointer to the first string if it is. Warning: sscanf size is a + * fixpoint, beyond which the speedup linearity starts to break down. In the + * wake of the elegant aho/corrasick "trie"-based fgrep, generalizing + * altpat[] to arbitrary size is not useful. + */ +char * +alternate(regexpr) + char *regexpr; +{ + register int i, j; + register char *start, *stop; + unsigned char c; + + if (fgrepflag && strchr(regexpr, '|')) + return (NULL); + + /* + * break pattern up into altpat array; delimit on newline, bar, + * or EOS. We know we won't overflow, we've already checked the + * number of patterns we're going to find against NALT. + * Also, set length of pattern and find minimum pattern length. + */ + nalt = 0; + altmin = NMUSH; + for (start = stop = regexpr;; ++stop) + if (!*stop || *stop == '|' || *stop == NL) { + altlen[nalt] = j = stop - start; + if (j < altmin) + altmin = j; + if (!(altpat[nalt] = malloc((u_int)(j + 1)))) + oops("out of memory"); + bcopy(start, altpat[nalt], j); + altpat[nalt][j] = EOS; + ++nalt; + if (!*stop) + break; + if (nalt == NALT) + return(NULL); + if (*stop == NL) + *stop = '|'; + start = stop + 1; + } + if (!fgrepflag) { + if (strchr(regexpr, '|') == NULL || regexpr[0] == '|') + return (NULL); + if (strpbrk(regexpr, "^$.[]()?+*\\") != NULL + || strindex(regexpr, "||") >= 0) + return (NULL); + } + + if (nalt > 1) { /* build superimposed "pre-match" sets per + * char */ + altflag++; + for (j = 0; j < nalt; j++) + for (i = 0; i < altmin; i++) { + c = altpat[j][altlen[j] - altmin + i]; + altset[i + 1][c] = 1; /* offset for sentinel */ + } + } + return (altpat[0]); +} + +/* + * Grapple with the dfa (std egrep) vs. ndfa (regexp) tradeoff. Criteria to + * determine whether to use dfa-based egrep: We do FIRSTFEW matches with + * regexec(). If Boyer-Moore up to now matched more than PUNTPERCENT + * of the input, the r.e. is likely to be underspecified, so do old *grep, + * which is faster on complex patterns than regexp(). At FIRSTFEW, + * dump the saved matches collected by savematch(). They are saved + * so that a "PUNT" can "rewind" to ignore them. Stdin is problematic, + * since it's hard to rewind. + */ + +execstrategy(file) + char *file; +{ + int pctmatch; + + pctmatch = (100 * rxcount) / incount; + if (pctmatch > PUNTPERCENT && file != NULL) + kernighan(args); + if (file != NULL) + flushmatches(); +} + +nlcount(bstart, bstop) /* flail interval to totalize newlines. */ + char *bstart, *bstop; +{ + register char *s = bstart; + register char *t = bstop; + register int count = 0; + + do { /* loop unroll for older architectures */ + if (*t == NL) /* ... ask ames!jaw for sample code */ + count++; + } while (t-- > s); + + return (count); +} + +char * +isolate(regexpr) /* isolate longest metacharacter-free string */ + char *regexpr; +{ + char *dummyexpr; + + /* + * We add (.)* because Henry's regcomp only figures regmust if it + * sees a leading * pattern. Foo! + */ + dummyexpr = malloc((unsigned) strlen(regexpr) + 5); + (void)sprintf(dummyexpr, "(.)*%s", regexpr); + if ((rspencer = regcomp(dummyexpr)) == NULL) + kernighan(args); + return (rspencer->regmust); +} + +char *matches[FIRSTFEW]; +static int mcount = 0; + +char * +savematch(s) /* horde matches during statistics gathering */ + register char *s; +{ + char *p; + char *start = s; + int msize = 0; + int psize = strlen(preamble); + + while (*s++ != NL) + msize++; + *--s = EOS; + + p = malloc((unsigned) msize + 1 + psize); + strcpy(p, preamble); + strcpy(p + psize, start); + matches[mcount++] = p; + + preamble[0] = 0; + *s = NL; + return (s); +} + +flushmatches() +{ + int n; + + flushflag = 1; + for (n = 0; n < mcount; n++) + printf("%s\n", matches[n]); + mcount = 0; +} + +oops(message) + char *message; +{ + fprintf(stderr, "%s: %s\n", progname, message); + exit(2); +} + +kernighan(args) /* "let others do the hard part ..." */ + char *args[]; +{ + /* + * We may have already run grep on some of the files; remove them + * from the arg list we pass on. Note that we can't delete them + * totally because the number of file names affects the output + * (automatic -h). + */ + /* better would be fork/exec per punted file -- jaw */ + + while (firstfile && optind > firstfile) + args[firstfile++] = _PATH_DEVNULL; + if (patind) + args[patind] = pattern; + (void) fflush(stdout); + + if (grepflag) + execvp(_PATH_GREPSTD, args), oops("can't exec old 'grep'"); + else if (fgrepflag) + execvp(_PATH_FGREPSTD, args), oops("can't exec old 'fgrep'"); + else + execvp(_PATH_EGREPSTD, args), oops("can't exec old 'egrep'"); +} |