diff options
Diffstat (limited to 'usr.bin/grep/egrep/egrep.c')
-rw-r--r-- | usr.bin/grep/egrep/egrep.c | 924 |
1 files changed, 0 insertions, 924 deletions
diff --git a/usr.bin/grep/egrep/egrep.c b/usr.bin/grep/egrep/egrep.c deleted file mode 100644 index 885965c..0000000 --- a/usr.bin/grep/egrep/egrep.c +++ /dev/null @@ -1,924 +0,0 @@ -/*- - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef lint -static char copyright[] = -"@(#) Copyright (c) 1991, 1993\n\ - The Regents of the University of California. All rights reserved.\n"; -#endif /* not lint */ - -#ifndef lint -static char sccsid[] = "@(#)egrep.c 8.1 (Berkeley) 6/6/93"; -#endif /* not lint */ - -/* - Hybrid Boyer/Moore/Gosper-assisted 'grep/egrep/fgrep' search, with delta0 - table as in original paper (CACM, October, 1977). No delta1 or delta2. - According to experiment (Horspool, Soft. Prac. Exp., 1982), delta2 is of - minimal practical value. However, to improve for worst case input, - integrating the improved Galil strategies (Apostolico/Giancarlo, SIAM. J. - Comput., Feb. 1986) deserves consideration. - - Method: extract longest metacharacter-free string from expression. - this is done using a side-effect from henry spencer's regcomp(). - use boyer-moore to match such, then pass submatching lines - to either regexp() or standard 'egrep', depending on certain - criteria within execstrategy() below. [this tradeoff is due - to the general slowness of the regexp() nondeterministic - machine on complex expressions, as well as the startup time - of standard 'egrep' on short files.] alternatively, one may - change the vendor-supplied 'egrep' automaton to include - boyer-moore directly. see accompanying writeup for discussion - of kanji expression treatment. - - late addition: apply trickbag for fast match of simple - alternations (sublinear, in common low-cardinality cases). - trap fgrep into this lair. - - gnu additions: -f, newline as |, \< and \> [in regexec()], more - comments. inspire better dfa exec() strategy. - serious testing and help with special cases. - - Algorithm amalgam summary: - - dfa e?grep (aho/thompson) - ndfa regexp() (spencer/aho) - bmg (boyer/moore/gosper) - "superimposed" bmg (jaw) - fgrep (aho/corrasick) - - sorry, but the knuth/morris/pratt machine, horspool's - "frequentist" code, and the rabin/karp matcher, however cute, - just don't cut it for this production. - - James A. Woods Copyright (c) 1986 - NASA Ames Research Center -*/ -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/file.h> -#include <regexp.h> /* must be henry spencer's version */ -#include <stdio.h> -#include <ctype.h> -#include "pathnames.h" - -#define MIN(A, B) ((A) > (B) ? (B) : (A)) - -#ifdef SLOWSYS -#define read xread -#endif - -#define BUFSIZE 8192 /* make higher for cray */ -#define PATSIZE 6000 -#define LARGE BUFSIZE + PATSIZE - -#define NALT 7 /* tied to scanf() size in alternate() */ -#define NMUSH 6 /* loosely relates to expected alt length */ - -#define FIRSTFEW 33 /* Always do FIRSTFEW matches with regexec() */ -#define PUNTPERCENT 10 /* After FIRSTFEW, if PUNTPERCENT of the input - * was processed by regexp(), exec std egrep. */ -#define NL '\n' -#define EOS '\0' -#define NONASCII 0200 /* Bit mask for Kanji non-ascii chars */ -#define META "\n^$.[]()?+*|\\" /* egrep meta-characters */ -#define SS2 '\216' /* EUC Katakana (or Chinese2) prefix */ -#define SS3 '\217' /* EUC Kanji2 (or Chinese3) prefix */ - -extern char *optarg; -extern int optind; -char *progname; - -int cflag, iflag, eflag, fflag, lflag, nflag; /* SVID flags */ -int sflag, hflag; /* v7, v8, bsd */ - -int firstflag; /* Stop at first match */ -int grepflag; /* Called as "grep" */ -int fgrepflag; /* Called as "fgrep" */ -int altflag; /* Simple alternation in pattern */ -int boyonly; /* No regexp needed -- all simple */ -int flushflag; -int grepold, egrepold, fgrepold; - -int nalt; /* Number of alternatives */ -int nsuccess; /* 1 for match, 2 for error */ -int altmin; /* Minimum length of all the alternate - * strings */ -int firstfile; /* argv index of first file argument */ -int patind; /* argv index of pattern */ -long nmatch; /* Number of matches in this file */ -long incount, counted; /* Amount of input consumed */ -long rxcount; /* Bytes of input processed by regexec() */ -int boyfound; /* accumulated partial matches (tripped by - * FIRSTFEW) */ -int prevmatch; /* next three lines aid fast -n */ -long nline, prevnline; -char *prevloc; - -regexp *rspencer; -char *pattern; -char *patboy; /* Pattern for simple Boyer-Moore */ -char *patfile; /* Filename containing pattern(s) */ - -int delta0[256]; /* Boyer-Moore algorithm core */ -char cmap[256]; /* Usually 0-255, but if -i, maps upper to - * lower case */ -char str[BUFSIZE + 2]; -int nleftover; -char linetemp[BUFSIZE]; -char *altpat[NALT]; /* alternation component storage */ -int altlen[NALT]; -short altset[NMUSH + 1][256]; -char preamble[200]; /* match prefix (filename, line no.) */ - -int fd; -char * -strchr(), *strrchr(), *strcpy(), *strncpy(), *strpbrk(), *malloc(); -char * -grepxlat(), *fold(), *pfile(), *alternate(), *isolate(); -char *gotamatch(), *kanji(), *linesave(), *submatch(); -char **args; - -main(argc, argv) - int argc; - char *argv[]; -{ - int c, oflag; - int errflag = 0; - - args = argv; - - if ((progname = strrchr(argv[0], '/')) != 0) - progname++; - else - progname = argv[0]; - if (strcmp(progname, "grep") == 0) - grepflag++; - else if (strcmp(progname, "fgrep") == 0) - fgrepflag++; - - oflag = 0; - while ((c = getopt(argc, argv, "bchie:f:lnosvwxy1")) != EOF) { - switch (c) { - - case 'f': - fflag++; - patfile = optarg; - continue; - case 'b': - case 'v': - egrepold++; /* boyer-moore of little help here */ - continue; - case 'c': - cflag++; - continue; - case 'e': - eflag++; - pattern = optarg; - continue; - case 'h': - hflag++; - continue; - case 'o': - oflag++; - continue; - case '1': /* Stop at very first match */ - firstflag++; /* spead freaks only */ - continue; - case 'i': - iflag++; - continue; - case 'l': - lflag++; - continue; - case 'n': - nflag++; - continue; - case 's': - sflag++; - continue; - case 'w': - case 'y': - if (!grepflag) - errflag++; - grepold++; - continue; - case 'x': /* needs more work, like -b above */ - if (!fgrepflag) - errflag++; - fgrepold++; - continue; - case '?': - errflag++; - } - } - if (errflag || ((argc <= optind) && !fflag && !eflag)) { - if (grepflag) -oops("usage: grep [-bchilnosvwy] [-e] pattern [file ...]"); - else if (fgrepflag) -oops("usage: fgrep [-bchilnosvx] {-f patfile | [-e] strings} [file ...]"); - else /* encourage SVID options, though we provide - * others */ -oops("usage: egrep [-bchilnosv] {-f patfile | [-e] pattern} [file ...]"); - } - if (fflag) - pattern = pfile(patfile); - else if (!eflag) { - patind = optind; - pattern = argv[optind++]; - } - - if (!oflag && (argc - optind) <= 1) /* Filename invisible given < 2 files */ - hflag++; - if (pattern[0] == EOS) - kernighan(argv); /* same as it ever was */ - /* - * 'grep/egrep' merger -- "old" grep is called to handle: tagged - * exprs \( \), word matches \< and \>, -w and -y options, char - * classes with '-' at end (egrep bug?), and patterns beginning with - * an asterisk (don't ask why). otherwise, characters meaningful to - * 'egrep' but not to 'grep' are escaped; the entire expr is then - * passed to 'egrep'. - */ - if (grepflag && !grepold) { - if (strindex(pattern, "\\(") >= 0 || - strindex(pattern, "\\<") >= 0 || - strindex(pattern, "\\>") >= 0 || - strindex(pattern, "-]") >= 0 || - pattern[0] == '*') /* grep bug */ - grepold++; - else - pattern = grepxlat(pattern); - } - if (grepold || egrepold || fgrepold) - kernighan(argv); - - if (iflag) - strcpy(pattern, fold(pattern)); - /* - * If the pattern is a plain string, just run boyer-moore. If it - * consists of meta-free alternatives, run "superimposed" bmg. - * Otherwise, find best string, and compile pattern for regexec(). - */ - if (strpbrk(pattern, META) == NULL) { /* do boyer-moore only */ - boyonly++; - patboy = pattern; - } else { - if ((patboy = alternate(pattern)) != NULL) - boyonly++; - else { - if ((patboy = isolate(pattern)) == NULL) - kernighan(argv); /* expr too involved */ -#ifndef NOKANJI - for (c = 0; pattern[c] != EOS; c++) - if (pattern[c] & NONASCII) /* kanji + meta */ - kernighan(argv); -#endif - if ((rspencer = regcomp(pattern)) == NULL) - oops("regcomp failure"); - } - } - gosper(patboy); /* "pre-conditioning is wonderful" - * -- v. strassen */ - - if ((firstfile = optind) >= argc) { - /* Grep standard input */ - if (lflag) /* We don't know its name! */ - exit(1); - egsecute((char *) NULL); - } else { - while (optind < argc) { - egsecute(argv[optind]); - optind++; - if (firstflag && (nsuccess == 1)) - break; - } - } - exit((nsuccess == 2) ? 2 : (nsuccess == 0)); -} - -char * -pfile(pfname) /* absorb expression from file */ - char *pfname; -{ - int fd; - struct stat patstat; - static char *pat; - - if ((fd = open(pfname, O_RDONLY, 0)) < 0) - oops("can't read pattern file"); - if (fstat(fd, &patstat) != 0) - oops("can't stat pattern file"); - if (patstat.st_size > PATSIZE) { - if (fgrepflag) { /* defer to unix version */ - fgrepold++; - return "dummy"; - } else - oops("pattern file too big"); - } - if ((pat = malloc((unsigned) patstat.st_size + 1)) == NULL) - oops("out of memory to read pattern file"); - if (patstat.st_size != read(fd, pat, (int)patstat.st_size)) - oops("error reading pattern file"); - (void) close(fd); - - pat[patstat.st_size] = EOS; - if (pat[patstat.st_size - 1] == NL) /* NOP for egrep; helps grep */ - pat[patstat.st_size - 1] = EOS; - - if (nlcount(pat, &pat[patstat.st_size]) > NALT) { - if (fgrepflag) - fgrepold++; /* "what's it all about, alfie?" */ - else - egrepold++; - } - return (pat); -} - -egsecute(file) - char *file; -{ - extern int errno; - - if (file == NULL) - fd = 0; - else if ((fd = open(file, O_RDONLY, 0)) <= 0) { - fprintf(stderr, - "%s: %s: %s\n", progname, file, strerror(errno)); - nsuccess = 2; - return; - } - chimaera(file, patboy); - - if (!boyonly && !flushflag && file != NULL) - flushmatches(); - if (file != NULL) - close(fd); -} - -chimaera(file, pat) /* "reach out and boyer-moore search someone" */ - char *file, *pat; /* -- soon-to-be-popular bumper sticker */ -{ - register char *k, *strend, *s; - register int j, count; - register int *deltazero = delta0; - int patlen = altmin; - char *t; - - nleftover = boyfound = flushflag = 0; - nline = 1L; - prevmatch = 0; - nmatch = counted = rxcount = 0L; - - while ((count = read(fd, str + nleftover, BUFSIZE - nleftover)) > 0) { - - counted += count; - strend = linesave(str, count); - - for (k = str + patlen - 1; k < strend;) { - /* - * for a large class of patterns, upwards of 80% of - * match time is spent on the next line. we beat - * existing microcode (vax 'matchc') this way. - */ - while ((k += deltazero[*(unsigned char *) k]) < strend); - if (k < (str + LARGE)) - break; - k -= LARGE; - - if (altflag) { - /* - * Parallel Boyer-Moore. Check whether each - * of the previous <altmin> chars COULD be - * from one of the alternative strings. - */ - s = k - 1; - j = altmin; - while (altset[--j][(unsigned char) - cmap[*(unsigned char *) s--]]); - /* - * quick test fails. in this life, compare - * 'em all. but, a "reverse trie" would - * attenuate worst case (linear w/delta2?). - */ - if (--j < 0) { - count = nalt - 1; - do { - s = k; - j = altlen[count]; - t = altpat[count]; - - while - (cmap[*(unsigned char *) s--] - == t[--j]); - if (j < 0) - break; - } - while (count--); - } - } else { - /* One string -- check it */ - j = patlen - 1; - s = k - 1; - while (cmap[*(unsigned char *) s--] == pat[--j]); - } - /* - * delta-less shortcut for literati. short shrift for - * genetic engineers? - */ - if (j >= 0) { - k++; /* no match; restart next char */ - continue; - } - k = submatch(file, pat, str, strend, k, count); - if (k == NULL) - return; - } - if (nflag) { - if (prevmatch) - nline = prevnline + nlcount(prevloc, k); - else - nline = nline + nlcount(str, k); - prevmatch = 0; - } - strncpy(str, linetemp, nleftover); - } - if (cflag) { - /* Bug from old grep: -c overrides -h. We fix the bug. */ - if (!hflag) - printf("%s:", file); - printf("%ld\n", nmatch); - } -} - -char * -linesave(str, count) /* accumulate partial line at end of buffer */ - char str[]; - register int count; -{ - register int j; - - count += nleftover; - if (count != BUFSIZE && fd != 0) - str[count++] = NL; /* insurance for broken last line */ - str[count] = EOS; - for (j = count - 1; str[j] != NL && j >= 0;) - j--; - /* - * break up these lines: long line (> BUFSIZE), last line of file, or - * short return from read(), as from tee(1) input - */ - if (j < 0 && (count == (BUFSIZE - nleftover))) { - str[count++] = NL; - str[count] = EOS; - linetemp[0] = EOS; - nleftover = 0; - return (str + count); - } else { - nleftover = count - j - 1; - strncpy(linetemp, str + j + 1, nleftover); - return (str + j); - } -} - -/* - * Process partial match. First check for mis-aligned Kanji, then match line - * against full compiled r.e. if statistics do not warrant handing off to - * standard egrep. - */ -char * -submatch(file, pat, str, strend, k, altindex) - char file[], pat[], str[]; - register char *strend, *k; - int altindex; -{ - register char *s; - char *t, c; - - t = k; - s = ((altflag) ? k - altlen[altindex] + 1 : k - altmin + 1); -#ifndef NOKANJI - c = ((altflag) ? altpat[altindex][0] : pat[0]); - if (c & NONASCII) - if ((s = kanji(str, s, k)) == NULL) - return (++k); /* reject false kanji */ -#endif - do; - while (*s != NL && --s >= str); - k = s + 1; /* now at line start */ - - if (boyonly) - return (gotamatch(file, k)); - - incount = counted - (strend - k); - if (boyfound++ == FIRSTFEW) - execstrategy(file); - - s = t; - do - rxcount++; - while (*s++ != NL); - *--s = EOS; - /* - * "quick henry -- the flit" (after theodor geisel) - */ - if (regexec(rspencer, ((iflag) ? fold(k) : k)) == 1) { - *s = NL; - if (gotamatch(file, k) == NULL) - return (NULL); - } - *s = NL; - return (s + 1); -} - -#ifndef NOKANJI -/* - * EUC code disambiguation -- scan backwards to first 7-bit code, while - * counting intervening 8-bit codes. If odd, reject unaligned Kanji pattern. - * SS2/3 checks are for intermixed Japanase Katakana or Kanji2. - */ -char * -kanji(str, s, k) - register char *str, *s, *k; -{ - register int j = 0; - - for (s--; s >= str; s--) { - if (*s == SS2 || *s == SS3 || (*s & NONASCII) == 0) - break; - j++; - } -#ifndef CHINESE - if (*s == SS2) - j -= 1; -#endif CHINESE - return ((j & 01) ? NULL : k); -} -#endif - -/* - * Compute "Boyer-Moore" delta table -- put skip distance in delta0[c] - */ -gosper(pattern) - char *pattern; /* ... HAKMEM lives ... */ -{ - register int i, j; - unsigned char c; - - /* Make one-string case look like simple alternatives case */ - if (!altflag) { - nalt = 1; - altmin = altlen[0] = strlen(pattern); - altpat[0] = pattern; - } - /* For chars that aren't in any string, skip by string length. */ - for (j = 0; j < 256; j++) { - delta0[j] = altmin; - cmap[j] = j; /* Sneak in initialization of cmap */ - } - - /* For chars in a string, skip distance from char to end of string. */ - /* (If char appears more than once, skip minimum distance.) */ - for (i = 0; i < nalt; i++) - for (j = 0; j < altlen[i] - 1; j++) { - c = altpat[i][j]; - delta0[c] = MIN(delta0[c], altlen[i] - j - 1); - if (iflag && islower((int) c)) - delta0[toupper((int) c)] = delta0[c]; - } - - /* For last char of each string, fall out of search loop. */ - for (i = 0; i < nalt; i++) { - c = altpat[i][altlen[i] - 1]; - delta0[c] = LARGE; - if (iflag && islower((int) c)) - delta0[toupper((int) c)] = LARGE; - } - if (iflag) - for (j = 'A'; j <= 'Z'; j++) - cmap[j] = tolower((int) j); -} - -/* - * Print, count, or stop on full match. Result is either the location for - * continued search, or NULL to stop. - */ -char * -gotamatch(file, s) - register char *file, *s; -{ - char *savematch(); - int squirrel = 0; /* nonzero to squirrel away FIRSTFEW matches */ - - nmatch++; - nsuccess = 1; - if (!boyonly && boyfound <= FIRSTFEW && file != NULL) - squirrel = 1; - - if (sflag) - return (NULL); /* -s usurps all flags (unlike some versions) */ - if (cflag) { /* -c overrides -l, we guess */ - do; - while (*s++ != NL); - } else if (lflag) { - puts(file); - return (NULL); - } else { - if (!hflag) - if (!squirrel) - printf("%s:", file); - else - (void)sprintf(preamble, "%s:", file); - if (nflag) { - if (prevmatch) - prevnline = prevnline + nlcount(prevloc, s); - else - prevnline = nline + nlcount(str, s); - prevmatch = 1; - - if (!squirrel) - printf("%ld:", prevnline); - else - (void)sprintf(preamble + strlen(preamble), - "%ld:", prevnline); - } - if (!squirrel) { - do - putchar(*s); - while (*s++ != NL); - } else - s = savematch(s); - - if (nflag) - prevloc = s - 1; - } - return ((firstflag && !cflag) ? NULL : s); -} - -char * -fold(line) - char *line; -{ - static char fline[BUFSIZE]; - register char *s, *t = fline; - - for (s = line; *s != EOS; s++) - *t++ = (isupper((int) *s) ? (char) tolower((int) *s) : *s); - *t = EOS; - return (fline); -} - -strindex(s, t) /* the easy way, as in K&P, p. 192 */ - char *s, *t; -{ - int i, n; - - n = strlen(t); - for (i = 0; s[i] != '\0'; i++) - if (strncmp(s + i, t, n) == 0) - return (i); - return (-1); -} - -char * -grepxlat(pattern) /* grep pattern meta conversion */ - char *pattern; -{ - register char *p, *s; - static char newpat[BUFSIZE]; - - for (s = newpat, p = pattern; *p != EOS;) { - if (*p == '\\') { /* skip escapes ... */ - *s++ = *p++; - if (*p) - *s++ = *p++; - } else if (*p == '[') { /* ... and char classes */ - while (*p != EOS && *p != ']') - *s++ = *p++; - } else if (strchr("+?|()", *p) != NULL) { - *s++ = '\\'; /* insert protection */ - *s++ = *p++; - } else - *s++ = *p++; - } - *s = EOS; - grepflag = ((patind) ? 0 : 1); - return (newpat); -} - -/* - * Test for simple alternation. Result is NULL if it's not so simple, or is - * a pointer to the first string if it is. Warning: sscanf size is a - * fixpoint, beyond which the speedup linearity starts to break down. In the - * wake of the elegant aho/corrasick "trie"-based fgrep, generalizing - * altpat[] to arbitrary size is not useful. - */ -char * -alternate(regexpr) - char *regexpr; -{ - register int i, j; - register char *start, *stop; - unsigned char c; - - if (fgrepflag && strchr(regexpr, '|')) - return (NULL); - - /* - * break pattern up into altpat array; delimit on newline, bar, - * or EOS. We know we won't overflow, we've already checked the - * number of patterns we're going to find against NALT. - * Also, set length of pattern and find minimum pattern length. - */ - nalt = 0; - altmin = NMUSH; - for (start = stop = regexpr;; ++stop) - if (!*stop || *stop == '|' || *stop == NL) { - altlen[nalt] = j = stop - start; - if (j < altmin) - altmin = j; - if (!(altpat[nalt] = malloc((u_int)(j + 1)))) - oops("out of memory"); - bcopy(start, altpat[nalt], j); - altpat[nalt][j] = EOS; - ++nalt; - if (!*stop) - break; - if (nalt == NALT) - return(NULL); - if (*stop == NL) - *stop = '|'; - start = stop + 1; - } - if (!fgrepflag) { - if (strchr(regexpr, '|') == NULL || regexpr[0] == '|') - return (NULL); - if (strpbrk(regexpr, "^$.[]()?+*\\") != NULL - || strindex(regexpr, "||") >= 0) - return (NULL); - } - - if (nalt > 1) { /* build superimposed "pre-match" sets per - * char */ - altflag++; - for (j = 0; j < nalt; j++) - for (i = 0; i < altmin; i++) { - c = altpat[j][altlen[j] - altmin + i]; - altset[i + 1][c] = 1; /* offset for sentinel */ - } - } - return (altpat[0]); -} - -/* - * Grapple with the dfa (std egrep) vs. ndfa (regexp) tradeoff. Criteria to - * determine whether to use dfa-based egrep: We do FIRSTFEW matches with - * regexec(). If Boyer-Moore up to now matched more than PUNTPERCENT - * of the input, the r.e. is likely to be underspecified, so do old *grep, - * which is faster on complex patterns than regexp(). At FIRSTFEW, - * dump the saved matches collected by savematch(). They are saved - * so that a "PUNT" can "rewind" to ignore them. Stdin is problematic, - * since it's hard to rewind. - */ - -execstrategy(file) - char *file; -{ - int pctmatch; - - pctmatch = (100 * rxcount) / incount; - if (pctmatch > PUNTPERCENT && file != NULL) - kernighan(args); - if (file != NULL) - flushmatches(); -} - -nlcount(bstart, bstop) /* flail interval to totalize newlines. */ - char *bstart, *bstop; -{ - register char *s = bstart; - register char *t = bstop; - register int count = 0; - - do { /* loop unroll for older architectures */ - if (*t == NL) /* ... ask ames!jaw for sample code */ - count++; - } while (t-- > s); - - return (count); -} - -char * -isolate(regexpr) /* isolate longest metacharacter-free string */ - char *regexpr; -{ - char *dummyexpr; - - /* - * We add (.)* because Henry's regcomp only figures regmust if it - * sees a leading * pattern. Foo! - */ - dummyexpr = malloc((unsigned) strlen(regexpr) + 5); - (void)sprintf(dummyexpr, "(.)*%s", regexpr); - if ((rspencer = regcomp(dummyexpr)) == NULL) - kernighan(args); - return (rspencer->regmust); -} - -char *matches[FIRSTFEW]; -static int mcount = 0; - -char * -savematch(s) /* horde matches during statistics gathering */ - register char *s; -{ - char *p; - char *start = s; - int msize = 0; - int psize = strlen(preamble); - - while (*s++ != NL) - msize++; - *--s = EOS; - - p = malloc((unsigned) msize + 1 + psize); - strcpy(p, preamble); - strcpy(p + psize, start); - matches[mcount++] = p; - - preamble[0] = 0; - *s = NL; - return (s); -} - -flushmatches() -{ - int n; - - flushflag = 1; - for (n = 0; n < mcount; n++) - printf("%s\n", matches[n]); - mcount = 0; -} - -oops(message) - char *message; -{ - fprintf(stderr, "%s: %s\n", progname, message); - exit(2); -} - -kernighan(args) /* "let others do the hard part ..." */ - char *args[]; -{ - /* - * We may have already run grep on some of the files; remove them - * from the arg list we pass on. Note that we can't delete them - * totally because the number of file names affects the output - * (automatic -h). - */ - /* better would be fork/exec per punted file -- jaw */ - - while (firstfile && optind > firstfile) - args[firstfile++] = _PATH_DEVNULL; - if (patind) - args[patind] = pattern; - (void) fflush(stdout); - - if (grepflag) - execvp(_PATH_GREPSTD, args), oops("can't exec old 'grep'"); - else if (fgrepflag) - execvp(_PATH_FGREPSTD, args), oops("can't exec old 'fgrep'"); - else - execvp(_PATH_EGREPSTD, args), oops("can't exec old 'egrep'"); -} |