diff options
Diffstat (limited to 'usr.bin/grep/grep.c')
-rw-r--r-- | usr.bin/grep/grep.c | 128 |
1 files changed, 76 insertions, 52 deletions
diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c index 73e1d3d..e0f1d71 100644 --- a/usr.bin/grep/grep.c +++ b/usr.bin/grep/grep.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include <ctype.h> #include <err.h> #include <errno.h> +#include <fcntl.h> #include <getopt.h> #include <limits.h> #include <libgen.h> @@ -48,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include <string.h> #include <unistd.h> +#include "fastmatch.h" #include "grep.h" #ifndef WITHOUT_NLS @@ -81,9 +83,9 @@ bool matchall; /* Searching patterns */ unsigned int patterns, pattern_sz; -char **pattern; +struct pat *pattern; regex_t *r_pattern; -fastgrep_t *fg_pattern; +fastmatch_t *fg_pattern; /* Filename exclusion/inclusion patterns */ unsigned int fpatterns, fpattern_sz; @@ -104,7 +106,7 @@ bool hflag; /* -h: don't print filename headers */ bool iflag; /* -i: ignore case */ bool lflag; /* -l: only show names of files with matches */ bool mflag; /* -m x: stop reading the files after x matches */ -unsigned long long mcount; /* count for -m */ +long long mcount; /* count for -m */ bool nflag; /* -n: show line numbers in front of matching lines */ bool oflag; /* -o: print only matching part */ bool qflag; /* -q: quiet mode (don't output anything) */ @@ -164,7 +166,7 @@ usage(void) exit(2); } -static const char *optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy"; +static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXy"; struct option long_options[] = { @@ -200,6 +202,7 @@ struct option long_options[] = {"files-with-matches", no_argument, NULL, 'l'}, {"files-without-match", no_argument, NULL, 'L'}, {"max-count", required_argument, NULL, 'm'}, + {"lzma", no_argument, NULL, 'M'}, {"line-number", no_argument, NULL, 'n'}, {"only-matching", no_argument, NULL, 'o'}, {"quiet", no_argument, NULL, 'q'}, @@ -212,6 +215,7 @@ struct option long_options[] = {"version", no_argument, NULL, 'V'}, {"word-regexp", no_argument, NULL, 'w'}, {"line-regexp", no_argument, NULL, 'x'}, + {"xz", no_argument, NULL, 'X'}, {"decompress", no_argument, NULL, 'Z'}, {NULL, no_argument, NULL, 0} }; @@ -223,23 +227,35 @@ static void add_pattern(char *pat, size_t len) { + /* Do not add further pattern is we already match everything */ + if (matchall) + return; + /* Check if we can do a shortcut */ - if (len == 0 || matchall) { + if (len == 0) { matchall = true; + for (unsigned int i = 0; i < patterns; i++) { + free(pattern[i].pat); + } + pattern = grep_realloc(pattern, sizeof(struct pat)); + pattern[0].pat = NULL; + pattern[0].len = 0; + patterns = 1; return; } /* Increase size if necessary */ if (patterns == pattern_sz) { pattern_sz *= 2; pattern = grep_realloc(pattern, ++pattern_sz * - sizeof(*pattern)); + sizeof(struct pat)); } if (len > 0 && pat[len - 1] == '\n') --len; /* pat may not be NUL-terminated */ - pattern[patterns] = grep_malloc(len + 1); - memcpy(pattern[patterns], pat, len); - pattern[patterns][len] = '\0'; + pattern[patterns].pat = grep_malloc(len + 1); + memcpy(pattern[patterns].pat, pat, len); + pattern[patterns].len = len; + pattern[patterns].pat[len] = '\0'; ++patterns; } @@ -285,14 +301,19 @@ add_dpattern(const char *pat, int mode) static void read_patterns(const char *fn) { + struct stat st; FILE *f; char *line; size_t len; if ((f = fopen(fn, "r")) == NULL) err(2, "%s", fn); - while ((line = fgetln(f, &len)) != NULL) - add_pattern(line, *line == '\n' ? 0 : len); + if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) { + fclose(f); + return; + } + while ((line = fgetln(f, &len)) != NULL) + add_pattern(line, line[0] == '\n' ? 0 : len); if (ferror(f)) err(2, "%s", fn); fclose(f); @@ -311,7 +332,7 @@ int main(int argc, char *argv[]) { char **aargv, **eargv, *eopts; - char *ep; + char *pn, *ep; unsigned long long l; unsigned int aargc, eargc, i; int c, lastc, needpattern, newarg, prevoptind; @@ -325,30 +346,27 @@ main(int argc, char *argv[]) /* Check what is the program name of the binary. In this way we can have all the funcionalities in one binary without the need of scripting and using ugly hacks. */ - switch (__progname[0]) { + pn = __progname; + if (pn[0] == 'b' && pn[1] == 'z') { + filebehave = FILE_BZIP; + pn += 2; + } else if (pn[0] == 'x' && pn[1] == 'z') { + filebehave = FILE_XZ; + pn += 2; + } else if (pn[0] == 'l' && pn[1] == 'z') { + filebehave = FILE_LZMA; + pn += 2; + } else if (pn[0] == 'z') { + filebehave = FILE_GZIP; + pn += 1; + } + switch (pn[0]) { case 'e': grepbehave = GREP_EXTENDED; break; case 'f': grepbehave = GREP_FIXED; break; - case 'g': - grepbehave = GREP_BASIC; - break; - case 'z': - filebehave = FILE_GZIP; - switch(__progname[1]) { - case 'e': - grepbehave = GREP_EXTENDED; - break; - case 'f': - grepbehave = GREP_FIXED; - break; - case 'g': - grepbehave = GREP_BASIC; - break; - } - break; } lastc = '\0'; @@ -503,8 +521,8 @@ main(int argc, char *argv[]) case 'm': mflag = true; errno = 0; - mcount = strtoull(optarg, &ep, 10); - if (((errno == ERANGE) && (mcount == ULLONG_MAX)) || + mcount = strtoll(optarg, &ep, 10); + if (((errno == ERANGE) && (mcount == LLONG_MAX)) || ((errno == EINVAL) && (mcount == 0))) err(2, NULL); else if (ep[0] != '\0') { @@ -512,6 +530,9 @@ main(int argc, char *argv[]) err(2, NULL); } break; + case 'M': + filebehave = FILE_LZMA; + break; case 'n': nflag = true; break; @@ -544,7 +565,7 @@ main(int argc, char *argv[]) break; case 'u': case MMAP_OPT: - /* noop, compatibility */ + filebehave = FILE_MMAP; break; case 'V': printf(getstr(9), __progname, VERSION); @@ -560,6 +581,9 @@ main(int argc, char *argv[]) xflag = true; cflags &= ~REG_NOSUB; break; + case 'X': + filebehave = FILE_XZ; + break; case 'Z': filebehave = FILE_GZIP; break; @@ -630,6 +654,10 @@ main(int argc, char *argv[]) aargc -= optind; aargv += optind; + /* Empty pattern file matches nothing */ + if (!needpattern && (patterns == 0)) + exit(1); + /* Fail if we don't have any pattern */ if (aargc == 0 && needpattern) usage(); @@ -642,9 +670,12 @@ main(int argc, char *argv[]) } switch (grepbehave) { - case GREP_FIXED: case GREP_BASIC: break; + case GREP_FIXED: + /* XXX: header mess, REG_LITERAL not defined in gnu/regex.h */ + cflags |= 0020; + break; case GREP_EXTENDED: cflags |= REG_EXTENDED; break; @@ -655,24 +686,17 @@ main(int argc, char *argv[]) fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); -/* - * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance. - * Optimizations should be done there. - */ - /* Check if cheating is allowed (always is for fgrep). */ - if (grepbehave == GREP_FIXED) { - for (i = 0; i < patterns; ++i) - fgrepcomp(&fg_pattern[i], pattern[i]); - } else { - for (i = 0; i < patterns; ++i) { - if (fastcomp(&fg_pattern[i], pattern[i])) { - /* Fall back to full regex library */ - c = regcomp(&r_pattern[i], pattern[i], cflags); - if (c != 0) { - regerror(c, &r_pattern[i], re_error, - RE_ERROR_BUF); - errx(2, "%s", re_error); - } + + /* Check if cheating is allowed (always is for fgrep). */ + for (i = 0; i < patterns; ++i) { + if (fastncomp(&fg_pattern[i], pattern[i].pat, + pattern[i].len, cflags) != 0) { + /* Fall back to full regex library */ + c = regcomp(&r_pattern[i], pattern[i].pat, cflags); + if (c != 0) { + regerror(c, &r_pattern[i], re_error, + RE_ERROR_BUF); + errx(2, "%s", re_error); } } } |