summaryrefslogtreecommitdiffstats
path: root/usr.bin/grep/grep.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr.bin/grep/grep.c')
-rw-r--r--usr.bin/grep/grep.c128
1 files changed, 76 insertions, 52 deletions
diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c
index 73e1d3d..e0f1d71 100644
--- a/usr.bin/grep/grep.c
+++ b/usr.bin/grep/grep.c
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <ctype.h>
#include <err.h>
#include <errno.h>
+#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <libgen.h>
@@ -48,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include <unistd.h>
+#include "fastmatch.h"
#include "grep.h"
#ifndef WITHOUT_NLS
@@ -81,9 +83,9 @@ bool matchall;
/* Searching patterns */
unsigned int patterns, pattern_sz;
-char **pattern;
+struct pat *pattern;
regex_t *r_pattern;
-fastgrep_t *fg_pattern;
+fastmatch_t *fg_pattern;
/* Filename exclusion/inclusion patterns */
unsigned int fpatterns, fpattern_sz;
@@ -104,7 +106,7 @@ bool hflag; /* -h: don't print filename headers */
bool iflag; /* -i: ignore case */
bool lflag; /* -l: only show names of files with matches */
bool mflag; /* -m x: stop reading the files after x matches */
-unsigned long long mcount; /* count for -m */
+long long mcount; /* count for -m */
bool nflag; /* -n: show line numbers in front of matching lines */
bool oflag; /* -o: print only matching part */
bool qflag; /* -q: quiet mode (don't output anything) */
@@ -164,7 +166,7 @@ usage(void)
exit(2);
}
-static const char *optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy";
+static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXy";
struct option long_options[] =
{
@@ -200,6 +202,7 @@ struct option long_options[] =
{"files-with-matches", no_argument, NULL, 'l'},
{"files-without-match", no_argument, NULL, 'L'},
{"max-count", required_argument, NULL, 'm'},
+ {"lzma", no_argument, NULL, 'M'},
{"line-number", no_argument, NULL, 'n'},
{"only-matching", no_argument, NULL, 'o'},
{"quiet", no_argument, NULL, 'q'},
@@ -212,6 +215,7 @@ struct option long_options[] =
{"version", no_argument, NULL, 'V'},
{"word-regexp", no_argument, NULL, 'w'},
{"line-regexp", no_argument, NULL, 'x'},
+ {"xz", no_argument, NULL, 'X'},
{"decompress", no_argument, NULL, 'Z'},
{NULL, no_argument, NULL, 0}
};
@@ -223,23 +227,35 @@ static void
add_pattern(char *pat, size_t len)
{
+ /* Do not add further pattern is we already match everything */
+ if (matchall)
+ return;
+
/* Check if we can do a shortcut */
- if (len == 0 || matchall) {
+ if (len == 0) {
matchall = true;
+ for (unsigned int i = 0; i < patterns; i++) {
+ free(pattern[i].pat);
+ }
+ pattern = grep_realloc(pattern, sizeof(struct pat));
+ pattern[0].pat = NULL;
+ pattern[0].len = 0;
+ patterns = 1;
return;
}
/* Increase size if necessary */
if (patterns == pattern_sz) {
pattern_sz *= 2;
pattern = grep_realloc(pattern, ++pattern_sz *
- sizeof(*pattern));
+ sizeof(struct pat));
}
if (len > 0 && pat[len - 1] == '\n')
--len;
/* pat may not be NUL-terminated */
- pattern[patterns] = grep_malloc(len + 1);
- memcpy(pattern[patterns], pat, len);
- pattern[patterns][len] = '\0';
+ pattern[patterns].pat = grep_malloc(len + 1);
+ memcpy(pattern[patterns].pat, pat, len);
+ pattern[patterns].len = len;
+ pattern[patterns].pat[len] = '\0';
++patterns;
}
@@ -285,14 +301,19 @@ add_dpattern(const char *pat, int mode)
static void
read_patterns(const char *fn)
{
+ struct stat st;
FILE *f;
char *line;
size_t len;
if ((f = fopen(fn, "r")) == NULL)
err(2, "%s", fn);
- while ((line = fgetln(f, &len)) != NULL)
- add_pattern(line, *line == '\n' ? 0 : len);
+ if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
+ fclose(f);
+ return;
+ }
+ while ((line = fgetln(f, &len)) != NULL)
+ add_pattern(line, line[0] == '\n' ? 0 : len);
if (ferror(f))
err(2, "%s", fn);
fclose(f);
@@ -311,7 +332,7 @@ int
main(int argc, char *argv[])
{
char **aargv, **eargv, *eopts;
- char *ep;
+ char *pn, *ep;
unsigned long long l;
unsigned int aargc, eargc, i;
int c, lastc, needpattern, newarg, prevoptind;
@@ -325,30 +346,27 @@ main(int argc, char *argv[])
/* Check what is the program name of the binary. In this
way we can have all the funcionalities in one binary
without the need of scripting and using ugly hacks. */
- switch (__progname[0]) {
+ pn = __progname;
+ if (pn[0] == 'b' && pn[1] == 'z') {
+ filebehave = FILE_BZIP;
+ pn += 2;
+ } else if (pn[0] == 'x' && pn[1] == 'z') {
+ filebehave = FILE_XZ;
+ pn += 2;
+ } else if (pn[0] == 'l' && pn[1] == 'z') {
+ filebehave = FILE_LZMA;
+ pn += 2;
+ } else if (pn[0] == 'z') {
+ filebehave = FILE_GZIP;
+ pn += 1;
+ }
+ switch (pn[0]) {
case 'e':
grepbehave = GREP_EXTENDED;
break;
case 'f':
grepbehave = GREP_FIXED;
break;
- case 'g':
- grepbehave = GREP_BASIC;
- break;
- case 'z':
- filebehave = FILE_GZIP;
- switch(__progname[1]) {
- case 'e':
- grepbehave = GREP_EXTENDED;
- break;
- case 'f':
- grepbehave = GREP_FIXED;
- break;
- case 'g':
- grepbehave = GREP_BASIC;
- break;
- }
- break;
}
lastc = '\0';
@@ -503,8 +521,8 @@ main(int argc, char *argv[])
case 'm':
mflag = true;
errno = 0;
- mcount = strtoull(optarg, &ep, 10);
- if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
+ mcount = strtoll(optarg, &ep, 10);
+ if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
((errno == EINVAL) && (mcount == 0)))
err(2, NULL);
else if (ep[0] != '\0') {
@@ -512,6 +530,9 @@ main(int argc, char *argv[])
err(2, NULL);
}
break;
+ case 'M':
+ filebehave = FILE_LZMA;
+ break;
case 'n':
nflag = true;
break;
@@ -544,7 +565,7 @@ main(int argc, char *argv[])
break;
case 'u':
case MMAP_OPT:
- /* noop, compatibility */
+ filebehave = FILE_MMAP;
break;
case 'V':
printf(getstr(9), __progname, VERSION);
@@ -560,6 +581,9 @@ main(int argc, char *argv[])
xflag = true;
cflags &= ~REG_NOSUB;
break;
+ case 'X':
+ filebehave = FILE_XZ;
+ break;
case 'Z':
filebehave = FILE_GZIP;
break;
@@ -630,6 +654,10 @@ main(int argc, char *argv[])
aargc -= optind;
aargv += optind;
+ /* Empty pattern file matches nothing */
+ if (!needpattern && (patterns == 0))
+ exit(1);
+
/* Fail if we don't have any pattern */
if (aargc == 0 && needpattern)
usage();
@@ -642,9 +670,12 @@ main(int argc, char *argv[])
}
switch (grepbehave) {
- case GREP_FIXED:
case GREP_BASIC:
break;
+ case GREP_FIXED:
+ /* XXX: header mess, REG_LITERAL not defined in gnu/regex.h */
+ cflags |= 0020;
+ break;
case GREP_EXTENDED:
cflags |= REG_EXTENDED;
break;
@@ -655,24 +686,17 @@ main(int argc, char *argv[])
fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
-/*
- * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
- * Optimizations should be done there.
- */
- /* Check if cheating is allowed (always is for fgrep). */
- if (grepbehave == GREP_FIXED) {
- for (i = 0; i < patterns; ++i)
- fgrepcomp(&fg_pattern[i], pattern[i]);
- } else {
- for (i = 0; i < patterns; ++i) {
- if (fastcomp(&fg_pattern[i], pattern[i])) {
- /* Fall back to full regex library */
- c = regcomp(&r_pattern[i], pattern[i], cflags);
- if (c != 0) {
- regerror(c, &r_pattern[i], re_error,
- RE_ERROR_BUF);
- errx(2, "%s", re_error);
- }
+
+ /* Check if cheating is allowed (always is for fgrep). */
+ for (i = 0; i < patterns; ++i) {
+ if (fastncomp(&fg_pattern[i], pattern[i].pat,
+ pattern[i].len, cflags) != 0) {
+ /* Fall back to full regex library */
+ c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
+ if (c != 0) {
+ regerror(c, &r_pattern[i], re_error,
+ RE_ERROR_BUF);
+ errx(2, "%s", re_error);
}
}
}
OpenPOWER on IntegriCloud