summaryrefslogtreecommitdiffstats
path: root/usr.bin/grep/grep.c
diff options
context:
space:
mode:
authorgabor <gabor@FreeBSD.org>2011-10-05 09:56:43 +0000
committergabor <gabor@FreeBSD.org>2011-10-05 09:56:43 +0000
commit1cb16d98872b7dcec16ad22150c6a1c95e4a0796 (patch)
tree23f8a074a9eca70aa3ae8f97192648beb5aa6572 /usr.bin/grep/grep.c
parent58a76bb04f6febb9b77e88729b156b605c0be242 (diff)
downloadFreeBSD-src-1cb16d98872b7dcec16ad22150c6a1c95e4a0796.zip
FreeBSD-src-1cb16d98872b7dcec16ad22150c6a1c95e4a0796.tar.gz
Update BSD grep to the latest development version. It has some code
backported that was written for the TRE integration project in Google Summer of Code 2011. This is a temporary solution until the whole regex library is not replaced so that BSD grep development can continue and the backported code gets some review and testing. This change only improves scalability slightly, there is no big performance boost yet but several minor bugs have been found and fixed. Approved by: delphij (mentor) Sposored by: Google Summer of Code 2011 MFC after: 1 week
Diffstat (limited to 'usr.bin/grep/grep.c')
-rw-r--r--usr.bin/grep/grep.c128
1 files changed, 76 insertions, 52 deletions
diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c
index 73e1d3d..e0f1d71 100644
--- a/usr.bin/grep/grep.c
+++ b/usr.bin/grep/grep.c
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <ctype.h>
#include <err.h>
#include <errno.h>
+#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <libgen.h>
@@ -48,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include <unistd.h>
+#include "fastmatch.h"
#include "grep.h"
#ifndef WITHOUT_NLS
@@ -81,9 +83,9 @@ bool matchall;
/* Searching patterns */
unsigned int patterns, pattern_sz;
-char **pattern;
+struct pat *pattern;
regex_t *r_pattern;
-fastgrep_t *fg_pattern;
+fastmatch_t *fg_pattern;
/* Filename exclusion/inclusion patterns */
unsigned int fpatterns, fpattern_sz;
@@ -104,7 +106,7 @@ bool hflag; /* -h: don't print filename headers */
bool iflag; /* -i: ignore case */
bool lflag; /* -l: only show names of files with matches */
bool mflag; /* -m x: stop reading the files after x matches */
-unsigned long long mcount; /* count for -m */
+long long mcount; /* count for -m */
bool nflag; /* -n: show line numbers in front of matching lines */
bool oflag; /* -o: print only matching part */
bool qflag; /* -q: quiet mode (don't output anything) */
@@ -164,7 +166,7 @@ usage(void)
exit(2);
}
-static const char *optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy";
+static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXy";
struct option long_options[] =
{
@@ -200,6 +202,7 @@ struct option long_options[] =
{"files-with-matches", no_argument, NULL, 'l'},
{"files-without-match", no_argument, NULL, 'L'},
{"max-count", required_argument, NULL, 'm'},
+ {"lzma", no_argument, NULL, 'M'},
{"line-number", no_argument, NULL, 'n'},
{"only-matching", no_argument, NULL, 'o'},
{"quiet", no_argument, NULL, 'q'},
@@ -212,6 +215,7 @@ struct option long_options[] =
{"version", no_argument, NULL, 'V'},
{"word-regexp", no_argument, NULL, 'w'},
{"line-regexp", no_argument, NULL, 'x'},
+ {"xz", no_argument, NULL, 'X'},
{"decompress", no_argument, NULL, 'Z'},
{NULL, no_argument, NULL, 0}
};
@@ -223,23 +227,35 @@ static void
add_pattern(char *pat, size_t len)
{
+ /* Do not add further pattern is we already match everything */
+ if (matchall)
+ return;
+
/* Check if we can do a shortcut */
- if (len == 0 || matchall) {
+ if (len == 0) {
matchall = true;
+ for (unsigned int i = 0; i < patterns; i++) {
+ free(pattern[i].pat);
+ }
+ pattern = grep_realloc(pattern, sizeof(struct pat));
+ pattern[0].pat = NULL;
+ pattern[0].len = 0;
+ patterns = 1;
return;
}
/* Increase size if necessary */
if (patterns == pattern_sz) {
pattern_sz *= 2;
pattern = grep_realloc(pattern, ++pattern_sz *
- sizeof(*pattern));
+ sizeof(struct pat));
}
if (len > 0 && pat[len - 1] == '\n')
--len;
/* pat may not be NUL-terminated */
- pattern[patterns] = grep_malloc(len + 1);
- memcpy(pattern[patterns], pat, len);
- pattern[patterns][len] = '\0';
+ pattern[patterns].pat = grep_malloc(len + 1);
+ memcpy(pattern[patterns].pat, pat, len);
+ pattern[patterns].len = len;
+ pattern[patterns].pat[len] = '\0';
++patterns;
}
@@ -285,14 +301,19 @@ add_dpattern(const char *pat, int mode)
static void
read_patterns(const char *fn)
{
+ struct stat st;
FILE *f;
char *line;
size_t len;
if ((f = fopen(fn, "r")) == NULL)
err(2, "%s", fn);
- while ((line = fgetln(f, &len)) != NULL)
- add_pattern(line, *line == '\n' ? 0 : len);
+ if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
+ fclose(f);
+ return;
+ }
+ while ((line = fgetln(f, &len)) != NULL)
+ add_pattern(line, line[0] == '\n' ? 0 : len);
if (ferror(f))
err(2, "%s", fn);
fclose(f);
@@ -311,7 +332,7 @@ int
main(int argc, char *argv[])
{
char **aargv, **eargv, *eopts;
- char *ep;
+ char *pn, *ep;
unsigned long long l;
unsigned int aargc, eargc, i;
int c, lastc, needpattern, newarg, prevoptind;
@@ -325,30 +346,27 @@ main(int argc, char *argv[])
/* Check what is the program name of the binary. In this
way we can have all the funcionalities in one binary
without the need of scripting and using ugly hacks. */
- switch (__progname[0]) {
+ pn = __progname;
+ if (pn[0] == 'b' && pn[1] == 'z') {
+ filebehave = FILE_BZIP;
+ pn += 2;
+ } else if (pn[0] == 'x' && pn[1] == 'z') {
+ filebehave = FILE_XZ;
+ pn += 2;
+ } else if (pn[0] == 'l' && pn[1] == 'z') {
+ filebehave = FILE_LZMA;
+ pn += 2;
+ } else if (pn[0] == 'z') {
+ filebehave = FILE_GZIP;
+ pn += 1;
+ }
+ switch (pn[0]) {
case 'e':
grepbehave = GREP_EXTENDED;
break;
case 'f':
grepbehave = GREP_FIXED;
break;
- case 'g':
- grepbehave = GREP_BASIC;
- break;
- case 'z':
- filebehave = FILE_GZIP;
- switch(__progname[1]) {
- case 'e':
- grepbehave = GREP_EXTENDED;
- break;
- case 'f':
- grepbehave = GREP_FIXED;
- break;
- case 'g':
- grepbehave = GREP_BASIC;
- break;
- }
- break;
}
lastc = '\0';
@@ -503,8 +521,8 @@ main(int argc, char *argv[])
case 'm':
mflag = true;
errno = 0;
- mcount = strtoull(optarg, &ep, 10);
- if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
+ mcount = strtoll(optarg, &ep, 10);
+ if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
((errno == EINVAL) && (mcount == 0)))
err(2, NULL);
else if (ep[0] != '\0') {
@@ -512,6 +530,9 @@ main(int argc, char *argv[])
err(2, NULL);
}
break;
+ case 'M':
+ filebehave = FILE_LZMA;
+ break;
case 'n':
nflag = true;
break;
@@ -544,7 +565,7 @@ main(int argc, char *argv[])
break;
case 'u':
case MMAP_OPT:
- /* noop, compatibility */
+ filebehave = FILE_MMAP;
break;
case 'V':
printf(getstr(9), __progname, VERSION);
@@ -560,6 +581,9 @@ main(int argc, char *argv[])
xflag = true;
cflags &= ~REG_NOSUB;
break;
+ case 'X':
+ filebehave = FILE_XZ;
+ break;
case 'Z':
filebehave = FILE_GZIP;
break;
@@ -630,6 +654,10 @@ main(int argc, char *argv[])
aargc -= optind;
aargv += optind;
+ /* Empty pattern file matches nothing */
+ if (!needpattern && (patterns == 0))
+ exit(1);
+
/* Fail if we don't have any pattern */
if (aargc == 0 && needpattern)
usage();
@@ -642,9 +670,12 @@ main(int argc, char *argv[])
}
switch (grepbehave) {
- case GREP_FIXED:
case GREP_BASIC:
break;
+ case GREP_FIXED:
+ /* XXX: header mess, REG_LITERAL not defined in gnu/regex.h */
+ cflags |= 0020;
+ break;
case GREP_EXTENDED:
cflags |= REG_EXTENDED;
break;
@@ -655,24 +686,17 @@ main(int argc, char *argv[])
fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
-/*
- * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
- * Optimizations should be done there.
- */
- /* Check if cheating is allowed (always is for fgrep). */
- if (grepbehave == GREP_FIXED) {
- for (i = 0; i < patterns; ++i)
- fgrepcomp(&fg_pattern[i], pattern[i]);
- } else {
- for (i = 0; i < patterns; ++i) {
- if (fastcomp(&fg_pattern[i], pattern[i])) {
- /* Fall back to full regex library */
- c = regcomp(&r_pattern[i], pattern[i], cflags);
- if (c != 0) {
- regerror(c, &r_pattern[i], re_error,
- RE_ERROR_BUF);
- errx(2, "%s", re_error);
- }
+
+ /* Check if cheating is allowed (always is for fgrep). */
+ for (i = 0; i < patterns; ++i) {
+ if (fastncomp(&fg_pattern[i], pattern[i].pat,
+ pattern[i].len, cflags) != 0) {
+ /* Fall back to full regex library */
+ c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
+ if (c != 0) {
+ regerror(c, &r_pattern[i], re_error,
+ RE_ERROR_BUF);
+ errx(2, "%s", re_error);
}
}
}
OpenPOWER on IntegriCloud