summary | refs | log | tree | commit | diff | stats
path: root/usr.bin/grep
diff options
context:
space:
mode:
Diffstat (limited to 'usr.bin/grep')
-rw-r--r--  usr.bin/grep/grep.c  40
-rw-r--r--  usr.bin/grep/grep.h  7
-rw-r--r--  usr.bin/grep/queue.c  9
-rw-r--r--  usr.bin/grep/tests/Makefile  4
-rw-r--r--  usr.bin/grep/util.c  379
5 files changed, 256 insertions, 183 deletions
diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c
index 8b4ff6b..7bebe02 100644
--- a/usr.bin/grep/grep.c
+++ b/usr.bin/grep/grep.c
@@ -82,7 +82,13 @@ const char *errstr[] = {
int cflags = REG_NOSUB;
int eflags = REG_STARTEND;
-/* Shortcut for matching all cases like empty regex */
+/* XXX TODO: Get rid of this flag.
+ * matchall is a gross hack that means that an empty pattern was passed to us.
+ * It is a necessary evil at the moment because our regex(3) implementation
+ * does not allow for empty patterns, as supported by POSIX's definition of
+ * grammar for BREs/EREs. When libregex becomes available, it would be wise
+ * to remove this and let regex(3) handle the dirty details of empty patterns.
+ */
bool matchall;
/* Searching patterns */
@@ -154,9 +160,6 @@ enum {
static inline const char *init_color(const char *);
/* Housekeeping */
-bool first = true; /* flag whether we are processing the first match */
-bool prev; /* flag whether or not the previous line matched */
-int tail; /* lines left to print */
bool file_err; /* file reading error */
/*
@@ -730,20 +733,25 @@ main(int argc, char *argv[])
#endif
r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
- /* Check if cheating is allowed (always is for fgrep). */
- for (i = 0; i < patterns; ++i) {
+ /* Don't process any patterns if we have a blank one */
+ if (!matchall) {
+ /* Check if cheating is allowed (always is for fgrep). */
+ for (i = 0; i < patterns; ++i) {
#ifndef WITHOUT_FASTMATCH
- /* Attempt compilation with fastmatch regex and fallback to
- regex(3) if it fails. */
- if (fastncomp(&fg_pattern[i], pattern[i].pat,
- pattern[i].len, cflags) == 0)
- continue;
+ /*
+ * Attempt compilation with fastmatch regex and
+ * fallback to regex(3) if it fails.
+ */
+ if (fastncomp(&fg_pattern[i], pattern[i].pat,
+ pattern[i].len, cflags) == 0)
+ continue;
#endif
- c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
- if (c != 0) {
- regerror(c, &r_pattern[i], re_error,
- RE_ERROR_BUF);
- errx(2, "%s", re_error);
+ c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
+ if (c != 0) {
+ regerror(c, &r_pattern[i], re_error,
+ RE_ERROR_BUF);
+ errx(2, "%s", re_error);
+ }
}
}
diff --git a/usr.bin/grep/grep.h b/usr.bin/grep/grep.h
index ea17af40..56ce456 100644
--- a/usr.bin/grep/grep.h
+++ b/usr.bin/grep/grep.h
@@ -123,8 +123,7 @@ extern char *label;
extern const char *color;
extern int binbehave, devbehave, dirbehave, filebehave, grepbehave, linkbehave;
-extern bool file_err, first, matchall, prev;
-extern int tail;
+extern bool file_err, matchall;
extern unsigned int dpatterns, fpatterns, patterns;
extern struct pat *pattern;
extern struct epat *dpattern, *fpattern;
@@ -145,10 +144,10 @@ void *grep_malloc(size_t size);
void *grep_calloc(size_t nmemb, size_t size);
void *grep_realloc(void *ptr, size_t size);
char *grep_strdup(const char *str);
-void printline(struct str *line, int sep, regmatch_t *matches, int m);
+void grep_printline(struct str *line, int sep);
/* queue.c */
-void enqueue(struct str *x);
+bool enqueue(struct str *x);
void printqueue(void);
void clearqueue(void);
diff --git a/usr.bin/grep/queue.c b/usr.bin/grep/queue.c
index 1887888..4531833 100644
--- a/usr.bin/grep/queue.c
+++ b/usr.bin/grep/queue.c
@@ -53,7 +53,10 @@ static unsigned long long count;
static struct qentry *dequeue(void);
-void
+/*
+ * Enqueue another line; return true if we've dequeued a line as a result
+ */
+bool
enqueue(struct str *x)
{
struct qentry *item;
@@ -72,7 +75,9 @@ enqueue(struct str *x)
item = dequeue();
free(item->data.dat);
free(item);
+ return (true);
}
+ return (false);
}
static struct qentry *
@@ -95,7 +100,7 @@ printqueue(void)
struct qentry *item;
while ((item = dequeue()) != NULL) {
- printline(&item->data, '-', NULL, 0);
+ grep_printline(&item->data, '-');
free(item->data.dat);
free(item);
}
diff --git a/usr.bin/grep/tests/Makefile b/usr.bin/grep/tests/Makefile
index f383aa2..f0c0c86 100644
--- a/usr.bin/grep/tests/Makefile
+++ b/usr.bin/grep/tests/Makefile
@@ -20,9 +20,13 @@ ${PACKAGE}FILES+= d_context2_c.out
${PACKAGE}FILES+= d_context_a.in
${PACKAGE}FILES+= d_context_a.out
${PACKAGE}FILES+= d_context_b.in
+${PACKAGE}FILES+= d_context_e.in
${PACKAGE}FILES+= d_context_b.out
${PACKAGE}FILES+= d_context_c.out
${PACKAGE}FILES+= d_context_d.out
+${PACKAGE}FILES+= d_context_e.out
+${PACKAGE}FILES+= d_context_f.out
+${PACKAGE}FILES+= d_context_g.out
${PACKAGE}FILES+= d_egrep.out
${PACKAGE}FILES+= d_escmap.in
${PACKAGE}FILES+= d_f_file_empty.in
diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c
index dd9a753..cfcc72d 100644
--- a/usr.bin/grep/util.c
+++ b/usr.bin/grep/util.c
@@ -54,11 +54,23 @@ __FBSDID("$FreeBSD$");
#endif
#include "grep.h"
-static int linesqueued;
-static int procline(struct str *l, int);
+static bool first_match = true;
+
+/*
+ * Parsing context; used to hold things like matches made and
+ * other useful bits
+ */
+struct parsec {
+ regmatch_t matches[MAX_LINE_MATCHES]; /* Matches made */
+ struct str ln; /* Current line */
+ size_t matchidx; /* Latest used match index */
+ bool binary; /* Binary file? */
+};
-static int lasta;
-static bool ctxover;
+
+static int procline(struct parsec *pc);
+static void printline(struct parsec *pc, int sep);
+static void printline_metadata(struct str *line, int sep);
bool
file_matching(const char *fname)
@@ -183,13 +195,13 @@ grep_tree(char **argv)
int
procfile(const char *fn)
{
+ struct parsec pc;
struct file *f;
struct stat sb;
- struct str ln;
+ struct str *ln;
mode_t s;
- int c, t;
-
- mcount = mlimit;
+ int c, last_outed, t, tail;
+ bool doctx, printmatch, same_file;
if (strcmp(fn, "-") == 0) {
fn = label != NULL ? label : getstr(1);
@@ -213,57 +225,99 @@ procfile(const char *fn)
return (0);
}
- ln.file = grep_malloc(strlen(fn) + 1);
- strcpy(ln.file, fn);
- ln.line_no = 0;
- ln.len = 0;
- ctxover = false;
- linesqueued = 0;
+ /* Convenience */
+ ln = &pc.ln;
+ pc.ln.file = grep_malloc(strlen(fn) + 1);
+ strcpy(pc.ln.file, fn);
+ pc.ln.line_no = 0;
+ pc.ln.len = 0;
+ pc.ln.off = -1;
+ pc.binary = f->binary;
tail = 0;
- lasta = 0;
- ln.off = -1;
+ last_outed = 0;
+ same_file = false;
+ doctx = false;
+ printmatch = true;
+ if ((pc.binary && binbehave == BINFILE_BIN) || cflag || qflag ||
+ lflag || Lflag)
+ printmatch = false;
+ if (printmatch && (Aflag != 0 || Bflag != 0))
+ doctx = true;
+ mcount = mlimit;
for (c = 0; c == 0 || !(lflag || qflag); ) {
- ln.off += ln.len + 1;
- if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) {
- if (ln.line_no == 0 && matchall)
- exit(0);
+ /* Reset match count for every line processed */
+ pc.matchidx = 0;
+ pc.ln.off += pc.ln.len + 1;
+ if ((pc.ln.dat = grep_fgetln(f, &pc.ln.len)) == NULL ||
+ pc.ln.len == 0) {
+ if (pc.ln.line_no == 0 && matchall)
+ /*
+ * An empty file with an empty pattern and the
+ * -w flag does not match
+ */
+ exit(matchall && wflag ? 1 : 0);
else
break;
}
- if (ln.len > 0 && ln.dat[ln.len - 1] == fileeol)
- --ln.len;
- ln.line_no++;
+
+ if (pc.ln.len > 0 && pc.ln.dat[pc.ln.len - 1] == fileeol)
+ --pc.ln.len;
+ pc.ln.line_no++;
/* Return if we need to skip a binary file */
- if (f->binary && binbehave == BINFILE_SKIP) {
+ if (pc.binary && binbehave == BINFILE_SKIP) {
grep_close(f);
- free(ln.file);
+ free(pc.ln.file);
free(f);
return (0);
}
- /* Process the file line-by-line, enqueue non-matching lines */
- if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
- /* Except don't enqueue lines that appear in -A ctx */
- if (ln.line_no == 0 || lasta != ln.line_no) {
- /* queue is maxed to Bflag number of lines */
- enqueue(&ln);
- linesqueued++;
- ctxover = false;
+ if ((t = procline(&pc)) == 0)
+ ++c;
+
+ /* Deal with any -B context or context separators */
+ if (t == 0 && doctx) {
+ if (!first_match && (!same_file || last_outed > 0))
+ printf("--\n");
+ if (Bflag > 0)
+ printqueue();
+ tail = Aflag;
+ }
+ /* Print the matching line, but only if not quiet/binary */
+ if (t == 0 && printmatch) {
+ printline(&pc, ':');
+ first_match = false;
+ same_file = true;
+ last_outed = 0;
+ }
+ if (t != 0 && doctx) {
+ /* Deal with any -A context */
+ if (tail > 0) {
+ printline(&pc, '-');
+ tail--;
+ if (Bflag > 0)
+ clearqueue();
} else {
/*
- * Indicate to procline() that we have ctx
- * overlap and make sure queue is empty.
+ * Enqueue non-matching lines for -B context.
+ * If we're not actually doing -B context or if
+ * the enqueue resulted in a line being rotated
+ * out, then go ahead and increment last_outed
+ * to signify a gap between context/match.
*/
- if (!ctxover)
- clearqueue();
- ctxover = true;
+ if (Bflag == 0 || (Bflag > 0 && enqueue(ln)))
+ ++last_outed;
}
}
- c += t;
- if (mflag && mcount <= 0)
- break;
+
+ /* Count the matches if we have a match limit */
+ if (t == 0 && mflag) {
+ --mcount;
+ if (mflag && mcount <= 0)
+ break;
+ }
+
}
if (Bflag > 0)
clearqueue();
@@ -271,7 +325,7 @@ procfile(const char *fn)
if (cflag) {
if (!hflag)
- printf("%s:", ln.file);
+ printf("%s:", pc.ln.file);
printf("%u\n", c);
}
if (lflag && !qflag && c != 0)
@@ -282,7 +336,7 @@ procfile(const char *fn)
binbehave == BINFILE_BIN && f->binary && !qflag)
printf(getstr(8), fn);
- free(ln.file);
+ free(pc.ln.file);
free(f);
return (c);
}
@@ -297,62 +351,72 @@ procfile(const char *fn)
* appropriate output.
*/
static int
-procline(struct str *l, int nottext)
+procline(struct parsec *pc)
{
- regmatch_t matches[MAX_LINE_MATCHES];
- regmatch_t pmatch, lastmatch;
+ regmatch_t pmatch, lastmatch, chkmatch;
+ wchar_t wbegin, wend;
size_t st = 0, nst = 0;
unsigned int i;
- int c = 0, m = 0, r = 0, lastmatches = 0, leflags = eflags;
- int startm = 0;
+ int c = 0, r = 0, lastmatches = 0, leflags = eflags;
+ size_t startm = 0, matchidx;
int retry;
+ matchidx = pc->matchidx;
+
+ /* Special case: empty pattern with -w flag, check first character */
+ if (matchall && wflag) {
+ if (pc->ln.len == 0)
+ return (0);
+ wend = L' ';
+ if (sscanf(&pc->ln.dat[0], "%lc", &wend) != 1 || iswword(wend))
+ return (1);
+ else
+ return (0);
+ } else if (matchall)
+ return (0);
+
/* Initialize to avoid a false positive warning from GCC. */
lastmatch.rm_so = lastmatch.rm_eo = 0;
/* Loop to process the whole line */
- while (st <= l->len) {
+ while (st <= pc->ln.len) {
lastmatches = 0;
- startm = m;
+ startm = matchidx;
retry = 0;
if (st > 0)
leflags |= REG_NOTBOL;
/* Loop to compare with all the patterns */
for (i = 0; i < patterns; i++) {
pmatch.rm_so = st;
- pmatch.rm_eo = l->len;
+ pmatch.rm_eo = pc->ln.len;
#ifndef WITHOUT_FASTMATCH
if (fg_pattern[i].pattern)
r = fastexec(&fg_pattern[i],
- l->dat, 1, &pmatch, leflags);
+ pc->ln.dat, 1, &pmatch, leflags);
else
#endif
- r = regexec(&r_pattern[i], l->dat, 1,
+ r = regexec(&r_pattern[i], pc->ln.dat, 1,
&pmatch, leflags);
- r = (r == 0) ? 0 : REG_NOMATCH;
- if (r == REG_NOMATCH)
+ if (r != 0)
continue;
/* Check for full match */
- if (r == 0 && xflag)
- if (pmatch.rm_so != 0 ||
- (size_t)pmatch.rm_eo != l->len)
- r = REG_NOMATCH;
+ if (xflag && (pmatch.rm_so != 0 ||
+ (size_t)pmatch.rm_eo != pc->ln.len))
+ continue;
/* Check for whole word match */
#ifndef WITHOUT_FASTMATCH
- if (r == 0 && (wflag || fg_pattern[i].word)) {
+ if (wflag || fg_pattern[i].word) {
#else
- if (r == 0 && wflag) {
+ if (wflag) {
#endif
- wchar_t wbegin, wend;
-
wbegin = wend = L' ';
if (pmatch.rm_so != 0 &&
- sscanf(&l->dat[pmatch.rm_so - 1],
+ sscanf(&pc->ln.dat[pmatch.rm_so - 1],
"%lc", &wbegin) != 1)
r = REG_NOMATCH;
else if ((size_t)pmatch.rm_eo !=
- l->len &&
- sscanf(&l->dat[pmatch.rm_eo],
+ pc->ln.len &&
+ sscanf(&pc->ln.dat[pmatch.rm_eo],
"%lc", &wend) != 1)
r = REG_NOMATCH;
else if (iswword(wbegin) ||
@@ -361,7 +425,7 @@ procline(struct str *l, int nottext)
/*
* If we're doing whole word matching and we
* matched once, then we should try the pattern
- * again after advancing just past the start of
+ * again after advancing just past the start of
* the earliest match. This allows the pattern
* to match later on in the line and possibly
* still match a whole word.
@@ -369,33 +433,40 @@ procline(struct str *l, int nottext)
if (r == REG_NOMATCH &&
(retry == 0 || pmatch.rm_so + 1 < retry))
retry = pmatch.rm_so + 1;
+ if (r == REG_NOMATCH)
+ continue;
}
- if (r == 0) {
- lastmatches++;
- lastmatch = pmatch;
- if (m == 0)
- c++;
-
- if (m < MAX_LINE_MATCHES) {
- /* Replace previous match if the new one is earlier and/or longer */
- if (m > startm) {
- if (pmatch.rm_so < matches[m-1].rm_so ||
- (pmatch.rm_so == matches[m-1].rm_so && (pmatch.rm_eo - pmatch.rm_so) > (matches[m-1].rm_eo - matches[m-1].rm_so))) {
- matches[m-1] = pmatch;
- nst = pmatch.rm_eo;
- }
- } else {
- /* Advance as normal if not */
- matches[m++] = pmatch;
- nst = pmatch.rm_eo;
- }
- }
- /* matches - skip further patterns */
- if ((color == NULL && !oflag) ||
- qflag || lflag)
- break;
+ lastmatches++;
+ lastmatch = pmatch;
+
+ if (matchidx == 0)
+ c++;
+
+ /*
+ * Replace previous match if the new one is earlier
+ * and/or longer. This will lead to some amount of
+ * extra work if -o/--color are specified, but it's
+ * worth it from a correctness point of view.
+ */
+ if (matchidx > startm) {
+ chkmatch = pc->matches[matchidx - 1];
+ if (pmatch.rm_so < chkmatch.rm_so ||
+ (pmatch.rm_so == chkmatch.rm_so &&
+ (pmatch.rm_eo - pmatch.rm_so) >
+ (chkmatch.rm_eo - chkmatch.rm_so))) {
+ pc->matches[matchidx - 1] = pmatch;
+ nst = pmatch.rm_eo;
+ }
+ } else {
+ /* Advance as normal if not */
+ pc->matches[matchidx++] = pmatch;
+ nst = pmatch.rm_eo;
}
+ /* avoid excessive matching - skip further patterns */
+ if ((color == NULL && !oflag) || qflag || lflag ||
+ matchidx >= MAX_LINE_MATCHES)
+ break;
}
/*
@@ -414,7 +485,7 @@ procline(struct str *l, int nottext)
/* If we didn't have any matches or REG_NOSUB set */
if (lastmatches == 0 || (cflags & REG_NOSUB))
- nst = l->len;
+ nst = pc->ln.len;
if (lastmatches == 0)
/* No matches */
@@ -427,45 +498,11 @@ procline(struct str *l, int nottext)
st = nst;
}
-
+ /* Reflect the new matchidx in the context */
+ pc->matchidx = matchidx;
if (vflag)
c = !c;
-
- /* Count the matches if we have a match limit */
- if (mflag)
- mcount -= c;
-
- if (c && binbehave == BINFILE_BIN && nottext)
- return (c); /* Binary file */
-
- /* Dealing with the context */
- if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
- if (c) {
- if (!first && !prev && !tail && (Bflag || Aflag) &&
- !ctxover)
- printf("--\n");
- tail = Aflag;
- if (Bflag > 0) {
- printqueue();
- ctxover = false;
- }
- linesqueued = 0;
- printline(l, ':', matches, m);
- } else {
- /* Print -A lines following matches */
- lasta = l->line_no;
- printline(l, '-', matches, m);
- tail--;
- }
- }
-
- if (c) {
- prev = true;
- first = false;
- } else
- prev = false;
-
- return (c);
+ return (c ? 0 : 1);
}
/*
@@ -520,69 +557,89 @@ grep_strdup(const char *str)
}
/*
- * Prints a matching line according to the command line options.
+ * Print an entire line as-is, there are no inline matches to consider. This is
+ * used for printing context.
*/
-void
-printline(struct str *line, int sep, regmatch_t *matches, int m)
-{
- size_t a = 0;
- int i, n = 0;
+void grep_printline(struct str *line, int sep) {
+ printline_metadata(line, sep);
+ fwrite(line->dat, line->len, 1, stdout);
+ putchar(fileeol);
+}
- /* If matchall, everything matches but don't actually print for -o */
- if (oflag && matchall)
- return;
+static void
+printline_metadata(struct str *line, int sep)
+{
+ bool printsep;
+ printsep = false;
if (!hflag) {
if (!nullflag) {
fputs(line->file, stdout);
- ++n;
+ printsep = true;
} else {
printf("%s", line->file);
putchar(0);
}
}
if (nflag) {
- if (n > 0)
+ if (printsep)
putchar(sep);
printf("%d", line->line_no);
- ++n;
+ printsep = true;
}
if (bflag) {
- if (n > 0)
+ if (printsep)
putchar(sep);
printf("%lld", (long long)line->off);
- ++n;
+ printsep = true;
}
- if (n)
+ if (printsep)
putchar(sep);
+}
+
+/*
+ * Prints a matching line according to the command line options.
+ */
+static void
+printline(struct parsec *pc, int sep)
+{
+ size_t a = 0;
+ size_t i, matchidx;
+ regmatch_t match;
+
+ /* If matchall, everything matches but don't actually print for -o */
+ if (oflag && matchall)
+ return;
+
+ matchidx = pc->matchidx;
+
/* --color and -o */
- if ((oflag || color) && m > 0) {
- for (i = 0; i < m; i++) {
+ if ((oflag || color) && matchidx > 0) {
+ printline_metadata(&pc->ln, sep);
+ for (i = 0; i < matchidx; i++) {
+ match = pc->matches[i];
/* Don't output zero length matches */
- if (matches[i].rm_so == matches[i].rm_eo)
+ if (match.rm_so == match.rm_eo)
continue;
if (!oflag)
- fwrite(line->dat + a, matches[i].rm_so - a, 1,
+ fwrite(pc->ln.dat + a, match.rm_so - a, 1,
stdout);
- if (color)
+ if (color)
fprintf(stdout, "\33[%sm\33[K", color);
-
- fwrite(line->dat + matches[i].rm_so,
- matches[i].rm_eo - matches[i].rm_so, 1,
- stdout);
- if (color)
+ fwrite(pc->ln.dat + match.rm_so,
+ match.rm_eo - match.rm_so, 1, stdout);
+ if (color)
fprintf(stdout, "\33[m\33[K");
- a = matches[i].rm_eo;
+ a = match.rm_eo;
if (oflag)
putchar('\n');
}
if (!oflag) {
- if (line->len - a > 0)
- fwrite(line->dat + a, line->len - a, 1, stdout);
+ if (pc->ln.len - a > 0)
+ fwrite(pc->ln.dat + a, pc->ln.len - a, 1,
+ stdout);
putchar('\n');
}
- } else {
- fwrite(line->dat, line->len, 1, stdout);
- putchar(fileeol);
- }
+ } else
+ grep_printline(&pc->ln, sep);
}
OpenPOWER on IntegriCloud