summaryrefslogtreecommitdiffstats
path: root/usr.bin/grep
diff options
context:
space:
mode:
authorkevans <kevans@FreeBSD.org>2017-09-11 15:52:24 +0000
committerkevans <kevans@FreeBSD.org>2017-09-11 15:52:24 +0000
commitf1f01794675bbb08bbb5e7b46e94cdfa850833b1 (patch)
tree492a0c6a67533a7ec3f4983a407d06496c3289d2 /usr.bin/grep
parent7bb4af013bfb33afc54e662f105065877842b109 (diff)
downloadFreeBSD-src-f1f01794675bbb08bbb5e7b46e94cdfa850833b1.zip
FreeBSD-src-f1f01794675bbb08bbb5e7b46e94cdfa850833b1.tar.gz
bsdgrep: add a primitive literal matcher to unbreak fgrep in some scenarios
MFC r322825: bsdgrep: add some additional tests for fgrep. Previously added tests only check that fgrep is somewhat sane and works. Add some more tests that check that the implementation is basically functional and not producing incorrect results with various flags. MFC r322826: bsdgrep: add a primitive literal matcher. fgrep/grep -F will error out at runtime if compiled with a regex(3) that does not define REG_NOSPEC or REG_LITERAL. glibc is one such regex(3) implementation, and as it turns out they don't support literal matching at all. Provide a primitive literal matcher for use with glibc and other implementations that don't support literal matching so that we don't completely lose fgrep/grep -F if compiled against libgnuregex on stable/10, stable/11, or other systems that we don't necessarily support. This is a wholly unoptimized implementation with no plans to optimize it as of now. This is due to both its use-case being primarily on unsupported systems in the near-distant future and that it's reinventing the wheel that we already have available as a feature of regex(3). PR: 222201. Approved by: emaste (mentor, blanket MFC).
Diffstat (limited to 'usr.bin/grep')
-rw-r--r--usr.bin/grep/grep.c15
-rw-r--r--usr.bin/grep/grep.h4
-rw-r--r--usr.bin/grep/util.c71
3 files changed, 87 insertions, 3 deletions
diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c
index 6ef3e1f..3113fba 100644
--- a/usr.bin/grep/grep.c
+++ b/usr.bin/grep/grep.c
@@ -721,12 +721,19 @@ main(int argc, char *argv[])
case GREP_BASIC:
break;
case GREP_FIXED:
+ /*
+ * regex(3) implementations that support fixed-string searches generally
+ * define either REG_NOSPEC or REG_LITERAL. Set the appropriate flag
+ * here. If neither is defined, GREP_FIXED later implies that the
+ * internal literal matcher should be used. Other cflags that have
+ * the same interpretation as REG_NOSPEC and REG_LITERAL should be
+ * similarly added here, and grep.h should be amended to take this into
+ * consideration when defining WITH_INTERNAL_NOSPEC.
+ */
#if defined(REG_NOSPEC)
cflags |= REG_NOSPEC;
#elif defined(REG_LITERAL)
cflags |= REG_LITERAL;
-#else
- errx(2, "literal expressions not supported at compile time");
#endif
break;
case GREP_EXTENDED:
@@ -743,7 +750,11 @@ main(int argc, char *argv[])
r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
/* Don't process any patterns if we have a blank one */
+#ifdef WITH_INTERNAL_NOSPEC
+ if (!matchall && grepbehave != GREP_FIXED) {
+#else
if (!matchall) {
+#endif
/* Check if cheating is allowed (always is for fgrep). */
for (i = 0; i < patterns; ++i) {
#ifndef WITHOUT_FASTMATCH
diff --git a/usr.bin/grep/grep.h b/usr.bin/grep/grep.h
index 932e149..7cd8c38 100644
--- a/usr.bin/grep/grep.h
+++ b/usr.bin/grep/grep.h
@@ -57,6 +57,10 @@ extern const char *errstr[];
#define GREP_BASIC 1
#define GREP_EXTENDED 2
+#if !defined(REG_NOSPEC) && !defined(REG_LITERAL)
+#define WITH_INTERNAL_NOSPEC
+#endif
+
#define BINFILE_BIN 0
#define BINFILE_SKIP 1
#define BINFILE_TEXT 2
diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c
index 4e0c80c..2741380 100644
--- a/usr.bin/grep/util.c
+++ b/usr.bin/grep/util.c
@@ -70,7 +70,10 @@ struct parsec {
bool binary; /* Binary file? */
};
-
+#ifdef WITH_INTERNAL_NOSPEC
+static int litexec(const struct pat *pat, const char *string,
+ size_t nmatch, regmatch_t pmatch[]);
+#endif
static int procline(struct parsec *pc);
static void printline(struct parsec *pc, int sep);
static void printline_metadata(struct str *line, int sep);
@@ -350,6 +353,67 @@ procfile(const char *fn)
return (c);
}
+#ifdef WITH_INTERNAL_NOSPEC
+/*
+ * Internal implementation of literal string search within a string, modeled
+ * after regexec(3), for use when the regex(3) implementation doesn't offer
+ * either REG_NOSPEC or REG_LITERAL. This does not apply in the default FreeBSD
+ * config, but in other scenarios such as building against libgnuregex or on
+ * some non-FreeBSD OSes.
+ *
+ * The search window is read from pmatch[0] on entry: rm_so is the start
+ * offset and rm_eo the end of the subject. Successive matches are stored in
+ * pmatch[0..nmatch-1]; on a match, unused slots are set to -1. Returns 0 on
+ * a match, REG_NOMATCH on no match, or REG_ESPACE if strndup(3) fails.
+ */
+static int
+litexec(const struct pat *pat, const char *string, size_t nmatch,
+    regmatch_t pmatch[])
+{
+	char *(*strstr_fn)(const char *, const char *);
+	char *sub, *subject;
+	const char *search;
+	size_t idx, ofs, stringlen;
+
+	strstr_fn = (cflags & REG_ICASE) ? strcasestr : strstr;
+	idx = 0;
+	ofs = pmatch[0].rm_so;
+	stringlen = pmatch[0].rm_eo;
+	if (ofs >= stringlen)
+		return (REG_NOMATCH);
+	subject = strndup(string, stringlen);
+	if (subject == NULL)
+		return (REG_ESPACE);
+	while (ofs < stringlen) {
+		search = subject + ofs;
+		if ((unsigned long)pat->len > strlen(search))
+			break;
+		/* Empty patterns never get here: grep handles them earlier. */
+		sub = strstr_fn(search, pat->pat);
+		if (sub == NULL)
+			break;
+		if (nmatch == 0) {
+			/* We only needed to know if we match or not */
+			free(subject);
+			return (0);
+		}
+		pmatch[idx].rm_so = ofs + (sub - search);
+		pmatch[idx].rm_eo = pmatch[idx].rm_so + pat->len;
+		/* Resume one byte past the start of the match just recorded. */
+		ofs = pmatch[idx].rm_so + 1;
+		if (++idx == nmatch)
+			break;
+	}
+	free(subject);
+	if (idx == 0)
+		return (REG_NOMATCH);
+	/* Mark the slots we did not fill as unused. */
+	for (; idx < nmatch; ++idx)
+		pmatch[idx].rm_so = pmatch[idx].rm_eo = -1;
+	return (0);
+}
+#endif /* WITH_INTERNAL_NOSPEC */
+
#define iswword(x) (iswalnum((x)) || (x) == L'_')
/*
@@ -400,6 +464,11 @@ procline(struct parsec *pc)
for (i = 0; i < patterns; i++) {
pmatch.rm_so = st;
pmatch.rm_eo = pc->ln.len;
+#ifdef WITH_INTERNAL_NOSPEC
+ if (grepbehave == GREP_FIXED)
+ r = litexec(&pattern[i], pc->ln.dat, 1, &pmatch);
+ else
+#endif
#ifndef WITHOUT_FASTMATCH
if (fg_pattern[i].pattern)
r = fastexec(&fg_pattern[i],
OpenPOWER on IntegriCloud