From d8d7d228e475566fe145acde42c3569c522cd98c Mon Sep 17 00:00:00 2001 From: obrien Date: Mon, 3 Jan 2000 21:09:05 +0000 Subject: Virgin import of a trimmed down GNU Grep 2.4. --- gnu/usr.bin/grep/ChangeLog | 406 ++++++++++++++++++++++++ gnu/usr.bin/grep/NEWS | 52 ++- gnu/usr.bin/grep/THANKS | 6 +- gnu/usr.bin/grep/config.hin | 22 ++ gnu/usr.bin/grep/dfa.c | 88 ++++-- gnu/usr.bin/grep/dfa.h | 7 +- gnu/usr.bin/grep/doc/grep.texi | 607 ++++++++++++++++++++++++++---------- gnu/usr.bin/grep/doc/version.texi | 6 +- gnu/usr.bin/grep/grep.1 | 245 ++++++++++----- gnu/usr.bin/grep/grep.c | 509 +++++++++++++++++++----------- gnu/usr.bin/grep/grep.h | 6 +- gnu/usr.bin/grep/search.c | 64 ++-- gnu/usr.bin/grep/system.h | 14 +- gnu/usr.bin/grep/tests/bre.awk | 4 +- gnu/usr.bin/grep/tests/bre.tests | 4 +- gnu/usr.bin/grep/tests/ere.awk | 4 +- gnu/usr.bin/grep/tests/ere.tests | 24 +- gnu/usr.bin/grep/tests/spencer1.awk | 4 +- 18 files changed, 1558 insertions(+), 514 deletions(-) (limited to 'gnu/usr.bin/grep') diff --git a/gnu/usr.bin/grep/ChangeLog b/gnu/usr.bin/grep/ChangeLog index 9996fe5..3855810 100644 --- a/gnu/usr.bin/grep/ChangeLog +++ b/gnu/usr.bin/grep/ChangeLog @@ -1,3 +1,409 @@ +1999-11-18 Paul Eggert + + * m4/largefile.m4 (AC_SYS_LARGEFILE_FLAGS): Work around a + problem with the QNX 4.25 shell, which doesn't propagate exit + status of failed commands inside shell assignments. + +1999-11-13 Eli Zaretskii + + * doc/grep.texi: Minor markup and spelling corrections. Use + @noindent where appropriate. + + * PATCHES-{AM,AC}: rename to PATCHES.{AM,AC} + +1999-11-12 Eli Zaretskii + + doc/grep.texi: Minor fixes and typos corrected. + djgpp/README: Updated version. + +1999-11-07 Paul Eggert + + * src/grep.c (usage): Fix misspelling. + +1999-11-07 Paul Eggert + + Don't assume that the C library has re_set_syntax and friends. + * src/Makefile.am (base_sources): Add regex.c, regex.h. + (EXTRA_DIST): Remove regex.c, regex.h. 
+ + * src/grep.c (prtext): Use out_quiet, not not_text, to decide + whether to set pending to zero at the end. + (not_text): Remove static variable, undoing latest change. + (grep): Likewise. + + * doc/grep.texi: Tighten up the text, and fix some minor + spelling and usage errors. Use @enumerate rather than @table + @samp, since it's better for Q&A format. Add cross + references. + +1999-11-01 Alain Magloire + + * src/search.c: Use the more portable [[:alnum:]] + to define a word instead of Ascii dependent [0-9A-Za-z] + * src/grep.c: make not_text global to not display text when + the context switches -A/-B/-C are use on binary files. + * make grep-2.3g available for testing. + * configure.in: drop support for --without-included-regex. + This was generating bogus bug reports, since many GNU/Linux + users have different version of glibc. And glibc maintainers + decided to drop k&r support. + +1999-11-01 Arnold D. Robbins + + * regex.c (init_syntax_once): move below definition of + ISALNUM etc., then use ISALNUM to init the table, so that + the word ops will work if i18n'ed. + (SYNTAX): And subscript with 0xFF for Latin-1 characters. + +1999-10-26 Alain Magloire + + * src/regex.c: Merge changes from GNU lib C. + * Updated the *.po files + +1999-10-26 Paul Eggert + + * src/grep.c (fillbuf): Don't report buffer size overflow if + newalloc == save and maxalloc == save. This can happen + e.g. when reading a large page-aligned file that contains + no newlines. + +1999-10-21 Paul Eggert + + * src/grep.c (usage): Give example. Clarify -F. + Explain exit status more clearly. + +1999-10-12 Paul Eggert + + * doc/grep.texi: Shorten the commentary about egrep and {. + "BSD grep" -> "traditional grep". + * doc/grep.1: Match recent changes to grep.texi. + +1999-10-11 Paul Eggert + + * NEWS, doc/grep.1, doc/grep.texi: New option --mmap. + * src/grep.c (mmap_option): New variable. + (long_options, reset, usage): Add --mmap. + Default is now read, not mmap. 
+ + * doc/grep.1: Document -Z or --null. + +1999-10-11 Paul Eggert + + * doc/grep.texi: Fix texinfo glitches. POSIX -> POSIX.2 where + appropriate. + +1999-10-11 Paul Eggert + + * acconfig.h (ssize_t): New #undef. + + * configure.in (AC_CHECK_TYPE): Add ssize_t. + + * src/grep.c (PREFERRED_SAVE_FACTOR): New macro. + (reset): If the buffer has already been allocated, set bufsalloc to + be bufalloc / PREFERRED_SAVE_FACTOR. This avoids problems when + bufsalloc == bufalloc (possible after reading a large binary file). + (reset): Use PREFERRED_SAVE_FACTOR instead of magic constant. + Do not set bufbeg; nobody uses it. + Always set buflim. + Check for lseek error. + Use SEEK_CUR, not a magic constant. + (fillbuf): Return an error indication, not a count. + All callers changed. + Do not assume ssize_t fits in int. + Use PREFERRED_SAVE_FACTOR instead of magic constant. + Clean up mmap code. + Do not attempt to mmap zero bytes. + Check for lseek error. + Use SEEK_SET, not a magic constant. + Work correctly if read is interrupted. + (grepfile): Work correctly if open or close is interrupted. + + * src/system.h (SEEK_SET, SEEK_CUR): New macros. + +1999-10-02 Alain Magloire + + * src/regex.[ch]: upgrade from GNU lib C source tree. + + * make beta 2.3f available. + +1999-10-02 Paul Eggert + + * NEWS: egrep is now equivalent to `grep -E'. + The lower bound of an interval is not optional. + You can specify a matcher multiple types without error. + -u and -U are now allowed on non-DOS hosts, and have no effect. + * doc/grep.texi: Likewise. + * doc/grep.1: Likewise. + Fix some troff bugs that prevented `groff' from rendering the page. + + * src/egrepmat.c, src/fgrepmat.c, src/grepmat.c (default_matcher): + Remove. + (matcher): Add. + * src/grep.h (default_matcher): Remove. + (matcher): Now exported from ?grepmat.c, not grep.c. + + * src/dfa.c (lex): If { would start an invalid interval specification, + treat it as a normal character. 
+ Remove (broken) support for {,M} meaning {0,M}. + Diagnose bogus intervals like {1,0}. + (closure): maxrep is now -1 to indicate no limit, not zero; + zero is a valid value for maxrep, meaning an upper bound of zero. + + * src/grep.c (short_options): New constant. + (long_options, main): -u and -U are now supported on Unix, + with no effect. + (matcher): Removed; now defined by ?grepmat.c. + (install_matcher): Renamed from setmatcher. + (setmatcher): New function. + (usage): Report new, more uniform option scheme. + (main): Do not initialize matcher; ?grepmat.c now does this. + Rely on setmatcher to catch matcher conflicts. + Default matcher is "grep". + + * src/search.c (matchers): + Remove "posix-egrep" matcher; no longer needed. + (Ecompile): Likewise. + The egrep matcher now has POSIX behavior. + + * tests/bre.tests: grep '\{' is no longer an error. + Fix test for interval too large, and enable it. + * tests/ere.tests: grep -E {1 is no longer an error + Likewise for a{1, a{1a, a{1a}, a{1,x}. + +1999-09-22 Paul Eggert + + * largefile.m4 (AC_SYS_LARGEFILE_FLAGS): Work around GCC + 2.95.1 bug with HP-UX 10.20. + +1999-09-12 Paul Eggert + + * src/grep.c (fillbuf): Fix typo: we sometimes reported + arithmetic overflow even when there wasn't any. + +1999-09-12 Paul Eggert + + * configure.in (AC_CHECK_FUNCS): Add memmove. + + * src/system.h (S_ISREG): New macro. + (memmove): Define if ! defined HAVE_MEMMOVE && ! defined memmove, + not if !defined STDC_HEADERS. This is needed for SunOS 4.1.4, + which defines STDC_HEADERS but lacks memmove. + + * src/grep.c (bufoffset): Needed even if !defined HAVE_MMAP. + (reset): Always fstat the file, since we always need its size if it is + regular. + Similarly, get the buffer offset of every regular file. + Set bufmapped to 0 if the file's initial offset is not a multiple + of the page size. 
+ (fillbuf): Calculate an upper bound on how much memory we should + allocate only for regular files, since we don't know the sizes of + other files. + Don't bother to check whether the file offset is a multiple of the page + size, since we now do that just once in `reset'. + When an mmapped area would fall past the end of the file, trim it to + just before instead of giving up immediately and doing a `read'; + that avoids a worst-case behavior that could read half an mmapped file. + Fix bug when computing offsets on hosts that don't have mmap. + +1999-08-27 Paul Eggert + + * src/system.h (memmove): New macro. + + * src/grep.c (page_alloc): Reallocate the old buffer instead + of having both old and new buffers active simultaneously. + Remove valloc debugging variant, which no longer applies. + + (fillbuf): Rejigger the buffer allocation mechanism. The old + mechanism could allocate more than 10*N bytes for an N-byte + file, which was excessive. Check for arithmetic overflow a + bit more carefully. + +1999-08-25 Paul Eggert + + * src/grep.c (grepdir): + Don't assume that st_ino and st_dev must be integers; + POSIX.1 allows them to be floating-point (!). + + * src/vms_fab.h (arr_ptr): `:' -> `;' to fix typo. + +1999-08-18 Alain Magloire + + * 2.3e snapshot. + +1999-08-18 Alain Magloire + + * src/search.c: On a CRAY J90 system running UNICOS 8.0. + Compilation of ./src/search.c failed because the declaration of + the variable "regex": + static struct re_pattern_buffer regex; + conflicted with a previous declaration search.c #includes "system.h", + which #includes , which declares : + extern char *regex __((char *_Re, char *_Subject, ...)); + The declaration in search.c is local to that one source file. + I just changed its name to something less likely to conflict. + (I called it "regexbuf", but you could pick any name you want.) + Excerpt email from Dean Kopesky. + +1999-08-16 Paul Eggert + + Upgrade large-file support to the version used in tar and + textutils. 
+ + * Makefile.am (ACLOCAL_AMFLAGS): Define to be empty. + (M4DIR, ACINCLUDE_INPUTS): New macros. + ($(srcdir)/acinclude.m4): New rule. + + * configure.in (AC_CANONICAL_HOST, AM_C_PROTOTYPES): Add. + (AC_SYS_LARGEFILE): Renamed from AC_LFS, for compatibility + with what should appear in the next autoconf release. + + * m4/largefile.m4: Renamed from m4/lfs.m4. + + * src/ansi2knr.1, src/ansi2knr.c, config.guess, config.sub: + New files. config.guess and config.sub ar needed by the new + AC_SYS_LARGEFILE. ansi2knr is needed by AM_C_PROTOTYPES, + which in turn is needed by the new AC_SYS_LARGEFILE. + +1999-08-16 Alain Magloire + + * 2.3d snapshot on ftp server. + +1999-07-26 Paul Eggert + +Several GNU tools have options to process arbitrary file names, even +file names that contain newline characters. These include `find +-print0', `perl -0', `sort -z', and `xargs -0'. It'd be handy if GNU +grep also processed such file names. Here's a proposed patch to do +this, relative to grep 2.3c. This patch introduces two options, one +for the data, and one for the file names. (Sometimes one wants +null-terminated file names in the output, and sometimes one wants to +process lists of null-terminated strings, and these are orthogonal +axes.) + + * NEWS, doc/grep.texi: New -z or --null-data and -Z or --null options. + * src/grep.c (long_options, usage, main): Likewise. + + * src/dfa.h (dfasyntax): New eol parameter. + * src/dfa.c (eolbyte): New var. + (dfasyntax): Set it from new parameter. + (lex, dfastat, build_state, dfaexec): Use it instead of '\n'. + + * src/grep.h (eolbyte): New decl. + * src/grep.c (eolbyte): New var. + (nlscan, prpending, prtext, grepbuf, grep): Use it instead of '\n'. + (filename_mask): New var. + (prline, grepfile): Output NUL separator if filename_mask is zero. + (grep): Look for '\200' as the hallmark of a binary file, not '\0', + if -z or --null-data is specified, since it implies that '\0' is + expected as text. 
+ + * src/search.c (Gcompile, Ecompile): Pass eolbyte to dfasyntax. + (EGexecute, Fexecute): Use eolbyte instead of '\n'. + +1999-06-15 Alain Magloire + + * src/grep.c, doc/grep{1,texi} : + --revert-match should be --invert-match. + Correction proposed by Karl Berry. + +1999-06-12 Alain Magloire + + * doc/grep.{1,texi}: add description for --with-filename. + Noted missing by UEBAYASHI Masao. + +1999-03-17 Paul Eggert + + * NEWS: Add GREP_OPTIONS. + + * doc/grep.texi: Document GREP_OPTIONS, and the other + environment variables. Fix doc for [:blank:], [:cntrl:], [:punct:]. + + * src/grep.c (prepend_args, prepend_default_options): New functions. + (main): Use them to implement GREP_OPTIONS. + * src/system.h (getenv): New decl. + +1999-03-16 Volker Borchert + + * configure.in: Use case case ... esac for checking Visual C++. + When ${CC} contains options it was not recognize. + +1999-03-07 Paul Eggert + + * src/grep.c (usage): Don't report -E, -F, and -G unless we're grep. + (main): Don't match options -E, -F, and -G unless we're grep. + Remove after-the-fact check for options -E, -F, and -G, since + they're no longer needed. + +1999-03-05 Eli Zaretskii + + * src/grep.c (main): Print the name of the default matcher instead + of just "grep". + +1999-02-06 Alain Magloire + + * tests/*.awk : Linux users are seeing "Broken Pipe" on make check. + The problem is that grep does not drain its stdin, thus the previous + process in the pipeline receives a SIGPIPE. Other shells are silent + about this. There is actually no failure, since the broken pipe is + expected. You can work around it by changing the pipeline, so that + the input is drained, like this: + status=`echo 'check' | { ${GREP} -E -e pattern >/dev/null 2>&1; + echo $?; cat >/dev/null; }`; if test $status -ne $errnu then ... fi + Excerpt email from Andreas Schwab. + +1999-02-23 Alain Magloire + + * src/grep.c : Restrict the use of -E, -F, -G + to only grep driver, Posix behaviour. 
{f,e}grep + the matcher is already set. This change may brake + scripts, warn in NEWS. + + * doc/grep.{1,texi} : -C takes arguments, upgrade manual. + + * beta 2.3a + +1999-02-23 Alain Magloire + + * configure.in : Change the configure VC test from + 'test x$ac_cv_prog_CC = xcl;' to 'test x"$ac_cv_prog_CC" = xcl;' + Email from Joshua R. Poulson. + +1999-02-23 Paul Eggert + + Fix porting bug reported by Amakawa Shuhei for SunOS 4.1.4-JL. + The btowc.c shipped with grep 2.3 is incorrect for Solaris + 2.5.1 and earlier, as it assumes UTF8, which these OSes do not + support. Solaris 7 supports btowc, so there's no need to ship + a substitute for it. The only questionable case is Solaris + 2.6, which lacks btowc but does support UTF8. However, 2.6 + supports UTF8 but only as a demonstration (for an English + locale!); Japanese Solaris 2.6 users typically use EUC, or + sometimes shift-JIS, but they cannot use UTF8 since Japanese + UTF8 is not supported. Hence there's no point to having grep + substitute a btowc that uses UTF8, as it is either redundant, + or it will almost invariably have incorrect behavior. + + * configure.in (AC_CHECK_HEADERS): Don't set USE_WCHAR. + (AC_CHECK_FUNCS): Add btowc, wctype. + (AC_REPLACE_FUNCS): Don't replace btowc; our replacement is + invariably doing the wrong thing anyway, at least on SunOS/Solaris. + Don't bother to check for wctype in -lw, as we don't support + wide characters on Solaris 2.5.1 or earlier anyway. + + * bootstrap/Makefile.try (OBJS): Remove btowc.$(OBJEXT). + + * src/btowc.c: Removed; no longer needed. + +1999-02-19 Paul Eggert + + * NEWS: Fix typo when talking about the old behavior of + silently skipping directories; it was grep 2.1, not grep 2.2. + +1999-02-15 Alain Magloire + + * bootstrap/Makefile.try : add DJGPP DEFS. + Done by Elie Zaretsckii. + 1999-02-14 Alain Magloire * m4/gettext.m4 : Guard [] with changequote. 
diff --git a/gnu/usr.bin/grep/NEWS b/gnu/usr.bin/grep/NEWS index d55d88c..efeaf41 100644 --- a/gnu/usr.bin/grep/NEWS +++ b/gnu/usr.bin/grep/NEWS @@ -1,3 +1,53 @@ +Version 2.4: + + - egrep is now equivalent to `grep -E' as required by POSIX, + removing a longstanding source of confusion and incompatibility. + `grep' is now more forgiving about stray `{'s, for backward + compatibility with traditional egrep. + + - The lower bound of an interval is not optional. + You must use an explicit zero, e.g. `x{0,10}' instead of `x{,10}'. + (The old documentation incorrectly claimed that it was optional.) + + - The --revert-match option has been renamed to --invert-match. + + - The --fixed-regexp option has been renamed to --fixed-string. + + - New option -H or --with-filename. + + - New option --mmap. By default, GNU grep now uses read instead of mmap. + This is faster on some hosts, and is safer on all. + + - The new option -z or --null-data causes `grep' to treat a zero byte + (the ASCII NUL character) as a line terminator in input data, and + to treat newlines as ordinary data. + + - The new option -Z or --null causes `grep' to output a zero byte + instead of the normal separator after a file name. + + - These two options can be used with commands like `find -print0', + `perl -0', `sort -z', and `xargs -0' to process arbitrary file names, + even those that contain newlines. + + - The environment variable GREP_OPTIONS specifies default options; + e.g. GREP_OPTIONS='--directories=skip' reestablishes grep 2.1's + behavior of silently skipping directories. + + - You can specify a matcher multiple times without error, e.g. + `grep -E -E' or `fgrep -F'. It is still an error to specify + conflicting matchers. + + - -u and -U are now allowed on non-DOS hosts, and have no effect. + + - Modifications of the tests scripts to go around the "Broken Pipe" + errors from bash. See Bash FAQ. + + - New option -r or --recursive or --directories=recurse. 
+ (This option was also in grep 2.3, but wasn't announced here.) + + - --without-included-regex is disabled; it was causing bogus reports, i.e. + doing more harm than good. + Version 2.3: - When searching a binary file FOO, grep now just reports @@ -19,7 +69,7 @@ Version 2.3: `grep: DIRECTORY: Binary file matches' (or nothing) otherwise. The new -d ACTION or --directories=ACTION option affects directory handling. - `-d skip' causes `grep' to silently skip directories, as in grep 2.2; + `-d skip' causes `grep' to silently skip directories, as in grep 2.1; `-d read' (the default) causes `grep' to read directories if possible, as in earlier versions of grep. diff --git a/gnu/usr.bin/grep/THANKS b/gnu/usr.bin/grep/THANKS index d87d19b..924b6cf 100644 --- a/gnu/usr.bin/grep/THANKS +++ b/gnu/usr.bin/grep/THANKS @@ -1,5 +1,6 @@ Aharon Robbins -Andreas Schwab +Alain Magloire +Andreas Schwab Andreas Ley Ben Elliston David J MacKenzie @@ -8,6 +9,7 @@ Florian La Roche Franc,ois Pinard Grant McDorman Harald Hanche-Olsen +Jeff Bailey Jim Hand Jim Meyering Jochen Hein @@ -27,6 +29,7 @@ Miles Bader Olaf Kirch Paul Eggert Paul Kimoto +Phillip C. Brisco Philippe Defert Philippe De Muyter Roland Roberts @@ -37,6 +40,7 @@ Sydoruk Stepan Tom 'moof' Spindler Tom Tromey Ulrich Drepper +UEBAYASHI Masao Volker Borchert Wichert Akkerman William Bader diff --git a/gnu/usr.bin/grep/config.hin b/gnu/usr.bin/grep/config.hin index 4b4e289..8922a63 100644 --- a/gnu/usr.bin/grep/config.hin +++ b/gnu/usr.bin/grep/config.hin @@ -55,6 +55,9 @@ /* Version number. */ #undef VERSION +/* Define to `int' if <sys/types.h> doesn't define. */ +#undef ssize_t + /* Hack for Visual C++ suggested by irox. */ #undef alloca @@ -100,6 +103,9 @@ /* Define if you have the memchr function. */ #undef HAVE_MEMCHR +/* Define if you have the memmove function. */ +#undef HAVE_MEMMOVE + /* Define if you have the munmap function. */ #undef HAVE_MUNMAP @@ -130,6 +136,9 @@ /* Define if you have the strerror function.
*/ #undef HAVE_STRERROR +/* Define if you have the wctype function. */ +#undef HAVE_WCTYPE + /* Define if you have the <argz.h> header file. */ #undef HAVE_ARGZ_H @@ -183,3 +192,16 @@ /* Define if you have the i library (-li). */ #undef HAVE_LIBI + +/* Number of bits in a file offset, on hosts where this is settable. */ +#undef _FILE_OFFSET_BITS + +/* Define to make fseeko etc. visible, on some hosts. */ +#undef _LARGEFILE_SOURCE + +/* Define for large files, on AIX-style hosts. */ +#undef _LARGE_FILES + +/* Define if compiler has function prototypes */ +#undef PROTOTYPES + diff --git a/gnu/usr.bin/grep/dfa.c b/gnu/usr.bin/grep/dfa.c index 64ff27d..40a926b 100644 --- a/gnu/usr.bin/grep/dfa.c +++ b/gnu/usr.bin/grep/dfa.c @@ -328,15 +328,20 @@ static reg_syntax_t syntax_bits, syntax_bits_set; /* Flag for case-folding letters into sets. */ static int case_fold; +/* End-of-line byte in data. */ +static unsigned char eolbyte; + /* Entry point to set syntax options. */ void -dfasyntax(bits, fold) +dfasyntax(bits, fold, eol) reg_syntax_t bits; int fold; + int eol; { syntax_bits_set = 1; syntax_bits = bits; case_fold = fold; + eolbyte = eol; } /* Lexical analyzer. All the dross that deals with the obnoxious @@ -555,11 +560,32 @@ lex() goto normal_char; if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0)) goto normal_char; - minrep = maxrep = 0; + if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) + goto normal_char; + + if (syntax_bits & RE_NO_BK_BRACES) + { + /* Scan ahead for a valid interval; if it's not valid, + treat it as a literal '{'. */ + int lo = -1, hi = -1; + char const *p = lexptr; + char const *lim = p + lexleft; + for (; p != lim && ISDIGIT (*p); p++) + lo = (lo < 0 ? 0 : lo * 10) + *p - '0'; + if (p != lim && *p == ',') + while (++p != lim && ISDIGIT (*p)) + hi = (hi < 0 ?
0 : hi * 10) + *p - '0'; + else + hi = lo; + if (p == lim || *p != '}' + || lo < 0 || RE_DUP_MAX < hi || (0 <= hi && hi < lo)) + goto normal_char; + } + + minrep = 0; /* Cases: {M} - exact count {M,} - minimum count, maximum is infinity - {,M} - 0 through M {M,N} - M through N */ FETCH(c, _("unfinished repeat count")); if (ISDIGIT(c)) @@ -573,16 +599,27 @@ lex() minrep = 10 * minrep + c - '0'; } } - else if (c != ',') + else dfaerror(_("malformed repeat count")); if (c == ',') - for (;;) - { - FETCH(c, _("unfinished repeat count")); - if (!ISDIGIT(c)) - break; - maxrep = 10 * maxrep + c - '0'; - } + { + FETCH (c, _("unfinished repeat count")); + if (! ISDIGIT (c)) + maxrep = -1; + else + { + maxrep = c - '0'; + for (;;) + { + FETCH (c, _("unfinished repeat count")); + if (! ISDIGIT (c)) + break; + maxrep = 10 * maxrep + c - '0'; + } + if (0 <= maxrep && maxrep < minrep) + dfaerror (_("malformed repeat count")); + } + } else maxrep = minrep; if (!(syntax_bits & RE_NO_BK_BRACES)) @@ -634,7 +671,7 @@ lex() zeroset(ccl); notset(ccl); if (!(syntax_bits & RE_DOT_NEWLINE)) - clrbit('\n', ccl); + clrbit(eolbyte, ccl); if (syntax_bits & RE_DOT_NOT_NULL) clrbit('\0', ccl); laststart = 0; @@ -732,7 +769,7 @@ lex() { notset(ccl); if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE) - clrbit('\n', ccl); + clrbit(eolbyte, ccl); } laststart = 0; return lasttok = CSET + charclass_index(ccl); @@ -898,7 +935,7 @@ closure() { ntokens = nsubtoks(dfa->tindex); tindex = dfa->tindex - ntokens; - if (maxrep == 0) + if (maxrep < 0) addtok(PLUS); if (minrep == 0) addtok(QMARK); @@ -1561,7 +1598,7 @@ dfastate(s, d, trans) for (i = 0; i < NOTCHAR; ++i) if (IS_WORD_CONSTITUENT(i)) setbit(i, letters); - setbit('\n', newline); + setbit(eolbyte, newline); } zeroset(matches); @@ -1582,7 +1619,7 @@ dfastate(s, d, trans) { if (! MATCHES_NEWLINE_CONTEXT(pos.constraint, d->states[s].newline, 1)) - clrbit('\n', matches); + clrbit(eolbyte, matches); if (! 
MATCHES_NEWLINE_CONTEXT(pos.constraint, d->states[s].newline, 0)) for (j = 0; j < CHARCLASS_INTS; ++j) @@ -1693,7 +1730,7 @@ dfastate(s, d, trans) state_letter = state; for (i = 0; i < NOTCHAR; ++i) trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state; - trans['\n'] = state_newline; + trans[eolbyte] = state_newline; } else for (i = 0; i < NOTCHAR; ++i) @@ -1717,7 +1754,7 @@ dfastate(s, d, trans) /* Find out if the new state will want any context information. */ wants_newline = 0; - if (tstbit('\n', labels[i])) + if (tstbit(eolbyte, labels[i])) for (j = 0; j < follows.nelem; ++j) if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint)) wants_newline = 1; @@ -1749,7 +1786,7 @@ dfastate(s, d, trans) { int c = j * INTBITS + k; - if (c == '\n') + if (c == eolbyte) trans[c] = state_newline; else if (IS_WORD_CONSTITUENT(c)) trans[c] = state_letter; @@ -1840,8 +1877,8 @@ build_state(s, d) /* Keep the newline transition in a special place so we can use it as a sentinel. */ - d->newlines[s] = trans['\n']; - trans['\n'] = -1; + d->newlines[s] = trans[eolbyte]; + trans[eolbyte] = -1; if (ACCEPTING(s, *d)) d->fails[s] = trans; @@ -1889,6 +1926,7 @@ dfaexec(d, begin, end, newline, count, backref) register unsigned char *p; /* Current input character. */ register int **trans, *t; /* Copy of d->trans so it can be optimized into a register. */ + register unsigned char eol = eolbyte; /* Likewise for eolbyte. */ static int sbit[NOTCHAR]; /* Table for anding with d->success. */ static int sbit_init; @@ -1899,7 +1937,7 @@ dfaexec(d, begin, end, newline, count, backref) sbit_init = 1; for (i = 0; i < NOTCHAR; ++i) sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1; - sbit['\n'] = 4; + sbit[eol] = 4; } if (! 
d->tralloc) @@ -1908,7 +1946,7 @@ dfaexec(d, begin, end, newline, count, backref) s = s1 = 0; p = (unsigned char *) begin; trans = d->trans; - *end = '\n'; + *end = eol; for (;;) { @@ -1936,7 +1974,7 @@ dfaexec(d, begin, end, newline, count, backref) } /* If the previous character was a newline, count it. */ - if (count && (char *) p <= end && p[-1] == '\n') + if (count && (char *) p <= end && p[-1] == eol) ++*count; /* Check if we've run off the end of the buffer. */ @@ -1950,7 +1988,7 @@ dfaexec(d, begin, end, newline, count, backref) continue; } - if (p[-1] == '\n' && newline) + if (p[-1] == eol && newline) { s = d->newlines[s1]; continue; diff --git a/gnu/usr.bin/grep/dfa.h b/gnu/usr.bin/grep/dfa.h index 95b5d89..f2fef4b 100644 --- a/gnu/usr.bin/grep/dfa.h +++ b/gnu/usr.bin/grep/dfa.h @@ -320,9 +320,10 @@ struct dfa /* Entry points. */ -/* dfasyntax() takes two arguments; the first sets the syntax bits described - earlier in this file, and the second sets the case-folding flag. */ -extern void dfasyntax PARAMS ((reg_syntax_t, int)); +/* dfasyntax() takes three arguments; the first sets the syntax bits described + earlier in this file, the second sets the case-folding flag, and the + third specifies the line terminator. */ +extern void dfasyntax PARAMS ((reg_syntax_t, int, int)); /* Compile the given string of the given length into the given struct dfa. Final argument is a flag specifying whether to build a searching or an diff --git a/gnu/usr.bin/grep/doc/grep.texi b/gnu/usr.bin/grep/doc/grep.texi index 23b0553..50a6938 100644 --- a/gnu/usr.bin/grep/doc/grep.texi +++ b/gnu/usr.bin/grep/doc/grep.texi @@ -22,19 +22,20 @@ @defcodeindex op @syncodeindex op fn +@syncodeindex vr fn @ifinfo @direntry * grep: (grep). print lines matching a pattern. @end direntry -This file documents @sc{grep}, a pattern matching engine. +This file documents @command{grep}, a pattern matching engine. 
Published by the Free Software Foundation, 59 Temple Place - Suite 330 Boston, MA 02111-1307, USA -Copyright (C) 1998 Free Software Foundation, Inc. +Copyright 1999 Free Software Foundation, Inc. Permission is granted to make and distribute verbatim copies of this manual provided the copyright notice and this permission notice @@ -67,7 +68,7 @@ by the Foundation. @page @vskip 0pt plus 1filll -Copyright @copyright{} 1998 Free Software Foundation, Inc. +Copyright @copyright{} 1999 Free Software Foundation, Inc. @sp 2 Published by the Free Software Foundation, @* @@ -92,43 +93,48 @@ by the Foundation. @page -@node Top, Introduction, (dir), (dir) -@comment node-name, next, previous, up +@ifnottex +@node Top +@top Grep -@ifinfo -This document was produced for version @value{VERSION} of @sc{GNU} @sc{grep}. -@end ifinfo +@command{grep} searches for lines matching a pattern. + +This document was produced for version @value{VERSION} of @sc{gnu} +@command{grep}. +@end ifnottex @menu * Introduction:: Introduction. -* Invoking:: Invoking @sc{grep}; description of options. -* Diagnostics:: Exit status returned by @sc{grep}. -* Grep Programs:: @sc{grep} programs. +* Invoking:: Invoking @command{grep}; description of options. +* Diagnostics:: Exit status returned by @command{grep}. +* Grep Programs:: @command{grep} programs. * Regular Expressions:: Regular Expressions. +* Usage:: Examples. * Reporting Bugs:: Reporting Bugs. * Concept Index:: A menu with all the topics in this manual. -* Index:: A menu with all @sc{grep} commands +* Index:: A menu with all @command{grep} commands and command-line options. @end menu -@node Introduction, Invoking, Top, Top -@comment node-name, next, previous, up +@node Introduction @chapter Introduction @cindex Searching for a pattern. -@sc{grep} searches the input files for lines containing a match to a given + +@command{grep} searches the input files +for lines containing a match to a given pattern list. 
When it finds a match in a line, it copies the line to standard output (by default), or does whatever other sort of output you have requested -with options. @sc{grep} expects to do the matching on text. +with options. @command{grep} expects to do the matching on text. Since newline is also a separator for the list of patterns, there is no way to match newline characters in a text. -@node Invoking, Diagnostics, Introduction, Top -@comment node-name, next, previous, up -@chapter Invoking @sc{grep} +@node Invoking +@chapter Invoking @command{grep} -@sc{grep} comes with a rich set of options from POSIX.2 and GNU extensions. +@command{grep} comes with a rich set of options from @sc{posix.2} and @sc{gnu} +extensions. @table @samp @@ -138,7 +144,7 @@ is no way to match newline characters in a text. @opindex -count @cindex counting lines Suppress normal output; instead print a count of matching -lines for each input file. With the @samp{-v}, @samp{--revert-match} option, +lines for each input file. With the @samp{-v}, @samp{--invert-match} option, count non-matching lines. @item -e @var{pattern} @@ -146,15 +152,15 @@ count non-matching lines. @opindex -e @opindex --regexp=@var{pattern} @cindex pattern list -Use @var{pattern} as the pattern; useful to protect patterns +Use @var{pattern} as the pattern; useful to protect patterns beginning with a @samp{-}. -@item -f @var{file} +@item -f @var{file} @itemx --file=@var{file} -@opindex -f -@opindex --file +@opindex -f +@opindex --file @cindex pattern from file -Obtain patterns from @var{file}, one per line. The empty +Obtain patterns from @var{file}, one per line. The empty file contains zero patterns, and therefore matches nothing. @item -i @@ -162,15 +168,15 @@ file contains zero patterns, and therefore matches nothing. @opindex -i @opindex --ignore-case @cindex case insensitive search -Ignore case distinctions in both the pattern and the input files. +Ignore case distinctions in both the pattern and the input files. 
@item -l @itemx --files-with-matches @opindex -l @opindex --files-with-matches @cindex names of matching files -Suppress normal output; instead print the name of each input -file from which output would normally have been printed. +Suppress normal output; instead print the name of each input +file from which output would normally have been printed. The scanning of every file will stop on the first match. @item -n @@ -178,7 +184,7 @@ The scanning of every file will stop on the first match. @opindex -n @opindex --line-number @cindex line numbering -Prefix each line of output with the line number within its input file. +Prefix each line of output with the line number within its input file. @item -q @itemx --quiet @@ -187,7 +193,7 @@ Prefix each line of output with the line number within its input file. @opindex --quiet @opindex --silent @cindex quiet, silent -Quiet; suppress normal output. The scanning of every file will stop on +Quiet; suppress normal output. The scanning of every file will stop on the first match. Also see the @samp{-s} or @samp{--no-messages} option. @item -s @@ -196,31 +202,32 @@ the first match. Also see the @samp{-s} or @samp{--no-messages} option. @opindex --no-messages @cindex suppress error messages Suppress error messages about nonexistent or unreadable files. -Portability note: unlike GNU @sc{grep}, BSD @sc{grep} does not comply -with POSIX.2, because BSD @sc{grep} lacks a @samp{-q} option and its -@samp{-s} option behaves like GNU @sc{grep}'s @samp{-q} option. Shell -scripts intended to be portable to BSD @sc{grep} should avoid both +Portability note: unlike @sc{gnu} @command{grep}, traditional +@command{grep} did not conform to @sc{posix.2}, because traditional +@command{grep} lacked a @samp{-q} option and its @samp{-s} option behaved +like @sc{gnu} @command{grep}'s @samp{-q} option. 
Shell scripts intended +to be portable to traditional @command{grep} should avoid both @samp{-q} and @samp{-s} and should redirect output to @file{/dev/null} instead. @item -v -@itemx --revert-match +@itemx --invert-match @opindex -v -@opindex --revert-match -@cindex revert matching +@opindex --invert-match +@cindex invert matching @cindex print non-matching lines -Invert the sense of matching, to select non-matching lines. +Invert the sense of matching, to select non-matching lines. @item -x @itemx --line-regexp @opindex -x @opindex --line-regexp @cindex match the whole line -Select only those matches that exactly match the whole line. +Select only those matches that exactly match the whole line. @end table -@section GNU Extensions +@section @sc{gnu} Extensions @table @samp @@ -240,17 +247,17 @@ Print @var{num} lines of trailing context after matching lines. @cindex context lines, before match Print @var{num} lines of leading context before matching lines. -@item -C -@itemx --context@var{[=num]} +@item -C @var{num} +@itemx --context=[@var{num}] @opindex -C @opindex --context @cindex context Print @var{num} lines (default 2) of output context. -@item -NUM +@item -@var{num} @opindex -NUM -Same as @samp{--context=@var{num}} lines of leading and trailing +Same as @samp{--context=@var{num}} lines of leading and trailing context. However, grep will never print any given line more than once. @@ -259,8 +266,8 @@ context. However, grep will never print any given line more than once. @opindex -V @opindex --version @cindex Version, printing -Print the version number of @sc{grep} to the standard output stream. -This version number should be included in all bug reports. +Print the version number of @command{grep} to the standard output stream. +This version number should be included in all bug reports. @item --help @opindex --help @@ -274,24 +281,32 @@ and the bug-reporting address, then exit. 
@opindex --byte-offset @cindex byte offset Print the byte offset within the input file before each line of output. -When @sc{grep} runs on MS-DOS or MS-Windows, the printed byte offsets +When @command{grep} runs on @sc{ms-dos} or MS-Windows, the printed +byte offsets depend on whether the @samp{-u} (@samp{--unix-byte-offsets}) option is used; see below. @item -d @var{action} @itemx --directories=@var{action} -@opindex -d +@opindex -d @opindex --directories @cindex directory search -If an input file is a directory, use @var{action} to process it. -By default, @var{action} is @samp{read}, which means that directories are -read just as if they were ordinary files (some operating systems -and filesystems disallow this, and will cause @sc{grep} to print error +If an input file is a directory, use @var{action} to process it. +By default, @var{action} is @samp{read}, which means that directories are +read just as if they were ordinary files (some operating systems +and filesystems disallow this, and will cause @command{grep} to print error messages for every directory). If @var{action} is @samp{skip}, directories are silently skipped. If @var{action} is @samp{recurse}, -@sc{grep} reads all files under each directory, recursively; this is +@command{grep} reads all files under each directory, recursively; this is equivalent to the @samp{-r} option. +@item -H +@itemx --with-filename +@opindex -H +@opindex --with-filename +@cindex with filename prefix +Print the filename for each match. + @item -h @itemx --no-filename @opindex -h @@ -304,9 +319,9 @@ Suppress the prefixing of filenames on output when multiple files are searched. @opindex -L @opindex --files-without-match @cindex files which don't match -Suppress normal output; instead print the name of each input -file from which no output would normally have been printed. -The scanning of every file will stop on the first match.
+Suppress normal output; instead print the name of each input +file from which no output would normally have been printed. +The scanning of every file will stop on the first match. @item -a @itemx --text @@ -314,14 +329,14 @@ The scanning of every file will stop on the first match. @opindex --text @cindex suppress binary data @cindex binary files -Do not suppress output lines that contain binary data. -Normally, if the first few bytes of a file indicate +Do not suppress output lines that contain binary data. +Normally, if the first few bytes of a file indicate that the file contains binary data, grep outputs only a message saying that the file matches the pattern. This -option causes grep to act as if the file is a text +option causes grep to act as if the file is a text file, even if it would otherwise be treated as binary. -@emph{Warning:} the result might be binary garbage -printed to the terminal, which can have nasty +@emph{Warning:} the result might be binary garbage +printed to the terminal, which can have nasty side-effects if the terminal driver interprets some of it as commands. @@ -330,12 +345,12 @@ it as commands. @opindex -w @opindex --word-regexp @cindex matching whole words -Select only those lines containing matches that form -whole words. The test is that the matching substring -must either be at the beginning of the line, or preceded +Select only those lines containing matches that form +whole words. The test is that the matching substring +must either be at the beginning of the line, or preceded by a non-word constituent character. Similarly, it must be either at the end of the line or followed by -a non-word constituent character. Word-constituent +a non-word constituent character. Word-constituent characters are letters, digits, and the underscore. @item -r @@ -359,18 +374,18 @@ Obsolete synonym for @samp{-i}. @opindex --binary @cindex DOS/Windows binary files @cindex binary files, DOS/Windows -Treat the file(s) as binary. 
By default, under MS-DOS -and MS-Windows, @sc{grep} guesses the file type by looking -at the contents of the first 32KB read from the file. -If @sc{grep} decides the file is a text file, it strips the -CR characters from the original file contents (to make -regular expressions with @code{^} and @code{$} work correctly). +Treat the file(s) as binary. By default, under @sc{ms-dos} +and MS-Windows, @command{grep} guesses the file type by looking +at the contents of the first 32kB read from the file. +If @command{grep} decides the file is a text file, it strips the +@code{CR} characters from the original file contents (to make +regular expressions with @code{^} and @code{$} work correctly). Specifying @samp{-U} overrules this guesswork, causing all -files to be read and passed to the matching mechanism -verbatim; if the file is a text file with CR/LF pairs -at the end of each line, this will cause some regular -expressions to fail. This option is only supported on -MS-DOS and MS-Windows. +files to be read and passed to the matching mechanism +verbatim; if the file is a text file with @code{CR/LF} pairs +at the end of each line, this will cause some regular +expressions to fail. This option has no effect on platforms other than +@sc{ms-dos} and MS-Windows. @item -u @itemx --unix-byte-offsets @@ -378,38 +393,146 @@ MS-DOS and MS-Windows. @opindex --unix-byte-offsets @cindex DOS byte offsets @cindex byte offsets, on DOS/Windows -Report Unix-style byte offsets. This switch causes -@sc{grep} to report byte offsets as if the file were Unix style -text file, i.e. the byte offsets ignore the CR characters which were -stripped off. This will produce results identical to running @sc{grep} on -a Unix machine. This option has no effect unless @samp{-b} -option is also used; it is only supported on MS-DOS and +Report Unix-style byte offsets. 
This switch causes +@command{grep} to report byte offsets as if the file were a Unix style +text file, i.e., the byte offsets ignore the @code{CR} characters which were +stripped. This will produce results identical to running @command{grep} on +a Unix machine. This option has no effect unless @samp{-b} +option is also used; it has no effect on platforms other than @sc{ms-dos} and MS-Windows. +@item --mmap +@opindex --mmap +@cindex memory mapped input +If possible, use the @code{mmap} system call to read input, instead of +the default @code{read} system call. In some situations, @samp{--mmap} +yields better performance. However, @samp{--mmap} can cause undefined +behavior (including core dumps) if an input file shrinks while +@command{grep} is operating, or if an I/O error occurs. + +@item -Z +@itemx --null +@opindex -Z +@opindex --null +@cindex zero-terminated file names +Output a zero byte (the @sc{ascii} @code{NUL} character) instead of the +character that normally follows a file name. For example, @samp{grep +-lZ} outputs a zero byte after each file name instead of the usual +newline. This option makes the output unambiguous, even in the presence +of file names containing unusual characters like newlines. This option +can be used with commands like @samp{find -print0}, @samp{perl -0}, +@samp{sort -z}, and @samp{xargs -0} to process arbitrary file names, +even those that contain newline characters. + +@item -z +@itemx --null-data +@opindex -z +@opindex --null-data +@cindex zero-terminated lines +Treat the input as a set of lines, each terminated by a zero byte (the +@sc{ascii} @code{NUL} character) instead of a newline. Like the @samp{-Z} +or @samp{--null} option, this option can be used with commands like +@samp{sort -z} to process arbitrary file names. + @end table -Several additional options control which variant of the @sc{grep} +Several additional options control which variant of the @command{grep} matching engine is used. @xref{Grep Programs}.
-@sc{grep} uses the environment variable @var{LANG} to -provide internationalization support, if compiled with this feature. +@section Environment Variables + +Grep's behavior is affected by the following environment variables. +@cindex environment variables + +@table @code + +@item GREP_OPTIONS +@vindex GREP_OPTIONS +@cindex default options environment variable +This variable specifies default options to be placed in front of any +explicit options. For example, if @code{GREP_OPTIONS} is @samp{--text +--directories=skip}, @command{grep} behaves as if the two options +@samp{--text} and @samp{--directories=skip} had been specified before +any explicit options. Option specifications are separated by +whitespace. A backslash escapes the next character, so it can be used to +specify an option containing whitespace or a backslash. + +@item LC_ALL +@itemx LC_MESSAGES +@itemx LANG +@vindex LC_ALL +@vindex LC_MESSAGES +@vindex LANG +@cindex language of messages +@cindex message language +@cindex national language support +@cindex NLS +@cindex translation of message language +These variables specify the @code{LC_MESSAGES} locale, which determines +the language that @command{grep} uses for messages. The locale is determined +by the first of these variables that is set. American English is used +if none of these environment variables are set, or if the message +catalog is not installed, or if @command{grep} was not compiled with national +language support (@sc{nls}). + +@item LC_ALL +@itemx LC_CTYPE +@itemx LANG +@vindex LC_ALL +@vindex LC_CTYPE +@vindex LANG +@cindex character type +@cindex national language support +@cindex NLS +These variables specify the @code{LC_CTYPE} locale, which determines the +type of characters, e.g., which characters are whitespace. The locale is +determined by the first of these variables that is set. 
The @sc{posix} +locale is used if none of these environment variables are set, or if the +locale catalog is not installed, or if @command{grep} was not compiled with +national language support (@sc{nls}). + +@item POSIXLY_CORRECT +@vindex POSIXLY_CORRECT +If set, @command{grep} behaves as @sc{posix.2} requires; otherwise, +@command{grep} behaves more like other @sc{gnu} programs. @sc{posix.2} +requires that options that +follow file names must be treated as file names; by default, such +options are permuted to the front of the operand list and are treated as +options. Also, @sc{posix.2} requires that unrecognized options be +diagnosed as +``illegal'', but since they are not really against the law the default +is to diagnose them as ``invalid''. @code{POSIXLY_CORRECT} also +disables @code{_@var{N}_GNU_nonoption_argv_flags_}, described below. + +@item _@var{N}_GNU_nonoption_argv_flags_ +@vindex _@var{N}_GNU_nonoption_argv_flags_ +(Here @code{@var{N}} is @command{grep}'s numeric process ID.) If the +@var{i}th character of this environment variable's value is @samp{1}, do +not consider the @var{i}th operand of @command{grep} to be an option, even if +it appears to be one. A shell can put this variable in the environment +for each command it runs, specifying which operands are the results of +file name wildcard expansion and therefore should not be treated as +options. This behavior is available only with the @sc{gnu} C library, and +only when @code{POSIXLY_CORRECT} is not set. -@node Diagnostics, Grep Programs, Invoking, Top -@comment node-name, next, previous, up +@end table + +@node Diagnostics @chapter Diagnostics + Normally, exit status is 0 if matches were found, and 1 if no matches were found (the @samp{-v} option inverts the sense of the exit status). -Exit status is 2 if there were syntax errors in the pattern, +Exit status is 2 if there were syntax errors in the pattern, inaccessible input files, or other system errors. 
-@node Grep Programs, Regular Expressions, Diagnostics, Top -@comment node-name, next, previous, up -@chapter @sc{grep} programs +@node Grep Programs +@chapter @command{grep} programs -@sc{grep} searches the named input files (or standard input if no +@command{grep} searches the named input files (or standard input if no files are named, or the file name @file{-} is given) for lines containing -a match to the given pattern. By default, @sc{grep} prints the matching lines. -There are three major variants of @sc{grep}, controlled by the following options. +a match to the given pattern. By default, @command{grep} prints the +matching lines. There are three major variants of @command{grep}, +controlled by the following options. @table @samp @@ -418,14 +541,14 @@ There are three major variants of @sc{grep}, controlled by the following options @opindex -G @opindex --basic-regexp @cindex matching basic regular expressions -Interpret pattern as a basic regular expression. This is the default. +Interpret pattern as a basic regular expression. This is the default. @item -E -@item --extended-regexp +@itemx --extended-regexp @opindex -E @opindex --extended-regexp @cindex matching extended regular expressions -Interpret pattern as an extended regular expression. +Interpret pattern as an extended regular expression. @item -F @@ -439,60 +562,66 @@ by newlines, any of which is to be matched. @end table In addition, two variant programs @sc{egrep} and @sc{fgrep} are available. -@sc{egrep} is similar (but not identical) to @samp{grep -E}, and -is compatible with the historical Unix @sc{egrep}. @sc{fgrep} is the +@sc{egrep} is the same as @samp{grep -E}. @sc{fgrep} is the same as @samp{grep -F}. -@node Regular Expressions, Reporting Bugs, Grep Programs, Top -@comment node-name, next, previous, up +@node Regular Expressions @chapter Regular Expressions @cindex regular expressions -A @dfn{regular expression} is a pattern that describes a set of strings. 
+A @dfn{regular expression} is a pattern that describes a set of strings. Regular expressions are constructed analogously to arithmetic expressions, -by using various operators to combine smaller expressions. -@sc{grep} understands two different versions of regular expression -syntax: ``basic'' and ``extended''. In GNU @sc{grep}, there is no -difference in available functionality using either syntax. -In other implementations, basic regular expressions are less powerful. -The following description applies to extended regular expressions; +by using various operators to combine smaller expressions. +@command{grep} understands two different versions of regular expression +syntax: ``basic'' and ``extended''. In @sc{gnu} @command{grep}, there is no +difference in available functionality using either syntax. +In other implementations, basic regular expressions are less powerful. +The following description applies to extended regular expressions; differences for basic regular expressions are summarized afterwards. -The fundamental building blocks are the regular expressions that match +The fundamental building blocks are the regular expressions that match a single character. Most characters, including all letters and digits, -are regular expressions that match themselves. Any metacharacter +are regular expressions that match themselves. Any metacharacter with special meaning may be quoted by preceding it with a backslash. A list of characters enclosed by @samp{[} and @samp{]} matches any single character in that list; if the first character of the list is the -caret @samp{^}, then it +caret @samp{^}, then it matches any character @strong{not} in the list. For example, the regular expression @samp{[0123456789]} matches any single digit. -A range of @sc{ascii} characters may be specified by giving the first -and last characters, separated by a hyphen. Finally, certain named -classes of characters are predefined. 
Their names are self explanatory, -and they are : +A range of @sc{ascii} characters may be specified by giving the first +and last characters, separated by a hyphen. + +Finally, certain named classes of characters are predefined, as follows. +Their interpretation depends on the @code{LC_CTYPE} locale; the +interpretation below is that of the @sc{posix} locale, which is the default +if no @code{LC_CTYPE} locale is specified. @cindex classes of characters @cindex character classes @table @samp @item [:alnum:] -@opindex alnum -@cindex alphanumeric characters -Any of [:digit:] or [:alpha:] +@opindex alnum +@cindex alphanumeric characters +Any of @samp{[:digit:]} or @samp{[:alpha:]} @item [:alpha:] @opindex alpha @cindex alphabetic characters -Any local-specific or one of the @sc{ascii} letters:@* +Any letter:@* @code{a b c d e f g h i j k l m n o p q r s t u v w x y z},@* @code{A B C D E F G H I J K L M N O P Q R S T U V W X Y Z}. +@item [:blank:] +@opindex blank +@cindex blank characters +Space or tab. + @item [:cntrl:] @opindex cntrl @cindex control characters -Any of @code{BEL}, @code{BS}, @code{CR}, @code{FF}, @code{HT}, -@code{NL}, or @code{VT}. +Any character with octal codes 000 through 037, or @code{DEL} (octal +code 177). @item [:digit:] @opindex digit @@ -503,7 +632,7 @@ Any one of @code{0 1 2 3 4 5 6 7 8 9}. @item [:graph:] @opindex graph @cindex graphic characters -Anything that is not a @samp{[:alphanum:]} or @samp{[:punct:]}. +Anything that is a @samp{[:alnum:]} or @samp{[:punct:]}. @item [:lower:] @opindex lower @@ -514,13 +643,12 @@ Any one of @code{a b c d e f g h i j k l m n o p q r s t u v w x y z}. @opindex print @cindex printable characters Any character from the @samp{[:space:]} class, and any character that is -@strong{not} in the @samp{[:isgraph:]} class. +@strong{not} in the @samp{[:graph:]} class. @item [:punct:] @opindex punct @cindex punctuation characters -Any one of @code{!@: " #% & ' ( ) ; < = > ?@: [ \ ] * + , - .@: / : ^ _ @{ | @}}.
- +Any one of @code{!@: " # $ % & ' ( ) * + , - .@: / : ; < = > ?@: @@ [ \ ] ^ _ ` @{ | @} ~}. @item [:space:] @opindex space @@ -541,13 +669,13 @@ Any one of @code{a b c d e f A B C D E F 0 1 2 3 4 5 6 7 8 9}. @end table For example, @samp{[[:alnum:]]} means @samp{[0-9A-Za-z]}, except the latter -form is dependent upon the @sc{ascii} character encoding, whereas the -former is portable. (Note that the brackets in these class names are -part of the symbolic names, and must be included in addition to -the brackets delimiting the bracket list). Most metacharacters lose +form is dependent upon the @sc{ascii} character encoding, whereas the +former is portable. (Note that the brackets in these class names are +part of the symbolic names, and must be included in addition to +the brackets delimiting the bracket list.) Most metacharacters lose their special meaning inside lists. To include a literal @samp{]}, place it first in the list. Similarly, to include a literal @samp{^}, place it anywhere -but first. Finally, to include a literal @samp{-}, place it last. +but first. Finally, to include a literal @samp{-}, place it last. The period @samp{.} matches any single character. The symbol @samp{\w} is a synonym for @samp{[[:alnum:]]} and @samp{\W} is a synonym for @@ -555,12 +683,12 @@ is a synonym for @samp{[[:alnum:]]} and @samp{\W} is a synonym for The caret @samp{^} and the dollar sign @samp{$} are metacharacters that respectively match the empty string at the beginning and end -of a line. The symbols @samp{\<} and @samp{\>} respectively match the +of a line. The symbols @samp{\<} and @samp{\>} respectively match the empty string at the beginning and end of a word. The symbol -@samp{\b} matches the empty string at the edge of a word, and @samp{\B} -matches the empty string provided it's not at the edge of a word. +@samp{\b} matches the empty string at the edge of a word, and @samp{\B} +matches the empty string provided it's not at the edge of a word. 
-A regular expression may be followed by one of several +A regular expression may be followed by one of several repetition operators: @@ -580,7 +708,7 @@ The preceding item will be matched zero or more times. @item + @opindex + -@cindex plus sign +@cindex plus sign The preceding item will be matched one or more times. @item @{@var{n}@} @@ -595,12 +723,6 @@ The preceding item is matched exactly @var{n} times. @cindex match sub-expression n or more times The preceding item is matched n or more times. -@item @{,@var{m}@} -@opindex @{,m@} -@cindex braces, first argument omitted -@cindex match sub-expression at most m times -The preceding item is optional and is matched at most @var{m} times. - @item @{@var{n},@var{m}@} @opindex @{n,m@} @cindex braces, two arguments @@ -609,17 +731,17 @@ The preceding item is matched at least @var{n} times, but not more than @end table -Two regular expressions may be concatenated; the resulting regular +Two regular expressions may be concatenated; the resulting regular expression matches any string formed by concatenating two substrings -that respectively match the concatenated subexpressions. +that respectively match the concatenated subexpressions. -Two regular expressions may be joined by the infix operator @samp{|}; the -resulting regular expression matches any string matching either +Two regular expressions may be joined by the infix operator @samp{|}; the +resulting regular expression matches any string matching either subexpression. -Repetition takes precedence over concatenation, which in turn +Repetition takes precedence over concatenation, which in turn takes precedence over alternation. A whole subexpression may be -enclosed in parentheses to override these precedence rules. +enclosed in parentheses to override these precedence rules. 
The backreference @samp{\@var{n}}, where @var{n} is a single digit, matches the substring previously matched by the @var{n}th parenthesized subexpression @@ -631,40 +753,201 @@ In basic regular expressions the metacharacters @samp{?}, @samp{+}, instead use the backslashed versions @samp{\?}, @samp{\+}, @samp{\@{}, @samp{\|}, @samp{\(}, and @samp{\)}. -In @sc{egrep} the metacharacter @samp{@{} loses its special meaning; -instead use @samp{\@{}. This not true for @samp{grep -E}. +@cindex interval specifications +Traditional @command{egrep} did not support the @samp{@{} metacharacter, +and some @command{egrep} implementations support @samp{\@{} instead, so +portable scripts should avoid @samp{@{} in @samp{egrep} patterns and +should use @samp{[@{]} to match a literal @samp{@{}. + +@sc{gnu} @command{egrep} attempts to support traditional usage by +assuming that @samp{@{} is not special if it would be the start of an +invalid interval specification. For example, the shell command +@samp{egrep '@{1'} searches for the two-character string @samp{@{1} +instead of reporting a syntax error in the regular expression. +@sc{posix.2} allows this behavior as an extension, but portable scripts +should avoid it. + +@node Usage +@chapter Usage + +@cindex Usage, examples +Here is an example shell command that invokes @sc{gnu} @command{grep}: + +@example +grep -i 'hello.*world' menu.h main.c +@end example + +@noindent +This lists all lines in the files @file{menu.h} and @file{main.c} that +contain the string @samp{hello} followed by the string @samp{world}; +this is because @samp{.*} matches zero or more characters within a line. +@xref{Regular Expressions}. The @samp{-i} option causes @command{grep} +to ignore case, causing it to match the line @samp{Hello, world!}, which +it would not otherwise match. @xref{Invoking}, for more details about +how to invoke @command{grep}. 
+ +@cindex Using @command{grep}, Q&A +@cindex FAQ about @command{grep} usage +Here are some common questions and answers about @command{grep} usage. + +@enumerate + +@item +How can I list just the names of matching files? + +@example +grep -l 'main' *.c +@end example + +@noindent +lists the names of all C files in the current directory whose contents +mention @samp{main}. + +@item +How do I search directories recursively? + +@example +grep -r 'hello' /home/gigi +@end example + +@noindent +searches for @samp{hello} in all files under the directory +@file{/home/gigi}. For more control of which files are searched, use +@command{find}, @command{grep} and @command{xargs}. For example, +the following command searches only C files: + +@smallexample +find /home/gigi -name '*.c' -print | xargs grep 'hello' /dev/null +@end smallexample + +@item +What if a pattern has a leading @samp{-}? + +@example +grep -e '--cut here--' * +@end example + +@noindent +searches for all lines matching @samp{--cut here--}. Without @samp{-e}, +@command{grep} would attempt to parse @samp{--cut here--} as a list of +options. + +@item +Suppose I want to search for a whole word, not a part of a word? + +@example +grep -w 'hello' * +@end example + +@noindent +searches only for instances of @samp{hello} that are entire words; it +does not match @samp{Othello}. For more control, use @samp{\<} and +@samp{\>} to match the start and end of words. For example: + +@example +grep 'hello\>' * +@end example + +@noindent +searches only for words ending in @samp{hello}, so it matches the word +@samp{Othello}. +@item +How do I output context around the matching lines? -@node Reporting Bugs, Concept Index, Regular Expressions, Top -@comment node-name, next, previous, up +@example +grep -C 2 'hello' * +@end example + +@noindent +prints two lines of context around each matching line. + +@item +How do I force grep to print the name of the file? 
+ +Append @file{/dev/null}: + +@example +grep 'eli' /etc/passwd /dev/null +@end example + +@item +Why do people use strange regular expressions on @command{ps} output? + +@example +ps -ef | grep '[c]ron' +@end example + +If the pattern had been written without the square brackets, it would +have matched not only the @command{ps} output line for @command{cron}, +but also the @command{ps} output line for @command{grep}. + +@item +Why does @command{grep} report ``Binary file matches''? + +If @command{grep} listed all matching ``lines'' from a binary file, it +would probably generate output that is not useful, and it might even +muck up your display. So @sc{gnu} @command{grep} suppresses output from +files that appear to be binary files. To force @sc{gnu} @command{grep} +to output lines even from files that appear to be binary, use the +@samp{-a} or @samp{--text} option. + +@item +Why doesn't @samp{grep -lv} print nonmatching file names? + +@samp{grep -lv} lists the names of all files containing one or more +lines that do not match. To list the names of all files that contain no +matching lines, use the @samp{-L} or @samp{--files-without-match} +option. + +@item +I can do @sc{or} with @samp{|}, but what about @sc{and}? + +@example +grep 'paul' /etc/motd | grep 'franc,ois' +@end example + +@noindent +finds all lines that contain both @samp{paul} and @samp{franc,ois}. + +@item +How can I search in both standard input and in files? + +Use the special file name @samp{-}: + +@example +cat /etc/passwd | grep 'alain' - /etc/motd +@end example +@end enumerate + +@node Reporting Bugs @chapter Reporting bugs @cindex Bugs, reporting Email bug reports to @email{bug-gnu-utils@@gnu.org}. Be sure to include the word ``grep'' somewhere in the ``Subject:'' field. -Large repetition counts in the @samp{@{m,n@}} construct may cause -@sc{grep} to use lots of memory. 
In addition, certain other -obscure regular expressions require exponential time and +Large repetition counts in the @samp{@{m,n@}} construct may cause +@command{grep} to use lots of memory. In addition, certain other +obscure regular expressions require exponential time and space, and may cause grep to run out of memory. -Backreferences are very slow, and may require exponential time. +Backreferences are very slow, and may require exponential time. @page -@node Concept Index , Index, Reporting Bugs, Top -@comment node-name, next, previous, up +@node Concept Index @unnumbered Concept Index This is a general index of all issues discussed in this manual, with the -exception of the @sc{grep} commands and command-line options. +exception of the @command{grep} commands and command-line options. @printindex cp @page -@node Index, , Concept Index, Top +@node Index @unnumbered Index -This is an alphabetical list of all @sc{grep} commands and command-line -options. +This is an alphabetical list of all @command{grep} commands, command-line +options, and environment variables. @printindex fn diff --git a/gnu/usr.bin/grep/doc/version.texi b/gnu/usr.bin/grep/doc/version.texi index ace0491..2c6880e 100644 --- a/gnu/usr.bin/grep/doc/version.texi +++ b/gnu/usr.bin/grep/doc/version.texi @@ -1,3 +1,3 @@ -@set UPDATED 10 February 1999 -@set EDITION 2.3 -@set VERSION 2.3 +@set UPDATED 13 November 1999 +@set EDITION 2.4 +@set VERSION 2.4 diff --git a/gnu/usr.bin/grep/grep.1 b/gnu/usr.bin/grep/grep.1 index 3b957a0..0cee267 100644 --- a/gnu/usr.bin/grep/grep.1 +++ b/gnu/usr.bin/grep/grep.1 @@ -1,24 +1,66 @@ .\" grep man page +.if !\n(.g \{\ +. if !\w|\*(lq| \{\ +. ds lq `` +. if \w'\(lq' .ds lq "\(lq +. \} +. if !\w|\*(rq| \{\ +. ds rq '' +. if \w'\(rq' .ds rq "\(rq +. \} +.\} .de Id .ds Dt \\$4 .. 
-.Id $Id: grep.1,v 1.1 1998/11/22 06:45:20 alainm Exp $ +.Id $Id: grep.1,v 1.7 1999/10/12 20:41:01 alainm Exp $ .TH GREP 1 \*(Dt "GNU Project" .SH NAME grep, egrep, fgrep \- print lines matching a pattern .SH SYNOPSIS .B grep -[-[AB] NUM] [-CEFGVabchiLlnqrsvwxyUu] [-e PATTERN | -f FILE] -[-d ACTION] [--directories=ACTION] -[--extended-regexp] [--fixed-strings] [--basic-regexp] -[--regexp=PATTERN] [--file=FILE] [--ignore-case] [--word-regexp] -[--line-regexp] [--line-regexp] [--no-messages] [--revert-match] -[--version] [--help] [--byte-offset] [--line-number] -[--with-filename] [--no-filename] [--quiet] [--silent] [--text] -[--files-without-match] [--files-with-matcces] [--count] -[--before-context=NUM] [--after-context=NUM] [--context] -[--binary] [--unix-byte-offsets] [--recursive] -.I files... +.RB [ \- [ ABC ] +.IR NUM ] +.RB [ \-EFGHLUVZabchilnqrsuvwxyz ] +.RB [ \-e +.I PATTERN +| +.B \-f +.IR FILE ] +.RB [ \-d +.IR ACTION ] +.RB [ \-\^\-directories=\fIACTION\fP ] +.RB [ \-\^\-extended-regexp ] +.RB [ \-\^\-fixed-strings ] +.RB [ \-\^\-basic-regexp ] +.RB [ \-\^\-regexp=\fIPATTERN\fP ] +.RB [ \-\^\-file=\fIFILE\fP ] +.RB [ \-\^\-ignore-case ] +.RB [ \-\^\-word-regexp ] +.RB [ \-\^\-line-regexp ] +.RB [ \-\^\-line-regexp ] +.RB [ \-\^\-no-messages ] +.RB [ \-\^\-invert-match ] +.RB [ \-\^\-version ] +.RB [ \-\^\-help ] +.RB [ \-\^\-byte-offset ] +.RB [ \-\^\-line-number ] +.RB [ \-\^\-with-filename ] +.RB [ \-\^\-no-filename ] +.RB [ \-\^\-quiet ] +.RB [ \-\^\-silent ] +.RB [ \-\^\-text ] +.RB [ \-\^\-files-without-match ] +.RB [ \-\^\-files-with-matches ] +.RB [ \-\^\-count ] +.RB [ \-\^\-before-context=\fINUM\fP ] +.RB [ \-\^\-after-context=\fINUM\fP ] +.RB [ \-\^\-context [ =\fINUM\fP ]] +.RB [ \-\^\-binary ] +.RB [ \-\^\-unix-byte-offsets ] +.RB [ \-\^\-mmap ] +.RB [ \-\^\-null ] +.RB [ \-\^\-recursive ] +.RI [ file .\|.\|.] .SH DESCRIPTION .PP .B Grep @@ -39,80 +81,80 @@ There are three major variants of controlled by the following options. 
.PD 0 .TP -.B \-G, --basic-regexp +.BR \-G ", " \-\^\-basic-regexp Interpret .I pattern as a basic regular expression (see below). This is the default. .TP -.B \-E, --extended-regexp +.BR \-E ", " \-\^\-extended-regexp Interpret .I pattern as an extended regular expression (see below). .TP -.B \-F, --fixed-strings +.BR \-F ", " \-\^\-fixed-strings Interpret .I pattern as a list of fixed strings, separated by newlines, any of which is to be matched. -.LP +.PP In addition, two variant programs .B egrep and .B fgrep are available. .B Egrep -is similar (but not identical) to -.BR "grep\ \-E" , -and is compatible with the historical Unix -.BR egrep . +is the same as +.BR "grep\ \-E" . .B Fgrep is the same as .BR "grep\ \-F" . .PD -.LP +.PP All variants of .B grep understand the following options: .PD 0 .TP -.BI \-A " NUM" ", --after-context=" NUM +.BI \-A " NUM" "\fR,\fP \-\^\-after-context=" NUM Print .I NUM lines of trailing context after matching lines. .TP -.BI \-B " NUM" ", --before-context=" NUM +.BI \-B " NUM" "\fR,\fP \-\^\-before-context=" NUM Print .I NUM lines of leading context before matching lines. .TP -.BI \-C ,\ --context"[=NUM]" -Print +.BI \-C " \fR[\fPNUM\fR]\fP" "\fR,\fP \-\^\-context\fR[\fP=" NUM\fR]\fP +Print .I NUM lines (default 2) of output context. .TP -.BI \- NUM \ -Same as --context=NUM lines of leading and trailing context. However, +.BI \- NUM +Same as +.BI \-\^\-context= NUM +lines of leading and trailing context. However, .B grep will never print any given line more than once. .TP -.B \-V, --version +.BR \-V ", " \-\^\-version Print the version number of .B grep to standard error. This version number should be included in all bug reports (see below). .TP -.B \-b, --byte-offset +.BR \-b ", " \-\^\-byte-offset Print the byte offset within the input file before each line of output. .TP -.B \-c, --count +.BR \-c ", " \-\^\-count Suppress normal output; instead print a count of matching lines for each input file. 
With the -.B \-v, --revert-match +.BR \-v ", " \-\^\-invert-match option (see below), count non-matching lines. .TP -.BI \-d " ACTION" ", --directories=" ACTION +.BI \-d " ACTION" "\fR,\fP \-\^\-directories=" ACTION If an input file is a directory, use .I ACTION to process it. By default, @@ -135,75 +177,78 @@ this is equivalent to the .B \-r option. .TP -.BI \-e " PATTERN" ", --regexp=" PATTERN +.BI \-e " PATTERN" "\fR,\fP \-\^\-regexp=" PATTERN Use .I PATTERN as the pattern; useful to protect patterns beginning with .BR \- . .TP -.BI \-f " FILE" ", --file=" FILE +.BI \-f " FILE" "\fR,\fP \-\^\-file=" FILE Obtain patterns from .IR FILE , one per line. The empty file contains zero patterns, and therfore matches nothing. .TP -.B \-h, --no-filename +.BR \-H ", " \-\^\-with-filename +Print the filename for each match. +.TP +.BR \-h ", " \-\^\-no-filename Suppress the prefixing of filenames on output when multiple files are searched. .TP -.B \-i, --ignore-case +.BR \-i ", " \-\^\-ignore-case Ignore case distinctions in both the .I pattern and the input files. .TP -.B \-L, --files-without-match +.BR \-L ", " \-\^\-files-without-match Suppress normal output; instead print the name of each input file from which no output would -normally have been printed. The scanning will stop +normally have been printed. The scanning will stop on the first match. .TP -.B \-l, --files-with-matches +.BR \-l ", " \-\^\-files-with-matches Suppress normal output; instead print the name of each input file from which output -would normally have been printed. The scanning will +would normally have been printed. The scanning will stop on the first match. .TP -.B \-n, --line-number +.BR \-n ", " \-\^\-line-number Prefix each line of output with the line number within its input file. .TP -.B \-q, --quiet, --silent -Quiet; suppress normal output. The scanning will stop +.BR \-q ", " \-\^\-quiet ", " \-\^\-silent +Quiet; suppress normal output. The scanning will stop on the first match. 
Also see the .B \-s or -.B --no-messages +.B \-\^\-no-messages option below. .TP -.B \-r, --recursive +.BR \-r ", " \-\^\-recursive Read all files under each directory, recursively; this is equivalent to the .B "\-d recurse" option. .TP -.B \-s, --no-messages +.BR \-s ", " \-\^\-no-messages Suppress error messages about nonexistent or unreadable files. -Portability note: unlike GNU +Portability note: unlike \s-1GNU\s0 .BR grep , -BSD +traditional .B grep -does not comply with POSIX.2, because BSD +did not conform to \s-1POSIX.2\s0, because traditional .B grep -lacks a +lacked a .B \-q option and its .B \-s -option behaves like GNU +option behaved like \s-1GNU\s0 .BR grep 's .B \-q option. -Shell scripts intended to be portable to BSD +Shell scripts intended to be portable to traditional .B grep should avoid both .B \-q @@ -211,7 +256,7 @@ and .B \-s and should redirect output to /dev/null instead. .TP -.B \-a, --text +.BR \-a ", " \-\^\-text Do not suppress output lines that contain binary data. Normally, if the first few bytes of a file indicate that the file contains binary data, @@ -222,10 +267,10 @@ This option causes to act as if the file is a text file, even if it would otherwise be treated as binary. .TP -.B \-v, --revert-match +.BR \-v ", " \-\^\-invert-match Invert the sense of matching, to select non-matching lines. .TP -.B \-w, --word-regexp +.BR \-w ", " \-\^\-word-regexp Select only those lines containing matches that form whole words. The test is that the matching substring must either be at the beginning of the line, or preceded by a non-word constituent @@ -233,14 +278,14 @@ character. Similarly, it must be either at the end of the line or followed by a non-word constituent character. Word-constituent characters are letters, digits, and the underscore. .TP -.B \-x, --line-regexp +.BR \-x ", " \-\^\-line-regexp Select only those matches that exactly match the whole line. .TP .B \-y Obsolete synonym for .BR \-i . 
.TP -.B \-U, --binary +.BR \-U ", " \-\^\-binary Treat the file(s) as binary. By default, under MS-DOS and MS-Windows, .BR grep guesses the file type by looking at the contents of the first 32KB @@ -256,10 +301,11 @@ work correctly). Specifying overrules this guesswork, causing all files to be read and passed to the matching mechanism verbatim; if the file is a text file with CR/LF pairs at the end of each line, this will cause some regular -expressions to fail. This option is only supported on MS-DOS and +expressions to fail. +This option has no effect on platforms other than MS-DOS and MS-Windows. .TP -.B \-u, --unix-byte-offsets +.BR \-u ", " \-\^\-unix-byte-offsets Report Unix-style byte offsets. This switch causes .B grep to report byte offsets as if the file were Unix-style text file, i.e. with @@ -267,7 +313,41 @@ CR characters stripped off. This will produce results identical to running .B grep on a Unix machine. This option has no effect unless .B \-b -option is also used; it is only supported on MS-DOS and MS-Windows. +option is also used; +it has no effect on platforms other than MS-DOS and MS-Windows. +.TP +.B \-\^\-mmap +If possible, use the +.BR mmap (2) +system call to read input, instead of +the default +.BR read (2) +system call. In some situations, +.B -\^-mmap +yields better performance. However, +.B -\^-mmap +can cause undefined behavior (including core dumps) +if an input file shrinks while +.B grep +is operating, or if an I/O error occurs. +.TP +.BR \-Z ", " \-\^\-null +Output a zero byte (the \s-1ASCII\s0 +.B NUL +character) instead of the character that normally follows a file name. +For example, +.B "grep \-lZ" +outputs a zero byte after each file name instead of the usual newline. +This option makes the output unambiguous, even in the presence of file +names containing unusual characters like newlines. 
This option can be +used with commands like +.BR "find \-print0" , +.BR "perl \-0" , +.BR "sort \-z" , +and +.B "xargs \-0" +to process arbitrary file names, +even those that contain newline characters. .PD .SH "REGULAR EXPRESSIONS" .PP @@ -277,8 +357,8 @@ expressions, by using various operators to combine smaller expressions. .PP .B Grep understands two different versions of regular expression syntax: -``basic'' and ``extended.'' In -.RB "GNU\ " grep , +\*(lqbasic\*(rq and \*(lqextended.\*(rq In +.RB "\s-1GNU\s0\ " grep , there is no difference in available functionality using either syntax. In other implementations, basic regular expressions are less powerful. The following description applies to extended regular expressions; @@ -390,11 +470,6 @@ The preceding item is matched .I n or more times. .TP -.BI {, m } -The preceding item is optional and is matched at most -.I m -times. -.TP .BI { n , m } The preceding item is matched at least .I n @@ -444,12 +519,35 @@ versions and .BR \e) . .PP -In +Traditional +.B egrep +did not support the +.B { +metacharacter, and some +.B egrep +implementations support +.B \e{ +instead, so portable scripts should avoid +.B { +in +.B egrep +patterns and should use +.B [{] +to match a literal +.BR { . +.PP +\s-1GNU\s0 .B egrep -the metacharacter +attempts to support traditional usage by assuming that .B { -loses its special meaning; instead use -.BR \e{ . +is not special if it would be the start of an invalid interval +specification. For example, the shell command +.B "egrep '{1'" +searches for the two-character string +.B {1 +instead of reporting a syntax error in the regular expression. +\s-1POSIX.2\s0 allows this behavior as an extension, but portable scripts +should avoid it. .SH DIAGNOSTICS .PP Normally, exit status is 0 if matches were found, @@ -463,7 +561,8 @@ other system errors. .PP Email bug reports to .BR bug-gnu-utils@gnu.org . -Be sure to include the word ``grep'' somewhere in the ``Subject:'' field. 
+Be sure to include the word \*(lqgrep\*(rq somewhere in the +\*(lqSubject:\*(rq field. .PP Large repetition counts in the .BI { m , n } @@ -475,3 +574,5 @@ and space, and may cause to run out of memory. .PP Backreferences are very slow, and may require exponential time. +.\" Work around problems with some troff -man implementations. +.br diff --git a/gnu/usr.bin/grep/grep.c b/gnu/usr.bin/grep/grep.c index 3ed4720..445eeca 100644 --- a/gnu/usr.bin/grep/grep.c +++ b/gnu/usr.bin/grep/grep.c @@ -55,6 +55,13 @@ static int show_help; /* If non-zero, print the version on standard output and exit. */ static int show_version; +/* If nonzero, use mmap if possible. */ +static int mmap_option; + +/* Short options. */ +static char const short_options[] = +"0123456789A:B:C::EFGHUVX:abcd:e:f:hiLlnqrsuvwxyZz"; + /* Long options equivalences. */ static struct option long_options[] = { @@ -75,18 +82,19 @@ static struct option long_options[] = {"ignore-case", no_argument, NULL, 'i'}, {"line-number", no_argument, NULL, 'n'}, {"line-regexp", no_argument, NULL, 'x'}, + {"mmap", no_argument, &mmap_option, 1}, {"no-filename", no_argument, NULL, 'h'}, {"no-messages", no_argument, NULL, 's'}, + {"null", no_argument, NULL, 'Z'}, + {"null-data", no_argument, NULL, 'z'}, {"quiet", no_argument, NULL, 'q'}, {"recursive", no_argument, NULL, 'r'}, {"regexp", required_argument, NULL, 'e'}, - {"revert-match", no_argument, NULL, 'v'}, + {"invert-match", no_argument, NULL, 'v'}, {"silent", no_argument, NULL, 'q'}, {"text", no_argument, NULL, 'a'}, -#if O_BINARY {"binary", no_argument, NULL, 'U'}, {"unix-byte-offsets", no_argument, NULL, 'u'}, -#endif {"version", no_argument, NULL, 'V'}, {"with-filename", no_argument, NULL, 'H'}, {"word-regexp", no_argument, NULL, 'w'}, @@ -94,10 +102,10 @@ static struct option long_options[] = }; /* Define flags declared in grep.h. */ -char const *matcher; int match_icase; int match_words; int match_lines; +unsigned char eolbyte; /* For error messages. 
*/ static char *prog; @@ -115,7 +123,10 @@ static enum static int ck_atoi PARAMS ((char const *, int *)); static void usage PARAMS ((int)) __attribute__((noreturn)); static void error PARAMS ((const char *, int)); -static int setmatcher PARAMS ((char const *)); +static void setmatcher PARAMS ((char const *)); +static int install_matcher PARAMS ((char const *)); +static int prepend_args PARAMS ((char const *, char *, char **)); +static void prepend_default_options PARAMS ((char const *, int *, char ***)); static char *page_alloc PARAMS ((size_t, char **)); static int reset PARAMS ((int, char const *, struct stats *)); static int fillbuf PARAMS ((size_t, struct stats *)); @@ -215,14 +226,15 @@ static char *ubuffer; /* Unaligned base of buffer. */ static char *buffer; /* Base of buffer. */ static size_t bufsalloc; /* Allocated size of buffer save region. */ static size_t bufalloc; /* Total buffer size. */ +#define PREFERRED_SAVE_FACTOR 5 /* Preferred value of bufalloc / bufsalloc. */ static int bufdesc; /* File descriptor. */ static char *bufbeg; /* Beginning of user-visible stuff. */ static char *buflim; /* Limit of user-visible stuff. */ static size_t pagesize; /* alignment of memory pages */ +static off_t bufoffset; /* Read offset; defined on regular files. */ #if defined(HAVE_MMAP) -static int bufmapped; /* True for ordinary files. */ -static off_t bufoffset; /* What read() normally remembers. */ +static int bufmapped; /* True if buffer is memory-mapped. */ static off_t initial_bufoffset; /* Initial value of bufoffset. */ #endif @@ -233,32 +245,26 @@ static off_t initial_bufoffset; /* Initial value of bufoffset. */ ? (val) \ : (val) + ((alignment) - (size_t) (val) % (alignment))) -/* Return the address of a new page-aligned buffer of size SIZE. Set - *UP to the newly allocated (but possibly unaligned) buffer used to - *build the aligned buffer. To free the buffer, free (*UP). 
*/ +/* Return the address of a page-aligned buffer of size SIZE, + reallocating it from *UP. Set *UP to the newly allocated (but + possibly unaligned) buffer used to build the aligned buffer. To + free the buffer, free (*UP). */ static char * page_alloc (size, up) size_t size; char **up; { - /* HAVE_WORKING_VALLOC means that valloc is properly declared, and - you can free the result of valloc. This symbol is not (yet) - autoconfigured. It can be useful to define HAVE_WORKING_VALLOC - while debugging, since some debugging memory allocators might - catch more bugs if this symbol is enabled. */ -#if HAVE_WORKING_VALLOC - *up = valloc (size); - return *up; -#else size_t asize = size + pagesize - 1; if (size <= asize) { - *up = malloc (asize); - if (*up) - return ALIGN_TO (*up, pagesize); + char *p = *up ? realloc (*up, asize) : malloc (asize); + if (p) + { + *up = p; + return ALIGN_TO (p, pagesize); + } } return NULL; -#endif } /* Reset the buffer for a new file, returning zero if we should skip it. @@ -269,7 +275,9 @@ reset (fd, file, stats) char const *file; struct stats *stats; { - if (pagesize == 0) + if (pagesize) + bufsalloc = ALIGN_TO (bufalloc / PREFERRED_SAVE_FACTOR, pagesize); + else { size_t ubufsalloc; pagesize = getpagesize (); @@ -281,141 +289,195 @@ reset (fd, file, stats) ubufsalloc = BUFSALLOC; #endif bufsalloc = ALIGN_TO (ubufsalloc, pagesize); - bufalloc = 5 * bufsalloc; + bufalloc = PREFERRED_SAVE_FACTOR * bufsalloc; /* The 1 byte of overflow is a kludge for dfaexec(), which inserts a sentinel newline at the end of the buffer being searched. There's gotta be a better way... */ if (bufsalloc < ubufsalloc - || bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc + || bufalloc / PREFERRED_SAVE_FACTOR != bufsalloc + || bufalloc + 1 < bufalloc || ! 
(buffer = page_alloc (bufalloc + 1, &ubuffer))) fatal (_("memory exhausted"), 0); - bufbeg = buffer; - buflim = buffer; } + + buflim = buffer; bufdesc = fd; - if ( -#if defined(HAVE_MMAP) - 1 -#else - directories != READ_DIRECTORIES -#endif - ) - if (fstat (fd, &stats->stat) != 0) - { - error ("fstat", errno); - return 0; - } + if (fstat (fd, &stats->stat) != 0) + { + error ("fstat", errno); + return 0; + } if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode)) return 0; -#if defined(HAVE_MMAP) - if (!S_ISREG (stats->stat.st_mode)) - bufmapped = 0; - else + if (S_ISREG (stats->stat.st_mode)) { - bufmapped = 1; - bufoffset = initial_bufoffset = file ? 0 : lseek (fd, 0, 1); + if (file) + bufoffset = 0; + else + { + bufoffset = lseek (fd, 0, SEEK_CUR); + if (bufoffset < 0) + { + error ("lseek", errno); + return 0; + } + } +#ifdef HAVE_MMAP + initial_bufoffset = bufoffset; + bufmapped = mmap_option && bufoffset % pagesize == 0; +#endif } + else + { +#ifdef HAVE_MMAP + bufmapped = 0; #endif + } return 1; } /* Read new stuff into the buffer, saving the specified amount of old stuff. When we're done, 'bufbeg' points to the beginning of the buffer contents, and 'buflim' - points just after the end. Return count of new stuff. */ + points just after the end. Return zero if there's an error. */ static int fillbuf (save, stats) size_t save; struct stats *stats; { - int cc; -#if defined(HAVE_MMAP) - caddr_t maddr; -#endif + size_t fillsize = 0; + int cc = 1; + size_t readsize; - if (save > bufsalloc) - { - char *nubuffer; - char *nbuffer; - - while (save > bufsalloc) - bufsalloc *= 2; - bufalloc = 5 * bufsalloc; - if (bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc - || ! (nbuffer = page_alloc (bufalloc + 1, &nubuffer))) - fatal (_("memory exhausted"), 0); + /* Offset from start of unaligned buffer to start of old stuff + that we want to save. 
*/ + size_t saved_offset = buflim - ubuffer - save; - bufbeg = nbuffer + bufsalloc - save; - memcpy (bufbeg, buflim - save, save); - free (ubuffer); - ubuffer = nubuffer; - buffer = nbuffer; - } - else + if (bufsalloc < save) { - bufbeg = buffer + bufsalloc - save; - memcpy (bufbeg, buflim - save, save); + size_t aligned_save = ALIGN_TO (save, pagesize); + size_t maxalloc = (size_t) -1; + size_t newalloc; + + if (S_ISREG (stats->stat.st_mode)) + { + /* Calculate an upper bound on how much memory we should allocate. + We can't use ALIGN_TO here, since off_t might be longer than + size_t. Watch out for arithmetic overflow. */ + off_t to_be_read = stats->stat.st_size - bufoffset; + size_t slop = to_be_read % pagesize; + off_t aligned_to_be_read = to_be_read + (slop ? pagesize - slop : 0); + off_t maxalloc_off = aligned_save + aligned_to_be_read; + if (0 <= maxalloc_off && maxalloc_off == (size_t) maxalloc_off) + maxalloc = maxalloc_off; + } + + /* Grow bufsalloc until it is at least as great as `save'; but + if there is an overflow, just grow it to the next page boundary. */ + while (bufsalloc < save) + if (bufsalloc < bufsalloc * 2) + bufsalloc *= 2; + else + { + bufsalloc = aligned_save; + break; + } + + /* Grow the buffer size to be PREFERRED_SAVE_FACTOR times + bufsalloc.... */ + newalloc = PREFERRED_SAVE_FACTOR * bufsalloc; + if (maxalloc < newalloc) + { + /* ... except don't grow it more than a pagesize past the + file size, as that might cause unnecessary memory + exhaustion if the file is large. */ + newalloc = maxalloc; + bufsalloc = aligned_save; + } + + /* Check that the above calculations made progress, which might + not occur if there is arithmetic overflow. If there's no + progress, or if the new buffer size is larger than the old + and buffer reallocation fails, report memory exhaustion. */ + if (bufsalloc < save || newalloc < save + || (newalloc == save && newalloc != maxalloc) + || (bufalloc < newalloc + && ! 
(buffer + = page_alloc ((bufalloc = newalloc) + 1, &ubuffer)))) + fatal (_("memory exhausted"), 0); } + bufbeg = buffer + bufsalloc - save; + memmove (bufbeg, ubuffer + saved_offset, save); + readsize = bufalloc - bufsalloc; + #if defined(HAVE_MMAP) - if (bufmapped && bufoffset % pagesize == 0 - && stats->stat.st_size - bufoffset >= bufalloc - bufsalloc) + if (bufmapped) { - maddr = buffer + bufsalloc; - maddr = mmap (maddr, bufalloc - bufsalloc, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_FIXED, bufdesc, bufoffset); - if (maddr == (caddr_t) -1) + size_t mmapsize = readsize; + + /* Don't mmap past the end of the file; some hosts don't allow this. + Use `read' on the last page. */ + if (stats->stat.st_size - bufoffset < mmapsize) { - /* This used to issue a warning, but on some hosts - (e.g. Solaris 2.5) mmap can fail merely because some - other process has an advisory read lock on the file. - There's no point alarming the user about this misfeature. */ -#if 0 - fprintf (stderr, _("%s: warning: %s: %s\n"), prog, filename, - strerror (errno)); -#endif - goto tryread; + mmapsize = stats->stat.st_size - bufoffset; + mmapsize -= mmapsize % pagesize; } -#if 0 - /* You might thing this (or MADV_WILLNEED) would help, - but it doesn't, at least not on a Sun running 4.1. - In fact, it actually slows us down about 30%! */ - madvise (maddr, bufalloc - bufsalloc, MADV_SEQUENTIAL); -#endif - cc = bufalloc - bufsalloc; - bufoffset += cc; - } - else - { - tryread: - /* We come here when we're not going to use mmap() any more. - Note that we need to synchronize the file offset the - first time through. */ - if (bufmapped) + + if (mmapsize + && (mmap ((caddr_t) (buffer + bufsalloc), mmapsize, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, + bufdesc, bufoffset) + != (caddr_t) -1)) { + /* Do not bother to use madvise with MADV_SEQUENTIAL or + MADV_WILLNEED on the mmapped memory. One might think it + would help, but it slows us down about 30% on SunOS 4.1. 
*/ + fillsize = mmapsize; + } + else + { + /* Stop using mmap on this file. Synchronize the file + offset. Do not warn about mmap failures. On some hosts + (e.g. Solaris 2.5) mmap can fail merely because some + other process has an advisory read lock on the file. + There's no point alarming the user about this misfeature. */ bufmapped = 0; - if (bufoffset != initial_bufoffset) - lseek (bufdesc, bufoffset, 0); + if (bufoffset != initial_bufoffset + && lseek (bufdesc, bufoffset, SEEK_SET) < 0) + { + error ("lseek", errno); + cc = 0; + } } - cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc); } -#else - cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc); #endif /*HAVE_MMAP*/ + + if (! fillsize) + { + ssize_t bytesread; + while ((bytesread = read (bufdesc, buffer + bufsalloc, readsize)) < 0 + && errno == EINTR) + continue; + if (bytesread < 0) + cc = 0; + else + fillsize = bytesread; + } + + bufoffset += fillsize; #if O_BINARY - if (cc > 0) - cc = undossify_input (buffer + bufsalloc, cc); + if (fillsize) + fillsize = undossify_input (buffer + bufsalloc, fillsize); #endif - if (cc > 0) - buflim = buffer + bufsalloc + cc; - else - buflim = buffer + bufsalloc; + buflim = buffer + bufsalloc + fillsize; return cc; } /* Flags controlling the style of output. */ static int always_text; /* Assume the input is always text. */ +static int filename_mask; /* If zero, output nulls after filenames. */ static int out_quiet; /* Suppress all normal output. */ static int out_invert; /* Print nonmatching stuff. */ static int out_file; /* Print filenames. 
*/ @@ -447,11 +509,9 @@ nlscan (lim) char *lim; { char *beg; - - for (beg = lastnl; beg < lim; ++beg) - if (*beg == '\n') - ++totalnl; - lastnl = beg; + for (beg = lastnl; (beg = memchr (beg, eolbyte, lim - beg)); beg++) + totalnl++; + lastnl = lim; } static void @@ -480,7 +540,7 @@ prline (beg, lim, sep) int sep; { if (out_file) - printf ("%s%c", filename, sep); + printf ("%s%c", filename, sep & filename_mask); if (out_line) { nlscan (beg); @@ -513,7 +573,7 @@ prpending (lim) while (pending > 0 && lastout < lim) { --pending; - if ((nl = memchr (lastout, '\n', lim - lastout)) != 0) + if ((nl = memchr (lastout, eolbyte, lim - lastout)) != 0) ++nl; else nl = lim; @@ -531,6 +591,7 @@ prtext (beg, lim, nlinesp) { static int used; /* avoid printing "--" before any output */ char *bp, *p, *nl; + char eol = eolbyte; int i, n; if (!out_quiet && pending > 0) @@ -547,7 +608,7 @@ prtext (beg, lim, nlinesp) if (p > bp) do --p; - while (p > bp && p[-1] != '\n'); + while (p > bp && p[-1] != eol); /* We only print the "--" separator if our output is discontiguous from the last output in the file. */ @@ -556,7 +617,7 @@ prtext (beg, lim, nlinesp) while (p < beg) { - nl = memchr (p, '\n', beg - p); + nl = memchr (p, eol, beg - p); prline (p, nl + 1, '-'); p = nl + 1; } @@ -567,7 +628,7 @@ prtext (beg, lim, nlinesp) /* Caller wants a line count. */ for (n = 0; p < lim; ++n) { - if ((nl = memchr (p, '\n', lim - p)) != 0) + if ((nl = memchr (p, eol, lim - p)) != 0) ++nl; else nl = lim; @@ -581,7 +642,7 @@ prtext (beg, lim, nlinesp) if (!out_quiet) prline (beg, lim, ':'); - pending = out_after; + pending = out_quiet ? 0 : out_after; used = 1; } @@ -596,13 +657,14 @@ grepbuf (beg, lim) int nlines, n; register char *p, *b; char *endp; + char eol = eolbyte; nlines = 0; p = beg; while ((b = (*execute)(p, lim - p, &endp)) != 0) { /* Avoid matching the empty line at the end of the buffer. 
*/ - if (b == lim && ((b > beg && b[-1] == '\n') || b == beg)) + if (b == lim && ((b > beg && b[-1] == eol) || b == beg)) break; if (!out_invert) { @@ -639,6 +701,7 @@ grep (fd, file, stats) int not_text; size_t residue, save; char *beg, *lim; + char eol = eolbyte; if (!reset (fd, file, stats)) return 0; @@ -662,7 +725,7 @@ grep (fd, file, stats) residue = 0; save = 0; - if (fillbuf (save, stats) < 0) + if (! fillbuf (save, stats)) { if (! (is_EISDIR (errno, file) && suppress_errors)) error (filename, errno); @@ -670,7 +733,7 @@ grep (fd, file, stats) } not_text = (! (always_text | out_quiet) - && memchr (bufbeg, '\0', buflim - bufbeg)); + && memchr (bufbeg, eol ? '\0' : '\200', buflim - bufbeg)); done_on_match += not_text; out_quiet += not_text; @@ -682,7 +745,7 @@ grep (fd, file, stats) if (buflim - bufbeg == save) break; beg = bufbeg + save - residue; - for (lim = buflim; lim > beg && lim[-1] != '\n'; --lim) + for (lim = buflim; lim > beg && lim[-1] != eol; --lim) ; residue = buflim - lim; if (beg < lim) @@ -700,7 +763,7 @@ grep (fd, file, stats) ++i; do --beg; - while (beg > bufbeg && beg[-1] != '\n'); + while (beg > bufbeg && beg[-1] != eol); } if (beg != lastout) lastout = 0; @@ -708,7 +771,7 @@ grep (fd, file, stats) totalcc += buflim - bufbeg - save; if (out_line) nlscan (beg); - if (fillbuf (save, stats) < 0) + if (! fillbuf (save, stats)) { if (! 
(is_EISDIR (errno, file) && suppress_errors)) error (filename, errno); @@ -746,7 +809,8 @@ grepfile (file, stats) } else { - desc = open (file, O_RDONLY); + while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR) + continue; if (desc < 0) { @@ -805,25 +869,21 @@ grepfile (file, stats) if (count_matches) { if (out_file) - printf ("%s:", filename); + printf ("%s%c", filename, ':' & filename_mask); printf ("%d\n", count); } - if (count) - { - status = 0; - if (list_files == 1) - printf ("%s\n", filename); - } - else - { - status = 1; - if (list_files == -1) - printf ("%s\n", filename); - } + status = !count; + if (list_files == 1 - 2 * status) + printf ("%s%c", filename, '\n' & filename_mask); - if (file && close (desc) != 0) - error (file, errno); + if (file) + while (close (desc) != 0) + if (errno != EINTR) + { + error (file, errno); + break; + } } return status; @@ -839,8 +899,8 @@ grepdir (dir, stats) char *name_space; for (ancestor = stats; (ancestor = ancestor->parent) != 0; ) - if (! ((ancestor->stat.st_ino ^ stats->stat.st_ino) - | (ancestor->stat.st_dev ^ stats->stat.st_dev))) + if (ancestor->stat.st_ino == stats->stat.st_ino + && ancestor->stat.st_dev == stats->stat.st_dev) { if (!suppress_errors) fprintf (stderr, _("%s: warning: %s: %s\n"), prog, dir, @@ -903,23 +963,28 @@ int status; printf (_("Usage: %s [OPTION]... 
PATTERN [FILE] ...\n"), prog); printf (_("\ Search for PATTERN in each FILE or standard input.\n\ +Example: %s -i 'hello.*world' menu.h main.c\n\ \n\ -Regexp selection and interpretation:\n\ +Regexp selection and interpretation:\n"), prog); + printf (_("\ -E, --extended-regexp PATTERN is an extended regular expression\n\ - -F, --fixed-regexp PATTERN is a fixed string separated by newlines\n\ - -G, --basic-regexp PATTERN is a basic regular expression\n\ + -F, --fixed-strings PATTERN is a set of newline-separated strings\n\ + -G, --basic-regexp PATTERN is a basic regular expression\n")); + printf (_("\ -e, --regexp=PATTERN use PATTERN as a regular expression\n\ -f, --file=FILE obtain PATTERN from FILE\n\ -i, --ignore-case ignore case distinctions\n\ -w, --word-regexp force PATTERN to match only whole words\n\ - -x, --line-regexp force PATTERN to match only whole lines\n")); + -x, --line-regexp force PATTERN to match only whole lines\n\ + -z, --null-data a data line ends in 0 byte, not newline\n")); printf (_("\ \n\ Miscellaneous:\n\ -s, --no-messages suppress error messages\n\ - -v, --revert-match select non-matching lines\n\ + -v, --invert-match select non-matching lines\n\ -V, --version print version information and exit\n\ - --help display this help and exit\n")); + --help display this help and exit\n\ + --mmap use memory-mapped input if possible\n")); printf (_("\ \n\ Output control:\n\ @@ -934,31 +999,42 @@ Output control:\n\ -r, --recursive equivalent to --directories=recurse.\n\ -L, --files-without-match only print FILE names containing no match\n\ -l, --files-with-matches only print FILE names containing matches\n\ - -c, --count only print a count of matching lines per FILE\n")); + -c, --count only print a count of matching lines per FILE\n\ + -Z, --null print 0 byte after FILE name\n")); printf (_("\ \n\ Context control:\n\ -B, --before-context=NUM print NUM lines of leading context\n\ -A, --after-context=NUM print NUM lines of trailing context\n\ -C, 
--context[=NUM] print NUM (default 2) lines of output context\n\ - unless overriden by -A or -B\n\ + unless overridden by -A or -B\n\ -NUM same as --context=NUM\n\ -U, --binary do not strip CR characters at EOL (MSDOS)\n\ -u, --unix-byte-offsets report offsets as if CRs were not there (MSDOS)\n\ \n\ -If no -[GEF], then `egrep' assumes -E, `fgrep' -F, else -G.\n\ -With no FILE, or when FILE is -, read standard input. If less than\n\ -two FILEs given, assume -h. Exit with 0 if matches, with 1 if none.\n\ -Exit with 2 if syntax errors or system errors.\n")); +`egrep' means `grep -E'. `fgrep' means `grep -F'.\n\ +With no FILE, or when FILE is -, read standard input. If less than\n\ +two FILEs given, assume -h. Exit status is 0 if match, 1 if no match,\n\ +and 2 if trouble.\n")); printf (_("\nReport bugs to .\n")); } exit (status); } +/* Set the matcher to M, reporting any conflicts. */ +static void +setmatcher (m) + char const *m; +{ + if (matcher && strcmp (matcher, m) != 0) + fatal (_("conflicting matchers specified"), 0); + matcher = m; +} + /* Go through the matchers vector and look for the specified matcher. If we find it, install it in compile and execute, and return 1. */ static int -setmatcher (name) +install_matcher (name) char const *name; { int i; @@ -1001,6 +1077,65 @@ setmatcher (name) return 0; } +/* Find the white-space-separated options specified by OPTIONS, and + using BUF to store copies of these options, set ARGV[0], ARGV[1], + etc. to the option copies. Return the number N of options found. + Do not set ARGV[N] to NULL. If ARGV is NULL, do not store ARGV[0] + etc. Backslash can be used to escape whitespace (and backslashes). 
*/ +static int +prepend_args (options, buf, argv) + char const *options; + char *buf; + char **argv; +{ + char const *o = options; + char *b = buf; + int n = 0; + + for (;;) + { + while (ISSPACE ((unsigned char) *o)) + o++; + if (!*o) + return n; + if (argv) + argv[n] = b; + n++; + + do + if ((*b++ = *o++) == '\\' && *o) + b[-1] = *o++; + while (*o && ! ISSPACE ((unsigned char) *o)); + + *b++ = '\0'; + } +} + +/* Prepend the whitespace-separated options in OPTIONS to the argument + vector of a main program with argument count *PARGC and argument + vector *PARGV. */ +static void +prepend_default_options (options, pargc, pargv) + char const *options; + int *pargc; + char ***pargv; +{ + if (options) + { + char *buf = xmalloc (strlen (options) + 1); + int prepended = prepend_args (options, buf, (char **) NULL); + int argc = *pargc; + char * const *argv = *pargv; + char **pp = (char **) xmalloc ((prepended + argc + 1) * sizeof *pp); + *pargc = prepended + argc; + *pargv = pp; + *pp++ = *argv++; + pp += prepend_args (options, buf, pp); + while ((*pp++ = *argv++)) + continue; + } +} + int main (argc, argv) int argc; @@ -1048,7 +1183,8 @@ main (argc, argv) keys = NULL; keycc = 0; with_filenames = 0; - matcher = NULL; + eolbyte = '\n'; + filename_mask = ~0; /* The value -1 means to use DEFAULT_CONTEXT. 
*/ out_after = out_before = -1; @@ -1067,13 +1203,10 @@ main (argc, argv) textdomain (PACKAGE); #endif - while ((opt = getopt_long (argc, argv, -#if O_BINARY - "0123456789A:B:C::EFGHVX:abcd:e:f:hiLlnqrsvwxyUu", -#else - "0123456789A:B:C::EFGHVX:abcd:e:f:hiLlnqrsvwxy", -#endif - long_options, NULL)) != EOF) + prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv); + + while ((opt = getopt_long (argc, argv, short_options, long_options, NULL)) + != -1) switch (opt) { case '0': @@ -1115,38 +1248,32 @@ main (argc, argv) default_context = 2; break; case 'E': - if (matcher && strcmp (matcher, "posix-egrep") != 0) - fatal (_("you may specify only one of -E, -F, or -G"), 0); - matcher = "posix-egrep"; + setmatcher ("egrep"); break; case 'F': - if (matcher && strcmp(matcher, "fgrep") != 0) - fatal(_("you may specify only one of -E, -F, or -G"), 0);; - matcher = "fgrep"; + setmatcher ("fgrep"); break; case 'G': - if (matcher && strcmp (matcher, "grep") != 0) - fatal (_("you may specify only one of -E, -F, or -G"), 0); - matcher = "grep"; + setmatcher ("grep"); break; case 'H': with_filenames = 1; break; -#if O_BINARY case 'U': +#if O_BINARY dos_use_file_type = DOS_BINARY; +#endif break; case 'u': +#if O_BINARY dos_report_unix_offset = 1; - break; #endif + break; case 'V': show_version = 1; break; case 'X': - if (matcher) - fatal (_("matcher already specified"), 0); - matcher = optarg; + setmatcher (optarg); break; case 'a': always_text = 1; @@ -1237,6 +1364,12 @@ main (argc, argv) case 'x': match_lines = 1; break; + case 'Z': + filename_mask = 0; + break; + case 'z': + eolbyte = '\0'; + break; case 0: /* long options */ break; @@ -1250,9 +1383,12 @@ main (argc, argv) if (out_before < 0) out_before = default_context; + if (! 
matcher) + matcher = "grep"; + if (show_version) { - printf (_("grep (GNU grep) %s\n"), VERSION); + printf (_("%s (GNU grep) %s\n"), matcher, VERSION); printf ("\n"); printf (_("\ Copyright (C) 1988, 1992-1998, 1999 Free Software Foundation, Inc.\n")); @@ -1284,10 +1420,7 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n")) else usage (2); - if (! matcher) - matcher = default_matcher; - - if (!setmatcher (matcher) && !setmatcher ("default")) + if (!install_matcher (matcher) && !install_matcher ("default")) abort (); (*compile)(keys, keycc); diff --git a/gnu/usr.bin/grep/grep.h b/gnu/usr.bin/grep/grep.h index ebd0bbc..13f55a2 100644 --- a/gnu/usr.bin/grep/grep.h +++ b/gnu/usr.bin/grep/grep.h @@ -35,14 +35,12 @@ extern struct matcher char *(*execute) PARAMS ((char *, size_t, char **)); } matchers[]; -/* Exported from grep.c. */ -extern char const *matcher; - /* Exported from fgrepmat.c, egrepmat.c, grepmat.c. */ -extern char const default_matcher[]; +extern char const *matcher; /* The following flags are exported from grep for the matchers to look at. */ extern int match_icase; /* -i */ extern int match_words; /* -w */ extern int match_lines; /* -x */ +extern unsigned char eolbyte; /* -z */ diff --git a/gnu/usr.bin/grep/search.c b/gnu/usr.bin/grep/search.c index cf51e3b..8fb3af0 100644 --- a/gnu/usr.bin/grep/search.c +++ b/gnu/usr.bin/grep/search.c @@ -42,7 +42,6 @@ struct matcher matchers[] = { { "default", Gcompile, EGexecute }, { "grep", Gcompile, EGexecute }, { "egrep", Ecompile, EGexecute }, - { "posix-egrep", Ecompile, EGexecute }, { "awk", Ecompile, EGexecute }, { "fgrep", Fcompile, Fexecute }, { 0, 0, 0 }, @@ -55,7 +54,7 @@ struct matcher matchers[] = { static struct dfa dfa; /* Regex compiled regexp. */ -static struct re_pattern_buffer regex; +static struct re_pattern_buffer regexbuf; /* KWset compiled pattern. 
For Ecompile and Gcompile, we compile a list of strings, at least one of which is known to occur in @@ -134,9 +133,9 @@ Gcompile(pattern, size) const char *err; re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE); - dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase); + dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte); - if ((err = re_compile_pattern(pattern, size, &regex)) != 0) + if ((err = re_compile_pattern(pattern, size, &regexbuf)) != 0) fatal(err, 0); /* In the match_words and match_lines cases, we use a different pattern @@ -149,7 +148,8 @@ Gcompile(pattern, size) (^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$). In the whole-line case, we use the pattern: ^(userpattern)$. - BUG: Using [A-Za-z_] is locale-dependent! */ + BUG: Using [A-Za-z_] is locale-dependent! + So will use [:alnum:] */ char *n = malloc(size + 50); int i = 0; @@ -159,14 +159,14 @@ Gcompile(pattern, size) if (match_lines) strcpy(n, "^\\("); if (match_words) - strcpy(n, "\\(^\\|[^0-9A-Za-z_]\\)\\("); + strcpy(n, "\\(^\\|[^[:alnum:]_]\\)\\("); i = strlen(n); memcpy(n + i, pattern, size); i += size; if (match_words) - strcpy(n + i, "\\)\\([^0-9A-Za-z_]\\|$\\)"); + strcpy(n + i, "\\)\\([^[:alnum:]_]\\|$\\)"); if (match_lines) strcpy(n + i, "\\)$"); @@ -186,23 +186,18 @@ Ecompile(pattern, size) { const char *err; - if (strcmp(matcher, "posix-egrep") == 0) - { - re_set_syntax(RE_SYNTAX_POSIX_EGREP); - dfasyntax(RE_SYNTAX_POSIX_EGREP, match_icase); - } - else if (strcmp(matcher, "awk") == 0) + if (strcmp(matcher, "awk") == 0) { re_set_syntax(RE_SYNTAX_AWK); - dfasyntax(RE_SYNTAX_AWK, match_icase); + dfasyntax(RE_SYNTAX_AWK, match_icase, eolbyte); } else { - re_set_syntax(RE_SYNTAX_EGREP); - dfasyntax(RE_SYNTAX_EGREP, match_icase); + re_set_syntax (RE_SYNTAX_POSIX_EGREP); + dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte); } - if ((err = re_compile_pattern(pattern, size, &regex)) != 0) + if ((err = re_compile_pattern(pattern, size, &regexbuf)) != 0) fatal(err, 0); 
/* In the match_words and match_lines cases, we use a different pattern @@ -215,7 +210,8 @@ Ecompile(pattern, size) (^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$). In the whole-line case, we use the pattern: ^(userpattern)$. - BUG: Using [A-Za-z_] is locale-dependent! */ + BUG: Using [A-Za-z_] is locale-dependent! + so will use the char class */ char *n = malloc(size + 50); int i = 0; @@ -225,14 +221,14 @@ Ecompile(pattern, size) if (match_lines) strcpy(n, "^("); if (match_words) - strcpy(n, "(^|[^0-9A-Za-z_])("); + strcpy(n, "(^|[^[:alnum:]_])("); i = strlen(n); memcpy(n + i, pattern, size); i += size; if (match_words) - strcpy(n + i, ")([^0-9A-Za-z_]|$)"); + strcpy(n + i, ")([^[:alnum:]_]|$)"); if (match_lines) strcpy(n + i, ")$"); @@ -252,6 +248,7 @@ EGexecute(buf, size, endp) char **endp; { register char *buflim, *beg, *end, save; + char eol = eolbyte; int backref, start, len; struct kwsmatch kwsm; static struct re_registers regs; /* This is static on account of a BRAIN-DEAD @@ -269,10 +266,10 @@ EGexecute(buf, size, endp) goto failure; /* Narrow down to the line containing the candidate, and run it through DFA. */ - end = memchr(beg, '\n', buflim - beg); + end = memchr(beg, eol, buflim - beg); if (!end) end = buflim; - while (beg > buf && beg[-1] != '\n') + while (beg > buf && beg[-1] != eol) --beg; save = *end; if (kwsm.index < lastexact) @@ -296,10 +293,10 @@ EGexecute(buf, size, endp) if (!beg) goto failure; /* Narrow down to the line we've found. */ - end = memchr(beg, '\n', buflim - beg); + end = memchr(beg, eol, buflim - beg); if (!end) end = buflim; - while (beg > buf && beg[-1] != '\n') + while (beg > buf && beg[-1] != eol) --beg; /* Successful, no backreferences encountered! */ if (!backref) @@ -307,8 +304,8 @@ EGexecute(buf, size, endp) } /* If we've made it to this point, this means DFA has seen a probable match, and we need to run it through Regex. 
*/ - regex.not_eol = 0; - if ((start = re_search(&regex, beg, end - beg, 0, end - beg, &regs)) >= 0) + regexbuf.not_eol = 0; + if ((start = re_search(&regexbuf, beg, end - beg, 0, end - beg, &regs)) >= 0) { len = regs.end[0] - start; if ((!match_lines && !match_words) @@ -331,8 +328,8 @@ EGexecute(buf, size, endp) { /* Try a shorter length anchored at the same place. */ --len; - regex.not_eol = 1; - len = re_match(&regex, beg, start + len, start, &regs); + regexbuf.not_eol = 1; + len = re_match(&regexbuf, beg, start + len, start, &regs); } if (len <= 0) { @@ -340,8 +337,8 @@ EGexecute(buf, size, endp) if (start == end - beg) break; ++start; - regex.not_eol = 0; - start = re_search(&regex, beg, end - beg, + regexbuf.not_eol = 0; + start = re_search(&regexbuf, beg, end - beg, start, end - beg - start, &regs); len = regs.end[0] - start; } @@ -390,6 +387,7 @@ Fexecute(buf, size, endp) { register char *beg, *try, *end; register size_t len; + char eol = eolbyte; struct kwsmatch kwsmatch; for (beg = buf; beg <= buf + size; ++beg) @@ -399,9 +397,9 @@ Fexecute(buf, size, endp) len = kwsmatch.size[0]; if (match_lines) { - if (beg > buf && beg[-1] != '\n') + if (beg > buf && beg[-1] != eol) continue; - if (beg + len < buf + size && beg[len] != '\n') + if (beg + len < buf + size && beg[len] != eol) continue; goto success; } @@ -425,7 +423,7 @@ Fexecute(buf, size, endp) return 0; success: - if ((end = memchr(beg + len, '\n', (buf + size) - (beg + len))) != 0) + if ((end = memchr(beg + len, eol, (buf + size) - (beg + len))) != 0) ++end; else end = buf + size; diff --git a/gnu/usr.bin/grep/system.h b/gnu/usr.bin/grep/system.h index be01791..a6966e4 100644 --- a/gnu/usr.bin/grep/system.h +++ b/gnu/usr.bin/grep/system.h @@ -1,5 +1,5 @@ /* Portability cruft. Include after config.h and sys/types.h. - Copyright (C) 1996, 1998 Free Software Foundation, Inc. + Copyright (C) 1996, 1998, 1999 Free Software Foundation, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,6 +36,8 @@ # include <fcntl.h> #else # define O_RDONLY 0 +# define SEEK_SET 0 +# define SEEK_CUR 1 int open(), read(), close(); #endif @@ -99,14 +101,19 @@ extern char *sys_errlist[]; #if STAT_MACROS_BROKEN # undef S_ISDIR +# undef S_ISREG #endif #if !defined(S_ISDIR) && defined(S_IFDIR) # define S_ISDIR(Mode) (((Mode) & S_IFMT) == S_IFDIR) #endif +#if !defined(S_ISREG) && defined(S_IFREG) +# define S_ISREG(Mode) (((Mode) & S_IFMT) == S_IFREG) +#endif #ifdef STDC_HEADERS # include <stdlib.h> #else +char *getenv (); ptr_t malloc(), realloc(), calloc(); void free(); #endif @@ -139,11 +146,14 @@ void free(); # undef strrchr # define strrchr rindex # undef memcpy -# define memcpy(d, s, n) bcopy((s), (d), (n)) +# define memcpy(d, s, n) bcopy (s, d, n) #endif #ifndef HAVE_MEMCHR ptr_t memchr(); #endif +#if ! defined HAVE_MEMMOVE && ! defined memmove +# define memmove(d, s, n) bcopy (s, d, n) +#endif #include <ctype.h> diff --git a/gnu/usr.bin/grep/tests/bre.awk b/gnu/usr.bin/grep/tests/bre.awk index 3973071..9c9fef8 100644 --- a/gnu/usr.bin/grep/tests/bre.awk +++ b/gnu/usr.bin/grep/tests/bre.awk @@ -8,8 +8,8 @@ BEGIN { $0 ~ /^#/ { next; } NF == 3 { - printf ("echo '%s' | ${GREP} -e '%s' > /dev/null 2>&1\n",$3, $2); - printf ("if test $? 
-ne %s ; then\n", $1); + printf ("status=`echo '%s' | { ${GREP} -e '%s' > /dev/null 2>&1; echo $?; cat >/dev/null; }`\n",$3, $2); + printf ("if test $status -ne %s ; then\n", $1); printf ("\techo Spencer bre test \\#%d failed\n", ++n); printf ("\tfailures=1\n"); printf ("fi\n"); diff --git a/gnu/usr.bin/grep/tests/bre.tests b/gnu/usr.bin/grep/tests/bre.tests index a896377..1ed159d 100644 --- a/gnu/usr.bin/grep/tests/bre.tests +++ b/gnu/usr.bin/grep/tests/bre.tests @@ -17,7 +17,7 @@ 2@\(\{1\}a\)@BADRPT@TO CORRECT 0@^*@* 2@^\{1\}@BADRPT@TO CORRECT -2@\{@BADRPT +0@\{@{ 1@a\(b*\)c\1d@abbcbd 1@a\(b*\)c\1d@abbcbbbd 1@^\(.\)\1@abc @@ -46,7 +46,7 @@ 2@a\{,\}@BADBR 2@a\{1,x\}@BADBR 2@a\{1,x@EBRACE -2@a\{300\}@BADBR@TO CORRECT +2@a\{32768\}@BADBR 2@a\{1,0\}@BADBR 0@ab\{0,0\}c@abcac 0@ab\{0,1\}c@abcac diff --git a/gnu/usr.bin/grep/tests/ere.awk b/gnu/usr.bin/grep/tests/ere.awk index c014fe9..8f6a5b5 100644 --- a/gnu/usr.bin/grep/tests/ere.awk +++ b/gnu/usr.bin/grep/tests/ere.awk @@ -8,8 +8,8 @@ BEGIN { $0 ~ /^#/ { next; } NF == 3 { - printf ("echo '%s' | ${GREP} -E -e '%s' > /dev/null 2>&1\n",$3, $2); - printf ("if test $? 
-ne %s ; then\n", $1); + printf ("status=`echo '%s' | { ${GREP} -E -e '%s' > /dev/null 2>&1; echo $?; cat >/dev/null; }`\n",$3, $2); + printf ("if test $status -ne %s ; then\n", $1); printf ("\techo Spencer ere test \\#%d failed\n", ++n); printf ("\tfailures=1\n"); printf ("fi\n"); diff --git a/gnu/usr.bin/grep/tests/ere.tests b/gnu/usr.bin/grep/tests/ere.tests index 7d37c4e..20ef2b1 100644 --- a/gnu/usr.bin/grep/tests/ere.tests +++ b/gnu/usr.bin/grep/tests/ere.tests @@ -46,37 +46,37 @@ 0@a[bc]d@abd 0@a\*c@a*c 0@a\\b@a\b@TO CORRECT -0@a\\\*b@a\*b +0@a\\\*b@a\*b@SHELL TROUBLE 0@a\bc@abc@TO CORRECT -2@a\@EESCAPE +2@a\@EESCAPE@SHELL TROUBLE 0@a\\bc@a\bc@TO CORRECT 0@a\[b@a[b 2@a[b@EBRACK 0@a$@a 1@a$@a$ -1@a\$@a -0@a\$@a$ +1@a\$@a@SHELL TROUBLE +0@a\$@a$@SHELL TROUBLE 1@a\\$@a -1@a\\$@a$ -1@a\\$@a\$ -0@a\\$@a\ +1@a\\$@a$@SHELL TROUBLE +1@a\\$@a\$@SHELL TROUBLE +0@a\\$@a\@SHEL TROUBLE 0@ab*c@abc 0@ab+c@abc 0@ab?c@abc 0@{@{@TO CORRECT 0@{abc@{abc@TO CORRECT -2@{1@BADRPT +0@{1@{1 2@{1}@BADRPT@TO CORRECT 0@a{b@a{b@TO CORRECT 0@a{1}b@ab 0@a{1,}b@ab 0@a{1,2}b@aab -2@a{1@EBRACE -2@a{1a@EBRACE -2@a{1a}@BADBR +0@a{1@a{1 +1@a{1a@aa +0@a{1a}@a{1a} 0@a{,2}@a{,2} 0@a{,}@a{,} -2@a{1,x}@BADBR +0@a{1,*}@a{1,,,} 2@a{1,x@EBRACE@TO CORRECT 2@a{300}@BADBR@TO CORRECT 2@a{1,0}@BADBR@TO CORRECT diff --git a/gnu/usr.bin/grep/tests/spencer1.awk b/gnu/usr.bin/grep/tests/spencer1.awk index b66b8f5..70c6118 100644 --- a/gnu/usr.bin/grep/tests/spencer1.awk +++ b/gnu/usr.bin/grep/tests/spencer1.awk @@ -4,8 +4,8 @@ BEGIN { } $0 !~ /^#/ && NF = 3 { - printf ("echo '%s'|${GREP} -E -e '%s' > /dev/null 2>&1\n",$3, $2); - printf ("if test $? -ne %s ; then\n", $1); + printf ("status=`echo '%s'| { ${GREP} -E -e '%s' > /dev/null 2>&1; echo $?; cat >/dev/null; }`\n",$3, $2); + printf ("if test $status -ne %s ; then\n", $1); printf ("\techo Spencer test \\#%d failed\n", ++n); printf ("\tfailures=1\n"); printf ("fi\n"); -- cgit v1.1