diff options
author | wosch <wosch@FreeBSD.org> | 1997-12-20 18:46:09 +0000 |
---|---|---|
committer | wosch <wosch@FreeBSD.org> | 1997-12-20 18:46:09 +0000 |
commit | 19c4e668e763e73e6f3fa3bed4daf100dde804a9 (patch) | |
tree | 7cdbd9828eeb2af5d302f7a50d3c9d1363d3e9f7 /gnu | |
parent | 297bd474725f30e83c41ded39482a77522a72aae (diff) | |
download | FreeBSD-src-19c4e668e763e73e6f3fa3bed4daf100dde804a9.zip FreeBSD-src-19c4e668e763e73e6f3fa3bed4daf100dde804a9.tar.gz |
Added builtin decompression using the zlib library, option -Z.
Enabled this new feature with the makefile variable GREP_LIBZ. If
you don't like it, compile with `make GREP_LIBZ=`.
grep + zlib has several advantages:
- the shell script zgrep(1) will be basically a one line
exec grep -Z "$@"
- no shell script, no bugs. The current zgrep implementations
have many bugs, and some grep options are not supported.
- no shell script, no security risks.
- it is an order of magnitude faster than a shell script
Also fixed:
0 -> STDIN_FILENO
Close a file descriptor only if the open call was successful. Closing
after a failed open(2) does no harm, but the gzclose(3) function
died in free() while trying to free memory that was never allocated.
Diffstat (limited to 'gnu')
-rw-r--r-- | gnu/usr.bin/grep/Makefile | 16 | ||||
-rw-r--r-- | gnu/usr.bin/grep/grep.1 | 16 | ||||
-rw-r--r-- | gnu/usr.bin/grep/grep.c | 166 |
3 files changed, 131 insertions, 67 deletions
diff --git a/gnu/usr.bin/grep/Makefile b/gnu/usr.bin/grep/Makefile index 0cfcf6b..40aafc0 100644 --- a/gnu/usr.bin/grep/Makefile +++ b/gnu/usr.bin/grep/Makefile @@ -1,8 +1,13 @@ +# $Id$ + +GREP_LIBZ= YES +GREP_FTS= YES + PROG= grep SRCS= dfa.c grep.c getopt.c kwset.c obstack.c search.c CFLAGS+=-DGREP -DHAVE_STRING_H=1 -DHAVE_SYS_PARAM_H=1 -DHAVE_UNISTD_H=1 \ -DHAVE_GETPAGESIZE=1 -DHAVE_MEMCHR=1 -DHAVE_STRERROR=1 \ - -DHAVE_VALLOC=1 -DHAVE_MMAP=1 -DHAVE_FTS=1 + -DHAVE_VALLOC=1 LINKS+= ${BINDIR}/grep ${BINDIR}/egrep \ ${BINDIR}/grep ${BINDIR}/fgrep @@ -11,6 +16,15 @@ MLINKS= grep.1 egrep.1 grep.1 fgrep.1 DPADD+= ${LIBGNUREGEX} LDADD+= -lgnuregex +.if defined(GREP_LIBZ) && !empty(GREP_LIBZ) +LDADD+= -lz +DPADD+= ${LIBZ} +CFLAGS+= -DHAVE_LIBZ=1 +.endif +.if defined(GREP_FTS) && !empty(GREP_FTS) +CFLAGS+= -DHAVE_FTS=1 +.endif + check: all sh ${.CURDIR}/tests/check.sh ${.CURDIR}/tests diff --git a/gnu/usr.bin/grep/grep.1 b/gnu/usr.bin/grep/grep.1 index 1a7d621..75a15a7 100644 --- a/gnu/usr.bin/grep/grep.1 +++ b/gnu/usr.bin/grep/grep.1 @@ -161,7 +161,7 @@ characters are letters, digits, and the underscore. Select only those matches that exactly match the whole line. .PP -Following options only available if compiled with FTS library: +Following options are only available if compiled with FTS library: .PD 0 .TP .BI \-H @@ -171,24 +171,30 @@ option is specified, symbolic links on the command line are followed. (Symbolic links encountered in the tree traversal are not followed.) .TP - .BI \-L If the .I \-R option is specified, all symbolic links are followed. .TP - .BI \-P If the .I \-R option is specified, no symbolic links are followed. .TP - .BI \-R Search in the file hierarchies rooted in the files instead of just the files themselves. -.TP +.LP +Following option is only available if compiled with zlib library: +.PD 0 +.TP +.BI \-Z +If the +.I \-Z +option is specified, the input data will be +decompressed before searching. 
+.TP .PD .SH "REGULAR EXPRESSIONS" .PP diff --git a/gnu/usr.bin/grep/grep.c b/gnu/usr.bin/grep/grep.c index 8e93267..82a8403 100644 --- a/gnu/usr.bin/grep/grep.c +++ b/gnu/usr.bin/grep/grep.c @@ -15,7 +15,10 @@ along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - Written July 1992 by Mike Haertel. */ + Written July 1992 by Mike Haertel. + + Recursive searching and builtin decompression (libz) + 1996/1997 by Wolfram Schneider <wosch@FreeBSD.org>. */ #include <errno.h> #include <stdio.h> @@ -96,7 +99,7 @@ memchr(vp, c, n) #endif /* traverse a file hierarchy library */ -#ifdef HAVE_FTS +#if HAVE_FTS > 0 #include <sys/types.h> #include <sys/stat.h> #include <fts.h> @@ -211,6 +214,12 @@ static struct stat bufstat; /* From fstat(). */ static off_t bufoffset; /* What read() normally remembers. */ #endif +#if HAVE_LIBZ > 0 +#include <zlib.h> +static gzFile gzbufdesc; /* zlib file descriptor. */ +static int Zflag; /* uncompress before searching */ +#endif + /* Reset the buffer for a new file. Initialize on the first time through. 
*/ void @@ -237,9 +246,20 @@ reset(fd) bufbeg = buffer; buflim = buffer; } - bufdesc = fd; +#if HAVE_LIBZ > 0 + if (Zflag) { + gzbufdesc = gzdopen(fd, "r"); + if (gzbufdesc == NULL) + fatal("memory exhausted", 0); + } +#endif + bufdesc = fd; #if defined(HAVE_WORKING_MMAP) - if (fstat(fd, &bufstat) < 0 || !S_ISREG(bufstat.st_mode)) + if ( +#if HAVE_LIBZ > 0 + Zflag || +#endif + fstat(fd, &bufstat) < 0 || !S_ISREG(bufstat.st_mode)) bufmapped = 0; else { @@ -323,10 +343,20 @@ fillbuf(save) bufmapped = 0; lseek(bufdesc, bufoffset, 0); } - cc = read(bufdesc, buffer + bufsalloc, bufalloc - bufsalloc); +#if HAVE_LIBZ > 0 + if (Zflag) + cc = gzread(gzbufdesc, buffer + bufsalloc, bufalloc - bufsalloc); + else +#endif + cc = read(bufdesc, buffer + bufsalloc, bufalloc - bufsalloc); } #else - cc = read(bufdesc, buffer + bufsalloc, bufalloc - bufsalloc); +#if HAVE_LIBZ > 0 + if (Zflag) + cc = gzread(gzbufdesc, buffer + bufsalloc, bufalloc - bufsalloc); + else +#endif + cc = read(bufdesc, buffer + bufsalloc, bufalloc - bufsalloc); #endif if (cc > 0) buflim = buffer + bufsalloc + cc; @@ -513,32 +543,22 @@ grepbuf(beg, lim) /* - * try to guess if fd belong to a binary file + * try to guess if buf belong to a binary file */ -int isBinaryFile(fd) - int fd; +int isBinaryFile(buf, len) + char *buf; + int len; { #define BINARY_BUF_LEN 32 - static unsigned char buf[BINARY_BUF_LEN]; - int i, n; - - /* pipe, socket, fifo */ - if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) - return(0); + int i; - if ((n =(int) read(fd, buf, (size_t)BINARY_BUF_LEN)) == -1) - return(0); + len = (len < BINARY_BUF_LEN ? 
len : BINARY_BUF_LEN); /* look for non-printable chars */ - for(i = 0; i < n; i++) - if (!isprint((unsigned char)buf[i]) && !isspace((unsigned char)buf[i])) + for(i = 0; i < len; i++, buf++) + if (!isprint(*buf) && !isspace(*buf)) return(1); - - /* reset fd to begin of file */ - if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) - return(0); - return(0); } @@ -553,10 +573,7 @@ grep(fd) int nlines, i; size_t residue, save; char *beg, *lim; - - /* skip binary files */ - if (aflag && isBinaryFile(fd)) - return(0); + int first, cc; reset(fd); @@ -568,14 +585,22 @@ grep(fd) nlines = 0; residue = 0; save = 0; + first = 0; + cc = 0; for (;;) { - if (fillbuf(save) < 0) + if ((cc = fillbuf(save)) < 0) { error(filename, errno); return nlines; } + + /* skip binary files */ + if (!first && aflag && isBinaryFile(bufbeg, cc)) + return(0); + first++; + lastnl = bufbeg; if (lastout) lastout = bufbeg; @@ -620,21 +645,25 @@ grep(fd) } static char version[] = "GNU grep version 2.0"; - -#ifdef HAVE_FTS -#define USAGE \ -"usage: %s [-[AB] <num>] [-HRPS] [-CEFGLVabchilnqsvwx]\n\ - [-e <expr>] [-f file] [files ...]\n" + +#define GETOPT_STD "0123456789A:B:CEFGLVX:abce:f:hilnqsvwxy" +#if HAVE_FTS > 0 +#define GETOPT_FTS "HPRS" +#else +#define GETOPT_FTS "" +#endif +#if HAVE_LIBZ > 0 +#define GETOPT_Z "Z" #else -#define USAGE \ -"usage: %s [-[AB] <num>] [-CEFGLVabchilnqsvwx]\n\ - [-e <expr>] [-f file] [files ...]\n" +#define GETOPT_Z "" #endif static void usage() { - fprintf(stderr, USAGE, prog); + fprintf(stderr, "usage: %s [-[AB] <num>] [-CEFGLVX%s%s%s", + prog, GETOPT_FTS, GETOPT_Z, + "abchilnqsvwxy]\n [-e <expr>] [-f file] [files ...]\n"); exit(2); } @@ -656,6 +685,8 @@ setmatcher(name) return 0; } + + int main(argc, argv) int argc; @@ -668,7 +699,7 @@ main(argc, argv) FILE *fp; extern char *optarg; extern int optind; -#ifdef HAVE_FTS +#if HAVE_FTS > 0 int Rflag, Hflag, Pflag, Lflag; FTS *ftsp; FTSENT *ftsent; @@ -691,19 +722,12 @@ main(argc, argv) suppress_errors = 0; matcher = NULL; 
aflag = 0; -#ifdef HAVE_FTS +#if HAVE_FTS > 0 Rflag = Hflag = Pflag = Lflag = 0; #endif while ((opt = getopt(argc, argv, - -#ifndef HAVE_FTS -"0123456789A:B:CEFGVX:abce:f:hiLlnqsvwxy" -#else -"0123456789A:B:CEFGHLPRSVX:abce:f:hiLlnqsvwxy?" -#endif - -)) != EOF) + GETOPT_STD/**/GETOPT_FTS/**/GETOPT_Z)) != -1) switch (opt) { case '0': @@ -755,8 +779,12 @@ main(argc, argv) fatal("matcher already specified", 0); matcher = optarg; break; - -#ifdef HAVE_FTS +#if HAVE_LIBZ > 0 + case 'Z': + Zflag = 1; + break; +#endif +#if HAVE_FTS > 0 /* symbolic links on the command line are followed */ case 'H': Hflag = 1; @@ -880,16 +908,16 @@ main(argc, argv) (*compile)(keys, keycc); -#ifndef HAVE_FTS - if (argc - optind > 1 && !no_filenames) -#else +#if HAVE_FTS > 0 if ((argc - optind > 1 || Rflag) && !no_filenames) +#else + if (argc - optind > 1 && !no_filenames) #endif out_file = 1; status = 1; -#if HAVE_FTS +#if HAVE_FTS > 0 if (Rflag) { fts_options = FTS_PHYSICAL | FTS_NOCHDIR; @@ -984,8 +1012,14 @@ main(argc, argv) else if (list_files == -1) printf("%s\n", filename); - if (desc != STDIN_FILENO) + if (desc != STDIN_FILENO) { +#if HAVE_LIBZ > 0 + if (Zflag) + gzclose(gzbufdesc); + else +#endif close(desc); + } } if (fts_close(ftsp) == -1) @@ -1002,7 +1036,8 @@ main(argc, argv) while (optind < argc) { - desc = strcmp(argv[optind], "-") ? open(argv[optind], O_RDONLY) : 0; + desc = strcmp(argv[optind], "-") ? + open(argv[optind], O_RDONLY) : STDIN_FILENO; if (desc < 0) { if (!suppress_errors) @@ -1010,7 +1045,8 @@ main(argc, argv) } else { - filename = desc == 0 ? "(standard input)" : argv[optind]; + filename = desc == STDIN_FILENO ? 
+ "(standard input)" : argv[optind]; count = grep(desc); if (count_matches) { @@ -1026,9 +1062,17 @@ main(argc, argv) } else if (list_files == -1) printf("%s\n", filename); - } - if (desc != 0) - close(desc); + + if (desc != STDIN_FILENO) { +#if HAVE_LIBZ > 0 + if (Zflag) + gzclose(gzbufdesc); + else +#endif + close(desc); + + } + } ++optind; } @@ -1036,7 +1080,7 @@ main(argc, argv) else { filename = "(standard input)"; - count = grep(0); + count = grep(STDIN_FILENO); if (count_matches) printf("%d\n", count); if (count) |