summaryrefslogtreecommitdiffstats
path: root/usr.bin/grep/file.c
diff options
context:
space:
mode:
authorgabor <gabor@FreeBSD.org>2011-10-05 09:56:43 +0000
committergabor <gabor@FreeBSD.org>2011-10-05 09:56:43 +0000
commit1cb16d98872b7dcec16ad22150c6a1c95e4a0796 (patch)
tree23f8a074a9eca70aa3ae8f97192648beb5aa6572 /usr.bin/grep/file.c
parent58a76bb04f6febb9b77e88729b156b605c0be242 (diff)
downloadFreeBSD-src-1cb16d98872b7dcec16ad22150c6a1c95e4a0796.zip
FreeBSD-src-1cb16d98872b7dcec16ad22150c6a1c95e4a0796.tar.gz
Update BSD grep to the latest development version. It has some code
backported that was written for the TRE integration project in Google Summer of Code 2011. This is a temporary solution until the whole regex library is not replaced so that BSD grep development can continue and the backported code gets some review and testing. This change only improves scalability slightly, there is no big performance boost yet but several minor bugs have been found and fixed. Approved by: delphij (mentor) Sposored by: Google Summer of Code 2011 MFC after: 1 week
Diffstat (limited to 'usr.bin/grep/file.c')
-rw-r--r--usr.bin/grep/file.c125
1 files changed, 93 insertions, 32 deletions
diff --git a/usr.bin/grep/file.c b/usr.bin/grep/file.c
index a6216e5..644c788 100644
--- a/usr.bin/grep/file.c
+++ b/usr.bin/grep/file.c
@@ -34,13 +34,15 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
-#include <sys/types.h>
+#include <sys/mman.h>
#include <sys/stat.h>
+#include <sys/types.h>
#include <bzlib.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
+#include <lzma.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
@@ -56,10 +58,12 @@ __FBSDID("$FreeBSD$");
static gzFile gzbufdesc;
static BZFILE* bzbufdesc;
+static lzma_stream lstrm = LZMA_STREAM_INIT;
-static unsigned char buffer[MAXBUFSIZ];
+static unsigned char *buffer;
static unsigned char *bufpos;
static size_t bufrem;
+static size_t fsiz;
static unsigned char *lnbuf;
static size_t lnbuflen;
@@ -70,6 +74,9 @@ grep_refill(struct file *f)
ssize_t nr;
int bzerr;
+ if (filebehave == FILE_MMAP)
+ return (0);
+
bufpos = buffer;
bufrem = 0;
@@ -101,6 +108,36 @@ grep_refill(struct file *f)
/* Make sure we exit with an error */
nr = -1;
}
+ } else if ((filebehave == FILE_XZ) || (filebehave == FILE_LZMA)) {
+ lzma_action action = LZMA_RUN;
+ uint8_t in_buf[MAXBUFSIZ];
+ lzma_ret ret;
+
+ ret = (filebehave == FILE_XZ) ?
+ lzma_stream_decoder(&lstrm, UINT64_MAX,
+ LZMA_CONCATENATED) :
+ lzma_alone_decoder(&lstrm, UINT64_MAX);
+
+ if (ret != LZMA_OK)
+ return (-1);
+
+ lstrm.next_out = buffer;
+ lstrm.avail_out = MAXBUFSIZ;
+ lstrm.next_in = in_buf;
+ nr = read(f->fd, in_buf, MAXBUFSIZ);
+
+ if (nr < 0)
+ return (-1);
+ else if (nr == 0)
+ action = LZMA_FINISH;
+
+ lstrm.avail_in = nr;
+ ret = lzma_code(&lstrm, action);
+
+ if (ret != LZMA_OK && ret != LZMA_STREAM_END)
+ return (-1);
+ bufrem = MAXBUFSIZ - lstrm.avail_out;
+ return (0);
} else
nr = read(f->fd, buffer, MAXBUFSIZ);
@@ -186,56 +223,76 @@ error:
return (NULL);
}
-static inline struct file *
-grep_file_init(struct file *f)
+/*
+ * Opens a file for processing.
+ */
+struct file *
+grep_open(const char *path)
{
+ struct file *f;
+
+ f = grep_malloc(sizeof *f);
+ memset(f, 0, sizeof *f);
+ if (path == NULL) {
+ /* Processing stdin implies --line-buffered. */
+ lbflag = true;
+ f->fd = STDIN_FILENO;
+ } else if ((f->fd = open(path, O_RDONLY)) == -1)
+ goto error1;
+
+ if (filebehave == FILE_MMAP) {
+ struct stat st;
+
+ if ((fstat(f->fd, &st) == -1) || (st.st_size > OFF_MAX) ||
+ (!S_ISREG(st.st_mode)))
+ filebehave = FILE_STDIO;
+ else {
+ int flags = MAP_PRIVATE | MAP_NOCORE | MAP_NOSYNC;
+#ifdef MAP_PREFAULT_READ
+ flags |= MAP_PREFAULT_READ;
+#endif
+ fsiz = st.st_size;
+ buffer = mmap(NULL, fsiz, PROT_READ, flags,
+ f->fd, (off_t)0);
+ if (buffer == MAP_FAILED)
+ filebehave = FILE_STDIO;
+ else {
+ bufrem = st.st_size;
+ bufpos = buffer;
+ madvise(buffer, st.st_size, MADV_SEQUENTIAL);
+ }
+ }
+ }
+
+ if ((buffer == NULL) || (buffer == MAP_FAILED))
+ buffer = grep_malloc(MAXBUFSIZ);
if (filebehave == FILE_GZIP &&
(gzbufdesc = gzdopen(f->fd, "r")) == NULL)
- goto error;
+ goto error2;
if (filebehave == FILE_BZIP &&
(bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
- goto error;
+ goto error2;
/* Fill read buffer, also catches errors early */
- if (grep_refill(f) != 0)
- goto error;
+ if (bufrem == 0 && grep_refill(f) != 0)
+ goto error2;
/* Check for binary stuff, if necessary */
if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
- f->binary = true;
+ f->binary = true;
return (f);
-error:
+
+error2:
close(f->fd);
+error1:
free(f);
return (NULL);
}
/*
- * Opens a file for processing.
- */
-struct file *
-grep_open(const char *path)
-{
- struct file *f;
-
- f = grep_malloc(sizeof *f);
- memset(f, 0, sizeof *f);
- if (path == NULL) {
- /* Processing stdin implies --line-buffered. */
- lbflag = true;
- f->fd = STDIN_FILENO;
- } else if ((f->fd = open(path, O_RDONLY)) == -1) {
- free(f);
- return (NULL);
- }
-
- return (grep_file_init(f));
-}
-
-/*
* Closes a file.
*/
void
@@ -245,6 +302,10 @@ grep_close(struct file *f)
close(f->fd);
/* Reset read buffer and line buffer */
+ if (filebehave == FILE_MMAP) {
+ munmap(buffer, fsiz);
+ buffer = NULL;
+ }
bufpos = buffer;
bufrem = 0;
OpenPOWER on IntegriCloud