summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorkientzle <kientzle@FreeBSD.org>2006-07-30 00:29:01 +0000
committerkientzle <kientzle@FreeBSD.org>2006-07-30 00:29:01 +0000
commitf501dbec5fe3a8ec80a115b83f7f07e2671bfc95 (patch)
tree7a38c3d6e84fc5da7d1e72df478099c6b6a417f6 /lib
parent61dd143cfb0a5bd3bf9dc7304f3f4126ae2e5acd (diff)
downloadFreeBSD-src-f501dbec5fe3a8ec80a115b83f7f07e2671bfc95.zip
FreeBSD-src-f501dbec5fe3a8ec80a115b83f7f07e2671bfc95.tar.gz
Use 'skip' when ignoring data in tar archives. This dramatically
increases performance when extracting a single entry from a large
uncompressed archive, especially on slow devices such as USB hard drives.

Requires a number of changes:
* New archive_read_open2() supports a 'skip' client function.
* Old archive_read_open() is implemented as a wrapper now, to continue supporting the old API/ABI.
* _read_open_fd and _read_open_file sprout new 'skip' functions.
* The compression layer gets a new 'skip' operation.
* compression_none passes skip requests through to the client.
* compression_{gzip,bzip2,compress} simply ignore skip requests.

Thanks to: Benjamin Lutz, who designed and implemented the whole thing. I'm just committing it. ;-)

TODO: Need to update the documentation a little bit.
Diffstat (limited to 'lib')
-rw-r--r--lib/libarchive/archive.h.in13
-rw-r--r--lib/libarchive/archive_private.h2
-rw-r--r--lib/libarchive/archive_read.c15
-rw-r--r--lib/libarchive/archive_read_open_fd.c48
-rw-r--r--lib/libarchive/archive_read_open_file.c49
-rw-r--r--lib/libarchive/archive_read_open_filename.c49
-rw-r--r--lib/libarchive/archive_read_support_compression_bzip2.c1
-rw-r--r--lib/libarchive/archive_read_support_compression_compress.c1
-rw-r--r--lib/libarchive/archive_read_support_compression_gzip.c1
-rw-r--r--lib/libarchive/archive_read_support_compression_none.c72
-rw-r--r--lib/libarchive/archive_read_support_format_tar.c47
11 files changed, 287 insertions, 11 deletions
diff --git a/lib/libarchive/archive.h.in b/lib/libarchive/archive.h.in
index 62c4d52..2ff9a3a 100644
--- a/lib/libarchive/archive.h.in
+++ b/lib/libarchive/archive.h.in
@@ -100,14 +100,18 @@ struct archive_entry;
/* #define ARCHIVE_ERRNO_MISC */
/*
- * Callbacks are invoked to automatically read/write/open/close the archive.
- * You can provide your own for complex tasks (like breaking archives
- * across multiple tapes) or use standard ones built into the library.
+ * Callbacks are invoked to automatically read/skip/write/open/close the
+ * archive. You can provide your own for complex tasks (like breaking
+ * archives across multiple tapes) or use standard ones built into the
+ * library.
*/
/* Returns pointer and size of next block of data from archive. */
typedef ssize_t archive_read_callback(struct archive *, void *_client_data,
const void **_buffer);
+/*
+ * Skips at most request bytes from archive and returns the skipped amount.
+ * The amount may be smaller than request, including zero when the client
+ * cannot skip at all; a negative return reports an error.
+ */
+typedef ssize_t archive_skip_callback(struct archive *, void *_client_data,
+ size_t request);
/* Returns size actually written, zero on EOF, -1 on error. */
typedef ssize_t archive_write_callback(struct archive *, void *_client_data,
void *_buffer, size_t _length);
@@ -187,6 +191,9 @@ int archive_read_support_format_zip(struct archive *);
int archive_read_open(struct archive *, void *_client_data,
archive_open_callback *, archive_read_callback *,
archive_close_callback *);
+/*
+ * Same as archive_read_open(), plus a skip callback.  The skip callback
+ * may be NULL; that is what archive_read_open() passes on behalf of
+ * clients using the old interface.
+ */
+int archive_read_open2(struct archive *, void *_client_data,
+ archive_open_callback *, archive_read_callback *,
+ archive_skip_callback *, archive_close_callback *);
/*
* The archive_read_open_file function is a convenience function built
diff --git a/lib/libarchive/archive_private.h b/lib/libarchive/archive_private.h
index 6b93820..419e0f7 100644
--- a/lib/libarchive/archive_private.h
+++ b/lib/libarchive/archive_private.h
@@ -68,6 +68,7 @@ struct archive {
/* Callbacks to open/read/write/close archive stream. */
archive_open_callback *client_opener;
archive_read_callback *client_reader;
+ archive_skip_callback *client_skipper;
archive_write_callback *client_writer;
archive_close_callback *client_closer;
void *client_data;
@@ -132,6 +133,7 @@ struct archive {
ssize_t (*compression_read_ahead)(struct archive *,
const void **, size_t request);
ssize_t (*compression_read_consume)(struct archive *, size_t);
+ ssize_t (*compression_skip)(struct archive *, size_t);
/*
* Format detection is mostly the same as compression
diff --git a/lib/libarchive/archive_read.c b/lib/libarchive/archive_read.c
index c6e47e1..46ccb59 100644
--- a/lib/libarchive/archive_read.c
+++ b/lib/libarchive/archive_read.c
@@ -110,6 +110,19 @@ archive_read_open(struct archive *a, void *client_data,
archive_open_callback *client_opener, archive_read_callback *client_reader,
archive_close_callback *client_closer)
{
+ /* Old archive_read_open() is just a thin shell around
+ * archive_read_open2. */
+ return archive_read_open2(a, client_data, client_opener,
+ client_reader, NULL, client_closer);
+}
+
+int
+archive_read_open2(struct archive *a, void *client_data,
+ archive_open_callback *client_opener,
+ archive_read_callback *client_reader,
+ archive_skip_callback *client_skipper,
+ archive_close_callback *client_closer)
+{
const void *buffer;
ssize_t bytes_read;
int high_bidder;
@@ -129,6 +142,7 @@ archive_read_open(struct archive *a, void *client_data,
*/
a->client_opener = NULL;
a->client_reader = NULL;
+ a->client_skipper = NULL;
a->client_closer = NULL;
a->client_data = NULL;
@@ -167,6 +181,7 @@ archive_read_open(struct archive *a, void *client_data,
/* Now that the client callbacks have worked, remember them. */
a->client_opener = client_opener; /* Do we need to remember this? */
a->client_reader = client_reader;
+ a->client_skipper = client_skipper;
a->client_closer = client_closer;
a->client_data = client_data;
diff --git a/lib/libarchive/archive_read_open_fd.c b/lib/libarchive/archive_read_open_fd.c
index c5716e7..c1c1452 100644
--- a/lib/libarchive/archive_read_open_fd.c
+++ b/lib/libarchive/archive_read_open_fd.c
@@ -45,6 +45,7 @@ struct read_fd_data {
static int file_close(struct archive *, void *);
static int file_open(struct archive *, void *);
static ssize_t file_read(struct archive *, void *, const void **buff);
+static ssize_t file_skip(struct archive *, void *, size_t request);
int
archive_read_open_fd(struct archive *a, int fd, size_t block_size)
@@ -64,7 +65,7 @@ archive_read_open_fd(struct archive *a, int fd, size_t block_size)
return (ARCHIVE_FATAL);
}
mine->fd = fd;
- return (archive_read_open(a, mine, file_open, file_read, file_close));
+ return (archive_read_open2(a, mine, file_open, file_read, file_skip, file_close));
}
static int
@@ -87,10 +88,51 @@ static ssize_t
file_read(struct archive *a, void *client_data, const void **buff)
{
struct read_fd_data *mine = client_data;
+ ssize_t bytes_read;
- (void)a; /* UNUSED */
*buff = mine->buffer;
- return (read(mine->fd, mine->buffer, mine->block_size));
+ bytes_read = read(mine->fd, mine->buffer, mine->block_size);
+ if (bytes_read < 0) {
+ archive_set_error(a, errno, "Error reading fd %d", mine->fd);
+ }
+ return (bytes_read);
+}
+
+/*
+ * Client skip callback for plain file descriptors: advance the descriptor
+ * with lseek() instead of reading.
+ *
+ * Returns the number of bytes actually skipped, which is always a whole
+ * number of blocks and may be less than request.  Returns 0 when nothing
+ * can be skipped by seeking (the caller then falls back to reading), and
+ * -1 with the archive error set when seeking fails for any reason other
+ * than the descriptor being unseekable.
+ */
+static ssize_t
+file_skip(struct archive *a, void *client_data, size_t request)
+{
+	struct read_fd_data *mine = client_data;
+	off_t old_offset, new_offset, distance;
+
+	/* A zero block size would make the rounding below divide by zero. */
+	if (mine->block_size == 0)
+		return (0);
+	/* Round the request down to a whole number of blocks. */
+	request = (request / mine->block_size) * mine->block_size;
+	/* Less than one block requested: nothing worth a system call. */
+	if (request == 0)
+		return (0);
+	/*
+	 * lseek() takes a signed off_t.  If the request does not survive
+	 * the conversion it could turn into a backwards seek, so decline
+	 * and let the caller read instead.
+	 */
+	distance = (off_t)request;
+	if (distance < 0 || (size_t)distance != request)
+		return (0);
+	/*
+	 * Short-circuit evaluation: if the first lseek fails, the second
+	 * one will not be executed.
+	 */
+	if (((old_offset = lseek(mine->fd, 0, SEEK_CUR)) < 0) ||
+	    ((new_offset = lseek(mine->fd, distance, SEEK_CUR)) < 0)) {
+		if (errno == ESPIPE) {
+			/*
+			 * Failure to lseek() can be caused by the file
+			 * descriptor pointing to a pipe, socket or FIFO.
+			 * Return 0 here, so the compression layer will use
+			 * read()s instead to advance the file descriptor.
+			 * It's slower of course, but works as well.
+			 */
+			return (0);
+		}
+		/*
+		 * There's been an error other than ESPIPE. This is most
+		 * likely caused by a programmer error (too large request)
+		 * or a corrupted archive file.
+		 */
+		archive_set_error(a, errno, "Error seeking");
+		return (-1);
+	}
+	return (new_offset - old_offset);
}
static int
diff --git a/lib/libarchive/archive_read_open_file.c b/lib/libarchive/archive_read_open_file.c
index b0db61c..efdd438 100644
--- a/lib/libarchive/archive_read_open_file.c
+++ b/lib/libarchive/archive_read_open_file.c
@@ -48,6 +48,7 @@ struct read_file_data {
static int file_close(struct archive *, void *);
static int file_open(struct archive *, void *);
static ssize_t file_read(struct archive *, void *, const void **buff);
+static ssize_t file_skip(struct archive *, void *, size_t request);
int
archive_read_open_file(struct archive *a, const char *filename,
@@ -73,7 +74,7 @@ archive_read_open_file(struct archive *a, const char *filename,
mine->block_size = block_size;
mine->buffer = NULL;
mine->fd = -1;
- return (archive_read_open(a, mine, file_open, file_read, file_close));
+ return (archive_read_open2(a, mine, file_open, file_read, file_skip, file_close));
}
static int
@@ -119,7 +120,6 @@ file_read(struct archive *a, void *client_data, const void **buff)
struct read_file_data *mine = client_data;
ssize_t bytes_read;
- (void)a; /* UNUSED */
*buff = mine->buffer;
bytes_read = read(mine->fd, mine->buffer, mine->block_size);
if (bytes_read < 0) {
@@ -132,6 +132,51 @@ file_read(struct archive *a, void *client_data, const void **buff)
return (bytes_read);
}
+/*
+ * Client skip callback for named files: advance the descriptor with
+ * lseek() instead of reading.
+ *
+ * Returns the number of bytes actually skipped, which is always a whole
+ * number of blocks and may be less than request.  Returns 0 when nothing
+ * can be skipped by seeking (the caller then falls back to reading), and
+ * -1 with the archive error set when seeking fails for any reason other
+ * than the descriptor being unseekable.
+ */
+static ssize_t
+file_skip(struct archive *a, void *client_data, size_t request)
+{
+	struct read_file_data *mine = client_data;
+	off_t old_offset, new_offset, distance;
+
+	/* A zero block size would make the rounding below divide by zero. */
+	if (mine->block_size == 0)
+		return (0);
+	/* Round the request down to a whole number of blocks. */
+	request = (request / mine->block_size) * mine->block_size;
+	/* Less than one block requested: nothing worth a system call. */
+	if (request == 0)
+		return (0);
+	/*
+	 * lseek() takes a signed off_t.  If the request does not survive
+	 * the conversion it could turn into a backwards seek, so decline
+	 * and let the caller read instead.
+	 */
+	distance = (off_t)request;
+	if (distance < 0 || (size_t)distance != request)
+		return (0);
+	/*
+	 * Short-circuit evaluation: if the first lseek fails, the second
+	 * one will not be executed.
+	 */
+	if (((old_offset = lseek(mine->fd, 0, SEEK_CUR)) < 0) ||
+	    ((new_offset = lseek(mine->fd, distance, SEEK_CUR)) < 0)) {
+		if (errno == ESPIPE) {
+			/*
+			 * Failure to lseek() can be caused by the file
+			 * descriptor pointing to a pipe, socket or FIFO.
+			 * Return 0 here, so the compression layer will use
+			 * read()s instead to advance the file descriptor.
+			 * It's slower of course, but works as well.
+			 */
+			return (0);
+		}
+		/*
+		 * There's been an error other than ESPIPE. This is most
+		 * likely caused by a programmer error (too large request)
+		 * or a corrupted archive file.
+		 */
+		if (mine->filename[0] == '\0') {
+			/*
+			 * Should never get here, since lseek() on stdin ought
+			 * to return an ESPIPE error.
+			 */
+			archive_set_error(a, errno, "Error seeking in stdin");
+		} else {
+			archive_set_error(a, errno, "Error seeking in '%s'",
+			    mine->filename);
+		}
+		return (-1);
+	}
+	return (new_offset - old_offset);
+}
+
static int
file_close(struct archive *a, void *client_data)
{
diff --git a/lib/libarchive/archive_read_open_filename.c b/lib/libarchive/archive_read_open_filename.c
index b0db61c..efdd438 100644
--- a/lib/libarchive/archive_read_open_filename.c
+++ b/lib/libarchive/archive_read_open_filename.c
@@ -48,6 +48,7 @@ struct read_file_data {
static int file_close(struct archive *, void *);
static int file_open(struct archive *, void *);
static ssize_t file_read(struct archive *, void *, const void **buff);
+static ssize_t file_skip(struct archive *, void *, size_t request);
int
archive_read_open_file(struct archive *a, const char *filename,
@@ -73,7 +74,7 @@ archive_read_open_file(struct archive *a, const char *filename,
mine->block_size = block_size;
mine->buffer = NULL;
mine->fd = -1;
- return (archive_read_open(a, mine, file_open, file_read, file_close));
+ return (archive_read_open2(a, mine, file_open, file_read, file_skip, file_close));
}
static int
@@ -119,7 +120,6 @@ file_read(struct archive *a, void *client_data, const void **buff)
struct read_file_data *mine = client_data;
ssize_t bytes_read;
- (void)a; /* UNUSED */
*buff = mine->buffer;
bytes_read = read(mine->fd, mine->buffer, mine->block_size);
if (bytes_read < 0) {
@@ -132,6 +132,51 @@ file_read(struct archive *a, void *client_data, const void **buff)
return (bytes_read);
}
+/*
+ * Client skip callback for named files: advance the descriptor with
+ * lseek() instead of reading.
+ *
+ * Returns the number of bytes actually skipped, which is always a whole
+ * number of blocks and may be less than request.  Returns 0 when nothing
+ * can be skipped by seeking (the caller then falls back to reading), and
+ * -1 with the archive error set when seeking fails for any reason other
+ * than the descriptor being unseekable.
+ */
+static ssize_t
+file_skip(struct archive *a, void *client_data, size_t request)
+{
+	struct read_file_data *mine = client_data;
+	off_t old_offset, new_offset, distance;
+
+	/* A zero block size would make the rounding below divide by zero. */
+	if (mine->block_size == 0)
+		return (0);
+	/* Round the request down to a whole number of blocks. */
+	request = (request / mine->block_size) * mine->block_size;
+	/* Less than one block requested: nothing worth a system call. */
+	if (request == 0)
+		return (0);
+	/*
+	 * lseek() takes a signed off_t.  If the request does not survive
+	 * the conversion it could turn into a backwards seek, so decline
+	 * and let the caller read instead.
+	 */
+	distance = (off_t)request;
+	if (distance < 0 || (size_t)distance != request)
+		return (0);
+	/*
+	 * Short-circuit evaluation: if the first lseek fails, the second
+	 * one will not be executed.
+	 */
+	if (((old_offset = lseek(mine->fd, 0, SEEK_CUR)) < 0) ||
+	    ((new_offset = lseek(mine->fd, distance, SEEK_CUR)) < 0)) {
+		if (errno == ESPIPE) {
+			/*
+			 * Failure to lseek() can be caused by the file
+			 * descriptor pointing to a pipe, socket or FIFO.
+			 * Return 0 here, so the compression layer will use
+			 * read()s instead to advance the file descriptor.
+			 * It's slower of course, but works as well.
+			 */
+			return (0);
+		}
+		/*
+		 * There's been an error other than ESPIPE. This is most
+		 * likely caused by a programmer error (too large request)
+		 * or a corrupted archive file.
+		 */
+		if (mine->filename[0] == '\0') {
+			/*
+			 * Should never get here, since lseek() on stdin ought
+			 * to return an ESPIPE error.
+			 */
+			archive_set_error(a, errno, "Error seeking in stdin");
+		} else {
+			archive_set_error(a, errno, "Error seeking in '%s'",
+			    mine->filename);
+		}
+		return (-1);
+	}
+	return (new_offset - old_offset);
+}
+
static int
file_close(struct archive *a, void *client_data)
{
diff --git a/lib/libarchive/archive_read_support_compression_bzip2.c b/lib/libarchive/archive_read_support_compression_bzip2.c
index aa2d531..79742e8 100644
--- a/lib/libarchive/archive_read_support_compression_bzip2.c
+++ b/lib/libarchive/archive_read_support_compression_bzip2.c
@@ -187,6 +187,7 @@ init(struct archive *a, const void *buff, size_t n)
a->compression_read_ahead = read_ahead;
a->compression_read_consume = read_consume;
+ a->compression_skip = NULL; /* not supported */
a->compression_finish = finish;
/* Initialize compression library. */
diff --git a/lib/libarchive/archive_read_support_compression_compress.c b/lib/libarchive/archive_read_support_compression_compress.c
index 30a7377..5f893ca 100644
--- a/lib/libarchive/archive_read_support_compression_compress.c
+++ b/lib/libarchive/archive_read_support_compression_compress.c
@@ -190,6 +190,7 @@ init(struct archive *a, const void *buff, size_t n)
a->compression_read_ahead = read_ahead;
a->compression_read_consume = read_consume;
+ a->compression_skip = NULL; /* not supported */
a->compression_finish = finish;
state = malloc(sizeof(*state));
diff --git a/lib/libarchive/archive_read_support_compression_gzip.c b/lib/libarchive/archive_read_support_compression_gzip.c
index b0cda3a..873ef86 100644
--- a/lib/libarchive/archive_read_support_compression_gzip.c
+++ b/lib/libarchive/archive_read_support_compression_gzip.c
@@ -191,6 +191,7 @@ init(struct archive *a, const void *buff, size_t n)
a->compression_read_ahead = read_ahead;
a->compression_read_consume = read_consume;
+ a->compression_skip = NULL; /* not supported */
a->compression_finish = finish;
/*
diff --git a/lib/libarchive/archive_read_support_compression_none.c b/lib/libarchive/archive_read_support_compression_none.c
index 8e7ca3c..229f7bf 100644
--- a/lib/libarchive/archive_read_support_compression_none.c
+++ b/lib/libarchive/archive_read_support_compression_none.c
@@ -27,6 +27,7 @@
#include "archive_platform.h"
__FBSDID("$FreeBSD$");
+#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
@@ -61,6 +62,8 @@ struct archive_decompress_none {
*/
#define BUFFER_SIZE 65536
+/* Smaller of two values.  Note: each argument may be evaluated twice. */
+#define minimum(a, b) (((a) < (b)) ? (a) : (b))
+
static int archive_decompressor_none_bid(const void *, size_t);
static int archive_decompressor_none_finish(struct archive *);
static int archive_decompressor_none_init(struct archive *,
@@ -69,6 +72,7 @@ static ssize_t archive_decompressor_none_read_ahead(struct archive *,
const void **, size_t);
static ssize_t archive_decompressor_none_read_consume(struct archive *,
size_t);
+static ssize_t archive_decompressor_none_skip(struct archive *, size_t);
int
archive_read_support_compression_none(struct archive *a)
@@ -123,6 +127,7 @@ archive_decompressor_none_init(struct archive *a, const void *buff, size_t n)
a->compression_data = state;
a->compression_read_ahead = archive_decompressor_none_read_ahead;
a->compression_read_consume = archive_decompressor_none_read_consume;
+ a->compression_skip = archive_decompressor_none_skip;
a->compression_finish = archive_decompressor_none_finish;
return (ARCHIVE_OK);
@@ -251,6 +256,73 @@ archive_decompressor_none_read_consume(struct archive *a, size_t request)
return (request);
}
+/*
+ * Skip at most request bytes. Skipped data is marked as consumed.
+ *
+ * Returns the number of bytes skipped, or a negative value on error.
+ * The count is smaller than request only when the input ran out before
+ * the request could be satisfied.
+ */
+static ssize_t
+archive_decompressor_none_skip(struct archive *a, size_t request)
+{
+	struct archive_decompress_none *state;
+	ssize_t bytes_skipped, total_bytes_skipped = 0;
+	size_t min;
+
+	state = a->compression_data;
+	if (state->fatal)
+		return (-1);
+	/*
+	 * If there is data in the buffers already, use that first.
+	 */
+	if (state->avail > 0) {
+		min = minimum(request, state->avail);
+		bytes_skipped = archive_decompressor_none_read_consume(a, min);
+		request -= bytes_skipped;
+		total_bytes_skipped += bytes_skipped;
+	}
+	if (state->client_avail > 0) {
+		min = minimum(request, state->client_avail);
+		bytes_skipped = archive_decompressor_none_read_consume(a, min);
+		request -= bytes_skipped;
+		total_bytes_skipped += bytes_skipped;
+	}
+	if (request == 0)
+		return (total_bytes_skipped);
+	/*
+	 * If no client_skipper is provided, just read the old way. It is very
+	 * likely that after skipping, the request has not yet been fully
+	 * satisfied (and is still > 0). In that case, read as well.
+	 */
+	if (a->client_skipper != NULL) {
+		bytes_skipped = (a->client_skipper)(a, a->client_data,
+		    request);
+		if (bytes_skipped < 0) {	/* error */
+			state->client_total = state->client_avail = 0;
+			state->client_next = state->client_buff = NULL;
+			state->fatal = 1;
+			return (bytes_skipped);
+		}
+		total_bytes_skipped += bytes_skipped;
+		request -= bytes_skipped;
+		/* The client buffer no longer matches the stream position. */
+		state->client_next = state->client_buff;
+		a->raw_position += bytes_skipped;
+		state->client_avail = state->client_total = 0;
+	}
+	/* Read and discard whatever the client could not seek over. */
+	while (request > 0) {
+		const void *dummy_buffer;
+		ssize_t bytes_read;
+
+		bytes_read = archive_decompressor_none_read_ahead(a,
+		    &dummy_buffer, request);
+		if (bytes_read < 0)
+			return (bytes_read);
+		/*
+		 * No data came back, presumably because the input ended
+		 * early.  Stop here instead of spinning forever on a
+		 * request that can never shrink; the caller sees the
+		 * short count.
+		 */
+		if (bytes_read == 0)
+			break;
+		min = minimum((size_t)bytes_read, request);
+		bytes_read = archive_decompressor_none_read_consume(a, min);
+		total_bytes_skipped += bytes_read;
+		request -= bytes_read;
+	}
+	return (total_bytes_skipped);
+}
+
static int
archive_decompressor_none_finish(struct archive *a)
{
diff --git a/lib/libarchive/archive_read_support_format_tar.c b/lib/libarchive/archive_read_support_format_tar.c
index 271a41f..cc0b5ed 100644
--- a/lib/libarchive/archive_read_support_format_tar.c
+++ b/lib/libarchive/archive_read_support_format_tar.c
@@ -193,6 +193,7 @@ static int archive_read_format_tar_bid(struct archive *);
static int archive_read_format_tar_cleanup(struct archive *);
static int archive_read_format_tar_read_data(struct archive *a,
const void **buff, size_t *size, off_t *offset);
+static int archive_read_format_tar_skip(struct archive *a);
static int archive_read_format_tar_read_header(struct archive *,
struct archive_entry *);
static int checksum(struct archive *, const void *);
@@ -260,7 +261,7 @@ archive_read_support_format_tar(struct archive *a)
archive_read_format_tar_bid,
archive_read_format_tar_read_header,
archive_read_format_tar_read_data,
- NULL,
+ archive_read_format_tar_skip,
archive_read_format_tar_cleanup);
if (r != ARCHIVE_OK)
@@ -522,6 +523,50 @@ archive_read_format_tar_read_data(struct archive *a,
}
}
+/*
+ * Skip whatever remains of the current entry's body.
+ *
+ * When the compression layer has no skip operation, the body is simply
+ * read and discarded.  Otherwise the compression layer is asked to skip,
+ * the entry bookkeeping is updated for the bytes it actually skipped,
+ * and any remainder (plus padding) is drained through the normal read
+ * path.  Returns the status of the last read_data call, or ARCHIVE_FATAL
+ * if the skip itself failed.
+ */
+static int
+archive_read_format_tar_skip(struct archive *a)
+{
+	ssize_t bytes_skipped;
+	struct tar *tar;
+	struct sparse_block *p;
+	int r = ARCHIVE_OK;
+	const void *b;	/* dummy variables */
+	size_t s;
+	off_t o;
+
+	tar = *(a->pformat_data);
+	if (a->compression_skip == NULL) {
+		while (r == ARCHIVE_OK)
+			r = archive_read_format_tar_read_data(a, &b, &s, &o);
+		return (r);
+	}
+	bytes_skipped = (a->compression_skip)(a, tar->entry_bytes_remaining);
+	if (bytes_skipped < 0)
+		return (ARCHIVE_FATAL);
+	/*
+	 * Charge the skipped bytes against the entry exactly once, using
+	 * the full amount consumed from the stream.  bytes_skipped may be
+	 * clamped below for the sparse bookkeeping, so this must happen
+	 * first and must not be repeated afterwards.
+	 */
+	tar->entry_bytes_remaining -= bytes_skipped;
+	/* Drop sparse blocks that have already been used up. */
+	while (tar->sparse_list != NULL &&
+	    tar->sparse_list->remaining == 0) {
+		p = tar->sparse_list;
+		tar->sparse_list = p->next;
+		free(p);
+		if (tar->sparse_list != NULL)
+			tar->entry_offset = tar->sparse_list->offset;
+	}
+	if (tar->sparse_list != NULL) {
+		if (tar->sparse_list->remaining < bytes_skipped)
+			bytes_skipped = tar->sparse_list->remaining;
+		tar->sparse_list->remaining -= bytes_skipped;
+	}
+	tar->entry_offset += bytes_skipped;
+	/* Reuse the read path to drain any remainder and the padding. */
+	while (r == ARCHIVE_OK)
+		r = archive_read_format_tar_read_data(a, &b, &s, &o);
+	return (r);
+}
+
/*
* This function recursively interprets all of the headers associated
* with a single entry.
OpenPOWER on IntegriCloud