diff options
author | kientzle <kientzle@FreeBSD.org> | 2006-07-30 00:29:01 +0000 |
---|---|---|
committer | kientzle <kientzle@FreeBSD.org> | 2006-07-30 00:29:01 +0000 |
commit | f501dbec5fe3a8ec80a115b83f7f07e2671bfc95 (patch) | |
tree | 7a38c3d6e84fc5da7d1e72df478099c6b6a417f6 /lib | |
parent | 61dd143cfb0a5bd3bf9dc7304f3f4126ae2e5acd (diff) | |
download | FreeBSD-src-f501dbec5fe3a8ec80a115b83f7f07e2671bfc95.zip FreeBSD-src-f501dbec5fe3a8ec80a115b83f7f07e2671bfc95.tar.gz |
Use 'skip' when ignoring data in tar archives. This dramatically
increases performance when extracting a single entry from a large
uncompressed archive, especially on slow devices such as USB hard
drives.
Requires a number of changes:
* New archive_read_open2() supports a 'skip' client function
* Old archive_read_open() is implemented as a wrapper now, to
continue supporting the old API/ABI.
* _read_open_fd and _read_open_file sprout new 'skip' functions.
* compression layer gets a new 'skip' operation.
* compression_none passes skip requests through to client.
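* compression_{gzip,bzip2,compress} do not implement skip (their skip
hook is left NULL), so the tar reader falls back to reading and
discarding the data for compressed archives.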
Thanks to: Benjamin Lutz, who designed and implemented the whole thing.
I'm just committing it. ;-)
TODO: Need to update the documentation a little bit.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/libarchive/archive.h.in | 13 | ||||
-rw-r--r-- | lib/libarchive/archive_private.h | 2 | ||||
-rw-r--r-- | lib/libarchive/archive_read.c | 15 | ||||
-rw-r--r-- | lib/libarchive/archive_read_open_fd.c | 48 | ||||
-rw-r--r-- | lib/libarchive/archive_read_open_file.c | 49 | ||||
-rw-r--r-- | lib/libarchive/archive_read_open_filename.c | 49 | ||||
-rw-r--r-- | lib/libarchive/archive_read_support_compression_bzip2.c | 1 | ||||
-rw-r--r-- | lib/libarchive/archive_read_support_compression_compress.c | 1 | ||||
-rw-r--r-- | lib/libarchive/archive_read_support_compression_gzip.c | 1 | ||||
-rw-r--r-- | lib/libarchive/archive_read_support_compression_none.c | 72 | ||||
-rw-r--r-- | lib/libarchive/archive_read_support_format_tar.c | 47 |
11 files changed, 287 insertions, 11 deletions
diff --git a/lib/libarchive/archive.h.in b/lib/libarchive/archive.h.in index 62c4d52..2ff9a3a 100644 --- a/lib/libarchive/archive.h.in +++ b/lib/libarchive/archive.h.in @@ -100,14 +100,18 @@ struct archive_entry; /* #define ARCHIVE_ERRNO_MISC */ /* - * Callbacks are invoked to automatically read/write/open/close the archive. - * You can provide your own for complex tasks (like breaking archives - * across multiple tapes) or use standard ones built into the library. + * Callbacks are invoked to automatically read/skip/write/open/close the + * archive. You can provide your own for complex tasks (like breaking + * archives across multiple tapes) or use standard ones built into the + * library. */ /* Returns pointer and size of next block of data from archive. */ typedef ssize_t archive_read_callback(struct archive *, void *_client_data, const void **_buffer); +/* Skips at most request bytes from archive and returns the skipped amount */ +typedef ssize_t archive_skip_callback(struct archive *, void *_client_data, + size_t request); /* Returns size actually written, zero on EOF, -1 on error. */ typedef ssize_t archive_write_callback(struct archive *, void *_client_data, void *_buffer, size_t _length); @@ -187,6 +191,9 @@ int archive_read_support_format_zip(struct archive *); int archive_read_open(struct archive *, void *_client_data, archive_open_callback *, archive_read_callback *, archive_close_callback *); +int archive_read_open2(struct archive *, void *_client_data, + archive_open_callback *, archive_read_callback *, + archive_skip_callback *, archive_close_callback *); /* * The archive_read_open_file function is a convenience function built diff --git a/lib/libarchive/archive_private.h b/lib/libarchive/archive_private.h index 6b93820..419e0f7 100644 --- a/lib/libarchive/archive_private.h +++ b/lib/libarchive/archive_private.h @@ -68,6 +68,7 @@ struct archive { /* Callbacks to open/read/write/close archive stream. 
*/ archive_open_callback *client_opener; archive_read_callback *client_reader; + archive_skip_callback *client_skipper; archive_write_callback *client_writer; archive_close_callback *client_closer; void *client_data; @@ -132,6 +133,7 @@ struct archive { ssize_t (*compression_read_ahead)(struct archive *, const void **, size_t request); ssize_t (*compression_read_consume)(struct archive *, size_t); + ssize_t (*compression_skip)(struct archive *, size_t); /* * Format detection is mostly the same as compression diff --git a/lib/libarchive/archive_read.c b/lib/libarchive/archive_read.c index c6e47e1..46ccb59 100644 --- a/lib/libarchive/archive_read.c +++ b/lib/libarchive/archive_read.c @@ -110,6 +110,19 @@ archive_read_open(struct archive *a, void *client_data, archive_open_callback *client_opener, archive_read_callback *client_reader, archive_close_callback *client_closer) { + /* Old archive_read_open() is just a thin shell around + * archive_read_open2. */ + return archive_read_open2(a, client_data, client_opener, + client_reader, NULL, client_closer); +} + +int +archive_read_open2(struct archive *a, void *client_data, + archive_open_callback *client_opener, + archive_read_callback *client_reader, + archive_skip_callback *client_skipper, + archive_close_callback *client_closer) +{ const void *buffer; ssize_t bytes_read; int high_bidder; @@ -129,6 +142,7 @@ archive_read_open(struct archive *a, void *client_data, */ a->client_opener = NULL; a->client_reader = NULL; + a->client_skipper = NULL; a->client_closer = NULL; a->client_data = NULL; @@ -167,6 +181,7 @@ archive_read_open(struct archive *a, void *client_data, /* Now that the client callbacks have worked, remember them. */ a->client_opener = client_opener; /* Do we need to remember this? 
*/ a->client_reader = client_reader; + a->client_skipper = client_skipper; a->client_closer = client_closer; a->client_data = client_data; diff --git a/lib/libarchive/archive_read_open_fd.c b/lib/libarchive/archive_read_open_fd.c index c5716e7..c1c1452 100644 --- a/lib/libarchive/archive_read_open_fd.c +++ b/lib/libarchive/archive_read_open_fd.c @@ -45,6 +45,7 @@ struct read_fd_data { static int file_close(struct archive *, void *); static int file_open(struct archive *, void *); static ssize_t file_read(struct archive *, void *, const void **buff); +static ssize_t file_skip(struct archive *, void *, size_t request); int archive_read_open_fd(struct archive *a, int fd, size_t block_size) @@ -64,7 +65,7 @@ archive_read_open_fd(struct archive *a, int fd, size_t block_size) return (ARCHIVE_FATAL); } mine->fd = fd; - return (archive_read_open(a, mine, file_open, file_read, file_close)); + return (archive_read_open2(a, mine, file_open, file_read, file_skip, file_close)); } static int @@ -87,10 +88,51 @@ static ssize_t file_read(struct archive *a, void *client_data, const void **buff) { struct read_fd_data *mine = client_data; + ssize_t bytes_read; - (void)a; /* UNUSED */ *buff = mine->buffer; - return (read(mine->fd, mine->buffer, mine->block_size)); + bytes_read = read(mine->fd, mine->buffer, mine->block_size); + if (bytes_read < 0) { + archive_set_error(a, errno, "Error reading fd %d", mine->fd); + } + return (bytes_read); +} + +static ssize_t +file_skip(struct archive *a, void *client_data, size_t request) +{ + struct read_fd_data *mine = client_data; + off_t old_offset, new_offset; + + /* Reduce request to the next smallest multiple of block_size */ + request = (request / mine->block_size) * mine->block_size; + /* + * Hurray for lazy evaluation: if the first lseek fails, the second + * one will not be executed. 
+ */ + if (((old_offset = lseek(mine->fd, 0, SEEK_CUR)) < 0) || + ((new_offset = lseek(mine->fd, request, SEEK_CUR)) < 0)) + { + if (errno == ESPIPE) + { + /* + * Failure to lseek() can be caused by the file + * descriptor pointing to a pipe, socket or FIFO. + * Return 0 here, so the compression layer will use + * read()s instead to advance the file descriptor. + * It's slower of course, but works as well. + */ + return (0); + } + /* + * There's been an error other than ESPIPE. This is most + * likely caused by a programmer error (too large request) + * or a corrupted archive file. + */ + archive_set_error(a, errno, "Error seeking"); + return (-1); + } + return (new_offset - old_offset); } static int diff --git a/lib/libarchive/archive_read_open_file.c b/lib/libarchive/archive_read_open_file.c index b0db61c..efdd438 100644 --- a/lib/libarchive/archive_read_open_file.c +++ b/lib/libarchive/archive_read_open_file.c @@ -48,6 +48,7 @@ struct read_file_data { static int file_close(struct archive *, void *); static int file_open(struct archive *, void *); static ssize_t file_read(struct archive *, void *, const void **buff); +static ssize_t file_skip(struct archive *, void *, size_t request); int archive_read_open_file(struct archive *a, const char *filename, @@ -73,7 +74,7 @@ archive_read_open_file(struct archive *a, const char *filename, mine->block_size = block_size; mine->buffer = NULL; mine->fd = -1; - return (archive_read_open(a, mine, file_open, file_read, file_close)); + return (archive_read_open2(a, mine, file_open, file_read, file_skip, file_close)); } static int @@ -119,7 +120,6 @@ file_read(struct archive *a, void *client_data, const void **buff) struct read_file_data *mine = client_data; ssize_t bytes_read; - (void)a; /* UNUSED */ *buff = mine->buffer; bytes_read = read(mine->fd, mine->buffer, mine->block_size); if (bytes_read < 0) { @@ -132,6 +132,51 @@ file_read(struct archive *a, void *client_data, const void **buff) return (bytes_read); } +static ssize_t 
+file_skip(struct archive *a, void *client_data, size_t request) +{ + struct read_file_data *mine = client_data; + off_t old_offset, new_offset; + + /* Reduce request to the next smallest multiple of block_size */ + request = (request / mine->block_size) * mine->block_size; + /* + * Hurray for lazy evaluation: if the first lseek fails, the second + * one will not be executed. + */ + if (((old_offset = lseek(mine->fd, 0, SEEK_CUR)) < 0) || + ((new_offset = lseek(mine->fd, request, SEEK_CUR)) < 0)) + { + if (errno == ESPIPE) + { + /* + * Failure to lseek() can be caused by the file + * descriptor pointing to a pipe, socket or FIFO. + * Return 0 here, so the compression layer will use + * read()s instead to advance the file descriptor. + * It's slower of course, but works as well. + */ + return (0); + } + /* + * There's been an error other than ESPIPE. This is most + * likely caused by a programmer error (too large request) + * or a corrupted archive file. + */ + if (mine->filename[0] == '\0') + /* + * Should never get here, since lseek() on stdin ought + * to return an ESPIPE error. 
+ */ + archive_set_error(a, errno, "Error seeking in stdin"); + else + archive_set_error(a, errno, "Error seeking in '%s'", + mine->filename); + return (-1); + } + return (new_offset - old_offset); +} + static int file_close(struct archive *a, void *client_data) { diff --git a/lib/libarchive/archive_read_open_filename.c b/lib/libarchive/archive_read_open_filename.c index b0db61c..efdd438 100644 --- a/lib/libarchive/archive_read_open_filename.c +++ b/lib/libarchive/archive_read_open_filename.c @@ -48,6 +48,7 @@ struct read_file_data { static int file_close(struct archive *, void *); static int file_open(struct archive *, void *); static ssize_t file_read(struct archive *, void *, const void **buff); +static ssize_t file_skip(struct archive *, void *, size_t request); int archive_read_open_file(struct archive *a, const char *filename, @@ -73,7 +74,7 @@ archive_read_open_file(struct archive *a, const char *filename, mine->block_size = block_size; mine->buffer = NULL; mine->fd = -1; - return (archive_read_open(a, mine, file_open, file_read, file_close)); + return (archive_read_open2(a, mine, file_open, file_read, file_skip, file_close)); } static int @@ -119,7 +120,6 @@ file_read(struct archive *a, void *client_data, const void **buff) struct read_file_data *mine = client_data; ssize_t bytes_read; - (void)a; /* UNUSED */ *buff = mine->buffer; bytes_read = read(mine->fd, mine->buffer, mine->block_size); if (bytes_read < 0) { @@ -132,6 +132,51 @@ file_read(struct archive *a, void *client_data, const void **buff) return (bytes_read); } +static ssize_t +file_skip(struct archive *a, void *client_data, size_t request) +{ + struct read_file_data *mine = client_data; + off_t old_offset, new_offset; + + /* Reduce request to the next smallest multiple of block_size */ + request = (request / mine->block_size) * mine->block_size; + /* + * Hurray for lazy evaluation: if the first lseek fails, the second + * one will not be executed. 
+ */ + if (((old_offset = lseek(mine->fd, 0, SEEK_CUR)) < 0) || + ((new_offset = lseek(mine->fd, request, SEEK_CUR)) < 0)) + { + if (errno == ESPIPE) + { + /* + * Failure to lseek() can be caused by the file + * descriptor pointing to a pipe, socket or FIFO. + * Return 0 here, so the compression layer will use + * read()s instead to advance the file descriptor. + * It's slower of course, but works as well. + */ + return (0); + } + /* + * There's been an error other than ESPIPE. This is most + * likely caused by a programmer error (too large request) + * or a corrupted archive file. + */ + if (mine->filename[0] == '\0') + /* + * Should never get here, since lseek() on stdin ought + * to return an ESPIPE error. + */ + archive_set_error(a, errno, "Error seeking in stdin"); + else + archive_set_error(a, errno, "Error seeking in '%s'", + mine->filename); + return (-1); + } + return (new_offset - old_offset); +} + static int file_close(struct archive *a, void *client_data) { diff --git a/lib/libarchive/archive_read_support_compression_bzip2.c b/lib/libarchive/archive_read_support_compression_bzip2.c index aa2d531..79742e8 100644 --- a/lib/libarchive/archive_read_support_compression_bzip2.c +++ b/lib/libarchive/archive_read_support_compression_bzip2.c @@ -187,6 +187,7 @@ init(struct archive *a, const void *buff, size_t n) a->compression_read_ahead = read_ahead; a->compression_read_consume = read_consume; + a->compression_skip = NULL; /* not supported */ a->compression_finish = finish; /* Initialize compression library. 
*/ diff --git a/lib/libarchive/archive_read_support_compression_compress.c b/lib/libarchive/archive_read_support_compression_compress.c index 30a7377..5f893ca 100644 --- a/lib/libarchive/archive_read_support_compression_compress.c +++ b/lib/libarchive/archive_read_support_compression_compress.c @@ -190,6 +190,7 @@ init(struct archive *a, const void *buff, size_t n) a->compression_read_ahead = read_ahead; a->compression_read_consume = read_consume; + a->compression_skip = NULL; /* not supported */ a->compression_finish = finish; state = malloc(sizeof(*state)); diff --git a/lib/libarchive/archive_read_support_compression_gzip.c b/lib/libarchive/archive_read_support_compression_gzip.c index b0cda3a..873ef86 100644 --- a/lib/libarchive/archive_read_support_compression_gzip.c +++ b/lib/libarchive/archive_read_support_compression_gzip.c @@ -191,6 +191,7 @@ init(struct archive *a, const void *buff, size_t n) a->compression_read_ahead = read_ahead; a->compression_read_consume = read_consume; + a->compression_skip = NULL; /* not supported */ a->compression_finish = finish; /* diff --git a/lib/libarchive/archive_read_support_compression_none.c b/lib/libarchive/archive_read_support_compression_none.c index 8e7ca3c..229f7bf 100644 --- a/lib/libarchive/archive_read_support_compression_none.c +++ b/lib/libarchive/archive_read_support_compression_none.c @@ -27,6 +27,7 @@ #include "archive_platform.h" __FBSDID("$FreeBSD$"); +#include <assert.h> #include <errno.h> #include <stdlib.h> #include <string.h> @@ -61,6 +62,8 @@ struct archive_decompress_none { */ #define BUFFER_SIZE 65536 +#define minimum(a, b) (a < b ? 
a : b) + static int archive_decompressor_none_bid(const void *, size_t); static int archive_decompressor_none_finish(struct archive *); static int archive_decompressor_none_init(struct archive *, @@ -69,6 +72,7 @@ static ssize_t archive_decompressor_none_read_ahead(struct archive *, const void **, size_t); static ssize_t archive_decompressor_none_read_consume(struct archive *, size_t); +static ssize_t archive_decompressor_none_skip(struct archive *, size_t); int archive_read_support_compression_none(struct archive *a) @@ -123,6 +127,7 @@ archive_decompressor_none_init(struct archive *a, const void *buff, size_t n) a->compression_data = state; a->compression_read_ahead = archive_decompressor_none_read_ahead; a->compression_read_consume = archive_decompressor_none_read_consume; + a->compression_skip = archive_decompressor_none_skip; a->compression_finish = archive_decompressor_none_finish; return (ARCHIVE_OK); @@ -251,6 +256,73 @@ archive_decompressor_none_read_consume(struct archive *a, size_t request) return (request); } +/* + * Skip at most request bytes. Skipped data is marked as consumed. + */ +static ssize_t +archive_decompressor_none_skip(struct archive *a, size_t request) +{ + struct archive_decompress_none *state; + ssize_t bytes_skipped, total_bytes_skipped = 0; + size_t min; + + state = a->compression_data; + if (state->fatal) + return (-1); + /* + * If there is data in the buffers already, use that first. + */ + if (state->avail > 0) { + min = minimum(request, state->avail); + bytes_skipped = archive_decompressor_none_read_consume(a, min); + request -= bytes_skipped; + total_bytes_skipped += bytes_skipped; + } + if (state->client_avail > 0) { + min = minimum(request, state->client_avail); + bytes_skipped = archive_decompressor_none_read_consume(a, min); + request -= bytes_skipped; + total_bytes_skipped += bytes_skipped; + } + if (request == 0) + return (total_bytes_skipped); + /* + * If no client_skipper is provided, just read the old way. 
It is very + * likely that after skipping, the request has not yet been fully + * satisfied (and is still > 0). In that case, read as well. + */ + if (a->client_skipper != NULL) { + bytes_skipped = (a->client_skipper)(a, a->client_data, + request); + if (bytes_skipped < 0) { /* error */ + state->client_total = state->client_avail = 0; + state->client_next = state->client_buff = NULL; + state->fatal = 1; + return (bytes_skipped); + } + total_bytes_skipped += bytes_skipped; + request -= bytes_skipped; + state->client_next = state->client_buff; + a->raw_position += bytes_skipped; + state->client_avail = state->client_total = 0; + } + while (request > 0) { + const void* dummy_buffer; + ssize_t bytes_read; + bytes_read = archive_decompressor_none_read_ahead(a, + &dummy_buffer, request); + if (bytes_read < 0) + return (bytes_read); + assert(bytes_read >= 0); /* precondition for cast below */ + min = minimum((size_t)bytes_read, request); + bytes_read = archive_decompressor_none_read_consume(a, min); + total_bytes_skipped += bytes_read; + request -= bytes_read; + } + assert(request == 0); + return (total_bytes_skipped); +} + static int archive_decompressor_none_finish(struct archive *a) { diff --git a/lib/libarchive/archive_read_support_format_tar.c b/lib/libarchive/archive_read_support_format_tar.c index 271a41f..cc0b5ed 100644 --- a/lib/libarchive/archive_read_support_format_tar.c +++ b/lib/libarchive/archive_read_support_format_tar.c @@ -193,6 +193,7 @@ static int archive_read_format_tar_bid(struct archive *); static int archive_read_format_tar_cleanup(struct archive *); static int archive_read_format_tar_read_data(struct archive *a, const void **buff, size_t *size, off_t *offset); +static int archive_read_format_tar_skip(struct archive *a); static int archive_read_format_tar_read_header(struct archive *, struct archive_entry *); static int checksum(struct archive *, const void *); @@ -260,7 +261,7 @@ archive_read_support_format_tar(struct archive *a) 
archive_read_format_tar_bid, archive_read_format_tar_read_header, archive_read_format_tar_read_data, - NULL, + archive_read_format_tar_skip, archive_read_format_tar_cleanup); if (r != ARCHIVE_OK) @@ -522,6 +523,50 @@ archive_read_format_tar_read_data(struct archive *a, } } +static int +archive_read_format_tar_skip(struct archive *a) +{ + ssize_t bytes_skipped; + struct tar* tar; + struct sparse_block *p; + int r = ARCHIVE_OK; + const void *b; /* dummy variables */ + size_t s; + off_t o; + + + tar = *(a->pformat_data); + if (a->compression_skip == NULL) { + while (r == ARCHIVE_OK) + r = archive_read_format_tar_read_data(a, &b, &s, &o); + return (r); + } + bytes_skipped = (a->compression_skip)(a, tar->entry_bytes_remaining); + if (bytes_skipped < 0) + return (ARCHIVE_FATAL); + /* same code as above in _tar_read_data() */ + tar->entry_bytes_remaining -= bytes_skipped; + while (tar->sparse_list != NULL && + tar->sparse_list->remaining == 0) { + p = tar->sparse_list; + tar->sparse_list = p->next; + free(p); + if (tar->sparse_list != NULL) + tar->entry_offset = tar->sparse_list->offset; + } + if (tar->sparse_list != NULL) { + if (tar->sparse_list->remaining < bytes_skipped) + bytes_skipped = tar->sparse_list->remaining; + tar->sparse_list->remaining -= bytes_skipped; + } + tar->entry_offset += bytes_skipped; + tar->entry_bytes_remaining -= bytes_skipped; + /* Reuse padding code above. */ + while (r == ARCHIVE_OK) + r = archive_read_format_tar_read_data(a, &b, &s, &o); + return (r); +} + /* * This function recursively interprets all of the headers associated * with a single entry. |