path: root/lib/libarchive/archive_read.c
Diffstat (limited to 'lib/libarchive/archive_read.c')
-rw-r--r--  lib/libarchive/archive_read.c  598
1 file changed, 453 insertions, 145 deletions
diff --git a/lib/libarchive/archive_read.c b/lib/libarchive/archive_read.c
index 259fd7c..6973847 100644
--- a/lib/libarchive/archive_read.c
+++ b/lib/libarchive/archive_read.c
@@ -53,9 +53,10 @@ __FBSDID("$FreeBSD$");
#include "archive_private.h"
#include "archive_read_private.h"
-static void choose_decompressor(struct archive_read *, const void*, size_t);
+#define minimum(a, b) (a < b ? a : b)
+
+static int build_stream(struct archive_read *);
static int choose_format(struct archive_read *);
-static off_t dummy_skip(struct archive_read *, off_t);
/*
* Allocate, initialize and return a struct archive object.
@@ -74,8 +75,15 @@ archive_read_new(void)
a->archive.state = ARCHIVE_STATE_NEW;
a->entry = archive_entry_new();
- /* We always support uncompressed archives. */
- archive_read_support_compression_none(&a->archive);
+ /* Initialize reblocking logic. */
+ a->buffer_size = 64 * 1024; /* 64k */
+ a->buffer = (char *)malloc(a->buffer_size);
+ a->next = a->buffer;
+ if (a->buffer == NULL) {
+ archive_entry_free(a->entry);
+ free(a);
+ return (NULL);
+ }
return (&a->archive);
}
@@ -108,6 +116,33 @@ archive_read_open(struct archive *a, void *client_data,
client_reader, NULL, client_closer);
}
+static ssize_t
+client_read_proxy(struct archive_read_source *self, const void **buff)
+{
+ return (self->archive->client.reader)((struct archive *)self->archive,
+ self->data, buff);
+}
+
+static int64_t
+client_skip_proxy(struct archive_read_source *self, int64_t request)
+{
+ return (self->archive->client.skipper)((struct archive *)self->archive,
+ self->data, request);
+}
+
+static int
+client_close_proxy(struct archive_read_source *self)
+{
+ int r = ARCHIVE_OK;
+
+ if (self->archive->client.closer != NULL)
+ r = (self->archive->client.closer)((struct archive *)self->archive,
+ self->data);
+ free(self);
+ return (r);
+}
+
+
int
archive_read_open2(struct archive *_a, void *client_data,
archive_open_callback *client_opener,
@@ -116,28 +151,15 @@ archive_read_open2(struct archive *_a, void *client_data,
archive_close_callback *client_closer)
{
struct archive_read *a = (struct archive_read *)_a;
- const void *buffer;
- ssize_t bytes_read;
int e;
- __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_open");
+ __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
+ "archive_read_open");
if (client_reader == NULL)
__archive_errx(1,
"No reader function provided to archive_read_open");
- /*
- * Set these NULL initially. If the open or initial read fails,
- * we'll leave them NULL to indicate that the file is invalid.
- * (In particular, this helps ensure that the closer doesn't
- * get called more than once.)
- */
- a->client_opener = NULL;
- a->client_reader = NULL;
- a->client_skipper = NULL;
- a->client_closer = NULL;
- a->client_data = NULL;
-
/* Open data source. */
if (client_opener != NULL) {
e =(client_opener)(&a->archive, client_data);
@@ -149,129 +171,103 @@ archive_read_open2(struct archive *_a, void *client_data,
}
}
- /* Read first block now for compress format detection. */
- bytes_read = (client_reader)(&a->archive, client_data, &buffer);
-
- if (bytes_read < 0) {
- /* If the first read fails, close before returning error. */
- if (client_closer)
- (client_closer)(&a->archive, client_data);
- /* client_reader should have already set error information. */
- return (ARCHIVE_FATAL);
- }
+ /* Save the client functions and mock up the initial source. */
+ a->client.opener = client_opener; /* Do we need to remember this? */
+ a->client.reader = client_reader;
+ a->client.skipper = client_skipper;
+ a->client.closer = client_closer;
+ a->client.data = client_data;
- /* Now that the client callbacks have worked, remember them. */
- a->client_opener = client_opener; /* Do we need to remember this? */
- a->client_reader = client_reader;
- a->client_skipper = client_skipper;
- a->client_closer = client_closer;
- a->client_data = client_data;
+ {
+ struct archive_read_source *source;
- /* Select a decompression routine. */
- choose_decompressor(a, buffer, (size_t)bytes_read);
- if (a->decompressor == NULL)
- return (ARCHIVE_FATAL);
+ source = calloc(1, sizeof(*source));
+ if (source == NULL)
+ return (ARCHIVE_FATAL);
+ source->reader = NULL;
+ source->upstream = NULL;
+ source->archive = a;
+ source->data = client_data;
+ source->read = client_read_proxy;
+ source->skip = client_skip_proxy;
+ source->close = client_close_proxy;
+ a->source = source;
+ }
- /* Initialize decompression routine with the first block of data. */
- e = (a->decompressor->init)(a, buffer, (size_t)bytes_read);
+ /* In case there's no filter. */
+ a->archive.compression_code = ARCHIVE_COMPRESSION_NONE;
+ a->archive.compression_name = "none";
+ /* Build out the input pipeline. */
+ e = build_stream(a);
if (e == ARCHIVE_OK)
a->archive.state = ARCHIVE_STATE_HEADER;
- /*
- * If the decompressor didn't register a skip function, provide a
- * dummy compression-layer skip function.
- */
- if (a->decompressor->skip == NULL)
- a->decompressor->skip = dummy_skip;
-
return (e);
}
/*
- * Allow each registered decompression routine to bid on whether it
- * wants to handle this stream. Return index of winning bidder.
+ * Allow each registered stream transform to bid on whether
+ * it wants to handle this stream. Repeat until we've finished
+ * building the pipeline.
*/
-static void
-choose_decompressor(struct archive_read *a,
- const void *buffer, size_t bytes_read)
+static int
+build_stream(struct archive_read *a)
{
- int decompression_slots, i, bid, best_bid;
- struct decompressor_t *decompressor, *best_decompressor;
+ int number_readers, i, bid, best_bid;
+ struct archive_reader *reader, *best_reader;
+ struct archive_read_source *source;
+ const void *block;
+ ssize_t bytes_read;
- decompression_slots = sizeof(a->decompressors) /
- sizeof(a->decompressors[0]);
+ /* Read first block now for compress format detection. */
+ bytes_read = (a->source->read)(a->source, &block);
+ if (bytes_read < 0) {
+ /* If the first read fails, close before returning error. */
+ if (a->source->close != NULL) {
+ (a->source->close)(a->source);
+ a->source = NULL;
+ }
+ /* source->read should have already set error information. */
+ return (ARCHIVE_FATAL);
+ }
+
+ number_readers = sizeof(a->readers) / sizeof(a->readers[0]);
best_bid = 0;
- a->decompressor = NULL;
- best_decompressor = NULL;
-
- decompressor = a->decompressors;
- for (i = 0; i < decompression_slots; i++) {
- if (decompressor->bid) {
- bid = (decompressor->bid)(buffer, bytes_read);
- if (bid > best_bid || best_decompressor == NULL) {
+ best_reader = NULL;
+
+ reader = a->readers;
+ for (i = 0, reader = a->readers; i < number_readers; i++, reader++) {
+ if (reader->bid != NULL) {
+ bid = (reader->bid)(reader, block, bytes_read);
+ if (bid > best_bid) {
best_bid = bid;
- best_decompressor = decompressor;
+ best_reader = reader;
}
}
- decompressor ++;
}
/*
- * There were no bidders; this is a serious programmer error
- * and demands a quick and definitive abort.
- */
- if (best_decompressor == NULL)
- __archive_errx(1, "No decompressors were registered; you "
- "must call at least one "
- "archive_read_support_compression_XXX function in order "
- "to successfully read an archive.");
-
- /*
- * There were bidders, but no non-zero bids; this means we can't
- * support this stream.
+ * If we have a winner, it becomes the next stage in the pipeline.
*/
- if (best_bid < 1) {
- archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
- "Unrecognized archive format");
- return;
- }
-
- /* Record the best decompressor for this stream. */
- a->decompressor = best_decompressor;
-}
-
-/*
- * Dummy skip function, for use if the compression layer doesn't provide
- * one: This code just reads data and discards it.
- */
-static off_t
-dummy_skip(struct archive_read * a, off_t request)
-{
- const void * dummy_buffer;
- ssize_t bytes_read;
- off_t bytes_skipped;
-
- for (bytes_skipped = 0; request > 0;) {
- bytes_read = (a->decompressor->read_ahead)(a, &dummy_buffer, 1);
- if (bytes_read < 0)
- return (bytes_read);
- if (bytes_read == 0) {
- /* Premature EOF. */
- archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
- "Truncated input file (need to skip %jd bytes)",
- (intmax_t)request);
+ if (best_reader != NULL) {
+ source = (best_reader->init)(a, best_reader, a->source,
+ block, bytes_read);
+ if (source == NULL)
return (ARCHIVE_FATAL);
- }
- if (bytes_read > request)
- bytes_read = (ssize_t)request;
- (a->decompressor->consume)(a, (size_t)bytes_read);
- request -= bytes_read;
- bytes_skipped += bytes_read;
+ /* Record the best decompressor for this stream. */
+ a->source = source;
+ /* Recurse to get next pipeline stage. */
+ return (build_stream(a));
}
- return (bytes_skipped);
+ /* Save first block of data. */
+ a->client_buff = block;
+ a->client_total = bytes_read;
+ a->client_next = a->client_buff;
+ a->client_avail = a->client_total;
+ return (ARCHIVE_OK);
}
/*
@@ -598,23 +594,24 @@ archive_read_close(struct archive *_a)
/* TODO: Clean up the formatters. */
- /* Clean up the decompressors. */
- n = sizeof(a->decompressors)/sizeof(a->decompressors[0]);
+ /* Clean up the stream pipeline. */
+ if (a->source != NULL) {
+ r1 = (a->source->close)(a->source);
+ if (r1 < r)
+ r = r1;
+ a->source = NULL;
+ }
+
+ /* Release the reader objects. */
+ n = sizeof(a->readers)/sizeof(a->readers[0]);
for (i = 0; i < n; i++) {
- if (a->decompressors[i].finish != NULL) {
- r1 = (a->decompressors[i].finish)(a);
+ if (a->readers[i].free != NULL) {
+ r1 = (a->readers[i].free)(&a->readers[i]);
if (r1 < r)
r = r1;
}
}
- /* Close the client stream. */
- if (a->client_closer != NULL) {
- r1 = ((a->client_closer)(&a->archive, a->client_data));
- if (r1 < r)
- r = r1;
- }
-
return (r);
}
@@ -651,6 +648,7 @@ archive_read_finish(struct archive *_a)
if (a->entry)
archive_entry_free(a->entry);
a->archive.magic = 0;
+ free(a->buffer);
free(a);
#if ARCHIVE_API_VERSION > 1
return (r);
@@ -700,40 +698,350 @@ __archive_read_register_format(struct archive_read *a,
* Used internally by decompression routines to register their bid and
* initialization functions.
*/
-struct decompressor_t *
-__archive_read_register_compression(struct archive_read *a,
- int (*bid)(const void *, size_t),
- int (*init)(struct archive_read *, const void *, size_t))
+struct archive_reader *
+__archive_read_get_reader(struct archive_read *a)
{
int i, number_slots;
__archive_check_magic(&a->archive,
ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
- "__archive_read_register_compression");
+ "__archive_read_get_reader");
- number_slots = sizeof(a->decompressors) / sizeof(a->decompressors[0]);
+ number_slots = sizeof(a->readers) / sizeof(a->readers[0]);
for (i = 0; i < number_slots; i++) {
- if (a->decompressors[i].bid == bid)
- return (a->decompressors + i);
- if (a->decompressors[i].bid == NULL) {
- a->decompressors[i].bid = bid;
- a->decompressors[i].init = init;
- return (a->decompressors + i);
- }
+ if (a->readers[i].bid == NULL)
+ return (a->readers + i);
}
__archive_errx(1, "Not enough slots for compression registration");
return (NULL); /* Never actually executed. */
}
-/* used internally to simplify read-ahead */
+/*
+ * The next three functions comprise the peek/consume internal I/O
+ * system used by archive format readers. This system allows fairly
+ * flexible read-ahead and allows the I/O code to operate in a
+ * zero-copy manner most of the time.
+ *
+ * In the ideal case, block providers give the I/O code blocks of data
+ * and __archive_read_ahead() just returns pointers directly into
+ * those blocks. Then __archive_read_consume() just bumps those
+ * pointers. Only if your request would span blocks does the I/O
+ * layer use a copy buffer to provide you with a contiguous block of
+ * data. The __archive_read_skip() is an optimization; it scans ahead
+ * very quickly (it usually translates into a seek() operation if
+ * you're reading uncompressed disk files).
+ *
+ * A couple of useful idioms:
+ * * "I just want some data." Ask for 1 byte and pay attention to
+ * the "number of bytes available" from __archive_read_ahead().
+ * You can consume more than you asked for; you just can't consume
+ * more than is available right now. If you consume everything that's
+ * immediately available, the next read_ahead() call will pull
+ * the next block.
+ * * "I want to output a large block of data." As above, ask for 1 byte,
+ * emit all that's available (up to whatever limit you have), then
+ * repeat until you're done.
+ * * "I want to peek ahead by a large amount." Ask for 4k or so, then
+ * double and repeat until you get an error or have enough. Note
+ * that the I/O layer will likely end up expanding its copy buffer
+ * to fit your request, so use this technique cautiously. This
+ * technique is used, for example, by some of the format tasting
+ * code that has uncertain look-ahead needs.
+ *
+ * TODO: Someday, provide a more generic __archive_read_seek() for
+ * those cases where it's useful. This is tricky because there are lots
+ * of cases where seek() is not available (reading gzip data from a
+ * network socket, for instance), so there needs to be a good way to
+ * communicate whether seek() is available and users of that interface
+ * need to use non-seeking strategies whenever seek() is not available.
+ */
+
+/*
+ * Looks ahead in the input stream:
+ * * If 'avail' pointer is provided, that returns number of bytes available
+ * in the current buffer, which may be much larger than requested.
+ * * If end-of-file, *avail gets set to zero.
+ * * If error, *avail gets error code.
+ * * If request can be met, returns pointer to data, returns NULL
+ * if request is not met.
+ *
+ * Note: If you just want "some data", ask for 1 byte and pay attention
+ * to *avail, which will have the actual amount available. If you
+ * know exactly how many bytes you need, just ask for that and treat
+ * a NULL return as an error.
+ *
+ * Important: This does NOT move the file pointer. See
+ * __archive_read_consume() below.
+ */
+
+/*
+ * This is tricky. We need to provide our clients with pointers to
+ * contiguous blocks of memory but we want to avoid copying whenever
+ * possible.
+ *
+ * Mostly, this code returns pointers directly into the block of data
+ * provided by the client_read routine. It can do this unless the
+ * request would split across blocks. In that case, we have to copy
+ * into an internal buffer to combine reads.
+ */
const void *
-__archive_read_ahead(struct archive_read *a, size_t len)
+__archive_read_ahead(struct archive_read *a, size_t min, ssize_t *avail)
{
- const void *h;
+ ssize_t bytes_read;
+ size_t tocopy;
- if ((a->decompressor->read_ahead)(a, &h, len) < (ssize_t)len)
+ if (a->fatal) {
+ if (avail)
+ *avail = ARCHIVE_FATAL;
return (NULL);
- return (h);
+ }
+
+ /*
+ * Keep pulling more data until we can satisfy the request.
+ */
+ for (;;) {
+
+ /*
+ * If we can satisfy from the copy buffer, we're done.
+ */
+ if (a->avail >= min) {
+ if (avail != NULL)
+ *avail = a->avail;
+ return (a->next);
+ }
+
+ /*
+ * We can satisfy directly from client buffer if everything
+ * currently in the copy buffer is still in the client buffer.
+ */
+ if (a->client_total >= a->client_avail + a->avail
+ && a->client_avail + a->avail >= min) {
+ /* "Roll back" to client buffer. */
+ a->client_avail += a->avail;
+ a->client_next -= a->avail;
+ /* Copy buffer is now empty. */
+ a->avail = 0;
+ a->next = a->buffer;
+ /* Return data from client buffer. */
+ if (avail != NULL)
+ *avail = a->client_avail;
+ return (a->client_next);
+ }
+
+ /* Move data forward in copy buffer if necessary. */
+ if (a->next > a->buffer &&
+ a->next + min > a->buffer + a->buffer_size) {
+ if (a->avail > 0)
+ memmove(a->buffer, a->next, a->avail);
+ a->next = a->buffer;
+ }
+
+ /* If we've used up the client data, get more. */
+ if (a->client_avail <= 0) {
+ if (a->end_of_file) {
+ if (avail != NULL)
+ *avail = 0;
+ return (NULL);
+ }
+ bytes_read = (a->source->read)(a->source,
+ &a->client_buff);
+ if (bytes_read < 0) { /* Read error. */
+ a->client_total = a->client_avail = 0;
+ a->client_next = a->client_buff = NULL;
+ a->fatal = 1;
+ if (avail != NULL)
+ *avail = ARCHIVE_FATAL;
+ return (NULL);
+ }
+ if (bytes_read == 0) { /* Premature end-of-file. */
+ a->client_total = a->client_avail = 0;
+ a->client_next = a->client_buff = NULL;
+ a->end_of_file = 1;
+ /* Return whatever we do have. */
+ if (avail != NULL)
+ *avail = a->avail;
+ return (NULL);
+ }
+ a->archive.raw_position += bytes_read;
+ a->client_total = bytes_read;
+ a->client_avail = a->client_total;
+ a->client_next = a->client_buff;
+ }
+ else
+ {
+ /*
+ * We can't satisfy the request from the copy
+ * buffer or the existing client data, so we
+ * need to copy more client data over to the
+ * copy buffer.
+ */
+
+ /* Ensure the buffer is big enough. */
+ if (min > a->buffer_size) {
+ size_t s, t;
+ char *p;
+
+ /* Double the buffer; watch for overflow. */
+ s = t = a->buffer_size;
+ while (s < min) {
+ t *= 2;
+ if (t <= s) { /* Integer overflow! */
+ archive_set_error(&a->archive,
+ ENOMEM,
+ "Unable to allocate copy buffer");
+ a->fatal = 1;
+ if (avail != NULL)
+ *avail = ARCHIVE_FATAL;
+ return (NULL);
+ }
+ s = t;
+ }
+ /* Now s >= min, so allocate a new buffer. */
+ p = (char *)malloc(s);
+ if (p == NULL) {
+ archive_set_error(&a->archive, ENOMEM,
+ "Unable to allocate copy buffer");
+ a->fatal = 1;
+ if (avail != NULL)
+ *avail = ARCHIVE_FATAL;
+ return (NULL);
+ }
+ /* Move data into newly-enlarged buffer. */
+ if (a->avail > 0)
+ memmove(p, a->next, a->avail);
+ free(a->buffer);
+ a->next = a->buffer = p;
+ a->buffer_size = s;
+ }
+
+ /* We can add client data to copy buffer. */
+ /* First estimate: copy to fill rest of buffer. */
+ tocopy = (a->buffer + a->buffer_size)
+ - (a->next + a->avail);
+ /* Don't waste time buffering more than we need to. */
+ if (tocopy + a->avail > min)
+ tocopy = min - a->avail;
+ /* Don't copy more than is available. */
+ if (tocopy > a->client_avail)
+ tocopy = a->client_avail;
+
+ memcpy(a->next + a->avail, a->client_next,
+ tocopy);
+ /* Remove this data from client buffer. */
+ a->client_next += tocopy;
+ a->client_avail -= tocopy;
+ /* add it to copy buffer. */
+ a->avail += tocopy;
+ }
+ }
+}
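/*
 * Illustrative sketch, not part of the change above: a hypothetical
 * format-tasting function could use __archive_read_ahead() to peek at
 * a fixed-size signature without consuming anything.  The function
 * name and magic bytes are invented for the example; the prototypes
 * are assumed to come from archive_read_private.h.
 */
static int
example_taste_signature(struct archive_read *a)
{
	const unsigned char *p;
	ssize_t avail;

	/* Ask for exactly the bytes we need; NULL means we can't get them. */
	p = __archive_read_ahead(a, 4, &avail);
	if (p == NULL)
		return (0);
	/* Peek only; nothing is consumed, so the next reader sees the same bytes. */
	if (p[0] == 'E' && p[1] == 'X' && p[2] == 'M' && p[3] == 'P')
		return (64);
	return (0);
}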
+
+/*
+ * Move the file pointer forward. This should be called after
+ * __archive_read_ahead() returns data to you. Don't try to move
+ * ahead by more than the amount of data available according to
+ * __archive_read_ahead().
+ */
+/*
+ * Mark the appropriate data as used. Note that the request here will
+ * often be much smaller than the size of the previous read_ahead
+ * request.
+ */
+ssize_t
+__archive_read_consume(struct archive_read *a, size_t request)
+{
+ if (a->avail > 0) {
+ /* Read came from copy buffer. */
+ a->next += request;
+ a->avail -= request;
+ } else {
+ /* Read came from client buffer. */
+ a->client_next += request;
+ a->client_avail -= request;
+ }
+ a->archive.file_position += request;
+ return (request);
+}
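/*
 * Illustrative sketch, not part of the change above: the "ask for 1
 * byte" idiom described in the comments earlier.  A format reader that
 * wants to stream `remaining` bytes of entry data could loop like
 * this; example_sink() is a hypothetical stand-in for whatever
 * consumes the data.
 */
static int
example_stream_data(struct archive_read *a, int64_t remaining)
{
	const void *p;
	ssize_t avail;

	while (remaining > 0) {
		p = __archive_read_ahead(a, 1, &avail);
		if (p == NULL || avail <= 0)
			return (ARCHIVE_FATAL);	/* Error or truncated input. */
		/* Consume no more than the caller still needs. */
		if (avail > remaining)
			avail = (ssize_t)remaining;
		example_sink(p, (size_t)avail);	/* hypothetical data sink */
		__archive_read_consume(a, (size_t)avail);
		remaining -= avail;
	}
	return (ARCHIVE_OK);
}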
+
+/*
+ * Move the file pointer ahead by an arbitrary amount. If you're
+ * reading uncompressed data from a disk file, this will actually
+ * translate into a seek() operation. Even in cases where seek()
+ * isn't feasible, this at least pushes the read-and-discard loop
+ * down closer to the data source.
+ */
+int64_t
+__archive_read_skip(struct archive_read *a, int64_t request)
+{
+ off_t bytes_skipped, total_bytes_skipped = 0;
+ size_t min;
+
+ if (a->fatal)
+ return (-1);
+ /*
+ * If there is data in the buffers already, use that first.
+ */
+ if (a->avail > 0) {
+ min = minimum(request, (off_t)a->avail);
+ bytes_skipped = __archive_read_consume(a, min);
+ request -= bytes_skipped;
+ total_bytes_skipped += bytes_skipped;
+ }
+ if (a->client_avail > 0) {
+ min = minimum(request, (off_t)a->client_avail);
+ bytes_skipped = __archive_read_consume(a, min);
+ request -= bytes_skipped;
+ total_bytes_skipped += bytes_skipped;
+ }
+ if (request == 0)
+ return (total_bytes_skipped);
+ /*
+ * If a client_skipper was provided, try that first.
+ */
+#if ARCHIVE_API_VERSION < 2
+ if ((a->source->skip != NULL) && (request < SSIZE_MAX)) {
+#else
+ if (a->source->skip != NULL) {
+#endif
+ bytes_skipped = (a->source->skip)(a->source, request);
+ if (bytes_skipped < 0) { /* error */
+ a->client_total = a->client_avail = 0;
+ a->client_next = a->client_buff = NULL;
+ a->fatal = 1;
+ return (bytes_skipped);
+ }
+ total_bytes_skipped += bytes_skipped;
+ a->archive.file_position += bytes_skipped;
+ request -= bytes_skipped;
+ a->client_next = a->client_buff;
+ a->archive.raw_position += bytes_skipped;
+ a->client_avail = a->client_total = 0;
+ }
+ /*
+ * Note that client_skipper will usually not satisfy the
+ * full request (due to low-level blocking concerns),
+ * so even if client_skipper is provided, we may still
+ * have to use ordinary reads to finish out the request.
+ */
+ while (request > 0) {
+ const void* dummy_buffer;
+ ssize_t bytes_read;
+ dummy_buffer = __archive_read_ahead(a, 1, &bytes_read);
+ if (bytes_read < 0)
+ return (bytes_read);
+ if (bytes_read == 0) {
+ /* We hit EOF before we satisfied the skip request. */
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+ "Truncated input file (need to skip %jd bytes)",
+ (intmax_t)request);
+ return (ARCHIVE_FATAL);
+ }
+ min = (size_t)(minimum(bytes_read, request));
+ bytes_read = __archive_read_consume(a, min);
+ total_bytes_skipped += bytes_read;
+ request -= bytes_read;
+ }
+ return (total_bytes_skipped);
}
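As a rough usage illustration (not part of the commit shown above), a format reader that must advance past padding can rely on __archive_read_skip() rather than reading and discarding the bytes itself; the helper name and the 512-byte record size below are hypothetical.

/* Hypothetical helper: skip pad bytes up to the next 512-byte record boundary. */
static int
example_skip_padding(struct archive_read *a, int64_t entry_bytes)
{
	int64_t pad = (512 - (entry_bytes % 512)) % 512;

	if (pad > 0 && __archive_read_skip(a, pad) < 0)
		return (ARCHIVE_FATAL);
	return (ARCHIVE_OK);
}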