summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorkientzle <kientzle@FreeBSD.org>2009-03-05 02:37:05 +0000
committerkientzle <kientzle@FreeBSD.org>2009-03-05 02:37:05 +0000
commit512a7ff8a0eee1ed7c654a5cefbd90e9e5c75223 (patch)
tree1857c9327fc913694da2495f264607234706cdd9 /lib
parent699d0e7065958be02871326442891ae3dd0b7b00 (diff)
downloadFreeBSD-src-512a7ff8a0eee1ed7c654a5cefbd90e9e5c75223.zip
FreeBSD-src-512a7ff8a0eee1ed7c654a5cefbd90e9e5c75223.tar.gz
Merge r551,r561 from libarchive.googlecode.com: Update gzip read filter
to fully take advantage of the new peek/consume I/O support. In particular, this now properly handles concatenated gzip streams.
Diffstat (limited to 'lib')
-rw-r--r--lib/libarchive/archive_read_support_compression_gzip.c447
-rw-r--r--lib/libarchive/test/test_compat_gzip.c2
2 files changed, 218 insertions, 231 deletions
diff --git a/lib/libarchive/archive_read_support_compression_gzip.c b/lib/libarchive/archive_read_support_compression_gzip.c
index 1c7473d..91ba270 100644
--- a/lib/libarchive/archive_read_support_compression_gzip.c
+++ b/lib/libarchive/archive_read_support_compression_gzip.c
@@ -51,14 +51,11 @@ __FBSDID("$FreeBSD$");
#ifdef HAVE_ZLIB_H
struct private_data {
z_stream stream;
+ char in_stream;
unsigned char *out_block;
size_t out_block_size;
int64_t total_out;
unsigned long crc;
- int header_count;
- char header_done;
- char header_state;
- char header_flags;
char eof; /* True = found end of compressed data. */
};
@@ -72,10 +69,14 @@ static int gzip_filter_close(struct archive_read_filter *);
* them. (In fact, we like detecting them because we can give better
* error messages.) So the bid framework here gets compiled even
* if zlib is unavailable.
+ *
+ * TODO: If zlib is unavailable, gzip_bidder_init() should
+ * use the compress_program framework to try to fire up an external
+ * gunzip program.
*/
-static int gzip_bidder_bid(struct archive_read_filter_bidder *, struct archive_read_filter *);
+static int gzip_bidder_bid(struct archive_read_filter_bidder *,
+ struct archive_read_filter *);
static int gzip_bidder_init(struct archive_read_filter *);
-static int gzip_bidder_free(struct archive_read_filter_bidder *);
int
archive_read_support_compression_gzip(struct archive *_a)
@@ -89,62 +90,116 @@ archive_read_support_compression_gzip(struct archive *_a)
bidder->data = NULL;
bidder->bid = gzip_bidder_bid;
bidder->init = gzip_bidder_init;
- bidder->free = gzip_bidder_free;
- return (ARCHIVE_OK);
-}
-
-static int
-gzip_bidder_free(struct archive_read_filter_bidder *self){
- (void)self; /* UNUSED */
+ bidder->free = NULL; /* No data, so no cleanup necessary. */
return (ARCHIVE_OK);
}
/*
- * Test whether we can handle this data.
+ * Read and verify the header.
*
- * This logic returns zero if any part of the signature fails. It
- * also tries to Do The Right Thing if a very short buffer prevents us
- * from verifying as much as we would like.
+ * Returns zero if the header couldn't be validated, else returns
+ * number of bytes in header. If pbits is non-NULL, it receives a
+ * count of bits verified, suitable for use by bidder.
*/
static int
-gzip_bidder_bid(struct archive_read_filter_bidder *self,
- struct archive_read_filter *filter)
+peek_at_header(struct archive_read_filter *filter, int *pbits)
{
- const unsigned char *buffer;
- size_t avail;
- int bits_checked;
-
- (void)self; /* UNUSED */
-
- buffer = __archive_read_filter_ahead(filter, 8, &avail);
- if (buffer == NULL)
+ const unsigned char *p;
+ ssize_t avail, len;
+ int bits = 0;
+ int header_flags;
+
+ /* Start by looking at the first ten bytes of the header, which
+ * is all fixed layout. */
+ len = 10;
+ p = __archive_read_filter_ahead(filter, len, &avail);
+ if (p == NULL || avail == 0)
return (0);
-
- bits_checked = 0;
- if (buffer[0] != 037) /* Verify first ID byte. */
+ if (p[0] != 037)
return (0);
- bits_checked += 8;
-
- if (buffer[1] != 0213) /* Verify second ID byte. */
+ bits += 8;
+ if (p[1] != 0213)
return (0);
- bits_checked += 8;
-
- if (buffer[2] != 8) /* Compression must be 'deflate'. */
+ bits += 8;
+ if (p[2] != 8) /* We only support deflation. */
return (0);
- bits_checked += 8;
+ bits += 8;
+ if ((p[3] & 0xE0)!= 0) /* No reserved flags set. */
+ return (0);
+ bits += 3;
+ header_flags = p[3];
+ /* Bytes 4-7 are mod time. */
+ /* Byte 8 is deflate flags. */
+ /* XXXX TODO: return deflate flags back to consume_header for use
+ in initializing the decompressor. */
+ /* Byte 9 is OS. */
+
+ /* Optional extra data: 2 byte length plus variable body. */
+ if (header_flags & 4) {
+ p = __archive_read_filter_ahead(filter, len + 2, &avail);
+ if (p == NULL)
+ return (0);
+ len += ((int)p[len + 1] << 8) | (int)p[len];
+ }
- if ((buffer[3] & 0xE0)!= 0) /* No reserved flags set. */
+ /* Null-terminated optional filename. */
+ if (header_flags & 8) {
+ do {
+ ++len;
+ if (avail < len)
+ p = __archive_read_filter_ahead(filter,
+ len, &avail);
+ if (p == NULL)
+ return (0);
+ } while (p[len - 1] != 0);
+ }
+
+ /* Null-terminated optional comment. */
+ if (header_flags & 16) {
+ do {
+ ++len;
+ if (avail < len)
+ p = __archive_read_filter_ahead(filter,
+ len, &avail);
+ if (p == NULL)
+ return (0);
+ } while (p[len - 1] != 0);
+ }
+
+ /* Optional header CRC */
+ if ((header_flags & 2)) {
+ p = __archive_read_filter_ahead(filter, len + 2, &avail);
+ if (p == NULL)
+ return (0);
+#if 0
+ int hcrc = ((int)p[len + 1] << 8) | (int)p[len];
+ int crc = /* XXX TODO: Compute header CRC. */;
+ if (crc != hcrc)
return (0);
- bits_checked += 3;
+ bits += 16;
+#endif
+ len += 2;
+ }
+
+ if (pbits != NULL)
+ *pbits = bits;
+ return (len);
+}
+
+/*
+ * Bidder just verifies the header and returns the number of verified bits.
+ */
+static int
+gzip_bidder_bid(struct archive_read_filter_bidder *self,
+ struct archive_read_filter *filter)
+{
+ int bits_checked;
- /*
- * TODO: Verify more; in particular, gzip has an optional
- * header CRC, which would give us 16 more verified bits. We
- * may also be able to verify certain constraints on other
- * fields.
- */
+ (void)self; /* UNUSED */
- return (bits_checked);
+ if (peek_at_header(filter, &bits_checked))
+ return (bits_checked);
+ return (0);
}
@@ -185,8 +240,7 @@ gzip_bidder_init(struct archive_read_filter *self)
free(out_block);
free(state);
archive_set_error(&self->archive->archive, ENOMEM,
- "Can't allocate data for %s decompression",
- self->name);
+ "Can't allocate data for gzip decompression");
return (ARCHIVE_FATAL);
}
@@ -197,148 +251,99 @@ gzip_bidder_init(struct archive_read_filter *self)
self->skip = NULL; /* not supported */
self->close = gzip_filter_close;
- state->crc = crc32(0L, NULL, 0);
- state->header_done = 0; /* We've not yet begun to parse header... */
+ state->in_stream = 0; /* We're not actually within a stream yet. */
return (ARCHIVE_OK);
}
static int
-header(struct archive_read_filter *self)
+consume_header(struct archive_read_filter *self)
{
struct private_data *state;
- int ret, b;
+ ssize_t avail;
+ size_t len;
+ int ret;
state = (struct private_data *)self->data;
- /*
- * If still parsing the header, interpret the
- * next byte.
- */
- b = *(state->stream.next_in++);
- state->stream.avail_in--;
-
- /*
- * Simple state machine to parse the GZip header one byte at
- * a time. If you see a way to make this easier to understand,
- * please let me know. ;-)
- */
- switch (state->header_state) {
- case 0: /* First byte of signature. */
- /* We only return EOF for a failure here. */
- if (b != 037)
- return (ARCHIVE_EOF);
- state->header_state = 1;
+ /* If this is a real header, consume it. */
+ len = peek_at_header(self->upstream, NULL);
+ if (len == 0)
+ return (ARCHIVE_EOF);
+ __archive_read_filter_consume(self->upstream, len);
+
+ /* Initialize CRC accumulator. */
+ state->crc = crc32(0L, NULL, 0);
+
+ /* Initialize compression library. */
+ state->stream.next_in = (unsigned char *)(uintptr_t)
+ __archive_read_filter_ahead(self->upstream, 1, &avail);
+ state->stream.avail_in = avail;
+ ret = inflateInit2(&(state->stream),
+ -15 /* Don't check for zlib header */);
+
+ /* Decipher the error code. */
+ switch (ret) {
+ case Z_OK:
+ state->in_stream = 1;
+ return (ARCHIVE_OK);
+ case Z_STREAM_ERROR:
+ archive_set_error(&self->archive->archive,
+ ARCHIVE_ERRNO_MISC,
+ "Internal error initializing compression library: "
+ "invalid setup parameter");
break;
- case 1: /* Second byte of signature. */
- case 2: /* Compression type must be 8 == deflate. */
- if (b != (0xff & "\037\213\010"[(int)state->header_state])) {
- archive_set_error(&self->archive->archive,
- ARCHIVE_ERRNO_MISC,
- "Invalid GZip header (saw %d at offset %d)",
- b, state->header_state);
- return (ARCHIVE_FATAL);
- }
- ++state->header_state;
+ case Z_MEM_ERROR:
+ archive_set_error(&self->archive->archive, ENOMEM,
+ "Internal error initializing compression library: "
+ "out of memory");
break;
- case 3: /* GZip flags. */
- state->header_flags = b;
- state->header_state = 4;
+ case Z_VERSION_ERROR:
+ archive_set_error(&self->archive->archive,
+ ARCHIVE_ERRNO_MISC,
+ "Internal error initializing compression library: "
+ "invalid library version");
break;
- case 4: case 5: case 6: case 7: /* Mod time. */
- case 8: /* Deflate flags. */
- case 9: /* OS. */
- ++state->header_state;
+ default:
+ archive_set_error(&self->archive->archive,
+ ARCHIVE_ERRNO_MISC,
+ "Internal error initializing compression library: "
+ " Zlib error %d", ret);
break;
- case 10: /* Optional Extra: First byte of Length. */
- if ((state->header_flags & 4)) {
- state->header_count = 255 & (int)b;
- state->header_state = 11;
- break;
- }
- /* Fall through if no Optional Extra field. */
- case 11: /* Optional Extra: Second byte of Length. */
- if ((state->header_flags & 4)) {
- state->header_count
- = (0xff00 & ((int)b << 8)) | state->header_count;
- state->header_state = 12;
- break;
- }
- /* Fall through if no Optional Extra field. */
- case 12: /* Optional Extra Field: counted length. */
- if ((state->header_flags & 4)) {
- --state->header_count;
- if (state->header_count == 0) state->header_state = 13;
- else state->header_state = 12;
- break;
- }
- /* Fall through if no Optional Extra field. */
- case 13: /* Optional Original Filename. */
- if ((state->header_flags & 8)) {
- if (b == 0) state->header_state = 14;
- else state->header_state = 13;
- break;
- }
- /* Fall through if no Optional Original Filename. */
- case 14: /* Optional Comment. */
- if ((state->header_flags & 16)) {
- if (b == 0) state->header_state = 15;
- else state->header_state = 14;
- break;
- }
- /* Fall through if no Optional Comment. */
- case 15: /* Optional Header CRC: First byte. */
- if ((state->header_flags & 2)) {
- state->header_state = 16;
- break;
- }
- /* Fall through if no Optional Header CRC. */
- case 16: /* Optional Header CRC: Second byte. */
- if ((state->header_flags & 2)) {
- state->header_state = 17;
- break;
- }
- /* Fall through if no Optional Header CRC. */
- case 17: /* First byte of compressed data. */
- state->header_done = 1; /* done with header */
- state->stream.avail_in++; /* Discard first byte. */
- state->stream.next_in--;
+ }
+ return (ARCHIVE_FATAL);
+}
- /* Initialize compression library. */
- ret = inflateInit2(&(state->stream),
- -15 /* Don't check for zlib header */);
+static int
+consume_trailer(struct archive_read_filter *self)
+{
+ struct private_data *state;
+ const unsigned char *p;
+ ssize_t avail;
- /* Decipher the error code. */
- switch (ret) {
- case Z_OK:
- return (ARCHIVE_OK);
- case Z_STREAM_ERROR:
- archive_set_error(&self->archive->archive,
- ARCHIVE_ERRNO_MISC,
- "Internal error initializing compression library: "
- "invalid setup parameter");
- break;
- case Z_MEM_ERROR:
- archive_set_error(&self->archive->archive, ENOMEM,
- "Internal error initializing compression library: "
- "out of memory");
- break;
- case Z_VERSION_ERROR:
- archive_set_error(&self->archive->archive,
- ARCHIVE_ERRNO_MISC,
- "Internal error initializing compression library: "
- "invalid library version");
- break;
- default:
- archive_set_error(&self->archive->archive,
- ARCHIVE_ERRNO_MISC,
- "Internal error initializing compression library: "
- " Zlib error %d", ret);
- break;
- }
+ state = (struct private_data *)self->data;
+
+ state->in_stream = 0;
+ switch (inflateEnd(&(state->stream))) {
+ case Z_OK:
+ break;
+ default:
+ archive_set_error(&self->archive->archive,
+ ARCHIVE_ERRNO_MISC,
+ "Failed to clean up gzip decompressor");
return (ARCHIVE_FATAL);
}
+ /* GZip trailer is a fixed 8 byte structure. */
+ p = __archive_read_filter_ahead(self->upstream, 8, &avail);
+ if (p == NULL || avail == 0)
+ return (ARCHIVE_FATAL);
+
+ /* XXX TODO: Verify the length and CRC. */
+
+ /* We've verified the trailer, so consume it now. */
+ __archive_read_filter_consume(self->upstream, 8);
+
return (ARCHIVE_OK);
}
@@ -346,12 +351,11 @@ static ssize_t
gzip_filter_read(struct archive_read_filter *self, const void **p)
{
struct private_data *state;
- size_t read_avail, decompressed;
- const void *read_buf;
+ size_t decompressed;
+ ssize_t avail_in;
int ret;
state = (struct private_data *)self->data;
- read_avail = 0;
/* Empty our output buffer. */
state->stream.next_out = state->out_block;
@@ -359,62 +363,47 @@ gzip_filter_read(struct archive_read_filter *self, const void **p)
/* Try to fill the output buffer. */
while (state->stream.avail_out > 0 && !state->eof) {
- /* If the last upstream block is done, get another one. */
- if (state->stream.avail_in == 0) {
- read_buf = __archive_read_filter_ahead(self->upstream,
- 1, &ret);
- if (read_buf == NULL)
- return (ARCHIVE_FATAL);
- /* stream.next_in is really const, but zlib
- * doesn't declare it so. <sigh> */
- state->stream.next_in
- = (unsigned char *)(uintptr_t)read_buf;
- state->stream.avail_in = ret;
- /* There is no more data, return whatever we have. */
- if (ret == 0) {
+ /* If we're not in a stream, read a header
+ * and initialize the decompression library. */
+ if (!state->in_stream) {
+ ret = consume_header(self);
+ if (ret == ARCHIVE_EOF) {
state->eof = 1;
break;
}
- __archive_read_filter_consume(self->upstream, ret);
- }
-
- /* If we're still parsing header bytes, walk through those. */
- if (!state->header_done) {
- ret = header(self);
if (ret < ARCHIVE_OK)
return (ret);
- if (ret == ARCHIVE_EOF)
- state->eof = 1;
- } else {
- /* Decompress as much as we can in one pass. */
- /* XXX Skip trailer XXX */
- ret = inflate(&(state->stream), 0);
- switch (ret) {
- case Z_STREAM_END: /* Found end of stream. */
- switch (inflateEnd(&(state->stream))) {
- case Z_OK:
- break;
- default:
- archive_set_error(&self->archive->archive,
- ARCHIVE_ERRNO_MISC,
- "Failed to clean up gzip decompressor");
- return (ARCHIVE_FATAL);
- }
- /* zlib has been torn down */
- state->header_done = 0;
- state->eof = 1;
- /* FALL THROUGH */
- case Z_OK: /* Decompressor made some progress. */
- /* If we filled our buffer, update stats and return. */
- break;
- default:
- /* Return an error. */
- archive_set_error(&self->archive->archive,
- ARCHIVE_ERRNO_MISC,
- "%s decompression failed",
- self->archive->archive.compression_name);
- return (ARCHIVE_FATAL);
- }
+ }
+
+ /* Peek at the next available data. */
+ /* ZLib treats stream.next_in as const but doesn't declare
+ * it so, hence this ugly cast. */
+ state->stream.next_in = (unsigned char *)(uintptr_t)
+ __archive_read_filter_ahead(self->upstream, 1, &avail_in);
+ if (state->stream.next_in == NULL)
+ return (ARCHIVE_FATAL);
+ state->stream.avail_in = avail_in;
+
+ /* Decompress and consume some of that data. */
+ ret = inflate(&(state->stream), 0);
+ switch (ret) {
+ case Z_OK: /* Decompressor made some progress. */
+ __archive_read_filter_consume(self->upstream,
+ avail_in - state->stream.avail_in);
+ break;
+ case Z_STREAM_END: /* Found end of stream. */
+ __archive_read_filter_consume(self->upstream,
+ avail_in - state->stream.avail_in);
+ /* Consume the stream trailer; release the
+ * decompression library. */
+ ret = consume_trailer(self);
+ break;
+ default:
+ /* Return an error. */
+ archive_set_error(&self->archive->archive,
+ ARCHIVE_ERRNO_MISC,
+ "gzip decompression failed");
+ return (ARCHIVE_FATAL);
}
}
@@ -426,7 +415,6 @@ gzip_filter_read(struct archive_read_filter *self, const void **p)
else
*p = state->out_block;
return (decompressed);
-
}
/*
@@ -441,15 +429,14 @@ gzip_filter_close(struct archive_read_filter *self)
state = (struct private_data *)self->data;
ret = ARCHIVE_OK;
- if (state->header_done) {
+ if (state->in_stream) {
switch (inflateEnd(&(state->stream))) {
case Z_OK:
break;
default:
archive_set_error(&(self->archive->archive),
- ARCHIVE_ERRNO_MISC,
- "Failed to clean up %s compressor",
- self->archive->archive.compression_name);
+ ARCHIVE_ERRNO_MISC,
+ "Failed to clean up gzip compressor");
ret = ARCHIVE_FATAL;
}
}
diff --git a/lib/libarchive/test/test_compat_gzip.c b/lib/libarchive/test/test_compat_gzip.c
index 0140a44..b1d9be3 100644
--- a/lib/libarchive/test/test_compat_gzip.c
+++ b/lib/libarchive/test/test_compat_gzip.c
@@ -86,7 +86,7 @@ DEFINE_TEST(test_compat_gzip)
/* This sample has been 'split', each piece compressed separately,
* then concatenated. Gunzip will emit the concatenated result. */
/* Not supported in libarchive 2.6 and earlier */
- /* verify("test_compat_gzip_1.tgz"); */
+ verify("test_compat_gzip_1.tgz");
/* This sample has been compressed as a single stream, but then
* some unrelated garbage text has been appended to the end. */
verify("test_compat_gzip_2.tgz");
OpenPOWER on IntegriCloud