From 709ff019d467067d0736588aa9e855a3db12abaa Mon Sep 17 00:00:00 2001 From: kientzle Date: Sun, 7 Nov 2010 03:40:37 +0000 Subject: If the Zip reader doesn't see a PK signature block because there's inter-entry garbage, just scan forward to find the next one. This allows us to handle a lot of Zip archives that have been modified in-place. Thanks to: Gleb Kurtsou for sending me a sample archive --- lib/libarchive/archive_read_support_format_zip.c | 53 ++++++++++++++++++++++-- lib/libarchive/test/test_compat_zip.c | 33 +++++++++++++++ lib/libarchive/test/test_compat_zip_2.zip.uu | 10 +++++ 3 files changed, 93 insertions(+), 3 deletions(-) create mode 100644 lib/libarchive/test/test_compat_zip_2.zip.uu (limited to 'lib/libarchive') diff --git a/lib/libarchive/archive_read_support_format_zip.c b/lib/libarchive/archive_read_support_format_zip.c index 4a24cc8..0fa1fa3 100644 --- a/lib/libarchive/archive_read_support_format_zip.c +++ b/lib/libarchive/archive_read_support_format_zip.c @@ -128,6 +128,7 @@ static int archive_read_format_zip_read_data(struct archive_read *, static int archive_read_format_zip_read_data_skip(struct archive_read *a); static int archive_read_format_zip_read_header(struct archive_read *, struct archive_entry *); +static int search_next_signature(struct archive_read *); static int zip_read_data_deflate(struct archive_read *a, const void **buff, size_t *size, off_t *offset); static int zip_read_data_none(struct archive_read *a, const void **buff, @@ -317,10 +318,17 @@ archive_read_format_zip_read_header(struct archive_read *a, signature = (const char *)h; } + /* If we don't see a PK signature here, scan forward. */ if (signature[0] != 'P' || signature[1] != 'K') { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Bad ZIP file"); - return (ARCHIVE_FATAL); + r = search_next_signature(a); + if (r != ARCHIVE_OK) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Bad ZIP file"); + return (ARCHIVE_FATAL); + } + if ((h = __archive_read_ahead(a, 4, NULL)) == NULL) + return (ARCHIVE_FATAL); + signature = (const char *)h; } /* @@ -375,6 +383,42 @@ archive_read_format_zip_read_header(struct archive_read *a, } static int +search_next_signature(struct archive_read *a) +{ + const void *h; + const char *p, *q; + size_t skip; + ssize_t bytes; + int64_t skipped = 0; + + for (;;) { + h = __archive_read_ahead(a, 4, &bytes); + if (h == NULL) + return (ARCHIVE_FATAL); + p = h; + q = p + bytes; + + while (p + 4 <= q) { + if (p[0] == 'P' && p[1] == 'K') { + if ((p[2] == '\001' && p[3] == '\002') + || (p[2] == '\003' && p[3] == '\004') + || (p[2] == '\005' && p[3] == '\006') + || (p[2] == '\007' && p[3] == '\010') + || (p[2] == '0' && p[3] == '0')) { + skip = p - (const char *)h; + __archive_read_consume(a, skip); + return (ARCHIVE_OK); + } + } + ++p; + } + skip = p - (const char *)h; + __archive_read_consume(a, skip); + skipped += skip; + } +} + +static int zip_read_file_header(struct archive_read *a, struct archive_entry *entry, struct zip *zip) { @@ -888,6 +932,9 @@ process_extra(const void* extra, struct zip* zip) if (datasize >= 4) zip->gid = archive_le16dec(p + offset + 2); break; + case 0x7875: + /* Info-Zip Unix Extra Field (type 3) "ux". */ + break; default: break; } diff --git a/lib/libarchive/test/test_compat_zip.c b/lib/libarchive/test/test_compat_zip.c index fefc6fe..adb377c 100644 --- a/lib/libarchive/test/test_compat_zip.c +++ b/lib/libarchive/test/test_compat_zip.c @@ -71,10 +71,43 @@ finish: #endif } +/* + * Verify that we skip junk between entries. The compat_zip_2.zip file + * has several bytes of junk between 'file1' and 'file2'. Such + * junk is routinely introduced by some Zip writers when they manipulate + * existing zip archives. + */ +static void +test_compat_zip_2(void) +{ + char name[] = "test_compat_zip_2.zip"; + struct archive_entry *ae; + struct archive *a; + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_compression_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); + extract_reference_file(name); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, name, 10240)); + + /* Read first entry. */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("file1", archive_entry_pathname(ae)); + + /* Read first entry. */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("file2", archive_entry_pathname(ae)); + + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualInt(ARCHIVE_OK, archive_read_close(a)); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); +} + DEFINE_TEST(test_compat_zip) { test_compat_zip_1(); + test_compat_zip_2(); } diff --git a/lib/libarchive/test/test_compat_zip_2.zip.uu b/lib/libarchive/test/test_compat_zip_2.zip.uu new file mode 100644 index 0000000..c33e9d9 --- /dev/null +++ b/lib/libarchive/test/test_compat_zip_2.zip.uu @@ -0,0 +1,10 @@ +$FreeBSD$ + +begin 644 test_compat_zip_2.zip +M4$L#!`H``````'V59CT````````````````%````9FEL93$M2E5.2RU02P,$ +M"@``````@95F/<>D!,D&````!@````4```!F:6QE,F9I;&4R"E!+`0(>`PH` +M`````'V59CT````````````````%``````````````"D@0````!F:6QE,5!+ +M`0(>`PH``````(&59CW'I`3)!@````8````%``````````````"D@2D```!F +::6QE,E!+!08``````@`"`&8```!2```````` +` +end -- cgit v1.1