diff options
Diffstat (limited to 'libarchive/archive_read_support_format_zip.c')
-rw-r--r-- | libarchive/archive_read_support_format_zip.c | 1240 |
1 files changed, 785 insertions, 455 deletions
diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c index 572cc58..f805855 100644 --- a/libarchive/archive_read_support_format_zip.c +++ b/libarchive/archive_read_support_format_zip.c @@ -1,5 +1,6 @@ /*- * Copyright (c) 2004 Tim Kientzle + * Copyright (c) 2011 Michihiro NAKAJIMA * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,17 +30,16 @@ __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_zip.c 201102 #ifdef HAVE_ERRNO_H #include <errno.h> #endif -#include <stdio.h> #ifdef HAVE_STDLIB_H #include <stdlib.h> #endif -#include <time.h> #ifdef HAVE_ZLIB_H #include <zlib.h> #endif #include "archive.h" #include "archive_entry.h" +#include "archive_entry_locale.h" #include "archive_private.h" #include "archive_read_private.h" #include "archive_endian.h" @@ -48,10 +48,39 @@ __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_zip.c 201102 #include "archive_crc32.h" #endif +struct zip_entry { + int64_t local_header_offset; + int64_t compressed_size; + int64_t uncompressed_size; + int64_t gid; + int64_t uid; + struct archive_entry *entry; + time_t mtime; + time_t atime; + time_t ctime; + uint32_t crc32; + uint16_t mode; + uint16_t flags; + char compression; + char system; +}; + struct zip { + /* Structural information about the archive. */ + int64_t central_directory_offset; + size_t central_directory_size; + size_t central_directory_entries; + char have_central_directory; + + /* List of entries (seekable Zip only) */ + size_t entries_remaining; + struct zip_entry *zip_entries; + struct zip_entry *entry; + + size_t unconsumed; + /* entry_bytes_remaining is the number of bytes we expect. */ int64_t entry_bytes_remaining; - int64_t entry_offset; /* These count the number of bytes actually read for the entry. */ int64_t entry_compressed_bytes_read; @@ -60,27 +89,12 @@ struct zip { /* Running CRC32 of the decompressed data */ unsigned long entry_crc32; - unsigned version; - unsigned system; - unsigned flags; - unsigned compression; - const char * compression_name; - time_t mtime; - time_t ctime; - time_t atime; - mode_t mode; - uid_t uid; - gid_t gid; - /* Flags to mark progress of decompression. */ char decompress_init; char end_of_entry; - unsigned long crc32; ssize_t filename_length; ssize_t extra_length; - int64_t uncompressed_size; - int64_t compressed_size; unsigned char *uncompressed_buffer; size_t uncompressed_buffer_size; @@ -89,65 +103,90 @@ struct zip { char stream_valid; #endif - struct archive_string pathname; struct archive_string extra; + struct archive_string_conv *sconv; + struct archive_string_conv *sconv_default; + struct archive_string_conv *sconv_utf8; + int init_default_conversion; char format_name[64]; }; #define ZIP_LENGTH_AT_END 8 - -struct zip_file_header { - char signature[4]; - char version[2]; - char flags[2]; - char compression[2]; - char timedate[4]; - char crc32[4]; - char compressed_size[4]; - char uncompressed_size[4]; - char filename_length[2]; - char extra_length[2]; -}; - -static const char *compression_names[] = { - "uncompressed", - "shrinking", - "reduced-1", - "reduced-2", - "reduced-3", - "reduced-4", - "imploded", - "reserved", - "deflation" -}; - -static int archive_read_format_zip_bid(struct archive_read *); +#define ZIP_ENCRYPTED (1<<0) +#define ZIP_STRONG_ENCRYPTED (1<<6) +#define ZIP_UTF8_NAME (1<<11) + +static int archive_read_format_zip_streamable_bid(struct archive_read *, int); +static int archive_read_format_zip_seekable_bid(struct archive_read *, int); +static int archive_read_format_zip_options(struct archive_read *, + const char *, const char *); static int archive_read_format_zip_cleanup(struct archive_read *); static int archive_read_format_zip_read_data(struct archive_read *, - const void **, size_t *, off_t *); + const void **, size_t *, int64_t *); static int archive_read_format_zip_read_data_skip(struct archive_read *a); -static int archive_read_format_zip_read_header(struct archive_read *, +static int archive_read_format_zip_seekable_read_header(struct archive_read *, + struct archive_entry *); +static int archive_read_format_zip_streamable_read_header(struct archive_read *, struct archive_entry *); -static int search_next_signature(struct archive_read *); +#ifdef HAVE_ZLIB_H static int zip_read_data_deflate(struct archive_read *a, const void **buff, - size_t *size, off_t *offset); + size_t *size, int64_t *offset); +#endif static int zip_read_data_none(struct archive_read *a, const void **buff, - size_t *size, off_t *offset); -static int zip_read_file_header(struct archive_read *a, - struct archive_entry *entry, struct zip *zip); + size_t *size, int64_t *offset); +static int zip_read_local_file_header(struct archive_read *a, + struct archive_entry *entry, struct zip *); static time_t zip_time(const char *); -static void process_extra(const void* extra, struct zip* zip); +static const char *compression_name(int compression); +static void process_extra(const char *, size_t, struct zip_entry *); + +int +archive_read_support_format_zip_streamable(struct archive *_a) +{ + struct archive_read *a = (struct archive_read *)_a; + struct zip *zip; + int r; + + archive_check_magic(_a, ARCHIVE_READ_MAGIC, + ARCHIVE_STATE_NEW, "archive_read_support_format_zip"); + + zip = (struct zip *)malloc(sizeof(*zip)); + if (zip == NULL) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate zip data"); + return (ARCHIVE_FATAL); + } + memset(zip, 0, sizeof(*zip)); + + r = __archive_read_register_format(a, + zip, + "zip", + archive_read_format_zip_streamable_bid, + archive_read_format_zip_options, + archive_read_format_zip_streamable_read_header, + archive_read_format_zip_read_data, + archive_read_format_zip_read_data_skip, + archive_read_format_zip_cleanup); + + if (r != ARCHIVE_OK) + free(zip); + return (ARCHIVE_OK); +} int -archive_read_support_format_zip(struct archive *_a) +archive_read_support_format_zip_seekable(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; struct zip *zip; int r; + archive_check_magic(_a, ARCHIVE_READ_MAGIC, + ARCHIVE_STATE_NEW, "archive_read_support_format_zip_seekable"); + zip = (struct zip *)malloc(sizeof(*zip)); if (zip == NULL) { - archive_set_error(&a->archive, ENOMEM, "Can't allocate zip data"); + archive_set_error(&a->archive, ENOMEM, + "Can't allocate zip data"); return (ARCHIVE_FATAL); } memset(zip, 0, sizeof(*zip)); @@ -155,9 +194,9 @@ archive_read_support_format_zip(struct archive *_a) r = __archive_read_register_format(a, zip, "zip", - archive_read_format_zip_bid, - NULL, - archive_read_format_zip_read_header, + archive_read_format_zip_seekable_bid, + archive_read_format_zip_options, + archive_read_format_zip_seekable_read_header, archive_read_format_zip_read_data, archive_read_format_zip_read_data_skip, archive_read_format_zip_cleanup); @@ -167,13 +206,201 @@ archive_read_support_format_zip(struct archive *_a) return (ARCHIVE_OK); } +int +archive_read_support_format_zip(struct archive *a) +{ + int r; + r = archive_read_support_format_zip_streamable(a); + if (r != ARCHIVE_OK) + return r; + return (archive_read_support_format_zip_seekable(a)); +} +/* + * TODO: This is a performance sink because it forces the read core to + * drop buffered data from the start of file, which will then have to + * be re-read again if this bidder loses. + * + * We workaround this a little by passing in the best bid so far so + * that later bidders can do nothing if they know they'll never + * outbid. But we can certainly do better... + */ static int -archive_read_format_zip_bid(struct archive_read *a) +archive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid) { + struct zip *zip = (struct zip *)a->format->data; + int64_t filesize; const char *p; - const void *buff; - ssize_t bytes_avail, offset; + + /* If someone has already bid more than 32, then avoid + trashing the look-ahead buffers with a seek. */ + if (best_bid > 32) + return (-1); + + filesize = __archive_read_seek(a, -22, SEEK_END); + /* If we can't seek, then we can't bid. */ + if (filesize <= 0) + return 0; + + /* TODO: More robust search for end of central directory record. */ + if ((p = __archive_read_ahead(a, 22, NULL)) == NULL) + return 0; + /* First four bytes are signature for end of central directory + record. Four zero bytes ensure this isn't a multi-volume + Zip file (which we don't yet support). */ + if (memcmp(p, "PK\005\006\000\000\000\000", 8) != 0) + return 0; + + /* Since we've already done the hard work of finding the + end of central directory record, let's save the important + information. */ + zip->central_directory_entries = archive_le16dec(p + 10); + zip->central_directory_size = archive_le32dec(p + 12); + zip->central_directory_offset = archive_le32dec(p + 16); + + /* Just one volume, so central dir must all be on this volume. */ + if (zip->central_directory_entries != archive_le16dec(p + 8)) + return 0; + /* Central directory can't extend beyond end of this file. */ + if (zip->central_directory_offset + zip->central_directory_size > filesize) + return 0; + + /* This is just a tiny bit higher than the maximum returned by + the streaming Zip bidder. This ensures that the more accurate + seeking Zip parser wins whenever seek is available. */ + return 32; +} + +static int +slurp_central_directory(struct archive_read *a, struct zip *zip) +{ + unsigned i; + + __archive_read_seek(a, zip->central_directory_offset, SEEK_SET); + + zip->zip_entries = calloc(zip->central_directory_entries, sizeof(struct zip_entry)); + for (i = 0; i < zip->central_directory_entries; ++i) { + struct zip_entry *zip_entry = &zip->zip_entries[i]; + size_t filename_length, extra_length, comment_length; + uint32_t external_attributes; + const char *p; + + if ((p = __archive_read_ahead(a, 46, NULL)) == NULL) + return ARCHIVE_FATAL; + if (memcmp(p, "PK\001\002", 4) != 0) { + archive_set_error(&a->archive, + -1, "Invalid central directory signature"); + return ARCHIVE_FATAL; + } + zip->have_central_directory = 1; + /* version = p[4]; */ + zip_entry->system = p[5]; + /* version_required = archive_le16dec(p + 6); */ + zip_entry->flags = archive_le16dec(p + 8); + zip_entry->compression = archive_le16dec(p + 10); + zip_entry->mtime = zip_time(p + 12); + zip_entry->crc32 = archive_le32dec(p + 16); + zip_entry->compressed_size = archive_le32dec(p + 20); + zip_entry->uncompressed_size = archive_le32dec(p + 24); + filename_length = archive_le16dec(p + 28); + extra_length = archive_le16dec(p + 30); + comment_length = archive_le16dec(p + 32); + /* disk_start = archive_le16dec(p + 34); */ /* Better be zero. */ + /* internal_attributes = archive_le16dec(p + 36); */ /* text bit */ + external_attributes = archive_le32dec(p + 38); + zip_entry->local_header_offset = archive_le32dec(p + 42); + + /* If we can't guess the mode, leave it zero here; + when we read the local file header we might get + more information. */ + zip_entry->mode = 0; + if (zip_entry->system == 3) { + zip_entry->mode = external_attributes >> 16; + } + + /* We don't read the filename until we get to the + local file header. Reading it here would speed up + table-of-contents operations (removing the need to + find and read local file header to get the + filename) at the cost of requiring a lot of extra + space. */ + /* We don't read the extra block here. We assume it + will be duplicated at the local file header. */ + __archive_read_consume(a, + 46 + filename_length + extra_length + comment_length); + } + + /* TODO: Sort zip entries by file offset so that we + can optimize get_next_header() to use skip instead of + seek. */ + + return ARCHIVE_OK; +} + +static int +archive_read_format_zip_seekable_read_header(struct archive_read *a, + struct archive_entry *entry) +{ + struct zip *zip = (struct zip *)a->format->data; + int r; + + a->archive.archive_format = ARCHIVE_FORMAT_ZIP; + if (a->archive.archive_format_name == NULL) + a->archive.archive_format_name = "ZIP"; + + if (zip->zip_entries == NULL) { + r = slurp_central_directory(a, zip); + zip->entries_remaining = zip->central_directory_entries; + if (r != ARCHIVE_OK) + return r; + zip->entry = zip->zip_entries; + } else { + ++zip->entry; + } + + if (zip->entries_remaining <= 0) + return ARCHIVE_EOF; + --zip->entries_remaining; + + /* TODO: If entries are sorted by offset within the file, we + should be able to skip here instead of seeking. Skipping is + typically faster (easier for I/O layer to optimize). */ + __archive_read_seek(a, zip->entry->local_header_offset, SEEK_SET); + zip->unconsumed = 0; + r = zip_read_local_file_header(a, entry, zip); + if (r != ARCHIVE_OK) + return r; + if ((zip->entry->mode & AE_IFMT) == AE_IFLNK) { + const void *p; + size_t linkname_length = archive_entry_size(entry); + + archive_entry_set_size(entry, 0); + p = __archive_read_ahead(a, linkname_length, NULL); + if (p == NULL) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Truncated Zip file"); + return ARCHIVE_FATAL; + } + + if (archive_entry_copy_symlink_l(entry, p, linkname_length, + NULL) != 0) { + /* NOTE: If the last argument is NULL, this will + * fail only by memeory allocation failure. */ + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for Symlink"); + return (ARCHIVE_FATAL); + } + /* TODO: handle character-set issues? */ + } + return ARCHIVE_OK; +} + +static int +archive_read_format_zip_streamable_bid(struct archive_read *a, int best_bid) +{ + const char *p; + + (void)best_bid; /* UNUSED */ if ((p = __archive_read_ahead(a, 4, NULL)) == NULL) return (-1); @@ -192,321 +419,308 @@ archive_read_format_zip_bid(struct archive_read *a) return (30); } - /* - * Attempt to handle self-extracting archives - * by noting a PE header and searching forward - * up to 128k for a 'PK\003\004' marker. - */ - if (p[0] == 'M' && p[1] == 'Z') { - /* - * TODO: Optimize by initializing 'offset' to an - * estimate of the likely start of the archive data - * based on values in the PE header. Note that we - * don't need to be exact, but we mustn't skip too - * far. The search below will compensate if we - * undershoot. - */ - offset = 0; - while (offset < 124000) { - /* Get 4k of data beyond where we stopped. */ - buff = __archive_read_ahead(a, offset + 4096, - &bytes_avail); - if (buff == NULL) - break; - p = (const char *)buff + offset; - while (p + 9 < (const char *)buff + bytes_avail) { - if (p[0] == 'P' && p[1] == 'K' /* signature */ - && p[2] == 3 && p[3] == 4 /* File entry */ - && p[8] == 8 /* compression == deflate */ - && p[9] == 0 /* High byte of compression */ - ) - { - return (30); - } - ++p; - } - offset = p - (const char *)buff; - } - } + /* TODO: It's worth looking ahead a little bit for a valid + * PK signature. In particular, that would make it possible + * to read some UUEncoded SFX files or SFX files coming from + * a network socket. */ return (0); } -/* - * Search forward for a "PK\003\004" file header. This handles the - * case of self-extracting archives, where there is an executable - * prepended to the ZIP archive. - */ static int -skip_sfx(struct archive_read *a) +archive_read_format_zip_options(struct archive_read *a, + const char *key, const char *val) { - const void *h; - const char *p, *q; - size_t skip; - ssize_t bytes; + struct zip *zip; + int ret = ARCHIVE_FAILED; - /* - * TODO: We should be able to skip forward by a bunch - * by lifting some values from the PE header. We don't - * need to be exact (we're still going to search forward - * to find the header), but it will speed things up and - * reduce the chance of a false positive. - */ - for (;;) { - h = __archive_read_ahead(a, 4, &bytes); - if (bytes < 4) - return (ARCHIVE_FATAL); - p = h; - q = p + bytes; - - /* - * Scan ahead until we find something that looks - * like the zip header. - */ - while (p + 4 < q) { - switch (p[3]) { - case '\004': - /* TODO: Additional verification here. */ - if (memcmp("PK\003\004", p, 4) == 0) { - skip = p - (const char *)h; - __archive_read_consume(a, skip); - return (ARCHIVE_OK); - } - p += 4; - break; - case '\003': p += 1; break; - case 'K': p += 2; break; - case 'P': p += 3; break; - default: p += 4; break; - } + zip = (struct zip *)(a->format->data); + if (strcmp(key, "compat-2x") == 0) { + /* Handle filnames as libarchive 2.x */ + zip->init_default_conversion = (val != NULL) ? 1 : 0; + ret = ARCHIVE_OK; + } else if (strcmp(key, "hdrcharset") == 0) { + if (val == NULL || val[0] == 0) + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "zip: hdrcharset option needs a character-set name"); + else { + zip->sconv = archive_string_conversion_from_charset( + &a->archive, val, 0); + if (zip->sconv != NULL) { + if (strcmp(val, "UTF-8") == 0) + zip->sconv_utf8 = zip->sconv; + ret = ARCHIVE_OK; + } else + ret = ARCHIVE_FATAL; } - skip = p - (const char *)h; - __archive_read_consume(a, skip); - } + } else + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "zip: unknown keyword ``%s''", key); + + return (ret); } static int -archive_read_format_zip_read_header(struct archive_read *a, +archive_read_format_zip_streamable_read_header(struct archive_read *a, struct archive_entry *entry) { - const void *h; - const char *signature; struct zip *zip; - int r = ARCHIVE_OK, r1; a->archive.archive_format = ARCHIVE_FORMAT_ZIP; if (a->archive.archive_format_name == NULL) a->archive.archive_format_name = "ZIP"; zip = (struct zip *)(a->format->data); - zip->decompress_init = 0; - zip->end_of_entry = 0; - zip->entry_uncompressed_bytes_read = 0; - zip->entry_compressed_bytes_read = 0; - zip->entry_crc32 = crc32(0, NULL, 0); - if ((h = __archive_read_ahead(a, 4, NULL)) == NULL) - return (ARCHIVE_FATAL); - - signature = (const char *)h; - if (signature[0] == 'M' && signature[1] == 'Z') { - /* This is an executable? Must be self-extracting... */ - r = skip_sfx(a); - if (r < ARCHIVE_WARN) - return (r); - if ((h = __archive_read_ahead(a, 4, NULL)) == NULL) - return (ARCHIVE_FATAL); - signature = (const char *)h; - } - /* If we don't see a PK signature here, scan forward. */ - if (signature[0] != 'P' || signature[1] != 'K') { - r = search_next_signature(a); - if (r != ARCHIVE_OK) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Bad ZIP file"); - return (ARCHIVE_FATAL); - } - if ((h = __archive_read_ahead(a, 4, NULL)) == NULL) - return (ARCHIVE_FATAL); - signature = (const char *)h; - } - - /* - * "PK00" signature is used for "split" archives that - * only have a single segment. This means we can just - * skip the PK00; the first real file header should follow. - */ - if (signature[2] == '0' && signature[3] == '0') { - __archive_read_consume(a, 4); - if ((h = __archive_read_ahead(a, 4, NULL)) == NULL) - return (ARCHIVE_FATAL); - signature = (const char *)h; - if (signature[0] != 'P' || signature[1] != 'K') { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Bad ZIP file"); - return (ARCHIVE_FATAL); + /* Make sure we have a zip_entry structure to use. */ + if (zip->zip_entries == NULL) { + zip->zip_entries = malloc(sizeof(struct zip_entry)); + if (zip->zip_entries == NULL) { + archive_set_error(&a->archive, ENOMEM, "Out of memory"); + return ARCHIVE_FATAL; } } + zip->entry = zip->zip_entries; + memset(zip->entry, 0, sizeof(struct zip_entry)); - if (signature[2] == '\001' && signature[3] == '\002') { - /* Beginning of central directory. */ - return (ARCHIVE_EOF); - } - - if (signature[2] == '\003' && signature[3] == '\004') { - /* Regular file entry. */ - r1 = zip_read_file_header(a, entry, zip); - if (r1 != ARCHIVE_OK) - return (r1); - return (r); - } - - if (signature[2] == '\005' && signature[3] == '\006') { - /* End-of-archive record. */ - return (ARCHIVE_EOF); - } - - if (signature[2] == '\007' && signature[3] == '\010') { - /* - * We should never encounter this record here; - * see ZIP_LENGTH_AT_END handling below for details. - */ - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Bad ZIP file: Unexpected end-of-entry record"); - return (ARCHIVE_FATAL); - } - - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Damaged ZIP file or unsupported format variant (%d,%d)", - signature[2], signature[3]); - return (ARCHIVE_FATAL); -} - -static int -search_next_signature(struct archive_read *a) -{ - const void *h; - const char *p, *q; - size_t skip; - ssize_t bytes; - int64_t skipped = 0; - + /* Search ahead for the next local file header. */ + __archive_read_consume(a, zip->unconsumed); + zip->unconsumed = 0; for (;;) { - h = __archive_read_ahead(a, 4, &bytes); - if (h == NULL) + int64_t skipped = 0; + const char *p, *end; + ssize_t bytes; + + p = __archive_read_ahead(a, 4, &bytes); + if (p == NULL) return (ARCHIVE_FATAL); - p = h; - q = p + bytes; + end = p + bytes; - while (p + 4 <= q) { + while (p + 4 <= end) { if (p[0] == 'P' && p[1] == 'K') { - if ((p[2] == '\001' && p[3] == '\002') - || (p[2] == '\003' && p[3] == '\004') - || (p[2] == '\005' && p[3] == '\006') - || (p[2] == '\007' && p[3] == '\010') - || (p[2] == '0' && p[3] == '0')) { - skip = p - (const char *)h; - __archive_read_consume(a, skip); - return (ARCHIVE_OK); + if (p[2] == '\001' && p[3] == '\002') + /* Beginning of central directory. */ + return (ARCHIVE_EOF); + + if (p[2] == '\003' && p[3] == '\004') { + /* Regular file entry. */ + __archive_read_consume(a, skipped); + return zip_read_local_file_header(a, entry, zip); } + + if (p[2] == '\005' && p[3] == '\006') + /* End of central directory. */ + return (ARCHIVE_EOF); } ++p; + ++skipped; } - skip = p - (const char *)h; - __archive_read_consume(a, skip); - skipped += skip; + __archive_read_consume(a, skipped); } } +/* + * Assumes file pointer is at beginning of local file header. + */ static int -zip_read_file_header(struct archive_read *a, struct archive_entry *entry, +zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, struct zip *zip) { - const struct zip_file_header *p; + const char *p; const void *h; + const wchar_t *wp; + const char *cp; + size_t len, filename_length, extra_length; + struct archive_string_conv *sconv; + struct zip_entry *zip_entry = zip->entry; + uint32_t local_crc32; + int64_t compressed_size, uncompressed_size; + int ret = ARCHIVE_OK; + char version; + + zip->decompress_init = 0; + zip->end_of_entry = 0; + zip->entry_uncompressed_bytes_read = 0; + zip->entry_compressed_bytes_read = 0; + zip->entry_crc32 = crc32(0, NULL, 0); + + /* Setup default conversion. */ + if (zip->sconv == NULL && !zip->init_default_conversion) { + zip->sconv_default = + archive_string_default_conversion_for_read(&(a->archive)); + zip->init_default_conversion = 1; + } - if ((p = __archive_read_ahead(a, sizeof *p, NULL)) == NULL) { + if ((p = __archive_read_ahead(a, 30, NULL)) == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file header"); return (ARCHIVE_FATAL); } - zip->version = p->version[0]; - zip->system = p->version[1]; - zip->flags = archive_le16dec(p->flags); - zip->compression = archive_le16dec(p->compression); - if (zip->compression < - sizeof(compression_names)/sizeof(compression_names[0])) - zip->compression_name = compression_names[zip->compression]; - else - zip->compression_name = "??"; - zip->mtime = zip_time(p->timedate); - zip->ctime = 0; - zip->atime = 0; - zip->mode = 0; - zip->uid = 0; - zip->gid = 0; - zip->crc32 = archive_le32dec(p->crc32); - zip->filename_length = archive_le16dec(p->filename_length); - zip->extra_length = archive_le16dec(p->extra_length); - zip->uncompressed_size = archive_le32dec(p->uncompressed_size); - zip->compressed_size = archive_le32dec(p->compressed_size); - - __archive_read_consume(a, sizeof(struct zip_file_header)); - + if (memcmp(p, "PK\003\004", 4) != 0) { + archive_set_error(&a->archive, -1, "Damaged Zip archive"); + return ARCHIVE_FATAL; + } + version = p[4]; + zip_entry->system = p[5]; + zip_entry->flags = archive_le16dec(p + 6); + zip_entry->compression = archive_le16dec(p + 8); + zip_entry->mtime = zip_time(p + 10); + local_crc32 = archive_le32dec(p + 14); + compressed_size = archive_le32dec(p + 18); + uncompressed_size = archive_le32dec(p + 22); + filename_length = archive_le16dec(p + 26); + extra_length = archive_le16dec(p + 28); + + __archive_read_consume(a, 30); + + if (zip->have_central_directory) { + /* If we read the central dir entry, we must have size information + as well, so ignore the length-at-end flag. */ + zip_entry->flags &= ~ZIP_LENGTH_AT_END; + /* If we have values from both the local file header + and the central directory, warn about mismatches + which might indicate a damaged file. But some + writers always put zero in the local header; don't + bother warning about that. */ + if (local_crc32 != 0 && local_crc32 != zip_entry->crc32) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Inconsistent CRC32 values"); + ret = ARCHIVE_WARN; + } + if (compressed_size != 0 + && compressed_size != zip_entry->compressed_size) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Inconsistent compressed size"); + ret = ARCHIVE_WARN; + } + if (uncompressed_size != 0 + && uncompressed_size != zip_entry->uncompressed_size) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Inconsistent uncompressed size"); + ret = ARCHIVE_WARN; + } + } else { + /* If we don't have the CD info, use whatever we do have. */ + zip_entry->crc32 = local_crc32; + zip_entry->compressed_size = compressed_size; + zip_entry->uncompressed_size = uncompressed_size; + } /* Read the filename. */ - if ((h = __archive_read_ahead(a, zip->filename_length, NULL)) == NULL) { + if ((h = __archive_read_ahead(a, filename_length, NULL)) == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file header"); return (ARCHIVE_FATAL); } - if (archive_string_ensure(&zip->pathname, zip->filename_length) == NULL) - __archive_errx(1, "Out of memory"); - archive_strncpy(&zip->pathname, h, zip->filename_length); - __archive_read_consume(a, zip->filename_length); - archive_entry_set_pathname(entry, zip->pathname.s); - - if (zip->pathname.s[archive_strlen(&zip->pathname) - 1] == '/') - zip->mode = AE_IFDIR | 0777; + if (zip_entry->flags & ZIP_UTF8_NAME) { + /* The filename is stored to be UTF-8. */ + if (zip->sconv_utf8 == NULL) { + zip->sconv_utf8 = + archive_string_conversion_from_charset( + &a->archive, "UTF-8", 1); + if (zip->sconv_utf8 == NULL) + return (ARCHIVE_FATAL); + } + sconv = zip->sconv_utf8; + } else if (zip->sconv != NULL) + sconv = zip->sconv; else - zip->mode = AE_IFREG | 0777; + sconv = zip->sconv_default; + + if (archive_entry_copy_pathname_l(entry, + h, filename_length, sconv) != 0) { + if (errno == ENOMEM) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for Pathname"); + return (ARCHIVE_FATAL); + } + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Pathname cannot be converted " + "from %s to current locale.", + archive_string_conversion_charset_name(sconv)); + ret = ARCHIVE_WARN; + } + __archive_read_consume(a, filename_length); + + if (zip_entry->mode == 0) { + /* Especially in streaming mode, we can end up + here without having seen any mode information. + Guess from the filename. */ + wp = archive_entry_pathname_w(entry); + if (wp != NULL) { + len = wcslen(wp); + if (len > 0 && wp[len - 1] == L'/') + zip_entry->mode = AE_IFDIR | 0777; + else + zip_entry->mode = AE_IFREG | 0777; + } else { + cp = archive_entry_pathname(entry); + len = (cp != NULL)?strlen(cp):0; + if (len > 0 && cp[len - 1] == '/') + zip_entry->mode = AE_IFDIR | 0777; + else + zip_entry->mode = AE_IFREG | 0777; + } + } /* Read the extra data. */ - if ((h = __archive_read_ahead(a, zip->extra_length, NULL)) == NULL) { + if ((h = __archive_read_ahead(a, extra_length, NULL)) == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file header"); return (ARCHIVE_FATAL); } - process_extra(h, zip); - __archive_read_consume(a, zip->extra_length); + process_extra(h, extra_length, zip_entry); + __archive_read_consume(a, extra_length); /* Populate some additional entry fields: */ - archive_entry_set_mode(entry, zip->mode); - archive_entry_set_uid(entry, zip->uid); - archive_entry_set_gid(entry, zip->gid); - archive_entry_set_mtime(entry, zip->mtime, 0); - archive_entry_set_ctime(entry, zip->ctime, 0); - archive_entry_set_atime(entry, zip->atime, 0); + archive_entry_set_mode(entry, zip_entry->mode); + archive_entry_set_uid(entry, zip_entry->uid); + archive_entry_set_gid(entry, zip_entry->gid); + archive_entry_set_mtime(entry, zip_entry->mtime, 0); + archive_entry_set_ctime(entry, zip_entry->ctime, 0); + archive_entry_set_atime(entry, zip_entry->atime, 0); /* Set the size only if it's meaningful. */ - if (0 == (zip->flags & ZIP_LENGTH_AT_END)) - archive_entry_set_size(entry, zip->uncompressed_size); + if (0 == (zip_entry->flags & ZIP_LENGTH_AT_END)) + archive_entry_set_size(entry, zip_entry->uncompressed_size); - zip->entry_bytes_remaining = zip->compressed_size; - zip->entry_offset = 0; + zip->entry_bytes_remaining = zip_entry->compressed_size; /* If there's no body, force read_data() to return EOF immediately. */ - if (0 == (zip->flags & ZIP_LENGTH_AT_END) + if (0 == (zip_entry->flags & ZIP_LENGTH_AT_END) && zip->entry_bytes_remaining < 1) zip->end_of_entry = 1; /* Set up a more descriptive format name. */ sprintf(zip->format_name, "ZIP %d.%d (%s)", - zip->version / 10, zip->version % 10, - zip->compression_name); + version / 10, version % 10, + compression_name(zip->entry->compression)); a->archive.archive_format_name = zip->format_name; - return (ARCHIVE_OK); + return (ret); +} + +static const char * +compression_name(int compression) +{ + static const char *compression_names[] = { + "uncompressed", + "shrinking", + "reduced-1", + "reduced-2", + "reduced-3", + "reduced-4", + "imploded", + "reserved", + "deflation" + }; + + if (compression < + sizeof(compression_names)/sizeof(compression_names[0])) + return compression_names[compression]; + else + return "??"; } /* Convert an MSDOS-style date/time into Unix-style time. */ @@ -532,52 +746,49 @@ zip_time(const char *p) static int archive_read_format_zip_read_data(struct archive_read *a, - const void **buff, size_t *size, off_t *offset) + const void **buff, size_t *size, int64_t *offset) { int r; - struct zip *zip; + struct zip *zip = (struct zip *)(a->format->data); - zip = (struct zip *)(a->format->data); + *offset = zip->entry_uncompressed_bytes_read; + *size = 0; + *buff = NULL; - /* - * If we hit end-of-entry last time, clean up and return - * ARCHIVE_EOF this time. - */ - if (zip->end_of_entry) { - *offset = zip->entry_uncompressed_bytes_read; - *size = 0; - *buff = NULL; + /* If we hit end-of-entry last time, return ARCHIVE_EOF. */ + if (zip->end_of_entry) + return (ARCHIVE_EOF); + + /* Return EOF immediately if this is a non-regular file. */ + if (AE_IFREG != (zip->entry->mode & AE_IFMT)) return (ARCHIVE_EOF); + + if (zip->entry->flags & (ZIP_ENCRYPTED | ZIP_STRONG_ENCRYPTED)) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Encrypted file is unsupported"); + return (ARCHIVE_FAILED); } - switch(zip->compression) { + __archive_read_consume(a, zip->unconsumed); + zip->unconsumed = 0; + + switch(zip->entry->compression) { case 0: /* No compression. */ r = zip_read_data_none(a, buff, size, offset); break; +#ifdef HAVE_ZLIB_H case 8: /* Deflate compression. */ r = zip_read_data_deflate(a, buff, size, offset); break; +#endif default: /* Unsupported compression. */ - *buff = NULL; - *size = 0; - *offset = 0; /* Return a warning. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unsupported ZIP compression method (%s)", - zip->compression_name); - if (zip->flags & ZIP_LENGTH_AT_END) { - /* - * ZIP_LENGTH_AT_END requires us to - * decompress the entry in order to - * skip it, but we don't know this - * compression method, so we give up. - */ - r = ARCHIVE_FATAL; - } else { - /* We can't decompress this entry, but we will - * be able to skip() it and try the next entry. */ - r = ARCHIVE_WARN; - } + compression_name(zip->entry->compression)); + /* We can't decompress this entry, but we will + * be able to skip() it and try the next entry. */ + return (ARCHIVE_FAILED); break; } if (r != ARCHIVE_OK) @@ -587,105 +798,142 @@ archive_read_format_zip_read_data(struct archive_read *a, zip->entry_crc32 = crc32(zip->entry_crc32, *buff, *size); /* If we hit the end, swallow any end-of-data marker. */ if (zip->end_of_entry) { - if (zip->flags & ZIP_LENGTH_AT_END) { - const char *p; - - if ((p = __archive_read_ahead(a, 16, NULL)) == NULL) { - archive_set_error(&a->archive, - ARCHIVE_ERRNO_FILE_FORMAT, - "Truncated ZIP end-of-file record"); - return (ARCHIVE_FATAL); - } - zip->crc32 = archive_le32dec(p + 4); - zip->compressed_size = archive_le32dec(p + 8); - zip->uncompressed_size = archive_le32dec(p + 12); - __archive_read_consume(a, 16); - } /* Check file size, CRC against these values. */ - if (zip->compressed_size != zip->entry_compressed_bytes_read) { + if (zip->entry->compressed_size != zip->entry_compressed_bytes_read) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "ZIP compressed data is wrong size"); + "ZIP compressed data is wrong size (read %jd, expected %jd)", + (intmax_t)zip->entry_compressed_bytes_read, + (intmax_t)zip->entry->compressed_size); return (ARCHIVE_WARN); } - /* Size field only stores the lower 32 bits of the actual size. */ - if ((zip->uncompressed_size & UINT32_MAX) + /* Size field only stores the lower 32 bits of the actual + * size. */ + if ((zip->entry->uncompressed_size & UINT32_MAX) != (zip->entry_uncompressed_bytes_read & UINT32_MAX)) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "ZIP uncompressed data is wrong size"); + "ZIP uncompressed data is wrong size (read %jd, expected %jd)", + (intmax_t)zip->entry_uncompressed_bytes_read, + (intmax_t)zip->entry->uncompressed_size); return (ARCHIVE_WARN); } /* Check computed CRC against header */ - if (zip->crc32 != zip->entry_crc32) { + if (zip->entry->crc32 != zip->entry_crc32) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "ZIP bad CRC: 0x%lx should be 0x%lx", - zip->entry_crc32, zip->crc32); + (unsigned long)zip->entry_crc32, + (unsigned long)zip->entry->crc32); return (ARCHIVE_WARN); } } - /* Return EOF immediately if this is a non-regular file. */ - if (AE_IFREG != (zip->mode & AE_IFMT)) - return (ARCHIVE_EOF); return (ARCHIVE_OK); } /* - * Read "uncompressed" data. According to the current specification, - * if ZIP_LENGTH_AT_END is specified, then the size fields in the - * initial file header are supposed to be set to zero. This would, of - * course, make it impossible for us to read the archive, since we - * couldn't determine the end of the file data. Info-ZIP seems to - * include the real size fields both before and after the data in this - * case (the CRC only appears afterwards), so this works as you would - * expect. + * Read "uncompressed" data. There are three cases: + * 1) We know the size of the data. This is always true for the + * seeking reader (we've examined the Central Directory already). + * 2) ZIP_LENGTH_AT_END was set, but only the CRC was deferred. + * Info-ZIP seems to do this; we know the size but have to grab + * the CRC from the data descriptor afterwards. + * 3) We're streaming and ZIP_LENGTH_AT_END was specified and + * we have no size information. In this case, we can do pretty + * well by watching for the data descriptor record. The data + * descriptor is 16 bytes and includes a computed CRC that should + * provide a strong check. + * + * TODO: Technically, the PK\007\010 signature is optional. + * In the original spec, the data descriptor contained CRC + * and size fields but had no leading signature. In practice, + * newer writers seem to provide the signature pretty consistently, + * but we might need to do something more complex here if + * we want to handle older archives that lack that signature. * * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets * zip->end_of_entry if it consumes all of the data. */ static int -zip_read_data_none(struct archive_read *a, const void **buff, - size_t *size, off_t *offset) +zip_read_data_none(struct archive_read *a, const void **_buff, + size_t *size, int64_t *offset) { struct zip *zip; + const char *buff; ssize_t bytes_avail; zip = (struct zip *)(a->format->data); - if (zip->entry_bytes_remaining == 0) { - *buff = NULL; - *size = 0; - *offset = zip->entry_offset; - zip->end_of_entry = 1; - return (ARCHIVE_OK); - } - /* - * Note: '1' here is a performance optimization. - * Recall that the decompression layer returns a count of - * available bytes; asking for more than that forces the - * decompressor to combine reads by copying data. - */ - *buff = __archive_read_ahead(a, 1, &bytes_avail); - if (bytes_avail <= 0) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Truncated ZIP file data"); - return (ARCHIVE_FATAL); + if (zip->entry->flags & ZIP_LENGTH_AT_END) { + const char *p; + + /* Grab at least 16 bytes. */ + buff = __archive_read_ahead(a, 16, &bytes_avail); + if (bytes_avail < 16) { + /* Zip archives have end-of-archive markers + that are longer than this, so a failure to get at + least 16 bytes really does indicate a truncated + file. */ + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP file data"); + return (ARCHIVE_FATAL); + } + /* Check for a complete PK\007\010 signature. */ + p = buff; + if (p[0] == 'P' && p[1] == 'K' + && p[2] == '\007' && p[3] == '\010' + && archive_le32dec(p + 4) == zip->entry_crc32 + && archive_le32dec(p + 8) == zip->entry_compressed_bytes_read + && archive_le32dec(p + 12) == zip->entry_uncompressed_bytes_read) { + zip->entry->crc32 = archive_le32dec(p + 4); + zip->entry->compressed_size = archive_le32dec(p + 8); + zip->entry->uncompressed_size = archive_le32dec(p + 12); + zip->end_of_entry = 1; + zip->unconsumed = 16; + return (ARCHIVE_OK); + } + /* If not at EOF, ensure we consume at least one byte. */ + ++p; + + /* Scan forward until we see where a PK\007\010 signature might be. */ + /* Return bytes up until that point. On the next call, the code + above will verify the data descriptor. */ + while (p < buff + bytes_avail - 4) { + if (p[3] == 'P') { p += 3; } + else if (p[3] == 'K') { p += 2; } + else if (p[3] == '\007') { p += 1; } + else if (p[3] == '\010' && p[2] == '\007' + && p[1] == 'K' && p[0] == 'P') { + break; + } else { p += 4; } + } + bytes_avail = p - buff; + } else { + if (zip->entry_bytes_remaining == 0) { + zip->end_of_entry = 1; + return (ARCHIVE_OK); + } + /* Grab a bunch of bytes. */ + buff = __archive_read_ahead(a, 1, &bytes_avail); + if (bytes_avail <= 0) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP file data"); + return (ARCHIVE_FATAL); + } + if (bytes_avail > zip->entry_bytes_remaining) + bytes_avail = zip->entry_bytes_remaining; } - if (bytes_avail > zip->entry_bytes_remaining) - bytes_avail = zip->entry_bytes_remaining; - __archive_read_consume(a, bytes_avail); *size = bytes_avail; - *offset = zip->entry_offset; - zip->entry_offset += *size; - zip->entry_bytes_remaining -= *size; - zip->entry_uncompressed_bytes_read += *size; - zip->entry_compressed_bytes_read += *size; + zip->entry_bytes_remaining -= bytes_avail; + zip->entry_uncompressed_bytes_read += bytes_avail; + zip->entry_compressed_bytes_read += bytes_avail; + zip->unconsumed += bytes_avail; + *_buff = buff; return (ARCHIVE_OK); } #ifdef HAVE_ZLIB_H static int zip_read_data_deflate(struct archive_read *a, const void **buff, - size_t *size, off_t *offset) + size_t *size, int64_t *offset) { struct zip *zip; ssize_t bytes_avail; @@ -696,7 +944,7 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, /* If the buffer hasn't been allocated, allocate it now. */ if (zip->uncompressed_buffer == NULL) { - zip->uncompressed_buffer_size = 32 * 1024; + zip->uncompressed_buffer_size = 256 * 1024; zip->uncompressed_buffer = (unsigned char *)malloc(zip->uncompressed_buffer_size); if (zip->uncompressed_buffer == NULL) { @@ -731,6 +979,10 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, * decompressor to combine reads by copying data. */ compressed_buff = __archive_read_ahead(a, 1, &bytes_avail); + if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END) + && bytes_avail > zip->entry_bytes_remaining) { + bytes_avail = zip->entry_bytes_remaining; + } if (bytes_avail <= 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated ZIP file body"); @@ -773,66 +1025,102 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, zip->entry_bytes_remaining -= bytes_avail; zip->entry_compressed_bytes_read += bytes_avail; - *offset = zip->entry_offset; *size = zip->stream.total_out; - zip->entry_uncompressed_bytes_read += *size; + zip->entry_uncompressed_bytes_read += zip->stream.total_out; *buff = zip->uncompressed_buffer; - zip->entry_offset += *size; + + if (zip->end_of_entry && (zip->entry->flags & ZIP_LENGTH_AT_END)) { + const char *p; + + if (NULL == (p = __archive_read_ahead(a, 16, NULL))) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP end-of-file record"); + return (ARCHIVE_FATAL); + } + /* Consume the optional PK\007\010 marker. */ + if (p[0] == 'P' && p[1] == 'K' && p[2] == '\007' && p[3] == '\010') { + zip->entry->crc32 = archive_le32dec(p + 4); + zip->entry->compressed_size = archive_le32dec(p + 8); + zip->entry->uncompressed_size = archive_le32dec(p + 12); + zip->unconsumed = 16; + } + } + return (ARCHIVE_OK); } -#else -static int -zip_read_data_deflate(struct archive_read *a, const void **buff, - size_t *size, off_t *offset) -{ - *buff = NULL; - *size = 0; - *offset = 0; - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "libarchive compiled without deflate support (no libz)"); - return (ARCHIVE_FATAL); -} #endif static int archive_read_format_zip_read_data_skip(struct archive_read *a) { struct zip *zip; - const void *buff = NULL; - off_t bytes_skipped; zip = (struct zip *)(a->format->data); /* If we've already read to end of data, we're done. */ if (zip->end_of_entry) return (ARCHIVE_OK); + /* If we're seeking, we're done. */ + if (zip->have_central_directory) + return (ARCHIVE_OK); - /* - * If the length is at the end, we have no choice but - * to decompress all the data to find the end marker. - */ - if (zip->flags & ZIP_LENGTH_AT_END) { - size_t size; - off_t offset; - int r; - do { - r = archive_read_format_zip_read_data(a, &buff, - &size, &offset); - } while (r == ARCHIVE_OK); - return (r); + /* So we know we're streaming... */ + if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END)) { + /* We know the compressed length, so we can just skip. */ + int64_t bytes_skipped = __archive_read_consume(a, + zip->entry_bytes_remaining + zip->unconsumed); + if (bytes_skipped < 0) + return (ARCHIVE_FATAL); + zip->unconsumed = 0; + return (ARCHIVE_OK); } - /* - * If the length is at the beginning, we can skip the - * compressed data much more quickly. - */ - bytes_skipped = __archive_read_skip(a, zip->entry_bytes_remaining); - if (bytes_skipped < 0) - return (ARCHIVE_FATAL); - - /* This entry is finished and done. */ - zip->end_of_entry = 1; - return (ARCHIVE_OK); + /* We're streaming and we don't know the length. */ + /* If the body is compressed and we know the format, we can + * find an exact end-of-entry by decompressing it. */ + switch (zip->entry->compression) { +#ifdef HAVE_ZLIB_H + case 8: /* Deflate compression. */ + while (!zip->end_of_entry) { + int64_t offset = 0; + const void *buff = NULL; + size_t size = 0; + int r; + r = zip_read_data_deflate(a, &buff, &size, &offset); + if (r != ARCHIVE_OK) + return (r); + } + break; +#endif + default: /* Uncompressed or unknown. */ + /* Scan for a PK\007\010 signature. */ + __archive_read_consume(a, zip->unconsumed); + zip->unconsumed = 0; + for (;;) { + const char *p, *buff; + ssize_t bytes_avail; + buff = __archive_read_ahead(a, 16, &bytes_avail); + if (bytes_avail < 16) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP file data"); + return (ARCHIVE_FATAL); + } + p = buff; + while (p <= buff + bytes_avail - 16) { + if (p[3] == 'P') { p += 3; } + else if (p[3] == 'K') { p += 2; } + else if (p[3] == '\007') { p += 1; } + else if (p[3] == '\010' && p[2] == '\007' + && p[1] == 'K' && p[0] == 'P') { + __archive_read_consume(a, p - buff + 16); + return ARCHIVE_OK; + } else { p += 4; } + } + __archive_read_consume(a, p - buff); + } + } + return ARCHIVE_OK; } static int @@ -845,8 +1133,8 @@ archive_read_format_zip_cleanup(struct archive_read *a) if (zip->stream_valid) inflateEnd(&zip->stream); #endif + free(zip->zip_entries); free(zip->uncompressed_buffer); - archive_string_free(&(zip->pathname)); archive_string_free(&(zip->extra)); free(zip); (a->format->data) = NULL; @@ -859,28 +1147,30 @@ archive_read_format_zip_cleanup(struct archive_read *a) * triplets. id and size are 2 bytes each. */ static void -process_extra(const void* extra, struct zip* zip) +process_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry) { - int offset = 0; - const char *p = (const char *)extra; - while (offset < zip->extra_length - 4) + unsigned offset = 0; + + while (offset < extra_length - 4) { unsigned short headerid = archive_le16dec(p + offset); unsigned short datasize = archive_le16dec(p + offset + 2); offset += 4; - if (offset + datasize > zip->extra_length) + if (offset + datasize > extra_length) break; #ifdef DEBUG - fprintf(stderr, "Header id 0x%04x, length %d\n", + fprintf(stderr, "Header id 0x%x, length %d\n", headerid, datasize); #endif switch (headerid) { case 0x0001: /* Zip64 extended information extra field. */ if (datasize >= 8) - zip->uncompressed_size = archive_le64dec(p + offset); + zip_entry->uncompressed_size = + archive_le64dec(p + offset); if (datasize >= 16) - zip->compressed_size = archive_le64dec(p + offset + 8); + zip_entry->compressed_size = + archive_le64dec(p + offset + 8); break; case 0x5455: { @@ -893,12 +1183,12 @@ process_extra(const void* extra, struct zip* zip) { #ifdef DEBUG fprintf(stderr, "mtime: %lld -> %d\n", - (long long)zip->mtime, + (long long)zip_entry->mtime, archive_le32dec(p + offset)); #endif if (datasize < 4) break; - zip->mtime = archive_le32dec(p + offset); + zip_entry->mtime = archive_le32dec(p + offset); offset += 4; datasize -= 4; } @@ -906,7 +1196,7 @@ process_extra(const void* extra, struct zip* zip) { if (datasize < 4) break; - zip->atime = archive_le32dec(p + offset); + zip_entry->atime = archive_le32dec(p + offset); offset += 4; datasize -= 4; } @@ -914,12 +1204,25 @@ process_extra(const void* extra, struct zip* zip) { if (datasize < 4) break; - zip->ctime = archive_le32dec(p + offset); + zip_entry->ctime = archive_le32dec(p + offset); offset += 4; datasize -= 4; } break; } + case 0x5855: + { + /* Info-ZIP Unix Extra Field (old version) "UX". */ + if (datasize >= 8) { + zip_entry->atime = archive_le32dec(p + offset); + zip_entry->mtime = archive_le32dec(p + offset + 4); + } + if (datasize >= 12) { + zip_entry->uid = archive_le16dec(p + offset + 8); + zip_entry->gid = archive_le16dec(p + offset + 10); + } + break; + } case 0x7855: /* Info-ZIP Unix Extra Field (type 2) "Ux". */ #ifdef DEBUG @@ -928,23 +1231,50 @@ process_extra(const void* extra, struct zip* zip) archive_le16dec(p + offset + 2)); #endif if (datasize >= 2) - zip->uid = archive_le16dec(p + offset); + zip_entry->uid = archive_le16dec(p + offset); if (datasize >= 4) - zip->gid = archive_le16dec(p + offset + 2); + zip_entry->gid = archive_le16dec(p + offset + 2); break; case 0x7875: + { /* Info-Zip Unix Extra Field (type 3) "ux". */ + int uidsize = 0, gidsize = 0; + + if (datasize >= 1 && p[offset] == 1) {/* version=1 */ + if (datasize >= 4) { + /* get a uid size. */ + uidsize = p[offset+1]; + if (uidsize == 2) + zip_entry->uid = archive_le16dec( + p + offset + 2); + else if (uidsize == 4 && datasize >= 6) + zip_entry->uid = archive_le32dec( + p + offset + 2); + } + if (datasize >= (2 + uidsize + 3)) { + /* get a gid size. */ + gidsize = p[offset+2+uidsize]; + if (gidsize == 2) + zip_entry->gid = archive_le16dec( + p+offset+2+uidsize+1); + else if (gidsize == 4 && + datasize >= (2 + uidsize + 5)) + zip_entry->gid = archive_le32dec( + p+offset+2+uidsize+1); + } + } break; + } default: break; } offset += datasize; } #ifdef DEBUG - if (offset != zip->extra_length) + if (offset != extra_length) { fprintf(stderr, - "Extra data field contents do not match reported size!"); + "Extra data field contents do not match reported size!\n"); } #endif } |