diff options
Diffstat (limited to 'lib/libarchive')
50 files changed, 19577 insertions, 0 deletions
diff --git a/lib/libarchive/COPYING b/lib/libarchive/COPYING new file mode 100644 index 0000000..5a02bcc --- /dev/null +++ b/lib/libarchive/COPYING @@ -0,0 +1,37 @@ +All of the C source code, header files, and documentation in this +package are covered by the following: + +Copyright (c) 2003-2005 Tim Kientzle +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer + in this position and unchanged. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=========================================================================== + +Shell scripts, makefiles, and certain other files may be covered by +other licenses. In particular, some distributions of this library +contain Makefiles and/or shell scripts that are generated +automatically by GNU autoconf and GNU automake. Those generated files +are controlled by the relevant licenses. 
+ +$FreeBSD$ + diff --git a/lib/libarchive/Makefile b/lib/libarchive/Makefile new file mode 100644 index 0000000..718ebea --- /dev/null +++ b/lib/libarchive/Makefile @@ -0,0 +1,197 @@ +# $FreeBSD$ + +LIB= archive +DPADD= ${LIBBZ2} ${LIBZ} +LDADD= -lbz2 -lz + +# The libarchive version stamp. +# Version is three numbers: +# Major: Bumped ONLY when API/ABI breakage happens. +# Minor: Bumped when significant new features are added (see SHLIB_MAJOR) +# Revision: Bumped on any notable change +VERSION= 1.2.53 +ARCHIVE_API_MAJOR!= echo ${VERSION} | sed -e 's/\..*//' +ARCHIVE_API_MINOR!= echo ${VERSION} | sed -e 's/[0-9]*\.//' | sed -e 's/\..*//' + +# The FreeBSD SHLIB_MAJOR is computed from the above values. +# To bump SHLIB_MAJOR, increase the MINOR number in VERSION above. +SHLIB_MAJOR!= echo $$((${ARCHIVE_API_MAJOR} + ${ARCHIVE_API_MINOR})) +# The SHLIB_MAJOR computation above attempts to match the +# version number generated by libtool. (This may change +# when the FreeBSD port of libtool gets fixed.) + +CFLAGS+= -DPACKAGE_NAME=\"lib${LIB}\" +CFLAGS+= -DPACKAGE_VERSION=\"${VERSION}\" +CFLAGS+= -I${.OBJDIR} + +# FreeBSD/arm has some limitations. +.if ${MACHINE_ARCH} == "arm" +WARNS?= 3 +.else +WARNS?= 6 +.endif + +# Headers to be installed in /usr/include +INCS= archive.h archive_entry.h + +# Build archive.h from archive.h.in by substituting version information. +archive.h: archive.h.in Makefile + cat ${.CURDIR}/archive.h.in | \ + sed 's/@VERSION@/${VERSION}/g' | \ + sed 's/@SHLIB_MAJOR@/${SHLIB_MAJOR}/g' | \ + sed 's/@ARCHIVE_API_MAJOR@/${ARCHIVE_API_MAJOR}/g' | \ + sed 's/@ARCHIVE_API_MINOR@/${ARCHIVE_API_MINOR}/g' | \ + cat > archive.h + +# archive.h needs to be cleaned +CLEANFILES+= archive.h + +# Sources to be compiled. 
+SRCS= archive.h \ + archive_check_magic.c \ + archive_entry.c \ + archive_read.c \ + archive_read_data_into_buffer.c \ + archive_read_data_into_fd.c \ + archive_read_extract.c \ + archive_read_open_fd.c \ + archive_read_open_file.c \ + archive_read_support_compression_all.c \ + archive_read_support_compression_bzip2.c \ + archive_read_support_compression_compress.c \ + archive_read_support_compression_gzip.c \ + archive_read_support_compression_none.c \ + archive_read_support_format_all.c \ + archive_read_support_format_cpio.c \ + archive_read_support_format_iso9660.c \ + archive_read_support_format_tar.c \ + archive_read_support_format_zip.c \ + archive_string.c \ + archive_string_sprintf.c \ + archive_util.c \ + archive_write.c \ + archive_write_open_fd.c \ + archive_write_open_file.c \ + archive_write_set_compression_bzip2.c \ + archive_write_set_compression_gzip.c \ + archive_write_set_compression_none.c \ + archive_write_set_format.c \ + archive_write_set_format_by_name.c \ + archive_write_set_format_cpio.c \ + archive_write_set_format_pax.c \ + archive_write_set_format_shar.c \ + archive_write_set_format_ustar.c + +# Man pages to be installed. +MAN= archive_entry.3 \ + archive_read.3 \ + archive_util.3 \ + archive_write.3 \ + libarchive.3 \ + libarchive-formats.5 \ + tar.5 + +# Symlink the man pages under each function name. 
+MLINKS+= archive_entry.3 archive_entry_acl_add_entry.3 +MLINKS+= archive_entry.3 archive_entry_acl_add_entry_w.3 +MLINKS+= archive_entry.3 archive_entry_acl_clear.3 +MLINKS+= archive_entry.3 archive_entry_acl_count.3 +MLINKS+= archive_entry.3 archive_entry_acl_next.3 +MLINKS+= archive_entry.3 archive_entry_acl_next_w.3 +MLINKS+= archive_entry.3 archive_entry_acl_reset.3 +MLINKS+= archive_entry.3 archive_entry_acl_text_w.3 +MLINKS+= archive_entry.3 archive_entry_clear.3 +MLINKS+= archive_entry.3 archive_entry_clone.3 +MLINKS+= archive_entry.3 archive_entry_copy_fflags_text_w.3 +MLINKS+= archive_entry.3 archive_entry_copy_gname_w.3 +MLINKS+= archive_entry.3 archive_entry_copy_hardlink_w.3 +MLINKS+= archive_entry.3 archive_entry_copy_pathname_w.3 +MLINKS+= archive_entry.3 archive_entry_copy_stat.3 +MLINKS+= archive_entry.3 archive_entry_copy_symlink_w.3 +MLINKS+= archive_entry.3 archive_entry_copy_uname_w.3 +MLINKS+= archive_entry.3 archive_entry_fflags.3 +MLINKS+= archive_entry.3 archive_entry_fflags_text.3 +MLINKS+= archive_entry.3 archive_entry_free.3 +MLINKS+= archive_entry.3 archive_entry_gid.3 +MLINKS+= archive_entry.3 archive_entry_gname.3 +MLINKS+= archive_entry.3 archive_entry_gname_w.3 +MLINKS+= archive_entry.3 archive_entry_hardlink.3 +MLINKS+= archive_entry.3 archive_entry_ino.3 +MLINKS+= archive_entry.3 archive_entry_mode.3 +MLINKS+= archive_entry.3 archive_entry_mtime.3 +MLINKS+= archive_entry.3 archive_entry_mtime_nsec.3 +MLINKS+= archive_entry.3 archive_entry_new.3 +MLINKS+= archive_entry.3 archive_entry_pathname.3 +MLINKS+= archive_entry.3 archive_entry_pathname_w.3 +MLINKS+= archive_entry.3 archive_entry_rdev.3 +MLINKS+= archive_entry.3 archive_entry_rdevmajor.3 +MLINKS+= archive_entry.3 archive_entry_rdevminor.3 +MLINKS+= archive_entry.3 archive_entry_set_fflags.3 +MLINKS+= archive_entry.3 archive_entry_set_gid.3 +MLINKS+= archive_entry.3 archive_entry_set_gname.3 +MLINKS+= archive_entry.3 archive_entry_set_hardlink.3 +MLINKS+= archive_entry.3 
archive_entry_set_link.3 +MLINKS+= archive_entry.3 archive_entry_set_mode.3 +MLINKS+= archive_entry.3 archive_entry_set_pathname.3 +MLINKS+= archive_entry.3 archive_entry_set_rdevmajor.3 +MLINKS+= archive_entry.3 archive_entry_set_rdevminor.3 +MLINKS+= archive_entry.3 archive_entry_set_size.3 +MLINKS+= archive_entry.3 archive_entry_set_symlink.3 +MLINKS+= archive_entry.3 archive_entry_set_uid.3 +MLINKS+= archive_entry.3 archive_entry_set_uname.3 +MLINKS+= archive_entry.3 archive_entry_size.3 +MLINKS+= archive_entry.3 archive_entry_stat.3 +MLINKS+= archive_entry.3 archive_entry_symlink.3 +MLINKS+= archive_entry.3 archive_entry_uid.3 +MLINKS+= archive_entry.3 archive_entry_uname.3 +MLINKS+= archive_entry.3 archive_entry_uname_w.3 +MLINKS+= archive_read.3 archive_read_data.3 +MLINKS+= archive_read.3 archive_read_data_block.3 +MLINKS+= archive_read.3 archive_read_data_into_buffer.3 +MLINKS+= archive_read.3 archive_read_data_into_fd.3 +MLINKS+= archive_read.3 archive_read_data_skip.3 +MLINKS+= archive_read.3 archive_read_extract.3 +MLINKS+= archive_read.3 archive_read_extract_set_progress_callback.3 +MLINKS+= archive_read.3 archive_read_finish.3 +MLINKS+= archive_read.3 archive_read_new.3 +MLINKS+= archive_read.3 archive_read_next_header.3 +MLINKS+= archive_read.3 archive_read_open.3 +MLINKS+= archive_read.3 archive_read_open_fd.3 +MLINKS+= archive_read.3 archive_read_open_file.3 +MLINKS+= archive_read.3 archive_read_set_bytes_per_block.3 +MLINKS+= archive_read.3 archive_read_support_compression_all.3 +MLINKS+= archive_read.3 archive_read_support_compression_bzip2.3 +MLINKS+= archive_read.3 archive_read_support_compression_compress.3 +MLINKS+= archive_read.3 archive_read_support_compression_gzip.3 +MLINKS+= archive_read.3 archive_read_support_compression_none.3 +MLINKS+= archive_read.3 archive_read_support_format_all.3 +MLINKS+= archive_read.3 archive_read_support_format_cpio.3 +MLINKS+= archive_read.3 archive_read_support_format_iso9660.3 +MLINKS+= archive_read.3 
archive_read_support_format_tar.3 +MLINKS+= archive_read.3 archive_read_support_format_zip.3 +MLINKS+= archive_util.3 archive_compression.3 +MLINKS+= archive_util.3 archive_compression_name.3 +MLINKS+= archive_util.3 archive_errno.3 +MLINKS+= archive_util.3 archive_error_string.3 +MLINKS+= archive_util.3 archive_format.3 +MLINKS+= archive_util.3 archive_format_name.3 +MLINKS+= archive_util.3 archive_set_error.3 +MLINKS+= archive_write.3 archive_write_data.3 +MLINKS+= archive_write.3 archive_write_finish.3 +MLINKS+= archive_write.3 archive_write_header.3 +MLINKS+= archive_write.3 archive_write_new.3 +MLINKS+= archive_write.3 archive_write_open.3 +MLINKS+= archive_write.3 archive_write_open_fd.3 +MLINKS+= archive_write.3 archive_write_open_file.3 +MLINKS+= archive_write.3 archive_write_prepare.3 +MLINKS+= archive_write.3 archive_write_set_bytes_per_block.3 +MLINKS+= archive_write.3 archive_write_set_bytes_in_last_block.3 +MLINKS+= archive_write.3 archive_write_set_callbacks.3 +MLINKS+= archive_write.3 archive_write_set_compression_bzip2.3 +MLINKS+= archive_write.3 archive_write_set_compression_gzip.3 +MLINKS+= archive_write.3 archive_write_set_format_pax.3 +MLINKS+= archive_write.3 archive_write_set_format_shar.3 +MLINKS+= archive_write.3 archive_write_set_format_ustar.3 +MLINKS+= libarchive.3 archive.3 + +.include <bsd.lib.mk> diff --git a/lib/libarchive/README b/lib/libarchive/README new file mode 100644 index 0000000..1380b4a --- /dev/null +++ b/lib/libarchive/README @@ -0,0 +1,91 @@ +$FreeBSD$ + +libarchive: a library for reading and writing streaming archives + +This is all under a BSD license. Use, enjoy, but don't blame me if it breaks! 
+ +Documentation: + * libarchive.3 gives an overview of the library as a whole + * archive_read.3 and archive_write.3 provide detailed calling + sequences for the read and write APIs + * archive_entry.3 details the "struct archive_entry" utility class + * libarchive-formats.5 documents the file formats supported by the library + * tar.5 provides some detailed information about a variety of different + "tar" formats. + +You should also read the copious comments in "archive.h" and the source +code for the sample "bsdtar" program for more details. Please let me know +about any errors or omissions you find. + +Currently, the library automatically detects and reads the following: + * gzip compression + * bzip2 compression + * compress/LZW compression + * GNU tar format (including GNU long filenames, long link names, and + sparse files) + * Solaris 9 extended tar format (including ACLs) + * Old V7 tar archives + * POSIX ustar + * POSIX pax interchange format + * POSIX octet-oriented cpio + * SVR4 ASCII cpio + * Binary cpio (big-endian or little-endian) + * ISO9660 CD-ROM images (with optional Rockridge extensions) + * ZIP archives (with uncompressed or "deflate" compressed entries) + +The library can write: + * gzip compression + * bzip2 compression + * POSIX ustar + * POSIX pax interchange format + * "restricted" pax format, which will create ustar archives except for + entries that require pax extensions (for long filenames, ACLs, etc). + * POSIX octet-oriented cpio + * shar archives + +Notes: + * This is a heavily stream-oriented system. There is no direct + support for in-place modification or random access and no intention + of ever adding such support. Adding such support would require + sacrificing a lot of other features, so don't bother asking. + + * The library is designed to be extended with new compression and + archive formats. The only requirement is that the format be + readable or writable as a stream and that each archive entry be + independent. 
+ + * On read, compression and format are always detected automatically. + + * I've attempted to minimize static link pollution. If you don't + explicitly invoke a particular feature (such as support for a + particular compression or format), it won't get pulled in. + In particular, if you don't explicitly enable a particular + compression or decompression support, you won't need to link + against the corresponding compression or decompression libraries. + This also reduces the size of statically-linked binaries in + environments where that matters. + + * On read, the library accepts whatever blocks you hand it. + Your read callback is free to pass the library a byte at a time + or mmap the entire archive and give it to the library at once. + On write, the library always produces correctly-blocked + output. + + * The object-style approach allows you to have multiple archive streams + open at once. bsdtar uses this in its "@archive" extension. + + * The archive itself is read/written using callback functions. + You can read an archive directly from an in-memory buffer or + write it to a socket, if you wish. There are some utility + functions to provide easy-to-use "open file," etc, capabilities. + + * The read/write APIs are designed to allow individual entries + to be read or written to any data source: You can create + a block of data in memory and add it to a tar archive without + first writing a temporary file. You can also read an entry from + an archive and write the data directly to a socket. If you want + to read/write entries to disk, there are convenience functions to + make this especially easy. + + * Note: "pax interchange format" is really an extended tar format, + despite what the name says. diff --git a/lib/libarchive/archive.h.in b/lib/libarchive/archive.h.in new file mode 100644 index 0000000..62c4d52 --- /dev/null +++ b/lib/libarchive/archive.h.in @@ -0,0 +1,344 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef ARCHIVE_H_INCLUDED +#define ARCHIVE_H_INCLUDED + +/* + * This header file corresponds to: + * Library version @VERSION@ + * Shared library version @SHLIB_MAJOR@ + */ + +#include <sys/types.h> /* Linux requires this for off_t */ +#include <inttypes.h> /* For int64_t */ +#include <unistd.h> /* For ssize_t and size_t */ + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * If ARCHIVE_API_VERSION != archive_api_version(), then the library you + * were linked with is using an incompatible API. This is almost + * certainly a fatal problem. 
+ * + * ARCHIVE_API_FEATURE is incremented with each significant feature + * addition, so you can test (at compile or run time) if a particular + * feature is implemented. It's no big deal if ARCHIVE_API_FEATURE != + * archive_api_feature(), as long as both are high enough to include + * the features you're relying on. Specific values of FEATURE are + * documented here: + * + * 1 - Version tests are available. + * 2 - archive_{read,write}_close available separately from _finish. + */ +#define ARCHIVE_API_VERSION @ARCHIVE_API_MAJOR@ +int archive_api_version(void); +#define ARCHIVE_API_FEATURE @ARCHIVE_API_MINOR@ +int archive_api_feature(void); +/* Textual name/version of the library. */ +#define ARCHIVE_LIBRARY_VERSION "libarchive @VERSION@" +const char * archive_version(void); + +#define ARCHIVE_BYTES_PER_RECORD 512 +#define ARCHIVE_DEFAULT_BYTES_PER_BLOCK 10240 + +/* Declare our basic types. */ +struct archive; +struct archive_entry; + +/* + * Error codes: Use archive_errno() and archive_error_string() + * to retrieve details. Unless specified otherwise, all functions + * that return 'int' use these codes. + */ +#define ARCHIVE_EOF 1 /* Found end of archive. */ +#define ARCHIVE_OK 0 /* Operation was successful. */ +#define ARCHIVE_RETRY (-10) /* Retry might succeed. */ +#define ARCHIVE_WARN (-20) /* Partial success. */ +#define ARCHIVE_FATAL (-30) /* No more operations are possible. */ + +/* + * As far as possible, archive_errno returns standard platform errno codes. + * Of course, the details vary by platform, so the actual definitions + * here are stored in "archive_platform.h". The symbols are listed here + * for reference; as a rule, clients should not need to know the exact + * platform-dependent error code. + */ +/* Unrecognized or invalid file format. */ +/* #define ARCHIVE_ERRNO_FILE_FORMAT */ +/* Illegal usage of the library. */ +/* #define ARCHIVE_ERRNO_PROGRAMMER_ERROR */ +/* Unknown or unclassified error. 
*/ +/* #define ARCHIVE_ERRNO_MISC */ + +/* + * Callbacks are invoked to automatically read/write/open/close the archive. + * You can provide your own for complex tasks (like breaking archives + * across multiple tapes) or use standard ones built into the library. + */ + +/* Returns pointer and size of next block of data from archive. */ +typedef ssize_t archive_read_callback(struct archive *, void *_client_data, + const void **_buffer); +/* Returns size actually written, zero on EOF, -1 on error. */ +typedef ssize_t archive_write_callback(struct archive *, void *_client_data, + void *_buffer, size_t _length); +typedef int archive_open_callback(struct archive *, void *_client_data); +typedef int archive_close_callback(struct archive *, void *_client_data); + +/* + * Codes for archive_compression. + */ +#define ARCHIVE_COMPRESSION_NONE 0 +#define ARCHIVE_COMPRESSION_GZIP 1 +#define ARCHIVE_COMPRESSION_BZIP2 2 +#define ARCHIVE_COMPRESSION_COMPRESS 3 + +/* + * Codes returned by archive_format. + * + * Top 16 bits identify the format family (e.g., "tar"); lower + * 16 bits indicate the variant. This is updated by read_next_header. + * Note that the lower 16 bits will often vary from entry to entry. 
+ */ +#define ARCHIVE_FORMAT_BASE_MASK 0xff0000U +#define ARCHIVE_FORMAT_CPIO 0x10000 +#define ARCHIVE_FORMAT_CPIO_POSIX (ARCHIVE_FORMAT_CPIO | 1) +#define ARCHIVE_FORMAT_CPIO_BIN_LE (ARCHIVE_FORMAT_CPIO | 2) +#define ARCHIVE_FORMAT_CPIO_BIN_BE (ARCHIVE_FORMAT_CPIO | 3) +#define ARCHIVE_FORMAT_CPIO_SVR4_NOCRC (ARCHIVE_FORMAT_CPIO | 4) +#define ARCHIVE_FORMAT_CPIO_SVR4_CRC (ARCHIVE_FORMAT_CPIO | 5) +#define ARCHIVE_FORMAT_SHAR 0x20000 +#define ARCHIVE_FORMAT_SHAR_BASE (ARCHIVE_FORMAT_SHAR | 1) +#define ARCHIVE_FORMAT_SHAR_DUMP (ARCHIVE_FORMAT_SHAR | 2) +#define ARCHIVE_FORMAT_TAR 0x30000 +#define ARCHIVE_FORMAT_TAR_USTAR (ARCHIVE_FORMAT_TAR | 1) +#define ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE (ARCHIVE_FORMAT_TAR | 2) +#define ARCHIVE_FORMAT_TAR_PAX_RESTRICTED (ARCHIVE_FORMAT_TAR | 3) +#define ARCHIVE_FORMAT_TAR_GNUTAR (ARCHIVE_FORMAT_TAR | 4) +#define ARCHIVE_FORMAT_ISO9660 0x40000 +#define ARCHIVE_FORMAT_ISO9660_ROCKRIDGE (ARCHIVE_FORMAT_ISO9660 | 1) +#define ARCHIVE_FORMAT_ZIP 0x50000 + +/*- + * Basic outline for reading an archive: + * 1) Ask archive_read_new for an archive reader object. + * 2) Update any global properties as appropriate. + * In particular, you'll certainly want to call appropriate + * archive_read_support_XXX functions. + * 3) Call archive_read_open_XXX to open the archive + * 4) Repeatedly call archive_read_next_header to get information about + * successive archive entries. Call archive_read_data to extract + * data for entries of interest. + * 5) Call archive_read_finish to end processing. + */ +struct archive *archive_read_new(void); + +/* + * The archive_read_support_XXX calls enable auto-detect for this + * archive handle. They also link in the necessary support code. + * For example, if you don't want bzlib linked in, don't invoke + * support_compression_bzip2(). The "all" functions provide the + * obvious shorthand. 
+ */ +int archive_read_support_compression_all(struct archive *); +int archive_read_support_compression_bzip2(struct archive *); +int archive_read_support_compression_compress(struct archive *); +int archive_read_support_compression_gzip(struct archive *); +int archive_read_support_compression_none(struct archive *); + +int archive_read_support_format_all(struct archive *); +int archive_read_support_format_cpio(struct archive *); +int archive_read_support_format_gnutar(struct archive *); +int archive_read_support_format_iso9660(struct archive *); +int archive_read_support_format_tar(struct archive *); +int archive_read_support_format_zip(struct archive *); + + +/* Open the archive using callbacks for archive I/O. */ +int archive_read_open(struct archive *, void *_client_data, + archive_open_callback *, archive_read_callback *, + archive_close_callback *); + +/* + * The archive_read_open_file function is a convenience function built + * on archive_read_open that uses a canned callback suitable for + * common situations. Note that a NULL filename indicates stdin. + */ +int archive_read_open_file(struct archive *, const char *_file, + size_t _block_size); +int archive_read_open_fd(struct archive *, int _fd, + size_t _block_size); + +/* Parses and returns next entry header. */ +int archive_read_next_header(struct archive *, + struct archive_entry **); + +/* + * Retrieve the byte offset in UNCOMPRESSED data where last-read + * header started. + */ +int64_t archive_read_header_position(struct archive *); + +/* Read data from the body of an entry. Similar to read(2). */ +ssize_t archive_read_data(struct archive *, void *, size_t); +/* + * A zero-copy version of archive_read_data that also exposes the file offset + * of each returned block. Note that the client has no way to specify + * the desired size of the block. The API does guarantee that offsets will + * be strictly increasing and that returned blocks will not overlap. 
+ */ +int archive_read_data_block(struct archive *a, + const void **buff, size_t *size, off_t *offset); + +/*- + * Some convenience functions that are built on archive_read_data: + * 'skip': skips entire entry + * 'into_buffer': writes data into memory buffer that you provide + * 'into_fd': writes data to specified filedes + */ +int archive_read_data_skip(struct archive *); +int archive_read_data_into_buffer(struct archive *, void *buffer, + ssize_t len); +int archive_read_data_into_fd(struct archive *, int fd); + +/*- + * Convenience function to recreate the current entry (whose header + * has just been read) on disk. + * + * This does quite a bit more than just copy data to disk. It also: + * - Creates intermediate directories as required. + * - Manages directory permissions: non-writable directories will + * be initially created with write permission enabled; when the + * archive is closed, dir permissions are edited to the values specified + * in the archive. + * - Checks hardlinks: hardlinks will not be extracted unless the + * linked-to file was also extracted within the same session. (TODO) + */ + +/* The "flags" argument selects optional behavior, 'OR' the flags you want. */ +/* TODO: The 'Default' comments here are not quite correct; clean this up. 
*/ +#define ARCHIVE_EXTRACT_OWNER (1) /* Default: owner/group not restored */ +#define ARCHIVE_EXTRACT_PERM (2) /* Default: restore perm only for reg file*/ +#define ARCHIVE_EXTRACT_TIME (4) /* Default: mod time not restored */ +#define ARCHIVE_EXTRACT_NO_OVERWRITE (8) /* Default: Replace files on disk */ +#define ARCHIVE_EXTRACT_UNLINK (16) /* Default: don't unlink existing files */ +#define ARCHIVE_EXTRACT_ACL (32) /* Default: don't restore ACLs */ +#define ARCHIVE_EXTRACT_FFLAGS (64) /* Default: don't restore fflags */ +#define ARCHIVE_EXTRACT_XATTR (128) /* Default: don't restore xattrs */ + +int archive_read_extract(struct archive *, struct archive_entry *, + int flags); +void archive_read_extract_set_progress_callback(struct archive *, + void (*_progress_func)(void *), void *_user_data); + +/* Close the file and release most resources. */ +int archive_read_close(struct archive *); +/* Release all resources and destroy the object. */ +/* Note that archive_read_finish will call archive_read_close for you. */ +void archive_read_finish(struct archive *); + +/*- + * To create an archive: + * 1) Ask archive_write_new for an archive writer object. + * 2) Set any global properties. In particular, you should set + * the compression and format to use. + * 3) Call archive_write_open to open the file (most people + * will use archive_write_open_file or archive_write_open_fd, + * which provide convenient canned I/O callbacks for you). + * 4) For each entry: + * - construct an appropriate struct archive_entry structure + * - archive_write_header to write the header + * - archive_write_data to write the entry data + * 5) archive_write_close to close the output + * 6) archive_write_finish to clean up the writer and release resources + */ +struct archive *archive_write_new(void); +int archive_write_set_bytes_per_block(struct archive *, + int bytes_per_block); +/* XXX This is badly misnamed; suggestions appreciated. 
XXX */ +int archive_write_set_bytes_in_last_block(struct archive *, + int bytes_in_last_block); + +int archive_write_set_compression_bzip2(struct archive *); +int archive_write_set_compression_gzip(struct archive *); +int archive_write_set_compression_none(struct archive *); +/* A convenience function to set the format based on the code or name. */ +int archive_write_set_format(struct archive *, int format_code); +int archive_write_set_format_by_name(struct archive *, + const char *name); +/* To minimize link pollution, use one or more of the following. */ +int archive_write_set_format_cpio(struct archive *); +/* TODO: int archive_write_set_format_old_tar(struct archive *); */ +int archive_write_set_format_pax(struct archive *); +int archive_write_set_format_pax_restricted(struct archive *); +int archive_write_set_format_shar(struct archive *); +int archive_write_set_format_shar_dump(struct archive *); +int archive_write_set_format_ustar(struct archive *); +int archive_write_open(struct archive *, void *, + archive_open_callback *, archive_write_callback *, + archive_close_callback *); +int archive_write_open_fd(struct archive *, int _fd); +int archive_write_open_file(struct archive *, const char *_file); + +/* + * Note that the library will truncate writes beyond the size provided + * to archive_write_header or pad if the provided data is short. + */ +int archive_write_header(struct archive *, + struct archive_entry *); +/* TODO: should be ssize_t, but that might require .so version bump? */ +int archive_write_data(struct archive *, const void *, size_t); +int archive_write_close(struct archive *); +void archive_write_finish(struct archive *); + +/* + * Accessor functions to read/set various information in + * the struct archive object: + */ +/* Bytes written after compression or read before decompression. */ +int64_t archive_position_compressed(struct archive *); +/* Bytes written to compressor or read from decompressor. 
*/ +int64_t archive_position_uncompressed(struct archive *); + +const char *archive_compression_name(struct archive *); +int archive_compression(struct archive *); +int archive_errno(struct archive *); +const char *archive_error_string(struct archive *); +const char *archive_format_name(struct archive *); +int archive_format(struct archive *); +void archive_set_error(struct archive *, int _err, const char *fmt, ...); + +#ifdef __cplusplus +} +#endif + +#endif /* !ARCHIVE_H_INCLUDED */ diff --git a/lib/libarchive/archive_check_magic.c b/lib/libarchive/archive_check_magic.c new file mode 100644 index 0000000..528da5e --- /dev/null +++ b/lib/libarchive/archive_check_magic.c @@ -0,0 +1,110 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> + +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include "archive_private.h" + +static void +errmsg(const char *m) +{ + write(STDERR_FILENO, m, strlen(m)); +} + +static void +diediedie(void) +{ + *(char *)0 = 1; /* Deliberately segfault and force a coredump. */ + _exit(1); /* If that didn't work, just exit with an error. */ +} + +static const char * +state_name(unsigned s) +{ + switch (s) { + case ARCHIVE_STATE_NEW: return ("new"); + case ARCHIVE_STATE_HEADER: return ("header"); + case ARCHIVE_STATE_DATA: return ("data"); + case ARCHIVE_STATE_EOF: return ("eof"); + case ARCHIVE_STATE_CLOSED: return ("closed"); + case ARCHIVE_STATE_FATAL: return ("fatal"); + default: return ("??"); + } +} + + +static void +write_all_states(int states) +{ + unsigned lowbit; + + /* A trick for computing the lowest set bit. */ + while ((lowbit = states & (-states)) != 0) { + states &= ~lowbit; /* Clear the low bit. */ + errmsg(state_name(lowbit)); + if (states != 0) + errmsg("/"); + } +} + +/* + * Check magic value and current state; bail if it isn't valid. + * + * This is designed to catch serious programming errors that violate + * the libarchive API. 
+ */ +void +__archive_check_magic(struct archive *a, unsigned magic, unsigned state, + const char *function) +{ + if (a->magic != magic) { + errmsg("INTERNAL ERROR: Function "); + errmsg(function); + errmsg(" invoked with invalid struct archive structure.\n"); + diediedie(); + } + + if (state == ARCHIVE_STATE_ANY) + return; + + if ((a->state & state) == 0) { + errmsg("INTERNAL ERROR: Function '"); + errmsg(function); + errmsg("' invoked with archive structure in state '"); + write_all_states(a->state); + errmsg("', should be in state '"); + write_all_states(state); + errmsg("'\n"); + diediedie(); + } +} diff --git a/lib/libarchive/archive_entry.3 b/lib/libarchive/archive_entry.3 new file mode 100644 index 0000000..b619577 --- /dev/null +++ b/lib/libarchive/archive_entry.3 @@ -0,0 +1,341 @@ +.\" Copyright (c) 2003-2004 Tim Kientzle +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd December 15, 2003 +.Dt archive_entry 3 +.Os +.Sh NAME +.Nm archive_entry_acl_add_entry , +.Nm archive_entry_acl_add_entry_w , +.Nm archive_entry_acl_clear , +.Nm archive_entry_acl_count , +.Nm archive_entry_acl_next , +.Nm archive_entry_acl_next_w , +.Nm archive_entry_acl_reset , +.Nm archive_entry_acl_text_w , +.Nm archive_entry_atime , +.Nm archive_entry_atime_nsec , +.Nm archive_entry_clear , +.Nm archive_entry_clone , +.Nm archive_entry_copy_fflags_text_w , +.Nm archive_entry_copy_gname_w , +.Nm archive_entry_copy_hardlink , +.Nm archive_entry_copy_hardlink_w , +.Nm archive_entry_copy_pathname_w , +.Nm archive_entry_copy_stat , +.Nm archive_entry_copy_symlink_w , +.Nm archive_entry_copy_uname_w , +.Nm archive_entry_dev , +.Nm archive_entry_fflags , +.Nm archive_entry_fflags_text , +.Nm archive_entry_free , +.Nm archive_entry_gid , +.Nm archive_entry_gname , +.Nm archive_entry_hardlink , +.Nm archive_entry_ino , +.Nm archive_entry_mode , +.Nm archive_entry_mtime , +.Nm archive_entry_mtime_nsec , +.Nm archive_entry_new , +.Nm archive_entry_pathname , +.Nm archive_entry_pathname_w , +.Nm archive_entry_rdev , +.Nm archive_entry_rdevmajor , +.Nm archive_entry_rdevminor , +.Nm archive_entry_set_fflags , +.Nm archive_entry_set_gid , +.Nm archive_entry_set_gname , +.Nm archive_entry_set_hardlink , +.Nm archive_entry_set_link , +.Nm archive_entry_set_mode , +.Nm archive_entry_set_mtime , +.Nm 
archive_entry_set_pathname , +.Nm archive_entry_set_rdevmajor , +.Nm archive_entry_set_rdevminor , +.Nm archive_entry_set_size , +.Nm archive_entry_set_symlink , +.Nm archive_entry_set_uid , +.Nm archive_entry_set_uname , +.Nm archive_entry_size , +.Nm archive_entry_stat , +.Nm archive_entry_symlink , +.Nm archive_entry_uid , +.Nm archive_entry_uname +.Nd functions for manipulating archive entry descriptions +.Sh SYNOPSIS +.In archive_entry.h +.Ft void +.Fn archive_entry_acl_add_entry "struct archive_entry *" "int type" "int permset" "int tag" "int qual" "const char *name" +.Ft void +.Fn archive_entry_acl_add_entry_w "struct archive_entry *" "int type" "int permset" "int tag" "int qual" "const wchar_t *name" +.Ft void +.Fn archive_entry_acl_clear "struct archive_entry *" +.Ft int +.Fn archive_entry_acl_count "struct archive_entry *" "int type" +.Ft int +.Fn archive_entry_acl_next "struct archive_entry *" "int want_type" "int *type" "int *permset" "int *tag" "int *qual" "const char **name" +.Ft int +.Fn archive_entry_acl_next_w "struct archive_entry *" "int want_type" "int *type" "int *permset" "int *tag" "int *qual" "const wchar_t **name" +.Ft int +.Fn archive_entry_acl_reset "struct archive_entry *" "int want_type" +.Ft const wchar_t * +.Fn archive_entry_acl_text_w "struct archive_entry *" "int flags" +.Ft time_t +.Fn archive_entry_atime "struct archive_entry *" +.Ft long +.Fn archive_entry_atime_nsec "struct archive_entry *" +.Ft "struct archive_entry *" +.Fn archive_entry_clear "struct archive_entry *" +.Ft struct archive_entry * +.Fn archive_entry_clone "struct archive_entry *" +.Ft const wchar_t * +.Fn archive_entry_copy_fflags_text_w "struct archive_entry *" "const wchar_t *" +.Ft void +.Fn archive_entry_copy_gname_w "struct archive_entry *" "const wchar_t *" +.Ft void +.Fn archive_entry_copy_hardlink "struct archive_entry *" "const char *" +.Ft void +.Fn archive_entry_copy_hardlink_w "struct archive_entry *" "const wchar_t *" +.Ft void +.Fn 
archive_entry_copy_pathname_w "struct archive_entry *" "const wchar_t *" +.Ft void +.Fn archive_entry_copy_stat "struct archive_entry *" "const struct stat *" +.Ft void +.Fn archive_entry_copy_symlink_w "struct archive_entry *" "const wchar_t *" +.Ft void +.Fn archive_entry_copy_uname_w "struct archive_entry *" "const wchar_t *" +.Ft dev_t +.Fn archive_entry_dev "struct archive_entry *" +.Ft void +.Fn archive_entry_fflags "struct archive_entry *" "unsigned long *set" "unsigned long *clear" +.Ft const char * +.Fn archive_entry_fflags_text "struct archive_entry *" +.Ft void +.Fn archive_entry_free "struct archive_entry *" +.Ft const char * +.Fn archive_entry_gname "struct archive_entry *" +.Ft const char * +.Fn archive_entry_hardlink "struct archive_entry *" +.Ft ino_t +.Fn archive_entry_ino "struct archive_entry *" +.Ft mode_t +.Fn archive_entry_mode "struct archive_entry *" +.Ft time_t +.Fn archive_entry_mtime "struct archive_entry *" +.Ft long +.Fn archive_entry_mtime_nsec "struct archive_entry *" +.Ft struct archive_entry * +.Fn archive_entry_new "void" +.Ft const char * +.Fn archive_entry_pathname "struct archive_entry *" +.Ft const wchar_t * +.Fn archive_entry_pathname_w "struct archive_entry *" +.Ft dev_t +.Fn archive_entry_rdev "struct archive_entry *" +.Ft dev_t +.Fn archive_entry_rdevmajor "struct archive_entry *" +.Ft dev_t +.Fn archive_entry_rdevminor "struct archive_entry *" +.Ft void +.Fn archive_entry_set_fflags "struct archive_entry *" "unsigned long set" "unsigned long clear" +.Ft void +.Fn archive_entry_set_gid "struct archive_entry *" "gid_t" +.Ft void +.Fn archive_entry_set_gname "struct archive_entry *" "const char *" +.Ft void +.Fn archive_entry_set_hardlink "struct archive_entry *" "const char *" +.Ft void +.Fn archive_entry_set_link "struct archive_entry *" "const char *" +.Ft void +.Fn archive_entry_set_mode "struct archive_entry *" "mode_t" +.Ft void +.Fn archive_entry_set_mtime "struct archive_entry *" "time_t" "long nanos" +.Ft void +.Fn 
archive_entry_set_pathname "struct archive_entry *" "const char *" +.Ft void +.Fn archive_entry_set_rdevmajor "struct archive_entry *" "dev_t" +.Ft void +.Fn archive_entry_set_rdevminor "struct archive_entry *" "dev_t" +.Ft void +.Fn archive_entry_set_size "struct archive_entry *" "int64_t" +.Ft void +.Fn archive_entry_set_symlink "struct archive_entry *" "const char *" +.Ft void +.Fn archive_entry_set_uid "struct archive_entry *" "uid_t" +.Ft void +.Fn archive_entry_set_uname "struct archive_entry *" "const char *" +.Ft int64_t +.Fn archive_entry_size "struct archive_entry *" +.Ft const struct stat * +.Fn archive_entry_stat "struct archive_entry *" +.Ft const char * +.Fn archive_entry_symlink "struct archive_entry *" +.Ft const char * +.Fn archive_entry_uname "struct archive_entry *" +.Sh DESCRIPTION +These functions create and manipulate data objects that +represent entries within an archive. +You can think of a +.Tn struct archive_entry +as a heavy-duty version of +.Tn struct stat : +it includes everything from +.Tn struct stat +plus associated pathname, textual group and user names, etc. +These objects are used by +.Xr libarchive 3 +to represent the metadata associated with a particular +entry in an archive. +.Ss Create and Destroy +There are functions to allocate, destroy, clear, and copy +.Va archive_entry +objects: +.Bl -tag -compact -width indent +.It Fn archive_entry_clear +Erases the object, resetting all internal fields to the +same state as a newly-created object. +This is provided to allow you to quickly recycle objects +without thrashing the heap. +.It Fn archive_entry_clone +A deep copy operation; all text fields are duplicated. +.It Fn archive_entry_free +Releases the +.Tn struct archive_entry +object. +.It Fn archive_entry_new +Allocate and return a blank +.Tn struct archive_entry +object. +.El +.Ss Set and Get Functions +Most of the functions here set or read entries in an object. 
+Such functions have one of the following forms: +.Bl -tag -compact -width indent +.It Fn archive_entry_set_XXXX +Stores the provided data in the object. +In particular, for strings, the pointer is stored, +not the referenced string. +.It Fn archive_entry_copy_XXXX +As above, except that the referenced data is copied +into the object. +.It Fn archive_entry_XXXX +Returns the specified data. +In the case of strings, a const-qualified pointer to +the string is returned. +.El +String data can be set or accessed as wide character strings +or normal +.Va char +strings. +The functions that use wide character strings are suffixed with +.Cm _w . +Note that these are different representations of the same data: +For example, if you store a narrow string and read the corresponding +wide string, the object will transparently convert formats +using the current locale. +Similarly, if you store a wide string and then store a +narrow string for the same data, the previously-set wide string will +be discarded in favor of the new data. +.Pp +There are a few set/get functions that merit additional description: +.Bl -tag -compact -width indent +.It Fn archive_entry_set_link +This function sets the symlink field if it is already set. +Otherwise, it sets the hardlink field. +.El +.Ss File Flags +File flags are transparently converted between a bitmap +representation and a textual format. +For example, if you set the bitmap and ask for text, the library +will build a canonical text format. +However, if you set a text format and request a text format, +you will get back the same text, even if it is ill-formed. +If you need to canonicalize a textual flags string, you should first set the +text form, then request the bitmap form, then use that to set the bitmap form. +Setting the bitmap format will clear the internal text representation +and force it to be reconstructed when you next request the text form. 
+.Pp +The bitmap format consists of two integers, one containing bits +that should be set, the other specifying bits that should be +cleared. +Bits not mentioned in either bitmap will be ignored. +Usually, the bitmap of bits to be cleared will be set to zero. +In unusual circumstances, you can force a fully-specified set +of file flags by setting the bitmap of flags to clear to the complement +of the bitmap of flags to set. +(This differs from +.Xr fflagstostr 3 , +which only includes names for set bits.) +Converting a bitmap to a textual string is a platform-specific +operation; bits that are not meaningful on the current platform +will be ignored. +.Pp +The canonical text format is a comma-separated list of flag names. +The +.Fn archive_entry_copy_fflags_text_w +function parses the provided text and sets the internal bitmap values. +This is a platform-specific operation; names that are not meaningful +on the current platform will be ignored. +The function returns a pointer to the start of the first name that was not +recognized, or NULL if every name was recognized. +Note that every name--including names that follow an unrecognized name--will +be evaluated, and the bitmaps will be set to reflect every name that is +recognized. +(In particular, this differs from +.Xr strtofflags 3 , +which stops parsing at the first unrecognized name.) +.Ss ACL Handling +XXX This needs serious help. +XXX +.Pp +An +.Dq Access Control List +(ACL) is a list of permissions that grant access to particular users or +groups beyond what would normally be provided by standard POSIX mode bits. +The ACL handling here addresses some deficiencies in the POSIX.1e draft 17 ACL +specification. +In particular, POSIX.1e draft 17 specifies several different formats, but +none of those formats include both textual user/group names and numeric +UIDs/GIDs. 
+.Pp +XXX explain ACL stuff XXX +.\" .Sh EXAMPLE +.\" .Sh RETURN VALUES +.\" .Sh ERRORS +.Sh SEE ALSO +.Xr archive 3 +.Sh HISTORY +The +.Nm libarchive +library first appeared in +.Fx 5.3 . +.Sh AUTHORS +.An -nosplit +The +.Nm libarchive +library was written by +.An Tim Kientzle Aq kientzle@acm.org . +.\" .Sh BUGS diff --git a/lib/libarchive/archive_entry.c b/lib/libarchive/archive_entry.c new file mode 100644 index 0000000..7ec0052 --- /dev/null +++ b/lib/libarchive/archive_entry.c @@ -0,0 +1,1742 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> +#include <sys/types.h> +#ifdef MAJOR_IN_MKDEV +#include <sys/mkdev.h> +#else +#ifdef MAJOR_IN_SYSMACROS +#include <sys/sysmacros.h> +#endif +#endif +#ifdef HAVE_EXT2FS_EXT2_FS_H +#include <ext2fs/ext2_fs.h> /* for Linux file flags */ +#endif +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* Obtain suitable wide-character manipulation functions. */ +#ifdef HAVE_WCHAR_H +#include <wchar.h> +#else +static size_t wcslen(const wchar_t *s) +{ + const wchar_t *p = s; + while (*p != L'\0') + ++p; + return p - s; +} +static wchar_t * wcscpy(wchar_t *s1, const wchar_t *s2) +{ + wchar_t *dest = s1; + while ((*s1 = *s2) != L'\0') + ++s1, ++s2; + return dest; +} +#define wmemcpy(a,b,i) (wchar_t *)memcpy((a), (b), (i) * sizeof(wchar_t)) +/* Good enough for simple equality testing, but not for sorting. */ +#define wmemcmp(a,b,i) memcmp((a), (b), (i) * sizeof(wchar_t)) +#endif + +#include "archive.h" +#include "archive_entry.h" +#include "archive_private.h" + +#undef max +#define max(a, b) ((a)>(b)?(a):(b)) + +/* + * Handle wide character (i.e., Unicode) and non-wide character + * strings transparently. 
+ * + */ + +struct aes { + const char *aes_mbs; + char *aes_mbs_alloc; + const wchar_t *aes_wcs; + wchar_t *aes_wcs_alloc; +}; + +struct ae_acl { + struct ae_acl *next; + int type; /* E.g., access or default */ + int tag; /* E.g., user/group/other/mask */ + int permset; /* r/w/x bits */ + int id; /* uid/gid for user/group */ + struct aes name; /* uname/gname */ +}; + +struct ae_xattr { + struct ae_xattr *next; + + char *name; + void *value; + size_t size; +}; + +static void aes_clean(struct aes *); +static void aes_copy(struct aes *dest, struct aes *src); +static const char * aes_get_mbs(struct aes *); +static const wchar_t * aes_get_wcs(struct aes *); +static void aes_set_mbs(struct aes *, const char *mbs); +static void aes_copy_mbs(struct aes *, const char *mbs); +/* static void aes_set_wcs(struct aes *, const wchar_t *wcs); */ +static void aes_copy_wcs(struct aes *, const wchar_t *wcs); + +static char * ae_fflagstostr(unsigned long bitset, unsigned long bitclear); +static const wchar_t *ae_wcstofflags(const wchar_t *stringp, + unsigned long *setp, unsigned long *clrp); +static void append_entry_w(wchar_t **wp, const wchar_t *prefix, int tag, + const wchar_t *wname, int perm, int id); +static void append_id_w(wchar_t **wp, int id); + +static int acl_special(struct archive_entry *entry, + int type, int permset, int tag); +static struct ae_acl *acl_new_entry(struct archive_entry *entry, + int type, int permset, int tag, int id); +static void next_field_w(const wchar_t **wp, const wchar_t **start, + const wchar_t **end, wchar_t *sep); +static int prefix_w(const wchar_t *start, const wchar_t *end, + const wchar_t *test); + + +/* + * Description of an archive entry. + * + * Basically, this is a "struct stat" with a few text fields added in. + * + * TODO: Add "comment", "charset", and possibly other entries + * that are supported by "pax interchange" format. 
However, GNU, ustar, + * cpio, and other variants don't support these features, so they're not an + * excruciatingly high priority right now. + * + * TODO: "pax interchange" format allows essentially arbitrary + * key/value attributes to be attached to any entry. Supporting + * such extensions may make this library useful for special + * applications (e.g., a package manager could attach special + * package-management attributes to each entry). There are tricky + * API issues involved, so this is not going to happen until + * there's a real demand for it. + * + * TODO: Design a good API for handling sparse files. + */ +struct archive_entry { + /* + * Note that ae_stat.st_mode & S_IFMT can be 0! + * + * This occurs when the actual file type of the object is not + * in the archive. For example, 'tar' archives store + * hardlinks without marking the type of the underlying + * object. + */ + struct stat ae_stat; + + /* + * Use aes here so that we get transparent mbs<->wcs conversions. + */ + struct aes ae_fflags_text; /* Text fflags per fflagstostr(3) */ + unsigned long ae_fflags_set; /* Bitmap fflags */ + unsigned long ae_fflags_clear; + struct aes ae_gname; /* Name of owning group */ + struct aes ae_hardlink; /* Name of target for hardlink */ + struct aes ae_pathname; /* Name of entry */ + struct aes ae_symlink; /* symlink contents */ + struct aes ae_uname; /* Name of owner */ + + struct ae_acl *acl_head; + struct ae_acl *acl_p; + int acl_state; /* See acl_next for details. 
*/ + wchar_t *acl_text_w; + + struct ae_xattr *xattr_head; + struct ae_xattr *xattr_p; +}; + +static void +aes_clean(struct aes *aes) +{ + if (aes->aes_mbs_alloc) { + free(aes->aes_mbs_alloc); + aes->aes_mbs_alloc = NULL; + } + if (aes->aes_wcs_alloc) { + free(aes->aes_wcs_alloc); + aes->aes_wcs_alloc = NULL; + } + memset(aes, 0, sizeof(*aes)); +} + +static void +aes_copy(struct aes *dest, struct aes *src) +{ + *dest = *src; + if (src->aes_mbs != NULL) { + dest->aes_mbs_alloc = strdup(src->aes_mbs); + dest->aes_mbs = dest->aes_mbs_alloc; + if (dest->aes_mbs == NULL) + __archive_errx(1, "No memory for aes_copy()"); + } + + if (src->aes_wcs != NULL) { + dest->aes_wcs_alloc = malloc((wcslen(src->aes_wcs) + 1) + * sizeof(wchar_t)); + dest->aes_wcs = dest->aes_wcs_alloc; + if (dest->aes_wcs == NULL) + __archive_errx(1, "No memory for aes_copy()"); + wcscpy(dest->aes_wcs_alloc, src->aes_wcs); + } +} + +static const char * +aes_get_mbs(struct aes *aes) +{ + if (aes->aes_mbs == NULL && aes->aes_wcs == NULL) + return NULL; + if (aes->aes_mbs == NULL && aes->aes_wcs != NULL) { + /* + * XXX Need to estimate the number of byte in the + * multi-byte form. Assume that, on average, wcs + * chars encode to no more than 3 bytes. There must + * be a better way... XXX + */ + int mbs_length = wcslen(aes->aes_wcs) * 3 + 64; + aes->aes_mbs_alloc = malloc(mbs_length); + aes->aes_mbs = aes->aes_mbs_alloc; + if (aes->aes_mbs == NULL) + __archive_errx(1, "No memory for aes_get_mbs()"); + wcstombs(aes->aes_mbs_alloc, aes->aes_wcs, mbs_length - 1); + aes->aes_mbs_alloc[mbs_length - 1] = 0; + } + return (aes->aes_mbs); +} + +static const wchar_t * +aes_get_wcs(struct aes *aes) +{ + if (aes->aes_wcs == NULL && aes->aes_mbs == NULL) + return NULL; + if (aes->aes_wcs == NULL && aes->aes_mbs != NULL) { + /* + * No single byte will be more than one wide character, + * so this length estimate will always be big enough. 
+ */ + int wcs_length = strlen(aes->aes_mbs); + aes->aes_wcs_alloc + = malloc((wcs_length + 1) * sizeof(wchar_t)); + aes->aes_wcs = aes->aes_wcs_alloc; + if (aes->aes_wcs == NULL) + __archive_errx(1, "No memory for aes_get_wcs()"); + mbstowcs(aes->aes_wcs_alloc, aes->aes_mbs, wcs_length); + aes->aes_wcs_alloc[wcs_length] = 0; + } + return (aes->aes_wcs); +} + +static void +aes_set_mbs(struct aes *aes, const char *mbs) +{ + if (aes->aes_mbs_alloc) { + free(aes->aes_mbs_alloc); + aes->aes_mbs_alloc = NULL; + } + if (aes->aes_wcs_alloc) { + free(aes->aes_wcs_alloc); + aes->aes_wcs_alloc = NULL; + } + aes->aes_mbs = mbs; + aes->aes_wcs = NULL; +} + +static void +aes_copy_mbs(struct aes *aes, const char *mbs) +{ + if (aes->aes_mbs_alloc) { + free(aes->aes_mbs_alloc); + aes->aes_mbs_alloc = NULL; + } + if (aes->aes_wcs_alloc) { + free(aes->aes_wcs_alloc); + aes->aes_wcs_alloc = NULL; + } + aes->aes_mbs_alloc = malloc((strlen(mbs) + 1) * sizeof(char)); + if (aes->aes_mbs_alloc == NULL) + __archive_errx(1, "No memory for aes_copy_mbs()"); + strcpy(aes->aes_mbs_alloc, mbs); + aes->aes_mbs = aes->aes_mbs_alloc; + aes->aes_wcs = NULL; +} + +#if 0 +static void +aes_set_wcs(struct aes *aes, const wchar_t *wcs) +{ + if (aes->aes_mbs_alloc) { + free(aes->aes_mbs_alloc); + aes->aes_mbs_alloc = NULL; + } + if (aes->aes_wcs_alloc) { + free(aes->aes_wcs_alloc); + aes->aes_wcs_alloc = NULL; + } + aes->aes_mbs = NULL; + aes->aes_wcs = wcs; +} +#endif + +static void +aes_copy_wcs(struct aes *aes, const wchar_t *wcs) +{ + if (aes->aes_mbs_alloc) { + free(aes->aes_mbs_alloc); + aes->aes_mbs_alloc = NULL; + } + if (aes->aes_wcs_alloc) { + free(aes->aes_wcs_alloc); + aes->aes_wcs_alloc = NULL; + } + aes->aes_mbs = NULL; + aes->aes_wcs_alloc = malloc((wcslen(wcs) + 1) * sizeof(wchar_t)); + if (aes->aes_wcs_alloc == NULL) + __archive_errx(1, "No memory for aes_copy_wcs()"); + wcscpy(aes->aes_wcs_alloc, wcs); + aes->aes_wcs = aes->aes_wcs_alloc; +} + +struct archive_entry * 
+archive_entry_clear(struct archive_entry *entry) +{ + aes_clean(&entry->ae_fflags_text); + aes_clean(&entry->ae_gname); + aes_clean(&entry->ae_hardlink); + aes_clean(&entry->ae_pathname); + aes_clean(&entry->ae_symlink); + aes_clean(&entry->ae_uname); + archive_entry_acl_clear(entry); + archive_entry_xattr_clear(entry); + memset(entry, 0, sizeof(*entry)); + return entry; +} + +struct archive_entry * +archive_entry_clone(struct archive_entry *entry) +{ + struct archive_entry *entry2; + + /* Allocate new structure and copy over all of the fields. */ + entry2 = malloc(sizeof(*entry2)); + if (entry2 == NULL) + return (NULL); + memset(entry2, 0, sizeof(*entry2)); + entry2->ae_stat = entry->ae_stat; + entry2->ae_fflags_set = entry->ae_fflags_set; + entry2->ae_fflags_clear = entry->ae_fflags_clear; + + aes_copy(&entry2->ae_fflags_text, &entry->ae_fflags_text); + aes_copy(&entry2->ae_gname, &entry->ae_gname); + aes_copy(&entry2->ae_hardlink, &entry->ae_hardlink); + aes_copy(&entry2->ae_pathname, &entry->ae_pathname); + aes_copy(&entry2->ae_symlink, &entry->ae_symlink); + aes_copy(&entry2->ae_uname, &entry->ae_uname); + + /* XXX TODO: Copy ACL data over as well. XXX */ + /* XXX TODO: Copy xattr data over as well. XXX */ + return (entry2); +} + +void +archive_entry_free(struct archive_entry *entry) +{ + archive_entry_clear(entry); + free(entry); +} + +struct archive_entry * +archive_entry_new(void) +{ + struct archive_entry *entry; + + entry = malloc(sizeof(*entry)); + if (entry == NULL) + return (NULL); + memset(entry, 0, sizeof(*entry)); + return (entry); +} + +/* + * Functions for reading fields from an archive_entry. + */ + +time_t +archive_entry_atime(struct archive_entry *entry) +{ + return (entry->ae_stat.st_atime); +} + +long +archive_entry_atime_nsec(struct archive_entry *entry) +{ + (void)entry; /* entry can be unused here. 
*/ + return (ARCHIVE_STAT_ATIME_NANOS(&entry->ae_stat)); +} + +time_t +archive_entry_ctime(struct archive_entry *entry) +{ + return (entry->ae_stat.st_ctime); +} + +long +archive_entry_ctime_nsec(struct archive_entry *entry) +{ + (void)entry; /* entry can be unused here. */ + return (ARCHIVE_STAT_CTIME_NANOS(&entry->ae_stat)); +} + +dev_t +archive_entry_dev(struct archive_entry *entry) +{ + return (entry->ae_stat.st_dev); +} + +void +archive_entry_fflags(struct archive_entry *entry, + unsigned long *set, unsigned long *clear) +{ + *set = entry->ae_fflags_set; + *clear = entry->ae_fflags_clear; +} + +/* + * Note: if text was provided, this just returns that text. If you + * really need the text to be rebuilt in a canonical form, set the + * text, ask for the bitmaps, then set the bitmaps. (Setting the + * bitmaps clears any stored text.) This design is deliberate: if + * we're editing archives, we don't want to discard flags just because + * they aren't supported on the current system. The bitmap<->text + * conversions are platform-specific (see below). 
+ */ +const char * +archive_entry_fflags_text(struct archive_entry *entry) +{ + const char *f; + char *p; + + f = aes_get_mbs(&entry->ae_fflags_text); + if (f != NULL) + return (f); + + if (entry->ae_fflags_set == 0 && entry->ae_fflags_clear == 0) + return (NULL); + + p = ae_fflagstostr(entry->ae_fflags_set, entry->ae_fflags_clear); + if (p == NULL) + return (NULL); + + aes_copy_mbs(&entry->ae_fflags_text, p); + free(p); + f = aes_get_mbs(&entry->ae_fflags_text); + return (f); +} + +gid_t +archive_entry_gid(struct archive_entry *entry) +{ + return (entry->ae_stat.st_gid); +} + +const char * +archive_entry_gname(struct archive_entry *entry) +{ + return (aes_get_mbs(&entry->ae_gname)); +} + +const wchar_t * +archive_entry_gname_w(struct archive_entry *entry) +{ + return (aes_get_wcs(&entry->ae_gname)); +} + +const char * +archive_entry_hardlink(struct archive_entry *entry) +{ + return (aes_get_mbs(&entry->ae_hardlink)); +} + +const wchar_t * +archive_entry_hardlink_w(struct archive_entry *entry) +{ + return (aes_get_wcs(&entry->ae_hardlink)); +} + +ino_t +archive_entry_ino(struct archive_entry *entry) +{ + return (entry->ae_stat.st_ino); +} + +mode_t +archive_entry_mode(struct archive_entry *entry) +{ + return (entry->ae_stat.st_mode); +} + +time_t +archive_entry_mtime(struct archive_entry *entry) +{ + return (entry->ae_stat.st_mtime); +} + +long +archive_entry_mtime_nsec(struct archive_entry *entry) +{ + (void)entry; /* entry can be unused here. 
*/ + return (ARCHIVE_STAT_MTIME_NANOS(&entry->ae_stat)); +} + +const char * +archive_entry_pathname(struct archive_entry *entry) +{ + return (aes_get_mbs(&entry->ae_pathname)); +} + +const wchar_t * +archive_entry_pathname_w(struct archive_entry *entry) +{ + return (aes_get_wcs(&entry->ae_pathname)); +} + +dev_t +archive_entry_rdev(struct archive_entry *entry) +{ + return (entry->ae_stat.st_rdev); +} + +dev_t +archive_entry_rdevmajor(struct archive_entry *entry) +{ + return (major(entry->ae_stat.st_rdev)); +} + +dev_t +archive_entry_rdevminor(struct archive_entry *entry) +{ + return (minor(entry->ae_stat.st_rdev)); +} + +int64_t +archive_entry_size(struct archive_entry *entry) +{ + return (entry->ae_stat.st_size); +} + +const struct stat * +archive_entry_stat(struct archive_entry *entry) +{ + return (&entry->ae_stat); +} + +const char * +archive_entry_symlink(struct archive_entry *entry) +{ + return (aes_get_mbs(&entry->ae_symlink)); +} + +const wchar_t * +archive_entry_symlink_w(struct archive_entry *entry) +{ + return (aes_get_wcs(&entry->ae_symlink)); +} + +uid_t +archive_entry_uid(struct archive_entry *entry) +{ + return (entry->ae_stat.st_uid); +} + +const char * +archive_entry_uname(struct archive_entry *entry) +{ + return (aes_get_mbs(&entry->ae_uname)); +} + +const wchar_t * +archive_entry_uname_w(struct archive_entry *entry) +{ + return (aes_get_wcs(&entry->ae_uname)); +} + +/* + * Functions to set archive_entry properties. + */ + +/* + * Note "copy" not "set" here. The "set" functions that accept a pointer + * only store the pointer; they don't copy the underlying object. 
+ */ +void +archive_entry_copy_stat(struct archive_entry *entry, const struct stat *st) +{ + entry->ae_stat = *st; +} + +void +archive_entry_set_fflags(struct archive_entry *entry, + unsigned long set, unsigned long clear) +{ + aes_clean(&entry->ae_fflags_text); + entry->ae_fflags_set = set; + entry->ae_fflags_clear = clear; +} + +const wchar_t * +archive_entry_copy_fflags_text_w(struct archive_entry *entry, + const wchar_t *flags) +{ + aes_copy_wcs(&entry->ae_fflags_text, flags); + return (ae_wcstofflags(flags, + &entry->ae_fflags_set, &entry->ae_fflags_clear)); +} + +void +archive_entry_set_gid(struct archive_entry *entry, gid_t g) +{ + entry->ae_stat.st_gid = g; +} + +void +archive_entry_set_gname(struct archive_entry *entry, const char *name) +{ + aes_set_mbs(&entry->ae_gname, name); +} + +void +archive_entry_copy_gname_w(struct archive_entry *entry, const wchar_t *name) +{ + aes_copy_wcs(&entry->ae_gname, name); +} + +void +archive_entry_set_hardlink(struct archive_entry *entry, const char *target) +{ + aes_set_mbs(&entry->ae_hardlink, target); +} + +void +archive_entry_copy_hardlink(struct archive_entry *entry, const char *target) +{ + aes_copy_mbs(&entry->ae_hardlink, target); +} + +void +archive_entry_copy_hardlink_w(struct archive_entry *entry, const wchar_t *target) +{ + aes_copy_wcs(&entry->ae_hardlink, target); +} + +void +archive_entry_set_atime(struct archive_entry *entry, time_t t, long ns) +{ + entry->ae_stat.st_atime = t; + ARCHIVE_STAT_SET_ATIME_NANOS(&entry->ae_stat, ns); +} + +void +archive_entry_set_ctime(struct archive_entry *entry, time_t t, long ns) +{ + entry->ae_stat.st_ctime = t; + ARCHIVE_STAT_SET_CTIME_NANOS(&entry->ae_stat, ns); +} + +/* Set symlink if symlink is already set, else set hardlink. 
*/ +void +archive_entry_set_link(struct archive_entry *entry, const char *target) +{ + if (entry->ae_symlink.aes_mbs != NULL || + entry->ae_symlink.aes_wcs != NULL) + aes_set_mbs(&entry->ae_symlink, target); + else + aes_set_mbs(&entry->ae_hardlink, target); +} + +void +archive_entry_set_mode(struct archive_entry *entry, mode_t m) +{ + entry->ae_stat.st_mode = m; +} + +void +archive_entry_set_mtime(struct archive_entry *entry, time_t m, long ns) +{ + entry->ae_stat.st_mtime = m; + ARCHIVE_STAT_SET_MTIME_NANOS(&entry->ae_stat, ns); +} + +void +archive_entry_set_pathname(struct archive_entry *entry, const char *name) +{ + aes_set_mbs(&entry->ae_pathname, name); +} + +void +archive_entry_copy_pathname(struct archive_entry *entry, const char *name) +{ + aes_copy_mbs(&entry->ae_pathname, name); +} + +void +archive_entry_copy_pathname_w(struct archive_entry *entry, const wchar_t *name) +{ + aes_copy_wcs(&entry->ae_pathname, name); +} + +void +archive_entry_set_rdevmajor(struct archive_entry *entry, dev_t m) +{ + dev_t d; + + d = entry->ae_stat.st_rdev; + entry->ae_stat.st_rdev = makedev(major(m), minor(d)); +} + +void +archive_entry_set_rdevminor(struct archive_entry *entry, dev_t m) +{ + dev_t d; + + d = entry->ae_stat.st_rdev; + entry->ae_stat.st_rdev = makedev(major(d), minor(m)); +} + +void +archive_entry_set_size(struct archive_entry *entry, int64_t s) +{ + entry->ae_stat.st_size = s; +} + +void +archive_entry_set_symlink(struct archive_entry *entry, const char *linkname) +{ + aes_set_mbs(&entry->ae_symlink, linkname); +} + +void +archive_entry_copy_symlink_w(struct archive_entry *entry, const wchar_t *linkname) +{ + aes_copy_wcs(&entry->ae_symlink, linkname); +} + +void +archive_entry_set_uid(struct archive_entry *entry, uid_t u) +{ + entry->ae_stat.st_uid = u; +} + +void +archive_entry_set_uname(struct archive_entry *entry, const char *name) +{ + aes_set_mbs(&entry->ae_uname, name); +} + +void +archive_entry_copy_uname_w(struct archive_entry *entry, const wchar_t 
*name) +{ + aes_copy_wcs(&entry->ae_uname, name); +} + +/* + * ACL management. The following would, of course, be a lot simpler + * if: 1) the last draft of POSIX.1e were a really thorough and + * complete standard that addressed the needs of ACL archiving and 2) + * everyone followed it faithfully. Alas, neither is true, so the + * following is a lot more complex than might seem necessary to the + * uninitiated. + */ + +void +archive_entry_acl_clear(struct archive_entry *entry) +{ + struct ae_acl *ap; + + while (entry->acl_head != NULL) { + ap = entry->acl_head->next; + aes_clean(&entry->acl_head->name); + free(entry->acl_head); + entry->acl_head = ap; + } + if (entry->acl_text_w != NULL) { + free(entry->acl_text_w); + entry->acl_text_w = NULL; + } + entry->acl_p = NULL; + entry->acl_state = 0; /* Not counting. */ +} + +/* + * Add a single ACL entry to the internal list of ACL data. + */ +void +archive_entry_acl_add_entry(struct archive_entry *entry, + int type, int permset, int tag, int id, const char *name) +{ + struct ae_acl *ap; + + if (acl_special(entry, type, permset, tag) == 0) + return; + ap = acl_new_entry(entry, type, permset, tag, id); + if (ap == NULL) { + /* XXX Error XXX */ + return; + } + if (name != NULL && *name != '\0') + aes_copy_mbs(&ap->name, name); + else + aes_clean(&ap->name); +} + +/* + * As above, but with a wide-character name. + */ +void +archive_entry_acl_add_entry_w(struct archive_entry *entry, + int type, int permset, int tag, int id, const wchar_t *name) +{ + struct ae_acl *ap; + + if (acl_special(entry, type, permset, tag) == 0) + return; + ap = acl_new_entry(entry, type, permset, tag, id); + if (ap == NULL) { + /* XXX Error XXX */ + return; + } + if (name != NULL && *name != L'\0') + aes_copy_wcs(&ap->name, name); + else + aes_clean(&ap->name); +} + +/* + * If this ACL entry is part of the standard POSIX permissions set, + * store the permissions in the stat structure and return zero. 
+ */ +static int +acl_special(struct archive_entry *entry, int type, int permset, int tag) +{ + if (type == ARCHIVE_ENTRY_ACL_TYPE_ACCESS) { + switch (tag) { + case ARCHIVE_ENTRY_ACL_USER_OBJ: + entry->ae_stat.st_mode &= ~0700; + entry->ae_stat.st_mode |= (permset & 7) << 6; + return (0); + case ARCHIVE_ENTRY_ACL_GROUP_OBJ: + entry->ae_stat.st_mode &= ~0070; + entry->ae_stat.st_mode |= (permset & 7) << 3; + return (0); + case ARCHIVE_ENTRY_ACL_OTHER: + entry->ae_stat.st_mode &= ~0007; + entry->ae_stat.st_mode |= permset & 7; + return (0); + } + } + return (1); +} + +/* + * Allocate and populate a new ACL entry with everything but the + * name. + */ +static struct ae_acl * +acl_new_entry(struct archive_entry *entry, + int type, int permset, int tag, int id) +{ + struct ae_acl *ap; + + if (type != ARCHIVE_ENTRY_ACL_TYPE_ACCESS && + type != ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) + return (NULL); + if (entry->acl_text_w != NULL) { + free(entry->acl_text_w); + entry->acl_text_w = NULL; + } + + /* XXX TODO: More sanity-checks on the arguments XXX */ + + /* If there's a matching entry already in the list, overwrite it. */ + for (ap = entry->acl_head; ap != NULL; ap = ap->next) { + if (ap->type == type && ap->tag == tag && ap->id == id) { + ap->permset = permset; + return (ap); + } + } + + /* Add a new entry to the list. */ + ap = malloc(sizeof(*ap)); + if (ap == NULL) + return (NULL); + memset(ap, 0, sizeof(*ap)); + ap->next = entry->acl_head; + entry->acl_head = ap; + ap->type = type; + ap->tag = tag; + ap->id = id; + ap->permset = permset; + return (ap); +} + +/* + * Return a count of entries matching "want_type". 
+ */ +int +archive_entry_acl_count(struct archive_entry *entry, int want_type) +{ + int count; + struct ae_acl *ap; + + count = 0; + ap = entry->acl_head; + while (ap != NULL) { + if ((ap->type & want_type) != 0) + count++; + ap = ap->next; + } + + if (count > 0 && ((want_type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0)) + count += 3; + return (count); +} + +/* + * Prepare for reading entries from the ACL data. Returns a count + * of entries matching "want_type", or zero if there are no + * non-extended ACL entries of that type. + */ +int +archive_entry_acl_reset(struct archive_entry *entry, int want_type) +{ + int count, cutoff; + + count = archive_entry_acl_count(entry, want_type); + + /* + * If the only entries are the three standard ones, + * then don't return any ACL data. (In this case, + * client can just use chmod(2) to set permissions.) + */ + if ((want_type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) + cutoff = 3; + else + cutoff = 0; + + if (count > cutoff) + entry->acl_state = ARCHIVE_ENTRY_ACL_USER_OBJ; + else + entry->acl_state = 0; + entry->acl_p = entry->acl_head; + return (count); +} + +/* + * Return the next ACL entry in the list. Fake entries for the + * standard permissions and include them in the returned list. + */ + +int +archive_entry_acl_next(struct archive_entry *entry, int want_type, int *type, + int *permset, int *tag, int *id, const char **name) +{ + *name = NULL; + *id = -1; + + /* + * The acl_state is either zero (no entries available), -1 + * (reading from list), or an entry type (retrieve that type + * from ae_stat.st_mode). + */ + if (entry->acl_state == 0) + return (ARCHIVE_WARN); + + /* The first three access entries are special. 
*/ + if ((want_type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { + switch (entry->acl_state) { + case ARCHIVE_ENTRY_ACL_USER_OBJ: + *permset = (entry->ae_stat.st_mode >> 6) & 7; + *type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; + *tag = ARCHIVE_ENTRY_ACL_USER_OBJ; + entry->acl_state = ARCHIVE_ENTRY_ACL_GROUP_OBJ; + return (ARCHIVE_OK); + case ARCHIVE_ENTRY_ACL_GROUP_OBJ: + *permset = (entry->ae_stat.st_mode >> 3) & 7; + *type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; + *tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; + entry->acl_state = ARCHIVE_ENTRY_ACL_OTHER; + return (ARCHIVE_OK); + case ARCHIVE_ENTRY_ACL_OTHER: + *permset = entry->ae_stat.st_mode & 7; + *type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; + *tag = ARCHIVE_ENTRY_ACL_OTHER; + entry->acl_state = -1; + entry->acl_p = entry->acl_head; + return (ARCHIVE_OK); + default: + break; + } + } + + while (entry->acl_p != NULL && (entry->acl_p->type & want_type) == 0) + entry->acl_p = entry->acl_p->next; + if (entry->acl_p == NULL) { + entry->acl_state = 0; + return (ARCHIVE_WARN); + } + *type = entry->acl_p->type; + *permset = entry->acl_p->permset; + *tag = entry->acl_p->tag; + *id = entry->acl_p->id; + *name = aes_get_mbs(&entry->acl_p->name); + entry->acl_p = entry->acl_p->next; + return (ARCHIVE_OK); +} + +/* + * Generate a text version of the ACL. The flags parameter controls + * the style of the generated ACL. 
+ */ +const wchar_t * +archive_entry_acl_text_w(struct archive_entry *entry, int flags) +{ + int count; + int length; + const wchar_t *wname; + const wchar_t *prefix; + wchar_t separator; + struct ae_acl *ap; + int id; + wchar_t *wp; + + if (entry->acl_text_w != NULL) { + free (entry->acl_text_w); + entry->acl_text_w = NULL; + } + + separator = L','; + count = 0; + length = 0; + ap = entry->acl_head; + while (ap != NULL) { + if ((ap->type & flags) != 0) { + count++; + if ((flags & ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT) && + (ap->type & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT)) + length += 8; /* "default:" */ + length += 5; /* tag name */ + length += 1; /* colon */ + wname = aes_get_wcs(&ap->name); + if (wname != NULL) + length += wcslen(wname); + length ++; /* colon */ + length += 3; /* rwx */ + length += 1; /* colon */ + length += max(sizeof(uid_t), sizeof(gid_t)) * 3 + 1; + length ++; /* newline */ + } + ap = ap->next; + } + + if (count > 0 && ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0)) { + length += 10; /* "user::rwx\n" */ + length += 11; /* "group::rwx\n" */ + length += 11; /* "other::rwx\n" */ + } + + if (count == 0) + return (NULL); + + /* Now, allocate the string and actually populate it. 
*/ + wp = entry->acl_text_w = malloc(length * sizeof(wchar_t)); + if (wp == NULL) + __archive_errx(1, "No memory to generate the text version of the ACL"); + count = 0; + if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { + append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_USER_OBJ, NULL, + entry->ae_stat.st_mode & 0700, -1); + *wp++ = ','; + append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_GROUP_OBJ, NULL, + entry->ae_stat.st_mode & 0070, -1); + *wp++ = ','; + append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_OTHER, NULL, + entry->ae_stat.st_mode & 0007, -1); + count += 3; + + ap = entry->acl_head; + while (ap != NULL) { + if ((ap->type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { + wname = aes_get_wcs(&ap->name); + *wp++ = separator; + if (flags & ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID) + id = ap->id; + else + id = -1; + append_entry_w(&wp, NULL, ap->tag, wname, + ap->permset, id); + count++; + } + ap = ap->next; + } + } + + + if ((flags & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) != 0) { + if (flags & ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT) + prefix = L"default:"; + else + prefix = NULL; + ap = entry->acl_head; + count = 0; + while (ap != NULL) { + if ((ap->type & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) != 0) { + wname = aes_get_wcs(&ap->name); + if (count > 0) + *wp++ = separator; + if (flags & ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID) + id = ap->id; + else + id = -1; + append_entry_w(&wp, prefix, ap->tag, + wname, ap->permset, id); + count ++; + } + ap = ap->next; + } + } + + return (entry->acl_text_w); +} + +static void +append_id_w(wchar_t **wp, int id) +{ + if (id > 9) + append_id_w(wp, id / 10); + *(*wp)++ = L"0123456789"[id % 10]; +} + +static void +append_entry_w(wchar_t **wp, const wchar_t *prefix, int tag, + const wchar_t *wname, int perm, int id) +{ + if (prefix != NULL) { + wcscpy(*wp, prefix); + *wp += wcslen(*wp); + } + switch (tag) { + case ARCHIVE_ENTRY_ACL_USER_OBJ: + wname = NULL; + id = -1; + /* FALL THROUGH */ + case ARCHIVE_ENTRY_ACL_USER: + wcscpy(*wp, L"user"); + break; + case 
ARCHIVE_ENTRY_ACL_GROUP_OBJ: + wname = NULL; + id = -1; + /* FALL THROUGH */ + case ARCHIVE_ENTRY_ACL_GROUP: + wcscpy(*wp, L"group"); + break; + case ARCHIVE_ENTRY_ACL_MASK: + wcscpy(*wp, L"mask"); + wname = NULL; + id = -1; + break; + case ARCHIVE_ENTRY_ACL_OTHER: + wcscpy(*wp, L"other"); + wname = NULL; + id = -1; + break; + } + *wp += wcslen(*wp); + *(*wp)++ = L':'; + if (wname != NULL) { + wcscpy(*wp, wname); + *wp += wcslen(*wp); + } + *(*wp)++ = L':'; + *(*wp)++ = (perm & 0444) ? L'r' : L'-'; + *(*wp)++ = (perm & 0222) ? L'w' : L'-'; + *(*wp)++ = (perm & 0111) ? L'x' : L'-'; + if (id != -1) { + *(*wp)++ = L':'; + append_id_w(wp, id); + } + **wp = L'\0'; +} + +/* + * Parse a textual ACL. This automatically recognizes and supports + * extensions described above. The 'type' argument is used to + * indicate the type that should be used for any entries not + * explicitly marked as "default:". + */ +int +__archive_entry_acl_parse_w(struct archive_entry *entry, + const wchar_t *text, int default_type) +{ + int type, tag, permset, id; + const wchar_t *start, *end; + const wchar_t *name_start, *name_end; + wchar_t sep; + wchar_t *namebuff; + int namebuff_length; + + name_start = name_end = NULL; + namebuff = NULL; + namebuff_length = 0; + + while (text != NULL && *text != L'\0') { + next_field_w(&text, &start, &end, &sep); + if (sep != L':') + goto fail; + + /* + * Solaris extension: "defaultuser::rwx" is the + * default ACL corresponding to "user::rwx", etc. 
+ */ + if (end-start > 7 && wmemcmp(start, L"default", 7) == 0) { + type = ARCHIVE_ENTRY_ACL_TYPE_DEFAULT; + start += 7; + } else + type = default_type; + + if (prefix_w(start, end, L"user")) { + next_field_w(&text, &start, &end, &sep); + if (sep != L':') + goto fail; + if (end > start) { + tag = ARCHIVE_ENTRY_ACL_USER; + name_start = start; + name_end = end; + } else + tag = ARCHIVE_ENTRY_ACL_USER_OBJ; + } else if (prefix_w(start, end, L"group")) { + next_field_w(&text, &start, &end, &sep); + if (sep != L':') + goto fail; + if (end > start) { + tag = ARCHIVE_ENTRY_ACL_GROUP; + name_start = start; + name_end = end; + } else + tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; + } else if (prefix_w(start, end, L"other")) { + next_field_w(&text, &start, &end, &sep); + if (sep != L':') + goto fail; + if (end > start) + goto fail; + tag = ARCHIVE_ENTRY_ACL_OTHER; + } else if (prefix_w(start, end, L"mask")) { + next_field_w(&text, &start, &end, &sep); + if (sep != L':') + goto fail; + if (end > start) + goto fail; + tag = ARCHIVE_ENTRY_ACL_MASK; + } else + goto fail; + + next_field_w(&text, &start, &end, &sep); + permset = 0; + while (start < end) { + switch (*start++) { + case 'r': case 'R': + permset |= ARCHIVE_ENTRY_ACL_READ; + break; + case 'w': case 'W': + permset |= ARCHIVE_ENTRY_ACL_WRITE; + break; + case 'x': case 'X': + permset |= ARCHIVE_ENTRY_ACL_EXECUTE; + break; + case '-': + break; + default: + goto fail; + } + } + + /* + * Support star-compatible numeric UID/GID extension. + * This extension adds a ":" followed by the numeric + * ID so that "group:groupname:rwx", for example, + * becomes "group:groupname:rwx:999", where 999 is the + * numeric GID. This extension makes it possible, for + * example, to correctly restore ACLs on a system that + * might have a damaged passwd file or be disconnected + * from a central NIS server. This extension is compatible + * with POSIX.1e draft 17. 
+ */ + if (sep == L':' && (tag == ARCHIVE_ENTRY_ACL_USER || + tag == ARCHIVE_ENTRY_ACL_GROUP)) { + next_field_w(&text, &start, &end, &sep); + + id = 0; + while (start < end && *start >= '0' && *start <= '9') { + if (id > (INT_MAX / 10)) + id = INT_MAX; + else { + id *= 10; + id += *start - '0'; + start++; + } + } + } else + id = -1; /* No id specified. */ + + /* Skip any additional entries. */ + while (sep == L':') { + next_field_w(&text, &start, &end, &sep); + } + + /* Add entry to the internal list. */ + if (name_end == name_start) { + archive_entry_acl_add_entry_w(entry, type, permset, + tag, id, NULL); + } else { + if (namebuff_length <= name_end - name_start) { + if (namebuff != NULL) + free(namebuff); + namebuff_length = name_end - name_start + 256; + namebuff = + malloc(namebuff_length * sizeof(wchar_t)); + if (namebuff == NULL) + goto fail; + } + wmemcpy(namebuff, name_start, name_end - name_start); + namebuff[name_end - name_start] = L'\0'; + archive_entry_acl_add_entry_w(entry, type, + permset, tag, id, namebuff); + } + } + if (namebuff != NULL) + free(namebuff); + return (ARCHIVE_OK); + +fail: + if (namebuff != NULL) + free(namebuff); + return (ARCHIVE_WARN); +} + +/* + * extended attribute handling + */ + +void +archive_entry_xattr_clear(struct archive_entry *entry) +{ + struct ae_xattr *xp; + + while (entry->xattr_head != NULL) { + xp = entry->xattr_head->next; + free(entry->xattr_head->name); + free(entry->xattr_head->value); + free(entry->xattr_head); + entry->xattr_head = xp; + } + + entry->xattr_head = NULL; +} + +void +archive_entry_xattr_add_entry(struct archive_entry *entry, + const char *name, const void *value, size_t size) +{ + struct ae_xattr *xp; + + for (xp = entry->xattr_head; xp != NULL; xp = xp->next) + ; + + if ((xp = malloc(sizeof(struct ae_xattr))) == NULL) + /* XXX Error XXX */ + return; + + xp->name = strdup(name); + if ((xp -> value = malloc(size)) != NULL) { + memcpy(xp -> value, value, size); + xp -> size = size; + } else + xp 
-> size = 0; + + xp->next = entry->xattr_head; + entry->xattr_head = xp; +} + + +/* + * returns number of the extended attribute entries + */ +int +archive_entry_xattr_count(struct archive_entry *entry) +{ + struct ae_xattr *xp; + int count = 0; + + for (xp = entry->xattr_head; xp != NULL; xp = xp->next) + count++; + + return count; +} + +int +archive_entry_xattr_reset(struct archive_entry * entry) +{ + entry->xattr_p = entry->xattr_head; + + return archive_entry_xattr_count(entry); +} + +int +archive_entry_xattr_next(struct archive_entry * entry, + const char **name, const void **value, size_t *size) +{ + if (entry->xattr_p) { + *name = entry->xattr_p->name; + *value = entry->xattr_p->value; + *size = entry->xattr_p->size; + + entry->xattr_p = entry->xattr_p->next; + + return (ARCHIVE_OK); + } else { + *name = NULL; + *name = NULL; + *size = (size_t)0; + return (ARCHIVE_WARN); + } +} + +/* + * end of xattr handling + */ + +/* + * Match "[:whitespace:]*(.*)[:whitespace:]*[:,\n]". *wp is updated + * to point to just after the separator. *start points to the first + * character of the matched text and *end just after the last + * character of the matched identifier. In particular *end - *start + * is the length of the field body, not including leading or trailing + * whitespace. + */ +static void +next_field_w(const wchar_t **wp, const wchar_t **start, + const wchar_t **end, wchar_t *sep) +{ + /* Skip leading whitespace to find start of field. */ + while (**wp == L' ' || **wp == L'\t' || **wp == L'\n') { + (*wp)++; + } + *start = *wp; + + /* Scan for the separator. */ + while (**wp != L'\0' && **wp != L',' && **wp != L':' && + **wp != L'\n') { + (*wp)++; + } + *sep = **wp; + + /* Trim trailing whitespace to locate end of field. */ + *end = *wp - 1; + while (**end == L' ' || **end == L'\t' || **end == L'\n') { + (*end)--; + } + (*end)++; + + /* Adjust scanner location. 
*/ + if (**wp != L'\0') + (*wp)++; +} + +static int +prefix_w(const wchar_t *start, const wchar_t *end, const wchar_t *test) +{ + if (start == end) + return (0); + + if (*start++ != *test++) + return (0); + + while (start < end && *start++ == *test++) + ; + + if (start < end) + return (0); + + return (1); +} + + +/* + * Following code is modified from UC Berkeley sources, and + * is subject to the following copyright notice. + */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +static struct flag { + const char *name; + const wchar_t *wname; + unsigned long set; + unsigned long clear; +} flags[] = { + /* Preferred (shorter) names per flag first, all prefixed by "no" */ +#ifdef SF_APPEND + { "nosappnd", L"nosappnd", SF_APPEND, 0 }, + { "nosappend", L"nosappend", SF_APPEND, 0 }, +#endif +#ifdef EXT2_APPEND_FL /* 'a' */ + { "nosappnd", L"nosappnd", EXT2_APPEND_FL, 0 }, + { "nosappend", L"nosappend", EXT2_APPEND_FL, 0 }, +#endif +#ifdef SF_ARCHIVED + { "noarch", L"noarch", SF_ARCHIVED, 0 }, + { "noarchived", L"noarchived", SF_ARCHIVED, 0 }, +#endif +#ifdef SF_IMMUTABLE + { "noschg", L"noschg", SF_IMMUTABLE, 0 }, + { "noschange", L"noschange", SF_IMMUTABLE, 0 }, + { "nosimmutable", L"nosimmutable", SF_IMMUTABLE, 0 }, +#endif +#ifdef EXT2_IMMUTABLE_FL /* 'i' */ + { "noschg", L"noschg", EXT2_IMMUTABLE_FL, 0 }, + { "noschange", L"noschange", EXT2_IMMUTABLE_FL, 0 }, + { "nosimmutable", L"nosimmutable", EXT2_IMMUTABLE_FL, 0 }, +#endif +#ifdef SF_NOUNLINK + { "nosunlnk", L"nosunlnk", SF_NOUNLINK, 0 }, + { "nosunlink", L"nosunlink", SF_NOUNLINK, 0 }, +#endif +#ifdef SF_SNAPSHOT + { "nosnapshot", L"nosnapshot", SF_SNAPSHOT, 0 }, +#endif +#ifdef UF_APPEND + { "nouappnd", L"nouappnd", UF_APPEND, 0 }, + { "nouappend", L"nouappend", UF_APPEND, 0 }, +#endif +#ifdef UF_IMMUTABLE + { "nouchg", L"nouchg", UF_IMMUTABLE, 0 }, + { "nouchange", L"nouchange", UF_IMMUTABLE, 0 }, + { "nouimmutable", L"nouimmutable", UF_IMMUTABLE, 0 }, 
+#endif +#ifdef UF_NODUMP + { "nodump", L"nodump", 0, UF_NODUMP}, +#endif +#ifdef EXT2_NODUMP_FL /* 'd' */ + { "nodump", L"nodump", 0, EXT2_NODUMP_FL}, +#endif +#ifdef UF_OPAQUE + { "noopaque", L"noopaque", UF_OPAQUE, 0 }, +#endif +#ifdef UF_NOUNLINK + { "nouunlnk", L"nouunlnk", UF_NOUNLINK, 0 }, + { "nouunlink", L"nouunlink", UF_NOUNLINK, 0 }, +#endif +#ifdef EXT2_COMPR_FL /* 'c' */ + { "nocompress", L"nocompress", EXT2_COMPR_FL, 0 }, +#endif + +#ifdef EXT2_NOATIME_FL /* 'A' */ + { "noatime", L"noatime", 0, EXT2_NOATIME_FL}, +#endif + { NULL, NULL, 0, 0 } +}; + +/* + * fflagstostr -- + * Convert file flags to a comma-separated string. If no flags + * are set, return the empty string. + */ +char * +ae_fflagstostr(unsigned long bitset, unsigned long bitclear) +{ + char *string, *dp; + const char *sp; + unsigned long bits; + struct flag *flag; + int length; + + bits = bitset | bitclear; + length = 0; + for (flag = flags; flag->name != NULL; flag++) + if (bits & (flag->set | flag->clear)) { + length += strlen(flag->name) + 1; + bits &= ~(flag->set | flag->clear); + } + + if (length == 0) + return (NULL); + string = malloc(length); + if (string == NULL) + return (NULL); + + dp = string; + for (flag = flags; flag->name != NULL; flag++) { + if (bitset & flag->set || bitclear & flag->clear) { + sp = flag->name + 2; + } else if (bitset & flag->clear || bitclear & flag->set) { + sp = flag->name; + } else + continue; + bitset &= ~(flag->set | flag->clear); + bitclear &= ~(flag->set | flag->clear); + if (dp > string) + *dp++ = ','; + while ((*dp++ = *sp++) != '\0') + ; + dp--; + } + + *dp = '\0'; + return (string); +} + +/* + * wcstofflags -- + * Take string of arguments and return file flags. This + * version works a little differently than strtofflags(3). + * In particular, it always tests every token, skipping any + * unrecognized tokens. It returns a pointer to the first + * unrecognized token, or NULL if every token was recognized. 
+ * This version is also const-correct and does not modify the + * provided string. + */ +const wchar_t * +ae_wcstofflags(const wchar_t *s, unsigned long *setp, unsigned long *clrp) +{ + const wchar_t *start, *end; + struct flag *flag; + unsigned long set, clear; + const wchar_t *failed; + + set = clear = 0; + start = s; + failed = NULL; + /* Find start of first token. */ + while (*start == L'\t' || *start == L' ' || *start == L',') + start++; + while (*start != L'\0') { + /* Locate end of token. */ + end = start; + while (*end != L'\0' && *end != L'\t' && + *end != L' ' && *end != L',') + end++; + for (flag = flags; flag->wname != NULL; flag++) { + if (wmemcmp(start, flag->wname, end - start) == 0) { + /* Matched "noXXXX", so reverse the sense. */ + clear |= flag->set; + set |= flag->clear; + break; + } else if (wmemcmp(start, flag->wname + 2, end - start) + == 0) { + /* Matched "XXXX", so don't reverse. */ + set |= flag->set; + clear |= flag->clear; + break; + } + } + /* Ignore unknown flag names. */ + if (flag->wname == NULL && failed == NULL) + failed = start; + + /* Find start of next token. */ + start = end; + while (*start == L'\t' || *start == L' ' || *start == L',') + start++; + + } + + if (setp) + *setp = set; + if (clrp) + *clrp = clear; + + /* Return location of first failure. 
*/ + return (failed); +} + + +#ifdef TEST +#include <stdio.h> +int +main(int argc, char **argv) +{ + struct archive_entry *entry = archive_entry_new(); + unsigned long set, clear; + const wchar_t *remainder; + + remainder = archive_entry_copy_fflags_text_w(entry, L"nosappnd dump archive,,,,,,,"); + archive_entry_fflags(entry, &set, &clear); + + wprintf(L"set=0x%lX clear=0x%lX remainder='%ls'\n", set, clear, remainder); + + wprintf(L"new flags='%s'\n", archive_entry_fflags_text(entry)); + return (0); +} +#endif diff --git a/lib/libarchive/archive_entry.h b/lib/libarchive/archive_entry.h new file mode 100644 index 0000000..3c3f73d --- /dev/null +++ b/lib/libarchive/archive_entry.h @@ -0,0 +1,251 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef ARCHIVE_ENTRY_H_INCLUDED +#define ARCHIVE_ENTRY_H_INCLUDED + +#include <stddef.h> /* for wchar_t */ +#include <unistd.h> + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * Description of an archive entry. + * + * Basically, a "struct stat" with a few text fields added in. + * + * TODO: Add "comment", "charset", and possibly other entries that are + * supported by "pax interchange" format. However, GNU, ustar, cpio, + * and other variants don't support these features, so they're not an + * excruciatingly high priority right now. + * + * TODO: "pax interchange" format allows essentially arbitrary + * key/value attributes to be attached to any entry. Supporting + * such extensions may make this library useful for special + * applications (e.g., a package manager could attach special + * package-management attributes to each entry). + */ +struct archive_entry; + +/* + * Basic object manipulation + */ + +struct archive_entry *archive_entry_clear(struct archive_entry *); +/* The 'clone' function does a deep copy; all of the strings are copied too. */ +struct archive_entry *archive_entry_clone(struct archive_entry *); +void archive_entry_free(struct archive_entry *); +struct archive_entry *archive_entry_new(void); + +/* + * Retrieve fields from an archive_entry. 
+ */ + +time_t archive_entry_atime(struct archive_entry *); +long archive_entry_atime_nsec(struct archive_entry *); +time_t archive_entry_ctime(struct archive_entry *); +long archive_entry_ctime_nsec(struct archive_entry *); +dev_t archive_entry_dev(struct archive_entry *); +void archive_entry_fflags(struct archive_entry *, + unsigned long *set, unsigned long *clear); +const char *archive_entry_fflags_text(struct archive_entry *); +gid_t archive_entry_gid(struct archive_entry *); +const char *archive_entry_gname(struct archive_entry *); +const wchar_t *archive_entry_gname_w(struct archive_entry *); +const char *archive_entry_hardlink(struct archive_entry *); +const wchar_t *archive_entry_hardlink_w(struct archive_entry *); +ino_t archive_entry_ino(struct archive_entry *); +mode_t archive_entry_mode(struct archive_entry *); +time_t archive_entry_mtime(struct archive_entry *); +long archive_entry_mtime_nsec(struct archive_entry *); +const char *archive_entry_pathname(struct archive_entry *); +const wchar_t *archive_entry_pathname_w(struct archive_entry *); +dev_t archive_entry_rdev(struct archive_entry *); +dev_t archive_entry_rdevmajor(struct archive_entry *); +dev_t archive_entry_rdevminor(struct archive_entry *); +int64_t archive_entry_size(struct archive_entry *); +const struct stat *archive_entry_stat(struct archive_entry *); +const char *archive_entry_symlink(struct archive_entry *); +const wchar_t *archive_entry_symlink_w(struct archive_entry *); +uid_t archive_entry_uid(struct archive_entry *); +const char *archive_entry_uname(struct archive_entry *); +const wchar_t *archive_entry_uname_w(struct archive_entry *); + +/* + * Set fields in an archive_entry. + * + * Note that string 'set' functions do not copy the string, only the pointer. + * In contrast, 'copy' functions do copy the object pointed to. 
+ */ + +void archive_entry_copy_stat(struct archive_entry *, const struct stat *); +void archive_entry_set_atime(struct archive_entry *, time_t, long); +void archive_entry_set_ctime(struct archive_entry *, time_t, long); +void archive_entry_set_fflags(struct archive_entry *, + unsigned long set, unsigned long clear); +/* Returns pointer to start of first invalid token, or NULL if none. */ +/* Note that all recognized tokens are processed, regardless. */ +const wchar_t *archive_entry_copy_fflags_text_w(struct archive_entry *, + const wchar_t *); +void archive_entry_set_gid(struct archive_entry *, gid_t); +void archive_entry_set_gname(struct archive_entry *, const char *); +void archive_entry_copy_gname_w(struct archive_entry *, const wchar_t *); +void archive_entry_set_hardlink(struct archive_entry *, const char *); +void archive_entry_copy_hardlink(struct archive_entry *, const char *); +void archive_entry_copy_hardlink_w(struct archive_entry *, const wchar_t *); +void archive_entry_set_link(struct archive_entry *, const char *); +void archive_entry_set_mode(struct archive_entry *, mode_t); +void archive_entry_set_mtime(struct archive_entry *, time_t, long); +void archive_entry_set_pathname(struct archive_entry *, const char *); +void archive_entry_copy_pathname(struct archive_entry *, const char *); +void archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *); +void archive_entry_set_rdevmajor(struct archive_entry *, dev_t); +void archive_entry_set_rdevminor(struct archive_entry *, dev_t); +void archive_entry_set_size(struct archive_entry *, int64_t); +void archive_entry_set_symlink(struct archive_entry *, const char *); +void archive_entry_copy_symlink_w(struct archive_entry *, const wchar_t *); +void archive_entry_set_uid(struct archive_entry *, uid_t); +void archive_entry_set_uname(struct archive_entry *, const char *); +void archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *); + +/* + * ACL routines. 
This used to simply store and return text-format ACL + * strings, but that proved insufficient for a number of reasons: + * = clients need control over uname/uid and gname/gid mappings + * = there are many different ACL text formats + * = would like to be able to read/convert archives containing ACLs + * on platforms that lack ACL libraries + */ + +/* + * Permission bits mimic POSIX.1e. Note that I've not followed POSIX.1e's + * "permset"/"perm" abstract type nonsense. A permset is just a simple + * bitmap, following long-standing Unix tradition. + */ +#define ARCHIVE_ENTRY_ACL_EXECUTE 1 +#define ARCHIVE_ENTRY_ACL_WRITE 2 +#define ARCHIVE_ENTRY_ACL_READ 4 + +/* We need to be able to specify either or both of these. */ +#define ARCHIVE_ENTRY_ACL_TYPE_ACCESS 256 +#define ARCHIVE_ENTRY_ACL_TYPE_DEFAULT 512 + +/* Tag values mimic POSIX.1e */ +#define ARCHIVE_ENTRY_ACL_USER 10001 /* Specified user. */ +#define ARCHIVE_ENTRY_ACL_USER_OBJ 10002 /* User who owns the file. */ +#define ARCHIVE_ENTRY_ACL_GROUP 10003 /* Specified group. */ +#define ARCHIVE_ENTRY_ACL_GROUP_OBJ 10004 /* Group who owns the file. */ +#define ARCHIVE_ENTRY_ACL_MASK 10005 /* Modify group access. */ +#define ARCHIVE_ENTRY_ACL_OTHER 10006 /* Public. */ + +/* + * Set the ACL by clearing it and adding entries one at a time. + * Unlike the POSIX.1e ACL routines, you must specify the type + * (access/default) for each entry. Internally, the ACL data is just + * a soup of entries. API calls here allow you to retrieve just the + * entries of interest. This design (which goes against the spirit of + * POSIX.1e) is useful for handling archive formats that combine + * default and access information in a single ACL list. 
+ */ +void archive_entry_acl_clear(struct archive_entry *); +void archive_entry_acl_add_entry(struct archive_entry *, + int type, int permset, int tag, int qual, const char *name); +void archive_entry_acl_add_entry_w(struct archive_entry *, + int type, int permset, int tag, int qual, const wchar_t *name); + +/* + * To retrieve the ACL, first "reset", then repeatedly ask for the + * "next" entry. The want_type parameter allows you to request only + * access entries or only default entries. + */ +int archive_entry_acl_reset(struct archive_entry *, int want_type); +int archive_entry_acl_next(struct archive_entry *, int want_type, + int *type, int *permset, int *tag, int *qual, const char **name); +int archive_entry_acl_next_w(struct archive_entry *, int want_type, + int *type, int *permset, int *tag, int *qual, + const wchar_t **name); + +/* + * Construct a text-format ACL. The flags argument is a bitmask that + * can include any of the following: + * + * ARCHIVE_ENTRY_ACL_TYPE_ACCESS - Include access entries. + * ARCHIVE_ENTRY_ACL_TYPE_DEFAULT - Include default entries. + * ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID - Include extra numeric ID field in + * each ACL entry. (As used by 'star'.) + * ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT - Include "default:" before each + * default ACL entry. + */ +#define ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID 1024 +#define ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT 2048 +const wchar_t *archive_entry_acl_text_w(struct archive_entry *, int flags); + +/* Return a count of entries matching 'want_type' */ +int archive_entry_acl_count(struct archive_entry *, int want_type); + +/* + * Private ACL parser. This is private because it handles some + * very weird formats that clients should not be messing with. + * Clients should only deal with their platform-native formats. + * Because of the need to support many formats cleanly, new arguments + * are likely to get added on a regular basis. 
Clients who try to use + * this interface are likely to be surprised when it changes. + * + * You were warned! + */ +int __archive_entry_acl_parse_w(struct archive_entry *, + const wchar_t *, int type); + + +#ifdef __cplusplus +} +#endif + +/* + * extended attributes + */ + +void archive_entry_xattr_clear(struct archive_entry *); +void archive_entry_xattr_add_entry(struct archive_entry *, + const char *name, const void *value, size_t size); + +/* + * To retrieve the xattr list, first "reset", then repeatedly ask for the + * "next" entry. + */ + +int archive_entry_xattr_count(struct archive_entry *); +int archive_entry_xattr_reset(struct archive_entry *); +int archive_entry_xattr_next(struct archive_entry *, + const char **name, const void **value, size_t *); + + +#endif /* !ARCHIVE_ENTRY_H_INCLUDED */ diff --git a/lib/libarchive/archive_platform.h b/lib/libarchive/archive_platform.h new file mode 100644 index 0000000..e49ab5e --- /dev/null +++ b/lib/libarchive/archive_platform.h @@ -0,0 +1,193 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * This header is the first thing included in any of the libarchive + * source files. As far as possible, platform-specific issues should + * be dealt with here and not within individual source files. I'm + * actively trying to minimize #if blocks within the main source, + * since they obfuscate the code. + */ + +#ifndef ARCHIVE_PLATFORM_H_INCLUDED +#define ARCHIVE_PLATFORM_H_INCLUDED + +#if HAVE_CONFIG_H +#include "../config.h" +#else + +/* A default configuration for FreeBSD, used if there is no config.h. 
*/ +#ifdef __FreeBSD__ +#if __FreeBSD__ > 4 +#define HAVE_ACL_CREATE_ENTRY 1 +#define HAVE_ACL_INIT 1 +#define HAVE_ACL_SET_FD 1 +#define HAVE_ACL_SET_FD_NP 1 +#define HAVE_ACL_SET_FILE 1 +#define HAVE_ACL_USER 1 +#endif +#define HAVE_BZLIB_H 1 +#define HAVE_CHFLAGS 1 +#define HAVE_DECL_STRERROR_R 1 +#define HAVE_EFTYPE 1 +#define HAVE_EILSEQ 1 +#define HAVE_ERRNO_H 1 +#define HAVE_FCHDIR 1 +#define HAVE_FCHFLAGS 1 +#define HAVE_FCHMOD 1 +#define HAVE_FCHOWN 1 +#define HAVE_FCNTL_H 1 +#define HAVE_FUTIMES 1 +#define HAVE_INTTYPES_H 1 +#define HAVE_LCHFLAGS 1 +#define HAVE_LCHMOD 1 +#define HAVE_LCHOWN 1 +#define HAVE_LIMITS_H 1 +#define HAVE_LUTIMES 1 +#define HAVE_MALLOC 1 +#define HAVE_MEMMOVE 1 +#define HAVE_MEMORY_H 1 +#define HAVE_MEMSET 1 +#define HAVE_MKDIR 1 +#define HAVE_MKFIFO 1 +#define HAVE_PATHS_H 1 +#define HAVE_STDINT_H 1 +#define HAVE_STDLIB_H 1 +#define HAVE_STRCHR 1 +#define HAVE_STRDUP 1 +#define HAVE_STRERROR 1 +#define HAVE_STRERROR_R 1 +#define HAVE_STRINGS_H 1 +#define HAVE_STRING_H 1 +#define HAVE_STRRCHR 1 +#define HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC 1 +#define HAVE_STRUCT_STAT_ST_RDEV 1 +#define HAVE_SYS_ACL_H 1 +#define HAVE_SYS_IOCTL_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_SYS_TIME_H 1 +#define HAVE_SYS_TYPES_H 1 +#define HAVE_SYS_WAIT_H 1 +#define HAVE_TIMEGM 1 +#define HAVE_UNISTD_H 1 +#define HAVE_WCHAR_H 1 +#define HAVE_ZLIB_H 1 +#define STDC_HEADERS 1 +#define TIME_WITH_SYS_TIME 1 +#else /* !__FreeBSD__ */ +/* Warn if the library hasn't been (automatically or manually) configured. */ +#error Oops: No config.h and no built-in configuration in archive_platform.h. +#endif /* !__FreeBSD__ */ + +#endif /* !HAVE_CONFIG_H */ + +/* No non-FreeBSD platform will have __FBSDID, so just define it here. 
+ */ +#ifdef __FreeBSD__ +#include <sys/cdefs.h> /* For __FBSDID */ +#else +#define __FBSDID(a) /* null */ +#endif + +#if HAVE_INTTYPES_H +#include <inttypes.h> +#endif + +/* FreeBSD 4 and earlier lack intmax_t/uintmax_t */ +#if defined(__FreeBSD__) && __FreeBSD__ < 5 +#define intmax_t int64_t +#define uintmax_t uint64_t +#endif + +/* + * If this platform has <sys/acl.h>, acl_create_entry(), acl_init(), + * acl_set_file(), and ACL_USER, we assume it has the rest of the + * POSIX.1e draft functions used in archive_read_extract.c. + */ +#if HAVE_SYS_ACL_H && HAVE_ACL_CREATE_ENTRY && HAVE_ACL_INIT && HAVE_ACL_SET_FILE && HAVE_ACL_USER +#define HAVE_POSIX_ACL 1 +#endif + +/* + * If we can't restore metadata using a file descriptor, then + * for compatibility's sake, close files before trying to restore metadata. + */ +#if defined(HAVE_FCHMOD) || defined(HAVE_FUTIMES) || defined(HAVE_ACL_SET_FD) || defined(HAVE_ACL_SET_FD_NP) || defined(HAVE_FCHOWN) +#define CAN_RESTORE_METADATA_FD +#endif + +/* Set up defaults for internal error codes. */ +#ifndef ARCHIVE_ERRNO_FILE_FORMAT +#if HAVE_EFTYPE +#define ARCHIVE_ERRNO_FILE_FORMAT EFTYPE +#else +#if HAVE_EILSEQ +#define ARCHIVE_ERRNO_FILE_FORMAT EILSEQ +#else +#define ARCHIVE_ERRNO_FILE_FORMAT EINVAL +#endif +#endif +#endif + +#ifndef ARCHIVE_ERRNO_PROGRAMMER +#define ARCHIVE_ERRNO_PROGRAMMER EINVAL +#endif + +#ifndef ARCHIVE_ERRNO_MISC +#define ARCHIVE_ERRNO_MISC (-1) +#endif + +/* Select the best way to set/get hi-res timestamps. */ +#if HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC +/* FreeBSD uses "timespec" members. 
+ */ +#define ARCHIVE_STAT_ATIME_NANOS(st) (st)->st_atimespec.tv_nsec +#define ARCHIVE_STAT_CTIME_NANOS(st) (st)->st_ctimespec.tv_nsec +#define ARCHIVE_STAT_MTIME_NANOS(st) (st)->st_mtimespec.tv_nsec +#define ARCHIVE_STAT_SET_ATIME_NANOS(st, n) (st)->st_atimespec.tv_nsec = (n) +#define ARCHIVE_STAT_SET_CTIME_NANOS(st, n) (st)->st_ctimespec.tv_nsec = (n) +#define ARCHIVE_STAT_SET_MTIME_NANOS(st, n) (st)->st_mtimespec.tv_nsec = (n) +#else +#if HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC +/* Linux uses "tim" members. */ +#define ARCHIVE_STAT_ATIME_NANOS(pstat) (pstat)->st_atim.tv_nsec +#define ARCHIVE_STAT_CTIME_NANOS(pstat) (pstat)->st_ctim.tv_nsec +#define ARCHIVE_STAT_MTIME_NANOS(pstat) (pstat)->st_mtim.tv_nsec +#define ARCHIVE_STAT_SET_ATIME_NANOS(st, n) (st)->st_atim.tv_nsec = (n) +#define ARCHIVE_STAT_SET_CTIME_NANOS(st, n) (st)->st_ctim.tv_nsec = (n) +#define ARCHIVE_STAT_SET_MTIME_NANOS(st, n) (st)->st_mtim.tv_nsec = (n) +#else +/* If we can't find a better way, just use stubs. */ +#define ARCHIVE_STAT_ATIME_NANOS(pstat) 0 +#define ARCHIVE_STAT_CTIME_NANOS(pstat) 0 +#define ARCHIVE_STAT_MTIME_NANOS(pstat) 0 +#define ARCHIVE_STAT_SET_ATIME_NANOS(st, n) +#define ARCHIVE_STAT_SET_CTIME_NANOS(st, n) +#define ARCHIVE_STAT_SET_MTIME_NANOS(st, n) +#endif +#endif + +#endif /* !ARCHIVE_PLATFORM_H_INCLUDED */ diff --git a/lib/libarchive/archive_private.h b/lib/libarchive/archive_private.h new file mode 100644 index 0000000..6b93820 --- /dev/null +++ b/lib/libarchive/archive_private.h @@ -0,0 +1,244 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef ARCHIVE_PRIVATE_H_INCLUDED +#define ARCHIVE_PRIVATE_H_INCLUDED + +#include "archive.h" +#include "archive_string.h" + +#define ARCHIVE_WRITE_MAGIC (0xb0c5c0deU) +#define ARCHIVE_READ_MAGIC (0xdeb0c5U) + +struct archive { + /* + * The magic/state values are used to sanity-check the + * client's usage. If an API function is called at a + * ridiculous time, or the client passes us an invalid + * pointer, these values allow me to catch that. + */ + unsigned magic; + unsigned state; + + struct archive_entry *entry; + uid_t user_uid; /* UID of current user. */ + + /* Dev/ino of the archive being read/written. */ + dev_t skip_file_dev; + ino_t skip_file_ino; + + /* Utility: Pointer to a block of nulls. */ + const unsigned char *nulls; + size_t null_length; + + /* + * Used by archive_read_data() to track blocks and copy + * data to client buffers, filling gaps with zero bytes. 
+ */ + const char *read_data_block; + off_t read_data_offset; + off_t read_data_output_offset; + size_t read_data_remaining; + + /* Callbacks to open/read/write/close archive stream. */ + archive_open_callback *client_opener; + archive_read_callback *client_reader; + archive_write_callback *client_writer; + archive_close_callback *client_closer; + void *client_data; + + /* + * Blocking information. Note that bytes_in_last_block is + * misleadingly named; I should find a better name. These + * control the final output from all compressors, including + * compression_none. + */ + int bytes_per_block; + int bytes_in_last_block; + + /* + * These control whether data within a gzip/bzip2 compressed + * stream gets padded or not. If pad_uncompressed is set, + * the data will be padded to a full block before being + * compressed. The pad_uncompressed_byte determines the value + * that will be used for padding. Note that these have no + * effect on compression "none." + */ + int pad_uncompressed; + int pad_uncompressed_byte; /* TODO: Support this. */ + + /* Position in UNCOMPRESSED data stream. */ + off_t file_position; + /* Position in COMPRESSED data stream. */ + off_t raw_position; + /* File offset of beginning of most recently-read header. */ + off_t header_position; + + /* + * Detection functions for decompression: bid functions are + * given a block of data from the beginning of the stream and + * can bid on whether or not they support the data stream. + * General guideline: bid the number of bits that you actually + * test, e.g., 16 if you test a 2-byte magic value. The + * highest bidder will have their init function invoked, which + * can set up pointers to specific handlers. + * + * On write, the client just invokes an archive_write_set function + * which sets up the data here directly. + */ + int compression_code; /* Currently active compression. 
*/ + const char *compression_name; + struct { + int (*bid)(const void *buff, size_t); + int (*init)(struct archive *, const void *buff, size_t); + } decompressors[4]; + /* Read/write data stream (with compression). */ + void *compression_data; /* Data for (de)compressor. */ + int (*compression_init)(struct archive *); /* Initialize. */ + int (*compression_finish)(struct archive *); + int (*compression_write)(struct archive *, const void *, size_t); + /* + * Read uses a peek/consume I/O model: the decompression code + * returns a pointer to the requested block and advances the + * file position only when requested by a consume call. This + * reduces copying and also simplifies look-ahead for format + * detection. + */ + ssize_t (*compression_read_ahead)(struct archive *, + const void **, size_t request); + ssize_t (*compression_read_consume)(struct archive *, size_t); + + /* + * Format detection is mostly the same as compression + * detection, with two significant differences: The bidders + * use the read_ahead calls above to examine the stream rather + * than having the supervisor hand them a block of data to + * examine, and the auction is repeated for every header. + * Winning bidders should set the archive_format and + * archive_format_name appropriately. Bid routines should + * check archive_format and decline to bid if the format of + * the last header was incompatible. + * + * Again, write support is considerably simpler because there's + * no need for an auction. + */ + int archive_format; + const char *archive_format_name; + + struct archive_format_descriptor { + int (*bid)(struct archive *); + int (*read_header)(struct archive *, struct archive_entry *); + int (*read_data)(struct archive *, const void **, size_t *, off_t *); + int (*read_data_skip)(struct archive *); + int (*cleanup)(struct archive *); + void *format_data; /* Format-specific data for readers. */ + } formats[8]; + struct archive_format_descriptor *format; /* Active format. 
+ */ + + /* + * Storage for format-specific data. Note that there can be + * multiple format readers active at one time, so we need to + * allow for multiple format readers to have their data + * available. The pformat_data slot here is the solution: on + * read, it is guaranteed to always point to a void* variable + * that the format can use. + */ + void **pformat_data; /* Pointer to current format_data. */ + void *format_data; /* Used by writers. */ + + /* + * Pointers to format-specific functions for writing. They're + * initialized by archive_write_set_format_XXX() calls. + */ + int (*format_init)(struct archive *); /* Only used on write. */ + int (*format_finish)(struct archive *); + int (*format_finish_entry)(struct archive *); + int (*format_write_header)(struct archive *, + struct archive_entry *); + int (*format_write_data)(struct archive *, + const void *buff, size_t); + + /* + * Various information needed by archive_extract. + */ + struct extract *extract; + void (*extract_progress)(void *); + void *extract_progress_user_data; + void (*cleanup_archive_extract)(struct archive *); + + int archive_error_number; + const char *error; + struct archive_string error_string; +}; + + +#define ARCHIVE_STATE_ANY 0xFFFFU +#define ARCHIVE_STATE_NEW 1U +#define ARCHIVE_STATE_HEADER 2U +#define ARCHIVE_STATE_DATA 4U +#define ARCHIVE_STATE_EOF 8U +#define ARCHIVE_STATE_CLOSED 0x10U +#define ARCHIVE_STATE_FATAL 0x8000U + +/* Check magic value and state; exit if it isn't valid. 
*/ +void __archive_check_magic(struct archive *, unsigned magic, + unsigned state, const char *func); + + +int __archive_read_register_format(struct archive *a, + void *format_data, + int (*bid)(struct archive *), + int (*read_header)(struct archive *, struct archive_entry *), + int (*read_data)(struct archive *, const void **, size_t *, off_t *), + int (*read_data_skip)(struct archive *), + int (*cleanup)(struct archive *)); + +int __archive_read_register_compression(struct archive *a, + int (*bid)(const void *, size_t), + int (*init)(struct archive *, const void *, size_t)); + +void __archive_errx(int retvalue, const char *msg); + +#define err_combine(a,b) ((a) < (b) ? (a) : (b)) + + +/* + * Utility function to format a USTAR header into a buffer. If + * "strict" is set, this tries to create the absolutely most portable + * version of a ustar header. If "strict" is set to 0, then it will + * relax certain requirements. + * + * Generally, format-specific declarations don't belong in this + * header; this is a rare example of a function that is shared by + * two very similar formats (ustar and pax). + */ +int +__archive_write_format_header_ustar(struct archive *, char buff[512], + struct archive_entry *, int tartype, int strict); + +#endif diff --git a/lib/libarchive/archive_read.3 b/lib/libarchive/archive_read.3 new file mode 100644 index 0000000..1575c7b --- /dev/null +++ b/lib/libarchive/archive_read.3 @@ -0,0 +1,493 @@ +.\" Copyright (c) 2003-2005 Tim Kientzle +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD$ +.\" +.Dd January 8, 2005 +.Dt archive_read 3 +.Os +.Sh NAME +.Nm archive_read_new , +.Nm archive_read_set_bytes_per_block , +.Nm archive_read_support_compression_all , +.Nm archive_read_support_compression_bzip2 , +.Nm archive_read_support_compression_compress , +.Nm archive_read_support_compression_gzip , +.Nm archive_read_support_compression_none , +.Nm archive_read_support_format_all , +.Nm archive_read_support_format_cpio , +.Nm archive_read_support_format_iso9660 , +.Nm archive_read_support_format_tar , +.Nm archive_read_support_format_zip , +.Nm archive_read_open , +.Nm archive_read_open_fd , +.Nm archive_read_open_file , +.Nm archive_read_next_header , +.Nm archive_read_data , +.Nm archive_read_data_block , +.Nm archive_read_data_skip , +.Nm archive_read_data_into_buffer , +.Nm archive_read_data_into_fd , +.Nm archive_read_extract , +.Nm archive_read_extract_set_progress_callback , +.Nm archive_read_close , +.Nm archive_read_finish +.Nd functions for reading streaming archives +.Sh SYNOPSIS +.In archive.h +.Ft struct archive * +.Fn archive_read_new "void" +.Ft int +.Fn archive_read_set_bytes_per_block "struct archive *" "int" +.Ft int +.Fn archive_read_support_compression_all "struct archive *" +.Ft int +.Fn archive_read_support_compression_bzip2 "struct archive *" +.Ft int +.Fn archive_read_support_compression_compress "struct archive *" +.Ft int +.Fn archive_read_support_compression_gzip "struct archive *" +.Ft int +.Fn archive_read_support_compression_none "struct archive *" +.Ft int +.Fn archive_read_support_format_all "struct archive *" +.Ft int +.Fn archive_read_support_format_cpio "struct archive *" +.Ft int +.Fn archive_read_support_format_iso9660 "struct archive *" +.Ft int +.Fn archive_read_support_format_tar "struct archive *" +.Ft int +.Fn archive_read_support_format_zip "struct archive *" +.Ft int +.Fn archive_read_open "struct archive *" "void *client_data" "archive_open_callback *" "archive_read_callback *" 
"archive_close_callback *" +.Ft int +.Fn archive_read_open_fd "struct archive *" "int fd" "size_t block_size" +.Ft int +.Fn archive_read_open_file "struct archive *" "const char *filename" "size_t block_size" +.Ft int +.Fn archive_read_next_header "struct archive *" "struct archive_entry **" +.Ft ssize_t +.Fn archive_read_data "struct archive *" "void *buff" "size_t len" +.Ft int +.Fn archive_read_data_block "struct archive *" "const void **buff" "size_t *len" "off_t *offset" +.Ft int +.Fn archive_read_data_skip "struct archive *" +.Ft int +.Fn archive_read_data_into_buffer "struct archive *" "void *" "ssize_t len" +.Ft int +.Fn archive_read_data_into_fd "struct archive *" "int fd" +.Ft int +.Fn archive_read_extract "struct archive *" "struct archive_entry *" "int flags" +.Ft void +.Fn archive_read_extract_set_progress_callback "struct archive *" "void (*func)(void *)" "void *user_data" +.Ft int +.Fn archive_read_close "struct archive *" +.Ft void +.Fn archive_read_finish "struct archive *" +.Sh DESCRIPTION +These functions provide a complete API for reading streaming archives. +The general process is to first create the +.Tn struct archive +object, set options, initialize the reader, iterate over the archive +headers and associated data, then close the archive and release all +resources. +The following summary describes the functions in approximately the +order they would be used: +.Bl -tag -compact -width indent +.It Fn archive_read_new +Allocates and initializes a +.Tn struct archive +object suitable for reading from an archive. +.It Fn archive_read_set_bytes_per_block +Sets the block size used for reading the archive data. +This controls the size that will be used when invoking the read +callback function. +The default is 20 records or 10240 bytes for tar formats. 
+.It Fn archive_read_support_compression_all , Fn archive_read_support_compression_bzip2 , Fn archive_read_support_compression_compress , Fn archive_read_support_compression_gzip , Fn archive_read_support_compression_none +Enables auto-detection code and decompression support for the +specified compression. +Note that +.Dq none +is always enabled by default. +For convenience, +.Fn archive_read_support_compression_all +enables all available decompression code. +.It Fn archive_read_support_format_all , Fn archive_read_support_format_cpio , Fn archive_read_support_format_iso9660 , Fn archive_read_support_format_tar , Fn archive_read_support_format_zip +Enables support---including auto-detection code---for the +specified archive format. +For example, +.Fn archive_read_support_format_tar +enables support for a variety of standard tar formats, old-style tar, +ustar, pax interchange format, and many common variants. +For convenience, +.Fn archive_read_support_format_all +enables support for all available formats. +Note that there is no default. +.It Fn archive_read_open +Freeze the settings, open the archive, and prepare for reading entries. +This is the most generic version of this call, which accepts +three callback functions. +Most clients will want to use +.Fn archive_read_open_file +or +.Fn archive_read_open_fd +instead. +The library invokes the client-provided functions to obtain +raw bytes from the archive. +Note: The API permits a decompression method to fork and invoke the +callbacks from another process. +Although none of the current decompression methods use this technique, +future decompression methods may utilize this technique. +If the decompressor forks, it will ensure that the open and close +callbacks are invoked within the same process as the read callback. +In particular, clients should not attempt to use shared variables to +communicate between the open/read/close callbacks and the mainline code. 
+.It Fn archive_read_open_fd +Like +.Fn archive_read_open , +except that it accepts a file descriptor and block size rather than +a trio of function pointers. +Note that the file descriptor will not be automatically closed at +end-of-archive. +.It Fn archive_read_open_file +Like +.Fn archive_read_open , +except that it accepts a simple filename and a block size. +A NULL filename represents standard input. +.It Fn archive_read_next_header +Read the header for the next entry and return a pointer to +a +.Tn struct archive_entry . +.It Fn archive_read_data +Read data associated with the header just read. +Internally, this is a convenience function that calls +.Fn archive_read_data_block +and fills any gaps with nulls so that callers see a single +continuous stream of data. +.It Fn archive_read_data_block +Return the next available block of data for this entry. +Unlike +.Fn archive_read_data , +the +.Fn archive_read_data_block +function avoids copying data and allows you to correctly handle +sparse files, as supported by some archive formats. +The library guarantees that offsets will increase and that blocks +will not overlap. +Note that the blocks returned from this function can be much larger +than the block size read from disk, due to compression +and internal buffer optimizations. +.It Fn archive_read_data_skip +A convenience function that repeatedly calls +.Fn archive_read_data_block +to skip all of the data for this archive entry. +.It Fn archive_read_data_into_buffer +A convenience function that repeatedly calls +.Fn archive_read_data_block +to copy the entire entry into the client-supplied buffer. +Note that the client is responsible for sizing the buffer appropriately. +.It Fn archive_read_data_into_fd +A convenience function that repeatedly calls +.Fn archive_read_data_block +to copy the entire entry to the provided file descriptor. 
+.It Fn archive_read_extract +A convenience function that recreates the specified object on +disk and reads the entry data into that object. +The filename, permissions, and other critical information +are taken from the provided +.Va archive_entry +object. +The +.Va flags +argument modifies how the object is recreated. +It consists of a bitwise OR of one or more of the following values: +.Bl -tag -compact -width "indent" +.It Cm ARCHIVE_EXTRACT_OWNER +The user and group IDs should be set on the restored file. +By default, the user and group IDs are not restored. +.It Cm ARCHIVE_EXTRACT_PERM +The permissions (mode bits) should be restored for all objects. +By default, permissions are only restored for regular files. +.It Cm ARCHIVE_EXTRACT_TIME +The timestamps (mtime, ctime, and atime) should be restored. +By default, they are ignored. +Note that restoring of atime is not currently supported. +.It Cm ARCHIVE_EXTRACT_NO_OVERWRITE +Existing files on disk will not be overwritten. +By default, existing regular files are truncated and overwritten; +existing directories will have their permissions updated; +other pre-existing objects are unlinked and recreated from scratch. +.It Cm ARCHIVE_EXTRACT_UNLINK +Existing files on disk will be unlinked and recreated from scratch. +By default, existing files are truncated and rewritten, but +the file is not recreated. +In particular, the default behavior does not break existing hard links. +.It Cm ARCHIVE_EXTRACT_ACL +Attempt to restore ACLs. +By default, extended ACLs are ignored. +.It Cm ARCHIVE_EXTRACT_FFLAGS +Attempt to restore extended file flags. +By default, file flags are ignored. +.El +Note that not all attributes are set immediately; +some attributes are cached in memory and written to disk only +when the archive is closed. +(For example, read-only directories are initially created +writable so that files within those directories can be +restored. +The final permissions are set when the archive is closed.) 
+.It Fn archive_read_extract_set_progress_callback +Sets a pointer to a user-defined callback that can be used +for updating progress displays during extraction. +The progress function will be invoked during the extraction of large +regular files. +The progress function will be invoked with the pointer provided to this call. +Generally, the data pointed to should include a reference to the archive +object and the archive_entry object so that various statistics +can be retrieved for the progress display. +.It Fn archive_read_close +Complete the archive and invoke the close callback. +.It Fn archive_read_finish +Invokes +.Fn archive_read_close +if it was not invoked manually, then release all resources. +.El +.Pp +Note that the library determines most of the relevant information about +the archive by inspection. +In particular, it automatically detects +.Xr gzip 1 +or +.Xr bzip2 1 +compression and transparently performs the appropriate decompression. +It also automatically detects the archive format. +.Pp +A complete description of the +.Tn struct archive +and +.Tn struct archive_entry +objects can be found in the overview manual page for +.Xr libarchive 3 . +.Sh CLIENT CALLBACKS +The callback functions must match the following prototypes: +.Bl -item -offset indent +.It +.Ft typedef ssize_t +.Fn archive_read_callback "struct archive *" "void *client_data" "const void **buffer" +.It +.Ft typedef int +.Fn archive_open_callback "struct archive *" "void *client_data" +.It +.Ft typedef int +.Fn archive_close_callback "struct archive *" "void *client_data" +.El +.Pp +The open callback is invoked by +.Fn archive_read_open . +It should return +.Cm ARCHIVE_OK +if the underlying file or data source is successfully +opened. +If the open fails, it should call +.Fn archive_set_error +to register an error code and message and return +.Cm ARCHIVE_FATAL . +.Pp +The read callback is invoked whenever the library +requires raw bytes from the archive. 
+The read callback should read data into a buffer, +set the +.Li const void **buffer +argument to point to the available data, and +return a count of the number of bytes available. +The library will invoke the read callback again +only after it has consumed this data. +The library imposes no constraints on the size +of the data blocks returned. +On end-of-file, the read callback should +return zero. +On error, the read callback should invoke +.Fn archive_set_error +to register an error code and message and +return -1. +.Pp +The close callback is invoked by archive_read_close when +the archive processing is complete. +The callback should return +.Cm ARCHIVE_OK +on success. +On failure, the callback should invoke +.Fn archive_set_error +to register an error code and message and +return +.Cm ARCHIVE_FATAL . +.Sh EXAMPLE +The following illustrates basic usage of the library. +In this example, +the callback functions are simply wrappers around the standard +.Xr open 2 , +.Xr read 2 , +and +.Xr close 2 +system calls. +.Bd -literal -offset indent +void +list_archive(const char *name) +{ + struct mydata *mydata; + struct archive *a; + struct archive_entry *entry; + + mydata = malloc(sizeof(struct mydata)); + a = archive_read_new(); + mydata->name = name; + archive_read_support_compression_all(a); + archive_read_support_format_all(a); + archive_read_open(a, mydata, myopen, myread, myclose); + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + printf("%s\\n",archive_entry_pathname(entry)); + archive_read_data_skip(a); + } + archive_read_finish(a); + free(mydata); +} + +ssize_t +myread(struct archive *a, void *client_data, const void **buff) +{ + struct mydata *mydata = client_data; + + *buff = mydata->buff; + return (read(mydata->fd, mydata->buff, 10240)); +} + +int +myopen(struct archive *a, void *client_data) +{ + struct mydata *mydata = client_data; + + mydata->fd = open(mydata->name, O_RDONLY); + return (mydata->fd >= 0 ? 
ARCHIVE_OK : ARCHIVE_FATAL); +} + +int +myclose(struct archive *a, void *client_data) +{ + struct mydata *mydata = client_data; + + if (mydata->fd >= 0) + close(mydata->fd); + return (ARCHIVE_OK); +} +.Ed +.Sh RETURN VALUES +Most functions return zero on success, non-zero on error. +The possible return codes include: +.Cm ARCHIVE_OK +(the operation succeeded), +.Cm ARCHIVE_WARN +(the operation succeeded but a non-critical error was encountered), +.Cm ARCHIVE_EOF +(end-of-archive was encountered), +.Cm ARCHIVE_RETRY +(the operation failed but can be retried), +and +.Cm ARCHIVE_FATAL +(there was a fatal error; the archive should be closed immediately). +Detailed error codes and textual descriptions are available from the +.Fn archive_errno +and +.Fn archive_error_string +functions. +.Pp +.Fn archive_read_new +returns a pointer to a freshly allocated +.Tn struct archive +object. +It returns +.Dv NULL +on error. +.Pp +.Fn archive_read_data +returns a count of bytes actually read or zero at the end of the entry. +On error, a value of +.Cm ARCHIVE_FATAL , +.Cm ARCHIVE_WARN , +or +.Cm ARCHIVE_RETRY +is returned and an error code and textual description can be retrieved from the +.Fn archive_errno +and +.Fn archive_error_string +functions. +.Pp +The library expects the client callbacks to behave similarly. +If there is an error, you can use +.Fn archive_set_error +to set an appropriate error code and description, +then return one of the non-zero values above. +(Note that the value eventually returned to the client may +not be the same; many errors that are not critical at the level +of basic I/O can prevent the archive from being properly read, +thus most I/O errors eventually cause +.Cm ARCHIVE_FATAL +to be returned.) +.\" .Sh ERRORS +.Sh SEE ALSO +.Xr tar 1 , +.Xr archive 3 , +.Xr archive_util 3 , +.Xr tar 5 +.Sh HISTORY +The +.Nm libarchive +library first appeared in +.Fx 5.3 . 
+.Sh AUTHORS +.An -nosplit +The +.Nm libarchive +library was written by +.An Tim Kientzle Aq kientzle@acm.org . +.Sh BUGS +Directories are actually extracted in two distinct phases. +Directories are created during +.Fn archive_read_extract , +but final permissions are not set until +.Fn archive_read_close . +This separation is necessary to correctly handle borderline +cases such as a non-writable directory containing +files, but can cause unexpected results. +In particular, directory permissions are not fully +restored until the archive is closed. +If you use +.Xr chdir 2 +to change the current directory between calls to +.Fn archive_read_extract +or before calling +.Fn archive_read_close , +you may confuse the permission-setting logic with +the result that directory permissions are restored +incorrectly. diff --git a/lib/libarchive/archive_read.c b/lib/libarchive/archive_read.c new file mode 100644 index 0000000..c6e47e1 --- /dev/null +++ b/lib/libarchive/archive_read.c @@ -0,0 +1,601 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This file contains the "essential" portions of the read API, that + * is, stuff that will probably always be used by any client that + * actually needs to read an archive. Optional pieces have been, as + * far as possible, separated out into separate files to avoid + * needlessly bloating statically-linked clients. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_entry.h" +#include "archive_private.h" + +static int choose_decompressor(struct archive *, const void*, size_t); +static int choose_format(struct archive *); + +/* + * Allocate, initialize and return a struct archive object. 
+ */ +struct archive * +archive_read_new(void) +{ + struct archive *a; + unsigned char *nulls; + + a = malloc(sizeof(*a)); + if (a == NULL) { + archive_set_error(a, ENOMEM, "Can't allocate archive object"); + return (NULL); + } + memset(a, 0, sizeof(*a)); + + a->user_uid = geteuid(); + a->magic = ARCHIVE_READ_MAGIC; + a->bytes_per_block = ARCHIVE_DEFAULT_BYTES_PER_BLOCK; + + a->null_length = 1024; + nulls = malloc(a->null_length); + if (nulls == NULL) { + archive_set_error(a, ENOMEM, "Can't allocate archive object 'nulls' element"); + free(a); + return (NULL); + } + memset(nulls, 0, a->null_length); + a->nulls = nulls; + + a->state = ARCHIVE_STATE_NEW; + a->entry = archive_entry_new(); + + /* We always support uncompressed archives. */ + archive_read_support_compression_none((struct archive*)a); + + return (a); +} + +/* + * Set the block size. + */ +/* +int +archive_read_set_bytes_per_block(struct archive *a, int bytes_per_block) +{ + __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_set_bytes_per_block"); + if (bytes_per_block < 1) + bytes_per_block = 1; + a->bytes_per_block = bytes_per_block; + return (0); +} +*/ + +/* + * Open the archive + */ +int +archive_read_open(struct archive *a, void *client_data, + archive_open_callback *client_opener, archive_read_callback *client_reader, + archive_close_callback *client_closer) +{ + const void *buffer; + ssize_t bytes_read; + int high_bidder; + int e; + + __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_open"); + + if (client_reader == NULL) + __archive_errx(1, + "No reader function provided to archive_read_open"); + + /* + * Set these NULL initially. If the open or initial read fails, + * we'll leave them NULL to indicate that the file is invalid. + * (In particular, this helps ensure that the closer doesn't + * get called more than once.) 
+ */ + a->client_opener = NULL; + a->client_reader = NULL; + a->client_closer = NULL; + a->client_data = NULL; + + /* Open data source. */ + if (client_opener != NULL) { + e =(client_opener)(a, client_data); + if (e != 0) { + /* If the open failed, call the closer to clean up. */ + if (client_closer) + (client_closer)(a, client_data); + return (e); + } + } + + /* Read first block now for format detection. */ + bytes_read = (client_reader)(a, client_data, &buffer); + + if (bytes_read < 0) { + /* If the first read fails, close before returning error. */ + if (client_closer) + (client_closer)(a, client_data); + /* client_reader should have already set error information. */ + return (ARCHIVE_FATAL); + } + + /* An empty archive is a serious error. */ + if (bytes_read == 0) { + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Empty input file"); + /* Close the empty file. */ + if (client_closer) + (client_closer)(a, client_data); + return (ARCHIVE_FATAL); + } + + /* Now that the client callbacks have worked, remember them. */ + a->client_opener = client_opener; /* Do we need to remember this? */ + a->client_reader = client_reader; + a->client_closer = client_closer; + a->client_data = client_data; + + /* Select a decompression routine. */ + high_bidder = choose_decompressor(a, buffer, bytes_read); + if (high_bidder < 0) + return (ARCHIVE_FATAL); + + /* Initialize decompression routine with the first block of data. */ + e = (a->decompressors[high_bidder].init)(a, buffer, bytes_read); + + if (e == ARCHIVE_OK) + a->state = ARCHIVE_STATE_HEADER; + + return (e); +} + +/* + * Allow each registered decompression routine to bid on whether it + * wants to handle this stream. Return index of winning bidder. 
+ */ +static int +choose_decompressor(struct archive *a, const void *buffer, size_t bytes_read) +{ + int decompression_slots, i, bid, best_bid, best_bid_slot; + + decompression_slots = sizeof(a->decompressors) / + sizeof(a->decompressors[0]); + + best_bid = -1; + best_bid_slot = -1; + + for (i = 0; i < decompression_slots; i++) { + if (a->decompressors[i].bid) { + bid = (a->decompressors[i].bid)(buffer, bytes_read); + if ((bid > best_bid) || (best_bid_slot < 0)) { + best_bid = bid; + best_bid_slot = i; + } + } + } + + /* + * There were no bidders; this is a serious programmer error + * and demands a quick and definitive abort. + */ + if (best_bid_slot < 0) + __archive_errx(1, "No decompressors were registered; you " + "must call at least one " + "archive_read_support_compression_XXX function in order " + "to successfully read an archive."); + + /* + * There were bidders, but no non-zero bids; this means we can't + * support this stream. + */ + if (best_bid < 1) { + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Unrecognized archive format"); + return (ARCHIVE_FATAL); + } + + return (best_bid_slot); +} + +/* + * Read header of next entry. + */ +int +archive_read_next_header(struct archive *a, struct archive_entry **entryp) +{ + struct archive_entry *entry; + int slot, ret; + + __archive_check_magic(a, ARCHIVE_READ_MAGIC, + ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, "archive_read_next_header"); + + *entryp = NULL; + entry = a->entry; + archive_entry_clear(entry); + archive_string_empty(&a->error_string); + + /* + * If client didn't consume entire data, skip any remainder + * (This is especially important for GNU incremental directories.) + */ + if (a->state == ARCHIVE_STATE_DATA) { + ret = archive_read_data_skip(a); + if (ret == ARCHIVE_EOF) { + archive_set_error(a, EIO, "Premature end-of-file."); + a->state = ARCHIVE_STATE_FATAL; + return (ARCHIVE_FATAL); + } + if (ret != ARCHIVE_OK) + return (ret); + } + + /* Record start-of-header. 
*/ + a->header_position = a->file_position; + + slot = choose_format(a); + if (slot < 0) { + a->state = ARCHIVE_STATE_FATAL; + return (ARCHIVE_FATAL); + } + a->format = &(a->formats[slot]); + a->pformat_data = &(a->format->format_data); + ret = (a->format->read_header)(a, entry); + + /* + * EOF and FATAL are persistent at this layer. By + * modifying the state, we gaurantee that future calls to + * read a header or read data will fail. + */ + switch (ret) { + case ARCHIVE_EOF: + a->state = ARCHIVE_STATE_EOF; + break; + case ARCHIVE_OK: + a->state = ARCHIVE_STATE_DATA; + break; + case ARCHIVE_WARN: + a->state = ARCHIVE_STATE_DATA; + break; + case ARCHIVE_RETRY: + break; + case ARCHIVE_FATAL: + a->state = ARCHIVE_STATE_FATAL; + break; + } + + *entryp = entry; + a->read_data_output_offset = 0; + a->read_data_remaining = 0; + return (ret); +} + +/* + * Allow each registered format to bid on whether it wants to handle + * the next entry. Return index of winning bidder. + */ +static int +choose_format(struct archive *a) +{ + int slots; + int i; + int bid, best_bid; + int best_bid_slot; + + slots = sizeof(a->formats) / sizeof(a->formats[0]); + best_bid = -1; + best_bid_slot = -1; + + /* Set up a->format and a->pformat_data for convenience of bidders. */ + a->format = &(a->formats[0]); + for (i = 0; i < slots; i++, a->format++) { + if (a->format->bid) { + a->pformat_data = &(a->format->format_data); + bid = (a->format->bid)(a); + if (bid == ARCHIVE_FATAL) + return (ARCHIVE_FATAL); + if ((bid > best_bid) || (best_bid_slot < 0)) { + best_bid = bid; + best_bid_slot = i; + } + } + } + + /* + * There were no bidders; this is a serious programmer error + * and demands a quick and definitive abort. 
+ */ + if (best_bid_slot < 0) + __archive_errx(1, "No formats were registered; you must " + "invoke at least one archive_read_support_format_XXX " + "function in order to successfully read an archive."); + + /* + * There were bidders, but no non-zero bids; this means we + * can't support this stream. + */ + if (best_bid < 1) { + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Unrecognized archive format"); + return (ARCHIVE_FATAL); + } + + return (best_bid_slot); +} + +/* + * Return the file offset (within the uncompressed data stream) where + * the last header started. + */ +int64_t +archive_read_header_position(struct archive *a) +{ + return (a->header_position); +} + +/* + * Read data from an archive entry, using a read(2)-style interface. + * This is a convenience routine that just calls + * archive_read_data_block and copies the results into the client + * buffer, filling any gaps with zero bytes. Clients using this + * API can be completely ignorant of sparse-file issues; sparse files + * will simply be padded with nulls. + * + * DO NOT intermingle calls to this function and archive_read_data_block + * to read a single entry body. + */ +ssize_t +archive_read_data(struct archive *a, void *buff, size_t s) +{ + char *dest; + size_t bytes_read; + size_t len; + int r; + + bytes_read = 0; + dest = buff; + + while (s > 0) { + if (a->read_data_remaining <= 0) { + r = archive_read_data_block(a, + (const void **)&a->read_data_block, + &a->read_data_remaining, + &a->read_data_offset); + if (r == ARCHIVE_EOF) + return (bytes_read); + /* + * Error codes are all negative, so the status + * return here cannot be confused with a valid + * byte count. (ARCHIVE_OK is zero.) 
+ */ + if (r < ARCHIVE_OK) + return (r); + } + + if (a->read_data_offset < a->read_data_output_offset) { + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Encountered out-of-order sparse blocks"); + return (ARCHIVE_RETRY); + } else { + len = a->read_data_remaining; + if (len > s) + len = s; + memcpy(dest, a->read_data_block, len); + s -= len; + a->read_data_block += len; + a->read_data_remaining -= len; + a->read_data_output_offset += len; + a->read_data_offset += len; + dest += len; + bytes_read += len; + } + } + return (bytes_read); +} + +/* + * Skip over all remaining data in this entry. + */ +int +archive_read_data_skip(struct archive *a) +{ + int r; + const void *buff; + size_t size; + off_t offset; + + __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA, "archive_read_data_skip"); + + if (a->format->read_data_skip != NULL) + r = (a->format->read_data_skip)(a); + else { + while ((r = archive_read_data_block(a, &buff, &size, &offset)) + == ARCHIVE_OK) + ; + } + + if (r == ARCHIVE_EOF) + r = ARCHIVE_OK; + + a->state = ARCHIVE_STATE_HEADER; + return (r); +} + +/* + * Read the next block of entry data from the archive. + * This is a zero-copy interface; the client receives a pointer, + * size, and file offset of the next available block of data. + * + * Returns ARCHIVE_OK if the operation is successful, ARCHIVE_EOF if + * the end of entry is encountered. + */ +int +archive_read_data_block(struct archive *a, + const void **buff, size_t *size, off_t *offset) +{ + __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA, "archive_read_data_block"); + + if (a->format->read_data == NULL) { + archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER, + "Internal error: " + "No format_read_data_block function registered"); + return (ARCHIVE_FATAL); + } + + return (a->format->read_data)(a, buff, size, offset); +} + +/* + * Close the file and release most resources. + * + * Be careful: client might just call read_new and then read_finish. 
+ * Don't assume we actually read anything or performed any non-trivial + * initialization. + */ +int +archive_read_close(struct archive *a) +{ + __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY, "archive_read_close"); + a->state = ARCHIVE_STATE_CLOSED; + + /* Call cleanup functions registered by optional components. */ + if (a->cleanup_archive_extract != NULL) + (a->cleanup_archive_extract)(a); + + /* TODO: Finish the format processing. */ + + /* Close the input machinery. */ + if (a->compression_finish != NULL) + (a->compression_finish)(a); + return (ARCHIVE_OK); +} + +/* + * Release memory and other resources. + */ +void +archive_read_finish(struct archive *a) +{ + int i; + int slots; + + __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY, "archive_read_finish"); + if (a->state != ARCHIVE_STATE_CLOSED) + archive_read_close(a); + + /* Cleanup format-specific data. */ + slots = sizeof(a->formats) / sizeof(a->formats[0]); + for (i = 0; i < slots; i++) { + a->pformat_data = &(a->formats[i].format_data); + if (a->formats[i].cleanup) + (a->formats[i].cleanup)(a); + } + + /* Casting a pointer to int allows us to remove 'const.' */ + free((void *)(uintptr_t)(const void *)a->nulls); + archive_string_free(&a->error_string); + if (a->entry) + archive_entry_free(a->entry); + a->magic = 0; + free(a); +} + +/* + * Used internally by read format handlers to register their bid and + * initialization functions. 
+ */ +int +__archive_read_register_format(struct archive *a, + void *format_data, + int (*bid)(struct archive *), + int (*read_header)(struct archive *, struct archive_entry *), + int (*read_data)(struct archive *, const void **, size_t *, off_t *), + int (*read_data_skip)(struct archive *), + int (*cleanup)(struct archive *)) +{ + int i, number_slots; + + __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "__archive_read_register_format"); + + number_slots = sizeof(a->formats) / sizeof(a->formats[0]); + + for (i = 0; i < number_slots; i++) { + if (a->formats[i].bid == bid) + return (ARCHIVE_WARN); /* We've already installed */ + if (a->formats[i].bid == NULL) { + a->formats[i].bid = bid; + a->formats[i].read_header = read_header; + a->formats[i].read_data = read_data; + a->formats[i].read_data_skip = read_data_skip; + a->formats[i].cleanup = cleanup; + a->formats[i].format_data = format_data; + return (ARCHIVE_OK); + } + } + + __archive_errx(1, "Not enough slots for format registration"); + return (ARCHIVE_FATAL); /* Never actually called. */ +} + +/* + * Used internally by decompression routines to register their bid and + * initialization functions. + */ +int +__archive_read_register_compression(struct archive *a, + int (*bid)(const void *, size_t), + int (*init)(struct archive *, const void *, size_t)) +{ + int i, number_slots; + + __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "__archive_read_register_compression"); + + number_slots = sizeof(a->decompressors) / sizeof(a->decompressors[0]); + + for (i = 0; i < number_slots; i++) { + if (a->decompressors[i].bid == bid) + return (ARCHIVE_OK); /* We've already installed */ + if (a->decompressors[i].bid == NULL) { + a->decompressors[i].bid = bid; + a->decompressors[i].init = init; + return (ARCHIVE_OK); + } + } + + __archive_errx(1, "Not enough slots for compression registration"); + return (ARCHIVE_FATAL); /* Never actually executed. 
*/ +} diff --git a/lib/libarchive/archive_read_data_into_buffer.c b/lib/libarchive/archive_read_data_into_buffer.c new file mode 100644 index 0000000..0b52617 --- /dev/null +++ b/lib/libarchive/archive_read_data_into_buffer.c @@ -0,0 +1,49 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <string.h> + +#include "archive.h" + +int +archive_read_data_into_buffer(struct archive *a, void *d, ssize_t len) +{ + char *dest; + ssize_t bytes_read, total_bytes; + + dest = d; + total_bytes = 0; + bytes_read = archive_read_data(a, dest, len); + while (bytes_read > 0) { + total_bytes += bytes_read; + bytes_read = archive_read_data(a, dest + total_bytes, + len - total_bytes); + } + return (ARCHIVE_OK); +} diff --git a/lib/libarchive/archive_read_data_into_fd.c b/lib/libarchive/archive_read_data_into_fd.c new file mode 100644 index 0000000..9b31d22 --- /dev/null +++ b/lib/libarchive/archive_read_data_into_fd.c @@ -0,0 +1,84 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <errno.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_private.h" + +/* Maximum amount of data to write at one time. */ +#define MAX_WRITE (1024 * 1024) + +/* + * This implementation minimizes copying of data and is sparse-file aware. + */ +int +archive_read_data_into_fd(struct archive *a, int fd) +{ + int r; + const void *buff; + size_t size; + ssize_t bytes_to_write, bytes_written, total_written; + off_t offset; + off_t output_offset; + + __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA, "archive_read_data_into_fd"); + + total_written = 0; + output_offset = 0; + + while ((r = archive_read_data_block(a, &buff, &size, &offset)) == + ARCHIVE_OK) { + if (offset > output_offset) { + lseek(fd, offset - output_offset, SEEK_CUR); + output_offset = offset; + } + while (size > 0) { + bytes_to_write = size; + if (bytes_to_write > MAX_WRITE) + bytes_to_write = MAX_WRITE; + bytes_written = write(fd, buff, bytes_to_write); + if (bytes_written < 0) { + archive_set_error(a, errno, "Write error"); + return (-1); + } + output_offset += bytes_written; + total_written += bytes_written; + size -= bytes_written; + if (a->extract_progress != NULL) + (*a->extract_progress)(a->extract_progress_user_data); + } + } + + if (r != ARCHIVE_EOF) + return (r); + return (ARCHIVE_OK); +} diff --git a/lib/libarchive/archive_read_extract.c 
b/lib/libarchive/archive_read_extract.c new file mode 100644 index 0000000..aedd392 --- /dev/null +++ b/lib/libarchive/archive_read_extract.c @@ -0,0 +1,1604 @@ +/*- + * Copyright (c) 2003-2005 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#ifdef HAVE_SYS_ACL_H +#include <sys/acl.h> +#endif +#ifdef HAVE_ATTR_XATTR_H +#include <attr/xattr.h> +#endif +#ifdef HAVE_SYS_IOCTL_H +#include <sys/ioctl.h> +#endif +#include <sys/stat.h> +#include <sys/time.h> + +#ifdef HAVE_EXT2FS_EXT2_FS_H +#include <ext2fs/ext2_fs.h> /* for Linux file flags */ +#endif +#include <errno.h> +#include <fcntl.h> +#include <grp.h> +#ifdef HAVE_LINUX_EXT2_FS_H +#include <linux/ext2_fs.h> /* for Linux file flags */ +#endif +#include <limits.h> +#include <pwd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_string.h" +#include "archive_entry.h" +#include "archive_private.h" + +struct fixup_entry { + struct fixup_entry *next; + mode_t mode; + int64_t mtime; + int64_t atime; + unsigned long mtime_nanos; + unsigned long atime_nanos; + unsigned long fflags_set; + int fixup; /* bitmask of what needs fixing */ + char *name; +}; + +#define FIXUP_MODE 1 +#define FIXUP_TIMES 2 +#define FIXUP_FFLAGS 4 + +struct bucket { + char *name; + int hash; + id_t id; +}; + +struct extract { + mode_t umask; + mode_t default_dir_mode; + struct archive_string create_parent_dir; + struct fixup_entry *fixup_list; + struct fixup_entry *current_fixup; + + struct bucket ucache[127]; + struct bucket gcache[127]; + + /* + * Cached stat data from disk for the current entry. + * If this is valid, pst points to st. Otherwise, + * pst is null. + */ + struct stat st; + struct stat *pst; +}; + +/* Default mode for dirs created automatically (will be modified by umask). */ +#define DEFAULT_DIR_MODE 0777 +/* + * Mode to use for newly-created dirs during extraction; the correct + * mode will be set at the end of the extraction. 
+ */ +#define SECURE_DIR_MODE 0700 + +static void archive_extract_cleanup(struct archive *); +static int extract_block_device(struct archive *, + struct archive_entry *, int); +static int extract_char_device(struct archive *, + struct archive_entry *, int); +static int extract_device(struct archive *, + struct archive_entry *, int flags, mode_t mode); +static int extract_dir(struct archive *, struct archive_entry *, int); +static int extract_fifo(struct archive *, struct archive_entry *, int); +static int extract_file(struct archive *, struct archive_entry *, int); +static int extract_hard_link(struct archive *, struct archive_entry *, int); +static int extract_symlink(struct archive *, struct archive_entry *, int); +static unsigned int hash(const char *); +static gid_t lookup_gid(struct archive *, const char *uname, gid_t); +static uid_t lookup_uid(struct archive *, const char *uname, uid_t); +static int create_dir(struct archive *, const char *, int flags); +static int create_dir_mutable(struct archive *, char *, int flags); +static int create_dir_recursive(struct archive *, char *, int flags); +static int create_parent_dir(struct archive *, const char *, int flags); +static int create_parent_dir_mutable(struct archive *, char *, int flags); +static int restore_metadata(struct archive *, int fd, + struct archive_entry *, int flags); +#ifdef HAVE_POSIX_ACL +static int set_acl(struct archive *, int fd, struct archive_entry *, + acl_type_t, int archive_entry_acl_type, const char *tn); +#endif +static int set_acls(struct archive *, int fd, struct archive_entry *); +static int set_xattrs(struct archive *, int fd, struct archive_entry *); +static int set_fflags(struct archive *, int fd, const char *name, mode_t, + unsigned long fflags_set, unsigned long fflags_clear); +static int set_ownership(struct archive *, int fd, struct archive_entry *, + int flags); +static int set_perm(struct archive *, int fd, struct archive_entry *, + int mode, int flags); +static int 
set_time(struct archive *, int fd, struct archive_entry *, int); +static struct fixup_entry *sort_dir_list(struct fixup_entry *p); + + +/* + * Extract this entry to disk. + * + * TODO: Validate hardlinks. According to the standards, we're + * supposed to check each extracted hardlink and squawk if it refers + * to a file that we didn't restore. I'm not entirely convinced this + * is a good idea, but more importantly: Is there any way to validate + * hardlinks without keeping a complete list of filenames from the + * entire archive?? Ugh. + * + */ +int +archive_read_extract(struct archive *a, struct archive_entry *entry, int flags) +{ + mode_t mode; + struct extract *extract; + int ret; + int restore_pwd; + char *original_filename; + + if (a->extract == NULL) { + a->extract = malloc(sizeof(*a->extract)); + if (a->extract == NULL) { + archive_set_error(a, ENOMEM, "Can't extract"); + return (ARCHIVE_FATAL); + } + a->cleanup_archive_extract = archive_extract_cleanup; + memset(a->extract, 0, sizeof(*a->extract)); + } + extract = a->extract; + umask(extract->umask = umask(0)); /* Read the current umask. */ + extract->default_dir_mode = DEFAULT_DIR_MODE & ~extract->umask; + extract->pst = NULL; + extract->current_fixup = NULL; + restore_pwd = -1; + original_filename = NULL; + + /* The following is not possible without fchdir. <sigh> */ +#ifdef HAVE_FCHDIR + /* + * If pathname is longer than PATH_MAX, record starting directory + * and chdir to a suitable intermediate dir. + */ + if (strlen(archive_entry_pathname(entry)) > PATH_MAX) { + char *intdir, *tail; + + restore_pwd = open(".", O_RDONLY); + if (restore_pwd < 0) { + archive_set_error(a, errno, + "Unable to restore long pathname"); + return (ARCHIVE_WARN); + } + + /* + * Yes, the copy here is necessary because we edit + * the pathname in-place to create intermediate dirnames. 
+ */ + original_filename = strdup(archive_entry_pathname(entry)); + + /* + * "intdir" points to the initial dir section we're going + * to remove, "tail" points to the remainder of the path. + */ + intdir = tail = original_filename; + while (strlen(tail) > PATH_MAX) { + intdir = tail; + + /* Locate a dir prefix shorter than PATH_MAX. */ + tail = intdir + PATH_MAX - 8; + while (tail > intdir && *tail != '/') + tail--; + if (tail <= intdir) { + archive_set_error(a, EPERM, + "Path element too long"); + ret = ARCHIVE_WARN; + goto cleanup; + } + + /* Create intdir and chdir to it. */ + *tail = '\0'; /* Terminate dir portion */ + ret = create_dir(a, intdir, flags); + if (ret == ARCHIVE_OK && chdir(intdir) != 0) { + archive_set_error(a, errno, "Couldn't chdir"); + ret = ARCHIVE_WARN; + } + *tail = '/'; /* Restore the / we removed. */ + if (ret != ARCHIVE_OK) + goto cleanup; + tail++; + } + archive_entry_set_pathname(entry, tail); + } +#endif + + if (stat(archive_entry_pathname(entry), &extract->st) == 0) + extract->pst = &extract->st; + + if (extract->pst != NULL && + extract->pst->st_dev == a->skip_file_dev && + extract->pst->st_ino == a->skip_file_ino) { + archive_set_error(a, 0, "Refusing to overwrite archive"); + ret = ARCHIVE_WARN; + } else if (archive_entry_hardlink(entry) != NULL) + ret = extract_hard_link(a, entry, flags); + else { + mode = archive_entry_mode(entry); + switch (mode & S_IFMT) { + default: + /* Fall through, as required by POSIX. */ + case S_IFREG: + ret = extract_file(a, entry, flags); + break; + case S_IFLNK: /* Symlink */ + ret = extract_symlink(a, entry, flags); + break; + case S_IFCHR: + ret = extract_char_device(a, entry, flags); + break; + case S_IFBLK: + ret = extract_block_device(a, entry, flags); + break; + case S_IFDIR: + ret = extract_dir(a, entry, flags); + break; + case S_IFIFO: + ret = extract_fifo(a, entry, flags); + break; + } + } + + +cleanup: +#ifdef HAVE_FCHDIR + /* If we changed directory above, restore it here. 
*/ + if (restore_pwd >= 0 && original_filename != NULL) { + fchdir(restore_pwd); + close(restore_pwd); + archive_entry_copy_pathname(entry, original_filename); + free(original_filename); + } +#endif + + return (ret); +} + +/* + * Cleanup function for archive_extract. Mostly, this involves processing + * the fixup list, which is used to address a number of problems: + * * Dir permissions might prevent us from restoring a file in that + * dir, so we restore the dir 0700 first, then correct the + * mode at the end. + * * Similarly, the act of restoring a file touches the directory + * and changes the timestamp on the dir, so we have to touch-up dir + * timestamps at the end as well. + * * Some file flags can interfere with the restore by, for example, + * preventing the creation of hardlinks to those files. + * + * Note that tar/cpio do not require that archives be in a particular + * order; there is no way to know when the last file has been restored + * within a directory, so there's no way to optimize the memory usage + * here by fixing up the directory any earlier than the + * end-of-archive. + * + * XXX TODO: Directory ACLs should be restored here, for the same + * reason we set directory perms here. XXX + * + * Registering this function (rather than calling it explicitly by + * name from archive_read_finish) reduces static link pollution, since + * applications that don't use this API won't get this file linked in. + */ +static void +archive_extract_cleanup(struct archive *a) +{ + struct fixup_entry *next, *p; + struct extract *extract; + + /* Sort dir list so directories are fixed up in depth-first order. */ + extract = a->extract; + p = sort_dir_list(extract->fixup_list); + + while (p != NULL) { + extract->pst = NULL; /* Mark stat cache as out-of-date. 
*/ + if (p->fixup & FIXUP_TIMES) { + struct timeval times[2]; + times[1].tv_sec = p->mtime; + times[1].tv_usec = p->mtime_nanos / 1000; + times[0].tv_sec = p->atime; + times[0].tv_usec = p->atime_nanos / 1000; + utimes(p->name, times); + } + if (p->fixup & FIXUP_MODE) + chmod(p->name, p->mode); + + if (p->fixup & FIXUP_FFLAGS) + set_fflags(a, -1, p->name, p->mode, p->fflags_set, 0); + + next = p->next; + free(p->name); + free(p); + p = next; + } + extract->fixup_list = NULL; + archive_string_free(&extract->create_parent_dir); + free(a->extract); + a->extract = NULL; +} + +/* + * Simple O(n log n) merge sort to order the fixup list. In + * particular, we want to restore dir timestamps depth-first. + */ +static struct fixup_entry * +sort_dir_list(struct fixup_entry *p) +{ + struct fixup_entry *a, *b, *t; + + if (p == NULL) + return (NULL); + /* A one-item list is already sorted. */ + if (p->next == NULL) + return (p); + + /* Step 1: split the list. */ + t = p; + a = p->next->next; + while (a != NULL) { + /* Step a twice, t once. */ + a = a->next; + if (a != NULL) + a = a->next; + t = t->next; + } + /* Now, t is at the mid-point, so break the list here. */ + b = t->next; + t->next = NULL; + a = p; + + /* Step 2: Recursively sort the two sub-lists. */ + a = sort_dir_list(a); + b = sort_dir_list(b); + + /* Step 3: Merge the returned lists. */ + /* Pick the first element for the merged list. */ + if (strcmp(a->name, b->name) > 0) { + t = p = a; + a = a->next; + } else { + t = p = b; + b = b->next; + } + + /* Always put the later element on the list first. */ + while (a != NULL && b != NULL) { + if (strcmp(a->name, b->name) > 0) { + t->next = a; + a = a->next; + } else { + t->next = b; + b = b->next; + } + t = t->next; + } + + /* Only one list is non-empty, so just splice it on. */ + if (a != NULL) + t->next = a; + if (b != NULL) + t->next = b; + + return (p); +} + +/* + * Returns a new, initialized fixup entry. 
+ * + * TODO: Reduce the memory requirements for this list by using a tree + * structure rather than a simple list of names. + */ +static struct fixup_entry * +new_fixup(struct archive *a, const char *pathname) +{ + struct extract *extract; + struct fixup_entry *fe; + + extract = a->extract; + fe = malloc(sizeof(struct fixup_entry)); + if (fe == NULL) + return (NULL); + fe->next = extract->fixup_list; + extract->fixup_list = fe; + fe->fixup = 0; + fe->name = strdup(pathname); + return (fe); +} + +/* + * Returns a fixup structure for the current entry. + */ +static struct fixup_entry * +current_fixup(struct archive *a, const char *pathname) +{ + struct extract *extract; + + extract = a->extract; + if (extract->current_fixup == NULL) + extract->current_fixup = new_fixup(a, pathname); + return (extract->current_fixup); +} + +static int +extract_file(struct archive *a, struct archive_entry *entry, int flags) +{ + struct extract *extract; + const char *name; + mode_t mode; + int fd, r, r2; + + extract = a->extract; + name = archive_entry_pathname(entry); + mode = archive_entry_mode(entry) & 0777; + r = ARCHIVE_OK; + + /* + * If we're not supposed to overwrite pre-existing files, + * use O_EXCL. Otherwise, use O_TRUNC. + */ + if (flags & (ARCHIVE_EXTRACT_UNLINK | ARCHIVE_EXTRACT_NO_OVERWRITE)) + fd = open(name, O_WRONLY | O_CREAT | O_EXCL, mode); + else + fd = open(name, O_WRONLY | O_CREAT | O_TRUNC, mode); + + /* Try removing a pre-existing file. */ + if (fd < 0 && !(flags & ARCHIVE_EXTRACT_NO_OVERWRITE)) { + unlink(name); + fd = open(name, O_WRONLY | O_CREAT | O_EXCL, mode); + } + + /* Might be a non-existent parent dir; try fixing that. */ + if (fd < 0) { + create_parent_dir(a, name, flags); + fd = open(name, O_WRONLY | O_CREAT | O_EXCL, mode); + } + if (fd < 0) { + archive_set_error(a, errno, "Can't open '%s'", name); + return (ARCHIVE_WARN); + } + r = archive_read_data_into_fd(a, fd); + extract->pst = NULL; /* Cached stat data no longer valid. 
*/ + r2 = restore_metadata(a, fd, entry, flags); + close(fd); + return (err_combine(r, r2)); +} + +static int +extract_dir(struct archive *a, struct archive_entry *entry, int flags) +{ + struct extract *extract; + struct fixup_entry *fe; + char *path, *p; + + extract = a->extract; + extract->pst = NULL; /* Invalidate cached stat data. */ + + /* Copy path to mutable storage. */ + archive_strcpy(&(extract->create_parent_dir), + archive_entry_pathname(entry)); + path = extract->create_parent_dir.s; + + if (*path == '\0') { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Invalid empty pathname"); + return (ARCHIVE_WARN); + } + + /* Deal with any troublesome trailing path elements. */ + /* TODO: Someday, generalize this to remove '//' or '/./' from + * the middle of paths. But, it should not compress '..' from + * the middle of paths. It's a feature that restoring + * "a/../b" creates both 'a' and 'b' directories. */ + for (;;) { + /* Locate last element. */ + p = strrchr(path, '/'); + if (p != NULL) + p++; + else + p = path; + /* Trim trailing '/' unless that's the entire path. */ + if (p[0] == '\0' && p - 1 > path) { + p[-1] = '\0'; + continue; + } + /* Trim trailing '.' unless that's the entire path. */ + if (p > path && p[0] == '.' && p[1] == '\0') { + p[0] = '\0'; + continue; + } + /* Just exit on trailing '..'. */ + if (p[0] == '.' && p[1] == '.' && p[2] == '\0') { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Can't restore directory '..'"); + return (ARCHIVE_WARN); + } + break; + } + + if (mkdir(path, SECURE_DIR_MODE) == 0) + goto success; + + if (extract->pst == NULL && stat(path, &extract->st) == 0) + extract->pst = &extract->st; + + if (extract->pst != NULL) { + extract->pst = &extract->st; + /* If dir already exists, don't reset permissions. */ + if (S_ISDIR(extract->pst->st_mode)) + return (ARCHIVE_OK); + /* It exists but isn't a dir. */ + if ((flags & ARCHIVE_EXTRACT_UNLINK)) + unlink(path); + } else { + /* Doesn't already exist; try building the parent path. 
*/ + if (create_parent_dir_mutable(a, path, flags) != ARCHIVE_OK) + return (ARCHIVE_WARN); + } + + /* One final attempt to create the dir. */ + if (mkdir(path, SECURE_DIR_MODE) != 0) { + archive_set_error(a, errno, "Can't create directory"); + return (ARCHIVE_WARN); + } + +success: + /* Add this dir to the fixup list. */ + fe = current_fixup(a, path); + fe->fixup |= FIXUP_MODE; + fe->mode = archive_entry_mode(entry); + if ((flags & ARCHIVE_EXTRACT_PERM) == 0) + fe->mode &= ~extract->umask; + if (flags & ARCHIVE_EXTRACT_TIME) { + fe->fixup |= FIXUP_TIMES; + fe->mtime = archive_entry_mtime(entry); + fe->mtime_nanos = archive_entry_mtime_nsec(entry); + fe->atime = archive_entry_atime(entry); + fe->atime_nanos = archive_entry_atime_nsec(entry); + } + /* For now, set the mode to SECURE_DIR_MODE. */ + archive_entry_set_mode(entry, SECURE_DIR_MODE); + return (restore_metadata(a, -1, entry, flags)); +} + + +/* + * Create the parent of the specified path. Copy the provided + * path into mutable storage first. + */ +static int +create_parent_dir(struct archive *a, const char *path, int flags) +{ + int r; + + /* Copy path to mutable storage. */ + archive_strcpy(&(a->extract->create_parent_dir), path); + r = create_parent_dir_mutable(a, a->extract->create_parent_dir.s, flags); + return (r); +} + +/* + * Like create_parent_dir, but creates the dir actually requested, not + * the parent. + */ +static int +create_dir(struct archive *a, const char *path, int flags) +{ + int r; + /* Copy path to mutable storage. */ + archive_strcpy(&(a->extract->create_parent_dir), path); + r = create_dir_mutable(a, a->extract->create_parent_dir.s, flags); + return (r); +} + +/* + * Create the parent directory of the specified path, assuming path + * is already in mutable storage. + */ +static int +create_parent_dir_mutable(struct archive *a, char *path, int flags) +{ + char *slash; + int r; + + /* Remove tail element to obtain parent name. 
*/ + slash = strrchr(path, '/'); + if (slash == NULL) + return (ARCHIVE_OK); + *slash = '\0'; + r = create_dir_mutable(a, path, flags); + *slash = '/'; + return (r); +} + +/* + * Create the specified dir, assuming path is already in + * mutable storage. + */ +static int +create_dir_mutable(struct archive *a, char *path, int flags) +{ + mode_t old_umask; + int r; + + old_umask = umask(~SECURE_DIR_MODE); + r = create_dir_recursive(a, path, flags); + umask(old_umask); + return (r); +} + +/* + * Create the specified dir, recursing to create parents as necessary. + * + * Returns ARCHIVE_OK if the path exists when we're done here. + * Otherwise, returns ARCHIVE_WARN. + */ +static int +create_dir_recursive(struct archive *a, char *path, int flags) +{ + struct stat st; + struct extract *extract; + struct fixup_entry *le; + char *slash, *base; + int r; + + extract = a->extract; + r = ARCHIVE_OK; + + /* Check for special names and just skip them. */ + slash = strrchr(path, '/'); + base = strrchr(path, '/'); + if (slash == NULL) + base = path; + else + base = slash + 1; + + if (base[0] == '\0' || + (base[0] == '.' && base[1] == '\0') || + (base[0] == '.' && base[1] == '.' && base[2] == '\0')) { + /* Don't bother trying to create null path, '.', or '..'. */ + if (slash != NULL) { + *slash = '\0'; + r = create_dir_recursive(a, path, flags); + *slash = '/'; + return (r); + } + return (ARCHIVE_OK); + } + + /* + * Yes, this should be stat() and not lstat(). Using lstat() + * here loses the ability to extract through symlinks. Also note + * that this should not use the extract->st cache. 
+ */ + if (stat(path, &st) == 0) { + if (S_ISDIR(st.st_mode)) + return (ARCHIVE_OK); + if ((flags & ARCHIVE_EXTRACT_NO_OVERWRITE)) { + archive_set_error(a, EEXIST, + "Can't create directory '%s'", path); + return (ARCHIVE_WARN); + } + if (unlink(path) != 0) { + archive_set_error(a, errno, + "Can't create directory '%s': " + "Conflicting file cannot be removed"); + return (ARCHIVE_WARN); + } + } else if (errno != ENOENT && errno != ENOTDIR) { + /* Stat failed? */ + archive_set_error(a, errno, "Can't test directory '%s'", path); + return (ARCHIVE_WARN); + } else if (slash != NULL) { + *slash = '\0'; + r = create_dir_recursive(a, path, flags); + *slash = '/'; + if (r != ARCHIVE_OK) + return (r); + } + + if (mkdir(path, SECURE_DIR_MODE) == 0) { + le = new_fixup(a, path); + le->fixup |= FIXUP_MODE; + le->mode = extract->default_dir_mode; + return (ARCHIVE_OK); + } + + /* + * Without the following check, a/b/../b/c/d fails at the + * second visit to 'b', so 'd' can't be created. Note that we + * don't add it to the fixup list here, as it's already been + * added. + */ + if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) + return (ARCHIVE_OK); + + archive_set_error(a, errno, "Failed to create dir '%s'", path); + return (ARCHIVE_WARN); +} + +static int +extract_hard_link(struct archive *a, struct archive_entry *entry, int flags) +{ + struct extract *extract; + int r; + const char *pathname; + const char *linkname; + + extract = a->extract; + pathname = archive_entry_pathname(entry); + linkname = archive_entry_hardlink(entry); + + /* Just remove any pre-existing file with this name. */ + if (!(flags & ARCHIVE_EXTRACT_NO_OVERWRITE)) + unlink(pathname); + + r = link(linkname, pathname); + extract->pst = NULL; /* Invalidate cached stat data. */ + + if (r != 0) { + /* Might be a non-existent parent dir; try fixing that. 
*/ + create_parent_dir(a, pathname, flags); + r = link(linkname, pathname); + } + + if (r != 0) { + /* XXX Better error message here XXX */ + archive_set_error(a, errno, + "Can't restore hardlink to '%s'", linkname); + return (ARCHIVE_WARN); + } + + /* Set ownership, time, permission information. */ + r = restore_metadata(a, -1, entry, flags); + return (r); +} + +static int +extract_symlink(struct archive *a, struct archive_entry *entry, int flags) +{ + struct extract *extract; + int r; + const char *pathname; + const char *linkname; + + extract = a->extract; + pathname = archive_entry_pathname(entry); + linkname = archive_entry_symlink(entry); + + /* Just remove any pre-existing file with this name. */ + if (!(flags & ARCHIVE_EXTRACT_NO_OVERWRITE)) + unlink(pathname); + + r = symlink(linkname, pathname); + extract->pst = NULL; /* Invalidate cached stat data. */ + + if (r != 0) { + /* Might be a non-existent parent dir; try fixing that. */ + create_parent_dir(a, pathname, flags); + r = symlink(linkname, pathname); + } + + if (r != 0) { + /* XXX Better error message here XXX */ + archive_set_error(a, errno, + "Can't restore symlink to '%s'", linkname); + return (ARCHIVE_WARN); + } + + r = restore_metadata(a, -1, entry, flags); + return (r); +} + +static int +extract_device(struct archive *a, struct archive_entry *entry, + int flags, mode_t mode) +{ + struct extract *extract; + int r; + + extract = a->extract; + /* Just remove any pre-existing file with this name. */ + if (!(flags & ARCHIVE_EXTRACT_NO_OVERWRITE)) + unlink(archive_entry_pathname(entry)); + + r = mknod(archive_entry_pathname(entry), mode, + archive_entry_rdev(entry)); + extract->pst = NULL; /* Invalidate cached stat data. */ + + /* Might be a non-existent parent dir; try fixing that. 
*/ + if (r != 0 && errno == ENOENT) { + create_parent_dir(a, archive_entry_pathname(entry), flags); + r = mknod(archive_entry_pathname(entry), mode, + archive_entry_rdev(entry)); + } + + if (r != 0) { + archive_set_error(a, errno, "Can't restore device node"); + return (ARCHIVE_WARN); + } + + r = restore_metadata(a, -1, entry, flags); + return (r); +} + +static int +extract_char_device(struct archive *a, struct archive_entry *entry, int flags) +{ + mode_t mode; + + mode = (archive_entry_mode(entry) & ~S_IFMT) | S_IFCHR; + return (extract_device(a, entry, flags, mode)); +} + +static int +extract_block_device(struct archive *a, struct archive_entry *entry, int flags) +{ + mode_t mode; + + mode = (archive_entry_mode(entry) & ~S_IFMT) | S_IFBLK; + return (extract_device(a, entry, flags, mode)); +} + +static int +extract_fifo(struct archive *a, struct archive_entry *entry, int flags) +{ + struct extract *extract; + int r; + + extract = a->extract; + /* Just remove any pre-existing file with this name. */ + if (!(flags & ARCHIVE_EXTRACT_NO_OVERWRITE)) + unlink(archive_entry_pathname(entry)); + + r = mkfifo(archive_entry_pathname(entry), + archive_entry_mode(entry)); + extract->pst = NULL; /* Invalidate cached stat data. */ + + /* Might be a non-existent parent dir; try fixing that. 
*/ + if (r != 0 && errno == ENOENT) { + create_parent_dir(a, archive_entry_pathname(entry), flags); + r = mkfifo(archive_entry_pathname(entry), + archive_entry_mode(entry)); + } + + if (r != 0) { + archive_set_error(a, errno, "Can't restore fifo"); + return (ARCHIVE_WARN); + } + + r = restore_metadata(a, -1, entry, flags); + return (r); +} + +static int +restore_metadata(struct archive *a, int fd, struct archive_entry *entry, int flags) +{ + int r, r2; + + r = set_ownership(a, fd, entry, flags); + r2 = set_time(a, fd, entry, flags); + r = err_combine(r, r2); + r2 = set_perm(a, fd, entry, archive_entry_mode(entry), flags); + return (err_combine(r, r2)); +} + +static int +set_ownership(struct archive *a, int fd, + struct archive_entry *entry, int flags) +{ + uid_t uid; + gid_t gid; + + /* Not changed. */ + if ((flags & ARCHIVE_EXTRACT_OWNER) == 0) + return (ARCHIVE_OK); + + uid = lookup_uid(a, archive_entry_uname(entry), + archive_entry_uid(entry)); + gid = lookup_gid(a, archive_entry_gname(entry), + archive_entry_gid(entry)); + + /* If we know we can't change it, don't bother trying. */ + if (a->user_uid != 0 && a->user_uid != uid) + return (ARCHIVE_OK); + +#ifdef HAVE_FCHOWN + if (fd >= 0 && fchown(fd, uid, gid) == 0) + return (ARCHIVE_OK); +#endif + +#ifdef HAVE_LCHOWN + if (lchown(archive_entry_pathname(entry), uid, gid)) +#else + if (!S_ISLNK(archive_entry_mode(entry)) + && chown(archive_entry_pathname(entry), uid, gid) != 0) +#endif + { + archive_set_error(a, errno, + "Can't set user=%d/group=%d for %s", uid, gid, + archive_entry_pathname(entry)); + return (ARCHIVE_WARN); + } + return (ARCHIVE_OK); +} + +static int +set_time(struct archive *a, int fd, struct archive_entry *entry, int flags) +{ + const struct stat *st; + struct timeval times[2]; + + (void)a; /* UNUSED */ + st = archive_entry_stat(entry); + + if ((flags & ARCHIVE_EXTRACT_TIME) == 0) + return (ARCHIVE_OK); + /* It's a waste of time to mess with dir timestamps here. 
*/ + if (S_ISDIR(archive_entry_mode(entry))) + return (ARCHIVE_OK); + + times[1].tv_sec = st->st_mtime; + times[1].tv_usec = ARCHIVE_STAT_MTIME_NANOS(st) / 1000; + + times[0].tv_sec = st->st_atime; + times[0].tv_usec = ARCHIVE_STAT_ATIME_NANOS(st) / 1000; + +#ifdef HAVE_FUTIMES + if (fd >= 0 && futimes(fd, times) == 0) + return (ARCHIVE_OK); +#endif + +#ifdef HAVE_LUTIMES + if (lutimes(archive_entry_pathname(entry), times) != 0) { +#else + if ((archive_entry_mode(entry) & S_IFMT) != S_IFLNK && + utimes(archive_entry_pathname(entry), times) != 0) { +#endif + archive_set_error(a, errno, "Can't update time for %s", + archive_entry_pathname(entry)); + return (ARCHIVE_WARN); + } + + /* + * Note: POSIX does not provide a portable way to restore ctime. + * (Apart from resetting the system clock, which is distasteful.) + * So, any restoration of ctime will necessarily be OS-specific. + */ + + /* XXX TODO: Can FreeBSD restore ctime? XXX */ + + return (ARCHIVE_OK); +} + +static int +set_perm(struct archive *a, int fd, struct archive_entry *entry, + int mode, int flags) +{ + struct extract *extract; + struct fixup_entry *le; + const char *name; + unsigned long set, clear; + int r; + int critical_flags; + + extract = a->extract; + + /* Obey umask unless ARCHIVE_EXTRACT_PERM. */ + if ((flags & ARCHIVE_EXTRACT_PERM) == 0) + mode &= ~extract->umask; /* Enforce umask. */ + name = archive_entry_pathname(entry); + + if (mode & (S_ISUID | S_ISGID)) { + if (extract->pst != NULL) { + /* Already have stat() data available. */ +#ifdef HAVE_FSTAT + } else if (fd >= 0 && fstat(fd, &extract->st) == 0) { + extract->pst = &extract->st; +#endif + } else if (stat(name, &extract->st) == 0) { + extract->pst = &extract->st; + } else { + archive_set_error(a, errno, + "Couldn't stat file"); + return (ARCHIVE_WARN); + } + + /* + * TODO: Use the uid/gid looked up in set_ownership + * above rather than the uid/gid stored in the entry. 
+ */ + if (extract->pst->st_uid != archive_entry_uid(entry)) + mode &= ~ S_ISUID; + if (extract->pst->st_gid != archive_entry_gid(entry)) + mode &= ~ S_ISGID; + } + + /* + * Ensure we change permissions on the object we extracted, + * and not any incidental symlink that might have gotten in + * the way. + */ + if (!S_ISLNK(archive_entry_mode(entry))) { +#ifdef HAVE_FCHMOD + if (fd >= 0) { + if (fchmod(fd, mode) != 0) { + archive_set_error(a, errno, + "Can't set permissions"); + return (ARCHIVE_WARN); + } + } else +#endif + if (chmod(name, mode) != 0) { + archive_set_error(a, errno, "Can't set permissions"); + return (ARCHIVE_WARN); + } +#ifdef HAVE_LCHMOD + } else { + /* + * If lchmod() isn't supported, it's no big deal. + * Permissions on symlinks are actually ignored on + * most platforms. + */ + if (lchmod(name, mode) != 0) { + archive_set_error(a, errno, "Can't set permissions"); + return (ARCHIVE_WARN); + } +#endif + } + + if (flags & ARCHIVE_EXTRACT_ACL) { + r = set_acls(a, fd, entry); + if (r != ARCHIVE_OK) + return (r); + } + + if (flags & ARCHIVE_EXTRACT_XATTR) { + r = set_xattrs(a, fd, entry); + if (r != ARCHIVE_OK) + return (r); + } + + /* + * Make 'critical_flags' hold all file flags that can't be + * immediately restored. For example, on BSD systems, + * SF_IMMUTABLE prevents hardlinks from being created, so + * should not be set until after any hardlinks are created. To + * preserve some semblance of portability, this uses #ifdef + * extensively. Ugly, but it works. + * + * Yes, Virginia, this does create a security race. It's mitigated + * somewhat by the practice of creating dirs 0700 until the extract + * is done, but it would be nice if we could do more than that. + * People restoring critical file systems should be wary of + * other programs that might try to muck with files as they're + * being restored. + */ + /* Hopefully, the compiler will optimize this mess into a constant. 
*/ + critical_flags = 0; +#ifdef SF_IMMUTABLE + critical_flags |= SF_IMMUTABLE; +#endif +#ifdef UF_IMMUTABLE + critical_flags |= UF_IMMUTABLE; +#endif +#ifdef SF_APPEND + critical_flags |= SF_APPEND; +#endif +#ifdef UF_APPEND + critical_flags |= UF_APPEND; +#endif +#ifdef EXT2_APPEND_FL + critical_flags |= EXT2_APPEND_FL; +#endif +#ifdef EXT2_IMMUTABLE_FL + critical_flags |= EXT2_IMMUTABLE_FL; +#endif + + if (flags & ARCHIVE_EXTRACT_FFLAGS) { + archive_entry_fflags(entry, &set, &clear); + + /* + * The first test encourages the compiler to eliminate + * all of this if it's not necessary. + */ + if ((critical_flags != 0) && (set & critical_flags)) { + le = current_fixup(a, archive_entry_pathname(entry)); + le->fixup |= FIXUP_FFLAGS; + le->fflags_set = set; + /* Store the mode if it's not already there. */ + if ((le->fixup & FIXUP_MODE) == 0) + le->mode = mode; + } else { + r = set_fflags(a, fd, archive_entry_pathname(entry), + mode, set, clear); + if (r != ARCHIVE_OK) + return (r); + } + } + return (ARCHIVE_OK); +} + + +#if ( defined(HAVE_LCHFLAGS) || defined(HAVE_CHFLAGS) || defined(HAVE_FCHFLAGS) ) && !defined(__linux) +static int +set_fflags(struct archive *a, int fd, const char *name, mode_t mode, + unsigned long set, unsigned long clear) +{ + struct extract *extract; + + extract = a->extract; + if (set == 0 && clear == 0) + return (ARCHIVE_OK); + + (void)mode; /* UNUSED */ + /* + * XXX Is the stat here really necessary? Or can I just use + * the 'set' flags directly? In particular, I'm not sure + * about the correct approach if we're overwriting an existing + * file that already has flags on it. XXX + */ + if (extract->pst != NULL) { + /* Already have stat() data available. 
*/ + } else if (fd >= 0 && fstat(fd, &extract->st) == 0) + extract->pst = &extract->st; + else if (stat(name, &extract->st) == 0) + extract->pst = &extract->st; + else { + archive_set_error(a, errno, + "Couldn't stat file"); + return (ARCHIVE_WARN); + } + + extract->st.st_flags &= ~clear; + extract->st.st_flags |= set; +#ifdef HAVE_FCHFLAGS + /* If platform has fchflags() and we were given an fd, use it. */ + if (fd >= 0 && fchflags(fd, extract->st.st_flags) == 0) + return (ARCHIVE_OK); +#endif + /* + * If we can't use the fd to set the flags, we'll use the + * pathname to set flags. We prefer lchflags() but will use + * chflags() if we must. + */ +#ifdef HAVE_LCHFLAGS + if (lchflags(name, extract->st.st_flags) == 0) + return (ARCHIVE_OK); +#elif defined(HAVE_CHFLAGS) + if (chflags(name, extract->st.st_flags) == 0) + return (ARCHIVE_OK); +#endif + archive_set_error(a, errno, + "Failed to set file flags"); + return (ARCHIVE_WARN); +} + +#elif defined(__linux) && defined(EXT2_IOC_GETFLAGS) && defined(EXT2_IOC_SETFLAGS) + +/* + * Linux has flags too, but uses ioctl() to access them instead of + * having a separate chflags() system call. + */ +static int +set_fflags(struct archive *a, int fd, const char *name, mode_t mode, + unsigned long set, unsigned long clear) +{ + struct extract *extract; + int ret; + int myfd = fd; + unsigned long newflags, oldflags; + unsigned long sf_mask = 0; + + extract = a->extract; + if (set == 0 && clear == 0) + return (ARCHIVE_OK); + /* Only regular files and dirs can have flags. */ + if (!S_ISREG(mode) && !S_ISDIR(mode)) + return (ARCHIVE_OK); + + /* If we weren't given an fd, open it ourselves. */ + if (myfd < 0) + myfd = open(name, O_RDONLY|O_NONBLOCK); + if (myfd < 0) + return (ARCHIVE_OK); + + /* + * Linux has no define for the flags that are only settable by + * the root user. This code may seem a little complex, but + * there seem to be some Linux systems that lack these + * defines. (?) 
The code below degrades reasonably gracefully + * if sf_mask is incomplete. + */ +#ifdef EXT2_IMMUTABLE_FL + sf_mask |= EXT2_IMMUTABLE_FL; +#endif +#ifdef EXT2_APPEND_FL + sf_mask |= EXT2_APPEND_FL; +#endif + /* + * XXX As above, this would be way simpler if we didn't have + * to read the current flags from disk. XXX + */ + ret = ARCHIVE_OK; + /* Try setting the flags as given. */ + if (ioctl(myfd, EXT2_IOC_GETFLAGS, &oldflags) >= 0) { + newflags = (oldflags & ~clear) | set; + if (ioctl(myfd, EXT2_IOC_SETFLAGS, &newflags) >= 0) + goto cleanup; + if (errno != EPERM) + goto fail; + } + /* If we couldn't set all the flags, try again with a subset. */ + if (ioctl(myfd, EXT2_IOC_GETFLAGS, &oldflags) >= 0) { + newflags &= ~sf_mask; + oldflags &= sf_mask; + newflags |= oldflags; + if (ioctl(myfd, EXT2_IOC_SETFLAGS, &newflags) >= 0) + goto cleanup; + } + /* We couldn't set the flags, so report the failure. */ +fail: + archive_set_error(a, errno, + "Failed to set file flags"); + ret = ARCHIVE_WARN; +cleanup: + if (fd < 0) + close(myfd); + return (ret); +} + +#else /* Not HAVE_CHFLAGS && Not __linux */ + +/* + * Of course, some systems have neither BSD chflags() nor Linux' flags + * support through ioctl(). + */ +static int +set_fflags(struct archive *a, int fd, const char *name, mode_t mode, + unsigned long set, unsigned long clear) +{ + (void)a; + (void)fd; + (void)name; + (void)mode; + (void)set; + (void)clear; + return (ARCHIVE_OK); +} + +#endif /* __linux */ + +#ifndef HAVE_POSIX_ACL +/* Default empty function body to satisfy mainline code. */ +static int +set_acls(struct archive *a, int fd, struct archive_entry *entry) +{ + (void)a; + (void)fd; + (void)entry; + + return (ARCHIVE_OK); +} + +#else + +/* + * XXX TODO: What about ACL types other than ACCESS and DEFAULT? 
+ */ +static int +set_acls(struct archive *a, int fd, struct archive_entry *entry) +{ + int ret; + + ret = set_acl(a, fd, entry, ACL_TYPE_ACCESS, + ARCHIVE_ENTRY_ACL_TYPE_ACCESS, "access"); + if (ret != ARCHIVE_OK) + return (ret); + ret = set_acl(a, fd, entry, ACL_TYPE_DEFAULT, + ARCHIVE_ENTRY_ACL_TYPE_DEFAULT, "default"); + return (ret); +} + + +static int +set_acl(struct archive *a, int fd, struct archive_entry *entry, + acl_type_t acl_type, int ae_requested_type, const char *typename) +{ + acl_t acl; + acl_entry_t acl_entry; + acl_permset_t acl_permset; + int ret; + int ae_type, ae_permset, ae_tag, ae_id; + uid_t ae_uid; + gid_t ae_gid; + const char *ae_name; + int entries; + const char *name; + + ret = ARCHIVE_OK; + entries = archive_entry_acl_reset(entry, ae_requested_type); + if (entries == 0) + return (ARCHIVE_OK); + acl = acl_init(entries); + while (archive_entry_acl_next(entry, ae_requested_type, &ae_type, + &ae_permset, &ae_tag, &ae_id, &ae_name) == ARCHIVE_OK) { + acl_create_entry(&acl, &acl_entry); + + switch (ae_tag) { + case ARCHIVE_ENTRY_ACL_USER: + acl_set_tag_type(acl_entry, ACL_USER); + ae_uid = lookup_uid(a, ae_name, ae_id); + acl_set_qualifier(acl_entry, &ae_uid); + break; + case ARCHIVE_ENTRY_ACL_GROUP: + acl_set_tag_type(acl_entry, ACL_GROUP); + ae_gid = lookup_gid(a, ae_name, ae_id); + acl_set_qualifier(acl_entry, &ae_gid); + break; + case ARCHIVE_ENTRY_ACL_USER_OBJ: + acl_set_tag_type(acl_entry, ACL_USER_OBJ); + break; + case ARCHIVE_ENTRY_ACL_GROUP_OBJ: + acl_set_tag_type(acl_entry, ACL_GROUP_OBJ); + break; + case ARCHIVE_ENTRY_ACL_MASK: + acl_set_tag_type(acl_entry, ACL_MASK); + break; + case ARCHIVE_ENTRY_ACL_OTHER: + acl_set_tag_type(acl_entry, ACL_OTHER); + break; + default: + /* XXX */ + break; + } + + acl_get_permset(acl_entry, &acl_permset); + acl_clear_perms(acl_permset); + if (ae_permset & ARCHIVE_ENTRY_ACL_EXECUTE) + acl_add_perm(acl_permset, ACL_EXECUTE); + if (ae_permset & ARCHIVE_ENTRY_ACL_WRITE) + acl_add_perm(acl_permset, 
ACL_WRITE); + if (ae_permset & ARCHIVE_ENTRY_ACL_READ) + acl_add_perm(acl_permset, ACL_READ); + } + + name = archive_entry_pathname(entry); + + /* Try restoring the ACL through 'fd' if we can. */ +#if HAVE_ACL_SET_FD + if (fd >= 0 && acl_type == ACL_TYPE_ACCESS && acl_set_fd(fd, acl) == 0) + ret = ARCHIVE_OK; + else +#else +#if HAVE_ACL_SET_FD_NP + if (fd >= 0 && acl_set_fd_np(fd, acl, acl_type) == 0) + ret = ARCHIVE_OK; + else +#endif +#endif + if (acl_set_file(name, acl_type, acl) != 0) { + archive_set_error(a, errno, "Failed to set %s acl", typename); + ret = ARCHIVE_WARN; + } + acl_free(acl); + return (ret); +} +#endif + +#if HAVE_LSETXATTR +/* + * Restore extended attributes - Linux implementation + */ +static int +set_xattrs(struct archive *a, int fd, struct archive_entry *entry) +{ + static int warning_done = 0; + int ret = ARCHIVE_OK; + int i = archive_entry_xattr_reset(entry); + + while (i--) { + const char *name; + const void *value; + size_t size; + archive_entry_xattr_next(entry, &name, &value, &size); + if (name != NULL && + strncmp(name, "xfsroot.", 8) != 0 && + strncmp(name, "system.", 7) != 0) { + int e; +#if HAVE_FSETXATTR + if (fd >= 0) + e = fsetxattr(fd, name, value, size, 0); + else +#endif + { + e = lsetxattr(archive_entry_pathname(entry), + name, value, size, 0); + } + if (e == -1) { + if (errno == ENOTSUP) { + if (!warning_done) { + warning_done = 1; + archive_set_error(a, errno, + "Cannot restore extended " + "attributes on this file " + "system"); + } + } else + archive_set_error(a, errno, + "Failed to set extended attribute"); + ret = ARCHIVE_WARN; + } + } else { + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Invalid extended attribute encountered"); + ret = ARCHIVE_WARN; + } + } + return (ret); +} +#else +/* + * Restore extended attributes - stub implementation for unsupported systems + */ +static int +set_xattrs(struct archive *a, int fd, struct archive_entry *entry) +{ + static int warning_done = 0; + (void)a; /* UNUSED */ + 
(void)fd; /* UNUSED */ + + /* If there aren't any extended attributes, then it's okay not + * to extract them, otherwise, issue a single warning. */ + if (archive_entry_xattr_count(entry) != 0 && !warning_done) { + warning_done = 1; + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Cannot restore extended attributes on this system"); + return (ARCHIVE_WARN); + } + /* Warning was already emitted; suppress further warnings. */ + return (ARCHIVE_OK); +} +#endif + +/* + * The following routines do some basic caching of uname/gname + * lookups. All such lookups go through these routines, including ACL + * conversions. Even a small cache here provides an enormous speedup, + * especially on systems using NIS, LDAP, or a similar networked + * directory system. + * + * TODO: Provide an API for clients to override these routines. + */ +static gid_t +lookup_gid(struct archive *a, const char *gname, gid_t gid) +{ + struct group *grent; + struct extract *extract; + int h; + struct bucket *b; + int cache_size; + + extract = a->extract; + cache_size = sizeof(extract->gcache) / sizeof(extract->gcache[0]); + + /* If no gname, just use the gid provided. */ + if (gname == NULL || *gname == '\0') + return (gid); + + /* Try to find gname in the cache. */ + h = hash(gname); + b = &extract->gcache[h % cache_size ]; + if (b->name != NULL && b->hash == h && strcmp(gname, b->name) == 0) + return ((gid_t)b->id); + + /* Free the cache slot for a new entry. */ + if (b->name != NULL) + free(b->name); + b->name = strdup(gname); + /* Note: If strdup fails, that's okay; we just won't cache. 
*/ + b->hash = h; + grent = getgrnam(gname); + if (grent != NULL) + gid = grent->gr_gid; + b->id = gid; + + return (gid); +} + +static uid_t +lookup_uid(struct archive *a, const char *uname, uid_t uid) +{ + struct passwd *pwent; + struct extract *extract; + int h; + struct bucket *b; + int cache_size; + + extract = a->extract; + cache_size = sizeof(extract->ucache) / sizeof(extract->ucache[0]); + + /* If no uname, just use the uid provided. */ + if (uname == NULL || *uname == '\0') + return (uid); + + /* Try to find uname in the cache. */ + h = hash(uname); + b = &extract->ucache[h % cache_size ]; + if (b->name != NULL && b->hash == h && strcmp(uname, b->name) == 0) + return ((uid_t)b->id); + + /* Free the cache slot for a new entry. */ + if (b->name != NULL) + free(b->name); + b->name = strdup(uname); + /* Note: If strdup fails, that's okay; we just won't cache. */ + b->hash = h; + pwent = getpwnam(uname); + if (pwent != NULL) + uid = pwent->pw_uid; + b->id = uid; + + return (uid); +} + +static unsigned int +hash(const char *p) +{ + /* A 32-bit version of Peter Weinberger's (PJW) hash algorithm, + as used by ELF for hashing function names. */ + unsigned g, h = 0; + while (*p != '\0') { + h = ( h << 4 ) + *p++; + if (( g = h & 0xF0000000 )) { + h ^= g >> 24; + h &= 0x0FFFFFFF; + } + } + return h; +} + +void +archive_read_extract_set_progress_callback(struct archive *a, + void (*progress_func)(void *), void *user_data) +{ + a->extract_progress = progress_func; + a->extract_progress_user_data = user_data; +} diff --git a/lib/libarchive/archive_read_open_fd.c b/lib/libarchive/archive_read_open_fd.c new file mode 100644 index 0000000..c5716e7 --- /dev/null +++ b/lib/libarchive/archive_read_open_fd.c @@ -0,0 +1,106 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_private.h" + +struct read_fd_data { + int fd; + size_t block_size; + void *buffer; +}; + +static int file_close(struct archive *, void *); +static int file_open(struct archive *, void *); +static ssize_t file_read(struct archive *, void *, const void **buff); + +int +archive_read_open_fd(struct archive *a, int fd, size_t block_size) +{ + struct read_fd_data *mine; + + mine = malloc(sizeof(*mine)); + if (mine == NULL) { + archive_set_error(a, ENOMEM, "No memory"); + return (ARCHIVE_FATAL); + } + mine->block_size = block_size; + mine->buffer = malloc(mine->block_size); + if (mine->buffer == NULL) { + archive_set_error(a, ENOMEM, "No memory"); + free(mine); + return (ARCHIVE_FATAL); + } + mine->fd = fd; + return (archive_read_open(a, mine, file_open, file_read, file_close)); +} + +static int +file_open(struct archive *a, void *client_data) +{ + struct read_fd_data *mine = client_data; + struct stat st; + + if (fstat(mine->fd, &st) != 0) { + archive_set_error(a, errno, "Can't stat fd %d", mine->fd); + return (ARCHIVE_FATAL); + } + + a->skip_file_dev = st.st_dev; + a->skip_file_ino = st.st_ino; + return (ARCHIVE_OK); +} + +static ssize_t +file_read(struct archive *a, void *client_data, const void **buff) +{ + struct read_fd_data *mine = client_data; + + (void)a; /* UNUSED */ + *buff = mine->buffer; + return (read(mine->fd, mine->buffer, mine->block_size)); +} + +static int +file_close(struct archive *a, void *client_data) +{ + struct read_fd_data *mine = client_data; + + (void)a; /* UNUSED */ + if (mine->buffer != NULL) + free(mine->buffer); + free(mine); + return (ARCHIVE_OK); +} diff --git a/lib/libarchive/archive_read_open_file.c b/lib/libarchive/archive_read_open_file.c new file mode 100644 index 0000000..b0db61c --- /dev/null +++ 
b/lib/libarchive/archive_read_open_file.c @@ -0,0 +1,168 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_private.h" + +struct read_file_data { + int fd; + size_t block_size; + void *buffer; + mode_t st_mode; /* Mode bits for opened file. */ + char filename[1]; /* Must be last! 
*/ +}; + +static int file_close(struct archive *, void *); +static int file_open(struct archive *, void *); +static ssize_t file_read(struct archive *, void *, const void **buff); + +int +archive_read_open_file(struct archive *a, const char *filename, + size_t block_size) +{ + struct read_file_data *mine; + + if (filename == NULL || filename[0] == '\0') { + mine = malloc(sizeof(*mine)); + if (mine == NULL) { + archive_set_error(a, ENOMEM, "No memory"); + return (ARCHIVE_FATAL); + } + mine->filename[0] = '\0'; + } else { + mine = malloc(sizeof(*mine) + strlen(filename)); + if (mine == NULL) { + archive_set_error(a, ENOMEM, "No memory"); + return (ARCHIVE_FATAL); + } + strcpy(mine->filename, filename); + } + mine->block_size = block_size; + mine->buffer = NULL; + mine->fd = -1; + return (archive_read_open(a, mine, file_open, file_read, file_close)); +} + +static int +file_open(struct archive *a, void *client_data) +{ + struct read_file_data *mine = client_data; + struct stat st; + + mine->buffer = malloc(mine->block_size); + if (mine->buffer == NULL) { + archive_set_error(a, ENOMEM, "No memory"); + return (ARCHIVE_FATAL); + } + if (mine->filename[0] != '\0') + mine->fd = open(mine->filename, O_RDONLY); + else + mine->fd = 0; /* Fake "open" for stdin. */ + if (mine->fd < 0) { + archive_set_error(a, errno, "Failed to open '%s'", + mine->filename); + return (ARCHIVE_FATAL); + } + if (fstat(mine->fd, &st) == 0) { + /* Set dev/ino of archive file so extract won't overwrite. */ + a->skip_file_dev = st.st_dev; + a->skip_file_ino = st.st_ino; + /* Remember mode so close can decide whether to flush. 
*/ + mine->st_mode = st.st_mode; + } else { + if (mine->filename[0] == '\0') + archive_set_error(a, errno, "Can't stat stdin"); + else + archive_set_error(a, errno, "Can't stat '%s'", + mine->filename); + return (ARCHIVE_FATAL); + } + return (0); +} + +static ssize_t +file_read(struct archive *a, void *client_data, const void **buff) +{ + struct read_file_data *mine = client_data; + ssize_t bytes_read; + + (void)a; /* UNUSED */ + *buff = mine->buffer; + bytes_read = read(mine->fd, mine->buffer, mine->block_size); + if (bytes_read < 0) { + if (mine->filename[0] == '\0') + archive_set_error(a, errno, "Error reading stdin"); + else + archive_set_error(a, errno, "Error reading '%s'", + mine->filename); + } + return (bytes_read); +} + +static int +file_close(struct archive *a, void *client_data) +{ + struct read_file_data *mine = client_data; + + (void)a; /* UNUSED */ + + /* + * Sometimes, we should flush the input before closing. + * Regular files: faster to just close without flush. + * Devices: must not flush (user might need to + * read the "next" item on a non-rewind device). + * Pipes and sockets: must flush (otherwise, the + * program feeding the pipe or socket may complain). + * Here, I flush everything except for regular files and + * device nodes. + */ + if (!S_ISREG(mine->st_mode) + && !S_ISCHR(mine->st_mode) + && !S_ISBLK(mine->st_mode)) { + ssize_t bytesRead; + do { + bytesRead = read(mine->fd, mine->buffer, + mine->block_size); + } while (bytesRead > 0); + } + /* If a named file was opened, then it needs to be closed. */ + if (mine->filename[0] != '\0') + close(mine->fd); + if (mine->buffer != NULL) + free(mine->buffer); + free(mine); + return (ARCHIVE_OK); +} diff --git a/lib/libarchive/archive_read_open_filename.c b/lib/libarchive/archive_read_open_filename.c new file mode 100644 index 0000000..b0db61c --- /dev/null +++ b/lib/libarchive/archive_read_open_filename.c @@ -0,0 +1,168 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_private.h" + +struct read_file_data { + int fd; + size_t block_size; + void *buffer; + mode_t st_mode; /* Mode bits for opened file. */ + char filename[1]; /* Must be last! 
*/ +}; + +static int file_close(struct archive *, void *); +static int file_open(struct archive *, void *); +static ssize_t file_read(struct archive *, void *, const void **buff); + +int +archive_read_open_file(struct archive *a, const char *filename, + size_t block_size) +{ + struct read_file_data *mine; + + if (filename == NULL || filename[0] == '\0') { + mine = malloc(sizeof(*mine)); + if (mine == NULL) { + archive_set_error(a, ENOMEM, "No memory"); + return (ARCHIVE_FATAL); + } + mine->filename[0] = '\0'; + } else { + mine = malloc(sizeof(*mine) + strlen(filename)); + if (mine == NULL) { + archive_set_error(a, ENOMEM, "No memory"); + return (ARCHIVE_FATAL); + } + strcpy(mine->filename, filename); + } + mine->block_size = block_size; + mine->buffer = NULL; + mine->fd = -1; + return (archive_read_open(a, mine, file_open, file_read, file_close)); +} + +static int +file_open(struct archive *a, void *client_data) +{ + struct read_file_data *mine = client_data; + struct stat st; + + mine->buffer = malloc(mine->block_size); + if (mine->buffer == NULL) { + archive_set_error(a, ENOMEM, "No memory"); + return (ARCHIVE_FATAL); + } + if (mine->filename[0] != '\0') + mine->fd = open(mine->filename, O_RDONLY); + else + mine->fd = 0; /* Fake "open" for stdin. */ + if (mine->fd < 0) { + archive_set_error(a, errno, "Failed to open '%s'", + mine->filename); + return (ARCHIVE_FATAL); + } + if (fstat(mine->fd, &st) == 0) { + /* Set dev/ino of archive file so extract won't overwrite. */ + a->skip_file_dev = st.st_dev; + a->skip_file_ino = st.st_ino; + /* Remember mode so close can decide whether to flush. 
*/ + mine->st_mode = st.st_mode; + } else { + if (mine->filename[0] == '\0') + archive_set_error(a, errno, "Can't stat stdin"); + else + archive_set_error(a, errno, "Can't stat '%s'", + mine->filename); + return (ARCHIVE_FATAL); + } + return (0); +} + +static ssize_t +file_read(struct archive *a, void *client_data, const void **buff) +{ + struct read_file_data *mine = client_data; + ssize_t bytes_read; + + (void)a; /* UNUSED */ + *buff = mine->buffer; + bytes_read = read(mine->fd, mine->buffer, mine->block_size); + if (bytes_read < 0) { + if (mine->filename[0] == '\0') + archive_set_error(a, errno, "Error reading stdin"); + else + archive_set_error(a, errno, "Error reading '%s'", + mine->filename); + } + return (bytes_read); +} + +static int +file_close(struct archive *a, void *client_data) +{ + struct read_file_data *mine = client_data; + + (void)a; /* UNUSED */ + + /* + * Sometimes, we should flush the input before closing. + * Regular files: faster to just close without flush. + * Devices: must not flush (user might need to + * read the "next" item on a non-rewind device). + * Pipes and sockets: must flush (otherwise, the + * program feeding the pipe or socket may complain). + * Here, I flush everything except for regular files and + * device nodes. + */ + if (!S_ISREG(mine->st_mode) + && !S_ISCHR(mine->st_mode) + && !S_ISBLK(mine->st_mode)) { + ssize_t bytesRead; + do { + bytesRead = read(mine->fd, mine->buffer, + mine->block_size); + } while (bytesRead > 0); + } + /* If a named file was opened, then it needs to be closed. 
*/ + if (mine->filename[0] != '\0') + close(mine->fd); + if (mine->buffer != NULL) + free(mine->buffer); + free(mine); + return (ARCHIVE_OK); +} diff --git a/lib/libarchive/archive_read_support_compression_all.c b/lib/libarchive/archive_read_support_compression_all.c new file mode 100644 index 0000000..bd3a9b9 --- /dev/null +++ b/lib/libarchive/archive_read_support_compression_all.c @@ -0,0 +1,44 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include "archive.h" + +int +archive_read_support_compression_all(struct archive *a) +{ +#if HAVE_BZLIB_H + archive_read_support_compression_bzip2(a); +#endif + /* The decompress code doesn't use an outside library. */ + archive_read_support_compression_compress(a); +#if HAVE_ZLIB_H + archive_read_support_compression_gzip(a); +#endif + return (ARCHIVE_OK); +} diff --git a/lib/libarchive/archive_read_support_compression_bzip2.c b/lib/libarchive/archive_read_support_compression_bzip2.c new file mode 100644 index 0000000..aa2d531 --- /dev/null +++ b/lib/libarchive/archive_read_support_compression_bzip2.c @@ -0,0 +1,393 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" + +__FBSDID("$FreeBSD$"); + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#ifdef HAVE_BZLIB_H +#include <bzlib.h> +#endif + +#include "archive.h" +#include "archive_private.h" + +#if HAVE_BZLIB_H +struct private_data { + bz_stream stream; + char *uncompressed_buffer; + size_t uncompressed_buffer_size; + char *read_next; + int64_t total_out; +}; + +static int finish(struct archive *); +static ssize_t read_ahead(struct archive *, const void **, size_t); +static ssize_t read_consume(struct archive *, size_t); +static int drive_decompressor(struct archive *a, struct private_data *); +#endif + +/* These two functions are defined even if we lack bzlib. See below. */ +static int bid(const void *, size_t); +static int init(struct archive *, const void *, size_t); + +int +archive_read_support_compression_bzip2(struct archive *a) +{ + return (__archive_read_register_compression(a, bid, init)); +} + +/* + * Test whether we can handle this data. + * + * This logic returns zero if any part of the signature fails. It + * also tries to Do The Right Thing if a very short buffer prevents us + * from verifying as much as we would like. + */ +static int +bid(const void *buff, size_t len) +{ + const unsigned char *buffer; + int bits_checked; + + if (len < 1) + return (0); + + buffer = buff; + bits_checked = 0; + if (buffer[0] != 'B') /* Verify first ID byte. 
*/ + return (0); + bits_checked += 8; + if (len < 2) + return (bits_checked); + + if (buffer[1] != 'Z') /* Verify second ID byte. */ + return (0); + bits_checked += 8; + if (len < 3) + return (bits_checked); + + if (buffer[2] != 'h') /* Verify third ID byte. */ + return (0); + bits_checked += 8; + if (len < 4) + return (bits_checked); + + if (buffer[3] < '1' || buffer[3] > '9') + return (0); + bits_checked += 5; + + /* + * Research Question: Can we do any more to verify that this + * really is BZip2 format?? For 99.9% of the time, the above + * test is sufficient, but it would be nice to do a more + * thorough check. It's especially troubling that the BZip2 + * signature begins with all ASCII characters; a tar archive + * whose first filename begins with 'BZh3' would potentially + * fool this logic. (It may also be possible to gaurd against + * such anomalies in archive_read_support_compression_none.) + */ + + return (bits_checked); +} + +#ifndef HAVE_BZLIB_H + +/* + * If we don't have bzlib on this system, we can't actually do the + * decompression. We can, however, still detect bzip2-compressed + * archives and emit a useful message. + */ +static int +init(struct archive *a, const void *buff, size_t n) +{ + (void)a; /* UNUSED */ + (void)buff; /* UNUSED */ + (void)n; /* UNUSED */ + + archive_set_error(a, -1, + "This version of libarchive was compiled without bzip2 support"); + return (ARCHIVE_FATAL); +} + + +#else + +/* + * Setup the callbacks. 
+ */ +static int +init(struct archive *a, const void *buff, size_t n) +{ + struct private_data *state; + int ret; + + a->compression_code = ARCHIVE_COMPRESSION_BZIP2; + a->compression_name = "bzip2"; + + state = malloc(sizeof(*state)); + if (state == NULL) { + archive_set_error(a, ENOMEM, + "Can't allocate data for %s decompression", + a->compression_name); + return (ARCHIVE_FATAL); + } + memset(state, 0, sizeof(*state)); + + state->uncompressed_buffer_size = 64 * 1024; + state->uncompressed_buffer = malloc(state->uncompressed_buffer_size); + state->stream.next_out = state->uncompressed_buffer; + state->read_next = state->uncompressed_buffer; + state->stream.avail_out = state->uncompressed_buffer_size; + + if (state->uncompressed_buffer == NULL) { + archive_set_error(a, ENOMEM, + "Can't allocate %s decompression buffers", + a->compression_name); + free(state); + return (ARCHIVE_FATAL); + } + + /* + * A bug in bzlib.h: stream.next_in should be marked 'const' + * but isn't (the library never alters data through the + * next_in pointer, only reads it). The result: this ugly + * cast to remove 'const'. + */ + state->stream.next_in = (void *)(uintptr_t)(const void *)buff; + state->stream.avail_in = n; + + a->compression_read_ahead = read_ahead; + a->compression_read_consume = read_consume; + a->compression_finish = finish; + + /* Initialize compression library. */ + ret = BZ2_bzDecompressInit(&(state->stream), + 0 /* library verbosity */, + 0 /* don't use slow low-mem algorithm */); + + /* If init fails, try using low-memory algorithm instead. */ + if (ret == BZ_MEM_ERROR) { + ret = BZ2_bzDecompressInit(&(state->stream), + 0 /* library verbosity */, + 1 /* do use slow low-mem algorithm */); + } + + if (ret == BZ_OK) { + a->compression_data = state; + return (ARCHIVE_OK); + } + + /* Library setup failed: Clean up. 
*/ + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Internal error initializing %s library", a->compression_name); + free(state->uncompressed_buffer); + free(state); + + /* Override the error message if we know what really went wrong. */ + switch (ret) { + case BZ_PARAM_ERROR: + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Internal error initializing compression library: " + "invalid setup parameter"); + break; + case BZ_MEM_ERROR: + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Internal error initializing compression library: " + "out of memory"); + break; + case BZ_CONFIG_ERROR: + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Internal error initializing compression library: " + "mis-compiled library"); + break; + } + + return (ARCHIVE_FATAL); +} + +/* + * Return a block of data from the decompression buffer. Decompress more + * as necessary. + */ +static ssize_t +read_ahead(struct archive *a, const void **p, size_t min) +{ + struct private_data *state; + int read_avail, was_avail, ret; + + state = a->compression_data; + was_avail = -1; + if (!a->client_reader) { + archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER, + "No read callback is registered? " + "This is probably an internal programming error."); + return (ARCHIVE_FATAL); + } + + read_avail = state->stream.next_out - state->read_next; + + if (read_avail + state->stream.avail_out < min) { + memmove(state->uncompressed_buffer, state->read_next, + read_avail); + state->read_next = state->uncompressed_buffer; + state->stream.next_out = state->read_next + read_avail; + state->stream.avail_out + = state->uncompressed_buffer_size - read_avail; + } + + while (was_avail < read_avail && /* Made some progress. */ + read_avail < (int)min && /* Haven't satisfied min. 
*/ + read_avail < (int)state->uncompressed_buffer_size) { /* !full */ + if ((ret = drive_decompressor(a, state)) != ARCHIVE_OK) + return (ret); + was_avail = read_avail; + read_avail = state->stream.next_out - state->read_next; + } + + *p = state->read_next; + return (read_avail); +} + +/* + * Mark a previously-returned block of data as read. + */ +static ssize_t +read_consume(struct archive *a, size_t n) +{ + struct private_data *state; + + state = a->compression_data; + a->file_position += n; + state->read_next += n; + if (state->read_next > state->stream.next_out) + __archive_errx(1, "Request to consume too many " + "bytes from bzip2 decompressor"); + return (n); +} + +/* + * Clean up the decompressor. + */ +static int +finish(struct archive *a) +{ + struct private_data *state; + int ret; + + state = a->compression_data; + ret = ARCHIVE_OK; + switch (BZ2_bzDecompressEnd(&(state->stream))) { + case BZ_OK: + break; + default: + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Failed to clean up %s compressor", a->compression_name); + ret = ARCHIVE_FATAL; + } + + free(state->uncompressed_buffer); + free(state); + + a->compression_data = NULL; + if (a->client_closer != NULL) + (a->client_closer)(a, a->client_data); + + return (ret); +} + +/* + * Utility function to pull data through decompressor, reading input + * blocks as necessary. + */ +static int +drive_decompressor(struct archive *a, struct private_data *state) +{ + ssize_t ret; + int decompressed, total_decompressed; + char *output; + + total_decompressed = 0; + for (;;) { + if (state->stream.avail_in == 0) { + ret = (a->client_reader)(a, a->client_data, + (const void **)&state->stream.next_in); + if (ret < 0) { + /* + * TODO: Find a better way to handle + * this read failure. 
+ */ + goto fatal; + } + if (ret == 0 && total_decompressed == 0) { + archive_set_error(a, EIO, + "Premature end of %s compressed data", + a->compression_name); + return (ARCHIVE_FATAL); + } + a->raw_position += ret; + state->stream.avail_in = ret; + } + + { + output = state->stream.next_out; + + /* Decompress some data. */ + ret = BZ2_bzDecompress(&(state->stream)); + decompressed = state->stream.next_out - output; + + /* Accumulate the total bytes of output. */ + state->total_out += decompressed; + total_decompressed += decompressed; + + switch (ret) { + case BZ_OK: /* Decompressor made some progress. */ + if (decompressed > 0) + return (ARCHIVE_OK); + break; + case BZ_STREAM_END: /* Found end of stream. */ + return (ARCHIVE_OK); + default: + /* Any other return value is an error. */ + goto fatal; + } + } + } + return (ARCHIVE_OK); + + /* Return a fatal error. */ +fatal: + archive_set_error(a, ARCHIVE_ERRNO_MISC, "%s decompression failed", + a->compression_name); + return (ARCHIVE_FATAL); +} + +#endif /* HAVE_BZLIB_H */ diff --git a/lib/libarchive/archive_read_support_compression_compress.c b/lib/libarchive/archive_read_support_compression_compress.c new file mode 100644 index 0000000..30a7377 --- /dev/null +++ b/lib/libarchive/archive_read_support_compression_compress.c @@ -0,0 +1,482 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This code borrows heavily from "compress" source code, which is + * protected by the following copyright. (Clause 3 dropped by request + * of the Regents.) + */ + +/*- + * Copyright (c) 1985, 1986, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis and James A. Woods, derived from original + * work by Spencer Thomas and Joseph Orost. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_private.h" + +/* + * Because LZW decompression is pretty simple, I've just implemented + * the whole decompressor here (cribbing from "compress" source code, + * of course), rather than relying on an external library. I have + * made an effort to clarify and simplify the algorithm, so the + * names and structure here don't exactly match those used by compress. + */ + +struct private_data { + /* Input variables. */ + const unsigned char *next_in; + size_t avail_in; + int bit_buffer; + int bits_avail; + size_t bytes_in_section; + + /* Output variables. */ + size_t uncompressed_buffer_size; + void *uncompressed_buffer; + unsigned char *read_next; /* Data for client. */ + unsigned char *next_out; /* Where to write new data. */ + size_t avail_out; /* Space at end of buffer. */ + + /* Decompression status variables. */ + int use_reset_code; + int end_of_stream; /* EOF status. */ + int maxcode; /* Largest code. */ + int maxcode_bits; /* Length of largest code. */ + int section_end_code; /* When to increase bits. 
*/ + int bits; /* Current code length. */ + int oldcode; /* Previous code. */ + int finbyte; /* Last byte of prev code. */ + + /* Dictionary. */ + int free_ent; /* Next dictionary entry. */ + unsigned char suffix[65536]; + uint16_t prefix[65536]; + + /* + * Scratch area for expanding dictionary entries. Note: + * "worst" case here comes from compressing /dev/zero: the + * last code in the dictionary will code a sequence of + * 65536-256 zero bytes. Thus, we need stack space to expand + * a 65280-byte dictionary entry. (Of course, 32640:1 + * compression could also be considered the "best" case. ;-) + */ + unsigned char *stackp; + unsigned char stack[65300]; +}; + +static int bid(const void *, size_t); +static int finish(struct archive *); +static int init(struct archive *, const void *, size_t); +static ssize_t read_ahead(struct archive *, const void **, size_t); +static ssize_t read_consume(struct archive *, size_t); +static int getbits(struct archive *, struct private_data *, int n); +static int next_code(struct archive *a, struct private_data *state); + +int +archive_read_support_compression_compress(struct archive *a) +{ + return (__archive_read_register_compression(a, bid, init)); +} + +/* + * Test whether we can handle this data. + * + * This logic returns zero if any part of the signature fails. It + * also tries to Do The Right Thing if a very short buffer prevents us + * from verifying as much as we would like. + */ +static int +bid(const void *buff, size_t len) +{ + const unsigned char *buffer; + int bits_checked; + + if (len < 1) + return (0); + + buffer = buff; + bits_checked = 0; + if (buffer[0] != 037) /* Verify first ID byte. */ + return (0); + bits_checked += 8; + if (len < 2) + return (bits_checked); + + if (buffer[1] != 0235) /* Verify second ID byte. */ + return (0); + bits_checked += 8; + if (len < 3) + return (bits_checked); + + /* + * TODO: Verify more. + */ + + return (bits_checked); +} + +/* + * Setup the callbacks. 
+ */ +static int +init(struct archive *a, const void *buff, size_t n) +{ + struct private_data *state; + int code; + + a->compression_code = ARCHIVE_COMPRESSION_COMPRESS; + a->compression_name = "compress (.Z)"; + + a->compression_read_ahead = read_ahead; + a->compression_read_consume = read_consume; + a->compression_finish = finish; + + state = malloc(sizeof(*state)); + if (state == NULL) { + archive_set_error(a, ENOMEM, + "Can't allocate data for %s decompression", + a->compression_name); + return (ARCHIVE_FATAL); + } + memset(state, 0, sizeof(*state)); + a->compression_data = state; + + state->uncompressed_buffer_size = 64 * 1024; + state->uncompressed_buffer = malloc(state->uncompressed_buffer_size); + + if (state->uncompressed_buffer == NULL) { + archive_set_error(a, ENOMEM, + "Can't allocate %s decompression buffers", + a->compression_name); + goto fatal; + } + + state->next_in = buff; + state->avail_in = n; + state->read_next = state->next_out = state->uncompressed_buffer; + state->avail_out = state->uncompressed_buffer_size; + + code = getbits(a, state, 8); + if (code != 037) /* This should be impossible. */ + goto fatal; + + code = getbits(a, state, 8); + if (code != 0235) { + /* This can happen if the library is receiving 1-byte + * blocks and gzip and compress are both enabled. + * You can't distinguish gzip and compress only from + * the first byte. */ + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Compress signature did not match."); + goto fatal; + } + + code = getbits(a, state, 8); + state->maxcode_bits = code & 0x1f; + state->maxcode = (1 << state->maxcode_bits); + state->use_reset_code = code & 0x80; + + /* Initialize decompressor. 
*/ + state->free_ent = 256; + state->stackp = state->stack; + if (state->use_reset_code) + state->free_ent++; + state->bits = 9; + state->section_end_code = (1<<state->bits) - 1; + state->oldcode = -1; + for (code = 255; code >= 0; code--) { + state->prefix[code] = 0; + state->suffix[code] = code; + } + next_code(a, state); + return (ARCHIVE_OK); + +fatal: + finish(a); + return (ARCHIVE_FATAL); +} + +/* + * Return a block of data from the decompression buffer. Decompress more + * as necessary. + */ +static ssize_t +read_ahead(struct archive *a, const void **p, size_t min) +{ + struct private_data *state; + int read_avail, was_avail, ret; + + state = a->compression_data; + was_avail = -1; + if (!a->client_reader) { + archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER, + "No read callback is registered? " + "This is probably an internal programming error."); + return (ARCHIVE_FATAL); + } + + read_avail = state->next_out - state->read_next; + + if (read_avail < (int)min && state->end_of_stream) { + if (state->end_of_stream == ARCHIVE_EOF) + return (0); + else + return (-1); + } + + if (read_avail < (int)min) { + memmove(state->uncompressed_buffer, state->read_next, + read_avail); + state->read_next = state->uncompressed_buffer; + state->next_out = state->read_next + read_avail; + state->avail_out + = state->uncompressed_buffer_size - read_avail; + + while (read_avail < (int)state->uncompressed_buffer_size + && !state->end_of_stream) { + if (state->stackp > state->stack) { + *state->next_out++ = *--state->stackp; + state->avail_out--; + read_avail++; + } else { + ret = next_code(a, state); + if (ret == ARCHIVE_EOF) + state->end_of_stream = ret; + else if (ret != ARCHIVE_OK) + return (ret); + } + } + } + + *p = state->read_next; + return (read_avail); +} + +/* + * Mark a previously-returned block of data as read. 
+ */ +static ssize_t +read_consume(struct archive *a, size_t n) +{ + struct private_data *state; + + state = a->compression_data; + a->file_position += n; + state->read_next += n; + if (state->read_next > state->next_out) + __archive_errx(1, "Request to consume too many " + "bytes from compress decompressor"); + return (n); +} + +/* + * Clean up the decompressor. + */ +static int +finish(struct archive *a) +{ + struct private_data *state; + int ret = ARCHIVE_OK; + + state = a->compression_data; + + if (state != NULL) { + if (state->uncompressed_buffer != NULL) + free(state->uncompressed_buffer); + free(state); + } + + a->compression_data = NULL; + if (a->client_closer != NULL) + ret = (a->client_closer)(a, a->client_data); + + return (ret); +} + +/* + * Process the next code and fill the stack with the expansion + * of the code. Returns ARCHIVE_FATAL if there is a fatal I/O or + * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise. + */ +static int +next_code(struct archive *a, struct private_data *state) +{ + int code, newcode; + + static int debug_buff[1024]; + static unsigned debug_index; + + code = newcode = getbits(a, state, state->bits); + if (code < 0) + return (code); + + debug_buff[debug_index++] = code; + if (debug_index >= sizeof(debug_buff)/sizeof(debug_buff[0])) + debug_index = 0; + + /* If it's a reset code, reset the dictionary. */ + if ((code == 256) && state->use_reset_code) { + /* + * The original 'compress' implementation blocked its + * I/O in a manner that resulted in junk bytes being + * inserted after every reset. The next section skips + * this junk. (Yes, the number of *bytes* to skip is + * a function of the current *bit* length.) + */ + int skip_bytes = state->bits - + (state->bytes_in_section % state->bits); + skip_bytes %= state->bits; + state->bits_avail = 0; /* Discard rest of this byte. 
*/ + while (skip_bytes-- > 0) { + code = getbits(a, state, 8); + if (code < 0) + return (code); + } + /* Now, actually do the reset. */ + state->bytes_in_section = 0; + state->bits = 9; + state->section_end_code = (1 << state->bits) - 1; + state->free_ent = 257; + state->oldcode = -1; + return (next_code(a, state)); + } + + if (code > state->free_ent) { + /* An invalid code is a fatal error. */ + archive_set_error(a, -1, "Invalid compressed data"); + return (ARCHIVE_FATAL); + } + + /* Special case for KwKwK string. */ + if (code >= state->free_ent) { + *state->stackp++ = state->finbyte; + code = state->oldcode; + } + + /* Generate output characters in reverse order. */ + while (code >= 256) { + *state->stackp++ = state->suffix[code]; + code = state->prefix[code]; + } + *state->stackp++ = state->finbyte = code; + + /* Generate the new entry. */ + code = state->free_ent; + if (code < state->maxcode && state->oldcode >= 0) { + state->prefix[code] = state->oldcode; + state->suffix[code] = state->finbyte; + ++state->free_ent; + } + if (state->free_ent > state->section_end_code) { + state->bits++; + state->bytes_in_section = 0; + if (state->bits == state->maxcode_bits) + state->section_end_code = state->maxcode; + else + state->section_end_code = (1 << state->bits) - 1; + } + + /* Remember previous code. */ + state->oldcode = newcode; + return (ARCHIVE_OK); +} + +/* + * Return next 'n' bits from stream. + * + * -1 indicates end of available data. 
+ */ +static int +getbits(struct archive *a, struct private_data *state, int n) +{ + int code, ret; + static const int mask[] = { + 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff, + 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff + }; + + + while (state->bits_avail < n) { + if (state->avail_in <= 0) { + ret = (a->client_reader)(a, a->client_data, + (const void **)&state->next_in); + if (ret < 0) + return (ARCHIVE_FATAL); + if (ret == 0) + return (ARCHIVE_EOF); + a->raw_position += ret; + state->avail_in = ret; + } + state->bit_buffer |= *state->next_in++ << state->bits_avail; + state->avail_in--; + state->bits_avail += 8; + state->bytes_in_section++; + } + + code = state->bit_buffer; + state->bit_buffer >>= n; + state->bits_avail -= n; + + return (code & mask[n]); +} diff --git a/lib/libarchive/archive_read_support_compression_gzip.c b/lib/libarchive/archive_read_support_compression_gzip.c new file mode 100644 index 0000000..b0cda3a --- /dev/null +++ b/lib/libarchive/archive_read_support_compression_gzip.c @@ -0,0 +1,531 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" + +__FBSDID("$FreeBSD$"); + + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#ifdef HAVE_ZLIB_H +#include <zlib.h> +#endif + +#include "archive.h" +#include "archive_private.h" + +#ifdef HAVE_ZLIB_H +struct private_data { + z_stream stream; + unsigned char *uncompressed_buffer; + size_t uncompressed_buffer_size; + unsigned char *read_next; + int64_t total_out; + unsigned long crc; + char header_done; +}; + +static int finish(struct archive *); +static ssize_t read_ahead(struct archive *, const void **, size_t); +static ssize_t read_consume(struct archive *, size_t); +static int drive_decompressor(struct archive *a, struct private_data *); +#endif + +/* These two functions are defined even if we lack zlib. See below. */ +static int bid(const void *, size_t); +static int init(struct archive *, const void *, size_t); + +int +archive_read_support_compression_gzip(struct archive *a) +{ + return (__archive_read_register_compression(a, bid, init)); +} + +/* + * Test whether we can handle this data. + * + * This logic returns zero if any part of the signature fails. It + * also tries to Do The Right Thing if a very short buffer prevents us + * from verifying as much as we would like. 
+ */ +static int +bid(const void *buff, size_t len) +{ + const unsigned char *buffer; + int bits_checked; + + if (len < 1) + return (0); + + buffer = buff; + bits_checked = 0; + if (buffer[0] != 037) /* Verify first ID byte. */ + return (0); + bits_checked += 8; + if (len < 2) + return (bits_checked); + + if (buffer[1] != 0213) /* Verify second ID byte. */ + return (0); + bits_checked += 8; + if (len < 3) + return (bits_checked); + + if (buffer[2] != 8) /* Compression must be 'deflate'. */ + return (0); + bits_checked += 8; + if (len < 4) + return (bits_checked); + + if ((buffer[3] & 0xE0)!= 0) /* No reserved flags set. */ + return (0); + bits_checked += 3; + if (len < 5) + return (bits_checked); + + /* + * TODO: Verify more; in particular, gzip has an optional + * header CRC, which would give us 16 more verified bits. We + * may also be able to verify certain constraints on other + * fields. + */ + + return (bits_checked); +} + + +#ifndef HAVE_ZLIB_H + +/* + * If we don't have zlib on this system, we can't actually do the + * decompression. We can, however, still detect gzip-compressed + * archives and emit a useful message. + */ +static int +init(struct archive *a, const void *buff, size_t n) +{ + (void)a; /* UNUSED */ + (void)buff; /* UNUSED */ + (void)n; /* UNUSED */ + + archive_set_error(a, -1, + "This version of libarchive was compiled without gzip support"); + return (ARCHIVE_FATAL); +} + + +#else + +/* + * Setup the callbacks. + */ +static int +init(struct archive *a, const void *buff, size_t n) +{ + struct private_data *state; + int ret; + + a->compression_code = ARCHIVE_COMPRESSION_GZIP; + a->compression_name = "gzip"; + + state = malloc(sizeof(*state)); + if (state == NULL) { + archive_set_error(a, ENOMEM, + "Can't allocate data for %s decompression", + a->compression_name); + return (ARCHIVE_FATAL); + } + memset(state, 0, sizeof(*state)); + + state->crc = crc32(0L, NULL, 0); + state->header_done = 0; /* We've not yet begun to parse header... 
*/ + + state->uncompressed_buffer_size = 64 * 1024; + state->uncompressed_buffer = malloc(state->uncompressed_buffer_size); + state->stream.next_out = state->uncompressed_buffer; + state->read_next = state->uncompressed_buffer; + state->stream.avail_out = state->uncompressed_buffer_size; + + if (state->uncompressed_buffer == NULL) { + archive_set_error(a, ENOMEM, + "Can't allocate %s decompression buffers", + a->compression_name); + free(state); + return (ARCHIVE_FATAL); + } + + /* + * A bug in zlib.h: stream.next_in should be marked 'const' + * but isn't (the library never alters data through the + * next_in pointer, only reads it). The result: this ugly + * cast to remove 'const'. + */ + state->stream.next_in = (void *)(uintptr_t)(const void *)buff; + state->stream.avail_in = n; + + a->compression_read_ahead = read_ahead; + a->compression_read_consume = read_consume; + a->compression_finish = finish; + + /* + * TODO: Do I need to parse the gzip header before calling + * inflateInit2()? In particular, one of the header bytes + * marks "best compression" or "fastest", which may be + * appropriate for setting the second parameter here. + * However, I think the only penalty for not setting it + * correctly is wasted memory. If this is necessary, it + * should probably go into drive_decompressor() below. + */ + + /* Initialize compression library. */ + ret = inflateInit2(&(state->stream), + -15 /* Don't check for zlib header */); + if (ret == Z_OK) { + a->compression_data = state; + return (ARCHIVE_OK); + } + + /* Library setup failed: Clean up. */ + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Internal error initializing %s library", a->compression_name); + free(state->uncompressed_buffer); + free(state); + + /* Override the error message if we know what really went wrong. 
*/ + switch (ret) { + case Z_STREAM_ERROR: + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Internal error initializing compression library: " + "invalid setup parameter"); + break; + case Z_MEM_ERROR: + archive_set_error(a, ENOMEM, + "Internal error initializing compression library: " + "out of memory"); + break; + case Z_VERSION_ERROR: + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Internal error initializing compression library: " + "invalid library version"); + break; + } + + return (ARCHIVE_FATAL); +} + +/* + * Return a block of data from the decompression buffer. Decompress more + * as necessary. + */ +static ssize_t +read_ahead(struct archive *a, const void **p, size_t min) +{ + struct private_data *state; + int read_avail, was_avail, ret; + + state = a->compression_data; + was_avail = -1; + if (!a->client_reader) { + archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER, + "No read callback is registered? " + "This is probably an internal programming error."); + return (ARCHIVE_FATAL); + } + + read_avail = state->stream.next_out - state->read_next; + + if (read_avail + state->stream.avail_out < min) { + memmove(state->uncompressed_buffer, state->read_next, + read_avail); + state->read_next = state->uncompressed_buffer; + state->stream.next_out = state->read_next + read_avail; + state->stream.avail_out + = state->uncompressed_buffer_size - read_avail; + } + + while (was_avail < read_avail && /* Made some progress. */ + read_avail < (int)min && /* Haven't satisfied min. */ + read_avail < (int)state->uncompressed_buffer_size) { /* !full */ + if ((ret = drive_decompressor(a, state)) != ARCHIVE_OK) + return (ret); + was_avail = read_avail; + read_avail = state->stream.next_out - state->read_next; + } + + *p = state->read_next; + return (read_avail); +} + +/* + * Mark a previously-returned block of data as read. 
+ */ +static ssize_t +read_consume(struct archive *a, size_t n) +{ + struct private_data *state; + + state = a->compression_data; + a->file_position += n; + state->read_next += n; + if (state->read_next > state->stream.next_out) + __archive_errx(1, "Request to consume too many " + "bytes from gzip decompressor"); + return (n); +} + +/* + * Clean up the decompressor. + */ +static int +finish(struct archive *a) +{ + struct private_data *state; + int ret; + + state = a->compression_data; + ret = ARCHIVE_OK; + switch (inflateEnd(&(state->stream))) { + case Z_OK: + break; + default: + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Failed to clean up %s compressor", a->compression_name); + ret = ARCHIVE_FATAL; + } + + free(state->uncompressed_buffer); + free(state); + + a->compression_data = NULL; + if (a->client_closer != NULL) + (a->client_closer)(a, a->client_data); + + return (ret); +} + +/* + * Utility function to pull data through decompressor, reading input + * blocks as necessary. + */ +static int +drive_decompressor(struct archive *a, struct private_data *state) +{ + ssize_t ret; + int decompressed, total_decompressed; + int count, flags, header_state; + unsigned char *output; + unsigned char b; + + flags = 0; + count = 0; + header_state = 0; + total_decompressed = 0; + for (;;) { + if (state->stream.avail_in == 0) { + ret = (a->client_reader)(a, a->client_data, + (const void **)&state->stream.next_in); + if (ret < 0) { + /* + * TODO: Find a better way to handle + * this read failure. + */ + goto fatal; + } + if (ret == 0 && total_decompressed == 0) { + archive_set_error(a, EIO, + "Premature end of %s compressed data", + a->compression_name); + return (ARCHIVE_FATAL); + } + a->raw_position += ret; + state->stream.avail_in = ret; + } + + if (!state->header_done) { + /* + * If still parsing the header, interpret the + * next byte. 
+ */ + b = *(state->stream.next_in++); + state->stream.avail_in--; + + /* + * Yes, this is somewhat crude, but it works, + * GZip format isn't likely to change anytime + * in the near future, and header parsing is + * certainly not a performance issue, so + * there's little point in making this more + * elegant. Of course, if you see an easy way + * to make this more elegant, please let me + * know.. ;-) + */ + switch (header_state) { + case 0: /* First byte of signature. */ + if (b != 037) + goto fatal; + header_state = 1; + break; + case 1: /* Second byte of signature. */ + if (b != 0213) + goto fatal; + header_state = 2; + break; + case 2: /* Compression type must be 8. */ + if (b != 8) + goto fatal; + header_state = 3; + break; + case 3: /* GZip flags. */ + flags = b; + header_state = 4; + break; + case 4: case 5: case 6: case 7: /* Mod time. */ + header_state++; + break; + case 8: /* Deflate flags. */ + header_state = 9; + break; + case 9: /* OS. */ + header_state = 10; + break; + case 10: /* Optional Extra: First byte of Length. */ + if ((flags & 4)) { + count = 255 & (int)b; + header_state = 11; + break; + } + /* + * Fall through if there is no + * Optional Extra field. + */ + case 11: /* Optional Extra: Second byte of Length. */ + if ((flags & 4)) { + count = (0xff00 & ((int)b << 8)) | count; + header_state = 12; + break; + } + /* + * Fall through if there is no + * Optional Extra field. + */ + case 12: /* Optional Extra Field: counted length. */ + if ((flags & 4)) { + --count; + if (count == 0) header_state = 13; + else header_state = 12; + break; + } + /* + * Fall through if there is no + * Optional Extra field. + */ + case 13: /* Optional Original Filename. */ + if ((flags & 8)) { + if (b == 0) header_state = 14; + else header_state = 13; + break; + } + /* + * Fall through if no Optional + * Original Filename. + */ + case 14: /* Optional Comment. 
*/ + if ((flags & 16)) { + if (b == 0) header_state = 15; + else header_state = 14; + break; + } + /* Fall through if no Optional Comment. */ + case 15: /* Optional Header CRC: First byte. */ + if ((flags & 2)) { + header_state = 16; + break; + } + /* Fall through if no Optional Header CRC. */ + case 16: /* Optional Header CRC: Second byte. */ + if ((flags & 2)) { + header_state = 17; + break; + } + /* Fall through if no Optional Header CRC. */ + case 17: /* First byte of compressed data. */ + state->header_done = 1; /* done with header */ + state->stream.avail_in++; + state->stream.next_in--; + } + + /* + * TODO: Consider moving the inflateInit2 call + * here so it can include the compression type + * from the header? + */ + } else { + output = state->stream.next_out; + + /* Decompress some data. */ + ret = inflate(&(state->stream), 0); + decompressed = state->stream.next_out - output; + + /* Accumulate the CRC of the uncompressed data. */ + state->crc = crc32(state->crc, output, decompressed); + + /* Accumulate the total bytes of output. */ + state->total_out += decompressed; + total_decompressed += decompressed; + + switch (ret) { + case Z_OK: /* Decompressor made some progress. */ + if (decompressed > 0) + return (ARCHIVE_OK); + break; + case Z_STREAM_END: /* Found end of stream. */ + /* + * TODO: Verify gzip trailer + * (uncompressed length and CRC). + */ + return (ARCHIVE_OK); + default: + /* Any other return value is an error. */ + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "gzip decompression failed (%s)", + state->stream.msg); + goto fatal; + } + } + } + return (ARCHIVE_OK); + + /* Return a fatal error. 
*/ +fatal: + archive_set_error(a, ARCHIVE_ERRNO_MISC, "%s decompression failed", + a->compression_name); + return (ARCHIVE_FATAL); +} + +#endif /* HAVE_ZLIB_H */ diff --git a/lib/libarchive/archive_read_support_compression_none.c b/lib/libarchive/archive_read_support_compression_none.c new file mode 100644 index 0000000..8e7ca3c --- /dev/null +++ b/lib/libarchive/archive_read_support_compression_none.c @@ -0,0 +1,266 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_private.h" + +struct archive_decompress_none { + char *buffer; + size_t buffer_size; + char *next; /* Current read location. */ + size_t avail; /* Bytes in my buffer. */ + const char *client_buff; /* Client buffer information. */ + size_t client_total; + const char *client_next; + size_t client_avail; + char end_of_file; + char fatal; +}; + +/* + * Size of internal buffer used for combining short reads. This is + * also an upper limit on the size of a read request. Recall, + * however, that we can (and will!) return blocks of data larger than + * this. The read semantics are: you ask for a minimum, I give you a + * pointer to my best-effort match and tell you how much data is + * there. It could be less than you asked for, it could be much more. + * For example, a client might use mmap() to "read" the entire file as + * a single block. In that case, I will return that entire block to + * my clients. + */ +#define BUFFER_SIZE 65536 + +static int archive_decompressor_none_bid(const void *, size_t); +static int archive_decompressor_none_finish(struct archive *); +static int archive_decompressor_none_init(struct archive *, + const void *, size_t); +static ssize_t archive_decompressor_none_read_ahead(struct archive *, + const void **, size_t); +static ssize_t archive_decompressor_none_read_consume(struct archive *, + size_t); + +int +archive_read_support_compression_none(struct archive *a) +{ + return (__archive_read_register_compression(a, + archive_decompressor_none_bid, + archive_decompressor_none_init)); +} + +/* + * Try to detect an "uncompressed" archive. + */ +static int +archive_decompressor_none_bid(const void *buff, size_t len) +{ + (void)buff; + (void)len; + + return (1); /* Default: We'll take it if noone else does. 
*/ +} + +static int +archive_decompressor_none_init(struct archive *a, const void *buff, size_t n) +{ + struct archive_decompress_none *state; + + a->compression_code = ARCHIVE_COMPRESSION_NONE; + a->compression_name = "none"; + + state = (struct archive_decompress_none *)malloc(sizeof(*state)); + if (!state) { + archive_set_error(a, ENOMEM, "Can't allocate input data"); + return (ARCHIVE_FATAL); + } + memset(state, 0, sizeof(*state)); + + state->buffer_size = BUFFER_SIZE; + state->buffer = malloc(state->buffer_size); + state->next = state->buffer; + if (state->buffer == NULL) { + free(state); + archive_set_error(a, ENOMEM, "Can't allocate input buffer"); + return (ARCHIVE_FATAL); + } + + /* Save reference to first block of data. */ + state->client_buff = buff; + state->client_total = n; + state->client_next = state->client_buff; + state->client_avail = state->client_total; + + a->compression_data = state; + a->compression_read_ahead = archive_decompressor_none_read_ahead; + a->compression_read_consume = archive_decompressor_none_read_consume; + a->compression_finish = archive_decompressor_none_finish; + + return (ARCHIVE_OK); +} + +/* + * We just pass through pointers to the client buffer if we can. + * If the client buffer is short, then we copy stuff to our internal + * buffer to combine reads. + */ +static ssize_t +archive_decompressor_none_read_ahead(struct archive *a, const void **buff, + size_t min) +{ + struct archive_decompress_none *state; + ssize_t bytes_read; + + state = a->compression_data; + if (state->fatal) + return (-1); + + /* + * Don't make special efforts to handle requests larger than + * the copy buffer. + */ + if (min > state->buffer_size) + min = state->buffer_size; + + /* + * Try to satisfy the request directly from the client + * buffer. We can do this if all of the data in the copy + * buffer was copied from the current client buffer. 
This + * also covers the case where the copy buffer is empty and + * the client buffer has all the data we need. + */ + if (state->client_total >= state->client_avail + state->avail + && state->client_avail + state->avail >= min) { + state->client_avail += state->avail; + state->client_next -= state->avail; + state->avail = 0; + state->next = state->buffer; + *buff = state->client_next; + return (state->client_avail); + } + + /* + * If we can't use client buffer, we'll have to use copy buffer. + */ + + /* Move data forward in copy buffer if necessary. */ + if (state->next > state->buffer && + state->next + min > state->buffer + state->buffer_size) { + if (state->avail > 0) + memmove(state->buffer, state->next, state->avail); + state->next = state->buffer; + } + + /* Collect data in copy buffer to fulfill request. */ + while (state->avail < min) { + /* Copy data from client buffer to our copy buffer. */ + if (state->client_avail > 0) { + /* First estimate: copy to fill rest of buffer. */ + size_t tocopy = (state->buffer + state->buffer_size) + - (state->next + state->avail); + /* Don't copy more than is available. */ + if (tocopy > state->client_avail) + tocopy = state->client_avail; + memcpy(state->next + state->avail, state->client_next, + tocopy); + state->client_next += tocopy; + state->client_avail -= tocopy; + state->avail += tocopy; + } else { + /* There is no more client data: fetch more. */ + /* + * It seems to me that const void ** and const + * char ** should be compatible, but they + * aren't, hence the cast. + */ + bytes_read = (a->client_reader)(a, a->client_data, + (const void **)&state->client_buff); + if (bytes_read < 0) { /* Read error. */ + state->client_total = state->client_avail = 0; + state->client_next = state->client_buff = NULL; + state->fatal = 1; + return (-1); + } + if (bytes_read == 0) { /* End-of-file. 
*/ + state->client_total = state->client_avail = 0; + state->client_next = state->client_buff = NULL; + state->end_of_file = 1; + break; + } + a->raw_position += bytes_read; + state->client_total = bytes_read; + state->client_avail = state->client_total; + state->client_next = state->client_buff; + } + } + + *buff = state->next; + return (state->avail); +} + +/* + * Mark the appropriate data as used. Note that the request here will + * often be much smaller than the size of the previous read_ahead + * request. + */ +static ssize_t +archive_decompressor_none_read_consume(struct archive *a, size_t request) +{ + struct archive_decompress_none *state; + + state = a->compression_data; + if (state->avail > 0) { + /* Read came from copy buffer. */ + state->next += request; + state->avail -= request; + } else { + /* Read came from client buffer. */ + state->client_next += request; + state->client_avail -= request; + } + a->file_position += request; + return (request); +} + +static int +archive_decompressor_none_finish(struct archive *a) +{ + struct archive_decompress_none *state; + + state = a->compression_data; + free(state->buffer); + free(state); + a->compression_data = NULL; + if (a->client_closer != NULL) + return ((a->client_closer)(a, a->client_data)); + return (ARCHIVE_OK); +} diff --git a/lib/libarchive/archive_read_support_format_all.c b/lib/libarchive/archive_read_support_format_all.c new file mode 100644 index 0000000..3b7eaaf --- /dev/null +++ b/lib/libarchive/archive_read_support_format_all.c @@ -0,0 +1,40 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include "archive.h" + +int +archive_read_support_format_all(struct archive *a) +{ + archive_read_support_format_cpio(a); + archive_read_support_format_iso9660(a); + archive_read_support_format_tar(a); + archive_read_support_format_zip(a); + return (ARCHIVE_OK); +} diff --git a/lib/libarchive/archive_read_support_format_cpio.c b/lib/libarchive/archive_read_support_format_cpio.c new file mode 100644 index 0000000..75b0417 --- /dev/null +++ b/lib/libarchive/archive_read_support_format_cpio.c @@ -0,0 +1,610 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> + +#include <errno.h> +/* #include <stdint.h> */ /* See archive_platform.h */ +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_entry.h" +#include "archive_private.h" + +struct cpio_bin_header { + unsigned char c_magic[2]; + unsigned char c_dev[2]; + unsigned char c_ino[2]; + unsigned char c_mode[2]; + unsigned char c_uid[2]; + unsigned char c_gid[2]; + unsigned char c_nlink[2]; + unsigned char c_rdev[2]; + unsigned char c_mtime[4]; + unsigned char c_namesize[2]; + unsigned char c_filesize[4]; +}; + +struct cpio_odc_header { + char c_magic[6]; + char c_dev[6]; + char c_ino[6]; + char c_mode[6]; + char c_uid[6]; + char c_gid[6]; + char c_nlink[6]; + char c_rdev[6]; + char c_mtime[11]; + char c_namesize[6]; + char c_filesize[11]; +}; + +struct cpio_newc_header { + char c_magic[6]; + char c_ino[8]; + char c_mode[8]; + char c_uid[8]; + char c_gid[8]; + char c_nlink[8]; + char 
c_mtime[8]; + char c_filesize[8]; + char c_devmajor[8]; + char c_devminor[8]; + char c_rdevmajor[8]; + char c_rdevminor[8]; + char c_namesize[8]; + char c_crc[8]; +}; + +struct links_entry { + struct links_entry *next; + struct links_entry *previous; + int links; + dev_t dev; + ino_t ino; + char *name; +}; + +#define CPIO_MAGIC 0x13141516 +struct cpio { + int magic; + int (*read_header)(struct archive *, struct cpio *, + struct stat *, size_t *, size_t *); + struct links_entry *links_head; + struct archive_string entry_name; + struct archive_string entry_linkname; + off_t entry_bytes_remaining; + off_t entry_offset; + off_t entry_padding; +}; + +static int64_t atol16(const char *, unsigned); +static int64_t atol8(const char *, unsigned); +static int archive_read_format_cpio_bid(struct archive *); +static int archive_read_format_cpio_cleanup(struct archive *); +static int archive_read_format_cpio_read_data(struct archive *, + const void **, size_t *, off_t *); +static int archive_read_format_cpio_read_header(struct archive *, + struct archive_entry *); +static int be4(const unsigned char *); +static int header_bin_be(struct archive *, struct cpio *, struct stat *, + size_t *, size_t *); +static int header_bin_le(struct archive *, struct cpio *, struct stat *, + size_t *, size_t *); +static int header_newc(struct archive *, struct cpio *, struct stat *, + size_t *, size_t *); +static int header_odc(struct archive *, struct cpio *, struct stat *, + size_t *, size_t *); +static int le4(const unsigned char *); +static void record_hardlink(struct cpio *cpio, struct archive_entry *entry, + const struct stat *st); + +int +archive_read_support_format_cpio(struct archive *a) +{ + struct cpio *cpio; + int r; + + cpio = malloc(sizeof(*cpio)); + if (cpio == NULL) { + archive_set_error(a, ENOMEM, "Can't allocate cpio data"); + return (ARCHIVE_FATAL); + } + memset(cpio, 0, sizeof(*cpio)); + cpio->magic = CPIO_MAGIC; + + r = __archive_read_register_format(a, + cpio, + 
archive_read_format_cpio_bid, + archive_read_format_cpio_read_header, + archive_read_format_cpio_read_data, + NULL, + archive_read_format_cpio_cleanup); + + if (r != ARCHIVE_OK) + free(cpio); + return (ARCHIVE_OK); +} + + +static int +archive_read_format_cpio_bid(struct archive *a) +{ + int bid, bytes_read; + const void *h; + const unsigned char *p; + struct cpio *cpio; + + cpio = *(a->pformat_data); + bid = 0; + bytes_read = (a->compression_read_ahead)(a, &h, 6); + /* Convert error code into error return. */ + if (bytes_read < 0) + return ((int)bytes_read); + if (bytes_read < 6) + return (-1); + + p = h; + if (memcmp(p, "070707", 6) == 0) { + /* ASCII cpio archive (odc, POSIX.1) */ + cpio->read_header = header_odc; + bid += 48; + /* + * XXX TODO: More verification; Could check that only octal + * digits appear in appropriate header locations. XXX + */ + } else if (memcmp(p, "070701", 6) == 0) { + /* ASCII cpio archive (SVR4 without CRC) */ + cpio->read_header = header_newc; + bid += 48; + /* + * XXX TODO: More verification; Could check that only hex + * digits appear in appropriate header locations. XXX + */ + } else if (memcmp(p, "070702", 6) == 0) { + /* ASCII cpio archive (SVR4 with CRC) */ + /* XXX TODO: Flag that we should check the CRC. XXX */ + cpio->read_header = header_newc; + bid += 48; + /* + * XXX TODO: More verification; Could check that only hex + * digits appear in appropriate header locations. XXX + */ + } else if (p[0] * 256 + p[1] == 070707) { + /* big-endian binary cpio archives */ + cpio->read_header = header_bin_be; + bid += 16; + /* Is more verification possible here? */ + } else if (p[0] + p[1] * 256 == 070707) { + /* little-endian binary cpio archives */ + cpio->read_header = header_bin_le; + bid += 16; + /* Is more verification possible here? 
*/ + } else + return (ARCHIVE_WARN); + + return (bid); +} + +static int +archive_read_format_cpio_read_header(struct archive *a, + struct archive_entry *entry) +{ + struct stat st; + struct cpio *cpio; + size_t bytes; + const void *h; + size_t namelength; + size_t name_pad; + int r; + + memset(&st, 0, sizeof(st)); + + cpio = *(a->pformat_data); + r = (cpio->read_header(a, cpio, &st, &namelength, &name_pad)); + + if (r != ARCHIVE_OK) + return (r); + + /* Assign all of the 'stat' fields at once. */ + archive_entry_copy_stat(entry, &st); + + /* Read name from buffer. */ + bytes = (a->compression_read_ahead)(a, &h, namelength + name_pad); + if (bytes < namelength + name_pad) + return (ARCHIVE_FATAL); + (a->compression_read_consume)(a, namelength + name_pad); + archive_strncpy(&cpio->entry_name, h, namelength); + archive_entry_set_pathname(entry, cpio->entry_name.s); + cpio->entry_offset = 0; + + /* If this is a symlink, read the link contents. */ + if (S_ISLNK(st.st_mode)) { + bytes = (a->compression_read_ahead)(a, &h, + cpio->entry_bytes_remaining); + if ((off_t)bytes < cpio->entry_bytes_remaining) + return (ARCHIVE_FATAL); + (a->compression_read_consume)(a, cpio->entry_bytes_remaining); + archive_strncpy(&cpio->entry_linkname, h, + cpio->entry_bytes_remaining); + archive_entry_set_symlink(entry, cpio->entry_linkname.s); + cpio->entry_bytes_remaining = 0; + } + + /* Compare name to "TRAILER!!!" to test for end-of-archive. */ + if (namelength == 11 && strcmp(h, "TRAILER!!!") == 0) { + /* TODO: Store file location of start of block. */ + archive_set_error(a, 0, NULL); + return (ARCHIVE_EOF); + } + + /* Detect and record hardlinks to previously-extracted entries. 
*/ + record_hardlink(cpio, entry, &st); + + return (ARCHIVE_OK); +} + +static int +archive_read_format_cpio_read_data(struct archive *a, + const void **buff, size_t *size, off_t *offset) +{ + ssize_t bytes_read; + struct cpio *cpio; + + cpio = *(a->pformat_data); + if (cpio->entry_bytes_remaining > 0) { + bytes_read = (a->compression_read_ahead)(a, buff, 1); + if (bytes_read <= 0) + return (ARCHIVE_FATAL); + if (bytes_read > cpio->entry_bytes_remaining) + bytes_read = cpio->entry_bytes_remaining; + *size = bytes_read; + *offset = cpio->entry_offset; + cpio->entry_offset += bytes_read; + cpio->entry_bytes_remaining -= bytes_read; + (a->compression_read_consume)(a, bytes_read); + return (ARCHIVE_OK); + } else { + while (cpio->entry_padding > 0) { + bytes_read = (a->compression_read_ahead)(a, buff, 1); + if (bytes_read <= 0) + return (ARCHIVE_FATAL); + if (bytes_read > cpio->entry_padding) + bytes_read = cpio->entry_padding; + (a->compression_read_consume)(a, bytes_read); + cpio->entry_padding -= bytes_read; + } + *buff = NULL; + *size = 0; + *offset = cpio->entry_offset; + return (ARCHIVE_EOF); + } +} + +static int +header_newc(struct archive *a, struct cpio *cpio, struct stat *st, + size_t *namelength, size_t *name_pad) +{ + const void *h; + const struct cpio_newc_header *header; + size_t bytes; + + /* Read fixed-size portion of header. */ + bytes = (a->compression_read_ahead)(a, &h, sizeof(struct cpio_newc_header)); + if (bytes < sizeof(struct cpio_newc_header)) + return (ARCHIVE_FATAL); + (a->compression_read_consume)(a, sizeof(struct cpio_newc_header)); + + /* Parse out hex fields into struct stat. 
*/ + header = h; + + if (memcmp(header->c_magic, "070701", 6) == 0) { + a->archive_format = ARCHIVE_FORMAT_CPIO_SVR4_NOCRC; + a->archive_format_name = "ASCII cpio (SVR4 with no CRC)"; + } else if (memcmp(header->c_magic, "070702", 6) == 0) { + a->archive_format = ARCHIVE_FORMAT_CPIO_SVR4_CRC; + a->archive_format_name = "ASCII cpio (SVR4 with CRC)"; + } else { + /* TODO: Abort here? */ + } + + st->st_dev = makedev( + atol16(header->c_devmajor, sizeof(header->c_devmajor)), + atol16(header->c_devminor, sizeof(header->c_devminor))); + st->st_ino = atol16(header->c_ino, sizeof(header->c_ino)); + st->st_mode = atol16(header->c_mode, sizeof(header->c_mode)); + st->st_uid = atol16(header->c_uid, sizeof(header->c_uid)); + st->st_gid = atol16(header->c_gid, sizeof(header->c_gid)); + st->st_nlink = atol16(header->c_nlink, sizeof(header->c_nlink)); + st->st_rdev = makedev( + atol16(header->c_rdevmajor, sizeof(header->c_rdevmajor)), + atol16(header->c_rdevminor, sizeof(header->c_rdevminor))); + st->st_mtime = atol16(header->c_mtime, sizeof(header->c_mtime)); + *namelength = atol16(header->c_namesize, sizeof(header->c_namesize)); + /* Pad name to 2 more than a multiple of 4. */ + *name_pad = (2 - *namelength) & 3; + + /* + * Note: entry_bytes_remaining is at least 64 bits and + * therefore gauranteed to be big enough for a 33-bit file + * size. struct stat.st_size may only be 32 bits, so + * assigning there first could lose information. + */ + cpio->entry_bytes_remaining = + atol16(header->c_filesize, sizeof(header->c_filesize)); + st->st_size = cpio->entry_bytes_remaining; + /* Pad file contents to a multiple of 4. 
*/ + cpio->entry_padding = 3 & -cpio->entry_bytes_remaining; + return (ARCHIVE_OK); +} + +static int +header_odc(struct archive *a, struct cpio *cpio, struct stat *st, + size_t *namelength, size_t *name_pad) +{ + const void *h; + const struct cpio_odc_header *header; + size_t bytes; + + a->archive_format = ARCHIVE_FORMAT_CPIO_POSIX; + a->archive_format_name = "POSIX octet-oriented cpio"; + + /* Read fixed-size portion of header. */ + bytes = (a->compression_read_ahead)(a, &h, sizeof(struct cpio_odc_header)); + if (bytes < sizeof(struct cpio_odc_header)) + return (ARCHIVE_FATAL); + (a->compression_read_consume)(a, sizeof(struct cpio_odc_header)); + + /* Parse out octal fields into struct stat. */ + header = h; + + st->st_dev = atol8(header->c_dev, sizeof(header->c_dev)); + st->st_ino = atol8(header->c_ino, sizeof(header->c_ino)); + st->st_mode = atol8(header->c_mode, sizeof(header->c_mode)); + st->st_uid = atol8(header->c_uid, sizeof(header->c_uid)); + st->st_gid = atol8(header->c_gid, sizeof(header->c_gid)); + st->st_nlink = atol8(header->c_nlink, sizeof(header->c_nlink)); + st->st_rdev = atol8(header->c_rdev, sizeof(header->c_rdev)); + st->st_mtime = atol8(header->c_mtime, sizeof(header->c_mtime)); + *namelength = atol8(header->c_namesize, sizeof(header->c_namesize)); + *name_pad = 0; /* No padding of filename. */ + + /* + * Note: entry_bytes_remaining is at least 64 bits and + * therefore gauranteed to be big enough for a 33-bit file + * size. struct stat.st_size may only be 32 bits, so + * assigning there first could lose information. 
+ */ + cpio->entry_bytes_remaining = + atol8(header->c_filesize, sizeof(header->c_filesize)); + st->st_size = cpio->entry_bytes_remaining; + cpio->entry_padding = 0; + return (ARCHIVE_OK); +} + +static int +header_bin_le(struct archive *a, struct cpio *cpio, struct stat *st, + size_t *namelength, size_t *name_pad) +{ + const void *h; + const struct cpio_bin_header *header; + size_t bytes; + + a->archive_format = ARCHIVE_FORMAT_CPIO_BIN_LE; + a->archive_format_name = "cpio (little-endian binary)"; + + /* Read fixed-size portion of header. */ + bytes = (a->compression_read_ahead)(a, &h, sizeof(struct cpio_bin_header)); + if (bytes < sizeof(struct cpio_bin_header)) + return (ARCHIVE_FATAL); + (a->compression_read_consume)(a, sizeof(struct cpio_bin_header)); + + /* Parse out binary fields into struct stat. */ + header = h; + + st->st_dev = header->c_dev[0] + header->c_dev[1] * 256; + st->st_ino = header->c_ino[0] + header->c_ino[1] * 256; + st->st_mode = header->c_mode[0] + header->c_mode[1] * 256; + st->st_uid = header->c_uid[0] + header->c_uid[1] * 256; + st->st_gid = header->c_gid[0] + header->c_gid[1] * 256; + st->st_nlink = header->c_nlink[0] + header->c_nlink[1] * 256; + st->st_rdev = header->c_rdev[0] + header->c_rdev[1] * 256; + st->st_mtime = le4(header->c_mtime); + *namelength = header->c_namesize[0] + header->c_namesize[1] * 256; + *name_pad = *namelength & 1; /* Pad to even. */ + + cpio->entry_bytes_remaining = le4(header->c_filesize); + st->st_size = cpio->entry_bytes_remaining; + cpio->entry_padding = cpio->entry_bytes_remaining & 1; /* Pad to even. */ + return (ARCHIVE_OK); +} + +static int +header_bin_be(struct archive *a, struct cpio *cpio, struct stat *st, + size_t *namelength, size_t *name_pad) +{ + const void *h; + const struct cpio_bin_header *header; + size_t bytes; + + a->archive_format = ARCHIVE_FORMAT_CPIO_BIN_BE; + a->archive_format_name = "cpio (big-endian binary)"; + + /* Read fixed-size portion of header. 
*/ + bytes = (a->compression_read_ahead)(a, &h, + sizeof(struct cpio_bin_header)); + if (bytes < sizeof(struct cpio_bin_header)) + return (ARCHIVE_FATAL); + (a->compression_read_consume)(a, sizeof(struct cpio_bin_header)); + + /* Parse out binary fields into struct stat. */ + header = h; + st->st_dev = header->c_dev[0] * 256 + header->c_dev[1]; + st->st_ino = header->c_ino[0] * 256 + header->c_ino[1]; + st->st_mode = header->c_mode[0] * 256 + header->c_mode[1]; + st->st_uid = header->c_uid[0] * 256 + header->c_uid[1]; + st->st_gid = header->c_gid[0] * 256 + header->c_gid[1]; + st->st_nlink = header->c_nlink[0] * 256 + header->c_nlink[1]; + st->st_rdev = header->c_rdev[0] * 256 + header->c_rdev[1]; + st->st_mtime = be4(header->c_mtime); + *namelength = header->c_namesize[0] * 256 + header->c_namesize[1]; + *name_pad = *namelength & 1; /* Pad to even. */ + + cpio->entry_bytes_remaining = be4(header->c_filesize); + st->st_size = cpio->entry_bytes_remaining; + cpio->entry_padding = cpio->entry_bytes_remaining & 1; /* Pad to even. */ + return (ARCHIVE_OK); +} + +static int +archive_read_format_cpio_cleanup(struct archive *a) +{ + struct cpio *cpio; + + cpio = *(a->pformat_data); + /* Free inode->name map */ + while (cpio->links_head != NULL) { + struct links_entry *lp = cpio->links_head->next; + + if (cpio->links_head->name) + free(cpio->links_head->name); + free(cpio->links_head); + cpio->links_head = lp; + } + + free(cpio); + *(a->pformat_data) = NULL; + return (ARCHIVE_OK); +} + +static int +le4(const unsigned char *p) +{ + return ((p[0]<<16) + (p[1]<<24) + (p[2]<<0) + (p[3]<<8)); +} + + +static int +be4(const unsigned char *p) +{ + return (p[0] + (p[1]<<8) + (p[2]<<16) + (p[3]<<24)); +} + +/* + * Note that this implementation does not (and should not!) obey + * locale settings; you cannot simply substitute strtol here, since + * it does obey locale. 
+ */ +static int64_t +atol8(const char *p, unsigned char_cnt) +{ + int64_t l; + int digit; + + l = 0; + while (char_cnt-- > 0) { + if (*p >= '0' && *p <= '7') + digit = *p - '0'; + else + return (l); + p++; + l <<= 3; + l |= digit; + } + return (l); +} + +static int64_t +atol16(const char *p, unsigned char_cnt) +{ + int64_t l; + int digit; + + l = 0; + while (char_cnt-- > 0) { + if (*p >= 'a' && *p <= 'f') + digit = *p - 'a' + 10; + else if (*p >= 'A' && *p <= 'F') + digit = *p - 'A' + 10; + else if (*p >= '0' && *p <= '9') + digit = *p - '0'; + else + return (l); + p++; + l <<= 4; + l |= digit; + } + return (l); +} + +static void +record_hardlink(struct cpio *cpio, struct archive_entry *entry, + const struct stat *st) +{ + struct links_entry *le; + + /* + * First look in the list of multiply-linked files. If we've + * already dumped it, convert this entry to a hard link entry. + */ + for (le = cpio->links_head; le; le = le->next) { + if (le->dev == st->st_dev && le->ino == st->st_ino) { + archive_entry_set_hardlink(entry, le->name); + + if (--le->links <= 0) { + if (le->previous != NULL) + le->previous->next = le->next; + if (le->next != NULL) + le->next->previous = le->previous; + if (cpio->links_head == le) + cpio->links_head = le->next; + free(le); + } + + return; + } + } + + le = malloc(sizeof(struct links_entry)); + if (le == NULL) + __archive_errx(1, "Out of memory adding file to list"); + if (cpio->links_head != NULL) + cpio->links_head->previous = le; + le->next = cpio->links_head; + le->previous = NULL; + cpio->links_head = le; + le->dev = st->st_dev; + le->ino = st->st_ino; + le->links = st->st_nlink - 1; + le->name = strdup(archive_entry_pathname(entry)); + if (le->name == NULL) + __archive_errx(1, "Out of memory adding file to list"); +} diff --git a/lib/libarchive/archive_read_support_format_iso9660.c b/lib/libarchive/archive_read_support_format_iso9660.c new file mode 100644 index 0000000..065bf9b --- /dev/null +++ 
b/lib/libarchive/archive_read_support_format_iso9660.c @@ -0,0 +1,1061 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> + +#include <errno.h> +/* #include <stdint.h> */ /* See archive_platform.h */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_entry.h" +#include "archive_private.h" +#include "archive_string.h" + +/* + * An overview of ISO 9660 format: + * + * Each disk is laid out as follows: + * * 32k reserved for private use + * * Volume descriptor table. 
Each volume descriptor + * is 2k and specifies basic format information. + * The "Primary Volume Descriptor" (PVD) is defined by the + * standard and should always be present; other volume + * descriptors include various vendor-specific extensions. + * * Files and directories. Each file/dir is specified by + * an "extent" (starting sector and length in bytes). + * Dirs are just files with directory records packed one + * after another. The PVD contains a single dir entry + * specifying the location of the root directory. Everything + * else follows from there. + * + * This module works by first reading the volume descriptors, then + * building a list of directory entries, sorted by starting + * sector. At each step, I look for the earliest dir entry that + * hasn't yet been read, seek forward to that location and read + * that entry. If it's a dir, I slurp in the new dir entries and + * add them to the heap; if it's a regular file, I return the + * corresponding archive_entry and wait for the client to request + * the file body. This strategy allows us to read most compliant + * CDs with a single pass through the data, as required by libarchive. + */ + +/* Structure of on-disk PVD. 
*/ +struct iso9660_primary_volume_descriptor { + unsigned char type[1]; + char id[5]; + unsigned char version[1]; + char reserved1[1]; + char system_id[32]; + char volume_id[32]; + char reserved2[8]; + char volume_space_size[8]; + char reserved3[32]; + char volume_set_size[4]; + char volume_sequence_number[4]; + char logical_block_size[4]; + char path_table_size[8]; + char type_1_path_table[4]; + char opt_type_1_path_table[4]; + char type_m_path_table[4]; + char opt_type_m_path_table[4]; + char root_directory_record[34]; + char volume_set_id[128]; + char publisher_id[128]; + char preparer_id[128]; + char application_id[128]; + char copyright_file_id[37]; + char abstract_file_id[37]; + char bibliographic_file_id[37]; + char creation_date[17]; + char modification_date[17]; + char expiration_date[17]; + char effective_date[17]; + char file_structure_version[1]; + char reserved4[1]; + char application_data[512]; +}; + +/* Structure of an on-disk directory record. */ +struct iso9660_directory_record { + unsigned char length[1]; + unsigned char ext_attr_length[1]; + unsigned char extent[8]; + unsigned char size[8]; + char date[7]; + unsigned char flags[1]; + unsigned char file_unit_size[1]; + unsigned char interleave[1]; + unsigned char volume_sequence_number[4]; + unsigned char name_len[1]; + char name[1]; +}; + +/* + * Our private data. + */ + +/* In-memory storage for a directory record. */ +struct file_info { + struct file_info *parent; + int refcount; + uint64_t offset; /* Offset on disk. */ + uint64_t size; /* File size in bytes. */ + uint64_t ce_offset; /* Offset of CE */ + uint64_t ce_size; /* Size of CE */ + time_t mtime; /* File last modified time. */ + time_t atime; /* File last accessed time. */ + time_t ctime; /* File creation time. */ + mode_t mode; + uid_t uid; + gid_t gid; + ino_t inode; + int nlinks; + char *name; /* Null-terminated filename. 
*/ + struct archive_string symlink; +}; + + +struct iso9660 { + int magic; +#define ISO9660_MAGIC 0x96609660 + int bid; /* If non-zero, return this as our bid. */ + struct archive_string pathname; + char seenRockridge; /* Set true if RR extensions are used. */ + unsigned char suspOffset; + + uint64_t previous_offset; + uint64_t previous_size; + struct archive_string previous_pathname; + + /* TODO: Make this a heap for fast inserts and deletions. */ + struct file_info **pending_files; + int pending_files_allocated; + int pending_files_used; + + uint64_t current_position; + ssize_t logical_block_size; + + off_t entry_sparse_offset; + ssize_t entry_bytes_remaining; +}; + +static void add_entry(struct iso9660 *iso9660, struct file_info *file); +static int archive_read_format_iso9660_bid(struct archive *); +static int archive_read_format_iso9660_cleanup(struct archive *); +static int archive_read_format_iso9660_read_data(struct archive *, + const void **, size_t *, off_t *); +static int archive_read_format_iso9660_read_header(struct archive *, + struct archive_entry *); +static const char *build_pathname(struct archive_string *, struct file_info *); +static void dump_isodirrec(FILE *, const struct iso9660_directory_record *); +static time_t time_from_tm(struct tm *); +static time_t isodate17(const void *); +static time_t isodate7(const void *); +static int isPVD(struct iso9660 *, const char *); +static struct file_info *next_entry(struct iso9660 *); +static int next_entry_seek(struct archive *a, struct iso9660 *iso9660, + struct file_info **pfile); +static struct file_info * + parse_file_info(struct iso9660 *iso9660, + struct file_info *parent, + const struct iso9660_directory_record *isodirrec); +static void parse_rockridge(struct iso9660 *iso9660, + struct file_info *file, const unsigned char *start, + const unsigned char *end); +static void release_file(struct iso9660 *, struct file_info *); +static int toi(const void *p, int n); + +int 
+archive_read_support_format_iso9660(struct archive *a) +{ + struct iso9660 *iso9660; + int r; + + iso9660 = malloc(sizeof(*iso9660)); + if (iso9660 == NULL) { + archive_set_error(a, ENOMEM, "Can't allocate iso9660 data"); + return (ARCHIVE_FATAL); + } + memset(iso9660, 0, sizeof(*iso9660)); + iso9660->magic = ISO9660_MAGIC; + iso9660->bid = -1; /* We haven't yet bid. */ + + r = __archive_read_register_format(a, + iso9660, + archive_read_format_iso9660_bid, + archive_read_format_iso9660_read_header, + archive_read_format_iso9660_read_data, + NULL, + archive_read_format_iso9660_cleanup); + + if (r != ARCHIVE_OK) { + free(iso9660); + return (r); + } + return (ARCHIVE_OK); +} + + +static int +archive_read_format_iso9660_bid(struct archive *a) +{ + struct iso9660 *iso9660; + ssize_t bytes_read; + const void *h; + const char *p; + + iso9660 = *(a->pformat_data); + + if (iso9660->bid >= 0) + return (iso9660->bid); + + /* + * Skip the first 32k (reserved area) and get the first + * 8 sectors of the volume descriptor table. Of course, + * if the I/O layer gives us more, we'll take it. + */ + bytes_read = (a->compression_read_ahead)(a, &h, 32768 + 8*2048); + if (bytes_read < 32768 + 8*2048) + return (iso9660->bid = -1); + p = (const char *)h; + + /* Skip the reserved area. */ + bytes_read -= 32768; + p += 32768; + + /* Check each volume descriptor to locate the PVD. */ + for (; bytes_read > 2048; bytes_read -= 2048, p += 2048) { + iso9660->bid = isPVD(iso9660, p); + if (iso9660->bid > 0) + return (iso9660->bid); + if (*p == '\xff') /* End-of-volume-descriptor marker. */ + break; + } + + /* We didn't find a valid PVD; return a bid of zero. 
*/ + iso9660->bid = 0; + return (iso9660->bid); +} + +static int +isPVD(struct iso9660 *iso9660, const char *h) +{ + const struct iso9660_primary_volume_descriptor *voldesc; + struct file_info *file; + + if (h[0] != 1) + return (0); + if (memcmp(h+1, "CD001", 5) != 0) + return (0); + + + voldesc = (const struct iso9660_primary_volume_descriptor *)h; + iso9660->logical_block_size = toi(&voldesc->logical_block_size, 2); + + /* Store the root directory in the pending list. */ + file = parse_file_info(iso9660, NULL, + (struct iso9660_directory_record *)&voldesc->root_directory_record); + add_entry(iso9660, file); + return (48); +} + +static int +archive_read_format_iso9660_read_header(struct archive *a, + struct archive_entry *entry) +{ + struct stat st; + struct iso9660 *iso9660; + struct file_info *file; + ssize_t bytes_read; + int r; + + iso9660 = *(a->pformat_data); + + if (!a->archive_format) { + a->archive_format = ARCHIVE_FORMAT_ISO9660; + a->archive_format_name = "ISO9660"; + } + + /* Get the next entry that appears after the current offset. */ + r = next_entry_seek(a, iso9660, &file); + if (r != ARCHIVE_OK) + return (r); + + iso9660->entry_bytes_remaining = file->size; + iso9660->entry_sparse_offset = 0; /* Offset for sparse-file-aware clients. */ + + /* Set up the entry structure with information about this entry. 
*/ + memset(&st, 0, sizeof(st)); + st.st_mode = file->mode; + st.st_uid = file->uid; + st.st_gid = file->gid; + st.st_nlink = file->nlinks; + st.st_ino = file->inode; + st.st_mtime = file->mtime; + st.st_ctime = file->ctime; + st.st_atime = file->atime; + st.st_size = iso9660->entry_bytes_remaining; + archive_entry_copy_stat(entry, &st); + archive_string_empty(&iso9660->pathname); + archive_entry_set_pathname(entry, + build_pathname(&iso9660->pathname, file)); + if (file->symlink.s != NULL) + archive_entry_set_symlink(entry, file->symlink.s); + + /* If this entry points to the same data as the previous + * entry, convert this into a hardlink to that entry. + * But don't bother for zero-length files. */ + if (file->offset == iso9660->previous_offset + && file->size == iso9660->previous_size + && file->size > 0) { + archive_entry_set_hardlink(entry, + iso9660->previous_pathname.s); + iso9660->entry_bytes_remaining = 0; + iso9660->entry_sparse_offset = 0; + release_file(iso9660, file); + return (ARCHIVE_OK); + } + + /* If the offset is before our current position, we can't + * seek backwards to extract it, so issue a warning. */ + if (file->offset < iso9660->current_position) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Ignoring out-of-order file"); + iso9660->entry_bytes_remaining = 0; + iso9660->entry_sparse_offset = 0; + release_file(iso9660, file); + return (ARCHIVE_WARN); + } + + iso9660->previous_size = file->size; + iso9660->previous_offset = file->offset; + archive_strcpy(&iso9660->previous_pathname, iso9660->pathname.s); + + /* If this is a directory, read in all of the entries right now. 
*/
	if (S_ISDIR(st.st_mode)) {
		while (iso9660->entry_bytes_remaining > 0) {
			const void *block;
			const unsigned char *p;
			const unsigned char *block_end;
			ssize_t step = iso9660->logical_block_size;

			/* Never read past the end of the directory extent. */
			if (step > iso9660->entry_bytes_remaining)
				step = iso9660->entry_bytes_remaining;
			/*
			 * A zero block size (taken from the volume
			 * descriptor) would make no progress and spin
			 * here forever.
			 */
			if (step <= 0) {
				archive_set_error(a, ARCHIVE_ERRNO_MISC,
				    "Invalid ISO9660 logical block size");
				release_file(iso9660, file);
				return (ARCHIVE_FATAL);
			}
			bytes_read = (a->compression_read_ahead)(a, &block, step);
			if (bytes_read < step) {
				archive_set_error(a, ARCHIVE_ERRNO_MISC,
	    "Failed to read full block when scanning ISO9660 directory list");
				release_file(iso9660, file);
				return (ARCHIVE_FATAL);
			}
			/* The I/O layer may hand back more than requested. */
			if (bytes_read > step)
				bytes_read = step;
			(a->compression_read_consume)(a, bytes_read);
			iso9660->current_position += bytes_read;
			iso9660->entry_bytes_remaining -= bytes_read;
			block_end = (const unsigned char *)block + bytes_read;
			/*
			 * Walk the records packed into this block.  The first
			 * byte of each record is its length; a zero length
			 * ends the scan of this block.  Check the bounds
			 * before dereferencing p.
			 */
			for (p = block;
			     p < block_end && *p != 0;
			     p += *p) {
				const struct iso9660_directory_record *dr
				    = (const struct iso9660_directory_record *)p;
				struct file_info *child;

				/* Skip '.' entry. */
				if (dr->name_len[0] == 1
				    && dr->name[0] == '\0')
					continue;
				/* Skip '..' entry.
*/ + if (dr->name_len[0] == 1 + && dr->name[0] == '\001') + continue; + child = parse_file_info(iso9660, file, dr); + add_entry(iso9660, child); + if (iso9660->seenRockridge) { + a->archive_format = + ARCHIVE_FORMAT_ISO9660_ROCKRIDGE; + a->archive_format_name = + "ISO9660 with Rockridge extensions"; + } + } + } + } + + release_file(iso9660, file); + return (ARCHIVE_OK); +} + +static int +archive_read_format_iso9660_read_data(struct archive *a, + const void **buff, size_t *size, off_t *offset) +{ + ssize_t bytes_read; + struct iso9660 *iso9660; + + iso9660 = *(a->pformat_data); + if (iso9660->entry_bytes_remaining <= 0) { + *buff = NULL; + *size = 0; + *offset = iso9660->entry_sparse_offset; + return (ARCHIVE_EOF); + } + + bytes_read = (a->compression_read_ahead)(a, buff, 1); + if (bytes_read == 0) + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Truncated input file"); + if (bytes_read <= 0) + return (ARCHIVE_FATAL); + if (bytes_read > iso9660->entry_bytes_remaining) + bytes_read = iso9660->entry_bytes_remaining; + *size = bytes_read; + *offset = iso9660->entry_sparse_offset; + iso9660->entry_sparse_offset += bytes_read; + iso9660->entry_bytes_remaining -= bytes_read; + iso9660->current_position += bytes_read; + (a->compression_read_consume)(a, bytes_read); + return (ARCHIVE_OK); +} + +static int +archive_read_format_iso9660_cleanup(struct archive *a) +{ + struct iso9660 *iso9660; + struct file_info *file; + + iso9660 = *(a->pformat_data); + while ((file = next_entry(iso9660)) != NULL) + release_file(iso9660, file); + archive_string_free(&iso9660->pathname); + archive_string_free(&iso9660->previous_pathname); + free(iso9660); + *(a->pformat_data) = NULL; + return (ARCHIVE_OK); +} + +/* + * This routine parses a single ISO directory record, makes sense + * of any extensions, and stores the result in memory. 
+ */ +static struct file_info * +parse_file_info(struct iso9660 *iso9660, struct file_info *parent, + const struct iso9660_directory_record *isodirrec) +{ + struct file_info *file; + + /* TODO: Sanity check that name_len doesn't exceed length, etc. */ + + /* Create a new file entry and copy data from the ISO dir record. */ + file = malloc(sizeof(*file)); + if (file == NULL) + return (NULL); + memset(file, 0, sizeof(*file)); + file->parent = parent; + if (parent != NULL) + parent->refcount++; + file->offset = toi(isodirrec->extent, 4) + * iso9660->logical_block_size; + file->size = toi(isodirrec->size, 4); + file->mtime = isodate7(isodirrec->date); + file->ctime = file->atime = file->mtime; + file->name = malloc(isodirrec->name_len[0] + 1); + if (file->name == NULL) { + free(file); + return (NULL); + } + memcpy(file->name, isodirrec->name, isodirrec->name_len[0]); + file->name[(int)isodirrec->name_len[0]] = '\0'; + if (isodirrec->flags[0] & 0x02) + file->mode = S_IFDIR | 0700; + else + file->mode = S_IFREG | 0400; + + /* Rockridge extensions overwrite information from above. */ + { + const unsigned char *rr_start, *rr_end; + rr_end = (const unsigned char *)isodirrec + + isodirrec->length[0]; + rr_start = (const unsigned char *)isodirrec->name + + isodirrec->name_len[0]; + if ((isodirrec->name_len[0] & 1) == 0) + rr_start++; + rr_start += iso9660->suspOffset; + parse_rockridge(iso9660, file, rr_start, rr_end); + } + + /* DEBUGGING: Warn about attributes I don't yet fully support. 
*/ + if ((isodirrec->flags[0] & ~0x02) != 0) { + fprintf(stderr, "\n ** Unrecognized flag: "); + dump_isodirrec(stderr, isodirrec); + fprintf(stderr, "\n"); + } else if (toi(isodirrec->volume_sequence_number, 2) != 1) { + fprintf(stderr, "\n ** Unrecognized sequence number: "); + dump_isodirrec(stderr, isodirrec); + fprintf(stderr, "\n"); + } else if (isodirrec->file_unit_size[0] != 0) { + fprintf(stderr, "\n ** Unexpected file unit size: "); + dump_isodirrec(stderr, isodirrec); + fprintf(stderr, "\n"); + } else if (isodirrec->interleave[0] != 0) { + fprintf(stderr, "\n ** Unexpected interleave: "); + dump_isodirrec(stderr, isodirrec); + fprintf(stderr, "\n"); + } else if (isodirrec->ext_attr_length[0] != 0) { + fprintf(stderr, "\n ** Unexpected extended attribute length: "); + dump_isodirrec(stderr, isodirrec); + fprintf(stderr, "\n"); + } + + return (file); +} + +static void +add_entry(struct iso9660 *iso9660, struct file_info *file) +{ + /* Expand our pending files list as necessary. */ + if (iso9660->pending_files_used >= iso9660->pending_files_allocated) { + struct file_info **new_pending_files; + int new_size = iso9660->pending_files_allocated * 2; + + if (new_size < 1024) + new_size = 1024; + new_pending_files = malloc(new_size * sizeof(new_pending_files[0])); + if (new_pending_files == NULL) + __archive_errx(1, "Out of memory"); + memcpy(new_pending_files, iso9660->pending_files, + iso9660->pending_files_allocated * sizeof(new_pending_files[0])); + if (iso9660->pending_files != NULL) + free(iso9660->pending_files); + iso9660->pending_files = new_pending_files; + iso9660->pending_files_allocated = new_size; + } + + iso9660->pending_files[iso9660->pending_files_used++] = file; +} + +static void +parse_rockridge(struct iso9660 *iso9660, struct file_info *file, + const unsigned char *p, const unsigned char *end) +{ + (void)iso9660; /* UNUSED */ + + while (p + 4 < end /* Enough space for another entry. 
*/ + && p[0] >= 'A' && p[0] <= 'Z' /* Sanity-check 1st char of name. */ + && p[1] >= 'A' && p[1] <= 'Z' /* Sanity-check 2nd char of name. */ + && p + p[2] <= end) { /* Sanity-check length. */ + const unsigned char *data = p + 4; + int data_length = p[2] - 4; + int version = p[3]; + + /* + * Yes, each 'if' here does test p[0] again. + * Otherwise, the fall-through handling to catch + * unsupported extensions doesn't work. + */ + switch(p[0]) { + case 'C': + if (p[0] == 'C' && p[1] == 'E' && version == 1) { + /* + * CE extension comprises: + * 8 byte sector containing extension + * 8 byte offset w/in above sector + * 8 byte length of continuation + */ + file->ce_offset = toi(data, 4) + * iso9660->logical_block_size + + toi(data + 8, 4); + file->ce_size = toi(data + 16, 4); + break; + } + /* FALLTHROUGH */ + case 'N': + if (p[0] == 'N' && p[1] == 'M' && version == 1 + && *data == 0) { + /* NM extension with flag byte == 0 */ + /* + * NM extension comprises: + * one byte flag + * rest is long name + */ + /* TODO: Obey flags. */ + char *old_name = file->name; + + data++; /* Skip flag byte. */ + data_length--; + file->name = malloc(data_length + 1); + if (file->name != NULL) { + free(old_name); + memcpy(file->name, data, data_length); + file->name[data_length] = '\0'; + } else + file->name = old_name; + break; + } + /* FALLTHROUGH */ + case 'P': + if (p[0] == 'P' && p[1] == 'D' && version == 1) { + /* + * PD extension is padding; + * contents are always ignored. + */ + break; + } + if (p[0] == 'P' && p[1] == 'X' && version == 1) { + /* + * PX extension comprises: + * 8 bytes for mode, + * 8 bytes for nlinks, + * 8 bytes for uid, + * 8 bytes for gid, + * 8 bytes for inode. 
+ */ + if (data_length == 32) { + file->mode = toi(data, 4); + file->nlinks = toi(data + 8, 4); + file->uid = toi(data + 16, 4); + file->gid = toi(data + 24, 4); + file->inode = toi(data + 32, 4); + } + break; + } + /* FALLTHROUGH */ + case 'R': + if (p[0] == 'R' && p[1] == 'R' && version == 1) { + iso9660->seenRockridge = 1; + /* + * RR extension comprises: + * one byte flag value + */ + /* TODO: Handle RR extension. */ + break; + } + /* FALLTHROUGH */ + case 'S': + if (p[0] == 'S' && p[1] == 'L' && version == 1 + && *data == 0) { + int cont = 1; + /* SL extension with flags == 0 */ + /* TODO: handle non-zero flag values. */ + data++; /* Skip flag byte. */ + data_length--; + while (data_length > 0) { + unsigned char flag = *data++; + unsigned char nlen = *data++; + data_length -= 2; + + if (cont == 0) + archive_strcat(&file->symlink, "/"); + cont = 0; + + switch(flag) { + case 0x01: /* Continue */ + archive_strncat(&file->symlink, + (const char *)data, nlen); + cont = 1; + break; + case 0x02: /* Current */ + archive_strcat(&file->symlink, "."); + break; + case 0x04: /* Parent */ + archive_strcat(&file->symlink, ".."); + break; + case 0x08: /* Root */ + case 0x10: /* Volume root */ + archive_string_empty(&file->symlink); + break; + case 0x20: /* Hostname */ + archive_strcat(&file->symlink, "hostname"); + break; + case 0: + archive_strncat(&file->symlink, + (const char *)data, nlen); + break; + default: + /* TODO: issue a warning ? */ + break; + } + data += nlen; + data_length -= nlen; + } + break; + } + if (p[0] == 'S' && p[1] == 'P' + && version == 1 && data_length == 7 + && data[0] == (unsigned char)'\xbe' + && data[1] == (unsigned char)'\xef') { + /* + * SP extension stores the suspOffset + * (Number of bytes to skip between + * filename and SUSP records.) + * It is mandatory by the SUSP standard + * (IEEE 1281). + * + * It allows SUSP to coexist with + * non-SUSP uses of the System + * Use Area by placing non-SUSP data + * before SUSP data. 
+ * + * TODO: Add a check for 'SP' in + * first directory entry, disable all SUSP + * processing if not found. + */ + iso9660->suspOffset = data[2]; + break; + } + if (p[0] == 'S' && p[1] == 'T' + && data_length == 0 && version == 1) { + /* + * ST extension marks end of this + * block of SUSP entries. + * + * It allows SUSP to coexist with + * non-SUSP uses of the System + * Use Area by placing non-SUSP data + * after SUSP data. + */ + return; + } + case 'T': + if (p[0] == 'T' && p[1] == 'F' && version == 1) { + char flag = data[0]; + /* + * TF extension comprises: + * one byte flag + * create time (optional) + * modify time (optional) + * access time (optional) + * attribute time (optional) + * Time format and presence of fields + * is controlled by flag bits. + */ + data++; + if (flag & 0x80) { + /* Use 17-byte time format. */ + if (flag & 1) /* Create time. */ + data += 17; + if (flag & 2) { /* Modify time. */ + file->mtime = isodate17(data); + data += 17; + } + if (flag & 4) { /* Access time. */ + file->atime = isodate17(data); + data += 17; + } + if (flag & 8) { /* Attribute time. */ + file->ctime = isodate17(data); + data += 17; + } + } else { + /* Use 7-byte time format. */ + if (flag & 1) /* Create time. */ + data += 7; + if (flag & 2) { /* Modify time. */ + file->mtime = isodate7(data); + data += 7; + } + if (flag & 4) { /* Access time. */ + file->atime = isodate7(data); + data += 7; + } + if (flag & 8) { /* Attribute time. */ + file->ctime = isodate7(data); + data += 7; + } + } + break; + } + /* FALLTHROUGH */ + default: + /* The FALLTHROUGHs above leave us here for + * any unsupported extension. 
*/ + { + const unsigned char *t; + fprintf(stderr, "\nUnsupported RRIP extension for %s\n", file->name); + fprintf(stderr, " %c%c(%d):", p[0], p[1], data_length); + for (t = data; t < data + data_length && t < data + 16; t++) + fprintf(stderr, " %02x", *t); + fprintf(stderr, "\n"); + } + } + + + + p += p[2]; + } +} + +static void +release_file(struct iso9660 *iso9660, struct file_info *file) +{ + struct file_info *parent; + + if (file->refcount == 0) { + parent = file->parent; + if (file->name) + free(file->name); + archive_string_free(&file->symlink); + free(file); + if (parent != NULL) { + parent->refcount--; + release_file(iso9660, parent); + } + } +} + +static int +next_entry_seek(struct archive *a, struct iso9660 *iso9660, + struct file_info **pfile) +{ + struct file_info *file; + uint64_t offset; + + *pfile = NULL; + for (;;) { + *pfile = file = next_entry(iso9660); + if (file == NULL) + return (ARCHIVE_EOF); + + /* CE area precedes actual file data? Ignore it. */ + if (file->ce_offset > file->offset) { +fprintf(stderr, " *** Discarding CE data.\n"); + file->ce_offset = 0; + file->ce_size = 0; + } + + /* If CE exists, find and read it now. */ + if (file->ce_offset > 0) + offset = file->ce_offset; + else + offset = file->offset; + + /* Seek forward to the start of the entry. */ + while (iso9660->current_position < offset) { + ssize_t step = offset - iso9660->current_position; + ssize_t bytes_read; + const void *buff; + + if (step > iso9660->logical_block_size) + step = iso9660->logical_block_size; + bytes_read = (a->compression_read_ahead)(a, &buff, step); + if (bytes_read <= 0) { + release_file(iso9660, file); + return (ARCHIVE_FATAL); + } + if (bytes_read > step) + bytes_read = step; + iso9660->current_position += bytes_read; + (a->compression_read_consume)(a, bytes_read); + } + + /* We found body of file; handle it now. */ + if (offset == file->offset) + return (ARCHIVE_OK); + + /* Found CE? Process it and push the file back onto list. 
*/ + if (offset == file->ce_offset) { + const void *p; + ssize_t size = file->ce_size; + ssize_t bytes_read; + const unsigned char *rr_start; + + file->ce_offset = 0; + file->ce_size = 0; + bytes_read = (a->compression_read_ahead)(a, &p, size); + if (bytes_read > size) + bytes_read = size; + rr_start = (const unsigned char *)p; + parse_rockridge(iso9660, file, rr_start, + rr_start + bytes_read); + (a->compression_read_consume)(a, bytes_read); + iso9660->current_position += bytes_read; + add_entry(iso9660, file); + } + } +} + +static struct file_info * +next_entry(struct iso9660 *iso9660) +{ + int least_index; + uint64_t least_end_offset; + int i; + struct file_info *r; + + if (iso9660->pending_files_used < 1) + return (NULL); + + /* Assume the first file in the list is the earliest on disk. */ + least_index = 0; + least_end_offset = iso9660->pending_files[0]->offset + + iso9660->pending_files[0]->size; + + /* Now, try to find an earlier one. */ + for (i = 0; i < iso9660->pending_files_used; i++) { + /* Use the position of the file *end* as our comparison. 
*/ + uint64_t end_offset = iso9660->pending_files[i]->offset + + iso9660->pending_files[i]->size; + if (iso9660->pending_files[i]->ce_offset > 0 + && iso9660->pending_files[i]->ce_offset < iso9660->pending_files[i]->offset) + end_offset = iso9660->pending_files[i]->ce_offset + + iso9660->pending_files[i]->ce_size; + if (least_end_offset > end_offset) { + least_index = i; + least_end_offset = end_offset; + } + } + r = iso9660->pending_files[least_index]; + iso9660->pending_files[least_index] + = iso9660->pending_files[--iso9660->pending_files_used]; + return (r); +} + +static int +toi(const void *p, int n) +{ + const unsigned char *v = (const unsigned char *)p; + if (n > 1) + return v[0] + 256 * toi(v + 1, n - 1); + if (n == 1) + return v[0]; + return (0); +} + +static time_t +isodate7(const void *p) +{ + struct tm tm; + const unsigned char *v = (const unsigned char *)p; + int offset; + memset(&tm, 0, sizeof(tm)); + tm.tm_year = v[0]; + tm.tm_mon = v[1] - 1; + tm.tm_mday = v[2]; + tm.tm_hour = v[3]; + tm.tm_min = v[4]; + tm.tm_sec = v[5]; + /* v[6] is the timezone offset, in 1/4-hour increments. */ + offset = ((const signed char *)p)[6]; + if (offset > -48 && offset < 52) { + tm.tm_hour -= offset / 4; + tm.tm_min -= (offset % 4) * 15; + } + return (time_from_tm(&tm)); +} + +static time_t +isodate17(const void *p) +{ + struct tm tm; + const unsigned char *v = (const unsigned char *)p; + int offset; + memset(&tm, 0, sizeof(tm)); + tm.tm_year = (v[0] - '0') * 1000 + (v[1] - '0') * 100 + + (v[2] - '0') * 10 + (v[3] - '0') + - 1900; + tm.tm_mon = (v[4] - '0') * 10 + (v[5] - '0'); + tm.tm_mday = (v[6] - '0') * 10 + (v[7] - '0'); + tm.tm_hour = (v[8] - '0') * 10 + (v[9] - '0'); + tm.tm_min = (v[10] - '0') * 10 + (v[11] - '0'); + tm.tm_sec = (v[12] - '0') * 10 + (v[13] - '0'); + /* v[16] is the timezone offset, in 1/4-hour increments. 
*/ + offset = ((const signed char *)p)[16]; + if (offset > -48 && offset < 52) { + tm.tm_hour -= offset / 4; + tm.tm_min -= (offset % 4) * 15; + } + return (time_from_tm(&tm)); +} + +/* + * timegm() converts a struct tm to a time_t, except it isn't standard, + * so I provide my own function here that (ideally) is just a wrapper + * for timegm(). + */ +static time_t +time_from_tm(struct tm *t) +{ +#if HAVE_TIMEGM + return (timegm(t)); +#else + /* + * Unfortunately, timegm() isn't standard. The standard + * mktime() function is a close match, except that it uses + * local timezone instead of GMT. Close enough for now. + * Note that it is not possible to emulate timegm() using + * standard interfaces: + * * ANSI C90 does not even guarantee that time_t is + * an arithmetic type, so time adjustments can only be + * done by manipulating struct tm elements. You cannot + * portably calculate time_t values. + * * POSIX does promise that time_t is an arithmetic type + * measured in seconds, so you can do time_t calculations + * while remaining POSIX-compliant. + * * Neither ANSI nor POSIX provides an easy way to measure + * the timezone offset, so you can't adjust mktime() to + * work like timegm(). + * * POSIX does not promise that the epoch begins in 1970, + * so you can't write a portable timegm() function from + * scratch. + */ + time_t result = mktime(t); + /* TODO: Find a way to improve this approximation to timegm(). 
*/ + return result; +#endif +} + +static const char * +build_pathname(struct archive_string *as, struct file_info *file) +{ + if (file->parent != NULL && file->parent->name[0] != '\0') { + build_pathname(as, file->parent); + archive_strcat(as, "/"); + } + if (file->name[0] == '\0') + archive_strcat(as, "."); + else + archive_strcat(as, file->name); + return (as->s); +} + +static void +dump_isodirrec(FILE *out, const struct iso9660_directory_record *isodirrec) +{ + fprintf(out, " l %d,", isodirrec->length[0]); + fprintf(out, " a %d,", isodirrec->ext_attr_length[0]); + fprintf(out, " ext 0x%x,", toi(isodirrec->extent, 4)); + fprintf(out, " s %d,", toi(isodirrec->size, 4)); + fprintf(out, " f 0x%02x,", isodirrec->flags[0]); + fprintf(out, " u %d,", isodirrec->file_unit_size[0]); + fprintf(out, " ilv %d,", isodirrec->interleave[0]); + fprintf(out, " seq %d,", toi(isodirrec->volume_sequence_number,2)); + fprintf(out, " nl %d:", isodirrec->name_len[0]); + fprintf(out, " `%.*s'", isodirrec->name_len[0], isodirrec->name); +} diff --git a/lib/libarchive/archive_read_support_format_tar.c b/lib/libarchive/archive_read_support_format_tar.c new file mode 100644 index 0000000..271a41f --- /dev/null +++ b/lib/libarchive/archive_read_support_format_tar.c @@ -0,0 +1,1898 @@ +/*- + * Copyright (c) 2003-2006 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> +#ifdef MAJOR_IN_MKDEV +#include <sys/mkdev.h> +#else +#ifdef MAJOR_IN_SYSMACROS +#include <sys/sysmacros.h> +#endif +#endif +#include <errno.h> +#include <stddef.h> +/* #include <stdint.h> */ /* See archive_platform.h */ +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +/* Obtain suitable wide-character manipulation functions. */ +#ifdef HAVE_WCHAR_H +#include <wchar.h> +#else +/* Good enough for equality testing, which is all we need. */ +static int wcscmp(const wchar_t *s1, const wchar_t *s2) +{ + int diff = *s1 - *s2; + while (*s1 && diff == 0) + diff = (int)*++s1 - (int)*++s2; + return diff; +} +/* Good enough for equality testing, which is all we need. */ +static int wcsncmp(const wchar_t *s1, const wchar_t *s2, size_t n) +{ + int diff = *s1 - *s2; + while (*s1 && diff == 0 && n-- > 0) + diff = (int)*++s1 - (int)*++s2; + return diff; +} +static size_t wcslen(const wchar_t *s) +{ + const wchar_t *p = s; + while (*p) + p++; + return p - s; +} +#endif + +#include "archive.h" +#include "archive_entry.h" +#include "archive_private.h" + +/* + * Layout of POSIX 'ustar' tar header. 
+ */ +struct archive_entry_header_ustar { + char name[100]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char checksum[8]; + char typeflag[1]; + char linkname[100]; /* "old format" header ends here */ + char magic[6]; /* For POSIX: "ustar\0" */ + char version[2]; /* For POSIX: "00" */ + char uname[32]; + char gname[32]; + char rdevmajor[8]; + char rdevminor[8]; + char prefix[155]; +}; + +/* + * Structure of GNU tar header + */ +struct gnu_sparse { + char offset[12]; + char numbytes[12]; +}; + +struct archive_entry_header_gnutar { + char name[100]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char checksum[8]; + char typeflag[1]; + char linkname[100]; + char magic[8]; /* "ustar \0" (note blank/blank/null at end) */ + char uname[32]; + char gname[32]; + char rdevmajor[8]; + char rdevminor[8]; + char atime[12]; + char ctime[12]; + char offset[12]; + char longnames[4]; + char unused[1]; + struct gnu_sparse sparse[4]; + char isextended[1]; + char realsize[12]; + /* + * GNU doesn't use POSIX 'prefix' field; they use the 'L' (longname) + * entry instead. + */ +}; + +/* + * Data specific to this format. 
+ */ +struct sparse_block { + struct sparse_block *next; + off_t offset; + off_t remaining; +}; + +struct tar { + struct archive_string acl_text; + struct archive_string entry_name; + struct archive_string entry_linkname; + struct archive_string entry_uname; + struct archive_string entry_gname; + struct archive_string longlink; + struct archive_string longname; + struct archive_string pax_header; + struct archive_string pax_global; + wchar_t *pax_entry; + size_t pax_entry_length; + int header_recursion_depth; + off_t entry_bytes_remaining; + off_t entry_offset; + off_t entry_padding; + struct sparse_block *sparse_list; +}; + +static size_t UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n); +static int archive_block_is_null(const unsigned char *p); +static char *base64_decode(const wchar_t *, size_t, size_t *); +static int gnu_read_sparse_data(struct archive *, struct tar *, + const struct archive_entry_header_gnutar *header); +static void gnu_parse_sparse_data(struct archive *, struct tar *, + const struct gnu_sparse *sparse, int length); +static int header_Solaris_ACL(struct archive *, struct tar *, + struct archive_entry *, struct stat *, const void *); +static int header_common(struct archive *, struct tar *, + struct archive_entry *, struct stat *, const void *); +static int header_old_tar(struct archive *, struct tar *, + struct archive_entry *, struct stat *, const void *); +static int header_pax_extensions(struct archive *, struct tar *, + struct archive_entry *, struct stat *, const void *); +static int header_pax_global(struct archive *, struct tar *, + struct archive_entry *, struct stat *, const void *h); +static int header_longlink(struct archive *, struct tar *, + struct archive_entry *, struct stat *, const void *h); +static int header_longname(struct archive *, struct tar *, + struct archive_entry *, struct stat *, const void *h); +static int header_volume(struct archive *, struct tar *, + struct archive_entry *, struct stat *, const void *h); 
+static int header_ustar(struct archive *, struct tar *, + struct archive_entry *, struct stat *, const void *h); +static int header_gnutar(struct archive *, struct tar *, + struct archive_entry *, struct stat *, const void *h); +static int archive_read_format_tar_bid(struct archive *); +static int archive_read_format_tar_cleanup(struct archive *); +static int archive_read_format_tar_read_data(struct archive *a, + const void **buff, size_t *size, off_t *offset); +static int archive_read_format_tar_read_header(struct archive *, + struct archive_entry *); +static int checksum(struct archive *, const void *); +static int pax_attribute(struct archive_entry *, struct stat *, + wchar_t *key, wchar_t *value); +static int pax_header(struct archive *, struct tar *, + struct archive_entry *, struct stat *, char *attr); +static void pax_time(const wchar_t *, int64_t *sec, long *nanos); +static int read_body_to_string(struct archive *, struct tar *, + struct archive_string *, const void *h); +static int64_t tar_atol(const char *, unsigned); +static int64_t tar_atol10(const wchar_t *, unsigned); +static int64_t tar_atol256(const char *, unsigned); +static int64_t tar_atol8(const char *, unsigned); +static int tar_read_header(struct archive *, struct tar *, + struct archive_entry *, struct stat *); +static int tohex(int c); +static char *url_decode(const char *); +static int utf8_decode(wchar_t *, const char *, size_t length); +static char *wide_to_narrow(const wchar_t *wval); + +/* + * ANSI C99 defines constants for these, but not everyone supports + * those constants, so I define a couple of static variables here and + * compute the values. These calculations should be portable to any + * 2s-complement architecture. 
+ */ +#ifdef UINT64_MAX +static const uint64_t max_uint64 = UINT64_MAX; +#else +static const uint64_t max_uint64 = ~(uint64_t)0; +#endif +#ifdef INT64_MAX +static const int64_t max_int64 = INT64_MAX; +#else +static const int64_t max_int64 = (int64_t)((~(uint64_t)0) >> 1); +#endif +#ifdef INT64_MIN +static const int64_t min_int64 = INT64_MIN; +#else +static const int64_t min_int64 = (int64_t)(~((~(uint64_t)0) >> 1)); +#endif + +int +archive_read_support_format_gnutar(struct archive *a) +{ + return (archive_read_support_format_tar(a)); +} + + +int +archive_read_support_format_tar(struct archive *a) +{ + struct tar *tar; + int r; + + tar = malloc(sizeof(*tar)); + if (tar == NULL) { + archive_set_error(a, ENOMEM, "Can't allocate tar data"); + return (ARCHIVE_FATAL); + } + memset(tar, 0, sizeof(*tar)); + + r = __archive_read_register_format(a, tar, + archive_read_format_tar_bid, + archive_read_format_tar_read_header, + archive_read_format_tar_read_data, + NULL, + archive_read_format_tar_cleanup); + + if (r != ARCHIVE_OK) + free(tar); + return (ARCHIVE_OK); +} + +static int +archive_read_format_tar_cleanup(struct archive *a) +{ + struct tar *tar; + + tar = *(a->pformat_data); + archive_string_free(&tar->acl_text); + archive_string_free(&tar->entry_name); + archive_string_free(&tar->entry_linkname); + archive_string_free(&tar->entry_uname); + archive_string_free(&tar->entry_gname); + archive_string_free(&tar->pax_global); + archive_string_free(&tar->pax_header); + if (tar->pax_entry != NULL) + free(tar->pax_entry); + free(tar); + *(a->pformat_data) = NULL; + return (ARCHIVE_OK); +} + + +static int +archive_read_format_tar_bid(struct archive *a) +{ + int bid; + ssize_t bytes_read; + const void *h; + const struct archive_entry_header_ustar *header; + + /* + * If we're already reading a non-tar file, don't + * bother to bid. 
+ */ + if (a->archive_format != 0 && + (a->archive_format & ARCHIVE_FORMAT_BASE_MASK) != + ARCHIVE_FORMAT_TAR) + return (0); + bid = 0; + + /* + * If we're already reading a tar format, start the bid at 1 as + * a failsafe. + */ + if ((a->archive_format & ARCHIVE_FORMAT_BASE_MASK) == + ARCHIVE_FORMAT_TAR) + bid++; + + /* Now let's look at the actual header and see if it matches. */ + if (a->compression_read_ahead != NULL) + bytes_read = (a->compression_read_ahead)(a, &h, 512); + else + bytes_read = 0; /* Empty file. */ + if (bytes_read < 0) + return (ARCHIVE_FATAL); + if (bytes_read == 0 && bid > 0) { + /* An archive without a proper end-of-archive marker. */ + /* Hold our nose and bid 1 anyway. */ + return (1); + } + if (bytes_read < 512) { + /* If it's a new archive, then just return a zero bid. */ + if (bid == 0) + return (0); + /* + * If we already know this is a tar archive, + * then we have a problem. + */ + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated tar archive"); + return (ARCHIVE_FATAL); + } + + /* If it's an end-of-archive mark, we can handle it. */ + if ((*(const char *)h) == 0 && archive_block_is_null(h)) { + /* If it's a known tar file, end-of-archive is definite. */ + if ((a->archive_format & ARCHIVE_FORMAT_BASE_MASK) == + ARCHIVE_FORMAT_TAR) + return (512); + /* Empty archive? */ + return (1); + } + + /* If it's not an end-of-archive mark, it must have a valid checksum.*/ + if (!checksum(a, h)) + return (0); + bid += 48; /* Checksum is usually 6 octal digits. */ + + header = h; + + /* Recognize POSIX formats. */ + if ((memcmp(header->magic, "ustar\0", 6) == 0) + &&(memcmp(header->version, "00", 2)==0)) + bid += 56; + + /* Recognize GNU tar format. */ + if ((memcmp(header->magic, "ustar ", 6) == 0) + &&(memcmp(header->version, " \0", 2)==0)) + bid += 56; + + /* Type flag must be null, digit or A-Z, a-z. 
*/ + if (header->typeflag[0] != 0 && + !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') && + !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') && + !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') ) + return (0); + bid += 2; /* 6 bits of variation in an 8-bit field leaves 2 bits. */ + + /* Sanity check: Look at first byte of mode field. */ + switch (255 & (unsigned)header->mode[0]) { + case 0: case 255: + /* Base-256 value: No further verification possible! */ + break; + case ' ': /* Not recommended, but not illegal, either. */ + break; + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + /* Octal Value. */ + /* TODO: Check format of remainder of this field. */ + break; + default: + /* Not a valid mode; bail out here. */ + return (0); + } + /* TODO: Sanity test uid/gid/size/mtime/rdevmajor/rdevminor fields. */ + + return (bid); +} + +/* + * The function invoked by archive_read_header(). This + * just sets up a few things and then calls the internal + * tar_read_header() function below. + */ +static int +archive_read_format_tar_read_header(struct archive *a, + struct archive_entry *entry) +{ + /* + * When converting tar archives to cpio archives, it is + * essential that each distinct file have a distinct inode + * number. To simplify this, we keep a static count here to + * assign fake dev/inode numbers to each tar entry. Note that + * pax format archives may overwrite this with something more + * useful. + * + * Ideally, we would track every file read from the archive so + * that we could assign the same dev/ino pair to hardlinks, + * but the memory required to store a complete lookup table is + * probably not worthwhile just to support the relatively + * obscure tar->cpio conversion case. + */ + static int default_inode; + static int default_dev; + struct stat st; + struct tar *tar; + const char *p; + int r; + size_t l; + + memset(&st, 0, sizeof(st)); + /* Assign default device/inode values. 
*/ + st.st_dev = 1 + default_dev; /* Don't use zero. */ + st.st_ino = ++default_inode; /* Don't use zero. */ + /* Limit generated st_ino number to 16 bits. */ + if (default_inode >= 0xffff) { + ++default_dev; + default_inode = 0; + } + + tar = *(a->pformat_data); + tar->entry_offset = 0; + + r = tar_read_header(a, tar, entry, &st); + + if (r == ARCHIVE_OK) { + /* + * "Regular" entry with trailing '/' is really + * directory: This is needed for certain old tar + * variants and even for some broken newer ones. + */ + p = archive_entry_pathname(entry); + l = strlen(p); + if (S_ISREG(st.st_mode) && p[l-1] == '/') { + st.st_mode &= ~S_IFMT; + st.st_mode |= S_IFDIR; + } + + /* Copy the final stat data into the entry. */ + archive_entry_copy_stat(entry, &st); + } + return (r); +} + +static int +archive_read_format_tar_read_data(struct archive *a, + const void **buff, size_t *size, off_t *offset) +{ + ssize_t bytes_read; + struct tar *tar; + struct sparse_block *p; + + tar = *(a->pformat_data); + if (tar->sparse_list != NULL) { + /* Remove exhausted entries from sparse list. */ + while (tar->sparse_list != NULL && + tar->sparse_list->remaining == 0) { + p = tar->sparse_list; + tar->sparse_list = p->next; + free(p); + } + if (tar->sparse_list == NULL) { + /* We exhausted the entire sparse list. */ + tar->entry_bytes_remaining = 0; + } + } + + if (tar->entry_bytes_remaining > 0) { + bytes_read = (a->compression_read_ahead)(a, buff, 1); + if (bytes_read <= 0) + return (ARCHIVE_FATAL); + if (bytes_read > tar->entry_bytes_remaining) + bytes_read = tar->entry_bytes_remaining; + if (tar->sparse_list != NULL) { + /* Don't read more than is available in the + * current sparse block. 
*/ + if (tar->sparse_list->remaining < bytes_read) + bytes_read = tar->sparse_list->remaining; + tar->entry_offset = tar->sparse_list->offset; + tar->sparse_list->remaining -= bytes_read; + tar->sparse_list->offset += bytes_read; + } + *size = bytes_read; + *offset = tar->entry_offset; + tar->entry_offset += bytes_read; + tar->entry_bytes_remaining -= bytes_read; + (a->compression_read_consume)(a, bytes_read); + return (ARCHIVE_OK); + } else { + while (tar->entry_padding > 0) { + bytes_read = (a->compression_read_ahead)(a, buff, 1); + if (bytes_read <= 0) + return (ARCHIVE_FATAL); + if (bytes_read > tar->entry_padding) + bytes_read = tar->entry_padding; + (a->compression_read_consume)(a, bytes_read); + tar->entry_padding -= bytes_read; + } + *buff = NULL; + *size = 0; + *offset = tar->entry_offset; + return (ARCHIVE_EOF); + } +} + +/* + * This function recursively interprets all of the headers associated + * with a single entry. + */ +static int +tar_read_header(struct archive *a, struct tar *tar, + struct archive_entry *entry, struct stat *st) +{ + ssize_t bytes; + int err; + const void *h; + const struct archive_entry_header_ustar *header; + + /* Read 512-byte header record */ + bytes = (a->compression_read_ahead)(a, &h, 512); + if (bytes < 512) { + /* + * If we're here, it's becase the _bid function accepted + * this file. So just call a short read end-of-archive + * and be done with it. + */ + return (ARCHIVE_EOF); + } + (a->compression_read_consume)(a, 512); + + /* Check for end-of-archive mark. */ + if (((*(const char *)h)==0) && archive_block_is_null(h)) { + /* Try to consume a second all-null record, as well. */ + bytes = (a->compression_read_ahead)(a, &h, 512); + if (bytes > 0) + (a->compression_read_consume)(a, bytes); + archive_set_error(a, 0, NULL); + return (ARCHIVE_EOF); + } + + /* + * Note: If the checksum fails and we return ARCHIVE_RETRY, + * then the client is likely to just retry. This is a very + * crude way to search for the next valid header! 
+ * + * TODO: Improve this by implementing a real header scan. + */ + if (!checksum(a, h)) { + archive_set_error(a, EINVAL, "Damaged tar archive"); + return (ARCHIVE_RETRY); /* Retryable: Invalid header */ + } + + if (++tar->header_recursion_depth > 32) { + archive_set_error(a, EINVAL, "Too many special headers"); + return (ARCHIVE_WARN); + } + + /* Determine the format variant. */ + header = h; + switch(header->typeflag[0]) { + case 'A': /* Solaris tar ACL */ + a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; + a->archive_format_name = "Solaris tar"; + err = header_Solaris_ACL(a, tar, entry, st, h); + break; + case 'g': /* POSIX-standard 'g' header. */ + a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; + a->archive_format_name = "POSIX pax interchange format"; + err = header_pax_global(a, tar, entry, st, h); + break; + case 'K': /* Long link name (GNU tar, others) */ + err = header_longlink(a, tar, entry, st, h); + break; + case 'L': /* Long filename (GNU tar, others) */ + err = header_longname(a, tar, entry, st, h); + break; + case 'V': /* GNU volume header */ + err = header_volume(a, tar, entry, st, h); + break; + case 'X': /* Used by SUN tar; same as 'x'. */ + a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; + a->archive_format_name = + "POSIX pax interchange format (Sun variant)"; + err = header_pax_extensions(a, tar, entry, st, h); + break; + case 'x': /* POSIX-standard 'x' header. 
*/ + a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; + a->archive_format_name = "POSIX pax interchange format"; + err = header_pax_extensions(a, tar, entry, st, h); + break; + default: + if (memcmp(header->magic, "ustar \0", 8) == 0) { + a->archive_format = ARCHIVE_FORMAT_TAR_GNUTAR; + a->archive_format_name = "GNU tar format"; + err = header_gnutar(a, tar, entry, st, h); + } else if (memcmp(header->magic, "ustar", 5) == 0) { + if (a->archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { + a->archive_format = ARCHIVE_FORMAT_TAR_USTAR; + a->archive_format_name = "POSIX ustar format"; + } + err = header_ustar(a, tar, entry, st, h); + } else { + a->archive_format = ARCHIVE_FORMAT_TAR; + a->archive_format_name = "tar (non-POSIX)"; + err = header_old_tar(a, tar, entry, st, h); + } + } + --tar->header_recursion_depth; + return (err); +} + +/* + * Return true if block checksum is correct. + */ +static int +checksum(struct archive *a, const void *h) +{ + const unsigned char *bytes; + const struct archive_entry_header_ustar *header; + int check, i, sum; + + (void)a; /* UNUSED */ + bytes = h; + header = h; + + /* + * Test the checksum. Note that POSIX specifies _unsigned_ + * bytes for this calculation. + */ + sum = tar_atol(header->checksum, sizeof(header->checksum)); + check = 0; + for (i = 0; i < 148; i++) + check += (unsigned char)bytes[i]; + for (; i < 156; i++) + check += 32; + for (; i < 512; i++) + check += (unsigned char)bytes[i]; + if (sum == check) + return (1); + + /* + * Repeat test with _signed_ bytes, just in case this archive + * was created by an old BSD, Solaris, or HP-UX tar with a + * broken checksum calculation. + */ + check = 0; + for (i = 0; i < 148; i++) + check += (signed char)bytes[i]; + for (; i < 156; i++) + check += 32; + for (; i < 512; i++) + check += (signed char)bytes[i]; + if (sum == check) + return (1); + + return (0); +} + +/* + * Return true if this block contains only nulls. 
+ */ +static int +archive_block_is_null(const unsigned char *p) +{ + unsigned i; + + for (i = 0; i < ARCHIVE_BYTES_PER_RECORD / sizeof(*p); i++) + if (*p++) + return (0); + return (1); +} + +/* + * Interpret 'A' Solaris ACL header + */ +static int +header_Solaris_ACL(struct archive *a, struct tar *tar, + struct archive_entry *entry, struct stat *st, const void *h) +{ + int err, err2; + char *p; + wchar_t *wp; + + err = read_body_to_string(a, tar, &(tar->acl_text), h); + err2 = tar_read_header(a, tar, entry, st); + err = err_combine(err, err2); + + /* XXX Ensure p doesn't overrun acl_text */ + + /* Skip leading octal number. */ + /* XXX TODO: Parse the octal number and sanity-check it. */ + p = tar->acl_text.s; + while (*p != '\0') + p++; + p++; + + wp = malloc((strlen(p) + 1) * sizeof(wchar_t)); + if (wp != NULL) { + utf8_decode(wp, p, strlen(p)); + err2 = __archive_entry_acl_parse_w(entry, wp, + ARCHIVE_ENTRY_ACL_TYPE_ACCESS); + err = err_combine(err, err2); + free(wp); + } + + return (err); +} + +/* + * Interpret 'K' long linkname header. + */ +static int +header_longlink(struct archive *a, struct tar *tar, + struct archive_entry *entry, struct stat *st, const void *h) +{ + int err, err2; + + err = read_body_to_string(a, tar, &(tar->longlink), h); + err2 = tar_read_header(a, tar, entry, st); + if (err == ARCHIVE_OK && err2 == ARCHIVE_OK) { + /* Set symlink if symlink already set, else hardlink. */ + archive_entry_set_link(entry, tar->longlink.s); + } + return (err_combine(err, err2)); +} + +/* + * Interpret 'L' long filename header. + */ +static int +header_longname(struct archive *a, struct tar *tar, + struct archive_entry *entry, struct stat *st, const void *h) +{ + int err, err2; + + err = read_body_to_string(a, tar, &(tar->longname), h); + /* Read and parse "real" header, then override name. 
*/ + err2 = tar_read_header(a, tar, entry, st); + if (err == ARCHIVE_OK && err2 == ARCHIVE_OK) + archive_entry_set_pathname(entry, tar->longname.s); + return (err_combine(err, err2)); +} + + +/* + * Interpret 'V' GNU tar volume header. + */ +static int +header_volume(struct archive *a, struct tar *tar, + struct archive_entry *entry, struct stat *st, const void *h) +{ + (void)h; + + /* Just skip this and read the next header. */ + return (tar_read_header(a, tar, entry, st)); +} + +/* + * Read body of an archive entry into an archive_string object. + */ +static int +read_body_to_string(struct archive *a, struct tar *tar, + struct archive_string *as, const void *h) +{ + off_t size, padded_size; + ssize_t bytes_read, bytes_to_copy; + const struct archive_entry_header_ustar *header; + const void *src; + char *dest; + + (void)tar; /* UNUSED */ + header = h; + size = tar_atol(header->size, sizeof(header->size)); + + /* Read the body into the string. */ + archive_string_ensure(as, size+1); + padded_size = (size + 511) & ~ 511; + dest = as->s; + while (padded_size > 0) { + bytes_read = (a->compression_read_ahead)(a, &src, padded_size); + if (bytes_read < 0) + return (ARCHIVE_FATAL); + if (bytes_read > padded_size) + bytes_read = padded_size; + (a->compression_read_consume)(a, bytes_read); + bytes_to_copy = bytes_read; + if ((off_t)bytes_to_copy > size) + bytes_to_copy = (ssize_t)size; + memcpy(dest, src, bytes_to_copy); + dest += bytes_to_copy; + size -= bytes_to_copy; + padded_size -= bytes_read; + } + *dest = '\0'; + return (ARCHIVE_OK); +} + +/* + * Parse out common header elements. + * + * This would be the same as header_old_tar, except that the + * filename is handled slightly differently for old and POSIX + * entries (POSIX entries support a 'prefix'). This factoring + * allows header_old_tar and header_ustar + * to handle filenames differently, while still putting most of the + * common parsing into one place. 
+ */ +static int +header_common(struct archive *a, struct tar *tar, struct archive_entry *entry, + struct stat *st, const void *h) +{ + const struct archive_entry_header_ustar *header; + char tartype; + + (void)a; /* UNUSED */ + + header = h; + if (header->linkname[0]) + archive_strncpy(&(tar->entry_linkname), header->linkname, + sizeof(header->linkname)); + else + archive_string_empty(&(tar->entry_linkname)); + + /* Parse out the numeric fields (all are octal) */ + st->st_mode = tar_atol(header->mode, sizeof(header->mode)); + st->st_uid = tar_atol(header->uid, sizeof(header->uid)); + st->st_gid = tar_atol(header->gid, sizeof(header->gid)); + st->st_size = tar_atol(header->size, sizeof(header->size)); + st->st_mtime = tar_atol(header->mtime, sizeof(header->mtime)); + + /* Handle the tar type flag appropriately. */ + tartype = header->typeflag[0]; + st->st_mode &= ~S_IFMT; + + switch (tartype) { + case '1': /* Hard link */ + archive_entry_set_hardlink(entry, tar->entry_linkname.s); + /* + * The following may seem odd, but: Technically, tar + * does not store the file type for a "hard link" + * entry, only the fact that it is a hard link. So, I + * leave the type zero normally. But, pax interchange + * format allows hard links to have data, which + * implies that the underlying entry is a regular + * file. + */ + if (st->st_size > 0) + st->st_mode |= S_IFREG; + + /* + * A tricky point: Traditionally, tar readers have + * ignored the size field when reading hardlink + * entries, and some writers put non-zero sizes even + * though the body is empty. POSIX.1-2001 broke with + * this tradition by permitting hardlink entries to + * store valid bodies in pax interchange format, but + * not in ustar format. Since there is no hard and + * fast way to distinguish pax interchange from + * earlier archives (the 'x' and 'g' entries are + * optional, after all), we need a heuristic. Here, I + * use the bid function to test whether or not there's + * a valid header following. 
Of course, if we know + * this is pax interchange format, then we must obey + * the size. + * + * This heuristic will only fail for a pax interchange + * archive that is storing hardlink bodies, no pax + * extended attribute entries have yet occurred, and + * we encounter a hardlink entry for a file that is + * itself an uncompressed tar archive. + */ + if (st->st_size > 0 && + a->archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE && + archive_read_format_tar_bid(a) > 50) + st->st_size = 0; + break; + case '2': /* Symlink */ + st->st_mode |= S_IFLNK; + st->st_size = 0; + archive_entry_set_symlink(entry, tar->entry_linkname.s); + break; + case '3': /* Character device */ + st->st_mode |= S_IFCHR; + st->st_size = 0; + break; + case '4': /* Block device */ + st->st_mode |= S_IFBLK; + st->st_size = 0; + break; + case '5': /* Dir */ + st->st_mode |= S_IFDIR; + st->st_size = 0; + break; + case '6': /* FIFO device */ + st->st_mode |= S_IFIFO; + st->st_size = 0; + break; + case 'D': /* GNU incremental directory type */ + /* + * No special handling is actually required here. + * It might be nice someday to preprocess the file list and + * provide it to the client, though. + */ + st->st_mode |= S_IFDIR; + break; + case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/ + /* + * As far as I can tell, this is just like a regular file + * entry, except that the contents should be _appended_ to + * the indicated file at the indicated offset. This may + * require some API work to fully support. + */ + break; + case 'N': /* Old GNU "long filename" entry. */ + /* The body of this entry is a script for renaming + * previously-extracted entries. Ugh. It will never + * be supported by libarchive. */ + st->st_mode |= S_IFREG; + break; + case 'S': /* GNU sparse files */ + /* + * Sparse files are really just regular files with + * sparse information in the extended area. 
+ */ + /* FALL THROUGH */ + default: /* Regular file and non-standard types */ + /* + * Per POSIX: non-recognized types should always be + * treated as regular files. + */ + st->st_mode |= S_IFREG; + break; + } + return (0); +} + +/* + * Parse out header elements for "old-style" tar archives. + */ +static int +header_old_tar(struct archive *a, struct tar *tar, struct archive_entry *entry, + struct stat *st, const void *h) +{ + const struct archive_entry_header_ustar *header; + + /* Copy filename over (to ensure null termination). */ + header = h; + archive_strncpy(&(tar->entry_name), header->name, sizeof(header->name)); + archive_entry_set_pathname(entry, tar->entry_name.s); + + /* Grab rest of common fields */ + header_common(a, tar, entry, st, h); + + tar->entry_bytes_remaining = st->st_size; + tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); + return (0); +} + +/* + * Parse a file header for a pax extended archive entry. + */ +static int +header_pax_global(struct archive *a, struct tar *tar, + struct archive_entry *entry, struct stat *st, const void *h) +{ + int err, err2; + + err = read_body_to_string(a, tar, &(tar->pax_global), h); + err2 = tar_read_header(a, tar, entry, st); + return (err_combine(err, err2)); +} + +static int +header_pax_extensions(struct archive *a, struct tar *tar, + struct archive_entry *entry, struct stat *st, const void *h) +{ + int err, err2; + + read_body_to_string(a, tar, &(tar->pax_header), h); + + /* Parse the next header. */ + err = tar_read_header(a, tar, entry, st); + + /* + * TODO: Parse global/default options into 'entry' struct here + * before handling file-specific options. + * + * This design (parse standard header, then overwrite with pax + * extended attribute data) usually works well, but isn't ideal; + * it would be better to parse the pax extended attributes first + * and then skip any fields in the standard header that were + * defined in the pax header. 
+ */ + err2 = pax_header(a, tar, entry, st, tar->pax_header.s); + err = err_combine(err, err2); + tar->entry_bytes_remaining = st->st_size; + tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); + return (err); +} + + +/* + * Parse a file header for a Posix "ustar" archive entry. This also + * handles "pax" or "extended ustar" entries. + */ +static int +header_ustar(struct archive *a, struct tar *tar, struct archive_entry *entry, + struct stat *st, const void *h) +{ + const struct archive_entry_header_ustar *header; + struct archive_string *as; + + header = h; + + /* Copy name into an internal buffer to ensure null-termination. */ + as = &(tar->entry_name); + if (header->prefix[0]) { + archive_strncpy(as, header->prefix, sizeof(header->prefix)); + if (as->s[archive_strlen(as) - 1] != '/') + archive_strappend_char(as, '/'); + archive_strncat(as, header->name, sizeof(header->name)); + } else + archive_strncpy(as, header->name, sizeof(header->name)); + + archive_entry_set_pathname(entry, as->s); + + /* Handle rest of common fields. */ + header_common(a, tar, entry, st, h); + + /* Handle POSIX ustar fields. */ + archive_strncpy(&(tar->entry_uname), header->uname, + sizeof(header->uname)); + archive_entry_set_uname(entry, tar->entry_uname.s); + + archive_strncpy(&(tar->entry_gname), header->gname, + sizeof(header->gname)); + archive_entry_set_gname(entry, tar->entry_gname.s); + + /* Parse out device numbers only for char and block specials. */ + if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { + st->st_rdev = makedev( + tar_atol(header->rdevmajor, sizeof(header->rdevmajor)), + tar_atol(header->rdevminor, sizeof(header->rdevminor))); + } + + tar->entry_bytes_remaining = st->st_size; + tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); + + return (0); +} + + +/* + * Parse the pax extended attributes record. + * + * Returns non-zero if there's an error in the data. 
+ */ +static int +pax_header(struct archive *a, struct tar *tar, struct archive_entry *entry, + struct stat *st, char *attr) +{ + size_t attr_length, l, line_length; + char *line, *p; + wchar_t *key, *wp, *value; + int err, err2; + + attr_length = strlen(attr); + err = ARCHIVE_OK; + while (attr_length > 0) { + /* Parse decimal length field at start of line. */ + line_length = 0; + l = attr_length; + line = p = attr; /* Record start of line. */ + while (l>0) { + if (*p == ' ') { + p++; + l--; + break; + } + if (*p < '0' || *p > '9') + return (-1); + line_length *= 10; + line_length += *p - '0'; + if (line_length > 999999) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Rejecting pax extended attribute > 1MB"); + return (ARCHIVE_WARN); + } + p++; + l--; + } + + if (line_length > attr_length) + return (0); + + /* Ensure pax_entry buffer is big enough. */ + if (tar->pax_entry_length <= line_length) { + wchar_t *old_entry = tar->pax_entry; + + if (tar->pax_entry_length <= 0) + tar->pax_entry_length = 1024; + while (tar->pax_entry_length <= line_length + 1) + tar->pax_entry_length *= 2; + + old_entry = tar->pax_entry; + tar->pax_entry = realloc(tar->pax_entry, + tar->pax_entry_length * sizeof(wchar_t)); + if (tar->pax_entry == NULL) { + free(old_entry); + archive_set_error(a, ENOMEM, + "No memory"); + return (ARCHIVE_FATAL); + } + } + + /* Decode UTF-8 to wchar_t, null-terminate result. */ + if (utf8_decode(tar->pax_entry, p, + line_length - (p - attr) - 1)) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Invalid UTF8 character in pax extended attribute"); + err = err_combine(err, ARCHIVE_WARN); + } + + /* Null-terminate 'key' value. */ + wp = key = tar->pax_entry; + if (key[0] == L'=') + return (-1); + while (*wp && *wp != L'=') + ++wp; + if (*wp == L'\0' || wp == NULL) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Invalid pax extended attributes"); + return (ARCHIVE_WARN); + } + *wp = 0; + + /* Identify null-terminated 'value' portion. 
*/ + value = wp + 1; + + /* Identify this attribute and set it in the entry. */ + err2 = pax_attribute(entry, st, key, value); + err = err_combine(err, err2); + + /* Skip to next line */ + attr += line_length; + attr_length -= line_length; + } + return (err); +} + +static int +pax_attribute_xattr(struct archive_entry *entry, + wchar_t *name, wchar_t *value) +{ + char *name_decoded, *name_narrow; + void *value_decoded; + size_t value_len; + + if (wcslen(name) < 18 || (wcsncmp(name, L"LIBARCHIVE.xattr.", 17)) != 0) + return 3; + + name += 17; + + /* URL-decode name */ + name_narrow = wide_to_narrow(name); + if (name_narrow == NULL) + return 2; + name_decoded = url_decode(name_narrow); + free(name_narrow); + if (name_decoded == NULL) + return 2; + + /* Base-64 decode value */ + value_decoded = base64_decode(value, wcslen(value), &value_len); + if (value_decoded == NULL) { + free(name_decoded); + return 1; + } + + archive_entry_xattr_add_entry(entry, name_decoded, + value_decoded, value_len); + + free(name_decoded); + free(value_decoded); + return 0; +} + +/* + * Parse a single key=value attribute. key/value pointers are + * assumed to point into reasonably long-lived storage. + * + * Note that POSIX reserves all-lowercase keywords. Vendor-specific + * extensions should always have keywords of the form "VENDOR.attribute" + * In particular, it's quite feasible to support many different + * vendor extensions here. I'm using "LIBARCHIVE" for extensions + * unique to this library (currently, there are none). + * + * Investigate other vendor-specific extensions, as well and see if + * any of them look useful. + */ +static int +pax_attribute(struct archive_entry *entry, struct stat *st, + wchar_t *key, wchar_t *value) +{ + int64_t s; + long n; + + switch (key[0]) { + case 'L': + /* Our extensions */ +/* TODO: Handle arbitrary extended attributes... 
*/ +/* + if (strcmp(key, "LIBARCHIVE.xxxxxxx")==0) + archive_entry_set_xxxxxx(entry, value); +*/ + if (wcsncmp(key, L"LIBARCHIVE.xattr.", 17)==0) + pax_attribute_xattr(entry, key, value); + break; + case 'S': + /* We support some keys used by the "star" archiver */ + if (wcscmp(key, L"SCHILY.acl.access")==0) + __archive_entry_acl_parse_w(entry, value, + ARCHIVE_ENTRY_ACL_TYPE_ACCESS); + else if (wcscmp(key, L"SCHILY.acl.default")==0) + __archive_entry_acl_parse_w(entry, value, + ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); + else if (wcscmp(key, L"SCHILY.devmajor")==0) + st->st_rdev = makedev(tar_atol10(value, wcslen(value)), + minor(st->st_rdev)); + else if (wcscmp(key, L"SCHILY.devminor")==0) + st->st_rdev = makedev(major(st->st_rdev), + tar_atol10(value, wcslen(value))); + else if (wcscmp(key, L"SCHILY.fflags")==0) + archive_entry_copy_fflags_text_w(entry, value); + else if (wcscmp(key, L"SCHILY.nlink")==0) + st->st_nlink = tar_atol10(value, wcslen(value)); + break; + case 'a': + if (wcscmp(key, L"atime")==0) { + pax_time(value, &s, &n); + st->st_atime = s; + ARCHIVE_STAT_SET_ATIME_NANOS(st, n); + } + break; + case 'c': + if (wcscmp(key, L"ctime")==0) { + pax_time(value, &s, &n); + st->st_ctime = s; + ARCHIVE_STAT_SET_CTIME_NANOS(st, n); + } else if (wcscmp(key, L"charset")==0) { + /* TODO: Publish charset information in entry. */ + } else if (wcscmp(key, L"comment")==0) { + /* TODO: Publish comment in entry. */ + } + break; + case 'g': + if (wcscmp(key, L"gid")==0) + st->st_gid = tar_atol10(value, wcslen(value)); + else if (wcscmp(key, L"gname")==0) + archive_entry_copy_gname_w(entry, value); + break; + case 'l': + /* pax interchange doesn't distinguish hardlink vs. symlink. 
*/ + if (wcscmp(key, L"linkpath")==0) { + if (archive_entry_hardlink(entry)) + archive_entry_copy_hardlink_w(entry, value); + else + archive_entry_copy_symlink_w(entry, value); + } + break; + case 'm': + if (wcscmp(key, L"mtime")==0) { + pax_time(value, &s, &n); + st->st_mtime = s; + ARCHIVE_STAT_SET_MTIME_NANOS(st, n); + } + break; + case 'p': + if (wcscmp(key, L"path")==0) + archive_entry_copy_pathname_w(entry, value); + break; + case 'r': + /* POSIX has reserved 'realtime.*' */ + break; + case 's': + /* POSIX has reserved 'security.*' */ + /* Someday: if (wcscmp(key, L"security.acl")==0) { ... } */ + if (wcscmp(key, L"size")==0) + st->st_size = tar_atol10(value, wcslen(value)); + break; + case 'u': + if (wcscmp(key, L"uid")==0) + st->st_uid = tar_atol10(value, wcslen(value)); + else if (wcscmp(key, L"uname")==0) + archive_entry_copy_uname_w(entry, value); + break; + } + return (0); +} + + + +/* + * parse a decimal time value, which may include a fractional portion + */ +static void +pax_time(const wchar_t *p, int64_t *ps, long *pn) +{ + char digit; + int64_t s; + unsigned long l; + int sign; + int64_t limit, last_digit_limit; + + limit = max_int64 / 10; + last_digit_limit = max_int64 % 10; + + s = 0; + sign = 1; + if (*p == '-') { + sign = -1; + p++; + } + while (*p >= '0' && *p <= '9') { + digit = *p - '0'; + if (s > limit || + (s == limit && digit > last_digit_limit)) { + s = max_uint64; + break; + } + s = (s * 10) + digit; + ++p; + } + + *ps = s * sign; + + /* Calculate nanoseconds. 
*/ + *pn = 0; + + if (*p != '.') + return; + + l = 100000000UL; + do { + ++p; + if (*p >= '0' && *p <= '9') + *pn += (*p - '0') * l; + else + break; + } while (l /= 10); +} + +/* + * Parse GNU tar header + */ +static int +header_gnutar(struct archive *a, struct tar *tar, struct archive_entry *entry, + struct stat *st, const void *h) +{ + const struct archive_entry_header_gnutar *header; + + (void)a; + + /* + * GNU header is like POSIX ustar, except 'prefix' is + * replaced with some other fields. This also means the + * filename is stored as in old-style archives. + */ + + /* Grab fields common to all tar variants. */ + header_common(a, tar, entry, st, h); + + /* Copy filename over (to ensure null termination). */ + header = h; + archive_strncpy(&(tar->entry_name), header->name, + sizeof(header->name)); + archive_entry_set_pathname(entry, tar->entry_name.s); + + /* Fields common to ustar and GNU */ + /* XXX Can the following be factored out since it's common + * to ustar and gnu tar? Is it okay to move it down into + * header_common, perhaps? */ + archive_strncpy(&(tar->entry_uname), + header->uname, sizeof(header->uname)); + archive_entry_set_uname(entry, tar->entry_uname.s); + + archive_strncpy(&(tar->entry_gname), + header->gname, sizeof(header->gname)); + archive_entry_set_gname(entry, tar->entry_gname.s); + + /* Parse out device numbers only for char and block specials */ + if (header->typeflag[0] == '3' || header->typeflag[0] == '4') + st->st_rdev = makedev ( + tar_atol(header->rdevmajor, sizeof(header->rdevmajor)), + tar_atol(header->rdevminor, sizeof(header->rdevminor))); + else + st->st_rdev = 0; + + tar->entry_bytes_remaining = st->st_size; + tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); + + /* Grab GNU-specific fields. 
*/ + st->st_atime = tar_atol(header->atime, sizeof(header->atime)); + st->st_ctime = tar_atol(header->ctime, sizeof(header->ctime)); + if (header->realsize[0] != 0) { + st->st_size = tar_atol(header->realsize, + sizeof(header->realsize)); + } + + if (header->sparse[0].offset[0] != 0) { + gnu_read_sparse_data(a, tar, header); + } else { + if (header->isextended[0] != 0) { + /* XXX WTF? XXX */ + } + } + + return (0); +} + +static int +gnu_read_sparse_data(struct archive *a, struct tar *tar, + const struct archive_entry_header_gnutar *header) +{ + ssize_t bytes_read; + const void *data; + struct extended { + struct gnu_sparse sparse[21]; + char isextended[1]; + char padding[7]; + }; + const struct extended *ext; + + gnu_parse_sparse_data(a, tar, header->sparse, 4); + if (header->isextended[0] == 0) + return (ARCHIVE_OK); + + do { + bytes_read = (a->compression_read_ahead)(a, &data, 512); + if (bytes_read < 0) + return (ARCHIVE_FATAL); + if (bytes_read < 512) { + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated tar archive " + "detected while reading sparse file data"); + return (ARCHIVE_FATAL); + } + (a->compression_read_consume)(a, 512); + ext = (const struct extended *)data; + gnu_parse_sparse_data(a, tar, ext->sparse, 21); + } while (ext->isextended[0] != 0); + if (tar->sparse_list != NULL) + tar->entry_offset = tar->sparse_list->offset; + return (ARCHIVE_OK); +} + +static void +gnu_parse_sparse_data(struct archive *a, struct tar *tar, + const struct gnu_sparse *sparse, int length) +{ + struct sparse_block *last; + struct sparse_block *p; + + (void)a; /* UNUSED */ + + last = tar->sparse_list; + while (last != NULL && last->next != NULL) + last = last->next; + + while (length > 0 && sparse->offset[0] != 0) { + p = malloc(sizeof(*p)); + if (p == NULL) + __archive_errx(1, "Out of memory"); + memset(p, 0, sizeof(*p)); + if (last != NULL) + last->next = p; + else + tar->sparse_list = p; + last = p; + p->offset = tar_atol(sparse->offset, 
sizeof(sparse->offset)); + p->remaining = + tar_atol(sparse->numbytes, sizeof(sparse->numbytes)); + sparse++; + length--; + } +} + +/*- + * Convert text->integer. + * + * Traditional tar formats (including POSIX) specify base-8 for + * all of the standard numeric fields. This is a significant limitation + * in practice: + * = file size is limited to 8GB + * = rdevmajor and rdevminor are limited to 21 bits + * = uid/gid are limited to 21 bits + * + * There are two workarounds for this: + * = pax extended headers, which use variable-length string fields + * = GNU tar and STAR both allow either base-8 or base-256 in + * most fields. The high bit is set to indicate base-256. + * + * On read, this implementation supports both extensions. + */ +static int64_t +tar_atol(const char *p, unsigned char_cnt) +{ + /* + * Technically, GNU tar considers a field to be in base-256 + * only if the first byte is 0xff or 0x80. + */ + if (*p & 0x80) + return (tar_atol256(p, char_cnt)); + return (tar_atol8(p, char_cnt)); +} + +/* + * Note that this implementation does not (and should not!) obey + * locale settings; you cannot simply substitute strtol here, since + * it does obey locale. + */ +static int64_t +tar_atol8(const char *p, unsigned char_cnt) +{ + int64_t l, limit, last_digit_limit; + int digit, sign, base; + + base = 8; + limit = max_int64 / base; + last_digit_limit = max_int64 % base; + + while (*p == ' ' || *p == '\t') + p++; + if (*p == '-') { + sign = -1; + p++; + } else + sign = 1; + + l = 0; + digit = *p - '0'; + while (digit >= 0 && digit < base && char_cnt-- > 0) { + if (l>limit || (l == limit && digit > last_digit_limit)) { + l = max_uint64; /* Truncate on overflow. */ + break; + } + l = (l * base) + digit; + digit = *++p - '0'; + } + return (sign < 0) ? -l : l; +} + + +/* + * Note that this implementation does not (and should not!) obey + * locale settings; you cannot simply substitute strtol here, since + * it does obey locale. 
+ */ +static int64_t +tar_atol10(const wchar_t *p, unsigned char_cnt) +{ + int64_t l, limit, last_digit_limit; + int base, digit, sign; + + base = 10; + limit = max_int64 / base; + last_digit_limit = max_int64 % base; + + while (*p == ' ' || *p == '\t') + p++; + if (*p == '-') { + sign = -1; + p++; + } else + sign = 1; + + l = 0; + digit = *p - '0'; + while (digit >= 0 && digit < base && char_cnt-- > 0) { + if (l > limit || (l == limit && digit > last_digit_limit)) { + l = max_uint64; /* Truncate on overflow. */ + break; + } + l = (l * base) + digit; + digit = *++p - '0'; + } + return (sign < 0) ? -l : l; +} + +/* + * Parse a base-256 integer. This is just a straight signed binary + * value in big-endian order, except that the high-order bit is + * ignored. Remember that "int64_t" may or may not be exactly 64 + * bits; the implementation here tries to avoid making any assumptions + * about the actual size of an int64_t. It does assume we're using + * twos-complement arithmetic, though. + */ +static int64_t +tar_atol256(const char *_p, unsigned char_cnt) +{ + int64_t l, upper_limit, lower_limit; + const unsigned char *p = (const unsigned char *)_p; + + upper_limit = max_int64 / 256; + lower_limit = min_int64 / 256; + + /* Pad with 1 or 0 bits, depending on sign. */ + if ((0x40 & *p) == 0x40) + l = (int64_t)-1; + else + l = 0; + l = (l << 6) | (0x3f & *p++); + while (--char_cnt > 0) { + if (l > upper_limit) { + l = max_int64; /* Truncate on overflow */ + break; + } else if (l < lower_limit) { + l = min_int64; + break; + } + l = (l << 8) | (0xff & (int64_t)*p++); + } + return (l); +} + +static int +utf8_decode(wchar_t *dest, const char *src, size_t length) +{ + size_t n; + int err; + + err = 0; + while (length > 0) { + n = UTF8_mbrtowc(dest, src, length); + if (n == 0) + break; + dest++; + src += n; + length -= n; + } + *dest++ = L'\0'; + return (err); +} + +/* + * Copied and simplified from FreeBSD libc/locale. 
+ */ +static size_t +UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n) +{ + int ch, i, len, mask; + unsigned long wch; + + if (s == NULL || n == 0 || pwc == NULL) + return (0); + + /* + * Determine the number of octets that make up this character from + * the first octet, and a mask that extracts the interesting bits of + * the first octet. + */ + ch = (unsigned char)*s; + if ((ch & 0x80) == 0) { + mask = 0x7f; + len = 1; + } else if ((ch & 0xe0) == 0xc0) { + mask = 0x1f; + len = 2; + } else if ((ch & 0xf0) == 0xe0) { + mask = 0x0f; + len = 3; + } else if ((ch & 0xf8) == 0xf0) { + mask = 0x07; + len = 4; + } else if ((ch & 0xfc) == 0xf8) { + mask = 0x03; + len = 5; + } else if ((ch & 0xfe) == 0xfc) { + mask = 0x01; + len = 6; + } else { + /* Invalid first byte; convert to '?' */ + *pwc = '?'; + return (1); + } + + if (n < (size_t)len) { + /* Invalid first byte; convert to '?' */ + *pwc = '?'; + return (1); + } + + /* + * Decode the octet sequence representing the character in chunks + * of 6 bits, most significant first. + */ + wch = (unsigned char)*s++ & mask; + i = len; + while (--i != 0) { + if ((*s & 0xc0) != 0x80) { + /* Invalid intermediate byte; consume one byte and + * emit '?' */ + *pwc = '?'; + return (1); + } + wch <<= 6; + wch |= *s++ & 0x3f; + } + + /* Assign the value to the output; out-of-range values + * just get truncated. */ + *pwc = (wchar_t)wch; +#ifdef WCHAR_MAX + /* + * If platform has WCHAR_MAX, we can do something + * more sensible with out-of-range values. + */ + if (wch >= WCHAR_MAX) + *pwc = '?'; +#endif + /* Return number of bytes input consumed: 0 for end-of-string. */ + return (wch == L'\0' ? 
0 : len); +} + + +/* + * base64_decode - Base64 decode + * + * This accepts most variations of base-64 encoding, including: + * * with or without line breaks + * * with or without the final group padded with '=' or '_' characters + * (The most economical Base-64 variant does not pad the last group and + * omits line breaks; RFC1341 used for MIME requires both.) + */ +static char * +base64_decode(const wchar_t *src, size_t len, size_t *out_len) +{ + static const unsigned char digits[64] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + static unsigned char decode_table[128]; + char *out, *d; + + /* If the decode table is not yet initialized, prepare it. */ + if (decode_table[digits[1]] != 1) { + size_t i; + memset(decode_table, 0xff, sizeof(decode_table)); + for (i = 0; i < sizeof(digits); i++) + decode_table[digits[i]] = i; + } + + /* Allocate enough space to hold the entire output. */ + /* Note that we may not use all of this... */ + out = malloc((len * 3 + 3) / 4); + if (out == NULL) { + *out_len = 0; + return (NULL); + } + d = out; + + while (len > 0) { + /* Collect the next group of (up to) four characters. */ + int v = 0; + int group_size = 0; + while (group_size < 4 && len > 0) { + /* '=' or '_' padding indicates final group. */ + if (*src == '=' || *src == '_') { + len = 0; + break; + } + /* Skip illegal characters (including line breaks) */ + if (*src > 127 || *src < 32 + || decode_table[*src] == 0xff) { + len--; + src++; + continue; + } + v <<= 6; + v |= decode_table[*src++]; + len --; + group_size++; + } + /* Align a short group properly. */ + v <<= 6 * (4 - group_size); + /* Unpack the group we just collected. */ + switch (group_size) { + case 4: d[2] = v & 0xff; + /* FALLTHROUGH */ + case 3: d[1] = (v >> 8) & 0xff; + /* FALLTHROUGH */ + case 2: d[0] = (v >> 16) & 0xff; + break; + case 1: /* this is invalid! 
*/ + break; + } + d += group_size * 3 / 4; + } + + *out_len = d - out; + return (out); +} + +/* + * This is a little tricky because the C99 standard wcstombs() + * function returns the number of bytes that were converted, + * not the number that should be converted. As a result, + * we can never accurately size the output buffer (without + * doing a tedious output size calculation in advance). + * This approach (try a conversion, then try again if it fails) + * will almost always succeed on the first try, and is thus + * much faster, at the cost of sometimes requiring multiple + * passes while we expand the buffer. + */ +static char * +wide_to_narrow(const wchar_t *wval) +{ + int converted_length; + /* Guess an output buffer size and try the conversion. */ + int alloc_length = wcslen(wval) * 3; + char *mbs_val = malloc(alloc_length + 1); + if (mbs_val == NULL) + return (NULL); + converted_length = wcstombs(mbs_val, wval, alloc_length); + + /* If we exhausted the buffer, resize and try again. */ + while (converted_length >= alloc_length) { + free(mbs_val); + alloc_length *= 2; + mbs_val = malloc(alloc_length + 1); + if (mbs_val == NULL) + return (NULL); + converted_length = wcstombs(mbs_val, wval, alloc_length); + } + + /* Ensure a trailing null and return the final string. 
*/ + mbs_val[alloc_length] = '\0'; + return (mbs_val); +} + +static char * +url_decode(const char *in) +{ + char *out, *d; + const char *s; + + out = malloc(strlen(in) + 1); + if (out == NULL) + return (NULL); + for (s = in, d = out; *s != '\0'; ) { + if (*s == '%') { + /* Try to convert % escape */ + int digit1 = tohex(s[1]); + int digit2 = tohex(s[2]); + if (digit1 >= 0 && digit2 >= 0) { + /* Looks good, consume three chars */ + s += 3; + /* Convert output */ + *d++ = ((digit1 << 4) | digit2); + continue; + } + /* Else fall through and treat '%' as normal char */ + } + *d++ = *s++; + } + *d = '\0'; + return (out); +} + +static int +tohex(int c) +{ + if (c >= '0' && c <= '9') + return (c - '0'); + else if (c >= 'A' && c <= 'F') + return (c - 'A' + 10); + else if (c >= 'a' && c <= 'f') + return (c - 'a' + 10); + else + return (-1); +} diff --git a/lib/libarchive/archive_read_support_format_zip.c b/lib/libarchive/archive_read_support_format_zip.c new file mode 100644 index 0000000..c29d11d --- /dev/null +++ b/lib/libarchive/archive_read_support_format_zip.c @@ -0,0 +1,797 @@ +/*- + * Copyright (c) 2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#ifdef HAVE_ZLIB_H +#include <zlib.h> +#endif + +#include "archive.h" +#include "archive_entry.h" +#include "archive_private.h" + +struct zip { + /* entry_bytes_remaining is the number of bytes we expect. */ + off_t entry_bytes_remaining; + off_t entry_offset; + + /* These count the number of bytes actually read for the entry. */ + off_t entry_compressed_bytes_read; + off_t entry_uncompressed_bytes_read; + + unsigned version; + unsigned system; + unsigned flags; + unsigned compression; + const char * compression_name; + time_t mtime; + time_t ctime; + time_t atime; + mode_t mode; + uid_t uid; + gid_t gid; + + /* Flags to mark progress of decompression. 
*/ + char decompress_init; + char end_of_entry; + char end_of_entry_cleanup; + + long crc32; + ssize_t filename_length; + ssize_t extra_length; + off_t uncompressed_size; + off_t compressed_size; + + unsigned char *uncompressed_buffer; + size_t uncompressed_buffer_size; +#ifdef HAVE_ZLIB_H + z_stream stream; +#endif + + struct archive_string pathname; + struct archive_string extra; + char format_name[64]; +}; + +#define ZIP_LENGTH_AT_END 8 + +struct zip_file_header { + char signature[4]; + char version[2]; + char flags[2]; + char compression[2]; + char timedate[4]; + char crc32[4]; + char compressed_size[4]; + char uncompressed_size[4]; + char filename_length[2]; + char extra_length[2]; +}; + +static const char *compression_names[] = { + "uncompressed", + "shrinking", + "reduced-1", + "reduced-2", + "reduced-3", + "reduced-4", + "imploded", + "reserved", + "deflation" +}; + +static int archive_read_format_zip_bid(struct archive *); +static int archive_read_format_zip_cleanup(struct archive *); +static int archive_read_format_zip_read_data(struct archive *, + const void **, size_t *, off_t *); +static int archive_read_format_zip_read_data_skip(struct archive *a); +static int archive_read_format_zip_read_header(struct archive *, + struct archive_entry *); +static int i2(const char *); +static int i4(const char *); +static unsigned int u2(const char *); +static unsigned int u4(const char *); +static uint64_t u8(const char *); +static int zip_read_data_deflate(struct archive *a, const void **buff, + size_t *size, off_t *offset); +static int zip_read_data_none(struct archive *a, const void **buff, + size_t *size, off_t *offset); +static int zip_read_file_header(struct archive *a, + struct archive_entry *entry, struct zip *zip); +static time_t zip_time(const char *); +static void process_extra(const void* extra, struct zip* zip); + +int +archive_read_support_format_zip(struct archive *a) +{ + struct zip *zip; + int r; + + zip = malloc(sizeof(*zip)); + if (zip == NULL) { 
+ archive_set_error(a, ENOMEM, "Can't allocate zip data"); + return (ARCHIVE_FATAL); + } + memset(zip, 0, sizeof(*zip)); + + r = __archive_read_register_format(a, + zip, + archive_read_format_zip_bid, + archive_read_format_zip_read_header, + archive_read_format_zip_read_data, + archive_read_format_zip_read_data_skip, + archive_read_format_zip_cleanup); + + if (r != ARCHIVE_OK) + free(zip); + return (ARCHIVE_OK); +} + + +static int +archive_read_format_zip_bid(struct archive *a) +{ + int bytes_read; + int bid = 0; + const void *h; + const char *p; + + if (a->archive_format == ARCHIVE_FORMAT_ZIP) + bid += 1; + + bytes_read = (a->compression_read_ahead)(a, &h, 4); + if (bytes_read < 4) + return (-1); + p = h; + + if (p[0] == 'P' && p[1] == 'K') { + bid += 16; + if (p[2] == '\001' && p[3] == '\002') + bid += 16; + else if (p[2] == '\003' && p[3] == '\004') + bid += 16; + else if (p[2] == '\005' && p[3] == '\006') + bid += 16; + else if (p[2] == '\007' && p[3] == '\010') + bid += 16; + } + return (bid); +} + +static int +archive_read_format_zip_read_header(struct archive *a, + struct archive_entry *entry) +{ + int bytes_read; + const void *h; + const char *signature; + struct zip *zip; + + a->archive_format = ARCHIVE_FORMAT_ZIP; + if (a->archive_format_name == NULL) + a->archive_format_name = "ZIP"; + + zip = *(a->pformat_data); + zip->decompress_init = 0; + zip->end_of_entry = 0; + zip->end_of_entry_cleanup = 0; + zip->entry_uncompressed_bytes_read = 0; + zip->entry_compressed_bytes_read = 0; + bytes_read = (a->compression_read_ahead)(a, &h, 4); + if (bytes_read < 4) + return (ARCHIVE_FATAL); + + signature = h; + if (signature[0] != 'P' || signature[1] != 'K') { + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Bad ZIP file"); + return (ARCHIVE_FATAL); + } + + if (signature[2] == '\001' && signature[3] == '\002') { + /* Beginning of central directory. */ + return (ARCHIVE_EOF); + } + + if (signature[2] == '\003' && signature[3] == '\004') { + /* Regular file entry. 
*/ + return (zip_read_file_header(a, entry, zip)); + } + + if (signature[2] == '\005' && signature[3] == '\006') { + /* End-of-archive record. */ + return (ARCHIVE_EOF); + } + + if (signature[2] == '\007' && signature[3] == '\010') { + /* + * We should never encounter this record here; + * see ZIP_LENGTH_AT_END handling below for details. + */ + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Bad ZIP file: Unexpected end-of-entry record"); + return (ARCHIVE_FATAL); + } + + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Damaged ZIP file or unsupported format variant (%d,%d)", + signature[2], signature[3]); + return (ARCHIVE_FATAL); +} + +int +zip_read_file_header(struct archive *a, struct archive_entry *entry, + struct zip *zip) +{ + const struct zip_file_header *p; + const void *h; + int bytes_read; + struct stat st; + + bytes_read = + (a->compression_read_ahead)(a, &h, sizeof(struct zip_file_header)); + if (bytes_read < (int)sizeof(struct zip_file_header)) { + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP file header"); + return (ARCHIVE_FATAL); + } + p = h; + + zip->version = p->version[0]; + zip->system = p->version[1]; + zip->flags = i2(p->flags); + zip->compression = i2(p->compression); + if (zip->compression < + sizeof(compression_names)/sizeof(compression_names[0])) + zip->compression_name = compression_names[zip->compression]; + else + zip->compression_name = "??"; + zip->mtime = zip_time(p->timedate); + zip->ctime = 0; + zip->atime = 0; + zip->mode = 0; + zip->uid = 0; + zip->gid = 0; + zip->crc32 = i4(p->crc32); + zip->filename_length = i2(p->filename_length); + zip->extra_length = i2(p->extra_length); + zip->uncompressed_size = u4(p->uncompressed_size); + zip->compressed_size = u4(p->compressed_size); + + (a->compression_read_consume)(a, sizeof(struct zip_file_header)); + + + /* Read the filename. 
*/ + bytes_read = (a->compression_read_ahead)(a, &h, zip->filename_length); + if (bytes_read < zip->filename_length) { + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP file header"); + return (ARCHIVE_FATAL); + } + archive_string_ensure(&zip->pathname, zip->filename_length); + archive_strncpy(&zip->pathname, h, zip->filename_length); + (a->compression_read_consume)(a, zip->filename_length); + archive_entry_set_pathname(entry, zip->pathname.s); + + if (zip->pathname.s[archive_strlen(&zip->pathname) - 1] == '/') + zip->mode = S_IFDIR | 0777; + else + zip->mode = S_IFREG | 0777; + + /* Read the extra data. */ + bytes_read = (a->compression_read_ahead)(a, &h, zip->extra_length); + if (bytes_read < zip->extra_length) { + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP file header"); + return (ARCHIVE_FATAL); + } + process_extra(h, zip); + (a->compression_read_consume)(a, zip->extra_length); + + /* Populate some additional entry fields: */ + memset(&st, 0, sizeof(st)); + st.st_mode = zip->mode; + st.st_uid = zip->uid; + st.st_gid = zip->gid; + st.st_mtime = zip->mtime; + st.st_ctime = zip->ctime; + st.st_atime = zip->atime; + st.st_size = zip->uncompressed_size; + archive_entry_copy_stat(entry, &st); + + zip->entry_bytes_remaining = zip->compressed_size; + zip->entry_offset = 0; + + /* Set up a more descriptive format name. */ + sprintf(zip->format_name, "ZIP %d.%d (%s)", + zip->version / 10, zip->version % 10, + zip->compression_name); + a->archive_format_name = zip->format_name; + + return (ARCHIVE_OK); +} + +/* Convert an MSDOS-style date/time into Unix-style time. */ +static time_t +zip_time(const char *p) +{ + int msTime, msDate; + struct tm ts; + + msTime = (0xff & (unsigned)p[0]) + 256 * (0xff & (unsigned)p[1]); + msDate = (0xff & (unsigned)p[2]) + 256 * (0xff & (unsigned)p[3]); + + memset(&ts, 0, sizeof(ts)); + ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. 
*/ + ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */ + ts.tm_mday = msDate & 0x1f; /* Day of month. */ + ts.tm_hour = (msTime >> 11) & 0x1f; + ts.tm_min = (msTime >> 5) & 0x3f; + ts.tm_sec = (msTime << 1) & 0x3e; + ts.tm_isdst = -1; + return mktime(&ts); +} + +static int +archive_read_format_zip_read_data(struct archive *a, + const void **buff, size_t *size, off_t *offset) +{ + int r; + struct zip *zip; + + zip = *(a->pformat_data); + + /* + * If we hit end-of-entry last time, clean up and return + * ARCHIVE_EOF this time. + */ + if (zip->end_of_entry) { + if (!zip->end_of_entry_cleanup) { + if (zip->flags & ZIP_LENGTH_AT_END) { + const void *h; + const char *p; + int bytes_read = + (a->compression_read_ahead)(a, &h, 16); + if (bytes_read < 16) { + archive_set_error(a, + ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP end-of-file record"); + return (ARCHIVE_FATAL); + } + p = h; + zip->crc32 = i4(p + 4); + zip->compressed_size = u4(p + 8); + zip->uncompressed_size = u4(p + 12); + bytes_read = (a->compression_read_consume)(a, 16); + } + + /* Check file size, CRC against these values. */ + if (zip->compressed_size != zip->entry_compressed_bytes_read) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "ZIP compressed data is wrong size"); + return (ARCHIVE_WARN); + } + if (zip->uncompressed_size != zip->entry_uncompressed_bytes_read) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "ZIP uncompressed data is wrong size"); + return (ARCHIVE_WARN); + } +/* TODO: Compute CRC. */ +/* + if (zip->crc32 != zip->entry_crc32_calculated) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "ZIP data CRC error"); + return (ARCHIVE_WARN); + } +*/ + /* End-of-entry cleanup done. */ + zip->end_of_entry_cleanup = 1; + } + return (ARCHIVE_EOF); + } + + switch(zip->compression) { + case 0: /* No compression. */ + r = zip_read_data_none(a, buff, size, offset); + break; + case 8: /* Deflate compression. 
*/ + r = zip_read_data_deflate(a, buff, size, offset); + break; + default: /* Unsupported compression. */ + *buff = NULL; + *size = 0; + *offset = 0; + /* Return a warning. */ + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Unsupported ZIP compression method (%s)", + zip->compression_name); + if (zip->flags & ZIP_LENGTH_AT_END) { + /* + * ZIP_LENGTH_AT_END requires us to + * decompress the entry in order to + * skip it, but we don't know this + * compression method, so we give up. + */ + r = ARCHIVE_FATAL; + } else { + /* We know compressed size; just skip it. */ + archive_read_format_zip_read_data_skip(a); + r = ARCHIVE_WARN; + } + break; + } + return (r); +} + +/* + * Read "uncompressed" data. According to the current specification, + * if ZIP_LENGTH_AT_END is specified, then the size fields in the + * initial file header are supposed to be set to zero. This would, of + * course, make it impossible for us to read the archive, since we + * couldn't determine the end of the file data. Info-ZIP seems to + * include the real size fields both before and after the data in this + * case (the CRC only appears afterwards), so this works as you would + * expect. + * + * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets + * zip->end_of_entry if it consumes all of the data. + */ +static int +zip_read_data_none(struct archive *a, const void **buff, + size_t *size, off_t *offset) +{ + struct zip *zip; + ssize_t bytes_avail; + + zip = *(a->pformat_data); + + if (zip->entry_bytes_remaining == 0) { + *buff = NULL; + *size = 0; + *offset = zip->entry_offset; + zip->end_of_entry = 1; + return (ARCHIVE_OK); + } + /* + * Note: '1' here is a performance optimization. + * Recall that the decompression layer returns a count of + * available bytes; asking for more than that forces the + * decompressor to combine reads by copying data. 
+ */ + bytes_avail = (a->compression_read_ahead)(a, buff, 1); + if (bytes_avail <= 0) { + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP file data"); + return (ARCHIVE_FATAL); + } + if (bytes_avail > zip->entry_bytes_remaining) + bytes_avail = zip->entry_bytes_remaining; + (a->compression_read_consume)(a, bytes_avail); + *size = bytes_avail; + *offset = zip->entry_offset; + zip->entry_offset += *size; + zip->entry_bytes_remaining -= *size; + zip->entry_uncompressed_bytes_read += *size; + zip->entry_compressed_bytes_read += *size; + return (ARCHIVE_OK); +} + +#ifdef HAVE_ZLIB_H +static int +zip_read_data_deflate(struct archive *a, const void **buff, + size_t *size, off_t *offset) +{ + struct zip *zip; + ssize_t bytes_avail; + const void *compressed_buff; + int r; + + zip = *(a->pformat_data); + + /* If the buffer hasn't been allocated, allocate it now. */ + if (zip->uncompressed_buffer == NULL) { + zip->uncompressed_buffer_size = 32 * 1024; + zip->uncompressed_buffer + = malloc(zip->uncompressed_buffer_size); + if (zip->uncompressed_buffer == NULL) { + archive_set_error(a, ENOMEM, + "No memory for ZIP decompression"); + return (ARCHIVE_FATAL); + } + } + + /* If we haven't yet read any data, initialize the decompressor. */ + if (!zip->decompress_init) { + r = inflateInit2(&zip->stream, + -15 /* Don't check for zlib header */); + if (r != Z_OK) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Can't initialize ZIP decompression."); + return (ARCHIVE_FATAL); + } + zip->decompress_init = 1; + } + + /* + * Note: '1' here is a performance optimization. + * Recall that the decompression layer returns a count of + * available bytes; asking for more than that forces the + * decompressor to combine reads by copying data. 
+ */ + bytes_avail = (a->compression_read_ahead)(a, &compressed_buff, 1); + if (bytes_avail <= 0) { + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP file body"); + return (ARCHIVE_FATAL); + } + + /* + * A bug in zlib.h: stream.next_in should be marked 'const' + * but isn't (the library never alters data through the + * next_in pointer, only reads it). The result: this ugly + * cast to remove 'const'. + */ + zip->stream.next_in = (void *)(uintptr_t)(const void *)compressed_buff; + zip->stream.avail_in = bytes_avail; + zip->stream.total_in = 0; + zip->stream.next_out = zip->uncompressed_buffer; + zip->stream.avail_out = zip->uncompressed_buffer_size; + zip->stream.total_out = 0; + + r = inflate(&zip->stream, 0); + switch (r) { + case Z_OK: + break; + case Z_STREAM_END: + zip->end_of_entry = 1; + break; + case Z_MEM_ERROR: + archive_set_error(a, ENOMEM, + "Out of memory for ZIP decompression"); + return (ARCHIVE_FATAL); + default: + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "ZIP decompression failed (%d)", r); + return (ARCHIVE_FATAL); + } + + /* Consume as much as the compressor actually used. 
*/ + bytes_avail = zip->stream.total_in; + (a->compression_read_consume)(a, bytes_avail); + zip->entry_bytes_remaining -= bytes_avail; + zip->entry_compressed_bytes_read += bytes_avail; + + *offset = zip->entry_offset; + *size = zip->stream.total_out; + zip->entry_uncompressed_bytes_read += *size; + *buff = zip->uncompressed_buffer; + zip->entry_offset += *size; + return (ARCHIVE_OK); +} +#else +static int +zip_read_data_deflate(struct archive *a, const void **buff, + size_t *size, off_t *offset) +{ + int r; + + *buff = NULL; + *size = 0; + *offset = 0; + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "libarchive compiled without deflate support (no libz)"); + return (ARCHIVE_FATAL); +} +#endif + +static int +archive_read_format_zip_read_data_skip(struct archive *a) +{ + struct zip *zip; + const void *buff = NULL; + ssize_t bytes_avail; + + zip = *(a->pformat_data); + + /* + * If the length is at the end, we have no choice but + * to decompress all the data to find the end marker. + */ + if (zip->flags & ZIP_LENGTH_AT_END) { + size_t size; + off_t offset; + int r; + do { + r = archive_read_format_zip_read_data(a, &buff, + &size, &offset); + } while (r == ARCHIVE_OK); + return (r); + } + + /* + * If the length is at the beginning, we can skip the + * compressed data much more quickly. + */ + while (zip->entry_bytes_remaining > 0) { + bytes_avail = (a->compression_read_ahead)(a, &buff, 1); + if (bytes_avail <= 0) { + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP file body"); + return (ARCHIVE_FATAL); + } + if (bytes_avail > zip->entry_bytes_remaining) + bytes_avail = zip->entry_bytes_remaining; + (a->compression_read_consume)(a, bytes_avail); + zip->entry_bytes_remaining -= bytes_avail; + } + /* This entry is finished and done. 
*/ + zip->end_of_entry_cleanup = zip->end_of_entry = 1; + return (ARCHIVE_OK); +} + +static int +archive_read_format_zip_cleanup(struct archive *a) +{ + struct zip *zip; + + zip = *(a->pformat_data); + if (zip->uncompressed_buffer != NULL) + free(zip->uncompressed_buffer); + archive_string_free(&(zip->pathname)); + archive_string_free(&(zip->extra)); + free(zip); + *(a->pformat_data) = NULL; + return (ARCHIVE_OK); +} + +static int +i2(const char *p) +{ + return ((0xff & (int)p[0]) + 256 * (0xff & (int)p[1])); +} + + +static int +i4(const char *p) +{ + return ((0xffff & i2(p)) + 0x10000 * (0xffff & i2(p+2))); +} + +static unsigned int +u2(const char *p) +{ + return ((0xff & (unsigned int)p[0]) + 256 * (0xff & (unsigned int)p[1])); +} + +static unsigned int +u4(const char *p) +{ + return u2(p) + 0x10000 * u2(p+2); +} + +static uint64_t +u8(const char *p) +{ + return u4(p) + 0x100000000LL * u4(p+4); +} + +/* + * The extra data is stored as a list of + * id1+size1+data1 + id2+size2+data2 ... + * triplets. id and size are 2 bytes each. + */ +static void +process_extra(const void* extra, struct zip* zip) +{ + int offset = 0; + const char *p = extra; + while (offset < zip->extra_length - 4) + { + unsigned short headerid = u2(p + offset); + unsigned short datasize = u2(p + offset + 2); + offset += 4; + if (offset + datasize > zip->extra_length) + break; +#ifdef DEBUG + fprintf(stderr, "Header id 0x%04x, length %d\n", + headerid, datasize); +#endif + switch (headerid) { + case 0x0001: + /* Zip64 extended information extra field. */ + if (datasize >= 8) + zip->uncompressed_size = u8(p + offset); + if (datasize >= 16) + zip->compressed_size = u8(p + offset + 8); + break; + case 0x5455: + { + /* Extended time field "UT". */ + int flags = p[offset]; + offset++; + datasize--; + /* Flag bits indicate which dates are present. 
*/ + if (flags & 0x01) + { +#ifdef DEBUG + fprintf(stderr, "mtime: %d -> %d\n", + zip->mtime, i4(p + offset)); +#endif + if (datasize < 4) + break; + zip->mtime = i4(p + offset); + offset += 4; + datasize -= 4; + } + if (flags & 0x02) + { + if (datasize < 4) + break; + zip->atime = i4(p + offset); + offset += 4; + datasize -= 4; + } + if (flags & 0x04) + { + if (datasize < 4) + break; + zip->ctime = i4(p + offset); + offset += 4; + datasize -= 4; + } + break; + } + case 0x7855: + /* Info-ZIP Unix Extra Field (type 2) "Ux". */ +#ifdef DEBUG + fprintf(stderr, "uid %d gid %d\n", + i2(p + offset), i2(p + offset + 2)); +#endif + if (datasize >= 2) + zip->uid = i2(p + offset); + if (datasize >= 4) + zip->gid = i2(p + offset + 2); + break; + default: + break; + } + offset += datasize; + } +#ifdef DEBUG + if (offset != zip->extra_length) + { + fprintf(stderr, + "Extra data field contents do not match reported size!"); + } +#endif +} diff --git a/lib/libarchive/archive_string.c b/lib/libarchive/archive_string.c new file mode 100644 index 0000000..b18f31d --- /dev/null +++ b/lib/libarchive/archive_string.c @@ -0,0 +1,113 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +/* + * Basic resizable string support, to simplify manipulating arbitrary-sized + * strings while minimizing heap activity. + */ + +#include <stdlib.h> +#include <string.h> + +#include "archive_private.h" +#include "archive_string.h" + +struct archive_string * +__archive_string_append(struct archive_string *as, const char *p, size_t s) +{ + __archive_string_ensure(as, as->length + s + 1); + memcpy(as->s + as->length, p, s); + as->s[as->length + s] = 0; + as->length += s; + return (as); +} + +void +__archive_string_free(struct archive_string *as) +{ + as->length = 0; + as->buffer_length = 0; + if (as->s != NULL) + free(as->s); +} + +struct archive_string * +__archive_string_ensure(struct archive_string *as, size_t s) +{ + if (as->s && (s <= as->buffer_length)) + return (as); + + if (as->buffer_length < 32) + as->buffer_length = 32; + while (as->buffer_length < s) + as->buffer_length *= 2; + as->s = realloc(as->s, as->buffer_length); + /* TODO: Return null instead and fix up all of our callers to + * handle this correctly. */ + if (as->s == NULL) + __archive_errx(1, "Out of memory"); + return (as); +} + +struct archive_string * +__archive_strncat(struct archive_string *as, const char *p, size_t n) +{ + size_t s; + const char *pp; + + /* Like strlen(p), except won't examine positions beyond p[n]. 
*/ + s = 0; + pp = p; + while (*pp && s < n) { + pp++; + s++; + } + return (__archive_string_append(as, p, s)); +} + +struct archive_string * +__archive_strappend_char(struct archive_string *as, char c) +{ + return (__archive_string_append(as, &c, 1)); +} + +struct archive_string * +__archive_strappend_int(struct archive_string *as, int d, int base) +{ + static const char *digits = "0123457890abcdef"; + + if (d < 0) { + __archive_strappend_char(as, '-'); + d = -d; + } + if (d >= base) + __archive_strappend_int(as, d/base, base); + __archive_strappend_char(as, digits[d % base]); + return (as); +} diff --git a/lib/libarchive/archive_string.h b/lib/libarchive/archive_string.h new file mode 100644 index 0000000..c9d1c00 --- /dev/null +++ b/lib/libarchive/archive_string.h @@ -0,0 +1,112 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */

#ifndef ARCHIVE_STRING_H_INCLUDED
#define	ARCHIVE_STRING_H_INCLUDED

#include <stdarg.h>
#include <string.h>

/*
 * Basic resizable/reusable string support a la Java's "StringBuffer."
 *
 * Unlike sbuf(9), the buffers here are fully reusable and track the
 * length throughout.
 *
 * Note that all visible symbols here begin with "__archive" as they
 * are internal symbols not intended for anyone outside of this library
 * to see or use.
 */

struct archive_string {
	char	*s;  /* Pointer to the storage */
	size_t	 length; /* Length of 's' */
	size_t	 buffer_length; /* Length of malloc-ed storage */
};

/* Initialize an archive_string object on the stack or elsewhere. */
#define	archive_string_init(a)	\
	do { (a)->s = NULL; (a)->length = 0; (a)->buffer_length = 0; } while(0)

/* Append a C char to an archive_string, resizing as necessary. */
struct archive_string *
__archive_strappend_char(struct archive_string *, char);
#define	archive_strappend_char __archive_strappend_char

/* Append a char to an archive_string using UTF8. */
struct archive_string *
__archive_strappend_char_UTF8(struct archive_string *, int);
#define	archive_strappend_char_UTF8 __archive_strappend_char_UTF8

/* Append an integer in the specified base (2 <= base <= 16). */
struct archive_string *
__archive_strappend_int(struct archive_string *as, int d, int base);
#define	archive_strappend_int __archive_strappend_int

/* Basic append operation. */
struct archive_string *
__archive_string_append(struct archive_string *as, const char *p, size_t s);

/* Ensure that the underlying buffer is at least as large as the request. */
struct archive_string *
__archive_string_ensure(struct archive_string *, size_t);
#define	archive_string_ensure __archive_string_ensure

/* Append C string, which may lack trailing \0. */
struct archive_string *
__archive_strncat(struct archive_string *, const char *, size_t);
#define	archive_strncat  __archive_strncat

/*
 * Append a C string to an archive_string, resizing as necessary.
 * Macro: 'p' is evaluated twice, so it must not have side effects.
 */
#define	archive_strcat(as,p) __archive_string_append((as),(p),strlen(p))

/*
 * Copy a C string to an archive_string, resizing as necessary.
 * Macro: 'as' and 'p' are each evaluated twice.  The length is reset
 * first, so the existing buffer is reused rather than released.
 */
#define	archive_strcpy(as,p) \
	((as)->length = 0, __archive_string_append((as), (p), strlen(p)))

/*
 * Copy a C string to an archive_string with limit, resizing as necessary.
 * Macro: 'as' is evaluated twice.
 */
#define	archive_strncpy(as,p,l) \
	((as)->length=0, archive_strncat((as), (p), (l)))

/* Return length of string. */
#define	archive_strlen(a) ((a)->length)

/*
 * Set string length to zero.
 * Only the length is reset; 's' and its contents are left untouched.
 */
#define	archive_string_empty(a) ((a)->length = 0)

/* Release any allocated storage resources. */
void	__archive_string_free(struct archive_string *);
#define	archive_string_free  __archive_string_free

/* Like 'vsprintf', but resizes the underlying string as necessary. */
void	__archive_string_vsprintf(struct archive_string *, const char *,
	    va_list);
#define	archive_string_vsprintf	__archive_string_vsprintf

#endif
diff --git a/lib/libarchive/archive_string_sprintf.c b/lib/libarchive/archive_string_sprintf.c
new file mode 100644
index 0000000..64ca7d0
--- /dev/null
+++ b/lib/libarchive/archive_string_sprintf.c
@@ -0,0 +1,129 @@
/*-
 * Copyright (c) 2003-2004 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "archive_platform.h"
__FBSDID("$FreeBSD$");

/*
 * The use of printf()-family functions can be troublesome
 * for space-constrained applications.
In addition, correctly + * implementing this function in terms of vsnprintf() requires + * two calls (one to determine the size, another to format the + * result), which in turn requires duplicating the argument list + * using va_copy, which isn't yet universally available. + * + * So, I've implemented a bare minimum of printf()-like capability + * here. This is only used to format error messages, so doesn't + * require any floating-point support or field-width handling. + */ + +#include <stdio.h> + +#include "archive_string.h" + +/* + * Like 'vsprintf', but ensures the target is big enough, resizing if + * necessary. + */ +void +__archive_string_vsprintf(struct archive_string *as, const char *fmt, + va_list ap) +{ + char long_flag; + intmax_t s; /* Signed integer temp. */ + uintmax_t u; /* Unsigned integer temp. */ + const char *p, *p2; + + __archive_string_ensure(as, 64); + + if (fmt == NULL) { + as->s[0] = 0; + return; + } + + long_flag = '\0'; + for (p = fmt; *p != '\0'; p++) { + const char *saved_p = p; + + if (*p != '%') { + archive_strappend_char(as, *p); + continue; + } + + p++; + + switch(*p) { + case 'j': + long_flag = 'j'; + p++; + break; + case 'l': + long_flag = 'l'; + p++; + break; + } + + switch (*p) { + case '%': + __archive_strappend_char(as, '%'); + break; + case 'c': + s = va_arg(ap, int); + __archive_strappend_char(as, s); + break; + case 'd': + switch(long_flag) { + case 'j': s = va_arg(ap, intmax_t); break; + case 'l': s = va_arg(ap, long); break; + default: s = va_arg(ap, int); break; + } + archive_strappend_int(as, s, 10); + break; + case 's': + p2 = va_arg(ap, char *); + archive_strcat(as, p2); + break; + case 'o': case 'u': case 'x': case 'X': + /* Common handling for unsigned integer formats. */ + switch(long_flag) { + case 'j': u = va_arg(ap, uintmax_t); break; + case 'l': u = va_arg(ap, unsigned long); break; + default: u = va_arg(ap, unsigned int); break; + } + /* Format it in the correct base. 
*/ + switch (*p) { + case 'o': archive_strappend_int(as, u, 8); break; + case 'u': archive_strappend_int(as, u, 10); break; + default: archive_strappend_int(as, u, 16); break; + } + break; + default: + /* Rewind and print the initial '%' literally. */ + p = saved_p; + archive_strappend_char(as, *p); + } + } +} diff --git a/lib/libarchive/archive_util.3 b/lib/libarchive/archive_util.3 new file mode 100644 index 0000000..4646ac9 --- /dev/null +++ b/lib/libarchive/archive_util.3 @@ -0,0 +1,135 @@ +.\" Copyright (c) 2003-2004 Tim Kientzle +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD$ +.\" +.Dd January 8, 2005 +.Dt archive_util 3 +.Os +.Sh NAME +.Nm archive_compression , +.Nm archive_compression_name , +.Nm archive_errno , +.Nm archive_error_string , +.Nm archive_format , +.Nm archive_format_name , +.Nm archive_set_error +.Nd libarchive utility functions +.Sh SYNOPSIS +.In archive.h +.Ft int +.Fn archive_compression "struct archive *" +.Ft const char * +.Fn archive_compression_name "struct archive *" +.Ft int +.Fn archive_errno "struct archive *" +.Ft const char * +.Fn archive_error_string "struct archive *" +.Ft int +.Fn archive_format "struct archive *" +.Ft const char * +.Fn archive_format_name "struct archive *" +.Ft void +.Fn archive_set_error "struct archive *" "int error_code" "const char *fmt" "..." +.Sh DESCRIPTION +These functions provide access to various information about the +.Tn struct archive +object used in the +.Xr libarchive 3 +library. +.Bl -tag -compact -width indent +.It Fn archive_compression +Returns a numeric code indicating the current compression. +This value is set by +.Fn archive_read_open . +.It Fn archive_compression_name +Returns a text description of the current compression suitable for display. +.It Fn archive_errno +Returns a numeric error code (see +.Xr errno 2 ) +indicating the reason for the most recent error return. +.It Fn archive_error_string +Returns a textual error message suitable for display. +The error message here is usually more specific than that +obtained from passing the result of +.Fn archive_errno +to +.Xr strerror 3 . +.It Fn archive_format +Returns a numeric code indicating the format of the current +archive entry. +This value is set by a successful call to +.Fn archive_read_next_header . +Note that it is common for this value to change from +entry to entry. +For example, a tar archive might have several entries that +utilize GNU tar extensions and several entries that do not. +These entries will have different format codes. 
+.It Fn archive_format_name +A textual description of the format of the current entry. +.It Fn archive_set_error +Sets the numeric error code and error description that will be returned +by +.Fn archive_errno +and +.Fn archive_error_string . +This function should be used within I/O callbacks to set system-specific +error codes and error descriptions. +This function accepts a printf-like format string and arguments. +However, you should be careful to use only the following printf +format specifiers: +.Dq %c , +.Dq %d , +.Dq %jd , +.Dq %jo , +.Dq %ju , +.Dq %jx , +.Dq %ld , +.Dq %lo , +.Dq %lu , +.Dq %lx , +.Dq %o , +.Dq %u , +.Dq %s , +.Dq %x , +.Dq %% . +Field-width specifiers and other printf features are +not uniformly supported and should not be used. +.El +.Sh SEE ALSO +.Xr archive_read 3 , +.Xr archive_write 3 , +.Xr libarchive 3 , +.Xr printf 3 +.Sh HISTORY +The +.Nm libarchive +library first appeared in +.Fx 5.3 . +.Sh AUTHORS +.An -nosplit +The +.Nm libarchive +library was written by +.An Tim Kientzle Aq kientzle@acm.org . diff --git a/lib/libarchive/archive_util.c b/lib/libarchive/archive_util.c new file mode 100644 index 0000000..1567234 --- /dev/null +++ b/lib/libarchive/archive_util.c @@ -0,0 +1,161 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <stdlib.h> +#include <string.h> + +#include "archive.h" +#include "archive_private.h" + +/* Return the compile-time ARCHIVE_API_FEATURE constant. */ +int +archive_api_feature(void) +{ + return (ARCHIVE_API_FEATURE); +} + +/* Return the compile-time ARCHIVE_API_VERSION constant. */ +int +archive_api_version(void) +{ + return (ARCHIVE_API_VERSION); +} + +/* Return "<PACKAGE_NAME> <PACKAGE_VERSION>" as one string literal. */ +const char * +archive_version(void) +{ + return (PACKAGE_NAME " " PACKAGE_VERSION); +} + +/* Return the error number last stored in the archive object. */ +int +archive_errno(struct archive *a) +{ + return (a->archive_error_number); +} + +/* + * Return the stored error message, or a fixed placeholder when no + * message is set or the message is the empty string. + */ +const char * +archive_error_string(struct archive *a) +{ + + if (a->error != NULL && *a->error != '\0') + return (a->error); + else + return ("(Empty error message)"); +} + + +/* Return the numeric format code stored in the archive object. */ +int +archive_format(struct archive *a) +{ + return (a->archive_format); +} + +/* Return the format name stored in the archive object. */ +const char * +archive_format_name(struct archive *a) +{ + return (a->archive_format_name); +} + + +/* Return the numeric compression code stored in the archive object. */ +int +archive_compression(struct archive *a) +{ + return (a->compression_code); +} + +/* Return the compression name stored in the archive object. */ +const char * +archive_compression_name(struct archive *a) +{ + return (a->compression_name); +} + + +/* + * Return a count of the number of compressed bytes processed.
+ */ +int64_t +archive_position_compressed(struct archive *a) +{ + return (a->raw_position); +} + +/* + * Return a count of the number of uncompressed bytes processed. + */ +int64_t +archive_position_uncompressed(struct archive *a) +{ + return (a->file_position); +} + + +void +archive_set_error(struct archive *a, int error_number, const char *fmt, ...) +{ + va_list ap; +#ifdef HAVE_STRERROR_R + char errbuff[512]; +#endif + char *errp; + + a->archive_error_number = error_number; + if (fmt == NULL) { + a->error = NULL; + return; + } + + va_start(ap, fmt); + archive_string_vsprintf(&(a->error_string), fmt, ap); + if (error_number > 0) { + archive_strcat(&(a->error_string), ": "); +#ifdef HAVE_STRERROR_R +#ifdef STRERROR_R_CHAR_P + errp = strerror_r(error_number, errbuff, sizeof(errbuff)); +#else + strerror_r(error_number, errbuff, sizeof(errbuff)); + errp = errbuff; +#endif +#else + /* Note: this is not threadsafe! */ + errp = strerror(error_number); +#endif + archive_strcat(&(a->error_string), errp); + } + a->error = a->error_string.s; + va_end(ap); +} + +void +__archive_errx(int retvalue, const char *msg) +{ + static const char *msg1 = "Fatal Internal Error in libarchive: "; + write(2, msg1, strlen(msg1)); + write(2, msg, strlen(msg)); + write(2, "\n", 1); + exit(retvalue); +} diff --git a/lib/libarchive/archive_write.3 b/lib/libarchive/archive_write.3 new file mode 100644 index 0000000..f32bfcb --- /dev/null +++ b/lib/libarchive/archive_write.3 @@ -0,0 +1,433 @@ +.\" Copyright (c) 2003-2005 Tim Kientzle +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD$ +.\" +.Dd January 8, 2005 +.Dt archive_write 3 +.Os +.Sh NAME +.Nm archive_write_new , +.Nm archive_write_set_format_cpio , +.Nm archive_write_set_format_pax , +.Nm archive_write_set_format_pax_restricted , +.Nm archive_write_set_format_shar , +.Nm archive_write_set_format_shar_binary , +.Nm archive_write_set_format_ustar , +.Nm archive_write_set_bytes_per_block , +.Nm archive_write_set_bytes_in_last_block , +.Nm archive_write_set_compression_gzip , +.Nm archive_write_set_compression_bzip2 , +.Nm archive_write_open , +.Nm archive_write_open_fd , +.Nm archive_write_open_file , +.Nm archive_write_prepare , +.Nm archive_write_header , +.Nm archive_write_data , +.Nm archive_write_close , +.Nm archive_write_finish +.Nd functions for creating archives +.Sh SYNOPSIS +.In archive.h +.Ft struct archive * +.Fn archive_write_new "void" +.Ft int +.Fn archive_write_set_bytes_per_block "struct archive *" "int bytes_per_block" +.Ft int +.Fn archive_write_set_bytes_in_last_block "struct archive *" "int" +.Ft int +.Fn archive_write_set_compression_gzip "struct archive *" +.Ft int +.Fn archive_write_set_compression_bzip2 "struct archive *" +.Ft int +.Fn archive_write_set_format_cpio "struct archive *" +.Ft int +.Fn archive_write_set_format_pax "struct archive *" +.Ft int +.Fn archive_write_set_format_pax_restricted "struct archive *" +.Ft int +.Fn archive_write_set_format_shar "struct archive *" +.Ft int +.Fn archive_write_set_format_shar_binary "struct archive *" +.Ft int +.Fn archive_write_set_format_ustar "struct archive *" +.Ft int +.Fn archive_write_open "struct archive *" "void *client_data" "archive_open_callback *" "archive_write_callback *" "archive_close_callback *" +.Ft int +.Fn archive_write_open_fd "struct archive *" "int fd" +.Ft int +.Fn archive_write_open_file "struct archive *" "const char *filename" +.Ft int +.Fn archive_write_header "struct archive *" "struct archive_entry *" +.Ft int +.Fn archive_write_data "struct archive *" "const void *"
"size_t" +.Ft int +.Fn archive_write_close "struct archive *" +.Ft void +.Fn archive_write_finish "struct archive *" +.Sh DESCRIPTION +These functions provide a complete API for creating streaming +archive files. +The general process is to first create the +.Tn struct archive +object, set any desired options, initialize the archive, append entries, then +close the archive and release all resources. +The following summary describes the functions in approximately +the order they are ordinarily used: +.Bl -tag -width indent +.It Fn archive_write_new +Allocates and initializes a +.Tn struct archive +object suitable for writing a tar archive. +.It Fn archive_write_set_bytes_per_block +Sets the block size used for writing the archive data. +Every call to the write callback function, except possibly the last one, will +use this value for the length. +The third parameter is a boolean that specifies whether or not the final block +written will be padded to the full block size. +If it is zero, the last block will not be padded. +If it is non-zero, padding will be added both before and after compression. +The default is to use a block size of 10240 bytes and to pad the last block. +.It Fn archive_write_set_bytes_in_last_block +Sets the block size used for writing the last block. +If this value is zero, the last block will be padded to the same size +as the other blocks. +Otherwise, the final block will be padded to a multiple of this size. +In particular, setting it to 1 will cause the final block to not be padded. +For compressed output, any padding generated by this option +is applied only after the compression. +The uncompressed data is always unpadded. +The default is to pad the last block to the full block size (note that +.Fn archive_write_open_file +will set this based on the file type). +Unlike the other +.Dq set +functions, this function can be called after the archive is opened. 
+.It Fn archive_write_set_format_cpio , Fn archive_write_set_format_pax , Fn archive_write_set_format_pax_restricted , Fn archive_write_set_format_shar , Fn archive_write_set_format_shar_binary , Fn archive_write_set_format_ustar +Sets the format that will be used for the archive. +The library can write +POSIX octet-oriented cpio format archives, +POSIX-standard +.Dq pax interchange +format archives, +traditional +.Dq shar +archives, +enhanced +.Dq binary +shar archives that store a variety of file attributes and handle binary files, +and +POSIX-standard +.Dq ustar +archives. +The pax interchange format is a backwards-compatible tar format that +adds key/value attributes to each entry and supports arbitrary +filenames, linknames, uids, sizes, etc. +.Dq Restricted pax interchange format +is the library default; this is the same as pax format, but suppresses +the pax extended header for most normal files. +In most cases, this will result in ordinary ustar archives. +.It Fn archive_write_set_compression_gzip , Fn archive_write_set_compression_bzip2 +The resulting archive will be compressed as specified. +Note that the compressed output is always properly blocked. +.It Fn archive_write_open +Freeze the settings, open the archive, and prepare for writing entries. +This is the most generic form of this function, which accepts +pointers to three callback functions which will be invoked by +the compression layer to write the constructed archive. +In order to support external compression programs, the compression +is permitted to fork and invoke the callbacks from a separate process. +In particular, clients should not assume that they can communicate +between the callbacks and the mainline code using shared variables. +(The standard gzip, bzip2, and "none" compression methods do not fork.) +.It Fn archive_write_open_fd +A convenience form of +.Fn archive_write_open +that accepts a file descriptor. 
+.It Fn archive_write_open_file +A convenience form of +.Fn archive_write_open +that accepts a filename. +A NULL argument indicates that the output should be written to standard output; +an argument of +.Dq - +will open a file with that name. +If you have not invoked +.Fn archive_write_set_bytes_in_last_block , +then +.Fn archive_write_open_file +will adjust the last-block padding depending on the file: +it will enable padding when writing to standard output or +to a character or block device node, it will disable padding otherwise. +You can override this by manually invoking +.Fn archive_write_set_bytes_in_last_block +either before or after calling +.Fn archive_write_open . +.It Fn archive_write_header +Build and write a header using the data in the provided +.Tn struct archive_entry +structure. +.It Fn archive_write_data +Write data corresponding to the header just written. +Returns number of bytes written or -1 on error. +.It Fn archive_write_close +Complete the archive and invoke the close callback. +.It Fn archive_write_finish +Invokes +.Fn archive_write_close +if it was not invoked manually, then release all resources. +.El +More information about the +.Va struct archive +object and the overall design of the library can be found in the +.Xr libarchive 3 +overview. +.Sh IMPLEMENTATION +Compression support is built-in to libarchive, which uses zlib and bzlib +to handle gzip and bzip2 compression, respectively. +.Sh CLIENT CALLBACKS +To use this library, you will need to define and register +callback functions that will be invoked to write data to the +resulting archive. +These functions are registered by calling +.Fn archive_write_open : +.Bl -item -offset indent +.It +.Ft typedef int +.Fn archive_open_callback "struct archive *" "void *client_data" +.El +.Pp +The open callback is invoked by +.Fn archive_write_open . +It should return +.Cm ARCHIVE_OK +if the underlying file or data source is successfully +opened. 
+If the open fails, it should call +.Fn archive_set_error +to register an error code and message and return +.Cm ARCHIVE_FATAL . +.Bl -item -offset indent +.It +.Ft typedef ssize_t +.Fn archive_write_callback "struct archive *" "void *client_data" "void *buffer" "size_t length" +.El +.Pp +The write callback is invoked whenever the library +needs to write raw bytes to the archive. +For correct blocking, each call to the write callback function +should translate into a single +.Xr write 2 +system call. +This is especially critical when writing archives to tape drives. +On success, the write callback should return the +number of bytes actually written. +On error, the callback should invoke +.Fn archive_set_error +to register an error code and message and return -1. +.Bl -item -offset indent +.It +.Ft typedef int +.Fn archive_close_callback "struct archive *" "void *client_data" +.El +.Pp +The close callback is invoked by +.Fn archive_write_close +when the archive processing is complete. +The callback should return +.Cm ARCHIVE_OK +on success. +On failure, the callback should invoke +.Fn archive_set_error +to register an error code and message and +return +.Cm ARCHIVE_FATAL . +.Sh EXAMPLE +The following sketch illustrates basic usage of the library. +In this example, +the callback functions are simply wrappers around the standard +.Xr open 2 , +.Xr write 2 , +and +.Xr close 2 +system calls.
+.Bd -literal -offset indent +#include <sys/stat.h> +#include <archive.h> +#include <archive_entry.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> + +struct mydata { + const char *name; + int fd; +}; + +int +myopen(struct archive *a, void *client_data) +{ + struct mydata *mydata = client_data; + + mydata->fd = open(mydata->name, O_WRONLY | O_CREAT, 0644); + if (mydata->fd >= 0) + return (ARCHIVE_OK); + else + return (ARCHIVE_FATAL); +} + +ssize_t +mywrite(struct archive *a, void *client_data, void *buff, size_t n) +{ + struct mydata *mydata = client_data; + + return (write(mydata->fd, buff, n)); +} + +int +myclose(struct archive *a, void *client_data) +{ + struct mydata *mydata = client_data; + + if (mydata->fd > 0) + close(mydata->fd); + return (0); +} + +void +write_archive(const char *outname, const char **filename) +{ + struct mydata *mydata = malloc(sizeof(struct mydata)); + struct archive *a; + struct archive_entry *entry; + struct stat st; + char buff[8192]; + int len; + int fd; + + a = archive_write_new(); + mydata->name = outname; + archive_write_set_compression_gzip(a); + archive_write_set_format_ustar(a); + archive_write_open(a, mydata, myopen, mywrite, myclose); + while (*filename) { + stat(*filename, &st); + entry = archive_entry_new(); + archive_entry_copy_stat(entry, &st); + archive_entry_set_pathname(entry, *filename); + archive_write_header(a, entry); + fd = open(*filename, O_RDONLY); + len = read(fd, buff, sizeof(buff)); + while ( len > 0 ) { + archive_write_data(a, buff, len); + len = read(fd, buff, sizeof(buff)); + } + archive_entry_free(entry); + filename++; + } + archive_write_finish(a); +} + +int main(int argc, const char **argv) +{ + const char *outname; + argv++; + outname = argv++; + write_archive(outname, argv); + return 0; +} +.Ed +.Sh RETURN VALUES +Most functions return zero on success, non-zero on error. 
+The +.Fn archive_errno +and +.Fn archive_error_string +functions can be used to retrieve an appropriate error code and a +textual error message. +.Pp +.Fn archive_write_new +returns a pointer to a newly-allocated +.Tn struct archive +object. +.Pp +.Fn archive_write_data +returns a count of the number of bytes actually written. +On error, -1 is returned and the +.Fn archive_errno +and +.Fn archive_error_string +functions will return appropriate values. +Note that if the client-provided write callback function +returns a non-zero value, that error will be propagated back to the caller +through whatever API function resulted in that call, which +may include +.Fn archive_write_header , +.Fn archive_write_data , +or +.Fn archive_write_close . +The client callback can call +.Fn archive_set_error +to provide values that can then be retrieved by +.Fn archive_errno +and +.Fn archive_error_string . +.Sh SEE ALSO +.Xr tar 1 , +.Xr libarchive 3 , +.Xr tar 5 +.Sh HISTORY +The +.Nm libarchive +library first appeared in +.Fx 5.3 . +.Sh AUTHORS +.An -nosplit +The +.Nm libarchive +library was written by +.An Tim Kientzle Aq kientzle@acm.org . +.Sh BUGS +There are many peculiar bugs in historic tar implementations that may cause +certain programs to reject archives written by this library. +For example, several historic implementations calculated header checksums +incorrectly and will thus reject valid archives; GNU tar does not fully support +pax interchange format; some old tar implementations required specific +field terminations. +.Pp +The default pax interchange format eliminates most of the historic +tar limitations and provides a generic key/value attribute facility +for vendor-defined extensions. +One oversight in POSIX is the failure to provide a standard attribute +for large device numbers. +This library uses +.Dq SCHILY.devminor +and +.Dq SCHILY.devmajor +for device numbers that exceed the range supported by the backwards-compatible +ustar header. 
+These keys are compatible with Joerg Schilling's +.Nm star +archiver. +Other implementations may not recognize these keys and will thus be unable +to correctly restore large device numbers archived by this library. diff --git a/lib/libarchive/archive_write.c b/lib/libarchive/archive_write.c new file mode 100644 index 0000000..73283e4 --- /dev/null +++ b/lib/libarchive/archive_write.c @@ -0,0 +1,229 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +/* + * This file contains the "essential" portions of the write API, that + * is, stuff that will essentially always be used by any client that + * actually needs to write an archive. Optional pieces have been, as + * far as possible, separated out into separate files to avoid + * needlessly bloating statically-linked clients. + */ + +#include <sys/wait.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_entry.h" +#include "archive_private.h" + +extern char **environ; + +/* + * Allocate, initialize and return an archive object. + * + * Returns NULL if either the object or its block of padding nulls + * cannot be allocated. The new object is zero-filled, marked with + * ARCHIVE_WRITE_MAGIC, placed in ARCHIVE_STATE_NEW, and given the + * "none" compression; no format is selected. + */ +struct archive * +archive_write_new(void) +{ + struct archive *a; + unsigned char *nulls; + + a = malloc(sizeof(*a)); + if (a == NULL) + return (NULL); + memset(a, 0, sizeof(*a)); + a->magic = ARCHIVE_WRITE_MAGIC; + a->user_uid = geteuid(); + a->bytes_per_block = ARCHIVE_DEFAULT_BYTES_PER_BLOCK; + a->bytes_in_last_block = -1; /* Default */ + a->state = ARCHIVE_STATE_NEW; + a->pformat_data = &(a->format_data); + + /* Initialize a block of nulls for padding purposes. */ + a->null_length = 1024; + nulls = malloc(a->null_length); + if (nulls == NULL) { + free(a); + return (NULL); + } + memset(nulls, 0, a->null_length); + a->nulls = nulls; + /* + * Set default compression, but don't set a default format. + * Were we to set a default format here, we would force every + * client to link in support for that format, even if they didn't + * ever use it. + */ + archive_write_set_compression_none(a); + return (a); +} + + +/* + * Set the block size. Returns 0 if successful.
+ */ +int +archive_write_set_bytes_per_block(struct archive *a, int bytes_per_block) +{ + __archive_check_magic(a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_NEW, "archive_write_set_bytes_per_block"); + a->bytes_per_block = bytes_per_block; + return (ARCHIVE_OK); +} + + +/* + * Set the size for the last block. + * Returns 0 if successful. + */ +int +archive_write_set_bytes_in_last_block(struct archive *a, int bytes) +{ + __archive_check_magic(a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_ANY, "archive_write_set_bytes_in_last_block"); + a->bytes_in_last_block = bytes; + return (ARCHIVE_OK); +} + + +/* + * Open the archive using the current settings. + */ +int +archive_write_open(struct archive *a, void *client_data, + archive_open_callback *opener, archive_write_callback *writer, + archive_close_callback *closer) +{ + int ret; + + ret = ARCHIVE_OK; + __archive_check_magic(a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_NEW, "archive_write_open"); + archive_string_empty(&a->error_string); + a->state = ARCHIVE_STATE_HEADER; + a->client_data = client_data; + a->client_writer = writer; + a->client_opener = opener; + a->client_closer = closer; + ret = (a->compression_init)(a); + if (a->format_init && ret == ARCHIVE_OK) + ret = (a->format_init)(a); + return (ret); +} + + +/* + * Close out the archive. + * + * Be careful: user might just call write_new and then write_finish. + * Don't assume we actually wrote anything or performed any non-trivial + * initialization. + * + * All three finish steps are always attempted. The return value is + * ARCHIVE_OK if every step succeeded, otherwise the result of the + * first step that failed. + */ +int +archive_write_close(struct archive *a) +{ + int r, ret; + + ret = ARCHIVE_OK; + __archive_check_magic(a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_ANY, "archive_write_close"); + + /* Finish the last entry. */ + if (a->state & ARCHIVE_STATE_DATA) { + r = (a->format_finish_entry)(a); + if (ret == ARCHIVE_OK) + ret = r; + } + + /* Finish off the archive. */ + if (a->format_finish != NULL) { + r = (a->format_finish)(a); + if (ret == ARCHIVE_OK) + ret = r; + } + + /* Finish the compression and close the stream.
*/ + if (a->compression_finish != NULL) { + r = (a->compression_finish)(a); + if (ret == ARCHIVE_OK) + ret = r; + } + + a->state = ARCHIVE_STATE_CLOSED; + return (ret); +} + +/* + * Destroy the archive structure. + */ +void +archive_write_finish(struct archive *a) +{ + __archive_check_magic(a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_ANY, "archive_write_finish"); + if (a->state != ARCHIVE_STATE_CLOSED) + archive_write_close(a); + + /* Release various dynamic buffers. */ + free((void *)(uintptr_t)(const void *)a->nulls); + archive_string_free(&a->error_string); + a->magic = 0; + free(a); +} + + +/* + * Write the appropriate header. + */ +int +archive_write_header(struct archive *a, struct archive_entry *entry) +{ + int ret; + + __archive_check_magic(a, ARCHIVE_WRITE_MAGIC, + ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, "archive_write_header"); + archive_string_empty(&a->error_string); + + /* Finish last entry. */ + if (a->state & ARCHIVE_STATE_DATA) + ((a->format_finish_entry)(a)); + + if (archive_entry_dev(entry) == a->skip_file_dev && + archive_entry_ino(entry) == a->skip_file_ino) { + archive_set_error(a, 0, "Can't add archive to itself"); + return (ARCHIVE_WARN); + } + + /* Format and write header. */ + ret = ((a->format_write_header)(a, entry)); + + a->state = ARCHIVE_STATE_DATA; + return (ret); +} + +/* + * Note that the compressor is responsible for blocking. + */ +/* Should be "ssize_t", but that breaks the ABI. <sigh> */ +int +archive_write_data(struct archive *a, const void *buff, size_t s) +{ + int ret; + __archive_check_magic(a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_DATA, "archive_write_data"); + archive_string_empty(&a->error_string); + ret = (a->format_write_data)(a, buff, s); + return (ret == ARCHIVE_OK ?
(ssize_t)s : -1); +} diff --git a/lib/libarchive/archive_write_open_fd.c b/lib/libarchive/archive_write_open_fd.c new file mode 100644 index 0000000..58bfbd3 --- /dev/null +++ b/lib/libarchive/archive_write_open_fd.c @@ -0,0 +1,133 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_private.h" + +struct write_fd_data { + off_t offset; + int fd; +}; + +static int file_close(struct archive *, void *); +static int file_open(struct archive *, void *); +static ssize_t file_write(struct archive *, void *, void *buff, size_t); + +int +archive_write_open_fd(struct archive *a, int fd) +{ + struct write_fd_data *mine; + + mine = malloc(sizeof(*mine)); + if (mine == NULL) { + archive_set_error(a, ENOMEM, "No memory"); + return (ARCHIVE_FATAL); + } + mine->fd = fd; + return (archive_write_open(a, mine, + file_open, file_write, file_close)); +} + +static int +file_open(struct archive *a, void *client_data) +{ + struct write_fd_data *mine; + struct stat st, *pst; + + pst = NULL; + mine = client_data; + + /* + * If client hasn't explicitly set the last block handling, + * then set it here: If the output is a block or character + * device, pad the last block, otherwise leave it unpadded. + */ + if (mine->fd >= 0 && a->bytes_in_last_block < 0) { + /* Last block will be fully padded. */ + if (fstat(mine->fd, &st) == 0) { + pst = &st; + if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode) || + S_ISFIFO(st.st_mode)) + archive_write_set_bytes_in_last_block(a, 0); + else + archive_write_set_bytes_in_last_block(a, 1); + } + } + + if (mine->fd == 1) { + if (a->bytes_in_last_block < 0) /* Still default? */ + /* Last block will be fully padded. 
*/ + archive_write_set_bytes_in_last_block(a, 0); + } + + if (mine->fd < 0) { + archive_set_error(a, errno, "Failed to open"); + return (ARCHIVE_FATAL); + } + + if (pst == NULL && fstat(mine->fd, &st) == 0) + pst = &st; + if (pst == NULL) { + archive_set_error(a, errno, "Couldn't stat fd %d", mine->fd); + return (ARCHIVE_FATAL); + } + + return (ARCHIVE_OK); +} + +static ssize_t +file_write(struct archive *a, void *client_data, void *buff, size_t length) +{ + struct write_fd_data *mine; + ssize_t bytesWritten; + + mine = client_data; + bytesWritten = write(mine->fd, buff, length); + if (bytesWritten <= 0) { + archive_set_error(a, errno, "Write error"); + return (-1); + } + return (bytesWritten); +} + +static int +file_close(struct archive *a, void *client_data) +{ + struct write_fd_data *mine = client_data; + + (void)a; /* UNUSED */ + free(mine); + return (ARCHIVE_OK); +} diff --git a/lib/libarchive/archive_write_open_file.c b/lib/libarchive/archive_write_open_file.c new file mode 100644 index 0000000..65cdbb6 --- /dev/null +++ b/lib/libarchive/archive_write_open_file.c @@ -0,0 +1,166 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_private.h" + +struct write_file_data { + int fd; + char filename[1]; +}; + +static int file_close(struct archive *, void *); +static int file_open(struct archive *, void *); +static ssize_t file_write(struct archive *, void *, void *buff, size_t); + +int +archive_write_open_file(struct archive *a, const char *filename) +{ + struct write_file_data *mine; + + if (filename == NULL || filename[0] == '\0') { + mine = malloc(sizeof(*mine)); + if (mine == NULL) { + archive_set_error(a, ENOMEM, "No memory"); + return (ARCHIVE_FATAL); + } + mine->filename[0] = '\0'; /* Record that we're using stdout. 
*/ + } else { + mine = malloc(sizeof(*mine) + strlen(filename)); + if (mine == NULL) { + archive_set_error(a, ENOMEM, "No memory"); + return (ARCHIVE_FATAL); + } + strcpy(mine->filename, filename); + } + mine->fd = -1; + return (archive_write_open(a, mine, + file_open, file_write, file_close)); +} + +static int +file_open(struct archive *a, void *client_data) +{ + int flags; + struct write_file_data *mine; + struct stat st, *pst; + + pst = NULL; + mine = client_data; + flags = O_WRONLY | O_CREAT | O_TRUNC; + + if (mine->filename[0] != '\0') { + mine->fd = open(mine->filename, flags, 0666); + + /* + * If client hasn't explicitly set the last block + * handling, then set it here: If the output is a + * block or character device, pad the last block, + * otherwise leave it unpadded. + */ + if (mine->fd >= 0 && a->bytes_in_last_block < 0) { + if (fstat(mine->fd, &st) == 0) { + pst = &st; + if (S_ISCHR(st.st_mode) || + S_ISBLK(st.st_mode) || + S_ISFIFO(st.st_mode)) + /* Pad last block. */ + archive_write_set_bytes_in_last_block(a, 0); + else + /* Don't pad last block. */ + archive_write_set_bytes_in_last_block(a, 1); + } + } + } else { + mine->fd = 1; + if (a->bytes_in_last_block < 0) /* Still default? */ + /* Last block will be fully padded. */ + archive_write_set_bytes_in_last_block(a, 0); + } + + if (mine->fd < 0) { + archive_set_error(a, errno, "Failed to open '%s'", + mine->filename); + return (ARCHIVE_FATAL); + } + + if (pst == NULL && fstat(mine->fd, &st) == 0) + pst = &st; + if (pst == NULL) { + archive_set_error(a, errno, "Couldn't stat '%s'", + mine->filename); + return (ARCHIVE_FATAL); + } + + /* + * If the output file is a regular file, don't add it to + * itself. If it's a device file, it's okay to add the device + * entry to the output archive. 
+ */ + if (S_ISREG(pst->st_mode)) { + a->skip_file_dev = pst->st_dev; + a->skip_file_ino = pst->st_ino; + } + + return (ARCHIVE_OK); +} + +static ssize_t +file_write(struct archive *a, void *client_data, void *buff, size_t length) +{ + struct write_file_data *mine; + ssize_t bytesWritten; + + mine = client_data; + bytesWritten = write(mine->fd, buff, length); + if (bytesWritten <= 0) { + archive_set_error(a, errno, "Write error"); + return (-1); + } + return (bytesWritten); +} + +static int +file_close(struct archive *a, void *client_data) +{ + struct write_file_data *mine = client_data; + + (void)a; /* UNUSED */ + if (mine->filename[0] != '\0') + close(mine->fd); + free(mine); + return (ARCHIVE_OK); +} diff --git a/lib/libarchive/archive_write_open_filename.c b/lib/libarchive/archive_write_open_filename.c new file mode 100644 index 0000000..65cdbb6 --- /dev/null +++ b/lib/libarchive/archive_write_open_filename.c @@ -0,0 +1,166 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_private.h" + +struct write_file_data { + int fd; + char filename[1]; +}; + +static int file_close(struct archive *, void *); +static int file_open(struct archive *, void *); +static ssize_t file_write(struct archive *, void *, void *buff, size_t); + +int +archive_write_open_file(struct archive *a, const char *filename) +{ + struct write_file_data *mine; + + if (filename == NULL || filename[0] == '\0') { + mine = malloc(sizeof(*mine)); + if (mine == NULL) { + archive_set_error(a, ENOMEM, "No memory"); + return (ARCHIVE_FATAL); + } + mine->filename[0] = '\0'; /* Record that we're using stdout. 
*/ + } else { + mine = malloc(sizeof(*mine) + strlen(filename)); + if (mine == NULL) { + archive_set_error(a, ENOMEM, "No memory"); + return (ARCHIVE_FATAL); + } + strcpy(mine->filename, filename); + } + mine->fd = -1; + return (archive_write_open(a, mine, + file_open, file_write, file_close)); +} + +static int +file_open(struct archive *a, void *client_data) +{ + int flags; + struct write_file_data *mine; + struct stat st, *pst; + + pst = NULL; + mine = client_data; + flags = O_WRONLY | O_CREAT | O_TRUNC; + + if (mine->filename[0] != '\0') { + mine->fd = open(mine->filename, flags, 0666); + + /* + * If client hasn't explicitly set the last block + * handling, then set it here: If the output is a + * block or character device, pad the last block, + * otherwise leave it unpadded. + */ + if (mine->fd >= 0 && a->bytes_in_last_block < 0) { + if (fstat(mine->fd, &st) == 0) { + pst = &st; + if (S_ISCHR(st.st_mode) || + S_ISBLK(st.st_mode) || + S_ISFIFO(st.st_mode)) + /* Pad last block. */ + archive_write_set_bytes_in_last_block(a, 0); + else + /* Don't pad last block. */ + archive_write_set_bytes_in_last_block(a, 1); + } + } + } else { + mine->fd = 1; + if (a->bytes_in_last_block < 0) /* Still default? */ + /* Last block will be fully padded. */ + archive_write_set_bytes_in_last_block(a, 0); + } + + if (mine->fd < 0) { + archive_set_error(a, errno, "Failed to open '%s'", + mine->filename); + return (ARCHIVE_FATAL); + } + + if (pst == NULL && fstat(mine->fd, &st) == 0) + pst = &st; + if (pst == NULL) { + archive_set_error(a, errno, "Couldn't stat '%s'", + mine->filename); + return (ARCHIVE_FATAL); + } + + /* + * If the output file is a regular file, don't add it to + * itself. If it's a device file, it's okay to add the device + * entry to the output archive. 
+ */ + if (S_ISREG(pst->st_mode)) { + a->skip_file_dev = pst->st_dev; + a->skip_file_ino = pst->st_ino; + } + + return (ARCHIVE_OK); +} + +static ssize_t +file_write(struct archive *a, void *client_data, void *buff, size_t length) +{ + struct write_file_data *mine; + ssize_t bytesWritten; + + mine = client_data; + bytesWritten = write(mine->fd, buff, length); + if (bytesWritten <= 0) { + archive_set_error(a, errno, "Write error"); + return (-1); + } + return (bytesWritten); +} + +static int +file_close(struct archive *a, void *client_data) +{ + struct write_file_data *mine = client_data; + + (void)a; /* UNUSED */ + if (mine->filename[0] != '\0') + close(mine->fd); + free(mine); + return (ARCHIVE_OK); +} diff --git a/lib/libarchive/archive_write_set_compression_bzip2.c b/lib/libarchive/archive_write_set_compression_bzip2.c new file mode 100644 index 0000000..429cc15 --- /dev/null +++ b/lib/libarchive/archive_write_set_compression_bzip2.c @@ -0,0 +1,343 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" + +/* Don't compile this if we don't have bzlib. */ +#if HAVE_BZLIB_H + +__FBSDID("$FreeBSD$"); + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <bzlib.h> + +#include "archive.h" +#include "archive_private.h" + +struct private_data { + bz_stream stream; + int64_t total_in; + char *compressed; + size_t compressed_buffer_size; +}; + + +/* + * Yuck. bzlib.h is not const-correct, so I need this one bit + * of ugly hackery to convert a const * pointer to a non-const pointer. + */ +#define SET_NEXT_IN(st,src) \ + (st)->stream.next_in = (void *)(uintptr_t)(const void *)(src) + +static int archive_compressor_bzip2_finish(struct archive *); +static int archive_compressor_bzip2_init(struct archive *); +static int archive_compressor_bzip2_write(struct archive *, const void *, + size_t); +static int drive_compressor(struct archive *, struct private_data *, + int finishing); + +/* + * Allocate, initialize and return an archive object. + */ +int +archive_write_set_compression_bzip2(struct archive *a) +{ + __archive_check_magic(a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_NEW, "archive_write_set_compression_bzip2"); + a->compression_init = &archive_compressor_bzip2_init; + a->compression_code = ARCHIVE_COMPRESSION_BZIP2; + a->compression_name = "bzip2"; + return (ARCHIVE_OK); +} + +/* + * Setup callback. 
+ */ +static int +archive_compressor_bzip2_init(struct archive *a) +{ + int ret; + struct private_data *state; + + a->compression_code = ARCHIVE_COMPRESSION_BZIP2; + a->compression_name = "bzip2"; + + if (a->client_opener != NULL) { + ret = (a->client_opener)(a, a->client_data); + if (ret != 0) + return (ret); + } + + state = malloc(sizeof(*state)); + if (state == NULL) { + archive_set_error(a, ENOMEM, + "Can't allocate data for compression"); + return (ARCHIVE_FATAL); + } + memset(state, 0, sizeof(*state)); + + state->compressed_buffer_size = a->bytes_per_block; + state->compressed = malloc(state->compressed_buffer_size); + + if (state->compressed == NULL) { + archive_set_error(a, ENOMEM, + "Can't allocate data for compression buffer"); + free(state); + return (ARCHIVE_FATAL); + } + + state->stream.next_out = state->compressed; + state->stream.avail_out = state->compressed_buffer_size; + a->compression_write = archive_compressor_bzip2_write; + a->compression_finish = archive_compressor_bzip2_finish; + + /* Initialize compression library */ + ret = BZ2_bzCompressInit(&(state->stream), 9, 0, 30); + if (ret == BZ_OK) { + a->compression_data = state; + return (ARCHIVE_OK); + } + + /* Library setup failed: clean up. */ + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Internal error initializing compression library"); + free(state->compressed); + free(state); + + /* Override the error message if we know what really went wrong. */ + switch (ret) { + case BZ_PARAM_ERROR: + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Internal error initializing compression library: " + "invalid setup parameter"); + break; + case BZ_MEM_ERROR: + archive_set_error(a, ENOMEM, + "Internal error initializing compression library: " + "out of memory"); + break; + case BZ_CONFIG_ERROR: + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Internal error initializing compression library: " + "mis-compiled library"); + break; + } + + return (ARCHIVE_FATAL); + +} + +/* + * Write data to the compressed stream. 
+ * + * Returns ARCHIVE_OK if all data written, error otherwise. + */ +static int +archive_compressor_bzip2_write(struct archive *a, const void *buff, + size_t length) +{ + struct private_data *state; + + state = a->compression_data; + if (a->client_writer == NULL) { + archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER, + "No write callback is registered? " + "This is probably an internal programming error."); + return (ARCHIVE_FATAL); + } + + /* Update statistics */ + state->total_in += length; + + /* Compress input data to output buffer */ + SET_NEXT_IN(state, buff); + state->stream.avail_in = length; + if (drive_compressor(a, state, 0)) + return (ARCHIVE_FATAL); + a->file_position += length; + return (ARCHIVE_OK); +} + + +/* + * Finish the compression. + */ +static int +archive_compressor_bzip2_finish(struct archive *a) +{ + ssize_t block_length; + int ret; + struct private_data *state; + ssize_t target_block_length; + ssize_t bytes_written; + unsigned tocopy; + + state = a->compression_data; + ret = ARCHIVE_OK; + if (a->client_writer == NULL) { + archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER, + "No write callback is registered?\n" + "This is probably an internal programming error."); + ret = ARCHIVE_FATAL; + goto cleanup; + } + + /* By default, always pad the uncompressed data. */ + if (a->pad_uncompressed) { + tocopy = a->bytes_per_block - + (state->total_in % a->bytes_per_block); + while (tocopy > 0 && tocopy < (unsigned)a->bytes_per_block) { + SET_NEXT_IN(state, a->nulls); + state->stream.avail_in = tocopy < a->null_length ? + tocopy : a->null_length; + state->total_in += state->stream.avail_in; + tocopy -= state->stream.avail_in; + ret = drive_compressor(a, state, 0); + if (ret != ARCHIVE_OK) + goto cleanup; + } + } + + /* Finish compression cycle. */ + if ((ret = drive_compressor(a, state, 1))) + goto cleanup; + + /* Optionally, pad the final compressed block. 
*/ + block_length = state->stream.next_out - state->compressed; + + + /* Tricky calculation to determine size of last block. */ + target_block_length = block_length; + if (a->bytes_in_last_block <= 0) + /* Default or Zero: pad to full block */ + target_block_length = a->bytes_per_block; + else + /* Round length to next multiple of bytes_in_last_block. */ + target_block_length = a->bytes_in_last_block * + ( (block_length + a->bytes_in_last_block - 1) / + a->bytes_in_last_block); + if (target_block_length > a->bytes_per_block) + target_block_length = a->bytes_per_block; + if (block_length < target_block_length) { + memset(state->stream.next_out, 0, + target_block_length - block_length); + block_length = target_block_length; + } + + /* Write the last block */ + bytes_written = (a->client_writer)(a, a->client_data, + state->compressed, block_length); + + /* TODO: Handle short write of final block. */ + if (bytes_written <= 0) + ret = ARCHIVE_FATAL; + else { + a->raw_position += ret; + ret = ARCHIVE_OK; + } + + /* Cleanup: shut down compressor, release memory, etc. */ +cleanup: + switch (BZ2_bzCompressEnd(&(state->stream))) { + case BZ_OK: + break; + default: + archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER, + "Failed to clean up compressor"); + ret = ARCHIVE_FATAL; + } + + free(state->compressed); + free(state); + + /* Close the output */ + if (a->client_closer != NULL) + (a->client_closer)(a, a->client_data); + + return (ret); +} + +/* + * Utility function to push input data through compressor, writing + * full output blocks as necessary. + * + * Note that this handles both the regular write case (finishing == + * false) and the end-of-archive case (finishing == true). 
+ */ +static int +drive_compressor(struct archive *a, struct private_data *state, int finishing) +{ + ssize_t bytes_written; + int ret; + + for (;;) { + if (state->stream.avail_out == 0) { + bytes_written = (a->client_writer)(a, a->client_data, + state->compressed, state->compressed_buffer_size); + if (bytes_written <= 0) { + /* TODO: Handle this write failure */ + return (ARCHIVE_FATAL); + } else if ((size_t)bytes_written < state->compressed_buffer_size) { + /* Short write: Move remainder to + * front and keep filling */ + memmove(state->compressed, + state->compressed + bytes_written, + state->compressed_buffer_size - bytes_written); + } + + a->raw_position += bytes_written; + state->stream.next_out = state->compressed + + state->compressed_buffer_size - bytes_written; + state->stream.avail_out = bytes_written; + } + + ret = BZ2_bzCompress(&(state->stream), + finishing ? BZ_FINISH : BZ_RUN); + + switch (ret) { + case BZ_RUN_OK: + /* In non-finishing case, did compressor + * consume everything? */ + if (!finishing && state->stream.avail_in == 0) + return (ARCHIVE_OK); + break; + case BZ_FINISH_OK: /* Finishing: There's more work to do */ + break; + case BZ_STREAM_END: /* Finishing: all done */ + /* Only occurs in finishing case */ + return (ARCHIVE_OK); + default: + /* Any other return value indicates an error */ + archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER, + "Bzip2 compression failed"); + return (ARCHIVE_FATAL); + } + } +} + +#endif /* HAVE_BZLIB_H */ diff --git a/lib/libarchive/archive_write_set_compression_gzip.c b/lib/libarchive/archive_write_set_compression_gzip.c new file mode 100644 index 0000000..3229507 --- /dev/null +++ b/lib/libarchive/archive_write_set_compression_gzip.c @@ -0,0 +1,399 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" + +/* Don't compile this if we don't have zlib. */ +#if HAVE_ZLIB_H + +__FBSDID("$FreeBSD$"); + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <zlib.h> + +#include "archive.h" +#include "archive_private.h" + +struct private_data { + z_stream stream; + int64_t total_in; + unsigned char *compressed; + size_t compressed_buffer_size; + unsigned long crc; +}; + + +/* + * Yuck. zlib.h is not const-correct, so I need this one bit + * of ugly hackery to convert a const * pointer to a non-const pointer. 
+ */ +#define SET_NEXT_IN(st,src) \ + (st)->stream.next_in = (void *)(uintptr_t)(const void *)(src) + +static int archive_compressor_gzip_finish(struct archive *); +static int archive_compressor_gzip_init(struct archive *); +static int archive_compressor_gzip_write(struct archive *, const void *, + size_t); +static int drive_compressor(struct archive *, struct private_data *, + int finishing); + + +/* + * Allocate, initialize and return a archive object. + */ +int +archive_write_set_compression_gzip(struct archive *a) +{ + __archive_check_magic(a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_NEW, "archive_write_set_compression_gzip"); + a->compression_init = &archive_compressor_gzip_init; + a->compression_code = ARCHIVE_COMPRESSION_GZIP; + a->compression_name = "gzip"; + return (ARCHIVE_OK); +} + +/* + * Setup callback. + */ +static int +archive_compressor_gzip_init(struct archive *a) +{ + int ret; + struct private_data *state; + time_t t; + + a->compression_code = ARCHIVE_COMPRESSION_GZIP; + a->compression_name = "gzip"; + + if (a->client_opener != NULL) { + ret = (a->client_opener)(a, a->client_data); + if (ret != ARCHIVE_OK) + return (ret); + } + + state = (struct private_data *)malloc(sizeof(*state)); + if (state == NULL) { + archive_set_error(a, ENOMEM, + "Can't allocate data for compression"); + return (ARCHIVE_FATAL); + } + memset(state, 0, sizeof(*state)); + + state->compressed_buffer_size = a->bytes_per_block; + state->compressed = malloc(state->compressed_buffer_size); + state->crc = crc32(0L, NULL, 0); + + if (state->compressed == NULL) { + archive_set_error(a, ENOMEM, + "Can't allocate data for compression buffer"); + free(state); + return (ARCHIVE_FATAL); + } + + state->stream.next_out = state->compressed; + state->stream.avail_out = state->compressed_buffer_size; + + /* Prime output buffer with a gzip header. 
*/ + t = time(NULL); + state->compressed[0] = 0x1f; /* GZip signature bytes */ + state->compressed[1] = 0x8b; + state->compressed[2] = 0x08; /* "Deflate" compression */ + state->compressed[3] = 0; /* No options */ + state->compressed[4] = (t)&0xff; /* Timestamp */ + state->compressed[5] = (t>>8)&0xff; + state->compressed[6] = (t>>16)&0xff; + state->compressed[7] = (t>>24)&0xff; + state->compressed[8] = 0; /* No deflate options */ + state->compressed[9] = 3; /* OS=Unix */ + state->stream.next_out += 10; + state->stream.avail_out -= 10; + + a->compression_write = archive_compressor_gzip_write; + a->compression_finish = archive_compressor_gzip_finish; + + /* Initialize compression library. */ + ret = deflateInit2(&(state->stream), + Z_DEFAULT_COMPRESSION, + Z_DEFLATED, + -15 /* < 0 to suppress zlib header */, + 8, + Z_DEFAULT_STRATEGY); + + if (ret == Z_OK) { + a->compression_data = state; + return (0); + } + + /* Library setup failed: clean up. */ + archive_set_error(a, ARCHIVE_ERRNO_MISC, "Internal error " + "initializing compression library"); + free(state->compressed); + free(state); + + /* Override the error message if we know what really went wrong. */ + switch (ret) { + case Z_STREAM_ERROR: + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Internal error initializing " + "compression library: invalid setup parameter"); + break; + case Z_MEM_ERROR: + archive_set_error(a, ENOMEM, "Internal error initializing " + "compression library"); + break; + case Z_VERSION_ERROR: + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Internal error initializing " + "compression library: invalid library version"); + break; + } + + return (ARCHIVE_FATAL); +} + +/* + * Write data to the compressed stream. 
+ */ +static int +archive_compressor_gzip_write(struct archive *a, const void *buff, + size_t length) +{ + struct private_data *state; + int ret; + + state = a->compression_data; + if (a->client_writer == NULL) { + archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER, + "No write callback is registered? " + "This is probably an internal programming error."); + return (ARCHIVE_FATAL); + } + + /* Update statistics */ + state->crc = crc32(state->crc, buff, length); + state->total_in += length; + + /* Compress input data to output buffer */ + SET_NEXT_IN(state, buff); + state->stream.avail_in = length; + if ((ret = drive_compressor(a, state, 0)) != ARCHIVE_OK) + return (ret); + + a->file_position += length; + return (ARCHIVE_OK); +} + + +/* + * Finish the compression... + */ +static int +archive_compressor_gzip_finish(struct archive *a) +{ + ssize_t block_length, target_block_length, bytes_written; + int ret; + struct private_data *state; + unsigned tocopy; + unsigned char trailer[8]; + + state = a->compression_data; + ret = 0; + if (a->client_writer == NULL) { + archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER, + "No write callback is registered? " + "This is probably an internal programming error."); + ret = ARCHIVE_FATAL; + goto cleanup; + } + + /* By default, always pad the uncompressed data. */ + if (a->pad_uncompressed) { + tocopy = a->bytes_per_block - + (state->total_in % a->bytes_per_block); + while (tocopy > 0 && tocopy < (unsigned)a->bytes_per_block) { + SET_NEXT_IN(state, a->nulls); + state->stream.avail_in = tocopy < a->null_length ? + tocopy : a->null_length; + state->crc = crc32(state->crc, a->nulls, + state->stream.avail_in); + state->total_in += state->stream.avail_in; + tocopy -= state->stream.avail_in; + ret = drive_compressor(a, state, 0); + if (ret != ARCHIVE_OK) + goto cleanup; + } + } + + /* Finish compression cycle */ + if (((ret = drive_compressor(a, state, 1))) != ARCHIVE_OK) + goto cleanup; + + /* Build trailer: 4-byte CRC and 4-byte length. 
*/ + trailer[0] = (state->crc)&0xff; + trailer[1] = (state->crc >> 8)&0xff; + trailer[2] = (state->crc >> 16)&0xff; + trailer[3] = (state->crc >> 24)&0xff; + trailer[4] = (state->total_in)&0xff; + trailer[5] = (state->total_in >> 8)&0xff; + trailer[6] = (state->total_in >> 16)&0xff; + trailer[7] = (state->total_in >> 24)&0xff; + + /* Add trailer to current block. */ + tocopy = 8; + if (tocopy > state->stream.avail_out) + tocopy = state->stream.avail_out; + memcpy(state->stream.next_out, trailer, tocopy); + state->stream.next_out += tocopy; + state->stream.avail_out -= tocopy; + + /* If it overflowed, flush and start a new block. */ + if (tocopy < 8) { + bytes_written = (a->client_writer)(a, a->client_data, + state->compressed, state->compressed_buffer_size); + if (bytes_written <= 0) { + ret = ARCHIVE_FATAL; + goto cleanup; + } + a->raw_position += bytes_written; + state->stream.next_out = state->compressed; + state->stream.avail_out = state->compressed_buffer_size; + memcpy(state->stream.next_out, trailer + tocopy, 8-tocopy); + state->stream.next_out += 8-tocopy; + state->stream.avail_out -= 8-tocopy; + } + + /* Optionally, pad the final compressed block. */ + block_length = state->stream.next_out - state->compressed; + + + /* Tricky calculation to determine size of last block. */ + target_block_length = block_length; + if (a->bytes_in_last_block <= 0) + /* Default or Zero: pad to full block */ + target_block_length = a->bytes_per_block; + else + /* Round length to next multiple of bytes_in_last_block. 
*/ + target_block_length = a->bytes_in_last_block * + ( (block_length + a->bytes_in_last_block - 1) / + a->bytes_in_last_block); + if (target_block_length > a->bytes_per_block) + target_block_length = a->bytes_per_block; + if (block_length < target_block_length) { + memset(state->stream.next_out, 0, + target_block_length - block_length); + block_length = target_block_length; + } + + /* Write the last block */ + bytes_written = (a->client_writer)(a, a->client_data, + state->compressed, block_length); + if (bytes_written <= 0) { + ret = ARCHIVE_FATAL; + goto cleanup; + } + a->raw_position += bytes_written; + + /* Cleanup: shut down compressor, release memory, etc. */ +cleanup: + switch (deflateEnd(&(state->stream))) { + case Z_OK: + break; + default: + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Failed to clean up compressor"); + ret = ARCHIVE_FATAL; + } + free(state->compressed); + free(state); + + /* Close the output */ + if (a->client_closer != NULL) + (a->client_closer)(a, a->client_data); + + return (ret); +} + +/* + * Utility function to push input data through compressor, + * writing full output blocks as necessary. + * + * Note that this handles both the regular write case (finishing == + * false) and the end-of-archive case (finishing == true). 
+ */ +static int +drive_compressor(struct archive *a, struct private_data *state, int finishing) +{ + ssize_t bytes_written; + int ret; + + for (;;) { + if (state->stream.avail_out == 0) { + bytes_written = (a->client_writer)(a, a->client_data, + state->compressed, state->compressed_buffer_size); + if (bytes_written <= 0) { + /* TODO: Handle this write failure */ + return (ARCHIVE_FATAL); + } else if ((size_t)bytes_written < state->compressed_buffer_size) { + /* Short write: Move remaining to + * front of block and keep filling */ + memmove(state->compressed, + state->compressed + bytes_written, + state->compressed_buffer_size - bytes_written); + } + a->raw_position += bytes_written; + state->stream.next_out + = state->compressed + + state->compressed_buffer_size - bytes_written; + state->stream.avail_out = bytes_written; + } + + ret = deflate(&(state->stream), + finishing ? Z_FINISH : Z_NO_FLUSH ); + + switch (ret) { + case Z_OK: + /* In non-finishing case, check if compressor + * consumed everything */ + if (!finishing && state->stream.avail_in == 0) + return (ARCHIVE_OK); + /* In finishing case, this return always means + * there's more work */ + break; + case Z_STREAM_END: + /* This return can only occur in finishing case. */ + return (ARCHIVE_OK); + default: + /* Any other return value indicates an error. */ + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "GZip compression failed"); + return (ARCHIVE_FATAL); + } + } +} + +#endif /* HAVE_ZLIB_H */ diff --git a/lib/libarchive/archive_write_set_compression_none.c b/lib/libarchive/archive_write_set_compression_none.c new file mode 100644 index 0000000..2a63629 --- /dev/null +++ b/lib/libarchive/archive_write_set_compression_none.c @@ -0,0 +1,218 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include "archive.h" +#include "archive_private.h" + +static int archive_compressor_none_finish(struct archive *a); +static int archive_compressor_none_init(struct archive *); +static int archive_compressor_none_write(struct archive *, const void *, + size_t); + +struct archive_none { + char *buffer; + ssize_t buffer_size; + char *next; /* Current insert location */ + ssize_t avail; /* Free space left in buffer */ +}; + +int +archive_write_set_compression_none(struct archive *a) +{ + __archive_check_magic(a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_NEW, "archive_write_set_compression_none"); + a->compression_init = &archive_compressor_none_init; + a->compression_code = ARCHIVE_COMPRESSION_NONE; + a->compression_name = "none"; + return (0); +} + +/* + * Setup callback. + */ +static int +archive_compressor_none_init(struct archive *a) +{ + int ret; + struct archive_none *state; + + a->compression_code = ARCHIVE_COMPRESSION_NONE; + a->compression_name = "none"; + + if (a->client_opener != NULL) { + ret = (a->client_opener)(a, a->client_data); + if (ret != 0) + return (ret); + } + + state = (struct archive_none *)malloc(sizeof(*state)); + if (state == NULL) { + archive_set_error(a, ENOMEM, + "Can't allocate data for output buffering"); + return (ARCHIVE_FATAL); + } + memset(state, 0, sizeof(*state)); + + state->buffer_size = a->bytes_per_block; + state->buffer = malloc(state->buffer_size); + + if (state->buffer == NULL) { + archive_set_error(a, ENOMEM, + "Can't allocate output buffer"); + free(state); + return (ARCHIVE_FATAL); + } + + state->next = state->buffer; + state->avail = state->buffer_size; + + a->compression_data = state; + a->compression_write = archive_compressor_none_write; + a->compression_finish = archive_compressor_none_finish; + return (ARCHIVE_OK); +} + +/* + * Write data to the stream. 
+ */ +static int +archive_compressor_none_write(struct archive *a, const void *vbuff, + size_t length) +{ + const char *buff; + ssize_t remaining, to_copy; + ssize_t bytes_written; + struct archive_none *state; + + state = a->compression_data; + buff = vbuff; + if (a->client_writer == NULL) { + archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER, + "No write callback is registered? " + "This is probably an internal programming error."); + return (ARCHIVE_FATAL); + } + + remaining = length; + while (remaining > 0) { + /* + * If we have a full output block, write it and reset the + * output buffer. + */ + if (state->avail == 0) { + bytes_written = (a->client_writer)(a, a->client_data, + state->buffer, state->buffer_size); + if (bytes_written <= 0) + return (ARCHIVE_FATAL); + /* XXX TODO: if bytes_written < state->buffer_size */ + a->raw_position += bytes_written; + state->next = state->buffer; + state->avail = state->buffer_size; + } + + /* Now we have space in the buffer; copy new data into it. */ + to_copy = (remaining > state->avail) ? + state->avail : remaining; + memcpy(state->next, buff, to_copy); + state->next += to_copy; + state->avail -= to_copy; + buff += to_copy; + remaining -= to_copy; + } + a->file_position += length; + return (ARCHIVE_OK); +} + + +/* + * Finish the compression. + */ +static int +archive_compressor_none_finish(struct archive *a) +{ + ssize_t block_length; + ssize_t target_block_length; + ssize_t bytes_written; + int ret; + int ret2; + struct archive_none *state; + + state = a->compression_data; + ret = ret2 = ARCHIVE_OK; + if (a->client_writer == NULL) { + archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER, + "No write callback is registered? 
" + "This is probably an internal programming error."); + return (ARCHIVE_FATAL); + } + + /* If there's pending data, pad and write the last block */ + if (state->next != state->buffer) { + block_length = state->buffer_size - state->avail; + + /* Tricky calculation to determine size of last block */ + target_block_length = block_length; + if (a->bytes_in_last_block <= 0) + /* Default or Zero: pad to full block */ + target_block_length = a->bytes_per_block; + else + /* Round to next multiple of bytes_in_last_block. */ + target_block_length = a->bytes_in_last_block * + ( (block_length + a->bytes_in_last_block - 1) / + a->bytes_in_last_block); + if (target_block_length > a->bytes_per_block) + target_block_length = a->bytes_per_block; + if (block_length < target_block_length) { + memset(state->next, 0, + target_block_length - block_length); + block_length = target_block_length; + } + bytes_written = (a->client_writer)(a, a->client_data, + state->buffer, block_length); + if (bytes_written <= 0) + ret = ARCHIVE_FATAL; + else { + a->raw_position += bytes_written; + ret = ARCHIVE_OK; + } + } + + /* Close the output */ + if (a->client_closer != NULL) + ret2 = (a->client_closer)(a, a->client_data); + + free(state->buffer); + free(state); + a->compression_data = NULL; + + return (ret != ARCHIVE_OK ? ret : ret2); +} diff --git a/lib/libarchive/archive_write_set_format.c b/lib/libarchive/archive_write_set_format.c new file mode 100644 index 0000000..5f6df0e --- /dev/null +++ b/lib/libarchive/archive_write_set_format.c @@ -0,0 +1,65 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> + +#include <errno.h> +#include "archive.h" +#include "archive_private.h" + +/* A table that maps format codes to functions. 
*/ +static +struct { int code; int (*setter)(struct archive *); } codes[] = +{ + { ARCHIVE_FORMAT_CPIO, archive_write_set_format_cpio }, + { ARCHIVE_FORMAT_CPIO_POSIX, archive_write_set_format_cpio }, + { ARCHIVE_FORMAT_SHAR, archive_write_set_format_shar }, + { ARCHIVE_FORMAT_SHAR_BASE, archive_write_set_format_shar }, + { ARCHIVE_FORMAT_SHAR_DUMP, archive_write_set_format_shar_dump }, + { ARCHIVE_FORMAT_TAR, archive_write_set_format_pax_restricted }, + { ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, archive_write_set_format_pax }, + { ARCHIVE_FORMAT_TAR_PAX_RESTRICTED, + archive_write_set_format_pax_restricted }, + { ARCHIVE_FORMAT_TAR_USTAR, archive_write_set_format_ustar }, + { 0, NULL } +}; + +int +archive_write_set_format(struct archive *a, int code) +{ + int i; + + for (i = 0; codes[i].code != 0; i++) { + if (code == codes[i].code) + return ((codes[i].setter)(a)); + } + + archive_set_error(a, EINVAL, "No such format"); + return (ARCHIVE_FATAL); +} diff --git a/lib/libarchive/archive_write_set_format_by_name.c b/lib/libarchive/archive_write_set_format_by_name.c new file mode 100644 index 0000000..2402e1c --- /dev/null +++ b/lib/libarchive/archive_write_set_format_by_name.c @@ -0,0 +1,63 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> + +#include <errno.h> +#include <string.h> + +#include "archive.h" +#include "archive_private.h" + +/* A table that maps names to functions. */ +static +struct { const char *name; int (*setter)(struct archive *); } names[] = +{ + { "cpio", archive_write_set_format_cpio }, + { "pax", archive_write_set_format_pax }, + { "posix", archive_write_set_format_pax }, + { "shar", archive_write_set_format_shar }, + { "shardump", archive_write_set_format_shar_dump }, + { "ustar", archive_write_set_format_ustar }, + { NULL, NULL } +}; + +int +archive_write_set_format_by_name(struct archive *a, const char *name) +{ + int i; + + for (i = 0; names[i].name != NULL; i++) { + if (strcmp(name, names[i].name) == 0) + return ((names[i].setter)(a)); + } + + archive_set_error(a, EINVAL, "No such format '%s'", name); + return (ARCHIVE_FATAL); +} diff --git a/lib/libarchive/archive_write_set_format_cpio.c b/lib/libarchive/archive_write_set_format_cpio.c new file mode 100644 index 0000000..659b0a1 --- /dev/null +++ b/lib/libarchive/archive_write_set_format_cpio.c @@ -0,0 +1,246 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "archive.h" +#include "archive_entry.h" +#include "archive_private.h" + +static int archive_write_cpio_data(struct archive *, const void *buff, + size_t s); +static int archive_write_cpio_finish(struct archive *); +static int archive_write_cpio_finish_entry(struct archive *); +static int archive_write_cpio_header(struct archive *, + struct archive_entry *); +static int format_octal(int64_t, void *, int); +static int64_t format_octal_recursive(int64_t, char *, int); + +struct cpio { + uint64_t entry_bytes_remaining; +}; + +struct cpio_header { + char c_magic[6]; + char c_dev[6]; + char c_ino[6]; + char c_mode[6]; + char c_uid[6]; + char c_gid[6]; + char c_nlink[6]; + char c_rdev[6]; + char c_mtime[11]; + char c_namesize[6]; + char c_filesize[11]; +}; + +/* + * Set output format to 'cpio' format. + */ +int +archive_write_set_format_cpio(struct archive *a) +{ + struct cpio *cpio; + + /* If someone else was already registered, unregister them. 
*/ + if (a->format_finish != NULL) + (a->format_finish)(a); + + cpio = malloc(sizeof(*cpio)); + if (cpio == NULL) { + archive_set_error(a, ENOMEM, "Can't allocate cpio data"); + return (ARCHIVE_FATAL); + } + memset(cpio, 0, sizeof(*cpio)); + a->format_data = cpio; + + a->pad_uncompressed = 1; + a->format_write_header = archive_write_cpio_header; + a->format_write_data = archive_write_cpio_data; + a->format_finish_entry = archive_write_cpio_finish_entry; + a->format_finish = archive_write_cpio_finish; + a->archive_format = ARCHIVE_FORMAT_CPIO_POSIX; + a->archive_format_name = "POSIX cpio"; + return (ARCHIVE_OK); +} + +/* + * Write one entry's cpio header and pathname, plus the link target + * when the entry is a symlink. + * + * Returns ARCHIVE_FATAL if writing the header or the pathname fails, + * the write's own status if writing a symlink target fails, + * ARCHIVE_WARN if everything was written but the inode number had to + * be truncated to 18 bits, and ARCHIVE_OK otherwise. + */ +static int +archive_write_cpio_header(struct archive *a, struct archive_entry *entry) +{ + struct cpio *cpio; + const char *p, *path; + int pathlength, ret, ret_final; + const struct stat *st; + struct cpio_header h; + + cpio = a->format_data; + /* Kept apart from 'ret' so later write results can't overwrite a warning. */ + ret_final = ARCHIVE_OK; + + path = archive_entry_pathname(entry); + pathlength = strlen(path) + 1; /* Include trailing null. */ + st = archive_entry_stat(entry); + + memset(&h, 0, sizeof(h)); + format_octal(070707, &h.c_magic, sizeof(h.c_magic)); + format_octal(st->st_dev, &h.c_dev, sizeof(h.c_dev)); + /* + * TODO: Generate artificial inode numbers rather than just + * re-using the ones off the disk. That way, the 18-bit c_ino + * field only limits the number of files in the archive. 
+ */ + if (st->st_ino > 0777777) { + archive_set_error(a, ERANGE, "large inode number truncated"); + ret_final = ARCHIVE_WARN; + } + + format_octal(st->st_ino & 0777777, &h.c_ino, sizeof(h.c_ino)); + format_octal(st->st_mode, &h.c_mode, sizeof(h.c_mode)); + format_octal(st->st_uid, &h.c_uid, sizeof(h.c_uid)); + format_octal(st->st_gid, &h.c_gid, sizeof(h.c_gid)); + format_octal(st->st_nlink, &h.c_nlink, sizeof(h.c_nlink)); + if (S_ISBLK(st->st_mode) || S_ISCHR(st->st_mode)) + format_octal(st->st_rdev, &h.c_rdev, sizeof(h.c_rdev)); + else + format_octal(0, &h.c_rdev, sizeof(h.c_rdev)); + format_octal(st->st_mtime, &h.c_mtime, sizeof(h.c_mtime)); + format_octal(pathlength, &h.c_namesize, sizeof(h.c_namesize)); + + /* Symlinks get the link written as the body of the entry. */ + p = archive_entry_symlink(entry); + if (p != NULL && *p != '\0') + format_octal(strlen(p), &h.c_filesize, sizeof(h.c_filesize)); + else + format_octal(st->st_size, &h.c_filesize, sizeof(h.c_filesize)); + + ret = (a->compression_write)(a, &h, sizeof(h)); + if (ret != ARCHIVE_OK) + return (ARCHIVE_FATAL); + + ret = (a->compression_write)(a, path, pathlength); + if (ret != ARCHIVE_OK) + return (ARCHIVE_FATAL); + + /* + * NOTE(review): for a symlink the header above advertises + * strlen(p) bytes, yet the data budget is st_size; presumably + * callers pass st_size == 0 for symlinks -- confirm. + */ + cpio->entry_bytes_remaining = st->st_size; + + /* Write the symlink now. */ + if (p != NULL && *p != '\0') { + ret = (a->compression_write)(a, p, strlen(p)); + if (ret != ARCHIVE_OK) + return (ret); + } + + /* Report the inode-truncation warning, if any, instead of dropping it. */ + return (ret_final); +} + +static int +archive_write_cpio_data(struct archive *a, const void *buff, size_t s) +{ + struct cpio *cpio; + int ret; + + cpio = a->format_data; + if (s > cpio->entry_bytes_remaining) + s = cpio->entry_bytes_remaining; + + ret = (a->compression_write)(a, buff, s); + cpio->entry_bytes_remaining -= s; + return (ret); +} + +/* + * Format a number into the specified field. 
+ */ +static int +format_octal(int64_t v, void *p, int digits) +{ + int64_t max; + int ret; + + max = (((int64_t)1) << (digits * 3)) - 1; + if (v >= 0 && v <= max) { + format_octal_recursive(v, p, digits); + ret = 0; + } else { + format_octal_recursive(max, p, digits); + ret = -1; + } + return (ret); +} + +static int64_t +format_octal_recursive(int64_t v, char *p, int s) +{ + if (s == 0) + return (v); + v = format_octal_recursive(v, p+1, s-1); + *p = '0' + (v & 7); + return (v >>= 3); +} + +static int +archive_write_cpio_finish(struct archive *a) +{ + struct cpio *cpio; + struct stat st; + int er; + struct archive_entry *trailer; + + cpio = a->format_data; + trailer = archive_entry_new(); + memset(&st, 0, sizeof(st)); + st.st_nlink = 1; + archive_entry_copy_stat(trailer, &st); + archive_entry_set_pathname(trailer, "TRAILER!!!"); + er = archive_write_cpio_header(a, trailer); + archive_entry_free(trailer); + + free(cpio); + a->format_data = NULL; + return (er); +} + +static int +archive_write_cpio_finish_entry(struct archive *a) +{ + struct cpio *cpio; + int to_write, ret; + + cpio = a->format_data; + ret = ARCHIVE_OK; + while (cpio->entry_bytes_remaining > 0) { + to_write = cpio->entry_bytes_remaining < a->null_length ? + cpio->entry_bytes_remaining : a->null_length; + ret = (a->compression_write)(a, a->nulls, to_write); + if (ret != ARCHIVE_OK) + return (ret); + cpio->entry_bytes_remaining -= to_write; + } + return (ret); +} diff --git a/lib/libarchive/archive_write_set_format_pax.c b/lib/libarchive/archive_write_set_format_pax.c new file mode 100644 index 0000000..c256cb2 --- /dev/null +++ b/lib/libarchive/archive_write_set_format_pax.c @@ -0,0 +1,1187 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> +#ifdef MAJOR_IN_MKDEV +#include <sys/mkdev.h> +#else +#ifdef MAJOR_IN_SYSMACROS +#include <sys/sysmacros.h> +#endif +#endif +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "archive.h" +#include "archive_entry.h" +#include "archive_private.h" + +struct pax { + uint64_t entry_bytes_remaining; + uint64_t entry_padding; + struct archive_string pax_header; + char written; +}; + +static void add_pax_attr(struct archive_string *, const char *key, + const char *value); +static void add_pax_attr_int(struct archive_string *, + const char *key, int64_t value); +static void add_pax_attr_time(struct archive_string *, + const char *key, int64_t sec, + unsigned long nanos); +static void add_pax_attr_w(struct archive_string *, + const char *key, const wchar_t *wvalue); +static int archive_write_pax_data(struct archive *, + const void *, size_t); +static int archive_write_pax_finish(struct archive *); +static int archive_write_pax_finish_entry(struct archive *); +static int archive_write_pax_header(struct archive *, + struct archive_entry *); +static char *base64_encode(const char *src, size_t len); +static char *build_pax_attribute_name(char *dest, const char *src); +static char *build_ustar_entry_name(char *dest, const char *src, + size_t src_length, const char *insert); +static char *format_int(char *dest, int64_t); +static int has_non_ASCII(const wchar_t *); +static char *url_encode(const char *in); +static int write_nulls(struct archive *, size_t); + +/* + * Set output format to 'restricted pax' format. + * + * This is the same as normal 'pax', but tries to suppress + * the pax header whenever possible. This is the default for + * bsdtar, for instance. 
+ */ +int +archive_write_set_format_pax_restricted(struct archive *a) +{ + int r; + r = archive_write_set_format_pax(a); + a->archive_format = ARCHIVE_FORMAT_TAR_PAX_RESTRICTED; + a->archive_format_name = "restricted POSIX pax interchange"; + return (r); +} + +/* + * Set output format to 'pax' format. + */ +int +archive_write_set_format_pax(struct archive *a) +{ + struct pax *pax; + + if (a->format_finish != NULL) + (a->format_finish)(a); + + pax = malloc(sizeof(*pax)); + if (pax == NULL) { + archive_set_error(a, ENOMEM, "Can't allocate pax data"); + return (ARCHIVE_FATAL); + } + memset(pax, 0, sizeof(*pax)); + a->format_data = pax; + + a->pad_uncompressed = 1; + a->format_write_header = archive_write_pax_header; + a->format_write_data = archive_write_pax_data; + a->format_finish = archive_write_pax_finish; + a->format_finish_entry = archive_write_pax_finish_entry; + a->archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; + a->archive_format_name = "POSIX pax interchange"; + return (ARCHIVE_OK); +} + +/* + * Note: This code assumes that 'nanos' has the same sign as 'sec', + * which implies that sec=-1, nanos=200000000 represents -1.2 seconds + * and not -0.8 seconds. This is a pretty pedantic point, as we're + * unlikely to encounter many real files created before Jan 1, 1970, + * much less ones with timestamps recorded to sub-second resolution. + */ +static void +add_pax_attr_time(struct archive_string *as, const char *key, + int64_t sec, unsigned long nanos) +{ + int digit, i; + char *t; + /* + * Note that each byte contributes fewer than 3 base-10 + * digits, so this will always be big enough. + */ + char tmp[1 + 3*sizeof(sec) + 1 + 3*sizeof(nanos)]; + + tmp[sizeof(tmp) - 1] = 0; + t = tmp + sizeof(tmp) - 1; + + /* Skip trailing zeros in the fractional part. */ + for (digit = 0, i = 10; i > 0 && digit == 0; i--) { + digit = nanos % 10; + nanos /= 10; + } + + /* Only format the fraction if it's non-zero. 
*/ + if (i > 0) { + while (i > 0) { + *--t = "0123456789"[digit]; + digit = nanos % 10; + nanos /= 10; + i--; + } + *--t = '.'; + } + t = format_int(t, sec); + + add_pax_attr(as, key, t); +} + +static char * +format_int(char *t, int64_t i) +{ + int sign; + + if (i < 0) { + sign = -1; + i = -i; + } else + sign = 1; + + do { + *--t = "0123456789"[i % 10]; + } while (i /= 10); + if (sign < 0) + *--t = '-'; + return (t); +} + +static void +add_pax_attr_int(struct archive_string *as, const char *key, int64_t value) +{ + char tmp[1 + 3 * sizeof(value)]; + + tmp[sizeof(tmp) - 1] = 0; + add_pax_attr(as, key, format_int(tmp + sizeof(tmp) - 1, value)); +} + +static char * +utf8_encode(const wchar_t *wval) +{ + int utf8len; + const wchar_t *wp; + unsigned long wc; + char *utf8_value, *p; + + utf8len = 0; + for (wp = wval; *wp != L'\0'; ) { + wc = *wp++; + if (wc <= 0x7f) + utf8len++; + else if (wc <= 0x7ff) + utf8len += 2; + else if (wc <= 0xffff) + utf8len += 3; + else if (wc <= 0x1fffff) + utf8len += 4; + else if (wc <= 0x3ffffff) + utf8len += 5; + else if (wc <= 0x7fffffff) + utf8len += 6; + /* Ignore larger values; UTF-8 can't encode them. 
*/ + } + + utf8_value = malloc(utf8len + 1); + if (utf8_value == NULL) { + __archive_errx(1, "Not enough memory for attributes"); + return (NULL); + } + + for (wp = wval, p = utf8_value; *wp != L'\0'; ) { + wc = *wp++; + if (wc <= 0x7f) { + *p++ = (char)wc; + } else if (wc <= 0x7ff) { + p[0] = 0xc0 | ((wc >> 6) & 0x1f); + p[1] = 0x80 | (wc & 0x3f); + p += 2; + } else if (wc <= 0xffff) { + p[0] = 0xe0 | ((wc >> 12) & 0x0f); + p[1] = 0x80 | ((wc >> 6) & 0x3f); + p[2] = 0x80 | (wc & 0x3f); + p += 3; + } else if (wc <= 0x1fffff) { + p[0] = 0xf0 | ((wc >> 18) & 0x07); + p[1] = 0x80 | ((wc >> 12) & 0x3f); + p[2] = 0x80 | ((wc >> 6) & 0x3f); + p[3] = 0x80 | (wc & 0x3f); + p += 4; + } else if (wc <= 0x3ffffff) { + p[0] = 0xf8 | ((wc >> 24) & 0x03); + p[1] = 0x80 | ((wc >> 18) & 0x3f); + p[2] = 0x80 | ((wc >> 12) & 0x3f); + p[3] = 0x80 | ((wc >> 6) & 0x3f); + p[4] = 0x80 | (wc & 0x3f); + p += 5; + } else if (wc <= 0x7fffffff) { + /* + * Six-byte form: one lead byte plus five continuation + * bytes, which must land in p[1] through p[5] to match + * the six bytes counted for this range above. + */ + p[0] = 0xfc | ((wc >> 30) & 0x01); + p[1] = 0x80 | ((wc >> 24) & 0x3f); + p[2] = 0x80 | ((wc >> 18) & 0x3f); + p[3] = 0x80 | ((wc >> 12) & 0x3f); + p[4] = 0x80 | ((wc >> 6) & 0x3f); + p[5] = 0x80 | (wc & 0x3f); + p += 6; + } + /* Ignore larger values; UTF-8 can't encode them. */ + } + *p = '\0'; + + return (utf8_value); +} + +/* + * Add a wide-character attribute: the value is converted with + * utf8_encode() and then appended via add_pax_attr(). + */ +static void +add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval) +{ + char *utf8_value = utf8_encode(wval); + if (utf8_value == NULL) + return; + add_pax_attr(as, key, utf8_value); + free(utf8_value); +} + +/* + * Add a key/value attribute to the pax header. This function handles + * the length field and various other syntactic requirements. 
+ */ +static void +add_pax_attr(struct archive_string *as, const char *key, const char *value) +{ + int digits, i, len, next_ten; + char tmp[1 + 3 * sizeof(int)]; /* < 3 base-10 digits per byte */ + + /*- + * PAX attributes have the following layout: + * <len> <space> <key> <=> <value> <nl> + */ + len = 1 + strlen(key) + 1 + strlen(value) + 1; + + /* + * The <len> field includes the length of the <len> field, so + * computing the correct length is tricky. I start by + * counting the number of base-10 digits in 'len' and + * computing the next higher power of 10. + */ + next_ten = 1; + digits = 0; + i = len; + while (i > 0) { + i = i / 10; + digits++; + next_ten = next_ten * 10; + } + /* + * For example, if string without the length field is 99 + * chars, then adding the 2 digit length "99" will force the + * total length past 100, requiring an extra digit. The next + * statement adjusts for this effect. + */ + if (len + digits >= next_ten) + digits++; + + /* Now, we have the right length so we can build the line. */ + tmp[sizeof(tmp) - 1] = 0; /* Null-terminate the work area. */ + archive_strcat(as, format_int(tmp + sizeof(tmp) - 1, len + digits)); + archive_strappend_char(as, ' '); + archive_strcat(as, key); + archive_strappend_char(as, '='); + archive_strcat(as, value); + archive_strappend_char(as, '\n'); +} + +static void +archive_write_pax_header_xattrs(struct pax *pax, struct archive_entry *entry) +{ + struct archive_string s; + int i = archive_entry_xattr_reset(entry); + + while (i--) { + const char *name; + const void *value; + char *encoded_value; + char *url_encoded_name = NULL, *encoded_name = NULL; + wchar_t *wcs_name = NULL; + size_t size; + + archive_entry_xattr_next(entry, &name, &value, &size); + /* Name is URL-encoded, then converted to wchar_t, + * then UTF-8 encoded. */ + url_encoded_name = url_encode(name); + if (url_encoded_name != NULL) { + /* Convert narrow-character to wide-character. 
*/ + int wcs_length = strlen(url_encoded_name); + wcs_name = malloc((wcs_length + 1) * sizeof(wchar_t)); + if (wcs_name == NULL) + __archive_errx(1, "No memory for xattr conversion"); + mbstowcs(wcs_name, url_encoded_name, wcs_length); + wcs_name[wcs_length] = 0; + free(url_encoded_name); /* Done with this. */ + } + if (wcs_name != NULL) { + encoded_name = utf8_encode(wcs_name); + free(wcs_name); /* Done with wchar_t name. */ + } + + encoded_value = base64_encode(value, size); + + if (encoded_name != NULL && encoded_value != NULL) { + archive_string_init(&s); + archive_strcpy(&s, "LIBARCHIVE.xattr."); + archive_strcat(&s, encoded_name); + add_pax_attr(&(pax->pax_header), s.s, encoded_value); + archive_string_free(&s); + } + free(encoded_name); + free(encoded_value); + } +} + +/* + * TODO: Consider adding 'comment' and 'charset' fields to + * archive_entry so that clients can specify them. Also, consider + * adding generic key/value tags so clients can add arbitrary + * key/value data. + */ +static int +archive_write_pax_header(struct archive *a, + struct archive_entry *entry_original) +{ + struct archive_entry *entry_main; + const char *linkname, *p; + const char *hardlink; + const wchar_t *wp; + const char *suffix_start; + int need_extension, r, ret; + struct pax *pax; + const struct stat *st_main, *st_original; + + char paxbuff[512]; + char ustarbuff[512]; + char ustar_entry_name[256]; + char pax_entry_name[256]; + + need_extension = 0; + pax = a->format_data; + pax->written = 1; + + st_original = archive_entry_stat(entry_original); + + hardlink = archive_entry_hardlink(entry_original); + + /* Make sure this is a type of entry that we can handle here */ + if (hardlink == NULL) { + switch (st_original->st_mode & S_IFMT) { + case S_IFREG: + case S_IFLNK: + case S_IFCHR: + case S_IFBLK: + case S_IFDIR: + case S_IFIFO: + break; + case S_IFSOCK: + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "tar format cannot archive socket"); + return (ARCHIVE_WARN); + default: 
+ archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "tar format cannot archive this (mode=0%lo)", + (unsigned long)st_original->st_mode); + return (ARCHIVE_WARN); + } + } + + /* Copy entry so we can modify it as needed. */ + entry_main = archive_entry_clone(entry_original); + archive_string_empty(&(pax->pax_header)); /* Blank our work area. */ + st_main = archive_entry_stat(entry_main); + + /* + * Determining whether or not the name is too big is ugly + * because of the rules for dividing names between 'name' and + * 'prefix' fields. Here, I pick out the longest possible + * suffix, then test whether the remaining prefix is too long. + */ + wp = archive_entry_pathname_w(entry_main); + p = archive_entry_pathname(entry_main); + if (strlen(p) <= 100) /* Short enough for just 'name' field */ + suffix_start = p; /* Record a zero-length prefix */ + else + /* Find the largest suffix that fits in 'name' field. */ + suffix_start = strchr(p + strlen(p) - 100 - 1, '/'); + + /* + * If name is too long, or has non-ASCII characters, add + * 'path' to pax extended attrs. + */ + if (suffix_start == NULL || suffix_start - p > 155 || has_non_ASCII(wp)) { + add_pax_attr_w(&(pax->pax_header), "path", wp); + archive_entry_set_pathname(entry_main, + build_ustar_entry_name(ustar_entry_name, p, strlen(p), NULL)); + need_extension = 1; + } + + /* If link name is too long or has non-ASCII characters, add + * 'linkpath' to pax extended attrs. */ + linkname = hardlink; + if (linkname == NULL) + linkname = archive_entry_symlink(entry_main); + + if (linkname != NULL) { + /* There is a link name, get the wide version as well. */ + if (hardlink != NULL) + wp = archive_entry_hardlink_w(entry_main); + else + wp = archive_entry_symlink_w(entry_main); + + /* If the link is long or has a non-ASCII character, + * store it as a pax extended attribute. 
*/ + if (strlen(linkname) > 100 || has_non_ASCII(wp)) { + add_pax_attr_w(&(pax->pax_header), "linkpath", wp); + if (hardlink != NULL) + archive_entry_set_hardlink(entry_main, + "././@LongHardLink"); + else + archive_entry_set_symlink(entry_main, + "././@LongSymLink"); + need_extension = 1; + } + } + + /* If file size is too large, add 'size' to pax extended attrs. */ + if (st_main->st_size >= (((int64_t)1) << 33)) { + add_pax_attr_int(&(pax->pax_header), "size", st_main->st_size); + need_extension = 1; + } + + /* If numeric GID is too large, add 'gid' to pax extended attrs. */ + if (st_main->st_gid >= (1 << 18)) { + add_pax_attr_int(&(pax->pax_header), "gid", st_main->st_gid); + need_extension = 1; + } + + /* If group name is too large or has non-ASCII characters, add + * 'gname' to pax extended attrs. */ + p = archive_entry_gname(entry_main); + wp = archive_entry_gname_w(entry_main); + if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) { + add_pax_attr_w(&(pax->pax_header), "gname", wp); + archive_entry_set_gname(entry_main, NULL); + need_extension = 1; + } + + /* If numeric UID is too large, add 'uid' to pax extended attrs. */ + if (st_main->st_uid >= (1 << 18)) { + add_pax_attr_int(&(pax->pax_header), "uid", st_main->st_uid); + need_extension = 1; + } + + /* If user name is too large, add 'uname' to pax extended attrs. */ + /* TODO: If uname has non-ASCII characters, use pax attribute. */ + p = archive_entry_uname(entry_main); + wp = archive_entry_uname_w(entry_main); + if (p != NULL && (strlen(p) > 31 || has_non_ASCII(wp))) { + add_pax_attr_w(&(pax->pax_header), "uname", wp); + archive_entry_set_uname(entry_main, NULL); + need_extension = 1; + } + + /* + * POSIX/SUSv3 doesn't provide a standard key for large device + * numbers. I use the same keys here that Joerg Schilling + * used for 'star.' (Which, somewhat confusingly, are called + * "devXXX" even though they code "rdev" values.) No doubt, + * other implementations use other keys. 
Note that there's no + * reason we can't write the same information into a number of + * different keys. + * + * Of course, this is only needed for block or char device entries. + */ + if (S_ISBLK(st_main->st_mode) || + S_ISCHR(st_main->st_mode)) { + /* + * If rdevmajor is too large, add 'SCHILY.devmajor' to + * extended attributes. + */ + dev_t rdevmajor, rdevminor; + rdevmajor = major(st_main->st_rdev); + rdevminor = minor(st_main->st_rdev); + if (rdevmajor >= (1 << 18)) { + add_pax_attr_int(&(pax->pax_header), "SCHILY.devmajor", + rdevmajor); + /* + * Non-strict formatting below means we don't + * have to truncate here. Not truncating improves + * the chance that some more modern tar archivers + * (such as GNU tar 1.13) can restore the full + * value even if they don't understand the pax + * extended attributes. See my rant below about + * file size fields for additional details. + */ + /* archive_entry_set_rdevmajor(entry_main, + rdevmajor & ((1 << 18) - 1)); */ + need_extension = 1; + } + + /* + * If devminor is too large, add 'SCHILY.devminor' to + * extended attributes. + */ + if (rdevminor >= (1 << 18)) { + add_pax_attr_int(&(pax->pax_header), "SCHILY.devminor", + rdevminor); + /* Truncation is not necessary here, either. */ + /* archive_entry_set_rdevminor(entry_main, + rdevminor & ((1 << 18) - 1)); */ + need_extension = 1; + } + } + + /* + * Technically, the mtime field in the ustar header can + * support 33 bits, but many platforms use signed 32-bit time + * values. The cutoff of 0x7fffffff here is a compromise. + * Yes, this check is duplicated just below; this helps to + * avoid writing an mtime attribute just to handle a + * high-resolution timestamp in "restricted pax" mode. + */ + if (!need_extension && + ((st_main->st_mtime < 0) || (st_main->st_mtime >= 0x7fffffff))) + need_extension = 1; + + /* I use a star-compatible file flag attribute. 
*/ + p = archive_entry_fflags_text(entry_main); + if (!need_extension && p != NULL && *p != '\0') + need_extension = 1; + + /* If there are non-trivial ACL entries, we need an extension. */ + if (!need_extension && archive_entry_acl_count(entry_original, + ARCHIVE_ENTRY_ACL_TYPE_ACCESS) > 0) + need_extension = 1; + + /* If there are non-trivial ACL entries, we need an extension. */ + if (!need_extension && archive_entry_acl_count(entry_original, + ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) > 0) + need_extension = 1; + + /* If there are extended attributes, we need an extension */ + if (!need_extension && archive_entry_xattr_count(entry_original) > 0) + need_extension = 1; + + /* + * The following items are handled differently in "pax + * restricted" format. In particular, in "pax restricted" + * format they won't be added unless need_extension is + * already set (we're already generating an extended header, so + * may as well include these). + */ + if (a->archive_format != ARCHIVE_FORMAT_TAR_PAX_RESTRICTED || + need_extension) { + + if (st_main->st_mtime < 0 || + st_main->st_mtime >= 0x7fffffff || + ARCHIVE_STAT_MTIME_NANOS(st_main) != 0) + add_pax_attr_time(&(pax->pax_header), "mtime", + st_main->st_mtime, + ARCHIVE_STAT_MTIME_NANOS(st_main)); + + if (st_main->st_ctime != 0 || + ARCHIVE_STAT_CTIME_NANOS(st_main) != 0) + add_pax_attr_time(&(pax->pax_header), "ctime", + st_main->st_ctime, + ARCHIVE_STAT_CTIME_NANOS(st_main)); + + if (st_main->st_atime != 0 || + ARCHIVE_STAT_ATIME_NANOS(st_main) != 0) + add_pax_attr_time(&(pax->pax_header), "atime", + st_main->st_atime, + ARCHIVE_STAT_ATIME_NANOS(st_main)); + + /* I use a star-compatible file flag attribute. */ + p = archive_entry_fflags_text(entry_main); + if (p != NULL && *p != '\0') + add_pax_attr(&(pax->pax_header), "SCHILY.fflags", p); + + /* I use star-compatible ACL attributes. 
*/ + wp = archive_entry_acl_text_w(entry_original, + ARCHIVE_ENTRY_ACL_TYPE_ACCESS | + ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID); + if (wp != NULL && *wp != L'\0') + add_pax_attr_w(&(pax->pax_header), + "SCHILY.acl.access", wp); + wp = archive_entry_acl_text_w(entry_original, + ARCHIVE_ENTRY_ACL_TYPE_DEFAULT | + ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID); + if (wp != NULL && *wp != L'\0') + add_pax_attr_w(&(pax->pax_header), + "SCHILY.acl.default", wp); + + /* Include star-compatible metadata info. */ + /* Note: "SCHILY.dev{major,minor}" are NOT the + * major/minor portions of "SCHILY.dev". */ + add_pax_attr_int(&(pax->pax_header), "SCHILY.dev", + st_main->st_dev); + add_pax_attr_int(&(pax->pax_header), "SCHILY.ino", + st_main->st_ino); + add_pax_attr_int(&(pax->pax_header), "SCHILY.nlink", + st_main->st_nlink); + + /* Store extended attributes */ + archive_write_pax_header_xattrs(pax, entry_original); + } + + /* Only regular files have data. */ + if (!S_ISREG(archive_entry_mode(entry_main))) + archive_entry_set_size(entry_main, 0); + + /* + * Pax-restricted does not store data for hardlinks, in order + * to improve compatibility with ustar. + */ + if (a->archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE && + hardlink != NULL) + archive_entry_set_size(entry_main, 0); + + /* + * XXX Full pax interchange format does permit a hardlink + * entry to have data associated with it. I'm not supporting + * that here because the client expects me to tell them whether + * or not this format expects data for hardlinks. If I + * don't check here, then every pax archive will end up with + * duplicated data for hardlinks. Someday, there may be + * need to select this behavior, in which case the following + * will need to be revisited. XXX + */ + if (hardlink != NULL) + archive_entry_set_size(entry_main, 0); + + /* Format 'ustar' header for main entry. 
+ * + * The trouble with file size: If the reader can't understand + * the file size, they may not be able to locate the next + * entry and the rest of the archive is toast. Pax-compliant + * readers are supposed to ignore the file size in the main + * header, so the question becomes how to maximize portability + * for readers that don't support pax attribute extensions. + * For maximum compatibility, I permit numeric extensions in + * the main header so that the file size stored will always be + * correct, even if it's in a format that only some + * implementations understand. The technique used here is: + * + * a) If possible, follow the standard exactly. This handles + * files up to 8 gigabytes minus 1. + * + * b) If that fails, try octal but omit the field terminator. + * That handles files up to 64 gigabytes minus 1. + * + * c) Otherwise, use base-256 extensions. That handles files + * up to 2^63 in this implementation, with the potential to + * go up to 2^94. That should hold us for a while. ;-) + * + * The non-strict formatter uses similar logic for other + * numeric fields, though they're less critical. + */ + __archive_write_format_header_ustar(a, ustarbuff, entry_main, -1, 0); + + /* If we built any extended attributes, write that entry first. */ + ret = ARCHIVE_OK; + if (archive_strlen(&(pax->pax_header)) > 0) { + struct stat st; + struct archive_entry *pax_attr_entry; + time_t s; + long ns; + + memset(&st, 0, sizeof(st)); + pax_attr_entry = archive_entry_new(); + p = archive_entry_pathname(entry_main); + archive_entry_set_pathname(pax_attr_entry, + build_pax_attribute_name(pax_entry_name, p)); + st.st_size = archive_strlen(&(pax->pax_header)); + /* Copy uid/gid (but clip to ustar limits). 
*/ + st.st_uid = st_main->st_uid; + if (st.st_uid >= 1 << 18) + st.st_uid = (1 << 18) - 1; + st.st_gid = st_main->st_gid; + if (st.st_gid >= 1 << 18) + st.st_gid = (1 << 18) - 1; + /* Copy mode over (but not setuid/setgid bits) */ + st.st_mode = st_main->st_mode; +#ifdef S_ISUID + st.st_mode &= ~S_ISUID; +#endif +#ifdef S_ISGID + st.st_mode &= ~S_ISGID; +#endif +#ifdef S_ISVTX + st.st_mode &= ~S_ISVTX; +#endif + archive_entry_copy_stat(pax_attr_entry, &st); + + /* Copy uname/gname. */ + archive_entry_set_uname(pax_attr_entry, + archive_entry_uname(entry_main)); + archive_entry_set_gname(pax_attr_entry, + archive_entry_gname(entry_main)); + + /* Copy mtime, but clip to ustar limits. */ + s = archive_entry_mtime(entry_main); + ns = archive_entry_mtime_nsec(entry_main); + if (s < 0) { s = 0; ns = 0; } + if (s > 0x7fffffff) { s = 0x7fffffff; ns = 0; } + archive_entry_set_mtime(pax_attr_entry, s, ns); + + /* Ditto for atime. */ + s = archive_entry_atime(entry_main); + ns = archive_entry_atime_nsec(entry_main); + if (s < 0) { s = 0; ns = 0; } + if (s > 0x7fffffff) { s = 0x7fffffff; ns = 0; } + archive_entry_set_atime(pax_attr_entry, s, ns); + + /* Standard ustar doesn't support ctime. */ + archive_entry_set_ctime(pax_attr_entry, 0, 0); + + ret = __archive_write_format_header_ustar(a, paxbuff, + pax_attr_entry, 'x', 1); + + archive_entry_free(pax_attr_entry); + + /* Note that the 'x' header shouldn't ever fail to format */ + if (ret != 0) { + const char *msg = "archive_write_pax_header: " + "'x' header failed?! 
This can't happen.\n"; + write(2, msg, strlen(msg)); + exit(1); + } + r = (a->compression_write)(a, paxbuff, 512); + if (r != ARCHIVE_OK) { + pax->entry_bytes_remaining = 0; + pax->entry_padding = 0; + return (ARCHIVE_FATAL); + } + + pax->entry_bytes_remaining = archive_strlen(&(pax->pax_header)); + pax->entry_padding = 0x1ff & (- pax->entry_bytes_remaining); + + r = (a->compression_write)(a, pax->pax_header.s, + archive_strlen(&(pax->pax_header))); + if (r != ARCHIVE_OK) { + /* If a write fails, we're pretty much toast. */ + return (ARCHIVE_FATAL); + } + /* Pad out the end of the entry. */ + r = write_nulls(a, pax->entry_padding); + if (r != ARCHIVE_OK) { + /* If a write fails, we're pretty much toast. */ + return (ARCHIVE_FATAL); + } + pax->entry_bytes_remaining = pax->entry_padding = 0; + } + + /* Write the header for main entry. */ + r = (a->compression_write)(a, ustarbuff, 512); + if (r != ARCHIVE_OK) + return (r); + + /* + * Inform the client of the on-disk size we're using, so + * they can avoid unnecessarily writing a body for something + * that we're just going to ignore. + */ + archive_entry_set_size(entry_original, archive_entry_size(entry_main)); + pax->entry_bytes_remaining = archive_entry_size(entry_main); + pax->entry_padding = 0x1ff & (- pax->entry_bytes_remaining); + archive_entry_free(entry_main); + + return (ret); +} + +/* + * We need a valid name for the regular 'ustar' entry. This routine + * tries to hack something more-or-less reasonable. + * + * The approach here tries to preserve leading dir names. We do so by + * working with four sections: + * 1) "prefix" directory names, + * 2) "suffix" directory names, + * 3) inserted dir name (optional), + * 4) filename. + * + * These sections must satisfy the following requirements: + * * Parts 1 & 2 together form an initial portion of the dir name. + * * Part 3 is specified by the caller. (It should not contain a leading + * or trailing '/'.) 
+ * * Part 4 forms an initial portion of the base filename. + * * The filename must be <= 99 chars to fit the ustar 'name' field. + * * Parts 2, 3, 4 together must be <= 99 chars to fit the ustar 'name' fld. + * * Part 1 must be <= 155 chars to fit the ustar 'prefix' field. + * * If the original name ends in a '/', the new name must also end in a '/' + * * Trailing '/.' sequences may be stripped. + * + * Note: Recall that the ustar format does not store the '/' separating + * parts 1 & 2, but does store the '/' separating parts 2 & 3. + */ +static char * +build_ustar_entry_name(char *dest, const char *src, size_t src_length, + const char *insert) +{ + const char *prefix, *prefix_end; + const char *suffix, *suffix_end; + const char *filename, *filename_end; + char *p; + int need_slash = 0; /* Was there a trailing slash? */ + size_t suffix_length = 99; + int insert_length; + + /* Length of additional dir element to be added. */ + if (insert == NULL) + insert_length = 0; + else + /* +2 here allows for '/' before and after the insert. */ + insert_length = strlen(insert) + 2; + + /* Step 0: Quick bailout in a common case. */ + if (src_length < 100 && insert == NULL) { + strncpy(dest, src, src_length); + dest[src_length] = '\0'; + return (dest); + } + + /* Step 1: Locate filename and enforce the length restriction. */ + filename_end = src + src_length; + /* Remove trailing '/' chars and '/.' pairs. */ + for (;;) { + if (filename_end > src && filename_end[-1] == '/') { + filename_end --; + need_slash = 1; /* Remember to restore trailing '/'. */ + continue; + } + if (filename_end > src + 1 && filename_end[-1] == '.' + && filename_end[-2] == '/') { + filename_end -= 2; + need_slash = 1; /* "foo/." will become "foo/" */ + continue; + } + break; + } + if (need_slash) + suffix_length--; + /* Find start of filename. 
*/ + filename = filename_end - 1; + while ((filename > src) && (*filename != '/')) + filename --; + if ((*filename == '/') && (filename < filename_end - 1)) + filename ++; + /* Adjust filename_end so that filename + insert fits in 99 chars. */ + suffix_length -= insert_length; + if (filename_end > filename + suffix_length) + filename_end = filename + suffix_length; + /* Calculate max size for "suffix" section (#3 above). */ + suffix_length -= filename_end - filename; + + /* Step 2: Locate the "prefix" section of the dirname, including + * trailing '/'. */ + prefix = src; + prefix_end = prefix + 155; + if (prefix_end > filename) + prefix_end = filename; + while (prefix_end > prefix && *prefix_end != '/') + prefix_end--; + if ((prefix_end < filename) && (*prefix_end == '/')) + prefix_end++; + + /* Step 3: Locate the "suffix" section of the dirname, + * including trailing '/'. */ + suffix = prefix_end; + suffix_end = suffix + suffix_length; /* Enforce limit. */ + if (suffix_end > filename) + suffix_end = filename; + if (suffix_end < suffix) + suffix_end = suffix; + while (suffix_end > suffix && *suffix_end != '/') + suffix_end--; + if ((suffix_end < filename) && (*suffix_end == '/')) + suffix_end++; + + /* Step 4: Build the new name. */ + /* The OpenBSD strlcpy function is safer, but less portable. */ + /* Rather than maintain two versions, just use the strncpy version. 
*/ + p = dest; + if (prefix_end > prefix) { + strncpy(p, prefix, prefix_end - prefix); + p += prefix_end - prefix; + } + if (suffix_end > suffix) { + strncpy(p, suffix, suffix_end - suffix); + p += suffix_end - suffix; + } + if (insert != NULL) { + /* Note: assume insert does not have leading or trailing '/' */ + strcpy(p, insert); + p += strlen(insert); + *p++ = '/'; + } + strncpy(p, filename, filename_end - filename); + p += filename_end - filename; + if (need_slash) + *p++ = '/'; + *p++ = '\0'; + + return (dest); +} + +/* + * The ustar header for the pax extended attributes must have a + * reasonable name: SUSv3 suggests 'dirname'/PaxHeader/'filename' + * + * Joerg Schiling has argued that this is unnecessary because, in practice, + * if the pax extended attributes get extracted as regular files, noone is + * going to bother reading those attributes to manually restore them. + * Based on this, 'star' uses /tmp/PaxHeader/'basename' as the ustar header + * name. This is a tempting argument, but I'm not entirely convinced. + * I'm also uncomfortable with the fact that "/tmp" is a Unix-ism. + * + * The following routine implements the SUSv3 recommendation, and is + * much simpler because build_ustar_entry_name() above already does + * most of the work (we just need to give it an extra path element to + * insert and handle a few pathological cases). + */ +static char * +build_pax_attribute_name(char *dest, const char *src) +{ + const char *p; + + /* Handle the null filename case. */ + if (src == NULL || *src == '\0') { + strcpy(dest, "PaxHeader/blank"); + return (dest); + } + + /* Prune final '/' and other unwanted final elements. */ + p = src + strlen(src); + for (;;) { + /* Ends in "/", remove the '/' */ + if (p > src && p[-1] == '/') { + --p; + continue; + } + /* Ends in "/.", remove the '.' */ + if (p > src + 1 && p[-1] == '.' + && p[-2] == '/') { + --p; + continue; + } + break; + } + + /* Pathological case: After above, there was nothing left. 
+ * This includes "/." "/./." "/.//./." etc. */ + if (p == src) { + strcpy(dest, "/PaxHeader/rootdir"); + return (dest); + } + + /* Convert unadorned "." into a suitable filename. */ + if (*src == '.' && p == src + 1) { + strcpy(dest, "PaxHeader/currentdir"); + return (dest); + } + + /* General case: build a ustar-compatible name adding "/PaxHeader/". */ + build_ustar_entry_name(dest, src, p - src, "PaxHeader"); + + return (dest); +} + +/* Write two null blocks for the end of archive */ +static int +archive_write_pax_finish(struct archive *a) +{ + struct pax *pax; + int r; + + r = ARCHIVE_OK; + pax = a->format_data; + if (pax->written && a->compression_write != NULL) + r = write_nulls(a, 512 * 2); + archive_string_free(&pax->pax_header); + free(pax); + a->format_data = NULL; + return (r); +} + +static int +archive_write_pax_finish_entry(struct archive *a) +{ + struct pax *pax; + int ret; + + pax = a->format_data; + ret = write_nulls(a, pax->entry_bytes_remaining + pax->entry_padding); + pax->entry_bytes_remaining = pax->entry_padding = 0; + return (ret); +} + +static int +write_nulls(struct archive *a, size_t padding) +{ + int ret, to_write; + + while (padding > 0) { + to_write = padding < a->null_length ? padding : a->null_length; + ret = (a->compression_write)(a, a->nulls, to_write); + if (ret != ARCHIVE_OK) + return (ret); + padding -= to_write; + } + return (ARCHIVE_OK); +} + +static int +archive_write_pax_data(struct archive *a, const void *buff, size_t s) +{ + struct pax *pax; + int ret; + + pax = a->format_data; + pax->written = 1; + if (s > pax->entry_bytes_remaining) + s = pax->entry_bytes_remaining; + + ret = (a->compression_write)(a, buff, s); + pax->entry_bytes_remaining -= s; + return (ret); +} + +static int +has_non_ASCII(const wchar_t *wp) +{ + while (*wp != L'\0' && *wp < 128) + wp++; + return (*wp != L'\0'); +} + +/* + * Used by extended attribute support; encodes the name + * so that there will be no '=' characters in the result. 
+ */ +static char * +url_encode(const char *in) +{ + const char *s; + char *d; + int out_len = 0; + char *out; + + for (s = in; *s != '\0'; s++) { + if (*s < 33 || *s > 126 || *s == '%' || *s == '=') + out_len += 3; + else + out_len++; + } + + out = (char *)malloc(out_len + 1); + if (out == NULL) + return (NULL); + + for (s = in, d = out; *s != '\0'; s++) { + /* encode any non-printable ASCII character or '%' or '=' */ + if (*s < 33 || *s > 126 || *s == '%' || *s == '=') { + /* URL encoding is '%' followed by two hex digits */ + *d++ = '%'; + *d++ = "0123456789ABCDEF"[0x0f & (*s >> 4)]; + *d++ = "0123456789ABCDEF"[0x0f & *s]; + } else { + *d++ = *s; + } + } + *d = '\0'; + return (out); +} + +/* + * Encode a sequence of bytes into a C string using base-64 encoding. + * + * Returns a null-terminated C string allocated with malloc(); caller + * is responsible for freeing the result. + */ +static char * +base64_encode(const char *s, size_t len) +{ + static const char digits[64] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + int v; + char *d, *out; + + /* 3 bytes becomes 4 chars, but round up and allow for trailing NUL */ + out = malloc((len * 4 + 2) / 3 + 1); + if (out == NULL) + return (NULL); + d = out; + + /* Convert each group of 3 bytes into 4 characters. */ + while (len >= 3) { + v = (((int)s[0] << 16) & 0xff0000) + | (((int)s[1] << 8) & 0xff00) + | (((int)s[2]) & 0x00ff); + s += 3; + len -= 3; + *d++ = digits[(v >> 18) & 0x3f]; + *d++ = digits[(v >> 12) & 0x3f]; + *d++ = digits[(v >> 6) & 0x3f]; + *d++ = digits[(v) & 0x3f]; + } + /* Handle final group of 1 byte (2 chars) or 2 bytes (3 chars). 
*/ + switch (len) { + case 0: break; + case 1: + v = (((int)s[0] << 16) & 0xff0000); + *d++ = digits[(v >> 18) & 0x3f]; + *d++ = digits[(v >> 12) & 0x3f]; + break; + case 2: + v = (((int)s[0] << 16) & 0xff0000) + | (((int)s[1] << 8) & 0xff00); + *d++ = digits[(v >> 18) & 0x3f]; + *d++ = digits[(v >> 12) & 0x3f]; + *d++ = digits[(v >> 6) & 0x3f]; + break; + } + /* Add trailing NUL character so output is a valid C string. */ + *d++ = '\0'; + return (out); +} diff --git a/lib/libarchive/archive_write_set_format_shar.c b/lib/libarchive/archive_write_set_format_shar.c new file mode 100644 index 0000000..b636cfb --- /dev/null +++ b/lib/libarchive/archive_write_set_format_shar.c @@ -0,0 +1,534 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "archive.h" +#include "archive_entry.h" +#include "archive_private.h" + +struct shar { + int dump; + int end_of_line; + struct archive_entry *entry; + int has_data; + char *last_dir; + char outbuff[1024]; + size_t outbytes; + size_t outpos; + int uuavail; + char uubuffer[3]; + int wrote_header; + struct archive_string work; +}; + +static int archive_write_shar_finish(struct archive *); +static int archive_write_shar_header(struct archive *, + struct archive_entry *); +static int archive_write_shar_data_sed(struct archive *, + const void * buff, size_t); +static int archive_write_shar_data_uuencode(struct archive *, + const void * buff, size_t); +static int archive_write_shar_finish_entry(struct archive *); +static int shar_printf(struct archive *, const char *fmt, ...); +static void uuencode_group(struct shar *); + +static int +shar_printf(struct archive *a, const char *fmt, ...) +{ + struct shar *shar; + va_list ap; + int ret; + + shar = a->format_data; + va_start(ap, fmt); + archive_string_empty(&(shar->work)); + archive_string_vsprintf(&(shar->work), fmt, ap); + ret = ((a->compression_write)(a, shar->work.s, strlen(shar->work.s))); + va_end(ap); + return (ret); +} + +/* + * Set output format to 'shar' format. 
+ */ +int +archive_write_set_format_shar(struct archive *a) +{ + struct shar *shar; + + /* If someone else was already registered, unregister them. */ + if (a->format_finish != NULL) + (a->format_finish)(a); + + shar = malloc(sizeof(*shar)); + if (shar == NULL) { + archive_set_error(a, ENOMEM, "Can't allocate shar data"); + return (ARCHIVE_FATAL); + } + memset(shar, 0, sizeof(*shar)); + a->format_data = shar; + + a->pad_uncompressed = 0; + a->format_write_header = archive_write_shar_header; + a->format_finish = archive_write_shar_finish; + a->format_write_data = archive_write_shar_data_sed; + a->format_finish_entry = archive_write_shar_finish_entry; + a->archive_format = ARCHIVE_FORMAT_SHAR_BASE; + a->archive_format_name = "shar"; + return (ARCHIVE_OK); +} + +/* + * An alternate 'shar' that uses uudecode instead of 'sed' to encode + * file contents and can therefore be used to archive binary files. + * In addition, this variant also attempts to restore ownership, file modes, + * and other extended file information. + */ +int +archive_write_set_format_shar_dump(struct archive *a) +{ + struct shar *shar; + + archive_write_set_format_shar(a); + shar = a->format_data; + shar->dump = 1; + a->format_write_data = archive_write_shar_data_uuencode; + a->archive_format = ARCHIVE_FORMAT_SHAR_DUMP; + a->archive_format_name = "shar dump"; + return (ARCHIVE_OK); +} + +static int +archive_write_shar_header(struct archive *a, struct archive_entry *entry) +{ + const char *linkname; + const char *name; + char *p, *pp; + struct shar *shar; + const struct stat *st; + int ret; + + shar = a->format_data; + if (!shar->wrote_header) { + ret = shar_printf(a, "#!/bin/sh\n"); + if (ret != ARCHIVE_OK) + return (ret); + ret = shar_printf(a, "# This is a shell archive\n"); + if (ret != ARCHIVE_OK) + return (ret); + shar->wrote_header = 1; + } + + /* Save the entry for the closing. 
*/ + if (shar->entry) + archive_entry_free(shar->entry); + shar->entry = archive_entry_clone(entry); + name = archive_entry_pathname(entry); + st = archive_entry_stat(entry); + + /* Handle some preparatory issues. */ + switch(st->st_mode & S_IFMT) { + case S_IFREG: + /* Only regular files have non-zero size. */ + break; + case S_IFDIR: + archive_entry_set_size(entry, 0); + /* Don't bother trying to recreate '.' */ + if (strcmp(name, ".") == 0 || strcmp(name, "./") == 0) + return (ARCHIVE_OK); + break; + case S_IFIFO: + case S_IFCHR: + case S_IFBLK: + /* All other file types have zero size in the archive. */ + archive_entry_set_size(entry, 0); + break; + default: + archive_entry_set_size(entry, 0); + if (archive_entry_hardlink(entry) == NULL && + archive_entry_symlink(entry) == NULL) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "shar format cannot archive this"); + return (ARCHIVE_WARN); + } + } + + /* Stock preparation for all file types. */ + ret = shar_printf(a, "echo x %s\n", name); + if (ret != ARCHIVE_OK) + return (ret); + + if (!S_ISDIR(st->st_mode)) { + /* Try to create the dir. */ + p = strdup(name); + pp = strrchr(p, '/'); + /* If there is a / character, try to create the dir. */ + if (pp != NULL) { + *pp = '\0'; + + /* Try to avoid a lot of redundant mkdir commands. */ + if (strcmp(p, ".") == 0) { + /* Don't try to "mkdir ." */ + } else if (shar->last_dir == NULL) { + ret = shar_printf(a, + "mkdir -p %s > /dev/null 2>&1\n", p); + if (ret != ARCHIVE_OK) + return (ret); + shar->last_dir = p; + } else if (strcmp(p, shar->last_dir) == 0) { + /* We've already created this exact dir. */ + free(p); + } else if (strlen(p) < strlen(shar->last_dir) && + strncmp(p, shar->last_dir, strlen(p)) == 0) { + /* We've already created a subdir. */ + free(p); + } else { + ret = shar_printf(a, + "mkdir -p %s > /dev/null 2>&1\n", p); + if (ret != ARCHIVE_OK) + return (ret); + free(shar->last_dir); + shar->last_dir = p; + } + } + } + + /* Handle file-type specific issues. 
*/ + shar->has_data = 0; + if ((linkname = archive_entry_hardlink(entry)) != NULL) { + ret = shar_printf(a, "ln -f %s %s\n", linkname, name); + if (ret != ARCHIVE_OK) + return (ret); + } else if ((linkname = archive_entry_symlink(entry)) != NULL) { + ret = shar_printf(a, "ln -fs %s %s\n", linkname, name); + if (ret != ARCHIVE_OK) + return (ret); + } else { + switch(st->st_mode & S_IFMT) { + case S_IFREG: + if (archive_entry_size(entry) == 0) { + /* More portable than "touch." */ + ret = shar_printf(a, "test -e \"%s\" || :> \"%s\"\n", name, name); + if (ret != ARCHIVE_OK) + return (ret); + } else { + if (shar->dump) { + ret = shar_printf(a, + "uudecode -o %s << 'SHAR_END'\n", + name); + if (ret != ARCHIVE_OK) + return (ret); + ret = shar_printf(a, "begin %o %s\n", + archive_entry_mode(entry) & 0777, + name); + if (ret != ARCHIVE_OK) + return (ret); + } else { + ret = shar_printf(a, + "sed 's/^X//' > %s << 'SHAR_END'\n", + name); + if (ret != ARCHIVE_OK) + return (ret); + } + shar->has_data = 1; + shar->end_of_line = 1; + shar->outpos = 0; + shar->outbytes = 0; + } + break; + case S_IFDIR: + ret = shar_printf(a, "mkdir -p %s > /dev/null 2>&1\n", + name); + if (ret != ARCHIVE_OK) + return (ret); + /* Record that we just created this directory. */ + if (shar->last_dir != NULL) + free(shar->last_dir); + + shar->last_dir = strdup(name); + /* Trim a trailing '/'. */ + pp = strrchr(shar->last_dir, '/'); + if (pp != NULL && pp[1] == '\0') + *pp = '\0'; + /* + * TODO: Put dir name/mode on a list to be fixed + * up at end of archive. 
+ */ + break; + case S_IFIFO: + ret = shar_printf(a, "mkfifo %s\n", name); + if (ret != ARCHIVE_OK) + return (ret); + break; + case S_IFCHR: + ret = shar_printf(a, "mknod %s c %d %d\n", name, + archive_entry_rdevmajor(entry), + archive_entry_rdevminor(entry)); + if (ret != ARCHIVE_OK) + return (ret); + break; + case S_IFBLK: + ret = shar_printf(a, "mknod %s b %d %d\n", name, + archive_entry_rdevmajor(entry), + archive_entry_rdevminor(entry)); + if (ret != ARCHIVE_OK) + return (ret); + break; + default: + return (ARCHIVE_WARN); + } + } + + return (ARCHIVE_OK); +} + +/* XXX TODO: This could be more efficient XXX */ +static int +archive_write_shar_data_sed(struct archive *a, const void *buff, size_t n) +{ + struct shar *shar; + const char *src; + int ret; + + shar = a->format_data; + if (!shar->has_data) + return (0); + + src = buff; + ret = ARCHIVE_OK; + shar->outpos = 0; + while (n-- > 0) { + if (shar->end_of_line) { + shar->outbuff[shar->outpos++] = 'X'; + shar->end_of_line = 0; + } + if (*src == '\n') + shar->end_of_line = 1; + shar->outbuff[shar->outpos++] = *src++; + + if (shar->outpos > sizeof(shar->outbuff) - 2) { + ret = (a->compression_write)(a, shar->outbuff, + shar->outpos); + if (ret != ARCHIVE_OK) + return (ret); + shar->outpos = 0; + } + } + + if (shar->outpos > 0) + ret = (a->compression_write)(a, shar->outbuff, shar->outpos); + return (ret); +} + +#define UUENC(c) (((c)!=0) ? ((c) & 077) + ' ': '`') + +/* XXX This could be a lot more efficient. 
XXX */ +static void +uuencode_group(struct shar *shar) +{ + int t; + + t = 0; + if (shar->uuavail > 0) + t = 0xff0000 & (shar->uubuffer[0] << 16); + if (shar->uuavail > 1) + t |= 0x00ff00 & (shar->uubuffer[1] << 8); + if (shar->uuavail > 2) + t |= 0x0000ff & (shar->uubuffer[2]); + shar->outbuff[shar->outpos++] = UUENC( 0x3f & (t>>18) ); + shar->outbuff[shar->outpos++] = UUENC( 0x3f & (t>>12) ); + shar->outbuff[shar->outpos++] = UUENC( 0x3f & (t>>6) ); + shar->outbuff[shar->outpos++] = UUENC( 0x3f & (t) ); + shar->uuavail = 0; + shar->outbytes += shar->uuavail; + shar->outbuff[shar->outpos] = 0; +} + +static int +archive_write_shar_data_uuencode(struct archive *a, const void *buff, + size_t length) +{ + struct shar *shar; + const char *src; + size_t n; + int ret; + + shar = a->format_data; + if (!shar->has_data) + return (ARCHIVE_OK); + src = buff; + n = length; + while (n-- > 0) { + if (shar->uuavail == 3) + uuencode_group(shar); + if (shar->outpos >= 60) { + ret = shar_printf(a, "%c%s\n", UUENC(shar->outbytes), + shar->outbuff); + if (ret != ARCHIVE_OK) + return (ret); + shar->outpos = 0; + shar->outbytes = 0; + } + + shar->uubuffer[shar->uuavail++] = *src++; + shar->outbytes++; + } + return (ARCHIVE_OK); +} + +static int +archive_write_shar_finish_entry(struct archive *a) +{ + const char *g, *p, *u; + struct shar *shar; + int ret; + + shar = a->format_data; + if (shar->entry == NULL) + return (0); + + if (shar->dump) { + /* Finish uuencoded data. 
*/ + if (shar->has_data) { + if (shar->uuavail > 0) + uuencode_group(shar); + if (shar->outpos > 0) { + ret = shar_printf(a, "%c%s\n", + UUENC(shar->outbytes), shar->outbuff); + if (ret != ARCHIVE_OK) + return (ret); + shar->outpos = 0; + shar->uuavail = 0; + shar->outbytes = 0; + } + ret = shar_printf(a, "%c\n", UUENC(0)); + if (ret != ARCHIVE_OK) + return (ret); + ret = shar_printf(a, "end\n", UUENC(0)); + if (ret != ARCHIVE_OK) + return (ret); + ret = shar_printf(a, "SHAR_END\n"); + if (ret != ARCHIVE_OK) + return (ret); + } + /* Restore file mode, owner, flags. */ + /* + * TODO: Don't immediately restore mode for + * directories; defer that to end of script. + */ + ret = shar_printf(a, "chmod %o %s\n", + archive_entry_mode(shar->entry) & 07777, + archive_entry_pathname(shar->entry)); + if (ret != ARCHIVE_OK) + return (ret); + + u = archive_entry_uname(shar->entry); + g = archive_entry_gname(shar->entry); + if (u != NULL || g != NULL) { + ret = shar_printf(a, "chown %s%s%s %s\n", + (u != NULL) ? u : "", + (g != NULL) ? ":" : "", (g != NULL) ? g : "", + archive_entry_pathname(shar->entry)); + if (ret != ARCHIVE_OK) + return (ret); + } + + if ((p = archive_entry_fflags_text(shar->entry)) != NULL) { + ret = shar_printf(a, "chflags %s %s\n", p, + archive_entry_pathname(shar->entry)); + if (ret != ARCHIVE_OK) + return (ret); + } + + /* TODO: restore ACLs */ + + } else { + if (shar->has_data) { + /* Finish sed-encoded data: ensure last line ends. */ + if (!shar->end_of_line) { + ret = shar_printf(a, "\n"); + if (ret != ARCHIVE_OK) + return (ret); + } + ret = shar_printf(a, "SHAR_END\n"); + if (ret != ARCHIVE_OK) + return (ret); + } + } + + archive_entry_free(shar->entry); + shar->entry = NULL; + return (0); +} + +static int +archive_write_shar_finish(struct archive *a) +{ + struct shar *shar; + int ret; + + /* + * TODO: Accumulate list of directory names/modes and + * fix them all up at end-of-archive. 
+ */ + + shar = a->format_data; + + /* + * Only write the end-of-archive markers if the archive was + * actually started. This avoids problems if someone sets + * shar format, then sets another format (which would invoke + * shar_finish to free the format-specific data). + */ + if (shar->wrote_header) { + ret = shar_printf(a, "exit\n"); + if (ret != ARCHIVE_OK) + return (ret); + /* Shar output is never padded. */ + archive_write_set_bytes_in_last_block(a, 1); + /* + * TODO: shar should also suppress padding of + * uncompressed data within gzip/bzip2 streams. + */ + } + if (shar->entry != NULL) + archive_entry_free(shar->entry); + if (shar->last_dir != NULL) + free(shar->last_dir); + archive_string_free(&(shar->work)); + free(shar); + a->format_data = NULL; + return (ARCHIVE_OK); +} diff --git a/lib/libarchive/archive_write_set_format_ustar.c b/lib/libarchive/archive_write_set_format_ustar.c new file mode 100644 index 0000000..95006e8 --- /dev/null +++ b/lib/libarchive/archive_write_set_format_ustar.c @@ -0,0 +1,498 @@ +/*- + * Copyright (c) 2003-2004 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" +__FBSDID("$FreeBSD$"); + +#include <sys/stat.h> +#ifdef MAJOR_IN_MKDEV +#include <sys/mkdev.h> +#else +#ifdef MAJOR_IN_SYSMACROS +#include <sys/sysmacros.h> +#endif +#endif +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "archive.h" +#include "archive_entry.h" +#include "archive_private.h" + +struct ustar { + uint64_t entry_bytes_remaining; + uint64_t entry_padding; + char written; +}; + +/* + * Define structure of POSIX 'ustar' tar header. + */ +struct archive_entry_header_ustar { + char name[100]; + char mode[6]; + char mode_padding[2]; + char uid[6]; + char uid_padding[2]; + char gid[6]; + char gid_padding[2]; + char size[11]; + char size_padding[1]; + char mtime[11]; + char mtime_padding[1]; + char checksum[8]; + char typeflag[1]; + char linkname[100]; + char magic[6]; /* For POSIX: "ustar\0" */ + char version[2]; /* For POSIX: "00" */ + char uname[32]; + char gname[32]; + char rdevmajor[6]; + char rdevmajor_padding[2]; + char rdevminor[6]; + char rdevminor_padding[2]; + char prefix[155]; + char padding[12]; +}; + +/* + * A filled-in copy of the header for initialization. + */ +static const struct archive_entry_header_ustar template_header = { + { "" }, /* name */ + { "000000" }, { ' ', '\0' }, /* mode, space-null termination. */ + { "000000" }, { ' ', '\0' }, /* uid, space-null termination. */ + { "000000" }, { ' ', '\0' }, /* gid, space-null termination. 
*/ + { "00000000000" }, { ' ' }, /* size, space termination. */ + { "00000000000" }, { ' ' }, /* mtime, space termination. */ + { " " }, /* Initial checksum value. */ + { '0' }, /* default: regular file */ + { "" }, /* linkname */ + { "ustar" }, /* magic */ + { '0', '0' }, /* version */ + { "" }, /* uname */ + { "" }, /* gname */ + { "000000" }, { ' ', '\0' }, /* rdevmajor, space-null termination */ + { "000000" }, { ' ', '\0' }, /* rdevminor, space-null termination */ + { "" }, /* prefix */ + { "" } /* padding */ +}; + +static int archive_write_ustar_data(struct archive *a, const void *buff, + size_t s); +static int archive_write_ustar_finish(struct archive *); +static int archive_write_ustar_finish_entry(struct archive *); +static int archive_write_ustar_header(struct archive *, + struct archive_entry *entry); +static int format_256(int64_t, char *, int); +static int format_number(int64_t, char *, int size, int max, int strict); +static int format_octal(int64_t, char *, int); +static int write_nulls(struct archive *a, size_t); + +/* + * Set output format to 'ustar' format. + */ +int +archive_write_set_format_ustar(struct archive *a) +{ + struct ustar *ustar; + + /* If someone else was already registered, unregister them. */ + if (a->format_finish != NULL) + (a->format_finish)(a); + + ustar = malloc(sizeof(*ustar)); + if (ustar == NULL) { + archive_set_error(a, ENOMEM, "Can't allocate ustar data"); + return (ARCHIVE_FATAL); + } + memset(ustar, 0, sizeof(*ustar)); + a->format_data = ustar; + + a->pad_uncompressed = 1; /* Mimic gtar in this respect. 
*/ + a->format_write_header = archive_write_ustar_header; + a->format_write_data = archive_write_ustar_data; + a->format_finish = archive_write_ustar_finish; + a->format_finish_entry = archive_write_ustar_finish_entry; + a->archive_format = ARCHIVE_FORMAT_TAR_USTAR; + a->archive_format_name = "POSIX ustar"; + return (ARCHIVE_OK); +} + +static int +archive_write_ustar_header(struct archive *a, struct archive_entry *entry) +{ + char buff[512]; + int ret; + struct ustar *ustar; + + ustar = a->format_data; + ustar->written = 1; + + /* Only regular files (not hardlinks) have data. */ + if (archive_entry_hardlink(entry) != NULL || + archive_entry_symlink(entry) != NULL || + !S_ISREG(archive_entry_mode(entry))) + archive_entry_set_size(entry, 0); + + ret = __archive_write_format_header_ustar(a, buff, entry, -1, 1); + if (ret != ARCHIVE_OK) + return (ret); + ret = (a->compression_write)(a, buff, 512); + if (ret != ARCHIVE_OK) + return (ret); + + ustar->entry_bytes_remaining = archive_entry_size(entry); + ustar->entry_padding = 0x1ff & (- ustar->entry_bytes_remaining); + return (ARCHIVE_OK); +} + +/* + * Format a basic 512-byte "ustar" header. + * + * Returns -1 if format failed (due to field overflow). + * Note that this always formats as much of the header as possible. + * If "strict" is set to zero, it will extend numeric fields as + * necessary (overwriting terminators or using base-256 extensions). + * + * This is exported so that other 'tar' formats can use it. + */ +int +__archive_write_format_header_ustar(struct archive *a, char buff[512], + struct archive_entry *entry, int tartype, int strict) +{ + unsigned int checksum; + struct archive_entry_header_ustar *h; + int i, ret; + size_t copy_length; + const char *p, *pp; + const struct stat *st; + int mytartype; + + ret = 0; + mytartype = -1; + /* + * The "template header" already includes the "ustar" + * signature, various end-of-field markers and other required + * elements. 
+ */ + memcpy(buff, &template_header, 512); + + h = (struct archive_entry_header_ustar *)buff; + + /* + * Because the block is already null-filled, and strings + * are allowed to exactly fill their destination (without null), + * I use memcpy(dest, src, strlen()) here a lot to copy strings. + */ + + pp = archive_entry_pathname(entry); + if (strlen(pp) <= sizeof(h->name)) + memcpy(h->name, pp, strlen(pp)); + else { + /* Store in two pieces, splitting at a '/'. */ + p = strchr(pp + strlen(pp) - sizeof(h->name) - 1, '/'); + /* + * If there is no path separator, or the prefix or + * remaining name are too large, return an error. + */ + if (!p) { + archive_set_error(a, ENAMETOOLONG, + "Pathname too long"); + ret = ARCHIVE_WARN; + } else if (p > pp + sizeof(h->prefix)) { + archive_set_error(a, ENAMETOOLONG, + "Pathname too long"); + ret = ARCHIVE_WARN; + } else { + /* Copy prefix and remainder to appropriate places */ + memcpy(h->prefix, pp, p - pp); + memcpy(h->name, p + 1, pp + strlen(pp) - p - 1); + } + } + + p = archive_entry_hardlink(entry); + if (p != NULL) + mytartype = '1'; + else + p = archive_entry_symlink(entry); + if (p != NULL && p[0] != '\0') { + copy_length = strlen(p); + if (copy_length > sizeof(h->linkname)) { + archive_set_error(a, ENAMETOOLONG, + "Link contents too long"); + ret = ARCHIVE_WARN; + copy_length = sizeof(h->linkname); + } + memcpy(h->linkname, p, copy_length); + } + + p = archive_entry_uname(entry); + if (p != NULL && p[0] != '\0') { + copy_length = strlen(p); + if (copy_length > sizeof(h->uname)) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Username too long"); + ret = ARCHIVE_WARN; + copy_length = sizeof(h->uname); + } + memcpy(h->uname, p, copy_length); + } + + p = archive_entry_gname(entry); + if (p != NULL && p[0] != '\0') { + copy_length = strlen(p); + if (strlen(p) > sizeof(h->gname)) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Group name too long"); + ret = ARCHIVE_WARN; + copy_length = sizeof(h->gname); + } + 
memcpy(h->gname, p, copy_length); + } + + st = archive_entry_stat(entry); + + if (format_number(st->st_mode & 07777, h->mode, sizeof(h->mode), 8, strict)) { + archive_set_error(a, ERANGE, "Numeric mode too large"); + ret = ARCHIVE_WARN; + } + + if (format_number(st->st_uid, h->uid, sizeof(h->uid), 8, strict)) { + archive_set_error(a, ERANGE, "Numeric user ID too large"); + ret = ARCHIVE_WARN; + } + + if (format_number(st->st_gid, h->gid, sizeof(h->gid), 8, strict)) { + archive_set_error(a, ERANGE, "Numeric group ID too large"); + ret = ARCHIVE_WARN; + } + + if (format_number(st->st_size, h->size, sizeof(h->size), 12, strict)) { + archive_set_error(a, ERANGE, "File size out of range"); + ret = ARCHIVE_WARN; + } + + if (format_number(st->st_mtime, h->mtime, sizeof(h->mtime), 12, strict)) { + archive_set_error(a, ERANGE, + "File modification time too large"); + ret = ARCHIVE_WARN; + } + + if (S_ISBLK(st->st_mode) || S_ISCHR(st->st_mode)) { + if (format_number(major(st->st_rdev), h->rdevmajor, + sizeof(h->rdevmajor), 8, strict)) { + archive_set_error(a, ERANGE, + "Major device number too large"); + ret = ARCHIVE_WARN; + } + + if (format_number(minor(st->st_rdev), h->rdevminor, + sizeof(h->rdevminor), 8, strict)) { + archive_set_error(a, ERANGE, + "Minor device number too large"); + ret = ARCHIVE_WARN; + } + } + + if (tartype >= 0) { + h->typeflag[0] = tartype; + } else if (mytartype >= 0) { + h->typeflag[0] = mytartype; + } else { + switch (st->st_mode & S_IFMT) { + case S_IFREG: h->typeflag[0] = '0' ; break; + case S_IFLNK: h->typeflag[0] = '2' ; break; + case S_IFCHR: h->typeflag[0] = '3' ; break; + case S_IFBLK: h->typeflag[0] = '4' ; break; + case S_IFDIR: h->typeflag[0] = '5' ; break; + case S_IFIFO: h->typeflag[0] = '6' ; break; + case S_IFSOCK: + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "tar format cannot archive socket"); + ret = ARCHIVE_WARN; + break; + default: + archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, + "tar format cannot archive this 
(mode=0%lo)", + (unsigned long)st->st_mode); + ret = ARCHIVE_WARN; + } + } + + checksum = 0; + for (i = 0; i < 512; i++) + checksum += 255 & (unsigned int)buff[i]; + h->checksum[6] = '\0'; /* Can't be pre-set in the template. */ + /* h->checksum[7] = ' '; */ /* This is pre-set in the template. */ + format_octal(checksum, h->checksum, 6); + return (ret); +} + +/* + * Format a number into a field, with some intelligence. + */ +static int +format_number(int64_t v, char *p, int s, int maxsize, int strict) +{ + int64_t limit; + + limit = ((int64_t)1 << (s*3)); + + /* "Strict" only permits octal values with proper termination. */ + if (strict) + return (format_octal(v, p, s)); + + /* + * In non-strict mode, we allow the number to overwrite one or + * more bytes of the field termination. Even old tar + * implementations should be able to handle this with no + * problem. + */ + if (v >= 0) { + while (s <= maxsize) { + if (v < limit) + return (format_octal(v, p, s)); + s++; + limit <<= 3; + } + } + + /* Base-256 can handle any number, positive or negative. */ + return (format_256(v, p, maxsize)); +} + +/* + * Format a number into the specified field using base-256. + */ +static int +format_256(int64_t v, char *p, int s) +{ + p += s; + while (s-- > 0) { + *--p = (char)(v & 0xff); + v >>= 8; + } + *p |= 0x80; /* Set the base-256 marker bit. */ + return (0); +} + +/* + * Format a number into the specified field. + */ +static int +format_octal(int64_t v, char *p, int s) +{ + int len; + + len = s; + + /* Octal values can't be negative, so use 0. */ + if (v < 0) { + while (len-- > 0) + *p++ = '0'; + return (-1); + } + + p += s; /* Start at the end and work backwards. */ + while (s-- > 0) { + *--p = '0' + (v & 7); + v >>= 3; + } + + if (v == 0) + return (0); + + /* If it overflowed, fill field with max value. 
*/ + while (len-- > 0) + *p++ = '7'; + + return (-1); +} + +static int +archive_write_ustar_finish(struct archive *a) +{ + struct ustar *ustar; + int r; + + r = ARCHIVE_OK; + ustar = a->format_data; + /* + * Suppress end-of-archive if nothing else was ever written. + * This fixes a problem where setting one format, then another + * ends up writing a gratuitous end-of-archive marker. + */ + if (ustar->written && a->compression_write != NULL) + r = write_nulls(a, 512*2); + free(ustar); + a->format_data = NULL; + return (r); +} + +static int +archive_write_ustar_finish_entry(struct archive *a) +{ + struct ustar *ustar; + int ret; + + ustar = a->format_data; + ret = write_nulls(a, + ustar->entry_bytes_remaining + ustar->entry_padding); + ustar->entry_bytes_remaining = ustar->entry_padding = 0; + return (ret); +} + +static int +write_nulls(struct archive *a, size_t padding) +{ + int ret, to_write; + + while (padding > 0) { + to_write = padding < a->null_length ? padding : a->null_length; + ret = (a->compression_write)(a, a->nulls, to_write); + if (ret != ARCHIVE_OK) + return (ret); + padding -= to_write; + } + return (ARCHIVE_OK); +} + +static int +archive_write_ustar_data(struct archive *a, const void *buff, size_t s) +{ + struct ustar *ustar; + int ret; + + ustar = a->format_data; + if (s > ustar->entry_bytes_remaining) + s = ustar->entry_bytes_remaining; + ret = (a->compression_write)(a, buff, s); + ustar->entry_bytes_remaining -= s; + return (ret); +} diff --git a/lib/libarchive/libarchive-formats.5 b/lib/libarchive/libarchive-formats.5 new file mode 100644 index 0000000..8dc2d2a --- /dev/null +++ b/lib/libarchive/libarchive-formats.5 @@ -0,0 +1,255 @@ +.\" Copyright (c) 2003-2004 Tim Kientzle +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd April 27, 2004 +.Dt LIBARCHIVE-FORMATS 5 +.Os +.Sh NAME +.Nm libarchive-formats +.Nd archive formats supported by the libarchive library +.Sh DESCRIPTION +The +.Xr libarchive 3 +library reads and writes a variety of streaming archive formats. +Generally speaking, all of these archive formats consist of a series of +.Dq entries . +Each entry stores a single file system object, such as a file, directory, +or symbolic link. +.Pp +The following provides a brief description of each format supported +by libarchive, with some information about recognized extensions or +limitations of the current library support. +Note that just because a format is supported by libarchive does not +imply that a program that uses libarchive will support that format. 
+Applications that use libarchive specify which formats they wish +to support. +.Ss Tar Formats +The +.Xr libarchive 3 +library can read most tar archives. +However, it only writes POSIX-standard +.Dq ustar +and +.Dq pax interchange +formats. +.Pp +All tar formats store each entry in one or more 512-byte records. +The first record is used for file metadata, including filename, +timestamp, and mode information, and the file data is stored in +subsequent records. +Later variants have extended this by either appropriating undefined +areas of the header record, extending the header to multiple records, +or by storing special entries that modify the interpretation of +subsequent entries. +.Pp +.Bl -tag -width indent +.It Cm gnutar +The +.Xr libarchive 3 +library can read GNU-format tar archives. +It currently supports the most popular GNU extensions, including +modern long filename and linkname support, as well as atime and ctime data. +The libarchive library does not support multi-volume +archives, nor the old GNU long filename format. +.It Cm pax +The +.Xr libarchive 3 +library can read and write POSIX-compliant pax interchange format +archives. +Pax interchange format archives are an extension of the older ustar +format that adds a separate entry with additional attributes stored +as key/value pairs. +The presence of this additional entry is the only difference between +pax interchange format and the older ustar format. +The extended attributes are of unlimited length and are stored +as UTF-8 Unicode strings. +Keywords defined in the standard are in all lowercase; vendors are allowed +to define custom keys by preceding them with the vendor name in all uppercase. +When writing pax archives, libarchive uses many of the SCHILY keys +defined by Joerg Schilling's +.Dq star +archiver. +The libarchive library can read most of the SCHILY keys. +It ignores any keywords that it does not understand. 
+.It Cm restricted pax +The libarchive library can also write pax archives in which it +attempts to suppress the extended attributes entry whenever +possible. +The result will be identical to a ustar archive unless the +extended attributes entry is required to store a long file +name, long linkname, extended ACL, file flags, or if any of the standard +ustar data (user name, group name, UID, GID, etc) cannot be fully +represented in the ustar header. +In all cases, the result can be dearchived by any program that +can read POSIX-compliant pax interchange format archives. +Programs that correctly read ustar format (see below) will also be +able to read this format; any extended attributes will be extracted as +separate files stored in +.Pa PaxHeader +directories. +.It Cm ustar +The libarchive library can both read and write this format. +This format has the following limitations: +.Bl -bullet -compact +.It +Device major and minor numbers are limited to 21 bits. +Nodes with larger numbers will not be added to the archive. +.It +Path names in the archive are limited to 255 bytes. +(Shorter if there is no / character in exactly the right place.) +.It +Symbolic links and hard links are stored in the archive with +the name of the referenced file. +This name is limited to 100 bytes. +.It +Extended attributes, file flags, and other extended +security information cannot be stored. +.It +Archive entries are limited to 8 gigabytes in size. +.El +Note that the pax interchange format has none of these restrictions. +.El +.Pp +The libarchive library can also read a variety of commonly-used extensions to +the basic tar format. +In particular, it supports base-256 values in certain numeric fields. +This essentially removes the limitations on file size, modification time, +and device numbers. +.Pp +The first tar program appeared in Seventh Edition Unix (circa 1979). +This makes the tar format one of the oldest and most widely-supported +archive formats. 
+The first official standard for the tar file format was the +.Dq ustar +(Unix Standard Tar) format defined by POSIX in 1988. +POSIX.1-2001 extended the ustar format to create the +.Dq pax interchange +format. +There have also been many custom variations. +.Ss Cpio Formats +The libarchive library can read a number of common cpio variants and can write +.Dq odc +format archives. +A cpio archive stores each entry as a fixed-size header followed +by a variable-length filename and variable-length data. +Unlike tar, cpio does only minimal padding of the header or file data. +There are a variety of cpio formats, which differ primarily in +how they store the initial header: some store the values as +octal or hexadecimal numbers in ASCII, others as binary values of +varying byte order and length. +.Bl -tag -width indent +.It Cm binary +The libarchive library can read both big-endian and little-endian +variants of the original binary cpio format. +This format used 32-bit binary values for file size and mtime, +and 16-bit binary values for the other fields. +.It Cm odc +The libarchive library can both read and write this +POSIX-standard format. +This format stores the header contents as octal values in ASCII. +It is standard, portable, and immune from byte-order confusion. +File sizes and mtime are limited to 33 bits (8GB file size), +other fields are limited to 18 bits. +.It Cm SVR4 +The libarchive library can read both CRC and non-CRC variants of +this format. +The SVR4 format uses eight-digit hexadecimal values for +all header fields. +This limits file size to 4GB, and also limits the mtime and +other fields to 32 bits. +The SVR4 format can optionally include a CRC of the file +contents, although libarchive does not currently verify this CRC. +.El +.Pp +Cpio is an old format that was widely used because of its simplicity +and its support for very long filenames. +Unfortunately, it has many limitations that make it unsuitable +for widespread use. 
+Only the POSIX format permits files over 4GB, and its 18-bit +limit for most other fields makes it unsuitable for modern systems. +In addition, cpio formats only store numeric UID/GID values (not +usernames and group names), which can make it very difficult to correctly +transfer archives across systems. +.Ss Shar Formats +A +.Dq shell archive +is a shell script that, when executed on a POSIX-compliant +system, will recreate a collection of file system objects. +The libarchive library can write two different kinds of shar archives: +.Bl -tag -width indent +.It Cm shar +The traditional shar format uses a limited set of POSIX +commands, including +.Xr echo 1 , +.Xr mkdir 1 , +and +.Xr sed 1 . +It is suitable for portably archiving small collections of plain text files. +However, it is not generally well-suited for large archives +(many implementations of +.Xr sh 1 +have limits on the size of a script) nor should it be used with non-text files. +.It Cm shardump +This format is similar to shar but encodes files using +.Xr uuencode 1 +so that the result will be a plain text file regardless of the file contents. +It also includes additional shell commands that attempt to reproduce as +many file attributes as possible, including owner, mode, and flags. +The additional commands used to restore file attributes make +shardump archives less portable than plain shar archives. +.El +.Ss ISO9660 format +Libarchive can read and extract from files containing ISO9660-compliant +CDROM images. +It also has partial support for Rockridge extensions. +In many cases, this can remove the need to burn a physical CDROM. +It also avoids security and complexity issues that come with +virtual mounts and loopback devices. +.Ss Zip format +Libarchive can extract from most zip format archives. +It currently only supports uncompressed entries and entries +compressed with the +.Dq deflate +algorithm. +Older zip compression algorithms are not supported. 
+.Ss Tp Formats +The libarchive library has experimental support for tp format, +which was used in Fourth Edition through Sixth Edition Unix. +(It was supplanted by tar in Seventh Edition Unix.) +There were several distinct variants of this format; libarchive +supports the original tp format and the itp variant. +Currently, tp format support is not enabled by +.Fn archive_read_support_format_all , +it must be explicitly enabled by calling +.Fn archive_read_support_format_tp . +.Sh SEE ALSO +.Xr cpio 1 , +.Xr mkisofs 1 , +.Xr shar 1 , +.Xr tar 1 , +.Xr zip 1 , +.Xr zlib 3 , +.Xr tar 5 diff --git a/lib/libarchive/libarchive.3 b/lib/libarchive/libarchive.3 new file mode 100644 index 0000000..5c8976d --- /dev/null +++ b/lib/libarchive/libarchive.3 @@ -0,0 +1,317 @@ +.\" Copyright (c) 2003-2005 Tim Kientzle +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd January 8, 2005 +.Dt LIBARCHIVE 3 +.Os +.Sh NAME +.Nm libarchive +.Nd functions for reading and writing streaming archives +.Sh LIBRARY +.Lb libarchive +.Sh OVERVIEW +The +.Nm +library provides a flexible interface for reading and writing +streaming archive files such as tar and cpio. +The library is inherently stream-oriented; readers serially iterate through +the archive, writers serially add things to the archive. +In particular, note that there is no built-in support for +random access nor for in-place modification. +.Pp +When reading an archive, the library automatically detects the +format and the compression. +The library currently has read support for: +.Bl -bullet -compact +.It +old-style tar +.It +most variants of the POSIX +.Dq ustar +format, +.It +the POSIX +.Dq pax interchange +format, +.It +GNU-format tar archives, +.It +POSIX octet-oriented cpio archives. +.El +The library automatically detects archives compressed with +.Xr gzip 1 , +.Xr bzip2 1 , +or +.Xr compress 1 +and decompresses them transparently. +.Pp +When writing an archive, you can specify the compression +to be used and the format to use. +The library can write +.Bl -bullet -compact +.It +POSIX-standard +.Dq ustar +archives, +.It +POSIX +.Dq pax interchange format +archives, +.It +POSIX octet-oriented cpio archives, +.It +two different variants of shar archives. 
+.El +Pax interchange format is an extension of the tar archive format that +eliminates essentially all of the limitations of historic tar formats +in a standard fashion that is supported +by POSIX-compliant +.Xr pax 1 +implementations on many systems as well as several newer implementations of +.Xr tar 1 . +Note that the default write format will suppress the pax extended +attributes for most entries; explicitly requesting pax format will +enable those attributes for all entries. +.Pp +The read and write APIs are accessed through the +.Fn archive_read_XXX +functions and the +.Fn archive_write_XXX +functions, respectively, and either can be used independently +of the other. +.Pp +The rest of this manual page provides an overview of the library +operation. +More detailed information can be found in the individual manual +pages for each API or utility function. +.Sh READING AN ARCHIVE +To read an archive, you must first obtain an initialized +.Tn struct archive +object from +.Fn archive_read_new . +You can then modify this object for the desired operations with the +various +.Fn archive_read_set_XXX +and +.Fn archive_read_support_XXX +functions. +In particular, you will need to invoke appropriate +.Fn archive_read_support_XXX +functions to enable the corresponding compression and format +support. +Note that these latter functions perform two distinct operations: +they cause the corresponding support code to be linked into your +program, and they enable the corresponding auto-detect code. +Unless you have specific constraints, you will generally want +to invoke +.Fn archive_read_support_compression_all +and +.Fn archive_read_support_format_all +to enable auto-detect for all formats and compression types +currently supported by the library. +.Pp +Once you have prepared the +.Tn struct archive +object, you call +.Fn archive_read_open +to actually open the archive and prepare it for reading. 
+.Pp +Each archive entry consists of a header followed by a certain +amount of data. +You can obtain the next header with +.Fn archive_read_next_header , +which returns a pointer to an +.Tn struct archive_entry +structure with information about the current archive element. +If the entry is a regular file, then the header will be followed +by the file data. +You can use +.Fn archive_read_data +(which works much like the +.Xr read 2 +system call) +to read this data from the archive. +You may prefer to use the higher-level +.Fn archive_read_data_skip , +which reads and discards the data for this entry, +.Fn archive_read_data_to_buffer , +which reads the data into an in-memory buffer, +.Fn archive_read_data_to_file , +which copies the data to the provided file descriptor, or +.Fn archive_read_extract , +which recreates the specified entry on disk and copies data +from the archive. +In particular, note that +.Fn archive_read_extract +uses the +.Tn struct archive_entry +structure that you provide it, which may differ from the +entry just read from the archive. +In particular, many applications will want to override the +pathname, file permissions, or ownership. +.Pp +Once you have finished reading data from the archive, you +should call +.Fn archive_read_finish +to release all resources. +In particular, +.Fn archive_read_finish +closes the archive and frees any memory that was allocated by the library. +.Pp +The +.Xr archive_read 3 +manual page provides more detailed calling information for this API. +.Sh WRITING AN ARCHIVE +You use a similar process to write an archive. +The +.Fn archive_write_new +function creates an archive object useful for writing, +the various +.Fn archive_write_set_XXX +functions are used to set parameters for writing the archive, and +.Fn archive_write_open +completes the setup and opens the archive for writing. 
+.Pp +Individual archive entries are written in a three-step +process: +You first initialize a +.Tn struct archive_entry +structure with information about the new entry. +At a minimum, you should set the pathname of the +entry and provide a +.Va struct stat +with a valid +.Va st_mode +field, which specifies the type of object, and +.Va st_size +field, which specifies the size of the data portion of the object. +The +.Fn archive_write_header +function actually writes the header data to the archive. +You can then use +.Fn archive_write_data +to write the actual data. +.Pp +After all entries have been written, use the +.Fn archive_write_finish +function to release all resources. +.Pp +The +.Xr archive_write 3 +manual page provides more detailed calling information for this API. +.Sh DESCRIPTION +Detailed descriptions of each function are provided by the +corresponding manual pages. +.Pp +All of the functions utilize an opaque +.Tn struct archive +datatype that provides access to the archive contents. +.Pp +The +.Tn struct archive_entry +structure contains a complete description of a single archive +entry. +It uses an opaque interface that is fully documented in +.Xr archive_entry 3 . +.Pp +Users familiar with historic formats should be aware that the newer +variants have eliminated most restrictions on the length of textual fields. +Clients should not assume that filenames, link names, user names, or +group names are limited in length. +In particular, pax interchange format can easily accommodate pathnames +in arbitrary character sets that exceed +.Va PATH_MAX . +.Sh RETURN VALUES +Most functions return zero on success, non-zero on error. +The return value indicates the general severity of the error, ranging +from +.Cm ARCHIVE_WARNING , +which indicates a minor problem that should probably be reported +to the user, to +.Cm ARCHIVE_FATAL , +which indicates a serious problem that will prevent any further +operations on this archive. 
+On error, the +.Fn archive_errno +function can be used to retrieve a numeric error code (see +.Xr errno 2 ) . +The +.Fn archive_error_string +function returns a textual error message suitable for display. +.Pp +.Fn archive_read_new +and +.Fn archive_write_new +return pointers to an allocated and initialized +.Tn struct archive +object. +.Pp +.Fn archive_read_data +and +.Fn archive_write_data +return a count of the number of bytes actually read or written. +A value of zero indicates the end of the data for this entry. +A negative value indicates an error, in which case the +.Fn archive_errno +and +.Fn archive_error_string +functions can be used to obtain more information. +.Sh ENVIRONMENT +There are character set conversions within the +.Xr archive_entry 3 +functions that are impacted by the currently-selected locale. +.Sh SEE ALSO +.Xr tar 1 , +.Xr archive_entry 3 , +.Xr archive_read 3 , +.Xr archive_util 3 , +.Xr archive_write 3 , +.Xr tar 5 +.Sh HISTORY +The +.Nm libarchive +library first appeared in +.Fx 5.3 . +.Sh AUTHORS +.An -nosplit +The +.Nm libarchive +library was written by +.An Tim Kientzle Aq kientzle@acm.org . +.Sh BUGS +Some archive formats support information that is not supported by +.Tn struct archive_entry . +Such information cannot be fully archived or restored using this library. +This includes, for example, comments, character sets, +or the arbitrary key/value pairs that can appear in +pax interchange format archives. +.Pp +Conversely, of course, not all of the information that can be +stored in a +.Tn struct archive_entry +is supported by all formats. +For example, cpio formats do not support nanosecond timestamps; +old tar formats do not support large device numbers. diff --git a/lib/libarchive/tar.5 b/lib/libarchive/tar.5 new file mode 100644 index 0000000..242a3d0 --- /dev/null +++ b/lib/libarchive/tar.5 @@ -0,0 +1,730 @@ +.\" Copyright (c) 2003-2004 Tim Kientzle +.\" All rights reserved. 
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd May 20, 2004 +.Dt TAR 5 +.Os +.Sh NAME +.Nm tar +.Nd format of tape archive files +.Sh DESCRIPTION +The +.Nm +archive format collects any number of files, directories, and other +file system objects (symbolic links, device nodes, etc.) into a single +stream of bytes. +The format was originally designed to be used with +tape drives that operate with fixed-size blocks, but is widely used as +a general packaging mechanism. +.Ss General Format +A +.Nm +archive consists of a series of 512-byte records. +Each file system object requires a header record which stores basic metadata +(pathname, owner, permissions, etc.) 
and zero or more records containing any +file data. +The end of the archive is indicated by two records consisting +entirely of zero bytes. +.Pp +For compatibility with tape drives that use fixed block sizes, +programs that read or write tar files always read or write a fixed +number of records with each I/O operation. +These +.Dq blocks +are always a multiple of the record size. +The most common block size\(emand the maximum supported by historic +implementations\(emis 10240 bytes or 20 records. +(Note: the terms +.Dq block +and +.Dq record +here are not entirely standard; this document follows the +convention established by John Gilmore in documenting +.Nm pdtar . ) +.Ss Old-Style Archive Format +The original tar archive format has been extended many times to +include additional information that various implementors found +necessary. +This section describes the variant implemented by the tar command +included in +.At v7 , +which is one of the earliest widely-used versions of the tar program. +.Pp +The header record for an old-style +.Nm +archive consists of the following: +.Bd -literal -offset indent +struct header_old_tar { + char name[100]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char checksum[8]; + char linkflag[1]; + char linkname[100]; + char pad[255]; +}; +.Ed +All unused bytes in the header record are filled with nulls. +.Bl -tag -width indent +.It Va name +Pathname, stored as a null-terminated string. +Early tar implementations only stored regular files (including +hardlinks to those files). +One common early convention used a trailing "/" character to indicate +a directory name, allowing directory permissions and owner information +to be archived and restored. +.It Va mode +File mode, stored as an octal number in ASCII. +.It Va uid , Va gid +User id and group id of owner, as octal numbers in ASCII. +.It Va size +Size of file, as octal number in ASCII. 
+For regular files only, this indicates the amount of data +that follows the header. +In particular, this field was ignored by early tar implementations +when extracting hardlinks. +Modern writers should always store a zero length for hardlink entries. +.It Va mtime +Modification time of file, as an octal number in ASCII. +This indicates the number of seconds since the start of the epoch, +00:00:00 UTC January 1, 1970. +Note that negative values should be avoided +here, as they are handled inconsistently. +.It Va checksum +Header checksum, stored as an octal number in ASCII. +To compute the checksum, set the checksum field to all spaces, +then sum all bytes in the header using unsigned arithmetic. +This field should be stored as six octal digits followed by a null and a space +character. +Note that many early implementations of tar used signed arithmetic +for the checksum field, which can cause interoperability problems +when transferring archives between systems. +Modern robust readers compute the checksum both ways and accept the +header if either computation matches. +.It Va linkflag , Va linkname +In order to preserve hardlinks and conserve tape, a file +with multiple links is only written to the archive the first +time it is encountered. +The next time it is encountered, the +.Va linkflag +is set to an ASCII +.Sq 1 +and the +.Va linkname +field holds the first name under which this file appears. +(Note that regular files have a null value in the +.Va linkflag +field.) +.El +.Pp +Early tar implementations varied in how they terminated these fields. +The tar command in +.At v7 +used the following conventions (this is also documented in early BSD manpages): +the pathname must be null-terminated; +the mode, uid, and gid fields must end in a space and a null byte; +the size and mtime fields must end in a space; +the checksum is terminated by a null and a space. +Early implementations filled the numeric fields with leading spaces. 
+This seems to have been common practice until the +.St -p1003.1 +standard was released. +For best portability, modern implementations should fill the numeric +fields with leading zeros. +.Ss Pre-POSIX Archives +An early draft of +.St -p1003.1-88 +served as the basis for John Gilmore's +.Nm pdtar +program and many system implementations from the late 1980s +and early 1990s. +These archives generally follow the POSIX ustar +format described below with the following variations: +.Bl -bullet -compact -width indent +.It +The magic value is +.Dq ustar\ \& +(note the following space). +The version field contains a space character followed by a null. +.It +The numeric fields are generally filled with leading spaces +(not leading zeros as recommended in the final standard). +.It +The prefix field is often not used, limiting pathnames to +the 100 characters of old-style archives. +.El +.Ss POSIX ustar Archives +.St -p1003.1-88 +defined a standard tar file format to be read and written +by compliant implementations of +.Xr tar 1 +and +.Xr pax 1 . +This format is often called the +.Dq ustar +format, after the magic value used +in the header. +(The name is an acronym for +.Dq Unix Standard TAR . ) +It extends the historic format with new fields: +.Bd -literal -offset indent +struct header_posix_ustar { + char name[100]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char checksum[8]; + char typeflag[1]; + char linkname[100]; + char magic[6]; + char version[2]; + char uname[32]; + char gname[32]; + char devmajor[8]; + char devminor[8]; + char prefix[155]; + char pad[12]; +}; +.Ed +.Bl -tag -width indent +.It Va typeflag +Type of entry. +POSIX extended the earlier +.Va linkflag +field with several new type values: +.Bl -tag -width indent -compact +.It Dq 0 +Regular file. +NULL should be treated as a synonym, for compatibility purposes. +.It Dq 1 +Hard link. +.It Dq 2 +Symbolic link. +.It Dq 3 +Character device node. 
+.It Dq 4 +Block device node. +.It Dq 5 +Directory. +.It Dq 6 +FIFO node. +.It Dq 7 +Reserved. +.It Other +A POSIX-compliant implementation must treat any unrecognized typeflag value +as a regular file. +In particular, writers should ensure that all entries +have a valid filename so that they can be restored by readers that do not +support the corresponding extension. +Uppercase letters "A" through "Z" are reserved for custom extensions. +Note that sockets and whiteout entries are not archivable. +.El +It is worth noting that the +.Va size +field, in particular, has different meanings depending on the type. +For regular files, of course, it indicates the amount of data +following the header. +For directories, it may be used to indicate the total size of all +files in the directory, for use by operating systems that pre-allocate +directory space. +For all other types, it should be set to zero by writers and ignored +by readers. +.It Va magic +Contains the magic value +.Dq ustar +followed by a NULL byte to indicate that this is a POSIX standard archive. +Full compliance requires the uname and gname fields be properly set. +.It Va version +Version. +This should be +.Dq 00 +(two copies of the ASCII digit zero) for POSIX standard archives. +.It Va uname , Va gname +User and group names, as null-terminated ASCII strings. +These should be used in preference to the uid/gid values +when they are set and the corresponding names exist on +the system. +.It Va devmajor , Va devminor +Major and minor numbers for character device or block device entry. +.It Va prefix +First part of pathname. +If the pathname is too long to fit in the 100 bytes provided by the standard +format, it can be split at any +.Pa / +character with the first portion going here. +If the prefix field is not empty, the reader will prepend +the prefix value and a +.Pa / +character to the regular name field to obtain the full pathname. +.El +.Pp +Note that all unused bytes must be set to +.Dv NULL . 
+.Pp +Field termination is specified slightly differently by POSIX +than by previous implementations. +The +.Va magic , +.Va uname , +and +.Va gname +fields must have a trailing +.Dv NULL . +The +.Va pathname , +.Va linkname , +and +.Va prefix +fields must have a trailing +.Dv NULL +unless they fill the entire field. +(In particular, it is possible to store a 256-character pathname if it +happens to have a +.Pa / +as the 156th character.) +POSIX requires numeric fields to be zero-padded in the front, and allows +them to be terminated with either space or +.Dv NULL +characters. +.Pp +Currently, most tar implementations comply with the ustar +format, occasionally extending it by adding new fields to the +blank area at the end of the header record. +.Ss Pax Interchange Format +There are many attributes that cannot be portably stored in a +POSIX ustar archive. +.St -p1003.1-2001 +defined a +.Dq pax interchange format +that uses two new types of entries to hold text-formatted +metadata that applies to following entries. +Note that a pax interchange format archive is a ustar archive in every +respect. +The new data is stored in ustar-compatible archive entries that use the +.Dq x +or +.Dq g +typeflag. +In particular, older implementations that do not fully support these +extensions will extract the metadata into regular files, where the +metadata can be examined as necessary. +.Pp +An entry in a pax interchange format archive consists of one or +two standard ustar entries, each with its own header and data. +The first optional entry stores the extended attributes +for the following entry. +This optional first entry has an "x" typeflag and a size field that +indicates the total size of the extended attributes. +The extended attributes themselves are stored as a series of text-format +lines encoded in the portable UTF-8 encoding. +Each line consists of a decimal number, a space, a key string, an equals +sign, a value string, and a new line. 
+The decimal number indicates the length of the entire line, including the +initial length field and the trailing newline. +An example of such a field is: +.Dl 25 ctime=1084839148.1212\en +Keys in all lowercase are standard keys. +Vendors can add their own keys by prefixing them with an all uppercase +vendor name and a period. +Note that, unlike the historic header, numeric values are stored using +decimal, not octal. +A description of some common keys follows: +.Bl -tag -width indent +.It Cm atime , Cm ctime , Cm mtime +File access, inode change, and modification times. +These fields can be negative or include a decimal point and a fractional value. +.It Cm uname , Cm uid , Cm gname , Cm gid +User name, group name, and numeric UID and GID values. +The user name and group name stored here are encoded in UTF8 +and can thus include non-ASCII characters. +The UID and GID fields can be of arbitrary length. +.It Cm linkpath +The full path of the linked-to file. +Note that this is encoded in UTF8 and can thus include non-ASCII characters. +.It Cm path +The full pathname of the entry. +Note that this is encoded in UTF8 and can thus include non-ASCII characters. +.It Cm realtime.* , Cm security.* +These keys are reserved and may be used for future standardization. +.It Cm size +The size of the file. +Note that there is no length limit on this field, allowing conforming +archives to store files much larger than the historic 8GB limit. +.It Cm SCHILY.* +Vendor-specific attributes used by Joerg Schilling's +.Nm star +implementation. +.It Cm SCHILY.acl.access , Cm SCHILY.acl.default +Stores the access and default ACLs as textual strings in a format +that is an extension of the format specified by POSIX.1e draft 17. +In particular, each user or group access specification can include a fourth +colon-separated field with the numeric UID or GID. 
+This allows ACLs to be restored on systems that may not have complete +user or group information available (such as when NIS/YP or LDAP services +are temporarily unavailable). +.It Cm SCHILY.devminor , Cm SCHILY.devmajor +The full minor and major numbers for device nodes. +.It Cm SCHILY.dev , Cm SCHILY.ino , Cm SCHILY.nlinks +The device number, inode number, and link count for the entry. +In particular, note that a pax interchange format archive using Joerg +Schilling's +.Cm SCHILY.* +extensions can store all of the data from +.Va struct stat . +.It Cm LIBARCHIVE.xattr. Ns Ar namespace Ns . Ns Ar key +Libarchive stores POSIX.1e-style extended attributes using +keys of this form. The +.Ar key +value is URL-encoded: +All non-ASCII characters and the two special characters +.Dq = +and +.Dq % +are encoded as +.Dq % +followed by two uppercase hexadecimal digits. +The value of this key is the extended attribute value +encoded in base 64. +XXX Detail the base-64 format here XXX +.It Cm VENDOR.* +XXX document other vendor-specific extensions XXX +.El +.Pp +Any values stored in an extended attribute override the corresponding +values in the regular tar header. +Note that compliant readers should ignore the regular fields when they +are overridden. +This is important, as existing archivers are known to store non-compliant +values in the standard header fields in this situation. +There are no limits on length for any of these fields. +In particular, numeric fields can be arbitrarily large. +All text fields are encoded in UTF8. +Compliant writers should store only portable 7-bit ASCII characters in +the standard ustar header and use extended +attributes whenever a text value contains non-ASCII characters. +.Pp +In addition to the +.Cm x +entry described above, the pax interchange format +also supports a +.Cm g +entry. +The +.Cm g +entry is identical in format, but specifies attributes that serve as +defaults for all subsequent archive entries. 
+The +.Cm g +entry is not widely used. +.Pp +Besides the new +.Cm x +and +.Cm g +entries, the pax interchange format has a few other minor variations +from the earlier ustar format. +The most troubling one is that hardlinks are permitted to have +data following them. +This allows readers to restore any hardlink to a file without +having to rewind the archive to find an earlier entry. +However, it creates complications for robust readers, as it is no longer +clear whether or not they should ignore the size field for hardlink entries. +.Ss GNU Tar Archives +The GNU tar program started with a pre-POSIX format similar to that +described earlier and has extended it using several different mechanisms: +It added new fields to the empty space in the header (some of which was later +used by POSIX for conflicting purposes); +it allowed the header to be continued over multiple records; +and it defined new entries that modify following entries +(similar in principle to the +.Cm x +entry described above, but each GNU special entry is single-purpose, +unlike the general-purpose +.Cm x +entry). +As a result, GNU tar archives are not POSIX compatible, although +more lenient POSIX-compliant readers can successfully extract most +GNU tar archives. 
+.Bd -literal -offset indent +struct header_gnu_tar { + char name[100]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char checksum[8]; + char typeflag[1]; + char linkname[100]; + char magic[6]; + char version[2]; + char uname[32]; + char gname[32]; + char devmajor[8]; + char devminor[8]; + char atime[12]; + char ctime[12]; + char offset[12]; + char longnames[4]; + char unused[1]; + struct { + char offset[12]; + char numbytes[12]; + } sparse[4]; + char isextended[1]; + char realsize[12]; + char pad[17]; +}; +.Ed +.Bl -tag -width indent +.It Va typeflag +GNU tar uses the following special entry types, in addition to +those defined by POSIX: +.Bl -tag -width indent +.It "7" +GNU tar treats type "7" records identically to type "0" records, +except on one obscure RTOS where they are used to indicate the +pre-allocation of a contiguous file on disk. +.It "D" +This indicates a directory entry. +Unlike the POSIX-standard "5" +typeflag, the header is followed by data records listing the names +of files in this directory. +Each name is preceded by an ASCII "Y" +if the file is stored in this archive or "N" if the file is not +stored in this archive. +Each name is terminated with a null, and +an extra null marks the end of the name list. +The purpose of this +entry is to support incremental backups; a program restoring from +such an archive may wish to delete files on disk that did not exist +in the directory when the archive was made. +.Pp +Note that the "D" typeflag specifically violates POSIX, which requires +that unrecognized typeflags be restored as normal files. +In this case, restoring the "D" entry as a file could interfere +with subsequent creation of the like-named directory. +.It "K" +The data for this entry is a long linkname for the following regular entry. +.It "L" +The data for this entry is a long pathname for the following regular entry. +.It "M" +This is a continuation of the last file on the previous volume. 
+GNU multi-volume archives guarantee that each volume begins with a valid +entry header. +To ensure this, a file may be split, with part stored at the end of one volume, +and part stored at the beginning of the next volume. +The "M" typeflag indicates that this entry continues an existing file. +Such entries can only occur as the first or second entry +in an archive (the latter only if the first entry is a volume label). +The +.Va size +field specifies the size of this entry. +The +.Va offset +field at bytes 369-380 specifies the offset where this file fragment +begins. +The +.Va realsize +field specifies the total size of the file (which must equal +.Va size +plus +.Va offset ) . +When extracting, GNU tar checks that the header file name is the one it is +expecting, that the header offset is in the correct sequence, and that +the sum of offset and size is equal to realsize. +FreeBSD's version of GNU tar does not handle the corner case of an +archive's being continued in the middle of a long name or other +extension header. +.It "N" +Type "N" records are no longer generated by GNU tar. +They contained a +list of files to be renamed or symlinked after extraction; this was +originally used to support long names. +The contents of this record +are a text description of the operations to be done, in the form +.Dq Rename %s to %s\en +or +.Dq Symlink %s to %s\en ; +in either case, both +filenames are escaped using K&R C syntax. +.It "S" +This is a +.Dq sparse +regular file. +Sparse files are stored as a series of fragments. +The header contains a list of fragment offset/length pairs. +If more than four such entries are required, the header is +extended as necessary with +.Dq extra +header extensions (an older format that is no longer used), or +.Dq sparse +extensions. +.It "V" +The +.Va name +field should be interpreted as a tape/volume header name. +This entry should generally be ignored on extraction. 
+.El +.It Va magic +The magic field holds the five characters +.Dq ustar +followed by a space. +Note that POSIX ustar archives have a trailing null. +.It Va version +The version field holds a space character followed by a null. +Note that POSIX ustar archives use two copies of the ASCII digit +.Dq 0 . +.It Va atime , Va ctime +The time the file was last accessed and the time of +last change of file information, stored in octal as with +.Va mtime . +.It Va longnames +This field is apparently no longer used. +.It Sparse Va offset / Va numbytes +Each such structure specifies a single fragment of a sparse +file. +The two fields store values as octal numbers. +The fragments are each padded to a multiple of 512 bytes +in the archive. +On extraction, the list of fragments is collected from the +header (including any extension headers), and the data +is then read and written to the file at appropriate offsets. +.It Va isextended +If this is set to non-zero, the header will be followed by additional +.Dq sparse header +records. +Each such record contains information about as many as 21 additional +sparse blocks as shown here: +.Bd -literal -offset indent +struct gnu_sparse_header { + struct { + char offset[12]; + char numbytes[12]; + } sparse[21]; + char isextended[1]; + char padding[7]; +}; +.Ed +.It Va realsize +A binary representation of the file's complete size, with a much larger range +than the POSIX file size. +In particular, with +.Cm M +type files, the current entry is only a portion of the file. +In that case, the POSIX size field will indicate the size of this +entry; the +.Va realsize +field will indicate the total size of the file. +.El +.Ss Solaris Tar +XXX More Details Needed XXX +.Pp +Solaris tar (beginning with SunOS XXX 5.7 ?? 
XXX) supports an +.Dq extended +format that is fundamentally similar to pax interchange format, +with the following differences: +.Bl -bullet -compact -width indent +.It +Extended attributes are stored in an entry whose type is +.Cm X , +not +.Cm x , +as used by pax interchange format. +The detailed format of this entry appears to be the same +as detailed above for the +.Cm x +entry. +.It +An additional +.Cm A +entry is used to store an ACL for the following regular entry. +The body of this entry contains a seven-digit octal number +(whose value is 01000000 plus the number of ACL entries) +followed by a zero byte, followed by the +textual ACL description. +.El +.Ss Other Extensions +One common extension, utilized by GNU tar, star, and other newer +.Nm +implementations, permits binary numbers in the standard numeric +fields. +This is flagged by setting the high bit of the first character. +This permits 95-bit values for the length and time fields +and 63-bit values for the uid, gid, and device numbers. +GNU tar supports this extension for the +length, mtime, ctime, and atime fields. +Joerg Schilling's star program supports this extension for +all numeric fields. +Note that this extension is largely obsoleted by the extended attribute +record provided by the pax interchange format. +.Pp +Another early GNU extension allowed base-64 values rather +than octal. +This extension was short-lived and such archives are almost never seen. +However, there is still code in GNU tar to support them; this code is +responsible for a very cryptic warning message that is sometimes seen when +GNU tar encounters a damaged archive. +.Sh SEE ALSO +.Xr ar 1 , +.Xr pax 1 , +.Xr tar 1 +.Sh STANDARDS +The +.Nm tar +utility is no longer a part of POSIX or the Single Unix Standard. +It last appeared in +.St -susv2 . +It has been supplanted in subsequent standards by +.Xr pax 1 . +The ustar format is currently part of the specification for the +.Xr pax 1 +utility. 
+The pax interchange file format is new with +.St -p1003.1-2001 . +.Sh HISTORY +A +.Nm tar +command appeared in Seventh Edition Unix, which was released in January, 1979. +It replaced the +.Nm tp +program from Fourth Edition Unix, which in turn replaced the +.Nm tap +program from First Edition Unix. +John Gilmore's +.Nm pdtar +public-domain implementation (circa 1987) was highly influential +and formed the basis of GNU tar. +Joerg Schilling's +.Nm star +archiver is another open-source (GPL) archiver (originally developed +circa 1985) which features complete support for pax interchange +format. |