diff options
author | kientzle <kientzle@FreeBSD.org> | 2008-03-14 20:35:38 +0000 |
---|---|---|
committer | kientzle <kientzle@FreeBSD.org> | 2008-03-14 20:35:38 +0000 |
commit | 0614ffca9550c6e6bad69f26a1dbf0b606dbd570 (patch) | |
tree | 02128afd204db625cea9b900d5f2d76fd0713a8e /lib | |
parent | 98633157bb8dca69dd5ac8fc5e9cff3adbca76d2 (diff) | |
download | FreeBSD-src-0614ffca9550c6e6bad69f26a1dbf0b606dbd570.zip FreeBSD-src-0614ffca9550c6e6bad69f26a1dbf0b606dbd570.tar.gz |
Support for writing 'compress' format, thanks to Joerg Sonnenberger.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/libarchive/Makefile | 1 | ||||
-rw-r--r-- | lib/libarchive/archive.h.in | 1 | ||||
-rw-r--r-- | lib/libarchive/archive_write_set_compression_compress.c | 494 | ||||
-rw-r--r-- | lib/libarchive/test/Makefile | 1 | ||||
-rw-r--r-- | lib/libarchive/test/test_write_compress.c | 99 |
5 files changed, 596 insertions, 0 deletions
diff --git a/lib/libarchive/Makefile b/lib/libarchive/Makefile index bd59b2a..00acf1f 100644 --- a/lib/libarchive/Makefile +++ b/lib/libarchive/Makefile @@ -81,6 +81,7 @@ SRCS= archive.h \ archive_write_open_filename.c \ archive_write_open_memory.c \ archive_write_set_compression_bzip2.c \ + archive_write_set_compression_compress.c \ archive_write_set_compression_gzip.c \ archive_write_set_compression_none.c \ archive_write_set_compression_program.c \ diff --git a/lib/libarchive/archive.h.in b/lib/libarchive/archive.h.in index 22e3888..f29a72f 100644 --- a/lib/libarchive/archive.h.in +++ b/lib/libarchive/archive.h.in @@ -407,6 +407,7 @@ int archive_write_get_bytes_in_last_block(struct archive *); int archive_write_set_skip_file(struct archive *, dev_t, ino_t); int archive_write_set_compression_bzip2(struct archive *); +int archive_write_set_compression_compress(struct archive *); int archive_write_set_compression_gzip(struct archive *); int archive_write_set_compression_none(struct archive *); int archive_write_set_compression_program(struct archive *, diff --git a/lib/libarchive/archive_write_set_compression_compress.c b/lib/libarchive/archive_write_set_compression_compress.c new file mode 100644 index 0000000..fe6b905 --- /dev/null +++ b/lib/libarchive/archive_write_set_compression_compress.c @@ -0,0 +1,494 @@ +/*- + * Copyright (c) 2008 Joerg Sonnenberger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1985, 1986, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis and James A. Woods, derived from original + * work by Spencer Thomas and Joseph Orost. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "archive_platform.h" + +__FBSDID("$FreeBSD$"); + +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_STRING_H +#include <string.h> +#endif + +#include "archive.h" +#include "archive_private.h" +#include "archive_write_private.h" + +#define HSIZE 69001 /* 95% occupancy */ +#define HSHIFT 8 /* 8 - trunc(log2(HSIZE / 65536)) */ +#define CHECK_GAP 10000 /* Ratio check interval. */ + +#define MAXCODE(bits) ((1 << (bits)) - 1) + +/* + * the next two codes should not be changed lightly, as they must not + * lie within the contiguous general code space. + */ +#define FIRST 257 /* First free entry. */ +#define CLEAR 256 /* Table clear output code. */ + +struct private_data { + off_t in_count, out_count, checkpoint; + + int code_len; /* Number of bits/code. */ + int cur_maxcode; /* Maximum code, given n_bits. */ + int max_maxcode; /* Should NEVER generate this code. */ + int hashtab [HSIZE]; + unsigned short codetab [HSIZE]; + int first_free; /* First unused entry. */ + int compress_ratio; + + int cur_code, cur_fcode; + + int bit_offset; + unsigned char bit_buf; + + unsigned char *compressed; + size_t compressed_buffer_size; + size_t compressed_offset; +}; + +static int archive_compressor_compress_finish(struct archive_write *); +static int archive_compressor_compress_init(struct archive_write *); +static int archive_compressor_compress_write(struct archive_write *, + const void *, size_t); + +/* + * Allocate, initialize and return a archive object. + */ +int +archive_write_set_compression_compress(struct archive *_a) +{ + struct archive_write *a = (struct archive_write *)_a; + __archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC, + ARCHIVE_STATE_NEW, "archive_write_set_compression_compress"); + a->compressor.init = &archive_compressor_compress_init; + a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS; + a->archive.compression_name = "compress"; + return (ARCHIVE_OK); +} + +/* + * Setup callback. + */ +static int +archive_compressor_compress_init(struct archive_write *a) +{ + int ret; + struct private_data *state; + + a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS; + a->archive.compression_name = "compress"; + + if (a->bytes_per_block < 4) { + archive_set_error(&a->archive, EINVAL, + "Can't write Compress header as single block"); + return (ARCHIVE_FATAL); + } + + if (a->client_opener != NULL) { + ret = (a->client_opener)(&a->archive, a->client_data); + if (ret != ARCHIVE_OK) + return (ret); + } + + state = (struct private_data *)malloc(sizeof(*state)); + if (state == NULL) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate data for compression"); + return (ARCHIVE_FATAL); + } + memset(state, 0, sizeof(*state)); + + state->compressed_buffer_size = a->bytes_per_block; + state->compressed = malloc(state->compressed_buffer_size); + + if (state->compressed == NULL) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate data for compression buffer"); + free(state); + return (ARCHIVE_FATAL); + } + + a->compressor.write = archive_compressor_compress_write; + a->compressor.finish = archive_compressor_compress_finish; + + state->max_maxcode = 0x10000; /* Should NEVER generate this code. */ + state->in_count = 0; /* Length of input. */ + state->bit_buf = 0; + state->bit_offset = 0; + state->out_count = 3; /* Includes 3-byte header mojo. */ + state->compress_ratio = 0; + state->checkpoint = CHECK_GAP; + state->code_len = 9; + state->cur_maxcode = MAXCODE(state->code_len); + state->first_free = FIRST; + + memset(state->hashtab, 0xff, sizeof(state->hashtab)); + + /* Prime output buffer with a gzip header. */ + state->compressed[0] = 0x1f; /* Compress */ + state->compressed[1] = 0x9d; + state->compressed[2] = 0x90; /* Block mode, 16bit max */ + state->compressed_offset = 3; + + a->compressor.data = state; + return (0); +} + +/*- + * Output the given code. + * Inputs: + * code: A n_bits-bit integer. If == -1, then EOF. This assumes + * that n_bits =< (long)wordsize - 1. + * Outputs: + * Outputs code to the file. + * Assumptions: + * Chars are 8 bits long. + * Algorithm: + * Maintain a BITS character long buffer (so that 8 codes will + * fit in it exactly). Use the VAX insv instruction to insert each + * code in turn. When the buffer fills up empty it and start over. + */ + +static unsigned char rmask[9] = + {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; + +static int +output_byte(struct archive_write *a, unsigned char c) +{ + struct private_data *state = a->compressor.data; + ssize_t bytes_written; + + state->compressed[state->compressed_offset++] = c; + ++state->out_count; + + if (state->compressed_buffer_size == state->compressed_offset) { + bytes_written = (a->client_writer)(&a->archive, + a->client_data, + state->compressed, state->compressed_buffer_size); + if (bytes_written <= 0) + return ARCHIVE_FATAL; + a->archive.raw_position += bytes_written; + state->compressed_offset = 0; + } + + return ARCHIVE_OK; +} + +static int +output_code(struct archive_write *a, int ocode) +{ + struct private_data *state = a->compressor.data; + int bits, ret, clear_flg, bit_offset; + + clear_flg = ocode == CLEAR; + bits = state->code_len; + + /* + * Since ocode is always >= 8 bits, only need to mask the first + * hunk on the left. + */ + bit_offset = state->bit_offset % 8; + state->bit_buf |= (ocode << bit_offset) & 0xff; + output_byte(a, state->bit_buf); + + bits = state->code_len - (8 - bit_offset); + ocode >>= 8 - bit_offset; + /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ + if (bits >= 8) { + output_byte(a, ocode & 0xff); + ocode >>= 8; + bits -= 8; + } + /* Last bits. */ + state->bit_offset += state->code_len; + state->bit_buf = ocode & rmask[bits]; + if (state->bit_offset == state->code_len * 8) + state->bit_offset = 0; + + /* + * If the next entry is going to be too big for the ocode size, + * then increase it, if possible. + */ + if (clear_flg || state->first_free > state->cur_maxcode) { + /* + * Write the whole buffer, because the input side won't + * discover the size increase until after it has read it. + */ + if (state->bit_offset > 0) { + while (state->bit_offset < state->code_len * 8) { + ret = output_byte(a, state->bit_buf); + if (ret != ARCHIVE_OK) + return ret; + state->bit_offset += 8; + state->bit_buf = 0; + } + } + state->bit_buf = 0; + state->bit_offset = 0; + + if (clear_flg) { + state->code_len = 9; + state->cur_maxcode = MAXCODE(state->code_len); + } else { + state->code_len++; + if (state->code_len == 16) + state->cur_maxcode = state->max_maxcode; + else + state->cur_maxcode = MAXCODE(state->code_len); + } + } + + return (ARCHIVE_OK); +} + +static int +output_flush(struct archive_write *a) +{ + struct private_data *state = a->compressor.data; + int ret; + + /* At EOF, write the rest of the buffer. */ + if (state->bit_offset % 8) { + state->code_len = (state->bit_offset % 8 + 7) / 8; + ret = output_byte(a, state->bit_buf); + if (ret != ARCHIVE_OK) + return ret; + } + + return (ARCHIVE_OK); +} + +/* + * Write data to the compressed stream. + */ +static int +archive_compressor_compress_write(struct archive_write *a, const void *buff, + size_t length) +{ + struct private_data *state; + int i; + int ratio; + int c, disp, ret; + const unsigned char *bp; + + state = (struct private_data *)a->compressor.data; + if (a->client_writer == NULL) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, + "No write callback is registered? " + "This is probably an internal programming error."); + return (ARCHIVE_FATAL); + } + + if (length == 0) + return ARCHIVE_OK; + + bp = buff; + + if (state->in_count == 0) { + state->cur_code = *bp++; + ++state->in_count; + --length; + } + + while (length--) { + c = *bp++; + state->in_count++; + state->cur_fcode = (c << 16) + state->cur_code; + i = ((c << HSHIFT) ^ state->cur_code); /* Xor hashing. */ + + if (state->hashtab[i] == state->cur_fcode) { + state->cur_code = state->codetab[i]; + continue; + } + if (state->hashtab[i] < 0) /* Empty slot. */ + goto nomatch; + /* Secondary hash (after G. Knott). */ + if (i == 0) + disp = 1; + else + disp = HSIZE - i; + probe: + if ((i -= disp) < 0) + i += HSIZE; + + if (state->hashtab[i] == state->cur_fcode) { + state->cur_code = state->codetab[i]; + continue; + } + if (state->hashtab[i] >= 0) + goto probe; + nomatch: + ret = output_code(a, state->cur_code); + if (ret != ARCHIVE_OK) + return ret; + state->cur_code = c; + if (state->first_free < state->max_maxcode) { + state->codetab[i] = state->first_free++; /* code -> hashtable */ + state->hashtab[i] = state->cur_fcode; + continue; + } + if (state->in_count < state->checkpoint) + continue; + + state->checkpoint = state->in_count + CHECK_GAP; + + if (state->in_count <= 0x007fffff) + ratio = state->in_count * 256 / state->out_count; + else if ((ratio = state->out_count / 256) == 0) + ratio = 0x7fffffff; + else + ratio = state->in_count / ratio; + + if (ratio > state->compress_ratio) + state->compress_ratio = ratio; + else { + state->compress_ratio = 0; + memset(state->hashtab, 0xff, sizeof(state->hashtab)); + state->first_free = FIRST; + ret = output_code(a, CLEAR); + if (ret != ARCHIVE_OK) + return ret; + } + } + + return (ARCHIVE_OK); +} + + +/* + * Finish the compression... + */ +static int +archive_compressor_compress_finish(struct archive_write *a) +{ + ssize_t block_length, target_block_length, bytes_written; + int ret; + struct private_data *state; + unsigned tocopy; + + state = (struct private_data *)a->compressor.data; + ret = 0; + if (a->client_writer == NULL) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, + "No write callback is registered? " + "This is probably an internal programming error."); + ret = ARCHIVE_FATAL; + goto cleanup; + } + + /* By default, always pad the uncompressed data. */ + if (a->pad_uncompressed) { + while (state->in_count % a->bytes_per_block != 0) { + tocopy = a->bytes_per_block - + (state->in_count % a->bytes_per_block); + if (tocopy > a->null_length) + tocopy = a->null_length; + ret = archive_compressor_compress_write(a, a->nulls, + tocopy); + if (ret != ARCHIVE_OK) + goto cleanup; + } + } + + ret = output_code(a, state->cur_code); + if (ret != ARCHIVE_OK) + goto cleanup; + ret = output_flush(a); + if (ret != ARCHIVE_OK) + goto cleanup; + + /* Optionally, pad the final compressed block. */ + block_length = state->compressed_offset; + + /* Tricky calculation to determine size of last block. */ + if (a->bytes_in_last_block <= 0) + /* Default or Zero: pad to full block */ + target_block_length = a->bytes_per_block; + else + /* Round length to next multiple of bytes_in_last_block. */ + target_block_length = a->bytes_in_last_block * + ( (block_length + a->bytes_in_last_block - 1) / + a->bytes_in_last_block); + if (target_block_length > a->bytes_per_block) + target_block_length = a->bytes_per_block; + if (block_length < target_block_length) { + memset(state->compressed + state->compressed_offset, 0, + target_block_length - block_length); + block_length = target_block_length; + } + + /* Write the last block */ + bytes_written = (a->client_writer)(&a->archive, a->client_data, + state->compressed, block_length); + if (bytes_written <= 0) + ret = ARCHIVE_FATAL; + else + a->archive.raw_position += bytes_written; + +cleanup: + free(state->compressed); + free(state); + return (ret); +} diff --git a/lib/libarchive/test/Makefile b/lib/libarchive/test/Makefile index 866e1f5..429b319 100644 --- a/lib/libarchive/test/Makefile +++ b/lib/libarchive/test/Makefile @@ -48,6 +48,7 @@ TESTS= \ test_tar_filenames.c \ test_tar_large.c \ test_write_compress_program.c \ + test_write_compress.c \ test_write_disk.c \ test_write_disk_hardlink.c \ test_write_disk_perms.c \ diff --git a/lib/libarchive/test/test_write_compress.c b/lib/libarchive/test/test_write_compress.c new file mode 100644 index 0000000..78cb3ac --- /dev/null +++ b/lib/libarchive/test/test_write_compress.c @@ -0,0 +1,99 @@ +/*- + * Copyright (c) 2007 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test.h" +__FBSDID("$FreeBSD$"); + +/* + * A basic exercise of compress reading and writing. + * + * TODO: Add a reference file and make sure we can decompress that. + */ + +DEFINE_TEST(test_write_compress) +{ + struct archive_entry *ae; + struct archive* a; + char *buff, *data; + size_t buffsize, datasize; + char path[16]; + size_t used; + int i; + + buffsize = 1000000; + assert(NULL != (buff = (char *)malloc(buffsize))); + + datasize = 10000; + assert(NULL != (data = (char *)malloc(datasize))); + memset(data, 0, datasize); + + assert((a = archive_write_new()) != NULL); + assertA(0 == archive_write_set_format_ustar(a)); + assertA(0 == archive_write_set_compression_compress(a)); + assertA(0 == archive_write_open_memory(a, buff, buffsize, &used)); + + for (i = 0; i < 100; i++) { + sprintf(path, "file%03d", i); + assert((ae = archive_entry_new()) != NULL); + archive_entry_copy_pathname(ae, path); + archive_entry_set_size(ae, datasize); + archive_entry_set_filetype(ae, AE_IFREG); + assertA(0 == archive_write_header(a, ae)); + assertA(datasize == (size_t)archive_write_data(a, data, datasize)); + archive_entry_free(ae); + } + + + archive_write_close(a); +#if ARCHIVE_API_VERSION > 1 + assert(0 == archive_write_finish(a)); +#else + archive_write_finish(a); +#endif + + /* + * Now, read the data back. + */ + assert((a = archive_read_new()) != NULL); + assertA(0 == archive_read_support_format_all(a)); + assertA(0 == archive_read_support_compression_all(a)); + assertA(0 == archive_read_open_memory(a, buff, used)); + + + for (i = 0; i < 100; i++) { + sprintf(path, "file%03d", i); + assertEqualInt(0, archive_read_next_header(a, &ae)); + assertEqualString(path, archive_entry_pathname(ae)); + assertEqualInt(datasize, archive_entry_size(ae)); + } + + assert(0 == archive_read_close(a)); +#if ARCHIVE_API_VERSION > 1 + assert(0 == archive_read_finish(a)); +#else + archive_read_finish(a); +#endif +} |