diff options
Diffstat (limited to 'contrib/xz/src/liblzma/subblock/subblock_encoder.c')
-rw-r--r-- | contrib/xz/src/liblzma/subblock/subblock_encoder.c | 984 |
1 files changed, 984 insertions, 0 deletions
diff --git a/contrib/xz/src/liblzma/subblock/subblock_encoder.c b/contrib/xz/src/liblzma/subblock/subblock_encoder.c new file mode 100644 index 0000000..4f71f99 --- /dev/null +++ b/contrib/xz/src/liblzma/subblock/subblock_encoder.c @@ -0,0 +1,984 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_encoder.c +/// \brief Encoder of the Subblock filter +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "subblock_encoder.h" +#include "filter_encoder.h" + + +/// Maximum number of repeats that a single Repeating Data can indicate. +/// This is directly from the file format specification. +#define REPEAT_COUNT_MAX (1U << 28) + +/// Number of bytes the data chunk (not including the header part) must be +/// before we care about alignment. This is somewhat arbitrary. It just +/// doesn't make sense to waste bytes for alignment when the data chunk +/// is very small. +#define MIN_CHUNK_SIZE_FOR_ALIGN 4 + +/// Number of bytes of the header part of Subblock Type `Data'. This is +/// used as the `skew' argument for subblock_align(). +#define ALIGN_SKEW_DATA 4 + +/// Like above but for Repeating Data. +#define ALIGN_SKEW_REPEATING_DATA 5 + +/// Writes one byte to output buffer and updates the alignment counter. +#define write_byte(b) \ +do { \ + assert(*out_pos < out_size); \ + out[*out_pos] = b; \ + ++*out_pos; \ + ++coder->alignment.out_pos; \ +} while (0) + + +struct lzma_coder_s { + lzma_next_coder next; + bool next_finished; + + enum { + SEQ_FILL, + SEQ_FLUSH, + SEQ_RLE_COUNT_0, + SEQ_RLE_COUNT_1, + SEQ_RLE_COUNT_2, + SEQ_RLE_COUNT_3, + SEQ_RLE_SIZE, + SEQ_RLE_DATA, + SEQ_DATA_SIZE_0, + SEQ_DATA_SIZE_1, + SEQ_DATA_SIZE_2, + SEQ_DATA_SIZE_3, + SEQ_DATA, + SEQ_SUBFILTER_INIT, + SEQ_SUBFILTER_FLAGS, + } sequence; + + /// Pointer to the options given by the application. This is used + /// for two-way communication with the application. + lzma_options_subblock *options; + + /// Position in various arrays. + size_t pos; + + /// Holds subblock.size - 1 or rle.size - 1 when encoding size + /// of Data or Repeat Count. + uint32_t tmp; + + struct { + /// This is a copy of options->alignment, or + /// LZMA_SUBBLOCK_ALIGNMENT_DEFAULT if options is NULL. + uint32_t multiple; + + /// Number of input bytes which we have processed and started + /// writing out. 32-bit integer is enough since we care only + /// about the lowest bits when fixing alignment. + uint32_t in_pos; + + /// Number of bytes written out. + uint32_t out_pos; + } alignment; + + struct { + /// Pointer to allocated buffer holding the Data field + /// of Subblock Type "Data". + uint8_t *data; + + /// Number of bytes in the buffer. + size_t size; + + /// Allocated size of the buffer. + size_t limit; + + /// Number of input bytes that we have already read but + /// not yet started writing out. This can be different + /// to `size' when using Subfilter. That's why we track + /// in_pending separately for RLE (see below). + uint32_t in_pending; + } subblock; + + struct { + /// Buffer to hold the data that may be coded with + /// Subblock Type `Repeating Data'. + uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX]; + + /// Number of bytes in buffer[]. + size_t size; + + /// Number of times the first `size' bytes of buffer[] + /// will be repeated. + uint64_t count; + + /// Like subblock.in_pending above, but for RLE. + uint32_t in_pending; + } rle; + + struct { + enum { + SUB_NONE, + SUB_SET, + SUB_RUN, + SUB_FLUSH, + SUB_FINISH, + SUB_END_MARKER, + } mode; + + /// This is a copy of options->allow_subfilters. We use + /// this to verify that the application doesn't change + /// the value of allow_subfilters. + bool allow; + + /// When this is true, application is not allowed to modify + /// options->subblock_mode. We may still modify it here. + bool mode_locked; + + /// True if we have encoded at least one byte of data with + /// the Subfilter. + bool got_input; + + /// Track the amount of input available once + /// LZMA_SUBFILTER_FINISH has been enabled. + /// This is needed for sanity checking (kind + /// of duplicating what common/code.c does). + size_t in_avail; + + /// Buffer for the Filter Flags field written after + /// the `Set Subfilter' indicator. + uint8_t *flags; + + /// Size of Filter Flags field. + uint32_t flags_size; + + /// Pointers to Subfilter. + lzma_next_coder subcoder; + + } subfilter; + + /// Temporary buffer used when we are not the last filter in the chain. + struct { + size_t pos; + size_t size; + uint8_t buffer[LZMA_BUFFER_SIZE]; + } temp; +}; + + +/// \brief Aligns the output buffer +/// +/// Aligns the output buffer so that after skew bytes the output position is +/// a multiple of coder->alignment.multiple. +static bool +subblock_align(lzma_coder *coder, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + size_t chunk_size, uint32_t skew) +{ + assert(*out_pos < out_size); + + // Fix the alignment only if it makes sense at least a little. + if (chunk_size >= MIN_CHUNK_SIZE_FOR_ALIGN) { + const uint32_t target = coder->alignment.in_pos + % coder->alignment.multiple; + + while ((coder->alignment.out_pos + skew) + % coder->alignment.multiple != target) { + // Zero indicates padding. + write_byte(0x00); + + // Check if output buffer got full and indicate it to + // the caller. + if (*out_pos == out_size) + return true; + } + } + + // Output buffer is not full. + return false; +} + + +/// \brief Checks if buffer contains repeated data +/// +/// \param needle Buffer containing a single repeat chunk +/// \param needle_size Size of needle in bytes +/// \param buf Buffer to search for repeated needles +/// \param buf_chunks Buffer size is buf_chunks * needle_size. +/// +/// \return True if the whole buf is filled with repeated needles. +/// +static bool +is_repeating(const uint8_t *restrict needle, size_t needle_size, + const uint8_t *restrict buf, size_t buf_chunks) +{ + while (buf_chunks-- != 0) { + if (memcmp(buf, needle, needle_size) != 0) + return false; + + buf += needle_size; + } + + return true; +} + + +/// \brief Optimizes the repeating style and updates coder->sequence +static void +subblock_rle_flush(lzma_coder *coder) +{ + // The Subblock decoder can use memset() when the size of the data + // being repeated is one byte, so we check if the RLE buffer is + // filled with a single repeating byte. + if (coder->rle.size > 1) { + const uint8_t b = coder->rle.buffer[0]; + size_t i = 0; + while (true) { + if (coder->rle.buffer[i] != b) + break; + + if (++i == coder->rle.size) { + // TODO Integer overflow check maybe, + // although this needs at least 2**63 bytes + // of input until it gets triggered... + coder->rle.count *= coder->rle.size; + coder->rle.size = 1; + break; + } + } + } + + if (coder->rle.count == 1) { + // The buffer should be repeated only once. It is + // waste of space to use Repeating Data. Instead, + // write a regular Data Subblock. See SEQ_RLE_COUNT_0 + // in subblock_buffer() for more info. + coder->tmp = coder->rle.size - 1; + } else if (coder->rle.count > REPEAT_COUNT_MAX) { + // There's so much to repeat that it doesn't fit into + // 28-bit integer. We will write two or more Subblocks + // of type Repeating Data. + coder->tmp = REPEAT_COUNT_MAX - 1; + } else { + coder->tmp = coder->rle.count - 1; + } + + coder->sequence = SEQ_RLE_COUNT_0; + + return; +} + + +/// \brief Resizes coder->subblock.data for a new size limit +static lzma_ret +subblock_data_size(lzma_coder *coder, lzma_allocator *allocator, + size_t new_limit) +{ + // Verify that the new limit is valid. + if (new_limit < LZMA_SUBBLOCK_DATA_SIZE_MIN + || new_limit > LZMA_SUBBLOCK_DATA_SIZE_MAX) + return LZMA_OPTIONS_ERROR; + + // Ff the new limit is different than the previous one, we need + // to reallocate the data buffer. + if (new_limit != coder->subblock.limit) { + lzma_free(coder->subblock.data, allocator); + coder->subblock.data = lzma_alloc(new_limit, allocator); + if (coder->subblock.data == NULL) + return LZMA_MEM_ERROR; + } + + coder->subblock.limit = new_limit; + + return LZMA_OK; +} + + +static lzma_ret +subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // Changing allow_subfilter is not allowed. + if (coder->options != NULL && coder->subfilter.allow + != coder->options->allow_subfilters) + return LZMA_PROG_ERROR; + + // Check if we need to do something special with the Subfilter. + if (coder->subfilter.allow) { + assert(coder->options != NULL); + + // See if subfilter_mode has been changed. + switch (coder->options->subfilter_mode) { + case LZMA_SUBFILTER_NONE: + if (coder->subfilter.mode != SUB_NONE) + return LZMA_PROG_ERROR; + break; + + case LZMA_SUBFILTER_SET: + if (coder->subfilter.mode_locked + || coder->subfilter.mode != SUB_NONE) + return LZMA_PROG_ERROR; + + coder->subfilter.mode = SUB_SET; + coder->subfilter.got_input = false; + + if (coder->sequence == SEQ_FILL) + coder->sequence = SEQ_FLUSH; + + break; + + case LZMA_SUBFILTER_RUN: + if (coder->subfilter.mode != SUB_RUN) + return LZMA_PROG_ERROR; + + break; + + case LZMA_SUBFILTER_FINISH: { + const size_t in_avail = in_size - *in_pos; + + if (coder->subfilter.mode == SUB_RUN) { + if (coder->subfilter.mode_locked) + return LZMA_PROG_ERROR; + + coder->subfilter.mode = SUB_FINISH; + coder->subfilter.in_avail = in_avail; + + } else if (coder->subfilter.mode != SUB_FINISH + || coder->subfilter.in_avail + != in_avail) { + return LZMA_PROG_ERROR; + } + + break; + } + + default: + return LZMA_OPTIONS_ERROR; + } + + // If we are sync-flushing or finishing, the application may + // no longer change subfilter_mode. Note that this check is + // done after checking the new subfilter_mode above; this + // way the application may e.g. set LZMA_SUBFILTER_SET and + // LZMA_SYNC_FLUSH at the same time, but it cannot modify + // subfilter_mode on the later lzma_code() calls before + // we have returned LZMA_STREAM_END. + if (action != LZMA_RUN) + coder->subfilter.mode_locked = true; + } + + // Main loop + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_FILL: + // Grab the new Subblock Data Size and reallocate the buffer. + if (coder->subblock.size == 0 && coder->options != NULL + && coder->options->subblock_data_size + != coder->subblock.limit) + return_if_error(subblock_data_size(coder, + allocator, coder->options + ->subblock_data_size)); + + if (coder->subfilter.mode == SUB_NONE) { + assert(coder->subfilter.subcoder.code == NULL); + + // No Subfilter is enabled, just copy the data as is. + coder->subblock.in_pending += lzma_bufcpy( + in, in_pos, in_size, + coder->subblock.data, + &coder->subblock.size, + coder->subblock.limit); + + // If we ran out of input before the whole buffer + // was filled, return to application. + if (coder->subblock.size < coder->subblock.limit + && action == LZMA_RUN) + return LZMA_OK; + + } else { + assert(coder->options->subfilter_mode + != LZMA_SUBFILTER_SET); + + // Using LZMA_FINISH automatically toggles + // LZMA_SUBFILTER_FINISH. + // + // NOTE: It is possible that application had set + // LZMA_SUBFILTER_SET and LZMA_FINISH at the same + // time. In that case it is possible that we will + // cycle to LZMA_SUBFILTER_RUN, LZMA_SUBFILTER_FINISH, + // and back to LZMA_SUBFILTER_NONE in a single + // Subblock encoder function call. + if (action == LZMA_FINISH) { + coder->options->subfilter_mode + = LZMA_SUBFILTER_FINISH; + coder->subfilter.mode = SUB_FINISH; + } + + const size_t in_start = *in_pos; + + const lzma_ret ret = coder->subfilter.subcoder.code( + coder->subfilter.subcoder.coder, + allocator, in, in_pos, in_size, + coder->subblock.data, + &coder->subblock.size, + coder->subblock.limit, + coder->subfilter.mode == SUB_FINISH + ? LZMA_FINISH : action); + + const size_t in_used = *in_pos - in_start; + coder->subblock.in_pending += in_used; + if (in_used > 0) + coder->subfilter.got_input = true; + + coder->subfilter.in_avail = in_size - *in_pos; + + if (ret == LZMA_STREAM_END) { + // All currently available input must have + // been processed. + assert(*in_pos == in_size); + + // Flush now. Even if coder->subblock.size + // happened to be zero, we still need to go + // to SEQ_FLUSH to possibly finish RLE or + // write the Subfilter Unset indicator. + coder->sequence = SEQ_FLUSH; + + if (coder->subfilter.mode == SUB_RUN) { + // Flushing with Subfilter enabled. + assert(action == LZMA_SYNC_FLUSH); + coder->subfilter.mode = SUB_FLUSH; + break; + } + + // Subfilter finished its job. + assert(coder->subfilter.mode == SUB_FINISH + || action == LZMA_FINISH); + + // At least one byte of input must have been + // encoded with the Subfilter. This is + // required by the file format specification. + if (!coder->subfilter.got_input) + return LZMA_PROG_ERROR; + + // We don't strictly need to do this, but + // doing it sounds like a good idea, because + // otherwise the Subfilter's memory could be + // left allocated for long time, and would + // just waste memory. + lzma_next_end(&coder->subfilter.subcoder, + allocator); + + // We need to flush the currently buffered + // data and write Unset Subfilter marker. + // Note that we cannot set + // coder->options->subfilter_mode to + // LZMA_SUBFILTER_NONE yet, because we + // haven't written the Unset Subfilter + // marker yet. + coder->subfilter.mode = SUB_END_MARKER; + coder->sequence = SEQ_FLUSH; + break; + } + + // Return if we couldn't fill the buffer or + // if an error occurred. + if (coder->subblock.size < coder->subblock.limit + || ret != LZMA_OK) + return ret; + } + + coder->sequence = SEQ_FLUSH; + + // SEQ_FILL doesn't produce any output so falling through + // to SEQ_FLUSH is safe. + assert(*out_pos < out_size); + + // Fall through + + case SEQ_FLUSH: + if (coder->options != NULL) { + // Update the alignment variable. + coder->alignment.multiple = coder->options->alignment; + if (coder->alignment.multiple + < LZMA_SUBBLOCK_ALIGNMENT_MIN + || coder->alignment.multiple + > LZMA_SUBBLOCK_ALIGNMENT_MAX) + return LZMA_OPTIONS_ERROR; + + // Run-length encoder + // + // First check if there is some data pending and we + // have an obvious need to flush it immediately. + if (coder->rle.count > 0 + && (coder->rle.size + != coder->options->rle + || coder->subblock.size + % coder->rle.size)) { + subblock_rle_flush(coder); + break; + } + + // Grab the (possibly new) RLE chunk size and + // validate it. + coder->rle.size = coder->options->rle; + if (coder->rle.size > LZMA_SUBBLOCK_RLE_MAX) + return LZMA_OPTIONS_ERROR; + + if (coder->subblock.size != 0 + && coder->rle.size + != LZMA_SUBBLOCK_RLE_OFF + && coder->subblock.size + % coder->rle.size == 0) { + + // Initialize coder->rle.buffer if we don't + // have RLE already running. + if (coder->rle.count == 0) + memcpy(coder->rle.buffer, + coder->subblock.data, + coder->rle.size); + + // Test if coder->subblock.data is repeating. + // If coder->rle.count would overflow, we + // force flushing. Forced flushing shouldn't + // really happen in real-world situations. + const size_t count = coder->subblock.size + / coder->rle.size; + if (UINT64_MAX - count > coder->rle.count + && is_repeating( + coder->rle.buffer, + coder->rle.size, + coder->subblock.data, + count)) { + coder->rle.count += count; + coder->rle.in_pending += coder + ->subblock.in_pending; + coder->subblock.in_pending = 0; + coder->subblock.size = 0; + + } else if (coder->rle.count > 0) { + // It's not repeating or at least not + // with the same byte sequence as the + // earlier Subblock Data buffers. We + // have some data pending in the RLE + // buffer already, so do a flush. + // Once flushed, we will check again + // if the Subblock Data happens to + // contain a different repeating + // sequence. + subblock_rle_flush(coder); + break; + } + } + } + + // If we now have some data left in coder->subblock, the RLE + // buffer is empty and we must write a regular Subblock Data. + if (coder->subblock.size > 0) { + assert(coder->rle.count == 0); + coder->tmp = coder->subblock.size - 1; + coder->sequence = SEQ_DATA_SIZE_0; + break; + } + + // Check if we should enable Subfilter. + if (coder->subfilter.mode == SUB_SET) { + if (coder->rle.count > 0) + subblock_rle_flush(coder); + else + coder->sequence = SEQ_SUBFILTER_INIT; + break; + } + + // Check if we have just finished Subfiltering. + if (coder->subfilter.mode == SUB_END_MARKER) { + if (coder->rle.count > 0) { + subblock_rle_flush(coder); + break; + } + + coder->options->subfilter_mode = LZMA_SUBFILTER_NONE; + coder->subfilter.mode = SUB_NONE; + + write_byte(0x50); + if (*out_pos == out_size) + return LZMA_OK; + } + + // Check if we have already written everything. + if (action != LZMA_RUN && *in_pos == in_size + && (coder->subfilter.mode == SUB_NONE + || coder->subfilter.mode == SUB_FLUSH)) { + if (coder->rle.count > 0) { + subblock_rle_flush(coder); + break; + } + + if (action == LZMA_SYNC_FLUSH) { + if (coder->subfilter.mode == SUB_FLUSH) + coder->subfilter.mode = SUB_RUN; + + coder->subfilter.mode_locked = false; + coder->sequence = SEQ_FILL; + + } else { + assert(action == LZMA_FINISH); + + // Write EOPM. + // NOTE: No need to use write_byte() here + // since we are finishing. + out[*out_pos] = 0x10; + ++*out_pos; + } + + return LZMA_STREAM_END; + } + + // Otherwise we have more work to do. + coder->sequence = SEQ_FILL; + break; + + case SEQ_RLE_COUNT_0: + assert(coder->rle.count > 0); + + if (coder->rle.count == 1) { + // The buffer should be repeated only once. Fix + // the alignment and write the first byte of + // Subblock Type `Data'. + if (subblock_align(coder, out, out_pos, out_size, + coder->rle.size, ALIGN_SKEW_DATA)) + return LZMA_OK; + + write_byte(0x20 | (coder->tmp & 0x0F)); + + } else { + // We have something to actually repeat, which should + // mean that it takes less space with run-length + // encoding. + if (subblock_align(coder, out, out_pos, out_size, + coder->rle.size, + ALIGN_SKEW_REPEATING_DATA)) + return LZMA_OK; + + write_byte(0x30 | (coder->tmp & 0x0F)); + } + + // NOTE: If we have to write more than one Repeating Data + // due to rle.count > REPEAT_COUNT_MAX, the subsequent + // Repeating Data Subblocks may get wrong alignment, because + // we add rle.in_pending to alignment.in_pos at once instead + // of adding only as much as this particular Repeating Data + // consumed input data. Correct alignment is always restored + // after all the required Repeating Data Subblocks have been + // written. This problem occurs in such a weird cases that + // it's not worth fixing. + coder->alignment.out_pos += coder->rle.size; + coder->alignment.in_pos += coder->rle.in_pending; + coder->rle.in_pending = 0; + + coder->sequence = SEQ_RLE_COUNT_1; + break; + + case SEQ_RLE_COUNT_1: + write_byte(coder->tmp >> 4); + coder->sequence = SEQ_RLE_COUNT_2; + break; + + case SEQ_RLE_COUNT_2: + write_byte(coder->tmp >> 12); + coder->sequence = SEQ_RLE_COUNT_3; + break; + + case SEQ_RLE_COUNT_3: + write_byte(coder->tmp >> 20); + + // Again, see if we are writing regular Data or Repeating Data. + // In the former case, we skip SEQ_RLE_SIZE. + if (coder->rle.count == 1) + coder->sequence = SEQ_RLE_DATA; + else + coder->sequence = SEQ_RLE_SIZE; + + if (coder->rle.count > REPEAT_COUNT_MAX) + coder->rle.count -= REPEAT_COUNT_MAX; + else + coder->rle.count = 0; + + break; + + case SEQ_RLE_SIZE: + assert(coder->rle.size >= LZMA_SUBBLOCK_RLE_MIN); + assert(coder->rle.size <= LZMA_SUBBLOCK_RLE_MAX); + write_byte(coder->rle.size - 1); + coder->sequence = SEQ_RLE_DATA; + break; + + case SEQ_RLE_DATA: + lzma_bufcpy(coder->rle.buffer, &coder->pos, coder->rle.size, + out, out_pos, out_size); + if (coder->pos < coder->rle.size) + return LZMA_OK; + + coder->pos = 0; + coder->sequence = SEQ_FLUSH; + break; + + case SEQ_DATA_SIZE_0: + // We need four bytes for the Size field. + if (subblock_align(coder, out, out_pos, out_size, + coder->subblock.size, ALIGN_SKEW_DATA)) + return LZMA_OK; + + coder->alignment.out_pos += coder->subblock.size; + coder->alignment.in_pos += coder->subblock.in_pending; + coder->subblock.in_pending = 0; + + write_byte(0x20 | (coder->tmp & 0x0F)); + coder->sequence = SEQ_DATA_SIZE_1; + break; + + case SEQ_DATA_SIZE_1: + write_byte(coder->tmp >> 4); + coder->sequence = SEQ_DATA_SIZE_2; + break; + + case SEQ_DATA_SIZE_2: + write_byte(coder->tmp >> 12); + coder->sequence = SEQ_DATA_SIZE_3; + break; + + case SEQ_DATA_SIZE_3: + write_byte(coder->tmp >> 20); + coder->sequence = SEQ_DATA; + break; + + case SEQ_DATA: + lzma_bufcpy(coder->subblock.data, &coder->pos, + coder->subblock.size, out, out_pos, out_size); + if (coder->pos < coder->subblock.size) + return LZMA_OK; + + coder->subblock.size = 0; + coder->pos = 0; + coder->sequence = SEQ_FLUSH; + break; + + case SEQ_SUBFILTER_INIT: { + assert(coder->subblock.size == 0); + assert(coder->subblock.in_pending == 0); + assert(coder->rle.count == 0); + assert(coder->rle.in_pending == 0); + assert(coder->subfilter.mode == SUB_SET); + assert(coder->options != NULL); + + // There must be a filter specified. + if (coder->options->subfilter_options.id == LZMA_VLI_UNKNOWN) + return LZMA_OPTIONS_ERROR; + + // Initialize a raw encoder to work as a Subfilter. + lzma_filter options[2]; + options[0] = coder->options->subfilter_options; + options[1].id = LZMA_VLI_UNKNOWN; + + return_if_error(lzma_raw_encoder_init( + &coder->subfilter.subcoder, allocator, + options)); + + // Encode the Filter Flags field into a buffer. This should + // never fail since we have already successfully initialized + // the Subfilter itself. Check it still, and return + // LZMA_PROG_ERROR instead of whatever the ret would say. + lzma_ret ret = lzma_filter_flags_size( + &coder->subfilter.flags_size, options); + assert(ret == LZMA_OK); + if (ret != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->subfilter.flags = lzma_alloc( + coder->subfilter.flags_size, allocator); + if (coder->subfilter.flags == NULL) + return LZMA_MEM_ERROR; + + // Now we have a big-enough buffer. Encode the Filter Flags. + // Like above, this should never fail. + size_t dummy = 0; + ret = lzma_filter_flags_encode(options, coder->subfilter.flags, + &dummy, coder->subfilter.flags_size); + assert(ret == LZMA_OK); + assert(dummy == coder->subfilter.flags_size); + if (ret != LZMA_OK || dummy != coder->subfilter.flags_size) + return LZMA_PROG_ERROR; + + // Write a Subblock indicating a new Subfilter. + write_byte(0x40); + + coder->options->subfilter_mode = LZMA_SUBFILTER_RUN; + coder->subfilter.mode = SUB_RUN; + coder->alignment.out_pos += coder->subfilter.flags_size; + coder->sequence = SEQ_SUBFILTER_FLAGS; + + // It is safe to fall through because SEQ_SUBFILTER_FLAGS + // uses lzma_bufcpy() which doesn't write unless there is + // output space. + } + + // Fall through + + case SEQ_SUBFILTER_FLAGS: + // Copy the Filter Flags to the output stream. + lzma_bufcpy(coder->subfilter.flags, &coder->pos, + coder->subfilter.flags_size, + out, out_pos, out_size); + if (coder->pos < coder->subfilter.flags_size) + return LZMA_OK; + + lzma_free(coder->subfilter.flags, allocator); + coder->subfilter.flags = NULL; + + coder->pos = 0; + coder->sequence = SEQ_FILL; + break; + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static lzma_ret +subblock_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + if (coder->next.code == NULL) + return subblock_buffer(coder, allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + while (*out_pos < out_size + && (*in_pos < in_size || action != LZMA_RUN)) { + if (!coder->next_finished + && coder->temp.pos == coder->temp.size) { + coder->temp.pos = 0; + coder->temp.size = 0; + + const lzma_ret ret = coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + coder->temp.buffer, &coder->temp.size, + LZMA_BUFFER_SIZE, action); + if (ret == LZMA_STREAM_END) { + assert(action != LZMA_RUN); + coder->next_finished = true; + } else if (coder->temp.size == 0 || ret != LZMA_OK) { + return ret; + } + } + + const lzma_ret ret = subblock_buffer(coder, allocator, + coder->temp.buffer, &coder->temp.pos, + coder->temp.size, out, out_pos, out_size, + coder->next_finished ? LZMA_FINISH : LZMA_RUN); + if (ret == LZMA_STREAM_END) { + assert(action != LZMA_RUN); + assert(coder->next_finished); + return LZMA_STREAM_END; + } + + if (ret != LZMA_OK) + return ret; + } + + return LZMA_OK; +} + + +static void +subblock_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_next_end(&coder->subfilter.subcoder, allocator); + lzma_free(coder->subblock.data, allocator); + lzma_free(coder->subfilter.flags, allocator); + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_subblock_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &subblock_encode; + next->end = &subblock_encoder_end; + + next->coder->next = LZMA_NEXT_CODER_INIT; + next->coder->subblock.data = NULL; + next->coder->subblock.limit = 0; + next->coder->subfilter.subcoder = LZMA_NEXT_CODER_INIT; + } else { + lzma_next_end(&next->coder->subfilter.subcoder, + allocator); + lzma_free(next->coder->subfilter.flags, allocator); + } + + next->coder->subfilter.flags = NULL; + + next->coder->next_finished = false; + next->coder->sequence = SEQ_FILL; + next->coder->options = filters[0].options; + next->coder->pos = 0; + + next->coder->alignment.in_pos = 0; + next->coder->alignment.out_pos = 0; + next->coder->subblock.size = 0; + next->coder->subblock.in_pending = 0; + next->coder->rle.count = 0; + next->coder->rle.in_pending = 0; + next->coder->subfilter.mode = SUB_NONE; + next->coder->subfilter.mode_locked = false; + + next->coder->temp.pos = 0; + next->coder->temp.size = 0; + + // Grab some values from the options structure if it is available. + size_t subblock_size_limit; + if (next->coder->options != NULL) { + if (next->coder->options->alignment + < LZMA_SUBBLOCK_ALIGNMENT_MIN + || next->coder->options->alignment + > LZMA_SUBBLOCK_ALIGNMENT_MAX) { + subblock_encoder_end(next->coder, allocator); + return LZMA_OPTIONS_ERROR; + } + next->coder->alignment.multiple + = next->coder->options->alignment; + next->coder->subfilter.allow + = next->coder->options->allow_subfilters; + subblock_size_limit = next->coder->options->subblock_data_size; + } else { + next->coder->alignment.multiple + = LZMA_SUBBLOCK_ALIGNMENT_DEFAULT; + next->coder->subfilter.allow = false; + subblock_size_limit = LZMA_SUBBLOCK_DATA_SIZE_DEFAULT; + } + + return_if_error(subblock_data_size(next->coder, allocator, + subblock_size_limit)); + + return lzma_next_filter_init( + &next->coder->next, allocator, filters + 1); +} |