diff options
Diffstat (limited to 'contrib/xz/src/liblzma/lz')
-rw-r--r-- | contrib/xz/src/liblzma/lz/lz_decoder.c | 8 | ||||
-rw-r--r-- | contrib/xz/src/liblzma/lz/lz_decoder.h | 7 | ||||
-rw-r--r-- | contrib/xz/src/liblzma/lz/lz_encoder.c | 124 | ||||
-rw-r--r-- | contrib/xz/src/liblzma/lz/lz_encoder.h | 8 | ||||
-rw-r--r-- | contrib/xz/src/liblzma/lz/lz_encoder_mf.c | 69 |
5 files changed, 115 insertions, 101 deletions
diff --git a/contrib/xz/src/liblzma/lz/lz_decoder.c b/contrib/xz/src/liblzma/lz/lz_decoder.c index d74085c..2328a8e 100644 --- a/contrib/xz/src/liblzma/lz/lz_decoder.c +++ b/contrib/xz/src/liblzma/lz/lz_decoder.c @@ -126,7 +126,7 @@ decode_buffer(lzma_coder *coder, static lzma_ret lz_decode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((__unused__)), + const lzma_allocator *allocator lzma_attribute((__unused__)), const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, @@ -184,7 +184,7 @@ lz_decode(lzma_coder *coder, static void -lz_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +lz_decoder_end(lzma_coder *coder, const lzma_allocator *allocator) { lzma_next_end(&coder->next, allocator); lzma_free(coder->dict.buf, allocator); @@ -200,10 +200,10 @@ lz_decoder_end(lzma_coder *coder, lzma_allocator *allocator) extern lzma_ret -lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters, lzma_ret (*lz_init)(lzma_lz_decoder *lz, - lzma_allocator *allocator, const void *options, + const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options)) { // Allocate the base structure if it isn't already allocated. diff --git a/contrib/xz/src/liblzma/lz/lz_decoder.h b/contrib/xz/src/liblzma/lz/lz_decoder.h index 7266e80..277900a 100644 --- a/contrib/xz/src/liblzma/lz/lz_decoder.h +++ b/contrib/xz/src/liblzma/lz/lz_decoder.h @@ -67,7 +67,7 @@ typedef struct { lzma_vli uncompressed_size); /// Free allocated resources - void (*end)(lzma_coder *coder, lzma_allocator *allocator); + void (*end)(lzma_coder *coder, const lzma_allocator *allocator); } lzma_lz_decoder; @@ -83,9 +83,10 @@ typedef struct { extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_filter_info *filters, + const lzma_allocator *allocator, + const lzma_filter_info *filters, lzma_ret (*lz_init)(lzma_lz_decoder *lz, - lzma_allocator *allocator, const void *options, + const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options)); extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size); diff --git a/contrib/xz/src/liblzma/lz/lz_encoder.c b/contrib/xz/src/liblzma/lz/lz_encoder.c index e240696..48bc487 100644 --- a/contrib/xz/src/liblzma/lz/lz_encoder.c +++ b/contrib/xz/src/liblzma/lz/lz_encoder.c @@ -20,6 +20,8 @@ # include "lz_encoder_hash_table.h" #endif +#include "memcmplen.h" + struct lzma_coder_s { /// LZ-based encoder e.g. LZMA @@ -76,8 +78,9 @@ move_window(lzma_mf *mf) /// This function must not be called once it has returned LZMA_STREAM_END. /// static lzma_ret -fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, - size_t *in_pos, size_t in_size, lzma_action action) +fill_window(lzma_coder *coder, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + lzma_action action) { assert(coder->mf.read_pos <= coder->mf.write_pos); @@ -107,6 +110,12 @@ fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, coder->mf.write_pos = write_pos; + // Silence Valgrind. lzma_memcmplen() can read extra bytes + // and Valgrind will give warnings if those bytes are uninitialized + // because Valgrind cannot see that the values of the uninitialized + // bytes are eventually ignored. + memzero(coder->mf.buffer + write_pos, LZMA_MEMCMPLEN_EXTRA); + // If end of stream has been reached or flushing completed, we allow // the encoder to process all the input (that is, read_pos is allowed // to reach write_pos). Otherwise we keep keep_size_after bytes @@ -130,7 +139,7 @@ fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, && coder->mf.read_pos < coder->mf.read_limit) { // Match finder may update coder->pending and expects it to // start from zero, so use a temporary variable. - const size_t pending = coder->mf.pending; + const uint32_t pending = coder->mf.pending; coder->mf.pending = 0; // Rewind read_pos so that the match finder can hash @@ -148,7 +157,7 @@ fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, static lzma_ret -lz_encode(lzma_coder *coder, lzma_allocator *allocator, +lz_encode(lzma_coder *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, @@ -179,7 +188,7 @@ lz_encode(lzma_coder *coder, lzma_allocator *allocator, static bool -lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator, +lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator, const lzma_lz_options *lz_options) { // For now, the dictionary size is limited to 1.5 GiB. This may grow @@ -325,25 +334,22 @@ lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator, hs += HASH_4_SIZE; */ - // If the above code calculating hs is modified, make sure that - // this assertion stays valid (UINT32_MAX / 5 is not strictly the - // exact limit). If it doesn't, you need to calculate that - // hash_size_sum + sons_count cannot overflow. - assert(hs < UINT32_MAX / 5); - - const uint32_t old_count = mf->hash_size_sum + mf->sons_count; - mf->hash_size_sum = hs; + const uint32_t old_hash_count = mf->hash_count; + const uint32_t old_sons_count = mf->sons_count; + mf->hash_count = hs; mf->sons_count = mf->cyclic_size; if (is_bt) mf->sons_count *= 2; - const uint32_t new_count = mf->hash_size_sum + mf->sons_count; - // Deallocate the old hash array if it exists and has different size // than what is needed now. - if (old_count != new_count) { + if (old_hash_count != mf->hash_count + || old_sons_count != mf->sons_count) { lzma_free(mf->hash, allocator); mf->hash = NULL; + + lzma_free(mf->son, allocator); + mf->son = NULL; } // Maximum number of match finder cycles @@ -360,14 +366,23 @@ lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator, static bool -lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator, +lz_encoder_init(lzma_mf *mf, const lzma_allocator *allocator, const lzma_lz_options *lz_options) { // Allocate the history buffer. if (mf->buffer == NULL) { - mf->buffer = lzma_alloc(mf->size, allocator); + // lzma_memcmplen() is used for the dictionary buffer + // so we need to allocate a few extra bytes to prevent + // it from reading past the end of the buffer. + mf->buffer = lzma_alloc(mf->size + LZMA_MEMCMPLEN_EXTRA, + allocator); if (mf->buffer == NULL) return true; + + // Keep Valgrind happy with lzma_memcmplen() and initialize + // the extra bytes whose value may get read but which will + // effectively get ignored. + memzero(mf->buffer + mf->size, LZMA_MEMCMPLEN_EXTRA); } // Use cyclic_size as initial mf->offset. This allows @@ -381,43 +396,48 @@ lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator, mf->write_pos = 0; mf->pending = 0; - // Allocate match finder's hash array. - const size_t alloc_count = mf->hash_size_sum + mf->sons_count; - #if UINT32_MAX >= SIZE_MAX / 4 // Check for integer overflow. (Huge dictionaries are not // possible on 32-bit CPU.) - if (alloc_count > SIZE_MAX / sizeof(uint32_t)) + if (mf->hash_count > SIZE_MAX / sizeof(uint32_t) + || mf->sons_count > SIZE_MAX / sizeof(uint32_t)) return true; #endif + // Allocate and initialize the hash table. Since EMPTY_HASH_VALUE + // is zero, we can use lzma_alloc_zero() or memzero() for mf->hash. + // + // We don't need to initialize mf->son, but not doing that may + // make Valgrind complain in normalization (see normalize() in + // lz_encoder_mf.c). Skipping the initialization is *very* good + // when big dictionary is used but only small amount of data gets + // actually compressed: most of the mf->son won't get actually + // allocated by the kernel, so we avoid wasting RAM and improve + // initialization speed a lot. if (mf->hash == NULL) { - mf->hash = lzma_alloc(alloc_count * sizeof(uint32_t), + mf->hash = lzma_alloc_zero(mf->hash_count * sizeof(uint32_t), + allocator); + mf->son = lzma_alloc(mf->sons_count * sizeof(uint32_t), allocator); - if (mf->hash == NULL) - return true; - } - mf->son = mf->hash + mf->hash_size_sum; - mf->cyclic_pos = 0; + if (mf->hash == NULL || mf->son == NULL) { + lzma_free(mf->hash, allocator); + mf->hash = NULL; - // Initialize the hash table. Since EMPTY_HASH_VALUE is zero, we - // can use memset(). + lzma_free(mf->son, allocator); + mf->son = NULL; + + return true; + } + } else { /* - for (uint32_t i = 0; i < hash_size_sum; ++i) - mf->hash[i] = EMPTY_HASH_VALUE; + for (uint32_t i = 0; i < mf->hash_count; ++i) + mf->hash[i] = EMPTY_HASH_VALUE; */ - memzero(mf->hash, (size_t)(mf->hash_size_sum) * sizeof(uint32_t)); + memzero(mf->hash, mf->hash_count * sizeof(uint32_t)); + } - // We don't need to initialize mf->son, but not doing that will - // make Valgrind complain in normalization (see normalize() in - // lz_encoder_mf.c). - // - // Skipping this initialization is *very* good when big dictionary is - // used but only small amount of data gets actually compressed: most - // of the mf->hash won't get actually allocated by the kernel, so - // we avoid wasting RAM and improve initialization speed a lot. - //memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t)); + mf->cyclic_pos = 0; // Handle preset dictionary. if (lz_options->preset_dict != NULL @@ -445,7 +465,8 @@ lzma_lz_encoder_memusage(const lzma_lz_options *lz_options) lzma_mf mf = { .buffer = NULL, .hash = NULL, - .hash_size_sum = 0, + .son = NULL, + .hash_count = 0, .sons_count = 0, }; @@ -454,17 +475,17 @@ lzma_lz_encoder_memusage(const lzma_lz_options *lz_options) return UINT64_MAX; // Calculate the memory usage. - return (uint64_t)(mf.hash_size_sum + mf.sons_count) - * sizeof(uint32_t) - + (uint64_t)(mf.size) + sizeof(lzma_coder); + return ((uint64_t)(mf.hash_count) + mf.sons_count) * sizeof(uint32_t) + + mf.size + sizeof(lzma_coder); } static void -lz_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +lz_encoder_end(lzma_coder *coder, const lzma_allocator *allocator) { lzma_next_end(&coder->next, allocator); + lzma_free(coder->mf.son, allocator); lzma_free(coder->mf.hash, allocator); lzma_free(coder->mf.buffer, allocator); @@ -479,7 +500,7 @@ lz_encoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret -lz_encoder_update(lzma_coder *coder, lzma_allocator *allocator, +lz_encoder_update(lzma_coder *coder, const lzma_allocator *allocator, const lzma_filter *filters_null lzma_attribute((__unused__)), const lzma_filter *reversed_filters) { @@ -495,10 +516,10 @@ lz_encoder_update(lzma_coder *coder, lzma_allocator *allocator, extern lzma_ret -lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters, lzma_ret (*lz_init)(lzma_lz_encoder *lz, - lzma_allocator *allocator, const void *options, + const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options)) { #ifdef HAVE_SMALL @@ -522,7 +543,8 @@ lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->mf.buffer = NULL; next->coder->mf.hash = NULL; - next->coder->mf.hash_size_sum = 0; + next->coder->mf.son = NULL; + next->coder->mf.hash_count = 0; next->coder->mf.sons_count = 0; next->coder->next = LZMA_NEXT_CODER_INIT; diff --git a/contrib/xz/src/liblzma/lz/lz_encoder.h b/contrib/xz/src/liblzma/lz/lz_encoder.h index 741c453..dad9c6b 100644 --- a/contrib/xz/src/liblzma/lz/lz_encoder.h +++ b/contrib/xz/src/liblzma/lz/lz_encoder.h @@ -119,7 +119,7 @@ struct lzma_mf_s { lzma_action action; /// Number of elements in hash[] - uint32_t hash_size_sum; + uint32_t hash_count; /// Number of elements in son[] uint32_t sons_count; @@ -199,7 +199,7 @@ typedef struct { size_t *restrict out_pos, size_t out_size); /// Free allocated resources - void (*end)(lzma_coder *coder, lzma_allocator *allocator); + void (*end)(lzma_coder *coder, const lzma_allocator *allocator); /// Update the options in the middle of the encoding. lzma_ret (*options_update)(lzma_coder *coder, @@ -296,10 +296,10 @@ mf_read(lzma_mf *mf, uint8_t *out, size_t *out_pos, size_t out_size, extern lzma_ret lzma_lz_encoder_init( - lzma_next_coder *next, lzma_allocator *allocator, + lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters, lzma_ret (*lz_init)(lzma_lz_encoder *lz, - lzma_allocator *allocator, const void *options, + const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options)); diff --git a/contrib/xz/src/liblzma/lz/lz_encoder_mf.c b/contrib/xz/src/liblzma/lz/lz_encoder_mf.c index f82a1c1..7852077 100644 --- a/contrib/xz/src/liblzma/lz/lz_encoder_mf.c +++ b/contrib/xz/src/liblzma/lz/lz_encoder_mf.c @@ -13,6 +13,7 @@ #include "lz_encoder.h" #include "lz_encoder_hash.h" +#include "memcmplen.h" /// \brief Find matches starting from the current byte @@ -65,9 +66,7 @@ lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches) // here because the match distances are zero based. const uint8_t *p2 = p1 - matches[count - 1].dist - 1; - while (len_best < limit - && p1[len_best] == p2[len_best]) - ++len_best; + len_best = lzma_memcmplen(p1, p2, len_best, limit); } } @@ -116,24 +115,27 @@ normalize(lzma_mf *mf) = (MUST_NORMALIZE_POS - mf->cyclic_size); // & (~(UINT32_C(1) << 10) - 1); - const uint32_t count = mf->hash_size_sum + mf->sons_count; - uint32_t *hash = mf->hash; - - for (uint32_t i = 0; i < count; ++i) { + for (uint32_t i = 0; i < mf->hash_count; ++i) { // If the distance is greater than the dictionary size, // we can simply mark the hash element as empty. + if (mf->hash[i] <= subvalue) + mf->hash[i] = EMPTY_HASH_VALUE; + else + mf->hash[i] -= subvalue; + } + + for (uint32_t i = 0; i < mf->sons_count; ++i) { + // Do the same for mf->son. // - // NOTE: Only the first mf->hash_size_sum elements are - // initialized for sure. There may be uninitialized elements - // in mf->son. Since we go through both mf->hash and - // mf->son here in normalization, Valgrind may complain - // that the "if" below depends on uninitialized value. In - // this case it is safe to ignore the warning. See also the - // comments in lz_encoder_init() in lz_encoder.c. - if (hash[i] <= subvalue) - hash[i] = EMPTY_HASH_VALUE; + // NOTE: There may be uninitialized elements in mf->son. + // Valgrind may complain that the "if" below depends on + // an uninitialized value. In this case it is safe to ignore + // the warning. See also the comments in lz_encoder_init() + // in lz_encoder.c. + if (mf->son[i] <= subvalue) + mf->son[i] = EMPTY_HASH_VALUE; else - hash[i] -= subvalue; + mf->son[i] -= subvalue; } // Update offset to match the new locations. @@ -269,10 +271,7 @@ hc_find_func( + (delta > cyclic_pos ? cyclic_size : 0)]; if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) { - uint32_t len = 0; - while (++len != len_limit) - if (pb[len] != cur[len]) - break; + uint32_t len = lzma_memcmplen(pb, cur, 1, len_limit); if (len_best < len) { len_best = len; @@ -318,9 +317,8 @@ lzma_mf_hc3_find(lzma_mf *mf, lzma_match *matches) uint32_t len_best = 2; if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { - for ( ; len_best != len_limit; ++len_best) - if (*(cur + len_best - delta2) != cur[len_best]) - break; + len_best = lzma_memcmplen(cur - delta2, cur, + len_best, len_limit); matches[0].len = len_best; matches[0].dist = delta2 - 1; @@ -397,9 +395,8 @@ lzma_mf_hc4_find(lzma_mf *mf, lzma_match *matches) } if (matches_count != 0) { - for ( ; len_best != len_limit; ++len_best) - if (*(cur + len_best - delta2) != cur[len_best]) - break; + len_best = lzma_memcmplen(cur - delta2, cur, + len_best, len_limit); matches[matches_count - 1].len = len_best; @@ -484,9 +481,7 @@ bt_find_func( uint32_t len = my_min(len0, len1); if (pb[len] == cur[len]) { - while (++len != len_limit) - if (pb[len] != cur[len]) - break; + len = lzma_memcmplen(pb, cur, len + 1, len_limit); if (len_best < len) { len_best = len; @@ -549,9 +544,7 @@ bt_skip_func( uint32_t len = my_min(len0, len1); if (pb[len] == cur[len]) { - while (++len != len_limit) - if (pb[len] != cur[len]) - break; + len = lzma_memcmplen(pb, cur, len + 1, len_limit); if (len == len_limit) { *ptr1 = pair[0]; @@ -639,9 +632,8 @@ lzma_mf_bt3_find(lzma_mf *mf, lzma_match *matches) uint32_t len_best = 2; if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { - for ( ; len_best != len_limit; ++len_best) - if (*(cur + len_best - delta2) != cur[len_best]) - break; + len_best = lzma_memcmplen( + cur, cur - delta2, len_best, len_limit); matches[0].len = len_best; matches[0].dist = delta2 - 1; @@ -712,9 +704,8 @@ lzma_mf_bt4_find(lzma_mf *mf, lzma_match *matches) } if (matches_count != 0) { - for ( ; len_best != len_limit; ++len_best) - if (*(cur + len_best - delta2) != cur[len_best]) - break; + len_best = lzma_memcmplen( + cur, cur - delta2, len_best, len_limit); matches[matches_count - 1].len = len_best; |