5 files changed, 115 insertions, 101 deletions
diff --git a/contrib/xz/src/liblzma/lz/lz_decoder.c b/contrib/xz/src/liblzma/lz/lz_decoder.c
index d74085c..2328a8e 100644
--- a/contrib/xz/src/liblzma/lz/lz_decoder.c
+++ b/contrib/xz/src/liblzma/lz/lz_decoder.c
@@ -126,7 +126,7 @@ decode_buffer(lzma_coder *coder,
 
 static lzma_ret
 lz_decode(lzma_coder *coder,
-		lzma_allocator *allocator lzma_attribute((__unused__)),
+		const lzma_allocator *allocator lzma_attribute((__unused__)),
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size,
@@ -184,7 +184,7 @@ lz_decode(lzma_coder *coder,
 
 
 static void
-lz_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+lz_decoder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_next_end(&coder->next, allocator);
 	lzma_free(coder->dict.buf, allocator);
@@ -200,10 +200,10 @@ lz_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
 
 
 extern lzma_ret
-lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters,
 		lzma_ret (*lz_init)(lzma_lz_decoder *lz,
-			lzma_allocator *allocator, const void *options,
+			const lzma_allocator *allocator, const void *options,
 			lzma_lz_options *lz_options))
 {
 	// Allocate the base structure if it isn't already allocated.
diff --git a/contrib/xz/src/liblzma/lz/lz_decoder.h b/contrib/xz/src/liblzma/lz/lz_decoder.h
index 7266e80..277900a 100644
--- a/contrib/xz/src/liblzma/lz/lz_decoder.h
+++ b/contrib/xz/src/liblzma/lz/lz_decoder.h
@@ -67,7 +67,7 @@ typedef struct {
 			lzma_vli uncompressed_size);
 
 	/// Free allocated resources
-	void (*end)(lzma_coder *coder, lzma_allocator *allocator);
+	void (*end)(lzma_coder *coder, const lzma_allocator *allocator);
 
 } lzma_lz_decoder;
 
@@ -83,9 +83,10 @@ typedef struct {
 
 
 extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters,
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters,
 		lzma_ret (*lz_init)(lzma_lz_decoder *lz,
-			lzma_allocator *allocator, const void *options,
+			const lzma_allocator *allocator, const void *options,
 			lzma_lz_options *lz_options));
 
 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
diff --git a/contrib/xz/src/liblzma/lz/lz_encoder.c b/contrib/xz/src/liblzma/lz/lz_encoder.c
index e240696..48bc487 100644
--- a/contrib/xz/src/liblzma/lz/lz_encoder.c
+++ b/contrib/xz/src/liblzma/lz/lz_encoder.c
@@ -20,6 +20,8 @@
 #	include "lz_encoder_hash_table.h"
 #endif
 
+#include "memcmplen.h"
+
 
 struct lzma_coder_s {
 	/// LZ-based encoder e.g. LZMA
@@ -76,8 +78,9 @@ move_window(lzma_mf *mf)
 /// This function must not be called once it has returned LZMA_STREAM_END.
 ///
 static lzma_ret
-fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in,
-		size_t *in_pos, size_t in_size, lzma_action action)
+fill_window(lzma_coder *coder, const lzma_allocator *allocator,
+		const uint8_t *in, size_t *in_pos, size_t in_size,
+		lzma_action action)
 {
 	assert(coder->mf.read_pos <= coder->mf.write_pos);
 
@@ -107,6 +110,12 @@ fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in,
 
 	coder->mf.write_pos = write_pos;
 
+	// Silence Valgrind. lzma_memcmplen() can read extra bytes
+	// and Valgrind will give warnings if those bytes are uninitialized
+	// because Valgrind cannot see that the values of the uninitialized
+	// bytes are eventually ignored.
+	memzero(coder->mf.buffer + write_pos, LZMA_MEMCMPLEN_EXTRA);
+
 	// If end of stream has been reached or flushing completed, we allow
 	// the encoder to process all the input (that is, read_pos is allowed
 	// to reach write_pos). Otherwise we keep keep_size_after bytes
@@ -130,7 +139,7 @@ fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in,
 			&& coder->mf.read_pos < coder->mf.read_limit) {
 		// Match finder may update coder->pending and expects it to
 		// start from zero, so use a temporary variable.
-		const size_t pending = coder->mf.pending;
+		const uint32_t pending = coder->mf.pending;
 		coder->mf.pending = 0;
 
 		// Rewind read_pos so that the match finder can hash
@@ -148,7 +157,7 @@ fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in,
 
 
 static lzma_ret
-lz_encode(lzma_coder *coder, lzma_allocator *allocator,
+lz_encode(lzma_coder *coder, const lzma_allocator *allocator,
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size,
 		uint8_t *restrict out, size_t *restrict out_pos,
@@ -179,7 +188,7 @@ lz_encode(lzma_coder *coder, lzma_allocator *allocator,
 
 
 static bool
-lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
+lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator,
 		const lzma_lz_options *lz_options)
 {
 	// For now, the dictionary size is limited to 1.5 GiB. This may grow
@@ -325,25 +334,22 @@ lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
 		hs += HASH_4_SIZE;
 */
 
-	// If the above code calculating hs is modified, make sure that
-	// this assertion stays valid (UINT32_MAX / 5 is not strictly the
-	// exact limit). If it doesn't, you need to calculate that
-	// hash_size_sum + sons_count cannot overflow.
-	assert(hs < UINT32_MAX / 5);
-
-	const uint32_t old_count = mf->hash_size_sum + mf->sons_count;
-	mf->hash_size_sum = hs;
+	const uint32_t old_hash_count = mf->hash_count;
+	const uint32_t old_sons_count = mf->sons_count;
+	mf->hash_count = hs;
 	mf->sons_count = mf->cyclic_size;
 	if (is_bt)
 		mf->sons_count *= 2;
 
-	const uint32_t new_count = mf->hash_size_sum + mf->sons_count;
-
 	// Deallocate the old hash array if it exists and has different size
 	// than what is needed now.
-	if (old_count != new_count) {
+	if (old_hash_count != mf->hash_count
+			|| old_sons_count != mf->sons_count) {
 		lzma_free(mf->hash, allocator);
 		mf->hash = NULL;
+
+		lzma_free(mf->son, allocator);
+		mf->son = NULL;
 	}
 
 	// Maximum number of match finder cycles
@@ -360,14 +366,23 @@ lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
 
 
 static bool
-lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator,
+lz_encoder_init(lzma_mf *mf, const lzma_allocator *allocator,
 		const lzma_lz_options *lz_options)
 {
 	// Allocate the history buffer.
 	if (mf->buffer == NULL) {
-		mf->buffer = lzma_alloc(mf->size, allocator);
+		// lzma_memcmplen() is used for the dictionary buffer
+		// so we need to allocate a few extra bytes to prevent
+		// it from reading past the end of the buffer.
+		mf->buffer = lzma_alloc(mf->size + LZMA_MEMCMPLEN_EXTRA,
+				allocator);
 		if (mf->buffer == NULL)
 			return true;
+
+		// Keep Valgrind happy with lzma_memcmplen() and initialize
+		// the extra bytes whose value may get read but which will
+		// effectively get ignored.
+		memzero(mf->buffer + mf->size, LZMA_MEMCMPLEN_EXTRA);
 	}
 
 	// Use cyclic_size as initial mf->offset. This allows
@@ -381,43 +396,48 @@ lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator,
 	mf->write_pos = 0;
 	mf->pending = 0;
 
-	// Allocate match finder's hash array.
-	const size_t alloc_count = mf->hash_size_sum + mf->sons_count;
-
 #if UINT32_MAX >= SIZE_MAX / 4
 	// Check for integer overflow. (Huge dictionaries are not
 	// possible on 32-bit CPU.)
-	if (alloc_count > SIZE_MAX / sizeof(uint32_t))
+	if (mf->hash_count > SIZE_MAX / sizeof(uint32_t)
+			|| mf->sons_count > SIZE_MAX / sizeof(uint32_t))
 		return true;
 #endif
 
+	// Allocate and initialize the hash table. Since EMPTY_HASH_VALUE
+	// is zero, we can use lzma_alloc_zero() or memzero() for mf->hash.
+	//
+	// We don't need to initialize mf->son, but not doing that may
+	// make Valgrind complain in normalization (see normalize() in
+	// lz_encoder_mf.c). Skipping the initialization is *very* good
+	// when a big dictionary is used but only a small amount of data
+	// actually gets compressed: most of mf->son won't actually get
+	// allocated by the kernel, so we avoid wasting RAM and improve
+	// initialization speed a lot.
 	if (mf->hash == NULL) {
-		mf->hash = lzma_alloc(alloc_count * sizeof(uint32_t),
+		mf->hash = lzma_alloc_zero(mf->hash_count * sizeof(uint32_t),
+				allocator);
+		mf->son = lzma_alloc(mf->sons_count * sizeof(uint32_t),
 				allocator);
-		if (mf->hash == NULL)
-			return true;
-	}
 
-	mf->son = mf->hash + mf->hash_size_sum;
-	mf->cyclic_pos = 0;
+		if (mf->hash == NULL || mf->son == NULL) {
+			lzma_free(mf->hash, allocator);
+			mf->hash = NULL;
 
-	// Initialize the hash table. Since EMPTY_HASH_VALUE is zero, we
-	// can use memset().
+			lzma_free(mf->son, allocator);
+			mf->son = NULL;
+
+			return true;
+		}
+	} else {
 /*
-	for (uint32_t i = 0; i < hash_size_sum; ++i)
-		mf->hash[i] = EMPTY_HASH_VALUE;
+		for (uint32_t i = 0; i < mf->hash_count; ++i)
+			mf->hash[i] = EMPTY_HASH_VALUE;
 */
-	memzero(mf->hash, (size_t)(mf->hash_size_sum) * sizeof(uint32_t));
+		memzero(mf->hash, mf->hash_count * sizeof(uint32_t));
+	}
 
-	// We don't need to initialize mf->son, but not doing that will
-	// make Valgrind complain in normalization (see normalize() in
-	// lz_encoder_mf.c).
-	//
-	// Skipping this initialization is *very* good when big dictionary is
-	// used but only small amount of data gets actually compressed: most
-	// of the mf->hash won't get actually allocated by the kernel, so
-	// we avoid wasting RAM and improve initialization speed a lot.
-	//memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t));
+	mf->cyclic_pos = 0;
 
 	// Handle preset dictionary.
 	if (lz_options->preset_dict != NULL
@@ -445,7 +465,8 @@ lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
 	lzma_mf mf = {
 		.buffer = NULL,
 		.hash = NULL,
-		.hash_size_sum = 0,
+		.son = NULL,
+		.hash_count = 0,
 		.sons_count = 0,
 	};
 
@@ -454,17 +475,17 @@ lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
 		return UINT64_MAX;
 
 	// Calculate the memory usage.
-	return (uint64_t)(mf.hash_size_sum + mf.sons_count)
-				* sizeof(uint32_t)
-			+ (uint64_t)(mf.size) + sizeof(lzma_coder);
+	return ((uint64_t)(mf.hash_count) + mf.sons_count) * sizeof(uint32_t)
+			+ mf.size + sizeof(lzma_coder);
 }
 
 
 static void
-lz_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
+lz_encoder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_next_end(&coder->next, allocator);
 
+	lzma_free(coder->mf.son, allocator);
 	lzma_free(coder->mf.hash, allocator);
 	lzma_free(coder->mf.buffer, allocator);
 
@@ -479,7 +500,7 @@ lz_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
 
 
 static lzma_ret
-lz_encoder_update(lzma_coder *coder, lzma_allocator *allocator,
+lz_encoder_update(lzma_coder *coder, const lzma_allocator *allocator,
 		const lzma_filter *filters_null lzma_attribute((__unused__)),
 		const lzma_filter *reversed_filters)
 {
@@ -495,10 +516,10 @@ lz_encoder_update(lzma_coder *coder, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters,
 		lzma_ret (*lz_init)(lzma_lz_encoder *lz,
-			lzma_allocator *allocator, const void *options,
+			const lzma_allocator *allocator, const void *options,
 			lzma_lz_options *lz_options))
 {
 #ifdef HAVE_SMALL
@@ -522,7 +543,8 @@ lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 		next->coder->mf.buffer = NULL;
 		next->coder->mf.hash = NULL;
-		next->coder->mf.hash_size_sum = 0;
+		next->coder->mf.son = NULL;
+		next->coder->mf.hash_count = 0;
 		next->coder->mf.sons_count = 0;
 
 		next->coder->next = LZMA_NEXT_CODER_INIT;
diff --git a/contrib/xz/src/liblzma/lz/lz_encoder.h b/contrib/xz/src/liblzma/lz/lz_encoder.h
index 741c453..dad9c6b 100644
--- a/contrib/xz/src/liblzma/lz/lz_encoder.h
+++ b/contrib/xz/src/liblzma/lz/lz_encoder.h
@@ -119,7 +119,7 @@ struct lzma_mf_s {
 	lzma_action action;
 
 	/// Number of elements in hash[]
-	uint32_t hash_size_sum;
+	uint32_t hash_count;
 
 	/// Number of elements in son[]
 	uint32_t sons_count;
@@ -199,7 +199,7 @@ typedef struct {
 			size_t *restrict out_pos, size_t out_size);
 
 	/// Free allocated resources
-	void (*end)(lzma_coder *coder, lzma_allocator *allocator);
+	void (*end)(lzma_coder *coder, const lzma_allocator *allocator);
 
 	/// Update the options in the middle of the encoding.
 	lzma_ret (*options_update)(lzma_coder *coder,
@@ -296,10 +296,10 @@ mf_read(lzma_mf *mf, uint8_t *out, size_t *out_pos, size_t out_size,
 
 
 extern lzma_ret lzma_lz_encoder_init(
-		lzma_next_coder *next, lzma_allocator *allocator,
+		lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters,
 		lzma_ret (*lz_init)(lzma_lz_encoder *lz,
-			lzma_allocator *allocator, const void *options,
+			const lzma_allocator *allocator, const void *options,
 			lzma_lz_options *lz_options));
 
 
diff --git a/contrib/xz/src/liblzma/lz/lz_encoder_mf.c b/contrib/xz/src/liblzma/lz/lz_encoder_mf.c
index f82a1c1..7852077 100644
--- a/contrib/xz/src/liblzma/lz/lz_encoder_mf.c
+++ b/contrib/xz/src/liblzma/lz/lz_encoder_mf.c
@@ -13,6 +13,7 @@
 
 #include "lz_encoder.h"
 #include "lz_encoder_hash.h"
+#include "memcmplen.h"
 
 
 /// \brief      Find matches starting from the current byte
@@ -65,9 +66,7 @@ lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches)
 			// here because the match distances are zero based.
 			const uint8_t *p2 = p1 - matches[count - 1].dist - 1;
 
-			while (len_best < limit
-					&& p1[len_best] == p2[len_best])
-				++len_best;
+			len_best = lzma_memcmplen(p1, p2, len_best, limit);
 		}
 	}
 
@@ -116,24 +115,27 @@ normalize(lzma_mf *mf)
 			= (MUST_NORMALIZE_POS - mf->cyclic_size);
 				// & (~(UINT32_C(1) << 10) - 1);
 
-	const uint32_t count = mf->hash_size_sum + mf->sons_count;
-	uint32_t *hash = mf->hash;
-
-	for (uint32_t i = 0; i < count; ++i) {
+	for (uint32_t i = 0; i < mf->hash_count; ++i) {
 		// If the distance is greater than the dictionary size,
 		// we can simply mark the hash element as empty.
+		if (mf->hash[i] <= subvalue)
+			mf->hash[i] = EMPTY_HASH_VALUE;
+		else
+			mf->hash[i] -= subvalue;
+	}
+
+	for (uint32_t i = 0; i < mf->sons_count; ++i) {
+		// Do the same for mf->son.
 		//
-		// NOTE: Only the first mf->hash_size_sum elements are
-		// initialized for sure. There may be uninitialized elements
-		// in mf->son. Since we go through both mf->hash and
-		// mf->son here in normalization, Valgrind may complain
-		// that the "if" below depends on uninitialized value. In
-		// this case it is safe to ignore the warning. See also the
-		// comments in lz_encoder_init() in lz_encoder.c.
-		if (hash[i] <= subvalue)
-			hash[i] = EMPTY_HASH_VALUE;
+		// NOTE: There may be uninitialized elements in mf->son.
+		// Valgrind may complain that the "if" below depends on
+		// an uninitialized value. In this case it is safe to ignore
+		// the warning. See also the comments in lz_encoder_init()
+		// in lz_encoder.c.
+		if (mf->son[i] <= subvalue)
+			mf->son[i] = EMPTY_HASH_VALUE;
 		else
-			hash[i] -= subvalue;
+			mf->son[i] -= subvalue;
 	}
 
 	// Update offset to match the new locations.
@@ -269,10 +271,7 @@ hc_find_func(
 				+ (delta > cyclic_pos ? cyclic_size : 0)];
 
 		if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) {
-			uint32_t len = 0;
-			while (++len != len_limit)
-				if (pb[len] != cur[len])
-					break;
+			uint32_t len = lzma_memcmplen(pb, cur, 1, len_limit);
 
 			if (len_best < len) {
 				len_best = len;
@@ -318,9 +317,8 @@ lzma_mf_hc3_find(lzma_mf *mf, lzma_match *matches)
 	uint32_t len_best = 2;
 
 	if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
-		for ( ; len_best != len_limit; ++len_best)
-			if (*(cur + len_best - delta2) != cur[len_best])
-				break;
+		len_best = lzma_memcmplen(cur - delta2, cur,
+				len_best, len_limit);
 
 		matches[0].len = len_best;
 		matches[0].dist = delta2 - 1;
@@ -397,9 +395,8 @@ lzma_mf_hc4_find(lzma_mf *mf, lzma_match *matches)
 	}
 
 	if (matches_count != 0) {
-		for ( ; len_best != len_limit; ++len_best)
-			if (*(cur + len_best - delta2) != cur[len_best])
-				break;
+		len_best = lzma_memcmplen(cur - delta2, cur,
+				len_best, len_limit);
 
 		matches[matches_count - 1].len = len_best;
 
@@ -484,9 +481,7 @@ bt_find_func(
 		uint32_t len = my_min(len0, len1);
 
 		if (pb[len] == cur[len]) {
-			while (++len != len_limit)
-				if (pb[len] != cur[len])
-					break;
+			len = lzma_memcmplen(pb, cur, len + 1, len_limit);
 
 			if (len_best < len) {
 				len_best = len;
@@ -549,9 +544,7 @@ bt_skip_func(
 		uint32_t len = my_min(len0, len1);
 
 		if (pb[len] == cur[len]) {
-			while (++len != len_limit)
-				if (pb[len] != cur[len])
-					break;
+			len = lzma_memcmplen(pb, cur, len + 1, len_limit);
 
 			if (len == len_limit) {
 				*ptr1 = pair[0];
@@ -639,9 +632,8 @@ lzma_mf_bt3_find(lzma_mf *mf, lzma_match *matches)
 	uint32_t len_best = 2;
 
 	if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
-		for ( ; len_best != len_limit; ++len_best)
-			if (*(cur + len_best - delta2) != cur[len_best])
-				break;
+		len_best = lzma_memcmplen(
+				cur, cur - delta2, len_best, len_limit);
 
 		matches[0].len = len_best;
 		matches[0].dist = delta2 - 1;
@@ -712,9 +704,8 @@ lzma_mf_bt4_find(lzma_mf *mf, lzma_match *matches)
 	}
 
 	if (matches_count != 0) {
-		for ( ; len_best != len_limit; ++len_best)
-			if (*(cur + len_best - delta2) != cur[len_best])
-				break;
+		len_best = lzma_memcmplen(
+				cur, cur - delta2, len_best, len_limit);
 
 		matches[matches_count - 1].len = len_best;