diff options
Diffstat (limited to 'subversion/libsvn_delta/xdelta.c')
-rw-r--r-- | subversion/libsvn_delta/xdelta.c | 514 |
1 file changed, 514 insertions, 0 deletions
diff --git a/subversion/libsvn_delta/xdelta.c b/subversion/libsvn_delta/xdelta.c new file mode 100644 index 0000000..2075a51 --- /dev/null +++ b/subversion/libsvn_delta/xdelta.c @@ -0,0 +1,514 @@ +/* + * xdelta.c: xdelta generator. + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + + +#include <assert.h> + +#include <apr_general.h> /* for APR_INLINE */ +#include <apr_hash.h> + +#include "svn_hash.h" +#include "svn_delta.h" +#include "delta.h" + +/* This is pseudo-adler32. It is adler32 without the prime modulus. + The idea is borrowed from monotone, and is a translation of the C++ + code. Graydon Hoare, the author of the original code, gave his + explicit permission to use it under these terms at 8:02pm on + Friday, February 11th, 2005. */ + +/* Size of the blocks we compute checksums for. This was chosen out of + thin air. Monotone used 64, xdelta1 used 64, rsync uses 128. + However, later optimizations assume it to be 256 or less. 
+ */ +#define MATCH_BLOCKSIZE 64 + +/* "no" / "invalid" / "unused" value for positions within the delta windows + */ +#define NO_POSITION ((apr_uint32_t)-1) + +/* Feed C_IN into the adler32 checksum and remove C_OUT at the same time. + * This function may (and will) only be called for characters that are + * MATCH_BLOCKSIZE positions apart. + * + * Please note that the lower 16 bits cannot overflow in neither direction. + * Therefore, we don't need to split the value into separate values for + * sum(char) and sum(sum(char)). + */ +static APR_INLINE apr_uint32_t +adler32_replace(apr_uint32_t adler32, const char c_out, const char c_in) +{ + adler32 -= (MATCH_BLOCKSIZE * 0x10000u * ((unsigned char) c_out)); + + adler32 -= (unsigned char)c_out; + adler32 += (unsigned char)c_in; + + return adler32 + adler32 * 0x10000; +} + +/* Calculate an pseudo-adler32 checksum for MATCH_BLOCKSIZE bytes starting + at DATA. Return the checksum value. */ + +static APR_INLINE apr_uint32_t +init_adler32(const char *data) +{ + const unsigned char *input = (const unsigned char *)data; + const unsigned char *last = input + MATCH_BLOCKSIZE; + + apr_uint32_t s1 = 0; + apr_uint32_t s2 = 0; + + for (; input < last; input += 8) + { + s1 += input[0]; s2 += s1; + s1 += input[1]; s2 += s1; + s1 += input[2]; s2 += s1; + s1 += input[3]; s2 += s1; + s1 += input[4]; s2 += s1; + s1 += input[5]; s2 += s1; + s1 += input[6]; s2 += s1; + s1 += input[7]; s2 += s1; + } + + return s2 * 0x10000 + s1; +} + +/* Information for a block of the delta source. The length of the + block is the smaller of MATCH_BLOCKSIZE and the difference between + the size of the source data and the position of this block. */ +struct block +{ + apr_uint32_t adlersum; + +/* Even in 64 bit systems, store only 32 bit offsets in our hash table + (our delta window size much much smaller then 4GB). + That reduces the hash table size by 50% from 32to 16KB + and makes it easier to fit into the CPU's L1 cache. 
*/ + apr_uint32_t pos; /* NO_POSITION -> block is not used */ +}; + +/* A hash table, using open addressing, of the blocks of the source. */ +struct blocks +{ + /* The largest valid index of slots. + This value has an upper bound proportionate to the text delta + window size, so unless we dramatically increase the window size, + it's safe to make this a 32-bit value. In any case, it has to be + hte same width as the block position index, (struct + block).pos. */ + apr_uint32_t max; + /* Source buffer that the positions in SLOTS refer to. */ + const char* data; + /* The vector of blocks. A pos value of NO_POSITION represents an unused + slot. */ + struct block *slots; +}; + + +/* Return a hash value calculated from the adler32 SUM, suitable for use with + our hash table. */ +static apr_uint32_t hash_func(apr_uint32_t sum) +{ + /* Since the adl32 checksum have a bad distribution for the 11th to 16th + bits when used for our small block size, we add some bits from the + other half of the checksum. */ + return sum ^ (sum >> 12); +} + +/* Insert a block with the checksum ADLERSUM at position POS in the source + data into the table BLOCKS. Ignore true duplicates, i.e. blocks with + actually the same content. */ +static void +add_block(struct blocks *blocks, apr_uint32_t adlersum, apr_uint32_t pos) +{ + apr_uint32_t h = hash_func(adlersum) & blocks->max; + + /* This will terminate, since we know that we will not fill the table. */ + for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max) + if (blocks->slots[h].adlersum == adlersum) + if (memcmp(blocks->data + blocks->slots[h].pos, blocks->data + pos, + MATCH_BLOCKSIZE) == 0) + return; + + blocks->slots[h].adlersum = adlersum; + blocks->slots[h].pos = pos; +} + +/* Find a block in BLOCKS with the checksum ADLERSUM and matching the content + at DATA, returning its position in the source data. If there is no such + block, return NO_POSITION. 
*/ +static apr_uint32_t +find_block(const struct blocks *blocks, + apr_uint32_t adlersum, + const char* data) +{ + apr_uint32_t h = hash_func(adlersum) & blocks->max; + + for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max) + if (blocks->slots[h].adlersum == adlersum) + if (memcmp(blocks->data + blocks->slots[h].pos, data, + MATCH_BLOCKSIZE) == 0) + return blocks->slots[h].pos; + + return NO_POSITION; +} + +/* Initialize the matches table from DATA of size DATALEN. This goes + through every block of MATCH_BLOCKSIZE bytes in the source and + checksums it, inserting the result into the BLOCKS table. */ +static void +init_blocks_table(const char *data, + apr_size_t datalen, + struct blocks *blocks, + apr_pool_t *pool) +{ + apr_size_t nblocks; + apr_size_t wnslots = 1; + apr_uint32_t nslots; + apr_uint32_t i; + + /* Be pessimistic about the block count. */ + nblocks = datalen / MATCH_BLOCKSIZE + 1; + /* Find nearest larger power of two. */ + while (wnslots <= nblocks) + wnslots *= 2; + /* Double the number of slots to avoid a too high load. */ + wnslots *= 2; + /* Narrow the number of slots to 32 bits, which is the size of the + block position index in the hash table. + Sanity check: On 64-bit platforms, apr_size_t is likely to be + larger than apr_uint32_t. Make sure that the number of slots + actually fits into blocks->max. It's safe to use a hard assert + here, because the largest possible value for nslots is + proportional to the text delta window size and is therefore much + smaller than the range of an apr_uint32_t. If we ever happen to + increase the window size too much, this assertion will get + triggered by the test suite. */ + nslots = (apr_uint32_t) wnslots; + SVN_ERR_ASSERT_NO_RETURN(wnslots == nslots); + blocks->max = nslots - 1; + blocks->data = data; + blocks->slots = apr_palloc(pool, nslots * sizeof(*(blocks->slots))); + for (i = 0; i < nslots; ++i) + { + /* Avoid using an indeterminate value in the lookup. 
*/ + blocks->slots[i].adlersum = 0; + blocks->slots[i].pos = NO_POSITION; + } + + /* If there is an odd block at the end of the buffer, we will + not use that shorter block for deltification (only indirectly + as an extension of some previous block). */ + for (i = 0; i + MATCH_BLOCKSIZE <= datalen; i += MATCH_BLOCKSIZE) + add_block(blocks, init_adler32(data + i), i); +} + +/* Return the lowest position at which A and B differ. If no difference + * can be found in the first MAX_LEN characters, MAX_LEN will be returned. + */ +static apr_size_t +match_length(const char *a, const char *b, apr_size_t max_len) +{ + apr_size_t pos = 0; + +#if SVN_UNALIGNED_ACCESS_IS_OK + + /* Chunky processing is so much faster ... + * + * We can't make this work on architectures that require aligned access + * because A and B will probably have different alignment. So, skipping + * the first few chars until alignment is reached is not an option. + */ + for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t)) + if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos)) + break; + +#endif + + for (; pos < max_len; ++pos) + if (a[pos] != b[pos]) + break; + + return pos; +} + +/* Return the number of bytes before A and B that don't differ. If no + * difference can be found in the first MAX_LEN characters, MAX_LEN will + * be returned. Please note that A-MAX_LEN and B-MAX_LEN must both be + * valid addresses. + */ +static apr_size_t +reverse_match_length(const char *a, const char *b, apr_size_t max_len) +{ + apr_size_t pos = 0; + +#if SVN_UNALIGNED_ACCESS_IS_OK + + /* Chunky processing is so much faster ... + * + * We can't make this work on architectures that require aligned access + * because A and B will probably have different alignment. So, skipping + * the first few chars until alignment is reached is not an option. 
+ */ + for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t)) + if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos)) + break; + + pos -= sizeof(apr_size_t); + +#endif + + /* If we find a mismatch at -pos, pos-1 characters matched. + */ + while (++pos <= max_len) + if (a[0-pos] != b[0-pos]) + return pos - 1; + + /* No mismatch found -> at least MAX_LEN matching chars. + */ + return max_len; +} + + +/* Try to find a match for the target data B in BLOCKS, and then + extend the match as long as data in A and B at the match position + continues to match. We set the position in A we ended up in (in + case we extended it backwards) in APOSP and update the corresponding + position within B given in BPOSP. PENDING_INSERT_START sets the + lower limit to BPOSP. + Return number of matching bytes starting at ASOP. Return 0 if + no match has been found. + */ +static apr_size_t +find_match(const struct blocks *blocks, + const apr_uint32_t rolling, + const char *a, + apr_size_t asize, + const char *b, + apr_size_t bsize, + apr_size_t *bposp, + apr_size_t *aposp, + apr_size_t pending_insert_start) +{ + apr_size_t apos, bpos = *bposp; + apr_size_t delta, max_delta; + + apos = find_block(blocks, rolling, b + bpos); + + /* See if we have a match. */ + if (apos == NO_POSITION) + return 0; + + /* Extend the match forward as far as possible */ + max_delta = asize - apos - MATCH_BLOCKSIZE < bsize - bpos - MATCH_BLOCKSIZE + ? asize - apos - MATCH_BLOCKSIZE + : bsize - bpos - MATCH_BLOCKSIZE; + delta = match_length(a + apos + MATCH_BLOCKSIZE, + b + bpos + MATCH_BLOCKSIZE, + max_delta); + + /* See if we can extend backwards (max MATCH_BLOCKSIZE-1 steps because A's + content has been sampled only every MATCH_BLOCKSIZE positions). 
*/ + while (apos > 0 && bpos > pending_insert_start && a[apos-1] == b[bpos-1]) + { + --apos; + --bpos; + ++delta; + } + + *aposp = apos; + *bposp = bpos; + + return MATCH_BLOCKSIZE + delta; +} + +/* Utility for compute_delta() that compares the range B[START,BSIZE) with + * the range of similar size before A[ASIZE]. Create corresponding copy and + * insert operations. + * + * BUILD_BATON and POOL will be passed through from compute_delta(). + */ +static void +store_delta_trailer(svn_txdelta__ops_baton_t *build_baton, + const char *a, + apr_size_t asize, + const char *b, + apr_size_t bsize, + apr_size_t start, + apr_pool_t *pool) +{ + apr_size_t end_match; + apr_size_t max_len = asize > (bsize - start) ? bsize - start : asize; + if (max_len == 0) + return; + + end_match = reverse_match_length(a + asize, b + bsize, max_len); + if (end_match <= 4) + end_match = 0; + + if (bsize - start > end_match) + svn_txdelta__insert_op(build_baton, svn_txdelta_new, + start, bsize - start - end_match, b + start, pool); + if (end_match) + svn_txdelta__insert_op(build_baton, svn_txdelta_source, + asize - end_match, end_match, NULL, pool); +} + + +/* Compute a delta from A to B using xdelta. + + The basic xdelta algorithm is as follows: + + 1. Go through the source data, checksumming every MATCH_BLOCKSIZE + block of bytes using adler32, and inserting the checksum into a + match table with the position of the match. + 2. Go through the target byte by byte, seeing if that byte starts a + match that we have in the match table. + 2a. If so, try to extend the match as far as possible both + forwards and backwards, and then insert a source copy + operation into the delta ops builder for the match. + 2b. If not, insert the byte as new data using an insert delta op. + + Our implementation doesn't immediately insert "insert" operations, + it waits until we have another copy, or we are done. The reasoning + is twofold: + + 1. 
Otherwise, we would just be building a ton of 1 byte insert + operations + 2. So that we can extend a source match backwards into a pending + insert operation, and possibly remove the need for the insert + entirely. This can happen due to stream alignment. +*/ +static void +compute_delta(svn_txdelta__ops_baton_t *build_baton, + const char *a, + apr_size_t asize, + const char *b, + apr_size_t bsize, + apr_pool_t *pool) +{ + struct blocks blocks; + apr_uint32_t rolling; + apr_size_t lo = 0, pending_insert_start = 0; + + /* Optimization: directly compare window starts. If more than 4 + * bytes match, we can immediately create a matching windows. + * Shorter sequences result in a net data increase. */ + lo = match_length(a, b, asize > bsize ? bsize : asize); + if ((lo > 4) || (lo == bsize)) + { + svn_txdelta__insert_op(build_baton, svn_txdelta_source, + 0, lo, NULL, pool); + pending_insert_start = lo; + } + else + lo = 0; + + /* If the size of the target is smaller than the match blocksize, just + insert the entire target. */ + if ((bsize - lo < MATCH_BLOCKSIZE) || (asize < MATCH_BLOCKSIZE)) + { + store_delta_trailer(build_baton, a, asize, b, bsize, lo, pool); + return; + } + + /* Initialize the matches table. */ + init_blocks_table(a, asize, &blocks, pool); + + /* Initialize our rolling checksum. */ + rolling = init_adler32(b + lo); + while (lo < bsize) + { + apr_size_t matchlen = 0; + apr_size_t apos; + + if (lo + MATCH_BLOCKSIZE <= bsize) + matchlen = find_match(&blocks, rolling, a, asize, b, bsize, + &lo, &apos, pending_insert_start); + + /* If we didn't find a real match, insert the byte at the target + position into the pending insert. */ + if (matchlen == 0) + { + /* move block one position forward. 
Short blocks at the end of + the buffer cannot be used as the beginning of a new match */ + if (lo + MATCH_BLOCKSIZE < bsize) + rolling = adler32_replace(rolling, b[lo], b[lo+MATCH_BLOCKSIZE]); + + lo++; + } + else + { + /* store the sequence of B that is between the matches */ + if (lo - pending_insert_start > 0) + svn_txdelta__insert_op(build_baton, svn_txdelta_new, + 0, lo - pending_insert_start, + b + pending_insert_start, pool); + else + { + /* the match borders on the previous op. Maybe, we found a + * match that is better than / overlapping the previous one. */ + apr_size_t len = reverse_match_length(a + apos, b + lo, apos < lo ? apos : lo); + if (len > 0) + { + len = svn_txdelta__remove_copy(build_baton, len); + apos -= len; + matchlen += len; + lo -= len; + } + } + + /* Reset the pending insert start to immediately after the + match. */ + lo += matchlen; + pending_insert_start = lo; + svn_txdelta__insert_op(build_baton, svn_txdelta_source, + apos, matchlen, NULL, pool); + + /* Calculate the Adler32 sum for the first block behind the match. + * Ignore short buffers at the end of B. + */ + if (lo + MATCH_BLOCKSIZE <= bsize) + rolling = init_adler32(b + lo); + } + } + + /* If we still have an insert pending at the end, throw it in. */ + store_delta_trailer(build_baton, a, asize, b, bsize, pending_insert_start, pool); +} + +void +svn_txdelta__xdelta(svn_txdelta__ops_baton_t *build_baton, + const char *data, + apr_size_t source_len, + apr_size_t target_len, + apr_pool_t *pool) +{ + /* We should never be asked to compute something when the source_len is 0; + we just use a single insert op there (and rely on zlib for + compression). */ + assert(source_len != 0); + compute_delta(build_baton, data, source_len, + data + source_len, target_len, + pool); +} |