diff options
Diffstat (limited to 'subversion/libsvn_fs_base/reps-strings.c')
-rw-r--r-- | subversion/libsvn_fs_base/reps-strings.c | 1617 |
1 files changed, 1617 insertions, 0 deletions
diff --git a/subversion/libsvn_fs_base/reps-strings.c b/subversion/libsvn_fs_base/reps-strings.c new file mode 100644 index 0000000..553075d --- /dev/null +++ b/subversion/libsvn_fs_base/reps-strings.c @@ -0,0 +1,1617 @@ +/* reps-strings.c : intepreting representations with respect to strings + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include <assert.h> + +#include "svn_fs.h" +#include "svn_pools.h" + +#include "fs.h" +#include "err.h" +#include "trail.h" +#include "reps-strings.h" + +#include "bdb/reps-table.h" +#include "bdb/strings-table.h" + +#include "../libsvn_fs/fs-loader.h" +#define SVN_WANT_BDB +#include "svn_private_config.h" + + +/*** Helper Functions ***/ + + +/* Return non-zero iff REP is mutable under transaction TXN_ID. */ +static svn_boolean_t rep_is_mutable(representation_t *rep, + const char *txn_id) +{ + if ((! rep->txn_id) || (strcmp(rep->txn_id, txn_id) != 0)) + return FALSE; + return TRUE; +} + +/* Helper macro that evaluates to an error message indicating that + the representation referred to by X has an unknown node kind. */ +#define UNKNOWN_NODE_KIND(x) \ + svn_error_createf \ + (SVN_ERR_FS_CORRUPT, NULL, \ + _("Unknown node kind for representation '%s'"), x) + +/* Return a `fulltext' representation, allocated in POOL, which + * references the string STR_KEY. + * + * If TXN_ID is non-zero and non-NULL, make the representation mutable + * under that TXN_ID. + * + * If STR_KEY is non-null, copy it into an allocation from POOL. + * + * If MD5_CHECKSUM is non-null, use it as the MD5 checksum for the new + * rep; else initialize the rep with an all-zero (i.e., always + * successful) MD5 checksum. + * + * If SHA1_CHECKSUM is non-null, use it as the SHA1 checksum for the new + * rep; else initialize the rep with an all-zero (i.e., always + * successful) SHA1 checksum. + */ +static representation_t * +make_fulltext_rep(const char *str_key, + const char *txn_id, + svn_checksum_t *md5_checksum, + svn_checksum_t *sha1_checksum, + apr_pool_t *pool) + +{ + representation_t *rep = apr_pcalloc(pool, sizeof(*rep)); + if (txn_id && *txn_id) + rep->txn_id = apr_pstrdup(pool, txn_id); + rep->kind = rep_kind_fulltext; + rep->md5_checksum = svn_checksum_dup(md5_checksum, pool); + rep->sha1_checksum = svn_checksum_dup(sha1_checksum, pool); + rep->contents.fulltext.string_key + = str_key ? apr_pstrdup(pool, str_key) : NULL; + return rep; +} + + +/* Set *KEYS to an array of string keys gleaned from `delta' + representation REP. Allocate *KEYS in POOL. */ +static svn_error_t * +delta_string_keys(apr_array_header_t **keys, + const representation_t *rep, + apr_pool_t *pool) +{ + const char *key; + int i; + apr_array_header_t *chunks; + + if (rep->kind != rep_kind_delta) + return svn_error_create + (SVN_ERR_FS_GENERAL, NULL, + _("Representation is not of type 'delta'")); + + /* Set up a convenience variable. */ + chunks = rep->contents.delta.chunks; + + /* Initialize *KEYS to an empty array. */ + *keys = apr_array_make(pool, chunks->nelts, sizeof(key)); + if (! chunks->nelts) + return SVN_NO_ERROR; + + /* Now, push the string keys for each window into *KEYS */ + for (i = 0; i < chunks->nelts; i++) + { + rep_delta_chunk_t *chunk = APR_ARRAY_IDX(chunks, i, rep_delta_chunk_t *); + + key = apr_pstrdup(pool, chunk->string_key); + APR_ARRAY_PUSH(*keys, const char *) = key; + } + + return SVN_NO_ERROR; +} + + +/* Delete the strings associated with array KEYS in FS as part of TRAIL. */ +static svn_error_t * +delete_strings(const apr_array_header_t *keys, + svn_fs_t *fs, + trail_t *trail, + apr_pool_t *pool) +{ + int i; + const char *str_key; + apr_pool_t *subpool = svn_pool_create(pool); + + for (i = 0; i < keys->nelts; i++) + { + svn_pool_clear(subpool); + str_key = APR_ARRAY_IDX(keys, i, const char *); + SVN_ERR(svn_fs_bdb__string_delete(fs, str_key, trail, subpool)); + } + svn_pool_destroy(subpool); + return SVN_NO_ERROR; +} + + + +/*** Reading the contents from a representation. ***/ + +struct compose_handler_baton +{ + /* The combined window, and the pool it's allocated from. */ + svn_txdelta_window_t *window; + apr_pool_t *window_pool; + + /* If the incoming window was self-compressed, and the combined WINDOW + exists from previous iterations, SOURCE_BUF will point to the + expanded self-compressed window. */ + char *source_buf; + + /* The trail for this operation. WINDOW_POOL will be a child of + TRAIL->pool. No allocations will be made from TRAIL->pool itself. */ + trail_t *trail; + + /* TRUE when no more windows have to be read/combined. */ + svn_boolean_t done; + + /* TRUE if we've just started reading a new window. We need this + because the svndiff handler will push a NULL window at the end of + the stream, and we have to ignore that; but we must also know + when it's appropriate to push a NULL window at the combiner. */ + svn_boolean_t init; +}; + + +/* Handle one window. If BATON is emtpy, copy the WINDOW into it; + otherwise, combine WINDOW with the one in BATON, unless WINDOW + is self-compressed (i.e., does not copy from the source view), + in which case expand. */ + +static svn_error_t * +compose_handler(svn_txdelta_window_t *window, void *baton) +{ + struct compose_handler_baton *cb = baton; + SVN_ERR_ASSERT(!cb->done || window == NULL); + SVN_ERR_ASSERT(cb->trail && cb->trail->pool); + + if (!cb->init && !window) + return SVN_NO_ERROR; + + /* We should never get here if we've already expanded a + self-compressed window. */ + SVN_ERR_ASSERT(!cb->source_buf); + + if (cb->window) + { + if (window && (window->sview_len == 0 || window->src_ops == 0)) + { + /* This is a self-compressed window. Don't combine it with + the others, because the combiner may go quadratic. Instead, + expand it here and signal that the combination has + ended. */ + apr_size_t source_len = window->tview_len; + SVN_ERR_ASSERT(cb->window->sview_len == source_len); + cb->source_buf = apr_palloc(cb->window_pool, source_len); + svn_txdelta_apply_instructions(window, NULL, + cb->source_buf, &source_len); + cb->done = TRUE; + } + else + { + /* Combine the incoming window with whatever's in the baton. */ + apr_pool_t *composite_pool = svn_pool_create(cb->trail->pool); + svn_txdelta_window_t *composite; + + composite = svn_txdelta_compose_windows(window, cb->window, + composite_pool); + svn_pool_destroy(cb->window_pool); + cb->window = composite; + cb->window_pool = composite_pool; + cb->done = (composite->sview_len == 0 || composite->src_ops == 0); + } + } + else if (window) + { + /* Copy the (first) window into the baton. */ + apr_pool_t *window_pool = svn_pool_create(cb->trail->pool); + SVN_ERR_ASSERT(cb->window_pool == NULL); + cb->window = svn_txdelta_window_dup(window, window_pool); + cb->window_pool = window_pool; + cb->done = (window->sview_len == 0 || window->src_ops == 0); + } + else + cb->done = TRUE; + + cb->init = FALSE; + return SVN_NO_ERROR; +} + + + +/* Read one delta window from REP[CUR_CHUNK] and push it at the + composition handler. */ + +static svn_error_t * +get_one_window(struct compose_handler_baton *cb, + svn_fs_t *fs, + representation_t *rep, + int cur_chunk) +{ + svn_stream_t *wstream; + char diffdata[4096]; /* hunk of svndiff data */ + svn_filesize_t off; /* offset into svndiff data */ + apr_size_t amt; /* how much svndiff data to/was read */ + const char *str_key; + + apr_array_header_t *chunks = rep->contents.delta.chunks; + rep_delta_chunk_t *this_chunk, *first_chunk; + + cb->init = TRUE; + if (chunks->nelts <= cur_chunk) + return compose_handler(NULL, cb); + + /* Set up a window handling stream for the svndiff data. */ + wstream = svn_txdelta_parse_svndiff(compose_handler, cb, TRUE, + cb->trail->pool); + + /* First things first: send the "SVN"{version} header through the + stream. ### For now, we will just use the version specified + in the first chunk, and then verify that no chunks have a + different version number than the one used. In the future, + we might simply convert chunks that use a different version + of the diff format -- or, heck, a different format + altogether -- to the format/version of the first chunk. */ + first_chunk = APR_ARRAY_IDX(chunks, 0, rep_delta_chunk_t*); + diffdata[0] = 'S'; + diffdata[1] = 'V'; + diffdata[2] = 'N'; + diffdata[3] = (char) (first_chunk->version); + amt = 4; + SVN_ERR(svn_stream_write(wstream, diffdata, &amt)); + /* FIXME: The stream write handler is borked; assert (amt == 4); */ + + /* Get this string key which holds this window's data. + ### todo: make sure this is an `svndiff' DIFF skel here. */ + this_chunk = APR_ARRAY_IDX(chunks, cur_chunk, rep_delta_chunk_t*); + str_key = this_chunk->string_key; + + /* Run through the svndiff data, at least as far as necessary. */ + off = 0; + do + { + amt = sizeof(diffdata); + SVN_ERR(svn_fs_bdb__string_read(fs, str_key, diffdata, + off, &amt, cb->trail, + cb->trail->pool)); + off += amt; + SVN_ERR(svn_stream_write(wstream, diffdata, &amt)); + } + while (amt != 0); + SVN_ERR(svn_stream_close(wstream)); + + SVN_ERR_ASSERT(!cb->init); + SVN_ERR_ASSERT(cb->window != NULL); + SVN_ERR_ASSERT(cb->window_pool != NULL); + return SVN_NO_ERROR; +} + + +/* Undeltify a range of data. DELTAS is the set of delta windows to + combine, FULLTEXT is the source text, CUR_CHUNK is the index of the + delta chunk we're starting from. OFFSET is the relative offset of + the requested data within the chunk; BUF and LEN are what we're + undeltifying to. */ + +static svn_error_t * +rep_undeltify_range(svn_fs_t *fs, + const apr_array_header_t *deltas, + representation_t *fulltext, + int cur_chunk, + char *buf, + apr_size_t offset, + apr_size_t *len, + trail_t *trail, + apr_pool_t *pool) +{ + apr_size_t len_read = 0; + + do + { + struct compose_handler_baton cb = { 0 }; + char *source_buf, *target_buf; + apr_size_t target_len; + int cur_rep; + + cb.trail = trail; + cb.done = FALSE; + for (cur_rep = 0; !cb.done && cur_rep < deltas->nelts; ++cur_rep) + { + representation_t *const rep = + APR_ARRAY_IDX(deltas, cur_rep, representation_t*); + SVN_ERR(get_one_window(&cb, fs, rep, cur_chunk)); + } + + if (!cb.window) + /* That's it, no more source data is available. */ + break; + + /* The source view length should not be 0 if there are source + copy ops in the window. */ + SVN_ERR_ASSERT(cb.window->sview_len > 0 || cb.window->src_ops == 0); + + /* cb.window is the combined delta window. Read the source text + into a buffer. */ + if (cb.source_buf) + { + /* The combiner already created the source text from a + self-compressed window. */ + source_buf = cb.source_buf; + } + else if (fulltext && cb.window->sview_len > 0 && cb.window->src_ops > 0) + { + apr_size_t source_len = cb.window->sview_len; + source_buf = apr_palloc(cb.window_pool, source_len); + SVN_ERR(svn_fs_bdb__string_read + (fs, fulltext->contents.fulltext.string_key, + source_buf, cb.window->sview_offset, &source_len, + trail, pool)); + if (source_len != cb.window->sview_len) + return svn_error_create + (SVN_ERR_FS_CORRUPT, NULL, + _("Svndiff source length inconsistency")); + } + else + { + source_buf = NULL; /* Won't read anything from here. */ + } + + if (offset > 0) + { + target_len = *len - len_read + offset; + target_buf = apr_palloc(cb.window_pool, target_len); + } + else + { + target_len = *len - len_read; + target_buf = buf; + } + + svn_txdelta_apply_instructions(cb.window, source_buf, + target_buf, &target_len); + if (offset > 0) + { + SVN_ERR_ASSERT(target_len > offset); + target_len -= offset; + memcpy(buf, target_buf + offset, target_len); + offset = 0; /* Read from the beginning of the next chunk. */ + } + /* Don't need this window any more. */ + svn_pool_destroy(cb.window_pool); + + len_read += target_len; + buf += target_len; + ++cur_chunk; + } + while (len_read < *len); + + *len = len_read; + return SVN_NO_ERROR; +} + + + +/* Calculate the index of the chunk in REP that contains REP_OFFSET, + and find the relative CHUNK_OFFSET within the chunk. + Return -1 if offset is beyond the end of the represented data. + ### The basic assumption is that all delta windows are the same size + and aligned at the same offset, so this number is the same in all + dependent deltas. Oh, and the chunks in REP must be ordered. */ + +static int +get_chunk_offset(representation_t *rep, + svn_filesize_t rep_offset, + apr_size_t *chunk_offset) +{ + const apr_array_header_t *chunks = rep->contents.delta.chunks; + int cur_chunk; + assert(chunks->nelts); + + /* ### Yes, this is a linear search. I'll change this to bisection + the very second we notice it's slowing us down. */ + for (cur_chunk = 0; cur_chunk < chunks->nelts; ++cur_chunk) + { + const rep_delta_chunk_t *const this_chunk + = APR_ARRAY_IDX(chunks, cur_chunk, rep_delta_chunk_t*); + + if ((this_chunk->offset + this_chunk->size) > rep_offset) + { + assert(this_chunk->offset <= rep_offset); + assert(rep_offset - this_chunk->offset < SVN_MAX_OBJECT_SIZE); + *chunk_offset = (apr_size_t) (rep_offset - this_chunk->offset); + return cur_chunk; + } + } + + return -1; +} + +/* Copy into BUF *LEN bytes starting at OFFSET from the string + represented via REP_KEY in FS, as part of TRAIL. + The number of bytes actually copied is stored in *LEN. */ +static svn_error_t * +rep_read_range(svn_fs_t *fs, + const char *rep_key, + svn_filesize_t offset, + char *buf, + apr_size_t *len, + trail_t *trail, + apr_pool_t *pool) +{ + representation_t *rep; + apr_size_t chunk_offset; + + /* Read in our REP. */ + SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); + if (rep->kind == rep_kind_fulltext) + { + SVN_ERR(svn_fs_bdb__string_read(fs, rep->contents.fulltext.string_key, + buf, offset, len, trail, pool)); + } + else if (rep->kind == rep_kind_delta) + { + const int cur_chunk = get_chunk_offset(rep, offset, &chunk_offset); + if (cur_chunk < 0) + *len = 0; + else + { + svn_error_t *err; + /* Preserve for potential use in error message. */ + const char *first_rep_key = rep_key; + /* Make a list of all the rep's we need to undeltify this range. + We'll have to read them within this trail anyway, so we might + as well do it once and up front. */ + apr_array_header_t *reps = apr_array_make(pool, 30, sizeof(rep)); + do + { + const rep_delta_chunk_t *const first_chunk + = APR_ARRAY_IDX(rep->contents.delta.chunks, + 0, rep_delta_chunk_t*); + const rep_delta_chunk_t *const chunk + = APR_ARRAY_IDX(rep->contents.delta.chunks, + cur_chunk, rep_delta_chunk_t*); + + /* Verify that this chunk is of the same version as the first. */ + if (first_chunk->version != chunk->version) + return svn_error_createf + (SVN_ERR_FS_CORRUPT, NULL, + _("Diff version inconsistencies in representation '%s'"), + rep_key); + + rep_key = chunk->rep_key; + APR_ARRAY_PUSH(reps, representation_t *) = rep; + SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, + trail, pool)); + } + while (rep->kind == rep_kind_delta + && rep->contents.delta.chunks->nelts > cur_chunk); + + /* Right. We've either just read the fulltext rep, or a rep that's + too short, in which case we'll undeltify without source data.*/ + if (rep->kind != rep_kind_delta && rep->kind != rep_kind_fulltext) + return UNKNOWN_NODE_KIND(rep_key); + + if (rep->kind == rep_kind_delta) + rep = NULL; /* Don't use source data */ + + err = rep_undeltify_range(fs, reps, rep, cur_chunk, buf, + chunk_offset, len, trail, pool); + if (err) + { + if (err->apr_err == SVN_ERR_FS_CORRUPT) + return svn_error_createf + (SVN_ERR_FS_CORRUPT, err, + _("Corruption detected whilst reading delta chain from " + "representation '%s' to '%s'"), first_rep_key, rep_key); + else + return svn_error_trace(err); + } + } + } + else /* unknown kind */ + return UNKNOWN_NODE_KIND(rep_key); + + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_fs_base__get_mutable_rep(const char **new_rep_key, + const char *rep_key, + svn_fs_t *fs, + const char *txn_id, + trail_t *trail, + apr_pool_t *pool) +{ + representation_t *rep = NULL; + const char *new_str = NULL; + + /* We were passed an existing REP_KEY, so examine it. If it is + mutable already, then just return REP_KEY as the mutable result + key. */ + if (rep_key && (rep_key[0] != '\0')) + { + SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); + if (rep_is_mutable(rep, txn_id)) + { + *new_rep_key = rep_key; + return SVN_NO_ERROR; + } + } + + /* Either we weren't provided a base key to examine, or the base key + we were provided was not mutable. So, let's make a new + representation and return its key to the caller. */ + SVN_ERR(svn_fs_bdb__string_append(fs, &new_str, 0, NULL, trail, pool)); + rep = make_fulltext_rep(new_str, txn_id, + svn_checksum_empty_checksum(svn_checksum_md5, + pool), + svn_checksum_empty_checksum(svn_checksum_sha1, + pool), + pool); + return svn_fs_bdb__write_new_rep(new_rep_key, fs, rep, trail, pool); +} + + +svn_error_t * +svn_fs_base__delete_rep_if_mutable(svn_fs_t *fs, + const char *rep_key, + const char *txn_id, + trail_t *trail, + apr_pool_t *pool) +{ + representation_t *rep; + + SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); + if (! rep_is_mutable(rep, txn_id)) + return SVN_NO_ERROR; + + if (rep->kind == rep_kind_fulltext) + { + SVN_ERR(svn_fs_bdb__string_delete(fs, + rep->contents.fulltext.string_key, + trail, pool)); + } + else if (rep->kind == rep_kind_delta) + { + apr_array_header_t *keys; + SVN_ERR(delta_string_keys(&keys, rep, pool)); + SVN_ERR(delete_strings(keys, fs, trail, pool)); + } + else /* unknown kind */ + return UNKNOWN_NODE_KIND(rep_key); + + return svn_fs_bdb__delete_rep(fs, rep_key, trail, pool); +} + + + +/*** Reading and writing data via representations. ***/ + +/** Reading. **/ + +struct rep_read_baton +{ + /* The FS from which we're reading. */ + svn_fs_t *fs; + + /* The representation skel whose contents we want to read. If this + is NULL, the rep has never had any contents, so all reads fetch 0 + bytes. + + Formerly, we cached the entire rep skel here, not just the key. + That way we didn't have to fetch the rep from the db every time + we want to read a little bit more of the file. Unfortunately, + this has a problem: if, say, a file's representation changes + while we're reading (changes from fulltext to delta, for + example), we'll never know it. So for correctness, we now + refetch the representation skel every time we want to read + another chunk. */ + const char *rep_key; + + /* How many bytes have been read already. */ + svn_filesize_t offset; + + /* If present, the read will be done as part of this trail, and the + trail's pool will be used. Otherwise, see `pool' below. */ + trail_t *trail; + + /* MD5 checksum context. Initialized when the baton is created, updated as + we read data, and finalized when the stream is closed. */ + svn_checksum_ctx_t *md5_checksum_ctx; + + /* Final resting place of the checksum created by md5_checksum_cxt. */ + svn_checksum_t *md5_checksum; + + /* SHA1 checksum context. Initialized when the baton is created, updated as + we read data, and finalized when the stream is closed. */ + svn_checksum_ctx_t *sha1_checksum_ctx; + + /* Final resting place of the checksum created by sha1_checksum_cxt. */ + svn_checksum_t *sha1_checksum; + + /* The length of the rep's contents (as fulltext, that is, + independent of how the rep actually stores the data.) This is + retrieved when the baton is created, and used to determine when + we have read the last byte, at which point we compare checksums. + + Getting this at baton creation time makes interleaved reads and + writes on the same rep in the same trail impossible. But we're + not doing that, and probably no one ever should. And anyway if + they do, they should see problems immediately. */ + svn_filesize_t size; + + /* Set to FALSE when the baton is created, TRUE when the checksum_ctx + is digestified. */ + svn_boolean_t checksum_finalized; + + /* Used for temporary allocations. This pool is cleared at the + start of each invocation of the relevant stream read function -- + see rep_read_contents(). */ + apr_pool_t *scratch_pool; + +}; + + +static svn_error_t * +rep_read_get_baton(struct rep_read_baton **rb_p, + svn_fs_t *fs, + const char *rep_key, + svn_boolean_t use_trail_for_reads, + trail_t *trail, + apr_pool_t *pool) +{ + struct rep_read_baton *b; + + b = apr_pcalloc(pool, sizeof(*b)); + b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool); + b->sha1_checksum_ctx = svn_checksum_ctx_create(svn_checksum_sha1, pool); + + if (rep_key) + SVN_ERR(svn_fs_base__rep_contents_size(&(b->size), fs, rep_key, + trail, pool)); + else + b->size = 0; + + b->checksum_finalized = FALSE; + b->fs = fs; + b->trail = use_trail_for_reads ? trail : NULL; + b->scratch_pool = svn_pool_create(pool); + b->rep_key = rep_key; + b->offset = 0; + + *rb_p = b; + + return SVN_NO_ERROR; +} + + + +/*** Retrieving data. ***/ + +svn_error_t * +svn_fs_base__rep_contents_size(svn_filesize_t *size_p, + svn_fs_t *fs, + const char *rep_key, + trail_t *trail, + apr_pool_t *pool) +{ + representation_t *rep; + + SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); + + if (rep->kind == rep_kind_fulltext) + { + /* Get the size by asking Berkeley for the string's length. */ + SVN_ERR(svn_fs_bdb__string_size(size_p, fs, + rep->contents.fulltext.string_key, + trail, pool)); + } + else if (rep->kind == rep_kind_delta) + { + /* Get the size by finding the last window pkg in the delta and + adding its offset to its size. This way, we won't even be + messed up by overlapping windows, as long as the window pkgs + are still ordered. */ + apr_array_header_t *chunks = rep->contents.delta.chunks; + rep_delta_chunk_t *last_chunk; + + SVN_ERR_ASSERT(chunks->nelts); + + last_chunk = APR_ARRAY_IDX(chunks, chunks->nelts - 1, + rep_delta_chunk_t *); + *size_p = last_chunk->offset + last_chunk->size; + } + else /* unknown kind */ + return UNKNOWN_NODE_KIND(rep_key); + + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_fs_base__rep_contents_checksums(svn_checksum_t **md5_checksum, + svn_checksum_t **sha1_checksum, + svn_fs_t *fs, + const char *rep_key, + trail_t *trail, + apr_pool_t *pool) +{ + representation_t *rep; + + SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); + if (md5_checksum) + *md5_checksum = svn_checksum_dup(rep->md5_checksum, pool); + if (sha1_checksum) + *sha1_checksum = svn_checksum_dup(rep->sha1_checksum, pool); + + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_fs_base__rep_contents(svn_string_t *str, + svn_fs_t *fs, + const char *rep_key, + trail_t *trail, + apr_pool_t *pool) +{ + svn_filesize_t contents_size; + apr_size_t len; + char *data; + + SVN_ERR(svn_fs_base__rep_contents_size(&contents_size, fs, rep_key, + trail, pool)); + + /* What if the contents are larger than we can handle? */ + if (contents_size > SVN_MAX_OBJECT_SIZE) + return svn_error_createf + (SVN_ERR_FS_GENERAL, NULL, + _("Rep contents are too large: " + "got %s, limit is %s"), + apr_psprintf(pool, "%" SVN_FILESIZE_T_FMT, contents_size), + apr_psprintf(pool, "%" APR_SIZE_T_FMT, SVN_MAX_OBJECT_SIZE)); + else + str->len = (apr_size_t) contents_size; + + data = apr_palloc(pool, str->len); + str->data = data; + len = str->len; + SVN_ERR(rep_read_range(fs, rep_key, 0, data, &len, trail, pool)); + + /* Paranoia. */ + if (len != str->len) + return svn_error_createf + (SVN_ERR_FS_CORRUPT, NULL, + _("Failure reading representation '%s'"), rep_key); + + /* Just the standard paranoia. */ + { + representation_t *rep; + svn_checksum_t *checksum, *rep_checksum; + + SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); + rep_checksum = rep->sha1_checksum ? rep->sha1_checksum : rep->md5_checksum; + SVN_ERR(svn_checksum(&checksum, rep_checksum->kind, str->data, str->len, + pool)); + + if (! svn_checksum_match(checksum, rep_checksum)) + return svn_error_create(SVN_ERR_FS_CORRUPT, + svn_checksum_mismatch_err(rep_checksum, checksum, pool, + _("Checksum mismatch on representation '%s'"), + rep_key), + NULL); + } + + return SVN_NO_ERROR; +} + + +struct read_rep_args +{ + struct rep_read_baton *rb; /* The data source. */ + char *buf; /* Where to put what we read. */ + apr_size_t *len; /* How much to read / was read. */ +}; + + +/* BATON is of type `read_rep_args': + + Read into BATON->rb->buf the *(BATON->len) bytes starting at + BATON->rb->offset from the data represented at BATON->rb->rep_key + in BATON->rb->fs, as part of TRAIL. + + Afterwards, *(BATON->len) is the number of bytes actually read, and + BATON->rb->offset is incremented by that amount. + + If BATON->rb->rep_key is null, this is assumed to mean the file's + contents have no representation, i.e., the file has no contents. + In that case, if BATON->rb->offset > 0, return the error + SVN_ERR_FS_FILE_CONTENTS_CHANGED, else just set *(BATON->len) to + zero and return. */ +static svn_error_t * +txn_body_read_rep(void *baton, trail_t *trail) +{ + struct read_rep_args *args = baton; + + if (args->rb->rep_key) + { + SVN_ERR(rep_read_range(args->rb->fs, + args->rb->rep_key, + args->rb->offset, + args->buf, + args->len, + trail, + args->rb->scratch_pool)); + + args->rb->offset += *(args->len); + + /* We calculate the checksum just once, the moment we see the + * last byte of data. But we can't assume there was a short + * read. The caller may have known the length of the data and + * requested exactly that amount, so there would never be a + * short read. (That's why the read baton has to know the + * length of the data in advance.) + * + * On the other hand, some callers invoke the stream reader in a + * loop whose termination condition is that the read returned + * zero bytes of data -- which usually results in the read + * function being called one more time *after* the call that got + * a short read (indicating end-of-stream). + * + * The conditions below ensure that we compare checksums even + * when there is no short read associated with the last byte of + * data, while also ensuring that it's harmless to repeatedly + * read 0 bytes from the stream. + */ + if (! args->rb->checksum_finalized) + { + SVN_ERR(svn_checksum_update(args->rb->md5_checksum_ctx, args->buf, + *(args->len))); + SVN_ERR(svn_checksum_update(args->rb->sha1_checksum_ctx, args->buf, + *(args->len))); + + if (args->rb->offset == args->rb->size) + { + representation_t *rep; + + SVN_ERR(svn_checksum_final(&args->rb->md5_checksum, + args->rb->md5_checksum_ctx, + trail->pool)); + SVN_ERR(svn_checksum_final(&args->rb->sha1_checksum, + args->rb->sha1_checksum_ctx, + trail->pool)); + args->rb->checksum_finalized = TRUE; + + SVN_ERR(svn_fs_bdb__read_rep(&rep, args->rb->fs, + args->rb->rep_key, + trail, trail->pool)); + + if (rep->md5_checksum + && (! svn_checksum_match(rep->md5_checksum, + args->rb->md5_checksum))) + return svn_error_create(SVN_ERR_FS_CORRUPT, + svn_checksum_mismatch_err(rep->md5_checksum, + args->rb->sha1_checksum, trail->pool, + _("MD5 checksum mismatch on representation '%s'"), + args->rb->rep_key), + NULL); + + if (rep->sha1_checksum + && (! svn_checksum_match(rep->sha1_checksum, + args->rb->sha1_checksum))) + return svn_error_createf(SVN_ERR_FS_CORRUPT, + svn_checksum_mismatch_err(rep->sha1_checksum, + args->rb->sha1_checksum, trail->pool, + _("SHA1 checksum mismatch on representation '%s'"), + args->rb->rep_key), + NULL); + } + } + } + else if (args->rb->offset > 0) + { + return + svn_error_create + (SVN_ERR_FS_REP_CHANGED, NULL, + _("Null rep, but offset past zero already")); + } + else + *(args->len) = 0; + + return SVN_NO_ERROR; +} + + +static svn_error_t * +rep_read_contents(void *baton, char *buf, apr_size_t *len) +{ + struct rep_read_baton *rb = baton; + struct read_rep_args args; + + /* Clear the scratch pool of the results of previous invocations. */ + svn_pool_clear(rb->scratch_pool); + + args.rb = rb; + args.buf = buf; + args.len = len; + + /* If we got a trail, use it; else make one. */ + if (rb->trail) + SVN_ERR(txn_body_read_rep(&args, rb->trail)); + else + { + /* In the case of reading from the db, any returned data should + live in our pre-allocated buffer, so the whole operation can + happen within a single malloc/free cycle. This prevents us + from creating millions of unnecessary trail subpools when + reading a big file. */ + SVN_ERR(svn_fs_base__retry_txn(rb->fs, + txn_body_read_rep, + &args, + TRUE, + rb->scratch_pool)); + } + return SVN_NO_ERROR; +} + + +/** Writing. **/ + + +struct rep_write_baton +{ + /* The FS in which we're writing. */ + svn_fs_t *fs; + + /* The representation skel whose contents we want to write. */ + const char *rep_key; + + /* The transaction id under which this write action will take + place. */ + const char *txn_id; + + /* If present, do the write as part of this trail, and use trail's + pool. Otherwise, see `pool' below. */ + trail_t *trail; + + /* SHA1 and MD5 checksums. Initialized when the baton is created, + updated as we write data, and finalized and stored when the + stream is closed. */ + svn_checksum_ctx_t *md5_checksum_ctx; + svn_checksum_t *md5_checksum; + svn_checksum_ctx_t *sha1_checksum_ctx; + svn_checksum_t *sha1_checksum; + svn_boolean_t finalized; + + /* Used for temporary allocations, iff `trail' (above) is null. */ + apr_pool_t *pool; + +}; + + +static struct rep_write_baton * +rep_write_get_baton(svn_fs_t *fs, + const char *rep_key, + const char *txn_id, + trail_t *trail, + apr_pool_t *pool) +{ + struct rep_write_baton *b; + + b = apr_pcalloc(pool, sizeof(*b)); + b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool); + b->sha1_checksum_ctx = svn_checksum_ctx_create(svn_checksum_sha1, pool); + b->fs = fs; + b->trail = trail; + b->pool = pool; + b->rep_key = rep_key; + b->txn_id = txn_id; + return b; +} + + + +/* Write LEN bytes from BUF into the end of the string represented via + REP_KEY in FS, as part of TRAIL. If the representation is not + mutable, return the error SVN_FS_REP_NOT_MUTABLE. */ +static svn_error_t * +rep_write(svn_fs_t *fs, + const char *rep_key, + const char *buf, + apr_size_t len, + const char *txn_id, + trail_t *trail, + apr_pool_t *pool) +{ + representation_t *rep; + + SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); + + if (! rep_is_mutable(rep, txn_id)) + return svn_error_createf + (SVN_ERR_FS_REP_NOT_MUTABLE, NULL, + _("Rep '%s' is not mutable"), rep_key); + + if (rep->kind == rep_kind_fulltext) + { + SVN_ERR(svn_fs_bdb__string_append + (fs, &(rep->contents.fulltext.string_key), len, buf, + trail, pool)); + } + else if (rep->kind == rep_kind_delta) + { + /* There should never be a case when we have a mutable + non-fulltext rep. The only code that creates mutable reps is + in this file, and it creates them fulltext. */ + return svn_error_createf + (SVN_ERR_FS_CORRUPT, NULL, + _("Rep '%s' both mutable and non-fulltext"), rep_key); + } + else /* unknown kind */ + return UNKNOWN_NODE_KIND(rep_key); + + return SVN_NO_ERROR; +} + + +struct write_rep_args +{ + struct rep_write_baton *wb; /* Destination. */ + const char *buf; /* Data. */ + apr_size_t len; /* How much to write. */ +}; + + +/* BATON is of type `write_rep_args': + Append onto BATON->wb->rep_key's contents BATON->len bytes of + data from BATON->wb->buf, in BATON->rb->fs, as part of TRAIL. + + If the representation is not mutable, return the error + SVN_FS_REP_NOT_MUTABLE. */ +static svn_error_t * +txn_body_write_rep(void *baton, trail_t *trail) +{ + struct write_rep_args *args = baton; + + SVN_ERR(rep_write(args->wb->fs, + args->wb->rep_key, + args->buf, + args->len, + args->wb->txn_id, + trail, + trail->pool)); + SVN_ERR(svn_checksum_update(args->wb->md5_checksum_ctx, + args->buf, args->len)); + SVN_ERR(svn_checksum_update(args->wb->sha1_checksum_ctx, + args->buf, args->len)); + return SVN_NO_ERROR; +} + + +static svn_error_t * +rep_write_contents(void *baton, + const char *buf, + apr_size_t *len) +{ + struct rep_write_baton *wb = baton; + struct write_rep_args args; + + /* We toss LEN's indirectness because if not all the bytes are + written, it's an error, so we wouldn't be reporting anything back + through *LEN anyway. */ + args.wb = wb; + args.buf = buf; + args.len = *len; + + /* If we got a trail, use it; else make one. */ + if (wb->trail) + SVN_ERR(txn_body_write_rep(&args, wb->trail)); + else + { + /* In the case of simply writing the rep to the db, we're + *certain* that there's no data coming back to us that needs + to be preserved... so the whole operation can happen within a + single malloc/free cycle. This prevents us from creating + millions of unnecessary trail subpools when writing a big + file. */ + SVN_ERR(svn_fs_base__retry_txn(wb->fs, + txn_body_write_rep, + &args, + TRUE, + wb->pool)); + } + + return SVN_NO_ERROR; +} + + +/* Helper for rep_write_close_contents(); see that doc string for + more. BATON is of type `struct rep_write_baton'. */ +static svn_error_t * +txn_body_write_close_rep(void *baton, trail_t *trail) +{ + struct rep_write_baton *wb = baton; + representation_t *rep; + + SVN_ERR(svn_fs_bdb__read_rep(&rep, wb->fs, wb->rep_key, + trail, trail->pool)); + rep->md5_checksum = svn_checksum_dup(wb->md5_checksum, trail->pool); + rep->sha1_checksum = svn_checksum_dup(wb->sha1_checksum, trail->pool); + return svn_fs_bdb__write_rep(wb->fs, wb->rep_key, rep, + trail, trail->pool); +} + + +/* BATON is of type `struct rep_write_baton'. + * + * Finalize BATON->md5_context and store the resulting digest under + * BATON->rep_key. + */ +static svn_error_t * +rep_write_close_contents(void *baton) +{ + struct rep_write_baton *wb = baton; + + /* ### Thought: if we fixed apr-util MD5 contexts to allow repeated + digestification, then we wouldn't need a stream close function at + all -- instead, we could update the stored checksum each time a + write occurred, which would have the added advantage of making + interleaving reads and writes work. Currently, they'd fail with + a checksum mismatch, it just happens that our code never tries to + do that anyway. */ + + if (! wb->finalized) + { + SVN_ERR(svn_checksum_final(&wb->md5_checksum, wb->md5_checksum_ctx, + wb->pool)); + SVN_ERR(svn_checksum_final(&wb->sha1_checksum, wb->sha1_checksum_ctx, + wb->pool)); + wb->finalized = TRUE; + } + + /* If we got a trail, use it; else make one. */ + if (wb->trail) + return txn_body_write_close_rep(wb, wb->trail); + else + /* We need to keep our trail pool around this time so the + checksums we've calculated survive. */ + return svn_fs_base__retry_txn(wb->fs, txn_body_write_close_rep, + wb, FALSE, wb->pool); +} + + +/** Public read and write stream constructors. **/ + +svn_error_t * +svn_fs_base__rep_contents_read_stream(svn_stream_t **rs_p, + svn_fs_t *fs, + const char *rep_key, + svn_boolean_t use_trail_for_reads, + trail_t *trail, + apr_pool_t *pool) +{ + struct rep_read_baton *rb; + + SVN_ERR(rep_read_get_baton(&rb, fs, rep_key, use_trail_for_reads, + trail, pool)); + *rs_p = svn_stream_create(rb, pool); + svn_stream_set_read(*rs_p, rep_read_contents); + + return SVN_NO_ERROR; +} + + +/* Clear the contents of REP_KEY, so that it represents the empty + string, as part of TRAIL. TXN_ID is the id of the Subversion + transaction under which this occurs. If REP_KEY is not mutable, + return the error SVN_ERR_FS_REP_NOT_MUTABLE. */ +static svn_error_t * +rep_contents_clear(svn_fs_t *fs, + const char *rep_key, + const char *txn_id, + trail_t *trail, + apr_pool_t *pool) +{ + representation_t *rep; + const char *str_key; + + SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); + + /* Make sure it's mutable. */ + if (! rep_is_mutable(rep, txn_id)) + return svn_error_createf + (SVN_ERR_FS_REP_NOT_MUTABLE, NULL, + _("Rep '%s' is not mutable"), rep_key); + + SVN_ERR_ASSERT(rep->kind == rep_kind_fulltext); + + /* If rep has no string, just return success. Else, clear the + underlying string. */ + str_key = rep->contents.fulltext.string_key; + if (str_key && *str_key) + { + SVN_ERR(svn_fs_bdb__string_clear(fs, str_key, trail, pool)); + rep->md5_checksum = NULL; + rep->sha1_checksum = NULL; + SVN_ERR(svn_fs_bdb__write_rep(fs, rep_key, rep, trail, pool)); + } + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_fs_base__rep_contents_write_stream(svn_stream_t **ws_p, + svn_fs_t *fs, + const char *rep_key, + const char *txn_id, + svn_boolean_t use_trail_for_writes, + trail_t *trail, + apr_pool_t *pool) +{ + struct rep_write_baton *wb; + + /* Clear the current rep contents (free mutability check!). */ + SVN_ERR(rep_contents_clear(fs, rep_key, txn_id, trail, pool)); + + /* Now, generate the write baton and stream. */ + wb = rep_write_get_baton(fs, rep_key, txn_id, + use_trail_for_writes ? trail : NULL, pool); + *ws_p = svn_stream_create(wb, pool); + svn_stream_set_write(*ws_p, rep_write_contents); + svn_stream_set_close(*ws_p, rep_write_close_contents); + + return SVN_NO_ERROR; +} + + + +/*** Deltified storage. ***/ + +/* Baton for svn_write_fn_t write_string_set(). */ +struct write_svndiff_strings_baton +{ + /* The fs where lives the string we're writing. */ + svn_fs_t *fs; + + /* The key of the string we're writing to. Typically this is + initialized to NULL, so svn_fs_base__string_append() can fill in a + value. */ + const char *key; + + /* The amount of txdelta data written to the current + string-in-progress. */ + apr_size_t size; + + /* The amount of svndiff header information we've written thus far + to the strings table. */ + apr_size_t header_read; + + /* The version number of the svndiff data written. ### You'd better + not count on this being populated after the first chunk is sent + through the interface, since it lives at the 4th byte of the + stream. */ + apr_byte_t version; + + /* The trail we're writing in. */ + trail_t *trail; + +}; + + +/* Function of type `svn_write_fn_t', for writing to a collection of + strings; BATON is `struct write_svndiff_strings_baton *'. + + On the first call, BATON->key is null. A new string key in + BATON->fs is chosen and stored in BATON->key; each call appends + *LEN bytes from DATA onto the string. *LEN is never changed; if + the write fails to write all *LEN bytes, an error is returned. + BATON->size is used to track the total amount of data written via + this handler, and must be reset by the caller to 0 when appropriate. */ +static svn_error_t * +write_svndiff_strings(void *baton, const char *data, apr_size_t *len) +{ + struct write_svndiff_strings_baton *wb = baton; + const char *buf = data; + apr_size_t nheader = 0; + + /* If we haven't stripped all the header information from this + stream yet, keep stripping. If someone sends a first window + through here that's shorter than 4 bytes long, this will probably + cause a nuclear reactor meltdown somewhere in the American + midwest. */ + if (wb->header_read < 4) + { + nheader = 4 - wb->header_read; + *len -= nheader; + buf += nheader; + wb->header_read += nheader; + + /* If we have *now* read the full 4-byte header, check that + least byte for the version number of the svndiff format. */ + if (wb->header_read == 4) + wb->version = *(buf - 1); + } + + /* Append to the current string we're writing (or create a new one + if WB->key is NULL). */ + SVN_ERR(svn_fs_bdb__string_append(wb->fs, &(wb->key), *len, + buf, wb->trail, wb->trail->pool)); + + /* Make sure we (still) have a key. */ + if (wb->key == NULL) + return svn_error_create(SVN_ERR_FS_GENERAL, NULL, + _("Failed to get new string key")); + + /* Restore *LEN to the value it *would* have been were it not for + header stripping. */ + *len += nheader; + + /* Increment our running total of bytes written to this string. */ + wb->size += *len; + + return SVN_NO_ERROR; +} + + +typedef struct window_write_t +{ + const char *key; /* string key for this window */ + apr_size_t svndiff_len; /* amount of svndiff data written to the string */ + svn_filesize_t text_off; /* offset of fulltext represented by this window */ + apr_size_t text_len; /* amount of fulltext data represented by this window */ + +} window_write_t; + + +svn_error_t * +svn_fs_base__rep_deltify(svn_fs_t *fs, + const char *target, + const char *source, + trail_t *trail, + apr_pool_t *pool) +{ + base_fs_data_t *bfd = fs->fsap_data; + svn_stream_t *source_stream; /* stream to read the source */ + svn_stream_t *target_stream; /* stream to read the target */ + svn_txdelta_stream_t *txdelta_stream; /* stream to read delta windows */ + + /* window-y things, and an array to track them */ + window_write_t *ww; + apr_array_header_t *windows; + + /* stream to write new (deltified) target data and its baton */ + svn_stream_t *new_target_stream; + struct write_svndiff_strings_baton new_target_baton; + + /* window handler/baton for writing to above stream */ + svn_txdelta_window_handler_t new_target_handler; + void *new_target_handler_baton; + + /* yes, we do windows */ + svn_txdelta_window_t *window; + + /* The current offset into the fulltext that our window is about to + write. This doubles, after all windows are written, as the + total size of the svndiff data for the deltification process. */ + svn_filesize_t tview_off = 0; + + /* The total amount of diff data written while deltifying. */ + svn_filesize_t diffsize = 0; + + /* TARGET's original string keys */ + apr_array_header_t *orig_str_keys; + + /* The checksums for the representation's fulltext contents. */ + svn_checksum_t *rep_md5_checksum; + svn_checksum_t *rep_sha1_checksum; + + /* MD5 digest */ + const unsigned char *digest; + + /* pool for holding the windows */ + apr_pool_t *wpool; + + /* Paranoia: never allow a rep to be deltified against itself, + because then there would be no fulltext reachable in the delta + chain, and badness would ensue. */ + if (strcmp(target, source) == 0) + return svn_error_createf + (SVN_ERR_FS_CORRUPT, NULL, + _("Attempt to deltify '%s' against itself"), + target); + + /* Set up a handler for the svndiff data, which will write each + window to its own string in the `strings' table. */ + new_target_baton.fs = fs; + new_target_baton.trail = trail; + new_target_baton.header_read = FALSE; + new_target_stream = svn_stream_create(&new_target_baton, pool); + svn_stream_set_write(new_target_stream, write_svndiff_strings); + + /* Get streams to our source and target text data. */ + SVN_ERR(svn_fs_base__rep_contents_read_stream(&source_stream, fs, source, + TRUE, trail, pool)); + SVN_ERR(svn_fs_base__rep_contents_read_stream(&target_stream, fs, target, + TRUE, trail, pool)); + + /* Setup a stream to convert the textdelta data into svndiff windows. */ + svn_txdelta2(&txdelta_stream, source_stream, target_stream, TRUE, pool); + + if (bfd->format >= SVN_FS_BASE__MIN_SVNDIFF1_FORMAT) + svn_txdelta_to_svndiff3(&new_target_handler, &new_target_handler_baton, + new_target_stream, 1, + SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool); + else + svn_txdelta_to_svndiff3(&new_target_handler, &new_target_handler_baton, + new_target_stream, 0, + SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool); + + /* subpool for the windows */ + wpool = svn_pool_create(pool); + + /* Now, loop, manufacturing and dispatching windows of svndiff data. */ + windows = apr_array_make(pool, 1, sizeof(ww)); + do + { + /* Reset some baton variables. */ + new_target_baton.size = 0; + new_target_baton.key = NULL; + + /* Free the window. */ + svn_pool_clear(wpool); + + /* Fetch the next window of txdelta data. */ + SVN_ERR(svn_txdelta_next_window(&window, txdelta_stream, wpool)); + + /* Send off this package to be written as svndiff data. */ + SVN_ERR(new_target_handler(window, new_target_handler_baton)); + if (window) + { + /* Add a new window description to our array. */ + ww = apr_pcalloc(pool, sizeof(*ww)); + ww->key = new_target_baton.key; + ww->svndiff_len = new_target_baton.size; + ww->text_off = tview_off; + ww->text_len = window->tview_len; + APR_ARRAY_PUSH(windows, window_write_t *) = ww; + + /* Update our recordkeeping variables. */ + tview_off += window->tview_len; + diffsize += ww->svndiff_len; + } + + } while (window); + + svn_pool_destroy(wpool); + + /* Having processed all the windows, we can query the MD5 digest + from the stream. */ + digest = svn_txdelta_md5_digest(txdelta_stream); + if (! digest) + return svn_error_createf + (SVN_ERR_DELTA_MD5_CHECKSUM_ABSENT, NULL, + _("Failed to calculate MD5 digest for '%s'"), + source); + + /* Construct a list of the strings used by the old representation so + that we can delete them later. While we are here, if the old + representation was a fulltext, check to make sure the delta we're + replacing it with is actually smaller. (Don't perform this check + if we're replacing a delta; in that case, we're going for a time + optimization, not a space optimization.) */ + { + representation_t *old_rep; + const char *str_key; + + SVN_ERR(svn_fs_bdb__read_rep(&old_rep, fs, target, trail, pool)); + if (old_rep->kind == rep_kind_fulltext) + { + svn_filesize_t old_size = 0; + + str_key = old_rep->contents.fulltext.string_key; + SVN_ERR(svn_fs_bdb__string_size(&old_size, fs, str_key, + trail, pool)); + orig_str_keys = apr_array_make(pool, 1, sizeof(str_key)); + APR_ARRAY_PUSH(orig_str_keys, const char *) = str_key; + + /* If the new data is NOT an space optimization, destroy the + string(s) we created, and get outta here. */ + if (diffsize >= old_size) + { + int i; + for (i = 0; i < windows->nelts; i++) + { + ww = APR_ARRAY_IDX(windows, i, window_write_t *); + SVN_ERR(svn_fs_bdb__string_delete(fs, ww->key, trail, pool)); + } + return SVN_NO_ERROR; + } + } + else if (old_rep->kind == rep_kind_delta) + SVN_ERR(delta_string_keys(&orig_str_keys, old_rep, pool)); + else /* unknown kind */ + return UNKNOWN_NODE_KIND(target); + + /* Save the checksums, since the new rep needs them. */ + rep_md5_checksum = svn_checksum_dup(old_rep->md5_checksum, pool); + rep_sha1_checksum = svn_checksum_dup(old_rep->sha1_checksum, pool); + } + + /* Hook the new strings we wrote into the rest of the filesystem by + building a new representation to replace our old one. */ + { + representation_t new_rep; + rep_delta_chunk_t *chunk; + apr_array_header_t *chunks; + int i; + + new_rep.kind = rep_kind_delta; + new_rep.txn_id = NULL; + + /* Migrate the old rep's checksums to the new rep. */ + new_rep.md5_checksum = svn_checksum_dup(rep_md5_checksum, pool); + new_rep.sha1_checksum = svn_checksum_dup(rep_sha1_checksum, pool); + + chunks = apr_array_make(pool, windows->nelts, sizeof(chunk)); + + /* Loop through the windows we wrote, creating and adding new + chunks to the representation. */ + for (i = 0; i < windows->nelts; i++) + { + ww = APR_ARRAY_IDX(windows, i, window_write_t *); + + /* Allocate a chunk and its window */ + chunk = apr_palloc(pool, sizeof(*chunk)); + chunk->offset = ww->text_off; + + /* Populate the window */ + chunk->version = new_target_baton.version; + chunk->string_key = ww->key; + chunk->size = ww->text_len; + chunk->rep_key = source; + + /* Add this chunk to the array. */ + APR_ARRAY_PUSH(chunks, rep_delta_chunk_t *) = chunk; + } + + /* Put the chunks array into the representation. */ + new_rep.contents.delta.chunks = chunks; + + /* Write out the new representation. */ + SVN_ERR(svn_fs_bdb__write_rep(fs, target, &new_rep, trail, pool)); + + /* Delete the original pre-deltified strings. */ + SVN_ERR(delete_strings(orig_str_keys, fs, trail, pool)); + } + + return SVN_NO_ERROR; +} |