diff options
Diffstat (limited to 'subversion/libsvn_fs_fs')
22 files changed, 24963 insertions, 0 deletions
diff --git a/subversion/libsvn_fs_fs/caching.c b/subversion/libsvn_fs_fs/caching.c new file mode 100644 index 0000000..4af48b8 --- /dev/null +++ b/subversion/libsvn_fs_fs/caching.c @@ -0,0 +1,692 @@ +/* caching.c : in-memory caching + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include "fs.h" +#include "fs_fs.h" +#include "id.h" +#include "dag.h" +#include "tree.h" +#include "temp_serializer.h" +#include "../libsvn_fs/fs-loader.h" + +#include "svn_config.h" +#include "svn_cache_config.h" + +#include "svn_private_config.h" +#include "svn_hash.h" +#include "svn_pools.h" + +#include "private/svn_debug.h" +#include "private/svn_subr_private.h" + +/* Take the ORIGINAL string and replace all occurrences of ":" without + * limiting the key space. Allocate the result in POOL. 
+ */ +static const char * +normalize_key_part(const char *original, + apr_pool_t *pool) +{ + apr_size_t i; + apr_size_t len = strlen(original); + svn_stringbuf_t *normalized = svn_stringbuf_create_ensure(len, pool); + + for (i = 0; i < len; ++i) + { + char c = original[i]; + switch (c) + { + case ':': svn_stringbuf_appendbytes(normalized, "%_", 2); + break; + case '%': svn_stringbuf_appendbytes(normalized, "%%", 2); + break; + default : svn_stringbuf_appendbyte(normalized, c); + } + } + + return normalized->data; +} + +/* Return a memcache in *MEMCACHE_P for FS if it's configured to use + memcached, or NULL otherwise. Also, sets *FAIL_STOP to a boolean + indicating whether cache errors should be returned to the caller or + just passed to the FS warning handler. + + *CACHE_TXDELTAS, *CACHE_FULLTEXTS and *CACHE_REVPROPS flags will be set + according to FS->CONFIG. *CACHE_NAMESPACE receives the cache prefix + to use. + + Use FS->pool for allocating the memcache and CACHE_NAMESPACE, and POOL + for temporary allocations. */ +static svn_error_t * +read_config(svn_memcache_t **memcache_p, + svn_boolean_t *fail_stop, + const char **cache_namespace, + svn_boolean_t *cache_txdeltas, + svn_boolean_t *cache_fulltexts, + svn_boolean_t *cache_revprops, + svn_fs_t *fs, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + + SVN_ERR(svn_cache__make_memcache_from_config(memcache_p, ffd->config, + fs->pool)); + + /* No cache namespace by default. I.e. all FS instances share the + * cached data. If you specify different namespaces, the data will + * share / compete for the same cache memory but keys will not match + * across namespaces and, thus, cached data will not be shared between + * namespaces. + * + * Since the namespace will be concatenated with other elements to form + * the complete key prefix, we must make sure that the resulting string + * is unique and cannot be created by any other combination of elements. 
+ */ + *cache_namespace + = normalize_key_part(svn_hash__get_cstring(fs->config, + SVN_FS_CONFIG_FSFS_CACHE_NS, + ""), + pool); + + /* don't cache text deltas by default. + * Once we reconstructed the fulltexts from the deltas, + * these deltas are rarely re-used. Therefore, only tools + * like svnadmin will activate this to speed up operations + * dump and verify. + */ + *cache_txdeltas + = svn_hash__get_bool(fs->config, + SVN_FS_CONFIG_FSFS_CACHE_DELTAS, + FALSE); + /* by default, cache fulltexts. + * Most SVN tools care about reconstructed file content. + * Thus, this is a reasonable default. + * SVN admin tools may set that to FALSE because fulltexts + * won't be re-used rendering the cache less effective + * by squeezing wanted data out. + */ + *cache_fulltexts + = svn_hash__get_bool(fs->config, + SVN_FS_CONFIG_FSFS_CACHE_FULLTEXTS, + TRUE); + + /* don't cache revprops by default. + * Revprop caching significantly speeds up operations like + * svn ls -v. However, it requires synchronization that may + * not be available or efficient in the current server setup. + * + * If the caller chose option "2", enable revprop caching if + * the required API support is there to make it efficient. + */ + if (strcmp(svn_hash__get_cstring(fs->config, + SVN_FS_CONFIG_FSFS_CACHE_REVPROPS, + ""), "2")) + *cache_revprops + = svn_hash__get_bool(fs->config, + SVN_FS_CONFIG_FSFS_CACHE_REVPROPS, + FALSE); + else + *cache_revprops = svn_named_atomic__is_efficient(); + + return svn_config_get_bool(ffd->config, fail_stop, + CONFIG_SECTION_CACHES, CONFIG_OPTION_FAIL_STOP, + FALSE); +} + + +/* Implements svn_cache__error_handler_t + * This variant clears the error after logging it. 
+ */ +static svn_error_t * +warn_and_continue_on_cache_errors(svn_error_t *err, + void *baton, + apr_pool_t *pool) +{ + svn_fs_t *fs = baton; + (fs->warning)(fs->warning_baton, err); + svn_error_clear(err); + + return SVN_NO_ERROR; +} + +/* Implements svn_cache__error_handler_t + * This variant logs the error and passes it on to the callers. + */ +static svn_error_t * +warn_and_fail_on_cache_errors(svn_error_t *err, + void *baton, + apr_pool_t *pool) +{ + svn_fs_t *fs = baton; + (fs->warning)(fs->warning_baton, err); + return err; +} + +#ifdef SVN_DEBUG_CACHE_DUMP_STATS +/* Baton to be used for the dump_cache_statistics() pool cleanup function, */ +struct dump_cache_baton_t +{ + /* the pool about to be cleaned up. Will be used for temp. allocations. */ + apr_pool_t *pool; + + /* the cache to dump the statistics for */ + svn_cache__t *cache; +}; + +/* APR pool cleanup handler that will printf the statistics of the + cache referenced by the baton in BATON_VOID. */ +static apr_status_t +dump_cache_statistics(void *baton_void) +{ + struct dump_cache_baton_t *baton = baton_void; + + apr_status_t result = APR_SUCCESS; + svn_cache__info_t info; + svn_string_t *text_stats; + apr_array_header_t *lines; + int i; + + svn_error_t *err = svn_cache__get_info(baton->cache, + &info, + TRUE, + baton->pool); + + if (! err) + { + text_stats = svn_cache__format_info(&info, baton->pool); + lines = svn_cstring_split(text_stats->data, "\n", FALSE, baton->pool); + + for (i = 0; i < lines->nelts; ++i) + { + const char *line = APR_ARRAY_IDX(lines, i, const char *); +#ifdef SVN_DEBUG + SVN_DBG(("%s\n", line)); +#endif + } + } + + /* process error returns */ + if (err) + { + result = err->apr_err; + svn_error_clear(err); + } + + return result; +} +#endif /* SVN_DEBUG_CACHE_DUMP_STATS */ + +/* This function sets / registers the required callbacks for a given + * not transaction-specific CACHE object in FS, if CACHE is not NULL. 
+ * + * All these svn_cache__t instances shall be handled uniformly. Unless + * ERROR_HANDLER is NULL, register it for the given CACHE in FS. + */ +static svn_error_t * +init_callbacks(svn_cache__t *cache, + svn_fs_t *fs, + svn_cache__error_handler_t error_handler, + apr_pool_t *pool) +{ + if (cache != NULL) + { +#ifdef SVN_DEBUG_CACHE_DUMP_STATS + + /* schedule printing the access statistics upon pool cleanup, + * i.e. end of FSFS session. + */ + struct dump_cache_baton_t *baton; + + baton = apr_palloc(pool, sizeof(*baton)); + baton->pool = pool; + baton->cache = cache; + + apr_pool_cleanup_register(pool, + baton, + dump_cache_statistics, + apr_pool_cleanup_null); +#endif + + if (error_handler) + SVN_ERR(svn_cache__set_error_handler(cache, + error_handler, + fs, + pool)); + + } + + return SVN_NO_ERROR; +} + +/* Sets *CACHE_P to cache instance based on provided options. + * Creates memcache if MEMCACHE is not NULL. Creates membuffer cache if + * MEMBUFFER is not NULL. Fallbacks to inprocess cache if MEMCACHE and + * MEMBUFFER are NULL and pages is non-zero. Sets *CACHE_P to NULL + * otherwise. + * + * Unless NO_HANDLER is true, register an error handler that reports errors + * as warnings to the FS warning callback. + * + * Cache is allocated in POOL. + * */ +static svn_error_t * +create_cache(svn_cache__t **cache_p, + svn_memcache_t *memcache, + svn_membuffer_t *membuffer, + apr_int64_t pages, + apr_int64_t items_per_page, + svn_cache__serialize_func_t serializer, + svn_cache__deserialize_func_t deserializer, + apr_ssize_t klen, + const char *prefix, + svn_fs_t *fs, + svn_boolean_t no_handler, + apr_pool_t *pool) +{ + svn_cache__error_handler_t error_handler = no_handler + ? NULL + : warn_and_fail_on_cache_errors; + + if (memcache) + { + SVN_ERR(svn_cache__create_memcache(cache_p, memcache, + serializer, deserializer, klen, + prefix, pool)); + error_handler = no_handler + ? 
NULL + : warn_and_continue_on_cache_errors; + } + else if (membuffer) + { + SVN_ERR(svn_cache__create_membuffer_cache( + cache_p, membuffer, serializer, deserializer, + klen, prefix, FALSE, pool)); + } + else if (pages) + { + SVN_ERR(svn_cache__create_inprocess( + cache_p, serializer, deserializer, klen, pages, + items_per_page, FALSE, prefix, pool)); + } + else + { + *cache_p = NULL; + } + + SVN_ERR(init_callbacks(*cache_p, fs, error_handler, pool)); + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__initialize_caches(svn_fs_t *fs, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + const char *prefix = apr_pstrcat(pool, + "fsfs:", fs->uuid, + "/", normalize_key_part(fs->path, pool), + ":", + (char *)NULL); + svn_memcache_t *memcache; + svn_membuffer_t *membuffer; + svn_boolean_t no_handler; + svn_boolean_t cache_txdeltas; + svn_boolean_t cache_fulltexts; + svn_boolean_t cache_revprops; + const char *cache_namespace; + + /* Evaluating the cache configuration. */ + SVN_ERR(read_config(&memcache, + &no_handler, + &cache_namespace, + &cache_txdeltas, + &cache_fulltexts, + &cache_revprops, + fs, + pool)); + + prefix = apr_pstrcat(pool, "ns:", cache_namespace, ":", prefix, NULL); + + membuffer = svn_cache__get_global_membuffer_cache(); + + /* Make the cache for revision roots. For the vast majority of + * commands, this is only going to contain a few entries (svnadmin + * dump/verify is an exception here), so to reduce overhead let's + * try to keep it to just one page. I estimate each entry has about + * 72 bytes of overhead (svn_revnum_t key, svn_fs_id_t + + * id_private_t + 3 strings for value, and the cache_entry); the + * default pool size is 8192, so about a hundred should fit + * comfortably. 
*/ + SVN_ERR(create_cache(&(ffd->rev_root_id_cache), + NULL, + membuffer, + 1, 100, + svn_fs_fs__serialize_id, + svn_fs_fs__deserialize_id, + sizeof(svn_revnum_t), + apr_pstrcat(pool, prefix, "RRI", (char *)NULL), + fs, + no_handler, + fs->pool)); + + /* Rough estimate: revision DAG nodes have size around 320 bytes, so + * let's put 16 on a page. */ + SVN_ERR(create_cache(&(ffd->rev_node_cache), + NULL, + membuffer, + 1024, 16, + svn_fs_fs__dag_serialize, + svn_fs_fs__dag_deserialize, + APR_HASH_KEY_STRING, + apr_pstrcat(pool, prefix, "DAG", (char *)NULL), + fs, + no_handler, + fs->pool)); + + /* 1st level DAG node cache */ + ffd->dag_node_cache = svn_fs_fs__create_dag_cache(pool); + + /* Very rough estimate: 1K per directory. */ + SVN_ERR(create_cache(&(ffd->dir_cache), + NULL, + membuffer, + 1024, 8, + svn_fs_fs__serialize_dir_entries, + svn_fs_fs__deserialize_dir_entries, + APR_HASH_KEY_STRING, + apr_pstrcat(pool, prefix, "DIR", (char *)NULL), + fs, + no_handler, + fs->pool)); + + /* Only 16 bytes per entry (a revision number + the corresponding offset). + Since we want ~8k pages, that means 512 entries per page. 
*/ + SVN_ERR(create_cache(&(ffd->packed_offset_cache), + NULL, + membuffer, + 32, 1, + svn_fs_fs__serialize_manifest, + svn_fs_fs__deserialize_manifest, + sizeof(svn_revnum_t), + apr_pstrcat(pool, prefix, "PACK-MANIFEST", + (char *)NULL), + fs, + no_handler, + fs->pool)); + + /* initialize node revision cache, if caching has been enabled */ + SVN_ERR(create_cache(&(ffd->node_revision_cache), + NULL, + membuffer, + 0, 0, /* Do not use inprocess cache */ + svn_fs_fs__serialize_node_revision, + svn_fs_fs__deserialize_node_revision, + sizeof(pair_cache_key_t), + apr_pstrcat(pool, prefix, "NODEREVS", (char *)NULL), + fs, + no_handler, + fs->pool)); + + /* initialize node change list cache, if caching has been enabled */ + SVN_ERR(create_cache(&(ffd->changes_cache), + NULL, + membuffer, + 0, 0, /* Do not use inprocess cache */ + svn_fs_fs__serialize_changes, + svn_fs_fs__deserialize_changes, + sizeof(svn_revnum_t), + apr_pstrcat(pool, prefix, "CHANGES", (char *)NULL), + fs, + no_handler, + fs->pool)); + + /* if enabled, cache fulltext and other derived information */ + if (cache_fulltexts) + { + SVN_ERR(create_cache(&(ffd->fulltext_cache), + memcache, + membuffer, + 0, 0, /* Do not use inprocess cache */ + /* Values are svn_stringbuf_t */ + NULL, NULL, + sizeof(pair_cache_key_t), + apr_pstrcat(pool, prefix, "TEXT", (char *)NULL), + fs, + no_handler, + fs->pool)); + + SVN_ERR(create_cache(&(ffd->properties_cache), + NULL, + membuffer, + 0, 0, /* Do not use inprocess cache */ + svn_fs_fs__serialize_properties, + svn_fs_fs__deserialize_properties, + sizeof(pair_cache_key_t), + apr_pstrcat(pool, prefix, "PROP", + (char *)NULL), + fs, + no_handler, + fs->pool)); + + SVN_ERR(create_cache(&(ffd->mergeinfo_cache), + NULL, + membuffer, + 0, 0, /* Do not use inprocess cache */ + svn_fs_fs__serialize_mergeinfo, + svn_fs_fs__deserialize_mergeinfo, + APR_HASH_KEY_STRING, + apr_pstrcat(pool, prefix, "MERGEINFO", + (char *)NULL), + fs, + no_handler, + fs->pool)); + + 
SVN_ERR(create_cache(&(ffd->mergeinfo_existence_cache), + NULL, + membuffer, + 0, 0, /* Do not use inprocess cache */ + /* Values are svn_stringbuf_t */ + NULL, NULL, + APR_HASH_KEY_STRING, + apr_pstrcat(pool, prefix, "HAS_MERGEINFO", + (char *)NULL), + fs, + no_handler, + fs->pool)); + } + else + { + ffd->fulltext_cache = NULL; + ffd->properties_cache = NULL; + ffd->mergeinfo_cache = NULL; + ffd->mergeinfo_existence_cache = NULL; + } + + /* initialize revprop cache, if full-text caching has been enabled */ + if (cache_revprops) + { + SVN_ERR(create_cache(&(ffd->revprop_cache), + NULL, + membuffer, + 0, 0, /* Do not use inprocess cache */ + svn_fs_fs__serialize_properties, + svn_fs_fs__deserialize_properties, + sizeof(pair_cache_key_t), + apr_pstrcat(pool, prefix, "REVPROP", + (char *)NULL), + fs, + no_handler, + fs->pool)); + } + else + { + ffd->revprop_cache = NULL; + } + + /* if enabled, cache text deltas and their combinations */ + if (cache_txdeltas) + { + SVN_ERR(create_cache(&(ffd->txdelta_window_cache), + NULL, + membuffer, + 0, 0, /* Do not use inprocess cache */ + svn_fs_fs__serialize_txdelta_window, + svn_fs_fs__deserialize_txdelta_window, + APR_HASH_KEY_STRING, + apr_pstrcat(pool, prefix, "TXDELTA_WINDOW", + (char *)NULL), + fs, + no_handler, + fs->pool)); + + SVN_ERR(create_cache(&(ffd->combined_window_cache), + NULL, + membuffer, + 0, 0, /* Do not use inprocess cache */ + /* Values are svn_stringbuf_t */ + NULL, NULL, + APR_HASH_KEY_STRING, + apr_pstrcat(pool, prefix, "COMBINED_WINDOW", + (char *)NULL), + fs, + no_handler, + fs->pool)); + } + else + { + ffd->txdelta_window_cache = NULL; + ffd->combined_window_cache = NULL; + } + + return SVN_NO_ERROR; +} + +/* Baton to be used for the remove_txn_cache() pool cleanup function, */ +struct txn_cleanup_baton_t +{ + /* the cache to reset */ + svn_cache__t *txn_cache; + + /* the position where to reset it */ + svn_cache__t **to_reset; +}; + +/* APR pool cleanup handler that will reset the cache pointer 
given in + BATON_VOID. */ +static apr_status_t +remove_txn_cache(void *baton_void) +{ + struct txn_cleanup_baton_t *baton = baton_void; + + /* be careful not to hurt performance by resetting newer txn's caches. */ + if (*baton->to_reset == baton->txn_cache) + { + /* This is equivalent to calling svn_fs_fs__reset_txn_caches(). */ + *baton->to_reset = NULL; + } + + return APR_SUCCESS; +} + +/* This function sets / registers the required callbacks for a given + * transaction-specific *CACHE object, if CACHE is not NULL and a no-op + * otherwise. In particular, it will ensure that *CACHE gets reset to NULL + * upon POOL destruction latest. + */ +static void +init_txn_callbacks(svn_cache__t **cache, + apr_pool_t *pool) +{ + if (*cache != NULL) + { + struct txn_cleanup_baton_t *baton; + + baton = apr_palloc(pool, sizeof(*baton)); + baton->txn_cache = *cache; + baton->to_reset = cache; + + apr_pool_cleanup_register(pool, + baton, + remove_txn_cache, + apr_pool_cleanup_null); + } +} + +svn_error_t * +svn_fs_fs__initialize_txn_caches(svn_fs_t *fs, + const char *txn_id, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + + /* Transaction content needs to be carefully prefixed to virtually + eliminate any chance for conflicts. The (repo, txn_id) pair + should be unique but if a transaction fails, it might be possible + to start a new transaction later that receives the same id. + Therefore, throw in a uuid as well - just to be sure. */ + const char *prefix = apr_pstrcat(pool, + "fsfs:", fs->uuid, + "/", fs->path, + ":", txn_id, + ":", svn_uuid_generate(pool), ":", + (char *)NULL); + + /* We don't support caching for concurrent transactions in the SAME + * FSFS session. Maybe, you forgot to clean POOL. 
*/ + if (ffd->txn_dir_cache != NULL || ffd->concurrent_transactions) + { + ffd->txn_dir_cache = NULL; + ffd->concurrent_transactions = TRUE; + + return SVN_NO_ERROR; + } + + /* create a txn-local directory cache */ + SVN_ERR(create_cache(&ffd->txn_dir_cache, + NULL, + svn_cache__get_global_membuffer_cache(), + 1024, 8, + svn_fs_fs__serialize_dir_entries, + svn_fs_fs__deserialize_dir_entries, + APR_HASH_KEY_STRING, + apr_pstrcat(pool, prefix, "TXNDIR", + (char *)NULL), + fs, + TRUE, + pool)); + + /* reset the transaction-specific cache if the pool gets cleaned up. */ + init_txn_callbacks(&(ffd->txn_dir_cache), pool); + + return SVN_NO_ERROR; +} + +void +svn_fs_fs__reset_txn_caches(svn_fs_t *fs) +{ + /* we can always just reset the caches. This may degrade performance but + * can never cause in incorrect behavior. */ + + fs_fs_data_t *ffd = fs->fsap_data; + ffd->txn_dir_cache = NULL; +} diff --git a/subversion/libsvn_fs_fs/dag.c b/subversion/libsvn_fs_fs/dag.c new file mode 100644 index 0000000..3c51ffd --- /dev/null +++ b/subversion/libsvn_fs_fs/dag.c @@ -0,0 +1,1338 @@ +/* dag.c : DAG-like interface filesystem, private to libsvn_fs + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include <string.h> + +#include "svn_path.h" +#include "svn_error.h" +#include "svn_fs.h" +#include "svn_props.h" +#include "svn_pools.h" + +#include "dag.h" +#include "fs.h" +#include "key-gen.h" +#include "fs_fs.h" +#include "id.h" + +#include "../libsvn_fs/fs-loader.h" + +#include "private/svn_fspath.h" +#include "svn_private_config.h" +#include "private/svn_temp_serializer.h" +#include "temp_serializer.h" + + +/* Initializing a filesystem. */ + +struct dag_node_t +{ + /* The filesystem this dag node came from. */ + svn_fs_t *fs; + + /* The node revision ID for this dag node, allocated in POOL. */ + svn_fs_id_t *id; + + /* In the special case that this node is the root of a transaction + that has not yet been modified, the node revision ID for this dag + node's predecessor; otherwise NULL. (Used in + svn_fs_node_created_rev.) */ + const svn_fs_id_t *fresh_root_predecessor_id; + + /* The node's type (file, dir, etc.) */ + svn_node_kind_t kind; + + /* The node's NODE-REVISION, or NULL if we haven't read it in yet. + This is allocated in this node's POOL. + + If you're willing to respect all the rules above, you can munge + this yourself, but you're probably better off just calling + `get_node_revision' and `set_node_revision', which take care of + things for you. */ + node_revision_t *node_revision; + + /* The pool to allocate NODE_REVISION in. */ + apr_pool_t *node_pool; + + /* the path at which this node was created. */ + const char *created_path; +}; + + + +/* Trivial helper/accessor functions. 
*/ +svn_node_kind_t svn_fs_fs__dag_node_kind(dag_node_t *node) +{ + return node->kind; +} + + +const svn_fs_id_t * +svn_fs_fs__dag_get_id(const dag_node_t *node) +{ + return node->id; +} + + +const char * +svn_fs_fs__dag_get_created_path(dag_node_t *node) +{ + return node->created_path; +} + + +svn_fs_t * +svn_fs_fs__dag_get_fs(dag_node_t *node) +{ + return node->fs; +} + +void +svn_fs_fs__dag_set_fs(dag_node_t *node, svn_fs_t *fs) +{ + node->fs = fs; +} + + +/* Dup NODEREV and all associated data into POOL. + Leaves the id and is_fresh_txn_root fields as zero bytes. */ +static node_revision_t * +copy_node_revision(node_revision_t *noderev, + apr_pool_t *pool) +{ + node_revision_t *nr = apr_pcalloc(pool, sizeof(*nr)); + nr->kind = noderev->kind; + if (noderev->predecessor_id) + nr->predecessor_id = svn_fs_fs__id_copy(noderev->predecessor_id, pool); + nr->predecessor_count = noderev->predecessor_count; + if (noderev->copyfrom_path) + nr->copyfrom_path = apr_pstrdup(pool, noderev->copyfrom_path); + nr->copyfrom_rev = noderev->copyfrom_rev; + nr->copyroot_path = apr_pstrdup(pool, noderev->copyroot_path); + nr->copyroot_rev = noderev->copyroot_rev; + nr->data_rep = svn_fs_fs__rep_copy(noderev->data_rep, pool); + nr->prop_rep = svn_fs_fs__rep_copy(noderev->prop_rep, pool); + nr->mergeinfo_count = noderev->mergeinfo_count; + nr->has_mergeinfo = noderev->has_mergeinfo; + + if (noderev->created_path) + nr->created_path = apr_pstrdup(pool, noderev->created_path); + return nr; +} + + +/* Set *NODEREV_P to the cached node-revision for NODE. + If the node-revision was not already cached in NODE, read it in, + allocating the cache in NODE->NODE_POOL. + + If you plan to change the contents of NODE, be careful! We're + handing you a pointer directly to our cached node-revision, not + your own copy. 
If you change it as part of some operation, but + then some Berkeley DB function deadlocks or gets an error, you'll + need to back out your changes, or else the cache will reflect + changes that never got committed. It's probably best not to change + the structure at all. */ +static svn_error_t * +get_node_revision(node_revision_t **noderev_p, + dag_node_t *node) +{ + /* If we've already got a copy, there's no need to read it in. */ + if (! node->node_revision) + { + node_revision_t *noderev; + + SVN_ERR(svn_fs_fs__get_node_revision(&noderev, node->fs, + node->id, node->node_pool)); + node->node_revision = noderev; + } + + /* Now NODE->node_revision is set. */ + *noderev_p = node->node_revision; + return SVN_NO_ERROR; +} + + +svn_boolean_t svn_fs_fs__dag_check_mutable(const dag_node_t *node) +{ + return (svn_fs_fs__id_txn_id(svn_fs_fs__dag_get_id(node)) != NULL); +} + + +svn_error_t * +svn_fs_fs__dag_get_node(dag_node_t **node, + svn_fs_t *fs, + const svn_fs_id_t *id, + apr_pool_t *pool) +{ + dag_node_t *new_node; + node_revision_t *noderev; + + /* Construct the node. */ + new_node = apr_pcalloc(pool, sizeof(*new_node)); + new_node->fs = fs; + new_node->id = svn_fs_fs__id_copy(id, pool); + + /* Grab the contents so we can inspect the node's kind and created path. 
*/ + new_node->node_pool = pool; + SVN_ERR(get_node_revision(&noderev, new_node)); + + /* Initialize the KIND and CREATED_PATH attributes */ + new_node->kind = noderev->kind; + new_node->created_path = apr_pstrdup(pool, noderev->created_path); + + if (noderev->is_fresh_txn_root) + new_node->fresh_root_predecessor_id = noderev->predecessor_id; + else + new_node->fresh_root_predecessor_id = NULL; + + /* Return a fresh new node */ + *node = new_node; + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_fs_fs__dag_get_revision(svn_revnum_t *rev, + dag_node_t *node, + apr_pool_t *pool) +{ + /* In the special case that this is an unmodified transaction root, + we need to actually get the revision of the noderev's predecessor + (the revision root); see Issue #2608. */ + const svn_fs_id_t *correct_id = node->fresh_root_predecessor_id + ? node->fresh_root_predecessor_id : node->id; + + /* Look up the committed revision from the Node-ID. */ + *rev = svn_fs_fs__id_rev(correct_id); + + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_fs_fs__dag_get_predecessor_id(const svn_fs_id_t **id_p, + dag_node_t *node) +{ + node_revision_t *noderev; + + SVN_ERR(get_node_revision(&noderev, node)); + *id_p = noderev->predecessor_id; + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_fs_fs__dag_get_predecessor_count(int *count, + dag_node_t *node) +{ + node_revision_t *noderev; + + SVN_ERR(get_node_revision(&noderev, node)); + *count = noderev->predecessor_count; + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__dag_get_mergeinfo_count(apr_int64_t *count, + dag_node_t *node) +{ + node_revision_t *noderev; + + SVN_ERR(get_node_revision(&noderev, node)); + *count = noderev->mergeinfo_count; + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__dag_has_mergeinfo(svn_boolean_t *has_mergeinfo, + dag_node_t *node) +{ + node_revision_t *noderev; + + SVN_ERR(get_node_revision(&noderev, node)); + *has_mergeinfo = noderev->has_mergeinfo; + return SVN_NO_ERROR; +} + +svn_error_t * 
+svn_fs_fs__dag_has_descendants_with_mergeinfo(svn_boolean_t *do_they, + dag_node_t *node) +{ + node_revision_t *noderev; + + if (node->kind != svn_node_dir) + { + *do_they = FALSE; + return SVN_NO_ERROR; + } + + SVN_ERR(get_node_revision(&noderev, node)); + if (noderev->mergeinfo_count > 1) + *do_they = TRUE; + else if (noderev->mergeinfo_count == 1 && !noderev->has_mergeinfo) + *do_they = TRUE; + else + *do_they = FALSE; + return SVN_NO_ERROR; +} + + +/*** Directory node functions ***/ + +/* Some of these are helpers for functions outside this section. */ + +/* Set *ID_P to the node-id for entry NAME in PARENT. If no such + entry, set *ID_P to NULL but do not error. The node-id is + allocated in POOL. */ +static svn_error_t * +dir_entry_id_from_node(const svn_fs_id_t **id_p, + dag_node_t *parent, + const char *name, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + svn_fs_dirent_t *dirent; + + SVN_ERR(svn_fs_fs__dag_dir_entry(&dirent, parent, name, scratch_pool)); + *id_p = dirent ? svn_fs_fs__id_copy(dirent->id, result_pool) : NULL; + + return SVN_NO_ERROR; +} + + +/* Add or set in PARENT a directory entry NAME pointing to ID. + Allocations are done in POOL. + + Assumptions: + - PARENT is a mutable directory. + - ID does not refer to an ancestor of parent + - NAME is a single path component +*/ +static svn_error_t * +set_entry(dag_node_t *parent, + const char *name, + const svn_fs_id_t *id, + svn_node_kind_t kind, + const char *txn_id, + apr_pool_t *pool) +{ + node_revision_t *parent_noderev; + + /* Get the parent's node-revision. */ + SVN_ERR(get_node_revision(&parent_noderev, parent)); + + /* Set the new entry. */ + return svn_fs_fs__set_entry(parent->fs, txn_id, parent_noderev, name, id, + kind, pool); +} + + +/* Make a new entry named NAME in PARENT. If IS_DIR is true, then the + node revision the new entry points to will be a directory, else it + will be a file. The new node will be allocated in POOL. 
PARENT + must be mutable, and must not have an entry named NAME. + + Use POOL for all allocations, except caching the node_revision in PARENT. + */ +static svn_error_t * +make_entry(dag_node_t **child_p, + dag_node_t *parent, + const char *parent_path, + const char *name, + svn_boolean_t is_dir, + const char *txn_id, + apr_pool_t *pool) +{ + const svn_fs_id_t *new_node_id; + node_revision_t new_noderev, *parent_noderev; + + /* Make sure that NAME is a single path component. */ + if (! svn_path_is_single_path_component(name)) + return svn_error_createf + (SVN_ERR_FS_NOT_SINGLE_PATH_COMPONENT, NULL, + _("Attempted to create a node with an illegal name '%s'"), name); + + /* Make sure that parent is a directory */ + if (parent->kind != svn_node_dir) + return svn_error_create + (SVN_ERR_FS_NOT_DIRECTORY, NULL, + _("Attempted to create entry in non-directory parent")); + + /* Check that the parent is mutable. */ + if (! svn_fs_fs__dag_check_mutable(parent)) + return svn_error_createf + (SVN_ERR_FS_NOT_MUTABLE, NULL, + _("Attempted to clone child of non-mutable node")); + + /* Create the new node's NODE-REVISION */ + memset(&new_noderev, 0, sizeof(new_noderev)); + new_noderev.kind = is_dir ? 
svn_node_dir : svn_node_file; + new_noderev.created_path = svn_fspath__join(parent_path, name, pool); + + SVN_ERR(get_node_revision(&parent_noderev, parent)); + new_noderev.copyroot_path = apr_pstrdup(pool, + parent_noderev->copyroot_path); + new_noderev.copyroot_rev = parent_noderev->copyroot_rev; + new_noderev.copyfrom_rev = SVN_INVALID_REVNUM; + new_noderev.copyfrom_path = NULL; + + SVN_ERR(svn_fs_fs__create_node + (&new_node_id, svn_fs_fs__dag_get_fs(parent), &new_noderev, + svn_fs_fs__id_copy_id(svn_fs_fs__dag_get_id(parent)), + txn_id, pool)); + + /* Create a new dag_node_t for our new node */ + SVN_ERR(svn_fs_fs__dag_get_node(child_p, svn_fs_fs__dag_get_fs(parent), + new_node_id, pool)); + + /* We can safely call set_entry because we already know that + PARENT is mutable, and we just created CHILD, so we know it has + no ancestors (therefore, PARENT cannot be an ancestor of CHILD) */ + return set_entry(parent, name, svn_fs_fs__dag_get_id(*child_p), + new_noderev.kind, txn_id, pool); +} + + +svn_error_t * +svn_fs_fs__dag_dir_entries(apr_hash_t **entries, + dag_node_t *node, + apr_pool_t *pool) +{ + node_revision_t *noderev; + + SVN_ERR(get_node_revision(&noderev, node)); + + if (noderev->kind != svn_node_dir) + return svn_error_create(SVN_ERR_FS_NOT_DIRECTORY, NULL, + _("Can't get entries of non-directory")); + + return svn_fs_fs__rep_contents_dir(entries, node->fs, noderev, pool); +} + +svn_error_t * +svn_fs_fs__dag_dir_entry(svn_fs_dirent_t **dirent, + dag_node_t *node, + const char* name, + apr_pool_t *pool) +{ + node_revision_t *noderev; + SVN_ERR(get_node_revision(&noderev, node)); + + if (noderev->kind != svn_node_dir) + return svn_error_create(SVN_ERR_FS_NOT_DIRECTORY, NULL, + _("Can't get entries of non-directory")); + + /* Get a dirent hash for this directory. 
 */
  return svn_fs_fs__rep_contents_dir_entry(dirent, node->fs,
                                           noderev, name, pool, pool);
}


svn_error_t *
svn_fs_fs__dag_set_entry(dag_node_t *node,
                         const char *entry_name,
                         const svn_fs_id_t *id,
                         svn_node_kind_t kind,
                         const char *txn_id,
                         apr_pool_t *pool)
{
  /* Check it's a directory. */
  if (node->kind != svn_node_dir)
    return svn_error_create
      (SVN_ERR_FS_NOT_DIRECTORY, NULL,
       _("Attempted to set entry in non-directory node"));

  /* Check it's mutable. */
  if (! svn_fs_fs__dag_check_mutable(node))
    return svn_error_create
      (SVN_ERR_FS_NOT_MUTABLE, NULL,
       _("Attempted to set entry in immutable node"));

  /* Both checks passed; delegate the actual write to set_entry(). */
  return set_entry(node, entry_name, id, kind, txn_id, pool);
}



/*** Proplists. ***/

/* Fetch the full property list of NODE into *PROPLIST_P (NULL when the
   node revision carries no properties).  Allocated in POOL. */
svn_error_t *
svn_fs_fs__dag_get_proplist(apr_hash_t **proplist_p,
                            dag_node_t *node,
                            apr_pool_t *pool)
{
  node_revision_t *noderev;
  apr_hash_t *proplist = NULL;

  SVN_ERR(get_node_revision(&noderev, node));

  SVN_ERR(svn_fs_fs__get_proplist(&proplist, node->fs,
                                  noderev, pool));

  *proplist_p = proplist;

  return SVN_NO_ERROR;
}


/* Replace the property list of the mutable node NODE with PROPLIST. */
svn_error_t *
svn_fs_fs__dag_set_proplist(dag_node_t *node,
                            apr_hash_t *proplist,
                            apr_pool_t *pool)
{
  node_revision_t *noderev;

  /* Sanity check: this node better be mutable! */
  if (! svn_fs_fs__dag_check_mutable(node))
    {
      svn_string_t *idstr = svn_fs_fs__id_unparse(node->id, pool);
      return svn_error_createf
        (SVN_ERR_FS_NOT_MUTABLE, NULL,
         "Can't set proplist on *immutable* node-revision %s",
         idstr->data);
    }

  /* Go get a fresh NODE-REVISION for this node. */
  SVN_ERR(get_node_revision(&noderev, node));

  /* Set the new proplist. */
  return svn_fs_fs__set_proplist(node->fs, noderev, proplist, pool);
}


/* Add INCREMENT (may be negative) to the mergeinfo count of the mutable
   node NODE and persist the updated node-revision.  Sanity-checks that
   the resulting count never goes negative and that a file never ends up
   with a count > 1. */
svn_error_t *
svn_fs_fs__dag_increment_mergeinfo_count(dag_node_t *node,
                                         apr_int64_t increment,
                                         apr_pool_t *pool)
{
  node_revision_t *noderev;

  /* Sanity check: this node better be mutable! */
  if (! svn_fs_fs__dag_check_mutable(node))
    {
      svn_string_t *idstr = svn_fs_fs__id_unparse(node->id, pool);
      return svn_error_createf
        (SVN_ERR_FS_NOT_MUTABLE, NULL,
         "Can't increment mergeinfo count on *immutable* node-revision %s",
         idstr->data);
    }

  /* A zero increment is a no-op; skip the read-modify-write cycle. */
  if (increment == 0)
    return SVN_NO_ERROR;

  /* Go get a fresh NODE-REVISION for this node. */
  SVN_ERR(get_node_revision(&noderev, node));

  noderev->mergeinfo_count += increment;
  if (noderev->mergeinfo_count < 0)
    {
      /* The format string is built in two steps so that the gettext
         template keeps a literal conversion while APR_INT64_T_FMT (a
         platform-dependent string like "lld") is spliced in at runtime:
         "%%s" and "%%%s" survive apr_psprintf as "%s" and "%<fmt>". */
      svn_string_t *idstr = svn_fs_fs__id_unparse(node->id, pool);
      return svn_error_createf
        (SVN_ERR_FS_CORRUPT, NULL,
         apr_psprintf(pool,
                      _("Can't increment mergeinfo count on node-revision %%s "
                        "to negative value %%%s"),
                      APR_INT64_T_FMT),
         idstr->data, noderev->mergeinfo_count);
    }
  if (noderev->mergeinfo_count > 1 && noderev->kind == svn_node_file)
    {
      /* Same two-step format-string construction as above. */
      svn_string_t *idstr = svn_fs_fs__id_unparse(node->id, pool);
      return svn_error_createf
        (SVN_ERR_FS_CORRUPT, NULL,
         apr_psprintf(pool,
                      _("Can't increment mergeinfo count on *file* "
                        "node-revision %%s to %%%s (> 1)"),
                      APR_INT64_T_FMT),
         idstr->data, noderev->mergeinfo_count);
    }

  /* Flush it out. */
  return svn_fs_fs__put_node_revision(node->fs, noderev->id,
                                      noderev, FALSE, pool);
}

/* Set the has-mergeinfo flag of the mutable node NODE to HAS_MERGEINFO
   and persist the updated node-revision. */
svn_error_t *
svn_fs_fs__dag_set_has_mergeinfo(dag_node_t *node,
                                 svn_boolean_t has_mergeinfo,
                                 apr_pool_t *pool)
{
  node_revision_t *noderev;

  /* Sanity check: this node better be mutable! */
  if (! svn_fs_fs__dag_check_mutable(node))
    {
      svn_string_t *idstr = svn_fs_fs__id_unparse(node->id, pool);
      return svn_error_createf
        (SVN_ERR_FS_NOT_MUTABLE, NULL,
         "Can't set mergeinfo flag on *immutable* node-revision %s",
         idstr->data);
    }

  /* Go get a fresh NODE-REVISION for this node. */
  SVN_ERR(get_node_revision(&noderev, node));

  noderev->has_mergeinfo = has_mergeinfo;

  /* Flush it out. */
  return svn_fs_fs__put_node_revision(node->fs, noderev->id,
                                      noderev, FALSE, pool);
}


/*** Roots.
 ***/

/* Open the root directory node of revision REV in FS; return it in
   *NODE_P, allocated in POOL. */
svn_error_t *
svn_fs_fs__dag_revision_root(dag_node_t **node_p,
                             svn_fs_t *fs,
                             svn_revnum_t rev,
                             apr_pool_t *pool)
{
  svn_fs_id_t *root_id;

  SVN_ERR(svn_fs_fs__rev_get_root(&root_id, fs, rev, pool));
  return svn_fs_fs__dag_get_node(node_p, fs, root_id, pool);
}


/* Open the root node of transaction TXN_ID in FS; return it in
   *NODE_P, allocated in POOL. */
svn_error_t *
svn_fs_fs__dag_txn_root(dag_node_t **node_p,
                        svn_fs_t *fs,
                        const char *txn_id,
                        apr_pool_t *pool)
{
  const svn_fs_id_t *root_id, *ignored;

  SVN_ERR(svn_fs_fs__get_txn_ids(&root_id, &ignored, fs, txn_id, pool));
  return svn_fs_fs__dag_get_node(node_p, fs, root_id, pool);
}


/* Open the *base* root node of transaction TXN_ID in FS; return it in
   *NODE_P, allocated in POOL. */
svn_error_t *
svn_fs_fs__dag_txn_base_root(dag_node_t **node_p,
                             svn_fs_t *fs,
                             const char *txn_id,
                             apr_pool_t *pool)
{
  const svn_fs_id_t *base_root_id, *ignored;

  SVN_ERR(svn_fs_fs__get_txn_ids(&ignored, &base_root_id, fs, txn_id, pool));
  return svn_fs_fs__dag_get_node(node_p, fs, base_root_id, pool);
}


/* Make a mutable clone of PARENT's entry NAME (or reuse it if already
   mutable) and point PARENT's directory entry at the clone; return the
   clone in *CHILD_P. */
svn_error_t *
svn_fs_fs__dag_clone_child(dag_node_t **child_p,
                           dag_node_t *parent,
                           const char *parent_path,
                           const char *name,
                           const char *copy_id,
                           const char *txn_id,
                           svn_boolean_t is_parent_copyroot,
                           apr_pool_t *pool)
{
  dag_node_t *cur_entry; /* parent's current entry named NAME */
  const svn_fs_id_t *new_node_id; /* node id we'll put into NEW_NODE */
  svn_fs_t *fs = svn_fs_fs__dag_get_fs(parent);
  apr_pool_t *subpool = svn_pool_create(pool);

  /* First check that the parent is mutable. */
  if (! svn_fs_fs__dag_check_mutable(parent))
    return svn_error_createf
      (SVN_ERR_FS_NOT_MUTABLE, NULL,
       "Attempted to clone child of non-mutable node");

  /* Make sure that NAME is a single path component. */
  if (! svn_path_is_single_path_component(name))
    return svn_error_createf
      (SVN_ERR_FS_NOT_SINGLE_PATH_COMPONENT, NULL,
       "Attempted to make a child clone with an illegal name '%s'", name);

  /* Find the node named NAME in PARENT's entries list if it exists. */
  SVN_ERR(svn_fs_fs__dag_open(&cur_entry, parent, name, pool, subpool));

  /* Check for mutability in the node we found.  If it's mutable, we
     don't need to clone it. */
  if (svn_fs_fs__dag_check_mutable(cur_entry))
    {
      /* This has already been cloned */
      new_node_id = cur_entry->id;
    }
  else
    {
      node_revision_t *noderev, *parent_noderev;

      /* Go get a fresh NODE-REVISION for current child node. */
      SVN_ERR(get_node_revision(&noderev, cur_entry));

      if (is_parent_copyroot)
        {
          /* Inherit the copy root from the parent. */
          SVN_ERR(get_node_revision(&parent_noderev, parent));
          noderev->copyroot_rev = parent_noderev->copyroot_rev;
          noderev->copyroot_path = apr_pstrdup(pool,
                                               parent_noderev->copyroot_path);
        }

      /* The clone itself is not a copy. */
      noderev->copyfrom_path = NULL;
      noderev->copyfrom_rev = SVN_INVALID_REVNUM;

      noderev->predecessor_id = svn_fs_fs__id_copy(cur_entry->id, pool);
      if (noderev->predecessor_count != -1)
        noderev->predecessor_count++;
      noderev->created_path = svn_fspath__join(parent_path, name, pool);

      SVN_ERR(svn_fs_fs__create_successor(&new_node_id, fs, cur_entry->id,
                                          noderev, copy_id, txn_id, pool));

      /* Replace the ID in the parent's ENTRY list with the ID which
         refers to the mutable clone of this child. */
      SVN_ERR(set_entry(parent, name, new_node_id, noderev->kind, txn_id,
                        pool));
    }

  /* Initialize the youngster. */
  svn_pool_destroy(subpool);
  return svn_fs_fs__dag_get_node(child_p, fs, new_node_id, pool);
}



/* Return (in *ROOT_P) the already-cloned mutable root directory of
   transaction TXN_ID; asserts the root has in fact been cloned. */
svn_error_t *
svn_fs_fs__dag_clone_root(dag_node_t **root_p,
                          svn_fs_t *fs,
                          const char *txn_id,
                          apr_pool_t *pool)
{
  const svn_fs_id_t *base_root_id, *root_id;

  /* Get the node ID's of the root directories of the transaction and
     its base revision. */
  SVN_ERR(svn_fs_fs__get_txn_ids(&root_id, &base_root_id, fs, txn_id, pool));

  /* Oh, give me a clone...
     (If they're the same, we haven't cloned the transaction's root
     directory yet.) */
  SVN_ERR_ASSERT(!svn_fs_fs__id_eq(root_id, base_root_id));

  /*
   * (Sung to the tune of "Home, Home on the Range", with thanks to
   * Randall Garrett and Isaac Asimov.)
   */

  /* One way or another, root_id now identifies a cloned root node. */
  return svn_fs_fs__dag_get_node(root_p, fs, root_id, pool);
}


/* Delete entry NAME from the mutable directory PARENT under TXN_ID,
   recursively removing any mutable nodes it reaches. */
svn_error_t *
svn_fs_fs__dag_delete(dag_node_t *parent,
                      const char *name,
                      const char *txn_id,
                      apr_pool_t *pool)
{
  node_revision_t *parent_noderev;
  svn_fs_t *fs = parent->fs;
  svn_fs_dirent_t *dirent;
  svn_fs_id_t *id;
  apr_pool_t *subpool;

  /* Make sure parent is a directory. */
  if (parent->kind != svn_node_dir)
    return svn_error_createf
      (SVN_ERR_FS_NOT_DIRECTORY, NULL,
       "Attempted to delete entry '%s' from *non*-directory node", name);

  /* Make sure parent is mutable. */
  if (! svn_fs_fs__dag_check_mutable(parent))
    return svn_error_createf
      (SVN_ERR_FS_NOT_MUTABLE, NULL,
       "Attempted to delete entry '%s' from immutable directory node", name);

  /* Make sure that NAME is a single path component. */
  if (! svn_path_is_single_path_component(name))
    return svn_error_createf
      (SVN_ERR_FS_NOT_SINGLE_PATH_COMPONENT, NULL,
       "Attempted to delete a node with an illegal name '%s'", name);

  /* Get a fresh NODE-REVISION for the parent node. */
  SVN_ERR(get_node_revision(&parent_noderev, parent));

  subpool = svn_pool_create(pool);

  /* Search this directory for a dirent with that NAME. */
  SVN_ERR(svn_fs_fs__rep_contents_dir_entry(&dirent, fs, parent_noderev,
                                            name, subpool, subpool));

  /* If we never found ID in ENTRIES (perhaps because there are no
     ENTRIES, perhaps because ID just isn't in the existing ENTRIES
     ... it doesn't matter), return an error. */
  if (! dirent)
    return svn_error_createf
      (SVN_ERR_FS_NO_SUCH_ENTRY, NULL,
       "Delete failed--directory has no entry '%s'", name);

  /* Copy the ID out of the subpool and release the rest of the
     directory listing.
 */
  id = svn_fs_fs__id_copy(dirent->id, pool);
  svn_pool_destroy(subpool);

  /* If mutable, remove it and any mutable children from db. */
  SVN_ERR(svn_fs_fs__dag_delete_if_mutable(parent->fs, id, pool));

  /* Remove this entry from its parent's entries list. */
  return svn_fs_fs__set_entry(parent->fs, txn_id, parent_noderev, name,
                              NULL, svn_node_unknown, pool);
}


/* Remove the single (mutable) node revision ID from FS; errors out if
   the node is immutable.  Does NOT recurse into children. */
svn_error_t *
svn_fs_fs__dag_remove_node(svn_fs_t *fs,
                           const svn_fs_id_t *id,
                           apr_pool_t *pool)
{
  dag_node_t *node;

  /* Fetch the node. */
  SVN_ERR(svn_fs_fs__dag_get_node(&node, fs, id, pool));

  /* If immutable, do nothing and return immediately. */
  if (! svn_fs_fs__dag_check_mutable(node))
    return svn_error_createf(SVN_ERR_FS_NOT_MUTABLE, NULL,
                             "Attempted removal of immutable node");

  /* Delete the node revision. */
  return svn_fs_fs__delete_node_revision(fs, id, pool);
}


/* Recursively delete node ID from FS if (and only if) it is mutable;
   silently succeeds for immutable nodes. */
svn_error_t *
svn_fs_fs__dag_delete_if_mutable(svn_fs_t *fs,
                                 const svn_fs_id_t *id,
                                 apr_pool_t *pool)
{
  dag_node_t *node;

  /* Get the node. */
  SVN_ERR(svn_fs_fs__dag_get_node(&node, fs, id, pool));

  /* If immutable, do nothing and return immediately. */
  if (! svn_fs_fs__dag_check_mutable(node))
    return SVN_NO_ERROR;

  /* Else it's mutable.  Recurse on directories... */
  if (node->kind == svn_node_dir)
    {
      apr_hash_t *entries;
      apr_hash_index_t *hi;

      /* Loop over hash entries */
      SVN_ERR(svn_fs_fs__dag_dir_entries(&entries, node, pool));
      if (entries)
        {
          for (hi = apr_hash_first(pool, entries);
               hi;
               hi = apr_hash_next(hi))
            {
              svn_fs_dirent_t *dirent = svn__apr_hash_index_val(hi);

              SVN_ERR(svn_fs_fs__dag_delete_if_mutable(fs, dirent->id,
                                                       pool));
            }
        }
    }

  /* ... then delete the node itself, after deleting any mutable
     representations and strings it points to. */
  return svn_fs_fs__dag_remove_node(fs, id, pool);
}

/* Create a new (empty, mutable) file named NAME in PARENT. */
svn_error_t *
svn_fs_fs__dag_make_file(dag_node_t **child_p,
                         dag_node_t *parent,
                         const char *parent_path,
                         const char *name,
                         const char *txn_id,
                         apr_pool_t *pool)
{
  /* Call our little helper function */
  return make_entry(child_p, parent, parent_path, name, FALSE, txn_id, pool);
}


/* Create a new (empty, mutable) directory named NAME in PARENT. */
svn_error_t *
svn_fs_fs__dag_make_dir(dag_node_t **child_p,
                        dag_node_t *parent,
                        const char *parent_path,
                        const char *name,
                        const char *txn_id,
                        apr_pool_t *pool)
{
  /* Call our little helper function */
  return make_entry(child_p, parent, parent_path, name, TRUE, txn_id, pool);
}


/* Return (in *CONTENTS_P) a readable stream over the text of FILE. */
svn_error_t *
svn_fs_fs__dag_get_contents(svn_stream_t **contents_p,
                            dag_node_t *file,
                            apr_pool_t *pool)
{
  node_revision_t *noderev;
  svn_stream_t *contents;

  /* Make sure our node is a file. */
  if (file->kind != svn_node_file)
    return svn_error_createf
      (SVN_ERR_FS_NOT_FILE, NULL,
       "Attempted to get textual contents of a *non*-file node");

  /* Go get a fresh node-revision for FILE. */
  SVN_ERR(get_node_revision(&noderev, file));

  /* Get a stream to the contents. */
  SVN_ERR(svn_fs_fs__get_contents(&contents, file->fs,
                                  noderev, pool));

  *contents_p = contents;

  return SVN_NO_ERROR;
}


/* Return (in *STREAM_P) a delta stream turning SOURCE's text into
   TARGET's; SOURCE may be NULL (delta against the empty file). */
svn_error_t *
svn_fs_fs__dag_get_file_delta_stream(svn_txdelta_stream_t **stream_p,
                                     dag_node_t *source,
                                     dag_node_t *target,
                                     apr_pool_t *pool)
{
  node_revision_t *src_noderev;
  node_revision_t *tgt_noderev;

  /* Make sure our nodes are files. */
  if ((source && source->kind != svn_node_file)
      || target->kind != svn_node_file)
    return svn_error_createf
      (SVN_ERR_FS_NOT_FILE, NULL,
       "Attempted to get textual contents of a *non*-file node");

  /* Go get fresh node-revisions for the nodes. */
  if (source)
    SVN_ERR(get_node_revision(&src_noderev, source));
  else
    src_noderev = NULL;
  SVN_ERR(get_node_revision(&tgt_noderev, target));

  /* Get the delta stream.
 */
  return svn_fs_fs__get_file_delta_stream(stream_p, target->fs,
                                          src_noderev, tgt_noderev, pool);
}


/* Try to hand NODE's file contents to PROCESSOR in one go; *SUCCESS
   reports whether the lower layer could do it. */
svn_error_t *
svn_fs_fs__dag_try_process_file_contents(svn_boolean_t *success,
                                         dag_node_t *node,
                                         svn_fs_process_contents_func_t processor,
                                         void* baton,
                                         apr_pool_t *pool)
{
  node_revision_t *noderev;

  /* Go get fresh node-revisions for the nodes. */
  SVN_ERR(get_node_revision(&noderev, node));

  return svn_fs_fs__try_process_file_contents(success, node->fs,
                                              noderev,
                                              processor, baton, pool);
}


/* Return (in *LENGTH) the length in bytes of FILE's contents. */
svn_error_t *
svn_fs_fs__dag_file_length(svn_filesize_t *length,
                           dag_node_t *file,
                           apr_pool_t *pool)
{
  node_revision_t *noderev;

  /* Make sure our node is a file. */
  if (file->kind != svn_node_file)
    return svn_error_createf
      (SVN_ERR_FS_NOT_FILE, NULL,
       "Attempted to get length of a *non*-file node");

  /* Go get a fresh node-revision for FILE, and . */
  SVN_ERR(get_node_revision(&noderev, file));

  return svn_fs_fs__file_length(length, noderev, pool);
}


/* Return (in *CHECKSUM) FILE's checksum of the requested KIND. */
svn_error_t *
svn_fs_fs__dag_file_checksum(svn_checksum_t **checksum,
                             dag_node_t *file,
                             svn_checksum_kind_t kind,
                             apr_pool_t *pool)
{
  node_revision_t *noderev;

  if (file->kind != svn_node_file)
    return svn_error_createf
      (SVN_ERR_FS_NOT_FILE, NULL,
       "Attempted to get checksum of a *non*-file node");

  SVN_ERR(get_node_revision(&noderev, file));

  return svn_fs_fs__file_checksum(checksum, noderev, kind, pool);
}


/* Return (in *CONTENTS) a writable stream that replaces the text of
   the mutable file FILE. */
svn_error_t *
svn_fs_fs__dag_get_edit_stream(svn_stream_t **contents,
                               dag_node_t *file,
                               apr_pool_t *pool)
{
  node_revision_t *noderev;
  svn_stream_t *ws;

  /* Make sure our node is a file. */
  if (file->kind != svn_node_file)
    return svn_error_createf
      (SVN_ERR_FS_NOT_FILE, NULL,
       "Attempted to set textual contents of a *non*-file node");

  /* Make sure our node is mutable. */
  if (! svn_fs_fs__dag_check_mutable(file))
    return svn_error_createf
      (SVN_ERR_FS_NOT_MUTABLE, NULL,
       "Attempted to set textual contents of an immutable node");

  /* Get the node revision. */
  SVN_ERR(get_node_revision(&noderev, file));

  SVN_ERR(svn_fs_fs__set_contents(&ws, file->fs, noderev, pool));

  *contents = ws;

  return SVN_NO_ERROR;
}



/* Finish an edit of FILE; when CHECKSUM is non-NULL, verify it against
   the stored contents and fail with a checksum-mismatch error. */
svn_error_t *
svn_fs_fs__dag_finalize_edits(dag_node_t *file,
                              const svn_checksum_t *checksum,
                              apr_pool_t *pool)
{
  if (checksum)
    {
      svn_checksum_t *file_checksum;

      SVN_ERR(svn_fs_fs__dag_file_checksum(&file_checksum, file,
                                           checksum->kind, pool));
      if (!svn_checksum_match(checksum, file_checksum))
        return svn_checksum_mismatch_err(checksum, file_checksum, pool,
                                         _("Checksum mismatch for '%s'"),
                                         file->created_path);
    }

  return SVN_NO_ERROR;
}


/* Deep-copy NODE into POOL so the result carries no pointers into
   NODE's original pool. */
dag_node_t *
svn_fs_fs__dag_dup(const dag_node_t *node,
                   apr_pool_t *pool)
{
  /* Allocate our new node. */
  dag_node_t *new_node = apr_pcalloc(pool, sizeof(*new_node));

  new_node->fs = node->fs;
  new_node->id = svn_fs_fs__id_copy(node->id, pool);
  new_node->kind = node->kind;
  new_node->created_path = apr_pstrdup(pool, node->created_path);

  /* Only copy cached node_revision_t for immutable nodes. */
  if (node->node_revision && !svn_fs_fs__dag_check_mutable(node))
    {
      new_node->node_revision = copy_node_revision(node->node_revision, pool);
      new_node->node_revision->id =
          svn_fs_fs__id_copy(node->node_revision->id, pool);
      new_node->node_revision->is_fresh_txn_root =
          node->node_revision->is_fresh_txn_root;
    }
  new_node->node_pool = pool;

  return new_node;
}

/* Flatten NODE into a single cacheable buffer (*DATA/*DATA_LEN).
   Implements svn_cache__serialize_func_t; the 'fs' member is dropped
   and restored on deserialization. */
svn_error_t *
svn_fs_fs__dag_serialize(void **data,
                         apr_size_t *data_len,
                         void *in,
                         apr_pool_t *pool)
{
  dag_node_t *node = in;
  svn_stringbuf_t *serialized;

  /* create a serialization context and serialize the dag node as root */
  svn_temp_serializer__context_t *context =
      svn_temp_serializer__init(node,
                                sizeof(*node),
                                1024 - SVN_TEMP_SERIALIZER__OVERHEAD,
                                pool);

  /* for mutable nodes, we will _never_ cache the noderev */
  if (node->node_revision && !svn_fs_fs__dag_check_mutable(node))
    svn_fs_fs__noderev_serialize(context, &node->node_revision);
  else
    svn_temp_serializer__set_null(context,
                                  (const void * const *)&node->node_revision);

  /* The deserializer will use its own pool. */
  svn_temp_serializer__set_null(context,
                                (const void * const *)&node->node_pool);

  /* serialize other sub-structures */
  svn_fs_fs__id_serialize(context, (const svn_fs_id_t **)&node->id);
  svn_fs_fs__id_serialize(context, &node->fresh_root_predecessor_id);
  svn_temp_serializer__add_string(context, &node->created_path);

  /* return serialized data */
  serialized = svn_temp_serializer__get(context);
  *data = serialized->data;
  *data_len = serialized->len;

  return SVN_NO_ERROR;
}

/* Reconstruct a dag_node_t from the buffer DATA/DATA_LEN produced by
   svn_fs_fs__dag_serialize, leaving 'fs' NULL for the caller to set.
   Implements svn_cache__deserialize_func_t. */
svn_error_t *
svn_fs_fs__dag_deserialize(void **out,
                           void *data,
                           apr_size_t data_len,
                           apr_pool_t *pool)
{
  dag_node_t *node = (dag_node_t *)data;
  if (data_len == 0)
    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                            _("Empty noderev in cache"));

  /* Copy the _full_ buffer as it also contains the sub-structures.
*/ + node->fs = NULL; + + /* fixup all references to sub-structures */ + svn_fs_fs__id_deserialize(node, &node->id); + svn_fs_fs__id_deserialize(node, + (svn_fs_id_t **)&node->fresh_root_predecessor_id); + svn_fs_fs__noderev_deserialize(node, &node->node_revision); + node->node_pool = pool; + + svn_temp_deserializer__resolve(node, (void**)&node->created_path); + + /* return result */ + *out = node; + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__dag_open(dag_node_t **child_p, + dag_node_t *parent, + const char *name, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + const svn_fs_id_t *node_id; + + /* Ensure that NAME exists in PARENT's entry list. */ + SVN_ERR(dir_entry_id_from_node(&node_id, parent, name, + scratch_pool, scratch_pool)); + if (! node_id) + return svn_error_createf + (SVN_ERR_FS_NOT_FOUND, NULL, + "Attempted to open non-existent child node '%s'", name); + + /* Make sure that NAME is a single path component. */ + if (! svn_path_is_single_path_component(name)) + return svn_error_createf + (SVN_ERR_FS_NOT_SINGLE_PATH_COMPONENT, NULL, + "Attempted to open node with an illegal name '%s'", name); + + /* Now get the node that was requested. */ + return svn_fs_fs__dag_get_node(child_p, svn_fs_fs__dag_get_fs(parent), + node_id, result_pool); +} + + +svn_error_t * +svn_fs_fs__dag_copy(dag_node_t *to_node, + const char *entry, + dag_node_t *from_node, + svn_boolean_t preserve_history, + svn_revnum_t from_rev, + const char *from_path, + const char *txn_id, + apr_pool_t *pool) +{ + const svn_fs_id_t *id; + + if (preserve_history) + { + node_revision_t *from_noderev, *to_noderev; + const char *copy_id; + const svn_fs_id_t *src_id = svn_fs_fs__dag_get_id(from_node); + svn_fs_t *fs = svn_fs_fs__dag_get_fs(from_node); + + /* Make a copy of the original node revision. */ + SVN_ERR(get_node_revision(&from_noderev, from_node)); + to_noderev = copy_node_revision(from_noderev, pool); + + /* Reserve a copy ID for this new copy. 
*/ + SVN_ERR(svn_fs_fs__reserve_copy_id(©_id, fs, txn_id, pool)); + + /* Create a successor with its predecessor pointing at the copy + source. */ + to_noderev->predecessor_id = svn_fs_fs__id_copy(src_id, pool); + if (to_noderev->predecessor_count != -1) + to_noderev->predecessor_count++; + to_noderev->created_path = + svn_fspath__join(svn_fs_fs__dag_get_created_path(to_node), entry, + pool); + to_noderev->copyfrom_path = apr_pstrdup(pool, from_path); + to_noderev->copyfrom_rev = from_rev; + + /* Set the copyroot equal to our own id. */ + to_noderev->copyroot_path = NULL; + + SVN_ERR(svn_fs_fs__create_successor(&id, fs, src_id, to_noderev, + copy_id, txn_id, pool)); + + } + else /* don't preserve history */ + { + id = svn_fs_fs__dag_get_id(from_node); + } + + /* Set the entry in to_node to the new id. */ + return svn_fs_fs__dag_set_entry(to_node, entry, id, from_node->kind, + txn_id, pool); +} + + + +/*** Comparison. ***/ + +svn_error_t * +svn_fs_fs__dag_things_different(svn_boolean_t *props_changed, + svn_boolean_t *contents_changed, + dag_node_t *node1, + dag_node_t *node2) +{ + node_revision_t *noderev1, *noderev2; + + /* If we have no place to store our results, don't bother doing + anything. */ + if (! props_changed && ! contents_changed) + return SVN_NO_ERROR; + + /* The node revision skels for these two nodes. */ + SVN_ERR(get_node_revision(&noderev1, node1)); + SVN_ERR(get_node_revision(&noderev2, node2)); + + /* Compare property keys. */ + if (props_changed != NULL) + *props_changed = (! svn_fs_fs__noderev_same_rep_key(noderev1->prop_rep, + noderev2->prop_rep)); + + /* Compare contents keys. */ + if (contents_changed != NULL) + *contents_changed = + (! svn_fs_fs__noderev_same_rep_key(noderev1->data_rep, + noderev2->data_rep)); + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__dag_get_copyroot(svn_revnum_t *rev, + const char **path, + dag_node_t *node) +{ + node_revision_t *noderev; + + /* Go get a fresh node-revision for NODE. 
*/ + SVN_ERR(get_node_revision(&noderev, node)); + + *rev = noderev->copyroot_rev; + *path = noderev->copyroot_path; + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__dag_get_copyfrom_rev(svn_revnum_t *rev, + dag_node_t *node) +{ + node_revision_t *noderev; + + /* Go get a fresh node-revision for NODE. */ + SVN_ERR(get_node_revision(&noderev, node)); + + *rev = noderev->copyfrom_rev; + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__dag_get_copyfrom_path(const char **path, + dag_node_t *node) +{ + node_revision_t *noderev; + + /* Go get a fresh node-revision for NODE. */ + SVN_ERR(get_node_revision(&noderev, node)); + + *path = noderev->copyfrom_path; + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__dag_update_ancestry(dag_node_t *target, + dag_node_t *source, + apr_pool_t *pool) +{ + node_revision_t *source_noderev, *target_noderev; + + if (! svn_fs_fs__dag_check_mutable(target)) + return svn_error_createf + (SVN_ERR_FS_NOT_MUTABLE, NULL, + _("Attempted to update ancestry of non-mutable node")); + + SVN_ERR(get_node_revision(&source_noderev, source)); + SVN_ERR(get_node_revision(&target_noderev, target)); + + target_noderev->predecessor_id = source->id; + target_noderev->predecessor_count = source_noderev->predecessor_count; + if (target_noderev->predecessor_count != -1) + target_noderev->predecessor_count++; + + return svn_fs_fs__put_node_revision(target->fs, target->id, target_noderev, + FALSE, pool); +} diff --git a/subversion/libsvn_fs_fs/dag.h b/subversion/libsvn_fs_fs/dag.h new file mode 100644 index 0000000..867b025 --- /dev/null +++ b/subversion/libsvn_fs_fs/dag.h @@ -0,0 +1,581 @@ +/* dag.h : DAG-like interface filesystem, private to libsvn_fs + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#ifndef SVN_LIBSVN_FS_DAG_H +#define SVN_LIBSVN_FS_DAG_H + +#include "svn_fs.h" +#include "svn_delta.h" +#include "private/svn_cache.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +/* The interface in this file provides all the essential filesystem + operations, but exposes the filesystem's DAG structure. This makes + it simpler to implement than the public interface, since a client + of this interface has to understand and cope with shared structure + directly as it appears in the database. However, it's still a + self-consistent set of invariants to maintain, making it + (hopefully) a useful interface boundary. + + In other words: + + - The dag_node_t interface exposes the internal DAG structure of + the filesystem, while the svn_fs.h interface does any cloning + necessary to make the filesystem look like a tree. + + - The dag_node_t interface exposes the existence of copy nodes, + whereas the svn_fs.h handles them transparently. + + - dag_node_t's must be explicitly cloned, whereas the svn_fs.h + operations make clones implicitly. 
+ + - Callers of the dag_node_t interface use Berkeley DB transactions + to ensure consistency between operations, while callers of the + svn_fs.h interface use Subversion transactions. */ + + +/* Generic DAG node stuff. */ + +typedef struct dag_node_t dag_node_t; + +/* Fill *NODE with a dag_node_t representing node revision ID in FS, + allocating in POOL. */ +svn_error_t * +svn_fs_fs__dag_get_node(dag_node_t **node, + svn_fs_t *fs, + const svn_fs_id_t *id, + apr_pool_t *pool); + + +/* Return a new dag_node_t object referring to the same node as NODE, + allocated in POOL. If you're trying to build a structure in a + pool that wants to refer to dag nodes that may have been allocated + elsewhere, you can call this function and avoid inter-pool pointers. */ +dag_node_t * +svn_fs_fs__dag_dup(const dag_node_t *node, + apr_pool_t *pool); + +/* Serialize a DAG node, except don't try to preserve the 'fs' member. + Implements svn_cache__serialize_func_t */ +svn_error_t * +svn_fs_fs__dag_serialize(void **data, + apr_size_t *data_len, + void *in, + apr_pool_t *pool); + +/* Deserialize a DAG node, leaving the 'fs' member as NULL. + Implements svn_cache__deserialize_func_t */ +svn_error_t * +svn_fs_fs__dag_deserialize(void **out, + void *data, + apr_size_t data_len, + apr_pool_t *pool); + +/* Return the filesystem containing NODE. */ +svn_fs_t *svn_fs_fs__dag_get_fs(dag_node_t *node); + +/* Changes the filesystem containing NODE to FS. (Used when pulling + nodes out of a shared cache, say.) */ +void svn_fs_fs__dag_set_fs(dag_node_t *node, svn_fs_t *fs); + + +/* Set *REV to NODE's revision number, allocating in POOL. If NODE + has never been committed as part of a revision, set *REV to + SVN_INVALID_REVNUM. */ +svn_error_t *svn_fs_fs__dag_get_revision(svn_revnum_t *rev, + dag_node_t *node, + apr_pool_t *pool); + + +/* Return the node revision ID of NODE. The value returned is shared + with NODE, and will be deallocated when NODE is. 
*/ +const svn_fs_id_t *svn_fs_fs__dag_get_id(const dag_node_t *node); + + +/* Return the created path of NODE. The value returned is shared + with NODE, and will be deallocated when NODE is. */ +const char *svn_fs_fs__dag_get_created_path(dag_node_t *node); + + +/* Set *ID_P to the node revision ID of NODE's immediate predecessor, + or NULL if NODE has no predecessor. + */ +svn_error_t *svn_fs_fs__dag_get_predecessor_id(const svn_fs_id_t **id_p, + dag_node_t *node); + + +/* Set *COUNT to the number of predecessors NODE has (recursively), or + -1 if not known. + */ +/* ### This function is currently only used by 'verify'. */ +svn_error_t *svn_fs_fs__dag_get_predecessor_count(int *count, + dag_node_t *node); + +/* Set *COUNT to the number of node under NODE (inclusive) with + svn:mergeinfo properties. + */ +svn_error_t *svn_fs_fs__dag_get_mergeinfo_count(apr_int64_t *count, + dag_node_t *node); + +/* Set *DO_THEY to a flag indicating whether or not NODE is a + directory with at least one descendant (not including itself) with + svn:mergeinfo. + */ +svn_error_t * +svn_fs_fs__dag_has_descendants_with_mergeinfo(svn_boolean_t *do_they, + dag_node_t *node); + +/* Set *HAS_MERGEINFO to a flag indicating whether or not NODE itself + has svn:mergeinfo set on it. + */ +svn_error_t * +svn_fs_fs__dag_has_mergeinfo(svn_boolean_t *has_mergeinfo, + dag_node_t *node); + +/* Return non-zero IFF NODE is currently mutable. */ +svn_boolean_t svn_fs_fs__dag_check_mutable(const dag_node_t *node); + +/* Return the node kind of NODE. */ +svn_node_kind_t svn_fs_fs__dag_node_kind(dag_node_t *node); + +/* Set *PROPLIST_P to a PROPLIST hash representing the entire property + list of NODE, allocating from POOL. The hash has const char * + names (the property names) and svn_string_t * values (the property + values). + + If properties do not exist on NODE, *PROPLIST_P will be set to + NULL. + + Use POOL for all allocations. 
+ */ +svn_error_t *svn_fs_fs__dag_get_proplist(apr_hash_t **proplist_p, + dag_node_t *node, + apr_pool_t *pool); + +/* Set the property list of NODE to PROPLIST, allocating from POOL. + The node being changed must be mutable. + + Use POOL for all allocations. + */ +svn_error_t *svn_fs_fs__dag_set_proplist(dag_node_t *node, + apr_hash_t *proplist, + apr_pool_t *pool); + +/* Increment the mergeinfo_count field on NODE by INCREMENT. The node + being changed must be mutable. + + Use POOL for all allocations. + */ +svn_error_t *svn_fs_fs__dag_increment_mergeinfo_count(dag_node_t *node, + apr_int64_t increment, + apr_pool_t *pool); + +/* Set the has-mergeinfo flag on NODE to HAS_MERGEINFO. The node + being changed must be mutable. + + Use POOL for all allocations. + */ +svn_error_t *svn_fs_fs__dag_set_has_mergeinfo(dag_node_t *node, + svn_boolean_t has_mergeinfo, + apr_pool_t *pool); + + + +/* Revision and transaction roots. */ + + +/* Open the root of revision REV of filesystem FS, allocating from + POOL. Set *NODE_P to the new node. */ +svn_error_t *svn_fs_fs__dag_revision_root(dag_node_t **node_p, + svn_fs_t *fs, + svn_revnum_t rev, + apr_pool_t *pool); + + +/* Set *NODE_P to the root of transaction TXN_ID in FS, allocating + from POOL. + + Note that the root node of TXN_ID is not necessarily mutable. If + no changes have been made in the transaction, then it may share its + root directory with its base revision. To get a mutable root node + for a transaction, call svn_fs_fs__dag_clone_root. */ +svn_error_t *svn_fs_fs__dag_txn_root(dag_node_t **node_p, + svn_fs_t *fs, + const char *txn_id, + apr_pool_t *pool); + + +/* Set *NODE_P to the base root of transaction TXN_ID in FS, + allocating from POOL. Allocate the node in TRAIL->pool. 
*/ +svn_error_t *svn_fs_fs__dag_txn_base_root(dag_node_t **node_p, + svn_fs_t *fs, + const char *txn_id, + apr_pool_t *pool); + + +/* Clone the root directory of TXN_ID in FS, and update the + `transactions' table entry to point to it, unless this has been + done already. In either case, set *ROOT_P to a reference to the + root directory clone. Allocate *ROOT_P in POOL. */ +svn_error_t *svn_fs_fs__dag_clone_root(dag_node_t **root_p, + svn_fs_t *fs, + const char *txn_id, + apr_pool_t *pool); + + + +/* Directories. */ + + +/* Open the node named NAME in the directory PARENT. Set *CHILD_P to + the new node, allocated in RESULT_POOL. NAME must be a single path + component; it cannot be a slash-separated directory path. + */ +svn_error_t * +svn_fs_fs__dag_open(dag_node_t **child_p, + dag_node_t *parent, + const char *name, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool); + + +/* Set *ENTRIES_P to a hash table of NODE's entries. The keys of the + table are entry names, and the values are svn_fs_dirent_t's. The + returned table (and its keys and values) is allocated in POOL, + which is also used for temporary allocations. */ +svn_error_t *svn_fs_fs__dag_dir_entries(apr_hash_t **entries_p, + dag_node_t *node, + apr_pool_t *pool); + +/* Fetches the NODE's entries and returns a copy of the entry selected + by the key value given in NAME and set *DIRENT to a copy of that + entry. If such entry was found, the copy will be allocated in POOL. + Otherwise, the *DIRENT will be set to NULL. + */ +/* ### This function is currently only called from dag.c. */ +svn_error_t * svn_fs_fs__dag_dir_entry(svn_fs_dirent_t **dirent, + dag_node_t *node, + const char* name, + apr_pool_t *pool); + +/* Set ENTRY_NAME in NODE to point to ID (with kind KIND), allocating + from POOL. NODE must be a mutable directory. ID can refer to a + mutable or immutable node. If ENTRY_NAME does not exist, it will + be created. TXN_ID is the Subversion transaction under which this + occurs. 
+ + Use POOL for all allocations, including to cache the node_revision in + NODE. + */ +svn_error_t *svn_fs_fs__dag_set_entry(dag_node_t *node, + const char *entry_name, + const svn_fs_id_t *id, + svn_node_kind_t kind, + const char *txn_id, + apr_pool_t *pool); + + +/* Make a new mutable clone of the node named NAME in PARENT, and + adjust PARENT's directory entry to point to it, unless NAME in + PARENT already refers to a mutable node. In either case, set + *CHILD_P to a reference to the new node, allocated in POOL. PARENT + must be mutable. NAME must be a single path component; it cannot + be a slash-separated directory path. PARENT_PATH must be the + canonicalized absolute path of the parent directory. + + COPY_ID, if non-NULL, is a key into the `copies' table, and + indicates that this new node is being created as the result of a + copy operation, and specifically which operation that was. + + PATH is the canonicalized absolute path at which this node is being + created. + + TXN_ID is the Subversion transaction under which this occurs. + + Use POOL for all allocations. + */ +svn_error_t *svn_fs_fs__dag_clone_child(dag_node_t **child_p, + dag_node_t *parent, + const char *parent_path, + const char *name, + const char *copy_id, + const char *txn_id, + svn_boolean_t is_parent_copyroot, + apr_pool_t *pool); + + +/* Delete the directory entry named NAME from PARENT, allocating from + POOL. PARENT must be mutable. NAME must be a single path + component; it cannot be a slash-separated directory path. If the + node being deleted is a mutable directory, remove all mutable nodes + reachable from it. TXN_ID is the Subversion transaction under + which this occurs. + + If return SVN_ERR_FS_NO_SUCH_ENTRY, then there is no entry NAME in + PARENT. + + Use POOL for all allocations. 
+ */ +svn_error_t *svn_fs_fs__dag_delete(dag_node_t *parent, + const char *name, + const char *txn_id, + apr_pool_t *pool); + + +/* Delete the node revision assigned to node ID from FS's `nodes' + table, allocating from POOL. Also delete any mutable + representations and strings associated with that node revision. ID + may refer to a file or directory, which must be mutable. + + NOTE: If ID represents a directory, and that directory has mutable + children, you risk orphaning those children by leaving them + dangling, disconnected from all DAG trees. It is assumed that + callers of this interface know what in the world they are doing. */ +svn_error_t *svn_fs_fs__dag_remove_node(svn_fs_t *fs, + const svn_fs_id_t *id, + apr_pool_t *pool); + + +/* Delete all mutable node revisions reachable from node ID, including + ID itself, from FS's `nodes' table, allocating from POOL. Also + delete any mutable representations and strings associated with that + node revision. ID may refer to a file or directory, which may be + mutable or immutable. */ +svn_error_t *svn_fs_fs__dag_delete_if_mutable(svn_fs_t *fs, + const svn_fs_id_t *id, + apr_pool_t *pool); + + +/* Create a new mutable directory named NAME in PARENT. Set *CHILD_P + to a reference to the new node, allocated in POOL. The new + directory has no contents, and no properties. PARENT must be + mutable. NAME must be a single path component; it cannot be a + slash-separated directory path. PARENT_PATH must be the + canonicalized absolute path of the parent directory. PARENT must + not currently have an entry named NAME. TXN_ID is the Subversion + transaction under which this occurs. + + Use POOL for all allocations. + */ +svn_error_t *svn_fs_fs__dag_make_dir(dag_node_t **child_p, + dag_node_t *parent, + const char *parent_path, + const char *name, + const char *txn_id, + apr_pool_t *pool); + + + +/* Files. */ + + +/* Set *CONTENTS to a readable generic stream which yields the + contents of FILE. Allocate the stream in POOL. 
+ + If FILE is not a file, return SVN_ERR_FS_NOT_FILE. + + Use POOL for all allocations. + */ +svn_error_t *svn_fs_fs__dag_get_contents(svn_stream_t **contents, + dag_node_t *file, + apr_pool_t *pool); + +/* Attempt to fetch the contents of NODE and pass it along with the BATON + to the PROCESSOR. Set *SUCCESS only if the data could be provided + and the processor had been called. + + Use POOL for all allocations. + */ +svn_error_t * +svn_fs_fs__dag_try_process_file_contents(svn_boolean_t *success, + dag_node_t *node, + svn_fs_process_contents_func_t processor, + void* baton, + apr_pool_t *pool); + + +/* Set *STREAM_P to a delta stream that will turn the contents of SOURCE into + the contents of TARGET, allocated in POOL. If SOURCE is null, the empty + string will be used. + + Use POOL for all allocations. + */ +svn_error_t * +svn_fs_fs__dag_get_file_delta_stream(svn_txdelta_stream_t **stream_p, + dag_node_t *source, + dag_node_t *target, + apr_pool_t *pool); + +/* Return a generic writable stream in *CONTENTS with which to set the + contents of FILE. Allocate the stream in POOL. + + Any previous edits on the file will be deleted, and a new edit + stream will be constructed. + + Use POOL for all allocations. + */ +svn_error_t *svn_fs_fs__dag_get_edit_stream(svn_stream_t **contents, + dag_node_t *file, + apr_pool_t *pool); + + +/* Signify the completion of edits to FILE made using the stream + returned by svn_fs_fs__dag_get_edit_stream, allocating from POOL. + + If CHECKSUM is non-null, it must match the checksum for FILE's + contents (note: this is not recalculated, the recorded checksum is + used), else the error SVN_ERR_CHECKSUM_MISMATCH is returned. + + This operation is a no-op if no edits are present. + + Use POOL for all allocations, including to cache the node_revision in + FILE. + */ +svn_error_t *svn_fs_fs__dag_finalize_edits(dag_node_t *file, + const svn_checksum_t *checksum, + apr_pool_t *pool); + + +/* Set *LENGTH to the length of the contents of FILE. 
+ + Use POOL for all allocations. + */ +svn_error_t *svn_fs_fs__dag_file_length(svn_filesize_t *length, + dag_node_t *file, + apr_pool_t *pool); + +/* Put the recorded checksum of type KIND for FILE into CHECKSUM, allocating + from POOL. + + If no stored checksum is available, do not calculate the checksum, + just put NULL into CHECKSUM. + + Use POOL for all allocations. + */ +svn_error_t * +svn_fs_fs__dag_file_checksum(svn_checksum_t **checksum, + dag_node_t *file, + svn_checksum_kind_t kind, + apr_pool_t *pool); + +/* Create a new mutable file named NAME in PARENT. Set *CHILD_P to a + reference to the new node, allocated in POOL. The new file's + contents are the empty string, and it has no properties. PARENT + must be mutable. NAME must be a single path component; it cannot + be a slash-separated directory path. PARENT_PATH must be the + canonicalized absolute path of the parent directory. TXN_ID is the + Subversion transaction under which this occurs. + + Use POOL for all allocations. + */ +svn_error_t *svn_fs_fs__dag_make_file(dag_node_t **child_p, + dag_node_t *parent, + const char *parent_path, + const char *name, + const char *txn_id, + apr_pool_t *pool); + + + +/* Copies */ + +/* Make ENTRY in TO_NODE be a copy of FROM_NODE, allocating from POOL. + TO_NODE must be mutable. TXN_ID is the Subversion transaction + under which this occurs. + + If PRESERVE_HISTORY is true, the new node will record that it was + copied from FROM_PATH in FROM_REV; therefore, FROM_NODE should be + the node found at FROM_PATH in FROM_REV, although this is not + checked. FROM_PATH should be canonicalized before being passed + here. + + If PRESERVE_HISTORY is false, FROM_PATH and FROM_REV are ignored. + + Use POOL for all allocations. 
+ */ +svn_error_t *svn_fs_fs__dag_copy(dag_node_t *to_node, + const char *entry, + dag_node_t *from_node, + svn_boolean_t preserve_history, + svn_revnum_t from_rev, + const char *from_path, + const char *txn_id, + apr_pool_t *pool); + + +/* Comparison */ + +/* Find out what is the same between two nodes. + + If PROPS_CHANGED is non-null, set *PROPS_CHANGED to 1 if the two + nodes have different property lists, or to 0 if same. + + If CONTENTS_CHANGED is non-null, set *CONTENTS_CHANGED to 1 if the + two nodes have different contents, or to 0 if same. For files, + file contents are compared; for directories, the entries lists are + compared. If one is a file and the other is a directory, the one's + contents will be compared to the other's entries list. (Not + terribly useful, I suppose, but that's the caller's business.) + + ### todo: This function only compares rep keys at the moment. This + may leave us with a slight chance of a false positive, though I + don't really see how that would happen in practice. Nevertheless, + it should probably be fixed. + */ +svn_error_t *svn_fs_fs__dag_things_different(svn_boolean_t *props_changed, + svn_boolean_t *contents_changed, + dag_node_t *node1, + dag_node_t *node2); + + +/* Set *REV and *PATH to the copyroot revision and path of node NODE, or + to SVN_INVALID_REVNUM and NULL if no copyroot exists. + */ +svn_error_t *svn_fs_fs__dag_get_copyroot(svn_revnum_t *rev, + const char **path, + dag_node_t *node); + +/* Set *REV to the copyfrom revision associated with NODE. + */ +svn_error_t *svn_fs_fs__dag_get_copyfrom_rev(svn_revnum_t *rev, + dag_node_t *node); + +/* Set *PATH to the copyfrom path associated with NODE. + */ +svn_error_t *svn_fs_fs__dag_get_copyfrom_path(const char **path, + dag_node_t *node); + +/* Update *TARGET so that SOURCE is its predecessor. 
+ */ +svn_error_t * +svn_fs_fs__dag_update_ancestry(dag_node_t *target, + dag_node_t *source, + apr_pool_t *pool); +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* SVN_LIBSVN_FS_DAG_H */ diff --git a/subversion/libsvn_fs_fs/fs.c b/subversion/libsvn_fs_fs/fs.c new file mode 100644 index 0000000..4f3a340 --- /dev/null +++ b/subversion/libsvn_fs_fs/fs.c @@ -0,0 +1,456 @@ +/* fs.c --- creating, opening and closing filesystems + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include <apr_general.h> +#include <apr_pools.h> +#include <apr_file_io.h> +#include <apr_thread_mutex.h> + +#include "svn_fs.h" +#include "svn_delta.h" +#include "svn_version.h" +#include "svn_pools.h" +#include "fs.h" +#include "fs_fs.h" +#include "tree.h" +#include "lock.h" +#include "id.h" +#include "rep-cache.h" +#include "svn_private_config.h" +#include "private/svn_fs_util.h" + +#include "../libsvn_fs/fs-loader.h" + +/* A prefix for the pool userdata variables used to hold + per-filesystem shared data. 
See fs_serialized_init. */ +#define SVN_FSFS_SHARED_USERDATA_PREFIX "svn-fsfs-shared-" + + + +static svn_error_t * +fs_serialized_init(svn_fs_t *fs, apr_pool_t *common_pool, apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + const char *key; + void *val; + fs_fs_shared_data_t *ffsd; + apr_status_t status; + + /* Note that we are allocating a small amount of long-lived data for + each separate repository opened during the lifetime of the + svn_fs_initialize pool. It's unlikely that anyone will notice + the modest expenditure; the alternative is to allocate each structure + in a subpool, add a reference-count, and add a serialized deconstructor + to the FS vtable. That's more machinery than it's worth. + + Using the uuid to obtain the lock creates a corner case if a + caller uses svn_fs_set_uuid on the repository in a process where + other threads might be using the same repository through another + FS object. The only real-world consumer of svn_fs_set_uuid is + "svnadmin load", so this is a low-priority problem, and we don't + know of a better way of associating such data with the + repository. */ + + SVN_ERR_ASSERT(fs->uuid); + key = apr_pstrcat(pool, SVN_FSFS_SHARED_USERDATA_PREFIX, fs->uuid, + (char *) NULL); + status = apr_pool_userdata_get(&val, key, common_pool); + if (status) + return svn_error_wrap_apr(status, _("Can't fetch FSFS shared data")); + ffsd = val; + + if (!ffsd) + { + ffsd = apr_pcalloc(common_pool, sizeof(*ffsd)); + ffsd->common_pool = common_pool; + + /* POSIX fcntl locks are per-process, so we need a mutex for + intra-process synchronization when grabbing the repository write + lock. */ + SVN_ERR(svn_mutex__init(&ffsd->fs_write_lock, + SVN_FS_FS__USE_LOCK_MUTEX, common_pool)); + + /* ... not to mention locking the txn-current file. 
*/ + SVN_ERR(svn_mutex__init(&ffsd->txn_current_lock, + SVN_FS_FS__USE_LOCK_MUTEX, common_pool)); + + SVN_ERR(svn_mutex__init(&ffsd->txn_list_lock, + SVN_FS_FS__USE_LOCK_MUTEX, common_pool)); + + key = apr_pstrdup(common_pool, key); + status = apr_pool_userdata_set(ffsd, key, NULL, common_pool); + if (status) + return svn_error_wrap_apr(status, _("Can't store FSFS shared data")); + } + + ffd->shared = ffsd; + + return SVN_NO_ERROR; +} + + + +/* This function is provided for Subversion 1.0.x compatibility. It + has no effect for fsfs backed Subversion filesystems. It conforms + to the fs_library_vtable_t.bdb_set_errcall() API. */ +static svn_error_t * +fs_set_errcall(svn_fs_t *fs, + void (*db_errcall_fcn)(const char *errpfx, char *msg)) +{ + + return SVN_NO_ERROR; +} + +struct fs_freeze_baton_t { + svn_fs_t *fs; + svn_fs_freeze_func_t freeze_func; + void *freeze_baton; +}; + +static svn_error_t * +fs_freeze_body(void *baton, + apr_pool_t *pool) +{ + struct fs_freeze_baton_t *b = baton; + svn_boolean_t exists; + + SVN_ERR(svn_fs_fs__exists_rep_cache(&exists, b->fs, pool)); + if (exists) + SVN_ERR(svn_fs_fs__lock_rep_cache(b->fs, pool)); + + SVN_ERR(b->freeze_func(b->freeze_baton, pool)); + + return SVN_NO_ERROR; +} + +static svn_error_t * +fs_freeze(svn_fs_t *fs, + svn_fs_freeze_func_t freeze_func, + void *freeze_baton, + apr_pool_t *pool) +{ + struct fs_freeze_baton_t b; + + b.fs = fs; + b.freeze_func = freeze_func; + b.freeze_baton = freeze_baton; + + SVN_ERR(svn_fs__check_fs(fs, TRUE)); + SVN_ERR(svn_fs_fs__with_write_lock(fs, fs_freeze_body, &b, pool)); + + return SVN_NO_ERROR; +} + + + +/* The vtable associated with a specific open filesystem. 
*/ +static fs_vtable_t fs_vtable = { + svn_fs_fs__youngest_rev, + svn_fs_fs__revision_prop, + svn_fs_fs__revision_proplist, + svn_fs_fs__change_rev_prop, + svn_fs_fs__set_uuid, + svn_fs_fs__revision_root, + svn_fs_fs__begin_txn, + svn_fs_fs__open_txn, + svn_fs_fs__purge_txn, + svn_fs_fs__list_transactions, + svn_fs_fs__deltify, + svn_fs_fs__lock, + svn_fs_fs__generate_lock_token, + svn_fs_fs__unlock, + svn_fs_fs__get_lock, + svn_fs_fs__get_locks, + svn_fs_fs__verify_root, + fs_freeze, + fs_set_errcall +}; + + +/* Creating a new filesystem. */ + +/* Set up vtable and fsap_data fields in FS. */ +static svn_error_t * +initialize_fs_struct(svn_fs_t *fs) +{ + fs_fs_data_t *ffd = apr_pcalloc(fs->pool, sizeof(*ffd)); + fs->vtable = &fs_vtable; + fs->fsap_data = ffd; + return SVN_NO_ERROR; +} + +/* This implements the fs_library_vtable_t.create() API. Create a new + fsfs-backed Subversion filesystem at path PATH and link it into + *FS. Perform temporary allocations in POOL, and fs-global allocations + in COMMON_POOL. */ +static svn_error_t * +fs_create(svn_fs_t *fs, const char *path, apr_pool_t *pool, + apr_pool_t *common_pool) +{ + SVN_ERR(svn_fs__check_fs(fs, FALSE)); + + SVN_ERR(initialize_fs_struct(fs)); + + SVN_ERR(svn_fs_fs__create(fs, path, pool)); + + SVN_ERR(svn_fs_fs__initialize_caches(fs, pool)); + return fs_serialized_init(fs, common_pool, pool); +} + + + +/* Gaining access to an existing filesystem. */ + +/* This implements the fs_library_vtable_t.open() API. Open an FSFS + Subversion filesystem located at PATH, set *FS to point to the + correct vtable for the filesystem. Use POOL for any temporary + allocations, and COMMON_POOL for fs-global allocations. 
*/ +static svn_error_t * +fs_open(svn_fs_t *fs, const char *path, apr_pool_t *pool, + apr_pool_t *common_pool) +{ + SVN_ERR(initialize_fs_struct(fs)); + + SVN_ERR(svn_fs_fs__open(fs, path, pool)); + + SVN_ERR(svn_fs_fs__initialize_caches(fs, pool)); + return fs_serialized_init(fs, common_pool, pool); +} + + + +/* This implements the fs_library_vtable_t.open_for_recovery() API. */ +static svn_error_t * +fs_open_for_recovery(svn_fs_t *fs, + const char *path, + apr_pool_t *pool, apr_pool_t *common_pool) +{ + /* Recovery for FSFS is currently limited to recreating the 'current' + file from the latest revision. */ + + /* The only thing we have to watch out for is that the 'current' file + might not exist. So we'll try to create it here unconditionally, + and just ignore any errors that might indicate that it's already + present. (We'll need it to exist later anyway as a source for the + new file's permissions). */ + + /* Use a partly-filled fs pointer first to create 'current'. This will fail + if 'current' already exists, but we don't care about that. */ + fs->path = apr_pstrdup(fs->pool, path); + svn_error_clear(svn_io_file_create(svn_fs_fs__path_current(fs, pool), + "0 1 1\n", pool)); + + /* Now open the filesystem properly by calling the vtable method directly. */ + return fs_open(fs, path, pool, common_pool); +} + + + +/* This implements the fs_library_vtable_t.upgrade_fs() API. 
*/ +static svn_error_t * +fs_upgrade(svn_fs_t *fs, const char *path, apr_pool_t *pool, + apr_pool_t *common_pool) +{ + SVN_ERR(svn_fs__check_fs(fs, FALSE)); + SVN_ERR(initialize_fs_struct(fs)); + SVN_ERR(svn_fs_fs__open(fs, path, pool)); + SVN_ERR(svn_fs_fs__initialize_caches(fs, pool)); + SVN_ERR(fs_serialized_init(fs, common_pool, pool)); + return svn_fs_fs__upgrade(fs, pool); +} + +static svn_error_t * +fs_verify(svn_fs_t *fs, const char *path, + svn_revnum_t start, + svn_revnum_t end, + svn_fs_progress_notify_func_t notify_func, + void *notify_baton, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool, + apr_pool_t *common_pool) +{ + SVN_ERR(svn_fs__check_fs(fs, FALSE)); + SVN_ERR(initialize_fs_struct(fs)); + SVN_ERR(svn_fs_fs__open(fs, path, pool)); + SVN_ERR(svn_fs_fs__initialize_caches(fs, pool)); + SVN_ERR(fs_serialized_init(fs, common_pool, pool)); + return svn_fs_fs__verify(fs, start, end, notify_func, notify_baton, + cancel_func, cancel_baton, pool); +} + +static svn_error_t * +fs_pack(svn_fs_t *fs, + const char *path, + svn_fs_pack_notify_t notify_func, + void *notify_baton, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool, + apr_pool_t *common_pool) +{ + SVN_ERR(svn_fs__check_fs(fs, FALSE)); + SVN_ERR(initialize_fs_struct(fs)); + SVN_ERR(svn_fs_fs__open(fs, path, pool)); + SVN_ERR(svn_fs_fs__initialize_caches(fs, pool)); + SVN_ERR(fs_serialized_init(fs, common_pool, pool)); + return svn_fs_fs__pack(fs, notify_func, notify_baton, + cancel_func, cancel_baton, pool); +} + + + + +/* This implements the fs_library_vtable_t.hotcopy() API. Copy a + possibly live Subversion filesystem SRC_FS from SRC_PATH to a + DST_FS at DEST_PATH. If INCREMENTAL is TRUE, make an effort not to + re-copy data which already exists in DST_FS. + The CLEAN_LOGS argument is ignored and included for Subversion + 1.0.x compatibility. Perform all temporary allocations in POOL. 
*/ +static svn_error_t * +fs_hotcopy(svn_fs_t *src_fs, + svn_fs_t *dst_fs, + const char *src_path, + const char *dst_path, + svn_boolean_t clean_logs, + svn_boolean_t incremental, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool) +{ + SVN_ERR(svn_fs__check_fs(src_fs, FALSE)); + SVN_ERR(initialize_fs_struct(src_fs)); + SVN_ERR(svn_fs_fs__open(src_fs, src_path, pool)); + SVN_ERR(svn_fs_fs__initialize_caches(src_fs, pool)); + SVN_ERR(fs_serialized_init(src_fs, pool, pool)); + + SVN_ERR(svn_fs__check_fs(dst_fs, FALSE)); + SVN_ERR(initialize_fs_struct(dst_fs)); + /* In INCREMENTAL mode, svn_fs_fs__hotcopy() will open DST_FS. + Otherwise, it's not an FS yet --- possibly just an empty dir --- so + can't be opened. + */ + return svn_fs_fs__hotcopy(src_fs, dst_fs, src_path, dst_path, + incremental, cancel_func, cancel_baton, pool); +} + + + +/* This function is included for Subversion 1.0.x compatibility. It + has no effect for fsfs backed Subversion filesystems. It conforms + to the fs_library_vtable_t.bdb_logfiles() API. */ +static svn_error_t * +fs_logfiles(apr_array_header_t **logfiles, + const char *path, + svn_boolean_t only_unused, + apr_pool_t *pool) +{ + /* A no-op for FSFS. */ + *logfiles = apr_array_make(pool, 0, sizeof(const char *)); + + return SVN_NO_ERROR; +} + + + + + +/* Delete the filesystem located at path PATH. Perform any temporary + allocations in POOL. */ +static svn_error_t * +fs_delete_fs(const char *path, + apr_pool_t *pool) +{ + /* Remove everything. 
*/ + return svn_io_remove_dir2(path, FALSE, NULL, NULL, pool); +} + +static const svn_version_t * +fs_version(void) +{ + SVN_VERSION_BODY; +} + +static const char * +fs_get_description(void) +{ + return _("Module for working with a plain file (FSFS) repository."); +} + +static svn_error_t * +fs_set_svn_fs_open(svn_fs_t *fs, + svn_error_t *(*svn_fs_open_)(svn_fs_t **, + const char *, + apr_hash_t *, + apr_pool_t *)) +{ + fs_fs_data_t *ffd = fs->fsap_data; + ffd->svn_fs_open_ = svn_fs_open_; + return SVN_NO_ERROR; +} + + +/* Base FS library vtable, used by the FS loader library. */ + +static fs_library_vtable_t library_vtable = { + fs_version, + fs_create, + fs_open, + fs_open_for_recovery, + fs_upgrade, + fs_verify, + fs_delete_fs, + fs_hotcopy, + fs_get_description, + svn_fs_fs__recover, + fs_pack, + fs_logfiles, + NULL /* parse_id */, + fs_set_svn_fs_open +}; + +svn_error_t * +svn_fs_fs__init(const svn_version_t *loader_version, + fs_library_vtable_t **vtable, apr_pool_t* common_pool) +{ + static const svn_version_checklist_t checklist[] = + { + { "svn_subr", svn_subr_version }, + { "svn_delta", svn_delta_version }, + { NULL, NULL } + }; + + /* Simplified version check to make sure we can safely use the + VTABLE parameter. The FS loader does a more exhaustive check. 
*/ + if (loader_version->major != SVN_VER_MAJOR) + return svn_error_createf(SVN_ERR_VERSION_MISMATCH, NULL, + _("Unsupported FS loader version (%d) for fsfs"), + loader_version->major); + SVN_ERR(svn_ver_check_list(fs_version(), checklist)); + + *vtable = &library_vtable; + return SVN_NO_ERROR; +} diff --git a/subversion/libsvn_fs_fs/fs.h b/subversion/libsvn_fs_fs/fs.h new file mode 100644 index 0000000..ea301f6 --- /dev/null +++ b/subversion/libsvn_fs_fs/fs.h @@ -0,0 +1,523 @@ +/* fs.h : interface to Subversion filesystem, private to libsvn_fs + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * ==================================================================== + */ + +#ifndef SVN_LIBSVN_FS_FS_H +#define SVN_LIBSVN_FS_FS_H + +#include <apr_pools.h> +#include <apr_hash.h> +#include <apr_network_io.h> + +#include "svn_fs.h" +#include "svn_config.h" +#include "private/svn_atomic.h" +#include "private/svn_cache.h" +#include "private/svn_fs_private.h" +#include "private/svn_sqlite.h" +#include "private/svn_mutex.h" +#include "private/svn_named_atomic.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +/*** The filesystem structure. ***/ + +/* Following are defines that specify the textual elements of the + native filesystem directories and revision files. */ + +/* Names of special files in the fs_fs filesystem. */ +#define PATH_FORMAT "format" /* Contains format number */ +#define PATH_UUID "uuid" /* Contains UUID */ +#define PATH_CURRENT "current" /* Youngest revision */ +#define PATH_LOCK_FILE "write-lock" /* Revision lock file */ +#define PATH_REVS_DIR "revs" /* Directory of revisions */ +#define PATH_REVPROPS_DIR "revprops" /* Directory of revprops */ +#define PATH_TXNS_DIR "transactions" /* Directory of transactions */ +#define PATH_NODE_ORIGINS_DIR "node-origins" /* Lazy node-origin cache */ +#define PATH_TXN_PROTOS_DIR "txn-protorevs" /* Directory of proto-revs */ +#define PATH_TXN_CURRENT "txn-current" /* File with next txn key */ +#define PATH_TXN_CURRENT_LOCK "txn-current-lock" /* Lock for txn-current */ +#define PATH_LOCKS_DIR "locks" /* Directory of locks */ +#define PATH_MIN_UNPACKED_REV "min-unpacked-rev" /* Oldest revision which + has not been packed. 
*/ +#define PATH_REVPROP_GENERATION "revprop-generation" + /* Current revprop generation*/ +#define PATH_MANIFEST "manifest" /* Manifest file name */ +#define PATH_PACKED "pack" /* Packed revision data file */ +#define PATH_EXT_PACKED_SHARD ".pack" /* Extension for packed + shards */ +/* If you change this, look at tests/svn_test_fs.c(maybe_install_fsfs_conf) */ +#define PATH_CONFIG "fsfs.conf" /* Configuration */ + +/* Names of special files and file extensions for transactions */ +#define PATH_CHANGES "changes" /* Records changes made so far */ +#define PATH_TXN_PROPS "props" /* Transaction properties */ +#define PATH_NEXT_IDS "next-ids" /* Next temporary ID assignments */ +#define PATH_PREFIX_NODE "node." /* Prefix for node filename */ +#define PATH_EXT_TXN ".txn" /* Extension of txn dir */ +#define PATH_EXT_CHILDREN ".children" /* Extension for dir contents */ +#define PATH_EXT_PROPS ".props" /* Extension for node props */ +#define PATH_EXT_REV ".rev" /* Extension of protorev file */ +#define PATH_EXT_REV_LOCK ".rev-lock" /* Extension of protorev lock file */ +/* Names of files in legacy FS formats */ +#define PATH_REV "rev" /* Proto rev file */ +#define PATH_REV_LOCK "rev-lock" /* Proto rev (write) lock file */ + +/* Names of sections and options in fsfs.conf. 
*/ +#define CONFIG_SECTION_CACHES "caches" +#define CONFIG_OPTION_FAIL_STOP "fail-stop" +#define CONFIG_SECTION_REP_SHARING "rep-sharing" +#define CONFIG_OPTION_ENABLE_REP_SHARING "enable-rep-sharing" +#define CONFIG_SECTION_DELTIFICATION "deltification" +#define CONFIG_OPTION_ENABLE_DIR_DELTIFICATION "enable-dir-deltification" +#define CONFIG_OPTION_ENABLE_PROPS_DELTIFICATION "enable-props-deltification" +#define CONFIG_OPTION_MAX_DELTIFICATION_WALK "max-deltification-walk" +#define CONFIG_OPTION_MAX_LINEAR_DELTIFICATION "max-linear-deltification" +#define CONFIG_SECTION_PACKED_REVPROPS "packed-revprops" +#define CONFIG_OPTION_REVPROP_PACK_SIZE "revprop-pack-size" +#define CONFIG_OPTION_COMPRESS_PACKED_REVPROPS "compress-packed-revprops" + +/* The format number of this filesystem. + This is independent of the repository format number, and + independent of any other FS back ends. */ +#define SVN_FS_FS__FORMAT_NUMBER 6 + +/* The minimum format number that supports svndiff version 1. */ +#define SVN_FS_FS__MIN_SVNDIFF1_FORMAT 2 + +/* The minimum format number that supports transaction ID generation + using a transaction sequence in the txn-current file. */ +#define SVN_FS_FS__MIN_TXN_CURRENT_FORMAT 3 + +/* The minimum format number that supports the "layout" filesystem + format option. */ +#define SVN_FS_FS__MIN_LAYOUT_FORMAT_OPTION_FORMAT 3 + +/* The minimum format number that stores protorevs in a separate directory. */ +#define SVN_FS_FS__MIN_PROTOREVS_DIR_FORMAT 3 + +/* The minimum format number that doesn't keep node and copy ID counters. */ +#define SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT 3 + +/* The minimum format number that maintains minfo-here and minfo-count + noderev fields. */ +#define SVN_FS_FS__MIN_MERGEINFO_FORMAT 3 + +/* The minimum format number that allows rep sharing. */ +#define SVN_FS_FS__MIN_REP_SHARING_FORMAT 4 + +/* The minimum format number that supports packed shards. 
*/ +#define SVN_FS_FS__MIN_PACKED_FORMAT 4 + +/* The minimum format number that stores node kinds in changed-paths lists. */ +#define SVN_FS_FS__MIN_KIND_IN_CHANGED_FORMAT 4 + +/* 1.8 deltification options should work with any FSFS repo but to avoid + * issues with very old servers, restrict those options to the 1.6+ format*/ +#define SVN_FS_FS__MIN_DELTIFICATION_FORMAT 4 + +/* The 1.7-dev format, never released, that packed revprops into SQLite + revprops.db . */ +#define SVN_FS_FS__PACKED_REVPROP_SQLITE_DEV_FORMAT 5 + +/* The minimum format number that supports packed revprops. */ +#define SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT 6 + +/* The minimum format number that supports a configuration file (fsfs.conf) */ +#define SVN_FS_FS__MIN_CONFIG_FILE 4 + +/* Private FSFS-specific data shared between all svn_txn_t objects that + relate to a particular transaction in a filesystem (as identified + by transaction id and filesystem UUID). Objects of this type are + allocated in their own subpool of the common pool. */ +typedef struct fs_fs_shared_txn_data_t +{ + /* The next transaction in the list, or NULL if there is no following + transaction. */ + struct fs_fs_shared_txn_data_t *next; + + /* This transaction's ID. For repositories whose format is less + than SVN_FS_FS__MIN_TXN_CURRENT_FORMAT, the ID is in the form + <rev>-<uniqueifier>, where <uniqueifier> runs from 0-99999 (see + create_txn_dir_pre_1_5() in fs_fs.c). For newer repositories, + the form is <rev>-<200 digit base 36 number> (see + create_txn_dir() in fs_fs.c). */ + char txn_id[SVN_FS__TXN_MAX_LEN+1]; + + /* Whether the transaction's prototype revision file is locked for + writing by any thread in this process (including the current + thread; recursive locks are not permitted). This is effectively + a non-recursive mutex. */ + svn_boolean_t being_written; + + /* The pool in which this object has been allocated; a subpool of the + common pool. 
*/ + apr_pool_t *pool; +} fs_fs_shared_txn_data_t; + +/* On most operating systems apr implements file locks per process, not + per file. On Windows apr implements the locking as per file handle + locks, so we don't have to add our own mutex for just in-process + synchronization. */ +/* Compare ../libsvn_subr/named_atomic.c:USE_THREAD_MUTEX */ +#if APR_HAS_THREADS && !defined(WIN32) +#define SVN_FS_FS__USE_LOCK_MUTEX 1 +#else +#define SVN_FS_FS__USE_LOCK_MUTEX 0 +#endif + +/* Private FSFS-specific data shared between all svn_fs_t objects that + relate to a particular filesystem, as identified by filesystem UUID. + Objects of this type are allocated in the common pool. */ +typedef struct fs_fs_shared_data_t +{ + /* A list of shared transaction objects for each transaction that is + currently active, or NULL if none are. All access to this list, + including the contents of the objects stored in it, is synchronised + under TXN_LIST_LOCK. */ + fs_fs_shared_txn_data_t *txns; + + /* A free transaction object, or NULL if there is no free object. + Access to this object is synchronised under TXN_LIST_LOCK. */ + fs_fs_shared_txn_data_t *free_txn; + + /* A lock for intra-process synchronization when accessing the TXNS list. */ + svn_mutex__t *txn_list_lock; + + /* A lock for intra-process synchronization when grabbing the + repository write lock. */ + svn_mutex__t *fs_write_lock; + + /* A lock for intra-process synchronization when locking the + txn-current file. */ + svn_mutex__t *txn_current_lock; + + /* The common pool, under which this object is allocated, subpools + of which are used to allocate the transaction objects. */ + apr_pool_t *common_pool; +} fs_fs_shared_data_t; + +/* Data structure for the 1st level DAG node cache. */ +typedef struct fs_fs_dag_cache_t fs_fs_dag_cache_t; + +/* Key type for all caches that use revision + offset / counter as key. 
 */
typedef struct pair_cache_key_t
{
  /* The revision part of the key. */
  svn_revnum_t revision;

  /* The second key component: an offset or counter, depending on the
     particular cache. */
  apr_int64_t second;
} pair_cache_key_t;

/* Private (non-shared) FSFS-specific data for each svn_fs_t object.
   Any caches in here may be NULL. */
typedef struct fs_fs_data_t
{
  /* The format number of this FS. */
  int format;
  /* The maximum number of files to store per directory (for sharded
     layouts) or zero (for linear layouts). */
  int max_files_per_dir;

  /* The revision that was youngest, last time we checked. */
  svn_revnum_t youngest_rev_cache;

  /* The fsfs.conf file, parsed.  Allocated in FS->pool. */
  svn_config_t *config;

  /* Caches of immutable data.  (Note that if these are created with
     svn_cache__create_memcache, the data can be shared between
     multiple svn_fs_t's for the same filesystem.) */

  /* A cache of revision root IDs, mapping from (svn_revnum_t *) to
     (svn_fs_id_t *).  (Not threadsafe.) */
  svn_cache__t *rev_root_id_cache;

  /* Caches native dag_node_t* instances and acts as a 1st level cache */
  fs_fs_dag_cache_t *dag_node_cache;

  /* DAG node cache for immutable nodes.  Maps (revision, fspath)
     to (dag_node_t *).  This is the 2nd level cache for DAG nodes. */
  svn_cache__t *rev_node_cache;

  /* A cache of the contents of immutable directories; maps from
     unparsed FS ID to a apr_hash_t * mapping (const char *) dirent
     names to (svn_fs_dirent_t *). */
  svn_cache__t *dir_cache;

  /* Fulltext cache; currently only used with memcached.  Maps from
     rep key (revision/offset) to svn_string_t. */
  svn_cache__t *fulltext_cache;

  /* Access object to the atomics namespace used by revprop caching.
     Will be NULL until the first access. */
  svn_atomic_namespace__t *revprop_namespace;

  /* Access object to the revprop "generation".  Will be NULL until
     the first access. */
  svn_named_atomic__t *revprop_generation;

  /* Access object to the revprop update timeout.  Will be NULL until
     the first access. */
  svn_named_atomic__t *revprop_timeout;

  /* Revision property cache.  Maps from (rev,generation) to apr_hash_t. */
  svn_cache__t *revprop_cache;

  /* Node properties cache.  Maps from rep key to apr_hash_t. */
  svn_cache__t *properties_cache;

  /* Pack manifest cache; a cache mapping (svn_revnum_t) shard number to
     a manifest; and a manifest is a mapping from (svn_revnum_t) revision
     number offset within a shard to (apr_off_t) byte-offset in the
     respective pack file. */
  svn_cache__t *packed_offset_cache;

  /* Cache for txdelta_window_t objects; the key is (revFilePath, offset) */
  svn_cache__t *txdelta_window_cache;

  /* Cache for combined windows as svn_stringbuf_t objects;
     the key is (revFilePath, offset) */
  svn_cache__t *combined_window_cache;

  /* Cache for node_revision_t objects; the key is (revision, id offset) */
  svn_cache__t *node_revision_cache;

  /* Cache for change lists as APR arrays of change_t * objects; the key
     is the revision */
  svn_cache__t *changes_cache;

  /* Cache for svn_mergeinfo_t objects; the key is a combination of
     revision, inheritance flags and path. */
  svn_cache__t *mergeinfo_cache;

  /* Cache for presence of svn_mergeinfo_t on a noderev; the key is a
     combination of revision, inheritance flags and path; value is "1"
     if the node has mergeinfo, "0" if it doesn't. */
  svn_cache__t *mergeinfo_existence_cache;

  /* TRUE while we hold a lock on the write lock file. */
  svn_boolean_t has_write_lock;

  /* If set, there are or have been more than one concurrent transaction */
  svn_boolean_t concurrent_transactions;

  /* Temporary cache for changed directories yet to be committed; maps from
     unparsed FS ID to ###x.  NULL outside transactions. */
  svn_cache__t *txn_dir_cache;

  /* Data shared between all svn_fs_t objects for a given filesystem. */
  fs_fs_shared_data_t *shared;

  /* The sqlite database used for rep caching. */
  svn_sqlite__db_t *rep_cache_db;

  /* Thread-safe boolean: whether REP_CACHE_DB has been opened. */
  svn_atomic_t rep_cache_db_opened;

  /* The oldest revision not in a pack file.  It also applies to revprops
   * if revprop packing has been enabled by the FSFS format version. */
  svn_revnum_t min_unpacked_rev;

  /* Whether rep-sharing is supported by the filesystem
   * and allowed by the configuration. */
  svn_boolean_t rep_sharing_allowed;

  /* File size limit in bytes up to which multiple revprops shall be packed
   * into a single file. */
  apr_int64_t revprop_pack_size;

  /* Whether packed revprop files shall be compressed. */
  svn_boolean_t compress_packed_revprops;

  /* Whether directory nodes shall be deltified just like file nodes. */
  svn_boolean_t deltify_directories;

  /* Whether nodes properties shall be deltified. */
  svn_boolean_t deltify_properties;

  /* Restart deltification histories after each multiple of this value */
  apr_int64_t max_deltification_walk;

  /* Maximum number of length of the linear part at the top of the
   * deltification history after which skip deltas will be used. */
  apr_int64_t max_linear_deltification;

  /* Pointer to svn_fs_open. */
  svn_error_t *(*svn_fs_open_)(svn_fs_t **, const char *, apr_hash_t *,
                               apr_pool_t *);
} fs_fs_data_t;


/*** Filesystem Transaction ***/
typedef struct transaction_t
{
  /* property list (const char * name, svn_string_t * value).
     may be NULL if there are no properties.  */
  apr_hash_t *proplist;

  /* node revision id of the root node.  */
  const svn_fs_id_t *root_id;

  /* node revision id of the node which is the root of the revision
     upon which this txn is based.  (unfinished only) */
  const svn_fs_id_t *base_id;

  /* copies list (const char * copy_ids), or NULL if there have been
     no copies in this transaction.  */
  apr_array_header_t *copies;

} transaction_t;


/*** Representation ***/
/* If you add fields to this, check to see if you need to change
 * svn_fs_fs__rep_copy. */
typedef struct representation_t
{
  /* Checksums for the contents produced by this representation.
     This checksum is for the contents the rep shows to consumers,
     regardless of how the rep stores the data under the hood.  It is
     independent of the storage (fulltext, delta, whatever).

     If checksum is NULL, then for compatibility behave as though this
     checksum matches the expected checksum.

     The md5 checksum is always filled, unless this is rep which was
     retrieved from the rep-cache.  The sha1 checksum is only computed on
     a write, for use with rep-sharing; it may be read from an existing
     representation, but otherwise it is NULL. */
  svn_checksum_t *md5_checksum;
  svn_checksum_t *sha1_checksum;

  /* Revision where this representation is located. */
  svn_revnum_t revision;

  /* Offset into the revision file where it is located. */
  apr_off_t offset;

  /* The size of the representation in bytes as seen in the revision
     file. */
  svn_filesize_t size;

  /* The size of the fulltext of the representation.  If this is 0,
   * the fulltext size is equal to representation size in the rev file. */
  svn_filesize_t expanded_size;

  /* The ID of the transaction this representation belongs to, if it is
     part of an (uncommitted) transaction; NULL for committed reps. */
  const char *txn_id;

  /* For rep-sharing, we need a way of uniquifying node-revs which share the
     same representation (see svn_fs_fs__noderev_same_rep_key() ).  So, we
     store the original txn of the node rev (not the rep!), along with some
     intra-node uniquification content.

     May be NULL, in which case, it is considered to match other NULL
     values. */
  const char *uniquifier;
} representation_t;


/*** Node-Revision ***/
/* If you add fields to this, check to see if you need to change
 * copy_node_revision in dag.c. */
typedef struct node_revision_t
{
  /* node kind */
  svn_node_kind_t kind;

  /* The node-id for this node-rev. */
  const svn_fs_id_t *id;

  /* predecessor node revision id, or NULL if there is no predecessor
     for this node revision */
  const svn_fs_id_t *predecessor_id;

  /* If this node-rev is a copy, where was it copied from? */
  const char *copyfrom_path;
  svn_revnum_t copyfrom_rev;

  /* Helper for history tracing, root of the parent tree from whence
     this node-rev was copied. */
  svn_revnum_t copyroot_rev;
  const char *copyroot_path;

  /* number of predecessors this node revision has (recursively), or
     -1 if not known (for backward compatibility). */
  int predecessor_count;

  /* representation key for this node's properties.  may be NULL if
     there are no properties.  */
  representation_t *prop_rep;

  /* representation for this node's data.  may be NULL if there is
     no data. */
  representation_t *data_rep;

  /* path at which this node first came into existence.  */
  const char *created_path;

  /* is this the unmodified root of a transaction? */
  svn_boolean_t is_fresh_txn_root;

  /* Number of nodes with svn:mergeinfo properties that are
     descendants of this node (including itself) */
  apr_int64_t mergeinfo_count;

  /* Does this node itself have svn:mergeinfo? */
  svn_boolean_t has_mergeinfo;

} node_revision_t;


/*** Change ***/
typedef struct change_t
{
  /* Path of the change. */
  const char *path;

  /* Node revision ID of the change. */
  const svn_fs_id_t *noderev_id;

  /* The kind of change. */
  svn_fs_path_change_kind_t kind;

  /* Text or property mods? */
  svn_boolean_t text_mod;
  svn_boolean_t prop_mod;

  /* Node kind (possibly svn_node_unknown). */
  svn_node_kind_t node_kind;

  /* Copyfrom revision and path.
*/ + svn_revnum_t copyfrom_rev; + const char * copyfrom_path; + +} change_t; + + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* SVN_LIBSVN_FS_FS_H */ diff --git a/subversion/libsvn_fs_fs/fs_fs.c b/subversion/libsvn_fs_fs/fs_fs.c new file mode 100644 index 0000000..0354a1f --- /dev/null +++ b/subversion/libsvn_fs_fs/fs_fs.c @@ -0,0 +1,11469 @@ +/* fs_fs.c --- filesystem operations specific to fs_fs + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * ==================================================================== + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <assert.h> +#include <errno.h> + +#include <apr_general.h> +#include <apr_pools.h> +#include <apr_file_io.h> +#include <apr_uuid.h> +#include <apr_lib.h> +#include <apr_md5.h> +#include <apr_sha1.h> +#include <apr_strings.h> +#include <apr_thread_mutex.h> + +#include "svn_pools.h" +#include "svn_fs.h" +#include "svn_dirent_uri.h" +#include "svn_path.h" +#include "svn_hash.h" +#include "svn_props.h" +#include "svn_sorts.h" +#include "svn_string.h" +#include "svn_time.h" +#include "svn_mergeinfo.h" +#include "svn_config.h" +#include "svn_ctype.h" +#include "svn_version.h" + +#include "fs.h" +#include "tree.h" +#include "lock.h" +#include "key-gen.h" +#include "fs_fs.h" +#include "id.h" +#include "rep-cache.h" +#include "temp_serializer.h" + +#include "private/svn_string_private.h" +#include "private/svn_fs_util.h" +#include "private/svn_subr_private.h" +#include "private/svn_delta_private.h" +#include "../libsvn_fs/fs-loader.h" + +#include "svn_private_config.h" +#include "temp_serializer.h" + +/* An arbitrary maximum path length, so clients can't run us out of memory + * by giving us arbitrarily large paths. */ +#define FSFS_MAX_PATH_LEN 4096 + +/* The default maximum number of files per directory to store in the + rev and revprops directory. The number below is somewhat arbitrary, + and can be overridden by defining the macro while compiling; the + figure of 1000 is reasonable for VFAT filesystems, which are by far + the worst performers in this area. */ +#ifndef SVN_FS_FS_DEFAULT_MAX_FILES_PER_DIR +#define SVN_FS_FS_DEFAULT_MAX_FILES_PER_DIR 1000 +#endif + +/* Begin deltification after a node history exceeded this this limit. + Useful values are 4 to 64 with 16 being a good compromise between + computational overhead and repository size savings. + Should be a power of 2. 
+ Values < 2 will result in standard skip-delta behavior. */ +#define SVN_FS_FS_MAX_LINEAR_DELTIFICATION 16 + +/* Finding a deltification base takes operations proportional to the + number of changes being skipped. To prevent exploding runtime + during commits, limit the deltification range to this value. + Should be a power of 2 minus one. + Values < 1 disable deltification. */ +#define SVN_FS_FS_MAX_DELTIFICATION_WALK 1023 + +/* Give writing processes 10 seconds to replace an existing revprop + file with a new one. After that time, we assume that the writing + process got aborted and that we have re-read revprops. */ +#define REVPROP_CHANGE_TIMEOUT (10 * 1000000) + +/* The following are names of atomics that will be used to communicate + * revprop updates across all processes on this machine. */ +#define ATOMIC_REVPROP_GENERATION "rev-prop-generation" +#define ATOMIC_REVPROP_TIMEOUT "rev-prop-timeout" +#define ATOMIC_REVPROP_NAMESPACE "rev-prop-atomics" + +/* Following are defines that specify the textual elements of the + native filesystem directories and revision files. */ + +/* Headers used to describe node-revision in the revision file. */ +#define HEADER_ID "id" +#define HEADER_TYPE "type" +#define HEADER_COUNT "count" +#define HEADER_PROPS "props" +#define HEADER_TEXT "text" +#define HEADER_CPATH "cpath" +#define HEADER_PRED "pred" +#define HEADER_COPYFROM "copyfrom" +#define HEADER_COPYROOT "copyroot" +#define HEADER_FRESHTXNRT "is-fresh-txn-root" +#define HEADER_MINFO_HERE "minfo-here" +#define HEADER_MINFO_CNT "minfo-cnt" + +/* Kinds that a change can be. */ +#define ACTION_MODIFY "modify" +#define ACTION_ADD "add" +#define ACTION_DELETE "delete" +#define ACTION_REPLACE "replace" +#define ACTION_RESET "reset" + +/* True and False flags. */ +#define FLAG_TRUE "true" +#define FLAG_FALSE "false" + +/* Kinds that a node-rev can be. */ +#define KIND_FILE "file" +#define KIND_DIR "dir" + +/* Kinds of representation. 
 */
#define REP_PLAIN "PLAIN"
#define REP_DELTA "DELTA"

/* Notes:

To avoid opening and closing the rev-files all the time, it would
probably be advantageous to keep each rev-file open for the
lifetime of the transaction object.  I'll leave that as a later
optimization for now.

I didn't keep track of pool lifetimes at all in this code.  There
are likely some errors because of that.

*/

/* The vtable associated with an open transaction object. */
static txn_vtable_t txn_vtable = {
  svn_fs_fs__commit_txn,
  svn_fs_fs__abort_txn,
  svn_fs_fs__txn_prop,
  svn_fs_fs__txn_proplist,
  svn_fs_fs__change_txn_prop,
  svn_fs_fs__txn_root,
  svn_fs_fs__change_txn_props
};

/* Declarations. */

static svn_error_t *
read_min_unpacked_rev(svn_revnum_t *min_unpacked_rev,
                      const char *path,
                      apr_pool_t *pool);

static svn_error_t *
update_min_unpacked_rev(svn_fs_t *fs, apr_pool_t *pool);

static svn_error_t *
get_youngest(svn_revnum_t *youngest_p, const char *fs_path, apr_pool_t *pool);

static svn_error_t *
verify_walker(representation_t *rep,
              void *baton,
              svn_fs_t *fs,
              apr_pool_t *scratch_pool);

/* Pathname helper functions */

/* Return TRUE if REV is packed in FS, FALSE otherwise. */
static svn_boolean_t
is_packed_rev(svn_fs_t *fs, svn_revnum_t rev)
{
  fs_fs_data_t *ffd = fs->fsap_data;

  return (rev < ffd->min_unpacked_rev);
}

/* Return TRUE if the revision properties of REV are packed in FS,
   FALSE otherwise.
 */
static svn_boolean_t
is_packed_revprop(svn_fs_t *fs, svn_revnum_t rev)
{
  fs_fs_data_t *ffd = fs->fsap_data;

  /* rev 0 will not be packed */
  return (rev < ffd->min_unpacked_rev)
      && (rev != 0)
      && (ffd->format >= SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT);
}

/* Return the full path of the "format" file in FS.  Allocate in POOL. */
static const char *
path_format(svn_fs_t *fs, apr_pool_t *pool)
{
  return svn_dirent_join(fs->path, PATH_FORMAT, pool);
}

/* Return the full path of the "uuid" file in FS.  Allocate in POOL. */
static APR_INLINE const char *
path_uuid(svn_fs_t *fs, apr_pool_t *pool)
{
  return svn_dirent_join(fs->path, PATH_UUID, pool);
}

const char *
svn_fs_fs__path_current(svn_fs_t *fs, apr_pool_t *pool)
{
  return svn_dirent_join(fs->path, PATH_CURRENT, pool);
}

/* Return the full path of the "txn-current" file in FS.
   Allocate in POOL. */
static APR_INLINE const char *
path_txn_current(svn_fs_t *fs, apr_pool_t *pool)
{
  return svn_dirent_join(fs->path, PATH_TXN_CURRENT, pool);
}

/* Return the full path of the lock file for "txn-current" in FS.
   Allocate in POOL. */
static APR_INLINE const char *
path_txn_current_lock(svn_fs_t *fs, apr_pool_t *pool)
{
  return svn_dirent_join(fs->path, PATH_TXN_CURRENT_LOCK, pool);
}

/* Return the full path of the global write-lock file in FS.
   Allocate in POOL. */
static APR_INLINE const char *
path_lock(svn_fs_t *fs, apr_pool_t *pool)
{
  return svn_dirent_join(fs->path, PATH_LOCK_FILE, pool);
}

/* Return the full path of the revprop generation file in FS.
   Allocate in POOL. */
static const char *
path_revprop_generation(svn_fs_t *fs, apr_pool_t *pool)
{
  return svn_dirent_join(fs->path, PATH_REVPROP_GENERATION, pool);
}

/* Return the full path of the file of type KIND (pack file or manifest)
   for the packed shard containing REV in FS.  REV must be packed.
   Allocate in POOL. */
static const char *
path_rev_packed(svn_fs_t *fs, svn_revnum_t rev, const char *kind,
                apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;

  assert(ffd->max_files_per_dir);
  assert(is_packed_rev(fs, rev));

  return svn_dirent_join_many(pool, fs->path, PATH_REVS_DIR,
                              apr_psprintf(pool,
                                           "%ld" PATH_EXT_PACKED_SHARD,
                                           rev / ffd->max_files_per_dir),
                              kind, NULL);
}

/* Return the full path of the non-packed shard directory that contains
   REV in FS.  Requires a sharded layout.  Allocate in POOL. */
static const char *
path_rev_shard(svn_fs_t *fs, svn_revnum_t rev, apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;

  assert(ffd->max_files_per_dir);
  return svn_dirent_join_many(pool, fs->path, PATH_REVS_DIR,
                              apr_psprintf(pool, "%ld",
                                           rev / ffd->max_files_per_dir),
                              NULL);
}

/* Return the full path of the (non-packed) rev file for REV in FS.
   Allocate in POOL. */
static const char *
path_rev(svn_fs_t *fs, svn_revnum_t rev, apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;

  assert(! is_packed_rev(fs, rev));

  if (ffd->max_files_per_dir)
    {
      return svn_dirent_join(path_rev_shard(fs, rev, pool),
                             apr_psprintf(pool, "%ld", rev),
                             pool);
    }

  return svn_dirent_join_many(pool, fs->path, PATH_REVS_DIR,
                              apr_psprintf(pool, "%ld", rev), NULL);
}

svn_error_t *
svn_fs_fs__path_rev_absolute(const char **path,
                             svn_fs_t *fs,
                             svn_revnum_t rev,
                             apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;

  if (ffd->format < SVN_FS_FS__MIN_PACKED_FORMAT
      || ! is_packed_rev(fs, rev))
    {
      *path = path_rev(fs, rev, pool);
    }
  else
    {
      *path = path_rev_packed(fs, rev, PATH_PACKED, pool);
    }

  return SVN_NO_ERROR;
}

/* Return the full path of the non-packed revprop shard directory that
   contains REV in FS.  Requires a sharded layout.  Allocate in POOL. */
static const char *
path_revprops_shard(svn_fs_t *fs, svn_revnum_t rev, apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;

  assert(ffd->max_files_per_dir);
  return svn_dirent_join_many(pool, fs->path, PATH_REVPROPS_DIR,
                              apr_psprintf(pool, "%ld",
                                           rev / ffd->max_files_per_dir),
                              NULL);
}

/* Return the full path of the packed revprop shard directory that
   contains REV in FS.  Requires a sharded layout.  Allocate in POOL. */
static const char *
path_revprops_pack_shard(svn_fs_t *fs, svn_revnum_t rev, apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;

  assert(ffd->max_files_per_dir);
  return svn_dirent_join_many(pool, fs->path, PATH_REVPROPS_DIR,
                              apr_psprintf(pool, "%ld" PATH_EXT_PACKED_SHARD,
                                           rev / ffd->max_files_per_dir),
                              NULL);
}

/* Return the full path of the non-packed revprop file for REV in FS.
   Allocate in POOL. */
static const char *
path_revprops(svn_fs_t *fs, svn_revnum_t rev, apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;

  if (ffd->max_files_per_dir)
    {
      return svn_dirent_join(path_revprops_shard(fs, rev, pool),
                             apr_psprintf(pool, "%ld", rev),
                             pool);
    }

  return svn_dirent_join_many(pool, fs->path, PATH_REVPROPS_DIR,
                              apr_psprintf(pool, "%ld", rev), NULL);
}

/* Return the full path of the directory holding transaction TXN_ID in FS.
   Allocate in POOL. */
static APR_INLINE const char *
path_txn_dir(svn_fs_t *fs, const char *txn_id, apr_pool_t *pool)
{
  SVN_ERR_ASSERT_NO_RETURN(txn_id != NULL);
  return svn_dirent_join_many(pool, fs->path, PATH_TXNS_DIR,
                              apr_pstrcat(pool, txn_id, PATH_EXT_TXN,
                                          (char *)NULL),
                              NULL);
}

/* Return the name of the sha1->rep mapping file in transaction TXN_ID
 * within FS for the given SHA1 checksum.  Use POOL for allocations.
 */
static APR_INLINE const char *
path_txn_sha1(svn_fs_t *fs, const char *txn_id, svn_checksum_t *sha1,
              apr_pool_t *pool)
{
  return svn_dirent_join(path_txn_dir(fs, txn_id, pool),
                         svn_checksum_to_cstring(sha1, pool),
                         pool);
}

/* Return the path of the "changes" file of transaction TXN_ID in FS.
   Allocate in POOL. */
static APR_INLINE const char *
path_txn_changes(svn_fs_t *fs, const char *txn_id, apr_pool_t *pool)
{
  return svn_dirent_join(path_txn_dir(fs, txn_id, pool), PATH_CHANGES, pool);
}

/* Return the path of the txn-props file of transaction TXN_ID in FS.
   Allocate in POOL. */
static APR_INLINE const char *
path_txn_props(svn_fs_t *fs, const char *txn_id, apr_pool_t *pool)
{
  return svn_dirent_join(path_txn_dir(fs, txn_id, pool), PATH_TXN_PROPS, pool);
}

/* Return the path of the next-ids file of transaction TXN_ID in FS.
   Allocate in POOL. */
static APR_INLINE const char *
path_txn_next_ids(svn_fs_t *fs, const char *txn_id, apr_pool_t *pool)
{
  return svn_dirent_join(path_txn_dir(fs, txn_id, pool), PATH_NEXT_IDS, pool);
}

/* Return the path of the min-unpacked-rev file in FS.  Allocate in POOL. */
static APR_INLINE const char *
path_min_unpacked_rev(svn_fs_t *fs, apr_pool_t *pool)
{
  return svn_dirent_join(fs->path, PATH_MIN_UNPACKED_REV, pool);
}


/* Return the path of the prototype revision file of transaction TXN_ID
   in FS.  The location depends on the FS format.  Allocate in POOL. */
static APR_INLINE const char *
path_txn_proto_rev(svn_fs_t *fs, const char *txn_id, apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  if (ffd->format >= SVN_FS_FS__MIN_PROTOREVS_DIR_FORMAT)
    return svn_dirent_join_many(pool, fs->path, PATH_TXN_PROTOS_DIR,
                                apr_pstrcat(pool, txn_id, PATH_EXT_REV,
                                            (char *)NULL),
                                NULL);
  else
    return svn_dirent_join(path_txn_dir(fs, txn_id, pool), PATH_REV, pool);
}

/* Return the path of the lock file protecting the prototype revision file
   of transaction TXN_ID in FS.  Allocate in POOL. */
static APR_INLINE const char *
path_txn_proto_rev_lock(svn_fs_t *fs, const char *txn_id, apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  if (ffd->format >= SVN_FS_FS__MIN_PROTOREVS_DIR_FORMAT)
    return svn_dirent_join_many(pool, fs->path, PATH_TXN_PROTOS_DIR,
                                apr_pstrcat(pool, txn_id, PATH_EXT_REV_LOCK,
                                            (char *)NULL),
                                NULL);
  else
    return svn_dirent_join(path_txn_dir(fs, txn_id, pool), PATH_REV_LOCK,
                           pool);
}

/* Return the path of the node-revision file for ID in its transaction
   within FS.  Allocate in POOL. */
static const char *
path_txn_node_rev(svn_fs_t *fs, const svn_fs_id_t *id, apr_pool_t *pool)
{
  const char *txn_id = svn_fs_fs__id_txn_id(id);
  const char *node_id = svn_fs_fs__id_node_id(id);
  const char *copy_id = svn_fs_fs__id_copy_id(id);
  const char *name = apr_psprintf(pool, PATH_PREFIX_NODE "%s.%s",
                                  node_id, copy_id);

  return svn_dirent_join(path_txn_dir(fs, txn_id, pool), name, pool);
}

/* Return the path of the props file for node-revision ID within its
   transaction in FS.  Allocate in POOL. */
static APR_INLINE const char *
path_txn_node_props(svn_fs_t *fs, const svn_fs_id_t *id, apr_pool_t *pool)
{
  return apr_pstrcat(pool, path_txn_node_rev(fs, id, pool), PATH_EXT_PROPS,
                     (char *)NULL);
}

/* Return the path of the children file for node-revision ID within its
   transaction in FS.  Allocate in POOL. */
static APR_INLINE const char *
path_txn_node_children(svn_fs_t *fs, const svn_fs_id_t *id, apr_pool_t *pool)
{
  return apr_pstrcat(pool, path_txn_node_rev(fs, id, pool),
                     PATH_EXT_CHILDREN, (char *)NULL);
}

/* Return the path of the node-origins file covering NODE_ID in FS.
   The file is addressed by NODE_ID minus its last character.
   Allocate in POOL. */
static APR_INLINE const char *
path_node_origin(svn_fs_t *fs, const char *node_id, apr_pool_t *pool)
{
  size_t len = strlen(node_id);
  const char *node_id_minus_last_char =
    (len == 1) ? "0" : apr_pstrmemdup(pool, node_id, len - 1);
  return svn_dirent_join_many(pool, fs->path, PATH_NODE_ORIGINS_DIR,
                              node_id_minus_last_char, NULL);
}

/* Return a "<path>:<offset>" description of FILE for use in error
   messages.  Falls back to "(unknown)" / -1 if the name or current
   offset cannot be determined.  Allocate in POOL. */
static APR_INLINE const char *
path_and_offset_of(apr_file_t *file, apr_pool_t *pool)
{
  const char *path;
  apr_off_t offset = 0;

  if (apr_file_name_get(&path, file) != APR_SUCCESS)
    path = "(unknown)";

  if (apr_file_seek(file, APR_CUR, &offset) != APR_SUCCESS)
    offset = -1;

  return apr_psprintf(pool, "%s:%" APR_OFF_T_FMT, path, offset);
}



/* Functions for working with shared transaction data. */

/* Return the transaction object for transaction TXN_ID from the
   transaction list of filesystem FS (which must already be locked via the
   txn_list_lock mutex).  If the transaction does not exist in the list,
   then create a new transaction object and return it (if CREATE_NEW is
   true) or return NULL (otherwise).
 */
static fs_fs_shared_txn_data_t *
get_shared_txn(svn_fs_t *fs, const char *txn_id, svn_boolean_t create_new)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  fs_fs_shared_data_t *ffsd = ffd->shared;
  fs_fs_shared_txn_data_t *txn;

  /* Linear scan; the list is expected to be short (see below). */
  for (txn = ffsd->txns; txn; txn = txn->next)
    if (strcmp(txn->txn_id, txn_id) == 0)
      break;

  if (txn || !create_new)
    return txn;

  /* Use the transaction object from the (single-object) freelist,
     if one is available, or otherwise create a new object. */
  if (ffsd->free_txn)
    {
      txn = ffsd->free_txn;
      ffsd->free_txn = NULL;
    }
  else
    {
      /* Each transaction object lives in its own subpool of the common
         pool so it can be destroyed independently. */
      apr_pool_t *subpool = svn_pool_create(ffsd->common_pool);
      txn = apr_palloc(subpool, sizeof(*txn));
      txn->pool = subpool;
    }

  assert(strlen(txn_id) < sizeof(txn->txn_id));
  apr_cpystrn(txn->txn_id, txn_id, sizeof(txn->txn_id));
  txn->being_written = FALSE;

  /* Link this transaction into the head of the list.  We will typically
     be dealing with only one active transaction at a time, so it makes
     sense for searches through the transaction list to look at the
     newest transactions first.  */
  txn->next = ffsd->txns;
  ffsd->txns = txn;

  return txn;
}

/* Free the transaction object for transaction TXN_ID, and remove it
   from the transaction list of filesystem FS (which must already be
   locked via the txn_list_lock mutex).  Do nothing if the transaction
   does not exist. */
static void
free_shared_txn(svn_fs_t *fs, const char *txn_id)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  fs_fs_shared_data_t *ffsd = ffd->shared;
  fs_fs_shared_txn_data_t *txn, *prev = NULL;

  for (txn = ffsd->txns; txn; prev = txn, txn = txn->next)
    if (strcmp(txn->txn_id, txn_id) == 0)
      break;

  if (!txn)
    return;

  /* Unlink TXN from the list. */
  if (prev)
    prev->next = txn->next;
  else
    ffsd->txns = txn->next;

  /* As we typically will be dealing with one transaction after another,
     we will maintain a single-object free list so that we can hopefully
     keep reusing the same transaction object.
 */
  if (!ffsd->free_txn)
    ffsd->free_txn = txn;
  else
    svn_pool_destroy(txn->pool);
}


/* Obtain a lock on the transaction list of filesystem FS, call BODY
   with FS, BATON, and POOL, and then unlock the transaction list.
   Return what BODY returned. */
static svn_error_t *
with_txnlist_lock(svn_fs_t *fs,
                  svn_error_t *(*body)(svn_fs_t *fs,
                                       const void *baton,
                                       apr_pool_t *pool),
                  const void *baton,
                  apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  fs_fs_shared_data_t *ffsd = ffd->shared;

  /* SVN_MUTEX__WITH_LOCK propagates any error raised by BODY. */
  SVN_MUTEX__WITH_LOCK(ffsd->txn_list_lock,
                       body(fs, baton, pool));

  return SVN_NO_ERROR;
}


/* Get a lock on empty file LOCK_FILENAME, creating it in POOL. */
static svn_error_t *
get_lock_on_filesystem(const char *lock_filename,
                       apr_pool_t *pool)
{
  svn_error_t *err = svn_io_file_lock2(lock_filename, TRUE, FALSE, pool);

  if (err && APR_STATUS_IS_ENOENT(err->apr_err))
    {
      /* No lock file?  No big deal; these are just empty files
         anyway.  Create it and try again. */
      svn_error_clear(err);
      err = NULL;

      SVN_ERR(svn_io_file_create(lock_filename, "", pool));
      SVN_ERR(svn_io_file_lock2(lock_filename, TRUE, FALSE, pool));
    }

  return svn_error_trace(err);
}

/* Reset the HAS_WRITE_LOCK member in the FFD given as BATON_VOID.
   When registered with the pool holding the lock on the lock file,
   this makes sure the flag gets reset just before we release the lock. */
static apr_status_t
reset_lock_flag(void *baton_void)
{
  fs_fs_data_t *ffd = baton_void;
  ffd->has_write_lock = FALSE;
  return APR_SUCCESS;
}

/* Obtain a write lock on the file LOCK_FILENAME (protecting with
   LOCK_MUTEX if APR is threaded) in a subpool of POOL, call BODY with
   BATON and that subpool, destroy the subpool (releasing the write
   lock) and return what BODY returned.  If IS_GLOBAL_LOCK is set,
   set the HAS_WRITE_LOCK flag while we keep the write lock.
 */
static svn_error_t *
with_some_lock_file(svn_fs_t *fs,
                    svn_error_t *(*body)(void *baton,
                                         apr_pool_t *pool),
                    void *baton,
                    const char *lock_filename,
                    svn_boolean_t is_global_lock,
                    apr_pool_t *pool)
{
  apr_pool_t *subpool = svn_pool_create(pool);
  svn_error_t *err = get_lock_on_filesystem(lock_filename, subpool);

  if (!err)
    {
      fs_fs_data_t *ffd = fs->fsap_data;

      if (is_global_lock)
        {
          /* set the "got the lock" flag and register reset function */
          apr_pool_cleanup_register(subpool,
                                    ffd,
                                    reset_lock_flag,
                                    apr_pool_cleanup_null);
          ffd->has_write_lock = TRUE;
        }

      /* nobody else will modify the repo state
         => read HEAD & pack info once */
      /* NOTE(review): the two SVN_ERR calls below return early without
         destroying SUBPOOL, so on error the file lock is only released
         when POOL is cleaned up -- confirm this is intended. */
      if (ffd->format >= SVN_FS_FS__MIN_PACKED_FORMAT)
        SVN_ERR(update_min_unpacked_rev(fs, pool));
      SVN_ERR(get_youngest(&ffd->youngest_rev_cache, fs->path,
                           pool));
      err = body(baton, subpool);
    }

  /* Destroying the subpool releases the file lock (and, via the cleanup
     registered above, resets HAS_WRITE_LOCK). */
  svn_pool_destroy(subpool);

  return svn_error_trace(err);
}

svn_error_t *
svn_fs_fs__with_write_lock(svn_fs_t *fs,
                           svn_error_t *(*body)(void *baton,
                                                apr_pool_t *pool),
                           void *baton,
                           apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  fs_fs_shared_data_t *ffsd = ffd->shared;

  SVN_MUTEX__WITH_LOCK(ffsd->fs_write_lock,
                       with_some_lock_file(fs, body, baton,
                                           path_lock(fs, pool),
                                           TRUE,
                                           pool));

  return SVN_NO_ERROR;
}

/* Run BODY (with BATON and POOL) while the txn-current file
   of FS is locked. */
static svn_error_t *
with_txn_current_lock(svn_fs_t *fs,
                      svn_error_t *(*body)(void *baton,
                                           apr_pool_t *pool),
                      void *baton,
                      apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  fs_fs_shared_data_t *ffsd = ffd->shared;

  SVN_MUTEX__WITH_LOCK(ffsd->txn_current_lock,
                       with_some_lock_file(fs, body, baton,
                                           path_txn_current_lock(fs, pool),
                                           FALSE,
                                           pool));

  return SVN_NO_ERROR;
}

/* A structure used by unlock_proto_rev() and unlock_proto_rev_body(),
   which see.
 */
struct unlock_proto_rev_baton
{
  /* ID of the transaction whose proto-rev file is to be unlocked. */
  const char *txn_id;
  /* The cookie returned by get_writable_proto_rev(); actually the
     (apr_file_t *) lock file handle. */
  void *lockcookie;
};

/* Callback used in the implementation of unlock_proto_rev(). */
static svn_error_t *
unlock_proto_rev_body(svn_fs_t *fs, const void *baton, apr_pool_t *pool)
{
  const struct unlock_proto_rev_baton *b = baton;
  const char *txn_id = b->txn_id;
  apr_file_t *lockfile = b->lockcookie;
  fs_fs_shared_txn_data_t *txn = get_shared_txn(fs, txn_id, FALSE);
  apr_status_t apr_err;

  if (!txn)
    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                             _("Can't unlock unknown transaction '%s'"),
                             txn_id);
  if (!txn->being_written)
    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                             _("Can't unlock nonlocked transaction '%s'"),
                             txn_id);

  apr_err = apr_file_unlock(lockfile);
  if (apr_err)
    return svn_error_wrap_apr
      (apr_err,
       _("Can't unlock prototype revision lockfile for transaction '%s'"),
       txn_id);
  apr_err = apr_file_close(lockfile);
  if (apr_err)
    return svn_error_wrap_apr
      (apr_err,
       _("Can't close prototype revision lockfile for transaction '%s'"),
       txn_id);

  txn->being_written = FALSE;

  return SVN_NO_ERROR;
}

/* Unlock the prototype revision file for transaction TXN_ID in filesystem
   FS using cookie LOCKCOOKIE.  The original prototype revision file must
   have been closed _before_ calling this function.

   Perform temporary allocations in POOL. */
static svn_error_t *
unlock_proto_rev(svn_fs_t *fs, const char *txn_id, void *lockcookie,
                 apr_pool_t *pool)
{
  struct unlock_proto_rev_baton b;

  b.txn_id = txn_id;
  b.lockcookie = lockcookie;
  return with_txnlist_lock(fs, unlock_proto_rev_body, &b, pool);
}

/* Same as unlock_proto_rev(), but requires that the transaction list
   lock is already held.
 */
static svn_error_t *
unlock_proto_rev_list_locked(svn_fs_t *fs, const char *txn_id,
                             void *lockcookie,
                             apr_pool_t *pool)
{
  struct unlock_proto_rev_baton b;

  b.txn_id = txn_id;
  b.lockcookie = lockcookie;
  /* Call the body directly: the caller already holds the txn-list lock. */
  return unlock_proto_rev_body(fs, &b, pool);
}

/* A structure used by get_writable_proto_rev() and
   get_writable_proto_rev_body(), which see. */
struct get_writable_proto_rev_baton
{
  /* Where to return the opened, end-positioned proto-rev file. */
  apr_file_t **file;
  /* Where to return the lock cookie for unlock_proto_rev(). */
  void **lockcookie;
  /* ID of the transaction whose proto-rev file is wanted. */
  const char *txn_id;
};

/* Callback used in the implementation of get_writable_proto_rev(). */
static svn_error_t *
get_writable_proto_rev_body(svn_fs_t *fs, const void *baton, apr_pool_t *pool)
{
  const struct get_writable_proto_rev_baton *b = baton;
  apr_file_t **file = b->file;
  void **lockcookie = b->lockcookie;
  const char *txn_id = b->txn_id;
  svn_error_t *err;
  fs_fs_shared_txn_data_t *txn = get_shared_txn(fs, txn_id, TRUE);

  /* First, ensure that no thread in this process (including this one)
     is currently writing to this transaction's proto-rev file. */
  if (txn->being_written)
    return svn_error_createf(SVN_ERR_FS_REP_BEING_WRITTEN, NULL,
                             _("Cannot write to the prototype revision file "
                               "of transaction '%s' because a previous "
                               "representation is currently being written by "
                               "this process"),
                             txn_id);


  /* We know that no thread in this process is writing to the proto-rev
     file, and by extension, that no thread in this process is holding a
     lock on the prototype revision lock file.  It is therefore safe
     for us to attempt to lock this file, to see if any other process
     is holding a lock. */

  {
    apr_file_t *lockfile;
    apr_status_t apr_err;
    const char *lockfile_path = path_txn_proto_rev_lock(fs, txn_id, pool);

    /* Open the proto-rev lockfile, creating it if necessary, as it may
       not exist if the transaction dates from before the lockfiles were
       introduced.

       ### We'd also like to use something like svn_io_file_lock2(), but
           that forces us to create a subpool just to be able to unlock
           the file, which seems a waste. */
    SVN_ERR(svn_io_file_open(&lockfile, lockfile_path,
                             APR_WRITE | APR_CREATE, APR_OS_DEFAULT, pool));

    /* Non-blocking attempt: if another process holds the lock we report
       SVN_ERR_FS_REP_BEING_WRITTEN instead of waiting. */
    apr_err = apr_file_lock(lockfile,
                            APR_FLOCK_EXCLUSIVE | APR_FLOCK_NONBLOCK);
    if (apr_err)
      {
        svn_error_clear(svn_io_file_close(lockfile, pool));

        if (APR_STATUS_IS_EAGAIN(apr_err))
          return svn_error_createf(SVN_ERR_FS_REP_BEING_WRITTEN, NULL,
                                   _("Cannot write to the prototype revision "
                                     "file of transaction '%s' because a "
                                     "previous representation is currently "
                                     "being written by another process"),
                                   txn_id);

        return svn_error_wrap_apr(apr_err,
                                  _("Can't get exclusive lock on file '%s'"),
                                  svn_dirent_local_style(lockfile_path, pool));
      }

    *lockcookie = lockfile;
  }

  /* We've successfully locked the transaction; mark it as such. */
  txn->being_written = TRUE;


  /* Now open the prototype revision file and seek to the end. */
  err = svn_io_file_open(file, path_txn_proto_rev(fs, txn_id, pool),
                         APR_WRITE | APR_BUFFERED, APR_OS_DEFAULT, pool);

  /* You might expect that we could dispense with the following seek
     and achieve the same thing by opening the file using APR_APPEND.
     Unfortunately, APR's buffered file implementation unconditionally
     places its initial file pointer at the start of the file (even for
     files opened with APR_APPEND), so we need this seek to reconcile
     the APR file pointer to the OS file pointer (since we need to be
     able to read the current file position later).
*/ + if (!err) + { + apr_off_t offset = 0; + err = svn_io_file_seek(*file, APR_END, &offset, pool); + } + + if (err) + { + err = svn_error_compose_create( + err, + unlock_proto_rev_list_locked(fs, txn_id, *lockcookie, pool)); + + *lockcookie = NULL; + } + + return svn_error_trace(err); +} + +/* Get a handle to the prototype revision file for transaction TXN_ID in + filesystem FS, and lock it for writing. Return FILE, a file handle + positioned at the end of the file, and LOCKCOOKIE, a cookie that + should be passed to unlock_proto_rev() to unlock the file once FILE + has been closed. + + If the prototype revision file is already locked, return error + SVN_ERR_FS_REP_BEING_WRITTEN. + + Perform all allocations in POOL. */ +static svn_error_t * +get_writable_proto_rev(apr_file_t **file, + void **lockcookie, + svn_fs_t *fs, const char *txn_id, + apr_pool_t *pool) +{ + struct get_writable_proto_rev_baton b; + + b.file = file; + b.lockcookie = lockcookie; + b.txn_id = txn_id; + + return with_txnlist_lock(fs, get_writable_proto_rev_body, &b, pool); +} + +/* Callback used in the implementation of purge_shared_txn(). */ +static svn_error_t * +purge_shared_txn_body(svn_fs_t *fs, const void *baton, apr_pool_t *pool) +{ + const char *txn_id = baton; + + free_shared_txn(fs, txn_id); + svn_fs_fs__reset_txn_caches(fs); + + return SVN_NO_ERROR; +} + +/* Purge the shared data for transaction TXN_ID in filesystem FS. + Perform all allocations in POOL. */ +static svn_error_t * +purge_shared_txn(svn_fs_t *fs, const char *txn_id, apr_pool_t *pool) +{ + return with_txnlist_lock(fs, purge_shared_txn_body, txn_id, pool); +} + + + +/* Fetch the current offset of FILE into *OFFSET_P. */ +static svn_error_t * +get_file_offset(apr_off_t *offset_p, apr_file_t *file, apr_pool_t *pool) +{ + apr_off_t offset; + + /* Note that, for buffered files, one (possibly surprising) side-effect + of this call is to flush any unwritten data to disk. 
*/ + offset = 0; + SVN_ERR(svn_io_file_seek(file, APR_CUR, &offset, pool)); + *offset_p = offset; + + return SVN_NO_ERROR; +} + + +/* Check that BUF, a nul-terminated buffer of text from file PATH, + contains only digits at OFFSET and beyond, raising an error if not. + TITLE contains a user-visible description of the file, usually the + short file name. + + Uses POOL for temporary allocation. */ +static svn_error_t * +check_file_buffer_numeric(const char *buf, apr_off_t offset, + const char *path, const char *title, + apr_pool_t *pool) +{ + const char *p; + + for (p = buf + offset; *p; p++) + if (!svn_ctype_isdigit(*p)) + return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, + _("%s file '%s' contains unexpected non-digit '%c' within '%s'"), + title, svn_dirent_local_style(path, pool), *p, buf); + + return SVN_NO_ERROR; +} + +/* Check that BUF, a nul-terminated buffer of text from format file PATH, + contains only digits at OFFSET and beyond, raising an error if not. + + Uses POOL for temporary allocation. */ +static svn_error_t * +check_format_file_buffer_numeric(const char *buf, apr_off_t offset, + const char *path, apr_pool_t *pool) +{ + return check_file_buffer_numeric(buf, offset, path, "Format", pool); +} + +/* Read the format number and maximum number of files per directory + from PATH and return them in *PFORMAT and *MAX_FILES_PER_DIR + respectively. + + *MAX_FILES_PER_DIR is obtained from the 'layout' format option, and + will be set to zero if a linear scheme should be used. + + Use POOL for temporary allocation. */ +static svn_error_t * +read_format(int *pformat, int *max_files_per_dir, + const char *path, apr_pool_t *pool) +{ + svn_error_t *err; + svn_stream_t *stream; + svn_stringbuf_t *content; + svn_stringbuf_t *buf; + svn_boolean_t eos = FALSE; + + err = svn_stringbuf_from_file2(&content, path, pool); + if (err && APR_STATUS_IS_ENOENT(err->apr_err)) + { + /* Treat an absent format file as format 1. 
Do not try to + create the format file on the fly, because the repository + might be read-only for us, or this might be a read-only + operation, and the spirit of FSFS is to make no changes + whatseover in read-only operations. See thread starting at + http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=97600 + for more. */ + svn_error_clear(err); + *pformat = 1; + *max_files_per_dir = 0; + + return SVN_NO_ERROR; + } + SVN_ERR(err); + + stream = svn_stream_from_stringbuf(content, pool); + SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eos, pool)); + if (buf->len == 0 && eos) + { + /* Return a more useful error message. */ + return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, + _("Can't read first line of format file '%s'"), + svn_dirent_local_style(path, pool)); + } + + /* Check that the first line contains only digits. */ + SVN_ERR(check_format_file_buffer_numeric(buf->data, 0, path, pool)); + SVN_ERR(svn_cstring_atoi(pformat, buf->data)); + + /* Set the default values for anything that can be set via an option. */ + *max_files_per_dir = 0; + + /* Read any options. */ + while (!eos) + { + SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eos, pool)); + if (buf->len == 0) + break; + + if (*pformat >= SVN_FS_FS__MIN_LAYOUT_FORMAT_OPTION_FORMAT && + strncmp(buf->data, "layout ", 7) == 0) + { + if (strcmp(buf->data + 7, "linear") == 0) + { + *max_files_per_dir = 0; + continue; + } + + if (strncmp(buf->data + 7, "sharded ", 8) == 0) + { + /* Check that the argument is numeric. 
*/ + SVN_ERR(check_format_file_buffer_numeric(buf->data, 15, path, pool)); + SVN_ERR(svn_cstring_atoi(max_files_per_dir, buf->data + 15)); + continue; + } + } + + return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, + _("'%s' contains invalid filesystem format option '%s'"), + svn_dirent_local_style(path, pool), buf->data); + } + + return SVN_NO_ERROR; +} + +/* Write the format number and maximum number of files per directory + to a new format file in PATH, possibly expecting to overwrite a + previously existing file. + + Use POOL for temporary allocation. */ +static svn_error_t * +write_format(const char *path, int format, int max_files_per_dir, + svn_boolean_t overwrite, apr_pool_t *pool) +{ + svn_stringbuf_t *sb; + + SVN_ERR_ASSERT(1 <= format && format <= SVN_FS_FS__FORMAT_NUMBER); + + sb = svn_stringbuf_createf(pool, "%d\n", format); + + if (format >= SVN_FS_FS__MIN_LAYOUT_FORMAT_OPTION_FORMAT) + { + if (max_files_per_dir) + svn_stringbuf_appendcstr(sb, apr_psprintf(pool, "layout sharded %d\n", + max_files_per_dir)); + else + svn_stringbuf_appendcstr(sb, "layout linear\n"); + } + + /* svn_io_write_version_file() does a load of magic to allow it to + replace version files that already exist. We only need to do + that when we're allowed to overwrite an existing file. */ + if (! overwrite) + { + /* Create the file */ + SVN_ERR(svn_io_file_create(path, sb->data, pool)); + } + else + { + const char *path_tmp; + + SVN_ERR(svn_io_write_unique(&path_tmp, + svn_dirent_dirname(path, pool), + sb->data, sb->len, + svn_io_file_del_none, pool)); + + /* rename the temp file as the real destination */ + SVN_ERR(svn_io_file_rename(path_tmp, path, pool)); + } + + /* And set the perms to make it read only */ + return svn_io_set_file_read_only(path, FALSE, pool); +} + +/* Return the error SVN_ERR_FS_UNSUPPORTED_FORMAT if FS's format + number is not the same as a format number supported by this + Subversion. 
*/
static svn_error_t *
check_format(int format)
{
  /* Blacklist.  These formats may be either younger or older than
     SVN_FS_FS__FORMAT_NUMBER, but we don't support them. */
  if (format == SVN_FS_FS__PACKED_REVPROP_SQLITE_DEV_FORMAT)
    return svn_error_createf(SVN_ERR_FS_UNSUPPORTED_FORMAT, NULL,
                             _("Found format '%d', only created by "
                               "unreleased dev builds; see "
                               "http://subversion.apache.org"
                               "/docs/release-notes/1.7#revprop-packing"),
                             format);

  /* We support all formats from 1-current simultaneously */
  if (1 <= format && format <= SVN_FS_FS__FORMAT_NUMBER)
    return SVN_NO_ERROR;

  return svn_error_createf(SVN_ERR_FS_UNSUPPORTED_FORMAT, NULL,
     _("Expected FS format between '1' and '%d'; found format '%d'"),
     SVN_FS_FS__FORMAT_NUMBER, format);
}

/* Return TRUE iff FS's on-disk format is new enough to store mergeinfo
   metadata. */
svn_boolean_t
svn_fs_fs__fs_supports_mergeinfo(svn_fs_t *fs)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  return ffd->format >= SVN_FS_FS__MIN_MERGEINFO_FORMAT;
}

/* Read the configuration information of the file system at FS_PATH
 * and set the respective values in FFD.  Use POOL for allocations.
 *
 * Options that the FFD->format does not support are forced to their
 * pre-feature defaults rather than read from the config file.
 */
static svn_error_t *
read_config(fs_fs_data_t *ffd,
            const char *fs_path,
            apr_pool_t *pool)
{
  SVN_ERR(svn_config_read3(&ffd->config,
                           svn_dirent_join(fs_path, PATH_CONFIG, pool),
                           FALSE, FALSE, FALSE, pool));

  /* Initialize ffd->rep_sharing_allowed. */
  if (ffd->format >= SVN_FS_FS__MIN_REP_SHARING_FORMAT)
    SVN_ERR(svn_config_get_bool(ffd->config, &ffd->rep_sharing_allowed,
                                CONFIG_SECTION_REP_SHARING,
                                CONFIG_OPTION_ENABLE_REP_SHARING, TRUE));
  else
    ffd->rep_sharing_allowed = FALSE;

  /* Initialize deltification settings in ffd. */
  if (ffd->format >= SVN_FS_FS__MIN_DELTIFICATION_FORMAT)
    {
      SVN_ERR(svn_config_get_bool(ffd->config, &ffd->deltify_directories,
                                  CONFIG_SECTION_DELTIFICATION,
                                  CONFIG_OPTION_ENABLE_DIR_DELTIFICATION,
                                  FALSE));
      SVN_ERR(svn_config_get_bool(ffd->config, &ffd->deltify_properties,
                                  CONFIG_SECTION_DELTIFICATION,
                                  CONFIG_OPTION_ENABLE_PROPS_DELTIFICATION,
                                  FALSE));
      SVN_ERR(svn_config_get_int64(ffd->config, &ffd->max_deltification_walk,
                                   CONFIG_SECTION_DELTIFICATION,
                                   CONFIG_OPTION_MAX_DELTIFICATION_WALK,
                                   SVN_FS_FS_MAX_DELTIFICATION_WALK));
      SVN_ERR(svn_config_get_int64(ffd->config, &ffd->max_linear_deltification,
                                   CONFIG_SECTION_DELTIFICATION,
                                   CONFIG_OPTION_MAX_LINEAR_DELTIFICATION,
                                   SVN_FS_FS_MAX_LINEAR_DELTIFICATION));
    }
  else
    {
      /* Pre-deltification format: force the compiled-in defaults. */
      ffd->deltify_directories = FALSE;
      ffd->deltify_properties = FALSE;
      ffd->max_deltification_walk = SVN_FS_FS_MAX_DELTIFICATION_WALK;
      ffd->max_linear_deltification = SVN_FS_FS_MAX_LINEAR_DELTIFICATION;
    }

  /* Initialize revprop packing settings in ffd. */
  if (ffd->format >= SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT)
    {
      SVN_ERR(svn_config_get_bool(ffd->config, &ffd->compress_packed_revprops,
                                  CONFIG_SECTION_PACKED_REVPROPS,
                                  CONFIG_OPTION_COMPRESS_PACKED_REVPROPS,
                                  FALSE));
      /* Default pack size differs with compression (256kB vs 64kB). */
      SVN_ERR(svn_config_get_int64(ffd->config, &ffd->revprop_pack_size,
                                   CONFIG_SECTION_PACKED_REVPROPS,
                                   CONFIG_OPTION_REVPROP_PACK_SIZE,
                                   ffd->compress_packed_revprops
                                       ? 0x100
                                       : 0x40));

      /* The config value is in kBytes; convert to bytes. */
      ffd->revprop_pack_size *= 1024;
    }
  else
    {
      /* Pre-packed-revprop format: 64kB packs, no compression. */
      ffd->revprop_pack_size = 0x10000;
      ffd->compress_packed_revprops = FALSE;
    }

  return SVN_NO_ERROR;
}

/* Write the default FSFS configuration file (fsfs.conf) into the
   repository at FS->path.  Use POOL for temporary allocations. */
static svn_error_t *
write_config(svn_fs_t *fs,
             apr_pool_t *pool)
{
#define NL APR_EOL_STR
  static const char * const fsfs_conf_contents =
"### This file controls the configuration of the FSFS filesystem." NL
"" NL
"[" SVN_CACHE_CONFIG_CATEGORY_MEMCACHED_SERVERS "]" NL
"### These options name memcached servers used to cache internal FSFS" NL
"### data. See http://www.danga.com/memcached/ for more information on" NL
"### memcached. To use memcached with FSFS, run one or more memcached" NL
"### servers, and specify each of them as an option like so:" NL
"# first-server = 127.0.0.1:11211" NL
"# remote-memcached = mymemcached.corp.example.com:11212" NL
"### The option name is ignored; the value is of the form HOST:PORT." NL
"### memcached servers can be shared between multiple repositories;" NL
"### however, if you do this, you *must* ensure that repositories have" NL
"### distinct UUIDs and paths, or else cached data from one repository" NL
"### might be used by another accidentally. Note also that memcached has" NL
"### no authentication for reads or writes, so you must ensure that your" NL
"### memcached servers are only accessible by trusted users." NL
"" NL
"[" CONFIG_SECTION_CACHES "]" NL
"### When a cache-related error occurs, normally Subversion ignores it" NL
"### and continues, logging an error if the server is appropriately" NL
"### configured (and ignoring it with file:// access). To make" NL
"### Subversion never ignore cache errors, uncomment this line." NL
"# " CONFIG_OPTION_FAIL_STOP " = true" NL
"" NL
"[" CONFIG_SECTION_REP_SHARING "]" NL
"### To conserve space, the filesystem can optionally avoid storing" NL
"### duplicate representations. This comes at a slight cost in" NL
"### performance, as maintaining a database of shared representations can" NL
"### increase commit times. The space savings are dependent upon the size" NL
"### of the repository, the number of objects it contains and the amount of" NL
"### duplication between them, usually a function of the branching and" NL
"### merging process." NL
"###" NL
"### The following parameter enables rep-sharing in the repository. It can" NL
"### be switched on and off at will, but for best space-saving results" NL
"### should be enabled consistently over the life of the repository." NL
"### 'svnadmin verify' will check the rep-cache regardless of this setting." NL
"### rep-sharing is enabled by default." NL
"# " CONFIG_OPTION_ENABLE_REP_SHARING " = true" NL
"" NL
"[" CONFIG_SECTION_DELTIFICATION "]" NL
"### To conserve space, the filesystem stores data as differences against" NL
"### existing representations. This comes at a slight cost in performance," NL
"### as calculating differences can increase commit times. Reading data" NL
"### will also create higher CPU load and the data will be fragmented." NL
"### Since deltification tends to save significant amounts of disk space," NL
"### the overall I/O load can actually be lower." NL
"###" NL
"### The options in this section allow for tuning the deltification" NL
"### strategy. Their effects on data size and server performance may vary" NL
"### from one repository to another. Versions prior to 1.8 will ignore" NL
"### this section." NL
"###" NL
"### The following parameter enables deltification for directories. It can" NL
"### be switched on and off at will, but for best space-saving results" NL
"### should be enabled consistently over the life of the repository." NL
"### Repositories containing large directories will benefit greatly." NL
"### In rarely read repositories, the I/O overhead may be significant as" NL
"### cache hit rates will most likely be low" NL
"### directory deltification is disabled by default." NL
"# " CONFIG_OPTION_ENABLE_DIR_DELTIFICATION " = false" NL
"###" NL
"### The following parameter enables deltification for properties on files" NL
"### and directories. Overall, this is a minor tuning option but can save" NL
"### some disk space if you merge frequently or frequently change node" NL
"### properties. You should not activate this if rep-sharing has been" NL
"### disabled because this may result in a net increase in repository size." NL
"### property deltification is disabled by default." NL
"# " CONFIG_OPTION_ENABLE_PROPS_DELTIFICATION " = false" NL
"###" NL
"### During commit, the server may need to walk the whole change history of" NL
"### of a given node to find a suitable deltification base. This linear" NL
"### process can impact commit times, svnadmin load and similar operations." NL
"### This setting limits the depth of the deltification history. If the" NL
"### threshold has been reached, the node will be stored as fulltext and a" NL
"### new deltification history begins." NL
"### Note, this is unrelated to svn log." NL
"### Very large values rarely provide significant additional savings but" NL
"### can impact performance greatly - in particular if directory" NL
"### deltification has been activated. Very small values may be useful in" NL
"### repositories that are dominated by large, changing binaries." NL
"### Should be a power of two minus 1. A value of 0 will effectively" NL
"### disable deltification." NL
"### For 1.8, the default value is 1023; earlier versions have no limit." NL
"# " CONFIG_OPTION_MAX_DELTIFICATION_WALK " = 1023" NL
"###" NL
"### The skip-delta scheme used by FSFS tends to repeatably store redundant" NL
"### delta information where a simple delta against the latest version is" NL
"### often smaller. By default, 1.8+ will therefore use skip deltas only" NL
"### after the linear chain of deltas has grown beyond the threshold" NL
"### specified by this setting." NL
"### Values up to 64 can result in some reduction in repository size for" NL
"### the cost of quickly increasing I/O and CPU costs. Similarly, smaller" NL
"### numbers can reduce those costs at the cost of more disk space. For" NL
"### rarely read repositories or those containing larger binaries, this may" NL
"### present a better trade-off." NL
"### Should be a power of two. A value of 1 or smaller will cause the" NL
"### exclusive use of skip-deltas (as in pre-1.8)." NL
"### For 1.8, the default value is 16; earlier versions use 1." NL
"# " CONFIG_OPTION_MAX_LINEAR_DELTIFICATION " = 16" NL
"" NL
"[" CONFIG_SECTION_PACKED_REVPROPS "]" NL
"### This parameter controls the size (in kBytes) of packed revprop files." NL
"### Revprops of consecutive revisions will be concatenated into a single" NL
"### file up to but not exceeding the threshold given here. However, each" NL
"### pack file may be much smaller and revprops of a single revision may be" NL
"### much larger than the limit set here. The threshold will be applied" NL
"### before optional compression takes place." NL
"### Large values will reduce disk space usage at the expense of increased" NL
"### latency and CPU usage reading and changing individual revprops. They" NL
"### become an advantage when revprop caching has been enabled because a" NL
"### lot of data can be read in one go. Values smaller than 4 kByte will" NL
"### not improve latency any further and quickly render revprop packing" NL
"### ineffective." NL
"### revprop-pack-size is 64 kBytes by default for non-compressed revprop" NL
"### pack files and 256 kBytes when compression has been enabled." NL
"# " CONFIG_OPTION_REVPROP_PACK_SIZE " = 64" NL
"###" NL
"### To save disk space, packed revprop files may be compressed. Standard" NL
"### revprops tend to allow for very effective compression. Reading and" NL
"### even more so writing, become significantly more CPU intensive. With" NL
"### revprop caching enabled, the overhead can be offset by reduced I/O" NL
"### unless you often modify revprops after packing." NL
"### Compressing packed revprops is disabled by default." NL
"# " CONFIG_OPTION_COMPRESS_PACKED_REVPROPS " = false" NL
;
#undef NL
  return svn_io_file_create(svn_dirent_join(fs->path, PATH_CONFIG, pool),
                            fsfs_conf_contents, pool);
}

/* Read the 'min-unpacked-rev' file at PATH and return its value in
   *MIN_UNPACKED_REV.  Use POOL for allocations. */
static svn_error_t *
read_min_unpacked_rev(svn_revnum_t *min_unpacked_rev,
                      const char *path,
                      apr_pool_t *pool)
{
  char buf[80];
  apr_file_t *file;
  apr_size_t len;

  SVN_ERR(svn_io_file_open(&file, path, APR_READ | APR_BUFFERED,
                           APR_OS_DEFAULT, pool));
  len = sizeof(buf);
  SVN_ERR(svn_io_read_length_line(file, buf, &len, pool));
  SVN_ERR(svn_io_file_close(file, pool));

  *min_unpacked_rev = SVN_STR_TO_REV(buf);
  return SVN_NO_ERROR;
}

/* Re-read the cached FFD->min_unpacked_rev value for FS from disk.
   Only valid for formats that support packing.  Use POOL for
   allocations. */
static svn_error_t *
update_min_unpacked_rev(svn_fs_t *fs, apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;

  SVN_ERR_ASSERT(ffd->format >= SVN_FS_FS__MIN_PACKED_FORMAT);

  return read_min_unpacked_rev(&ffd->min_unpacked_rev,
                               path_min_unpacked_rev(fs, pool),
                               pool);
}

svn_error_t *
svn_fs_fs__open(svn_fs_t *fs, const char *path, apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  apr_file_t *uuid_file;
  int format, max_files_per_dir;
  char buf[APR_UUID_FORMATTED_LENGTH + 2];
  apr_size_t limit;

  fs->path = apr_pstrdup(fs->pool, path);

  /* Read the FS format number. */
  SVN_ERR(read_format(&format, &max_files_per_dir,
                      path_format(fs, pool), pool));
  SVN_ERR(check_format(format));

  /* Now we've got a format number no matter what. */
  ffd->format = format;
  ffd->max_files_per_dir = max_files_per_dir;

  /* Read in and cache the repository uuid. */
  SVN_ERR(svn_io_file_open(&uuid_file, path_uuid(fs, pool),
                           APR_READ | APR_BUFFERED, APR_OS_DEFAULT, pool));

  limit = sizeof(buf);
  SVN_ERR(svn_io_read_length_line(uuid_file, buf, &limit, pool));
  fs->uuid = apr_pstrdup(fs->pool, buf);

  SVN_ERR(svn_io_file_close(uuid_file, pool));

  /* Read the min unpacked revision.
*/ + if (ffd->format >= SVN_FS_FS__MIN_PACKED_FORMAT) + SVN_ERR(update_min_unpacked_rev(fs, pool)); + + /* Read the configuration file. */ + SVN_ERR(read_config(ffd, fs->path, pool)); + + return get_youngest(&(ffd->youngest_rev_cache), path, pool); +} + +/* Wrapper around svn_io_file_create which ignores EEXIST. */ +static svn_error_t * +create_file_ignore_eexist(const char *file, + const char *contents, + apr_pool_t *pool) +{ + svn_error_t *err = svn_io_file_create(file, contents, pool); + if (err && APR_STATUS_IS_EEXIST(err->apr_err)) + { + svn_error_clear(err); + err = SVN_NO_ERROR; + } + return svn_error_trace(err); +} + +/* forward declarations */ + +static svn_error_t * +pack_revprops_shard(const char *pack_file_dir, + const char *shard_path, + apr_int64_t shard, + int max_files_per_dir, + apr_off_t max_pack_size, + int compression_level, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *scratch_pool); + +static svn_error_t * +delete_revprops_shard(const char *shard_path, + apr_int64_t shard, + int max_files_per_dir, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *scratch_pool); + +/* In the filesystem FS, pack all revprop shards up to min_unpacked_rev. + * Use SCRATCH_POOL for temporary allocations. + */ +static svn_error_t * +upgrade_pack_revprops(svn_fs_t *fs, + apr_pool_t *scratch_pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + const char *revprops_shard_path; + const char *revprops_pack_file_dir; + apr_int64_t shard; + apr_int64_t first_unpacked_shard + = ffd->min_unpacked_rev / ffd->max_files_per_dir; + + apr_pool_t *iterpool = svn_pool_create(scratch_pool); + const char *revsprops_dir = svn_dirent_join(fs->path, PATH_REVPROPS_DIR, + scratch_pool); + int compression_level = ffd->compress_packed_revprops + ? 
SVN_DELTA_COMPRESSION_LEVEL_DEFAULT + : SVN_DELTA_COMPRESSION_LEVEL_NONE; + + /* first, pack all revprops shards to match the packed revision shards */ + for (shard = 0; shard < first_unpacked_shard; ++shard) + { + revprops_pack_file_dir = svn_dirent_join(revsprops_dir, + apr_psprintf(iterpool, + "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD, + shard), + iterpool); + revprops_shard_path = svn_dirent_join(revsprops_dir, + apr_psprintf(iterpool, "%" APR_INT64_T_FMT, shard), + iterpool); + + SVN_ERR(pack_revprops_shard(revprops_pack_file_dir, revprops_shard_path, + shard, ffd->max_files_per_dir, + (int)(0.9 * ffd->revprop_pack_size), + compression_level, + NULL, NULL, iterpool)); + svn_pool_clear(iterpool); + } + + /* delete the non-packed revprops shards afterwards */ + for (shard = 0; shard < first_unpacked_shard; ++shard) + { + revprops_shard_path = svn_dirent_join(revsprops_dir, + apr_psprintf(iterpool, "%" APR_INT64_T_FMT, shard), + iterpool); + SVN_ERR(delete_revprops_shard(revprops_shard_path, + shard, ffd->max_files_per_dir, + NULL, NULL, iterpool)); + svn_pool_clear(iterpool); + } + + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} + +static svn_error_t * +upgrade_body(void *baton, apr_pool_t *pool) +{ + svn_fs_t *fs = baton; + int format, max_files_per_dir; + const char *format_path = path_format(fs, pool); + svn_node_kind_t kind; + + /* Read the FS format number and max-files-per-dir setting. */ + SVN_ERR(read_format(&format, &max_files_per_dir, format_path, pool)); + SVN_ERR(check_format(format)); + + /* If the config file does not exist, create one. */ + SVN_ERR(svn_io_check_path(svn_dirent_join(fs->path, PATH_CONFIG, pool), + &kind, pool)); + switch (kind) + { + case svn_node_none: + SVN_ERR(write_config(fs, pool)); + break; + case svn_node_file: + break; + default: + return svn_error_createf(SVN_ERR_FS_GENERAL, NULL, + _("'%s' is not a regular file." 
+ " Please move it out of " + "the way and try again"), + svn_dirent_join(fs->path, PATH_CONFIG, pool)); + } + + /* If we're already up-to-date, there's nothing else to be done here. */ + if (format == SVN_FS_FS__FORMAT_NUMBER) + return SVN_NO_ERROR; + + /* If our filesystem predates the existance of the 'txn-current + file', make that file and its corresponding lock file. */ + if (format < SVN_FS_FS__MIN_TXN_CURRENT_FORMAT) + { + SVN_ERR(create_file_ignore_eexist(path_txn_current(fs, pool), "0\n", + pool)); + SVN_ERR(create_file_ignore_eexist(path_txn_current_lock(fs, pool), "", + pool)); + } + + /* If our filesystem predates the existance of the 'txn-protorevs' + dir, make that directory. */ + if (format < SVN_FS_FS__MIN_PROTOREVS_DIR_FORMAT) + { + /* We don't use path_txn_proto_rev() here because it expects + we've already bumped our format. */ + SVN_ERR(svn_io_make_dir_recursively( + svn_dirent_join(fs->path, PATH_TXN_PROTOS_DIR, pool), pool)); + } + + /* If our filesystem is new enough, write the min unpacked rev file. */ + if (format < SVN_FS_FS__MIN_PACKED_FORMAT) + SVN_ERR(svn_io_file_create(path_min_unpacked_rev(fs, pool), "0\n", pool)); + + /* If the file system supports revision packing but not revprop packing, + pack the revprops up to the point that revision data has been packed. */ + if ( format >= SVN_FS_FS__MIN_PACKED_FORMAT + && format < SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT) + SVN_ERR(upgrade_pack_revprops(fs, pool)); + + /* Bump the format file. 
*/ + return write_format(format_path, SVN_FS_FS__FORMAT_NUMBER, max_files_per_dir, + TRUE, pool); +} + + +svn_error_t * +svn_fs_fs__upgrade(svn_fs_t *fs, apr_pool_t *pool) +{ + return svn_fs_fs__with_write_lock(fs, upgrade_body, (void *)fs, pool); +} + + +/* Functions for dealing with recoverable errors on mutable files + * + * Revprops, current, and txn-current files are mutable; that is, they + * change as part of normal fsfs operation, in constrat to revs files, or + * the format file, which are written once at create (or upgrade) time. + * When more than one host writes to the same repository, we will + * sometimes see these recoverable errors when accesssing these files. + * + * These errors all relate to NFS, and thus we only use this retry code if + * ESTALE is defined. + * + ** ESTALE + * + * In NFS v3 and under, the server doesn't track opened files. If you + * unlink(2) or rename(2) a file held open by another process *on the + * same host*, that host's kernel typically renames the file to + * .nfsXXXX and automatically deletes that when it's no longer open, + * but this behavior is not required. + * + * For obvious reasons, this does not work *across hosts*. No one + * knows about the opened file; not the server, and not the deleting + * client. So the file vanishes, and the reader gets stale NFS file + * handle. + * + ** EIO, ENOENT + * + * Some client implementations (at least the 2.6.18.5 kernel that ships + * with Ubuntu Dapper) sometimes give spurious ENOENT (only on open) or + * even EIO errors when trying to read these files that have been renamed + * over on some other host. + * + ** Solution + * + * Try open and read of such files in try_stringbuf_from_file(). Call + * this function within a loop of RECOVERABLE_RETRY_COUNT iterations + * (though, realistically, the second try will succeed). + */ + +#define RECOVERABLE_RETRY_COUNT 10 + +/* Read the file at PATH and return its content in *CONTENT. 
*CONTENT will + * not be modified unless the whole file was read successfully. + * + * ESTALE, EIO and ENOENT will not cause this function to return an error + * unless LAST_ATTEMPT has been set. If MISSING is not NULL, indicate + * missing files (ENOENT) there. + * + * Use POOL for allocations. + */ +static svn_error_t * +try_stringbuf_from_file(svn_stringbuf_t **content, + svn_boolean_t *missing, + const char *path, + svn_boolean_t last_attempt, + apr_pool_t *pool) +{ + svn_error_t *err = svn_stringbuf_from_file2(content, path, pool); + if (missing) + *missing = FALSE; + + if (err) + { + *content = NULL; + + if (APR_STATUS_IS_ENOENT(err->apr_err)) + { + if (!last_attempt) + { + svn_error_clear(err); + if (missing) + *missing = TRUE; + return SVN_NO_ERROR; + } + } +#ifdef ESTALE + else if (APR_TO_OS_ERROR(err->apr_err) == ESTALE + || APR_TO_OS_ERROR(err->apr_err) == EIO) + { + if (!last_attempt) + { + svn_error_clear(err); + return SVN_NO_ERROR; + } + } +#endif + } + + return svn_error_trace(err); +} + +/* Read the 'current' file FNAME and store the contents in *BUF. + Allocations are performed in POOL. */ +static svn_error_t * +read_content(svn_stringbuf_t **content, const char *fname, apr_pool_t *pool) +{ + int i; + *content = NULL; + + for (i = 0; !*content && (i < RECOVERABLE_RETRY_COUNT); ++i) + SVN_ERR(try_stringbuf_from_file(content, NULL, + fname, i + 1 < RECOVERABLE_RETRY_COUNT, + pool)); + + if (!*content) + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Can't read '%s'"), + svn_dirent_local_style(fname, pool)); + + return SVN_NO_ERROR; +} + +/* Find the youngest revision in a repository at path FS_PATH and + return it in *YOUNGEST_P. Perform temporary allocations in + POOL. 
*/ +static svn_error_t * +get_youngest(svn_revnum_t *youngest_p, + const char *fs_path, + apr_pool_t *pool) +{ + svn_stringbuf_t *buf; + SVN_ERR(read_content(&buf, svn_dirent_join(fs_path, PATH_CURRENT, pool), + pool)); + + *youngest_p = SVN_STR_TO_REV(buf->data); + + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_fs_fs__youngest_rev(svn_revnum_t *youngest_p, + svn_fs_t *fs, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + + SVN_ERR(get_youngest(youngest_p, fs->path, pool)); + ffd->youngest_rev_cache = *youngest_p; + + return SVN_NO_ERROR; +} + +/* Given a revision file FILE that has been pre-positioned at the + beginning of a Node-Rev header block, read in that header block and + store it in the apr_hash_t HEADERS. All allocations will be from + POOL. */ +static svn_error_t * read_header_block(apr_hash_t **headers, + svn_stream_t *stream, + apr_pool_t *pool) +{ + *headers = apr_hash_make(pool); + + while (1) + { + svn_stringbuf_t *header_str; + const char *name, *value; + apr_size_t i = 0; + svn_boolean_t eof; + + SVN_ERR(svn_stream_readline(stream, &header_str, "\n", &eof, pool)); + + if (eof || header_str->len == 0) + break; /* end of header block */ + + while (header_str->data[i] != ':') + { + if (header_str->data[i] == '\0') + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Found malformed header '%s' in " + "revision file"), + header_str->data); + i++; + } + + /* Create a 'name' string and point to it. */ + header_str->data[i] = '\0'; + name = header_str->data; + + /* Skip over the NULL byte and the space following it. */ + i += 2; + + if (i > header_str->len) + { + /* Restore the original line for the error. */ + i -= 2; + header_str->data[i] = ':'; + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Found malformed header '%s' in " + "revision file"), + header_str->data); + } + + value = header_str->data + i; + + /* header_str is safely in our pool, so we can use bits of it as + key and value. 
*/ + svn_hash_sets(*headers, name, value); + } + + return SVN_NO_ERROR; +} + +/* Return SVN_ERR_FS_NO_SUCH_REVISION if the given revision is newer + than the current youngest revision or is simply not a valid + revision number, else return success. + + FSFS is based around the concept that commits only take effect when + the number in "current" is bumped. Thus if there happens to be a rev + or revprops file installed for a revision higher than the one recorded + in "current" (because a commit failed between installing the rev file + and bumping "current", or because an administrator rolled back the + repository by resetting "current" without deleting rev files, etc), it + ought to be completely ignored. This function provides the check + by which callers can make that decision. */ +static svn_error_t * +ensure_revision_exists(svn_fs_t *fs, + svn_revnum_t rev, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + + if (! SVN_IS_VALID_REVNUM(rev)) + return svn_error_createf(SVN_ERR_FS_NO_SUCH_REVISION, NULL, + _("Invalid revision number '%ld'"), rev); + + + /* Did the revision exist the last time we checked the current + file? */ + if (rev <= ffd->youngest_rev_cache) + return SVN_NO_ERROR; + + SVN_ERR(get_youngest(&(ffd->youngest_rev_cache), fs->path, pool)); + + /* Check again. */ + if (rev <= ffd->youngest_rev_cache) + return SVN_NO_ERROR; + + return svn_error_createf(SVN_ERR_FS_NO_SUCH_REVISION, NULL, + _("No such revision %ld"), rev); +} + +svn_error_t * +svn_fs_fs__revision_exists(svn_revnum_t rev, + svn_fs_t *fs, + apr_pool_t *pool) +{ + /* Different order of parameters. */ + SVN_ERR(ensure_revision_exists(fs, rev, pool)); + return SVN_NO_ERROR; +} + +/* Open the correct revision file for REV. If the filesystem FS has + been packed, *FILE will be set to the packed file; otherwise, set *FILE + to the revision file for REV. Return SVN_ERR_FS_NO_SUCH_REVISION if the + file doesn't exist. 
+ + TODO: Consider returning an indication of whether this is a packed rev + file, so the caller need not rely on is_packed_rev() which in turn + relies on the cached FFD->min_unpacked_rev value not having changed + since the rev file was opened. + + Use POOL for allocations. */ +static svn_error_t * +open_pack_or_rev_file(apr_file_t **file, + svn_fs_t *fs, + svn_revnum_t rev, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + svn_error_t *err; + const char *path; + svn_boolean_t retry = FALSE; + + do + { + err = svn_fs_fs__path_rev_absolute(&path, fs, rev, pool); + + /* open the revision file in buffered r/o mode */ + if (! err) + err = svn_io_file_open(file, path, + APR_READ | APR_BUFFERED, APR_OS_DEFAULT, pool); + + if (err && APR_STATUS_IS_ENOENT(err->apr_err)) + { + if (ffd->format >= SVN_FS_FS__MIN_PACKED_FORMAT) + { + /* Could not open the file. This may happen if the + * file once existed but got packed later. */ + svn_error_clear(err); + + /* if that was our 2nd attempt, leave it at that. */ + if (retry) + return svn_error_createf(SVN_ERR_FS_NO_SUCH_REVISION, NULL, + _("No such revision %ld"), rev); + + /* We failed for the first time. Refresh cache & retry. */ + SVN_ERR(update_min_unpacked_rev(fs, pool)); + + retry = TRUE; + } + else + { + svn_error_clear(err); + return svn_error_createf(SVN_ERR_FS_NO_SUCH_REVISION, NULL, + _("No such revision %ld"), rev); + } + } + else + { + retry = FALSE; + } + } + while (retry); + + return svn_error_trace(err); +} + +/* Reads a line from STREAM and converts it to a 64 bit integer to be + * returned in *RESULT. If we encounter eof, set *HIT_EOF and leave + * *RESULT unchanged. If HIT_EOF is NULL, EOF causes an "corrupt FS" + * error return. + * SCRATCH_POOL is used for temporary allocations. 
+ */ +static svn_error_t * +read_number_from_stream(apr_int64_t *result, + svn_boolean_t *hit_eof, + svn_stream_t *stream, + apr_pool_t *scratch_pool) +{ + svn_stringbuf_t *sb; + svn_boolean_t eof; + svn_error_t *err; + + SVN_ERR(svn_stream_readline(stream, &sb, "\n", &eof, scratch_pool)); + if (hit_eof) + *hit_eof = eof; + else + if (eof) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, _("Unexpected EOF")); + + if (!eof) + { + err = svn_cstring_atoi64(result, sb->data); + if (err) + return svn_error_createf(SVN_ERR_FS_CORRUPT, err, + _("Number '%s' invalid or too large"), + sb->data); + } + + return SVN_NO_ERROR; +} + +/* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file. + Use POOL for temporary allocations. */ +static svn_error_t * +get_packed_offset(apr_off_t *rev_offset, + svn_fs_t *fs, + svn_revnum_t rev, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + svn_stream_t *manifest_stream; + svn_boolean_t is_cached; + svn_revnum_t shard; + apr_int64_t shard_pos; + apr_array_header_t *manifest; + apr_pool_t *iterpool; + + shard = rev / ffd->max_files_per_dir; + + /* position of the shard within the manifest */ + shard_pos = rev % ffd->max_files_per_dir; + + /* fetch exactly that element into *rev_offset, if the manifest is found + in the cache */ + SVN_ERR(svn_cache__get_partial((void **) rev_offset, &is_cached, + ffd->packed_offset_cache, &shard, + svn_fs_fs__get_sharded_offset, &shard_pos, + pool)); + + if (is_cached) + return SVN_NO_ERROR; + + /* Open the manifest file. */ + SVN_ERR(svn_stream_open_readonly(&manifest_stream, + path_rev_packed(fs, rev, PATH_MANIFEST, + pool), + pool, pool)); + + /* While we're here, let's just read the entire manifest file into an array, + so we can cache the entire thing. 
*/ + iterpool = svn_pool_create(pool); + manifest = apr_array_make(pool, ffd->max_files_per_dir, sizeof(apr_off_t)); + while (1) + { + svn_boolean_t eof; + apr_int64_t val; + + svn_pool_clear(iterpool); + SVN_ERR(read_number_from_stream(&val, &eof, manifest_stream, iterpool)); + if (eof) + break; + + APR_ARRAY_PUSH(manifest, apr_off_t) = (apr_off_t)val; + } + svn_pool_destroy(iterpool); + + *rev_offset = APR_ARRAY_IDX(manifest, rev % ffd->max_files_per_dir, + apr_off_t); + + /* Close up shop and cache the array. */ + SVN_ERR(svn_stream_close(manifest_stream)); + return svn_cache__set(ffd->packed_offset_cache, &shard, manifest, pool); +} + +/* Open the revision file for revision REV in filesystem FS and store + the newly opened file in FILE. Seek to location OFFSET before + returning. Perform temporary allocations in POOL. */ +static svn_error_t * +open_and_seek_revision(apr_file_t **file, + svn_fs_t *fs, + svn_revnum_t rev, + apr_off_t offset, + apr_pool_t *pool) +{ + apr_file_t *rev_file; + + SVN_ERR(ensure_revision_exists(fs, rev, pool)); + + SVN_ERR(open_pack_or_rev_file(&rev_file, fs, rev, pool)); + + if (is_packed_rev(fs, rev)) + { + apr_off_t rev_offset; + + SVN_ERR(get_packed_offset(&rev_offset, fs, rev, pool)); + offset += rev_offset; + } + + SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &offset, pool)); + + *file = rev_file; + + return SVN_NO_ERROR; +} + +/* Open the representation for a node-revision in transaction TXN_ID + in filesystem FS and store the newly opened file in FILE. Seek to + location OFFSET before returning. Perform temporary allocations in + POOL. Only appropriate for file contents, nor props or directory + contents. 
*/ +static svn_error_t * +open_and_seek_transaction(apr_file_t **file, + svn_fs_t *fs, + const char *txn_id, + representation_t *rep, + apr_pool_t *pool) +{ + apr_file_t *rev_file; + apr_off_t offset; + + SVN_ERR(svn_io_file_open(&rev_file, path_txn_proto_rev(fs, txn_id, pool), + APR_READ | APR_BUFFERED, APR_OS_DEFAULT, pool)); + + offset = rep->offset; + SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &offset, pool)); + + *file = rev_file; + + return SVN_NO_ERROR; +} + +/* Given a node-id ID, and a representation REP in filesystem FS, open + the correct file and seek to the correction location. Store this + file in *FILE_P. Perform any allocations in POOL. */ +static svn_error_t * +open_and_seek_representation(apr_file_t **file_p, + svn_fs_t *fs, + representation_t *rep, + apr_pool_t *pool) +{ + if (! rep->txn_id) + return open_and_seek_revision(file_p, fs, rep->revision, rep->offset, + pool); + else + return open_and_seek_transaction(file_p, fs, rep->txn_id, rep, pool); +} + +/* Parse the description of a representation from STRING and store it + into *REP_P. If the representation is mutable (the revision is + given as -1), then use TXN_ID for the representation's txn_id + field. If MUTABLE_REP_TRUNCATED is true, then this representation + is for property or directory contents, and no information will be + expected except the "-1" revision number for a mutable + representation. Allocate *REP_P in POOL. 
*/ +static svn_error_t * +read_rep_offsets_body(representation_t **rep_p, + char *string, + const char *txn_id, + svn_boolean_t mutable_rep_truncated, + apr_pool_t *pool) +{ + representation_t *rep; + char *str; + apr_int64_t val; + + rep = apr_pcalloc(pool, sizeof(*rep)); + *rep_p = rep; + + str = svn_cstring_tokenize(" ", &string); + if (str == NULL) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Malformed text representation offset line in node-rev")); + + + rep->revision = SVN_STR_TO_REV(str); + if (rep->revision == SVN_INVALID_REVNUM) + { + rep->txn_id = txn_id; + if (mutable_rep_truncated) + return SVN_NO_ERROR; + } + + str = svn_cstring_tokenize(" ", &string); + if (str == NULL) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Malformed text representation offset line in node-rev")); + + SVN_ERR(svn_cstring_atoi64(&val, str)); + rep->offset = (apr_off_t)val; + + str = svn_cstring_tokenize(" ", &string); + if (str == NULL) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Malformed text representation offset line in node-rev")); + + SVN_ERR(svn_cstring_atoi64(&val, str)); + rep->size = (svn_filesize_t)val; + + str = svn_cstring_tokenize(" ", &string); + if (str == NULL) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Malformed text representation offset line in node-rev")); + + SVN_ERR(svn_cstring_atoi64(&val, str)); + rep->expanded_size = (svn_filesize_t)val; + + /* Read in the MD5 hash. */ + str = svn_cstring_tokenize(" ", &string); + if ((str == NULL) || (strlen(str) != (APR_MD5_DIGESTSIZE * 2))) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Malformed text representation offset line in node-rev")); + + SVN_ERR(svn_checksum_parse_hex(&rep->md5_checksum, svn_checksum_md5, str, + pool)); + + /* The remaining fields are only used for formats >= 4, so check that. */ + str = svn_cstring_tokenize(" ", &string); + if (str == NULL) + return SVN_NO_ERROR; + + /* Read the SHA1 hash. 
*/ + if (strlen(str) != (APR_SHA1_DIGESTSIZE * 2)) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Malformed text representation offset line in node-rev")); + + SVN_ERR(svn_checksum_parse_hex(&rep->sha1_checksum, svn_checksum_sha1, str, + pool)); + + /* Read the uniquifier. */ + str = svn_cstring_tokenize(" ", &string); + if (str == NULL) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Malformed text representation offset line in node-rev")); + + rep->uniquifier = apr_pstrdup(pool, str); + + return SVN_NO_ERROR; +} + +/* Wrap read_rep_offsets_body(), extracting its TXN_ID from our NODEREV_ID, + and adding an error message. */ +static svn_error_t * +read_rep_offsets(representation_t **rep_p, + char *string, + const svn_fs_id_t *noderev_id, + svn_boolean_t mutable_rep_truncated, + apr_pool_t *pool) +{ + svn_error_t *err; + const char *txn_id; + + if (noderev_id) + txn_id = svn_fs_fs__id_txn_id(noderev_id); + else + txn_id = NULL; + + err = read_rep_offsets_body(rep_p, string, txn_id, mutable_rep_truncated, + pool); + if (err) + { + const svn_string_t *id_unparsed = svn_fs_fs__id_unparse(noderev_id, pool); + const char *where; + where = apr_psprintf(pool, + _("While reading representation offsets " + "for node-revision '%s':"), + noderev_id ? id_unparsed->data : "(null)"); + + return svn_error_quick_wrap(err, where); + } + else + return SVN_NO_ERROR; +} + +static svn_error_t * +err_dangling_id(svn_fs_t *fs, const svn_fs_id_t *id) +{ + svn_string_t *id_str = svn_fs_fs__id_unparse(id, fs->pool); + return svn_error_createf + (SVN_ERR_FS_ID_NOT_FOUND, 0, + _("Reference to non-existent node '%s' in filesystem '%s'"), + id_str->data, fs->path); +} + +/* Look up the NODEREV_P for ID in FS' node revsion cache. If noderev + * caching has been enabled and the data can be found, IS_CACHED will + * be set to TRUE. The noderev will be allocated from POOL. + * + * Non-permanent ids (e.g. ids within a TXN) will not be cached. 
+ */ +static svn_error_t * +get_cached_node_revision_body(node_revision_t **noderev_p, + svn_fs_t *fs, + const svn_fs_id_t *id, + svn_boolean_t *is_cached, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + if (! ffd->node_revision_cache || svn_fs_fs__id_txn_id(id)) + { + *is_cached = FALSE; + } + else + { + pair_cache_key_t key = { 0 }; + + key.revision = svn_fs_fs__id_rev(id); + key.second = svn_fs_fs__id_offset(id); + SVN_ERR(svn_cache__get((void **) noderev_p, + is_cached, + ffd->node_revision_cache, + &key, + pool)); + } + + return SVN_NO_ERROR; +} + +/* If noderev caching has been enabled, store the NODEREV_P for the given ID + * in FS' node revsion cache. SCRATCH_POOL is used for temporary allcations. + * + * Non-permanent ids (e.g. ids within a TXN) will not be cached. + */ +static svn_error_t * +set_cached_node_revision_body(node_revision_t *noderev_p, + svn_fs_t *fs, + const svn_fs_id_t *id, + apr_pool_t *scratch_pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + + if (ffd->node_revision_cache && !svn_fs_fs__id_txn_id(id)) + { + pair_cache_key_t key = { 0 }; + + key.revision = svn_fs_fs__id_rev(id); + key.second = svn_fs_fs__id_offset(id); + return svn_cache__set(ffd->node_revision_cache, + &key, + noderev_p, + scratch_pool); + } + + return SVN_NO_ERROR; +} + +/* Get the node-revision for the node ID in FS. + Set *NODEREV_P to the new node-revision structure, allocated in POOL. + See svn_fs_fs__get_node_revision, which wraps this and adds another + error. */ +static svn_error_t * +get_node_revision_body(node_revision_t **noderev_p, + svn_fs_t *fs, + const svn_fs_id_t *id, + apr_pool_t *pool) +{ + apr_file_t *revision_file; + svn_error_t *err; + svn_boolean_t is_cached = FALSE; + + /* First, try a cache lookup. If that succeeds, we are done here. */ + SVN_ERR(get_cached_node_revision_body(noderev_p, fs, id, &is_cached, pool)); + if (is_cached) + return SVN_NO_ERROR; + + if (svn_fs_fs__id_txn_id(id)) + { + /* This is a transaction node-rev. 
*/ + err = svn_io_file_open(&revision_file, path_txn_node_rev(fs, id, pool), + APR_READ | APR_BUFFERED, APR_OS_DEFAULT, pool); + } + else + { + /* This is a revision node-rev. */ + err = open_and_seek_revision(&revision_file, fs, + svn_fs_fs__id_rev(id), + svn_fs_fs__id_offset(id), + pool); + } + + if (err) + { + if (APR_STATUS_IS_ENOENT(err->apr_err)) + { + svn_error_clear(err); + return svn_error_trace(err_dangling_id(fs, id)); + } + + return svn_error_trace(err); + } + + SVN_ERR(svn_fs_fs__read_noderev(noderev_p, + svn_stream_from_aprfile2(revision_file, FALSE, + pool), + pool)); + + /* The noderev is not in cache, yet. Add it, if caching has been enabled. */ + return set_cached_node_revision_body(*noderev_p, fs, id, pool); +} + +svn_error_t * +svn_fs_fs__read_noderev(node_revision_t **noderev_p, + svn_stream_t *stream, + apr_pool_t *pool) +{ + apr_hash_t *headers; + node_revision_t *noderev; + char *value; + const char *noderev_id; + + SVN_ERR(read_header_block(&headers, stream, pool)); + + noderev = apr_pcalloc(pool, sizeof(*noderev)); + + /* Read the node-rev id. */ + value = svn_hash_gets(headers, HEADER_ID); + if (value == NULL) + /* ### More information: filename/offset coordinates */ + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Missing id field in node-rev")); + + SVN_ERR(svn_stream_close(stream)); + + noderev->id = svn_fs_fs__id_parse(value, strlen(value), pool); + noderev_id = value; /* for error messages later */ + + /* Read the type. */ + value = svn_hash_gets(headers, HEADER_TYPE); + + if ((value == NULL) || + (strcmp(value, KIND_FILE) != 0 && strcmp(value, KIND_DIR))) + /* ### s/kind/type/ */ + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Missing kind field in node-rev '%s'"), + noderev_id); + + noderev->kind = (strcmp(value, KIND_FILE) == 0) ? svn_node_file + : svn_node_dir; + + /* Read the 'count' field. 
*/ + value = svn_hash_gets(headers, HEADER_COUNT); + if (value) + SVN_ERR(svn_cstring_atoi(&noderev->predecessor_count, value)); + else + noderev->predecessor_count = 0; + + /* Get the properties location. */ + value = svn_hash_gets(headers, HEADER_PROPS); + if (value) + { + SVN_ERR(read_rep_offsets(&noderev->prop_rep, value, + noderev->id, TRUE, pool)); + } + + /* Get the data location. */ + value = svn_hash_gets(headers, HEADER_TEXT); + if (value) + { + SVN_ERR(read_rep_offsets(&noderev->data_rep, value, + noderev->id, + (noderev->kind == svn_node_dir), pool)); + } + + /* Get the created path. */ + value = svn_hash_gets(headers, HEADER_CPATH); + if (value == NULL) + { + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Missing cpath field in node-rev '%s'"), + noderev_id); + } + else + { + noderev->created_path = apr_pstrdup(pool, value); + } + + /* Get the predecessor ID. */ + value = svn_hash_gets(headers, HEADER_PRED); + if (value) + noderev->predecessor_id = svn_fs_fs__id_parse(value, strlen(value), + pool); + + /* Get the copyroot. */ + value = svn_hash_gets(headers, HEADER_COPYROOT); + if (value == NULL) + { + noderev->copyroot_path = apr_pstrdup(pool, noderev->created_path); + noderev->copyroot_rev = svn_fs_fs__id_rev(noderev->id); + } + else + { + char *str; + + str = svn_cstring_tokenize(" ", &value); + if (str == NULL) + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Malformed copyroot line in node-rev '%s'"), + noderev_id); + + noderev->copyroot_rev = SVN_STR_TO_REV(str); + + if (*value == '\0') + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Malformed copyroot line in node-rev '%s'"), + noderev_id); + noderev->copyroot_path = apr_pstrdup(pool, value); + } + + /* Get the copyfrom. 
*/ + value = svn_hash_gets(headers, HEADER_COPYFROM); + if (value == NULL) + { + noderev->copyfrom_path = NULL; + noderev->copyfrom_rev = SVN_INVALID_REVNUM; + } + else + { + char *str = svn_cstring_tokenize(" ", &value); + if (str == NULL) + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Malformed copyfrom line in node-rev '%s'"), + noderev_id); + + noderev->copyfrom_rev = SVN_STR_TO_REV(str); + + if (*value == 0) + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Malformed copyfrom line in node-rev '%s'"), + noderev_id); + noderev->copyfrom_path = apr_pstrdup(pool, value); + } + + /* Get whether this is a fresh txn root. */ + value = svn_hash_gets(headers, HEADER_FRESHTXNRT); + noderev->is_fresh_txn_root = (value != NULL); + + /* Get the mergeinfo count. */ + value = svn_hash_gets(headers, HEADER_MINFO_CNT); + if (value) + SVN_ERR(svn_cstring_atoi64(&noderev->mergeinfo_count, value)); + else + noderev->mergeinfo_count = 0; + + /* Get whether *this* node has mergeinfo. */ + value = svn_hash_gets(headers, HEADER_MINFO_HERE); + noderev->has_mergeinfo = (value != NULL); + + *noderev_p = noderev; + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__get_node_revision(node_revision_t **noderev_p, + svn_fs_t *fs, + const svn_fs_id_t *id, + apr_pool_t *pool) +{ + svn_error_t *err = get_node_revision_body(noderev_p, fs, id, pool); + if (err && err->apr_err == SVN_ERR_FS_CORRUPT) + { + svn_string_t *id_string = svn_fs_fs__id_unparse(id, pool); + return svn_error_createf(SVN_ERR_FS_CORRUPT, err, + "Corrupt node-revision '%s'", + id_string->data); + } + return svn_error_trace(err); +} + + +/* Return a formatted string, compatible with filesystem format FORMAT, + that represents the location of representation REP. If + MUTABLE_REP_TRUNCATED is given, the rep is for props or dir contents, + and only a "-1" revision number will be given for a mutable rep. + If MAY_BE_CORRUPT is true, guard for NULL when constructing the string. 
+ Perform the allocation from POOL. */ +static const char * +representation_string(representation_t *rep, + int format, + svn_boolean_t mutable_rep_truncated, + svn_boolean_t may_be_corrupt, + apr_pool_t *pool) +{ + if (rep->txn_id && mutable_rep_truncated) + return "-1"; + +#define DISPLAY_MAYBE_NULL_CHECKSUM(checksum) \ + ((!may_be_corrupt || (checksum) != NULL) \ + ? svn_checksum_to_cstring_display((checksum), pool) \ + : "(null)") + + if (format < SVN_FS_FS__MIN_REP_SHARING_FORMAT || rep->sha1_checksum == NULL) + return apr_psprintf(pool, "%ld %" APR_OFF_T_FMT " %" SVN_FILESIZE_T_FMT + " %" SVN_FILESIZE_T_FMT " %s", + rep->revision, rep->offset, rep->size, + rep->expanded_size, + DISPLAY_MAYBE_NULL_CHECKSUM(rep->md5_checksum)); + + return apr_psprintf(pool, "%ld %" APR_OFF_T_FMT " %" SVN_FILESIZE_T_FMT + " %" SVN_FILESIZE_T_FMT " %s %s %s", + rep->revision, rep->offset, rep->size, + rep->expanded_size, + DISPLAY_MAYBE_NULL_CHECKSUM(rep->md5_checksum), + DISPLAY_MAYBE_NULL_CHECKSUM(rep->sha1_checksum), + rep->uniquifier); + +#undef DISPLAY_MAYBE_NULL_CHECKSUM + +} + + +svn_error_t * +svn_fs_fs__write_noderev(svn_stream_t *outfile, + node_revision_t *noderev, + int format, + svn_boolean_t include_mergeinfo, + apr_pool_t *pool) +{ + SVN_ERR(svn_stream_printf(outfile, pool, HEADER_ID ": %s\n", + svn_fs_fs__id_unparse(noderev->id, + pool)->data)); + + SVN_ERR(svn_stream_printf(outfile, pool, HEADER_TYPE ": %s\n", + (noderev->kind == svn_node_file) ? 
+ KIND_FILE : KIND_DIR)); + + if (noderev->predecessor_id) + SVN_ERR(svn_stream_printf(outfile, pool, HEADER_PRED ": %s\n", + svn_fs_fs__id_unparse(noderev->predecessor_id, + pool)->data)); + + SVN_ERR(svn_stream_printf(outfile, pool, HEADER_COUNT ": %d\n", + noderev->predecessor_count)); + + if (noderev->data_rep) + SVN_ERR(svn_stream_printf(outfile, pool, HEADER_TEXT ": %s\n", + representation_string(noderev->data_rep, + format, + (noderev->kind + == svn_node_dir), + FALSE, + pool))); + + if (noderev->prop_rep) + SVN_ERR(svn_stream_printf(outfile, pool, HEADER_PROPS ": %s\n", + representation_string(noderev->prop_rep, format, + TRUE, FALSE, pool))); + + SVN_ERR(svn_stream_printf(outfile, pool, HEADER_CPATH ": %s\n", + noderev->created_path)); + + if (noderev->copyfrom_path) + SVN_ERR(svn_stream_printf(outfile, pool, HEADER_COPYFROM ": %ld" + " %s\n", + noderev->copyfrom_rev, + noderev->copyfrom_path)); + + if ((noderev->copyroot_rev != svn_fs_fs__id_rev(noderev->id)) || + (strcmp(noderev->copyroot_path, noderev->created_path) != 0)) + SVN_ERR(svn_stream_printf(outfile, pool, HEADER_COPYROOT ": %ld" + " %s\n", + noderev->copyroot_rev, + noderev->copyroot_path)); + + if (noderev->is_fresh_txn_root) + SVN_ERR(svn_stream_puts(outfile, HEADER_FRESHTXNRT ": y\n")); + + if (include_mergeinfo) + { + if (noderev->mergeinfo_count > 0) + SVN_ERR(svn_stream_printf(outfile, pool, HEADER_MINFO_CNT ": %" + APR_INT64_T_FMT "\n", + noderev->mergeinfo_count)); + + if (noderev->has_mergeinfo) + SVN_ERR(svn_stream_puts(outfile, HEADER_MINFO_HERE ": y\n")); + } + + return svn_stream_puts(outfile, "\n"); +} + +svn_error_t * +svn_fs_fs__put_node_revision(svn_fs_t *fs, + const svn_fs_id_t *id, + node_revision_t *noderev, + svn_boolean_t fresh_txn_root, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + apr_file_t *noderev_file; + const char *txn_id = svn_fs_fs__id_txn_id(id); + + noderev->is_fresh_txn_root = fresh_txn_root; + + if (! 
txn_id) + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Attempted to write to non-transaction '%s'"), + svn_fs_fs__id_unparse(id, pool)->data); + + SVN_ERR(svn_io_file_open(&noderev_file, path_txn_node_rev(fs, id, pool), + APR_WRITE | APR_CREATE | APR_TRUNCATE + | APR_BUFFERED, APR_OS_DEFAULT, pool)); + + SVN_ERR(svn_fs_fs__write_noderev(svn_stream_from_aprfile2(noderev_file, TRUE, + pool), + noderev, ffd->format, + svn_fs_fs__fs_supports_mergeinfo(fs), + pool)); + + SVN_ERR(svn_io_file_close(noderev_file, pool)); + + return SVN_NO_ERROR; +} + +/* For the in-transaction NODEREV within FS, write the sha1->rep mapping + * file in the respective transaction, if rep sharing has been enabled etc. + * Use POOL for temporary allocations. + */ +static svn_error_t * +store_sha1_rep_mapping(svn_fs_t *fs, + node_revision_t *noderev, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + + /* if rep sharing has been enabled and the noderev has a data rep and + * its SHA-1 is known, store the rep struct under its SHA1. */ + if ( ffd->rep_sharing_allowed + && noderev->data_rep + && noderev->data_rep->sha1_checksum) + { + apr_file_t *rep_file; + const char *file_name = path_txn_sha1(fs, + svn_fs_fs__id_txn_id(noderev->id), + noderev->data_rep->sha1_checksum, + pool); + const char *rep_string = representation_string(noderev->data_rep, + ffd->format, + (noderev->kind + == svn_node_dir), + FALSE, + pool); + SVN_ERR(svn_io_file_open(&rep_file, file_name, + APR_WRITE | APR_CREATE | APR_TRUNCATE + | APR_BUFFERED, APR_OS_DEFAULT, pool)); + + SVN_ERR(svn_io_file_write_full(rep_file, rep_string, + strlen(rep_string), NULL, pool)); + + SVN_ERR(svn_io_file_close(rep_file, pool)); + } + + return SVN_NO_ERROR; +} + + +/* This structure is used to hold the information associated with a + REP line. 
*/ +struct rep_args +{ + svn_boolean_t is_delta; + svn_boolean_t is_delta_vs_empty; + + svn_revnum_t base_revision; + apr_off_t base_offset; + svn_filesize_t base_length; +}; + +/* Read the next line from file FILE and parse it as a text + representation entry. Return the parsed entry in *REP_ARGS_P. + Perform all allocations in POOL. */ +static svn_error_t * +read_rep_line(struct rep_args **rep_args_p, + apr_file_t *file, + apr_pool_t *pool) +{ + char buffer[160]; + apr_size_t limit; + struct rep_args *rep_args; + char *str, *last_str = buffer; + apr_int64_t val; + + limit = sizeof(buffer); + SVN_ERR(svn_io_read_length_line(file, buffer, &limit, pool)); + + rep_args = apr_pcalloc(pool, sizeof(*rep_args)); + rep_args->is_delta = FALSE; + + if (strcmp(buffer, REP_PLAIN) == 0) + { + *rep_args_p = rep_args; + return SVN_NO_ERROR; + } + + if (strcmp(buffer, REP_DELTA) == 0) + { + /* This is a delta against the empty stream. */ + rep_args->is_delta = TRUE; + rep_args->is_delta_vs_empty = TRUE; + *rep_args_p = rep_args; + return SVN_NO_ERROR; + } + + rep_args->is_delta = TRUE; + rep_args->is_delta_vs_empty = FALSE; + + /* We have hopefully a DELTA vs. a non-empty base revision. */ + str = svn_cstring_tokenize(" ", &last_str); + if (! str || (strcmp(str, REP_DELTA) != 0)) + goto error; + + str = svn_cstring_tokenize(" ", &last_str); + if (! str) + goto error; + rep_args->base_revision = SVN_STR_TO_REV(str); + + str = svn_cstring_tokenize(" ", &last_str); + if (! str) + goto error; + SVN_ERR(svn_cstring_atoi64(&val, str)); + rep_args->base_offset = (apr_off_t)val; + + str = svn_cstring_tokenize(" ", &last_str); + if (! 
str) + goto error; + SVN_ERR(svn_cstring_atoi64(&val, str)); + rep_args->base_length = (svn_filesize_t)val; + + *rep_args_p = rep_args; + return SVN_NO_ERROR; + + error: + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Malformed representation header at %s"), + path_and_offset_of(file, pool)); +} + +/* Given a revision file REV_FILE, opened to REV in FS, find the Node-ID + of the header located at OFFSET and store it in *ID_P. Allocate + temporary variables from POOL. */ +static svn_error_t * +get_fs_id_at_offset(svn_fs_id_t **id_p, + apr_file_t *rev_file, + svn_fs_t *fs, + svn_revnum_t rev, + apr_off_t offset, + apr_pool_t *pool) +{ + svn_fs_id_t *id; + apr_hash_t *headers; + const char *node_id_str; + + SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &offset, pool)); + + SVN_ERR(read_header_block(&headers, + svn_stream_from_aprfile2(rev_file, TRUE, pool), + pool)); + + /* In error messages, the offset is relative to the pack file, + not to the rev file. */ + + node_id_str = svn_hash_gets(headers, HEADER_ID); + + if (node_id_str == NULL) + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Missing node-id in node-rev at r%ld " + "(offset %s)"), + rev, + apr_psprintf(pool, "%" APR_OFF_T_FMT, offset)); + + id = svn_fs_fs__id_parse(node_id_str, strlen(node_id_str), pool); + + if (id == NULL) + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Corrupt node-id '%s' in node-rev at r%ld " + "(offset %s)"), + node_id_str, rev, + apr_psprintf(pool, "%" APR_OFF_T_FMT, offset)); + + *id_p = id; + + /* ### assert that the txn_id is REV/OFFSET ? */ + + return SVN_NO_ERROR; +} + + +/* Given an open revision file REV_FILE in FS for REV, locate the trailer that + specifies the offset to the root node-id and to the changed path + information. Store the root node offset in *ROOT_OFFSET and the + changed path offset in *CHANGES_OFFSET. If either of these + pointers is NULL, do nothing with it. + + If PACKED is true, REV_FILE should be a packed shard file. 
+ ### There is currently no such parameter. This function assumes that + is_packed_rev(FS, REV) will indicate whether REV_FILE is a packed + file. Therefore FS->fsap_data->min_unpacked_rev must not have been + refreshed since REV_FILE was opened if there is a possibility that + revision REV may have become packed since then. + TODO: Take an IS_PACKED parameter instead, in order to remove this + requirement. + + Allocate temporary variables from POOL. */ +static svn_error_t * +get_root_changes_offset(apr_off_t *root_offset, + apr_off_t *changes_offset, + apr_file_t *rev_file, + svn_fs_t *fs, + svn_revnum_t rev, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + apr_off_t offset; + apr_off_t rev_offset; + char buf[64]; + int i, num_bytes; + const char *str; + apr_size_t len; + apr_seek_where_t seek_relative; + + /* Determine where to seek to in the file. + + If we've got a pack file, we want to seek to the end of the desired + revision. But we don't track that, so we seek to the beginning of the + next revision. + + Unless the next revision is in a different file, in which case, we can + just seek to the end of the pack file -- just like we do in the + non-packed case. */ + if (is_packed_rev(fs, rev) && ((rev + 1) % ffd->max_files_per_dir != 0)) + { + SVN_ERR(get_packed_offset(&offset, fs, rev + 1, pool)); + seek_relative = APR_SET; + } + else + { + seek_relative = APR_END; + offset = 0; + } + + /* Offset of the revision from the start of the pack file, if applicable. */ + if (is_packed_rev(fs, rev)) + SVN_ERR(get_packed_offset(&rev_offset, fs, rev, pool)); + else + rev_offset = 0; + + /* We will assume that the last line containing the two offsets + will never be longer than 64 characters. */ + SVN_ERR(svn_io_file_seek(rev_file, seek_relative, &offset, pool)); + + offset -= sizeof(buf); + SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &offset, pool)); + + /* Read in this last block, from which we will identify the last line. 
*/ + len = sizeof(buf); + SVN_ERR(svn_io_file_read(rev_file, buf, &len, pool)); + + /* This cast should be safe since the maximum amount read, 64, will + never be bigger than the size of an int. */ + num_bytes = (int) len; + + /* The last byte should be a newline. */ + if (buf[num_bytes - 1] != '\n') + { + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Revision file (r%ld) lacks trailing newline"), + rev); + } + + /* Look for the next previous newline. */ + for (i = num_bytes - 2; i >= 0; i--) + { + if (buf[i] == '\n') + break; + } + + if (i < 0) + { + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Final line in revision file (r%ld) longer " + "than 64 characters"), + rev); + } + + i++; + str = &buf[i]; + + /* find the next space */ + for ( ; i < (num_bytes - 2) ; i++) + if (buf[i] == ' ') + break; + + if (i == (num_bytes - 2)) + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Final line in revision file r%ld missing space"), + rev); + + if (root_offset) + { + apr_int64_t val; + + buf[i] = '\0'; + SVN_ERR(svn_cstring_atoi64(&val, str)); + *root_offset = rev_offset + (apr_off_t)val; + } + + i++; + str = &buf[i]; + + /* find the next newline */ + for ( ; i < num_bytes; i++) + if (buf[i] == '\n') + break; + + if (changes_offset) + { + apr_int64_t val; + + buf[i] = '\0'; + SVN_ERR(svn_cstring_atoi64(&val, str)); + *changes_offset = rev_offset + (apr_off_t)val; + } + + return SVN_NO_ERROR; +} + +/* Move a file into place from OLD_FILENAME in the transactions + directory to its final location NEW_FILENAME in the repository. On + Unix, match the permissions of the new file to the permissions of + PERMS_REFERENCE. Temporary allocations are from POOL. + + This function almost duplicates svn_io_file_move(), but it tries to + guarantee a flush. 
 */
static svn_error_t *
move_into_place(const char *old_filename,
                const char *new_filename,
                const char *perms_reference,
                apr_pool_t *pool)
{
  svn_error_t *err;

  /* Copy the permissions of PERMS_REFERENCE onto the file while it is
     still at OLD_FILENAME, so the rename publishes it with the right
     permissions atomically. */
  SVN_ERR(svn_io_copy_perms(perms_reference, old_filename, pool));

  /* Move the file into place. */
  err = svn_io_file_rename(old_filename, new_filename, pool);
  if (err && APR_STATUS_IS_EXDEV(err->apr_err))
    {
      apr_file_t *file;

      /* Can't rename across devices; fall back to copying. */
      svn_error_clear(err);
      err = SVN_NO_ERROR;
      SVN_ERR(svn_io_copy_file(old_filename, new_filename, TRUE, pool));

      /* Flush the target of the copy to disk. */
      SVN_ERR(svn_io_file_open(&file, new_filename, APR_READ,
                               APR_OS_DEFAULT, pool));
      /* ### BH: Does this really guarantee a flush of the data written
         ### via a completely different handle on all operating systems?
         ###
         ### Maybe we should perform the copy ourselves instead of making
         ### apr do that and flush the real handle? */
      SVN_ERR(svn_io_file_flush_to_disk(file, pool));
      SVN_ERR(svn_io_file_close(file, pool));
    }
  if (err)
    return svn_error_trace(err);

#ifdef __linux__
  {
    /* Linux has the unusual feature that fsync() on a file is not
       enough to ensure that a file's directory entries have been
       flushed to disk; you have to fsync the directory as well.
       On other operating systems, we'd only be asking for trouble
       by trying to open and fsync a directory. */
    const char *dirname;
    apr_file_t *file;

    dirname = svn_dirent_dirname(new_filename, pool);
    SVN_ERR(svn_io_file_open(&file, dirname, APR_READ, APR_OS_DEFAULT,
                             pool));
    SVN_ERR(svn_io_file_flush_to_disk(file, pool));
    SVN_ERR(svn_io_file_close(file, pool));
  }
#endif

  return SVN_NO_ERROR;
}

/* Set *ROOT_ID_P to the node-revision ID of the root directory of
   revision REV in filesystem FS.  Serve the answer from the rev-root-id
   cache when possible; otherwise locate it in the rev / pack file and
   populate the cache.  Allocate the ID in POOL. */
svn_error_t *
svn_fs_fs__rev_get_root(svn_fs_id_t **root_id_p,
                        svn_fs_t *fs,
                        svn_revnum_t rev,
                        apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  apr_file_t *revision_file;
  apr_off_t root_offset;
  svn_fs_id_t *root_id = NULL;
  svn_boolean_t is_cached;

  SVN_ERR(ensure_revision_exists(fs, rev, pool));

  /* Cheap path: the root ID may already be cached for this revision. */
  SVN_ERR(svn_cache__get((void **) root_id_p, &is_cached,
                         ffd->rev_root_id_cache, &rev, pool));
  if (is_cached)
    return SVN_NO_ERROR;

  /* Slow path: find the root node-revision's offset in the revision
     (or pack) file and read the ID stored there. */
  SVN_ERR(open_pack_or_rev_file(&revision_file, fs, rev, pool));
  SVN_ERR(get_root_changes_offset(&root_offset, NULL, revision_file, fs, rev,
                                  pool));

  SVN_ERR(get_fs_id_at_offset(&root_id, revision_file, fs, rev,
                              root_offset, pool));

  SVN_ERR(svn_io_file_close(revision_file, pool));

  /* Remember the result for future lookups. */
  SVN_ERR(svn_cache__set(ffd->rev_root_id_cache, &rev, root_id, pool));

  *root_id_p = root_id;

  return SVN_NO_ERROR;
}

/* Revprop caching management.
 *
 * Mechanism:
 * ----------
 *
 * Revprop caching needs to be activated and will be deactivated for the
 * respective FS instance if the necessary infrastructure could not be
 * initialized.  In deactivated mode, there is almost no runtime overhead
 * associated with revprop caching.  As long as no revprops are being read
 * or changed, revprop caching imposes no overhead.
 *
 * When activated, we cache revprops using (revision, generation) pairs
 * as keys with the generation being incremented upon every revprop change.
 * Since the cache is process-local, the generation needs to be tracked
 * for at least as long as the process lives but may be reset afterwards.
 *
 * To track the revprop generation, we use a two-layer approach.
On the lower + * level, we use named atomics to have a system-wide consistent value for + * the current revprop generation. However, those named atomics will only + * remain valid for as long as at least one process / thread in the system + * accesses revprops in the respective repository. The underlying shared + * memory gets cleaned up afterwards. + * + * On the second level, we will use a persistent file to track the latest + * revprop generation. It will be written upon each revprop change but + * only be read if we are the first process to initialize the named atomics + * with that value. + * + * The overhead for the second and following accesses to revprops is + * almost zero on most systems. + * + * + * Tech aspects: + * ------------- + * + * A problem is that we need to provide a globally available file name to + * back the SHM implementation on OSes that need it. We can only assume + * write access to some file within the respective repositories. Because + * a given server process may access thousands of repositories during its + * lifetime, keeping the SHM data alive for all of them is also not an + * option. + * + * So, we store the new revprop generation on disk as part of each + * setrevprop call, i.e. this write will be serialized and the write order + * be guaranteed by the repository write lock. + * + * The only racy situation occurs when the data is being read again by two + * processes concurrently but in that situation, the first process to + * finish that procedure is guaranteed to be the only one that initializes + * the SHM data. Since even writers will first go through that + * initialization phase, they will never operate on stale data. + */ + +/* Read revprop generation as stored on disk for repository FS. The result + * is returned in *CURRENT. Default to 2 if no such file is available. 
+ */ +static svn_error_t * +read_revprop_generation_file(apr_int64_t *current, + svn_fs_t *fs, + apr_pool_t *pool) +{ + svn_error_t *err; + apr_file_t *file; + char buf[80]; + apr_size_t len; + const char *path = path_revprop_generation(fs, pool); + + err = svn_io_file_open(&file, path, + APR_READ | APR_BUFFERED, + APR_OS_DEFAULT, pool); + if (err && APR_STATUS_IS_ENOENT(err->apr_err)) + { + svn_error_clear(err); + *current = 2; + + return SVN_NO_ERROR; + } + SVN_ERR(err); + + len = sizeof(buf); + SVN_ERR(svn_io_read_length_line(file, buf, &len, pool)); + + /* Check that the first line contains only digits. */ + SVN_ERR(check_file_buffer_numeric(buf, 0, path, + "Revprop Generation", pool)); + SVN_ERR(svn_cstring_atoi64(current, buf)); + + return svn_io_file_close(file, pool); +} + +/* Write the CURRENT revprop generation to disk for repository FS. + */ +static svn_error_t * +write_revprop_generation_file(svn_fs_t *fs, + apr_int64_t current, + apr_pool_t *pool) +{ + apr_file_t *file; + const char *tmp_path; + + char buf[SVN_INT64_BUFFER_SIZE]; + apr_size_t len = svn__i64toa(buf, current); + buf[len] = '\n'; + + SVN_ERR(svn_io_open_unique_file3(&file, &tmp_path, fs->path, + svn_io_file_del_none, pool, pool)); + SVN_ERR(svn_io_file_write_full(file, buf, len + 1, NULL, pool)); + SVN_ERR(svn_io_file_close(file, pool)); + + return move_into_place(tmp_path, path_revprop_generation(fs, pool), + tmp_path, pool); +} + +/* Make sure the revprop_namespace member in FS is set. */ +static svn_error_t * +ensure_revprop_namespace(svn_fs_t *fs) +{ + fs_fs_data_t *ffd = fs->fsap_data; + + return ffd->revprop_namespace == NULL + ? svn_atomic_namespace__create(&ffd->revprop_namespace, + svn_dirent_join(fs->path, + ATOMIC_REVPROP_NAMESPACE, + fs->pool), + fs->pool) + : SVN_NO_ERROR; +} + +/* Make sure the revprop_namespace member in FS is set. 
*/ +static svn_error_t * +cleanup_revprop_namespace(svn_fs_t *fs) +{ + const char *name = svn_dirent_join(fs->path, + ATOMIC_REVPROP_NAMESPACE, + fs->pool); + return svn_error_trace(svn_atomic_namespace__cleanup(name, fs->pool)); +} + +/* Make sure the revprop_generation member in FS is set and, if necessary, + * initialized with the latest value stored on disk. + */ +static svn_error_t * +ensure_revprop_generation(svn_fs_t *fs, apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + + SVN_ERR(ensure_revprop_namespace(fs)); + if (ffd->revprop_generation == NULL) + { + apr_int64_t current = 0; + + SVN_ERR(svn_named_atomic__get(&ffd->revprop_generation, + ffd->revprop_namespace, + ATOMIC_REVPROP_GENERATION, + TRUE)); + + /* If the generation is at 0, we just created a new namespace + * (it would be at least 2 otherwise). Read the latest generation + * from disk and if we are the first one to initialize the atomic + * (i.e. is still 0), set it to the value just gotten. + */ + SVN_ERR(svn_named_atomic__read(¤t, ffd->revprop_generation)); + if (current == 0) + { + SVN_ERR(read_revprop_generation_file(¤t, fs, pool)); + SVN_ERR(svn_named_atomic__cmpxchg(NULL, current, 0, + ffd->revprop_generation)); + } + } + + return SVN_NO_ERROR; +} + +/* Make sure the revprop_timeout member in FS is set. */ +static svn_error_t * +ensure_revprop_timeout(svn_fs_t *fs) +{ + fs_fs_data_t *ffd = fs->fsap_data; + + SVN_ERR(ensure_revprop_namespace(fs)); + return ffd->revprop_timeout == NULL + ? svn_named_atomic__get(&ffd->revprop_timeout, + ffd->revprop_namespace, + ATOMIC_REVPROP_TIMEOUT, + TRUE) + : SVN_NO_ERROR; +} + +/* Create an error object with the given MESSAGE and pass it to the + WARNING member of FS. 
*/ +static void +log_revprop_cache_init_warning(svn_fs_t *fs, + svn_error_t *underlying_err, + const char *message) +{ + svn_error_t *err = svn_error_createf(SVN_ERR_FS_REVPROP_CACHE_INIT_FAILURE, + underlying_err, + message, fs->path); + + if (fs->warning) + (fs->warning)(fs->warning_baton, err); + + svn_error_clear(err); +} + +/* Test whether revprop cache and necessary infrastructure are + available in FS. */ +static svn_boolean_t +has_revprop_cache(svn_fs_t *fs, apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + svn_error_t *error; + + /* is the cache (still) enabled? */ + if (ffd->revprop_cache == NULL) + return FALSE; + + /* is it efficient? */ + if (!svn_named_atomic__is_efficient()) + { + /* access to it would be quite slow + * -> disable the revprop cache for good + */ + ffd->revprop_cache = NULL; + log_revprop_cache_init_warning(fs, NULL, + "Revprop caching for '%s' disabled" + " because it would be inefficient."); + + return FALSE; + } + + /* try to access our SHM-backed infrastructure */ + error = ensure_revprop_generation(fs, pool); + if (error) + { + /* failure -> disable revprop cache for good */ + + ffd->revprop_cache = NULL; + log_revprop_cache_init_warning(fs, error, + "Revprop caching for '%s' disabled " + "because SHM infrastructure for revprop " + "caching failed to initialize."); + + return FALSE; + } + + return TRUE; +} + +/* Baton structure for revprop_generation_fixup. */ +typedef struct revprop_generation_fixup_t +{ + /* revprop generation to read */ + apr_int64_t *generation; + + /* containing the revprop_generation member to query */ + fs_fs_data_t *ffd; +} revprop_generation_upgrade_t; + +/* If the revprop generation has an odd value, it means the original writer + of the revprop got killed. We don't know whether that process as able + to change the revprop data but we assume that it was. Therefore, we + increase the generation in that case to basically invalidate everyones + cache content. 
+ Execute this onlx while holding the write lock to the repo in baton->FFD. + */ +static svn_error_t * +revprop_generation_fixup(void *void_baton, + apr_pool_t *pool) +{ + revprop_generation_upgrade_t *baton = void_baton; + assert(baton->ffd->has_write_lock); + + /* Maybe, either the original revprop writer or some other reader has + already corrected / bumped the revprop generation. Thus, we need + to read it again. */ + SVN_ERR(svn_named_atomic__read(baton->generation, + baton->ffd->revprop_generation)); + + /* Cause everyone to re-read revprops upon their next access, if the + last revprop write did not complete properly. */ + while (*baton->generation % 2) + SVN_ERR(svn_named_atomic__add(baton->generation, + 1, + baton->ffd->revprop_generation)); + + return SVN_NO_ERROR; +} + +/* Read the current revprop generation and return it in *GENERATION. + Also, detect aborted / crashed writers and recover from that. + Use the access object in FS to set the shared mem values. */ +static svn_error_t * +read_revprop_generation(apr_int64_t *generation, + svn_fs_t *fs, + apr_pool_t *pool) +{ + apr_int64_t current = 0; + fs_fs_data_t *ffd = fs->fsap_data; + + /* read the current revprop generation number */ + SVN_ERR(ensure_revprop_generation(fs, pool)); + SVN_ERR(svn_named_atomic__read(¤t, ffd->revprop_generation)); + + /* is an unfinished revprop write under the way? */ + if (current % 2) + { + apr_int64_t timeout = 0; + + /* read timeout for the write operation */ + SVN_ERR(ensure_revprop_timeout(fs)); + SVN_ERR(svn_named_atomic__read(&timeout, ffd->revprop_timeout)); + + /* has the writer process been aborted, + * i.e. has the timeout been reached? + */ + if (apr_time_now() > timeout) + { + revprop_generation_upgrade_t baton; + baton.generation = ¤t; + baton.ffd = ffd; + + /* Ensure that the original writer process no longer exists by + * acquiring the write lock to this repository. Then, fix up + * the revprop generation. 
+ */ + if (ffd->has_write_lock) + SVN_ERR(revprop_generation_fixup(&baton, pool)); + else + SVN_ERR(svn_fs_fs__with_write_lock(fs, revprop_generation_fixup, + &baton, pool)); + } + } + + /* return the value we just got */ + *generation = current; + return SVN_NO_ERROR; +} + +/* Set the revprop generation to the next odd number to indicate that + there is a revprop write process under way. If that times out, + readers shall recover from that state & re-read revprops. + Use the access object in FS to set the shared mem value. */ +static svn_error_t * +begin_revprop_change(svn_fs_t *fs, apr_pool_t *pool) +{ + apr_int64_t current; + fs_fs_data_t *ffd = fs->fsap_data; + + /* set the timeout for the write operation */ + SVN_ERR(ensure_revprop_timeout(fs)); + SVN_ERR(svn_named_atomic__write(NULL, + apr_time_now() + REVPROP_CHANGE_TIMEOUT, + ffd->revprop_timeout)); + + /* set the revprop generation to an odd value to indicate + * that a write is in progress + */ + SVN_ERR(ensure_revprop_generation(fs, pool)); + do + { + SVN_ERR(svn_named_atomic__add(¤t, + 1, + ffd->revprop_generation)); + } + while (current % 2 == 0); + + return SVN_NO_ERROR; +} + +/* Set the revprop generation to the next even number to indicate that + a) readers shall re-read revprops, and + b) the write process has been completed (no recovery required) + Use the access object in FS to set the shared mem value. */ +static svn_error_t * +end_revprop_change(svn_fs_t *fs, apr_pool_t *pool) +{ + apr_int64_t current = 1; + fs_fs_data_t *ffd = fs->fsap_data; + + /* set the revprop generation to an even value to indicate + * that a write has been completed + */ + SVN_ERR(ensure_revprop_generation(fs, pool)); + do + { + SVN_ERR(svn_named_atomic__add(¤t, + 1, + ffd->revprop_generation)); + } + while (current % 2); + + /* Save the latest generation to disk. FS is currently in a "locked" + * state such that we can be sure the be the only ones to write that + * file. 
+ */ + return write_revprop_generation_file(fs, current, pool); +} + +/* Container for all data required to access the packed revprop file + * for a given REVISION. This structure will be filled incrementally + * by read_pack_revprops() its sub-routines. + */ +typedef struct packed_revprops_t +{ + /* revision number to read (not necessarily the first in the pack) */ + svn_revnum_t revision; + + /* current revprop generation. Used when populating the revprop cache */ + apr_int64_t generation; + + /* the actual revision properties */ + apr_hash_t *properties; + + /* their size when serialized to a single string + * (as found in PACKED_REVPROPS) */ + apr_size_t serialized_size; + + + /* name of the pack file (without folder path) */ + const char *filename; + + /* packed shard folder path */ + const char *folder; + + /* sum of values in SIZES */ + apr_size_t total_size; + + /* first revision in the pack */ + svn_revnum_t start_revision; + + /* size of the revprops in PACKED_REVPROPS */ + apr_array_header_t *sizes; + + /* offset of the revprops in PACKED_REVPROPS */ + apr_array_header_t *offsets; + + + /* concatenation of the serialized representation of all revprops + * in the pack, i.e. the pack content without header and compression */ + svn_stringbuf_t *packed_revprops; + + /* content of the manifest. + * Maps long(rev - START_REVISION) to const char* pack file name */ + apr_array_header_t *manifest; +} packed_revprops_t; + +/* Parse the serialized revprops in CONTENT and return them in *PROPERTIES. + * Also, put them into the revprop cache, if activated, for future use. + * Three more parameters are being used to update the revprop cache: FS is + * our file system, the revprops belong to REVISION and the global revprop + * GENERATION is used as well. + * + * The returned hash will be allocated in POOL, SCRATCH_POOL is being used + * for temporary allocations. 
+ */ +static svn_error_t * +parse_revprop(apr_hash_t **properties, + svn_fs_t *fs, + svn_revnum_t revision, + apr_int64_t generation, + svn_string_t *content, + apr_pool_t *pool, + apr_pool_t *scratch_pool) +{ + svn_stream_t *stream = svn_stream_from_string(content, scratch_pool); + *properties = apr_hash_make(pool); + + SVN_ERR(svn_hash_read2(*properties, stream, SVN_HASH_TERMINATOR, pool)); + if (has_revprop_cache(fs, pool)) + { + fs_fs_data_t *ffd = fs->fsap_data; + pair_cache_key_t key = { 0 }; + + key.revision = revision; + key.second = generation; + SVN_ERR(svn_cache__set(ffd->revprop_cache, &key, *properties, + scratch_pool)); + } + + return SVN_NO_ERROR; +} + +/* Read the non-packed revprops for revision REV in FS, put them into the + * revprop cache if activated and return them in *PROPERTIES. GENERATION + * is the current revprop generation. + * + * If the data could not be read due to an otherwise recoverable error, + * leave *PROPERTIES unchanged. No error will be returned in that case. + * + * Allocations will be done in POOL. + */ +static svn_error_t * +read_non_packed_revprop(apr_hash_t **properties, + svn_fs_t *fs, + svn_revnum_t rev, + apr_int64_t generation, + apr_pool_t *pool) +{ + svn_stringbuf_t *content = NULL; + apr_pool_t *iterpool = svn_pool_create(pool); + svn_boolean_t missing = FALSE; + int i; + + for (i = 0; i < RECOVERABLE_RETRY_COUNT && !missing && !content; ++i) + { + svn_pool_clear(iterpool); + SVN_ERR(try_stringbuf_from_file(&content, + &missing, + path_revprops(fs, rev, iterpool), + i + 1 < RECOVERABLE_RETRY_COUNT, + iterpool)); + } + + if (content) + SVN_ERR(parse_revprop(properties, fs, rev, generation, + svn_stringbuf__morph_into_string(content), + pool, iterpool)); + + svn_pool_clear(iterpool); + + return SVN_NO_ERROR; +} + +/* Given FS and REVPROPS->REVISION, fill the FILENAME, FOLDER and MANIFEST + * members. Use POOL for allocating results and SCRATCH_POOL for temporaries. 
 */
static svn_error_t *
get_revprop_packname(svn_fs_t *fs,
                     packed_revprops_t *revprops,
                     apr_pool_t *pool,
                     apr_pool_t *scratch_pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  svn_stringbuf_t *content = NULL;
  const char *manifest_file_path;
  int idx;

  /* read content of the manifest file */
  revprops->folder = path_revprops_pack_shard(fs, revprops->revision, pool);
  manifest_file_path = svn_dirent_join(revprops->folder, PATH_MANIFEST, pool);

  SVN_ERR(read_content(&content, manifest_file_path, pool));

  /* parse the manifest. Every line is a file name.  Parsing works by
     mutating CONTENT in place: each '\n' is overwritten with '\0' so the
     pushed pointers become individual NUL-terminated strings.  The array
     elements therefore share CONTENT's lifetime (allocated in POOL).
     NOTE(review): a trailing newline yields one extra empty entry at the
     end of MANIFEST; this appears harmless because IDX is bounds-checked
     below — confirm against the pack-writing code. */
  revprops->manifest = apr_array_make(pool, ffd->max_files_per_dir,
                                      sizeof(const char*));
  while (content->data)
    {
      APR_ARRAY_PUSH(revprops->manifest, const char*) = content->data;
      content->data = strchr(content->data, '\n');
      if (content->data)
        {
          *content->data = 0;
          content->data++;
        }
    }

  /* Index for our revision. Rev 0 is excluded from the first shard. */
  idx = (int)(revprops->revision % ffd->max_files_per_dir);
  if (revprops->revision < ffd->max_files_per_dir)
    --idx;

  if (revprops->manifest->nelts <= idx)
    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                             _("Packed revprop manifest for rev %ld too "
                               "small"), revprops->revision);

  /* Now get the file name */
  revprops->filename = APR_ARRAY_IDX(revprops->manifest, idx, const char*);

  return SVN_NO_ERROR;
}

/* Given FS and the full packed file content in REVPROPS->PACKED_REVPROPS,
 * fill the START_REVISION, SIZES, OFFSETS members. Also, make
 * PACKED_REVPROPS point to the first serialized revprop.
 *
 * Parse the revprops for REVPROPS->REVISION and set the PROPERTIES as
 * well as the SERIALIZED_SIZE member.  If revprop caching has been
 * enabled, parse all revprops in the pack and cache them.
+ */ +static svn_error_t * +parse_packed_revprops(svn_fs_t *fs, + packed_revprops_t *revprops, + apr_pool_t *pool, + apr_pool_t *scratch_pool) +{ + svn_stream_t *stream; + apr_int64_t first_rev, count, i; + apr_off_t offset; + const char *header_end; + apr_pool_t *iterpool = svn_pool_create(scratch_pool); + + /* decompress (even if the data is only "stored", there is still a + * length header to remove) */ + svn_string_t *compressed + = svn_stringbuf__morph_into_string(revprops->packed_revprops); + svn_stringbuf_t *uncompressed = svn_stringbuf_create_empty(pool); + SVN_ERR(svn__decompress(compressed, uncompressed, 0x1000000)); + + /* read first revision number and number of revisions in the pack */ + stream = svn_stream_from_stringbuf(uncompressed, scratch_pool); + SVN_ERR(read_number_from_stream(&first_rev, NULL, stream, iterpool)); + SVN_ERR(read_number_from_stream(&count, NULL, stream, iterpool)); + + /* make PACKED_REVPROPS point to the first char after the header. + * This is where the serialized revprops are. */ + header_end = strstr(uncompressed->data, "\n\n"); + if (header_end == NULL) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Header end not found")); + + offset = header_end - uncompressed->data + 2; + + revprops->packed_revprops = svn_stringbuf_create_empty(pool); + revprops->packed_revprops->data = uncompressed->data + offset; + revprops->packed_revprops->len = (apr_size_t)(uncompressed->len - offset); + revprops->packed_revprops->blocksize = (apr_size_t)(uncompressed->blocksize - offset); + + /* STREAM still points to the first entry in the sizes list. + * Init / construct REVPROPS members. */ + revprops->start_revision = (svn_revnum_t)first_rev; + revprops->sizes = apr_array_make(pool, (int)count, sizeof(offset)); + revprops->offsets = apr_array_make(pool, (int)count, sizeof(offset)); + + /* Now parse, revision by revision, the size and content of each + * revisions' revprops. 
*/ + for (i = 0, offset = 0, revprops->total_size = 0; i < count; ++i) + { + apr_int64_t size; + svn_string_t serialized; + apr_hash_t *properties; + svn_revnum_t revision = (svn_revnum_t)(first_rev + i); + + /* read & check the serialized size */ + SVN_ERR(read_number_from_stream(&size, NULL, stream, iterpool)); + if (size + offset > (apr_int64_t)revprops->packed_revprops->len) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Packed revprop size exceeds pack file size")); + + /* Parse this revprops list, if necessary */ + serialized.data = revprops->packed_revprops->data + offset; + serialized.len = (apr_size_t)size; + + if (revision == revprops->revision) + { + SVN_ERR(parse_revprop(&revprops->properties, fs, revision, + revprops->generation, &serialized, + pool, iterpool)); + revprops->serialized_size = serialized.len; + } + else + { + /* If revprop caching is enabled, parse any revprops. + * They will get cached as a side-effect of this. */ + if (has_revprop_cache(fs, pool)) + SVN_ERR(parse_revprop(&properties, fs, revision, + revprops->generation, &serialized, + iterpool, iterpool)); + } + + /* fill REVPROPS data structures */ + APR_ARRAY_PUSH(revprops->sizes, apr_off_t) = serialized.len; + APR_ARRAY_PUSH(revprops->offsets, apr_off_t) = offset; + revprops->total_size += serialized.len; + + offset += serialized.len; + + svn_pool_clear(iterpool); + } + + return SVN_NO_ERROR; +} + +/* In filesystem FS, read the packed revprops for revision REV into + * *REVPROPS. Use GENERATION to populate the revprop cache, if enabled. + * Allocate data in POOL. + */ +static svn_error_t * +read_pack_revprop(packed_revprops_t **revprops, + svn_fs_t *fs, + svn_revnum_t rev, + apr_int64_t generation, + apr_pool_t *pool) +{ + apr_pool_t *iterpool = svn_pool_create(pool); + svn_boolean_t missing = FALSE; + svn_error_t *err; + packed_revprops_t *result; + int i; + + /* someone insisted that REV is packed. 
Double-check if necessary */ + if (!is_packed_revprop(fs, rev)) + SVN_ERR(update_min_unpacked_rev(fs, iterpool)); + + if (!is_packed_revprop(fs, rev)) + return svn_error_createf(SVN_ERR_FS_NO_SUCH_REVISION, NULL, + _("No such packed revision %ld"), rev); + + /* initialize the result data structure */ + result = apr_pcalloc(pool, sizeof(*result)); + result->revision = rev; + result->generation = generation; + + /* try to read the packed revprops. This may require retries if we have + * concurrent writers. */ + for (i = 0; i < RECOVERABLE_RETRY_COUNT && !result->packed_revprops; ++i) + { + const char *file_path; + + /* there might have been concurrent writes. + * Re-read the manifest and the pack file. + */ + SVN_ERR(get_revprop_packname(fs, result, pool, iterpool)); + file_path = svn_dirent_join(result->folder, + result->filename, + iterpool); + SVN_ERR(try_stringbuf_from_file(&result->packed_revprops, + &missing, + file_path, + i + 1 < RECOVERABLE_RETRY_COUNT, + pool)); + + /* If we could not find the file, there was a write. + * So, we should refresh our revprop generation info as well such + * that others may find data we will put into the cache. They would + * consider it outdated, otherwise. + */ + if (missing && has_revprop_cache(fs, pool)) + SVN_ERR(read_revprop_generation(&result->generation, fs, pool)); + + svn_pool_clear(iterpool); + } + + /* the file content should be available now */ + if (!result->packed_revprops) + return svn_error_createf(SVN_ERR_FS_PACKED_REVPROP_READ_FAILURE, NULL, + _("Failed to read revprop pack file for rev %ld"), rev); + + /* parse it. RESULT will be complete afterwards. */ + err = parse_packed_revprops(fs, result, pool, iterpool); + svn_pool_destroy(iterpool); + if (err) + return svn_error_createf(SVN_ERR_FS_CORRUPT, err, + _("Revprop pack file for rev %ld is corrupt"), rev); + + *revprops = result; + + return SVN_NO_ERROR; +} + +/* Read the revprops for revision REV in FS and return them in *PROPERTIES_P. 
+ * + * Allocations will be done in POOL. + */ +static svn_error_t * +get_revision_proplist(apr_hash_t **proplist_p, + svn_fs_t *fs, + svn_revnum_t rev, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + apr_int64_t generation = 0; + + /* not found, yet */ + *proplist_p = NULL; + + /* should they be available at all? */ + SVN_ERR(ensure_revision_exists(fs, rev, pool)); + + /* Try cache lookup first. */ + if (has_revprop_cache(fs, pool)) + { + svn_boolean_t is_cached; + pair_cache_key_t key = { 0 }; + + SVN_ERR(read_revprop_generation(&generation, fs, pool)); + + key.revision = rev; + key.second = generation; + SVN_ERR(svn_cache__get((void **) proplist_p, &is_cached, + ffd->revprop_cache, &key, pool)); + if (is_cached) + return SVN_NO_ERROR; + } + + /* if REV had not been packed when we began, try reading it from the + * non-packed shard. If that fails, we will fall through to packed + * shard reads. */ + if (!is_packed_revprop(fs, rev)) + { + svn_error_t *err = read_non_packed_revprop(proplist_p, fs, rev, + generation, pool); + if (err) + { + if (!APR_STATUS_IS_ENOENT(err->apr_err) + || ffd->format < SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT) + return svn_error_trace(err); + + svn_error_clear(err); + *proplist_p = NULL; /* in case read_non_packed_revprop changed it */ + } + } + + /* if revprop packing is available and we have not read the revprops, yet, + * try reading them from a packed shard. If that fails, REV is most + * likely invalid (or its revprops highly contested). */ + if (ffd->format >= SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT && !*proplist_p) + { + packed_revprops_t *packed_revprops; + SVN_ERR(read_pack_revprop(&packed_revprops, fs, rev, generation, pool)); + *proplist_p = packed_revprops->properties; + } + + /* The revprops should have been there. Did we get them? 
*/ + if (!*proplist_p) + return svn_error_createf(SVN_ERR_FS_NO_SUCH_REVISION, NULL, + _("Could not read revprops for revision %ld"), + rev); + + return SVN_NO_ERROR; +} + +/* Serialize the revision property list PROPLIST of revision REV in + * filesystem FS to a non-packed file. Return the name of that temporary + * file in *TMP_PATH and the file path that it must be moved to in + * *FINAL_PATH. + * + * Use POOL for allocations. + */ +static svn_error_t * +write_non_packed_revprop(const char **final_path, + const char **tmp_path, + svn_fs_t *fs, + svn_revnum_t rev, + apr_hash_t *proplist, + apr_pool_t *pool) +{ + svn_stream_t *stream; + *final_path = path_revprops(fs, rev, pool); + + /* ### do we have a directory sitting around already? we really shouldn't + ### have to get the dirname here. */ + SVN_ERR(svn_stream_open_unique(&stream, tmp_path, + svn_dirent_dirname(*final_path, pool), + svn_io_file_del_none, pool, pool)); + SVN_ERR(svn_hash_write2(proplist, stream, SVN_HASH_TERMINATOR, pool)); + SVN_ERR(svn_stream_close(stream)); + + return SVN_NO_ERROR; +} + +/* After writing the new revprop file(s), call this function to move the + * file at TMP_PATH to FINAL_PATH and give it the permissions from + * PERMS_REFERENCE. + * + * If indicated in BUMP_GENERATION, increase FS' revprop generation. + * Finally, delete all the temporary files given in FILES_TO_DELETE. + * The latter may be NULL. + * + * Use POOL for temporary allocations. + */ +static svn_error_t * +switch_to_new_revprop(svn_fs_t *fs, + const char *final_path, + const char *tmp_path, + const char *perms_reference, + apr_array_header_t *files_to_delete, + svn_boolean_t bump_generation, + apr_pool_t *pool) +{ + /* Now, we may actually be replacing revprops. Make sure that all other + threads and processes will know about this. 
*/ + if (bump_generation) + SVN_ERR(begin_revprop_change(fs, pool)); + + SVN_ERR(move_into_place(tmp_path, final_path, perms_reference, pool)); + + /* Indicate that the update (if relevant) has been completed. */ + if (bump_generation) + SVN_ERR(end_revprop_change(fs, pool)); + + /* Clean up temporary files, if necessary. */ + if (files_to_delete) + { + apr_pool_t *iterpool = svn_pool_create(pool); + int i; + + for (i = 0; i < files_to_delete->nelts; ++i) + { + const char *path = APR_ARRAY_IDX(files_to_delete, i, const char*); + SVN_ERR(svn_io_remove_file2(path, TRUE, iterpool)); + svn_pool_clear(iterpool); + } + + svn_pool_destroy(iterpool); + } + return SVN_NO_ERROR; +} + +/* Write a pack file header to STREAM that starts at revision START_REVISION + * and contains the indexes [START,END) of SIZES. + */ +static svn_error_t * +serialize_revprops_header(svn_stream_t *stream, + svn_revnum_t start_revision, + apr_array_header_t *sizes, + int start, + int end, + apr_pool_t *pool) +{ + apr_pool_t *iterpool = svn_pool_create(pool); + int i; + + SVN_ERR_ASSERT(start < end); + + /* start revision and entry count */ + SVN_ERR(svn_stream_printf(stream, pool, "%ld\n", start_revision)); + SVN_ERR(svn_stream_printf(stream, pool, "%d\n", end - start)); + + /* the sizes array */ + for (i = start; i < end; ++i) + { + apr_off_t size = APR_ARRAY_IDX(sizes, i, apr_off_t); + SVN_ERR(svn_stream_printf(stream, iterpool, "%" APR_OFF_T_FMT "\n", + size)); + } + + /* the double newline char indicates the end of the header */ + SVN_ERR(svn_stream_printf(stream, iterpool, "\n")); + + svn_pool_clear(iterpool); + return SVN_NO_ERROR; +} + +/* Writes the a pack file to FILE_STREAM. It copies the serialized data + * from REVPROPS for the indexes [START,END) except for index CHANGED_INDEX. + * + * The data for the latter is taken from NEW_SERIALIZED. Note, that + * CHANGED_INDEX may be outside the [START,END) range, i.e. 
no new data is
 * taken in that case but only a subset of the old data will be copied.
 *
 * NEW_TOTAL_SIZE is a hint for pre-allocating buffers of appropriate size.
 * POOL is used for temporary allocations.
 */
static svn_error_t *
repack_revprops(svn_fs_t *fs,
                packed_revprops_t *revprops,
                int start,
                int end,
                int changed_index,
                svn_stringbuf_t *new_serialized,
                apr_off_t new_total_size,
                svn_stream_t *file_stream,
                apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  svn_stream_t *stream;
  int i;

  /* create data empty buffers and the stream object */
  svn_stringbuf_t *uncompressed
    = svn_stringbuf_create_ensure((apr_size_t)new_total_size, pool);
  svn_stringbuf_t *compressed
    = svn_stringbuf_create_empty(pool);
  stream = svn_stream_from_stringbuf(uncompressed, pool);

  /* write the header */
  SVN_ERR(serialize_revprops_header(stream, revprops->start_revision + start,
                                    revprops->sizes, start, end, pool));

  /* append the serialized revprops */
  for (i = start; i < end; ++i)
    if (i == changed_index)
      {
        /* The revprop list that is being modified comes from
         * NEW_SERIALIZED instead of the old pack file. */
        SVN_ERR(svn_stream_write(stream,
                                 new_serialized->data,
                                 &new_serialized->len));
      }
    else
      {
        /* All other lists are copied verbatim from the old pack data,
         * using the offsets / sizes recorded when it was read. */
        apr_size_t size
          = (apr_size_t)APR_ARRAY_IDX(revprops->sizes, i, apr_off_t);
        apr_size_t offset
          = (apr_size_t)APR_ARRAY_IDX(revprops->offsets, i, apr_off_t);

        SVN_ERR(svn_stream_write(stream,
                                 revprops->packed_revprops->data + offset,
                                 &size));
      }

  /* flush the stream buffer (if any) to our underlying data buffer */
  SVN_ERR(svn_stream_close(stream));

  /* compress / store the data.  "Compression level 0" means a plain
   * copy, i.e. the data is merely framed, not actually compressed. */
  SVN_ERR(svn__compress(svn_stringbuf__morph_into_string(uncompressed),
                        compressed,
                        ffd->compress_packed_revprops
                          ? SVN_DELTA_COMPRESSION_LEVEL_DEFAULT
                          : SVN_DELTA_COMPRESSION_LEVEL_NONE));

  /* finally, write the content to the target stream and close it */
  SVN_ERR(svn_stream_write(file_stream, compressed->data, &compressed->len));
  SVN_ERR(svn_stream_close(file_stream));

  return SVN_NO_ERROR;
}

/* Allocate a new pack file name for the revisions at index [START,END)
 * of REVPROPS->MANIFEST.  Add the name of old file to FILES_TO_DELETE,
 * auto-create that array if necessary.  Return an open file stream to
 * the new file in *STREAM allocated in POOL.
 */
static svn_error_t *
repack_stream_open(svn_stream_t **stream,
                   svn_fs_t *fs,
                   packed_revprops_t *revprops,
                   int start,
                   int end,
                   apr_array_header_t **files_to_delete,
                   apr_pool_t *pool)
{
  apr_int64_t tag;
  const char *tag_string;
  svn_string_t *new_filename;
  int i;
  apr_file_t *file;

  /* get the old (= current) file name and enlist it for later deletion */
  const char *old_filename
    = APR_ARRAY_IDX(revprops->manifest, start, const char*);

  if (*files_to_delete == NULL)
    *files_to_delete = apr_array_make(pool, 3, sizeof(const char*));

  APR_ARRAY_PUSH(*files_to_delete, const char*)
    = svn_dirent_join(revprops->folder, old_filename, pool);

  /* increase the tag part, i.e.
     the counter after the dot */
  tag_string = strchr(old_filename, '.');
  if (tag_string == NULL)
    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                             _("Packed file '%s' misses a tag"),
                             old_filename);

  SVN_ERR(svn_cstring_atoi64(&tag, tag_string + 1));
  /* New name is "<start-revision>.<old-tag + 1>" so repeated repacks of
   * the same range never reuse a file name. */
  new_filename = svn_string_createf(pool, "%ld.%" APR_INT64_T_FMT,
                                    revprops->start_revision + start,
                                    ++tag);

  /* update the manifest to point to the new file */
  for (i = start; i < end; ++i)
    APR_ARRAY_IDX(revprops->manifest, i, const char*) = new_filename->data;

  /* create a file stream for the new file */
  SVN_ERR(svn_io_file_open(&file, svn_dirent_join(revprops->folder,
                                                  new_filename->data,
                                                  pool),
                           APR_WRITE | APR_CREATE, APR_OS_DEFAULT, pool));
  *stream = svn_stream_from_aprfile2(file, FALSE, pool);

  return SVN_NO_ERROR;
}

/* For revision REV in filesystem FS, set the revision properties to
 * PROPLIST.  Return a new file in *TMP_PATH that the caller shall move
 * to *FINAL_PATH to make the change visible.  Files to be deleted will
 * be listed in *FILES_TO_DELETE which may remain unchanged / unallocated.
 * Use POOL for allocations.
 */
static svn_error_t *
write_packed_revprop(const char **final_path,
                     const char **tmp_path,
                     apr_array_header_t **files_to_delete,
                     svn_fs_t *fs,
                     svn_revnum_t rev,
                     apr_hash_t *proplist,
                     apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  packed_revprops_t *revprops;
  apr_int64_t generation = 0;
  svn_stream_t *stream;
  svn_stringbuf_t *serialized;
  apr_off_t new_total_size;
  int changed_index;

  /* read the current revprop generation. This value will not change
   * while we hold the global write lock to this FS.
 */
  if (has_revprop_cache(fs, pool))
    SVN_ERR(read_revprop_generation(&generation, fs, pool));

  /* read contents of the current pack file */
  SVN_ERR(read_pack_revprop(&revprops, fs, rev, generation, pool));

  /* serialize the new revprops */
  serialized = svn_stringbuf_create_empty(pool);
  stream = svn_stream_from_stringbuf(serialized, pool);
  SVN_ERR(svn_hash_write2(proplist, stream, SVN_HASH_TERMINATOR, pool));
  SVN_ERR(svn_stream_close(stream));

  /* calculate the size of the new data: old payload minus the old
   * serialization of REV, plus the new one, plus a generous estimate
   * for the header lines (one size entry per revision + 2). */
  changed_index = (int)(rev - revprops->start_revision);
  new_total_size = revprops->total_size - revprops->serialized_size
                 + serialized->len
                 + (revprops->offsets->nelts + 2) * SVN_INT64_BUFFER_SIZE;

  APR_ARRAY_IDX(revprops->sizes, changed_index, apr_off_t) = serialized->len;

  /* can we put the new data into the same pack as before? */
  if (   new_total_size < ffd->revprop_pack_size
      || revprops->sizes->nelts == 1)
    {
      /* simply replace the old pack file with new content as we do it
       * in the non-packed case */

      *final_path = svn_dirent_join(revprops->folder, revprops->filename,
                                    pool);
      SVN_ERR(svn_stream_open_unique(&stream, tmp_path, revprops->folder,
                                     svn_io_file_del_none, pool, pool));
      SVN_ERR(repack_revprops(fs, revprops, 0, revprops->sizes->nelts,
                              changed_index, serialized, new_total_size,
                              stream, pool));
    }
  else
    {
      /* split the pack file into two of roughly equal size */
      int right_count, left_count, i;

      int left = 0;
      int right = revprops->sizes->nelts - 1;
      /* both halves start with the fixed header overhead */
      apr_off_t left_size = 2 * SVN_INT64_BUFFER_SIZE;
      apr_off_t right_size = 2 * SVN_INT64_BUFFER_SIZE;

      /* let left and right side grow such that their size difference
       * is minimal after each step.
 */
      while (left <= right)
        if (   left_size + APR_ARRAY_IDX(revprops->sizes, left, apr_off_t)
             < right_size + APR_ARRAY_IDX(revprops->sizes, right, apr_off_t))
          {
            left_size += APR_ARRAY_IDX(revprops->sizes, left, apr_off_t)
                      + SVN_INT64_BUFFER_SIZE;
            ++left;
          }
        else
          {
            right_size += APR_ARRAY_IDX(revprops->sizes, right, apr_off_t)
                       + SVN_INT64_BUFFER_SIZE;
            --right;
          }

      /* since the items need much less than SVN_INT64_BUFFER_SIZE
       * bytes to represent their length, the split may not be optimal */
      left_count = left;
      right_count = revprops->sizes->nelts - left;

      /* if new_size is large, one side may exceed the pack size limit.
       * In that case, split before and after the modified revprop, so the
       * oversized revprop REV ends up in a pack file of its own. */
      if (   left_size > ffd->revprop_pack_size
          || right_size > ffd->revprop_pack_size)
        {
          left_count = changed_index;
          right_count = revprops->sizes->nelts - left_count - 1;
        }

      /* write the new, split files */
      if (left_count)
        {
          SVN_ERR(repack_stream_open(&stream, fs, revprops, 0,
                                     left_count, files_to_delete, pool));
          SVN_ERR(repack_revprops(fs, revprops, 0, left_count,
                                  changed_index, serialized, new_total_size,
                                  stream, pool));
        }

      /* middle pack (only exists in the "split around REV" case above) */
      if (left_count + right_count < revprops->sizes->nelts)
        {
          SVN_ERR(repack_stream_open(&stream, fs, revprops, changed_index,
                                     changed_index + 1, files_to_delete,
                                     pool));
          SVN_ERR(repack_revprops(fs, revprops, changed_index,
                                  changed_index + 1,
                                  changed_index, serialized, new_total_size,
                                  stream, pool));
        }

      if (right_count)
        {
          SVN_ERR(repack_stream_open(&stream, fs, revprops,
                                     revprops->sizes->nelts - right_count,
                                     revprops->sizes->nelts,
                                     files_to_delete, pool));
          SVN_ERR(repack_revprops(fs, revprops,
                                  revprops->sizes->nelts - right_count,
                                  revprops->sizes->nelts, changed_index,
                                  serialized, new_total_size, stream,
                                  pool));
        }

      /* write the new manifest */
      *final_path = svn_dirent_join(revprops->folder, PATH_MANIFEST, pool);
      SVN_ERR(svn_stream_open_unique(&stream, tmp_path, revprops->folder,
                                     svn_io_file_del_none, pool, pool));

      for (i = 0; i < revprops->manifest->nelts; ++i)
        {
          const char *filename = APR_ARRAY_IDX(revprops->manifest, i,
                                               const char*);
          SVN_ERR(svn_stream_printf(stream, pool, "%s\n", filename));
        }

      SVN_ERR(svn_stream_close(stream));
    }

  return SVN_NO_ERROR;
}

/* Set the revision property list of revision REV in filesystem FS to
   PROPLIST.  Use POOL for temporary allocations. */
static svn_error_t *
set_revision_proplist(svn_fs_t *fs,
                      svn_revnum_t rev,
                      apr_hash_t *proplist,
                      apr_pool_t *pool)
{
  svn_boolean_t is_packed;
  svn_boolean_t bump_generation = FALSE;
  const char *final_path;
  const char *tmp_path;
  const char *perms_reference;
  apr_array_header_t *files_to_delete = NULL;

  SVN_ERR(ensure_revision_exists(fs, rev, pool));

  /* this info will not change while we hold the global FS write lock */
  is_packed = is_packed_revprop(fs, rev);

  /* Test whether revprops already exist for this revision.
   * Only then will we need to bump the revprop generation. */
  if (has_revprop_cache(fs, pool))
    {
      if (is_packed)
        {
          bump_generation = TRUE;
        }
      else
        {
          svn_node_kind_t kind;
          SVN_ERR(svn_io_check_path(path_revprops(fs, rev, pool), &kind,
                                    pool));
          bump_generation = kind != svn_node_none;
        }
    }

  /* Serialize the new revprop data */
  if (is_packed)
    SVN_ERR(write_packed_revprop(&final_path, &tmp_path, &files_to_delete,
                                 fs, rev, proplist, pool));
  else
    SVN_ERR(write_non_packed_revprop(&final_path, &tmp_path,
                                     fs, rev, proplist, pool));

  /* We use the rev file of this revision as the perms reference,
   * because when setting revprops for the first time, the revprop
   * file won't exist and therefore can't serve as its own reference.
   * (Whereas the rev file should already exist at this point.)
   */
  SVN_ERR(svn_fs_fs__path_rev_absolute(&perms_reference, fs, rev, pool));

  /* Now, switch to the new revprop data.
 */
  SVN_ERR(switch_to_new_revprop(fs, final_path, tmp_path, perms_reference,
                                files_to_delete, bump_generation, pool));

  return SVN_NO_ERROR;
}

/* Set *PROPLIST_P to the revision properties of revision REV in
   filesystem FS, allocated in POOL. */
svn_error_t *
svn_fs_fs__revision_proplist(apr_hash_t **proplist_p,
                             svn_fs_t *fs,
                             svn_revnum_t rev,
                             apr_pool_t *pool)
{
  SVN_ERR(get_revision_proplist(proplist_p, fs, rev, pool));

  return SVN_NO_ERROR;
}

/* Represents where in the current svndiff data block each
   representation is. */
struct rep_state
{
  apr_file_t *file;
                    /* The txdelta window cache to use or NULL. */
  svn_cache__t *window_cache;
                    /* Caches un-deltified windows. May be NULL. */
  svn_cache__t *combined_cache;
  apr_off_t start;  /* The starting offset for the raw
                       svndiff/plaintext data minus header. */
  apr_off_t off;    /* The current offset into the file. */
  apr_off_t end;    /* The end offset of the raw data. */
  int ver;          /* If a delta, what svndiff version? */
  int chunk_index;  /* Number of svndiff windows already read. */
};

/* See create_rep_state, which wraps this and adds another error. */
static svn_error_t *
create_rep_state_body(struct rep_state **rep_state,
                      struct rep_args **rep_args,
                      apr_file_t **file_hint,
                      svn_revnum_t *rev_hint,
                      representation_t *rep,
                      svn_fs_t *fs,
                      apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  struct rep_state *rs = apr_pcalloc(pool, sizeof(*rs));
  struct rep_args *ra;
  unsigned char buf[4];

  /* If the hint is
   * - given,
   * - refers to a valid revision,
   * - refers to a packed revision,
   * - as does the rep we want to read, and
   * - refers to the same pack file as the rep
   * ...
   */
  if (   file_hint && rev_hint && *file_hint
      && SVN_IS_VALID_REVNUM(*rev_hint)
      && *rev_hint < ffd->min_unpacked_rev
      && rep->revision < ffd->min_unpacked_rev
      && (   (*rev_hint / ffd->max_files_per_dir)
          == (rep->revision / ffd->max_files_per_dir)))
    {
      /* ... we can re-use the same, already open file object
       */
      apr_off_t offset;
      SVN_ERR(get_packed_offset(&offset, fs, rep->revision, pool));

      offset += rep->offset;
      SVN_ERR(svn_io_file_seek(*file_hint, APR_SET, &offset, pool));

      rs->file = *file_hint;
    }
  else
    {
      /* otherwise, create a new file object
       */
      SVN_ERR(open_and_seek_representation(&rs->file, fs, rep, pool));
    }

  /* remember the current file, if suggested by the caller */
  if (file_hint)
    *file_hint = rs->file;
  if (rev_hint)
    *rev_hint = rep->revision;

  /* continue constructing RS and RA */
  rs->window_cache = ffd->txdelta_window_cache;
  rs->combined_cache = ffd->combined_window_cache;

  SVN_ERR(read_rep_line(&ra, rs->file, pool));
  SVN_ERR(get_file_offset(&rs->start, rs->file, pool));
  rs->off = rs->start;
  rs->end = rs->start + rep->size;
  *rep_state = rs;
  *rep_args = ra;

  if (!ra->is_delta)
    /* This is a plaintext, so just return the current rep_state. */
    return SVN_NO_ERROR;

  /* We are dealing with a delta, find out what version. */
  SVN_ERR(svn_io_file_read_full2(rs->file, buf, sizeof(buf),
                                 NULL, NULL, pool));
  /* ### Layering violation */
  if (! ((buf[0] == 'S') && (buf[1] == 'V') && (buf[2] == 'N')))
    return svn_error_create
      (SVN_ERR_FS_CORRUPT, NULL,
       _("Malformed svndiff data in representation"));
  rs->ver = buf[3];
  rs->chunk_index = 0;
  /* skip the 4-byte "SVN<ver>" svndiff header we just consumed */
  rs->off += 4;

  return SVN_NO_ERROR;
}

/* Read the rep args for REP in filesystem FS and create a rep_state
   for reading the representation.  Return the rep_state in *REP_STATE
   and the rep args in *REP_ARGS, both allocated in POOL.

   When reading multiple reps, i.e. a skip delta chain, you may provide
   non-NULL FILE_HINT and REV_HINT.  (If FILE_HINT is not NULL, in the first
   call it should be a pointer to NULL.)  The function will use these variables
   to store the previous call results and tries to re-use them.  This may
   result in significant savings in I/O for packed files.
 */
static svn_error_t *
create_rep_state(struct rep_state **rep_state,
                 struct rep_args **rep_args,
                 apr_file_t **file_hint,
                 svn_revnum_t *rev_hint,
                 representation_t *rep,
                 svn_fs_t *fs,
                 apr_pool_t *pool)
{
  svn_error_t *err = create_rep_state_body(rep_state, rep_args,
                                           file_hint, rev_hint,
                                           rep, fs, pool);
  if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
    {
      fs_fs_data_t *ffd = fs->fsap_data;

      /* ### This always returns "-1" for transaction reps, because
         ### this particular bit of code doesn't know if the rep is
         ### stored in the protorev or in the mutable area (for props
         ### or dir contents).  It is pretty rare for FSFS to *read*
         ### from the protorev file, though, so this is probably OK.
         ### And anyone going to debug corruption errors is probably
         ### going to jump straight to this comment anyway! */
      return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
                               "Corrupt representation '%s'",
                               rep
                               ? representation_string(rep, ffd->format, TRUE,
                                                       TRUE, pool)
                               : "(null)");
    }
  /* ### Call representation_string() ? */
  return svn_error_trace(err);
}

/* State of an in-progress representation read; the baton behind the
   stream handed out by read_representation(). */
struct rep_read_baton
{
  /* The FS from which we're reading. */
  svn_fs_t *fs;

  /* If not NULL, this is the base for the first delta window in rs_list */
  svn_stringbuf_t *base_window;

  /* The state of all prior delta representations. */
  apr_array_header_t *rs_list;

  /* The plaintext state, if there is a plaintext. */
  struct rep_state *src_state;

  /* The index of the current delta chunk, if we are reading a delta. */
  int chunk_index;

  /* The buffer where we store undeltified data. */
  char *buf;
  apr_size_t buf_pos;
  apr_size_t buf_len;

  /* A checksum context for summing the data read in order to verify it.
     Note: we don't need to use the sha1 checksum because we're only doing
     data verification, for which md5 is perfectly safe.
 */
  svn_checksum_ctx_t *md5_checksum_ctx;

  svn_boolean_t checksum_finalized;

  /* The stored checksum of the representation we are reading, its
     length, and the amount we've read so far.  Some of this
     information is redundant with rs_list and src_state, but it's
     convenient for the checksumming code to have it here. */
  svn_checksum_t *md5_checksum;

  svn_filesize_t len;
  svn_filesize_t off;

  /* The key for the fulltext cache for this rep, if there is a
     fulltext cache. */
  pair_cache_key_t fulltext_cache_key;
  /* The text we've been reading, if we're going to cache it. */
  svn_stringbuf_t *current_fulltext;

  /* Used for temporary allocations during the read. */
  apr_pool_t *pool;

  /* Pool used to store file handles and other data that is persistent
     for the entire stream read. */
  apr_pool_t *filehandle_pool;
};

/* Combine the name of the rev file in RS with the given OFFSET to form
 * a cache lookup key.  Allocations will be made from POOL.  May return
 * NULL if the key cannot be constructed. */
static const char*
get_window_key(struct rep_state *rs, apr_off_t offset, apr_pool_t *pool)
{
  const char *name;
  const char *last_part;
  const char *name_last;

  /* the rev file name containing the txdelta window.
   * If this fails we are in serious trouble anyways.
   * And if nobody else detects the problems, the file content checksum
   * comparison _will_ find them.
   */
  if (apr_file_name_get(&name, rs->file))
    return NULL;

  /* Handle packed files as well by scanning backwards until we find the
   * revision or pack number. */
  name_last = name + strlen(name) - 1;
  while (! svn_ctype_isdigit(*name_last))
    --name_last;

  last_part = name_last;
  while (svn_ctype_isdigit(*last_part))
    --last_part;

  /* We must differentiate between packed files (as of today, the number
   * is being followed by a dot) and non-packed files (followed by \0).
   * Otherwise, there might be overlaps in the numbering range if the
   * repo gets packed after caching the txdeltas of non-packed revs.
   * => add the first non-digit char to the packed number. */
  if (name_last[1] != '\0')
    ++name_last;

  /* copy one char MORE than the actual number to mark packed files,
   * i.e. packed revision file content uses different key space than
   * non-packed ones: keys for packed rev file content ends with a dot
   * for non-packed rev files they end with a digit. */
  name = apr_pstrndup(pool, last_part + 1, name_last - last_part);
  return svn_fs_fs__combine_number_and_string(offset, name, pool);
}

/* Read the WINDOW_P for the rep state RS from the current FSFS session's
 * cache.  This will be a no-op and IS_CACHED will be set to FALSE if no
 * cache has been given.  If a cache is available IS_CACHED will inform
 * the caller about the success of the lookup.  Allocations (of the window
 * in particular) will be made from POOL.
 *
 * If the information could be found, put RS and the position within the
 * rev file into the same state as if the data had just been read from it.
 */
static svn_error_t *
get_cached_window(svn_txdelta_window_t **window_p,
                  struct rep_state *rs,
                  svn_boolean_t *is_cached,
                  apr_pool_t *pool)
{
  if (! rs->window_cache)
    {
      /* txdelta window has not been enabled */
      *is_cached = FALSE;
    }
  else
    {
      /* ask the cache for the desired txdelta window */
      svn_fs_fs__txdelta_cached_window_t *cached_window;
      SVN_ERR(svn_cache__get((void **) &cached_window,
                             is_cached,
                             rs->window_cache,
                             get_window_key(rs, rs->off, pool),
                             pool));

      if (*is_cached)
        {
          /* found it. Pass it back to the caller.
 */
          *window_p = cached_window->window;

          /* manipulate the RS as if we just read the data */
          rs->chunk_index++;
          rs->off = cached_window->end_offset;

          /* manipulate the rev file as if we just read from it */
          SVN_ERR(svn_io_file_seek(rs->file, APR_SET, &rs->off, pool));
        }
    }

  return SVN_NO_ERROR;
}

/* Store the WINDOW read at OFFSET for the rep state RS in the current
 * FSFS session's cache.  This will be a no-op if no cache has been given.
 * Temporary allocations will be made from SCRATCH_POOL. */
static svn_error_t *
set_cached_window(svn_txdelta_window_t *window,
                  struct rep_state *rs,
                  apr_off_t offset,
                  apr_pool_t *scratch_pool)
{
  if (rs->window_cache)
    {
      /* store the window and the first offset _past_ it */
      svn_fs_fs__txdelta_cached_window_t cached_window;

      cached_window.window = window;
      cached_window.end_offset = rs->off;

      /* but key it with the start offset because that is the known state
       * when we will look it up */
      return svn_cache__set(rs->window_cache,
                            get_window_key(rs, offset, scratch_pool),
                            &cached_window,
                            scratch_pool);
    }

  return SVN_NO_ERROR;
}

/* Read the *WINDOW_P (a fully undeltified / combined window) for the rep
 * state RS from the current FSFS session's cache.  This will be a no-op
 * and IS_CACHED will be set to FALSE if no cache has been given.  If a
 * cache is available IS_CACHED will inform the caller about the success
 * of the lookup.  Allocations (of the window in particular) will be made
 * from POOL.
 */
static svn_error_t *
get_cached_combined_window(svn_stringbuf_t **window_p,
                           struct rep_state *rs,
                           svn_boolean_t *is_cached,
                           apr_pool_t *pool)
{
  if (!
      rs->combined_cache)
    {
      /* txdelta window has not been enabled */
      *is_cached = FALSE;
    }
  else
    {
      /* ask the cache for the desired txdelta window */
      return svn_cache__get((void **)window_p,
                            is_cached,
                            rs->combined_cache,
                            get_window_key(rs, rs->start, pool),
                            pool);
    }

  return SVN_NO_ERROR;
}

/* Store the WINDOW read at OFFSET for the rep state RS in the current
 * FSFS session's cache.  This will be a no-op if no cache has been given.
 * Temporary allocations will be made from SCRATCH_POOL. */
static svn_error_t *
set_cached_combined_window(svn_stringbuf_t *window,
                           struct rep_state *rs,
                           apr_off_t offset,
                           apr_pool_t *scratch_pool)
{
  if (rs->combined_cache)
    {
      /* but key it with the start offset because that is the known state
       * when we will look it up */
      return svn_cache__set(rs->combined_cache,
                            get_window_key(rs, offset, scratch_pool),
                            window,
                            scratch_pool);
    }

  return SVN_NO_ERROR;
}

/* Build an array of rep_state structures in *LIST giving the delta
   reps from first_rep to a plain-text or self-compressed rep.  Set
   *SRC_STATE to the plain-text rep we find at the end of the chain,
   or to NULL if the final delta representation is self-compressed.
   The representation to start from is designated by filesystem FS, id
   ID, and representation REP.
   Also, set *WINDOW_P to the base window content for *LIST, if it
   could be found in cache.  Otherwise, *LIST will contain the base
   representation for the whole delta chain.
   Finally, return the expanded size of the representation in
   *EXPANDED_SIZE.  It will take care of cases where only the on-disk
   size is known.
 */
static svn_error_t *
build_rep_list(apr_array_header_t **list,
               svn_stringbuf_t **window_p,
               struct rep_state **src_state,
               svn_filesize_t *expanded_size,
               svn_fs_t *fs,
               representation_t *first_rep,
               apr_pool_t *pool)
{
  representation_t rep;
  struct rep_state *rs = NULL;
  struct rep_args *rep_args;
  svn_boolean_t is_cached = FALSE;
  apr_file_t *last_file = NULL;
  svn_revnum_t last_revision;

  *list = apr_array_make(pool, 1, sizeof(struct rep_state *));
  rep = *first_rep;

  /* The value as stored in the data struct.
     0 is either for unknown length or actually zero length. */
  *expanded_size = first_rep->expanded_size;

  /* for the top-level rep, we need the rep_args */
  SVN_ERR(create_rep_state(&rs, &rep_args, &last_file,
                           &last_revision, &rep, fs, pool));

  /* Unknown size or empty representation?
     That implies the this being the first iteration.
     Usually size equals on-disk size, except for empty,
     compressed representations (delta, size = 4).
     Please note that for all non-empty deltas have
     a 4-byte header _plus_ some data. */
  if (*expanded_size == 0)
    if (! rep_args->is_delta || first_rep->size != 4)
      *expanded_size = first_rep->size;

  /* walk down the delta chain, collecting one rep_state per delta */
  while (1)
    {
      /* fetch state, if that has not been done already */
      if (!rs)
        SVN_ERR(create_rep_state(&rs, &rep_args, &last_file,
                                 &last_revision, &rep, fs, pool));

      SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached, pool));
      if (is_cached)
        {
          /* We already have a reconstructed window in our cache.
             Write a pseudo rep_state with the full length. */
          rs->off = rs->start;
          rs->end = rs->start + (*window_p)->len;
          *src_state = rs;
          return SVN_NO_ERROR;
        }

      if (!rep_args->is_delta)
        {
          /* This is a plaintext, so just return the current rep_state. */
          *src_state = rs;
          return SVN_NO_ERROR;
        }

      /* Push this rep onto the list.  If it's self-compressed, we're done.
 */
      APR_ARRAY_PUSH(*list, struct rep_state *) = rs;
      if (rep_args->is_delta_vs_empty)
        {
          *src_state = NULL;
          return SVN_NO_ERROR;
        }

      /* descend to the base representation of this delta */
      rep.revision = rep_args->base_revision;
      rep.offset = rep_args->base_offset;
      rep.size = rep_args->base_length;
      rep.txn_id = NULL;

      rs = NULL;
    }
}


/* Create a rep_read_baton structure for node revision NODEREV in
   filesystem FS and store it in *RB_P.  If FULLTEXT_CACHE_KEY is not
   NULL, it is the rep's key in the fulltext cache, and a stringbuf
   must be allocated to store the text.  Perform all allocations in
   POOL.  If rep is mutable, it must be for file contents. */
static svn_error_t *
rep_read_get_baton(struct rep_read_baton **rb_p,
                   svn_fs_t *fs,
                   representation_t *rep,
                   pair_cache_key_t fulltext_cache_key,
                   apr_pool_t *pool)
{
  struct rep_read_baton *b;

  b = apr_pcalloc(pool, sizeof(*b));
  b->fs = fs;
  b->base_window = NULL;
  b->chunk_index = 0;
  b->buf = NULL;
  b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
  b->checksum_finalized = FALSE;
  b->md5_checksum = svn_checksum_dup(rep->md5_checksum, pool);
  b->len = rep->expanded_size;
  b->off = 0;
  b->fulltext_cache_key = fulltext_cache_key;
  b->pool = svn_pool_create(pool);
  b->filehandle_pool = svn_pool_create(pool);

  /* may update B->LEN if only the on-disk size was known before */
  SVN_ERR(build_rep_list(&b->rs_list, &b->base_window,
                         &b->src_state, &b->len, fs, rep,
                         b->filehandle_pool));

  if (SVN_IS_VALID_REVNUM(fulltext_cache_key.revision))
    b->current_fulltext = svn_stringbuf_create_ensure
                            ((apr_size_t)b->len,
                             b->filehandle_pool);
  else
    b->current_fulltext = NULL;

  /* Save our output baton. */
  *rb_p = b;

  return SVN_NO_ERROR;
}

/* Skip forwards to THIS_CHUNK in REP_STATE and then read the next delta
   window into *NWIN.
 */
static svn_error_t *
read_delta_window(svn_txdelta_window_t **nwin, int this_chunk,
                  struct rep_state *rs, apr_pool_t *pool)
{
  svn_stream_t *stream;
  svn_boolean_t is_cached;
  apr_off_t old_offset;

  SVN_ERR_ASSERT(rs->chunk_index <= this_chunk);

  /* RS->FILE may be shared between RS instances -> make sure we point
   * to the right data. */
  SVN_ERR(svn_io_file_seek(rs->file, APR_SET, &rs->off, pool));

  /* Skip windows to reach the current chunk if we aren't there yet. */
  while (rs->chunk_index < this_chunk)
    {
      SVN_ERR(svn_txdelta_skip_svndiff_window(rs->file, rs->ver, pool));
      rs->chunk_index++;
      SVN_ERR(get_file_offset(&rs->off, rs->file, pool));
      if (rs->off >= rs->end)
        return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                                _("Reading one svndiff window read "
                                  "beyond the end of the "
                                  "representation"));
    }

  /* Read the next window.  But first, try to find it in the cache. */
  SVN_ERR(get_cached_window(nwin, rs, &is_cached, pool));
  if (is_cached)
    return SVN_NO_ERROR;

  /* Actually read the next window. */
  old_offset = rs->off;
  stream = svn_stream_from_aprfile2(rs->file, TRUE, pool);
  SVN_ERR(svn_txdelta_read_svndiff_window(nwin, stream, rs->ver, pool));
  rs->chunk_index++;
  SVN_ERR(get_file_offset(&rs->off, rs->file, pool));

  if (rs->off > rs->end)
    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                            _("Reading one svndiff window read beyond "
                              "the end of the representation"));

  /* the window has not been cached before, thus cache it now
   * (if caching is used for them at all) */
  return set_cached_window(*nwin, rs, old_offset, pool);
}

/* Read SIZE bytes from the representation RS and return it in *NWIN. */
static svn_error_t *
read_plain_window(svn_stringbuf_t **nwin, struct rep_state *rs,
                  apr_size_t size, apr_pool_t *pool)
{
  /* RS->FILE may be shared between RS instances -> make sure we point
   * to the right data.
*/ + SVN_ERR(svn_io_file_seek(rs->file, APR_SET, &rs->off, pool)); + + /* Read the plain data. */ + *nwin = svn_stringbuf_create_ensure(size, pool); + SVN_ERR(svn_io_file_read_full2(rs->file, (*nwin)->data, size, NULL, NULL, + pool)); + (*nwin)->data[size] = 0; + + /* Update RS. */ + rs->off += (apr_off_t)size; + + return SVN_NO_ERROR; +} + +/* Get the undeltified window that is a result of combining all deltas + from the current desired representation identified in *RB with its + base representation. Store the window in *RESULT. */ +static svn_error_t * +get_combined_window(svn_stringbuf_t **result, + struct rep_read_baton *rb) +{ + apr_pool_t *pool, *new_pool, *window_pool; + int i; + svn_txdelta_window_t *window; + apr_array_header_t *windows; + svn_stringbuf_t *source, *buf = rb->base_window; + struct rep_state *rs; + + /* Read all windows that we need to combine. This is fine because + the size of each window is relatively small (100kB) and skip- + delta limits the number of deltas in a chain to well under 100. + Stop early if one of them does not depend on its predecessors. */ + window_pool = svn_pool_create(rb->pool); + windows = apr_array_make(window_pool, 0, sizeof(svn_txdelta_window_t *)); + for (i = 0; i < rb->rs_list->nelts; ++i) + { + rs = APR_ARRAY_IDX(rb->rs_list, i, struct rep_state *); + SVN_ERR(read_delta_window(&window, rb->chunk_index, rs, window_pool)); + + APR_ARRAY_PUSH(windows, svn_txdelta_window_t *) = window; + if (window->src_ops == 0) + { + ++i; + break; + } + } + + /* Combine in the windows from the other delta reps. */ + pool = svn_pool_create(rb->pool); + for (--i; i >= 0; --i) + { + + rs = APR_ARRAY_IDX(rb->rs_list, i, struct rep_state *); + window = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *); + + /* Maybe, we've got a PLAIN start representation. If we do, read + as much data from it as the needed for the txdelta window's source + view. + Note that BUF / SOURCE may only be NULL in the first iteration. 
*/ + source = buf; + if (source == NULL && rb->src_state != NULL) + SVN_ERR(read_plain_window(&source, rb->src_state, window->sview_len, + pool)); + + /* Combine this window with the current one. */ + new_pool = svn_pool_create(rb->pool); + buf = svn_stringbuf_create_ensure(window->tview_len, new_pool); + buf->len = window->tview_len; + + svn_txdelta_apply_instructions(window, source ? source->data : NULL, + buf->data, &buf->len); + if (buf->len != window->tview_len) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("svndiff window length is " + "corrupt")); + + /* Cache windows only if the whole rep content could be read as a + single chunk. Only then will no other chunk need a deeper RS + list than the cached chunk. */ + if ((rb->chunk_index == 0) && (rs->off == rs->end)) + SVN_ERR(set_cached_combined_window(buf, rs, rs->start, new_pool)); + + /* Cycle pools so that we only need to hold three windows at a time. */ + svn_pool_destroy(pool); + pool = new_pool; + } + + svn_pool_destroy(window_pool); + + *result = buf; + return SVN_NO_ERROR; +} + +/* Returns whether or not the expanded fulltext of the file is cachable + * based on its size SIZE. The decision depends on the cache used by RB. + */ +static svn_boolean_t +fulltext_size_is_cachable(fs_fs_data_t *ffd, svn_filesize_t size) +{ + return (size < APR_SIZE_MAX) + && svn_cache__is_cachable(ffd->fulltext_cache, (apr_size_t)size); +} + +/* Close method used on streams returned by read_representation(). + */ +static svn_error_t * +rep_read_contents_close(void *baton) +{ + struct rep_read_baton *rb = baton; + + svn_pool_destroy(rb->pool); + svn_pool_destroy(rb->filehandle_pool); + + return SVN_NO_ERROR; +} + +/* Return the next *LEN bytes of the rep and store them in *BUF. 
 */
static svn_error_t *
get_contents(struct rep_read_baton *rb,
             char *buf,
             apr_size_t *len)
{
  apr_size_t copy_len, remaining = *len;
  char *cur = buf;
  struct rep_state *rs;

  /* Special case for when there are no delta reps, only a plain
     text. */
  if (rb->rs_list->nelts == 0)
    {
      copy_len = remaining;
      rs = rb->src_state;

      if (rb->base_window != NULL)
        {
          /* We got the desired rep directly from the cache.
             This is where we need the pseudo rep_state created
             by build_rep_list(). */
          apr_size_t offset = (apr_size_t)(rs->off - rs->start);
          /* clip the copy to the bytes still available in the window */
          if (copy_len + offset > rb->base_window->len)
            copy_len = offset < rb->base_window->len
                     ? rb->base_window->len - offset
                     : 0ul;

          memcpy (cur, rb->base_window->data + offset, copy_len);
        }
      else
        {
          /* plaintext on disk: clip to the rep's end and read directly */
          if (((apr_off_t) copy_len) > rs->end - rs->off)
            copy_len = (apr_size_t) (rs->end - rs->off);
          SVN_ERR(svn_io_file_read_full2(rs->file, cur, copy_len, NULL,
                                         NULL, rb->pool));
        }

      rs->off += copy_len;
      *len = copy_len;
      return SVN_NO_ERROR;
    }

  while (remaining > 0)
    {
      /* If we have buffered data from a previous chunk, use that. */
      if (rb->buf)
        {
          /* Determine how much to copy from the buffer. */
          copy_len = rb->buf_len - rb->buf_pos;
          if (copy_len > remaining)
            copy_len = remaining;

          /* Actually copy the data. */
          memcpy(cur, rb->buf + rb->buf_pos, copy_len);
          rb->buf_pos += copy_len;
          cur += copy_len;
          remaining -= copy_len;

          /* If the buffer is all used up, clear it and empty the
             local pool. */
          if (rb->buf_pos == rb->buf_len)
            {
              svn_pool_clear(rb->pool);
              rb->buf = NULL;
            }
        }
      else
        {
          svn_stringbuf_t *sbuf = NULL;

          rs = APR_ARRAY_IDX(rb->rs_list, 0, struct rep_state *);
          if (rs->off == rs->end)
            break;

          /* Get more buffered data by evaluating a chunk.
 */
          SVN_ERR(get_combined_window(&sbuf, rb));

          rb->chunk_index++;
          rb->buf_len = sbuf->len;
          rb->buf = sbuf->data;
          rb->buf_pos = 0;
        }
    }

  *len = cur - buf;

  return SVN_NO_ERROR;
}

/* BATON is of type `rep_read_baton'; read the next *LEN bytes of the
   representation and store them in *BUF.  Sum as we read and verify
   the MD5 sum at the end. */
static svn_error_t *
rep_read_contents(void *baton,
                  char *buf,
                  apr_size_t *len)
{
  struct rep_read_baton *rb = baton;

  /* Get the next block of data. */
  SVN_ERR(get_contents(rb, buf, len));

  if (rb->current_fulltext)
    svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);

  /* Perform checksumming.  We want to check the checksum as soon as
     the last byte of data is read, in case the caller never performs
     a short read, but we don't want to finalize the MD5 context
     twice. */
  if (!rb->checksum_finalized)
    {
      SVN_ERR(svn_checksum_update(rb->md5_checksum_ctx, buf, *len));
      rb->off += *len;
      if (rb->off == rb->len)
        {
          svn_checksum_t *md5_checksum;

          rb->checksum_finalized = TRUE;
          SVN_ERR(svn_checksum_final(&md5_checksum, rb->md5_checksum_ctx,
                                     rb->pool));
          if (!svn_checksum_match(md5_checksum, rb->md5_checksum))
            return svn_error_create(SVN_ERR_FS_CORRUPT,
                    svn_checksum_mismatch_err(rb->md5_checksum, md5_checksum,
                        rb->pool,
                        _("Checksum mismatch while reading representation")),
                    NULL);
        }
    }

  /* the whole text has been read -> commit it to the fulltext cache */
  if (rb->off == rb->len && rb->current_fulltext)
    {
      fs_fs_data_t *ffd = rb->fs->fsap_data;
      SVN_ERR(svn_cache__set(ffd->fulltext_cache, &rb->fulltext_cache_key,
                             rb->current_fulltext, rb->pool));
      rb->current_fulltext = NULL;
    }

  return SVN_NO_ERROR;
}


/* Return a stream in *CONTENTS_P that will read the contents of a
   representation stored at the location given by REP.  Appropriate
   for any kind of immutable representation, but only for file
   contents (not props or directory contents) in mutable
   representations.
+ + If REP is NULL, the representation is assumed to be empty, and the + empty stream is returned. +*/ +static svn_error_t * +read_representation(svn_stream_t **contents_p, + svn_fs_t *fs, + representation_t *rep, + apr_pool_t *pool) +{ + if (! rep) + { + *contents_p = svn_stream_empty(pool); + } + else + { + fs_fs_data_t *ffd = fs->fsap_data; + pair_cache_key_t fulltext_cache_key = { 0 }; + svn_filesize_t len = rep->expanded_size ? rep->expanded_size : rep->size; + struct rep_read_baton *rb; + + fulltext_cache_key.revision = rep->revision; + fulltext_cache_key.second = rep->offset; + if (ffd->fulltext_cache && SVN_IS_VALID_REVNUM(rep->revision) + && fulltext_size_is_cachable(ffd, len)) + { + svn_stringbuf_t *fulltext; + svn_boolean_t is_cached; + SVN_ERR(svn_cache__get((void **) &fulltext, &is_cached, + ffd->fulltext_cache, &fulltext_cache_key, + pool)); + if (is_cached) + { + *contents_p = svn_stream_from_stringbuf(fulltext, pool); + return SVN_NO_ERROR; + } + } + else + fulltext_cache_key.revision = SVN_INVALID_REVNUM; + + SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool)); + + *contents_p = svn_stream_create(rb, pool); + svn_stream_set_read(*contents_p, rep_read_contents); + svn_stream_set_close(*contents_p, rep_read_contents_close); + } + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__get_contents(svn_stream_t **contents_p, + svn_fs_t *fs, + node_revision_t *noderev, + apr_pool_t *pool) +{ + return read_representation(contents_p, fs, noderev->data_rep, pool); +} + +/* Baton used when reading delta windows. */ +struct delta_read_baton +{ + struct rep_state *rs; + svn_checksum_t *checksum; +}; + +/* This implements the svn_txdelta_next_window_fn_t interface. 
*/ +static svn_error_t * +delta_read_next_window(svn_txdelta_window_t **window, void *baton, + apr_pool_t *pool) +{ + struct delta_read_baton *drb = baton; + + if (drb->rs->off == drb->rs->end) + { + *window = NULL; + return SVN_NO_ERROR; + } + + return read_delta_window(window, drb->rs->chunk_index, drb->rs, pool); +} + +/* This implements the svn_txdelta_md5_digest_fn_t interface. */ +static const unsigned char * +delta_read_md5_digest(void *baton) +{ + struct delta_read_baton *drb = baton; + + if (drb->checksum->kind == svn_checksum_md5) + return drb->checksum->digest; + else + return NULL; +} + +svn_error_t * +svn_fs_fs__get_file_delta_stream(svn_txdelta_stream_t **stream_p, + svn_fs_t *fs, + node_revision_t *source, + node_revision_t *target, + apr_pool_t *pool) +{ + svn_stream_t *source_stream, *target_stream; + + /* Try a shortcut: if the target is stored as a delta against the source, + then just use that delta. */ + if (source && source->data_rep && target->data_rep) + { + struct rep_state *rep_state; + struct rep_args *rep_args; + + /* Read target's base rep if any. */ + SVN_ERR(create_rep_state(&rep_state, &rep_args, NULL, NULL, + target->data_rep, fs, pool)); + /* If that matches source, then use this delta as is. */ + if (rep_args->is_delta + && (rep_args->is_delta_vs_empty + || (rep_args->base_revision == source->data_rep->revision + && rep_args->base_offset == source->data_rep->offset))) + { + /* Create the delta read baton. */ + struct delta_read_baton *drb = apr_pcalloc(pool, sizeof(*drb)); + drb->rs = rep_state; + drb->checksum = svn_checksum_dup(target->data_rep->md5_checksum, + pool); + *stream_p = svn_txdelta_stream_create(drb, delta_read_next_window, + delta_read_md5_digest, pool); + return SVN_NO_ERROR; + } + else + SVN_ERR(svn_io_file_close(rep_state->file, pool)); + } + + /* Read both fulltexts and construct a delta. 
*/ + if (source) + SVN_ERR(read_representation(&source_stream, fs, source->data_rep, pool)); + else + source_stream = svn_stream_empty(pool); + SVN_ERR(read_representation(&target_stream, fs, target->data_rep, pool)); + + /* Because source and target stream will already verify their content, + * there is no need to do this once more. In particular if the stream + * content is being fetched from cache. */ + svn_txdelta2(stream_p, source_stream, target_stream, FALSE, pool); + + return SVN_NO_ERROR; +} + +/* Baton for cache_access_wrapper. Wraps the original parameters of + * svn_fs_fs__try_process_file_content(). + */ +typedef struct cache_access_wrapper_baton_t +{ + svn_fs_process_contents_func_t func; + void* baton; +} cache_access_wrapper_baton_t; + +/* Wrapper to translate between svn_fs_process_contents_func_t and + * svn_cache__partial_getter_func_t. + */ +static svn_error_t * +cache_access_wrapper(void **out, + const void *data, + apr_size_t data_len, + void *baton, + apr_pool_t *pool) +{ + cache_access_wrapper_baton_t *wrapper_baton = baton; + + SVN_ERR(wrapper_baton->func((const unsigned char *)data, + data_len - 1, /* cache adds terminating 0 */ + wrapper_baton->baton, + pool)); + + /* non-NULL value to signal the calling cache that all went well */ + *out = baton; + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__try_process_file_contents(svn_boolean_t *success, + svn_fs_t *fs, + node_revision_t *noderev, + svn_fs_process_contents_func_t processor, + void* baton, + apr_pool_t *pool) +{ + representation_t *rep = noderev->data_rep; + if (rep) + { + fs_fs_data_t *ffd = fs->fsap_data; + pair_cache_key_t fulltext_cache_key = { 0 }; + + fulltext_cache_key.revision = rep->revision; + fulltext_cache_key.second = rep->offset; + if (ffd->fulltext_cache && SVN_IS_VALID_REVNUM(rep->revision) + && fulltext_size_is_cachable(ffd, rep->expanded_size)) + { + cache_access_wrapper_baton_t wrapper_baton; + void *dummy = NULL; + + wrapper_baton.func = processor; + 
wrapper_baton.baton = baton; + return svn_cache__get_partial(&dummy, success, + ffd->fulltext_cache, + &fulltext_cache_key, + cache_access_wrapper, + &wrapper_baton, + pool); + } + } + + *success = FALSE; + return SVN_NO_ERROR; +} + +/* Fetch the contents of a directory into ENTRIES. Values are stored + as filename to string mappings; further conversion is necessary to + convert them into svn_fs_dirent_t values. */ +static svn_error_t * +get_dir_contents(apr_hash_t *entries, + svn_fs_t *fs, + node_revision_t *noderev, + apr_pool_t *pool) +{ + svn_stream_t *contents; + + if (noderev->data_rep && noderev->data_rep->txn_id) + { + const char *filename = path_txn_node_children(fs, noderev->id, pool); + + /* The representation is mutable. Read the old directory + contents from the mutable children file, followed by the + changes we've made in this transaction. */ + SVN_ERR(svn_stream_open_readonly(&contents, filename, pool, pool)); + SVN_ERR(svn_hash_read2(entries, contents, SVN_HASH_TERMINATOR, pool)); + SVN_ERR(svn_hash_read_incremental(entries, contents, NULL, pool)); + SVN_ERR(svn_stream_close(contents)); + } + else if (noderev->data_rep) + { + /* use a temporary pool for temp objects. + * Also undeltify content before parsing it. Otherwise, we could only + * parse it byte-by-byte. + */ + apr_pool_t *text_pool = svn_pool_create(pool); + apr_size_t len = noderev->data_rep->expanded_size + ? (apr_size_t)noderev->data_rep->expanded_size + : (apr_size_t)noderev->data_rep->size; + svn_stringbuf_t *text = svn_stringbuf_create_ensure(len, text_pool); + text->len = len; + + /* The representation is immutable. Read it normally. 
*/ + SVN_ERR(read_representation(&contents, fs, noderev->data_rep, text_pool)); + SVN_ERR(svn_stream_read(contents, text->data, &text->len)); + SVN_ERR(svn_stream_close(contents)); + + /* de-serialize hash */ + contents = svn_stream_from_stringbuf(text, text_pool); + SVN_ERR(svn_hash_read2(entries, contents, SVN_HASH_TERMINATOR, pool)); + + svn_pool_destroy(text_pool); + } + + return SVN_NO_ERROR; +} + + +static const char * +unparse_dir_entry(svn_node_kind_t kind, const svn_fs_id_t *id, + apr_pool_t *pool) +{ + return apr_psprintf(pool, "%s %s", + (kind == svn_node_file) ? KIND_FILE : KIND_DIR, + svn_fs_fs__id_unparse(id, pool)->data); +} + +/* Given a hash ENTRIES of dirent structions, return a hash in + *STR_ENTRIES_P, that has svn_string_t as the values in the format + specified by the fs_fs directory contents file. Perform + allocations in POOL. */ +static svn_error_t * +unparse_dir_entries(apr_hash_t **str_entries_p, + apr_hash_t *entries, + apr_pool_t *pool) +{ + apr_hash_index_t *hi; + + /* For now, we use a our own hash function to ensure that we get a + * (largely) stable order when serializing the data. It also gives + * us some performance improvement. + * + * ### TODO ### + * Use some sorted or other fixed order data container. + */ + *str_entries_p = svn_hash__make(pool); + + for (hi = apr_hash_first(pool, entries); hi; hi = apr_hash_next(hi)) + { + const void *key; + apr_ssize_t klen; + svn_fs_dirent_t *dirent = svn__apr_hash_index_val(hi); + const char *new_val; + + apr_hash_this(hi, &key, &klen, NULL); + new_val = unparse_dir_entry(dirent->kind, dirent->id, pool); + apr_hash_set(*str_entries_p, key, klen, + svn_string_create(new_val, pool)); + } + + return SVN_NO_ERROR; +} + + +/* Given a hash STR_ENTRIES with values as svn_string_t as specified + in an FSFS directory contents listing, return a hash of dirents in + *ENTRIES_P. Perform allocations in POOL. 
*/ +static svn_error_t * +parse_dir_entries(apr_hash_t **entries_p, + apr_hash_t *str_entries, + const char *unparsed_id, + apr_pool_t *pool) +{ + apr_hash_index_t *hi; + + *entries_p = apr_hash_make(pool); + + /* Translate the string dir entries into real entries. */ + for (hi = apr_hash_first(pool, str_entries); hi; hi = apr_hash_next(hi)) + { + const char *name = svn__apr_hash_index_key(hi); + svn_string_t *str_val = svn__apr_hash_index_val(hi); + char *str, *last_str; + svn_fs_dirent_t *dirent = apr_pcalloc(pool, sizeof(*dirent)); + + last_str = apr_pstrdup(pool, str_val->data); + dirent->name = apr_pstrdup(pool, name); + + str = svn_cstring_tokenize(" ", &last_str); + if (str == NULL) + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Directory entry corrupt in '%s'"), + unparsed_id); + + if (strcmp(str, KIND_FILE) == 0) + { + dirent->kind = svn_node_file; + } + else if (strcmp(str, KIND_DIR) == 0) + { + dirent->kind = svn_node_dir; + } + else + { + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Directory entry corrupt in '%s'"), + unparsed_id); + } + + str = svn_cstring_tokenize(" ", &last_str); + if (str == NULL) + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Directory entry corrupt in '%s'"), + unparsed_id); + + dirent->id = svn_fs_fs__id_parse(str, strlen(str), pool); + + svn_hash_sets(*entries_p, dirent->name, dirent); + } + + return SVN_NO_ERROR; +} + +/* Return the cache object in FS responsible to storing the directory + * the NODEREV. If none exists, return NULL. */ +static svn_cache__t * +locate_dir_cache(svn_fs_t *fs, + node_revision_t *noderev) +{ + fs_fs_data_t *ffd = fs->fsap_data; + return svn_fs_fs__id_txn_id(noderev->id) + ? 
ffd->txn_dir_cache + : ffd->dir_cache; +} + +svn_error_t * +svn_fs_fs__rep_contents_dir(apr_hash_t **entries_p, + svn_fs_t *fs, + node_revision_t *noderev, + apr_pool_t *pool) +{ + const char *unparsed_id = NULL; + apr_hash_t *unparsed_entries, *parsed_entries; + + /* find the cache we may use */ + svn_cache__t *cache = locate_dir_cache(fs, noderev); + if (cache) + { + svn_boolean_t found; + + unparsed_id = svn_fs_fs__id_unparse(noderev->id, pool)->data; + SVN_ERR(svn_cache__get((void **) entries_p, &found, cache, + unparsed_id, pool)); + if (found) + return SVN_NO_ERROR; + } + + /* Read in the directory hash. */ + unparsed_entries = apr_hash_make(pool); + SVN_ERR(get_dir_contents(unparsed_entries, fs, noderev, pool)); + SVN_ERR(parse_dir_entries(&parsed_entries, unparsed_entries, + unparsed_id, pool)); + + /* Update the cache, if we are to use one. */ + if (cache) + SVN_ERR(svn_cache__set(cache, unparsed_id, parsed_entries, pool)); + + *entries_p = parsed_entries; + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent, + svn_fs_t *fs, + node_revision_t *noderev, + const char *name, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + svn_boolean_t found = FALSE; + + /* find the cache we may use */ + svn_cache__t *cache = locate_dir_cache(fs, noderev); + if (cache) + { + const char *unparsed_id = + svn_fs_fs__id_unparse(noderev->id, scratch_pool)->data; + + /* Cache lookup. */ + SVN_ERR(svn_cache__get_partial((void **)dirent, + &found, + cache, + unparsed_id, + svn_fs_fs__extract_dir_entry, + (void*)name, + result_pool)); + } + + /* fetch data from disk if we did not find it in the cache */ + if (! found) + { + apr_hash_t *entries; + svn_fs_dirent_t *entry; + svn_fs_dirent_t *entry_copy = NULL; + + /* read the dir from the file system. It will probably be put it + into the cache for faster lookup in future calls. 
*/ + SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, fs, noderev, + scratch_pool)); + + /* find desired entry and return a copy in POOL, if found */ + entry = svn_hash_gets(entries, name); + if (entry != NULL) + { + entry_copy = apr_palloc(result_pool, sizeof(*entry_copy)); + entry_copy->name = apr_pstrdup(result_pool, entry->name); + entry_copy->id = svn_fs_fs__id_copy(entry->id, result_pool); + entry_copy->kind = entry->kind; + } + + *dirent = entry_copy; + } + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__get_proplist(apr_hash_t **proplist_p, + svn_fs_t *fs, + node_revision_t *noderev, + apr_pool_t *pool) +{ + apr_hash_t *proplist; + svn_stream_t *stream; + + if (noderev->prop_rep && noderev->prop_rep->txn_id) + { + const char *filename = path_txn_node_props(fs, noderev->id, pool); + proplist = apr_hash_make(pool); + + SVN_ERR(svn_stream_open_readonly(&stream, filename, pool, pool)); + SVN_ERR(svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool)); + SVN_ERR(svn_stream_close(stream)); + } + else if (noderev->prop_rep) + { + fs_fs_data_t *ffd = fs->fsap_data; + representation_t *rep = noderev->prop_rep; + pair_cache_key_t key = { 0 }; + + key.revision = rep->revision; + key.second = rep->offset; + if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision)) + { + svn_boolean_t is_cached; + SVN_ERR(svn_cache__get((void **) proplist_p, &is_cached, + ffd->properties_cache, &key, pool)); + if (is_cached) + return SVN_NO_ERROR; + } + + proplist = apr_hash_make(pool); + SVN_ERR(read_representation(&stream, fs, noderev->prop_rep, pool)); + SVN_ERR(svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool)); + SVN_ERR(svn_stream_close(stream)); + + if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision)) + SVN_ERR(svn_cache__set(ffd->properties_cache, &key, proplist, pool)); + } + else + { + /* return an empty prop list if the node doesn't have any props */ + proplist = apr_hash_make(pool); + } + + *proplist_p = proplist; + + return 
SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__file_length(svn_filesize_t *length, + node_revision_t *noderev, + apr_pool_t *pool) +{ + if (noderev->data_rep) + *length = noderev->data_rep->expanded_size; + else + *length = 0; + + return SVN_NO_ERROR; +} + +svn_boolean_t +svn_fs_fs__noderev_same_rep_key(representation_t *a, + representation_t *b) +{ + if (a == b) + return TRUE; + + if (a == NULL || b == NULL) + return FALSE; + + if (a->offset != b->offset) + return FALSE; + + if (a->revision != b->revision) + return FALSE; + + if (a->uniquifier == b->uniquifier) + return TRUE; + + if (a->uniquifier == NULL || b->uniquifier == NULL) + return FALSE; + + return strcmp(a->uniquifier, b->uniquifier) == 0; +} + +svn_error_t * +svn_fs_fs__file_checksum(svn_checksum_t **checksum, + node_revision_t *noderev, + svn_checksum_kind_t kind, + apr_pool_t *pool) +{ + if (noderev->data_rep) + { + switch(kind) + { + case svn_checksum_md5: + *checksum = svn_checksum_dup(noderev->data_rep->md5_checksum, + pool); + break; + case svn_checksum_sha1: + *checksum = svn_checksum_dup(noderev->data_rep->sha1_checksum, + pool); + break; + default: + *checksum = NULL; + } + } + else + *checksum = NULL; + + return SVN_NO_ERROR; +} + +representation_t * +svn_fs_fs__rep_copy(representation_t *rep, + apr_pool_t *pool) +{ + representation_t *rep_new; + + if (rep == NULL) + return NULL; + + rep_new = apr_pcalloc(pool, sizeof(*rep_new)); + + memcpy(rep_new, rep, sizeof(*rep_new)); + rep_new->md5_checksum = svn_checksum_dup(rep->md5_checksum, pool); + rep_new->sha1_checksum = svn_checksum_dup(rep->sha1_checksum, pool); + rep_new->uniquifier = apr_pstrdup(pool, rep->uniquifier); + + return rep_new; +} + +/* Merge the internal-use-only CHANGE into a hash of public-FS + svn_fs_path_change2_t CHANGES, collapsing multiple changes into a + single summarical (is that real word?) change per path. Also keep + the COPYFROM_CACHE up to date with new adds and replaces. 
*/ +static svn_error_t * +fold_change(apr_hash_t *changes, + const change_t *change, + apr_hash_t *copyfrom_cache) +{ + apr_pool_t *pool = apr_hash_pool_get(changes); + svn_fs_path_change2_t *old_change, *new_change; + const char *path; + apr_size_t path_len = strlen(change->path); + + if ((old_change = apr_hash_get(changes, change->path, path_len))) + { + /* This path already exists in the hash, so we have to merge + this change into the already existing one. */ + + /* Sanity check: only allow NULL node revision ID in the + `reset' case. */ + if ((! change->noderev_id) && (change->kind != svn_fs_path_change_reset)) + return svn_error_create + (SVN_ERR_FS_CORRUPT, NULL, + _("Missing required node revision ID")); + + /* Sanity check: we should be talking about the same node + revision ID as our last change except where the last change + was a deletion. */ + if (change->noderev_id + && (! svn_fs_fs__id_eq(old_change->node_rev_id, change->noderev_id)) + && (old_change->change_kind != svn_fs_path_change_delete)) + return svn_error_create + (SVN_ERR_FS_CORRUPT, NULL, + _("Invalid change ordering: new node revision ID " + "without delete")); + + /* Sanity check: an add, replacement, or reset must be the first + thing to follow a deletion. */ + if ((old_change->change_kind == svn_fs_path_change_delete) + && (! ((change->kind == svn_fs_path_change_replace) + || (change->kind == svn_fs_path_change_reset) + || (change->kind == svn_fs_path_change_add)))) + return svn_error_create + (SVN_ERR_FS_CORRUPT, NULL, + _("Invalid change ordering: non-add change on deleted path")); + + /* Sanity check: an add can't follow anything except + a delete or reset. */ + if ((change->kind == svn_fs_path_change_add) + && (old_change->change_kind != svn_fs_path_change_delete) + && (old_change->change_kind != svn_fs_path_change_reset)) + return svn_error_create + (SVN_ERR_FS_CORRUPT, NULL, + _("Invalid change ordering: add change on preexisting path")); + + /* Now, merge that change in. 
*/ + switch (change->kind) + { + case svn_fs_path_change_reset: + /* A reset here will simply remove the path change from the + hash. */ + old_change = NULL; + break; + + case svn_fs_path_change_delete: + if (old_change->change_kind == svn_fs_path_change_add) + { + /* If the path was introduced in this transaction via an + add, and we are deleting it, just remove the path + altogether. */ + old_change = NULL; + } + else + { + /* A deletion overrules all previous changes. */ + old_change->change_kind = svn_fs_path_change_delete; + old_change->text_mod = change->text_mod; + old_change->prop_mod = change->prop_mod; + old_change->copyfrom_rev = SVN_INVALID_REVNUM; + old_change->copyfrom_path = NULL; + } + break; + + case svn_fs_path_change_add: + case svn_fs_path_change_replace: + /* An add at this point must be following a previous delete, + so treat it just like a replace. */ + old_change->change_kind = svn_fs_path_change_replace; + old_change->node_rev_id = svn_fs_fs__id_copy(change->noderev_id, + pool); + old_change->text_mod = change->text_mod; + old_change->prop_mod = change->prop_mod; + if (change->copyfrom_rev == SVN_INVALID_REVNUM) + { + old_change->copyfrom_rev = SVN_INVALID_REVNUM; + old_change->copyfrom_path = NULL; + } + else + { + old_change->copyfrom_rev = change->copyfrom_rev; + old_change->copyfrom_path = apr_pstrdup(pool, + change->copyfrom_path); + } + break; + + case svn_fs_path_change_modify: + default: + if (change->text_mod) + old_change->text_mod = TRUE; + if (change->prop_mod) + old_change->prop_mod = TRUE; + break; + } + + /* Point our new_change to our (possibly modified) old_change. */ + new_change = old_change; + } + else + { + /* This change is new to the hash, so make a new public change + structure from the internal one (in the hash's pool), and dup + the path into the hash's pool, too. 
*/ + new_change = apr_pcalloc(pool, sizeof(*new_change)); + new_change->node_rev_id = svn_fs_fs__id_copy(change->noderev_id, pool); + new_change->change_kind = change->kind; + new_change->text_mod = change->text_mod; + new_change->prop_mod = change->prop_mod; + /* In FSFS, copyfrom_known is *always* true, since we've always + * stored copyfroms in changed paths lists. */ + new_change->copyfrom_known = TRUE; + if (change->copyfrom_rev != SVN_INVALID_REVNUM) + { + new_change->copyfrom_rev = change->copyfrom_rev; + new_change->copyfrom_path = apr_pstrdup(pool, change->copyfrom_path); + } + else + { + new_change->copyfrom_rev = SVN_INVALID_REVNUM; + new_change->copyfrom_path = NULL; + } + } + + if (new_change) + new_change->node_kind = change->node_kind; + + /* Add (or update) this path. + + Note: this key might already be present, and it would be nice to + re-use its value, but there is no way to fetch it. The API makes no + guarantees that this (new) key will not be retained. Thus, we (again) + copy the key into the target pool to ensure a proper lifetime. */ + path = apr_pstrmemdup(pool, change->path, path_len); + apr_hash_set(changes, path, path_len, new_change); + + /* Update the copyfrom cache, if any. */ + if (copyfrom_cache) + { + apr_pool_t *copyfrom_pool = apr_hash_pool_get(copyfrom_cache); + const char *copyfrom_string = NULL, *copyfrom_key = path; + if (new_change) + { + if (SVN_IS_VALID_REVNUM(new_change->copyfrom_rev)) + copyfrom_string = apr_psprintf(copyfrom_pool, "%ld %s", + new_change->copyfrom_rev, + new_change->copyfrom_path); + else + copyfrom_string = ""; + } + /* We need to allocate a copy of the key in the copyfrom_pool if + * we're not doing a deletion and if it isn't already there. */ + if ( copyfrom_string + && ( ! apr_hash_count(copyfrom_cache) + || ! 
apr_hash_get(copyfrom_cache, copyfrom_key, path_len))) + copyfrom_key = apr_pstrmemdup(copyfrom_pool, copyfrom_key, path_len); + + apr_hash_set(copyfrom_cache, copyfrom_key, path_len, + copyfrom_string); + } + + return SVN_NO_ERROR; +} + +/* The 256 is an arbitrary size large enough to hold the node id and the + * various flags. */ +#define MAX_CHANGE_LINE_LEN FSFS_MAX_PATH_LEN + 256 + +/* Read the next entry in the changes record from file FILE and store + the resulting change in *CHANGE_P. If there is no next record, + store NULL there. Perform all allocations from POOL. */ +static svn_error_t * +read_change(change_t **change_p, + apr_file_t *file, + apr_pool_t *pool) +{ + char buf[MAX_CHANGE_LINE_LEN]; + apr_size_t len = sizeof(buf); + change_t *change; + char *str, *last_str = buf, *kind_str; + svn_error_t *err; + + /* Default return value. */ + *change_p = NULL; + + err = svn_io_read_length_line(file, buf, &len, pool); + + /* Check for a blank line. */ + if (err || (len == 0)) + { + if (err && APR_STATUS_IS_EOF(err->apr_err)) + { + svn_error_clear(err); + return SVN_NO_ERROR; + } + if ((len == 0) && (! err)) + return SVN_NO_ERROR; + return svn_error_trace(err); + } + + change = apr_pcalloc(pool, sizeof(*change)); + + /* Get the node-id of the change. */ + str = svn_cstring_tokenize(" ", &last_str); + if (str == NULL) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Invalid changes line in rev-file")); + + change->noderev_id = svn_fs_fs__id_parse(str, strlen(str), pool); + if (change->noderev_id == NULL) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Invalid changes line in rev-file")); + + /* Get the change type. */ + str = svn_cstring_tokenize(" ", &last_str); + if (str == NULL) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Invalid changes line in rev-file")); + + /* Don't bother to check the format number before looking for + * node-kinds: just read them if you find them. 
*/ + change->node_kind = svn_node_unknown; + kind_str = strchr(str, '-'); + if (kind_str) + { + /* Cap off the end of "str" (the action). */ + *kind_str = '\0'; + kind_str++; + if (strcmp(kind_str, KIND_FILE) == 0) + change->node_kind = svn_node_file; + else if (strcmp(kind_str, KIND_DIR) == 0) + change->node_kind = svn_node_dir; + else + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Invalid changes line in rev-file")); + } + + if (strcmp(str, ACTION_MODIFY) == 0) + { + change->kind = svn_fs_path_change_modify; + } + else if (strcmp(str, ACTION_ADD) == 0) + { + change->kind = svn_fs_path_change_add; + } + else if (strcmp(str, ACTION_DELETE) == 0) + { + change->kind = svn_fs_path_change_delete; + } + else if (strcmp(str, ACTION_REPLACE) == 0) + { + change->kind = svn_fs_path_change_replace; + } + else if (strcmp(str, ACTION_RESET) == 0) + { + change->kind = svn_fs_path_change_reset; + } + else + { + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Invalid change kind in rev file")); + } + + /* Get the text-mod flag. */ + str = svn_cstring_tokenize(" ", &last_str); + if (str == NULL) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Invalid changes line in rev-file")); + + if (strcmp(str, FLAG_TRUE) == 0) + { + change->text_mod = TRUE; + } + else if (strcmp(str, FLAG_FALSE) == 0) + { + change->text_mod = FALSE; + } + else + { + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Invalid text-mod flag in rev-file")); + } + + /* Get the prop-mod flag. */ + str = svn_cstring_tokenize(" ", &last_str); + if (str == NULL) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Invalid changes line in rev-file")); + + if (strcmp(str, FLAG_TRUE) == 0) + { + change->prop_mod = TRUE; + } + else if (strcmp(str, FLAG_FALSE) == 0) + { + change->prop_mod = FALSE; + } + else + { + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Invalid prop-mod flag in rev-file")); + } + + /* Get the changed path. 
*/ + change->path = apr_pstrdup(pool, last_str); + + + /* Read the next line, the copyfrom line. */ + len = sizeof(buf); + SVN_ERR(svn_io_read_length_line(file, buf, &len, pool)); + + if (len == 0) + { + change->copyfrom_rev = SVN_INVALID_REVNUM; + change->copyfrom_path = NULL; + } + else + { + last_str = buf; + str = svn_cstring_tokenize(" ", &last_str); + if (! str) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Invalid changes line in rev-file")); + change->copyfrom_rev = SVN_STR_TO_REV(str); + + if (! last_str) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Invalid changes line in rev-file")); + + change->copyfrom_path = apr_pstrdup(pool, last_str); + } + + *change_p = change; + + return SVN_NO_ERROR; +} + +/* Examine all the changed path entries in CHANGES and store them in + *CHANGED_PATHS. Folding is done to remove redundant or unnecessary + *data. Store a hash of paths to copyfrom "REV PATH" strings in + COPYFROM_HASH if it is non-NULL. If PREFOLDED is true, assume that + the changed-path entries have already been folded (by + write_final_changed_path_info) and may be out of order, so we shouldn't + remove children of replaced or deleted directories. Do all + allocations in POOL. */ +static svn_error_t * +process_changes(apr_hash_t *changed_paths, + apr_hash_t *copyfrom_cache, + apr_array_header_t *changes, + svn_boolean_t prefolded, + apr_pool_t *pool) +{ + apr_pool_t *iterpool = svn_pool_create(pool); + int i; + + /* Read in the changes one by one, folding them into our local hash + as necessary. */ + + for (i = 0; i < changes->nelts; ++i) + { + change_t *change = APR_ARRAY_IDX(changes, i, change_t *); + + SVN_ERR(fold_change(changed_paths, change, copyfrom_cache)); + + /* Now, if our change was a deletion or replacement, we have to + blow away any changes thus far on paths that are (or, were) + children of this path. 
+ ### i won't bother with another iteration pool here -- at + most we talking about a few extra dups of paths into what + is already a temporary subpool. + */ + + if (((change->kind == svn_fs_path_change_delete) + || (change->kind == svn_fs_path_change_replace)) + && ! prefolded) + { + apr_hash_index_t *hi; + + /* a potential child path must contain at least 2 more chars + (the path separator plus at least one char for the name). + Also, we should not assume that all paths have been normalized + i.e. some might have trailing path separators. + */ + apr_ssize_t change_path_len = strlen(change->path); + apr_ssize_t min_child_len = change_path_len == 0 + ? 1 + : change->path[change_path_len-1] == '/' + ? change_path_len + 1 + : change_path_len + 2; + + /* CAUTION: This is the inner loop of an O(n^2) algorithm. + The number of changes to process may be >> 1000. + Therefore, keep the inner loop as tight as possible. + */ + for (hi = apr_hash_first(iterpool, changed_paths); + hi; + hi = apr_hash_next(hi)) + { + /* KEY is the path. */ + const void *path; + apr_ssize_t klen; + apr_hash_this(hi, &path, &klen, NULL); + + /* If we come across a child of our path, remove it. + Call svn_dirent_is_child only if there is a chance that + this is actually a sub-path. + */ + if ( klen >= min_child_len + && svn_dirent_is_child(change->path, path, iterpool)) + apr_hash_set(changed_paths, path, klen, NULL); + } + } + + /* Clear the per-iteration subpool. */ + svn_pool_clear(iterpool); + } + + /* Destroy the per-iteration subpool. */ + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} + +/* Fetch all the changes from FILE and store them in *CHANGES. Do all + allocations in POOL. 
*/
+static svn_error_t *
+read_all_changes(apr_array_header_t **changes,
+                 apr_file_t *file,
+                 apr_pool_t *pool)
+{
+  change_t *change;
+
+  /* pre-allocate enough room for most change lists
+     (will be auto-expanded as necessary) */
+  *changes = apr_array_make(pool, 30, sizeof(change_t *));
+
+  /* read entries until read_change returns a NULL change (end of list) */
+  SVN_ERR(read_change(&change, file, pool));
+  while (change)
+    {
+      APR_ARRAY_PUSH(*changes, change_t*) = change;
+      SVN_ERR(read_change(&change, file, pool));
+    }
+
+  return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_fs_fs__txn_changes_fetch(apr_hash_t **changed_paths_p,
+                             svn_fs_t *fs,
+                             const char *txn_id,
+                             apr_pool_t *pool)
+{
+  apr_file_t *file;
+  apr_hash_t *changed_paths = apr_hash_make(pool);
+  apr_array_header_t *changes;
+  /* the raw change list is only needed while folding it into the hash */
+  apr_pool_t *scratch_pool = svn_pool_create(pool);
+
+  SVN_ERR(svn_io_file_open(&file, path_txn_changes(fs, txn_id, pool),
+                           APR_READ | APR_BUFFERED, APR_OS_DEFAULT, pool));
+
+  SVN_ERR(read_all_changes(&changes, file, scratch_pool));
+  SVN_ERR(process_changes(changed_paths, NULL, changes, FALSE, pool));
+  svn_pool_destroy(scratch_pool);
+
+  SVN_ERR(svn_io_file_close(file, pool));
+
+  *changed_paths_p = changed_paths;
+
+  return SVN_NO_ERROR;
+}
+
+/* Fetch the list of changes in revision REV in FS and return it in *CHANGES.
+ * Allocate the result in POOL.
+ */
+static svn_error_t *
+get_changes(apr_array_header_t **changes,
+            svn_fs_t *fs,
+            svn_revnum_t rev,
+            apr_pool_t *pool)
+{
+  apr_off_t changes_offset;
+  apr_file_t *revision_file;
+  svn_boolean_t found;
+  fs_fs_data_t *ffd = fs->fsap_data;
+
+  /* try cache lookup first */
+
+  if (ffd->changes_cache)
+    {
+      SVN_ERR(svn_cache__get((void **) changes, &found, ffd->changes_cache,
+                             &rev, pool));
+      if (found)
+        return SVN_NO_ERROR;
+    }
+
+  /* read changes from revision file */
+
+  SVN_ERR(ensure_revision_exists(fs, rev, pool));
+
+  SVN_ERR(open_pack_or_rev_file(&revision_file, fs, rev, pool));
+
+  SVN_ERR(get_root_changes_offset(NULL, &changes_offset, revision_file, fs,
+                                  rev, pool));
+
+  SVN_ERR(svn_io_file_seek(revision_file, APR_SET, &changes_offset, pool));
+  SVN_ERR(read_all_changes(changes, revision_file, pool));
+
+  SVN_ERR(svn_io_file_close(revision_file, pool));
+
+  /* cache for future reference */
+
+  if (ffd->changes_cache)
+    SVN_ERR(svn_cache__set(ffd->changes_cache, &rev, *changes, pool));
+
+  return SVN_NO_ERROR;
+}
+
+
+svn_error_t *
+svn_fs_fs__paths_changed(apr_hash_t **changed_paths_p,
+                         svn_fs_t *fs,
+                         svn_revnum_t rev,
+                         apr_hash_t *copyfrom_cache,
+                         apr_pool_t *pool)
+{
+  apr_hash_t *changed_paths;
+  apr_array_header_t *changes;
+  apr_pool_t *scratch_pool = svn_pool_create(pool);
+
+  SVN_ERR(get_changes(&changes, fs, rev, scratch_pool));
+
+  changed_paths = svn_hash__make(pool);
+
+  SVN_ERR(process_changes(changed_paths, copyfrom_cache, changes,
+                          TRUE, pool));
+  svn_pool_destroy(scratch_pool);
+
+  *changed_paths_p = changed_paths;
+
+  return SVN_NO_ERROR;
+}
+
+/* Copy a revision node-rev SRC into the current transaction TXN_ID in
+   the filesystem FS.  This is only used to create the root of a transaction.
+   Allocations are from POOL.
*/
+static svn_error_t *
+create_new_txn_noderev_from_rev(svn_fs_t *fs,
+                                const char *txn_id,
+                                svn_fs_id_t *src,
+                                apr_pool_t *pool)
+{
+  node_revision_t *noderev;
+  const char *node_id, *copy_id;
+
+  SVN_ERR(svn_fs_fs__get_node_revision(&noderev, fs, src, pool));
+
+  /* SRC must be a committed node-rev; a txn-local id means corruption. */
+  if (svn_fs_fs__id_txn_id(noderev->id))
+    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+                            _("Copying from transactions not allowed"));
+
+  noderev->predecessor_id = noderev->id;
+  noderev->predecessor_count++;
+  noderev->copyfrom_path = NULL;
+  noderev->copyfrom_rev = SVN_INVALID_REVNUM;
+
+  /* For the transaction root, the copyroot never changes. */
+
+  node_id = svn_fs_fs__id_node_id(noderev->id);
+  copy_id = svn_fs_fs__id_copy_id(noderev->id);
+  noderev->id = svn_fs_fs__id_txn_create(node_id, copy_id, txn_id, pool);
+
+  return svn_fs_fs__put_node_revision(fs, noderev->id, noderev, TRUE, pool);
+}
+
+/* A structure used by get_and_increment_txn_key_body(). */
+struct get_and_increment_txn_key_baton {
+  svn_fs_t *fs;
+  char *txn_id;
+  apr_pool_t *pool;
+};
+
+/* Callback used in the implementation of create_txn_dir().  This gets
+   the current base 36 value in PATH_TXN_CURRENT and increments it.
+   It returns the original value by the baton. */
+static svn_error_t *
+get_and_increment_txn_key_body(void *baton, apr_pool_t *pool)
+{
+  struct get_and_increment_txn_key_baton *cb = baton;
+  const char *txn_current_filename = path_txn_current(cb->fs, pool);
+  const char *tmp_filename;
+  /* room for the key, possible one-digit growth, '\n' and '\0' */
+  char next_txn_id[MAX_KEY_SIZE+3];
+  apr_size_t len;
+
+  svn_stringbuf_t *buf;
+  SVN_ERR(read_content(&buf, txn_current_filename, cb->pool));
+
+  /* remove trailing newlines */
+  svn_stringbuf_strip_whitespace(buf);
+  cb->txn_id = buf->data;
+  len = buf->len;
+
+  /* Increment the key and add a trailing \n to the string so the
+     txn-current file has a newline in it. */
+  svn_fs_fs__next_key(cb->txn_id, &len, next_txn_id);
+  next_txn_id[len] = '\n';
+  ++len;
+  next_txn_id[len] = '\0';
+
+  /* Write the incremented value to a unique temp file, then rename it
+     into place so readers never see a partially written txn-current. */
+  SVN_ERR(svn_io_write_unique(&tmp_filename,
+                              svn_dirent_dirname(txn_current_filename, pool),
+                              next_txn_id, len, svn_io_file_del_none, pool));
+  SVN_ERR(move_into_place(tmp_filename, txn_current_filename,
+                          txn_current_filename, pool));
+
+  return SVN_NO_ERROR;
+}
+
+/* Create a unique directory for a transaction in FS based on revision
+   REV.  Return the ID for this transaction in *ID_P.  Use a sequence
+   value in the transaction ID to prevent reuse of transaction IDs. */
+static svn_error_t *
+create_txn_dir(const char **id_p, svn_fs_t *fs, svn_revnum_t rev,
+               apr_pool_t *pool)
+{
+  struct get_and_increment_txn_key_baton cb;
+  const char *txn_dir;
+
+  /* Get the current transaction sequence value, which is a base-36
+     number, from the txn-current file, and write an
+     incremented value back out to the file.  Place the revision
+     number the transaction is based off into the transaction id. */
+  cb.pool = pool;
+  cb.fs = fs;
+  SVN_ERR(with_txn_current_lock(fs,
+                                get_and_increment_txn_key_body,
+                                &cb,
+                                pool));
+  *id_p = apr_psprintf(pool, "%ld-%s", rev, cb.txn_id);
+
+  txn_dir = svn_dirent_join_many(pool,
+                                 fs->path,
+                                 PATH_TXNS_DIR,
+                                 apr_pstrcat(pool, *id_p, PATH_EXT_TXN,
+                                             (char *)NULL),
+                                 NULL);
+
+  return svn_io_dir_make(txn_dir, APR_OS_DEFAULT, pool);
+}
+
+/* Create a unique directory for a transaction in FS based on revision
+   REV.  Return the ID for this transaction in *ID_P.  This
+   implementation is used in svn 1.4 and earlier repositories and is
+   kept in 1.5 and greater to support the --pre-1.4-compatible and
+   --pre-1.5-compatible repository creation options.  Reused
+   transaction IDs are possible with this implementation.
*/
+static svn_error_t *
+create_txn_dir_pre_1_5(const char **id_p, svn_fs_t *fs, svn_revnum_t rev,
+                       apr_pool_t *pool)
+{
+  unsigned int i;
+  apr_pool_t *subpool;
+  const char *unique_path, *prefix;
+
+  /* Try to create directories named "<txndir>/<rev>-<uniqueifier>.txn". */
+  prefix = svn_dirent_join_many(pool, fs->path, PATH_TXNS_DIR,
+                                apr_psprintf(pool, "%ld", rev), NULL);
+
+  subpool = svn_pool_create(pool);
+  for (i = 1; i <= 99999; i++)
+    {
+      svn_error_t *err;
+
+      svn_pool_clear(subpool);
+      unique_path = apr_psprintf(subpool, "%s-%u" PATH_EXT_TXN, prefix, i);
+      err = svn_io_dir_make(unique_path, APR_OS_DEFAULT, subpool);
+      if (! err)
+        {
+          /* We succeeded.  Return the basename minus the ".txn" extension. */
+          const char *name = svn_dirent_basename(unique_path, subpool);
+          *id_p = apr_pstrndup(pool, name,
+                               strlen(name) - strlen(PATH_EXT_TXN));
+          svn_pool_destroy(subpool);
+          return SVN_NO_ERROR;
+        }
+      /* EEXIST just means another txn grabbed this name; try the next one. */
+      if (! APR_STATUS_IS_EEXIST(err->apr_err))
+        return svn_error_trace(err);
+      svn_error_clear(err);
+    }
+
+  return svn_error_createf(SVN_ERR_IO_UNIQUE_NAMES_EXHAUSTED,
+                           NULL,
+                           _("Unable to create transaction directory "
+                             "in '%s' for revision %ld"),
+                           svn_dirent_local_style(fs->path, pool),
+                           rev);
+}
+
+svn_error_t *
+svn_fs_fs__create_txn(svn_fs_txn_t **txn_p,
+                      svn_fs_t *fs,
+                      svn_revnum_t rev,
+                      apr_pool_t *pool)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+  svn_fs_txn_t *txn;
+  svn_fs_id_t *root_id;
+
+  txn = apr_pcalloc(pool, sizeof(*txn));
+
+  /* Get the txn_id. */
+  if (ffd->format >= SVN_FS_FS__MIN_TXN_CURRENT_FORMAT)
+    SVN_ERR(create_txn_dir(&txn->id, fs, rev, pool));
+  else
+    SVN_ERR(create_txn_dir_pre_1_5(&txn->id, fs, rev, pool));
+
+  txn->fs = fs;
+  txn->base_rev = rev;
+
+  txn->vtable = &txn_vtable;
+  *txn_p = txn;
+
+  /* Create a new root node for this transaction. */
+  SVN_ERR(svn_fs_fs__rev_get_root(&root_id, fs, rev, pool));
+  SVN_ERR(create_new_txn_noderev_from_rev(fs, txn->id, root_id, pool));
+
+  /* Create an empty rev file. */
+  SVN_ERR(svn_io_file_create(path_txn_proto_rev(fs, txn->id, pool), "",
+                             pool));
+
+  /* Create an empty rev-lock file. */
+  SVN_ERR(svn_io_file_create(path_txn_proto_rev_lock(fs, txn->id, pool), "",
+                             pool));
+
+  /* Create an empty changes file. */
+  SVN_ERR(svn_io_file_create(path_txn_changes(fs, txn->id, pool), "",
+                             pool));
+
+  /* Create the next-ids file. */
+  return svn_io_file_create(path_txn_next_ids(fs, txn->id, pool), "0 0\n",
+                            pool);
+}
+
+/* Store the property list for transaction TXN_ID in PROPLIST.
+   Perform temporary allocations in POOL. */
+static svn_error_t *
+get_txn_proplist(apr_hash_t *proplist,
+                 svn_fs_t *fs,
+                 const char *txn_id,
+                 apr_pool_t *pool)
+{
+  svn_stream_t *stream;
+
+  /* Check for issue #3696. (When we find and fix the cause, we can change
+   * this to an assertion.) */
+  if (txn_id == NULL)
+    return svn_error_create(SVN_ERR_INCORRECT_PARAMS, NULL,
+                            _("Internal error: a null transaction id was "
+                              "passed to get_txn_proplist()"));
+
+  /* Open the transaction properties file. */
+  SVN_ERR(svn_stream_open_readonly(&stream, path_txn_props(fs, txn_id, pool),
+                                   pool, pool));
+
+  /* Read in the property list. */
+  SVN_ERR(svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool));
+
+  return svn_stream_close(stream);
+}
+
+svn_error_t *
+svn_fs_fs__change_txn_prop(svn_fs_txn_t *txn,
+                           const char *name,
+                           const svn_string_t *value,
+                           apr_pool_t *pool)
+{
+  apr_array_header_t *props = apr_array_make(pool, 1, sizeof(svn_prop_t));
+  svn_prop_t prop;
+
+  prop.name = name;
+  prop.value = value;
+  APR_ARRAY_PUSH(props, svn_prop_t) = prop;
+
+  /* Delegate to the multi-prop implementation. */
+  return svn_fs_fs__change_txn_props(txn, props, pool);
+}
+
+svn_error_t *
+svn_fs_fs__change_txn_props(svn_fs_txn_t *txn,
+                            const apr_array_header_t *props,
+                            apr_pool_t *pool)
+{
+  const char *txn_prop_filename;
+  svn_stringbuf_t *buf;
+  svn_stream_t *stream;
+  apr_hash_t *txn_prop = apr_hash_make(pool);
+  int i;
+  svn_error_t *err;
+
+  err = get_txn_proplist(txn_prop, txn->fs, txn->id, pool);
+  /* Here - and here only - we need to deal with the possibility that the
+     transaction property file doesn't yet exist.  The rest of the
+     implementation assumes that the file exists, but we're called to set the
+     initial transaction properties as the transaction is being created. */
+  if (err && (APR_STATUS_IS_ENOENT(err->apr_err)))
+    svn_error_clear(err);
+  else if (err)
+    return svn_error_trace(err);
+
+  for (i = 0; i < props->nelts; i++)
+    {
+      svn_prop_t *prop = &APR_ARRAY_IDX(props, i, svn_prop_t);
+
+      svn_hash_sets(txn_prop, prop->name, prop->value);
+    }
+
+  /* Create a new version of the file and write out the new props. */
+  /* Serialize the merged property list into an in-memory buffer. */
+  buf = svn_stringbuf_create_ensure(1024, pool);
+  stream = svn_stream_from_stringbuf(buf, pool);
+  SVN_ERR(svn_hash_write2(txn_prop, stream, SVN_HASH_TERMINATOR, pool));
+  SVN_ERR(svn_stream_close(stream));
+  /* Write to a unique temp file, then rename into place for atomicity. */
+  SVN_ERR(svn_io_write_unique(&txn_prop_filename,
+                              path_txn_dir(txn->fs, txn->id, pool),
+                              buf->data,
+                              buf->len,
+                              svn_io_file_del_none,
+                              pool));
+  return svn_io_file_rename(txn_prop_filename,
+                            path_txn_props(txn->fs, txn->id, pool),
+                            pool);
+}
+
+svn_error_t *
+svn_fs_fs__get_txn(transaction_t **txn_p,
+                   svn_fs_t *fs,
+                   const char *txn_id,
+                   apr_pool_t *pool)
+{
+  transaction_t *txn;
+  node_revision_t *noderev;
+  svn_fs_id_t *root_id;
+
+  txn = apr_pcalloc(pool, sizeof(*txn));
+  txn->proplist = apr_hash_make(pool);
+
+  SVN_ERR(get_txn_proplist(txn->proplist, fs, txn_id, pool));
+  /* The txn root always has node id "0" and copy id "0". */
+  root_id = svn_fs_fs__id_txn_create("0", "0", txn_id, pool);
+
+  SVN_ERR(svn_fs_fs__get_node_revision(&noderev, fs, root_id, pool));
+
+  txn->root_id = svn_fs_fs__id_copy(noderev->id, pool);
+  txn->base_id = svn_fs_fs__id_copy(noderev->predecessor_id, pool);
+  txn->copies = NULL;
+
+  *txn_p = txn;
+
+  return SVN_NO_ERROR;
+}
+
+/* Write out the currently available next node_id NODE_ID and copy_id
+   COPY_ID for transaction TXN_ID in filesystem FS.  The next node-id is
+   used both for creating new unique nodes for the given transaction, as
+   well as uniquifying representations.  Perform temporary allocations in
+   POOL.
*/
+static svn_error_t *
+write_next_ids(svn_fs_t *fs,
+               const char *txn_id,
+               const char *node_id,
+               const char *copy_id,
+               apr_pool_t *pool)
+{
+  apr_file_t *file;
+  svn_stream_t *out_stream;
+
+  SVN_ERR(svn_io_file_open(&file, path_txn_next_ids(fs, txn_id, pool),
+                           APR_WRITE | APR_TRUNCATE,
+                           APR_OS_DEFAULT, pool));
+
+  out_stream = svn_stream_from_aprfile2(file, TRUE, pool);
+
+  SVN_ERR(svn_stream_printf(out_stream, pool, "%s %s\n", node_id, copy_id));
+
+  SVN_ERR(svn_stream_close(out_stream));
+  return svn_io_file_close(file, pool);
+}
+
+/* Find out what the next unique node-id and copy-id are for
+   transaction TXN_ID in filesystem FS.  Store the results in *NODE_ID
+   and *COPY_ID.  The next node-id is used both for creating new unique
+   nodes for the given transaction, as well as uniquifying representations.
+   Perform all allocations in POOL. */
+static svn_error_t *
+read_next_ids(const char **node_id,
+              const char **copy_id,
+              svn_fs_t *fs,
+              const char *txn_id,
+              apr_pool_t *pool)
+{
+  apr_file_t *file;
+  /* two keys plus separating space, '\n' and '\0' */
+  char buf[MAX_KEY_SIZE*2+3];
+  apr_size_t limit;
+  char *str, *last_str = buf;
+
+  SVN_ERR(svn_io_file_open(&file, path_txn_next_ids(fs, txn_id, pool),
+                           APR_READ | APR_BUFFERED, APR_OS_DEFAULT, pool));
+
+  limit = sizeof(buf);
+  SVN_ERR(svn_io_read_length_line(file, buf, &limit, pool));
+
+  SVN_ERR(svn_io_file_close(file, pool));
+
+  /* Parse this into two separate strings. */
+
+  str = svn_cstring_tokenize(" ", &last_str);
+  if (! str)
+    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+                            _("next-id file corrupt"));
+
+  *node_id = apr_pstrdup(pool, str);
+
+  str = svn_cstring_tokenize(" ", &last_str);
+  if (! str)
+    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+                            _("next-id file corrupt"));
+
+  *copy_id = apr_pstrdup(pool, str);
+
+  return SVN_NO_ERROR;
+}
+
+/* Get a new and unique to this transaction node-id for transaction
+   TXN_ID in filesystem FS.  Store the new node-id in *NODE_ID_P.
+   Node-ids are guaranteed to be unique to this transaction, but may
+   not necessarily be sequential.  Perform all allocations in POOL. */
+static svn_error_t *
+get_new_txn_node_id(const char **node_id_p,
+                    svn_fs_t *fs,
+                    const char *txn_id,
+                    apr_pool_t *pool)
+{
+  const char *cur_node_id, *cur_copy_id;
+  char *node_id;
+  apr_size_t len;
+
+  /* First read in the current next-ids file. */
+  SVN_ERR(read_next_ids(&cur_node_id, &cur_copy_id, fs, txn_id, pool));
+
+  /* one extra byte for possible key growth plus the terminator */
+  node_id = apr_pcalloc(pool, strlen(cur_node_id) + 2);
+
+  len = strlen(cur_node_id);
+  svn_fs_fs__next_key(cur_node_id, &len, node_id);
+
+  SVN_ERR(write_next_ids(fs, txn_id, node_id, cur_copy_id, pool));
+
+  /* hand out the *previous* value, prefixed to mark it txn-local */
+  *node_id_p = apr_pstrcat(pool, "_", cur_node_id, (char *)NULL);
+
+  return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_fs_fs__create_node(const svn_fs_id_t **id_p,
+                       svn_fs_t *fs,
+                       node_revision_t *noderev,
+                       const char *copy_id,
+                       const char *txn_id,
+                       apr_pool_t *pool)
+{
+  const char *node_id;
+  const svn_fs_id_t *id;
+
+  /* Get a new node-id for this node. */
+  SVN_ERR(get_new_txn_node_id(&node_id, fs, txn_id, pool));
+
+  id = svn_fs_fs__id_txn_create(node_id, copy_id, txn_id, pool);
+
+  noderev->id = id;
+
+  SVN_ERR(svn_fs_fs__put_node_revision(fs, noderev->id, noderev, FALSE, pool));
+
+  *id_p = id;
+
+  return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_fs_fs__purge_txn(svn_fs_t *fs,
+                     const char *txn_id,
+                     apr_pool_t *pool)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+
+  /* Remove the shared transaction object associated with this transaction. */
+  SVN_ERR(purge_shared_txn(fs, txn_id, pool));
+  /* Remove the directory associated with this transaction. */
+  SVN_ERR(svn_io_remove_dir2(path_txn_dir(fs, txn_id, pool), FALSE,
+                             NULL, NULL, pool));
+  if (ffd->format >= SVN_FS_FS__MIN_PROTOREVS_DIR_FORMAT)
+    {
+      /* Delete protorev and its lock, which aren't in the txn
+         directory.  It's OK if they don't exist (for example, if this
+         is post-commit and the proto-rev has been moved into
+         place). */
+      SVN_ERR(svn_io_remove_file2(path_txn_proto_rev(fs, txn_id, pool),
+                                  TRUE, pool));
+      SVN_ERR(svn_io_remove_file2(path_txn_proto_rev_lock(fs, txn_id, pool),
+                                  TRUE, pool));
+    }
+  return SVN_NO_ERROR;
+}
+
+
+svn_error_t *
+svn_fs_fs__abort_txn(svn_fs_txn_t *txn,
+                     apr_pool_t *pool)
+{
+  SVN_ERR(svn_fs__check_fs(txn->fs, TRUE));
+
+  /* Now, purge the transaction. */
+  SVN_ERR_W(svn_fs_fs__purge_txn(txn->fs, txn->id, pool),
+            apr_psprintf(pool, _("Transaction '%s' cleanup failed"),
+                         txn->id));
+
+  return SVN_NO_ERROR;
+}
+
+
+svn_error_t *
+svn_fs_fs__set_entry(svn_fs_t *fs,
+                     const char *txn_id,
+                     node_revision_t *parent_noderev,
+                     const char *name,
+                     const svn_fs_id_t *id,
+                     svn_node_kind_t kind,
+                     apr_pool_t *pool)
+{
+  representation_t *rep = parent_noderev->data_rep;
+  const char *filename = path_txn_node_children(fs, parent_noderev->id, pool);
+  apr_file_t *file;
+  svn_stream_t *out;
+  fs_fs_data_t *ffd = fs->fsap_data;
+  apr_pool_t *subpool = svn_pool_create(pool);
+
+  if (!rep || !rep->txn_id)
+    {
+      const char *unique_suffix;
+      apr_hash_t *entries;
+
+      /* Before we can modify the directory, we need to dump its old
+         contents into a mutable representation file. */
+      SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, fs, parent_noderev,
+                                          subpool));
+      SVN_ERR(unparse_dir_entries(&entries, entries, subpool));
+      SVN_ERR(svn_io_file_open(&file, filename,
+                               APR_WRITE | APR_CREATE | APR_BUFFERED,
+                               APR_OS_DEFAULT, pool));
+      out = svn_stream_from_aprfile2(file, TRUE, pool);
+      SVN_ERR(svn_hash_write2(entries, out, SVN_HASH_TERMINATOR, subpool));
+
+      svn_pool_clear(subpool);
+
+      /* Mark the node-rev's data rep as mutable. */
+      rep = apr_pcalloc(pool, sizeof(*rep));
+      rep->revision = SVN_INVALID_REVNUM;
+      rep->txn_id = txn_id;
+      SVN_ERR(get_new_txn_node_id(&unique_suffix, fs, txn_id, pool));
+      rep->uniquifier = apr_psprintf(pool, "%s/%s", txn_id, unique_suffix);
+      parent_noderev->data_rep = rep;
+      SVN_ERR(svn_fs_fs__put_node_revision(fs, parent_noderev->id,
+                                           parent_noderev, FALSE, pool));
+    }
+  else
+    {
+      /* The directory rep is already mutable, so just open it for append. */
+      SVN_ERR(svn_io_file_open(&file, filename, APR_WRITE | APR_APPEND,
+                               APR_OS_DEFAULT, pool));
+      out = svn_stream_from_aprfile2(file, TRUE, pool);
+    }
+
+  /* if we have a directory cache for this transaction, update it */
+  if (ffd->txn_dir_cache)
+    {
+      /* build parameters: (name, new entry) pair */
+      const char *key =
+          svn_fs_fs__id_unparse(parent_noderev->id, subpool)->data;
+      replace_baton_t baton;
+
+      baton.name = name;
+      baton.new_entry = NULL;
+
+      if (id)
+        {
+          baton.new_entry = apr_pcalloc(subpool, sizeof(*baton.new_entry));
+          baton.new_entry->name = name;
+          baton.new_entry->kind = kind;
+          baton.new_entry->id = id;
+        }
+
+      /* actually update the cached directory (if cached) */
+      SVN_ERR(svn_cache__set_partial(ffd->txn_dir_cache, key,
+                                     svn_fs_fs__replace_dir_entry, &baton,
+                                     subpool));
+    }
+  svn_pool_clear(subpool);
+
+  /* Append an incremental hash entry for the entry change. */
+  if (id)
+    {
+      const char *val = unparse_dir_entry(kind, id, subpool);
+
+      SVN_ERR(svn_stream_printf(out, subpool, "K %" APR_SIZE_T_FMT "\n%s\n"
+                                "V %" APR_SIZE_T_FMT "\n%s\n",
+                                strlen(name), name,
+                                strlen(val), val));
+    }
+  else
+    {
+      SVN_ERR(svn_stream_printf(out, subpool, "D %" APR_SIZE_T_FMT "\n%s\n",
+                                strlen(name), name));
+    }
+
+  SVN_ERR(svn_io_file_close(file, subpool));
+  svn_pool_destroy(subpool);
+  return SVN_NO_ERROR;
+}
+
+/* Write a single change entry, path PATH, change CHANGE, and copyfrom
+   string COPYFROM, into the file specified by FILE.  Only include the
+   node kind field if INCLUDE_NODE_KIND is true.
All temporary
+   allocations are in POOL. */
+static svn_error_t *
+write_change_entry(apr_file_t *file,
+                   const char *path,
+                   svn_fs_path_change2_t *change,
+                   svn_boolean_t include_node_kind,
+                   apr_pool_t *pool)
+{
+  const char *idstr, *buf;
+  const char *change_string = NULL;
+  const char *kind_string = "";
+
+  /* Map the change kind onto its on-disk action token. */
+  switch (change->change_kind)
+    {
+    case svn_fs_path_change_modify:
+      change_string = ACTION_MODIFY;
+      break;
+    case svn_fs_path_change_add:
+      change_string = ACTION_ADD;
+      break;
+    case svn_fs_path_change_delete:
+      change_string = ACTION_DELETE;
+      break;
+    case svn_fs_path_change_replace:
+      change_string = ACTION_REPLACE;
+      break;
+    case svn_fs_path_change_reset:
+      change_string = ACTION_RESET;
+      break;
+    default:
+      return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+                               _("Invalid change type %d"),
+                               change->change_kind);
+    }
+
+  if (change->node_rev_id)
+    idstr = svn_fs_fs__id_unparse(change->node_rev_id, pool)->data;
+  else
+    idstr = ACTION_RESET;
+
+  if (include_node_kind)
+    {
+      SVN_ERR_ASSERT(change->node_kind == svn_node_dir
+                     || change->node_kind == svn_node_file);
+      kind_string = apr_psprintf(pool, "-%s",
+                                 change->node_kind == svn_node_dir
+                                 ? KIND_DIR : KIND_FILE);
+    }
+  buf = apr_psprintf(pool, "%s %s%s %s %s %s\n",
+                     idstr, change_string, kind_string,
+                     change->text_mod ? FLAG_TRUE : FLAG_FALSE,
+                     change->prop_mod ? FLAG_TRUE : FLAG_FALSE,
+                     path);
+
+  SVN_ERR(svn_io_file_write_full(file, buf, strlen(buf), NULL, pool));
+
+  /* Optional second line: copy-from revision and path. */
+  if (SVN_IS_VALID_REVNUM(change->copyfrom_rev))
+    {
+      buf = apr_psprintf(pool, "%ld %s", change->copyfrom_rev,
+                         change->copyfrom_path);
+      SVN_ERR(svn_io_file_write_full(file, buf, strlen(buf), NULL, pool));
+    }
+
+  return svn_io_file_write_full(file, "\n", 1, NULL, pool);
+}
+
+svn_error_t *
+svn_fs_fs__add_change(svn_fs_t *fs,
+                      const char *txn_id,
+                      const char *path,
+                      const svn_fs_id_t *id,
+                      svn_fs_path_change_kind_t change_kind,
+                      svn_boolean_t text_mod,
+                      svn_boolean_t prop_mod,
+                      svn_node_kind_t node_kind,
+                      svn_revnum_t copyfrom_rev,
+                      const char *copyfrom_path,
+                      apr_pool_t *pool)
+{
+  apr_file_t *file;
+  svn_fs_path_change2_t *change;
+
+  SVN_ERR(svn_io_file_open(&file, path_txn_changes(fs, txn_id, pool),
+                           APR_APPEND | APR_WRITE | APR_CREATE
+                           | APR_BUFFERED, APR_OS_DEFAULT, pool));
+
+  change = svn_fs__path_change_create_internal(id, change_kind, pool);
+  change->text_mod = text_mod;
+  change->prop_mod = prop_mod;
+  change->node_kind = node_kind;
+  change->copyfrom_rev = copyfrom_rev;
+  change->copyfrom_path = apr_pstrdup(pool, copyfrom_path);
+
+  SVN_ERR(write_change_entry(file, path, change, TRUE, pool));
+
+  return svn_io_file_close(file, pool);
+}
+
+/* This baton is used by the representation writing streams.  It keeps
+   track of the checksum information as well as the total size of the
+   representation so far. */
+struct rep_write_baton
+{
+  /* The FS we are writing to. */
+  svn_fs_t *fs;
+
+  /* Actual file to which we are writing. */
+  svn_stream_t *rep_stream;
+
+  /* A stream from the delta combiner.  Data written here gets
+     deltified, then eventually written to rep_stream. */
+  svn_stream_t *delta_stream;
+
+  /* Where is this representation header stored. */
+  apr_off_t rep_offset;
+
+  /* Start of the actual data. */
+  apr_off_t delta_start;
+
+  /* How many bytes have been written to this rep already.
*/
+  svn_filesize_t rep_size;
+
+  /* The node revision for which we're writing out info. */
+  node_revision_t *noderev;
+
+  /* Actual output file. */
+  apr_file_t *file;
+  /* Lock 'cookie' used to unlock the output file once we've finished
+     writing to it. */
+  void *lockcookie;
+
+  svn_checksum_ctx_t *md5_checksum_ctx;
+  svn_checksum_ctx_t *sha1_checksum_ctx;
+
+  apr_pool_t *pool;
+
+  apr_pool_t *parent_pool;
+};
+
+/* Handler for the write method of the representation writable stream.
+   BATON is a rep_write_baton, DATA is the data to write, and *LEN is
+   the length of this data. */
+static svn_error_t *
+rep_write_contents(void *baton,
+                   const char *data,
+                   apr_size_t *len)
+{
+  struct rep_write_baton *b = baton;
+
+  /* Update both running checksums and the expanded-size counter. */
+  SVN_ERR(svn_checksum_update(b->md5_checksum_ctx, data, *len));
+  SVN_ERR(svn_checksum_update(b->sha1_checksum_ctx, data, *len));
+  b->rep_size += *len;
+
+  /* If we are writing a delta, use that stream. */
+  if (b->delta_stream)
+    return svn_stream_write(b->delta_stream, data, len);
+  else
+    return svn_stream_write(b->rep_stream, data, len);
+}
+
+/* Given a node-revision NODEREV in filesystem FS, return the
+   representation in *REP to use as the base for a text representation
+   delta if PROPS is FALSE.  If PROPS has been set, a suitable props
+   base representation will be returned.  Perform temporary allocations
+   in *POOL. */
+static svn_error_t *
+choose_delta_base(representation_t **rep,
+                  svn_fs_t *fs,
+                  node_revision_t *noderev,
+                  svn_boolean_t props,
+                  apr_pool_t *pool)
+{
+  int count;
+  int walk;
+  node_revision_t *base;
+  fs_fs_data_t *ffd = fs->fsap_data;
+  svn_boolean_t maybe_shared_rep = FALSE;
+
+  /* If we have no predecessors, then use the empty stream as a
+     base. */
+  if (! noderev->predecessor_count)
+    {
+      *rep = NULL;
+      return SVN_NO_ERROR;
+    }
+
+  /* Flip the rightmost '1' bit of the predecessor count to determine
+     which file rev (counting from 0) we want to use.  (To see why
+     count & (count - 1) unsets the rightmost set bit, think about how
+     you decrement a binary number.) */
+  count = noderev->predecessor_count;
+  count = count & (count - 1);
+
+  /* We use skip delta for limiting the number of delta operations
+     along very long node histories.  Close to HEAD however, we create
+     a linear history to minimize delta size. */
+  walk = noderev->predecessor_count - count;
+  if (walk < (int)ffd->max_linear_deltification)
+    count = noderev->predecessor_count - 1;
+
+  /* Finding the delta base over a very long distance can become extremely
+     expensive for very deep histories, possibly causing client timeouts etc.
+     OTOH, this is a rare operation and its gains are minimal. Lets simply
+     start deltification anew close every other 1000 changes or so. */
+  if (walk > (int)ffd->max_deltification_walk)
+    {
+      *rep = NULL;
+      return SVN_NO_ERROR;
+    }
+
+  /* Walk back a number of predecessors equal to the difference
+     between count and the original predecessor count.  (For example,
+     if noderev has ten predecessors and we want the eighth file rev,
+     walk back two predecessors.) */
+  base = noderev;
+  while ((count++) < noderev->predecessor_count)
+    {
+      SVN_ERR(svn_fs_fs__get_node_revision(&base, fs,
+                                           base->predecessor_id, pool));
+
+      /* If there is a shared rep along the way, we need to limit the
+       * length of the deltification chain.
+       *
+       * Please note that copied nodes - such as branch directories - will
+       * look the same (false positive) while reps shared within the same
+       * revision will not be caught (false negative).
+       */
+      if (props)
+        {
+          if (   base->prop_rep
+              && svn_fs_fs__id_rev(base->id) > base->prop_rep->revision)
+            maybe_shared_rep = TRUE;
+        }
+      else
+        {
+          if (   base->data_rep
+              && svn_fs_fs__id_rev(base->id) > base->data_rep->revision)
+            maybe_shared_rep = TRUE;
+        }
+    }
+
+  /* return a suitable base representation */
+  *rep = props ? base->prop_rep : base->data_rep;
+
+  /* if we encountered a shared rep, its parent chain may be different
+   * from the node-rev parent chain. */
+  if (*rep && maybe_shared_rep)
+    {
+      /* Check whether the length of the deltification chain is acceptable.
+       * Otherwise, shared reps may form a non-skipping delta chain in
+       * extreme cases. */
+      apr_pool_t *sub_pool = svn_pool_create(pool);
+      representation_t base_rep = **rep;
+
+      /* Some reasonable limit, depending on how acceptable longer linear
+       * chains are in this repo.  Also, allow for some minimal chain. */
+      int max_chain_length = 2 * (int)ffd->max_linear_deltification + 2;
+
+      /* re-use open files between iterations */
+      svn_revnum_t rev_hint = SVN_INVALID_REVNUM;
+      apr_file_t *file_hint = NULL;
+
+      /* follow the delta chain towards the end but for at most
+       * MAX_CHAIN_LENGTH steps. */
+      for (; max_chain_length; --max_chain_length)
+        {
+          struct rep_state *rep_state;
+          struct rep_args *rep_args;
+
+          SVN_ERR(create_rep_state_body(&rep_state,
+                                        &rep_args,
+                                        &file_hint,
+                                        &rev_hint,
+                                        &base_rep,
+                                        fs,
+                                        sub_pool));
+          if (!rep_args->is_delta || !rep_args->base_revision)
+            break;
+
+          base_rep.revision = rep_args->base_revision;
+          base_rep.offset = rep_args->base_offset;
+          base_rep.size = rep_args->base_length;
+          base_rep.txn_id = NULL;
+        }
+
+      /* start new delta chain if the current one has grown too long */
+      if (max_chain_length == 0)
+        *rep = NULL;
+
+      svn_pool_destroy(sub_pool);
+    }
+
+  /* the checks above ensure the reps don't form a degenerated delta chain */
+  return SVN_NO_ERROR;
+}
+
+/* Something went wrong and the pool for the rep write is being
+   cleared before we've finished writing the rep.  So we need
+   to remove the rep from the protorevfile and we need to unlock
+   the protorevfile. */
+static apr_status_t
+rep_write_cleanup(void *data)
+{
+  struct rep_write_baton *b = data;
+  const char *txn_id = svn_fs_fs__id_txn_id(b->noderev->id);
+  svn_error_t *err;
+
+  /* Truncate and close the protorevfile.
*/
+  err = svn_io_file_trunc(b->file, b->rep_offset, b->pool);
+  err = svn_error_compose_create(err, svn_io_file_close(b->file, b->pool));
+
+  /* Remove our lock regardless of any preceding errors so that the
+     being_written flag is always removed and stays consistent with the
+     file lock which will be removed no matter what since the pool is
+     going away. */
+  err = svn_error_compose_create(err, unlock_proto_rev(b->fs, txn_id,
+                                                       b->lockcookie, b->pool));
+  if (err)
+    {
+      apr_status_t rc = err->apr_err;
+      svn_error_clear(err);
+      return rc;
+    }
+
+  return APR_SUCCESS;
+}
+
+
+/* Get a rep_write_baton and store it in *WB_P for the representation
+   indicated by NODEREV in filesystem FS.  Perform allocations in
+   POOL.  Only appropriate for file contents, not for props or
+   directory contents. */
+static svn_error_t *
+rep_write_get_baton(struct rep_write_baton **wb_p,
+                    svn_fs_t *fs,
+                    node_revision_t *noderev,
+                    apr_pool_t *pool)
+{
+  struct rep_write_baton *b;
+  apr_file_t *file;
+  representation_t *base_rep;
+  svn_stream_t *source;
+  const char *header;
+  svn_txdelta_window_handler_t wh;
+  void *whb;
+  fs_fs_data_t *ffd = fs->fsap_data;
+  int diff_version = ffd->format >= SVN_FS_FS__MIN_SVNDIFF1_FORMAT ? 1 : 0;
+
+  b = apr_pcalloc(pool, sizeof(*b));
+
+  b->sha1_checksum_ctx = svn_checksum_ctx_create(svn_checksum_sha1, pool);
+  b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
+
+  b->fs = fs;
+  b->parent_pool = pool;
+  b->pool = svn_pool_create(pool);
+  b->rep_size = 0;
+  b->noderev = noderev;
+
+  /* Open the prototype rev file and seek to its end. */
+  SVN_ERR(get_writable_proto_rev(&file, &b->lockcookie,
+                                 fs, svn_fs_fs__id_txn_id(noderev->id),
+                                 b->pool));
+
+  b->file = file;
+  b->rep_stream = svn_stream_from_aprfile2(file, TRUE, b->pool);
+
+  SVN_ERR(get_file_offset(&b->rep_offset, file, b->pool));
+
+  /* Get the base for this delta. */
+  SVN_ERR(choose_delta_base(&base_rep, fs, noderev, FALSE, b->pool));
+  SVN_ERR(read_representation(&source, fs, base_rep, b->pool));
+
+  /* Write out the rep header. */
+  if (base_rep)
+    {
+      header = apr_psprintf(b->pool, REP_DELTA " %ld %" APR_OFF_T_FMT " %"
+                            SVN_FILESIZE_T_FMT "\n",
+                            base_rep->revision, base_rep->offset,
+                            base_rep->size);
+    }
+  else
+    {
+      /* No base: delta against the empty stream. */
+      header = REP_DELTA "\n";
+    }
+  SVN_ERR(svn_io_file_write_full(file, header, strlen(header), NULL,
+                                 b->pool));
+
+  /* Now determine the offset of the actual svndiff data. */
+  SVN_ERR(get_file_offset(&b->delta_start, file, b->pool));
+
+  /* Cleanup in case something goes wrong. */
+  apr_pool_cleanup_register(b->pool, b, rep_write_cleanup,
+                            apr_pool_cleanup_null);
+
+  /* Prepare to write the svndiff data. */
+  svn_txdelta_to_svndiff3(&wh,
+                          &whb,
+                          b->rep_stream,
+                          diff_version,
+                          SVN_DELTA_COMPRESSION_LEVEL_DEFAULT,
+                          pool);
+
+  b->delta_stream = svn_txdelta_target_push(wh, whb, source, b->pool);
+
+  *wb_p = b;
+
+  return SVN_NO_ERROR;
+}
+
+/* For the hash REP->SHA1, try to find an already existing representation
+   in FS and return it in *OUT_REP.  If no such representation exists or
+   if rep sharing has been disabled for FS, NULL will be returned.  Since
+   there may be new duplicate representations within the same uncommitted
+   revision, those can be passed in REPS_HASH (maps a sha1 digest onto
+   representation_t*), otherwise pass in NULL for REPS_HASH.
+   POOL will be used for allocations. The lifetime of the returned rep is
+   limited by both, POOL and REP lifetime.
+ */
+static svn_error_t *
+get_shared_rep(representation_t **old_rep,
+               svn_fs_t *fs,
+               representation_t *rep,
+               apr_hash_t *reps_hash,
+               apr_pool_t *pool)
+{
+  svn_error_t *err;
+  fs_fs_data_t *ffd = fs->fsap_data;
+
+  /* Return NULL, if rep sharing has been disabled.
*/ + *old_rep = NULL; + if (!ffd->rep_sharing_allowed) + return SVN_NO_ERROR; + + /* Check and see if we already have a representation somewhere that's + identical to the one we just wrote out. Start with the hash lookup + because it is cheepest. */ + if (reps_hash) + *old_rep = apr_hash_get(reps_hash, + rep->sha1_checksum->digest, + APR_SHA1_DIGESTSIZE); + + /* If we haven't found anything yet, try harder and consult our DB. */ + if (*old_rep == NULL) + { + err = svn_fs_fs__get_rep_reference(old_rep, fs, rep->sha1_checksum, + pool); + /* ### Other error codes that we shouldn't mask out? */ + if (err == SVN_NO_ERROR) + { + if (*old_rep) + SVN_ERR(verify_walker(*old_rep, NULL, fs, pool)); + } + else if (err->apr_err == SVN_ERR_FS_CORRUPT + || SVN_ERROR_IN_CATEGORY(err->apr_err, + SVN_ERR_MALFUNC_CATEGORY_START)) + { + /* Fatal error; don't mask it. + + In particular, this block is triggered when the rep-cache refers + to revisions in the future. We signal that as a corruption situation + since, once those revisions are less than youngest (because of more + commits), the rep-cache would be invalid. + */ + SVN_ERR(err); + } + else + { + /* Something's wrong with the rep-sharing index. We can continue + without rep-sharing, but warn. + */ + (fs->warning)(fs->warning_baton, err); + svn_error_clear(err); + *old_rep = NULL; + } + } + + /* look for intra-revision matches (usually data reps but not limited + to them in case props happen to look like some data rep) + */ + if (*old_rep == NULL && rep->txn_id) + { + svn_node_kind_t kind; + const char *file_name + = path_txn_sha1(fs, rep->txn_id, rep->sha1_checksum, pool); + + /* in our txn, is there a rep file named with the wanted SHA1? + If so, read it and use that rep. 
+ */ + SVN_ERR(svn_io_check_path(file_name, &kind, pool)); + if (kind == svn_node_file) + { + svn_stringbuf_t *rep_string; + SVN_ERR(svn_stringbuf_from_file2(&rep_string, file_name, pool)); + SVN_ERR(read_rep_offsets_body(old_rep, rep_string->data, + rep->txn_id, FALSE, pool)); + } + } + + /* Add information that is missing in the cached data. */ + if (*old_rep) + { + /* Use the old rep for this content. */ + (*old_rep)->md5_checksum = rep->md5_checksum; + (*old_rep)->uniquifier = rep->uniquifier; + } + + return SVN_NO_ERROR; +} + +/* Close handler for the representation write stream. BATON is a + rep_write_baton. Writes out a new node-rev that correctly + references the representation we just finished writing. */ +static svn_error_t * +rep_write_contents_close(void *baton) +{ + struct rep_write_baton *b = baton; + const char *unique_suffix; + representation_t *rep; + representation_t *old_rep; + apr_off_t offset; + + rep = apr_pcalloc(b->parent_pool, sizeof(*rep)); + rep->offset = b->rep_offset; + + /* Close our delta stream so the last bits of svndiff are written + out. */ + if (b->delta_stream) + SVN_ERR(svn_stream_close(b->delta_stream)); + + /* Determine the length of the svndiff data. */ + SVN_ERR(get_file_offset(&offset, b->file, b->pool)); + rep->size = offset - b->delta_start; + + /* Fill in the rest of the representation field. */ + rep->expanded_size = b->rep_size; + rep->txn_id = svn_fs_fs__id_txn_id(b->noderev->id); + SVN_ERR(get_new_txn_node_id(&unique_suffix, b->fs, rep->txn_id, b->pool)); + rep->uniquifier = apr_psprintf(b->parent_pool, "%s/%s", rep->txn_id, + unique_suffix); + rep->revision = SVN_INVALID_REVNUM; + + /* Finalize the checksum. */ + SVN_ERR(svn_checksum_final(&rep->md5_checksum, b->md5_checksum_ctx, + b->parent_pool)); + SVN_ERR(svn_checksum_final(&rep->sha1_checksum, b->sha1_checksum_ctx, + b->parent_pool)); + + /* Check and see if we already have a representation somewhere that's + identical to the one we just wrote out. 
 */
  SVN_ERR(get_shared_rep(&old_rep, b->fs, rep, NULL, b->parent_pool));

  if (old_rep)
    {
      /* We need to erase from the protorev the data we just wrote. */
      SVN_ERR(svn_io_file_trunc(b->file, b->rep_offset, b->pool));

      /* Use the old rep for this content. */
      b->noderev->data_rep = old_rep;
    }
  else
    {
      /* Write out our cosmetic end marker. */
      SVN_ERR(svn_stream_puts(b->rep_stream, "ENDREP\n"));

      b->noderev->data_rep = rep;
    }

  /* Remove cleanup callback. */
  apr_pool_cleanup_kill(b->pool, b, rep_write_cleanup);

  /* Write out the new node-rev information. */
  SVN_ERR(svn_fs_fs__put_node_revision(b->fs, b->noderev->id, b->noderev, FALSE,
                                       b->pool));
  if (!old_rep)
    SVN_ERR(store_sha1_rep_mapping(b->fs, b->noderev, b->pool));

  SVN_ERR(svn_io_file_close(b->file, b->pool));
  SVN_ERR(unlock_proto_rev(b->fs, rep->txn_id, b->lockcookie, b->pool));
  svn_pool_destroy(b->pool);

  return SVN_NO_ERROR;
}

/* Store a writable stream in *CONTENTS_P that will receive all data
   written and store it as the file data representation referenced by
   NODEREV in filesystem FS.  Perform temporary allocations in
   POOL.  Only appropriate for file data, not props or directory
   contents. */
static svn_error_t *
set_representation(svn_stream_t **contents_p,
                   svn_fs_t *fs,
                   node_revision_t *noderev,
                   apr_pool_t *pool)
{
  struct rep_write_baton *wb;

  /* Only nodes that belong to a transaction are mutable. */
  if (! svn_fs_fs__id_txn_id(noderev->id))
    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                             _("Attempted to write to non-transaction '%s'"),
                             svn_fs_fs__id_unparse(noderev->id, pool)->data);

  SVN_ERR(rep_write_get_baton(&wb, fs, noderev, pool));

  *contents_p = svn_stream_create(wb, pool);
  svn_stream_set_write(*contents_p, rep_write_contents);
  svn_stream_set_close(*contents_p, rep_write_contents_close);

  return SVN_NO_ERROR;
}

svn_error_t *
svn_fs_fs__set_contents(svn_stream_t **stream,
                        svn_fs_t *fs,
                        node_revision_t *noderev,
                        apr_pool_t *pool)
{
  if (noderev->kind != svn_node_file)
    return svn_error_create(SVN_ERR_FS_NOT_FILE, NULL,
                            _("Can't set text contents of a directory"));

  return set_representation(stream, fs, noderev, pool);
}

svn_error_t *
svn_fs_fs__create_successor(const svn_fs_id_t **new_id_p,
                            svn_fs_t *fs,
                            const svn_fs_id_t *old_idp,
                            node_revision_t *new_noderev,
                            const char *copy_id,
                            const char *txn_id,
                            apr_pool_t *pool)
{
  const svn_fs_id_t *id;

  /* Inherit the predecessor's copy id unless the caller supplied one. */
  if (! copy_id)
    copy_id = svn_fs_fs__id_copy_id(old_idp);
  id = svn_fs_fs__id_txn_create(svn_fs_fs__id_node_id(old_idp), copy_id,
                                txn_id, pool);

  new_noderev->id = id;

  if (! new_noderev->copyroot_path)
    {
      new_noderev->copyroot_path = apr_pstrdup(pool,
                                               new_noderev->created_path);
      new_noderev->copyroot_rev = svn_fs_fs__id_rev(new_noderev->id);
    }

  SVN_ERR(svn_fs_fs__put_node_revision(fs, new_noderev->id, new_noderev, FALSE,
                                       pool));

  *new_id_p = id;

  return SVN_NO_ERROR;
}

svn_error_t *
svn_fs_fs__set_proplist(svn_fs_t *fs,
                        node_revision_t *noderev,
                        apr_hash_t *proplist,
                        apr_pool_t *pool)
{
  const char *filename = path_txn_node_props(fs, noderev->id, pool);
  apr_file_t *file;
  svn_stream_t *out;

  /* Dump the property list to the mutable property file.
 */
  SVN_ERR(svn_io_file_open(&file, filename,
                           APR_WRITE | APR_CREATE | APR_TRUNCATE
                           | APR_BUFFERED, APR_OS_DEFAULT, pool));
  out = svn_stream_from_aprfile2(file, TRUE, pool);
  SVN_ERR(svn_hash_write2(proplist, out, SVN_HASH_TERMINATOR, pool));
  SVN_ERR(svn_io_file_close(file, pool));

  /* Mark the node-rev's prop rep as mutable, if not already done. */
  if (!noderev->prop_rep || !noderev->prop_rep->txn_id)
    {
      noderev->prop_rep = apr_pcalloc(pool, sizeof(*noderev->prop_rep));
      noderev->prop_rep->txn_id = svn_fs_fs__id_txn_id(noderev->id);
      SVN_ERR(svn_fs_fs__put_node_revision(fs, noderev->id, noderev, FALSE, pool));
    }

  return SVN_NO_ERROR;
}

/* Read the 'current' file for filesystem FS and store the next
   available node id in *NODE_ID, and the next available copy id in
   *COPY_ID.  Allocations are performed from POOL. */
static svn_error_t *
get_next_revision_ids(const char **node_id,
                      const char **copy_id,
                      svn_fs_t *fs,
                      apr_pool_t *pool)
{
  char *buf;
  char *str;
  svn_stringbuf_t *content;

  SVN_ERR(read_content(&content, svn_fs_fs__path_current(fs, pool), pool));
  buf = content->data;

  /* The first field is the youngest revision number; skip over it. */
  str = svn_cstring_tokenize(" ", &buf);
  if (! str)
    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                            _("Corrupt 'current' file"));

  str = svn_cstring_tokenize(" ", &buf);
  if (! str)
    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                            _("Corrupt 'current' file"));

  *node_id = apr_pstrdup(pool, str);

  str = svn_cstring_tokenize(" \n", &buf);
  if (! str)
    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                            _("Corrupt 'current' file"));

  *copy_id = apr_pstrdup(pool, str);

  return SVN_NO_ERROR;
}

/* This baton is used by the stream created for write_hash_rep. */
struct write_hash_baton
{
  /* Underlying (file) stream receiving the serialized hash. */
  svn_stream_t *stream;

  /* Number of bytes written through this baton so far. */
  apr_size_t size;

  /* Running checksums over all data written. */
  svn_checksum_ctx_t *md5_ctx;
  svn_checksum_ctx_t *sha1_ctx;
};

/* The handler for the write_hash_rep stream.  BATON is a
   write_hash_baton, DATA has the data to write and *LEN is the number
   of bytes to write. */
static svn_error_t *
write_hash_handler(void *baton,
                   const char *data,
                   apr_size_t *len)
{
  struct write_hash_baton *whb = baton;

  SVN_ERR(svn_checksum_update(whb->md5_ctx, data, *len));
  SVN_ERR(svn_checksum_update(whb->sha1_ctx, data, *len));

  SVN_ERR(svn_stream_write(whb->stream, data, len));
  whb->size += *len;

  return SVN_NO_ERROR;
}

/* Write out the hash HASH as a text representation to file FILE.  In
   the process, record position, the total size of the dump and MD5 as
   well as SHA1 in REP.  If rep sharing has been enabled and REPS_HASH
   is not NULL, it will be used in addition to the on-disk cache to find
   earlier reps with the same content.  When such existing reps can be
   found, we will truncate the one just written from the file and return
   the existing rep.  Perform temporary allocations in POOL. */
static svn_error_t *
write_hash_rep(representation_t *rep,
               apr_file_t *file,
               apr_hash_t *hash,
               svn_fs_t *fs,
               apr_hash_t *reps_hash,
               apr_pool_t *pool)
{
  svn_stream_t *stream;
  struct write_hash_baton *whb;
  representation_t *old_rep;

  SVN_ERR(get_file_offset(&rep->offset, file, pool));

  whb = apr_pcalloc(pool, sizeof(*whb));

  whb->stream = svn_stream_from_aprfile2(file, TRUE, pool);
  whb->size = 0;
  whb->md5_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
  whb->sha1_ctx = svn_checksum_ctx_create(svn_checksum_sha1, pool);

  stream = svn_stream_create(whb, pool);
  svn_stream_set_write(stream, write_hash_handler);

  /* The "PLAIN" header goes directly to the file stream, bypassing the
     checksumming wrapper: only the hash dump itself is checksummed. */
  SVN_ERR(svn_stream_puts(whb->stream, "PLAIN\n"));

  SVN_ERR(svn_hash_write2(hash, stream, SVN_HASH_TERMINATOR, pool));

  /* Store the results. */
  SVN_ERR(svn_checksum_final(&rep->md5_checksum, whb->md5_ctx, pool));
  SVN_ERR(svn_checksum_final(&rep->sha1_checksum, whb->sha1_ctx, pool));

  /* Check and see if we already have a representation somewhere that's
     identical to the one we just wrote out. */
  SVN_ERR(get_shared_rep(&old_rep, fs, rep, reps_hash, pool));

  if (old_rep)
    {
      /* We need to erase from the protorev the data we just wrote. */
      SVN_ERR(svn_io_file_trunc(file, rep->offset, pool));

      /* Use the old rep for this content. */
      memcpy(rep, old_rep, sizeof (*rep));
    }
  else
    {
      /* Write out our cosmetic end marker. */
      SVN_ERR(svn_stream_puts(whb->stream, "ENDREP\n"));

      /* update the representation */
      rep->size = whb->size;
      rep->expanded_size = 0;
    }

  return SVN_NO_ERROR;
}

/* Write out the hash HASH pertaining to the NODEREV in FS as a deltified
   text representation to file FILE.  In the process, record the total size
   and the md5 digest in REP.  If rep sharing has been enabled and REPS_HASH
   is not NULL, it will be used in addition to the on-disk cache to find
   earlier reps with the same content.  When such existing reps can be found,
   we will truncate the one just written from the file and return the existing
   rep.  If PROPS is set, assume that we want a props representation as
   the base for our delta.  Perform temporary allocations in POOL.
 */
static svn_error_t *
write_hash_delta_rep(representation_t *rep,
                     apr_file_t *file,
                     apr_hash_t *hash,
                     svn_fs_t *fs,
                     node_revision_t *noderev,
                     apr_hash_t *reps_hash,
                     svn_boolean_t props,
                     apr_pool_t *pool)
{
  svn_txdelta_window_handler_t diff_wh;
  void *diff_whb;

  svn_stream_t *file_stream;
  svn_stream_t *stream;
  representation_t *base_rep;
  representation_t *old_rep;
  svn_stream_t *source;
  const char *header;

  apr_off_t rep_end = 0;
  apr_off_t delta_start = 0;

  struct write_hash_baton *whb;
  fs_fs_data_t *ffd = fs->fsap_data;
  int diff_version = ffd->format >= SVN_FS_FS__MIN_SVNDIFF1_FORMAT ? 1 : 0;

  /* Get the base for this delta. */
  SVN_ERR(choose_delta_base(&base_rep, fs, noderev, props, pool));
  SVN_ERR(read_representation(&source, fs, base_rep, pool));

  SVN_ERR(get_file_offset(&rep->offset, file, pool));

  /* Write out the rep header. */
  if (base_rep)
    {
      header = apr_psprintf(pool, REP_DELTA " %ld %" APR_OFF_T_FMT " %"
                            SVN_FILESIZE_T_FMT "\n",
                            base_rep->revision, base_rep->offset,
                            base_rep->size);
    }
  else
    {
      header = REP_DELTA "\n";
    }
  SVN_ERR(svn_io_file_write_full(file, header, strlen(header), NULL,
                                 pool));

  /* Remember where the svndiff data itself starts. */
  SVN_ERR(get_file_offset(&delta_start, file, pool));
  file_stream = svn_stream_from_aprfile2(file, TRUE, pool);

  /* Prepare to write the svndiff data. */
  svn_txdelta_to_svndiff3(&diff_wh,
                          &diff_whb,
                          file_stream,
                          diff_version,
                          SVN_DELTA_COMPRESSION_LEVEL_DEFAULT,
                          pool);

  whb = apr_pcalloc(pool, sizeof(*whb));
  whb->stream = svn_txdelta_target_push(diff_wh, diff_whb, source, pool);
  whb->size = 0;
  whb->md5_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
  whb->sha1_ctx = svn_checksum_ctx_create(svn_checksum_sha1, pool);

  /* serialize the hash */
  stream = svn_stream_create(whb, pool);
  svn_stream_set_write(stream, write_hash_handler);

  SVN_ERR(svn_hash_write2(hash, stream, SVN_HASH_TERMINATOR, pool));
  SVN_ERR(svn_stream_close(whb->stream));

  /* Store the results. */
  SVN_ERR(svn_checksum_final(&rep->md5_checksum, whb->md5_ctx, pool));
  SVN_ERR(svn_checksum_final(&rep->sha1_checksum, whb->sha1_ctx, pool));

  /* Check and see if we already have a representation somewhere that's
     identical to the one we just wrote out. */
  SVN_ERR(get_shared_rep(&old_rep, fs, rep, reps_hash, pool));

  if (old_rep)
    {
      /* We need to erase from the protorev the data we just wrote. */
      SVN_ERR(svn_io_file_trunc(file, rep->offset, pool));

      /* Use the old rep for this content. */
      memcpy(rep, old_rep, sizeof (*rep));
    }
  else
    {
      /* Write out our cosmetic end marker. */
      SVN_ERR(get_file_offset(&rep_end, file, pool));
      SVN_ERR(svn_stream_puts(file_stream, "ENDREP\n"));

      /* update the representation */
      rep->expanded_size = whb->size;
      rep->size = rep_end - delta_start;
    }

  return SVN_NO_ERROR;
}

/* Sanity check ROOT_NODEREV, a candidate for being the root node-revision
   of (not yet committed) revision REV in FS.  Use POOL for temporary
   allocations.

   If you change this function, consider updating svn_fs_fs__verify() too.
 */
static svn_error_t *
validate_root_noderev(svn_fs_t *fs,
                      node_revision_t *root_noderev,
                      svn_revnum_t rev,
                      apr_pool_t *pool)
{
  svn_revnum_t head_revnum = rev - 1;
  int head_predecessor_count;

  SVN_ERR_ASSERT(rev > 0);

  /* Compute HEAD_PREDECESSOR_COUNT. */
  {
    svn_fs_root_t *head_revision;
    const svn_fs_id_t *head_root_id;
    node_revision_t *head_root_noderev;

    /* Get /@HEAD's noderev. */
    SVN_ERR(svn_fs_fs__revision_root(&head_revision, fs, head_revnum, pool));
    SVN_ERR(svn_fs_fs__node_id(&head_root_id, head_revision, "/", pool));
    SVN_ERR(svn_fs_fs__get_node_revision(&head_root_noderev, fs, head_root_id,
                                         pool));

    head_predecessor_count = head_root_noderev->predecessor_count;
  }

  /* Check that the root noderev's predecessor count equals REV.

     This kind of corruption was seen on svn.apache.org (both on
     the root noderev and on other fspaths' noderevs); see
     issue #4129.

     Normally (rev == root_noderev->predecessor_count), but here we
     use a more roundabout check that should only trigger on new instances
     of the corruption, rather than trigger on each and every new commit
     to a repository that has triggered the bug somewhere in its root
     noderev's history.
   */
  if (root_noderev->predecessor_count != -1
      && (root_noderev->predecessor_count - head_predecessor_count)
         != (rev - head_revnum))
    {
      return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                               _("predecessor count for "
                                 "the root node-revision is wrong: "
                                 "found (%d+%ld != %d), committing r%ld"),
                                 head_predecessor_count,
                                 rev - head_revnum, /* This is equal to 1. */
                                 root_noderev->predecessor_count,
                                 rev);
    }

  return SVN_NO_ERROR;
}

/* Copy a node-revision specified by id ID in filesystem FS from a
   transaction into the proto-rev-file FILE.  Set *NEW_ID_P to a
   pointer to the new node-id which will be allocated in POOL.
   If this is a directory, copy all children as well.
   START_NODE_ID and START_COPY_ID are
   the first available node and copy ids for this filesystem, for older
   FS formats.

   REV is the revision number that this proto-rev-file will represent.

   INITIAL_OFFSET is the offset of the proto-rev-file on entry to
   commit_body.

   If REPS_TO_CACHE is not NULL, append to it a copy (allocated in
   REPS_POOL) of each data rep that is new in this revision.

   If REPS_HASH is not NULL, append copies (allocated in REPS_POOL)
   of the representations of each property rep that is new in this
   revision.

   AT_ROOT is true if the node revision being written is the root
   node-revision.  It only controls additional sanity checking
   logic.

   Temporary allocations are also from POOL. */
static svn_error_t *
write_final_rev(const svn_fs_id_t **new_id_p,
                apr_file_t *file,
                svn_revnum_t rev,
                svn_fs_t *fs,
                const svn_fs_id_t *id,
                const char *start_node_id,
                const char *start_copy_id,
                apr_off_t initial_offset,
                apr_array_header_t *reps_to_cache,
                apr_hash_t *reps_hash,
                apr_pool_t *reps_pool,
                svn_boolean_t at_root,
                apr_pool_t *pool)
{
  node_revision_t *noderev;
  apr_off_t my_offset;
  char my_node_id_buf[MAX_KEY_SIZE + 2];
  char my_copy_id_buf[MAX_KEY_SIZE + 2];
  const svn_fs_id_t *new_id;
  const char *node_id, *copy_id, *my_node_id, *my_copy_id;
  fs_fs_data_t *ffd = fs->fsap_data;

  *new_id_p = NULL;

  /* Check to see if this is a transaction node. */
  if (! svn_fs_fs__id_txn_id(id))
    return SVN_NO_ERROR;

  SVN_ERR(svn_fs_fs__get_node_revision(&noderev, fs, id, pool));

  if (noderev->kind == svn_node_dir)
    {
      apr_pool_t *subpool;
      apr_hash_t *entries, *str_entries;
      apr_array_header_t *sorted_entries;
      int i;

      /* This is a directory.  Write out all the children first. */
      subpool = svn_pool_create(pool);

      SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, fs, noderev, pool));
      /* For the sake of the repository administrator sort the entries
         so that the final file is deterministic and repeatable,
         however the rest of the FSFS code doesn't require any
         particular order here. */
      sorted_entries = svn_sort__hash(entries, svn_sort_compare_items_lexically,
                                      pool);
      for (i = 0; i < sorted_entries->nelts; ++i)
        {
          svn_fs_dirent_t *dirent = APR_ARRAY_IDX(sorted_entries, i,
                                                  svn_sort__item_t).value;

          svn_pool_clear(subpool);
          SVN_ERR(write_final_rev(&new_id, file, rev, fs, dirent->id,
                                  start_node_id, start_copy_id, initial_offset,
                                  reps_to_cache, reps_hash, reps_pool, FALSE,
                                  subpool));
          if (new_id && (svn_fs_fs__id_rev(new_id) == rev))
            dirent->id = svn_fs_fs__id_copy(new_id, pool);
        }
      svn_pool_destroy(subpool);

      if (noderev->data_rep && noderev->data_rep->txn_id)
        {
          /* Write out the contents of this directory as a text rep. */
          SVN_ERR(unparse_dir_entries(&str_entries, entries, pool));

          noderev->data_rep->txn_id = NULL;
          noderev->data_rep->revision = rev;

          if (ffd->deltify_directories)
            SVN_ERR(write_hash_delta_rep(noderev->data_rep, file,
                                         str_entries, fs, noderev, NULL,
                                         FALSE, pool));
          else
            SVN_ERR(write_hash_rep(noderev->data_rep, file, str_entries,
                                   fs, NULL, pool));
        }
    }
  else
    {
      /* This is a file.  We should make sure the data rep, if it
         exists in a "this" state, gets rewritten to our new revision
         num. */

      if (noderev->data_rep && noderev->data_rep->txn_id)
        {
          noderev->data_rep->txn_id = NULL;
          noderev->data_rep->revision = rev;

          /* See issue 3845.  Some unknown mechanism caused the
             protorev file to get truncated, so check for that
             here. */
          if (noderev->data_rep->offset + noderev->data_rep->size
              > initial_offset)
            return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                                    _("Truncated protorev file detected"));
        }
    }

  /* Fix up the property reps. */
  if (noderev->prop_rep && noderev->prop_rep->txn_id)
    {
      apr_hash_t *proplist;
      SVN_ERR(svn_fs_fs__get_proplist(&proplist, fs, noderev, pool));

      noderev->prop_rep->txn_id = NULL;
      noderev->prop_rep->revision = rev;

      if (ffd->deltify_properties)
        SVN_ERR(write_hash_delta_rep(noderev->prop_rep, file,
                                     proplist, fs, noderev, reps_hash,
                                     TRUE, pool));
      else
        SVN_ERR(write_hash_rep(noderev->prop_rep, file, proplist,
                               fs, reps_hash, pool));
    }


  /* Convert our temporary ID into a permanent revision one. */
  SVN_ERR(get_file_offset(&my_offset, file, pool));

  node_id = svn_fs_fs__id_node_id(noderev->id);
  if (*node_id == '_')
    {
      /* A leading '_' marks an id that is temporary to this txn. */
      if (ffd->format >= SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT)
        my_node_id = apr_psprintf(pool, "%s-%ld", node_id + 1, rev);
      else
        {
          svn_fs_fs__add_keys(start_node_id, node_id + 1, my_node_id_buf);
          my_node_id = my_node_id_buf;
        }
    }
  else
    my_node_id = node_id;

  copy_id = svn_fs_fs__id_copy_id(noderev->id);
  if (*copy_id == '_')
    {
      if (ffd->format >= SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT)
        my_copy_id = apr_psprintf(pool, "%s-%ld", copy_id + 1, rev);
      else
        {
          svn_fs_fs__add_keys(start_copy_id, copy_id + 1, my_copy_id_buf);
          my_copy_id = my_copy_id_buf;
        }
    }
  else
    my_copy_id = copy_id;

  if (noderev->copyroot_rev == SVN_INVALID_REVNUM)
    noderev->copyroot_rev = rev;

  new_id = svn_fs_fs__id_rev_create(my_node_id, my_copy_id, rev, my_offset,
                                    pool);

  noderev->id = new_id;

  if (ffd->rep_sharing_allowed)
    {
      /* Save the data representation's hash in the rep cache. */
      if (   noderev->data_rep && noderev->kind == svn_node_file
          && noderev->data_rep->revision == rev)
        {
          SVN_ERR_ASSERT(reps_to_cache && reps_pool);
          APR_ARRAY_PUSH(reps_to_cache, representation_t *)
            = svn_fs_fs__rep_copy(noderev->data_rep, reps_pool);
        }

      if (noderev->prop_rep && noderev->prop_rep->revision == rev)
        {
          /* Add new property reps to hash and on-disk cache. */
          representation_t *copy
            = svn_fs_fs__rep_copy(noderev->prop_rep, reps_pool);

          SVN_ERR_ASSERT(reps_to_cache && reps_pool);
          APR_ARRAY_PUSH(reps_to_cache, representation_t *) = copy;

          apr_hash_set(reps_hash,
                       copy->sha1_checksum->digest,
                       APR_SHA1_DIGESTSIZE,
                       copy);
        }
    }

  /* don't serialize SHA1 for dirs to disk (waste of space) */
  if (noderev->data_rep && noderev->kind == svn_node_dir)
    noderev->data_rep->sha1_checksum = NULL;

  /* don't serialize SHA1 for props to disk (waste of space) */
  if (noderev->prop_rep)
    noderev->prop_rep->sha1_checksum = NULL;

  /* Workaround issue #4031: is-fresh-txn-root in revision files. */
  noderev->is_fresh_txn_root = FALSE;

  /* Write out our new node-revision. */
  if (at_root)
    SVN_ERR(validate_root_noderev(fs, noderev, rev, pool));

  SVN_ERR(svn_fs_fs__write_noderev(svn_stream_from_aprfile2(file, TRUE, pool),
                                   noderev, ffd->format,
                                   svn_fs_fs__fs_supports_mergeinfo(fs),
                                   pool));

  /* Return our ID that references the revision file. */
  *new_id_p = noderev->id;

  return SVN_NO_ERROR;
}

/* Write the changed path info from transaction TXN_ID in filesystem
   FS to the permanent rev-file FILE.  *OFFSET_P is set to the offset
   in the file of the beginning of this information.  Perform
   temporary allocations in POOL.
 */
static svn_error_t *
write_final_changed_path_info(apr_off_t *offset_p,
                              apr_file_t *file,
                              svn_fs_t *fs,
                              const char *txn_id,
                              apr_pool_t *pool)
{
  apr_hash_t *changed_paths;
  apr_off_t offset;
  apr_pool_t *iterpool = svn_pool_create(pool);
  fs_fs_data_t *ffd = fs->fsap_data;
  svn_boolean_t include_node_kinds =
      ffd->format >= SVN_FS_FS__MIN_KIND_IN_CHANGED_FORMAT;
  apr_array_header_t *sorted_changed_paths;
  int i;

  SVN_ERR(get_file_offset(&offset, file, pool));

  SVN_ERR(svn_fs_fs__txn_changes_fetch(&changed_paths, fs, txn_id, pool));
  /* For the sake of the repository administrator sort the changes so
     that the final file is deterministic and repeatable, however the
     rest of the FSFS code doesn't require any particular order here. */
  sorted_changed_paths = svn_sort__hash(changed_paths,
                                        svn_sort_compare_items_lexically, pool);

  /* Iterate through the changed paths one at a time, and convert the
     temporary node-id into a permanent one for each change entry. */
  for (i = 0; i < sorted_changed_paths->nelts; ++i)
    {
      node_revision_t *noderev;
      const svn_fs_id_t *id;
      svn_fs_path_change2_t *change;
      const char *path;

      svn_pool_clear(iterpool);

      change = APR_ARRAY_IDX(sorted_changed_paths, i, svn_sort__item_t).value;
      path = APR_ARRAY_IDX(sorted_changed_paths, i, svn_sort__item_t).key;

      id = change->node_rev_id;

      /* If this was a delete of a mutable node, then it is OK to
         leave the change entry pointing to the non-existent temporary
         node, since it will never be used. */
      if ((change->change_kind != svn_fs_path_change_delete) &&
          (! svn_fs_fs__id_txn_id(id)))
        {
          SVN_ERR(svn_fs_fs__get_node_revision(&noderev, fs, id, iterpool));

          /* noderev has the permanent node-id at this point, so we just
             substitute it for the temporary one. */
          change->node_rev_id = noderev->id;
        }

      /* Write out the new entry into the final rev-file. */
      SVN_ERR(write_change_entry(file, path, change, include_node_kinds,
                                 iterpool));
    }

  svn_pool_destroy(iterpool);

  *offset_p = offset;

  return SVN_NO_ERROR;
}

/* Atomically update the 'current' file to hold the specified REV,
   NEXT_NODE_ID, and NEXT_COPY_ID.  (The two next-ID parameters are
   ignored and may be NULL if the FS format does not use them.)
   Perform temporary allocations in POOL. */
static svn_error_t *
write_current(svn_fs_t *fs, svn_revnum_t rev, const char *next_node_id,
              const char *next_copy_id, apr_pool_t *pool)
{
  char *buf;
  const char *tmp_name, *name;
  fs_fs_data_t *ffd = fs->fsap_data;

  /* Now we can just write out this line. */
  if (ffd->format >= SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT)
    buf = apr_psprintf(pool, "%ld\n", rev);
  else
    buf = apr_psprintf(pool, "%ld %s %s\n", rev, next_node_id, next_copy_id);

  name = svn_fs_fs__path_current(fs, pool);
  /* Write to a unique temp file first, then rename over 'current' so
     readers never observe a partially-written file. */
  SVN_ERR(svn_io_write_unique(&tmp_name,
                              svn_dirent_dirname(name, pool),
                              buf, strlen(buf),
                              svn_io_file_del_none, pool));

  return move_into_place(tmp_name, name, name, pool);
}

/* Open a new svn_fs_t handle to FS, set that handle's concept of "current
   youngest revision" to NEW_REV, and call svn_fs_fs__verify_root() on
   NEW_REV's revision root.

   Intended to be called as the very last step in a commit before 'current'
   is bumped.  This implies that we are holding the write lock. */
static svn_error_t *
verify_as_revision_before_current_plus_plus(svn_fs_t *fs,
                                            svn_revnum_t new_rev,
                                            apr_pool_t *pool)
{
#ifdef SVN_DEBUG
  fs_fs_data_t *ffd = fs->fsap_data;
  svn_fs_t *ft; /* fs++ == ft */
  svn_fs_root_t *root;
  fs_fs_data_t *ft_ffd;
  apr_hash_t *fs_config;

  SVN_ERR_ASSERT(ffd->svn_fs_open_);

  /* make sure FT does not simply return data cached by other instances
   * but actually retrieves it from disk at least once.
   */
  fs_config = apr_hash_make(pool);
  svn_hash_sets(fs_config, SVN_FS_CONFIG_FSFS_CACHE_NS,
                           svn_uuid_generate(pool));
  SVN_ERR(ffd->svn_fs_open_(&ft, fs->path,
                            fs_config,
                            pool));
  ft_ffd = ft->fsap_data;
  /* Don't let FT consult rep-cache.db, either. */
  ft_ffd->rep_sharing_allowed = FALSE;

  /* Time travel! */
  ft_ffd->youngest_rev_cache = new_rev;

  SVN_ERR(svn_fs_fs__revision_root(&root, ft, new_rev, pool));
  SVN_ERR_ASSERT(root->is_txn_root == FALSE && root->rev == new_rev);
  SVN_ERR_ASSERT(ft_ffd->youngest_rev_cache == new_rev);
  SVN_ERR(svn_fs_fs__verify_root(root, pool));
#endif /* SVN_DEBUG */

  return SVN_NO_ERROR;
}

/* Update the 'current' file to hold the correct next node and copy_ids
   from transaction TXN_ID in filesystem FS.  The current revision is
   set to REV.  Perform temporary allocations in POOL. */
static svn_error_t *
write_final_current(svn_fs_t *fs,
                    const char *txn_id,
                    svn_revnum_t rev,
                    const char *start_node_id,
                    const char *start_copy_id,
                    apr_pool_t *pool)
{
  const char *txn_node_id, *txn_copy_id;
  char new_node_id[MAX_KEY_SIZE + 2];
  char new_copy_id[MAX_KEY_SIZE + 2];
  fs_fs_data_t *ffd = fs->fsap_data;

  /* Newer formats do not track global next-ids in 'current'. */
  if (ffd->format >= SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT)
    return write_current(fs, rev, NULL, NULL, pool);

  /* To find the next available ids, we add the id that used to be in
     the 'current' file, to the next ids from the transaction file. */
  SVN_ERR(read_next_ids(&txn_node_id, &txn_copy_id, fs, txn_id, pool));

  svn_fs_fs__add_keys(start_node_id, txn_node_id, new_node_id);
  svn_fs_fs__add_keys(start_copy_id, txn_copy_id, new_copy_id);

  return write_current(fs, rev, new_node_id, new_copy_id, pool);
}

/* Verify that the user registered with FS has all the locks necessary to
   permit all the changes associated with TXN_NAME.
   The FS write lock is assumed to be held by the caller.
 */
static svn_error_t *
verify_locks(svn_fs_t *fs,
             const char *txn_name,
             apr_pool_t *pool)
{
  apr_pool_t *subpool = svn_pool_create(pool);
  apr_hash_t *changes;
  apr_hash_index_t *hi;
  apr_array_header_t *changed_paths;
  svn_stringbuf_t *last_recursed = NULL;
  int i;

  /* Fetch the changes for this transaction. */
  SVN_ERR(svn_fs_fs__txn_changes_fetch(&changes, fs, txn_name, pool));

  /* Make an array of the changed paths, and sort them depth-first-ily. */
  changed_paths = apr_array_make(pool, apr_hash_count(changes) + 1,
                                 sizeof(const char *));
  for (hi = apr_hash_first(pool, changes); hi; hi = apr_hash_next(hi))
    APR_ARRAY_PUSH(changed_paths, const char *) = svn__apr_hash_index_key(hi);
  qsort(changed_paths->elts, changed_paths->nelts,
        changed_paths->elt_size, svn_sort_compare_paths);

  /* Now, traverse the array of changed paths, verify locks.  Note
     that if we need to do a recursive verification of a path, we'll skip
     over children of that path when we get to them. */
  for (i = 0; i < changed_paths->nelts; i++)
    {
      const char *path;
      svn_fs_path_change2_t *change;
      svn_boolean_t recurse = TRUE;

      svn_pool_clear(subpool);
      path = APR_ARRAY_IDX(changed_paths, i, const char *);

      /* If this path has already been verified as part of a recursive
         check of one of its parents, no need to do it again. */
      if (last_recursed
          && svn_dirent_is_child(last_recursed->data, path, subpool))
        continue;

      /* Fetch the change associated with our path. */
      change = svn_hash_gets(changes, path);

      /* What does it mean to succeed at lock verification for a given
         path?  For an existing file or directory getting modified
         (text, props), it means we hold the lock on the file or
         directory.  For paths being added or removed, we need to hold
         the locks for that path and any children of that path.

         WHEW!  We have no reliable way to determine the node kind
         of deleted items, but fortunately we are going to do a
         recursive check on deleted paths regardless of their kind. */
      if (change->change_kind == svn_fs_path_change_modify)
        recurse = FALSE;
      SVN_ERR(svn_fs_fs__allow_locked_operation(path, fs, recurse, TRUE,
                                                subpool));

      /* If we just did a recursive check, remember the path we
         checked (so children can be skipped). */
      if (recurse)
        {
          if (! last_recursed)
            last_recursed = svn_stringbuf_create(path, pool);
          else
            svn_stringbuf_set(last_recursed, path);
        }
    }
  svn_pool_destroy(subpool);
  return SVN_NO_ERROR;
}

/* Baton used for commit_body below. */
struct commit_baton {
  svn_revnum_t *new_rev_p;
  svn_fs_t *fs;
  svn_fs_txn_t *txn;
  apr_array_header_t *reps_to_cache;
  apr_hash_t *reps_hash;
  apr_pool_t *reps_pool;
};

/* The work-horse for svn_fs_fs__commit, called with the FS write lock.
   This implements the svn_fs_fs__with_write_lock() 'body' callback
   type.  BATON is a 'struct commit_baton *'. */
static svn_error_t *
commit_body(void *baton, apr_pool_t *pool)
{
  struct commit_baton *cb = baton;
  fs_fs_data_t *ffd = cb->fs->fsap_data;
  const char *old_rev_filename, *rev_filename, *proto_filename;
  const char *revprop_filename, *final_revprop;
  const svn_fs_id_t *root_id, *new_root_id;
  const char *start_node_id = NULL, *start_copy_id = NULL;
  svn_revnum_t old_rev, new_rev;
  apr_file_t *proto_file;
  void *proto_file_lockcookie;
  apr_off_t initial_offset, changed_path_offset;
  char *buf;
  apr_hash_t *txnprops;
  apr_array_header_t *txnprop_list;
  svn_prop_t prop;
  svn_string_t date;

  /* Get the current youngest revision. */
  SVN_ERR(svn_fs_fs__youngest_rev(&old_rev, cb->fs, pool));

  /* Check to make sure this transaction is based off the most recent
     revision.
 */
  if (cb->txn->base_rev != old_rev)
    return svn_error_create(SVN_ERR_FS_TXN_OUT_OF_DATE, NULL,
                            _("Transaction out of date"));

  /* Locks may have been added (or stolen) between the calling of
     previous svn_fs.h functions and svn_fs_commit_txn(), so we need
     to re-examine every changed-path in the txn and re-verify all
     discovered locks. */
  SVN_ERR(verify_locks(cb->fs, cb->txn->id, pool));

  /* Get the next node_id and copy_id to use. */
  if (ffd->format < SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT)
    SVN_ERR(get_next_revision_ids(&start_node_id, &start_copy_id, cb->fs,
                                  pool));

  /* We are going to be one better than this puny old revision. */
  new_rev = old_rev + 1;

  /* Get a write handle on the proto revision file. */
  SVN_ERR(get_writable_proto_rev(&proto_file, &proto_file_lockcookie,
                                 cb->fs, cb->txn->id, pool));
  SVN_ERR(get_file_offset(&initial_offset, proto_file, pool));

  /* Write out all the node-revisions and directory contents. */
  root_id = svn_fs_fs__id_txn_create("0", "0", cb->txn->id, pool);
  SVN_ERR(write_final_rev(&new_root_id, proto_file, new_rev, cb->fs, root_id,
                          start_node_id, start_copy_id, initial_offset,
                          cb->reps_to_cache, cb->reps_hash, cb->reps_pool,
                          TRUE, pool));

  /* Write the changed-path information. */
  SVN_ERR(write_final_changed_path_info(&changed_path_offset, proto_file,
                                        cb->fs, cb->txn->id, pool));

  /* Write the final line: the offsets of the root noderev and of the
     changed-path data. */
  buf = apr_psprintf(pool, "\n%" APR_OFF_T_FMT " %" APR_OFF_T_FMT "\n",
                     svn_fs_fs__id_offset(new_root_id),
                     changed_path_offset);
  SVN_ERR(svn_io_file_write_full(proto_file, buf, strlen(buf), NULL,
                                 pool));
  SVN_ERR(svn_io_file_flush_to_disk(proto_file, pool));
  SVN_ERR(svn_io_file_close(proto_file, pool));

  /* We don't unlock the prototype revision file immediately to avoid a
     race with another caller writing to the prototype revision file
     before we commit it. */

  /* Remove any temporary txn props representing 'flags'. */
  SVN_ERR(svn_fs_fs__txn_proplist(&txnprops, cb->txn, pool));
  txnprop_list = apr_array_make(pool, 3, sizeof(svn_prop_t));
  prop.value = NULL;

  if (svn_hash_gets(txnprops, SVN_FS__PROP_TXN_CHECK_OOD))
    {
      prop.name = SVN_FS__PROP_TXN_CHECK_OOD;
      APR_ARRAY_PUSH(txnprop_list, svn_prop_t) = prop;
    }

  if (svn_hash_gets(txnprops, SVN_FS__PROP_TXN_CHECK_LOCKS))
    {
      prop.name = SVN_FS__PROP_TXN_CHECK_LOCKS;
      APR_ARRAY_PUSH(txnprop_list, svn_prop_t) = prop;
    }

  if (! apr_is_empty_array(txnprop_list))
    SVN_ERR(svn_fs_fs__change_txn_props(cb->txn, txnprop_list, pool));

  /* Create the shard for the rev and revprop file, if we're sharding and
     this is the first revision of a new shard.  We don't care if this
     fails because the shard already existed for some reason. */
  if (ffd->max_files_per_dir && new_rev % ffd->max_files_per_dir == 0)
    {
      /* Create the revs shard. */
        {
          const char *new_dir = path_rev_shard(cb->fs, new_rev, pool);
          svn_error_t *err = svn_io_dir_make(new_dir, APR_OS_DEFAULT, pool);
          if (err && !APR_STATUS_IS_EEXIST(err->apr_err))
            return svn_error_trace(err);
          svn_error_clear(err);
          SVN_ERR(svn_io_copy_perms(svn_dirent_join(cb->fs->path,
                                                    PATH_REVS_DIR,
                                                    pool),
                                    new_dir, pool));
        }

      /* Create the revprops shard. */
      SVN_ERR_ASSERT(! is_packed_revprop(cb->fs, new_rev));
        {
          const char *new_dir = path_revprops_shard(cb->fs, new_rev, pool);
          svn_error_t *err = svn_io_dir_make(new_dir, APR_OS_DEFAULT, pool);
          if (err && !APR_STATUS_IS_EEXIST(err->apr_err))
            return svn_error_trace(err);
          svn_error_clear(err);
          SVN_ERR(svn_io_copy_perms(svn_dirent_join(cb->fs->path,
                                                    PATH_REVPROPS_DIR,
                                                    pool),
                                    new_dir, pool));
        }
    }

  /* Move the finished rev file into place. */
  SVN_ERR(svn_fs_fs__path_rev_absolute(&old_rev_filename,
                                       cb->fs, old_rev, pool));
  rev_filename = path_rev(cb->fs, new_rev, pool);
  proto_filename = path_txn_proto_rev(cb->fs, cb->txn->id, pool);
  SVN_ERR(move_into_place(proto_filename, rev_filename, old_rev_filename,
                          pool));

  /* Now that we've moved the prototype revision file out of the way,
     we can unlock it (since further attempts to write to the file
     will fail as it no longer exists).  We must do this so that we can
     remove the transaction directory later. */
  SVN_ERR(unlock_proto_rev(cb->fs, cb->txn->id, proto_file_lockcookie, pool));

  /* Update commit time to ensure that svn:date revprops remain ordered. */
  date.data = svn_time_to_cstring(apr_time_now(), pool);
  date.len = strlen(date.data);

  SVN_ERR(svn_fs_fs__change_txn_prop(cb->txn, SVN_PROP_REVISION_DATE,
                                     &date, pool));

  /* Move the revprops file into place. */
  SVN_ERR_ASSERT(! is_packed_revprop(cb->fs, new_rev));
  revprop_filename = path_txn_props(cb->fs, cb->txn->id, pool);
  final_revprop = path_revprops(cb->fs, new_rev, pool);
  SVN_ERR(move_into_place(revprop_filename, final_revprop,
                          old_rev_filename, pool));

  /* Update the 'current' file. */
  SVN_ERR(verify_as_revision_before_current_plus_plus(cb->fs, new_rev, pool));
  SVN_ERR(write_final_current(cb->fs, cb->txn->id, new_rev, start_node_id,
                              start_copy_id, pool));

  /* At this point the new revision is committed and globally visible
     so let the caller know it succeeded by giving it the new revision
     number, which fulfills svn_fs_commit_txn() contract.  Any errors
     after this point do not change the fact that a new revision was
     created. */
  *cb->new_rev_p = new_rev;

  ffd->youngest_rev_cache = new_rev;

  /* Remove this transaction directory. */
  SVN_ERR(svn_fs_fs__purge_txn(cb->fs, cb->txn->id, pool));

  return SVN_NO_ERROR;
}

/* Add the representations in REPS_TO_CACHE (an array of representation_t *)
 * to the rep-cache database of FS.
 */
static svn_error_t *
write_reps_to_cache(svn_fs_t *fs,
                    const apr_array_header_t *reps_to_cache,
                    apr_pool_t *scratch_pool)
{
  int i;

  for (i = 0; i < reps_to_cache->nelts; i++)
    {
      representation_t *rep = APR_ARRAY_IDX(reps_to_cache, i, representation_t *);

      /* FALSE because we don't care if another parallel commit happened to
       * collide with us.  (Non-parallel collisions will not be detected.) */
      SVN_ERR(svn_fs_fs__set_rep_reference(fs, rep, FALSE, scratch_pool));
    }

  return SVN_NO_ERROR;
}

/* Commit TXN in FS, under the FS write lock, and return the new revision
   number in *NEW_REV_P.  Afterwards, outside the lock, register any newly
   written representations with the rep-cache (if rep-sharing is on). */
svn_error_t *
svn_fs_fs__commit(svn_revnum_t *new_rev_p,
                  svn_fs_t *fs,
                  svn_fs_txn_t *txn,
                  apr_pool_t *pool)
{
  struct commit_baton cb;
  fs_fs_data_t *ffd = fs->fsap_data;

  cb.new_rev_p = new_rev_p;
  cb.fs = fs;
  cb.txn = txn;

  if (ffd->rep_sharing_allowed)
    {
      cb.reps_to_cache = apr_array_make(pool, 5, sizeof(representation_t *));
      cb.reps_hash = apr_hash_make(pool);
      cb.reps_pool = pool;
    }
  else
    {
      cb.reps_to_cache = NULL;
      cb.reps_hash = NULL;
      cb.reps_pool = NULL;
    }

  SVN_ERR(svn_fs_fs__with_write_lock(fs, commit_body, &cb, pool));

  /* At this point, *NEW_REV_P has been set, so errors below won't affect
     the success of the commit.  (See svn_fs_commit_txn().)  */

  if (ffd->rep_sharing_allowed)
    {
      SVN_ERR(svn_fs_fs__open_rep_cache(fs, pool));

      /* Write new entries to the rep-sharing database.
       *
       * We use an sqlite transaction to speed things up;
       * see <http://www.sqlite.org/faq.html#q19>.
       */
      SVN_SQLITE__WITH_TXN(
        write_reps_to_cache(fs, cb.reps_to_cache, pool),
        ffd->rep_cache_db);
    }

  return SVN_NO_ERROR;
}


svn_error_t *
svn_fs_fs__reserve_copy_id(const char **copy_id_p,
                           svn_fs_t *fs,
                           const char *txn_id,
                           apr_pool_t *pool)
{
  const char *cur_node_id, *cur_copy_id;
  char *copy_id;
  apr_size_t len;

  /* First read in the current next-ids file.
 */
  SVN_ERR(read_next_ids(&cur_node_id, &cur_copy_id, fs, txn_id, pool));

  /* +2: room for the key to grow by one digit, plus the terminator. */
  copy_id = apr_pcalloc(pool, strlen(cur_copy_id) + 2);

  /* Bump the stored next-copy-id so the next caller gets a fresh one... */
  len = strlen(cur_copy_id);
  svn_fs_fs__next_key(cur_copy_id, &len, copy_id);

  SVN_ERR(write_next_ids(fs, txn_id, cur_node_id, copy_id, pool));

  /* ...and hand the *current* id, prefixed with "_" (txn-local copy-id
     namespace), back to our caller. */
  *copy_id_p = apr_pstrcat(pool, "_", cur_copy_id, (char *)NULL);

  return SVN_NO_ERROR;
}

/* Write out the zeroth revision for filesystem FS. */
static svn_error_t *
write_revision_zero(svn_fs_t *fs)
{
  const char *path_revision_zero = path_rev(fs, 0, fs->pool);
  apr_hash_t *proplist;
  svn_string_t date;

  /* Write out a rev file for revision 0: a hard-coded empty root
     directory noderev with a PLAIN, zero-entry representation. */
  SVN_ERR(svn_io_file_create(path_revision_zero,
                             "PLAIN\nEND\nENDREP\n"
                             "id: 0.0.r0/17\n"
                             "type: dir\n"
                             "count: 0\n"
                             "text: 0 0 4 4 "
                             "2d2977d1c96f487abe4a1e202dd03b4e\n"
                             "cpath: /\n"
                             "\n\n17 107\n", fs->pool));
  SVN_ERR(svn_io_set_file_read_only(path_revision_zero, FALSE, fs->pool));

  /* Set a date on revision 0. */
  date.data = svn_time_to_cstring(apr_time_now(), fs->pool);
  date.len = strlen(date.data);
  proplist = apr_hash_make(fs->pool);
  svn_hash_sets(proplist, SVN_PROP_REVISION_DATE, &date);
  return set_revision_proplist(fs, 0, proplist, fs->pool);
}

/* Create a new, empty FSFS repository at PATH for FS, choosing the format
   number from any compatibility options in FS->config. */
svn_error_t *
svn_fs_fs__create(svn_fs_t *fs,
                  const char *path,
                  apr_pool_t *pool)
{
  int format = SVN_FS_FS__FORMAT_NUMBER;
  fs_fs_data_t *ffd = fs->fsap_data;

  fs->path = apr_pstrdup(pool, path);
  /* See if compatibility with older versions was explicitly requested. */
  if (fs->config)
    {
      if (svn_hash_gets(fs->config, SVN_FS_CONFIG_PRE_1_4_COMPATIBLE))
        format = 1;
      else if (svn_hash_gets(fs->config, SVN_FS_CONFIG_PRE_1_5_COMPATIBLE))
        format = 2;
      else if (svn_hash_gets(fs->config, SVN_FS_CONFIG_PRE_1_6_COMPATIBLE))
        format = 3;
      else if (svn_hash_gets(fs->config, SVN_FS_CONFIG_PRE_1_8_COMPATIBLE))
        format = 4;
    }
  ffd->format = format;

  /* Override the default linear layout if this is a new-enough format.  */
  if (format >= SVN_FS_FS__MIN_LAYOUT_FORMAT_OPTION_FORMAT)
    ffd->max_files_per_dir = SVN_FS_FS_DEFAULT_MAX_FILES_PER_DIR;

  /* Create the revision data directories. */
  if (ffd->max_files_per_dir)
    SVN_ERR(svn_io_make_dir_recursively(path_rev_shard(fs, 0, pool), pool));
  else
    SVN_ERR(svn_io_make_dir_recursively(svn_dirent_join(path, PATH_REVS_DIR,
                                                        pool),
                                        pool));

  /* Create the revprops directory. */
  if (ffd->max_files_per_dir)
    SVN_ERR(svn_io_make_dir_recursively(path_revprops_shard(fs, 0, pool),
                                        pool));
  else
    SVN_ERR(svn_io_make_dir_recursively(svn_dirent_join(path,
                                                        PATH_REVPROPS_DIR,
                                                        pool),
                                        pool));

  /* Create the transaction directory. */
  SVN_ERR(svn_io_make_dir_recursively(svn_dirent_join(path, PATH_TXNS_DIR,
                                                      pool),
                                      pool));

  /* Create the protorevs directory. */
  if (format >= SVN_FS_FS__MIN_PROTOREVS_DIR_FORMAT)
    SVN_ERR(svn_io_make_dir_recursively(svn_dirent_join(path, PATH_TXN_PROTOS_DIR,
                                                        pool),
                                        pool));

  /* Create the 'current' file.  (Old formats also track next node-id
     and copy-id in this file; new ones keep just the youngest rev.) */
  SVN_ERR(svn_io_file_create(svn_fs_fs__path_current(fs, pool),
                             (format >= SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT
                              ? "0\n" : "0 1 1\n"),
                             pool));
  SVN_ERR(svn_io_file_create(path_lock(fs, pool), "", pool));
  SVN_ERR(svn_fs_fs__set_uuid(fs, NULL, pool));

  SVN_ERR(write_revision_zero(fs));

  SVN_ERR(write_config(fs, pool));

  SVN_ERR(read_config(ffd, fs->path, pool));

  /* Create the min unpacked rev file. */
  if (ffd->format >= SVN_FS_FS__MIN_PACKED_FORMAT)
    SVN_ERR(svn_io_file_create(path_min_unpacked_rev(fs, pool), "0\n", pool));

  /* Create the txn-current file if the repository supports
     the transaction sequence file. */
  if (format >= SVN_FS_FS__MIN_TXN_CURRENT_FORMAT)
    {
      SVN_ERR(svn_io_file_create(path_txn_current(fs, pool),
                                 "0\n", pool));
      SVN_ERR(svn_io_file_create(path_txn_current_lock(fs, pool),
                                 "", pool));
    }

  /* This filesystem is ready.  Stamp it with a format number.
 */
  SVN_ERR(write_format(path_format(fs, pool),
                       ffd->format, ffd->max_files_per_dir, FALSE, pool));

  ffd->youngest_rev_cache = 0;
  return SVN_NO_ERROR;
}

/* Part of the recovery procedure.  Return the largest revision *REV in
   filesystem FS.  Use POOL for temporary allocation. */
static svn_error_t *
recover_get_largest_revision(svn_fs_t *fs, svn_revnum_t *rev, apr_pool_t *pool)
{
  /* Discovering the largest revision in the filesystem would be an
     expensive operation if we did a readdir() or searched linearly,
     so we'll do a form of binary search.  left is a revision that we
     know exists, right a revision that we know does not exist. */
  apr_pool_t *iterpool;
  svn_revnum_t left, right = 1;

  iterpool = svn_pool_create(pool);
  /* Keep doubling right, until we find a revision that doesn't exist. */
  while (1)
    {
      svn_error_t *err;
      apr_file_t *file;

      err = open_pack_or_rev_file(&file, fs, right, iterpool);
      svn_pool_clear(iterpool);

      if (err && err->apr_err == SVN_ERR_FS_NO_SUCH_REVISION)
        {
          svn_error_clear(err);
          break;
        }
      else
        SVN_ERR(err);

      right <<= 1;
    }

  left = right >> 1;

  /* We know that left exists and right doesn't.  Do a normal bsearch to find
     the last revision. */
  while (left + 1 < right)
    {
      svn_revnum_t probe = left + ((right - left) / 2);
      svn_error_t *err;
      apr_file_t *file;

      err = open_pack_or_rev_file(&file, fs, probe, iterpool);
      svn_pool_clear(iterpool);

      if (err && err->apr_err == SVN_ERR_FS_NO_SUCH_REVISION)
        {
          svn_error_clear(err);
          right = probe;
        }
      else
        {
          SVN_ERR(err);
          left = probe;
        }
    }

  svn_pool_destroy(iterpool);

  /* left is now the largest revision that exists. */
  *rev = left;
  return SVN_NO_ERROR;
}

/* A baton for reading a fixed amount from an open file.  For
   recover_find_max_ids() below.
 */
struct recover_read_from_file_baton
{
  apr_file_t *file;       /* The file to read from. */
  apr_pool_t *pool;       /* Used for the read calls. */
  apr_off_t remaining;    /* Bytes left before we report EOF. */
};

/* A stream read handler used by recover_find_max_ids() below.
   Read and return at most BATON->REMAINING bytes from the stream,
   returning nothing after that to indicate EOF. */
static svn_error_t *
read_handler_recover(void *baton, char *buffer, apr_size_t *len)
{
  struct recover_read_from_file_baton *b = baton;
  svn_filesize_t bytes_to_read = *len;

  if (b->remaining == 0)
    {
      /* Return a successful read of zero bytes to signal EOF. */
      *len = 0;
      return SVN_NO_ERROR;
    }

  /* Clamp the read to what is left of the representation. */
  if (bytes_to_read > b->remaining)
    bytes_to_read = b->remaining;
  b->remaining -= bytes_to_read;

  return svn_io_file_read_full2(b->file, buffer, (apr_size_t) bytes_to_read,
                                len, NULL, b->pool);
}

/* Part of the recovery procedure.  Read the directory noderev at offset
   OFFSET of file REV_FILE (the revision file of revision REV of
   filesystem FS), and set MAX_NODE_ID and MAX_COPY_ID to be the node-id
   and copy-id of that node, if greater than the current value stored
   in either.  Recurse into any child directories that were modified in
   this revision.

   MAX_NODE_ID and MAX_COPY_ID must be arrays of at least MAX_KEY_SIZE.

   Perform temporary allocation in POOL. */
static svn_error_t *
recover_find_max_ids(svn_fs_t *fs, svn_revnum_t rev,
                     apr_file_t *rev_file, apr_off_t offset,
                     char *max_node_id, char *max_copy_id,
                     apr_pool_t *pool)
{
  apr_hash_t *headers;
  char *value;
  representation_t *data_rep;
  struct rep_args *ra;
  struct recover_read_from_file_baton baton;
  svn_stream_t *stream;
  apr_hash_t *entries;
  apr_hash_index_t *hi;
  apr_pool_t *iterpool;

  SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &offset, pool));
  SVN_ERR(read_header_block(&headers, svn_stream_from_aprfile2(rev_file, TRUE,
                                                               pool),
                            pool));

  /* Check that this is a directory.  It should be.
 */
  value = svn_hash_gets(headers, HEADER_TYPE);
  if (value == NULL || strcmp(value, KIND_DIR) != 0)
    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                            _("Recovery encountered a non-directory node"));

  /* Get the data location.  No data location indicates an empty directory. */
  value = svn_hash_gets(headers, HEADER_TEXT);
  if (!value)
    return SVN_NO_ERROR;
  SVN_ERR(read_rep_offsets(&data_rep, value, NULL, FALSE, pool));

  /* If the directory's data representation wasn't changed in this revision,
     we've already scanned the directory's contents for noderevs, so we don't
     need to again.  This will occur if a property is changed on a directory
     without changing the directory's contents. */
  if (data_rep->revision != rev)
    return SVN_NO_ERROR;

  /* We could use get_dir_contents(), but this is much cheaper.  It does
     rely on directory entries being stored as PLAIN reps, though. */
  offset = data_rep->offset;
  SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &offset, pool));
  SVN_ERR(read_rep_line(&ra, rev_file, pool));
  if (ra->is_delta)
    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                            _("Recovery encountered a deltified directory "
                              "representation"));

  /* Now create a stream that's allowed to read only as much data as is
     stored in the representation. */
  baton.file = rev_file;
  baton.pool = pool;
  baton.remaining = data_rep->expanded_size;
  stream = svn_stream_create(&baton, pool);
  svn_stream_set_read(stream, read_handler_recover);

  /* Now read the entries from that stream. */
  entries = apr_hash_make(pool);
  SVN_ERR(svn_hash_read2(entries, stream, SVN_HASH_TERMINATOR, pool));
  SVN_ERR(svn_stream_close(stream));

  /* Now check each of the entries in our directory to find new node and
     copy ids, and recurse into new subdirectories.
  */
  iterpool = svn_pool_create(pool);
  for (hi = apr_hash_first(pool, entries); hi; hi = apr_hash_next(hi))
    {
      char *str_val;
      char *str;
      svn_node_kind_t kind;
      svn_fs_id_t *id;
      const char *node_id, *copy_id;
      apr_off_t child_dir_offset;
      const svn_string_t *path = svn__apr_hash_index_val(hi);

      svn_pool_clear(iterpool);

      /* Each entry value is "<kind> <id>"; tokenize destructively on a
         private copy. */
      str_val = apr_pstrdup(iterpool, path->data);

      str = svn_cstring_tokenize(" ", &str_val);
      if (str == NULL)
        return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                                _("Directory entry corrupt"));

      if (strcmp(str, KIND_FILE) == 0)
        kind = svn_node_file;
      else if (strcmp(str, KIND_DIR) == 0)
        kind = svn_node_dir;
      else
        {
          return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                                  _("Directory entry corrupt"));
        }

      str = svn_cstring_tokenize(" ", &str_val);
      if (str == NULL)
        return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                                _("Directory entry corrupt"));

      id = svn_fs_fs__id_parse(str, strlen(str), iterpool);

      if (svn_fs_fs__id_rev(id) != rev)
        {
          /* If the node wasn't modified in this revision, we've already
             checked the node and copy id. */
          continue;
        }

      node_id = svn_fs_fs__id_node_id(id);
      copy_id = svn_fs_fs__id_copy_id(id);

      if (svn_fs_fs__key_compare(node_id, max_node_id) > 0)
        {
          SVN_ERR_ASSERT(strlen(node_id) < MAX_KEY_SIZE);
          apr_cpystrn(max_node_id, node_id, MAX_KEY_SIZE);
        }
      if (svn_fs_fs__key_compare(copy_id, max_copy_id) > 0)
        {
          SVN_ERR_ASSERT(strlen(copy_id) < MAX_KEY_SIZE);
          apr_cpystrn(max_copy_id, copy_id, MAX_KEY_SIZE);
        }

      if (kind == svn_node_file)
        continue;

      /* Recurse into modified subdirectories. */
      child_dir_offset = svn_fs_fs__id_offset(id);
      SVN_ERR(recover_find_max_ids(fs, rev, rev_file, child_dir_offset,
                                   max_node_id, max_copy_id, iterpool));
    }
  svn_pool_destroy(iterpool);

  return SVN_NO_ERROR;
}

/* Return TRUE, if for REVISION in FS, we can find the revprop pack file.
 * Use POOL for temporary allocations.
 * Set *MISSING, if the reason is a missing manifest or pack file.
 */
static svn_boolean_t
packed_revprop_available(svn_boolean_t *missing,
                         svn_fs_t *fs,
                         svn_revnum_t revision,
                         apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  svn_stringbuf_t *content = NULL;

  /* try to read the manifest file */
  const char *folder = path_revprops_pack_shard(fs, revision, pool);
  const char *manifest_path = svn_dirent_join(folder, PATH_MANIFEST, pool);

  svn_error_t *err = try_stringbuf_from_file(&content,
                                             missing,
                                             manifest_path,
                                             FALSE,
                                             pool);

  /* if the manifest cannot be read, consider the pack files inaccessible
   * even if the file itself exists. */
  if (err)
    {
      svn_error_clear(err);
      return FALSE;
    }

  if (*missing)
    return FALSE;

  /* parse manifest content until we find the entry for REVISION.
   * Revision 0 is never packed, so the first shard is one entry short. */
  revision = revision < ffd->max_files_per_dir
           ? revision - 1
           : revision % ffd->max_files_per_dir;
  while (content->data)
    {
      /* Terminate the current manifest line in place and step past it. */
      char *next = strchr(content->data, '\n');
      if (next)
        {
          *next = 0;
          ++next;
        }

      if (revision-- == 0)
        {
          /* the respective pack file must exist (and be a file) */
          svn_node_kind_t kind;
          err = svn_io_check_path(svn_dirent_join(folder, content->data,
                                                  pool),
                                  &kind, pool);
          if (err)
            {
              svn_error_clear(err);
              return FALSE;
            }

          *missing = kind == svn_node_none;
          return kind == svn_node_file;
        }

      content->data = next;
    }

  return FALSE;
}

/* Baton used for recover_body below. */
struct recover_baton {
  svn_fs_t *fs;                     /* The filesystem to recover. */
  svn_cancel_func_t cancel_func;    /* Optional cancellation callback... */
  void *cancel_baton;               /* ...and its baton. */
};

/* The work-horse for svn_fs_fs__recover, called with the FS
   write lock.  This implements the svn_fs_fs__with_write_lock()
   'body' callback type.  BATON is a 'struct recover_baton *'.
 */
static svn_error_t *
recover_body(void *baton, apr_pool_t *pool)
{
  struct recover_baton *b = baton;
  svn_fs_t *fs = b->fs;
  fs_fs_data_t *ffd = fs->fsap_data;
  svn_revnum_t max_rev;
  char next_node_id_buf[MAX_KEY_SIZE], next_copy_id_buf[MAX_KEY_SIZE];
  char *next_node_id = NULL, *next_copy_id = NULL;
  svn_revnum_t youngest_rev;
  svn_node_kind_t youngest_revprops_kind;

  /* Lose potentially corrupted data in temp files */
  SVN_ERR(cleanup_revprop_namespace(fs));

  /* We need to know the largest revision in the filesystem. */
  SVN_ERR(recover_get_largest_revision(fs, &max_rev, pool));

  /* Get the expected youngest revision */
  SVN_ERR(get_youngest(&youngest_rev, fs->path, pool));

  /* Policy note:

     Since the revprops file is written after the revs file, the true
     maximum available revision is the youngest one for which both are
     present.  That's probably the same as the max_rev we just found,
     but if it's not, we could, in theory, repeatedly decrement
     max_rev until we find a revision that has both a revs and
     revprops file, then write db/current with that.

     But we choose not to.  If a repository is so corrupt that it's
     missing at least one revprops file, we shouldn't assume that the
     youngest revision for which both the revs and revprops files are
     present is healthy.  In other words, we're willing to recover
     from a missing or out-of-date db/current file, because db/current
     is truly redundant -- it's basically a cache so we don't have to
     find max_rev each time, albeit a cache with unusual semantics,
     since it also officially defines when a revision goes live.  But
     if we're missing more than the cache, it's time to back out and
     let the admin reconstruct things by hand: correctness at that
     point may depend on external things like checking a commit email
     list, looking in particular working copies, etc.

     This policy matches well with a typical naive backup scenario.
     Say you're rsyncing your FSFS repository nightly to the same
     location.  Once revs and revprops are written, you've got the
     maximum rev; if the backup should bomb before db/current is
     written, then db/current could stay arbitrarily out-of-date, but
     we can still recover.  It's a small window, but we might as well
     do what we can. */

  /* Even if db/current were missing, it would be created with 0 by
     get_youngest(), so this conditional remains valid. */
  if (youngest_rev > max_rev)
    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                             _("Expected current rev to be <= %ld "
                               "but found %ld"), max_rev, youngest_rev);

  /* We only need to search for maximum IDs for old FS formats which
     use global ID counters. */
  if (ffd->format < SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT)
    {
      /* Next we need to find the maximum node id and copy id in use across the
         filesystem.  Unfortunately, the only way we can get this information
         is to scan all the noderevs of all the revisions and keep track as
         we go along. */
      svn_revnum_t rev;
      apr_pool_t *iterpool = svn_pool_create(pool);
      char max_node_id[MAX_KEY_SIZE] = "0", max_copy_id[MAX_KEY_SIZE] = "0";
      apr_size_t len;

      for (rev = 0; rev <= max_rev; rev++)
        {
          apr_file_t *rev_file;
          apr_off_t root_offset;

          svn_pool_clear(iterpool);

          if (b->cancel_func)
            SVN_ERR(b->cancel_func(b->cancel_baton));

          SVN_ERR(open_pack_or_rev_file(&rev_file, fs, rev, iterpool));
          SVN_ERR(get_root_changes_offset(&root_offset, NULL, rev_file, fs, rev,
                                          iterpool));
          SVN_ERR(recover_find_max_ids(fs, rev, rev_file, root_offset,
                                       max_node_id, max_copy_id, iterpool));
          SVN_ERR(svn_io_file_close(rev_file, iterpool));
        }
      svn_pool_destroy(iterpool);

      /* Now that we finally have the maximum revision, node-id and copy-id, we
         can bump the two ids to get the next of each.
      */
      len = strlen(max_node_id);
      svn_fs_fs__next_key(max_node_id, &len, next_node_id_buf);
      next_node_id = next_node_id_buf;
      len = strlen(max_copy_id);
      svn_fs_fs__next_key(max_copy_id, &len, next_copy_id_buf);
      next_copy_id = next_copy_id_buf;
    }

  /* Before setting current, verify that there is a revprops file
     for the youngest revision.  (Issue #2992) */
  SVN_ERR(svn_io_check_path(path_revprops(fs, max_rev, pool),
                            &youngest_revprops_kind, pool));
  if (youngest_revprops_kind == svn_node_none)
    {
      svn_boolean_t missing = TRUE;
      if (!packed_revprop_available(&missing, fs, max_rev, pool))
        {
          if (missing)
            {
              return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                                       _("Revision %ld has a revs file but no "
                                         "revprops file"),
                                       max_rev);
            }
          else
            {
              return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                                       _("Revision %ld has a revs file but the "
                                         "revprops file is inaccessible"),
                                       max_rev);
            }
        }
    }
  else if (youngest_revprops_kind != svn_node_file)
    {
      return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                               _("Revision %ld has a non-file where its "
                                 "revprops file should be"),
                               max_rev);
    }

  /* Prune younger-than-(newfound-youngest) revisions from the rep
     cache if sharing is enabled taking care not to create the cache
     if it does not exist. */
  if (ffd->rep_sharing_allowed)
    {
      svn_boolean_t rep_cache_exists;

      SVN_ERR(svn_fs_fs__exists_rep_cache(&rep_cache_exists, fs, pool));
      if (rep_cache_exists)
        SVN_ERR(svn_fs_fs__del_rep_reference(fs, max_rev, pool));
    }

  /* Now store the discovered youngest revision, and the next IDs if
     relevant, in a new 'current' file. */
  return write_current(fs, max_rev, next_node_id, next_copy_id, pool);
}

/* This implements the fs_library_vtable_t.recover() API.
 */
svn_error_t *
svn_fs_fs__recover(svn_fs_t *fs,
                   svn_cancel_func_t cancel_func, void *cancel_baton,
                   apr_pool_t *pool)
{
  struct recover_baton b;

  /* We have no way to take out an exclusive lock in FSFS, so we're
     restricted as to the types of recovery we can do.  Luckily,
     we just want to recreate the 'current' file, and we can do that just
     by blocking other writers. */
  b.fs = fs;
  b.cancel_func = cancel_func;
  b.cancel_baton = cancel_baton;
  return svn_fs_fs__with_write_lock(fs, recover_body, &b, pool);
}

/* Set the UUID of FS to UUID (generating a fresh one if UUID is NULL),
   writing it atomically to the 'uuid' file and stashing it in FS->uuid. */
svn_error_t *
svn_fs_fs__set_uuid(svn_fs_t *fs,
                    const char *uuid,
                    apr_pool_t *pool)
{
  char *my_uuid;
  apr_size_t my_uuid_len;
  const char *tmp_path;
  const char *uuid_path = path_uuid(fs, pool);

  if (! uuid)
    uuid = svn_uuid_generate(pool);

  /* Make sure we have a copy in FS->POOL, and append a newline. */
  my_uuid = apr_pstrcat(fs->pool, uuid, "\n", (char *)NULL);
  my_uuid_len = strlen(my_uuid);

  SVN_ERR(svn_io_write_unique(&tmp_path,
                              svn_dirent_dirname(uuid_path, pool),
                              my_uuid, my_uuid_len,
                              svn_io_file_del_none, pool));

  /* We use the permissions of the 'current' file, because the 'uuid'
     file does not exist during repository creation. */
  SVN_ERR(move_into_place(tmp_path, uuid_path,
                          svn_fs_fs__path_current(fs, pool), pool));

  /* Remove the newline we added, and stash the UUID. */
  my_uuid[my_uuid_len - 1] = '\0';
  fs->uuid = my_uuid;

  return SVN_NO_ERROR;
}

/** Node origin lazy cache. */

/* If directory PATH does not exist, create it and give it the same
   permissions as FS_path.*/
svn_error_t *
svn_fs_fs__ensure_dir_exists(const char *path,
                             const char *fs_path,
                             apr_pool_t *pool)
{
  svn_error_t *err = svn_io_dir_make(path, APR_OS_DEFAULT, pool);
  if (err && APR_STATUS_IS_EEXIST(err->apr_err))
    {
      /* Already there -- nothing to do. */
      svn_error_clear(err);
      return SVN_NO_ERROR;
    }
  SVN_ERR(err);

  /* We successfully created a new directory.  Dup the permissions
     from FS->path.
 */
  return svn_io_copy_perms(fs_path, path, pool);
}

/* Set *NODE_ORIGINS to a hash mapping 'const char *' node IDs to
   'svn_string_t *' node revision IDs.  Use POOL for allocations.
   *NODE_ORIGINS is set to NULL if NODE_ORIGINS_FILE does not exist. */
static svn_error_t *
get_node_origins_from_file(svn_fs_t *fs,
                           apr_hash_t **node_origins,
                           const char *node_origins_file,
                           apr_pool_t *pool)
{
  apr_file_t *fd;
  svn_error_t *err;
  svn_stream_t *stream;

  *node_origins = NULL;
  err = svn_io_file_open(&fd, node_origins_file,
                         APR_READ, APR_OS_DEFAULT, pool);
  if (err && APR_STATUS_IS_ENOENT(err->apr_err))
    {
      /* No origins file yet; that's not an error. */
      svn_error_clear(err);
      return SVN_NO_ERROR;
    }
  SVN_ERR(err);

  stream = svn_stream_from_aprfile2(fd, FALSE, pool);
  *node_origins = apr_hash_make(pool);
  SVN_ERR(svn_hash_read2(*node_origins, stream, SVN_HASH_TERMINATOR, pool));
  return svn_stream_close(stream);
}

/* Look up the cached origin of NODE_ID in FS, setting *ORIGIN_ID to the
   parsed id, or to NULL if no origin has been recorded. */
svn_error_t *
svn_fs_fs__get_node_origin(const svn_fs_id_t **origin_id,
                           svn_fs_t *fs,
                           const char *node_id,
                           apr_pool_t *pool)
{
  apr_hash_t *node_origins;

  *origin_id = NULL;
  SVN_ERR(get_node_origins_from_file(fs, &node_origins,
                                     path_node_origin(fs, node_id, pool),
                                     pool));
  if (node_origins)
    {
      svn_string_t *origin_id_str =
        svn_hash_gets(node_origins, node_id);
      if (origin_id_str)
        *origin_id = svn_fs_fs__id_parse(origin_id_str->data,
                                         origin_id_str->len, pool);
    }
  return SVN_NO_ERROR;
}


/* Helper for svn_fs_fs__set_node_origin.  Takes a NODE_ID/NODE_REV_ID
   pair and adds it to the NODE_ORIGINS_PATH file. */
static svn_error_t *
set_node_origins_for_file(svn_fs_t *fs,
                          const char *node_origins_path,
                          const char *node_id,
                          svn_string_t *node_rev_id,
                          apr_pool_t *pool)
{
  const char *path_tmp;
  svn_stream_t *stream;
  apr_hash_t *origins_hash;
  svn_string_t *old_node_rev_id;

  SVN_ERR(svn_fs_fs__ensure_dir_exists(svn_dirent_join(fs->path,
                                                       PATH_NODE_ORIGINS_DIR,
                                                       pool),
                                       fs->path, pool));

  /* Read the previously existing origins (if any), and merge our
     update with it.
 */
  SVN_ERR(get_node_origins_from_file(fs, &origins_hash,
                                     node_origins_path, pool));
  if (! origins_hash)
    origins_hash = apr_hash_make(pool);

  old_node_rev_id = svn_hash_gets(origins_hash, node_id);

  /* An origin is immutable: a conflicting value indicates corruption. */
  if (old_node_rev_id && !svn_string_compare(node_rev_id, old_node_rev_id))
    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                             _("Node origin for '%s' exists with a different "
                               "value (%s) than what we were about to store "
                               "(%s)"),
                             node_id, old_node_rev_id->data, node_rev_id->data);

  svn_hash_sets(origins_hash, node_id, node_rev_id);

  /* Sure, there's a race condition here.  Two processes could be
     trying to add different cache elements to the same file at the
     same time, and the entries added by the first one to write will
     be lost.  But this is just a cache of reconstructible data, so
     we'll accept this problem in return for not having to deal with
     locking overhead. */

  /* Create a temporary file, write out our hash, and close the file. */
  SVN_ERR(svn_stream_open_unique(&stream, &path_tmp,
                                 svn_dirent_dirname(node_origins_path, pool),
                                 svn_io_file_del_none, pool, pool));
  SVN_ERR(svn_hash_write2(origins_hash, stream, SVN_HASH_TERMINATOR, pool));
  SVN_ERR(svn_stream_close(stream));

  /* Rename the temp file as the real destination */
  return svn_io_file_rename(path_tmp, node_origins_path, pool);
}


/* Record NODE_REV_ID as the origin of NODE_ID in FS's node-origins cache.
   Permission errors are silently ignored (it's only a cache). */
svn_error_t *
svn_fs_fs__set_node_origin(svn_fs_t *fs,
                           const char *node_id,
                           const svn_fs_id_t *node_rev_id,
                           apr_pool_t *pool)
{
  svn_error_t *err;
  const char *filename = path_node_origin(fs, node_id, pool);

  err = set_node_origins_for_file(fs, filename,
                                  node_id,
                                  svn_fs_fs__id_unparse(node_rev_id, pool),
                                  pool);
  if (err && APR_STATUS_IS_EACCES(err->apr_err))
    {
      /* It's just a cache; stop trying if I can't write.
 */
      svn_error_clear(err);
      err = NULL;
    }
  return svn_error_trace(err);
}


/* Set *NAMES_P to an array of 'const char *' transaction IDs currently
   present in FS, derived from the "*.txn" directories under db/transactions. */
svn_error_t *
svn_fs_fs__list_transactions(apr_array_header_t **names_p,
                             svn_fs_t *fs,
                             apr_pool_t *pool)
{
  const char *txn_dir;
  apr_hash_t *dirents;
  apr_hash_index_t *hi;
  apr_array_header_t *names;
  apr_size_t ext_len = strlen(PATH_EXT_TXN);

  names = apr_array_make(pool, 1, sizeof(const char *));

  /* Get the transactions directory. */
  txn_dir = svn_dirent_join(fs->path, PATH_TXNS_DIR, pool);

  /* Now find a listing of this directory. */
  SVN_ERR(svn_io_get_dirents3(&dirents, txn_dir, TRUE, pool, pool));

  /* Loop through all the entries and return anything that ends with '.txn'. */
  for (hi = apr_hash_first(pool, dirents); hi; hi = apr_hash_next(hi))
    {
      const char *name = svn__apr_hash_index_key(hi);
      apr_ssize_t klen = svn__apr_hash_index_klen(hi);
      const char *id;

      /* The name must end with ".txn" to be considered a transaction. */
      if ((apr_size_t) klen <= ext_len
          || (strcmp(name + klen - ext_len, PATH_EXT_TXN)) != 0)
        continue;

      /* Truncate the ".txn" extension and store the ID. */
      id = apr_pstrndup(pool, name, strlen(name) - ext_len);
      APR_ARRAY_PUSH(names, const char *) = id;
    }

  *names_p = names;

  return SVN_NO_ERROR;
}

/* Open the existing transaction NAME in FS and return it in *TXN_P. */
svn_error_t *
svn_fs_fs__open_txn(svn_fs_txn_t **txn_p,
                    svn_fs_t *fs,
                    const char *name,
                    apr_pool_t *pool)
{
  svn_fs_txn_t *txn;
  svn_node_kind_t kind;
  transaction_t *local_txn;

  /* First check to see if the directory exists. */
  SVN_ERR(svn_io_check_path(path_txn_dir(fs, name, pool), &kind, pool));

  /* Did we find it? */
  if (kind != svn_node_dir)
    return svn_error_createf(SVN_ERR_FS_NO_SUCH_TRANSACTION, NULL,
                             _("No such transaction '%s'"),
                             name);

  txn = apr_pcalloc(pool, sizeof(*txn));

  /* Read in the root node of this transaction.
 */
  txn->id = apr_pstrdup(pool, name);
  txn->fs = fs;

  SVN_ERR(svn_fs_fs__get_txn(&local_txn, fs, name, pool));

  txn->base_rev = svn_fs_fs__id_rev(local_txn->base_id);

  txn->vtable = &txn_vtable;
  *txn_p = txn;

  return SVN_NO_ERROR;
}

/* Set *TABLE_P to the full property list of transaction TXN,
   allocated in POOL. */
svn_error_t *
svn_fs_fs__txn_proplist(apr_hash_t **table_p,
                        svn_fs_txn_t *txn,
                        apr_pool_t *pool)
{
  apr_hash_t *proplist = apr_hash_make(pool);
  SVN_ERR(get_txn_proplist(proplist, txn->fs, txn->id, pool));
  *table_p = proplist;

  return SVN_NO_ERROR;
}

/* Remove the on-disk node-revision file for ID in FS, along with any
   mutable property representation and (for directories) any mutable
   directory-entry data belonging to it; evict the corresponding entry
   from the txn directory cache, if one exists. */
svn_error_t *
svn_fs_fs__delete_node_revision(svn_fs_t *fs,
                                const svn_fs_id_t *id,
                                apr_pool_t *pool)
{
  node_revision_t *noderev;

  SVN_ERR(svn_fs_fs__get_node_revision(&noderev, fs, id, pool));

  /* Delete any mutable property representation. */
  if (noderev->prop_rep && noderev->prop_rep->txn_id)
    SVN_ERR(svn_io_remove_file2(path_txn_node_props(fs, id, pool), FALSE,
                                pool));

  /* Delete any mutable data representation. */
  if (noderev->data_rep && noderev->data_rep->txn_id
      && noderev->kind == svn_node_dir)
    {
      fs_fs_data_t *ffd = fs->fsap_data;
      SVN_ERR(svn_io_remove_file2(path_txn_node_children(fs, id, pool), FALSE,
                                  pool));

      /* remove the corresponding entry from the cache, if such exists */
      if (ffd->txn_dir_cache)
        {
          const char *key = svn_fs_fs__id_unparse(id, pool)->data;
          SVN_ERR(svn_cache__set(ffd->txn_dir_cache, key, NULL, pool));
        }
    }

  return svn_io_remove_file2(path_txn_node_rev(fs, id, pool), FALSE, pool);
}



/*** Revisions ***/

/* Set *VALUE_P to the value of revision property PROPNAME of revision
   REV in FS, or to NULL if the property is not set.  Allocate the
   result in POOL. */
svn_error_t *
svn_fs_fs__revision_prop(svn_string_t **value_p,
                         svn_fs_t *fs,
                         svn_revnum_t rev,
                         const char *propname,
                         apr_pool_t *pool)
{
  apr_hash_t *table;

  SVN_ERR(svn_fs__check_fs(fs, TRUE));
  SVN_ERR(svn_fs_fs__revision_proplist(&table, fs, rev, pool));

  *value_p = svn_hash_gets(table, propname);

  return SVN_NO_ERROR;
}


/* Baton used for change_rev_prop_body below.
 */
struct change_rev_prop_baton {
  svn_fs_t *fs;
  svn_revnum_t rev;
  /* Name of the revision property to change. */
  const char *name;
  /* If non-NULL, *OLD_VALUE_P is the value the caller expects the
     property to currently have (NULL there meaning "unset"). */
  const svn_string_t *const *old_value_p;
  /* New value to store, or NULL to delete the property. */
  const svn_string_t *value;
};

/* The work-horse for svn_fs_fs__change_rev_prop, called with the FS
   write lock.  This implements the svn_fs_fs__with_write_lock()
   'body' callback type.  BATON is a 'struct change_rev_prop_baton *'. */
static svn_error_t *
change_rev_prop_body(void *baton, apr_pool_t *pool)
{
  struct change_rev_prop_baton *cb = baton;
  apr_hash_t *table;

  SVN_ERR(svn_fs_fs__revision_proplist(&table, cb->fs, cb->rev, pool));

  if (cb->old_value_p)
    {
      const svn_string_t *wanted_value = *cb->old_value_p;
      const svn_string_t *present_value = svn_hash_gets(table, cb->name);
      /* Compare-and-swap semantics: the change succeeds only if the
         current value (including "unset", i.e. NULL) matches the
         expected one. */
      if ((!wanted_value != !present_value)
          || (wanted_value && present_value
              && !svn_string_compare(wanted_value, present_value)))
        {
          /* What we expected isn't what we found. */
          return svn_error_createf(SVN_ERR_FS_PROP_BASEVALUE_MISMATCH, NULL,
                                   _("revprop '%s' has unexpected value in "
                                     "filesystem"),
                                   cb->name);
        }
      /* Fall through.
 */
    }
  svn_hash_sets(table, cb->name, cb->value);

  return set_revision_proplist(cb->fs, cb->rev, table, pool);
}

/* Add, change, or delete (VALUE == NULL) the revision property NAME of
   revision REV in FS.  If OLD_VALUE_P is non-NULL, the change only
   succeeds when the current value matches *OLD_VALUE_P.  Runs under
   the FS write lock. */
svn_error_t *
svn_fs_fs__change_rev_prop(svn_fs_t *fs,
                           svn_revnum_t rev,
                           const char *name,
                           const svn_string_t *const *old_value_p,
                           const svn_string_t *value,
                           apr_pool_t *pool)
{
  struct change_rev_prop_baton cb;

  SVN_ERR(svn_fs__check_fs(fs, TRUE));

  cb.fs = fs;
  cb.rev = rev;
  cb.name = name;
  cb.old_value_p = old_value_p;
  cb.value = value;

  return svn_fs_fs__with_write_lock(fs, change_rev_prop_body, &cb, pool);
}



/*** Transactions ***/

/* Set *ROOT_ID_P and *BASE_ROOT_ID_P to the current root and the base
   root node-revision IDs of transaction TXN_NAME in FS, allocated in
   POOL. */
svn_error_t *
svn_fs_fs__get_txn_ids(const svn_fs_id_t **root_id_p,
                       const svn_fs_id_t **base_root_id_p,
                       svn_fs_t *fs,
                       const char *txn_name,
                       apr_pool_t *pool)
{
  transaction_t *txn;
  SVN_ERR(svn_fs_fs__get_txn(&txn, fs, txn_name, pool));
  *root_id_p = txn->root_id;
  *base_root_id_p = txn->base_id;
  return SVN_NO_ERROR;
}


/* Generic transaction operations.  */

/* Set *VALUE_P to the value of property PROPNAME of transaction TXN,
   or to NULL if the property is not set.  Allocate the result in
   POOL. */
svn_error_t *
svn_fs_fs__txn_prop(svn_string_t **value_p,
                    svn_fs_txn_t *txn,
                    const char *propname,
                    apr_pool_t *pool)
{
  apr_hash_t *table;
  svn_fs_t *fs = txn->fs;

  SVN_ERR(svn_fs__check_fs(fs, TRUE));
  SVN_ERR(svn_fs_fs__txn_proplist(&table, txn, pool));

  *value_p = svn_hash_gets(table, propname);

  return SVN_NO_ERROR;
}

/* Begin a new transaction in FS based on revision REV and return it in
   *TXN_P.  FLAGS is a combination of SVN_FS_TXN_* bits; each requested
   behavior is recorded as a temporary transaction property. */
svn_error_t *
svn_fs_fs__begin_txn(svn_fs_txn_t **txn_p,
                     svn_fs_t *fs,
                     svn_revnum_t rev,
                     apr_uint32_t flags,
                     apr_pool_t *pool)
{
  svn_string_t date;
  svn_prop_t prop;
  apr_array_header_t *props = apr_array_make(pool, 3, sizeof(svn_prop_t));

  SVN_ERR(svn_fs__check_fs(fs, TRUE));

  SVN_ERR(svn_fs_fs__create_txn(txn_p, fs, rev, pool));

  /* Put a datestamp on the newly created txn, so we always know
     exactly how old it is.  (This will help sysadmins identify
     long-abandoned txns that may need to be manually removed.)  When
     a txn is promoted to a revision, this property will be
     automatically overwritten with a revision datestamp. */
  date.data = svn_time_to_cstring(apr_time_now(), pool);
  date.len = strlen(date.data);

  prop.name = SVN_PROP_REVISION_DATE;
  prop.value = &date;
  APR_ARRAY_PUSH(props, svn_prop_t) = prop;

  /* Set temporary txn props that represent the requested 'flags'
     behaviors. */
  if (flags & SVN_FS_TXN_CHECK_OOD)
    {
      prop.name = SVN_FS__PROP_TXN_CHECK_OOD;
      prop.value = svn_string_create("true", pool);
      APR_ARRAY_PUSH(props, svn_prop_t) = prop;
    }

  if (flags & SVN_FS_TXN_CHECK_LOCKS)
    {
      prop.name = SVN_FS__PROP_TXN_CHECK_LOCKS;
      prop.value = svn_string_create("true", pool);
      APR_ARRAY_PUSH(props, svn_prop_t) = prop;
    }

  return svn_fs_fs__change_txn_props(*txn_p, props, pool);
}


/****** Packing FSFS shards *********/

/* Write a file FILENAME in directory FS_PATH, containing a single line
 * with the number REVNUM in ASCII decimal.  Move the file into place
 * atomically, overwriting any existing file.
 *
 * Similar to write_current(). */
static svn_error_t *
write_revnum_file(const char *fs_path,
                  const char *filename,
                  svn_revnum_t revnum,
                  apr_pool_t *scratch_pool)
{
  const char *final_path, *tmp_path;
  svn_stream_t *tmp_stream;

  final_path = svn_dirent_join(fs_path, filename, scratch_pool);
  /* Write to a unique temporary file first, then move it into place so
     readers never observe a partially written file. */
  SVN_ERR(svn_stream_open_unique(&tmp_stream, &tmp_path, fs_path,
                                 svn_io_file_del_none,
                                 scratch_pool, scratch_pool));
  SVN_ERR(svn_stream_printf(tmp_stream, scratch_pool, "%ld\n", revnum));
  SVN_ERR(svn_stream_close(tmp_stream));
  SVN_ERR(move_into_place(tmp_path, final_path, final_path, scratch_pool));
  return SVN_NO_ERROR;
}

/* Pack the revision SHARD containing exactly MAX_FILES_PER_DIR revisions
 * from SHARD_PATH into the PACK_FILE_DIR, using POOL for allocations.
 * CANCEL_FUNC and CANCEL_BATON are what you think they are.
 *
 * If for some reason we detect a partial packing already performed, we
 * remove the pack file and start again.
 */
static svn_error_t *
pack_rev_shard(const char *pack_file_dir,
               const char *shard_path,
               apr_int64_t shard,
               int max_files_per_dir,
               svn_cancel_func_t cancel_func,
               void *cancel_baton,
               apr_pool_t *pool)
{
  const char *pack_file_path, *manifest_file_path;
  svn_stream_t *pack_stream, *manifest_stream;
  svn_revnum_t start_rev, end_rev, rev;
  apr_off_t next_offset;
  apr_pool_t *iterpool;

  /* Some useful paths. */
  pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
  manifest_file_path = svn_dirent_join(pack_file_dir, PATH_MANIFEST, pool);

  /* Remove any existing pack file for this shard, since it is incomplete. */
  SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton,
                             pool));

  /* Create the new directory and pack and manifest files. */
  SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, pool));
  SVN_ERR(svn_stream_open_writable(&pack_stream, pack_file_path, pool,
                                   pool));
  SVN_ERR(svn_stream_open_writable(&manifest_stream, manifest_file_path,
                                   pool, pool));

  /* Shard N covers revisions [N * max_files_per_dir,
     (N + 1) * max_files_per_dir - 1]. */
  start_rev = (svn_revnum_t) (shard * max_files_per_dir);
  end_rev = (svn_revnum_t) ((shard + 1) * (max_files_per_dir) - 1);
  next_offset = 0;
  iterpool = svn_pool_create(pool);

  /* Iterate over the revisions in this shard, squashing them together. */
  for (rev = start_rev; rev <= end_rev; rev++)
    {
      svn_stream_t *rev_stream;
      apr_finfo_t finfo;
      const char *path;

      svn_pool_clear(iterpool);

      /* Get the size of the file. */
      path = svn_dirent_join(shard_path, apr_psprintf(iterpool, "%ld", rev),
                             iterpool);
      SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, iterpool));

      /* Update the manifest.  Each manifest line is the byte offset of
         the corresponding rev inside the pack file. */
      SVN_ERR(svn_stream_printf(manifest_stream, iterpool, "%" APR_OFF_T_FMT
                                "\n", next_offset));
      next_offset += finfo.size;

      /* Copy all the bits from the rev file to the end of the pack file.
         Disown PACK_STREAM so that closing the copy target inside
         svn_stream_copy3() does not close the pack file itself. */
      SVN_ERR(svn_stream_open_readonly(&rev_stream, path, iterpool, iterpool));
      SVN_ERR(svn_stream_copy3(rev_stream, svn_stream_disown(pack_stream,
                                                             iterpool),
                               cancel_func, cancel_baton, iterpool));
    }

  SVN_ERR(svn_stream_close(manifest_stream));
  SVN_ERR(svn_stream_close(pack_stream));
  SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, iterpool));
  SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, iterpool));
  SVN_ERR(svn_io_set_file_read_only(manifest_file_path, FALSE, iterpool));

  svn_pool_destroy(iterpool);

  return SVN_NO_ERROR;
}

/* Copy revprop files for revisions [START_REV, END_REV] from SHARD_PATH
 * to the pack file at PACK_FILE_NAME in PACK_FILE_DIR.  (Note: the
 * range is inclusive; the copy loop below runs `rev <= end_rev'.)
 *
 * The file sizes have already been determined and written to SIZES.
 * Please note that this function will be executed while the filesystem
 * has been locked and that revprops files will therefore not be modified
 * while the pack is in progress.
 *
 * COMPRESSION_LEVEL defines how well the resulting pack file shall be
 * compressed or whether is shall be compressed at all.  TOTAL_SIZE is
 * a hint on which initial buffer size we should use to hold the pack file
 * content.
 *
 * CANCEL_FUNC and CANCEL_BATON are used as usual.  Temporary allocations
 * are done in SCRATCH_POOL.
 */
static svn_error_t *
copy_revprops(const char *pack_file_dir,
              const char *pack_filename,
              const char *shard_path,
              svn_revnum_t start_rev,
              svn_revnum_t end_rev,
              apr_array_header_t *sizes,
              apr_size_t total_size,
              int compression_level,
              svn_cancel_func_t cancel_func,
              void *cancel_baton,
              apr_pool_t *scratch_pool)
{
  svn_stream_t *pack_stream;
  apr_file_t *pack_file;
  svn_revnum_t rev;
  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
  svn_stream_t *stream;

  /* create empty data buffer and a write stream on top of it */
  svn_stringbuf_t *uncompressed
    = svn_stringbuf_create_ensure(total_size, scratch_pool);
  svn_stringbuf_t *compressed
    = svn_stringbuf_create_empty(scratch_pool);
  pack_stream = svn_stream_from_stringbuf(uncompressed, scratch_pool);

  /* write the pack file header */
  SVN_ERR(serialize_revprops_header(pack_stream, start_rev, sizes, 0,
                                    sizes->nelts, iterpool));

  /* Open the target pack file.  The pack directory was re-created by
     the caller, so the file cannot pre-exist. */
  SVN_ERR(svn_io_file_open(&pack_file, svn_dirent_join(pack_file_dir,
                                                       pack_filename,
                                                       scratch_pool),
                           APR_WRITE | APR_CREATE, APR_OS_DEFAULT,
                           scratch_pool));

  /* Iterate over the revisions in this shard, squashing them together. */
  for (rev = start_rev; rev <= end_rev; rev++)
    {
      const char *path;

      svn_pool_clear(iterpool);

      /* Construct the file name. */
      path = svn_dirent_join(shard_path, apr_psprintf(iterpool, "%ld", rev),
                             iterpool);

      /* Copy all the bits from the non-packed revprop file to the end of
       * the pack file. */
      SVN_ERR(svn_stream_open_readonly(&stream, path, iterpool, iterpool));
      SVN_ERR(svn_stream_copy3(stream, pack_stream,
                               cancel_func, cancel_baton, iterpool));
    }

  /* flush stream buffers to content buffer */
  SVN_ERR(svn_stream_close(pack_stream));

  /* compress the content (or just store it for COMPRESSION_LEVEL 0) */
  SVN_ERR(svn__compress(svn_stringbuf__morph_into_string(uncompressed),
                        compressed, compression_level));

  /* write the pack file content to disk */
  stream = svn_stream_from_aprfile2(pack_file, FALSE, scratch_pool);
  SVN_ERR(svn_stream_write(stream, compressed->data, &compressed->len));
  SVN_ERR(svn_stream_close(stream));

  svn_pool_destroy(iterpool);

  return SVN_NO_ERROR;
}

/* For the revprop SHARD at SHARD_PATH with exactly MAX_FILES_PER_DIR
 * revprop files in it, create a packed shared at PACK_FILE_DIR.
 *
 * COMPRESSION_LEVEL defines how well the resulting pack file shall be
 * compressed or whether is shall be compressed at all.  Individual pack
 * file containing more than one revision will be limited to a size of
 * MAX_PACK_SIZE bytes before compression.
 *
 * CANCEL_FUNC and CANCEL_BATON are used in the usual way.  Temporary
 * allocations are done in SCRATCH_POOL.
 */
static svn_error_t *
pack_revprops_shard(const char *pack_file_dir,
                    const char *shard_path,
                    apr_int64_t shard,
                    int max_files_per_dir,
                    apr_off_t max_pack_size,
                    int compression_level,
                    svn_cancel_func_t cancel_func,
                    void *cancel_baton,
                    apr_pool_t *scratch_pool)
{
  const char *manifest_file_path, *pack_filename = NULL;
  svn_stream_t *manifest_stream;
  svn_revnum_t start_rev, end_rev, rev;
  apr_off_t total_size;
  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
  apr_array_header_t *sizes;

  /* Some useful paths. */
  manifest_file_path = svn_dirent_join(pack_file_dir, PATH_MANIFEST,
                                       scratch_pool);

  /* Remove any existing pack file for this shard, since it is incomplete.
 */
  SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton,
                             scratch_pool));

  /* Create the new directory and manifest file stream. */
  SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, scratch_pool));
  SVN_ERR(svn_stream_open_writable(&manifest_stream, manifest_file_path,
                                   scratch_pool, scratch_pool));

  /* revisions to handle.  Special case: revision 0 never gets packed,
     so shard 0 starts at revision 1. */
  start_rev = (svn_revnum_t) (shard * max_files_per_dir);
  end_rev = (svn_revnum_t) ((shard + 1) * (max_files_per_dir) - 1);
  if (start_rev == 0)
    ++start_rev;

  /* initialize the revprop size info; the 2 * SVN_INT64_BUFFER_SIZE
     baseline accounts for the pack file header. */
  sizes = apr_array_make(scratch_pool, max_files_per_dir, sizeof(apr_off_t));
  total_size = 2 * SVN_INT64_BUFFER_SIZE;

  /* Iterate over the revisions in this shard, determine their size and
   * squashing them together into pack files. */
  for (rev = start_rev; rev <= end_rev; rev++)
    {
      apr_finfo_t finfo;
      const char *path;

      svn_pool_clear(iterpool);

      /* Get the size of the file. */
      path = svn_dirent_join(shard_path, apr_psprintf(iterpool, "%ld", rev),
                             iterpool);
      SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, iterpool));

      /* if we already have started a pack file and this revprop cannot be
       * appended to it, write the previous pack file. */
      if (sizes->nelts != 0 &&
          total_size + SVN_INT64_BUFFER_SIZE + finfo.size > max_pack_size)
        {
          SVN_ERR(copy_revprops(pack_file_dir, pack_filename, shard_path,
                                start_rev, rev-1, sizes, (apr_size_t)total_size,
                                compression_level, cancel_func, cancel_baton,
                                iterpool));

          /* next pack file starts empty again */
          apr_array_clear(sizes);
          total_size = 2 * SVN_INT64_BUFFER_SIZE;
          start_rev = rev;
        }

      /* Update the manifest.  Allocate a file name for the current pack
       * file if it is a new one.  Names are "<first-rev>.0". */
      if (sizes->nelts == 0)
        pack_filename = apr_psprintf(scratch_pool, "%ld.0", rev);

      SVN_ERR(svn_stream_printf(manifest_stream, iterpool, "%s\n",
                                pack_filename));

      /* add to list of files to put into the current pack file */
      APR_ARRAY_PUSH(sizes, apr_off_t) = finfo.size;
      total_size += SVN_INT64_BUFFER_SIZE + finfo.size;
    }

  /* write the last pack file */
  if (sizes->nelts != 0)
    SVN_ERR(copy_revprops(pack_file_dir, pack_filename, shard_path,
                          start_rev, rev-1, sizes, (apr_size_t)total_size,
                          compression_level, cancel_func, cancel_baton,
                          iterpool));

  /* flush the manifest file and update permissions */
  SVN_ERR(svn_stream_close(manifest_stream));
  SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, iterpool));

  svn_pool_destroy(iterpool);

  return SVN_NO_ERROR;
}

/* Delete the non-packed revprop SHARD at SHARD_PATH with exactly
 * MAX_FILES_PER_DIR revprop files in it.  If this is shard 0, keep the
 * revprop file for revision 0.
 *
 * CANCEL_FUNC and CANCEL_BATON are used in the usual way.  Temporary
 * allocations are done in SCRATCH_POOL.
 */
static svn_error_t *
delete_revprops_shard(const char *shard_path,
                      apr_int64_t shard,
                      int max_files_per_dir,
                      svn_cancel_func_t cancel_func,
                      void *cancel_baton,
                      apr_pool_t *scratch_pool)
{
  if (shard == 0)
    {
      apr_pool_t *iterpool = svn_pool_create(scratch_pool);
      int i;

      /* delete all files except the one for revision 0 */
      for (i = 1; i < max_files_per_dir; ++i)
        {
          const char *path = svn_dirent_join(shard_path,
                                             apr_psprintf(iterpool, "%d", i),
                                             iterpool);
          if (cancel_func)
            SVN_ERR((*cancel_func)(cancel_baton));

          /* ignore_enoent = TRUE: a missing file is fine here. */
          SVN_ERR(svn_io_remove_file2(path, TRUE, iterpool));
          svn_pool_clear(iterpool);
        }

      svn_pool_destroy(iterpool);
    }
  else
    SVN_ERR(svn_io_remove_dir2(shard_path, TRUE,
                               cancel_func, cancel_baton, scratch_pool));

  return SVN_NO_ERROR;
}

/* In the file system at FS_PATH, pack the SHARD in REVS_DIR and
 * REVPROPS_DIR containing exactly MAX_FILES_PER_DIR revisions, using POOL
 * for allocations.  REVPROPS_DIR will be NULL if revprop packing is not
 * supported.  COMPRESSION_LEVEL and MAX_PACK_SIZE will be ignored in that
 * case.
 *
 * CANCEL_FUNC and CANCEL_BATON are what you think they are; similarly
 * NOTIFY_FUNC and NOTIFY_BATON.
 *
 * If for some reason we detect a partial packing already performed, we
 * remove the pack file and start again.
 */
static svn_error_t *
pack_shard(const char *revs_dir,
           const char *revsprops_dir,
           const char *fs_path,
           apr_int64_t shard,
           int max_files_per_dir,
           apr_off_t max_pack_size,
           int compression_level,
           svn_fs_pack_notify_t notify_func,
           void *notify_baton,
           svn_cancel_func_t cancel_func,
           void *cancel_baton,
           apr_pool_t *pool)
{
  const char *rev_shard_path, *rev_pack_file_dir;
  const char *revprops_shard_path, *revprops_pack_file_dir;

  /* Notify caller we're starting to pack this shard. */
  if (notify_func)
    SVN_ERR(notify_func(notify_baton, shard, svn_fs_pack_notify_start,
                        pool));

  /* Some useful paths. */
  rev_pack_file_dir =
    svn_dirent_join(revs_dir,
                    apr_psprintf(pool,
                                 "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
                                 shard),
                    pool);
  rev_shard_path =
    svn_dirent_join(revs_dir,
                    apr_psprintf(pool, "%" APR_INT64_T_FMT, shard),
                    pool);

  /* pack the revision content */
  SVN_ERR(pack_rev_shard(rev_pack_file_dir, rev_shard_path,
                         shard, max_files_per_dir,
                         cancel_func, cancel_baton, pool));

  /* if enabled, pack the revprops in an equivalent way */
  if (revsprops_dir)
    {
      revprops_pack_file_dir =
        svn_dirent_join(revsprops_dir,
                        apr_psprintf(pool,
                                     "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
                                     shard),
                        pool);
      revprops_shard_path =
        svn_dirent_join(revsprops_dir,
                        apr_psprintf(pool, "%" APR_INT64_T_FMT, shard),
                        pool);

      /* Leave 10% of headroom below MAX_PACK_SIZE for the pre-compression
         pack size limit. */
      SVN_ERR(pack_revprops_shard(revprops_pack_file_dir, revprops_shard_path,
                                  shard, max_files_per_dir,
                                  (int)(0.9 * max_pack_size),
                                  compression_level,
                                  cancel_func, cancel_baton, pool));
    }

  /* Update the min-unpacked-rev file to reflect our newly packed shard.
   * (This doesn't update ffd->min_unpacked_rev.  That will be updated by
   * update_min_unpacked_rev() when necessary.) */
  SVN_ERR(write_revnum_file(fs_path, PATH_MIN_UNPACKED_REV,
                            (svn_revnum_t)((shard + 1) * max_files_per_dir),
                            pool));

  /* Finally, remove the existing shard directories. */
  SVN_ERR(svn_io_remove_dir2(rev_shard_path, TRUE,
                             cancel_func, cancel_baton, pool));
  if (revsprops_dir)
    SVN_ERR(delete_revprops_shard(revprops_shard_path,
                                  shard, max_files_per_dir,
                                  cancel_func, cancel_baton, pool));

  /* Notify caller we're finished packing this shard. */
  if (notify_func)
    SVN_ERR(notify_func(notify_baton, shard, svn_fs_pack_notify_end,
                        pool));

  return SVN_NO_ERROR;
}

/* Baton for pack_body() below. */
struct pack_baton
{
  svn_fs_t *fs;
  svn_fs_pack_notify_t notify_func;
  void *notify_baton;
  svn_cancel_func_t cancel_func;
  void *cancel_baton;
};


/* The work-horse for svn_fs_fs__pack, called with the FS write lock.
   This implements the svn_fs_fs__with_write_lock() 'body' callback
   type.  BATON is a 'struct pack_baton *'.

   WARNING: if you add a call to this function, please note:
   The code currently assumes that any piece of code running with
   the write-lock set can rely on the ffd->min_unpacked_rev and
   ffd->min_unpacked_revprop caches to be up-to-date (and, by
   extension, on not having to use a retry when calling
   svn_fs_fs__path_rev_absolute() and friends).  If you add a call
   to this function, consider whether you have to call
   update_min_unpacked_rev().
   See this thread: http://thread.gmane.org/1291206765.3782.3309.camel@edith
 */
static svn_error_t *
pack_body(void *baton,
          apr_pool_t *pool)
{
  struct pack_baton *pb = baton;
  /* A local, partially filled-in fs_fs_data_t is used here instead of
     pb->fs->fsap_data; only the fields read below are loaded from disk. */
  fs_fs_data_t ffd = {0};
  apr_int64_t completed_shards;
  apr_int64_t i;
  svn_revnum_t youngest;
  apr_pool_t *iterpool;
  const char *rev_data_path;
  const char *revprops_data_path = NULL;

  /* read repository settings */
  SVN_ERR(read_format(&ffd.format, &ffd.max_files_per_dir,
                      path_format(pb->fs, pool), pool));
  SVN_ERR(check_format(ffd.format));
  SVN_ERR(read_config(&ffd, pb->fs->path, pool));

  /* If the repository isn't a new enough format, we don't support packing.
     Return a friendly error to that effect. */
  if (ffd.format < SVN_FS_FS__MIN_PACKED_FORMAT)
    return svn_error_createf(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
      _("FSFS format (%d) too old to pack; please upgrade the filesystem."),
      ffd.format);

  /* If we aren't using sharding, we can't do any packing, so quit. */
  if (!ffd.max_files_per_dir)
    return SVN_NO_ERROR;

  SVN_ERR(read_min_unpacked_rev(&ffd.min_unpacked_rev,
                                path_min_unpacked_rev(pb->fs, pool),
                                pool));

  SVN_ERR(get_youngest(&youngest, pb->fs->path, pool));
  /* Only fully populated shards (all revisions committed) are packable. */
  completed_shards = (youngest + 1) / ffd.max_files_per_dir;

  /* See if we've already completed all possible shards thus far. */
  if (ffd.min_unpacked_rev == (completed_shards * ffd.max_files_per_dir))
    return SVN_NO_ERROR;

  rev_data_path = svn_dirent_join(pb->fs->path, PATH_REVS_DIR, pool);
  if (ffd.format >= SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT)
    revprops_data_path = svn_dirent_join(pb->fs->path, PATH_REVPROPS_DIR,
                                         pool);

  iterpool = svn_pool_create(pool);
  for (i = ffd.min_unpacked_rev / ffd.max_files_per_dir;
       i < completed_shards;
       i++)
    {
      svn_pool_clear(iterpool);

      if (pb->cancel_func)
        SVN_ERR(pb->cancel_func(pb->cancel_baton));

      SVN_ERR(pack_shard(rev_data_path, revprops_data_path,
                         pb->fs->path, i, ffd.max_files_per_dir,
                         ffd.revprop_pack_size,
                         ffd.compress_packed_revprops
                           ? SVN_DELTA_COMPRESSION_LEVEL_DEFAULT
                           : SVN_DELTA_COMPRESSION_LEVEL_NONE,
                         pb->notify_func, pb->notify_baton,
                         pb->cancel_func, pb->cancel_baton, iterpool));
    }

  svn_pool_destroy(iterpool);
  return SVN_NO_ERROR;
}

/* Pack all completed but not yet packed shards of FS, taking the FS
   write lock first.  NOTIFY_FUNC/NOTIFY_BATON and CANCEL_FUNC/
   CANCEL_BATON are the usual callbacks. */
svn_error_t *
svn_fs_fs__pack(svn_fs_t *fs,
                svn_fs_pack_notify_t notify_func,
                void *notify_baton,
                svn_cancel_func_t cancel_func,
                void *cancel_baton,
                apr_pool_t *pool)
{
  struct pack_baton pb = { 0 };
  pb.fs = fs;
  pb.notify_func = notify_func;
  pb.notify_baton = notify_baton;
  pb.cancel_func = cancel_func;
  pb.cancel_baton = cancel_baton;
  return svn_fs_fs__with_write_lock(fs, pack_body, &pb, pool);
}


/** Verifying. **/

/* Baton type expected by verify_walker().  The purpose is to reuse open
 * rev / pack file handles between calls.  Its contents need to be cleaned
 * periodically to limit resource usage.
 */
typedef struct verify_walker_baton_t
{
  /* number of calls to verify_walker() since the last clean */
  int iteration_count;

  /* number of files opened since the last clean */
  int file_count;

  /* progress notification callback to invoke periodically (may be NULL) */
  svn_fs_progress_notify_func_t notify_func;

  /* baton to use with NOTIFY_FUNC */
  void *notify_baton;

  /* remember the last revision for which we called notify_func */
  svn_revnum_t last_notified_revision;

  /* current file handle (or NULL) */
  apr_file_t *file_hint;

  /* corresponding revision (or SVN_INVALID_REVNUM) */
  svn_revnum_t rev_hint;

  /* pool to use for the file handles etc. */
  apr_pool_t *pool;
} verify_walker_baton_t;

/* Used by svn_fs_fs__verify().
   Implements svn_fs_fs__walk_rep_reference().walker. */
static svn_error_t *
verify_walker(representation_t *rep,
              void *baton,
              svn_fs_t *fs,
              apr_pool_t *scratch_pool)
{
  struct rep_state *rs;
  struct rep_args *rep_args;

  if (baton)
    {
      verify_walker_baton_t *walker_baton = baton;
      apr_file_t * previous_file;

      /* notify and free resources periodically; the thresholds bound how
         many cached file handles / pool allocations we accumulate. */
      if (   walker_baton->iteration_count > 1000
          || walker_baton->file_count > 16)
        {
          if (   walker_baton->notify_func
              && rep->revision != walker_baton->last_notified_revision)
            {
              walker_baton->notify_func(rep->revision,
                                        walker_baton->notify_baton,
                                        scratch_pool);
              walker_baton->last_notified_revision = rep->revision;
            }

          /* Clearing the pool invalidates FILE_HINT, so reset the hints. */
          svn_pool_clear(walker_baton->pool);

          walker_baton->iteration_count = 0;
          walker_baton->file_count = 0;
          walker_baton->file_hint = NULL;
          walker_baton->rev_hint = SVN_INVALID_REVNUM;
        }

      /* access the repo data */
      previous_file = walker_baton->file_hint;
      SVN_ERR(create_rep_state(&rs, &rep_args, &walker_baton->file_hint,
                               &walker_baton->rev_hint, rep, fs,
                               walker_baton->pool));

      /* update resource usage counters */
      walker_baton->iteration_count++;
      if (previous_file != walker_baton->file_hint)
        walker_baton->file_count++;
    }
  else
    {
      /* ### Should this be using read_rep_line() directly? */
      SVN_ERR(create_rep_state(&rs, &rep_args, NULL, NULL, rep, fs,
                               scratch_pool));
    }

  return SVN_NO_ERROR;
}

/* Verify the rep-cache of FS over revisions [START, END] (invalid revnums
   default to 0 and the youngest revision, respectively), invoking
   NOTIFY_FUNC/NOTIFY_BATON for progress and CANCEL_FUNC/CANCEL_BATON for
   cancellation.  A no-op for formats without rep-sharing. */
svn_error_t *
svn_fs_fs__verify(svn_fs_t *fs,
                  svn_revnum_t start,
                  svn_revnum_t end,
                  svn_fs_progress_notify_func_t notify_func,
                  void *notify_baton,
                  svn_cancel_func_t cancel_func,
                  void *cancel_baton,
                  apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  svn_boolean_t exists;
  svn_revnum_t youngest = ffd->youngest_rev_cache; /* cache is current */

  if (ffd->format < SVN_FS_FS__MIN_REP_SHARING_FORMAT)
    return SVN_NO_ERROR;

  /* Input validation. */
  if (! SVN_IS_VALID_REVNUM(start))
    start = 0;
  if (! SVN_IS_VALID_REVNUM(end))
    end = youngest;
  SVN_ERR(ensure_revision_exists(fs, start, pool));
  SVN_ERR(ensure_revision_exists(fs, end, pool));

  /* rep-cache verification. */
  SVN_ERR(svn_fs_fs__exists_rep_cache(&exists, fs, pool));
  if (exists)
    {
      /* provide a baton to allow the reuse of open file handles between
         iterations (saves 2/3 of OS level file operations). */
      verify_walker_baton_t *baton = apr_pcalloc(pool, sizeof(*baton));
      baton->rev_hint = SVN_INVALID_REVNUM;
      baton->pool = svn_pool_create(pool);
      baton->last_notified_revision = SVN_INVALID_REVNUM;
      baton->notify_func = notify_func;
      baton->notify_baton = notify_baton;

      /* tell the user that we are now ready to do *something* */
      if (notify_func)
        notify_func(SVN_INVALID_REVNUM, notify_baton, baton->pool);

      /* Do not attempt to walk the rep-cache database if its file does
         not exist, since doing so would create it --- which may confuse
         the administrator.  Don't take any lock. */
      SVN_ERR(svn_fs_fs__walk_rep_reference(fs, start, end,
                                            verify_walker, baton,
                                            cancel_func, cancel_baton,
                                            pool));

      /* walker resource cleanup */
      svn_pool_destroy(baton->pool);
    }

  return SVN_NO_ERROR;
}


/** Hotcopy.
 **/

/* Like svn_io_dir_file_copy(), but doesn't copy files that exist at
 * the destination and do not differ in terms of kind, size, and mtime. */
static svn_error_t *
hotcopy_io_dir_file_copy(const char *src_path,
                         const char *dst_path,
                         const char *file,
                         apr_pool_t *scratch_pool)
{
  const svn_io_dirent2_t *src_dirent;
  const svn_io_dirent2_t *dst_dirent;
  const char *src_target;
  const char *dst_target;

  /* Does the destination already exist? If not, we must copy it. */
  dst_target = svn_dirent_join(dst_path, file, scratch_pool);
  SVN_ERR(svn_io_stat_dirent2(&dst_dirent, dst_target, FALSE, TRUE,
                              scratch_pool, scratch_pool));
  if (dst_dirent->kind != svn_node_none)
    {
      /* If the destination's stat information indicates that the file
       * is equal to the source, don't bother copying the file again.
       * (mtime <= : a destination at least as new as the source counts
       * as up to date.) */
      src_target = svn_dirent_join(src_path, file, scratch_pool);
      SVN_ERR(svn_io_stat_dirent2(&src_dirent, src_target, FALSE, FALSE,
                                  scratch_pool, scratch_pool));
      if (src_dirent->kind == dst_dirent->kind &&
          src_dirent->special == dst_dirent->special &&
          src_dirent->filesize == dst_dirent->filesize &&
          src_dirent->mtime <= dst_dirent->mtime)
        return SVN_NO_ERROR;
    }

  return svn_error_trace(svn_io_dir_file_copy(src_path, dst_path, file,
                                              scratch_pool));
}

/* Set *NAME_P to the UTF-8 representation of directory entry NAME.
 * NAME is in the internal encoding used by APR; PARENT is in
 * UTF-8 and in internal (not local) style.
 *
 * Use PARENT only for generating an error string if the conversion
 * fails because NAME could not be represented in UTF-8.  In that
 * case, return a two-level error in which the outer error's message
 * mentions PARENT, but the inner error's message does not mention
 * NAME (except possibly in hex) since NAME may not be printable.
 * Such a compound error at least allows the user to go looking in the
 * right directory for the problem.
 *
 * If there is any other error, just return that error directly.
 *
 * If there is any error, the effect on *NAME_P is undefined.
 *
 * *NAME_P and NAME may refer to the same storage.
 */
static svn_error_t *
entry_name_to_utf8(const char **name_p,
                   const char *name,
                   const char *parent,
                   apr_pool_t *pool)
{
  svn_error_t *err = svn_path_cstring_to_utf8(name_p, name, pool);
  if (err && err->apr_err == APR_EINVAL)
    {
      return svn_error_createf(err->apr_err, err,
                               _("Error converting entry "
                                 "in directory '%s' to UTF-8"),
                               svn_dirent_local_style(parent, pool));
    }
  return err;
}

/* Like svn_io_copy_dir_recursively() but doesn't copy regular files that
 * exist in the destination and do not differ from the source in terms of
 * kind, size, and mtime. */
static svn_error_t *
hotcopy_io_copy_dir_recursively(const char *src,
                                const char *dst_parent,
                                const char *dst_basename,
                                svn_boolean_t copy_perms,
                                svn_cancel_func_t cancel_func,
                                void *cancel_baton,
                                apr_pool_t *pool)
{
  svn_node_kind_t kind;
  apr_status_t status;
  const char *dst_path;
  apr_dir_t *this_dir;
  apr_finfo_t this_entry;
  apr_int32_t flags = APR_FINFO_TYPE | APR_FINFO_NAME;

  /* Make a subpool for recursion */
  apr_pool_t *subpool = svn_pool_create(pool);

  /* The 'dst_path' is simply dst_parent/dst_basename */
  dst_path = svn_dirent_join(dst_parent, dst_basename, pool);

  /* Sanity checks:  SRC and DST_PARENT are directories, and
     DST_BASENAME doesn't already exist in DST_PARENT.
 */
  SVN_ERR(svn_io_check_path(src, &kind, subpool));
  if (kind != svn_node_dir)
    return svn_error_createf(SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
                             _("Source '%s' is not a directory"),
                             svn_dirent_local_style(src, pool));

  SVN_ERR(svn_io_check_path(dst_parent, &kind, subpool));
  if (kind != svn_node_dir)
    return svn_error_createf(SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
                             _("Destination '%s' is not a directory"),
                             svn_dirent_local_style(dst_parent, pool));

  SVN_ERR(svn_io_check_path(dst_path, &kind, subpool));

  /* Create the new directory. */
  /* ### TODO: copy permissions (needs apr_file_attrs_get()) */
  SVN_ERR(svn_io_make_dir_recursively(dst_path, pool));

  /* Loop over the dirents in SRC.  ('.' and '..' are auto-excluded) */
  SVN_ERR(svn_io_dir_open(&this_dir, src, subpool));

  for (status = apr_dir_read(&this_entry, flags, this_dir);
       status == APR_SUCCESS;
       status = apr_dir_read(&this_entry, flags, this_dir))
    {
      /* Skip "." and ".." explicitly; not all platforms auto-exclude
         them from apr_dir_read(). */
      if ((this_entry.name[0] == '.')
          && ((this_entry.name[1] == '\0')
              || ((this_entry.name[1] == '.')
                  && (this_entry.name[2] == '\0'))))
        {
          continue;
        }
      else
        {
          const char *entryname_utf8;

          if (cancel_func)
            SVN_ERR(cancel_func(cancel_baton));

          SVN_ERR(entry_name_to_utf8(&entryname_utf8, this_entry.name,
                                     src, subpool));
          if (this_entry.filetype == APR_REG) /* regular file */
            {
              SVN_ERR(hotcopy_io_dir_file_copy(src, dst_path, entryname_utf8,
                                               subpool));
            }
          else if (this_entry.filetype == APR_LNK) /* symlink */
            {
              const char *src_target = svn_dirent_join(src, entryname_utf8,
                                                       subpool);
              const char *dst_target = svn_dirent_join(dst_path,
                                                       entryname_utf8,
                                                       subpool);
              SVN_ERR(svn_io_copy_link(src_target, dst_target,
                                       subpool));
            }
          else if (this_entry.filetype == APR_DIR) /* recurse */
            {
              const char *src_target;

              /* Prevent infinite recursion by filtering off our
                 newly created destination path. */
              if (strcmp(src, dst_parent) == 0
                  && strcmp(entryname_utf8, dst_basename) == 0)
                continue;

              src_target = svn_dirent_join(src, entryname_utf8, subpool);
              SVN_ERR(hotcopy_io_copy_dir_recursively(src_target,
                                                      dst_path,
                                                      entryname_utf8,
                                                      copy_perms,
                                                      cancel_func,
                                                      cancel_baton,
                                                      subpool));
            }
          /* ### support other APR node types someday?? */

        }
    }

  if (! (APR_STATUS_IS_ENOENT(status)))
    return svn_error_wrap_apr(status, _("Can't read directory '%s'"),
                              svn_dirent_local_style(src, pool));

  status = apr_dir_close(this_dir);
  if (status)
    return svn_error_wrap_apr(status, _("Error closing directory '%s'"),
                              svn_dirent_local_style(src, pool));

  /* Free any memory used by recursion */
  svn_pool_destroy(subpool);

  return SVN_NO_ERROR;
}

/* Copy an un-packed revision or revprop file for revision REV from SRC_SUBDIR
 * to DST_SUBDIR.  Assume a sharding layout based on MAX_FILES_PER_DIR.
 * Use SCRATCH_POOL for temporary allocations. */
static svn_error_t *
hotcopy_copy_shard_file(const char *src_subdir,
                        const char *dst_subdir,
                        svn_revnum_t rev,
                        int max_files_per_dir,
                        apr_pool_t *scratch_pool)
{
  const char *src_subdir_shard = src_subdir,
             *dst_subdir_shard = dst_subdir;

  if (max_files_per_dir)
    {
      const char *shard = apr_psprintf(scratch_pool, "%ld",
                                       rev / max_files_per_dir);
      src_subdir_shard = svn_dirent_join(src_subdir, shard, scratch_pool);
      dst_subdir_shard = svn_dirent_join(dst_subdir, shard, scratch_pool);

      /* The first revision of a shard creates the shard directory. */
      if (rev % max_files_per_dir == 0)
        {
          SVN_ERR(svn_io_make_dir_recursively(dst_subdir_shard, scratch_pool));
          SVN_ERR(svn_io_copy_perms(dst_subdir, dst_subdir_shard,
                                    scratch_pool));
        }
    }

  SVN_ERR(hotcopy_io_dir_file_copy(src_subdir_shard, dst_subdir_shard,
                                   apr_psprintf(scratch_pool, "%ld", rev),
                                   scratch_pool));
  return SVN_NO_ERROR;
}


/* Copy a packed shard containing revision REV, and which contains
 * MAX_FILES_PER_DIR revisions, from SRC_FS to DST_FS.
 * Update *DST_MIN_UNPACKED_REV in case the shard is new in DST_FS.
 * Do not re-copy data which already exists in DST_FS.
 * Use SCRATCH_POOL for temporary allocations. */
static svn_error_t *
hotcopy_copy_packed_shard(svn_revnum_t *dst_min_unpacked_rev,
                          svn_fs_t *src_fs,
                          svn_fs_t *dst_fs,
                          svn_revnum_t rev,
                          int max_files_per_dir,
                          apr_pool_t *scratch_pool)
{
  const char *src_subdir;
  const char *dst_subdir;
  const char *packed_shard;
  const char *src_subdir_packed_shard;
  svn_revnum_t revprop_rev;
  apr_pool_t *iterpool;
  fs_fs_data_t *src_ffd = src_fs->fsap_data;

  /* Copy the packed shard. */
  src_subdir = svn_dirent_join(src_fs->path, PATH_REVS_DIR, scratch_pool);
  dst_subdir = svn_dirent_join(dst_fs->path, PATH_REVS_DIR, scratch_pool);
  packed_shard = apr_psprintf(scratch_pool, "%ld" PATH_EXT_PACKED_SHARD,
                              rev / max_files_per_dir);
  src_subdir_packed_shard = svn_dirent_join(src_subdir, packed_shard,
                                            scratch_pool);
  SVN_ERR(hotcopy_io_copy_dir_recursively(src_subdir_packed_shard,
                                          dst_subdir, packed_shard,
                                          TRUE /* copy_perms */,
                                          NULL /* cancel_func */, NULL,
                                          scratch_pool));

  /* Copy revprops belonging to revisions in this pack.
*/ + src_subdir = svn_dirent_join(src_fs->path, PATH_REVPROPS_DIR, scratch_pool); + dst_subdir = svn_dirent_join(dst_fs->path, PATH_REVPROPS_DIR, scratch_pool); + + if ( src_ffd->format < SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT + || src_ffd->min_unpacked_rev < rev + max_files_per_dir) + { + /* copy unpacked revprops rev by rev */ + iterpool = svn_pool_create(scratch_pool); + for (revprop_rev = rev; + revprop_rev < rev + max_files_per_dir; + revprop_rev++) + { + svn_pool_clear(iterpool); + + SVN_ERR(hotcopy_copy_shard_file(src_subdir, dst_subdir, + revprop_rev, max_files_per_dir, + iterpool)); + } + svn_pool_destroy(iterpool); + } + else + { + /* revprop for revision 0 will never be packed */ + if (rev == 0) + SVN_ERR(hotcopy_copy_shard_file(src_subdir, dst_subdir, + 0, max_files_per_dir, + scratch_pool)); + + /* packed revprops folder */ + packed_shard = apr_psprintf(scratch_pool, "%ld" PATH_EXT_PACKED_SHARD, + rev / max_files_per_dir); + src_subdir_packed_shard = svn_dirent_join(src_subdir, packed_shard, + scratch_pool); + SVN_ERR(hotcopy_io_copy_dir_recursively(src_subdir_packed_shard, + dst_subdir, packed_shard, + TRUE /* copy_perms */, + NULL /* cancel_func */, NULL, + scratch_pool)); + } + + /* If necessary, update the min-unpacked rev file in the hotcopy. */ + if (*dst_min_unpacked_rev < rev + max_files_per_dir) + { + *dst_min_unpacked_rev = rev + max_files_per_dir; + SVN_ERR(write_revnum_file(dst_fs->path, PATH_MIN_UNPACKED_REV, + *dst_min_unpacked_rev, + scratch_pool)); + } + + return SVN_NO_ERROR; +} + +/* If NEW_YOUNGEST is younger than *DST_YOUNGEST, update the 'current' + * file in DST_FS and set *DST_YOUNGEST to NEW_YOUNGEST. + * Use SCRATCH_POOL for temporary allocations. 
*/ +static svn_error_t * +hotcopy_update_current(svn_revnum_t *dst_youngest, + svn_fs_t *dst_fs, + svn_revnum_t new_youngest, + apr_pool_t *scratch_pool) +{ + char next_node_id[MAX_KEY_SIZE] = "0"; + char next_copy_id[MAX_KEY_SIZE] = "0"; + fs_fs_data_t *dst_ffd = dst_fs->fsap_data; + + if (*dst_youngest >= new_youngest) + return SVN_NO_ERROR; + + /* If necessary, get new current next_node and next_copy IDs. */ + if (dst_ffd->format < SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT) + { + apr_off_t root_offset; + apr_file_t *rev_file; + + if (dst_ffd->format >= SVN_FS_FS__MIN_PACKED_FORMAT) + SVN_ERR(update_min_unpacked_rev(dst_fs, scratch_pool)); + + SVN_ERR(open_pack_or_rev_file(&rev_file, dst_fs, new_youngest, + scratch_pool)); + SVN_ERR(get_root_changes_offset(&root_offset, NULL, rev_file, + dst_fs, new_youngest, scratch_pool)); + SVN_ERR(recover_find_max_ids(dst_fs, new_youngest, rev_file, + root_offset, next_node_id, next_copy_id, + scratch_pool)); + SVN_ERR(svn_io_file_close(rev_file, scratch_pool)); + } + + /* Update 'current'. */ + SVN_ERR(write_current(dst_fs, new_youngest, next_node_id, next_copy_id, + scratch_pool)); + + *dst_youngest = new_youngest; + + return SVN_NO_ERROR; +} + + +/* Remove revisions between START_REV (inclusive) and END_REV (non-inclusive) + * from DST_FS. Assume sharding as per MAX_FILES_PER_DIR. + * Use SCRATCH_POOL for temporary allocations. */ +static svn_error_t * +hotcopy_remove_rev_files(svn_fs_t *dst_fs, + svn_revnum_t start_rev, + svn_revnum_t end_rev, + int max_files_per_dir, + apr_pool_t *scratch_pool) +{ + const char *dst_subdir; + const char *shard; + const char *dst_subdir_shard; + svn_revnum_t rev; + apr_pool_t *iterpool; + + SVN_ERR_ASSERT(start_rev <= end_rev); + + dst_subdir = svn_dirent_join(dst_fs->path, PATH_REVS_DIR, scratch_pool); + + /* Pre-compute paths for initial shard. 
*/ + shard = apr_psprintf(scratch_pool, "%ld", start_rev / max_files_per_dir); + dst_subdir_shard = svn_dirent_join(dst_subdir, shard, scratch_pool); + + iterpool = svn_pool_create(scratch_pool); + for (rev = start_rev; rev < end_rev; rev++) + { + const char *rev_path; + + svn_pool_clear(iterpool); + + /* If necessary, update paths for shard. */ + if (rev != start_rev && rev % max_files_per_dir == 0) + { + shard = apr_psprintf(iterpool, "%ld", rev / max_files_per_dir); + dst_subdir_shard = svn_dirent_join(dst_subdir, shard, scratch_pool); + } + + rev_path = svn_dirent_join(dst_subdir_shard, + apr_psprintf(iterpool, "%ld", rev), + iterpool); + + /* Make the rev file writable and remove it. */ + SVN_ERR(svn_io_set_file_read_write(rev_path, TRUE, iterpool)); + SVN_ERR(svn_io_remove_file2(rev_path, TRUE, iterpool)); + } + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} + +/* Verify that DST_FS is a suitable destination for an incremental + * hotcopy from SRC_FS. */ +static svn_error_t * +hotcopy_incremental_check_preconditions(svn_fs_t *src_fs, + svn_fs_t *dst_fs, + apr_pool_t *pool) +{ + fs_fs_data_t *src_ffd = src_fs->fsap_data; + fs_fs_data_t *dst_ffd = dst_fs->fsap_data; + + /* We only support incremental hotcopy between the same format. */ + if (src_ffd->format != dst_ffd->format) + return svn_error_createf(SVN_ERR_UNSUPPORTED_FEATURE, NULL, + _("The FSFS format (%d) of the hotcopy source does not match the " + "FSFS format (%d) of the hotcopy destination; please upgrade " + "both repositories to the same format"), + src_ffd->format, dst_ffd->format); + + /* Make sure the UUID of source and destination match up. + * We don't want to copy over a different repository. */ + if (strcmp(src_fs->uuid, dst_fs->uuid) != 0) + return svn_error_create(SVN_ERR_RA_UUID_MISMATCH, NULL, + _("The UUID of the hotcopy source does " + "not match the UUID of the hotcopy " + "destination")); + + /* Also require same shard size. 
*/
+  if (src_ffd->max_files_per_dir != dst_ffd->max_files_per_dir)
+    return svn_error_create(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
+                            _("The sharding layout configuration "
+                              "of the hotcopy source does not match "
+                              "the sharding layout configuration of "
+                              "the hotcopy destination"));
+  return SVN_NO_ERROR;
+}
+
+
+/* Baton for hotcopy_body(). */
+struct hotcopy_body_baton {
+  svn_fs_t *src_fs;
+  svn_fs_t *dst_fs;
+  svn_boolean_t incremental;
+  svn_cancel_func_t cancel_func;
+  void *cancel_baton;
+};
+
+/* Perform a hotcopy, either normal or incremental.
+ *
+ * Normal hotcopy assumes that the destination exists as an empty
+ * directory. It behaves like an incremental hotcopy except that
+ * none of the copied files already exist in the destination.
+ *
+ * An incremental hotcopy copies only changed or new files to the destination,
+ * and removes files from the destination no longer present in the source.
+ * While the incremental hotcopy is running, readers should still be able
+ * to access the destination repository without error and should not see
+ * revisions currently in progress of being copied. Readers are able to see
+ * new fully copied revisions even if the entire incremental hotcopy procedure
+ * has not yet completed.
+ *
+ * Writers are blocked out completely during the entire incremental hotcopy
+ * process to ensure consistency. This function assumes that the repository
+ * write-lock is held. 
+ */ +static svn_error_t * +hotcopy_body(void *baton, apr_pool_t *pool) +{ + struct hotcopy_body_baton *hbb = baton; + svn_fs_t *src_fs = hbb->src_fs; + fs_fs_data_t *src_ffd = src_fs->fsap_data; + svn_fs_t *dst_fs = hbb->dst_fs; + fs_fs_data_t *dst_ffd = dst_fs->fsap_data; + int max_files_per_dir = src_ffd->max_files_per_dir; + svn_boolean_t incremental = hbb->incremental; + svn_cancel_func_t cancel_func = hbb->cancel_func; + void* cancel_baton = hbb->cancel_baton; + svn_revnum_t src_youngest; + svn_revnum_t dst_youngest; + svn_revnum_t rev; + svn_revnum_t src_min_unpacked_rev; + svn_revnum_t dst_min_unpacked_rev; + const char *src_subdir; + const char *dst_subdir; + const char *revprop_src_subdir; + const char *revprop_dst_subdir; + apr_pool_t *iterpool; + svn_node_kind_t kind; + + /* Try to copy the config. + * + * ### We try copying the config file before doing anything else, + * ### because higher layers will abort the hotcopy if we throw + * ### an error from this function, and that renders the hotcopy + * ### unusable anyway. */ + if (src_ffd->format >= SVN_FS_FS__MIN_CONFIG_FILE) + { + svn_error_t *err; + + err = svn_io_dir_file_copy(src_fs->path, dst_fs->path, PATH_CONFIG, + pool); + if (err) + { + if (APR_STATUS_IS_ENOENT(err->apr_err)) + { + /* 1.6.0 to 1.6.11 did not copy the configuration file during + * hotcopy. So if we're hotcopying a repository which has been + * created as a hotcopy itself, it's possible that fsfs.conf + * does not exist. Ask the user to re-create it. + * + * ### It would be nice to make this a non-fatal error, + * ### but this function does not get an svn_fs_t object + * ### so we have no way of just printing a warning via + * ### the fs->warning() callback. 
*/ + + const char *msg; + const char *src_abspath; + const char *dst_abspath; + const char *config_relpath; + svn_error_t *err2; + + config_relpath = svn_dirent_join(src_fs->path, PATH_CONFIG, pool); + err2 = svn_dirent_get_absolute(&src_abspath, src_fs->path, pool); + if (err2) + return svn_error_trace(svn_error_compose_create(err, err2)); + err2 = svn_dirent_get_absolute(&dst_abspath, dst_fs->path, pool); + if (err2) + return svn_error_trace(svn_error_compose_create(err, err2)); + + /* ### hack: strip off the 'db/' directory from paths so + * ### they make sense to the user */ + src_abspath = svn_dirent_dirname(src_abspath, pool); + dst_abspath = svn_dirent_dirname(dst_abspath, pool); + + msg = apr_psprintf(pool, + _("Failed to create hotcopy at '%s'. " + "The file '%s' is missing from the source " + "repository. Please create this file, for " + "instance by running 'svnadmin upgrade %s'"), + dst_abspath, config_relpath, src_abspath); + return svn_error_quick_wrap(err, msg); + } + else + return svn_error_trace(err); + } + } + + if (cancel_func) + SVN_ERR(cancel_func(cancel_baton)); + + /* Find the youngest revision in the source and destination. + * We only support hotcopies from sources with an equal or greater amount + * of revisions than the destination. + * This also catches the case where users accidentally swap the + * source and destination arguments. */ + SVN_ERR(get_youngest(&src_youngest, src_fs->path, pool)); + if (incremental) + { + SVN_ERR(get_youngest(&dst_youngest, dst_fs->path, pool)); + if (src_youngest < dst_youngest) + return svn_error_createf(SVN_ERR_UNSUPPORTED_FEATURE, NULL, + _("The hotcopy destination already contains more revisions " + "(%lu) than the hotcopy source contains (%lu); are source " + "and destination swapped?"), + dst_youngest, src_youngest); + } + else + dst_youngest = 0; + + if (cancel_func) + SVN_ERR(cancel_func(cancel_baton)); + + /* Copy the min unpacked rev, and read its value. 
*/ + if (src_ffd->format >= SVN_FS_FS__MIN_PACKED_FORMAT) + { + const char *min_unpacked_rev_path; + + min_unpacked_rev_path = svn_dirent_join(src_fs->path, + PATH_MIN_UNPACKED_REV, + pool); + SVN_ERR(read_min_unpacked_rev(&src_min_unpacked_rev, + min_unpacked_rev_path, + pool)); + + min_unpacked_rev_path = svn_dirent_join(dst_fs->path, + PATH_MIN_UNPACKED_REV, + pool); + SVN_ERR(read_min_unpacked_rev(&dst_min_unpacked_rev, + min_unpacked_rev_path, + pool)); + + /* We only support packs coming from the hotcopy source. + * The destination should not be packed independently from + * the source. This also catches the case where users accidentally + * swap the source and destination arguments. */ + if (src_min_unpacked_rev < dst_min_unpacked_rev) + return svn_error_createf(SVN_ERR_UNSUPPORTED_FEATURE, NULL, + _("The hotcopy destination already contains " + "more packed revisions (%lu) than the " + "hotcopy source contains (%lu)"), + dst_min_unpacked_rev - 1, + src_min_unpacked_rev - 1); + + SVN_ERR(svn_io_dir_file_copy(src_fs->path, dst_fs->path, + PATH_MIN_UNPACKED_REV, pool)); + } + else + { + src_min_unpacked_rev = 0; + dst_min_unpacked_rev = 0; + } + + if (cancel_func) + SVN_ERR(cancel_func(cancel_baton)); + + /* + * Copy the necessary rev files. + */ + + src_subdir = svn_dirent_join(src_fs->path, PATH_REVS_DIR, pool); + dst_subdir = svn_dirent_join(dst_fs->path, PATH_REVS_DIR, pool); + SVN_ERR(svn_io_make_dir_recursively(dst_subdir, pool)); + + iterpool = svn_pool_create(pool); + /* First, copy packed shards. */ + for (rev = 0; rev < src_min_unpacked_rev; rev += max_files_per_dir) + { + svn_error_t *err; + + svn_pool_clear(iterpool); + + if (cancel_func) + SVN_ERR(cancel_func(cancel_baton)); + + /* Copy the packed shard. 
*/ + SVN_ERR(hotcopy_copy_packed_shard(&dst_min_unpacked_rev, + src_fs, dst_fs, + rev, max_files_per_dir, + iterpool)); + + /* If necessary, update 'current' to the most recent packed rev, + * so readers can see new revisions which arrived in this pack. */ + SVN_ERR(hotcopy_update_current(&dst_youngest, dst_fs, + rev + max_files_per_dir - 1, + iterpool)); + + /* Remove revision files which are now packed. */ + if (incremental) + SVN_ERR(hotcopy_remove_rev_files(dst_fs, rev, rev + max_files_per_dir, + max_files_per_dir, iterpool)); + + /* Now that all revisions have moved into the pack, the original + * rev dir can be removed. */ + err = svn_io_remove_dir2(path_rev_shard(dst_fs, rev, iterpool), + TRUE, cancel_func, cancel_baton, iterpool); + if (err) + { + if (APR_STATUS_IS_ENOTEMPTY(err->apr_err)) + svn_error_clear(err); + else + return svn_error_trace(err); + } + } + + if (cancel_func) + SVN_ERR(cancel_func(cancel_baton)); + + /* Now, copy pairs of non-packed revisions and revprop files. + * If necessary, update 'current' after copying all files from a shard. */ + SVN_ERR_ASSERT(rev == src_min_unpacked_rev); + SVN_ERR_ASSERT(src_min_unpacked_rev == dst_min_unpacked_rev); + revprop_src_subdir = svn_dirent_join(src_fs->path, PATH_REVPROPS_DIR, pool); + revprop_dst_subdir = svn_dirent_join(dst_fs->path, PATH_REVPROPS_DIR, pool); + SVN_ERR(svn_io_make_dir_recursively(revprop_dst_subdir, pool)); + for (; rev <= src_youngest; rev++) + { + svn_error_t *err; + + svn_pool_clear(iterpool); + + if (cancel_func) + SVN_ERR(cancel_func(cancel_baton)); + + /* Copy the rev file. */ + err = hotcopy_copy_shard_file(src_subdir, dst_subdir, + rev, max_files_per_dir, + iterpool); + if (err) + { + if (APR_STATUS_IS_ENOENT(err->apr_err) && + src_ffd->format >= SVN_FS_FS__MIN_PACKED_FORMAT) + { + svn_error_clear(err); + + /* The source rev file does not exist. This can happen if the + * source repository is being packed concurrently with this + * hotcopy operation. 
+ * + * If the new revision is now packed, and the youngest revision + * we're interested in is not inside this pack, try to copy the + * pack instead. + * + * If the youngest revision ended up being packed, don't try + * to be smart and work around this. Just abort the hotcopy. */ + SVN_ERR(update_min_unpacked_rev(src_fs, pool)); + if (is_packed_rev(src_fs, rev)) + { + if (is_packed_rev(src_fs, src_youngest)) + return svn_error_createf( + SVN_ERR_FS_NO_SUCH_REVISION, NULL, + _("The assumed HEAD revision (%lu) of the " + "hotcopy source has been packed while the " + "hotcopy was in progress; please restart " + "the hotcopy operation"), + src_youngest); + + SVN_ERR(hotcopy_copy_packed_shard(&dst_min_unpacked_rev, + src_fs, dst_fs, + rev, max_files_per_dir, + iterpool)); + rev = dst_min_unpacked_rev; + continue; + } + else + return svn_error_createf(SVN_ERR_FS_NO_SUCH_REVISION, NULL, + _("Revision %lu disappeared from the " + "hotcopy source while hotcopy was " + "in progress"), rev); + } + else + return svn_error_trace(err); + } + + /* Copy the revprop file. */ + SVN_ERR(hotcopy_copy_shard_file(revprop_src_subdir, + revprop_dst_subdir, + rev, max_files_per_dir, + iterpool)); + + /* After completing a full shard, update 'current'. */ + if (max_files_per_dir && rev % max_files_per_dir == 0) + SVN_ERR(hotcopy_update_current(&dst_youngest, dst_fs, rev, iterpool)); + } + svn_pool_destroy(iterpool); + + if (cancel_func) + SVN_ERR(cancel_func(cancel_baton)); + + /* We assume that all revisions were copied now, i.e. we didn't exit the + * above loop early. 'rev' was last incremented during exit of the loop. */ + SVN_ERR_ASSERT(rev == src_youngest + 1); + + /* All revisions were copied. Update 'current'. */ + SVN_ERR(hotcopy_update_current(&dst_youngest, dst_fs, src_youngest, pool)); + + /* Replace the locks tree. + * This is racy in case readers are currently trying to list locks in + * the destination. However, we need to get rid of stale locks. 
+ * This is the simplest way of doing this, so we accept this small race. */ + dst_subdir = svn_dirent_join(dst_fs->path, PATH_LOCKS_DIR, pool); + SVN_ERR(svn_io_remove_dir2(dst_subdir, TRUE, cancel_func, cancel_baton, + pool)); + src_subdir = svn_dirent_join(src_fs->path, PATH_LOCKS_DIR, pool); + SVN_ERR(svn_io_check_path(src_subdir, &kind, pool)); + if (kind == svn_node_dir) + SVN_ERR(svn_io_copy_dir_recursively(src_subdir, dst_fs->path, + PATH_LOCKS_DIR, TRUE, + cancel_func, cancel_baton, pool)); + + /* Now copy the node-origins cache tree. */ + src_subdir = svn_dirent_join(src_fs->path, PATH_NODE_ORIGINS_DIR, pool); + SVN_ERR(svn_io_check_path(src_subdir, &kind, pool)); + if (kind == svn_node_dir) + SVN_ERR(hotcopy_io_copy_dir_recursively(src_subdir, dst_fs->path, + PATH_NODE_ORIGINS_DIR, TRUE, + cancel_func, cancel_baton, pool)); + + /* + * NB: Data copied below is only read by writers, not readers. + * Writers are still locked out at this point. + */ + + if (dst_ffd->format >= SVN_FS_FS__MIN_REP_SHARING_FORMAT) + { + /* Copy the rep cache and then remove entries for revisions + * younger than the destination's youngest revision. */ + src_subdir = svn_dirent_join(src_fs->path, REP_CACHE_DB_NAME, pool); + dst_subdir = svn_dirent_join(dst_fs->path, REP_CACHE_DB_NAME, pool); + SVN_ERR(svn_io_check_path(src_subdir, &kind, pool)); + if (kind == svn_node_file) + { + SVN_ERR(svn_sqlite__hotcopy(src_subdir, dst_subdir, pool)); + SVN_ERR(svn_fs_fs__del_rep_reference(dst_fs, dst_youngest, pool)); + } + } + + /* Copy the txn-current file. */ + if (dst_ffd->format >= SVN_FS_FS__MIN_TXN_CURRENT_FORMAT) + SVN_ERR(svn_io_dir_file_copy(src_fs->path, dst_fs->path, + PATH_TXN_CURRENT, pool)); + + /* If a revprop generation file exists in the source filesystem, + * reset it to zero (since this is on a different path, it will not + * overlap with data already in cache). Also, clean up stale files + * used for the named atomics implementation. 
*/ + SVN_ERR(svn_io_check_path(path_revprop_generation(src_fs, pool), + &kind, pool)); + if (kind == svn_node_file) + SVN_ERR(write_revprop_generation_file(dst_fs, 0, pool)); + + SVN_ERR(cleanup_revprop_namespace(dst_fs)); + + /* Hotcopied FS is complete. Stamp it with a format file. */ + SVN_ERR(write_format(svn_dirent_join(dst_fs->path, PATH_FORMAT, pool), + dst_ffd->format, max_files_per_dir, TRUE, pool)); + + return SVN_NO_ERROR; +} + + +/* Set up shared data between SRC_FS and DST_FS. */ +static void +hotcopy_setup_shared_fs_data(svn_fs_t *src_fs, svn_fs_t *dst_fs) +{ + fs_fs_data_t *src_ffd = src_fs->fsap_data; + fs_fs_data_t *dst_ffd = dst_fs->fsap_data; + + /* The common pool and mutexes are shared between src and dst filesystems. + * During hotcopy we only grab the mutexes for the destination, so there + * is no risk of dead-lock. We don't write to the src filesystem. Shared + * data for the src_fs has already been initialised in fs_hotcopy(). */ + dst_ffd->shared = src_ffd->shared; +} + +/* Create an empty filesystem at DST_FS at DST_PATH with the same + * configuration as SRC_FS (uuid, format, and other parameters). + * After creation DST_FS has no revisions, not even revision zero. */ +static svn_error_t * +hotcopy_create_empty_dest(svn_fs_t *src_fs, + svn_fs_t *dst_fs, + const char *dst_path, + apr_pool_t *pool) +{ + fs_fs_data_t *src_ffd = src_fs->fsap_data; + fs_fs_data_t *dst_ffd = dst_fs->fsap_data; + + dst_fs->path = apr_pstrdup(pool, dst_path); + + dst_ffd->max_files_per_dir = src_ffd->max_files_per_dir; + dst_ffd->config = src_ffd->config; + dst_ffd->format = src_ffd->format; + + /* Create the revision data directories. */ + if (dst_ffd->max_files_per_dir) + SVN_ERR(svn_io_make_dir_recursively(path_rev_shard(dst_fs, 0, pool), + pool)); + else + SVN_ERR(svn_io_make_dir_recursively(svn_dirent_join(dst_path, + PATH_REVS_DIR, pool), + pool)); + + /* Create the revprops directory. 
*/ + if (src_ffd->max_files_per_dir) + SVN_ERR(svn_io_make_dir_recursively(path_revprops_shard(dst_fs, 0, pool), + pool)); + else + SVN_ERR(svn_io_make_dir_recursively(svn_dirent_join(dst_path, + PATH_REVPROPS_DIR, + pool), + pool)); + + /* Create the transaction directory. */ + SVN_ERR(svn_io_make_dir_recursively(svn_dirent_join(dst_path, PATH_TXNS_DIR, + pool), + pool)); + + /* Create the protorevs directory. */ + if (dst_ffd->format >= SVN_FS_FS__MIN_PROTOREVS_DIR_FORMAT) + SVN_ERR(svn_io_make_dir_recursively(svn_dirent_join(dst_path, + PATH_TXN_PROTOS_DIR, + pool), + pool)); + + /* Create the 'current' file. */ + SVN_ERR(svn_io_file_create(svn_fs_fs__path_current(dst_fs, pool), + (dst_ffd->format >= + SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT + ? "0\n" : "0 1 1\n"), + pool)); + + /* Create lock file and UUID. */ + SVN_ERR(svn_io_file_create(path_lock(dst_fs, pool), "", pool)); + SVN_ERR(svn_fs_fs__set_uuid(dst_fs, src_fs->uuid, pool)); + + /* Create the min unpacked rev file. */ + if (dst_ffd->format >= SVN_FS_FS__MIN_PACKED_FORMAT) + SVN_ERR(svn_io_file_create(path_min_unpacked_rev(dst_fs, pool), + "0\n", pool)); + /* Create the txn-current file if the repository supports + the transaction sequence file. 
*/ + if (dst_ffd->format >= SVN_FS_FS__MIN_TXN_CURRENT_FORMAT) + { + SVN_ERR(svn_io_file_create(path_txn_current(dst_fs, pool), + "0\n", pool)); + SVN_ERR(svn_io_file_create(path_txn_current_lock(dst_fs, pool), + "", pool)); + } + + dst_ffd->youngest_rev_cache = 0; + + hotcopy_setup_shared_fs_data(src_fs, dst_fs); + SVN_ERR(svn_fs_fs__initialize_caches(dst_fs, pool)); + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__hotcopy(svn_fs_t *src_fs, + svn_fs_t *dst_fs, + const char *src_path, + const char *dst_path, + svn_boolean_t incremental, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool) +{ + struct hotcopy_body_baton hbb; + + if (cancel_func) + SVN_ERR(cancel_func(cancel_baton)); + + SVN_ERR(svn_fs_fs__open(src_fs, src_path, pool)); + + if (incremental) + { + const char *dst_format_abspath; + svn_node_kind_t dst_format_kind; + + /* Check destination format to be sure we know how to incrementally + * hotcopy to the destination FS. */ + dst_format_abspath = svn_dirent_join(dst_path, PATH_FORMAT, pool); + SVN_ERR(svn_io_check_path(dst_format_abspath, &dst_format_kind, pool)); + if (dst_format_kind == svn_node_none) + { + /* Destination doesn't exist yet. Perform a normal hotcopy to a + * empty destination using the same configuration as the source. */ + SVN_ERR(hotcopy_create_empty_dest(src_fs, dst_fs, dst_path, pool)); + } + else + { + /* Check the existing repository. */ + SVN_ERR(svn_fs_fs__open(dst_fs, dst_path, pool)); + SVN_ERR(hotcopy_incremental_check_preconditions(src_fs, dst_fs, + pool)); + hotcopy_setup_shared_fs_data(src_fs, dst_fs); + SVN_ERR(svn_fs_fs__initialize_caches(dst_fs, pool)); + } + } + else + { + /* Start out with an empty destination using the same configuration + * as the source. 
*/ + SVN_ERR(hotcopy_create_empty_dest(src_fs, dst_fs, dst_path, pool)); + } + + if (cancel_func) + SVN_ERR(cancel_func(cancel_baton)); + + hbb.src_fs = src_fs; + hbb.dst_fs = dst_fs; + hbb.incremental = incremental; + hbb.cancel_func = cancel_func; + hbb.cancel_baton = cancel_baton; + SVN_ERR(svn_fs_fs__with_write_lock(dst_fs, hotcopy_body, &hbb, pool)); + + return SVN_NO_ERROR; +} diff --git a/subversion/libsvn_fs_fs/fs_fs.h b/subversion/libsvn_fs_fs/fs_fs.h new file mode 100644 index 0000000..c09f861 --- /dev/null +++ b/subversion/libsvn_fs_fs/fs_fs.h @@ -0,0 +1,575 @@ +/* fs_fs.h : interface to the native filesystem layer + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#ifndef SVN_LIBSVN_FS__FS_FS_H +#define SVN_LIBSVN_FS__FS_FS_H + +#include "fs.h" + +/* Open the fsfs filesystem pointed to by PATH and associate it with + filesystem object FS. Use POOL for temporary allocations. + + ### Some parts of *FS must have been initialized beforehand; some parts + (including FS->path) are initialized by this function. 
*/ +svn_error_t *svn_fs_fs__open(svn_fs_t *fs, + const char *path, + apr_pool_t *pool); + +/* Upgrade the fsfs filesystem FS. Use POOL for temporary allocations. */ +svn_error_t *svn_fs_fs__upgrade(svn_fs_t *fs, + apr_pool_t *pool); + +/* Verify metadata in fsfs filesystem FS. Limit the checks to revisions + * START to END where possible. Indicate progress via the optional + * NOTIFY_FUNC callback using NOTIFY_BATON. The optional CANCEL_FUNC + * will periodically be called with CANCEL_BATON to allow for preemption. + * Use POOL for temporary allocations. */ +svn_error_t *svn_fs_fs__verify(svn_fs_t *fs, + svn_revnum_t start, + svn_revnum_t end, + svn_fs_progress_notify_func_t notify_func, + void *notify_baton, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool); + +/* Copy the fsfs filesystem SRC_FS at SRC_PATH into a new copy DST_FS at + * DST_PATH. If INCREMENTAL is TRUE, do not re-copy data which already + * exists in DST_FS. Use POOL for temporary allocations. */ +svn_error_t * svn_fs_fs__hotcopy(svn_fs_t *src_fs, + svn_fs_t *dst_fs, + const char *src_path, + const char *dst_path, + svn_boolean_t incremental, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool); + +/* Recover the fsfs associated with filesystem FS. + Use optional CANCEL_FUNC/CANCEL_BATON for cancellation support. + Use POOL for temporary allocations. */ +svn_error_t *svn_fs_fs__recover(svn_fs_t *fs, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool); + +/* Set *NODEREV_P to the node-revision for the node ID in FS. Do any + allocations in POOL. */ +svn_error_t *svn_fs_fs__get_node_revision(node_revision_t **noderev_p, + svn_fs_t *fs, + const svn_fs_id_t *id, + apr_pool_t *pool); + +/* Store NODEREV as the node-revision for the node whose id is ID in + FS, after setting its is_fresh_txn_root to FRESH_TXN_ROOT. Do any + necessary temporary allocation in POOL. 
*/
+svn_error_t *svn_fs_fs__put_node_revision(svn_fs_t *fs,
+                                          const svn_fs_id_t *id,
+                                          node_revision_t *noderev,
+                                          svn_boolean_t fresh_txn_root,
+                                          apr_pool_t *pool);
+
+/* Write the node-revision NODEREV into the stream OUTFILE, compatible with
+   filesystem format FORMAT.  Only write mergeinfo-related metadata if
+   INCLUDE_MERGEINFO is true.  Temporary allocations are from POOL. */
+/* ### Currently used only by fs_fs.c */
+svn_error_t *
+svn_fs_fs__write_noderev(svn_stream_t *outfile,
+                         node_revision_t *noderev,
+                         int format,
+                         svn_boolean_t include_mergeinfo,
+                         apr_pool_t *pool);
+
+/* Read a node-revision from STREAM.  Set *NODEREV to the new structure,
+   allocated in POOL. */
+/* ### Currently used only by fs_fs.c */
+svn_error_t *
+svn_fs_fs__read_noderev(node_revision_t **noderev,
+                        svn_stream_t *stream,
+                        apr_pool_t *pool);
+
+
+/* Set *YOUNGEST to the youngest revision in filesystem FS.  Do any
+   temporary allocation in POOL. */
+svn_error_t *svn_fs_fs__youngest_rev(svn_revnum_t *youngest,
+                                     svn_fs_t *fs,
+                                     apr_pool_t *pool);
+
+/* Return an error iff REV does not exist in FS. */
+svn_error_t *
+svn_fs_fs__revision_exists(svn_revnum_t rev,
+                           svn_fs_t *fs,
+                           apr_pool_t *pool);
+
+/* Set *ROOT_ID to the node-id for the root of revision REV in
+   filesystem FS.  Do any allocations in POOL. */
+svn_error_t *svn_fs_fs__rev_get_root(svn_fs_id_t **root_id,
+                                     svn_fs_t *fs,
+                                     svn_revnum_t rev,
+                                     apr_pool_t *pool);
+
+/* Set *ENTRIES to an apr_hash_t of dirent structs that contain the
+   directory entries of node-revision NODEREV in filesystem FS.  The
+   returned table (and its keys and values) is allocated in POOL,
+   which is also used for temporary allocations. */
+svn_error_t *svn_fs_fs__rep_contents_dir(apr_hash_t **entries,
+                                         svn_fs_t *fs,
+                                         node_revision_t *noderev,
+                                         apr_pool_t *pool);
+
+/* Set *DIRENT to the entry identified by NAME in the directory given
+   by NODEREV in filesystem FS.  If no such entry exists, *DIRENT will
+   be NULL. 
The returned object is allocated in RESULT_POOL; SCRATCH_POOL + used for temporary allocations. */ +svn_error_t * +svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent, + svn_fs_t *fs, + node_revision_t *noderev, + const char *name, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool); + +/* Set *CONTENTS to be a readable svn_stream_t that receives the text + representation of node-revision NODEREV as seen in filesystem FS. + Use POOL for temporary allocations. */ +svn_error_t *svn_fs_fs__get_contents(svn_stream_t **contents, + svn_fs_t *fs, + node_revision_t *noderev, + apr_pool_t *pool); + +/* Attempt to fetch the text representation of node-revision NODEREV as + seen in filesystem FS and pass it along with the BATON to the PROCESSOR. + Set *SUCCESS only of the data could be provided and the processing + had been called. + Use POOL for all allocations. + */ +svn_error_t * +svn_fs_fs__try_process_file_contents(svn_boolean_t *success, + svn_fs_t *fs, + node_revision_t *noderev, + svn_fs_process_contents_func_t processor, + void* baton, + apr_pool_t *pool); + +/* Set *STREAM_P to a delta stream turning the contents of the file SOURCE into + the contents of the file TARGET, allocated in POOL. + If SOURCE is null, the empty string will be used. */ +svn_error_t *svn_fs_fs__get_file_delta_stream(svn_txdelta_stream_t **stream_p, + svn_fs_t *fs, + node_revision_t *source, + node_revision_t *target, + apr_pool_t *pool); + +/* Set *PROPLIST to be an apr_hash_t containing the property list of + node-revision NODEREV as seen in filesystem FS. Use POOL for + temporary allocations. */ +svn_error_t *svn_fs_fs__get_proplist(apr_hash_t **proplist, + svn_fs_t *fs, + node_revision_t *noderev, + apr_pool_t *pool); + +/* Set *PROPLIST to be an apr_hash_t containing the property list of + revision REV as seen in filesystem FS. Use POOL for temporary + allocations. 
*/ +svn_error_t *svn_fs_fs__revision_proplist(apr_hash_t **proplist, + svn_fs_t *fs, + svn_revnum_t rev, + apr_pool_t *pool); + +/* Set *LENGTH to the be fulltext length of the node revision + specified by NODEREV. Use POOL for temporary allocations. */ +svn_error_t *svn_fs_fs__file_length(svn_filesize_t *length, + node_revision_t *noderev, + apr_pool_t *pool); + +/* Return TRUE if the representation keys in A and B both point to the + same representation, else return FALSE. */ +svn_boolean_t svn_fs_fs__noderev_same_rep_key(representation_t *a, + representation_t *b); + + +/* Return a copy of the representation REP allocated from POOL. */ +representation_t *svn_fs_fs__rep_copy(representation_t *rep, + apr_pool_t *pool); + + +/* Return the recorded checksum of type KIND for the text representation + of NODREV into CHECKSUM, allocating from POOL. If no stored checksum is + available, put all NULL into CHECKSUM. */ +svn_error_t *svn_fs_fs__file_checksum(svn_checksum_t **checksum, + node_revision_t *noderev, + svn_checksum_kind_t kind, + apr_pool_t *pool); + +/* Find the paths which were changed in revision REV of filesystem FS + and store them in *CHANGED_PATHS_P. Cached copyfrom information + will be stored in *COPYFROM_CACHE. Get any temporary allocations + from POOL. */ +svn_error_t *svn_fs_fs__paths_changed(apr_hash_t **changed_paths_p, + svn_fs_t *fs, + svn_revnum_t rev, + apr_hash_t *copyfrom_cache, + apr_pool_t *pool); + +/* Create a new transaction in filesystem FS, based on revision REV, + and store it in *TXN_P. Allocate all necessary variables from + POOL. */ +svn_error_t *svn_fs_fs__create_txn(svn_fs_txn_t **txn_p, + svn_fs_t *fs, + svn_revnum_t rev, + apr_pool_t *pool); + +/* Set the transaction property NAME to the value VALUE in transaction + TXN. Perform temporary allocations from POOL. 
*/ +svn_error_t *svn_fs_fs__change_txn_prop(svn_fs_txn_t *txn, + const char *name, + const svn_string_t *value, + apr_pool_t *pool); + +/* Change transaction properties in transaction TXN based on PROPS. + Perform temporary allocations from POOL. */ +svn_error_t *svn_fs_fs__change_txn_props(svn_fs_txn_t *txn, + const apr_array_header_t *props, + apr_pool_t *pool); + +/* Return whether or not the given FS supports mergeinfo metadata. */ +svn_boolean_t svn_fs_fs__fs_supports_mergeinfo(svn_fs_t *fs); + +/* Store a transaction record in *TXN_P for the transaction identified + by TXN_ID in filesystem FS. Allocate everything from POOL. */ +svn_error_t *svn_fs_fs__get_txn(transaction_t **txn_p, + svn_fs_t *fs, + const char *txn_id, + apr_pool_t *pool); + +/* Abort the existing transaction TXN, performing any temporary + allocations in POOL. */ +svn_error_t *svn_fs_fs__abort_txn(svn_fs_txn_t *txn, apr_pool_t *pool); + +/* Create an entirely new mutable node in the filesystem FS, whose + node-revision is NODEREV. Set *ID_P to the new node revision's ID. + Use POOL for any temporary allocation. COPY_ID is the copy_id to + use in the node revision ID. TXN_ID is the Subversion transaction + under which this occurs. */ +svn_error_t *svn_fs_fs__create_node(const svn_fs_id_t **id_p, + svn_fs_t *fs, + node_revision_t *noderev, + const char *copy_id, + const char *txn_id, + apr_pool_t *pool); + +/* Remove all references to the transaction TXN_ID from filesystem FS. + Temporary allocations are from POOL. */ +svn_error_t *svn_fs_fs__purge_txn(svn_fs_t *fs, + const char *txn_id, + apr_pool_t *pool); + +/* Add or set in filesystem FS, transaction TXN_ID, in directory + PARENT_NODEREV a directory entry for NAME pointing to ID of type + KIND. Allocations are done in POOL. 
*/ +svn_error_t *svn_fs_fs__set_entry(svn_fs_t *fs, + const char *txn_id, + node_revision_t *parent_noderev, + const char *name, + const svn_fs_id_t *id, + svn_node_kind_t kind, + apr_pool_t *pool); + +/* Add a change to the changes record for filesystem FS in transaction + TXN_ID. Mark path PATH, having node-id ID, as changed according to + the type in CHANGE_KIND. If the text representation was changed + set TEXT_MOD to TRUE, and likewise for PROP_MOD. If this change + was the result of a copy, set COPYFROM_REV and COPYFROM_PATH to the + revision and path of the copy source, otherwise they should be set + to SVN_INVALID_REVNUM and NULL. Perform any temporary allocations + from POOL. */ +svn_error_t *svn_fs_fs__add_change(svn_fs_t *fs, + const char *txn_id, + const char *path, + const svn_fs_id_t *id, + svn_fs_path_change_kind_t change_kind, + svn_boolean_t text_mod, + svn_boolean_t prop_mod, + svn_node_kind_t node_kind, + svn_revnum_t copyfrom_rev, + const char *copyfrom_path, + apr_pool_t *pool); + +/* Return a writable stream in *STREAM that allows storing the text + representation of node-revision NODEREV in filesystem FS. + Allocations are from POOL. */ +svn_error_t *svn_fs_fs__set_contents(svn_stream_t **stream, + svn_fs_t *fs, + node_revision_t *noderev, + apr_pool_t *pool); + +/* Create a node revision in FS which is an immediate successor of + OLD_ID, whose contents are NEW_NR. Set *NEW_ID_P to the new node + revision's ID. Use POOL for any temporary allocation. + + COPY_ID, if non-NULL, is a key into the `copies' table, and + indicates that this new node is being created as the result of a + copy operation, and specifically which operation that was. If + COPY_ID is NULL, then re-use the copy ID from the predecessor node. + + TXN_ID is the Subversion transaction under which this occurs. 
+ + After this call, the deltification code assumes that the new node's + contents will change frequently, and will avoid representing other + nodes as deltas against this node's contents. */ +svn_error_t *svn_fs_fs__create_successor(const svn_fs_id_t **new_id_p, + svn_fs_t *fs, + const svn_fs_id_t *old_idp, + node_revision_t *new_noderev, + const char *copy_id, + const char *txn_id, + apr_pool_t *pool); + +/* Write a new property list PROPLIST for node-revision NODEREV in + filesystem FS. Perform any temporary allocations in POOL. */ +svn_error_t *svn_fs_fs__set_proplist(svn_fs_t *fs, + node_revision_t *noderev, + apr_hash_t *proplist, + apr_pool_t *pool); + +/* Commit the transaction TXN in filesystem FS and return its new + revision number in *REV. If the transaction is out of date, return + the error SVN_ERR_FS_TXN_OUT_OF_DATE. Use POOL for temporary + allocations. */ +svn_error_t *svn_fs_fs__commit(svn_revnum_t *new_rev_p, + svn_fs_t *fs, + svn_fs_txn_t *txn, + apr_pool_t *pool); + +/* Return the next available copy_id in *COPY_ID for the transaction + TXN_ID in filesystem FS. Allocate space in POOL. */ +svn_error_t *svn_fs_fs__reserve_copy_id(const char **copy_id, + svn_fs_t *fs, + const char *txn_id, + apr_pool_t *pool); + +/* Create a fs_fs fileysystem referenced by FS at path PATH. Get any + temporary allocations from POOL. + + ### Some parts of *FS must have been initialized beforehand; some parts + (including FS->path) are initialized by this function. */ +svn_error_t *svn_fs_fs__create(svn_fs_t *fs, + const char *path, + apr_pool_t *pool); + +/* Set the uuid of repository FS to UUID, if UUID is not NULL; + otherwise, set the uuid of FS to a newly generated UUID. Perform + temporary allocations in POOL. */ +svn_error_t *svn_fs_fs__set_uuid(svn_fs_t *fs, + const char *uuid, + apr_pool_t *pool); + +/* Set *NAMES_P to an array of names which are all the active + transactions in filesystem FS. Allocate the array from POOL. 
*/ +svn_error_t *svn_fs_fs__list_transactions(apr_array_header_t **names_p, + svn_fs_t *fs, + apr_pool_t *pool); + +/* Open the transaction named NAME in filesystem FS. Set *TXN_P to + * the transaction. If there is no such transaction, return +` * SVN_ERR_FS_NO_SUCH_TRANSACTION. Allocate the new transaction in + * POOL. */ +svn_error_t *svn_fs_fs__open_txn(svn_fs_txn_t **txn_p, + svn_fs_t *fs, + const char *name, + apr_pool_t *pool); + +/* Return the property list from transaction TXN and store it in + *PROPLIST. Allocate the property list from POOL. */ +svn_error_t *svn_fs_fs__txn_proplist(apr_hash_t **proplist, + svn_fs_txn_t *txn, + apr_pool_t *pool); + +/* Delete the mutable node-revision referenced by ID, along with any + mutable props or directory contents associated with it. Perform + temporary allocations in POOL. */ +svn_error_t *svn_fs_fs__delete_node_revision(svn_fs_t *fs, + const svn_fs_id_t *id, + apr_pool_t *pool); + + +/* Find the paths which were changed in transaction TXN_ID of + filesystem FS and store them in *CHANGED_PATHS_P. + Get any temporary allocations from POOL. */ +svn_error_t *svn_fs_fs__txn_changes_fetch(apr_hash_t **changes, + svn_fs_t *fs, + const char *txn_id, + apr_pool_t *pool); + + +/* Set *PATH to the path of REV in FS, whether in a pack file or not. + Allocate *PATH in POOL. + + Note: If the caller does not have the write lock on FS, then the path is + not guaranteed to be correct or to remain correct after the function + returns, because the revision might become packed before or after this + call. If a file exists at that path, then it is correct; if not, then + the caller should call update_min_unpacked_rev() and re-try once. */ +svn_error_t * +svn_fs_fs__path_rev_absolute(const char **path, + svn_fs_t *fs, + svn_revnum_t rev, + apr_pool_t *pool); + +/* Return the path to the 'current' file in FS. + Perform allocation in POOL. 
*/ +const char * +svn_fs_fs__path_current(svn_fs_t *fs, apr_pool_t *pool); + +/* Obtain a write lock on the filesystem FS in a subpool of POOL, call + BODY with BATON and that subpool, destroy the subpool (releasing the write + lock) and return what BODY returned. */ +svn_error_t * +svn_fs_fs__with_write_lock(svn_fs_t *fs, + svn_error_t *(*body)(void *baton, + apr_pool_t *pool), + void *baton, + apr_pool_t *pool); + +/* Find the value of the property named PROPNAME in transaction TXN. + Return the contents in *VALUE_P. The contents will be allocated + from POOL. */ +svn_error_t *svn_fs_fs__revision_prop(svn_string_t **value_p, svn_fs_t *fs, + svn_revnum_t rev, + const char *propname, + apr_pool_t *pool); + +/* Change, add, or delete a property on a revision REV in filesystem + FS. NAME gives the name of the property, and value, if non-NULL, + gives the new contents of the property. If value is NULL, then the + property will be deleted. If OLD_VALUE_P is not NULL, do nothing unless the + preexisting value is *OLD_VALUE_P. Do any temporary allocation in POOL. */ +svn_error_t *svn_fs_fs__change_rev_prop(svn_fs_t *fs, svn_revnum_t rev, + const char *name, + const svn_string_t *const *old_value_p, + const svn_string_t *value, + apr_pool_t *pool); + +/* Retrieve information about the Subversion transaction SVN_TXN from + the `transactions' table of FS, allocating from POOL. Set + *ROOT_ID_P to the ID of the transaction's root directory. Set + *BASE_ROOT_ID_P to the ID of the root directory of the + transaction's base revision. + + If there is no such transaction, SVN_ERR_FS_NO_SUCH_TRANSACTION is + the error returned. + + Returns SVN_ERR_FS_TRANSACTION_NOT_MUTABLE if TXN_NAME refers to a + transaction that has already been committed. + + Allocate *ROOT_ID_P and *BASE_ROOT_ID_P in POOL. 
*/ +svn_error_t *svn_fs_fs__get_txn_ids(const svn_fs_id_t **root_id_p, + const svn_fs_id_t **base_root_id_p, + svn_fs_t *fs, + const char *txn_name, + apr_pool_t *pool); + +/* Begin a new transaction in filesystem FS, based on existing + revision REV. The new transaction is returned in *TXN_P. Allocate + the new transaction structure from POOL. */ +svn_error_t *svn_fs_fs__begin_txn(svn_fs_txn_t **txn_p, svn_fs_t *fs, + svn_revnum_t rev, apr_uint32_t flags, + apr_pool_t *pool); + +/* Find the value of the property named PROPNAME in transaction TXN. + Return the contents in *VALUE_P. The contents will be allocated + from POOL. */ +svn_error_t *svn_fs_fs__txn_prop(svn_string_t **value_p, svn_fs_txn_t *txn, + const char *propname, apr_pool_t *pool); + +/* If directory PATH does not exist, create it and give it the same + permissions as FS_PATH.*/ +svn_error_t *svn_fs_fs__ensure_dir_exists(const char *path, + const char *fs_path, + apr_pool_t *pool); + +/* Update the node origin index for FS, recording the mapping from + NODE_ID to NODE_REV_ID. Use POOL for any temporary allocations. + + Because this is just an "optional" cache, this function does not + return an error if the underlying storage is readonly; it still + returns an error for other error conditions. + */ +svn_error_t * +svn_fs_fs__set_node_origin(svn_fs_t *fs, + const char *node_id, + const svn_fs_id_t *node_rev_id, + apr_pool_t *pool); + +/* Set *ORIGIN_ID to the node revision ID from which the history of + all nodes in FS whose "Node ID" is NODE_ID springs, as determined + by a look in the index. ORIGIN_ID needs to be parsed in an + FS-backend-specific way. Use POOL for allocations. + + If there is no entry for NODE_ID in the cache, return NULL + in *ORIGIN_ID. */ +svn_error_t * +svn_fs_fs__get_node_origin(const svn_fs_id_t **origin_id, + svn_fs_t *fs, + const char *node_id, + apr_pool_t *pool); + + +/* Initialize all session-local caches in FS according to the global + cache settings. 
Use POOL for allocations. + + Please note that it is permissible for this function to set some + or all of these caches to NULL, regardless of any setting. */ +svn_error_t * +svn_fs_fs__initialize_caches(svn_fs_t *fs, apr_pool_t *pool); + +/* Initialize all transaction-local caches in FS according to the global + cache settings and make TXN_ID part of their key space. Use POOL for + allocations. + + Please note that it is permissible for this function to set some or all + of these caches to NULL, regardless of any setting. */ +svn_error_t * +svn_fs_fs__initialize_txn_caches(svn_fs_t *fs, + const char *txn_id, + apr_pool_t *pool); + +/* Resets the svn_cache__t structures local to the current transaction in FS. + Calling it more than once per txn or from outside any txn is allowed. */ +void +svn_fs_fs__reset_txn_caches(svn_fs_t *fs); + +/* Possibly pack the repository at PATH. This just take full shards, and + combines all the revision files into a single one, with a manifest header. + Use optional CANCEL_FUNC/CANCEL_BATON for cancellation support. + + Existing filesystem references need not change. */ +svn_error_t * +svn_fs_fs__pack(svn_fs_t *fs, + svn_fs_pack_notify_t notify_func, + void *notify_baton, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool); + + +#endif diff --git a/subversion/libsvn_fs_fs/id.c b/subversion/libsvn_fs_fs/id.c new file mode 100644 index 0000000..1317829 --- /dev/null +++ b/subversion/libsvn_fs_fs/id.c @@ -0,0 +1,405 @@ +/* id.c : operations on node-revision IDs + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include <string.h> +#include <stdlib.h> + +#include "id.h" +#include "../libsvn_fs/fs-loader.h" +#include "private/svn_temp_serializer.h" +#include "private/svn_string_private.h" + + +typedef struct id_private_t { + const char *node_id; + const char *copy_id; + const char *txn_id; + svn_revnum_t rev; + apr_off_t offset; +} id_private_t; + + +/* Accessing ID Pieces. */ + +const char * +svn_fs_fs__id_node_id(const svn_fs_id_t *id) +{ + id_private_t *pvt = id->fsap_data; + + return pvt->node_id; +} + + +const char * +svn_fs_fs__id_copy_id(const svn_fs_id_t *id) +{ + id_private_t *pvt = id->fsap_data; + + return pvt->copy_id; +} + + +const char * +svn_fs_fs__id_txn_id(const svn_fs_id_t *id) +{ + id_private_t *pvt = id->fsap_data; + + return pvt->txn_id; +} + + +svn_revnum_t +svn_fs_fs__id_rev(const svn_fs_id_t *id) +{ + id_private_t *pvt = id->fsap_data; + + return pvt->rev; +} + + +apr_off_t +svn_fs_fs__id_offset(const svn_fs_id_t *id) +{ + id_private_t *pvt = id->fsap_data; + + return pvt->offset; +} + + +svn_string_t * +svn_fs_fs__id_unparse(const svn_fs_id_t *id, + apr_pool_t *pool) +{ + id_private_t *pvt = id->fsap_data; + + if ((! 
pvt->txn_id)) + { + char rev_string[SVN_INT64_BUFFER_SIZE]; + char offset_string[SVN_INT64_BUFFER_SIZE]; + + svn__i64toa(rev_string, pvt->rev); + svn__i64toa(offset_string, pvt->offset); + return svn_string_createf(pool, "%s.%s.r%s/%s", + pvt->node_id, pvt->copy_id, + rev_string, offset_string); + } + else + { + return svn_string_createf(pool, "%s.%s.t%s", + pvt->node_id, pvt->copy_id, + pvt->txn_id); + } +} + + +/*** Comparing node IDs ***/ + +svn_boolean_t +svn_fs_fs__id_eq(const svn_fs_id_t *a, + const svn_fs_id_t *b) +{ + id_private_t *pvta = a->fsap_data, *pvtb = b->fsap_data; + + if (a == b) + return TRUE; + if (strcmp(pvta->node_id, pvtb->node_id) != 0) + return FALSE; + if (strcmp(pvta->copy_id, pvtb->copy_id) != 0) + return FALSE; + if ((pvta->txn_id == NULL) != (pvtb->txn_id == NULL)) + return FALSE; + if (pvta->txn_id && pvtb->txn_id && strcmp(pvta->txn_id, pvtb->txn_id) != 0) + return FALSE; + if (pvta->rev != pvtb->rev) + return FALSE; + if (pvta->offset != pvtb->offset) + return FALSE; + return TRUE; +} + + +svn_boolean_t +svn_fs_fs__id_check_related(const svn_fs_id_t *a, + const svn_fs_id_t *b) +{ + id_private_t *pvta = a->fsap_data, *pvtb = b->fsap_data; + + if (a == b) + return TRUE; + /* If both node_ids start with _ and they have differing transaction + IDs, then it is impossible for them to be related. */ + if (pvta->node_id[0] == '_') + { + if (pvta->txn_id && pvtb->txn_id && + (strcmp(pvta->txn_id, pvtb->txn_id) != 0)) + return FALSE; + } + + return (strcmp(pvta->node_id, pvtb->node_id) == 0); +} + + +int +svn_fs_fs__id_compare(const svn_fs_id_t *a, + const svn_fs_id_t *b) +{ + if (svn_fs_fs__id_eq(a, b)) + return 0; + return (svn_fs_fs__id_check_related(a, b) ? 1 : -1); +} + + + +/* Creating ID's. 
*/ + +static id_vtable_t id_vtable = { + svn_fs_fs__id_unparse, + svn_fs_fs__id_compare +}; + + +svn_fs_id_t * +svn_fs_fs__id_txn_create(const char *node_id, + const char *copy_id, + const char *txn_id, + apr_pool_t *pool) +{ + svn_fs_id_t *id = apr_palloc(pool, sizeof(*id)); + id_private_t *pvt = apr_palloc(pool, sizeof(*pvt)); + + pvt->node_id = apr_pstrdup(pool, node_id); + pvt->copy_id = apr_pstrdup(pool, copy_id); + pvt->txn_id = apr_pstrdup(pool, txn_id); + pvt->rev = SVN_INVALID_REVNUM; + pvt->offset = -1; + + id->vtable = &id_vtable; + id->fsap_data = pvt; + return id; +} + + +svn_fs_id_t * +svn_fs_fs__id_rev_create(const char *node_id, + const char *copy_id, + svn_revnum_t rev, + apr_off_t offset, + apr_pool_t *pool) +{ + svn_fs_id_t *id = apr_palloc(pool, sizeof(*id)); + id_private_t *pvt = apr_palloc(pool, sizeof(*pvt)); + + pvt->node_id = apr_pstrdup(pool, node_id); + pvt->copy_id = apr_pstrdup(pool, copy_id); + pvt->txn_id = NULL; + pvt->rev = rev; + pvt->offset = offset; + + id->vtable = &id_vtable; + id->fsap_data = pvt; + return id; +} + + +svn_fs_id_t * +svn_fs_fs__id_copy(const svn_fs_id_t *id, apr_pool_t *pool) +{ + svn_fs_id_t *new_id = apr_palloc(pool, sizeof(*new_id)); + id_private_t *new_pvt = apr_palloc(pool, sizeof(*new_pvt)); + id_private_t *pvt = id->fsap_data; + + new_pvt->node_id = apr_pstrdup(pool, pvt->node_id); + new_pvt->copy_id = apr_pstrdup(pool, pvt->copy_id); + new_pvt->txn_id = pvt->txn_id ? apr_pstrdup(pool, pvt->txn_id) : NULL; + new_pvt->rev = pvt->rev; + new_pvt->offset = pvt->offset; + + new_id->vtable = &id_vtable; + new_id->fsap_data = new_pvt; + return new_id; +} + + +svn_fs_id_t * +svn_fs_fs__id_parse(const char *data, + apr_size_t len, + apr_pool_t *pool) +{ + svn_fs_id_t *id; + id_private_t *pvt; + char *data_copy, *str; + + /* Dup the ID data into POOL. Our returned ID will have references + into this memory. */ + data_copy = apr_pstrmemdup(pool, data, len); + + /* Alloc a new svn_fs_id_t structure. 
*/ + id = apr_palloc(pool, sizeof(*id)); + pvt = apr_palloc(pool, sizeof(*pvt)); + id->vtable = &id_vtable; + id->fsap_data = pvt; + + /* Now, we basically just need to "split" this data on `.' + characters. We will use svn_cstring_tokenize, which will put + terminators where each of the '.'s used to be. Then our new + id field will reference string locations inside our duplicate + string.*/ + + /* Node Id */ + str = svn_cstring_tokenize(".", &data_copy); + if (str == NULL) + return NULL; + pvt->node_id = str; + + /* Copy Id */ + str = svn_cstring_tokenize(".", &data_copy); + if (str == NULL) + return NULL; + pvt->copy_id = str; + + /* Txn/Rev Id */ + str = svn_cstring_tokenize(".", &data_copy); + if (str == NULL) + return NULL; + + if (str[0] == 'r') + { + apr_int64_t val; + svn_error_t *err; + + /* This is a revision type ID */ + pvt->txn_id = NULL; + + data_copy = str + 1; + str = svn_cstring_tokenize("/", &data_copy); + if (str == NULL) + return NULL; + pvt->rev = SVN_STR_TO_REV(str); + + str = svn_cstring_tokenize("/", &data_copy); + if (str == NULL) + return NULL; + err = svn_cstring_atoi64(&val, str); + if (err) + { + svn_error_clear(err); + return NULL; + } + pvt->offset = (apr_off_t)val; + } + else if (str[0] == 't') + { + /* This is a transaction type ID */ + pvt->txn_id = str + 1; + pvt->rev = SVN_INVALID_REVNUM; + pvt->offset = -1; + } + else + return NULL; + + return id; +} + +/* (de-)serialization support */ + +/* Serialization of the PVT sub-structure within the CONTEXT. 
+ */ +static void +serialize_id_private(svn_temp_serializer__context_t *context, + const id_private_t * const *pvt) +{ + const id_private_t *private = *pvt; + + /* serialize the pvt data struct itself */ + svn_temp_serializer__push(context, + (const void * const *)pvt, + sizeof(*private)); + + /* append the referenced strings */ + svn_temp_serializer__add_string(context, &private->node_id); + svn_temp_serializer__add_string(context, &private->copy_id); + svn_temp_serializer__add_string(context, &private->txn_id); + + /* return to caller's nesting level */ + svn_temp_serializer__pop(context); +} + +/* Serialize an ID within the serialization CONTEXT. + */ +void +svn_fs_fs__id_serialize(svn_temp_serializer__context_t *context, + const struct svn_fs_id_t * const *id) +{ + /* nothing to do for NULL ids */ + if (*id == NULL) + return; + + /* serialize the id data struct itself */ + svn_temp_serializer__push(context, + (const void * const *)id, + sizeof(**id)); + + /* serialize the id_private_t data sub-struct */ + serialize_id_private(context, + (const id_private_t * const *)&(*id)->fsap_data); + + /* return to caller's nesting level */ + svn_temp_serializer__pop(context); +} + +/* Deserialization of the PVT sub-structure in BUFFER. + */ +static void +deserialize_id_private(void *buffer, id_private_t **pvt) +{ + /* fixup the reference to the only sub-structure */ + id_private_t *private; + svn_temp_deserializer__resolve(buffer, (void**)pvt); + + /* fixup the sub-structure itself */ + private = *pvt; + svn_temp_deserializer__resolve(private, (void**)&private->node_id); + svn_temp_deserializer__resolve(private, (void**)&private->copy_id); + svn_temp_deserializer__resolve(private, (void**)&private->txn_id); +} + +/* Deserialize an ID inside the BUFFER. + */ +void +svn_fs_fs__id_deserialize(void *buffer, svn_fs_id_t **id) +{ + /* The id maybe all what is in the whole buffer. 
+ * Don't try to fixup the pointer in that case*/ + if (*id != buffer) + svn_temp_deserializer__resolve(buffer, (void**)id); + + /* no id, no sub-structure fixup necessary */ + if (*id == NULL) + return; + + /* the stored vtable is bogus at best -> set the right one */ + (*id)->vtable = &id_vtable; + + /* handle sub-structures */ + deserialize_id_private(*id, (id_private_t **)&(*id)->fsap_data); +} + diff --git a/subversion/libsvn_fs_fs/id.h b/subversion/libsvn_fs_fs/id.h new file mode 100644 index 0000000..11da466 --- /dev/null +++ b/subversion/libsvn_fs_fs/id.h @@ -0,0 +1,116 @@ +/* id.h : interface to node ID functions, private to libsvn_fs_fs + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#ifndef SVN_LIBSVN_FS_FS_ID_H +#define SVN_LIBSVN_FS_FS_ID_H + +#include "svn_fs.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +/*** ID accessor functions. ***/ + +/* Get the "node id" portion of ID. */ +const char *svn_fs_fs__id_node_id(const svn_fs_id_t *id); + +/* Get the "copy id" portion of ID. 
*/ +const char *svn_fs_fs__id_copy_id(const svn_fs_id_t *id); + +/* Get the "txn id" portion of ID, or NULL if it is a permanent ID. */ +const char *svn_fs_fs__id_txn_id(const svn_fs_id_t *id); + +/* Get the "rev" portion of ID, or SVN_INVALID_REVNUM if it is a + transaction ID. */ +svn_revnum_t svn_fs_fs__id_rev(const svn_fs_id_t *id); + +/* Access the "offset" portion of the ID, or -1 if it is a transaction + ID. */ +apr_off_t svn_fs_fs__id_offset(const svn_fs_id_t *id); + +/* Convert ID into string form, allocated in POOL. */ +svn_string_t *svn_fs_fs__id_unparse(const svn_fs_id_t *id, + apr_pool_t *pool); + +/* Return true if A and B are equal. */ +svn_boolean_t svn_fs_fs__id_eq(const svn_fs_id_t *a, + const svn_fs_id_t *b); + +/* Return true if A and B are related. */ +svn_boolean_t svn_fs_fs__id_check_related(const svn_fs_id_t *a, + const svn_fs_id_t *b); + +/* Return 0 if A and B are equal, 1 if they are related, -1 otherwise. */ +int svn_fs_fs__id_compare(const svn_fs_id_t *a, + const svn_fs_id_t *b); + +/* Create an ID within a transaction based on NODE_ID, COPY_ID, and + TXN_ID, allocated in POOL. */ +svn_fs_id_t *svn_fs_fs__id_txn_create(const char *node_id, + const char *copy_id, + const char *txn_id, + apr_pool_t *pool); + +/* Create a permanent ID based on NODE_ID, COPY_ID, REV, and OFFSET, + allocated in POOL. */ +svn_fs_id_t *svn_fs_fs__id_rev_create(const char *node_id, + const char *copy_id, + svn_revnum_t rev, + apr_off_t offset, + apr_pool_t *pool); + +/* Return a copy of ID, allocated from POOL. */ +svn_fs_id_t *svn_fs_fs__id_copy(const svn_fs_id_t *id, + apr_pool_t *pool); + +/* Return an ID resulting from parsing the string DATA (with length + LEN), or NULL if DATA is an invalid ID string. */ +svn_fs_id_t *svn_fs_fs__id_parse(const char *data, + apr_size_t len, + apr_pool_t *pool); + + +/* (de-)serialization support*/ + +struct svn_temp_serializer__context_t; + +/** + * Serialize an @a id within the serialization @a context. 
+ */ +void +svn_fs_fs__id_serialize(struct svn_temp_serializer__context_t *context, + const svn_fs_id_t * const *id); + +/** + * Deserialize an @a id within the @a buffer. + */ +void +svn_fs_fs__id_deserialize(void *buffer, + svn_fs_id_t **id); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* SVN_LIBSVN_FS_FS_ID_H */ diff --git a/subversion/libsvn_fs_fs/key-gen.c b/subversion/libsvn_fs_fs/key-gen.c new file mode 100644 index 0000000..a65c59d --- /dev/null +++ b/subversion/libsvn_fs_fs/key-gen.c @@ -0,0 +1,159 @@ +/* key-gen.c --- manufacturing sequential keys for some db tables + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include <assert.h> +#include <string.h> +#include <stdlib.h> +#include <apr.h> +#include <apr_network_io.h> +#include "private/svn_fs_private.h" +#include "key-gen.h" + +/* The Berkeley DB backend uses a key as a transaction name and the + maximum key size must be less than the maximum transaction name + length. 
 */
+#if MAX_KEY_SIZE > SVN_FS__TXN_MAX_LEN
+#error The MAX_KEY_SIZE used for BDB txn names is greater than SVN_FS__TXN_MAX_LEN.
+#endif
+
+
+/*** Keys for reps and strings. ***/
+
+/* Base-36 addition of KEY1 and KEY2 into RESULT; the key format and
+   contracts are documented in key-gen.h. */
+void
+svn_fs_fs__add_keys(const char *key1, const char *key2, char *result)
+{
+  apr_ssize_t i1 = strlen(key1) - 1;
+  apr_ssize_t i2 = strlen(key2) - 1;
+  int i3 = 0;
+  int val;
+  int carry = 0;
+  char buf[MAX_KEY_SIZE + 2];
+
+  /* Add digit-by-digit from the least significant (rightmost) end,
+     accumulating into BUF in reverse order. */
+  while ((i1 >= 0) || (i2 >= 0) || (carry > 0))
+    {
+      val = carry;
+      if (i1>=0)
+        val += (key1[i1] <= '9') ? (key1[i1] - '0') : (key1[i1] - 'a' + 10);
+
+      if (i2>=0)
+        val += (key2[i2] <= '9') ? (key2[i2] - '0') : (key2[i2] - 'a' + 10);
+
+      carry = val / 36;
+      val = val % 36;
+
+      buf[i3++] = (char)((val <= 9) ? (val + '0') : (val - 10 + 'a'));
+
+      if (i1>=0)
+        i1--;
+      if (i2>=0)
+        i2--;
+    }
+
+  /* Now reverse the resulting string and NULL terminate it. */
+  for (i1 = 0; i1 < i3; i1++)
+    result[i1] = buf[i3 - i1 - 1];
+
+  result[i1] = '\0';
+}
+
+
+/* Note for reviewers: the parameter name 'this' is valid in C but would
+   clash with the keyword under a C++ compiler. */
+void
+svn_fs_fs__next_key(const char *this, apr_size_t *len, char *next)
+{
+  apr_ssize_t i;
+  apr_size_t olen = *len;     /* remember the first length */
+  char c;                     /* current char */
+  svn_boolean_t carry = TRUE; /* boolean: do we have a carry or not?
+                                 We start with a carry, because we're
+                                 incrementing the number, after all. */
+
+  /* Leading zeros are not allowed, except for the string "0". */
+  if ((*len > 1) && (this[0] == '0'))
+    {
+      *len = 0;
+      return;
+    }
+
+  for (i = (olen - 1); i >= 0; i--)
+    {
+      c = this[i];
+
+      /* Validate as we go. */
+      if (! (((c >= '0') && (c <= '9')) || ((c >= 'a') && (c <= 'z'))))
+        {
+          *len = 0;
+          return;
+        }
+
+      if (carry)
+        {
+          if (c == 'z')
+            next[i] = '0';
+          else
+            {
+              carry = FALSE;
+
+              if (c == '9')
+                next[i] = 'a';
+              else
+                next[i] = ++c;
+            }
+        }
+      else
+        next[i] = c;
+    }
+
+  /* The new length is OLEN, plus 1 if there's a carry out of the
+     leftmost digit. */
+  *len = olen + (carry ? 1 : 0);
+
+  /* Ensure that we haven't overrun the (ludicrous) bound on key length.
+     Note that MAX_KEY_SIZE is a bound on the size *including*
+     the trailing null byte. */
+  assert(*len < MAX_KEY_SIZE);
+
+  /* Now we know it's safe to add the null terminator. */
+  next[*len] = '\0';
+
+  /* Handle any leftover carry. */
+  if (carry)
+    {
+      memmove(next+1, next, olen);
+      next[0] = '1';
+    }
+}
+
+
+/* Shorter keys order before longer ones (fewer base-36 digits means a
+   smaller number); equal lengths fall back to lexicographic comparison,
+   with the strcmp result normalized to -1/0/1. */
+int
+svn_fs_fs__key_compare(const char *a, const char *b)
+{
+  apr_size_t a_len = strlen(a);
+  apr_size_t b_len = strlen(b);
+  int cmp;
+
+  if (a_len > b_len)
+    return 1;
+  if (b_len > a_len)
+    return -1;
+  cmp = strcmp(a, b);
+  return (cmp ? (cmp / abs(cmp)) : 0);
+}
diff --git a/subversion/libsvn_fs_fs/key-gen.h b/subversion/libsvn_fs_fs/key-gen.h
new file mode 100644
index 0000000..e1b3858
--- /dev/null
+++ b/subversion/libsvn_fs_fs/key-gen.h
@@ -0,0 +1,91 @@
+/* key-gen.h --- manufacturing sequential keys for some db tables
+ *
+ * ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one
+ *    or more contributor license agreements.  See the NOTICE file
+ *    distributed with this work for additional information
+ *    regarding copyright ownership.  The ASF licenses this file
+ *    to you under the Apache License, Version 2.0 (the
+ *    "License"); you may not use this file except in compliance
+ *    with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing,
+ *    software distributed under the License is distributed on an
+ *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *    KIND, either express or implied.  See the License for the
+ *    specific language governing permissions and limitations
+ *    under the License.
+ * ==================================================================== + */ + +#ifndef SVN_LIBSVN_FS_KEY_GEN_H +#define SVN_LIBSVN_FS_KEY_GEN_H + +#include <apr.h> + +#include "svn_types.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +/* The alphanumeric keys passed in and out of svn_fs_fs__next_key + are guaranteed never to be longer than this many bytes, + *including* the trailing null byte. It is therefore safe + to declare a key as "char key[MAX_KEY_SIZE]". + + Note that this limit will be a problem if the number of + keys in a table ever exceeds + + 18217977168218728251394687124089371267338971528174 + 76066745969754933395997209053270030282678007662838 + 67331479599455916367452421574456059646801054954062 + 15017704234999886990788594743994796171248406730973 + 80736524850563115569208508785942830080999927310762 + 50733948404739350551934565743979678824151197232629 + 947748581376, + + but that's a risk we'll live with for now. */ +#define MAX_KEY_SIZE 200 + + +/* Generate the next key after a given alphanumeric key. + * + * The first *LEN bytes of THIS are an ascii representation of a + * number in base 36: digits 0-9 have their usual values, and a-z have + * values 10-35. + * + * The new key is stored in NEXT, null-terminated. NEXT must be at + * least *LEN + 2 bytes long -- one extra byte to hold a possible + * overflow column, and one for null termination. On return, *LEN + * will be set to the length of the new key, not counting the null + * terminator. In other words, the outgoing *LEN will be either equal + * to the incoming, or to the incoming + 1. + * + * If THIS contains anything other than digits and lower-case + * alphabetic characters, or if it starts with `0' but is not the + * string "0", then *LEN is set to zero and the effect on NEXT + * is undefined. + */ +void svn_fs_fs__next_key(const char *this, apr_size_t *len, char *next); + + +/* Compare two strings A and B as base-36 alphanumeric keys. 
+ * + * Return -1, 0, or 1 if A is less than, equal to, or greater than B, + * respectively. + */ +int svn_fs_fs__key_compare(const char *a, const char *b); + +/* Add two base-36 alphanumeric keys to get a third, the result. */ +void svn_fs_fs__add_keys(const char *key1, const char *key2, char *result); + + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* SVN_LIBSVN_FS_KEY_GEN_H */ diff --git a/subversion/libsvn_fs_fs/lock.c b/subversion/libsvn_fs_fs/lock.c new file mode 100644 index 0000000..95bd943 --- /dev/null +++ b/subversion/libsvn_fs_fs/lock.c @@ -0,0 +1,1079 @@ +/* lock.c : functions for manipulating filesystem locks. + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * ====================================================================
+ */
+
+
+#include "svn_pools.h"
+#include "svn_error.h"
+#include "svn_dirent_uri.h"
+#include "svn_path.h"
+#include "svn_fs.h"
+#include "svn_hash.h"
+#include "svn_time.h"
+#include "svn_utf.h"
+
+#include <apr_uuid.h>
+#include <apr_file_io.h>
+#include <apr_file_info.h>
+
+#include "lock.h"
+#include "tree.h"
+#include "fs_fs.h"
+#include "../libsvn_fs/fs-loader.h"
+
+#include "private/svn_fs_util.h"
+#include "private/svn_fspath.h"
+#include "svn_private_config.h"
+
+/* Names of hash keys used to store a lock for writing to disk. */
+#define PATH_KEY "path"
+#define TOKEN_KEY "token"
+#define OWNER_KEY "owner"
+#define CREATION_DATE_KEY "creation_date"
+#define EXPIRATION_DATE_KEY "expiration_date"
+#define COMMENT_KEY "comment"
+#define IS_DAV_COMMENT_KEY "is_dav_comment"
+#define CHILDREN_KEY "children"
+
+/* Number of characters from the head of a digest file name used to
+   calculate a subdirectory in which to drop that file. */
+#define DIGEST_SUBDIR_LEN 3
+
+
+
+/*** Generic helper functions. ***/
+
+/* Set *DIGEST to the MD5 hash of STR. */
+static svn_error_t *
+make_digest(const char **digest,
+            const char *str,
+            apr_pool_t *pool)
+{
+  svn_checksum_t *checksum;
+
+  SVN_ERR(svn_checksum(&checksum, svn_checksum_md5, str, strlen(str), pool));
+
+  *digest = svn_checksum_to_cstring_display(checksum, pool);
+  return SVN_NO_ERROR;
+}
+
+
+/* Set the value of KEY (whose size is KEY_LEN, or APR_HASH_KEY_STRING
+   if unknown) to an svn_string_t-ized version of VALUE (whose size is
+   VALUE_LEN, or APR_HASH_KEY_STRING if unknown) in HASH.  The value
+   will be allocated in POOL; KEY will not be duped.  If either KEY or VALUE
+   is NULL, this function will do nothing. */
+static void
+hash_store(apr_hash_t *hash,
+           const char *key,
+           apr_ssize_t key_len,
+           const char *value,
+           apr_ssize_t value_len,
+           apr_pool_t *pool)
+{
+  if (! (key && value))
+    return;
+  if (value_len == APR_HASH_KEY_STRING)
+    value_len = strlen(value);
+  apr_hash_set(hash, key, key_len,
+               svn_string_ncreate(value, value_len, pool));
+}
+
+
+/* Fetch the value of KEY from HASH, returning only the cstring data
+   of that value (if it exists).
+   Note: POOL is currently unused here. */
+static const char *
+hash_fetch(apr_hash_t *hash,
+           const char *key,
+           apr_pool_t *pool)
+{
+  svn_string_t *str = svn_hash_gets(hash, key);
+  return str ? str->data : NULL;
+}
+
+
+/* SVN_ERR_FS_CORRUPT: the lockfile for PATH in FS is corrupt. */
+static svn_error_t *
+err_corrupt_lockfile(const char *fs_path, const char *path)
+{
+  return
+    svn_error_createf(
+     SVN_ERR_FS_CORRUPT, 0,
+     _("Corrupt lockfile for path '%s' in filesystem '%s'"),
+     path, fs_path);
+}
+
+
+/*** Digest file handling functions. ***/
+
+/* Return the path of the lock/entries file for which DIGEST is the
+   hashed repository relative path. */
+static const char *
+digest_path_from_digest(const char *fs_path,
+                        const char *digest,
+                        apr_pool_t *pool)
+{
+  return svn_dirent_join_many(pool, fs_path, PATH_LOCKS_DIR,
+                              apr_pstrmemdup(pool, digest, DIGEST_SUBDIR_LEN),
+                              digest, NULL);
+}
+
+
+/* Set *DIGEST_PATH to the path to the lock/entries digest file associated
+   with PATH, where PATH is the path to the lock file or lock entries file
+   in FS. */
+static svn_error_t *
+digest_path_from_path(const char **digest_path,
+                      const char *fs_path,
+                      const char *path,
+                      apr_pool_t *pool)
+{
+  const char *digest;
+  SVN_ERR(make_digest(&digest, path, pool));
+  *digest_path = svn_dirent_join_many(pool, fs_path, PATH_LOCKS_DIR,
+                                      apr_pstrmemdup(pool, digest,
+                                                     DIGEST_SUBDIR_LEN),
+                                      digest, NULL);
+  return SVN_NO_ERROR;
+}
+
+
+/* Write to DIGEST_PATH a representation of CHILDREN (which may be
+   empty, if the versioned path in FS represented by DIGEST_PATH has
+   no children) and LOCK (which may be NULL if that versioned path is
+   not itself locked).  Set the permissions of DIGEST_PATH to those of
+   PERMS_REFERENCE.
Use POOL for all allocations.
+ */
+static svn_error_t *
+write_digest_file(apr_hash_t *children,
+                  svn_lock_t *lock,
+                  const char *fs_path,
+                  const char *digest_path,
+                  const char *perms_reference,
+                  apr_pool_t *pool)
+{
+  svn_error_t *err = SVN_NO_ERROR;
+  svn_stream_t *stream;
+  apr_hash_index_t *hi;
+  apr_hash_t *hash = apr_hash_make(pool);
+  const char *tmp_path;
+
+  SVN_ERR(svn_fs_fs__ensure_dir_exists(svn_dirent_join(fs_path, PATH_LOCKS_DIR,
+                                                       pool), fs_path, pool));
+  SVN_ERR(svn_fs_fs__ensure_dir_exists(svn_dirent_dirname(digest_path, pool),
+                                       fs_path, pool));
+
+  /* Flatten the lock, if any, into string-valued hash fields. */
+  if (lock)
+    {
+      const char *creation_date = NULL, *expiration_date = NULL;
+      if (lock->creation_date)
+        creation_date = svn_time_to_cstring(lock->creation_date, pool);
+      if (lock->expiration_date)
+        expiration_date = svn_time_to_cstring(lock->expiration_date, pool);
+      hash_store(hash, PATH_KEY, sizeof(PATH_KEY)-1,
+                 lock->path, APR_HASH_KEY_STRING, pool);
+      hash_store(hash, TOKEN_KEY, sizeof(TOKEN_KEY)-1,
+                 lock->token, APR_HASH_KEY_STRING, pool);
+      hash_store(hash, OWNER_KEY, sizeof(OWNER_KEY)-1,
+                 lock->owner, APR_HASH_KEY_STRING, pool);
+      hash_store(hash, COMMENT_KEY, sizeof(COMMENT_KEY)-1,
+                 lock->comment, APR_HASH_KEY_STRING, pool);
+      hash_store(hash, IS_DAV_COMMENT_KEY, sizeof(IS_DAV_COMMENT_KEY)-1,
+                 lock->is_dav_comment ? "1" : "0", 1, pool);
+      hash_store(hash, CREATION_DATE_KEY, sizeof(CREATION_DATE_KEY)-1,
+                 creation_date, APR_HASH_KEY_STRING, pool);
+      hash_store(hash, EXPIRATION_DATE_KEY, sizeof(EXPIRATION_DATE_KEY)-1,
+                 expiration_date, APR_HASH_KEY_STRING, pool);
+    }
+  /* Store the children as one newline-separated string field. */
+  if (apr_hash_count(children))
+    {
+      svn_stringbuf_t *children_list = svn_stringbuf_create_empty(pool);
+      for (hi = apr_hash_first(pool, children); hi; hi = apr_hash_next(hi))
+        {
+          svn_stringbuf_appendbytes(children_list,
+                                    svn__apr_hash_index_key(hi),
+                                    svn__apr_hash_index_klen(hi));
+          svn_stringbuf_appendbyte(children_list, '\n');
+        }
+      hash_store(hash, CHILDREN_KEY, sizeof(CHILDREN_KEY)-1,
+                 children_list->data, children_list->len, pool);
+    }
+
+  /* Write to a unique temporary file, then rename into place, so a
+     reader never sees a partially written digest file. */
+  SVN_ERR(svn_stream_open_unique(&stream, &tmp_path,
+                                 svn_dirent_dirname(digest_path, pool),
+                                 svn_io_file_del_none, pool, pool));
+  if ((err = svn_hash_write2(hash, stream, SVN_HASH_TERMINATOR, pool)))
+    {
+      svn_error_clear(svn_stream_close(stream));
+      return svn_error_createf(err->apr_err,
+                               err,
+                               _("Cannot write lock/entries hashfile '%s'"),
+                               svn_dirent_local_style(tmp_path, pool));
+    }
+
+  SVN_ERR(svn_stream_close(stream));
+  SVN_ERR(svn_io_file_rename(tmp_path, digest_path, pool));
+  SVN_ERR(svn_io_copy_perms(perms_reference, digest_path, pool));
+  return SVN_NO_ERROR;
+}
+
+
+/* Parse the file at DIGEST_PATH, populating the lock LOCK_P in that
+   file (if it exists, and if *LOCK_P is non-NULL) and the hash of
+   CHILDREN_P (if any exist, and if *CHILDREN_P is non-NULL).  Use POOL
+   for all allocations.
*/ +static svn_error_t * +read_digest_file(apr_hash_t **children_p, + svn_lock_t **lock_p, + const char *fs_path, + const char *digest_path, + apr_pool_t *pool) +{ + svn_error_t *err = SVN_NO_ERROR; + svn_lock_t *lock; + apr_hash_t *hash; + svn_stream_t *stream; + const char *val; + + if (lock_p) + *lock_p = NULL; + if (children_p) + *children_p = apr_hash_make(pool); + + err = svn_stream_open_readonly(&stream, digest_path, pool, pool); + if (err && APR_STATUS_IS_ENOENT(err->apr_err)) + { + svn_error_clear(err); + return SVN_NO_ERROR; + } + SVN_ERR(err); + + /* If our caller doesn't care about anything but the presence of the + file... whatever. */ + if (! (lock_p || children_p)) + return svn_stream_close(stream); + + hash = apr_hash_make(pool); + if ((err = svn_hash_read2(hash, stream, SVN_HASH_TERMINATOR, pool))) + { + svn_error_clear(svn_stream_close(stream)); + return svn_error_createf(err->apr_err, + err, + _("Can't parse lock/entries hashfile '%s'"), + svn_dirent_local_style(digest_path, pool)); + } + SVN_ERR(svn_stream_close(stream)); + + /* If our caller cares, see if we have a lock path in our hash. If + so, we'll assume we have a lock here. */ + val = hash_fetch(hash, PATH_KEY, pool); + if (val && lock_p) + { + const char *path = val; + + /* Create our lock and load it up. */ + lock = svn_lock_create(pool); + lock->path = path; + + if (! ((lock->token = hash_fetch(hash, TOKEN_KEY, pool)))) + return svn_error_trace(err_corrupt_lockfile(fs_path, path)); + + if (! ((lock->owner = hash_fetch(hash, OWNER_KEY, pool)))) + return svn_error_trace(err_corrupt_lockfile(fs_path, path)); + + if (! ((val = hash_fetch(hash, IS_DAV_COMMENT_KEY, pool)))) + return svn_error_trace(err_corrupt_lockfile(fs_path, path)); + lock->is_dav_comment = (val[0] == '1'); + + if (! 
((val = hash_fetch(hash, CREATION_DATE_KEY, pool)))) + return svn_error_trace(err_corrupt_lockfile(fs_path, path)); + SVN_ERR(svn_time_from_cstring(&(lock->creation_date), val, pool)); + + if ((val = hash_fetch(hash, EXPIRATION_DATE_KEY, pool))) + SVN_ERR(svn_time_from_cstring(&(lock->expiration_date), val, pool)); + + lock->comment = hash_fetch(hash, COMMENT_KEY, pool); + + *lock_p = lock; + } + + /* If our caller cares, see if we have any children for this path. */ + val = hash_fetch(hash, CHILDREN_KEY, pool); + if (val && children_p) + { + apr_array_header_t *kiddos = svn_cstring_split(val, "\n", FALSE, pool); + int i; + + for (i = 0; i < kiddos->nelts; i++) + { + svn_hash_sets(*children_p, APR_ARRAY_IDX(kiddos, i, const char *), + (void *)1); + } + } + return SVN_NO_ERROR; +} + + + +/*** Lock helper functions (path here are still FS paths, not on-disk + schema-supporting paths) ***/ + + +/* Write LOCK in FS to the actual OS filesystem. + + Use PERMS_REFERENCE for the permissions of any digest files. + + Note: this takes an FS_PATH because it's called from the hotcopy logic. + */ +static svn_error_t * +set_lock(const char *fs_path, + svn_lock_t *lock, + const char *perms_reference, + apr_pool_t *pool) +{ + svn_stringbuf_t *this_path = svn_stringbuf_create(lock->path, pool); + const char *lock_digest_path = NULL; + apr_pool_t *subpool; + + SVN_ERR_ASSERT(lock); + + /* Iterate in reverse, creating the lock for LOCK->path, and then + just adding entries for its parent, until we reach a parent + that's already listed in *its* parent. */ + subpool = svn_pool_create(pool); + while (1729) + { + const char *digest_path, *digest_file; + apr_hash_t *this_children; + svn_lock_t *this_lock; + + svn_pool_clear(subpool); + + /* Calculate the DIGEST_PATH for the currently FS path, and then + get its DIGEST_FILE basename. 
*/ + SVN_ERR(digest_path_from_path(&digest_path, fs_path, this_path->data, + subpool)); + digest_file = svn_dirent_basename(digest_path, subpool); + + SVN_ERR(read_digest_file(&this_children, &this_lock, fs_path, + digest_path, subpool)); + + /* We're either writing a new lock (first time through only) or + a new entry (every time but the first). */ + if (lock) + { + this_lock = lock; + lock = NULL; + lock_digest_path = apr_pstrdup(pool, digest_file); + } + else + { + /* If we already have an entry for this path, we're done. */ + if (svn_hash_gets(this_children, lock_digest_path)) + break; + svn_hash_sets(this_children, lock_digest_path, (void *)1); + } + SVN_ERR(write_digest_file(this_children, this_lock, fs_path, + digest_path, perms_reference, subpool)); + + /* Prep for next iteration, or bail if we're done. */ + if (svn_fspath__is_root(this_path->data, this_path->len)) + break; + svn_stringbuf_set(this_path, + svn_fspath__dirname(this_path->data, subpool)); + } + + svn_pool_destroy(subpool); + return SVN_NO_ERROR; +} + +/* Delete LOCK from FS in the actual OS filesystem. */ +static svn_error_t * +delete_lock(svn_fs_t *fs, + svn_lock_t *lock, + apr_pool_t *pool) +{ + svn_stringbuf_t *this_path = svn_stringbuf_create(lock->path, pool); + const char *child_to_kill = NULL; + apr_pool_t *subpool; + + SVN_ERR_ASSERT(lock); + + /* Iterate in reverse, deleting the lock for LOCK->path, and then + deleting its entry as it appears in each of its parents. */ + subpool = svn_pool_create(pool); + while (1729) + { + const char *digest_path, *digest_file; + apr_hash_t *this_children; + svn_lock_t *this_lock; + + svn_pool_clear(subpool); + + /* Calculate the DIGEST_PATH for the currently FS path, and then + get its DIGEST_FILE basename. 
*/ + SVN_ERR(digest_path_from_path(&digest_path, fs->path, this_path->data, + subpool)); + digest_file = svn_dirent_basename(digest_path, subpool); + + SVN_ERR(read_digest_file(&this_children, &this_lock, fs->path, + digest_path, subpool)); + + /* Delete the lock (first time through only). */ + if (lock) + { + this_lock = NULL; + lock = NULL; + child_to_kill = apr_pstrdup(pool, digest_file); + } + + if (child_to_kill) + svn_hash_sets(this_children, child_to_kill, NULL); + + if (! (this_lock || apr_hash_count(this_children) != 0)) + { + /* Special case: no goodz, no file. And remember to nix + the entry for it in its parent. */ + SVN_ERR(svn_io_remove_file2(digest_path, FALSE, subpool)); + } + else + { + const char *rev_0_path; + SVN_ERR(svn_fs_fs__path_rev_absolute(&rev_0_path, fs, 0, pool)); + SVN_ERR(write_digest_file(this_children, this_lock, fs->path, + digest_path, rev_0_path, subpool)); + } + + /* Prep for next iteration, or bail if we're done. */ + if (svn_fspath__is_root(this_path->data, this_path->len)) + break; + svn_stringbuf_set(this_path, + svn_fspath__dirname(this_path->data, subpool)); + } + + svn_pool_destroy(subpool); + return SVN_NO_ERROR; +} + +/* Set *LOCK_P to the lock for PATH in FS. HAVE_WRITE_LOCK should be + TRUE if the caller (or one of its callers) has taken out the + repository-wide write lock, FALSE otherwise. If MUST_EXIST is + not set, the function will simply return NULL in *LOCK_P instead + of creating an SVN_FS__ERR_NO_SUCH_LOCK error in case the lock + was not found (much faster). Use POOL for allocations. 
*/ +static svn_error_t * +get_lock(svn_lock_t **lock_p, + svn_fs_t *fs, + const char *path, + svn_boolean_t have_write_lock, + svn_boolean_t must_exist, + apr_pool_t *pool) +{ + svn_lock_t *lock = NULL; + const char *digest_path; + svn_node_kind_t kind; + + SVN_ERR(digest_path_from_path(&digest_path, fs->path, path, pool)); + SVN_ERR(svn_io_check_path(digest_path, &kind, pool)); + + *lock_p = NULL; + if (kind != svn_node_none) + SVN_ERR(read_digest_file(NULL, &lock, fs->path, digest_path, pool)); + + if (! lock) + return must_exist ? SVN_FS__ERR_NO_SUCH_LOCK(fs, path) : SVN_NO_ERROR; + + /* Don't return an expired lock. */ + if (lock->expiration_date && (apr_time_now() > lock->expiration_date)) + { + /* Only remove the lock if we have the write lock. + Read operations shouldn't change the filesystem. */ + if (have_write_lock) + SVN_ERR(delete_lock(fs, lock, pool)); + return SVN_FS__ERR_LOCK_EXPIRED(fs, lock->token); + } + + *lock_p = lock; + return SVN_NO_ERROR; +} + + +/* Set *LOCK_P to the lock for PATH in FS. HAVE_WRITE_LOCK should be + TRUE if the caller (or one of its callers) has taken out the + repository-wide write lock, FALSE otherwise. Use POOL for + allocations. */ +static svn_error_t * +get_lock_helper(svn_fs_t *fs, + svn_lock_t **lock_p, + const char *path, + svn_boolean_t have_write_lock, + apr_pool_t *pool) +{ + svn_lock_t *lock; + svn_error_t *err; + + err = get_lock(&lock, fs, path, have_write_lock, FALSE, pool); + + /* We've deliberately decided that this function doesn't tell the + caller *why* the lock is unavailable. */ + if (err && ((err->apr_err == SVN_ERR_FS_NO_SUCH_LOCK) + || (err->apr_err == SVN_ERR_FS_LOCK_EXPIRED))) + { + svn_error_clear(err); + *lock_p = NULL; + return SVN_NO_ERROR; + } + else + SVN_ERR(err); + + *lock_p = lock; + return SVN_NO_ERROR; +} + + +/* Baton for locks_walker(). 
*/ +struct walk_locks_baton { + svn_fs_get_locks_callback_t get_locks_func; + void *get_locks_baton; + svn_fs_t *fs; +}; + +/* Implements walk_digests_callback_t. */ +static svn_error_t * +locks_walker(void *baton, + const char *fs_path, + const char *digest_path, + apr_hash_t *children, + svn_lock_t *lock, + svn_boolean_t have_write_lock, + apr_pool_t *pool) +{ + struct walk_locks_baton *wlb = baton; + + if (lock) + { + /* Don't report an expired lock. */ + if (lock->expiration_date == 0 + || (apr_time_now() <= lock->expiration_date)) + { + if (wlb->get_locks_func) + SVN_ERR(wlb->get_locks_func(wlb->get_locks_baton, lock, pool)); + } + else + { + /* Only remove the lock if we have the write lock. + Read operations shouldn't change the filesystem. */ + if (have_write_lock) + SVN_ERR(delete_lock(wlb->fs, lock, pool)); + } + } + + return SVN_NO_ERROR; +} + +/* Callback type for walk_digest_files(). + * + * CHILDREN and LOCK come from a read_digest_file(digest_path) call. + */ +typedef svn_error_t *(*walk_digests_callback_t)(void *baton, + const char *fs_path, + const char *digest_path, + apr_hash_t *children, + svn_lock_t *lock, + svn_boolean_t have_write_lock, + apr_pool_t *pool); + +/* A recursive function that calls WALK_DIGESTS_FUNC/WALK_DIGESTS_BATON for + all lock digest files in and under PATH in FS. + HAVE_WRITE_LOCK should be true if the caller (directly or indirectly) + has the FS write lock. */ +static svn_error_t * +walk_digest_files(const char *fs_path, + const char *digest_path, + walk_digests_callback_t walk_digests_func, + void *walk_digests_baton, + svn_boolean_t have_write_lock, + apr_pool_t *pool) +{ + apr_hash_index_t *hi; + apr_hash_t *children; + apr_pool_t *subpool; + svn_lock_t *lock; + + /* First, send up any locks in the current digest file. 
*/ + SVN_ERR(read_digest_file(&children, &lock, fs_path, digest_path, pool)); + + SVN_ERR(walk_digests_func(walk_digests_baton, fs_path, digest_path, + children, lock, + have_write_lock, pool)); + + /* Now, recurse on this thing's child entries (if any; bail otherwise). */ + if (! apr_hash_count(children)) + return SVN_NO_ERROR; + subpool = svn_pool_create(pool); + for (hi = apr_hash_first(pool, children); hi; hi = apr_hash_next(hi)) + { + const char *digest = svn__apr_hash_index_key(hi); + svn_pool_clear(subpool); + SVN_ERR(walk_digest_files + (fs_path, digest_path_from_digest(fs_path, digest, subpool), + walk_digests_func, walk_digests_baton, have_write_lock, subpool)); + } + svn_pool_destroy(subpool); + return SVN_NO_ERROR; +} + +/* A recursive function that calls GET_LOCKS_FUNC/GET_LOCKS_BATON for + all locks in and under PATH in FS. + HAVE_WRITE_LOCK should be true if the caller (directly or indirectly) + has the FS write lock. */ +static svn_error_t * +walk_locks(svn_fs_t *fs, + const char *digest_path, + svn_fs_get_locks_callback_t get_locks_func, + void *get_locks_baton, + svn_boolean_t have_write_lock, + apr_pool_t *pool) +{ + struct walk_locks_baton wlb; + + wlb.get_locks_func = get_locks_func; + wlb.get_locks_baton = get_locks_baton; + wlb.fs = fs; + SVN_ERR(walk_digest_files(fs->path, digest_path, locks_walker, &wlb, + have_write_lock, pool)); + return SVN_NO_ERROR; +} + + +/* Utility function: verify that a lock can be used. Interesting + errors returned from this function: + + SVN_ERR_FS_NO_USER: No username attached to FS. + SVN_ERR_FS_LOCK_OWNER_MISMATCH: FS's username doesn't match LOCK's owner. + SVN_ERR_FS_BAD_LOCK_TOKEN: FS doesn't hold matching lock-token for LOCK. + */ +static svn_error_t * +verify_lock(svn_fs_t *fs, + svn_lock_t *lock, + apr_pool_t *pool) +{ + if ((! fs->access_ctx) || (! 
fs->access_ctx->username)) + return svn_error_createf + (SVN_ERR_FS_NO_USER, NULL, + _("Cannot verify lock on path '%s'; no username available"), + lock->path); + + else if (strcmp(fs->access_ctx->username, lock->owner) != 0) + return svn_error_createf + (SVN_ERR_FS_LOCK_OWNER_MISMATCH, NULL, + _("User '%s' does not own lock on path '%s' (currently locked by '%s')"), + fs->access_ctx->username, lock->path, lock->owner); + + else if (svn_hash_gets(fs->access_ctx->lock_tokens, lock->token) == NULL) + return svn_error_createf + (SVN_ERR_FS_BAD_LOCK_TOKEN, NULL, + _("Cannot verify lock on path '%s'; no matching lock-token available"), + lock->path); + + return SVN_NO_ERROR; +} + + +/* This implements the svn_fs_get_locks_callback_t interface, where + BATON is just an svn_fs_t object. */ +static svn_error_t * +get_locks_callback(void *baton, + svn_lock_t *lock, + apr_pool_t *pool) +{ + return verify_lock(baton, lock, pool); +} + + +/* The main routine for lock enforcement, used throughout libsvn_fs_fs. */ +svn_error_t * +svn_fs_fs__allow_locked_operation(const char *path, + svn_fs_t *fs, + svn_boolean_t recurse, + svn_boolean_t have_write_lock, + apr_pool_t *pool) +{ + path = svn_fs__canonicalize_abspath(path, pool); + if (recurse) + { + /* Discover all locks at or below the path. */ + const char *digest_path; + SVN_ERR(digest_path_from_path(&digest_path, fs->path, path, pool)); + SVN_ERR(walk_locks(fs, digest_path, get_locks_callback, + fs, have_write_lock, pool)); + } + else + { + /* Discover and verify any lock attached to the path. */ + svn_lock_t *lock; + SVN_ERR(get_lock_helper(fs, &lock, path, have_write_lock, pool)); + if (lock) + SVN_ERR(verify_lock(fs, lock, pool)); + } + return SVN_NO_ERROR; +} + +/* Baton used for lock_body below. 
*/ +struct lock_baton { + svn_lock_t **lock_p; + svn_fs_t *fs; + const char *path; + const char *token; + const char *comment; + svn_boolean_t is_dav_comment; + apr_time_t expiration_date; + svn_revnum_t current_rev; + svn_boolean_t steal_lock; + apr_pool_t *pool; +}; + + +/* This implements the svn_fs_fs__with_write_lock() 'body' callback + type, and assumes that the write lock is held. + BATON is a 'struct lock_baton *'. */ +static svn_error_t * +lock_body(void *baton, apr_pool_t *pool) +{ + struct lock_baton *lb = baton; + svn_node_kind_t kind; + svn_lock_t *existing_lock; + svn_lock_t *lock; + svn_fs_root_t *root; + svn_revnum_t youngest; + const char *rev_0_path; + + /* Until we implement directory locks someday, we only allow locks + on files or non-existent paths. */ + /* Use fs->vtable->foo instead of svn_fs_foo to avoid circular + library dependencies, which are not portable. */ + SVN_ERR(lb->fs->vtable->youngest_rev(&youngest, lb->fs, pool)); + SVN_ERR(lb->fs->vtable->revision_root(&root, lb->fs, youngest, pool)); + SVN_ERR(svn_fs_fs__check_path(&kind, root, lb->path, pool)); + if (kind == svn_node_dir) + return SVN_FS__ERR_NOT_FILE(lb->fs, lb->path); + + /* While our locking implementation easily supports the locking of + nonexistent paths, we deliberately choose not to allow such madness. */ + if (kind == svn_node_none) + { + if (SVN_IS_VALID_REVNUM(lb->current_rev)) + return svn_error_createf( + SVN_ERR_FS_OUT_OF_DATE, NULL, + _("Path '%s' doesn't exist in HEAD revision"), + lb->path); + else + return svn_error_createf( + SVN_ERR_FS_NOT_FOUND, NULL, + _("Path '%s' doesn't exist in HEAD revision"), + lb->path); + } + + /* We need to have a username attached to the fs. */ + if (!lb->fs->access_ctx || !lb->fs->access_ctx->username) + return SVN_FS__ERR_NO_USER(lb->fs); + + /* Is the caller attempting to lock an out-of-date working file? 
*/ + if (SVN_IS_VALID_REVNUM(lb->current_rev)) + { + svn_revnum_t created_rev; + SVN_ERR(svn_fs_fs__node_created_rev(&created_rev, root, lb->path, + pool)); + + /* SVN_INVALID_REVNUM means the path doesn't exist. So + apparently somebody is trying to lock something in their + working copy, but somebody else has deleted the thing + from HEAD. That counts as being 'out of date'. */ + if (! SVN_IS_VALID_REVNUM(created_rev)) + return svn_error_createf + (SVN_ERR_FS_OUT_OF_DATE, NULL, + _("Path '%s' doesn't exist in HEAD revision"), lb->path); + + if (lb->current_rev < created_rev) + return svn_error_createf + (SVN_ERR_FS_OUT_OF_DATE, NULL, + _("Lock failed: newer version of '%s' exists"), lb->path); + } + + /* If the caller provided a TOKEN, we *really* need to see + if a lock already exists with that token, and if so, verify that + the lock's path matches PATH. Otherwise we run the risk of + breaking the 1-to-1 mapping of lock tokens to locked paths. */ + /* ### TODO: actually do this check. This is tough, because the + schema doesn't supply a lookup-by-token mechanism. */ + + /* Is the path already locked? + + Note that this next function call will automatically ignore any + errors about {the path not existing as a key, the path's token + not existing as a key, the lock just having been expired}. And + that's totally fine. Any of these three errors are perfectly + acceptable to ignore; it means that the path is now free and + clear for locking, because the fsfs funcs just cleared out both + of the tables for us. */ + SVN_ERR(get_lock_helper(lb->fs, &existing_lock, lb->path, TRUE, pool)); + if (existing_lock) + { + if (! lb->steal_lock) + { + /* Sorry, the path is already locked. */ + return SVN_FS__ERR_PATH_ALREADY_LOCKED(lb->fs, existing_lock); + } + else + { + /* STEAL_LOCK was passed, so fs_username is "stealing" the + lock from lock->owner. Destroy the existing lock. 
*/ + SVN_ERR(delete_lock(lb->fs, existing_lock, pool)); + } + } + + /* Create our new lock, and add it to the tables. + Ensure that the lock is created in the correct pool. */ + lock = svn_lock_create(lb->pool); + if (lb->token) + lock->token = apr_pstrdup(lb->pool, lb->token); + else + SVN_ERR(svn_fs_fs__generate_lock_token(&(lock->token), lb->fs, + lb->pool)); + lock->path = apr_pstrdup(lb->pool, lb->path); + lock->owner = apr_pstrdup(lb->pool, lb->fs->access_ctx->username); + lock->comment = apr_pstrdup(lb->pool, lb->comment); + lock->is_dav_comment = lb->is_dav_comment; + lock->creation_date = apr_time_now(); + lock->expiration_date = lb->expiration_date; + SVN_ERR(svn_fs_fs__path_rev_absolute(&rev_0_path, lb->fs, 0, pool)); + SVN_ERR(set_lock(lb->fs->path, lock, rev_0_path, pool)); + *lb->lock_p = lock; + + return SVN_NO_ERROR; +} + +/* Baton used for unlock_body below. */ +struct unlock_baton { + svn_fs_t *fs; + const char *path; + const char *token; + svn_boolean_t break_lock; +}; + +/* This implements the svn_fs_fs__with_write_lock() 'body' callback + type, and assumes that the write lock is held. + BATON is a 'struct unlock_baton *'. */ +static svn_error_t * +unlock_body(void *baton, apr_pool_t *pool) +{ + struct unlock_baton *ub = baton; + svn_lock_t *lock; + + /* This could return SVN_ERR_FS_BAD_LOCK_TOKEN or SVN_ERR_FS_LOCK_EXPIRED. */ + SVN_ERR(get_lock(&lock, ub->fs, ub->path, TRUE, TRUE, pool)); + + /* Unless breaking the lock, we do some checks. */ + if (! ub->break_lock) + { + /* Sanity check: the incoming token should match lock->token. */ + if (strcmp(ub->token, lock->token) != 0) + return SVN_FS__ERR_NO_SUCH_LOCK(ub->fs, lock->path); + + /* There better be a username attached to the fs. */ + if (! (ub->fs->access_ctx && ub->fs->access_ctx->username)) + return SVN_FS__ERR_NO_USER(ub->fs); + + /* And that username better be the same as the lock's owner. 
*/ + if (strcmp(ub->fs->access_ctx->username, lock->owner) != 0) + return SVN_FS__ERR_LOCK_OWNER_MISMATCH( + ub->fs, ub->fs->access_ctx->username, lock->owner); + } + + /* Remove lock and lock token files. */ + return delete_lock(ub->fs, lock, pool); +} + + +/*** Public API implementations ***/ + +svn_error_t * +svn_fs_fs__lock(svn_lock_t **lock_p, + svn_fs_t *fs, + const char *path, + const char *token, + const char *comment, + svn_boolean_t is_dav_comment, + apr_time_t expiration_date, + svn_revnum_t current_rev, + svn_boolean_t steal_lock, + apr_pool_t *pool) +{ + struct lock_baton lb; + + SVN_ERR(svn_fs__check_fs(fs, TRUE)); + path = svn_fs__canonicalize_abspath(path, pool); + + lb.lock_p = lock_p; + lb.fs = fs; + lb.path = path; + lb.token = token; + lb.comment = comment; + lb.is_dav_comment = is_dav_comment; + lb.expiration_date = expiration_date; + lb.current_rev = current_rev; + lb.steal_lock = steal_lock; + lb.pool = pool; + + return svn_fs_fs__with_write_lock(fs, lock_body, &lb, pool); +} + + +svn_error_t * +svn_fs_fs__generate_lock_token(const char **token, + svn_fs_t *fs, + apr_pool_t *pool) +{ + SVN_ERR(svn_fs__check_fs(fs, TRUE)); + + /* Notice that 'fs' is currently unused. But perhaps someday, we'll + want to use the fs UUID + some incremented number? For now, we + generate a URI that matches the DAV RFC. We could change this to + some other URI scheme someday, if we wish. 
*/ + *token = apr_pstrcat(pool, "opaquelocktoken:", + svn_uuid_generate(pool), (char *)NULL); + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_fs_fs__unlock(svn_fs_t *fs, + const char *path, + const char *token, + svn_boolean_t break_lock, + apr_pool_t *pool) +{ + struct unlock_baton ub; + + SVN_ERR(svn_fs__check_fs(fs, TRUE)); + path = svn_fs__canonicalize_abspath(path, pool); + + ub.fs = fs; + ub.path = path; + ub.token = token; + ub.break_lock = break_lock; + + return svn_fs_fs__with_write_lock(fs, unlock_body, &ub, pool); +} + + +svn_error_t * +svn_fs_fs__get_lock(svn_lock_t **lock_p, + svn_fs_t *fs, + const char *path, + apr_pool_t *pool) +{ + SVN_ERR(svn_fs__check_fs(fs, TRUE)); + path = svn_fs__canonicalize_abspath(path, pool); + return get_lock_helper(fs, lock_p, path, FALSE, pool); +} + + +/* Baton for get_locks_filter_func(). */ +typedef struct get_locks_filter_baton_t +{ + const char *path; + svn_depth_t requested_depth; + svn_fs_get_locks_callback_t get_locks_func; + void *get_locks_baton; + +} get_locks_filter_baton_t; + + +/* A wrapper for the GET_LOCKS_FUNC passed to svn_fs_fs__get_locks() + which filters out locks on paths that aren't within + BATON->requested_depth of BATON->path before calling + BATON->get_locks_func() with BATON->get_locks_baton. + + NOTE: See issue #3660 for details about how the FSFS lock + management code is inconsistent. Until that inconsistency is + resolved, we take this filtering approach rather than honoring + depth requests closer to the crawling code. In other words, once + we decide how to resolve issue #3660, there might be a more + performant way to honor the depth passed to svn_fs_fs__get_locks(). */ +static svn_error_t * +get_locks_filter_func(void *baton, + svn_lock_t *lock, + apr_pool_t *pool) +{ + get_locks_filter_baton_t *b = baton; + + /* Filter out unwanted paths. Since Subversion only allows + locks on files, we can treat depth=immediates the same as + depth=files for filtering purposes. 
Meaning, we'll keep + this lock if: + + a) its path is the very path we queried, or + b) we've asked for a fully recursive answer, or + c) we've asked for depth=files or depth=immediates, and this + lock is on an immediate child of our query path. + */ + if ((strcmp(b->path, lock->path) == 0) + || (b->requested_depth == svn_depth_infinity)) + { + SVN_ERR(b->get_locks_func(b->get_locks_baton, lock, pool)); + } + else if ((b->requested_depth == svn_depth_files) || + (b->requested_depth == svn_depth_immediates)) + { + const char *rel_uri = svn_fspath__skip_ancestor(b->path, lock->path); + if (rel_uri && (svn_path_component_count(rel_uri) == 1)) + SVN_ERR(b->get_locks_func(b->get_locks_baton, lock, pool)); + } + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__get_locks(svn_fs_t *fs, + const char *path, + svn_depth_t depth, + svn_fs_get_locks_callback_t get_locks_func, + void *get_locks_baton, + apr_pool_t *pool) +{ + const char *digest_path; + get_locks_filter_baton_t glfb; + + SVN_ERR(svn_fs__check_fs(fs, TRUE)); + path = svn_fs__canonicalize_abspath(path, pool); + + glfb.path = path; + glfb.requested_depth = depth; + glfb.get_locks_func = get_locks_func; + glfb.get_locks_baton = get_locks_baton; + + /* Get the top digest path in our tree of interest, and then walk it. */ + SVN_ERR(digest_path_from_path(&digest_path, fs->path, path, pool)); + SVN_ERR(walk_locks(fs, digest_path, get_locks_filter_func, &glfb, + FALSE, pool)); + return SVN_NO_ERROR; +} diff --git a/subversion/libsvn_fs_fs/lock.h b/subversion/libsvn_fs_fs/lock.h new file mode 100644 index 0000000..1acc79e --- /dev/null +++ b/subversion/libsvn_fs_fs/lock.h @@ -0,0 +1,103 @@ +/* lock.h : internal interface to lock functions + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#ifndef SVN_LIBSVN_FS_LOCK_H +#define SVN_LIBSVN_FS_LOCK_H + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + + +/* These functions implement some of the calls in the FS loader + library's fs vtables. */ + +svn_error_t *svn_fs_fs__lock(svn_lock_t **lock, + svn_fs_t *fs, + const char *path, + const char *token, + const char *comment, + svn_boolean_t is_dav_comment, + apr_time_t expiration_date, + svn_revnum_t current_rev, + svn_boolean_t steal_lock, + apr_pool_t *pool); + +svn_error_t *svn_fs_fs__generate_lock_token(const char **token, + svn_fs_t *fs, + apr_pool_t *pool); + +svn_error_t *svn_fs_fs__unlock(svn_fs_t *fs, + const char *path, + const char *token, + svn_boolean_t break_lock, + apr_pool_t *pool); + +svn_error_t *svn_fs_fs__get_lock(svn_lock_t **lock, + svn_fs_t *fs, + const char *path, + apr_pool_t *pool); + +svn_error_t *svn_fs_fs__get_locks(svn_fs_t *fs, + const char *path, + svn_depth_t depth, + svn_fs_get_locks_callback_t get_locks_func, + void *get_locks_baton, + apr_pool_t *pool); + + +/* Examine PATH for existing locks, and check whether they can be + used. Use POOL for temporary allocations. + + If no locks are present, return SVN_NO_ERROR. 
+ + If PATH is locked (or contains locks "below" it, when RECURSE is + set), then verify that: + + 1. a username has been supplied to TRAIL->fs's access-context, + else return SVN_ERR_FS_NO_USER. + + 2. for every lock discovered, the current username in the access + context of TRAIL->fs matches the "owner" of the lock, else + return SVN_ERR_FS_LOCK_OWNER_MISMATCH. + + 3. for every lock discovered, a matching lock token has been + passed into TRAIL->fs's access-context, else return + SVN_ERR_FS_BAD_LOCK_TOKEN. + + If all three conditions are met, return SVN_NO_ERROR. + + If the caller (directly or indirectly) has the FS write lock, + HAVE_WRITE_LOCK should be true. +*/ +svn_error_t *svn_fs_fs__allow_locked_operation(const char *path, + svn_fs_t *fs, + svn_boolean_t recurse, + svn_boolean_t have_write_lock, + apr_pool_t *pool); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* SVN_LIBSVN_FS_LOCK_H */ diff --git a/subversion/libsvn_fs_fs/rep-cache-db.h b/subversion/libsvn_fs_fs/rep-cache-db.h new file mode 100644 index 0000000..ed379a1 --- /dev/null +++ b/subversion/libsvn_fs_fs/rep-cache-db.h @@ -0,0 +1,83 @@ +/* This file is automatically generated from rep-cache-db.sql and .dist_sandbox/subversion-1.8.0-rc3/subversion/libsvn_fs_fs/token-map.h. 
+ * Do not edit this file -- edit the source and rerun gen-make.py */ + +#define STMT_CREATE_SCHEMA 0 +#define STMT_0_INFO {"STMT_CREATE_SCHEMA", NULL} +#define STMT_0 \ + "CREATE TABLE rep_cache ( " \ + " hash TEXT NOT NULL PRIMARY KEY, " \ + " revision INTEGER NOT NULL, " \ + " offset INTEGER NOT NULL, " \ + " size INTEGER NOT NULL, " \ + " expanded_size INTEGER NOT NULL " \ + " ); " \ + "PRAGMA USER_VERSION = 1; " \ + "" + +#define STMT_GET_REP 1 +#define STMT_1_INFO {"STMT_GET_REP", NULL} +#define STMT_1 \ + "SELECT revision, offset, size, expanded_size " \ + "FROM rep_cache " \ + "WHERE hash = ?1 " \ + "" + +#define STMT_SET_REP 2 +#define STMT_2_INFO {"STMT_SET_REP", NULL} +#define STMT_2 \ + "INSERT OR FAIL INTO rep_cache (hash, revision, offset, size, expanded_size) " \ + "VALUES (?1, ?2, ?3, ?4, ?5) " \ + "" + +#define STMT_GET_REPS_FOR_RANGE 3 +#define STMT_3_INFO {"STMT_GET_REPS_FOR_RANGE", NULL} +#define STMT_3 \ + "SELECT hash, revision, offset, size, expanded_size " \ + "FROM rep_cache " \ + "WHERE revision >= ?1 AND revision <= ?2 " \ + "" + +#define STMT_GET_MAX_REV 4 +#define STMT_4_INFO {"STMT_GET_MAX_REV", NULL} +#define STMT_4 \ + "SELECT MAX(revision) " \ + "FROM rep_cache " \ + "" + +#define STMT_DEL_REPS_YOUNGER_THAN_REV 5 +#define STMT_5_INFO {"STMT_DEL_REPS_YOUNGER_THAN_REV", NULL} +#define STMT_5 \ + "DELETE FROM rep_cache " \ + "WHERE revision > ?1 " \ + "" + +#define STMT_LOCK_REP 6 +#define STMT_6_INFO {"STMT_LOCK_REP", NULL} +#define STMT_6 \ + "BEGIN TRANSACTION; " \ + "INSERT INTO rep_cache VALUES ('dummy', 0, 0, 0, 0) " \ + "" + +#define REP_CACHE_DB_SQL_DECLARE_STATEMENTS(varname) \ + static const char * const varname[] = { \ + STMT_0, \ + STMT_1, \ + STMT_2, \ + STMT_3, \ + STMT_4, \ + STMT_5, \ + STMT_6, \ + NULL \ + } + +#define REP_CACHE_DB_SQL_DECLARE_STATEMENT_INFO(varname) \ + static const char * const varname[][2] = { \ + STMT_0_INFO, \ + STMT_1_INFO, \ + STMT_2_INFO, \ + STMT_3_INFO, \ + STMT_4_INFO, \ + STMT_5_INFO, \ + 
STMT_6_INFO, \ + {NULL, NULL} \ + } diff --git a/subversion/libsvn_fs_fs/rep-cache-db.sql b/subversion/libsvn_fs_fs/rep-cache-db.sql new file mode 100644 index 0000000..b88c3e0 --- /dev/null +++ b/subversion/libsvn_fs_fs/rep-cache-db.sql @@ -0,0 +1,65 @@ +/* rep-cache-db.sql -- schema for use in rep-caching + * This is intended for use with SQLite 3 + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +-- STMT_CREATE_SCHEMA +/* A table mapping representation hashes to locations in a rev file. 
*/ +CREATE TABLE rep_cache ( + hash TEXT NOT NULL PRIMARY KEY, + revision INTEGER NOT NULL, + offset INTEGER NOT NULL, + size INTEGER NOT NULL, + expanded_size INTEGER NOT NULL + ); + +PRAGMA USER_VERSION = 1; + + +-- STMT_GET_REP +SELECT revision, offset, size, expanded_size +FROM rep_cache +WHERE hash = ?1 + +-- STMT_SET_REP +INSERT OR FAIL INTO rep_cache (hash, revision, offset, size, expanded_size) +VALUES (?1, ?2, ?3, ?4, ?5) + +-- STMT_GET_REPS_FOR_RANGE +SELECT hash, revision, offset, size, expanded_size +FROM rep_cache +WHERE revision >= ?1 AND revision <= ?2 + +-- STMT_GET_MAX_REV +SELECT MAX(revision) +FROM rep_cache + +-- STMT_DEL_REPS_YOUNGER_THAN_REV +DELETE FROM rep_cache +WHERE revision > ?1 + +/* An INSERT takes an SQLite reserved lock that prevents other writes + but doesn't block reads. The incomplete transaction means that no + permanent change is made to the database and the transaction is + removed when the database is closed. */ +-- STMT_LOCK_REP +BEGIN TRANSACTION; +INSERT INTO rep_cache VALUES ('dummy', 0, 0, 0, 0) diff --git a/subversion/libsvn_fs_fs/rep-cache.c b/subversion/libsvn_fs_fs/rep-cache.c new file mode 100644 index 0000000..3a94690 --- /dev/null +++ b/subversion/libsvn_fs_fs/rep-cache.c @@ -0,0 +1,381 @@ +/* rep-sharing.c --- the rep-sharing cache for fsfs + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include "svn_pools.h" + +#include "svn_private_config.h" + +#include "fs_fs.h" +#include "fs.h" +#include "rep-cache.h" +#include "../libsvn_fs/fs-loader.h" + +#include "svn_path.h" + +#include "private/svn_sqlite.h" + +#include "rep-cache-db.h" + +/* A few magic values */ +#define REP_CACHE_SCHEMA_FORMAT 1 + +REP_CACHE_DB_SQL_DECLARE_STATEMENTS(statements); + + + +/** Helper functions. **/ +static APR_INLINE const char * +path_rep_cache_db(const char *fs_path, + apr_pool_t *result_pool) +{ + return svn_dirent_join(fs_path, REP_CACHE_DB_NAME, result_pool); +} + +/* Check that REP refers to a revision that exists in FS. */ +static svn_error_t * +rep_has_been_born(representation_t *rep, + svn_fs_t *fs, + apr_pool_t *pool) +{ + SVN_ERR_ASSERT(rep); + + SVN_ERR(svn_fs_fs__revision_exists(rep->revision, fs, pool)); + + return SVN_NO_ERROR; +} + + + +/** Library-private API's. **/ + +/* Body of svn_fs_fs__open_rep_cache(). + Implements svn_atomic__init_once().init_func. + */ +static svn_error_t * +open_rep_cache(void *baton, + apr_pool_t *pool) +{ + svn_fs_t *fs = baton; + fs_fs_data_t *ffd = fs->fsap_data; + svn_sqlite__db_t *sdb; + const char *db_path; + int version; + + /* Open (or create) the sqlite database. It will be automatically + closed when fs->pool is destroyed. 
*/ + db_path = path_rep_cache_db(fs->path, pool); + SVN_ERR(svn_sqlite__open(&sdb, db_path, + svn_sqlite__mode_rwcreate, statements, + 0, NULL, + fs->pool, pool)); + + SVN_ERR(svn_sqlite__read_schema_version(&version, sdb, pool)); + if (version < REP_CACHE_SCHEMA_FORMAT) + { + /* Must be 0 -- an uninitialized (no schema) database. Create + the schema. Results in schema version of 1. */ + SVN_ERR(svn_sqlite__exec_statements(sdb, STMT_CREATE_SCHEMA)); + } + + /* This is used as a flag that the database is available so don't + set it earlier. */ + ffd->rep_cache_db = sdb; + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__open_rep_cache(svn_fs_t *fs, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + svn_error_t *err = svn_atomic__init_once(&ffd->rep_cache_db_opened, + open_rep_cache, fs, pool); + return svn_error_quick_wrap(err, _("Couldn't open rep-cache database")); +} + +svn_error_t * +svn_fs_fs__exists_rep_cache(svn_boolean_t *exists, + svn_fs_t *fs, apr_pool_t *pool) +{ + svn_node_kind_t kind; + + SVN_ERR(svn_io_check_path(path_rep_cache_db(fs->path, pool), + &kind, pool)); + + *exists = (kind != svn_node_none); + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__walk_rep_reference(svn_fs_t *fs, + svn_revnum_t start, + svn_revnum_t end, + svn_error_t *(*walker)(representation_t *, + void *, + svn_fs_t *, + apr_pool_t *), + void *walker_baton, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + svn_sqlite__stmt_t *stmt; + svn_boolean_t have_row; + int iterations = 0; + + apr_pool_t *iterpool = svn_pool_create(pool); + + /* Don't check ffd->rep_sharing_allowed. */ + SVN_ERR_ASSERT(ffd->format >= SVN_FS_FS__MIN_REP_SHARING_FORMAT); + + if (! ffd->rep_cache_db) + SVN_ERR(svn_fs_fs__open_rep_cache(fs, pool)); + + /* Check global invariants. 
*/ + if (start == 0) + { + svn_revnum_t max; + + SVN_ERR(svn_sqlite__get_statement(&stmt, ffd->rep_cache_db, + STMT_GET_MAX_REV)); + SVN_ERR(svn_sqlite__step(&have_row, stmt)); + max = svn_sqlite__column_revnum(stmt, 0); + SVN_ERR(svn_sqlite__reset(stmt)); + if (SVN_IS_VALID_REVNUM(max)) /* The rep-cache could be empty. */ + SVN_ERR(svn_fs_fs__revision_exists(max, fs, iterpool)); + } + + SVN_ERR(svn_sqlite__get_statement(&stmt, ffd->rep_cache_db, + STMT_GET_REPS_FOR_RANGE)); + SVN_ERR(svn_sqlite__bindf(stmt, "rr", + start, end)); + + /* Walk the cache entries. */ + SVN_ERR(svn_sqlite__step(&have_row, stmt)); + while (have_row) + { + representation_t *rep; + const char *sha1_digest; + svn_error_t *err; + + /* Clear ITERPOOL occasionally. */ + if (iterations++ % 16 == 0) + svn_pool_clear(iterpool); + + /* Check for cancellation. */ + if (cancel_func) + { + err = cancel_func(cancel_baton); + if (err) + return svn_error_compose_create(err, svn_sqlite__reset(stmt)); + } + + /* Construct a representation_t. */ + rep = apr_pcalloc(iterpool, sizeof(*rep)); + sha1_digest = svn_sqlite__column_text(stmt, 0, iterpool); + err = svn_checksum_parse_hex(&rep->sha1_checksum, + svn_checksum_sha1, sha1_digest, + iterpool); + if (err) + return svn_error_compose_create(err, svn_sqlite__reset(stmt)); + rep->revision = svn_sqlite__column_revnum(stmt, 1); + rep->offset = svn_sqlite__column_int64(stmt, 2); + rep->size = svn_sqlite__column_int64(stmt, 3); + rep->expanded_size = svn_sqlite__column_int64(stmt, 4); + + /* Walk. */ + err = walker(rep, walker_baton, fs, iterpool); + if (err) + return svn_error_compose_create(err, svn_sqlite__reset(stmt)); + + SVN_ERR(svn_sqlite__step(&have_row, stmt)); + } + + SVN_ERR(svn_sqlite__reset(stmt)); + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} + + +/* This function's caller ignores most errors it returns. + If you extend this function, check the callsite to see if you have + to make it not-ignore additional error codes. 
*/ +svn_error_t * +svn_fs_fs__get_rep_reference(representation_t **rep, + svn_fs_t *fs, + svn_checksum_t *checksum, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + svn_sqlite__stmt_t *stmt; + svn_boolean_t have_row; + + SVN_ERR_ASSERT(ffd->rep_sharing_allowed); + if (! ffd->rep_cache_db) + SVN_ERR(svn_fs_fs__open_rep_cache(fs, pool)); + + /* We only allow SHA1 checksums in this table. */ + if (checksum->kind != svn_checksum_sha1) + return svn_error_create(SVN_ERR_BAD_CHECKSUM_KIND, NULL, + _("Only SHA1 checksums can be used as keys in the " + "rep_cache table.\n")); + + SVN_ERR(svn_sqlite__get_statement(&stmt, ffd->rep_cache_db, STMT_GET_REP)); + SVN_ERR(svn_sqlite__bindf(stmt, "s", + svn_checksum_to_cstring(checksum, pool))); + + SVN_ERR(svn_sqlite__step(&have_row, stmt)); + if (have_row) + { + *rep = apr_pcalloc(pool, sizeof(**rep)); + (*rep)->sha1_checksum = svn_checksum_dup(checksum, pool); + (*rep)->revision = svn_sqlite__column_revnum(stmt, 0); + (*rep)->offset = svn_sqlite__column_int64(stmt, 1); + (*rep)->size = svn_sqlite__column_int64(stmt, 2); + (*rep)->expanded_size = svn_sqlite__column_int64(stmt, 3); + } + else + *rep = NULL; + + SVN_ERR(svn_sqlite__reset(stmt)); + + if (*rep) + SVN_ERR(rep_has_been_born(*rep, fs, pool)); + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__set_rep_reference(svn_fs_t *fs, + representation_t *rep, + svn_boolean_t reject_dup, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + svn_sqlite__stmt_t *stmt; + svn_error_t *err; + + SVN_ERR_ASSERT(ffd->rep_sharing_allowed); + if (! ffd->rep_cache_db) + SVN_ERR(svn_fs_fs__open_rep_cache(fs, pool)); + + /* We only allow SHA1 checksums in this table. 
*/ + if (rep->sha1_checksum == NULL) + return svn_error_create(SVN_ERR_BAD_CHECKSUM_KIND, NULL, + _("Only SHA1 checksums can be used as keys in the " + "rep_cache table.\n")); + + SVN_ERR(svn_sqlite__get_statement(&stmt, ffd->rep_cache_db, STMT_SET_REP)); + SVN_ERR(svn_sqlite__bindf(stmt, "siiii", + svn_checksum_to_cstring(rep->sha1_checksum, pool), + (apr_int64_t) rep->revision, + (apr_int64_t) rep->offset, + (apr_int64_t) rep->size, + (apr_int64_t) rep->expanded_size)); + + err = svn_sqlite__insert(NULL, stmt); + if (err) + { + representation_t *old_rep; + + if (err->apr_err != SVN_ERR_SQLITE_CONSTRAINT) + return svn_error_trace(err); + + svn_error_clear(err); + + /* Constraint failed so the mapping for SHA1_CHECKSUM->REP + should exist. If so, and the value is the same one we were + about to write, that's cool -- just do nothing. If, however, + the value is *different*, that's a red flag! */ + SVN_ERR(svn_fs_fs__get_rep_reference(&old_rep, fs, rep->sha1_checksum, + pool)); + + if (old_rep) + { + if (reject_dup && ((old_rep->revision != rep->revision) + || (old_rep->offset != rep->offset) + || (old_rep->size != rep->size) + || (old_rep->expanded_size != rep->expanded_size))) + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + apr_psprintf(pool, + _("Representation key for checksum '%%s' exists " + "in filesystem '%%s' with a different value " + "(%%ld,%%%s,%%%s,%%%s) than what we were about " + "to store (%%ld,%%%s,%%%s,%%%s)"), + APR_OFF_T_FMT, SVN_FILESIZE_T_FMT, + SVN_FILESIZE_T_FMT, APR_OFF_T_FMT, + SVN_FILESIZE_T_FMT, SVN_FILESIZE_T_FMT), + svn_checksum_to_cstring_display(rep->sha1_checksum, pool), + fs->path, old_rep->revision, old_rep->offset, old_rep->size, + old_rep->expanded_size, rep->revision, rep->offset, rep->size, + rep->expanded_size); + else + return SVN_NO_ERROR; + } + else + { + /* Something really odd at this point, we failed to insert the + checksum AND failed to read an existing checksum. Do we need + to flag this? 
*/ + } + } + + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_fs_fs__del_rep_reference(svn_fs_t *fs, + svn_revnum_t youngest, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + svn_sqlite__stmt_t *stmt; + + SVN_ERR_ASSERT(ffd->format >= SVN_FS_FS__MIN_REP_SHARING_FORMAT); + if (! ffd->rep_cache_db) + SVN_ERR(svn_fs_fs__open_rep_cache(fs, pool)); + + SVN_ERR(svn_sqlite__get_statement(&stmt, ffd->rep_cache_db, + STMT_DEL_REPS_YOUNGER_THAN_REV)); + SVN_ERR(svn_sqlite__bindf(stmt, "r", youngest)); + SVN_ERR(svn_sqlite__step_done(stmt)); + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__lock_rep_cache(svn_fs_t *fs, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + + if (! ffd->rep_cache_db) + SVN_ERR(svn_fs_fs__open_rep_cache(fs, pool)); + + SVN_ERR(svn_sqlite__exec_statements(ffd->rep_cache_db, STMT_LOCK_REP)); + + return SVN_NO_ERROR; +} diff --git a/subversion/libsvn_fs_fs/rep-cache.h b/subversion/libsvn_fs_fs/rep-cache.h new file mode 100644 index 0000000..3ccb056 --- /dev/null +++ b/subversion/libsvn_fs_fs/rep-cache.h @@ -0,0 +1,101 @@ +/* rep-cache.h : interface to rep cache db functions + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#ifndef SVN_LIBSVN_FS_FS_REP_CACHE_H +#define SVN_LIBSVN_FS_FS_REP_CACHE_H + +#include "svn_error.h" + +#include "fs.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +#define REP_CACHE_DB_NAME "rep-cache.db" + +/* Open and create, if needed, the rep cache database associated with FS. + Use POOL for temporary allocations. */ +svn_error_t * +svn_fs_fs__open_rep_cache(svn_fs_t *fs, + apr_pool_t *pool); + +/* Set *EXISTS to TRUE iff the rep-cache DB file exists. */ +svn_error_t * +svn_fs_fs__exists_rep_cache(svn_boolean_t *exists, + svn_fs_t *fs, apr_pool_t *pool); + +/* Iterate all representations currently in FS's cache. */ +svn_error_t * +svn_fs_fs__walk_rep_reference(svn_fs_t *fs, + svn_revnum_t start, + svn_revnum_t end, + svn_error_t *(*walker)(representation_t *rep, + void *walker_baton, + svn_fs_t *fs, + apr_pool_t *scratch_pool), + void *walker_baton, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool); + +/* Return the representation REP in FS which has fulltext CHECKSUM. + REP is allocated in POOL. If the rep cache database has not been + opened, just set *REP to NULL. */ +svn_error_t * +svn_fs_fs__get_rep_reference(representation_t **rep, + svn_fs_t *fs, + svn_checksum_t *checksum, + apr_pool_t *pool); + +/* Set the representation REP in FS, using REP->CHECKSUM. + Use POOL for temporary allocations. + + If the rep cache database has not been opened, this may be a no op. + + If REJECT_DUP is TRUE, return an error if there is an existing + match for REP->CHECKSUM. */ +svn_error_t * +svn_fs_fs__set_rep_reference(svn_fs_t *fs, + representation_t *rep, + svn_boolean_t reject_dup, + apr_pool_t *pool); + +/* Delete from the cache all reps corresponding to revisions younger + than YOUNGEST. 
*/ +svn_error_t * +svn_fs_fs__del_rep_reference(svn_fs_t *fs, + svn_revnum_t youngest, + apr_pool_t *pool); + +/* Start a transaction to take an SQLite reserved lock that prevents + other writes. */ +svn_error_t * +svn_fs_fs__lock_rep_cache(svn_fs_t *fs, + apr_pool_t *pool); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* SVN_LIBSVN_FS_FS_REP_CACHE_H */ diff --git a/subversion/libsvn_fs_fs/structure b/subversion/libsvn_fs_fs/structure new file mode 100644 index 0000000..41caf1d --- /dev/null +++ b/subversion/libsvn_fs_fs/structure @@ -0,0 +1,621 @@ +This file describes the design, layouts, and file formats of a +libsvn_fs_fs repository. + +Design +------ + +In FSFS, each committed revision is represented as an immutable file +containing the new node-revisions, contents, and changed-path +information for the revision, plus a second, changeable file +containing the revision properties. + +In contrast to the BDB back end, the contents of recent revision of +files are stored as deltas against earlier revisions, instead of the +other way around. This is less efficient for common-case checkouts, +but brings greater simplicity and robustness, as well as the +flexibility to make commits work without write access to existing +revisions. Skip-deltas and delta combination mitigate the checkout +cost. + +In-progress transactions are represented with a prototype rev file +containing only the new text representations of files (appended to as +changed file contents come in), along with a separate file for each +node-revision, directory representation, or property representation +which has been changed or added in the transaction. During the final +stage of the commit, these separate files are marshalled onto the end +of the prototype rev file to form the immutable revision file. 
+ +Layout of the FS directory +-------------------------- + +The layout of the FS directory (the "db" subdirectory of the +repository) is: + + revs/ Subdirectory containing revs + <shard>/ Shard directory, if sharding is in use (see below) + <revnum> File containing rev <revnum> + <shard>.pack/ Pack directory, if the repo has been packed (see below) + pack Pack file, if the repository has been packed (see below) + manifest Pack manifest file, if a pack file exists (see below) + revprops/ Subdirectory containing rev-props + <shard>/ Shard directory, if sharding is in use (see below) + <revnum> File containing rev-props for <revnum> + <shard>.pack/ Pack directory, if the repo has been packed (see below) + <rev>.<count> Pack file, if the repository has been packed (see below) + manifest Pack manifest file, if a pack file exists (see below) + revprops.db SQLite database of the packed revision properties + transactions/ Subdirectory containing transactions + <txnid>.txn/ Directory containing transaction <txnid> + txn-protorevs/ Subdirectory containing transaction proto-revision files + <txnid>.rev Proto-revision file for transaction <txnid> + <txnid>.rev-lock Write lock for proto-rev file + txn-current File containing the next transaction key + locks/ Subdirectory containing locks + <partial-digest>/ Subdirectory named for first 3 letters of an MD5 digest + <digest> File containing locks/children for path with <digest> + node-origins/ Lazy cache of origin noderevs for nodes + <partial-nodeid> File containing noderev ID of origins of nodes + current File specifying current revision and next node/copy id + fs-type File identifying this filesystem as an FSFS filesystem + write-lock Empty file, locked to serialise writers + txn-current-lock Empty file, locked to serialise 'txn-current' + uuid File containing the UUID of the repository + format File containing the format number of this filesystem + fsfs.conf Configuration file + min-unpacked-rev File containing the oldest 
revision not in a pack file + min-unpacked-revprop File containing the oldest revision of unpacked revprop + rep-cache.db SQLite database mapping rep checksums to locations + +Files in the revprops directory are in the hash dump format used by +svn_hash_write. + +The format of the "current" file is: + + * Format 3 and above: a single line of the form + "<youngest-revision>\n" giving the youngest revision for the + repository. + + * Format 2 and below: a single line of the form "<youngest-revision> + <next-node-id> <next-copy-id>\n" giving the youngest revision, the + next unique node-ID, and the next unique copy-ID for the + repository. + +The "write-lock" file is an empty file which is locked before the +final stage of a commit and unlocked after the new "current" file has +been moved into place to indicate that a new revision is present. It +is also locked during a revprop propchange while the revprop file is +read in, mutated, and written out again. Note that readers are never +blocked by any operation - writers must ensure that the filesystem is +always in a consistent state. + +The "txn-current" file is a file with a single line of text that +contains only a base-36 number. The current value will be used in the +next transaction name, along with the revision number the transaction +is based on. This sequence number ensures that transaction names are +not reused, even if the transaction is aborted and a new transaction +based on the same revision is begun. The only operation that FSFS +performs on this file is "get and increment"; the "txn-current-lock" +file is locked during this operation. + +"fsfs.conf" is a configuration file in the standard Subversion/Python +config format. It is automatically generated when you create a new +repository; read the generated file for details on what it controls. + +When representation sharing is enabled, the filesystem tracks +representation checksum and location mappings using a SQLite database in +"rep-cache.db". 
The database has a single table, which stores the sha1 +hash text as the primary key, mapped to the representation revision, offset, +size and expanded size. This file is only consulted during writes and never +during reads. Consequently, it is not required, and may be removed at an +arbitrary time, with the subsequent loss of rep-sharing capabilities for +revisions written thereafter. + +Filesystem formats +------------------ + +The "format" file defines what features are permitted within the +filesystem, and indicates changes that are not backward-compatible. +It serves the same purpose as the repository file of the same name. + +The filesystem format file was introduced in Subversion 1.2, and so +will not be present if the repository was created with an older +version of Subversion. An absent format file should be interpreted as +indicating a format 1 filesystem. + +The format file is a single line of the form "<format number>\n", +followed by any number of lines specifying 'format options' - +additional information about the filesystem's format. Each format +option line is of the form "<option>\n" or "<option> <parameters>\n". + +Clients should raise an error if they encounter an option not +permitted by the format number in use. 
+ +The formats are: + + Format 1, understood by Subversion 1.1+ + Format 2, understood by Subversion 1.4+ + Format 3, understood by Subversion 1.5+ + Format 4, understood by Subversion 1.6+ + Format 5, understood by Subversion 1.7-dev, never released + Format 6, understood by Subversion 1.8 + +The differences between the formats are: + +Delta representation in revision files + Format 1: svndiff0 only + Formats 2+: svndiff0 or svndiff1 + +Format options + Formats 1-2: none permitted + Format 3+: "layout" option + +Transaction name reuse + Formats 1-2: transaction names may be reused + Format 3+: transaction names generated using txn-current file + +Location of proto-rev file and its lock + Formats 1-2: transactions/<txnid>/rev and + transactions/<txnid>/rev-lock. + Format 3+: txn-protorevs/<txnid>.rev and + txn-protorevs/<txnid>.rev-lock. + +Node-ID and copy-ID generation + Formats 1-2: Node-IDs and copy-IDs are guaranteed to form a + monotonically increasing base36 sequence using the "current" + file. + Format 3+: Node-IDs and copy-IDs use the new revision number to + ensure uniqueness and the "current" file just contains the + youngest revision. + +Mergeinfo metadata: + Format 1-2: minfo-here and minfo-count node-revision fields are not + stored. svn_fs_get_mergeinfo returns an error. + Format 3+: minfo-here and minfo-count node-revision fields are + maintained. svn_fs_get_mergeinfo works. + +Revision changed paths list: + Format 1-3: Does not contain the node's kind. + Format 4+: Contains the node's kind. + +Shard packing: + Format 4: Applied to revision data only. + Format 5: Revprops would be packed independently of revision data. + Format 6+: Applied equally to revision data and revprop data + (i.e. same min packed revision) + +# Incomplete list. 
See SVN_FS_FS__MIN_*_FORMAT + + +Filesystem format options +------------------------- + +Currently, the only recognised format option is "layout", which +specifies the paths that will be used to store the revision files and +revision property files. + +The "layout" option is followed by the name of the filesystem layout +and any required parameters. The default layout, if no "layout" +keyword is specified, is the 'linear' layout. + +The known layouts, and the parameters they require, are as follows: + +"linear" + Revision files and rev-prop files are named after the revision they + represent, and are placed directly in the revs/ and revprops/ + directories. r1234 will be represented by the revision file + revs/1234 and the rev-prop file revprops/1234. + +"sharded <max-files-per-directory>" + Revision files and rev-prop files are named after the revision they + represent, and are placed in a subdirectory of the revs/ and + revprops/ directories named according to the 'shard' they belong to. + + Shards are numbered from zero and contain between one and the + maximum number of files per directory specified in the layout's + parameters. + + For the "sharded 1000" layout, r1234 will be represented by the + revision file revs/1/1234 and rev-prop file revprops/1/1234. The + revs/0/ directory will contain revisions 0-999, revs/1/ will contain + 1000-1999, and so on. + +Packing revisions +----------------- + +A filesystem can optionally be "packed" to conserve space on disk. The +packing process concatenates all the revision files in each full shard to +create pack files. A manifest file is also created for each shard which +records the indexes of the corresponding revision files in the pack file. +In addition, the original shard is removed, and reads are redirected to the +pack file. + +The manifest file consists of a list of offsets, one for each revision in the +pack file. The offsets are stored as ASCII decimal, and separated by a newline +character. 
+ +Packing revision properties (format 5: SQLite) +--------------------------- + +This was supported by 1.7-dev builds but never included in a blessed release. + +See r1143829 of this file: +http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/structure?view=markup&pathrev=1143829 + + +Packing revision properties (format 6+) +--------------------------- + +Similarly to the revision data, packing will concatenate multiple +revprops into a single file. Since they are mutable data, we put an +upper limit to the size of these files: We will concatenate the data +up to the limit and then use a new file for the following revisions. + +The limit can be set and changed at will in the configuration file. +It is 64kB by default. Because a pack file must contain at least one +complete property list, files containing just one revision may exceed +that limit. + +Furthermore, pack files can be compressed which saves about 75% of +disk space. A configuration file flag enables the compression; it is +off by default and may be switched on and off at will. The pack size +limit is always applied to the uncompressed data. For this reason, +the default is 256kB while compression has been enabled. + +Files are named after their start revision as "<rev>.<counter>" where +counter will be increased whenever we rewrite a pack file due to a +revprop change. The manifest file contains the list of pack file +names, one line for each revision. + +Many tools track repository global data in revision properties at +revision 0. To minimize I/O overhead for those applications, we +will never pack that revision, i.e. its data is always being kept +in revprops/0/0. + +Pack file format + + Top level: <packed container> + + We always apply data compression to the pack file - using the + SVN_DELTA_COMPRESSION_LEVEL_NONE level if compression is disabled. 
+ (Note that compression at SVN_DELTA_COMPRESSION_LEVEL_NONE is not
+ a no-op stream transformation although most of the data will remain
+ human readable.)
+
+ container := header '\n' (revprops)+
+ header := start_rev '\n' rev_count '\n' (size '\n')+
+
+ All numbers in the header are given as ASCII decimals. rev_count
+ is the number of revisions packed into this container. There must
+ be exactly as many "size" and serialized "revprops". The "size"
+ values in the list are the length in bytes of the serialized
+ revprops of the respective revision.
+
+Writing to packed revprops
+
+ The old pack file is being read and the new revprops serialized.
+ If they fit into the same pack file, a temp file with the new
+ content gets written and moved into place just like a non-packed
+ revprop file would. No name change or manifest update required.
+
+ If they don't fit into the same pack file, i.e. exceed the pack
+ size limit, the pack will be split into 2 or 3 new packs just
+ before and / or after the modified revision.
+
+ In the current implementation, they will never be merged again.
+ To minimize fragmentation, the initial packing process will only
+ use about 90% of the limit, i.e. leave some room for growth.
+
+ When a pack file gets split, its counter is being increased
+ creating a new file and leaving the old content in place and
+ available for concurrent readers. Only after the new manifest
+ file got moved into place, will the old pack files be deleted.
+
+ Write access to revprops is being serialized by the global
+ filesystem write lock. We only need to build a few retries into
+ the reader code to gracefully handle manifest changes and pack
+ file deletions.
+
+
+Node-revision IDs
+-----------------
+
+A node-rev ID consists of the following three fields:
+
+ node_revision_id ::= node_id '.' copy_id '.' txn_id
+
+At this level, the form of the ID is the same as for BDB - see the
+section called "ID's" in <../libsvn_fs_base/notes/structure>. 
+ +In order to support efficient lookup of node-revisions by their IDs +and to simplify the allocation of fresh node-IDs during a transaction, +we treat the fields of a node-rev ID in new and interesting ways. + +Within a new transaction: + + New node-revision IDs assigned within a transaction have a txn-id + field of the form "t<txnid>". + + When a new node-id or copy-id is assigned in a transaction, the ID + used is a "_" followed by a base36 number unique to the transaction. + +Within a revision: + + Within a revision file, node-revs have a txn-id field of the form + "r<rev>/<offset>", to support easy lookup. The <offset> is the (ASCII + decimal) number of bytes from the start of the revision file to the + start of the node-rev. + + During the final phase of a commit, node-revision IDs are rewritten + to have repository-wide unique node-ID and copy-ID fields, and to have + "r<rev>/<offset>" txn-id fields. + + In Format 3 and above, this uniqueness is done by changing a temporary + id of "_<base36>" to "<base36>-<rev>". Note that this means that the + originating revision of a line of history or a copy can be determined + by looking at the node ID. + + In Format 2 and below, the "current" file contains global base36 + node-ID and copy-ID counters; during the commit, the counter value is + added to the transaction-specific base36 ID, and the value in + "current" is adjusted. + + (It is legal for Format 3 repositories to contain Format 2-style IDs; + this just prevents I/O-less node-origin-rev lookup for those nodes.) + +The temporary assignment of node-ID and copy-ID fields has +implications for svn_fs_compare_ids and svn_fs_check_related. The ID +_1.0.t1 is not related to the ID _1.0.t2 even though they have the +same node-ID, because temporary node-IDs are restricted in scope to +the transactions they belong to. + +There is a lazily created cache mapping from node-IDs to the full +node-revision ID where they are created. 
This is in the node-origins +directory; the file name is the node-ID without its last character (or +"0" for single-character node IDs) and the contents is a serialized +hash mapping from node-ID to node-revision ID. This cache is only +used for node-IDs of the pre-Format 3 style. + +Copy-IDs and copy roots +----------------------- + +Copy-IDs are assigned in the same manner as they are in the BDB +implementation: + + * A node-rev resulting from a creation operation (with no copy + history) receives the copy-ID of its parent directory. + + * A node-rev resulting from a copy operation receives a fresh + copy-ID, as one would expect. + + * A node-rev resulting from a modification operation receives a + copy-ID depending on whether its predecessor derives from a + copy operation or whether it derives from a creation operation + with no intervening copies: + + - If the predecessor does not derive from a copy, the new + node-rev receives the copy-ID of its parent directory. If the + node-rev is being modified through its created-path, this will + be the same copy-ID as the predecessor node-rev has; however, + if the node-rev is being modified through a copied ancestor + directory (i.e. we are performing a "lazy copy"), this will be + a different copy-ID. + + - If the predecessor derives from a copy and the node-rev is + being modified through its created-path, the new node-rev + receives the copy-ID of the predecessor. + + - If the predecessor derives from a copy and the node-rev is not + being modified through its created path, the new node-rev + receives a fresh copy-ID. This is called a "soft copy" + operation, as distinct from a "true copy" operation which was + actually requested through the svn_fs interface. Soft copies + exist to ensure that the same <node-ID,copy-ID> pair is not + used twice within a transaction. + +Unlike the BDB implementation, we do not have a "copies" table. 
+Instead, each node-revision record contains a "copyroot" field +identifying the node-rev resulting from the true copy operation most +proximal to the node-rev. If the node-rev does not itself derive from +a copy operation, then the copyroot field identifies the copy of an +ancestor directory; if no ancestor directories derive from a copy +operation, then the copyroot field identifies the root directory of +rev 0. + +Revision file format +-------------------- + +A revision file contains a concatenation of various kinds of data: + + * Text and property representations + * Node-revisions + * The changed-path data + * Two offsets at the very end + +A representation begins with a line containing either "PLAIN\n" or +"DELTA\n" or "DELTA <rev> <offset> <length>\n", where <rev>, <offset>, +and <length> give the location of the delta base of the representation +and the amount of data it contains (not counting the header or +trailer). If no base location is given for a delta, the base is the +empty stream. After the initial line comes raw svndiff data, followed +by a cosmetic trailer "ENDREP\n". + +If the representation is for the text contents of a directory node, +the expanded contents are in hash dump format mapping entry names to +"<type> <id>" pairs, where <type> is "file" or "dir" and <id> gives +the ID of the child node-rev. + +If a representation is for a property list, the expanded contents are +in the form of a dumped hash map mapping property names to property +values. + +The marshalling syntax for node-revs is a series of fields terminated +by a blank line. Fields have the syntax "<name>: <value>\n", where +<name> is a symbolic field name (each symbolic name is used only once +in a given node-rev) and <value> is the value data. Unrecognized +fields are ignored, for extensibility. 
The following fields are +defined: + + id The ID of the node-rev + type "file" or "dir" + pred The ID of the predecessor node-rev + count Count of node-revs since the base of the node + text "<rev> <offset> <length> <size> <digest>" for text rep + props "<rev> <offset> <length> <size> <digest>" for props rep + <rev> and <offset> give location of rep + <length> gives length of rep, sans header and trailer + <size> gives size of expanded rep; may be 0 if equal + to the length + <digest> gives hex MD5 digest of expanded rep + ### in formats >=4, also present: + <sha1-digest> gives hex SHA1 digest of expanded rep + <uniquifier> see representation_t->uniquifier in fs.h + cpath FS pathname node was created at + copyfrom "<rev> <path>" of copyfrom data + copyroot "<rev> <created-path>" of the root of this copy + minfo-cnt The number of nodes under (and including) this node + which have svn:mergeinfo. + minfo-here Exists if this node itself has svn:mergeinfo. + +The predecessor of a node-rev crosses both soft and true copies; +together with the count field, it allows efficient determination of +the base for skip-deltas. The first node-rev of a node contains no +"pred" field. A node-revision with no properties may omit the "props" +field. A node-revision with no contents (a zero-length file or an +empty directory) may omit the "text" field. In a node-revision +resulting from a true copy operation, the "copyfrom" field gives the +copyfrom data. The "copyroot" field identifies the root node-revision +of the copy; it may be omitted if the node-rev is its own copy root +(as is the case for node-revs with copy history, and for the root node +of revision 0). Copy roots are identified by revision and +created-path, not by node-rev ID, because a copy root may be a +node-rev which exists later on within the same revision file, meaning +its offset is not yet known. + +The changed-path data is represented as a series of changed-path +items, each consisting of two lines. 
The first line has the format +"<id> <action> <text-mod> <prop-mod> <path>\n", where <id> is the +node-rev ID of the new node-rev, <action> is "add", "delete", +"replace", or "modify", <text-mod> and <prop-mod> are "true" or +"false" indicating whether the text and/or properties changed, and +<path> is the changed pathname. For deletes, <id> is the node-rev ID +of the deleted node-rev, and <text-mod> and <prop-mod> are always +"false". The second line has the format "<rev> <path>\n" containing +the node-rev's copyfrom information if it has any; if it does not, the +second line is blank. + +Starting with FS format 4, <action> may contain the kind ("file" or +"dir") of the node, after a hyphen; for example, an added directory +may be represented as "add-dir". + +At the very end of a rev file is a pair of lines containing +"\n<root-offset> <cp-offset>\n", where <root-offset> is the offset of +the root directory node revision and <cp-offset> is the offset of the +changed-path data. + +All numbers in the rev file format are unsigned and are represented as +ASCII decimal. + +Transaction layout +------------------ + +A transaction directory has the following layout: + + props Transaction props + next-ids Next temporary node-ID and copy-ID + changes Changed-path information so far + node.<nid>.<cid> New node-rev data for node + node.<nid>.<cid>.props Props for new node-rev, if changed + node.<nid>.<cid>.children Directory contents for node-rev + <sha1> Text representation of that sha1 + +In FS formats 1 and 2, it also contains: + + rev Prototype rev file with new text reps + rev-lock Lockfile for writing to the above + +In newer formats, these files are in the txn-protorevs/ directory. + +The prototype rev file is used to store the text representations as +they are received from the client. To ensure that only one client is +writing to the file at a given time, the "rev-lock" file is locked for +the duration of each write. 
+ +The two kinds of props files are all in hash dump format. The "props" +file will always be present. The "node.<nid>.<cid>.props" file will +only be present if the node-rev properties have been changed. + +The <sha1> files have been introduced in FS format 6. Their content +is that of text rep references: "<rev> <offset> <length> <size> <digest>" +They will be written for text reps in the current transaction and be +used to eliminate duplicate reps within that transaction. + +The "next-ids" file contains a single line "<next-temp-node-id> +<next-temp-copy-id>\n" giving the next temporary node-ID and copy-ID +assignments (without the leading underscores). The next node-ID is +also used as a uniquifier for representations which may share the same +underlying rep. + +The "children" file for a node-revision begins with a copy of the hash +dump representation of the directory entries from the old node-rev (or +a dump of the empty hash for new directories), and then an incremental +hash dump entry for each change made to the directory. + +The "changes" file contains changed-path entries in the same form as +the changed-path entries in a rev file, except that <id> and <action> +may both be "reset" (in which case <text-mod> and <prop-mod> are both +always "false") to indicate that all changes to a path should be +considered undone. Reset entries are only used during the final merge +phase of a transaction. Actions in the "changes" file always contain +a node kind, even if the FS format is older than format 4. + +The node-rev files have the same format as node-revs in a revision +file, except that the "text" and "props" fields are augmented as +follows: + + * The "props" field may have the value "-1" if properties have + been changed and are contained in a "props" file within the + node-rev subdirectory. 
+ + * For directory node-revs, the "text" field may have the value + "-1" if entries have been changed and are contained in a + "contents" file in the node-rev subdirectory. + + * For the directory node-rev representing the root of the + transaction, the "is-fresh-txn-root" field indicates that it has + not been made mutable yet (see Issue #2608). + + * For file node-revs, the "text" field may have the value "-1 + <offset> <length> <size> <digest>" if the text representation is + within the prototype rev file. + + * The "copyroot" field may have the value "-1 <created-path>" if the + copy root of the node-rev is part of the transaction in process. + +Locks layout +------------ + +Locks in FSFS are stored in serialized hash format in files whose +names are MD5 digests of the FS path which the lock is associated +with. For the purposes of keeping directory inode usage down, these +digest files live in subdirectories of the main lock directory whose +names are the first 3 characters of the digest filename. + +Also stored in the digest file for a given FS path are pointers to +other digest files which contain information associated with other FS +paths that are beneath our path (an immediate child thereof, or a +grandchild, or a great-grandchild, ...). + +To answer the question, "Does path FOO have a lock associated with +it?", one need only generate the MD5 digest of FOO's +absolute-in-the-FS path (say, 3b1b011fed614a263986b5c4869604e8), look +for a file located like so: + + /path/to/repos/locks/3b1/3b1b011fed614a263986b5c4869604e8 + +And then see if that file contains lock information. + +To inquire about locks on children of the path FOO, you would +reference the same path as above, but look for a list of children in +that file (instead of lock information). Children are listed as MD5 +digests, too, so you would simply iterate over those digests and +consult the files they reference for lock information. 
diff --git a/subversion/libsvn_fs_fs/temp_serializer.c b/subversion/libsvn_fs_fs/temp_serializer.c new file mode 100644 index 0000000..0178143 --- /dev/null +++ b/subversion/libsvn_fs_fs/temp_serializer.c @@ -0,0 +1,1341 @@ +/* temp_serializer.c: serialization functions for caching of FSFS structures + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include <apr_pools.h> + +#include "svn_pools.h" +#include "svn_hash.h" + +#include "id.h" +#include "svn_fs.h" + +#include "private/svn_fs_util.h" +#include "private/svn_temp_serializer.h" +#include "private/svn_subr_private.h" + +#include "temp_serializer.h" + +/* Utility to encode a signed NUMBER into a variable-length sequence of + * 8-bit chars in KEY_BUFFER and return the last writen position. + * + * Numbers will be stored in 7 bits / byte and using byte values above + * 32 (' ') to make them combinable with other string by simply separating + * individual parts with spaces. 
+ */ +static char* +encode_number(apr_int64_t number, char *key_buffer) +{ + /* encode the sign in the first byte */ + if (number < 0) + { + number = -number; + *key_buffer = (char)((number & 63) + ' ' + 65); + } + else + *key_buffer = (char)((number & 63) + ' ' + 1); + number /= 64; + + /* write 7 bits / byte until no significant bits are left */ + while (number) + { + *++key_buffer = (char)((number & 127) + ' ' + 1); + number /= 128; + } + + /* return the last written position */ + return key_buffer; +} + +const char* +svn_fs_fs__combine_number_and_string(apr_int64_t number, + const char *string, + apr_pool_t *pool) +{ + apr_size_t len = strlen(string); + + /* number part requires max. 10x7 bits + 1 space. + * Add another 1 for the terminal 0 */ + char *key_buffer = apr_palloc(pool, len + 12); + const char *key = key_buffer; + + /* Prepend the number to the string and separate them by space. No other + * number can result in the same prefix, no other string in the same + * postfix nor can the boundary between them be ambiguous. */ + key_buffer = encode_number(number, key_buffer); + *++key_buffer = ' '; + memcpy(++key_buffer, string, len+1); + + /* return the start of the key */ + return key; +} + +/* Utility function to serialize string S in the given serialization CONTEXT. + */ +static void +serialize_svn_string(svn_temp_serializer__context_t *context, + const svn_string_t * const *s) +{ + const svn_string_t *string = *s; + + /* Nothing to do for NULL string references. */ + if (string == NULL) + return; + + svn_temp_serializer__push(context, + (const void * const *)s, + sizeof(*string)); + + /* the "string" content may actually be arbitrary binary data. + * Thus, we cannot use svn_temp_serializer__add_string. 
*/ + svn_temp_serializer__push(context, + (const void * const *)&string->data, + string->len + 1); + + /* back to the caller's nesting level */ + svn_temp_serializer__pop(context); + svn_temp_serializer__pop(context); +} + +/* Utility function to deserialize the STRING inside the BUFFER. + */ +static void +deserialize_svn_string(void *buffer, svn_string_t **string) +{ + svn_temp_deserializer__resolve(buffer, (void **)string); + if (*string == NULL) + return; + + svn_temp_deserializer__resolve(*string, (void **)&(*string)->data); +} + +/* Utility function to serialize checkum CS within the given serialization + * CONTEXT. + */ +static void +serialize_checksum(svn_temp_serializer__context_t *context, + svn_checksum_t * const *cs) +{ + const svn_checksum_t *checksum = *cs; + if (checksum == NULL) + return; + + svn_temp_serializer__push(context, + (const void * const *)cs, + sizeof(*checksum)); + + /* The digest is arbitrary binary data. + * Thus, we cannot use svn_temp_serializer__add_string. */ + svn_temp_serializer__push(context, + (const void * const *)&checksum->digest, + svn_checksum_size(checksum)); + + /* return to the caller's nesting level */ + svn_temp_serializer__pop(context); + svn_temp_serializer__pop(context); +} + +/* Utility function to deserialize the checksum CS inside the BUFFER. + */ +static void +deserialize_checksum(void *buffer, svn_checksum_t **cs) +{ + svn_temp_deserializer__resolve(buffer, (void **)cs); + if (*cs == NULL) + return; + + svn_temp_deserializer__resolve(*cs, (void **)&(*cs)->digest); +} + +/* Utility function to serialize the REPRESENTATION within the given + * serialization CONTEXT. 
+ */ +static void +serialize_representation(svn_temp_serializer__context_t *context, + representation_t * const *representation) +{ + const representation_t * rep = *representation; + if (rep == NULL) + return; + + /* serialize the representation struct itself */ + svn_temp_serializer__push(context, + (const void * const *)representation, + sizeof(*rep)); + + /* serialize sub-structures */ + serialize_checksum(context, &rep->md5_checksum); + serialize_checksum(context, &rep->sha1_checksum); + + svn_temp_serializer__add_string(context, &rep->txn_id); + svn_temp_serializer__add_string(context, &rep->uniquifier); + + /* return to the caller's nesting level */ + svn_temp_serializer__pop(context); +} + +/* Utility function to deserialize the REPRESENTATIONS inside the BUFFER. + */ +static void +deserialize_representation(void *buffer, + representation_t **representation) +{ + representation_t *rep; + + /* fixup the reference to the representation itself */ + svn_temp_deserializer__resolve(buffer, (void **)representation); + rep = *representation; + if (rep == NULL) + return; + + /* fixup of sub-structures */ + deserialize_checksum(rep, &rep->md5_checksum); + deserialize_checksum(rep, &rep->sha1_checksum); + + svn_temp_deserializer__resolve(rep, (void **)&rep->txn_id); + svn_temp_deserializer__resolve(rep, (void **)&rep->uniquifier); +} + +/* auxilliary structure representing the content of a directory hash */ +typedef struct hash_data_t +{ + /* number of entries in the directory */ + apr_size_t count; + + /* number of unused dir entry buckets in the index */ + apr_size_t over_provision; + + /* internal modifying operations counter + * (used to repack data once in a while) */ + apr_size_t operations; + + /* size of the serialization buffer actually used. 
+ * (we will allocate more than we actually need such that we may
+ * append more data in situ later) */
+ apr_size_t len;
+
+ /* reference to the entries */
+ svn_fs_dirent_t **entries;
+
+ /* size of the serialized entries and don't be too wasteful
+ * (needed since the entries are no longer in sequence) */
+ apr_uint32_t *lengths;
+} hash_data_t;
+
+static int
+compare_dirent_id_names(const void *lhs, const void *rhs)
+{
+ return strcmp((*(const svn_fs_dirent_t *const *)lhs)->name,
+ (*(const svn_fs_dirent_t *const *)rhs)->name);
+}
+
+/* Utility function to serialize the *ENTRY_P into the given
+ * serialization CONTEXT. Return the serialized size of the
+ * dir entry in *LENGTH.
+ */
+static void
+serialize_dir_entry(svn_temp_serializer__context_t *context,
+ svn_fs_dirent_t **entry_p,
+ apr_uint32_t *length)
+{
+ svn_fs_dirent_t *entry = *entry_p;
+ apr_size_t initial_length = svn_temp_serializer__get_length(context);
+
+ svn_temp_serializer__push(context,
+ (const void * const *)entry_p,
+ sizeof(svn_fs_dirent_t));
+
+ svn_fs_fs__id_serialize(context, &entry->id);
+ svn_temp_serializer__add_string(context, &entry->name);
+
+ *length = (apr_uint32_t)( svn_temp_serializer__get_length(context)
+ - APR_ALIGN_DEFAULT(initial_length));
+
+ svn_temp_serializer__pop(context);
+}
+
+/* Utility function to serialize the ENTRIES into a new serialization
+ * context to be returned. Allocation will be made from POOL. 
+ */ +static svn_temp_serializer__context_t * +serialize_dir(apr_hash_t *entries, apr_pool_t *pool) +{ + hash_data_t hash_data; + apr_hash_index_t *hi; + apr_size_t i = 0; + svn_temp_serializer__context_t *context; + + /* calculate sizes */ + apr_size_t count = apr_hash_count(entries); + apr_size_t over_provision = 2 + count / 4; + apr_size_t entries_len = (count + over_provision) * sizeof(svn_fs_dirent_t*); + apr_size_t lengths_len = (count + over_provision) * sizeof(apr_uint32_t); + + /* copy the hash entries to an auxilliary struct of known layout */ + hash_data.count = count; + hash_data.over_provision = over_provision; + hash_data.operations = 0; + hash_data.entries = apr_palloc(pool, entries_len); + hash_data.lengths = apr_palloc(pool, lengths_len); + + for (hi = apr_hash_first(pool, entries); hi; hi = apr_hash_next(hi), ++i) + hash_data.entries[i] = svn__apr_hash_index_val(hi); + + /* sort entry index by ID name */ + qsort(hash_data.entries, + count, + sizeof(*hash_data.entries), + compare_dirent_id_names); + + /* Serialize that aux. structure into a new one. Also, provide a good + * estimate for the size of the buffer that we will need. */ + context = svn_temp_serializer__init(&hash_data, + sizeof(hash_data), + 50 + count * 200 + entries_len, + pool); + + /* serialize entries references */ + svn_temp_serializer__push(context, + (const void * const *)&hash_data.entries, + entries_len); + + /* serialize the individual entries and their sub-structures */ + for (i = 0; i < count; ++i) + serialize_dir_entry(context, + &hash_data.entries[i], + &hash_data.lengths[i]); + + svn_temp_serializer__pop(context); + + /* serialize entries references */ + svn_temp_serializer__push(context, + (const void * const *)&hash_data.lengths, + lengths_len); + + return context; +} + +/* Utility function to reconstruct a dir entries hash from serialized data + * in BUFFER and HASH_DATA. Allocation will be made form POOL. 
 */
/* Reconstruct an apr_hash_t of svn_fs_dirent_t* (keyed by entry name) from
 * the serialized directory HASH_DATA inside BUFFER.  Pointer fixup happens
 * in place inside BUFFER; the returned hash and its entries alias that
 * buffer's memory.  Allocate the hash structure itself in POOL.
 */
static apr_hash_t *
deserialize_dir(void *buffer, hash_data_t *hash_data, apr_pool_t *pool)
{
  apr_hash_t *result = svn_hash__make(pool);
  apr_size_t i;
  apr_size_t count;
  svn_fs_dirent_t *entry;
  svn_fs_dirent_t **entries;

  /* resolve the reference to the entries array */
  svn_temp_deserializer__resolve(buffer, (void **)&hash_data->entries);
  entries = hash_data->entries;

  /* fixup the references within each entry and add it to the hash */
  for (i = 0, count = hash_data->count; i < count; ++i)
    {
      /* the element pointer must be resolved before the entry's own
       * sub-structure pointers can be fixed up */
      svn_temp_deserializer__resolve(entries, (void **)&entries[i]);
      entry = hash_data->entries[i];

      /* pointer fixup */
      svn_temp_deserializer__resolve(entry, (void **)&entry->name);
      svn_fs_fs__id_deserialize(entry, (svn_fs_id_t **)&entry->id);

      /* add the entry to the hash */
      svn_hash_sets(result, entry->name, entry);
    }

  /* return the now complete hash */
  return result;
}

/* Append NODEREV_P (and all sub-structures it references) to the
 * serialization CONTEXT.  A NULL noderev serializes to nothing. */
void
svn_fs_fs__noderev_serialize(svn_temp_serializer__context_t *context,
                             node_revision_t * const *noderev_p)
{
  const node_revision_t *noderev = *noderev_p;
  if (noderev == NULL)
    return;

  /* serialize the representation struct itself */
  svn_temp_serializer__push(context,
                            (const void * const *)noderev_p,
                            sizeof(*noderev));

  /* serialize sub-structures */
  svn_fs_fs__id_serialize(context, &noderev->id);
  svn_fs_fs__id_serialize(context, &noderev->predecessor_id);
  serialize_representation(context, &noderev->prop_rep);
  serialize_representation(context, &noderev->data_rep);

  svn_temp_serializer__add_string(context, &noderev->copyfrom_path);
  svn_temp_serializer__add_string(context, &noderev->copyroot_path);
  svn_temp_serializer__add_string(context, &noderev->created_path);

  /* return to the caller's nesting level */
  svn_temp_serializer__pop(context);
}


/* Fix up all pointers of the serialized noderev *NODEREV_P located within
 * BUFFER.  Works in place; *NODEREV_P will point into BUFFER afterwards. */
void
svn_fs_fs__noderev_deserialize(void *buffer,
                               node_revision_t **noderev_p)
{
  node_revision_t *noderev;

  /* fixup the reference to the representation itself,
   * if this is part of a parent structure. */
  if (buffer != *noderev_p)
    svn_temp_deserializer__resolve(buffer, (void **)noderev_p);

  noderev = *noderev_p;
  if (noderev == NULL)
    return;

  /* fixup of sub-structures */
  svn_fs_fs__id_deserialize(noderev, (svn_fs_id_t **)&noderev->id);
  svn_fs_fs__id_deserialize(noderev, (svn_fs_id_t **)&noderev->predecessor_id);
  deserialize_representation(noderev, &noderev->prop_rep);
  deserialize_representation(noderev, &noderev->data_rep);

  svn_temp_deserializer__resolve(noderev, (void **)&noderev->copyfrom_path);
  svn_temp_deserializer__resolve(noderev, (void **)&noderev->copyroot_path);
  svn_temp_deserializer__resolve(noderev, (void **)&noderev->created_path);
}


/* Utility function to serialize COUNT svn_txdelta_op_t objects
 * at OPS in the given serialization CONTEXT.  A NULL ops array
 * serializes to nothing.
 */
static void
serialize_txdelta_ops(svn_temp_serializer__context_t *context,
                      const svn_txdelta_op_t * const * ops,
                      apr_size_t count)
{
  if (*ops == NULL)
    return;

  /* the ops form a contiguous chunk of memory with no further references */
  svn_temp_serializer__push(context,
                            (const void * const *)ops,
                            count * sizeof(svn_txdelta_op_t));
  svn_temp_serializer__pop(context);
}

/* Utility function to serialize W in the given serialization CONTEXT.
 */
static void
serialize_txdeltawindow(svn_temp_serializer__context_t *context,
                        svn_txdelta_window_t * const * w)
{
  svn_txdelta_window_t *window = *w;

  /* serialize the window struct itself */
  svn_temp_serializer__push(context,
                            (const void * const *)w,
                            sizeof(svn_txdelta_window_t));

  /* serialize its sub-structures */
  serialize_txdelta_ops(context, &window->ops, window->num_ops);
  serialize_svn_string(context, &window->new_data);

  svn_temp_serializer__pop(context);
}

/* Implements svn_cache__serialize_func_t for
 * svn_fs_fs__txdelta_cached_window_t (see temp_serializer.h). */
svn_error_t *
svn_fs_fs__serialize_txdelta_window(void **buffer,
                                    apr_size_t *buffer_size,
                                    void *item,
                                    apr_pool_t *pool)
{
  svn_fs_fs__txdelta_cached_window_t *window_info = item;
  svn_stringbuf_t *serialized;

  /* initialize the serialization process and allocate a buffer large
   * enough to do without the need of re-allocations in most cases.
   * (500 is a heuristic estimate for the fixed-size parts and ops.) */
  apr_size_t text_len = window_info->window->new_data
                      ? window_info->window->new_data->len
                      : 0;
  svn_temp_serializer__context_t *context =
      svn_temp_serializer__init(window_info,
                                sizeof(*window_info),
                                500 + text_len,
                                pool);

  /* serialize the sub-structure(s) */
  serialize_txdeltawindow(context, &window_info->window);

  /* return the serialized result */
  serialized = svn_temp_serializer__get(context);

  *buffer = serialized->data;
  *buffer_size = serialized->len;

  return SVN_NO_ERROR;
}

/* Implements svn_cache__deserialize_func_t for
 * svn_fs_fs__txdelta_cached_window_t.  Fixup happens in place within
 * BUFFER; *ITEM will alias BUFFER afterwards. */
svn_error_t *
svn_fs_fs__deserialize_txdelta_window(void **item,
                                      void *buffer,
                                      apr_size_t buffer_size,
                                      apr_pool_t *pool)
{
  svn_txdelta_window_t *window;

  /* Copy the _full_ buffer as it also contains the sub-structures. */
  svn_fs_fs__txdelta_cached_window_t *window_info =
      (svn_fs_fs__txdelta_cached_window_t *)buffer;

  /* pointer reference fixup */
  svn_temp_deserializer__resolve(window_info,
                                 (void **)&window_info->window);
  window = window_info->window;

  svn_temp_deserializer__resolve(window, (void **)&window->ops);

  deserialize_svn_string(window, (svn_string_t**)&window->new_data);

  /* done */
  *item = window_info;

  return SVN_NO_ERROR;
}

/* Implements svn_cache__serialize_func_t for a manifest, i.e. an
 * apr_array_header_t of apr_off_t.  The elements are a plain memory
 * block, so a flat copy is sufficient. */
svn_error_t *
svn_fs_fs__serialize_manifest(void **data,
                              apr_size_t *data_len,
                              void *in,
                              apr_pool_t *pool)
{
  apr_array_header_t *manifest = in;

  *data_len = sizeof(apr_off_t) *manifest->nelts;
  *data = apr_palloc(pool, *data_len);
  memcpy(*data, manifest->elts, *data_len);

  return SVN_NO_ERROR;
}

/* Implements svn_cache__deserialize_func_t for a manifest.
 * NOTE: the resulting array does NOT copy the elements - its elts member
 * aliases DATA directly, so the array is only valid for the lifetime of
 * DATA. */
svn_error_t *
svn_fs_fs__deserialize_manifest(void **out,
                                void *data,
                                apr_size_t data_len,
                                apr_pool_t *pool)
{
  apr_array_header_t *manifest = apr_array_make(pool, 1, sizeof(apr_off_t));

  manifest->nelts = (int) (data_len / sizeof(apr_off_t));
  manifest->nalloc = (int) (data_len / sizeof(apr_off_t));
  manifest->elts = (char*)data;

  *out = manifest;

  return SVN_NO_ERROR;
}

/* Auxiliary structure representing the content of a properties hash.
   This structure is much easier to (de-)serialize than an apr_hash.
 */
typedef struct properties_data_t
{
  /* number of entries in the hash */
  apr_size_t count;

  /* reference to the keys; there are COUNT+1 slots - the extra slot
   * holds an empty sentinel key used to derive key lengths during
   * deserialization */
  const char **keys;

  /* reference to the values */
  const svn_string_t **values;
} properties_data_t;

/* Serialize COUNT C-style strings from *STRINGS into CONTEXT.
 */
static void
serialize_cstring_array(svn_temp_serializer__context_t *context,
                        const char ***strings,
                        apr_size_t count)
{
  apr_size_t i;
  const char **entries = *strings;

  /* serialize COUNT entries pointers (the array) */
  svn_temp_serializer__push(context,
                            (const void * const *)strings,
                            count * sizeof(const char*));

  /* serialize array elements */
  for (i = 0; i < count; ++i)
    svn_temp_serializer__add_string(context, &entries[i]);

  svn_temp_serializer__pop(context);
}

/* Serialize COUNT svn_string_t* items from *STRINGS into CONTEXT. */
static void
serialize_svn_string_array(svn_temp_serializer__context_t *context,
                           const svn_string_t ***strings,
                           apr_size_t count)
{
  apr_size_t i;
  const svn_string_t **entries = *strings;

  /* serialize COUNT entries pointers (the array)
   * (sizeof(const char*) is used for an svn_string_t* slot here; all data
   * pointers have the same size, so the byte count is the same) */
  svn_temp_serializer__push(context,
                            (const void * const *)strings,
                            count * sizeof(const char*));

  /* serialize array elements */
  for (i = 0; i < count; ++i)
    serialize_svn_string(context, &entries[i]);

  svn_temp_serializer__pop(context);
}

/* Implements svn_cache__serialize_func_t for a property hash
 * (const char* keys -> svn_string_t* values). */
svn_error_t *
svn_fs_fs__serialize_properties(void **data,
                                apr_size_t *data_len,
                                void *in,
                                apr_pool_t *pool)
{
  apr_hash_t *hash = in;
  properties_data_t properties;
  svn_temp_serializer__context_t *context;
  apr_hash_index_t *hi;
  svn_stringbuf_t *serialized;
  apr_size_t i;

  /* create our auxiliary data structure.
   * KEYS gets one extra slot for the "" sentinel appended below. */
  properties.count = apr_hash_count(hash);
  properties.keys = apr_palloc(pool, sizeof(const char*) * (properties.count + 1));
  properties.values = apr_palloc(pool, sizeof(const char*) * properties.count);

  /* populate it with the hash entries */
  for (hi = apr_hash_first(pool, hash), i=0; hi; hi = apr_hash_next(hi), ++i)
    {
      properties.keys[i] = svn__apr_hash_index_key(hi);
      properties.values[i] = svn__apr_hash_index_val(hi);
    }

  /* serialize it.
   * (COUNT * 100 is a heuristic initial buffer size estimate.) */
  context = svn_temp_serializer__init(&properties,
                                      sizeof(properties),
                                      properties.count * 100,
                                      pool);

  /* Sentinel key: the deserializer reconstructs each key's length from the
   * distance between consecutive serialized keys, so a final empty key is
   * needed to delimit the last real one (I is COUNT after the loop above). */
  properties.keys[i] = "";
  serialize_cstring_array(context, &properties.keys, properties.count + 1);
  serialize_svn_string_array(context, &properties.values, properties.count);

  /* return the serialized result */
  serialized = svn_temp_serializer__get(context);

  *data = serialized->data;
  *data_len = serialized->len;

  return SVN_NO_ERROR;
}

/* Implements svn_cache__deserialize_func_t for a property hash.
 * Fixup happens in place within DATA; the resulting hash entries alias
 * DATA's memory. */
svn_error_t *
svn_fs_fs__deserialize_properties(void **out,
                                  void *data,
                                  apr_size_t data_len,
                                  apr_pool_t *pool)
{
  apr_hash_t *hash = svn_hash__make(pool);
  properties_data_t *properties = (properties_data_t *)data;
  size_t i;

  /* de-serialize our auxiliary data structure */
  svn_temp_deserializer__resolve(properties, (void**)&properties->keys);
  svn_temp_deserializer__resolve(properties, (void**)&properties->values);

  /* de-serialize each entry and put it into the hash */
  for (i = 0; i < properties->count; ++i)
    {
      /* The key length must be computed from the still-unresolved buffer
       * offsets of two consecutive keys (minus the NUL terminator) BEFORE
       * resolving KEYS[I] into a real pointer - the ordering here matters. */
      apr_size_t len = properties->keys[i+1] - properties->keys[i] - 1;
      svn_temp_deserializer__resolve((void*)properties->keys,
                                     (void**)&properties->keys[i]);

      deserialize_svn_string((void*)properties->values,
                             (svn_string_t **)&properties->values[i]);

      apr_hash_set(hash,
                   properties->keys[i], len,
                   properties->values[i]);
    }

  /* done */
  *out = hash;

  return SVN_NO_ERROR;
}

/* Implements svn_cache__serialize_func_t for svn_fs_id_t. */
svn_error_t *
svn_fs_fs__serialize_id(void **data,
                        apr_size_t *data_len,
                        void *in,
                        apr_pool_t *pool)
{
  const svn_fs_id_t *id = in;
  svn_stringbuf_t *serialized;

  /* create an (empty) serialization context with plenty of buffer space */
  svn_temp_serializer__context_t *context =
      svn_temp_serializer__init(NULL, 0, 250, pool);

  /* serialize the id */
  svn_fs_fs__id_serialize(context, &id);

  /* return serialized data */
  serialized = svn_temp_serializer__get(context);
  *data = serialized->data;
  *data_len = serialized->len;

  return SVN_NO_ERROR;
}

/* Implements svn_cache__deserialize_func_t for svn_fs_id_t. */
svn_error_t *
svn_fs_fs__deserialize_id(void **out,
                          void *data,
                          apr_size_t data_len,
                          apr_pool_t *pool)
{
  /* Copy the _full_ buffer as it also contains the sub-structures. */
  svn_fs_id_t *id = (svn_fs_id_t *)data;

  /* fixup of all pointers etc. */
  svn_fs_fs__id_deserialize(id, &id);

  /* done */
  *out = id;
  return SVN_NO_ERROR;
}

/** Caching node_revision_t objects. **/

/* Implements svn_cache__serialize_func_t for node_revision_t. */
svn_error_t *
svn_fs_fs__serialize_node_revision(void **buffer,
                                   apr_size_t *buffer_size,
                                   void *item,
                                   apr_pool_t *pool)
{
  svn_stringbuf_t *serialized;
  node_revision_t *noderev = item;

  /* create an (empty) serialization context with plenty of (initial)
   * buffer space. */
  svn_temp_serializer__context_t *context =
      svn_temp_serializer__init(NULL, 0,
                                1024 - SVN_TEMP_SERIALIZER__OVERHEAD,
                                pool);

  /* serialize the noderev */
  svn_fs_fs__noderev_serialize(context, &noderev);

  /* return serialized data */
  serialized = svn_temp_serializer__get(context);
  *buffer = serialized->data;
  *buffer_size = serialized->len;

  return SVN_NO_ERROR;
}

/* Implements svn_cache__deserialize_func_t for node_revision_t.
 * Fixup happens in place within BUFFER; *ITEM will alias BUFFER. */
svn_error_t *
svn_fs_fs__deserialize_node_revision(void **item,
                                     void *buffer,
                                     apr_size_t buffer_size,
                                     apr_pool_t *pool)
{
  /* Copy the _full_ buffer as it also contains the sub-structures. */
  node_revision_t *noderev = (node_revision_t *)buffer;

  /* fixup of all pointers etc. */
  svn_fs_fs__noderev_deserialize(noderev, &noderev);

  /* done */
  *item = noderev;
  return SVN_NO_ERROR;
}

/* Utility function that returns the directory serialized inside CONTEXT
 * to DATA and DATA_LEN.  Note that *DATA_LEN is set to the buffer's
 * allocated size (blocksize), while the actually used length is stored
 * in the hash_data_t header at the start of the buffer.
 */
static svn_error_t *
return_serialized_dir_context(svn_temp_serializer__context_t *context,
                              void **data,
                              apr_size_t *data_len)
{
  svn_stringbuf_t *serialized = svn_temp_serializer__get(context);

  *data = serialized->data;
  *data_len = serialized->blocksize;
  ((hash_data_t *)serialized->data)->len = serialized->len;

  return SVN_NO_ERROR;
}

/* Implements svn_cache__serialize_func_t for a directory contents hash. */
svn_error_t *
svn_fs_fs__serialize_dir_entries(void **data,
                                 apr_size_t *data_len,
                                 void *in,
                                 apr_pool_t *pool)
{
  apr_hash_t *dir = in;

  /* serialize the dir content into a new serialization context
   * and return the serialized data */
  return return_serialized_dir_context(serialize_dir(dir, pool),
                                       data,
                                       data_len);
}

/* Implements svn_cache__deserialize_func_t for a directory contents hash.
 * Fixup happens in place within DATA; the hash entries alias DATA. */
svn_error_t *
svn_fs_fs__deserialize_dir_entries(void **out,
                                   void *data,
                                   apr_size_t data_len,
                                   apr_pool_t *pool)
{
  /* Copy the _full_ buffer as it also contains the sub-structures. */
  hash_data_t *hash_data = (hash_data_t *)data;

  /* reconstruct the hash from the serialized data */
  *out = deserialize_dir(hash_data, hash_data, pool);

  return SVN_NO_ERROR;
}

/* Implements svn_cache__partial_getter_func_t: return the manifest offset
 * at index *BATON (an apr_int64_t).
 * NOTE(review): BATON is not range-checked against DATA_LEN here -
 * presumably callers guarantee a valid shard index; verify at call sites. */
svn_error_t *
svn_fs_fs__get_sharded_offset(void **out,
                              const void *data,
                              apr_size_t data_len,
                              void *baton,
                              apr_pool_t *pool)
{
  const apr_off_t *manifest = data;
  apr_int64_t shard_pos = *(apr_int64_t *)baton;

  *(apr_off_t *)out = manifest[shard_pos];

  return SVN_NO_ERROR;
}

/* Utility function that returns the lowest index of the first entry in
 * *ENTRIES that points to a dir entry with a name equal or larger than NAME.
 * If an exact match has been found, *FOUND will be set to TRUE.  COUNT is
 * the number of valid entries in ENTRIES.
 */
static apr_size_t
find_entry(svn_fs_dirent_t **entries,
           const char *name,
           apr_size_t count,
           svn_boolean_t *found)
{
  /* binary search for the desired entry by name.
   * Invariant: all entries below LOWER compare less than NAME, all
   * entries at or above UPPER compare greater or equal. */
  apr_size_t lower = 0;
  apr_size_t upper = count;
  apr_size_t middle;

  for (middle = upper / 2; lower < upper; middle = (upper + lower) / 2)
    {
      /* ENTRIES still holds serialized (unresolved) pointers, hence the
       * read-only __ptr resolution on every access */
      const svn_fs_dirent_t *entry =
          svn_temp_deserializer__ptr(entries, (const void *const *)&entries[middle]);
      const char* entry_name =
          svn_temp_deserializer__ptr(entry, (const void *const *)&entry->name);

      int diff = strcmp(entry_name, name);
      if (diff < 0)
        lower = middle + 1;
      else
        upper = middle;
    }

  /* check whether we actually found a match */
  *found = FALSE;
  if (lower < count)
    {
      const svn_fs_dirent_t *entry =
          svn_temp_deserializer__ptr(entries, (const void *const *)&entries[lower]);
      const char* entry_name =
          svn_temp_deserializer__ptr(entry, (const void *const *)&entry->name);

      if (strcmp(entry_name, name) == 0)
        *found = TRUE;
    }

  return lower;
}

/* Implements svn_cache__partial_getter_func_t: extract the single dir
 * entry named BATON from the serialized directory DATA without
 * deserializing the whole directory.  Sets *OUT to a copy allocated in
 * POOL, or to NULL if no such entry exists. */
svn_error_t *
svn_fs_fs__extract_dir_entry(void **out,
                             const void *data,
                             apr_size_t data_len,
                             void *baton,
                             apr_pool_t *pool)
{
  const hash_data_t *hash_data = data;
  const char* name = baton;
  svn_boolean_t found;

  /* resolve the reference to the entries array */
  const svn_fs_dirent_t * const *entries =
    svn_temp_deserializer__ptr(data, (const void *const *)&hash_data->entries);

  /* resolve the reference to the lengths array */
  const apr_uint32_t *lengths =
    svn_temp_deserializer__ptr(data, (const void *const *)&hash_data->lengths);

  /* binary search for the desired entry by name */
  apr_size_t pos = find_entry((svn_fs_dirent_t **)entries,
                              name,
                              hash_data->count,
                              &found);

  /* de-serialize that entry or return NULL, if no match has been found */
  *out = NULL;
  if (found)
    {
      const svn_fs_dirent_t *source =
          svn_temp_deserializer__ptr(entries, (const void *const *)&entries[pos]);

      /* Entries have been serialized one-by-one, each time including all
       * nested structures and strings. Therefore, they occupy a single
       * block of memory whose end-offset is either the beginning of the
       * next entry or the end of the buffer
       */
      apr_size_t size = lengths[pos];

      /* copy & deserialize the entry */
      svn_fs_dirent_t *new_entry = apr_palloc(pool, size);
      memcpy(new_entry, source, size);

      svn_temp_deserializer__resolve(new_entry, (void **)&new_entry->name);
      svn_fs_fs__id_deserialize(new_entry, (svn_fs_id_t **)&new_entry->id);
      *(svn_fs_dirent_t **)out = new_entry;
    }

  return SVN_NO_ERROR;
}

/* Utility function for svn_fs_fs__replace_dir_entry that implements the
 * modification as a simple deserialize / modify / serialize sequence.
 * This is the slow but always-correct fallback path.
 */
static svn_error_t *
slowly_replace_dir_entry(void **data,
                         apr_size_t *data_len,
                         void *baton,
                         apr_pool_t *pool)
{
  replace_baton_t *replace_baton = (replace_baton_t *)baton;
  hash_data_t *hash_data = (hash_data_t *)*data;
  apr_hash_t *dir;

  SVN_ERR(svn_fs_fs__deserialize_dir_entries((void **)&dir,
                                             *data,
                                             hash_data->len,
                                             pool));
  /* a NULL new_entry removes the entry from the hash */
  svn_hash_sets(dir, replace_baton->name, replace_baton->new_entry);

  return svn_fs_fs__serialize_dir_entries(data, data_len, dir, pool);
}

/* Implements svn_cache__partial_setter_func_t: add, replace or remove
 * (new_entry == NULL) a single entry in the serialized directory *DATA,
 * as described by the replace_baton_t in BATON. */
svn_error_t *
svn_fs_fs__replace_dir_entry(void **data,
                             apr_size_t *data_len,
                             void *baton,
                             apr_pool_t *pool)
{
  replace_baton_t *replace_baton = (replace_baton_t *)baton;
  hash_data_t *hash_data = (hash_data_t *)*data;
  svn_boolean_t found;
  svn_fs_dirent_t **entries;
  apr_uint32_t *lengths;
  apr_uint32_t length;
  apr_size_t pos;

  svn_temp_serializer__context_t *context;

  /* after quite a number of operations, let's re-pack everything.
   * This is to limit the amount of wasted space as we cannot overwrite
   * existing data but must always append.
 */
  if (hash_data->operations > 2 + hash_data->count / 4)
    return slowly_replace_dir_entry(data, data_len, baton, pool);

  /* resolve the reference to the entries array */
  entries = (svn_fs_dirent_t **)
    svn_temp_deserializer__ptr((const char *)hash_data,
                               (const void *const *)&hash_data->entries);

  /* resolve the reference to the lengths array */
  lengths = (apr_uint32_t *)
    svn_temp_deserializer__ptr((const char *)hash_data,
                               (const void *const *)&hash_data->lengths);

  /* binary search for the desired entry by name */
  pos = find_entry(entries, replace_baton->name, hash_data->count, &found);

  /* handle entry removal (if found at all) */
  if (replace_baton->new_entry == NULL)
    {
      if (found)
        {
          /* remove reference to the entry from the index.
           * NOTE(review): this moves COUNT - POS elements, i.e. one slot
           * past the last live entry - presumably safe because the index
           * arrays are over-provisioned; confirm against serialize_dir. */
          memmove(&entries[pos],
                  &entries[pos + 1],
                  sizeof(entries[pos]) * (hash_data->count - pos));
          memmove(&lengths[pos],
                  &lengths[pos + 1],
                  sizeof(lengths[pos]) * (hash_data->count - pos));

          hash_data->count--;
          hash_data->over_provision++;
          hash_data->operations++;
        }

      return SVN_NO_ERROR;
    }

  /* if not found, prepare to insert the new entry */
  if (!found)
    {
      /* fallback to slow operation if there is no place left to insert a
       * new entry into the index.  That will automatically add some spare
       * entries ("overprovision").
 */
      if (hash_data->over_provision == 0)
        return slowly_replace_dir_entry(data, data_len, baton, pool);

      /* make entries[index] available for pointing to the new entry */
      memmove(&entries[pos + 1],
              &entries[pos],
              sizeof(entries[pos]) * (hash_data->count - pos));
      memmove(&lengths[pos + 1],
              &lengths[pos],
              sizeof(lengths[pos]) * (hash_data->count - pos));

      hash_data->count++;
      hash_data->over_provision--;
      hash_data->operations++;
    }

  /* serialize the new entry, appending it to the existing buffer */
  entries[pos] = replace_baton->new_entry;
  context = svn_temp_serializer__init_append(hash_data,
                                             entries,
                                             hash_data->len,
                                             *data_len,
                                             pool);
  serialize_dir_entry(context, &entries[pos], &length);

  /* return the updated serialized data */
  SVN_ERR (return_serialized_dir_context(context,
                                         data,
                                         data_len));

  /* since the previous call may have re-allocated the buffer, the lengths
   * pointer may no longer point to the entry in that buffer. Therefore,
   * re-map it again and store the length value after that. */

  hash_data = (hash_data_t *)*data;
  lengths = (apr_uint32_t *)
    svn_temp_deserializer__ptr((const char *)hash_data,
                               (const void *const *)&hash_data->lengths);
  lengths[pos] = length;

  return SVN_NO_ERROR;
}

/* Utility function to serialize change CHANGE_P in the given serialization
 * CONTEXT.
 */
static void
serialize_change(svn_temp_serializer__context_t *context,
                 change_t * const *change_p)
{
  const change_t * change = *change_p;
  if (change == NULL)
    return;

  /* serialize the change struct itself */
  svn_temp_serializer__push(context,
                            (const void * const *)change_p,
                            sizeof(*change));

  /* serialize sub-structures */
  svn_fs_fs__id_serialize(context, &change->noderev_id);

  svn_temp_serializer__add_string(context, &change->path);
  svn_temp_serializer__add_string(context, &change->copyfrom_path);

  /* return to the caller's nesting level */
  svn_temp_serializer__pop(context);
}

/* Utility function to deserialize the CHANGE_P within the given
 * BUFFER.  Fixup happens in place.
 */
static void
deserialize_change(void *buffer, change_t **change_p)
{
  change_t * change;

  /* fix-up of the pointer to the struct in question */
  svn_temp_deserializer__resolve(buffer, (void **)change_p);

  change = *change_p;
  if (change == NULL)
    return;

  /* fix-up of sub-structures */
  svn_fs_fs__id_deserialize(change, (svn_fs_id_t **)&change->noderev_id);

  svn_temp_deserializer__resolve(change, (void **)&change->path);
  svn_temp_deserializer__resolve(change, (void **)&change->copyfrom_path);
}

/* Auxiliary structure representing the content of a change_t array.
   This structure is much easier to (de-)serialize than an APR array.
 */
typedef struct changes_data_t
{
  /* number of entries in the array */
  int count;

  /* reference to the changes */
  change_t **changes;
} changes_data_t;

/* Implements svn_cache__serialize_func_t for an apr_array_header_t of
 * change_t*. */
svn_error_t *
svn_fs_fs__serialize_changes(void **data,
                             apr_size_t *data_len,
                             void *in,
                             apr_pool_t *pool)
{
  apr_array_header_t *array = in;
  changes_data_t changes;
  svn_temp_serializer__context_t *context;
  svn_stringbuf_t *serialized;
  int i;

  /* initialize our auxiliary data structure */
  changes.count = array->nelts;
  changes.changes = apr_palloc(pool, sizeof(change_t*) * changes.count);

  /* populate it with the array elements */
  for (i = 0; i < changes.count; ++i)
    changes.changes[i] = APR_ARRAY_IDX(array, i, change_t*);

  /* serialize it and all its elements.
   * (COUNT * 100 is a heuristic initial buffer size estimate.) */
  context = svn_temp_serializer__init(&changes,
                                      sizeof(changes),
                                      changes.count * 100,
                                      pool);

  svn_temp_serializer__push(context,
                            (const void * const *)&changes.changes,
                            changes.count * sizeof(change_t*));

  for (i = 0; i < changes.count; ++i)
    serialize_change(context, &changes.changes[i]);

  svn_temp_serializer__pop(context);

  /* return the serialized result */
  serialized = svn_temp_serializer__get(context);

  *data = serialized->data;
  *data_len = serialized->len;

  return SVN_NO_ERROR;
}

/* Implements svn_cache__deserialize_func_t for an apr_array_header_t of
 * change_t*.  Fixup happens in place; the array elements alias DATA. */
svn_error_t *
svn_fs_fs__deserialize_changes(void **out,
                               void *data,
                               apr_size_t data_len,
                               apr_pool_t *pool)
{
  int i;
  changes_data_t *changes = (changes_data_t *)data;
  apr_array_header_t *array = apr_array_make(pool, changes->count,
                                             sizeof(change_t *));

  /* de-serialize our auxiliary data structure */
  svn_temp_deserializer__resolve(changes, (void**)&changes->changes);

  /* de-serialize each entry and add it to the array */
  for (i = 0; i < changes->count; ++i)
    {
      deserialize_change((void*)changes->changes,
                         (change_t **)&changes->changes[i]);
      APR_ARRAY_PUSH(array, change_t *) = changes->changes[i];
    }

  /* done */
  *out = array;

  return SVN_NO_ERROR;
}


/* Auxiliary structure representing the content of a svn_mergeinfo_t hash.
   This structure is much easier to (de-)serialize than an APR array.
 */
typedef struct mergeinfo_data_t
{
  /* number of paths in the hash */
  unsigned count;

  /* COUNT keys (paths) */
  const char **keys;

  /* COUNT keys lengths (strlen of path) */
  apr_ssize_t *key_lengths;

  /* COUNT entries, each giving the number of ranges for the key */
  int *range_counts;

  /* all ranges in a single, concatenated buffer */
  svn_merge_range_t *ranges;
} mergeinfo_data_t;

/* Implements svn_cache__serialize_func_t for svn_mergeinfo_t objects.
 * The hash is flattened into a mergeinfo_data_t: one keys array, one
 * per-key range count array, and all ranges concatenated into a single
 * buffer (ranges are copied by value, so the result is self-contained). */
svn_error_t *
svn_fs_fs__serialize_mergeinfo(void **data,
                               apr_size_t *data_len,
                               void *in,
                               apr_pool_t *pool)
{
  svn_mergeinfo_t mergeinfo = in;
  mergeinfo_data_t merges;
  svn_temp_serializer__context_t *context;
  svn_stringbuf_t *serialized;
  apr_hash_index_t *hi;
  unsigned i;
  int k;
  apr_size_t range_count;

  /* initialize our auxiliary data structure */
  merges.count = apr_hash_count(mergeinfo);
  merges.keys = apr_palloc(pool, sizeof(*merges.keys) * merges.count);
  merges.key_lengths = apr_palloc(pool, sizeof(*merges.key_lengths) *
                                        merges.count);
  merges.range_counts = apr_palloc(pool, sizeof(*merges.range_counts) *
                                         merges.count);

  /* first pass: collect keys and count the total number of ranges */
  i = 0;
  range_count = 0;
  for (hi = apr_hash_first(pool, mergeinfo); hi; hi = apr_hash_next(hi), ++i)
    {
      svn_rangelist_t *ranges;
      apr_hash_this(hi, (const void**)&merges.keys[i],
                        &merges.key_lengths[i],
                        (void **)&ranges);
      merges.range_counts[i] = ranges->nelts;
      range_count += ranges->nelts;
    }

  merges.ranges = apr_palloc(pool, sizeof(*merges.ranges) * range_count);

  /* second pass: flatten all ranges (by value) into the single buffer.
   * NOTE(review): relies on apr_hash iteration order being identical in
   * both passes over an unmodified hash - TODO confirm. */
  i = 0;
  for (hi = apr_hash_first(pool, mergeinfo); hi; hi = apr_hash_next(hi))
    {
      svn_rangelist_t *ranges = svn__apr_hash_index_val(hi);
      for (k = 0; k < ranges->nelts; ++k, ++i)
        merges.ranges[i] = *APR_ARRAY_IDX(ranges, k, svn_merge_range_t*);
    }

  /* serialize it and all its elements.
   * (RANGE_COUNT * 30 is a heuristic initial buffer size estimate.) */
  context = svn_temp_serializer__init(&merges,
                                      sizeof(merges),
                                      range_count * 30,
                                      pool);

  /* keys array */
  svn_temp_serializer__push(context,
                            (const void * const *)&merges.keys,
                            merges.count * sizeof(*merges.keys));

  for (i = 0; i < merges.count; ++i)
    svn_temp_serializer__add_string(context, &merges.keys[i]);

  svn_temp_serializer__pop(context);

  /* key lengths array */
  svn_temp_serializer__push(context,
                            (const void * const *)&merges.key_lengths,
                            merges.count * sizeof(*merges.key_lengths));
  svn_temp_serializer__pop(context);

  /* range counts array */
  svn_temp_serializer__push(context,
                            (const void * const *)&merges.range_counts,
                            merges.count * sizeof(*merges.range_counts));
  svn_temp_serializer__pop(context);

  /* ranges */
  svn_temp_serializer__push(context,
                            (const void * const *)&merges.ranges,
                            range_count * sizeof(*merges.ranges));
  svn_temp_serializer__pop(context);

  /* return the serialized result */
  serialized = svn_temp_serializer__get(context);

  *data = serialized->data;
  *data_len = serialized->len;

  return SVN_NO_ERROR;
}

/* Implements svn_cache__deserialize_func_t for svn_mergeinfo_t objects.
 * Fixup happens in place; the resulting hash's keys and range structs
 * alias DATA's memory. */
svn_error_t *
svn_fs_fs__deserialize_mergeinfo(void **out,
                                 void *data,
                                 apr_size_t data_len,
                                 apr_pool_t *pool)
{
  unsigned i;
  int k, n;
  mergeinfo_data_t *merges = (mergeinfo_data_t *)data;
  svn_mergeinfo_t mergeinfo;

  /* de-serialize our auxiliary data structure */
  svn_temp_deserializer__resolve(merges, (void**)&merges->keys);
  svn_temp_deserializer__resolve(merges, (void**)&merges->key_lengths);
  svn_temp_deserializer__resolve(merges, (void**)&merges->range_counts);
  svn_temp_deserializer__resolve(merges, (void**)&merges->ranges);

  /* de-serialize keys and add entries to the result.
   * N walks the concatenated ranges buffer across all keys. */
  n = 0;
  mergeinfo = svn_hash__make(pool);
  for (i = 0; i < merges->count; ++i)
    {
      svn_rangelist_t *ranges = apr_array_make(pool,
                                               merges->range_counts[i],
                                               sizeof(svn_merge_range_t*));
      for (k = 0; k < merges->range_counts[i]; ++k, ++n)
        APR_ARRAY_PUSH(ranges, svn_merge_range_t*) = &merges->ranges[n];

svn_temp_deserializer__resolve((void*)merges->keys, + (void**)&merges->keys[i]); + apr_hash_set(mergeinfo, merges->keys[i], merges->key_lengths[i], ranges); + } + + /* done */ + *out = mergeinfo; + + return SVN_NO_ERROR; +} + diff --git a/subversion/libsvn_fs_fs/temp_serializer.h b/subversion/libsvn_fs_fs/temp_serializer.h new file mode 100644 index 0000000..1009d63 --- /dev/null +++ b/subversion/libsvn_fs_fs/temp_serializer.h @@ -0,0 +1,266 @@ +/* temp_serializer.h : serialization functions for caching of FSFS structures + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#ifndef SVN_LIBSVN_FS__TEMP_SERIALIZER_H +#define SVN_LIBSVN_FS__TEMP_SERIALIZER_H + +#include "fs.h" + +/** + * Prepend the @a number to the @a string in a space efficient way such that + * no other (number,string) combination can produce the same result. + * Allocate temporaries as well as the result from @a pool. 
 */
const char*
svn_fs_fs__combine_number_and_string(apr_int64_t number,
                                     const char *string,
                                     apr_pool_t *pool);

/**
 * Serialize a @a noderev_p within the serialization @a context.
 */
void
svn_fs_fs__noderev_serialize(struct svn_temp_serializer__context_t *context,
                             node_revision_t * const *noderev_p);

/**
 * Deserialize a @a noderev_p within the @a buffer.
 */
void
svn_fs_fs__noderev_deserialize(void *buffer,
                               node_revision_t **noderev_p);

/**
 * #svn_txdelta_window_t is not sufficient for caching the data it
 * represents because the data read process needs auxiliary information.
 */
typedef struct
{
  /* the txdelta window information cached / to be cached */
  svn_txdelta_window_t *window;

  /* the revision file read pointer position right after reading the window */
  apr_off_t end_offset;
} svn_fs_fs__txdelta_cached_window_t;

/**
 * Implements #svn_cache__serialize_func_t for
 * #svn_fs_fs__txdelta_cached_window_t.
 */
svn_error_t *
svn_fs_fs__serialize_txdelta_window(void **buffer,
                                    apr_size_t *buffer_size,
                                    void *item,
                                    apr_pool_t *pool);

/**
 * Implements #svn_cache__deserialize_func_t for
 * #svn_fs_fs__txdelta_cached_window_t.
 */
svn_error_t *
svn_fs_fs__deserialize_txdelta_window(void **item,
                                      void *buffer,
                                      apr_size_t buffer_size,
                                      apr_pool_t *pool);

/**
 * Implements #svn_cache__serialize_func_t for a manifest
 * (@a in is an #apr_array_header_t of apr_off_t elements).
 */
svn_error_t *
svn_fs_fs__serialize_manifest(void **data,
                              apr_size_t *data_len,
                              void *in,
                              apr_pool_t *pool);

/**
 * Implements #svn_cache__deserialize_func_t for a manifest
 * (@a *out is an #apr_array_header_t of apr_off_t elements).
 */
svn_error_t *
svn_fs_fs__deserialize_manifest(void **out,
                                void *data,
                                apr_size_t data_len,
                                apr_pool_t *pool);

/**
 * Implements #svn_cache__serialize_func_t for a properties hash
 * (@a in is an #apr_hash_t of svn_string_t elements, keyed by const char*).
 */
svn_error_t *
svn_fs_fs__serialize_properties(void **data,
                                apr_size_t *data_len,
                                void *in,
                                apr_pool_t *pool);

/**
 * Implements #svn_cache__deserialize_func_t for a properties hash
 * (@a *out is an #apr_hash_t of svn_string_t elements, keyed by const char*).
 */
svn_error_t *
svn_fs_fs__deserialize_properties(void **out,
                                  void *data,
                                  apr_size_t data_len,
                                  apr_pool_t *pool);

/**
 * Implements #svn_cache__serialize_func_t for #svn_fs_id_t
 */
svn_error_t *
svn_fs_fs__serialize_id(void **data,
                        apr_size_t *data_len,
                        void *in,
                        apr_pool_t *pool);

/**
 * Implements #svn_cache__deserialize_func_t for #svn_fs_id_t
 */
svn_error_t *
svn_fs_fs__deserialize_id(void **out,
                          void *data,
                          apr_size_t data_len,
                          apr_pool_t *pool);

/**
 * Implements #svn_cache__serialize_func_t for #node_revision_t
 */
svn_error_t *
svn_fs_fs__serialize_node_revision(void **buffer,
                                   apr_size_t *buffer_size,
                                   void *item,
                                   apr_pool_t *pool);

/**
 * Implements #svn_cache__deserialize_func_t for #node_revision_t
 */
svn_error_t *
svn_fs_fs__deserialize_node_revision(void **item,
                                     void *buffer,
                                     apr_size_t buffer_size,
                                     apr_pool_t *pool);

/**
 * Implements #svn_cache__serialize_func_t for a directory contents hash
 */
svn_error_t *
svn_fs_fs__serialize_dir_entries(void **data,
                                 apr_size_t *data_len,
                                 void *in,
                                 apr_pool_t *pool);

/**
 * Implements #svn_cache__deserialize_func_t for a directory contents hash
 */
svn_error_t *
svn_fs_fs__deserialize_dir_entries(void **out,
                                   void *data,
                                   apr_size_t data_len,
                                   apr_pool_t *pool);

/**
 * Implements #svn_cache__partial_getter_func_t.  Set (apr_off_t) @a *out
 * to the element indexed by (apr_int64_t) @a *baton within the
 * serialized manifest array @a data and @a data_len.
 */
svn_error_t *
svn_fs_fs__get_sharded_offset(void **out,
                              const void *data,
                              apr_size_t data_len,
                              void *baton,
                              apr_pool_t *pool);

/**
 * Implements #svn_cache__partial_getter_func_t for a single
 * #svn_fs_dirent_t within a serialized directory contents hash,
 * identified by its name (const char @a *baton).
 */
svn_error_t *
svn_fs_fs__extract_dir_entry(void **out,
                             const void *data,
                             apr_size_t data_len,
                             void *baton,
                             apr_pool_t *pool);

/**
 * Describes the change to be done to a directory: Set the entry
 * identified by @a name to the value @a new_entry.  If the latter is
 * @c NULL, the entry shall be removed if it exists.  Otherwise it
 * will be replaced or automatically added, respectively.
 */
typedef struct replace_baton_t
{
  /** name of the directory entry to modify */
  const char *name;

  /** directory entry to insert instead */
  svn_fs_dirent_t *new_entry;
} replace_baton_t;

/**
 * Implements #svn_cache__partial_setter_func_t for a single
 * #svn_fs_dirent_t within a serialized directory contents hash,
 * identified by its name in the #replace_baton_t in @a baton.
 */
svn_error_t *
svn_fs_fs__replace_dir_entry(void **data,
                             apr_size_t *data_len,
                             void *baton,
                             apr_pool_t *pool);

/**
 * Implements #svn_cache__serialize_func_t for an #apr_array_header_t of
 * #change_t *.
 */
svn_error_t *
svn_fs_fs__serialize_changes(void **data,
                             apr_size_t *data_len,
                             void *in,
                             apr_pool_t *pool);

/**
 * Implements #svn_cache__deserialize_func_t for an #apr_array_header_t of
 * #change_t *.
 */
svn_error_t *
svn_fs_fs__deserialize_changes(void **out,
                               void *data,
                               apr_size_t data_len,
                               apr_pool_t *pool);

/**
 * Implements #svn_cache__serialize_func_t for #svn_mergeinfo_t objects.
 */
svn_error_t *
svn_fs_fs__serialize_mergeinfo(void **data,
                               apr_size_t *data_len,
                               void *in,
                               apr_pool_t *pool);

/**
 * Implements #svn_cache__deserialize_func_t for #svn_mergeinfo_t objects.
+ */ +svn_error_t * +svn_fs_fs__deserialize_mergeinfo(void **out, + void *data, + apr_size_t data_len, + apr_pool_t *pool); + +#endif diff --git a/subversion/libsvn_fs_fs/tree.c b/subversion/libsvn_fs_fs/tree.c new file mode 100644 index 0000000..c14955d --- /dev/null +++ b/subversion/libsvn_fs_fs/tree.c @@ -0,0 +1,4420 @@ +/* tree.c : tree-like filesystem, built on DAG filesystem + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + + +/* The job of this layer is to take a filesystem with lots of node + sharing going on --- the real DAG filesystem as it appears in the + database --- and make it look and act like an ordinary tree + filesystem, with no sharing. + + We do just-in-time cloning: you can walk from some unfinished + transaction's root down into directories and files shared with + committed revisions; as soon as you try to change something, the + appropriate nodes get cloned (and parent directory entries updated) + invisibly, behind your back. 
Any other references you have to + nodes that have been cloned by other changes, even made by other + processes, are automatically updated to point to the right clones. */ + + +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <apr_pools.h> +#include <apr_hash.h> + +#include "svn_hash.h" +#include "svn_private_config.h" +#include "svn_pools.h" +#include "svn_error.h" +#include "svn_path.h" +#include "svn_mergeinfo.h" +#include "svn_fs.h" +#include "svn_props.h" + +#include "fs.h" +#include "key-gen.h" +#include "dag.h" +#include "lock.h" +#include "tree.h" +#include "fs_fs.h" +#include "id.h" +#include "temp_serializer.h" + +#include "private/svn_mergeinfo_private.h" +#include "private/svn_subr_private.h" +#include "private/svn_fs_util.h" +#include "private/svn_fspath.h" +#include "../libsvn_fs/fs-loader.h" + + +/* ### I believe this constant will become internal to reps-strings.c. + ### see the comment in window_consumer() for more information. */ + +/* ### the comment also seems to need tweaking: the log file stuff + ### is no longer an issue... */ +/* Data written to the filesystem through the svn_fs_apply_textdelta() + interface is cached in memory until the end of the data stream, or + until a size trigger is hit. Define that trigger here (in bytes). + Setting the value to 0 will result in no filesystem buffering at + all. The value only really matters when dealing with file contents + bigger than the value itself. Above that point, large values here + allow the filesystem to buffer more data in memory before flushing + to the database, which increases memory usage but greatly decreases + the amount of disk access (and log-file generation) in database. + Smaller values will limit your overall memory consumption, but can + drastically hurt throughput by necessitating more write operations + to the database (which also generates more log-files). */ +#define WRITE_BUFFER_SIZE 512000 + + + +/* The root structures. 

   Why do they contain different data?  Well, transactions are mutable
   enough that it isn't safe to cache the DAG node for the root
   directory or the hash of copyfrom data: somebody else might modify
   them concurrently on disk!  (Why is the DAG node cache safer than
   the root DAG node?  When cloning transaction DAG nodes in and out
   of the cache, all of the possibly-mutable data from the
   node_revision_t inside the dag_node_t is dropped.)  Additionally,
   revisions are immutable enough that their DAG node cache can be
   kept in the FS object and shared among multiple revision root
   objects.
*/
/* Root object state specific to a (immutable) revision root. */
typedef struct fs_rev_root_data_t
{
  /* A dag node for the revision's root directory. */
  dag_node_t *root_dir;

  /* Cache structure for mapping const char * PATH to const char
     *COPYFROM_STRING, so that paths_changed can remember all the
     copyfrom information in the changes file.
     COPYFROM_STRING has the format "REV PATH", or is the empty string if
     the path was added without history. */
  apr_hash_t *copyfrom_cache;

} fs_rev_root_data_t;

/* Root object state specific to a (mutable) transaction root. */
typedef struct fs_txn_root_data_t
{
  /* ID of the transaction this root refers to. */
  const char *txn_id;

  /* Cache of txn DAG nodes (without their nested noderevs, because
   * it's mutable).  Same keys/values as ffd->rev_node_cache. */
  svn_cache__t *txn_node_cache;
} fs_txn_root_data_t;

/* Declared here to resolve the circular dependencies. */
static svn_error_t * get_dag(dag_node_t **dag_node_p,
                             svn_fs_root_t *root,
                             const char *path,
                             svn_boolean_t needs_lock_cache,
                             apr_pool_t *pool);

static svn_fs_root_t *make_revision_root(svn_fs_t *fs, svn_revnum_t rev,
                                         dag_node_t *root_dir,
                                         apr_pool_t *pool);

static svn_error_t *make_txn_root(svn_fs_root_t **root_p,
                                  svn_fs_t *fs, const char *txn,
                                  svn_revnum_t base_rev, apr_uint32_t flags,
                                  apr_pool_t *pool);


/*** Node Caching ***/

/* 1st level cache */

/* An entry in the first-level cache.  REVISION and PATH form the key that
   will ultimately be matched.
 */
typedef struct cache_entry_t
{
  /* hash value derived from PATH, REVISION.
     Used to short-circuit failed lookups. */
  long int hash_value;

  /* revision to which the NODE belongs */
  svn_revnum_t revision;

  /* path of the NODE */
  char *path;

  /* cached value of strlen(PATH). */
  apr_size_t path_len;

  /* the node allocated in the cache's pool.  NULL for empty entries. */
  dag_node_t *node;
} cache_entry_t;

/* Number of entries in the cache.  Keep this low to keep pressure on the
   CPU caches low as well.  A binary value is most efficient.  If we walk
   a directory tree, we want enough entries to store nodes for all files
   without overwriting the nodes for the parent folder.  That way, there
   will be no unnecessary misses (except for a few random ones caused by
   hash collision).

   The actual number of instances may be higher but entries that got
   overwritten are no longer visible.
 */
enum { BUCKET_COUNT = 256 };

/* Each pool that has received a DAG node will hold at least one lock on
   our cache to ensure that the node remains valid despite being allocated
   in the cache's pool.  This is the structure to represent the lock.
 */
typedef struct cache_lock_t
{
  /* pool holding the lock */
  apr_pool_t *pool;

  /* cache being locked */
  fs_fs_dag_cache_t *cache;

  /* next lock.  NULL at EOL */
  struct cache_lock_t *next;

  /* previous lock.  NULL at list head.  Only then this==cache->first_lock */
  struct cache_lock_t *prev;
} cache_lock_t;

/* The actual cache structure.  All nodes will be allocated in POOL.
   When the number of INSERTIONS (i.e. objects created from that pool)
   exceeds a certain threshold, the pool will be cleared and the cache
   with it.

   To ensure that nodes returned from this structure remain valid, the
   cache will get locked for the lifetime of the _receiving_ pools (i.e.
   those in which we would allocate the node if there was no cache.).
   The cache will only be cleared when FIRST_LOCK is NULL, i.e. when no
   receiving pool still holds a lock.
+ */ +struct fs_fs_dag_cache_t +{ + /* fixed number of (possibly empty) cache entries */ + cache_entry_t buckets[BUCKET_COUNT]; + + /* pool used for all node allocation */ + apr_pool_t *pool; + + /* number of entries created from POOL since the last cleanup */ + apr_size_t insertions; + + /* Property lookups etc. have a very high locality (75% re-hit). + Thus, remember the last hit location for optimistic lookup. */ + apr_size_t last_hit; + + /* List of receiving pools that are still alive. */ + cache_lock_t *first_lock; +}; + +/* Cleanup function to be called when a receiving pool gets cleared. + Unlocks the cache once. + */ +static apr_status_t +unlock_cache(void *baton_void) +{ + cache_lock_t *lock = baton_void; + + /* remove lock from chain. Update the head */ + if (lock->next) + lock->next->prev = lock->prev; + if (lock->prev) + lock->prev->next = lock->next; + else + lock->cache->first_lock = lock->next; + + return APR_SUCCESS; +} + +/* Cleanup function to be called when the cache itself gets destroyed. + In that case, we must unregister all unlock requests. + */ +static apr_status_t +unregister_locks(void *baton_void) +{ + fs_fs_dag_cache_t *cache = baton_void; + cache_lock_t *lock; + + for (lock = cache->first_lock; lock; lock = lock->next) + apr_pool_cleanup_kill(lock->pool, + lock, + unlock_cache); + + return APR_SUCCESS; +} + +fs_fs_dag_cache_t* +svn_fs_fs__create_dag_cache(apr_pool_t *pool) +{ + fs_fs_dag_cache_t *result = apr_pcalloc(pool, sizeof(*result)); + result->pool = svn_pool_create(pool); + + apr_pool_cleanup_register(pool, + result, + unregister_locks, + apr_pool_cleanup_null); + + return result; +} + +/* Prevent the entries in CACHE from being destroyed, for as long as the + POOL lives. + */ +static void +lock_cache(fs_fs_dag_cache_t* cache, apr_pool_t *pool) +{ + /* we only need to lock / unlock once per pool. Since we will often ask + for multiple nodes with the same pool, we can reduce the overhead. + However, if e.g. 
pools are being used in an alternating pattern, + we may lock the cache more than once for the same pool (and register + just as many cleanup actions). + */ + cache_lock_t *lock = cache->first_lock; + + /* try to find an existing lock for POOL. + But limit the time spent on chasing pointers. */ + int limiter = 8; + while (lock && --limiter) + if (lock->pool == pool) + return; + + /* create a new lock and put it at the beginning of the lock chain */ + lock = apr_palloc(pool, sizeof(*lock)); + lock->cache = cache; + lock->pool = pool; + lock->next = cache->first_lock; + lock->prev = NULL; + + if (cache->first_lock) + cache->first_lock->prev = lock; + cache->first_lock = lock; + + /* instruct POOL to remove the look upon cleanup */ + apr_pool_cleanup_register(pool, + lock, + unlock_cache, + apr_pool_cleanup_null); +} + +/* Clears the CACHE at regular intervals (destroying all cached nodes) + */ +static void +auto_clear_dag_cache(fs_fs_dag_cache_t* cache) +{ + if (cache->first_lock == NULL && cache->insertions > BUCKET_COUNT) + { + svn_pool_clear(cache->pool); + + memset(cache->buckets, 0, sizeof(cache->buckets)); + cache->insertions = 0; + } +} + +/* For the given REVISION and PATH, return the respective entry in CACHE. + If the entry is empty, its NODE member will be NULL and the caller + may then set it to the corresponding DAG node allocated in CACHE->POOL. + */ +static cache_entry_t * +cache_lookup( fs_fs_dag_cache_t *cache + , svn_revnum_t revision + , const char *path) +{ + apr_size_t i, bucket_index; + apr_size_t path_len = strlen(path); + long int hash_value = revision; + + /* optimistic lookup: hit the same bucket again? */ + cache_entry_t *result = &cache->buckets[cache->last_hit]; + if ( (result->revision == revision) + && (result->path_len == path_len) + && !memcmp(result->path, path, path_len)) + { + return result; + } + + /* need to do a full lookup. Calculate the hash value + (HASH_VALUE has been initialized to REVISION). 
*/ + for (i = 0; i + 4 <= path_len; i += 4) + hash_value = hash_value * 0xd1f3da69 + *(const apr_uint32_t*)(path + i); + + for (; i < path_len; ++i) + hash_value = hash_value * 33 + path[i]; + + bucket_index = hash_value + (hash_value >> 16); + bucket_index = (bucket_index + (bucket_index >> 8)) % BUCKET_COUNT; + + /* access the corresponding bucket and remember its location */ + result = &cache->buckets[bucket_index]; + cache->last_hit = bucket_index; + + /* if it is *NOT* a match, clear the bucket, expect the caller to fill + in the node and count it as an insertion */ + if ( (result->hash_value != hash_value) + || (result->revision != revision) + || (result->path_len != path_len) + || memcmp(result->path, path, path_len)) + { + result->hash_value = hash_value; + result->revision = revision; + if (result->path_len < path_len) + result->path = apr_palloc(cache->pool, path_len + 1); + result->path_len = path_len; + memcpy(result->path, path, path_len + 1); + + result->node = NULL; + + cache->insertions++; + } + + return result; +} + +/* 2nd level cache */ + +/* Find and return the DAG node cache for ROOT and the key that + should be used for PATH. */ +static void +locate_cache(svn_cache__t **cache, + const char **key, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + if (root->is_txn_root) + { + fs_txn_root_data_t *frd = root->fsap_data; + if (cache) *cache = frd->txn_node_cache; + if (key && path) *key = path; + } + else + { + fs_fs_data_t *ffd = root->fs->fsap_data; + if (cache) *cache = ffd->rev_node_cache; + if (key && path) *key + = svn_fs_fs__combine_number_and_string(root->rev, path, pool); + } +} + +/* Return NODE for PATH from ROOT's node cache, or NULL if the node + isn't cached; read it from the FS. *NODE remains valid until either + POOL or the FS gets cleared or destroyed (whichever comes first). 
+ + Since locking can be expensive and POOL may be long-living, for + nodes that will not need to survive the next call to this function, + set NEEDS_LOCK_CACHE to FALSE. */ +static svn_error_t * +dag_node_cache_get(dag_node_t **node_p, + svn_fs_root_t *root, + const char *path, + svn_boolean_t needs_lock_cache, + apr_pool_t *pool) +{ + svn_boolean_t found; + dag_node_t *node = NULL; + svn_cache__t *cache; + const char *key; + + SVN_ERR_ASSERT(*path == '/'); + + if (!root->is_txn_root) + { + /* immutable DAG node. use the global caches for it */ + + fs_fs_data_t *ffd = root->fs->fsap_data; + cache_entry_t *bucket; + + auto_clear_dag_cache(ffd->dag_node_cache); + bucket = cache_lookup(ffd->dag_node_cache, root->rev, path); + if (bucket->node == NULL) + { + locate_cache(&cache, &key, root, path, pool); + SVN_ERR(svn_cache__get((void **)&node, &found, cache, key, + ffd->dag_node_cache->pool)); + if (found && node) + { + /* Patch up the FS, since this might have come from an old FS + * object. */ + svn_fs_fs__dag_set_fs(node, root->fs); + bucket->node = node; + } + } + else + { + node = bucket->node; + } + + /* if we found a node, make sure it remains valid at least as long + as it would when allocated in POOL. */ + if (node && needs_lock_cache) + lock_cache(ffd->dag_node_cache, pool); + } + else + { + /* DAG is mutable / may become invalid. Use the TXN-local cache */ + + locate_cache(&cache, &key, root, path, pool); + + SVN_ERR(svn_cache__get((void **) &node, &found, cache, key, pool)); + if (found && node) + { + /* Patch up the FS, since this might have come from an old FS + * object. */ + svn_fs_fs__dag_set_fs(node, root->fs); + } + } + + *node_p = node; + + return SVN_NO_ERROR; +} + + +/* Add the NODE for PATH to ROOT's node cache. 
*/ +static svn_error_t * +dag_node_cache_set(svn_fs_root_t *root, + const char *path, + dag_node_t *node, + apr_pool_t *pool) +{ + svn_cache__t *cache; + const char *key; + + SVN_ERR_ASSERT(*path == '/'); + + /* Do *not* attempt to dup and put the node into L1. + * dup() is twice as expensive as an L2 lookup (which will set also L1). + */ + locate_cache(&cache, &key, root, path, pool); + + return svn_cache__set(cache, key, node, pool); +} + + +/* Baton for find_descendents_in_cache. */ +struct fdic_baton { + const char *path; + apr_array_header_t *list; + apr_pool_t *pool; +}; + +/* If the given item is a descendent of BATON->PATH, push + * it onto BATON->LIST (copying into BATON->POOL). Implements + * the svn_iter_apr_hash_cb_t prototype. */ +static svn_error_t * +find_descendents_in_cache(void *baton, + const void *key, + apr_ssize_t klen, + void *val, + apr_pool_t *pool) +{ + struct fdic_baton *b = baton; + const char *item_path = key; + + if (svn_fspath__skip_ancestor(b->path, item_path)) + APR_ARRAY_PUSH(b->list, const char *) = apr_pstrdup(b->pool, item_path); + + return SVN_NO_ERROR; +} + +/* Invalidate cache entries for PATH and any of its children. This + should *only* be called on a transaction root! 
*/ +static svn_error_t * +dag_node_cache_invalidate(svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + struct fdic_baton b; + svn_cache__t *cache; + apr_pool_t *iterpool; + int i; + + b.path = path; + b.pool = svn_pool_create(pool); + b.list = apr_array_make(b.pool, 1, sizeof(const char *)); + + SVN_ERR_ASSERT(root->is_txn_root); + locate_cache(&cache, NULL, root, NULL, b.pool); + + + SVN_ERR(svn_cache__iter(NULL, cache, find_descendents_in_cache, + &b, b.pool)); + + iterpool = svn_pool_create(b.pool); + + for (i = 0; i < b.list->nelts; i++) + { + const char *descendent = APR_ARRAY_IDX(b.list, i, const char *); + svn_pool_clear(iterpool); + SVN_ERR(svn_cache__set(cache, descendent, NULL, iterpool)); + } + + svn_pool_destroy(iterpool); + svn_pool_destroy(b.pool); + return SVN_NO_ERROR; +} + + + +/* Creating transaction and revision root nodes. */ + +svn_error_t * +svn_fs_fs__txn_root(svn_fs_root_t **root_p, + svn_fs_txn_t *txn, + apr_pool_t *pool) +{ + apr_uint32_t flags = 0; + apr_hash_t *txnprops; + + /* Look for the temporary txn props representing 'flags'. */ + SVN_ERR(svn_fs_fs__txn_proplist(&txnprops, txn, pool)); + if (txnprops) + { + if (svn_hash_gets(txnprops, SVN_FS__PROP_TXN_CHECK_OOD)) + flags |= SVN_FS_TXN_CHECK_OOD; + + if (svn_hash_gets(txnprops, SVN_FS__PROP_TXN_CHECK_LOCKS)) + flags |= SVN_FS_TXN_CHECK_LOCKS; + } + + return make_txn_root(root_p, txn->fs, txn->id, txn->base_rev, flags, pool); +} + + +svn_error_t * +svn_fs_fs__revision_root(svn_fs_root_t **root_p, + svn_fs_t *fs, + svn_revnum_t rev, + apr_pool_t *pool) +{ + dag_node_t *root_dir; + + SVN_ERR(svn_fs__check_fs(fs, TRUE)); + + SVN_ERR(svn_fs_fs__dag_revision_root(&root_dir, fs, rev, pool)); + + *root_p = make_revision_root(fs, rev, root_dir, pool); + + return SVN_NO_ERROR; +} + + + +/* Getting dag nodes for roots. */ + + +/* Set *NODE_P to a freshly opened dag node referring to the root + directory of ROOT, allocating from POOL. 
 */
static svn_error_t *
root_node(dag_node_t **node_p,
          svn_fs_root_t *root,
          apr_pool_t *pool)
{
  if (root->is_txn_root)
    {
      /* It's a transaction root.  Open a fresh copy.  */
      return svn_fs_fs__dag_txn_root(node_p, root->fs, root->txn, pool);
    }
  else
    {
      /* It's a revision root, so we already have its root directory
         opened.  */
      fs_rev_root_data_t *frd = root->fsap_data;
      *node_p = svn_fs_fs__dag_dup(frd->root_dir, pool);
      return SVN_NO_ERROR;
    }
}


/* Set *NODE_P to a mutable root directory for ROOT, cloning if
   necessary, allocating in POOL.  ROOT must be a transaction root.
   Use ERROR_PATH in error messages.  */
static svn_error_t *
mutable_root_node(dag_node_t **node_p,
                  svn_fs_root_t *root,
                  const char *error_path,
                  apr_pool_t *pool)
{
  if (root->is_txn_root)
    return svn_fs_fs__dag_clone_root(node_p, root->fs, root->txn, pool);
  else
    /* If it's not a transaction root, we can't change its contents.  */
    return SVN_FS__ERR_NOT_MUTABLE(root->fs, root->rev, error_path);
}



/* Traversing directory paths.  */

/* How a node's copy ID should be derived when it is made mutable. */
typedef enum copy_id_inherit_t
{
  copy_id_inherit_unknown = 0,
  copy_id_inherit_self,
  copy_id_inherit_parent,
  copy_id_inherit_new

} copy_id_inherit_t;

/* A linked list representing the path from a node up to a root
   directory.  We use this for cloning, and for operations that need
   to deal with both a node and its parent directory.  For example, a
   `delete' operation needs to know that the node actually exists, but
   also needs to change the parent directory.  */
typedef struct parent_path_t
{

  /* A node along the path.  This could be the final node, one of its
     parents, or the root.  Every parent path ends with an element for
     the root directory.  */
  dag_node_t *node;

  /* The name NODE has in its parent directory.  This is zero for the
     root directory, which (obviously) has no name in its parent.  */
  char *entry;

  /* The parent of NODE, or zero if NODE is the root directory.  */
  struct parent_path_t *parent;

  /* The copy ID inheritance style.  */
  copy_id_inherit_t copy_inherit;

  /* If copy ID inheritance style is copy_id_inherit_new, this is the
     path which should be implicitly copied; otherwise, this is NULL.  */
  const char *copy_src_path;

} parent_path_t;

/* Return a text string describing the absolute path of parent_path
   PARENT_PATH.  It will be allocated in POOL.  */
static const char *
parent_path_path(parent_path_t *parent_path,
                 apr_pool_t *pool)
{
  const char *path_so_far = "/";
  /* recurse to the root first, then append our own entry name */
  if (parent_path->parent)
    path_so_far = parent_path_path(parent_path->parent, pool);
  return parent_path->entry
    ? svn_fspath__join(path_so_far, parent_path->entry, pool)
    : path_so_far;
}


/* Return the FS path for the parent path chain object CHILD relative
   to its ANCESTOR in the same chain, allocated in POOL.  ANCESTOR must
   actually be on CHILD's parent chain (asserted). */
static const char *
parent_path_relpath(parent_path_t *child,
                    parent_path_t *ancestor,
                    apr_pool_t *pool)
{
  const char *path_so_far = "";
  parent_path_t *this_node = child;
  while (this_node != ancestor)
    {
      assert(this_node != NULL);
      path_so_far = svn_relpath_join(this_node->entry, path_so_far, pool);
      this_node = this_node->parent;
    }
  return path_so_far;
}



/* Choose a copy ID inheritance method *INHERIT_P to be used in the
   event that immutable node CHILD in FS needs to be made mutable.  If
   the inheritance method is copy_id_inherit_new, also return a
   *COPY_SRC_PATH on which to base the new copy ID (else return NULL
   for that path).  CHILD must have a parent (it cannot be the root
   node).  TXN_ID is the transaction in which these items might be
   mutable.  Allocations are taken from POOL.
 */
static svn_error_t *
get_copy_inheritance(copy_id_inherit_t *inherit_p,
                     const char **copy_src_path,
                     svn_fs_t *fs,
                     parent_path_t *child,
                     const char *txn_id,
                     apr_pool_t *pool)
{
  const svn_fs_id_t *child_id, *parent_id, *copyroot_id;
  const char *child_copy_id, *parent_copy_id;
  const char *id_path = NULL;
  svn_fs_root_t *copyroot_root;
  dag_node_t *copyroot_node;
  svn_revnum_t copyroot_rev;
  const char *copyroot_path;

  SVN_ERR_ASSERT(child && child->parent && txn_id);

  /* Initialize some convenience variables. */
  child_id = svn_fs_fs__dag_get_id(child->node);
  parent_id = svn_fs_fs__dag_get_id(child->parent->node);
  child_copy_id = svn_fs_fs__id_copy_id(child_id);
  parent_copy_id = svn_fs_fs__id_copy_id(parent_id);

  /* If this child is already mutable, we have nothing to do. */
  if (svn_fs_fs__id_txn_id(child_id))
    {
      *inherit_p = copy_id_inherit_self;
      *copy_src_path = NULL;
      return SVN_NO_ERROR;
    }

  /* From this point on, we'll assume that the child will just take
     its copy ID from its parent. */
  *inherit_p = copy_id_inherit_parent;
  *copy_src_path = NULL;

  /* Special case: if the child's copy ID is '0', use the parent's
     copy ID. */
  if (strcmp(child_copy_id, "0") == 0)
    return SVN_NO_ERROR;

  /* Compare the copy IDs of the child and its parent.  If they are
     the same, then the child is already on the same branch as the
     parent, and should use the same mutability copy ID that the
     parent will use. */
  if (svn_fs_fs__key_compare(child_copy_id, parent_copy_id) == 0)
    return SVN_NO_ERROR;

  /* If the child is on the same branch that the parent is on, the
     child should just use the same copy ID that the parent would use.
     Else, the child needs to generate a new copy ID to use should it
     need to be made mutable.  We will claim that child is on the same
     branch as its parent if the child itself is not a branch point,
     or if it is a branch point that we are accessing via its original
     copy destination path. */
  SVN_ERR(svn_fs_fs__dag_get_copyroot(&copyroot_rev, &copyroot_path,
                                      child->node));
  SVN_ERR(svn_fs_fs__revision_root(&copyroot_root, fs, copyroot_rev, pool));
  SVN_ERR(get_dag(&copyroot_node, copyroot_root, copyroot_path, FALSE, pool));
  copyroot_id = svn_fs_fs__dag_get_id(copyroot_node);

  if (svn_fs_fs__id_compare(copyroot_id, child_id) == -1)
    return SVN_NO_ERROR;

  /* Determine if we are looking at the child via its original path or
     as a subtree item of a copied tree. */
  id_path = svn_fs_fs__dag_get_created_path(child->node);
  if (strcmp(id_path, parent_path_path(child, pool)) == 0)
    {
      *inherit_p = copy_id_inherit_self;
      return SVN_NO_ERROR;
    }

  /* We are pretty sure that the child node is an unedited nested
     branched node.  When it needs to be made mutable, it should claim
     a new copy ID. */
  *inherit_p = copy_id_inherit_new;
  *copy_src_path = id_path;
  return SVN_NO_ERROR;
}

/* Allocate a new parent_path_t node from POOL, referring to NODE,
   ENTRY, PARENT, and COPY_ID.  The copy inheritance style starts out
   as copy_id_inherit_unknown; callers fill it in later. */
static parent_path_t *
make_parent_path(dag_node_t *node,
                 char *entry,
                 parent_path_t *parent,
                 apr_pool_t *pool)
{
  parent_path_t *parent_path = apr_pcalloc(pool, sizeof(*parent_path));
  parent_path->node = node;
  parent_path->entry = entry;
  parent_path->parent = parent;
  parent_path->copy_inherit = copy_id_inherit_unknown;
  parent_path->copy_src_path = NULL;
  return parent_path;
}


/* Flags for open_path.  */
typedef enum open_path_flags_t {

  /* The last component of the PATH need not exist.  (All parent
     directories must exist, as usual.)  If the last component doesn't
     exist, simply leave the `node' member of the bottom parent_path
     component zero.  */
  open_path_last_optional = 1,

  /* When this flag is set, don't bother to lookup the DAG node in
     our caches because we already tried this.  Ignoring this flag
     has no functional impact.  */
  open_path_uncached = 2,

  /* The caller does not care about the parent node chain but only
     the final DAG node.  */
  open_path_node_only = 4
} open_path_flags_t;


/* Open the node identified by PATH in ROOT, allocating in POOL.  Set
   *PARENT_PATH_P to a path from the node up to ROOT.  The resulting
   **PARENT_PATH_P value is guaranteed to contain at least one
   *element, for the root directory.  PATH must be in canonical form.

   If resulting *PARENT_PATH_P will eventually be made mutable and
   modified, or if copy ID inheritance information is otherwise
   needed, TXN_ID should be the ID of the mutability transaction.  If
   TXN_ID is NULL, no copy ID inheritance information will be
   calculated for the *PARENT_PATH_P chain.

   If FLAGS & open_path_last_optional is zero, return the error
   SVN_ERR_FS_NOT_FOUND if the node PATH refers to does not exist.  If
   non-zero, require all the parent directories to exist as normal,
   but if the final path component doesn't exist, simply return a path
   whose bottom `node' member is zero.  This option is useful for
   callers that create new nodes --- we find the parent directory for
   them, and tell them whether the entry exists already.

   The remaining bits in FLAGS are hints that allow this function
   to take shortcuts based on knowledge that the caller provides,
   such as the caller is not actually being interested in PARENT_PATH_P,
   but only in (*PARENT_PATH_P)->NODE.

   NOTE: Public interfaces which only *read* from the filesystem
   should not call this function directly, but should instead use
   get_dag().
*/
static svn_error_t *
open_path(parent_path_t **parent_path_p,
          svn_fs_root_t *root,
          const char *path,
          int flags,
          const char *txn_id,
          apr_pool_t *pool)
{
  svn_fs_t *fs = root->fs;
  dag_node_t *here = NULL; /* The directory we're currently looking at.  */
  parent_path_t *parent_path; /* The path from HERE up to the root. */
  const char *rest; /* The portion of PATH we haven't traversed yet. */

  /* ensure a canonical path representation */
  const char *path_so_far = "/";
  apr_pool_t *iterpool = svn_pool_create(pool);

  /* callers often traverse the tree in some path-based order.  That means
     a sibling of PATH has been presently accessed.  Try to start the lookup
     directly at the parent node, if the caller did not request the full
     parent chain. */
  const char *directory;
  assert(svn_fs__is_canonical_abspath(path));
  if (flags & open_path_node_only)
    {
      directory = svn_dirent_dirname(path, pool);
      if (directory[1] != 0) /* root nodes are covered anyway */
        SVN_ERR(dag_node_cache_get(&here, root, directory, TRUE, pool));
    }

  /* did the shortcut work? */
  if (here)
    {
      path_so_far = directory;
      rest = path + strlen(directory) + 1;
    }
  else
    {
      /* Make a parent_path item for the root node, using its own current
         copy id.  */
      SVN_ERR(root_node(&here, root, pool));
      rest = path + 1; /* skip the leading '/', it saves in iteration */
    }

  parent_path = make_parent_path(here, 0, 0, pool);
  parent_path->copy_inherit = copy_id_inherit_self;

  /* Whenever we are at the top of this loop:
     - HERE is our current directory,
     - ID is the node revision ID of HERE,
     - REST is the path we're going to find in HERE, and
     - PARENT_PATH includes HERE and all its parents.  */
  for (;;)
    {
      const char *next;
      char *entry;
      dag_node_t *child;

      svn_pool_clear(iterpool);

      /* Parse out the next entry from the path.  */
      entry = svn_fs__next_entry_name(&next, rest, pool);

      /* Calculate the path traversed thus far. */
      path_so_far = svn_fspath__join(path_so_far, entry, pool);

      if (*entry == '\0')
        {
          /* Given the behavior of svn_fs__next_entry_name(), this
             happens when the path either starts or ends with a slash.
             In either case, we stay put: the current directory stays
             the same, and we add nothing to the parent path.  */
          child = here;
        }
      else
        {
          copy_id_inherit_t inherit;
          const char *copy_path = NULL;
          svn_error_t *err = SVN_NO_ERROR;
          dag_node_t *cached_node = NULL;

          /* If we found a directory entry, follow it.  First, we
             check our node cache, and, failing that, we hit the DAG
             layer.  Don't bother to contact the cache for the last
             element if we already know the lookup to fail for the
             complete path.  */
          if (next || !(flags & open_path_uncached))
            SVN_ERR(dag_node_cache_get(&cached_node, root, path_so_far,
                                       TRUE, pool));
          if (cached_node)
            child = cached_node;
          else
            err = svn_fs_fs__dag_open(&child, here, entry, pool, iterpool);

          /* "file not found" requires special handling.  */
          if (err && err->apr_err == SVN_ERR_FS_NOT_FOUND)
            {
              /* If this was the last path component, and the caller
                 said it was optional, then don't return an error;
                 just put a NULL node pointer in the path.  */

              svn_error_clear(err);

              if ((flags & open_path_last_optional)
                  && (! next || *next == '\0'))
                {
                  parent_path = make_parent_path(NULL, entry, parent_path,
                                                 pool);
                  break;
                }
              else
                {
                  /* Build a better error message than svn_fs_fs__dag_open
                     can provide, giving the root and full path name.  */
                  return SVN_FS__NOT_FOUND(root, path);
                }
            }

          /* Other errors we return normally.  */
          SVN_ERR(err);

          if (flags & open_path_node_only)
            {
              /* Shortcut: the caller only wants the final DAG node.  */
              parent_path->node = child;
            }
          else
            {
              /* Now, make a parent_path item for CHILD. */
              parent_path = make_parent_path(child, entry, parent_path, pool);
              if (txn_id)
                {
                  SVN_ERR(get_copy_inheritance(&inherit, &copy_path, fs,
                                               parent_path, txn_id, iterpool));
                  parent_path->copy_inherit = inherit;
                  parent_path->copy_src_path = apr_pstrdup(pool, copy_path);
                }
            }

          /* Cache the node we found (if it wasn't already cached). */
          if (! cached_node)
            SVN_ERR(dag_node_cache_set(root, path_so_far, child, iterpool));
        }

      /* Are we finished traversing the path?  */
      if (! next)
        break;

      /* The path isn't finished yet; we'd better be in a directory.  */
      if (svn_fs_fs__dag_node_kind(child) != svn_node_dir)
        SVN_ERR_W(SVN_FS__ERR_NOT_DIRECTORY(fs, path_so_far),
                  apr_psprintf(iterpool, _("Failure opening '%s'"), path));

      rest = next;
      here = child;
    }

  svn_pool_destroy(iterpool);
  *parent_path_p = parent_path;
  return SVN_NO_ERROR;
}


/* Make the node referred to by PARENT_PATH mutable, if it isn't
   already, allocating from POOL.  ROOT must be the root from which
   PARENT_PATH descends.  Clone any parent directories as needed.
   Adjust the dag nodes in PARENT_PATH to refer to the clones.  Use
   ERROR_PATH in error messages.  */
static svn_error_t *
make_path_mutable(svn_fs_root_t *root,
                  parent_path_t *parent_path,
                  const char *error_path,
                  apr_pool_t *pool)
{
  dag_node_t *clone;
  const char *txn_id = root->txn;

  /* Is the node mutable already?  */
  if (svn_fs_fs__dag_check_mutable(parent_path->node))
    return SVN_NO_ERROR;

  /* Are we trying to clone the root, or somebody's child node?  */
  if (parent_path->parent)
    {
      const svn_fs_id_t *parent_id, *child_id, *copyroot_id;
      const char *copy_id = NULL;
      copy_id_inherit_t inherit = parent_path->copy_inherit;
      const char *clone_path, *copyroot_path;
      svn_revnum_t copyroot_rev;
      svn_boolean_t is_parent_copyroot = FALSE;
      svn_fs_root_t *copyroot_root;
      dag_node_t *copyroot_node;

      /* We're trying to clone somebody's child.  Make sure our parent
         is mutable (recursing up to the root if necessary).  */
      SVN_ERR(make_path_mutable(root, parent_path->parent,
                                error_path, pool));

      switch (inherit)
        {
        case copy_id_inherit_parent:
          parent_id = svn_fs_fs__dag_get_id(parent_path->parent->node);
          copy_id = svn_fs_fs__id_copy_id(parent_id);
          break;

        case copy_id_inherit_new:
          SVN_ERR(svn_fs_fs__reserve_copy_id(&copy_id, root->fs, txn_id,
                                             pool));
          break;

        case copy_id_inherit_self:
          copy_id = NULL;
          break;

        case copy_id_inherit_unknown:
        default:
          SVN_ERR_MALFUNCTION(); /* uh-oh -- somebody didn't calculate copy-ID
                                    inheritance data. */
        }

      /* Determine what copyroot our new child node should use. */
      SVN_ERR(svn_fs_fs__dag_get_copyroot(&copyroot_rev, &copyroot_path,
                                          parent_path->node));
      SVN_ERR(svn_fs_fs__revision_root(&copyroot_root, root->fs,
                                       copyroot_rev, pool));
      SVN_ERR(get_dag(&copyroot_node, copyroot_root, copyroot_path,
                      FALSE, pool));

      child_id = svn_fs_fs__dag_get_id(parent_path->node);
      copyroot_id = svn_fs_fs__dag_get_id(copyroot_node);
      if (strcmp(svn_fs_fs__id_node_id(child_id),
                 svn_fs_fs__id_node_id(copyroot_id)) != 0)
        is_parent_copyroot = TRUE;

      /* Now make this node mutable.  */
      clone_path = parent_path_path(parent_path->parent, pool);
      SVN_ERR(svn_fs_fs__dag_clone_child(&clone,
                                         parent_path->parent->node,
                                         clone_path,
                                         parent_path->entry,
                                         copy_id, txn_id,
                                         is_parent_copyroot,
                                         pool));

      /* Update the path cache. */
      SVN_ERR(dag_node_cache_set(root, parent_path_path(parent_path, pool),
                                 clone, pool));
    }
  else
    {
      /* We're trying to clone the root directory.  */
      SVN_ERR(mutable_root_node(&clone, root, error_path, pool));
    }

  /* Update the PARENT_PATH link to refer to the clone.  */
  parent_path->node = clone;

  return SVN_NO_ERROR;
}


/* Open the node identified by PATH in ROOT.  Set DAG_NODE_P to the
   node we find, allocated in POOL.  Return the error
   SVN_ERR_FS_NOT_FOUND if this node doesn't exist.
+ + Since locking can be expensive and POOL may be long-living, for + nodes that will not need to survive the next call to this function, + set NEEDS_LOCK_CACHE to FALSE. */ +static svn_error_t * +get_dag(dag_node_t **dag_node_p, + svn_fs_root_t *root, + const char *path, + svn_boolean_t needs_lock_cache, + apr_pool_t *pool) +{ + parent_path_t *parent_path; + dag_node_t *node = NULL; + + /* First we look for the DAG in our cache + (if the path may be canonical). */ + if (*path == '/') + SVN_ERR(dag_node_cache_get(&node, root, path, needs_lock_cache, pool)); + + if (! node) + { + /* Canonicalize the input PATH. */ + if (! svn_fs__is_canonical_abspath(path)) + { + path = svn_fs__canonicalize_abspath(path, pool); + + /* Try again with the corrected path. */ + SVN_ERR(dag_node_cache_get(&node, root, path, needs_lock_cache, + pool)); + } + + if (! node) + { + /* Call open_path with no flags, as we want this to return an + * error if the node for which we are searching doesn't exist. */ + SVN_ERR(open_path(&parent_path, root, path, + open_path_uncached | open_path_node_only, + NULL, pool)); + node = parent_path->node; + + /* No need to cache our find -- open_path() will do that for us. */ + } + } + + *dag_node_p = node; + return SVN_NO_ERROR; +} + + + +/* Populating the `changes' table. */ + +/* Add a change to the changes table in FS, keyed on transaction id + TXN_ID, and indicated that a change of kind CHANGE_KIND occurred on + PATH (whose node revision id is--or was, in the case of a + deletion--NODEREV_ID), and optionally that TEXT_MODs or PROP_MODs + occurred. If the change resulted from a copy, COPYFROM_REV and + COPYFROM_PATH specify under which revision and path the node was + copied from. If this was not part of a copy, COPYFROM_REV should + be SVN_INVALID_REVNUM. Do all this as part of POOL. 
*/ +static svn_error_t * +add_change(svn_fs_t *fs, + const char *txn_id, + const char *path, + const svn_fs_id_t *noderev_id, + svn_fs_path_change_kind_t change_kind, + svn_boolean_t text_mod, + svn_boolean_t prop_mod, + svn_node_kind_t node_kind, + svn_revnum_t copyfrom_rev, + const char *copyfrom_path, + apr_pool_t *pool) +{ + return svn_fs_fs__add_change(fs, txn_id, + svn_fs__canonicalize_abspath(path, pool), + noderev_id, change_kind, text_mod, prop_mod, + node_kind, copyfrom_rev, copyfrom_path, + pool); +} + + + +/* Generic node operations. */ + +/* Get the id of a node referenced by path PATH in ROOT. Return the + id in *ID_P allocated in POOL. */ +svn_error_t * +svn_fs_fs__node_id(const svn_fs_id_t **id_p, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + if ((! root->is_txn_root) + && (path[0] == '\0' || ((path[0] == '/') && (path[1] == '\0')))) + { + /* Optimize the case where we don't need any db access at all. + The root directory ("" or "/") node is stored in the + svn_fs_root_t object, and never changes when it's a revision + root, so we can just reach in and grab it directly. */ + fs_rev_root_data_t *frd = root->fsap_data; + *id_p = svn_fs_fs__id_copy(svn_fs_fs__dag_get_id(frd->root_dir), pool); + } + else + { + dag_node_t *node; + + SVN_ERR(get_dag(&node, root, path, FALSE, pool)); + *id_p = svn_fs_fs__id_copy(svn_fs_fs__dag_get_id(node), pool); + } + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_fs_fs__node_created_rev(svn_revnum_t *revision, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + dag_node_t *node; + + SVN_ERR(get_dag(&node, root, path, FALSE, pool)); + return svn_fs_fs__dag_get_revision(revision, node, pool); +} + + +/* Set *CREATED_PATH to the path at which PATH under ROOT was created. + Return a string allocated in POOL. 
*/ +static svn_error_t * +fs_node_created_path(const char **created_path, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + dag_node_t *node; + + SVN_ERR(get_dag(&node, root, path, TRUE, pool)); + *created_path = svn_fs_fs__dag_get_created_path(node); + + return SVN_NO_ERROR; +} + + +/* Set *KIND_P to the type of node located at PATH under ROOT. + Perform temporary allocations in POOL. */ +static svn_error_t * +node_kind(svn_node_kind_t *kind_p, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + const svn_fs_id_t *node_id; + dag_node_t *node; + + /* Get the node id. */ + SVN_ERR(svn_fs_fs__node_id(&node_id, root, path, pool)); + + /* Use the node id to get the real kind. */ + SVN_ERR(svn_fs_fs__dag_get_node(&node, root->fs, node_id, pool)); + *kind_p = svn_fs_fs__dag_node_kind(node); + + return SVN_NO_ERROR; +} + + +/* Set *KIND_P to the type of node present at PATH under ROOT. If + PATH does not exist under ROOT, set *KIND_P to svn_node_none. Use + POOL for temporary allocation. */ +svn_error_t * +svn_fs_fs__check_path(svn_node_kind_t *kind_p, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + svn_error_t *err = node_kind(kind_p, root, path, pool); + if (err && + ((err->apr_err == SVN_ERR_FS_NOT_FOUND) + || (err->apr_err == SVN_ERR_FS_NOT_DIRECTORY))) + { + svn_error_clear(err); + err = SVN_NO_ERROR; + *kind_p = svn_node_none; + } + + return svn_error_trace(err); +} + +/* Set *VALUE_P to the value of the property named PROPNAME of PATH in + ROOT. If the node has no property by that name, set *VALUE_P to + zero. Allocate the result in POOL. 
*/ +static svn_error_t * +fs_node_prop(svn_string_t **value_p, + svn_fs_root_t *root, + const char *path, + const char *propname, + apr_pool_t *pool) +{ + dag_node_t *node; + apr_hash_t *proplist; + + SVN_ERR(get_dag(&node, root, path, FALSE, pool)); + SVN_ERR(svn_fs_fs__dag_get_proplist(&proplist, node, pool)); + *value_p = NULL; + if (proplist) + *value_p = svn_hash_gets(proplist, propname); + + return SVN_NO_ERROR; +} + + +/* Set *TABLE_P to the entire property list of PATH under ROOT, as an + APR hash table allocated in POOL. The resulting property table + maps property names to pointers to svn_string_t objects containing + the property value. */ +static svn_error_t * +fs_node_proplist(apr_hash_t **table_p, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + apr_hash_t *table; + dag_node_t *node; + + SVN_ERR(get_dag(&node, root, path, FALSE, pool)); + SVN_ERR(svn_fs_fs__dag_get_proplist(&table, node, pool)); + *table_p = table ? table : apr_hash_make(pool); + + return SVN_NO_ERROR; +} + + +static svn_error_t * +increment_mergeinfo_up_tree(parent_path_t *pp, + apr_int64_t increment, + apr_pool_t *pool) +{ + for (; pp; pp = pp->parent) + SVN_ERR(svn_fs_fs__dag_increment_mergeinfo_count(pp->node, + increment, + pool)); + + return SVN_NO_ERROR; +} + +/* Change, add, or delete a node's property value. The affected node + is PATH under ROOT, the property value to modify is NAME, and VALUE + points to either a string value to set the new contents to, or NULL + if the property should be deleted. Perform temporary allocations + in POOL. */ +static svn_error_t * +fs_change_node_prop(svn_fs_root_t *root, + const char *path, + const char *name, + const svn_string_t *value, + apr_pool_t *pool) +{ + parent_path_t *parent_path; + apr_hash_t *proplist; + const char *txn_id; + + if (! 
root->is_txn_root) + return SVN_FS__NOT_TXN(root); + txn_id = root->txn; + + path = svn_fs__canonicalize_abspath(path, pool); + SVN_ERR(open_path(&parent_path, root, path, 0, txn_id, pool)); + + /* Check (non-recursively) to see if path is locked; if so, check + that we can use it. */ + if (root->txn_flags & SVN_FS_TXN_CHECK_LOCKS) + SVN_ERR(svn_fs_fs__allow_locked_operation(path, root->fs, FALSE, FALSE, + pool)); + + SVN_ERR(make_path_mutable(root, parent_path, path, pool)); + SVN_ERR(svn_fs_fs__dag_get_proplist(&proplist, parent_path->node, pool)); + + /* If there's no proplist, but we're just deleting a property, exit now. */ + if ((! proplist) && (! value)) + return SVN_NO_ERROR; + + /* Now, if there's no proplist, we know we need to make one. */ + if (! proplist) + proplist = apr_hash_make(pool); + + if (svn_fs_fs__fs_supports_mergeinfo(root->fs) + && strcmp (name, SVN_PROP_MERGEINFO) == 0) + { + apr_int64_t increment = 0; + svn_boolean_t had_mergeinfo; + SVN_ERR(svn_fs_fs__dag_has_mergeinfo(&had_mergeinfo, parent_path->node)); + + if (value && !had_mergeinfo) + increment = 1; + else if (!value && had_mergeinfo) + increment = -1; + + if (increment != 0) + { + SVN_ERR(increment_mergeinfo_up_tree(parent_path, increment, pool)); + SVN_ERR(svn_fs_fs__dag_set_has_mergeinfo(parent_path->node, + (value != NULL), pool)); + } + } + + /* Set the property. */ + svn_hash_sets(proplist, name, value); + + /* Overwrite the node's proplist. */ + SVN_ERR(svn_fs_fs__dag_set_proplist(parent_path->node, proplist, + pool)); + + /* Make a record of this modification in the changes table. */ + return add_change(root->fs, txn_id, path, + svn_fs_fs__dag_get_id(parent_path->node), + svn_fs_path_change_modify, FALSE, TRUE, + svn_fs_fs__dag_node_kind(parent_path->node), + SVN_INVALID_REVNUM, NULL, pool); +} + + +/* Determine if the properties of two path/root combinations are + different. 
Set *CHANGED_P to TRUE if the properties at PATH1 under + ROOT1 differ from those at PATH2 under ROOT2, or FALSE otherwise. + Both roots must be in the same filesystem. */ +static svn_error_t * +fs_props_changed(svn_boolean_t *changed_p, + svn_fs_root_t *root1, + const char *path1, + svn_fs_root_t *root2, + const char *path2, + apr_pool_t *pool) +{ + dag_node_t *node1, *node2; + + /* Check that roots are in the same fs. */ + if (root1->fs != root2->fs) + return svn_error_create + (SVN_ERR_FS_GENERAL, NULL, + _("Cannot compare property value between two different filesystems")); + + SVN_ERR(get_dag(&node1, root1, path1, TRUE, pool)); + SVN_ERR(get_dag(&node2, root2, path2, TRUE, pool)); + return svn_fs_fs__dag_things_different(changed_p, NULL, + node1, node2); +} + + + +/* Merges and commits. */ + +/* Set *NODE to the root node of ROOT. */ +static svn_error_t * +get_root(dag_node_t **node, svn_fs_root_t *root, apr_pool_t *pool) +{ + return get_dag(node, root, "/", TRUE, pool); +} + + +/* Set the contents of CONFLICT_PATH to PATH, and return an + SVN_ERR_FS_CONFLICT error that indicates that there was a conflict + at PATH. Perform all allocations in POOL (except the allocation of + CONFLICT_PATH, which should be handled outside this function). */ +static svn_error_t * +conflict_err(svn_stringbuf_t *conflict_path, + const char *path) +{ + svn_stringbuf_set(conflict_path, path); + return svn_error_createf(SVN_ERR_FS_CONFLICT, NULL, + _("Conflict at '%s'"), path); +} + + +/* Merge changes between ANCESTOR and SOURCE into TARGET. ANCESTOR + * and TARGET must be distinct node revisions. TARGET_PATH should + * correspond to TARGET's full path in its filesystem, and is used for + * reporting conflict location. + * + * SOURCE, TARGET, and ANCESTOR are generally directories; this + * function recursively merges the directories' contents. If any are + * files, this function simply returns an error whenever SOURCE, + * TARGET, and ANCESTOR are all distinct node revisions. 
+ * + * If there are differences between ANCESTOR and SOURCE that conflict + * with changes between ANCESTOR and TARGET, this function returns an + * SVN_ERR_FS_CONFLICT error, and updates CONFLICT_P to the name of the + * conflicting node in TARGET, with TARGET_PATH prepended as a path. + * + * If there are no conflicting differences, CONFLICT_P is updated to + * the empty string. + * + * CONFLICT_P must point to a valid svn_stringbuf_t. + * + * Do any necessary temporary allocation in POOL. + */ +static svn_error_t * +merge(svn_stringbuf_t *conflict_p, + const char *target_path, + dag_node_t *target, + dag_node_t *source, + dag_node_t *ancestor, + const char *txn_id, + apr_int64_t *mergeinfo_increment_out, + apr_pool_t *pool) +{ + const svn_fs_id_t *source_id, *target_id, *ancestor_id; + apr_hash_t *s_entries, *t_entries, *a_entries; + apr_hash_index_t *hi; + svn_fs_t *fs; + apr_pool_t *iterpool; + apr_int64_t mergeinfo_increment = 0; + svn_boolean_t fs_supports_mergeinfo; + + /* Make sure everyone comes from the same filesystem. */ + fs = svn_fs_fs__dag_get_fs(ancestor); + if ((fs != svn_fs_fs__dag_get_fs(source)) + || (fs != svn_fs_fs__dag_get_fs(target))) + { + return svn_error_create + (SVN_ERR_FS_CORRUPT, NULL, + _("Bad merge; ancestor, source, and target not all in same fs")); + } + + /* We have the same fs, now check it. */ + SVN_ERR(svn_fs__check_fs(fs, TRUE)); + + source_id = svn_fs_fs__dag_get_id(source); + target_id = svn_fs_fs__dag_get_id(target); + ancestor_id = svn_fs_fs__dag_get_id(ancestor); + + /* It's improper to call this function with ancestor == target. 
*/ + if (svn_fs_fs__id_eq(ancestor_id, target_id)) + { + svn_string_t *id_str = svn_fs_fs__id_unparse(target_id, pool); + return svn_error_createf + (SVN_ERR_FS_GENERAL, NULL, + _("Bad merge; target '%s' has id '%s', same as ancestor"), + target_path, id_str->data); + } + + svn_stringbuf_setempty(conflict_p); + + /* Base cases: + * Either no change made in source, or same change as made in target. + * Both mean nothing to merge here. + */ + if (svn_fs_fs__id_eq(ancestor_id, source_id) + || (svn_fs_fs__id_eq(source_id, target_id))) + return SVN_NO_ERROR; + + /* Else proceed, knowing all three are distinct node revisions. + * + * How to merge from this point: + * + * if (not all 3 are directories) + * { + * early exit with conflict; + * } + * + * // Property changes may only be made to up-to-date + * // directories, because once the client commits the prop + * // change, it bumps the directory's revision, and therefore + * // must be able to depend on there being no other changes to + * // that directory in the repository. + * if (target's property list differs from ancestor's) + * conflict; + * + * For each entry NAME in the directory ANCESTOR: + * + * Let ANCESTOR-ENTRY, SOURCE-ENTRY, and TARGET-ENTRY be the IDs of + * the name within ANCESTOR, SOURCE, and TARGET respectively. + * (Possibly null if NAME does not exist in SOURCE or TARGET.) + * + * If ANCESTOR-ENTRY == SOURCE-ENTRY, then: + * No changes were made to this entry while the transaction was in + * progress, so do nothing to the target. + * + * Else if ANCESTOR-ENTRY == TARGET-ENTRY, then: + * A change was made to this entry while the transaction was in + * process, but the transaction did not touch this entry. Replace + * TARGET-ENTRY with SOURCE-ENTRY. + * + * Else: + * Changes were made to this entry both within the transaction and + * to the repository while the transaction was in progress. They + * must be merged or declared to be in conflict. 
+ * + * If SOURCE-ENTRY and TARGET-ENTRY are both null, that's a + * double delete; flag a conflict. + * + * If any of the three entries is of type file, declare a conflict. + * + * If either SOURCE-ENTRY or TARGET-ENTRY is not a direct + * modification of ANCESTOR-ENTRY (determine by comparing the + * node-id fields), declare a conflict. A replacement is + * incompatible with a modification or other replacement--even + * an identical replacement. + * + * Direct modifications were made to the directory ANCESTOR-ENTRY + * in both SOURCE and TARGET. Recursively merge these + * modifications. + * + * For each leftover entry NAME in the directory SOURCE: + * + * If NAME exists in TARGET, declare a conflict. Even if SOURCE and + * TARGET are adding exactly the same thing, two additions are not + * auto-mergeable with each other. + * + * Add NAME to TARGET with the entry from SOURCE. + * + * Now that we are done merging the changes from SOURCE into the + * directory TARGET, update TARGET's predecessor to be SOURCE. + */ + + if ((svn_fs_fs__dag_node_kind(source) != svn_node_dir) + || (svn_fs_fs__dag_node_kind(target) != svn_node_dir) + || (svn_fs_fs__dag_node_kind(ancestor) != svn_node_dir)) + { + return conflict_err(conflict_p, target_path); + } + + + /* Possible early merge failure: if target and ancestor have + different property lists, then the merge should fail. + Propchanges can *only* be committed on an up-to-date directory. + ### TODO: see issue #418 about the inelegance of this. + + Another possible, similar, early merge failure: if source and + ancestor have different property lists (meaning someone else + changed directory properties while our commit transaction was + happening), the merge should fail. See issue #2751. + */ + { + node_revision_t *tgt_nr, *anc_nr, *src_nr; + + /* Get node revisions for our id's. 
*/ + SVN_ERR(svn_fs_fs__get_node_revision(&tgt_nr, fs, target_id, pool)); + SVN_ERR(svn_fs_fs__get_node_revision(&anc_nr, fs, ancestor_id, pool)); + SVN_ERR(svn_fs_fs__get_node_revision(&src_nr, fs, source_id, pool)); + + /* Now compare the prop-keys of the skels. Note that just because + the keys are different -doesn't- mean the proplists have + different contents. But merge() isn't concerned with contents; + it doesn't do a brute-force comparison on textual contents, so + it won't do that here either. Checking to see if the propkey + atoms are `equal' is enough. */ + if (! svn_fs_fs__noderev_same_rep_key(tgt_nr->prop_rep, anc_nr->prop_rep)) + return conflict_err(conflict_p, target_path); + if (! svn_fs_fs__noderev_same_rep_key(src_nr->prop_rep, anc_nr->prop_rep)) + return conflict_err(conflict_p, target_path); + } + + /* ### todo: it would be more efficient to simply check for a NULL + entries hash where necessary below than to allocate an empty hash + here, but another day, another day... */ + SVN_ERR(svn_fs_fs__dag_dir_entries(&s_entries, source, pool)); + SVN_ERR(svn_fs_fs__dag_dir_entries(&t_entries, target, pool)); + SVN_ERR(svn_fs_fs__dag_dir_entries(&a_entries, ancestor, pool)); + + fs_supports_mergeinfo = svn_fs_fs__fs_supports_mergeinfo(fs); + + /* for each entry E in a_entries... */ + iterpool = svn_pool_create(pool); + for (hi = apr_hash_first(pool, a_entries); + hi; + hi = apr_hash_next(hi)) + { + svn_fs_dirent_t *s_entry, *t_entry, *a_entry; + const char *name; + apr_ssize_t klen; + + svn_pool_clear(iterpool); + + name = svn__apr_hash_index_key(hi); + klen = svn__apr_hash_index_klen(hi); + a_entry = svn__apr_hash_index_val(hi); + + s_entry = apr_hash_get(s_entries, name, klen); + t_entry = apr_hash_get(t_entries, name, klen); + + /* No changes were made to this entry while the transaction was + in progress, so do nothing to the target. 
*/ + if (s_entry && svn_fs_fs__id_eq(a_entry->id, s_entry->id)) + goto end; + + /* A change was made to this entry while the transaction was in + process, but the transaction did not touch this entry. */ + else if (t_entry && svn_fs_fs__id_eq(a_entry->id, t_entry->id)) + { + dag_node_t *t_ent_node; + SVN_ERR(svn_fs_fs__dag_get_node(&t_ent_node, fs, + t_entry->id, iterpool)); + if (fs_supports_mergeinfo) + { + apr_int64_t mergeinfo_start; + SVN_ERR(svn_fs_fs__dag_get_mergeinfo_count(&mergeinfo_start, + t_ent_node)); + mergeinfo_increment -= mergeinfo_start; + } + + if (s_entry) + { + dag_node_t *s_ent_node; + SVN_ERR(svn_fs_fs__dag_get_node(&s_ent_node, fs, + s_entry->id, iterpool)); + + if (fs_supports_mergeinfo) + { + apr_int64_t mergeinfo_end; + SVN_ERR(svn_fs_fs__dag_get_mergeinfo_count(&mergeinfo_end, + s_ent_node)); + mergeinfo_increment += mergeinfo_end; + } + + SVN_ERR(svn_fs_fs__dag_set_entry(target, name, + s_entry->id, + s_entry->kind, + txn_id, + iterpool)); + } + else + { + SVN_ERR(svn_fs_fs__dag_delete(target, name, txn_id, iterpool)); + } + } + + /* Changes were made to this entry both within the transaction + and to the repository while the transaction was in progress. + They must be merged or declared to be in conflict. */ + else + { + dag_node_t *s_ent_node, *t_ent_node, *a_ent_node; + const char *new_tpath; + apr_int64_t sub_mergeinfo_increment; + + /* If SOURCE-ENTRY and TARGET-ENTRY are both null, that's a + double delete; if one of them is null, that's a delete versus + a modification. In any of these cases, flag a conflict. */ + if (s_entry == NULL || t_entry == NULL) + return conflict_err(conflict_p, + svn_fspath__join(target_path, + a_entry->name, + iterpool)); + + /* If any of the three entries is of type file, flag a conflict. 
*/ + if (s_entry->kind == svn_node_file + || t_entry->kind == svn_node_file + || a_entry->kind == svn_node_file) + return conflict_err(conflict_p, + svn_fspath__join(target_path, + a_entry->name, + iterpool)); + + /* If either SOURCE-ENTRY or TARGET-ENTRY is not a direct + modification of ANCESTOR-ENTRY, declare a conflict. */ + if (strcmp(svn_fs_fs__id_node_id(s_entry->id), + svn_fs_fs__id_node_id(a_entry->id)) != 0 + || strcmp(svn_fs_fs__id_copy_id(s_entry->id), + svn_fs_fs__id_copy_id(a_entry->id)) != 0 + || strcmp(svn_fs_fs__id_node_id(t_entry->id), + svn_fs_fs__id_node_id(a_entry->id)) != 0 + || strcmp(svn_fs_fs__id_copy_id(t_entry->id), + svn_fs_fs__id_copy_id(a_entry->id)) != 0) + return conflict_err(conflict_p, + svn_fspath__join(target_path, + a_entry->name, + iterpool)); + + /* Direct modifications were made to the directory + ANCESTOR-ENTRY in both SOURCE and TARGET. Recursively + merge these modifications. */ + SVN_ERR(svn_fs_fs__dag_get_node(&s_ent_node, fs, + s_entry->id, iterpool)); + SVN_ERR(svn_fs_fs__dag_get_node(&t_ent_node, fs, + t_entry->id, iterpool)); + SVN_ERR(svn_fs_fs__dag_get_node(&a_ent_node, fs, + a_entry->id, iterpool)); + new_tpath = svn_fspath__join(target_path, t_entry->name, iterpool); + SVN_ERR(merge(conflict_p, new_tpath, + t_ent_node, s_ent_node, a_ent_node, + txn_id, + &sub_mergeinfo_increment, + iterpool)); + if (fs_supports_mergeinfo) + mergeinfo_increment += sub_mergeinfo_increment; + } + + /* We've taken care of any possible implications E could have. + Remove it from source_entries, so it's easy later to loop + over all the source entries that didn't exist in + ancestor_entries. 
*/ + end: + apr_hash_set(s_entries, name, klen, NULL); + } + + /* For each entry E in source but not in ancestor */ + for (hi = apr_hash_first(pool, s_entries); + hi; + hi = apr_hash_next(hi)) + { + svn_fs_dirent_t *s_entry, *t_entry; + const char *name = svn__apr_hash_index_key(hi); + apr_ssize_t klen = svn__apr_hash_index_klen(hi); + dag_node_t *s_ent_node; + + svn_pool_clear(iterpool); + + s_entry = svn__apr_hash_index_val(hi); + t_entry = apr_hash_get(t_entries, name, klen); + + /* If NAME exists in TARGET, declare a conflict. */ + if (t_entry) + return conflict_err(conflict_p, + svn_fspath__join(target_path, + t_entry->name, + iterpool)); + + SVN_ERR(svn_fs_fs__dag_get_node(&s_ent_node, fs, + s_entry->id, iterpool)); + if (fs_supports_mergeinfo) + { + apr_int64_t mergeinfo_s; + SVN_ERR(svn_fs_fs__dag_get_mergeinfo_count(&mergeinfo_s, + s_ent_node)); + mergeinfo_increment += mergeinfo_s; + } + + SVN_ERR(svn_fs_fs__dag_set_entry + (target, s_entry->name, s_entry->id, s_entry->kind, + txn_id, iterpool)); + } + svn_pool_destroy(iterpool); + + SVN_ERR(svn_fs_fs__dag_update_ancestry(target, source, pool)); + + if (fs_supports_mergeinfo) + SVN_ERR(svn_fs_fs__dag_increment_mergeinfo_count(target, + mergeinfo_increment, + pool)); + + if (mergeinfo_increment_out) + *mergeinfo_increment_out = mergeinfo_increment; + + return SVN_NO_ERROR; +} + +/* Merge changes between an ancestor and SOURCE_NODE into + TXN. The ancestor is either ANCESTOR_NODE, or if + that is null, TXN's base node. + + If the merge is successful, TXN's base will become + SOURCE_NODE, and its root node will have a new ID, a + successor of SOURCE_NODE. + + If a conflict results, update *CONFLICT to the path in the txn that + conflicted; see the CONFLICT_P parameter of merge() for details. 
*/ +static svn_error_t * +merge_changes(dag_node_t *ancestor_node, + dag_node_t *source_node, + svn_fs_txn_t *txn, + svn_stringbuf_t *conflict, + apr_pool_t *pool) +{ + dag_node_t *txn_root_node; + svn_fs_t *fs = txn->fs; + const char *txn_id = txn->id; + + SVN_ERR(svn_fs_fs__dag_txn_root(&txn_root_node, fs, txn_id, pool)); + + if (ancestor_node == NULL) + { + SVN_ERR(svn_fs_fs__dag_txn_base_root(&ancestor_node, fs, + txn_id, pool)); + } + + if (svn_fs_fs__id_eq(svn_fs_fs__dag_get_id(ancestor_node), + svn_fs_fs__dag_get_id(txn_root_node))) + { + /* If no changes have been made in TXN since its current base, + then it can't conflict with any changes since that base. + The caller isn't supposed to call us in that case. */ + SVN_ERR_MALFUNCTION(); + } + else + SVN_ERR(merge(conflict, "/", txn_root_node, + source_node, ancestor_node, txn_id, NULL, pool)); + + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_fs_fs__commit_txn(const char **conflict_p, + svn_revnum_t *new_rev, + svn_fs_txn_t *txn, + apr_pool_t *pool) +{ + /* How do commits work in Subversion? + * + * When you're ready to commit, here's what you have: + * + * 1. A transaction, with a mutable tree hanging off it. + * 2. A base revision, against which TXN_TREE was made. + * 3. A latest revision, which may be newer than the base rev. + * + * The problem is that if latest != base, then one can't simply + * attach the txn root as the root of the new revision, because that + * would lose all the changes between base and latest. It is also + * not acceptable to insist that base == latest; in a busy + * repository, commits happen too fast to insist that everyone keep + * their entire tree up-to-date at all times. Non-overlapping + * changes should not interfere with each other. + * + * The solution is to merge the changes between base and latest into + * the txn tree [see the function merge()]. The txn tree is the + * only one of the three trees that is mutable, so it has to be the + * one to adjust. 
+ * + * You might have to adjust it more than once, if a new latest + * revision gets committed while you were merging in the previous + * one. For example: + * + * 1. Jane starts txn T, based at revision 6. + * 2. Someone commits (or already committed) revision 7. + * 3. Jane's starts merging the changes between 6 and 7 into T. + * 4. Meanwhile, someone commits revision 8. + * 5. Jane finishes the 6-->7 merge. T could now be committed + * against a latest revision of 7, if only that were still the + * latest. Unfortunately, 8 is now the latest, so... + * 6. Jane starts merging the changes between 7 and 8 into T. + * 7. Meanwhile, no one commits any new revisions. Whew. + * 8. Jane commits T, creating revision 9, whose tree is exactly + * T's tree, except immutable now. + * + * Lather, rinse, repeat. + */ + + svn_error_t *err = SVN_NO_ERROR; + svn_stringbuf_t *conflict = svn_stringbuf_create_empty(pool); + svn_fs_t *fs = txn->fs; + + /* Limit memory usage when the repository has a high commit rate and + needs to run the following while loop multiple times. The memory + growth without an iteration pool is very noticeable when the + transaction modifies a node that has 20,000 sibling nodes. */ + apr_pool_t *iterpool = svn_pool_create(pool); + + /* Initialize output params. */ + *new_rev = SVN_INVALID_REVNUM; + if (conflict_p) + *conflict_p = NULL; + + while (1729) + { + svn_revnum_t youngish_rev; + svn_fs_root_t *youngish_root; + dag_node_t *youngish_root_node; + + svn_pool_clear(iterpool); + + /* Get the *current* youngest revision. We call it "youngish" + because new revisions might get committed after we've + obtained it. */ + + SVN_ERR(svn_fs_fs__youngest_rev(&youngish_rev, fs, iterpool)); + SVN_ERR(svn_fs_fs__revision_root(&youngish_root, fs, youngish_rev, + iterpool)); + + /* Get the dag node for the youngest revision. 
Later we'll use + it as the SOURCE argument to a merge, and if the merge + succeeds, this youngest root node will become the new base + root for the svn txn that was the target of the merge (but + note that the youngest rev may have changed by then -- that's + why we're careful to get this root in its own bdb txn + here). */ + SVN_ERR(get_root(&youngish_root_node, youngish_root, iterpool)); + + /* Try to merge. If the merge succeeds, the base root node of + TARGET's txn will become the same as youngish_root_node, so + any future merges will only be between that node and whatever + the root node of the youngest rev is by then. */ + err = merge_changes(NULL, youngish_root_node, txn, conflict, iterpool); + if (err) + { + if ((err->apr_err == SVN_ERR_FS_CONFLICT) && conflict_p) + *conflict_p = conflict->data; + goto cleanup; + } + txn->base_rev = youngish_rev; + + /* Try to commit. */ + err = svn_fs_fs__commit(new_rev, fs, txn, iterpool); + if (err && (err->apr_err == SVN_ERR_FS_TXN_OUT_OF_DATE)) + { + /* Did someone else finish committing a new revision while we + were in mid-merge or mid-commit? If so, we'll need to + loop again to merge the new changes in, then try to + commit again. Or if that's not what happened, then just + return the error. */ + svn_revnum_t youngest_rev; + SVN_ERR(svn_fs_fs__youngest_rev(&youngest_rev, fs, iterpool)); + if (youngest_rev == youngish_rev) + goto cleanup; + else + svn_error_clear(err); + } + else if (err) + { + goto cleanup; + } + else + { + err = SVN_NO_ERROR; + goto cleanup; + } + } + + cleanup: + + svn_fs_fs__reset_txn_caches(fs); + + svn_pool_destroy(iterpool); + return svn_error_trace(err); +} + + +/* Merge changes between two nodes into a third node. Given nodes + SOURCE_PATH under SOURCE_ROOT, TARGET_PATH under TARGET_ROOT and + ANCESTOR_PATH under ANCESTOR_ROOT, modify target to contain all the + changes between the ancestor and source. 
If there are conflicts, + return SVN_ERR_FS_CONFLICT and set *CONFLICT_P to a textual + description of the offending changes. Perform any temporary + allocations in POOL. */ +static svn_error_t * +fs_merge(const char **conflict_p, + svn_fs_root_t *source_root, + const char *source_path, + svn_fs_root_t *target_root, + const char *target_path, + svn_fs_root_t *ancestor_root, + const char *ancestor_path, + apr_pool_t *pool) +{ + dag_node_t *source, *ancestor; + svn_fs_txn_t *txn; + svn_error_t *err; + svn_stringbuf_t *conflict = svn_stringbuf_create_empty(pool); + + if (! target_root->is_txn_root) + return SVN_FS__NOT_TXN(target_root); + + /* Paranoia. */ + if ((source_root->fs != ancestor_root->fs) + || (target_root->fs != ancestor_root->fs)) + { + return svn_error_create + (SVN_ERR_FS_CORRUPT, NULL, + _("Bad merge; ancestor, source, and target not all in same fs")); + } + + /* ### kff todo: is there any compelling reason to get the nodes in + one db transaction? Right now we don't; txn_body_get_root() gets + one node at a time. This will probably need to change: + + Jim Blandy <jimb@zwingli.cygnus.com> writes: + > svn_fs_merge needs to be a single transaction, to protect it against + > people deleting parents of nodes it's working on, etc. + */ + + /* Get the ancestor node. */ + SVN_ERR(get_root(&ancestor, ancestor_root, pool)); + + /* Get the source node. */ + SVN_ERR(get_root(&source, source_root, pool)); + + /* Open a txn for the txn root into which we're merging. */ + SVN_ERR(svn_fs_fs__open_txn(&txn, ancestor_root->fs, target_root->txn, + pool)); + + /* Merge changes between ANCESTOR and SOURCE into TXN. 
*/ + err = merge_changes(ancestor, source, txn, conflict, pool); + if (err) + { + if ((err->apr_err == SVN_ERR_FS_CONFLICT) && conflict_p) + *conflict_p = conflict->data; + return svn_error_trace(err); + } + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__deltify(svn_fs_t *fs, + svn_revnum_t revision, + apr_pool_t *pool) +{ + /* Deltify is a no-op for fs_fs. */ + + return SVN_NO_ERROR; +} + + + +/* Directories. */ + +/* Set *TABLE_P to a newly allocated APR hash table containing the + entries of the directory at PATH in ROOT. The keys of the table + are entry names, as byte strings, excluding the final null + character; the table's values are pointers to svn_fs_dirent_t + structures. Allocate the table and its contents in POOL. */ +static svn_error_t * +fs_dir_entries(apr_hash_t **table_p, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + dag_node_t *node; + + /* Get the entries for this path in the caller's pool. */ + SVN_ERR(get_dag(&node, root, path, FALSE, pool)); + return svn_fs_fs__dag_dir_entries(table_p, node, pool); +} + +/* Raise an error if PATH contains a newline because FSFS cannot handle + * such paths. See issue #4340. */ +static svn_error_t * +check_newline(const char *path, apr_pool_t *pool) +{ + char *c = strchr(path, '\n'); + + if (c) + return svn_error_createf(SVN_ERR_FS_PATH_SYNTAX, NULL, + _("Invalid control character '0x%02x' in path '%s'"), + (unsigned char)*c, svn_path_illegal_path_escape(path, pool)); + + return SVN_NO_ERROR; +} + +/* Create a new directory named PATH in ROOT. The new directory has + no entries, and no properties. ROOT must be the root of a + transaction, not a revision. Do any necessary temporary allocation + in POOL. 
*/ +static svn_error_t * +fs_make_dir(svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + parent_path_t *parent_path; + dag_node_t *sub_dir; + const char *txn_id = root->txn; + + SVN_ERR(check_newline(path, pool)); + + path = svn_fs__canonicalize_abspath(path, pool); + SVN_ERR(open_path(&parent_path, root, path, open_path_last_optional, + txn_id, pool)); + + /* Check (recursively) to see if some lock is 'reserving' a path at + that location, or even some child-path; if so, check that we can + use it. */ + if (root->txn_flags & SVN_FS_TXN_CHECK_LOCKS) + SVN_ERR(svn_fs_fs__allow_locked_operation(path, root->fs, TRUE, FALSE, + pool)); + + /* If there's already a sub-directory by that name, complain. This + also catches the case of trying to make a subdirectory named `/'. */ + if (parent_path->node) + return SVN_FS__ALREADY_EXISTS(root, path); + + /* Create the subdirectory. */ + SVN_ERR(make_path_mutable(root, parent_path->parent, path, pool)); + SVN_ERR(svn_fs_fs__dag_make_dir(&sub_dir, + parent_path->parent->node, + parent_path_path(parent_path->parent, + pool), + parent_path->entry, + txn_id, + pool)); + + /* Add this directory to the path cache. */ + SVN_ERR(dag_node_cache_set(root, parent_path_path(parent_path, pool), + sub_dir, pool)); + + /* Make a record of this modification in the changes table. */ + return add_change(root->fs, txn_id, path, svn_fs_fs__dag_get_id(sub_dir), + svn_fs_path_change_add, FALSE, FALSE, svn_node_dir, + SVN_INVALID_REVNUM, NULL, pool); +} + + +/* Delete the node at PATH under ROOT. ROOT must be a transaction + root. Perform temporary allocations in POOL. */ +static svn_error_t * +fs_delete_node(svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + parent_path_t *parent_path; + const char *txn_id = root->txn; + apr_int64_t mergeinfo_count = 0; + svn_node_kind_t kind; + + if (! 
root->is_txn_root) + return SVN_FS__NOT_TXN(root); + + path = svn_fs__canonicalize_abspath(path, pool); + SVN_ERR(open_path(&parent_path, root, path, 0, txn_id, pool)); + kind = svn_fs_fs__dag_node_kind(parent_path->node); + + /* We can't remove the root of the filesystem. */ + if (! parent_path->parent) + return svn_error_create(SVN_ERR_FS_ROOT_DIR, NULL, + _("The root directory cannot be deleted")); + + /* Check to see if path (or any child thereof) is locked; if so, + check that we can use the existing lock(s). */ + if (root->txn_flags & SVN_FS_TXN_CHECK_LOCKS) + SVN_ERR(svn_fs_fs__allow_locked_operation(path, root->fs, TRUE, FALSE, + pool)); + + /* Make the parent directory mutable, and do the deletion. */ + SVN_ERR(make_path_mutable(root, parent_path->parent, path, pool)); + if (svn_fs_fs__fs_supports_mergeinfo(root->fs)) + SVN_ERR(svn_fs_fs__dag_get_mergeinfo_count(&mergeinfo_count, + parent_path->node)); + SVN_ERR(svn_fs_fs__dag_delete(parent_path->parent->node, + parent_path->entry, + txn_id, pool)); + + /* Remove this node and any children from the path cache. */ + SVN_ERR(dag_node_cache_invalidate(root, parent_path_path(parent_path, pool), + pool)); + + /* Update mergeinfo counts for parents */ + if (mergeinfo_count > 0) + SVN_ERR(increment_mergeinfo_up_tree(parent_path->parent, + -mergeinfo_count, + pool)); + + /* Make a record of this modification in the changes table. */ + return add_change(root->fs, txn_id, path, + svn_fs_fs__dag_get_id(parent_path->node), + svn_fs_path_change_delete, FALSE, FALSE, kind, + SVN_INVALID_REVNUM, NULL, pool); +} + + +/* Set *SAME_P to TRUE if FS1 and FS2 have the same UUID, else set to FALSE. + Use POOL for temporary allocation only. + Note: this code is duplicated between libsvn_fs_fs and libsvn_fs_base. */ +static svn_error_t * +fs_same_p(svn_boolean_t *same_p, + svn_fs_t *fs1, + svn_fs_t *fs2, + apr_pool_t *pool) +{ + *same_p = ! 
strcmp(fs1->uuid, fs2->uuid); + return SVN_NO_ERROR; +} + +/* Copy the node at FROM_PATH under FROM_ROOT to TO_PATH under + TO_ROOT. If PRESERVE_HISTORY is set, then the copy is recorded in + the copies table. Perform temporary allocations in POOL. */ +static svn_error_t * +copy_helper(svn_fs_root_t *from_root, + const char *from_path, + svn_fs_root_t *to_root, + const char *to_path, + svn_boolean_t preserve_history, + apr_pool_t *pool) +{ + dag_node_t *from_node; + parent_path_t *to_parent_path; + const char *txn_id = to_root->txn; + svn_boolean_t same_p; + + /* Use an error check, not an assert, because even the caller cannot + guarantee that a filesystem's UUID has not changed "on the fly". */ + SVN_ERR(fs_same_p(&same_p, from_root->fs, to_root->fs, pool)); + if (! same_p) + return svn_error_createf + (SVN_ERR_UNSUPPORTED_FEATURE, NULL, + _("Cannot copy between two different filesystems ('%s' and '%s')"), + from_root->fs->path, to_root->fs->path); + + if (from_root->is_txn_root) + return svn_error_create + (SVN_ERR_UNSUPPORTED_FEATURE, NULL, + _("Copy from mutable tree not currently supported")); + + /* Get the NODE for FROM_PATH in FROM_ROOT.*/ + SVN_ERR(get_dag(&from_node, from_root, from_path, TRUE, pool)); + + /* Build up the parent path from TO_PATH in TO_ROOT. If the last + component does not exist, it's not that big a deal. We'll just + make one there. */ + SVN_ERR(open_path(&to_parent_path, to_root, to_path, + open_path_last_optional, txn_id, pool)); + + /* Check to see if path (or any child thereof) is locked; if so, + check that we can use the existing lock(s). */ + if (to_root->txn_flags & SVN_FS_TXN_CHECK_LOCKS) + SVN_ERR(svn_fs_fs__allow_locked_operation(to_path, to_root->fs, + TRUE, FALSE, pool)); + + /* If the destination node already exists as the same node as the + source (in other words, this operation would result in nothing + happening at all), just do nothing an return successfully, + proud that you saved yourself from a tiresome task. 
*/ + if (to_parent_path->node && + svn_fs_fs__id_eq(svn_fs_fs__dag_get_id(from_node), + svn_fs_fs__dag_get_id(to_parent_path->node))) + return SVN_NO_ERROR; + + if (! from_root->is_txn_root) + { + svn_fs_path_change_kind_t kind; + dag_node_t *new_node; + const char *from_canonpath; + apr_int64_t mergeinfo_start; + apr_int64_t mergeinfo_end; + + /* If TO_PATH already existed prior to the copy, note that this + operation is a replacement, not an addition. */ + if (to_parent_path->node) + { + kind = svn_fs_path_change_replace; + if (svn_fs_fs__fs_supports_mergeinfo(to_root->fs)) + SVN_ERR(svn_fs_fs__dag_get_mergeinfo_count(&mergeinfo_start, + to_parent_path->node)); + } + else + { + kind = svn_fs_path_change_add; + mergeinfo_start = 0; + } + + if (svn_fs_fs__fs_supports_mergeinfo(to_root->fs)) + SVN_ERR(svn_fs_fs__dag_get_mergeinfo_count(&mergeinfo_end, + from_node)); + + /* Make sure the target node's parents are mutable. */ + SVN_ERR(make_path_mutable(to_root, to_parent_path->parent, + to_path, pool)); + + /* Canonicalize the copyfrom path. */ + from_canonpath = svn_fs__canonicalize_abspath(from_path, pool); + + SVN_ERR(svn_fs_fs__dag_copy(to_parent_path->parent->node, + to_parent_path->entry, + from_node, + preserve_history, + from_root->rev, + from_canonpath, + txn_id, pool)); + + if (kind == svn_fs_path_change_replace) + SVN_ERR(dag_node_cache_invalidate(to_root, + parent_path_path(to_parent_path, + pool), pool)); + + if (svn_fs_fs__fs_supports_mergeinfo(to_root->fs) + && mergeinfo_start != mergeinfo_end) + SVN_ERR(increment_mergeinfo_up_tree(to_parent_path->parent, + mergeinfo_end - mergeinfo_start, + pool)); + + /* Make a record of this modification in the changes table. 
*/ + SVN_ERR(get_dag(&new_node, to_root, to_path, TRUE, pool)); + SVN_ERR(add_change(to_root->fs, txn_id, to_path, + svn_fs_fs__dag_get_id(new_node), kind, FALSE, FALSE, + svn_fs_fs__dag_node_kind(from_node), + from_root->rev, from_canonpath, pool)); + } + else + { + /* See IZ Issue #436 */ + /* Copying from transaction roots not currently available. + + ### cmpilato todo someday: make this not so. :-) Note that + when copying from mutable trees, you have to make sure that + you aren't creating a cyclic graph filesystem, and a simple + referencing operation won't cut it. Currently, we should not + be able to reach this clause, and the interface reports that + this only works from immutable trees anyway, but JimB has + stated that this requirement need not be necessary in the + future. */ + + SVN_ERR_MALFUNCTION(); + } + + return SVN_NO_ERROR; +} + + +/* Create a copy of FROM_PATH in FROM_ROOT named TO_PATH in TO_ROOT. + If FROM_PATH is a directory, copy it recursively. Temporary + allocations are from POOL.*/ +static svn_error_t * +fs_copy(svn_fs_root_t *from_root, + const char *from_path, + svn_fs_root_t *to_root, + const char *to_path, + apr_pool_t *pool) +{ + SVN_ERR(check_newline(to_path, pool)); + + return svn_error_trace(copy_helper(from_root, + svn_fs__canonicalize_abspath(from_path, + pool), + to_root, + svn_fs__canonicalize_abspath(to_path, + pool), + TRUE, pool)); +} + + +/* Create a copy of FROM_PATH in FROM_ROOT named TO_PATH in TO_ROOT. + If FROM_PATH is a directory, copy it recursively. No history is + preserved. Temporary allocations are from POOL. */ +static svn_error_t * +fs_revision_link(svn_fs_root_t *from_root, + svn_fs_root_t *to_root, + const char *path, + apr_pool_t *pool) +{ + if (! to_root->is_txn_root) + return SVN_FS__NOT_TXN(to_root); + + path = svn_fs__canonicalize_abspath(path, pool); + return svn_error_trace(copy_helper(from_root, path, to_root, path, + FALSE, pool)); +} + + +/* Discover the copy ancestry of PATH under ROOT. 
Return a relevant + ancestor/revision combination in *PATH_P and *REV_P. Temporary + allocations are in POOL. */ +static svn_error_t * +fs_copied_from(svn_revnum_t *rev_p, + const char **path_p, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + dag_node_t *node; + const char *copyfrom_path, *copyfrom_str = NULL; + svn_revnum_t copyfrom_rev; + char *str, *buf; + + /* Check to see if there is a cached version of this copyfrom + entry. */ + if (! root->is_txn_root) { + fs_rev_root_data_t *frd = root->fsap_data; + copyfrom_str = svn_hash_gets(frd->copyfrom_cache, path); + } + + if (copyfrom_str) + { + if (*copyfrom_str == 0) + { + /* We have a cached entry that says there is no copyfrom + here. */ + copyfrom_rev = SVN_INVALID_REVNUM; + copyfrom_path = NULL; + } + else + { + /* Parse the copyfrom string for our cached entry. */ + buf = apr_pstrdup(pool, copyfrom_str); + str = svn_cstring_tokenize(" ", &buf); + copyfrom_rev = SVN_STR_TO_REV(str); + copyfrom_path = buf; + } + } + else + { + /* There is no cached entry, look it up the old-fashioned + way. */ + SVN_ERR(get_dag(&node, root, path, TRUE, pool)); + SVN_ERR(svn_fs_fs__dag_get_copyfrom_rev(©from_rev, node)); + SVN_ERR(svn_fs_fs__dag_get_copyfrom_path(©from_path, node)); + } + + *rev_p = copyfrom_rev; + *path_p = copyfrom_path; + + return SVN_NO_ERROR; +} + + + +/* Files. */ + +/* Create the empty file PATH under ROOT. Temporary allocations are + in POOL. */ +static svn_error_t * +fs_make_file(svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + parent_path_t *parent_path; + dag_node_t *child; + const char *txn_id = root->txn; + + SVN_ERR(check_newline(path, pool)); + + path = svn_fs__canonicalize_abspath(path, pool); + SVN_ERR(open_path(&parent_path, root, path, open_path_last_optional, + txn_id, pool)); + + /* If there's already a file by that name, complain. + This also catches the case of trying to make a file named `/'. 
*/ + if (parent_path->node) + return SVN_FS__ALREADY_EXISTS(root, path); + + /* Check (non-recursively) to see if path is locked; if so, check + that we can use it. */ + if (root->txn_flags & SVN_FS_TXN_CHECK_LOCKS) + SVN_ERR(svn_fs_fs__allow_locked_operation(path, root->fs, FALSE, FALSE, + pool)); + + /* Create the file. */ + SVN_ERR(make_path_mutable(root, parent_path->parent, path, pool)); + SVN_ERR(svn_fs_fs__dag_make_file(&child, + parent_path->parent->node, + parent_path_path(parent_path->parent, + pool), + parent_path->entry, + txn_id, + pool)); + + /* Add this file to the path cache. */ + SVN_ERR(dag_node_cache_set(root, parent_path_path(parent_path, pool), child, + pool)); + + /* Make a record of this modification in the changes table. */ + return add_change(root->fs, txn_id, path, svn_fs_fs__dag_get_id(child), + svn_fs_path_change_add, TRUE, FALSE, svn_node_file, + SVN_INVALID_REVNUM, NULL, pool); +} + + +/* Set *LENGTH_P to the size of the file PATH under ROOT. Temporary + allocations are in POOL. */ +static svn_error_t * +fs_file_length(svn_filesize_t *length_p, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + dag_node_t *file; + + /* First create a dag_node_t from the root/path pair. */ + SVN_ERR(get_dag(&file, root, path, FALSE, pool)); + + /* Now fetch its length */ + return svn_fs_fs__dag_file_length(length_p, file, pool); +} + + +/* Set *CHECKSUM to the checksum of type KIND for PATH under ROOT, or + NULL if that information isn't available. Temporary allocations + are from POOL. */ +static svn_error_t * +fs_file_checksum(svn_checksum_t **checksum, + svn_checksum_kind_t kind, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + dag_node_t *file; + + SVN_ERR(get_dag(&file, root, path, FALSE, pool)); + return svn_fs_fs__dag_file_checksum(checksum, file, kind, pool); +} + + +/* --- Machinery for svn_fs_file_contents() --- */ + +/* Set *CONTENTS to a readable stream that will return the contents of + PATH under ROOT. 
The stream is allocated in POOL. */ +static svn_error_t * +fs_file_contents(svn_stream_t **contents, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + dag_node_t *node; + svn_stream_t *file_stream; + + /* First create a dag_node_t from the root/path pair. */ + SVN_ERR(get_dag(&node, root, path, FALSE, pool)); + + /* Then create a readable stream from the dag_node_t. */ + SVN_ERR(svn_fs_fs__dag_get_contents(&file_stream, node, pool)); + + *contents = file_stream; + return SVN_NO_ERROR; +} + +/* --- End machinery for svn_fs_file_contents() --- */ + + +/* --- Machinery for svn_fs_try_process_file_contents() --- */ + +static svn_error_t * +fs_try_process_file_contents(svn_boolean_t *success, + svn_fs_root_t *root, + const char *path, + svn_fs_process_contents_func_t processor, + void* baton, + apr_pool_t *pool) +{ + dag_node_t *node; + SVN_ERR(get_dag(&node, root, path, FALSE, pool)); + + return svn_fs_fs__dag_try_process_file_contents(success, node, + processor, baton, pool); +} + +/* --- End machinery for svn_fs_try_process_file_contents() --- */ + + +/* --- Machinery for svn_fs_apply_textdelta() --- */ + + +/* Local baton type for all the helper functions below. */ +typedef struct txdelta_baton_t +{ + /* This is the custom-built window consumer given to us by the delta + library; it uniquely knows how to read data from our designated + "source" stream, interpret the window, and write data to our + designated "target" stream (in this case, our repos file.) */ + svn_txdelta_window_handler_t interpreter; + void *interpreter_baton; + + /* The original file info */ + svn_fs_root_t *root; + const char *path; + + /* Derived from the file info */ + dag_node_t *node; + + svn_stream_t *source_stream; + svn_stream_t *target_stream; + svn_stream_t *string_stream; + svn_stringbuf_t *target_string; + + /* MD5 digest for the base text against which a delta is to be + applied, and for the resultant fulltext, respectively. 
Either or + both may be null, in which case ignored. */ + svn_checksum_t *base_checksum; + svn_checksum_t *result_checksum; + + /* Pool used by db txns */ + apr_pool_t *pool; + +} txdelta_baton_t; + + +/* ### see comment in window_consumer() regarding this function. */ + +/* Helper function of generic type `svn_write_fn_t'. Implements a + writable stream which appends to an svn_stringbuf_t. */ +static svn_error_t * +write_to_string(void *baton, const char *data, apr_size_t *len) +{ + txdelta_baton_t *tb = (txdelta_baton_t *) baton; + svn_stringbuf_appendbytes(tb->target_string, data, *len); + return SVN_NO_ERROR; +} + + + +/* The main window handler returned by svn_fs_apply_textdelta. */ +static svn_error_t * +window_consumer(svn_txdelta_window_t *window, void *baton) +{ + txdelta_baton_t *tb = (txdelta_baton_t *) baton; + + /* Send the window right through to the custom window interpreter. + In theory, the interpreter will then write more data to + cb->target_string. */ + SVN_ERR(tb->interpreter(window, tb->interpreter_baton)); + + /* ### the write_to_string() callback for the txdelta's output stream + ### should be doing all the flush determination logic, not here. + ### in a drastic case, a window could generate a LOT more than the + ### maximum buffer size. we want to flush to the underlying target + ### stream much sooner (e.g. also in a streamy fashion). also, by + ### moving this logic inside the stream, the stream becomes nice + ### and encapsulated: it holds all the logic about buffering and + ### flushing. + ### + ### further: I believe the buffering should be removed from tree.c + ### the buffering should go into the target_stream itself, which + ### is defined by reps-string.c. Specifically, I think the + ### rep_write_contents() function will handle the buffering and + ### the spill to the underlying DB. by locating it there, then + ### anybody who gets a writable stream for FS content can take + ### advantage of the buffering capability. 
this will be important + ### when we export an FS API function for writing a fulltext into + ### the FS, rather than forcing that fulltext thru apply_textdelta. + */ + + /* Check to see if we need to purge the portion of the contents that + have been written thus far. */ + if ((! window) || (tb->target_string->len > WRITE_BUFFER_SIZE)) + { + apr_size_t len = tb->target_string->len; + SVN_ERR(svn_stream_write(tb->target_stream, + tb->target_string->data, + &len)); + svn_stringbuf_setempty(tb->target_string); + } + + /* Is the window NULL? If so, we're done. */ + if (! window) + { + /* Close the internal-use stream. ### This used to be inside of + txn_body_fulltext_finalize_edits(), but that invoked a nested + Berkeley DB transaction -- scandalous! */ + SVN_ERR(svn_stream_close(tb->target_stream)); + + SVN_ERR(svn_fs_fs__dag_finalize_edits(tb->node, tb->result_checksum, + tb->pool)); + } + + return SVN_NO_ERROR; +} + +/* Helper function for fs_apply_textdelta. BATON is of type + txdelta_baton_t. */ +static svn_error_t * +apply_textdelta(void *baton, apr_pool_t *pool) +{ + txdelta_baton_t *tb = (txdelta_baton_t *) baton; + parent_path_t *parent_path; + const char *txn_id = tb->root->txn; + + /* Call open_path with no flags, as we want this to return an error + if the node for which we are searching doesn't exist. */ + SVN_ERR(open_path(&parent_path, tb->root, tb->path, 0, txn_id, pool)); + + /* Check (non-recursively) to see if path is locked; if so, check + that we can use it. */ + if (tb->root->txn_flags & SVN_FS_TXN_CHECK_LOCKS) + SVN_ERR(svn_fs_fs__allow_locked_operation(tb->path, tb->root->fs, + FALSE, FALSE, pool)); + + /* Now, make sure this path is mutable. */ + SVN_ERR(make_path_mutable(tb->root, parent_path, tb->path, pool)); + tb->node = parent_path->node; + + if (tb->base_checksum) + { + svn_checksum_t *checksum; + + /* Until we finalize the node, its data_key points to the old + contents, in other words, the base text. 
*/ + SVN_ERR(svn_fs_fs__dag_file_checksum(&checksum, tb->node, + tb->base_checksum->kind, pool)); + if (!svn_checksum_match(tb->base_checksum, checksum)) + return svn_checksum_mismatch_err(tb->base_checksum, checksum, pool, + _("Base checksum mismatch on '%s'"), + tb->path); + } + + /* Make a readable "source" stream out of the current contents of + ROOT/PATH; obviously, this must done in the context of a db_txn. + The stream is returned in tb->source_stream. */ + SVN_ERR(svn_fs_fs__dag_get_contents(&(tb->source_stream), + tb->node, tb->pool)); + + /* Make a writable "target" stream */ + SVN_ERR(svn_fs_fs__dag_get_edit_stream(&(tb->target_stream), tb->node, + tb->pool)); + + /* Make a writable "string" stream which writes data to + tb->target_string. */ + tb->target_string = svn_stringbuf_create_empty(tb->pool); + tb->string_stream = svn_stream_create(tb, tb->pool); + svn_stream_set_write(tb->string_stream, write_to_string); + + /* Now, create a custom window handler that uses our two streams. */ + svn_txdelta_apply(tb->source_stream, + tb->string_stream, + NULL, + tb->path, + tb->pool, + &(tb->interpreter), + &(tb->interpreter_baton)); + + /* Make a record of this modification in the changes table. */ + return add_change(tb->root->fs, txn_id, tb->path, + svn_fs_fs__dag_get_id(tb->node), + svn_fs_path_change_modify, TRUE, FALSE, svn_node_file, + SVN_INVALID_REVNUM, NULL, pool); +} + + +/* Set *CONTENTS_P and *CONTENTS_BATON_P to a window handler and baton + that will accept text delta windows to modify the contents of PATH + under ROOT. Allocations are in POOL. 
*/ +static svn_error_t * +fs_apply_textdelta(svn_txdelta_window_handler_t *contents_p, + void **contents_baton_p, + svn_fs_root_t *root, + const char *path, + svn_checksum_t *base_checksum, + svn_checksum_t *result_checksum, + apr_pool_t *pool) +{ + txdelta_baton_t *tb = apr_pcalloc(pool, sizeof(*tb)); + + tb->root = root; + tb->path = svn_fs__canonicalize_abspath(path, pool); + tb->pool = pool; + tb->base_checksum = svn_checksum_dup(base_checksum, pool); + tb->result_checksum = svn_checksum_dup(result_checksum, pool); + + SVN_ERR(apply_textdelta(tb, pool)); + + *contents_p = window_consumer; + *contents_baton_p = tb; + return SVN_NO_ERROR; +} + +/* --- End machinery for svn_fs_apply_textdelta() --- */ + +/* --- Machinery for svn_fs_apply_text() --- */ + +/* Baton for svn_fs_apply_text(). */ +struct text_baton_t +{ + /* The original file info */ + svn_fs_root_t *root; + const char *path; + + /* Derived from the file info */ + dag_node_t *node; + + /* The returned stream that will accept the file's new contents. */ + svn_stream_t *stream; + + /* The actual fs stream that the returned stream will write to. */ + svn_stream_t *file_stream; + + /* MD5 digest for the final fulltext written to the file. May + be null, in which case ignored. */ + svn_checksum_t *result_checksum; + + /* Pool used by db txns */ + apr_pool_t *pool; +}; + + +/* A wrapper around svn_fs_fs__dag_finalize_edits, but for + * fulltext data, not text deltas. Closes BATON->file_stream. + * + * Note: If you're confused about how this function relates to another + * of similar name, think of it this way: + * + * svn_fs_apply_textdelta() ==> ... ==> txn_body_txdelta_finalize_edits() + * svn_fs_apply_text() ==> ... ==> txn_body_fulltext_finalize_edits() + */ + +/* Write function for the publically returned stream. */ +static svn_error_t * +text_stream_writer(void *baton, + const char *data, + apr_size_t *len) +{ + struct text_baton_t *tb = baton; + + /* Psst, here's some data. 
Pass it on to the -real- file stream. */ + return svn_stream_write(tb->file_stream, data, len); +} + +/* Close function for the publically returned stream. */ +static svn_error_t * +text_stream_closer(void *baton) +{ + struct text_baton_t *tb = baton; + + /* Close the internal-use stream. ### This used to be inside of + txn_body_fulltext_finalize_edits(), but that invoked a nested + Berkeley DB transaction -- scandalous! */ + SVN_ERR(svn_stream_close(tb->file_stream)); + + /* Need to tell fs that we're done sending text */ + return svn_fs_fs__dag_finalize_edits(tb->node, tb->result_checksum, + tb->pool); +} + + +/* Helper function for fs_apply_text. BATON is of type + text_baton_t. */ +static svn_error_t * +apply_text(void *baton, apr_pool_t *pool) +{ + struct text_baton_t *tb = baton; + parent_path_t *parent_path; + const char *txn_id = tb->root->txn; + + /* Call open_path with no flags, as we want this to return an error + if the node for which we are searching doesn't exist. */ + SVN_ERR(open_path(&parent_path, tb->root, tb->path, 0, txn_id, pool)); + + /* Check (non-recursively) to see if path is locked; if so, check + that we can use it. */ + if (tb->root->txn_flags & SVN_FS_TXN_CHECK_LOCKS) + SVN_ERR(svn_fs_fs__allow_locked_operation(tb->path, tb->root->fs, + FALSE, FALSE, pool)); + + /* Now, make sure this path is mutable. */ + SVN_ERR(make_path_mutable(tb->root, parent_path, tb->path, pool)); + tb->node = parent_path->node; + + /* Make a writable stream for replacing the file's text. */ + SVN_ERR(svn_fs_fs__dag_get_edit_stream(&(tb->file_stream), tb->node, + tb->pool)); + + /* Create a 'returnable' stream which writes to the file_stream. */ + tb->stream = svn_stream_create(tb, tb->pool); + svn_stream_set_write(tb->stream, text_stream_writer); + svn_stream_set_close(tb->stream, text_stream_closer); + + /* Make a record of this modification in the changes table. 
*/ + return add_change(tb->root->fs, txn_id, tb->path, + svn_fs_fs__dag_get_id(tb->node), + svn_fs_path_change_modify, TRUE, FALSE, svn_node_file, + SVN_INVALID_REVNUM, NULL, pool); +} + + +/* Return a writable stream that will set the contents of PATH under + ROOT. RESULT_CHECKSUM is the MD5 checksum of the final result. + Temporary allocations are in POOL. */ +static svn_error_t * +fs_apply_text(svn_stream_t **contents_p, + svn_fs_root_t *root, + const char *path, + svn_checksum_t *result_checksum, + apr_pool_t *pool) +{ + struct text_baton_t *tb = apr_pcalloc(pool, sizeof(*tb)); + + tb->root = root; + tb->path = svn_fs__canonicalize_abspath(path, pool); + tb->pool = pool; + tb->result_checksum = svn_checksum_dup(result_checksum, pool); + + SVN_ERR(apply_text(tb, pool)); + + *contents_p = tb->stream; + return SVN_NO_ERROR; +} + +/* --- End machinery for svn_fs_apply_text() --- */ + + +/* Check if the contents of PATH1 under ROOT1 are different from the + contents of PATH2 under ROOT2. If they are different set + *CHANGED_P to TRUE, otherwise set it to FALSE. */ +static svn_error_t * +fs_contents_changed(svn_boolean_t *changed_p, + svn_fs_root_t *root1, + const char *path1, + svn_fs_root_t *root2, + const char *path2, + apr_pool_t *pool) +{ + dag_node_t *node1, *node2; + + /* Check that roots are in the same fs. */ + if (root1->fs != root2->fs) + return svn_error_create + (SVN_ERR_FS_GENERAL, NULL, + _("Cannot compare file contents between two different filesystems")); + + /* Check that both paths are files. 
*/ + { + svn_node_kind_t kind; + + SVN_ERR(svn_fs_fs__check_path(&kind, root1, path1, pool)); + if (kind != svn_node_file) + return svn_error_createf + (SVN_ERR_FS_GENERAL, NULL, _("'%s' is not a file"), path1); + + SVN_ERR(svn_fs_fs__check_path(&kind, root2, path2, pool)); + if (kind != svn_node_file) + return svn_error_createf + (SVN_ERR_FS_GENERAL, NULL, _("'%s' is not a file"), path2); + } + + SVN_ERR(get_dag(&node1, root1, path1, TRUE, pool)); + SVN_ERR(get_dag(&node2, root2, path2, TRUE, pool)); + return svn_fs_fs__dag_things_different(NULL, changed_p, + node1, node2); +} + + + +/* Public interface to computing file text deltas. */ + +static svn_error_t * +fs_get_file_delta_stream(svn_txdelta_stream_t **stream_p, + svn_fs_root_t *source_root, + const char *source_path, + svn_fs_root_t *target_root, + const char *target_path, + apr_pool_t *pool) +{ + dag_node_t *source_node, *target_node; + + if (source_root && source_path) + SVN_ERR(get_dag(&source_node, source_root, source_path, TRUE, pool)); + else + source_node = NULL; + SVN_ERR(get_dag(&target_node, target_root, target_path, TRUE, pool)); + + /* Create a delta stream that turns the source into the target. */ + return svn_fs_fs__dag_get_file_delta_stream(stream_p, source_node, + target_node, pool); +} + + + +/* Finding Changes */ + +/* Set *CHANGED_PATHS_P to a newly allocated hash containing + descriptions of the paths changed under ROOT. The hash is keyed + with const char * paths and has svn_fs_path_change2_t * values. Use + POOL for all allocations. */ +static svn_error_t * +fs_paths_changed(apr_hash_t **changed_paths_p, + svn_fs_root_t *root, + apr_pool_t *pool) +{ + if (root->is_txn_root) + return svn_fs_fs__txn_changes_fetch(changed_paths_p, root->fs, root->txn, + pool); + else + { + fs_rev_root_data_t *frd = root->fsap_data; + return svn_fs_fs__paths_changed(changed_paths_p, root->fs, root->rev, + frd->copyfrom_cache, pool); + } +} + + + +/* Our coolio opaque history object. 
*/ +typedef struct fs_history_data_t +{ + /* filesystem object */ + svn_fs_t *fs; + + /* path and revision of historical location */ + const char *path; + svn_revnum_t revision; + + /* internal-use hints about where to resume the history search. */ + const char *path_hint; + svn_revnum_t rev_hint; + + /* FALSE until the first call to svn_fs_history_prev(). */ + svn_boolean_t is_interesting; +} fs_history_data_t; + +static svn_fs_history_t * +assemble_history(svn_fs_t *fs, + const char *path, + svn_revnum_t revision, + svn_boolean_t is_interesting, + const char *path_hint, + svn_revnum_t rev_hint, + apr_pool_t *pool); + + +/* Set *HISTORY_P to an opaque node history object which represents + PATH under ROOT. ROOT must be a revision root. Use POOL for all + allocations. */ +static svn_error_t * +fs_node_history(svn_fs_history_t **history_p, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + svn_node_kind_t kind; + + /* We require a revision root. */ + if (root->is_txn_root) + return svn_error_create(SVN_ERR_FS_NOT_REVISION_ROOT, NULL, NULL); + + /* And we require that the path exist in the root. */ + SVN_ERR(svn_fs_fs__check_path(&kind, root, path, pool)); + if (kind == svn_node_none) + return SVN_FS__NOT_FOUND(root, path); + + /* Okay, all seems well. Build our history object and return it. */ + *history_p = assemble_history(root->fs, + svn_fs__canonicalize_abspath(path, pool), + root->rev, FALSE, NULL, + SVN_INVALID_REVNUM, pool); + return SVN_NO_ERROR; +} + +/* Find the youngest copyroot for path PARENT_PATH or its parents in + filesystem FS, and store the copyroot in *REV_P and *PATH_P. + Perform all allocations in POOL. 
*/ +static svn_error_t * +find_youngest_copyroot(svn_revnum_t *rev_p, + const char **path_p, + svn_fs_t *fs, + parent_path_t *parent_path, + apr_pool_t *pool) +{ + svn_revnum_t rev_mine; + svn_revnum_t rev_parent = SVN_INVALID_REVNUM; + const char *path_mine; + const char *path_parent = NULL; + + /* First find our parent's youngest copyroot. */ + if (parent_path->parent) + SVN_ERR(find_youngest_copyroot(&rev_parent, &path_parent, fs, + parent_path->parent, pool)); + + /* Find our copyroot. */ + SVN_ERR(svn_fs_fs__dag_get_copyroot(&rev_mine, &path_mine, + parent_path->node)); + + /* If a parent and child were copied to in the same revision, prefer + the child copy target, since it is the copy relevant to the + history of the child. */ + if (rev_mine >= rev_parent) + { + *rev_p = rev_mine; + *path_p = path_mine; + } + else + { + *rev_p = rev_parent; + *path_p = path_parent; + } + + return SVN_NO_ERROR; +} + + +static svn_error_t *fs_closest_copy(svn_fs_root_t **root_p, + const char **path_p, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + svn_fs_t *fs = root->fs; + parent_path_t *parent_path, *copy_dst_parent_path; + svn_revnum_t copy_dst_rev, created_rev; + const char *copy_dst_path; + svn_fs_root_t *copy_dst_root; + dag_node_t *copy_dst_node; + svn_node_kind_t kind; + + /* Initialize return values. */ + *root_p = NULL; + *path_p = NULL; + + path = svn_fs__canonicalize_abspath(path, pool); + SVN_ERR(open_path(&parent_path, root, path, 0, NULL, pool)); + + /* Find the youngest copyroot in the path of this node-rev, which + will indicate the target of the innermost copy affecting the + node-rev. */ + SVN_ERR(find_youngest_copyroot(©_dst_rev, ©_dst_path, + fs, parent_path, pool)); + if (copy_dst_rev == 0) /* There are no copies affecting this node-rev. */ + return SVN_NO_ERROR; + + /* It is possible that this node was created from scratch at some + revision between COPY_DST_REV and REV. 
Make sure that PATH + exists as of COPY_DST_REV and is related to this node-rev. */ + SVN_ERR(svn_fs_fs__revision_root(©_dst_root, fs, copy_dst_rev, pool)); + SVN_ERR(svn_fs_fs__check_path(&kind, copy_dst_root, path, pool)); + if (kind == svn_node_none) + return SVN_NO_ERROR; + SVN_ERR(open_path(©_dst_parent_path, copy_dst_root, path, + open_path_node_only, NULL, pool)); + copy_dst_node = copy_dst_parent_path->node; + if (! svn_fs_fs__id_check_related(svn_fs_fs__dag_get_id(copy_dst_node), + svn_fs_fs__dag_get_id(parent_path->node))) + return SVN_NO_ERROR; + + /* One final check must be done here. If you copy a directory and + create a new entity somewhere beneath that directory in the same + txn, then we can't claim that the copy affected the new entity. + For example, if you do: + + copy dir1 dir2 + create dir2/new-thing + commit + + then dir2/new-thing was not affected by the copy of dir1 to dir2. + We detect this situation by asking if PATH@COPY_DST_REV's + created-rev is COPY_DST_REV, and that node-revision has no + predecessors, then there is no relevant closest copy. + */ + SVN_ERR(svn_fs_fs__dag_get_revision(&created_rev, copy_dst_node, pool)); + if (created_rev == copy_dst_rev) + { + const svn_fs_id_t *pred; + SVN_ERR(svn_fs_fs__dag_get_predecessor_id(&pred, copy_dst_node)); + if (! pred) + return SVN_NO_ERROR; + } + + /* The copy destination checks out. Return it. */ + *root_p = copy_dst_root; + *path_p = copy_dst_path; + return SVN_NO_ERROR; +} + + +/* Set *PREV_PATH and *PREV_REV to the path and revision which + represent the location at which PATH in FS was located immediately + prior to REVISION iff there was a copy operation (to PATH or one of + its parent directories) between that previous location and + PATH@REVISION. + + If there was no such copy operation in that portion of PATH's + history, set *PREV_PATH to NULL and *PREV_REV to SVN_INVALID_REVNUM. 
*/ +static svn_error_t * +prev_location(const char **prev_path, + svn_revnum_t *prev_rev, + svn_fs_t *fs, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + const char *copy_path, *copy_src_path, *remainder_path; + svn_fs_root_t *copy_root; + svn_revnum_t copy_src_rev; + + /* Ask about the most recent copy which affected PATH@REVISION. If + there was no such copy, we're done. */ + SVN_ERR(fs_closest_copy(©_root, ©_path, root, path, pool)); + if (! copy_root) + { + *prev_rev = SVN_INVALID_REVNUM; + *prev_path = NULL; + return SVN_NO_ERROR; + } + + /* Ultimately, it's not the path of the closest copy's source that + we care about -- it's our own path's location in the copy source + revision. So we'll tack the relative path that expresses the + difference between the copy destination and our path in the copy + revision onto the copy source path to determine this information. + + In other words, if our path is "/branches/my-branch/foo/bar", and + we know that the closest relevant copy was a copy of "/trunk" to + "/branches/my-branch", then that relative path under the copy + destination is "/foo/bar". Tacking that onto the copy source + path tells us that our path was located at "/trunk/foo/bar" + before the copy. + */ + SVN_ERR(fs_copied_from(©_src_rev, ©_src_path, + copy_root, copy_path, pool)); + remainder_path = svn_fspath__skip_ancestor(copy_path, path); + *prev_path = svn_fspath__join(copy_src_path, remainder_path, pool); + *prev_rev = copy_src_rev; + return SVN_NO_ERROR; +} + + +static svn_error_t * +fs_node_origin_rev(svn_revnum_t *revision, + svn_fs_root_t *root, + const char *path, + apr_pool_t *pool) +{ + svn_fs_t *fs = root->fs; + const svn_fs_id_t *given_noderev_id, *cached_origin_id; + const char *node_id, *dash; + + path = svn_fs__canonicalize_abspath(path, pool); + + /* Check the cache first. 
*/
+  SVN_ERR(svn_fs_fs__node_id(&given_noderev_id, root, path, pool));
+  node_id = svn_fs_fs__id_node_id(given_noderev_id);
+
+  /* Is it a brand new uncommitted node? */
+  if (node_id[0] == '_')
+    {
+      *revision = SVN_INVALID_REVNUM;
+      return SVN_NO_ERROR;
+    }
+
+  /* Maybe this is a new-style node ID that just has the revision
+     sitting right in it: the text after the '-' is parsed directly
+     as the origin revision. */
+  dash = strchr(node_id, '-');
+  if (dash && *(dash+1))
+    {
+      *revision = SVN_STR_TO_REV(dash + 1);
+      return SVN_NO_ERROR;
+    }
+
+  /* OK, it's an old-style ID?  Maybe it's cached. */
+  SVN_ERR(svn_fs_fs__get_node_origin(&cached_origin_id,
+                                     fs,
+                                     node_id,
+                                     pool));
+  if (cached_origin_id != NULL)
+    {
+      *revision = svn_fs_fs__id_rev(cached_origin_id);
+      return SVN_NO_ERROR;
+    }
+
+  {
+    /* Ah well, the answer isn't in the ID itself or in the cache.
+       Let's actually calculate it, then. */
+    svn_fs_root_t *curroot = root;
+    apr_pool_t *subpool = svn_pool_create(pool);
+    apr_pool_t *predidpool = svn_pool_create(pool);
+    svn_stringbuf_t *lastpath = svn_stringbuf_create(path, pool);
+    svn_revnum_t lastrev = SVN_INVALID_REVNUM;
+    dag_node_t *node;
+    const svn_fs_id_t *pred_id;
+
+    /* Walk the closest-copy chain back to the first copy in our history.
+
+       NOTE: We merely *assume* that this is faster than walking the
+       predecessor chain, because we *assume* that copies of parent
+       directories happen less often than modifications to a given item. */
+    while (1)
+      {
+        svn_revnum_t currev;
+        const char *curpath = lastpath->data;
+
+        svn_pool_clear(subpool);
+
+        /* Get a root pointing to LASTREV.  (The first time around,
+           LASTREV is invalid, but that's cool because CURROOT is
+           already initialized.)  */
+        if (SVN_IS_VALID_REVNUM(lastrev))
+          SVN_ERR(svn_fs_fs__revision_root(&curroot, fs, lastrev, subpool));
+
+        /* Find the previous location using the closest-copy shortcut.
+           A NULL CURPATH means no copy affected this span of history,
+           so the copy-chain walk is complete. */
+        SVN_ERR(prev_location(&curpath, &currev, fs, curroot, curpath,
+                              subpool));
+        if (! curpath)
+          break;
+
+        /* Update our LASTPATH and LASTREV variables (which survive
+           SUBPOOL). */
+        svn_stringbuf_set(lastpath, curpath);
+        lastrev = currev;
+      }
+
+    /* Walk the predecessor links back to origin. */
+    SVN_ERR(svn_fs_fs__node_id(&pred_id, curroot, lastpath->data, predidpool));
+    do
+      {
+        svn_pool_clear(subpool);
+        SVN_ERR(svn_fs_fs__dag_get_node(&node, fs, pred_id, subpool));
+
+        /* Why not just fetch the predecessor ID in PREDIDPOOL?
+           Because svn_fs_fs__dag_get_predecessor_id() doesn't
+           necessarily honor the passed-in pool, and might return a
+           value cached in the node (which is allocated in
+           SUBPOOL... maybe). */
+        svn_pool_clear(predidpool);
+        SVN_ERR(svn_fs_fs__dag_get_predecessor_id(&pred_id, node));
+        pred_id = pred_id ? svn_fs_fs__id_copy(pred_id, predidpool) : NULL;
+      }
+    while (pred_id);
+
+    /* When we get here, NODE should be the first node-revision in our
+       chain. */
+    SVN_ERR(svn_fs_fs__dag_get_revision(revision, node, pool));
+
+    /* Wow, I don't want to have to do all that again.  Let's cache
+       the result.
*/ + if (node_id[0] != '_') + SVN_ERR(svn_fs_fs__set_node_origin(fs, node_id, + svn_fs_fs__dag_get_id(node), pool)); + + svn_pool_destroy(subpool); + svn_pool_destroy(predidpool); + return SVN_NO_ERROR; + } +} + + +struct history_prev_args +{ + svn_fs_history_t **prev_history_p; + svn_fs_history_t *history; + svn_boolean_t cross_copies; + apr_pool_t *pool; +}; + + +static svn_error_t * +history_prev(void *baton, apr_pool_t *pool) +{ + struct history_prev_args *args = baton; + svn_fs_history_t **prev_history = args->prev_history_p; + svn_fs_history_t *history = args->history; + fs_history_data_t *fhd = history->fsap_data; + const char *commit_path, *src_path, *path = fhd->path; + svn_revnum_t commit_rev, src_rev, dst_rev; + svn_revnum_t revision = fhd->revision; + apr_pool_t *retpool = args->pool; + svn_fs_t *fs = fhd->fs; + parent_path_t *parent_path; + dag_node_t *node; + svn_fs_root_t *root; + svn_boolean_t reported = fhd->is_interesting; + svn_revnum_t copyroot_rev; + const char *copyroot_path; + + /* Initialize our return value. */ + *prev_history = NULL; + + /* If our last history report left us hints about where to pickup + the chase, then our last report was on the destination of a + copy. If we are crossing copies, start from those locations, + otherwise, we're all done here. */ + if (fhd->path_hint && SVN_IS_VALID_REVNUM(fhd->rev_hint)) + { + reported = FALSE; + if (! args->cross_copies) + return SVN_NO_ERROR; + path = fhd->path_hint; + revision = fhd->rev_hint; + } + + /* Construct a ROOT for the current revision. */ + SVN_ERR(svn_fs_fs__revision_root(&root, fs, revision, pool)); + + /* Open PATH/REVISION, and get its node and a bunch of other + goodies. 
*/ + SVN_ERR(open_path(&parent_path, root, path, 0, NULL, pool)); + node = parent_path->node; + commit_path = svn_fs_fs__dag_get_created_path(node); + SVN_ERR(svn_fs_fs__dag_get_revision(&commit_rev, node, pool)); + + /* The Subversion filesystem is written in such a way that a given + line of history may have at most one interesting history point + per filesystem revision. Either that node was edited (and + possibly copied), or it was copied but not edited. And a copy + source cannot be from the same revision as its destination. So, + if our history revision matches its node's commit revision, we + know that ... */ + if (revision == commit_rev) + { + if (! reported) + { + /* ... we either have not yet reported on this revision (and + need now to do so) ... */ + *prev_history = assemble_history(fs, + apr_pstrdup(retpool, commit_path), + commit_rev, TRUE, NULL, + SVN_INVALID_REVNUM, retpool); + return SVN_NO_ERROR; + } + else + { + /* ... or we *have* reported on this revision, and must now + progress toward this node's predecessor (unless there is + no predecessor, in which case we're all done!). */ + const svn_fs_id_t *pred_id; + + SVN_ERR(svn_fs_fs__dag_get_predecessor_id(&pred_id, node)); + if (! pred_id) + return SVN_NO_ERROR; + + /* Replace NODE and friends with the information from its + predecessor. */ + SVN_ERR(svn_fs_fs__dag_get_node(&node, fs, pred_id, pool)); + commit_path = svn_fs_fs__dag_get_created_path(node); + SVN_ERR(svn_fs_fs__dag_get_revision(&commit_rev, node, pool)); + } + } + + /* Find the youngest copyroot in the path of this node, including + itself. */ + SVN_ERR(find_youngest_copyroot(©root_rev, ©root_path, fs, + parent_path, pool)); + + /* Initialize some state variables. 
*/ + src_path = NULL; + src_rev = SVN_INVALID_REVNUM; + dst_rev = SVN_INVALID_REVNUM; + + if (copyroot_rev > commit_rev) + { + const char *remainder_path; + const char *copy_dst, *copy_src; + svn_fs_root_t *copyroot_root; + + SVN_ERR(svn_fs_fs__revision_root(©root_root, fs, copyroot_rev, + pool)); + SVN_ERR(get_dag(&node, copyroot_root, copyroot_path, FALSE, pool)); + copy_dst = svn_fs_fs__dag_get_created_path(node); + + /* If our current path was the very destination of the copy, + then our new current path will be the copy source. If our + current path was instead the *child* of the destination of + the copy, then figure out its previous location by taking its + path relative to the copy destination and appending that to + the copy source. Finally, if our current path doesn't meet + one of these other criteria ... ### for now just fallback to + the old copy hunt algorithm. */ + remainder_path = svn_fspath__skip_ancestor(copy_dst, path); + + if (remainder_path) + { + /* If we get here, then our current path is the destination + of, or the child of the destination of, a copy. Fill + in the return values and get outta here. */ + SVN_ERR(svn_fs_fs__dag_get_copyfrom_rev(&src_rev, node)); + SVN_ERR(svn_fs_fs__dag_get_copyfrom_path(©_src, node)); + + dst_rev = copyroot_rev; + src_path = svn_fspath__join(copy_src, remainder_path, pool); + } + } + + /* If we calculated a copy source path and revision, we'll make a + 'copy-style' history object. */ + if (src_path && SVN_IS_VALID_REVNUM(src_rev)) + { + svn_boolean_t retry = FALSE; + + /* It's possible for us to find a copy location that is the same + as the history point we've just reported. If that happens, + we simply need to take another trip through this history + search. */ + if ((dst_rev == revision) && reported) + retry = TRUE; + + *prev_history = assemble_history(fs, apr_pstrdup(retpool, path), + dst_rev, ! 
retry, + src_path, src_rev, retpool); + } + else + { + *prev_history = assemble_history(fs, apr_pstrdup(retpool, commit_path), + commit_rev, TRUE, NULL, + SVN_INVALID_REVNUM, retpool); + } + + return SVN_NO_ERROR; +} + + +/* Implement svn_fs_history_prev, set *PREV_HISTORY_P to a new + svn_fs_history_t object that represents the predecessory of + HISTORY. If CROSS_COPIES is true, *PREV_HISTORY_P may be related + only through a copy operation. Perform all allocations in POOL. */ +static svn_error_t * +fs_history_prev(svn_fs_history_t **prev_history_p, + svn_fs_history_t *history, + svn_boolean_t cross_copies, + apr_pool_t *pool) +{ + svn_fs_history_t *prev_history = NULL; + fs_history_data_t *fhd = history->fsap_data; + svn_fs_t *fs = fhd->fs; + + /* Special case: the root directory changes in every single + revision, no exceptions. And, the root can't be the target (or + child of a target -- duh) of a copy. So, if that's our path, + then we need only decrement our revision by 1, and there you go. */ + if (strcmp(fhd->path, "/") == 0) + { + if (! fhd->is_interesting) + prev_history = assemble_history(fs, "/", fhd->revision, + 1, NULL, SVN_INVALID_REVNUM, pool); + else if (fhd->revision > 0) + prev_history = assemble_history(fs, "/", fhd->revision - 1, + 1, NULL, SVN_INVALID_REVNUM, pool); + } + else + { + struct history_prev_args args; + prev_history = history; + + while (1) + { + args.prev_history_p = &prev_history; + args.history = prev_history; + args.cross_copies = cross_copies; + args.pool = pool; + SVN_ERR(history_prev(&args, pool)); + + if (! prev_history) + break; + fhd = prev_history->fsap_data; + if (fhd->is_interesting) + break; + } + } + + *prev_history_p = prev_history; + return SVN_NO_ERROR; +} + + +/* Set *PATH and *REVISION to the path and revision for the HISTORY + object. Use POOL for all allocations. 
*/
+static svn_error_t *
+fs_history_location(const char **path,
+                    svn_revnum_t *revision,
+                    svn_fs_history_t *history,
+                    apr_pool_t *pool)
+{
+  fs_history_data_t *fhd = history->fsap_data;
+
+  *path = apr_pstrdup(pool, fhd->path);
+  *revision = fhd->revision;
+  return SVN_NO_ERROR;
+}
+
+static history_vtable_t history_vtable = {
+  fs_history_prev,
+  fs_history_location
+};
+
+/* Return a new history object for PATH and REVISION, allocated in
+   POOL, with its members set to the values of the parameters
+   provided.  PATH is canonicalized into POOL; PATH_HINT, however, is
+   *not* duped into POOL -- it is the responsibility of the caller to
+   ensure that this happens. */
+static svn_fs_history_t *
+assemble_history(svn_fs_t *fs,
+                 const char *path,
+                 svn_revnum_t revision,
+                 svn_boolean_t is_interesting,
+                 const char *path_hint,
+                 svn_revnum_t rev_hint,
+                 apr_pool_t *pool)
+{
+  svn_fs_history_t *history = apr_pcalloc(pool, sizeof(*history));
+  fs_history_data_t *fhd = apr_pcalloc(pool, sizeof(*fhd));
+  fhd->path = svn_fs__canonicalize_abspath(path, pool);
+  fhd->revision = revision;
+  fhd->is_interesting = is_interesting;
+  fhd->path_hint = path_hint;
+  fhd->rev_hint = rev_hint;
+  fhd->fs = fs;
+
+  history->vtable = &history_vtable;
+  history->fsap_data = fhd;
+  return history;
+}
+
+
+/* mergeinfo queries */
+
+
+/* DIR_DAG is a directory DAG node which has mergeinfo in its
+   descendants.  This function iterates over its children.  For each
+   child with immediate mergeinfo, it adds that mergeinfo to
+   RESULT_CATALOG, keyed on the child's path.  For each child with
+   descendants with mergeinfo, it recurses.  Note that it does *not*
+   add an entry for DIR_DAG itself.
+
+   SCRATCH_POOL is used for temporary allocations, including the
+   mergeinfo hashes parsed along the way; RESULT_POOL is used for the
+   mergeinfo added to RESULT_CATALOG.
+ */
+static svn_error_t *
+crawl_directory_dag_for_mergeinfo(svn_fs_root_t *root,
+                                  const char *this_path,
+                                  dag_node_t *dir_dag,
+                                  svn_mergeinfo_catalog_t result_catalog,
+                                  apr_pool_t *result_pool,
+                                  apr_pool_t *scratch_pool)
+{
+  apr_hash_t *entries;
+  apr_hash_index_t *hi;
+  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
+
+  SVN_ERR(svn_fs_fs__dag_dir_entries(&entries, dir_dag,
+                                     scratch_pool));
+
+  for (hi = apr_hash_first(scratch_pool, entries);
+       hi;
+       hi = apr_hash_next(hi))
+    {
+      svn_fs_dirent_t *dirent = svn__apr_hash_index_val(hi);
+      const char *kid_path;
+      dag_node_t *kid_dag;
+      svn_boolean_t has_mergeinfo, go_down;
+
+      svn_pool_clear(iterpool);
+
+      kid_path = svn_fspath__join(this_path, dirent->name, iterpool);
+      SVN_ERR(get_dag(&kid_dag, root, kid_path, TRUE, iterpool));
+
+      SVN_ERR(svn_fs_fs__dag_has_mergeinfo(&has_mergeinfo, kid_dag));
+      SVN_ERR(svn_fs_fs__dag_has_descendants_with_mergeinfo(&go_down, kid_dag));
+
+      if (has_mergeinfo)
+        {
+          /* Save this particular node's mergeinfo. */
+          apr_hash_t *proplist;
+          svn_mergeinfo_t kid_mergeinfo;
+          svn_string_t *mergeinfo_string;
+          svn_error_t *err;
+
+          SVN_ERR(svn_fs_fs__dag_get_proplist(&proplist, kid_dag, iterpool));
+          mergeinfo_string = svn_hash_gets(proplist, SVN_PROP_MERGEINFO);
+          if (!mergeinfo_string)
+            {
+              /* The node claimed to have mergeinfo but its proplist
+                 lacks SVN_PROP_MERGEINFO: filesystem corruption. */
+              svn_string_t *idstr = svn_fs_fs__id_unparse(dirent->id, iterpool);
+              return svn_error_createf
+                (SVN_ERR_FS_CORRUPT, NULL,
+                 _("Node-revision #'%s' claims to have mergeinfo but doesn't"),
+                 idstr->data);
+            }
+
+          /* Issue #3896: If a node has syntactically invalid mergeinfo, then
+             treat it as if no mergeinfo is present rather than raising a parse
+             error.
+           */
+          err = svn_mergeinfo_parse(&kid_mergeinfo,
+                                    mergeinfo_string->data,
+                                    result_pool);
+          if (err)
+            {
+              if (err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
+                svn_error_clear(err);
+              else
+                return svn_error_trace(err);
+            }
+          else
+            {
+              svn_hash_sets(result_catalog, apr_pstrdup(result_pool, kid_path),
+                            kid_mergeinfo);
+            }
+        }
+
+      if (go_down)
+        SVN_ERR(crawl_directory_dag_for_mergeinfo(root,
+                                                  kid_path,
+                                                  kid_dag,
+                                                  result_catalog,
+                                                  result_pool,
+                                                  iterpool));
+    }
+
+  svn_pool_destroy(iterpool);
+  return SVN_NO_ERROR;
+}
+
+/* Return the cache key as a combination of REV_ROOT->REV, the inheritance
+   flags INHERIT and ADJUST_INHERITED_MERGEINFO, and the PATH.  The result
+   will be allocated in POOL.
+ */
+static const char *
+mergeinfo_cache_key(const char *path,
+                    svn_fs_root_t *rev_root,
+                    svn_mergeinfo_inheritance_t inherit,
+                    svn_boolean_t adjust_inherited_mergeinfo,
+                    apr_pool_t *pool)
+{
+  /* Encode the two flags in the low two bits of the revision number. */
+  apr_int64_t number = rev_root->rev;
+  number = number * 4
+         + (inherit == svn_mergeinfo_nearest_ancestor ? 2 : 0)
+         + (adjust_inherited_mergeinfo ? 1 : 0);
+
+  return svn_fs_fs__combine_number_and_string(number, path, pool);
+}
+
+/* Calculates the mergeinfo for PATH under REV_ROOT using inheritance
+   type INHERIT.  Returns it in *MERGEINFO, or NULL if there is none.
+   The result is allocated in RESULT_POOL; SCRATCH_POOL is
+   used for temporary allocations.
+ */
+static svn_error_t *
+get_mergeinfo_for_path_internal(svn_mergeinfo_t *mergeinfo,
+                                svn_fs_root_t *rev_root,
+                                const char *path,
+                                svn_mergeinfo_inheritance_t inherit,
+                                svn_boolean_t adjust_inherited_mergeinfo,
+                                apr_pool_t *result_pool,
+                                apr_pool_t *scratch_pool)
+{
+  parent_path_t *parent_path, *nearest_ancestor;
+  apr_hash_t *proplist;
+  svn_string_t *mergeinfo_string;
+
+  path = svn_fs__canonicalize_abspath(path, scratch_pool);
+
+  SVN_ERR(open_path(&parent_path, rev_root, path, 0, NULL, scratch_pool));
+
+  if (inherit == svn_mergeinfo_nearest_ancestor && !
parent_path->parent)
+    return SVN_NO_ERROR;
+
+  if (inherit == svn_mergeinfo_nearest_ancestor)
+    nearest_ancestor = parent_path->parent;
+  else
+    nearest_ancestor = parent_path;
+
+  /* Walk up the parent chain until a node with mergeinfo is found. */
+  while (TRUE)
+    {
+      svn_boolean_t has_mergeinfo;
+
+      SVN_ERR(svn_fs_fs__dag_has_mergeinfo(&has_mergeinfo,
+                                           nearest_ancestor->node));
+      if (has_mergeinfo)
+        break;
+
+      /* No need to loop if we're looking for explicit mergeinfo. */
+      if (inherit == svn_mergeinfo_explicit)
+        {
+          return SVN_NO_ERROR;
+        }
+
+      nearest_ancestor = nearest_ancestor->parent;
+
+      /* Run out?  There's no mergeinfo. */
+      if (!nearest_ancestor)
+        {
+          return SVN_NO_ERROR;
+        }
+    }
+
+  SVN_ERR(svn_fs_fs__dag_get_proplist(&proplist, nearest_ancestor->node,
+                                      scratch_pool));
+  mergeinfo_string = svn_hash_gets(proplist, SVN_PROP_MERGEINFO);
+  if (!mergeinfo_string)
+    return svn_error_createf
+      (SVN_ERR_FS_CORRUPT, NULL,
+       _("Node-revision '%s@%ld' claims to have mergeinfo but doesn't"),
+       parent_path_path(nearest_ancestor, scratch_pool), rev_root->rev);
+
+  /* Parse the mergeinfo; store the result in *MERGEINFO. */
+  {
+    /* Issue #3896: If a node has syntactically invalid mergeinfo, then
+       treat it as if no mergeinfo is present rather than raising a parse
+       error. */
+    svn_error_t *err = svn_mergeinfo_parse(mergeinfo,
+                                           mergeinfo_string->data,
+                                           result_pool);
+    if (err)
+      {
+        if (err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
+          {
+            svn_error_clear(err);
+            err = NULL;
+            *mergeinfo = NULL;
+          }
+        return svn_error_trace(err);
+      }
+  }
+
+  /* If our nearest ancestor is the very path we inquired about, we
+     can return the mergeinfo results directly.  Otherwise, we're
+     inheriting the mergeinfo, so we need to a) remove non-inheritable
+     ranges and b) telescope the merged-from paths.
+  */
+  if (adjust_inherited_mergeinfo && (nearest_ancestor != parent_path))
+    {
+      svn_mergeinfo_t tmp_mergeinfo;
+
+      SVN_ERR(svn_mergeinfo_inheritable2(&tmp_mergeinfo, *mergeinfo,
+                                         NULL, SVN_INVALID_REVNUM,
+                                         SVN_INVALID_REVNUM, TRUE,
+                                         scratch_pool, scratch_pool));
+      SVN_ERR(svn_fs__append_to_merged_froms(mergeinfo, tmp_mergeinfo,
+                                             parent_path_relpath(
+                                               parent_path, nearest_ancestor,
+                                               scratch_pool),
+                                             result_pool));
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Caching wrapper around get_mergeinfo_for_path_internal().
+   The existence cache stores a one-character string ("1"/"0") telling
+   whether the path has mergeinfo at all; the mergeinfo itself lives
+   in a separate cache consulted only on a "1" hit.
+ */
+static svn_error_t *
+get_mergeinfo_for_path(svn_mergeinfo_t *mergeinfo,
+                       svn_fs_root_t *rev_root,
+                       const char *path,
+                       svn_mergeinfo_inheritance_t inherit,
+                       svn_boolean_t adjust_inherited_mergeinfo,
+                       apr_pool_t *result_pool,
+                       apr_pool_t *scratch_pool)
+{
+  fs_fs_data_t *ffd = rev_root->fs->fsap_data;
+  const char *cache_key;
+  svn_boolean_t found = FALSE;
+  svn_stringbuf_t *mergeinfo_exists;
+
+  *mergeinfo = NULL;
+
+  cache_key = mergeinfo_cache_key(path, rev_root, inherit,
+                                  adjust_inherited_mergeinfo, scratch_pool);
+  if (ffd->mergeinfo_existence_cache)
+    {
+      SVN_ERR(svn_cache__get((void **)&mergeinfo_exists, &found,
+                             ffd->mergeinfo_existence_cache,
+                             cache_key, result_pool));
+      if (found && mergeinfo_exists->data[0] == '1')
+        SVN_ERR(svn_cache__get((void **)mergeinfo, &found,
+                               ffd->mergeinfo_cache,
+                               cache_key, result_pool));
+    }
+
+  if (! found)
+    {
+      SVN_ERR(get_mergeinfo_for_path_internal(mergeinfo, rev_root, path,
+                                              inherit,
+                                              adjust_inherited_mergeinfo,
+                                              result_pool, scratch_pool));
+      if (ffd->mergeinfo_existence_cache)
+        {
+          mergeinfo_exists = svn_stringbuf_create(*mergeinfo ? "1" : "0",
+                                                  scratch_pool);
+          SVN_ERR(svn_cache__set(ffd->mergeinfo_existence_cache,
+                                 cache_key, mergeinfo_exists, scratch_pool));
+          if (*mergeinfo)
+            SVN_ERR(svn_cache__set(ffd->mergeinfo_cache,
+                                   cache_key, *mergeinfo, scratch_pool));
+        }
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Adds mergeinfo for each descendant of PATH (but not PATH itself)
+   under ROOT to RESULT_CATALOG.  Returned values are allocated in
+   RESULT_POOL; temporary values in SCRATCH_POOL. */
+static svn_error_t *
+add_descendant_mergeinfo(svn_mergeinfo_catalog_t result_catalog,
+                         svn_fs_root_t *root,
+                         const char *path,
+                         apr_pool_t *result_pool,
+                         apr_pool_t *scratch_pool)
+{
+  dag_node_t *this_dag;
+  svn_boolean_t go_down;
+
+  SVN_ERR(get_dag(&this_dag, root, path, TRUE, scratch_pool));
+  SVN_ERR(svn_fs_fs__dag_has_descendants_with_mergeinfo(&go_down,
+                                                        this_dag));
+  if (go_down)
+    SVN_ERR(crawl_directory_dag_for_mergeinfo(root,
+                                              path,
+                                              this_dag,
+                                              result_catalog,
+                                              result_pool,
+                                              scratch_pool));
+  return SVN_NO_ERROR;
+}
+
+
+/* Get the mergeinfo for a set of paths, returned in
+   *MERGEINFO_CATALOG.  Returned values are allocated in
+   POOL, while temporary values are allocated in a sub-pool.
*/
+static svn_error_t *
+get_mergeinfos_for_paths(svn_fs_root_t *root,
+                         svn_mergeinfo_catalog_t *mergeinfo_catalog,
+                         const apr_array_header_t *paths,
+                         svn_mergeinfo_inheritance_t inherit,
+                         svn_boolean_t include_descendants,
+                         svn_boolean_t adjust_inherited_mergeinfo,
+                         apr_pool_t *result_pool,
+                         apr_pool_t *scratch_pool)
+{
+  svn_mergeinfo_catalog_t result_catalog = svn_hash__make(result_pool);
+  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
+  int i;
+
+  for (i = 0; i < paths->nelts; i++)
+    {
+      svn_error_t *err;
+      svn_mergeinfo_t path_mergeinfo;
+      const char *path = APR_ARRAY_IDX(paths, i, const char *);
+
+      svn_pool_clear(iterpool);
+
+      err = get_mergeinfo_for_path(&path_mergeinfo, root, path,
+                                   inherit, adjust_inherited_mergeinfo,
+                                   result_pool, iterpool);
+      if (err)
+        {
+          /* Issue #3896 again: skip paths with unparsable mergeinfo
+             instead of failing the whole query. */
+          if (err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
+            {
+              svn_error_clear(err);
+              err = NULL;
+              path_mergeinfo = NULL;
+            }
+          else
+            {
+              return svn_error_trace(err);
+            }
+        }
+
+      if (path_mergeinfo)
+        svn_hash_sets(result_catalog, path, path_mergeinfo);
+      if (include_descendants)
+        SVN_ERR(add_descendant_mergeinfo(result_catalog, root, path,
+                                         result_pool, scratch_pool));
+    }
+  svn_pool_destroy(iterpool);
+
+  *mergeinfo_catalog = result_catalog;
+  return SVN_NO_ERROR;
+}
+
+
+/* Implements svn_fs_get_mergeinfo. */
+static svn_error_t *
+fs_get_mergeinfo(svn_mergeinfo_catalog_t *catalog,
+                 svn_fs_root_t *root,
+                 const apr_array_header_t *paths,
+                 svn_mergeinfo_inheritance_t inherit,
+                 svn_boolean_t include_descendants,
+                 svn_boolean_t adjust_inherited_mergeinfo,
+                 apr_pool_t *result_pool,
+                 apr_pool_t *scratch_pool)
+{
+  fs_fs_data_t *ffd = root->fs->fsap_data;
+
+  /* We require a revision root. */
+  if (root->is_txn_root)
+    return svn_error_create(SVN_ERR_FS_NOT_REVISION_ROOT, NULL, NULL);
+
+  /* We have to actually be able to find the mergeinfo metadata! */
+  if (! svn_fs_fs__fs_supports_mergeinfo(root->fs))
+    return svn_error_createf
+      (SVN_ERR_UNSUPPORTED_FEATURE, NULL,
+       _("Querying mergeinfo requires version %d of the FSFS filesystem "
+         "schema; filesystem '%s' uses only version %d"),
+       SVN_FS_FS__MIN_MERGEINFO_FORMAT, root->fs->path, ffd->format);
+
+  /* Retrieve a path -> mergeinfo hash mapping. */
+  return get_mergeinfos_for_paths(root, catalog, paths,
+                                  inherit,
+                                  include_descendants,
+                                  adjust_inherited_mergeinfo,
+                                  result_pool, scratch_pool);
+}
+
+
+/* The vtable associated with root objects.  Field order must match
+   the root_vtable_t declaration in fs-loader.h. */
+static root_vtable_t root_vtable = {
+  fs_paths_changed,
+  svn_fs_fs__check_path,
+  fs_node_history,
+  svn_fs_fs__node_id,
+  svn_fs_fs__node_created_rev,
+  fs_node_origin_rev,
+  fs_node_created_path,
+  fs_delete_node,
+  fs_copied_from,
+  fs_closest_copy,
+  fs_node_prop,
+  fs_node_proplist,
+  fs_change_node_prop,
+  fs_props_changed,
+  fs_dir_entries,
+  fs_make_dir,
+  fs_copy,
+  fs_revision_link,
+  fs_file_length,
+  fs_file_checksum,
+  fs_file_contents,
+  fs_try_process_file_contents,
+  fs_make_file,
+  fs_apply_textdelta,
+  fs_apply_text,
+  fs_contents_changed,
+  fs_get_file_delta_stream,
+  fs_merge,
+  fs_get_mergeinfo,
+};
+
+/* Construct a new root object in FS, allocated from POOL. */
+static svn_fs_root_t *
+make_root(svn_fs_t *fs,
+          apr_pool_t *pool)
+{
+  svn_fs_root_t *root = apr_pcalloc(pool, sizeof(*root));
+
+  root->fs = fs;
+  root->pool = pool;
+  root->vtable = &root_vtable;
+
+  return root;
+}
+
+
+/* Construct a root object referring to the root of REVISION in FS,
+   whose root directory is ROOT_DIR.  Create the new root in POOL.
*/
+static svn_fs_root_t *
+make_revision_root(svn_fs_t *fs,
+                   svn_revnum_t rev,
+                   dag_node_t *root_dir,
+                   apr_pool_t *pool)
+{
+  svn_fs_root_t *root = make_root(fs, pool);
+  fs_rev_root_data_t *frd = apr_pcalloc(root->pool, sizeof(*frd));
+
+  root->is_txn_root = FALSE;
+  root->rev = rev;
+
+  frd->root_dir = root_dir;
+  frd->copyfrom_cache = svn_hash__make(root->pool);
+
+  root->fsap_data = frd;
+
+  return root;
+}
+
+
+/* Construct a root object referring to the root of the transaction
+   named TXN and based on revision BASE_REV in FS, with FLAGS to
+   describe transaction's behavior.  Create the new root in POOL. */
+static svn_error_t *
+make_txn_root(svn_fs_root_t **root_p,
+              svn_fs_t *fs,
+              const char *txn,
+              svn_revnum_t base_rev,
+              apr_uint32_t flags,
+              apr_pool_t *pool)
+{
+  svn_fs_root_t *root = make_root(fs, pool);
+  fs_txn_root_data_t *frd = apr_pcalloc(root->pool, sizeof(*frd));
+
+  root->is_txn_root = TRUE;
+  root->txn = apr_pstrdup(root->pool, txn);
+  root->txn_flags = flags;
+  root->rev = base_rev;
+
+  frd->txn_id = txn;
+
+  /* Because this cache actually tries to invalidate elements, keep
+     the number of elements per page down.
+
+     Note that since dag_node_cache_invalidate uses svn_cache__iter,
+     this *cannot* be a memcache-based cache.  The cache id is
+     prefixed with the txn name so distinct transactions don't share
+     entries. */
+  SVN_ERR(svn_cache__create_inprocess(&(frd->txn_node_cache),
+                                      svn_fs_fs__dag_serialize,
+                                      svn_fs_fs__dag_deserialize,
+                                      APR_HASH_KEY_STRING,
+                                      32, 20, FALSE,
+                                      apr_pstrcat(pool, txn, ":TXN",
+                                                  (char *)NULL),
+                                      root->pool));
+
+  /* Initialize transaction-local caches in FS.
+
+     Note that we cannot put those caches in frd because that content
+     fs root object is not available where we would need it. */
+  SVN_ERR(svn_fs_fs__initialize_txn_caches(fs, txn, pool));
+
+  root->fsap_data = frd;
+
+  *root_p = root;
+  return SVN_NO_ERROR;
+}
+
+
+
+/* Verify. */
+
+/* Return a printable representation of NODE's id, allocated in POOL,
+   for use in error messages. */
+static APR_INLINE const char *
+stringify_node(dag_node_t *node,
+               apr_pool_t *pool)
+{
+  /* ### TODO: print some PATH@REV to it, too.
*/
+  return svn_fs_fs__id_unparse(svn_fs_fs__dag_get_id(node), pool)->data;
+}
+
+/* Check metadata sanity on NODE, and on its children.  Manually verify
+   information for DAG nodes in revision REV, and trust the metadata
+   accuracy for nodes belonging to older revisions.  Returns an
+   SVN_ERR_FS_CORRUPT error describing the first inconsistency found. */
+static svn_error_t *
+verify_node(dag_node_t *node,
+            svn_revnum_t rev,
+            apr_pool_t *pool)
+{
+  svn_boolean_t has_mergeinfo;
+  apr_int64_t mergeinfo_count;
+  const svn_fs_id_t *pred_id;
+  svn_fs_t *fs = svn_fs_fs__dag_get_fs(node);
+  int pred_count;
+  svn_node_kind_t kind;
+  apr_pool_t *iterpool = svn_pool_create(pool);
+
+  /* Fetch some data. */
+  SVN_ERR(svn_fs_fs__dag_has_mergeinfo(&has_mergeinfo, node));
+  SVN_ERR(svn_fs_fs__dag_get_mergeinfo_count(&mergeinfo_count, node));
+  SVN_ERR(svn_fs_fs__dag_get_predecessor_id(&pred_id, node));
+  SVN_ERR(svn_fs_fs__dag_get_predecessor_count(&pred_count, node));
+  kind = svn_fs_fs__dag_node_kind(node);
+
+  /* Sanity check. */
+  if (mergeinfo_count < 0)
+    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+                             "Negative mergeinfo-count %" APR_INT64_T_FMT
+                             " on node '%s'",
+                             mergeinfo_count, stringify_node(node, iterpool));
+
+  /* Issue #4129. (This check will explicitly catch non-root instances too.)
+     A node's predecessor-count must be exactly one more than its
+     predecessor's predecessor-count. */
+  if (pred_id)
+    {
+      dag_node_t *pred;
+      int pred_pred_count;
+      SVN_ERR(svn_fs_fs__dag_get_node(&pred, fs, pred_id, iterpool));
+      SVN_ERR(svn_fs_fs__dag_get_predecessor_count(&pred_pred_count, pred));
+      if (pred_pred_count+1 != pred_count)
+        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+                                 "Predecessor count mismatch: "
+                                 "%s has %d, but %s has %d",
+                                 stringify_node(node, iterpool), pred_count,
+                                 stringify_node(pred, iterpool),
+                                 pred_pred_count);
+    }
+
+  /* Kind-dependent verifications.
*/ + if (kind == svn_node_none) + { + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + "Node '%s' has kind 'none'", + stringify_node(node, iterpool)); + } + if (kind == svn_node_file) + { + if (has_mergeinfo != mergeinfo_count) /* comparing int to bool */ + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + "File node '%s' has inconsistent mergeinfo: " + "has_mergeinfo=%d, " + "mergeinfo_count=%" APR_INT64_T_FMT, + stringify_node(node, iterpool), + has_mergeinfo, mergeinfo_count); + } + if (kind == svn_node_dir) + { + apr_hash_t *entries; + apr_hash_index_t *hi; + apr_int64_t children_mergeinfo = 0; + + SVN_ERR(svn_fs_fs__dag_dir_entries(&entries, node, pool)); + + /* Compute CHILDREN_MERGEINFO. */ + for (hi = apr_hash_first(pool, entries); + hi; + hi = apr_hash_next(hi)) + { + svn_fs_dirent_t *dirent = svn__apr_hash_index_val(hi); + dag_node_t *child; + svn_revnum_t child_rev; + apr_int64_t child_mergeinfo; + + svn_pool_clear(iterpool); + + /* Compute CHILD_REV. */ + SVN_ERR(svn_fs_fs__dag_get_node(&child, fs, dirent->id, iterpool)); + SVN_ERR(svn_fs_fs__dag_get_revision(&child_rev, child, iterpool)); + + if (child_rev == rev) + SVN_ERR(verify_node(child, rev, iterpool)); + + SVN_ERR(svn_fs_fs__dag_get_mergeinfo_count(&child_mergeinfo, child)); + children_mergeinfo += child_mergeinfo; + } + + /* Side-effect of issue #4129. */ + if (children_mergeinfo+has_mergeinfo != mergeinfo_count) + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + "Mergeinfo-count discrepancy on '%s': " + "expected %" APR_INT64_T_FMT "+%d, " + "counted %" APR_INT64_T_FMT, + stringify_node(node, iterpool), + mergeinfo_count, has_mergeinfo, + children_mergeinfo); + } + + svn_pool_destroy(iterpool); + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__verify_root(svn_fs_root_t *root, + apr_pool_t *pool) +{ + svn_fs_t *fs = root->fs; + dag_node_t *root_dir; + + /* Issue #4129: bogus pred-counts and minfo-cnt's on the root node-rev + (and elsewhere). 
     This code makes more thorough checks than the
     commit-time checks in validate_root_noderev(). */

  /* Callers should disable caches by setting SVN_FS_CONFIG_FSFS_CACHE_NS;
     see r1462436.

     When this code is called in the library, we want to ensure we
     use the on-disk data --- rather than some data that was read
     in the possibly-distant past and cached since. */

  /* Find the root directory of ROOT: a txn root must be looked up via its
     transaction id, a revision root carries it directly. */
  if (root->is_txn_root)
    {
      fs_txn_root_data_t *frd = root->fsap_data;
      SVN_ERR(svn_fs_fs__dag_txn_root(&root_dir, fs, frd->txn_id, pool));
    }
  else
    {
      fs_rev_root_data_t *frd = root->fsap_data;
      root_dir = frd->root_dir;
    }

  /* Recursively verify ROOT_DIR. */
  SVN_ERR(verify_node(root_dir, root->rev, pool));

  /* Verify explicitly the predecessor of the root. */
  {
    const svn_fs_id_t *pred_id;

    /* Only r0 should have no predecessor.  The double negation coerces
       both operands to 0/1 before comparing "has a predecessor" against
       "is a non-zero revision". */
    SVN_ERR(svn_fs_fs__dag_get_predecessor_id(&pred_id, root_dir));
    if (! root->is_txn_root && !!pred_id != !!root->rev)
      return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                               "r%ld's root node's predecessor is "
                               "unexpectedly '%s'",
                               root->rev,
                               (pred_id
                                ? svn_fs_fs__id_unparse(pred_id, pool)->data
                                : "(null)"));
    /* A transaction root, by contrast, must always have a predecessor. */
    if (root->is_txn_root && !pred_id)
      return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                               "Transaction '%s''s root node's predecessor is "
                               "unexpectedly NULL",
                               root->txn);

    /* Check the predecessor's revision. */
    if (pred_id)
      {
        svn_revnum_t pred_rev = svn_fs_fs__id_rev(pred_id);
        if (! root->is_txn_root && pred_rev+1 != root->rev)
          /* Issue #4129.
 */
          return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                                   "r%ld's root node's predecessor is r%ld"
                                   " but should be r%ld",
                                   root->rev, pred_rev, root->rev - 1);
        /* For a txn root, ROOT->rev holds the base revision and the
           root node's predecessor must come from exactly that revision.
           (NOTE(review): inferred from this check alone -- confirm
           against the txn-root construction code.) */
        if (root->is_txn_root && pred_rev != root->rev)
          return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                                   "Transaction '%s''s root node's predecessor"
                                   " is r%ld"
                                   " but should be r%ld",
                                   root->txn, pred_rev, root->rev);
      }
  }

  return SVN_NO_ERROR;
}
diff --git a/subversion/libsvn_fs_fs/tree.h b/subversion/libsvn_fs_fs/tree.h
new file mode 100644
index 0000000..34fa0a23b
--- /dev/null
+++ b/subversion/libsvn_fs_fs/tree.h
@@ -0,0 +1,98 @@
/* tree.h : internal interface to tree node functions
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */

#ifndef SVN_LIBSVN_FS_TREE_H
#define SVN_LIBSVN_FS_TREE_H

#include "fs.h"

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */



/* In POOL, create an instance of a DAG node 1st level cache.
   The POOL will be cleared at regular intervals.
 */
fs_fs_dag_cache_t*
svn_fs_fs__create_dag_cache(apr_pool_t *pool);

/* Set *ROOT_P to the root directory of revision REV in filesystem FS.
   Allocate the structure in POOL. */
svn_error_t *svn_fs_fs__revision_root(svn_fs_root_t **root_p, svn_fs_t *fs,
                                      svn_revnum_t rev, apr_pool_t *pool);

/* Does nothing, but included for Subversion 1.0.x compatibility. */
svn_error_t *svn_fs_fs__deltify(svn_fs_t *fs, svn_revnum_t rev,
                                apr_pool_t *pool);

/* Commit the transaction TXN as a new revision.  Return the new
   revision in *NEW_REV.  If the transaction conflicts with other
   changes return SVN_ERR_FS_CONFLICT and set *CONFLICT_P to a string
   that details the cause of the conflict.  Perform temporary
   allocations in POOL. */
svn_error_t *svn_fs_fs__commit_txn(const char **conflict_p,
                                   svn_revnum_t *new_rev, svn_fs_txn_t *txn,
                                   apr_pool_t *pool);

/* Set ROOT_P to the root directory of transaction TXN.  Allocate the
   structure in POOL. */
svn_error_t *svn_fs_fs__txn_root(svn_fs_root_t **root_p, svn_fs_txn_t *txn,
                                 apr_pool_t *pool);


/* Set KIND_P to the node kind of the node at PATH in ROOT.
   Allocate the structure in POOL. */
svn_error_t *
svn_fs_fs__check_path(svn_node_kind_t *kind_p,
                      svn_fs_root_t *root,
                      const char *path,
                      apr_pool_t *pool);

/* Implement root_vtable_t.node_id(). */
svn_error_t *
svn_fs_fs__node_id(const svn_fs_id_t **id_p,
                   svn_fs_root_t *root,
                   const char *path,
                   apr_pool_t *pool);

/* Set *REVISION to the revision in which PATH under ROOT was created.
   Use POOL for any temporary allocations.  If PATH is in an
   uncommitted transaction, *REVISION will be set to
   SVN_INVALID_REVNUM. */
svn_error_t *
svn_fs_fs__node_created_rev(svn_revnum_t *revision,
                            svn_fs_root_t *root,
                            const char *path,
                            apr_pool_t *pool);

/* Verify metadata for ROOT: recursively check DAG-node sanity (predecessor
   counts, mergeinfo counts) and the root node's predecessor, returning an
   SVN_ERR_FS_CORRUPT error on the first inconsistency found.
   ### Currently only implemented for revision roots. */
svn_error_t *
svn_fs_fs__verify_root(svn_fs_root_t *root,
                       apr_pool_t *pool);

#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* SVN_LIBSVN_FS_TREE_H */