summaryrefslogtreecommitdiffstats
path: root/subversion/libsvn_repos/load.c
diff options
context:
space:
mode:
Diffstat (limited to 'subversion/libsvn_repos/load.c')
-rw-r--r--subversion/libsvn_repos/load.c684
1 files changed, 684 insertions, 0 deletions
diff --git a/subversion/libsvn_repos/load.c b/subversion/libsvn_repos/load.c
new file mode 100644
index 0000000..691ff92
--- /dev/null
+++ b/subversion/libsvn_repos/load.c
@@ -0,0 +1,684 @@
+/* load.c --- parsing a 'dumpfile'-formatted stream.
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+
+#include "svn_private_config.h"
+#include "svn_hash.h"
+#include "svn_pools.h"
+#include "svn_error.h"
+#include "svn_fs.h"
+#include "svn_repos.h"
+#include "svn_string.h"
+#include "svn_path.h"
+#include "svn_props.h"
+#include "repos.h"
+#include "svn_private_config.h"
+#include "svn_mergeinfo.h"
+#include "svn_checksum.h"
+#include "svn_subst.h"
+#include "svn_ctype.h"
+
+#include <apr_lib.h>
+
+#include "private/svn_dep_compat.h"
+#include "private/svn_mergeinfo_private.h"
+
+/*----------------------------------------------------------------------*/
+
+/** The parser and related helper funcs **/
+
+
+static svn_error_t *
+stream_ran_dry(void)
+{
+ return svn_error_create(SVN_ERR_INCOMPLETE_DATA, NULL,
+ _("Premature end of content data in dumpstream"));
+}
+
+static svn_error_t *
+stream_malformed(void)
+{
+ return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
+ _("Dumpstream data appears to be malformed"));
+}
+
+/* Set *HEADERS to a new hash allocated in POOL and fill it from a series
+   of RFC822-style "Name: value" header lines read from STREAM.  Keys and
+   values are const char * pointers into the line buffers themselves
+   (each line is split in place at its ':'); lines read here are
+   allocated in POOL.
+
+   The block ends at the first empty line, which is consumed from the
+   stream and discarded.
+
+   If the caller has already read the first header line, pass it as
+   FIRST_HEADER; its buffer is modified in place and referenced by the
+   hash.  Otherwise pass NULL.
+
+   Returns the stream_ran_dry() error if the stream ends on a non-empty
+   line, and SVN_ERR_STREAM_MALFORMED_DATA if a line has no ':' or
+   nothing after it.
+ */
+static svn_error_t *
+read_header_block(svn_stream_t *stream,
+                  svn_stringbuf_t *first_header,
+                  apr_hash_t **headers,
+                  apr_pool_t *pool)
+{
+  svn_stringbuf_t *pending = first_header;
+
+  *headers = apr_hash_make(pool);
+
+  for (;;)
+    {
+      svn_stringbuf_t *line;
+      svn_boolean_t eof = FALSE;
+      char *colon;
+      apr_size_t value_offset;
+
+      if (pending != NULL)
+        {
+          /* Use the caller-supplied line exactly once. */
+          line = pending;
+          pending = NULL;
+        }
+      else
+        {
+          SVN_ERR(svn_stream_readline(stream, &line, "\n", &eof, pool));
+        }
+
+      /* An empty line terminates the header block. */
+      if (svn_stringbuf_isempty(line))
+        break;
+
+      /* A non-empty line that hit EOF is a truncated header. */
+      if (eof)
+        return stream_ran_dry();
+
+      /* Locate the name/value separator. */
+      colon = strchr(line->data, ':');
+      if (colon == NULL)
+        return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
+                                 _("Dump stream contains a malformed "
+                                   "header (with no ':') at '%.20s'"),
+                                 line->data);
+
+      /* Terminate the name in place; LINE->data now reads as the name. */
+      *colon = '\0';
+
+      /* The value starts after the (former) colon and the byte that
+         follows it, which is expected to be a space. */
+      value_offset = (apr_size_t)(colon - line->data) + 2;
+      if (value_offset > line->len)
+        return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
+                                 _("Dump stream contains a malformed "
+                                   "header (with no value) at '%.20s'"),
+                                 line->data);
+
+      svn_hash_sets(*headers, line->data, line->data + value_offset);
+    }
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Set *PBUF to a NUL-terminated buffer, allocated in POOL, holding LEN
+   bytes read from STREAM.  Also read the single newline that must follow
+   the data.  Increase *ACTUAL_LENGTH by the total number of bytes read
+   from STREAM (this happens even when the read comes up short).
+
+   Returns the stream_ran_dry() error if STREAM ends before LEN bytes
+   plus the newline have been read, and the stream_malformed() error if
+   the byte following the data is not '\n' or if LEN is too large to
+   allocate a buffer for.  *PBUF is only set on success. */
+static svn_error_t *
+read_key_or_val(char **pbuf,
+                svn_filesize_t *actual_length,
+                svn_stream_t *stream,
+                apr_size_t len,
+                apr_pool_t *pool)
+{
+  char *buf;
+  apr_size_t numread;
+  char c;
+
+  /* LEN + 1 must not wrap around to 0: that would allocate an empty
+     buffer and then read LEN bytes into it. */
+  if (len == APR_SIZE_MAX)
+    return svn_error_trace(stream_malformed());
+
+  buf = apr_pcalloc(pool, len + 1);
+
+  numread = len;
+  SVN_ERR(svn_stream_read(stream, buf, &numread));
+  *actual_length += numread;
+  if (numread != len)
+    return svn_error_trace(stream_ran_dry());
+  buf[len] = '\0';
+
+  /* Suck up extra newline after key data */
+  numread = 1;
+  SVN_ERR(svn_stream_read(stream, &c, &numread));
+  *actual_length += numread;
+  if (numread != 1)
+    return svn_error_trace(stream_ran_dry());
+  if (c != '\n')
+    return svn_error_trace(stream_malformed());
+
+  *pbuf = buf;
+  return SVN_NO_ERROR;
+}
+
+
+/* Read CONTENT_LENGTH bytes from STREAM, parsing the bytes as an
+   encoded Subversion properties hash, and making multiple calls to
+   PARSE_FNS->set_*_property on RECORD_BATON (depending on the value
+   of IS_NODE.)
+
+   The block is a sequence of "K <len>" / "V <len>" pairs (set a
+   property) and "D <len>" entries (delete a node property), each
+   followed by that many bytes of data and a newline.  Parsing stops at
+   a "PROPS-END" line or once CONTENT_LENGTH bytes have been consumed.
+   "D" entries are rejected as malformed for revision records or when
+   PARSE_FNS has no delete_node_property callback.
+
+   Every length is parsed as an unsigned number small enough that a
+   NUL-terminated buffer of that size can be allocated; negative or
+   oversized lengths yield an error instead of being cast to a size.
+
+   Set *ACTUAL_LENGTH to the number of bytes consumed from STREAM.
+   If an error is returned, the value of *ACTUAL_LENGTH is undefined.
+
+   PARSE_BATON is currently unused.  Use POOL for all allocations. */
+static svn_error_t *
+parse_property_block(svn_stream_t *stream,
+                     svn_filesize_t content_length,
+                     const svn_repos_parse_fns3_t *parse_fns,
+                     void *record_baton,
+                     void *parse_baton,
+                     svn_boolean_t is_node,
+                     svn_filesize_t *actual_length,
+                     apr_pool_t *pool)
+{
+  svn_stringbuf_t *strbuf;
+  apr_pool_t *proppool = svn_pool_create(pool);
+
+  *actual_length = 0;
+  while (content_length != *actual_length)
+    {
+      char *buf;  /* a pointer into the stringbuf's data */
+      svn_boolean_t eof;
+
+      /* Everything allocated for the previous property is dead now. */
+      svn_pool_clear(proppool);
+
+      /* Read a key length line.  (Actually, it might be PROPS_END). */
+      SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool));
+
+      if (eof)
+        {
+          /* We could just use stream_ran_dry() or stream_malformed(),
+             but better to give a non-generic property block error. */
+          return svn_error_create
+            (SVN_ERR_STREAM_MALFORMED_DATA, NULL,
+             _("Incomplete or unterminated property block"));
+        }
+
+      *actual_length += (strbuf->len + 1); /* +1 because we read a \n too. */
+      buf = strbuf->data;
+
+      if (! strcmp(buf, "PROPS-END"))
+        break; /* no more properties. */
+
+      else if ((buf[0] == 'K') && (buf[1] == ' '))
+        {
+          char *keybuf;
+          apr_uint64_t len;
+
+          /* Cap at APR_SIZE_MAX - 1 so that LEN + 1 (room for the
+             terminating NUL) cannot overflow an apr_size_t. */
+          SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0,
+                                        APR_SIZE_MAX - 1, 10));
+          SVN_ERR(read_key_or_val(&keybuf, actual_length,
+                                  stream, (apr_size_t)len, proppool));
+
+          /* Read a val length line */
+          SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool));
+          if (eof)
+            return stream_ran_dry();
+
+          *actual_length += (strbuf->len + 1); /* +1 because we read \n too */
+          buf = strbuf->data;
+
+          if ((buf[0] == 'V') && (buf[1] == ' '))
+            {
+              svn_string_t propstring;
+              char *valbuf;
+              apr_uint64_t vallen;
+
+              /* Parse the value length the same way as the key length:
+                 unsigned and bounded, so a negative or huge number is
+                 an error rather than a bogus apr_size_t. */
+              SVN_ERR(svn_cstring_strtoui64(&vallen, buf + 2, 0,
+                                            APR_SIZE_MAX - 1, 10));
+              propstring.len = (apr_size_t)vallen;
+              SVN_ERR(read_key_or_val(&valbuf, actual_length,
+                                      stream, propstring.len, proppool));
+              propstring.data = valbuf;
+
+              /* Now, send the property pair to the vtable! */
+              if (is_node)
+                {
+                  SVN_ERR(parse_fns->set_node_property(record_baton,
+                                                       keybuf,
+                                                       &propstring));
+                }
+              else
+                {
+                  SVN_ERR(parse_fns->set_revision_property(record_baton,
+                                                           keybuf,
+                                                           &propstring));
+                }
+            }
+          else
+            return stream_malformed(); /* didn't find expected 'V' line */
+        }
+      else if ((buf[0] == 'D') && (buf[1] == ' '))
+        {
+          char *keybuf;
+          apr_uint64_t len;
+
+          /* Same bound as for 'K' lines: leave room for the NUL. */
+          SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0,
+                                        APR_SIZE_MAX - 1, 10));
+          SVN_ERR(read_key_or_val(&keybuf, actual_length,
+                                  stream, (apr_size_t)len, proppool));
+
+          /* We don't expect these in revision properties, and if we see
+             one when we don't have a delete_node_property callback,
+             then we're seeing a v3 feature in a v2 dump. */
+          if (!is_node || !parse_fns->delete_node_property)
+            return stream_malformed();
+
+          SVN_ERR(parse_fns->delete_node_property(record_baton, keybuf));
+        }
+      else
+        return stream_malformed(); /* didn't find expected 'K' line */
+
+    } /* while (content_length != *actual_length) */
+
+  svn_pool_destroy(proppool);
+  return SVN_NO_ERROR;
+}
+
+
+/* Read CONTENT_LENGTH bytes from STREAM and push them to the consumer
+   for RECORD_BATON, using BUFFER (of size BUFLEN) to move the data in
+   chunks.
+
+   If IS_DELTA is false, the bytes are written to the stream obtained
+   from PARSE_FNS->set_fulltext.  If IS_DELTA is true, they are fed to
+   an svndiff parser (allocated in POOL) driving the window handler
+   obtained from PARSE_FNS->apply_textdelta.  In either case, if the
+   callback provides no sink, the bytes are still read from STREAM and
+   simply discarded.
+
+   A negative CONTENT_LENGTH is rejected with the stream_malformed()
+   error before any callback is invoked.  Returns the stream_ran_dry()
+   error if STREAM ends early, and SVN_ERR_STREAM_UNEXPECTED_EOF if the
+   sink accepts fewer bytes than were read. */
+static svn_error_t *
+parse_text_block(svn_stream_t *stream,
+                 svn_filesize_t content_length,
+                 svn_boolean_t is_delta,
+                 const svn_repos_parse_fns3_t *parse_fns,
+                 void *record_baton,
+                 char *buffer,
+                 apr_size_t buflen,
+                 apr_pool_t *pool)
+{
+  svn_stream_t *text_stream = NULL;
+  apr_size_t num_to_read, rlen, wlen;
+
+  /* A negative length would slip past the chunk-size clamp below and be
+     cast to an enormous apr_size_t, making us read far more than BUFLEN
+     bytes into BUFFER. */
+  if (content_length < 0)
+    return stream_malformed();
+
+  if (is_delta)
+    {
+      svn_txdelta_window_handler_t wh;
+      void *whb;
+
+      SVN_ERR(parse_fns->apply_textdelta(&wh, &whb, record_baton));
+      if (wh)
+        text_stream = svn_txdelta_parse_svndiff(wh, whb, TRUE, pool);
+    }
+  else
+    {
+      /* Get a stream to which we can push the data. */
+      SVN_ERR(parse_fns->set_fulltext(&text_stream, record_baton));
+    }
+
+  /* If there are no contents to read, just write an empty buffer
+     through our callback. */
+  if (content_length == 0)
+    {
+      wlen = 0;
+      if (text_stream)
+        SVN_ERR(svn_stream_write(text_stream, "", &wlen));
+    }
+
+  /* Regardless of whether or not we have a sink for our data, we
+     need to read it. */
+  while (content_length)
+    {
+      /* Never ask for more than BUFFER can hold. */
+      if (content_length >= (svn_filesize_t)buflen)
+        rlen = buflen;
+      else
+        rlen = (apr_size_t) content_length;
+
+      num_to_read = rlen;
+      SVN_ERR(svn_stream_read(stream, buffer, &rlen));
+      content_length -= rlen;
+      if (rlen != num_to_read)
+        return stream_ran_dry();
+
+      if (text_stream)
+        {
+          /* write however many bytes you read. */
+          wlen = rlen;
+          SVN_ERR(svn_stream_write(text_stream, buffer, &wlen));
+          if (wlen != rlen)
+            {
+              /* Uh oh, didn't write as many bytes as we read. */
+              return svn_error_create(SVN_ERR_STREAM_UNEXPECTED_EOF, NULL,
+                                      _("Unexpected EOF writing contents"));
+            }
+        }
+    }
+
+  /* If we opened a stream, we must close it. */
+  if (text_stream)
+    SVN_ERR(svn_stream_close(text_stream));
+
+  return SVN_NO_ERROR;
+}
+
+
+
+/* Parse VERSIONSTRING, which must consist of the dumpfile magic header
+   immediately followed by ':' and a format number, and store that number
+   in *VERSION.
+
+   Returns SVN_ERR_STREAM_MALFORMED_DATA if the string does not have that
+   shape or if the number is greater than
+   SVN_REPOS_DUMPFILE_FORMAT_VERSION; *VERSION is untouched on error. */
+static svn_error_t *
+parse_format_version(int *version,
+                     const char *versionstring)
+{
+  static const int magic_len = sizeof(SVN_REPOS_DUMPFILE_MAGIC_HEADER) - 1;
+  const char *colon = strchr(versionstring, ':');
+  svn_boolean_t header_ok;
+  int parsed;
+
+  /* The first ':' must sit directly after the magic header text. */
+  header_ok = (colon != NULL
+               && colon == (versionstring + magic_len)
+               && strncmp(versionstring,
+                          SVN_REPOS_DUMPFILE_MAGIC_HEADER,
+                          magic_len) == 0);
+  if (! header_ok)
+    {
+      return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
+                               _("Malformed dumpfile header '%s'"),
+                               versionstring);
+    }
+
+  SVN_ERR(svn_cstring_atoi(&parsed, colon + 1));
+
+  if (parsed > SVN_REPOS_DUMPFILE_FORMAT_VERSION)
+    {
+      return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
+                               _("Unsupported dumpfile version: %d"),
+                               parsed);
+    }
+
+  *version = parsed;
+  return SVN_NO_ERROR;
+}
+
+
+
+/*----------------------------------------------------------------------*/
+
+/** The public routines **/
+
+/* Parse the dumpfile-formatted STREAM, calling the callbacks in
+   PARSE_FNS for every record found.  PARSE_BATON is handed to the
+   magic_header_record, uuid_record and new_revision_record callbacks.
+
+   The first line must be the dumpfile magic header; it is validated by
+   parse_format_version() and, when PARSE_FNS provides a
+   magic_header_record callback, the version is reported through it.
+
+   If DELTAS_ARE_TEXT is true, text content is always delivered through
+   set_fulltext, even when the record's headers mark it as a delta.
+
+   If CANCEL_FUNC is non-NULL it is called with CANCEL_BATON once per
+   iteration of the main loop, i.e. before each line read while looking
+   for the next record.
+
+   POOL holds the read buffer and is the parent of the per-line,
+   per-revision and per-node subpools used below. */
+svn_error_t *
+svn_repos_parse_dumpstream3(svn_stream_t *stream,
+ const svn_repos_parse_fns3_t *parse_fns,
+ void *parse_baton,
+ svn_boolean_t deltas_are_text,
+ svn_cancel_func_t cancel_func,
+ void *cancel_baton,
+ apr_pool_t *pool)
+{
+ svn_boolean_t eof;
+ svn_stringbuf_t *linebuf;
+ void *rev_baton = NULL;
+ /* Scratch buffer shared by every text-block read and by the loop that
+ skips unclaimed content bytes. */
+ char *buffer = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE);
+ apr_size_t buflen = SVN__STREAM_CHUNK_SIZE;
+ apr_pool_t *linepool = svn_pool_create(pool);
+ apr_pool_t *revpool = svn_pool_create(pool);
+ apr_pool_t *nodepool = svn_pool_create(pool);
+ int version;
+
+ SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool));
+ if (eof)
+ return stream_ran_dry();
+
+ /* The first two lines of the stream are the dumpfile-format version
+ number, and a blank line. To preserve backward compatibility,
+ don't assume the existence of newer parser-vtable functions. */
+ SVN_ERR(parse_format_version(&version, linebuf->data));
+ if (parse_fns->magic_header_record != NULL)
+ SVN_ERR(parse_fns->magic_header_record(version, parse_baton, pool));
+
+ /* A dumpfile "record" is defined to be a header-block of
+ rfc822-style headers, possibly followed by a content-block.
+
+ - A header-block is always terminated by a single blank line (\n\n)
+
+ - We know whether the record has a content-block by looking for
+ a 'Content-length:' header. The content-block will always be
+ of a specific length, plus an extra newline.
+
+ Once a record is fully sucked from the stream, an indeterminate
+ number of blank lines (or lines that begin with whitespace) may
+ follow before the next record (or the end of the stream.)
+ */
+
+ while (1)
+ {
+ apr_hash_t *headers;
+ /* Only assigned, and only read, when FOUND_NODE is TRUE. */
+ void *node_baton;
+ svn_boolean_t found_node = FALSE;
+ svn_boolean_t old_v1_with_cl = FALSE;
+ const char *content_length;
+ const char *prop_cl;
+ const char *text_cl;
+ const char *value;
+ /* Set by parse_property_block(); only read in the old-v1 branch,
+ which is reached only after that call has been made. */
+ svn_filesize_t actual_prop_length;
+
+ /* Clear our per-line pool. */
+ svn_pool_clear(linepool);
+
+ /* Check for cancellation. */
+ if (cancel_func)
+ SVN_ERR(cancel_func(cancel_baton));
+
+ /* Keep reading blank lines until we discover a new record, or until
+ the stream runs out. */
+ SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool));
+
+ if (eof)
+ {
+ if (svn_stringbuf_isempty(linebuf))
+ break; /* end of stream, go home. */
+ else
+ return stream_ran_dry();
+ }
+
+ if ((linebuf->len == 0) || (svn_ctype_isspace(linebuf->data[0])))
+ continue; /* empty line ... loop */
+
+ /*** Found the beginning of a new record. ***/
+
+ /* The last line we read better be a header of some sort.
+ Read the whole header-block into a hash. */
+ SVN_ERR(read_header_block(stream, linebuf, &headers, linepool));
+
+ /*** Handle the various header blocks. ***/
+
+ /* Is this a revision record? */
+ if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_REVISION_NUMBER))
+ {
+ /* If we already have a rev_baton open, we need to close it
+ and clear the per-revision subpool. */
+ if (rev_baton != NULL)
+ {
+ SVN_ERR(parse_fns->close_revision(rev_baton));
+ svn_pool_clear(revpool);
+ }
+
+ SVN_ERR(parse_fns->new_revision_record(&rev_baton,
+ headers, parse_baton,
+ revpool));
+ }
+ /* Or is this, perhaps, a node record? */
+ else if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_PATH))
+ {
+ /* NOTE(review): REV_BATON is still NULL here if a node record
+ appears before any revision record; the callback receives it
+ as-is. */
+ SVN_ERR(parse_fns->new_node_record(&node_baton,
+ headers,
+ rev_baton,
+ nodepool));
+ found_node = TRUE;
+ }
+ /* Or is this the repos UUID? */
+ else if ((value = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_UUID)))
+ {
+ SVN_ERR(parse_fns->uuid_record(value, parse_baton, pool));
+ }
+ /* Or perhaps a dumpfile format? */
+ /* ### TODO: use parse_format_version */
+ else if ((value = svn_hash_gets(headers,
+ SVN_REPOS_DUMPFILE_MAGIC_HEADER)))
+ {
+ /* ### someday, switch modes of operation here. */
+ SVN_ERR(svn_cstring_atoi(&version, value));
+ }
+ /* Or is this bogosity?! */
+ else
+ {
+ /* What the heck is this record?!? */
+ return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
+ _("Unrecognized record type in stream"));
+ }
+
+ /* Need 3 values below to determine v1 dump type
+
+ Old (pre 0.14?) v1 dumps don't have Prop-content-length
+ and Text-content-length fields, but always have a properties
+ block in a block with Content-Length > 0 */
+
+ content_length = svn_hash_gets(headers,
+ SVN_REPOS_DUMPFILE_CONTENT_LENGTH);
+ prop_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH);
+ text_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH);
+ old_v1_with_cl =
+ version == 1 && content_length && ! prop_cl && ! text_cl;
+
+ /* Is there a props content-block to parse? */
+ if (prop_cl || old_v1_with_cl)
+ {
+ const char *delta = svn_hash_gets(headers,
+ SVN_REPOS_DUMPFILE_PROP_DELTA);
+ svn_boolean_t is_delta = (delta && strcmp(delta, "true") == 0);
+
+ /* First, remove all node properties, unless this is a delta
+ property block. */
+ if (found_node && !is_delta)
+ SVN_ERR(parse_fns->remove_node_props(node_baton));
+
+ SVN_ERR(parse_property_block
+ (stream,
+ svn__atoui64(prop_cl ? prop_cl : content_length),
+ parse_fns,
+ found_node ? node_baton : rev_baton,
+ parse_baton,
+ found_node,
+ &actual_prop_length,
+ found_node ? nodepool : revpool));
+ }
+
+ /* Is there a text content-block to parse? */
+ if (text_cl)
+ {
+ const char *delta = svn_hash_gets(headers,
+ SVN_REPOS_DUMPFILE_TEXT_DELTA);
+ svn_boolean_t is_delta = FALSE;
+ /* When the caller wants deltas treated as plain text, the
+ text-delta header is ignored and IS_DELTA stays FALSE. */
+ if (! deltas_are_text)
+ is_delta = (delta && strcmp(delta, "true") == 0);
+
+ SVN_ERR(parse_text_block(stream,
+ svn__atoui64(text_cl),
+ is_delta,
+ parse_fns,
+ found_node ? node_baton : rev_baton,
+ buffer,
+ buflen,
+ found_node ? nodepool : revpool));
+ }
+ else if (old_v1_with_cl)
+ {
+ /* An old-v1 block with a Content-length might have a text block.
+ If the property block did not consume all the bytes of the
+ Content-length, then it clearly does have a text block.
+ If not, then we must deduce whether we have an *empty* text
+ block or an *absent* text block. The rules are:
+ - "Node-kind: file" blocks have an empty (i.e. present, but
+ zero-length) text block, since they represent a file
+ modification. Note that file-copied-text-unmodified blocks
+ have no Content-length - even if they should have contained
+ a modified property block, the pre-0.14 dumper forgets to
+ dump the modified properties.
+ - If it is not a file node, then it is a revision or directory,
+ and so has an absent text block.
+ */
+ const char *node_kind;
+ svn_filesize_t cl_value = svn__atoui64(content_length)
+ - actual_prop_length;
+
+ if (cl_value ||
+ ((node_kind = svn_hash_gets(headers,
+ SVN_REPOS_DUMPFILE_NODE_KIND))
+ && strcmp(node_kind, "file") == 0)
+ )
+ SVN_ERR(parse_text_block(stream,
+ cl_value,
+ FALSE,
+ parse_fns,
+ found_node ? node_baton : rev_baton,
+ buffer,
+ buflen,
+ found_node ? nodepool : revpool));
+ }
+
+ /* if we have a content-length header, did we read all of it?
+ in case of an old v1, we *always* read all of it, because
+ text-content-length == content-length - prop-content-length
+ */
+ if (content_length && ! old_v1_with_cl)
+ {
+ apr_size_t rlen, num_to_read;
+ svn_filesize_t remaining =
+ svn__atoui64(content_length) -
+ (prop_cl ? svn__atoui64(prop_cl) : 0) -
+ (text_cl ? svn__atoui64(text_cl) : 0);
+
+
+ if (remaining < 0)
+ return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
+ _("Sum of subblock sizes larger than "
+ "total block content length"));
+
+ /* Consume remaining bytes in this content block */
+ while (remaining > 0)
+ {
+ if (remaining >= (svn_filesize_t)buflen)
+ rlen = buflen;
+ else
+ rlen = (apr_size_t) remaining;
+
+ num_to_read = rlen;
+ SVN_ERR(svn_stream_read(stream, buffer, &rlen));
+ remaining -= rlen;
+ if (rlen != num_to_read)
+ return stream_ran_dry();
+ }
+ }
+
+ /* If we just finished processing a node record, we need to
+ close the node record and clear the per-node subpool. */
+ if (found_node)
+ {
+ SVN_ERR(parse_fns->close_node(node_baton));
+ svn_pool_clear(nodepool);
+ }
+
+ /*** End of processing for one record. ***/
+
+ } /* end of stream */
+
+ /* Close out whatever revision we're in. */
+ if (rev_baton != NULL)
+ SVN_ERR(parse_fns->close_revision(rev_baton));
+
+ /* NOTE(review): the subpools are destroyed only on this success path;
+ on the early error returns above they presumably live until POOL
+ itself is cleared or destroyed -- confirm that is acceptable. */
+ svn_pool_destroy(linepool);
+ svn_pool_destroy(revpool);
+ svn_pool_destroy(nodepool);
+ return SVN_NO_ERROR;
+}
OpenPOWER on IntegriCloud