summaryrefslogtreecommitdiffstats
path: root/subversion/libsvn_subr/hash.c
diff options
context:
space:
mode:
Diffstat (limited to 'subversion/libsvn_subr/hash.c')
-rw-r--r--subversion/libsvn_subr/hash.c642
1 files changed, 642 insertions, 0 deletions
diff --git a/subversion/libsvn_subr/hash.c b/subversion/libsvn_subr/hash.c
new file mode 100644
index 0000000..7868cac
--- /dev/null
+++ b/subversion/libsvn_subr/hash.c
@@ -0,0 +1,642 @@
+/*
+ * hash.c : dumping and reading hash tables to/from files.
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+
+
+#include <stdlib.h>
+#include <limits.h>
+
+#include <apr_version.h>
+#include <apr_pools.h>
+#include <apr_hash.h>
+#include <apr_file_io.h>
+
+#include "svn_types.h"
+#include "svn_string.h"
+#include "svn_error.h"
+#include "svn_hash.h"
+#include "svn_sorts.h"
+#include "svn_io.h"
+#include "svn_pools.h"
+
+#include "private/svn_dep_compat.h"
+#include "private/svn_subr_private.h"
+
+#include "svn_private_config.h"
+
+
+
+
+/*
+ * The format of a dumped hash table is:
+ *
+ * K <nlength>
+ * name (a string of <nlength> bytes, followed by a newline)
+ * V <vlength>
+ * val (a string of <vlength> bytes, followed by a newline)
+ * [... etc, etc ...]
+ * END
+ *
+ *
+ * (Yes, there is a newline after END.)
+ *
+ * For example:
+ *
+ * K 5
+ * color
+ * V 3
+ * red
+ * K 11
+ * wine review
+ * V 376
+ * A forthright entrance, yet coquettish on the tongue, its deceptively
+ * fruity exterior hides the warm mahagony undercurrent that is the
+ * hallmark of Chateau Fraisant-Pitre. Connoisseurs of the region will
+ * be pleased to note the familiar, subtle hints of mulberries and
+ * carburator fluid. Its confident finish is marred only by a barely
+ * detectable suggestion of rancid squid ink.
+ * K 5
+ * price
+ * V 8
+ * US $6.50
+ * END
+ *
+ */
+
+
+
+
+/*** Dumping and loading hash files. */
+
+/* Implements svn_hash_read2 and svn_hash_read_incremental. */
+static svn_error_t *
+hash_read(apr_hash_t *hash, svn_stream_t *stream, const char *terminator,
+ svn_boolean_t incremental, apr_pool_t *pool)
+{
+ svn_stringbuf_t *buf;
+ svn_boolean_t eof;
+ apr_size_t len, keylen, vallen;
+ char c, *keybuf, *valbuf;
+ apr_pool_t *iterpool = svn_pool_create(pool);
+
+ while (1)
+ {
+ svn_error_t *err;
+ apr_uint64_t ui64;
+
+ svn_pool_clear(iterpool);
+
+ /* Read a key length line. Might be END, though. */
+ SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, iterpool));
+
+ /* Check for the end of the hash. */
+ if ((!terminator && eof && buf->len == 0)
+ || (terminator && (strcmp(buf->data, terminator) == 0)))
+ break;
+
+ /* Check for unexpected end of stream */
+ if (eof)
+ return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
+ _("Serialized hash missing terminator"));
+
+ if ((buf->len >= 3) && (buf->data[0] == 'K') && (buf->data[1] == ' '))
+ {
+ /* Get the length of the key */
+ err = svn_cstring_strtoui64(&ui64, buf->data + 2,
+ 0, APR_SIZE_MAX, 10);
+ if (err)
+ return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
+ _("Serialized hash malformed"));
+ keylen = (apr_size_t)ui64;
+
+ /* Now read that much into a buffer. */
+ keybuf = apr_palloc(pool, keylen + 1);
+ SVN_ERR(svn_stream_read(stream, keybuf, &keylen));
+ keybuf[keylen] = '\0';
+
+ /* Suck up extra newline after key data */
+ len = 1;
+ SVN_ERR(svn_stream_read(stream, &c, &len));
+ if (c != '\n')
+ return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
+ _("Serialized hash malformed"));
+
+ /* Read a val length line */
+ SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, iterpool));
+
+ if ((buf->data[0] == 'V') && (buf->data[1] == ' '))
+ {
+ err = svn_cstring_strtoui64(&ui64, buf->data + 2,
+ 0, APR_SIZE_MAX, 10);
+ if (err)
+ return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
+ _("Serialized hash malformed"));
+ vallen = (apr_size_t)ui64;
+
+ valbuf = apr_palloc(iterpool, vallen + 1);
+ SVN_ERR(svn_stream_read(stream, valbuf, &vallen));
+ valbuf[vallen] = '\0';
+
+ /* Suck up extra newline after val data */
+ len = 1;
+ SVN_ERR(svn_stream_read(stream, &c, &len));
+ if (c != '\n')
+ return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
+ _("Serialized hash malformed"));
+
+ /* Add a new hash entry. */
+ apr_hash_set(hash, keybuf, keylen,
+ svn_string_ncreate(valbuf, vallen, pool));
+ }
+ else
+ return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
+ _("Serialized hash malformed"));
+ }
+ else if (incremental && (buf->len >= 3)
+ && (buf->data[0] == 'D') && (buf->data[1] == ' '))
+ {
+ /* Get the length of the key */
+ err = svn_cstring_strtoui64(&ui64, buf->data + 2,
+ 0, APR_SIZE_MAX, 10);
+ if (err)
+ return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
+ _("Serialized hash malformed"));
+ keylen = (apr_size_t)ui64;
+
+ /* Now read that much into a buffer. */
+ keybuf = apr_palloc(iterpool, keylen + 1);
+ SVN_ERR(svn_stream_read(stream, keybuf, &keylen));
+ keybuf[keylen] = '\0';
+
+ /* Suck up extra newline after key data */
+ len = 1;
+ SVN_ERR(svn_stream_read(stream, &c, &len));
+ if (c != '\n')
+ return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
+ _("Serialized hash malformed"));
+
+ /* Remove this hash entry. */
+ apr_hash_set(hash, keybuf, keylen, NULL);
+ }
+ else
+ {
+ return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
+ _("Serialized hash malformed"));
+ }
+ }
+
+ svn_pool_destroy(iterpool);
+ return SVN_NO_ERROR;
+}
+
+
+/* Implements svn_hash_write2 and svn_hash_write_incremental. */
+static svn_error_t *
+hash_write(apr_hash_t *hash, apr_hash_t *oldhash, svn_stream_t *stream,
+ const char *terminator, apr_pool_t *pool)
+{
+ apr_pool_t *subpool;
+ apr_size_t len;
+ apr_array_header_t *list;
+ int i;
+
+ subpool = svn_pool_create(pool);
+
+ list = svn_sort__hash(hash, svn_sort_compare_items_lexically, pool);
+ for (i = 0; i < list->nelts; i++)
+ {
+ svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t);
+ svn_string_t *valstr = item->value;
+
+ svn_pool_clear(subpool);
+
+ /* Don't output entries equal to the ones in oldhash, if present. */
+ if (oldhash)
+ {
+ svn_string_t *oldstr = apr_hash_get(oldhash, item->key, item->klen);
+
+ if (oldstr && svn_string_compare(valstr, oldstr))
+ continue;
+ }
+
+ if (item->klen < 0)
+ return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
+ _("Cannot serialize negative length"));
+
+ /* Write it out. */
+ SVN_ERR(svn_stream_printf(stream, subpool,
+ "K %" APR_SIZE_T_FMT "\n%s\n"
+ "V %" APR_SIZE_T_FMT "\n",
+ (apr_size_t) item->klen,
+ (const char *) item->key,
+ valstr->len));
+ len = valstr->len;
+ SVN_ERR(svn_stream_write(stream, valstr->data, &len));
+ SVN_ERR(svn_stream_puts(stream, "\n"));
+ }
+
+ if (oldhash)
+ {
+ /* Output a deletion entry for each property in oldhash but not hash. */
+ list = svn_sort__hash(oldhash, svn_sort_compare_items_lexically,
+ pool);
+ for (i = 0; i < list->nelts; i++)
+ {
+ svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t);
+
+ svn_pool_clear(subpool);
+
+ /* If it's not present in the new hash, write out a D entry. */
+ if (! apr_hash_get(hash, item->key, item->klen))
+ SVN_ERR(svn_stream_printf(stream, subpool,
+ "D %" APR_SSIZE_T_FMT "\n%s\n",
+ item->klen, (const char *) item->key));
+ }
+ }
+
+ if (terminator)
+ SVN_ERR(svn_stream_printf(stream, subpool, "%s\n", terminator));
+
+ svn_pool_destroy(subpool);
+ return SVN_NO_ERROR;
+}
+
+
+svn_error_t *svn_hash_read2(apr_hash_t *hash, svn_stream_t *stream,
+ const char *terminator, apr_pool_t *pool)
+{
+ return hash_read(hash, stream, terminator, FALSE, pool);
+}
+
+
+svn_error_t *svn_hash_read_incremental(apr_hash_t *hash,
+ svn_stream_t *stream,
+ const char *terminator,
+ apr_pool_t *pool)
+{
+ return hash_read(hash, stream, terminator, TRUE, pool);
+}
+
+
+svn_error_t *
+svn_hash_write2(apr_hash_t *hash, svn_stream_t *stream,
+ const char *terminator, apr_pool_t *pool)
+{
+ return hash_write(hash, NULL, stream, terminator, pool);
+}
+
+
+svn_error_t *
+svn_hash_write_incremental(apr_hash_t *hash, apr_hash_t *oldhash,
+ svn_stream_t *stream, const char *terminator,
+ apr_pool_t *pool)
+{
+ SVN_ERR_ASSERT(oldhash != NULL);
+ return hash_write(hash, oldhash, stream, terminator, pool);
+}
+
+
+svn_error_t *
+svn_hash_write(apr_hash_t *hash, apr_file_t *destfile, apr_pool_t *pool)
+{
+ return hash_write(hash, NULL, svn_stream_from_aprfile2(destfile, TRUE, pool),
+ SVN_HASH_TERMINATOR, pool);
+}
+
+
+/* There are enough quirks in the deprecated svn_hash_read that we
+ should just preserve its implementation. */
+svn_error_t *
+svn_hash_read(apr_hash_t *hash,
+ apr_file_t *srcfile,
+ apr_pool_t *pool)
+{
+ svn_error_t *err;
+ char buf[SVN_KEYLINE_MAXLEN];
+ apr_size_t num_read;
+ char c;
+ int first_time = 1;
+
+
+ while (1)
+ {
+ /* Read a key length line. Might be END, though. */
+ apr_size_t len = sizeof(buf);
+
+ err = svn_io_read_length_line(srcfile, buf, &len, pool);
+ if (err && APR_STATUS_IS_EOF(err->apr_err) && first_time)
+ {
+ /* We got an EOF on our very first attempt to read, which
+ means it's a zero-byte file. No problem, just go home. */
+ svn_error_clear(err);
+ return SVN_NO_ERROR;
+ }
+ else if (err)
+ /* Any other circumstance is a genuine error. */
+ return err;
+
+ first_time = 0;
+
+ if (((len == 3) && (buf[0] == 'E') && (buf[1] == 'N') && (buf[2] == 'D'))
+ || ((len == 9)
+ && (buf[0] == 'P')
+ && (buf[1] == 'R') /* We formerly used just "END" to */
+ && (buf[2] == 'O') /* end a property hash, but later */
+ && (buf[3] == 'P') /* we added "PROPS-END", so that */
+ && (buf[4] == 'S') /* the fs dump format would be */
+ && (buf[5] == '-') /* more human-readable. That's */
+ && (buf[6] == 'E') /* why we accept either way here. */
+ && (buf[7] == 'N')
+ && (buf[8] == 'D')))
+ {
+ /* We've reached the end of the dumped hash table, so leave. */
+ return SVN_NO_ERROR;
+ }
+ else if ((buf[0] == 'K') && (buf[1] == ' '))
+ {
+ size_t keylen;
+ int parsed_len;
+ void *keybuf;
+
+ /* Get the length of the key */
+ SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2));
+ keylen = parsed_len;
+
+ /* Now read that much into a buffer, + 1 byte for null terminator */
+ keybuf = apr_palloc(pool, keylen + 1);
+ SVN_ERR(svn_io_file_read_full2(srcfile,
+ keybuf, keylen,
+ &num_read, NULL, pool));
+ ((char *) keybuf)[keylen] = '\0';
+
+ /* Suck up extra newline after key data */
+ SVN_ERR(svn_io_file_getc(&c, srcfile, pool));
+ if (c != '\n')
+ return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
+
+ /* Read a val length line */
+ len = sizeof(buf);
+ SVN_ERR(svn_io_read_length_line(srcfile, buf, &len, pool));
+
+ if ((buf[0] == 'V') && (buf[1] == ' '))
+ {
+ svn_string_t *value = apr_palloc(pool, sizeof(*value));
+ apr_size_t vallen;
+ void *valbuf;
+
+ /* Get the length of the value */
+ SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2));
+ vallen = parsed_len;
+
+ /* Again, 1 extra byte for the null termination. */
+ valbuf = apr_palloc(pool, vallen + 1);
+ SVN_ERR(svn_io_file_read_full2(srcfile,
+ valbuf, vallen,
+ &num_read, NULL, pool));
+ ((char *) valbuf)[vallen] = '\0';
+
+ /* Suck up extra newline after val data */
+ SVN_ERR(svn_io_file_getc(&c, srcfile, pool));
+ if (c != '\n')
+ return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
+
+ value->data = valbuf;
+ value->len = vallen;
+
+ /* The Grand Moment: add a new hash entry! */
+ apr_hash_set(hash, keybuf, keylen, value);
+ }
+ else
+ {
+ return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
+ }
+ }
+ else
+ {
+ return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
+ }
+ } /* while (1) */
+}
+
+
+
+/*** Diffing hashes ***/
+
+svn_error_t *
+svn_hash_diff(apr_hash_t *hash_a,
+ apr_hash_t *hash_b,
+ svn_hash_diff_func_t diff_func,
+ void *diff_func_baton,
+ apr_pool_t *pool)
+{
+ apr_hash_index_t *hi;
+
+ if (hash_a)
+ for (hi = apr_hash_first(pool, hash_a); hi; hi = apr_hash_next(hi))
+ {
+ const void *key;
+ apr_ssize_t klen;
+
+ apr_hash_this(hi, &key, &klen, NULL);
+
+ if (hash_b && (apr_hash_get(hash_b, key, klen)))
+ SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_both,
+ diff_func_baton));
+ else
+ SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_a,
+ diff_func_baton));
+ }
+
+ if (hash_b)
+ for (hi = apr_hash_first(pool, hash_b); hi; hi = apr_hash_next(hi))
+ {
+ const void *key;
+ apr_ssize_t klen;
+
+ apr_hash_this(hi, &key, &klen, NULL);
+
+ if (! (hash_a && apr_hash_get(hash_a, key, klen)))
+ SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_b,
+ diff_func_baton));
+ }
+
+ return SVN_NO_ERROR;
+}
+
+
+/*** Misc. hash APIs ***/
+
+svn_error_t *
+svn_hash_keys(apr_array_header_t **array,
+ apr_hash_t *hash,
+ apr_pool_t *pool)
+{
+ apr_hash_index_t *hi;
+
+ *array = apr_array_make(pool, apr_hash_count(hash), sizeof(const char *));
+
+ for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi))
+ {
+ APR_ARRAY_PUSH(*array, const char *) = svn__apr_hash_index_key(hi);
+ }
+
+ return SVN_NO_ERROR;
+}
+
+
+svn_error_t *
+svn_hash_from_cstring_keys(apr_hash_t **hash_p,
+ const apr_array_header_t *keys,
+ apr_pool_t *pool)
+{
+ int i;
+ apr_hash_t *hash = svn_hash__make(pool);
+ for (i = 0; i < keys->nelts; i++)
+ {
+ const char *key =
+ apr_pstrdup(pool, APR_ARRAY_IDX(keys, i, const char *));
+ svn_hash_sets(hash, key, key);
+ }
+ *hash_p = hash;
+ return SVN_NO_ERROR;
+}
+
+
+#if !APR_VERSION_AT_LEAST(1, 3, 0)
+void
+svn_hash__clear(apr_hash_t *hash)
+{
+ apr_hash_index_t *hi;
+ const void *key;
+ apr_ssize_t klen;
+
+ for (hi = apr_hash_first(NULL, hash); hi; hi = apr_hash_next(hi))
+ {
+ apr_hash_this(hi, &key, &klen, NULL);
+ apr_hash_set(hash, key, klen, NULL);
+ }
+}
+#endif
+
+
+
+/*** Specialized getter APIs ***/
+
+const char *
+svn_hash__get_cstring(apr_hash_t *hash,
+ const char *key,
+ const char *default_value)
+{
+ if (hash)
+ {
+ const char *value = svn_hash_gets(hash, key);
+ return value ? value : default_value;
+ }
+
+ return default_value;
+}
+
+
+svn_boolean_t
+svn_hash__get_bool(apr_hash_t *hash, const char *key,
+ svn_boolean_t default_value)
+{
+ const char *tmp_value = svn_hash__get_cstring(hash, key, NULL);
+ svn_tristate_t value = svn_tristate__from_word(tmp_value);
+
+ if (value == svn_tristate_true)
+ return TRUE;
+ else if (value == svn_tristate_false)
+ return FALSE;
+
+ return default_value;
+}
+
+
+
+/*** Optimized hash function ***/
+
+/* Optimized version of apr_hashfunc_default in APR 1.4.5 and earlier.
+ * It assumes that the CPU has 32-bit multiplications with high throughput
+ * of at least 1 operation every 3 cycles. Latency is not an issue. Another
+ * optimization is a mildly unrolled main loop and breaking the dependency
+ * chain within the loop.
+ *
+ * Note that most CPUs including Intel Atom, VIA Nano, ARM feature the
+ * assumed pipelined multiplication circuitry. They can do one MUL every
+ * or every other cycle.
+ *
+ * The performance is ultimately limited by the fact that most CPUs can
+ * do only one LOAD and only one BRANCH operation per cycle. The best we
+ * can do is to process one character per cycle - provided the processor
+ * is wide enough to do 1 LOAD, COMPARE, BRANCH, MUL and ADD per cycle.
+ */
+static unsigned int
+hashfunc_compatible(const char *char_key, apr_ssize_t *klen)
+{
+ unsigned int hash = 0;
+ const unsigned char *key = (const unsigned char *)char_key;
+ const unsigned char *p;
+ apr_ssize_t i;
+
+ if (*klen == APR_HASH_KEY_STRING)
+ {
+ for (p = key; ; p+=4)
+ {
+ unsigned int new_hash = hash * 33 * 33 * 33 * 33;
+ if (!p[0]) break;
+ new_hash += p[0] * 33 * 33 * 33;
+ if (!p[1]) break;
+ new_hash += p[1] * 33 * 33;
+ if (!p[2]) break;
+ new_hash += p[2] * 33;
+ if (!p[3]) break;
+ hash = new_hash + p[3];
+ }
+ for (; *p; p++)
+ hash = hash * 33 + *p;
+
+ *klen = p - key;
+ }
+ else
+ {
+ for (p = key, i = *klen; i >= 4; i-=4, p+=4)
+ {
+ hash = hash * 33 * 33 * 33 * 33
+ + p[0] * 33 * 33 * 33
+ + p[1] * 33 * 33
+ + p[2] * 33
+ + p[3];
+ }
+ for (; i; i--, p++)
+ hash = hash * 33 + *p;
+ }
+
+ return hash;
+}
+
+apr_hash_t *
+svn_hash__make(apr_pool_t *pool)
+{
+ return apr_hash_make_custom(pool, hashfunc_compatible);
+}
OpenPOWER on IntegriCloud