diff options
Diffstat (limited to 'subversion/libsvn_subr/utf.c')
-rw-r--r-- | subversion/libsvn_subr/utf.c | 1075 |
1 files changed, 1075 insertions, 0 deletions
diff --git a/subversion/libsvn_subr/utf.c b/subversion/libsvn_subr/utf.c new file mode 100644 index 0000000..355e068 --- /dev/null +++ b/subversion/libsvn_subr/utf.c @@ -0,0 +1,1075 @@ +/* + * utf.c: UTF-8 conversion routines + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + + + +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include <apr_strings.h> +#include <apr_lib.h> +#include <apr_xlate.h> +#include <apr_atomic.h> + +#include "svn_hash.h" +#include "svn_string.h" +#include "svn_error.h" +#include "svn_pools.h" +#include "svn_ctype.h" +#include "svn_utf.h" +#include "svn_private_config.h" +#include "win32_xlate.h" + +#include "private/svn_utf_private.h" +#include "private/svn_dep_compat.h" +#include "private/svn_string_private.h" +#include "private/svn_mutex.h" + + + +/* Use these static strings to maximize performance on standard conversions. + * Any strings on other locations are still valid, however. + */ +static const char *SVN_UTF_NTOU_XLATE_HANDLE = "svn-utf-ntou-xlate-handle"; +static const char *SVN_UTF_UTON_XLATE_HANDLE = "svn-utf-uton-xlate-handle"; + +static const char *SVN_APR_UTF8_CHARSET = "UTF-8"; + +static svn_mutex__t *xlate_handle_mutex = NULL; +static svn_boolean_t assume_native_charset_is_utf8 = FALSE; + +/* The xlate handle cache is a global hash table with linked lists of xlate + * handles. In multi-threaded environments, a thread "borrows" an xlate + * handle from the cache during a translation and puts it back afterwards. + * This avoids holding a global lock for all translations. + * If there is no handle for a particular key when needed, a new is + * handle is created and put in the cache after use. + * This means that there will be at most N handles open for a key, where N + * is the number of simultanous handles in use for that key. */ + +typedef struct xlate_handle_node_t { + apr_xlate_t *handle; + /* FALSE if the handle is not valid, since its pool is being + destroyed. */ + svn_boolean_t valid; + /* The name of a char encoding or APR_LOCALE_CHARSET. */ + const char *frompage, *topage; + struct xlate_handle_node_t *next; +} xlate_handle_node_t; + +/* This maps const char * userdata_key strings to xlate_handle_node_t ** + handles to the first entry in the linked list of xlate handles. We don't + store the pointer to the list head directly in the hash table, since we + remove/insert entries at the head in the list in the code below, and + we can't use apr_hash_set() in each character translation because that + function allocates memory in each call where the value is non-NULL. + Since these allocations take place in a global pool, this would be a + memory leak. */ +static apr_hash_t *xlate_handle_hash = NULL; + +/* "1st level cache" to standard conversion maps. We may access these + * using atomic xchange ops, i.e. without further thread synchronization. + * If the respective item is NULL, fallback to hash lookup. + */ +static void * volatile xlat_ntou_static_handle = NULL; +static void * volatile xlat_uton_static_handle = NULL; + +/* Clean up the xlate handle cache. */ +static apr_status_t +xlate_cleanup(void *arg) +{ + /* We set the cache variables to NULL so that translation works in other + cleanup functions, even if it isn't cached then. */ + xlate_handle_hash = NULL; + + /* ensure no stale objects get accessed */ + xlat_ntou_static_handle = NULL; + xlat_uton_static_handle = NULL; + + return APR_SUCCESS; +} + +/* Set the handle of ARG to NULL. */ +static apr_status_t +xlate_handle_node_cleanup(void *arg) +{ + xlate_handle_node_t *node = arg; + + node->valid = FALSE; + return APR_SUCCESS; +} + +void +svn_utf_initialize2(svn_boolean_t assume_native_utf8, + apr_pool_t *pool) +{ + if (!xlate_handle_hash) + { + /* We create our own subpool, which we protect with the mutex. + We can't use the pool passed to us by the caller, since we will + use it for xlate handle allocations, possibly in multiple threads, + and pool allocation is not thread-safe. */ + apr_pool_t *subpool = svn_pool_create(pool); + svn_mutex__t *mutex; + svn_error_t *err = svn_mutex__init(&mutex, TRUE, subpool); + if (err) + { + svn_error_clear(err); + return; + } + + xlate_handle_mutex = mutex; + xlate_handle_hash = apr_hash_make(subpool); + + apr_pool_cleanup_register(subpool, NULL, xlate_cleanup, + apr_pool_cleanup_null); + } + + if (!assume_native_charset_is_utf8) + assume_native_charset_is_utf8 = assume_native_utf8; +} + +/* Return a unique string key based on TOPAGE and FROMPAGE. TOPAGE and + * FROMPAGE can be any valid arguments of the same name to + * apr_xlate_open(). Allocate the returned string in POOL. */ +static const char* +get_xlate_key(const char *topage, + const char *frompage, + apr_pool_t *pool) +{ + /* In the cases of SVN_APR_LOCALE_CHARSET and SVN_APR_DEFAULT_CHARSET + * topage/frompage is really an int, not a valid string. So generate a + * unique key accordingly. */ + if (frompage == SVN_APR_LOCALE_CHARSET) + frompage = "APR_LOCALE_CHARSET"; + else if (frompage == SVN_APR_DEFAULT_CHARSET) + frompage = "APR_DEFAULT_CHARSET"; + + if (topage == SVN_APR_LOCALE_CHARSET) + topage = "APR_LOCALE_CHARSET"; + else if (topage == SVN_APR_DEFAULT_CHARSET) + topage = "APR_DEFAULT_CHARSET"; + + return apr_pstrcat(pool, "svn-utf-", frompage, "to", topage, + "-xlate-handle", (char *)NULL); +} + +/* Atomically replace the content in *MEM with NEW_VALUE and return + * the previous content of *MEM. If atomicy cannot be guaranteed, + * *MEM will not be modified and NEW_VALUE is simply returned to + * the caller. + */ +static APR_INLINE void* +atomic_swap(void * volatile * mem, void *new_value) +{ +#if APR_HAS_THREADS +#if APR_VERSION_AT_LEAST(1,3,0) + /* Cast is necessary because of APR bug: + https://issues.apache.org/bugzilla/show_bug.cgi?id=50731 */ + return apr_atomic_xchgptr((volatile void **)mem, new_value); +#else + /* old APRs don't support atomic swaps. Simply return the + * input to the caller for further proccessing. */ + return new_value; +#endif +#else + /* no threads - no sync. necessary */ + void *old_value = (void*)*mem; + *mem = new_value; + return old_value; +#endif +} + +/* Set *RET to a newly created handle node for converting from FROMPAGE + to TOPAGE, If apr_xlate_open() returns APR_EINVAL or APR_ENOTIMPL, set + (*RET)->handle to NULL. If fail for any other reason, return the error. + Allocate *RET and its xlate handle in POOL. */ +static svn_error_t * +xlate_alloc_handle(xlate_handle_node_t **ret, + const char *topage, const char *frompage, + apr_pool_t *pool) +{ + apr_status_t apr_err; + apr_xlate_t *handle; + + /* The error handling doesn't support the following cases, since we don't + use them currently. Catch this here. */ + SVN_ERR_ASSERT(frompage != SVN_APR_DEFAULT_CHARSET + && topage != SVN_APR_DEFAULT_CHARSET + && (frompage != SVN_APR_LOCALE_CHARSET + || topage != SVN_APR_LOCALE_CHARSET)); + + /* Try to create a handle. */ +#if defined(WIN32) + apr_err = svn_subr__win32_xlate_open((win32_xlate_t **)&handle, topage, + frompage, pool); +#else + apr_err = apr_xlate_open(&handle, topage, frompage, pool); +#endif + + if (APR_STATUS_IS_EINVAL(apr_err) || APR_STATUS_IS_ENOTIMPL(apr_err)) + handle = NULL; + else if (apr_err != APR_SUCCESS) + { + const char *errstr; + /* Can't use svn_error_wrap_apr here because it calls functions in + this file, leading to infinite recursion. */ + if (frompage == SVN_APR_LOCALE_CHARSET) + errstr = apr_psprintf(pool, + _("Can't create a character converter from " + "native encoding to '%s'"), topage); + else if (topage == SVN_APR_LOCALE_CHARSET) + errstr = apr_psprintf(pool, + _("Can't create a character converter from " + "'%s' to native encoding"), frompage); + else + errstr = apr_psprintf(pool, + _("Can't create a character converter from " + "'%s' to '%s'"), frompage, topage); + + return svn_error_create(apr_err, NULL, errstr); + } + + /* Allocate and initialize the node. */ + *ret = apr_palloc(pool, sizeof(xlate_handle_node_t)); + (*ret)->handle = handle; + (*ret)->valid = TRUE; + (*ret)->frompage = ((frompage != SVN_APR_LOCALE_CHARSET) + ? apr_pstrdup(pool, frompage) : frompage); + (*ret)->topage = ((topage != SVN_APR_LOCALE_CHARSET) + ? apr_pstrdup(pool, topage) : topage); + (*ret)->next = NULL; + + /* If we are called from inside a pool cleanup handler, the just created + xlate handle will be closed when that handler returns by a newly + registered cleanup handler, however, the handle is still cached by us. + To prevent this, we register a cleanup handler that will reset the valid + flag of our node, so we don't use an invalid handle. */ + if (handle) + apr_pool_cleanup_register(pool, *ret, xlate_handle_node_cleanup, + apr_pool_cleanup_null); + + return SVN_NO_ERROR; +} + +/* Extend xlate_alloc_handle by using USERDATA_KEY as a key in our + global hash map, if available. + + Allocate *RET and its xlate handle in POOL if svn_utf_initialize() + hasn't been called or USERDATA_KEY is NULL. Else, allocate them + in the pool of xlate_handle_hash. + + Note: this function is not thread-safe. Call get_xlate_handle_node + instead. */ +static svn_error_t * +get_xlate_handle_node_internal(xlate_handle_node_t **ret, + const char *topage, const char *frompage, + const char *userdata_key, apr_pool_t *pool) +{ + /* If we already have a handle, just return it. */ + if (userdata_key && xlate_handle_hash) + { + xlate_handle_node_t *old_node = NULL; + + /* 2nd level: hash lookup */ + xlate_handle_node_t **old_node_p = svn_hash_gets(xlate_handle_hash, + userdata_key); + if (old_node_p) + old_node = *old_node_p; + if (old_node) + { + /* Ensure that the handle is still valid. */ + if (old_node->valid) + { + /* Remove from the list. */ + *old_node_p = old_node->next; + old_node->next = NULL; + *ret = old_node; + return SVN_NO_ERROR; + } + } + } + + /* Note that we still have the mutex locked (if it is initialized), so we + can use the global pool for creating the new xlate handle. */ + + /* Use the correct pool for creating the handle. */ + pool = apr_hash_pool_get(xlate_handle_hash); + + return xlate_alloc_handle(ret, topage, frompage, pool); +} + +/* Set *RET to a handle node for converting from FROMPAGE to TOPAGE, + creating the handle node if it doesn't exist in USERDATA_KEY. + If a node is not cached and apr_xlate_open() returns APR_EINVAL or + APR_ENOTIMPL, set (*RET)->handle to NULL. If fail for any other + reason, return the error. + + Allocate *RET and its xlate handle in POOL if svn_utf_initialize() + hasn't been called or USERDATA_KEY is NULL. Else, allocate them + in the pool of xlate_handle_hash. */ +static svn_error_t * +get_xlate_handle_node(xlate_handle_node_t **ret, + const char *topage, const char *frompage, + const char *userdata_key, apr_pool_t *pool) +{ + xlate_handle_node_t *old_node = NULL; + + /* If we already have a handle, just return it. */ + if (userdata_key) + { + if (xlate_handle_hash) + { + /* 1st level: global, static items */ + if (userdata_key == SVN_UTF_NTOU_XLATE_HANDLE) + old_node = atomic_swap(&xlat_ntou_static_handle, NULL); + else if (userdata_key == SVN_UTF_UTON_XLATE_HANDLE) + old_node = atomic_swap(&xlat_uton_static_handle, NULL); + + if (old_node && old_node->valid) + { + *ret = old_node; + return SVN_NO_ERROR; + } + } + else + { + void *p; + /* We fall back on a per-pool cache instead. */ + apr_pool_userdata_get(&p, userdata_key, pool); + old_node = p; + /* Ensure that the handle is still valid. */ + if (old_node && old_node->valid) + { + *ret = old_node; + return SVN_NO_ERROR; + } + + return xlate_alloc_handle(ret, topage, frompage, pool); + } + } + + SVN_MUTEX__WITH_LOCK(xlate_handle_mutex, + get_xlate_handle_node_internal(ret, + topage, + frompage, + userdata_key, + pool)); + + return SVN_NO_ERROR; +} + +/* Put back NODE into the xlate handle cache for use by other calls. + + Note: this function is not thread-safe. Call put_xlate_handle_node + instead. */ +static svn_error_t * +put_xlate_handle_node_internal(xlate_handle_node_t *node, + const char *userdata_key) +{ + xlate_handle_node_t **node_p = svn_hash_gets(xlate_handle_hash, userdata_key); + if (node_p == NULL) + { + userdata_key = apr_pstrdup(apr_hash_pool_get(xlate_handle_hash), + userdata_key); + node_p = apr_palloc(apr_hash_pool_get(xlate_handle_hash), + sizeof(*node_p)); + *node_p = NULL; + svn_hash_sets(xlate_handle_hash, userdata_key, node_p); + } + node->next = *node_p; + *node_p = node; + + return SVN_NO_ERROR; +} + +/* Put back NODE into the xlate handle cache for use by other calls. + If there is no global cache, store the handle in POOL. + Ignore errors related to locking/unlocking the mutex. */ +static svn_error_t * +put_xlate_handle_node(xlate_handle_node_t *node, + const char *userdata_key, + apr_pool_t *pool) +{ + assert(node->next == NULL); + if (!userdata_key) + return SVN_NO_ERROR; + + /* push previous global node to the hash */ + if (xlate_handle_hash) + { + /* 1st level: global, static items */ + if (userdata_key == SVN_UTF_NTOU_XLATE_HANDLE) + node = atomic_swap(&xlat_ntou_static_handle, node); + else if (userdata_key == SVN_UTF_UTON_XLATE_HANDLE) + node = atomic_swap(&xlat_uton_static_handle, node); + if (node == NULL) + return SVN_NO_ERROR; + + SVN_MUTEX__WITH_LOCK(xlate_handle_mutex, + put_xlate_handle_node_internal(node, + userdata_key)); + } + else + { + /* Store it in the per-pool cache. */ + apr_pool_userdata_set(node, userdata_key, apr_pool_cleanup_null, pool); + } + + return SVN_NO_ERROR; +} + +/* Return the apr_xlate handle for converting native characters to UTF-8. */ +static svn_error_t * +get_ntou_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool) +{ + return get_xlate_handle_node(ret, SVN_APR_UTF8_CHARSET, + assume_native_charset_is_utf8 + ? SVN_APR_UTF8_CHARSET + : SVN_APR_LOCALE_CHARSET, + SVN_UTF_NTOU_XLATE_HANDLE, pool); +} + + +/* Return the apr_xlate handle for converting UTF-8 to native characters. + Create one if it doesn't exist. If unable to find a handle, or + unable to create one because apr_xlate_open returned APR_EINVAL, then + set *RET to null and return SVN_NO_ERROR; if fail for some other + reason, return error. */ +static svn_error_t * +get_uton_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool) +{ + return get_xlate_handle_node(ret, + assume_native_charset_is_utf8 + ? SVN_APR_UTF8_CHARSET + : SVN_APR_LOCALE_CHARSET, + SVN_APR_UTF8_CHARSET, + SVN_UTF_UTON_XLATE_HANDLE, pool); +} + + +/* Copy LEN bytes of SRC, converting non-ASCII and zero bytes to ?\nnn + sequences, allocating the result in POOL. */ +static const char * +fuzzy_escape(const char *src, apr_size_t len, apr_pool_t *pool) +{ + const char *src_orig = src, *src_end = src + len; + apr_size_t new_len = 0; + char *new; + const char *new_orig; + + /* First count how big a dest string we'll need. */ + while (src < src_end) + { + if (! svn_ctype_isascii(*src) || *src == '\0') + new_len += 5; /* 5 slots, for "?\XXX" */ + else + new_len += 1; /* one slot for the 7-bit char */ + + src++; + } + + /* Allocate that amount, plus one slot for '\0' character. */ + new = apr_palloc(pool, new_len + 1); + + new_orig = new; + + /* And fill it up. */ + while (src_orig < src_end) + { + if (! svn_ctype_isascii(*src_orig) || src_orig == '\0') + { + /* This is the same format as svn_xml_fuzzy_escape uses, but that + function escapes different characters. Please keep in sync! + ### If we add another fuzzy escape somewhere, we should abstract + ### this out to a common function. */ + apr_snprintf(new, 6, "?\\%03u", (unsigned char) *src_orig); + new += 5; + } + else + { + *new = *src_orig; + new += 1; + } + + src_orig++; + } + + *new = '\0'; + + return new_orig; +} + +/* Convert SRC_LENGTH bytes of SRC_DATA in NODE->handle, store the result + in *DEST, which is allocated in POOL. */ +static svn_error_t * +convert_to_stringbuf(xlate_handle_node_t *node, + const char *src_data, + apr_size_t src_length, + svn_stringbuf_t **dest, + apr_pool_t *pool) +{ +#ifdef WIN32 + apr_status_t apr_err; + + apr_err = svn_subr__win32_xlate_to_stringbuf((win32_xlate_t *) node->handle, + src_data, src_length, + dest, pool); +#else + apr_size_t buflen = src_length * 2; + apr_status_t apr_err; + apr_size_t srclen = src_length; + apr_size_t destlen = buflen; + + /* Initialize *DEST to an empty stringbuf. + A 1:2 ratio of input bytes to output bytes (as assigned above) + should be enough for most translations, and if it turns out not + to be enough, we'll grow the buffer again, sizing it based on a + 1:3 ratio of the remainder of the string. */ + *dest = svn_stringbuf_create_ensure(buflen + 1, pool); + + /* Not only does it not make sense to convert an empty string, but + apr-iconv is quite unreasonable about not allowing that. */ + if (src_length == 0) + return SVN_NO_ERROR; + + do + { + /* Set up state variables for xlate. */ + destlen = buflen - (*dest)->len; + + /* Attempt the conversion. */ + apr_err = apr_xlate_conv_buffer(node->handle, + src_data + (src_length - srclen), + &srclen, + (*dest)->data + (*dest)->len, + &destlen); + + /* Now, update the *DEST->len to track the amount of output data + churned out so far from this loop. */ + (*dest)->len += ((buflen - (*dest)->len) - destlen); + buflen += srclen * 3; /* 3 is middle ground, 2 wasn't enough + for all characters in the buffer, 4 is + maximum character size (currently) */ + + + } while (apr_err == APR_SUCCESS && srclen != 0); +#endif + + /* If we exited the loop with an error, return the error. */ + if (apr_err) + { + const char *errstr; + svn_error_t *err; + + /* Can't use svn_error_wrap_apr here because it calls functions in + this file, leading to infinite recursion. */ + if (node->frompage == SVN_APR_LOCALE_CHARSET) + errstr = apr_psprintf + (pool, _("Can't convert string from native encoding to '%s':"), + node->topage); + else if (node->topage == SVN_APR_LOCALE_CHARSET) + errstr = apr_psprintf + (pool, _("Can't convert string from '%s' to native encoding:"), + node->frompage); + else + errstr = apr_psprintf + (pool, _("Can't convert string from '%s' to '%s':"), + node->frompage, node->topage); + + err = svn_error_create(apr_err, NULL, fuzzy_escape(src_data, + src_length, pool)); + return svn_error_create(apr_err, err, errstr); + } + /* Else, exited due to success. Trim the result buffer down to the + right length. */ + (*dest)->data[(*dest)->len] = '\0'; + + return SVN_NO_ERROR; +} + + +/* Return APR_EINVAL if the first LEN bytes of DATA contain anything + other than seven-bit, non-control (except for whitespace) ASCII + characters, finding the error pool from POOL. Otherwise, return + SVN_NO_ERROR. */ +static svn_error_t * +check_non_ascii(const char *data, apr_size_t len, apr_pool_t *pool) +{ + const char *data_start = data; + + for (; len > 0; --len, data++) + { + if ((! svn_ctype_isascii(*data)) + || ((! svn_ctype_isspace(*data)) + && svn_ctype_iscntrl(*data))) + { + /* Show the printable part of the data, followed by the + decimal code of the questionable character. Because if a + user ever gets this error, she's going to have to spend + time tracking down the non-ASCII data, so we want to help + as much as possible. And yes, we just call the unsafe + data "non-ASCII", even though the actual constraint is + somewhat more complex than that. */ + + if (data - data_start) + { + const char *error_data + = apr_pstrndup(pool, data_start, (data - data_start)); + + return svn_error_createf + (APR_EINVAL, NULL, + _("Safe data '%s' was followed by non-ASCII byte %d: " + "unable to convert to/from UTF-8"), + error_data, *((const unsigned char *) data)); + } + else + { + return svn_error_createf + (APR_EINVAL, NULL, + _("Non-ASCII character (code %d) detected, " + "and unable to convert to/from UTF-8"), + *((const unsigned char *) data)); + } + } + } + + return SVN_NO_ERROR; +} + +/* Construct an error with code APR_EINVAL and with a suitable message + * to describe the invalid UTF-8 sequence DATA of length LEN (which + * may have embedded NULLs). We can't simply print the data, almost + * by definition we don't really know how it is encoded. + */ +static svn_error_t * +invalid_utf8(const char *data, apr_size_t len, apr_pool_t *pool) +{ + const char *last = svn_utf__last_valid(data, len); + const char *valid_txt = "", *invalid_txt = ""; + apr_size_t i; + size_t valid, invalid; + + /* We will display at most 24 valid octets (this may split a leading + multi-byte character) as that should fit on one 80 character line. */ + valid = last - data; + if (valid > 24) + valid = 24; + for (i = 0; i < valid; ++i) + valid_txt = apr_pstrcat(pool, valid_txt, + apr_psprintf(pool, " %02x", + (unsigned char)last[i-valid]), + (char *)NULL); + + /* 4 invalid octets will guarantee that the faulty octet is displayed */ + invalid = data + len - last; + if (invalid > 4) + invalid = 4; + for (i = 0; i < invalid; ++i) + invalid_txt = apr_pstrcat(pool, invalid_txt, + apr_psprintf(pool, " %02x", + (unsigned char)last[i]), + (char *)NULL); + + return svn_error_createf(APR_EINVAL, NULL, + _("Valid UTF-8 data\n(hex:%s)\n" + "followed by invalid UTF-8 sequence\n(hex:%s)"), + valid_txt, invalid_txt); +} + +/* Verify that the sequence DATA of length LEN is valid UTF-8. + If it is not, return an error with code APR_EINVAL. */ +static svn_error_t * +check_utf8(const char *data, apr_size_t len, apr_pool_t *pool) +{ + if (! svn_utf__is_valid(data, len)) + return invalid_utf8(data, len, pool); + return SVN_NO_ERROR; +} + +/* Verify that the NULL terminated sequence DATA is valid UTF-8. + If it is not, return an error with code APR_EINVAL. */ +static svn_error_t * +check_cstring_utf8(const char *data, apr_pool_t *pool) +{ + + if (! svn_utf__cstring_is_valid(data)) + return invalid_utf8(data, strlen(data), pool); + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest, + const svn_stringbuf_t *src, + apr_pool_t *pool) +{ + xlate_handle_node_t *node; + svn_error_t *err; + + SVN_ERR(get_ntou_xlate_handle_node(&node, pool)); + + if (node->handle) + { + err = convert_to_stringbuf(node, src->data, src->len, dest, pool); + if (! err) + err = check_utf8((*dest)->data, (*dest)->len, pool); + } + else + { + err = check_non_ascii(src->data, src->len, pool); + if (! err) + *dest = svn_stringbuf_dup(src, pool); + } + + return svn_error_compose_create(err, + put_xlate_handle_node + (node, + SVN_UTF_NTOU_XLATE_HANDLE, + pool)); +} + + +svn_error_t * +svn_utf_string_to_utf8(const svn_string_t **dest, + const svn_string_t *src, + apr_pool_t *pool) +{ + svn_stringbuf_t *destbuf; + xlate_handle_node_t *node; + svn_error_t *err; + + SVN_ERR(get_ntou_xlate_handle_node(&node, pool)); + + if (node->handle) + { + err = convert_to_stringbuf(node, src->data, src->len, &destbuf, pool); + if (! err) + err = check_utf8(destbuf->data, destbuf->len, pool); + if (! err) + *dest = svn_stringbuf__morph_into_string(destbuf); + } + else + { + err = check_non_ascii(src->data, src->len, pool); + if (! err) + *dest = svn_string_dup(src, pool); + } + + return svn_error_compose_create(err, + put_xlate_handle_node + (node, + SVN_UTF_NTOU_XLATE_HANDLE, + pool)); +} + + +/* Common implementation for svn_utf_cstring_to_utf8, + svn_utf_cstring_to_utf8_ex, svn_utf_cstring_from_utf8 and + svn_utf_cstring_from_utf8_ex. Convert SRC to DEST using NODE->handle as + the translator and allocating from POOL. */ +static svn_error_t * +convert_cstring(const char **dest, + const char *src, + xlate_handle_node_t *node, + apr_pool_t *pool) +{ + if (node->handle) + { + svn_stringbuf_t *destbuf; + SVN_ERR(convert_to_stringbuf(node, src, strlen(src), + &destbuf, pool)); + *dest = destbuf->data; + } + else + { + apr_size_t len = strlen(src); + SVN_ERR(check_non_ascii(src, len, pool)); + *dest = apr_pstrmemdup(pool, src, len); + } + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_utf_cstring_to_utf8(const char **dest, + const char *src, + apr_pool_t *pool) +{ + xlate_handle_node_t *node; + svn_error_t *err; + + SVN_ERR(get_ntou_xlate_handle_node(&node, pool)); + err = convert_cstring(dest, src, node, pool); + SVN_ERR(svn_error_compose_create(err, + put_xlate_handle_node + (node, + SVN_UTF_NTOU_XLATE_HANDLE, + pool))); + return check_cstring_utf8(*dest, pool); +} + + +svn_error_t * +svn_utf_cstring_to_utf8_ex2(const char **dest, + const char *src, + const char *frompage, + apr_pool_t *pool) +{ + xlate_handle_node_t *node; + svn_error_t *err; + const char *convset_key = get_xlate_key(SVN_APR_UTF8_CHARSET, frompage, + pool); + + SVN_ERR(get_xlate_handle_node(&node, SVN_APR_UTF8_CHARSET, frompage, + convset_key, pool)); + err = convert_cstring(dest, src, node, pool); + SVN_ERR(svn_error_compose_create(err, + put_xlate_handle_node + (node, + SVN_UTF_NTOU_XLATE_HANDLE, + pool))); + + return check_cstring_utf8(*dest, pool); +} + + +svn_error_t * +svn_utf_cstring_to_utf8_ex(const char **dest, + const char *src, + const char *frompage, + const char *convset_key, + apr_pool_t *pool) +{ + return svn_utf_cstring_to_utf8_ex2(dest, src, frompage, pool); +} + + +svn_error_t * +svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest, + const svn_stringbuf_t *src, + apr_pool_t *pool) +{ + xlate_handle_node_t *node; + svn_error_t *err; + + SVN_ERR(get_uton_xlate_handle_node(&node, pool)); + + if (node->handle) + { + err = check_utf8(src->data, src->len, pool); + if (! err) + err = convert_to_stringbuf(node, src->data, src->len, dest, pool); + } + else + { + err = check_non_ascii(src->data, src->len, pool); + if (! err) + *dest = svn_stringbuf_dup(src, pool); + } + + err = svn_error_compose_create( + err, + put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool)); + + return err; +} + + +svn_error_t * +svn_utf_string_from_utf8(const svn_string_t **dest, + const svn_string_t *src, + apr_pool_t *pool) +{ + svn_stringbuf_t *dbuf; + xlate_handle_node_t *node; + svn_error_t *err; + + SVN_ERR(get_uton_xlate_handle_node(&node, pool)); + + if (node->handle) + { + err = check_utf8(src->data, src->len, pool); + if (! err) + err = convert_to_stringbuf(node, src->data, src->len, + &dbuf, pool); + if (! err) + *dest = svn_stringbuf__morph_into_string(dbuf); + } + else + { + err = check_non_ascii(src->data, src->len, pool); + if (! err) + *dest = svn_string_dup(src, pool); + } + + err = svn_error_compose_create( + err, + put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool)); + + return err; +} + + +svn_error_t * +svn_utf_cstring_from_utf8(const char **dest, + const char *src, + apr_pool_t *pool) +{ + xlate_handle_node_t *node; + svn_error_t *err; + + SVN_ERR(check_cstring_utf8(src, pool)); + + SVN_ERR(get_uton_xlate_handle_node(&node, pool)); + err = convert_cstring(dest, src, node, pool); + err = svn_error_compose_create( + err, + put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool)); + + return err; +} + + +svn_error_t * +svn_utf_cstring_from_utf8_ex2(const char **dest, + const char *src, + const char *topage, + apr_pool_t *pool) +{ + xlate_handle_node_t *node; + svn_error_t *err; + const char *convset_key = get_xlate_key(topage, SVN_APR_UTF8_CHARSET, + pool); + + SVN_ERR(check_cstring_utf8(src, pool)); + + SVN_ERR(get_xlate_handle_node(&node, topage, SVN_APR_UTF8_CHARSET, + convset_key, pool)); + err = convert_cstring(dest, src, node, pool); + err = svn_error_compose_create( + err, + put_xlate_handle_node(node, convset_key, pool)); + + return err; +} + + +svn_error_t * +svn_utf_cstring_from_utf8_ex(const char **dest, + const char *src, + const char *topage, + const char *convset_key, + apr_pool_t *pool) +{ + return svn_utf_cstring_from_utf8_ex2(dest, src, topage, pool); +} + + +const char * +svn_utf__cstring_from_utf8_fuzzy(const char *src, + apr_pool_t *pool, + svn_error_t *(*convert_from_utf8) + (const char **, const char *, apr_pool_t *)) +{ + const char *escaped, *converted; + svn_error_t *err; + + escaped = fuzzy_escape(src, strlen(src), pool); + + /* Okay, now we have a *new* UTF-8 string, one that's guaranteed to + contain only 7-bit bytes :-). Recode to native... */ + err = convert_from_utf8(((const char **) &converted), escaped, pool); + + if (err) + { + svn_error_clear(err); + return escaped; + } + else + return converted; + + /* ### Check the client locale, maybe we can avoid that second + * conversion! See Ulrich Drepper's patch at + * http://subversion.tigris.org/issues/show_bug.cgi?id=807. + */ +} + + +const char * +svn_utf_cstring_from_utf8_fuzzy(const char *src, + apr_pool_t *pool) +{ + return svn_utf__cstring_from_utf8_fuzzy(src, pool, + svn_utf_cstring_from_utf8); +} + + +svn_error_t * +svn_utf_cstring_from_utf8_stringbuf(const char **dest, + const svn_stringbuf_t *src, + apr_pool_t *pool) +{ + svn_stringbuf_t *destbuf; + + SVN_ERR(svn_utf_stringbuf_from_utf8(&destbuf, src, pool)); + *dest = destbuf->data; + + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_utf_cstring_from_utf8_string(const char **dest, + const svn_string_t *src, + apr_pool_t *pool) +{ + svn_stringbuf_t *dbuf; + xlate_handle_node_t *node; + svn_error_t *err; + + SVN_ERR(get_uton_xlate_handle_node(&node, pool)); + + if (node->handle) + { + err = check_utf8(src->data, src->len, pool); + if (! err) + err = convert_to_stringbuf(node, src->data, src->len, + &dbuf, pool); + if (! err) + *dest = dbuf->data; + } + else + { + err = check_non_ascii(src->data, src->len, pool); + if (! err) + *dest = apr_pstrmemdup(pool, src->data, src->len); + } + + err = svn_error_compose_create( + err, + put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool)); + + return err; +} |