diff options
Diffstat (limited to 'subversion/libsvn_subr/dirent_uri.c')
-rw-r--r-- | subversion/libsvn_subr/dirent_uri.c | 2597 |
1 files changed, 2597 insertions, 0 deletions
diff --git a/subversion/libsvn_subr/dirent_uri.c b/subversion/libsvn_subr/dirent_uri.c new file mode 100644 index 0000000..2b51e7a --- /dev/null +++ b/subversion/libsvn_subr/dirent_uri.c @@ -0,0 +1,2597 @@ +/* + * dirent_uri.c: a library to manipulate URIs and directory entries. + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + + + +#include <string.h> +#include <assert.h> +#include <ctype.h> + +#include <apr_uri.h> +#include <apr_lib.h> + +#include "svn_private_config.h" +#include "svn_string.h" +#include "svn_dirent_uri.h" +#include "svn_path.h" +#include "svn_ctype.h" + +#include "dirent_uri.h" +#include "private/svn_fspath.h" + +/* The canonical empty path. Can this be changed? Well, change the empty + test below and the path library will work, not so sure about the fs/wc + libraries. */ +#define SVN_EMPTY_PATH "" + +/* TRUE if s is the canonical empty path, FALSE otherwise */ +#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0') + +/* TRUE if s,n is the platform's empty path ("."), FALSE otherwise. Can + this be changed? Well, the path library will work, not so sure about + the OS! */ +#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.') + +/* This check must match the check on top of dirent_uri-tests.c and + path-tests.c */ +#if defined(WIN32) || defined(__CYGWIN__) || defined(__OS2__) +#define SVN_USE_DOS_PATHS +#endif + +/* Path type definition. Used only by internal functions. */ +typedef enum path_type_t { + type_uri, + type_dirent, + type_relpath +} path_type_t; + + +/**** Forward declarations *****/ + +static svn_boolean_t +relpath_is_canonical(const char *relpath); + + +/**** Internal implementation functions *****/ + +/* Return an internal-style new path based on PATH, allocated in POOL. + * + * "Internal-style" means that separators are all '/'. + */ +static const char * +internal_style(const char *path, apr_pool_t *pool) +{ +#if '/' != SVN_PATH_LOCAL_SEPARATOR + { + char *p = apr_pstrdup(pool, path); + path = p; + + /* Convert all local-style separators to the canonical ones. */ + for (; *p != '\0'; ++p) + if (*p == SVN_PATH_LOCAL_SEPARATOR) + *p = '/'; + } +#endif + + return path; +} + +/* Locale insensitive tolower() for converting parts of dirents and urls + while canonicalizing */ +static char +canonicalize_to_lower(char c) +{ + if (c < 'A' || c > 'Z') + return c; + else + return (char)(c - 'A' + 'a'); +} + +/* Locale insensitive toupper() for converting parts of dirents and urls + while canonicalizing */ +static char +canonicalize_to_upper(char c) +{ + if (c < 'a' || c > 'z') + return c; + else + return (char)(c - 'a' + 'A'); +} + +/* Calculates the length of the dirent absolute or non absolute root in + DIRENT, return 0 if dirent is not rooted */ +static apr_size_t +dirent_root_length(const char *dirent, apr_size_t len) +{ +#ifdef SVN_USE_DOS_PATHS + if (len >= 2 && dirent[1] == ':' && + ((dirent[0] >= 'A' && dirent[0] <= 'Z') || + (dirent[0] >= 'a' && dirent[0] <= 'z'))) + { + return (len > 2 && dirent[2] == '/') ? 3 : 2; + } + + if (len > 2 && dirent[0] == '/' && dirent[1] == '/') + { + apr_size_t i = 2; + + while (i < len && dirent[i] != '/') + i++; + + if (i == len) + return len; /* Cygwin drive alias, invalid path on WIN32 */ + + i++; /* Skip '/' */ + + while (i < len && dirent[i] != '/') + i++; + + return i; + } +#endif /* SVN_USE_DOS_PATHS */ + if (len >= 1 && dirent[0] == '/') + return 1; + + return 0; +} + + +/* Return the length of substring necessary to encompass the entire + * previous dirent segment in DIRENT, which should be a LEN byte string. + * + * A trailing slash will not be included in the returned length except + * in the case in which DIRENT is absolute and there are no more + * previous segments. + */ +static apr_size_t +dirent_previous_segment(const char *dirent, + apr_size_t len) +{ + if (len == 0) + return 0; + + --len; + while (len > 0 && dirent[len] != '/' +#ifdef SVN_USE_DOS_PATHS + && (dirent[len] != ':' || len != 1) +#endif /* SVN_USE_DOS_PATHS */ + ) + --len; + + /* check if the remaining segment including trailing '/' is a root dirent */ + if (dirent_root_length(dirent, len+1) == len + 1) + return len + 1; + else + return len; +} + +/* Calculates the length occupied by the schema defined root of URI */ +static apr_size_t +uri_schema_root_length(const char *uri, apr_size_t len) +{ + apr_size_t i; + + for (i = 0; i < len; i++) + { + if (uri[i] == '/') + { + if (i > 0 && uri[i-1] == ':' && i < len-1 && uri[i+1] == '/') + { + /* We have an absolute uri */ + if (i == 5 && strncmp("file", uri, 4) == 0) + return 7; /* file:// */ + else + { + for (i += 2; i < len; i++) + if (uri[i] == '/') + return i; + + return len; /* Only a hostname is found */ + } + } + else + return 0; + } + } + + return 0; +} + +/* Returns TRUE if svn_dirent_is_absolute(dirent) or when dirent has + a non absolute root. (E.g. '/' or 'F:' on Windows) */ +static svn_boolean_t +dirent_is_rooted(const char *dirent) +{ + if (! dirent) + return FALSE; + + /* Root on all systems */ + if (dirent[0] == '/') + return TRUE; + + /* On Windows, dirent is also absolute when it starts with 'H:' or 'H:/' + where 'H' is any letter. */ +#ifdef SVN_USE_DOS_PATHS + if (((dirent[0] >= 'A' && dirent[0] <= 'Z') || + (dirent[0] >= 'a' && dirent[0] <= 'z')) && + (dirent[1] == ':')) + return TRUE; +#endif /* SVN_USE_DOS_PATHS */ + + return FALSE; +} + +/* Return the length of substring necessary to encompass the entire + * previous relpath segment in RELPATH, which should be a LEN byte string. + * + * A trailing slash will not be included in the returned length. + */ +static apr_size_t +relpath_previous_segment(const char *relpath, + apr_size_t len) +{ + if (len == 0) + return 0; + + --len; + while (len > 0 && relpath[len] != '/') + --len; + + return len; +} + +/* Return the length of substring necessary to encompass the entire + * previous uri segment in URI, which should be a LEN byte string. + * + * A trailing slash will not be included in the returned length except + * in the case in which URI is absolute and there are no more + * previous segments. + */ +static apr_size_t +uri_previous_segment(const char *uri, + apr_size_t len) +{ + apr_size_t root_length; + apr_size_t i = len; + if (len == 0) + return 0; + + root_length = uri_schema_root_length(uri, len); + + --i; + while (len > root_length && uri[i] != '/') + --i; + + if (i == 0 && len > 1 && *uri == '/') + return 1; + + return i; +} + +/* Return the canonicalized version of PATH, of type TYPE, allocated in + * POOL. + */ +static const char * +canonicalize(path_type_t type, const char *path, apr_pool_t *pool) +{ + char *canon, *dst; + const char *src; + apr_size_t seglen; + apr_size_t schemelen = 0; + apr_size_t canon_segments = 0; + svn_boolean_t url = FALSE; + char *schema_data = NULL; + + /* "" is already canonical, so just return it; note that later code + depends on path not being zero-length. */ + if (SVN_PATH_IS_EMPTY(path)) + { + assert(type != type_uri); + return ""; + } + + dst = canon = apr_pcalloc(pool, strlen(path) + 1); + + /* If this is supposed to be an URI, it should start with + "scheme://". We'll copy the scheme, host name, etc. to DST and + set URL = TRUE. */ + src = path; + if (type == type_uri) + { + assert(*src != '/'); + + while (*src && (*src != '/') && (*src != ':')) + src++; + + if (*src == ':' && *(src+1) == '/' && *(src+2) == '/') + { + const char *seg; + + url = TRUE; + + /* Found a scheme, convert to lowercase and copy to dst. */ + src = path; + while (*src != ':') + { + *(dst++) = canonicalize_to_lower((*src++)); + schemelen++; + } + *(dst++) = ':'; + *(dst++) = '/'; + *(dst++) = '/'; + src += 3; + schemelen += 3; + + /* This might be the hostname */ + seg = src; + while (*src && (*src != '/') && (*src != '@')) + src++; + + if (*src == '@') + { + /* Copy the username & password. */ + seglen = src - seg + 1; + memcpy(dst, seg, seglen); + dst += seglen; + src++; + } + else + src = seg; + + /* Found a hostname, convert to lowercase and copy to dst. */ + if (*src == '[') + { + *(dst++) = *(src++); /* Copy '[' */ + + while (*src == ':' + || (*src >= '0' && (*src <= '9')) + || (*src >= 'a' && (*src <= 'f')) + || (*src >= 'A' && (*src <= 'F'))) + { + *(dst++) = canonicalize_to_lower((*src++)); + } + + if (*src == ']') + *(dst++) = *(src++); /* Copy ']' */ + } + else + while (*src && (*src != '/') && (*src != ':')) + *(dst++) = canonicalize_to_lower((*src++)); + + if (*src == ':') + { + /* We probably have a port number: Is it a default portnumber + which doesn't belong in a canonical url? */ + if (src[1] == '8' && src[2] == '0' + && (src[3]== '/'|| !src[3]) + && !strncmp(canon, "http:", 5)) + { + src += 3; + } + else if (src[1] == '4' && src[2] == '4' && src[3] == '3' + && (src[4]== '/'|| !src[4]) + && !strncmp(canon, "https:", 6)) + { + src += 4; + } + else if (src[1] == '3' && src[2] == '6' + && src[3] == '9' && src[4] == '0' + && (src[5]== '/'|| !src[5]) + && !strncmp(canon, "svn:", 4)) + { + src += 5; + } + else if (src[1] == '/' || !src[1]) + { + src += 1; + } + + while (*src && (*src != '/')) + *(dst++) = canonicalize_to_lower((*src++)); + } + + /* Copy trailing slash, or null-terminator. */ + *(dst) = *(src); + + /* Move src and dst forward only if we are not + * at null-terminator yet. */ + if (*src) + { + src++; + dst++; + schema_data = dst; + } + + canon_segments = 1; + } + } + + /* Copy to DST any separator or drive letter that must come before the + first regular path segment. */ + if (! url && type != type_relpath) + { + src = path; + /* If this is an absolute path, then just copy over the initial + separator character. */ + if (*src == '/') + { + *(dst++) = *(src++); + +#ifdef SVN_USE_DOS_PATHS + /* On Windows permit two leading separator characters which means an + * UNC path. */ + if ((type == type_dirent) && *src == '/') + *(dst++) = *(src++); +#endif /* SVN_USE_DOS_PATHS */ + } +#ifdef SVN_USE_DOS_PATHS + /* On Windows the first segment can be a drive letter, which we normalize + to upper case. */ + else if (type == type_dirent && + ((*src >= 'a' && *src <= 'z') || + (*src >= 'A' && *src <= 'Z')) && + (src[1] == ':')) + { + *(dst++) = canonicalize_to_upper(*(src++)); + /* Leave the ':' to be processed as (or as part of) a path segment + by the following code block, so we need not care whether it has + a slash after it. */ + } +#endif /* SVN_USE_DOS_PATHS */ + } + + while (*src) + { + /* Parse each segment, finding the closing '/' (which might look + like '%2F' for URIs). */ + const char *next = src; + apr_size_t slash_len = 0; + + while (*next + && (next[0] != '/') + && (! (type == type_uri && next[0] == '%' && next[1] == '2' && + canonicalize_to_upper(next[2]) == 'F'))) + { + ++next; + } + + /* Record how long our "slash" is. */ + if (next[0] == '/') + slash_len = 1; + else if (type == type_uri && next[0] == '%') + slash_len = 3; + + seglen = next - src; + + if (seglen == 0 + || (seglen == 1 && src[0] == '.') + || (type == type_uri && seglen == 3 && src[0] == '%' && src[1] == '2' + && canonicalize_to_upper(src[2]) == 'E')) + { + /* Empty or noop segment, so do nothing. (For URIs, '%2E' + is equivalent to '.'). */ + } +#ifdef SVN_USE_DOS_PATHS + /* If this is the first path segment of a file:// URI and it contains a + windows drive letter, convert the drive letter to upper case. */ + else if (url && canon_segments == 1 && seglen == 2 && + (strncmp(canon, "file:", 5) == 0) && + src[0] >= 'a' && src[0] <= 'z' && src[1] == ':') + { + *(dst++) = canonicalize_to_upper(src[0]); + *(dst++) = ':'; + if (*next) + *(dst++) = *next; + canon_segments++; + } +#endif /* SVN_USE_DOS_PATHS */ + else + { + /* An actual segment, append it to the destination path */ + memcpy(dst, src, seglen); + dst += seglen; + if (slash_len) + *(dst++) = '/'; + canon_segments++; + } + + /* Skip over trailing slash to the next segment. */ + src = next + slash_len; + } + + /* Remove the trailing slash if there was at least one + * canonical segment and the last segment ends with a slash. + * + * But keep in mind that, for URLs, the scheme counts as a + * canonical segment -- so if path is ONLY a scheme (such + * as "https://") we should NOT remove the trailing slash. */ + if ((canon_segments > 0 && *(dst - 1) == '/') + && ! (url && path[schemelen] == '\0')) + { + dst --; + } + + *dst = '\0'; + +#ifdef SVN_USE_DOS_PATHS + /* Skip leading double slashes when there are less than 2 + * canon segments. UNC paths *MUST* have two segments. */ + if ((type == type_dirent) && canon[0] == '/' && canon[1] == '/') + { + if (canon_segments < 2) + return canon + 1; + else + { + /* Now we're sure this is a valid UNC path, convert the server name + (the first path segment) to lowercase as Windows treats it as case + insensitive. + Note: normally the share name is treated as case insensitive too, + but it seems to be possible to configure Samba to treat those as + case sensitive, so better leave that alone. */ + for (dst = canon + 2; *dst && *dst != '/'; dst++) + *dst = canonicalize_to_lower(*dst); + } + } +#endif /* SVN_USE_DOS_PATHS */ + + /* Check the normalization of characters in a uri */ + if (schema_data) + { + int need_extra = 0; + src = schema_data; + + while (*src) + { + switch (*src) + { + case '/': + break; + case '%': + if (!svn_ctype_isxdigit(*(src+1)) || + !svn_ctype_isxdigit(*(src+2))) + need_extra += 2; + else + src += 2; + break; + default: + if (!svn_uri__char_validity[(unsigned char)*src]) + need_extra += 2; + break; + } + src++; + } + + if (need_extra > 0) + { + apr_size_t pre_schema_size = (apr_size_t)(schema_data - canon); + + dst = apr_palloc(pool, (apr_size_t)(src - canon) + need_extra + 1); + memcpy(dst, canon, pre_schema_size); + canon = dst; + + dst += pre_schema_size; + } + else + dst = schema_data; + + src = schema_data; + + while (*src) + { + switch (*src) + { + case '/': + *(dst++) = '/'; + break; + case '%': + if (!svn_ctype_isxdigit(*(src+1)) || + !svn_ctype_isxdigit(*(src+2))) + { + *(dst++) = '%'; + *(dst++) = '2'; + *(dst++) = '5'; + } + else + { + char digitz[3]; + int val; + + digitz[0] = *(++src); + digitz[1] = *(++src); + digitz[2] = 0; + + val = (int)strtol(digitz, NULL, 16); + + if (svn_uri__char_validity[(unsigned char)val]) + *(dst++) = (char)val; + else + { + *(dst++) = '%'; + *(dst++) = canonicalize_to_upper(digitz[0]); + *(dst++) = canonicalize_to_upper(digitz[1]); + } + } + break; + default: + if (!svn_uri__char_validity[(unsigned char)*src]) + { + apr_snprintf(dst, 4, "%%%02X", (unsigned char)*src); + dst += 3; + } + else + *(dst++) = *src; + break; + } + src++; + } + *dst = '\0'; + } + + return canon; +} + +/* Return the string length of the longest common ancestor of PATH1 and PATH2. + * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if + * PATH1 and PATH2 are regular paths. + * + * If the two paths do not share a common ancestor, return 0. + * + * New strings are allocated in POOL. + */ +static apr_size_t +get_longest_ancestor_length(path_type_t types, + const char *path1, + const char *path2, + apr_pool_t *pool) +{ + apr_size_t path1_len, path2_len; + apr_size_t i = 0; + apr_size_t last_dirsep = 0; +#ifdef SVN_USE_DOS_PATHS + svn_boolean_t unc = FALSE; +#endif + + path1_len = strlen(path1); + path2_len = strlen(path2); + + if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2)) + return 0; + + while (path1[i] == path2[i]) + { + /* Keep track of the last directory separator we hit. */ + if (path1[i] == '/') + last_dirsep = i; + + i++; + + /* If we get to the end of either path, break out. */ + if ((i == path1_len) || (i == path2_len)) + break; + } + + /* two special cases: + 1. '/' is the longest common ancestor of '/' and '/foo' */ + if (i == 1 && path1[0] == '/' && path2[0] == '/') + return 1; + /* 2. '' is the longest common ancestor of any non-matching + * strings 'foo' and 'bar' */ + if (types == type_dirent && i == 0) + return 0; + + /* Handle some windows specific cases */ +#ifdef SVN_USE_DOS_PATHS + if (types == type_dirent) + { + /* don't count the '//' from UNC paths */ + if (last_dirsep == 1 && path1[0] == '/' && path1[1] == '/') + { + last_dirsep = 0; + unc = TRUE; + } + + /* X:/ and X:/foo */ + if (i == 3 && path1[2] == '/' && path1[1] == ':') + return i; + + /* Cannot use SVN_ERR_ASSERT here, so we'll have to crash, sorry. + * Note that this assertion triggers only if the code above has + * been broken. The code below relies on this assertion, because + * it uses [i - 1] as index. */ + assert(i > 0); + + /* X: and X:/ */ + if ((path1[i - 1] == ':' && path2[i] == '/') || + (path2[i - 1] == ':' && path1[i] == '/')) + return 0; + /* X: and X:foo */ + if (path1[i - 1] == ':' || path2[i - 1] == ':') + return i; + } +#endif /* SVN_USE_DOS_PATHS */ + + /* last_dirsep is now the offset of the last directory separator we + crossed before reaching a non-matching byte. i is the offset of + that non-matching byte, and is guaranteed to be <= the length of + whichever path is shorter. + If one of the paths is the common part return that. */ + if (((i == path1_len) && (path2[i] == '/')) + || ((i == path2_len) && (path1[i] == '/')) + || ((i == path1_len) && (i == path2_len))) + return i; + else + { + /* Nothing in common but the root folder '/' or 'X:/' for Windows + dirents. */ +#ifdef SVN_USE_DOS_PATHS + if (! unc) + { + /* X:/foo and X:/bar returns X:/ */ + if ((types == type_dirent) && + last_dirsep == 2 && path1[1] == ':' && path1[2] == '/' + && path2[1] == ':' && path2[2] == '/') + return 3; +#endif /* SVN_USE_DOS_PATHS */ + if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/') + return 1; +#ifdef SVN_USE_DOS_PATHS + } +#endif + } + + return last_dirsep; +} + +/* Determine whether PATH2 is a child of PATH1. + * + * PATH2 is a child of PATH1 if + * 1) PATH1 is empty, and PATH2 is not empty and not an absolute path. + * or + * 2) PATH2 is has n components, PATH1 has x < n components, + * and PATH1 matches PATH2 in all its x components. + * Components are separated by a slash, '/'. + * + * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if + * PATH1 and PATH2 are regular paths. + * + * If PATH2 is not a child of PATH1, return NULL. + * + * If PATH2 is a child of PATH1, and POOL is not NULL, allocate a copy + * of the child part of PATH2 in POOL and return a pointer to the + * newly allocated child part. + * + * If PATH2 is a child of PATH1, and POOL is NULL, return a pointer + * pointing to the child part of PATH2. + * */ +static const char * +is_child(path_type_t type, const char *path1, const char *path2, + apr_pool_t *pool) +{ + apr_size_t i; + + /* Allow "" and "foo" or "H:foo" to be parent/child */ + if (SVN_PATH_IS_EMPTY(path1)) /* "" is the parent */ + { + if (SVN_PATH_IS_EMPTY(path2)) /* "" not a child */ + return NULL; + + /* check if this is an absolute path */ + if ((type == type_uri) || + (type == type_dirent && dirent_is_rooted(path2))) + return NULL; + else + /* everything else is child */ + return pool ? apr_pstrdup(pool, path2) : path2; + } + + /* Reach the end of at least one of the paths. How should we handle + things like path1:"foo///bar" and path2:"foo/bar/baz"? It doesn't + appear to arise in the current Subversion code, it's not clear to me + if they should be parent/child or not. */ + /* Hmmm... aren't paths assumed to be canonical in this function? + * How can "foo///bar" even happen if the paths are canonical? */ + for (i = 0; path1[i] && path2[i]; i++) + if (path1[i] != path2[i]) + return NULL; + + /* FIXME: This comment does not really match + * the checks made in the code it refers to: */ + /* There are two cases that are parent/child + ... path1[i] == '\0' + .../foo path2[i] == '/' + or + / path1[i] == '\0' + /foo path2[i] != '/' + + Other root paths (like X:/) fall under the former case: + X:/ path1[i] == '\0' + X:/foo path2[i] != '/' + + Check for '//' to avoid matching '/' and '//srv'. + */ + if (path1[i] == '\0' && path2[i]) + { + if (path1[i - 1] == '/' +#ifdef SVN_USE_DOS_PATHS + || ((type == type_dirent) && path1[i - 1] == ':') +#endif + ) + { + if (path2[i] == '/') + /* .../ + * ..../ + * i */ + return NULL; + else + /* .../ + * .../foo + * i */ + return pool ? apr_pstrdup(pool, path2 + i) : path2 + i; + } + else if (path2[i] == '/') + { + if (path2[i + 1]) + /* ... + * .../foo + * i */ + return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1; + else + /* ... + * .../ + * i */ + return NULL; + } + } + + /* Otherwise, path2 isn't a child. */ + return NULL; +} + + +/**** Public API functions ****/ + +const char * +svn_dirent_internal_style(const char *dirent, apr_pool_t *pool) +{ + return svn_dirent_canonicalize(internal_style(dirent, pool), pool); +} + +const char * +svn_dirent_local_style(const char *dirent, apr_pool_t *pool) +{ + /* Internally, Subversion represents the current directory with the + empty string. But users like to see "." . */ + if (SVN_PATH_IS_EMPTY(dirent)) + return "."; + +#if '/' != SVN_PATH_LOCAL_SEPARATOR + { + char *p = apr_pstrdup(pool, dirent); + dirent = p; + + /* Convert all canonical separators to the local-style ones. */ + for (; *p != '\0'; ++p) + if (*p == '/') + *p = SVN_PATH_LOCAL_SEPARATOR; + } +#endif + + return dirent; +} + +const char * +svn_relpath__internal_style(const char *relpath, + apr_pool_t *pool) +{ + return svn_relpath_canonicalize(internal_style(relpath, pool), pool); +} + + +/* We decided against using apr_filepath_root here because of the negative + performance impact (creating a pool and converting strings ). */ +svn_boolean_t +svn_dirent_is_root(const char *dirent, apr_size_t len) +{ +#ifdef SVN_USE_DOS_PATHS + /* On Windows and Cygwin, 'H:' or 'H:/' (where 'H' is any letter) + are also root directories */ + if ((len == 2 || ((len == 3) && (dirent[2] == '/'))) && + (dirent[1] == ':') && + ((dirent[0] >= 'A' && dirent[0] <= 'Z') || + (dirent[0] >= 'a' && dirent[0] <= 'z'))) + return TRUE; + + /* On Windows and Cygwin //server/share is a root directory, + and on Cygwin //drive is a drive alias */ + if (len >= 2 && dirent[0] == '/' && dirent[1] == '/' + && dirent[len - 1] != '/') + { + int segments = 0; + apr_size_t i; + for (i = len; i >= 2; i--) + { + if (dirent[i] == '/') + { + segments ++; + if (segments > 1) + return FALSE; + } + } +#ifdef __CYGWIN__ + return (segments <= 1); +#else + return (segments == 1); /* //drive is invalid on plain Windows */ +#endif + } +#endif + + /* directory is root if it's equal to '/' */ + if (len == 1 && dirent[0] == '/') + return TRUE; + + return FALSE; +} + +svn_boolean_t +svn_uri_is_root(const char *uri, apr_size_t len) +{ + assert(svn_uri_is_canonical(uri, NULL)); + return (len == uri_schema_root_length(uri, len)); +} + +char *svn_dirent_join(const char *base, + const char *component, + apr_pool_t *pool) +{ + apr_size_t blen = strlen(base); + apr_size_t clen = strlen(component); + char *dirent; + int add_separator; + + assert(svn_dirent_is_canonical(base, pool)); + assert(svn_dirent_is_canonical(component, pool)); + + /* If the component is absolute, then return it. */ + if (svn_dirent_is_absolute(component)) + return apr_pmemdup(pool, component, clen + 1); + + /* If either is empty return the other */ + if (SVN_PATH_IS_EMPTY(base)) + return apr_pmemdup(pool, component, clen + 1); + if (SVN_PATH_IS_EMPTY(component)) + return apr_pmemdup(pool, base, blen + 1); + +#ifdef SVN_USE_DOS_PATHS + if (component[0] == '/') + { + /* '/' is drive relative on Windows, not absolute like on Posix */ + if (dirent_is_rooted(base)) + { + /* Join component without '/' to root-of(base) */ + blen = dirent_root_length(base, blen); + component++; + clen--; + + if (blen == 2 && base[1] == ':') /* "C:" case */ + { + char *root = apr_pmemdup(pool, base, 3); + root[2] = '/'; /* We don't need the final '\0' */ + + base = root; + blen = 3; + } + + if (clen == 0) + return apr_pstrndup(pool, base, blen); + } + else + return apr_pmemdup(pool, component, clen + 1); + } + else if (dirent_is_rooted(component)) + return apr_pmemdup(pool, component, clen + 1); +#endif /* SVN_USE_DOS_PATHS */ + + /* if last character of base is already a separator, don't add a '/' */ + add_separator = 1; + if (base[blen - 1] == '/' +#ifdef SVN_USE_DOS_PATHS + || base[blen - 1] == ':' +#endif + ) + add_separator = 0; + + /* Construct the new, combined dirent. */ + dirent = apr_palloc(pool, blen + add_separator + clen + 1); + memcpy(dirent, base, blen); + if (add_separator) + dirent[blen] = '/'; + memcpy(dirent + blen + add_separator, component, clen + 1); + + return dirent; +} + +char *svn_dirent_join_many(apr_pool_t *pool, const char *base, ...) +{ +#define MAX_SAVED_LENGTHS 10 + apr_size_t saved_lengths[MAX_SAVED_LENGTHS]; + apr_size_t total_len; + int nargs; + va_list va; + const char *s; + apr_size_t len; + char *dirent; + char *p; + int add_separator; + int base_arg = 0; + + total_len = strlen(base); + + assert(svn_dirent_is_canonical(base, pool)); + + /* if last character of base is already a separator, don't add a '/' */ + add_separator = 1; + if (total_len == 0 + || base[total_len - 1] == '/' +#ifdef SVN_USE_DOS_PATHS + || base[total_len - 1] == ':' +#endif + ) + add_separator = 0; + + saved_lengths[0] = total_len; + + /* Compute the length of the resulting string. */ + + nargs = 0; + va_start(va, base); + while ((s = va_arg(va, const char *)) != NULL) + { + len = strlen(s); + + assert(svn_dirent_is_canonical(s, pool)); + + if (SVN_PATH_IS_EMPTY(s)) + continue; + + if (nargs++ < MAX_SAVED_LENGTHS) + saved_lengths[nargs] = len; + + if (dirent_is_rooted(s)) + { + total_len = len; + base_arg = nargs; + +#ifdef SVN_USE_DOS_PATHS + if (!svn_dirent_is_absolute(s)) /* Handle non absolute roots */ + { + /* Set new base and skip the current argument */ + base = s = svn_dirent_join(base, s, pool); + base_arg++; + saved_lengths[0] = total_len = len = strlen(s); + } + else +#endif /* SVN_USE_DOS_PATHS */ + { + base = ""; /* Don't add base */ + saved_lengths[0] = 0; + } + + add_separator = 1; + if (s[len - 1] == '/' +#ifdef SVN_USE_DOS_PATHS + || s[len - 1] == ':' +#endif + ) + add_separator = 0; + } + else if (nargs <= base_arg + 1) + { + total_len += add_separator + len; + } + else + { + total_len += 1 + len; + } + } + va_end(va); + + /* base == "/" and no further components. just return that. */ + if (add_separator == 0 && total_len == 1) + return apr_pmemdup(pool, "/", 2); + + /* we got the total size. allocate it, with room for a NULL character. */ + dirent = p = apr_palloc(pool, total_len + 1); + + /* if we aren't supposed to skip forward to an absolute component, and if + this is not an empty base that we are skipping, then copy the base + into the output. */ + if (! SVN_PATH_IS_EMPTY(base)) + { + memcpy(p, base, len = saved_lengths[0]); + p += len; + } + + nargs = 0; + va_start(va, base); + while ((s = va_arg(va, const char *)) != NULL) + { + if (SVN_PATH_IS_EMPTY(s)) + continue; + + if (++nargs < base_arg) + continue; + + if (nargs < MAX_SAVED_LENGTHS) + len = saved_lengths[nargs]; + else + len = strlen(s); + + /* insert a separator if we aren't copying in the first component + (which can happen when base_arg is set). also, don't put in a slash + if the prior character is a slash (occurs when prior component + is "/"). */ + if (p != dirent && + ( ! (nargs - 1 <= base_arg) || add_separator)) + *p++ = '/'; + + /* copy the new component and advance the pointer */ + memcpy(p, s, len); + p += len; + } + va_end(va); + + *p = '\0'; + assert((apr_size_t)(p - dirent) == total_len); + + return dirent; +} + +char * +svn_relpath_join(const char *base, + const char *component, + apr_pool_t *pool) +{ + apr_size_t blen = strlen(base); + apr_size_t clen = strlen(component); + char *path; + + assert(relpath_is_canonical(base)); + assert(relpath_is_canonical(component)); + + /* If either is empty return the other */ + if (blen == 0) + return apr_pmemdup(pool, component, clen + 1); + if (clen == 0) + return apr_pmemdup(pool, base, blen + 1); + + path = apr_palloc(pool, blen + 1 + clen + 1); + memcpy(path, base, blen); + path[blen] = '/'; + memcpy(path + blen + 1, component, clen + 1); + + return path; +} + +char * +svn_dirent_dirname(const char *dirent, apr_pool_t *pool) +{ + apr_size_t len = strlen(dirent); + + assert(svn_dirent_is_canonical(dirent, pool)); + + if (len == dirent_root_length(dirent, len)) + return apr_pstrmemdup(pool, dirent, len); + else + return apr_pstrmemdup(pool, dirent, dirent_previous_segment(dirent, len)); +} + +const char * +svn_dirent_basename(const char *dirent, apr_pool_t *pool) +{ + apr_size_t len = strlen(dirent); + apr_size_t start; + + assert(!pool || svn_dirent_is_canonical(dirent, pool)); + + if (svn_dirent_is_root(dirent, len)) + return ""; + else + { + start = len; + while (start > 0 && dirent[start - 1] != '/' +#ifdef SVN_USE_DOS_PATHS + && dirent[start - 1] != ':' +#endif + ) + --start; + } + + if (pool) + return apr_pstrmemdup(pool, dirent + start, len - start); + else + return dirent + start; +} + +void +svn_dirent_split(const char **dirpath, + const char **base_name, + const char *dirent, + apr_pool_t *pool) +{ + assert(dirpath != base_name); + + if (dirpath) + *dirpath = svn_dirent_dirname(dirent, pool); + + if (base_name) + *base_name = svn_dirent_basename(dirent, pool); +} + +char * +svn_relpath_dirname(const char *relpath, + apr_pool_t *pool) +{ + apr_size_t len = strlen(relpath); + + assert(relpath_is_canonical(relpath)); + + return apr_pstrmemdup(pool, relpath, + relpath_previous_segment(relpath, len)); +} + +const char * +svn_relpath_basename(const char *relpath, + apr_pool_t *pool) +{ + apr_size_t len = strlen(relpath); + apr_size_t start; + + assert(relpath_is_canonical(relpath)); + + start = len; + while (start > 0 && relpath[start - 1] != '/') + --start; + + if (pool) + return apr_pstrmemdup(pool, relpath + start, len - start); + else + return relpath + start; +} + +void +svn_relpath_split(const char **dirpath, + const char **base_name, + const char *relpath, + apr_pool_t *pool) +{ + assert(dirpath != base_name); + + if (dirpath) + *dirpath = svn_relpath_dirname(relpath, pool); + + if (base_name) + *base_name = svn_relpath_basename(relpath, pool); +} + +char * +svn_uri_dirname(const char *uri, apr_pool_t *pool) +{ + apr_size_t len = strlen(uri); + + assert(svn_uri_is_canonical(uri, pool)); + + if (svn_uri_is_root(uri, len)) + return apr_pstrmemdup(pool, uri, len); + else + return apr_pstrmemdup(pool, uri, uri_previous_segment(uri, len)); +} + +const char * +svn_uri_basename(const char *uri, apr_pool_t *pool) +{ + apr_size_t len = strlen(uri); + apr_size_t start; + + assert(svn_uri_is_canonical(uri, NULL)); + + if (svn_uri_is_root(uri, len)) + return ""; + + start = len; + while (start > 0 && uri[start - 1] != '/') + --start; + + return svn_path_uri_decode(uri + start, pool); +} + +void +svn_uri_split(const char **dirpath, + const char **base_name, + const char *uri, + apr_pool_t *pool) +{ + assert(dirpath != base_name); + + if (dirpath) + *dirpath = svn_uri_dirname(uri, pool); + + if (base_name) + *base_name = svn_uri_basename(uri, pool); +} + +char * +svn_dirent_get_longest_ancestor(const char *dirent1, + const char *dirent2, + apr_pool_t *pool) +{ + return apr_pstrndup(pool, dirent1, + get_longest_ancestor_length(type_dirent, dirent1, + dirent2, pool)); +} + +char * +svn_relpath_get_longest_ancestor(const char *relpath1, + const char *relpath2, + apr_pool_t *pool) +{ + assert(relpath_is_canonical(relpath1)); + assert(relpath_is_canonical(relpath2)); + + return apr_pstrndup(pool, relpath1, + get_longest_ancestor_length(type_relpath, relpath1, + relpath2, pool)); +} + +char * +svn_uri_get_longest_ancestor(const char *uri1, + const char *uri2, + apr_pool_t *pool) +{ + apr_size_t uri_ancestor_len; + apr_size_t i = 0; + + assert(svn_uri_is_canonical(uri1, NULL)); + assert(svn_uri_is_canonical(uri2, NULL)); + + /* Find ':' */ + while (1) + { + /* No shared protocol => no common prefix */ + if (uri1[i] != uri2[i]) + return apr_pmemdup(pool, SVN_EMPTY_PATH, + sizeof(SVN_EMPTY_PATH)); + + if (uri1[i] == ':') + break; + + /* They're both URLs, so EOS can't come before ':' */ + assert((uri1[i] != '\0') && (uri2[i] != '\0')); + + i++; + } + + i += 3; /* Advance past '://' */ + + uri_ancestor_len = get_longest_ancestor_length(type_uri, uri1 + i, + uri2 + i, pool); + + if (uri_ancestor_len == 0 || + (uri_ancestor_len == 1 && (uri1 + i)[0] == '/')) + return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH)); + else + return apr_pstrndup(pool, uri1, uri_ancestor_len + i); +} + +const char * +svn_dirent_is_child(const char *parent_dirent, + const char *child_dirent, + apr_pool_t *pool) +{ + return is_child(type_dirent, parent_dirent, child_dirent, pool); +} + +const char * +svn_dirent_skip_ancestor(const char *parent_dirent, + const char *child_dirent) +{ + apr_size_t len = strlen(parent_dirent); + apr_size_t root_len; + + if (0 != strncmp(parent_dirent, child_dirent, len)) + return NULL; /* parent_dirent is no ancestor of child_dirent */ + + if (child_dirent[len] == 0) + return ""; /* parent_dirent == child_dirent */ + + /* Child == parent + more-characters */ + + root_len = dirent_root_length(child_dirent, strlen(child_dirent)); + if (root_len > len) + /* Different root, e.g. ("" "/...") or ("//z" "//z/share") */ + return NULL; + + /* Now, child == [root-of-parent] + [rest-of-parent] + more-characters. + * It must be one of the following forms. + * + * rlen parent child bad? rlen=len? c[len]=/? + * 0 "" "foo" * + * 0 "b" "bad" ! + * 0 "b" "b/foo" * + * 1 "/" "/foo" * + * 1 "/b" "/bad" ! + * 1 "/b" "/b/foo" * + * 2 "a:" "a:foo" * + * 2 "a:b" "a:bad" ! + * 2 "a:b" "a:b/foo" * + * 3 "a:/" "a:/foo" * + * 3 "a:/b" "a:/bad" ! + * 3 "a:/b" "a:/b/foo" * + * 5 "//s/s" "//s/s/foo" * * + * 5 "//s/s/b" "//s/s/bad" ! + * 5 "//s/s/b" "//s/s/b/foo" * + */ + + if (child_dirent[len] == '/') + /* "parent|child" is one of: + * "[a:]b|/foo" "[a:]/b|/foo" "//s/s|/foo" "//s/s/b|/foo" */ + return child_dirent + len + 1; + + if (root_len == len) + /* "parent|child" is "|foo" "/|foo" "a:|foo" "a:/|foo" "//s/s|/foo" */ + return child_dirent + len; + + return NULL; +} + +const char * +svn_relpath_skip_ancestor(const char *parent_relpath, + const char *child_relpath) +{ + apr_size_t len = strlen(parent_relpath); + + assert(relpath_is_canonical(parent_relpath)); + assert(relpath_is_canonical(child_relpath)); + + if (len == 0) + return child_relpath; + + if (0 != strncmp(parent_relpath, child_relpath, len)) + return NULL; /* parent_relpath is no ancestor of child_relpath */ + + if (child_relpath[len] == 0) + return ""; /* parent_relpath == child_relpath */ + + if (child_relpath[len] == '/') + return child_relpath + len + 1; + + return NULL; +} + + +/* */ +static const char * +uri_skip_ancestor(const char *parent_uri, + const char *child_uri) +{ + apr_size_t len = strlen(parent_uri); + + assert(svn_uri_is_canonical(parent_uri, NULL)); + assert(svn_uri_is_canonical(child_uri, NULL)); + + if (0 != strncmp(parent_uri, child_uri, len)) + return NULL; /* parent_uri is no ancestor of child_uri */ + + if (child_uri[len] == 0) + return ""; /* parent_uri == child_uri */ + + if (child_uri[len] == '/') + return child_uri + len + 1; + + return NULL; +} + +const char * +svn_uri_skip_ancestor(const char *parent_uri, + const char *child_uri, + apr_pool_t *result_pool) +{ + const char *result = uri_skip_ancestor(parent_uri, child_uri); + + return result ? svn_path_uri_decode(result, result_pool) : NULL; +} + +svn_boolean_t +svn_dirent_is_ancestor(const char *parent_dirent, const char *child_dirent) +{ + return svn_dirent_skip_ancestor(parent_dirent, child_dirent) != NULL; +} + +svn_boolean_t +svn_uri__is_ancestor(const char *parent_uri, const char *child_uri) +{ + return uri_skip_ancestor(parent_uri, child_uri) != NULL; +} + + +svn_boolean_t +svn_dirent_is_absolute(const char *dirent) +{ + if (! dirent) + return FALSE; + + /* dirent is absolute if it starts with '/' on non-Windows platforms + or with '//' on Windows platforms */ + if (dirent[0] == '/' +#ifdef SVN_USE_DOS_PATHS + && dirent[1] == '/' /* Single '/' depends on current drive */ +#endif + ) + return TRUE; + + /* On Windows, dirent is also absolute when it starts with 'H:/' + where 'H' is any letter. */ +#ifdef SVN_USE_DOS_PATHS + if (((dirent[0] >= 'A' && dirent[0] <= 'Z')) && + (dirent[1] == ':') && (dirent[2] == '/')) + return TRUE; +#endif /* SVN_USE_DOS_PATHS */ + + return FALSE; +} + +svn_error_t * +svn_dirent_get_absolute(const char **pabsolute, + const char *relative, + apr_pool_t *pool) +{ + char *buffer; + apr_status_t apr_err; + const char *path_apr; + + SVN_ERR_ASSERT(! svn_path_is_url(relative)); + + /* Merge the current working directory with the relative dirent. */ + SVN_ERR(svn_path_cstring_from_utf8(&path_apr, relative, pool)); + + apr_err = apr_filepath_merge(&buffer, NULL, + path_apr, + APR_FILEPATH_NOTRELATIVE, + pool); + if (apr_err) + { + /* In some cases when the passed path or its ancestor(s) do not exist + or no longer exist apr returns an error. + + In many of these cases we would like to return a path anyway, when the + passed path was already a safe absolute path. So check for that now to + avoid an error. + + svn_dirent_is_absolute() doesn't perform the necessary checks to see + if the path doesn't need post processing to be in the canonical absolute + format. + */ + + if (svn_dirent_is_absolute(relative) + && svn_dirent_is_canonical(relative, pool) + && !svn_path_is_backpath_present(relative)) + { + *pabsolute = apr_pstrdup(pool, relative); + return SVN_NO_ERROR; + } + + return svn_error_createf(SVN_ERR_BAD_FILENAME, + svn_error_create(apr_err, NULL, NULL), + _("Couldn't determine absolute path of '%s'"), + svn_dirent_local_style(relative, pool)); + } + + SVN_ERR(svn_path_cstring_to_utf8(pabsolute, buffer, pool)); + *pabsolute = svn_dirent_canonicalize(*pabsolute, pool); + return SVN_NO_ERROR; +} + +const char * +svn_uri_canonicalize(const char *uri, apr_pool_t *pool) +{ + return canonicalize(type_uri, uri, pool); +} + +const char * +svn_relpath_canonicalize(const char *relpath, apr_pool_t *pool) +{ + return canonicalize(type_relpath, relpath, pool); +} + +const char * +svn_dirent_canonicalize(const char *dirent, apr_pool_t *pool) +{ + const char *dst = canonicalize(type_dirent, dirent, pool); + +#ifdef SVN_USE_DOS_PATHS + /* Handle a specific case on Windows where path == "X:/". Here we have to + append the final '/', as svn_path_canonicalize will chop this of. */ + if (((dirent[0] >= 'A' && dirent[0] <= 'Z') || + (dirent[0] >= 'a' && dirent[0] <= 'z')) && + dirent[1] == ':' && dirent[2] == '/' && + dst[3] == '\0') + { + char *dst_slash = apr_pcalloc(pool, 4); + dst_slash[0] = canonicalize_to_upper(dirent[0]); + dst_slash[1] = ':'; + dst_slash[2] = '/'; + dst_slash[3] = '\0'; + + return dst_slash; + } +#endif /* SVN_USE_DOS_PATHS */ + + return dst; +} + +svn_boolean_t +svn_dirent_is_canonical(const char *dirent, apr_pool_t *scratch_pool) +{ + const char *ptr = dirent; + if (*ptr == '/') + { + ptr++; +#ifdef SVN_USE_DOS_PATHS + /* Check for UNC paths */ + if (*ptr == '/') + { + /* TODO: Scan hostname and sharename and fall back to part code */ + + /* ### Fall back to old implementation */ + return (strcmp(dirent, svn_dirent_canonicalize(dirent, scratch_pool)) + == 0); + } +#endif /* SVN_USE_DOS_PATHS */ + } +#ifdef SVN_USE_DOS_PATHS + else if (((*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z')) && + (ptr[1] == ':')) + { + /* The only canonical drive names are "A:"..."Z:", no lower case */ + if (*ptr < 'A' || *ptr > 'Z') + return FALSE; + + ptr += 2; + + if (*ptr == '/') + ptr++; + } +#endif /* SVN_USE_DOS_PATHS */ + + return relpath_is_canonical(ptr); +} + +static svn_boolean_t +relpath_is_canonical(const char *relpath) +{ + const char *ptr = relpath, *seg = relpath; + + /* RELPATH is canonical if it has: + * - no '.' segments + * - no start and closing '/' + * - no '//' + */ + + if (*relpath == '\0') + return TRUE; + + if (*ptr == '/') + return FALSE; + + /* Now validate the rest of the path. */ + while(1) + { + apr_size_t seglen = ptr - seg; + + if (seglen == 1 && *seg == '.') + return FALSE; /* /./ */ + + if (*ptr == '/' && *(ptr+1) == '/') + return FALSE; /* // */ + + if (! *ptr && *(ptr - 1) == '/') + return FALSE; /* foo/ */ + + if (! *ptr) + break; + + if (*ptr == '/') + ptr++; + seg = ptr; + + while (*ptr && (*ptr != '/')) + ptr++; + } + + return TRUE; +} + +svn_boolean_t +svn_relpath_is_canonical(const char *relpath) +{ + return relpath_is_canonical(relpath); +} + +svn_boolean_t +svn_uri_is_canonical(const char *uri, apr_pool_t *scratch_pool) +{ + const char *ptr = uri, *seg = uri; + const char *schema_data = NULL; + + /* URI is canonical if it has: + * - lowercase URL scheme + * - lowercase URL hostname + * - no '.' segments + * - no closing '/' + * - no '//' + * - uppercase hex-encoded pair digits ("%AB", not "%ab") + */ + + if (*uri == '\0') + return FALSE; + + if (! svn_path_is_url(uri)) + return FALSE; + + /* Skip the scheme. */ + while (*ptr && (*ptr != '/') && (*ptr != ':')) + ptr++; + + /* No scheme? No good. */ + if (! (*ptr == ':' && *(ptr+1) == '/' && *(ptr+2) == '/')) + return FALSE; + + /* Found a scheme, check that it's all lowercase. */ + ptr = uri; + while (*ptr != ':') + { + if (*ptr >= 'A' && *ptr <= 'Z') + return FALSE; + ptr++; + } + /* Skip :// */ + ptr += 3; + + /* Scheme only? That works. */ + if (! *ptr) + return TRUE; + + /* This might be the hostname */ + seg = ptr; + while (*ptr && (*ptr != '/') && (*ptr != '@')) + ptr++; + + if (*ptr == '@') + seg = ptr + 1; + + /* Found a hostname, check that it's all lowercase. */ + ptr = seg; + + if (*ptr == '[') + { + ptr++; + while (*ptr == ':' + || (*ptr >= '0' && *ptr <= '9') + || (*ptr >= 'a' && *ptr <= 'f')) + { + ptr++; + } + + if (*ptr != ']') + return FALSE; + ptr++; + } + else + while (*ptr && *ptr != '/' && *ptr != ':') + { + if (*ptr >= 'A' && *ptr <= 'Z') + return FALSE; + ptr++; + } + + /* Found a portnumber */ + if (*ptr == ':') + { + apr_int64_t port = 0; + + ptr++; + schema_data = ptr; + + while (*ptr >= '0' && *ptr <= '9') + { + port = 10 * port + (*ptr - '0'); + ptr++; + } + + if (ptr == schema_data) + return FALSE; /* Fail on "http://host:" */ + + if (*ptr && *ptr != '/') + return FALSE; /* Not a port number */ + + if (port == 80 && strncmp(uri, "http:", 5) == 0) + return FALSE; + else if (port == 443 && strncmp(uri, "https:", 6) == 0) + return FALSE; + else if (port == 3690 && strncmp(uri, "svn:", 4) == 0) + return FALSE; + } + + schema_data = ptr; + +#ifdef SVN_USE_DOS_PATHS + if (schema_data && *ptr == '/') + { + /* If this is a file url, ptr now points to the third '/' in + file:///C:/path. Check that if we have such a URL the drive + letter is in uppercase. */ + if (strncmp(uri, "file:", 5) == 0 && + ! (*(ptr+1) >= 'A' && *(ptr+1) <= 'Z') && + *(ptr+2) == ':') + return FALSE; + } +#endif /* SVN_USE_DOS_PATHS */ + + /* Now validate the rest of the URI. */ + while(1) + { + apr_size_t seglen = ptr - seg; + + if (seglen == 1 && *seg == '.') + return FALSE; /* /./ */ + + if (*ptr == '/' && *(ptr+1) == '/') + return FALSE; /* // */ + + if (! *ptr && *(ptr - 1) == '/' && ptr - 1 != uri) + return FALSE; /* foo/ */ + + if (! *ptr) + break; + + if (*ptr == '/') + ptr++; + seg = ptr; + + + while (*ptr && (*ptr != '/')) + ptr++; + } + + ptr = schema_data; + + while (*ptr) + { + if (*ptr == '%') + { + char digitz[3]; + int val; + + /* Can't usesvn_ctype_isxdigit() because lower case letters are + not in our canonical format */ + if (((*(ptr+1) < '0' || *(ptr+1) > '9')) + && (*(ptr+1) < 'A' || *(ptr+1) > 'F')) + return FALSE; + else if (((*(ptr+2) < '0' || *(ptr+2) > '9')) + && (*(ptr+2) < 'A' || *(ptr+2) > 'F')) + return FALSE; + + digitz[0] = *(++ptr); + digitz[1] = *(++ptr); + digitz[2] = '\0'; + val = (int)strtol(digitz, NULL, 16); + + if (svn_uri__char_validity[val]) + return FALSE; /* Should not have been escaped */ + } + else if (*ptr != '/' && !svn_uri__char_validity[(unsigned char)*ptr]) + return FALSE; /* Character should have been escaped */ + ptr++; + } + + return TRUE; +} + +svn_error_t * +svn_dirent_condense_targets(const char **pcommon, + apr_array_header_t **pcondensed_targets, + const apr_array_header_t *targets, + svn_boolean_t remove_redundancies, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + int i, num_condensed = targets->nelts; + svn_boolean_t *removed; + apr_array_header_t *abs_targets; + + /* Early exit when there's no data to work on. */ + if (targets->nelts <= 0) + { + *pcommon = NULL; + if (pcondensed_targets) + *pcondensed_targets = NULL; + return SVN_NO_ERROR; + } + + /* Get the absolute path of the first target. */ + SVN_ERR(svn_dirent_get_absolute(pcommon, + APR_ARRAY_IDX(targets, 0, const char *), + scratch_pool)); + + /* Early exit when there's only one dirent to work on. */ + if (targets->nelts == 1) + { + *pcommon = apr_pstrdup(result_pool, *pcommon); + if (pcondensed_targets) + *pcondensed_targets = apr_array_make(result_pool, 0, + sizeof(const char *)); + return SVN_NO_ERROR; + } + + /* Copy the targets array, but with absolute dirents instead of + relative. Also, find the pcommon argument by finding what is + common in all of the absolute dirents. NOTE: This is not as + efficient as it could be. The calculation of the basedir could + be done in the loop below, which would save some calls to + svn_dirent_get_longest_ancestor. I decided to do it this way + because I thought it would be simpler, since this way, we don't + even do the loop if we don't need to condense the targets. */ + + removed = apr_pcalloc(scratch_pool, (targets->nelts * + sizeof(svn_boolean_t))); + abs_targets = apr_array_make(scratch_pool, targets->nelts, + sizeof(const char *)); + + APR_ARRAY_PUSH(abs_targets, const char *) = *pcommon; + + for (i = 1; i < targets->nelts; ++i) + { + const char *rel = APR_ARRAY_IDX(targets, i, const char *); + const char *absolute; + SVN_ERR(svn_dirent_get_absolute(&absolute, rel, scratch_pool)); + APR_ARRAY_PUSH(abs_targets, const char *) = absolute; + *pcommon = svn_dirent_get_longest_ancestor(*pcommon, absolute, + scratch_pool); + } + + *pcommon = apr_pstrdup(result_pool, *pcommon); + + if (pcondensed_targets != NULL) + { + size_t basedir_len; + + if (remove_redundancies) + { + /* Find the common part of each pair of targets. If + common part is equal to one of the dirents, the other + is a child of it, and can be removed. If a target is + equal to *pcommon, it can also be removed. */ + + /* First pass: when one non-removed target is a child of + another non-removed target, remove the child. */ + for (i = 0; i < abs_targets->nelts; ++i) + { + int j; + + if (removed[i]) + continue; + + for (j = i + 1; j < abs_targets->nelts; ++j) + { + const char *abs_targets_i; + const char *abs_targets_j; + const char *ancestor; + + if (removed[j]) + continue; + + abs_targets_i = APR_ARRAY_IDX(abs_targets, i, const char *); + abs_targets_j = APR_ARRAY_IDX(abs_targets, j, const char *); + + ancestor = svn_dirent_get_longest_ancestor + (abs_targets_i, abs_targets_j, scratch_pool); + + if (*ancestor == '\0') + continue; + + if (strcmp(ancestor, abs_targets_i) == 0) + { + removed[j] = TRUE; + num_condensed--; + } + else if (strcmp(ancestor, abs_targets_j) == 0) + { + removed[i] = TRUE; + num_condensed--; + } + } + } + + /* Second pass: when a target is the same as *pcommon, + remove the target. */ + for (i = 0; i < abs_targets->nelts; ++i) + { + const char *abs_targets_i = APR_ARRAY_IDX(abs_targets, i, + const char *); + + if ((strcmp(abs_targets_i, *pcommon) == 0) && (! removed[i])) + { + removed[i] = TRUE; + num_condensed--; + } + } + } + + /* Now create the return array, and copy the non-removed items */ + basedir_len = strlen(*pcommon); + *pcondensed_targets = apr_array_make(result_pool, num_condensed, + sizeof(const char *)); + + for (i = 0; i < abs_targets->nelts; ++i) + { + const char *rel_item = APR_ARRAY_IDX(abs_targets, i, const char *); + + /* Skip this if it's been removed. */ + if (removed[i]) + continue; + + /* If a common prefix was found, condensed_targets are given + relative to that prefix. */ + if (basedir_len > 0) + { + /* Only advance our pointer past a dirent separator if + REL_ITEM isn't the same as *PCOMMON. + + If *PCOMMON is a root dirent, basedir_len will already + include the closing '/', so never advance the pointer + here. + */ + rel_item += basedir_len; + if (rel_item[0] && + ! svn_dirent_is_root(*pcommon, basedir_len)) + rel_item++; + } + + APR_ARRAY_PUSH(*pcondensed_targets, const char *) + = apr_pstrdup(result_pool, rel_item); + } + } + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_uri_condense_targets(const char **pcommon, + apr_array_header_t **pcondensed_targets, + const apr_array_header_t *targets, + svn_boolean_t remove_redundancies, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + int i, num_condensed = targets->nelts; + apr_array_header_t *uri_targets; + svn_boolean_t *removed; + + /* Early exit when there's no data to work on. */ + if (targets->nelts <= 0) + { + *pcommon = NULL; + if (pcondensed_targets) + *pcondensed_targets = NULL; + return SVN_NO_ERROR; + } + + *pcommon = svn_uri_canonicalize(APR_ARRAY_IDX(targets, 0, const char *), + scratch_pool); + + /* Early exit when there's only one uri to work on. */ + if (targets->nelts == 1) + { + *pcommon = apr_pstrdup(result_pool, *pcommon); + if (pcondensed_targets) + *pcondensed_targets = apr_array_make(result_pool, 0, + sizeof(const char *)); + return SVN_NO_ERROR; + } + + /* Find the pcommon argument by finding what is common in all of the + uris. NOTE: This is not as efficient as it could be. The calculation + of the basedir could be done in the loop below, which would + save some calls to svn_uri_get_longest_ancestor. I decided to do it + this way because I thought it would be simpler, since this way, we don't + even do the loop if we don't need to condense the targets. */ + + removed = apr_pcalloc(scratch_pool, (targets->nelts * + sizeof(svn_boolean_t))); + uri_targets = apr_array_make(scratch_pool, targets->nelts, + sizeof(const char *)); + + APR_ARRAY_PUSH(uri_targets, const char *) = *pcommon; + + for (i = 1; i < targets->nelts; ++i) + { + const char *uri = svn_uri_canonicalize( + APR_ARRAY_IDX(targets, i, const char *), + scratch_pool); + APR_ARRAY_PUSH(uri_targets, const char *) = uri; + + /* If the commonmost ancestor so far is empty, there's no point + in continuing to search for a common ancestor at all. But + we'll keep looping for the sake of canonicalizing the + targets, I suppose. */ + if (**pcommon != '\0') + *pcommon = svn_uri_get_longest_ancestor(*pcommon, uri, + scratch_pool); + } + + *pcommon = apr_pstrdup(result_pool, *pcommon); + + if (pcondensed_targets != NULL) + { + size_t basedir_len; + + if (remove_redundancies) + { + /* Find the common part of each pair of targets. If + common part is equal to one of the dirents, the other + is a child of it, and can be removed. If a target is + equal to *pcommon, it can also be removed. */ + + /* First pass: when one non-removed target is a child of + another non-removed target, remove the child. */ + for (i = 0; i < uri_targets->nelts; ++i) + { + int j; + + if (removed[i]) + continue; + + for (j = i + 1; j < uri_targets->nelts; ++j) + { + const char *uri_i; + const char *uri_j; + const char *ancestor; + + if (removed[j]) + continue; + + uri_i = APR_ARRAY_IDX(uri_targets, i, const char *); + uri_j = APR_ARRAY_IDX(uri_targets, j, const char *); + + ancestor = svn_uri_get_longest_ancestor(uri_i, + uri_j, + scratch_pool); + + if (*ancestor == '\0') + continue; + + if (strcmp(ancestor, uri_i) == 0) + { + removed[j] = TRUE; + num_condensed--; + } + else if (strcmp(ancestor, uri_j) == 0) + { + removed[i] = TRUE; + num_condensed--; + } + } + } + + /* Second pass: when a target is the same as *pcommon, + remove the target. */ + for (i = 0; i < uri_targets->nelts; ++i) + { + const char *uri_targets_i = APR_ARRAY_IDX(uri_targets, i, + const char *); + + if ((strcmp(uri_targets_i, *pcommon) == 0) && (! removed[i])) + { + removed[i] = TRUE; + num_condensed--; + } + } + } + + /* Now create the return array, and copy the non-removed items */ + basedir_len = strlen(*pcommon); + *pcondensed_targets = apr_array_make(result_pool, num_condensed, + sizeof(const char *)); + + for (i = 0; i < uri_targets->nelts; ++i) + { + const char *rel_item = APR_ARRAY_IDX(uri_targets, i, const char *); + + /* Skip this if it's been removed. */ + if (removed[i]) + continue; + + /* If a common prefix was found, condensed_targets are given + relative to that prefix. */ + if (basedir_len > 0) + { + /* Only advance our pointer past a dirent separator if + REL_ITEM isn't the same as *PCOMMON. + + If *PCOMMON is a root dirent, basedir_len will already + include the closing '/', so never advance the pointer + here. + */ + rel_item += basedir_len; + if ((rel_item[0] == '/') || + (rel_item[0] && !svn_uri_is_root(*pcommon, basedir_len))) + { + rel_item++; + } + } + + APR_ARRAY_PUSH(*pcondensed_targets, const char *) + = svn_path_uri_decode(rel_item, result_pool); + } + } + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_dirent_is_under_root(svn_boolean_t *under_root, + const char **result_path, + const char *base_path, + const char *path, + apr_pool_t *result_pool) +{ + apr_status_t status; + char *full_path; + + *under_root = FALSE; + if (result_path) + *result_path = NULL; + + status = apr_filepath_merge(&full_path, + base_path, + path, + APR_FILEPATH_NOTABOVEROOT + | APR_FILEPATH_SECUREROOTTEST, + result_pool); + + if (status == APR_SUCCESS) + { + if (result_path) + *result_path = svn_dirent_canonicalize(full_path, result_pool); + *under_root = TRUE; + return SVN_NO_ERROR; + } + else if (status == APR_EABOVEROOT) + { + *under_root = FALSE; + return SVN_NO_ERROR; + } + + return svn_error_wrap_apr(status, NULL); +} + +svn_error_t * +svn_uri_get_dirent_from_file_url(const char **dirent, + const char *url, + apr_pool_t *pool) +{ + const char *hostname, *path; + + SVN_ERR_ASSERT(svn_uri_is_canonical(url, pool)); + + /* Verify that the URL is well-formed (loosely) */ + + /* First, check for the "file://" prefix. */ + if (strncmp(url, "file://", 7) != 0) + return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL, + _("Local URL '%s' does not contain 'file://' " + "prefix"), url); + + /* Find the HOSTNAME portion and the PATH portion of the URL. The host + name is between the "file://" prefix and the next occurence of '/'. We + are considering everything from that '/' until the end of the URL to be + the absolute path portion of the URL. + If we got just "file://", treat it the same as "file:///". */ + hostname = url + 7; + path = strchr(hostname, '/'); + if (path) + hostname = apr_pstrmemdup(pool, hostname, path - hostname); + else + path = "/"; + + /* URI-decode HOSTNAME, and set it to NULL if it is "" or "localhost". */ + if (*hostname == '\0') + hostname = NULL; + else + { + hostname = svn_path_uri_decode(hostname, pool); + if (strcmp(hostname, "localhost") == 0) + hostname = NULL; + } + + /* Duplicate the URL, starting at the top of the path. + At the same time, we URI-decode the path. */ +#ifdef SVN_USE_DOS_PATHS + /* On Windows, we'll typically have to skip the leading / if the + path starts with a drive letter. Like most Web browsers, We + support two variants of this scheme: + + file:///X:/path and + file:///X|/path + + Note that, at least on WinNT and above, file:////./X:/path will + also work, so we must make sure the transformation doesn't break + that, and file:///path (that looks within the current drive + only) should also keep working. + If we got a non-empty hostname other than localhost, we convert this + into an UNC path. In this case, we obviously don't strip the slash + even if the path looks like it starts with a drive letter. + */ + { + static const char valid_drive_letters[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + /* Casting away const! */ + char *dup_path = (char *)svn_path_uri_decode(path, pool); + + /* This check assumes ':' and '|' are already decoded! */ + if (!hostname && dup_path[1] && strchr(valid_drive_letters, dup_path[1]) + && (dup_path[2] == ':' || dup_path[2] == '|')) + { + /* Skip the leading slash. */ + ++dup_path; + + if (dup_path[1] == '|') + dup_path[1] = ':'; + + if (dup_path[2] == '/' || dup_path[2] == '\0') + { + if (dup_path[2] == '\0') + { + /* A valid dirent for the driveroot must be like "C:/" instead of + just "C:" or svn_dirent_join() will use the current directory + on the drive instead */ + char *new_path = apr_pcalloc(pool, 4); + new_path[0] = dup_path[0]; + new_path[1] = ':'; + new_path[2] = '/'; + new_path[3] = '\0'; + dup_path = new_path; + } + } + } + if (hostname) + { + if (dup_path[0] == '/' && dup_path[1] == '\0') + return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL, + _("Local URL '%s' contains only a hostname, " + "no path"), url); + + /* We still know that the path starts with a slash. */ + *dirent = apr_pstrcat(pool, "//", hostname, dup_path, NULL); + } + else + *dirent = dup_path; + } +#else /* !SVN_USE_DOS_PATHS */ + /* Currently, the only hostnames we are allowing on non-Win32 platforms + are the empty string and 'localhost'. */ + if (hostname) + return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL, + _("Local URL '%s' contains unsupported hostname"), + url); + + *dirent = svn_path_uri_decode(path, pool); +#endif /* SVN_USE_DOS_PATHS */ + return SVN_NO_ERROR; +} + +svn_error_t * +svn_uri_get_file_url_from_dirent(const char **url, + const char *dirent, + apr_pool_t *pool) +{ + assert(svn_dirent_is_canonical(dirent, pool)); + + SVN_ERR(svn_dirent_get_absolute(&dirent, dirent, pool)); + + dirent = svn_path_uri_encode(dirent, pool); + +#ifndef SVN_USE_DOS_PATHS + if (dirent[0] == '/' && dirent[1] == '\0') + dirent = NULL; /* "file://" is the canonical form of "file:///" */ + + *url = apr_pstrcat(pool, "file://", dirent, (char *)NULL); +#else + if (dirent[0] == '/') + { + /* Handle UNC paths //server/share -> file://server/share */ + assert(dirent[1] == '/'); /* Expect UNC, not non-absolute */ + + *url = apr_pstrcat(pool, "file:", dirent, NULL); + } + else + { + char *uri = apr_pstrcat(pool, "file:///", dirent, NULL); + apr_size_t len = 8 /* strlen("file:///") */ + strlen(dirent); + + /* "C:/" is a canonical dirent on Windows, + but "file:///C:/" is not a canonical uri */ + if (uri[len-1] == '/') + uri[len-1] = '\0'; + + *url = uri; + } +#endif + + return SVN_NO_ERROR; +} + + + +/* -------------- The fspath API (see private/svn_fspath.h) -------------- */ + +svn_boolean_t +svn_fspath__is_canonical(const char *fspath) +{ + return fspath[0] == '/' && relpath_is_canonical(fspath + 1); +} + + +const char * +svn_fspath__canonicalize(const char *fspath, + apr_pool_t *pool) +{ + if ((fspath[0] == '/') && (fspath[1] == '\0')) + return "/"; + + return apr_pstrcat(pool, "/", svn_relpath_canonicalize(fspath, pool), + (char *)NULL); +} + + +svn_boolean_t +svn_fspath__is_root(const char *fspath, apr_size_t len) +{ + /* directory is root if it's equal to '/' */ + return (len == 1 && fspath[0] == '/'); +} + + +const char * +svn_fspath__skip_ancestor(const char *parent_fspath, + const char *child_fspath) +{ + assert(svn_fspath__is_canonical(parent_fspath)); + assert(svn_fspath__is_canonical(child_fspath)); + + return svn_relpath_skip_ancestor(parent_fspath + 1, child_fspath + 1); +} + + +const char * +svn_fspath__dirname(const char *fspath, + apr_pool_t *pool) +{ + assert(svn_fspath__is_canonical(fspath)); + + if (fspath[0] == '/' && fspath[1] == '\0') + return apr_pstrdup(pool, fspath); + else + return apr_pstrcat(pool, "/", svn_relpath_dirname(fspath + 1, pool), + (char *)NULL); +} + + +const char * +svn_fspath__basename(const char *fspath, + apr_pool_t *pool) +{ + const char *result; + assert(svn_fspath__is_canonical(fspath)); + + result = svn_relpath_basename(fspath + 1, pool); + + assert(strchr(result, '/') == NULL); + return result; +} + +void +svn_fspath__split(const char **dirpath, + const char **base_name, + const char *fspath, + apr_pool_t *result_pool) +{ + assert(dirpath != base_name); + + if (dirpath) + *dirpath = svn_fspath__dirname(fspath, result_pool); + + if (base_name) + *base_name = svn_fspath__basename(fspath, result_pool); +} + +char * +svn_fspath__join(const char *fspath, + const char *relpath, + apr_pool_t *result_pool) +{ + char *result; + assert(svn_fspath__is_canonical(fspath)); + assert(svn_relpath_is_canonical(relpath)); + + if (relpath[0] == '\0') + result = apr_pstrdup(result_pool, fspath); + else if (fspath[1] == '\0') + result = apr_pstrcat(result_pool, "/", relpath, (char *)NULL); + else + result = apr_pstrcat(result_pool, fspath, "/", relpath, (char *)NULL); + + assert(svn_fspath__is_canonical(result)); + return result; +} + +char * +svn_fspath__get_longest_ancestor(const char *fspath1, + const char *fspath2, + apr_pool_t *result_pool) +{ + char *result; + assert(svn_fspath__is_canonical(fspath1)); + assert(svn_fspath__is_canonical(fspath2)); + + result = apr_pstrcat(result_pool, "/", + svn_relpath_get_longest_ancestor(fspath1 + 1, + fspath2 + 1, + result_pool), + (char *)NULL); + + assert(svn_fspath__is_canonical(result)); + return result; +} + + + + +/* -------------- The urlpath API (see private/svn_fspath.h) ------------- */ + +const char * +svn_urlpath__canonicalize(const char *uri, + apr_pool_t *pool) +{ + if (svn_path_is_url(uri)) + { + uri = svn_uri_canonicalize(uri, pool); + } + else + { + uri = svn_fspath__canonicalize(uri, pool); + /* Do a little dance to normalize hex encoding. */ + uri = svn_path_uri_decode(uri, pool); + uri = svn_path_uri_encode(uri, pool); + } + return uri; +} |