diff options
Diffstat (limited to 'subversion/libsvn_subr/xml.c')
-rw-r--r-- | subversion/libsvn_subr/xml.c | 655 |
1 files changed, 655 insertions, 0 deletions
diff --git a/subversion/libsvn_subr/xml.c b/subversion/libsvn_subr/xml.c new file mode 100644 index 0000000..a9d834a --- /dev/null +++ b/subversion/libsvn_subr/xml.c @@ -0,0 +1,655 @@ +/* + * xml.c: xml helper code shared among the Subversion libraries. + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + + + +#include <string.h> +#include <assert.h> + +#include "svn_private_config.h" /* for SVN_HAVE_OLD_EXPAT */ +#include "svn_hash.h" +#include "svn_pools.h" +#include "svn_xml.h" +#include "svn_error.h" +#include "svn_ctype.h" + +#include "private/svn_utf_private.h" + +#ifdef SVN_HAVE_OLD_EXPAT +#include <xmlparse.h> +#else +#include <expat.h> +#endif + +#ifdef XML_UNICODE +#error Expat is unusable -- it has been compiled for wide characters +#endif + +/* The private internals for a parser object. */ +struct svn_xml_parser_t +{ + /** the expat parser */ + XML_Parser parser; + + /** the SVN callbacks to call from the Expat callbacks */ + svn_xml_start_elem start_handler; + svn_xml_end_elem end_handler; + svn_xml_char_data data_handler; + + /** the user's baton for private data */ + void *baton; + + /** if non-@c NULL, an error happened while parsing */ + svn_error_t *error; + + /** where this object is allocated, so we can free it easily */ + apr_pool_t *pool; + +}; + + +/*** XML character validation ***/ + +svn_boolean_t +svn_xml_is_xml_safe(const char *data, apr_size_t len) +{ + const char *end = data + len; + const char *p; + + if (! svn_utf__is_valid(data, len)) + return FALSE; + + for (p = data; p < end; p++) + { + unsigned char c = *p; + + if (svn_ctype_iscntrl(c)) + { + if ((c != SVN_CTYPE_ASCII_TAB) + && (c != SVN_CTYPE_ASCII_LINEFEED) + && (c != SVN_CTYPE_ASCII_CARRIAGERETURN) + && (c != SVN_CTYPE_ASCII_DELETE)) + return FALSE; + } + } + return TRUE; +} + + + + + +/*** XML escaping. ***/ + +/* ### ...? + * + * If *OUTSTR is @c NULL, set *OUTSTR to a new stringbuf allocated + * in POOL, else append to the existing stringbuf there. + */ +static void +xml_escape_cdata(svn_stringbuf_t **outstr, + const char *data, + apr_size_t len, + apr_pool_t *pool) +{ + const char *end = data + len; + const char *p = data, *q; + + if (*outstr == NULL) + *outstr = svn_stringbuf_create_empty(pool); + + while (1) + { + /* Find a character which needs to be quoted and append bytes up + to that point. Strictly speaking, '>' only needs to be + quoted if it follows "]]", but it's easier to quote it all + the time. + + So, why are we escaping '\r' here? Well, according to the + XML spec, '\r\n' gets converted to '\n' during XML parsing. + Also, any '\r' not followed by '\n' is converted to '\n'. By + golly, if we say we want to escape a '\r', we want to make + sure it remains a '\r'! */ + q = p; + while (q < end && *q != '&' && *q != '<' && *q != '>' && *q != '\r') + q++; + svn_stringbuf_appendbytes(*outstr, p, q - p); + + /* We may already be a winner. */ + if (q == end) + break; + + /* Append the entity reference for the character. */ + if (*q == '&') + svn_stringbuf_appendcstr(*outstr, "&"); + else if (*q == '<') + svn_stringbuf_appendcstr(*outstr, "<"); + else if (*q == '>') + svn_stringbuf_appendcstr(*outstr, ">"); + else if (*q == '\r') + svn_stringbuf_appendcstr(*outstr, " "); + + p = q + 1; + } +} + +/* Essentially the same as xml_escape_cdata, with the addition of + whitespace and quote characters. */ +static void +xml_escape_attr(svn_stringbuf_t **outstr, + const char *data, + apr_size_t len, + apr_pool_t *pool) +{ + const char *end = data + len; + const char *p = data, *q; + + if (*outstr == NULL) + *outstr = svn_stringbuf_create_ensure(len, pool); + + while (1) + { + /* Find a character which needs to be quoted and append bytes up + to that point. */ + q = p; + while (q < end && *q != '&' && *q != '<' && *q != '>' + && *q != '"' && *q != '\'' && *q != '\r' + && *q != '\n' && *q != '\t') + q++; + svn_stringbuf_appendbytes(*outstr, p, q - p); + + /* We may already be a winner. */ + if (q == end) + break; + + /* Append the entity reference for the character. */ + if (*q == '&') + svn_stringbuf_appendcstr(*outstr, "&"); + else if (*q == '<') + svn_stringbuf_appendcstr(*outstr, "<"); + else if (*q == '>') + svn_stringbuf_appendcstr(*outstr, ">"); + else if (*q == '"') + svn_stringbuf_appendcstr(*outstr, """); + else if (*q == '\'') + svn_stringbuf_appendcstr(*outstr, "'"); + else if (*q == '\r') + svn_stringbuf_appendcstr(*outstr, " "); + else if (*q == '\n') + svn_stringbuf_appendcstr(*outstr, " "); + else if (*q == '\t') + svn_stringbuf_appendcstr(*outstr, "	"); + + p = q + 1; + } +} + + +void +svn_xml_escape_cdata_stringbuf(svn_stringbuf_t **outstr, + const svn_stringbuf_t *string, + apr_pool_t *pool) +{ + xml_escape_cdata(outstr, string->data, string->len, pool); +} + + +void +svn_xml_escape_cdata_string(svn_stringbuf_t **outstr, + const svn_string_t *string, + apr_pool_t *pool) +{ + xml_escape_cdata(outstr, string->data, string->len, pool); +} + + +void +svn_xml_escape_cdata_cstring(svn_stringbuf_t **outstr, + const char *string, + apr_pool_t *pool) +{ + xml_escape_cdata(outstr, string, (apr_size_t) strlen(string), pool); +} + + +void +svn_xml_escape_attr_stringbuf(svn_stringbuf_t **outstr, + const svn_stringbuf_t *string, + apr_pool_t *pool) +{ + xml_escape_attr(outstr, string->data, string->len, pool); +} + + +void +svn_xml_escape_attr_string(svn_stringbuf_t **outstr, + const svn_string_t *string, + apr_pool_t *pool) +{ + xml_escape_attr(outstr, string->data, string->len, pool); +} + + +void +svn_xml_escape_attr_cstring(svn_stringbuf_t **outstr, + const char *string, + apr_pool_t *pool) +{ + xml_escape_attr(outstr, string, (apr_size_t) strlen(string), pool); +} + + +const char * +svn_xml_fuzzy_escape(const char *string, apr_pool_t *pool) +{ + const char *end = string + strlen(string); + const char *p = string, *q; + svn_stringbuf_t *outstr; + char escaped_char[6]; /* ? \ u u u \0 */ + + for (q = p; q < end; q++) + { + if (svn_ctype_iscntrl(*q) + && ! ((*q == '\n') || (*q == '\r') || (*q == '\t'))) + break; + } + + /* Return original string if no unsafe characters found. */ + if (q == end) + return string; + + outstr = svn_stringbuf_create_empty(pool); + while (1) + { + q = p; + + /* Traverse till either unsafe character or eos. */ + while ((q < end) + && ((! svn_ctype_iscntrl(*q)) + || (*q == '\n') || (*q == '\r') || (*q == '\t'))) + q++; + + /* copy chunk before marker */ + svn_stringbuf_appendbytes(outstr, p, q - p); + + if (q == end) + break; + + /* Append an escaped version of the unsafe character. + + ### This format was chosen for consistency with + ### svn_utf__cstring_from_utf8_fuzzy(). The two functions + ### should probably share code, even though they escape + ### different characters. + */ + apr_snprintf(escaped_char, sizeof(escaped_char), "?\\%03u", + (unsigned char) *q); + svn_stringbuf_appendcstr(outstr, escaped_char); + + p = q + 1; + } + + return outstr->data; +} + + +/*** Map from the Expat callback types to the SVN XML types. ***/ + +static void expat_start_handler(void *userData, + const XML_Char *name, + const XML_Char **atts) +{ + svn_xml_parser_t *svn_parser = userData; + + (*svn_parser->start_handler)(svn_parser->baton, name, atts); +} + +static void expat_end_handler(void *userData, const XML_Char *name) +{ + svn_xml_parser_t *svn_parser = userData; + + (*svn_parser->end_handler)(svn_parser->baton, name); +} + +static void expat_data_handler(void *userData, const XML_Char *s, int len) +{ + svn_xml_parser_t *svn_parser = userData; + + (*svn_parser->data_handler)(svn_parser->baton, s, (apr_size_t)len); +} + + +/*** Making a parser. ***/ + +svn_xml_parser_t * +svn_xml_make_parser(void *baton, + svn_xml_start_elem start_handler, + svn_xml_end_elem end_handler, + svn_xml_char_data data_handler, + apr_pool_t *pool) +{ + svn_xml_parser_t *svn_parser; + apr_pool_t *subpool; + + XML_Parser parser = XML_ParserCreate(NULL); + + XML_SetElementHandler(parser, + start_handler ? expat_start_handler : NULL, + end_handler ? expat_end_handler : NULL); + XML_SetCharacterDataHandler(parser, + data_handler ? expat_data_handler : NULL); + + /* ### we probably don't want this pool; or at least we should pass it + ### to the callbacks and clear it periodically. */ + subpool = svn_pool_create(pool); + + svn_parser = apr_pcalloc(subpool, sizeof(*svn_parser)); + + svn_parser->parser = parser; + svn_parser->start_handler = start_handler; + svn_parser->end_handler = end_handler; + svn_parser->data_handler = data_handler; + svn_parser->baton = baton; + svn_parser->pool = subpool; + + /* store our parser info as the UserData in the Expat parser */ + XML_SetUserData(parser, svn_parser); + + return svn_parser; +} + + +/* Free a parser */ +void +svn_xml_free_parser(svn_xml_parser_t *svn_parser) +{ + /* Free the expat parser */ + XML_ParserFree(svn_parser->parser); + + /* Free the subversion parser */ + svn_pool_destroy(svn_parser->pool); +} + + + + +svn_error_t * +svn_xml_parse(svn_xml_parser_t *svn_parser, + const char *buf, + apr_size_t len, + svn_boolean_t is_final) +{ + svn_error_t *err; + int success; + + /* Parse some xml data */ + success = XML_Parse(svn_parser->parser, buf, (int) len, is_final); + + /* If expat choked internally, return its error. */ + if (! success) + { + /* Line num is "int" in Expat v1, "long" in v2; hide the difference. */ + long line = XML_GetCurrentLineNumber(svn_parser->parser); + + err = svn_error_createf + (SVN_ERR_XML_MALFORMED, NULL, + _("Malformed XML: %s at line %ld"), + XML_ErrorString(XML_GetErrorCode(svn_parser->parser)), line); + + /* Kill all parsers and return the expat error */ + svn_xml_free_parser(svn_parser); + return err; + } + + /* Did an error occur somewhere *inside* the expat callbacks? */ + if (svn_parser->error) + { + err = svn_parser->error; + svn_xml_free_parser(svn_parser); + return err; + } + + return SVN_NO_ERROR; +} + + + +void svn_xml_signal_bailout(svn_error_t *error, + svn_xml_parser_t *svn_parser) +{ + /* This will cause the current XML_Parse() call to finish quickly! */ + XML_SetElementHandler(svn_parser->parser, NULL, NULL); + XML_SetCharacterDataHandler(svn_parser->parser, NULL); + + /* Once outside of XML_Parse(), the existence of this field will + cause svn_delta_parse()'s main read-loop to return error. */ + svn_parser->error = error; +} + + + + + + + + +/*** Attribute walking. ***/ + +const char * +svn_xml_get_attr_value(const char *name, const char *const *atts) +{ + while (atts && (*atts)) + { + if (strcmp(atts[0], name) == 0) + return atts[1]; + else + atts += 2; /* continue looping */ + } + + /* Else no such attribute name seen. */ + return NULL; +} + + + +/*** Printing XML ***/ + +void +svn_xml_make_header2(svn_stringbuf_t **str, const char *encoding, + apr_pool_t *pool) +{ + + if (*str == NULL) + *str = svn_stringbuf_create_empty(pool); + svn_stringbuf_appendcstr(*str, "<?xml version=\"1.0\""); + if (encoding) + { + encoding = apr_psprintf(pool, " encoding=\"%s\"", encoding); + svn_stringbuf_appendcstr(*str, encoding); + } + svn_stringbuf_appendcstr(*str, "?>\n"); +} + + + +/*** Creating attribute hashes. ***/ + +/* Combine an existing attribute list ATTS with a HASH that itself + represents an attribute list. Iff PRESERVE is true, then no value + already in HASH will be changed, else values from ATTS will + override previous values in HASH. */ +static void +amalgamate(const char **atts, + apr_hash_t *ht, + svn_boolean_t preserve, + apr_pool_t *pool) +{ + const char *key; + + if (atts) + for (key = *atts; key; key = *(++atts)) + { + const char *val = *(++atts); + size_t keylen; + assert(key != NULL); + /* kff todo: should we also insist that val be non-null here? + Probably. */ + + keylen = strlen(key); + if (preserve && ((apr_hash_get(ht, key, keylen)) != NULL)) + continue; + else + apr_hash_set(ht, apr_pstrndup(pool, key, keylen), keylen, + val ? apr_pstrdup(pool, val) : NULL); + } +} + + +apr_hash_t * +svn_xml_ap_to_hash(va_list ap, apr_pool_t *pool) +{ + apr_hash_t *ht = apr_hash_make(pool); + const char *key; + + while ((key = va_arg(ap, char *)) != NULL) + { + const char *val = va_arg(ap, const char *); + svn_hash_sets(ht, key, val); + } + + return ht; +} + + +apr_hash_t * +svn_xml_make_att_hash(const char **atts, apr_pool_t *pool) +{ + apr_hash_t *ht = apr_hash_make(pool); + amalgamate(atts, ht, 0, pool); /* third arg irrelevant in this case */ + return ht; +} + + +void +svn_xml_hash_atts_overlaying(const char **atts, + apr_hash_t *ht, + apr_pool_t *pool) +{ + amalgamate(atts, ht, 0, pool); +} + + +void +svn_xml_hash_atts_preserving(const char **atts, + apr_hash_t *ht, + apr_pool_t *pool) +{ + amalgamate(atts, ht, 1, pool); +} + + + +/*** Making XML tags. ***/ + + +void +svn_xml_make_open_tag_hash(svn_stringbuf_t **str, + apr_pool_t *pool, + enum svn_xml_open_tag_style style, + const char *tagname, + apr_hash_t *attributes) +{ + apr_hash_index_t *hi; + apr_size_t est_size = strlen(tagname) + 4 + apr_hash_count(attributes) * 30; + + if (*str == NULL) + *str = svn_stringbuf_create_ensure(est_size, pool); + + svn_stringbuf_appendcstr(*str, "<"); + svn_stringbuf_appendcstr(*str, tagname); + + for (hi = apr_hash_first(pool, attributes); hi; hi = apr_hash_next(hi)) + { + const void *key; + void *val; + + apr_hash_this(hi, &key, NULL, &val); + assert(val != NULL); + + svn_stringbuf_appendcstr(*str, "\n "); + svn_stringbuf_appendcstr(*str, key); + svn_stringbuf_appendcstr(*str, "=\""); + svn_xml_escape_attr_cstring(str, val, pool); + svn_stringbuf_appendcstr(*str, "\""); + } + + if (style == svn_xml_self_closing) + svn_stringbuf_appendcstr(*str, "/"); + svn_stringbuf_appendcstr(*str, ">"); + if (style != svn_xml_protect_pcdata) + svn_stringbuf_appendcstr(*str, "\n"); +} + + +void +svn_xml_make_open_tag_v(svn_stringbuf_t **str, + apr_pool_t *pool, + enum svn_xml_open_tag_style style, + const char *tagname, + va_list ap) +{ + apr_pool_t *subpool = svn_pool_create(pool); + apr_hash_t *ht = svn_xml_ap_to_hash(ap, subpool); + + svn_xml_make_open_tag_hash(str, pool, style, tagname, ht); + svn_pool_destroy(subpool); +} + + + +void +svn_xml_make_open_tag(svn_stringbuf_t **str, + apr_pool_t *pool, + enum svn_xml_open_tag_style style, + const char *tagname, + ...) +{ + va_list ap; + + va_start(ap, tagname); + svn_xml_make_open_tag_v(str, pool, style, tagname, ap); + va_end(ap); +} + + +void svn_xml_make_close_tag(svn_stringbuf_t **str, + apr_pool_t *pool, + const char *tagname) +{ + if (*str == NULL) + *str = svn_stringbuf_create_empty(pool); + + svn_stringbuf_appendcstr(*str, "</"); + svn_stringbuf_appendcstr(*str, tagname); + svn_stringbuf_appendcstr(*str, ">\n"); +} |