diff options
author | dim <dim@FreeBSD.org> | 2012-12-02 13:20:44 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2012-12-02 13:20:44 +0000 |
commit | 056abd2059c65a3e908193aeae16fad98017437c (patch) | |
tree | 2732d02d7d51218d6eed98ac7fcfc5b8794896b5 /lib/Basic/ConvertUTF.c | |
parent | cc73504950eb7b5dff2dded9bedd67bc36d64641 (diff) | |
download | FreeBSD-src-056abd2059c65a3e908193aeae16fad98017437c.zip FreeBSD-src-056abd2059c65a3e908193aeae16fad98017437c.tar.gz |
Vendor import of clang release_32 branch r168974 (effectively, 3.2 RC2):
http://llvm.org/svn/llvm-project/cfe/branches/release_32@168974
Diffstat (limited to 'lib/Basic/ConvertUTF.c')
-rw-r--r-- | lib/Basic/ConvertUTF.c | 24 |
1 files changed, 16 insertions, 8 deletions
```diff
diff --git a/lib/Basic/ConvertUTF.c b/lib/Basic/ConvertUTF.c
index 4793b25..d16965d 100644
--- a/lib/Basic/ConvertUTF.c
+++ b/lib/Basic/ConvertUTF.c
@@ -111,7 +111,6 @@ static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
  * into an inline function.
  */
 
-#ifdef CLANG_NEEDS_THESE_ONE_DAY
 
 /* --------------------------------------------------------------------- */
 
@@ -285,7 +284,6 @@ ConversionResult ConvertUTF16toUTF8 (
     *targetStart = target;
     return result;
 }
-#endif
 
 /* --------------------------------------------------------------------- */
 
@@ -361,7 +359,7 @@ static Boolean isLegalUTF8(const UTF8 *source, int length) {
         /* Everything else falls through when "true"... */
     case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
     case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
-    case 2: if ((a = (*--srcptr)) > 0xBF) return false;
+    case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
 
         switch (*source) {
             /* no fall-through in this inner switch */
@@ -395,15 +393,25 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
 /* --------------------------------------------------------------------- */
 
 /*
+ * Exported function to return the total number of bytes in a codepoint
+ * represented in UTF-8, given the value of the first byte.
+ */
+unsigned getNumBytesForUTF8(UTF8 first) {
+  return trailingBytesForUTF8[first] + 1;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
  * Exported function to return whether a UTF-8 string is legal or not.
  * This is not used here; it's just exported.
  */
-Boolean isLegalUTF8String(const UTF8 *source, const UTF8 *sourceEnd) {
-  while (source != sourceEnd) {
-    int length = trailingBytesForUTF8[*source] + 1;
-    if (length > sourceEnd - source || !isLegalUTF8(source, length))
+Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
+  while (*source != sourceEnd) {
+    int length = trailingBytesForUTF8[**source] + 1;
+    if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
       return false;
-    source += length;
+    *source += length;
   }
   return true;
 }
```