diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp | 148 |
1 files changed, 106 insertions, 42 deletions
diff --git a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp index 17c6bb3..6417d0f 100644 --- a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp @@ -195,7 +195,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin, << std::string(1, ResultChar); break; default: - if (Diags == 0) + if (!Diags) break; if (isPrintable(ResultChar)) @@ -212,6 +212,48 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin, return ResultChar; } +static void appendCodePoint(unsigned Codepoint, + llvm::SmallVectorImpl<char> &Str) { + char ResultBuf[4]; + char *ResultPtr = ResultBuf; + bool Res = llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr); + (void)Res; + assert(Res && "Unexpected conversion failure"); + Str.append(ResultBuf, ResultPtr); +} + +void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) { + for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) { + if (*I != '\\') { + Buf.push_back(*I); + continue; + } + + ++I; + assert(*I == 'u' || *I == 'U'); + + unsigned NumHexDigits; + if (*I == 'u') + NumHexDigits = 4; + else + NumHexDigits = 8; + + assert(I + NumHexDigits <= E); + + uint32_t CodePoint = 0; + for (++I; NumHexDigits != 0; ++I, --NumHexDigits) { + unsigned Value = llvm::hexDigitValue(*I); + assert(Value != -1U); + + CodePoint <<= 4; + CodePoint += Value; + } + + appendCodePoint(CodePoint, Buf); + --I; + } +} + /// ProcessUCNEscape - Read the Universal Character Name, check constraints and /// return the UTF32. static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, @@ -298,7 +340,7 @@ static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, FullSourceLoc Loc; if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, - UcnLen, Loc, 0, Features, true)) { + UcnLen, Loc, nullptr, Features, true)) { HadError = true; return 0; } @@ -480,7 +522,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, isLongLong = false; isFloat = false; isImaginary = false; - isMicrosoftInteger = false; + MicrosoftInteger = 0; hadError = false; if (*s == '0') { // parse radix @@ -529,7 +571,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, // Parse the suffix. At this point we can classify whether we have an FP or // integer constant. bool isFPConstant = isFloatingLiteral(); - const char *ImaginarySuffixLoc = 0; + const char *ImaginarySuffixLoc = nullptr; // Loop over all of the characters of the suffix. If we see something bad, // we break out of the loop. @@ -564,49 +606,53 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, case 'i': case 'I': if (PP.getLangOpts().MicrosoftExt) { - if (isFPConstant || isLong || isLongLong) break; + if (isLong || isLongLong || MicrosoftInteger) + break; // Allow i8, i16, i32, i64, and i128. if (s + 1 != ThisTokEnd) { switch (s[1]) { case '8': + if (isFPConstant) break; s += 2; // i8 suffix - isMicrosoftInteger = true; + MicrosoftInteger = 8; break; case '1': + if (isFPConstant) break; if (s + 2 == ThisTokEnd) break; if (s[2] == '6') { s += 3; // i16 suffix - isMicrosoftInteger = true; + MicrosoftInteger = 16; } else if (s[2] == '2') { if (s + 3 == ThisTokEnd) break; if (s[3] == '8') { s += 4; // i128 suffix - isMicrosoftInteger = true; + MicrosoftInteger = 128; } } break; case '3': + if (isFPConstant) break; if (s + 2 == ThisTokEnd) break; if (s[2] == '2') { s += 3; // i32 suffix - isLong = true; - isMicrosoftInteger = true; + MicrosoftInteger = 32; } break; case '6': + if (isFPConstant) break; if (s + 2 == ThisTokEnd) break; if (s[2] == '4') { s += 3; // i64 suffix - isLongLong = true; - isMicrosoftInteger = true; + MicrosoftInteger = 64; } break; default: break; } - break; + if (MicrosoftInteger) + break; } } // "i", "if", and "il" are user-defined suffixes in C++1y. @@ -625,8 +671,9 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, } if (s != ThisTokEnd) { - if (isValidUDSuffix(PP.getLangOpts(), - StringRef(SuffixBegin, ThisTokEnd - SuffixBegin))) { + // FIXME: Don't bother expanding UCNs if !tok.hasUCN(). + expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin)); + if (isValidUDSuffix(PP.getLangOpts(), UDSuffixBuf)) { // Any suffix pieces we might have parsed are actually part of the // ud-suffix. isLong = false; @@ -634,7 +681,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, isLongLong = false; isFloat = false; isImaginary = false; - isMicrosoftInteger = false; + MicrosoftInteger = 0; saw_ud_suffix = true; return; @@ -722,6 +769,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { s++; saw_period = true; const char *floatDigitsBegin = s; + checkSeparator(TokLoc, s, CSK_BeforeDigits); s = SkipHexDigits(s); noSignificand &= (floatDigitsBegin == s); } @@ -736,6 +784,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { // A binary exponent can appear with or with a '.'. If dotted, the // binary exponent is required. if (*s == 'p' || *s == 'P') { + checkSeparator(TokLoc, s, CSK_AfterDigits); const char *Exponent = s; s++; saw_exponent = true; @@ -747,6 +796,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { hadError = true; return; } + checkSeparator(TokLoc, s, CSK_BeforeDigits); s = first_non_digit; if (!PP.getLangOpts().HexFloats) @@ -815,9 +865,11 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { s++; radix = 10; saw_period = true; + checkSeparator(TokLoc, s, CSK_BeforeDigits); s = SkipDigits(s); // Skip suffix. } if (*s == 'e' || *s == 'E') { // exponent + checkSeparator(TokLoc, s, CSK_AfterDigits); const char *Exponent = s; s++; radix = 10; @@ -825,6 +877,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { if (*s == '+' || *s == '-') s++; // sign const char *first_non_digit = SkipDigits(s); if (first_non_digit != s) { + checkSeparator(TokLoc, s, CSK_BeforeDigits); s = first_non_digit; } else { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin), @@ -992,7 +1045,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, do { --end; } while (end[-1] != '\''); - UDSuffixBuf.assign(end, UDSuffixEnd); + // FIXME: Don't bother with this if !tok.hasUCN(). + expandUCNs(UDSuffixBuf, StringRef(end, UDSuffixEnd - end)); UDSuffixOffset = end - TokBegin; } @@ -1201,26 +1255,26 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, /// \endverbatim /// StringLiteralParser:: -StringLiteralParser(const Token *StringToks, unsigned NumStringToks, +StringLiteralParser(ArrayRef<Token> StringToks, Preprocessor &PP, bool Complain) : SM(PP.getSourceManager()), Features(PP.getLangOpts()), - Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0), + Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() :nullptr), MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { - init(StringToks, NumStringToks); + init(StringToks); } -void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ +void StringLiteralParser::init(ArrayRef<Token> StringToks){ // The literal token may have come from an invalid source location (e.g. due // to a PCH error), in which case the token length will be 0. - if (NumStringToks == 0 || StringToks[0].getLength() < 2) + if (StringToks.empty() || StringToks[0].getLength() < 2) return DiagnoseLexingError(SourceLocation()); // Scan all of the string portions, remember the max individual token length, // computing a bound on the concatenated string length, and see whether any // piece is a wide-string. If any of the string portions is a wide-string // literal, the result is a wide-string literal [C99 6.4.5p4]. - assert(NumStringToks && "expected at least one token"); + assert(!StringToks.empty() && "expected at least one token"); MaxTokenLength = StringToks[0].getLength(); assert(StringToks[0].getLength() >= 2 && "literal token is invalid!"); SizeBound = StringToks[0].getLength()-2; // -2 for "". @@ -1230,7 +1284,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ // Implement Translation Phase #6: concatenation of string literals /// (C99 5.1.1.2p1). The common case is only one string fragment. - for (unsigned i = 1; i != NumStringToks; ++i) { + for (unsigned i = 1; i != StringToks.size(); ++i) { if (StringToks[i].getLength() < 2) return DiagnoseLexingError(StringToks[i].getLocation()); @@ -1286,7 +1340,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ SourceLocation UDSuffixTokLoc; - for (unsigned i = 0, e = NumStringToks; i != e; ++i) { + for (unsigned i = 0, e = StringToks.size(); i != e; ++i) { const char *ThisTokBuf = &TokenBuf[0]; // Get the spelling of the token, which eliminates trigraphs, etc. We know // that ThisTokBuf points to a buffer that is big enough for the whole token @@ -1311,23 +1365,34 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd); if (UDSuffixBuf.empty()) { - UDSuffixBuf.assign(UDSuffix); + if (StringToks[i].hasUCN()) + expandUCNs(UDSuffixBuf, UDSuffix); + else + UDSuffixBuf.assign(UDSuffix); UDSuffixToken = i; UDSuffixOffset = ThisTokEnd - ThisTokBuf; UDSuffixTokLoc = StringToks[i].getLocation(); - } else if (!UDSuffixBuf.equals(UDSuffix)) { + } else { + SmallString<32> ExpandedUDSuffix; + if (StringToks[i].hasUCN()) { + expandUCNs(ExpandedUDSuffix, UDSuffix); + UDSuffix = ExpandedUDSuffix; + } + // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the // result of a concatenation involving at least one user-defined-string- // literal, all the participating user-defined-string-literals shall // have the same ud-suffix. - if (Diags) { - SourceLocation TokLoc = StringToks[i].getLocation(); - Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix) - << UDSuffixBuf << UDSuffix - << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc) - << SourceRange(TokLoc, TokLoc); + if (UDSuffixBuf != UDSuffix) { + if (Diags) { + SourceLocation TokLoc = StringToks[i].getLocation(); + Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix) + << UDSuffixBuf << UDSuffix + << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc) + << SourceRange(TokLoc, TokLoc); + } + hadError = true; } - hadError = true; } } @@ -1449,10 +1514,10 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ // Verify that pascal strings aren't too large. if (GetStringLength() > 256) { if (Diags) - Diags->Report(StringToks[0].getLocation(), + Diags->Report(StringToks.front().getLocation(), diag::err_pascal_string_too_long) - << SourceRange(StringToks[0].getLocation(), - StringToks[NumStringToks-1].getLocation()); + << SourceRange(StringToks.front().getLocation(), + StringToks.back().getLocation()); hadError = true; return; } @@ -1461,12 +1526,12 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509; if (GetNumStringChars() > MaxChars) - Diags->Report(StringToks[0].getLocation(), + Diags->Report(StringToks.front().getLocation(), diag::ext_string_too_long) << GetNumStringChars() << MaxChars << (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0) - << SourceRange(StringToks[0].getLocation(), - StringToks[NumStringToks-1].getLocation()); + << SourceRange(StringToks.front().getLocation(), + StringToks.back().getLocation()); } } @@ -1516,8 +1581,7 @@ bool StringLiteralParser::CopyStringFragment(const Token &Tok, Dummy.reserve(Fragment.size() * CharByteWidth); char *Ptr = Dummy.data(); - while (!Builder.hasMaxRanges() && - !ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) { + while (!ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) { const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp); NextStart = resyncUTF8(ErrorPtr, Fragment.end()); Builder << MakeCharSourceRange(Features, SourceLoc, TokBegin, |