summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp')
-rw-r--r--contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp148
1 files changed, 106 insertions, 42 deletions
diff --git a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp
index 17c6bb3..6417d0f 100644
--- a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp
@@ -195,7 +195,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
<< std::string(1, ResultChar);
break;
default:
- if (Diags == 0)
+ if (!Diags)
break;
if (isPrintable(ResultChar))
@@ -212,6 +212,48 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
return ResultChar;
}
+static void appendCodePoint(unsigned Codepoint,
+ llvm::SmallVectorImpl<char> &Str) {
+ char ResultBuf[4];
+ char *ResultPtr = ResultBuf;
+ bool Res = llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr);
+ (void)Res;
+ assert(Res && "Unexpected conversion failure");
+ Str.append(ResultBuf, ResultPtr);
+}
+
+void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
+ for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
+ if (*I != '\\') {
+ Buf.push_back(*I);
+ continue;
+ }
+
+ ++I;
+ assert(*I == 'u' || *I == 'U');
+
+ unsigned NumHexDigits;
+ if (*I == 'u')
+ NumHexDigits = 4;
+ else
+ NumHexDigits = 8;
+
+ assert(I + NumHexDigits <= E);
+
+ uint32_t CodePoint = 0;
+ for (++I; NumHexDigits != 0; ++I, --NumHexDigits) {
+ unsigned Value = llvm::hexDigitValue(*I);
+ assert(Value != -1U);
+
+ CodePoint <<= 4;
+ CodePoint += Value;
+ }
+
+ appendCodePoint(CodePoint, Buf);
+ --I;
+ }
+}
+
/// ProcessUCNEscape - Read the Universal Character Name, check constraints and
/// return the UTF32.
static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
@@ -298,7 +340,7 @@ static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
FullSourceLoc Loc;
if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
- UcnLen, Loc, 0, Features, true)) {
+ UcnLen, Loc, nullptr, Features, true)) {
HadError = true;
return 0;
}
@@ -480,7 +522,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
isLongLong = false;
isFloat = false;
isImaginary = false;
- isMicrosoftInteger = false;
+ MicrosoftInteger = 0;
hadError = false;
if (*s == '0') { // parse radix
@@ -529,7 +571,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
// Parse the suffix. At this point we can classify whether we have an FP or
// integer constant.
bool isFPConstant = isFloatingLiteral();
- const char *ImaginarySuffixLoc = 0;
+ const char *ImaginarySuffixLoc = nullptr;
// Loop over all of the characters of the suffix. If we see something bad,
// we break out of the loop.
@@ -564,49 +606,53 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
case 'i':
case 'I':
if (PP.getLangOpts().MicrosoftExt) {
- if (isFPConstant || isLong || isLongLong) break;
+ if (isLong || isLongLong || MicrosoftInteger)
+ break;
// Allow i8, i16, i32, i64, and i128.
if (s + 1 != ThisTokEnd) {
switch (s[1]) {
case '8':
+ if (isFPConstant) break;
s += 2; // i8 suffix
- isMicrosoftInteger = true;
+ MicrosoftInteger = 8;
break;
case '1':
+ if (isFPConstant) break;
if (s + 2 == ThisTokEnd) break;
if (s[2] == '6') {
s += 3; // i16 suffix
- isMicrosoftInteger = true;
+ MicrosoftInteger = 16;
}
else if (s[2] == '2') {
if (s + 3 == ThisTokEnd) break;
if (s[3] == '8') {
s += 4; // i128 suffix
- isMicrosoftInteger = true;
+ MicrosoftInteger = 128;
}
}
break;
case '3':
+ if (isFPConstant) break;
if (s + 2 == ThisTokEnd) break;
if (s[2] == '2') {
s += 3; // i32 suffix
- isLong = true;
- isMicrosoftInteger = true;
+ MicrosoftInteger = 32;
}
break;
case '6':
+ if (isFPConstant) break;
if (s + 2 == ThisTokEnd) break;
if (s[2] == '4') {
s += 3; // i64 suffix
- isLongLong = true;
- isMicrosoftInteger = true;
+ MicrosoftInteger = 64;
}
break;
default:
break;
}
- break;
+ if (MicrosoftInteger)
+ break;
}
}
// "i", "if", and "il" are user-defined suffixes in C++1y.
@@ -625,8 +671,9 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
}
if (s != ThisTokEnd) {
- if (isValidUDSuffix(PP.getLangOpts(),
- StringRef(SuffixBegin, ThisTokEnd - SuffixBegin))) {
+ // FIXME: Don't bother expanding UCNs if !tok.hasUCN().
+ expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin));
+ if (isValidUDSuffix(PP.getLangOpts(), UDSuffixBuf)) {
// Any suffix pieces we might have parsed are actually part of the
// ud-suffix.
isLong = false;
@@ -634,7 +681,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
isLongLong = false;
isFloat = false;
isImaginary = false;
- isMicrosoftInteger = false;
+ MicrosoftInteger = 0;
saw_ud_suffix = true;
return;
@@ -722,6 +769,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
s++;
saw_period = true;
const char *floatDigitsBegin = s;
+ checkSeparator(TokLoc, s, CSK_BeforeDigits);
s = SkipHexDigits(s);
noSignificand &= (floatDigitsBegin == s);
}
@@ -736,6 +784,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
// A binary exponent can appear with or with a '.'. If dotted, the
// binary exponent is required.
if (*s == 'p' || *s == 'P') {
+ checkSeparator(TokLoc, s, CSK_AfterDigits);
const char *Exponent = s;
s++;
saw_exponent = true;
@@ -747,6 +796,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
hadError = true;
return;
}
+ checkSeparator(TokLoc, s, CSK_BeforeDigits);
s = first_non_digit;
if (!PP.getLangOpts().HexFloats)
@@ -815,9 +865,11 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
s++;
radix = 10;
saw_period = true;
+ checkSeparator(TokLoc, s, CSK_BeforeDigits);
s = SkipDigits(s); // Skip suffix.
}
if (*s == 'e' || *s == 'E') { // exponent
+ checkSeparator(TokLoc, s, CSK_AfterDigits);
const char *Exponent = s;
s++;
radix = 10;
@@ -825,6 +877,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
if (*s == '+' || *s == '-') s++; // sign
const char *first_non_digit = SkipDigits(s);
if (first_non_digit != s) {
+ checkSeparator(TokLoc, s, CSK_BeforeDigits);
s = first_non_digit;
} else {
PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
@@ -992,7 +1045,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
do {
--end;
} while (end[-1] != '\'');
- UDSuffixBuf.assign(end, UDSuffixEnd);
+ // FIXME: Don't bother with this if !tok.hasUCN().
+ expandUCNs(UDSuffixBuf, StringRef(end, UDSuffixEnd - end));
UDSuffixOffset = end - TokBegin;
}
@@ -1201,26 +1255,26 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
/// \endverbatim
///
StringLiteralParser::
-StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
+StringLiteralParser(ArrayRef<Token> StringToks,
Preprocessor &PP, bool Complain)
: SM(PP.getSourceManager()), Features(PP.getLangOpts()),
- Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0),
+ Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() :nullptr),
MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
- init(StringToks, NumStringToks);
+ init(StringToks);
}
-void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
+void StringLiteralParser::init(ArrayRef<Token> StringToks){
// The literal token may have come from an invalid source location (e.g. due
// to a PCH error), in which case the token length will be 0.
- if (NumStringToks == 0 || StringToks[0].getLength() < 2)
+ if (StringToks.empty() || StringToks[0].getLength() < 2)
return DiagnoseLexingError(SourceLocation());
// Scan all of the string portions, remember the max individual token length,
// computing a bound on the concatenated string length, and see whether any
// piece is a wide-string. If any of the string portions is a wide-string
// literal, the result is a wide-string literal [C99 6.4.5p4].
- assert(NumStringToks && "expected at least one token");
+ assert(!StringToks.empty() && "expected at least one token");
MaxTokenLength = StringToks[0].getLength();
assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
SizeBound = StringToks[0].getLength()-2; // -2 for "".
@@ -1230,7 +1284,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
// Implement Translation Phase #6: concatenation of string literals
/// (C99 5.1.1.2p1). The common case is only one string fragment.
- for (unsigned i = 1; i != NumStringToks; ++i) {
+ for (unsigned i = 1; i != StringToks.size(); ++i) {
if (StringToks[i].getLength() < 2)
return DiagnoseLexingError(StringToks[i].getLocation());
@@ -1286,7 +1340,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
SourceLocation UDSuffixTokLoc;
- for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
+ for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
const char *ThisTokBuf = &TokenBuf[0];
// Get the spelling of the token, which eliminates trigraphs, etc. We know
// that ThisTokBuf points to a buffer that is big enough for the whole token
@@ -1311,23 +1365,34 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
if (UDSuffixBuf.empty()) {
- UDSuffixBuf.assign(UDSuffix);
+ if (StringToks[i].hasUCN())
+ expandUCNs(UDSuffixBuf, UDSuffix);
+ else
+ UDSuffixBuf.assign(UDSuffix);
UDSuffixToken = i;
UDSuffixOffset = ThisTokEnd - ThisTokBuf;
UDSuffixTokLoc = StringToks[i].getLocation();
- } else if (!UDSuffixBuf.equals(UDSuffix)) {
+ } else {
+ SmallString<32> ExpandedUDSuffix;
+ if (StringToks[i].hasUCN()) {
+ expandUCNs(ExpandedUDSuffix, UDSuffix);
+ UDSuffix = ExpandedUDSuffix;
+ }
+
// C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
// result of a concatenation involving at least one user-defined-string-
// literal, all the participating user-defined-string-literals shall
// have the same ud-suffix.
- if (Diags) {
- SourceLocation TokLoc = StringToks[i].getLocation();
- Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
- << UDSuffixBuf << UDSuffix
- << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
- << SourceRange(TokLoc, TokLoc);
+ if (UDSuffixBuf != UDSuffix) {
+ if (Diags) {
+ SourceLocation TokLoc = StringToks[i].getLocation();
+ Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
+ << UDSuffixBuf << UDSuffix
+ << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
+ << SourceRange(TokLoc, TokLoc);
+ }
+ hadError = true;
}
- hadError = true;
}
}
@@ -1449,10 +1514,10 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
// Verify that pascal strings aren't too large.
if (GetStringLength() > 256) {
if (Diags)
- Diags->Report(StringToks[0].getLocation(),
+ Diags->Report(StringToks.front().getLocation(),
diag::err_pascal_string_too_long)
- << SourceRange(StringToks[0].getLocation(),
- StringToks[NumStringToks-1].getLocation());
+ << SourceRange(StringToks.front().getLocation(),
+ StringToks.back().getLocation());
hadError = true;
return;
}
@@ -1461,12 +1526,12 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;
if (GetNumStringChars() > MaxChars)
- Diags->Report(StringToks[0].getLocation(),
+ Diags->Report(StringToks.front().getLocation(),
diag::ext_string_too_long)
<< GetNumStringChars() << MaxChars
<< (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)
- << SourceRange(StringToks[0].getLocation(),
- StringToks[NumStringToks-1].getLocation());
+ << SourceRange(StringToks.front().getLocation(),
+ StringToks.back().getLocation());
}
}
@@ -1516,8 +1581,7 @@ bool StringLiteralParser::CopyStringFragment(const Token &Tok,
Dummy.reserve(Fragment.size() * CharByteWidth);
char *Ptr = Dummy.data();
- while (!Builder.hasMaxRanges() &&
- !ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) {
+ while (!ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) {
const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
NextStart = resyncUTF8(ErrorPtr, Fragment.end());
Builder << MakeCharSourceRange(Features, SourceLoc, TokBegin,
OpenPOWER on IntegriCloud