summaryrefslogtreecommitdiffstats
path: root/lib/Lex/LiteralSupport.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lex/LiteralSupport.cpp')
-rw-r--r--lib/Lex/LiteralSupport.cpp156
1 files changed, 123 insertions, 33 deletions
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index 09f4a68..17c6bb3 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -157,7 +157,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
// Check for overflow.
if (Overflow && Diags) // Too many digits to fit in
Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
- diag::warn_hex_escape_too_large);
+ diag::err_hex_escape_too_large);
break;
}
case '0': case '1': case '2': case '3':
@@ -180,7 +180,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
if (Diags)
Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
- diag::warn_octal_escape_too_large);
+ diag::err_octal_escape_too_large);
ResultChar &= ~0U >> (32-CharWidth);
}
break;
@@ -336,7 +336,7 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
return;
}
- assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth) &&
+ assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth == 4) &&
"only character widths of 1, 2, or 4 bytes supported");
(void)UcnLen;
@@ -413,10 +413,12 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
/// decimal-constant integer-suffix
/// octal-constant integer-suffix
/// hexadecimal-constant integer-suffix
+/// binary-literal integer-suffix [GNU, C++1y]
/// user-defined-integer-literal: [C++11 lex.ext]
/// decimal-literal ud-suffix
/// octal-literal ud-suffix
/// hexadecimal-literal ud-suffix
+/// binary-literal ud-suffix [GNU, C++1y]
/// decimal-constant:
/// nonzero-digit
/// decimal-constant digit
@@ -428,6 +430,10 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
/// hexadecimal-constant hexadecimal-digit
/// hexadecimal-prefix: one of
/// 0x 0X
+/// binary-literal:
+/// 0b binary-digit
+/// 0B binary-digit
+/// binary-literal binary-digit
/// integer-suffix:
/// unsigned-suffix [long-suffix]
/// unsigned-suffix [long-long-suffix]
@@ -441,6 +447,9 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
/// 0 1 2 3 4 5 6 7 8 9
/// a b c d e f
/// A B C D E F
+/// binary-digit:
+/// 0
+/// 1
/// unsigned-suffix: one of
/// u U
/// long-suffix: one of
@@ -489,15 +498,19 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
hadError = true;
return;
} else if (*s == '.') {
+ checkSeparator(TokLoc, s, CSK_AfterDigits);
s++;
saw_period = true;
+ checkSeparator(TokLoc, s, CSK_BeforeDigits);
s = SkipDigits(s);
}
if ((*s == 'e' || *s == 'E')) { // exponent
+ checkSeparator(TokLoc, s, CSK_AfterDigits);
const char *Exponent = s;
s++;
saw_exponent = true;
if (*s == '+' || *s == '-') s++; // sign
+ checkSeparator(TokLoc, s, CSK_BeforeDigits);
const char *first_non_digit = SkipDigits(s);
if (first_non_digit != s) {
s = first_non_digit;
@@ -511,10 +524,12 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
}
SuffixBegin = s;
+ checkSeparator(TokLoc, s, CSK_AfterDigits);
// Parse the suffix. At this point we can classify whether we have an FP or
// integer constant.
bool isFPConstant = isFloatingLiteral();
+ const char *ImaginarySuffixLoc = 0;
// Loop over all of the characters of the suffix. If we see something bad,
// we break out of the loop.
@@ -594,13 +609,15 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
break;
}
}
+ // "i", "if", and "il" are user-defined suffixes in C++1y.
+ if (PP.getLangOpts().CPlusPlus1y && *s == 'i')
+ break;
// fall through.
case 'j':
case 'J':
if (isImaginary) break; // Cannot be repeated.
- PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
- diag::ext_imaginary_constant);
isImaginary = true;
+ ImaginarySuffixLoc = s;
continue; // Success.
}
// If we reached here, there was an error or a ud-suffix.
@@ -608,9 +625,17 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
}
if (s != ThisTokEnd) {
- if (PP.getLangOpts().CPlusPlus11 && s == SuffixBegin && *s == '_') {
- // We have a ud-suffix! By C++11 [lex.ext]p10, ud-suffixes not starting
- // with an '_' are ill-formed.
+ if (isValidUDSuffix(PP.getLangOpts(),
+ StringRef(SuffixBegin, ThisTokEnd - SuffixBegin))) {
+ // Any suffix pieces we might have parsed are actually part of the
+ // ud-suffix.
+ isLong = false;
+ isUnsigned = false;
+ isLongLong = false;
+ isFloat = false;
+ isImaginary = false;
+ isMicrosoftInteger = false;
+
saw_ud_suffix = true;
return;
}
@@ -623,6 +648,53 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
hadError = true;
return;
}
+
+ if (isImaginary) {
+ PP.Diag(PP.AdvanceToTokenCharacter(TokLoc,
+ ImaginarySuffixLoc - ThisTokBegin),
+ diag::ext_imaginary_constant);
+ }
+}
+
+/// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
+/// suffixes as ud-suffixes, because the diagnostic experience is better if we
+/// treat it as an invalid suffix.
+bool NumericLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
+ StringRef Suffix) {
+ if (!LangOpts.CPlusPlus11 || Suffix.empty())
+ return false;
+
+ // By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid.
+ if (Suffix[0] == '_')
+ return true;
+
+ // In C++11, there are no library suffixes.
+ if (!LangOpts.CPlusPlus1y)
+ return false;
+
+ // In C++1y, "s", "h", "min", "ms", "us", and "ns" are used in the library.
+ // Per tweaked N3660, "il", "i", and "if" are also used in the library.
+ return llvm::StringSwitch<bool>(Suffix)
+ .Cases("h", "min", "s", true)
+ .Cases("ms", "us", "ns", true)
+ .Cases("il", "i", "if", true)
+ .Default(false);
+}
+
+void NumericLiteralParser::checkSeparator(SourceLocation TokLoc,
+ const char *Pos,
+ CheckSeparatorKind IsAfterDigits) {
+ if (IsAfterDigits == CSK_AfterDigits) {
+ if (Pos == ThisTokBegin)
+ return;
+ --Pos;
+ } else if (Pos == ThisTokEnd)
+ return;
+
+ if (isDigitSeparator(*Pos))
+ PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Pos - ThisTokBegin),
+ diag::err_digit_separator_not_between_digits)
+ << IsAfterDigits;
}
/// ParseNumberStartingWithZero - This method is called when the first character
@@ -634,8 +706,11 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
assert(s[0] == '0' && "Invalid method call");
s++;
+ int c1 = s[0];
+ int c2 = s[1];
+
// Handle a hex number like 0x1234.
- if ((*s == 'x' || *s == 'X') && (isHexDigit(s[1]) || s[1] == '.')) {
+ if ((c1 == 'x' || c1 == 'X') && (isHexDigit(c2) || c2 == '.')) {
s++;
radix = 16;
DigitsBegin = s;
@@ -685,7 +760,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
}
// Handle simple binary numbers 0b01010
- if (*s == 'b' || *s == 'B') {
+ if ((c1 == 'b' || c1 == 'B') && (c2 == '0' || c2 == '1')) {
// 0b101010 is a C++1y / GCC extension.
PP.Diag(TokLoc,
PP.getLangOpts().CPlusPlus1y
@@ -789,7 +864,8 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
if (alwaysFitsInto64Bits(radix, NumDigits)) {
uint64_t N = 0;
for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr)
- N = N * radix + llvm::hexDigitValue(*Ptr);
+ if (!isDigitSeparator(*Ptr))
+ N = N * radix + llvm::hexDigitValue(*Ptr);
// This will truncate the value to Val's input width. Simply check
// for overflow by comparing.
@@ -806,6 +882,11 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
bool OverflowOccurred = false;
while (Ptr < SuffixBegin) {
+ if (isDigitSeparator(*Ptr)) {
+ ++Ptr;
+ continue;
+ }
+
unsigned C = llvm::hexDigitValue(*Ptr++);
// If this letter is out of bound for this radix, reject it.
@@ -834,8 +915,17 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
using llvm::APFloat;
unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
- return Result.convertFromString(StringRef(ThisTokBegin, n),
- APFloat::rmNearestTiesToEven);
+
+ llvm::SmallString<16> Buffer;
+ StringRef Str(ThisTokBegin, n);
+ if (Str.find('\'') != StringRef::npos) {
+ Buffer.reserve(n);
+ std::remove_copy_if(Str.begin(), Str.end(), std::back_inserter(Buffer),
+ &isDigitSeparator);
+ Str = Buffer;
+ }
+
+ return Result.convertFromString(Str, APFloat::rmNearestTiesToEven);
}
@@ -921,8 +1011,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
assert(PP.getTargetInfo().getWCharWidth() <= 64 &&
"Assumes sizeof(wchar) on target is <= 64");
- SmallVector<uint32_t,4> codepoint_buffer;
- codepoint_buffer.resize(end-begin);
+ SmallVector<uint32_t, 4> codepoint_buffer;
+ codepoint_buffer.resize(end - begin);
uint32_t *buffer_begin = &codepoint_buffer.front();
uint32_t *buffer_end = buffer_begin + codepoint_buffer.size();
@@ -931,7 +1021,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
// by this implementation.
uint32_t largest_character_for_kind;
if (tok::wide_char_constant == Kind) {
- largest_character_for_kind = 0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth());
+ largest_character_for_kind =
+ 0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth());
} else if (tok::utf16_char_constant == Kind) {
largest_character_for_kind = 0xFFFF;
} else if (tok::utf32_char_constant == Kind) {
@@ -940,7 +1031,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
largest_character_for_kind = 0x7Fu;
}
- while (begin!=end) {
+ while (begin != end) {
// Is this a span of non-escape characters?
if (begin[0] != '\\') {
char const *start = begin;
@@ -951,12 +1042,12 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
char const *tmp_in_start = start;
uint32_t *tmp_out_start = buffer_begin;
ConversionResult res =
- ConvertUTF8toUTF32(reinterpret_cast<UTF8 const **>(&start),
- reinterpret_cast<UTF8 const *>(begin),
- &buffer_begin,buffer_end,strictConversion);
- if (res!=conversionOK) {
- // If we see bad encoding for unprefixed character literals, warn and
- // simply copy the byte values, for compatibility with gcc and
+ ConvertUTF8toUTF32(reinterpret_cast<UTF8 const **>(&start),
+ reinterpret_cast<UTF8 const *>(begin),
+ &buffer_begin, buffer_end, strictConversion);
+ if (res != conversionOK) {
+ // If we see bad encoding for unprefixed character literals, warn and
+ // simply copy the byte values, for compatibility with gcc and
// older versions of clang.
bool NoErrorOnBadEncoding = isAscii();
unsigned Msg = diag::err_bad_character_encoding;
@@ -966,13 +1057,13 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
if (NoErrorOnBadEncoding) {
start = tmp_in_start;
buffer_begin = tmp_out_start;
- for ( ; start != begin; ++start, ++buffer_begin)
+ for (; start != begin; ++start, ++buffer_begin)
*buffer_begin = static_cast<uint8_t>(*start);
} else {
HadError = true;
}
} else {
- for (; tmp_out_start <buffer_begin; ++tmp_out_start) {
+ for (; tmp_out_start < buffer_begin; ++tmp_out_start) {
if (*tmp_out_start > largest_character_for_kind) {
HadError = true;
PP.Diag(Loc, diag::err_character_too_large);
@@ -982,14 +1073,12 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
continue;
}
- // Is this a Universal Character Name excape?
+ // Is this a Universal Character Name escape?
if (begin[1] == 'u' || begin[1] == 'U') {
unsigned short UcnLen = 0;
if (!ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen,
FullSourceLoc(Loc, PP.getSourceManager()),
- &PP.getDiagnostics(), PP.getLangOpts(),
- true))
- {
+ &PP.getDiagnostics(), PP.getLangOpts(), true)) {
HadError = true;
} else if (*buffer_begin > largest_character_for_kind) {
HadError = true;
@@ -1007,7 +1096,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
*buffer_begin++ = result;
}
- unsigned NumCharsSoFar = buffer_begin-&codepoint_buffer.front();
+ unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();
if (NumCharsSoFar > 1) {
if (isWide())
@@ -1019,8 +1108,9 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
else
PP.Diag(Loc, diag::err_multichar_utf_character_literal);
IsMultiChar = true;
- } else
+ } else {
IsMultiChar = false;
+ }
llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);
@@ -1029,7 +1119,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
bool multi_char_too_long = false;
if (isAscii() && isMultiChar()) {
LitVal = 0;
- for (size_t i=0;i<NumCharsSoFar;++i) {
+ for (size_t i = 0; i < NumCharsSoFar; ++i) {
// check for enough leading zeros to shift into
multi_char_too_long |= (LitVal.countLeadingZeros() < 8);
LitVal <<= 8;
@@ -1041,7 +1131,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
}
if (!HadError && multi_char_too_long) {
- PP.Diag(Loc,diag::warn_char_constant_too_large);
+ PP.Diag(Loc, diag::warn_char_constant_too_large);
}
// Transfer the value from APInt to uint64_t
OpenPOWER on IntegriCloud