1 files changed, 106 insertions, 42 deletions
diff --git a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp
index 17c6bb3..6417d0f 100644
--- a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp
@@ -195,7 +195,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
         << std::string(1, ResultChar);
     break;
   default:
-    if (Diags == 0)
+    if (!Diags)
       break;
 
     if (isPrintable(ResultChar))
@@ -212,6 +212,48 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
   return ResultChar;
 }
 
+static void appendCodePoint(unsigned Codepoint,
+                            llvm::SmallVectorImpl<char> &Str) {
+  char ResultBuf[4];
+  char *ResultPtr = ResultBuf;
+  bool Res = llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr);
+  (void)Res;
+  assert(Res && "Unexpected conversion failure");
+  Str.append(ResultBuf, ResultPtr);
+}
+
+void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
+  for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
+    if (*I != '\\') {
+      Buf.push_back(*I);
+      continue;
+    }
+
+    ++I;
+    assert(*I == 'u' || *I == 'U');
+
+    unsigned NumHexDigits;
+    if (*I == 'u')
+      NumHexDigits = 4;
+    else
+      NumHexDigits = 8;
+
+    assert(I + NumHexDigits <= E);
+
+    uint32_t CodePoint = 0;
+    for (++I; NumHexDigits != 0; ++I, --NumHexDigits) {
+      unsigned Value = llvm::hexDigitValue(*I);
+      assert(Value != -1U);
+
+      CodePoint <<= 4;
+      CodePoint += Value;
+    }
+
+    appendCodePoint(CodePoint, Buf);
+    --I;
+  }
+}
+
 /// ProcessUCNEscape - Read the Universal Character Name, check constraints and
 /// return the UTF32.
 static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
@@ -298,7 +340,7 @@ static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
   FullSourceLoc Loc;
 
   if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
-                        UcnLen, Loc, 0, Features, true)) {
+                        UcnLen, Loc, nullptr, Features, true)) {
     HadError = true;
     return 0;
   }
@@ -480,7 +522,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
   isLongLong = false;
   isFloat = false;
   isImaginary = false;
-  isMicrosoftInteger = false;
+  MicrosoftInteger = 0;
   hadError = false;
 
   if (*s == '0') { // parse radix
@@ -529,7 +571,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
   // Parse the suffix.  At this point we can classify whether we have an FP or
   // integer constant.
   bool isFPConstant = isFloatingLiteral();
-  const char *ImaginarySuffixLoc = 0;
+  const char *ImaginarySuffixLoc = nullptr;
 
   // Loop over all of the characters of the suffix.  If we see something bad,
   // we break out of the loop.
@@ -564,49 +606,53 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
     case 'i':
     case 'I':
       if (PP.getLangOpts().MicrosoftExt) {
-        if (isFPConstant || isLong || isLongLong) break;
+        if (isLong || isLongLong || MicrosoftInteger)
+          break;
 
         // Allow i8, i16, i32, i64, and i128.
         if (s + 1 != ThisTokEnd) {
           switch (s[1]) {
             case '8':
+              if (isFPConstant) break;
               s += 2; // i8 suffix
-              isMicrosoftInteger = true;
+              MicrosoftInteger = 8;
               break;
             case '1':
+              if (isFPConstant) break;
               if (s + 2 == ThisTokEnd) break;
               if (s[2] == '6') {
                 s += 3; // i16 suffix
-                isMicrosoftInteger = true;
+                MicrosoftInteger = 16;
               }
               else if (s[2] == '2') {
                 if (s + 3 == ThisTokEnd) break;
                 if (s[3] == '8') {
                   s += 4; // i128 suffix
-                  isMicrosoftInteger = true;
+                  MicrosoftInteger = 128;
                 }
               }
               break;
             case '3':
+              if (isFPConstant) break;
               if (s + 2 == ThisTokEnd) break;
               if (s[2] == '2') {
                 s += 3; // i32 suffix
-                isLong = true;
-                isMicrosoftInteger = true;
+                MicrosoftInteger = 32;
               }
               break;
             case '6':
+              if (isFPConstant) break;
               if (s + 2 == ThisTokEnd) break;
               if (s[2] == '4') {
                 s += 3; // i64 suffix
-                isLongLong = true;
-                isMicrosoftInteger = true;
+                MicrosoftInteger = 64;
               }
               break;
             default:
               break;
           }
-          break;
+          if (MicrosoftInteger)
+            break;
         }
       }
       // "i", "if", and "il" are user-defined suffixes in C++1y.
@@ -625,8 +671,9 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
   }
 
   if (s != ThisTokEnd) {
-    if (isValidUDSuffix(PP.getLangOpts(),
-                        StringRef(SuffixBegin, ThisTokEnd - SuffixBegin))) {
+    // FIXME: Don't bother expanding UCNs if !tok.hasUCN().
+    expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin));
+    if (isValidUDSuffix(PP.getLangOpts(), UDSuffixBuf)) {
       // Any suffix pieces we might have parsed are actually part of the
       // ud-suffix.
       isLong = false;
@@ -634,7 +681,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
       isLongLong = false;
       isFloat = false;
       isImaginary = false;
-      isMicrosoftInteger = false;
+      MicrosoftInteger = 0;
 
       saw_ud_suffix = true;
       return;
@@ -722,6 +769,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
       s++;
       saw_period = true;
       const char *floatDigitsBegin = s;
+      checkSeparator(TokLoc, s, CSK_BeforeDigits);
       s = SkipHexDigits(s);
       noSignificand &= (floatDigitsBegin == s);
     }
@@ -736,6 +784,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
     // A binary exponent can appear with or with a '.'. If dotted, the
     // binary exponent is required.
     if (*s == 'p' || *s == 'P') {
+      checkSeparator(TokLoc, s, CSK_AfterDigits);
       const char *Exponent = s;
       s++;
       saw_exponent = true;
@@ -747,6 +796,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
         hadError = true;
         return;
       }
+      checkSeparator(TokLoc, s, CSK_BeforeDigits);
       s = first_non_digit;
 
       if (!PP.getLangOpts().HexFloats)
@@ -815,9 +865,11 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
     s++;
     radix = 10;
     saw_period = true;
+    checkSeparator(TokLoc, s, CSK_BeforeDigits);
     s = SkipDigits(s); // Skip suffix.
   }
   if (*s == 'e' || *s == 'E') { // exponent
+    checkSeparator(TokLoc, s, CSK_AfterDigits);
     const char *Exponent = s;
     s++;
     radix = 10;
@@ -825,6 +877,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
     if (*s == '+' || *s == '-')  s++; // sign
     const char *first_non_digit = SkipDigits(s);
     if (first_non_digit != s) {
+      checkSeparator(TokLoc, s, CSK_BeforeDigits);
       s = first_non_digit;
     } else {
       PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
@@ -992,7 +1045,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
     do {
       --end;
     } while (end[-1] != '\'');
-    UDSuffixBuf.assign(end, UDSuffixEnd);
+    // FIXME: Don't bother with this if !tok.hasUCN().
+    expandUCNs(UDSuffixBuf, StringRef(end, UDSuffixEnd - end));
     UDSuffixOffset = end - TokBegin;
   }
 
@@ -1201,26 +1255,26 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
 /// \endverbatim
 ///
 StringLiteralParser::
-StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
+StringLiteralParser(ArrayRef<Token> StringToks,
                     Preprocessor &PP, bool Complain)
   : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
-    Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0),
+    Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() :nullptr),
     MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
     ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
-  init(StringToks, NumStringToks);
+  init(StringToks);
 }
 
-void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
+void StringLiteralParser::init(ArrayRef<Token> StringToks){
   // The literal token may have come from an invalid source location (e.g. due
   // to a PCH error), in which case the token length will be 0.
-  if (NumStringToks == 0 || StringToks[0].getLength() < 2)
+  if (StringToks.empty() || StringToks[0].getLength() < 2)
     return DiagnoseLexingError(SourceLocation());
 
   // Scan all of the string portions, remember the max individual token length,
   // computing a bound on the concatenated string length, and see whether any
   // piece is a wide-string.  If any of the string portions is a wide-string
   // literal, the result is a wide-string literal [C99 6.4.5p4].
-  assert(NumStringToks && "expected at least one token");
+  assert(!StringToks.empty() && "expected at least one token");
   MaxTokenLength = StringToks[0].getLength();
   assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
   SizeBound = StringToks[0].getLength()-2;  // -2 for "".
@@ -1230,7 +1284,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
 
   // Implement Translation Phase #6: concatenation of string literals
   /// (C99 5.1.1.2p1).  The common case is only one string fragment.
-  for (unsigned i = 1; i != NumStringToks; ++i) {
+  for (unsigned i = 1; i != StringToks.size(); ++i) {
     if (StringToks[i].getLength() < 2)
       return DiagnoseLexingError(StringToks[i].getLocation());
 
@@ -1286,7 +1340,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
 
   SourceLocation UDSuffixTokLoc;
 
-  for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
+  for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
     const char *ThisTokBuf = &TokenBuf[0];
     // Get the spelling of the token, which eliminates trigraphs, etc.  We know
     // that ThisTokBuf points to a buffer that is big enough for the whole token
@@ -1311,23 +1365,34 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
       StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
 
       if (UDSuffixBuf.empty()) {
-        UDSuffixBuf.assign(UDSuffix);
+        if (StringToks[i].hasUCN())
+          expandUCNs(UDSuffixBuf, UDSuffix);
+        else
+          UDSuffixBuf.assign(UDSuffix);
         UDSuffixToken = i;
         UDSuffixOffset = ThisTokEnd - ThisTokBuf;
         UDSuffixTokLoc = StringToks[i].getLocation();
-      } else if (!UDSuffixBuf.equals(UDSuffix)) {
+      } else {
+        SmallString<32> ExpandedUDSuffix;
+        if (StringToks[i].hasUCN()) {
+          expandUCNs(ExpandedUDSuffix, UDSuffix);
+          UDSuffix = ExpandedUDSuffix;
+        }
+
         // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
         // result of a concatenation involving at least one user-defined-string-
         // literal, all the participating user-defined-string-literals shall
         // have the same ud-suffix.
-        if (Diags) {
-          SourceLocation TokLoc = StringToks[i].getLocation();
-          Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
-            << UDSuffixBuf << UDSuffix
-            << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
-            << SourceRange(TokLoc, TokLoc);
+        if (UDSuffixBuf != UDSuffix) {
+          if (Diags) {
+            SourceLocation TokLoc = StringToks[i].getLocation();
+            Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
+              << UDSuffixBuf << UDSuffix
+              << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
+              << SourceRange(TokLoc, TokLoc);
+          }
+          hadError = true;
         }
-        hadError = true;
       }
     }
 
@@ -1449,10 +1514,10 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
     // Verify that pascal strings aren't too large.
     if (GetStringLength() > 256) {
       if (Diags)
-        Diags->Report(StringToks[0].getLocation(),
+        Diags->Report(StringToks.front().getLocation(),
                       diag::err_pascal_string_too_long)
-          << SourceRange(StringToks[0].getLocation(),
-                         StringToks[NumStringToks-1].getLocation());
+          << SourceRange(StringToks.front().getLocation(),
+                         StringToks.back().getLocation());
       hadError = true;
       return;
     }
@@ -1461,12 +1526,12 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
     unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;
 
     if (GetNumStringChars() > MaxChars)
-      Diags->Report(StringToks[0].getLocation(),
+      Diags->Report(StringToks.front().getLocation(),
                     diag::ext_string_too_long)
         << GetNumStringChars() << MaxChars
         << (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)
-        << SourceRange(StringToks[0].getLocation(),
-                       StringToks[NumStringToks-1].getLocation());
+        << SourceRange(StringToks.front().getLocation(),
+                       StringToks.back().getLocation());
   }
 }
 
@@ -1516,8 +1581,7 @@ bool StringLiteralParser::CopyStringFragment(const Token &Tok,
     Dummy.reserve(Fragment.size() * CharByteWidth);
     char *Ptr = Dummy.data();
 
-    while (!Builder.hasMaxRanges() &&
-           !ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) {
+    while (!ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) {
       const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
       NextStart = resyncUTF8(ErrorPtr, Fragment.end());
       Builder << MakeCharSourceRange(Features, SourceLoc, TokBegin,