1 files changed, 210 insertions, 20 deletions
diff --git a/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp b/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp
index 917829b..b17198b 100644
--- a/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp
@@ -212,6 +212,109 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
   }
 }
 
+//===----------------------------------------------------------------------===//
+// Token Spelling
+//===----------------------------------------------------------------------===//
+
+/// getSpelling() - Return the 'spelling' of this token: the characters used
+/// to represent it in the source file after trigraph expansion and
+/// escaped-newline folding.  This is the true, uncanonicalized spelling of
+/// things like digraphs, UCNs, etc.  If the character data is reported invalid,
+/// an empty string is returned and *Invalid (when non-null) is set to true.
+std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
+                               const LangOptions &Features, bool *Invalid) {
+  assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+  
+  // Find the token's source text; if it needs no cleaning, return it directly.
+  bool CharDataInvalid = false;
+  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(), 
+                                                    &CharDataInvalid);
+  if (Invalid)
+    *Invalid = CharDataInvalid;
+  if (CharDataInvalid)
+    return std::string();
+  
+  if (!Tok.needsCleaning())
+    return std::string(TokStart, TokStart+Tok.getLength());
+  
+  std::string Result;
+  Result.reserve(Tok.getLength());
+  
+  // Otherwise, hard case: re-lex the token one character at a time into Result.
+  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
+       Ptr != End; ) {
+    unsigned CharSize;
+    Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features));
+    Ptr += CharSize;
+  }
+  assert(Result.size() != unsigned(Tok.getLength()) &&
+         "NeedsCleaning flag set on something that didn't need cleaning!");
+  return Result;
+}
+
+/// getSpelling - This method is used to get the spelling of a token into a
+/// preallocated buffer, instead of as a std::string.  The caller is required
+/// to provide a buffer of at least Tok.getLength() bytes.  The actual length
+/// of the spelling is returned.  *Invalid (when non-null) is only written if
+/// the token's characters have to be looked up through the SourceManager.
+///
+/// Note that this method may do one of two things: it may either fill in the
+/// buffer specified with characters, or it may *change the input pointer* to
+/// point to a constant buffer with the data already in it (avoiding a copy).
+/// The caller must not write through the returned pointer in that case.
+unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, 
+                            const SourceManager &SourceMgr,
+                            const LangOptions &Features, bool *Invalid) {
+  assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+
+  const char *TokStart = 0;
+  // NOTE: this has to be checked *before* testing for an IdentifierInfo.
+  if (Tok.is(tok::raw_identifier))
+    TokStart = Tok.getRawIdentifierData();
+  else if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
+    // Just return the string from the identifier table, which is very quick.
+    Buffer = II->getNameStart();
+    return II->getLength();
+  }
+
+  // NOTE: this can be checked even after testing for an IdentifierInfo.
+  if (Tok.isLiteral())
+    TokStart = Tok.getLiteralData();
+
+  if (TokStart == 0) {
+    // Compute the start of the token in the input lexer buffer.
+    bool CharDataInvalid = false;
+    TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
+    if (Invalid)
+      *Invalid = CharDataInvalid;
+    if (CharDataInvalid) {
+      Buffer = "";
+      return 0;
+    }
+  }
+
+  // If this token contains nothing interesting, return it directly.
+  if (!Tok.needsCleaning()) {
+    Buffer = TokStart;
+    return Tok.getLength();
+  }
+
+  // Otherwise, hard case, relex the characters into the caller's buffer.
+  char *OutBuf = const_cast<char*>(Buffer);
+  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
+       Ptr != End; ) {
+    unsigned CharSize;
+    *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features);
+    Ptr += CharSize;
+  }
+  assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
+         "NeedsCleaning flag set on something that didn't need cleaning!");
+
+  return OutBuf-Buffer;
+}
+
+
+
 static bool isWhitespace(unsigned char c);
 
 /// MeasureTokenLength - Relex the token at the specified location and return
@@ -242,7 +345,8 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
     return 0;
 
   // Create a lexer starting at the beginning of this token.
-  Lexer TheLexer(Loc, LangOpts, Buffer.begin(), StrData, Buffer.end());
+  Lexer TheLexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts,
+                 Buffer.begin(), StrData, Buffer.end());
   TheLexer.SetCommentRetentionState(true);
   Token TheTok;
   TheLexer.LexFromRawLexer(TheTok);
@@ -253,6 +357,9 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
                                           const SourceManager &SM,
                                           const LangOptions &LangOpts) {
   std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
+  if (LocInfo.first.isInvalid())
+    return Loc;
+  
   bool Invalid = false;
   llvm::StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
   if (Invalid)
@@ -261,6 +368,9 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
   // Back up from the current location until we hit the beginning of a line
   // (or the buffer). We'll relex from that point.
   const char *BufStart = Buffer.data();
+  if (LocInfo.second >= Buffer.size())
+    return Loc;
+  
   const char *StrData = BufStart+LocInfo.second;
   if (StrData[0] == '\n' || StrData[0] == '\r')
     return Loc;
@@ -371,10 +481,9 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) {
       // we don't have an identifier table available. Instead, just look at
       // the raw identifier to recognize and categorize preprocessor directives.
       TheLexer.LexFromRawLexer(TheTok);
-      if (TheTok.getKind() == tok::identifier && !TheTok.needsCleaning()) {
-        const char *IdStart = Buffer->getBufferStart() 
-                            + TheTok.getLocation().getRawEncoding() - 1;
-        llvm::StringRef Keyword(IdStart, TheTok.getLength());
+      if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) {
+        llvm::StringRef Keyword(TheTok.getRawIdentifierData(),
+                                TheTok.getLength());
         PreambleDirectiveKind PDK
           = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
               .Case("include", PDK_Skipped)
@@ -443,6 +552,83 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) {
                                : TheTok.isAtStartOfLine());
 }
 
+
+/// AdvanceToTokenCharacter - Given a location that specifies the start of a
+/// token, return a new location that specifies character CharNo of the token.
+SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
+                                              unsigned CharNo,
+                                              const SourceManager &SM,
+                                              const LangOptions &Features) {
+  // Figure out how many physical characters away the requested token
+  // character is.  This needs to take into consideration escaped newlines and
+  // trigraphs.
+  bool Invalid = false;
+  const char *TokPtr = SM.getCharacterData(TokStart, &Invalid);
+  
+  // If they request the first char of the token, we're trivially done.
+  if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
+    return TokStart;
+  
+  unsigned PhysOffset = 0;
+  
+  // The usual case is that tokens don't contain anything interesting.  Skip
+  // over the uninteresting characters.  If a token only consists of simple
+  // chars, this method is extremely fast.
+  while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
+    if (CharNo == 0)
+      return TokStart.getFileLocWithOffset(PhysOffset);
+    ++TokPtr, --CharNo, ++PhysOffset;
+  }
+  
+  // If we have a character that may be a trigraph or escaped newline, use a
+  // lexer to parse it correctly.
+  for (; CharNo; --CharNo) {
+    unsigned Size;
+    Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features);
+    TokPtr += Size;
+    PhysOffset += Size;
+  }
+  
+  // Final detail: if we end up on an escaped newline, we want to return the
+  // location of the actual byte of the token.  For example foo\<newline>bar
+  // advanced by 3 should return the location of b, not of \\.  One complicating
+  // detail of this is that the escape may be made by a trigraph.
+  if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
+    PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
+  
+  return TokStart.getFileLocWithOffset(PhysOffset);
+}
+
+/// \brief Computes the source location just past the end of the
+/// token at this source location.
+///
+/// This routine can be used to produce a source location that
+/// points just past the end of the token referenced by \p Loc, and
+/// is generally used when a diagnostic needs to point just after a
+/// token where it expected something different than it received. If
+/// the returned source location would not be meaningful (e.g., if
+/// it points into a macro), this routine returns an invalid
+/// source location.
+///
+/// \param Offset an offset from the end of the token, where the source
+/// location should refer to. 0 yields the location just past the end of the
+/// token, 1 the location of its last character, etc.  If Offset is at least
+/// the measured token length, \p Loc is returned unchanged.
+SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
+                                          const SourceManager &SM,
+                                          const LangOptions &Features) {
+  if (Loc.isInvalid() || !Loc.isFileID())
+    return SourceLocation();
+  
+  unsigned Len = Lexer::MeasureTokenLength(Loc, SM, Features);
+  if (Len > Offset)
+    Len = Len - Offset;
+  else
+    return Loc;
+  
+  return AdvanceToTokenCharacter(Loc, Len, SM, Features);
+}
+
 //===----------------------------------------------------------------------===//
 // Character information.
 //===----------------------------------------------------------------------===//
@@ -584,10 +770,8 @@ static inline bool isNumberBody(unsigned char c) {
 /// lexer buffer was all instantiated at a single point, perform the mapping.
 /// This is currently only used for _Pragma implementation, so it is the slow
 /// path of the hot getSourceLocation method.  Do not allow it to be inlined.
-static DISABLE_INLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP,
-                                                       SourceLocation FileLoc,
-                                                       unsigned CharNo,
-                                                       unsigned TokLen);
+static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(
+    Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen);
 static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
                                         SourceLocation FileLoc,
                                         unsigned CharNo, unsigned TokLen) {
@@ -869,19 +1053,17 @@ void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
   if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) {
 FinishIdentifier:
     const char *IdStart = BufferPtr;
-    FormTokenWithChars(Result, CurPtr, tok::identifier);
+    FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
+    Result.setRawIdentifierData(IdStart);
 
     // If we are in raw mode, return this identifier raw.  There is no need to
     // look up identifier information or attempt to macro expand it.
-    if (LexingRawMode) return;
-
-    // Fill in Result.IdentifierInfo, looking up the identifier in the
-    // identifier table.
-    IdentifierInfo *II = PP->LookUpIdentifierInfo(Result, IdStart);
+    if (LexingRawMode)
+      return;
 
-    // Change the kind of this identifier to the appropriate token kind, e.g.
-    // turning "for" into a keyword.
-    Result.setKind(II->getTokenID());
+    // Fill in Result.IdentifierInfo and update the token kind,
+    // looking up the identifier in the identifier table.
+    IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
 
     // Finally, now that we know we have an identifier, pass this off to the
     // preprocessor, which may macro expand it or something.
@@ -980,7 +1162,7 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
       if (C == 0 && PP && PP->isCodeCompletionFile(FileLoc))
         PP->CodeCompleteNaturalLanguage();
       else if (!isLexingRawMode() && !Features.AsmPreprocessor)
-        Diag(BufferPtr, diag::err_unterminated_string);
+        Diag(BufferPtr, diag::warn_unterminated_string);
       FormTokenWithChars(Result, CurPtr-1, tok::unknown);
       return;
     }
@@ -1059,7 +1241,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
       if (C == 0 && PP && PP->isCodeCompletionFile(FileLoc))
         PP->CodeCompleteNaturalLanguage();
       else if (!isLexingRawMode() && !Features.AsmPreprocessor)
-        Diag(BufferPtr, diag::err_unterminated_char);
+        Diag(BufferPtr, diag::warn_unterminated_char);
       FormTokenWithChars(Result, CurPtr-1, tok::unknown);
       return;
     } else if (C == 0) {
@@ -2141,6 +2323,10 @@ LexNextToken:
         // If this is actually a '<<<<<<<' version control conflict marker,
         // recognize it as such and recover nicely.
         goto LexNextToken;
+      } else if (Features.CUDA && After == '<') {
+        Kind = tok::lesslessless;
+        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                             SizeTmp2, Result);
       } else {
         CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
         Kind = tok::lessless;
@@ -2172,6 +2358,10 @@ LexNextToken:
       } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) {
         // If this is '>>>>>>>' and we're in a conflict marker, ignore it.
         goto LexNextToken;
+      } else if (Features.CUDA && After == '>') {
+        Kind = tok::greatergreatergreater;
+        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                             SizeTmp2, Result);
       } else {
         CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
         Kind = tok::greatergreater;