Diffstat (limited to 'lib/Lex')
-rw-r--r--  lib/Lex/Lexer.cpp          | 10
-rw-r--r--  lib/Lex/LiteralSupport.cpp | 15
-rw-r--r--  lib/Lex/Makefile           |  4
-rw-r--r--  lib/Lex/PPDirectives.cpp   | 10
-rw-r--r--  lib/Lex/PPExpressions.cpp  | 12
-rw-r--r--  lib/Lex/PPLexerChange.cpp  |  5
-rw-r--r--  lib/Lex/PTHLexer.cpp       | 10
-rw-r--r--  lib/Lex/Preprocessor.cpp   | 81
-rw-r--r--  lib/Lex/TokenLexer.cpp     | 13
9 files changed, 101 insertions, 59 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 3207062..6cdb96f 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -229,14 +229,18 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
   // the token this macro expanded to.
   Loc = SM.getInstantiationLoc(Loc);
   std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
-  std::pair<const char *,const char *> Buffer = SM.getBufferData(LocInfo.first);
-  const char *StrData = Buffer.first+LocInfo.second;
+  bool Invalid = false;
+  llvm::StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
+  if (Invalid)
+    return 0;
+
+  const char *StrData = Buffer.data()+LocInfo.second;
   if (isWhitespace(StrData[0]))
     return 0;
 
   // Create a lexer starting at the beginning of this token.
-  Lexer TheLexer(Loc, LangOpts, Buffer.first, StrData, Buffer.second);
+  Lexer TheLexer(Loc, LangOpts, Buffer.begin(), StrData, Buffer.end());
   TheLexer.SetCommentRetentionState(true);
   Token TheTok;
   TheLexer.LexFromRawLexer(TheTok);
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index 004e675..1cfa0e3 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -806,7 +806,14 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
     // Get the spelling of the token, which eliminates trigraphs, etc. We know
     // that ThisTokBuf points to a buffer that is big enough for the whole token
     // and 'spelled' tokens can only shrink.
-    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
+    bool StringInvalid = false;
+    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf,
+                                         &StringInvalid);
+    if (StringInvalid) {
+      hadError = 1;
+      continue;
+    }
+
     const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
 
     // TODO: Input character set mapping support.
@@ -904,8 +911,12 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
   llvm::SmallString<16> SpellingBuffer;
   SpellingBuffer.resize(Tok.getLength());
 
+  bool StringInvalid = false;
   const char *SpellingPtr = &SpellingBuffer[0];
-  unsigned TokLen = PP.getSpelling(Tok, SpellingPtr);
+  unsigned TokLen = PP.getSpelling(Tok, SpellingPtr, &StringInvalid);
+  if (StringInvalid) {
+    return 0;
+  }
 
   assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet");
diff --git a/lib/Lex/Makefile b/lib/Lex/Makefile
index 5090770..bd3c7a8 100644
--- a/lib/Lex/Makefile
+++ b/lib/Lex/Makefile
@@ -18,10 +18,10 @@ LIBRARYNAME := clangLex
 BUILD_ARCHIVE = 1
 
 ifeq ($(ARCH),PowerPC)
-CXXFLAGS += -maltivec
+CXX.Flags += -maltivec
 endif
 
-CPPFLAGS += -I$(PROJ_SRC_DIR)/../../include -I$(PROJ_OBJ_DIR)/../../include
+CPP.Flags += -I$(PROJ_SRC_DIR)/../../include -I$(PROJ_OBJ_DIR)/../../include
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index 976c94e..cddc6cf 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -471,7 +471,7 @@ void Preprocessor::HandleDirective(Token &Result) {
   CurPPLexer->ParsingPreprocessorDirective = true;
 
   ++NumDirectives;
-  
+
   // We are about to read a token.  For the multiple-include optimization FA to
   // work, we have to remember if we had read any tokens *before* this
   // pp-directive.
@@ -964,7 +964,7 @@ bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
 /// false if the > was found, otherwise it returns true if it finds and consumes
 /// the EOM marker.
 bool Preprocessor::ConcatenateIncludeName(
-                                        llvm::SmallVector<char, 128> &FilenameBuffer) {
+                                          llvm::SmallString<128> &FilenameBuffer) {
   Token CurTok;
 
   Lex(CurTok);
@@ -1042,7 +1042,7 @@ void Preprocessor::HandleIncludeDirective(Token &IncludeTok,
     return;
   }
 
-  bool isAngled = 
+  bool isAngled =
     GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
   // If GetIncludeFilenameSpelling set the start ptr to null, there was an
   // error.
@@ -1070,7 +1070,7 @@ void Preprocessor::HandleIncludeDirective(Token &IncludeTok,
     Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
     return;
   }
-  
+
   // Ask HeaderInfo if we should enter this #include file.  If not, #including
   // this file will have no effect.
   if (!HeaderInfo.ShouldEnterIncludeFile(File, isImport))
@@ -1512,7 +1512,7 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef,
   IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
   MacroInfo *MI = getMacroInfo(MII);
-  
+
   if (CurPPLexer->getConditionalStackDepth() == 0) {
     // If the start of a top-level #ifdef and if the macro is not defined,
     // inform MIOpt that this might be the start of a proper include guard.
diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp
index ede129e..756ce27 100644
--- a/lib/Lex/PPExpressions.cpp
+++ b/lib/Lex/PPExpressions.cpp
@@ -170,7 +170,12 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
     return true;
   case tok::numeric_constant: {
     llvm::SmallString<64> IntegerBuffer;
-    llvm::StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer);
+    bool NumberInvalid = false;
+    llvm::StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer,
+                                              &NumberInvalid);
+    if (NumberInvalid)
+      return true; // a diagnostic was already reported
+
     NumericLiteralParser Literal(Spelling.begin(), Spelling.end(),
                                  PeekTok.getLocation(), PP);
     if (Literal.hadError)
@@ -216,7 +221,10 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
   }
   case tok::char_constant: {   // 'x'
     llvm::SmallString<32> CharBuffer;
-    llvm::StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer);
+    bool CharInvalid = false;
+    llvm::StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);
+    if (CharInvalid)
+      return true;
 
     CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(),
                               PeekTok.getLocation(), PP);
diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp
index 0b26ccb..81e6bf8 100644
--- a/lib/Lex/PPLexerChange.cpp
+++ b/lib/Lex/PPLexerChange.cpp
@@ -80,9 +80,8 @@ bool Preprocessor::EnterSourceFile(FileID FID, const DirectoryLookup *CurDir,
   }
 
   // Get the MemoryBuffer for this FID, if it fails, we fail.
-  const llvm::MemoryBuffer *InputFile =
-    getSourceManager().getBuffer(FID, &ErrorStr);
-  if (!ErrorStr.empty())
+  const llvm::MemoryBuffer *InputFile = getSourceManager().getBuffer(FID);
+  if (!InputFile)
     return true;
 
   EnterSourceFileWithLexer(new Lexer(FID, InputFile, *this), CurDir);
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index a64008a..3b949d0 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -549,12 +549,12 @@ IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
   return II;
 }
 
-IdentifierInfo* PTHManager::get(const char *NameStart, const char *NameEnd) {
+IdentifierInfo* PTHManager::get(llvm::StringRef Name) {
   PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup);
   // Double check our assumption that the last character isn't '\0'.
-  assert(NameEnd==NameStart || NameStart[NameEnd-NameStart-1] != '\0');
-  PTHStringIdLookup::iterator I = SL.find(std::make_pair(NameStart,
-                                                         NameEnd - NameStart));
+  assert(Name.empty() || Name.data()[Name.size()-1] != '\0');
+  PTHStringIdLookup::iterator I = SL.find(std::make_pair(Name.data(),
+                                                         Name.size()));
   if (I == SL.end()) // No identifier found?
     return 0;
@@ -662,7 +662,7 @@ public:
     CacheTy::iterator I = Cache.find(path);
 
     // If we don't get a hit in the PTH file just forward to 'stat'.
-    if (I == Cache.end()) 
+    if (I == Cache.end())
       return StatSysCallCache::stat(path, buf);
 
     const PTHStatData& Data = *I;
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 2c6ad6e..5584b18 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -52,7 +52,7 @@ Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,
                            bool OwnsHeaders)
   : Diags(&diags), Features(opts), Target(target),FileMgr(Headers.getFileMgr()),
     SourceMgr(SM), HeaderInfo(Headers), ExternalSource(0),
-    Identifiers(opts, IILookup), BuiltinInfo(Target), CodeCompletionFile(0), 
+    Identifiers(opts, IILookup), BuiltinInfo(Target), CodeCompletionFile(0),
     CurPPLexer(0), CurDirLookup(0), Callbacks(0), MacroArgCache(0) {
   ScratchBuf = new ScratchBuffer(SourceMgr);
   CounterValue = 0; // __COUNTER__ starts at 0.
@@ -80,7 +80,7 @@ Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,
 
   // We haven't read anything from the external source.
   ReadMacrosFromExternalSource = false;
-  
+
   // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
   // This gets unpoisoned where it is allowed.
   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
@@ -116,7 +116,7 @@ Preprocessor::~Preprocessor() {
   // Free any cached macro expanders.
   for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i)
     delete TokenLexerCache[i];
-  
+
   // Free any cached MacroArgs.
   for (MacroArgs *ArgList = MacroArgCache; ArgList; )
     ArgList = ArgList->deallocate();
@@ -198,30 +198,30 @@ void Preprocessor::PrintStats() {
                << NumFastTokenPaste << " on the fast path.\n";
 }
 
-Preprocessor::macro_iterator 
-Preprocessor::macro_begin(bool IncludeExternalMacros) const { 
-  if (IncludeExternalMacros && ExternalSource && 
+Preprocessor::macro_iterator
+Preprocessor::macro_begin(bool IncludeExternalMacros) const {
+  if (IncludeExternalMacros && ExternalSource &&
       !ReadMacrosFromExternalSource) {
     ReadMacrosFromExternalSource = true;
     ExternalSource->ReadDefinedMacros();
   }
-  
-  return Macros.begin(); 
+
+  return Macros.begin();
 }
 
-Preprocessor::macro_iterator 
-Preprocessor::macro_end(bool IncludeExternalMacros) const { 
-  if (IncludeExternalMacros && ExternalSource && 
+Preprocessor::macro_iterator
+Preprocessor::macro_end(bool IncludeExternalMacros) const {
+  if (IncludeExternalMacros && ExternalSource &&
       !ReadMacrosFromExternalSource) {
     ReadMacrosFromExternalSource = true;
     ExternalSource->ReadDefinedMacros();
   }
-  
-  return Macros.end(); 
+
+  return Macros.end();
 }
 
-bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 
-                                          unsigned TruncateAtLine, 
+bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
+                                          unsigned TruncateAtLine,
                                           unsigned TruncateAtColumn) {
   using llvm::MemoryBuffer;
@@ -242,7 +242,7 @@ bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
     for (; *Position; ++Position) {
       if (*Position != '\r' && *Position != '\n')
         continue;
-      
+
       // Eat \r\n or \n\r as a single line.
       if ((Position[1] == '\r' || Position[1] == '\n') &&
           Position[0] != Position[1])
@@ -251,13 +251,13 @@ bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
         break;
       }
     }
-    
+
     Position += TruncateAtColumn - 1;
-    
+
   // Truncate the buffer.
   if (Position < Buffer->getBufferEnd()) {
-    MemoryBuffer *TruncatedBuffer 
-      = MemoryBuffer::getMemBufferCopy(Buffer->getBufferStart(), Position, 
+    MemoryBuffer *TruncatedBuffer
+      = MemoryBuffer::getMemBufferCopy(Buffer->getBufferStart(), Position,
                                        Buffer->getBufferIdentifier());
     SourceMgr.overrideFileContents(File, TruncatedBuffer);
   }
@@ -282,11 +282,19 @@ bool Preprocessor::isCodeCompletionFile(SourceLocation FileLoc) const {
 /// UCNs, etc.
 std::string Preprocessor::getSpelling(const Token &Tok,
                                       const SourceManager &SourceMgr,
-                                      const LangOptions &Features) {
+                                      const LangOptions &Features,
+                                      bool *Invalid) {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
 
   // If this token contains nothing interesting, return it directly.
-  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  bool CharDataInvalid = false;
+  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
+                                                    &CharDataInvalid);
+  if (Invalid)
+    *Invalid = CharDataInvalid;
+  if (CharDataInvalid)
+    return std::string();
+
   if (!Tok.needsCleaning())
     return std::string(TokStart, TokStart+Tok.getLength());
@@ -310,8 +318,8 @@ std::string Preprocessor::getSpelling(const Token &Tok,
 /// after trigraph expansion and escaped-newline folding.  In particular, this
 /// wants to get the true, uncanonicalized, spelling of things like digraphs
 /// UCNs, etc.
-std::string Preprocessor::getSpelling(const Token &Tok) const {
-  return getSpelling(Tok, SourceMgr, Features);
+std::string Preprocessor::getSpelling(const Token &Tok, bool *Invalid) const {
+  return getSpelling(Tok, SourceMgr, Features, Invalid);
 }
 
 /// getSpelling - This method is used to get the spelling of a token into a
@@ -325,7 +333,7 @@ std::string Preprocessor::getSpelling(const Token &Tok) const {
 /// copy).  The caller is not allowed to modify the returned buffer pointer
 /// if an internal buffer is returned.
 unsigned Preprocessor::getSpelling(const Token &Tok,
-                                   const char *&Buffer) const {
+                                   const char *&Buffer, bool *Invalid) const {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
 
   // If this token is an identifier, just return the string from the identifier
@@ -341,8 +349,16 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
   if (Tok.isLiteral())
     TokStart = Tok.getLiteralData();
 
-  if (TokStart == 0)
-    TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  if (TokStart == 0) {
+    bool CharDataInvalid = false;
+    TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
+    if (Invalid)
+      *Invalid = CharDataInvalid;
+    if (CharDataInvalid) {
+      Buffer = "";
+      return 0;
+    }
+  }
 
   // If this token contains nothing interesting, return it directly.
   if (!Tok.needsCleaning()) {
@@ -368,7 +384,8 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
 /// SmallVector. Note that the returned StringRef may not point to the
 /// supplied buffer if a copy can be avoided.
 llvm::StringRef Preprocessor::getSpelling(const Token &Tok,
-                                          llvm::SmallVectorImpl<char> &Buffer) const {
+                                          llvm::SmallVectorImpl<char> &Buffer,
+                                          bool *Invalid) const {
   // Try the fast path.
   if (const IdentifierInfo *II = Tok.getIdentifierInfo())
     return II->getName();
@@ -378,7 +395,7 @@ llvm::StringRef Preprocessor::getSpelling(const Token &Tok,
   Buffer.resize(Tok.getLength());
 
   const char *Ptr = Buffer.data();
-  unsigned Len = getSpelling(Tok, Ptr);
+  unsigned Len = getSpelling(Tok, Ptr, Invalid);
   return llvm::StringRef(Ptr, Len);
 }
@@ -446,7 +463,7 @@ SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart,
   return TokStart.getFileLocWithOffset(PhysOffset);
 }
 
-SourceLocation Preprocessor::getLocForEndOfToken(SourceLocation Loc, 
+SourceLocation Preprocessor::getLocForEndOfToken(SourceLocation Loc,
                                                  unsigned Offset) {
   if (Loc.isInvalid() || !Loc.isFileID())
     return SourceLocation();
@@ -456,7 +473,7 @@ SourceLocation Preprocessor::getLocForEndOfToken(SourceLocation Loc,
     Len = Len - Offset;
   else
     return Loc;
-  
+
   return AdvanceToTokenCharacter(Loc, Len);
 }
@@ -519,7 +536,7 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier,
     II = getIdentifierInfo(llvm::StringRef(BufPtr, Identifier.getLength()));
   } else {
     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
-    llvm::SmallVector<char, 64> IdentifierBuffer;
+    llvm::SmallString<64> IdentifierBuffer;
     llvm::StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
     II = getIdentifierInfo(CleanedStr);
   }
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index 5d95eb3..dbd1b84 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -325,7 +325,7 @@ void TokenLexer::Lex(Token &Tok) {
     // returned by PasteTokens, not the pasted token.
     if (PasteTokens(Tok))
       return;
-    
+
     TokenIsFromPaste = true;
   }
@@ -379,7 +379,7 @@ void TokenLexer::Lex(Token &Tok) {
/// are more ## after it, chomp them iteratively.  Return the result as Tok.
/// If this returns true, the caller should immediately return the token.
 bool TokenLexer::PasteTokens(Token &Tok) {
-  llvm::SmallVector<char, 128> Buffer;
+  llvm::SmallString<128> Buffer;
   const char *ResultTokStrPtr = 0;
   do {
     // Consume the ## operator.
@@ -439,7 +439,11 @@ bool TokenLexer::PasteTokens(Token &Tok) {
       SourceManager &SourceMgr = PP.getSourceManager();
       FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);
 
-      const char *ScratchBufStart = SourceMgr.getBufferData(LocFileID).first;
+      bool Invalid = false;
+      const char *ScratchBufStart
+        = SourceMgr.getBufferData(LocFileID, &Invalid).data();
+      if (Invalid)
+        return false;
 
       // Make a lexer to lex this string from.  Lex just this one token.
       // Make a lexer object so that we lex and expand the paste result.
@@ -506,8 +510,7 @@ bool TokenLexer::PasteTokens(Token &Tok) {
   if (Tok.is(tok::identifier)) {
     // Look up the identifier info for the token.  We disabled identifier lookup
     // by saying we're skipping contents, so we need to do this manually.
-    IdentifierInfo *II = PP.LookUpIdentifierInfo(Tok, ResultTokStrPtr);
-    Tok.setIdentifierInfo(II);
+    PP.LookUpIdentifierInfo(Tok, ResultTokStrPtr);
   }
   return false;
 }
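The hunks above share one pattern: the spelling and buffer accessors (Preprocessor::getSpelling, SourceManager::getBufferData, SourceManager::getCharacterData) gain an optional bool *Invalid out-parameter, and each caller checks it and unwinds instead of reading from a buffer that failed to load. Below is a minimal caller-side sketch, assuming the APIs exactly as they appear in this patch; the helper name is hypothetical and not part of the change.

#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"

// Hypothetical helper (not part of the patch): fetch a token's spelling,
// treating an invalid source buffer as "no value" rather than reading from
// bad memory.  The caller owns Buffer, so the returned StringRef stays
// valid; when Invalid is set, SourceManager has already issued a
// diagnostic, so the caller only needs to unwind quietly.
static bool getSpellingOrBail(clang::Preprocessor &PP, const clang::Token &Tok,
                              llvm::SmallVectorImpl<char> &Buffer,
                              llvm::StringRef &Out) {
  bool Invalid = false;
  Out = PP.getSpelling(Tok, Buffer, &Invalid);
  return !Invalid;
}

// Usage, mirroring the EvaluateValue hunk in PPExpressions.cpp above:
//   llvm::SmallString<64> Buffer;
//   llvm::StringRef Spelling;
//   if (!getSpellingOrBail(PP, PeekTok, Buffer, Spelling))
//     return true;  // a diagnostic was already reported

SmallString<64> satisfies the new overload because it derives from SmallVectorImpl<char>, which is the buffer type the patch uses for the StringRef-returning getSpelling.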