22 files changed, 2473 insertions, 1308 deletions
diff --git a/lib/Lex/CMakeLists.txt b/lib/Lex/CMakeLists.txt
index 241abbc..2ee4682 100644
--- a/lib/Lex/CMakeLists.txt
+++ b/lib/Lex/CMakeLists.txt
@@ -12,6 +12,7 @@ add_clang_library(clangLex
   ModuleMap.cpp
   PPCaching.cpp
   PPCallbacks.cpp
+  PPConditionalDirectiveRecord.cpp
   PPDirectives.cpp
   PPExpressions.cpp
   PPLexerChange.cpp
diff --git a/lib/Lex/HeaderMap.cpp b/lib/Lex/HeaderMap.cpp
index 7dc0491..dcf1f0c 100644
--- a/lib/Lex/HeaderMap.cpp
+++ b/lib/Lex/HeaderMap.cpp
@@ -12,13 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/HeaderMap.h"
+#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/FileManager.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include <cctype>
 #include <cstdio>
 using namespace clang;
 
@@ -62,7 +62,7 @@ static inline unsigned HashHMapKey(StringRef Str) {
   const char *S = Str.begin(), *End = Str.end();
 
   for (; S != End; S++)
-    Result += tolower(*S) * 13;
+    Result += toLowercase(*S) * 13;
   return Result;
 }
 
diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp
index 67000b68..304bd69 100644
--- a/lib/Lex/HeaderSearch.cpp
+++ b/lib/Lex/HeaderSearch.cpp
@@ -12,17 +12,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/HeaderSearch.h"
-#include "clang/Lex/HeaderSearchOptions.h"
-#include "clang/Lex/HeaderMap.h"
-#include "clang/Lex/Lexer.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/IdentifierTable.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Path.h"
+#include "clang/Lex/HeaderMap.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/Lexer.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/Capacity.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
 #include <cstdio>
+#if defined(LLVM_ON_UNIX)
+#include <limits.h>
+#endif
 using namespace clang;
 
 const IdentifierInfo *
@@ -39,12 +42,12 @@ HeaderFileInfo::getControllingMacro(ExternalIdentifierLookup *External) {
 
 ExternalHeaderFileInfoSource::~ExternalHeaderFileInfoSource() {}
 
-HeaderSearch::HeaderSearch(llvm::IntrusiveRefCntPtr<HeaderSearchOptions> HSOpts,
+HeaderSearch::HeaderSearch(IntrusiveRefCntPtr<HeaderSearchOptions> HSOpts,
                            FileManager &FM, DiagnosticsEngine &Diags,
                            const LangOptions &LangOpts, 
                            const TargetInfo *Target)
   : HSOpts(HSOpts), FileMgr(FM), FrameworkMap(64),
-    ModMap(FileMgr, *Diags.getClient(), LangOpts, Target)
+    ModMap(FileMgr, *Diags.getClient(), LangOpts, Target, *this)
 {
   AngledDirIdx = 0;
   SystemDirIdx = 0;
@@ -134,7 +137,7 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, bool AllowSearch) {
   if (Module || !AllowSearch)
     return Module;
   
-  // Look through the various header search paths to load any avai;able module 
+  // Look through the various header search paths to load any available module
   // maps, searching for a module map that describes this module.
   for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) {
     if (SearchDirs[Idx].isFramework()) {
@@ -178,8 +181,22 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, bool AllowSearch) {
       if (Module)
         break;
     }
+
+    // If we've already performed the exhaustive search for module maps in this
+    // search directory, don't do it again.
+    if (SearchDirs[Idx].haveSearchedAllModuleMaps())
+      continue;
+
+    // Load all module maps in the immediate subdirectories of this search
+    // directory.
+    loadSubdirectoryModuleMaps(SearchDirs[Idx]);
+
+    // Look again for the module.
+    Module = ModMap.findModule(ModuleName);
+    if (Module)
+      break;
   }
-  
+
   return Module;
 }
 
@@ -263,6 +280,55 @@ const FileEntry *DirectoryLookup::LookupFile(
   return Result;
 }
 
+/// \brief Given a framework directory, find the top-most framework directory.
+/// \returns The outermost enclosing framework's entry, or DirName's own entry.
+/// \param FileMgr The file manager to use for directory lookups.
+/// \param DirName The framework directory name (asserted to be ".framework").
+/// \param SubmodulePath Receives the stem of every enclosing ".framework"
+/// directory, appended innermost-first. Existing entries are left in place.
+static const DirectoryEntry *
+getTopFrameworkDir(FileManager &FileMgr, StringRef DirName,
+                   SmallVectorImpl<std::string> &SubmodulePath) {
+  assert(llvm::sys::path::extension(DirName) == ".framework" &&
+         "Not a framework directory");
+
+  // Note: as an egregious but useful hack we use the real path here, because
+  // frameworks moving from top-level frameworks to embedded frameworks tend
+  // to be symlinked, and we base the logical structure of modules on the
+  // physical layout. In particular, we need to deal with crazy includes like
+  //
+  //   #include <Foo/Frameworks/Bar.framework/Headers/Wibble.h>
+  //
+  // where 'Bar' used to be embedded in 'Foo', is now a top-level framework
+  // which one should access with, e.g.,
+  //
+  //   #include <Bar/Wibble.h>
+  // Similar issues occur when a top-level framework has moved into an
+  // embedded framework. NOTE(review): the getDirectory() result is used
+  // below without a null check -- confirm DirName always names a directory.
+  const DirectoryEntry *TopFrameworkDir = FileMgr.getDirectory(DirName);
+  DirName = FileMgr.getCanonicalName(TopFrameworkDir);
+  do {
+    // Step up one level; stop once the path has no parent left.
+    DirName = llvm::sys::path::parent_path(DirName);
+    if (DirName.empty())
+      break;
+
+    // Stop as soon as the file manager cannot find this parent directory.
+    const DirectoryEntry *Dir = FileMgr.getDirectory(DirName);
+    if (!Dir)
+      break;
+
+    // An enclosing framework directory: record its stem and remember it as
+    // the top-most framework found so far.
+    if (llvm::sys::path::extension(DirName) == ".framework") {
+      SubmodulePath.push_back(llvm::sys::path::stem(DirName));
+      TopFrameworkDir = Dir;
+    }
+  } while (true);
+
+  return TopFrameworkDir;
+}
 
 /// DoFrameworkLookup - Do a lookup of the specified file in the current
 /// DirectoryLookup, which is a framework directory.
@@ -334,17 +400,6 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup(
     RelativePath->clear();
     RelativePath->append(Filename.begin()+SlashPos+1, Filename.end());
   }
-
-  // If we're allowed to look for modules, try to load or create the module
-  // corresponding to this framework.
-  Module *Module = 0;
-  if (SuggestedModule) {
-    if (const DirectoryEntry *FrameworkDir
-                                        = FileMgr.getDirectory(FrameworkName)) {
-      bool IsSystem = getDirCharacteristic() != SrcMgr::C_User;
-      Module = HS.loadFrameworkModule(ModuleName, FrameworkDir, IsSystem);
-    }
-  }
   
   // Check "/System/Library/Frameworks/Cocoa.framework/Headers/file.h"
   unsigned OrigSize = FrameworkName.size();
@@ -357,28 +412,64 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup(
     SearchPath->append(FrameworkName.begin(), FrameworkName.end()-1);
   }
 
-  // Determine whether this is the module we're building or not.
-  bool AutomaticImport = Module;  
   FrameworkName.append(Filename.begin()+SlashPos+1, Filename.end());
-  if (const FileEntry *FE = FileMgr.getFile(FrameworkName.str(),
-                                            /*openFile=*/!AutomaticImport)) {
-    if (AutomaticImport)
-      *SuggestedModule = HS.findModuleForHeader(FE);
-    return FE;
+  const FileEntry *FE = FileMgr.getFile(FrameworkName.str(),
+                                        /*openFile=*/!SuggestedModule);
+  if (!FE) {
+    // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h"
+    const char *Private = "Private";
+    FrameworkName.insert(FrameworkName.begin()+OrigSize, Private,
+                         Private+strlen(Private));
+    if (SearchPath != NULL)
+      SearchPath->insert(SearchPath->begin()+OrigSize, Private,
+                         Private+strlen(Private));
+
+    FE = FileMgr.getFile(FrameworkName.str(), /*openFile=*/!SuggestedModule);
   }
 
-  // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h"
-  const char *Private = "Private";
-  FrameworkName.insert(FrameworkName.begin()+OrigSize, Private,
-                       Private+strlen(Private));
-  if (SearchPath != NULL)
-    SearchPath->insert(SearchPath->begin()+OrigSize, Private,
-                       Private+strlen(Private));
-
-  const FileEntry *FE = FileMgr.getFile(FrameworkName.str(), 
-                                        /*openFile=*/!AutomaticImport);
-  if (FE && AutomaticImport)
-    *SuggestedModule = HS.findModuleForHeader(FE);
+  // If we found the header and are allowed to suggest a module, do so now.
+  if (FE && SuggestedModule) {
+    // Find the framework in which this header occurs.
+    StringRef FrameworkPath = FE->getName();
+    bool FoundFramework = false;
+    do {
+      // Get the parent directory name.
+      FrameworkPath = llvm::sys::path::parent_path(FrameworkPath);
+      if (FrameworkPath.empty())
+        break;
+
+      // Determine whether this directory exists.
+      const DirectoryEntry *Dir = FileMgr.getDirectory(FrameworkPath);
+      if (!Dir)
+        break;
+
+      // If this is a framework directory, we have found the framework that
+      // contains this header.
+      if (llvm::sys::path::extension(FrameworkPath) == ".framework") {
+        FoundFramework = true;
+        break;
+      }
+    } while (true);
+
+    if (FoundFramework) {
+      // Find the top-level framework based on this framework.
+      SmallVector<std::string, 4> SubmodulePath;
+      const DirectoryEntry *TopFrameworkDir
+        = ::getTopFrameworkDir(FileMgr, FrameworkPath, SubmodulePath);
+
+      // Determine the name of the top-level framework.
+      StringRef ModuleName = llvm::sys::path::stem(TopFrameworkDir->getName());
+
+      // Load this framework module. If that succeeds, find the suggested module
+      // for this header, if any.
+      bool IsSystem = getDirCharacteristic() != SrcMgr::C_User;
+      if (HS.loadFrameworkModule(ModuleName, TopFrameworkDir, IsSystem)) {
+        *SuggestedModule = HS.findModuleForHeader(FE);
+      }
+    } else {
+      *SuggestedModule = HS.findModuleForHeader(FE);
+    }
+  }
   return FE;
 }
 
@@ -584,7 +675,8 @@ const FileEntry *HeaderSearch::
 LookupSubframeworkHeader(StringRef Filename,
                          const FileEntry *ContextFileEnt,
                          SmallVectorImpl<char> *SearchPath,
-                         SmallVectorImpl<char> *RelativePath) {
+                         SmallVectorImpl<char> *RelativePath,
+                         Module **SuggestedModule) {
   assert(ContextFileEnt && "No context file?");
 
   // Framework names must have a '/' in the filename.  Find it.
@@ -673,6 +765,26 @@ LookupSubframeworkHeader(StringRef Filename,
   // of evaluation.
   unsigned DirInfo = getFileInfo(ContextFileEnt).DirInfo;
   getFileInfo(FE).DirInfo = DirInfo;
+
+  // If we're supposed to suggest a module, look for one now.
+  if (SuggestedModule) {
+    // Find the top-level framework based on this framework.
+    FrameworkName.pop_back(); // remove the trailing '/'
+    SmallVector<std::string, 4> SubmodulePath;
+    const DirectoryEntry *TopFrameworkDir
+      = ::getTopFrameworkDir(FileMgr, FrameworkName, SubmodulePath);
+    
+    // Determine the name of the top-level framework.
+    StringRef ModuleName = llvm::sys::path::stem(TopFrameworkDir->getName());
+
+    // Load this framework module. If that succeeds, find the suggested module
+    // for this header, if any.
+    bool IsSystem = false;
+    if (loadFrameworkModule(ModuleName, TopFrameworkDir, IsSystem)) {
+      *SuggestedModule = findModuleForHeader(FE);
+    }
+  }
+
   return FE;
 }
 
@@ -708,6 +820,7 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
                                 const HeaderFileInfo &OtherHFI) {
   HFI.isImport |= OtherHFI.isImport;
   HFI.isPragmaOnce |= OtherHFI.isPragmaOnce;
+  HFI.isModuleHeader |= OtherHFI.isModuleHeader;
   HFI.NumIncludes += OtherHFI.NumIncludes;
   
   if (!HFI.ControllingMacro && !HFI.ControllingMacroID) {
@@ -749,7 +862,16 @@ bool HeaderSearch::isFileMultipleIncludeGuarded(const FileEntry *File) {
   if (ExternalSource && !HFI.Resolved)
     mergeHeaderFileInfo(HFI, ExternalSource->GetHeaderFileInfo(File));
 
-  return HFI.isPragmaOnce || HFI.ControllingMacro || HFI.ControllingMacroID;
+  return HFI.isPragmaOnce || HFI.isImport ||
+      HFI.ControllingMacro || HFI.ControllingMacroID;
+}
+
+void HeaderSearch::MarkFileModuleHeader(const FileEntry *FE) {
+  if (FE->getUID() >= FileInfo.size()) // grow so FE's UID is a valid index
+    FileInfo.resize(FE->getUID()+1);
+  // Record on this file's HeaderFileInfo that it is a module header.
+  HeaderFileInfo &HFI = FileInfo[FE->getUID()];
+  HFI.isModuleHeader = true;
 }
 
 void HeaderSearch::setHeaderFileInfoForUID(HeaderFileInfo HFI, unsigned UID) {
@@ -809,7 +931,7 @@ StringRef HeaderSearch::getUniqueFrameworkName(StringRef Framework) {
 
 bool HeaderSearch::hasModuleMap(StringRef FileName, 
                                 const DirectoryEntry *Root) {
-  llvm::SmallVector<const DirectoryEntry *, 2> FixUpDirectories;
+  SmallVector<const DirectoryEntry *, 2> FixUpDirectories;
   
   StringRef DirName = FileName;
   do {
@@ -849,7 +971,12 @@ bool HeaderSearch::hasModuleMap(StringRef FileName,
   } while (true);
 }
 
-Module *HeaderSearch::findModuleForHeader(const FileEntry *File) {
+Module *HeaderSearch::findModuleForHeader(const FileEntry *File) const {
+  if (ExternalSource) {
+    // Make sure the external source has handled header info about this file,
+    // which includes whether the file is part of a module.
+    (void)getFileInfo(File);
+  }
   if (Module *Mod = ModMap.findModuleForHeader(File))
     return Mod;
   
@@ -897,80 +1024,21 @@ Module *HeaderSearch::loadFrameworkModule(StringRef Name,
     return ModMap.findModule(Name);
   }
 
-  // The top-level framework directory, from which we'll infer a framework
-  // module.
-  const DirectoryEntry *TopFrameworkDir = Dir;
-  
-  // The path from the module we're actually looking for back to the top-level
-  // framework name.
-  llvm::SmallVector<StringRef, 2> SubmodulePath;
+  // Figure out the top-level framework directory and the submodule path from
+  // that top-level framework to the requested framework.
+  SmallVector<std::string, 2> SubmodulePath;
   SubmodulePath.push_back(Name);
-  
-  // Walk the directory structure to find any enclosing frameworks.
-#ifdef LLVM_ON_UNIX
-  // Note: as an egregious but useful hack we use the real path here, because
-  // frameworks moving from top-level frameworks to embedded frameworks tend
-  // to be symlinked from the top-level location to the embedded location,
-  // and we need to resolve lookups as if we had found the embedded location.
-  char RealDirName[PATH_MAX];
-  StringRef DirName;
-  if (realpath(Dir->getName(), RealDirName))
-    DirName = RealDirName;
-  else
-    DirName = Dir->getName();
-#else
-  StringRef DirName = Dir->getName();
-#endif
-  do {
-    // Get the parent directory name.
-    DirName = llvm::sys::path::parent_path(DirName);
-    if (DirName.empty())
-      break;
-    
-    // Determine whether this directory exists.
-    Dir = FileMgr.getDirectory(DirName);
-    if (!Dir)
-      break;
-    
-    // If this is a framework directory, then we're a subframework of this
-    // framework.
-    if (llvm::sys::path::extension(DirName) == ".framework") {
-      SubmodulePath.push_back(llvm::sys::path::stem(DirName));
-      TopFrameworkDir = Dir;
-    }
-  } while (true);
+  const DirectoryEntry *TopFrameworkDir
+    = ::getTopFrameworkDir(FileMgr, Dir->getName(), SubmodulePath);
 
-  // Determine whether we're allowed to infer a module map.
-  bool canInfer = false;
-  if (llvm::sys::path::has_parent_path(TopFrameworkDir->getName())) {
-    // Figure out the parent path.
-    StringRef Parent = llvm::sys::path::parent_path(TopFrameworkDir->getName());
-    if (const DirectoryEntry *ParentDir = FileMgr.getDirectory(Parent)) {
-      // If there's a module map file in the parent directory, it can
-      // explicitly allow us to infer framework modules.
-      switch (loadModuleMapFile(ParentDir)) {
-        case LMM_AlreadyLoaded:
-        case LMM_NewlyLoaded: {
-          StringRef Name = llvm::sys::path::stem(TopFrameworkDir->getName());
-          canInfer = ModMap.canInferFrameworkModule(ParentDir, Name, IsSystem);
-          break;
-        }
-        case LMM_InvalidModuleMap:
-        case LMM_NoDirectory:
-          break;
-      }
-    }
-  }
-
-  // If we're not allowed to infer a module map, we're done.
-  if (!canInfer)
-    return 0;
 
   // Try to infer a module map from the top-level framework directory.
   Module *Result = ModMap.inferFrameworkModule(SubmodulePath.back(), 
                                                TopFrameworkDir,
                                                IsSystem,
                                                /*Parent=*/0);
+  if (!Result)
+    return 0;
   
   // Follow the submodule path to find the requested (sub)framework module
   // within the top-level framework module.
@@ -1034,7 +1102,7 @@ HeaderSearch::loadModuleMapFile(const DirectoryEntry *Dir) {
   return LMM_InvalidModuleMap;
 }
 
-void HeaderSearch::collectAllModules(llvm::SmallVectorImpl<Module *> &Modules) {
+void HeaderSearch::collectAllModules(SmallVectorImpl<Module *> &Modules) {
   Modules.clear();
   
   // Load module maps for each of the header search directories.
@@ -1072,13 +1140,7 @@ void HeaderSearch::collectAllModules(llvm::SmallVectorImpl<Module *> &Modules) {
     
     // Try to load module map files for immediate subdirectories of this search
     // directory.
-    llvm::error_code EC;
-    SmallString<128> DirNative;
-    llvm::sys::path::native(SearchDirs[Idx].getDir()->getName(), DirNative);
-    for (llvm::sys::fs::directory_iterator Dir(DirNative.str(), EC), DirEnd;
-         Dir != DirEnd && !EC; Dir.increment(EC)) {
-      loadModuleMapFile(Dir->path());
-    }
+    loadSubdirectoryModuleMaps(SearchDirs[Idx]);
   }
   
   // Populate the list of modules.
@@ -1088,3 +1150,18 @@ void HeaderSearch::collectAllModules(llvm::SmallVectorImpl<Module *> &Modules) {
     Modules.push_back(M->getValue());
   }
 }
+
+void HeaderSearch::loadSubdirectoryModuleMaps(DirectoryLookup &SearchDir) {
+  if (SearchDir.haveSearchedAllModuleMaps())
+    return; // already scanned on an earlier call
+  // Try to load a module map for each entry the directory iterator yields.
+  llvm::error_code EC;
+  SmallString<128> DirNative;
+  llvm::sys::path::native(SearchDir.getDir()->getName(), DirNative);
+  for (llvm::sys::fs::directory_iterator Dir(DirNative.str(), EC), DirEnd;
+       Dir != DirEnd && !EC; Dir.increment(EC)) {
+    loadModuleMapFile(Dir->path()); // return value is not checked
+  }
+
+  SearchDir.setSearchedAllModuleMaps(true); // also set if EC ended the loop
+}
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index a5ba7db..ed4666a 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -25,19 +25,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/Lexer.h"
-#include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/LexDiagnostic.h"
-#include "clang/Lex/CodeCompletionHandler.h"
+#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/SourceManager.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "clang/Lex/CodeCompletionHandler.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "UnicodeCharSets.h"
 #include <cstring>
 using namespace clang;
 
-static void InitCharacterInfo();
-
 //===----------------------------------------------------------------------===//
 // Token Class Implementation
 //===----------------------------------------------------------------------===//
@@ -64,8 +66,6 @@ void Lexer::anchor() { }
 
 void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
                       const char *BufEnd) {
-  InitCharacterInfo();
-
   BufferStart = BufStart;
   BufferPtr = BufPtr;
   BufferEnd = BufEnd;
@@ -122,8 +122,15 @@ Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
   InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
             InputFile->getBufferEnd());
 
-  // Default to keeping comments if the preprocessor wants them.
-  SetCommentRetentionState(PP.getCommentRetentionState());
+  resetExtendedTokenMode();
+}
+
+void Lexer::resetExtendedTokenMode() { // needs a preprocessor (PP) attached
+  assert(PP && "Cannot reset token mode without a preprocessor");
+  if (LangOpts.TraditionalCPP) // TraditionalCPP: turn keep-whitespace mode on
+    SetKeepWhitespaceMode(true);
+  else // otherwise mirror the preprocessor's comment-retention setting
+    SetCommentRetentionState(PP->getCommentRetentionState());
 }
 
 /// Lexer constructor - Create a new raw lexer object.  This object is only
@@ -233,16 +240,67 @@ void Lexer::Stringify(SmallVectorImpl<char> &Str) {
 // Token Spelling
 //===----------------------------------------------------------------------===//
 
+/// \brief Slow case of getSpelling. Relex the Tok.getLength() source bytes at
+/// BufPtr into Spelling and return how many characters were written.
+static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
+                              const LangOptions &LangOpts, char *Spelling) {
+  assert(Tok.needsCleaning() && "getSpellingSlow called on simple token");
+
+  size_t Length = 0;
+  const char *BufEnd = BufPtr + Tok.getLength(); // end of the token text
+
+  if (Tok.is(tok::string_literal)) {
+    // Munch the encoding-prefix and opening double-quote.
+    while (BufPtr < BufEnd) {
+      unsigned Size;
+      Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+      BufPtr += Size;
+
+      if (Spelling[Length - 1] == '"')
+        break;
+    }
+
+    // Raw string literals need special handling; trigraph expansion and line
+    // splicing do not occur within their d-char-sequence nor within their
+    // r-char-sequence.
+    if (Length >= 2 &&
+        Spelling[Length - 2] == 'R' && Spelling[Length - 1] == '"') {
+      // Search backwards from the end of the token to find the matching closing
+      // quote.
+      const char *RawEnd = BufEnd;
+      do --RawEnd; while (*RawEnd != '"');
+      size_t RawLength = RawEnd - BufPtr + 1; // includes the closing '"'
+
+      // Everything up to and including that quote is copied verbatim.
+      memcpy(Spelling + Length, BufPtr, RawLength);
+      Length += RawLength;
+      BufPtr += RawLength;
+
+      // The rest of the token is lexed normally.
+    }
+  }
+
+  while (BufPtr < BufEnd) { // remaining (or all, if not a string) characters
+    unsigned Size;
+    Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+    BufPtr += Size;
+  }
+
+  assert(Length < Tok.getLength() &&
+         "NeedsCleaning flag set on token that didn't need cleaning!");
+  return Length;
+}
+
 /// getSpelling() - Return the 'spelling' of this token.  The spelling of a
 /// token are the characters used to represent the token in the source file
 /// after trigraph expansion and escaped-newline folding.  In particular, this
 /// wants to get the true, uncanonicalized, spelling of things like digraphs
 /// UCNs, etc.
 StringRef Lexer::getSpelling(SourceLocation loc,
-                                   SmallVectorImpl<char> &buffer,
-                                   const SourceManager &SM,
-                                   const LangOptions &options,
-                                   bool *invalid) {
+                             SmallVectorImpl<char> &buffer,
+                             const SourceManager &SM,
+                             const LangOptions &options,
+                             bool *invalid) {
   // Break down the source location.
   std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(loc);
 
@@ -267,17 +325,10 @@ StringRef Lexer::getSpelling(SourceLocation loc,
   // Common case:  no need for cleaning.
   if (!token.needsCleaning())
     return StringRef(tokenBegin, length);
-  
-  // Hard case, we need to relex the characters into the string.
-  buffer.clear();
-  buffer.reserve(length);
-  
-  for (const char *ti = tokenBegin, *te = ti + length; ti != te; ) {
-    unsigned charSize;
-    buffer.push_back(Lexer::getCharAndSizeNoWarn(ti, charSize, options));
-    ti += charSize;
-  }
 
+  // Hard case, we need to relex the characters into the string.
+  buffer.resize(length);
+  buffer.resize(getSpellingSlow(token, tokenBegin, options, buffer.data()));
   return StringRef(buffer.data(), buffer.size());
 }
 
@@ -289,31 +340,22 @@ StringRef Lexer::getSpelling(SourceLocation loc,
 std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
                                const LangOptions &LangOpts, bool *Invalid) {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
-  
-  // If this token contains nothing interesting, return it directly.
+
   bool CharDataInvalid = false;
-  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(), 
+  const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
                                                     &CharDataInvalid);
   if (Invalid)
     *Invalid = CharDataInvalid;
   if (CharDataInvalid)
     return std::string();
-  
+
+  // If this token contains nothing interesting, return it directly.
   if (!Tok.needsCleaning())
-    return std::string(TokStart, TokStart+Tok.getLength());
-  
+    return std::string(TokStart, TokStart + Tok.getLength());
+
   std::string Result;
-  Result.reserve(Tok.getLength());
-  
-  // Otherwise, hard case, relex the characters into the string.
-  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
-       Ptr != End; ) {
-    unsigned CharSize;
-    Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts));
-    Ptr += CharSize;
-  }
-  assert(Result.size() != unsigned(Tok.getLength()) &&
-         "NeedsCleaning flag set on something that didn't need cleaning!");
+  Result.resize(Tok.getLength());
+  Result.resize(getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin()));
   return Result;
 }
 
@@ -336,10 +378,12 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
   if (Tok.is(tok::raw_identifier))
     TokStart = Tok.getRawIdentifierData();
-  else if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
-    // Just return the string from the identifier table, which is very quick.
-    Buffer = II->getNameStart();
-    return II->getLength();
+  else if (!Tok.hasUCN()) {
+    if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
+      // Just return the string from the identifier table, which is very quick.
+      Buffer = II->getNameStart();
+      return II->getLength();
+    }
   }
 
   // NOTE: this can be checked even after testing for an IdentifierInfo.
@@ -365,23 +409,10 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
   }
 
   // Otherwise, hard case, relex the characters into the string.
-  char *OutBuf = const_cast<char*>(Buffer);
-  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
-       Ptr != End; ) {
-    unsigned CharSize;
-    *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts);
-    Ptr += CharSize;
-  }
-  assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
-         "NeedsCleaning flag set on something that didn't need cleaning!");
-
-  return OutBuf-Buffer;
+  return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
 }
 
 
-
-static bool isWhitespace(unsigned char c);
-
 /// MeasureTokenLength - Relex the token at the specified location and return
 /// its length in bytes in the input file.  If the token needs cleaning (e.g.
 /// includes a trigraph or an escaped newline) then this count includes bytes
@@ -389,6 +420,17 @@ static bool isWhitespace(unsigned char c);
 unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
                                    const SourceManager &SM,
                                    const LangOptions &LangOpts) {
+  Token TheTok; // filled in by getRawToken on success
+  if (getRawToken(Loc, TheTok, SM, LangOpts))
+    return 0; // getRawToken returns true on failure
+  return TheTok.getLength();
+}
+
+/// \brief Relex the token at the specified location.
+/// \returns true if there was a failure, false on success.
+bool Lexer::getRawToken(SourceLocation Loc, Token &Result,
+                        const SourceManager &SM,
+                        const LangOptions &LangOpts) {
   // TODO: this could be special cased for common tokens like identifiers, ')',
   // etc to make this faster, if it mattered.  Just look at StrData[0] to handle
   // all obviously single-char tokens.  This could use
@@ -402,20 +444,19 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
   bool Invalid = false;
   StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
   if (Invalid)
-    return 0;
+    return true;
 
   const char *StrData = Buffer.data()+LocInfo.second;
 
   if (isWhitespace(StrData[0]))
-    return 0;
+    return true;
 
   // Create a lexer starting at the beginning of this token.
   Lexer TheLexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts,
                  Buffer.begin(), StrData, Buffer.end());
   TheLexer.SetCommentRetentionState(true);
-  Token TheTok;
-  TheLexer.LexFromRawLexer(TheTok);
-  return TheTok.getLength();
+  TheLexer.LexFromRawLexer(Result);
+  return false;
 }
 
 static SourceLocation getBeginningOfFileToken(SourceLocation Loc,
@@ -969,163 +1010,8 @@ StringRef Lexer::getImmediateMacroName(SourceLocation Loc,
   return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
 }
 
-//===----------------------------------------------------------------------===//
-// Character information.
-//===----------------------------------------------------------------------===//
-
-enum {
-  CHAR_HORZ_WS  = 0x01,  // ' ', '\t', '\f', '\v'.  Note, no '\0'
-  CHAR_VERT_WS  = 0x02,  // '\r', '\n'
-  CHAR_LETTER   = 0x04,  // a-z,A-Z
-  CHAR_NUMBER   = 0x08,  // 0-9
-  CHAR_UNDER    = 0x10,  // _
-  CHAR_PERIOD   = 0x20,  // .
-  CHAR_RAWDEL   = 0x40   // {}[]#<>%:;?*+-/^&|~!=,"'
-};
-
-// Statically initialize CharInfo table based on ASCII character set
-// Reference: FreeBSD 7.2 /usr/share/misc/ascii
-static const unsigned char CharInfo[256] =
-{
-// 0 NUL         1 SOH         2 STX         3 ETX
-// 4 EOT         5 ENQ         6 ACK         7 BEL
-   0           , 0           , 0           , 0           ,
-   0           , 0           , 0           , 0           ,
-// 8 BS          9 HT         10 NL         11 VT
-//12 NP         13 CR         14 SO         15 SI
-   0           , CHAR_HORZ_WS, CHAR_VERT_WS, CHAR_HORZ_WS,
-   CHAR_HORZ_WS, CHAR_VERT_WS, 0           , 0           ,
-//16 DLE        17 DC1        18 DC2        19 DC3
-//20 DC4        21 NAK        22 SYN        23 ETB
-   0           , 0           , 0           , 0           ,
-   0           , 0           , 0           , 0           ,
-//24 CAN        25 EM         26 SUB        27 ESC
-//28 FS         29 GS         30 RS         31 US
-   0           , 0           , 0           , 0           ,
-   0           , 0           , 0           , 0           ,
-//32 SP         33  !         34  "         35  #
-//36  $         37  %         38  &         39  '
-   CHAR_HORZ_WS, CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
-   0           , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
-//40  (         41  )         42  *         43  +
-//44  ,         45  -         46  .         47  /
-   0           , 0           , CHAR_RAWDEL , CHAR_RAWDEL ,
-   CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL ,
-//48  0         49  1         50  2         51  3
-//52  4         53  5         54  6         55  7
-   CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
-   CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
-//56  8         57  9         58  :         59  ;
-//60  <         61  =         62  >         63  ?
-   CHAR_NUMBER , CHAR_NUMBER , CHAR_RAWDEL , CHAR_RAWDEL ,
-   CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
-//64  @         65  A         66  B         67  C
-//68  D         69  E         70  F         71  G
-   0           , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-//72  H         73  I         74  J         75  K
-//76  L         77  M         78  N         79  O
-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-//80  P         81  Q         82  R         83  S
-//84  T         85  U         86  V         87  W
-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-//88  X         89  Y         90  Z         91  [
-//92  \         93  ]         94  ^         95  _
-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
-   0           , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER  ,
-//96  `         97  a         98  b         99  c
-//100  d       101  e        102  f        103  g
-   0           , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-//104  h       105  i        106  j        107  k
-//108  l       109  m        110  n        111  o
-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-//112  p       113  q        114  r        115  s
-//116  t       117  u        118  v        119  w
-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-//120  x       121  y        122  z        123  {
-//124  |       125  }        126  ~        127 DEL
-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
-   CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0
-};
-
-static void InitCharacterInfo() {
-  static bool isInited = false;
-  if (isInited) return;
-  // check the statically-initialized CharInfo table
-  assert(CHAR_HORZ_WS == CharInfo[(int)' ']);
-  assert(CHAR_HORZ_WS == CharInfo[(int)'\t']);
-  assert(CHAR_HORZ_WS == CharInfo[(int)'\f']);
-  assert(CHAR_HORZ_WS == CharInfo[(int)'\v']);
-  assert(CHAR_VERT_WS == CharInfo[(int)'\n']);
-  assert(CHAR_VERT_WS == CharInfo[(int)'\r']);
-  assert(CHAR_UNDER   == CharInfo[(int)'_']);
-  assert(CHAR_PERIOD  == CharInfo[(int)'.']);
-  for (unsigned i = 'a'; i <= 'z'; ++i) {
-    assert(CHAR_LETTER == CharInfo[i]);
-    assert(CHAR_LETTER == CharInfo[i+'A'-'a']);
-  }
-  for (unsigned i = '0'; i <= '9'; ++i)
-    assert(CHAR_NUMBER == CharInfo[i]);
-    
-  isInited = true;
-}
-
-
-/// isIdentifierHead - Return true if this is the first character of an
-/// identifier, which is [a-zA-Z_].
-static inline bool isIdentifierHead(unsigned char c) {
-  return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false;
-}
-
-/// isIdentifierBody - Return true if this is the body character of an
-/// identifier, which is [a-zA-Z0-9_].
-static inline bool isIdentifierBody(unsigned char c) {
-  return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER)) ? true : false;
-}
-
-/// isHorizontalWhitespace - Return true if this character is horizontal
-/// whitespace: ' ', '\\t', '\\f', '\\v'.  Note that this returns false for
-/// '\\0'.
-static inline bool isHorizontalWhitespace(unsigned char c) {
-  return (CharInfo[c] & CHAR_HORZ_WS) ? true : false;
-}
-
-/// isVerticalWhitespace - Return true if this character is vertical
-/// whitespace: '\\n', '\\r'.  Note that this returns false for '\\0'.
-static inline bool isVerticalWhitespace(unsigned char c) {
-  return (CharInfo[c] & CHAR_VERT_WS) ? true : false;
-}
-
-/// isWhitespace - Return true if this character is horizontal or vertical
-/// whitespace: ' ', '\\t', '\\f', '\\v', '\\n', '\\r'.  Note that this returns
-/// false for '\\0'.
-static inline bool isWhitespace(unsigned char c) {
-  return (CharInfo[c] & (CHAR_HORZ_WS|CHAR_VERT_WS)) ? true : false;
-}
-
-/// isNumberBody - Return true if this is the body character of an
-/// preprocessing number, which is [a-zA-Z0-9_.].
-static inline bool isNumberBody(unsigned char c) {
-  return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD)) ?
-    true : false;
-}
-
-/// isRawStringDelimBody - Return true if this is the body character of a
-/// raw string delimiter.
-static inline bool isRawStringDelimBody(unsigned char c) {
-  return (CharInfo[c] &
-          (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL)) ?
-    true : false;
-}
-
-// Allow external clients to make use of CharInfo.
 bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) {
-  return isIdentifierBody(c) || (c == '$' && LangOpts.DollarIdents);
+  return isIdentifierBody(c, LangOpts.DollarIdents);
 }
 
 
@@ -1293,7 +1179,7 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,
 
   // Try to load the file buffer.
   bool InvalidTemp = false;
-  llvm::StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
+  StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
   if (InvalidTemp)
     return SourceLocation();
 
@@ -1319,8 +1205,15 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,
       C = *(++TokenEnd);
       NumWhitespaceChars++;
     }
-    if (isVerticalWhitespace(C))
+
+    // Skip \r, \n, \r\n, or \n\r
+    if (C == '\n' || C == '\r') {
+      char PrevC = C;
+      C = *(++TokenEnd);
       NumWhitespaceChars++;
+      if ((C == '\n' || C == '\r') && C != PrevC)
+        NumWhitespaceChars++;
+    }
   }
 
   return TokenLoc.getLocWithOffset(Tok.getLength() + NumWhitespaceChars);
@@ -1334,7 +1227,6 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,
 ///   2. If this is an escaped newline (potentially with whitespace between
 ///      the backslash and newline), implicitly skip the newline and return
 ///      the char after it.
-///   3. If this is a UCN, return it.  FIXME: C++ UCN's?
 ///
 /// This handles the slow/uncommon case of the getCharAndSize method.  Here we
 /// know that we can accumulate into Size, and that we have already incremented
@@ -1467,6 +1359,62 @@ void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) {
   IsAtStartOfLine = StartOfLine;
 }
 
+static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) {
+  if (LangOpts.CPlusPlus11 || LangOpts.C11)
+    return isCharInSet(C, C11AllowedIDChars);
+  else if (LangOpts.CPlusPlus)
+    return isCharInSet(C, CXX03AllowedIDChars);
+  else
+    return isCharInSet(C, C99AllowedIDChars);
+}
+
+static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) {
+  assert(isAllowedIDChar(C, LangOpts));
+  if (LangOpts.CPlusPlus11 || LangOpts.C11)
+    return !isCharInSet(C, C11DisallowedInitialIDChars);
+  else if (LangOpts.CPlusPlus)
+    return true;
+  else
+    return !isCharInSet(C, C99DisallowedInitialIDChars);
+}
+
+static inline CharSourceRange makeCharRange(Lexer &L, const char *Begin,
+                                            const char *End) {
+  return CharSourceRange::getCharRange(L.getSourceLocation(Begin),
+                                       L.getSourceLocation(End));
+}
+
+static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C,
+                                      CharSourceRange Range, bool IsFirst) {
+  // Check C99 compatibility.
+  if (Diags.getDiagnosticLevel(diag::warn_c99_compat_unicode_id,
+                               Range.getBegin()) > DiagnosticsEngine::Ignored) {
+    enum {
+      CannotAppearInIdentifier = 0,
+      CannotStartIdentifier
+    };
+
+    if (!isCharInSet(C, C99AllowedIDChars)) {
+      Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
+        << Range
+        << CannotAppearInIdentifier;
+    } else if (IsFirst && isCharInSet(C, C99DisallowedInitialIDChars)) {
+      Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
+        << Range
+        << CannotStartIdentifier;
+    }
+  }
+
+  // Check C++98 compatibility.
+  if (Diags.getDiagnosticLevel(diag::warn_cxx98_compat_unicode_id,
+                               Range.getBegin()) > DiagnosticsEngine::Ignored) {
+    if (!isCharInSet(C, CXX03AllowedIDChars)) {
+      Diags.Report(Range.getBegin(), diag::warn_cxx98_compat_unicode_id)
+        << Range;
+    }
+  }
+ }
+
 void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
   // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
   unsigned Size;
@@ -1478,11 +1426,11 @@ void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
 
   // Fast path, no $,\,? in identifier found.  '\' might be an escaped newline
   // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
-  // FIXME: UCNs.
   //
-  // TODO: Could merge these checks into a CharInfo flag to make the comparison
-  // cheaper
-  if (C != '\\' && C != '?' && (C != '$' || !LangOpts.DollarIdents)) {
+  // TODO: Could merge these checks into an InfoTable flag to make the
+  // comparison cheaper
+  if (isASCII(C) && C != '\\' && C != '?' &&
+      (C != '$' || !LangOpts.DollarIdents)) {
 FinishIdentifier:
     const char *IdStart = BufferPtr;
     FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
@@ -1519,8 +1467,51 @@ FinishIdentifier:
       CurPtr = ConsumeChar(CurPtr, Size, Result);
       C = getCharAndSize(CurPtr, Size);
       continue;
-    } else if (!isIdentifierBody(C)) { // FIXME: UCNs.
-      // Found end of identifier.
+
+    } else if (C == '\\') {
+      const char *UCNPtr = CurPtr + Size;
+      uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr, /*Token=*/0);
+      if (CodePoint == 0 || !isAllowedIDChar(CodePoint, LangOpts))
+        goto FinishIdentifier;
+
+      if (!isLexingRawMode()) {
+        maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
+                                  makeCharRange(*this, CurPtr, UCNPtr),
+                                  /*IsFirst=*/false);
+      }
+
+      Result.setFlag(Token::HasUCN);
+      if ((UCNPtr - CurPtr ==  6 && CurPtr[1] == 'u') ||
+          (UCNPtr - CurPtr == 10 && CurPtr[1] == 'U'))
+        CurPtr = UCNPtr;
+      else
+        while (CurPtr != UCNPtr)
+          (void)getAndAdvanceChar(CurPtr, Result);
+
+      C = getCharAndSize(CurPtr, Size);
+      continue;
+    } else if (!isASCII(C)) {
+      const char *UnicodePtr = CurPtr;
+      UTF32 CodePoint;
+      ConversionResult Result =
+          llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr,
+                                    (const UTF8 *)BufferEnd,
+                                    &CodePoint,
+                                    strictConversion);
+      if (Result != conversionOK ||
+          !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts))
+        goto FinishIdentifier;
+
+      if (!isLexingRawMode()) {
+        maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
+                                  makeCharRange(*this, CurPtr, UnicodePtr),
+                                  /*IsFirst=*/false);
+      }
+
+      CurPtr = UnicodePtr;
+      C = getCharAndSize(CurPtr, Size);
+      continue;
+    } else if (!isIdentifierBody(C)) {
       goto FinishIdentifier;
     }
 
@@ -1528,7 +1519,7 @@ FinishIdentifier:
     CurPtr = ConsumeChar(CurPtr, Size, Result);
 
     C = getCharAndSize(CurPtr, Size);
-    while (isIdentifierBody(C)) { // FIXME: UCNs.
+    while (isIdentifierBody(C)) {
       CurPtr = ConsumeChar(CurPtr, Size, Result);
       C = getCharAndSize(CurPtr, Size);
     }
@@ -1553,7 +1544,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
   unsigned Size;
   char C = getCharAndSize(CurPtr, Size);
   char PrevCh = 0;
-  while (isNumberBody(C)) { // FIXME: UCNs.
+  while (isPreprocessingNumberBody(C)) { // FIXME: UCNs in ud-suffix.
     CurPtr = ConsumeChar(CurPtr, Size, Result);
     PrevCh = C;
     C = getCharAndSize(CurPtr, Size);
@@ -1598,7 +1589,7 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) {
   unsigned Size;
   char C = getCharAndSize(CurPtr, Size);
   if (isIdentifierHead(C)) {
-    if (!getLangOpts().CPlusPlus0x) {
+    if (!getLangOpts().CPlusPlus11) {
       if (!isLexingRawMode())
         Diag(CurPtr,
              C == '_' ? diag::warn_cxx11_compat_user_defined_literal
@@ -1639,7 +1630,9 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
       (Kind == tok::utf8_string_literal ||
        Kind == tok::utf16_string_literal ||
        Kind == tok::utf32_string_literal))
-    Diag(BufferPtr, diag::warn_cxx98_compat_unicode_literal);
+    Diag(BufferPtr, getLangOpts().CPlusPlus
+           ? diag::warn_cxx98_compat_unicode_literal
+           : diag::warn_c99_compat_unicode_literal);
 
   char C = getAndAdvanceChar(CurPtr, Result);
   while (C != '"') {
@@ -1804,7 +1797,9 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
 
   if (!isLexingRawMode() &&
       (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant))
-    Diag(BufferPtr, diag::warn_cxx98_compat_unicode_literal);
+    Diag(BufferPtr, getLangOpts().CPlusPlus
+           ? diag::warn_cxx98_compat_unicode_literal
+           : diag::warn_c99_compat_unicode_literal);
 
   char C = getAndAdvanceChar(CurPtr, Result);
   if (C == '\'') {
@@ -1860,6 +1855,8 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
 ///
 bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
   // Whitespace - Skip it, then return the token after the whitespace.
+  bool SawNewline = isVerticalWhitespace(CurPtr[-1]);
+
   unsigned char Char = *CurPtr;  // Skip consequtive spaces efficiently.
   while (1) {
     // Skip horizontal whitespace very aggressively.
@@ -1867,7 +1864,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
       Char = *++CurPtr;
 
     // Otherwise if we have something other than whitespace, we're done.
-    if (Char != '\n' && Char != '\r')
+    if (!isVerticalWhitespace(Char))
       break;
 
     if (ParsingPreprocessorDirective) {
@@ -1877,24 +1874,27 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
     }
 
     // ok, but handle newline.
-    // The returned token is at the start of the line.
-    Result.setFlag(Token::StartOfLine);
-    // No leading whitespace seen so far.
-    Result.clearFlag(Token::LeadingSpace);
+    SawNewline = true;
     Char = *++CurPtr;
   }
 
-  // If this isn't immediately after a newline, there is leading space.
-  char PrevChar = CurPtr[-1];
-  if (PrevChar != '\n' && PrevChar != '\r')
-    Result.setFlag(Token::LeadingSpace);
-
   // If the client wants us to return whitespace, return it now.
   if (isKeepWhitespaceMode()) {
     FormTokenWithChars(Result, CurPtr, tok::unknown);
+    if (SawNewline)
+      IsAtStartOfLine = true;
+    // FIXME: The next token will not have LeadingSpace set.
     return true;
   }
 
+  // If this isn't immediately after a newline, there is leading space.
+  char PrevChar = CurPtr[-1];
+  bool HasLeadingSpace = !isVerticalWhitespace(PrevChar);
+
+  Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
+  if (SawNewline)
+    Result.setFlag(Token::StartOfLine);
+
   BufferPtr = CurPtr;
   return false;
 }
@@ -2285,7 +2285,6 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
   // efficiently now.  This is safe even in KeepWhitespaceMode because we would
   // have already returned above with the comment as a token.
   if (isHorizontalWhitespace(*CurPtr)) {
-    Result.setFlag(Token::LeadingSpace);
     SkipWhitespace(Result, CurPtr+1);
     return false;
   }
@@ -2367,7 +2366,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
     FormTokenWithChars(Result, CurPtr, tok::eod);
 
     // Restore comment saving mode, in case it was disabled for directive.
-    SetCommentRetentionState(PP->getCommentRetentionState());
+    resetExtendedTokenMode();
     return true;  // Have a token.
   }
  
@@ -2393,7 +2392,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
   // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue
   // a pedwarn.
   if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r'))
-    Diag(BufferEnd, LangOpts.CPlusPlus0x ? // C++11 [lex.phases] 2.2 p2
+    Diag(BufferEnd, LangOpts.CPlusPlus11 ? // C++11 [lex.phases] 2.2 p2
          diag::warn_cxx98_compat_no_newline_eof : diag::ext_no_newline_eof)
     << FixItHint::CreateInsertion(getSourceLocation(BufferEnd), "\n");
 
@@ -2550,6 +2549,164 @@ bool Lexer::isCodeCompletionPoint(const char *CurPtr) const {
   return false;
 }
 
+uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
+                           Token *Result) {
+  unsigned CharSize;
+  char Kind = getCharAndSize(StartPtr, CharSize);
+
+  unsigned NumHexDigits;
+  if (Kind == 'u')
+    NumHexDigits = 4;
+  else if (Kind == 'U')
+    NumHexDigits = 8;
+  else
+    return 0;
+
+  if (!LangOpts.CPlusPlus && !LangOpts.C99) {
+    if (Result && !isLexingRawMode())
+      Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
+    return 0;
+  }
+
+  const char *CurPtr = StartPtr + CharSize;
+  const char *KindLoc = &CurPtr[-1];
+
+  uint32_t CodePoint = 0;
+  for (unsigned i = 0; i < NumHexDigits; ++i) {
+    char C = getCharAndSize(CurPtr, CharSize);
+
+    unsigned Value = llvm::hexDigitValue(C);
+    if (Value == -1U) {
+      if (Result && !isLexingRawMode()) {
+        if (i == 0) {
+          Diag(BufferPtr, diag::warn_ucn_escape_no_digits)
+            << StringRef(KindLoc, 1);
+        } else {
+          Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
+
+          // If the user wrote \U1234, suggest a fixit to \u.
+          if (i == 4 && NumHexDigits == 8) {
+            CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1);
+            Diag(KindLoc, diag::note_ucn_four_not_eight)
+              << FixItHint::CreateReplacement(URange, "u");
+          }
+        }
+      }
+
+      return 0;
+    }
+
+    CodePoint <<= 4;
+    CodePoint += Value;
+
+    CurPtr += CharSize;
+  }
+
+  if (Result) {
+    Result->setFlag(Token::HasUCN);
+    if (CurPtr - StartPtr == (ptrdiff_t)NumHexDigits + 2)
+      StartPtr = CurPtr;
+    else
+      while (StartPtr != CurPtr)
+        (void)getAndAdvanceChar(StartPtr, *Result);
+  } else {
+    StartPtr = CurPtr;
+  }
+
+  // C99 6.4.3p2: A universal character name shall not specify a character whose
+  //   short identifier is less than 00A0 other than 0024 ($), 0040 (@), or
+  //   0060 (`), nor one in the range D800 through DFFF inclusive.
+  // C++11 [lex.charset]p2: If the hexadecimal value for a
+  //   universal-character-name corresponds to a surrogate code point (in the
+  //   range 0xD800-0xDFFF, inclusive), the program is ill-formed. Additionally,
+  //   if the hexadecimal value for a universal-character-name outside the
+  //   c-char-sequence, s-char-sequence, or r-char-sequence of a character or
+  //   string literal corresponds to a control character (in either of the
+  //   ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a character in the
+  //   basic source character set, the program is ill-formed.
+  if (CodePoint < 0xA0) {
+    if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60)
+      return CodePoint;
+
+    // We don't use isLexingRawMode() here because we need to warn about bad
+    // UCNs even when skipping preprocessing tokens in a #if block.
+    if (Result && PP) {
+      if (CodePoint < 0x20 || CodePoint >= 0x7F)
+        Diag(BufferPtr, diag::err_ucn_control_character);
+      else {
+        char C = static_cast<char>(CodePoint);
+        Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&C, 1);
+      }
+    }
+
+    return 0;
+
+  } else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
+    // C++03 allows UCNs representing surrogate characters. C99 and C++11 don't.
+    // We don't use isLexingRawMode() here because we need to diagnose bad
+    // UCNs even when skipping preprocessing tokens in a #if block.
+    if (Result && PP) {
+      if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
+        Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
+      else
+        Diag(BufferPtr, diag::err_ucn_escape_invalid);
+    }
+    return 0;
+  }
+
+  return CodePoint;
+}
+
+void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
+  if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
+      isCharInSet(C, UnicodeWhitespaceChars)) {
+    Diag(BufferPtr, diag::ext_unicode_whitespace)
+      << makeCharRange(*this, BufferPtr, CurPtr);
+
+    Result.setFlag(Token::LeadingSpace);
+    if (SkipWhitespace(Result, CurPtr))
+      return; // KeepWhitespaceMode
+
+    return LexTokenInternal(Result);
+  }
+
+  if (isAllowedIDChar(C, LangOpts) && isAllowedInitiallyIDChar(C, LangOpts)) {
+    if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
+        !PP->isPreprocessedOutput()) {
+      maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C,
+                                makeCharRange(*this, BufferPtr, CurPtr),
+                                /*IsFirst=*/true);
+    }
+
+    MIOpt.ReadToken();
+    return LexIdentifier(Result, CurPtr);
+  }
+
+  if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
+      !PP->isPreprocessedOutput() &&
+      !isASCII(*BufferPtr) && !isAllowedIDChar(C, LangOpts)) {
+    // Non-ASCII characters tend to creep into source code unintentionally.
+    // Instead of letting the parser complain about the unknown token,
+    // just drop the character.
+    // Note that we can /only/ do this when the non-ASCII character is actually
+    // spelled as Unicode, not written as a UCN. The standard requires that
+    // we not throw away any possible preprocessor tokens, but there's a
+    // loophole in the mapping of Unicode characters to basic character set
+    // characters that allows us to map these particular characters to, say,
+    // whitespace.
+    Diag(BufferPtr, diag::err_non_ascii)
+      << FixItHint::CreateRemoval(makeCharRange(*this, BufferPtr, CurPtr));
+
+    BufferPtr = CurPtr;
+    return LexTokenInternal(Result);
+  }
+
+  // Otherwise, we have an explicit UCN or a character that's unlikely to show
+  // up by accident.
+  MIOpt.ReadToken();
+  FormTokenWithChars(Result, CurPtr, tok::unknown);
+}
+
 
 /// LexTokenInternal - This implements a simple C family lexer.  It is an
 /// extremely performance critical piece of code.  This assumes that the buffer
@@ -2576,6 +2733,7 @@ LexNextToken:
     // whitespace.
     if (isKeepWhitespaceMode()) {
       FormTokenWithChars(Result, CurPtr, tok::unknown);
+      // FIXME: The next token will not have LeadingSpace set.
       return;
     }
 
@@ -2643,7 +2801,7 @@ LexNextToken:
 
       // Restore comment saving mode, in case it was disabled for directive.
       if (PP)
-        SetCommentRetentionState(PP->getCommentRetentionState());
+        resetExtendedTokenMode();
 
       // Since we consumed a newline, we are back at the start of a line.
       IsAtStartOfLine = true;
@@ -2651,8 +2809,7 @@ LexNextToken:
       Kind = tok::eod;
       break;
     }
-    // The returned token is at the start of the line.
-    Result.setFlag(Token::StartOfLine);
+
     // No leading whitespace seen so far.
     Result.clearFlag(Token::LeadingSpace);
 
@@ -2695,11 +2852,11 @@ LexNextToken:
     MIOpt.ReadToken();
     return LexNumericConstant(Result, CurPtr);
 
-  case 'u':   // Identifier (uber) or C++0x UTF-8 or UTF-16 string literal
+  case 'u':   // Identifier (uber) or C11/C++11 UTF-8 or UTF-16 string literal
     // Notify MIOpt that we read a non-whitespace/non-comment token.
     MIOpt.ReadToken();
 
-    if (LangOpts.CPlusPlus0x) {
+    if (LangOpts.CPlusPlus11 || LangOpts.C11) {
       Char = getCharAndSize(CurPtr, SizeTmp);
 
       // UTF-16 string literal
@@ -2713,7 +2870,8 @@ LexNextToken:
                                tok::utf16_char_constant);
 
       // UTF-16 raw string literal
-      if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+      if (Char == 'R' && LangOpts.CPlusPlus11 &&
+          getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
         return LexRawStringLiteral(Result,
                                ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                                            SizeTmp2, Result),
@@ -2729,7 +2887,7 @@ LexNextToken:
                                            SizeTmp2, Result),
                                tok::utf8_string_literal);
 
-        if (Char2 == 'R') {
+        if (Char2 == 'R' && LangOpts.CPlusPlus11) {
           unsigned SizeTmp3;
           char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
           // UTF-8 raw string literal
@@ -2747,11 +2905,11 @@ LexNextToken:
     // treat u like the start of an identifier.
     return LexIdentifier(Result, CurPtr);
 
-  case 'U':   // Identifier (Uber) or C++0x UTF-32 string literal
+  case 'U':   // Identifier (Uber) or C11/C++11 UTF-32 string literal
     // Notify MIOpt that we read a non-whitespace/non-comment token.
     MIOpt.ReadToken();
 
-    if (LangOpts.CPlusPlus0x) {
+    if (LangOpts.CPlusPlus11 || LangOpts.C11) {
       Char = getCharAndSize(CurPtr, SizeTmp);
 
       // UTF-32 string literal
@@ -2765,7 +2923,8 @@ LexNextToken:
                                tok::utf32_char_constant);
 
       // UTF-32 raw string literal
-      if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+      if (Char == 'R' && LangOpts.CPlusPlus11 &&
+          getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
         return LexRawStringLiteral(Result,
                                ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                                            SizeTmp2, Result),
@@ -2779,7 +2938,7 @@ LexNextToken:
     // Notify MIOpt that we read a non-whitespace/non-comment token.
     MIOpt.ReadToken();
 
-    if (LangOpts.CPlusPlus0x) {
+    if (LangOpts.CPlusPlus11) {
       Char = getCharAndSize(CurPtr, SizeTmp);
 
       if (Char == '"')
@@ -2802,7 +2961,7 @@ LexNextToken:
                               tok::wide_string_literal);
 
     // Wide raw string literal.
-    if (LangOpts.CPlusPlus0x && Char == 'R' &&
+    if (LangOpts.CPlusPlus11 && Char == 'R' &&
         getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
       return LexRawStringLiteral(Result,
                                ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
@@ -2968,10 +3127,13 @@ LexNextToken:
       // this as "foo / bar" and langauges with Line comments would lex it as
       // "foo".  Check to see if the character after the second slash is a '*'.
       // If so, we will lex that as a "/" instead of the start of a comment.
-      // However, we never do this in -traditional-cpp mode.
-      if ((LangOpts.LineComment ||
-           getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*') &&
-          !LangOpts.TraditionalCPP) {
+      // However, we never do this if we are just preprocessing.
+      bool TreatAsComment = LangOpts.LineComment && !LangOpts.TraditionalCPP;
+      if (!TreatAsComment)
+        if (!(PP && PP->isPreprocessedOutput()))
+          TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*';
+
+      if (TreatAsComment) {
         if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
           return; // There is a token to return.
 
@@ -3020,26 +3182,8 @@ LexNextToken:
         // it's actually the start of a preprocessing directive.  Callback to
         // the preprocessor to handle it.
         // FIXME: -fpreprocessed mode??
-        if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) {
-          FormTokenWithChars(Result, CurPtr, tok::hash);
-          PP->HandleDirective(Result);
-
-          // As an optimization, if the preprocessor didn't switch lexers, tail
-          // recurse.
-          if (PP->isCurrentLexer(this)) {
-            // Start a new token. If this is a #include or something, the PP may
-            // want us starting at the beginning of the line again.  If so, set
-            // the StartOfLine flag and clear LeadingSpace.
-            if (IsAtStartOfLine) {
-              Result.setFlag(Token::StartOfLine);
-              Result.clearFlag(Token::LeadingSpace);
-              IsAtStartOfLine = false;
-            }
-            goto LexNextToken;   // GCC isn't tail call eliminating.
-          }
-
-          return PP->Lex(Result);
-        }
+        if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer)
+          goto HandleDirective;
 
         Kind = tok::hash;
       }
@@ -3077,7 +3221,7 @@ LexNextToken:
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Kind = tok::lessequal;
     } else if (LangOpts.Digraphs && Char == ':') {     // '<:' -> '['
-      if (LangOpts.CPlusPlus0x &&
+      if (LangOpts.CPlusPlus11 &&
           getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') {
         // C++0x [lex.pptoken]p3:
         //  Otherwise, if the next three characters are <:: and the subsequent
@@ -3204,25 +3348,8 @@ LexNextToken:
       // it's actually the start of a preprocessing directive.  Callback to
       // the preprocessor to handle it.
       // FIXME: -fpreprocessed mode??
-      if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) {
-        FormTokenWithChars(Result, CurPtr, tok::hash);
-        PP->HandleDirective(Result);
-
-        // As an optimization, if the preprocessor didn't switch lexers, tail
-        // recurse.
-        if (PP->isCurrentLexer(this)) {
-          // Start a new token.  If this is a #include or something, the PP may
-          // want us starting at the beginning of the line again.  If so, set
-          // the StartOfLine flag and clear LeadingSpace.
-          if (IsAtStartOfLine) {
-            Result.setFlag(Token::StartOfLine);
-            Result.clearFlag(Token::LeadingSpace);
-            IsAtStartOfLine = false;
-          }
-          goto LexNextToken;   // GCC isn't tail call eliminating.
-        }
-        return PP->Lex(Result);
-      }
+      if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer)
+        goto HandleDirective;
 
       Kind = tok::hash;
     }
@@ -3236,12 +3363,48 @@ LexNextToken:
       Kind = tok::unknown;
     break;
 
+  // UCNs (C99 6.4.3, C++11 [lex.charset]p2)
   case '\\':
-    // FIXME: UCN's.
-    // FALL THROUGH.
-  default:
+    if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result))
+      return LexUnicode(Result, CodePoint, CurPtr);
+
     Kind = tok::unknown;
     break;
+
+  default: {
+    if (isASCII(Char)) {
+      Kind = tok::unknown;
+      break;
+    }
+
+    UTF32 CodePoint;
+
+    // We can't just reset CurPtr to BufferPtr because BufferPtr may point to
+    // an escaped newline.
+    --CurPtr;
+    ConversionResult Status =
+        llvm::convertUTF8Sequence((const UTF8 **)&CurPtr,
+                                  (const UTF8 *)BufferEnd,
+                                  &CodePoint,
+                                  strictConversion);
+    if (Status == conversionOK)
+      return LexUnicode(Result, CodePoint, CurPtr);
+    
+    if (isLexingRawMode() || ParsingPreprocessorDirective ||
+        PP->isPreprocessedOutput()) {
+      ++CurPtr;
+      Kind = tok::unknown;
+      break;
+    }
+
+    // Non-ASCII characters tend to creep into source code unintentionally.
+    // Instead of letting the parser complain about the unknown token,
+    // just diagnose the invalid UTF-8, then drop the character.
+    Diag(CurPtr, diag::err_invalid_utf8);
+
+    BufferPtr = CurPtr+1;
+    goto LexNextToken;
+  }
   }
 
   // Notify MIOpt that we read a non-whitespace/non-comment token.
@@ -3249,4 +3412,26 @@ LexNextToken:
 
   // Update the location of token as well as BufferPtr.
   FormTokenWithChars(Result, CurPtr, Kind);
+  return;
+
+HandleDirective:
+  // We parsed a # character and it's the start of a preprocessing directive.
+
+  FormTokenWithChars(Result, CurPtr, tok::hash);
+  PP->HandleDirective(Result);
+
+  // As an optimization, if the preprocessor didn't switch lexers, tail
+  // recurse.
+  if (PP->isCurrentLexer(this)) {
+    // Start a new token.  If this is a #include or something, the PP may
+    // want us starting at the beginning of the line again.  If so, set
+    // the StartOfLine flag and clear LeadingSpace.
+    if (IsAtStartOfLine) {
+      Result.setFlag(Token::StartOfLine);
+      Result.clearFlag(Token::LeadingSpace);
+      IsAtStartOfLine = false;
+    }
+    goto LexNextToken;   // GCC isn't tail call eliminating.
+  }
+  return PP->Lex(Result);
 }
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index e30612e..91da822 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -13,22 +13,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/LiteralSupport.h"
-#include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/TargetInfo.h"
-#include "clang/Basic/ConvertUTF.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ErrorHandling.h"
-using namespace clang;
 
-/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
-/// not valid.
-static int HexDigitValue(char C) {
-  if (C >= '0' && C <= '9') return C-'0';
-  if (C >= 'a' && C <= 'f') return C-'a'+10;
-  if (C >= 'A' && C <= 'F') return C-'A'+10;
-  return -1;
-}
+using namespace clang;
 
 static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
   switch (kind) {
@@ -136,10 +129,10 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
     break;
   case 'x': { // Hex escape.
     ResultChar = 0;
-    if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
+    if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
       if (Diags)
         Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
-             diag::err_hex_escape_no_digits);
+             diag::err_hex_escape_no_digits) << "x";
       HadError = 1;
       break;
     }
@@ -147,7 +140,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
     // Hex escapes are a maximal series of hex digits.
     bool Overflow = false;
     for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
-      int CharVal = HexDigitValue(ThisTokBuf[0]);
+      int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
       if (CharVal == -1) break;
       // About to shift out a digit?
       Overflow |= (ResultChar & 0xF0000000) ? true : false;
@@ -205,7 +198,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
     if (Diags == 0)
       break;
 
-    if (isgraph(ResultChar))
+    if (isPrintable(ResultChar))
       Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
            diag::ext_unknown_escape)
         << std::string(1, ResultChar);
@@ -232,16 +225,16 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
   // Skip the '\u' char's.
   ThisTokBuf += 2;
 
-  if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
+  if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
     if (Diags)
       Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
-           diag::err_ucn_escape_no_digits);
+           diag::err_hex_escape_no_digits) << StringRef(&ThisTokBuf[-1], 1);
     return false;
   }
   UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
   unsigned short UcnLenSave = UcnLen;
   for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) {
-    int CharVal = HexDigitValue(ThisTokBuf[0]);
+    int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
     if (CharVal == -1) break;
     UcnVal <<= 4;
     UcnVal |= CharVal;
@@ -267,7 +260,7 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
   // characters inside character and string literals
   if (UcnVal < 0xa0 &&
       (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) {  // $, @, `
-    bool IsError = (!Features.CPlusPlus0x || !in_char_string_literal);
+    bool IsError = (!Features.CPlusPlus11 || !in_char_string_literal);
     if (Diags) {
       char BasicSCSChar = UcnVal;
       if (UcnVal >= 0x20 && UcnVal < 0x7f)
@@ -286,7 +279,7 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
 
   if (!Features.CPlusPlus && !Features.C99 && Diags)
     Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
-         diag::warn_ucn_not_valid_in_c89);
+         diag::warn_ucn_not_valid_in_c89_literal);
 
   return true;
 }
@@ -467,8 +460,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
   // and FP constants (specifically, the 'pp-number' regex), and assumes that
   // the byte at "*end" is both valid and not part of the regex.  Because of
   // this, it doesn't have to check for 'overscan' in various places.
-  assert(!isalnum(*ThisTokEnd) && *ThisTokEnd != '.' && *ThisTokEnd != '_' &&
-         "Lexer didn't maximally munch?");
+  assert(!isPreprocessingNumberBody(*ThisTokEnd) && "didn't maximally munch?");
 
   s = DigitsBegin = ThisTokBegin;
   saw_exponent = false;
@@ -491,7 +483,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
     s = SkipDigits(s);
     if (s == ThisTokEnd) {
       // Done.
-    } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
+    } else if (isHexDigit(*s) && !(*s == 'e' || *s == 'E')) {
       PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
               diag::err_invalid_decimal_digit) << StringRef(s, 1);
       hadError = true;
@@ -616,7 +608,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
   }
 
   if (s != ThisTokEnd) {
-    if (PP.getLangOpts().CPlusPlus0x && s == SuffixBegin && *s == '_') {
+    if (PP.getLangOpts().CPlusPlus11 && s == SuffixBegin && *s == '_') {
       // We have a ud-suffix! By C++11 [lex.ext]p10, ud-suffixes not starting
       // with an '_' are ill-formed.
       saw_ud_suffix = true;
@@ -643,7 +635,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
   s++;
 
   // Handle a hex number like 0x1234.
-  if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) {
+  if ((*s == 'x' || *s == 'X') && (isHexDigit(s[1]) || s[1] == '.')) {
     s++;
     radix = 16;
     DigitsBegin = s;
@@ -702,7 +694,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
     s = SkipBinaryDigits(s);
     if (s == ThisTokEnd) {
       // Done.
-    } else if (isxdigit(*s)) {
+    } else if (isHexDigit(*s)) {
       PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
               diag::err_invalid_binary_digit) << StringRef(s, 1);
       hadError = true;
@@ -722,7 +714,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
 
   // If we have some other non-octal digit that *is* a decimal digit, see if
   // this is part of a floating point number like 094.123 or 09e1.
-  if (isdigit(*s)) {
+  if (isDigit(*s)) {
     const char *EndDecimal = SkipDigits(s);
     if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
       s = EndDecimal;
@@ -732,7 +724,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
 
   // If we have a hex digit other than 'e' (which denotes a FP exponent) then
   // the code is using an incorrect base.
-  if (isxdigit(*s) && *s != 'e' && *s != 'E') {
+  if (isHexDigit(*s) && *s != 'e' && *s != 'E') {
     PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
             diag::err_invalid_octal_digit) << StringRef(s, 1);
     hadError = true;
@@ -792,7 +784,7 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
   if (alwaysFitsInto64Bits(radix, NumDigits)) {
     uint64_t N = 0;
     for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr)
-      N = N * radix + HexDigitValue(*Ptr);
+      N = N * radix + llvm::hexDigitValue(*Ptr);
 
     // This will truncate the value to Val's input width. Simply check
     // for overflow by comparing.
@@ -809,7 +801,7 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
 
   bool OverflowOccurred = false;
   while (Ptr < SuffixBegin) {
-    unsigned C = HexDigitValue(*Ptr++);
+    unsigned C = llvm::hexDigitValue(*Ptr++);
 
     // If this letter is out of bound for this radix, reject it.
     assert(C < radix && "NumericLiteralParser ctor should have rejected this");
diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp
index ed8873d..f6e781a 100644
--- a/lib/Lex/MacroArgs.cpp
+++ b/lib/Lex/MacroArgs.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "MacroArgs.h"
+#include "clang/Lex/LexDiagnostic.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/LexDiagnostic.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include <algorithm>
@@ -23,7 +23,7 @@ using namespace clang;
 
 /// MacroArgs ctor function - This destroys the vector passed in.
 MacroArgs *MacroArgs::create(const MacroInfo *MI,
-                             llvm::ArrayRef<Token> UnexpArgTokens,
+                             ArrayRef<Token> UnexpArgTokens,
                              bool VarargsElided, Preprocessor &PP) {
   assert(MI->isFunctionLike() &&
          "Can't have args for an object-like macro!");
@@ -215,15 +215,11 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,
 
     // If this is a string or character constant, escape the token as specified
     // by 6.10.3.2p2.
-    if (Tok.is(tok::string_literal) ||       // "foo"
-        Tok.is(tok::wide_string_literal) ||  // L"foo"
-        Tok.is(tok::utf8_string_literal) ||  // u8"foo"
-        Tok.is(tok::utf16_string_literal) || // u"foo"
-        Tok.is(tok::utf32_string_literal) || // U"foo"
-        Tok.is(tok::char_constant) ||        // 'x'
-        Tok.is(tok::wide_char_constant) ||   // L'x'.
-        Tok.is(tok::utf16_char_constant) ||  // u'x'.
-        Tok.is(tok::utf32_char_constant)) {  // U'x'.
+    if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc.
+        Tok.is(tok::char_constant) ||          // 'x'
+        Tok.is(tok::wide_char_constant) ||     // L'x'.
+        Tok.is(tok::utf16_char_constant) ||    // u'x'.
+        Tok.is(tok::utf32_char_constant)) {    // U'x'.
       bool Invalid = false;
       std::string TokStr = PP.getSpelling(Tok, &Invalid);
       if (!Invalid) {
diff --git a/lib/Lex/MacroArgs.h b/lib/Lex/MacroArgs.h
index cf86d71..1fd295e 100644
--- a/lib/Lex/MacroArgs.h
+++ b/lib/Lex/MacroArgs.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_CLANG_MACROARGS_H
 #define LLVM_CLANG_MACROARGS_H
 
+#include "clang/Basic/LLVM.h"
 #include "llvm/ADT/ArrayRef.h"
-
 #include <vector>
 
 namespace clang {
@@ -60,7 +60,7 @@ public:
   /// MacroArgs ctor function - Create a new MacroArgs object with the specified
   /// macro and argument info.
   static MacroArgs *create(const MacroInfo *MI,
-                           llvm::ArrayRef<Token> UnexpArgTokens,
+                           ArrayRef<Token> UnexpArgTokens,
                            bool VarargsElided, Preprocessor &PP);
 
   /// destroy - Destroy and deallocate the memory for this object.
diff --git a/lib/Lex/MacroInfo.cpp b/lib/Lex/MacroInfo.cpp
index 904f04e..b61ff71 100644
--- a/lib/Lex/MacroInfo.cpp
+++ b/lib/Lex/MacroInfo.cpp
@@ -17,7 +17,6 @@ using namespace clang;
 
 MacroInfo::MacroInfo(SourceLocation DefLoc)
   : Location(DefLoc),
-    PreviousDefinition(0),
     ArgumentList(0),
     NumArguments(0),
     IsDefinitionLengthCached(false),
@@ -25,53 +24,12 @@ MacroInfo::MacroInfo(SourceLocation DefLoc)
     IsC99Varargs(false),
     IsGNUVarargs(false),
     IsBuiltinMacro(false),
-    IsFromAST(false),
-    ChangedAfterLoad(false),
+    HasCommaPasting(false),
     IsDisabled(false),
     IsUsed(false),
     IsAllowRedefinitionsWithoutWarning(false),
     IsWarnIfUnused(false),
-    IsPublic(true),
-    IsHidden(false),
-    IsAmbiguous(false) {
-}
-
-MacroInfo::MacroInfo(const MacroInfo &MI, llvm::BumpPtrAllocator &PPAllocator)
-  : Location(MI.Location),
-    EndLocation(MI.EndLocation),
-    UndefLocation(MI.UndefLocation),
-    PreviousDefinition(0),
-    ArgumentList(0),
-    NumArguments(0),
-    ReplacementTokens(MI.ReplacementTokens),
-    DefinitionLength(MI.DefinitionLength),
-    IsDefinitionLengthCached(MI.IsDefinitionLengthCached),
-    IsFunctionLike(MI.IsFunctionLike),
-    IsC99Varargs(MI.IsC99Varargs),
-    IsGNUVarargs(MI.IsGNUVarargs),
-    IsBuiltinMacro(MI.IsBuiltinMacro),
-    IsFromAST(MI.IsFromAST),
-    ChangedAfterLoad(MI.ChangedAfterLoad),
-    IsDisabled(MI.IsDisabled),
-    IsUsed(MI.IsUsed),
-    IsAllowRedefinitionsWithoutWarning(MI.IsAllowRedefinitionsWithoutWarning),
-    IsWarnIfUnused(MI.IsWarnIfUnused),
-    IsPublic(MI.IsPublic),
-    IsHidden(MI.IsHidden),
-    IsAmbiguous(MI.IsAmbiguous) {
-  setArgumentList(MI.ArgumentList, MI.NumArguments, PPAllocator);
-}
-
-const MacroInfo *MacroInfo::findDefinitionAtLoc(SourceLocation L,
-                                                SourceManager &SM) const {
-  assert(L.isValid() && "SourceLocation is invalid.");
-  for (const MacroInfo *MI = this; MI; MI = MI->PreviousDefinition) {
-    if (MI->Location.isInvalid() ||  // For macros defined on the command line.
-        SM.isBeforeInTranslationUnit(MI->Location, L))
-      return (MI->UndefLocation.isInvalid() ||
-              SM.isBeforeInTranslationUnit(L, MI->UndefLocation)) ? MI : NULL;
-  }
-  return NULL;
+    FromASTFile(false) {
 }
 
 unsigned MacroInfo::getDefinitionLengthSlow(SourceManager &SM) const {
@@ -103,11 +61,17 @@ unsigned MacroInfo::getDefinitionLengthSlow(SourceManager &SM) const {
   return DefinitionLength;
 }
 
-/// isIdenticalTo - Return true if the specified macro definition is equal to
-/// this macro in spelling, arguments, and whitespace.  This is used to emit
-/// duplicate definition warnings.  This implements the rules in C99 6.10.3.
+/// \brief Return true if the specified macro definition is equal to
+/// this macro in spelling, arguments, and whitespace.
 ///
-bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const {
+/// \param Syntactically if true, the macro definitions can be identical even
+/// if they use different identifiers for the function macro parameters.
+/// Otherwise the comparison is lexical and this implements the rules in
+/// C99 6.10.3.
+bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP,
+                              bool Syntactically) const {
+  bool Lexically = !Syntactically;
+
   // Check # tokens in replacement, number of args, and various flags all match.
   if (ReplacementTokens.size() != Other.ReplacementTokens.size() ||
       getNumArgs() != Other.getNumArgs() ||
@@ -116,10 +80,12 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const {
       isGNUVarargs() != Other.isGNUVarargs())
     return false;
 
-  // Check arguments.
-  for (arg_iterator I = arg_begin(), OI = Other.arg_begin(), E = arg_end();
-       I != E; ++I, ++OI)
-    if (*I != *OI) return false;
+  if (Lexically) {
+    // Check arguments.
+    for (arg_iterator I = arg_begin(), OI = Other.arg_begin(), E = arg_end();
+         I != E; ++I, ++OI)
+      if (*I != *OI) return false;
+  }
 
   // Check all the tokens.
   for (unsigned i = 0, e = ReplacementTokens.size(); i != e; ++i) {
@@ -137,7 +103,16 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const {
 
     // If this is an identifier, it is easy.
     if (A.getIdentifierInfo() || B.getIdentifierInfo()) {
-      if (A.getIdentifierInfo() != B.getIdentifierInfo())
+      if (A.getIdentifierInfo() == B.getIdentifierInfo())
+        continue;
+      if (Lexically)
+        return false;
+      // With syntactic equivalence the parameter names can be different as long
+      // as they are used in the same place.
+      int AArgNum = getArgumentNum(A.getIdentifierInfo());
+      if (AArgNum == -1)
+        return false;
+      if (AArgNum != Other.getArgumentNum(B.getIdentifierInfo()))
         return false;
       continue;
     }
@@ -149,3 +124,41 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const {
 
   return true;
 }
+
+MacroDirective::DefInfo MacroDirective::getDefinition(bool AllowHidden) {
+  MacroDirective *MD = this; // Walk starts at this directive.
+  SourceLocation UndefLoc;   // Location of the last undef directive visited.
+  Optional<bool> isPublic;   // Unset until a visibility directive is visited.
+  for (; MD; MD = MD->getPrevious()) {
+    if (!AllowHidden && MD->isHidden())
+      continue; // Hidden directives are skipped unless AllowHidden is set.
+    // A definition ends the walk; it is public when no visibility was seen.
+    if (DefMacroDirective *DefMD = dyn_cast<DefMacroDirective>(MD))
+      return DefInfo(DefMD, UndefLoc,
+                     !isPublic.hasValue() || isPublic.getValue());
+    // Record the undef location; each undef visited overwrites the last one.
+    if (UndefMacroDirective *UndefMD = dyn_cast<UndefMacroDirective>(MD)) {
+      UndefLoc = UndefMD->getLocation();
+      continue;
+    }
+    // Anything else is a visibility directive; only the first one is kept.
+    VisibilityMacroDirective *VisMD = cast<VisibilityMacroDirective>(MD);
+    if (!isPublic.hasValue())
+      isPublic = VisMD->isPublic();
+  }
+  // The chain held no definition: return a default-constructed DefInfo.
+  return DefInfo();
+}
+
+const MacroDirective::DefInfo
+MacroDirective::findDirectiveAtLoc(SourceLocation L, SourceManager &SM) const {
+  assert(L.isValid() && "SourceLocation is invalid.");
+  for (DefInfo Def = getDefinition(); Def; Def = Def.getPreviousDefinition()) {
+    if (Def.getLocation().isInvalid() ||  // For macros defined on the command line.
+        SM.isBeforeInTranslationUnit(Def.getLocation(), L))
+      return (!Def.isUndefined() || // First definition before L decides the result.
+              SM.isBeforeInTranslationUnit(L, Def.getUndefLocation()))
+                  ? Def : DefInfo(); // Default DefInfo if L is not before its undef.
+  }
+  return DefInfo(); // No definition in the chain is located before L.
+}
diff --git a/lib/Lex/ModuleMap.cpp b/lib/Lex/ModuleMap.cpp
index 8a936fa..0c03201 100644
--- a/lib/Lex/ModuleMap.cpp
+++ b/lib/Lex/ModuleMap.cpp
@@ -12,68 +12,82 @@
 //
 //===----------------------------------------------------------------------===//
 #include "clang/Lex/ModuleMap.h"
-#include "clang/Lex/Lexer.h"
-#include "clang/Lex/LiteralSupport.h"
-#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/PathV2.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
 #include <stdlib.h>
+#if defined(LLVM_ON_UNIX)
+#include <limits.h>
+#endif
 using namespace clang;
 
 Module::ExportDecl 
 ModuleMap::resolveExport(Module *Mod, 
                          const Module::UnresolvedExportDecl &Unresolved,
-                         bool Complain) {
+                         bool Complain) const {
   // We may have just a wildcard.
   if (Unresolved.Id.empty()) {
     assert(Unresolved.Wildcard && "Invalid unresolved export");
     return Module::ExportDecl(0, true);
   }
   
+  // Resolve the module-id.
+  Module *Context = resolveModuleId(Unresolved.Id, Mod, Complain);
+  if (!Context)
+    return Module::ExportDecl();
+
+  return Module::ExportDecl(Context, Unresolved.Wildcard);
+}
+
+Module *ModuleMap::resolveModuleId(const ModuleId &Id, Module *Mod,
+                                   bool Complain) const {
   // Find the starting module.
-  Module *Context = lookupModuleUnqualified(Unresolved.Id[0].first, Mod);
+  Module *Context = lookupModuleUnqualified(Id[0].first, Mod);
   if (!Context) {
     if (Complain)
-      Diags->Report(Unresolved.Id[0].second, 
-                    diag::err_mmap_missing_module_unqualified)
-        << Unresolved.Id[0].first << Mod->getFullModuleName();
-    
-    return Module::ExportDecl();
+      Diags->Report(Id[0].second, diag::err_mmap_missing_module_unqualified)
+      << Id[0].first << Mod->getFullModuleName();
+
+    return 0;
   }
 
   // Dig into the module path.
-  for (unsigned I = 1, N = Unresolved.Id.size(); I != N; ++I) {
-    Module *Sub = lookupModuleQualified(Unresolved.Id[I].first,
-                                        Context);
+  for (unsigned I = 1, N = Id.size(); I != N; ++I) {
+    Module *Sub = lookupModuleQualified(Id[I].first, Context);
     if (!Sub) {
       if (Complain)
-        Diags->Report(Unresolved.Id[I].second, 
-                      diag::err_mmap_missing_module_qualified)
-          << Unresolved.Id[I].first << Context->getFullModuleName()
-          << SourceRange(Unresolved.Id[0].second, Unresolved.Id[I-1].second);
-      
-      return Module::ExportDecl();      
+        Diags->Report(Id[I].second, diag::err_mmap_missing_module_qualified)
+        << Id[I].first << Context->getFullModuleName()
+        << SourceRange(Id[0].second, Id[I-1].second);
+
+      return 0;
     }
-    
+
     Context = Sub;
   }
-  
-  return Module::ExportDecl(Context, Unresolved.Wildcard);
+
+  return Context;
 }
 
 ModuleMap::ModuleMap(FileManager &FileMgr, const DiagnosticConsumer &DC,
-                     const LangOptions &LangOpts, const TargetInfo *Target)
-  : LangOpts(LangOpts), Target(Target), BuiltinIncludeDir(0)
+                     const LangOptions &LangOpts, const TargetInfo *Target,
+                     HeaderSearch &HeaderInfo)
+  : LangOpts(LangOpts), Target(Target), HeaderInfo(HeaderInfo),
+    BuiltinIncludeDir(0)
 {
   IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs);
   Diags = IntrusiveRefCntPtr<DiagnosticsEngine>(
@@ -104,26 +118,15 @@ static StringRef sanitizeFilenameAsIdentifier(StringRef Name,
   if (Name.empty())
     return Name;
 
-  // Check whether the filename is already an identifier; this is the common
-  // case.
-  bool isIdentifier = true;
-  for (unsigned I = 0, N = Name.size(); I != N; ++I) {
-    if (isalpha(Name[I]) || Name[I] == '_' || (isdigit(Name[I]) && I > 0))
-      continue;
-
-    isIdentifier = false;
-    break;
-  }
-
-  if (!isIdentifier) {
+  if (!isValidIdentifier(Name)) {
     // If we don't already have something with the form of an identifier,
     // create a buffer with the sanitized name.
     Buffer.clear();
-    if (isdigit(Name[0]))
+    if (isDigit(Name[0]))
       Buffer.push_back('_');
     Buffer.reserve(Buffer.size() + Name.size());
     for (unsigned I = 0, N = Name.size(); I != N; ++I) {
-      if (isalnum(Name[I]) || isspace(Name[I]))
+      if (isIdentifierBody(Name[I]))
         Buffer.push_back(Name[I]);
       else
         Buffer.push_back('_');
@@ -157,8 +160,13 @@ Module *ModuleMap::findModuleForHeader(const FileEntry *File) {
   }
   
   const DirectoryEntry *Dir = File->getDir();
-  llvm::SmallVector<const DirectoryEntry *, 2> SkippedDirs;
-  StringRef DirName = Dir->getName();
+  SmallVector<const DirectoryEntry *, 2> SkippedDirs;
+
+  // Note: as an egregious but useful hack we use the real path here, because
+  // frameworks moving from top-level frameworks to embedded frameworks tend
+  // to be symlinked from the top-level location to the embedded location,
+  // and we need to resolve lookups as if we had found the embedded location.
+  StringRef DirName = SourceMgr->getFileManager().getCanonicalName(Dir);
 
   // Keep walking up the directory hierarchy, looking for a directory with
   // an umbrella header.
@@ -204,7 +212,7 @@ Module *ModuleMap::findModuleForHeader(const FileEntry *File) {
                            llvm::sys::path::stem(File->getName()), NameBuf);
         Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
                                     Explicit).first;
-        Result->TopHeaders.insert(File);
+        Result->addTopHeader(File);
         
         // If inferred submodules export everything they import, add a 
         // wildcard to the set of exports.
@@ -241,19 +249,19 @@ Module *ModuleMap::findModuleForHeader(const FileEntry *File) {
   return 0;
 }
 
-bool ModuleMap::isHeaderInUnavailableModule(const FileEntry *Header) {
-  HeadersMap::iterator Known = Headers.find(Header);
+bool ModuleMap::isHeaderInUnavailableModule(const FileEntry *Header) const {
+  HeadersMap::const_iterator Known = Headers.find(Header);
   if (Known != Headers.end())
     return !Known->second.isAvailable();
   
   const DirectoryEntry *Dir = Header->getDir();
-  llvm::SmallVector<const DirectoryEntry *, 2> SkippedDirs;
+  SmallVector<const DirectoryEntry *, 2> SkippedDirs;
   StringRef DirName = Dir->getName();
 
   // Keep walking up the directory hierarchy, looking for a directory with
   // an umbrella header.
   do {    
-    llvm::DenseMap<const DirectoryEntry *, Module *>::iterator KnownDir
+    llvm::DenseMap<const DirectoryEntry *, Module *>::const_iterator KnownDir
       = UmbrellaDirs.find(Dir);
     if (KnownDir != UmbrellaDirs.end()) {
       Module *Found = KnownDir->second;
@@ -307,15 +315,16 @@ bool ModuleMap::isHeaderInUnavailableModule(const FileEntry *Header) {
   return false;
 }
 
-Module *ModuleMap::findModule(StringRef Name) {
-  llvm::StringMap<Module *>::iterator Known = Modules.find(Name);
+Module *ModuleMap::findModule(StringRef Name) const {
+  llvm::StringMap<Module *>::const_iterator Known = Modules.find(Name);
   if (Known != Modules.end())
     return Known->getValue();
   
   return 0;
 }
 
-Module *ModuleMap::lookupModuleUnqualified(StringRef Name, Module *Context) {
+Module *ModuleMap::lookupModuleUnqualified(StringRef Name,
+                                           Module *Context) const {
   for(; Context; Context = Context->Parent) {
     if (Module *Sub = lookupModuleQualified(Name, Context))
       return Sub;
@@ -324,7 +333,7 @@ Module *ModuleMap::lookupModuleUnqualified(StringRef Name, Module *Context) {
   return findModule(Name);
 }
 
-Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) {
+Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{
   if (!Context)
     return findModule(Name);
   
@@ -347,10 +356,10 @@ ModuleMap::findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework,
 }
 
 bool ModuleMap::canInferFrameworkModule(const DirectoryEntry *ParentDir,
-                                        StringRef Name, bool &IsSystem) {
+                                        StringRef Name, bool &IsSystem) const {
   // Check whether we have already looked into the parent directory
   // for a module map.
-  llvm::DenseMap<const DirectoryEntry *, InferredDirectory>::iterator
+  llvm::DenseMap<const DirectoryEntry *, InferredDirectory>::const_iterator
     inferred = InferredDirectories.find(ParentDir);
   if (inferred == InferredDirectories.end())
     return false;
@@ -370,6 +379,23 @@ bool ModuleMap::canInferFrameworkModule(const DirectoryEntry *ParentDir,
   return canInfer;
 }
 
+/// \brief For a framework module, infer the framework against which we
+/// should link.
+static void inferFrameworkLink(Module *Mod, const DirectoryEntry *FrameworkDir,
+                               FileManager &FileMgr) {
+  assert(Mod->IsFramework && "Can only infer linking for framework modules");
+  assert(!Mod->isSubFramework() &&
+         "Can only infer linking for top-level frameworks");
+  // Look up <FrameworkDir>/<Mod->Name>; add a link entry only if it is found.
+  SmallString<128> LibName;
+  LibName += FrameworkDir->getName();
+  llvm::sys::path::append(LibName, Mod->Name);
+  if (FileMgr.getFile(LibName)) {
+    Mod->LinkLibraries.push_back(Module::LinkLibrary(Mod->Name,
+                                                     /*IsFramework=*/true));
+  }
+}
+
 Module *
 ModuleMap::inferFrameworkModule(StringRef ModuleName,
                                 const DirectoryEntry *FrameworkDir,
@@ -384,14 +410,23 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName,
   // If the framework has a parent path from which we're allowed to infer
   // a framework module, do so.
   if (!Parent) {
+    // Determine whether we're allowed to infer a module map.
+
+    // Note: as an egregious but useful hack we use the real path here, because
+    // we might be looking at an embedded framework that symlinks out to a
+    // top-level framework, and we need to infer as if we were naming the
+    // top-level framework.
+    StringRef FrameworkDirName
+      = SourceMgr->getFileManager().getCanonicalName(FrameworkDir);
+
     bool canInfer = false;
-    if (llvm::sys::path::has_parent_path(FrameworkDir->getName())) {
+    if (llvm::sys::path::has_parent_path(FrameworkDirName)) {
       // Figure out the parent path.
-      StringRef Parent = llvm::sys::path::parent_path(FrameworkDir->getName());
+      StringRef Parent = llvm::sys::path::parent_path(FrameworkDirName);
       if (const DirectoryEntry *ParentDir = FileMgr.getDirectory(Parent)) {
         // Check whether we have already looked into the parent directory
         // for a module map.
-        llvm::DenseMap<const DirectoryEntry *, InferredDirectory>::iterator
+        llvm::DenseMap<const DirectoryEntry *, InferredDirectory>::const_iterator
           inferred = InferredDirectories.find(ParentDir);
         if (inferred == InferredDirectories.end()) {
           // We haven't looked here before. Load a module map, if there is
@@ -411,7 +446,7 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName,
         if (inferred->second.InferModules) {
           // We're allowed to infer for this directory, but make sure it's okay
           // to infer this particular module.
-          StringRef Name = llvm::sys::path::filename(FrameworkDir->getName());
+          StringRef Name = llvm::sys::path::stem(FrameworkDirName);
           canInfer = std::find(inferred->second.ExcludedModules.begin(),
                                inferred->second.ExcludedModules.end(),
                                Name) == inferred->second.ExcludedModules.end();
@@ -480,29 +515,23 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName,
       // check whether it is actually a subdirectory of the parent directory.
       // This will not be the case if the 'subframework' is actually a symlink
       // out to a top-level framework.
-#ifdef LLVM_ON_UNIX
-      char RealSubframeworkDirName[PATH_MAX];
-      if (realpath(Dir->path().c_str(), RealSubframeworkDirName)) {
-        StringRef SubframeworkDirName = RealSubframeworkDirName;
-
-        bool FoundParent = false;
-        do {
-          // Get the parent directory name.
-          SubframeworkDirName
-            = llvm::sys::path::parent_path(SubframeworkDirName);
-          if (SubframeworkDirName.empty())
-            break;
-
-          if (FileMgr.getDirectory(SubframeworkDirName) == FrameworkDir) {
-            FoundParent = true;
-            break;
-          }
-        } while (true);
+      StringRef SubframeworkDirName = FileMgr.getCanonicalName(SubframeworkDir);
+      bool FoundParent = false;
+      do {
+        // Get the parent directory name.
+        SubframeworkDirName
+          = llvm::sys::path::parent_path(SubframeworkDirName);
+        if (SubframeworkDirName.empty())
+          break;
+
+        if (FileMgr.getDirectory(SubframeworkDirName) == FrameworkDir) {
+          FoundParent = true;
+          break;
+        }
+      } while (true);
 
-        if (!FoundParent)
-          continue;
-      }
-#endif
+      if (!FoundParent)
+        continue;
 
       // FIXME: Do we want to warn about subframeworks without umbrella headers?
       SmallString<32> NameBuf;
@@ -512,6 +541,12 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName,
     }
   }
 
+  // If the module is a top-level framework, automatically link against the
+  // framework.
+  if (!Result->isSubFramework()) {
+    inferFrameworkLink(Result, FrameworkDir, FileMgr);
+  }
+
   return Result;
 }
 
@@ -528,15 +563,17 @@ void ModuleMap::setUmbrellaDir(Module *Mod, const DirectoryEntry *UmbrellaDir) {
 
 void ModuleMap::addHeader(Module *Mod, const FileEntry *Header,
                           bool Excluded) {
-  if (Excluded)
+  if (Excluded) {
     Mod->ExcludedHeaders.push_back(Header);
-  else
+  } else {
     Mod->Headers.push_back(Header);
+    HeaderInfo.MarkFileModuleHeader(Header);
+  }
   Headers[Header] = KnownHeader(Mod, Excluded);
 }
 
 const FileEntry *
-ModuleMap::getContainingModuleMapFile(Module *Module) {
+ModuleMap::getContainingModuleMapFile(Module *Module) const {
   if (Module->DefinitionLoc.isInvalid() || !SourceMgr)
     return 0;
 
@@ -573,6 +610,25 @@ bool ModuleMap::resolveExports(Module *Mod, bool Complain) {
   return HadError;
 }
 
+bool ModuleMap::resolveConflicts(Module *Mod, bool Complain) {
+  bool HadError = false;
+  for (unsigned I = 0, N = Mod->UnresolvedConflicts.size(); I != N; ++I) {
+    Module *OtherMod = resolveModuleId(Mod->UnresolvedConflicts[I].Id,
+                                       Mod, Complain);
+    if (!OtherMod) {
+      HadError = true;
+      continue;
+    }
+
+    Module::Conflict Conflict;
+    Conflict.Other = OtherMod;
+    Conflict.Message = Mod->UnresolvedConflicts[I].Message;
+    Mod->Conflicts.push_back(Conflict);
+  }
+  Mod->UnresolvedConflicts.clear();
+  return HadError;
+}
+
 Module *ModuleMap::inferModuleFromLocation(FullSourceLoc Loc) {
   if (Loc.isInvalid())
     return 0;
@@ -613,6 +669,8 @@ namespace clang {
   struct MMToken {
     enum TokenKind {
       Comma,
+      ConfigMacros,
+      Conflict,
       EndOfFile,
       HeaderKeyword,
       Identifier,
@@ -620,6 +678,7 @@ namespace clang {
       ExplicitKeyword,
       ExportKeyword,
       FrameworkKeyword,
+      LinkKeyword,
       ModuleKeyword,
       Period,
       UmbrellaKeyword,
@@ -656,10 +715,13 @@ namespace clang {
 
   /// \brief The set of attributes that can be attached to a module.
   struct Attributes {
-    Attributes() : IsSystem() { }
+    Attributes() : IsSystem(), IsExhaustive() { }
 
     /// \brief Whether this is a system module.
     unsigned IsSystem : 1;
+
+    /// \brief Whether this is an exhaustive set of configuration macros.
+    unsigned IsExhaustive : 1;
   };
   
 
@@ -700,14 +762,16 @@ namespace clang {
     /// (or the end of the file).
     void skipUntil(MMToken::TokenKind K);
 
-    typedef llvm::SmallVector<std::pair<std::string, SourceLocation>, 2>
-      ModuleId;
+    typedef SmallVector<std::pair<std::string, SourceLocation>, 2> ModuleId;
     bool parseModuleId(ModuleId &Id);
     void parseModuleDecl();
     void parseRequiresDecl();
     void parseHeaderDecl(SourceLocation UmbrellaLoc, SourceLocation ExcludeLoc);
     void parseUmbrellaDirDecl(SourceLocation UmbrellaLoc);
     void parseExportDecl();
+    void parseLinkDecl();
+    void parseConfigMacros();
+    void parseConflict();
     void parseInferredModuleDecl(bool Framework, bool Explicit);
     bool parseOptionalAttributes(Attributes &Attrs);
 
@@ -745,11 +809,14 @@ retry:
     Tok.StringData = LToken.getRawIdentifierData();
     Tok.StringLength = LToken.getLength();
     Tok.Kind = llvm::StringSwitch<MMToken::TokenKind>(Tok.getString())
-                 .Case("header", MMToken::HeaderKeyword)
+                 .Case("config_macros", MMToken::ConfigMacros)
+                 .Case("conflict", MMToken::Conflict)
                  .Case("exclude", MMToken::ExcludeKeyword)
                  .Case("explicit", MMToken::ExplicitKeyword)
                  .Case("export", MMToken::ExportKeyword)
                  .Case("framework", MMToken::FrameworkKeyword)
+                 .Case("header", MMToken::HeaderKeyword)
+                 .Case("link", MMToken::LinkKeyword)
                  .Case("module", MMToken::ModuleKeyword)
                  .Case("requires", MMToken::RequiresKeyword)
                  .Case("umbrella", MMToken::UmbrellaKeyword)
@@ -905,7 +972,9 @@ namespace {
     /// \brief An unknown attribute.
     AT_unknown,
     /// \brief The 'system' attribute.
-    AT_system
+    AT_system,
+    /// \brief The 'exhaustive' attribute.
+    AT_exhaustive
   };
 }
 
@@ -920,6 +989,7 @@ namespace {
 ///     header-declaration
 ///     submodule-declaration
 ///     export-declaration
+///     link-declaration
 ///
 ///   submodule-declaration:
 ///     module-declaration
@@ -1061,7 +1131,15 @@ void ModuleMapParser::parseModuleDecl() {
     case MMToken::RBrace:
       Done = true;
       break;
-        
+
+    case MMToken::ConfigMacros:
+      parseConfigMacros();
+      break;
+
+    case MMToken::Conflict:
+      parseConflict();
+      break;
+
     case MMToken::ExplicitKeyword:
     case MMToken::FrameworkKeyword:
     case MMToken::ModuleKeyword:
@@ -1099,7 +1177,11 @@ void ModuleMapParser::parseModuleDecl() {
     case MMToken::HeaderKeyword:
       parseHeaderDecl(SourceLocation(), SourceLocation());
       break;
-        
+
+    case MMToken::LinkKeyword:
+      parseLinkDecl();
+      break;
+
     default:
       Diags.Report(Tok.getLocation(), diag::err_mmap_expected_member);
       consumeToken();
@@ -1115,6 +1197,13 @@ void ModuleMapParser::parseModuleDecl() {
     HadError = true;
   }
 
+  // If the active module is a top-level framework, and there are no link
+  // libraries, automatically link against the framework.
+  if (ActiveModule->IsFramework && !ActiveModule->isSubFramework() &&
+      ActiveModule->LinkLibraries.empty()) {
+    inferFrameworkLink(ActiveModule, Directory, SourceMgr.getFileManager());
+  }
+
   // We're done parsing this module. Pop back to the previous module.
   ActiveModule = PreviousActiveModule;
 }
@@ -1159,9 +1248,9 @@ void ModuleMapParser::parseRequiresDecl() {
 /// \brief Append to \p Paths the set of paths needed to get to the 
 /// subframework in which the given module lives.
 static void appendSubframeworkPaths(Module *Mod,
-                                    llvm::SmallVectorImpl<char> &Path) {
+                                    SmallVectorImpl<char> &Path) {
   // Collect the framework names from the given module to the top-level module.
-  llvm::SmallVector<StringRef, 2> Paths;
+  SmallVector<StringRef, 2> Paths;
   for (; Mod; Mod = Mod->Parent) {
     if (Mod->IsFramework)
       Paths.push_back(Mod->Name);
@@ -1307,7 +1396,9 @@ void ModuleMapParser::parseHeaderDecl(SourceLocation UmbrellaLoc,
       if (BuiltinFile)
         Map.addHeader(ActiveModule, BuiltinFile, Exclude);
     }
-  } else {
+  } else if (!Exclude) {
+    // A missing excluded header is not an error; they're optional anyway.
+    
     Diags.Report(FileNameLoc, diag::err_mmap_header_not_found)
       << Umbrella << FileName;
     HadError = true;
@@ -1414,7 +1505,139 @@ void ModuleMapParser::parseExportDecl() {
   ActiveModule->UnresolvedExports.push_back(Unresolved);
 }
 
-/// \brief Parse an inferried module declaration (wildcard modules).
+/// \brief Parse a link declaration.
+///
+///   link-declaration:
+///     'link' 'framework'[opt] string-literal
+void ModuleMapParser::parseLinkDecl() {
+  assert(Tok.is(MMToken::LinkKeyword));
+  SourceLocation LinkLoc = consumeToken();
+
+  // Parse the optional 'framework' keyword.
+  bool IsFramework = false;
+  if (Tok.is(MMToken::FrameworkKeyword)) {
+    consumeToken();
+    IsFramework = true;
+  }
+
+  // Parse the library name.
+  if (!Tok.is(MMToken::StringLiteral)) {
+    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_library_name)
+      << IsFramework << SourceRange(LinkLoc);
+    HadError = true;
+    return;
+  }
+
+  std::string LibraryName = Tok.getString();
+  consumeToken();
+  ActiveModule->LinkLibraries.push_back(Module::LinkLibrary(LibraryName,
+                                                            IsFramework));
+}
+
+/// \brief Parse a configuration macro declaration.
+///
+///   config-macros-declaration:
+///     'config_macros' attributes[opt] config-macro-list[opt]
+///
+///   config-macro-list:
+///     identifier (',' identifier)*
+void ModuleMapParser::parseConfigMacros() {
+  assert(Tok.is(MMToken::ConfigMacros));
+  SourceLocation ConfigMacrosLoc = consumeToken();
+
+  // Only top-level modules can have configuration macros.
+  if (ActiveModule->Parent) {
+    Diags.Report(ConfigMacrosLoc, diag::err_mmap_config_macro_submodule);
+  }
+
+  // Parse the optional attributes.
+  Attributes Attrs;
+  parseOptionalAttributes(Attrs);
+  if (Attrs.IsExhaustive && !ActiveModule->Parent) {
+    ActiveModule->ConfigMacrosExhaustive = true;
+  }
+
+  // If we don't have an identifier, we're done.
+  if (!Tok.is(MMToken::Identifier))
+    return;
+
+  // Consume the first identifier.
+  if (!ActiveModule->Parent) {
+    ActiveModule->ConfigMacros.push_back(Tok.getString().str());
+  }
+  consumeToken();
+
+  do {
+    // If there's a comma, consume it.
+    if (!Tok.is(MMToken::Comma))
+      break;
+    consumeToken();
+
+    // We expect to see a macro name here.
+    if (!Tok.is(MMToken::Identifier)) {
+      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_config_macro);
+      break;
+    }
+
+    // Consume the macro name.
+    if (!ActiveModule->Parent) {
+      ActiveModule->ConfigMacros.push_back(Tok.getString().str());
+    }
+    consumeToken();
+  } while (true);
+}
+
+/// \brief Format a module-id into a string.
+static std::string formatModuleId(const ModuleId &Id) {
+  std::string result;
+  {
+    llvm::raw_string_ostream OS(result);
+
+    for (unsigned I = 0, N = Id.size(); I != N; ++I) {
+      if (I)
+        OS << ".";
+      OS << Id[I].first;
+    }
+  }
+
+  return result;
+}
+
+/// \brief Parse a conflict declaration.
+///
+///   conflict-declaration:
+///     'conflict' module-id ',' string-literal
+void ModuleMapParser::parseConflict() {
+  assert(Tok.is(MMToken::Conflict));
+  SourceLocation ConflictLoc = consumeToken();
+  Module::UnresolvedConflict Conflict;
+
+  // Parse the module-id.
+  if (parseModuleId(Conflict.Id))
+    return;
+
+  // Parse the ','.
+  if (!Tok.is(MMToken::Comma)) {
+    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_conflicts_comma)
+      << SourceRange(ConflictLoc);
+    return;
+  }
+  consumeToken();
+
+  // Parse the message.
+  if (!Tok.is(MMToken::StringLiteral)) {
+    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_conflicts_message)
+      << formatModuleId(Conflict.Id);
+    return;
+  }
+  Conflict.Message = Tok.getString().str();
+  consumeToken();
+
+  // Add this unresolved conflict.
+  ActiveModule->UnresolvedConflicts.push_back(Conflict);
+}
+
+/// \brief Parse an inferred module declaration (wildcard modules).
 ///
 ///   module-declaration:
 ///     'explicit'[opt] 'framework'[opt] 'module' * attributes[opt]
@@ -1593,6 +1816,7 @@ bool ModuleMapParser::parseOptionalAttributes(Attributes &Attrs) {
     // Decode the attribute name.
     AttributeKind Attribute
       = llvm::StringSwitch<AttributeKind>(Tok.getString())
+          .Case("exhaustive", AT_exhaustive)
           .Case("system", AT_system)
           .Default(AT_unknown);
     switch (Attribute) {
@@ -1604,6 +1828,10 @@ bool ModuleMapParser::parseOptionalAttributes(Attributes &Attrs) {
     case AT_system:
       Attrs.IsSystem = true;
       break;
+
+    case AT_exhaustive:
+      Attrs.IsExhaustive = true;
+      break;
     }
     consumeToken();
 
@@ -1653,13 +1881,16 @@ bool ModuleMapParser::parseModuleMapFile() {
     case MMToken::FrameworkKeyword:
       parseModuleDecl();
       break;
-      
+
     case MMToken::Comma:
+    case MMToken::ConfigMacros:
+    case MMToken::Conflict:
     case MMToken::ExcludeKeyword:
     case MMToken::ExportKeyword:
     case MMToken::HeaderKeyword:
     case MMToken::Identifier:
     case MMToken::LBrace:
+    case MMToken::LinkKeyword:
     case MMToken::LSquare:
     case MMToken::Period:
     case MMToken::RBrace:
@@ -1677,11 +1908,16 @@ bool ModuleMapParser::parseModuleMapFile() {
 }
 
 bool ModuleMap::parseModuleMapFile(const FileEntry *File) {
+  llvm::DenseMap<const FileEntry *, bool>::iterator Known
+    = ParsedModuleMap.find(File);
+  if (Known != ParsedModuleMap.end())
+    return Known->second;
+
   assert(Target != 0 && "Missing target information");
   FileID ID = SourceMgr->createFileID(File, SourceLocation(), SrcMgr::C_User);
   const llvm::MemoryBuffer *Buffer = SourceMgr->getBuffer(ID);
   if (!Buffer)
-    return true;
+    return ParsedModuleMap[File] = true;
   
   // Parse this module map file.
   Lexer L(ID, SourceMgr->getBuffer(ID), *SourceMgr, MMapLangOpts);
@@ -1690,6 +1926,6 @@ bool ModuleMap::parseModuleMapFile(const FileEntry *File) {
                          BuiltinIncludeDir);
   bool Result = Parser.parseModuleMapFile();
   Diags->getClient()->EndSourceFile();
-  
+  ParsedModuleMap[File] = Result;
   return Result;
 }
diff --git a/lib/Lex/PPConditionalDirectiveRecord.cpp b/lib/Lex/PPConditionalDirectiveRecord.cpp
new file mode 100644
index 0000000..16ce3ef
--- /dev/null
+++ b/lib/Lex/PPConditionalDirectiveRecord.cpp
@@ -0,0 +1,120 @@
+//===--- PPConditionalDirectiveRecord.cpp - Preprocessing Directives ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the PPConditionalDirectiveRecord class, which maintains
+//  a record of conditional directive regions.
+//
+//===----------------------------------------------------------------------===//
+#include "clang/Lex/PPConditionalDirectiveRecord.h"
+#include "llvm/Support/Capacity.h"
+
+using namespace clang;
+
+PPConditionalDirectiveRecord::PPConditionalDirectiveRecord(SourceManager &SM)
+  : SourceMgr(SM) {
+  CondDirectiveStack.push_back(SourceLocation());
+}
+
+bool PPConditionalDirectiveRecord::rangeIntersectsConditionalDirective(
+                                                      SourceRange Range) const {
+  if (Range.isInvalid())
+    return false;
+
+  CondDirectiveLocsTy::const_iterator
+    low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(),
+                           Range.getBegin(), CondDirectiveLoc::Comp(SourceMgr));
+  if (low == CondDirectiveLocs.end())
+    return false;
+
+  if (SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), low->getLoc()))
+    return false;
+
+  CondDirectiveLocsTy::const_iterator
+    upp = std::upper_bound(low, CondDirectiveLocs.end(),
+                           Range.getEnd(), CondDirectiveLoc::Comp(SourceMgr));
+  SourceLocation uppRegion;
+  if (upp != CondDirectiveLocs.end())
+    uppRegion = upp->getRegionLoc();
+
+  return low->getRegionLoc() != uppRegion;
+}
+
+SourceLocation PPConditionalDirectiveRecord::findConditionalDirectiveRegionLoc(
+                                                     SourceLocation Loc) const {
+  if (Loc.isInvalid())
+    return SourceLocation();
+  if (CondDirectiveLocs.empty())
+    return SourceLocation();
+
+  if (SourceMgr.isBeforeInTranslationUnit(CondDirectiveLocs.back().getLoc(),
+                                          Loc))
+    return CondDirectiveStack.back();
+
+  CondDirectiveLocsTy::const_iterator
+    low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(),
+                           Loc, CondDirectiveLoc::Comp(SourceMgr));
+  assert(low != CondDirectiveLocs.end());
+  return low->getRegionLoc();
+}
+
+void PPConditionalDirectiveRecord::addCondDirectiveLoc(
+                                                      CondDirectiveLoc DirLoc) {
+  // Ignore directives in system headers.
+  if (SourceMgr.isInSystemHeader(DirLoc.getLoc()))
+    return;
+
+  assert(CondDirectiveLocs.empty() ||
+         SourceMgr.isBeforeInTranslationUnit(CondDirectiveLocs.back().getLoc(),
+                                             DirLoc.getLoc()));
+  CondDirectiveLocs.push_back(DirLoc);
+}
+
+void PPConditionalDirectiveRecord::If(SourceLocation Loc,
+                                      SourceRange ConditionRange) {
+  addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+  CondDirectiveStack.push_back(Loc);
+}
+
+void PPConditionalDirectiveRecord::Ifdef(SourceLocation Loc,
+                                         const Token &MacroNameTok,
+                                         const MacroDirective *MD) {
+  addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+  CondDirectiveStack.push_back(Loc);
+}
+
+void PPConditionalDirectiveRecord::Ifndef(SourceLocation Loc,
+                                          const Token &MacroNameTok,
+                                          const MacroDirective *MD) {
+  addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+  CondDirectiveStack.push_back(Loc);
+}
+
+void PPConditionalDirectiveRecord::Elif(SourceLocation Loc,
+                                        SourceRange ConditionRange,
+                                        SourceLocation IfLoc) {
+  addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+  CondDirectiveStack.back() = Loc;
+}
+
+void PPConditionalDirectiveRecord::Else(SourceLocation Loc,
+                                        SourceLocation IfLoc) {
+  addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+  CondDirectiveStack.back() = Loc;
+}
+
+void PPConditionalDirectiveRecord::Endif(SourceLocation Loc,
+                                         SourceLocation IfLoc) {
+  addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+  assert(!CondDirectiveStack.empty());
+  CondDirectiveStack.pop_back();
+}
+
+size_t PPConditionalDirectiveRecord::getTotalMemory() const {
+  return llvm::capacity_in_bytes(CondDirectiveLocs);
+}
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index b7c1846..07c1867 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -13,17 +13,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/LiteralSupport.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Lex/HeaderSearch.h"
-#include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/LexDiagnostic.h"
-#include "clang/Lex/CodeCompletionHandler.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/ModuleLoader.h"
 #include "clang/Lex/Pragma.h"
-#include "clang/Basic/FileManager.h"
-#include "clang/Basic/SourceManager.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SaveAndRestore.h"
 using namespace clang;
 
 //===----------------------------------------------------------------------===//
@@ -56,12 +57,42 @@ MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
   return MI;
 }
 
-MacroInfo *Preprocessor::CloneMacroInfo(const MacroInfo &MacroToClone) {
-  MacroInfo *MI = AllocateMacroInfo();
-  new (MI) MacroInfo(MacroToClone, BP);
+MacroInfo *Preprocessor::AllocateDeserializedMacroInfo(SourceLocation L,
+                                                       unsigned SubModuleID) {
+  LLVM_STATIC_ASSERT(llvm::AlignOf<MacroInfo>::Alignment >= sizeof(SubModuleID),
+                     "alignment for MacroInfo is less than the ID");
+  MacroInfo *MI =
+      (MacroInfo*)BP.Allocate(sizeof(MacroInfo) + sizeof(SubModuleID),
+                              llvm::AlignOf<MacroInfo>::Alignment);
+  new (MI) MacroInfo(L);
+  MI->FromASTFile = true;
+  MI->setOwningModuleID(SubModuleID);
   return MI;
 }
 
+DefMacroDirective *
+Preprocessor::AllocateDefMacroDirective(MacroInfo *MI, SourceLocation Loc,
+                                        bool isImported) {
+  DefMacroDirective *MD = BP.Allocate<DefMacroDirective>();
+  new (MD) DefMacroDirective(MI, Loc, isImported);
+  return MD;
+}
+
+UndefMacroDirective *
+Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
+  UndefMacroDirective *MD = BP.Allocate<UndefMacroDirective>();
+  new (MD) UndefMacroDirective(UndefLoc);
+  return MD;
+}
+
+VisibilityMacroDirective *
+Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
+                                               bool isPublic) {
+  VisibilityMacroDirective *MD = BP.Allocate<VisibilityMacroDirective>();
+  new (MD) VisibilityMacroDirective(Loc, isPublic);
+  return MD;
+}
+
 /// \brief Release the specified MacroInfo to be reused for allocating
 /// new MacroInfo objects.
 void Preprocessor::ReleaseMacroInfo(MacroInfo *MI) {
@@ -140,15 +171,14 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) {
       Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
     // Fall through on error.
   } else if (isDefineUndef && II->getPPKeywordID() == tok::pp_defined) {
-    // Error if defining "defined": C99 6.10.8.4.
+    // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
     Diag(MacroNameTok, diag::err_defined_macro_name);
-  } else if (isDefineUndef && II->hasMacroDefinition() &&
+  } else if (isDefineUndef == 2 && II->hasMacroDefinition() &&
              getMacroInfo(II)->isBuiltinMacro()) {
-    // Error if defining "__LINE__" and other builtins: C99 6.10.8.4.
-    if (isDefineUndef == 1)
-      Diag(MacroNameTok, diag::pp_redef_builtin_macro);
-    else
-      Diag(MacroNameTok, diag::pp_undef_builtin_macro);
+    // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4
+    // and C++ [cpp.predefined]p4, but allow it as an extension.
+    Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);
+    return;
   } else {
     // Okay, we got a good identifier node.  Return it.
     return;
@@ -255,7 +285,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
     // directive mode.  Tell the lexer this so any newlines we see will be
     // converted into an EOD token (this terminates the macro).
     CurPPLexer->ParsingPreprocessorDirective = true;
-    if (CurLexer) CurLexer->SetCommentRetentionState(false);
+    if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
 
 
     // Read the next token, the directive flavor.
@@ -266,7 +296,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
     if (Tok.isNot(tok::raw_identifier)) {
       CurPPLexer->ParsingPreprocessorDirective = false;
       // Restore comment saving mode.
-      if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+      if (CurLexer) CurLexer->resetExtendedTokenMode();
       continue;
     }
 
@@ -282,7 +312,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
         FirstChar != 'i' && FirstChar != 'e') {
       CurPPLexer->ParsingPreprocessorDirective = false;
       // Restore comment saving mode.
-      if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+      if (CurLexer) CurLexer->resetExtendedTokenMode();
       continue;
     }
 
@@ -299,7 +329,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
       if (IdLen >= 20) {
         CurPPLexer->ParsingPreprocessorDirective = false;
         // Restore comment saving mode.
-        if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+        if (CurLexer) CurLexer->resetExtendedTokenMode();
         continue;
       }
       memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
@@ -405,7 +435,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
 
     CurPPLexer->ParsingPreprocessorDirective = false;
     // Restore comment saving mode.
-    if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+    if (CurLexer) CurLexer->resetExtendedTokenMode();
   }
 
   // Finally, if we are out of the conditional (saw an #endif or ran off the end
@@ -536,11 +566,11 @@ const FileEntry *Preprocessor::LookupFile(
   // Otherwise, see if this is a subframework header.  If so, this is relative
   // to one of the headers on the #include stack.  Walk the list of the current
   // headers on the #include stack and pass them to HeaderInfo.
-  // FIXME: SuggestedModule!
   if (IsFileLexer()) {
     if ((CurFileEnt = SourceMgr.getFileEntryForID(CurPPLexer->getFileID())))
       if ((FE = HeaderInfo.LookupSubframeworkHeader(Filename, CurFileEnt,
-                                                    SearchPath, RelativePath)))
+                                                    SearchPath, RelativePath,
+                                                    SuggestedModule)))
         return FE;
   }
 
@@ -550,7 +580,8 @@ const FileEntry *Preprocessor::LookupFile(
       if ((CurFileEnt =
            SourceMgr.getFileEntryForID(ISEntry.ThePPLexer->getFileID())))
         if ((FE = HeaderInfo.LookupSubframeworkHeader(
-                Filename, CurFileEnt, SearchPath, RelativePath)))
+                Filename, CurFileEnt, SearchPath, RelativePath,
+                SuggestedModule)))
           return FE;
     }
   }
@@ -590,6 +621,7 @@ void Preprocessor::HandleDirective(Token &Result) {
   // mode.  Tell the lexer this so any newlines we see will be converted into an
   // EOD token (which terminates the directive).
   CurPPLexer->ParsingPreprocessorDirective = true;
+  if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
 
   ++NumDirectives;
 
@@ -634,14 +666,9 @@ void Preprocessor::HandleDirective(Token &Result) {
   // and reset to previous state when returning from this function.
   ResetMacroExpansionHelper helper(this);
 
-TryAgain:
   switch (Result.getKind()) {
   case tok::eod:
     return;   // null directive.
-  case tok::comment:
-    // Handle stuff like "# /*foo*/ define X" in -E -C mode.
-    LexUnexpandedToken(Result);
-    goto TryAgain;
   case tok::code_completion:
     if (CodeComplete)
       CodeComplete->CodeCompleteDirective(
@@ -788,7 +815,7 @@ static bool GetLineValue(Token &DigitTok, unsigned &Val,
   // here.
   Val = 0;
   for (unsigned i = 0; i != ActualLength; ++i) {
-    if (!isdigit(DigitTokBegin[i])) {
+    if (!isDigit(DigitTokBegin[i])) {
       PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i),
               diag::err_pp_line_digit_sequence);
       PP.DiscardUntilEndOfDirective();
@@ -834,11 +861,11 @@ void Preprocessor::HandleLineDirective(Token &Tok) {
   // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
   // number greater than 2147483647".  C90 requires that the line # be <= 32767.
   unsigned LineLimit = 32768U;
-  if (LangOpts.C99 || LangOpts.CPlusPlus0x)
+  if (LangOpts.C99 || LangOpts.CPlusPlus11)
     LineLimit = 2147483648U;
   if (LineNo >= LineLimit)
     Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;
-  else if (LangOpts.CPlusPlus0x && LineNo >= 32768U)
+  else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
     Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);
 
   int FilenameID = -1;
@@ -1107,23 +1134,19 @@ void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
   // Check to see if this is the last token on the #__public_macro line.
   CheckEndOfDirective("__public_macro");
 
+  IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
   // Okay, we finally have a valid identifier to undef.
-  MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo());
+  MacroDirective *MD = getMacroDirective(II);
   
   // If the macro is not defined, this is an error.
-  if (MI == 0) {
-    Diag(MacroNameTok, diag::err_pp_visibility_non_macro)
-      << MacroNameTok.getIdentifierInfo();
+  if (MD == 0) {
+    Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
     return;
   }
   
   // Note that this macro has now been exported.
-  MI->setVisibility(/*IsPublic=*/true, MacroNameTok.getLocation());
-  
-  // If this macro definition came from a PCH file, mark it
-  // as having changed since serialization.
-  if (MI->isFromAST())
-    MI->setChangedAfterLoad();
+  appendMacroDirective(II, AllocateVisibilityMacroDirective(
+                                MacroNameTok.getLocation(), /*IsPublic=*/true));
 }
 
 /// \brief Handle a #private directive.
@@ -1138,23 +1161,19 @@ void Preprocessor::HandleMacroPrivateDirective(Token &Tok) {
   // Check to see if this is the last token on the #__private_macro line.
   CheckEndOfDirective("__private_macro");
   
+  IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
   // Okay, we finally have a valid identifier to undef.
-  MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo());
+  MacroDirective *MD = getMacroDirective(II);
   
   // If the macro is not defined, this is an error.
-  if (MI == 0) {
-    Diag(MacroNameTok, diag::err_pp_visibility_non_macro)
-      << MacroNameTok.getIdentifierInfo();
+  if (MD == 0) {
+    Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
     return;
   }
   
   // Note that this macro has now been marked private.
-  MI->setVisibility(/*IsPublic=*/false, MacroNameTok.getLocation());
-  
-  // If this macro definition came from a PCH file, mark it
-  // as having changed since serialization.
-  if (MI->isFromAST())
-    MI->setChangedAfterLoad();
+  appendMacroDirective(II, AllocateVisibilityMacroDirective(
+                               MacroNameTok.getLocation(), /*IsPublic=*/false));
 }
 
 //===----------------------------------------------------------------------===//
@@ -1375,7 +1394,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
       if (Callbacks->FileNotFound(Filename, RecoveryPath)) {
         if (const DirectoryEntry *DE = FileMgr.getDirectory(RecoveryPath)) {
           // Add the recovery path to the list of search paths.
-          DirectoryLookup DL(DE, SrcMgr::C_User, true, false);
+          DirectoryLookup DL(DE, SrcMgr::C_User, false);
           HeaderInfo.AddSearchPath(DL, isAngled);
           
           // Try the lookup again, skipping the cache.
@@ -1426,7 +1445,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
     // Compute the module access path corresponding to this module.
     // FIXME: Should we have a second loadModule() overload to avoid this
     // extra lookup step?
-    llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
+    SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
     for (Module *Mod = SuggestedModule; Mod; Mod = Mod->Parent)
       Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
                                     FilenameTok.getLocation()));
@@ -1476,14 +1495,14 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
       Diag(HashLoc, diag::warn_auto_module_import)
         << IncludeKind << PathString 
         << FixItHint::CreateReplacement(ReplaceRange,
-             "@__experimental_modules_import " + PathString.str().str() + ";");
+             "@import " + PathString.str().str() + ";");
     }
     
     // Load the module.
     // If this was an #__include_macros directive, only make macros visible.
     Module::NameVisibilityKind Visibility 
       = (IncludeKind == 3)? Module::MacrosVisible : Module::AllVisible;
-    Module *Imported
+    ModuleLoadResult Imported
       = TheModuleLoader.loadModule(IncludeTok.getLocation(), Path, Visibility,
                                    /*IsIncludeDirective=*/true);
     assert((Imported == 0 || Imported == SuggestedModule) &&
@@ -1498,6 +1517,13 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
       }
       return;
     }
+
+    // If we failed to find a submodule that we expected to find, we can
+    // continue. Otherwise, there's an error in the included file, so we
+    // don't want to include it.
+    if (!BuildingImportedModule && !Imported.isMissingExpected()) {
+      return;
+    }
   }
 
   if (Callbacks && SuggestedModule) {
@@ -1637,10 +1663,16 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI, Token &Tok) {
       return true;
     case tok::ellipsis:  // #define X(... -> C99 varargs
       if (!LangOpts.C99)
-        Diag(Tok, LangOpts.CPlusPlus0x ? 
+        Diag(Tok, LangOpts.CPlusPlus11 ? 
              diag::warn_cxx98_compat_variadic_macro :
              diag::ext_variadic_macro);
 
+      // OpenCL v1.2 s6.9.e: variadic macros are not supported.
+      if (LangOpts.OpenCL) {
+        Diag(Tok, diag::err_pp_opencl_variadic_macros);
+        return true;
+      }
+
       // Lex the token after the identifier.
       LexUnexpandedToken(Tok);
       if (Tok.isNot(tok::r_paren)) {
@@ -1763,7 +1795,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {
 
     // Read the first token after the arg list for down below.
     LexUnexpandedToken(Tok);
-  } else if (LangOpts.C99 || LangOpts.CPlusPlus0x) {
+  } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
     // C99 requires whitespace between the macro definition and the body.  Emit
     // a diagnostic for something like "#define X+".
     Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
@@ -1809,8 +1841,37 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {
     while (Tok.isNot(tok::eod)) {
       LastTok = Tok;
 
-      if (Tok.isNot(tok::hash)) {
+      if (Tok.isNot(tok::hash) && Tok.isNot(tok::hashhash)) {
+        MI->AddTokenToBody(Tok);
+
+        // Get the next token of the macro.
+        LexUnexpandedToken(Tok);
+        continue;
+      }
+
+      if (Tok.is(tok::hashhash)) {
+        
+        // If we see token pasting, check if it looks like the gcc comma
+        // pasting extension.  We'll use this information to suppress
+        // diagnostics later on.
+        
+        // Get the next token of the macro.
+        LexUnexpandedToken(Tok);
+
+        if (Tok.is(tok::eod)) {
+          MI->AddTokenToBody(LastTok);
+          break;
+        }
+
+        unsigned NumTokens = MI->getNumTokens();
+        if (NumTokens && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
+            MI->getReplacementToken(NumTokens-1).is(tok::comma))
+          MI->setHasCommaPasting();
+
+        // Things look ok, add the '##' and param name tokens to the macro.
+        MI->AddTokenToBody(LastTok);
         MI->AddTokenToBody(Tok);
+        LastTok = Tok;
 
         // Get the next token of the macro.
         LexUnexpandedToken(Tok);
@@ -1874,7 +1935,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {
 
   // Finally, if this identifier already had a macro defined for it, verify that
   // the macro bodies are identical, and issue diagnostics if they are not.
-  if (MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo())) {
+  if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
     // It is very common for system headers to have tons of macro redefinitions
     // and for warnings to be disabled in system headers.  If this is the case,
     // then don't bother calling MacroInfo::isIdenticalTo.
@@ -1883,10 +1944,14 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {
       if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
         Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
 
+      // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and 
+      // C++ [cpp.predefined]p4, but allow it as an extension.
+      if (OtherMI->isBuiltinMacro())
+        Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
       // Macros must be identical.  This means all tokens and whitespace
-      // separation must be the same.  C99 6.10.3.2.
-      if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
-          !MI->isIdenticalTo(*OtherMI, *this)) {
+      // separation must be the same.  C99 6.10.3p2.
+      else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
+               !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
         Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
           << MacroNameTok.getIdentifierInfo();
         Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
@@ -1896,7 +1961,8 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {
       WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
   }
 
-  setMacroInfo(MacroNameTok.getIdentifierInfo(), MI);
+  DefMacroDirective *MD =
+      appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);
 
   assert(!MI->isUsed());
   // If we need warning for not using the macro, add its location in the
@@ -1910,7 +1976,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {
 
   // If the callbacks want to know, tell them about the macro definition.
   if (Callbacks)
-    Callbacks->MacroDefined(MacroNameTok, MI);
+    Callbacks->MacroDefined(MacroNameTok, MD);
 }
 
 /// HandleUndefDirective - Implements \#undef.
@@ -1929,7 +1995,13 @@ void Preprocessor::HandleUndefDirective(Token &UndefTok) {
   CheckEndOfDirective("undef");
 
   // Okay, we finally have a valid identifier to undef.
-  MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo());
+  MacroDirective *MD = getMacroDirective(MacroNameTok.getIdentifierInfo());
+  const MacroInfo *MI = MD ? MD->getMacroInfo() : 0;
+
+  // If the callbacks want to know, tell them about the macro #undef.
+  // Note: this is done whether or not the macro was actually defined.
+  if (Callbacks)
+    Callbacks->MacroUndefined(MacroNameTok, MD);
 
   // If the macro is not defined, this is a noop undef, just return.
   if (MI == 0) return;
@@ -1937,27 +2009,11 @@ void Preprocessor::HandleUndefDirective(Token &UndefTok) {
   if (!MI->isUsed() && MI->isWarnIfUnused())
     Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
 
-  // If the callbacks want to know, tell them about the macro #undef.
-  if (Callbacks)
-    Callbacks->MacroUndefined(MacroNameTok, MI);
-
   if (MI->isWarnIfUnused())
     WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
 
-  UndefineMacro(MacroNameTok.getIdentifierInfo(), MI,
-                MacroNameTok.getLocation());
-}
-
-void Preprocessor::UndefineMacro(IdentifierInfo *II, MacroInfo *MI,
-                                 SourceLocation UndefLoc) {
-  MI->setUndefLoc(UndefLoc);
-  if (MI->isFromAST()) {
-    MI->setChangedAfterLoad();
-    if (Listener)
-      Listener->UndefinedMacro(MI);
-  }
-
-  clearMacroInfo(II);
+  appendMacroDirective(MacroNameTok.getIdentifierInfo(),
+                       AllocateUndefMacroDirective(MacroNameTok.getLocation()));
 }
 
 
@@ -1991,7 +2047,8 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef,
   CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
 
   IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
-  MacroInfo *MI = getMacroInfo(MII);
+  MacroDirective *MD = getMacroDirective(MII);
+  MacroInfo *MI = MD ? MD->getMacroInfo() : 0;
 
   if (CurPPLexer->getConditionalStackDepth() == 0) {
     // If the start of a top-level #ifdef and if the macro is not defined,
@@ -2011,9 +2068,9 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef,
 
   if (Callbacks) {
     if (isIfndef)
-      Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok);
+      Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD);
     else
-      Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok);
+      Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD);
   }
 
   // Should we include the stuff contained by this directive?
diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp
index d5a88db..d9ce8bf 100644
--- a/lib/Lex/PPExpressions.cpp
+++ b/lib/Lex/PPExpressions.cpp
@@ -17,13 +17,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/MacroInfo.h"
-#include "clang/Lex/LiteralSupport.h"
-#include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/MacroInfo.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SaveAndRestore.h"
 using namespace clang;
 
 namespace {
@@ -111,15 +112,21 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
   Result.Val = II->hasMacroDefinition();
   Result.Val.setIsUnsigned(false);  // Result is signed intmax_t.
 
+  MacroDirective *Macro = 0;
   // If there is a macro, mark it used.
   if (Result.Val != 0 && ValueLive) {
-    MacroInfo *Macro = PP.getMacroInfo(II);
-    PP.markMacroAsUsed(Macro);
+    Macro = PP.getMacroDirective(II);
+    PP.markMacroAsUsed(Macro->getMacroInfo());
   }
 
   // Invoke the 'defined' callback.
-  if (PPCallbacks *Callbacks = PP.getPPCallbacks())
-    Callbacks->Defined(PeekTok);
+  if (PPCallbacks *Callbacks = PP.getPPCallbacks()) {
+    MacroDirective *MD = Macro;
+    // Pass the MacroDirective for the macro name even if the value is dead.
+    if (!MD && Result.Val != 0)
+      MD = PP.getMacroDirective(II);
+    Callbacks->Defined(PeekTok, MD);
+  }
 
   // If we are in parens, ensure we have a trailing ).
   if (LParenLoc.isValid()) {
@@ -224,7 +231,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
     if (!PP.getLangOpts().C99 && Literal.isLongLong) {
       if (PP.getLangOpts().CPlusPlus)
         PP.Diag(PeekTok,
-             PP.getLangOpts().CPlusPlus0x ?
+             PP.getLangOpts().CPlusPlus11 ?
              diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong);
       else
         PP.Diag(PeekTok, diag::ext_c99_longlong);
@@ -258,9 +265,9 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
     return false;
   }
   case tok::char_constant:          // 'x'
-  case tok::wide_char_constant: {   // L'x'
+  case tok::wide_char_constant:     // L'x'
   case tok::utf16_char_constant:    // u'x'
-  case tok::utf32_char_constant:    // U'x'
+  case tok::utf32_char_constant: {  // U'x'
     // Complain about, and drop, any ud-suffix.
     if (PeekTok.hasUDSuffix())
       PP.Diag(PeekTok, diag::err_pp_invalid_udl) << /*character*/0;
@@ -724,6 +731,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
 /// to "!defined(X)" return X in IfNDefMacro.
 bool Preprocessor::
 EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
+  SaveAndRestore<bool> PPDir(ParsingIfOrElifDirective, true);
   // Save the current state of 'DisableMacroExpansion' and reset it to false. If
   // 'DisableMacroExpansion' is true, then we must be in a macro argument list
   // in which case a directive is undefined behavior.  We want macros to be able
diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp
index d827f58..be4defe 100644
--- a/lib/Lex/PPLexerChange.cpp
+++ b/lib/Lex/PPLexerChange.cpp
@@ -13,15 +13,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/HeaderSearch.h"
-#include "clang/Lex/MacroInfo.h"
-#include "clang/Lex/LexDiagnostic.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/MacroInfo.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PathV2.h"
-#include "llvm/ADT/StringSwitch.h"
 using namespace clang;
 
 PPCallbacks::~PPCallbacks() {}
@@ -158,15 +158,17 @@ void Preprocessor::EnterSourceFileWithPTH(PTHLexer *PL,
 /// tokens from it instead of the current buffer.
 void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
                               MacroInfo *Macro, MacroArgs *Args) {
-  PushIncludeMacroStack();
-  CurDirLookup = 0;
-
+  TokenLexer *TokLexer; // Newly allocated, or recycled from TokenLexerCache.
   if (NumCachedTokenLexers == 0) {
-    CurTokenLexer.reset(new TokenLexer(Tok, ILEnd, Macro, Args, *this));
+    TokLexer = new TokenLexer(Tok, ILEnd, Macro, Args, *this);
   } else {
-    CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]);
-    CurTokenLexer->Init(Tok, ILEnd, Macro, Args);
+    TokLexer = TokenLexerCache[--NumCachedTokenLexers];
+    TokLexer->Init(Tok, ILEnd, Macro, Args);
   }
+  // Only now that the lexer is ready, save the current state and install it.
+  PushIncludeMacroStack();
+  CurDirLookup = 0;
+  CurTokenLexer.reset(TokLexer);
   if (CurLexerKind != CLK_LexAfterModuleImport)
     CurLexerKind = CLK_TokenLexer;
 }
@@ -186,18 +188,20 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
 void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
                                     bool DisableMacroExpansion,
                                     bool OwnsTokens) {
-  // Save our current state.
-  PushIncludeMacroStack();
-  CurDirLookup = 0;
-
   // Create a macro expander to expand from the specified token stream.
+  TokenLexer *TokLexer; // Newly allocated, or recycled from TokenLexerCache.
   if (NumCachedTokenLexers == 0) {
-    CurTokenLexer.reset(new TokenLexer(Toks, NumToks, DisableMacroExpansion,
-                                       OwnsTokens, *this));
+    TokLexer = new TokenLexer(Toks, NumToks, DisableMacroExpansion,
+                              OwnsTokens, *this);
   } else {
-    CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]);
-    CurTokenLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens);
+    TokLexer = TokenLexerCache[--NumCachedTokenLexers];
+    TokLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens);
   }
+
+  // Save our current state, then make the prepared lexer the current one.
+  PushIncludeMacroStack();
+  CurDirLookup = 0;
+  CurTokenLexer.reset(TokLexer);
   if (CurLexerKind != CLK_LexAfterModuleImport)
     CurLexerKind = CLK_TokenLexer;
 }
@@ -328,6 +332,17 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
     CurLexer->BufferPtr = EndPos;
     CurLexer->FormTokenWithChars(Result, EndPos, tok::eof);
 
+    if (isCodeCompletionEnabled()) {
+      // Inserting the code-completion point increases the source buffer by 1,
+      // but the main FileID was created before inserting the point.
+      // Compensate by reducing the EOF location by 1, otherwise the location
+      // will point to the next FileID.
+      // FIXME: This is hacky, the code-completion point should probably be
+      // inserted before the main FileID is created.
+      if (CurLexer->getFileLoc() == CodeCompletionFileLoc)
+        Result.setLocation(Result.getLocation().getLocWithOffset(-1));
+    }
+
     if (!isIncrementalProcessingEnabled())
       // We're done with lexing.
       CurLexer.reset();
@@ -380,7 +395,7 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
                 SmallString<128> RelativePath;
                 computeRelativePath(FileMgr, Dir, Header, RelativePath);              
                 Diag(StartLoc, diag::warn_uncovered_module_header)
-                  << RelativePath;
+                  << Mod->getFullModuleName() << RelativePath;
               }
             }
         }
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index eee4342..21451f5 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -14,25 +14,26 @@
 
 #include "clang/Lex/Preprocessor.h"
 #include "MacroArgs.h"
-#include "clang/Lex/MacroInfo.h"
-#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
-#include "clang/Lex/LexDiagnostic.h"
 #include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Lex/ExternalPreprocessorSource.h"
-#include "clang/Lex/LiteralSupport.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/MacroInfo.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Config/llvm-config.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cstdio>
 #include <ctime>
 using namespace clang;
 
-MacroInfo *Preprocessor::getMacroInfoHistory(IdentifierInfo *II) const {
+MacroDirective *
+Preprocessor::getMacroDirectiveHistory(const IdentifierInfo *II) const {
   assert(II->hadMacroDefinition() && "Identifier has not been not a macro!");
 
   macro_iterator Pos = Macros.find(II);
@@ -40,125 +41,31 @@ MacroInfo *Preprocessor::getMacroInfoHistory(IdentifierInfo *II) const {
   return Pos->second;
 }
 
-/// setMacroInfo - Specify a macro for this identifier.
-///
-void Preprocessor::setMacroInfo(IdentifierInfo *II, MacroInfo *MI) {
-  assert(MI && "MacroInfo should be non-zero!");
-  assert(MI->getUndefLoc().isInvalid() &&
-         "Undefined macros cannot be registered");
-
-  MacroInfo *&StoredMI = Macros[II];
-  MI->setPreviousDefinition(StoredMI);
-  StoredMI = MI;
-  II->setHasMacroDefinition(MI->getUndefLoc().isInvalid());
-  if (II->isFromAST())
-    II->setChangedSinceDeserialization();
-}
-
-void Preprocessor::addLoadedMacroInfo(IdentifierInfo *II, MacroInfo *MI,
-                                      MacroInfo *Hint) {
-  assert(MI && "Missing macro?");
-  assert(MI->isFromAST() && "Macro is not from an AST?");
-  assert(!MI->getPreviousDefinition() && "Macro already in chain?");
-  
-  MacroInfo *&StoredMI = Macros[II];
-
-  // Easy case: this is the first macro definition for this macro.
-  if (!StoredMI) {
-    StoredMI = MI;
-
-    if (MI->isDefined())
-      II->setHasMacroDefinition(true);
-    return;
-  }
-
-  // If this macro is a definition and this identifier has been neither
-  // defined nor undef'd in the current translation unit, add this macro
-  // to the end of the chain of definitions.
-  if (MI->isDefined() && StoredMI->isFromAST()) {
-    // Simple case: if this is the first actual definition, just put it at
-    // th beginning.
-    if (!StoredMI->isDefined()) {
-      MI->setPreviousDefinition(StoredMI);
-      StoredMI = MI;
-
-      II->setHasMacroDefinition(true);
-      return;
-    }
-
-    // Find the end of the definition chain.
-    MacroInfo *Prev;
-    MacroInfo *PrevPrev = StoredMI;
-    bool Ambiguous = StoredMI->isAmbiguous();
-    bool MatchedOther = false;
-    do {
-      Prev = PrevPrev;
-
-      // If the macros are not identical, we have an ambiguity.
-      if (!Prev->isIdenticalTo(*MI, *this)) {
-        if (!Ambiguous) {
-          Ambiguous = true;
-          StoredMI->setAmbiguous(true);
-        }
-      } else {
-        MatchedOther = true;
-      }
-    } while ((PrevPrev = Prev->getPreviousDefinition()) &&
-             PrevPrev->isDefined());
-
-    // If there are ambiguous definitions, and we didn't match any other
-    // definition, then mark us as ambiguous.
-    if (Ambiguous && !MatchedOther)
-      MI->setAmbiguous(true);
-
-    // Wire this macro information into the chain.
-    MI->setPreviousDefinition(Prev->getPreviousDefinition());
-    Prev->setPreviousDefinition(MI);
-    return;
-  }
-
-  // The macro is not a definition; put it at the end of the list.
-  MacroInfo *Prev = Hint? Hint : StoredMI;
-  while (Prev->getPreviousDefinition())
-    Prev = Prev->getPreviousDefinition();
-  Prev->setPreviousDefinition(MI);
-}
-
-void Preprocessor::makeLoadedMacroInfoVisible(IdentifierInfo *II,
-                                              MacroInfo *MI) {
-  assert(MI->isFromAST() && "Macro must be from the AST");
-
-  MacroInfo *&StoredMI = Macros[II];
-  if (StoredMI == MI) {
-    // Easy case: this is the first macro anyway.
-    II->setHasMacroDefinition(MI->isDefined());
-    return;
-  }
-
-  // Go find the macro and pull it out of the list.
-  // FIXME: Yes, this is O(N), and making a pile of macros visible or hidden
-  // would be quadratic, but it's extremely rare.
-  MacroInfo *Prev = StoredMI;
-  while (Prev->getPreviousDefinition() != MI)
-    Prev = Prev->getPreviousDefinition();
-  Prev->setPreviousDefinition(MI->getPreviousDefinition());
-  MI->setPreviousDefinition(0);
-
-  // Add the macro back to the list.
-  addLoadedMacroInfo(II, MI);
-
-  II->setHasMacroDefinition(StoredMI->isDefined());
-  if (II->isFromAST())
+/// \brief Make MD the newest entry in the macro directive history of II.
+void Preprocessor::appendMacroDirective(IdentifierInfo *II, MacroDirective *MD){
+  assert(MD && "MacroDirective should be non-zero!");
+  assert(!MD->getPrevious() && "Already attached to a MacroDirective history.");
+  MacroDirective *&Head = Macros[II];
+  MD->setPrevious(Head);
+  Head = MD;
+  II->setHasMacroDefinition(MD->isDefined());
+  // An imported definition is not flagged as a change since deserialization.
+  DefMacroDirective *DefMD = dyn_cast<DefMacroDirective>(MD);
+  if (II->isFromAST() && !(DefMD && DefMD->isImported()))
     II->setChangedSinceDeserialization();
 }
 
-/// \brief Undefine a macro for this identifier.
-void Preprocessor::clearMacroInfo(IdentifierInfo *II) {
-  assert(II->hasMacroDefinition() && "Macro is not defined!");
-  assert(Macros[II]->getUndefLoc().isValid() && "Macro is still defined!");
-  II->setHasMacroDefinition(false);
-  if (II->isFromAST())
-    II->setChangedSinceDeserialization();
+/// \brief Install MD as the macro history of II; none may be recorded yet.
+void Preprocessor::setLoadedMacroDirective(IdentifierInfo *II,
+                                           MacroDirective *MD) {
+  assert(II && MD);
+  MacroDirective *&Slot = Macros[II];
+  assert(!Slot &&
+         "the macro history was modified before initializing it from a pch");
+  Slot = MD;
+  // Flag the identifier as having macro history, then unset if not defined.
+  II->setHasMacroDefinition(true);
+  if (!MD->isDefined()) II->setHasMacroDefinition(false);
 }
 
 /// RegisterBuiltinMacro - Register the specified identifier in the identifier
@@ -170,7 +77,7 @@ static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name){
   // Mark it as being a macro that is builtin.
   MacroInfo *MI = PP.AllocateMacroInfo(SourceLocation());
   MI->setIsBuiltinMacro();
-  PP.setMacroInfo(Id, MI);
+  PP.appendDefMacroDirective(Id, MI);
   return Id;
 }
 
@@ -303,7 +210,11 @@ bool Preprocessor::isNextPPTokenLParen() {
 /// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
 /// expanded as a macro, handle it and return the next token as 'Identifier'.
 bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
-                                                 MacroInfo *MI) {
+                                                 MacroDirective *MD) {
+  MacroDirective::DefInfo Def = MD->getDefinition();
+  assert(Def.isValid());
+  MacroInfo *MI = Def.getMacroInfo();
+
   // If this is a macro expansion in the "#if !defined(x)" line for the file,
   // then the macro could expand to different things in other contexts, we need
   // to disable the optimization in this case.
@@ -311,7 +222,7 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
 
   // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially.
   if (MI->isBuiltinMacro()) {
-    if (Callbacks) Callbacks->MacroExpands(Identifier, MI,
+    if (Callbacks) Callbacks->MacroExpands(Identifier, MD,
                                            Identifier.getLocation());
     ExpandBuiltinMacro(Identifier);
     return false;
@@ -364,13 +275,13 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
       // MacroExpands callbacks still happen in source order, queue this
       // callback to have it happen after the function macro callback.
       DelayedMacroExpandsCallbacks.push_back(
-                              MacroExpandsInfo(Identifier, MI, ExpansionRange));
+                              MacroExpandsInfo(Identifier, MD, ExpansionRange));
     } else {
-      Callbacks->MacroExpands(Identifier, MI, ExpansionRange);
+      Callbacks->MacroExpands(Identifier, MD, ExpansionRange);
       if (!DelayedMacroExpandsCallbacks.empty()) {
         for (unsigned i=0, e = DelayedMacroExpandsCallbacks.size(); i!=e; ++i) {
           MacroExpandsInfo &Info = DelayedMacroExpandsCallbacks[i];
-          Callbacks->MacroExpands(Info.Tok, Info.MI, Info.Range);
+          Callbacks->MacroExpands(Info.Tok, Info.MD, Info.Range);
         }
         DelayedMacroExpandsCallbacks.clear();
       }
@@ -378,16 +289,17 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
   }
 
   // If the macro definition is ambiguous, complain.
-  if (MI->isAmbiguous()) {
+  if (Def.getDirective()->isAmbiguous()) {
     Diag(Identifier, diag::warn_pp_ambiguous_macro)
       << Identifier.getIdentifierInfo();
     Diag(MI->getDefinitionLoc(), diag::note_pp_ambiguous_macro_chosen)
       << Identifier.getIdentifierInfo();
-    for (MacroInfo *PrevMI = MI->getPreviousDefinition();
-         PrevMI && PrevMI->isDefined();
-         PrevMI = PrevMI->getPreviousDefinition()) {
-      if (PrevMI->isAmbiguous()) {
-        Diag(PrevMI->getDefinitionLoc(), diag::note_pp_ambiguous_macro_other)
+    for (MacroDirective::DefInfo PrevDef = Def.getPreviousDefinition();
+         PrevDef && !PrevDef.isUndefined();
+         PrevDef = PrevDef.getPreviousDefinition()) {
+      if (PrevDef.getDirective()->isAmbiguous()) {
+        Diag(PrevDef.getMacroInfo()->getDefinitionLoc(),
+             diag::note_pp_ambiguous_macro_other)
           << Identifier.getIdentifierInfo();
       }
     }
@@ -455,7 +367,10 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
       if (MacroInfo *NewMI = getMacroInfo(NewII))
         if (!NewMI->isEnabled() || NewMI == MI) {
           Identifier.setFlag(Token::DisableExpand);
-          Diag(Identifier, diag::pp_disabled_macro_expansion);
+          // Don't warn for "#define X X" like "#define bool bool" from
+          // stdbool.h.
+          if (NewMI != MI || MI->isFunctionLike())
+            Diag(Identifier, diag::pp_disabled_macro_expansion);
         }
     }
 
@@ -497,9 +412,13 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
   // argument is separated by an EOF token.  Use a SmallVector so we can avoid
   // heap allocations in the common case.
   SmallVector<Token, 64> ArgTokens;
+  bool ContainsCodeCompletionTok = false;
 
   unsigned NumActuals = 0;
   while (Tok.isNot(tok::r_paren)) {
+    if (ContainsCodeCompletionTok && (Tok.is(tok::eof) || Tok.is(tok::eod)))
+      break;
+
     assert((Tok.is(tok::l_paren) || Tok.is(tok::comma)) &&
            "only expect argument separators here");
 
@@ -516,10 +435,20 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
       LexUnexpandedToken(Tok);
 
       if (Tok.is(tok::eof) || Tok.is(tok::eod)) { // "#if f(<eof>" & "#if f(\n"
-        Diag(MacroName, diag::err_unterm_macro_invoc);
-        // Do not lose the EOF/EOD.  Return it to the client.
-        MacroName = Tok;
-        return 0;
+        if (!ContainsCodeCompletionTok) {
+          Diag(MacroName, diag::err_unterm_macro_invoc);
+          Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+            << MacroName.getIdentifierInfo();
+          // Do not lose the EOF/EOD.  Return it to the client.
+          MacroName = Tok;
+          return 0;
+        } else {
+          // Do not lose the EOF/EOD.
+          Token *Toks = new Token[1];
+          Toks[0] = Tok;
+          EnterTokenStream(Toks, 1, true, true);
+          break;
+        }
       } else if (Tok.is(tok::r_paren)) {
         // If we found the ) token, the macro arg list is done.
         if (NumParens-- == 0) {
@@ -550,6 +479,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
           if (!MI->isEnabled())
             Tok.setFlag(Token::DisableExpand);
       } else if (Tok.is(tok::code_completion)) {
+        ContainsCodeCompletionTok = true;
         if (CodeComplete)
           CodeComplete->CodeCompleteMacroArgument(MacroName.getIdentifierInfo(),
                                                   MI, NumActuals);
@@ -572,16 +502,20 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
       if (ArgTokens.size() != ArgTokenStart)
         ArgStartLoc = ArgTokens[ArgTokenStart].getLocation();
 
-      // Emit the diagnostic at the macro name in case there is a missing ).
-      // Emitting it at the , could be far away from the macro name.
-      Diag(ArgStartLoc, diag::err_too_many_args_in_macro_invoc);
-      return 0;
+      if (!ContainsCodeCompletionTok) {
+        // Emit the diagnostic at the macro name in case there is a missing ).
+        // Emitting it at the , could be far away from the macro name.
+        Diag(ArgStartLoc, diag::err_too_many_args_in_macro_invoc);
+        Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+          << MacroName.getIdentifierInfo();
+        return 0;
+      }
     }
 
     // Empty arguments are standard in C99 and C++0x, and are supported as an extension in
     // other modes.
     if (ArgTokens.size() == ArgTokenStart && !LangOpts.C99)
-      Diag(Tok, LangOpts.CPlusPlus0x ?
+      Diag(Tok, LangOpts.CPlusPlus11 ?
            diag::warn_cxx98_compat_empty_fnmacro_arg :
            diag::ext_empty_fnmacro_arg);
 
@@ -593,8 +527,10 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
     EOFTok.setLength(0);
     ArgTokens.push_back(EOFTok);
     ++NumActuals;
-    assert(NumFixedArgsLeft != 0 && "Too many arguments parsed");
-    --NumFixedArgsLeft;
+    if (!ContainsCodeCompletionTok || NumFixedArgsLeft != 0) {
+      assert(NumFixedArgsLeft != 0 && "Too many arguments parsed");
+      --NumFixedArgsLeft;
+    }
   }
 
   // Okay, we either found the r_paren.  Check to see if we parsed too few
@@ -604,6 +540,17 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
   // See MacroArgs instance var for description of this.
   bool isVarargsElided = false;
 
+  if (ContainsCodeCompletionTok) {
+    // Recover from not-fully-formed macro invocation during code-completion.
+    Token EOFTok;
+    EOFTok.startToken();
+    EOFTok.setKind(tok::eof);
+    EOFTok.setLocation(Tok.getLocation());
+    EOFTok.setLength(0);
+    for (; NumActuals < MinArgsExpected; ++NumActuals)
+      ArgTokens.push_back(EOFTok);
+  }
+
   if (NumActuals < MinArgsExpected) {
     // There are several cases where too few arguments is ok, handle them now.
     if (NumActuals == 0 && MinArgsExpected == 1) {
@@ -619,9 +566,14 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
       // Varargs where the named vararg parameter is missing: OK as extension.
       //   #define A(x, ...)
       //   A("blah")
-      Diag(Tok, diag::ext_missing_varargs_arg);
-      Diag(MI->getDefinitionLoc(), diag::note_macro_here)
-        << MacroName.getIdentifierInfo();
+      //
+      // If the macro contains the comma pasting extension, the diagnostic
+      // is suppressed; we know we'll get another diagnostic later.
+      if (!MI->hasCommaPasting()) {
+        Diag(Tok, diag::ext_missing_varargs_arg);
+        Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+          << MacroName.getIdentifierInfo();
+      }
 
       // Remember this occurred, allowing us to elide the comma when used for
       // cases like:
@@ -630,9 +582,11 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
       //   #define C(...) blah(a, ## __VA_ARGS__)
       //  A(x) B(x) C()
       isVarargsElided = true;
-    } else {
+    } else if (!ContainsCodeCompletionTok) {
       // Otherwise, emit the error.
       Diag(Tok, diag::err_too_few_args_in_macro_invoc);
+      Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+        << MacroName.getIdentifierInfo();
       return 0;
     }
 
@@ -648,10 +602,13 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
     if (NumActuals == 0 && MinArgsExpected == 2)
       ArgTokens.push_back(Tok);
 
-  } else if (NumActuals > MinArgsExpected && !MI->isVariadic()) {
+  } else if (NumActuals > MinArgsExpected && !MI->isVariadic() &&
+             !ContainsCodeCompletionTok) {
     // Emit the diagnostic at the macro name in case there is a missing ).
     // Emitting it at the , could be far away from the macro name.
     Diag(MacroName, diag::err_too_many_args_in_macro_invoc);
+    Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+      << MacroName.getIdentifierInfo();
     return 0;
   }
 
@@ -745,7 +702,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
     Feature = Feature.substr(2, Feature.size() - 4);
 
   return llvm::StringSwitch<bool>(Feature)
-           .Case("address_sanitizer", LangOpts.SanitizeAddress)
+           .Case("address_sanitizer", LangOpts.Sanitize.Address)
            .Case("attribute_analyzer_noreturn", true)
            .Case("attribute_availability", true)
            .Case("attribute_availability_with_message", true)
@@ -767,6 +724,8 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
            .Case("cxx_exceptions", LangOpts.Exceptions)
            .Case("cxx_rtti", LangOpts.RTTI)
            .Case("enumerator_attributes", true)
+           .Case("memory_sanitizer", LangOpts.Sanitize.Memory)
+           .Case("thread_sanitizer", LangOpts.Sanitize.Thread)
            // Objective-C features
            .Case("objc_arr", LangOpts.ObjCAutoRefCount) // FIXME: REMOVE?
            .Case("objc_arc", LangOpts.ObjCAutoRefCount)
@@ -776,6 +735,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
            .Case("objc_instancetype", LangOpts.ObjC2)
            .Case("objc_modules", LangOpts.ObjC2 && LangOpts.Modules)
            .Case("objc_nonfragile_abi", LangOpts.ObjCRuntime.isNonFragile())
+           .Case("objc_property_explicit_atomic", true) // Does clang support explicit "atomic" keyword?
            .Case("objc_weak_class", LangOpts.ObjCRuntime.hasWeakClassImport())
            .Case("ownership_holds", true)
            .Case("ownership_returns", true)
@@ -792,41 +752,41 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
            .Case("c_generic_selections", LangOpts.C11)
            .Case("c_static_assert", LangOpts.C11)
            // C++11 features
-           .Case("cxx_access_control_sfinae", LangOpts.CPlusPlus0x)
-           .Case("cxx_alias_templates", LangOpts.CPlusPlus0x)
-           .Case("cxx_alignas", LangOpts.CPlusPlus0x)
-           .Case("cxx_atomic", LangOpts.CPlusPlus0x)
-           .Case("cxx_attributes", LangOpts.CPlusPlus0x)
-           .Case("cxx_auto_type", LangOpts.CPlusPlus0x)
-           .Case("cxx_constexpr", LangOpts.CPlusPlus0x)
-           .Case("cxx_decltype", LangOpts.CPlusPlus0x)
-           .Case("cxx_decltype_incomplete_return_types", LangOpts.CPlusPlus0x)
-           .Case("cxx_default_function_template_args", LangOpts.CPlusPlus0x)
-           .Case("cxx_defaulted_functions", LangOpts.CPlusPlus0x)
-           .Case("cxx_delegating_constructors", LangOpts.CPlusPlus0x)
-           .Case("cxx_deleted_functions", LangOpts.CPlusPlus0x)
-           .Case("cxx_explicit_conversions", LangOpts.CPlusPlus0x)
-           .Case("cxx_generalized_initializers", LangOpts.CPlusPlus0x)
-           .Case("cxx_implicit_moves", LangOpts.CPlusPlus0x)
+           .Case("cxx_access_control_sfinae", LangOpts.CPlusPlus11)
+           .Case("cxx_alias_templates", LangOpts.CPlusPlus11)
+           .Case("cxx_alignas", LangOpts.CPlusPlus11)
+           .Case("cxx_atomic", LangOpts.CPlusPlus11)
+           .Case("cxx_attributes", LangOpts.CPlusPlus11)
+           .Case("cxx_auto_type", LangOpts.CPlusPlus11)
+           .Case("cxx_constexpr", LangOpts.CPlusPlus11)
+           .Case("cxx_decltype", LangOpts.CPlusPlus11)
+           .Case("cxx_decltype_incomplete_return_types", LangOpts.CPlusPlus11)
+           .Case("cxx_default_function_template_args", LangOpts.CPlusPlus11)
+           .Case("cxx_defaulted_functions", LangOpts.CPlusPlus11)
+           .Case("cxx_delegating_constructors", LangOpts.CPlusPlus11)
+           .Case("cxx_deleted_functions", LangOpts.CPlusPlus11)
+           .Case("cxx_explicit_conversions", LangOpts.CPlusPlus11)
+           .Case("cxx_generalized_initializers", LangOpts.CPlusPlus11)
+           .Case("cxx_implicit_moves", LangOpts.CPlusPlus11)
          //.Case("cxx_inheriting_constructors", false)
-           .Case("cxx_inline_namespaces", LangOpts.CPlusPlus0x)
-           .Case("cxx_lambdas", LangOpts.CPlusPlus0x)
-           .Case("cxx_local_type_template_args", LangOpts.CPlusPlus0x)
-           .Case("cxx_nonstatic_member_init", LangOpts.CPlusPlus0x)
-           .Case("cxx_noexcept", LangOpts.CPlusPlus0x)
-           .Case("cxx_nullptr", LangOpts.CPlusPlus0x)
-           .Case("cxx_override_control", LangOpts.CPlusPlus0x)
-           .Case("cxx_range_for", LangOpts.CPlusPlus0x)
-           .Case("cxx_raw_string_literals", LangOpts.CPlusPlus0x)
-           .Case("cxx_reference_qualified_functions", LangOpts.CPlusPlus0x)
-           .Case("cxx_rvalue_references", LangOpts.CPlusPlus0x)
-           .Case("cxx_strong_enums", LangOpts.CPlusPlus0x)
-           .Case("cxx_static_assert", LangOpts.CPlusPlus0x)
-           .Case("cxx_trailing_return", LangOpts.CPlusPlus0x)
-           .Case("cxx_unicode_literals", LangOpts.CPlusPlus0x)
-           .Case("cxx_unrestricted_unions", LangOpts.CPlusPlus0x)
-           .Case("cxx_user_literals", LangOpts.CPlusPlus0x)
-           .Case("cxx_variadic_templates", LangOpts.CPlusPlus0x)
+           .Case("cxx_inline_namespaces", LangOpts.CPlusPlus11)
+           .Case("cxx_lambdas", LangOpts.CPlusPlus11)
+           .Case("cxx_local_type_template_args", LangOpts.CPlusPlus11)
+           .Case("cxx_nonstatic_member_init", LangOpts.CPlusPlus11)
+           .Case("cxx_noexcept", LangOpts.CPlusPlus11)
+           .Case("cxx_nullptr", LangOpts.CPlusPlus11)
+           .Case("cxx_override_control", LangOpts.CPlusPlus11)
+           .Case("cxx_range_for", LangOpts.CPlusPlus11)
+           .Case("cxx_raw_string_literals", LangOpts.CPlusPlus11)
+           .Case("cxx_reference_qualified_functions", LangOpts.CPlusPlus11)
+           .Case("cxx_rvalue_references", LangOpts.CPlusPlus11)
+           .Case("cxx_strong_enums", LangOpts.CPlusPlus11)
+           .Case("cxx_static_assert", LangOpts.CPlusPlus11)
+           .Case("cxx_trailing_return", LangOpts.CPlusPlus11)
+           .Case("cxx_unicode_literals", LangOpts.CPlusPlus11)
+           .Case("cxx_unrestricted_unions", LangOpts.CPlusPlus11)
+           .Case("cxx_user_literals", LangOpts.CPlusPlus11)
+           .Case("cxx_variadic_templates", LangOpts.CPlusPlus11)
            // Type traits
            .Case("has_nothrow_assign", LangOpts.CPlusPlus)
            .Case("has_nothrow_copy", LangOpts.CPlusPlus)
@@ -840,10 +800,6 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
            .Case("is_base_of", LangOpts.CPlusPlus)
            .Case("is_class", LangOpts.CPlusPlus)
            .Case("is_convertible_to", LangOpts.CPlusPlus)
-            // __is_empty is available only if the horrible
-            // "struct __is_empty" parsing hack hasn't been needed in this
-            // translation unit. If it has, __is_empty reverts to a normal
-            // identifier and __has_feature(is_empty) evaluates false.
            .Case("is_empty", LangOpts.CPlusPlus)
            .Case("is_enum", LangOpts.CPlusPlus)
            .Case("is_final", LangOpts.CPlusPlus)
@@ -926,9 +882,15 @@ static bool EvaluateHasIncludeCommon(Token &Tok,
                                      IdentifierInfo *II, Preprocessor &PP,
                                      const DirectoryLookup *LookupFrom) {
   // Save the location of the current token.  If a '(' is later found, use
-  // that location.  If no, use the end of this location instead.
+  // that location.  If not, use the end of this location instead.
   SourceLocation LParenLoc = Tok.getLocation();
 
+  // These expressions are only allowed within a preprocessor directive.
+  if (!PP.isParsingIfOrElifDirective()) {
+    PP.Diag(LParenLoc, diag::err_pp_directive_required) << II->getName();
+    return false;
+  }
+
   // Get '('.
   PP.LexNonComment(Tok);
 
@@ -946,8 +908,14 @@ static bool EvaluateHasIncludeCommon(Token &Tok,
     // Save '(' location for possible missing ')' message.
     LParenLoc = Tok.getLocation();
 
-    // Get the file name.
-    PP.getCurrentLexer()->LexIncludeFilename(Tok);
+    if (PP.getCurrentLexer()) {
+      // Get the file name.
+      PP.getCurrentLexer()->LexIncludeFilename(Tok);
+    } else {
+      // We're in a macro, so we can't use LexIncludeFilename; just
+      // grab the next token.
+      PP.Lex(Tok);
+    }
   }
 
   // Reserve a buffer to get the spelling.
@@ -1223,15 +1191,15 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     IdentifierInfo *FeatureII = 0;
 
     // Read the '('.
-    Lex(Tok);
+    LexUnexpandedToken(Tok);
     if (Tok.is(tok::l_paren)) {
       // Read the identifier
-      Lex(Tok);
+      LexUnexpandedToken(Tok);
       if (Tok.is(tok::identifier) || Tok.is(tok::kw_const)) {
         FeatureII = Tok.getIdentifierInfo();
 
         // Read the ')'.
-        Lex(Tok);
+        LexUnexpandedToken(Tok);
         if (Tok.is(tok::r_paren))
           IsValid = true;
       }
@@ -1275,69 +1243,49 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     bool IsValid = false;
     bool Value = false;
     // Read the '('.
-    Lex(Tok);
+    LexUnexpandedToken(Tok);
     do {
-      if (Tok.is(tok::l_paren)) {      
-        // Read the string.
-        Lex(Tok);
-      
-        // We need at least one string literal.
-        if (!Tok.is(tok::string_literal)) {
-          StartLoc = Tok.getLocation();
-          IsValid = false;
-          // Eat tokens until ')'.
-          do Lex(Tok); while (!(Tok.is(tok::r_paren) || Tok.is(tok::eod)));
-          break;
-        }
-        
-        // String concatenation allows multiple strings, which can even come
-        // from macro expansion.
-        SmallVector<Token, 4> StrToks;
-        while (Tok.is(tok::string_literal)) {
-          // Complain about, and drop, any ud-suffix.
-          if (Tok.hasUDSuffix())
-            Diag(Tok, diag::err_invalid_string_udl);
-          StrToks.push_back(Tok);
+      if (Tok.isNot(tok::l_paren)) {
+        Diag(StartLoc, diag::err_warning_check_malformed);
+        break;
+      }
+
+      LexUnexpandedToken(Tok);
+      std::string WarningName;
+      SourceLocation StrStartLoc = Tok.getLocation();
+      if (!FinishLexStringLiteral(Tok, WarningName, "'__has_warning'",
+                                  /*MacroExpansion=*/false)) {
+        // Eat tokens until ')'.
+        while (Tok.isNot(tok::r_paren) && Tok.isNot(tok::eod) &&
+               Tok.isNot(tok::eof))
           LexUnexpandedToken(Tok);
-        }
-        
-        // Is the end a ')'?
-        if (!(IsValid = Tok.is(tok::r_paren)))
-          break;
-        
-        // Concatenate and parse the strings.
-        StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
-        assert(Literal.isAscii() && "Didn't allow wide strings in");
-        if (Literal.hadError)
-          break;
-        if (Literal.Pascal) {
-          Diag(Tok, diag::warn_pragma_diagnostic_invalid);
-          break;
-        }
-        
-        StringRef WarningName(Literal.GetString());
-        
-        if (WarningName.size() < 3 || WarningName[0] != '-' ||
-            WarningName[1] != 'W') {
-          Diag(StrToks[0].getLocation(), diag::warn_has_warning_invalid_option);
-          break;
-        }
-        
-        // Finally, check if the warning flags maps to a diagnostic group.
-        // We construct a SmallVector here to talk to getDiagnosticIDs().
-        // Although we don't use the result, this isn't a hot path, and not
-        // worth special casing.
-        llvm::SmallVector<diag::kind, 10> Diags;
-        Value = !getDiagnostics().getDiagnosticIDs()->
-          getDiagnosticsInGroup(WarningName.substr(2), Diags);
+        break;
+      }
+
+      // Is the end a ')'?
+      if (!(IsValid = Tok.is(tok::r_paren))) {
+        Diag(StartLoc, diag::err_warning_check_malformed);
+        break;
+      }
+
+      if (WarningName.size() < 3 || WarningName[0] != '-' ||
+          WarningName[1] != 'W') {
+        Diag(StrStartLoc, diag::warn_has_warning_invalid_option);
+        break;
       }
+
+      // Finally, check if the warning flags maps to a diagnostic group.
+      // We construct a SmallVector here to talk to getDiagnosticIDs().
+      // Although we don't use the result, this isn't a hot path, and not
+      // worth special casing.
+      SmallVector<diag::kind, 10> Diags;
+      Value = !getDiagnostics().getDiagnosticIDs()->
+        getDiagnosticsInGroup(WarningName.substr(2), Diags);
     } while (false);
-    
-    if (!IsValid)
-      Diag(StartLoc, diag::err_warning_check_malformed);
 
     OS << (int)Value;
-    Tok.setKind(tok::numeric_constant);
+    if (IsValid)
+      Tok.setKind(tok::numeric_constant);
   } else if (II == Ident__building_module) {
     // The argument to this builtin should be an identifier. The
     // builtin evaluates to 1 when that identifier names the module we are
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index b167172..e8f43f7 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -11,17 +11,16 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/PTHLexer.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/FileSystemStatCache.h"
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/OnDiskHashTable.h"
+#include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/LexDiagnostic.h"
-#include "clang/Lex/PTHLexer.h"
-#include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/PTHManager.h"
-#include "clang/Lex/Token.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/Token.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
@@ -679,13 +678,13 @@ public:
   ~PTHStatCache() {}
 
   LookupResult getStat(const char *Path, struct stat &StatBuf,
-                       int *FileDescriptor) {
+                       bool isFile, int *FileDescriptor) {
     // Do the lookup for the file's data in the PTH file.
     CacheTy::iterator I = Cache.find(Path);
 
     // If we don't get a hit in the PTH file just forward to 'stat'.
     if (I == Cache.end())
-      return statChained(Path, StatBuf, FileDescriptor);
+      return statChained(Path, StatBuf, isFile, FileDescriptor);
 
     const PTHStatData &Data = *I;
 
diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp
index e7e6c37..95e8a8c 100644
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp
@@ -13,13 +13,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/Pragma.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LexDiagnostic.h"
 #include "clang/Lex/LiteralSupport.h"
-#include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/MacroInfo.h"
-#include "clang/Lex/LexDiagnostic.h"
-#include "clang/Basic/FileManager.h"
-#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Preprocessor.h"
 #include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
@@ -184,7 +184,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
 
   // Read the '"..."'.
   Lex(Tok);
-  if (Tok.isNot(tok::string_literal) && Tok.isNot(tok::wide_string_literal)) {
+  if (!tok::isStringLiteral(Tok.getKind())) {
     Diag(PragmaLoc, diag::err__Pragma_malformed);
     // Skip this token, and the ')', if present.
     if (Tok.isNot(tok::r_paren))
@@ -219,15 +219,50 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
   SourceLocation RParenLoc = Tok.getLocation();
   std::string StrVal = getSpelling(StrTok);
 
-  // The _Pragma is lexically sound.  Destringize according to C99 6.10.9.1:
-  // "The string literal is destringized by deleting the L prefix, if present,
+  // The _Pragma is lexically sound.  Destringize according to C11 6.10.9.1:
+  // "The string literal is destringized by deleting any encoding prefix,
   // deleting the leading and trailing double-quotes, replacing each escape
   // sequence \" by a double-quote, and replacing each escape sequence \\ by a
   // single backslash."
-  if (StrVal[0] == 'L')  // Remove L prefix.
+  if (StrVal[0] == 'L' || StrVal[0] == 'U' ||
+      (StrVal[0] == 'u' && StrVal[1] != '8'))
     StrVal.erase(StrVal.begin());
-  assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
-         "Invalid string token!");
+  else if (StrVal[0] == 'u')
+    StrVal.erase(StrVal.begin(), StrVal.begin() + 2);
+
+  if (StrVal[0] == 'R') {
+    // FIXME: C++11 does not specify how to handle raw-string-literals here.
+    // We strip off the 'R', the quotes, the d-char-sequences, and the parens.
+    assert(StrVal[1] == '"' && StrVal[StrVal.size() - 1] == '"' &&
+           "Invalid raw string token!");
+
+    // Measure the length of the d-char-sequence.
+    unsigned NumDChars = 0;
+    while (StrVal[2 + NumDChars] != '(') {
+      assert(NumDChars < (StrVal.size() - 5) / 2 &&
+             "Invalid raw string token!");
+      ++NumDChars;
+    }
+    assert(StrVal[StrVal.size() - 2 - NumDChars] == ')');
+
+    // Remove 'R " d-char-sequence' and 'd-char-sequence "'. We'll replace the
+    // parens below.
+    StrVal.erase(0, 2 + NumDChars);
+    StrVal.erase(StrVal.size() - 1 - NumDChars);
+  } else {
+    assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
+           "Invalid string token!");
+
+    // Remove escaped quotes and escapes.
+    for (unsigned i = 1, e = StrVal.size(); i < e-2; ++i) {
+      if (StrVal[i] == '\\' &&
+          (StrVal[i+1] == '\\' || StrVal[i+1] == '"')) {
+        // \\ -> '\' and \" -> '"'.
+        StrVal.erase(StrVal.begin()+i);
+        --e;
+      }
+    }
+  }
 
   // Remove the front quote, replacing it with a space, so that the pragma
   // contents appear to have a space before them.
@@ -236,16 +271,6 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
   // Replace the terminating quote with a \n.
   StrVal[StrVal.size()-1] = '\n';
 
-  // Remove escaped quotes and escapes.
-  for (unsigned i = 0, e = StrVal.size(); i != e-1; ++i) {
-    if (StrVal[i] == '\\' &&
-        (StrVal[i+1] == '\\' || StrVal[i+1] == '"')) {
-      // \\ -> '\' and \" -> '"'.
-      StrVal.erase(StrVal.begin()+i);
-      --e;
-    }
-  }
-  
   // Plop the string (including the newline and trailing null) into a buffer
   // where we can lex it.
   Token TmpTok;
@@ -470,7 +495,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
 ///
 /// The syntax is:
 /// \code
-///   \#pragma comment(linker, "foo")
+///   #pragma comment(linker, "foo")
 /// \endcode
 /// 'linker' is one of five identifiers: compiler, exestr, lib, linker, user.
 /// "foo" is a string, which is fully macro expanded, and permits string
@@ -502,38 +527,10 @@ void Preprocessor::HandlePragmaComment(Token &Tok) {
   // Read the optional string if present.
   Lex(Tok);
   std::string ArgumentString;
-  if (Tok.is(tok::comma)) {
-    Lex(Tok); // eat the comma.
-
-    // We need at least one string.
-    if (Tok.isNot(tok::string_literal)) {
-      Diag(Tok.getLocation(), diag::err_pragma_comment_malformed);
-      return;
-    }
-
-    // String concatenation allows multiple strings, which can even come from
-    // macro expansion.
-    // "foo " "bar" "Baz"
-    SmallVector<Token, 4> StrToks;
-    while (Tok.is(tok::string_literal)) {
-      if (Tok.hasUDSuffix())
-        Diag(Tok, diag::err_invalid_string_udl);
-      StrToks.push_back(Tok);
-      Lex(Tok);
-    }
-
-    // Concatenate and parse the strings.
-    StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
-    assert(Literal.isAscii() && "Didn't allow wide strings in");
-    if (Literal.hadError)
-      return;
-    if (Literal.Pascal) {
-      Diag(StrToks[0].getLocation(), diag::err_pragma_comment_malformed);
-      return;
-    }
-
-    ArgumentString = Literal.GetString();
-  }
+  if (Tok.is(tok::comma) && !LexStringLiteral(Tok, ArgumentString,
+                                              "pragma comment",
+                                              /*MacroExpansion=*/true))
+    return;
 
   // FIXME: If the kind is "compiler" warn if the string is present (it is
   // ignored).
@@ -560,11 +557,11 @@ void Preprocessor::HandlePragmaComment(Token &Tok) {
 /// HandlePragmaMessage - Handle the microsoft and gcc \#pragma message
 /// extension.  The syntax is:
 /// \code
-///   \#pragma message(string)
+///   #pragma message(string)
 /// \endcode
 /// OR, in GCC mode:
 /// \code
-///   \#pragma message string
+///   #pragma message string
 /// \endcode
 /// string is a string, which is fully macro expanded, and permits string
 /// concatenation, embedded escape characters, etc... See MSDN for more details.
@@ -587,34 +584,10 @@ void Preprocessor::HandlePragmaMessage(Token &Tok) {
     return;
   }
 
-  // We need at least one string.
-  if (Tok.isNot(tok::string_literal)) {
-    Diag(Tok.getLocation(), diag::err_pragma_message_malformed);
-    return;
-  }
-
-  // String concatenation allows multiple strings, which can even come from
-  // macro expansion.
-  // "foo " "bar" "Baz"
-  SmallVector<Token, 4> StrToks;
-  while (Tok.is(tok::string_literal)) {
-    if (Tok.hasUDSuffix())
-      Diag(Tok, diag::err_invalid_string_udl);
-    StrToks.push_back(Tok);
-    Lex(Tok);
-  }
-
-  // Concatenate and parse the strings.
-  StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
-  assert(Literal.isAscii() && "Didn't allow wide strings in");
-  if (Literal.hadError)
-    return;
-  if (Literal.Pascal) {
-    Diag(StrToks[0].getLocation(), diag::err_pragma_message_malformed);
+  std::string MessageString;
+  if (!FinishLexStringLiteral(Tok, MessageString, "pragma message",
+                              /*MacroExpansion=*/true))
     return;
-  }
-
-  StringRef MessageString(Literal.GetString());
 
   if (ExpectClosingParen) {
     if (Tok.isNot(tok::r_paren)) {
@@ -692,7 +665,7 @@ IdentifierInfo *Preprocessor::ParsePragmaPushOrPopMacro(Token &Tok) {
 ///
 /// The syntax is:
 /// \code
-///   \#pragma push_macro("macro")
+///   #pragma push_macro("macro")
 /// \endcode
 void Preprocessor::HandlePragmaPushMacro(Token &PushMacroTok) {
   // Parse the pragma directive and get the macro IdentifierInfo*.
@@ -702,17 +675,13 @@ void Preprocessor::HandlePragmaPushMacro(Token &PushMacroTok) {
   // Get the MacroInfo associated with IdentInfo.
   MacroInfo *MI = getMacroInfo(IdentInfo);
  
-  MacroInfo *MacroCopyToPush = 0;
   if (MI) {
-    // Make a clone of MI.
-    MacroCopyToPush = CloneMacroInfo(*MI);
-    
     // Allow the original MacroInfo to be redefined later.
     MI->setIsAllowRedefinitionsWithoutWarning(true);
   }
 
   // Push the cloned MacroInfo so we can retrieve it later.
-  PragmaPushMacroInfo[IdentInfo].push_back(MacroCopyToPush);
+  PragmaPushMacroInfo[IdentInfo].push_back(MI);
 }
 
 /// \brief Handle \#pragma pop_macro.
@@ -733,10 +702,11 @@ void Preprocessor::HandlePragmaPopMacro(Token &PopMacroTok) {
     PragmaPushMacroInfo.find(IdentInfo);
   if (iter != PragmaPushMacroInfo.end()) {
     // Forget the MacroInfo currently associated with IdentInfo.
-    if (MacroInfo *CurrentMI = getMacroInfo(IdentInfo)) {
-      if (CurrentMI->isWarnIfUnused())
-        WarnUnusedMacroLocs.erase(CurrentMI->getDefinitionLoc());
-      UndefineMacro(IdentInfo, CurrentMI, MessageLoc);
+    if (MacroDirective *CurrentMD = getMacroDirective(IdentInfo)) {
+      MacroInfo *MI = CurrentMD->getMacroInfo();
+      if (MI->isWarnIfUnused())
+        WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
+      appendMacroDirective(IdentInfo, AllocateUndefMacroDirective(MessageLoc));
     }
 
     // Get the MacroInfo we want to reinstall.
@@ -744,9 +714,8 @@ void Preprocessor::HandlePragmaPopMacro(Token &PopMacroTok) {
 
     if (MacroToReInstall) {
       // Reinstall the previously pushed macro.
-      setMacroInfo(IdentInfo, MacroToReInstall);
-    } else if (IdentInfo->hasMacroDefinition()) {
-      clearMacroInfo(IdentInfo);
+      appendDefMacroDirective(IdentInfo, MacroToReInstall, MessageLoc,
+                              /*isImported=*/false);
     }
 
     // Pop PragmaPushMacroInfo stack.
@@ -1090,50 +1059,28 @@ public:
     }
 
     PP.LexUnexpandedToken(Tok);
+    SourceLocation StringLoc = Tok.getLocation();
 
-    // We need at least one string.
-    if (Tok.isNot(tok::string_literal)) {
-      PP.Diag(Tok.getLocation(), diag::warn_pragma_diagnostic_invalid_token);
+    std::string WarningName;
+    if (!PP.FinishLexStringLiteral(Tok, WarningName, "pragma diagnostic",
+                                   /*MacroExpansion=*/false))
       return;
-    }
-
-    // String concatenation allows multiple strings, which can even come from
-    // macro expansion.
-    // "foo " "bar" "Baz"
-    SmallVector<Token, 4> StrToks;
-    while (Tok.is(tok::string_literal)) {
-      StrToks.push_back(Tok);
-      PP.LexUnexpandedToken(Tok);
-    }
 
     if (Tok.isNot(tok::eod)) {
       PP.Diag(Tok.getLocation(), diag::warn_pragma_diagnostic_invalid_token);
       return;
     }
 
-    // Concatenate and parse the strings.
-    StringLiteralParser Literal(&StrToks[0], StrToks.size(), PP);
-    assert(Literal.isAscii() && "Didn't allow wide strings in");
-    if (Literal.hadError)
-      return;
-    if (Literal.Pascal) {
-      PP.Diag(Tok, diag::warn_pragma_diagnostic_invalid);
-      return;
-    }
-
-    StringRef WarningName(Literal.GetString());
-
     if (WarningName.size() < 3 || WarningName[0] != '-' ||
         WarningName[1] != 'W') {
-      PP.Diag(StrToks[0].getLocation(),
-              diag::warn_pragma_diagnostic_invalid_option);
+      PP.Diag(StringLoc, diag::warn_pragma_diagnostic_invalid_option);
       return;
     }
 
     if (PP.getDiagnostics().setDiagnosticGroupMapping(WarningName.substr(2),
                                                       Map, DiagLoc))
-      PP.Diag(StrToks[0].getLocation(),
-              diag::warn_pragma_diagnostic_unknown_warning) << WarningName;
+      PP.Diag(StringLoc, diag::warn_pragma_diagnostic_unknown_warning)
+        << WarningName;
     else if (Callbacks)
       Callbacks->PragmaDiagnostic(DiagLoc, Namespace, Map, WarningName);
   }
@@ -1277,6 +1224,29 @@ struct PragmaARCCFCodeAuditedHandler : public PragmaHandler {
   }
 };
 
+  /// \brief Handle "\#pragma region [...]" and "\#pragma endregion [...]".
+  ///
+  /// The syntax is:
+  /// \code
+  ///   #pragma region [optional name]
+  ///   #pragma endregion [optional comment]
+  /// \endcode
+  ///
+  /// \note This is an
+  /// <a href="http://msdn.microsoft.com/en-us/library/b6xkz944(v=vs.80).aspx">editor-only</a>
+  /// pragma; this handler does nothing, so the compiler just skips it.
+  struct PragmaRegionHandler : public PragmaHandler {
+    PragmaRegionHandler(const char *pragma) : PragmaHandler(pragma) { }
+
+    virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
+                              Token &NameTok) {
+      // Intentionally empty. With #pragma region, matching endregion
+      // directives could be verified here (not done). __pragma(region) makes
+      // no sense but MSVC ignores it; _Pragma is not valid for MSVC, but
+      // there is no point in handling a _Pragma differently.
+    }
+  };
+
 }  // end anonymous namespace
 
 
@@ -1310,5 +1280,7 @@ void Preprocessor::RegisterBuiltinPragmas() {
   if (LangOpts.MicrosoftExt) {
     AddPragmaHandler(new PragmaCommentHandler());
     AddPragmaHandler(new PragmaIncludeAliasHandler());
+    AddPragmaHandler(new PragmaRegionHandler("region"));
+    AddPragmaHandler(new PragmaRegionHandler("endregion"));
   }
 }
diff --git a/lib/Lex/PreprocessingRecord.cpp b/lib/Lex/PreprocessingRecord.cpp
index 01f3665e..b10e7f7 100644
--- a/lib/Lex/PreprocessingRecord.cpp
+++ b/lib/Lex/PreprocessingRecord.cpp
@@ -14,8 +14,8 @@
 #include "clang/Lex/PreprocessingRecord.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/Token.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Capacity.h"
+#include "llvm/Support/ErrorHandling.h"
 
 using namespace clang;
 
@@ -38,14 +38,9 @@ InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec,
   this->FileName = StringRef(Memory, FileName.size());
 }
 
-PreprocessingRecord::PreprocessingRecord(SourceManager &SM,
-                                         bool RecordConditionalDirectives)
+PreprocessingRecord::PreprocessingRecord(SourceManager &SM)
   : SourceMgr(SM),
-    RecordCondDirectives(RecordConditionalDirectives), CondDirectiveNextIdx(0),
-    ExternalSource(0)
-{
-  if (RecordCondDirectives)
-    CondDirectiveStack.push_back(CondDirectiveNextIdx++);
+    ExternalSource(0) {
 }
 
 /// \brief Returns a pair of [Begin, End) iterators of preprocessed entities
@@ -97,8 +92,10 @@ bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) {
 
   int Pos = PPEI.Position;
   if (Pos < 0) {
-    assert(unsigned(-Pos-1) < LoadedPreprocessedEntities.size() &&
-           "Out-of bounds loaded preprocessed entity");
+    if (unsigned(-Pos-1) >= LoadedPreprocessedEntities.size()) {
+      assert(0 && "Out-of bounds loaded preprocessed entity");
+      return false;
+    }
     assert(ExternalSource && "No external source to load from");
     unsigned LoadedIndex = LoadedPreprocessedEntities.size()+Pos;
     if (PreprocessedEntity *PPE = LoadedPreprocessedEntities[LoadedIndex])
@@ -106,8 +103,8 @@ bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) {
 
     // See if the external source can see if the entity is in the file without
     // deserializing it.
-    llvm::Optional<bool>
-      IsInFile = ExternalSource->isPreprocessedEntityInFileID(LoadedIndex, FID);
+    Optional<bool> IsInFile =
+        ExternalSource->isPreprocessedEntityInFileID(LoadedIndex, FID);
     if (IsInFile.hasValue())
       return IsInFile.getValue();
 
@@ -118,8 +115,10 @@ bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) {
                                           FID, SourceMgr);
   }
 
-  assert(unsigned(Pos) < PreprocessedEntities.size() &&
-         "Out-of bounds local preprocessed entity");
+  if (unsigned(Pos) >= PreprocessedEntities.size()) {
+    assert(0 && "Out-of bounds local preprocessed entity");
+    return false;
+  }
   return isPreprocessedEntityIfInFileID(PreprocessedEntities[Pos],
                                         FID, SourceMgr);
 }
@@ -249,11 +248,11 @@ PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) {
   assert(Entity);
   SourceLocation BeginLoc = Entity->getSourceRange().getBegin();
 
-  if (!isa<class InclusionDirective>(Entity)) {
+  if (isa<MacroDefinition>(Entity)) {
     assert((PreprocessedEntities.empty() ||
             !SourceMgr.isBeforeInTranslationUnit(BeginLoc,
                    PreprocessedEntities.back()->getSourceRange().getBegin())) &&
-           "a macro directive was encountered out-of-order");
+           "a macro definition was encountered out-of-order");
     PreprocessedEntities.push_back(Entity);
     return getPPEntityID(PreprocessedEntities.size()-1, /*isLoaded=*/false);
   }
@@ -268,7 +267,15 @@ PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) {
 
   // The entity's location is not after the previous one; this can happen with
   // include directives that form the filename using macros, e.g:
-  // "#include MACRO(STUFF)".
+  // "#include MACRO(STUFF)"
+  // or with macro expansions inside macro arguments where the arguments are
+  // not expanded in the same order as listed, e.g:
+  // \code
+  //  #define M1 1
+  //  #define M2 2
+  //  #define FM(x,y) y x
+  //  FM(M1, M2)
+  // \endcode
 
   typedef std::vector<PreprocessedEntity *>::iterator pp_iter;
 
@@ -313,8 +320,8 @@ unsigned PreprocessingRecord::allocateLoadedEntities(unsigned NumEntities) {
 }
 
 void PreprocessingRecord::RegisterMacroDefinition(MacroInfo *Macro,
-                                                  PPEntityID PPID) {
-  MacroDefinitions[Macro] = PPID;
+                                                  MacroDefinition *Def) {
+  MacroDefinitions[Macro] = Def;
 }
 
 /// \brief Retrieve the preprocessed entity at the given ID.
@@ -351,19 +358,17 @@ PreprocessingRecord::getLoadedPreprocessedEntity(unsigned Index) {
 }
 
 MacroDefinition *PreprocessingRecord::findMacroDefinition(const MacroInfo *MI) {
-  llvm::DenseMap<const MacroInfo *, PPEntityID>::iterator Pos
+  llvm::DenseMap<const MacroInfo *, MacroDefinition *>::iterator Pos
     = MacroDefinitions.find(MI);
   if (Pos == MacroDefinitions.end())
     return 0;
-  
-  PreprocessedEntity *Entity = getPreprocessedEntity(Pos->second);
-  if (Entity->isInvalid())
-    return 0;
-  return cast<MacroDefinition>(Entity);
+
+  return Pos->second;
 }
 
-void PreprocessingRecord::MacroExpands(const Token &Id, const MacroInfo* MI,
-                                       SourceRange Range) {
+void PreprocessingRecord::addMacroExpansion(const Token &Id,
+                                            const MacroInfo *MI,
+                                            SourceRange Range) {
   // We don't record nested macro expansions.
   if (Id.getLocation().isMacroID())
     return;
@@ -376,17 +381,50 @@ void PreprocessingRecord::MacroExpands(const Token &Id, const MacroInfo* MI,
                        new (*this) MacroExpansion(Def, Range));
 }
 
+void PreprocessingRecord::Ifdef(SourceLocation Loc, const Token &MacroNameTok,
+                                const MacroDirective *MD) {
+  // This is not actually a macro expansion but record it as a macro reference.
+  if (MD)
+    addMacroExpansion(MacroNameTok, MD->getMacroInfo(),
+                      MacroNameTok.getLocation());
+}
+
+void PreprocessingRecord::Ifndef(SourceLocation Loc, const Token &MacroNameTok,
+                                 const MacroDirective *MD) {
+  // This is not actually a macro expansion but record it as a macro reference.
+  if (MD)
+    addMacroExpansion(MacroNameTok, MD->getMacroInfo(),
+                      MacroNameTok.getLocation());
+}
+
+void PreprocessingRecord::Defined(const Token &MacroNameTok,
+                                  const MacroDirective *MD) {
+  // This is not actually a macro expansion but record it as a macro reference.
+  if (MD)
+    addMacroExpansion(MacroNameTok, MD->getMacroInfo(),
+                      MacroNameTok.getLocation());
+}
+
+void PreprocessingRecord::MacroExpands(const Token &Id,const MacroDirective *MD,
+                                       SourceRange Range) {
+  addMacroExpansion(Id, MD->getMacroInfo(), Range);
+}
+
 void PreprocessingRecord::MacroDefined(const Token &Id,
-                                       const MacroInfo *MI) {
+                                       const MacroDirective *MD) {
+  const MacroInfo *MI = MD->getMacroInfo();
   SourceRange R(MI->getDefinitionLoc(), MI->getDefinitionEndLoc());
   MacroDefinition *Def
       = new (*this) MacroDefinition(Id.getIdentifierInfo(), R);
-  MacroDefinitions[MI] = addPreprocessedEntity(Def);
+  addPreprocessedEntity(Def);
+  MacroDefinitions[MI] = Def;
 }
 
 void PreprocessingRecord::MacroUndefined(const Token &Id,
-                                         const MacroInfo *MI) {
-  MacroDefinitions.erase(MI);
+                                         const MacroDirective *MD) {
+  // Note: MD may be null (when #undef'ining an undefined macro).
+  if (MD)
+    MacroDefinitions.erase(MD->getMacroInfo());
 }
 
 void PreprocessingRecord::InclusionDirective(
@@ -438,95 +476,6 @@ void PreprocessingRecord::InclusionDirective(
   addPreprocessedEntity(ID);
 }
 
-bool PreprocessingRecord::rangeIntersectsConditionalDirective(
-                                                      SourceRange Range) const {
-  if (Range.isInvalid())
-    return false;
-
-  CondDirectiveLocsTy::const_iterator
-    low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(),
-                           Range.getBegin(), CondDirectiveLoc::Comp(SourceMgr));
-  if (low == CondDirectiveLocs.end())
-    return false;
-
-  if (SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), low->getLoc()))
-    return false;
-
-  CondDirectiveLocsTy::const_iterator
-    upp = std::upper_bound(low, CondDirectiveLocs.end(),
-                           Range.getEnd(), CondDirectiveLoc::Comp(SourceMgr));
-  unsigned uppIdx;
-  if (upp != CondDirectiveLocs.end())
-    uppIdx = upp->getIdx();
-  else
-    uppIdx = 0;
-
-  return low->getIdx() != uppIdx;
-}
-
-unsigned PreprocessingRecord::findCondDirectiveIdx(SourceLocation Loc) const {
-  if (Loc.isInvalid())
-    return 0;
-
-  CondDirectiveLocsTy::const_iterator
-    low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(),
-                           Loc, CondDirectiveLoc::Comp(SourceMgr));
-  if (low == CondDirectiveLocs.end())
-    return 0;
-  return low->getIdx();
-}
-
-void PreprocessingRecord::addCondDirectiveLoc(CondDirectiveLoc DirLoc) {
-  // Ignore directives in system headers.
-  if (SourceMgr.isInSystemHeader(DirLoc.getLoc()))
-    return;
-
-  assert(CondDirectiveLocs.empty() ||
-         SourceMgr.isBeforeInTranslationUnit(CondDirectiveLocs.back().getLoc(),
-                                             DirLoc.getLoc()));
-  CondDirectiveLocs.push_back(DirLoc);
-}
-
-void PreprocessingRecord::If(SourceLocation Loc, SourceRange ConditionRange) {
-  if (RecordCondDirectives) {
-    addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
-    CondDirectiveStack.push_back(CondDirectiveNextIdx++);
-  }
-}
-
-void PreprocessingRecord::Ifdef(SourceLocation Loc, const Token &MacroNameTok) {
-  if (RecordCondDirectives) {
-    addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
-    CondDirectiveStack.push_back(CondDirectiveNextIdx++);
-  }
-}
-
-void PreprocessingRecord::Ifndef(SourceLocation Loc,const Token &MacroNameTok) {
-  if (RecordCondDirectives) {
-    addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
-    CondDirectiveStack.push_back(CondDirectiveNextIdx++);
-  }
-}
-
-void PreprocessingRecord::Elif(SourceLocation Loc, SourceRange ConditionRange,
-                               SourceLocation IfLoc) {
-  if (RecordCondDirectives)
-    addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
-}
-
-void PreprocessingRecord::Else(SourceLocation Loc, SourceLocation IfLoc) {
-  if (RecordCondDirectives)
-    addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
-}
-
-void PreprocessingRecord::Endif(SourceLocation Loc, SourceLocation IfLoc) {
-  if (RecordCondDirectives) {
-    addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
-    assert(!CondDirectiveStack.empty());
-    CondDirectiveStack.pop_back();
-  }
-}
-
 size_t PreprocessingRecord::getTotalMemory() const {
   return BumpAlloc.getTotalMemory()
     + llvm::capacity_in_bytes(MacroDefinitions)
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 3b070ce..53c45dc 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -26,50 +26,48 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/PreprocessorOptions.h"
 #include "MacroArgs.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Lex/ExternalPreprocessorSource.h"
 #include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/LiteralSupport.h"
 #include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/ModuleLoader.h"
 #include "clang/Lex/Pragma.h"
 #include "clang/Lex/PreprocessingRecord.h"
+#include "clang/Lex/PreprocessorOptions.h"
 #include "clang/Lex/ScratchBuffer.h"
-#include "clang/Lex/LexDiagnostic.h"
-#include "clang/Lex/CodeCompletionHandler.h"
-#include "clang/Lex/ModuleLoader.h"
-#include "clang/Basic/SourceManager.h"
-#include "clang/Basic/FileManager.h"
-#include "clang/Basic/TargetInfo.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Capacity.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Capacity.h"
 using namespace clang;
 
 //===----------------------------------------------------------------------===//
 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
 
-PPMutationListener::~PPMutationListener() { }
-
-Preprocessor::Preprocessor(llvm::IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
+Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
                            DiagnosticsEngine &diags, LangOptions &opts,
                            const TargetInfo *target, SourceManager &SM,
                            HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
-                           IdentifierInfoLookup* IILookup,
-                           bool OwnsHeaders,
-                           bool DelayInitialization,
-                           bool IncrProcessing)
-  : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(target),
-    FileMgr(Headers.getFileMgr()),
-    SourceMgr(SM), HeaderInfo(Headers), TheModuleLoader(TheModuleLoader),
-    ExternalSource(0), Identifiers(opts, IILookup), 
-    IncrementalProcessing(IncrProcessing), CodeComplete(0), 
-    CodeCompletionFile(0), CodeCompletionOffset(0), CodeCompletionReached(0),
-    SkipMainFilePreamble(0, true), CurPPLexer(0), 
-    CurDirLookup(0), CurLexerKind(CLK_Lexer), Callbacks(0), Listener(0),
-    MacroArgCache(0), Record(0), MIChainHead(0), MICache(0) 
-{
+                           IdentifierInfoLookup *IILookup, bool OwnsHeaders,
+                           bool DelayInitialization, bool IncrProcessing)
+    : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(target),
+      FileMgr(Headers.getFileMgr()), SourceMgr(SM), HeaderInfo(Headers),
+      TheModuleLoader(TheModuleLoader), ExternalSource(0),
+      Identifiers(opts, IILookup), IncrementalProcessing(IncrProcessing),
+      CodeComplete(0), CodeCompletionFile(0), CodeCompletionOffset(0),
+      CodeCompletionReached(0), SkipMainFilePreamble(0, true), CurPPLexer(0),
+      CurDirLookup(0), CurLexerKind(CLK_Lexer), Callbacks(0),
+      MacroArgCache(0), Record(0), MIChainHead(0), MICache(0) {
   OwnsHeaderSearch = OwnsHeaders;
   
   ScratchBuf = new ScratchBuffer(SourceMgr);
@@ -96,9 +94,11 @@ Preprocessor::Preprocessor(llvm::IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
   InMacroArgPreExpansion = false;
   NumCachedTokenLexers = 0;
   PragmasEnabled = true;
+  ParsingIfOrElifDirective = false;
+  PreprocessedOutput = false;
 
   CachedLexPos = 0;
-  
+
   // We haven't read anything from the external source.
   ReadMacrosFromExternalSource = false;
   
@@ -292,7 +292,7 @@ Preprocessor::macro_end(bool IncludeExternalMacros) const {
 
 /// \brief Compares macro tokens with a specified token value sequence.
 static bool MacroDefinitionEquals(const MacroInfo *MI,
-                                  llvm::ArrayRef<TokenValue> Tokens) {
+                                  ArrayRef<TokenValue> Tokens) {
   return Tokens.size() == MI->getNumTokens() &&
       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
 }
@@ -304,14 +304,15 @@ StringRef Preprocessor::getLastMacroWithSpelling(
   StringRef BestSpelling;
   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
        I != E; ++I) {
-    if (!I->second->isObjectLike())
+    if (!I->second->getMacroInfo()->isObjectLike())
       continue;
-    const MacroInfo *MI = I->second->findDefinitionAtLoc(Loc, SourceMgr);
-    if (!MI)
+    const MacroDirective::DefInfo
+      Def = I->second->findDirectiveAtLoc(Loc, SourceMgr);
+    if (!Def)
       continue;
-    if (!MacroDefinitionEquals(MI, Tokens))
+    if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
       continue;
-    SourceLocation Location = I->second->getDefinitionLoc();
+    SourceLocation Location = Def.getLocation();
     // Choose the macro defined latest.
     if (BestLocation.isInvalid() ||
         (Location.isValid() &&
@@ -398,7 +399,7 @@ StringRef Preprocessor::getSpelling(const Token &Tok,
                                           SmallVectorImpl<char> &Buffer,
                                           bool *Invalid) const {
   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
-  if (Tok.isNot(tok::raw_identifier)) {
+  if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
     // Try the fast path.
     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
       return II->getName();
@@ -481,6 +482,7 @@ void Preprocessor::EnterMainSourceFile() {
   assert(SB && "Cannot create predefined source buffer");
   FileID FID = SourceMgr.createFileIDForMemBuffer(SB);
   assert(!FID.isInvalid() && "Could not create FileID for predefines?");
+  setPredefinesFileID(FID);
 
   // Start parsing the predefines.
   EnterSourceFile(FID, 0, SourceLocation());
@@ -496,6 +498,48 @@ void Preprocessor::EndSourceFile() {
 // Lexer Event Handling.
 //===----------------------------------------------------------------------===//
 
+static void appendCodePoint(unsigned Codepoint,
+                            llvm::SmallVectorImpl<char> &Str) {
+  char ResultBuf[4];
+  char *ResultPtr = ResultBuf;
+  bool Res = llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr);
+  (void)Res;
+  assert(Res && "Unexpected conversion failure");
+  Str.append(ResultBuf, ResultPtr);
+}
+
+static void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
+  for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
+    if (*I != '\\') {
+      Buf.push_back(*I);
+      continue;
+    }
+
+    ++I;
+    assert(*I == 'u' || *I == 'U');
+
+    unsigned NumHexDigits;
+    if (*I == 'u')
+      NumHexDigits = 4;
+    else
+      NumHexDigits = 8;
+
+    assert(I + NumHexDigits <= E);
+
+    uint32_t CodePoint = 0;
+    for (++I; NumHexDigits != 0; ++I, --NumHexDigits) {
+      unsigned Value = llvm::hexDigitValue(*I);
+      assert(Value != -1U);
+
+      CodePoint <<= 4;
+      CodePoint += Value;
+    }
+
+    appendCodePoint(CodePoint, Buf);
+    --I;
+  }
+}
+
 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
 /// identifier information for the token and install it into the token,
 /// updating the token kind accordingly.
@@ -504,15 +548,22 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
 
   // Look up this token, see if it is a macro, or if it is a language keyword.
   IdentifierInfo *II;
-  if (!Identifier.needsCleaning()) {
+  if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
     // No cleaning needed, just use the characters from the lexed buffer.
     II = getIdentifierInfo(StringRef(Identifier.getRawIdentifierData(),
-                                           Identifier.getLength()));
+                                     Identifier.getLength()));
   } else {
     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
     SmallString<64> IdentifierBuffer;
     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
-    II = getIdentifierInfo(CleanedStr);
+
+    if (Identifier.hasUCN()) {
+      SmallString<64> UCNIdentifierBuffer;
+      expandUCNs(UCNIdentifierBuffer, CleanedStr);
+      II = getIdentifierInfo(UCNIdentifierBuffer);
+    } else {
+      II = getIdentifierInfo(CleanedStr);
+    }
   }
 
   // Update the token info (identifier info and appropriate token kind).
@@ -589,19 +640,19 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {
   }
 
   // If this is a macro to be expanded, do it.
-  if (MacroInfo *MI = getMacroInfo(&II)) {
+  if (MacroDirective *MD = getMacroDirective(&II)) {
+    MacroInfo *MI = MD->getMacroInfo();
     if (!DisableMacroExpansion) {
-      if (Identifier.isExpandDisabled()) {
-        Diag(Identifier, diag::pp_disabled_macro_expansion);
-      } else if (MI->isEnabled()) {
-        if (!HandleMacroExpandedIdentifier(Identifier, MI))
+      if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
+        if (!HandleMacroExpandedIdentifier(Identifier, MD))
           return;
       } else {
         // C99 6.10.3.4p2 says that a disabled macro may never again be
         // expanded, even if it's in a context where it could be expanded in the
         // future.
         Identifier.setFlag(Token::DisableExpand);
-        Diag(Identifier, diag::pp_disabled_macro_expansion);
+        if (MI->isObjectLike() || isNextPPTokenLParen())
+          Diag(Identifier, diag::pp_disabled_macro_expansion);
       }
     }
   }
@@ -630,10 +681,10 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {
   if (II.isExtensionToken() && !DisableMacroExpansion)
     Diag(Identifier, diag::ext_token_used);
   
-  // If this is the '__experimental_modules_import' contextual keyword, note
+  // If this is the 'import' contextual keyword, note
   // that the next token indicates a module name.
   //
-  // Note that we do not treat '__experimental_modules_import' as a contextual
+  // Note that we do not treat 'import' as a contextual
   // keyword when we're in a caching lexer, because caching lexers only get
   // used in contexts where import declarations are disallowed.
   if (II.isModulesImport() && !InMacroArgs && !DisableMacroExpansion &&
@@ -689,6 +740,47 @@ void Preprocessor::LexAfterModuleImport(Token &Result) {
   }
 }
 
+bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
+                                          const char *DiagnosticTag,
+                                          bool AllowMacroExpansion) {
+  // We need at least one string literal.
+  if (Result.isNot(tok::string_literal)) {
+    Diag(Result, diag::err_expected_string_literal)
+      << /*Source='in...'*/0 << DiagnosticTag;
+    return false;
+  }
+
+  // Lex string literal tokens, optionally with macro expansion.
+  SmallVector<Token, 4> StrToks;
+  do {
+    StrToks.push_back(Result);
+
+    if (Result.hasUDSuffix())
+      Diag(Result, diag::err_invalid_string_udl);
+
+    if (AllowMacroExpansion)
+      Lex(Result);
+    else
+      LexUnexpandedToken(Result);
+  } while (Result.is(tok::string_literal));
+
+  // Concatenate and parse the strings.
+  StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
+  assert(Literal.isAscii() && "Didn't allow wide strings in");
+
+  if (Literal.hadError)
+    return false;
+
+  if (Literal.Pascal) {
+    Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
+      << /*Source='in...'*/0 << DiagnosticTag;
+    return false;
+  }
+
+  String = Literal.GetString();
+  return true;
+}
+
 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
   assert(Handler && "NULL comment handler");
   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
@@ -723,11 +815,10 @@ CommentHandler::~CommentHandler() { }
 
 CodeCompletionHandler::~CodeCompletionHandler() { }
 
-void Preprocessor::createPreprocessingRecord(bool RecordConditionalDirectives) {
+void Preprocessor::createPreprocessingRecord() {
   if (Record)
     return;
   
-  Record = new PreprocessingRecord(getSourceManager(),
-                                   RecordConditionalDirectives);
+  Record = new PreprocessingRecord(getSourceManager());
   addPPCallbacks(Record);
 }
diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp
index a64c84d..5a59849 100644
--- a/lib/Lex/PreprocessorLexer.cpp
+++ b/lib/Lex/PreprocessorLexer.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/PreprocessorLexer.h"
-#include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/LexDiagnostic.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Preprocessor.h"
 using namespace clang;
 
 void PreprocessorLexer::anchor() { }
diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp
index dd7ebb0..0a66bba 100644
--- a/lib/Lex/TokenConcatenation.cpp
+++ b/lib/Lex/TokenConcatenation.cpp
@@ -12,25 +12,25 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/TokenConcatenation.h"
+#include "clang/Basic/CharInfo.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/Support/ErrorHandling.h"
-#include <cctype>
 using namespace clang;
 
 
 /// IsStringPrefix - Return true if Str is a string prefix.
 /// 'L', 'u', 'U', or 'u8'. Including raw versions.
-static bool IsStringPrefix(StringRef Str, bool CPlusPlus0x) {
+static bool IsStringPrefix(StringRef Str, bool CPlusPlus11) {
 
   if (Str[0] == 'L' ||
-      (CPlusPlus0x && (Str[0] == 'u' || Str[0] == 'U' || Str[0] == 'R'))) {
+      (CPlusPlus11 && (Str[0] == 'u' || Str[0] == 'U' || Str[0] == 'R'))) {
 
     if (Str.size() == 1)
       return true; // "L", "u", "U", and "R"
 
     // Check for raw flavors. Need to make sure the first character wasn't
-    // already R. Need CPlusPlus0x check for "LR".
-    if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus0x)
+    // already R. Need CPlusPlus11 check for "LR".
+    if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus11)
       return true; // "LR", "uR", "UR"
 
     // Check for "u8" and "u8R"
@@ -54,17 +54,17 @@ bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const {
     SourceManager &SM = PP.getSourceManager();
     const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
     return IsStringPrefix(StringRef(Ptr, Tok.getLength()),
-                          LangOpts.CPlusPlus0x);
+                          LangOpts.CPlusPlus11);
   }
 
   if (Tok.getLength() < 256) {
     char Buffer[256];
     const char *TokPtr = Buffer;
     unsigned length = PP.getSpelling(Tok, TokPtr);
-    return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus0x);
+    return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus11);
   }
 
-  return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus0x);
+  return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus11);
 }
 
 TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
@@ -87,7 +87,7 @@ TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
   TokenInfo[tok::arrow           ] |= aci_custom_firstchar;
 
   // These tokens have custom code in C++11 mode.
-  if (PP.getLangOpts().CPlusPlus0x) {
+  if (PP.getLangOpts().CPlusPlus11) {
     TokenInfo[tok::string_literal      ] |= aci_custom;
     TokenInfo[tok::wide_string_literal ] |= aci_custom;
     TokenInfo[tok::utf8_string_literal ] |= aci_custom;
@@ -156,9 +156,10 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
   // First, check to see if the tokens were directly adjacent in the original
   // source.  If they were, it must be okay to stick them together: if there
   // were an issue, the tokens would have been lexed differently.
-  if (PrevTok.getLocation().isFileID() && Tok.getLocation().isFileID() &&
-      PrevTok.getLocation().getLocWithOffset(PrevTok.getLength()) ==
-        Tok.getLocation())
+  SourceManager &SM = PP.getSourceManager();
+  SourceLocation PrevSpellLoc = SM.getSpellingLoc(PrevTok.getLocation());
+  SourceLocation SpellLoc = SM.getSpellingLoc(Tok.getLocation());
+  if (PrevSpellLoc.getLocWithOffset(PrevTok.getLength()) == SpellLoc)
     return false;
 
   tok::TokenKind PrevKind = PrevTok.getKind();
@@ -206,7 +207,7 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
   case tok::wide_char_constant:
   case tok::utf16_char_constant:
   case tok::utf32_char_constant:
-    if (!PP.getLangOpts().CPlusPlus0x)
+    if (!PP.getLangOpts().CPlusPlus11)
       return false;
 
     // In C++11, a string or character literal followed by an identifier is a
@@ -239,13 +240,12 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
     return IsIdentifierStringPrefix(PrevTok);
 
   case tok::numeric_constant:
-    return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
-           FirstChar == '+' || FirstChar == '-' || FirstChar == '.' ||
-           (PP.getLangOpts().CPlusPlus0x && FirstChar == '_');
+    return isPreprocessingNumberBody(FirstChar) ||
+           FirstChar == '+' || FirstChar == '-';
   case tok::period:          // ..., .*, .1234
     return (FirstChar == '.' && PrevPrevTok.is(tok::period)) ||
-    isdigit(FirstChar) ||
-    (PP.getLangOpts().CPlusPlus && FirstChar == '*');
+           isDigit(FirstChar) ||
+           (PP.getLangOpts().CPlusPlus && FirstChar == '*');
   case tok::amp:             // &&
     return FirstChar == '&';
   case tok::plus:            // ++
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index 59b7478..5b41fe9 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -13,10 +13,10 @@
 
 #include "clang/Lex/TokenLexer.h"
 #include "MacroArgs.h"
-#include "clang/Lex/MacroInfo.h"
-#include "clang/Lex/Preprocessor.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/SmallString.h"
 using namespace clang;
 
@@ -647,6 +647,12 @@ bool TokenLexer::PasteTokens(Token &Tok) {
     StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
   if (EndLoc.isFileID())
     EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
+  FileID MacroFID = SM.getFileID(MacroExpansionStart);
+  while (SM.getFileID(StartLoc) != MacroFID)
+    StartLoc = SM.getImmediateExpansionRange(StartLoc).first;
+  while (SM.getFileID(EndLoc) != MacroFID)
+    EndLoc = SM.getImmediateExpansionRange(EndLoc).second;
+    
   Tok.setLocation(SM.createExpansionLoc(Tok.getLocation(), StartLoc, EndLoc,
                                         Tok.getLength()));
 
@@ -743,14 +749,18 @@ static void updateConsecutiveMacroArgTokens(SourceManager &SM,
 
   Token *NextTok = begin_tokens + 1;
   for (; NextTok < end_tokens; ++NextTok) {
+    SourceLocation NextLoc = NextTok->getLocation();
+    if (CurLoc.isFileID() != NextLoc.isFileID())
+      break; // Token from different kind of FileID.
+
     int RelOffs;
-    if (!SM.isInSameSLocAddrSpace(CurLoc, NextTok->getLocation(), &RelOffs))
+    if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs))
       break; // Token from different local/loaded location.
     // Check that token is not before the previous token or more than 50
     // "characters" away.
     if (RelOffs < 0 || RelOffs > 50)
       break;
-    CurLoc = NextTok->getLocation();
+    CurLoc = NextLoc;
   }
 
   // For the consecutive tokens, find the length of the SLocEntry to contain
diff --git a/lib/Lex/UnicodeCharSets.h b/lib/Lex/UnicodeCharSets.h
new file mode 100644
index 0000000..37ff8af
--- /dev/null
+++ b/lib/Lex/UnicodeCharSets.h
@@ -0,0 +1,496 @@
+//===--- UnicodeCharSets.h - Contains important sets of characters --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef CLANG_LEX_UNICODECHARSETS_H
+#define CLANG_LEX_UNICODECHARSETS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace {
+  struct UnicodeCharRange { // Closed range [Lower, Upper] of code points.
+    uint32_t Lower; // First code point in the range (inclusive).
+    uint32_t Upper; // Last code point in the range (inclusive).
+  };
+  typedef llvm::ArrayRef<UnicodeCharRange> UnicodeCharSet;
+
+  typedef llvm::SmallPtrSet<const UnicodeCharRange *, 16> ValidatedCharSetsTy;
+}
+
+static inline ValidatedCharSetsTy &getValidatedCharSets() {
+  static ValidatedCharSetsTy CheckedSets; // Sets that already passed checks.
+  return CheckedSets;
+}
+
+/// Returns true if every range in \p CharSet is a closed range [min, max] and
+/// the ranges are ordered and non-overlapping; always true in NDEBUG builds.
+static inline bool isValidCharSet(UnicodeCharSet CharSet) {
+#ifndef NDEBUG
+  static llvm::sys::Mutex ValidationMutex;
+
+  // Check the validation cache, keyed by the address of the first range.
+  {
+    llvm::MutexGuard Guard(ValidationMutex);
+    if (getValidatedCharSets().count(CharSet.data()))
+      return true;
+  }
+
+  // Walk through the ranges; Prev is the upper bound of the preceding range.
+  uint32_t Prev = 0;
+  for (UnicodeCharSet::iterator I = CharSet.begin(), E = CharSet.end();
+       I != E; ++I) {
+    if (I != CharSet.begin() && Prev >= I->Lower) {
+      DEBUG(llvm::dbgs() << "Upper bound 0x");
+      DEBUG(llvm::dbgs().write_hex(Prev));
+      DEBUG(llvm::dbgs() << " should be less than succeeding lower bound 0x");
+      DEBUG(llvm::dbgs().write_hex(I->Lower) << "\n");
+      return false;
+    }
+    if (I->Upper < I->Lower) {
+      DEBUG(llvm::dbgs() << "Upper bound 0x");
+      DEBUG(llvm::dbgs().write_hex(I->Upper));
+      DEBUG(llvm::dbgs() << " should not be less than lower bound 0x");
+      DEBUG(llvm::dbgs().write_hex(I->Lower) << "\n");
+      return false;
+    }
+    Prev = I->Upper;
+  }
+
+  // Update the validation cache so later calls can skip the walk.
+  {
+    llvm::MutexGuard Guard(ValidationMutex);
+    getValidatedCharSets().insert(CharSet.data());
+  }
+#endif
+  return true;
+}
+
+/// Tests whether code point \p C falls inside one of the closed ranges of
+/// \p CharSet; the set is asserted to satisfy isValidCharSet().
+LLVM_READONLY static inline bool isCharInSet(uint32_t C,
+                                             UnicodeCharSet CharSet) {
+  assert(isValidCharSet(CharSet));
+
+  // Bisect the half-open index window [First, Last) until it is empty.
+  size_t First = 0, Last = CharSet.size();
+  while (First != Last) {
+    const size_t Probe = (First + Last) / 2;
+    const UnicodeCharRange &Range = CharSet[Probe];
+    if (C >= Range.Lower && C <= Range.Upper)
+      return true; // C lies within the probed range.
+
+    if (C < Range.Lower)
+      Last = Probe; // Continue in the ranges below the probe.
+    else
+      First = Probe + 1; // Continue in the ranges above the probe.
+  }
+
+  return false;
+}
+
+
+// C11 D.1, C++11 [charname.allowed]
+static const UnicodeCharRange C11AllowedIDChars[] = {
+  // 1
+  { 0x00A8, 0x00A8 }, { 0x00AA, 0x00AA }, { 0x00AD, 0x00AD },
+  { 0x00AF, 0x00AF }, { 0x00B2, 0x00B5 }, { 0x00B7, 0x00BA },
+  { 0x00BC, 0x00BE }, { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 },
+  { 0x00F8, 0x00FF },
+  // 2
+  { 0x0100, 0x167F }, { 0x1681, 0x180D }, { 0x180F, 0x1FFF },
+  // 3
+  { 0x200B, 0x200D }, { 0x202A, 0x202E }, { 0x203F, 0x2040 },
+  { 0x2054, 0x2054 }, { 0x2060, 0x206F },
+  // 4
+  { 0x2070, 0x218F }, { 0x2460, 0x24FF }, { 0x2776, 0x2793 },
+  { 0x2C00, 0x2DFF }, { 0x2E80, 0x2FFF },
+  // 5
+  { 0x3004, 0x3007 }, { 0x3021, 0x302F }, { 0x3031, 0x303F },
+  // 6
+  { 0x3040, 0xD7FF },
+  // 7
+  { 0xF900, 0xFD3D }, { 0xFD40, 0xFDCF }, { 0xFDF0, 0xFE44 },
+  { 0xFE47, 0xFFFD },
+  // 8
+  { 0x10000, 0x1FFFD }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD },
+  { 0x40000, 0x4FFFD }, { 0x50000, 0x5FFFD }, { 0x60000, 0x6FFFD },
+  { 0x70000, 0x7FFFD }, { 0x80000, 0x8FFFD }, { 0x90000, 0x9FFFD },
+  { 0xA0000, 0xAFFFD }, { 0xB0000, 0xBFFFD }, { 0xC0000, 0xCFFFD },
+  { 0xD0000, 0xDFFFD }, { 0xE0000, 0xEFFFD }
+};
+
+// C++03 [extendid]
+// Note that this is not the same as C++98, but we don't distinguish C++98
+// and C++03 in Clang.
+static const UnicodeCharRange CXX03AllowedIDChars[] = {
+  // Latin
+  { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 }, { 0x00F8, 0x01F5 },
+  { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 },
+
+  // Greek
+  { 0x0384, 0x0384 }, { 0x0388, 0x038A }, { 0x038C, 0x038C },
+  { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03D6 },
+  { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE },
+  { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 },
+
+  // Cyrillic
+  { 0x0401, 0x040D }, { 0x040F, 0x044F }, { 0x0451, 0x045C },
+  { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 },
+  { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 },
+  { 0x04F8, 0x04F9 },
+
+  // Armenian
+  { 0x0531, 0x0556 }, { 0x0561, 0x0587 },
+
+  // Hebrew
+  { 0x05D0, 0x05EA }, { 0x05F0, 0x05F4 },
+
+  // Arabic
+  { 0x0621, 0x063A }, { 0x0640, 0x0652 }, { 0x0670, 0x06B7 },
+  { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE }, { 0x06E5, 0x06E7 },
+
+  // Devanagari
+  { 0x0905, 0x0939 }, { 0x0958, 0x0962 },
+
+  // Bengali
+  { 0x0985, 0x098C }, { 0x098F, 0x0990 }, { 0x0993, 0x09A8 },
+  { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 }, { 0x09B6, 0x09B9 },
+  { 0x09DC, 0x09DD }, { 0x09DF, 0x09E1 }, { 0x09F0, 0x09F1 },
+
+  // Gurmukhi
+  { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 }, { 0x0A13, 0x0A28 },
+  { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 }, { 0x0A35, 0x0A36 },
+  { 0x0A38, 0x0A39 }, { 0x0A59, 0x0A5C }, { 0x0A5E, 0x0A5E },
+
+  // Gujarati
+  { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D }, { 0x0A8F, 0x0A91 },
+  { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 }, { 0x0AB2, 0x0AB3 },
+  { 0x0AB5, 0x0AB9 }, { 0x0AE0, 0x0AE0 },
+
+  // Oriya
+  { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 }, { 0x0B13, 0x0B28 },
+  { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 }, { 0x0B36, 0x0B39 },
+  { 0x0B5C, 0x0B5D }, { 0x0B5F, 0x0B61 },
+
+  // Tamil
+  { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 }, { 0x0B92, 0x0B95 },
+  { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C }, { 0x0B9E, 0x0B9F },
+  { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA }, { 0x0BAE, 0x0BB5 },
+  { 0x0BB7, 0x0BB9 },
+
+  // Telugu
+  { 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 },
+  { 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 }, { 0x0C60, 0x0C61 },
+
+  // Kannada
+  { 0x0C85, 0x0C8C }, { 0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 },
+  { 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 }, { 0x0CE0, 0x0CE1 },
+
+  // Malayalam
+  { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 }, { 0x0D12, 0x0D28 },
+  { 0x0D2A, 0x0D39 }, { 0x0D60, 0x0D61 },
+
+  // Thai
+  { 0x0E01, 0x0E30 }, { 0x0E32, 0x0E33 }, { 0x0E40, 0x0E46 },
+  { 0x0E4F, 0x0E5B },
+
+  // Lao
+  { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E87 },
+  { 0x0E88, 0x0E88 }, { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D },
+  { 0x0E94, 0x0E97 }, { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 },
+  { 0x0EA5, 0x0EA5 }, { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAA },
+  { 0x0EAB, 0x0EAB }, { 0x0EAD, 0x0EB0 }, { 0x0EB2, 0x0EB2 },
+  { 0x0EB3, 0x0EB3 }, { 0x0EBD, 0x0EBD }, { 0x0EC0, 0x0EC4 },
+  { 0x0EC6, 0x0EC6 },
+
+  // Georgian
+  { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 },
+
+  // Hangul
+  { 0x1100, 0x1159 }, { 0x1161, 0x11A2 }, { 0x11A8, 0x11F9 },
+
+  // Latin (2)
+  { 0x1E00, 0x1E9A }, { 0x1EA0, 0x1EF9 },
+
+  // Greek (2)
+  { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 },
+  { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 },
+  { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D },
+  { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC }, { 0x1FC2, 0x1FC4 },
+  { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 }, { 0x1FD6, 0x1FDB },
+  { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 }, { 0x1FF6, 0x1FFC },
+
+  // Hiragana
+  { 0x3041, 0x3094 }, { 0x309B, 0x309E },
+
+  // Katakana
+  { 0x30A1, 0x30FE },
+
+  // Bopmofo [sic]
+  { 0x3105, 0x312C },
+
+  // CJK Unified Ideographs
+  { 0x4E00, 0x9FA5 }, { 0xF900, 0xFA2D }, { 0xFB1F, 0xFB36 },
+  { 0xFB38, 0xFB3C }, { 0xFB3E, 0xFB3E }, { 0xFB40, 0xFB41 },
+  { 0xFB42, 0xFB44 }, { 0xFB46, 0xFBB1 }, { 0xFBD3, 0xFD3F },
+  { 0xFD50, 0xFD8F }, { 0xFD92, 0xFDC7 }, { 0xFDF0, 0xFDFB },
+  { 0xFE70, 0xFE72 }, { 0xFE74, 0xFE74 }, { 0xFE76, 0xFEFC },
+  { 0xFF21, 0xFF3A }, { 0xFF41, 0xFF5A }, { 0xFF66, 0xFFBE },
+  { 0xFFC2, 0xFFC7 }, { 0xFFCA, 0xFFCF }, { 0xFFD2, 0xFFD7 },
+  { 0xFFDA, 0xFFDC }
+};
+
+// C99 Annex D
+static const UnicodeCharRange C99AllowedIDChars[] = {
+  // Latin (1)
+  { 0x00AA, 0x00AA },
+
+  // Special characters (1)
+  { 0x00B5, 0x00B5 }, { 0x00B7, 0x00B7 },
+
+  // Latin (2)
+  { 0x00BA, 0x00BA }, { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 },
+  { 0x00F8, 0x01F5 }, { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 },
+
+  // Special characters (2)
+  { 0x02B0, 0x02B8 }, { 0x02BB, 0x02BB }, { 0x02BD, 0x02C1 },
+  { 0x02D0, 0x02D1 }, { 0x02E0, 0x02E4 }, { 0x037A, 0x037A },
+
+  // Greek (1)
+  { 0x0386, 0x0386 }, { 0x0388, 0x038A }, { 0x038C, 0x038C },
+  { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03D6 },
+  { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE },
+  { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 },
+
+  // Cyrillic
+  { 0x0401, 0x040C }, { 0x040E, 0x044F }, { 0x0451, 0x045C },
+  { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 },
+  { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 },
+  { 0x04F8, 0x04F9 },
+
+  // Armenian (1)
+  { 0x0531, 0x0556 },
+
+  // Special characters (3)
+  { 0x0559, 0x0559 },
+
+  // Armenian (2)
+  { 0x0561, 0x0587 },
+
+  // Hebrew
+  { 0x05B0, 0x05B9 }, { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF },
+  { 0x05C1, 0x05C2 }, { 0x05D0, 0x05EA }, { 0x05F0, 0x05F2 },
+
+  // Arabic (1)
+  { 0x0621, 0x063A }, { 0x0640, 0x0652 },
+
+  // Digits (1)
+  { 0x0660, 0x0669 },
+
+  // Arabic (2)
+  { 0x0670, 0x06B7 }, { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE },
+  { 0x06D0, 0x06DC }, { 0x06E5, 0x06E8 }, { 0x06EA, 0x06ED },
+
+  // Digits (2)
+  { 0x06F0, 0x06F9 },
+
+  // Devanagari and Special character 0x093D.
+  { 0x0901, 0x0903 }, { 0x0905, 0x0939 }, { 0x093D, 0x094D },
+  { 0x0950, 0x0952 }, { 0x0958, 0x0963 },
+
+  // Digits (3)
+  { 0x0966, 0x096F },
+
+  // Bengali (1)
+  { 0x0981, 0x0983 }, { 0x0985, 0x098C }, { 0x098F, 0x0990 },
+  { 0x0993, 0x09A8 }, { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 },
+  { 0x09B6, 0x09B9 }, { 0x09BE, 0x09C4 }, { 0x09C7, 0x09C8 },
+  { 0x09CB, 0x09CD }, { 0x09DC, 0x09DD }, { 0x09DF, 0x09E3 },
+
+  // Digits (4)
+  { 0x09E6, 0x09EF },
+
+  // Bengali (2)
+  { 0x09F0, 0x09F1 },
+
+  // Gurmukhi (1)
+  { 0x0A02, 0x0A02 }, { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 },
+  { 0x0A13, 0x0A28 }, { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 },
+  { 0x0A35, 0x0A36 }, { 0x0A38, 0x0A39 }, { 0x0A3E, 0x0A42 },
+  { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A59, 0x0A5C },
+  { 0x0A5E, 0x0A5E },
+
+  // Digits (5)
+  { 0x0A66, 0x0A6F },
+
+  // Gurmukhi (2)
+  { 0x0A74, 0x0A74 },
+
+  // Gujarati
+  { 0x0A81, 0x0A83 }, { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D },
+  { 0x0A8F, 0x0A91 }, { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 },
+  { 0x0AB2, 0x0AB3 }, { 0x0AB5, 0x0AB9 }, { 0x0ABD, 0x0AC5 },
+  { 0x0AC7, 0x0AC9 }, { 0x0ACB, 0x0ACD }, { 0x0AD0, 0x0AD0 },
+  { 0x0AE0, 0x0AE0 },
+
+  // Digits (6)
+  { 0x0AE6, 0x0AEF },
+
+  // Oriya and Special character 0x0B3D
+  { 0x0B01, 0x0B03 }, { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 },
+  { 0x0B13, 0x0B28 }, { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 },
+  { 0x0B36, 0x0B39 }, { 0x0B3D, 0x0B43 }, { 0x0B47, 0x0B48 },
+  { 0x0B4B, 0x0B4D }, { 0x0B5C, 0x0B5D }, { 0x0B5F, 0x0B61 },
+
+  // Digits (7)
+  { 0x0B66, 0x0B6F },
+
+  // Tamil
+  { 0x0B82, 0x0B83 }, { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 },
+  { 0x0B92, 0x0B95 }, { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C },
+  { 0x0B9E, 0x0B9F }, { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA },
+  { 0x0BAE, 0x0BB5 }, { 0x0BB7, 0x0BB9 }, { 0x0BBE, 0x0BC2 },
+  { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD },
+
+  // Digits (8)
+  { 0x0BE7, 0x0BEF },
+
+  // Telugu
+  { 0x0C01, 0x0C03 }, { 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 },
+  { 0x0C12, 0x0C28 }, { 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 },
+  { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D },
+  { 0x0C60, 0x0C61 },
+
+  // Digits (9)
+  { 0x0C66, 0x0C6F },
+
+  // Kannada
+  { 0x0C82, 0x0C83 }, { 0x0C85, 0x0C8C }, { 0x0C8E, 0x0C90 },
+  { 0x0C92, 0x0CA8 }, { 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 },
+  { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD },
+  { 0x0CDE, 0x0CDE }, { 0x0CE0, 0x0CE1 },
+
+  // Digits (10)
+  { 0x0CE6, 0x0CEF },
+
+  // Malayalam
+  { 0x0D02, 0x0D03 }, { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 },
+  { 0x0D12, 0x0D28 }, { 0x0D2A, 0x0D39 }, { 0x0D3E, 0x0D43 },
+  { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D60, 0x0D60 },
+
+  // Digits (11)
+  { 0x0D66, 0x0D6F },
+
+  // Thai...including Digits { 0x0E50, 0x0E59 }
+  { 0x0E01, 0x0E3A }, { 0x0E40, 0x0E5B },
+
+  // Lao (1)
+  { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E88 },
+  { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D }, { 0x0E94, 0x0E97 },
+  { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 }, { 0x0EA5, 0x0EA5 },
+  { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAB }, { 0x0EAD, 0x0EAE },
+  { 0x0EB0, 0x0EB9 }, { 0x0EBB, 0x0EBD }, { 0x0EC0, 0x0EC4 },
+  { 0x0EC6, 0x0EC6 }, { 0x0EC8, 0x0ECD },
+
+  // Digits (12)
+  { 0x0ED0, 0x0ED9 },
+
+  // Lao (2)
+  { 0x0EDC, 0x0EDD },
+
+  // Tibetan (1)
+  { 0x0F00, 0x0F00 }, { 0x0F18, 0x0F19 },
+
+  // Digits (13)
+  { 0x0F20, 0x0F33 },
+
+  // Tibetan (2)
+  { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
+  { 0x0F3E, 0x0F47 }, { 0x0F49, 0x0F69 }, { 0x0F71, 0x0F84 },
+  { 0x0F86, 0x0F8B }, { 0x0F90, 0x0F95 }, { 0x0F97, 0x0F97 },
+  { 0x0F99, 0x0FAD }, { 0x0FB1, 0x0FB7 }, { 0x0FB9, 0x0FB9 },
+
+  // Georgian
+  { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 },
+
+  // Latin (3)
+  { 0x1E00, 0x1E9B }, { 0x1EA0, 0x1EF9 },
+
+  // Greek (2)
+  { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 },
+  { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 },
+  { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D },
+  { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC },
+
+  // Special characters (4)
+  { 0x1FBE, 0x1FBE },
+
+  // Greek (3)
+  { 0x1FC2, 0x1FC4 }, { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 },
+  { 0x1FD6, 0x1FDB }, { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 },
+  { 0x1FF6, 0x1FFC },
+
+  // Special characters (5)
+  { 0x203F, 0x2040 },
+
+  // Latin (4)
+  { 0x207F, 0x207F },
+
+  // Special characters (6)
+  { 0x2102, 0x2102 }, { 0x2107, 0x2107 }, { 0x210A, 0x2113 },
+  { 0x2115, 0x2115 }, { 0x2118, 0x211D }, { 0x2124, 0x2124 },
+  { 0x2126, 0x2126 }, { 0x2128, 0x2128 }, { 0x212A, 0x2131 },
+  { 0x2133, 0x2138 }, { 0x2160, 0x2182 }, { 0x3005, 0x3007 },
+  { 0x3021, 0x3029 },
+
+  // Hiragana
+  { 0x3041, 0x3093 }, { 0x309B, 0x309C },
+
+  // Katakana
+  { 0x30A1, 0x30F6 }, { 0x30FB, 0x30FC },
+
+  // Bopmofo [sic]
+  { 0x3105, 0x312C },
+
+  // CJK Unified Ideographs
+  { 0x4E00, 0x9FA5 },
+
+  // Hangul
+  { 0xAC00, 0xD7A3 }
+};
+
+// C11 D.2, C++11 [charname.disallowed]
+static const UnicodeCharRange C11DisallowedInitialIDChars[] = {
+  { 0x0300, 0x036F }, { 0x1DC0, 0x1DFF }, { 0x20D0, 0x20FF },
+  { 0xFE20, 0xFE2F }
+};
+
+// C99 6.4.2.1p3: The initial character [of an identifier] shall not be a
+// universal character name designating a digit.
+// C99 Annex D defines these characters as "Digits".
+static const UnicodeCharRange C99DisallowedInitialIDChars[] = {
+  { 0x0660, 0x0669 }, { 0x06F0, 0x06F9 }, { 0x0966, 0x096F },
+  { 0x09E6, 0x09EF }, { 0x0A66, 0x0A6F }, { 0x0AE6, 0x0AEF },
+  { 0x0B66, 0x0B6F }, { 0x0BE7, 0x0BEF }, { 0x0C66, 0x0C6F },
+  { 0x0CE6, 0x0CEF }, { 0x0D66, 0x0D6F }, { 0x0E50, 0x0E59 },
+  { 0x0ED0, 0x0ED9 }, { 0x0F20, 0x0F33 }
+};
+
+// Unicode v6.2, chapter 6.2, table 6-2.
+static const UnicodeCharRange UnicodeWhitespaceChars[] = {
+  { 0x0085, 0x0085 }, { 0x00A0, 0x00A0 }, { 0x1680, 0x1680 },
+  { 0x180E, 0x180E }, { 0x2000, 0x200A }, { 0x2028, 0x2029 },
+  { 0x202F, 0x202F }, { 0x205F, 0x205F }, { 0x3000, 0x3000 }
+};
+
+#endif