summaryrefslogtreecommitdiffstats
path: root/lib/Lex
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lex')
-rw-r--r--lib/Lex/CMakeLists.txt1
-rw-r--r--lib/Lex/HeaderMap.cpp4
-rw-r--r--lib/Lex/HeaderSearch.cpp311
-rw-r--r--lib/Lex/Lexer.cpp797
-rw-r--r--lib/Lex/LiteralSupport.cpp54
-rw-r--r--lib/Lex/MacroArgs.cpp18
-rw-r--r--lib/Lex/MacroArgs.h4
-rw-r--r--lib/Lex/MacroInfo.cpp119
-rw-r--r--lib/Lex/ModuleMap.cpp428
-rw-r--r--lib/Lex/PPConditionalDirectiveRecord.cpp120
-rw-r--r--lib/Lex/PPDirectives.cpp229
-rw-r--r--lib/Lex/PPExpressions.cpp28
-rw-r--r--lib/Lex/PPLexerChange.cpp53
-rw-r--r--lib/Lex/PPMacroExpansion.cpp456
-rw-r--r--lib/Lex/PTHLexer.cpp11
-rw-r--r--lib/Lex/Pragma.cpp220
-rw-r--r--lib/Lex/PreprocessingRecord.cpp189
-rw-r--r--lib/Lex/Preprocessor.cpp185
-rw-r--r--lib/Lex/PreprocessorLexer.cpp4
-rw-r--r--lib/Lex/TokenConcatenation.cpp36
-rw-r--r--lib/Lex/TokenLexer.cpp18
-rw-r--r--lib/Lex/UnicodeCharSets.h496
22 files changed, 2473 insertions, 1308 deletions
diff --git a/lib/Lex/CMakeLists.txt b/lib/Lex/CMakeLists.txt
index 241abbc..2ee4682 100644
--- a/lib/Lex/CMakeLists.txt
+++ b/lib/Lex/CMakeLists.txt
@@ -12,6 +12,7 @@ add_clang_library(clangLex
ModuleMap.cpp
PPCaching.cpp
PPCallbacks.cpp
+ PPConditionalDirectiveRecord.cpp
PPDirectives.cpp
PPExpressions.cpp
PPLexerChange.cpp
diff --git a/lib/Lex/HeaderMap.cpp b/lib/Lex/HeaderMap.cpp
index 7dc0491..dcf1f0c 100644
--- a/lib/Lex/HeaderMap.cpp
+++ b/lib/Lex/HeaderMap.cpp
@@ -12,13 +12,13 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/HeaderMap.h"
+#include "clang/Basic/CharInfo.h"
#include "clang/Basic/FileManager.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
-#include <cctype>
#include <cstdio>
using namespace clang;
@@ -62,7 +62,7 @@ static inline unsigned HashHMapKey(StringRef Str) {
const char *S = Str.begin(), *End = Str.end();
for (; S != End; S++)
- Result += tolower(*S) * 13;
+ Result += toLowercase(*S) * 13;
return Result;
}
diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp
index 67000b68..304bd69 100644
--- a/lib/Lex/HeaderSearch.cpp
+++ b/lib/Lex/HeaderSearch.cpp
@@ -12,17 +12,20 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/HeaderSearch.h"
-#include "clang/Lex/HeaderSearchOptions.h"
-#include "clang/Lex/HeaderMap.h"
-#include "clang/Lex/Lexer.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/IdentifierTable.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Path.h"
+#include "clang/Lex/HeaderMap.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/Lexer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Capacity.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
#include <cstdio>
+#if defined(LLVM_ON_UNIX)
+#include <limits.h>
+#endif
using namespace clang;
const IdentifierInfo *
@@ -39,12 +42,12 @@ HeaderFileInfo::getControllingMacro(ExternalIdentifierLookup *External) {
ExternalHeaderFileInfoSource::~ExternalHeaderFileInfoSource() {}
-HeaderSearch::HeaderSearch(llvm::IntrusiveRefCntPtr<HeaderSearchOptions> HSOpts,
+HeaderSearch::HeaderSearch(IntrusiveRefCntPtr<HeaderSearchOptions> HSOpts,
FileManager &FM, DiagnosticsEngine &Diags,
const LangOptions &LangOpts,
const TargetInfo *Target)
: HSOpts(HSOpts), FileMgr(FM), FrameworkMap(64),
- ModMap(FileMgr, *Diags.getClient(), LangOpts, Target)
+ ModMap(FileMgr, *Diags.getClient(), LangOpts, Target, *this)
{
AngledDirIdx = 0;
SystemDirIdx = 0;
@@ -134,7 +137,7 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, bool AllowSearch) {
if (Module || !AllowSearch)
return Module;
- // Look through the various header search paths to load any avai;able module
+ // Look through the various header search paths to load any available module
// maps, searching for a module map that describes this module.
for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) {
if (SearchDirs[Idx].isFramework()) {
@@ -178,8 +181,22 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, bool AllowSearch) {
if (Module)
break;
}
+
+ // If we've already performed the exhaustive search for module maps in this
+ // search directory, don't do it again.
+ if (SearchDirs[Idx].haveSearchedAllModuleMaps())
+ continue;
+
+ // Load all module maps in the immediate subdirectories of this search
+ // directory.
+ loadSubdirectoryModuleMaps(SearchDirs[Idx]);
+
+ // Look again for the module.
+ Module = ModMap.findModule(ModuleName);
+ if (Module)
+ break;
}
-
+
return Module;
}
@@ -263,6 +280,55 @@ const FileEntry *DirectoryLookup::LookupFile(
return Result;
}
+/// \brief Given a framework directory, find the top-most framework directory.
+///
+/// \param FileMgr The file manager to use for directory lookups.
+/// \param DirName The name of the framework directory.
+/// \param SubmodulePath Will be populated with the submodule path from the
+/// returned top-level module to the originally named framework.
+static const DirectoryEntry *
+getTopFrameworkDir(FileManager &FileMgr, StringRef DirName,
+ SmallVectorImpl<std::string> &SubmodulePath) {
+ assert(llvm::sys::path::extension(DirName) == ".framework" &&
+ "Not a framework directory");
+
+ // Note: as an egregious but useful hack we use the real path here, because
+ // frameworks moving between top-level frameworks to embedded frameworks tend
+ // to be symlinked, and we base the logical structure of modules on the
+ // physical layout. In particular, we need to deal with crazy includes like
+ //
+ // #include <Foo/Frameworks/Bar.framework/Headers/Wibble.h>
+ //
+ // where 'Bar' used to be embedded in 'Foo', is now a top-level framework
+ // which one should access with, e.g.,
+ //
+ // #include <Bar/Wibble.h>
+ //
+ // Similar issues occur when a top-level framework has moved into an
+ // embedded framework.
+ const DirectoryEntry *TopFrameworkDir = FileMgr.getDirectory(DirName);
+ DirName = FileMgr.getCanonicalName(TopFrameworkDir);
+ do {
+ // Get the parent directory name.
+ DirName = llvm::sys::path::parent_path(DirName);
+ if (DirName.empty())
+ break;
+
+ // Determine whether this directory exists.
+ const DirectoryEntry *Dir = FileMgr.getDirectory(DirName);
+ if (!Dir)
+ break;
+
+ // If this is a framework directory, then we're a subframework of this
+ // framework.
+ if (llvm::sys::path::extension(DirName) == ".framework") {
+ SubmodulePath.push_back(llvm::sys::path::stem(DirName));
+ TopFrameworkDir = Dir;
+ }
+ } while (true);
+
+ return TopFrameworkDir;
+}
/// DoFrameworkLookup - Do a lookup of the specified file in the current
/// DirectoryLookup, which is a framework directory.
@@ -334,17 +400,6 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup(
RelativePath->clear();
RelativePath->append(Filename.begin()+SlashPos+1, Filename.end());
}
-
- // If we're allowed to look for modules, try to load or create the module
- // corresponding to this framework.
- Module *Module = 0;
- if (SuggestedModule) {
- if (const DirectoryEntry *FrameworkDir
- = FileMgr.getDirectory(FrameworkName)) {
- bool IsSystem = getDirCharacteristic() != SrcMgr::C_User;
- Module = HS.loadFrameworkModule(ModuleName, FrameworkDir, IsSystem);
- }
- }
// Check "/System/Library/Frameworks/Cocoa.framework/Headers/file.h"
unsigned OrigSize = FrameworkName.size();
@@ -357,28 +412,64 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup(
SearchPath->append(FrameworkName.begin(), FrameworkName.end()-1);
}
- // Determine whether this is the module we're building or not.
- bool AutomaticImport = Module;
FrameworkName.append(Filename.begin()+SlashPos+1, Filename.end());
- if (const FileEntry *FE = FileMgr.getFile(FrameworkName.str(),
- /*openFile=*/!AutomaticImport)) {
- if (AutomaticImport)
- *SuggestedModule = HS.findModuleForHeader(FE);
- return FE;
+ const FileEntry *FE = FileMgr.getFile(FrameworkName.str(),
+ /*openFile=*/!SuggestedModule);
+ if (!FE) {
+ // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h"
+ const char *Private = "Private";
+ FrameworkName.insert(FrameworkName.begin()+OrigSize, Private,
+ Private+strlen(Private));
+ if (SearchPath != NULL)
+ SearchPath->insert(SearchPath->begin()+OrigSize, Private,
+ Private+strlen(Private));
+
+ FE = FileMgr.getFile(FrameworkName.str(), /*openFile=*/!SuggestedModule);
}
- // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h"
- const char *Private = "Private";
- FrameworkName.insert(FrameworkName.begin()+OrigSize, Private,
- Private+strlen(Private));
- if (SearchPath != NULL)
- SearchPath->insert(SearchPath->begin()+OrigSize, Private,
- Private+strlen(Private));
-
- const FileEntry *FE = FileMgr.getFile(FrameworkName.str(),
- /*openFile=*/!AutomaticImport);
- if (FE && AutomaticImport)
- *SuggestedModule = HS.findModuleForHeader(FE);
+ // If we found the header and are allowed to suggest a module, do so now.
+ if (FE && SuggestedModule) {
+ // Find the framework in which this header occurs.
+ StringRef FrameworkPath = FE->getName();
+ bool FoundFramework = false;
+ do {
+ // Get the parent directory name.
+ FrameworkPath = llvm::sys::path::parent_path(FrameworkPath);
+ if (FrameworkPath.empty())
+ break;
+
+ // Determine whether this directory exists.
+ const DirectoryEntry *Dir = FileMgr.getDirectory(FrameworkPath);
+ if (!Dir)
+ break;
+
+ // If this is a framework directory, then we're a subframework of this
+ // framework.
+ if (llvm::sys::path::extension(FrameworkPath) == ".framework") {
+ FoundFramework = true;
+ break;
+ }
+ } while (true);
+
+ if (FoundFramework) {
+ // Find the top-level framework based on this framework.
+ SmallVector<std::string, 4> SubmodulePath;
+ const DirectoryEntry *TopFrameworkDir
+ = ::getTopFrameworkDir(FileMgr, FrameworkPath, SubmodulePath);
+
+ // Determine the name of the top-level framework.
+ StringRef ModuleName = llvm::sys::path::stem(TopFrameworkDir->getName());
+
+ // Load this framework module. If that succeeds, find the suggested module
+ // for this header, if any.
+ bool IsSystem = getDirCharacteristic() != SrcMgr::C_User;
+ if (HS.loadFrameworkModule(ModuleName, TopFrameworkDir, IsSystem)) {
+ *SuggestedModule = HS.findModuleForHeader(FE);
+ }
+ } else {
+ *SuggestedModule = HS.findModuleForHeader(FE);
+ }
+ }
return FE;
}
@@ -584,7 +675,8 @@ const FileEntry *HeaderSearch::
LookupSubframeworkHeader(StringRef Filename,
const FileEntry *ContextFileEnt,
SmallVectorImpl<char> *SearchPath,
- SmallVectorImpl<char> *RelativePath) {
+ SmallVectorImpl<char> *RelativePath,
+ Module **SuggestedModule) {
assert(ContextFileEnt && "No context file?");
// Framework names must have a '/' in the filename. Find it.
@@ -673,6 +765,26 @@ LookupSubframeworkHeader(StringRef Filename,
// of evaluation.
unsigned DirInfo = getFileInfo(ContextFileEnt).DirInfo;
getFileInfo(FE).DirInfo = DirInfo;
+
+ // If we're supposed to suggest a module, look for one now.
+ if (SuggestedModule) {
+ // Find the top-level framework based on this framework.
+ FrameworkName.pop_back(); // remove the trailing '/'
+ SmallVector<std::string, 4> SubmodulePath;
+ const DirectoryEntry *TopFrameworkDir
+ = ::getTopFrameworkDir(FileMgr, FrameworkName, SubmodulePath);
+
+ // Determine the name of the top-level framework.
+ StringRef ModuleName = llvm::sys::path::stem(TopFrameworkDir->getName());
+
+ // Load this framework module. If that succeeds, find the suggested module
+ // for this header, if any.
+ bool IsSystem = false;
+ if (loadFrameworkModule(ModuleName, TopFrameworkDir, IsSystem)) {
+ *SuggestedModule = findModuleForHeader(FE);
+ }
+ }
+
return FE;
}
@@ -708,6 +820,7 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
const HeaderFileInfo &OtherHFI) {
HFI.isImport |= OtherHFI.isImport;
HFI.isPragmaOnce |= OtherHFI.isPragmaOnce;
+ HFI.isModuleHeader |= OtherHFI.isModuleHeader;
HFI.NumIncludes += OtherHFI.NumIncludes;
if (!HFI.ControllingMacro && !HFI.ControllingMacroID) {
@@ -749,7 +862,16 @@ bool HeaderSearch::isFileMultipleIncludeGuarded(const FileEntry *File) {
if (ExternalSource && !HFI.Resolved)
mergeHeaderFileInfo(HFI, ExternalSource->GetHeaderFileInfo(File));
- return HFI.isPragmaOnce || HFI.ControllingMacro || HFI.ControllingMacroID;
+ return HFI.isPragmaOnce || HFI.isImport ||
+ HFI.ControllingMacro || HFI.ControllingMacroID;
+}
+
+void HeaderSearch::MarkFileModuleHeader(const FileEntry *FE) {
+ if (FE->getUID() >= FileInfo.size())
+ FileInfo.resize(FE->getUID()+1);
+
+ HeaderFileInfo &HFI = FileInfo[FE->getUID()];
+ HFI.isModuleHeader = true;
}
void HeaderSearch::setHeaderFileInfoForUID(HeaderFileInfo HFI, unsigned UID) {
@@ -809,7 +931,7 @@ StringRef HeaderSearch::getUniqueFrameworkName(StringRef Framework) {
bool HeaderSearch::hasModuleMap(StringRef FileName,
const DirectoryEntry *Root) {
- llvm::SmallVector<const DirectoryEntry *, 2> FixUpDirectories;
+ SmallVector<const DirectoryEntry *, 2> FixUpDirectories;
StringRef DirName = FileName;
do {
@@ -849,7 +971,12 @@ bool HeaderSearch::hasModuleMap(StringRef FileName,
} while (true);
}
-Module *HeaderSearch::findModuleForHeader(const FileEntry *File) {
+Module *HeaderSearch::findModuleForHeader(const FileEntry *File) const {
+ if (ExternalSource) {
+ // Make sure the external source has handled header info about this file,
+ // which includes whether the file is part of a module.
+ (void)getFileInfo(File);
+ }
if (Module *Mod = ModMap.findModuleForHeader(File))
return Mod;
@@ -897,80 +1024,21 @@ Module *HeaderSearch::loadFrameworkModule(StringRef Name,
return ModMap.findModule(Name);
}
- // The top-level framework directory, from which we'll infer a framework
- // module.
- const DirectoryEntry *TopFrameworkDir = Dir;
-
- // The path from the module we're actually looking for back to the top-level
- // framework name.
- llvm::SmallVector<StringRef, 2> SubmodulePath;
+ // Figure out the top-level framework directory and the submodule path from
+ // that top-level framework to the requested framework.
+ SmallVector<std::string, 2> SubmodulePath;
SubmodulePath.push_back(Name);
-
- // Walk the directory structure to find any enclosing frameworks.
-#ifdef LLVM_ON_UNIX
- // Note: as an egregious but useful hack we use the real path here, because
- // frameworks moving from top-level frameworks to embedded frameworks tend
- // to be symlinked from the top-level location to the embedded location,
- // and we need to resolve lookups as if we had found the embedded location.
- char RealDirName[PATH_MAX];
- StringRef DirName;
- if (realpath(Dir->getName(), RealDirName))
- DirName = RealDirName;
- else
- DirName = Dir->getName();
-#else
- StringRef DirName = Dir->getName();
-#endif
- do {
- // Get the parent directory name.
- DirName = llvm::sys::path::parent_path(DirName);
- if (DirName.empty())
- break;
-
- // Determine whether this directory exists.
- Dir = FileMgr.getDirectory(DirName);
- if (!Dir)
- break;
-
- // If this is a framework directory, then we're a subframework of this
- // framework.
- if (llvm::sys::path::extension(DirName) == ".framework") {
- SubmodulePath.push_back(llvm::sys::path::stem(DirName));
- TopFrameworkDir = Dir;
- }
- } while (true);
+ const DirectoryEntry *TopFrameworkDir
+ = ::getTopFrameworkDir(FileMgr, Dir->getName(), SubmodulePath);
- // Determine whether we're allowed to infer a module map.
- bool canInfer = false;
- if (llvm::sys::path::has_parent_path(TopFrameworkDir->getName())) {
- // Figure out the parent path.
- StringRef Parent = llvm::sys::path::parent_path(TopFrameworkDir->getName());
- if (const DirectoryEntry *ParentDir = FileMgr.getDirectory(Parent)) {
- // If there's a module map file in the parent directory, it can
- // explicitly allow us to infer framework modules.
- switch (loadModuleMapFile(ParentDir)) {
- case LMM_AlreadyLoaded:
- case LMM_NewlyLoaded: {
- StringRef Name = llvm::sys::path::stem(TopFrameworkDir->getName());
- canInfer = ModMap.canInferFrameworkModule(ParentDir, Name, IsSystem);
- break;
- }
- case LMM_InvalidModuleMap:
- case LMM_NoDirectory:
- break;
- }
- }
- }
-
- // If we're not allowed to infer a module map, we're done.
- if (!canInfer)
- return 0;
// Try to infer a module map from the top-level framework directory.
Module *Result = ModMap.inferFrameworkModule(SubmodulePath.back(),
TopFrameworkDir,
IsSystem,
/*Parent=*/0);
+ if (!Result)
+ return 0;
// Follow the submodule path to find the requested (sub)framework module
// within the top-level framework module.
@@ -1034,7 +1102,7 @@ HeaderSearch::loadModuleMapFile(const DirectoryEntry *Dir) {
return LMM_InvalidModuleMap;
}
-void HeaderSearch::collectAllModules(llvm::SmallVectorImpl<Module *> &Modules) {
+void HeaderSearch::collectAllModules(SmallVectorImpl<Module *> &Modules) {
Modules.clear();
// Load module maps for each of the header search directories.
@@ -1072,13 +1140,7 @@ void HeaderSearch::collectAllModules(llvm::SmallVectorImpl<Module *> &Modules) {
// Try to load module map files for immediate subdirectories of this search
// directory.
- llvm::error_code EC;
- SmallString<128> DirNative;
- llvm::sys::path::native(SearchDirs[Idx].getDir()->getName(), DirNative);
- for (llvm::sys::fs::directory_iterator Dir(DirNative.str(), EC), DirEnd;
- Dir != DirEnd && !EC; Dir.increment(EC)) {
- loadModuleMapFile(Dir->path());
- }
+ loadSubdirectoryModuleMaps(SearchDirs[Idx]);
}
// Populate the list of modules.
@@ -1088,3 +1150,18 @@ void HeaderSearch::collectAllModules(llvm::SmallVectorImpl<Module *> &Modules) {
Modules.push_back(M->getValue());
}
}
+
+void HeaderSearch::loadSubdirectoryModuleMaps(DirectoryLookup &SearchDir) {
+ if (SearchDir.haveSearchedAllModuleMaps())
+ return;
+
+ llvm::error_code EC;
+ SmallString<128> DirNative;
+ llvm::sys::path::native(SearchDir.getDir()->getName(), DirNative);
+ for (llvm::sys::fs::directory_iterator Dir(DirNative.str(), EC), DirEnd;
+ Dir != DirEnd && !EC; Dir.increment(EC)) {
+ loadModuleMapFile(Dir->path());
+ }
+
+ SearchDir.setSearchedAllModuleMaps(true);
+}
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index a5ba7db..ed4666a 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -25,19 +25,21 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/Lexer.h"
-#include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/LexDiagnostic.h"
-#include "clang/Lex/CodeCompletionHandler.h"
+#include "clang/Basic/CharInfo.h"
#include "clang/Basic/SourceManager.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "clang/Lex/CodeCompletionHandler.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "UnicodeCharSets.h"
#include <cstring>
using namespace clang;
-static void InitCharacterInfo();
-
//===----------------------------------------------------------------------===//
// Token Class Implementation
//===----------------------------------------------------------------------===//
@@ -64,8 +66,6 @@ void Lexer::anchor() { }
void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
const char *BufEnd) {
- InitCharacterInfo();
-
BufferStart = BufStart;
BufferPtr = BufPtr;
BufferEnd = BufEnd;
@@ -122,8 +122,15 @@ Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
InputFile->getBufferEnd());
- // Default to keeping comments if the preprocessor wants them.
- SetCommentRetentionState(PP.getCommentRetentionState());
+ resetExtendedTokenMode();
+}
+
+void Lexer::resetExtendedTokenMode() {
+ assert(PP && "Cannot reset token mode without a preprocessor");
+ if (LangOpts.TraditionalCPP)
+ SetKeepWhitespaceMode(true);
+ else
+ SetCommentRetentionState(PP->getCommentRetentionState());
}
/// Lexer constructor - Create a new raw lexer object. This object is only
@@ -233,16 +240,67 @@ void Lexer::Stringify(SmallVectorImpl<char> &Str) {
// Token Spelling
//===----------------------------------------------------------------------===//
+/// \brief Slow case of getSpelling. Extract the characters comprising the
+/// spelling of this token from the provided input buffer.
+static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
+ const LangOptions &LangOpts, char *Spelling) {
+ assert(Tok.needsCleaning() && "getSpellingSlow called on simple token");
+
+ size_t Length = 0;
+ const char *BufEnd = BufPtr + Tok.getLength();
+
+ if (Tok.is(tok::string_literal)) {
+ // Munch the encoding-prefix and opening double-quote.
+ while (BufPtr < BufEnd) {
+ unsigned Size;
+ Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+ BufPtr += Size;
+
+ if (Spelling[Length - 1] == '"')
+ break;
+ }
+
+ // Raw string literals need special handling; trigraph expansion and line
+ // splicing do not occur within their d-char-sequence nor within their
+ // r-char-sequence.
+ if (Length >= 2 &&
+ Spelling[Length - 2] == 'R' && Spelling[Length - 1] == '"') {
+ // Search backwards from the end of the token to find the matching closing
+ // quote.
+ const char *RawEnd = BufEnd;
+ do --RawEnd; while (*RawEnd != '"');
+ size_t RawLength = RawEnd - BufPtr + 1;
+
+ // Everything between the quotes is included verbatim in the spelling.
+ memcpy(Spelling + Length, BufPtr, RawLength);
+ Length += RawLength;
+ BufPtr += RawLength;
+
+ // The rest of the token is lexed normally.
+ }
+ }
+
+ while (BufPtr < BufEnd) {
+ unsigned Size;
+ Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+ BufPtr += Size;
+ }
+
+ assert(Length < Tok.getLength() &&
+ "NeedsCleaning flag set on token that didn't need cleaning!");
+ return Length;
+}
+
/// getSpelling() - Return the 'spelling' of this token. The spelling of a
/// token are the characters used to represent the token in the source file
/// after trigraph expansion and escaped-newline folding. In particular, this
/// wants to get the true, uncanonicalized, spelling of things like digraphs
/// UCNs, etc.
StringRef Lexer::getSpelling(SourceLocation loc,
- SmallVectorImpl<char> &buffer,
- const SourceManager &SM,
- const LangOptions &options,
- bool *invalid) {
+ SmallVectorImpl<char> &buffer,
+ const SourceManager &SM,
+ const LangOptions &options,
+ bool *invalid) {
// Break down the source location.
std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(loc);
@@ -267,17 +325,10 @@ StringRef Lexer::getSpelling(SourceLocation loc,
// Common case: no need for cleaning.
if (!token.needsCleaning())
return StringRef(tokenBegin, length);
-
- // Hard case, we need to relex the characters into the string.
- buffer.clear();
- buffer.reserve(length);
-
- for (const char *ti = tokenBegin, *te = ti + length; ti != te; ) {
- unsigned charSize;
- buffer.push_back(Lexer::getCharAndSizeNoWarn(ti, charSize, options));
- ti += charSize;
- }
+ // Hard case, we need to relex the characters into the string.
+ buffer.resize(length);
+ buffer.resize(getSpellingSlow(token, tokenBegin, options, buffer.data()));
return StringRef(buffer.data(), buffer.size());
}
@@ -289,31 +340,22 @@ StringRef Lexer::getSpelling(SourceLocation loc,
std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
const LangOptions &LangOpts, bool *Invalid) {
assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
-
- // If this token contains nothing interesting, return it directly.
+
bool CharDataInvalid = false;
- const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
+ const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
&CharDataInvalid);
if (Invalid)
*Invalid = CharDataInvalid;
if (CharDataInvalid)
return std::string();
-
+
+ // If this token contains nothing interesting, return it directly.
if (!Tok.needsCleaning())
- return std::string(TokStart, TokStart+Tok.getLength());
-
+ return std::string(TokStart, TokStart + Tok.getLength());
+
std::string Result;
- Result.reserve(Tok.getLength());
-
- // Otherwise, hard case, relex the characters into the string.
- for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
- Ptr != End; ) {
- unsigned CharSize;
- Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts));
- Ptr += CharSize;
- }
- assert(Result.size() != unsigned(Tok.getLength()) &&
- "NeedsCleaning flag set on something that didn't need cleaning!");
+ Result.resize(Tok.getLength());
+ Result.resize(getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin()));
return Result;
}
@@ -336,10 +378,12 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
// NOTE: this has to be checked *before* testing for an IdentifierInfo.
if (Tok.is(tok::raw_identifier))
TokStart = Tok.getRawIdentifierData();
- else if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
- // Just return the string from the identifier table, which is very quick.
- Buffer = II->getNameStart();
- return II->getLength();
+ else if (!Tok.hasUCN()) {
+ if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
+ // Just return the string from the identifier table, which is very quick.
+ Buffer = II->getNameStart();
+ return II->getLength();
+ }
}
// NOTE: this can be checked even after testing for an IdentifierInfo.
@@ -365,23 +409,10 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
}
// Otherwise, hard case, relex the characters into the string.
- char *OutBuf = const_cast<char*>(Buffer);
- for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
- Ptr != End; ) {
- unsigned CharSize;
- *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts);
- Ptr += CharSize;
- }
- assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
- "NeedsCleaning flag set on something that didn't need cleaning!");
-
- return OutBuf-Buffer;
+ return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
}
-
-static bool isWhitespace(unsigned char c);
-
/// MeasureTokenLength - Relex the token at the specified location and return
/// its length in bytes in the input file. If the token needs cleaning (e.g.
/// includes a trigraph or an escaped newline) then this count includes bytes
@@ -389,6 +420,17 @@ static bool isWhitespace(unsigned char c);
unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
const SourceManager &SM,
const LangOptions &LangOpts) {
+ Token TheTok;
+ if (getRawToken(Loc, TheTok, SM, LangOpts))
+ return 0;
+ return TheTok.getLength();
+}
+
+/// \brief Relex the token at the specified location.
+/// \returns true if there was a failure, false on success.
+bool Lexer::getRawToken(SourceLocation Loc, Token &Result,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
// TODO: this could be special cased for common tokens like identifiers, ')',
// etc to make this faster, if it mattered. Just look at StrData[0] to handle
// all obviously single-char tokens. This could use
@@ -402,20 +444,19 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
bool Invalid = false;
StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
if (Invalid)
- return 0;
+ return true;
const char *StrData = Buffer.data()+LocInfo.second;
if (isWhitespace(StrData[0]))
- return 0;
+ return true;
// Create a lexer starting at the beginning of this token.
Lexer TheLexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts,
Buffer.begin(), StrData, Buffer.end());
TheLexer.SetCommentRetentionState(true);
- Token TheTok;
- TheLexer.LexFromRawLexer(TheTok);
- return TheTok.getLength();
+ TheLexer.LexFromRawLexer(Result);
+ return false;
}
static SourceLocation getBeginningOfFileToken(SourceLocation Loc,
@@ -969,163 +1010,8 @@ StringRef Lexer::getImmediateMacroName(SourceLocation Loc,
return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
}
-//===----------------------------------------------------------------------===//
-// Character information.
-//===----------------------------------------------------------------------===//
-
-enum {
- CHAR_HORZ_WS = 0x01, // ' ', '\t', '\f', '\v'. Note, no '\0'
- CHAR_VERT_WS = 0x02, // '\r', '\n'
- CHAR_LETTER = 0x04, // a-z,A-Z
- CHAR_NUMBER = 0x08, // 0-9
- CHAR_UNDER = 0x10, // _
- CHAR_PERIOD = 0x20, // .
- CHAR_RAWDEL = 0x40 // {}[]#<>%:;?*+-/^&|~!=,"'
-};
-
-// Statically initialize CharInfo table based on ASCII character set
-// Reference: FreeBSD 7.2 /usr/share/misc/ascii
-static const unsigned char CharInfo[256] =
-{
-// 0 NUL 1 SOH 2 STX 3 ETX
-// 4 EOT 5 ENQ 6 ACK 7 BEL
- 0 , 0 , 0 , 0 ,
- 0 , 0 , 0 , 0 ,
-// 8 BS 9 HT 10 NL 11 VT
-//12 NP 13 CR 14 SO 15 SI
- 0 , CHAR_HORZ_WS, CHAR_VERT_WS, CHAR_HORZ_WS,
- CHAR_HORZ_WS, CHAR_VERT_WS, 0 , 0 ,
-//16 DLE 17 DC1 18 DC2 19 DC3
-//20 DC4 21 NAK 22 SYN 23 ETB
- 0 , 0 , 0 , 0 ,
- 0 , 0 , 0 , 0 ,
-//24 CAN 25 EM 26 SUB 27 ESC
-//28 FS 29 GS 30 RS 31 US
- 0 , 0 , 0 , 0 ,
- 0 , 0 , 0 , 0 ,
-//32 SP 33 ! 34 " 35 #
-//36 $ 37 % 38 & 39 '
- CHAR_HORZ_WS, CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
- 0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
-//40 ( 41 ) 42 * 43 +
-//44 , 45 - 46 . 47 /
- 0 , 0 , CHAR_RAWDEL , CHAR_RAWDEL ,
- CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL ,
-//48 0 49 1 50 2 51 3
-//52 4 53 5 54 6 55 7
- CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
- CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
-//56 8 57 9 58 : 59 ;
-//60 < 61 = 62 > 63 ?
- CHAR_NUMBER , CHAR_NUMBER , CHAR_RAWDEL , CHAR_RAWDEL ,
- CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
-//64 @ 65 A 66 B 67 C
-//68 D 69 E 70 F 71 G
- 0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-//72 H 73 I 74 J 75 K
-//76 L 77 M 78 N 79 O
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-//80 P 81 Q 82 R 83 S
-//84 T 85 U 86 V 87 W
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-//88 X 89 Y 90 Z 91 [
-//92 \ 93 ] 94 ^ 95 _
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
- 0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER ,
-//96 ` 97 a 98 b 99 c
-//100 d 101 e 102 f 103 g
- 0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-//104 h 105 i 106 j 107 k
-//108 l 109 m 110 n 111 o
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-//112 p 113 q 114 r 115 s
-//116 t 117 u 118 v 119 w
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
-//120 x 121 y 122 z 123 {
-//124 | 125 } 126 ~ 127 DEL
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
- CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0
-};
-
-static void InitCharacterInfo() {
- static bool isInited = false;
- if (isInited) return;
- // check the statically-initialized CharInfo table
- assert(CHAR_HORZ_WS == CharInfo[(int)' ']);
- assert(CHAR_HORZ_WS == CharInfo[(int)'\t']);
- assert(CHAR_HORZ_WS == CharInfo[(int)'\f']);
- assert(CHAR_HORZ_WS == CharInfo[(int)'\v']);
- assert(CHAR_VERT_WS == CharInfo[(int)'\n']);
- assert(CHAR_VERT_WS == CharInfo[(int)'\r']);
- assert(CHAR_UNDER == CharInfo[(int)'_']);
- assert(CHAR_PERIOD == CharInfo[(int)'.']);
- for (unsigned i = 'a'; i <= 'z'; ++i) {
- assert(CHAR_LETTER == CharInfo[i]);
- assert(CHAR_LETTER == CharInfo[i+'A'-'a']);
- }
- for (unsigned i = '0'; i <= '9'; ++i)
- assert(CHAR_NUMBER == CharInfo[i]);
-
- isInited = true;
-}
-
-
-/// isIdentifierHead - Return true if this is the first character of an
-/// identifier, which is [a-zA-Z_].
-static inline bool isIdentifierHead(unsigned char c) {
- return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false;
-}
-
-/// isIdentifierBody - Return true if this is the body character of an
-/// identifier, which is [a-zA-Z0-9_].
-static inline bool isIdentifierBody(unsigned char c) {
- return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER)) ? true : false;
-}
-
-/// isHorizontalWhitespace - Return true if this character is horizontal
-/// whitespace: ' ', '\\t', '\\f', '\\v'. Note that this returns false for
-/// '\\0'.
-static inline bool isHorizontalWhitespace(unsigned char c) {
- return (CharInfo[c] & CHAR_HORZ_WS) ? true : false;
-}
-
-/// isVerticalWhitespace - Return true if this character is vertical
-/// whitespace: '\\n', '\\r'. Note that this returns false for '\\0'.
-static inline bool isVerticalWhitespace(unsigned char c) {
- return (CharInfo[c] & CHAR_VERT_WS) ? true : false;
-}
-
-/// isWhitespace - Return true if this character is horizontal or vertical
-/// whitespace: ' ', '\\t', '\\f', '\\v', '\\n', '\\r'. Note that this returns
-/// false for '\\0'.
-static inline bool isWhitespace(unsigned char c) {
- return (CharInfo[c] & (CHAR_HORZ_WS|CHAR_VERT_WS)) ? true : false;
-}
-
-/// isNumberBody - Return true if this is the body character of an
-/// preprocessing number, which is [a-zA-Z0-9_.].
-static inline bool isNumberBody(unsigned char c) {
- return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD)) ?
- true : false;
-}
-
-/// isRawStringDelimBody - Return true if this is the body character of a
-/// raw string delimiter.
-static inline bool isRawStringDelimBody(unsigned char c) {
- return (CharInfo[c] &
- (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL)) ?
- true : false;
-}
-
-// Allow external clients to make use of CharInfo.
bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) {
- return isIdentifierBody(c) || (c == '$' && LangOpts.DollarIdents);
+ return isIdentifierBody(c, LangOpts.DollarIdents);
}
@@ -1293,7 +1179,7 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,
// Try to load the file buffer.
bool InvalidTemp = false;
- llvm::StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
+ StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
if (InvalidTemp)
return SourceLocation();
@@ -1319,8 +1205,15 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,
C = *(++TokenEnd);
NumWhitespaceChars++;
}
- if (isVerticalWhitespace(C))
+
+ // Skip \r, \n, \r\n, or \n\r
+ if (C == '\n' || C == '\r') {
+ char PrevC = C;
+ C = *(++TokenEnd);
NumWhitespaceChars++;
+ if ((C == '\n' || C == '\r') && C != PrevC)
+ NumWhitespaceChars++;
+ }
}
return TokenLoc.getLocWithOffset(Tok.getLength() + NumWhitespaceChars);
@@ -1334,7 +1227,6 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,
/// 2. If this is an escaped newline (potentially with whitespace between
/// the backslash and newline), implicitly skip the newline and return
/// the char after it.
-/// 3. If this is a UCN, return it. FIXME: C++ UCN's?
///
/// This handles the slow/uncommon case of the getCharAndSize method. Here we
/// know that we can accumulate into Size, and that we have already incremented
@@ -1467,6 +1359,62 @@ void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) {
IsAtStartOfLine = StartOfLine;
}
+static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) {
+ if (LangOpts.CPlusPlus11 || LangOpts.C11)
+ return isCharInSet(C, C11AllowedIDChars);
+ else if (LangOpts.CPlusPlus)
+ return isCharInSet(C, CXX03AllowedIDChars);
+ else
+ return isCharInSet(C, C99AllowedIDChars);
+}
+
+static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) {
+ assert(isAllowedIDChar(C, LangOpts));
+ if (LangOpts.CPlusPlus11 || LangOpts.C11)
+ return !isCharInSet(C, C11DisallowedInitialIDChars);
+ else if (LangOpts.CPlusPlus)
+ return true;
+ else
+ return !isCharInSet(C, C99DisallowedInitialIDChars);
+}
+
+static inline CharSourceRange makeCharRange(Lexer &L, const char *Begin,
+ const char *End) {
+ return CharSourceRange::getCharRange(L.getSourceLocation(Begin),
+ L.getSourceLocation(End));
+}
+
+static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C,
+ CharSourceRange Range, bool IsFirst) {
+ // Check C99 compatibility.
+ if (Diags.getDiagnosticLevel(diag::warn_c99_compat_unicode_id,
+ Range.getBegin()) > DiagnosticsEngine::Ignored) {
+ enum {
+ CannotAppearInIdentifier = 0,
+ CannotStartIdentifier
+ };
+
+ if (!isCharInSet(C, C99AllowedIDChars)) {
+ Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
+ << Range
+ << CannotAppearInIdentifier;
+ } else if (IsFirst && isCharInSet(C, C99DisallowedInitialIDChars)) {
+ Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
+ << Range
+ << CannotStartIdentifier;
+ }
+ }
+
+ // Check C++98 compatibility.
+ if (Diags.getDiagnosticLevel(diag::warn_cxx98_compat_unicode_id,
+ Range.getBegin()) > DiagnosticsEngine::Ignored) {
+ if (!isCharInSet(C, CXX03AllowedIDChars)) {
+ Diags.Report(Range.getBegin(), diag::warn_cxx98_compat_unicode_id)
+ << Range;
+ }
+ }
+ }
+
void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
// Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
unsigned Size;
@@ -1478,11 +1426,11 @@ void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
// Fast path, no $,\,? in identifier found. '\' might be an escaped newline
// or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
- // FIXME: UCNs.
//
- // TODO: Could merge these checks into a CharInfo flag to make the comparison
- // cheaper
- if (C != '\\' && C != '?' && (C != '$' || !LangOpts.DollarIdents)) {
+ // TODO: Could merge these checks into an InfoTable flag to make the
+ // comparison cheaper
+ if (isASCII(C) && C != '\\' && C != '?' &&
+ (C != '$' || !LangOpts.DollarIdents)) {
FinishIdentifier:
const char *IdStart = BufferPtr;
FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
@@ -1519,8 +1467,51 @@ FinishIdentifier:
CurPtr = ConsumeChar(CurPtr, Size, Result);
C = getCharAndSize(CurPtr, Size);
continue;
- } else if (!isIdentifierBody(C)) { // FIXME: UCNs.
- // Found end of identifier.
+
+ } else if (C == '\\') {
+ const char *UCNPtr = CurPtr + Size;
+ uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr, /*Token=*/0);
+ if (CodePoint == 0 || !isAllowedIDChar(CodePoint, LangOpts))
+ goto FinishIdentifier;
+
+ if (!isLexingRawMode()) {
+ maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
+ makeCharRange(*this, CurPtr, UCNPtr),
+ /*IsFirst=*/false);
+ }
+
+ Result.setFlag(Token::HasUCN);
+ if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') ||
+ (UCNPtr - CurPtr == 10 && CurPtr[1] == 'U'))
+ CurPtr = UCNPtr;
+ else
+ while (CurPtr != UCNPtr)
+ (void)getAndAdvanceChar(CurPtr, Result);
+
+ C = getCharAndSize(CurPtr, Size);
+ continue;
+ } else if (!isASCII(C)) {
+ const char *UnicodePtr = CurPtr;
+ UTF32 CodePoint;
+ ConversionResult Result =
+ llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr,
+ (const UTF8 *)BufferEnd,
+ &CodePoint,
+ strictConversion);
+ if (Result != conversionOK ||
+ !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts))
+ goto FinishIdentifier;
+
+ if (!isLexingRawMode()) {
+ maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
+ makeCharRange(*this, CurPtr, UnicodePtr),
+ /*IsFirst=*/false);
+ }
+
+ CurPtr = UnicodePtr;
+ C = getCharAndSize(CurPtr, Size);
+ continue;
+ } else if (!isIdentifierBody(C)) {
goto FinishIdentifier;
}
@@ -1528,7 +1519,7 @@ FinishIdentifier:
CurPtr = ConsumeChar(CurPtr, Size, Result);
C = getCharAndSize(CurPtr, Size);
- while (isIdentifierBody(C)) { // FIXME: UCNs.
+ while (isIdentifierBody(C)) {
CurPtr = ConsumeChar(CurPtr, Size, Result);
C = getCharAndSize(CurPtr, Size);
}
@@ -1553,7 +1544,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
unsigned Size;
char C = getCharAndSize(CurPtr, Size);
char PrevCh = 0;
- while (isNumberBody(C)) { // FIXME: UCNs.
+ while (isPreprocessingNumberBody(C)) { // FIXME: UCNs in ud-suffix.
CurPtr = ConsumeChar(CurPtr, Size, Result);
PrevCh = C;
C = getCharAndSize(CurPtr, Size);
@@ -1598,7 +1589,7 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) {
unsigned Size;
char C = getCharAndSize(CurPtr, Size);
if (isIdentifierHead(C)) {
- if (!getLangOpts().CPlusPlus0x) {
+ if (!getLangOpts().CPlusPlus11) {
if (!isLexingRawMode())
Diag(CurPtr,
C == '_' ? diag::warn_cxx11_compat_user_defined_literal
@@ -1639,7 +1630,9 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
(Kind == tok::utf8_string_literal ||
Kind == tok::utf16_string_literal ||
Kind == tok::utf32_string_literal))
- Diag(BufferPtr, diag::warn_cxx98_compat_unicode_literal);
+ Diag(BufferPtr, getLangOpts().CPlusPlus
+ ? diag::warn_cxx98_compat_unicode_literal
+ : diag::warn_c99_compat_unicode_literal);
char C = getAndAdvanceChar(CurPtr, Result);
while (C != '"') {
@@ -1804,7 +1797,9 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
if (!isLexingRawMode() &&
(Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant))
- Diag(BufferPtr, diag::warn_cxx98_compat_unicode_literal);
+ Diag(BufferPtr, getLangOpts().CPlusPlus
+ ? diag::warn_cxx98_compat_unicode_literal
+ : diag::warn_c99_compat_unicode_literal);
char C = getAndAdvanceChar(CurPtr, Result);
if (C == '\'') {
@@ -1860,6 +1855,8 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
///
bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
// Whitespace - Skip it, then return the token after the whitespace.
+ bool SawNewline = isVerticalWhitespace(CurPtr[-1]);
+
unsigned char Char = *CurPtr; // Skip consequtive spaces efficiently.
while (1) {
// Skip horizontal whitespace very aggressively.
@@ -1867,7 +1864,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
Char = *++CurPtr;
// Otherwise if we have something other than whitespace, we're done.
- if (Char != '\n' && Char != '\r')
+ if (!isVerticalWhitespace(Char))
break;
if (ParsingPreprocessorDirective) {
@@ -1877,24 +1874,27 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
}
// ok, but handle newline.
- // The returned token is at the start of the line.
- Result.setFlag(Token::StartOfLine);
- // No leading whitespace seen so far.
- Result.clearFlag(Token::LeadingSpace);
+ SawNewline = true;
Char = *++CurPtr;
}
- // If this isn't immediately after a newline, there is leading space.
- char PrevChar = CurPtr[-1];
- if (PrevChar != '\n' && PrevChar != '\r')
- Result.setFlag(Token::LeadingSpace);
-
// If the client wants us to return whitespace, return it now.
if (isKeepWhitespaceMode()) {
FormTokenWithChars(Result, CurPtr, tok::unknown);
+ if (SawNewline)
+ IsAtStartOfLine = true;
+ // FIXME: The next token will not have LeadingSpace set.
return true;
}
+ // If this isn't immediately after a newline, there is leading space.
+ char PrevChar = CurPtr[-1];
+ bool HasLeadingSpace = !isVerticalWhitespace(PrevChar);
+
+ Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
+ if (SawNewline)
+ Result.setFlag(Token::StartOfLine);
+
BufferPtr = CurPtr;
return false;
}
@@ -2285,7 +2285,6 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
// efficiently now. This is safe even in KeepWhitespaceMode because we would
// have already returned above with the comment as a token.
if (isHorizontalWhitespace(*CurPtr)) {
- Result.setFlag(Token::LeadingSpace);
SkipWhitespace(Result, CurPtr+1);
return false;
}
@@ -2367,7 +2366,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
FormTokenWithChars(Result, CurPtr, tok::eod);
// Restore comment saving mode, in case it was disabled for directive.
- SetCommentRetentionState(PP->getCommentRetentionState());
+ resetExtendedTokenMode();
return true; // Have a token.
}
@@ -2393,7 +2392,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
// C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue
// a pedwarn.
if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r'))
- Diag(BufferEnd, LangOpts.CPlusPlus0x ? // C++11 [lex.phases] 2.2 p2
+ Diag(BufferEnd, LangOpts.CPlusPlus11 ? // C++11 [lex.phases] 2.2 p2
diag::warn_cxx98_compat_no_newline_eof : diag::ext_no_newline_eof)
<< FixItHint::CreateInsertion(getSourceLocation(BufferEnd), "\n");
@@ -2550,6 +2549,164 @@ bool Lexer::isCodeCompletionPoint(const char *CurPtr) const {
return false;
}
+uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
+ Token *Result) {
+ unsigned CharSize;
+ char Kind = getCharAndSize(StartPtr, CharSize);
+
+ unsigned NumHexDigits;
+ if (Kind == 'u')
+ NumHexDigits = 4;
+ else if (Kind == 'U')
+ NumHexDigits = 8;
+ else
+ return 0;
+
+ if (!LangOpts.CPlusPlus && !LangOpts.C99) {
+ if (Result && !isLexingRawMode())
+ Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
+ return 0;
+ }
+
+ const char *CurPtr = StartPtr + CharSize;
+ const char *KindLoc = &CurPtr[-1];
+
+ uint32_t CodePoint = 0;
+ for (unsigned i = 0; i < NumHexDigits; ++i) {
+ char C = getCharAndSize(CurPtr, CharSize);
+
+ unsigned Value = llvm::hexDigitValue(C);
+ if (Value == -1U) {
+ if (Result && !isLexingRawMode()) {
+ if (i == 0) {
+ Diag(BufferPtr, diag::warn_ucn_escape_no_digits)
+ << StringRef(KindLoc, 1);
+ } else {
+ Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
+
+ // If the user wrote \U1234, suggest a fixit to \u.
+ if (i == 4 && NumHexDigits == 8) {
+ CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1);
+ Diag(KindLoc, diag::note_ucn_four_not_eight)
+ << FixItHint::CreateReplacement(URange, "u");
+ }
+ }
+ }
+
+ return 0;
+ }
+
+ CodePoint <<= 4;
+ CodePoint += Value;
+
+ CurPtr += CharSize;
+ }
+
+ if (Result) {
+ Result->setFlag(Token::HasUCN);
+ if (CurPtr - StartPtr == (ptrdiff_t)NumHexDigits + 2)
+ StartPtr = CurPtr;
+ else
+ while (StartPtr != CurPtr)
+ (void)getAndAdvanceChar(StartPtr, *Result);
+ } else {
+ StartPtr = CurPtr;
+ }
+
+ // C99 6.4.3p2: A universal character name shall not specify a character whose
+ // short identifier is less than 00A0 other than 0024 ($), 0040 (@), or
+ // 0060 (`), nor one in the range D800 through DFFF inclusive.)
+ // C++11 [lex.charset]p2: If the hexadecimal value for a
+ // universal-character-name corresponds to a surrogate code point (in the
+ // range 0xD800-0xDFFF, inclusive), the program is ill-formed. Additionally,
+ // if the hexadecimal value for a universal-character-name outside the
+ // c-char-sequence, s-char-sequence, or r-char-sequence of a character or
+ // string literal corresponds to a control character (in either of the
+ // ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a character in the
+ // basic source character set, the program is ill-formed.
+ if (CodePoint < 0xA0) {
+ if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60)
+ return CodePoint;
+
+ // We don't use isLexingRawMode() here because we need to warn about bad
+ // UCNs even when skipping preprocessing tokens in a #if block.
+ if (Result && PP) {
+ if (CodePoint < 0x20 || CodePoint >= 0x7F)
+ Diag(BufferPtr, diag::err_ucn_control_character);
+ else {
+ char C = static_cast<char>(CodePoint);
+ Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&C, 1);
+ }
+ }
+
+ return 0;
+
+ } else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
+ // C++03 allows UCNs representing surrogate characters. C99 and C++11 don't.
+ // We don't use isLexingRawMode() here because we need to diagnose bad
+ // UCNs even when skipping preprocessing tokens in a #if block.
+ if (Result && PP) {
+ if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
+ Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
+ else
+ Diag(BufferPtr, diag::err_ucn_escape_invalid);
+ }
+ return 0;
+ }
+
+ return CodePoint;
+}
+
+void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
+ if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
+ isCharInSet(C, UnicodeWhitespaceChars)) {
+ Diag(BufferPtr, diag::ext_unicode_whitespace)
+ << makeCharRange(*this, BufferPtr, CurPtr);
+
+ Result.setFlag(Token::LeadingSpace);
+ if (SkipWhitespace(Result, CurPtr))
+ return; // KeepWhitespaceMode
+
+ return LexTokenInternal(Result);
+ }
+
+ if (isAllowedIDChar(C, LangOpts) && isAllowedInitiallyIDChar(C, LangOpts)) {
+ if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
+ !PP->isPreprocessedOutput()) {
+ maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C,
+ makeCharRange(*this, BufferPtr, CurPtr),
+ /*IsFirst=*/true);
+ }
+
+ MIOpt.ReadToken();
+ return LexIdentifier(Result, CurPtr);
+ }
+
+ if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
+ !PP->isPreprocessedOutput() &&
+ !isASCII(*BufferPtr) && !isAllowedIDChar(C, LangOpts)) {
+ // Non-ASCII characters tend to creep into source code unintentionally.
+ // Instead of letting the parser complain about the unknown token,
+ // just drop the character.
+ // Note that we can /only/ do this when the non-ASCII character is actually
+ // spelled as Unicode, not written as a UCN. The standard requires that
+ // we not throw away any possible preprocessor tokens, but there's a
+ // loophole in the mapping of Unicode characters to basic character set
+ // characters that allows us to map these particular characters to, say,
+ // whitespace.
+ Diag(BufferPtr, diag::err_non_ascii)
+ << FixItHint::CreateRemoval(makeCharRange(*this, BufferPtr, CurPtr));
+
+ BufferPtr = CurPtr;
+ return LexTokenInternal(Result);
+ }
+
+ // Otherwise, we have an explicit UCN or a character that's unlikely to show
+ // up by accident.
+ MIOpt.ReadToken();
+ FormTokenWithChars(Result, CurPtr, tok::unknown);
+}
+
/// LexTokenInternal - This implements a simple C family lexer. It is an
/// extremely performance critical piece of code. This assumes that the buffer
@@ -2576,6 +2733,7 @@ LexNextToken:
// whitespace.
if (isKeepWhitespaceMode()) {
FormTokenWithChars(Result, CurPtr, tok::unknown);
+ // FIXME: The next token will not have LeadingSpace set.
return;
}
@@ -2643,7 +2801,7 @@ LexNextToken:
// Restore comment saving mode, in case it was disabled for directive.
if (PP)
- SetCommentRetentionState(PP->getCommentRetentionState());
+ resetExtendedTokenMode();
// Since we consumed a newline, we are back at the start of a line.
IsAtStartOfLine = true;
@@ -2651,8 +2809,7 @@ LexNextToken:
Kind = tok::eod;
break;
}
- // The returned token is at the start of the line.
- Result.setFlag(Token::StartOfLine);
+
// No leading whitespace seen so far.
Result.clearFlag(Token::LeadingSpace);
@@ -2695,11 +2852,11 @@ LexNextToken:
MIOpt.ReadToken();
return LexNumericConstant(Result, CurPtr);
- case 'u': // Identifier (uber) or C++0x UTF-8 or UTF-16 string literal
+ case 'u': // Identifier (uber) or C11/C++11 UTF-8 or UTF-16 string literal
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
- if (LangOpts.CPlusPlus0x) {
+ if (LangOpts.CPlusPlus11 || LangOpts.C11) {
Char = getCharAndSize(CurPtr, SizeTmp);
// UTF-16 string literal
@@ -2713,7 +2870,8 @@ LexNextToken:
tok::utf16_char_constant);
// UTF-16 raw string literal
- if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+ if (Char == 'R' && LangOpts.CPlusPlus11 &&
+ getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
return LexRawStringLiteral(Result,
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result),
@@ -2729,7 +2887,7 @@ LexNextToken:
SizeTmp2, Result),
tok::utf8_string_literal);
- if (Char2 == 'R') {
+ if (Char2 == 'R' && LangOpts.CPlusPlus11) {
unsigned SizeTmp3;
char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
// UTF-8 raw string literal
@@ -2747,11 +2905,11 @@ LexNextToken:
// treat u like the start of an identifier.
return LexIdentifier(Result, CurPtr);
- case 'U': // Identifier (Uber) or C++0x UTF-32 string literal
+ case 'U': // Identifier (Uber) or C11/C++11 UTF-32 string literal
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
- if (LangOpts.CPlusPlus0x) {
+ if (LangOpts.CPlusPlus11 || LangOpts.C11) {
Char = getCharAndSize(CurPtr, SizeTmp);
// UTF-32 string literal
@@ -2765,7 +2923,8 @@ LexNextToken:
tok::utf32_char_constant);
// UTF-32 raw string literal
- if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+ if (Char == 'R' && LangOpts.CPlusPlus11 &&
+ getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
return LexRawStringLiteral(Result,
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result),
@@ -2779,7 +2938,7 @@ LexNextToken:
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
- if (LangOpts.CPlusPlus0x) {
+ if (LangOpts.CPlusPlus11) {
Char = getCharAndSize(CurPtr, SizeTmp);
if (Char == '"')
@@ -2802,7 +2961,7 @@ LexNextToken:
tok::wide_string_literal);
// Wide raw string literal.
- if (LangOpts.CPlusPlus0x && Char == 'R' &&
+ if (LangOpts.CPlusPlus11 && Char == 'R' &&
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
return LexRawStringLiteral(Result,
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
@@ -2968,10 +3127,13 @@ LexNextToken:
// this as "foo / bar" and langauges with Line comments would lex it as
// "foo". Check to see if the character after the second slash is a '*'.
// If so, we will lex that as a "/" instead of the start of a comment.
- // However, we never do this in -traditional-cpp mode.
- if ((LangOpts.LineComment ||
- getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*') &&
- !LangOpts.TraditionalCPP) {
+ // However, we never do this if we are just preprocessing.
+ bool TreatAsComment = LangOpts.LineComment && !LangOpts.TraditionalCPP;
+ if (!TreatAsComment)
+ if (!(PP && PP->isPreprocessedOutput()))
+ TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*';
+
+ if (TreatAsComment) {
if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
return; // There is a token to return.
@@ -3020,26 +3182,8 @@ LexNextToken:
// it's actually the start of a preprocessing directive. Callback to
// the preprocessor to handle it.
// FIXME: -fpreprocessed mode??
- if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) {
- FormTokenWithChars(Result, CurPtr, tok::hash);
- PP->HandleDirective(Result);
-
- // As an optimization, if the preprocessor didn't switch lexers, tail
- // recurse.
- if (PP->isCurrentLexer(this)) {
- // Start a new token. If this is a #include or something, the PP may
- // want us starting at the beginning of the line again. If so, set
- // the StartOfLine flag and clear LeadingSpace.
- if (IsAtStartOfLine) {
- Result.setFlag(Token::StartOfLine);
- Result.clearFlag(Token::LeadingSpace);
- IsAtStartOfLine = false;
- }
- goto LexNextToken; // GCC isn't tail call eliminating.
- }
-
- return PP->Lex(Result);
- }
+ if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer)
+ goto HandleDirective;
Kind = tok::hash;
}
@@ -3077,7 +3221,7 @@ LexNextToken:
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::lessequal;
} else if (LangOpts.Digraphs && Char == ':') { // '<:' -> '['
- if (LangOpts.CPlusPlus0x &&
+ if (LangOpts.CPlusPlus11 &&
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') {
// C++0x [lex.pptoken]p3:
// Otherwise, if the next three characters are <:: and the subsequent
@@ -3204,25 +3348,8 @@ LexNextToken:
// it's actually the start of a preprocessing directive. Callback to
// the preprocessor to handle it.
// FIXME: -fpreprocessed mode??
- if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) {
- FormTokenWithChars(Result, CurPtr, tok::hash);
- PP->HandleDirective(Result);
-
- // As an optimization, if the preprocessor didn't switch lexers, tail
- // recurse.
- if (PP->isCurrentLexer(this)) {
- // Start a new token. If this is a #include or something, the PP may
- // want us starting at the beginning of the line again. If so, set
- // the StartOfLine flag and clear LeadingSpace.
- if (IsAtStartOfLine) {
- Result.setFlag(Token::StartOfLine);
- Result.clearFlag(Token::LeadingSpace);
- IsAtStartOfLine = false;
- }
- goto LexNextToken; // GCC isn't tail call eliminating.
- }
- return PP->Lex(Result);
- }
+ if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer)
+ goto HandleDirective;
Kind = tok::hash;
}
@@ -3236,12 +3363,48 @@ LexNextToken:
Kind = tok::unknown;
break;
+ // UCNs (C99 6.4.3, C++11 [lex.charset]p2)
case '\\':
- // FIXME: UCN's.
- // FALL THROUGH.
- default:
+ if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result))
+ return LexUnicode(Result, CodePoint, CurPtr);
+
Kind = tok::unknown;
break;
+
+ default: {
+ if (isASCII(Char)) {
+ Kind = tok::unknown;
+ break;
+ }
+
+ UTF32 CodePoint;
+
+ // We can't just reset CurPtr to BufferPtr because BufferPtr may point to
+ // an escaped newline.
+ --CurPtr;
+ ConversionResult Status =
+ llvm::convertUTF8Sequence((const UTF8 **)&CurPtr,
+ (const UTF8 *)BufferEnd,
+ &CodePoint,
+ strictConversion);
+ if (Status == conversionOK)
+ return LexUnicode(Result, CodePoint, CurPtr);
+
+ if (isLexingRawMode() || ParsingPreprocessorDirective ||
+ PP->isPreprocessedOutput()) {
+ ++CurPtr;
+ Kind = tok::unknown;
+ break;
+ }
+
+ // Non-ASCII characters tend to creep into source code unintentionally.
+ // Instead of letting the parser complain about the unknown token,
+ // just diagnose the invalid UTF-8, then drop the character.
+ Diag(CurPtr, diag::err_invalid_utf8);
+
+ BufferPtr = CurPtr+1;
+ goto LexNextToken;
+ }
}
// Notify MIOpt that we read a non-whitespace/non-comment token.
@@ -3249,4 +3412,26 @@ LexNextToken:
// Update the location of token as well as BufferPtr.
FormTokenWithChars(Result, CurPtr, Kind);
+ return;
+
+HandleDirective:
+ // We parsed a # character and it's the start of a preprocessing directive.
+
+ FormTokenWithChars(Result, CurPtr, tok::hash);
+ PP->HandleDirective(Result);
+
+ // As an optimization, if the preprocessor didn't switch lexers, tail
+ // recurse.
+ if (PP->isCurrentLexer(this)) {
+ // Start a new token. If this is a #include or something, the PP may
+ // want us starting at the beginning of the line again. If so, set
+ // the StartOfLine flag and clear LeadingSpace.
+ if (IsAtStartOfLine) {
+ Result.setFlag(Token::StartOfLine);
+ Result.clearFlag(Token::LeadingSpace);
+ IsAtStartOfLine = false;
+ }
+ goto LexNextToken; // GCC isn't tail call eliminating.
+ }
+ return PP->Lex(Result);
}
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index e30612e..91da822 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -13,22 +13,15 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/LiteralSupport.h"
-#include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Basic/CharInfo.h"
#include "clang/Basic/TargetInfo.h"
-#include "clang/Basic/ConvertUTF.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ErrorHandling.h"
-using namespace clang;
-/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
-/// not valid.
-static int HexDigitValue(char C) {
- if (C >= '0' && C <= '9') return C-'0';
- if (C >= 'a' && C <= 'f') return C-'a'+10;
- if (C >= 'A' && C <= 'F') return C-'A'+10;
- return -1;
-}
+using namespace clang;
static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
switch (kind) {
@@ -136,10 +129,10 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
break;
case 'x': { // Hex escape.
ResultChar = 0;
- if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
+ if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
if (Diags)
Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
- diag::err_hex_escape_no_digits);
+ diag::err_hex_escape_no_digits) << "x";
HadError = 1;
break;
}
@@ -147,7 +140,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
// Hex escapes are a maximal series of hex digits.
bool Overflow = false;
for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
- int CharVal = HexDigitValue(ThisTokBuf[0]);
+ int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
if (CharVal == -1) break;
// About to shift out a digit?
Overflow |= (ResultChar & 0xF0000000) ? true : false;
@@ -205,7 +198,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
if (Diags == 0)
break;
- if (isgraph(ResultChar))
+ if (isPrintable(ResultChar))
Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
diag::ext_unknown_escape)
<< std::string(1, ResultChar);
@@ -232,16 +225,16 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
// Skip the '\u' char's.
ThisTokBuf += 2;
- if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
+ if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
if (Diags)
Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
- diag::err_ucn_escape_no_digits);
+ diag::err_hex_escape_no_digits) << StringRef(&ThisTokBuf[-1], 1);
return false;
}
UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
unsigned short UcnLenSave = UcnLen;
for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) {
- int CharVal = HexDigitValue(ThisTokBuf[0]);
+ int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
if (CharVal == -1) break;
UcnVal <<= 4;
UcnVal |= CharVal;
@@ -267,7 +260,7 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
// characters inside character and string literals
if (UcnVal < 0xa0 &&
(UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) { // $, @, `
- bool IsError = (!Features.CPlusPlus0x || !in_char_string_literal);
+ bool IsError = (!Features.CPlusPlus11 || !in_char_string_literal);
if (Diags) {
char BasicSCSChar = UcnVal;
if (UcnVal >= 0x20 && UcnVal < 0x7f)
@@ -286,7 +279,7 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
if (!Features.CPlusPlus && !Features.C99 && Diags)
Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
- diag::warn_ucn_not_valid_in_c89);
+ diag::warn_ucn_not_valid_in_c89_literal);
return true;
}
@@ -467,8 +460,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
// and FP constants (specifically, the 'pp-number' regex), and assumes that
// the byte at "*end" is both valid and not part of the regex. Because of
// this, it doesn't have to check for 'overscan' in various places.
- assert(!isalnum(*ThisTokEnd) && *ThisTokEnd != '.' && *ThisTokEnd != '_' &&
- "Lexer didn't maximally munch?");
+ assert(!isPreprocessingNumberBody(*ThisTokEnd) && "didn't maximally munch?");
s = DigitsBegin = ThisTokBegin;
saw_exponent = false;
@@ -491,7 +483,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
s = SkipDigits(s);
if (s == ThisTokEnd) {
// Done.
- } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
+ } else if (isHexDigit(*s) && !(*s == 'e' || *s == 'E')) {
PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
diag::err_invalid_decimal_digit) << StringRef(s, 1);
hadError = true;
@@ -616,7 +608,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
}
if (s != ThisTokEnd) {
- if (PP.getLangOpts().CPlusPlus0x && s == SuffixBegin && *s == '_') {
+ if (PP.getLangOpts().CPlusPlus11 && s == SuffixBegin && *s == '_') {
// We have a ud-suffix! By C++11 [lex.ext]p10, ud-suffixes not starting
// with an '_' are ill-formed.
saw_ud_suffix = true;
@@ -643,7 +635,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
s++;
// Handle a hex number like 0x1234.
- if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) {
+ if ((*s == 'x' || *s == 'X') && (isHexDigit(s[1]) || s[1] == '.')) {
s++;
radix = 16;
DigitsBegin = s;
@@ -702,7 +694,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
s = SkipBinaryDigits(s);
if (s == ThisTokEnd) {
// Done.
- } else if (isxdigit(*s)) {
+ } else if (isHexDigit(*s)) {
PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
diag::err_invalid_binary_digit) << StringRef(s, 1);
hadError = true;
@@ -722,7 +714,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
// If we have some other non-octal digit that *is* a decimal digit, see if
// this is part of a floating point number like 094.123 or 09e1.
- if (isdigit(*s)) {
+ if (isDigit(*s)) {
const char *EndDecimal = SkipDigits(s);
if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
s = EndDecimal;
@@ -732,7 +724,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
// If we have a hex digit other than 'e' (which denotes a FP exponent) then
// the code is using an incorrect base.
- if (isxdigit(*s) && *s != 'e' && *s != 'E') {
+ if (isHexDigit(*s) && *s != 'e' && *s != 'E') {
PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
diag::err_invalid_octal_digit) << StringRef(s, 1);
hadError = true;
@@ -792,7 +784,7 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
if (alwaysFitsInto64Bits(radix, NumDigits)) {
uint64_t N = 0;
for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr)
- N = N * radix + HexDigitValue(*Ptr);
+ N = N * radix + llvm::hexDigitValue(*Ptr);
// This will truncate the value to Val's input width. Simply check
// for overflow by comparing.
@@ -809,7 +801,7 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
bool OverflowOccurred = false;
while (Ptr < SuffixBegin) {
- unsigned C = HexDigitValue(*Ptr++);
+ unsigned C = llvm::hexDigitValue(*Ptr++);
// If this letter is out of bound for this radix, reject it.
assert(C < radix && "NumericLiteralParser ctor should have rejected this");
diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp
index ed8873d..f6e781a 100644
--- a/lib/Lex/MacroArgs.cpp
+++ b/lib/Lex/MacroArgs.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "MacroArgs.h"
+#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/LexDiagnostic.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/SaveAndRestore.h"
#include <algorithm>
@@ -23,7 +23,7 @@ using namespace clang;
/// MacroArgs ctor function - This destroys the vector passed in.
MacroArgs *MacroArgs::create(const MacroInfo *MI,
- llvm::ArrayRef<Token> UnexpArgTokens,
+ ArrayRef<Token> UnexpArgTokens,
bool VarargsElided, Preprocessor &PP) {
assert(MI->isFunctionLike() &&
"Can't have args for an object-like macro!");
@@ -215,15 +215,11 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,
// If this is a string or character constant, escape the token as specified
// by 6.10.3.2p2.
- if (Tok.is(tok::string_literal) || // "foo"
- Tok.is(tok::wide_string_literal) || // L"foo"
- Tok.is(tok::utf8_string_literal) || // u8"foo"
- Tok.is(tok::utf16_string_literal) || // u"foo"
- Tok.is(tok::utf32_string_literal) || // U"foo"
- Tok.is(tok::char_constant) || // 'x'
- Tok.is(tok::wide_char_constant) || // L'x'.
- Tok.is(tok::utf16_char_constant) || // u'x'.
- Tok.is(tok::utf32_char_constant)) { // U'x'.
+ if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc.
+ Tok.is(tok::char_constant) || // 'x'
+ Tok.is(tok::wide_char_constant) || // L'x'.
+ Tok.is(tok::utf16_char_constant) || // u'x'.
+ Tok.is(tok::utf32_char_constant)) { // U'x'.
bool Invalid = false;
std::string TokStr = PP.getSpelling(Tok, &Invalid);
if (!Invalid) {
diff --git a/lib/Lex/MacroArgs.h b/lib/Lex/MacroArgs.h
index cf86d71..1fd295e 100644
--- a/lib/Lex/MacroArgs.h
+++ b/lib/Lex/MacroArgs.h
@@ -14,8 +14,8 @@
#ifndef LLVM_CLANG_MACROARGS_H
#define LLVM_CLANG_MACROARGS_H
+#include "clang/Basic/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
-
#include <vector>
namespace clang {
@@ -60,7 +60,7 @@ public:
/// MacroArgs ctor function - Create a new MacroArgs object with the specified
/// macro and argument info.
static MacroArgs *create(const MacroInfo *MI,
- llvm::ArrayRef<Token> UnexpArgTokens,
+ ArrayRef<Token> UnexpArgTokens,
bool VarargsElided, Preprocessor &PP);
/// destroy - Destroy and deallocate the memory for this object.
diff --git a/lib/Lex/MacroInfo.cpp b/lib/Lex/MacroInfo.cpp
index 904f04e..b61ff71 100644
--- a/lib/Lex/MacroInfo.cpp
+++ b/lib/Lex/MacroInfo.cpp
@@ -17,7 +17,6 @@ using namespace clang;
MacroInfo::MacroInfo(SourceLocation DefLoc)
: Location(DefLoc),
- PreviousDefinition(0),
ArgumentList(0),
NumArguments(0),
IsDefinitionLengthCached(false),
@@ -25,53 +24,12 @@ MacroInfo::MacroInfo(SourceLocation DefLoc)
IsC99Varargs(false),
IsGNUVarargs(false),
IsBuiltinMacro(false),
- IsFromAST(false),
- ChangedAfterLoad(false),
+ HasCommaPasting(false),
IsDisabled(false),
IsUsed(false),
IsAllowRedefinitionsWithoutWarning(false),
IsWarnIfUnused(false),
- IsPublic(true),
- IsHidden(false),
- IsAmbiguous(false) {
-}
-
-MacroInfo::MacroInfo(const MacroInfo &MI, llvm::BumpPtrAllocator &PPAllocator)
- : Location(MI.Location),
- EndLocation(MI.EndLocation),
- UndefLocation(MI.UndefLocation),
- PreviousDefinition(0),
- ArgumentList(0),
- NumArguments(0),
- ReplacementTokens(MI.ReplacementTokens),
- DefinitionLength(MI.DefinitionLength),
- IsDefinitionLengthCached(MI.IsDefinitionLengthCached),
- IsFunctionLike(MI.IsFunctionLike),
- IsC99Varargs(MI.IsC99Varargs),
- IsGNUVarargs(MI.IsGNUVarargs),
- IsBuiltinMacro(MI.IsBuiltinMacro),
- IsFromAST(MI.IsFromAST),
- ChangedAfterLoad(MI.ChangedAfterLoad),
- IsDisabled(MI.IsDisabled),
- IsUsed(MI.IsUsed),
- IsAllowRedefinitionsWithoutWarning(MI.IsAllowRedefinitionsWithoutWarning),
- IsWarnIfUnused(MI.IsWarnIfUnused),
- IsPublic(MI.IsPublic),
- IsHidden(MI.IsHidden),
- IsAmbiguous(MI.IsAmbiguous) {
- setArgumentList(MI.ArgumentList, MI.NumArguments, PPAllocator);
-}
-
-const MacroInfo *MacroInfo::findDefinitionAtLoc(SourceLocation L,
- SourceManager &SM) const {
- assert(L.isValid() && "SourceLocation is invalid.");
- for (const MacroInfo *MI = this; MI; MI = MI->PreviousDefinition) {
- if (MI->Location.isInvalid() || // For macros defined on the command line.
- SM.isBeforeInTranslationUnit(MI->Location, L))
- return (MI->UndefLocation.isInvalid() ||
- SM.isBeforeInTranslationUnit(L, MI->UndefLocation)) ? MI : NULL;
- }
- return NULL;
+ FromASTFile(false) {
}
unsigned MacroInfo::getDefinitionLengthSlow(SourceManager &SM) const {
@@ -103,11 +61,17 @@ unsigned MacroInfo::getDefinitionLengthSlow(SourceManager &SM) const {
return DefinitionLength;
}
-/// isIdenticalTo - Return true if the specified macro definition is equal to
-/// this macro in spelling, arguments, and whitespace. This is used to emit
-/// duplicate definition warnings. This implements the rules in C99 6.10.3.
+/// \brief Return true if the specified macro definition is equal to
+/// this macro in spelling, arguments, and whitespace.
///
-bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const {
+/// \param Syntactically if true, the macro definitions can be identical even
+/// if they use different identifiers for the function macro parameters.
+/// Otherwise the comparison is lexical and this implements the rules in
+/// C99 6.10.3.
+bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP,
+ bool Syntactically) const {
+ bool Lexically = !Syntactically;
+
// Check # tokens in replacement, number of args, and various flags all match.
if (ReplacementTokens.size() != Other.ReplacementTokens.size() ||
getNumArgs() != Other.getNumArgs() ||
@@ -116,10 +80,12 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const {
isGNUVarargs() != Other.isGNUVarargs())
return false;
- // Check arguments.
- for (arg_iterator I = arg_begin(), OI = Other.arg_begin(), E = arg_end();
- I != E; ++I, ++OI)
- if (*I != *OI) return false;
+ if (Lexically) {
+ // Check arguments.
+ for (arg_iterator I = arg_begin(), OI = Other.arg_begin(), E = arg_end();
+ I != E; ++I, ++OI)
+ if (*I != *OI) return false;
+ }
// Check all the tokens.
for (unsigned i = 0, e = ReplacementTokens.size(); i != e; ++i) {
@@ -137,7 +103,16 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const {
// If this is an identifier, it is easy.
if (A.getIdentifierInfo() || B.getIdentifierInfo()) {
- if (A.getIdentifierInfo() != B.getIdentifierInfo())
+ if (A.getIdentifierInfo() == B.getIdentifierInfo())
+ continue;
+ if (Lexically)
+ return false;
+ // With syntactic equivalence the parameter names can be different as long
+ // as they are used in the same place.
+ int AArgNum = getArgumentNum(A.getIdentifierInfo());
+ if (AArgNum == -1)
+ return false;
+ if (AArgNum != Other.getArgumentNum(B.getIdentifierInfo()))
return false;
continue;
}
@@ -149,3 +124,41 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const {
return true;
}
+
+MacroDirective::DefInfo MacroDirective::getDefinition(bool AllowHidden) {
+ MacroDirective *MD = this;
+ SourceLocation UndefLoc;
+ Optional<bool> isPublic;
+ for (; MD; MD = MD->getPrevious()) {
+ if (!AllowHidden && MD->isHidden())
+ continue;
+
+ if (DefMacroDirective *DefMD = dyn_cast<DefMacroDirective>(MD))
+ return DefInfo(DefMD, UndefLoc,
+ !isPublic.hasValue() || isPublic.getValue());
+
+ if (UndefMacroDirective *UndefMD = dyn_cast<UndefMacroDirective>(MD)) {
+ UndefLoc = UndefMD->getLocation();
+ continue;
+ }
+
+ VisibilityMacroDirective *VisMD = cast<VisibilityMacroDirective>(MD);
+ if (!isPublic.hasValue())
+ isPublic = VisMD->isPublic();
+ }
+
+ return DefInfo();
+}
+
+const MacroDirective::DefInfo
+MacroDirective::findDirectiveAtLoc(SourceLocation L, SourceManager &SM) const {
+ assert(L.isValid() && "SourceLocation is invalid.");
+ for (DefInfo Def = getDefinition(); Def; Def = Def.getPreviousDefinition()) {
+ if (Def.getLocation().isInvalid() || // For macros defined on the command line.
+ SM.isBeforeInTranslationUnit(Def.getLocation(), L))
+ return (!Def.isUndefined() ||
+ SM.isBeforeInTranslationUnit(L, Def.getUndefLocation()))
+ ? Def : DefInfo();
+ }
+ return DefInfo();
+}
diff --git a/lib/Lex/ModuleMap.cpp b/lib/Lex/ModuleMap.cpp
index 8a936fa..0c03201 100644
--- a/lib/Lex/ModuleMap.cpp
+++ b/lib/Lex/ModuleMap.cpp
@@ -12,68 +12,82 @@
//
//===----------------------------------------------------------------------===//
#include "clang/Lex/ModuleMap.h"
-#include "clang/Lex/Lexer.h"
-#include "clang/Lex/LiteralSupport.h"
-#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/PathV2.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
#include <stdlib.h>
+#if defined(LLVM_ON_UNIX)
+#include <limits.h>
+#endif
using namespace clang;
Module::ExportDecl
ModuleMap::resolveExport(Module *Mod,
const Module::UnresolvedExportDecl &Unresolved,
- bool Complain) {
+ bool Complain) const {
// We may have just a wildcard.
if (Unresolved.Id.empty()) {
assert(Unresolved.Wildcard && "Invalid unresolved export");
return Module::ExportDecl(0, true);
}
+ // Resolve the module-id.
+ Module *Context = resolveModuleId(Unresolved.Id, Mod, Complain);
+ if (!Context)
+ return Module::ExportDecl();
+
+ return Module::ExportDecl(Context, Unresolved.Wildcard);
+}
+
+Module *ModuleMap::resolveModuleId(const ModuleId &Id, Module *Mod,
+ bool Complain) const {
// Find the starting module.
- Module *Context = lookupModuleUnqualified(Unresolved.Id[0].first, Mod);
+ Module *Context = lookupModuleUnqualified(Id[0].first, Mod);
if (!Context) {
if (Complain)
- Diags->Report(Unresolved.Id[0].second,
- diag::err_mmap_missing_module_unqualified)
- << Unresolved.Id[0].first << Mod->getFullModuleName();
-
- return Module::ExportDecl();
+ Diags->Report(Id[0].second, diag::err_mmap_missing_module_unqualified)
+ << Id[0].first << Mod->getFullModuleName();
+
+ return 0;
}
// Dig into the module path.
- for (unsigned I = 1, N = Unresolved.Id.size(); I != N; ++I) {
- Module *Sub = lookupModuleQualified(Unresolved.Id[I].first,
- Context);
+ for (unsigned I = 1, N = Id.size(); I != N; ++I) {
+ Module *Sub = lookupModuleQualified(Id[I].first, Context);
if (!Sub) {
if (Complain)
- Diags->Report(Unresolved.Id[I].second,
- diag::err_mmap_missing_module_qualified)
- << Unresolved.Id[I].first << Context->getFullModuleName()
- << SourceRange(Unresolved.Id[0].second, Unresolved.Id[I-1].second);
-
- return Module::ExportDecl();
+ Diags->Report(Id[I].second, diag::err_mmap_missing_module_qualified)
+ << Id[I].first << Context->getFullModuleName()
+ << SourceRange(Id[0].second, Id[I-1].second);
+
+ return 0;
}
-
+
Context = Sub;
}
-
- return Module::ExportDecl(Context, Unresolved.Wildcard);
+
+ return Context;
}
ModuleMap::ModuleMap(FileManager &FileMgr, const DiagnosticConsumer &DC,
- const LangOptions &LangOpts, const TargetInfo *Target)
- : LangOpts(LangOpts), Target(Target), BuiltinIncludeDir(0)
+ const LangOptions &LangOpts, const TargetInfo *Target,
+ HeaderSearch &HeaderInfo)
+ : LangOpts(LangOpts), Target(Target), HeaderInfo(HeaderInfo),
+ BuiltinIncludeDir(0)
{
IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs);
Diags = IntrusiveRefCntPtr<DiagnosticsEngine>(
@@ -104,26 +118,15 @@ static StringRef sanitizeFilenameAsIdentifier(StringRef Name,
if (Name.empty())
return Name;
- // Check whether the filename is already an identifier; this is the common
- // case.
- bool isIdentifier = true;
- for (unsigned I = 0, N = Name.size(); I != N; ++I) {
- if (isalpha(Name[I]) || Name[I] == '_' || (isdigit(Name[I]) && I > 0))
- continue;
-
- isIdentifier = false;
- break;
- }
-
- if (!isIdentifier) {
+ if (!isValidIdentifier(Name)) {
// If we don't already have something with the form of an identifier,
// create a buffer with the sanitized name.
Buffer.clear();
- if (isdigit(Name[0]))
+ if (isDigit(Name[0]))
Buffer.push_back('_');
Buffer.reserve(Buffer.size() + Name.size());
for (unsigned I = 0, N = Name.size(); I != N; ++I) {
- if (isalnum(Name[I]) || isspace(Name[I]))
+ if (isIdentifierBody(Name[I]))
Buffer.push_back(Name[I]);
else
Buffer.push_back('_');
@@ -157,8 +160,13 @@ Module *ModuleMap::findModuleForHeader(const FileEntry *File) {
}
const DirectoryEntry *Dir = File->getDir();
- llvm::SmallVector<const DirectoryEntry *, 2> SkippedDirs;
- StringRef DirName = Dir->getName();
+ SmallVector<const DirectoryEntry *, 2> SkippedDirs;
+
+ // Note: as an egregious but useful hack we use the real path here, because
+ // frameworks moving from top-level frameworks to embedded frameworks tend
+ // to be symlinked from the top-level location to the embedded location,
+ // and we need to resolve lookups as if we had found the embedded location.
+ StringRef DirName = SourceMgr->getFileManager().getCanonicalName(Dir);
// Keep walking up the directory hierarchy, looking for a directory with
// an umbrella header.
@@ -204,7 +212,7 @@ Module *ModuleMap::findModuleForHeader(const FileEntry *File) {
llvm::sys::path::stem(File->getName()), NameBuf);
Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
Explicit).first;
- Result->TopHeaders.insert(File);
+ Result->addTopHeader(File);
// If inferred submodules export everything they import, add a
// wildcard to the set of exports.
@@ -241,19 +249,19 @@ Module *ModuleMap::findModuleForHeader(const FileEntry *File) {
return 0;
}
-bool ModuleMap::isHeaderInUnavailableModule(const FileEntry *Header) {
- HeadersMap::iterator Known = Headers.find(Header);
+bool ModuleMap::isHeaderInUnavailableModule(const FileEntry *Header) const {
+ HeadersMap::const_iterator Known = Headers.find(Header);
if (Known != Headers.end())
return !Known->second.isAvailable();
const DirectoryEntry *Dir = Header->getDir();
- llvm::SmallVector<const DirectoryEntry *, 2> SkippedDirs;
+ SmallVector<const DirectoryEntry *, 2> SkippedDirs;
StringRef DirName = Dir->getName();
// Keep walking up the directory hierarchy, looking for a directory with
// an umbrella header.
do {
- llvm::DenseMap<const DirectoryEntry *, Module *>::iterator KnownDir
+ llvm::DenseMap<const DirectoryEntry *, Module *>::const_iterator KnownDir
= UmbrellaDirs.find(Dir);
if (KnownDir != UmbrellaDirs.end()) {
Module *Found = KnownDir->second;
@@ -307,15 +315,16 @@ bool ModuleMap::isHeaderInUnavailableModule(const FileEntry *Header) {
return false;
}
-Module *ModuleMap::findModule(StringRef Name) {
- llvm::StringMap<Module *>::iterator Known = Modules.find(Name);
+Module *ModuleMap::findModule(StringRef Name) const {
+ llvm::StringMap<Module *>::const_iterator Known = Modules.find(Name);
if (Known != Modules.end())
return Known->getValue();
return 0;
}
-Module *ModuleMap::lookupModuleUnqualified(StringRef Name, Module *Context) {
+Module *ModuleMap::lookupModuleUnqualified(StringRef Name,
+ Module *Context) const {
for(; Context; Context = Context->Parent) {
if (Module *Sub = lookupModuleQualified(Name, Context))
return Sub;
@@ -324,7 +333,7 @@ Module *ModuleMap::lookupModuleUnqualified(StringRef Name, Module *Context) {
return findModule(Name);
}
-Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) {
+Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{
if (!Context)
return findModule(Name);
@@ -347,10 +356,10 @@ ModuleMap::findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework,
}
bool ModuleMap::canInferFrameworkModule(const DirectoryEntry *ParentDir,
- StringRef Name, bool &IsSystem) {
+ StringRef Name, bool &IsSystem) const {
// Check whether we have already looked into the parent directory
// for a module map.
- llvm::DenseMap<const DirectoryEntry *, InferredDirectory>::iterator
+ llvm::DenseMap<const DirectoryEntry *, InferredDirectory>::const_iterator
inferred = InferredDirectories.find(ParentDir);
if (inferred == InferredDirectories.end())
return false;
@@ -370,6 +379,23 @@ bool ModuleMap::canInferFrameworkModule(const DirectoryEntry *ParentDir,
return canInfer;
}
+/// \brief For a framework module, infer the framework against which we
+/// should link.
+static void inferFrameworkLink(Module *Mod, const DirectoryEntry *FrameworkDir,
+ FileManager &FileMgr) {
+ assert(Mod->IsFramework && "Can only infer linking for framework modules");
+ assert(!Mod->isSubFramework() &&
+ "Can only infer linking for top-level frameworks");
+
+ SmallString<128> LibName;
+ LibName += FrameworkDir->getName();
+ llvm::sys::path::append(LibName, Mod->Name);
+ if (FileMgr.getFile(LibName)) {
+ Mod->LinkLibraries.push_back(Module::LinkLibrary(Mod->Name,
+ /*IsFramework=*/true));
+ }
+}
+
Module *
ModuleMap::inferFrameworkModule(StringRef ModuleName,
const DirectoryEntry *FrameworkDir,
@@ -384,14 +410,23 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName,
// If the framework has a parent path from which we're allowed to infer
// a framework module, do so.
if (!Parent) {
+ // Determine whether we're allowed to infer a module map.
+
+ // Note: as an egregious but useful hack we use the real path here, because
+ // we might be looking at an embedded framework that symlinks out to a
+ // top-level framework, and we need to infer as if we were naming the
+ // top-level framework.
+ StringRef FrameworkDirName
+ = SourceMgr->getFileManager().getCanonicalName(FrameworkDir);
+
bool canInfer = false;
- if (llvm::sys::path::has_parent_path(FrameworkDir->getName())) {
+ if (llvm::sys::path::has_parent_path(FrameworkDirName)) {
// Figure out the parent path.
- StringRef Parent = llvm::sys::path::parent_path(FrameworkDir->getName());
+ StringRef Parent = llvm::sys::path::parent_path(FrameworkDirName);
if (const DirectoryEntry *ParentDir = FileMgr.getDirectory(Parent)) {
// Check whether we have already looked into the parent directory
// for a module map.
- llvm::DenseMap<const DirectoryEntry *, InferredDirectory>::iterator
+ llvm::DenseMap<const DirectoryEntry *, InferredDirectory>::const_iterator
inferred = InferredDirectories.find(ParentDir);
if (inferred == InferredDirectories.end()) {
// We haven't looked here before. Load a module map, if there is
@@ -411,7 +446,7 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName,
if (inferred->second.InferModules) {
// We're allowed to infer for this directory, but make sure it's okay
// to infer this particular module.
- StringRef Name = llvm::sys::path::filename(FrameworkDir->getName());
+ StringRef Name = llvm::sys::path::stem(FrameworkDirName);
canInfer = std::find(inferred->second.ExcludedModules.begin(),
inferred->second.ExcludedModules.end(),
Name) == inferred->second.ExcludedModules.end();
@@ -480,29 +515,23 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName,
// check whether it is actually a subdirectory of the parent directory.
// This will not be the case if the 'subframework' is actually a symlink
// out to a top-level framework.
-#ifdef LLVM_ON_UNIX
- char RealSubframeworkDirName[PATH_MAX];
- if (realpath(Dir->path().c_str(), RealSubframeworkDirName)) {
- StringRef SubframeworkDirName = RealSubframeworkDirName;
-
- bool FoundParent = false;
- do {
- // Get the parent directory name.
- SubframeworkDirName
- = llvm::sys::path::parent_path(SubframeworkDirName);
- if (SubframeworkDirName.empty())
- break;
-
- if (FileMgr.getDirectory(SubframeworkDirName) == FrameworkDir) {
- FoundParent = true;
- break;
- }
- } while (true);
+ StringRef SubframeworkDirName = FileMgr.getCanonicalName(SubframeworkDir);
+ bool FoundParent = false;
+ do {
+ // Get the parent directory name.
+ SubframeworkDirName
+ = llvm::sys::path::parent_path(SubframeworkDirName);
+ if (SubframeworkDirName.empty())
+ break;
+
+ if (FileMgr.getDirectory(SubframeworkDirName) == FrameworkDir) {
+ FoundParent = true;
+ break;
+ }
+ } while (true);
- if (!FoundParent)
- continue;
- }
-#endif
+ if (!FoundParent)
+ continue;
// FIXME: Do we want to warn about subframeworks without umbrella headers?
SmallString<32> NameBuf;
@@ -512,6 +541,12 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName,
}
}
+ // If the module is a top-level framework, automatically link against the
+ // framework.
+ if (!Result->isSubFramework()) {
+ inferFrameworkLink(Result, FrameworkDir, FileMgr);
+ }
+
return Result;
}
@@ -528,15 +563,17 @@ void ModuleMap::setUmbrellaDir(Module *Mod, const DirectoryEntry *UmbrellaDir) {
void ModuleMap::addHeader(Module *Mod, const FileEntry *Header,
bool Excluded) {
- if (Excluded)
+ if (Excluded) {
Mod->ExcludedHeaders.push_back(Header);
- else
+ } else {
Mod->Headers.push_back(Header);
+ HeaderInfo.MarkFileModuleHeader(Header);
+ }
Headers[Header] = KnownHeader(Mod, Excluded);
}
const FileEntry *
-ModuleMap::getContainingModuleMapFile(Module *Module) {
+ModuleMap::getContainingModuleMapFile(Module *Module) const {
if (Module->DefinitionLoc.isInvalid() || !SourceMgr)
return 0;
@@ -573,6 +610,25 @@ bool ModuleMap::resolveExports(Module *Mod, bool Complain) {
return HadError;
}
+bool ModuleMap::resolveConflicts(Module *Mod, bool Complain) {
+ bool HadError = false;
+ for (unsigned I = 0, N = Mod->UnresolvedConflicts.size(); I != N; ++I) {
+ Module *OtherMod = resolveModuleId(Mod->UnresolvedConflicts[I].Id,
+ Mod, Complain);
+ if (!OtherMod) {
+ HadError = true;
+ continue;
+ }
+
+ Module::Conflict Conflict;
+ Conflict.Other = OtherMod;
+ Conflict.Message = Mod->UnresolvedConflicts[I].Message;
+ Mod->Conflicts.push_back(Conflict);
+ }
+ Mod->UnresolvedConflicts.clear();
+ return HadError;
+}
+
Module *ModuleMap::inferModuleFromLocation(FullSourceLoc Loc) {
if (Loc.isInvalid())
return 0;
@@ -613,6 +669,8 @@ namespace clang {
struct MMToken {
enum TokenKind {
Comma,
+ ConfigMacros,
+ Conflict,
EndOfFile,
HeaderKeyword,
Identifier,
@@ -620,6 +678,7 @@ namespace clang {
ExplicitKeyword,
ExportKeyword,
FrameworkKeyword,
+ LinkKeyword,
ModuleKeyword,
Period,
UmbrellaKeyword,
@@ -656,10 +715,13 @@ namespace clang {
/// \brief The set of attributes that can be attached to a module.
struct Attributes {
- Attributes() : IsSystem() { }
+ Attributes() : IsSystem(), IsExhaustive() { }
/// \brief Whether this is a system module.
unsigned IsSystem : 1;
+
+ /// \brief Whether this is an exhaustive set of configuration macros.
+ unsigned IsExhaustive : 1;
};
@@ -700,14 +762,16 @@ namespace clang {
/// (or the end of the file).
void skipUntil(MMToken::TokenKind K);
- typedef llvm::SmallVector<std::pair<std::string, SourceLocation>, 2>
- ModuleId;
+ typedef SmallVector<std::pair<std::string, SourceLocation>, 2> ModuleId;
bool parseModuleId(ModuleId &Id);
void parseModuleDecl();
void parseRequiresDecl();
void parseHeaderDecl(SourceLocation UmbrellaLoc, SourceLocation ExcludeLoc);
void parseUmbrellaDirDecl(SourceLocation UmbrellaLoc);
void parseExportDecl();
+ void parseLinkDecl();
+ void parseConfigMacros();
+ void parseConflict();
void parseInferredModuleDecl(bool Framework, bool Explicit);
bool parseOptionalAttributes(Attributes &Attrs);
@@ -745,11 +809,14 @@ retry:
Tok.StringData = LToken.getRawIdentifierData();
Tok.StringLength = LToken.getLength();
Tok.Kind = llvm::StringSwitch<MMToken::TokenKind>(Tok.getString())
- .Case("header", MMToken::HeaderKeyword)
+ .Case("config_macros", MMToken::ConfigMacros)
+ .Case("conflict", MMToken::Conflict)
.Case("exclude", MMToken::ExcludeKeyword)
.Case("explicit", MMToken::ExplicitKeyword)
.Case("export", MMToken::ExportKeyword)
.Case("framework", MMToken::FrameworkKeyword)
+ .Case("header", MMToken::HeaderKeyword)
+ .Case("link", MMToken::LinkKeyword)
.Case("module", MMToken::ModuleKeyword)
.Case("requires", MMToken::RequiresKeyword)
.Case("umbrella", MMToken::UmbrellaKeyword)
@@ -905,7 +972,9 @@ namespace {
/// \brief An unknown attribute.
AT_unknown,
/// \brief The 'system' attribute.
- AT_system
+ AT_system,
+ /// \brief The 'exhaustive' attribute.
+ AT_exhaustive
};
}
@@ -920,6 +989,7 @@ namespace {
/// header-declaration
/// submodule-declaration
/// export-declaration
+/// link-declaration
///
/// submodule-declaration:
/// module-declaration
@@ -1061,7 +1131,15 @@ void ModuleMapParser::parseModuleDecl() {
case MMToken::RBrace:
Done = true;
break;
-
+
+ case MMToken::ConfigMacros:
+ parseConfigMacros();
+ break;
+
+ case MMToken::Conflict:
+ parseConflict();
+ break;
+
case MMToken::ExplicitKeyword:
case MMToken::FrameworkKeyword:
case MMToken::ModuleKeyword:
@@ -1099,7 +1177,11 @@ void ModuleMapParser::parseModuleDecl() {
case MMToken::HeaderKeyword:
parseHeaderDecl(SourceLocation(), SourceLocation());
break;
-
+
+ case MMToken::LinkKeyword:
+ parseLinkDecl();
+ break;
+
default:
Diags.Report(Tok.getLocation(), diag::err_mmap_expected_member);
consumeToken();
@@ -1115,6 +1197,13 @@ void ModuleMapParser::parseModuleDecl() {
HadError = true;
}
+ // If the active module is a top-level framework, and there are no link
+ // libraries, automatically link against the framework.
+ if (ActiveModule->IsFramework && !ActiveModule->isSubFramework() &&
+ ActiveModule->LinkLibraries.empty()) {
+ inferFrameworkLink(ActiveModule, Directory, SourceMgr.getFileManager());
+ }
+
// We're done parsing this module. Pop back to the previous module.
ActiveModule = PreviousActiveModule;
}
@@ -1159,9 +1248,9 @@ void ModuleMapParser::parseRequiresDecl() {
/// \brief Append to \p Paths the set of paths needed to get to the
/// subframework in which the given module lives.
static void appendSubframeworkPaths(Module *Mod,
- llvm::SmallVectorImpl<char> &Path) {
+ SmallVectorImpl<char> &Path) {
// Collect the framework names from the given module to the top-level module.
- llvm::SmallVector<StringRef, 2> Paths;
+ SmallVector<StringRef, 2> Paths;
for (; Mod; Mod = Mod->Parent) {
if (Mod->IsFramework)
Paths.push_back(Mod->Name);
@@ -1307,7 +1396,9 @@ void ModuleMapParser::parseHeaderDecl(SourceLocation UmbrellaLoc,
if (BuiltinFile)
Map.addHeader(ActiveModule, BuiltinFile, Exclude);
}
- } else {
+ } else if (!Exclude) {
+ // Ignore excluded header files. They're optional anyway.
+
Diags.Report(FileNameLoc, diag::err_mmap_header_not_found)
<< Umbrella << FileName;
HadError = true;
@@ -1414,7 +1505,139 @@ void ModuleMapParser::parseExportDecl() {
ActiveModule->UnresolvedExports.push_back(Unresolved);
}
-/// \brief Parse an inferried module declaration (wildcard modules).
+/// \brief Parse a link declaration.
+///
+/// module-declaration:
+/// 'link' 'framework'[opt] string-literal
+void ModuleMapParser::parseLinkDecl() {
+ assert(Tok.is(MMToken::LinkKeyword));
+ SourceLocation LinkLoc = consumeToken();
+
+ // Parse the optional 'framework' keyword.
+ bool IsFramework = false;
+ if (Tok.is(MMToken::FrameworkKeyword)) {
+ consumeToken();
+ IsFramework = true;
+ }
+
+ // Parse the library name
+ if (!Tok.is(MMToken::StringLiteral)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_library_name)
+ << IsFramework << SourceRange(LinkLoc);
+ HadError = true;
+ return;
+ }
+
+ std::string LibraryName = Tok.getString();
+ consumeToken();
+ ActiveModule->LinkLibraries.push_back(Module::LinkLibrary(LibraryName,
+ IsFramework));
+}
+
+/// \brief Parse a configuration macro declaration.
+///
+/// module-declaration:
+/// 'config_macros' attributes[opt] config-macro-list?
+///
+/// config-macro-list:
+/// identifier (',' identifier)?
+void ModuleMapParser::parseConfigMacros() {
+ assert(Tok.is(MMToken::ConfigMacros));
+ SourceLocation ConfigMacrosLoc = consumeToken();
+
+ // Only top-level modules can have configuration macros.
+ if (ActiveModule->Parent) {
+ Diags.Report(ConfigMacrosLoc, diag::err_mmap_config_macro_submodule);
+ }
+
+ // Parse the optional attributes.
+ Attributes Attrs;
+ parseOptionalAttributes(Attrs);
+ if (Attrs.IsExhaustive && !ActiveModule->Parent) {
+ ActiveModule->ConfigMacrosExhaustive = true;
+ }
+
+ // If we don't have an identifier, we're done.
+ if (!Tok.is(MMToken::Identifier))
+ return;
+
+ // Consume the first identifier.
+ if (!ActiveModule->Parent) {
+ ActiveModule->ConfigMacros.push_back(Tok.getString().str());
+ }
+ consumeToken();
+
+ do {
+ // If there's a comma, consume it.
+ if (!Tok.is(MMToken::Comma))
+ break;
+ consumeToken();
+
+ // We expect to see a macro name here.
+ if (!Tok.is(MMToken::Identifier)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_config_macro);
+ break;
+ }
+
+ // Consume the macro name.
+ if (!ActiveModule->Parent) {
+ ActiveModule->ConfigMacros.push_back(Tok.getString().str());
+ }
+ consumeToken();
+ } while (true);
+}
+
+/// \brief Format a module-id into a string.
+static std::string formatModuleId(const ModuleId &Id) {
+ std::string result;
+ {
+ llvm::raw_string_ostream OS(result);
+
+ for (unsigned I = 0, N = Id.size(); I != N; ++I) {
+ if (I)
+ OS << ".";
+ OS << Id[I].first;
+ }
+ }
+
+ return result;
+}
+
+/// \brief Parse a conflict declaration.
+///
+/// module-declaration:
+/// 'conflict' module-id ',' string-literal
+void ModuleMapParser::parseConflict() {
+ assert(Tok.is(MMToken::Conflict));
+ SourceLocation ConflictLoc = consumeToken();
+ Module::UnresolvedConflict Conflict;
+
+ // Parse the module-id.
+ if (parseModuleId(Conflict.Id))
+ return;
+
+ // Parse the ','.
+ if (!Tok.is(MMToken::Comma)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_conflicts_comma)
+ << SourceRange(ConflictLoc);
+ return;
+ }
+ consumeToken();
+
+ // Parse the message.
+ if (!Tok.is(MMToken::StringLiteral)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_conflicts_message)
+ << formatModuleId(Conflict.Id);
+ return;
+ }
+ Conflict.Message = Tok.getString().str();
+ consumeToken();
+
+ // Add this unresolved conflict.
+ ActiveModule->UnresolvedConflicts.push_back(Conflict);
+}
+
+/// \brief Parse an inferred module declaration (wildcard modules).
///
/// module-declaration:
/// 'explicit'[opt] 'framework'[opt] 'module' * attributes[opt]
@@ -1593,6 +1816,7 @@ bool ModuleMapParser::parseOptionalAttributes(Attributes &Attrs) {
// Decode the attribute name.
AttributeKind Attribute
= llvm::StringSwitch<AttributeKind>(Tok.getString())
+ .Case("exhaustive", AT_exhaustive)
.Case("system", AT_system)
.Default(AT_unknown);
switch (Attribute) {
@@ -1604,6 +1828,10 @@ bool ModuleMapParser::parseOptionalAttributes(Attributes &Attrs) {
case AT_system:
Attrs.IsSystem = true;
break;
+
+ case AT_exhaustive:
+ Attrs.IsExhaustive = true;
+ break;
}
consumeToken();
@@ -1653,13 +1881,16 @@ bool ModuleMapParser::parseModuleMapFile() {
case MMToken::FrameworkKeyword:
parseModuleDecl();
break;
-
+
case MMToken::Comma:
+ case MMToken::ConfigMacros:
+ case MMToken::Conflict:
case MMToken::ExcludeKeyword:
case MMToken::ExportKeyword:
case MMToken::HeaderKeyword:
case MMToken::Identifier:
case MMToken::LBrace:
+ case MMToken::LinkKeyword:
case MMToken::LSquare:
case MMToken::Period:
case MMToken::RBrace:
@@ -1677,11 +1908,16 @@ bool ModuleMapParser::parseModuleMapFile() {
}
bool ModuleMap::parseModuleMapFile(const FileEntry *File) {
+ llvm::DenseMap<const FileEntry *, bool>::iterator Known
+ = ParsedModuleMap.find(File);
+ if (Known != ParsedModuleMap.end())
+ return Known->second;
+
assert(Target != 0 && "Missing target information");
FileID ID = SourceMgr->createFileID(File, SourceLocation(), SrcMgr::C_User);
const llvm::MemoryBuffer *Buffer = SourceMgr->getBuffer(ID);
if (!Buffer)
- return true;
+ return ParsedModuleMap[File] = true;
// Parse this module map file.
Lexer L(ID, SourceMgr->getBuffer(ID), *SourceMgr, MMapLangOpts);
@@ -1690,6 +1926,6 @@ bool ModuleMap::parseModuleMapFile(const FileEntry *File) {
BuiltinIncludeDir);
bool Result = Parser.parseModuleMapFile();
Diags->getClient()->EndSourceFile();
-
+ ParsedModuleMap[File] = Result;
return Result;
}
diff --git a/lib/Lex/PPConditionalDirectiveRecord.cpp b/lib/Lex/PPConditionalDirectiveRecord.cpp
new file mode 100644
index 0000000..16ce3ef
--- /dev/null
+++ b/lib/Lex/PPConditionalDirectiveRecord.cpp
@@ -0,0 +1,120 @@
+//===--- PPConditionalDirectiveRecord.h - Preprocessing Directives-*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPConditionalDirectiveRecord class, which maintains
+// a record of conditional directive regions.
+//
+//===----------------------------------------------------------------------===//
+#include "clang/Lex/PPConditionalDirectiveRecord.h"
+#include "llvm/Support/Capacity.h"
+
+using namespace clang;
+
+PPConditionalDirectiveRecord::PPConditionalDirectiveRecord(SourceManager &SM)
+ : SourceMgr(SM) {
+ CondDirectiveStack.push_back(SourceLocation());
+}
+
+bool PPConditionalDirectiveRecord::rangeIntersectsConditionalDirective(
+ SourceRange Range) const {
+ if (Range.isInvalid())
+ return false;
+
+ CondDirectiveLocsTy::const_iterator
+ low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(),
+ Range.getBegin(), CondDirectiveLoc::Comp(SourceMgr));
+ if (low == CondDirectiveLocs.end())
+ return false;
+
+ if (SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), low->getLoc()))
+ return false;
+
+ CondDirectiveLocsTy::const_iterator
+ upp = std::upper_bound(low, CondDirectiveLocs.end(),
+ Range.getEnd(), CondDirectiveLoc::Comp(SourceMgr));
+ SourceLocation uppRegion;
+ if (upp != CondDirectiveLocs.end())
+ uppRegion = upp->getRegionLoc();
+
+ return low->getRegionLoc() != uppRegion;
+}
+
+SourceLocation PPConditionalDirectiveRecord::findConditionalDirectiveRegionLoc(
+ SourceLocation Loc) const {
+ if (Loc.isInvalid())
+ return SourceLocation();
+ if (CondDirectiveLocs.empty())
+ return SourceLocation();
+
+ if (SourceMgr.isBeforeInTranslationUnit(CondDirectiveLocs.back().getLoc(),
+ Loc))
+ return CondDirectiveStack.back();
+
+ CondDirectiveLocsTy::const_iterator
+ low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(),
+ Loc, CondDirectiveLoc::Comp(SourceMgr));
+ assert(low != CondDirectiveLocs.end());
+ return low->getRegionLoc();
+}
+
+void PPConditionalDirectiveRecord::addCondDirectiveLoc(
+ CondDirectiveLoc DirLoc) {
+ // Ignore directives in system headers.
+ if (SourceMgr.isInSystemHeader(DirLoc.getLoc()))
+ return;
+
+ assert(CondDirectiveLocs.empty() ||
+ SourceMgr.isBeforeInTranslationUnit(CondDirectiveLocs.back().getLoc(),
+ DirLoc.getLoc()));
+ CondDirectiveLocs.push_back(DirLoc);
+}
+
+void PPConditionalDirectiveRecord::If(SourceLocation Loc,
+ SourceRange ConditionRange) {
+ addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+ CondDirectiveStack.push_back(Loc);
+}
+
+void PPConditionalDirectiveRecord::Ifdef(SourceLocation Loc,
+ const Token &MacroNameTok,
+ const MacroDirective *MD) {
+ addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+ CondDirectiveStack.push_back(Loc);
+}
+
+void PPConditionalDirectiveRecord::Ifndef(SourceLocation Loc,
+ const Token &MacroNameTok,
+ const MacroDirective *MD) {
+ addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+ CondDirectiveStack.push_back(Loc);
+}
+
+void PPConditionalDirectiveRecord::Elif(SourceLocation Loc,
+ SourceRange ConditionRange,
+ SourceLocation IfLoc) {
+ addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+ CondDirectiveStack.back() = Loc;
+}
+
+void PPConditionalDirectiveRecord::Else(SourceLocation Loc,
+ SourceLocation IfLoc) {
+ addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+ CondDirectiveStack.back() = Loc;
+}
+
+void PPConditionalDirectiveRecord::Endif(SourceLocation Loc,
+ SourceLocation IfLoc) {
+ addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+ assert(!CondDirectiveStack.empty());
+ CondDirectiveStack.pop_back();
+}
+
+size_t PPConditionalDirectiveRecord::getTotalMemory() const {
+ return llvm::capacity_in_bytes(CondDirectiveLocs);
+}
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index b7c1846..07c1867 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -13,17 +13,18 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/LiteralSupport.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/HeaderSearch.h"
-#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/LexDiagnostic.h"
-#include "clang/Lex/CodeCompletionHandler.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/Pragma.h"
-#include "clang/Basic/FileManager.h"
-#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/APInt.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SaveAndRestore.h"
using namespace clang;
//===----------------------------------------------------------------------===//
@@ -56,12 +57,42 @@ MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
return MI;
}
-MacroInfo *Preprocessor::CloneMacroInfo(const MacroInfo &MacroToClone) {
- MacroInfo *MI = AllocateMacroInfo();
- new (MI) MacroInfo(MacroToClone, BP);
+MacroInfo *Preprocessor::AllocateDeserializedMacroInfo(SourceLocation L,
+ unsigned SubModuleID) {
+ LLVM_STATIC_ASSERT(llvm::AlignOf<MacroInfo>::Alignment >= sizeof(SubModuleID),
+ "alignment for MacroInfo is less than the ID");
+ MacroInfo *MI =
+ (MacroInfo*)BP.Allocate(sizeof(MacroInfo) + sizeof(SubModuleID),
+ llvm::AlignOf<MacroInfo>::Alignment);
+ new (MI) MacroInfo(L);
+ MI->FromASTFile = true;
+ MI->setOwningModuleID(SubModuleID);
return MI;
}
+DefMacroDirective *
+Preprocessor::AllocateDefMacroDirective(MacroInfo *MI, SourceLocation Loc,
+ bool isImported) {
+ DefMacroDirective *MD = BP.Allocate<DefMacroDirective>();
+ new (MD) DefMacroDirective(MI, Loc, isImported);
+ return MD;
+}
+
+UndefMacroDirective *
+Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
+ UndefMacroDirective *MD = BP.Allocate<UndefMacroDirective>();
+ new (MD) UndefMacroDirective(UndefLoc);
+ return MD;
+}
+
+VisibilityMacroDirective *
+Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
+ bool isPublic) {
+ VisibilityMacroDirective *MD = BP.Allocate<VisibilityMacroDirective>();
+ new (MD) VisibilityMacroDirective(Loc, isPublic);
+ return MD;
+}
+
/// \brief Release the specified MacroInfo to be reused for allocating
/// new MacroInfo objects.
void Preprocessor::ReleaseMacroInfo(MacroInfo *MI) {
@@ -140,15 +171,14 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) {
Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
// Fall through on error.
} else if (isDefineUndef && II->getPPKeywordID() == tok::pp_defined) {
- // Error if defining "defined": C99 6.10.8.4.
+ // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
Diag(MacroNameTok, diag::err_defined_macro_name);
- } else if (isDefineUndef && II->hasMacroDefinition() &&
+ } else if (isDefineUndef == 2 && II->hasMacroDefinition() &&
getMacroInfo(II)->isBuiltinMacro()) {
- // Error if defining "__LINE__" and other builtins: C99 6.10.8.4.
- if (isDefineUndef == 1)
- Diag(MacroNameTok, diag::pp_redef_builtin_macro);
- else
- Diag(MacroNameTok, diag::pp_undef_builtin_macro);
+ // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4
+ // and C++ [cpp.predefined]p4], but allow it as an extension.
+ Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);
+ return;
} else {
// Okay, we got a good identifier node. Return it.
return;
@@ -255,7 +285,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
// directive mode. Tell the lexer this so any newlines we see will be
// converted into an EOD token (this terminates the macro).
CurPPLexer->ParsingPreprocessorDirective = true;
- if (CurLexer) CurLexer->SetCommentRetentionState(false);
+ if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
// Read the next token, the directive flavor.
@@ -266,7 +296,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
if (Tok.isNot(tok::raw_identifier)) {
CurPPLexer->ParsingPreprocessorDirective = false;
// Restore comment saving mode.
- if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+ if (CurLexer) CurLexer->resetExtendedTokenMode();
continue;
}
@@ -282,7 +312,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
FirstChar != 'i' && FirstChar != 'e') {
CurPPLexer->ParsingPreprocessorDirective = false;
// Restore comment saving mode.
- if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+ if (CurLexer) CurLexer->resetExtendedTokenMode();
continue;
}
@@ -299,7 +329,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
if (IdLen >= 20) {
CurPPLexer->ParsingPreprocessorDirective = false;
// Restore comment saving mode.
- if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+ if (CurLexer) CurLexer->resetExtendedTokenMode();
continue;
}
memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
@@ -405,7 +435,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
CurPPLexer->ParsingPreprocessorDirective = false;
// Restore comment saving mode.
- if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+ if (CurLexer) CurLexer->resetExtendedTokenMode();
}
// Finally, if we are out of the conditional (saw an #endif or ran off the end
@@ -536,11 +566,11 @@ const FileEntry *Preprocessor::LookupFile(
// Otherwise, see if this is a subframework header. If so, this is relative
// to one of the headers on the #include stack. Walk the list of the current
// headers on the #include stack and pass them to HeaderInfo.
- // FIXME: SuggestedModule!
if (IsFileLexer()) {
if ((CurFileEnt = SourceMgr.getFileEntryForID(CurPPLexer->getFileID())))
if ((FE = HeaderInfo.LookupSubframeworkHeader(Filename, CurFileEnt,
- SearchPath, RelativePath)))
+ SearchPath, RelativePath,
+ SuggestedModule)))
return FE;
}
@@ -550,7 +580,8 @@ const FileEntry *Preprocessor::LookupFile(
if ((CurFileEnt =
SourceMgr.getFileEntryForID(ISEntry.ThePPLexer->getFileID())))
if ((FE = HeaderInfo.LookupSubframeworkHeader(
- Filename, CurFileEnt, SearchPath, RelativePath)))
+ Filename, CurFileEnt, SearchPath, RelativePath,
+ SuggestedModule)))
return FE;
}
}
@@ -590,6 +621,7 @@ void Preprocessor::HandleDirective(Token &Result) {
// mode. Tell the lexer this so any newlines we see will be converted into an
// EOD token (which terminates the directive).
CurPPLexer->ParsingPreprocessorDirective = true;
+ if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
++NumDirectives;
@@ -634,14 +666,9 @@ void Preprocessor::HandleDirective(Token &Result) {
// and reset to previous state when returning from this function.
ResetMacroExpansionHelper helper(this);
-TryAgain:
switch (Result.getKind()) {
case tok::eod:
return; // null directive.
- case tok::comment:
- // Handle stuff like "# /*foo*/ define X" in -E -C mode.
- LexUnexpandedToken(Result);
- goto TryAgain;
case tok::code_completion:
if (CodeComplete)
CodeComplete->CodeCompleteDirective(
@@ -788,7 +815,7 @@ static bool GetLineValue(Token &DigitTok, unsigned &Val,
// here.
Val = 0;
for (unsigned i = 0; i != ActualLength; ++i) {
- if (!isdigit(DigitTokBegin[i])) {
+ if (!isDigit(DigitTokBegin[i])) {
PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i),
diag::err_pp_line_digit_sequence);
PP.DiscardUntilEndOfDirective();
@@ -834,11 +861,11 @@ void Preprocessor::HandleLineDirective(Token &Tok) {
// Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
// number greater than 2147483647". C90 requires that the line # be <= 32767.
unsigned LineLimit = 32768U;
- if (LangOpts.C99 || LangOpts.CPlusPlus0x)
+ if (LangOpts.C99 || LangOpts.CPlusPlus11)
LineLimit = 2147483648U;
if (LineNo >= LineLimit)
Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;
- else if (LangOpts.CPlusPlus0x && LineNo >= 32768U)
+ else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);
int FilenameID = -1;
@@ -1107,23 +1134,19 @@ void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
// Check to see if this is the last token on the #__public_macro line.
CheckEndOfDirective("__public_macro");
+ IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
// Okay, we finally have a valid identifier to undef.
- MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo());
+ MacroDirective *MD = getMacroDirective(II);
// If the macro is not defined, this is an error.
- if (MI == 0) {
- Diag(MacroNameTok, diag::err_pp_visibility_non_macro)
- << MacroNameTok.getIdentifierInfo();
+ if (MD == 0) {
+ Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
return;
}
// Note that this macro has now been exported.
- MI->setVisibility(/*IsPublic=*/true, MacroNameTok.getLocation());
-
- // If this macro definition came from a PCH file, mark it
- // as having changed since serialization.
- if (MI->isFromAST())
- MI->setChangedAfterLoad();
+ appendMacroDirective(II, AllocateVisibilityMacroDirective(
+ MacroNameTok.getLocation(), /*IsPublic=*/true));
}
/// \brief Handle a #private directive.
@@ -1138,23 +1161,19 @@ void Preprocessor::HandleMacroPrivateDirective(Token &Tok) {
// Check to see if this is the last token on the #__private_macro line.
CheckEndOfDirective("__private_macro");
+ IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
// Okay, we finally have a valid identifier to undef.
- MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo());
+ MacroDirective *MD = getMacroDirective(II);
// If the macro is not defined, this is an error.
- if (MI == 0) {
- Diag(MacroNameTok, diag::err_pp_visibility_non_macro)
- << MacroNameTok.getIdentifierInfo();
+ if (MD == 0) {
+ Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
return;
}
// Note that this macro has now been marked private.
- MI->setVisibility(/*IsPublic=*/false, MacroNameTok.getLocation());
-
- // If this macro definition came from a PCH file, mark it
- // as having changed since serialization.
- if (MI->isFromAST())
- MI->setChangedAfterLoad();
+ appendMacroDirective(II, AllocateVisibilityMacroDirective(
+ MacroNameTok.getLocation(), /*IsPublic=*/false));
}
//===----------------------------------------------------------------------===//
@@ -1375,7 +1394,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
if (Callbacks->FileNotFound(Filename, RecoveryPath)) {
if (const DirectoryEntry *DE = FileMgr.getDirectory(RecoveryPath)) {
// Add the recovery path to the list of search paths.
- DirectoryLookup DL(DE, SrcMgr::C_User, true, false);
+ DirectoryLookup DL(DE, SrcMgr::C_User, false);
HeaderInfo.AddSearchPath(DL, isAngled);
// Try the lookup again, skipping the cache.
@@ -1426,7 +1445,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
// Compute the module access path corresponding to this module.
// FIXME: Should we have a second loadModule() overload to avoid this
// extra lookup step?
- llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
+ SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
for (Module *Mod = SuggestedModule; Mod; Mod = Mod->Parent)
Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
FilenameTok.getLocation()));
@@ -1476,14 +1495,14 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
Diag(HashLoc, diag::warn_auto_module_import)
<< IncludeKind << PathString
<< FixItHint::CreateReplacement(ReplaceRange,
- "@__experimental_modules_import " + PathString.str().str() + ";");
+ "@import " + PathString.str().str() + ";");
}
// Load the module.
// If this was an #__include_macros directive, only make macros visible.
Module::NameVisibilityKind Visibility
= (IncludeKind == 3)? Module::MacrosVisible : Module::AllVisible;
- Module *Imported
+ ModuleLoadResult Imported
= TheModuleLoader.loadModule(IncludeTok.getLocation(), Path, Visibility,
/*IsIncludeDirective=*/true);
assert((Imported == 0 || Imported == SuggestedModule) &&
@@ -1498,6 +1517,13 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
}
return;
}
+
+ // If we failed to find a submodule that we expected to find, we can
+ // continue. Otherwise, there's an error in the included file, so we
+ // don't want to include it.
+ if (!BuildingImportedModule && !Imported.isMissingExpected()) {
+ return;
+ }
}
if (Callbacks && SuggestedModule) {
@@ -1637,10 +1663,16 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI, Token &Tok) {
return true;
case tok::ellipsis: // #define X(... -> C99 varargs
if (!LangOpts.C99)
- Diag(Tok, LangOpts.CPlusPlus0x ?
+ Diag(Tok, LangOpts.CPlusPlus11 ?
diag::warn_cxx98_compat_variadic_macro :
diag::ext_variadic_macro);
+ // OpenCL v1.2 s6.9.e: variadic macros are not supported.
+ if (LangOpts.OpenCL) {
+ Diag(Tok, diag::err_pp_opencl_variadic_macros);
+ return true;
+ }
+
// Lex the token after the identifier.
LexUnexpandedToken(Tok);
if (Tok.isNot(tok::r_paren)) {
@@ -1763,7 +1795,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {
// Read the first token after the arg list for down below.
LexUnexpandedToken(Tok);
- } else if (LangOpts.C99 || LangOpts.CPlusPlus0x) {
+ } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
// C99 requires whitespace between the macro definition and the body. Emit
// a diagnostic for something like "#define X+".
Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
@@ -1809,8 +1841,37 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {
while (Tok.isNot(tok::eod)) {
LastTok = Tok;
- if (Tok.isNot(tok::hash)) {
+ if (Tok.isNot(tok::hash) && Tok.isNot(tok::hashhash)) {
+ MI->AddTokenToBody(Tok);
+
+ // Get the next token of the macro.
+ LexUnexpandedToken(Tok);
+ continue;
+ }
+
+ if (Tok.is(tok::hashhash)) {
+
+ // If we see token pasting, check if it looks like the gcc comma
+ // pasting extension. We'll use this information to suppress
+ // diagnostics later on.
+
+ // Get the next token of the macro.
+ LexUnexpandedToken(Tok);
+
+ if (Tok.is(tok::eod)) {
+ MI->AddTokenToBody(LastTok);
+ break;
+ }
+
+ unsigned NumTokens = MI->getNumTokens();
+ if (NumTokens && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
+ MI->getReplacementToken(NumTokens-1).is(tok::comma))
+ MI->setHasCommaPasting();
+
+ // Things look ok, add the '##' and param name tokens to the macro.
+ MI->AddTokenToBody(LastTok);
MI->AddTokenToBody(Tok);
+ LastTok = Tok;
// Get the next token of the macro.
LexUnexpandedToken(Tok);
@@ -1874,7 +1935,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {
// Finally, if this identifier already had a macro defined for it, verify that
// the macro bodies are identical, and issue diagnostics if they are not.
- if (MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo())) {
+ if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
// It is very common for system headers to have tons of macro redefinitions
// and for warnings to be disabled in system headers. If this is the case,
// then don't bother calling MacroInfo::isIdenticalTo.
@@ -1883,10 +1944,14 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {
if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
+ // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
+ // C++ [cpp.predefined]p4, but allow it as an extension.
+ if (OtherMI->isBuiltinMacro())
+ Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
// Macros must be identical. This means all tokens and whitespace
- // separation must be the same. C99 6.10.3.2.
- if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
- !MI->isIdenticalTo(*OtherMI, *this)) {
+ // separation must be the same. C99 6.10.3p2.
+ else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
+ !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
<< MacroNameTok.getIdentifierInfo();
Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
@@ -1896,7 +1961,8 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {
WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
}
- setMacroInfo(MacroNameTok.getIdentifierInfo(), MI);
+ DefMacroDirective *MD =
+ appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);
assert(!MI->isUsed());
// If we need warning for not using the macro, add its location in the
@@ -1910,7 +1976,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {
// If the callbacks want to know, tell them about the macro definition.
if (Callbacks)
- Callbacks->MacroDefined(MacroNameTok, MI);
+ Callbacks->MacroDefined(MacroNameTok, MD);
}
/// HandleUndefDirective - Implements \#undef.
@@ -1929,7 +1995,13 @@ void Preprocessor::HandleUndefDirective(Token &UndefTok) {
CheckEndOfDirective("undef");
// Okay, we finally have a valid identifier to undef.
- MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo());
+ MacroDirective *MD = getMacroDirective(MacroNameTok.getIdentifierInfo());
+ const MacroInfo *MI = MD ? MD->getMacroInfo() : 0;
+
+ // If the callbacks want to know, tell them about the macro #undef.
+ // Note: no matter if the macro was defined or not.
+ if (Callbacks)
+ Callbacks->MacroUndefined(MacroNameTok, MD);
// If the macro is not defined, this is a noop undef, just return.
if (MI == 0) return;
@@ -1937,27 +2009,11 @@ void Preprocessor::HandleUndefDirective(Token &UndefTok) {
if (!MI->isUsed() && MI->isWarnIfUnused())
Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
- // If the callbacks want to know, tell them about the macro #undef.
- if (Callbacks)
- Callbacks->MacroUndefined(MacroNameTok, MI);
-
if (MI->isWarnIfUnused())
WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
- UndefineMacro(MacroNameTok.getIdentifierInfo(), MI,
- MacroNameTok.getLocation());
-}
-
-void Preprocessor::UndefineMacro(IdentifierInfo *II, MacroInfo *MI,
- SourceLocation UndefLoc) {
- MI->setUndefLoc(UndefLoc);
- if (MI->isFromAST()) {
- MI->setChangedAfterLoad();
- if (Listener)
- Listener->UndefinedMacro(MI);
- }
-
- clearMacroInfo(II);
+ appendMacroDirective(MacroNameTok.getIdentifierInfo(),
+ AllocateUndefMacroDirective(MacroNameTok.getLocation()));
}
@@ -1991,7 +2047,8 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef,
CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
- MacroInfo *MI = getMacroInfo(MII);
+ MacroDirective *MD = getMacroDirective(MII);
+ MacroInfo *MI = MD ? MD->getMacroInfo() : 0;
if (CurPPLexer->getConditionalStackDepth() == 0) {
// If the start of a top-level #ifdef and if the macro is not defined,
@@ -2011,9 +2068,9 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef,
if (Callbacks) {
if (isIfndef)
- Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok);
+ Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD);
else
- Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok);
+ Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD);
}
// Should we include the stuff contained by this directive?
diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp
index d5a88db..d9ce8bf 100644
--- a/lib/Lex/PPExpressions.cpp
+++ b/lib/Lex/PPExpressions.cpp
@@ -17,13 +17,14 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/MacroInfo.h"
-#include "clang/Lex/LiteralSupport.h"
-#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Basic/TargetInfo.h"
+#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/MacroInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SaveAndRestore.h"
using namespace clang;
namespace {
@@ -111,15 +112,21 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
Result.Val = II->hasMacroDefinition();
Result.Val.setIsUnsigned(false); // Result is signed intmax_t.
+ MacroDirective *Macro = 0;
// If there is a macro, mark it used.
if (Result.Val != 0 && ValueLive) {
- MacroInfo *Macro = PP.getMacroInfo(II);
- PP.markMacroAsUsed(Macro);
+ Macro = PP.getMacroDirective(II);
+ PP.markMacroAsUsed(Macro->getMacroInfo());
}
// Invoke the 'defined' callback.
- if (PPCallbacks *Callbacks = PP.getPPCallbacks())
- Callbacks->Defined(PeekTok);
+ if (PPCallbacks *Callbacks = PP.getPPCallbacks()) {
+ MacroDirective *MD = Macro;
+ // Pass the MacroInfo for the macro name even if the value is dead.
+ if (!MD && Result.Val != 0)
+ MD = PP.getMacroDirective(II);
+ Callbacks->Defined(PeekTok, MD);
+ }
// If we are in parens, ensure we have a trailing ).
if (LParenLoc.isValid()) {
@@ -224,7 +231,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
if (!PP.getLangOpts().C99 && Literal.isLongLong) {
if (PP.getLangOpts().CPlusPlus)
PP.Diag(PeekTok,
- PP.getLangOpts().CPlusPlus0x ?
+ PP.getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong);
else
PP.Diag(PeekTok, diag::ext_c99_longlong);
@@ -258,9 +265,9 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
return false;
}
case tok::char_constant: // 'x'
- case tok::wide_char_constant: { // L'x'
+ case tok::wide_char_constant: // L'x'
case tok::utf16_char_constant: // u'x'
- case tok::utf32_char_constant: // U'x'
+ case tok::utf32_char_constant: { // U'x'
// Complain about, and drop, any ud-suffix.
if (PeekTok.hasUDSuffix())
PP.Diag(PeekTok, diag::err_pp_invalid_udl) << /*character*/0;
@@ -724,6 +731,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
/// to "!defined(X)" return X in IfNDefMacro.
bool Preprocessor::
EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
+ SaveAndRestore<bool> PPDir(ParsingIfOrElifDirective, true);
// Save the current state of 'DisableMacroExpansion' and reset it to false. If
// 'DisableMacroExpansion' is true, then we must be in a macro argument list
// in which case a directive is undefined behavior. We want macros to be able
diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp
index d827f58..be4defe 100644
--- a/lib/Lex/PPLexerChange.cpp
+++ b/lib/Lex/PPLexerChange.cpp
@@ -13,15 +13,15 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/HeaderSearch.h"
-#include "clang/Lex/MacroInfo.h"
-#include "clang/Lex/LexDiagnostic.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/MacroInfo.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PathV2.h"
-#include "llvm/ADT/StringSwitch.h"
using namespace clang;
PPCallbacks::~PPCallbacks() {}
@@ -158,15 +158,17 @@ void Preprocessor::EnterSourceFileWithPTH(PTHLexer *PL,
/// tokens from it instead of the current buffer.
void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
MacroInfo *Macro, MacroArgs *Args) {
- PushIncludeMacroStack();
- CurDirLookup = 0;
-
+ TokenLexer *TokLexer;
if (NumCachedTokenLexers == 0) {
- CurTokenLexer.reset(new TokenLexer(Tok, ILEnd, Macro, Args, *this));
+ TokLexer = new TokenLexer(Tok, ILEnd, Macro, Args, *this);
} else {
- CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]);
- CurTokenLexer->Init(Tok, ILEnd, Macro, Args);
+ TokLexer = TokenLexerCache[--NumCachedTokenLexers];
+ TokLexer->Init(Tok, ILEnd, Macro, Args);
}
+
+ PushIncludeMacroStack();
+ CurDirLookup = 0;
+ CurTokenLexer.reset(TokLexer);
if (CurLexerKind != CLK_LexAfterModuleImport)
CurLexerKind = CLK_TokenLexer;
}
@@ -186,18 +188,20 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
bool DisableMacroExpansion,
bool OwnsTokens) {
- // Save our current state.
- PushIncludeMacroStack();
- CurDirLookup = 0;
-
// Create a macro expander to expand from the specified token stream.
+ TokenLexer *TokLexer;
if (NumCachedTokenLexers == 0) {
- CurTokenLexer.reset(new TokenLexer(Toks, NumToks, DisableMacroExpansion,
- OwnsTokens, *this));
+ TokLexer = new TokenLexer(Toks, NumToks, DisableMacroExpansion,
+ OwnsTokens, *this);
} else {
- CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]);
- CurTokenLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens);
+ TokLexer = TokenLexerCache[--NumCachedTokenLexers];
+ TokLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens);
}
+
+ // Save our current state.
+ PushIncludeMacroStack();
+ CurDirLookup = 0;
+ CurTokenLexer.reset(TokLexer);
if (CurLexerKind != CLK_LexAfterModuleImport)
CurLexerKind = CLK_TokenLexer;
}
@@ -328,6 +332,17 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
CurLexer->BufferPtr = EndPos;
CurLexer->FormTokenWithChars(Result, EndPos, tok::eof);
+ if (isCodeCompletionEnabled()) {
+ // Inserting the code-completion point increases the source buffer by 1,
+ // but the main FileID was created before inserting the point.
+ // Compensate by reducing the EOF location by 1, otherwise the location
+ // will point to the next FileID.
+ // FIXME: This is hacky, the code-completion point should probably be
+ // inserted before the main FileID is created.
+ if (CurLexer->getFileLoc() == CodeCompletionFileLoc)
+ Result.setLocation(Result.getLocation().getLocWithOffset(-1));
+ }
+
if (!isIncrementalProcessingEnabled())
// We're done with lexing.
CurLexer.reset();
@@ -380,7 +395,7 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
SmallString<128> RelativePath;
computeRelativePath(FileMgr, Dir, Header, RelativePath);
Diag(StartLoc, diag::warn_uncovered_module_header)
- << RelativePath;
+ << Mod->getFullModuleName() << RelativePath;
}
}
}
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index eee4342..21451f5 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -14,25 +14,26 @@
#include "clang/Lex/Preprocessor.h"
#include "MacroArgs.h"
-#include "clang/Lex/MacroInfo.h"
-#include "clang/Basic/SourceManager.h"
#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
-#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/ExternalPreprocessorSource.h"
-#include "clang/Lex/LiteralSupport.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/MacroInfo.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstdio>
#include <ctime>
using namespace clang;
-MacroInfo *Preprocessor::getMacroInfoHistory(IdentifierInfo *II) const {
+MacroDirective *
+Preprocessor::getMacroDirectiveHistory(const IdentifierInfo *II) const {
assert(II->hadMacroDefinition() && "Identifier has not been not a macro!");
macro_iterator Pos = Macros.find(II);
@@ -40,125 +41,31 @@ MacroInfo *Preprocessor::getMacroInfoHistory(IdentifierInfo *II) const {
return Pos->second;
}
-/// setMacroInfo - Specify a macro for this identifier.
-///
-void Preprocessor::setMacroInfo(IdentifierInfo *II, MacroInfo *MI) {
- assert(MI && "MacroInfo should be non-zero!");
- assert(MI->getUndefLoc().isInvalid() &&
- "Undefined macros cannot be registered");
-
- MacroInfo *&StoredMI = Macros[II];
- MI->setPreviousDefinition(StoredMI);
- StoredMI = MI;
- II->setHasMacroDefinition(MI->getUndefLoc().isInvalid());
- if (II->isFromAST())
- II->setChangedSinceDeserialization();
-}
-
-void Preprocessor::addLoadedMacroInfo(IdentifierInfo *II, MacroInfo *MI,
- MacroInfo *Hint) {
- assert(MI && "Missing macro?");
- assert(MI->isFromAST() && "Macro is not from an AST?");
- assert(!MI->getPreviousDefinition() && "Macro already in chain?");
-
- MacroInfo *&StoredMI = Macros[II];
-
- // Easy case: this is the first macro definition for this macro.
- if (!StoredMI) {
- StoredMI = MI;
-
- if (MI->isDefined())
- II->setHasMacroDefinition(true);
- return;
- }
-
- // If this macro is a definition and this identifier has been neither
- // defined nor undef'd in the current translation unit, add this macro
- // to the end of the chain of definitions.
- if (MI->isDefined() && StoredMI->isFromAST()) {
- // Simple case: if this is the first actual definition, just put it at
- // th beginning.
- if (!StoredMI->isDefined()) {
- MI->setPreviousDefinition(StoredMI);
- StoredMI = MI;
-
- II->setHasMacroDefinition(true);
- return;
- }
-
- // Find the end of the definition chain.
- MacroInfo *Prev;
- MacroInfo *PrevPrev = StoredMI;
- bool Ambiguous = StoredMI->isAmbiguous();
- bool MatchedOther = false;
- do {
- Prev = PrevPrev;
-
- // If the macros are not identical, we have an ambiguity.
- if (!Prev->isIdenticalTo(*MI, *this)) {
- if (!Ambiguous) {
- Ambiguous = true;
- StoredMI->setAmbiguous(true);
- }
- } else {
- MatchedOther = true;
- }
- } while ((PrevPrev = Prev->getPreviousDefinition()) &&
- PrevPrev->isDefined());
-
- // If there are ambiguous definitions, and we didn't match any other
- // definition, then mark us as ambiguous.
- if (Ambiguous && !MatchedOther)
- MI->setAmbiguous(true);
-
- // Wire this macro information into the chain.
- MI->setPreviousDefinition(Prev->getPreviousDefinition());
- Prev->setPreviousDefinition(MI);
- return;
- }
-
- // The macro is not a definition; put it at the end of the list.
- MacroInfo *Prev = Hint? Hint : StoredMI;
- while (Prev->getPreviousDefinition())
- Prev = Prev->getPreviousDefinition();
- Prev->setPreviousDefinition(MI);
-}
-
-void Preprocessor::makeLoadedMacroInfoVisible(IdentifierInfo *II,
- MacroInfo *MI) {
- assert(MI->isFromAST() && "Macro must be from the AST");
-
- MacroInfo *&StoredMI = Macros[II];
- if (StoredMI == MI) {
- // Easy case: this is the first macro anyway.
- II->setHasMacroDefinition(MI->isDefined());
- return;
- }
-
- // Go find the macro and pull it out of the list.
- // FIXME: Yes, this is O(N), and making a pile of macros visible or hidden
- // would be quadratic, but it's extremely rare.
- MacroInfo *Prev = StoredMI;
- while (Prev->getPreviousDefinition() != MI)
- Prev = Prev->getPreviousDefinition();
- Prev->setPreviousDefinition(MI->getPreviousDefinition());
- MI->setPreviousDefinition(0);
-
- // Add the macro back to the list.
- addLoadedMacroInfo(II, MI);
-
- II->setHasMacroDefinition(StoredMI->isDefined());
- if (II->isFromAST())
+void Preprocessor::appendMacroDirective(IdentifierInfo *II, MacroDirective *MD){
+ assert(MD && "MacroDirective should be non-zero!");
+ assert(!MD->getPrevious() && "Already attached to a MacroDirective history.");
+
+ MacroDirective *&StoredMD = Macros[II];
+ MD->setPrevious(StoredMD);
+ StoredMD = MD;
+ II->setHasMacroDefinition(MD->isDefined());
+ bool isImportedMacro = isa<DefMacroDirective>(MD) &&
+ cast<DefMacroDirective>(MD)->isImported();
+ if (II->isFromAST() && !isImportedMacro)
II->setChangedSinceDeserialization();
}
-/// \brief Undefine a macro for this identifier.
-void Preprocessor::clearMacroInfo(IdentifierInfo *II) {
- assert(II->hasMacroDefinition() && "Macro is not defined!");
- assert(Macros[II]->getUndefLoc().isValid() && "Macro is still defined!");
- II->setHasMacroDefinition(false);
- if (II->isFromAST())
- II->setChangedSinceDeserialization();
+void Preprocessor::setLoadedMacroDirective(IdentifierInfo *II,
+ MacroDirective *MD) {
+ assert(II && MD);
+ MacroDirective *&StoredMD = Macros[II];
+ assert(!StoredMD &&
+ "the macro history was modified before initializing it from a pch");
+ StoredMD = MD;
+ // Setup the identifier as having associated macro history.
+ II->setHasMacroDefinition(true);
+ if (!MD->isDefined())
+ II->setHasMacroDefinition(false);
}
/// RegisterBuiltinMacro - Register the specified identifier in the identifier
@@ -170,7 +77,7 @@ static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name){
// Mark it as being a macro that is builtin.
MacroInfo *MI = PP.AllocateMacroInfo(SourceLocation());
MI->setIsBuiltinMacro();
- PP.setMacroInfo(Id, MI);
+ PP.appendDefMacroDirective(Id, MI);
return Id;
}
@@ -303,7 +210,11 @@ bool Preprocessor::isNextPPTokenLParen() {
/// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
/// expanded as a macro, handle it and return the next token as 'Identifier'.
bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
- MacroInfo *MI) {
+ MacroDirective *MD) {
+ MacroDirective::DefInfo Def = MD->getDefinition();
+ assert(Def.isValid());
+ MacroInfo *MI = Def.getMacroInfo();
+
// If this is a macro expansion in the "#if !defined(x)" line for the file,
// then the macro could expand to different things in other contexts, we need
// to disable the optimization in this case.
@@ -311,7 +222,7 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
// If this is a builtin macro, like __LINE__ or _Pragma, handle it specially.
if (MI->isBuiltinMacro()) {
- if (Callbacks) Callbacks->MacroExpands(Identifier, MI,
+ if (Callbacks) Callbacks->MacroExpands(Identifier, MD,
Identifier.getLocation());
ExpandBuiltinMacro(Identifier);
return false;
@@ -364,13 +275,13 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
// MacroExpands callbacks still happen in source order, queue this
// callback to have it happen after the function macro callback.
DelayedMacroExpandsCallbacks.push_back(
- MacroExpandsInfo(Identifier, MI, ExpansionRange));
+ MacroExpandsInfo(Identifier, MD, ExpansionRange));
} else {
- Callbacks->MacroExpands(Identifier, MI, ExpansionRange);
+ Callbacks->MacroExpands(Identifier, MD, ExpansionRange);
if (!DelayedMacroExpandsCallbacks.empty()) {
for (unsigned i=0, e = DelayedMacroExpandsCallbacks.size(); i!=e; ++i) {
MacroExpandsInfo &Info = DelayedMacroExpandsCallbacks[i];
- Callbacks->MacroExpands(Info.Tok, Info.MI, Info.Range);
+ Callbacks->MacroExpands(Info.Tok, Info.MD, Info.Range);
}
DelayedMacroExpandsCallbacks.clear();
}
@@ -378,16 +289,17 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
}
// If the macro definition is ambiguous, complain.
- if (MI->isAmbiguous()) {
+ if (Def.getDirective()->isAmbiguous()) {
Diag(Identifier, diag::warn_pp_ambiguous_macro)
<< Identifier.getIdentifierInfo();
Diag(MI->getDefinitionLoc(), diag::note_pp_ambiguous_macro_chosen)
<< Identifier.getIdentifierInfo();
- for (MacroInfo *PrevMI = MI->getPreviousDefinition();
- PrevMI && PrevMI->isDefined();
- PrevMI = PrevMI->getPreviousDefinition()) {
- if (PrevMI->isAmbiguous()) {
- Diag(PrevMI->getDefinitionLoc(), diag::note_pp_ambiguous_macro_other)
+ for (MacroDirective::DefInfo PrevDef = Def.getPreviousDefinition();
+ PrevDef && !PrevDef.isUndefined();
+ PrevDef = PrevDef.getPreviousDefinition()) {
+ if (PrevDef.getDirective()->isAmbiguous()) {
+ Diag(PrevDef.getMacroInfo()->getDefinitionLoc(),
+ diag::note_pp_ambiguous_macro_other)
<< Identifier.getIdentifierInfo();
}
}
@@ -455,7 +367,10 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
if (MacroInfo *NewMI = getMacroInfo(NewII))
if (!NewMI->isEnabled() || NewMI == MI) {
Identifier.setFlag(Token::DisableExpand);
- Diag(Identifier, diag::pp_disabled_macro_expansion);
+ // Don't warn for "#define X X" like "#define bool bool" from
+ // stdbool.h.
+ if (NewMI != MI || MI->isFunctionLike())
+ Diag(Identifier, diag::pp_disabled_macro_expansion);
}
}
@@ -497,9 +412,13 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
// argument is separated by an EOF token. Use a SmallVector so we can avoid
// heap allocations in the common case.
SmallVector<Token, 64> ArgTokens;
+ bool ContainsCodeCompletionTok = false;
unsigned NumActuals = 0;
while (Tok.isNot(tok::r_paren)) {
+ if (ContainsCodeCompletionTok && (Tok.is(tok::eof) || Tok.is(tok::eod)))
+ break;
+
assert((Tok.is(tok::l_paren) || Tok.is(tok::comma)) &&
"only expect argument separators here");
@@ -516,10 +435,20 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
LexUnexpandedToken(Tok);
if (Tok.is(tok::eof) || Tok.is(tok::eod)) { // "#if f(<eof>" & "#if f(\n"
- Diag(MacroName, diag::err_unterm_macro_invoc);
- // Do not lose the EOF/EOD. Return it to the client.
- MacroName = Tok;
- return 0;
+ if (!ContainsCodeCompletionTok) {
+ Diag(MacroName, diag::err_unterm_macro_invoc);
+ Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+ << MacroName.getIdentifierInfo();
+ // Do not lose the EOF/EOD. Return it to the client.
+ MacroName = Tok;
+ return 0;
+ } else {
+ // Do not lose the EOF/EOD.
+ Token *Toks = new Token[1];
+ Toks[0] = Tok;
+ EnterTokenStream(Toks, 1, true, true);
+ break;
+ }
} else if (Tok.is(tok::r_paren)) {
// If we found the ) token, the macro arg list is done.
if (NumParens-- == 0) {
@@ -550,6 +479,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
if (!MI->isEnabled())
Tok.setFlag(Token::DisableExpand);
} else if (Tok.is(tok::code_completion)) {
+ ContainsCodeCompletionTok = true;
if (CodeComplete)
CodeComplete->CodeCompleteMacroArgument(MacroName.getIdentifierInfo(),
MI, NumActuals);
@@ -572,16 +502,20 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
if (ArgTokens.size() != ArgTokenStart)
ArgStartLoc = ArgTokens[ArgTokenStart].getLocation();
- // Emit the diagnostic at the macro name in case there is a missing ).
- // Emitting it at the , could be far away from the macro name.
- Diag(ArgStartLoc, diag::err_too_many_args_in_macro_invoc);
- return 0;
+ if (!ContainsCodeCompletionTok) {
+ // Emit the diagnostic at the macro name in case there is a missing ).
+ // Emitting it at the , could be far away from the macro name.
+ Diag(ArgStartLoc, diag::err_too_many_args_in_macro_invoc);
+ Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+ << MacroName.getIdentifierInfo();
+ return 0;
+ }
}
// Empty arguments are standard in C99 and C++0x, and are supported as an extension in
// other modes.
if (ArgTokens.size() == ArgTokenStart && !LangOpts.C99)
- Diag(Tok, LangOpts.CPlusPlus0x ?
+ Diag(Tok, LangOpts.CPlusPlus11 ?
diag::warn_cxx98_compat_empty_fnmacro_arg :
diag::ext_empty_fnmacro_arg);
@@ -593,8 +527,10 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
EOFTok.setLength(0);
ArgTokens.push_back(EOFTok);
++NumActuals;
- assert(NumFixedArgsLeft != 0 && "Too many arguments parsed");
- --NumFixedArgsLeft;
+ if (!ContainsCodeCompletionTok || NumFixedArgsLeft != 0) {
+ assert(NumFixedArgsLeft != 0 && "Too many arguments parsed");
+ --NumFixedArgsLeft;
+ }
}
// Okay, we either found the r_paren. Check to see if we parsed too few
@@ -604,6 +540,17 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
// See MacroArgs instance var for description of this.
bool isVarargsElided = false;
+ if (ContainsCodeCompletionTok) {
+ // Recover from not-fully-formed macro invocation during code-completion.
+ Token EOFTok;
+ EOFTok.startToken();
+ EOFTok.setKind(tok::eof);
+ EOFTok.setLocation(Tok.getLocation());
+ EOFTok.setLength(0);
+ for (; NumActuals < MinArgsExpected; ++NumActuals)
+ ArgTokens.push_back(EOFTok);
+ }
+
if (NumActuals < MinArgsExpected) {
// There are several cases where too few arguments is ok, handle them now.
if (NumActuals == 0 && MinArgsExpected == 1) {
@@ -619,9 +566,14 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
// Varargs where the named vararg parameter is missing: OK as extension.
// #define A(x, ...)
// A("blah")
- Diag(Tok, diag::ext_missing_varargs_arg);
- Diag(MI->getDefinitionLoc(), diag::note_macro_here)
- << MacroName.getIdentifierInfo();
+ //
+ // If the macro contains the comma pasting extension, the diagnostic
+ // is suppressed; we know we'll get another diagnostic later.
+ if (!MI->hasCommaPasting()) {
+ Diag(Tok, diag::ext_missing_varargs_arg);
+ Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+ << MacroName.getIdentifierInfo();
+ }
// Remember this occurred, allowing us to elide the comma when used for
// cases like:
@@ -630,9 +582,11 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
// #define C(...) blah(a, ## __VA_ARGS__)
// A(x) B(x) C()
isVarargsElided = true;
- } else {
+ } else if (!ContainsCodeCompletionTok) {
// Otherwise, emit the error.
Diag(Tok, diag::err_too_few_args_in_macro_invoc);
+ Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+ << MacroName.getIdentifierInfo();
return 0;
}
@@ -648,10 +602,13 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
if (NumActuals == 0 && MinArgsExpected == 2)
ArgTokens.push_back(Tok);
- } else if (NumActuals > MinArgsExpected && !MI->isVariadic()) {
+ } else if (NumActuals > MinArgsExpected && !MI->isVariadic() &&
+ !ContainsCodeCompletionTok) {
// Emit the diagnostic at the macro name in case there is a missing ).
// Emitting it at the , could be far away from the macro name.
Diag(MacroName, diag::err_too_many_args_in_macro_invoc);
+ Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+ << MacroName.getIdentifierInfo();
return 0;
}
@@ -745,7 +702,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
Feature = Feature.substr(2, Feature.size() - 4);
return llvm::StringSwitch<bool>(Feature)
- .Case("address_sanitizer", LangOpts.SanitizeAddress)
+ .Case("address_sanitizer", LangOpts.Sanitize.Address)
.Case("attribute_analyzer_noreturn", true)
.Case("attribute_availability", true)
.Case("attribute_availability_with_message", true)
@@ -767,6 +724,8 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
.Case("cxx_exceptions", LangOpts.Exceptions)
.Case("cxx_rtti", LangOpts.RTTI)
.Case("enumerator_attributes", true)
+ .Case("memory_sanitizer", LangOpts.Sanitize.Memory)
+ .Case("thread_sanitizer", LangOpts.Sanitize.Thread)
// Objective-C features
.Case("objc_arr", LangOpts.ObjCAutoRefCount) // FIXME: REMOVE?
.Case("objc_arc", LangOpts.ObjCAutoRefCount)
@@ -776,6 +735,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
.Case("objc_instancetype", LangOpts.ObjC2)
.Case("objc_modules", LangOpts.ObjC2 && LangOpts.Modules)
.Case("objc_nonfragile_abi", LangOpts.ObjCRuntime.isNonFragile())
+ .Case("objc_property_explicit_atomic", true) // Does clang support explicit "atomic" keyword?
.Case("objc_weak_class", LangOpts.ObjCRuntime.hasWeakClassImport())
.Case("ownership_holds", true)
.Case("ownership_returns", true)
@@ -792,41 +752,41 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
.Case("c_generic_selections", LangOpts.C11)
.Case("c_static_assert", LangOpts.C11)
// C++11 features
- .Case("cxx_access_control_sfinae", LangOpts.CPlusPlus0x)
- .Case("cxx_alias_templates", LangOpts.CPlusPlus0x)
- .Case("cxx_alignas", LangOpts.CPlusPlus0x)
- .Case("cxx_atomic", LangOpts.CPlusPlus0x)
- .Case("cxx_attributes", LangOpts.CPlusPlus0x)
- .Case("cxx_auto_type", LangOpts.CPlusPlus0x)
- .Case("cxx_constexpr", LangOpts.CPlusPlus0x)
- .Case("cxx_decltype", LangOpts.CPlusPlus0x)
- .Case("cxx_decltype_incomplete_return_types", LangOpts.CPlusPlus0x)
- .Case("cxx_default_function_template_args", LangOpts.CPlusPlus0x)
- .Case("cxx_defaulted_functions", LangOpts.CPlusPlus0x)
- .Case("cxx_delegating_constructors", LangOpts.CPlusPlus0x)
- .Case("cxx_deleted_functions", LangOpts.CPlusPlus0x)
- .Case("cxx_explicit_conversions", LangOpts.CPlusPlus0x)
- .Case("cxx_generalized_initializers", LangOpts.CPlusPlus0x)
- .Case("cxx_implicit_moves", LangOpts.CPlusPlus0x)
+ .Case("cxx_access_control_sfinae", LangOpts.CPlusPlus11)
+ .Case("cxx_alias_templates", LangOpts.CPlusPlus11)
+ .Case("cxx_alignas", LangOpts.CPlusPlus11)
+ .Case("cxx_atomic", LangOpts.CPlusPlus11)
+ .Case("cxx_attributes", LangOpts.CPlusPlus11)
+ .Case("cxx_auto_type", LangOpts.CPlusPlus11)
+ .Case("cxx_constexpr", LangOpts.CPlusPlus11)
+ .Case("cxx_decltype", LangOpts.CPlusPlus11)
+ .Case("cxx_decltype_incomplete_return_types", LangOpts.CPlusPlus11)
+ .Case("cxx_default_function_template_args", LangOpts.CPlusPlus11)
+ .Case("cxx_defaulted_functions", LangOpts.CPlusPlus11)
+ .Case("cxx_delegating_constructors", LangOpts.CPlusPlus11)
+ .Case("cxx_deleted_functions", LangOpts.CPlusPlus11)
+ .Case("cxx_explicit_conversions", LangOpts.CPlusPlus11)
+ .Case("cxx_generalized_initializers", LangOpts.CPlusPlus11)
+ .Case("cxx_implicit_moves", LangOpts.CPlusPlus11)
//.Case("cxx_inheriting_constructors", false)
- .Case("cxx_inline_namespaces", LangOpts.CPlusPlus0x)
- .Case("cxx_lambdas", LangOpts.CPlusPlus0x)
- .Case("cxx_local_type_template_args", LangOpts.CPlusPlus0x)
- .Case("cxx_nonstatic_member_init", LangOpts.CPlusPlus0x)
- .Case("cxx_noexcept", LangOpts.CPlusPlus0x)
- .Case("cxx_nullptr", LangOpts.CPlusPlus0x)
- .Case("cxx_override_control", LangOpts.CPlusPlus0x)
- .Case("cxx_range_for", LangOpts.CPlusPlus0x)
- .Case("cxx_raw_string_literals", LangOpts.CPlusPlus0x)
- .Case("cxx_reference_qualified_functions", LangOpts.CPlusPlus0x)
- .Case("cxx_rvalue_references", LangOpts.CPlusPlus0x)
- .Case("cxx_strong_enums", LangOpts.CPlusPlus0x)
- .Case("cxx_static_assert", LangOpts.CPlusPlus0x)
- .Case("cxx_trailing_return", LangOpts.CPlusPlus0x)
- .Case("cxx_unicode_literals", LangOpts.CPlusPlus0x)
- .Case("cxx_unrestricted_unions", LangOpts.CPlusPlus0x)
- .Case("cxx_user_literals", LangOpts.CPlusPlus0x)
- .Case("cxx_variadic_templates", LangOpts.CPlusPlus0x)
+ .Case("cxx_inline_namespaces", LangOpts.CPlusPlus11)
+ .Case("cxx_lambdas", LangOpts.CPlusPlus11)
+ .Case("cxx_local_type_template_args", LangOpts.CPlusPlus11)
+ .Case("cxx_nonstatic_member_init", LangOpts.CPlusPlus11)
+ .Case("cxx_noexcept", LangOpts.CPlusPlus11)
+ .Case("cxx_nullptr", LangOpts.CPlusPlus11)
+ .Case("cxx_override_control", LangOpts.CPlusPlus11)
+ .Case("cxx_range_for", LangOpts.CPlusPlus11)
+ .Case("cxx_raw_string_literals", LangOpts.CPlusPlus11)
+ .Case("cxx_reference_qualified_functions", LangOpts.CPlusPlus11)
+ .Case("cxx_rvalue_references", LangOpts.CPlusPlus11)
+ .Case("cxx_strong_enums", LangOpts.CPlusPlus11)
+ .Case("cxx_static_assert", LangOpts.CPlusPlus11)
+ .Case("cxx_trailing_return", LangOpts.CPlusPlus11)
+ .Case("cxx_unicode_literals", LangOpts.CPlusPlus11)
+ .Case("cxx_unrestricted_unions", LangOpts.CPlusPlus11)
+ .Case("cxx_user_literals", LangOpts.CPlusPlus11)
+ .Case("cxx_variadic_templates", LangOpts.CPlusPlus11)
// Type traits
.Case("has_nothrow_assign", LangOpts.CPlusPlus)
.Case("has_nothrow_copy", LangOpts.CPlusPlus)
@@ -840,10 +800,6 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
.Case("is_base_of", LangOpts.CPlusPlus)
.Case("is_class", LangOpts.CPlusPlus)
.Case("is_convertible_to", LangOpts.CPlusPlus)
- // __is_empty is available only if the horrible
- // "struct __is_empty" parsing hack hasn't been needed in this
- // translation unit. If it has, __is_empty reverts to a normal
- // identifier and __has_feature(is_empty) evaluates false.
.Case("is_empty", LangOpts.CPlusPlus)
.Case("is_enum", LangOpts.CPlusPlus)
.Case("is_final", LangOpts.CPlusPlus)
@@ -926,9 +882,15 @@ static bool EvaluateHasIncludeCommon(Token &Tok,
IdentifierInfo *II, Preprocessor &PP,
const DirectoryLookup *LookupFrom) {
// Save the location of the current token. If a '(' is later found, use
- // that location. If no, use the end of this location instead.
+ // that location. If not, use the end of this location instead.
SourceLocation LParenLoc = Tok.getLocation();
+ // These expressions are only allowed within a preprocessor directive.
+ if (!PP.isParsingIfOrElifDirective()) {
+ PP.Diag(LParenLoc, diag::err_pp_directive_required) << II->getName();
+ return false;
+ }
+
// Get '('.
PP.LexNonComment(Tok);
@@ -946,8 +908,14 @@ static bool EvaluateHasIncludeCommon(Token &Tok,
// Save '(' location for possible missing ')' message.
LParenLoc = Tok.getLocation();
- // Get the file name.
- PP.getCurrentLexer()->LexIncludeFilename(Tok);
+ if (PP.getCurrentLexer()) {
+ // Get the file name.
+ PP.getCurrentLexer()->LexIncludeFilename(Tok);
+ } else {
+ // We're in a macro, so we can't use LexIncludeFilename; just
+ // grab the next token.
+ PP.Lex(Tok);
+ }
}
// Reserve a buffer to get the spelling.
@@ -1223,15 +1191,15 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
IdentifierInfo *FeatureII = 0;
// Read the '('.
- Lex(Tok);
+ LexUnexpandedToken(Tok);
if (Tok.is(tok::l_paren)) {
// Read the identifier
- Lex(Tok);
+ LexUnexpandedToken(Tok);
if (Tok.is(tok::identifier) || Tok.is(tok::kw_const)) {
FeatureII = Tok.getIdentifierInfo();
// Read the ')'.
- Lex(Tok);
+ LexUnexpandedToken(Tok);
if (Tok.is(tok::r_paren))
IsValid = true;
}
@@ -1275,69 +1243,49 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
bool IsValid = false;
bool Value = false;
// Read the '('.
- Lex(Tok);
+ LexUnexpandedToken(Tok);
do {
- if (Tok.is(tok::l_paren)) {
- // Read the string.
- Lex(Tok);
-
- // We need at least one string literal.
- if (!Tok.is(tok::string_literal)) {
- StartLoc = Tok.getLocation();
- IsValid = false;
- // Eat tokens until ')'.
- do Lex(Tok); while (!(Tok.is(tok::r_paren) || Tok.is(tok::eod)));
- break;
- }
-
- // String concatenation allows multiple strings, which can even come
- // from macro expansion.
- SmallVector<Token, 4> StrToks;
- while (Tok.is(tok::string_literal)) {
- // Complain about, and drop, any ud-suffix.
- if (Tok.hasUDSuffix())
- Diag(Tok, diag::err_invalid_string_udl);
- StrToks.push_back(Tok);
+ if (Tok.isNot(tok::l_paren)) {
+ Diag(StartLoc, diag::err_warning_check_malformed);
+ break;
+ }
+
+ LexUnexpandedToken(Tok);
+ std::string WarningName;
+ SourceLocation StrStartLoc = Tok.getLocation();
+ if (!FinishLexStringLiteral(Tok, WarningName, "'__has_warning'",
+ /*MacroExpansion=*/false)) {
+ // Eat tokens until ')'.
+ while (Tok.isNot(tok::r_paren) && Tok.isNot(tok::eod) &&
+ Tok.isNot(tok::eof))
LexUnexpandedToken(Tok);
- }
-
- // Is the end a ')'?
- if (!(IsValid = Tok.is(tok::r_paren)))
- break;
-
- // Concatenate and parse the strings.
- StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
- assert(Literal.isAscii() && "Didn't allow wide strings in");
- if (Literal.hadError)
- break;
- if (Literal.Pascal) {
- Diag(Tok, diag::warn_pragma_diagnostic_invalid);
- break;
- }
-
- StringRef WarningName(Literal.GetString());
-
- if (WarningName.size() < 3 || WarningName[0] != '-' ||
- WarningName[1] != 'W') {
- Diag(StrToks[0].getLocation(), diag::warn_has_warning_invalid_option);
- break;
- }
-
- // Finally, check if the warning flags maps to a diagnostic group.
- // We construct a SmallVector here to talk to getDiagnosticIDs().
- // Although we don't use the result, this isn't a hot path, and not
- // worth special casing.
- llvm::SmallVector<diag::kind, 10> Diags;
- Value = !getDiagnostics().getDiagnosticIDs()->
- getDiagnosticsInGroup(WarningName.substr(2), Diags);
+ break;
+ }
+
+ // Is the end a ')'?
+ if (!(IsValid = Tok.is(tok::r_paren))) {
+ Diag(StartLoc, diag::err_warning_check_malformed);
+ break;
+ }
+
+ if (WarningName.size() < 3 || WarningName[0] != '-' ||
+ WarningName[1] != 'W') {
+ Diag(StrStartLoc, diag::warn_has_warning_invalid_option);
+ break;
}
+
+ // Finally, check if the warning flags maps to a diagnostic group.
+ // We construct a SmallVector here to talk to getDiagnosticIDs().
+ // Although we don't use the result, this isn't a hot path, and not
+ // worth special casing.
+ SmallVector<diag::kind, 10> Diags;
+ Value = !getDiagnostics().getDiagnosticIDs()->
+ getDiagnosticsInGroup(WarningName.substr(2), Diags);
} while (false);
-
- if (!IsValid)
- Diag(StartLoc, diag::err_warning_check_malformed);
OS << (int)Value;
- Tok.setKind(tok::numeric_constant);
+ if (IsValid)
+ Tok.setKind(tok::numeric_constant);
} else if (II == Ident__building_module) {
// The argument to this builtin should be an identifier. The
// builtin evaluates to 1 when that identifier names the module we are
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index b167172..e8f43f7 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -11,17 +11,16 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/PTHLexer.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemStatCache.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/OnDiskHashTable.h"
+#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/LexDiagnostic.h"
-#include "clang/Lex/PTHLexer.h"
-#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PTHManager.h"
-#include "clang/Lex/Token.h"
#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/Token.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
@@ -679,13 +678,13 @@ public:
~PTHStatCache() {}
LookupResult getStat(const char *Path, struct stat &StatBuf,
- int *FileDescriptor) {
+ bool isFile, int *FileDescriptor) {
// Do the lookup for the file's data in the PTH file.
CacheTy::iterator I = Cache.find(Path);
// If we don't get a hit in the PTH file just forward to 'stat'.
if (I == Cache.end())
- return statChained(Path, StatBuf, FileDescriptor);
+ return statChained(Path, StatBuf, isFile, FileDescriptor);
const PTHStatData &Data = *I;
diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp
index e7e6c37..95e8a8c 100644
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp
@@ -13,13 +13,13 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/Pragma.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/LiteralSupport.h"
-#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/MacroInfo.h"
-#include "clang/Lex/LexDiagnostic.h"
-#include "clang/Basic/FileManager.h"
-#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Preprocessor.h"
#include "llvm/Support/CrashRecoveryContext.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
@@ -184,7 +184,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
// Read the '"..."'.
Lex(Tok);
- if (Tok.isNot(tok::string_literal) && Tok.isNot(tok::wide_string_literal)) {
+ if (!tok::isStringLiteral(Tok.getKind())) {
Diag(PragmaLoc, diag::err__Pragma_malformed);
// Skip this token, and the ')', if present.
if (Tok.isNot(tok::r_paren))
@@ -219,15 +219,50 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
SourceLocation RParenLoc = Tok.getLocation();
std::string StrVal = getSpelling(StrTok);
- // The _Pragma is lexically sound. Destringize according to C99 6.10.9.1:
- // "The string literal is destringized by deleting the L prefix, if present,
+ // The _Pragma is lexically sound. Destringize according to C11 6.10.9.1:
+ // "The string literal is destringized by deleting any encoding prefix,
// deleting the leading and trailing double-quotes, replacing each escape
// sequence \" by a double-quote, and replacing each escape sequence \\ by a
// single backslash."
- if (StrVal[0] == 'L') // Remove L prefix.
+ if (StrVal[0] == 'L' || StrVal[0] == 'U' ||
+ (StrVal[0] == 'u' && StrVal[1] != '8'))
StrVal.erase(StrVal.begin());
- assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
- "Invalid string token!");
+ else if (StrVal[0] == 'u')
+ StrVal.erase(StrVal.begin(), StrVal.begin() + 2);
+
+ if (StrVal[0] == 'R') {
+ // FIXME: C++11 does not specify how to handle raw-string-literals here.
+ // We strip off the 'R', the quotes, the d-char-sequences, and the parens.
+ assert(StrVal[1] == '"' && StrVal[StrVal.size() - 1] == '"' &&
+ "Invalid raw string token!");
+
+ // Measure the length of the d-char-sequence.
+ unsigned NumDChars = 0;
+ while (StrVal[2 + NumDChars] != '(') {
+ assert(NumDChars < (StrVal.size() - 5) / 2 &&
+ "Invalid raw string token!");
+ ++NumDChars;
+ }
+ assert(StrVal[StrVal.size() - 2 - NumDChars] == ')');
+
+ // Remove 'R " d-char-sequence' and 'd-char-sequence "'. We'll replace the
+ // parens below.
+ StrVal.erase(0, 2 + NumDChars);
+ StrVal.erase(StrVal.size() - 1 - NumDChars);
+ } else {
+ assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
+ "Invalid string token!");
+
+ // Remove escaped quotes and escapes.
+ for (unsigned i = 1, e = StrVal.size(); i < e-2; ++i) {
+ if (StrVal[i] == '\\' &&
+ (StrVal[i+1] == '\\' || StrVal[i+1] == '"')) {
+ // \\ -> '\' and \" -> '"'.
+ StrVal.erase(StrVal.begin()+i);
+ --e;
+ }
+ }
+ }
// Remove the front quote, replacing it with a space, so that the pragma
// contents appear to have a space before them.
@@ -236,16 +271,6 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
// Replace the terminating quote with a \n.
StrVal[StrVal.size()-1] = '\n';
- // Remove escaped quotes and escapes.
- for (unsigned i = 0, e = StrVal.size(); i != e-1; ++i) {
- if (StrVal[i] == '\\' &&
- (StrVal[i+1] == '\\' || StrVal[i+1] == '"')) {
- // \\ -> '\' and \" -> '"'.
- StrVal.erase(StrVal.begin()+i);
- --e;
- }
- }
-
// Plop the string (including the newline and trailing null) into a buffer
// where we can lex it.
Token TmpTok;
@@ -470,7 +495,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
///
/// The syntax is:
/// \code
-/// \#pragma comment(linker, "foo")
+/// #pragma comment(linker, "foo")
/// \endcode
/// 'linker' is one of five identifiers: compiler, exestr, lib, linker, user.
/// "foo" is a string, which is fully macro expanded, and permits string
@@ -502,38 +527,10 @@ void Preprocessor::HandlePragmaComment(Token &Tok) {
// Read the optional string if present.
Lex(Tok);
std::string ArgumentString;
- if (Tok.is(tok::comma)) {
- Lex(Tok); // eat the comma.
-
- // We need at least one string.
- if (Tok.isNot(tok::string_literal)) {
- Diag(Tok.getLocation(), diag::err_pragma_comment_malformed);
- return;
- }
-
- // String concatenation allows multiple strings, which can even come from
- // macro expansion.
- // "foo " "bar" "Baz"
- SmallVector<Token, 4> StrToks;
- while (Tok.is(tok::string_literal)) {
- if (Tok.hasUDSuffix())
- Diag(Tok, diag::err_invalid_string_udl);
- StrToks.push_back(Tok);
- Lex(Tok);
- }
-
- // Concatenate and parse the strings.
- StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
- assert(Literal.isAscii() && "Didn't allow wide strings in");
- if (Literal.hadError)
- return;
- if (Literal.Pascal) {
- Diag(StrToks[0].getLocation(), diag::err_pragma_comment_malformed);
- return;
- }
-
- ArgumentString = Literal.GetString();
- }
+ if (Tok.is(tok::comma) && !LexStringLiteral(Tok, ArgumentString,
+ "pragma comment",
+ /*MacroExpansion=*/true))
+ return;
// FIXME: If the kind is "compiler" warn if the string is present (it is
// ignored).
@@ -560,11 +557,11 @@ void Preprocessor::HandlePragmaComment(Token &Tok) {
/// HandlePragmaMessage - Handle the microsoft and gcc \#pragma message
/// extension. The syntax is:
/// \code
-/// \#pragma message(string)
+/// #pragma message(string)
/// \endcode
/// OR, in GCC mode:
/// \code
-/// \#pragma message string
+/// #pragma message string
/// \endcode
/// string is a string, which is fully macro expanded, and permits string
/// concatenation, embedded escape characters, etc... See MSDN for more details.
@@ -587,34 +584,10 @@ void Preprocessor::HandlePragmaMessage(Token &Tok) {
return;
}
- // We need at least one string.
- if (Tok.isNot(tok::string_literal)) {
- Diag(Tok.getLocation(), diag::err_pragma_message_malformed);
- return;
- }
-
- // String concatenation allows multiple strings, which can even come from
- // macro expansion.
- // "foo " "bar" "Baz"
- SmallVector<Token, 4> StrToks;
- while (Tok.is(tok::string_literal)) {
- if (Tok.hasUDSuffix())
- Diag(Tok, diag::err_invalid_string_udl);
- StrToks.push_back(Tok);
- Lex(Tok);
- }
-
- // Concatenate and parse the strings.
- StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
- assert(Literal.isAscii() && "Didn't allow wide strings in");
- if (Literal.hadError)
- return;
- if (Literal.Pascal) {
- Diag(StrToks[0].getLocation(), diag::err_pragma_message_malformed);
+ std::string MessageString;
+ if (!FinishLexStringLiteral(Tok, MessageString, "pragma message",
+ /*MacroExpansion=*/true))
return;
- }
-
- StringRef MessageString(Literal.GetString());
if (ExpectClosingParen) {
if (Tok.isNot(tok::r_paren)) {
@@ -692,7 +665,7 @@ IdentifierInfo *Preprocessor::ParsePragmaPushOrPopMacro(Token &Tok) {
///
/// The syntax is:
/// \code
-/// \#pragma push_macro("macro")
+/// #pragma push_macro("macro")
/// \endcode
void Preprocessor::HandlePragmaPushMacro(Token &PushMacroTok) {
// Parse the pragma directive and get the macro IdentifierInfo*.
@@ -702,17 +675,13 @@ void Preprocessor::HandlePragmaPushMacro(Token &PushMacroTok) {
// Get the MacroInfo associated with IdentInfo.
MacroInfo *MI = getMacroInfo(IdentInfo);
- MacroInfo *MacroCopyToPush = 0;
if (MI) {
- // Make a clone of MI.
- MacroCopyToPush = CloneMacroInfo(*MI);
-
// Allow the original MacroInfo to be redefined later.
MI->setIsAllowRedefinitionsWithoutWarning(true);
}
// Push the cloned MacroInfo so we can retrieve it later.
- PragmaPushMacroInfo[IdentInfo].push_back(MacroCopyToPush);
+ PragmaPushMacroInfo[IdentInfo].push_back(MI);
}
/// \brief Handle \#pragma pop_macro.
@@ -733,10 +702,11 @@ void Preprocessor::HandlePragmaPopMacro(Token &PopMacroTok) {
PragmaPushMacroInfo.find(IdentInfo);
if (iter != PragmaPushMacroInfo.end()) {
// Forget the MacroInfo currently associated with IdentInfo.
- if (MacroInfo *CurrentMI = getMacroInfo(IdentInfo)) {
- if (CurrentMI->isWarnIfUnused())
- WarnUnusedMacroLocs.erase(CurrentMI->getDefinitionLoc());
- UndefineMacro(IdentInfo, CurrentMI, MessageLoc);
+ if (MacroDirective *CurrentMD = getMacroDirective(IdentInfo)) {
+ MacroInfo *MI = CurrentMD->getMacroInfo();
+ if (MI->isWarnIfUnused())
+ WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
+ appendMacroDirective(IdentInfo, AllocateUndefMacroDirective(MessageLoc));
}
// Get the MacroInfo we want to reinstall.
@@ -744,9 +714,8 @@ void Preprocessor::HandlePragmaPopMacro(Token &PopMacroTok) {
if (MacroToReInstall) {
// Reinstall the previously pushed macro.
- setMacroInfo(IdentInfo, MacroToReInstall);
- } else if (IdentInfo->hasMacroDefinition()) {
- clearMacroInfo(IdentInfo);
+ appendDefMacroDirective(IdentInfo, MacroToReInstall, MessageLoc,
+ /*isImported=*/false);
}
// Pop PragmaPushMacroInfo stack.
@@ -1090,50 +1059,28 @@ public:
}
PP.LexUnexpandedToken(Tok);
+ SourceLocation StringLoc = Tok.getLocation();
- // We need at least one string.
- if (Tok.isNot(tok::string_literal)) {
- PP.Diag(Tok.getLocation(), diag::warn_pragma_diagnostic_invalid_token);
+ std::string WarningName;
+ if (!PP.FinishLexStringLiteral(Tok, WarningName, "pragma diagnostic",
+ /*MacroExpansion=*/false))
return;
- }
-
- // String concatenation allows multiple strings, which can even come from
- // macro expansion.
- // "foo " "bar" "Baz"
- SmallVector<Token, 4> StrToks;
- while (Tok.is(tok::string_literal)) {
- StrToks.push_back(Tok);
- PP.LexUnexpandedToken(Tok);
- }
if (Tok.isNot(tok::eod)) {
PP.Diag(Tok.getLocation(), diag::warn_pragma_diagnostic_invalid_token);
return;
}
- // Concatenate and parse the strings.
- StringLiteralParser Literal(&StrToks[0], StrToks.size(), PP);
- assert(Literal.isAscii() && "Didn't allow wide strings in");
- if (Literal.hadError)
- return;
- if (Literal.Pascal) {
- PP.Diag(Tok, diag::warn_pragma_diagnostic_invalid);
- return;
- }
-
- StringRef WarningName(Literal.GetString());
-
if (WarningName.size() < 3 || WarningName[0] != '-' ||
WarningName[1] != 'W') {
- PP.Diag(StrToks[0].getLocation(),
- diag::warn_pragma_diagnostic_invalid_option);
+ PP.Diag(StringLoc, diag::warn_pragma_diagnostic_invalid_option);
return;
}
if (PP.getDiagnostics().setDiagnosticGroupMapping(WarningName.substr(2),
Map, DiagLoc))
- PP.Diag(StrToks[0].getLocation(),
- diag::warn_pragma_diagnostic_unknown_warning) << WarningName;
+ PP.Diag(StringLoc, diag::warn_pragma_diagnostic_unknown_warning)
+ << WarningName;
else if (Callbacks)
Callbacks->PragmaDiagnostic(DiagLoc, Namespace, Map, WarningName);
}
@@ -1277,6 +1224,29 @@ struct PragmaARCCFCodeAuditedHandler : public PragmaHandler {
}
};
+ /// \brief Handle "\#pragma region [...]"
+ ///
+ /// The syntax is
+ /// \code
+ /// #pragma region [optional name]
+ /// #pragma endregion [optional comment]
+ /// \endcode
+ ///
+ /// \note This is
+ /// <a href="http://msdn.microsoft.com/en-us/library/b6xkz944(v=vs.80).aspx">editor-only</a>
+ /// pragma, just skipped by compiler.
+ struct PragmaRegionHandler : public PragmaHandler {
+ PragmaRegionHandler(const char *pragma) : PragmaHandler(pragma) { }
+
+ virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
+ Token &NameTok) {
+ // #pragma region: endregion matches can be verified
+ // __pragma(region): no sense, but ignored by msvc
+ // _Pragma is not valid for MSVC, but there isn't any point
+ // to handle a _Pragma differently.
+ }
+ };
+
} // end anonymous namespace
@@ -1310,5 +1280,7 @@ void Preprocessor::RegisterBuiltinPragmas() {
if (LangOpts.MicrosoftExt) {
AddPragmaHandler(new PragmaCommentHandler());
AddPragmaHandler(new PragmaIncludeAliasHandler());
+ AddPragmaHandler(new PragmaRegionHandler("region"));
+ AddPragmaHandler(new PragmaRegionHandler("endregion"));
}
}
diff --git a/lib/Lex/PreprocessingRecord.cpp b/lib/Lex/PreprocessingRecord.cpp
index 01f3665e..b10e7f7 100644
--- a/lib/Lex/PreprocessingRecord.cpp
+++ b/lib/Lex/PreprocessingRecord.cpp
@@ -14,8 +14,8 @@
#include "clang/Lex/PreprocessingRecord.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/Token.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Capacity.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace clang;
@@ -38,14 +38,9 @@ InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec,
this->FileName = StringRef(Memory, FileName.size());
}
-PreprocessingRecord::PreprocessingRecord(SourceManager &SM,
- bool RecordConditionalDirectives)
+PreprocessingRecord::PreprocessingRecord(SourceManager &SM)
: SourceMgr(SM),
- RecordCondDirectives(RecordConditionalDirectives), CondDirectiveNextIdx(0),
- ExternalSource(0)
-{
- if (RecordCondDirectives)
- CondDirectiveStack.push_back(CondDirectiveNextIdx++);
+ ExternalSource(0) {
}
/// \brief Returns a pair of [Begin, End) iterators of preprocessed entities
@@ -97,8 +92,10 @@ bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) {
int Pos = PPEI.Position;
if (Pos < 0) {
- assert(unsigned(-Pos-1) < LoadedPreprocessedEntities.size() &&
- "Out-of bounds loaded preprocessed entity");
+ if (unsigned(-Pos-1) >= LoadedPreprocessedEntities.size()) {
+ assert(0 && "Out-of bounds loaded preprocessed entity");
+ return false;
+ }
assert(ExternalSource && "No external source to load from");
unsigned LoadedIndex = LoadedPreprocessedEntities.size()+Pos;
if (PreprocessedEntity *PPE = LoadedPreprocessedEntities[LoadedIndex])
@@ -106,8 +103,8 @@ bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) {
// See if the external source can see if the entity is in the file without
// deserializing it.
- llvm::Optional<bool>
- IsInFile = ExternalSource->isPreprocessedEntityInFileID(LoadedIndex, FID);
+ Optional<bool> IsInFile =
+ ExternalSource->isPreprocessedEntityInFileID(LoadedIndex, FID);
if (IsInFile.hasValue())
return IsInFile.getValue();
@@ -118,8 +115,10 @@ bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) {
FID, SourceMgr);
}
- assert(unsigned(Pos) < PreprocessedEntities.size() &&
- "Out-of bounds local preprocessed entity");
+ if (unsigned(Pos) >= PreprocessedEntities.size()) {
+ assert(0 && "Out-of bounds local preprocessed entity");
+ return false;
+ }
return isPreprocessedEntityIfInFileID(PreprocessedEntities[Pos],
FID, SourceMgr);
}
@@ -249,11 +248,11 @@ PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) {
assert(Entity);
SourceLocation BeginLoc = Entity->getSourceRange().getBegin();
- if (!isa<class InclusionDirective>(Entity)) {
+ if (isa<MacroDefinition>(Entity)) {
assert((PreprocessedEntities.empty() ||
!SourceMgr.isBeforeInTranslationUnit(BeginLoc,
PreprocessedEntities.back()->getSourceRange().getBegin())) &&
- "a macro directive was encountered out-of-order");
+ "a macro definition was encountered out-of-order");
PreprocessedEntities.push_back(Entity);
return getPPEntityID(PreprocessedEntities.size()-1, /*isLoaded=*/false);
}
@@ -268,7 +267,15 @@ PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) {
// The entity's location is not after the previous one; this can happen with
// include directives that form the filename using macros, e.g:
- // "#include MACRO(STUFF)".
+ // "#include MACRO(STUFF)"
+ // or with macro expansions inside macro arguments where the arguments are
+ // not expanded in the same order as listed, e.g:
+ // \code
+ // #define M1 1
+ // #define M2 2
+ // #define FM(x,y) y x
+ // FM(M1, M2)
+ // \endcode
typedef std::vector<PreprocessedEntity *>::iterator pp_iter;
@@ -313,8 +320,8 @@ unsigned PreprocessingRecord::allocateLoadedEntities(unsigned NumEntities) {
}
void PreprocessingRecord::RegisterMacroDefinition(MacroInfo *Macro,
- PPEntityID PPID) {
- MacroDefinitions[Macro] = PPID;
+ MacroDefinition *Def) {
+ MacroDefinitions[Macro] = Def;
}
/// \brief Retrieve the preprocessed entity at the given ID.
@@ -351,19 +358,17 @@ PreprocessingRecord::getLoadedPreprocessedEntity(unsigned Index) {
}
MacroDefinition *PreprocessingRecord::findMacroDefinition(const MacroInfo *MI) {
- llvm::DenseMap<const MacroInfo *, PPEntityID>::iterator Pos
+ llvm::DenseMap<const MacroInfo *, MacroDefinition *>::iterator Pos
= MacroDefinitions.find(MI);
if (Pos == MacroDefinitions.end())
return 0;
-
- PreprocessedEntity *Entity = getPreprocessedEntity(Pos->second);
- if (Entity->isInvalid())
- return 0;
- return cast<MacroDefinition>(Entity);
+
+ return Pos->second;
}
-void PreprocessingRecord::MacroExpands(const Token &Id, const MacroInfo* MI,
- SourceRange Range) {
+void PreprocessingRecord::addMacroExpansion(const Token &Id,
+ const MacroInfo *MI,
+ SourceRange Range) {
// We don't record nested macro expansions.
if (Id.getLocation().isMacroID())
return;
@@ -376,17 +381,50 @@ void PreprocessingRecord::MacroExpands(const Token &Id, const MacroInfo* MI,
new (*this) MacroExpansion(Def, Range));
}
+void PreprocessingRecord::Ifdef(SourceLocation Loc, const Token &MacroNameTok,
+ const MacroDirective *MD) {
+ // This is not actually a macro expansion but record it as a macro reference.
+ if (MD)
+ addMacroExpansion(MacroNameTok, MD->getMacroInfo(),
+ MacroNameTok.getLocation());
+}
+
+void PreprocessingRecord::Ifndef(SourceLocation Loc, const Token &MacroNameTok,
+ const MacroDirective *MD) {
+ // This is not actually a macro expansion but record it as a macro reference.
+ if (MD)
+ addMacroExpansion(MacroNameTok, MD->getMacroInfo(),
+ MacroNameTok.getLocation());
+}
+
+void PreprocessingRecord::Defined(const Token &MacroNameTok,
+ const MacroDirective *MD) {
+ // This is not actually a macro expansion but record it as a macro reference.
+ if (MD)
+ addMacroExpansion(MacroNameTok, MD->getMacroInfo(),
+ MacroNameTok.getLocation());
+}
+
+void PreprocessingRecord::MacroExpands(const Token &Id,const MacroDirective *MD,
+ SourceRange Range) {
+ addMacroExpansion(Id, MD->getMacroInfo(), Range);
+}
+
void PreprocessingRecord::MacroDefined(const Token &Id,
- const MacroInfo *MI) {
+ const MacroDirective *MD) {
+ const MacroInfo *MI = MD->getMacroInfo();
SourceRange R(MI->getDefinitionLoc(), MI->getDefinitionEndLoc());
MacroDefinition *Def
= new (*this) MacroDefinition(Id.getIdentifierInfo(), R);
- MacroDefinitions[MI] = addPreprocessedEntity(Def);
+ addPreprocessedEntity(Def);
+ MacroDefinitions[MI] = Def;
}
void PreprocessingRecord::MacroUndefined(const Token &Id,
- const MacroInfo *MI) {
- MacroDefinitions.erase(MI);
+ const MacroDirective *MD) {
+ // Note: MI may be null (when #undef'ining an undefined macro).
+ if (MD)
+ MacroDefinitions.erase(MD->getMacroInfo());
}
void PreprocessingRecord::InclusionDirective(
@@ -438,95 +476,6 @@ void PreprocessingRecord::InclusionDirective(
addPreprocessedEntity(ID);
}
-bool PreprocessingRecord::rangeIntersectsConditionalDirective(
- SourceRange Range) const {
- if (Range.isInvalid())
- return false;
-
- CondDirectiveLocsTy::const_iterator
- low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(),
- Range.getBegin(), CondDirectiveLoc::Comp(SourceMgr));
- if (low == CondDirectiveLocs.end())
- return false;
-
- if (SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), low->getLoc()))
- return false;
-
- CondDirectiveLocsTy::const_iterator
- upp = std::upper_bound(low, CondDirectiveLocs.end(),
- Range.getEnd(), CondDirectiveLoc::Comp(SourceMgr));
- unsigned uppIdx;
- if (upp != CondDirectiveLocs.end())
- uppIdx = upp->getIdx();
- else
- uppIdx = 0;
-
- return low->getIdx() != uppIdx;
-}
-
-unsigned PreprocessingRecord::findCondDirectiveIdx(SourceLocation Loc) const {
- if (Loc.isInvalid())
- return 0;
-
- CondDirectiveLocsTy::const_iterator
- low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(),
- Loc, CondDirectiveLoc::Comp(SourceMgr));
- if (low == CondDirectiveLocs.end())
- return 0;
- return low->getIdx();
-}
-
-void PreprocessingRecord::addCondDirectiveLoc(CondDirectiveLoc DirLoc) {
- // Ignore directives in system headers.
- if (SourceMgr.isInSystemHeader(DirLoc.getLoc()))
- return;
-
- assert(CondDirectiveLocs.empty() ||
- SourceMgr.isBeforeInTranslationUnit(CondDirectiveLocs.back().getLoc(),
- DirLoc.getLoc()));
- CondDirectiveLocs.push_back(DirLoc);
-}
-
-void PreprocessingRecord::If(SourceLocation Loc, SourceRange ConditionRange) {
- if (RecordCondDirectives) {
- addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
- CondDirectiveStack.push_back(CondDirectiveNextIdx++);
- }
-}
-
-void PreprocessingRecord::Ifdef(SourceLocation Loc, const Token &MacroNameTok) {
- if (RecordCondDirectives) {
- addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
- CondDirectiveStack.push_back(CondDirectiveNextIdx++);
- }
-}
-
-void PreprocessingRecord::Ifndef(SourceLocation Loc,const Token &MacroNameTok) {
- if (RecordCondDirectives) {
- addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
- CondDirectiveStack.push_back(CondDirectiveNextIdx++);
- }
-}
-
-void PreprocessingRecord::Elif(SourceLocation Loc, SourceRange ConditionRange,
- SourceLocation IfLoc) {
- if (RecordCondDirectives)
- addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
-}
-
-void PreprocessingRecord::Else(SourceLocation Loc, SourceLocation IfLoc) {
- if (RecordCondDirectives)
- addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
-}
-
-void PreprocessingRecord::Endif(SourceLocation Loc, SourceLocation IfLoc) {
- if (RecordCondDirectives) {
- addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
- assert(!CondDirectiveStack.empty());
- CondDirectiveStack.pop_back();
- }
-}
-
size_t PreprocessingRecord::getTotalMemory() const {
return BumpAlloc.getTotalMemory()
+ llvm::capacity_in_bytes(MacroDefinitions)
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 3b070ce..53c45dc 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -26,50 +26,48 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/PreprocessorOptions.h"
#include "MacroArgs.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/ExternalPreprocessorSource.h"
#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/Pragma.h"
#include "clang/Lex/PreprocessingRecord.h"
+#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/ScratchBuffer.h"
-#include "clang/Lex/LexDiagnostic.h"
-#include "clang/Lex/CodeCompletionHandler.h"
-#include "clang/Lex/ModuleLoader.h"
-#include "clang/Basic/SourceManager.h"
-#include "clang/Basic/FileManager.h"
-#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Capacity.h"
+#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Capacity.h"
using namespace clang;
//===----------------------------------------------------------------------===//
ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
-PPMutationListener::~PPMutationListener() { }
-
-Preprocessor::Preprocessor(llvm::IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
+Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
DiagnosticsEngine &diags, LangOptions &opts,
const TargetInfo *target, SourceManager &SM,
HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
- IdentifierInfoLookup* IILookup,
- bool OwnsHeaders,
- bool DelayInitialization,
- bool IncrProcessing)
- : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(target),
- FileMgr(Headers.getFileMgr()),
- SourceMgr(SM), HeaderInfo(Headers), TheModuleLoader(TheModuleLoader),
- ExternalSource(0), Identifiers(opts, IILookup),
- IncrementalProcessing(IncrProcessing), CodeComplete(0),
- CodeCompletionFile(0), CodeCompletionOffset(0), CodeCompletionReached(0),
- SkipMainFilePreamble(0, true), CurPPLexer(0),
- CurDirLookup(0), CurLexerKind(CLK_Lexer), Callbacks(0), Listener(0),
- MacroArgCache(0), Record(0), MIChainHead(0), MICache(0)
-{
+ IdentifierInfoLookup *IILookup, bool OwnsHeaders,
+ bool DelayInitialization, bool IncrProcessing)
+ : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(target),
+ FileMgr(Headers.getFileMgr()), SourceMgr(SM), HeaderInfo(Headers),
+ TheModuleLoader(TheModuleLoader), ExternalSource(0),
+ Identifiers(opts, IILookup), IncrementalProcessing(IncrProcessing),
+ CodeComplete(0), CodeCompletionFile(0), CodeCompletionOffset(0),
+ CodeCompletionReached(0), SkipMainFilePreamble(0, true), CurPPLexer(0),
+ CurDirLookup(0), CurLexerKind(CLK_Lexer), Callbacks(0),
+ MacroArgCache(0), Record(0), MIChainHead(0), MICache(0) {
OwnsHeaderSearch = OwnsHeaders;
ScratchBuf = new ScratchBuffer(SourceMgr);
@@ -96,9 +94,11 @@ Preprocessor::Preprocessor(llvm::IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
InMacroArgPreExpansion = false;
NumCachedTokenLexers = 0;
PragmasEnabled = true;
+ ParsingIfOrElifDirective = false;
+ PreprocessedOutput = false;
CachedLexPos = 0;
-
+
// We haven't read anything from the external source.
ReadMacrosFromExternalSource = false;
@@ -292,7 +292,7 @@ Preprocessor::macro_end(bool IncludeExternalMacros) const {
/// \brief Compares macro tokens with a specified token value sequence.
static bool MacroDefinitionEquals(const MacroInfo *MI,
- llvm::ArrayRef<TokenValue> Tokens) {
+ ArrayRef<TokenValue> Tokens) {
return Tokens.size() == MI->getNumTokens() &&
std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
}
@@ -304,14 +304,15 @@ StringRef Preprocessor::getLastMacroWithSpelling(
StringRef BestSpelling;
for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
I != E; ++I) {
- if (!I->second->isObjectLike())
+ if (!I->second->getMacroInfo()->isObjectLike())
continue;
- const MacroInfo *MI = I->second->findDefinitionAtLoc(Loc, SourceMgr);
- if (!MI)
+ const MacroDirective::DefInfo
+ Def = I->second->findDirectiveAtLoc(Loc, SourceMgr);
+ if (!Def)
continue;
- if (!MacroDefinitionEquals(MI, Tokens))
+ if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
continue;
- SourceLocation Location = I->second->getDefinitionLoc();
+ SourceLocation Location = Def.getLocation();
// Choose the macro defined latest.
if (BestLocation.isInvalid() ||
(Location.isValid() &&
@@ -398,7 +399,7 @@ StringRef Preprocessor::getSpelling(const Token &Tok,
SmallVectorImpl<char> &Buffer,
bool *Invalid) const {
// NOTE: this has to be checked *before* testing for an IdentifierInfo.
- if (Tok.isNot(tok::raw_identifier)) {
+ if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
// Try the fast path.
if (const IdentifierInfo *II = Tok.getIdentifierInfo())
return II->getName();
@@ -481,6 +482,7 @@ void Preprocessor::EnterMainSourceFile() {
assert(SB && "Cannot create predefined source buffer");
FileID FID = SourceMgr.createFileIDForMemBuffer(SB);
assert(!FID.isInvalid() && "Could not create FileID for predefines?");
+ setPredefinesFileID(FID);
// Start parsing the predefines.
EnterSourceFile(FID, 0, SourceLocation());
@@ -496,6 +498,48 @@ void Preprocessor::EndSourceFile() {
// Lexer Event Handling.
//===----------------------------------------------------------------------===//
+static void appendCodePoint(unsigned Codepoint,
+ llvm::SmallVectorImpl<char> &Str) {
+ char ResultBuf[4];
+ char *ResultPtr = ResultBuf;
+ bool Res = llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr);
+ (void)Res;
+ assert(Res && "Unexpected conversion failure");
+ Str.append(ResultBuf, ResultPtr);
+}
+
+static void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
+ for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
+ if (*I != '\\') {
+ Buf.push_back(*I);
+ continue;
+ }
+
+ ++I;
+ assert(*I == 'u' || *I == 'U');
+
+ unsigned NumHexDigits;
+ if (*I == 'u')
+ NumHexDigits = 4;
+ else
+ NumHexDigits = 8;
+
+ assert(I + NumHexDigits <= E);
+
+ uint32_t CodePoint = 0;
+ for (++I; NumHexDigits != 0; ++I, --NumHexDigits) {
+ unsigned Value = llvm::hexDigitValue(*I);
+ assert(Value != -1U);
+
+ CodePoint <<= 4;
+ CodePoint += Value;
+ }
+
+ appendCodePoint(CodePoint, Buf);
+ --I;
+ }
+}
+
/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
/// identifier information for the token and install it into the token,
/// updating the token kind accordingly.
@@ -504,15 +548,22 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
// Look up this token, see if it is a macro, or if it is a language keyword.
IdentifierInfo *II;
- if (!Identifier.needsCleaning()) {
+ if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
// No cleaning needed, just use the characters from the lexed buffer.
II = getIdentifierInfo(StringRef(Identifier.getRawIdentifierData(),
- Identifier.getLength()));
+ Identifier.getLength()));
} else {
// Cleaning needed, alloca a buffer, clean into it, then use the buffer.
SmallString<64> IdentifierBuffer;
StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
- II = getIdentifierInfo(CleanedStr);
+
+ if (Identifier.hasUCN()) {
+ SmallString<64> UCNIdentifierBuffer;
+ expandUCNs(UCNIdentifierBuffer, CleanedStr);
+ II = getIdentifierInfo(UCNIdentifierBuffer);
+ } else {
+ II = getIdentifierInfo(CleanedStr);
+ }
}
// Update the token info (identifier info and appropriate token kind).
@@ -589,19 +640,19 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {
}
// If this is a macro to be expanded, do it.
- if (MacroInfo *MI = getMacroInfo(&II)) {
+ if (MacroDirective *MD = getMacroDirective(&II)) {
+ MacroInfo *MI = MD->getMacroInfo();
if (!DisableMacroExpansion) {
- if (Identifier.isExpandDisabled()) {
- Diag(Identifier, diag::pp_disabled_macro_expansion);
- } else if (MI->isEnabled()) {
- if (!HandleMacroExpandedIdentifier(Identifier, MI))
+ if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
+ if (!HandleMacroExpandedIdentifier(Identifier, MD))
return;
} else {
// C99 6.10.3.4p2 says that a disabled macro may never again be
// expanded, even if it's in a context where it could be expanded in the
// future.
Identifier.setFlag(Token::DisableExpand);
- Diag(Identifier, diag::pp_disabled_macro_expansion);
+ if (MI->isObjectLike() || isNextPPTokenLParen())
+ Diag(Identifier, diag::pp_disabled_macro_expansion);
}
}
}
@@ -630,10 +681,10 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {
if (II.isExtensionToken() && !DisableMacroExpansion)
Diag(Identifier, diag::ext_token_used);
- // If this is the '__experimental_modules_import' contextual keyword, note
+ // If this is the 'import' contextual keyword, note
// that the next token indicates a module name.
//
- // Note that we do not treat '__experimental_modules_import' as a contextual
+ // Note that we do not treat 'import' as a contextual
// keyword when we're in a caching lexer, because caching lexers only get
// used in contexts where import declarations are disallowed.
if (II.isModulesImport() && !InMacroArgs && !DisableMacroExpansion &&
@@ -689,6 +740,47 @@ void Preprocessor::LexAfterModuleImport(Token &Result) {
}
}
+bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
+ const char *DiagnosticTag,
+ bool AllowMacroExpansion) {
+ // We need at least one string literal.
+ if (Result.isNot(tok::string_literal)) {
+ Diag(Result, diag::err_expected_string_literal)
+ << /*Source='in...'*/0 << DiagnosticTag;
+ return false;
+ }
+
+ // Lex string literal tokens, optionally with macro expansion.
+ SmallVector<Token, 4> StrToks;
+ do {
+ StrToks.push_back(Result);
+
+ if (Result.hasUDSuffix())
+ Diag(Result, diag::err_invalid_string_udl);
+
+ if (AllowMacroExpansion)
+ Lex(Result);
+ else
+ LexUnexpandedToken(Result);
+ } while (Result.is(tok::string_literal));
+
+ // Concatenate and parse the strings.
+ StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
+ assert(Literal.isAscii() && "Didn't allow wide strings in");
+
+ if (Literal.hadError)
+ return false;
+
+ if (Literal.Pascal) {
+ Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
+ << /*Source='in...'*/0 << DiagnosticTag;
+ return false;
+ }
+
+ String = Literal.GetString();
+ return true;
+}
+
void Preprocessor::addCommentHandler(CommentHandler *Handler) {
assert(Handler && "NULL comment handler");
assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
@@ -723,11 +815,10 @@ CommentHandler::~CommentHandler() { }
CodeCompletionHandler::~CodeCompletionHandler() { }
-void Preprocessor::createPreprocessingRecord(bool RecordConditionalDirectives) {
+void Preprocessor::createPreprocessingRecord() {
if (Record)
return;
- Record = new PreprocessingRecord(getSourceManager(),
- RecordConditionalDirectives);
+ Record = new PreprocessingRecord(getSourceManager());
addPPCallbacks(Record);
}
diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp
index a64c84d..5a59849 100644
--- a/lib/Lex/PreprocessorLexer.cpp
+++ b/lib/Lex/PreprocessorLexer.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/PreprocessorLexer.h"
-#include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/LexDiagnostic.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Preprocessor.h"
using namespace clang;
void PreprocessorLexer::anchor() { }
diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp
index dd7ebb0..0a66bba 100644
--- a/lib/Lex/TokenConcatenation.cpp
+++ b/lib/Lex/TokenConcatenation.cpp
@@ -12,25 +12,25 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/TokenConcatenation.h"
+#include "clang/Basic/CharInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/Support/ErrorHandling.h"
-#include <cctype>
using namespace clang;
/// IsStringPrefix - Return true if Str is a string prefix.
/// 'L', 'u', 'U', or 'u8'. Including raw versions.
-static bool IsStringPrefix(StringRef Str, bool CPlusPlus0x) {
+static bool IsStringPrefix(StringRef Str, bool CPlusPlus11) {
if (Str[0] == 'L' ||
- (CPlusPlus0x && (Str[0] == 'u' || Str[0] == 'U' || Str[0] == 'R'))) {
+ (CPlusPlus11 && (Str[0] == 'u' || Str[0] == 'U' || Str[0] == 'R'))) {
if (Str.size() == 1)
return true; // "L", "u", "U", and "R"
// Check for raw flavors. Need to make sure the first character wasn't
- // already R. Need CPlusPlus0x check for "LR".
- if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus0x)
+ // already R. Need CPlusPlus11 check for "LR".
+ if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus11)
return true; // "LR", "uR", "UR"
// Check for "u8" and "u8R"
@@ -54,17 +54,17 @@ bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const {
SourceManager &SM = PP.getSourceManager();
const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
return IsStringPrefix(StringRef(Ptr, Tok.getLength()),
- LangOpts.CPlusPlus0x);
+ LangOpts.CPlusPlus11);
}
if (Tok.getLength() < 256) {
char Buffer[256];
const char *TokPtr = Buffer;
unsigned length = PP.getSpelling(Tok, TokPtr);
- return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus0x);
+ return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus11);
}
- return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus0x);
+ return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus11);
}
TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
@@ -87,7 +87,7 @@ TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
TokenInfo[tok::arrow ] |= aci_custom_firstchar;
// These tokens have custom code in C++11 mode.
- if (PP.getLangOpts().CPlusPlus0x) {
+ if (PP.getLangOpts().CPlusPlus11) {
TokenInfo[tok::string_literal ] |= aci_custom;
TokenInfo[tok::wide_string_literal ] |= aci_custom;
TokenInfo[tok::utf8_string_literal ] |= aci_custom;
@@ -156,9 +156,10 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
// First, check to see if the tokens were directly adjacent in the original
// source. If they were, it must be okay to stick them together: if there
// were an issue, the tokens would have been lexed differently.
- if (PrevTok.getLocation().isFileID() && Tok.getLocation().isFileID() &&
- PrevTok.getLocation().getLocWithOffset(PrevTok.getLength()) ==
- Tok.getLocation())
+ SourceManager &SM = PP.getSourceManager();
+ SourceLocation PrevSpellLoc = SM.getSpellingLoc(PrevTok.getLocation());
+ SourceLocation SpellLoc = SM.getSpellingLoc(Tok.getLocation());
+ if (PrevSpellLoc.getLocWithOffset(PrevTok.getLength()) == SpellLoc)
return false;
tok::TokenKind PrevKind = PrevTok.getKind();
@@ -206,7 +207,7 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
case tok::wide_char_constant:
case tok::utf16_char_constant:
case tok::utf32_char_constant:
- if (!PP.getLangOpts().CPlusPlus0x)
+ if (!PP.getLangOpts().CPlusPlus11)
return false;
// In C++11, a string or character literal followed by an identifier is a
@@ -239,13 +240,12 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
return IsIdentifierStringPrefix(PrevTok);
case tok::numeric_constant:
- return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
- FirstChar == '+' || FirstChar == '-' || FirstChar == '.' ||
- (PP.getLangOpts().CPlusPlus0x && FirstChar == '_');
+ return isPreprocessingNumberBody(FirstChar) ||
+ FirstChar == '+' || FirstChar == '-';
case tok::period: // ..., .*, .1234
return (FirstChar == '.' && PrevPrevTok.is(tok::period)) ||
- isdigit(FirstChar) ||
- (PP.getLangOpts().CPlusPlus && FirstChar == '*');
+ isDigit(FirstChar) ||
+ (PP.getLangOpts().CPlusPlus && FirstChar == '*');
case tok::amp: // &&
return FirstChar == '&';
case tok::plus: // ++
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index 59b7478..5b41fe9 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -13,10 +13,10 @@
#include "clang/Lex/TokenLexer.h"
#include "MacroArgs.h"
-#include "clang/Lex/MacroInfo.h"
-#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/SmallString.h"
using namespace clang;
@@ -647,6 +647,12 @@ bool TokenLexer::PasteTokens(Token &Tok) {
StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
if (EndLoc.isFileID())
EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
+ FileID MacroFID = SM.getFileID(MacroExpansionStart);
+ while (SM.getFileID(StartLoc) != MacroFID)
+ StartLoc = SM.getImmediateExpansionRange(StartLoc).first;
+ while (SM.getFileID(EndLoc) != MacroFID)
+ EndLoc = SM.getImmediateExpansionRange(EndLoc).second;
+
Tok.setLocation(SM.createExpansionLoc(Tok.getLocation(), StartLoc, EndLoc,
Tok.getLength()));
@@ -743,14 +749,18 @@ static void updateConsecutiveMacroArgTokens(SourceManager &SM,
Token *NextTok = begin_tokens + 1;
for (; NextTok < end_tokens; ++NextTok) {
+ SourceLocation NextLoc = NextTok->getLocation();
+ if (CurLoc.isFileID() != NextLoc.isFileID())
+ break; // Token from different kind of FileID.
+
int RelOffs;
- if (!SM.isInSameSLocAddrSpace(CurLoc, NextTok->getLocation(), &RelOffs))
+ if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs))
break; // Token from different local/loaded location.
// Check that token is not before the previous token or more than 50
// "characters" away.
if (RelOffs < 0 || RelOffs > 50)
break;
- CurLoc = NextTok->getLocation();
+ CurLoc = NextLoc;
}
// For the consecutive tokens, find the length of the SLocEntry to contain
diff --git a/lib/Lex/UnicodeCharSets.h b/lib/Lex/UnicodeCharSets.h
new file mode 100644
index 0000000..37ff8af
--- /dev/null
+++ b/lib/Lex/UnicodeCharSets.h
@@ -0,0 +1,496 @@
+//===--- UnicodeCharSets.h - Contains important sets of characters --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef CLANG_LEX_UNICODECHARSETS_H
+#define CLANG_LEX_UNICODECHARSETS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace {
+ struct UnicodeCharRange {
+ uint32_t Lower;
+ uint32_t Upper;
+ };
+ typedef llvm::ArrayRef<UnicodeCharRange> UnicodeCharSet;
+
+ typedef llvm::SmallPtrSet<const UnicodeCharRange *, 16> ValidatedCharSetsTy;
+}
+
+static inline ValidatedCharSetsTy &getValidatedCharSets() {
+ static ValidatedCharSetsTy Validated;
+ return Validated;
+}
+
+/// Returns true if each of the ranges in \p CharSet is a proper closed range
+/// [min, max], and if the ranges themselves are ordered and non-overlapping.
+static inline bool isValidCharSet(UnicodeCharSet CharSet) {
+#ifndef NDEBUG
+ static llvm::sys::Mutex ValidationMutex;
+
+ // Check the validation cache.
+ {
+ llvm::MutexGuard Guard(ValidationMutex);
+ if (getValidatedCharSets().count(CharSet.data()))
+ return true;
+ }
+
+ // Walk through the ranges.
+ uint32_t Prev = 0;
+ for (UnicodeCharSet::iterator I = CharSet.begin(), E = CharSet.end();
+ I != E; ++I) {
+ if (Prev >= I->Lower) {
+ DEBUG(llvm::dbgs() << "Upper bound 0x");
+ DEBUG(llvm::dbgs().write_hex(Prev));
+ DEBUG(llvm::dbgs() << " should be less than succeeding lower bound 0x");
+ DEBUG(llvm::dbgs().write_hex(I->Lower) << "\n");
+ return false;
+ }
+ if (I->Upper < I->Lower) {
+ DEBUG(llvm::dbgs() << "Upper bound 0x");
+ DEBUG(llvm::dbgs().write_hex(I->Lower));
+ DEBUG(llvm::dbgs() << " should not be less than lower bound 0x");
+ DEBUG(llvm::dbgs().write_hex(I->Upper) << "\n");
+ return false;
+ }
+ Prev = I->Upper;
+ }
+
+ // Update the validation cache.
+ {
+ llvm::MutexGuard Guard(ValidationMutex);
+ getValidatedCharSets().insert(CharSet.data());
+ }
+#endif
+ return true;
+}
+
+/// Returns true if the Unicode code point \p C is within the set of
+/// characters specified by \p CharSet.
+LLVM_READONLY static inline bool isCharInSet(uint32_t C,
+ UnicodeCharSet CharSet) {
+ assert(isValidCharSet(CharSet));
+
+ size_t LowPoint = 0;
+ size_t HighPoint = CharSet.size();
+
+ // Binary search the set of char ranges.
+ while (HighPoint != LowPoint) {
+ size_t MidPoint = (HighPoint + LowPoint) / 2;
+ if (C < CharSet[MidPoint].Lower)
+ HighPoint = MidPoint;
+ else if (C > CharSet[MidPoint].Upper)
+ LowPoint = MidPoint + 1;
+ else
+ return true;
+ }
+
+ return false;
+}
+
+
+// C11 D.1, C++11 [charname.allowed]
+static const UnicodeCharRange C11AllowedIDChars[] = {
+ // 1
+ { 0x00A8, 0x00A8 }, { 0x00AA, 0x00AA }, { 0x00AD, 0x00AD },
+ { 0x00AF, 0x00AF }, { 0x00B2, 0x00B5 }, { 0x00B7, 0x00BA },
+ { 0x00BC, 0x00BE }, { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 },
+ { 0x00F8, 0x00FF },
+ // 2
+ { 0x0100, 0x167F }, { 0x1681, 0x180D }, { 0x180F, 0x1FFF },
+ // 3
+ { 0x200B, 0x200D }, { 0x202A, 0x202E }, { 0x203F, 0x2040 },
+ { 0x2054, 0x2054 }, { 0x2060, 0x206F },
+ // 4
+ { 0x2070, 0x218F }, { 0x2460, 0x24FF }, { 0x2776, 0x2793 },
+ { 0x2C00, 0x2DFF }, { 0x2E80, 0x2FFF },
+ // 5
+ { 0x3004, 0x3007 }, { 0x3021, 0x302F }, { 0x3031, 0x303F },
+ // 6
+ { 0x3040, 0xD7FF },
+ // 7
+ { 0xF900, 0xFD3D }, { 0xFD40, 0xFDCF }, { 0xFDF0, 0xFE44 },
+ { 0xFE47, 0xFFFD },
+ // 8
+ { 0x10000, 0x1FFFD }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD },
+ { 0x40000, 0x4FFFD }, { 0x50000, 0x5FFFD }, { 0x60000, 0x6FFFD },
+ { 0x70000, 0x7FFFD }, { 0x80000, 0x8FFFD }, { 0x90000, 0x9FFFD },
+ { 0xA0000, 0xAFFFD }, { 0xB0000, 0xBFFFD }, { 0xC0000, 0xCFFFD },
+ { 0xD0000, 0xDFFFD }, { 0xE0000, 0xEFFFD }
+};
+
+// C++03 [extendid]
+// Note that this is not the same as C++98, but we don't distinguish C++98
+// and C++03 in Clang.
+static const UnicodeCharRange CXX03AllowedIDChars[] = {
+ // Latin
+ { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 }, { 0x00F8, 0x01F5 },
+ { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 },
+
+ // Greek
+ { 0x0384, 0x0384 }, { 0x0388, 0x038A }, { 0x038C, 0x038C },
+ { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03D6 },
+ { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE },
+ { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 },
+
+ // Cyrillic
+ { 0x0401, 0x040D }, { 0x040F, 0x044F }, { 0x0451, 0x045C },
+ { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 },
+ { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 },
+ { 0x04F8, 0x04F9 },
+
+ // Armenian
+ { 0x0531, 0x0556 }, { 0x0561, 0x0587 },
+
+ // Hebrew
+ { 0x05D0, 0x05EA }, { 0x05F0, 0x05F4 },
+
+ // Arabic
+ { 0x0621, 0x063A }, { 0x0640, 0x0652 }, { 0x0670, 0x06B7 },
+ { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE }, { 0x06E5, 0x06E7 },
+
+ // Devanagari
+ { 0x0905, 0x0939 }, { 0x0958, 0x0962 },
+
+ // Bengali
+ { 0x0985, 0x098C }, { 0x098F, 0x0990 }, { 0x0993, 0x09A8 },
+ { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 }, { 0x09B6, 0x09B9 },
+ { 0x09DC, 0x09DD }, { 0x09DF, 0x09E1 }, { 0x09F0, 0x09F1 },
+
+ // Gurmukhi
+ { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 }, { 0x0A13, 0x0A28 },
+ { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 }, { 0x0A35, 0x0A36 },
+ { 0x0A38, 0x0A39 }, { 0x0A59, 0x0A5C }, { 0x0A5E, 0x0A5E },
+
+ // Gujarti
+ { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D }, { 0x0A8F, 0x0A91 },
+ { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 }, { 0x0AB2, 0x0AB3 },
+ { 0x0AB5, 0x0AB9 }, { 0x0AE0, 0x0AE0 },
+
+ // Oriya
+ { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 }, { 0x0B13, 0x0B28 },
+ { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 }, { 0x0B36, 0x0B39 },
+ { 0x0B5C, 0x0B5D }, { 0x0B5F, 0x0B61 },
+
+ // Tamil
+ { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 }, { 0x0B92, 0x0B95 },
+ { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C }, { 0x0B9E, 0x0B9F },
+ { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA }, { 0x0BAE, 0x0BB5 },
+ { 0x0BB7, 0x0BB9 },
+
+ // Telugu
+ { 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 },
+ { 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 }, { 0x0C60, 0x0C61 },
+
+ // Kannada
+ { 0x0C85, 0x0C8C }, { 0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 },
+ { 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 }, { 0x0CE0, 0x0CE1 },
+
+ // Malayam
+ { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 }, { 0x0D12, 0x0D28 },
+ { 0x0D2A, 0x0D39 }, { 0x0D60, 0x0D61 },
+
+ // Thai
+ { 0x0E01, 0x0E30 }, { 0x0E32, 0x0E33 }, { 0x0E40, 0x0E46 },
+ { 0x0E4F, 0x0E5B },
+
+ // Lao
+ { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E87 },
+ { 0x0E88, 0x0E88 }, { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D },
+ { 0x0E94, 0x0E97 }, { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 },
+ { 0x0EA5, 0x0EA5 }, { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAA },
+ { 0x0EAB, 0x0EAB }, { 0x0EAD, 0x0EB0 }, { 0x0EB2, 0x0EB2 },
+ { 0x0EB3, 0x0EB3 }, { 0x0EBD, 0x0EBD }, { 0x0EC0, 0x0EC4 },
+ { 0x0EC6, 0x0EC6 },
+
+ // Georgian
+ { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 },
+
+ // Hangul
+ { 0x1100, 0x1159 }, { 0x1161, 0x11A2 }, { 0x11A8, 0x11F9 },
+
+ // Latin (2)
+ { 0x1E00, 0x1E9A }, { 0x1EA0, 0x1EF9 },
+
+ // Greek (2)
+ { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 },
+ { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 },
+ { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D },
+ { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC }, { 0x1FC2, 0x1FC4 },
+ { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 }, { 0x1FD6, 0x1FDB },
+ { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 }, { 0x1FF6, 0x1FFC },
+
+ // Hiragana
+ { 0x3041, 0x3094 }, { 0x309B, 0x309E },
+
+ // Katakana
+ { 0x30A1, 0x30FE },
+
+ // Bopmofo [sic]
+ { 0x3105, 0x312C },
+
+ // CJK Unified Ideographs
+ { 0x4E00, 0x9FA5 }, { 0xF900, 0xFA2D }, { 0xFB1F, 0xFB36 },
+ { 0xFB38, 0xFB3C }, { 0xFB3E, 0xFB3E }, { 0xFB40, 0xFB41 },
+ { 0xFB42, 0xFB44 }, { 0xFB46, 0xFBB1 }, { 0xFBD3, 0xFD3F },
+ { 0xFD50, 0xFD8F }, { 0xFD92, 0xFDC7 }, { 0xFDF0, 0xFDFB },
+ { 0xFE70, 0xFE72 }, { 0xFE74, 0xFE74 }, { 0xFE76, 0xFEFC },
+ { 0xFF21, 0xFF3A }, { 0xFF41, 0xFF5A }, { 0xFF66, 0xFFBE },
+ { 0xFFC2, 0xFFC7 }, { 0xFFCA, 0xFFCF }, { 0xFFD2, 0xFFD7 },
+ { 0xFFDA, 0xFFDC }
+};
+
+// C99 Annex D
+static const UnicodeCharRange C99AllowedIDChars[] = {
+ // Latin (1)
+ { 0x00AA, 0x00AA },
+
+ // Special characters (1)
+ { 0x00B5, 0x00B5 }, { 0x00B7, 0x00B7 },
+
+ // Latin (2)
+ { 0x00BA, 0x00BA }, { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 },
+ { 0x00F8, 0x01F5 }, { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 },
+
+ // Special characters (2)
+ { 0x02B0, 0x02B8 }, { 0x02BB, 0x02BB }, { 0x02BD, 0x02C1 },
+ { 0x02D0, 0x02D1 }, { 0x02E0, 0x02E4 }, { 0x037A, 0x037A },
+
+ // Greek (1)
+ { 0x0386, 0x0386 }, { 0x0388, 0x038A }, { 0x038C, 0x038C },
+ { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03D6 },
+ { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE },
+ { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 },
+
+ // Cyrillic
+ { 0x0401, 0x040C }, { 0x040E, 0x044F }, { 0x0451, 0x045C },
+ { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 },
+ { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 },
+ { 0x04F8, 0x04F9 },
+
+ // Armenian (1)
+ { 0x0531, 0x0556 },
+
+ // Special characters (3)
+ { 0x0559, 0x0559 },
+
+ // Armenian (2)
+ { 0x0561, 0x0587 },
+
+ // Hebrew
+ { 0x05B0, 0x05B9 }, { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF },
+ { 0x05C1, 0x05C2 }, { 0x05D0, 0x05EA }, { 0x05F0, 0x05F2 },
+
+ // Arabic (1)
+ { 0x0621, 0x063A }, { 0x0640, 0x0652 },
+
+ // Digits (1)
+ { 0x0660, 0x0669 },
+
+ // Arabic (2)
+ { 0x0670, 0x06B7 }, { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE },
+ { 0x06D0, 0x06DC }, { 0x06E5, 0x06E8 }, { 0x06EA, 0x06ED },
+
+ // Digits (2)
+ { 0x06F0, 0x06F9 },
+
+ // Devanagari and Special characeter 0x093D.
+ { 0x0901, 0x0903 }, { 0x0905, 0x0939 }, { 0x093D, 0x094D },
+ { 0x0950, 0x0952 }, { 0x0958, 0x0963 },
+
+ // Digits (3)
+ { 0x0966, 0x096F },
+
+ // Bengali (1)
+ { 0x0981, 0x0983 }, { 0x0985, 0x098C }, { 0x098F, 0x0990 },
+ { 0x0993, 0x09A8 }, { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 },
+ { 0x09B6, 0x09B9 }, { 0x09BE, 0x09C4 }, { 0x09C7, 0x09C8 },
+ { 0x09CB, 0x09CD }, { 0x09DC, 0x09DD }, { 0x09DF, 0x09E3 },
+
+ // Digits (4)
+ { 0x09E6, 0x09EF },
+
+ // Bengali (2)
+ { 0x09F0, 0x09F1 },
+
+ // Gurmukhi (1)
+ { 0x0A02, 0x0A02 }, { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 },
+ { 0x0A13, 0x0A28 }, { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 },
+ { 0x0A35, 0x0A36 }, { 0x0A38, 0x0A39 }, { 0x0A3E, 0x0A42 },
+ { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A59, 0x0A5C },
+ { 0x0A5E, 0x0A5E },
+
+ // Digits (5)
+ { 0x0A66, 0x0A6F },
+
+ // Gurmukhi (2)
+ { 0x0A74, 0x0A74 },
+
+ // Gujarti
+ { 0x0A81, 0x0A83 }, { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D },
+ { 0x0A8F, 0x0A91 }, { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 },
+ { 0x0AB2, 0x0AB3 }, { 0x0AB5, 0x0AB9 }, { 0x0ABD, 0x0AC5 },
+ { 0x0AC7, 0x0AC9 }, { 0x0ACB, 0x0ACD }, { 0x0AD0, 0x0AD0 },
+ { 0x0AE0, 0x0AE0 },
+
+ // Digits (6)
+ { 0x0AE6, 0x0AEF },
+
+ // Oriya and Special character 0x0B3D
+ { 0x0B01, 0x0B03 }, { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 },
+ { 0x0B13, 0x0B28 }, { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 },
+ { 0x0B36, 0x0B39 }, { 0x0B3D, 0x0B43 }, { 0x0B47, 0x0B48 },
+ { 0x0B4B, 0x0B4D }, { 0x0B5C, 0x0B5D }, { 0x0B5F, 0x0B61 },
+
+ // Digits (7)
+ { 0x0B66, 0x0B6F },
+
+ // Tamil
+ { 0x0B82, 0x0B83 }, { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 },
+ { 0x0B92, 0x0B95 }, { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C },
+ { 0x0B9E, 0x0B9F }, { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA },
+ { 0x0BAE, 0x0BB5 }, { 0x0BB7, 0x0BB9 }, { 0x0BBE, 0x0BC2 },
+ { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD },
+
+ // Digits (8)
+ { 0x0BE7, 0x0BEF },
+
+ // Telugu
+ { 0x0C01, 0x0C03 }, { 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 },
+ { 0x0C12, 0x0C28 }, { 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 },
+ { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D },
+ { 0x0C60, 0x0C61 },
+
+ // Digits (9)
+ { 0x0C66, 0x0C6F },
+
+ // Kannada
+ { 0x0C82, 0x0C83 }, { 0x0C85, 0x0C8C }, { 0x0C8E, 0x0C90 },
+ { 0x0C92, 0x0CA8 }, { 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 },
+ { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD },
+ { 0x0CDE, 0x0CDE }, { 0x0CE0, 0x0CE1 },
+
+ // Digits (10)
+ { 0x0CE6, 0x0CEF },
+
+ // Malayam
+ { 0x0D02, 0x0D03 }, { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 },
+ { 0x0D12, 0x0D28 }, { 0x0D2A, 0x0D39 }, { 0x0D3E, 0x0D43 },
+ { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D60, 0x0D60 },
+
+ // Digits (11)
+ { 0x0D66, 0x0D6F },
+
+ // Thai...including Digits { 0x0E50, 0x0E59 }
+ { 0x0E01, 0x0E3A }, { 0x0E40, 0x0E5B },
+
+ // Lao (1)
+ { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E88 },
+ { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D }, { 0x0E94, 0x0E97 },
+ { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 }, { 0x0EA5, 0x0EA5 },
+ { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAB }, { 0x0EAD, 0x0EAE },
+ { 0x0EB0, 0x0EB9 }, { 0x0EBB, 0x0EBD }, { 0x0EC0, 0x0EC4 },
+ { 0x0EC6, 0x0EC6 }, { 0x0EC8, 0x0ECD },
+
+ // Digits (12)
+ { 0x0ED0, 0x0ED9 },
+
+ // Lao (2)
+ { 0x0EDC, 0x0EDD },
+
+ // Tibetan (1)
+ { 0x0F00, 0x0F00 }, { 0x0F18, 0x0F19 },
+
+ // Digits (13)
+ { 0x0F20, 0x0F33 },
+
+ // Tibetan (2)
+ { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
+ { 0x0F3E, 0x0F47 }, { 0x0F49, 0x0F69 }, { 0x0F71, 0x0F84 },
+ { 0x0F86, 0x0F8B }, { 0x0F90, 0x0F95 }, { 0x0F97, 0x0F97 },
+ { 0x0F99, 0x0FAD }, { 0x0FB1, 0x0FB7 }, { 0x0FB9, 0x0FB9 },
+
+ // Georgian
+ { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 },
+
+ // Latin (3)
+ { 0x1E00, 0x1E9B }, { 0x1EA0, 0x1EF9 },
+
+ // Greek (2)
+ { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 },
+ { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 },
+ { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D },
+ { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC },
+
+ // Special characters (4)
+ { 0x1FBE, 0x1FBE },
+
+ // Greek (3)
+ { 0x1FC2, 0x1FC4 }, { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 },
+ { 0x1FD6, 0x1FDB }, { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 },
+ { 0x1FF6, 0x1FFC },
+
+ // Special characters (5)
+ { 0x203F, 0x2040 },
+
+ // Latin (4)
+ { 0x207F, 0x207F },
+
+ // Special characters (6)
+ { 0x2102, 0x2102 }, { 0x2107, 0x2107 }, { 0x210A, 0x2113 },
+ { 0x2115, 0x2115 }, { 0x2118, 0x211D }, { 0x2124, 0x2124 },
+ { 0x2126, 0x2126 }, { 0x2128, 0x2128 }, { 0x212A, 0x2131 },
+ { 0x2133, 0x2138 }, { 0x2160, 0x2182 }, { 0x3005, 0x3007 },
+ { 0x3021, 0x3029 },
+
+ // Hiragana
+ { 0x3041, 0x3093 }, { 0x309B, 0x309C },
+
+ // Katakana
+ { 0x30A1, 0x30F6 }, { 0x30FB, 0x30FC },
+
+ // Bopmofo [sic]
+ { 0x3105, 0x312C },
+
+ // CJK Unified Ideographs
+ { 0x4E00, 0x9FA5 },
+
+ // Hangul,
+ { 0xAC00, 0xD7A3 }
+};
+
+// C11 D.2, C++11 [charname.disallowed]
+static const UnicodeCharRange C11DisallowedInitialIDChars[] = {
+ { 0x0300, 0x036F }, { 0x1DC0, 0x1DFF }, { 0x20D0, 0x20FF },
+ { 0xFE20, 0xFE2F }
+};
+
+// C99 6.4.2.1p3: The initial character [of an identifier] shall not be a
+// universal character name designating a digit.
+// C99 Annex D defines these characters as "Digits".
+static const UnicodeCharRange C99DisallowedInitialIDChars[] = {
+ { 0x0660, 0x0669 }, { 0x06F0, 0x06F9 }, { 0x0966, 0x096F },
+ { 0x09E6, 0x09EF }, { 0x0A66, 0x0A6F }, { 0x0AE6, 0x0AEF },
+ { 0x0B66, 0x0B6F }, { 0x0BE7, 0x0BEF }, { 0x0C66, 0x0C6F },
+ { 0x0CE6, 0x0CEF }, { 0x0D66, 0x0D6F }, { 0x0E50, 0x0E59 },
+ { 0x0ED0, 0x0ED9 }, { 0x0F20, 0x0F33 }
+};
+
+// Unicode v6.2, chapter 6.2, table 6-2.
+static const UnicodeCharRange UnicodeWhitespaceChars[] = {
+ { 0x0085, 0x0085 }, { 0x00A0, 0x00A0 }, { 0x1680, 0x1680 },
+ { 0x180E, 0x180E }, { 0x2000, 0x200A }, { 0x2028, 0x2029 },
+ { 0x202F, 0x202F }, { 0x205F, 0x205F }, { 0x3000, 0x3000 }
+};
+
+#endif
OpenPOWER on IntegriCloud