diff options
Diffstat (limited to 'lib/Lex')
-rw-r--r-- | lib/Lex/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Lex/HeaderMap.cpp | 4 | ||||
-rw-r--r-- | lib/Lex/HeaderSearch.cpp | 311 | ||||
-rw-r--r-- | lib/Lex/Lexer.cpp | 797 | ||||
-rw-r--r-- | lib/Lex/LiteralSupport.cpp | 54 | ||||
-rw-r--r-- | lib/Lex/MacroArgs.cpp | 18 | ||||
-rw-r--r-- | lib/Lex/MacroArgs.h | 4 | ||||
-rw-r--r-- | lib/Lex/MacroInfo.cpp | 119 | ||||
-rw-r--r-- | lib/Lex/ModuleMap.cpp | 428 | ||||
-rw-r--r-- | lib/Lex/PPConditionalDirectiveRecord.cpp | 120 | ||||
-rw-r--r-- | lib/Lex/PPDirectives.cpp | 229 | ||||
-rw-r--r-- | lib/Lex/PPExpressions.cpp | 28 | ||||
-rw-r--r-- | lib/Lex/PPLexerChange.cpp | 53 | ||||
-rw-r--r-- | lib/Lex/PPMacroExpansion.cpp | 456 | ||||
-rw-r--r-- | lib/Lex/PTHLexer.cpp | 11 | ||||
-rw-r--r-- | lib/Lex/Pragma.cpp | 220 | ||||
-rw-r--r-- | lib/Lex/PreprocessingRecord.cpp | 189 | ||||
-rw-r--r-- | lib/Lex/Preprocessor.cpp | 185 | ||||
-rw-r--r-- | lib/Lex/PreprocessorLexer.cpp | 4 | ||||
-rw-r--r-- | lib/Lex/TokenConcatenation.cpp | 36 | ||||
-rw-r--r-- | lib/Lex/TokenLexer.cpp | 18 | ||||
-rw-r--r-- | lib/Lex/UnicodeCharSets.h | 496 |
22 files changed, 2473 insertions, 1308 deletions
diff --git a/lib/Lex/CMakeLists.txt b/lib/Lex/CMakeLists.txt index 241abbc..2ee4682 100644 --- a/lib/Lex/CMakeLists.txt +++ b/lib/Lex/CMakeLists.txt @@ -12,6 +12,7 @@ add_clang_library(clangLex ModuleMap.cpp PPCaching.cpp PPCallbacks.cpp + PPConditionalDirectiveRecord.cpp PPDirectives.cpp PPExpressions.cpp PPLexerChange.cpp diff --git a/lib/Lex/HeaderMap.cpp b/lib/Lex/HeaderMap.cpp index 7dc0491..dcf1f0c 100644 --- a/lib/Lex/HeaderMap.cpp +++ b/lib/Lex/HeaderMap.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/HeaderMap.h" +#include "clang/Basic/CharInfo.h" #include "clang/Basic/FileManager.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" -#include <cctype> #include <cstdio> using namespace clang; @@ -62,7 +62,7 @@ static inline unsigned HashHMapKey(StringRef Str) { const char *S = Str.begin(), *End = Str.end(); for (; S != End; S++) - Result += tolower(*S) * 13; + Result += toLowercase(*S) * 13; return Result; } diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp index 67000b68..304bd69 100644 --- a/lib/Lex/HeaderSearch.cpp +++ b/lib/Lex/HeaderSearch.cpp @@ -12,17 +12,20 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/HeaderSearch.h" -#include "clang/Lex/HeaderSearchOptions.h" -#include "clang/Lex/HeaderMap.h" -#include "clang/Lex/Lexer.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/IdentifierTable.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" +#include "clang/Lex/HeaderMap.h" +#include "clang/Lex/HeaderSearchOptions.h" +#include "clang/Lex/Lexer.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/Capacity.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include <cstdio> +#if defined(LLVM_ON_UNIX) +#include <limits.h> +#endif using namespace clang; const IdentifierInfo * @@ -39,12 +42,12 @@ HeaderFileInfo::getControllingMacro(ExternalIdentifierLookup *External) { ExternalHeaderFileInfoSource::~ExternalHeaderFileInfoSource() {} -HeaderSearch::HeaderSearch(llvm::IntrusiveRefCntPtr<HeaderSearchOptions> HSOpts, +HeaderSearch::HeaderSearch(IntrusiveRefCntPtr<HeaderSearchOptions> HSOpts, FileManager &FM, DiagnosticsEngine &Diags, const LangOptions &LangOpts, const TargetInfo *Target) : HSOpts(HSOpts), FileMgr(FM), FrameworkMap(64), - ModMap(FileMgr, *Diags.getClient(), LangOpts, Target) + ModMap(FileMgr, *Diags.getClient(), LangOpts, Target, *this) { AngledDirIdx = 0; SystemDirIdx = 0; @@ -134,7 +137,7 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, bool AllowSearch) { if (Module || !AllowSearch) return Module; - // Look through the various header search paths to load any avai;able module + // Look through the various header search paths to load any available module // maps, searching for a module map that describes this module. for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) { if (SearchDirs[Idx].isFramework()) { @@ -178,8 +181,22 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, bool AllowSearch) { if (Module) break; } + + // If we've already performed the exhaustive search for module maps in this + // search directory, don't do it again. + if (SearchDirs[Idx].haveSearchedAllModuleMaps()) + continue; + + // Load all module maps in the immediate subdirectories of this search + // directory. + loadSubdirectoryModuleMaps(SearchDirs[Idx]); + + // Look again for the module. + Module = ModMap.findModule(ModuleName); + if (Module) + break; } - + return Module; } @@ -263,6 +280,55 @@ const FileEntry *DirectoryLookup::LookupFile( return Result; } +/// \brief Given a framework directory, find the top-most framework directory. +/// +/// \param FileMgr The file manager to use for directory lookups. +/// \param DirName The name of the framework directory. +/// \param SubmodulePath Will be populated with the submodule path from the +/// returned top-level module to the originally named framework. +static const DirectoryEntry * +getTopFrameworkDir(FileManager &FileMgr, StringRef DirName, + SmallVectorImpl<std::string> &SubmodulePath) { + assert(llvm::sys::path::extension(DirName) == ".framework" && + "Not a framework directory"); + + // Note: as an egregious but useful hack we use the real path here, because + // frameworks moving between top-level frameworks to embedded frameworks tend + // to be symlinked, and we base the logical structure of modules on the + // physical layout. In particular, we need to deal with crazy includes like + // + // #include <Foo/Frameworks/Bar.framework/Headers/Wibble.h> + // + // where 'Bar' used to be embedded in 'Foo', is now a top-level framework + // which one should access with, e.g., + // + // #include <Bar/Wibble.h> + // + // Similar issues occur when a top-level framework has moved into an + // embedded framework. + const DirectoryEntry *TopFrameworkDir = FileMgr.getDirectory(DirName); + DirName = FileMgr.getCanonicalName(TopFrameworkDir); + do { + // Get the parent directory name. + DirName = llvm::sys::path::parent_path(DirName); + if (DirName.empty()) + break; + + // Determine whether this directory exists. + const DirectoryEntry *Dir = FileMgr.getDirectory(DirName); + if (!Dir) + break; + + // If this is a framework directory, then we're a subframework of this + // framework. + if (llvm::sys::path::extension(DirName) == ".framework") { + SubmodulePath.push_back(llvm::sys::path::stem(DirName)); + TopFrameworkDir = Dir; + } + } while (true); + + return TopFrameworkDir; +} /// DoFrameworkLookup - Do a lookup of the specified file in the current /// DirectoryLookup, which is a framework directory. @@ -334,17 +400,6 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( RelativePath->clear(); RelativePath->append(Filename.begin()+SlashPos+1, Filename.end()); } - - // If we're allowed to look for modules, try to load or create the module - // corresponding to this framework. - Module *Module = 0; - if (SuggestedModule) { - if (const DirectoryEntry *FrameworkDir - = FileMgr.getDirectory(FrameworkName)) { - bool IsSystem = getDirCharacteristic() != SrcMgr::C_User; - Module = HS.loadFrameworkModule(ModuleName, FrameworkDir, IsSystem); - } - } // Check "/System/Library/Frameworks/Cocoa.framework/Headers/file.h" unsigned OrigSize = FrameworkName.size(); @@ -357,28 +412,64 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( SearchPath->append(FrameworkName.begin(), FrameworkName.end()-1); } - // Determine whether this is the module we're building or not. - bool AutomaticImport = Module; FrameworkName.append(Filename.begin()+SlashPos+1, Filename.end()); - if (const FileEntry *FE = FileMgr.getFile(FrameworkName.str(), - /*openFile=*/!AutomaticImport)) { - if (AutomaticImport) - *SuggestedModule = HS.findModuleForHeader(FE); - return FE; + const FileEntry *FE = FileMgr.getFile(FrameworkName.str(), + /*openFile=*/!SuggestedModule); + if (!FE) { + // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h" + const char *Private = "Private"; + FrameworkName.insert(FrameworkName.begin()+OrigSize, Private, + Private+strlen(Private)); + if (SearchPath != NULL) + SearchPath->insert(SearchPath->begin()+OrigSize, Private, + Private+strlen(Private)); + + FE = FileMgr.getFile(FrameworkName.str(), /*openFile=*/!SuggestedModule); } - // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h" - const char *Private = "Private"; - FrameworkName.insert(FrameworkName.begin()+OrigSize, Private, - Private+strlen(Private)); - if (SearchPath != NULL) - SearchPath->insert(SearchPath->begin()+OrigSize, Private, - Private+strlen(Private)); - - const FileEntry *FE = FileMgr.getFile(FrameworkName.str(), - /*openFile=*/!AutomaticImport); - if (FE && AutomaticImport) - *SuggestedModule = HS.findModuleForHeader(FE); + // If we found the header and are allowed to suggest a module, do so now. + if (FE && SuggestedModule) { + // Find the framework in which this header occurs. + StringRef FrameworkPath = FE->getName(); + bool FoundFramework = false; + do { + // Get the parent directory name. + FrameworkPath = llvm::sys::path::parent_path(FrameworkPath); + if (FrameworkPath.empty()) + break; + + // Determine whether this directory exists. + const DirectoryEntry *Dir = FileMgr.getDirectory(FrameworkPath); + if (!Dir) + break; + + // If this is a framework directory, then we're a subframework of this + // framework. + if (llvm::sys::path::extension(FrameworkPath) == ".framework") { + FoundFramework = true; + break; + } + } while (true); + + if (FoundFramework) { + // Find the top-level framework based on this framework. + SmallVector<std::string, 4> SubmodulePath; + const DirectoryEntry *TopFrameworkDir + = ::getTopFrameworkDir(FileMgr, FrameworkPath, SubmodulePath); + + // Determine the name of the top-level framework. + StringRef ModuleName = llvm::sys::path::stem(TopFrameworkDir->getName()); + + // Load this framework module. If that succeeds, find the suggested module + // for this header, if any. + bool IsSystem = getDirCharacteristic() != SrcMgr::C_User; + if (HS.loadFrameworkModule(ModuleName, TopFrameworkDir, IsSystem)) { + *SuggestedModule = HS.findModuleForHeader(FE); + } + } else { + *SuggestedModule = HS.findModuleForHeader(FE); + } + } return FE; } @@ -584,7 +675,8 @@ const FileEntry *HeaderSearch:: LookupSubframeworkHeader(StringRef Filename, const FileEntry *ContextFileEnt, SmallVectorImpl<char> *SearchPath, - SmallVectorImpl<char> *RelativePath) { + SmallVectorImpl<char> *RelativePath, + Module **SuggestedModule) { assert(ContextFileEnt && "No context file?"); // Framework names must have a '/' in the filename. Find it. @@ -673,6 +765,26 @@ LookupSubframeworkHeader(StringRef Filename, // of evaluation. unsigned DirInfo = getFileInfo(ContextFileEnt).DirInfo; getFileInfo(FE).DirInfo = DirInfo; + + // If we're supposed to suggest a module, look for one now. + if (SuggestedModule) { + // Find the top-level framework based on this framework. + FrameworkName.pop_back(); // remove the trailing '/' + SmallVector<std::string, 4> SubmodulePath; + const DirectoryEntry *TopFrameworkDir + = ::getTopFrameworkDir(FileMgr, FrameworkName, SubmodulePath); + + // Determine the name of the top-level framework. + StringRef ModuleName = llvm::sys::path::stem(TopFrameworkDir->getName()); + + // Load this framework module. If that succeeds, find the suggested module + // for this header, if any. + bool IsSystem = false; + if (loadFrameworkModule(ModuleName, TopFrameworkDir, IsSystem)) { + *SuggestedModule = findModuleForHeader(FE); + } + } + return FE; } @@ -708,6 +820,7 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI, const HeaderFileInfo &OtherHFI) { HFI.isImport |= OtherHFI.isImport; HFI.isPragmaOnce |= OtherHFI.isPragmaOnce; + HFI.isModuleHeader |= OtherHFI.isModuleHeader; HFI.NumIncludes += OtherHFI.NumIncludes; if (!HFI.ControllingMacro && !HFI.ControllingMacroID) { @@ -749,7 +862,16 @@ bool HeaderSearch::isFileMultipleIncludeGuarded(const FileEntry *File) { if (ExternalSource && !HFI.Resolved) mergeHeaderFileInfo(HFI, ExternalSource->GetHeaderFileInfo(File)); - return HFI.isPragmaOnce || HFI.ControllingMacro || HFI.ControllingMacroID; + return HFI.isPragmaOnce || HFI.isImport || + HFI.ControllingMacro || HFI.ControllingMacroID; +} + +void HeaderSearch::MarkFileModuleHeader(const FileEntry *FE) { + if (FE->getUID() >= FileInfo.size()) + FileInfo.resize(FE->getUID()+1); + + HeaderFileInfo &HFI = FileInfo[FE->getUID()]; + HFI.isModuleHeader = true; } void HeaderSearch::setHeaderFileInfoForUID(HeaderFileInfo HFI, unsigned UID) { @@ -809,7 +931,7 @@ StringRef HeaderSearch::getUniqueFrameworkName(StringRef Framework) { bool HeaderSearch::hasModuleMap(StringRef FileName, const DirectoryEntry *Root) { - llvm::SmallVector<const DirectoryEntry *, 2> FixUpDirectories; + SmallVector<const DirectoryEntry *, 2> FixUpDirectories; StringRef DirName = FileName; do { @@ -849,7 +971,12 @@ bool HeaderSearch::hasModuleMap(StringRef FileName, } while (true); } -Module *HeaderSearch::findModuleForHeader(const FileEntry *File) { +Module *HeaderSearch::findModuleForHeader(const FileEntry *File) const { + if (ExternalSource) { + // Make sure the external source has handled header info about this file, + // which includes whether the file is part of a module. + (void)getFileInfo(File); + } if (Module *Mod = ModMap.findModuleForHeader(File)) return Mod; @@ -897,80 +1024,21 @@ Module *HeaderSearch::loadFrameworkModule(StringRef Name, return ModMap.findModule(Name); } - // The top-level framework directory, from which we'll infer a framework - // module. - const DirectoryEntry *TopFrameworkDir = Dir; - - // The path from the module we're actually looking for back to the top-level - // framework name. - llvm::SmallVector<StringRef, 2> SubmodulePath; + // Figure out the top-level framework directory and the submodule path from + // that top-level framework to the requested framework. + SmallVector<std::string, 2> SubmodulePath; SubmodulePath.push_back(Name); - - // Walk the directory structure to find any enclosing frameworks. -#ifdef LLVM_ON_UNIX - // Note: as an egregious but useful hack we use the real path here, because - // frameworks moving from top-level frameworks to embedded frameworks tend - // to be symlinked from the top-level location to the embedded location, - // and we need to resolve lookups as if we had found the embedded location. - char RealDirName[PATH_MAX]; - StringRef DirName; - if (realpath(Dir->getName(), RealDirName)) - DirName = RealDirName; - else - DirName = Dir->getName(); -#else - StringRef DirName = Dir->getName(); -#endif - do { - // Get the parent directory name. - DirName = llvm::sys::path::parent_path(DirName); - if (DirName.empty()) - break; - - // Determine whether this directory exists. - Dir = FileMgr.getDirectory(DirName); - if (!Dir) - break; - - // If this is a framework directory, then we're a subframework of this - // framework. - if (llvm::sys::path::extension(DirName) == ".framework") { - SubmodulePath.push_back(llvm::sys::path::stem(DirName)); - TopFrameworkDir = Dir; - } - } while (true); + const DirectoryEntry *TopFrameworkDir + = ::getTopFrameworkDir(FileMgr, Dir->getName(), SubmodulePath); - // Determine whether we're allowed to infer a module map. - bool canInfer = false; - if (llvm::sys::path::has_parent_path(TopFrameworkDir->getName())) { - // Figure out the parent path. - StringRef Parent = llvm::sys::path::parent_path(TopFrameworkDir->getName()); - if (const DirectoryEntry *ParentDir = FileMgr.getDirectory(Parent)) { - // If there's a module map file in the parent directory, it can - // explicitly allow us to infer framework modules. - switch (loadModuleMapFile(ParentDir)) { - case LMM_AlreadyLoaded: - case LMM_NewlyLoaded: { - StringRef Name = llvm::sys::path::stem(TopFrameworkDir->getName()); - canInfer = ModMap.canInferFrameworkModule(ParentDir, Name, IsSystem); - break; - } - case LMM_InvalidModuleMap: - case LMM_NoDirectory: - break; - } - } - } - - // If we're not allowed to infer a module map, we're done. - if (!canInfer) - return 0; // Try to infer a module map from the top-level framework directory. Module *Result = ModMap.inferFrameworkModule(SubmodulePath.back(), TopFrameworkDir, IsSystem, /*Parent=*/0); + if (!Result) + return 0; // Follow the submodule path to find the requested (sub)framework module // within the top-level framework module. @@ -1034,7 +1102,7 @@ HeaderSearch::loadModuleMapFile(const DirectoryEntry *Dir) { return LMM_InvalidModuleMap; } -void HeaderSearch::collectAllModules(llvm::SmallVectorImpl<Module *> &Modules) { +void HeaderSearch::collectAllModules(SmallVectorImpl<Module *> &Modules) { Modules.clear(); // Load module maps for each of the header search directories. @@ -1072,13 +1140,7 @@ void HeaderSearch::collectAllModules(llvm::SmallVectorImpl<Module *> &Modules) { // Try to load module map files for immediate subdirectories of this search // directory. - llvm::error_code EC; - SmallString<128> DirNative; - llvm::sys::path::native(SearchDirs[Idx].getDir()->getName(), DirNative); - for (llvm::sys::fs::directory_iterator Dir(DirNative.str(), EC), DirEnd; - Dir != DirEnd && !EC; Dir.increment(EC)) { - loadModuleMapFile(Dir->path()); - } + loadSubdirectoryModuleMaps(SearchDirs[Idx]); } // Populate the list of modules. @@ -1088,3 +1150,18 @@ void HeaderSearch::collectAllModules(llvm::SmallVectorImpl<Module *> &Modules) { Modules.push_back(M->getValue()); } } + +void HeaderSearch::loadSubdirectoryModuleMaps(DirectoryLookup &SearchDir) { + if (SearchDir.haveSearchedAllModuleMaps()) + return; + + llvm::error_code EC; + SmallString<128> DirNative; + llvm::sys::path::native(SearchDir.getDir()->getName(), DirNative); + for (llvm::sys::fs::directory_iterator Dir(DirNative.str(), EC), DirEnd; + Dir != DirEnd && !EC; Dir.increment(EC)) { + loadModuleMapFile(Dir->path()); + } + + SearchDir.setSearchedAllModuleMaps(true); +} diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index a5ba7db..ed4666a 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -25,19 +25,21 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/Lexer.h" -#include "clang/Lex/Preprocessor.h" -#include "clang/Lex/LexDiagnostic.h" -#include "clang/Lex/CodeCompletionHandler.h" +#include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceManager.h" -#include "llvm/ADT/StringSwitch.h" +#include "clang/Lex/CodeCompletionHandler.h" +#include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/Preprocessor.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MemoryBuffer.h" +#include "UnicodeCharSets.h" #include <cstring> using namespace clang; -static void InitCharacterInfo(); - //===----------------------------------------------------------------------===// // Token Class Implementation //===----------------------------------------------------------------------===// @@ -64,8 +66,6 @@ void Lexer::anchor() { } void Lexer::InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd) { - InitCharacterInfo(); - BufferStart = BufStart; BufferPtr = BufPtr; BufferEnd = BufEnd; @@ -122,8 +122,15 @@ Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP) InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(), InputFile->getBufferEnd()); - // Default to keeping comments if the preprocessor wants them. - SetCommentRetentionState(PP.getCommentRetentionState()); + resetExtendedTokenMode(); +} + +void Lexer::resetExtendedTokenMode() { + assert(PP && "Cannot reset token mode without a preprocessor"); + if (LangOpts.TraditionalCPP) + SetKeepWhitespaceMode(true); + else + SetCommentRetentionState(PP->getCommentRetentionState()); } /// Lexer constructor - Create a new raw lexer object. This object is only @@ -233,16 +240,67 @@ void Lexer::Stringify(SmallVectorImpl<char> &Str) { // Token Spelling //===----------------------------------------------------------------------===// +/// \brief Slow case of getSpelling. Extract the characters comprising the +/// spelling of this token from the provided input buffer. +static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, + const LangOptions &LangOpts, char *Spelling) { + assert(Tok.needsCleaning() && "getSpellingSlow called on simple token"); + + size_t Length = 0; + const char *BufEnd = BufPtr + Tok.getLength(); + + if (Tok.is(tok::string_literal)) { + // Munch the encoding-prefix and opening double-quote. + while (BufPtr < BufEnd) { + unsigned Size; + Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); + BufPtr += Size; + + if (Spelling[Length - 1] == '"') + break; + } + + // Raw string literals need special handling; trigraph expansion and line + // splicing do not occur within their d-char-sequence nor within their + // r-char-sequence. + if (Length >= 2 && + Spelling[Length - 2] == 'R' && Spelling[Length - 1] == '"') { + // Search backwards from the end of the token to find the matching closing + // quote. + const char *RawEnd = BufEnd; + do --RawEnd; while (*RawEnd != '"'); + size_t RawLength = RawEnd - BufPtr + 1; + + // Everything between the quotes is included verbatim in the spelling. + memcpy(Spelling + Length, BufPtr, RawLength); + Length += RawLength; + BufPtr += RawLength; + + // The rest of the token is lexed normally. + } + } + + while (BufPtr < BufEnd) { + unsigned Size; + Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); + BufPtr += Size; + } + + assert(Length < Tok.getLength() && + "NeedsCleaning flag set on token that didn't need cleaning!"); + return Length; +} + /// getSpelling() - Return the 'spelling' of this token. The spelling of a /// token are the characters used to represent the token in the source file /// after trigraph expansion and escaped-newline folding. In particular, this /// wants to get the true, uncanonicalized, spelling of things like digraphs /// UCNs, etc. StringRef Lexer::getSpelling(SourceLocation loc, - SmallVectorImpl<char> &buffer, - const SourceManager &SM, - const LangOptions &options, - bool *invalid) { + SmallVectorImpl<char> &buffer, + const SourceManager &SM, + const LangOptions &options, + bool *invalid) { // Break down the source location. std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(loc); @@ -267,17 +325,10 @@ StringRef Lexer::getSpelling(SourceLocation loc, // Common case: no need for cleaning. if (!token.needsCleaning()) return StringRef(tokenBegin, length); - - // Hard case, we need to relex the characters into the string. - buffer.clear(); - buffer.reserve(length); - - for (const char *ti = tokenBegin, *te = ti + length; ti != te; ) { - unsigned charSize; - buffer.push_back(Lexer::getCharAndSizeNoWarn(ti, charSize, options)); - ti += charSize; - } + // Hard case, we need to relex the characters into the string. + buffer.resize(length); + buffer.resize(getSpellingSlow(token, tokenBegin, options, buffer.data())); return StringRef(buffer.data(), buffer.size()); } @@ -289,31 +340,22 @@ StringRef Lexer::getSpelling(SourceLocation loc, std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid) { assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); - - // If this token contains nothing interesting, return it directly. + bool CharDataInvalid = false; - const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(), + const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid); if (Invalid) *Invalid = CharDataInvalid; if (CharDataInvalid) return std::string(); - + + // If this token contains nothing interesting, return it directly. if (!Tok.needsCleaning()) - return std::string(TokStart, TokStart+Tok.getLength()); - + return std::string(TokStart, TokStart + Tok.getLength()); + std::string Result; - Result.reserve(Tok.getLength()); - - // Otherwise, hard case, relex the characters into the string. - for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); - Ptr != End; ) { - unsigned CharSize; - Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts)); - Ptr += CharSize; - } - assert(Result.size() != unsigned(Tok.getLength()) && - "NeedsCleaning flag set on something that didn't need cleaning!"); + Result.resize(Tok.getLength()); + Result.resize(getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin())); return Result; } @@ -336,10 +378,12 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, // NOTE: this has to be checked *before* testing for an IdentifierInfo. if (Tok.is(tok::raw_identifier)) TokStart = Tok.getRawIdentifierData(); - else if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { - // Just return the string from the identifier table, which is very quick. - Buffer = II->getNameStart(); - return II->getLength(); + else if (!Tok.hasUCN()) { + if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { + // Just return the string from the identifier table, which is very quick. + Buffer = II->getNameStart(); + return II->getLength(); + } } // NOTE: this can be checked even after testing for an IdentifierInfo. @@ -365,23 +409,10 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, } // Otherwise, hard case, relex the characters into the string. - char *OutBuf = const_cast<char*>(Buffer); - for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); - Ptr != End; ) { - unsigned CharSize; - *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts); - Ptr += CharSize; - } - assert(unsigned(OutBuf-Buffer) != Tok.getLength() && - "NeedsCleaning flag set on something that didn't need cleaning!"); - - return OutBuf-Buffer; + return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer)); } - -static bool isWhitespace(unsigned char c); - /// MeasureTokenLength - Relex the token at the specified location and return /// its length in bytes in the input file. If the token needs cleaning (e.g. /// includes a trigraph or an escaped newline) then this count includes bytes @@ -389,6 +420,17 @@ static bool isWhitespace(unsigned char c); unsigned Lexer::MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) { + Token TheTok; + if (getRawToken(Loc, TheTok, SM, LangOpts)) + return 0; + return TheTok.getLength(); +} + +/// \brief Relex the token at the specified location. +/// \returns true if there was a failure, false on success. +bool Lexer::getRawToken(SourceLocation Loc, Token &Result, + const SourceManager &SM, + const LangOptions &LangOpts) { // TODO: this could be special cased for common tokens like identifiers, ')', // etc to make this faster, if it mattered. Just look at StrData[0] to handle // all obviously single-char tokens. This could use @@ -402,20 +444,19 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc, bool Invalid = false; StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); if (Invalid) - return 0; + return true; const char *StrData = Buffer.data()+LocInfo.second; if (isWhitespace(StrData[0])) - return 0; + return true; // Create a lexer starting at the beginning of this token. Lexer TheLexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, Buffer.begin(), StrData, Buffer.end()); TheLexer.SetCommentRetentionState(true); - Token TheTok; - TheLexer.LexFromRawLexer(TheTok); - return TheTok.getLength(); + TheLexer.LexFromRawLexer(Result); + return false; } static SourceLocation getBeginningOfFileToken(SourceLocation Loc, @@ -969,163 +1010,8 @@ StringRef Lexer::getImmediateMacroName(SourceLocation Loc, return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength); } -//===----------------------------------------------------------------------===// -// Character information. -//===----------------------------------------------------------------------===// - -enum { - CHAR_HORZ_WS = 0x01, // ' ', '\t', '\f', '\v'. Note, no '\0' - CHAR_VERT_WS = 0x02, // '\r', '\n' - CHAR_LETTER = 0x04, // a-z,A-Z - CHAR_NUMBER = 0x08, // 0-9 - CHAR_UNDER = 0x10, // _ - CHAR_PERIOD = 0x20, // . - CHAR_RAWDEL = 0x40 // {}[]#<>%:;?*+-/^&|~!=,"' -}; - -// Statically initialize CharInfo table based on ASCII character set -// Reference: FreeBSD 7.2 /usr/share/misc/ascii -static const unsigned char CharInfo[256] = -{ -// 0 NUL 1 SOH 2 STX 3 ETX -// 4 EOT 5 ENQ 6 ACK 7 BEL - 0 , 0 , 0 , 0 , - 0 , 0 , 0 , 0 , -// 8 BS 9 HT 10 NL 11 VT -//12 NP 13 CR 14 SO 15 SI - 0 , CHAR_HORZ_WS, CHAR_VERT_WS, CHAR_HORZ_WS, - CHAR_HORZ_WS, CHAR_VERT_WS, 0 , 0 , -//16 DLE 17 DC1 18 DC2 19 DC3 -//20 DC4 21 NAK 22 SYN 23 ETB - 0 , 0 , 0 , 0 , - 0 , 0 , 0 , 0 , -//24 CAN 25 EM 26 SUB 27 ESC -//28 FS 29 GS 30 RS 31 US - 0 , 0 , 0 , 0 , - 0 , 0 , 0 , 0 , -//32 SP 33 ! 34 " 35 # -//36 $ 37 % 38 & 39 ' - CHAR_HORZ_WS, CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , - 0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , -//40 ( 41 ) 42 * 43 + -//44 , 45 - 46 . 47 / - 0 , 0 , CHAR_RAWDEL , CHAR_RAWDEL , - CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL , -//48 0 49 1 50 2 51 3 -//52 4 53 5 54 6 55 7 - CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , - CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , -//56 8 57 9 58 : 59 ; -//60 < 61 = 62 > 63 ? - CHAR_NUMBER , CHAR_NUMBER , CHAR_RAWDEL , CHAR_RAWDEL , - CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , -//64 @ 65 A 66 B 67 C -//68 D 69 E 70 F 71 G - 0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , - CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , -//72 H 73 I 74 J 75 K -//76 L 77 M 78 N 79 O - CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , - CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , -//80 P 81 Q 82 R 83 S -//84 T 85 U 86 V 87 W - CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , - CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , -//88 X 89 Y 90 Z 91 [ -//92 \ 93 ] 94 ^ 95 _ - CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL , - 0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER , -//96 ` 97 a 98 b 99 c -//100 d 101 e 102 f 103 g - 0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , - CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , -//104 h 105 i 106 j 107 k -//108 l 109 m 110 n 111 o - CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , - CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , -//112 p 113 q 114 r 115 s -//116 t 117 u 118 v 119 w - CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , - CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , -//120 x 121 y 122 z 123 { -//124 | 125 } 126 ~ 127 DEL - CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL , - CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0 -}; - -static void InitCharacterInfo() { - static bool isInited = false; - if (isInited) return; - // check the statically-initialized CharInfo table - assert(CHAR_HORZ_WS == CharInfo[(int)' ']); - assert(CHAR_HORZ_WS == CharInfo[(int)'\t']); - assert(CHAR_HORZ_WS == CharInfo[(int)'\f']); - assert(CHAR_HORZ_WS == CharInfo[(int)'\v']); - assert(CHAR_VERT_WS == CharInfo[(int)'\n']); - assert(CHAR_VERT_WS == CharInfo[(int)'\r']); - assert(CHAR_UNDER == CharInfo[(int)'_']); - assert(CHAR_PERIOD == CharInfo[(int)'.']); - for (unsigned i = 'a'; i <= 'z'; ++i) { - assert(CHAR_LETTER == CharInfo[i]); - assert(CHAR_LETTER == CharInfo[i+'A'-'a']); - } - for (unsigned i = '0'; i <= '9'; ++i) - assert(CHAR_NUMBER == CharInfo[i]); - - isInited = true; -} - - -/// isIdentifierHead - Return true if this is the first character of an -/// identifier, which is [a-zA-Z_]. -static inline bool isIdentifierHead(unsigned char c) { - return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false; -} - -/// isIdentifierBody - Return true if this is the body character of an -/// identifier, which is [a-zA-Z0-9_]. -static inline bool isIdentifierBody(unsigned char c) { - return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER)) ? true : false; -} - -/// isHorizontalWhitespace - Return true if this character is horizontal -/// whitespace: ' ', '\\t', '\\f', '\\v'. Note that this returns false for -/// '\\0'. -static inline bool isHorizontalWhitespace(unsigned char c) { - return (CharInfo[c] & CHAR_HORZ_WS) ? true : false; -} - -/// isVerticalWhitespace - Return true if this character is vertical -/// whitespace: '\\n', '\\r'. Note that this returns false for '\\0'. -static inline bool isVerticalWhitespace(unsigned char c) { - return (CharInfo[c] & CHAR_VERT_WS) ? true : false; -} - -/// isWhitespace - Return true if this character is horizontal or vertical -/// whitespace: ' ', '\\t', '\\f', '\\v', '\\n', '\\r'. Note that this returns -/// false for '\\0'. -static inline bool isWhitespace(unsigned char c) { - return (CharInfo[c] & (CHAR_HORZ_WS|CHAR_VERT_WS)) ? true : false; -} - -/// isNumberBody - Return true if this is the body character of an -/// preprocessing number, which is [a-zA-Z0-9_.]. -static inline bool isNumberBody(unsigned char c) { - return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD)) ? - true : false; -} - -/// isRawStringDelimBody - Return true if this is the body character of a -/// raw string delimiter. -static inline bool isRawStringDelimBody(unsigned char c) { - return (CharInfo[c] & - (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL)) ? - true : false; -} - -// Allow external clients to make use of CharInfo. bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) { - return isIdentifierBody(c) || (c == '$' && LangOpts.DollarIdents); + return isIdentifierBody(c, LangOpts.DollarIdents); } @@ -1293,7 +1179,7 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc, // Try to load the file buffer. bool InvalidTemp = false; - llvm::StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp); + StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp); if (InvalidTemp) return SourceLocation(); @@ -1319,8 +1205,15 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc, C = *(++TokenEnd); NumWhitespaceChars++; } - if (isVerticalWhitespace(C)) + + // Skip \r, \n, \r\n, or \n\r + if (C == '\n' || C == '\r') { + char PrevC = C; + C = *(++TokenEnd); NumWhitespaceChars++; + if ((C == '\n' || C == '\r') && C != PrevC) + NumWhitespaceChars++; + } } return TokenLoc.getLocWithOffset(Tok.getLength() + NumWhitespaceChars); @@ -1334,7 +1227,6 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc, /// 2. If this is an escaped newline (potentially with whitespace between /// the backslash and newline), implicitly skip the newline and return /// the char after it. -/// 3. If this is a UCN, return it. FIXME: C++ UCN's? /// /// This handles the slow/uncommon case of the getCharAndSize method. Here we /// know that we can accumulate into Size, and that we have already incremented @@ -1467,6 +1359,62 @@ void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) { IsAtStartOfLine = StartOfLine; } +static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) { + if (LangOpts.CPlusPlus11 || LangOpts.C11) + return isCharInSet(C, C11AllowedIDChars); + else if (LangOpts.CPlusPlus) + return isCharInSet(C, CXX03AllowedIDChars); + else + return isCharInSet(C, C99AllowedIDChars); +} + +static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) { + assert(isAllowedIDChar(C, LangOpts)); + if (LangOpts.CPlusPlus11 || LangOpts.C11) + return !isCharInSet(C, C11DisallowedInitialIDChars); + else if (LangOpts.CPlusPlus) + return true; + else + return !isCharInSet(C, C99DisallowedInitialIDChars); +} + +static inline CharSourceRange makeCharRange(Lexer &L, const char *Begin, + const char *End) { + return CharSourceRange::getCharRange(L.getSourceLocation(Begin), + L.getSourceLocation(End)); +} + +static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, + CharSourceRange Range, bool IsFirst) { + // Check C99 compatibility. + if (Diags.getDiagnosticLevel(diag::warn_c99_compat_unicode_id, + Range.getBegin()) > DiagnosticsEngine::Ignored) { + enum { + CannotAppearInIdentifier = 0, + CannotStartIdentifier + }; + + if (!isCharInSet(C, C99AllowedIDChars)) { + Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id) + << Range + << CannotAppearInIdentifier; + } else if (IsFirst && isCharInSet(C, C99DisallowedInitialIDChars)) { + Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id) + << Range + << CannotStartIdentifier; + } + } + + // Check C++98 compatibility. + if (Diags.getDiagnosticLevel(diag::warn_cxx98_compat_unicode_id, + Range.getBegin()) > DiagnosticsEngine::Ignored) { + if (!isCharInSet(C, CXX03AllowedIDChars)) { + Diags.Report(Range.getBegin(), diag::warn_cxx98_compat_unicode_id) + << Range; + } + } + } + void Lexer::LexIdentifier(Token &Result, const char *CurPtr) { // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$] unsigned Size; @@ -1478,11 +1426,11 @@ void Lexer::LexIdentifier(Token &Result, const char *CurPtr) { // Fast path, no $,\,? in identifier found. '\' might be an escaped newline // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN. - // FIXME: UCNs. // - // TODO: Could merge these checks into a CharInfo flag to make the comparison - // cheaper - if (C != '\\' && C != '?' && (C != '$' || !LangOpts.DollarIdents)) { + // TODO: Could merge these checks into an InfoTable flag to make the + // comparison cheaper + if (isASCII(C) && C != '\\' && C != '?' && + (C != '$' || !LangOpts.DollarIdents)) { FinishIdentifier: const char *IdStart = BufferPtr; FormTokenWithChars(Result, CurPtr, tok::raw_identifier); @@ -1519,8 +1467,51 @@ FinishIdentifier: CurPtr = ConsumeChar(CurPtr, Size, Result); C = getCharAndSize(CurPtr, Size); continue; - } else if (!isIdentifierBody(C)) { // FIXME: UCNs. - // Found end of identifier. + + } else if (C == '\\') { + const char *UCNPtr = CurPtr + Size; + uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr, /*Token=*/0); + if (CodePoint == 0 || !isAllowedIDChar(CodePoint, LangOpts)) + goto FinishIdentifier; + + if (!isLexingRawMode()) { + maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, + makeCharRange(*this, CurPtr, UCNPtr), + /*IsFirst=*/false); + } + + Result.setFlag(Token::HasUCN); + if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') || + (UCNPtr - CurPtr == 10 && CurPtr[1] == 'U')) + CurPtr = UCNPtr; + else + while (CurPtr != UCNPtr) + (void)getAndAdvanceChar(CurPtr, Result); + + C = getCharAndSize(CurPtr, Size); + continue; + } else if (!isASCII(C)) { + const char *UnicodePtr = CurPtr; + UTF32 CodePoint; + ConversionResult Result = + llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr, + (const UTF8 *)BufferEnd, + &CodePoint, + strictConversion); + if (Result != conversionOK || + !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts)) + goto FinishIdentifier; + + if (!isLexingRawMode()) { + maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, + makeCharRange(*this, CurPtr, UnicodePtr), + /*IsFirst=*/false); + } + + CurPtr = UnicodePtr; + C = getCharAndSize(CurPtr, Size); + continue; + } else if (!isIdentifierBody(C)) { goto FinishIdentifier; } @@ -1528,7 +1519,7 @@ FinishIdentifier: CurPtr = ConsumeChar(CurPtr, Size, Result); C = getCharAndSize(CurPtr, Size); - while (isIdentifierBody(C)) { // FIXME: UCNs. + while (isIdentifierBody(C)) { CurPtr = ConsumeChar(CurPtr, Size, Result); C = getCharAndSize(CurPtr, Size); } @@ -1553,7 +1544,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { unsigned Size; char C = getCharAndSize(CurPtr, Size); char PrevCh = 0; - while (isNumberBody(C)) { // FIXME: UCNs. + while (isPreprocessingNumberBody(C)) { // FIXME: UCNs in ud-suffix. CurPtr = ConsumeChar(CurPtr, Size, Result); PrevCh = C; C = getCharAndSize(CurPtr, Size); @@ -1598,7 +1589,7 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) { unsigned Size; char C = getCharAndSize(CurPtr, Size); if (isIdentifierHead(C)) { - if (!getLangOpts().CPlusPlus0x) { + if (!getLangOpts().CPlusPlus11) { if (!isLexingRawMode()) Diag(CurPtr, C == '_' ? diag::warn_cxx11_compat_user_defined_literal @@ -1639,7 +1630,9 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, (Kind == tok::utf8_string_literal || Kind == tok::utf16_string_literal || Kind == tok::utf32_string_literal)) - Diag(BufferPtr, diag::warn_cxx98_compat_unicode_literal); + Diag(BufferPtr, getLangOpts().CPlusPlus + ? diag::warn_cxx98_compat_unicode_literal + : diag::warn_c99_compat_unicode_literal); char C = getAndAdvanceChar(CurPtr, Result); while (C != '"') { @@ -1804,7 +1797,9 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr, if (!isLexingRawMode() && (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)) - Diag(BufferPtr, diag::warn_cxx98_compat_unicode_literal); + Diag(BufferPtr, getLangOpts().CPlusPlus + ? diag::warn_cxx98_compat_unicode_literal + : diag::warn_c99_compat_unicode_literal); char C = getAndAdvanceChar(CurPtr, Result); if (C == '\'') { @@ -1860,6 +1855,8 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr, /// bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { // Whitespace - Skip it, then return the token after the whitespace. + bool SawNewline = isVerticalWhitespace(CurPtr[-1]); + unsigned char Char = *CurPtr; // Skip consequtive spaces efficiently. while (1) { // Skip horizontal whitespace very aggressively. @@ -1867,7 +1864,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { Char = *++CurPtr; // Otherwise if we have something other than whitespace, we're done. - if (Char != '\n' && Char != '\r') + if (!isVerticalWhitespace(Char)) break; if (ParsingPreprocessorDirective) { @@ -1877,24 +1874,27 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { } // ok, but handle newline. - // The returned token is at the start of the line. - Result.setFlag(Token::StartOfLine); - // No leading whitespace seen so far. - Result.clearFlag(Token::LeadingSpace); + SawNewline = true; Char = *++CurPtr; } - // If this isn't immediately after a newline, there is leading space. - char PrevChar = CurPtr[-1]; - if (PrevChar != '\n' && PrevChar != '\r') - Result.setFlag(Token::LeadingSpace); - // If the client wants us to return whitespace, return it now. if (isKeepWhitespaceMode()) { FormTokenWithChars(Result, CurPtr, tok::unknown); + if (SawNewline) + IsAtStartOfLine = true; + // FIXME: The next token will not have LeadingSpace set. return true; } + // If this isn't immediately after a newline, there is leading space. + char PrevChar = CurPtr[-1]; + bool HasLeadingSpace = !isVerticalWhitespace(PrevChar); + + Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace); + if (SawNewline) + Result.setFlag(Token::StartOfLine); + BufferPtr = CurPtr; return false; } @@ -2285,7 +2285,6 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { // efficiently now. This is safe even in KeepWhitespaceMode because we would // have already returned above with the comment as a token. if (isHorizontalWhitespace(*CurPtr)) { - Result.setFlag(Token::LeadingSpace); SkipWhitespace(Result, CurPtr+1); return false; } @@ -2367,7 +2366,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { FormTokenWithChars(Result, CurPtr, tok::eod); // Restore comment saving mode, in case it was disabled for directive. - SetCommentRetentionState(PP->getCommentRetentionState()); + resetExtendedTokenMode(); return true; // Have a token. } @@ -2393,7 +2392,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue // a pedwarn. if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')) - Diag(BufferEnd, LangOpts.CPlusPlus0x ? // C++11 [lex.phases] 2.2 p2 + Diag(BufferEnd, LangOpts.CPlusPlus11 ? // C++11 [lex.phases] 2.2 p2 diag::warn_cxx98_compat_no_newline_eof : diag::ext_no_newline_eof) << FixItHint::CreateInsertion(getSourceLocation(BufferEnd), "\n"); @@ -2550,6 +2549,164 @@ bool Lexer::isCodeCompletionPoint(const char *CurPtr) const { return false; } +uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc, + Token *Result) { + unsigned CharSize; + char Kind = getCharAndSize(StartPtr, CharSize); + + unsigned NumHexDigits; + if (Kind == 'u') + NumHexDigits = 4; + else if (Kind == 'U') + NumHexDigits = 8; + else + return 0; + + if (!LangOpts.CPlusPlus && !LangOpts.C99) { + if (Result && !isLexingRawMode()) + Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89); + return 0; + } + + const char *CurPtr = StartPtr + CharSize; + const char *KindLoc = &CurPtr[-1]; + + uint32_t CodePoint = 0; + for (unsigned i = 0; i < NumHexDigits; ++i) { + char C = getCharAndSize(CurPtr, CharSize); + + unsigned Value = llvm::hexDigitValue(C); + if (Value == -1U) { + if (Result && !isLexingRawMode()) { + if (i == 0) { + Diag(BufferPtr, diag::warn_ucn_escape_no_digits) + << StringRef(KindLoc, 1); + } else { + Diag(BufferPtr, diag::warn_ucn_escape_incomplete); + + // If the user wrote \U1234, suggest a fixit to \u. + if (i == 4 && NumHexDigits == 8) { + CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1); + Diag(KindLoc, diag::note_ucn_four_not_eight) + << FixItHint::CreateReplacement(URange, "u"); + } + } + } + + return 0; + } + + CodePoint <<= 4; + CodePoint += Value; + + CurPtr += CharSize; + } + + if (Result) { + Result->setFlag(Token::HasUCN); + if (CurPtr - StartPtr == (ptrdiff_t)NumHexDigits + 2) + StartPtr = CurPtr; + else + while (StartPtr != CurPtr) + (void)getAndAdvanceChar(StartPtr, *Result); + } else { + StartPtr = CurPtr; + } + + // C99 6.4.3p2: A universal character name shall not specify a character whose + // short identifier is less than 00A0 other than 0024 ($), 0040 (@), or + // 0060 (`), nor one in the range D800 through DFFF inclusive.) + // C++11 [lex.charset]p2: If the hexadecimal value for a + // universal-character-name corresponds to a surrogate code point (in the + // range 0xD800-0xDFFF, inclusive), the program is ill-formed. Additionally, + // if the hexadecimal value for a universal-character-name outside the + // c-char-sequence, s-char-sequence, or r-char-sequence of a character or + // string literal corresponds to a control character (in either of the + // ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a character in the + // basic source character set, the program is ill-formed. + if (CodePoint < 0xA0) { + if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60) + return CodePoint; + + // We don't use isLexingRawMode() here because we need to warn about bad + // UCNs even when skipping preprocessing tokens in a #if block. + if (Result && PP) { + if (CodePoint < 0x20 || CodePoint >= 0x7F) + Diag(BufferPtr, diag::err_ucn_control_character); + else { + char C = static_cast<char>(CodePoint); + Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&C, 1); + } + } + + return 0; + + } else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) { + // C++03 allows UCNs representing surrogate characters. C99 and C++11 don't. + // We don't use isLexingRawMode() here because we need to diagnose bad + // UCNs even when skipping preprocessing tokens in a #if block. + if (Result && PP) { + if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11) + Diag(BufferPtr, diag::warn_ucn_escape_surrogate); + else + Diag(BufferPtr, diag::err_ucn_escape_invalid); + } + return 0; + } + + return CodePoint; +} + +void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { + if (!isLexingRawMode() && !PP->isPreprocessedOutput() && + isCharInSet(C, UnicodeWhitespaceChars)) { + Diag(BufferPtr, diag::ext_unicode_whitespace) + << makeCharRange(*this, BufferPtr, CurPtr); + + Result.setFlag(Token::LeadingSpace); + if (SkipWhitespace(Result, CurPtr)) + return; // KeepWhitespaceMode + + return LexTokenInternal(Result); + } + + if (isAllowedIDChar(C, LangOpts) && isAllowedInitiallyIDChar(C, LangOpts)) { + if (!isLexingRawMode() && !ParsingPreprocessorDirective && + !PP->isPreprocessedOutput()) { + maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C, + makeCharRange(*this, BufferPtr, CurPtr), + /*IsFirst=*/true); + } + + MIOpt.ReadToken(); + return LexIdentifier(Result, CurPtr); + } + + if (!isLexingRawMode() && !ParsingPreprocessorDirective && + !PP->isPreprocessedOutput() && + !isASCII(*BufferPtr) && !isAllowedIDChar(C, LangOpts)) { + // Non-ASCII characters tend to creep into source code unintentionally. + // Instead of letting the parser complain about the unknown token, + // just drop the character. + // Note that we can /only/ do this when the non-ASCII character is actually + // spelled as Unicode, not written as a UCN. The standard requires that + // we not throw away any possible preprocessor tokens, but there's a + // loophole in the mapping of Unicode characters to basic character set + // characters that allows us to map these particular characters to, say, + // whitespace. + Diag(BufferPtr, diag::err_non_ascii) + << FixItHint::CreateRemoval(makeCharRange(*this, BufferPtr, CurPtr)); + + BufferPtr = CurPtr; + return LexTokenInternal(Result); + } + + // Otherwise, we have an explicit UCN or a character that's unlikely to show + // up by accident. + MIOpt.ReadToken(); + FormTokenWithChars(Result, CurPtr, tok::unknown); +} + /// LexTokenInternal - This implements a simple C family lexer. It is an /// extremely performance critical piece of code. This assumes that the buffer @@ -2576,6 +2733,7 @@ LexNextToken: // whitespace. if (isKeepWhitespaceMode()) { FormTokenWithChars(Result, CurPtr, tok::unknown); + // FIXME: The next token will not have LeadingSpace set. return; } @@ -2643,7 +2801,7 @@ LexNextToken: // Restore comment saving mode, in case it was disabled for directive. if (PP) - SetCommentRetentionState(PP->getCommentRetentionState()); + resetExtendedTokenMode(); // Since we consumed a newline, we are back at the start of a line. IsAtStartOfLine = true; @@ -2651,8 +2809,7 @@ LexNextToken: Kind = tok::eod; break; } - // The returned token is at the start of the line. - Result.setFlag(Token::StartOfLine); + // No leading whitespace seen so far. Result.clearFlag(Token::LeadingSpace); @@ -2695,11 +2852,11 @@ LexNextToken: MIOpt.ReadToken(); return LexNumericConstant(Result, CurPtr); - case 'u': // Identifier (uber) or C++0x UTF-8 or UTF-16 string literal + case 'u': // Identifier (uber) or C11/C++11 UTF-8 or UTF-16 string literal // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - if (LangOpts.CPlusPlus0x) { + if (LangOpts.CPlusPlus11 || LangOpts.C11) { Char = getCharAndSize(CurPtr, SizeTmp); // UTF-16 string literal @@ -2713,7 +2870,8 @@ LexNextToken: tok::utf16_char_constant); // UTF-16 raw string literal - if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') + if (Char == 'R' && LangOpts.CPlusPlus11 && + getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') return LexRawStringLiteral(Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result), @@ -2729,7 +2887,7 @@ LexNextToken: SizeTmp2, Result), tok::utf8_string_literal); - if (Char2 == 'R') { + if (Char2 == 'R' && LangOpts.CPlusPlus11) { unsigned SizeTmp3; char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3); // UTF-8 raw string literal @@ -2747,11 +2905,11 @@ LexNextToken: // treat u like the start of an identifier. return LexIdentifier(Result, CurPtr); - case 'U': // Identifier (Uber) or C++0x UTF-32 string literal + case 'U': // Identifier (Uber) or C11/C++11 UTF-32 string literal // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - if (LangOpts.CPlusPlus0x) { + if (LangOpts.CPlusPlus11 || LangOpts.C11) { Char = getCharAndSize(CurPtr, SizeTmp); // UTF-32 string literal @@ -2765,7 +2923,8 @@ LexNextToken: tok::utf32_char_constant); // UTF-32 raw string literal - if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') + if (Char == 'R' && LangOpts.CPlusPlus11 && + getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') return LexRawStringLiteral(Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result), @@ -2779,7 +2938,7 @@ LexNextToken: // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - if (LangOpts.CPlusPlus0x) { + if (LangOpts.CPlusPlus11) { Char = getCharAndSize(CurPtr, SizeTmp); if (Char == '"') @@ -2802,7 +2961,7 @@ LexNextToken: tok::wide_string_literal); // Wide raw string literal. - if (LangOpts.CPlusPlus0x && Char == 'R' && + if (LangOpts.CPlusPlus11 && Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') return LexRawStringLiteral(Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), @@ -2968,10 +3127,13 @@ LexNextToken: // this as "foo / bar" and langauges with Line comments would lex it as // "foo". Check to see if the character after the second slash is a '*'. // If so, we will lex that as a "/" instead of the start of a comment. - // However, we never do this in -traditional-cpp mode. - if ((LangOpts.LineComment || - getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*') && - !LangOpts.TraditionalCPP) { + // However, we never do this if we are just preprocessing. + bool TreatAsComment = LangOpts.LineComment && !LangOpts.TraditionalCPP; + if (!TreatAsComment) + if (!(PP && PP->isPreprocessedOutput())) + TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*'; + + if (TreatAsComment) { if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) return; // There is a token to return. @@ -3020,26 +3182,8 @@ LexNextToken: // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // FIXME: -fpreprocessed mode?? - if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) { - FormTokenWithChars(Result, CurPtr, tok::hash); - PP->HandleDirective(Result); - - // As an optimization, if the preprocessor didn't switch lexers, tail - // recurse. - if (PP->isCurrentLexer(this)) { - // Start a new token. If this is a #include or something, the PP may - // want us starting at the beginning of the line again. If so, set - // the StartOfLine flag and clear LeadingSpace. - if (IsAtStartOfLine) { - Result.setFlag(Token::StartOfLine); - Result.clearFlag(Token::LeadingSpace); - IsAtStartOfLine = false; - } - goto LexNextToken; // GCC isn't tail call eliminating. - } - - return PP->Lex(Result); - } + if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) + goto HandleDirective; Kind = tok::hash; } @@ -3077,7 +3221,7 @@ LexNextToken: CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::lessequal; } else if (LangOpts.Digraphs && Char == ':') { // '<:' -> '[' - if (LangOpts.CPlusPlus0x && + if (LangOpts.CPlusPlus11 && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') { // C++0x [lex.pptoken]p3: // Otherwise, if the next three characters are <:: and the subsequent @@ -3204,25 +3348,8 @@ LexNextToken: // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // FIXME: -fpreprocessed mode?? - if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) { - FormTokenWithChars(Result, CurPtr, tok::hash); - PP->HandleDirective(Result); - - // As an optimization, if the preprocessor didn't switch lexers, tail - // recurse. - if (PP->isCurrentLexer(this)) { - // Start a new token. If this is a #include or something, the PP may - // want us starting at the beginning of the line again. If so, set - // the StartOfLine flag and clear LeadingSpace. - if (IsAtStartOfLine) { - Result.setFlag(Token::StartOfLine); - Result.clearFlag(Token::LeadingSpace); - IsAtStartOfLine = false; - } - goto LexNextToken; // GCC isn't tail call eliminating. - } - return PP->Lex(Result); - } + if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) + goto HandleDirective; Kind = tok::hash; } @@ -3236,12 +3363,48 @@ LexNextToken: Kind = tok::unknown; break; + // UCNs (C99 6.4.3, C++11 [lex.charset]p2) case '\\': - // FIXME: UCN's. - // FALL THROUGH. - default: + if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) + return LexUnicode(Result, CodePoint, CurPtr); + Kind = tok::unknown; break; + + default: { + if (isASCII(Char)) { + Kind = tok::unknown; + break; + } + + UTF32 CodePoint; + + // We can't just reset CurPtr to BufferPtr because BufferPtr may point to + // an escaped newline. + --CurPtr; + ConversionResult Status = + llvm::convertUTF8Sequence((const UTF8 **)&CurPtr, + (const UTF8 *)BufferEnd, + &CodePoint, + strictConversion); + if (Status == conversionOK) + return LexUnicode(Result, CodePoint, CurPtr); + + if (isLexingRawMode() || ParsingPreprocessorDirective || + PP->isPreprocessedOutput()) { + ++CurPtr; + Kind = tok::unknown; + break; + } + + // Non-ASCII characters tend to creep into source code unintentionally. + // Instead of letting the parser complain about the unknown token, + // just diagnose the invalid UTF-8, then drop the character. + Diag(CurPtr, diag::err_invalid_utf8); + + BufferPtr = CurPtr+1; + goto LexNextToken; + } } // Notify MIOpt that we read a non-whitespace/non-comment token. @@ -3249,4 +3412,26 @@ LexNextToken: // Update the location of token as well as BufferPtr. FormTokenWithChars(Result, CurPtr, Kind); + return; + +HandleDirective: + // We parsed a # character and it's the start of a preprocessing directive. + + FormTokenWithChars(Result, CurPtr, tok::hash); + PP->HandleDirective(Result); + + // As an optimization, if the preprocessor didn't switch lexers, tail + // recurse. + if (PP->isCurrentLexer(this)) { + // Start a new token. If this is a #include or something, the PP may + // want us starting at the beginning of the line again. If so, set + // the StartOfLine flag and clear LeadingSpace. + if (IsAtStartOfLine) { + Result.setFlag(Token::StartOfLine); + Result.clearFlag(Token::LeadingSpace); + IsAtStartOfLine = false; + } + goto LexNextToken; // GCC isn't tail call eliminating. + } + return PP->Lex(Result); } diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index e30612e..91da822 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -13,22 +13,15 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/LiteralSupport.h" -#include "clang/Lex/Preprocessor.h" -#include "clang/Lex/LexDiagnostic.h" +#include "clang/Basic/CharInfo.h" #include "clang/Basic/TargetInfo.h" -#include "clang/Basic/ConvertUTF.h" +#include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/Preprocessor.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" -using namespace clang; -/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's -/// not valid. -static int HexDigitValue(char C) { - if (C >= '0' && C <= '9') return C-'0'; - if (C >= 'a' && C <= 'f') return C-'a'+10; - if (C >= 'A' && C <= 'F') return C-'A'+10; - return -1; -} +using namespace clang; static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) { switch (kind) { @@ -136,10 +129,10 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin, break; case 'x': { // Hex escape. ResultChar = 0; - if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) { + if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) { if (Diags) Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf, - diag::err_hex_escape_no_digits); + diag::err_hex_escape_no_digits) << "x"; HadError = 1; break; } @@ -147,7 +140,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin, // Hex escapes are a maximal series of hex digits. bool Overflow = false; for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) { - int CharVal = HexDigitValue(ThisTokBuf[0]); + int CharVal = llvm::hexDigitValue(ThisTokBuf[0]); if (CharVal == -1) break; // About to shift out a digit? Overflow |= (ResultChar & 0xF0000000) ? true : false; @@ -205,7 +198,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin, if (Diags == 0) break; - if (isgraph(ResultChar)) + if (isPrintable(ResultChar)) Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf, diag::ext_unknown_escape) << std::string(1, ResultChar); @@ -232,16 +225,16 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, // Skip the '\u' char's. ThisTokBuf += 2; - if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) { + if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) { if (Diags) Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf, - diag::err_ucn_escape_no_digits); + diag::err_hex_escape_no_digits) << StringRef(&ThisTokBuf[-1], 1); return false; } UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8); unsigned short UcnLenSave = UcnLen; for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) { - int CharVal = HexDigitValue(ThisTokBuf[0]); + int CharVal = llvm::hexDigitValue(ThisTokBuf[0]); if (CharVal == -1) break; UcnVal <<= 4; UcnVal |= CharVal; @@ -267,7 +260,7 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, // characters inside character and string literals if (UcnVal < 0xa0 && (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) { // $, @, ` - bool IsError = (!Features.CPlusPlus0x || !in_char_string_literal); + bool IsError = (!Features.CPlusPlus11 || !in_char_string_literal); if (Diags) { char BasicSCSChar = UcnVal; if (UcnVal >= 0x20 && UcnVal < 0x7f) @@ -286,7 +279,7 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, if (!Features.CPlusPlus && !Features.C99 && Diags) Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf, - diag::warn_ucn_not_valid_in_c89); + diag::warn_ucn_not_valid_in_c89_literal); return true; } @@ -467,8 +460,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, // and FP constants (specifically, the 'pp-number' regex), and assumes that // the byte at "*end" is both valid and not part of the regex. Because of // this, it doesn't have to check for 'overscan' in various places. - assert(!isalnum(*ThisTokEnd) && *ThisTokEnd != '.' && *ThisTokEnd != '_' && - "Lexer didn't maximally munch?"); + assert(!isPreprocessingNumberBody(*ThisTokEnd) && "didn't maximally munch?"); s = DigitsBegin = ThisTokBegin; saw_exponent = false; @@ -491,7 +483,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, s = SkipDigits(s); if (s == ThisTokEnd) { // Done. - } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) { + } else if (isHexDigit(*s) && !(*s == 'e' || *s == 'E')) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin), diag::err_invalid_decimal_digit) << StringRef(s, 1); hadError = true; @@ -616,7 +608,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, } if (s != ThisTokEnd) { - if (PP.getLangOpts().CPlusPlus0x && s == SuffixBegin && *s == '_') { + if (PP.getLangOpts().CPlusPlus11 && s == SuffixBegin && *s == '_') { // We have a ud-suffix! By C++11 [lex.ext]p10, ud-suffixes not starting // with an '_' are ill-formed. saw_ud_suffix = true; @@ -643,7 +635,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { s++; // Handle a hex number like 0x1234. - if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) { + if ((*s == 'x' || *s == 'X') && (isHexDigit(s[1]) || s[1] == '.')) { s++; radix = 16; DigitsBegin = s; @@ -702,7 +694,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { s = SkipBinaryDigits(s); if (s == ThisTokEnd) { // Done. - } else if (isxdigit(*s)) { + } else if (isHexDigit(*s)) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), diag::err_invalid_binary_digit) << StringRef(s, 1); hadError = true; @@ -722,7 +714,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { // If we have some other non-octal digit that *is* a decimal digit, see if // this is part of a floating point number like 094.123 or 09e1. - if (isdigit(*s)) { + if (isDigit(*s)) { const char *EndDecimal = SkipDigits(s); if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') { s = EndDecimal; @@ -732,7 +724,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { // If we have a hex digit other than 'e' (which denotes a FP exponent) then // the code is using an incorrect base. - if (isxdigit(*s) && *s != 'e' && *s != 'E') { + if (isHexDigit(*s) && *s != 'e' && *s != 'E') { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), diag::err_invalid_octal_digit) << StringRef(s, 1); hadError = true; @@ -792,7 +784,7 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) { if (alwaysFitsInto64Bits(radix, NumDigits)) { uint64_t N = 0; for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr) - N = N * radix + HexDigitValue(*Ptr); + N = N * radix + llvm::hexDigitValue(*Ptr); // This will truncate the value to Val's input width. Simply check // for overflow by comparing. @@ -809,7 +801,7 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) { bool OverflowOccurred = false; while (Ptr < SuffixBegin) { - unsigned C = HexDigitValue(*Ptr++); + unsigned C = llvm::hexDigitValue(*Ptr++); // If this letter is out of bound for this radix, reject it. assert(C < radix && "NumericLiteralParser ctor should have rejected this"); diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp index ed8873d..f6e781a 100644 --- a/lib/Lex/MacroArgs.cpp +++ b/lib/Lex/MacroArgs.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "MacroArgs.h" +#include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/Preprocessor.h" -#include "clang/Lex/LexDiagnostic.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/SaveAndRestore.h" #include <algorithm> @@ -23,7 +23,7 @@ using namespace clang; /// MacroArgs ctor function - This destroys the vector passed in. MacroArgs *MacroArgs::create(const MacroInfo *MI, - llvm::ArrayRef<Token> UnexpArgTokens, + ArrayRef<Token> UnexpArgTokens, bool VarargsElided, Preprocessor &PP) { assert(MI->isFunctionLike() && "Can't have args for an object-like macro!"); @@ -215,15 +215,11 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks, // If this is a string or character constant, escape the token as specified // by 6.10.3.2p2. - if (Tok.is(tok::string_literal) || // "foo" - Tok.is(tok::wide_string_literal) || // L"foo" - Tok.is(tok::utf8_string_literal) || // u8"foo" - Tok.is(tok::utf16_string_literal) || // u"foo" - Tok.is(tok::utf32_string_literal) || // U"foo" - Tok.is(tok::char_constant) || // 'x' - Tok.is(tok::wide_char_constant) || // L'x'. - Tok.is(tok::utf16_char_constant) || // u'x'. - Tok.is(tok::utf32_char_constant)) { // U'x'. + if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc. + Tok.is(tok::char_constant) || // 'x' + Tok.is(tok::wide_char_constant) || // L'x'. + Tok.is(tok::utf16_char_constant) || // u'x'. + Tok.is(tok::utf32_char_constant)) { // U'x'. bool Invalid = false; std::string TokStr = PP.getSpelling(Tok, &Invalid); if (!Invalid) { diff --git a/lib/Lex/MacroArgs.h b/lib/Lex/MacroArgs.h index cf86d71..1fd295e 100644 --- a/lib/Lex/MacroArgs.h +++ b/lib/Lex/MacroArgs.h @@ -14,8 +14,8 @@ #ifndef LLVM_CLANG_MACROARGS_H #define LLVM_CLANG_MACROARGS_H +#include "clang/Basic/LLVM.h" #include "llvm/ADT/ArrayRef.h" - #include <vector> namespace clang { @@ -60,7 +60,7 @@ public: /// MacroArgs ctor function - Create a new MacroArgs object with the specified /// macro and argument info. static MacroArgs *create(const MacroInfo *MI, - llvm::ArrayRef<Token> UnexpArgTokens, + ArrayRef<Token> UnexpArgTokens, bool VarargsElided, Preprocessor &PP); /// destroy - Destroy and deallocate the memory for this object. diff --git a/lib/Lex/MacroInfo.cpp b/lib/Lex/MacroInfo.cpp index 904f04e..b61ff71 100644 --- a/lib/Lex/MacroInfo.cpp +++ b/lib/Lex/MacroInfo.cpp @@ -17,7 +17,6 @@ using namespace clang; MacroInfo::MacroInfo(SourceLocation DefLoc) : Location(DefLoc), - PreviousDefinition(0), ArgumentList(0), NumArguments(0), IsDefinitionLengthCached(false), @@ -25,53 +24,12 @@ MacroInfo::MacroInfo(SourceLocation DefLoc) IsC99Varargs(false), IsGNUVarargs(false), IsBuiltinMacro(false), - IsFromAST(false), - ChangedAfterLoad(false), + HasCommaPasting(false), IsDisabled(false), IsUsed(false), IsAllowRedefinitionsWithoutWarning(false), IsWarnIfUnused(false), - IsPublic(true), - IsHidden(false), - IsAmbiguous(false) { -} - -MacroInfo::MacroInfo(const MacroInfo &MI, llvm::BumpPtrAllocator &PPAllocator) - : Location(MI.Location), - EndLocation(MI.EndLocation), - UndefLocation(MI.UndefLocation), - PreviousDefinition(0), - ArgumentList(0), - NumArguments(0), - ReplacementTokens(MI.ReplacementTokens), - DefinitionLength(MI.DefinitionLength), - IsDefinitionLengthCached(MI.IsDefinitionLengthCached), - IsFunctionLike(MI.IsFunctionLike), - IsC99Varargs(MI.IsC99Varargs), - IsGNUVarargs(MI.IsGNUVarargs), - IsBuiltinMacro(MI.IsBuiltinMacro), - IsFromAST(MI.IsFromAST), - ChangedAfterLoad(MI.ChangedAfterLoad), - IsDisabled(MI.IsDisabled), - IsUsed(MI.IsUsed), - IsAllowRedefinitionsWithoutWarning(MI.IsAllowRedefinitionsWithoutWarning), - IsWarnIfUnused(MI.IsWarnIfUnused), - IsPublic(MI.IsPublic), - IsHidden(MI.IsHidden), - IsAmbiguous(MI.IsAmbiguous) { - setArgumentList(MI.ArgumentList, MI.NumArguments, PPAllocator); -} - -const MacroInfo *MacroInfo::findDefinitionAtLoc(SourceLocation L, - SourceManager &SM) const { - assert(L.isValid() && "SourceLocation is invalid."); - for (const MacroInfo *MI = this; MI; MI = MI->PreviousDefinition) { - if (MI->Location.isInvalid() || // For macros defined on the command line. - SM.isBeforeInTranslationUnit(MI->Location, L)) - return (MI->UndefLocation.isInvalid() || - SM.isBeforeInTranslationUnit(L, MI->UndefLocation)) ? MI : NULL; - } - return NULL; + FromASTFile(false) { } unsigned MacroInfo::getDefinitionLengthSlow(SourceManager &SM) const { @@ -103,11 +61,17 @@ unsigned MacroInfo::getDefinitionLengthSlow(SourceManager &SM) const { return DefinitionLength; } -/// isIdenticalTo - Return true if the specified macro definition is equal to -/// this macro in spelling, arguments, and whitespace. This is used to emit -/// duplicate definition warnings. This implements the rules in C99 6.10.3. +/// \brief Return true if the specified macro definition is equal to +/// this macro in spelling, arguments, and whitespace. /// -bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const { +/// \param Syntactically if true, the macro definitions can be identical even +/// if they use different identifiers for the function macro parameters. +/// Otherwise the comparison is lexical and this implements the rules in +/// C99 6.10.3. +bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP, + bool Syntactically) const { + bool Lexically = !Syntactically; + // Check # tokens in replacement, number of args, and various flags all match. if (ReplacementTokens.size() != Other.ReplacementTokens.size() || getNumArgs() != Other.getNumArgs() || @@ -116,10 +80,12 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const { isGNUVarargs() != Other.isGNUVarargs()) return false; - // Check arguments. - for (arg_iterator I = arg_begin(), OI = Other.arg_begin(), E = arg_end(); - I != E; ++I, ++OI) - if (*I != *OI) return false; + if (Lexically) { + // Check arguments. + for (arg_iterator I = arg_begin(), OI = Other.arg_begin(), E = arg_end(); + I != E; ++I, ++OI) + if (*I != *OI) return false; + } // Check all the tokens. for (unsigned i = 0, e = ReplacementTokens.size(); i != e; ++i) { @@ -137,7 +103,16 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const { // If this is an identifier, it is easy. if (A.getIdentifierInfo() || B.getIdentifierInfo()) { - if (A.getIdentifierInfo() != B.getIdentifierInfo()) + if (A.getIdentifierInfo() == B.getIdentifierInfo()) + continue; + if (Lexically) + return false; + // With syntactic equivalence the parameter names can be different as long + // as they are used in the same place. + int AArgNum = getArgumentNum(A.getIdentifierInfo()); + if (AArgNum == -1) + return false; + if (AArgNum != Other.getArgumentNum(B.getIdentifierInfo())) return false; continue; } @@ -149,3 +124,41 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const { return true; } + +MacroDirective::DefInfo MacroDirective::getDefinition(bool AllowHidden) { + MacroDirective *MD = this; + SourceLocation UndefLoc; + Optional<bool> isPublic; + for (; MD; MD = MD->getPrevious()) { + if (!AllowHidden && MD->isHidden()) + continue; + + if (DefMacroDirective *DefMD = dyn_cast<DefMacroDirective>(MD)) + return DefInfo(DefMD, UndefLoc, + !isPublic.hasValue() || isPublic.getValue()); + + if (UndefMacroDirective *UndefMD = dyn_cast<UndefMacroDirective>(MD)) { + UndefLoc = UndefMD->getLocation(); + continue; + } + + VisibilityMacroDirective *VisMD = cast<VisibilityMacroDirective>(MD); + if (!isPublic.hasValue()) + isPublic = VisMD->isPublic(); + } + + return DefInfo(); +} + +const MacroDirective::DefInfo +MacroDirective::findDirectiveAtLoc(SourceLocation L, SourceManager &SM) const { + assert(L.isValid() && "SourceLocation is invalid."); + for (DefInfo Def = getDefinition(); Def; Def = Def.getPreviousDefinition()) { + if (Def.getLocation().isInvalid() || // For macros defined on the command line. + SM.isBeforeInTranslationUnit(Def.getLocation(), L)) + return (!Def.isUndefined() || + SM.isBeforeInTranslationUnit(L, Def.getUndefLocation())) + ? Def : DefInfo(); + } + return DefInfo(); +} diff --git a/lib/Lex/ModuleMap.cpp b/lib/Lex/ModuleMap.cpp index 8a936fa..0c03201 100644 --- a/lib/Lex/ModuleMap.cpp +++ b/lib/Lex/ModuleMap.cpp @@ -12,68 +12,82 @@ // //===----------------------------------------------------------------------===// #include "clang/Lex/ModuleMap.h" -#include "clang/Lex/Lexer.h" -#include "clang/Lex/LiteralSupport.h" -#include "clang/Lex/LexDiagnostic.h" +#include "clang/Basic/CharInfo.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/Lexer.h" +#include "clang/Lex/LiteralSupport.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Host.h" #include "llvm/Support/PathV2.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" #include <stdlib.h> +#if defined(LLVM_ON_UNIX) +#include <limits.h> +#endif using namespace clang; Module::ExportDecl ModuleMap::resolveExport(Module *Mod, const Module::UnresolvedExportDecl &Unresolved, - bool Complain) { + bool Complain) const { // We may have just a wildcard. if (Unresolved.Id.empty()) { assert(Unresolved.Wildcard && "Invalid unresolved export"); return Module::ExportDecl(0, true); } + // Resolve the module-id. + Module *Context = resolveModuleId(Unresolved.Id, Mod, Complain); + if (!Context) + return Module::ExportDecl(); + + return Module::ExportDecl(Context, Unresolved.Wildcard); +} + +Module *ModuleMap::resolveModuleId(const ModuleId &Id, Module *Mod, + bool Complain) const { // Find the starting module. - Module *Context = lookupModuleUnqualified(Unresolved.Id[0].first, Mod); + Module *Context = lookupModuleUnqualified(Id[0].first, Mod); if (!Context) { if (Complain) - Diags->Report(Unresolved.Id[0].second, - diag::err_mmap_missing_module_unqualified) - << Unresolved.Id[0].first << Mod->getFullModuleName(); - - return Module::ExportDecl(); + Diags->Report(Id[0].second, diag::err_mmap_missing_module_unqualified) + << Id[0].first << Mod->getFullModuleName(); + + return 0; } // Dig into the module path. - for (unsigned I = 1, N = Unresolved.Id.size(); I != N; ++I) { - Module *Sub = lookupModuleQualified(Unresolved.Id[I].first, - Context); + for (unsigned I = 1, N = Id.size(); I != N; ++I) { + Module *Sub = lookupModuleQualified(Id[I].first, Context); if (!Sub) { if (Complain) - Diags->Report(Unresolved.Id[I].second, - diag::err_mmap_missing_module_qualified) - << Unresolved.Id[I].first << Context->getFullModuleName() - << SourceRange(Unresolved.Id[0].second, Unresolved.Id[I-1].second); - - return Module::ExportDecl(); + Diags->Report(Id[I].second, diag::err_mmap_missing_module_qualified) + << Id[I].first << Context->getFullModuleName() + << SourceRange(Id[0].second, Id[I-1].second); + + return 0; } - + Context = Sub; } - - return Module::ExportDecl(Context, Unresolved.Wildcard); + + return Context; } ModuleMap::ModuleMap(FileManager &FileMgr, const DiagnosticConsumer &DC, - const LangOptions &LangOpts, const TargetInfo *Target) - : LangOpts(LangOpts), Target(Target), BuiltinIncludeDir(0) + const LangOptions &LangOpts, const TargetInfo *Target, + HeaderSearch &HeaderInfo) + : LangOpts(LangOpts), Target(Target), HeaderInfo(HeaderInfo), + BuiltinIncludeDir(0) { IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs); Diags = IntrusiveRefCntPtr<DiagnosticsEngine>( @@ -104,26 +118,15 @@ static StringRef sanitizeFilenameAsIdentifier(StringRef Name, if (Name.empty()) return Name; - // Check whether the filename is already an identifier; this is the common - // case. - bool isIdentifier = true; - for (unsigned I = 0, N = Name.size(); I != N; ++I) { - if (isalpha(Name[I]) || Name[I] == '_' || (isdigit(Name[I]) && I > 0)) - continue; - - isIdentifier = false; - break; - } - - if (!isIdentifier) { + if (!isValidIdentifier(Name)) { // If we don't already have something with the form of an identifier, // create a buffer with the sanitized name. Buffer.clear(); - if (isdigit(Name[0])) + if (isDigit(Name[0])) Buffer.push_back('_'); Buffer.reserve(Buffer.size() + Name.size()); for (unsigned I = 0, N = Name.size(); I != N; ++I) { - if (isalnum(Name[I]) || isspace(Name[I])) + if (isIdentifierBody(Name[I])) Buffer.push_back(Name[I]); else Buffer.push_back('_'); @@ -157,8 +160,13 @@ Module *ModuleMap::findModuleForHeader(const FileEntry *File) { } const DirectoryEntry *Dir = File->getDir(); - llvm::SmallVector<const DirectoryEntry *, 2> SkippedDirs; - StringRef DirName = Dir->getName(); + SmallVector<const DirectoryEntry *, 2> SkippedDirs; + + // Note: as an egregious but useful hack we use the real path here, because + // frameworks moving from top-level frameworks to embedded frameworks tend + // to be symlinked from the top-level location to the embedded location, + // and we need to resolve lookups as if we had found the embedded location. + StringRef DirName = SourceMgr->getFileManager().getCanonicalName(Dir); // Keep walking up the directory hierarchy, looking for a directory with // an umbrella header. @@ -204,7 +212,7 @@ Module *ModuleMap::findModuleForHeader(const FileEntry *File) { llvm::sys::path::stem(File->getName()), NameBuf); Result = findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit).first; - Result->TopHeaders.insert(File); + Result->addTopHeader(File); // If inferred submodules export everything they import, add a // wildcard to the set of exports. @@ -241,19 +249,19 @@ Module *ModuleMap::findModuleForHeader(const FileEntry *File) { return 0; } -bool ModuleMap::isHeaderInUnavailableModule(const FileEntry *Header) { - HeadersMap::iterator Known = Headers.find(Header); +bool ModuleMap::isHeaderInUnavailableModule(const FileEntry *Header) const { + HeadersMap::const_iterator Known = Headers.find(Header); if (Known != Headers.end()) return !Known->second.isAvailable(); const DirectoryEntry *Dir = Header->getDir(); - llvm::SmallVector<const DirectoryEntry *, 2> SkippedDirs; + SmallVector<const DirectoryEntry *, 2> SkippedDirs; StringRef DirName = Dir->getName(); // Keep walking up the directory hierarchy, looking for a directory with // an umbrella header. do { - llvm::DenseMap<const DirectoryEntry *, Module *>::iterator KnownDir + llvm::DenseMap<const DirectoryEntry *, Module *>::const_iterator KnownDir = UmbrellaDirs.find(Dir); if (KnownDir != UmbrellaDirs.end()) { Module *Found = KnownDir->second; @@ -307,15 +315,16 @@ bool ModuleMap::isHeaderInUnavailableModule(const FileEntry *Header) { return false; } -Module *ModuleMap::findModule(StringRef Name) { - llvm::StringMap<Module *>::iterator Known = Modules.find(Name); +Module *ModuleMap::findModule(StringRef Name) const { + llvm::StringMap<Module *>::const_iterator Known = Modules.find(Name); if (Known != Modules.end()) return Known->getValue(); return 0; } -Module *ModuleMap::lookupModuleUnqualified(StringRef Name, Module *Context) { +Module *ModuleMap::lookupModuleUnqualified(StringRef Name, + Module *Context) const { for(; Context; Context = Context->Parent) { if (Module *Sub = lookupModuleQualified(Name, Context)) return Sub; @@ -324,7 +333,7 @@ Module *ModuleMap::lookupModuleUnqualified(StringRef Name, Module *Context) { return findModule(Name); } -Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) { +Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{ if (!Context) return findModule(Name); @@ -347,10 +356,10 @@ ModuleMap::findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework, } bool ModuleMap::canInferFrameworkModule(const DirectoryEntry *ParentDir, - StringRef Name, bool &IsSystem) { + StringRef Name, bool &IsSystem) const { // Check whether we have already looked into the parent directory // for a module map. - llvm::DenseMap<const DirectoryEntry *, InferredDirectory>::iterator + llvm::DenseMap<const DirectoryEntry *, InferredDirectory>::const_iterator inferred = InferredDirectories.find(ParentDir); if (inferred == InferredDirectories.end()) return false; @@ -370,6 +379,23 @@ bool ModuleMap::canInferFrameworkModule(const DirectoryEntry *ParentDir, return canInfer; } +/// \brief For a framework module, infer the framework against which we +/// should link. +static void inferFrameworkLink(Module *Mod, const DirectoryEntry *FrameworkDir, + FileManager &FileMgr) { + assert(Mod->IsFramework && "Can only infer linking for framework modules"); + assert(!Mod->isSubFramework() && + "Can only infer linking for top-level frameworks"); + + SmallString<128> LibName; + LibName += FrameworkDir->getName(); + llvm::sys::path::append(LibName, Mod->Name); + if (FileMgr.getFile(LibName)) { + Mod->LinkLibraries.push_back(Module::LinkLibrary(Mod->Name, + /*IsFramework=*/true)); + } +} + Module * ModuleMap::inferFrameworkModule(StringRef ModuleName, const DirectoryEntry *FrameworkDir, @@ -384,14 +410,23 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName, // If the framework has a parent path from which we're allowed to infer // a framework module, do so. if (!Parent) { + // Determine whether we're allowed to infer a module map. + + // Note: as an egregious but useful hack we use the real path here, because + // we might be looking at an embedded framework that symlinks out to a + // top-level framework, and we need to infer as if we were naming the + // top-level framework. + StringRef FrameworkDirName + = SourceMgr->getFileManager().getCanonicalName(FrameworkDir); + bool canInfer = false; - if (llvm::sys::path::has_parent_path(FrameworkDir->getName())) { + if (llvm::sys::path::has_parent_path(FrameworkDirName)) { // Figure out the parent path. - StringRef Parent = llvm::sys::path::parent_path(FrameworkDir->getName()); + StringRef Parent = llvm::sys::path::parent_path(FrameworkDirName); if (const DirectoryEntry *ParentDir = FileMgr.getDirectory(Parent)) { // Check whether we have already looked into the parent directory // for a module map. - llvm::DenseMap<const DirectoryEntry *, InferredDirectory>::iterator + llvm::DenseMap<const DirectoryEntry *, InferredDirectory>::const_iterator inferred = InferredDirectories.find(ParentDir); if (inferred == InferredDirectories.end()) { // We haven't looked here before. Load a module map, if there is @@ -411,7 +446,7 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName, if (inferred->second.InferModules) { // We're allowed to infer for this directory, but make sure it's okay // to infer this particular module. - StringRef Name = llvm::sys::path::filename(FrameworkDir->getName()); + StringRef Name = llvm::sys::path::stem(FrameworkDirName); canInfer = std::find(inferred->second.ExcludedModules.begin(), inferred->second.ExcludedModules.end(), Name) == inferred->second.ExcludedModules.end(); @@ -480,29 +515,23 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName, // check whether it is actually a subdirectory of the parent directory. // This will not be the case if the 'subframework' is actually a symlink // out to a top-level framework. -#ifdef LLVM_ON_UNIX - char RealSubframeworkDirName[PATH_MAX]; - if (realpath(Dir->path().c_str(), RealSubframeworkDirName)) { - StringRef SubframeworkDirName = RealSubframeworkDirName; - - bool FoundParent = false; - do { - // Get the parent directory name. - SubframeworkDirName - = llvm::sys::path::parent_path(SubframeworkDirName); - if (SubframeworkDirName.empty()) - break; - - if (FileMgr.getDirectory(SubframeworkDirName) == FrameworkDir) { - FoundParent = true; - break; - } - } while (true); + StringRef SubframeworkDirName = FileMgr.getCanonicalName(SubframeworkDir); + bool FoundParent = false; + do { + // Get the parent directory name. + SubframeworkDirName + = llvm::sys::path::parent_path(SubframeworkDirName); + if (SubframeworkDirName.empty()) + break; + + if (FileMgr.getDirectory(SubframeworkDirName) == FrameworkDir) { + FoundParent = true; + break; + } + } while (true); - if (!FoundParent) - continue; - } -#endif + if (!FoundParent) + continue; // FIXME: Do we want to warn about subframeworks without umbrella headers? SmallString<32> NameBuf; @@ -512,6 +541,12 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName, } } + // If the module is a top-level framework, automatically link against the + // framework. + if (!Result->isSubFramework()) { + inferFrameworkLink(Result, FrameworkDir, FileMgr); + } + return Result; } @@ -528,15 +563,17 @@ void ModuleMap::setUmbrellaDir(Module *Mod, const DirectoryEntry *UmbrellaDir) { void ModuleMap::addHeader(Module *Mod, const FileEntry *Header, bool Excluded) { - if (Excluded) + if (Excluded) { Mod->ExcludedHeaders.push_back(Header); - else + } else { Mod->Headers.push_back(Header); + HeaderInfo.MarkFileModuleHeader(Header); + } Headers[Header] = KnownHeader(Mod, Excluded); } const FileEntry * -ModuleMap::getContainingModuleMapFile(Module *Module) { +ModuleMap::getContainingModuleMapFile(Module *Module) const { if (Module->DefinitionLoc.isInvalid() || !SourceMgr) return 0; @@ -573,6 +610,25 @@ bool ModuleMap::resolveExports(Module *Mod, bool Complain) { return HadError; } +bool ModuleMap::resolveConflicts(Module *Mod, bool Complain) { + bool HadError = false; + for (unsigned I = 0, N = Mod->UnresolvedConflicts.size(); I != N; ++I) { + Module *OtherMod = resolveModuleId(Mod->UnresolvedConflicts[I].Id, + Mod, Complain); + if (!OtherMod) { + HadError = true; + continue; + } + + Module::Conflict Conflict; + Conflict.Other = OtherMod; + Conflict.Message = Mod->UnresolvedConflicts[I].Message; + Mod->Conflicts.push_back(Conflict); + } + Mod->UnresolvedConflicts.clear(); + return HadError; +} + Module *ModuleMap::inferModuleFromLocation(FullSourceLoc Loc) { if (Loc.isInvalid()) return 0; @@ -613,6 +669,8 @@ namespace clang { struct MMToken { enum TokenKind { Comma, + ConfigMacros, + Conflict, EndOfFile, HeaderKeyword, Identifier, @@ -620,6 +678,7 @@ namespace clang { ExplicitKeyword, ExportKeyword, FrameworkKeyword, + LinkKeyword, ModuleKeyword, Period, UmbrellaKeyword, @@ -656,10 +715,13 @@ namespace clang { /// \brief The set of attributes that can be attached to a module. struct Attributes { - Attributes() : IsSystem() { } + Attributes() : IsSystem(), IsExhaustive() { } /// \brief Whether this is a system module. unsigned IsSystem : 1; + + /// \brief Whether this is an exhaustive set of configuration macros. + unsigned IsExhaustive : 1; }; @@ -700,14 +762,16 @@ namespace clang { /// (or the end of the file). void skipUntil(MMToken::TokenKind K); - typedef llvm::SmallVector<std::pair<std::string, SourceLocation>, 2> - ModuleId; + typedef SmallVector<std::pair<std::string, SourceLocation>, 2> ModuleId; bool parseModuleId(ModuleId &Id); void parseModuleDecl(); void parseRequiresDecl(); void parseHeaderDecl(SourceLocation UmbrellaLoc, SourceLocation ExcludeLoc); void parseUmbrellaDirDecl(SourceLocation UmbrellaLoc); void parseExportDecl(); + void parseLinkDecl(); + void parseConfigMacros(); + void parseConflict(); void parseInferredModuleDecl(bool Framework, bool Explicit); bool parseOptionalAttributes(Attributes &Attrs); @@ -745,11 +809,14 @@ retry: Tok.StringData = LToken.getRawIdentifierData(); Tok.StringLength = LToken.getLength(); Tok.Kind = llvm::StringSwitch<MMToken::TokenKind>(Tok.getString()) - .Case("header", MMToken::HeaderKeyword) + .Case("config_macros", MMToken::ConfigMacros) + .Case("conflict", MMToken::Conflict) .Case("exclude", MMToken::ExcludeKeyword) .Case("explicit", MMToken::ExplicitKeyword) .Case("export", MMToken::ExportKeyword) .Case("framework", MMToken::FrameworkKeyword) + .Case("header", MMToken::HeaderKeyword) + .Case("link", MMToken::LinkKeyword) .Case("module", MMToken::ModuleKeyword) .Case("requires", MMToken::RequiresKeyword) .Case("umbrella", MMToken::UmbrellaKeyword) @@ -905,7 +972,9 @@ namespace { /// \brief An unknown attribute. AT_unknown, /// \brief The 'system' attribute. - AT_system + AT_system, + /// \brief The 'exhaustive' attribute. + AT_exhaustive }; } @@ -920,6 +989,7 @@ namespace { /// header-declaration /// submodule-declaration /// export-declaration +/// link-declaration /// /// submodule-declaration: /// module-declaration @@ -1061,7 +1131,15 @@ void ModuleMapParser::parseModuleDecl() { case MMToken::RBrace: Done = true; break; - + + case MMToken::ConfigMacros: + parseConfigMacros(); + break; + + case MMToken::Conflict: + parseConflict(); + break; + case MMToken::ExplicitKeyword: case MMToken::FrameworkKeyword: case MMToken::ModuleKeyword: @@ -1099,7 +1177,11 @@ void ModuleMapParser::parseModuleDecl() { case MMToken::HeaderKeyword: parseHeaderDecl(SourceLocation(), SourceLocation()); break; - + + case MMToken::LinkKeyword: + parseLinkDecl(); + break; + default: Diags.Report(Tok.getLocation(), diag::err_mmap_expected_member); consumeToken(); @@ -1115,6 +1197,13 @@ void ModuleMapParser::parseModuleDecl() { HadError = true; } + // If the active module is a top-level framework, and there are no link + // libraries, automatically link against the framework. + if (ActiveModule->IsFramework && !ActiveModule->isSubFramework() && + ActiveModule->LinkLibraries.empty()) { + inferFrameworkLink(ActiveModule, Directory, SourceMgr.getFileManager()); + } + // We're done parsing this module. Pop back to the previous module. ActiveModule = PreviousActiveModule; } @@ -1159,9 +1248,9 @@ void ModuleMapParser::parseRequiresDecl() { /// \brief Append to \p Paths the set of paths needed to get to the /// subframework in which the given module lives. static void appendSubframeworkPaths(Module *Mod, - llvm::SmallVectorImpl<char> &Path) { + SmallVectorImpl<char> &Path) { // Collect the framework names from the given module to the top-level module. - llvm::SmallVector<StringRef, 2> Paths; + SmallVector<StringRef, 2> Paths; for (; Mod; Mod = Mod->Parent) { if (Mod->IsFramework) Paths.push_back(Mod->Name); @@ -1307,7 +1396,9 @@ void ModuleMapParser::parseHeaderDecl(SourceLocation UmbrellaLoc, if (BuiltinFile) Map.addHeader(ActiveModule, BuiltinFile, Exclude); } - } else { + } else if (!Exclude) { + // Ignore excluded header files. They're optional anyway. + Diags.Report(FileNameLoc, diag::err_mmap_header_not_found) << Umbrella << FileName; HadError = true; @@ -1414,7 +1505,139 @@ void ModuleMapParser::parseExportDecl() { ActiveModule->UnresolvedExports.push_back(Unresolved); } -/// \brief Parse an inferried module declaration (wildcard modules). +/// \brief Parse a link declaration. +/// +/// module-declaration: +/// 'link' 'framework'[opt] string-literal +void ModuleMapParser::parseLinkDecl() { + assert(Tok.is(MMToken::LinkKeyword)); + SourceLocation LinkLoc = consumeToken(); + + // Parse the optional 'framework' keyword. + bool IsFramework = false; + if (Tok.is(MMToken::FrameworkKeyword)) { + consumeToken(); + IsFramework = true; + } + + // Parse the library name + if (!Tok.is(MMToken::StringLiteral)) { + Diags.Report(Tok.getLocation(), diag::err_mmap_expected_library_name) + << IsFramework << SourceRange(LinkLoc); + HadError = true; + return; + } + + std::string LibraryName = Tok.getString(); + consumeToken(); + ActiveModule->LinkLibraries.push_back(Module::LinkLibrary(LibraryName, + IsFramework)); +} + +/// \brief Parse a configuration macro declaration. +/// +/// module-declaration: +/// 'config_macros' attributes[opt] config-macro-list? +/// +/// config-macro-list: +/// identifier (',' identifier)? +void ModuleMapParser::parseConfigMacros() { + assert(Tok.is(MMToken::ConfigMacros)); + SourceLocation ConfigMacrosLoc = consumeToken(); + + // Only top-level modules can have configuration macros. + if (ActiveModule->Parent) { + Diags.Report(ConfigMacrosLoc, diag::err_mmap_config_macro_submodule); + } + + // Parse the optional attributes. + Attributes Attrs; + parseOptionalAttributes(Attrs); + if (Attrs.IsExhaustive && !ActiveModule->Parent) { + ActiveModule->ConfigMacrosExhaustive = true; + } + + // If we don't have an identifier, we're done. + if (!Tok.is(MMToken::Identifier)) + return; + + // Consume the first identifier. + if (!ActiveModule->Parent) { + ActiveModule->ConfigMacros.push_back(Tok.getString().str()); + } + consumeToken(); + + do { + // If there's a comma, consume it. + if (!Tok.is(MMToken::Comma)) + break; + consumeToken(); + + // We expect to see a macro name here. + if (!Tok.is(MMToken::Identifier)) { + Diags.Report(Tok.getLocation(), diag::err_mmap_expected_config_macro); + break; + } + + // Consume the macro name. + if (!ActiveModule->Parent) { + ActiveModule->ConfigMacros.push_back(Tok.getString().str()); + } + consumeToken(); + } while (true); +} + +/// \brief Format a module-id into a string. +static std::string formatModuleId(const ModuleId &Id) { + std::string result; + { + llvm::raw_string_ostream OS(result); + + for (unsigned I = 0, N = Id.size(); I != N; ++I) { + if (I) + OS << "."; + OS << Id[I].first; + } + } + + return result; +} + +/// \brief Parse a conflict declaration. +/// +/// module-declaration: +/// 'conflict' module-id ',' string-literal +void ModuleMapParser::parseConflict() { + assert(Tok.is(MMToken::Conflict)); + SourceLocation ConflictLoc = consumeToken(); + Module::UnresolvedConflict Conflict; + + // Parse the module-id. + if (parseModuleId(Conflict.Id)) + return; + + // Parse the ','. + if (!Tok.is(MMToken::Comma)) { + Diags.Report(Tok.getLocation(), diag::err_mmap_expected_conflicts_comma) + << SourceRange(ConflictLoc); + return; + } + consumeToken(); + + // Parse the message. + if (!Tok.is(MMToken::StringLiteral)) { + Diags.Report(Tok.getLocation(), diag::err_mmap_expected_conflicts_message) + << formatModuleId(Conflict.Id); + return; + } + Conflict.Message = Tok.getString().str(); + consumeToken(); + + // Add this unresolved conflict. + ActiveModule->UnresolvedConflicts.push_back(Conflict); +} + +/// \brief Parse an inferred module declaration (wildcard modules). /// /// module-declaration: /// 'explicit'[opt] 'framework'[opt] 'module' * attributes[opt] @@ -1593,6 +1816,7 @@ bool ModuleMapParser::parseOptionalAttributes(Attributes &Attrs) { // Decode the attribute name. AttributeKind Attribute = llvm::StringSwitch<AttributeKind>(Tok.getString()) + .Case("exhaustive", AT_exhaustive) .Case("system", AT_system) .Default(AT_unknown); switch (Attribute) { @@ -1604,6 +1828,10 @@ bool ModuleMapParser::parseOptionalAttributes(Attributes &Attrs) { case AT_system: Attrs.IsSystem = true; break; + + case AT_exhaustive: + Attrs.IsExhaustive = true; + break; } consumeToken(); @@ -1653,13 +1881,16 @@ bool ModuleMapParser::parseModuleMapFile() { case MMToken::FrameworkKeyword: parseModuleDecl(); break; - + case MMToken::Comma: + case MMToken::ConfigMacros: + case MMToken::Conflict: case MMToken::ExcludeKeyword: case MMToken::ExportKeyword: case MMToken::HeaderKeyword: case MMToken::Identifier: case MMToken::LBrace: + case MMToken::LinkKeyword: case MMToken::LSquare: case MMToken::Period: case MMToken::RBrace: @@ -1677,11 +1908,16 @@ bool ModuleMapParser::parseModuleMapFile() { } bool ModuleMap::parseModuleMapFile(const FileEntry *File) { + llvm::DenseMap<const FileEntry *, bool>::iterator Known + = ParsedModuleMap.find(File); + if (Known != ParsedModuleMap.end()) + return Known->second; + assert(Target != 0 && "Missing target information"); FileID ID = SourceMgr->createFileID(File, SourceLocation(), SrcMgr::C_User); const llvm::MemoryBuffer *Buffer = SourceMgr->getBuffer(ID); if (!Buffer) - return true; + return ParsedModuleMap[File] = true; // Parse this module map file. Lexer L(ID, SourceMgr->getBuffer(ID), *SourceMgr, MMapLangOpts); @@ -1690,6 +1926,6 @@ bool ModuleMap::parseModuleMapFile(const FileEntry *File) { BuiltinIncludeDir); bool Result = Parser.parseModuleMapFile(); Diags->getClient()->EndSourceFile(); - + ParsedModuleMap[File] = Result; return Result; } diff --git a/lib/Lex/PPConditionalDirectiveRecord.cpp b/lib/Lex/PPConditionalDirectiveRecord.cpp new file mode 100644 index 0000000..16ce3ef --- /dev/null +++ b/lib/Lex/PPConditionalDirectiveRecord.cpp @@ -0,0 +1,120 @@ +//===--- PPConditionalDirectiveRecord.h - Preprocessing Directives-*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PPConditionalDirectiveRecord class, which maintains +// a record of conditional directive regions. +// +//===----------------------------------------------------------------------===// +#include "clang/Lex/PPConditionalDirectiveRecord.h" +#include "llvm/Support/Capacity.h" + +using namespace clang; + +PPConditionalDirectiveRecord::PPConditionalDirectiveRecord(SourceManager &SM) + : SourceMgr(SM) { + CondDirectiveStack.push_back(SourceLocation()); +} + +bool PPConditionalDirectiveRecord::rangeIntersectsConditionalDirective( + SourceRange Range) const { + if (Range.isInvalid()) + return false; + + CondDirectiveLocsTy::const_iterator + low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(), + Range.getBegin(), CondDirectiveLoc::Comp(SourceMgr)); + if (low == CondDirectiveLocs.end()) + return false; + + if (SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), low->getLoc())) + return false; + + CondDirectiveLocsTy::const_iterator + upp = std::upper_bound(low, CondDirectiveLocs.end(), + Range.getEnd(), CondDirectiveLoc::Comp(SourceMgr)); + SourceLocation uppRegion; + if (upp != CondDirectiveLocs.end()) + uppRegion = upp->getRegionLoc(); + + return low->getRegionLoc() != uppRegion; +} + +SourceLocation PPConditionalDirectiveRecord::findConditionalDirectiveRegionLoc( + SourceLocation Loc) const { + if (Loc.isInvalid()) + return SourceLocation(); + if (CondDirectiveLocs.empty()) + return SourceLocation(); + + if (SourceMgr.isBeforeInTranslationUnit(CondDirectiveLocs.back().getLoc(), + Loc)) + return CondDirectiveStack.back(); + + CondDirectiveLocsTy::const_iterator + low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(), + Loc, CondDirectiveLoc::Comp(SourceMgr)); + assert(low != CondDirectiveLocs.end()); + return low->getRegionLoc(); +} + +void PPConditionalDirectiveRecord::addCondDirectiveLoc( + CondDirectiveLoc DirLoc) { + // Ignore directives in system headers. + if (SourceMgr.isInSystemHeader(DirLoc.getLoc())) + return; + + assert(CondDirectiveLocs.empty() || + SourceMgr.isBeforeInTranslationUnit(CondDirectiveLocs.back().getLoc(), + DirLoc.getLoc())); + CondDirectiveLocs.push_back(DirLoc); +} + +void PPConditionalDirectiveRecord::If(SourceLocation Loc, + SourceRange ConditionRange) { + addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); + CondDirectiveStack.push_back(Loc); +} + +void PPConditionalDirectiveRecord::Ifdef(SourceLocation Loc, + const Token &MacroNameTok, + const MacroDirective *MD) { + addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); + CondDirectiveStack.push_back(Loc); +} + +void PPConditionalDirectiveRecord::Ifndef(SourceLocation Loc, + const Token &MacroNameTok, + const MacroDirective *MD) { + addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); + CondDirectiveStack.push_back(Loc); +} + +void PPConditionalDirectiveRecord::Elif(SourceLocation Loc, + SourceRange ConditionRange, + SourceLocation IfLoc) { + addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); + CondDirectiveStack.back() = Loc; +} + +void PPConditionalDirectiveRecord::Else(SourceLocation Loc, + SourceLocation IfLoc) { + addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); + CondDirectiveStack.back() = Loc; +} + +void PPConditionalDirectiveRecord::Endif(SourceLocation Loc, + SourceLocation IfLoc) { + addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); + assert(!CondDirectiveStack.empty()); + CondDirectiveStack.pop_back(); +} + +size_t PPConditionalDirectiveRecord::getTotalMemory() const { + return llvm::capacity_in_bytes(CondDirectiveLocs); +} diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp index b7c1846..07c1867 100644 --- a/lib/Lex/PPDirectives.cpp +++ b/lib/Lex/PPDirectives.cpp @@ -13,17 +13,18 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/Preprocessor.h" -#include "clang/Lex/LiteralSupport.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/CodeCompletionHandler.h" #include "clang/Lex/HeaderSearch.h" -#include "clang/Lex/MacroInfo.h" #include "clang/Lex/LexDiagnostic.h" -#include "clang/Lex/CodeCompletionHandler.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Lex/MacroInfo.h" #include "clang/Lex/ModuleLoader.h" #include "clang/Lex/Pragma.h" -#include "clang/Basic/FileManager.h" -#include "clang/Basic/SourceManager.h" #include "llvm/ADT/APInt.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SaveAndRestore.h" using namespace clang; //===----------------------------------------------------------------------===// @@ -56,12 +57,42 @@ MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) { return MI; } -MacroInfo *Preprocessor::CloneMacroInfo(const MacroInfo &MacroToClone) { - MacroInfo *MI = AllocateMacroInfo(); - new (MI) MacroInfo(MacroToClone, BP); +MacroInfo *Preprocessor::AllocateDeserializedMacroInfo(SourceLocation L, + unsigned SubModuleID) { + LLVM_STATIC_ASSERT(llvm::AlignOf<MacroInfo>::Alignment >= sizeof(SubModuleID), + "alignment for MacroInfo is less than the ID"); + MacroInfo *MI = + (MacroInfo*)BP.Allocate(sizeof(MacroInfo) + sizeof(SubModuleID), + llvm::AlignOf<MacroInfo>::Alignment); + new (MI) MacroInfo(L); + MI->FromASTFile = true; + MI->setOwningModuleID(SubModuleID); return MI; } +DefMacroDirective * +Preprocessor::AllocateDefMacroDirective(MacroInfo *MI, SourceLocation Loc, + bool isImported) { + DefMacroDirective *MD = BP.Allocate<DefMacroDirective>(); + new (MD) DefMacroDirective(MI, Loc, isImported); + return MD; +} + +UndefMacroDirective * +Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) { + UndefMacroDirective *MD = BP.Allocate<UndefMacroDirective>(); + new (MD) UndefMacroDirective(UndefLoc); + return MD; +} + +VisibilityMacroDirective * +Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc, + bool isPublic) { + VisibilityMacroDirective *MD = BP.Allocate<VisibilityMacroDirective>(); + new (MD) VisibilityMacroDirective(Loc, isPublic); + return MD; +} + /// \brief Release the specified MacroInfo to be reused for allocating /// new MacroInfo objects. void Preprocessor::ReleaseMacroInfo(MacroInfo *MI) { @@ -140,15 +171,14 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) { Diag(MacroNameTok, diag::err_pp_macro_not_identifier); // Fall through on error. } else if (isDefineUndef && II->getPPKeywordID() == tok::pp_defined) { - // Error if defining "defined": C99 6.10.8.4. + // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4. Diag(MacroNameTok, diag::err_defined_macro_name); - } else if (isDefineUndef && II->hasMacroDefinition() && + } else if (isDefineUndef == 2 && II->hasMacroDefinition() && getMacroInfo(II)->isBuiltinMacro()) { - // Error if defining "__LINE__" and other builtins: C99 6.10.8.4. - if (isDefineUndef == 1) - Diag(MacroNameTok, diag::pp_redef_builtin_macro); - else - Diag(MacroNameTok, diag::pp_undef_builtin_macro); + // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4 + // and C++ [cpp.predefined]p4], but allow it as an extension. + Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro); + return; } else { // Okay, we got a good identifier node. Return it. return; @@ -255,7 +285,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // directive mode. Tell the lexer this so any newlines we see will be // converted into an EOD token (this terminates the macro). CurPPLexer->ParsingPreprocessorDirective = true; - if (CurLexer) CurLexer->SetCommentRetentionState(false); + if (CurLexer) CurLexer->SetKeepWhitespaceMode(false); // Read the next token, the directive flavor. @@ -266,7 +296,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, if (Tok.isNot(tok::raw_identifier)) { CurPPLexer->ParsingPreprocessorDirective = false; // Restore comment saving mode. - if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments); + if (CurLexer) CurLexer->resetExtendedTokenMode(); continue; } @@ -282,7 +312,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, FirstChar != 'i' && FirstChar != 'e') { CurPPLexer->ParsingPreprocessorDirective = false; // Restore comment saving mode. - if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments); + if (CurLexer) CurLexer->resetExtendedTokenMode(); continue; } @@ -299,7 +329,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, if (IdLen >= 20) { CurPPLexer->ParsingPreprocessorDirective = false; // Restore comment saving mode. - if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments); + if (CurLexer) CurLexer->resetExtendedTokenMode(); continue; } memcpy(DirectiveBuf, &DirectiveStr[0], IdLen); @@ -405,7 +435,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, CurPPLexer->ParsingPreprocessorDirective = false; // Restore comment saving mode. - if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments); + if (CurLexer) CurLexer->resetExtendedTokenMode(); } // Finally, if we are out of the conditional (saw an #endif or ran off the end @@ -536,11 +566,11 @@ const FileEntry *Preprocessor::LookupFile( // Otherwise, see if this is a subframework header. If so, this is relative // to one of the headers on the #include stack. Walk the list of the current // headers on the #include stack and pass them to HeaderInfo. - // FIXME: SuggestedModule! if (IsFileLexer()) { if ((CurFileEnt = SourceMgr.getFileEntryForID(CurPPLexer->getFileID()))) if ((FE = HeaderInfo.LookupSubframeworkHeader(Filename, CurFileEnt, - SearchPath, RelativePath))) + SearchPath, RelativePath, + SuggestedModule))) return FE; } @@ -550,7 +580,8 @@ const FileEntry *Preprocessor::LookupFile( if ((CurFileEnt = SourceMgr.getFileEntryForID(ISEntry.ThePPLexer->getFileID()))) if ((FE = HeaderInfo.LookupSubframeworkHeader( - Filename, CurFileEnt, SearchPath, RelativePath))) + Filename, CurFileEnt, SearchPath, RelativePath, + SuggestedModule))) return FE; } } @@ -590,6 +621,7 @@ void Preprocessor::HandleDirective(Token &Result) { // mode. Tell the lexer this so any newlines we see will be converted into an // EOD token (which terminates the directive). CurPPLexer->ParsingPreprocessorDirective = true; + if (CurLexer) CurLexer->SetKeepWhitespaceMode(false); ++NumDirectives; @@ -634,14 +666,9 @@ void Preprocessor::HandleDirective(Token &Result) { // and reset to previous state when returning from this function. ResetMacroExpansionHelper helper(this); -TryAgain: switch (Result.getKind()) { case tok::eod: return; // null directive. - case tok::comment: - // Handle stuff like "# /*foo*/ define X" in -E -C mode. - LexUnexpandedToken(Result); - goto TryAgain; case tok::code_completion: if (CodeComplete) CodeComplete->CodeCompleteDirective( @@ -788,7 +815,7 @@ static bool GetLineValue(Token &DigitTok, unsigned &Val, // here. Val = 0; for (unsigned i = 0; i != ActualLength; ++i) { - if (!isdigit(DigitTokBegin[i])) { + if (!isDigit(DigitTokBegin[i])) { PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i), diag::err_pp_line_digit_sequence); PP.DiscardUntilEndOfDirective(); @@ -834,11 +861,11 @@ void Preprocessor::HandleLineDirective(Token &Tok) { // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a // number greater than 2147483647". C90 requires that the line # be <= 32767. unsigned LineLimit = 32768U; - if (LangOpts.C99 || LangOpts.CPlusPlus0x) + if (LangOpts.C99 || LangOpts.CPlusPlus11) LineLimit = 2147483648U; if (LineNo >= LineLimit) Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit; - else if (LangOpts.CPlusPlus0x && LineNo >= 32768U) + else if (LangOpts.CPlusPlus11 && LineNo >= 32768U) Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big); int FilenameID = -1; @@ -1107,23 +1134,19 @@ void Preprocessor::HandleMacroPublicDirective(Token &Tok) { // Check to see if this is the last token on the #__public_macro line. CheckEndOfDirective("__public_macro"); + IdentifierInfo *II = MacroNameTok.getIdentifierInfo(); // Okay, we finally have a valid identifier to undef. - MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo()); + MacroDirective *MD = getMacroDirective(II); // If the macro is not defined, this is an error. - if (MI == 0) { - Diag(MacroNameTok, diag::err_pp_visibility_non_macro) - << MacroNameTok.getIdentifierInfo(); + if (MD == 0) { + Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II; return; } // Note that this macro has now been exported. - MI->setVisibility(/*IsPublic=*/true, MacroNameTok.getLocation()); - - // If this macro definition came from a PCH file, mark it - // as having changed since serialization. - if (MI->isFromAST()) - MI->setChangedAfterLoad(); + appendMacroDirective(II, AllocateVisibilityMacroDirective( + MacroNameTok.getLocation(), /*IsPublic=*/true)); } /// \brief Handle a #private directive. @@ -1138,23 +1161,19 @@ void Preprocessor::HandleMacroPrivateDirective(Token &Tok) { // Check to see if this is the last token on the #__private_macro line. CheckEndOfDirective("__private_macro"); + IdentifierInfo *II = MacroNameTok.getIdentifierInfo(); // Okay, we finally have a valid identifier to undef. - MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo()); + MacroDirective *MD = getMacroDirective(II); // If the macro is not defined, this is an error. - if (MI == 0) { - Diag(MacroNameTok, diag::err_pp_visibility_non_macro) - << MacroNameTok.getIdentifierInfo(); + if (MD == 0) { + Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II; return; } // Note that this macro has now been marked private. - MI->setVisibility(/*IsPublic=*/false, MacroNameTok.getLocation()); - - // If this macro definition came from a PCH file, mark it - // as having changed since serialization. - if (MI->isFromAST()) - MI->setChangedAfterLoad(); + appendMacroDirective(II, AllocateVisibilityMacroDirective( + MacroNameTok.getLocation(), /*IsPublic=*/false)); } //===----------------------------------------------------------------------===// @@ -1375,7 +1394,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, if (Callbacks->FileNotFound(Filename, RecoveryPath)) { if (const DirectoryEntry *DE = FileMgr.getDirectory(RecoveryPath)) { // Add the recovery path to the list of search paths. - DirectoryLookup DL(DE, SrcMgr::C_User, true, false); + DirectoryLookup DL(DE, SrcMgr::C_User, false); HeaderInfo.AddSearchPath(DL, isAngled); // Try the lookup again, skipping the cache. @@ -1426,7 +1445,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // Compute the module access path corresponding to this module. // FIXME: Should we have a second loadModule() overload to avoid this // extra lookup step? - llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path; + SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path; for (Module *Mod = SuggestedModule; Mod; Mod = Mod->Parent) Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name), FilenameTok.getLocation())); @@ -1476,14 +1495,14 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, Diag(HashLoc, diag::warn_auto_module_import) << IncludeKind << PathString << FixItHint::CreateReplacement(ReplaceRange, - "@__experimental_modules_import " + PathString.str().str() + ";"); + "@import " + PathString.str().str() + ";"); } // Load the module. // If this was an #__include_macros directive, only make macros visible. Module::NameVisibilityKind Visibility = (IncludeKind == 3)? Module::MacrosVisible : Module::AllVisible; - Module *Imported + ModuleLoadResult Imported = TheModuleLoader.loadModule(IncludeTok.getLocation(), Path, Visibility, /*IsIncludeDirective=*/true); assert((Imported == 0 || Imported == SuggestedModule) && @@ -1498,6 +1517,13 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, } return; } + + // If we failed to find a submodule that we expected to find, we can + // continue. Otherwise, there's an error in the included file, so we + // don't want to include it. + if (!BuildingImportedModule && !Imported.isMissingExpected()) { + return; + } } if (Callbacks && SuggestedModule) { @@ -1637,10 +1663,16 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI, Token &Tok) { return true; case tok::ellipsis: // #define X(... -> C99 varargs if (!LangOpts.C99) - Diag(Tok, LangOpts.CPlusPlus0x ? + Diag(Tok, LangOpts.CPlusPlus11 ? diag::warn_cxx98_compat_variadic_macro : diag::ext_variadic_macro); + // OpenCL v1.2 s6.9.e: variadic macros are not supported. + if (LangOpts.OpenCL) { + Diag(Tok, diag::err_pp_opencl_variadic_macros); + return true; + } + // Lex the token after the identifier. LexUnexpandedToken(Tok); if (Tok.isNot(tok::r_paren)) { @@ -1763,7 +1795,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { // Read the first token after the arg list for down below. LexUnexpandedToken(Tok); - } else if (LangOpts.C99 || LangOpts.CPlusPlus0x) { + } else if (LangOpts.C99 || LangOpts.CPlusPlus11) { // C99 requires whitespace between the macro definition and the body. Emit // a diagnostic for something like "#define X+". Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name); @@ -1809,8 +1841,37 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { while (Tok.isNot(tok::eod)) { LastTok = Tok; - if (Tok.isNot(tok::hash)) { + if (Tok.isNot(tok::hash) && Tok.isNot(tok::hashhash)) { + MI->AddTokenToBody(Tok); + + // Get the next token of the macro. + LexUnexpandedToken(Tok); + continue; + } + + if (Tok.is(tok::hashhash)) { + + // If we see token pasting, check if it looks like the gcc comma + // pasting extension. We'll use this information to suppress + // diagnostics later on. + + // Get the next token of the macro. + LexUnexpandedToken(Tok); + + if (Tok.is(tok::eod)) { + MI->AddTokenToBody(LastTok); + break; + } + + unsigned NumTokens = MI->getNumTokens(); + if (NumTokens && Tok.getIdentifierInfo() == Ident__VA_ARGS__ && + MI->getReplacementToken(NumTokens-1).is(tok::comma)) + MI->setHasCommaPasting(); + + // Things look ok, add the '##' and param name tokens to the macro. + MI->AddTokenToBody(LastTok); MI->AddTokenToBody(Tok); + LastTok = Tok; // Get the next token of the macro. LexUnexpandedToken(Tok); @@ -1874,7 +1935,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { // Finally, if this identifier already had a macro defined for it, verify that // the macro bodies are identical, and issue diagnostics if they are not. - if (MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo())) { + if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) { // It is very common for system headers to have tons of macro redefinitions // and for warnings to be disabled in system headers. If this is the case, // then don't bother calling MacroInfo::isIdenticalTo. @@ -1883,10 +1944,14 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused()) Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used); + // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and + // C++ [cpp.predefined]p4, but allow it as an extension. + if (OtherMI->isBuiltinMacro()) + Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro); // Macros must be identical. This means all tokens and whitespace - // separation must be the same. C99 6.10.3.2. - if (!OtherMI->isAllowRedefinitionsWithoutWarning() && - !MI->isIdenticalTo(*OtherMI, *this)) { + // separation must be the same. C99 6.10.3p2. + else if (!OtherMI->isAllowRedefinitionsWithoutWarning() && + !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) { Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef) << MacroNameTok.getIdentifierInfo(); Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition); @@ -1896,7 +1961,8 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc()); } - setMacroInfo(MacroNameTok.getIdentifierInfo(), MI); + DefMacroDirective *MD = + appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI); assert(!MI->isUsed()); // If we need warning for not using the macro, add its location in the @@ -1910,7 +1976,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { // If the callbacks want to know, tell them about the macro definition. if (Callbacks) - Callbacks->MacroDefined(MacroNameTok, MI); + Callbacks->MacroDefined(MacroNameTok, MD); } /// HandleUndefDirective - Implements \#undef. @@ -1929,7 +1995,13 @@ void Preprocessor::HandleUndefDirective(Token &UndefTok) { CheckEndOfDirective("undef"); // Okay, we finally have a valid identifier to undef. - MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo()); + MacroDirective *MD = getMacroDirective(MacroNameTok.getIdentifierInfo()); + const MacroInfo *MI = MD ? MD->getMacroInfo() : 0; + + // If the callbacks want to know, tell them about the macro #undef. + // Note: no matter if the macro was defined or not. + if (Callbacks) + Callbacks->MacroUndefined(MacroNameTok, MD); // If the macro is not defined, this is a noop undef, just return. if (MI == 0) return; @@ -1937,27 +2009,11 @@ void Preprocessor::HandleUndefDirective(Token &UndefTok) { if (!MI->isUsed() && MI->isWarnIfUnused()) Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used); - // If the callbacks want to know, tell them about the macro #undef. - if (Callbacks) - Callbacks->MacroUndefined(MacroNameTok, MI); - if (MI->isWarnIfUnused()) WarnUnusedMacroLocs.erase(MI->getDefinitionLoc()); - UndefineMacro(MacroNameTok.getIdentifierInfo(), MI, - MacroNameTok.getLocation()); -} - -void Preprocessor::UndefineMacro(IdentifierInfo *II, MacroInfo *MI, - SourceLocation UndefLoc) { - MI->setUndefLoc(UndefLoc); - if (MI->isFromAST()) { - MI->setChangedAfterLoad(); - if (Listener) - Listener->UndefinedMacro(MI); - } - - clearMacroInfo(II); + appendMacroDirective(MacroNameTok.getIdentifierInfo(), + AllocateUndefMacroDirective(MacroNameTok.getLocation())); } @@ -1991,7 +2047,8 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef"); IdentifierInfo *MII = MacroNameTok.getIdentifierInfo(); - MacroInfo *MI = getMacroInfo(MII); + MacroDirective *MD = getMacroDirective(MII); + MacroInfo *MI = MD ? MD->getMacroInfo() : 0; if (CurPPLexer->getConditionalStackDepth() == 0) { // If the start of a top-level #ifdef and if the macro is not defined, @@ -2011,9 +2068,9 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, if (Callbacks) { if (isIfndef) - Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok); + Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD); else - Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok); + Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD); } // Should we include the stuff contained by this directive? diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp index d5a88db..d9ce8bf 100644 --- a/lib/Lex/PPExpressions.cpp +++ b/lib/Lex/PPExpressions.cpp @@ -17,13 +17,14 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/Preprocessor.h" -#include "clang/Lex/MacroInfo.h" -#include "clang/Lex/LiteralSupport.h" -#include "clang/Lex/CodeCompletionHandler.h" #include "clang/Basic/TargetInfo.h" +#include "clang/Lex/CodeCompletionHandler.h" #include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Lex/MacroInfo.h" #include "llvm/ADT/APSInt.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SaveAndRestore.h" using namespace clang; namespace { @@ -111,15 +112,21 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, Result.Val = II->hasMacroDefinition(); Result.Val.setIsUnsigned(false); // Result is signed intmax_t. + MacroDirective *Macro = 0; // If there is a macro, mark it used. if (Result.Val != 0 && ValueLive) { - MacroInfo *Macro = PP.getMacroInfo(II); - PP.markMacroAsUsed(Macro); + Macro = PP.getMacroDirective(II); + PP.markMacroAsUsed(Macro->getMacroInfo()); } // Invoke the 'defined' callback. - if (PPCallbacks *Callbacks = PP.getPPCallbacks()) - Callbacks->Defined(PeekTok); + if (PPCallbacks *Callbacks = PP.getPPCallbacks()) { + MacroDirective *MD = Macro; + // Pass the MacroInfo for the macro name even if the value is dead. + if (!MD && Result.Val != 0) + MD = PP.getMacroDirective(II); + Callbacks->Defined(PeekTok, MD); + } // If we are in parens, ensure we have a trailing ). if (LParenLoc.isValid()) { @@ -224,7 +231,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, if (!PP.getLangOpts().C99 && Literal.isLongLong) { if (PP.getLangOpts().CPlusPlus) PP.Diag(PeekTok, - PP.getLangOpts().CPlusPlus0x ? + PP.getLangOpts().CPlusPlus11 ? diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong); else PP.Diag(PeekTok, diag::ext_c99_longlong); @@ -258,9 +265,9 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, return false; } case tok::char_constant: // 'x' - case tok::wide_char_constant: { // L'x' + case tok::wide_char_constant: // L'x' case tok::utf16_char_constant: // u'x' - case tok::utf32_char_constant: // U'x' + case tok::utf32_char_constant: { // U'x' // Complain about, and drop, any ud-suffix. if (PeekTok.hasUDSuffix()) PP.Diag(PeekTok, diag::err_pp_invalid_udl) << /*character*/0; @@ -724,6 +731,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, /// to "!defined(X)" return X in IfNDefMacro. bool Preprocessor:: EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { + SaveAndRestore<bool> PPDir(ParsingIfOrElifDirective, true); // Save the current state of 'DisableMacroExpansion' and reset it to false. If // 'DisableMacroExpansion' is true, then we must be in a macro argument list // in which case a directive is undefined behavior. We want macros to be able diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp index d827f58..be4defe 100644 --- a/lib/Lex/PPLexerChange.cpp +++ b/lib/Lex/PPLexerChange.cpp @@ -13,15 +13,15 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/Preprocessor.h" -#include "clang/Lex/HeaderSearch.h" -#include "clang/Lex/MacroInfo.h" -#include "clang/Lex/LexDiagnostic.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/MacroInfo.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PathV2.h" -#include "llvm/ADT/StringSwitch.h" using namespace clang; PPCallbacks::~PPCallbacks() {} @@ -158,15 +158,17 @@ void Preprocessor::EnterSourceFileWithPTH(PTHLexer *PL, /// tokens from it instead of the current buffer. void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro, MacroArgs *Args) { - PushIncludeMacroStack(); - CurDirLookup = 0; - + TokenLexer *TokLexer; if (NumCachedTokenLexers == 0) { - CurTokenLexer.reset(new TokenLexer(Tok, ILEnd, Macro, Args, *this)); + TokLexer = new TokenLexer(Tok, ILEnd, Macro, Args, *this); } else { - CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]); - CurTokenLexer->Init(Tok, ILEnd, Macro, Args); + TokLexer = TokenLexerCache[--NumCachedTokenLexers]; + TokLexer->Init(Tok, ILEnd, Macro, Args); } + + PushIncludeMacroStack(); + CurDirLookup = 0; + CurTokenLexer.reset(TokLexer); if (CurLexerKind != CLK_LexAfterModuleImport) CurLexerKind = CLK_TokenLexer; } @@ -186,18 +188,20 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd, void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, bool DisableMacroExpansion, bool OwnsTokens) { - // Save our current state. - PushIncludeMacroStack(); - CurDirLookup = 0; - // Create a macro expander to expand from the specified token stream. + TokenLexer *TokLexer; if (NumCachedTokenLexers == 0) { - CurTokenLexer.reset(new TokenLexer(Toks, NumToks, DisableMacroExpansion, - OwnsTokens, *this)); + TokLexer = new TokenLexer(Toks, NumToks, DisableMacroExpansion, + OwnsTokens, *this); } else { - CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]); - CurTokenLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens); + TokLexer = TokenLexerCache[--NumCachedTokenLexers]; + TokLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens); } + + // Save our current state. + PushIncludeMacroStack(); + CurDirLookup = 0; + CurTokenLexer.reset(TokLexer); if (CurLexerKind != CLK_LexAfterModuleImport) CurLexerKind = CLK_TokenLexer; } @@ -328,6 +332,17 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { CurLexer->BufferPtr = EndPos; CurLexer->FormTokenWithChars(Result, EndPos, tok::eof); + if (isCodeCompletionEnabled()) { + // Inserting the code-completion point increases the source buffer by 1, + // but the main FileID was created before inserting the point. + // Compensate by reducing the EOF location by 1, otherwise the location + // will point to the next FileID. + // FIXME: This is hacky, the code-completion point should probably be + // inserted before the main FileID is created. + if (CurLexer->getFileLoc() == CodeCompletionFileLoc) + Result.setLocation(Result.getLocation().getLocWithOffset(-1)); + } + if (!isIncrementalProcessingEnabled()) // We're done with lexing. CurLexer.reset(); @@ -380,7 +395,7 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { SmallString<128> RelativePath; computeRelativePath(FileMgr, Dir, Header, RelativePath); Diag(StartLoc, diag::warn_uncovered_module_header) - << RelativePath; + << Mod->getFullModuleName() << RelativePath; } } } diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp index eee4342..21451f5 100644 --- a/lib/Lex/PPMacroExpansion.cpp +++ b/lib/Lex/PPMacroExpansion.cpp @@ -14,25 +14,26 @@ #include "clang/Lex/Preprocessor.h" #include "MacroArgs.h" -#include "clang/Lex/MacroInfo.h" -#include "clang/Basic/SourceManager.h" #include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" -#include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/CodeCompletionHandler.h" #include "clang/Lex/ExternalPreprocessorSource.h" -#include "clang/Lex/LiteralSupport.h" -#include "llvm/ADT/StringSwitch.h" +#include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/MacroInfo.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Config/llvm-config.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" #include <cstdio> #include <ctime> using namespace clang; -MacroInfo *Preprocessor::getMacroInfoHistory(IdentifierInfo *II) const { +MacroDirective * +Preprocessor::getMacroDirectiveHistory(const IdentifierInfo *II) const { assert(II->hadMacroDefinition() && "Identifier has not been not a macro!"); macro_iterator Pos = Macros.find(II); @@ -40,125 +41,31 @@ MacroInfo *Preprocessor::getMacroInfoHistory(IdentifierInfo *II) const { return Pos->second; } -/// setMacroInfo - Specify a macro for this identifier. -/// -void Preprocessor::setMacroInfo(IdentifierInfo *II, MacroInfo *MI) { - assert(MI && "MacroInfo should be non-zero!"); - assert(MI->getUndefLoc().isInvalid() && - "Undefined macros cannot be registered"); - - MacroInfo *&StoredMI = Macros[II]; - MI->setPreviousDefinition(StoredMI); - StoredMI = MI; - II->setHasMacroDefinition(MI->getUndefLoc().isInvalid()); - if (II->isFromAST()) - II->setChangedSinceDeserialization(); -} - -void Preprocessor::addLoadedMacroInfo(IdentifierInfo *II, MacroInfo *MI, - MacroInfo *Hint) { - assert(MI && "Missing macro?"); - assert(MI->isFromAST() && "Macro is not from an AST?"); - assert(!MI->getPreviousDefinition() && "Macro already in chain?"); - - MacroInfo *&StoredMI = Macros[II]; - - // Easy case: this is the first macro definition for this macro. - if (!StoredMI) { - StoredMI = MI; - - if (MI->isDefined()) - II->setHasMacroDefinition(true); - return; - } - - // If this macro is a definition and this identifier has been neither - // defined nor undef'd in the current translation unit, add this macro - // to the end of the chain of definitions. - if (MI->isDefined() && StoredMI->isFromAST()) { - // Simple case: if this is the first actual definition, just put it at - // th beginning. - if (!StoredMI->isDefined()) { - MI->setPreviousDefinition(StoredMI); - StoredMI = MI; - - II->setHasMacroDefinition(true); - return; - } - - // Find the end of the definition chain. - MacroInfo *Prev; - MacroInfo *PrevPrev = StoredMI; - bool Ambiguous = StoredMI->isAmbiguous(); - bool MatchedOther = false; - do { - Prev = PrevPrev; - - // If the macros are not identical, we have an ambiguity. - if (!Prev->isIdenticalTo(*MI, *this)) { - if (!Ambiguous) { - Ambiguous = true; - StoredMI->setAmbiguous(true); - } - } else { - MatchedOther = true; - } - } while ((PrevPrev = Prev->getPreviousDefinition()) && - PrevPrev->isDefined()); - - // If there are ambiguous definitions, and we didn't match any other - // definition, then mark us as ambiguous. - if (Ambiguous && !MatchedOther) - MI->setAmbiguous(true); - - // Wire this macro information into the chain. - MI->setPreviousDefinition(Prev->getPreviousDefinition()); - Prev->setPreviousDefinition(MI); - return; - } - - // The macro is not a definition; put it at the end of the list. - MacroInfo *Prev = Hint? Hint : StoredMI; - while (Prev->getPreviousDefinition()) - Prev = Prev->getPreviousDefinition(); - Prev->setPreviousDefinition(MI); -} - -void Preprocessor::makeLoadedMacroInfoVisible(IdentifierInfo *II, - MacroInfo *MI) { - assert(MI->isFromAST() && "Macro must be from the AST"); - - MacroInfo *&StoredMI = Macros[II]; - if (StoredMI == MI) { - // Easy case: this is the first macro anyway. - II->setHasMacroDefinition(MI->isDefined()); - return; - } - - // Go find the macro and pull it out of the list. - // FIXME: Yes, this is O(N), and making a pile of macros visible or hidden - // would be quadratic, but it's extremely rare. - MacroInfo *Prev = StoredMI; - while (Prev->getPreviousDefinition() != MI) - Prev = Prev->getPreviousDefinition(); - Prev->setPreviousDefinition(MI->getPreviousDefinition()); - MI->setPreviousDefinition(0); - - // Add the macro back to the list. - addLoadedMacroInfo(II, MI); - - II->setHasMacroDefinition(StoredMI->isDefined()); - if (II->isFromAST()) +void Preprocessor::appendMacroDirective(IdentifierInfo *II, MacroDirective *MD){ + assert(MD && "MacroDirective should be non-zero!"); + assert(!MD->getPrevious() && "Already attached to a MacroDirective history."); + + MacroDirective *&StoredMD = Macros[II]; + MD->setPrevious(StoredMD); + StoredMD = MD; + II->setHasMacroDefinition(MD->isDefined()); + bool isImportedMacro = isa<DefMacroDirective>(MD) && + cast<DefMacroDirective>(MD)->isImported(); + if (II->isFromAST() && !isImportedMacro) II->setChangedSinceDeserialization(); } -/// \brief Undefine a macro for this identifier. -void Preprocessor::clearMacroInfo(IdentifierInfo *II) { - assert(II->hasMacroDefinition() && "Macro is not defined!"); - assert(Macros[II]->getUndefLoc().isValid() && "Macro is still defined!"); - II->setHasMacroDefinition(false); - if (II->isFromAST()) - II->setChangedSinceDeserialization(); +void Preprocessor::setLoadedMacroDirective(IdentifierInfo *II, + MacroDirective *MD) { + assert(II && MD); + MacroDirective *&StoredMD = Macros[II]; + assert(!StoredMD && + "the macro history was modified before initializing it from a pch"); + StoredMD = MD; + // Setup the identifier as having associated macro history. + II->setHasMacroDefinition(true); + if (!MD->isDefined()) + II->setHasMacroDefinition(false); } /// RegisterBuiltinMacro - Register the specified identifier in the identifier @@ -170,7 +77,7 @@ static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name){ // Mark it as being a macro that is builtin. MacroInfo *MI = PP.AllocateMacroInfo(SourceLocation()); MI->setIsBuiltinMacro(); - PP.setMacroInfo(Id, MI); + PP.appendDefMacroDirective(Id, MI); return Id; } @@ -303,7 +210,11 @@ bool Preprocessor::isNextPPTokenLParen() { /// HandleMacroExpandedIdentifier - If an identifier token is read that is to be /// expanded as a macro, handle it and return the next token as 'Identifier'. bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, - MacroInfo *MI) { + MacroDirective *MD) { + MacroDirective::DefInfo Def = MD->getDefinition(); + assert(Def.isValid()); + MacroInfo *MI = Def.getMacroInfo(); + // If this is a macro expansion in the "#if !defined(x)" line for the file, // then the macro could expand to different things in other contexts, we need // to disable the optimization in this case. @@ -311,7 +222,7 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially. if (MI->isBuiltinMacro()) { - if (Callbacks) Callbacks->MacroExpands(Identifier, MI, + if (Callbacks) Callbacks->MacroExpands(Identifier, MD, Identifier.getLocation()); ExpandBuiltinMacro(Identifier); return false; @@ -364,13 +275,13 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // MacroExpands callbacks still happen in source order, queue this // callback to have it happen after the function macro callback. DelayedMacroExpandsCallbacks.push_back( - MacroExpandsInfo(Identifier, MI, ExpansionRange)); + MacroExpandsInfo(Identifier, MD, ExpansionRange)); } else { - Callbacks->MacroExpands(Identifier, MI, ExpansionRange); + Callbacks->MacroExpands(Identifier, MD, ExpansionRange); if (!DelayedMacroExpandsCallbacks.empty()) { for (unsigned i=0, e = DelayedMacroExpandsCallbacks.size(); i!=e; ++i) { MacroExpandsInfo &Info = DelayedMacroExpandsCallbacks[i]; - Callbacks->MacroExpands(Info.Tok, Info.MI, Info.Range); + Callbacks->MacroExpands(Info.Tok, Info.MD, Info.Range); } DelayedMacroExpandsCallbacks.clear(); } @@ -378,16 +289,17 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, } // If the macro definition is ambiguous, complain. - if (MI->isAmbiguous()) { + if (Def.getDirective()->isAmbiguous()) { Diag(Identifier, diag::warn_pp_ambiguous_macro) << Identifier.getIdentifierInfo(); Diag(MI->getDefinitionLoc(), diag::note_pp_ambiguous_macro_chosen) << Identifier.getIdentifierInfo(); - for (MacroInfo *PrevMI = MI->getPreviousDefinition(); - PrevMI && PrevMI->isDefined(); - PrevMI = PrevMI->getPreviousDefinition()) { - if (PrevMI->isAmbiguous()) { - Diag(PrevMI->getDefinitionLoc(), diag::note_pp_ambiguous_macro_other) + for (MacroDirective::DefInfo PrevDef = Def.getPreviousDefinition(); + PrevDef && !PrevDef.isUndefined(); + PrevDef = PrevDef.getPreviousDefinition()) { + if (PrevDef.getDirective()->isAmbiguous()) { + Diag(PrevDef.getMacroInfo()->getDefinitionLoc(), + diag::note_pp_ambiguous_macro_other) << Identifier.getIdentifierInfo(); } } @@ -455,7 +367,10 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, if (MacroInfo *NewMI = getMacroInfo(NewII)) if (!NewMI->isEnabled() || NewMI == MI) { Identifier.setFlag(Token::DisableExpand); - Diag(Identifier, diag::pp_disabled_macro_expansion); + // Don't warn for "#define X X" like "#define bool bool" from + // stdbool.h. + if (NewMI != MI || MI->isFunctionLike()) + Diag(Identifier, diag::pp_disabled_macro_expansion); } } @@ -497,9 +412,13 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, // argument is separated by an EOF token. Use a SmallVector so we can avoid // heap allocations in the common case. SmallVector<Token, 64> ArgTokens; + bool ContainsCodeCompletionTok = false; unsigned NumActuals = 0; while (Tok.isNot(tok::r_paren)) { + if (ContainsCodeCompletionTok && (Tok.is(tok::eof) || Tok.is(tok::eod))) + break; + assert((Tok.is(tok::l_paren) || Tok.is(tok::comma)) && "only expect argument separators here"); @@ -516,10 +435,20 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, LexUnexpandedToken(Tok); if (Tok.is(tok::eof) || Tok.is(tok::eod)) { // "#if f(<eof>" & "#if f(\n" - Diag(MacroName, diag::err_unterm_macro_invoc); - // Do not lose the EOF/EOD. Return it to the client. - MacroName = Tok; - return 0; + if (!ContainsCodeCompletionTok) { + Diag(MacroName, diag::err_unterm_macro_invoc); + Diag(MI->getDefinitionLoc(), diag::note_macro_here) + << MacroName.getIdentifierInfo(); + // Do not lose the EOF/EOD. Return it to the client. + MacroName = Tok; + return 0; + } else { + // Do not lose the EOF/EOD. + Token *Toks = new Token[1]; + Toks[0] = Tok; + EnterTokenStream(Toks, 1, true, true); + break; + } } else if (Tok.is(tok::r_paren)) { // If we found the ) token, the macro arg list is done. if (NumParens-- == 0) { @@ -550,6 +479,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, if (!MI->isEnabled()) Tok.setFlag(Token::DisableExpand); } else if (Tok.is(tok::code_completion)) { + ContainsCodeCompletionTok = true; if (CodeComplete) CodeComplete->CodeCompleteMacroArgument(MacroName.getIdentifierInfo(), MI, NumActuals); @@ -572,16 +502,20 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, if (ArgTokens.size() != ArgTokenStart) ArgStartLoc = ArgTokens[ArgTokenStart].getLocation(); - // Emit the diagnostic at the macro name in case there is a missing ). - // Emitting it at the , could be far away from the macro name. - Diag(ArgStartLoc, diag::err_too_many_args_in_macro_invoc); - return 0; + if (!ContainsCodeCompletionTok) { + // Emit the diagnostic at the macro name in case there is a missing ). + // Emitting it at the , could be far away from the macro name. + Diag(ArgStartLoc, diag::err_too_many_args_in_macro_invoc); + Diag(MI->getDefinitionLoc(), diag::note_macro_here) + << MacroName.getIdentifierInfo(); + return 0; + } } // Empty arguments are standard in C99 and C++0x, and are supported as an extension in // other modes. if (ArgTokens.size() == ArgTokenStart && !LangOpts.C99) - Diag(Tok, LangOpts.CPlusPlus0x ? + Diag(Tok, LangOpts.CPlusPlus11 ? diag::warn_cxx98_compat_empty_fnmacro_arg : diag::ext_empty_fnmacro_arg); @@ -593,8 +527,10 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, EOFTok.setLength(0); ArgTokens.push_back(EOFTok); ++NumActuals; - assert(NumFixedArgsLeft != 0 && "Too many arguments parsed"); - --NumFixedArgsLeft; + if (!ContainsCodeCompletionTok || NumFixedArgsLeft != 0) { + assert(NumFixedArgsLeft != 0 && "Too many arguments parsed"); + --NumFixedArgsLeft; + } } // Okay, we either found the r_paren. Check to see if we parsed too few @@ -604,6 +540,17 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, // See MacroArgs instance var for description of this. bool isVarargsElided = false; + if (ContainsCodeCompletionTok) { + // Recover from not-fully-formed macro invocation during code-completion. + Token EOFTok; + EOFTok.startToken(); + EOFTok.setKind(tok::eof); + EOFTok.setLocation(Tok.getLocation()); + EOFTok.setLength(0); + for (; NumActuals < MinArgsExpected; ++NumActuals) + ArgTokens.push_back(EOFTok); + } + if (NumActuals < MinArgsExpected) { // There are several cases where too few arguments is ok, handle them now. if (NumActuals == 0 && MinArgsExpected == 1) { @@ -619,9 +566,14 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, // Varargs where the named vararg parameter is missing: OK as extension. // #define A(x, ...) // A("blah") - Diag(Tok, diag::ext_missing_varargs_arg); - Diag(MI->getDefinitionLoc(), diag::note_macro_here) - << MacroName.getIdentifierInfo(); + // + // If the macro contains the comma pasting extension, the diagnostic + // is suppressed; we know we'll get another diagnostic later. + if (!MI->hasCommaPasting()) { + Diag(Tok, diag::ext_missing_varargs_arg); + Diag(MI->getDefinitionLoc(), diag::note_macro_here) + << MacroName.getIdentifierInfo(); + } // Remember this occurred, allowing us to elide the comma when used for // cases like: @@ -630,9 +582,11 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, // #define C(...) blah(a, ## __VA_ARGS__) // A(x) B(x) C() isVarargsElided = true; - } else { + } else if (!ContainsCodeCompletionTok) { // Otherwise, emit the error. Diag(Tok, diag::err_too_few_args_in_macro_invoc); + Diag(MI->getDefinitionLoc(), diag::note_macro_here) + << MacroName.getIdentifierInfo(); return 0; } @@ -648,10 +602,13 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, if (NumActuals == 0 && MinArgsExpected == 2) ArgTokens.push_back(Tok); - } else if (NumActuals > MinArgsExpected && !MI->isVariadic()) { + } else if (NumActuals > MinArgsExpected && !MI->isVariadic() && + !ContainsCodeCompletionTok) { // Emit the diagnostic at the macro name in case there is a missing ). // Emitting it at the , could be far away from the macro name. Diag(MacroName, diag::err_too_many_args_in_macro_invoc); + Diag(MI->getDefinitionLoc(), diag::note_macro_here) + << MacroName.getIdentifierInfo(); return 0; } @@ -745,7 +702,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { Feature = Feature.substr(2, Feature.size() - 4); return llvm::StringSwitch<bool>(Feature) - .Case("address_sanitizer", LangOpts.SanitizeAddress) + .Case("address_sanitizer", LangOpts.Sanitize.Address) .Case("attribute_analyzer_noreturn", true) .Case("attribute_availability", true) .Case("attribute_availability_with_message", true) @@ -767,6 +724,8 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("cxx_exceptions", LangOpts.Exceptions) .Case("cxx_rtti", LangOpts.RTTI) .Case("enumerator_attributes", true) + .Case("memory_sanitizer", LangOpts.Sanitize.Memory) + .Case("thread_sanitizer", LangOpts.Sanitize.Thread) // Objective-C features .Case("objc_arr", LangOpts.ObjCAutoRefCount) // FIXME: REMOVE? .Case("objc_arc", LangOpts.ObjCAutoRefCount) @@ -776,6 +735,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("objc_instancetype", LangOpts.ObjC2) .Case("objc_modules", LangOpts.ObjC2 && LangOpts.Modules) .Case("objc_nonfragile_abi", LangOpts.ObjCRuntime.isNonFragile()) + .Case("objc_property_explicit_atomic", true) // Does clang support explicit "atomic" keyword? .Case("objc_weak_class", LangOpts.ObjCRuntime.hasWeakClassImport()) .Case("ownership_holds", true) .Case("ownership_returns", true) @@ -792,41 +752,41 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("c_generic_selections", LangOpts.C11) .Case("c_static_assert", LangOpts.C11) // C++11 features - .Case("cxx_access_control_sfinae", LangOpts.CPlusPlus0x) - .Case("cxx_alias_templates", LangOpts.CPlusPlus0x) - .Case("cxx_alignas", LangOpts.CPlusPlus0x) - .Case("cxx_atomic", LangOpts.CPlusPlus0x) - .Case("cxx_attributes", LangOpts.CPlusPlus0x) - .Case("cxx_auto_type", LangOpts.CPlusPlus0x) - .Case("cxx_constexpr", LangOpts.CPlusPlus0x) - .Case("cxx_decltype", LangOpts.CPlusPlus0x) - .Case("cxx_decltype_incomplete_return_types", LangOpts.CPlusPlus0x) - .Case("cxx_default_function_template_args", LangOpts.CPlusPlus0x) - .Case("cxx_defaulted_functions", LangOpts.CPlusPlus0x) - .Case("cxx_delegating_constructors", LangOpts.CPlusPlus0x) - .Case("cxx_deleted_functions", LangOpts.CPlusPlus0x) - .Case("cxx_explicit_conversions", LangOpts.CPlusPlus0x) - .Case("cxx_generalized_initializers", LangOpts.CPlusPlus0x) - .Case("cxx_implicit_moves", LangOpts.CPlusPlus0x) + .Case("cxx_access_control_sfinae", LangOpts.CPlusPlus11) + .Case("cxx_alias_templates", LangOpts.CPlusPlus11) + .Case("cxx_alignas", LangOpts.CPlusPlus11) + .Case("cxx_atomic", LangOpts.CPlusPlus11) + .Case("cxx_attributes", LangOpts.CPlusPlus11) + .Case("cxx_auto_type", LangOpts.CPlusPlus11) + .Case("cxx_constexpr", LangOpts.CPlusPlus11) + .Case("cxx_decltype", LangOpts.CPlusPlus11) + .Case("cxx_decltype_incomplete_return_types", LangOpts.CPlusPlus11) + .Case("cxx_default_function_template_args", LangOpts.CPlusPlus11) + .Case("cxx_defaulted_functions", LangOpts.CPlusPlus11) + .Case("cxx_delegating_constructors", LangOpts.CPlusPlus11) + .Case("cxx_deleted_functions", LangOpts.CPlusPlus11) + .Case("cxx_explicit_conversions", LangOpts.CPlusPlus11) + .Case("cxx_generalized_initializers", LangOpts.CPlusPlus11) + .Case("cxx_implicit_moves", LangOpts.CPlusPlus11) //.Case("cxx_inheriting_constructors", false) - .Case("cxx_inline_namespaces", LangOpts.CPlusPlus0x) - .Case("cxx_lambdas", LangOpts.CPlusPlus0x) - .Case("cxx_local_type_template_args", LangOpts.CPlusPlus0x) - .Case("cxx_nonstatic_member_init", LangOpts.CPlusPlus0x) - .Case("cxx_noexcept", LangOpts.CPlusPlus0x) - .Case("cxx_nullptr", LangOpts.CPlusPlus0x) - .Case("cxx_override_control", LangOpts.CPlusPlus0x) - .Case("cxx_range_for", LangOpts.CPlusPlus0x) - .Case("cxx_raw_string_literals", LangOpts.CPlusPlus0x) - .Case("cxx_reference_qualified_functions", LangOpts.CPlusPlus0x) - .Case("cxx_rvalue_references", LangOpts.CPlusPlus0x) - .Case("cxx_strong_enums", LangOpts.CPlusPlus0x) - .Case("cxx_static_assert", LangOpts.CPlusPlus0x) - .Case("cxx_trailing_return", LangOpts.CPlusPlus0x) - .Case("cxx_unicode_literals", LangOpts.CPlusPlus0x) - .Case("cxx_unrestricted_unions", LangOpts.CPlusPlus0x) - .Case("cxx_user_literals", LangOpts.CPlusPlus0x) - .Case("cxx_variadic_templates", LangOpts.CPlusPlus0x) + .Case("cxx_inline_namespaces", LangOpts.CPlusPlus11) + .Case("cxx_lambdas", LangOpts.CPlusPlus11) + .Case("cxx_local_type_template_args", LangOpts.CPlusPlus11) + .Case("cxx_nonstatic_member_init", LangOpts.CPlusPlus11) + .Case("cxx_noexcept", LangOpts.CPlusPlus11) + .Case("cxx_nullptr", LangOpts.CPlusPlus11) + .Case("cxx_override_control", LangOpts.CPlusPlus11) + .Case("cxx_range_for", LangOpts.CPlusPlus11) + .Case("cxx_raw_string_literals", LangOpts.CPlusPlus11) + .Case("cxx_reference_qualified_functions", LangOpts.CPlusPlus11) + .Case("cxx_rvalue_references", LangOpts.CPlusPlus11) + .Case("cxx_strong_enums", LangOpts.CPlusPlus11) + .Case("cxx_static_assert", LangOpts.CPlusPlus11) + .Case("cxx_trailing_return", LangOpts.CPlusPlus11) + .Case("cxx_unicode_literals", LangOpts.CPlusPlus11) + .Case("cxx_unrestricted_unions", LangOpts.CPlusPlus11) + .Case("cxx_user_literals", LangOpts.CPlusPlus11) + .Case("cxx_variadic_templates", LangOpts.CPlusPlus11) // Type traits .Case("has_nothrow_assign", LangOpts.CPlusPlus) .Case("has_nothrow_copy", LangOpts.CPlusPlus) @@ -840,10 +800,6 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("is_base_of", LangOpts.CPlusPlus) .Case("is_class", LangOpts.CPlusPlus) .Case("is_convertible_to", LangOpts.CPlusPlus) - // __is_empty is available only if the horrible - // "struct __is_empty" parsing hack hasn't been needed in this - // translation unit. If it has, __is_empty reverts to a normal - // identifier and __has_feature(is_empty) evaluates false. .Case("is_empty", LangOpts.CPlusPlus) .Case("is_enum", LangOpts.CPlusPlus) .Case("is_final", LangOpts.CPlusPlus) @@ -926,9 +882,15 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II, Preprocessor &PP, const DirectoryLookup *LookupFrom) { // Save the location of the current token. If a '(' is later found, use - // that location. If no, use the end of this location instead. + // that location. If not, use the end of this location instead. SourceLocation LParenLoc = Tok.getLocation(); + // These expressions are only allowed within a preprocessor directive. + if (!PP.isParsingIfOrElifDirective()) { + PP.Diag(LParenLoc, diag::err_pp_directive_required) << II->getName(); + return false; + } + // Get '('. PP.LexNonComment(Tok); @@ -946,8 +908,14 @@ static bool EvaluateHasIncludeCommon(Token &Tok, // Save '(' location for possible missing ')' message. LParenLoc = Tok.getLocation(); - // Get the file name. - PP.getCurrentLexer()->LexIncludeFilename(Tok); + if (PP.getCurrentLexer()) { + // Get the file name. + PP.getCurrentLexer()->LexIncludeFilename(Tok); + } else { + // We're in a macro, so we can't use LexIncludeFilename; just + // grab the next token. + PP.Lex(Tok); + } } // Reserve a buffer to get the spelling. @@ -1223,15 +1191,15 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { IdentifierInfo *FeatureII = 0; // Read the '('. - Lex(Tok); + LexUnexpandedToken(Tok); if (Tok.is(tok::l_paren)) { // Read the identifier - Lex(Tok); + LexUnexpandedToken(Tok); if (Tok.is(tok::identifier) || Tok.is(tok::kw_const)) { FeatureII = Tok.getIdentifierInfo(); // Read the ')'. - Lex(Tok); + LexUnexpandedToken(Tok); if (Tok.is(tok::r_paren)) IsValid = true; } @@ -1275,69 +1243,49 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { bool IsValid = false; bool Value = false; // Read the '('. - Lex(Tok); + LexUnexpandedToken(Tok); do { - if (Tok.is(tok::l_paren)) { - // Read the string. - Lex(Tok); - - // We need at least one string literal. - if (!Tok.is(tok::string_literal)) { - StartLoc = Tok.getLocation(); - IsValid = false; - // Eat tokens until ')'. - do Lex(Tok); while (!(Tok.is(tok::r_paren) || Tok.is(tok::eod))); - break; - } - - // String concatenation allows multiple strings, which can even come - // from macro expansion. - SmallVector<Token, 4> StrToks; - while (Tok.is(tok::string_literal)) { - // Complain about, and drop, any ud-suffix. - if (Tok.hasUDSuffix()) - Diag(Tok, diag::err_invalid_string_udl); - StrToks.push_back(Tok); + if (Tok.isNot(tok::l_paren)) { + Diag(StartLoc, diag::err_warning_check_malformed); + break; + } + + LexUnexpandedToken(Tok); + std::string WarningName; + SourceLocation StrStartLoc = Tok.getLocation(); + if (!FinishLexStringLiteral(Tok, WarningName, "'__has_warning'", + /*MacroExpansion=*/false)) { + // Eat tokens until ')'. + while (Tok.isNot(tok::r_paren) && Tok.isNot(tok::eod) && + Tok.isNot(tok::eof)) LexUnexpandedToken(Tok); - } - - // Is the end a ')'? - if (!(IsValid = Tok.is(tok::r_paren))) - break; - - // Concatenate and parse the strings. - StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this); - assert(Literal.isAscii() && "Didn't allow wide strings in"); - if (Literal.hadError) - break; - if (Literal.Pascal) { - Diag(Tok, diag::warn_pragma_diagnostic_invalid); - break; - } - - StringRef WarningName(Literal.GetString()); - - if (WarningName.size() < 3 || WarningName[0] != '-' || - WarningName[1] != 'W') { - Diag(StrToks[0].getLocation(), diag::warn_has_warning_invalid_option); - break; - } - - // Finally, check if the warning flags maps to a diagnostic group. - // We construct a SmallVector here to talk to getDiagnosticIDs(). - // Although we don't use the result, this isn't a hot path, and not - // worth special casing. - llvm::SmallVector<diag::kind, 10> Diags; - Value = !getDiagnostics().getDiagnosticIDs()-> - getDiagnosticsInGroup(WarningName.substr(2), Diags); + break; + } + + // Is the end a ')'? + if (!(IsValid = Tok.is(tok::r_paren))) { + Diag(StartLoc, diag::err_warning_check_malformed); + break; + } + + if (WarningName.size() < 3 || WarningName[0] != '-' || + WarningName[1] != 'W') { + Diag(StrStartLoc, diag::warn_has_warning_invalid_option); + break; } + + // Finally, check if the warning flags maps to a diagnostic group. + // We construct a SmallVector here to talk to getDiagnosticIDs(). + // Although we don't use the result, this isn't a hot path, and not + // worth special casing. + SmallVector<diag::kind, 10> Diags; + Value = !getDiagnostics().getDiagnosticIDs()-> + getDiagnosticsInGroup(WarningName.substr(2), Diags); } while (false); - - if (!IsValid) - Diag(StartLoc, diag::err_warning_check_malformed); OS << (int)Value; - Tok.setKind(tok::numeric_constant); + if (IsValid) + Tok.setKind(tok::numeric_constant); } else if (II == Ident__building_module) { // The argument to this builtin should be an identifier. The // builtin evaluates to 1 when that identifier names the module we are diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp index b167172..e8f43f7 100644 --- a/lib/Lex/PTHLexer.cpp +++ b/lib/Lex/PTHLexer.cpp @@ -11,17 +11,16 @@ // //===----------------------------------------------------------------------===// -#include "clang/Basic/TokenKinds.h" +#include "clang/Lex/PTHLexer.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/FileSystemStatCache.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/OnDiskHashTable.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Lex/LexDiagnostic.h" -#include "clang/Lex/PTHLexer.h" -#include "clang/Lex/Preprocessor.h" #include "clang/Lex/PTHManager.h" -#include "clang/Lex/Token.h" #include "clang/Lex/Preprocessor.h" +#include "clang/Lex/Token.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" @@ -679,13 +678,13 @@ public: ~PTHStatCache() {} LookupResult getStat(const char *Path, struct stat &StatBuf, - int *FileDescriptor) { + bool isFile, int *FileDescriptor) { // Do the lookup for the file's data in the PTH file. CacheTy::iterator I = Cache.find(Path); // If we don't get a hit in the PTH file just forward to 'stat'. if (I == Cache.end()) - return statChained(Path, StatBuf, FileDescriptor); + return statChained(Path, StatBuf, isFile, FileDescriptor); const PTHStatData &Data = *I; diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp index e7e6c37..95e8a8c 100644 --- a/lib/Lex/Pragma.cpp +++ b/lib/Lex/Pragma.cpp @@ -13,13 +13,13 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/Pragma.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" #include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/LiteralSupport.h" -#include "clang/Lex/Preprocessor.h" #include "clang/Lex/MacroInfo.h" -#include "clang/Lex/LexDiagnostic.h" -#include "clang/Basic/FileManager.h" -#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Preprocessor.h" #include "llvm/Support/CrashRecoveryContext.h" #include "llvm/Support/ErrorHandling.h" #include <algorithm> @@ -184,7 +184,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) { // Read the '"..."'. Lex(Tok); - if (Tok.isNot(tok::string_literal) && Tok.isNot(tok::wide_string_literal)) { + if (!tok::isStringLiteral(Tok.getKind())) { Diag(PragmaLoc, diag::err__Pragma_malformed); // Skip this token, and the ')', if present. if (Tok.isNot(tok::r_paren)) @@ -219,15 +219,50 @@ void Preprocessor::Handle_Pragma(Token &Tok) { SourceLocation RParenLoc = Tok.getLocation(); std::string StrVal = getSpelling(StrTok); - // The _Pragma is lexically sound. Destringize according to C99 6.10.9.1: - // "The string literal is destringized by deleting the L prefix, if present, + // The _Pragma is lexically sound. Destringize according to C11 6.10.9.1: + // "The string literal is destringized by deleting any encoding prefix, // deleting the leading and trailing double-quotes, replacing each escape // sequence \" by a double-quote, and replacing each escape sequence \\ by a // single backslash." - if (StrVal[0] == 'L') // Remove L prefix. + if (StrVal[0] == 'L' || StrVal[0] == 'U' || + (StrVal[0] == 'u' && StrVal[1] != '8')) StrVal.erase(StrVal.begin()); - assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' && - "Invalid string token!"); + else if (StrVal[0] == 'u') + StrVal.erase(StrVal.begin(), StrVal.begin() + 2); + + if (StrVal[0] == 'R') { + // FIXME: C++11 does not specify how to handle raw-string-literals here. + // We strip off the 'R', the quotes, the d-char-sequences, and the parens. + assert(StrVal[1] == '"' && StrVal[StrVal.size() - 1] == '"' && + "Invalid raw string token!"); + + // Measure the length of the d-char-sequence. + unsigned NumDChars = 0; + while (StrVal[2 + NumDChars] != '(') { + assert(NumDChars < (StrVal.size() - 5) / 2 && + "Invalid raw string token!"); + ++NumDChars; + } + assert(StrVal[StrVal.size() - 2 - NumDChars] == ')'); + + // Remove 'R " d-char-sequence' and 'd-char-sequence "'. We'll replace the + // parens below. + StrVal.erase(0, 2 + NumDChars); + StrVal.erase(StrVal.size() - 1 - NumDChars); + } else { + assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' && + "Invalid string token!"); + + // Remove escaped quotes and escapes. + for (unsigned i = 1, e = StrVal.size(); i < e-2; ++i) { + if (StrVal[i] == '\\' && + (StrVal[i+1] == '\\' || StrVal[i+1] == '"')) { + // \\ -> '\' and \" -> '"'. + StrVal.erase(StrVal.begin()+i); + --e; + } + } + } // Remove the front quote, replacing it with a space, so that the pragma // contents appear to have a space before them. @@ -236,16 +271,6 @@ void Preprocessor::Handle_Pragma(Token &Tok) { // Replace the terminating quote with a \n. StrVal[StrVal.size()-1] = '\n'; - // Remove escaped quotes and escapes. - for (unsigned i = 0, e = StrVal.size(); i != e-1; ++i) { - if (StrVal[i] == '\\' && - (StrVal[i+1] == '\\' || StrVal[i+1] == '"')) { - // \\ -> '\' and \" -> '"'. - StrVal.erase(StrVal.begin()+i); - --e; - } - } - // Plop the string (including the newline and trailing null) into a buffer // where we can lex it. Token TmpTok; @@ -470,7 +495,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { /// /// The syntax is: /// \code -/// \#pragma comment(linker, "foo") +/// #pragma comment(linker, "foo") /// \endcode /// 'linker' is one of five identifiers: compiler, exestr, lib, linker, user. /// "foo" is a string, which is fully macro expanded, and permits string @@ -502,38 +527,10 @@ void Preprocessor::HandlePragmaComment(Token &Tok) { // Read the optional string if present. Lex(Tok); std::string ArgumentString; - if (Tok.is(tok::comma)) { - Lex(Tok); // eat the comma. - - // We need at least one string. - if (Tok.isNot(tok::string_literal)) { - Diag(Tok.getLocation(), diag::err_pragma_comment_malformed); - return; - } - - // String concatenation allows multiple strings, which can even come from - // macro expansion. - // "foo " "bar" "Baz" - SmallVector<Token, 4> StrToks; - while (Tok.is(tok::string_literal)) { - if (Tok.hasUDSuffix()) - Diag(Tok, diag::err_invalid_string_udl); - StrToks.push_back(Tok); - Lex(Tok); - } - - // Concatenate and parse the strings. - StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this); - assert(Literal.isAscii() && "Didn't allow wide strings in"); - if (Literal.hadError) - return; - if (Literal.Pascal) { - Diag(StrToks[0].getLocation(), diag::err_pragma_comment_malformed); - return; - } - - ArgumentString = Literal.GetString(); - } + if (Tok.is(tok::comma) && !LexStringLiteral(Tok, ArgumentString, + "pragma comment", + /*MacroExpansion=*/true)) + return; // FIXME: If the kind is "compiler" warn if the string is present (it is // ignored). @@ -560,11 +557,11 @@ void Preprocessor::HandlePragmaComment(Token &Tok) { /// HandlePragmaMessage - Handle the microsoft and gcc \#pragma message /// extension. The syntax is: /// \code -/// \#pragma message(string) +/// #pragma message(string) /// \endcode /// OR, in GCC mode: /// \code -/// \#pragma message string +/// #pragma message string /// \endcode /// string is a string, which is fully macro expanded, and permits string /// concatenation, embedded escape characters, etc... See MSDN for more details. @@ -587,34 +584,10 @@ void Preprocessor::HandlePragmaMessage(Token &Tok) { return; } - // We need at least one string. - if (Tok.isNot(tok::string_literal)) { - Diag(Tok.getLocation(), diag::err_pragma_message_malformed); - return; - } - - // String concatenation allows multiple strings, which can even come from - // macro expansion. - // "foo " "bar" "Baz" - SmallVector<Token, 4> StrToks; - while (Tok.is(tok::string_literal)) { - if (Tok.hasUDSuffix()) - Diag(Tok, diag::err_invalid_string_udl); - StrToks.push_back(Tok); - Lex(Tok); - } - - // Concatenate and parse the strings. - StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this); - assert(Literal.isAscii() && "Didn't allow wide strings in"); - if (Literal.hadError) - return; - if (Literal.Pascal) { - Diag(StrToks[0].getLocation(), diag::err_pragma_message_malformed); + std::string MessageString; + if (!FinishLexStringLiteral(Tok, MessageString, "pragma message", + /*MacroExpansion=*/true)) return; - } - - StringRef MessageString(Literal.GetString()); if (ExpectClosingParen) { if (Tok.isNot(tok::r_paren)) { @@ -692,7 +665,7 @@ IdentifierInfo *Preprocessor::ParsePragmaPushOrPopMacro(Token &Tok) { /// /// The syntax is: /// \code -/// \#pragma push_macro("macro") +/// #pragma push_macro("macro") /// \endcode void Preprocessor::HandlePragmaPushMacro(Token &PushMacroTok) { // Parse the pragma directive and get the macro IdentifierInfo*. @@ -702,17 +675,13 @@ void Preprocessor::HandlePragmaPushMacro(Token &PushMacroTok) { // Get the MacroInfo associated with IdentInfo. MacroInfo *MI = getMacroInfo(IdentInfo); - MacroInfo *MacroCopyToPush = 0; if (MI) { - // Make a clone of MI. - MacroCopyToPush = CloneMacroInfo(*MI); - // Allow the original MacroInfo to be redefined later. MI->setIsAllowRedefinitionsWithoutWarning(true); } // Push the cloned MacroInfo so we can retrieve it later. - PragmaPushMacroInfo[IdentInfo].push_back(MacroCopyToPush); + PragmaPushMacroInfo[IdentInfo].push_back(MI); } /// \brief Handle \#pragma pop_macro. @@ -733,10 +702,11 @@ void Preprocessor::HandlePragmaPopMacro(Token &PopMacroTok) { PragmaPushMacroInfo.find(IdentInfo); if (iter != PragmaPushMacroInfo.end()) { // Forget the MacroInfo currently associated with IdentInfo. - if (MacroInfo *CurrentMI = getMacroInfo(IdentInfo)) { - if (CurrentMI->isWarnIfUnused()) - WarnUnusedMacroLocs.erase(CurrentMI->getDefinitionLoc()); - UndefineMacro(IdentInfo, CurrentMI, MessageLoc); + if (MacroDirective *CurrentMD = getMacroDirective(IdentInfo)) { + MacroInfo *MI = CurrentMD->getMacroInfo(); + if (MI->isWarnIfUnused()) + WarnUnusedMacroLocs.erase(MI->getDefinitionLoc()); + appendMacroDirective(IdentInfo, AllocateUndefMacroDirective(MessageLoc)); } // Get the MacroInfo we want to reinstall. @@ -744,9 +714,8 @@ void Preprocessor::HandlePragmaPopMacro(Token &PopMacroTok) { if (MacroToReInstall) { // Reinstall the previously pushed macro. - setMacroInfo(IdentInfo, MacroToReInstall); - } else if (IdentInfo->hasMacroDefinition()) { - clearMacroInfo(IdentInfo); + appendDefMacroDirective(IdentInfo, MacroToReInstall, MessageLoc, + /*isImported=*/false); } // Pop PragmaPushMacroInfo stack. @@ -1090,50 +1059,28 @@ public: } PP.LexUnexpandedToken(Tok); + SourceLocation StringLoc = Tok.getLocation(); - // We need at least one string. - if (Tok.isNot(tok::string_literal)) { - PP.Diag(Tok.getLocation(), diag::warn_pragma_diagnostic_invalid_token); + std::string WarningName; + if (!PP.FinishLexStringLiteral(Tok, WarningName, "pragma diagnostic", + /*MacroExpansion=*/false)) return; - } - - // String concatenation allows multiple strings, which can even come from - // macro expansion. - // "foo " "bar" "Baz" - SmallVector<Token, 4> StrToks; - while (Tok.is(tok::string_literal)) { - StrToks.push_back(Tok); - PP.LexUnexpandedToken(Tok); - } if (Tok.isNot(tok::eod)) { PP.Diag(Tok.getLocation(), diag::warn_pragma_diagnostic_invalid_token); return; } - // Concatenate and parse the strings. - StringLiteralParser Literal(&StrToks[0], StrToks.size(), PP); - assert(Literal.isAscii() && "Didn't allow wide strings in"); - if (Literal.hadError) - return; - if (Literal.Pascal) { - PP.Diag(Tok, diag::warn_pragma_diagnostic_invalid); - return; - } - - StringRef WarningName(Literal.GetString()); - if (WarningName.size() < 3 || WarningName[0] != '-' || WarningName[1] != 'W') { - PP.Diag(StrToks[0].getLocation(), - diag::warn_pragma_diagnostic_invalid_option); + PP.Diag(StringLoc, diag::warn_pragma_diagnostic_invalid_option); return; } if (PP.getDiagnostics().setDiagnosticGroupMapping(WarningName.substr(2), Map, DiagLoc)) - PP.Diag(StrToks[0].getLocation(), - diag::warn_pragma_diagnostic_unknown_warning) << WarningName; + PP.Diag(StringLoc, diag::warn_pragma_diagnostic_unknown_warning) + << WarningName; else if (Callbacks) Callbacks->PragmaDiagnostic(DiagLoc, Namespace, Map, WarningName); } @@ -1277,6 +1224,29 @@ struct PragmaARCCFCodeAuditedHandler : public PragmaHandler { } }; + /// \brief Handle "\#pragma region [...]" + /// + /// The syntax is + /// \code + /// #pragma region [optional name] + /// #pragma endregion [optional comment] + /// \endcode + /// + /// \note This is + /// <a href="http://msdn.microsoft.com/en-us/library/b6xkz944(v=vs.80).aspx">editor-only</a> + /// pragma, just skipped by compiler. + struct PragmaRegionHandler : public PragmaHandler { + PragmaRegionHandler(const char *pragma) : PragmaHandler(pragma) { } + + virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + Token &NameTok) { + // #pragma region: endregion matches can be verified + // __pragma(region): no sense, but ignored by msvc + // _Pragma is not valid for MSVC, but there isn't any point + // to handle a _Pragma differently. + } + }; + } // end anonymous namespace @@ -1310,5 +1280,7 @@ void Preprocessor::RegisterBuiltinPragmas() { if (LangOpts.MicrosoftExt) { AddPragmaHandler(new PragmaCommentHandler()); AddPragmaHandler(new PragmaIncludeAliasHandler()); + AddPragmaHandler(new PragmaRegionHandler("region")); + AddPragmaHandler(new PragmaRegionHandler("endregion")); } } diff --git a/lib/Lex/PreprocessingRecord.cpp b/lib/Lex/PreprocessingRecord.cpp index 01f3665e..b10e7f7 100644 --- a/lib/Lex/PreprocessingRecord.cpp +++ b/lib/Lex/PreprocessingRecord.cpp @@ -14,8 +14,8 @@ #include "clang/Lex/PreprocessingRecord.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/Token.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Capacity.h" +#include "llvm/Support/ErrorHandling.h" using namespace clang; @@ -38,14 +38,9 @@ InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec, this->FileName = StringRef(Memory, FileName.size()); } -PreprocessingRecord::PreprocessingRecord(SourceManager &SM, - bool RecordConditionalDirectives) +PreprocessingRecord::PreprocessingRecord(SourceManager &SM) : SourceMgr(SM), - RecordCondDirectives(RecordConditionalDirectives), CondDirectiveNextIdx(0), - ExternalSource(0) -{ - if (RecordCondDirectives) - CondDirectiveStack.push_back(CondDirectiveNextIdx++); + ExternalSource(0) { } /// \brief Returns a pair of [Begin, End) iterators of preprocessed entities @@ -97,8 +92,10 @@ bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) { int Pos = PPEI.Position; if (Pos < 0) { - assert(unsigned(-Pos-1) < LoadedPreprocessedEntities.size() && - "Out-of bounds loaded preprocessed entity"); + if (unsigned(-Pos-1) >= LoadedPreprocessedEntities.size()) { + assert(0 && "Out-of bounds loaded preprocessed entity"); + return false; + } assert(ExternalSource && "No external source to load from"); unsigned LoadedIndex = LoadedPreprocessedEntities.size()+Pos; if (PreprocessedEntity *PPE = LoadedPreprocessedEntities[LoadedIndex]) @@ -106,8 +103,8 @@ bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) { // See if the external source can see if the entity is in the file without // deserializing it. - llvm::Optional<bool> - IsInFile = ExternalSource->isPreprocessedEntityInFileID(LoadedIndex, FID); + Optional<bool> IsInFile = + ExternalSource->isPreprocessedEntityInFileID(LoadedIndex, FID); if (IsInFile.hasValue()) return IsInFile.getValue(); @@ -118,8 +115,10 @@ bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) { FID, SourceMgr); } - assert(unsigned(Pos) < PreprocessedEntities.size() && - "Out-of bounds local preprocessed entity"); + if (unsigned(Pos) >= PreprocessedEntities.size()) { + assert(0 && "Out-of bounds local preprocessed entity"); + return false; + } return isPreprocessedEntityIfInFileID(PreprocessedEntities[Pos], FID, SourceMgr); } @@ -249,11 +248,11 @@ PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) { assert(Entity); SourceLocation BeginLoc = Entity->getSourceRange().getBegin(); - if (!isa<class InclusionDirective>(Entity)) { + if (isa<MacroDefinition>(Entity)) { assert((PreprocessedEntities.empty() || !SourceMgr.isBeforeInTranslationUnit(BeginLoc, PreprocessedEntities.back()->getSourceRange().getBegin())) && - "a macro directive was encountered out-of-order"); + "a macro definition was encountered out-of-order"); PreprocessedEntities.push_back(Entity); return getPPEntityID(PreprocessedEntities.size()-1, /*isLoaded=*/false); } @@ -268,7 +267,15 @@ PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) { // The entity's location is not after the previous one; this can happen with // include directives that form the filename using macros, e.g: - // "#include MACRO(STUFF)". + // "#include MACRO(STUFF)" + // or with macro expansions inside macro arguments where the arguments are + // not expanded in the same order as listed, e.g: + // \code + // #define M1 1 + // #define M2 2 + // #define FM(x,y) y x + // FM(M1, M2) + // \endcode typedef std::vector<PreprocessedEntity *>::iterator pp_iter; @@ -313,8 +320,8 @@ unsigned PreprocessingRecord::allocateLoadedEntities(unsigned NumEntities) { } void PreprocessingRecord::RegisterMacroDefinition(MacroInfo *Macro, - PPEntityID PPID) { - MacroDefinitions[Macro] = PPID; + MacroDefinition *Def) { + MacroDefinitions[Macro] = Def; } /// \brief Retrieve the preprocessed entity at the given ID. @@ -351,19 +358,17 @@ PreprocessingRecord::getLoadedPreprocessedEntity(unsigned Index) { } MacroDefinition *PreprocessingRecord::findMacroDefinition(const MacroInfo *MI) { - llvm::DenseMap<const MacroInfo *, PPEntityID>::iterator Pos + llvm::DenseMap<const MacroInfo *, MacroDefinition *>::iterator Pos = MacroDefinitions.find(MI); if (Pos == MacroDefinitions.end()) return 0; - - PreprocessedEntity *Entity = getPreprocessedEntity(Pos->second); - if (Entity->isInvalid()) - return 0; - return cast<MacroDefinition>(Entity); + + return Pos->second; } -void PreprocessingRecord::MacroExpands(const Token &Id, const MacroInfo* MI, - SourceRange Range) { +void PreprocessingRecord::addMacroExpansion(const Token &Id, + const MacroInfo *MI, + SourceRange Range) { // We don't record nested macro expansions. if (Id.getLocation().isMacroID()) return; @@ -376,17 +381,50 @@ void PreprocessingRecord::MacroExpands(const Token &Id, const MacroInfo* MI, new (*this) MacroExpansion(Def, Range)); } +void PreprocessingRecord::Ifdef(SourceLocation Loc, const Token &MacroNameTok, + const MacroDirective *MD) { + // This is not actually a macro expansion but record it as a macro reference. + if (MD) + addMacroExpansion(MacroNameTok, MD->getMacroInfo(), + MacroNameTok.getLocation()); +} + +void PreprocessingRecord::Ifndef(SourceLocation Loc, const Token &MacroNameTok, + const MacroDirective *MD) { + // This is not actually a macro expansion but record it as a macro reference. + if (MD) + addMacroExpansion(MacroNameTok, MD->getMacroInfo(), + MacroNameTok.getLocation()); +} + +void PreprocessingRecord::Defined(const Token &MacroNameTok, + const MacroDirective *MD) { + // This is not actually a macro expansion but record it as a macro reference. + if (MD) + addMacroExpansion(MacroNameTok, MD->getMacroInfo(), + MacroNameTok.getLocation()); +} + +void PreprocessingRecord::MacroExpands(const Token &Id,const MacroDirective *MD, + SourceRange Range) { + addMacroExpansion(Id, MD->getMacroInfo(), Range); +} + void PreprocessingRecord::MacroDefined(const Token &Id, - const MacroInfo *MI) { + const MacroDirective *MD) { + const MacroInfo *MI = MD->getMacroInfo(); SourceRange R(MI->getDefinitionLoc(), MI->getDefinitionEndLoc()); MacroDefinition *Def = new (*this) MacroDefinition(Id.getIdentifierInfo(), R); - MacroDefinitions[MI] = addPreprocessedEntity(Def); + addPreprocessedEntity(Def); + MacroDefinitions[MI] = Def; } void PreprocessingRecord::MacroUndefined(const Token &Id, - const MacroInfo *MI) { - MacroDefinitions.erase(MI); + const MacroDirective *MD) { + // Note: MI may be null (when #undef'ining an undefined macro). + if (MD) + MacroDefinitions.erase(MD->getMacroInfo()); } void PreprocessingRecord::InclusionDirective( @@ -438,95 +476,6 @@ void PreprocessingRecord::InclusionDirective( addPreprocessedEntity(ID); } -bool PreprocessingRecord::rangeIntersectsConditionalDirective( - SourceRange Range) const { - if (Range.isInvalid()) - return false; - - CondDirectiveLocsTy::const_iterator - low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(), - Range.getBegin(), CondDirectiveLoc::Comp(SourceMgr)); - if (low == CondDirectiveLocs.end()) - return false; - - if (SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), low->getLoc())) - return false; - - CondDirectiveLocsTy::const_iterator - upp = std::upper_bound(low, CondDirectiveLocs.end(), - Range.getEnd(), CondDirectiveLoc::Comp(SourceMgr)); - unsigned uppIdx; - if (upp != CondDirectiveLocs.end()) - uppIdx = upp->getIdx(); - else - uppIdx = 0; - - return low->getIdx() != uppIdx; -} - -unsigned PreprocessingRecord::findCondDirectiveIdx(SourceLocation Loc) const { - if (Loc.isInvalid()) - return 0; - - CondDirectiveLocsTy::const_iterator - low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(), - Loc, CondDirectiveLoc::Comp(SourceMgr)); - if (low == CondDirectiveLocs.end()) - return 0; - return low->getIdx(); -} - -void PreprocessingRecord::addCondDirectiveLoc(CondDirectiveLoc DirLoc) { - // Ignore directives in system headers. - if (SourceMgr.isInSystemHeader(DirLoc.getLoc())) - return; - - assert(CondDirectiveLocs.empty() || - SourceMgr.isBeforeInTranslationUnit(CondDirectiveLocs.back().getLoc(), - DirLoc.getLoc())); - CondDirectiveLocs.push_back(DirLoc); -} - -void PreprocessingRecord::If(SourceLocation Loc, SourceRange ConditionRange) { - if (RecordCondDirectives) { - addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); - CondDirectiveStack.push_back(CondDirectiveNextIdx++); - } -} - -void PreprocessingRecord::Ifdef(SourceLocation Loc, const Token &MacroNameTok) { - if (RecordCondDirectives) { - addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); - CondDirectiveStack.push_back(CondDirectiveNextIdx++); - } -} - -void PreprocessingRecord::Ifndef(SourceLocation Loc,const Token &MacroNameTok) { - if (RecordCondDirectives) { - addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); - CondDirectiveStack.push_back(CondDirectiveNextIdx++); - } -} - -void PreprocessingRecord::Elif(SourceLocation Loc, SourceRange ConditionRange, - SourceLocation IfLoc) { - if (RecordCondDirectives) - addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); -} - -void PreprocessingRecord::Else(SourceLocation Loc, SourceLocation IfLoc) { - if (RecordCondDirectives) - addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); -} - -void PreprocessingRecord::Endif(SourceLocation Loc, SourceLocation IfLoc) { - if (RecordCondDirectives) { - addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); - assert(!CondDirectiveStack.empty()); - CondDirectiveStack.pop_back(); - } -} - size_t PreprocessingRecord::getTotalMemory() const { return BumpAlloc.getTotalMemory() + llvm::capacity_in_bytes(MacroDefinitions) diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index 3b070ce..53c45dc 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -26,50 +26,48 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/Preprocessor.h" -#include "clang/Lex/PreprocessorOptions.h" #include "MacroArgs.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/Lex/CodeCompletionHandler.h" #include "clang/Lex/ExternalPreprocessorSource.h" #include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/LiteralSupport.h" #include "clang/Lex/MacroInfo.h" +#include "clang/Lex/ModuleLoader.h" #include "clang/Lex/Pragma.h" #include "clang/Lex/PreprocessingRecord.h" +#include "clang/Lex/PreprocessorOptions.h" #include "clang/Lex/ScratchBuffer.h" -#include "clang/Lex/LexDiagnostic.h" -#include "clang/Lex/CodeCompletionHandler.h" -#include "clang/Lex/ModuleLoader.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Basic/FileManager.h" -#include "clang/Basic/TargetInfo.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Capacity.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Capacity.h" using namespace clang; //===----------------------------------------------------------------------===// ExternalPreprocessorSource::~ExternalPreprocessorSource() { } -PPMutationListener::~PPMutationListener() { } - -Preprocessor::Preprocessor(llvm::IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, +Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, DiagnosticsEngine &diags, LangOptions &opts, const TargetInfo *target, SourceManager &SM, HeaderSearch &Headers, ModuleLoader &TheModuleLoader, - IdentifierInfoLookup* IILookup, - bool OwnsHeaders, - bool DelayInitialization, - bool IncrProcessing) - : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(target), - FileMgr(Headers.getFileMgr()), - SourceMgr(SM), HeaderInfo(Headers), TheModuleLoader(TheModuleLoader), - ExternalSource(0), Identifiers(opts, IILookup), - IncrementalProcessing(IncrProcessing), CodeComplete(0), - CodeCompletionFile(0), CodeCompletionOffset(0), CodeCompletionReached(0), - SkipMainFilePreamble(0, true), CurPPLexer(0), - CurDirLookup(0), CurLexerKind(CLK_Lexer), Callbacks(0), Listener(0), - MacroArgCache(0), Record(0), MIChainHead(0), MICache(0) -{ + IdentifierInfoLookup *IILookup, bool OwnsHeaders, + bool DelayInitialization, bool IncrProcessing) + : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(target), + FileMgr(Headers.getFileMgr()), SourceMgr(SM), HeaderInfo(Headers), + TheModuleLoader(TheModuleLoader), ExternalSource(0), + Identifiers(opts, IILookup), IncrementalProcessing(IncrProcessing), + CodeComplete(0), CodeCompletionFile(0), CodeCompletionOffset(0), + CodeCompletionReached(0), SkipMainFilePreamble(0, true), CurPPLexer(0), + CurDirLookup(0), CurLexerKind(CLK_Lexer), Callbacks(0), + MacroArgCache(0), Record(0), MIChainHead(0), MICache(0) { OwnsHeaderSearch = OwnsHeaders; ScratchBuf = new ScratchBuffer(SourceMgr); @@ -96,9 +94,11 @@ Preprocessor::Preprocessor(llvm::IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, InMacroArgPreExpansion = false; NumCachedTokenLexers = 0; PragmasEnabled = true; + ParsingIfOrElifDirective = false; + PreprocessedOutput = false; CachedLexPos = 0; - + // We haven't read anything from the external source. ReadMacrosFromExternalSource = false; @@ -292,7 +292,7 @@ Preprocessor::macro_end(bool IncludeExternalMacros) const { /// \brief Compares macro tokens with a specified token value sequence. static bool MacroDefinitionEquals(const MacroInfo *MI, - llvm::ArrayRef<TokenValue> Tokens) { + ArrayRef<TokenValue> Tokens) { return Tokens.size() == MI->getNumTokens() && std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); } @@ -304,14 +304,15 @@ StringRef Preprocessor::getLastMacroWithSpelling( StringRef BestSpelling; for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); I != E; ++I) { - if (!I->second->isObjectLike()) + if (!I->second->getMacroInfo()->isObjectLike()) continue; - const MacroInfo *MI = I->second->findDefinitionAtLoc(Loc, SourceMgr); - if (!MI) + const MacroDirective::DefInfo + Def = I->second->findDirectiveAtLoc(Loc, SourceMgr); + if (!Def) continue; - if (!MacroDefinitionEquals(MI, Tokens)) + if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) continue; - SourceLocation Location = I->second->getDefinitionLoc(); + SourceLocation Location = Def.getLocation(); // Choose the macro defined latest. if (BestLocation.isInvalid() || (Location.isValid() && @@ -398,7 +399,7 @@ StringRef Preprocessor::getSpelling(const Token &Tok, SmallVectorImpl<char> &Buffer, bool *Invalid) const { // NOTE: this has to be checked *before* testing for an IdentifierInfo. - if (Tok.isNot(tok::raw_identifier)) { + if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { // Try the fast path. if (const IdentifierInfo *II = Tok.getIdentifierInfo()) return II->getName(); @@ -481,6 +482,7 @@ void Preprocessor::EnterMainSourceFile() { assert(SB && "Cannot create predefined source buffer"); FileID FID = SourceMgr.createFileIDForMemBuffer(SB); assert(!FID.isInvalid() && "Could not create FileID for predefines?"); + setPredefinesFileID(FID); // Start parsing the predefines. EnterSourceFile(FID, 0, SourceLocation()); @@ -496,6 +498,48 @@ void Preprocessor::EndSourceFile() { // Lexer Event Handling. //===----------------------------------------------------------------------===// +static void appendCodePoint(unsigned Codepoint, + llvm::SmallVectorImpl<char> &Str) { + char ResultBuf[4]; + char *ResultPtr = ResultBuf; + bool Res = llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr); + (void)Res; + assert(Res && "Unexpected conversion failure"); + Str.append(ResultBuf, ResultPtr); +} + +static void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) { + for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) { + if (*I != '\\') { + Buf.push_back(*I); + continue; + } + + ++I; + assert(*I == 'u' || *I == 'U'); + + unsigned NumHexDigits; + if (*I == 'u') + NumHexDigits = 4; + else + NumHexDigits = 8; + + assert(I + NumHexDigits <= E); + + uint32_t CodePoint = 0; + for (++I; NumHexDigits != 0; ++I, --NumHexDigits) { + unsigned Value = llvm::hexDigitValue(*I); + assert(Value != -1U); + + CodePoint <<= 4; + CodePoint += Value; + } + + appendCodePoint(CodePoint, Buf); + --I; + } +} + /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the /// identifier information for the token and install it into the token, /// updating the token kind accordingly. @@ -504,15 +548,22 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { // Look up this token, see if it is a macro, or if it is a language keyword. IdentifierInfo *II; - if (!Identifier.needsCleaning()) { + if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { // No cleaning needed, just use the characters from the lexed buffer. II = getIdentifierInfo(StringRef(Identifier.getRawIdentifierData(), - Identifier.getLength())); + Identifier.getLength())); } else { // Cleaning needed, alloca a buffer, clean into it, then use the buffer. SmallString<64> IdentifierBuffer; StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); - II = getIdentifierInfo(CleanedStr); + + if (Identifier.hasUCN()) { + SmallString<64> UCNIdentifierBuffer; + expandUCNs(UCNIdentifierBuffer, CleanedStr); + II = getIdentifierInfo(UCNIdentifierBuffer); + } else { + II = getIdentifierInfo(CleanedStr); + } } // Update the token info (identifier info and appropriate token kind). @@ -589,19 +640,19 @@ void Preprocessor::HandleIdentifier(Token &Identifier) { } // If this is a macro to be expanded, do it. - if (MacroInfo *MI = getMacroInfo(&II)) { + if (MacroDirective *MD = getMacroDirective(&II)) { + MacroInfo *MI = MD->getMacroInfo(); if (!DisableMacroExpansion) { - if (Identifier.isExpandDisabled()) { - Diag(Identifier, diag::pp_disabled_macro_expansion); - } else if (MI->isEnabled()) { - if (!HandleMacroExpandedIdentifier(Identifier, MI)) + if (!Identifier.isExpandDisabled() && MI->isEnabled()) { + if (!HandleMacroExpandedIdentifier(Identifier, MD)) return; } else { // C99 6.10.3.4p2 says that a disabled macro may never again be // expanded, even if it's in a context where it could be expanded in the // future. Identifier.setFlag(Token::DisableExpand); - Diag(Identifier, diag::pp_disabled_macro_expansion); + if (MI->isObjectLike() || isNextPPTokenLParen()) + Diag(Identifier, diag::pp_disabled_macro_expansion); } } } @@ -630,10 +681,10 @@ void Preprocessor::HandleIdentifier(Token &Identifier) { if (II.isExtensionToken() && !DisableMacroExpansion) Diag(Identifier, diag::ext_token_used); - // If this is the '__experimental_modules_import' contextual keyword, note + // If this is the 'import' contextual keyword, note // that the next token indicates a module name. // - // Note that we do not treat '__experimental_modules_import' as a contextual + // Note that we do not treat 'import' as a contextual // keyword when we're in a caching lexer, because caching lexers only get // used in contexts where import declarations are disallowed. if (II.isModulesImport() && !InMacroArgs && !DisableMacroExpansion && @@ -689,6 +740,47 @@ void Preprocessor::LexAfterModuleImport(Token &Result) { } } +bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, + const char *DiagnosticTag, + bool AllowMacroExpansion) { + // We need at least one string literal. + if (Result.isNot(tok::string_literal)) { + Diag(Result, diag::err_expected_string_literal) + << /*Source='in...'*/0 << DiagnosticTag; + return false; + } + + // Lex string literal tokens, optionally with macro expansion. + SmallVector<Token, 4> StrToks; + do { + StrToks.push_back(Result); + + if (Result.hasUDSuffix()) + Diag(Result, diag::err_invalid_string_udl); + + if (AllowMacroExpansion) + Lex(Result); + else + LexUnexpandedToken(Result); + } while (Result.is(tok::string_literal)); + + // Concatenate and parse the strings. + StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this); + assert(Literal.isAscii() && "Didn't allow wide strings in"); + + if (Literal.hadError) + return false; + + if (Literal.Pascal) { + Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) + << /*Source='in...'*/0 << DiagnosticTag; + return false; + } + + String = Literal.GetString(); + return true; +} + void Preprocessor::addCommentHandler(CommentHandler *Handler) { assert(Handler && "NULL comment handler"); assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == @@ -723,11 +815,10 @@ CommentHandler::~CommentHandler() { } CodeCompletionHandler::~CodeCompletionHandler() { } -void Preprocessor::createPreprocessingRecord(bool RecordConditionalDirectives) { +void Preprocessor::createPreprocessingRecord() { if (Record) return; - Record = new PreprocessingRecord(getSourceManager(), - RecordConditionalDirectives); + Record = new PreprocessingRecord(getSourceManager()); addPPCallbacks(Record); } diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp index a64c84d..5a59849 100644 --- a/lib/Lex/PreprocessorLexer.cpp +++ b/lib/Lex/PreprocessorLexer.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/PreprocessorLexer.h" -#include "clang/Lex/Preprocessor.h" -#include "clang/Lex/LexDiagnostic.h" #include "clang/Basic/SourceManager.h" +#include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/Preprocessor.h" using namespace clang; void PreprocessorLexer::anchor() { } diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp index dd7ebb0..0a66bba 100644 --- a/lib/Lex/TokenConcatenation.cpp +++ b/lib/Lex/TokenConcatenation.cpp @@ -12,25 +12,25 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/TokenConcatenation.h" +#include "clang/Basic/CharInfo.h" #include "clang/Lex/Preprocessor.h" #include "llvm/Support/ErrorHandling.h" -#include <cctype> using namespace clang; /// IsStringPrefix - Return true if Str is a string prefix. /// 'L', 'u', 'U', or 'u8'. Including raw versions. -static bool IsStringPrefix(StringRef Str, bool CPlusPlus0x) { +static bool IsStringPrefix(StringRef Str, bool CPlusPlus11) { if (Str[0] == 'L' || - (CPlusPlus0x && (Str[0] == 'u' || Str[0] == 'U' || Str[0] == 'R'))) { + (CPlusPlus11 && (Str[0] == 'u' || Str[0] == 'U' || Str[0] == 'R'))) { if (Str.size() == 1) return true; // "L", "u", "U", and "R" // Check for raw flavors. Need to make sure the first character wasn't - // already R. Need CPlusPlus0x check for "LR". - if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus0x) + // already R. Need CPlusPlus11 check for "LR". + if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus11) return true; // "LR", "uR", "UR" // Check for "u8" and "u8R" @@ -54,17 +54,17 @@ bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const { SourceManager &SM = PP.getSourceManager(); const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())); return IsStringPrefix(StringRef(Ptr, Tok.getLength()), - LangOpts.CPlusPlus0x); + LangOpts.CPlusPlus11); } if (Tok.getLength() < 256) { char Buffer[256]; const char *TokPtr = Buffer; unsigned length = PP.getSpelling(Tok, TokPtr); - return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus0x); + return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus11); } - return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus0x); + return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus11); } TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) { @@ -87,7 +87,7 @@ TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) { TokenInfo[tok::arrow ] |= aci_custom_firstchar; // These tokens have custom code in C++11 mode. - if (PP.getLangOpts().CPlusPlus0x) { + if (PP.getLangOpts().CPlusPlus11) { TokenInfo[tok::string_literal ] |= aci_custom; TokenInfo[tok::wide_string_literal ] |= aci_custom; TokenInfo[tok::utf8_string_literal ] |= aci_custom; @@ -156,9 +156,10 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, // First, check to see if the tokens were directly adjacent in the original // source. If they were, it must be okay to stick them together: if there // were an issue, the tokens would have been lexed differently. - if (PrevTok.getLocation().isFileID() && Tok.getLocation().isFileID() && - PrevTok.getLocation().getLocWithOffset(PrevTok.getLength()) == - Tok.getLocation()) + SourceManager &SM = PP.getSourceManager(); + SourceLocation PrevSpellLoc = SM.getSpellingLoc(PrevTok.getLocation()); + SourceLocation SpellLoc = SM.getSpellingLoc(Tok.getLocation()); + if (PrevSpellLoc.getLocWithOffset(PrevTok.getLength()) == SpellLoc) return false; tok::TokenKind PrevKind = PrevTok.getKind(); @@ -206,7 +207,7 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, case tok::wide_char_constant: case tok::utf16_char_constant: case tok::utf32_char_constant: - if (!PP.getLangOpts().CPlusPlus0x) + if (!PP.getLangOpts().CPlusPlus11) return false; // In C++11, a string or character literal followed by an identifier is a @@ -239,13 +240,12 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, return IsIdentifierStringPrefix(PrevTok); case tok::numeric_constant: - return isalnum(FirstChar) || Tok.is(tok::numeric_constant) || - FirstChar == '+' || FirstChar == '-' || FirstChar == '.' || - (PP.getLangOpts().CPlusPlus0x && FirstChar == '_'); + return isPreprocessingNumberBody(FirstChar) || + FirstChar == '+' || FirstChar == '-'; case tok::period: // ..., .*, .1234 return (FirstChar == '.' && PrevPrevTok.is(tok::period)) || - isdigit(FirstChar) || - (PP.getLangOpts().CPlusPlus && FirstChar == '*'); + isDigit(FirstChar) || + (PP.getLangOpts().CPlusPlus && FirstChar == '*'); case tok::amp: // && return FirstChar == '&'; case tok::plus: // ++ diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp index 59b7478..5b41fe9 100644 --- a/lib/Lex/TokenLexer.cpp +++ b/lib/Lex/TokenLexer.cpp @@ -13,10 +13,10 @@ #include "clang/Lex/TokenLexer.h" #include "MacroArgs.h" -#include "clang/Lex/MacroInfo.h" -#include "clang/Lex/Preprocessor.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/Preprocessor.h" #include "llvm/ADT/SmallString.h" using namespace clang; @@ -647,6 +647,12 @@ bool TokenLexer::PasteTokens(Token &Tok) { StartLoc = getExpansionLocForMacroDefLoc(StartLoc); if (EndLoc.isFileID()) EndLoc = getExpansionLocForMacroDefLoc(EndLoc); + FileID MacroFID = SM.getFileID(MacroExpansionStart); + while (SM.getFileID(StartLoc) != MacroFID) + StartLoc = SM.getImmediateExpansionRange(StartLoc).first; + while (SM.getFileID(EndLoc) != MacroFID) + EndLoc = SM.getImmediateExpansionRange(EndLoc).second; + Tok.setLocation(SM.createExpansionLoc(Tok.getLocation(), StartLoc, EndLoc, Tok.getLength())); @@ -743,14 +749,18 @@ static void updateConsecutiveMacroArgTokens(SourceManager &SM, Token *NextTok = begin_tokens + 1; for (; NextTok < end_tokens; ++NextTok) { + SourceLocation NextLoc = NextTok->getLocation(); + if (CurLoc.isFileID() != NextLoc.isFileID()) + break; // Token from different kind of FileID. + int RelOffs; - if (!SM.isInSameSLocAddrSpace(CurLoc, NextTok->getLocation(), &RelOffs)) + if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs)) break; // Token from different local/loaded location. // Check that token is not before the previous token or more than 50 // "characters" away. if (RelOffs < 0 || RelOffs > 50) break; - CurLoc = NextTok->getLocation(); + CurLoc = NextLoc; } // For the consecutive tokens, find the length of the SLocEntry to contain diff --git a/lib/Lex/UnicodeCharSets.h b/lib/Lex/UnicodeCharSets.h new file mode 100644 index 0000000..37ff8af --- /dev/null +++ b/lib/Lex/UnicodeCharSets.h @@ -0,0 +1,496 @@ +//===--- UnicodeCharSets.h - Contains important sets of characters --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef CLANG_LEX_UNICODECHARSETS_H +#define CLANG_LEX_UNICODECHARSETS_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/MutexGuard.h" +#include "llvm/Support/raw_ostream.h" + +namespace { + struct UnicodeCharRange { + uint32_t Lower; + uint32_t Upper; + }; + typedef llvm::ArrayRef<UnicodeCharRange> UnicodeCharSet; + + typedef llvm::SmallPtrSet<const UnicodeCharRange *, 16> ValidatedCharSetsTy; +} + +static inline ValidatedCharSetsTy &getValidatedCharSets() { + static ValidatedCharSetsTy Validated; + return Validated; +} + +/// Returns true if each of the ranges in \p CharSet is a proper closed range +/// [min, max], and if the ranges themselves are ordered and non-overlapping. +static inline bool isValidCharSet(UnicodeCharSet CharSet) { +#ifndef NDEBUG + static llvm::sys::Mutex ValidationMutex; + + // Check the validation cache. + { + llvm::MutexGuard Guard(ValidationMutex); + if (getValidatedCharSets().count(CharSet.data())) + return true; + } + + // Walk through the ranges. + uint32_t Prev = 0; + for (UnicodeCharSet::iterator I = CharSet.begin(), E = CharSet.end(); + I != E; ++I) { + if (Prev >= I->Lower) { + DEBUG(llvm::dbgs() << "Upper bound 0x"); + DEBUG(llvm::dbgs().write_hex(Prev)); + DEBUG(llvm::dbgs() << " should be less than succeeding lower bound 0x"); + DEBUG(llvm::dbgs().write_hex(I->Lower) << "\n"); + return false; + } + if (I->Upper < I->Lower) { + DEBUG(llvm::dbgs() << "Upper bound 0x"); + DEBUG(llvm::dbgs().write_hex(I->Lower)); + DEBUG(llvm::dbgs() << " should not be less than lower bound 0x"); + DEBUG(llvm::dbgs().write_hex(I->Upper) << "\n"); + return false; + } + Prev = I->Upper; + } + + // Update the validation cache. + { + llvm::MutexGuard Guard(ValidationMutex); + getValidatedCharSets().insert(CharSet.data()); + } +#endif + return true; +} + +/// Returns true if the Unicode code point \p C is within the set of +/// characters specified by \p CharSet. +LLVM_READONLY static inline bool isCharInSet(uint32_t C, + UnicodeCharSet CharSet) { + assert(isValidCharSet(CharSet)); + + size_t LowPoint = 0; + size_t HighPoint = CharSet.size(); + + // Binary search the set of char ranges. + while (HighPoint != LowPoint) { + size_t MidPoint = (HighPoint + LowPoint) / 2; + if (C < CharSet[MidPoint].Lower) + HighPoint = MidPoint; + else if (C > CharSet[MidPoint].Upper) + LowPoint = MidPoint + 1; + else + return true; + } + + return false; +} + + +// C11 D.1, C++11 [charname.allowed] +static const UnicodeCharRange C11AllowedIDChars[] = { + // 1 + { 0x00A8, 0x00A8 }, { 0x00AA, 0x00AA }, { 0x00AD, 0x00AD }, + { 0x00AF, 0x00AF }, { 0x00B2, 0x00B5 }, { 0x00B7, 0x00BA }, + { 0x00BC, 0x00BE }, { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 }, + { 0x00F8, 0x00FF }, + // 2 + { 0x0100, 0x167F }, { 0x1681, 0x180D }, { 0x180F, 0x1FFF }, + // 3 + { 0x200B, 0x200D }, { 0x202A, 0x202E }, { 0x203F, 0x2040 }, + { 0x2054, 0x2054 }, { 0x2060, 0x206F }, + // 4 + { 0x2070, 0x218F }, { 0x2460, 0x24FF }, { 0x2776, 0x2793 }, + { 0x2C00, 0x2DFF }, { 0x2E80, 0x2FFF }, + // 5 + { 0x3004, 0x3007 }, { 0x3021, 0x302F }, { 0x3031, 0x303F }, + // 6 + { 0x3040, 0xD7FF }, + // 7 + { 0xF900, 0xFD3D }, { 0xFD40, 0xFDCF }, { 0xFDF0, 0xFE44 }, + { 0xFE47, 0xFFFD }, + // 8 + { 0x10000, 0x1FFFD }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD }, + { 0x40000, 0x4FFFD }, { 0x50000, 0x5FFFD }, { 0x60000, 0x6FFFD }, + { 0x70000, 0x7FFFD }, { 0x80000, 0x8FFFD }, { 0x90000, 0x9FFFD }, + { 0xA0000, 0xAFFFD }, { 0xB0000, 0xBFFFD }, { 0xC0000, 0xCFFFD }, + { 0xD0000, 0xDFFFD }, { 0xE0000, 0xEFFFD } +}; + +// C++03 [extendid] +// Note that this is not the same as C++98, but we don't distinguish C++98 +// and C++03 in Clang. +static const UnicodeCharRange CXX03AllowedIDChars[] = { + // Latin + { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 }, { 0x00F8, 0x01F5 }, + { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 }, + + // Greek + { 0x0384, 0x0384 }, { 0x0388, 0x038A }, { 0x038C, 0x038C }, + { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03D6 }, + { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE }, + { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 }, + + // Cyrillic + { 0x0401, 0x040D }, { 0x040F, 0x044F }, { 0x0451, 0x045C }, + { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 }, + { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 }, + { 0x04F8, 0x04F9 }, + + // Armenian + { 0x0531, 0x0556 }, { 0x0561, 0x0587 }, + + // Hebrew + { 0x05D0, 0x05EA }, { 0x05F0, 0x05F4 }, + + // Arabic + { 0x0621, 0x063A }, { 0x0640, 0x0652 }, { 0x0670, 0x06B7 }, + { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE }, { 0x06E5, 0x06E7 }, + + // Devanagari + { 0x0905, 0x0939 }, { 0x0958, 0x0962 }, + + // Bengali + { 0x0985, 0x098C }, { 0x098F, 0x0990 }, { 0x0993, 0x09A8 }, + { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 }, { 0x09B6, 0x09B9 }, + { 0x09DC, 0x09DD }, { 0x09DF, 0x09E1 }, { 0x09F0, 0x09F1 }, + + // Gurmukhi + { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 }, { 0x0A13, 0x0A28 }, + { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 }, { 0x0A35, 0x0A36 }, + { 0x0A38, 0x0A39 }, { 0x0A59, 0x0A5C }, { 0x0A5E, 0x0A5E }, + + // Gujarti + { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D }, { 0x0A8F, 0x0A91 }, + { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 }, { 0x0AB2, 0x0AB3 }, + { 0x0AB5, 0x0AB9 }, { 0x0AE0, 0x0AE0 }, + + // Oriya + { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 }, { 0x0B13, 0x0B28 }, + { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 }, { 0x0B36, 0x0B39 }, + { 0x0B5C, 0x0B5D }, { 0x0B5F, 0x0B61 }, + + // Tamil + { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 }, { 0x0B92, 0x0B95 }, + { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C }, { 0x0B9E, 0x0B9F }, + { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA }, { 0x0BAE, 0x0BB5 }, + { 0x0BB7, 0x0BB9 }, + + // Telugu + { 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 }, + { 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 }, { 0x0C60, 0x0C61 }, + + // Kannada + { 0x0C85, 0x0C8C }, { 0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 }, + { 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 }, { 0x0CE0, 0x0CE1 }, + + // Malayam + { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 }, { 0x0D12, 0x0D28 }, + { 0x0D2A, 0x0D39 }, { 0x0D60, 0x0D61 }, + + // Thai + { 0x0E01, 0x0E30 }, { 0x0E32, 0x0E33 }, { 0x0E40, 0x0E46 }, + { 0x0E4F, 0x0E5B }, + + // Lao + { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E87 }, + { 0x0E88, 0x0E88 }, { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D }, + { 0x0E94, 0x0E97 }, { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 }, + { 0x0EA5, 0x0EA5 }, { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAA }, + { 0x0EAB, 0x0EAB }, { 0x0EAD, 0x0EB0 }, { 0x0EB2, 0x0EB2 }, + { 0x0EB3, 0x0EB3 }, { 0x0EBD, 0x0EBD }, { 0x0EC0, 0x0EC4 }, + { 0x0EC6, 0x0EC6 }, + + // Georgian + { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 }, + + // Hangul + { 0x1100, 0x1159 }, { 0x1161, 0x11A2 }, { 0x11A8, 0x11F9 }, + + // Latin (2) + { 0x1E00, 0x1E9A }, { 0x1EA0, 0x1EF9 }, + + // Greek (2) + { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 }, + { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 }, + { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D }, + { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC }, { 0x1FC2, 0x1FC4 }, + { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 }, { 0x1FD6, 0x1FDB }, + { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 }, { 0x1FF6, 0x1FFC }, + + // Hiragana + { 0x3041, 0x3094 }, { 0x309B, 0x309E }, + + // Katakana + { 0x30A1, 0x30FE }, + + // Bopmofo [sic] + { 0x3105, 0x312C }, + + // CJK Unified Ideographs + { 0x4E00, 0x9FA5 }, { 0xF900, 0xFA2D }, { 0xFB1F, 0xFB36 }, + { 0xFB38, 0xFB3C }, { 0xFB3E, 0xFB3E }, { 0xFB40, 0xFB41 }, + { 0xFB42, 0xFB44 }, { 0xFB46, 0xFBB1 }, { 0xFBD3, 0xFD3F }, + { 0xFD50, 0xFD8F }, { 0xFD92, 0xFDC7 }, { 0xFDF0, 0xFDFB }, + { 0xFE70, 0xFE72 }, { 0xFE74, 0xFE74 }, { 0xFE76, 0xFEFC }, + { 0xFF21, 0xFF3A }, { 0xFF41, 0xFF5A }, { 0xFF66, 0xFFBE }, + { 0xFFC2, 0xFFC7 }, { 0xFFCA, 0xFFCF }, { 0xFFD2, 0xFFD7 }, + { 0xFFDA, 0xFFDC } +}; + +// C99 Annex D +static const UnicodeCharRange C99AllowedIDChars[] = { + // Latin (1) + { 0x00AA, 0x00AA }, + + // Special characters (1) + { 0x00B5, 0x00B5 }, { 0x00B7, 0x00B7 }, + + // Latin (2) + { 0x00BA, 0x00BA }, { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 }, + { 0x00F8, 0x01F5 }, { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 }, + + // Special characters (2) + { 0x02B0, 0x02B8 }, { 0x02BB, 0x02BB }, { 0x02BD, 0x02C1 }, + { 0x02D0, 0x02D1 }, { 0x02E0, 0x02E4 }, { 0x037A, 0x037A }, + + // Greek (1) + { 0x0386, 0x0386 }, { 0x0388, 0x038A }, { 0x038C, 0x038C }, + { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03D6 }, + { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE }, + { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 }, + + // Cyrillic + { 0x0401, 0x040C }, { 0x040E, 0x044F }, { 0x0451, 0x045C }, + { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 }, + { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 }, + { 0x04F8, 0x04F9 }, + + // Armenian (1) + { 0x0531, 0x0556 }, + + // Special characters (3) + { 0x0559, 0x0559 }, + + // Armenian (2) + { 0x0561, 0x0587 }, + + // Hebrew + { 0x05B0, 0x05B9 }, { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, + { 0x05C1, 0x05C2 }, { 0x05D0, 0x05EA }, { 0x05F0, 0x05F2 }, + + // Arabic (1) + { 0x0621, 0x063A }, { 0x0640, 0x0652 }, + + // Digits (1) + { 0x0660, 0x0669 }, + + // Arabic (2) + { 0x0670, 0x06B7 }, { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE }, + { 0x06D0, 0x06DC }, { 0x06E5, 0x06E8 }, { 0x06EA, 0x06ED }, + + // Digits (2) + { 0x06F0, 0x06F9 }, + + // Devanagari and Special characeter 0x093D. + { 0x0901, 0x0903 }, { 0x0905, 0x0939 }, { 0x093D, 0x094D }, + { 0x0950, 0x0952 }, { 0x0958, 0x0963 }, + + // Digits (3) + { 0x0966, 0x096F }, + + // Bengali (1) + { 0x0981, 0x0983 }, { 0x0985, 0x098C }, { 0x098F, 0x0990 }, + { 0x0993, 0x09A8 }, { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 }, + { 0x09B6, 0x09B9 }, { 0x09BE, 0x09C4 }, { 0x09C7, 0x09C8 }, + { 0x09CB, 0x09CD }, { 0x09DC, 0x09DD }, { 0x09DF, 0x09E3 }, + + // Digits (4) + { 0x09E6, 0x09EF }, + + // Bengali (2) + { 0x09F0, 0x09F1 }, + + // Gurmukhi (1) + { 0x0A02, 0x0A02 }, { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 }, + { 0x0A13, 0x0A28 }, { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 }, + { 0x0A35, 0x0A36 }, { 0x0A38, 0x0A39 }, { 0x0A3E, 0x0A42 }, + { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A59, 0x0A5C }, + { 0x0A5E, 0x0A5E }, + + // Digits (5) + { 0x0A66, 0x0A6F }, + + // Gurmukhi (2) + { 0x0A74, 0x0A74 }, + + // Gujarti + { 0x0A81, 0x0A83 }, { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D }, + { 0x0A8F, 0x0A91 }, { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 }, + { 0x0AB2, 0x0AB3 }, { 0x0AB5, 0x0AB9 }, { 0x0ABD, 0x0AC5 }, + { 0x0AC7, 0x0AC9 }, { 0x0ACB, 0x0ACD }, { 0x0AD0, 0x0AD0 }, + { 0x0AE0, 0x0AE0 }, + + // Digits (6) + { 0x0AE6, 0x0AEF }, + + // Oriya and Special character 0x0B3D + { 0x0B01, 0x0B03 }, { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 }, + { 0x0B13, 0x0B28 }, { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 }, + { 0x0B36, 0x0B39 }, { 0x0B3D, 0x0B43 }, { 0x0B47, 0x0B48 }, + { 0x0B4B, 0x0B4D }, { 0x0B5C, 0x0B5D }, { 0x0B5F, 0x0B61 }, + + // Digits (7) + { 0x0B66, 0x0B6F }, + + // Tamil + { 0x0B82, 0x0B83 }, { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 }, + { 0x0B92, 0x0B95 }, { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C }, + { 0x0B9E, 0x0B9F }, { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA }, + { 0x0BAE, 0x0BB5 }, { 0x0BB7, 0x0BB9 }, { 0x0BBE, 0x0BC2 }, + { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD }, + + // Digits (8) + { 0x0BE7, 0x0BEF }, + + // Telugu + { 0x0C01, 0x0C03 }, { 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 }, + { 0x0C12, 0x0C28 }, { 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 }, + { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, + { 0x0C60, 0x0C61 }, + + // Digits (9) + { 0x0C66, 0x0C6F }, + + // Kannada + { 0x0C82, 0x0C83 }, { 0x0C85, 0x0C8C }, { 0x0C8E, 0x0C90 }, + { 0x0C92, 0x0CA8 }, { 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 }, + { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD }, + { 0x0CDE, 0x0CDE }, { 0x0CE0, 0x0CE1 }, + + // Digits (10) + { 0x0CE6, 0x0CEF }, + + // Malayam + { 0x0D02, 0x0D03 }, { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 }, + { 0x0D12, 0x0D28 }, { 0x0D2A, 0x0D39 }, { 0x0D3E, 0x0D43 }, + { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D60, 0x0D60 }, + + // Digits (11) + { 0x0D66, 0x0D6F }, + + // Thai...including Digits { 0x0E50, 0x0E59 } + { 0x0E01, 0x0E3A }, { 0x0E40, 0x0E5B }, + + // Lao (1) + { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E88 }, + { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D }, { 0x0E94, 0x0E97 }, + { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 }, { 0x0EA5, 0x0EA5 }, + { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAB }, { 0x0EAD, 0x0EAE }, + { 0x0EB0, 0x0EB9 }, { 0x0EBB, 0x0EBD }, { 0x0EC0, 0x0EC4 }, + { 0x0EC6, 0x0EC6 }, { 0x0EC8, 0x0ECD }, + + // Digits (12) + { 0x0ED0, 0x0ED9 }, + + // Lao (2) + { 0x0EDC, 0x0EDD }, + + // Tibetan (1) + { 0x0F00, 0x0F00 }, { 0x0F18, 0x0F19 }, + + // Digits (13) + { 0x0F20, 0x0F33 }, + + // Tibetan (2) + { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, + { 0x0F3E, 0x0F47 }, { 0x0F49, 0x0F69 }, { 0x0F71, 0x0F84 }, + { 0x0F86, 0x0F8B }, { 0x0F90, 0x0F95 }, { 0x0F97, 0x0F97 }, + { 0x0F99, 0x0FAD }, { 0x0FB1, 0x0FB7 }, { 0x0FB9, 0x0FB9 }, + + // Georgian + { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 }, + + // Latin (3) + { 0x1E00, 0x1E9B }, { 0x1EA0, 0x1EF9 }, + + // Greek (2) + { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 }, + { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 }, + { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D }, + { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC }, + + // Special characters (4) + { 0x1FBE, 0x1FBE }, + + // Greek (3) + { 0x1FC2, 0x1FC4 }, { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 }, + { 0x1FD6, 0x1FDB }, { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 }, + { 0x1FF6, 0x1FFC }, + + // Special characters (5) + { 0x203F, 0x2040 }, + + // Latin (4) + { 0x207F, 0x207F }, + + // Special characters (6) + { 0x2102, 0x2102 }, { 0x2107, 0x2107 }, { 0x210A, 0x2113 }, + { 0x2115, 0x2115 }, { 0x2118, 0x211D }, { 0x2124, 0x2124 }, + { 0x2126, 0x2126 }, { 0x2128, 0x2128 }, { 0x212A, 0x2131 }, + { 0x2133, 0x2138 }, { 0x2160, 0x2182 }, { 0x3005, 0x3007 }, + { 0x3021, 0x3029 }, + + // Hiragana + { 0x3041, 0x3093 }, { 0x309B, 0x309C }, + + // Katakana + { 0x30A1, 0x30F6 }, { 0x30FB, 0x30FC }, + + // Bopmofo [sic] + { 0x3105, 0x312C }, + + // CJK Unified Ideographs + { 0x4E00, 0x9FA5 }, + + // Hangul, + { 0xAC00, 0xD7A3 } +}; + +// C11 D.2, C++11 [charname.disallowed] +static const UnicodeCharRange C11DisallowedInitialIDChars[] = { + { 0x0300, 0x036F }, { 0x1DC0, 0x1DFF }, { 0x20D0, 0x20FF }, + { 0xFE20, 0xFE2F } +}; + +// C99 6.4.2.1p3: The initial character [of an identifier] shall not be a +// universal character name designating a digit. +// C99 Annex D defines these characters as "Digits". +static const UnicodeCharRange C99DisallowedInitialIDChars[] = { + { 0x0660, 0x0669 }, { 0x06F0, 0x06F9 }, { 0x0966, 0x096F }, + { 0x09E6, 0x09EF }, { 0x0A66, 0x0A6F }, { 0x0AE6, 0x0AEF }, + { 0x0B66, 0x0B6F }, { 0x0BE7, 0x0BEF }, { 0x0C66, 0x0C6F }, + { 0x0CE6, 0x0CEF }, { 0x0D66, 0x0D6F }, { 0x0E50, 0x0E59 }, + { 0x0ED0, 0x0ED9 }, { 0x0F20, 0x0F33 } +}; + +// Unicode v6.2, chapter 6.2, table 6-2. +static const UnicodeCharRange UnicodeWhitespaceChars[] = { + { 0x0085, 0x0085 }, { 0x00A0, 0x00A0 }, { 0x1680, 0x1680 }, + { 0x180E, 0x180E }, { 0x2000, 0x200A }, { 0x2028, 0x2029 }, + { 0x202F, 0x202F }, { 0x205F, 0x205F }, { 0x3000, 0x3000 } +}; + +#endif |