| author | dim <dim@FreeBSD.org> | 2013-12-22 00:07:40 +0000 |
|---|---|---|
| committer | dim <dim@FreeBSD.org> | 2013-12-22 00:07:40 +0000 |
| commit | 952eddef9aff85b1e92626e89baaf7a360e2ac85 (patch) | |
| tree | df8df0b0067b381eab470a3b8f28d14a552a6340 /lib/Lex | |
| parent | ea266cad53e3d49771fa38103913d3ec7a166694 (diff) | |
| download | FreeBSD-src-952eddef9aff85b1e92626e89baaf7a360e2ac85.zip, FreeBSD-src-952eddef9aff85b1e92626e89baaf7a360e2ac85.tar.gz | |
Vendor import of clang release_34 branch r197841 (effectively, 3.4 RC3):
https://llvm.org/svn/llvm-project/cfe/branches/release_34@197841
Diffstat (limited to 'lib/Lex')
| Mode | File | Lines |
|---|---|---|
| -rw-r--r-- | lib/Lex/HeaderMap.cpp | 2 |
| -rw-r--r-- | lib/Lex/HeaderSearch.cpp | 125 |
| -rw-r--r-- | lib/Lex/Lexer.cpp | 470 |
| -rw-r--r-- | lib/Lex/LiteralSupport.cpp | 156 |
| -rw-r--r-- | lib/Lex/ModuleMap.cpp | 392 |
| -rw-r--r-- | lib/Lex/PPConditionalDirectiveRecord.cpp | 4 |
| -rw-r--r-- | lib/Lex/PPDirectives.cpp | 219 |
| -rw-r--r-- | lib/Lex/PPExpressions.cpp | 29 |
| -rw-r--r-- | lib/Lex/PPLexerChange.cpp | 86 |
| -rw-r--r-- | lib/Lex/PPMacroExpansion.cpp | 280 |
| -rw-r--r-- | lib/Lex/PTHLexer.cpp | 89 |
| -rw-r--r-- | lib/Lex/Pragma.cpp | 206 |
| -rw-r--r-- | lib/Lex/PreprocessingRecord.cpp | 3 |
| -rw-r--r-- | lib/Lex/Preprocessor.cpp | 48 |
| -rw-r--r-- | lib/Lex/PreprocessorLexer.cpp | 5 |
| -rw-r--r-- | lib/Lex/TokenLexer.cpp | 61 |
| -rw-r--r-- | lib/Lex/UnicodeCharSets.h | 102 |
17 files changed, 1625 insertions, 652 deletions
diff --git a/lib/Lex/HeaderMap.cpp b/lib/Lex/HeaderMap.cpp index dcf1f0c..478462c 100644 --- a/lib/Lex/HeaderMap.cpp +++ b/lib/Lex/HeaderMap.cpp @@ -82,7 +82,7 @@ const HeaderMap *HeaderMap::Create(const FileEntry *FE, FileManager &FM) { if (FileSize <= sizeof(HMapHeader)) return 0; OwningPtr<const llvm::MemoryBuffer> FileBuffer(FM.getBufferForFile(FE)); - if (FileBuffer == 0) return 0; // Unreadable file? + if (!FileBuffer) return 0; // Unreadable file? const char *FileStart = FileBuffer->getBufferStart(); // We know the file is at least as big as the header, check it now. diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp index 304bd69..9e43dda 100644 --- a/lib/Lex/HeaderSearch.cpp +++ b/lib/Lex/HeaderSearch.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/Capacity.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" #include <cstdio> #if defined(LLVM_ON_UNIX) #include <limits.h> @@ -43,11 +44,11 @@ HeaderFileInfo::getControllingMacro(ExternalIdentifierLookup *External) { ExternalHeaderFileInfoSource::~ExternalHeaderFileInfoSource() {} HeaderSearch::HeaderSearch(IntrusiveRefCntPtr<HeaderSearchOptions> HSOpts, - FileManager &FM, DiagnosticsEngine &Diags, + SourceManager &SourceMgr, DiagnosticsEngine &Diags, const LangOptions &LangOpts, const TargetInfo *Target) - : HSOpts(HSOpts), FileMgr(FM), FrameworkMap(64), - ModMap(FileMgr, *Diags.getClient(), LangOpts, Target, *this) + : HSOpts(HSOpts), FileMgr(SourceMgr.getFileManager()), FrameworkMap(64), + ModMap(SourceMgr, *Diags.getClient(), LangOpts, Target, *this) { AngledDirIdx = 0; SystemDirIdx = 0; @@ -160,9 +161,11 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, bool AllowSearch) { // Only deal with normal search directories. if (!SearchDirs[Idx].isNormalDir()) continue; - + + bool IsSystem = SearchDirs[Idx].isSystemHeaderDirectory(); // Search for a module map file in this directory. - if (loadModuleMapFile(SearchDirs[Idx].getDir()) == LMM_NewlyLoaded) { + if (loadModuleMapFile(SearchDirs[Idx].getDir(), IsSystem) + == LMM_NewlyLoaded) { // We just loaded a module map file; check whether the module is // available now. Module = ModMap.findModule(ModuleName); @@ -175,7 +178,7 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, bool AllowSearch) { SmallString<128> NestedModuleMapDirName; NestedModuleMapDirName = SearchDirs[Idx].getDir()->getName(); llvm::sys::path::append(NestedModuleMapDirName, ModuleName); - if (loadModuleMapFile(NestedModuleMapDirName) == LMM_NewlyLoaded) { + if (loadModuleMapFile(NestedModuleMapDirName, IsSystem) == LMM_NewlyLoaded){ // If we just loaded a module map file, look for the module again. Module = ModMap.findModule(ModuleName); if (Module) @@ -223,7 +226,7 @@ const FileEntry *DirectoryLookup::LookupFile( HeaderSearch &HS, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, - Module **SuggestedModule, + ModuleMap::KnownHeader *SuggestedModule, bool &InUserSpecifiedSystemFramework) const { InUserSpecifiedSystemFramework = false; @@ -244,15 +247,18 @@ const FileEntry *DirectoryLookup::LookupFile( // If we have a module map that might map this header, load it and // check whether we'll have a suggestion for a module. 
- if (SuggestedModule && HS.hasModuleMap(TmpDir, getDir())) { - const FileEntry *File = HS.getFileMgr().getFile(TmpDir.str(), + HS.hasModuleMap(TmpDir, getDir(), isSystemHeaderDirectory()); + if (SuggestedModule) { + const FileEntry *File = HS.getFileMgr().getFile(TmpDir.str(), /*openFile=*/false); if (!File) return File; - // If there is a module that corresponds to this header, - // suggest it. + // If there is a module that corresponds to this header, suggest it. *SuggestedModule = HS.findModuleForHeader(File); + if (!SuggestedModule->getModule() && + HS.hasModuleMap(TmpDir, getDir(), isSystemHeaderDirectory())) + *SuggestedModule = HS.findModuleForHeader(File); return File; } @@ -337,7 +343,7 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( HeaderSearch &HS, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, - Module **SuggestedModule, + ModuleMap::KnownHeader *SuggestedModule, bool &InUserSpecifiedSystemFramework) const { FileManager &FileMgr = HS.getFileMgr(); @@ -496,11 +502,29 @@ const FileEntry *HeaderSearch::LookupFile( const FileEntry *CurFileEnt, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, - Module **SuggestedModule, + ModuleMap::KnownHeader *SuggestedModule, bool SkipCache) { + if (!HSOpts->ModuleMapFiles.empty()) { + // Preload all explicitly specified module map files. This enables modules + // map files lying in a directory structure separate from the header files + // that they describe. These cannot be loaded lazily upon encountering a + // header file, as there is no other knwon mapping from a header file to its + // module map file. + for (llvm::SetVector<std::string>::iterator + I = HSOpts->ModuleMapFiles.begin(), + E = HSOpts->ModuleMapFiles.end(); + I != E; ++I) { + const FileEntry *File = FileMgr.getFile(*I); + if (!File) + continue; + loadModuleMapFile(File, /*IsSystem=*/false); + } + HSOpts->ModuleMapFiles.clear(); + } + if (SuggestedModule) - *SuggestedModule = 0; + *SuggestedModule = ModuleMap::KnownHeader(); // If 'Filename' is absolute, check to see if it exists and no searching. if (llvm::sys::path::is_absolute(Filename)) { @@ -676,7 +700,7 @@ LookupSubframeworkHeader(StringRef Filename, const FileEntry *ContextFileEnt, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, - Module **SuggestedModule) { + ModuleMap::KnownHeader *SuggestedModule) { assert(ContextFileEnt && "No context file?"); // Framework names must have a '/' in the filename. Find it. 
@@ -866,19 +890,16 @@ bool HeaderSearch::isFileMultipleIncludeGuarded(const FileEntry *File) { HFI.ControllingMacro || HFI.ControllingMacroID; } -void HeaderSearch::MarkFileModuleHeader(const FileEntry *FE) { +void HeaderSearch::MarkFileModuleHeader(const FileEntry *FE, + ModuleMap::ModuleHeaderRole Role, + bool isCompilingModuleHeader) { if (FE->getUID() >= FileInfo.size()) FileInfo.resize(FE->getUID()+1); HeaderFileInfo &HFI = FileInfo[FE->getUID()]; HFI.isModuleHeader = true; -} - -void HeaderSearch::setHeaderFileInfoForUID(HeaderFileInfo HFI, unsigned UID) { - if (UID >= FileInfo.size()) - FileInfo.resize(UID+1); - HFI.Resolved = true; - FileInfo[UID] = HFI; + HFI.isCompilingModuleHeader = isCompilingModuleHeader; + HFI.setHeaderRole(Role); } bool HeaderSearch::ShouldEnterIncludeFile(const FileEntry *File, bool isImport){ @@ -930,7 +951,8 @@ StringRef HeaderSearch::getUniqueFrameworkName(StringRef Framework) { } bool HeaderSearch::hasModuleMap(StringRef FileName, - const DirectoryEntry *Root) { + const DirectoryEntry *Root, + bool IsSystem) { SmallVector<const DirectoryEntry *, 2> FixUpDirectories; StringRef DirName = FileName; @@ -939,21 +961,20 @@ bool HeaderSearch::hasModuleMap(StringRef FileName, DirName = llvm::sys::path::parent_path(DirName); if (DirName.empty()) return false; - + // Determine whether this directory exists. const DirectoryEntry *Dir = FileMgr.getDirectory(DirName); if (!Dir) return false; - - // Try to load the module map file in this directory. - switch (loadModuleMapFile(Dir)) { + + // Try to load the "module.map" file in this directory. + switch (loadModuleMapFile(Dir, IsSystem)) { case LMM_NewlyLoaded: case LMM_AlreadyLoaded: // Success. All of the directories we stepped through inherit this module // map file. for (unsigned I = 0, N = FixUpDirectories.size(); I != N; ++I) DirectoryHasModuleMap[FixUpDirectories[I]] = true; - return true; case LMM_NoDirectory: @@ -971,19 +992,17 @@ bool HeaderSearch::hasModuleMap(StringRef FileName, } while (true); } -Module *HeaderSearch::findModuleForHeader(const FileEntry *File) const { +ModuleMap::KnownHeader +HeaderSearch::findModuleForHeader(const FileEntry *File) const { if (ExternalSource) { // Make sure the external source has handled header info about this file, // which includes whether the file is part of a module. (void)getFileInfo(File); } - if (Module *Mod = ModMap.findModuleForHeader(File)) - return Mod; - - return 0; + return ModMap.findModuleForHeader(File); } -bool HeaderSearch::loadModuleMapFile(const FileEntry *File) { +bool HeaderSearch::loadModuleMapFile(const FileEntry *File, bool IsSystem) { const DirectoryEntry *Dir = File->getDir(); llvm::DenseMap<const DirectoryEntry *, bool>::iterator KnownDir @@ -991,14 +1010,14 @@ bool HeaderSearch::loadModuleMapFile(const FileEntry *File) { if (KnownDir != DirectoryHasModuleMap.end()) return !KnownDir->second; - bool Result = ModMap.parseModuleMapFile(File); + bool Result = ModMap.parseModuleMapFile(File, IsSystem); if (!Result && llvm::sys::path::filename(File->getName()) == "module.map") { // If the file we loaded was a module.map, look for the corresponding // module_private.map. 
SmallString<128> PrivateFilename(Dir->getName()); llvm::sys::path::append(PrivateFilename, "module_private.map"); if (const FileEntry *PrivateFile = FileMgr.getFile(PrivateFilename)) - Result = ModMap.parseModuleMapFile(PrivateFile); + Result = ModMap.parseModuleMapFile(PrivateFile, IsSystem); } DirectoryHasModuleMap[Dir] = !Result; @@ -1012,7 +1031,7 @@ Module *HeaderSearch::loadFrameworkModule(StringRef Name, return Module; // Try to load a module map file. - switch (loadModuleMapFile(Dir)) { + switch (loadModuleMapFile(Dir, IsSystem)) { case LMM_InvalidModuleMap: break; @@ -1052,15 +1071,15 @@ Module *HeaderSearch::loadFrameworkModule(StringRef Name, HeaderSearch::LoadModuleMapResult -HeaderSearch::loadModuleMapFile(StringRef DirName) { +HeaderSearch::loadModuleMapFile(StringRef DirName, bool IsSystem) { if (const DirectoryEntry *Dir = FileMgr.getDirectory(DirName)) - return loadModuleMapFile(Dir); + return loadModuleMapFile(Dir, IsSystem); return LMM_NoDirectory; } HeaderSearch::LoadModuleMapResult -HeaderSearch::loadModuleMapFile(const DirectoryEntry *Dir) { +HeaderSearch::loadModuleMapFile(const DirectoryEntry *Dir, bool IsSystem) { llvm::DenseMap<const DirectoryEntry *, bool>::iterator KnownDir = DirectoryHasModuleMap.find(Dir); if (KnownDir != DirectoryHasModuleMap.end()) @@ -1072,7 +1091,7 @@ HeaderSearch::loadModuleMapFile(const DirectoryEntry *Dir) { llvm::sys::path::append(ModuleMapFileName, "module.map"); if (const FileEntry *ModuleMapFile = FileMgr.getFile(ModuleMapFileName)) { // We have found a module map file. Try to parse it. - if (ModMap.parseModuleMapFile(ModuleMapFile)) { + if (ModMap.parseModuleMapFile(ModuleMapFile, IsSystem)) { // No suitable module map. DirectoryHasModuleMap[Dir] = false; return LMM_InvalidModuleMap; @@ -1087,7 +1106,7 @@ HeaderSearch::loadModuleMapFile(const DirectoryEntry *Dir) { llvm::sys::path::append(ModuleMapFileName, "module_private.map"); if (const FileEntry *PrivateModuleMapFile = FileMgr.getFile(ModuleMapFileName)) { - if (ModMap.parseModuleMapFile(PrivateModuleMapFile)) { + if (ModMap.parseModuleMapFile(PrivateModuleMapFile, IsSystem)) { // No suitable module map. DirectoryHasModuleMap[Dir] = false; return LMM_InvalidModuleMap; @@ -1107,6 +1126,7 @@ void HeaderSearch::collectAllModules(SmallVectorImpl<Module *> &Modules) { // Load module maps for each of the header search directories. for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) { + bool IsSystem = SearchDirs[Idx].isSystemHeaderDirectory(); if (SearchDirs[Idx].isFramework()) { llvm::error_code EC; SmallString<128> DirNative; @@ -1114,7 +1134,6 @@ void HeaderSearch::collectAllModules(SmallVectorImpl<Module *> &Modules) { DirNative); // Search each of the ".framework" directories to load them as modules. - bool IsSystem = SearchDirs[Idx].getDirCharacteristic() != SrcMgr::C_User; for (llvm::sys::fs::directory_iterator Dir(DirNative.str(), EC), DirEnd; Dir != DirEnd && !EC; Dir.increment(EC)) { if (llvm::sys::path::extension(Dir->path()) != ".framework") @@ -1136,7 +1155,7 @@ void HeaderSearch::collectAllModules(SmallVectorImpl<Module *> &Modules) { continue; // Try to load a module map file for the search directory. - loadModuleMapFile(SearchDirs[Idx].getDir()); + loadModuleMapFile(SearchDirs[Idx].getDir(), IsSystem); // Try to load module map files for immediate subdirectories of this search // directory. 
@@ -1151,6 +1170,20 @@ void HeaderSearch::collectAllModules(SmallVectorImpl<Module *> &Modules) { } } +void HeaderSearch::loadTopLevelSystemModules() { + // Load module maps for each of the header search directories. + for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) { + // We only care about normal header directories. + if (!SearchDirs[Idx].isNormalDir()) { + continue; + } + + // Try to load a module map file for the search directory. + loadModuleMapFile(SearchDirs[Idx].getDir(), + SearchDirs[Idx].isSystemHeaderDirectory()); + } +} + void HeaderSearch::loadSubdirectoryModuleMaps(DirectoryLookup &SearchDir) { if (SearchDir.haveSearchedAllModuleMaps()) return; @@ -1160,7 +1193,7 @@ void HeaderSearch::loadSubdirectoryModuleMaps(DirectoryLookup &SearchDir) { llvm::sys::path::native(SearchDir.getDir()->getName(), DirNative); for (llvm::sys::fs::directory_iterator Dir(DirNative.str(), EC), DirEnd; Dir != DirEnd && !EC; Dir.increment(EC)) { - loadModuleMapFile(Dir->path()); + loadModuleMapFile(Dir->path(), SearchDir.isSystemHeaderDirectory()); } SearchDir.setSearchedAllModuleMaps(true); diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 9958287..c071455 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -29,6 +29,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Lex/CodeCompletionHandler.h" #include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/LiteralSupport.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" @@ -93,6 +94,10 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr, // Start of the file is a start of line. IsAtStartOfLine = true; + IsAtPhysicalStartOfLine = true; + + HasLeadingSpace = false; + HasLeadingEmptyMacro = false; // We are not after parsing a #. ParsingPreprocessorDirective = false; @@ -430,7 +435,8 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc, /// \returns true if there was a failure, false on success. bool Lexer::getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, - const LangOptions &LangOpts) { + const LangOptions &LangOpts, + bool IgnoreWhiteSpace) { // TODO: this could be special cased for common tokens like identifiers, ')', // etc to make this faster, if it mattered. Just look at StrData[0] to handle // all obviously single-char tokens. This could use @@ -448,7 +454,7 @@ bool Lexer::getRawToken(SourceLocation Loc, Token &Result, const char *StrData = Buffer.data()+LocInfo.second; - if (isWhitespace(StrData[0])) + if (!IgnoreWhiteSpace && isWhitespace(StrData[0])) return true; // Create a lexer starting at the beginning of this token. @@ -798,14 +804,10 @@ bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc, SourceLocation *MacroBegin) { assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc"); - std::pair<FileID, unsigned> infoLoc = SM.getDecomposedLoc(loc); - // FIXME: If the token comes from the macro token paste operator ('##') - // this function will always return false; - if (infoLoc.second > 0) - return false; // Does not point at the start of token. + SourceLocation expansionLoc; + if (!SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc)) + return false; - SourceLocation expansionLoc = - SM.getSLocEntry(infoLoc.first).getExpansion().getExpansionLocStart(); if (expansionLoc.isFileID()) { // No other macro expansions, this is the first. 
if (MacroBegin) @@ -829,16 +831,11 @@ bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc, if (tokLen == 0) return false; - FileID FID = SM.getFileID(loc); - SourceLocation afterLoc = loc.getLocWithOffset(tokLen+1); - if (SM.isInFileID(afterLoc, FID)) - return false; // Still in the same FileID, does not point to the last token. - - // FIXME: If the token comes from the macro token paste operator ('##') - // or the stringify operator ('#') this function will always return false; + SourceLocation afterLoc = loc.getLocWithOffset(tokLen); + SourceLocation expansionLoc; + if (!SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc)) + return false; - SourceLocation expansionLoc = - SM.getSLocEntry(FID).getExpansion().getExpansionLocEnd(); if (expansionLoc.isFileID()) { // No other macro expansions. if (MacroEnd) @@ -916,25 +913,25 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, return makeRangeFromFileLocs(Range, SM, LangOpts); } - FileID FID; - unsigned BeginOffs; - llvm::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin); - if (FID.isInvalid()) + bool Invalid = false; + const SrcMgr::SLocEntry &BeginEntry = SM.getSLocEntry(SM.getFileID(Begin), + &Invalid); + if (Invalid) return CharSourceRange(); - unsigned EndOffs; - if (!SM.isInFileID(End, FID, &EndOffs) || - BeginOffs > EndOffs) - return CharSourceRange(); + if (BeginEntry.getExpansion().isMacroArgExpansion()) { + const SrcMgr::SLocEntry &EndEntry = SM.getSLocEntry(SM.getFileID(End), + &Invalid); + if (Invalid) + return CharSourceRange(); - const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID); - const SrcMgr::ExpansionInfo &Expansion = E->getExpansion(); - if (Expansion.isMacroArgExpansion() && - Expansion.getSpellingLoc().isFileID()) { - SourceLocation SpellLoc = Expansion.getSpellingLoc(); - Range.setBegin(SpellLoc.getLocWithOffset(BeginOffs)); - Range.setEnd(SpellLoc.getLocWithOffset(EndOffs)); - return makeRangeFromFileLocs(Range, SM, LangOpts); + if (EndEntry.getExpansion().isMacroArgExpansion() && + BeginEntry.getExpansion().getExpansionLocStart() == + EndEntry.getExpansion().getExpansionLocStart()) { + Range.setBegin(SM.getImmediateSpellingLoc(Begin)); + Range.setEnd(SM.getImmediateSpellingLoc(End)); + return makeFileCharRange(Range, SM, LangOpts); + } } return CharSourceRange(); @@ -1369,26 +1366,42 @@ void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) { BufferPtr += Bytes; if (BufferPtr > BufferEnd) BufferPtr = BufferEnd; + // FIXME: What exactly does the StartOfLine bit mean? There are two + // possible meanings for the "start" of the line: the first token on the + // unexpanded line, or the first token on the expanded line. 
IsAtStartOfLine = StartOfLine; + IsAtPhysicalStartOfLine = StartOfLine; } static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) { - if (LangOpts.CPlusPlus11 || LangOpts.C11) - return isCharInSet(C, C11AllowedIDChars); - else if (LangOpts.CPlusPlus) - return isCharInSet(C, CXX03AllowedIDChars); - else - return isCharInSet(C, C99AllowedIDChars); + if (LangOpts.CPlusPlus11 || LangOpts.C11) { + static const llvm::sys::UnicodeCharSet C11AllowedIDChars( + C11AllowedIDCharRanges); + return C11AllowedIDChars.contains(C); + } else if (LangOpts.CPlusPlus) { + static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars( + CXX03AllowedIDCharRanges); + return CXX03AllowedIDChars.contains(C); + } else { + static const llvm::sys::UnicodeCharSet C99AllowedIDChars( + C99AllowedIDCharRanges); + return C99AllowedIDChars.contains(C); + } } static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) { assert(isAllowedIDChar(C, LangOpts)); - if (LangOpts.CPlusPlus11 || LangOpts.C11) - return !isCharInSet(C, C11DisallowedInitialIDChars); - else if (LangOpts.CPlusPlus) + if (LangOpts.CPlusPlus11 || LangOpts.C11) { + static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars( + C11DisallowedInitialIDCharRanges); + return !C11DisallowedInitialIDChars.contains(C); + } else if (LangOpts.CPlusPlus) { return true; - else - return !isCharInSet(C, C99DisallowedInitialIDChars); + } else { + static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars( + C99DisallowedInitialIDCharRanges); + return !C99DisallowedInitialIDChars.contains(C); + } } static inline CharSourceRange makeCharRange(Lexer &L, const char *Begin, @@ -1407,11 +1420,15 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CannotStartIdentifier }; - if (!isCharInSet(C, C99AllowedIDChars)) { + static const llvm::sys::UnicodeCharSet C99AllowedIDChars( + C99AllowedIDCharRanges); + static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars( + C99DisallowedInitialIDCharRanges); + if (!C99AllowedIDChars.contains(C)) { Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id) << Range << CannotAppearInIdentifier; - } else if (IsFirst && isCharInSet(C, C99DisallowedInitialIDChars)) { + } else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) { Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id) << Range << CannotStartIdentifier; @@ -1421,14 +1438,16 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, // Check C++98 compatibility. if (Diags.getDiagnosticLevel(diag::warn_cxx98_compat_unicode_id, Range.getBegin()) > DiagnosticsEngine::Ignored) { - if (!isCharInSet(C, CXX03AllowedIDChars)) { + static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars( + CXX03AllowedIDCharRanges); + if (!CXX03AllowedIDChars.contains(C)) { Diags.Report(Range.getBegin(), diag::warn_cxx98_compat_unicode_id) << Range; } } } -void Lexer::LexIdentifier(Token &Result, const char *CurPtr) { +bool Lexer::LexIdentifier(Token &Result, const char *CurPtr) { // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$] unsigned Size; unsigned char C = *CurPtr++; @@ -1452,7 +1471,7 @@ FinishIdentifier: // If we are in raw mode, return this identifier raw. There is no need to // look up identifier information or attempt to macro expand it. if (LexingRawMode) - return; + return true; // Fill in Result.IdentifierInfo and update the token kind, // looking up the identifier in the identifier table. 
@@ -1461,9 +1480,9 @@ FinishIdentifier: // Finally, now that we know we have an identifier, pass this off to the // preprocessor, which may macro expand it or something. if (II->isHandleIdentifierCase()) - PP->HandleIdentifier(Result); + return PP->HandleIdentifier(Result); - return; + return true; } // Otherwise, $,\,? in identifier found. Enter slower path. @@ -1553,7 +1572,7 @@ bool Lexer::isHexaLiteral(const char *Start, const LangOptions &LangOpts) { /// LexNumericConstant - Lex the remainder of a integer or floating point /// constant. From[-1] is the first character lexed. Return the end of the /// constant. -void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { +bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { unsigned Size; char C = getCharAndSize(CurPtr, Size); char PrevCh = 0; @@ -1587,15 +1606,29 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); } + // If we have a digit separator, continue. + if (C == '\'' && getLangOpts().CPlusPlus1y) { + unsigned NextSize; + char Next = getCharAndSizeNoWarn(CurPtr + Size, NextSize, getLangOpts()); + if (isIdentifierBody(Next)) { + if (!isLexingRawMode()) + Diag(CurPtr, diag::warn_cxx11_compat_digit_separator); + CurPtr = ConsumeChar(CurPtr, Size, Result); + return LexNumericConstant(Result, CurPtr); + } + } + // Update the location of token as well as BufferPtr. const char *TokStart = BufferPtr; FormTokenWithChars(Result, CurPtr, tok::numeric_constant); Result.setLiteralData(TokStart); + return true; } /// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes /// in C++11, or warn on a ud-suffix in C++98. -const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) { +const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr, + bool IsStringLiteral) { assert(getLangOpts().CPlusPlus); // Maximally munch an identifier. FIXME: UCNs. @@ -1615,9 +1648,41 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) { // that does not start with an underscore is ill-formed. As a conforming // extension, we treat all such suffixes as if they had whitespace before // them. - if (C != '_') { + bool IsUDSuffix = false; + if (C == '_') + IsUDSuffix = true; + else if (IsStringLiteral && getLangOpts().CPlusPlus1y) { + // In C++1y, we need to look ahead a few characters to see if this is a + // valid suffix for a string literal or a numeric literal (this could be + // the 'operator""if' defining a numeric literal operator). + const unsigned MaxStandardSuffixLength = 3; + char Buffer[MaxStandardSuffixLength] = { C }; + unsigned Consumed = Size; + unsigned Chars = 1; + while (true) { + unsigned NextSize; + char Next = getCharAndSizeNoWarn(CurPtr + Consumed, NextSize, + getLangOpts()); + if (!isIdentifierBody(Next)) { + // End of suffix. Check whether this is on the whitelist. + IsUDSuffix = (Chars == 1 && Buffer[0] == 's') || + NumericLiteralParser::isValidUDSuffix( + getLangOpts(), StringRef(Buffer, Chars)); + break; + } + + if (Chars == MaxStandardSuffixLength) + // Too long: can't be a standard suffix. + break; + + Buffer[Chars++] = Next; + Consumed += NextSize; + } + } + + if (!IsUDSuffix) { if (!isLexingRawMode()) - Diag(CurPtr, getLangOpts().MicrosoftMode ? + Diag(CurPtr, getLangOpts().MicrosoftMode ? 
diag::ext_ms_reserved_user_defined_literal : diag::ext_reserved_user_defined_literal) << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " "); @@ -1635,7 +1700,7 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) { /// LexStringLiteral - Lex the remainder of a string literal, after having lexed /// either " or L" or u8" or u" or U". -void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, +bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr, tok::TokenKind Kind) { const char *NulCharacter = 0; // Does this string contain the \0 character? @@ -1659,14 +1724,15 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) Diag(BufferPtr, diag::ext_unterminated_string); FormTokenWithChars(Result, CurPtr-1, tok::unknown); - return; + return true; } if (C == 0) { if (isCodeCompletionPoint(CurPtr-1)) { PP->CodeCompleteNaturalLanguage(); FormTokenWithChars(Result, CurPtr-1, tok::unknown); - return cutOffLexing(); + cutOffLexing(); + return true; } NulCharacter = CurPtr-1; @@ -1676,7 +1742,7 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, // If we are in C++11, lex the optional ud-suffix. if (getLangOpts().CPlusPlus) - CurPtr = LexUDSuffix(Result, CurPtr); + CurPtr = LexUDSuffix(Result, CurPtr, true); // If a nul character existed in the string, warn about it. if (NulCharacter && !isLexingRawMode()) @@ -1686,11 +1752,12 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, const char *TokStart = BufferPtr; FormTokenWithChars(Result, CurPtr, Kind); Result.setLiteralData(TokStart); + return true; } /// LexRawStringLiteral - Lex the remainder of a raw string literal, after /// having lexed R", LR", u8R", uR", or UR". -void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, +bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, tok::TokenKind Kind) { // This function doesn't use getAndAdvanceChar because C++0x [lex.pptoken]p3: // Between the initial and final double quote characters of the raw string, @@ -1732,7 +1799,7 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, } FormTokenWithChars(Result, CurPtr, tok::unknown); - return; + return true; } // Save prefix and move CurPtr past it @@ -1753,23 +1820,24 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, Diag(BufferPtr, diag::err_unterminated_raw_string) << StringRef(Prefix, PrefixLen); FormTokenWithChars(Result, CurPtr-1, tok::unknown); - return; + return true; } } // If we are in C++11, lex the optional ud-suffix. if (getLangOpts().CPlusPlus) - CurPtr = LexUDSuffix(Result, CurPtr); + CurPtr = LexUDSuffix(Result, CurPtr, true); // Update the location of token as well as BufferPtr. const char *TokStart = BufferPtr; FormTokenWithChars(Result, CurPtr, Kind); Result.setLiteralData(TokStart); + return true; } /// LexAngledStringLiteral - Lex the remainder of an angled string literal, /// after having lexed the '<' character. This is used for #include filenames. -void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { +bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { const char *NulCharacter = 0; // Does this string contain the \0 character? const char *AfterLessPos = CurPtr; char C = getAndAdvanceChar(CurPtr, Result); @@ -1784,7 +1852,7 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { // If the filename is unterminated, then it must just be a lone < // character. Return this as such. 
FormTokenWithChars(Result, AfterLessPos, tok::less); - return; + return true; } else if (C == 0) { NulCharacter = CurPtr-1; } @@ -1799,12 +1867,13 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { const char *TokStart = BufferPtr; FormTokenWithChars(Result, CurPtr, tok::angle_string_literal); Result.setLiteralData(TokStart); + return true; } /// LexCharConstant - Lex the remainder of a character constant, after having /// lexed either ' or L' or u' or U'. -void Lexer::LexCharConstant(Token &Result, const char *CurPtr, +bool Lexer::LexCharConstant(Token &Result, const char *CurPtr, tok::TokenKind Kind) { const char *NulCharacter = 0; // Does this character contain the \0 character? @@ -1819,7 +1888,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr, if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) Diag(BufferPtr, diag::ext_empty_character); FormTokenWithChars(Result, CurPtr, tok::unknown); - return; + return true; } while (C != '\'') { @@ -1832,14 +1901,15 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr, if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) Diag(BufferPtr, diag::ext_unterminated_char); FormTokenWithChars(Result, CurPtr-1, tok::unknown); - return; + return true; } if (C == 0) { if (isCodeCompletionPoint(CurPtr-1)) { PP->CodeCompleteNaturalLanguage(); FormTokenWithChars(Result, CurPtr-1, tok::unknown); - return cutOffLexing(); + cutOffLexing(); + return true; } NulCharacter = CurPtr-1; @@ -1849,7 +1919,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr, // If we are in C++11, lex the optional ud-suffix. if (getLangOpts().CPlusPlus) - CurPtr = LexUDSuffix(Result, CurPtr); + CurPtr = LexUDSuffix(Result, CurPtr, false); // If a nul character existed in the character, warn about it. if (NulCharacter && !isLexingRawMode()) @@ -1859,6 +1929,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr, const char *TokStart = BufferPtr; FormTokenWithChars(Result, CurPtr, Kind); Result.setLiteralData(TokStart); + return true; } /// SkipWhitespace - Efficiently skip over a series of whitespace characters. @@ -1866,11 +1937,14 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr, /// /// This method forms a token and returns true if KeepWhitespaceMode is enabled. /// -bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { +bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, + bool &TokAtPhysicalStartOfLine) { // Whitespace - Skip it, then return the token after the whitespace. bool SawNewline = isVerticalWhitespace(CurPtr[-1]); - unsigned char Char = *CurPtr; // Skip consequtive spaces efficiently. + unsigned char Char = *CurPtr; + + // Skip consecutive spaces efficiently. while (1) { // Skip horizontal whitespace very aggressively. while (isHorizontalWhitespace(Char)) @@ -1886,7 +1960,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { return false; } - // ok, but handle newline. + // OK, but handle newline. SawNewline = true; Char = *++CurPtr; } @@ -1894,8 +1968,10 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { // If the client wants us to return whitespace, return it now. if (isKeepWhitespaceMode()) { FormTokenWithChars(Result, CurPtr, tok::unknown); - if (SawNewline) + if (SawNewline) { IsAtStartOfLine = true; + IsAtPhysicalStartOfLine = true; + } // FIXME: The next token will not have LeadingSpace set. 
return true; } @@ -1905,8 +1981,10 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { bool HasLeadingSpace = !isVerticalWhitespace(PrevChar); Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace); - if (SawNewline) + if (SawNewline) { Result.setFlag(Token::StartOfLine); + TokAtPhysicalStartOfLine = true; + } BufferPtr = CurPtr; return false; @@ -1918,7 +1996,8 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { /// /// If we're in KeepCommentMode or any CommentHandler has inserted /// some tokens, this will store the first token and return true. -bool Lexer::SkipLineComment(Token &Result, const char *CurPtr) { +bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, + bool &TokAtPhysicalStartOfLine) { // If Line comments aren't explicitly enabled for this language, emit an // extension warning. if (!LangOpts.LineComment && !isLexingRawMode()) { @@ -2037,6 +2116,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr) { // The next returned token is at the start of the line. Result.setFlag(Token::StartOfLine); + TokAtPhysicalStartOfLine = true; // No leading whitespace seen so far. Result.clearFlag(Token::LeadingSpace); BufferPtr = CurPtr; @@ -2147,7 +2227,8 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, /// /// If we're in KeepCommentMode or any CommentHandler has inserted /// some tokens, this will store the first token and return true. -bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { +bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, + bool &TokAtPhysicalStartOfLine) { // Scan one character past where we should, looking for a '/' character. Once // we find it, check to see if it was preceded by a *. This common // optimization helps people who like to put a lot of * characters in their @@ -2202,7 +2283,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { // Adjust the pointer to point directly after the first slash. It's // not necessary to set C here, it will be overwritten at the end of // the outer loop. - CurPtr += llvm::CountTrailingZeros_32(cmp) + 1; + CurPtr += llvm::countTrailingZeros<unsigned>(cmp) + 1; goto FoundSlash; } CurPtr += 16; @@ -2298,7 +2379,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { // efficiently now. This is safe even in KeepWhitespaceMode because we would // have already returned above with the comment as a token. if (isHorizontalWhitespace(*CurPtr)) { - SkipWhitespace(Result, CurPtr+1); + SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine); return false; } @@ -2404,10 +2485,28 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue // a pedwarn. - if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')) - Diag(BufferEnd, LangOpts.CPlusPlus11 ? // C++11 [lex.phases] 2.2 p2 - diag::warn_cxx98_compat_no_newline_eof : diag::ext_no_newline_eof) - << FixItHint::CreateInsertion(getSourceLocation(BufferEnd), "\n"); + if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')) { + DiagnosticsEngine &Diags = PP->getDiagnostics(); + SourceLocation EndLoc = getSourceLocation(BufferEnd); + unsigned DiagID; + + if (LangOpts.CPlusPlus11) { + // C++11 [lex.phases] 2.2 p2 + // Prefer the C++98 pedantic compatibility warning over the generic, + // non-extension, user-requested "missing newline at EOF" warning. 
+ if (Diags.getDiagnosticLevel(diag::warn_cxx98_compat_no_newline_eof, + EndLoc) != DiagnosticsEngine::Ignored) { + DiagID = diag::warn_cxx98_compat_no_newline_eof; + } else { + DiagID = diag::warn_no_newline_eof; + } + } else { + DiagID = diag::ext_no_newline_eof; + } + + Diag(BufferEnd, DiagID) + << FixItHint::CreateInsertion(EndLoc, "\n"); + } BufferPtr = CurPtr; @@ -2430,14 +2529,19 @@ unsigned Lexer::isNextPPTokenLParen() { // Save state that can be changed while lexing so that we can restore it. const char *TmpBufferPtr = BufferPtr; bool inPPDirectiveMode = ParsingPreprocessorDirective; + bool atStartOfLine = IsAtStartOfLine; + bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; + bool leadingSpace = HasLeadingSpace; Token Tok; - Tok.startToken(); - LexTokenInternal(Tok); + Lex(Tok); // Restore state that may have changed. BufferPtr = TmpBufferPtr; ParsingPreprocessorDirective = inPPDirectiveMode; + HasLeadingSpace = leadingSpace; + IsAtStartOfLine = atStartOfLine; + IsAtPhysicalStartOfLine = atPhysicalStartOfLine; // Restore the lexer back to non-skipping mode. LexingRawMode = false; @@ -2626,6 +2730,10 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc, StartPtr = CurPtr; } + // Don't apply C family restrictions to UCNs in assembly mode + if (LangOpts.AsmPreprocessor) + return CodePoint; + // C99 6.4.3p2: A universal character name shall not specify a character whose // short identifier is less than 00A0 other than 0024 ($), 0040 (@), or // 0060 (`), nor one in the range D800 through DFFF inclusive.) @@ -2670,19 +2778,22 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc, return CodePoint; } -void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { +bool Lexer::CheckUnicodeWhitespace(Token &Result, uint32_t C, + const char *CurPtr) { + static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars( + UnicodeWhitespaceCharRanges); if (!isLexingRawMode() && !PP->isPreprocessedOutput() && - isCharInSet(C, UnicodeWhitespaceChars)) { + UnicodeWhitespaceChars.contains(C)) { Diag(BufferPtr, diag::ext_unicode_whitespace) << makeCharRange(*this, BufferPtr, CurPtr); Result.setFlag(Token::LeadingSpace); - if (SkipWhitespace(Result, CurPtr)) - return; // KeepWhitespaceMode - - return LexTokenInternal(Result); + return true; } + return false; +} +bool Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { if (isAllowedIDChar(C, LangOpts) && isAllowedInitiallyIDChar(C, LangOpts)) { if (!isLexingRawMode() && !ParsingPreprocessorDirective && !PP->isPreprocessedOutput()) { @@ -2711,22 +2822,59 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { << FixItHint::CreateRemoval(makeCharRange(*this, BufferPtr, CurPtr)); BufferPtr = CurPtr; - return LexTokenInternal(Result); + return false; } // Otherwise, we have an explicit UCN or a character that's unlikely to show // up by accident. MIOpt.ReadToken(); FormTokenWithChars(Result, CurPtr, tok::unknown); + return true; } +void Lexer::PropagateLineStartLeadingSpaceInfo(Token &Result) { + IsAtStartOfLine = Result.isAtStartOfLine(); + HasLeadingSpace = Result.hasLeadingSpace(); + HasLeadingEmptyMacro = Result.hasLeadingEmptyMacro(); + // Note that this doesn't affect IsAtPhysicalStartOfLine. +} + +bool Lexer::Lex(Token &Result) { + // Start a new token. + Result.startToken(); + + // Set up misc whitespace flags for LexTokenInternal. 
+ if (IsAtStartOfLine) { + Result.setFlag(Token::StartOfLine); + IsAtStartOfLine = false; + } + + if (HasLeadingSpace) { + Result.setFlag(Token::LeadingSpace); + HasLeadingSpace = false; + } + + if (HasLeadingEmptyMacro) { + Result.setFlag(Token::LeadingEmptyMacro); + HasLeadingEmptyMacro = false; + } + + bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; + IsAtPhysicalStartOfLine = false; + bool isRawLex = isLexingRawMode(); + (void) isRawLex; + bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine); + // (After the LexTokenInternal call, the lexer might be destroyed.) + assert((returnedToken || !isRawLex) && "Raw lex must succeed"); + return returnedToken; +} /// LexTokenInternal - This implements a simple C family lexer. It is an /// extremely performance critical piece of code. This assumes that the buffer /// has a null character at the end of the file. This returns a preprocessing /// token, not a normal token, as such, it is an internal interface. It assumes /// that the Flags of result have been cleared before calling this. -void Lexer::LexTokenInternal(Token &Result) { +bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { LexNextToken: // New token, can't need cleaning yet. Result.clearFlag(Token::NeedsCleaning); @@ -2747,7 +2895,7 @@ LexNextToken: if (isKeepWhitespaceMode()) { FormTokenWithChars(Result, CurPtr, tok::unknown); // FIXME: The next token will not have LeadingSpace set. - return; + return true; } BufferPtr = CurPtr; @@ -2763,43 +2911,32 @@ LexNextToken: switch (Char) { case 0: // Null. // Found end of file? - if (CurPtr-1 == BufferEnd) { - // Read the PP instance variable into an automatic variable, because - // LexEndOfFile will often delete 'this'. - Preprocessor *PPCache = PP; - if (LexEndOfFile(Result, CurPtr-1)) // Retreat back into the file. - return; // Got a token to return. - assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); - return PPCache->Lex(Result); - } + if (CurPtr-1 == BufferEnd) + return LexEndOfFile(Result, CurPtr-1); // Check if we are performing code completion. if (isCodeCompletionPoint(CurPtr-1)) { // Return the code-completion token. Result.startToken(); FormTokenWithChars(Result, CurPtr, tok::code_completion); - return; + return true; } if (!isLexingRawMode()) Diag(CurPtr-1, diag::null_in_file); Result.setFlag(Token::LeadingSpace); - if (SkipWhitespace(Result, CurPtr)) - return; // KeepWhitespaceMode + if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + return true; // KeepWhitespaceMode - goto LexNextToken; // GCC isn't tail call eliminating. + // We know the lexer hasn't changed, so just try again with this lexer. + // (We manually eliminate the tail call to avoid recursion.) + goto LexNextToken; case 26: // DOS & CP/M EOF: "^Z". // If we're in Microsoft extensions mode, treat this as end of file. - if (LangOpts.MicrosoftExt) { - // Read the PP instance variable into an automatic variable, because - // LexEndOfFile will often delete 'this'. - Preprocessor *PPCache = PP; - if (LexEndOfFile(Result, CurPtr-1)) // Retreat back into the file. - return; // Got a token to return. - assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); - return PPCache->Lex(Result); - } + if (LangOpts.MicrosoftExt) + return LexEndOfFile(Result, CurPtr-1); + // If Microsoft extensions are disabled, this is just random garbage. Kind = tok::unknown; break; @@ -2818,6 +2955,7 @@ LexNextToken: // Since we consumed a newline, we are back at the start of a line. 
IsAtStartOfLine = true; + IsAtPhysicalStartOfLine = true; Kind = tok::eod; break; @@ -2826,17 +2964,20 @@ LexNextToken: // No leading whitespace seen so far. Result.clearFlag(Token::LeadingSpace); - if (SkipWhitespace(Result, CurPtr)) - return; // KeepWhitespaceMode - goto LexNextToken; // GCC isn't tail call eliminating. + if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + return true; // KeepWhitespaceMode + + // We only saw whitespace, so just try again with this lexer. + // (We manually eliminate the tail call to avoid recursion.) + goto LexNextToken; case ' ': case '\t': case '\f': case '\v': SkipHorizontalWhitespace: Result.setFlag(Token::LeadingSpace); - if (SkipWhitespace(Result, CurPtr)) - return; // KeepWhitespaceMode + if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + return true; // KeepWhitespaceMode SkipIgnoredUnits: CurPtr = BufferPtr; @@ -2844,18 +2985,21 @@ LexNextToken: // If the next token is obviously a // or /* */ comment, skip it efficiently // too (without going through the big switch stmt). if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() && - LangOpts.LineComment && !LangOpts.TraditionalCPP) { - if (SkipLineComment(Result, CurPtr+2)) - return; // There is a token to return. + LangOpts.LineComment && + (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) { + if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) + return true; // There is a token to return. goto SkipIgnoredUnits; } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) { - if (SkipBlockComment(Result, CurPtr+2)) - return; // There is a token to return. + if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) + return true; // There is a token to return. goto SkipIgnoredUnits; } else if (isHorizontalWhitespace(*CurPtr)) { goto SkipHorizontalWhitespace; } - goto LexNextToken; // GCC isn't tail call eliminating. + // We only saw whitespace, so just try again with this lexer. + // (We manually eliminate the tail call to avoid recursion.) + goto LexNextToken; // C99 6.4.4.1: Integer Constants. // C99 6.4.4.2: Floating Constants. @@ -3141,14 +3285,16 @@ LexNextToken: // "foo". Check to see if the character after the second slash is a '*'. // If so, we will lex that as a "/" instead of the start of a comment. // However, we never do this if we are just preprocessing. - bool TreatAsComment = LangOpts.LineComment && !LangOpts.TraditionalCPP; + bool TreatAsComment = LangOpts.LineComment && + (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP); if (!TreatAsComment) if (!(PP && PP->isPreprocessedOutput())) TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*'; if (TreatAsComment) { - if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) - return; // There is a token to return. + if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), + TokAtPhysicalStartOfLine)) + return true; // There is a token to return. // It is common for the tokens immediately after a // comment to be // whitespace (indentation for the next line). Instead of going through @@ -3158,9 +3304,13 @@ LexNextToken: } if (Char == '*') { // /**/ comment. - if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) - return; // There is a token to return. - goto LexNextToken; // GCC isn't tail call eliminating. + if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), + TokAtPhysicalStartOfLine)) + return true; // There is a token to return. + + // We only saw whitespace, so just try again with this lexer. 
+ // (We manually eliminate the tail call to avoid recursion.) + goto LexNextToken; } if (Char == '=') { @@ -3195,7 +3345,7 @@ LexNextToken: // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // FIXME: -fpreprocessed mode?? - if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) + if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) goto HandleDirective; Kind = tok::hash; @@ -3361,7 +3511,7 @@ LexNextToken: // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // FIXME: -fpreprocessed mode?? - if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) + if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) goto HandleDirective; Kind = tok::hash; @@ -3378,8 +3528,18 @@ LexNextToken: // UCNs (C99 6.4.3, C++11 [lex.charset]p2) case '\\': - if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) + if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { + if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { + if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + return true; // KeepWhitespaceMode + + // We only saw whitespace, so just try again with this lexer. + // (We manually eliminate the tail call to avoid recursion.) + goto LexNextToken; + } + return LexUnicode(Result, CodePoint, CurPtr); + } Kind = tok::unknown; break; @@ -3400,8 +3560,17 @@ LexNextToken: (const UTF8 *)BufferEnd, &CodePoint, strictConversion); - if (Status == conversionOK) + if (Status == conversionOK) { + if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { + if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + return true; // KeepWhitespaceMode + + // We only saw whitespace, so just try again with this lexer. + // (We manually eliminate the tail call to avoid recursion.) + goto LexNextToken; + } return LexUnicode(Result, CodePoint, CurPtr); + } if (isLexingRawMode() || ParsingPreprocessorDirective || PP->isPreprocessedOutput()) { @@ -3416,6 +3585,9 @@ LexNextToken: Diag(CurPtr, diag::err_invalid_utf8); BufferPtr = CurPtr+1; + // We're pretending the character didn't exist, so just try again with + // this lexer. + // (We manually eliminate the tail call to avoid recursion.) goto LexNextToken; } } @@ -3425,7 +3597,7 @@ LexNextToken: // Update the location of token as well as BufferPtr. FormTokenWithChars(Result, CurPtr, Kind); - return; + return true; HandleDirective: // We parsed a # character and it's the start of a preprocessing directive. @@ -3433,18 +3605,12 @@ HandleDirective: FormTokenWithChars(Result, CurPtr, tok::hash); PP->HandleDirective(Result); - // As an optimization, if the preprocessor didn't switch lexers, tail - // recurse. - if (PP->isCurrentLexer(this)) { - // Start a new token. If this is a #include or something, the PP may - // want us starting at the beginning of the line again. If so, set - // the StartOfLine flag and clear LeadingSpace. - if (IsAtStartOfLine) { - Result.setFlag(Token::StartOfLine); - Result.clearFlag(Token::LeadingSpace); - IsAtStartOfLine = false; - } - goto LexNextToken; // GCC isn't tail call eliminating. + if (PP->hadModuleLoaderFatalFailure()) { + // With a fatal failure in the module loader, we abort parsing. + assert(Result.is(tok::eof) && "Preprocessor did not set tok:eof"); + return true; } - return PP->Lex(Result); + + // We parsed the directive; lex a token with the new state. 
+ return false; } diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index 09f4a68..17c6bb3 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -157,7 +157,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin, // Check for overflow. if (Overflow && Diags) // Too many digits to fit in Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf, - diag::warn_hex_escape_too_large); + diag::err_hex_escape_too_large); break; } case '0': case '1': case '2': case '3': @@ -180,7 +180,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin, if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) { if (Diags) Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf, - diag::warn_octal_escape_too_large); + diag::err_octal_escape_too_large); ResultChar &= ~0U >> (32-CharWidth); } break; @@ -336,7 +336,7 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, return; } - assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth) && + assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth == 4) && "only character widths of 1, 2, or 4 bytes supported"); (void)UcnLen; @@ -413,10 +413,12 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, /// decimal-constant integer-suffix /// octal-constant integer-suffix /// hexadecimal-constant integer-suffix +/// binary-literal integer-suffix [GNU, C++1y] /// user-defined-integer-literal: [C++11 lex.ext] /// decimal-literal ud-suffix /// octal-literal ud-suffix /// hexadecimal-literal ud-suffix +/// binary-literal ud-suffix [GNU, C++1y] /// decimal-constant: /// nonzero-digit /// decimal-constant digit @@ -428,6 +430,10 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, /// hexadecimal-constant hexadecimal-digit /// hexadecimal-prefix: one of /// 0x 0X +/// binary-literal: +/// 0b binary-digit +/// 0B binary-digit +/// binary-literal binary-digit /// integer-suffix: /// unsigned-suffix [long-suffix] /// unsigned-suffix [long-long-suffix] @@ -441,6 +447,9 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, /// 0 1 2 3 4 5 6 7 8 9 /// a b c d e f /// A B C D E F +/// binary-digit: +/// 0 +/// 1 /// unsigned-suffix: one of /// u U /// long-suffix: one of @@ -489,15 +498,19 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, hadError = true; return; } else if (*s == '.') { + checkSeparator(TokLoc, s, CSK_AfterDigits); s++; saw_period = true; + checkSeparator(TokLoc, s, CSK_BeforeDigits); s = SkipDigits(s); } if ((*s == 'e' || *s == 'E')) { // exponent + checkSeparator(TokLoc, s, CSK_AfterDigits); const char *Exponent = s; s++; saw_exponent = true; if (*s == '+' || *s == '-') s++; // sign + checkSeparator(TokLoc, s, CSK_BeforeDigits); const char *first_non_digit = SkipDigits(s); if (first_non_digit != s) { s = first_non_digit; @@ -511,10 +524,12 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, } SuffixBegin = s; + checkSeparator(TokLoc, s, CSK_AfterDigits); // Parse the suffix. At this point we can classify whether we have an FP or // integer constant. bool isFPConstant = isFloatingLiteral(); + const char *ImaginarySuffixLoc = 0; // Loop over all of the characters of the suffix. If we see something bad, // we break out of the loop. @@ -594,13 +609,15 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, break; } } + // "i", "if", and "il" are user-defined suffixes in C++1y. 
+ if (PP.getLangOpts().CPlusPlus1y && *s == 'i') + break; // fall through. case 'j': case 'J': if (isImaginary) break; // Cannot be repeated. - PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin), - diag::ext_imaginary_constant); isImaginary = true; + ImaginarySuffixLoc = s; continue; // Success. } // If we reached here, there was an error or a ud-suffix. @@ -608,9 +625,17 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, } if (s != ThisTokEnd) { - if (PP.getLangOpts().CPlusPlus11 && s == SuffixBegin && *s == '_') { - // We have a ud-suffix! By C++11 [lex.ext]p10, ud-suffixes not starting - // with an '_' are ill-formed. + if (isValidUDSuffix(PP.getLangOpts(), + StringRef(SuffixBegin, ThisTokEnd - SuffixBegin))) { + // Any suffix pieces we might have parsed are actually part of the + // ud-suffix. + isLong = false; + isUnsigned = false; + isLongLong = false; + isFloat = false; + isImaginary = false; + isMicrosoftInteger = false; + saw_ud_suffix = true; return; } @@ -623,6 +648,53 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, hadError = true; return; } + + if (isImaginary) { + PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, + ImaginarySuffixLoc - ThisTokBegin), + diag::ext_imaginary_constant); + } +} + +/// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved +/// suffixes as ud-suffixes, because the diagnostic experience is better if we +/// treat it as an invalid suffix. +bool NumericLiteralParser::isValidUDSuffix(const LangOptions &LangOpts, + StringRef Suffix) { + if (!LangOpts.CPlusPlus11 || Suffix.empty()) + return false; + + // By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid. + if (Suffix[0] == '_') + return true; + + // In C++11, there are no library suffixes. + if (!LangOpts.CPlusPlus1y) + return false; + + // In C++1y, "s", "h", "min", "ms", "us", and "ns" are used in the library. + // Per tweaked N3660, "il", "i", and "if" are also used in the library. + return llvm::StringSwitch<bool>(Suffix) + .Cases("h", "min", "s", true) + .Cases("ms", "us", "ns", true) + .Cases("il", "i", "if", true) + .Default(false); +} + +void NumericLiteralParser::checkSeparator(SourceLocation TokLoc, + const char *Pos, + CheckSeparatorKind IsAfterDigits) { + if (IsAfterDigits == CSK_AfterDigits) { + if (Pos == ThisTokBegin) + return; + --Pos; + } else if (Pos == ThisTokEnd) + return; + + if (isDigitSeparator(*Pos)) + PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Pos - ThisTokBegin), + diag::err_digit_separator_not_between_digits) + << IsAfterDigits; } /// ParseNumberStartingWithZero - This method is called when the first character @@ -634,8 +706,11 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { assert(s[0] == '0' && "Invalid method call"); s++; + int c1 = s[0]; + int c2 = s[1]; + // Handle a hex number like 0x1234. - if ((*s == 'x' || *s == 'X') && (isHexDigit(s[1]) || s[1] == '.')) { + if ((c1 == 'x' || c1 == 'X') && (isHexDigit(c2) || c2 == '.')) { s++; radix = 16; DigitsBegin = s; @@ -685,7 +760,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { } // Handle simple binary numbers 0b01010 - if (*s == 'b' || *s == 'B') { + if ((c1 == 'b' || c1 == 'B') && (c2 == '0' || c2 == '1')) { // 0b101010 is a C++1y / GCC extension. 
PP.Diag(TokLoc, PP.getLangOpts().CPlusPlus1y @@ -789,7 +864,8 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) { if (alwaysFitsInto64Bits(radix, NumDigits)) { uint64_t N = 0; for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr) - N = N * radix + llvm::hexDigitValue(*Ptr); + if (!isDigitSeparator(*Ptr)) + N = N * radix + llvm::hexDigitValue(*Ptr); // This will truncate the value to Val's input width. Simply check // for overflow by comparing. @@ -806,6 +882,11 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) { bool OverflowOccurred = false; while (Ptr < SuffixBegin) { + if (isDigitSeparator(*Ptr)) { + ++Ptr; + continue; + } + unsigned C = llvm::hexDigitValue(*Ptr++); // If this letter is out of bound for this radix, reject it. @@ -834,8 +915,17 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { using llvm::APFloat; unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin); - return Result.convertFromString(StringRef(ThisTokBegin, n), - APFloat::rmNearestTiesToEven); + + llvm::SmallString<16> Buffer; + StringRef Str(ThisTokBegin, n); + if (Str.find('\'') != StringRef::npos) { + Buffer.reserve(n); + std::remove_copy_if(Str.begin(), Str.end(), std::back_inserter(Buffer), + &isDigitSeparator); + Str = Buffer; + } + + return Result.convertFromString(Str, APFloat::rmNearestTiesToEven); } @@ -921,8 +1011,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, assert(PP.getTargetInfo().getWCharWidth() <= 64 && "Assumes sizeof(wchar) on target is <= 64"); - SmallVector<uint32_t,4> codepoint_buffer; - codepoint_buffer.resize(end-begin); + SmallVector<uint32_t, 4> codepoint_buffer; + codepoint_buffer.resize(end - begin); uint32_t *buffer_begin = &codepoint_buffer.front(); uint32_t *buffer_end = buffer_begin + codepoint_buffer.size(); @@ -931,7 +1021,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, // by this implementation. uint32_t largest_character_for_kind; if (tok::wide_char_constant == Kind) { - largest_character_for_kind = 0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth()); + largest_character_for_kind = + 0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth()); } else if (tok::utf16_char_constant == Kind) { largest_character_for_kind = 0xFFFF; } else if (tok::utf32_char_constant == Kind) { @@ -940,7 +1031,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, largest_character_for_kind = 0x7Fu; } - while (begin!=end) { + while (begin != end) { // Is this a span of non-escape characters? if (begin[0] != '\\') { char const *start = begin; @@ -951,12 +1042,12 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, char const *tmp_in_start = start; uint32_t *tmp_out_start = buffer_begin; ConversionResult res = - ConvertUTF8toUTF32(reinterpret_cast<UTF8 const **>(&start), - reinterpret_cast<UTF8 const *>(begin), - &buffer_begin,buffer_end,strictConversion); - if (res!=conversionOK) { - // If we see bad encoding for unprefixed character literals, warn and - // simply copy the byte values, for compatibility with gcc and + ConvertUTF8toUTF32(reinterpret_cast<UTF8 const **>(&start), + reinterpret_cast<UTF8 const *>(begin), + &buffer_begin, buffer_end, strictConversion); + if (res != conversionOK) { + // If we see bad encoding for unprefixed character literals, warn and + // simply copy the byte values, for compatibility with gcc and // older versions of clang. 
bool NoErrorOnBadEncoding = isAscii(); unsigned Msg = diag::err_bad_character_encoding; @@ -966,13 +1057,13 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, if (NoErrorOnBadEncoding) { start = tmp_in_start; buffer_begin = tmp_out_start; - for ( ; start != begin; ++start, ++buffer_begin) + for (; start != begin; ++start, ++buffer_begin) *buffer_begin = static_cast<uint8_t>(*start); } else { HadError = true; } } else { - for (; tmp_out_start <buffer_begin; ++tmp_out_start) { + for (; tmp_out_start < buffer_begin; ++tmp_out_start) { if (*tmp_out_start > largest_character_for_kind) { HadError = true; PP.Diag(Loc, diag::err_character_too_large); @@ -982,14 +1073,12 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, continue; } - // Is this a Universal Character Name excape? + // Is this a Universal Character Name escape? if (begin[1] == 'u' || begin[1] == 'U') { unsigned short UcnLen = 0; if (!ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen, FullSourceLoc(Loc, PP.getSourceManager()), - &PP.getDiagnostics(), PP.getLangOpts(), - true)) - { + &PP.getDiagnostics(), PP.getLangOpts(), true)) { HadError = true; } else if (*buffer_begin > largest_character_for_kind) { HadError = true; @@ -1007,7 +1096,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, *buffer_begin++ = result; } - unsigned NumCharsSoFar = buffer_begin-&codepoint_buffer.front(); + unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front(); if (NumCharsSoFar > 1) { if (isWide()) @@ -1019,8 +1108,9 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, else PP.Diag(Loc, diag::err_multichar_utf_character_literal); IsMultiChar = true; - } else + } else { IsMultiChar = false; + } llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0); @@ -1029,7 +1119,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, bool multi_char_too_long = false; if (isAscii() && isMultiChar()) { LitVal = 0; - for (size_t i=0;i<NumCharsSoFar;++i) { + for (size_t i = 0; i < NumCharsSoFar; ++i) { // check for enough leading zeros to shift into multi_char_too_long |= (LitVal.countLeadingZeros() < 8); LitVal <<= 8; @@ -1041,7 +1131,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, } if (!HadError && multi_char_too_long) { - PP.Diag(Loc,diag::warn_char_constant_too_large); + PP.Diag(Loc, diag::warn_char_constant_too_large); } // Transfer the value from APInt to uint64_t diff --git a/lib/Lex/ModuleMap.cpp b/lib/Lex/ModuleMap.cpp index 3e7a44c..f4dfa12 100644 --- a/lib/Lex/ModuleMap.cpp +++ b/lib/Lex/ModuleMap.cpp @@ -27,7 +27,7 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Host.h" -#include "llvm/Support/PathV2.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include <stdlib.h> #if defined(LLVM_ON_UNIX) @@ -83,18 +83,18 @@ Module *ModuleMap::resolveModuleId(const ModuleId &Id, Module *Mod, return Context; } -ModuleMap::ModuleMap(FileManager &FileMgr, DiagnosticConsumer &DC, +ModuleMap::ModuleMap(SourceManager &SourceMgr, DiagnosticConsumer &DC, const LangOptions &LangOpts, const TargetInfo *Target, HeaderSearch &HeaderInfo) - : LangOpts(LangOpts), Target(Target), HeaderInfo(HeaderInfo), - BuiltinIncludeDir(0) -{ + : SourceMgr(SourceMgr), LangOpts(LangOpts), Target(Target), + HeaderInfo(HeaderInfo), BuiltinIncludeDir(0), CompilingModule(0), + SourceModule(0) { IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs); Diags 
= IntrusiveRefCntPtr<DiagnosticsEngine>( new DiagnosticsEngine(DiagIDs, new DiagnosticOptions)); Diags->setClient(new ForwardingDiagnosticConsumer(DC), /*ShouldOwnClient=*/true); - SourceMgr = new SourceManager(*Diags, FileMgr); + Diags->setSourceManager(&SourceMgr); } ModuleMap::~ModuleMap() { @@ -103,8 +103,6 @@ ModuleMap::~ModuleMap() { I != IEnd; ++I) { delete I->getValue(); } - - delete SourceMgr; } void ModuleMap::setTarget(const TargetInfo &Target) { @@ -168,14 +166,41 @@ static bool isBuiltinHeader(StringRef FileName) { .Default(false); } -Module *ModuleMap::findModuleForHeader(const FileEntry *File) { +ModuleMap::KnownHeader +ModuleMap::findModuleForHeader(const FileEntry *File, + Module *RequestingModule) { HeadersMap::iterator Known = Headers.find(File); if (Known != Headers.end()) { - // If a header is not available, don't report that it maps to anything. - if (!Known->second.isAvailable()) - return 0; + ModuleMap::KnownHeader Result = KnownHeader(); + + // Iterate over all modules that 'File' is part of to find the best fit. + for (SmallVectorImpl<KnownHeader>::iterator I = Known->second.begin(), + E = Known->second.end(); + I != E; ++I) { + // Cannot use a module if the header is excluded or unavailable in it. + if (I->getRole() == ModuleMap::ExcludedHeader || + !I->getModule()->isAvailable()) + continue; - return Known->second.getModule(); + // If 'File' is part of 'RequestingModule', 'RequestingModule' is the + // module we are looking for. + if (I->getModule() == RequestingModule) + return *I; + + // If uses need to be specified explicitly, we are only allowed to return + // modules that are explicitly used by the requesting module. + if (RequestingModule && LangOpts.ModulesDeclUse && + std::find(RequestingModule->DirectUses.begin(), + RequestingModule->DirectUses.end(), + I->getModule()) == RequestingModule->DirectUses.end()) + continue; + Result = *I; + // If 'File' is a public header of this module, this is as good as we + // are going to get. + if (I->getRole() == ModuleMap::NormalHeader) + break; + } + return Result; } // If we've found a builtin header within Clang's builtin include directory, @@ -183,18 +208,11 @@ Module *ModuleMap::findModuleForHeader(const FileEntry *File) { // specific module (e.g., in /usr/include). if (File->getDir() == BuiltinIncludeDir && isBuiltinHeader(llvm::sys::path::filename(File->getName()))) { - SmallVector<Module *, 4> AllModules; - HeaderInfo.collectAllModules(AllModules); + HeaderInfo.loadTopLevelSystemModules(); // Check again. - Known = Headers.find(File); - if (Known != Headers.end()) { - // If a header is not available, don't report that it maps to anything. - if (!Known->second.isAvailable()) - return 0; - - return Known->second.getModule(); - } + if (Headers.find(File) != Headers.end()) + return findModuleForHeader(File, RequestingModule); } const DirectoryEntry *Dir = File->getDir(); @@ -204,7 +222,7 @@ Module *ModuleMap::findModuleForHeader(const FileEntry *File) { // frameworks moving from top-level frameworks to embedded frameworks tend // to be symlinked from the top-level location to the embedded location, // and we need to resolve lookups as if we had found the embedded location. - StringRef DirName = SourceMgr->getFileManager().getCanonicalName(Dir); + StringRef DirName = SourceMgr.getFileManager().getCanonicalName(Dir); // Keep walking up the directory hierarchy, looking for a directory with // an umbrella header. 
@@ -263,14 +281,14 @@ Module *ModuleMap::findModuleForHeader(const FileEntry *File) { UmbrellaDirs[SkippedDirs[I]] = Result; } - Headers[File] = KnownHeader(Result, /*Excluded=*/false); + Headers[File].push_back(KnownHeader(Result, NormalHeader)); // If a header corresponds to an unavailable module, don't report // that it maps to anything. if (!Result->isAvailable()) - return 0; + return KnownHeader(); - return Result; + return Headers[File].back(); } SkippedDirs.push_back(Dir); @@ -281,16 +299,24 @@ Module *ModuleMap::findModuleForHeader(const FileEntry *File) { break; // Resolve the parent path to a directory entry. - Dir = SourceMgr->getFileManager().getDirectory(DirName); + Dir = SourceMgr.getFileManager().getDirectory(DirName); } while (Dir); - return 0; + return KnownHeader(); } bool ModuleMap::isHeaderInUnavailableModule(const FileEntry *Header) const { HeadersMap::const_iterator Known = Headers.find(Header); - if (Known != Headers.end()) - return !Known->second.isAvailable(); + if (Known != Headers.end()) { + for (SmallVectorImpl<KnownHeader>::const_iterator + I = Known->second.begin(), + E = Known->second.end(); + I != E; ++I) { + if (I->isAvailable()) + return false; + } + return true; + } const DirectoryEntry *Dir = Header->getDir(); SmallVector<const DirectoryEntry *, 2> SkippedDirs; @@ -347,7 +373,7 @@ bool ModuleMap::isHeaderInUnavailableModule(const FileEntry *Header) const { break; // Resolve the parent path to a directory entry. - Dir = SourceMgr->getFileManager().getDirectory(DirName); + Dir = SourceMgr.getFileManager().getDirectory(DirName); } while (Dir); return false; @@ -388,8 +414,17 @@ ModuleMap::findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework, // Create a new module with this name. Module *Result = new Module(Name, SourceLocation(), Parent, IsFramework, IsExplicit); - if (!Parent) + if (LangOpts.CurrentModule == Name) { + SourceModule = Result; + SourceModuleName = Name; + } + if (!Parent) { Modules[Name] = Result; + if (!LangOpts.CurrentModule.empty() && !CompilingModule && + Name == LangOpts.CurrentModule) { + CompilingModule = Result; + } + } return std::make_pair(Result, true); } @@ -443,7 +478,7 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName, if (Module *Mod = lookupModuleQualified(ModuleName, Parent)) return Mod; - FileManager &FileMgr = SourceMgr->getFileManager(); + FileManager &FileMgr = SourceMgr.getFileManager(); // If the framework has a parent path from which we're allowed to infer // a framework module, do so. @@ -455,7 +490,7 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName, // top-level framework, and we need to infer as if we were naming the // top-level framework. StringRef FrameworkDirName - = SourceMgr->getFileManager().getCanonicalName(FrameworkDir); + = SourceMgr.getFileManager().getCanonicalName(FrameworkDir); bool canInfer = false; if (llvm::sys::path::has_parent_path(FrameworkDirName)) { @@ -472,7 +507,7 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName, SmallString<128> ModMapPath = Parent; llvm::sys::path::append(ModMapPath, "module.map"); if (const FileEntry *ModMapFile = FileMgr.getFile(ModMapPath)) { - parseModuleMapFile(ModMapFile); + parseModuleMapFile(ModMapFile, IsSystem); inferred = InferredDirectories.find(ParentDir); } @@ -503,8 +538,7 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName, // Look for an umbrella header. 
SmallString<128> UmbrellaName = StringRef(FrameworkDir->getName()); - llvm::sys::path::append(UmbrellaName, "Headers"); - llvm::sys::path::append(UmbrellaName, ModuleName + ".h"); + llvm::sys::path::append(UmbrellaName, "Headers", ModuleName + ".h"); const FileEntry *UmbrellaHeader = FileMgr.getFile(UmbrellaName); // FIXME: If there's no umbrella header, we could probably scan the @@ -515,6 +549,10 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName, Module *Result = new Module(ModuleName, SourceLocation(), Parent, /*IsFramework=*/true, /*IsExplicit=*/false); + if (LangOpts.CurrentModule == ModuleName) { + SourceModule = Result; + SourceModuleName = ModuleName; + } if (IsSystem) Result->IsSystem = IsSystem; @@ -523,7 +561,7 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName, // umbrella header "umbrella-header-name" Result->Umbrella = UmbrellaHeader; - Headers[UmbrellaHeader] = KnownHeader(Result, /*Excluded=*/false); + Headers[UmbrellaHeader].push_back(KnownHeader(Result, NormalHeader)); UmbrellaDirs[UmbrellaHeader->getDir()] = Result; // export * @@ -538,11 +576,9 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName, SmallString<128> SubframeworksDirName = StringRef(FrameworkDir->getName()); llvm::sys::path::append(SubframeworksDirName, "Frameworks"); - SmallString<128> SubframeworksDirNameNative; - llvm::sys::path::native(SubframeworksDirName.str(), - SubframeworksDirNameNative); + llvm::sys::path::native(SubframeworksDirName); for (llvm::sys::fs::directory_iterator - Dir(SubframeworksDirNameNative.str(), EC), DirEnd; + Dir(SubframeworksDirName.str(), EC), DirEnd; Dir != DirEnd && !EC; Dir.increment(EC)) { if (!StringRef(Dir->path()).endswith(".framework")) continue; @@ -589,7 +625,7 @@ ModuleMap::inferFrameworkModule(StringRef ModuleName, } void ModuleMap::setUmbrellaHeader(Module *Mod, const FileEntry *UmbrellaHeader){ - Headers[UmbrellaHeader] = KnownHeader(Mod, /*Excluded=*/false); + Headers[UmbrellaHeader].push_back(KnownHeader(Mod, NormalHeader)); Mod->Umbrella = UmbrellaHeader; UmbrellaDirs[UmbrellaHeader->getDir()] = Mod; } @@ -600,23 +636,27 @@ void ModuleMap::setUmbrellaDir(Module *Mod, const DirectoryEntry *UmbrellaDir) { } void ModuleMap::addHeader(Module *Mod, const FileEntry *Header, - bool Excluded) { - if (Excluded) { + ModuleHeaderRole Role) { + if (Role == ExcludedHeader) { Mod->ExcludedHeaders.push_back(Header); } else { - Mod->Headers.push_back(Header); - HeaderInfo.MarkFileModuleHeader(Header); + if (Role == PrivateHeader) + Mod->PrivateHeaders.push_back(Header); + else + Mod->NormalHeaders.push_back(Header); + bool isCompilingModuleHeader = Mod->getTopLevelModule() == CompilingModule; + HeaderInfo.MarkFileModuleHeader(Header, Role, isCompilingModuleHeader); } - Headers[Header] = KnownHeader(Mod, Excluded); + Headers[Header].push_back(KnownHeader(Mod, Role)); } const FileEntry * ModuleMap::getContainingModuleMapFile(Module *Module) const { - if (Module->DefinitionLoc.isInvalid() || !SourceMgr) + if (Module->DefinitionLoc.isInvalid()) return 0; - return SourceMgr->getFileEntryForID( - SourceMgr->getFileID(Module->DefinitionLoc)); + return SourceMgr.getFileEntryForID( + SourceMgr.getFileID(Module->DefinitionLoc)); } void ModuleMap::dump() { @@ -629,8 +669,15 @@ void ModuleMap::dump() { llvm::errs() << "Headers:"; for (HeadersMap::iterator H = Headers.begin(), HEnd = Headers.end(); H != HEnd; ++H) { - llvm::errs() << " \"" << H->first->getName() << "\" -> " - << H->second.getModule()->getFullModuleName() << "\n"; + llvm::errs() << " \"" << 
H->first->getName() << "\" -> "; + for (SmallVectorImpl<KnownHeader>::const_iterator I = H->second.begin(), + E = H->second.end(); + I != E; ++I) { + if (I != H->second.begin()) + llvm::errs() << ","; + llvm::errs() << I->getModule()->getFullModuleName(); + } + llvm::errs() << "\n"; } } @@ -648,6 +695,20 @@ bool ModuleMap::resolveExports(Module *Mod, bool Complain) { return HadError; } +bool ModuleMap::resolveUses(Module *Mod, bool Complain) { + bool HadError = false; + for (unsigned I = 0, N = Mod->UnresolvedDirectUses.size(); I != N; ++I) { + Module *DirectUse = + resolveModuleId(Mod->UnresolvedDirectUses[I], Mod, Complain); + if (DirectUse) + Mod->DirectUses.push_back(DirectUse); + else + HadError = true; + } + Mod->UnresolvedDirectUses.clear(); + return HadError; +} + bool ModuleMap::resolveConflicts(Module *Mod, bool Complain) { bool HadError = false; for (unsigned I = 0, N = Mod->UnresolvedConflicts.size(); I != N; ++I) { @@ -683,7 +744,7 @@ Module *ModuleMap::inferModuleFromLocation(FullSourceLoc Loc) { while (const FileEntry *ExpansionFile = SrcMgr.getFileEntryForID(ExpansionFileID)) { // Find the module that owns this header (if any). - if (Module *Mod = findModuleForHeader(ExpansionFile)) + if (Module *Mod = findModuleForHeader(ExpansionFile).getModule()) return Mod; // No module owns this header, so look up the inclusion chain to see if @@ -712,14 +773,18 @@ namespace clang { EndOfFile, HeaderKeyword, Identifier, + Exclaim, ExcludeKeyword, ExplicitKeyword, ExportKeyword, + ExternKeyword, FrameworkKeyword, LinkKeyword, ModuleKeyword, Period, + PrivateKeyword, UmbrellaKeyword, + UseKeyword, RequiresKeyword, Star, StringLiteral, @@ -780,6 +845,9 @@ namespace clang { /// \brief The directory containing Clang-supplied headers. const DirectoryEntry *BuiltinIncludeDir; + /// \brief Whether this module map is in a system header directory. + bool IsSystem; + /// \brief Whether an error occurred. 
bool HadError; @@ -803,10 +871,13 @@ namespace clang { typedef SmallVector<std::pair<std::string, SourceLocation>, 2> ModuleId; bool parseModuleId(ModuleId &Id); void parseModuleDecl(); + void parseExternModuleDecl(); void parseRequiresDecl(); - void parseHeaderDecl(SourceLocation UmbrellaLoc, SourceLocation ExcludeLoc); + void parseHeaderDecl(clang::MMToken::TokenKind, + SourceLocation LeadingLoc); void parseUmbrellaDirDecl(SourceLocation UmbrellaLoc); void parseExportDecl(); + void parseUseDecl(); void parseLinkDecl(); void parseConfigMacros(); void parseConflict(); @@ -821,10 +892,11 @@ namespace clang { DiagnosticsEngine &Diags, ModuleMap &Map, const DirectoryEntry *Directory, - const DirectoryEntry *BuiltinIncludeDir) + const DirectoryEntry *BuiltinIncludeDir, + bool IsSystem) : L(L), SourceMgr(SourceMgr), Target(Target), Diags(Diags), Map(Map), - Directory(Directory), BuiltinIncludeDir(BuiltinIncludeDir), - HadError(false), ActiveModule(0) + Directory(Directory), BuiltinIncludeDir(BuiltinIncludeDir), + IsSystem(IsSystem), HadError(false), ActiveModule(0) { Tok.clear(); consumeToken(); @@ -852,12 +924,15 @@ retry: .Case("exclude", MMToken::ExcludeKeyword) .Case("explicit", MMToken::ExplicitKeyword) .Case("export", MMToken::ExportKeyword) + .Case("extern", MMToken::ExternKeyword) .Case("framework", MMToken::FrameworkKeyword) .Case("header", MMToken::HeaderKeyword) .Case("link", MMToken::LinkKeyword) .Case("module", MMToken::ModuleKeyword) + .Case("private", MMToken::PrivateKeyword) .Case("requires", MMToken::RequiresKeyword) .Case("umbrella", MMToken::UmbrellaKeyword) + .Case("use", MMToken::UseKeyword) .Default(MMToken::Identifier); break; @@ -893,6 +968,10 @@ retry: Tok.Kind = MMToken::Star; break; + case tok::exclaim: + Tok.Kind = MMToken::Exclaim; + break; + case tok::string_literal: { if (LToken.hasUDSuffix()) { Diags.Report(LToken.getLocation(), diag::err_invalid_string_udl); @@ -1019,6 +1098,7 @@ namespace { /// \brief Parse a module declaration. /// /// module-declaration: +/// 'extern' 'module' module-id string-literal /// 'explicit'[opt] 'framework'[opt] 'module' module-id attributes[opt] /// { module-member* } /// @@ -1034,7 +1114,12 @@ namespace { /// inferred-submodule-declaration void ModuleMapParser::parseModuleDecl() { assert(Tok.is(MMToken::ExplicitKeyword) || Tok.is(MMToken::ModuleKeyword) || - Tok.is(MMToken::FrameworkKeyword)); + Tok.is(MMToken::FrameworkKeyword) || Tok.is(MMToken::ExternKeyword)); + if (Tok.is(MMToken::ExternKeyword)) { + parseExternModuleDecl(); + return; + } + // Parse 'explicit' or 'framework' keyword, if present. 
SourceLocation ExplicitLoc; bool Explicit = false; @@ -1159,7 +1244,7 @@ void ModuleMapParser::parseModuleDecl() { ActiveModule = Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit).first; ActiveModule->DefinitionLoc = ModuleNameLoc; - if (Attrs.IsSystem) + if (Attrs.IsSystem || IsSystem) ActiveModule->IsSystem = true; bool Done = false; @@ -1179,14 +1264,19 @@ void ModuleMapParser::parseModuleDecl() { break; case MMToken::ExplicitKeyword: + case MMToken::ExternKeyword: case MMToken::FrameworkKeyword: case MMToken::ModuleKeyword: parseModuleDecl(); break; - + case MMToken::ExportKeyword: parseExportDecl(); break; + + case MMToken::UseKeyword: + parseUseDecl(); + break; case MMToken::RequiresKeyword: parseRequiresDecl(); @@ -1195,7 +1285,7 @@ void ModuleMapParser::parseModuleDecl() { case MMToken::UmbrellaKeyword: { SourceLocation UmbrellaLoc = consumeToken(); if (Tok.is(MMToken::HeaderKeyword)) - parseHeaderDecl(UmbrellaLoc, SourceLocation()); + parseHeaderDecl(MMToken::UmbrellaKeyword, UmbrellaLoc); else parseUmbrellaDirDecl(UmbrellaLoc); break; @@ -1204,7 +1294,7 @@ void ModuleMapParser::parseModuleDecl() { case MMToken::ExcludeKeyword: { SourceLocation ExcludeLoc = consumeToken(); if (Tok.is(MMToken::HeaderKeyword)) { - parseHeaderDecl(SourceLocation(), ExcludeLoc); + parseHeaderDecl(MMToken::ExcludeKeyword, ExcludeLoc); } else { Diags.Report(Tok.getLocation(), diag::err_mmap_expected_header) << "exclude"; @@ -1212,8 +1302,19 @@ void ModuleMapParser::parseModuleDecl() { break; } + case MMToken::PrivateKeyword: { + SourceLocation PrivateLoc = consumeToken(); + if (Tok.is(MMToken::HeaderKeyword)) { + parseHeaderDecl(MMToken::PrivateKeyword, PrivateLoc); + } else { + Diags.Report(Tok.getLocation(), diag::err_mmap_expected_header) + << "private"; + } + break; + } + case MMToken::HeaderKeyword: - parseHeaderDecl(SourceLocation(), SourceLocation()); + parseHeaderDecl(MMToken::HeaderKeyword, SourceLocation()); break; case MMToken::LinkKeyword: @@ -1246,14 +1347,61 @@ void ModuleMapParser::parseModuleDecl() { ActiveModule = PreviousActiveModule; } +/// \brief Parse an extern module declaration. +/// +/// extern module-declaration: +/// 'extern' 'module' module-id string-literal +void ModuleMapParser::parseExternModuleDecl() { + assert(Tok.is(MMToken::ExternKeyword)); + consumeToken(); // 'extern' keyword + + // Parse 'module' keyword. + if (!Tok.is(MMToken::ModuleKeyword)) { + Diags.Report(Tok.getLocation(), diag::err_mmap_expected_module); + consumeToken(); + HadError = true; + return; + } + consumeToken(); // 'module' keyword + + // Parse the module name. + ModuleId Id; + if (parseModuleId(Id)) { + HadError = true; + return; + } + + // Parse the referenced module map file name. + if (!Tok.is(MMToken::StringLiteral)) { + Diags.Report(Tok.getLocation(), diag::err_mmap_expected_mmap_file); + HadError = true; + return; + } + std::string FileName = Tok.getString(); + consumeToken(); // filename + + StringRef FileNameRef = FileName; + SmallString<128> ModuleMapFileName; + if (llvm::sys::path::is_relative(FileNameRef)) { + ModuleMapFileName += Directory->getName(); + llvm::sys::path::append(ModuleMapFileName, FileName); + FileNameRef = ModuleMapFileName.str(); + } + if (const FileEntry *File = SourceMgr.getFileManager().getFile(FileNameRef)) + Map.parseModuleMapFile(File, /*IsSystem=*/false); +} + /// \brief Parse a requires declaration. 
/// /// requires-declaration: /// 'requires' feature-list /// /// feature-list: -/// identifier ',' feature-list -/// identifier +/// feature ',' feature-list +/// feature +/// +/// feature: +/// '!'[opt] identifier void ModuleMapParser::parseRequiresDecl() { assert(Tok.is(MMToken::RequiresKeyword)); @@ -1262,6 +1410,12 @@ void ModuleMapParser::parseRequiresDecl() { // Parse the feature-list. do { + bool RequiredState = true; + if (Tok.is(MMToken::Exclaim)) { + RequiredState = false; + consumeToken(); + } + if (!Tok.is(MMToken::Identifier)) { Diags.Report(Tok.getLocation(), diag::err_mmap_expected_feature); HadError = true; @@ -1273,7 +1427,8 @@ void ModuleMapParser::parseRequiresDecl() { consumeToken(); // Add this feature. - ActiveModule->addRequirement(Feature, Map.LangOpts, *Map.Target); + ActiveModule->addRequirement(Feature, RequiredState, + Map.LangOpts, *Map.Target); if (!Tok.is(MMToken::Comma)) break; @@ -1298,10 +1453,8 @@ static void appendSubframeworkPaths(Module *Mod, return; // Add Frameworks/Name.framework for each subframework. - for (unsigned I = Paths.size() - 1; I != 0; --I) { - llvm::sys::path::append(Path, "Frameworks"); - llvm::sys::path::append(Path, Paths[I-1] + ".framework"); - } + for (unsigned I = Paths.size() - 1; I != 0; --I) + llvm::sys::path::append(Path, "Frameworks", Paths[I-1] + ".framework"); } /// \brief Parse a header declaration. @@ -1309,14 +1462,11 @@ static void appendSubframeworkPaths(Module *Mod, /// header-declaration: /// 'umbrella'[opt] 'header' string-literal /// 'exclude'[opt] 'header' string-literal -void ModuleMapParser::parseHeaderDecl(SourceLocation UmbrellaLoc, - SourceLocation ExcludeLoc) { +void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind LeadingToken, + SourceLocation LeadingLoc) { assert(Tok.is(MMToken::HeaderKeyword)); consumeToken(); - bool Umbrella = UmbrellaLoc.isValid(); - bool Exclude = ExcludeLoc.isValid(); - assert(!(Umbrella && Exclude) && "Cannot have both 'umbrella' and 'exclude'"); // Parse the header name. if (!Tok.is(MMToken::StringLiteral)) { Diags.Report(Tok.getLocation(), diag::err_mmap_expected_header) @@ -1328,7 +1478,7 @@ void ModuleMapParser::parseHeaderDecl(SourceLocation UmbrellaLoc, SourceLocation FileNameLoc = consumeToken(); // Check whether we already have an umbrella. - if (Umbrella && ActiveModule->Umbrella) { + if (LeadingToken == MMToken::UmbrellaKeyword && ActiveModule->Umbrella) { Diags.Report(FileNameLoc, diag::err_mmap_umbrella_clash) << ActiveModule->getFullModuleName(); HadError = true; @@ -1355,15 +1505,13 @@ void ModuleMapParser::parseHeaderDecl(SourceLocation UmbrellaLoc, appendSubframeworkPaths(ActiveModule, PathName); // Check whether this file is in the public headers. - llvm::sys::path::append(PathName, "Headers"); - llvm::sys::path::append(PathName, FileName); + llvm::sys::path::append(PathName, "Headers", FileName); File = SourceMgr.getFileManager().getFile(PathName); if (!File) { // Check whether this file is in the private headers. PathName.resize(PathLength); - llvm::sys::path::append(PathName, "PrivateHeaders"); - llvm::sys::path::append(PathName, FileName); + llvm::sys::path::append(PathName, "PrivateHeaders", FileName); File = SourceMgr.getFileManager().getFile(PathName); } } else { @@ -1374,8 +1522,9 @@ void ModuleMapParser::parseHeaderDecl(SourceLocation UmbrellaLoc, // If this is a system module with a top-level header, this header // may have a counterpart (or replacement) in the set of headers // supplied by Clang. Find that builtin header. 
- if (ActiveModule->IsSystem && !Umbrella && BuiltinIncludeDir && - BuiltinIncludeDir != Directory && isBuiltinHeader(FileName)) { + if (ActiveModule->IsSystem && LeadingToken != MMToken::UmbrellaKeyword && + BuiltinIncludeDir && BuiltinIncludeDir != Directory && + isBuiltinHeader(FileName)) { SmallString<128> BuiltinPathName(BuiltinIncludeDir->getName()); llvm::sys::path::append(BuiltinPathName, FileName); BuiltinFile = SourceMgr.getFileManager().getFile(BuiltinPathName); @@ -1394,14 +1543,10 @@ void ModuleMapParser::parseHeaderDecl(SourceLocation UmbrellaLoc, // FIXME: We shouldn't be eagerly stat'ing every file named in a module map. // Come up with a lazy way to do this. if (File) { - if (ModuleMap::KnownHeader OwningModule = Map.Headers[File]) { - Diags.Report(FileNameLoc, diag::err_mmap_header_conflict) - << FileName << OwningModule.getModule()->getFullModuleName(); - HadError = true; - } else if (Umbrella) { + if (LeadingToken == MMToken::UmbrellaKeyword) { const DirectoryEntry *UmbrellaDir = File->getDir(); if (Module *UmbrellaModule = Map.UmbrellaDirs[UmbrellaDir]) { - Diags.Report(UmbrellaLoc, diag::err_mmap_umbrella_clash) + Diags.Report(LeadingLoc, diag::err_mmap_umbrella_clash) << UmbrellaModule->getFullModuleName(); HadError = true; } else { @@ -1410,17 +1555,25 @@ void ModuleMapParser::parseHeaderDecl(SourceLocation UmbrellaLoc, } } else { // Record this header. - Map.addHeader(ActiveModule, File, Exclude); + ModuleMap::ModuleHeaderRole Role = ModuleMap::NormalHeader; + if (LeadingToken == MMToken::ExcludeKeyword) + Role = ModuleMap::ExcludedHeader; + else if (LeadingToken == MMToken::PrivateKeyword) + Role = ModuleMap::PrivateHeader; + else + assert(LeadingToken == MMToken::HeaderKeyword); + + Map.addHeader(ActiveModule, File, Role); // If there is a builtin counterpart to this file, add it now. if (BuiltinFile) - Map.addHeader(ActiveModule, BuiltinFile, Exclude); + Map.addHeader(ActiveModule, BuiltinFile, Role); } - } else if (!Exclude) { + } else if (LeadingToken != MMToken::ExcludeKeyword) { // Ignore excluded header files. They're optional anyway. Diags.Report(FileNameLoc, diag::err_mmap_header_not_found) - << Umbrella << FileName; + << (LeadingToken == MMToken::UmbrellaKeyword) << FileName; HadError = true; } } @@ -1514,7 +1667,7 @@ void ModuleMapParser::parseExportDecl() { break; } - Diags.Report(Tok.getLocation(), diag::err_mmap_export_module_id); + Diags.Report(Tok.getLocation(), diag::err_mmap_module_id); HadError = true; return; } while (true); @@ -1525,6 +1678,38 @@ void ModuleMapParser::parseExportDecl() { ActiveModule->UnresolvedExports.push_back(Unresolved); } +/// \brief Parse a module uses declaration. +/// +/// uses-declaration: +/// 'uses' wildcard-module-id +void ModuleMapParser::parseUseDecl() { + assert(Tok.is(MMToken::UseKeyword)); + consumeToken(); + // Parse the module-id. + ModuleId ParsedModuleId; + + do { + if (Tok.is(MMToken::Identifier)) { + ParsedModuleId.push_back( + std::make_pair(Tok.getString(), Tok.getLocation())); + consumeToken(); + + if (Tok.is(MMToken::Period)) { + consumeToken(); + continue; + } + + break; + } + + Diags.Report(Tok.getLocation(), diag::err_mmap_module_id); + HadError = true; + return; + } while (true); + + ActiveModule->UnresolvedDirectUses.push_back(ParsedModuleId); +} + /// \brief Parse a link declaration. 
/// /// module-declaration: @@ -1787,6 +1972,7 @@ void ModuleMapParser::parseInferredModuleDecl(bool Framework, bool Explicit) { case MMToken::ExplicitKeyword: case MMToken::ModuleKeyword: case MMToken::HeaderKeyword: + case MMToken::PrivateKeyword: case MMToken::UmbrellaKeyword: default: Diags.Report(Tok.getLocation(), diag::err_mmap_expected_inferred_member) @@ -1897,6 +2083,7 @@ bool ModuleMapParser::parseModuleMapFile() { return HadError; case MMToken::ExplicitKeyword: + case MMToken::ExternKeyword: case MMToken::ModuleKeyword: case MMToken::FrameworkKeyword: parseModuleDecl(); @@ -1905,6 +2092,7 @@ bool ModuleMapParser::parseModuleMapFile() { case MMToken::Comma: case MMToken::ConfigMacros: case MMToken::Conflict: + case MMToken::Exclaim: case MMToken::ExcludeKeyword: case MMToken::ExportKeyword: case MMToken::HeaderKeyword: @@ -1913,12 +2101,14 @@ bool ModuleMapParser::parseModuleMapFile() { case MMToken::LinkKeyword: case MMToken::LSquare: case MMToken::Period: + case MMToken::PrivateKeyword: case MMToken::RBrace: case MMToken::RSquare: case MMToken::RequiresKeyword: case MMToken::Star: case MMToken::StringLiteral: case MMToken::UmbrellaKeyword: + case MMToken::UseKeyword: Diags.Report(Tok.getLocation(), diag::err_mmap_expected_module); HadError = true; consumeToken(); @@ -1927,23 +2117,23 @@ bool ModuleMapParser::parseModuleMapFile() { } while (true); } -bool ModuleMap::parseModuleMapFile(const FileEntry *File) { +bool ModuleMap::parseModuleMapFile(const FileEntry *File, bool IsSystem) { llvm::DenseMap<const FileEntry *, bool>::iterator Known = ParsedModuleMap.find(File); if (Known != ParsedModuleMap.end()) return Known->second; assert(Target != 0 && "Missing target information"); - FileID ID = SourceMgr->createFileID(File, SourceLocation(), SrcMgr::C_User); - const llvm::MemoryBuffer *Buffer = SourceMgr->getBuffer(ID); + FileID ID = SourceMgr.createFileID(File, SourceLocation(), SrcMgr::C_User); + const llvm::MemoryBuffer *Buffer = SourceMgr.getBuffer(ID); if (!Buffer) return ParsedModuleMap[File] = true; // Parse this module map file. 
- Lexer L(ID, SourceMgr->getBuffer(ID), *SourceMgr, MMapLangOpts); + Lexer L(ID, SourceMgr.getBuffer(ID), SourceMgr, MMapLangOpts); Diags->getClient()->BeginSourceFile(MMapLangOpts); - ModuleMapParser Parser(L, *SourceMgr, Target, *Diags, *this, File->getDir(), - BuiltinIncludeDir); + ModuleMapParser Parser(L, SourceMgr, Target, *Diags, *this, File->getDir(), + BuiltinIncludeDir, IsSystem); bool Result = Parser.parseModuleMapFile(); Diags->getClient()->EndSourceFile(); ParsedModuleMap[File] = Result; diff --git a/lib/Lex/PPConditionalDirectiveRecord.cpp b/lib/Lex/PPConditionalDirectiveRecord.cpp index 16ce3ef..16dc1d8 100644 --- a/lib/Lex/PPConditionalDirectiveRecord.cpp +++ b/lib/Lex/PPConditionalDirectiveRecord.cpp @@ -76,7 +76,8 @@ void PPConditionalDirectiveRecord::addCondDirectiveLoc( } void PPConditionalDirectiveRecord::If(SourceLocation Loc, - SourceRange ConditionRange) { + SourceRange ConditionRange, + bool ConditionValue) { addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); CondDirectiveStack.push_back(Loc); } @@ -97,6 +98,7 @@ void PPConditionalDirectiveRecord::Ifndef(SourceLocation Loc, void PPConditionalDirectiveRecord::Elif(SourceLocation Loc, SourceRange ConditionRange, + bool ConditionValue, SourceLocation IfLoc) { addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); CondDirectiveStack.back() = Loc; diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp index 50a0cb5..86c508f 100644 --- a/lib/Lex/PPDirectives.cpp +++ b/lib/Lex/PPDirectives.cpp @@ -17,6 +17,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Lex/CodeCompletionHandler.h" #include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/LiteralSupport.h" #include "clang/Lex/MacroInfo.h" @@ -241,7 +242,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, bool FoundElse, SourceLocation ElseLoc) { ++NumSkipped; - assert(CurTokenLexer == 0 && CurPPLexer && "Lexing a macro, not a file?"); + assert(!CurTokenLexer && CurPPLexer && "Lexing a macro, not a file?"); CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/false, FoundNonSkipPortion, FoundElse); @@ -430,7 +431,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, if (Callbacks) Callbacks->Elif(Tok.getLocation(), SourceRange(ConditionalBegin, ConditionalEnd), - CondInfo.IfLoc); + ShouldEnter, CondInfo.IfLoc); break; } } @@ -531,14 +532,97 @@ void Preprocessor::PTHSkipExcludedConditionalBlock() { } } +Module *Preprocessor::getModuleForLocation(SourceLocation FilenameLoc) { + ModuleMap &ModMap = HeaderInfo.getModuleMap(); + if (SourceMgr.isInMainFile(FilenameLoc)) { + if (Module *CurMod = getCurrentModule()) + return CurMod; // Compiling a module. + return HeaderInfo.getModuleMap().SourceModule; // Compiling a source. + } + // Try to determine the module of the include directive. + FileID IDOfIncl = SourceMgr.getFileID(FilenameLoc); + if (const FileEntry *EntryOfIncl = SourceMgr.getFileEntryForID(IDOfIncl)) { + // The include comes from a file. + return ModMap.findModuleForHeader(EntryOfIncl).getModule(); + } else { + // The include does not come from a file, + // so it is probably a module compilation. 
+ return getCurrentModule(); + } +} + +bool Preprocessor::violatesPrivateInclude( + Module *RequestingModule, + const FileEntry *IncFileEnt, + ModuleMap::ModuleHeaderRole Role, + Module *RequestedModule) { + #ifndef NDEBUG + // Check for consistency between the module header role + // as obtained from the lookup and as obtained from the module. + // This check is not cheap, so enable it only for debugging. + SmallVectorImpl<const FileEntry *> &PvtHdrs + = RequestedModule->PrivateHeaders; + SmallVectorImpl<const FileEntry *>::iterator Look + = std::find(PvtHdrs.begin(), PvtHdrs.end(), IncFileEnt); + bool IsPrivate = Look != PvtHdrs.end(); + assert((IsPrivate && Role == ModuleMap::PrivateHeader) + || (!IsPrivate && Role != ModuleMap::PrivateHeader)); + #endif + return Role == ModuleMap::PrivateHeader && + RequestedModule->getTopLevelModule() != RequestingModule; +} + +bool Preprocessor::violatesUseDeclarations( + Module *RequestingModule, + Module *RequestedModule) { + ModuleMap &ModMap = HeaderInfo.getModuleMap(); + ModMap.resolveUses(RequestingModule, /*Complain=*/false); + const SmallVectorImpl<Module *> &AllowedUses = RequestingModule->DirectUses; + SmallVectorImpl<Module *>::const_iterator Declared = + std::find(AllowedUses.begin(), AllowedUses.end(), RequestedModule); + return Declared == AllowedUses.end(); +} + +void Preprocessor::verifyModuleInclude(SourceLocation FilenameLoc, + StringRef Filename, + const FileEntry *IncFileEnt) { + Module *RequestingModule = getModuleForLocation(FilenameLoc); + if (RequestingModule) + HeaderInfo.getModuleMap().resolveUses(RequestingModule, /*Complain=*/false); + ModuleMap::KnownHeader RequestedModule = + HeaderInfo.getModuleMap().findModuleForHeader(IncFileEnt, + RequestingModule); + + if (RequestingModule == RequestedModule.getModule()) + return; // No faults within a module, or between files both not in modules. + + if (RequestingModule != HeaderInfo.getModuleMap().SourceModule) + return; // No errors for indirect modules. + // This may be a bit of a problem for modules with no source files. + + if (RequestedModule && violatesPrivateInclude(RequestingModule, IncFileEnt, + RequestedModule.getRole(), + RequestedModule.getModule())) + Diag(FilenameLoc, diag::error_use_of_private_header_outside_module) + << Filename; + + // FIXME: Add support for FixIts in module map files and offer adding the + // required use declaration. + if (RequestingModule && getLangOpts().ModulesDeclUse && + violatesUseDeclarations(RequestingModule, RequestedModule.getModule())) + Diag(FilenameLoc, diag::error_undeclared_use_of_module) + << Filename; +} + const FileEntry *Preprocessor::LookupFile( + SourceLocation FilenameLoc, StringRef Filename, bool isAngled, const DirectoryLookup *FromDir, const DirectoryLookup *&CurDir, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, - Module **SuggestedModule, + ModuleMap::KnownHeader *SuggestedModule, bool SkipCache) { // If the header lookup mechanism may be relative to the current file, pass in // info about where the current file is. @@ -564,7 +648,11 @@ const FileEntry *Preprocessor::LookupFile( const FileEntry *FE = HeaderInfo.LookupFile( Filename, isAngled, FromDir, CurDir, CurFileEnt, SearchPath, RelativePath, SuggestedModule, SkipCache); - if (FE) return FE; + if (FE) { + if (SuggestedModule) + verifyModuleInclude(FilenameLoc, Filename, FE); + return FE; + } // Otherwise, see if this is a subframework header. If so, this is relative // to one of the headers on the #include stack.
Walk the list of the current @@ -626,6 +714,10 @@ void Preprocessor::HandleDirective(Token &Result) { CurPPLexer->ParsingPreprocessorDirective = true; if (CurLexer) CurLexer->SetKeepWhitespaceMode(false); + bool ImmediatelyAfterTopLevelIfndef = + CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef(); + CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef(); + ++NumDirectives; // We are about to read a token. For the multiple-include optimization FA to @@ -713,7 +805,7 @@ void Preprocessor::HandleDirective(Token &Result) { // C99 6.10.3 - Macro Replacement. case tok::pp_define: - return HandleDefineDirective(Result); + return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef); case tok::pp_undef: return HandleUndefDirective(Result); @@ -727,7 +819,7 @@ void Preprocessor::HandleDirective(Token &Result) { // C99 6.10.6 - Pragma Directive. case tok::pp_pragma: - return HandlePragmaDirective(PIK_HashPragma); + return HandlePragmaDirective(SavedHash.getLocation(), PIK_HashPragma); // GNU Extensions. case tok::pp_import: @@ -819,6 +911,11 @@ static bool GetLineValue(Token &DigitTok, unsigned &Val, // here. Val = 0; for (unsigned i = 0; i != ActualLength; ++i) { + // C++1y [lex.fcon]p1: + // Optional separating single quotes in a digit-sequence are ignored + if (DigitTokBegin[i] == '\'') + continue; + if (!isDigit(DigitTokBegin[i])) { PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i), diag::err_pp_line_digit_sequence) << IsGNULineDirective; @@ -1386,11 +1483,12 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, SmallString<1024> RelativePath; // We get the raw path only if we have 'Callbacks' to which we later pass // the path. - Module *SuggestedModule = 0; + ModuleMap::KnownHeader SuggestedModule; + SourceLocation FilenameLoc = FilenameTok.getLocation(); const FileEntry *File = LookupFile( - Filename, isAngled, LookupFrom, CurDir, + FilenameLoc, Filename, isAngled, LookupFrom, CurDir, Callbacks ? &SearchPath : NULL, Callbacks ? &RelativePath : NULL, - getLangOpts().Modules? &SuggestedModule : 0); + HeaderInfo.getHeaderSearchOpts().ModuleMaps ? &SuggestedModule : 0); if (Callbacks) { if (!File) { @@ -1403,14 +1501,16 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, HeaderInfo.AddSearchPath(DL, isAngled); // Try the lookup again, skipping the cache. - File = LookupFile(Filename, isAngled, LookupFrom, CurDir, 0, 0, - getLangOpts().Modules? &SuggestedModule : 0, - /*SkipCache*/true); + File = LookupFile(FilenameLoc, Filename, isAngled, LookupFrom, CurDir, + 0, 0, HeaderInfo.getHeaderSearchOpts().ModuleMaps + ? &SuggestedModule + : 0, + /*SkipCache*/ true); } } } - if (!SuggestedModule) { + if (!SuggestedModule || !getLangOpts().Modules) { // Notify the callback object that we've seen an inclusion directive. Callbacks->InclusionDirective(HashLoc, IncludeTok, Filename, isAngled, FilenameRange, File, @@ -1425,10 +1525,10 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // brackets, we can attempt a lookup as though it were a quoted path to // provide the user with a possible fixit. if (isAngled) { - File = LookupFile(Filename, false, LookupFrom, CurDir, - Callbacks ? &SearchPath : 0, - Callbacks ? &RelativePath : 0, - getLangOpts().Modules ? &SuggestedModule : 0); + File = LookupFile( + FilenameLoc, Filename, false, LookupFrom, CurDir, + Callbacks ? &SearchPath : 0, Callbacks ? &RelativePath : 0, + HeaderInfo.getHeaderSearchOpts().ModuleMaps ? 
&SuggestedModule : 0); if (File) { SourceRange Range(FilenameTok.getLocation(), CharEnd); Diag(FilenameTok, diag::err_pp_file_not_found_not_fatal) << @@ -1446,12 +1546,12 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // If we are supposed to import a module rather than including the header, // do so now. - if (SuggestedModule) { + if (SuggestedModule && getLangOpts().Modules) { // Compute the module access path corresponding to this module. // FIXME: Should we have a second loadModule() overload to avoid this // extra lookup step? SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path; - for (Module *Mod = SuggestedModule; Mod; Mod = Mod->Parent) + for (Module *Mod = SuggestedModule.getModule(); Mod; Mod = Mod->Parent) Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name), FilenameTok.getLocation())); std::reverse(Path.begin(), Path.end()); @@ -1503,16 +1603,29 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, "@import " + PathString.str().str() + ";"); } - // Load the module. - // If this was an #__include_macros directive, only make macros visible. - Module::NameVisibilityKind Visibility - = (IncludeKind == 3)? Module::MacrosVisible : Module::AllVisible; + // Load the module. Only make macros visible. We'll make the declarations + // visible when the parser gets here. + Module::NameVisibilityKind Visibility = Module::MacrosVisible; ModuleLoadResult Imported = TheModuleLoader.loadModule(IncludeTok.getLocation(), Path, Visibility, /*IsIncludeDirective=*/true); - assert((Imported == 0 || Imported == SuggestedModule) && + assert((Imported == 0 || Imported == SuggestedModule.getModule()) && "the imported module is different than the suggested one"); - + + if (!Imported && hadModuleLoaderFatalFailure()) { + // With a fatal failure in the module loader, we abort parsing. + Token &Result = IncludeTok; + if (CurLexer) { + Result.startToken(); + CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof); + CurLexer->cutOffLexing(); + } else { + assert(CurPTHLexer && "#include but no current lexer set!"); + CurPTHLexer->getEOF(Result); + } + return; + } + // If this header isn't part of the module we're building, we're done. if (!BuildingImportedModule && Imported) { if (Callbacks) { @@ -1520,6 +1633,20 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, FilenameRange, File, SearchPath, RelativePath, Imported); } + + if (IncludeKind != 3) { + // Let the parser know that we hit a module import, and it should + // make the module visible. + // FIXME: Produce this as the current token directly, rather than + // allocating a new token for it. + Token *Tok = new Token[1]; + Tok[0].startToken(); + Tok[0].setKind(tok::annot_module_include); + Tok[0].setLocation(HashLoc); + Tok[0].setAnnotationEndLoc(End); + Tok[0].setAnnotationValue(Imported); + EnterTokenStream(Tok, 1, true, true); + } return; } @@ -1746,7 +1873,8 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI, Token &Tok) { /// HandleDefineDirective - Implements \#define. This consumes the entire macro /// line then lets the caller lex the next real token. -void Preprocessor::HandleDefineDirective(Token &DefineTok) { +void Preprocessor::HandleDefineDirective(Token &DefineTok, + bool ImmediatelyAfterHeaderGuard) { ++NumDefined; Token MacroNameTok; @@ -1772,6 +1900,11 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { // marking each of the identifiers as being used as macro arguments. 
Also, // check other constraints on the first token of the macro body. if (Tok.is(tok::eod)) { + if (ImmediatelyAfterHeaderGuard) { + // Save this macro information since it may be part of a header guard. + CurPPLexer->MIOpt.SetDefinedMacro(MacroNameTok.getIdentifierInfo(), + MacroNameTok.getLocation()); + } // If there is no body to this macro, we have no special handling here. } else if (Tok.hasLeadingSpace()) { // This is a normal token with leading space. Clear the leading space @@ -1854,6 +1987,18 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { continue; } + // If we're in -traditional mode, then we should ignore stringification + // and token pasting. Mark the tokens as unknown so as not to confuse + // things. + if (getLangOpts().TraditionalCPP) { + Tok.setKind(tok::unknown); + MI->AddTokenToBody(Tok); + + // Get the next token of the macro. + LexUnexpandedToken(Tok); + continue; + } + if (Tok.is(tok::hashhash)) { // If we see token pasting, check if it looks like the gcc comma @@ -1873,13 +2018,8 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { MI->getReplacementToken(NumTokens-1).is(tok::comma)) MI->setHasCommaPasting(); - // Things look ok, add the '##' and param name tokens to the macro. + // Things look ok, add the '##' token to the macro. MI->AddTokenToBody(LastTok); - MI->AddTokenToBody(Tok); - LastTok = Tok; - - // Get the next token of the macro. - LexUnexpandedToken(Tok); continue; } @@ -1896,6 +2036,8 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { // confused. if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) { LastTok.setKind(tok::unknown); + MI->AddTokenToBody(LastTok); + continue; } else { Diag(Tok, diag::err_pp_stringize_not_parameter); ReleaseMacroInfo(MI); @@ -1972,7 +2114,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { assert(!MI->isUsed()); // If we need warning for not using the macro, add its location in the // warn-because-unused-macro set. If it gets used it will be removed from set. - if (isInPrimaryFile() && // don't warn for include'd macros. + if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) && Diags->getDiagnosticLevel(diag::pp_macro_not_used, MI->getDefinitionLoc()) != DiagnosticsEngine::Ignored) { MI->setIsWarnIfUnused(true); @@ -2062,7 +2204,7 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, // handle. if (!ReadAnyTokensBeforeDirective && MI == 0) { assert(isIfndef && "#ifdef shouldn't reach here"); - CurPPLexer->MIOpt.EnterTopLevelIFNDEF(MII); + CurPPLexer->MIOpt.EnterTopLevelIfndef(MII, MacroNameTok.getLocation()); } else CurPPLexer->MIOpt.EnterTopLevelConditional(); } @@ -2108,14 +2250,16 @@ void Preprocessor::HandleIfDirective(Token &IfToken, // directive seen, handle it for the multiple-include optimization. if (CurPPLexer->getConditionalStackDepth() == 0) { if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue) - CurPPLexer->MIOpt.EnterTopLevelIFNDEF(IfNDefMacro); + // FIXME: Pass in the location of the macro name, not the 'if' token. + CurPPLexer->MIOpt.EnterTopLevelIfndef(IfNDefMacro, IfToken.getLocation()); else CurPPLexer->MIOpt.EnterTopLevelConditional(); } if (Callbacks) Callbacks->If(IfToken.getLocation(), - SourceRange(ConditionalBegin, ConditionalEnd)); + SourceRange(ConditionalBegin, ConditionalEnd), + ConditionalTrue); // Should we include the stuff contained by this directive?
if (ConditionalTrue) { @@ -2211,7 +2355,8 @@ void Preprocessor::HandleElifDirective(Token &ElifToken) { if (Callbacks) Callbacks->Elif(ElifToken.getLocation(), - SourceRange(ConditionalBegin, ConditionalEnd), CI.IfLoc); + SourceRange(ConditionalBegin, ConditionalEnd), + true, CI.IfLoc); // Finally, skip the rest of the contents of this block. SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true, diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp index d9ce8bf..87c0a6a 100644 --- a/lib/Lex/PPExpressions.cpp +++ b/lib/Lex/PPExpressions.cpp @@ -82,7 +82,8 @@ struct DefinedTracker { static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, bool ValueLive, Preprocessor &PP) { IdentifierInfo *II; - Result.setBegin(PeekTok.getLocation()); + SourceLocation beginLoc(PeekTok.getLocation()); + Result.setBegin(beginLoc); // Get the next token, don't expand it. PP.LexUnexpandedNonComment(PeekTok); @@ -119,14 +120,8 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, PP.markMacroAsUsed(Macro->getMacroInfo()); } - // Invoke the 'defined' callback. - if (PPCallbacks *Callbacks = PP.getPPCallbacks()) { - MacroDirective *MD = Macro; - // Pass the MacroInfo for the macro name even if the value is dead. - if (!MD && Result.Val != 0) - MD = PP.getMacroDirective(II); - Callbacks->Defined(PeekTok, MD); - } + // Save macro token for callback. + Token macroToken(PeekTok); // If we are in parens, ensure we have a trailing ). if (LParenLoc.isValid()) { @@ -148,6 +143,16 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, PP.LexNonComment(PeekTok); } + // Invoke the 'defined' callback. + if (PPCallbacks *Callbacks = PP.getPPCallbacks()) { + MacroDirective *MD = Macro; + // Pass the MacroInfo for the macro name even if the value is dead. + if (!MD && Result.Val != 0) + MD = PP.getMacroDirective(II); + Callbacks->Defined(macroToken, MD, + SourceRange(beginLoc, PeekTok.getLocation())); + } + // Success, remember that we saw defined(X). DT.State = DefinedTracker::DefinedMacro; DT.TheMacro = II; @@ -240,7 +245,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, // Parse the integer literal into Result. if (Literal.GetIntegerValue(Result.Val)) { // Overflow parsing integer literal. - if (ValueLive) PP.Diag(PeekTok, diag::warn_integer_too_large); + if (ValueLive) PP.Diag(PeekTok, diag::err_integer_too_large); Result.Val.setIsUnsigned(true); } else { // Set the signedness of the result to match whether there was a U suffix @@ -252,8 +257,8 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, // large that it is unsigned" e.g. on 12345678901234567890 where intmax_t // is 64-bits. if (!Literal.isUnsigned && Result.Val.isNegative()) { - // Don't warn for a hex literal: 0x8000..0 shouldn't warn. - if (ValueLive && Literal.getRadix() != 16) + // Don't warn for a hex or octal literal: 0x8000..0 shouldn't warn. 
+ if (ValueLive && Literal.getRadix() == 10) PP.Diag(PeekTok, diag::warn_integer_too_large_for_signed); Result.Val.setIsUnsigned(true); } } diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp index be4defe..1f970a4 100644 --- a/lib/Lex/PPLexerChange.cpp +++ b/lib/Lex/PPLexerChange.cpp @@ -21,7 +21,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/PathV2.h" +#include "llvm/Support/Path.h" using namespace clang; PPCallbacks::~PPCallbacks() {} @@ -70,7 +70,7 @@ PreprocessorLexer *Preprocessor::getCurrentFileLexer() const { /// start lexing tokens from it instead of the current buffer. void Preprocessor::EnterSourceFile(FileID FID, const DirectoryLookup *CurDir, SourceLocation Loc) { - assert(CurTokenLexer == 0 && "Cannot #include a file inside a macro!"); + assert(!CurTokenLexer && "Cannot #include a file inside a macro!"); ++NumEnteredSourceFiles; if (MaxIncludeStackDepth < IncludeMacroStack.size()) @@ -231,6 +231,19 @@ static void computeRelativePath(FileManager &FM, const DirectoryEntry *Dir, Result = File->getName(); } +void Preprocessor::PropagateLineStartLeadingSpaceInfo(Token &Result) { + if (CurTokenLexer) { + CurTokenLexer->PropagateLineStartLeadingSpaceInfo(Result); + return; + } + if (CurLexer) { + CurLexer->PropagateLineStartLeadingSpaceInfo(Result); + return; + } + // FIXME: Handle other kinds of lexers? It generally shouldn't matter, + // but it might if they're empty? +} + /// HandleEndOfFile - This callback is invoked when the lexer hits the end of /// the current file. This either returns the EOF token or pops a level off /// the include stack and keeps going. @@ -244,8 +257,42 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { CurPPLexer->MIOpt.GetControllingMacroAtEndOfFile()) { // Okay, this has a controlling macro, remember in HeaderFileInfo. if (const FileEntry *FE = - SourceMgr.getFileEntryForID(CurPPLexer->getFileID())) + SourceMgr.getFileEntryForID(CurPPLexer->getFileID())) { HeaderInfo.SetFileControllingMacro(FE, ControllingMacro); + if (const IdentifierInfo *DefinedMacro = + CurPPLexer->MIOpt.GetDefinedMacro()) { + if (!ControllingMacro->hasMacroDefinition() && + DefinedMacro != ControllingMacro && + HeaderInfo.FirstTimeLexingFile(FE)) { + + // If the edit distance between the two macros is more than 50%, + // DefinedMacro may not be a header guard, or can be the header guard of + // another header file. Therefore, it may be defining something + // completely different. This can be observed in the wild when + // handling feature macros or header guards in different files. + + const StringRef ControllingMacroName = ControllingMacro->getName(); + const StringRef DefinedMacroName = DefinedMacro->getName(); + const size_t MaxHalfLength = std::max(ControllingMacroName.size(), + DefinedMacroName.size()) / 2; + const unsigned ED = ControllingMacroName.edit_distance( + DefinedMacroName, true, MaxHalfLength); + if (ED <= MaxHalfLength) { + // Emit a warning for a bad header guard.
+ Diag(CurPPLexer->MIOpt.GetMacroLocation(), + diag::warn_header_guard) + << CurPPLexer->MIOpt.GetMacroLocation() << ControllingMacro; + Diag(CurPPLexer->MIOpt.GetDefinedLocation(), + diag::note_header_guard) + << CurPPLexer->MIOpt.GetDefinedLocation() << DefinedMacro + << ControllingMacro + << FixItHint::CreateReplacement( + CurPPLexer->MIOpt.GetDefinedLocation(), + ControllingMacro->getName()); + } + } + } + } } } @@ -299,6 +346,9 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { // We're done with the #included file. RemoveTopOfLexerStack(); + // Propagate info about start-of-line/leading white-space/etc. + PropagateLineStartLeadingSpaceInfo(Result); + // Notify the client, if desired, that we are in a new source file. if (Callbacks && !isEndOfMacro && CurPPLexer) { SrcMgr::CharacteristicKind FileType = @@ -401,8 +451,36 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { } } } + + // Check whether there are any headers that were included, but not + // mentioned at all in the module map. Such headers + SourceLocation StartLoc + = SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID()); + if (getDiagnostics().getDiagnosticLevel(diag::warn_forgotten_module_header, + StartLoc) + != DiagnosticsEngine::Ignored) { + ModuleMap &ModMap = getHeaderSearchInfo().getModuleMap(); + for (unsigned I = 0, N = SourceMgr.local_sloc_entry_size(); I != N; ++I) { + // We only care about file entries. + const SrcMgr::SLocEntry &Entry = SourceMgr.getLocalSLocEntry(I); + if (!Entry.isFile()) + continue; + + // Dig out the actual file. + const FileEntry *File = Entry.getFile().getContentCache()->OrigEntry; + if (!File) + continue; + + // If it's not part of a module and not unknown, complain. + if (!ModMap.findModuleForHeader(File) && + !ModMap.isHeaderInUnavailableModule(File)) { + Diag(StartLoc, diag::warn_forgotten_module_header) + << File->getName() << Mod->getFullModuleName(); + } + } + } } - + return true; } diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp index 24c6217..f20633f 100644 --- a/lib/Lex/PPMacroExpansion.cpp +++ b/lib/Lex/PPMacroExpansion.cpp @@ -225,7 +225,7 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, if (Callbacks) Callbacks->MacroExpands(Identifier, MD, Identifier.getLocation(),/*Args=*/0); ExpandBuiltinMacro(Identifier); - return false; + return true; } /// Args - If this is a function-like macro expansion, this contains, @@ -239,11 +239,6 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // If this is a function-like macro, read the arguments. if (MI->isFunctionLike()) { - // C99 6.10.3p10: If the preprocessing token immediately after the macro - // name isn't a '(', this macro should not be expanded. - if (!isNextPPTokenLParen()) - return true; - // Remember that we are now parsing the arguments to a macro invocation. // Preprocessor directives used inside macro arguments are not portable, and // this enables the warning. @@ -254,7 +249,7 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, InMacroArgs = false; // If there was an error parsing the arguments, bail out. - if (Args == 0) return false; + if (Args == 0) return true; ++NumFnMacroExpanded; } else { @@ -314,25 +309,12 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // No need for arg info. if (Args) Args->destroy(*this); - // Ignore this macro use, just return the next token in the current - // buffer. 
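The new loop over local source-location entries above amounts to: walk every file that was actually entered while building a module and warn about the ones the module map does not mention. A rough standalone sketch of that shape, with a std::set standing in for the module map and warnForgottenHeaders as an invented helper:

#include <cstdio>
#include <set>
#include <string>
#include <vector>

// Report headers that were included while building a module but are not
// listed in the module's map (here just a set of known header paths).
static void warnForgottenHeaders(const std::vector<std::string> &IncludedFiles,
                                 const std::set<std::string> &MappedHeaders,
                                 const std::string &ModuleName) {
  for (const std::string &File : IncludedFiles)
    if (!MappedHeaders.count(File))
      std::printf("warning: %s is included in module %s but not declared "
                  "in its module map\n", File.c_str(), ModuleName.c_str());
}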
- bool HadLeadingSpace = Identifier.hasLeadingSpace(); - bool IsAtStartOfLine = Identifier.isAtStartOfLine(); - - Lex(Identifier); - - // If the identifier isn't on some OTHER line, inherit the leading - // whitespace/first-on-a-line property of this token. This handles - // stuff like "! XX," -> "! ," and " XX," -> " ,", when XX is - // empty. - if (!Identifier.isAtStartOfLine()) { - if (IsAtStartOfLine) Identifier.setFlag(Token::StartOfLine); - if (HadLeadingSpace) Identifier.setFlag(Token::LeadingSpace); - } + // Propagate whitespace info as if we had pushed, then popped, + // a macro context. Identifier.setFlag(Token::LeadingEmptyMacro); + PropagateLineStartLeadingSpaceInfo(Identifier); ++NumFastMacroExpanded; return false; - } else if (MI->getNumTokens() == 1 && isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(), *this)) { @@ -378,18 +360,144 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // Since this is not an identifier token, it can't be macro expanded, so // we're done. ++NumFastMacroExpanded; - return false; + return true; } // Start expanding the macro. EnterMacro(Identifier, ExpansionEnd, MI, Args); - - // Now that the macro is at the top of the include stack, ask the - // preprocessor to read the next token from it. - Lex(Identifier); return false; } +enum Bracket { + Brace, + Paren +}; + +/// CheckMatchedBrackets - Returns true if the braces and parentheses in the +/// token vector are properly nested. +static bool CheckMatchedBrackets(const SmallVectorImpl<Token> &Tokens) { + SmallVector<Bracket, 8> Brackets; + for (SmallVectorImpl<Token>::const_iterator I = Tokens.begin(), + E = Tokens.end(); + I != E; ++I) { + if (I->is(tok::l_paren)) { + Brackets.push_back(Paren); + } else if (I->is(tok::r_paren)) { + if (Brackets.empty() || Brackets.back() == Brace) + return false; + Brackets.pop_back(); + } else if (I->is(tok::l_brace)) { + Brackets.push_back(Brace); + } else if (I->is(tok::r_brace)) { + if (Brackets.empty() || Brackets.back() == Paren) + return false; + Brackets.pop_back(); + } + } + if (!Brackets.empty()) + return false; + return true; +} + +/// GenerateNewArgTokens - Returns true if OldTokens can be converted to a new +/// vector of tokens in NewTokens. The new number of arguments will be placed +/// in NumArgs and the ranges which need to surrounded in parentheses will be +/// in ParenHints. +/// Returns false if the token stream cannot be changed. If this is because +/// of an initializer list starting a macro argument, the range of those +/// initializer lists will be place in InitLists. +static bool GenerateNewArgTokens(Preprocessor &PP, + SmallVectorImpl<Token> &OldTokens, + SmallVectorImpl<Token> &NewTokens, + unsigned &NumArgs, + SmallVectorImpl<SourceRange> &ParenHints, + SmallVectorImpl<SourceRange> &InitLists) { + if (!CheckMatchedBrackets(OldTokens)) + return false; + + // Once it is known that the brackets are matched, only a simple count of the + // braces is needed. + unsigned Braces = 0; + + // First token of a new macro argument. + SmallVectorImpl<Token>::iterator ArgStartIterator = OldTokens.begin(); + + // First closing brace in a new macro argument. Used to generate + // SourceRanges for InitLists. + SmallVectorImpl<Token>::iterator ClosingBrace = OldTokens.end(); + NumArgs = 0; + Token TempToken; + // Set to true when a macro separator token is found inside a braced list. + // If true, the fixed argument spans multiple old arguments and ParenHints + // will be updated. 
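CheckMatchedBrackets above is a stack-based balance check restricted to parentheses and braces; only when the whole argument list is balanced does the later recovery attempt make sense. The same idea over a plain character string, with bracketsBalanced as a hypothetical helper rather than the token-based clang function:

#include <string>
#include <vector>

// Returns true if every ')' and '}' closes the matching opener, e.g.
// "f({1, 2}, (3))" is balanced but "f({1, 2)" and "f(}{)" are not.
static bool bracketsBalanced(const std::string &Text) {
  std::vector<char> Stack;
  for (char C : Text) {
    if (C == '(' || C == '{') {
      Stack.push_back(C);
    } else if (C == ')' || C == '}') {
      char Open = (C == ')') ? '(' : '{';
      if (Stack.empty() || Stack.back() != Open)
        return false;
      Stack.pop_back();
    }
  }
  return Stack.empty();
}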
+ bool FoundSeparatorToken = false; + for (SmallVectorImpl<Token>::iterator I = OldTokens.begin(), + E = OldTokens.end(); + I != E; ++I) { + if (I->is(tok::l_brace)) { + ++Braces; + } else if (I->is(tok::r_brace)) { + --Braces; + if (Braces == 0 && ClosingBrace == E && FoundSeparatorToken) + ClosingBrace = I; + } else if (I->is(tok::eof)) { + // EOF token is used to separate macro arguments + if (Braces != 0) { + // Assume comma separator is actually braced list separator and change + // it back to a comma. + FoundSeparatorToken = true; + I->setKind(tok::comma); + I->setLength(1); + } else { // Braces == 0 + // Separator token still separates arguments. + ++NumArgs; + + // If the argument starts with a brace, it can't be fixed with + // parentheses. A different diagnostic will be given. + if (FoundSeparatorToken && ArgStartIterator->is(tok::l_brace)) { + InitLists.push_back( + SourceRange(ArgStartIterator->getLocation(), + PP.getLocForEndOfToken(ClosingBrace->getLocation()))); + ClosingBrace = E; + } + + // Add left paren + if (FoundSeparatorToken) { + TempToken.startToken(); + TempToken.setKind(tok::l_paren); + TempToken.setLocation(ArgStartIterator->getLocation()); + TempToken.setLength(0); + NewTokens.push_back(TempToken); + } + + // Copy over argument tokens + NewTokens.insert(NewTokens.end(), ArgStartIterator, I); + + // Add right paren and store the paren locations in ParenHints + if (FoundSeparatorToken) { + SourceLocation Loc = PP.getLocForEndOfToken((I - 1)->getLocation()); + TempToken.startToken(); + TempToken.setKind(tok::r_paren); + TempToken.setLocation(Loc); + TempToken.setLength(0); + NewTokens.push_back(TempToken); + ParenHints.push_back(SourceRange(ArgStartIterator->getLocation(), + Loc)); + } + + // Copy separator token + NewTokens.push_back(*I); + + // Reset values + ArgStartIterator = I + 1; + FoundSeparatorToken = false; + } + } + } + + return !ParenHints.empty() && InitLists.empty(); +} + /// ReadFunctionLikeMacroArgs - After reading "MACRO" and knowing that the next /// token is the '(' of the macro, this method is invoked to read all of the /// actual arguments specified for the macro invocation. This returns null on @@ -415,6 +523,8 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, SmallVector<Token, 64> ArgTokens; bool ContainsCodeCompletionTok = false; + SourceLocation TooManyArgsLoc; + unsigned NumActuals = 0; while (Tok.isNot(tok::r_paren)) { if (ContainsCodeCompletionTok && (Tok.is(tok::eof) || Tok.is(tok::eod))) @@ -458,7 +568,12 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, } } else if (Tok.is(tok::l_paren)) { ++NumParens; - } else if (Tok.is(tok::comma) && NumParens == 0) { + } else if (Tok.is(tok::comma) && NumParens == 0 && + !(Tok.getFlags() & Token::IgnoredComma)) { + // In Microsoft-compatibility mode, single commas from nested macro + // expansions should not be considered as argument separators. We test + // for this with the IgnoredComma token flag above. + // Comma ends this argument if there are more fixed arguments expected. // However, if this is a variadic macro, and this is part of the // variadic part, then the comma is just an argument token. @@ -499,22 +614,15 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, // If this is not a variadic macro, and too many args were specified, emit // an error. 
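GenerateNewArgTokens re-splits the macro arguments by tracking brace depth: a comma only separates arguments when no braces (or parentheses) are open, otherwise it belongs to a braced initializer list. A compact sketch of just that splitting rule; splitMacroArgs is an invented helper that works on characters instead of preprocessor tokens:

#include <string>
#include <vector>

// Split "a, {1, 2}, b" into {"a", " {1, 2}", " b"}: commas inside braces
// (or nested parentheses) do not start a new argument.
static std::vector<std::string> splitMacroArgs(const std::string &ArgText) {
  std::vector<std::string> Args(1);
  int Braces = 0, Parens = 0;
  for (char C : ArgText) {
    if (C == '{') ++Braces;
    else if (C == '}') --Braces;
    else if (C == '(') ++Parens;
    else if (C == ')') --Parens;
    if (C == ',' && Braces == 0 && Parens == 0) {
      Args.emplace_back();
      continue;
    }
    Args.back() += C;
  }
  return Args;
}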
- if (!isVariadic && NumFixedArgsLeft == 0) { + if (!isVariadic && NumFixedArgsLeft == 0 && TooManyArgsLoc.isInvalid()) { if (ArgTokens.size() != ArgTokenStart) - ArgStartLoc = ArgTokens[ArgTokenStart].getLocation(); - - if (!ContainsCodeCompletionTok) { - // Emit the diagnostic at the macro name in case there is a missing ). - // Emitting it at the , could be far away from the macro name. - Diag(ArgStartLoc, diag::err_too_many_args_in_macro_invoc); - Diag(MI->getDefinitionLoc(), diag::note_macro_here) - << MacroName.getIdentifierInfo(); - return 0; - } + TooManyArgsLoc = ArgTokens[ArgTokenStart].getLocation(); + else + TooManyArgsLoc = ArgStartLoc; } - // Empty arguments are standard in C99 and C++0x, and are supported as an extension in - // other modes. + // Empty arguments are standard in C99 and C++0x, and are supported as an + // extension in other modes. if (ArgTokens.size() == ArgTokenStart && !LangOpts.C99) Diag(Tok, LangOpts.CPlusPlus11 ? diag::warn_cxx98_compat_empty_fnmacro_arg : @@ -528,16 +636,66 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, EOFTok.setLength(0); ArgTokens.push_back(EOFTok); ++NumActuals; - if (!ContainsCodeCompletionTok || NumFixedArgsLeft != 0) { - assert(NumFixedArgsLeft != 0 && "Too many arguments parsed"); + if (!ContainsCodeCompletionTok && NumFixedArgsLeft != 0) --NumFixedArgsLeft; - } } // Okay, we either found the r_paren. Check to see if we parsed too few // arguments. unsigned MinArgsExpected = MI->getNumArgs(); + // If this is not a variadic macro, and too many args were specified, emit + // an error. + if (!isVariadic && NumActuals > MinArgsExpected && + !ContainsCodeCompletionTok) { + // Emit the diagnostic at the macro name in case there is a missing ). + // Emitting it at the , could be far away from the macro name. + Diag(TooManyArgsLoc, diag::err_too_many_args_in_macro_invoc); + Diag(MI->getDefinitionLoc(), diag::note_macro_here) + << MacroName.getIdentifierInfo(); + + // Commas from braced initializer lists will be treated as argument + // separators inside macros. Attempt to correct for this with parentheses. + // TODO: See if this can be generalized to angle brackets for templates + // inside macro arguments. + + SmallVector<Token, 4> FixedArgTokens; + unsigned FixedNumArgs = 0; + SmallVector<SourceRange, 4> ParenHints, InitLists; + if (!GenerateNewArgTokens(*this, ArgTokens, FixedArgTokens, FixedNumArgs, + ParenHints, InitLists)) { + if (!InitLists.empty()) { + DiagnosticBuilder DB = + Diag(MacroName, + diag::note_init_list_at_beginning_of_macro_argument); + for (SmallVector<SourceRange, 4>::iterator + Range = InitLists.begin(), RangeEnd = InitLists.end(); + Range != RangeEnd; ++Range) { + if (DB.hasMaxRanges()) + break; + DB << *Range; + } + } + return 0; + } + if (FixedNumArgs != MinArgsExpected) + return 0; + + DiagnosticBuilder DB = Diag(MacroName, diag::note_suggest_parens_for_macro); + for (SmallVector<SourceRange, 4>::iterator + ParenLocation = ParenHints.begin(), ParenEnd = ParenHints.end(); + ParenLocation != ParenEnd; ++ParenLocation) { + if (DB.hasMaxFixItHints()) + break; + DB << FixItHint::CreateInsertion(ParenLocation->getBegin(), "("); + if (DB.hasMaxFixItHints()) + break; + DB << FixItHint::CreateInsertion(ParenLocation->getEnd(), ")"); + } + ArgTokens.swap(FixedArgTokens); + NumActuals = FixedNumArgs; + } + // See MacroArgs instance var for description of this. 
bool isVarargsElided = false; @@ -722,11 +880,13 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("attribute_unavailable_with_message", true) .Case("attribute_unused_on_fields", true) .Case("blocks", LangOpts.Blocks) + .Case("c_thread_safety_attributes", true) .Case("cxx_exceptions", LangOpts.Exceptions) .Case("cxx_rtti", LangOpts.RTTI) .Case("enumerator_attributes", true) .Case("memory_sanitizer", LangOpts.Sanitize.Memory) .Case("thread_sanitizer", LangOpts.Sanitize.Thread) + .Case("dataflow_sanitizer", LangOpts.Sanitize.DataFlow) // Objective-C features .Case("objc_arr", LangOpts.ObjCAutoRefCount) // FIXME: REMOVE? .Case("objc_arc", LangOpts.ObjCAutoRefCount) @@ -737,6 +897,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("objc_modules", LangOpts.ObjC2 && LangOpts.Modules) .Case("objc_nonfragile_abi", LangOpts.ObjCRuntime.isNonFragile()) .Case("objc_property_explicit_atomic", true) // Does clang support explicit "atomic" keyword? + .Case("objc_protocol_qualifier_mangling", true) .Case("objc_weak_class", LangOpts.ObjCRuntime.hasWeakClassImport()) .Case("ownership_holds", true) .Case("ownership_returns", true) @@ -785,7 +946,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("cxx_rvalue_references", LangOpts.CPlusPlus11) .Case("cxx_strong_enums", LangOpts.CPlusPlus11) .Case("cxx_static_assert", LangOpts.CPlusPlus11) - .Case("cxx_thread_local", + .Case("cxx_thread_local", LangOpts.CPlusPlus11 && PP.getTargetInfo().isTLSSupported()) .Case("cxx_trailing_return", LangOpts.CPlusPlus11) .Case("cxx_unicode_literals", LangOpts.CPlusPlus11) @@ -793,15 +954,15 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("cxx_user_literals", LangOpts.CPlusPlus11) .Case("cxx_variadic_templates", LangOpts.CPlusPlus11) // C++1y features - .Case("cxx_binary_literals", LangOpts.CPlusPlus1y) - //.Case("cxx_contextual_conversions", LangOpts.CPlusPlus1y) - //.Case("cxx_generalized_capture", LangOpts.CPlusPlus1y) - //.Case("cxx_generic_lambda", LangOpts.CPlusPlus1y) - //.Case("cxx_relaxed_constexpr", LangOpts.CPlusPlus1y) - //.Case("cxx_return_type_deduction", LangOpts.CPlusPlus1y) - //.Case("cxx_runtime_array", LangOpts.CPlusPlus1y) .Case("cxx_aggregate_nsdmi", LangOpts.CPlusPlus1y) - //.Case("cxx_variable_templates", LangOpts.CPlusPlus1y) + .Case("cxx_binary_literals", LangOpts.CPlusPlus1y) + .Case("cxx_contextual_conversions", LangOpts.CPlusPlus1y) + //.Case("cxx_generic_lambdas", LangOpts.CPlusPlus1y) + .Case("cxx_init_captures", LangOpts.CPlusPlus1y) + .Case("cxx_relaxed_constexpr", LangOpts.CPlusPlus1y) + .Case("cxx_return_type_deduction", LangOpts.CPlusPlus1y) + //.Case("cxx_runtime_arrays", LangOpts.CPlusPlus1y) + .Case("cxx_variable_templates", LangOpts.CPlusPlus1y) // Type traits .Case("has_nothrow_assign", LangOpts.CPlusPlus) .Case("has_nothrow_copy", LangOpts.CPlusPlus) @@ -822,6 +983,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("is_standard_layout", LangOpts.CPlusPlus) .Case("is_pod", LangOpts.CPlusPlus) .Case("is_polymorphic", LangOpts.CPlusPlus) + .Case("is_sealed", LangOpts.MicrosoftExt) .Case("is_trivial", LangOpts.CPlusPlus) .Case("is_trivially_assignable", LangOpts.CPlusPlus) .Case("is_trivially_constructible", LangOpts.CPlusPlus) @@ -862,6 +1024,7 @@ static bool HasExtension(const Preprocessor &PP, const IdentifierInfo *II) { .Case("c_atomic", true) .Case("c_generic_selections", true) .Case("c_static_assert", true) + 
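The __has_feature table above is a name-to-predicate lookup keyed off the active language options. The same idea can be expressed without llvm::StringSwitch as a map of lambdas; LangOptsSketch and hasFeatureSketch below are made-up names, and the entries are only a small illustrative subset:

#include <functional>
#include <map>
#include <string>

struct LangOptsSketch {
  bool CPlusPlus11 = false;
  bool CPlusPlus1y = false;
  bool Blocks = false;
};

// Map each feature name to a predicate over the language options; unknown
// names report false, like the StringSwitch .Default(false) above.
static bool hasFeatureSketch(const std::string &Name,
                             const LangOptsSketch &Opts) {
  static const std::map<std::string,
                        std::function<bool(const LangOptsSketch &)>> Table = {
      {"blocks", [](const LangOptsSketch &O) { return O.Blocks; }},
      {"cxx_binary_literals",
       [](const LangOptsSketch &O) { return O.CPlusPlus1y; }},
      {"cxx_rvalue_references",
       [](const LangOptsSketch &O) { return O.CPlusPlus11; }},
      {"enumerator_attributes", [](const LangOptsSketch &) { return true; }},
  };
  auto It = Table.find(Name);
  return It != Table.end() && It->second(Opts);
}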
.Case("c_thread_local", PP.getTargetInfo().isTLSSupported()) // C++11 features supported by other languages as extensions. .Case("cxx_atomic", LangOpts.CPlusPlus) .Case("cxx_deleted_functions", LangOpts.CPlusPlus) @@ -875,6 +1038,8 @@ static bool HasExtension(const Preprocessor &PP, const IdentifierInfo *II) { .Case("cxx_rvalue_references", LangOpts.CPlusPlus) // C++1y features supported by other languages as extensions. .Case("cxx_binary_literals", true) + .Case("cxx_init_captures", LangOpts.CPlusPlus11) + .Case("cxx_variable_templates", true) .Default(false); } @@ -992,7 +1157,8 @@ static bool EvaluateHasIncludeCommon(Token &Tok, // Search include directories. const DirectoryLookup *CurDir; const FileEntry *File = - PP.LookupFile(Filename, isAngled, LookupFrom, CurDir, NULL, NULL, NULL); + PP.LookupFile(FilenameLoc, Filename, isAngled, LookupFrom, CurDir, NULL, + NULL, NULL); // Get the result value. A result of true means the file exists. return File != 0; @@ -1212,9 +1378,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { if (Tok.is(tok::l_paren)) { // Read the identifier LexUnexpandedToken(Tok); - if (Tok.is(tok::identifier) || Tok.is(tok::kw_const)) { - FeatureII = Tok.getIdentifierInfo(); - + if ((FeatureII = Tok.getIdentifierInfo())) { // Read the ')'. LexUnexpandedToken(Tok); if (Tok.is(tok::r_paren)) diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp index e8f43f7..e2629a3 100644 --- a/lib/Lex/PTHLexer.cpp +++ b/lib/Lex/PTHLexer.cpp @@ -43,9 +43,7 @@ PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D, FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID); } -void PTHLexer::Lex(Token& Tok) { -LexNextToken: - +bool PTHLexer::Lex(Token& Tok) { //===--------------------------------------==// // Read the raw token data. //===--------------------------------------==// @@ -90,8 +88,9 @@ LexNextToken: Tok.setKind(II->getTokenID()); if (II->isHandleIdentifierCase()) - PP->HandleIdentifier(Tok); - return; + return PP->HandleIdentifier(Tok); + + return true; } //===--------------------------------------==// @@ -101,16 +100,10 @@ LexNextToken: // Save the end-of-file token. EofToken = Tok; - // Save 'PP' to 'PPCache' as LexEndOfFile can delete 'this'. 
- Preprocessor *PPCache = PP; - assert(!ParsingPreprocessorDirective); assert(!LexingRawMode); - - if (LexEndOfFile(Tok)) - return; - return PPCache->Lex(Tok); + return LexEndOfFile(Tok); } if (TKind == tok::hash && Tok.isAtStartOfLine()) { @@ -118,19 +111,17 @@ LexNextToken: assert(!LexingRawMode); PP->HandleDirective(Tok); - if (PP->isCurrentLexer(this)) - goto LexNextToken; - - return PP->Lex(Tok); + return false; } if (TKind == tok::eod) { assert(ParsingPreprocessorDirective); ParsingPreprocessorDirective = false; - return; + return true; } MIOpt.ReadToken(); + return true; } bool PTHLexer::LexEndOfFile(Token &Result) { @@ -619,18 +610,18 @@ PTHLexer *PTHManager::CreateLexer(FileID FID) { namespace { class PTHStatData { public: - const bool hasStat; - const ino_t ino; - const dev_t dev; - const mode_t mode; - const time_t mtime; - const off_t size; - - PTHStatData(ino_t i, dev_t d, mode_t mo, time_t m, off_t s) - : hasStat(true), ino(i), dev(d), mode(mo), mtime(m), size(s) {} - - PTHStatData() - : hasStat(false), ino(0), dev(0), mode(0), mtime(0), size(0) {} + const bool HasData; + uint64_t Size; + time_t ModTime; + llvm::sys::fs::UniqueID UniqueID; + bool IsDirectory; + + PTHStatData(uint64_t Size, time_t ModTime, llvm::sys::fs::UniqueID UniqueID, + bool IsDirectory) + : HasData(true), Size(Size), ModTime(ModTime), UniqueID(UniqueID), + IsDirectory(IsDirectory) {} + + PTHStatData() : HasData(false) {} }; class PTHStatLookupTrait : public PTHFileLookupCommonTrait { @@ -653,12 +644,18 @@ public: unsigned) { if (k.first /* File or Directory */) { - if (k.first == 0x1 /* File */) d += 4 * 2; // Skip the first 2 words. - ino_t ino = (ino_t) ReadUnalignedLE32(d); - dev_t dev = (dev_t) ReadUnalignedLE32(d); - mode_t mode = (mode_t) ReadUnalignedLE16(d); - time_t mtime = (time_t) ReadUnalignedLE64(d); - return data_type(ino, dev, mode, mtime, (off_t) ReadUnalignedLE64(d)); + bool IsDirectory = true; + if (k.first == 0x1 /* File */) { + IsDirectory = false; + d += 4 * 2; // Skip the first 2 words. + } + + uint64_t File = ReadUnalignedLE64(d); + uint64_t Device = ReadUnalignedLE64(d); + llvm::sys::fs::UniqueID UniqueID(File, Device); + time_t ModTime = ReadUnalignedLE64(d); + uint64_t Size = ReadUnalignedLE64(d); + return data_type(Size, ModTime, UniqueID, IsDirectory); } // Negative stat. Don't read anything. @@ -677,25 +674,27 @@ public: ~PTHStatCache() {} - LookupResult getStat(const char *Path, struct stat &StatBuf, - bool isFile, int *FileDescriptor) { + LookupResult getStat(const char *Path, FileData &Data, bool isFile, + int *FileDescriptor) { // Do the lookup for the file's data in the PTH file. CacheTy::iterator I = Cache.find(Path); // If we don't get a hit in the PTH file just forward to 'stat'. 
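The PTH stat table above is decoded with unaligned little-endian reads (ReadUnalignedLE64 and friends). A portable, endian-independent way to express such a read in plain C++ is to assemble the value byte by byte; readLE64 is an invented name:

#include <cstdint>

// Read a little-endian 64-bit value from an unaligned byte pointer and
// advance the cursor, in the spirit of ReadUnalignedLE64 used above.
static uint64_t readLE64(const unsigned char *&D) {
  uint64_t V = 0;
  for (unsigned I = 0; I != 8; ++I)
    V |= static_cast<uint64_t>(D[I]) << (8 * I);
  D += 8;
  return V;
}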
if (I == Cache.end()) - return statChained(Path, StatBuf, isFile, FileDescriptor); + return statChained(Path, Data, isFile, FileDescriptor); - const PTHStatData &Data = *I; + const PTHStatData &D = *I; - if (!Data.hasStat) + if (!D.HasData) return CacheMissing; - StatBuf.st_ino = Data.ino; - StatBuf.st_dev = Data.dev; - StatBuf.st_mtime = Data.mtime; - StatBuf.st_mode = Data.mode; - StatBuf.st_size = Data.size; + Data.Size = D.Size; + Data.ModTime = D.ModTime; + Data.UniqueID = D.UniqueID; + Data.IsDirectory = D.IsDirectory; + Data.IsNamedPipe = false; + Data.InPCH = true; + return CacheExists; } }; diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp index b2ae4c9..e4059ee 100644 --- a/lib/Lex/Pragma.cpp +++ b/lib/Lex/Pragma.cpp @@ -20,11 +20,15 @@ #include "clang/Lex/LiteralSupport.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/CrashRecoveryContext.h" #include "llvm/Support/ErrorHandling.h" #include <algorithm> using namespace clang; +#include "llvm/Support/raw_ostream.h" + // Out-of-line destructor to provide a home for the class. PragmaHandler::~PragmaHandler() { } @@ -101,7 +105,11 @@ void PragmaNamespace::HandlePragma(Preprocessor &PP, /// HandlePragmaDirective - The "\#pragma" directive has been parsed. Lex the /// rest of the pragma, passing it to the registered pragma handlers. -void Preprocessor::HandlePragmaDirective(unsigned Introducer) { +void Preprocessor::HandlePragmaDirective(SourceLocation IntroducerLoc, + PragmaIntroducerKind Introducer) { + if (Callbacks) + Callbacks->PragmaDirective(IntroducerLoc, Introducer); + if (!PragmasEnabled) return; @@ -109,7 +117,7 @@ void Preprocessor::HandlePragmaDirective(unsigned Introducer) { // Invoke the first level of pragma handlers which reads the namespace id. Token Tok; - PragmaHandlers->HandlePragma(*this, PragmaIntroducerKind(Introducer), Tok); + PragmaHandlers->HandlePragma(*this, Introducer, Tok); // If the pragma handler didn't read the rest of the line, consume it now. if ((CurTokenLexer && CurTokenLexer->isParsingPreprocessorDirective()) @@ -255,14 +263,14 @@ void Preprocessor::Handle_Pragma(Token &Tok) { // Remove escaped quotes and escapes. unsigned ResultPos = 1; - for (unsigned i = 1, e = StrVal.size() - 2; i != e; ++i) { - if (StrVal[i] != '\\' || - (StrVal[i + 1] != '\\' && StrVal[i + 1] != '"')) { - // \\ -> '\' and \" -> '"'. - StrVal[ResultPos++] = StrVal[i]; - } + for (unsigned i = 1, e = StrVal.size() - 1; i != e; ++i) { + // Skip escapes. \\ -> '\' and \" -> '"'. + if (StrVal[i] == '\\' && i + 1 < e && + (StrVal[i + 1] == '\\' || StrVal[i + 1] == '"')) + ++i; + StrVal[ResultPos++] = StrVal[i]; } - StrVal.erase(StrVal.begin() + ResultPos, StrVal.end() - 2); + StrVal.erase(StrVal.begin() + ResultPos, StrVal.end() - 1); } // Remove the front quote, replacing it with a space, so that the pragma @@ -287,7 +295,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) { EnterSourceFileWithLexer(TL, 0); // With everything set up, lex this as a #pragma directive. - HandlePragmaDirective(PIK__Pragma); + HandlePragmaDirective(PragmaLoc, PIK__Pragma); // Finally, return whatever came after the pragma directive. return Lex(Tok); @@ -336,7 +344,7 @@ void Preprocessor::HandleMicrosoft__pragma(Token &Tok) { EnterTokenStream(TokArray, PragmaToks.size(), true, true); // With everything set up, lex this as a #pragma directive. 
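The corrected _Pragma loop above rewrites the string literal in place, collapsing the two-character sequences \\ and \" back to \ and " while copying everything else through. The same transformation on a std::string, with unescapePragmaString as a made-up helper (clang additionally deals with the surrounding quotes):

#include <string>

// Collapse the escapes that _Pragma("...") introduces: "\\" becomes '\' and
// "\"" becomes '"'. Other characters are copied through unchanged.
static std::string unescapePragmaString(const std::string &In) {
  std::string Out;
  Out.reserve(In.size());
  for (size_t I = 0; I != In.size(); ++I) {
    if (In[I] == '\\' && I + 1 < In.size() &&
        (In[I + 1] == '\\' || In[I + 1] == '"'))
      ++I; // drop the escaping backslash, keep the escaped character
    Out += In[I];
  }
  return Out;
}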
- HandlePragmaDirective(PIK___pragma); + HandlePragmaDirective(PragmaLoc, PIK___pragma); // Finally, return whatever came after the pragma directive. return Lex(Tok); @@ -466,8 +474,8 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { // Search include directories for this file. const DirectoryLookup *CurDir; - const FileEntry *File = LookupFile(Filename, isAngled, 0, CurDir, NULL, NULL, - NULL); + const FileEntry *File = LookupFile(FilenameTok.getLocation(), Filename, + isAngled, 0, CurDir, NULL, NULL, NULL); if (File == 0) { if (!SuppressIncludeNotFoundError) Diag(FilenameTok, diag::err_pp_file_not_found) << Filename; @@ -999,12 +1007,137 @@ public: } }; +// Returns -1 on failure. +static int LexSimpleInt(Preprocessor &PP, Token &Tok) { + assert(Tok.is(tok::numeric_constant)); + SmallString<8> IntegerBuffer; + bool NumberInvalid = false; + StringRef Spelling = PP.getSpelling(Tok, IntegerBuffer, &NumberInvalid); + if (NumberInvalid) + return -1; + NumericLiteralParser Literal(Spelling, Tok.getLocation(), PP); + if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) + return -1; + llvm::APInt APVal(32, 0); + if (Literal.GetIntegerValue(APVal)) + return -1; + PP.Lex(Tok); + return int(APVal.getLimitedValue(INT_MAX)); +} + +/// "\#pragma warning(...)". MSVC's diagnostics do not map cleanly to clang's +/// diagnostics, so we don't really implement this pragma. We parse it and +/// ignore it to avoid -Wunknown-pragma warnings. +struct PragmaWarningHandler : public PragmaHandler { + PragmaWarningHandler() : PragmaHandler("warning") {} + + virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + Token &Tok) { + // Parse things like: + // warning(push, 1) + // warning(pop) + // warning(disable : 1 2 3 ; error : 4 5 6 ; suppress : 7 8 9) + SourceLocation DiagLoc = Tok.getLocation(); + PPCallbacks *Callbacks = PP.getPPCallbacks(); + + PP.Lex(Tok); + if (Tok.isNot(tok::l_paren)) { + PP.Diag(Tok, diag::warn_pragma_warning_expected) << "("; + return; + } + + PP.Lex(Tok); + IdentifierInfo *II = Tok.getIdentifierInfo(); + if (!II) { + PP.Diag(Tok, diag::warn_pragma_warning_spec_invalid); + return; + } + + if (II->isStr("push")) { + // #pragma warning( push[ ,n ] ) + int Level = -1; + PP.Lex(Tok); + if (Tok.is(tok::comma)) { + PP.Lex(Tok); + if (Tok.is(tok::numeric_constant)) + Level = LexSimpleInt(PP, Tok); + if (Level < 0 || Level > 4) { + PP.Diag(Tok, diag::warn_pragma_warning_push_level); + return; + } + } + if (Callbacks) + Callbacks->PragmaWarningPush(DiagLoc, Level); + } else if (II->isStr("pop")) { + // #pragma warning( pop ) + PP.Lex(Tok); + if (Callbacks) + Callbacks->PragmaWarningPop(DiagLoc); + } else { + // #pragma warning( warning-specifier : warning-number-list + // [; warning-specifier : warning-number-list...] ) + while (true) { + II = Tok.getIdentifierInfo(); + if (!II) { + PP.Diag(Tok, diag::warn_pragma_warning_spec_invalid); + return; + } + + // Figure out which warning specifier this is. + StringRef Specifier = II->getName(); + bool SpecifierValid = + llvm::StringSwitch<bool>(Specifier) + .Cases("1", "2", "3", "4", true) + .Cases("default", "disable", "error", "once", "suppress", true) + .Default(false); + if (!SpecifierValid) { + PP.Diag(Tok, diag::warn_pragma_warning_spec_invalid); + return; + } + PP.Lex(Tok); + if (Tok.isNot(tok::colon)) { + PP.Diag(Tok, diag::warn_pragma_warning_expected) << ":"; + return; + } + + // Collect the warning ids. 
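PragmaWarningHandler accepts clauses of the form "specifier : number-list" separated by semicolons. A simplified text-level parser for that grammar is sketched below; parseWarningClauses is hypothetical, works on a string instead of preprocessor tokens, and returns an empty result on malformed input:

#include <sstream>
#include <string>
#include <utility>
#include <vector>

// Parse "disable : 4018 4146 ; error : 4996" into
// {("disable", {4018, 4146}), ("error", {4996})}.
static std::vector<std::pair<std::string, std::vector<int>>>
parseWarningClauses(const std::string &Text) {
  std::vector<std::pair<std::string, std::vector<int>>> Clauses;
  std::istringstream Stream(Text);
  std::string Clause;
  while (std::getline(Stream, Clause, ';')) {
    std::istringstream CS(Clause);
    std::string Specifier, Colon;
    if (!(CS >> Specifier >> Colon) || Colon != ":")
      return {};
    std::vector<int> Ids;
    int Id;
    while (CS >> Id)
      Ids.push_back(Id);
    Clauses.emplace_back(Specifier, std::move(Ids));
  }
  return Clauses;
}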
+ SmallVector<int, 4> Ids; + PP.Lex(Tok); + while (Tok.is(tok::numeric_constant)) { + int Id = LexSimpleInt(PP, Tok); + if (Id <= 0) { + PP.Diag(Tok, diag::warn_pragma_warning_expected_number); + return; + } + Ids.push_back(Id); + } + if (Callbacks) + Callbacks->PragmaWarning(DiagLoc, Specifier, Ids); + + // Parse the next specifier if there is a semicolon. + if (Tok.isNot(tok::semi)) + break; + PP.Lex(Tok); + } + } + + if (Tok.isNot(tok::r_paren)) { + PP.Diag(Tok, diag::warn_pragma_warning_expected) << ")"; + return; + } + + PP.Lex(Tok); + if (Tok.isNot(tok::eod)) + PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma warning"; + } +}; + /// PragmaIncludeAliasHandler - "\#pragma include_alias("...")". struct PragmaIncludeAliasHandler : public PragmaHandler { PragmaIncludeAliasHandler() : PragmaHandler("include_alias") {} virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, Token &IncludeAliasTok) { - PP.HandlePragmaIncludeAlias(IncludeAliasTok); + PP.HandlePragmaIncludeAlias(IncludeAliasTok); } }; @@ -1204,28 +1337,28 @@ struct PragmaARCCFCodeAuditedHandler : public PragmaHandler { } }; - /// \brief Handle "\#pragma region [...]" - /// - /// The syntax is - /// \code - /// #pragma region [optional name] - /// #pragma endregion [optional comment] - /// \endcode - /// - /// \note This is - /// <a href="http://msdn.microsoft.com/en-us/library/b6xkz944(v=vs.80).aspx">editor-only</a> - /// pragma, just skipped by compiler. - struct PragmaRegionHandler : public PragmaHandler { - PragmaRegionHandler(const char *pragma) : PragmaHandler(pragma) { } - - virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, - Token &NameTok) { - // #pragma region: endregion matches can be verified - // __pragma(region): no sense, but ignored by msvc - // _Pragma is not valid for MSVC, but there isn't any point - // to handle a _Pragma differently. - } - }; +/// \brief Handle "\#pragma region [...]" +/// +/// The syntax is +/// \code +/// #pragma region [optional name] +/// #pragma endregion [optional comment] +/// \endcode +/// +/// \note This is +/// <a href="http://msdn.microsoft.com/en-us/library/b6xkz944(v=vs.80).aspx">editor-only</a> +/// pragma, just skipped by compiler. +struct PragmaRegionHandler : public PragmaHandler { + PragmaRegionHandler(const char *pragma) : PragmaHandler(pragma) { } + + virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + Token &NameTok) { + // #pragma region: endregion matches can be verified + // __pragma(region): no sense, but ignored by msvc + // _Pragma is not valid for MSVC, but there isn't any point + // to handle a _Pragma differently. + } +}; } // end anonymous namespace @@ -1262,6 +1395,7 @@ void Preprocessor::RegisterBuiltinPragmas() { // MS extensions. if (LangOpts.MicrosoftExt) { + AddPragmaHandler(new PragmaWarningHandler()); AddPragmaHandler(new PragmaIncludeAliasHandler()); AddPragmaHandler(new PragmaRegionHandler("region")); AddPragmaHandler(new PragmaRegionHandler("endregion")); diff --git a/lib/Lex/PreprocessingRecord.cpp b/lib/Lex/PreprocessingRecord.cpp index 426b922..090aeed 100644 --- a/lib/Lex/PreprocessingRecord.cpp +++ b/lib/Lex/PreprocessingRecord.cpp @@ -398,7 +398,8 @@ void PreprocessingRecord::Ifndef(SourceLocation Loc, const Token &MacroNameTok, } void PreprocessingRecord::Defined(const Token &MacroNameTok, - const MacroDirective *MD) { + const MacroDirective *MD, + SourceRange Range) { // This is not actually a macro expansion but record it as a macro reference. 
if (MD) addMacroExpansion(MacroNameTok, MD->getMacroInfo(), diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index 66f23f1..b500efe 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -65,6 +65,7 @@ Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, TheModuleLoader(TheModuleLoader), ExternalSource(0), Identifiers(opts, IILookup), IncrementalProcessing(IncrProcessing), CodeComplete(0), CodeCompletionFile(0), CodeCompletionOffset(0), + LastTokenWasAt(false), ModuleImportExpectsIdentifier(false), CodeCompletionReached(0), SkipMainFilePreamble(0, true), CurPPLexer(0), CurDirLookup(0), CurLexerKind(CLK_Lexer), Callbacks(0), MacroArgCache(0), Record(0), MIChainHead(0), MICache(0), @@ -614,7 +615,7 @@ void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the /// IdentifierInfo methods that compute these properties will need to change to /// match. -void Preprocessor::HandleIdentifier(Token &Identifier) { +bool Preprocessor::HandleIdentifier(Token &Identifier) { assert(Identifier.getIdentifierInfo() && "Can't handle identifiers without identifier info!"); @@ -648,8 +649,10 @@ void Preprocessor::HandleIdentifier(Token &Identifier) { MacroInfo *MI = MD->getMacroInfo(); if (!DisableMacroExpansion) { if (!Identifier.isExpandDisabled() && MI->isEnabled()) { - if (!HandleMacroExpandedIdentifier(Identifier, MD)) - return; + // C99 6.10.3p10: If the preprocessing token immediately after the + // macro name isn't a '(', this macro should not be expanded. + if (!MI->isFunctionLike() || isNextPPTokenLParen()) + return HandleMacroExpandedIdentifier(Identifier, MD); } else { // C99 6.10.3.4p2 says that a disabled macro may never again be // expanded, even if it's in a context where it could be expanded in the @@ -685,21 +688,52 @@ void Preprocessor::HandleIdentifier(Token &Identifier) { if (II.isExtensionToken() && !DisableMacroExpansion) Diag(Identifier, diag::ext_token_used); - // If this is the 'import' contextual keyword, note + // If this is the 'import' contextual keyword following an '@', note // that the next token indicates a module name. // // Note that we do not treat 'import' as a contextual // keyword when we're in a caching lexer, because caching lexers only get // used in contexts where import declarations are disallowed. - if (II.isModulesImport() && !InMacroArgs && !DisableMacroExpansion && - getLangOpts().Modules && CurLexerKind != CLK_CachingLexer) { + if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs && + !DisableMacroExpansion && getLangOpts().Modules && + CurLexerKind != CLK_CachingLexer) { ModuleImportLoc = Identifier.getLocation(); ModuleImportPath.clear(); ModuleImportExpectsIdentifier = true; CurLexerKind = CLK_LexAfterModuleImport; } + return true; +} + +void Preprocessor::Lex(Token &Result) { + // We loop here until a lex function retuns a token; this avoids recursion. 
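The rewritten Preprocessor::Lex (continued just below) replaces tail calls between lexers with a loop: each sub-lexer's Lex returns true once it has produced a token and false when it only changed state, such as popping a file or entering a macro, and the driver keeps asking until it gets a token. A stripped-down sketch of that contract, with TokenSketch, SubLexer, and driveLex as invented names:

#include <string>
#include <vector>

struct TokenSketch { std::string Spelling; };

// Each lexer either fills in Tok and returns true, or does some internal
// bookkeeping (e.g. switching to another source of tokens) and returns false.
struct SubLexer {
  std::vector<std::string> Pending;
  bool Lex(TokenSketch &Tok) {
    if (Pending.empty())
      return false; // nothing here; the caller should try the next source
    Tok.Spelling = Pending.back();
    Pending.pop_back();
    return true;
  }
};

// The driver loops instead of recursing, so deeply nested includes or macro
// expansions cannot overflow the call stack.
static bool driveLex(std::vector<SubLexer> &Stack, TokenSketch &Tok) {
  while (!Stack.empty()) {
    if (Stack.back().Lex(Tok))
      return true;
    Stack.pop_back(); // that source is exhausted; fall back to the outer one
  }
  return false;
}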
+ bool ReturnedToken; + do { + switch (CurLexerKind) { + case CLK_Lexer: + ReturnedToken = CurLexer->Lex(Result); + break; + case CLK_PTHLexer: + ReturnedToken = CurPTHLexer->Lex(Result); + break; + case CLK_TokenLexer: + ReturnedToken = CurTokenLexer->Lex(Result); + break; + case CLK_CachingLexer: + CachingLex(Result); + ReturnedToken = true; + break; + case CLK_LexAfterModuleImport: + LexAfterModuleImport(Result); + ReturnedToken = true; + break; + } + } while (!ReturnedToken); + + LastTokenWasAt = Result.is(tok::at); } + /// \brief Lex a token following the 'import' contextual keyword. /// void Preprocessor::LexAfterModuleImport(Token &Result) { @@ -734,7 +768,7 @@ void Preprocessor::LexAfterModuleImport(Token &Result) { } // If we have a non-empty module path, load the named module. - if (!ModuleImportPath.empty()) { + if (!ModuleImportPath.empty() && getLangOpts().Modules) { Module *Imported = TheModuleLoader.loadModule(ModuleImportLoc, ModuleImportPath, Module::MacrosVisible, diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp index 5a59849..33ccbc0 100644 --- a/lib/Lex/PreprocessorLexer.cpp +++ b/lib/Lex/PreprocessorLexer.cpp @@ -38,7 +38,10 @@ void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) { ParsingFilename = true; // Lex the filename. - IndirectLex(FilenameTok); + if (LexingRawMode) + IndirectLex(FilenameTok); + else + PP->Lex(FilenameTok); // We should have obtained the filename now. ParsingFilename = false; diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp index 07753c7..0213afc 100644 --- a/lib/Lex/TokenLexer.cpp +++ b/lib/Lex/TokenLexer.cpp @@ -121,7 +121,7 @@ void TokenLexer::destroy() { /// Remove comma ahead of __VA_ARGS__, if present, according to compiler dialect /// settings. Returns true if the comma is removed. -static bool MaybeRemoveCommaBeforeVaArgs(SmallVector<Token, 128> &ResultToks, +static bool MaybeRemoveCommaBeforeVaArgs(SmallVectorImpl<Token> &ResultToks, bool &NextTokGetsSpace, bool HasPasteOperator, MacroInfo *Macro, unsigned MacroArgNo, @@ -244,9 +244,11 @@ void TokenLexer::ExpandFunctionArguments() { // Otherwise, this is a use of the argument. Find out if there is a paste // (##) operator before or after the argument. - bool PasteBefore = + bool NonEmptyPasteBefore = !ResultToks.empty() && ResultToks.back().is(tok::hashhash); + bool PasteBefore = i != 0 && Tokens[i-1].is(tok::hashhash); bool PasteAfter = i+1 != e && Tokens[i+1].is(tok::hashhash); + assert(!NonEmptyPasteBefore || PasteBefore); // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there // are no trailing commas if __VA_ARGS__ is empty. @@ -276,6 +278,14 @@ void TokenLexer::ExpandFunctionArguments() { unsigned NumToks = MacroArgs::getArgLength(ResultArgToks); ResultToks.append(ResultArgToks, ResultArgToks+NumToks); + // In Microsoft-compatibility mode, we follow MSVC's preprocessing + // behavior by not considering single commas from nested macro + // expansions as argument separators. Set a flag on the token so we can + // test for this later when the macro expansion is processed. + if (PP.getLangOpts().MicrosoftMode && NumToks == 1 && + ResultToks.back().is(tok::comma)) + ResultToks.back().setFlag(Token::IgnoredComma); + // If the '##' came from expanding an argument, turn it into 'unknown' // to avoid pasting. 
for (unsigned i = FirstResult, e = ResultToks.size(); i != e; ++i) { @@ -314,13 +324,12 @@ void TokenLexer::ExpandFunctionArguments() { // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when // the expander trys to paste ',' with the first token of the __VA_ARGS__ // expansion. - if (PasteBefore && ResultToks.size() >= 2 && + if (NonEmptyPasteBefore && ResultToks.size() >= 2 && ResultToks[ResultToks.size()-2].is(tok::comma) && (unsigned)ArgNo == Macro->getNumArgs()-1 && Macro->isVariadic()) { // Remove the paste operator, report use of the extension. - PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma); - ResultToks.pop_back(); + PP.Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma); } ResultToks.append(ArgToks, ArgToks+NumToks); @@ -350,7 +359,7 @@ void TokenLexer::ExpandFunctionArguments() { // case, we do not want the extra whitespace to be added. For example, // we want ". ## foo" -> ".foo" not ". foo". if ((CurTok.hasLeadingSpace() || NextTokGetsSpace) && - !PasteBefore) + !NonEmptyPasteBefore) ResultToks[ResultToks.size()-NumToks].setFlag(Token::LeadingSpace); NextTokGetsSpace = false; @@ -371,10 +380,13 @@ void TokenLexer::ExpandFunctionArguments() { } // If this is on the RHS of a paste operator, we've already copied the - // paste operator to the ResultToks list. Remove it. - assert(PasteBefore && ResultToks.back().is(tok::hashhash)); - NextTokGetsSpace |= ResultToks.back().hasLeadingSpace(); - ResultToks.pop_back(); + // paste operator to the ResultToks list, unless the LHS was empty too. + // Remove it. + assert(PasteBefore); + if (NonEmptyPasteBefore) { + assert(ResultToks.back().is(tok::hashhash)); + NextTokGetsSpace |= ResultToks.pop_back_val().hasLeadingSpace(); + } // If this is the __VA_ARGS__ token, and if the argument wasn't provided, // and if the macro had at least one real argument, and if the token before @@ -404,22 +416,19 @@ void TokenLexer::ExpandFunctionArguments() { /// Lex - Lex and return a token from this macro stream. /// -void TokenLexer::Lex(Token &Tok) { +bool TokenLexer::Lex(Token &Tok) { // Lexing off the end of the macro, pop this macro off the expansion stack. if (isAtEnd()) { // If this is a macro (not a token stream), mark the macro enabled now // that it is no longer being expanded. if (Macro) Macro->EnableMacro(); - // Pop this context off the preprocessors lexer stack and get the next - // token. This will delete "this" so remember the PP instance var. - Preprocessor &PPCache = PP; - if (PP.HandleEndOfTokenLexer(Tok)) - return; - - // HandleEndOfTokenLexer may not return a token. If it doesn't, lex - // whatever is next. - return PPCache.Lex(Tok); + Tok.startToken(); + Tok.setFlagValue(Token::StartOfLine , AtStartOfLine); + Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace); + if (CurToken == 0) + Tok.setFlag(Token::LeadingEmptyMacro); + return PP.HandleEndOfTokenLexer(Tok); } SourceManager &SM = PP.getSourceManager(); @@ -439,7 +448,7 @@ void TokenLexer::Lex(Token &Tok) { // When handling the microsoft /##/ extension, the final token is // returned by PasteTokens, not the pasted token. if (PasteTokens(Tok)) - return; + return true; TokenIsFromPaste = true; } @@ -470,6 +479,8 @@ void TokenLexer::Lex(Token &Tok) { if (isFirstToken) { Tok.setFlagValue(Token::StartOfLine , AtStartOfLine); Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace); + AtStartOfLine = false; + HasLeadingSpace = false; } // Handle recursive expansion! 
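Several hunks in this commit route whitespace information through PropagateLineStartLeadingSpaceInfo: when a macro expands to nothing, the next real token should inherit the start-of-line and leading-space properties of the name that disappeared. A tiny model of that flag hand-off; FlagsSketch and inheritWhitespace are assumptions, not clang types:

struct FlagsSketch {
  bool StartOfLine = false;
  bool LeadingSpace = false;
};

// If the token that replaced an empty expansion is not already on its own
// line, let it inherit the vanished name's position, so "  EMPTY x" still
// lexes as a token with leading space and "EMPTY x" at line start still
// starts the line.
static void inheritWhitespace(const FlagsSketch &RemovedName,
                              FlagsSketch &NextToken) {
  if (!NextToken.StartOfLine) {
    if (RemovedName.StartOfLine) NextToken.StartOfLine = true;
    if (RemovedName.LeadingSpace) NextToken.LeadingSpace = true;
  }
}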
@@ -487,10 +498,11 @@ void TokenLexer::Lex(Token &Tok) { } if (!DisableMacroExpansion && II->isHandleIdentifierCase()) - PP.HandleIdentifier(Tok); + return PP.HandleIdentifier(Tok); } // Otherwise, return a normal token. + return true; } /// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ## @@ -812,3 +824,8 @@ void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc, updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens); } } + +void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) { + AtStartOfLine = Result.isAtStartOfLine(); + HasLeadingSpace = Result.hasLeadingSpace(); +} diff --git a/lib/Lex/UnicodeCharSets.h b/lib/Lex/UnicodeCharSets.h index 37ff8af..01ae7e8 100644 --- a/lib/Lex/UnicodeCharSets.h +++ b/lib/Lex/UnicodeCharSets.h @@ -9,98 +9,10 @@ #ifndef CLANG_LEX_UNICODECHARSETS_H #define CLANG_LEX_UNICODECHARSETS_H -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" -#include "llvm/Support/raw_ostream.h" - -namespace { - struct UnicodeCharRange { - uint32_t Lower; - uint32_t Upper; - }; - typedef llvm::ArrayRef<UnicodeCharRange> UnicodeCharSet; - - typedef llvm::SmallPtrSet<const UnicodeCharRange *, 16> ValidatedCharSetsTy; -} - -static inline ValidatedCharSetsTy &getValidatedCharSets() { - static ValidatedCharSetsTy Validated; - return Validated; -} - -/// Returns true if each of the ranges in \p CharSet is a proper closed range -/// [min, max], and if the ranges themselves are ordered and non-overlapping. -static inline bool isValidCharSet(UnicodeCharSet CharSet) { -#ifndef NDEBUG - static llvm::sys::Mutex ValidationMutex; - - // Check the validation cache. - { - llvm::MutexGuard Guard(ValidationMutex); - if (getValidatedCharSets().count(CharSet.data())) - return true; - } - - // Walk through the ranges. - uint32_t Prev = 0; - for (UnicodeCharSet::iterator I = CharSet.begin(), E = CharSet.end(); - I != E; ++I) { - if (Prev >= I->Lower) { - DEBUG(llvm::dbgs() << "Upper bound 0x"); - DEBUG(llvm::dbgs().write_hex(Prev)); - DEBUG(llvm::dbgs() << " should be less than succeeding lower bound 0x"); - DEBUG(llvm::dbgs().write_hex(I->Lower) << "\n"); - return false; - } - if (I->Upper < I->Lower) { - DEBUG(llvm::dbgs() << "Upper bound 0x"); - DEBUG(llvm::dbgs().write_hex(I->Lower)); - DEBUG(llvm::dbgs() << " should not be less than lower bound 0x"); - DEBUG(llvm::dbgs().write_hex(I->Upper) << "\n"); - return false; - } - Prev = I->Upper; - } - - // Update the validation cache. - { - llvm::MutexGuard Guard(ValidationMutex); - getValidatedCharSets().insert(CharSet.data()); - } -#endif - return true; -} - -/// Returns true if the Unicode code point \p C is within the set of -/// characters specified by \p CharSet. -LLVM_READONLY static inline bool isCharInSet(uint32_t C, - UnicodeCharSet CharSet) { - assert(isValidCharSet(CharSet)); - - size_t LowPoint = 0; - size_t HighPoint = CharSet.size(); - - // Binary search the set of char ranges. 
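The validation being removed here (now supplied by llvm/Support/UnicodeCharRanges.h) checks two invariants on tables like the ones that follow: each range is a closed [Lower, Upper] interval, and consecutive ranges are strictly increasing and non-overlapping, which is what makes binary search valid. A standalone check over the same shape of data, with rangesAreValid and CharRangeSketch as invented names:

#include <cstddef>
#include <cstdint>

struct CharRangeSketch { uint32_t Lower, Upper; };

// Every range must satisfy Lower <= Upper, and each range must start after
// the previous one ended, so that binary search over the table is meaningful.
static bool rangesAreValid(const CharRangeSketch *Ranges, size_t Count) {
  uint32_t PrevUpper = 0;
  for (size_t I = 0; I != Count; ++I) {
    if (Ranges[I].Upper < Ranges[I].Lower)
      return false;
    if (I != 0 && Ranges[I].Lower <= PrevUpper)
      return false;
    PrevUpper = Ranges[I].Upper;
  }
  return true;
}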
- while (HighPoint != LowPoint) { - size_t MidPoint = (HighPoint + LowPoint) / 2; - if (C < CharSet[MidPoint].Lower) - HighPoint = MidPoint; - else if (C > CharSet[MidPoint].Upper) - LowPoint = MidPoint + 1; - else - return true; - } - - return false; -} - +#include "llvm/Support/UnicodeCharRanges.h" // C11 D.1, C++11 [charname.allowed] -static const UnicodeCharRange C11AllowedIDChars[] = { +static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[] = { // 1 { 0x00A8, 0x00A8 }, { 0x00AA, 0x00AA }, { 0x00AD, 0x00AD }, { 0x00AF, 0x00AF }, { 0x00B2, 0x00B5 }, { 0x00B7, 0x00BA }, @@ -132,7 +44,7 @@ static const UnicodeCharRange C11AllowedIDChars[] = { // C++03 [extendid] // Note that this is not the same as C++98, but we don't distinguish C++98 // and C++03 in Clang. -static const UnicodeCharRange CXX03AllowedIDChars[] = { +static const llvm::sys::UnicodeCharRange CXX03AllowedIDCharRanges[] = { // Latin { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 }, { 0x00F8, 0x01F5 }, { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 }, @@ -251,7 +163,7 @@ static const UnicodeCharRange CXX03AllowedIDChars[] = { }; // C99 Annex D -static const UnicodeCharRange C99AllowedIDChars[] = { +static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[] = { // Latin (1) { 0x00AA, 0x00AA }, @@ -470,7 +382,7 @@ static const UnicodeCharRange C99AllowedIDChars[] = { }; // C11 D.2, C++11 [charname.disallowed] -static const UnicodeCharRange C11DisallowedInitialIDChars[] = { +static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[] = { { 0x0300, 0x036F }, { 0x1DC0, 0x1DFF }, { 0x20D0, 0x20FF }, { 0xFE20, 0xFE2F } }; @@ -478,7 +390,7 @@ static const UnicodeCharRange C11DisallowedInitialIDChars[] = { // C99 6.4.2.1p3: The initial character [of an identifier] shall not be a // universal character name designating a digit. // C99 Annex D defines these characters as "Digits". -static const UnicodeCharRange C99DisallowedInitialIDChars[] = { +static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[] = { { 0x0660, 0x0669 }, { 0x06F0, 0x06F9 }, { 0x0966, 0x096F }, { 0x09E6, 0x09EF }, { 0x0A66, 0x0A6F }, { 0x0AE6, 0x0AEF }, { 0x0B66, 0x0B6F }, { 0x0BE7, 0x0BEF }, { 0x0C66, 0x0C6F }, @@ -487,7 +399,7 @@ static const UnicodeCharRange C99DisallowedInitialIDChars[] = { }; // Unicode v6.2, chapter 6.2, table 6-2. -static const UnicodeCharRange UnicodeWhitespaceChars[] = { +static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[] = { { 0x0085, 0x0085 }, { 0x00A0, 0x00A0 }, { 0x1680, 0x1680 }, { 0x180E, 0x180E }, { 0x2000, 0x200A }, { 0x2028, 0x2029 }, { 0x202F, 0x202F }, { 0x205F, 0x205F }, { 0x3000, 0x3000 } |
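For reference, membership in the tables above is decided by a binary search over the sorted, disjoint ranges, which is what the removed isCharInSet did and what the llvm::sys::UnicodeCharRanges replacement is expected to provide. A minimal standalone equivalent, with isCharInRanges and CodePointRange as made-up names:

#include <cstddef>
#include <cstdint>

struct CodePointRange { uint32_t Lower, Upper; };

// Binary-search sorted, non-overlapping [Lower, Upper] ranges for C.
static bool isCharInRanges(uint32_t C, const CodePointRange *Ranges,
                           size_t Count) {
  size_t Lo = 0, Hi = Count;
  while (Lo != Hi) {
    size_t Mid = Lo + (Hi - Lo) / 2;
    if (C < Ranges[Mid].Lower)
      Hi = Mid;
    else if (C > Ranges[Mid].Upper)
      Lo = Mid + 1;
    else
      return true;
  }
  return false;
}

// Example: against the whitespace table above, isCharInRanges(0x00A0, ...)
// is true (NO-BREAK SPACE) while isCharInRanges(0x0041, ...) is false ('A').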