diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Lex')
20 files changed, 3638 insertions, 563 deletions
diff --git a/contrib/llvm/tools/clang/lib/Lex/HeaderMap.cpp b/contrib/llvm/tools/clang/lib/Lex/HeaderMap.cpp index 0cb564c..bbfc1df 100644 --- a/contrib/llvm/tools/clang/lib/Lex/HeaderMap.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/HeaderMap.cpp @@ -81,7 +81,7 @@ const HeaderMap *HeaderMap::Create(const FileEntry *FE, FileManager &FM) { unsigned FileSize = FE->getSize(); if (FileSize <= sizeof(HMapHeader)) return 0; - llvm::OwningPtr<const llvm::MemoryBuffer> FileBuffer(FM.getBufferForFile(FE)); + OwningPtr<const llvm::MemoryBuffer> FileBuffer(FM.getBufferForFile(FE)); if (FileBuffer == 0) return 0; // Unreadable file? const char *FileStart = FileBuffer->getBufferStart(); @@ -220,7 +220,7 @@ const FileEntry *HeaderMap::LookupFile( // If so, we have a match in the hash table. Construct the destination // path. - llvm::SmallString<1024> DestPath; + SmallString<1024> DestPath; DestPath += getString(B.Prefix); DestPath += getString(B.Suffix); return FM.getFile(DestPath.str()); diff --git a/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp b/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp index 931145a..d688e23 100644 --- a/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp @@ -13,6 +13,8 @@ #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/HeaderMap.h" +#include "clang/Lex/Lexer.h" +#include "clang/Basic/Diagnostic.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/IdentifierTable.h" #include "llvm/Support/FileSystem.h" @@ -36,8 +38,12 @@ HeaderFileInfo::getControllingMacro(ExternalIdentifierLookup *External) { ExternalHeaderFileInfoSource::~ExternalHeaderFileInfoSource() {} -HeaderSearch::HeaderSearch(FileManager &FM) - : FileMgr(FM), FrameworkMap(64) { +HeaderSearch::HeaderSearch(FileManager &FM, DiagnosticsEngine &Diags, + const LangOptions &LangOpts, + const TargetInfo *Target) + : FileMgr(FM), Diags(Diags), FrameworkMap(64), + ModMap(FileMgr, *Diags.getClient(), LangOpts, Target) +{ 
AngledDirIdx = 0; SystemDirIdx = 0; NoCurDirSearch = false; @@ -98,58 +104,81 @@ const HeaderMap *HeaderSearch::CreateHeaderMap(const FileEntry *FE) { return 0; } -const FileEntry *HeaderSearch::lookupModule(StringRef ModuleName, - std::string *ModuleFileName, - std::string *UmbrellaHeader) { +std::string HeaderSearch::getModuleFileName(Module *Module) { // If we don't have a module cache path, we can't do anything. - if (ModuleCachePath.empty()) { - if (ModuleFileName) - ModuleFileName->clear(); - return 0; - } + if (ModuleCachePath.empty()) + return std::string(); + + + SmallString<256> Result(ModuleCachePath); + llvm::sys::path::append(Result, Module->getTopLevelModule()->Name + ".pcm"); + return Result.str().str(); +} + +std::string HeaderSearch::getModuleFileName(StringRef ModuleName) { + // If we don't have a module cache path, we can't do anything. + if (ModuleCachePath.empty()) + return std::string(); - // Try to find the module path. - llvm::SmallString<256> FileName(ModuleCachePath); - llvm::sys::path::append(FileName, ModuleName + ".pcm"); - if (ModuleFileName) - *ModuleFileName = FileName.str(); - - if (const FileEntry *ModuleFile - = getFileMgr().getFile(FileName, /*OpenFile=*/false, - /*CacheFailure=*/false)) - return ModuleFile; - // We didn't find the module. If we're not supposed to look for an - // umbrella header, this is the end of the road. - if (!UmbrellaHeader) - return 0; + SmallString<256> Result(ModuleCachePath); + llvm::sys::path::append(Result, ModuleName + ".pcm"); + return Result.str().str(); +} + +Module *HeaderSearch::lookupModule(StringRef ModuleName, bool AllowSearch) { + // Look in the module map to determine if there is a module by this name. + Module *Module = ModMap.findModule(ModuleName); + if (Module || !AllowSearch) + return Module; - // Look in each of the framework directories for an umbrella header with - // the same name as the module. - // FIXME: We need a way for non-frameworks to provide umbrella headers. 
- llvm::SmallString<128> UmbrellaHeaderName; - UmbrellaHeaderName = ModuleName; - UmbrellaHeaderName += '/'; - UmbrellaHeaderName += ModuleName; - UmbrellaHeaderName += ".h"; + // Look through the various header search paths to load any avai;able module + // maps, searching for a module map that describes this module. for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) { - // Skip non-framework include paths - if (!SearchDirs[Idx].isFramework()) + if (SearchDirs[Idx].isFramework()) { + // Search for or infer a module map for a framework. + SmallString<128> FrameworkDirName; + FrameworkDirName += SearchDirs[Idx].getFrameworkDir()->getName(); + llvm::sys::path::append(FrameworkDirName, ModuleName + ".framework"); + if (const DirectoryEntry *FrameworkDir + = FileMgr.getDirectory(FrameworkDirName)) { + bool IsSystem + = SearchDirs[Idx].getDirCharacteristic() != SrcMgr::C_User; + Module = loadFrameworkModule(ModuleName, FrameworkDir, IsSystem); + if (Module) + break; + } + } + + // FIXME: Figure out how header maps and module maps will work together. + + // Only deal with normal search directories. + if (!SearchDirs[Idx].isNormalDir()) continue; - // Look for the umbrella header in this directory. - if (const FileEntry *HeaderFile - = SearchDirs[Idx].LookupFile(UmbrellaHeaderName, *this, 0, 0, - StringRef(), 0)) { - *UmbrellaHeader = HeaderFile->getName(); - return 0; + // Search for a module map file in this directory. + if (loadModuleMapFile(SearchDirs[Idx].getDir()) == LMM_NewlyLoaded) { + // We just loaded a module map file; check whether the module is + // available now. + Module = ModMap.findModule(ModuleName); + if (Module) + break; + } + + // Search for a module map in a subdirectory with the same name as the + // module. 
+ SmallString<128> NestedModuleMapDirName; + NestedModuleMapDirName = SearchDirs[Idx].getDir()->getName(); + llvm::sys::path::append(NestedModuleMapDirName, ModuleName); + if (loadModuleMapFile(NestedModuleMapDirName) == LMM_NewlyLoaded) { + // If we just loaded a module map file, look for the module again. + Module = ModMap.findModule(ModuleName); + if (Module) + break; } } - // We did not find an umbrella header. Clear out the UmbrellaHeader pointee - // so our caller knows that we failed. - UmbrellaHeader->clear(); - return 0; + return Module; } //===----------------------------------------------------------------------===// @@ -175,9 +204,11 @@ const FileEntry *DirectoryLookup::LookupFile( HeaderSearch &HS, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, - StringRef BuildingModule, - StringRef *SuggestedModule) const { - llvm::SmallString<1024> TmpDir; + Module **SuggestedModule, + bool &InUserSpecifiedSystemFramework) const { + InUserSpecifiedSystemFramework = false; + + SmallString<1024> TmpDir; if (isNormalDir()) { // Concatenate the requested file onto the directory. TmpDir = getDir()->getName(); @@ -191,12 +222,27 @@ const FileEntry *DirectoryLookup::LookupFile( RelativePath->clear(); RelativePath->append(Filename.begin(), Filename.end()); } + + // If we have a module map that might map this header, load it and + // check whether we'll have a suggestion for a module. + if (SuggestedModule && HS.hasModuleMap(TmpDir, getDir())) { + const FileEntry *File = HS.getFileMgr().getFile(TmpDir.str(), + /*openFile=*/false); + if (!File) + return File; + + // If there is a module that corresponds to this header, + // suggest it. 
+ *SuggestedModule = HS.findModuleForHeader(File); + return File; + } + return HS.getFileMgr().getFile(TmpDir.str(), /*openFile=*/true); } if (isFramework()) return DoFrameworkLookup(Filename, HS, SearchPath, RelativePath, - BuildingModule, SuggestedModule); + SuggestedModule, InUserSpecifiedSystemFramework); assert(isHeaderMap() && "Unknown directory lookup"); const FileEntry * const Result = getHeaderMap()->LookupFile( @@ -223,8 +269,8 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( HeaderSearch &HS, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, - StringRef BuildingModule, - StringRef *SuggestedModule) const + Module **SuggestedModule, + bool &InUserSpecifiedSystemFramework) const { FileManager &FileMgr = HS.getFileMgr(); @@ -233,49 +279,71 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( if (SlashPos == StringRef::npos) return 0; // Find out if this is the home for the specified framework, by checking - // HeaderSearch. Possible answer are yes/no and unknown. - const DirectoryEntry *&FrameworkDirCache = + // HeaderSearch. Possible answers are yes/no and unknown. + HeaderSearch::FrameworkCacheEntry &CacheEntry = HS.LookupFrameworkCache(Filename.substr(0, SlashPos)); // If it is known and in some other directory, fail. - if (FrameworkDirCache && FrameworkDirCache != getFrameworkDir()) + if (CacheEntry.Directory && CacheEntry.Directory != getFrameworkDir()) return 0; // Otherwise, construct the path to this framework dir. 
// FrameworkName = "/System/Library/Frameworks/" - llvm::SmallString<1024> FrameworkName; + SmallString<1024> FrameworkName; FrameworkName += getFrameworkDir()->getName(); if (FrameworkName.empty() || FrameworkName.back() != '/') FrameworkName.push_back('/'); // FrameworkName = "/System/Library/Frameworks/Cocoa" - FrameworkName.append(Filename.begin(), Filename.begin()+SlashPos); + StringRef ModuleName(Filename.begin(), SlashPos); + FrameworkName += ModuleName; // FrameworkName = "/System/Library/Frameworks/Cocoa.framework/" FrameworkName += ".framework/"; - // If the cache entry is still unresolved, query to see if the cache entry is - // still unresolved. If so, check its existence now. - if (FrameworkDirCache == 0) { + // If the cache entry was unresolved, populate it now. + if (CacheEntry.Directory == 0) { HS.IncrementFrameworkLookupCount(); // If the framework dir doesn't exist, we fail. - // FIXME: It's probably more efficient to query this with FileMgr.getDir. - bool Exists; - if (llvm::sys::fs::exists(FrameworkName.str(), Exists) || !Exists) - return 0; + const DirectoryEntry *Dir = FileMgr.getDirectory(FrameworkName.str()); + if (Dir == 0) return 0; // Otherwise, if it does, remember that this is the right direntry for this // framework. - FrameworkDirCache = getFrameworkDir(); + CacheEntry.Directory = getFrameworkDir(); + + // If this is a user search directory, check if the framework has been + // user-specified as a system framework. + if (getDirCharacteristic() == SrcMgr::C_User) { + SmallString<1024> SystemFrameworkMarker(FrameworkName); + SystemFrameworkMarker += ".system_framework"; + if (llvm::sys::fs::exists(SystemFrameworkMarker.str())) { + CacheEntry.IsUserSpecifiedSystemFramework = true; + } + } } + // Set the 'user-specified system framework' flag. 
+ InUserSpecifiedSystemFramework = CacheEntry.IsUserSpecifiedSystemFramework; + if (RelativePath != NULL) { RelativePath->clear(); RelativePath->append(Filename.begin()+SlashPos+1, Filename.end()); } + // If we're allowed to look for modules, try to load or create the module + // corresponding to this framework. + Module *Module = 0; + if (SuggestedModule) { + if (const DirectoryEntry *FrameworkDir + = FileMgr.getDirectory(FrameworkName)) { + bool IsSystem = getDirCharacteristic() != SrcMgr::C_User; + Module = HS.loadFrameworkModule(ModuleName, FrameworkDir, IsSystem); + } + } + // Check "/System/Library/Frameworks/Cocoa.framework/Headers/file.h" unsigned OrigSize = FrameworkName.size(); @@ -287,16 +355,13 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( SearchPath->append(FrameworkName.begin(), FrameworkName.end()-1); } - /// Determine whether this is the module we're building or not. - bool AutomaticImport = SuggestedModule && - (BuildingModule != StringRef(Filename.begin(), SlashPos)) && - !Filename.substr(SlashPos + 1).startswith(".."); - + // Determine whether this is the module we're building or not. + bool AutomaticImport = Module; FrameworkName.append(Filename.begin()+SlashPos+1, Filename.end()); if (const FileEntry *FE = FileMgr.getFile(FrameworkName.str(), /*openFile=*/!AutomaticImport)) { if (AutomaticImport) - *SuggestedModule = StringRef(Filename.begin(), SlashPos); + *SuggestedModule = HS.findModuleForHeader(FE); return FE; } @@ -311,10 +376,14 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( const FileEntry *FE = FileMgr.getFile(FrameworkName.str(), /*openFile=*/!AutomaticImport); if (FE && AutomaticImport) - *SuggestedModule = StringRef(Filename.begin(), SlashPos); + *SuggestedModule = HS.findModuleForHeader(FE); return FE; } +void HeaderSearch::setTarget(const TargetInfo &Target) { + ModMap.setTarget(Target); +} + //===----------------------------------------------------------------------===// // Header File Location. 
@@ -334,10 +403,11 @@ const FileEntry *HeaderSearch::LookupFile( const FileEntry *CurFileEnt, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, - StringRef *SuggestedModule) + Module **SuggestedModule, + bool SkipCache) { if (SuggestedModule) - *SuggestedModule = StringRef(); + *SuggestedModule = 0; // If 'Filename' is absolute, check to see if it exists and no searching. if (llvm::sys::path::is_absolute(Filename)) { @@ -362,7 +432,7 @@ const FileEntry *HeaderSearch::LookupFile( // a subsequent include of "baz.h" should resolve to "whatever/foo/baz.h". // This search is not done for <> headers. if (CurFileEnt && !isAngled && !NoCurDirSearch) { - llvm::SmallString<1024> TmpDir; + SmallString<1024> TmpDir; // Concatenate the requested file onto the directory. // FIXME: Portability. Filename concatenation should be in sys::Path. TmpDir += CurFileEnt->getDir()->getName(); @@ -410,7 +480,7 @@ const FileEntry *HeaderSearch::LookupFile( // If the entry has been previously looked up, the first value will be // non-zero. If the value is equal to i (the start point of our search), then // this is a matching hit. - if (CacheLookup.first == i+1) { + if (!SkipCache && CacheLookup.first == i+1) { // Skip querying potentially lots of directories for this lookup. i = CacheLookup.second; } else { @@ -422,9 +492,10 @@ const FileEntry *HeaderSearch::LookupFile( // Check each directory in sequence to see if it contains this file. 
for (; i != SearchDirs.size(); ++i) { + bool InUserSpecifiedSystemFramework = false; const FileEntry *FE = SearchDirs[i].LookupFile(Filename, *this, SearchPath, RelativePath, - BuildingModule, SuggestedModule); + SuggestedModule, InUserSpecifiedSystemFramework); if (!FE) continue; CurDir = &SearchDirs[i]; @@ -433,6 +504,12 @@ const FileEntry *HeaderSearch::LookupFile( HeaderFileInfo &HFI = getFileInfo(FE); HFI.DirInfo = CurDir->getDirCharacteristic(); + // If the directory characteristic is User but this framework was + // user-specified to be treated as a system framework, promote the + // characteristic. + if (HFI.DirInfo == SrcMgr::C_User && InUserSpecifiedSystemFramework) + HFI.DirInfo = SrcMgr::C_System; + // If this file is found in a header map and uses the framework style of // includes, then this header is part of a framework we're building. if (CurDir->isIndexHeaderMap()) { @@ -456,7 +533,7 @@ const FileEntry *HeaderSearch::LookupFile( if (CurFileEnt && !isAngled && Filename.find('/') == StringRef::npos) { HeaderFileInfo &IncludingHFI = getFileInfo(CurFileEnt); if (IncludingHFI.IndexHeaderMapHeader) { - llvm::SmallString<128> ScratchFilename; + SmallString<128> ScratchFilename; ScratchFilename += IncludingHFI.Framework; ScratchFilename += '/'; ScratchFilename += Filename; @@ -491,6 +568,7 @@ LookupSubframeworkHeader(StringRef Filename, assert(ContextFileEnt && "No context file?"); // Framework names must have a '/' in the filename. Find it. + // FIXME: Should we permit '\' on Windows? size_t SlashPos = Filename.find('/'); if (SlashPos == StringRef::npos) return 0; @@ -498,30 +576,32 @@ LookupSubframeworkHeader(StringRef Filename, const char *ContextName = ContextFileEnt->getName(); // If the context info wasn't a framework, couldn't be a subframework. 
- const char *FrameworkPos = strstr(ContextName, ".framework/"); - if (FrameworkPos == 0) + const unsigned DotFrameworkLen = 10; + const char *FrameworkPos = strstr(ContextName, ".framework"); + if (FrameworkPos == 0 || + (FrameworkPos[DotFrameworkLen] != '/' && + FrameworkPos[DotFrameworkLen] != '\\')) return 0; - llvm::SmallString<1024> FrameworkName(ContextName, - FrameworkPos+strlen(".framework/")); + SmallString<1024> FrameworkName(ContextName, FrameworkPos+DotFrameworkLen+1); // Append Frameworks/HIToolbox.framework/ FrameworkName += "Frameworks/"; FrameworkName.append(Filename.begin(), Filename.begin()+SlashPos); FrameworkName += ".framework/"; - llvm::StringMapEntry<const DirectoryEntry *> &CacheLookup = + llvm::StringMapEntry<FrameworkCacheEntry> &CacheLookup = FrameworkMap.GetOrCreateValue(Filename.substr(0, SlashPos)); // Some other location? - if (CacheLookup.getValue() && + if (CacheLookup.getValue().Directory && CacheLookup.getKeyLength() == FrameworkName.size() && memcmp(CacheLookup.getKeyData(), &FrameworkName[0], CacheLookup.getKeyLength()) != 0) return 0; // Cache subframework. - if (CacheLookup.getValue() == 0) { + if (CacheLookup.getValue().Directory == 0) { ++NumSubFrameworkLookups; // If the framework dir doesn't exist, we fail. @@ -530,7 +610,7 @@ LookupSubframeworkHeader(StringRef Filename, // Otherwise, if it does, remember that this is the right direntry for this // framework. 
- CacheLookup.setValue(Dir); + CacheLookup.getValue().Directory = Dir; } const FileEntry *FE = 0; @@ -541,7 +621,7 @@ LookupSubframeworkHeader(StringRef Filename, } // Check ".../Frameworks/HIToolbox.framework/Headers/HIToolbox.h" - llvm::SmallString<1024> HeadersFilename(FrameworkName); + SmallString<1024> HeadersFilename(FrameworkName); HeadersFilename += "Headers/"; if (SearchPath != NULL) { SearchPath->clear(); @@ -576,6 +656,28 @@ LookupSubframeworkHeader(StringRef Filename, return FE; } +/// \brief Helper static function to normalize a path for injection into +/// a synthetic header. +/*static*/ std::string +HeaderSearch::NormalizeDashIncludePath(StringRef File, FileManager &FileMgr) { + // Implicit include paths should be resolved relative to the current + // working directory first, and then use the regular header search + // mechanism. The proper way to handle this is to have the + // predefines buffer located at the current working directory, but + // it has no file entry. For now, workaround this by using an + // absolute path if we find the file here, and otherwise letting + // header search handle it. + SmallString<128> Path(File); + llvm::sys::fs::make_absolute(Path); + bool exists; + if (llvm::sys::fs::exists(Path.str(), exists) || !exists) + Path = File; + else if (exists) + FileMgr.getFile(File); + + return Lexer::Stringify(Path.str()); +} + //===----------------------------------------------------------------------===// // File Info Management. //===----------------------------------------------------------------------===// @@ -687,3 +789,247 @@ size_t HeaderSearch::getTotalMemory() const { StringRef HeaderSearch::getUniqueFrameworkName(StringRef Framework) { return FrameworkNames.GetOrCreateValue(Framework).getKey(); } + +bool HeaderSearch::hasModuleMap(StringRef FileName, + const DirectoryEntry *Root) { + llvm::SmallVector<const DirectoryEntry *, 2> FixUpDirectories; + + StringRef DirName = FileName; + do { + // Get the parent directory name. 
+ DirName = llvm::sys::path::parent_path(DirName); + if (DirName.empty()) + return false; + + // Determine whether this directory exists. + const DirectoryEntry *Dir = FileMgr.getDirectory(DirName); + if (!Dir) + return false; + + // Try to load the module map file in this directory. + switch (loadModuleMapFile(Dir)) { + case LMM_NewlyLoaded: + case LMM_AlreadyLoaded: + // Success. All of the directories we stepped through inherit this module + // map file. + for (unsigned I = 0, N = FixUpDirectories.size(); I != N; ++I) + DirectoryHasModuleMap[FixUpDirectories[I]] = true; + + return true; + + case LMM_NoDirectory: + case LMM_InvalidModuleMap: + break; + } + + // If we hit the top of our search, we're done. + if (Dir == Root) + return false; + + // Keep track of all of the directories we checked, so we can mark them as + // having module maps if we eventually do find a module map. + FixUpDirectories.push_back(Dir); + } while (true); +} + +Module *HeaderSearch::findModuleForHeader(const FileEntry *File) { + if (Module *Mod = ModMap.findModuleForHeader(File)) + return Mod; + + return 0; +} + +bool HeaderSearch::loadModuleMapFile(const FileEntry *File) { + const DirectoryEntry *Dir = File->getDir(); + + llvm::DenseMap<const DirectoryEntry *, bool>::iterator KnownDir + = DirectoryHasModuleMap.find(Dir); + if (KnownDir != DirectoryHasModuleMap.end()) + return !KnownDir->second; + + bool Result = ModMap.parseModuleMapFile(File); + if (!Result && llvm::sys::path::filename(File->getName()) == "module.map") { + // If the file we loaded was a module.map, look for the corresponding + // module_private.map. 
+ SmallString<128> PrivateFilename(Dir->getName()); + llvm::sys::path::append(PrivateFilename, "module_private.map"); + if (const FileEntry *PrivateFile = FileMgr.getFile(PrivateFilename)) + Result = ModMap.parseModuleMapFile(PrivateFile); + } + + DirectoryHasModuleMap[Dir] = !Result; + return Result; +} + +Module *HeaderSearch::loadFrameworkModule(StringRef Name, + const DirectoryEntry *Dir, + bool IsSystem) { + if (Module *Module = ModMap.findModule(Name)) + return Module; + + // Try to load a module map file. + switch (loadModuleMapFile(Dir)) { + case LMM_InvalidModuleMap: + break; + + case LMM_AlreadyLoaded: + case LMM_NoDirectory: + return 0; + + case LMM_NewlyLoaded: + return ModMap.findModule(Name); + } + + // The top-level framework directory, from which we'll infer a framework + // module. + const DirectoryEntry *TopFrameworkDir = Dir; + + // The path from the module we're actually looking for back to the top-level + // framework name. + llvm::SmallVector<StringRef, 2> SubmodulePath; + SubmodulePath.push_back(Name); + + // Walk the directory structure to find any enclosing frameworks. + StringRef DirName = Dir->getName(); + do { + // Get the parent directory name. + DirName = llvm::sys::path::parent_path(DirName); + if (DirName.empty()) + break; + + // Determine whether this directory exists. + Dir = FileMgr.getDirectory(DirName); + if (!Dir) + break; + + // If this is a framework directory, then we're a subframework of this + // framework. + if (llvm::sys::path::extension(DirName) == ".framework") { + SubmodulePath.push_back(llvm::sys::path::stem(DirName)); + TopFrameworkDir = Dir; + } + } while (true); + + // Try to infer a module map from the top-level framework directory. + Module *Result = ModMap.inferFrameworkModule(SubmodulePath.back(), + TopFrameworkDir, + IsSystem, + /*Parent=*/0); + + // Follow the submodule path to find the requested (sub)framework module + // within the top-level framework module. 
+ SubmodulePath.pop_back(); + while (!SubmodulePath.empty() && Result) { + Result = ModMap.lookupModuleQualified(SubmodulePath.back(), Result); + SubmodulePath.pop_back(); + } + return Result; +} + + +HeaderSearch::LoadModuleMapResult +HeaderSearch::loadModuleMapFile(StringRef DirName) { + if (const DirectoryEntry *Dir = FileMgr.getDirectory(DirName)) + return loadModuleMapFile(Dir); + + return LMM_NoDirectory; +} + +HeaderSearch::LoadModuleMapResult +HeaderSearch::loadModuleMapFile(const DirectoryEntry *Dir) { + llvm::DenseMap<const DirectoryEntry *, bool>::iterator KnownDir + = DirectoryHasModuleMap.find(Dir); + if (KnownDir != DirectoryHasModuleMap.end()) + return KnownDir->second? LMM_AlreadyLoaded : LMM_InvalidModuleMap; + + SmallString<128> ModuleMapFileName; + ModuleMapFileName += Dir->getName(); + unsigned ModuleMapDirNameLen = ModuleMapFileName.size(); + llvm::sys::path::append(ModuleMapFileName, "module.map"); + if (const FileEntry *ModuleMapFile = FileMgr.getFile(ModuleMapFileName)) { + // We have found a module map file. Try to parse it. + if (ModMap.parseModuleMapFile(ModuleMapFile)) { + // No suitable module map. + DirectoryHasModuleMap[Dir] = false; + return LMM_InvalidModuleMap; + } + + // This directory has a module map. + DirectoryHasModuleMap[Dir] = true; + + // Check whether there is a private module map that we need to load as well. + ModuleMapFileName.erase(ModuleMapFileName.begin() + ModuleMapDirNameLen, + ModuleMapFileName.end()); + llvm::sys::path::append(ModuleMapFileName, "module_private.map"); + if (const FileEntry *PrivateModuleMapFile + = FileMgr.getFile(ModuleMapFileName)) { + if (ModMap.parseModuleMapFile(PrivateModuleMapFile)) { + // No suitable module map. + DirectoryHasModuleMap[Dir] = false; + return LMM_InvalidModuleMap; + } + } + + return LMM_NewlyLoaded; + } + + // No suitable module map. 
+ DirectoryHasModuleMap[Dir] = false; + return LMM_InvalidModuleMap; +} + +void HeaderSearch::collectAllModules(llvm::SmallVectorImpl<Module *> &Modules) { + Modules.clear(); + + // Load module maps for each of the header search directories. + for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) { + if (SearchDirs[Idx].isFramework()) { + llvm::error_code EC; + SmallString<128> DirNative; + llvm::sys::path::native(SearchDirs[Idx].getFrameworkDir()->getName(), + DirNative); + + // Search each of the ".framework" directories to load them as modules. + bool IsSystem = SearchDirs[Idx].getDirCharacteristic() != SrcMgr::C_User; + for (llvm::sys::fs::directory_iterator Dir(DirNative.str(), EC), DirEnd; + Dir != DirEnd && !EC; Dir.increment(EC)) { + if (llvm::sys::path::extension(Dir->path()) != ".framework") + continue; + + const DirectoryEntry *FrameworkDir = FileMgr.getDirectory(Dir->path()); + if (!FrameworkDir) + continue; + + // Load this framework module. + loadFrameworkModule(llvm::sys::path::stem(Dir->path()), FrameworkDir, + IsSystem); + } + continue; + } + + // FIXME: Deal with header maps. + if (SearchDirs[Idx].isHeaderMap()) + continue; + + // Try to load a module map file for the search directory. + loadModuleMapFile(SearchDirs[Idx].getDir()); + + // Try to load module map files for immediate subdirectories of this search + // directory. + llvm::error_code EC; + SmallString<128> DirNative; + llvm::sys::path::native(SearchDirs[Idx].getDir()->getName(), DirNative); + for (llvm::sys::fs::directory_iterator Dir(DirNative.str(), EC), DirEnd; + Dir != DirEnd && !EC; Dir.increment(EC)) { + loadModuleMapFile(Dir->path()); + } + } + + // Populate the list of modules. 
+ for (ModuleMap::module_iterator M = ModMap.module_begin(), + MEnd = ModMap.module_end(); + M != MEnd; ++M) { + Modules.push_back(M->getValue()); + } +} + diff --git a/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp b/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp index a98d889..535a852 100644 --- a/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp @@ -30,6 +30,7 @@ #include "clang/Lex/CodeCompletionHandler.h" #include "clang/Basic/SourceManager.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/MemoryBuffer.h" #include <cstring> @@ -59,6 +60,8 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const { // Lexer Class Implementation //===----------------------------------------------------------------------===// +void Lexer::anchor() { } + void Lexer::InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd) { InitCharacterInfo(); @@ -114,7 +117,7 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr, Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP) : PreprocessorLexer(&PP, FID), FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)), - Features(PP.getLangOptions()) { + LangOpts(PP.getLangOpts()) { InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(), InputFile->getBufferEnd()); @@ -126,9 +129,9 @@ Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP) /// Lexer constructor - Create a new raw lexer object. This object is only /// suitable for calls to 'LexRawToken'. This lexer assumes that the text /// range will outlive it, so it doesn't take ownership of it. 
-Lexer::Lexer(SourceLocation fileloc, const LangOptions &features, +Lexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts, const char *BufStart, const char *BufPtr, const char *BufEnd) - : FileLoc(fileloc), Features(features) { + : FileLoc(fileloc), LangOpts(langOpts) { InitLexer(BufStart, BufPtr, BufEnd); @@ -140,8 +143,8 @@ Lexer::Lexer(SourceLocation fileloc, const LangOptions &features, /// suitable for calls to 'LexRawToken'. This lexer assumes that the text /// range will outlive it, so it doesn't take ownership of it. Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *FromFile, - const SourceManager &SM, const LangOptions &features) - : FileLoc(SM.getLocForStartOfFile(FID)), Features(features) { + const SourceManager &SM, const LangOptions &langOpts) + : FileLoc(SM.getLocForStartOfFile(FID)), LangOpts(langOpts) { InitLexer(FromFile->getBufferStart(), FromFile->getBufferStart(), FromFile->getBufferEnd()); @@ -284,7 +287,7 @@ StringRef Lexer::getSpelling(SourceLocation loc, /// wants to get the true, uncanonicalized, spelling of things like digraphs /// UCNs, etc. std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr, - const LangOptions &Features, bool *Invalid) { + const LangOptions &LangOpts, bool *Invalid) { assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); // If this token contains nothing interesting, return it directly. @@ -306,7 +309,7 @@ std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr, for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); Ptr != End; ) { unsigned CharSize; - Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features)); + Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts)); Ptr += CharSize; } assert(Result.size() != unsigned(Tok.getLength()) && @@ -326,7 +329,7 @@ std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr, /// if an internal buffer is returned. 
unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, - const LangOptions &Features, bool *Invalid) { + const LangOptions &LangOpts, bool *Invalid) { assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); const char *TokStart = 0; @@ -366,7 +369,7 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); Ptr != End; ) { unsigned CharSize; - *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features); + *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts); Ptr += CharSize; } assert(unsigned(OutBuf-Buffer) != Tok.getLength() && @@ -487,11 +490,11 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc, SourceLocation FileLoc = SM.getSpellingLoc(Loc); SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts); std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc); - std::pair<FileID, unsigned> BeginFileLocInfo= SM.getDecomposedLoc(BeginFileLoc); + std::pair<FileID, unsigned> BeginFileLocInfo + = SM.getDecomposedLoc(BeginFileLoc); assert(FileLocInfo.first == BeginFileLocInfo.first && FileLocInfo.second >= BeginFileLocInfo.second); - return Loc.getLocWithOffset(SM.getDecomposedLoc(BeginFileLoc).second - - SM.getDecomposedLoc(FileLoc).second); + return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second); } namespace { @@ -505,13 +508,13 @@ namespace { std::pair<unsigned, bool> Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, - const LangOptions &Features, unsigned MaxLines) { + const LangOptions &LangOpts, unsigned MaxLines) { // Create a lexer starting at the beginning of the file. Note that we use a // "fake" file source location at offset 1 so that the lexer will track our // position within the file. 
const unsigned StartOffset = 1; SourceLocation StartLoc = SourceLocation::getFromRawEncoding(StartOffset); - Lexer TheLexer(StartLoc, Features, Buffer->getBufferStart(), + Lexer TheLexer(StartLoc, LangOpts, Buffer->getBufferStart(), Buffer->getBufferStart(), Buffer->getBufferEnd()); bool InPreprocessorDirective = false; @@ -655,7 +658,7 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, - const LangOptions &Features) { + const LangOptions &LangOpts) { // Figure out how many physical characters away the specified expansion // character is. This needs to take into consideration newlines and // trigraphs. @@ -681,7 +684,7 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, // lexer to parse it correctly. for (; CharNo; --CharNo) { unsigned Size; - Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features); + Lexer::getCharAndSizeNoWarn(TokPtr, Size, LangOpts); TokPtr += Size; PhysOffset += Size; } @@ -713,19 +716,16 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, /// a source location pointing to the last character in the token, etc. SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, - const LangOptions &Features) { + const LangOptions &LangOpts) { if (Loc.isInvalid()) return SourceLocation(); if (Loc.isMacroID()) { - if (Offset > 0 || !isAtEndOfMacroExpansion(Loc, SM, Features)) + if (Offset > 0 || !isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc)) return SourceLocation(); // Points inside the macro expansion. - - // Continue and find the location just after the macro expansion. 
- Loc = SM.getExpansionRange(Loc).second; } - unsigned Len = Lexer::MeasureTokenLength(Loc, SM, Features); + unsigned Len = Lexer::MeasureTokenLength(Loc, SM, LangOpts); if (Len > Offset) Len = Len - Offset; else @@ -738,7 +738,8 @@ SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset, /// token of the macro expansion. bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, - const LangOptions &LangOpts) { + const LangOptions &LangOpts, + SourceLocation *MacroBegin) { assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc"); std::pair<FileID, unsigned> infoLoc = SM.getDecomposedLoc(loc); @@ -749,17 +750,22 @@ bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc, SourceLocation expansionLoc = SM.getSLocEntry(infoLoc.first).getExpansion().getExpansionLocStart(); - if (expansionLoc.isFileID()) - return true; // No other macro expansions, this is the first. + if (expansionLoc.isFileID()) { + // No other macro expansions, this is the first. + if (MacroBegin) + *MacroBegin = expansionLoc; + return true; + } - return isAtStartOfMacroExpansion(expansionLoc, SM, LangOpts); + return isAtStartOfMacroExpansion(expansionLoc, SM, LangOpts, MacroBegin); } /// \brief Returns true if the given MacroID location points at the last /// token of the macro expansion. bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc, - const SourceManager &SM, - const LangOptions &LangOpts) { + const SourceManager &SM, + const LangOptions &LangOpts, + SourceLocation *MacroEnd) { assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc"); SourceLocation spellLoc = SM.getSpellingLoc(loc); @@ -777,10 +783,192 @@ bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc, SourceLocation expansionLoc = SM.getSLocEntry(FID).getExpansion().getExpansionLocEnd(); - if (expansionLoc.isFileID()) - return true; // No other macro expansions. + if (expansionLoc.isFileID()) { + // No other macro expansions. 
+ if (MacroEnd) + *MacroEnd = expansionLoc; + return true; + } + + return isAtEndOfMacroExpansion(expansionLoc, SM, LangOpts, MacroEnd); +} + +static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, + const SourceManager &SM, + const LangOptions &LangOpts) { + SourceLocation Begin = Range.getBegin(); + SourceLocation End = Range.getEnd(); + assert(Begin.isFileID() && End.isFileID()); + if (Range.isTokenRange()) { + End = Lexer::getLocForEndOfToken(End, 0, SM,LangOpts); + if (End.isInvalid()) + return CharSourceRange(); + } + + // Break down the source locations. + FileID FID; + unsigned BeginOffs; + llvm::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin); + if (FID.isInvalid()) + return CharSourceRange(); + + unsigned EndOffs; + if (!SM.isInFileID(End, FID, &EndOffs) || + BeginOffs > EndOffs) + return CharSourceRange(); + + return CharSourceRange::getCharRange(Begin, End); +} + +/// \brief Accepts a range and returns a character range with file locations. +/// +/// Returns a null range if a part of the range resides inside a macro +/// expansion or the range does not reside on the same FileID. 
+CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, + const SourceManager &SM, + const LangOptions &LangOpts) { + SourceLocation Begin = Range.getBegin(); + SourceLocation End = Range.getEnd(); + if (Begin.isInvalid() || End.isInvalid()) + return CharSourceRange(); + + if (Begin.isFileID() && End.isFileID()) + return makeRangeFromFileLocs(Range, SM, LangOpts); + + if (Begin.isMacroID() && End.isFileID()) { + if (!isAtStartOfMacroExpansion(Begin, SM, LangOpts, &Begin)) + return CharSourceRange(); + Range.setBegin(Begin); + return makeRangeFromFileLocs(Range, SM, LangOpts); + } + + if (Begin.isFileID() && End.isMacroID()) { + if ((Range.isTokenRange() && !isAtEndOfMacroExpansion(End, SM, LangOpts, + &End)) || + (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts, + &End))) + return CharSourceRange(); + Range.setEnd(End); + return makeRangeFromFileLocs(Range, SM, LangOpts); + } + + assert(Begin.isMacroID() && End.isMacroID()); + SourceLocation MacroBegin, MacroEnd; + if (isAtStartOfMacroExpansion(Begin, SM, LangOpts, &MacroBegin) && + ((Range.isTokenRange() && isAtEndOfMacroExpansion(End, SM, LangOpts, + &MacroEnd)) || + (Range.isCharRange() && isAtStartOfMacroExpansion(End, SM, LangOpts, + &MacroEnd)))) { + Range.setBegin(MacroBegin); + Range.setEnd(MacroEnd); + return makeRangeFromFileLocs(Range, SM, LangOpts); + } - return isAtEndOfMacroExpansion(expansionLoc, SM, LangOpts); + FileID FID; + unsigned BeginOffs; + llvm::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin); + if (FID.isInvalid()) + return CharSourceRange(); + + unsigned EndOffs; + if (!SM.isInFileID(End, FID, &EndOffs) || + BeginOffs > EndOffs) + return CharSourceRange(); + + const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID); + const SrcMgr::ExpansionInfo &Expansion = E->getExpansion(); + if (Expansion.isMacroArgExpansion() && + Expansion.getSpellingLoc().isFileID()) { + SourceLocation SpellLoc = Expansion.getSpellingLoc(); + 
Range.setBegin(SpellLoc.getLocWithOffset(BeginOffs)); + Range.setEnd(SpellLoc.getLocWithOffset(EndOffs)); + return makeRangeFromFileLocs(Range, SM, LangOpts); + } + + return CharSourceRange(); +} + +StringRef Lexer::getSourceText(CharSourceRange Range, + const SourceManager &SM, + const LangOptions &LangOpts, + bool *Invalid) { + Range = makeFileCharRange(Range, SM, LangOpts); + if (Range.isInvalid()) { + if (Invalid) *Invalid = true; + return StringRef(); + } + + // Break down the source location. + std::pair<FileID, unsigned> beginInfo = SM.getDecomposedLoc(Range.getBegin()); + if (beginInfo.first.isInvalid()) { + if (Invalid) *Invalid = true; + return StringRef(); + } + + unsigned EndOffs; + if (!SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) || + beginInfo.second > EndOffs) { + if (Invalid) *Invalid = true; + return StringRef(); + } + + // Try to the load the file buffer. + bool invalidTemp = false; + StringRef file = SM.getBufferData(beginInfo.first, &invalidTemp); + if (invalidTemp) { + if (Invalid) *Invalid = true; + return StringRef(); + } + + if (Invalid) *Invalid = false; + return file.substr(beginInfo.second, EndOffs - beginInfo.second); +} + +StringRef Lexer::getImmediateMacroName(SourceLocation Loc, + const SourceManager &SM, + const LangOptions &LangOpts) { + assert(Loc.isMacroID() && "Only reasonble to call this on macros"); + + // Find the location of the immediate macro expansion. + while (1) { + FileID FID = SM.getFileID(Loc); + const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID); + const SrcMgr::ExpansionInfo &Expansion = E->getExpansion(); + Loc = Expansion.getExpansionLocStart(); + if (!Expansion.isMacroArgExpansion()) + break; + + // For macro arguments we need to check that the argument did not come + // from an inner macro, e.g: "MAC1( MAC2(foo) )" + + // Loc points to the argument id of the macro definition, move to the + // macro expansion. 
+ Loc = SM.getImmediateExpansionRange(Loc).first; + SourceLocation SpellLoc = Expansion.getSpellingLoc(); + if (SpellLoc.isFileID()) + break; // No inner macro. + + // If spelling location resides in the same FileID as macro expansion + // location, it means there is no inner macro. + FileID MacroFID = SM.getFileID(Loc); + if (SM.isInFileID(SpellLoc, MacroFID)) + break; + + // Argument came from inner macro. + Loc = SpellLoc; + } + + // Find the spelling location of the start of the non-argument expansion + // range. This is where the macro name was spelled in order to begin + // expanding this macro. + Loc = SM.getSpellingLoc(Loc); + + // Dig out the buffer where the macro name was spelled and the extents of the + // name so that we can render it into the expansion note. + std::pair<FileID, unsigned> ExpansionInfo = SM.getDecomposedLoc(Loc); + unsigned MacroTokenLength = Lexer::MeasureTokenLength(Loc, SM, LangOpts); + StringRef ExpansionBuffer = SM.getBufferData(ExpansionInfo.first); + return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength); } //===----------------------------------------------------------------------===// @@ -890,6 +1078,12 @@ static void InitCharacterInfo() { } +/// isIdentifierHead - Return true if this is the first character of an +/// identifier, which is [a-zA-Z_]. +static inline bool isIdentifierHead(unsigned char c) { + return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false; +} + /// isIdentifierBody - Return true if this is the body character of an /// identifier, which is [a-zA-Z0-9_]. 
static inline bool isIdentifierBody(unsigned char c) { @@ -1018,7 +1212,7 @@ static char DecodeTrigraphChar(const char *CP, Lexer *L) { char Res = GetTrigraphCharForLetter(*CP); if (!Res || !L) return Res; - if (!L->getFeatures().Trigraphs) { + if (!L->getLangOpts().Trigraphs) { if (!L->isLexingRawMode()) L->Diag(CP-2, diag::trigraph_ignored); return 0; @@ -1085,9 +1279,8 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine) { if (Loc.isMacroID()) { - if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts)) + if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc)) return SourceLocation(); - Loc = SM.getExpansionRange(Loc).second; } Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts); @@ -1169,6 +1362,13 @@ Slash: // Found backslash<whitespace><newline>. Parse the char after it. Size += EscapedNewLineSize; Ptr += EscapedNewLineSize; + + // If the char that we finally got was a \n, then we must have had + // something like \<newline><newline>. We don't want to consume the + // second newline. + if (*Ptr == '\n' || *Ptr == '\r' || *Ptr == '\0') + return ' '; + // Use slow version to accumulate a correct size field. return getCharAndSizeSlow(Ptr, Size, Tok); } @@ -1205,7 +1405,7 @@ Slash: /// NOTE: When this method is updated, getCharAndSizeSlow (above) should /// be updated to match. char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, - const LangOptions &Features) { + const LangOptions &LangOpts) { // If we have a slash, look for an escaped newline. if (Ptr[0] == '\\') { ++Size; @@ -1220,8 +1420,14 @@ Slash: Size += EscapedNewLineSize; Ptr += EscapedNewLineSize; + // If the char that we finally got was a \n, then we must have had + // something like \<newline><newline>. We don't want to consume the + // second newline. + if (*Ptr == '\n' || *Ptr == '\r' || *Ptr == '\0') + return ' '; + // Use slow version to accumulate a correct size field. 
- return getCharAndSizeSlowNoWarn(Ptr, Size, Features); + return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts); } // Otherwise, this is not an escaped newline, just return the slash. @@ -1229,7 +1435,7 @@ Slash: } // If this is a trigraph, process it. - if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') { + if (LangOpts.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') { // If this is actually a legal trigraph (not something like "??x"), return // it. if (char C = GetTrigraphCharForLetter(Ptr[2])) { @@ -1272,7 +1478,7 @@ void Lexer::LexIdentifier(Token &Result, const char *CurPtr) { // // TODO: Could merge these checks into a CharInfo flag to make the comparison // cheaper - if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) { + if (C != '\\' && C != '?' && (C != '$' || !LangOpts.DollarIdents)) { FinishIdentifier: const char *IdStart = BufferPtr; FormTokenWithChars(Result, CurPtr, tok::raw_identifier); @@ -1301,7 +1507,7 @@ FinishIdentifier: while (1) { if (C == '$') { // If we hit a $ and they are not supported in identifiers, we are done. - if (!Features.DollarIdents) goto FinishIdentifier; + if (!LangOpts.DollarIdents) goto FinishIdentifier; // Otherwise, emit a diagnostic and continue. if (!isLexingRawMode()) @@ -1327,12 +1533,12 @@ FinishIdentifier: /// isHexaLiteral - Return true if Start points to a hex constant. /// in microsoft mode (where this is supposed to be several different tokens). 
-static bool isHexaLiteral(const char *Start, const LangOptions &Features) { +static bool isHexaLiteral(const char *Start, const LangOptions &LangOpts) { unsigned Size; - char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, Features); + char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, LangOpts); if (C1 != '0') return false; - char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, Features); + char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, LangOpts); return (C2 == 'x' || C2 == 'X'); } @@ -1343,7 +1549,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { unsigned Size; char C = getCharAndSize(CurPtr, Size); char PrevCh = 0; - while (isNumberBody(C)) { // FIXME: UCNs? + while (isNumberBody(C)) { // FIXME: UCNs. CurPtr = ConsumeChar(CurPtr, Size, Result); PrevCh = C; C = getCharAndSize(CurPtr, Size); @@ -1353,7 +1559,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) { // If we are in Microsoft mode, don't continue if the constant is hex. // For example, MSVC will accept the following as 3 tokens: 0x1234567e+1 - if (!Features.MicrosoftExt || !isHexaLiteral(BufferPtr, Features)) + if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts)) return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); } @@ -1367,6 +1573,46 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { Result.setLiteralData(TokStart); } +/// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes +/// in C++11, or warn on a ud-suffix in C++98. +const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) { + assert(getLangOpts().CPlusPlus); + + // Maximally munch an identifier. FIXME: UCNs. + unsigned Size; + char C = getCharAndSize(CurPtr, Size); + if (isIdentifierHead(C)) { + if (!getLangOpts().CPlusPlus0x) { + if (!isLexingRawMode()) + Diag(CurPtr, + C == '_' ? 
diag::warn_cxx11_compat_user_defined_literal + : diag::warn_cxx11_compat_reserved_user_defined_literal) + << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " "); + return CurPtr; + } + + // C++11 [lex.ext]p10, [usrlit.suffix]p1: A program containing a ud-suffix + // that does not start with an underscore is ill-formed. As a conforming + // extension, we treat all such suffixes as if they had whitespace before + // them. + if (C != '_') { + if (!isLexingRawMode()) + Diag(CurPtr, getLangOpts().MicrosoftMode ? + diag::ext_ms_reserved_user_defined_literal : + diag::ext_reserved_user_defined_literal) + << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " "); + return CurPtr; + } + + Result.setFlag(Token::HasUDSuffix); + do { + CurPtr = ConsumeChar(CurPtr, Size, Result); + C = getCharAndSize(CurPtr, Size); + } while (isIdentifierBody(C)); + } + return CurPtr; +} + /// LexStringLiteral - Lex the remainder of a string literal, after having lexed /// either " or L" or u8" or u" or U". void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, @@ -1388,7 +1634,7 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, if (C == '\n' || C == '\r' || // Newline. (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. - if (!isLexingRawMode() && !Features.AsmPreprocessor) + if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) Diag(BufferPtr, diag::warn_unterminated_string); FormTokenWithChars(Result, CurPtr-1, tok::unknown); return; @@ -1406,6 +1652,10 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, C = getAndAdvanceChar(CurPtr, Result); } + // If we are in C++11, lex the optional ud-suffix. + if (getLangOpts().CPlusPlus) + CurPtr = LexUDSuffix(Result, CurPtr); + // If a nul character existed in the string, warn about it. 
if (NulCharacter && !isLexingRawMode()) Diag(NulCharacter, diag::null_in_string); @@ -1485,6 +1735,10 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, } } + // If we are in C++11, lex the optional ud-suffix. + if (getLangOpts().CPlusPlus) + CurPtr = LexUDSuffix(Result, CurPtr); + // Update the location of token as well as BufferPtr. const char *TokStart = BufferPtr; FormTokenWithChars(Result, CurPtr, Kind); @@ -1538,7 +1792,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr, char C = getAndAdvanceChar(CurPtr, Result); if (C == '\'') { - if (!isLexingRawMode() && !Features.AsmPreprocessor) + if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) Diag(BufferPtr, diag::err_empty_character); FormTokenWithChars(Result, CurPtr, tok::unknown); return; @@ -1552,7 +1806,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr, C = getAndAdvanceChar(CurPtr, Result); } else if (C == '\n' || C == '\r' || // Newline. (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. - if (!isLexingRawMode() && !Features.AsmPreprocessor) + if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) Diag(BufferPtr, diag::warn_unterminated_char); FormTokenWithChars(Result, CurPtr-1, tok::unknown); return; @@ -1568,6 +1822,10 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr, C = getAndAdvanceChar(CurPtr, Result); } + // If we are in C++11, lex the optional ud-suffix. + if (getLangOpts().CPlusPlus) + CurPtr = LexUDSuffix(Result, CurPtr); + // If a nul character existed in the character, warn about it. if (NulCharacter && !isLexingRawMode()) Diag(NulCharacter, diag::null_in_char); @@ -1633,12 +1891,12 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) { // If BCPL comments aren't explicitly enabled for this language, emit an // extension warning. 
- if (!Features.BCPLComment && !isLexingRawMode()) { + if (!LangOpts.BCPLComment && !isLexingRawMode()) { Diag(BufferPtr, diag::ext_bcpl_comment); // Mark them enabled so we only emit one warning for this translation // unit. - Features.BCPLComment = true; + LangOpts.BCPLComment = true; } // Scan over the body of the comment. The common case, when scanning, is that @@ -1687,14 +1945,6 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) { break; } - // If the char that we finally got was a \n, then we must have had something - // like \<newline><newline>. We don't want to have consumed the second - // newline, we want CurPtr, to end up pointing to it down below. - if (C == '\n' || C == '\r') { - --CurPtr; - C = 'x'; // doesn't matter what this is. - } - // If we read multiple characters, and one of those characters was a \r or // \n, then we had an escaped newline within the comment. Emit diagnostic // unless the next line is also a // comment. @@ -1833,7 +2083,7 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, // If no trigraphs are enabled, warn that we ignored this trigraph and // ignore this * character. - if (!L->getFeatures().Trigraphs) { + if (!L->getLangOpts().Trigraphs) { if (!L->isLexingRawMode()) L->Diag(CurPtr, diag::trigraph_ignored_block_comment); return false; @@ -1916,11 +2166,18 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { if (C == '/') goto FoundSlash; #ifdef __SSE2__ - __m128i Slashes = _mm_set_epi8('/', '/', '/', '/', '/', '/', '/', '/', - '/', '/', '/', '/', '/', '/', '/', '/'); - while (CurPtr+16 <= BufferEnd && - _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes)) == 0) + __m128i Slashes = _mm_set1_epi8('/'); + while (CurPtr+16 <= BufferEnd) { + int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes)); + if (cmp != 0) { + // Adjust the pointer to point directly after the first slash. 
It's + // not necessary to set C here, it will be overwritten at the end of + // the outer loop. + CurPtr += llvm::CountTrailingZeros_32(cmp) + 1; + goto FoundSlash; + } CurPtr += 16; + } #elif __ALTIVEC__ __vector unsigned char Slashes = { '/', '/', '/', '/', '/', '/', '/', '/', @@ -1948,8 +2205,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { while (C != '/' && C != '\0') C = *CurPtr++; - FoundSlash: if (C == '/') { + FoundSlash: if (CurPtr[-2] == '*') // We found the final */. We're done! break; @@ -2119,8 +2376,9 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue // a pedwarn. if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')) - Diag(BufferEnd, diag::ext_no_newline_eof) - << FixItHint::CreateInsertion(getSourceLocation(BufferEnd), "\n"); + Diag(BufferEnd, LangOpts.CPlusPlus0x ? // C++11 [lex.phases] 2.2 p2 + diag::warn_cxx98_compat_no_newline_eof : diag::ext_no_newline_eof) + << FixItHint::CreateInsertion(getSourceLocation(BufferEnd), "\n"); BufferPtr = CurPtr; @@ -2345,7 +2603,7 @@ LexNextToken: case 26: // DOS & CP/M EOF: "^Z". // If we're in Microsoft extensions mode, treat this as end of file. - if (Features.MicrosoftExt) { + if (LangOpts.MicrosoftExt) { // Read the PP instance variable into an automatic variable, because // LexEndOfFile will often delete 'this'. Preprocessor *PPCache = PP; @@ -2398,7 +2656,7 @@ LexNextToken: // If the next token is obviously a // or /* */ comment, skip it efficiently // too (without going through the big switch stmt). if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() && - Features.BCPLComment && !Features.TraditionalCPP) { + LangOpts.BCPLComment && !LangOpts.TraditionalCPP) { if (SkipBCPLComment(Result, CurPtr+2)) return; // There is a token to return. goto SkipIgnoredUnits; @@ -2423,7 +2681,7 @@ LexNextToken: // Notify MIOpt that we read a non-whitespace/non-comment token. 
MIOpt.ReadToken(); - if (Features.CPlusPlus0x) { + if (LangOpts.CPlusPlus0x) { Char = getCharAndSize(CurPtr, SizeTmp); // UTF-16 string literal @@ -2475,7 +2733,7 @@ LexNextToken: // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - if (Features.CPlusPlus0x) { + if (LangOpts.CPlusPlus0x) { Char = getCharAndSize(CurPtr, SizeTmp); // UTF-32 string literal @@ -2503,7 +2761,7 @@ LexNextToken: // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - if (Features.CPlusPlus0x) { + if (LangOpts.CPlusPlus0x) { Char = getCharAndSize(CurPtr, SizeTmp); if (Char == '"') @@ -2526,7 +2784,7 @@ LexNextToken: tok::wide_string_literal); // Wide raw string literal. - if (Features.CPlusPlus0x && Char == 'R' && + if (LangOpts.CPlusPlus0x && Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') return LexRawStringLiteral(Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), @@ -2554,7 +2812,7 @@ LexNextToken: return LexIdentifier(Result, CurPtr); case '$': // $ in identifiers. - if (Features.DollarIdents) { + if (LangOpts.DollarIdents) { if (!isLexingRawMode()) Diag(CurPtr-1, diag::ext_dollar_in_identifier); // Notify MIOpt that we read a non-whitespace/non-comment token. @@ -2606,7 +2864,7 @@ LexNextToken: MIOpt.ReadToken(); return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result)); - } else if (Features.CPlusPlus && Char == '*') { + } else if (LangOpts.CPlusPlus && Char == '*') { Kind = tok::periodstar; CurPtr += SizeTmp; } else if (Char == '.' && @@ -2655,7 +2913,7 @@ LexNextToken: if (Char == '-') { // -- CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::minusminus; - } else if (Char == '>' && Features.CPlusPlus && + } else if (Char == '>' && LangOpts.CPlusPlus && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') { // C++ ->* CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result); @@ -2693,9 +2951,9 @@ LexNextToken: // "foo". 
Check to see if the character after the second slash is a '*'. // If so, we will lex that as a "/" instead of the start of a comment. // However, we never do this in -traditional-cpp mode. - if ((Features.BCPLComment || + if ((LangOpts.BCPLComment || getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*') && - !Features.TraditionalCPP) { + !LangOpts.TraditionalCPP) { if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) return; // There is a token to return. @@ -2724,20 +2982,20 @@ LexNextToken: if (Char == '=') { Kind = tok::percentequal; CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); - } else if (Features.Digraphs && Char == '>') { + } else if (LangOpts.Digraphs && Char == '>') { Kind = tok::r_brace; // '%>' -> '}' CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); - } else if (Features.Digraphs && Char == ':') { + } else if (LangOpts.Digraphs && Char == ':') { CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Char = getCharAndSize(CurPtr, SizeTmp); if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') { Kind = tok::hashhash; // '%:%:' -> '##' CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result); - } else if (Char == '@' && Features.MicrosoftExt) {// %:@ -> #@ -> Charize + } else if (Char == '@' && LangOpts.MicrosoftExt) {// %:@ -> #@ -> Charize CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); if (!isLexingRawMode()) - Diag(BufferPtr, diag::charize_microsoft_ext); + Diag(BufferPtr, diag::ext_charize_microsoft); Kind = tok::hashat; } else { // '%:' -> '#' // We parsed a # character. If this occurs at the start of the line, @@ -2789,7 +3047,7 @@ LexNextToken: // If this is '<<<<' and we're in a Perforce-style conflict marker, // ignore it. 
goto LexNextToken; - } else if (Features.CUDA && After == '<') { + } else if (LangOpts.CUDA && After == '<') { Kind = tok::lesslessless; CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result); @@ -2800,8 +3058,8 @@ LexNextToken: } else if (Char == '=') { CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::lessequal; - } else if (Features.Digraphs && Char == ':') { // '<:' -> '[' - if (Features.CPlusPlus0x && + } else if (LangOpts.Digraphs && Char == ':') { // '<:' -> '[' + if (LangOpts.CPlusPlus0x && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') { // C++0x [lex.pptoken]p3: // Otherwise, if the next three characters are <:: and the subsequent @@ -2820,7 +3078,7 @@ LexNextToken: CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::l_square; - } else if (Features.Digraphs && Char == '%') { // '<%' -> '{' + } else if (LangOpts.Digraphs && Char == '%') { // '<%' -> '{' CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::l_brace; } else { @@ -2845,7 +3103,7 @@ LexNextToken: } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) { // If this is '>>>>>>>' and we're in a conflict marker, ignore it. 
goto LexNextToken; - } else if (Features.CUDA && After == '>') { + } else if (LangOpts.CUDA && After == '>') { Kind = tok::greatergreatergreater; CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result); @@ -2884,10 +3142,10 @@ LexNextToken: break; case ':': Char = getCharAndSize(CurPtr, SizeTmp); - if (Features.Digraphs && Char == '>') { + if (LangOpts.Digraphs && Char == '>') { Kind = tok::r_square; // ':>' -> ']' CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); - } else if (Features.CPlusPlus && Char == ':') { + } else if (LangOpts.CPlusPlus && Char == ':') { Kind = tok::coloncolon; CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); } else { @@ -2918,10 +3176,10 @@ LexNextToken: if (Char == '#') { Kind = tok::hashhash; CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); - } else if (Char == '@' && Features.MicrosoftExt) { // #@ -> Charize + } else if (Char == '@' && LangOpts.MicrosoftExt) { // #@ -> Charize Kind = tok::hashat; if (!isLexingRawMode()) - Diag(BufferPtr, diag::charize_microsoft_ext); + Diag(BufferPtr, diag::ext_charize_microsoft); CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); } else { // We parsed a # character. If this occurs at the start of the line, @@ -2954,7 +3212,7 @@ LexNextToken: case '@': // Objective C support. 
- if (CurPtr[-1] == '@' && Features.ObjC1) + if (CurPtr[-1] == '@' && LangOpts.ObjC1) Kind = tok::at; else Kind = tok::unknown; diff --git a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp index 70183fd..c1d228b 100644 --- a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp @@ -16,6 +16,7 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Lex/LexDiagnostic.h" #include "clang/Basic/TargetInfo.h" +#include "clang/Basic/ConvertUTF.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ErrorHandling.h" using namespace clang; @@ -178,15 +179,16 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf, /// ProcessUCNEscape - Read the Universal Character Name, check constraints and /// return the UTF32. -static bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, +static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, + const char *ThisTokEnd, uint32_t &UcnVal, unsigned short &UcnLen, FullSourceLoc Loc, DiagnosticsEngine *Diags, - const LangOptions &Features) { + const LangOptions &Features, + bool in_char_string_literal = false) { if (!Features.CPlusPlus && !Features.C99 && Diags) Diags->Report(Loc, diag::warn_ucn_not_valid_in_c89); - // Save the beginning of the string (for error diagnostics). - const char *ThisTokBegin = ThisTokBuf; + const char *UcnBegin = ThisTokBuf; // Skip the '\u' char's. 
ThisTokBuf += 2; @@ -208,22 +210,43 @@ static bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, if (UcnLenSave) { if (Diags) { SourceLocation L = - Lexer::AdvanceToTokenCharacter(Loc, ThisTokBuf-ThisTokBegin, + Lexer::AdvanceToTokenCharacter(Loc, UcnBegin - ThisTokBegin, Loc.getManager(), Features); - Diags->Report(FullSourceLoc(L, Loc.getManager()), - diag::err_ucn_escape_incomplete); + Diags->Report(L, diag::err_ucn_escape_incomplete); } return false; } - // Check UCN constraints (C99 6.4.3p2). - if ((UcnVal < 0xa0 && - (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60 )) // $, @, ` - || (UcnVal >= 0xD800 && UcnVal <= 0xDFFF) - || (UcnVal > 0x10FFFF)) /* the maximum legal UTF32 value */ { + + // Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2] + if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) || // surrogate codepoints + UcnVal > 0x10FFFF) { // maximum legal UTF32 value if (Diags) Diags->Report(Loc, diag::err_ucn_escape_invalid); return false; } + + // C++11 allows UCNs that refer to control characters and basic source + // characters inside character and string literals + if (UcnVal < 0xa0 && + (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) { // $, @, ` + bool IsError = (!Features.CPlusPlus0x || !in_char_string_literal); + if (Diags) { + SourceLocation UcnBeginLoc = + Lexer::AdvanceToTokenCharacter(Loc, UcnBegin - ThisTokBegin, + Loc.getManager(), Features); + char BasicSCSChar = UcnVal; + if (UcnVal >= 0x20 && UcnVal < 0x7f) + Diags->Report(UcnBeginLoc, IsError ? diag::err_ucn_escape_basic_scs : + diag::warn_cxx98_compat_literal_ucn_escape_basic_scs) + << StringRef(&BasicSCSChar, 1); + else + Diags->Report(UcnBeginLoc, IsError ? diag::err_ucn_control_character : + diag::warn_cxx98_compat_literal_ucn_control_character); + } + if (IsError) + return false; + } + return true; } @@ -231,7 +254,8 @@ static bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, /// convert the UTF32 to UTF8 or UTF16. 
This is a subroutine of /// StringLiteralParser. When we decide to implement UCN's for identifiers, /// we will likely rework our support for UCN's. -static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, +static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, + const char *ThisTokEnd, char *&ResultBuf, bool &HadError, FullSourceLoc Loc, unsigned CharByteWidth, DiagnosticsEngine *Diags, @@ -239,8 +263,8 @@ static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, typedef uint32_t UTF32; UTF32 UcnVal = 0; unsigned short UcnLen = 0; - if (!ProcessUCNEscape(ThisTokBuf, ThisTokEnd, UcnVal, UcnLen, Loc, Diags, - Features)) { + if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, UcnLen, + Loc, Diags, Features, true)) { HadError = 1; return; } @@ -252,31 +276,30 @@ static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported"); if (CharByteWidth == 4) { - // Note: our internal rep of wide char tokens is always little-endian. - *ResultBuf++ = (UcnVal & 0x000000FF); - *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8; - *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16; - *ResultBuf++ = (UcnVal & 0xFF000000) >> 24; + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF32 *ResultPtr = reinterpret_cast<UTF32*>(ResultBuf); + *ResultPtr = UcnVal; + ResultBuf += 4; return; } if (CharByteWidth == 2) { - // Convert to UTF16. + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF16 *ResultPtr = reinterpret_cast<UTF16*>(ResultBuf); + if (UcnVal < (UTF32)0xFFFF) { - *ResultBuf++ = (UcnVal & 0x000000FF); - *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8; + *ResultPtr = UcnVal; + ResultBuf += 2; return; } - if (Diags) Diags->Report(Loc, diag::warn_ucn_escape_too_large); - typedef uint16_t UTF16; + // Convert to UTF16. 
UcnVal -= 0x10000; - UTF16 surrogate1 = 0xD800 + (UcnVal >> 10); - UTF16 surrogate2 = 0xDC00 + (UcnVal & 0x3FF); - *ResultBuf++ = (surrogate1 & 0x000000FF); - *ResultBuf++ = (surrogate1 & 0x0000FF00) >> 8; - *ResultBuf++ = (surrogate2 & 0x000000FF); - *ResultBuf++ = (surrogate2 & 0x0000FF00) >> 8; + *ResultPtr = 0xD800 + (UcnVal >> 10); + *(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF); + ResultBuf += 4; return; } @@ -323,6 +346,10 @@ static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, /// decimal-constant integer-suffix /// octal-constant integer-suffix /// hexadecimal-constant integer-suffix +/// user-defined-integer-literal: [C++11 lex.ext] +/// decimal-literal ud-suffix +/// octal-literal ud-suffix +/// hexadecimal-literal ud-suffix /// decimal-constant: /// nonzero-digit /// decimal-constant digit @@ -372,6 +399,7 @@ NumericLiteralParser(const char *begin, const char *end, s = DigitsBegin = begin; saw_exponent = false; saw_period = false; + saw_ud_suffix = false; isLong = false; isUnsigned = false; isLongLong = false; @@ -454,7 +482,7 @@ NumericLiteralParser(const char *begin, const char *end, continue; // Success. case 'i': case 'I': - if (PP.getLangOptions().MicrosoftExt) { + if (PP.getLangOpts().MicrosoftExt) { if (isFPConstant || isLong || isLongLong) break; // Allow i8, i16, i32, i64, and i128. @@ -509,13 +537,20 @@ NumericLiteralParser(const char *begin, const char *end, isImaginary = true; continue; // Success. } - // If we reached here, there was an error. + // If we reached here, there was an error or a ud-suffix. break; } - // Report an error if there are any. if (s != ThisTokEnd) { - PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin), + if (PP.getLangOpts().CPlusPlus0x && s == SuffixBegin && *s == '_') { + // We have a ud-suffix! By C++11 [lex.ext]p10, ud-suffixes not starting + // with an '_' are ill-formed. + saw_ud_suffix = true; + return; + } + + // Report an error if there are any. 
+ PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin-begin), isFPConstant ? diag::err_invalid_suffix_float_constant : diag::err_invalid_suffix_integer_constant) << StringRef(SuffixBegin, ThisTokEnd-SuffixBegin); @@ -539,13 +574,24 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { radix = 16; DigitsBegin = s; s = SkipHexDigits(s); + bool noSignificand = (s == DigitsBegin); if (s == ThisTokEnd) { // Done. } else if (*s == '.') { s++; saw_period = true; + const char *floatDigitsBegin = s; s = SkipHexDigits(s); + noSignificand &= (floatDigitsBegin == s); + } + + if (noSignificand) { + PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), \ + diag::err_hexconstant_requires_digits); + hadError = true; + return; } + // A binary exponent can appear with or with a '.'. If dotted, the // binary exponent is required. if (*s == 'p' || *s == 'P') { @@ -562,7 +608,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { } s = first_non_digit; - if (!PP.getLangOptions().HexFloats) + if (!PP.getLangOpts().HexFloats) PP.Diag(TokLoc, diag::ext_hexconstant_invalid); } else if (saw_period) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), @@ -710,7 +756,11 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { } -/// character-literal: [C++0x lex.ccon] +/// user-defined-character-literal: [C++11 lex.ext] +/// character-literal ud-suffix +/// ud-suffix: +/// identifier +/// character-literal: [C++11 lex.ccon] /// ' c-char-sequence ' /// u' c-char-sequence ' /// U' c-char-sequence ' @@ -723,7 +773,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { /// backslash \, or new-line character /// escape-sequence /// universal-character-name -/// escape-sequence: [C++0x lex.ccon] +/// escape-sequence: /// simple-escape-sequence /// octal-escape-sequence /// hexadecimal-escape-sequence @@ -736,7 +786,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { /// 
hexadecimal-escape-sequence: /// \x hexadecimal-digit /// hexadecimal-escape-sequence hexadecimal-digit -/// universal-character-name: +/// universal-character-name: [C++11 lex.charset] /// \u hex-quad /// \U hex-quad hex-quad /// hex-quad: @@ -745,14 +795,15 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { CharLiteralParser::CharLiteralParser(const char *begin, const char *end, SourceLocation Loc, Preprocessor &PP, tok::TokenKind kind) { - // At this point we know that the character matches the regex "L?'.*'". + // At this point we know that the character matches the regex "(L|u|U)?'.*'". HadError = false; Kind = kind; - // Determine if this is a wide or UTF character. - if (Kind == tok::wide_char_constant || Kind == tok::utf16_char_constant || - Kind == tok::utf32_char_constant) { + const char *TokBegin = begin; + + // Skip over wide character determinant. + if (Kind != tok::char_constant) { ++begin; } @@ -760,6 +811,20 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, assert(begin[0] == '\'' && "Invalid token lexed"); ++begin; + // Remove an optional ud-suffix. + if (end[-1] != '\'') { + const char *UDSuffixEnd = end; + do { + --end; + } while (end[-1] != '\''); + UDSuffixBuf.assign(end, UDSuffixEnd); + UDSuffixOffset = end - TokBegin; + } + + // Trim the ending quote. + assert(end != begin && "Invalid token lexed"); + --end; + // FIXME: The "Value" is an uint64_t so we can handle char literals of // up to 64-bits. // FIXME: This extensively assumes that 'char' is 8-bits. 
@@ -771,76 +836,129 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, assert(PP.getTargetInfo().getWCharWidth() <= 64 && "Assumes sizeof(wchar) on target is <= 64"); - // This is what we will use for overflow detection - llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0); + SmallVector<uint32_t,4> codepoint_buffer; + codepoint_buffer.resize(end-begin); + uint32_t *buffer_begin = &codepoint_buffer.front(); + uint32_t *buffer_end = buffer_begin + codepoint_buffer.size(); + + // Unicode escapes representing characters that cannot be correctly + // represented in a single code unit are disallowed in character literals + // by this implementation. + uint32_t largest_character_for_kind; + if (tok::wide_char_constant == Kind) { + largest_character_for_kind = 0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth()); + } else if (tok::utf16_char_constant == Kind) { + largest_character_for_kind = 0xFFFF; + } else if (tok::utf32_char_constant == Kind) { + largest_character_for_kind = 0x10FFFF; + } else { + largest_character_for_kind = 0x7Fu; + } - unsigned NumCharsSoFar = 0; - bool Warned = false; - while (begin[0] != '\'') { - uint64_t ResultChar; - - // Is this a Universal Character Name escape? - if (begin[0] != '\\') // If this is a normal character, consume it. - ResultChar = (unsigned char)*begin++; - else { // Otherwise, this is an escape character. - unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo()); - // Check for UCN. - if (begin[1] == 'u' || begin[1] == 'U') { - uint32_t utf32 = 0; - unsigned short UcnLen = 0; - if (!ProcessUCNEscape(begin, end, utf32, UcnLen, - FullSourceLoc(Loc, PP.getSourceManager()), - &PP.getDiagnostics(), PP.getLangOptions())) { - HadError = 1; + while (begin!=end) { + // Is this a span of non-escape characters? 
+ if (begin[0] != '\\') { + char const *start = begin; + do { + ++begin; + } while (begin != end && *begin != '\\'); + + char const *tmp_in_start = start; + uint32_t *tmp_out_start = buffer_begin; + ConversionResult res = + ConvertUTF8toUTF32(reinterpret_cast<UTF8 const **>(&start), + reinterpret_cast<UTF8 const *>(begin), + &buffer_begin,buffer_end,strictConversion); + if (res!=conversionOK) { + // If we see bad encoding for unprefixed character literals, warn and + // simply copy the byte values, for compatibility with gcc and + // older versions of clang. + bool NoErrorOnBadEncoding = isAscii(); + unsigned Msg = diag::err_bad_character_encoding; + if (NoErrorOnBadEncoding) + Msg = diag::warn_bad_character_encoding; + PP.Diag(Loc, Msg); + if (NoErrorOnBadEncoding) { + start = tmp_in_start; + buffer_begin = tmp_out_start; + for ( ; start != begin; ++start, ++buffer_begin) + *buffer_begin = static_cast<uint8_t>(*start); + } else { + HadError = true; } - ResultChar = utf32; - if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) { - PP.Diag(Loc, diag::warn_ucn_escape_too_large); - ResultChar &= ~0U >> (32-CharWidth); - } - } else { - // Otherwise, this is a non-UCN escape character. Process it. - ResultChar = ProcessCharEscape(begin, end, HadError, - FullSourceLoc(Loc,PP.getSourceManager()), - CharWidth, &PP.getDiagnostics()); - } - } - - // If this is a multi-character constant (e.g. 'abc'), handle it. These are - // implementation defined (C99 6.4.4.4p10). - if (NumCharsSoFar) { - if (!isAscii()) { - // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'. - LitVal = 0; } else { - // Narrow character literals act as though their value is concatenated - // in this implementation, but warn on overflow. 
- if (LitVal.countLeadingZeros() < 8 && !Warned) { - PP.Diag(Loc, diag::warn_char_constant_too_large); - Warned = true; + for (; tmp_out_start <buffer_begin; ++tmp_out_start) { + if (*tmp_out_start > largest_character_for_kind) { + HadError = true; + PP.Diag(Loc, diag::err_character_too_large); + } } - LitVal <<= 8; } + + continue; } + // Is this a Universal Character Name excape? + if (begin[1] == 'u' || begin[1] == 'U') { + unsigned short UcnLen = 0; + if (!ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen, + FullSourceLoc(Loc, PP.getSourceManager()), + &PP.getDiagnostics(), PP.getLangOpts(), + true)) + { + HadError = true; + } else if (*buffer_begin > largest_character_for_kind) { + HadError = true; + PP.Diag(Loc,diag::err_character_too_large); + } - LitVal = LitVal + ResultChar; - ++NumCharsSoFar; + ++buffer_begin; + continue; + } + unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo()); + uint64_t result = + ProcessCharEscape(begin, end, HadError, + FullSourceLoc(Loc,PP.getSourceManager()), + CharWidth, &PP.getDiagnostics()); + *buffer_begin++ = result; } - // If this is the second character being processed, do special handling. + unsigned NumCharsSoFar = buffer_begin-&codepoint_buffer.front(); + if (NumCharsSoFar > 1) { - // Warn about discarding the top bits for multi-char wide-character - // constants (L'abcd'). - if (!isAscii()) + if (isWide()) PP.Diag(Loc, diag::warn_extraneous_char_constant); - else if (NumCharsSoFar != 4) + else if (isAscii() && NumCharsSoFar == 4) + PP.Diag(Loc, diag::ext_four_char_character_literal); + else if (isAscii()) PP.Diag(Loc, diag::ext_multichar_character_literal); else - PP.Diag(Loc, diag::ext_four_char_character_literal); + PP.Diag(Loc, diag::err_multichar_utf_character_literal); IsMultiChar = true; } else IsMultiChar = false; + llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0); + + // Narrow character literals act as though their value is concatenated + // in this implementation, but warn on overflow. 
+ bool multi_char_too_long = false; + if (isAscii() && isMultiChar()) { + LitVal = 0; + for (size_t i=0;i<NumCharsSoFar;++i) { + // check for enough leading zeros to shift into + multi_char_too_long |= (LitVal.countLeadingZeros() < 8); + LitVal <<= 8; + LitVal = LitVal + (codepoint_buffer[i] & 0xFF); + } + } else if (NumCharsSoFar > 0) { + // otherwise just take the last character + LitVal = buffer_begin[-1]; + } + + if (!HadError && multi_char_too_long) { + PP.Diag(Loc,diag::warn_char_constant_too_large); + } + // Transfer the value from APInt to uint64_t Value = LitVal.getZExtValue(); @@ -849,7 +967,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, // character constants are not sign extended in the this implementation: // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC. if (isAscii() && NumCharsSoFar == 1 && (Value & 128) && - PP.getLangOptions().CharIsSigned) + PP.getLangOpts().CharIsSigned) Value = (signed char)Value; } @@ -909,7 +1027,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, StringLiteralParser:: StringLiteralParser(const Token *StringToks, unsigned NumStringToks, Preprocessor &PP, bool Complain) - : SM(PP.getSourceManager()), Features(PP.getLangOptions()), + : SM(PP.getSourceManager()), Features(PP.getLangOpts()), Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0), MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { @@ -985,7 +1103,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ ResultBuf.resize(SizeBound); // Likewise, but for each string piece. 
- llvm::SmallString<512> TokenBuf; + SmallString<512> TokenBuf; TokenBuf.resize(MaxTokenLength); // Loop over all the strings, getting their spelling, and expanding them to @@ -994,6 +1112,8 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ Pascal = false; + SourceLocation UDSuffixTokLoc; + for (unsigned i = 0, e = NumStringToks; i != e; ++i) { const char *ThisTokBuf = &TokenBuf[0]; // Get the spelling of the token, which eliminates trigraphs, etc. We know @@ -1008,7 +1128,42 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ continue; } - const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote. + const char *ThisTokBegin = ThisTokBuf; + const char *ThisTokEnd = ThisTokBuf+ThisTokLen; + + // Remove an optional ud-suffix. + if (ThisTokEnd[-1] != '"') { + const char *UDSuffixEnd = ThisTokEnd; + do { + --ThisTokEnd; + } while (ThisTokEnd[-1] != '"'); + + StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd); + + if (UDSuffixBuf.empty()) { + UDSuffixBuf.assign(UDSuffix); + UDSuffixToken = i; + UDSuffixOffset = ThisTokEnd - ThisTokBuf; + UDSuffixTokLoc = StringToks[i].getLocation(); + } else if (!UDSuffixBuf.equals(UDSuffix)) { + // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the + // result of a concatenation involving at least one user-defined-string- + // literal, all the participating user-defined-string-literals shall + // have the same ud-suffix. + if (Diags) { + SourceLocation TokLoc = StringToks[i].getLocation(); + Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix) + << UDSuffixBuf << UDSuffix + << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc) + << SourceRange(TokLoc, TokLoc); + } + hadError = true; + } + } + + // Strip the end quote. + --ThisTokEnd; + // TODO: Input character set mapping support. // Skip marker for wide or unicode strings. 
@@ -1028,12 +1183,14 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ ++ThisTokBuf; ++ThisTokBuf; // skip '(' - // remove same number of characters from the end - if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix)) - ThisTokEnd -= (ThisTokBuf - Prefix); + // Remove same number of characters from the end + ThisTokEnd -= ThisTokBuf - Prefix; + assert(ThisTokEnd >= ThisTokBuf && "malformed raw string literal"); // Copy the string over - CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf)); + if (CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf))) + if (DiagnoseBadString(StringToks[i])) + hadError = true; } else { assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); ++ThisTokBuf; // skip " @@ -1060,13 +1217,16 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\'); // Copy the character span over. - CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart)); + if (CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart))) + if (DiagnoseBadString(StringToks[i])) + hadError = true; continue; } // Is this a Universal Character Name escape? if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') { - EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr, - hadError, FullSourceLoc(StringToks[i].getLocation(),SM), + EncodeUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, + ResultPtr, hadError, + FullSourceLoc(StringToks[i].getLocation(), SM), CharByteWidth, Diags, Features); continue; } @@ -1076,18 +1236,41 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ FullSourceLoc(StringToks[i].getLocation(), SM), CharByteWidth*8, Diags); - // Note: our internal rep of wide char tokens is always little-endian. 
- *ResultPtr++ = ResultChar & 0xFF; - - for (unsigned i = 1, e = CharByteWidth; i != e; ++i) - *ResultPtr++ = ResultChar >> i*8; + if (CharByteWidth == 4) { + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultPtr); + *ResultWidePtr = ResultChar; + ResultPtr += 4; + } else if (CharByteWidth == 2) { + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultPtr); + *ResultWidePtr = ResultChar & 0xFFFF; + ResultPtr += 2; + } else { + assert(CharByteWidth == 1 && "Unexpected char width"); + *ResultPtr++ = ResultChar & 0xFF; + } } } } if (Pascal) { - ResultBuf[0] = ResultPtr-&ResultBuf[0]-1; - ResultBuf[0] /= CharByteWidth; + if (CharByteWidth == 4) { + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultBuf.data()); + ResultWidePtr[0] = GetNumStringChars() - 1; + } else if (CharByteWidth == 2) { + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultBuf.data()); + ResultWidePtr[0] = GetNumStringChars() - 1; + } else { + assert(CharByteWidth == 1 && "Unexpected char width"); + ResultBuf[0] = GetNumStringChars() - 1; + } // Verify that pascal strings aren't too large. if (GetStringLength() > 256) { @@ -1116,22 +1299,55 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ /// copyStringFragment - This function copies from Start to End into ResultPtr. /// Performs widening for multi-byte characters. 
-void StringLiteralParser::CopyStringFragment(StringRef Fragment) { +bool StringLiteralParser::CopyStringFragment(StringRef Fragment) { + assert(CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4); + ConversionResult result = conversionOK; // Copy the character span over. if (CharByteWidth == 1) { + if (!isLegalUTF8String(reinterpret_cast<const UTF8*>(Fragment.begin()), + reinterpret_cast<const UTF8*>(Fragment.end()))) + result = sourceIllegal; memcpy(ResultPtr, Fragment.data(), Fragment.size()); ResultPtr += Fragment.size(); - } else { - // Note: our internal rep of wide char tokens is always little-endian. - for (StringRef::iterator I=Fragment.begin(), E=Fragment.end(); I!=E; ++I) { - *ResultPtr++ = *I; - // Add zeros at the end. - for (unsigned i = 1, e = CharByteWidth; i != e; ++i) - *ResultPtr++ = 0; - } + } else if (CharByteWidth == 2) { + UTF8 const *sourceStart = (UTF8 const *)Fragment.data(); + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr); + ConversionFlags flags = strictConversion; + result = ConvertUTF8toUTF16( + &sourceStart,sourceStart + Fragment.size(), + &targetStart,targetStart + 2*Fragment.size(),flags); + if (result==conversionOK) + ResultPtr = reinterpret_cast<char*>(targetStart); + } else if (CharByteWidth == 4) { + UTF8 const *sourceStart = (UTF8 const *)Fragment.data(); + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. 
+ UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr); + ConversionFlags flags = strictConversion; + result = ConvertUTF8toUTF32( + &sourceStart,sourceStart + Fragment.size(), + &targetStart,targetStart + 4*Fragment.size(),flags); + if (result==conversionOK) + ResultPtr = reinterpret_cast<char*>(targetStart); } + assert((result != targetExhausted) + && "ConvertUTF8toUTFXX exhausted target buffer"); + return result != conversionOK; } +bool StringLiteralParser::DiagnoseBadString(const Token &Tok) { + // If we see bad encoding for unprefixed string literals, warn and + // simply copy the byte values, for compatibility with gcc and older + // versions of clang. + bool NoErrorOnBadEncoding = isAscii(); + unsigned Msg = NoErrorOnBadEncoding ? diag::warn_bad_string_encoding : + diag::err_bad_string_encoding; + if (Diags) + Diags->Report(FullSourceLoc(Tok.getLocation(), SM), Msg); + return !NoErrorOnBadEncoding; +} /// getOffsetOfStringByte - This function returns the offset of the /// specified byte of the string data represented by Token. This handles @@ -1139,7 +1355,7 @@ void StringLiteralParser::CopyStringFragment(StringRef Fragment) { unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok, unsigned ByteNo) const { // Get the spelling of the token. 
- llvm::SmallString<32> SpellingBuffer; + SmallString<32> SpellingBuffer; SpellingBuffer.resize(Tok.getLength()); bool StringInvalid = false; diff --git a/contrib/llvm/tools/clang/lib/Lex/MacroArgs.cpp b/contrib/llvm/tools/clang/lib/Lex/MacroArgs.cpp index 1846d1c..e2b251a 100644 --- a/contrib/llvm/tools/clang/lib/Lex/MacroArgs.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/MacroArgs.cpp @@ -15,7 +15,8 @@ #include "clang/Lex/MacroInfo.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/LexDiagnostic.h" - +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/SaveAndRestore.h" #include <algorithm> using namespace clang; @@ -155,6 +156,8 @@ MacroArgs::getPreExpArgument(unsigned Arg, const MacroInfo *MI, std::vector<Token> &Result = PreExpArgTokens[Arg]; if (!Result.empty()) return Result; + SaveAndRestore<bool> PreExpandingMacroArgs(PP.InMacroArgPreExpansion, true); + const Token *AT = getUnexpArgument(Arg); unsigned NumToks = getArgLength(AT)+1; // Include the EOF. @@ -177,6 +180,8 @@ MacroArgs::getPreExpArgument(unsigned Arg, const MacroInfo *MI, // will not otherwise be popped until the next token is lexed. The problem is // that the token may be lexed sometime after the vector of tokens itself is // destroyed, which would be badness. + if (PP.InCachingLexMode()) + PP.ExitCachingLexMode(); PP.RemoveTopOfLexerStack(); return Result; } @@ -198,7 +203,7 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks, const Token *ArgTokStart = ArgToks; // Stringify all the tokens. 
- llvm::SmallString<128> Result; + SmallString<128> Result; Result += "\""; bool isFirst = true; diff --git a/contrib/llvm/tools/clang/lib/Lex/MacroInfo.cpp b/contrib/llvm/tools/clang/lib/Lex/MacroInfo.cpp index 5a7af56..3d0c9a1 100644 --- a/contrib/llvm/tools/clang/lib/Lex/MacroInfo.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/MacroInfo.cpp @@ -27,7 +27,8 @@ MacroInfo::MacroInfo(SourceLocation DefLoc) : Location(DefLoc) { IsAllowRedefinitionsWithoutWarning = false; IsWarnIfUnused = false; IsDefinitionLengthCached = false; - + IsPublic = true; + ArgumentList = 0; NumArguments = 0; } @@ -48,6 +49,8 @@ MacroInfo::MacroInfo(const MacroInfo &MI, llvm::BumpPtrAllocator &PPAllocator) { IsWarnIfUnused = MI.IsWarnIfUnused; IsDefinitionLengthCached = MI.IsDefinitionLengthCached; DefinitionLength = MI.DefinitionLength; + IsPublic = MI.IsPublic; + ArgumentList = 0; NumArguments = 0; setArgumentList(MI.ArgumentList, MI.NumArguments, PPAllocator); diff --git a/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp b/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp new file mode 100644 index 0000000..5304311 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp @@ -0,0 +1,1437 @@ +//===--- ModuleMap.cpp - Describe the layout of modules ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ModuleMap implementation, which describes the layout +// of a module as it relates to headers. 
+// +//===----------------------------------------------------------------------===// +#include "clang/Lex/ModuleMap.h" +#include "clang/Lex/Lexer.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Lex/LexDiagnostic.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/TargetOptions.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/PathV2.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +using namespace clang; + +Module::ExportDecl +ModuleMap::resolveExport(Module *Mod, + const Module::UnresolvedExportDecl &Unresolved, + bool Complain) { + // We may have just a wildcard. + if (Unresolved.Id.empty()) { + assert(Unresolved.Wildcard && "Invalid unresolved export"); + return Module::ExportDecl(0, true); + } + + // Find the starting module. + Module *Context = lookupModuleUnqualified(Unresolved.Id[0].first, Mod); + if (!Context) { + if (Complain) + Diags->Report(Unresolved.Id[0].second, + diag::err_mmap_missing_module_unqualified) + << Unresolved.Id[0].first << Mod->getFullModuleName(); + + return Module::ExportDecl(); + } + + // Dig into the module path. 
+ for (unsigned I = 1, N = Unresolved.Id.size(); I != N; ++I) { + Module *Sub = lookupModuleQualified(Unresolved.Id[I].first, + Context); + if (!Sub) { + if (Complain) + Diags->Report(Unresolved.Id[I].second, + diag::err_mmap_missing_module_qualified) + << Unresolved.Id[I].first << Context->getFullModuleName() + << SourceRange(Unresolved.Id[0].second, Unresolved.Id[I-1].second); + + return Module::ExportDecl(); + } + + Context = Sub; + } + + return Module::ExportDecl(Context, Unresolved.Wildcard); +} + +ModuleMap::ModuleMap(FileManager &FileMgr, const DiagnosticConsumer &DC, + const LangOptions &LangOpts, const TargetInfo *Target) + : LangOpts(LangOpts), Target(Target), BuiltinIncludeDir(0) +{ + IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs); + Diags = IntrusiveRefCntPtr<DiagnosticsEngine>( + new DiagnosticsEngine(DiagIDs)); + Diags->setClient(DC.clone(*Diags), /*ShouldOwnClient=*/true); + SourceMgr = new SourceManager(*Diags, FileMgr); +} + +ModuleMap::~ModuleMap() { + for (llvm::StringMap<Module *>::iterator I = Modules.begin(), + IEnd = Modules.end(); + I != IEnd; ++I) { + delete I->getValue(); + } + + delete SourceMgr; +} + +void ModuleMap::setTarget(const TargetInfo &Target) { + assert((!this->Target || this->Target == &Target) && + "Improper target override"); + this->Target = &Target; +} + +Module *ModuleMap::findModuleForHeader(const FileEntry *File) { + llvm::DenseMap<const FileEntry *, Module *>::iterator Known + = Headers.find(File); + if (Known != Headers.end()) { + // If a header corresponds to an unavailable module, don't report + // that it maps to anything. + if (!Known->second->isAvailable()) + return 0; + + return Known->second; + } + + const DirectoryEntry *Dir = File->getDir(); + llvm::SmallVector<const DirectoryEntry *, 2> SkippedDirs; + StringRef DirName = Dir->getName(); + + // Keep walking up the directory hierarchy, looking for a directory with + // an umbrella header. 
+ do { + llvm::DenseMap<const DirectoryEntry *, Module *>::iterator KnownDir + = UmbrellaDirs.find(Dir); + if (KnownDir != UmbrellaDirs.end()) { + Module *Result = KnownDir->second; + + // Search up the module stack until we find a module with an umbrella + // directory. + Module *UmbrellaModule = Result; + while (!UmbrellaModule->getUmbrellaDir() && UmbrellaModule->Parent) + UmbrellaModule = UmbrellaModule->Parent; + + if (UmbrellaModule->InferSubmodules) { + // Infer submodules for each of the directories we found between + // the directory of the umbrella header and the directory where + // the actual header is located. + bool Explicit = UmbrellaModule->InferExplicitSubmodules; + + for (unsigned I = SkippedDirs.size(); I != 0; --I) { + // Find or create the module that corresponds to this directory name. + StringRef Name = llvm::sys::path::stem(SkippedDirs[I-1]->getName()); + Result = findOrCreateModule(Name, Result, /*IsFramework=*/false, + Explicit).first; + + // Associate the module and the directory. + UmbrellaDirs[SkippedDirs[I-1]] = Result; + + // If inferred submodules export everything they import, add a + // wildcard to the set of exports. + if (UmbrellaModule->InferExportWildcard && Result->Exports.empty()) + Result->Exports.push_back(Module::ExportDecl(0, true)); + } + + // Infer a submodule with the same name as this header file. + StringRef Name = llvm::sys::path::stem(File->getName()); + Result = findOrCreateModule(Name, Result, /*IsFramework=*/false, + Explicit).first; + + // If inferred submodules export everything they import, add a + // wildcard to the set of exports. + if (UmbrellaModule->InferExportWildcard && Result->Exports.empty()) + Result->Exports.push_back(Module::ExportDecl(0, true)); + } else { + // Record each of the directories we stepped through as being part of + // the module we found, since the umbrella header covers them all. 
+ for (unsigned I = 0, N = SkippedDirs.size(); I != N; ++I) + UmbrellaDirs[SkippedDirs[I]] = Result; + } + + Headers[File] = Result; + + // If a header corresponds to an unavailable module, don't report + // that it maps to anything. + if (!Result->isAvailable()) + return 0; + + return Result; + } + + SkippedDirs.push_back(Dir); + + // Retrieve our parent path. + DirName = llvm::sys::path::parent_path(DirName); + if (DirName.empty()) + break; + + // Resolve the parent path to a directory entry. + Dir = SourceMgr->getFileManager().getDirectory(DirName); + } while (Dir); + + return 0; +} + +bool ModuleMap::isHeaderInUnavailableModule(const FileEntry *Header) { + llvm::DenseMap<const FileEntry *, Module *>::iterator Known + = Headers.find(Header); + if (Known != Headers.end()) + return !Known->second->isAvailable(); + + const DirectoryEntry *Dir = Header->getDir(); + llvm::SmallVector<const DirectoryEntry *, 2> SkippedDirs; + StringRef DirName = Dir->getName(); + + // Keep walking up the directory hierarchy, looking for a directory with + // an umbrella header. + do { + llvm::DenseMap<const DirectoryEntry *, Module *>::iterator KnownDir + = UmbrellaDirs.find(Dir); + if (KnownDir != UmbrellaDirs.end()) { + Module *Found = KnownDir->second; + if (!Found->isAvailable()) + return true; + + // Search up the module stack until we find a module with an umbrella + // directory. + Module *UmbrellaModule = Found; + while (!UmbrellaModule->getUmbrellaDir() && UmbrellaModule->Parent) + UmbrellaModule = UmbrellaModule->Parent; + + if (UmbrellaModule->InferSubmodules) { + for (unsigned I = SkippedDirs.size(); I != 0; --I) { + // Find or create the module that corresponds to this directory name. + StringRef Name = llvm::sys::path::stem(SkippedDirs[I-1]->getName()); + Found = lookupModuleQualified(Name, Found); + if (!Found) + return false; + if (!Found->isAvailable()) + return true; + } + + // Infer a submodule with the same name as this header file. 
+ StringRef Name = llvm::sys::path::stem(Header->getName()); + Found = lookupModuleQualified(Name, Found); + if (!Found) + return false; + } + + return !Found->isAvailable(); + } + + SkippedDirs.push_back(Dir); + + // Retrieve our parent path. + DirName = llvm::sys::path::parent_path(DirName); + if (DirName.empty()) + break; + + // Resolve the parent path to a directory entry. + Dir = SourceMgr->getFileManager().getDirectory(DirName); + } while (Dir); + + return false; +} + +Module *ModuleMap::findModule(StringRef Name) { + llvm::StringMap<Module *>::iterator Known = Modules.find(Name); + if (Known != Modules.end()) + return Known->getValue(); + + return 0; +} + +Module *ModuleMap::lookupModuleUnqualified(StringRef Name, Module *Context) { + for(; Context; Context = Context->Parent) { + if (Module *Sub = lookupModuleQualified(Name, Context)) + return Sub; + } + + return findModule(Name); +} + +Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) { + if (!Context) + return findModule(Name); + + return Context->findSubmodule(Name); +} + +std::pair<Module *, bool> +ModuleMap::findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework, + bool IsExplicit) { + // Try to find an existing module with this name. + if (Module *Sub = lookupModuleQualified(Name, Parent)) + return std::make_pair(Sub, false); + + // Create a new module with this name. + Module *Result = new Module(Name, SourceLocation(), Parent, IsFramework, + IsExplicit); + if (!Parent) + Modules[Name] = Result; + return std::make_pair(Result, true); +} + +Module * +ModuleMap::inferFrameworkModule(StringRef ModuleName, + const DirectoryEntry *FrameworkDir, + bool IsSystem, + Module *Parent) { + // Check whether we've already found this module. + if (Module *Mod = lookupModuleQualified(ModuleName, Parent)) + return Mod; + + FileManager &FileMgr = SourceMgr->getFileManager(); + + // Look for an umbrella header. 
+ SmallString<128> UmbrellaName = StringRef(FrameworkDir->getName()); + llvm::sys::path::append(UmbrellaName, "Headers"); + llvm::sys::path::append(UmbrellaName, ModuleName + ".h"); + const FileEntry *UmbrellaHeader = FileMgr.getFile(UmbrellaName); + + // FIXME: If there's no umbrella header, we could probably scan the + // framework to load *everything*. But, it's not clear that this is a good + // idea. + if (!UmbrellaHeader) + return 0; + + Module *Result = new Module(ModuleName, SourceLocation(), Parent, + /*IsFramework=*/true, /*IsExplicit=*/false); + if (IsSystem) + Result->IsSystem = IsSystem; + + if (!Parent) + Modules[ModuleName] = Result; + + // umbrella header "umbrella-header-name" + Result->Umbrella = UmbrellaHeader; + Headers[UmbrellaHeader] = Result; + UmbrellaDirs[UmbrellaHeader->getDir()] = Result; + + // export * + Result->Exports.push_back(Module::ExportDecl(0, true)); + + // module * { export * } + Result->InferSubmodules = true; + Result->InferExportWildcard = true; + + // Look for subframeworks. + llvm::error_code EC; + SmallString<128> SubframeworksDirName + = StringRef(FrameworkDir->getName()); + llvm::sys::path::append(SubframeworksDirName, "Frameworks"); + SmallString<128> SubframeworksDirNameNative; + llvm::sys::path::native(SubframeworksDirName.str(), + SubframeworksDirNameNative); + for (llvm::sys::fs::directory_iterator + Dir(SubframeworksDirNameNative.str(), EC), DirEnd; + Dir != DirEnd && !EC; Dir.increment(EC)) { + if (!StringRef(Dir->path()).endswith(".framework")) + continue; + + if (const DirectoryEntry *SubframeworkDir + = FileMgr.getDirectory(Dir->path())) { + // FIXME: Do we want to warn about subframeworks without umbrella headers? 
+ inferFrameworkModule(llvm::sys::path::stem(Dir->path()), SubframeworkDir, + IsSystem, Result); + } + } + + return Result; +} + +void ModuleMap::setUmbrellaHeader(Module *Mod, const FileEntry *UmbrellaHeader){ + Headers[UmbrellaHeader] = Mod; + Mod->Umbrella = UmbrellaHeader; + UmbrellaDirs[UmbrellaHeader->getDir()] = Mod; +} + +void ModuleMap::setUmbrellaDir(Module *Mod, const DirectoryEntry *UmbrellaDir) { + Mod->Umbrella = UmbrellaDir; + UmbrellaDirs[UmbrellaDir] = Mod; +} + +void ModuleMap::addHeader(Module *Mod, const FileEntry *Header) { + Mod->Headers.push_back(Header); + Headers[Header] = Mod; +} + +const FileEntry * +ModuleMap::getContainingModuleMapFile(Module *Module) { + if (Module->DefinitionLoc.isInvalid() || !SourceMgr) + return 0; + + return SourceMgr->getFileEntryForID( + SourceMgr->getFileID(Module->DefinitionLoc)); +} + +void ModuleMap::dump() { + llvm::errs() << "Modules:"; + for (llvm::StringMap<Module *>::iterator M = Modules.begin(), + MEnd = Modules.end(); + M != MEnd; ++M) + M->getValue()->print(llvm::errs(), 2); + + llvm::errs() << "Headers:"; + for (llvm::DenseMap<const FileEntry *, Module *>::iterator + H = Headers.begin(), + HEnd = Headers.end(); + H != HEnd; ++H) { + llvm::errs() << " \"" << H->first->getName() << "\" -> " + << H->second->getFullModuleName() << "\n"; + } +} + +bool ModuleMap::resolveExports(Module *Mod, bool Complain) { + bool HadError = false; + for (unsigned I = 0, N = Mod->UnresolvedExports.size(); I != N; ++I) { + Module::ExportDecl Export = resolveExport(Mod, Mod->UnresolvedExports[I], + Complain); + if (Export.getPointer() || Export.getInt()) + Mod->Exports.push_back(Export); + else + HadError = true; + } + Mod->UnresolvedExports.clear(); + return HadError; +} + +Module *ModuleMap::inferModuleFromLocation(FullSourceLoc Loc) { + if (Loc.isInvalid()) + return 0; + + // Use the expansion location to determine which module we're in. 
+ FullSourceLoc ExpansionLoc = Loc.getExpansionLoc(); + if (!ExpansionLoc.isFileID()) + return 0; + + + const SourceManager &SrcMgr = Loc.getManager(); + FileID ExpansionFileID = ExpansionLoc.getFileID(); + + while (const FileEntry *ExpansionFile + = SrcMgr.getFileEntryForID(ExpansionFileID)) { + // Find the module that owns this header (if any). + if (Module *Mod = findModuleForHeader(ExpansionFile)) + return Mod; + + // No module owns this header, so look up the inclusion chain to see if + // any included header has an associated module. + SourceLocation IncludeLoc = SrcMgr.getIncludeLoc(ExpansionFileID); + if (IncludeLoc.isInvalid()) + return 0; + + ExpansionFileID = SrcMgr.getFileID(IncludeLoc); + } + + return 0; +} + +//----------------------------------------------------------------------------// +// Module map file parser +//----------------------------------------------------------------------------// + +namespace clang { + /// \brief A token in a module map file. + struct MMToken { + enum TokenKind { + Comma, + EndOfFile, + HeaderKeyword, + Identifier, + ExplicitKeyword, + ExportKeyword, + FrameworkKeyword, + ModuleKeyword, + Period, + UmbrellaKeyword, + RequiresKeyword, + Star, + StringLiteral, + LBrace, + RBrace, + LSquare, + RSquare + } Kind; + + unsigned Location; + unsigned StringLength; + const char *StringData; + + void clear() { + Kind = EndOfFile; + Location = 0; + StringLength = 0; + StringData = 0; + } + + bool is(TokenKind K) const { return Kind == K; } + + SourceLocation getLocation() const { + return SourceLocation::getFromRawEncoding(Location); + } + + StringRef getString() const { + return StringRef(StringData, StringLength); + } + }; + + class ModuleMapParser { + Lexer &L; + SourceManager &SourceMgr; + DiagnosticsEngine &Diags; + ModuleMap ⤅ + + /// \brief The directory that this module map resides in. + const DirectoryEntry *Directory; + + /// \brief The directory containing Clang-supplied headers. 
+ const DirectoryEntry *BuiltinIncludeDir; + + /// \brief Whether an error occurred. + bool HadError; + + /// \brief Default target information, used only for string literal + /// parsing. + OwningPtr<TargetInfo> Target; + + /// \brief Stores string data for the various string literals referenced + /// during parsing. + llvm::BumpPtrAllocator StringData; + + /// \brief The current token. + MMToken Tok; + + /// \brief The active module. + Module *ActiveModule; + + /// \brief Consume the current token and return its location. + SourceLocation consumeToken(); + + /// \brief Skip tokens until we reach the a token with the given kind + /// (or the end of the file). + void skipUntil(MMToken::TokenKind K); + + typedef llvm::SmallVector<std::pair<std::string, SourceLocation>, 2> + ModuleId; + bool parseModuleId(ModuleId &Id); + void parseModuleDecl(); + void parseRequiresDecl(); + void parseHeaderDecl(SourceLocation UmbrellaLoc); + void parseUmbrellaDirDecl(SourceLocation UmbrellaLoc); + void parseExportDecl(); + void parseInferredSubmoduleDecl(bool Explicit); + + const DirectoryEntry *getOverriddenHeaderSearchDir(); + + public: + explicit ModuleMapParser(Lexer &L, SourceManager &SourceMgr, + DiagnosticsEngine &Diags, + ModuleMap &Map, + const DirectoryEntry *Directory, + const DirectoryEntry *BuiltinIncludeDir) + : L(L), SourceMgr(SourceMgr), Diags(Diags), Map(Map), + Directory(Directory), BuiltinIncludeDir(BuiltinIncludeDir), + HadError(false), ActiveModule(0) + { + TargetOptions TargetOpts; + TargetOpts.Triple = llvm::sys::getDefaultTargetTriple(); + Target.reset(TargetInfo::CreateTargetInfo(Diags, TargetOpts)); + + Tok.clear(); + consumeToken(); + } + + bool parseModuleMapFile(); + }; +} + +SourceLocation ModuleMapParser::consumeToken() { +retry: + SourceLocation Result = Tok.getLocation(); + Tok.clear(); + + Token LToken; + L.LexFromRawLexer(LToken); + Tok.Location = LToken.getLocation().getRawEncoding(); + switch (LToken.getKind()) { + case tok::raw_identifier: + 
Tok.StringData = LToken.getRawIdentifierData(); + Tok.StringLength = LToken.getLength(); + Tok.Kind = llvm::StringSwitch<MMToken::TokenKind>(Tok.getString()) + .Case("header", MMToken::HeaderKeyword) + .Case("explicit", MMToken::ExplicitKeyword) + .Case("export", MMToken::ExportKeyword) + .Case("framework", MMToken::FrameworkKeyword) + .Case("module", MMToken::ModuleKeyword) + .Case("requires", MMToken::RequiresKeyword) + .Case("umbrella", MMToken::UmbrellaKeyword) + .Default(MMToken::Identifier); + break; + + case tok::comma: + Tok.Kind = MMToken::Comma; + break; + + case tok::eof: + Tok.Kind = MMToken::EndOfFile; + break; + + case tok::l_brace: + Tok.Kind = MMToken::LBrace; + break; + + case tok::l_square: + Tok.Kind = MMToken::LSquare; + break; + + case tok::period: + Tok.Kind = MMToken::Period; + break; + + case tok::r_brace: + Tok.Kind = MMToken::RBrace; + break; + + case tok::r_square: + Tok.Kind = MMToken::RSquare; + break; + + case tok::star: + Tok.Kind = MMToken::Star; + break; + + case tok::string_literal: { + if (LToken.hasUDSuffix()) { + Diags.Report(LToken.getLocation(), diag::err_invalid_string_udl); + HadError = true; + goto retry; + } + + // Parse the string literal. + LangOptions LangOpts; + StringLiteralParser StringLiteral(<oken, 1, SourceMgr, LangOpts, *Target); + if (StringLiteral.hadError) + goto retry; + + // Copy the string literal into our string data allocator. + unsigned Length = StringLiteral.GetStringLength(); + char *Saved = StringData.Allocate<char>(Length + 1); + memcpy(Saved, StringLiteral.GetString().data(), Length); + Saved[Length] = 0; + + // Form the token. 
+ Tok.Kind = MMToken::StringLiteral; + Tok.StringData = Saved; + Tok.StringLength = Length; + break; + } + + case tok::comment: + goto retry; + + default: + Diags.Report(LToken.getLocation(), diag::err_mmap_unknown_token); + HadError = true; + goto retry; + } + + return Result; +} + +void ModuleMapParser::skipUntil(MMToken::TokenKind K) { + unsigned braceDepth = 0; + unsigned squareDepth = 0; + do { + switch (Tok.Kind) { + case MMToken::EndOfFile: + return; + + case MMToken::LBrace: + if (Tok.is(K) && braceDepth == 0 && squareDepth == 0) + return; + + ++braceDepth; + break; + + case MMToken::LSquare: + if (Tok.is(K) && braceDepth == 0 && squareDepth == 0) + return; + + ++squareDepth; + break; + + case MMToken::RBrace: + if (braceDepth > 0) + --braceDepth; + else if (Tok.is(K)) + return; + break; + + case MMToken::RSquare: + if (squareDepth > 0) + --squareDepth; + else if (Tok.is(K)) + return; + break; + + default: + if (braceDepth == 0 && squareDepth == 0 && Tok.is(K)) + return; + break; + } + + consumeToken(); + } while (true); +} + +/// \brief Parse a module-id. +/// +/// module-id: +/// identifier +/// identifier '.' module-id +/// +/// \returns true if an error occurred, false otherwise. +bool ModuleMapParser::parseModuleId(ModuleId &Id) { + Id.clear(); + do { + if (Tok.is(MMToken::Identifier)) { + Id.push_back(std::make_pair(Tok.getString(), Tok.getLocation())); + consumeToken(); + } else { + Diags.Report(Tok.getLocation(), diag::err_mmap_expected_module_name); + return true; + } + + if (!Tok.is(MMToken::Period)) + break; + + consumeToken(); + } while (true); + + return false; +} + +namespace { + /// \brief Enumerates the known attributes. + enum AttributeKind { + /// \brief An unknown attribute. + AT_unknown, + /// \brief The 'system' attribute. + AT_system + }; +} + +/// \brief Parse a module declaration. 
+///
+///   module-declaration:
+///     'explicit'[opt] 'framework'[opt] 'module' module-id attributes[opt]
+///       { module-member* }
+///
+///   attributes:
+///     attribute attributes
+///     attribute
+///
+///   attribute:
+///     [ identifier ]
+///
+///   module-member:
+///     requires-declaration
+///     header-declaration
+///     umbrella-dir-declaration
+///     submodule-declaration
+///     export-declaration
+///
+///   submodule-declaration:
+///     module-declaration
+///     inferred-submodule-declaration
+///
+/// On every exit path ActiveModule is restored to the value it had on entry,
+/// so a failed declaration cannot change the context in which the following
+/// declarations are parsed.
+void ModuleMapParser::parseModuleDecl() {
+  assert(Tok.is(MMToken::ExplicitKeyword) || Tok.is(MMToken::ModuleKeyword) ||
+         Tok.is(MMToken::FrameworkKeyword));
+  // Parse 'explicit' or 'framework' keyword, if present.
+  SourceLocation ExplicitLoc;
+  bool Explicit = false;
+  bool Framework = false;
+
+  // Parse 'explicit' keyword, if present.
+  if (Tok.is(MMToken::ExplicitKeyword)) {
+    ExplicitLoc = consumeToken();
+    Explicit = true;
+  }
+
+  // Parse 'framework' keyword, if present.
+  if (Tok.is(MMToken::FrameworkKeyword)) {
+    consumeToken();
+    Framework = true;
+  }
+
+  // Parse 'module' keyword.
+  if (!Tok.is(MMToken::ModuleKeyword)) {
+    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_module);
+    consumeToken();
+    HadError = true;
+    return;
+  }
+  consumeToken(); // 'module' keyword
+
+  // If we have a wildcard for the module name, this is an inferred submodule.
+  // Parse it.
+  if (Tok.is(MMToken::Star))
+    return parseInferredSubmoduleDecl(Explicit);
+
+  // Parse the module name.
+  ModuleId Id;
+  if (parseModuleId(Id)) {
+    HadError = true;
+    return;
+  }
+
+  if (ActiveModule) {
+    if (Id.size() > 1) {
+      Diags.Report(Id.front().second, diag::err_mmap_nested_submodule_id)
+        << SourceRange(Id.front().second, Id.back().second);
+
+      HadError = true;
+      return;
+    }
+  } else if (Id.size() == 1 && Explicit) {
+    // Top-level modules can't be explicit.
+    Diags.Report(ExplicitLoc, diag::err_mmap_explicit_top_level);
+    Explicit = false;
+    ExplicitLoc = SourceLocation();
+    HadError = true;
+  }
+
+  Module *PreviousActiveModule = ActiveModule;
+  if (Id.size() > 1) {
+    // This module map defines a submodule. Go find the module of which it
+    // is a submodule.
+    ActiveModule = 0;
+    for (unsigned I = 0, N = Id.size() - 1; I != N; ++I) {
+      if (Module *Next = Map.lookupModuleQualified(Id[I].first, ActiveModule)) {
+        ActiveModule = Next;
+        continue;
+      }
+
+      if (ActiveModule) {
+        Diags.Report(Id[I].second, diag::err_mmap_missing_module_qualified)
+          << Id[I].first << ActiveModule->getTopLevelModule();
+      } else {
+        Diags.Report(Id[I].second, diag::err_mmap_expected_module_name);
+      }
+      HadError = true;
+      // Don't leave a partially resolved parent active for later
+      // declarations.
+      ActiveModule = PreviousActiveModule;
+      return;
+    }
+  }
+
+  StringRef ModuleName = Id.back().first;
+  SourceLocation ModuleNameLoc = Id.back().second;
+
+  // Parse the optional attribute list.
+  bool IsSystem = false;
+  while (Tok.is(MMToken::LSquare)) {
+    // Consume the '['.
+    SourceLocation LSquareLoc = consumeToken();
+
+    // Check whether we have an attribute name here.
+    if (!Tok.is(MMToken::Identifier)) {
+      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_attribute);
+      skipUntil(MMToken::RSquare);
+      if (Tok.is(MMToken::RSquare))
+        consumeToken();
+      continue;
+    }
+
+    // Decode the attribute name.
+    AttributeKind Attribute
+      = llvm::StringSwitch<AttributeKind>(Tok.getString())
+        .Case("system", AT_system)
+        .Default(AT_unknown);
+    switch (Attribute) {
+    case AT_unknown:
+      Diags.Report(Tok.getLocation(), diag::warn_mmap_unknown_attribute)
+        << Tok.getString();
+      break;
+
+    case AT_system:
+      IsSystem = true;
+      break;
+    }
+    consumeToken();
+
+    // Consume the ']'.
+    if (!Tok.is(MMToken::RSquare)) {
+      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_rsquare);
+      Diags.Report(LSquareLoc, diag::note_mmap_lsquare_match);
+      skipUntil(MMToken::RSquare);
+    }
+
+    if (Tok.is(MMToken::RSquare))
+      consumeToken();
+  }
+
+  // Parse the opening brace.
+  if (!Tok.is(MMToken::LBrace)) {
+    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_lbrace)
+      << ModuleName;
+    HadError = true;
+    ActiveModule = PreviousActiveModule;
+    return;
+  }
+  SourceLocation LBraceLoc = consumeToken();
+
+  // Determine whether this (sub)module has already been defined.
+  if (Module *Existing = Map.lookupModuleQualified(ModuleName, ActiveModule)) {
+    if (Existing->DefinitionLoc.isInvalid() && !ActiveModule) {
+      // Skip the module definition.
+      skipUntil(MMToken::RBrace);
+      if (Tok.is(MMToken::RBrace))
+        consumeToken();
+      else {
+        Diags.Report(Tok.getLocation(), diag::err_mmap_expected_rbrace);
+        Diags.Report(LBraceLoc, diag::note_mmap_lbrace_match);
+        HadError = true;
+      }
+      ActiveModule = PreviousActiveModule;
+      return;
+    }
+
+    Diags.Report(ModuleNameLoc, diag::err_mmap_module_redefinition)
+      << ModuleName;
+    Diags.Report(Existing->DefinitionLoc, diag::note_mmap_prev_definition);
+
+    // Skip the module definition.
+    skipUntil(MMToken::RBrace);
+    if (Tok.is(MMToken::RBrace))
+      consumeToken();
+
+    HadError = true;
+    ActiveModule = PreviousActiveModule;
+    return;
+  }
+
+  // Start defining this module.
+  ActiveModule = Map.findOrCreateModule(ModuleName, ActiveModule, Framework,
+                                        Explicit).first;
+  ActiveModule->DefinitionLoc = ModuleNameLoc;
+  if (IsSystem)
+    ActiveModule->IsSystem = true;
+
+  bool Done = false;
+  do {
+    switch (Tok.Kind) {
+    case MMToken::EndOfFile:
+    case MMToken::RBrace:
+      Done = true;
+      break;
+
+    case MMToken::ExplicitKeyword:
+    case MMToken::FrameworkKeyword:
+    case MMToken::ModuleKeyword:
+      parseModuleDecl();
+      break;
+
+    case MMToken::ExportKeyword:
+      parseExportDecl();
+      break;
+
+    case MMToken::RequiresKeyword:
+      parseRequiresDecl();
+      break;
+
+    case MMToken::UmbrellaKeyword: {
+      // 'umbrella' introduces either an umbrella header or an umbrella
+      // directory, depending on whether 'header' follows.
+      SourceLocation UmbrellaLoc = consumeToken();
+      if (Tok.is(MMToken::HeaderKeyword))
+        parseHeaderDecl(UmbrellaLoc);
+      else
+        parseUmbrellaDirDecl(UmbrellaLoc);
+      break;
+    }
+
+    case MMToken::HeaderKeyword:
+      parseHeaderDecl(SourceLocation());
+      break;
+
+    default:
+      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_member);
+      consumeToken();
+      break;
+    }
+  } while (!Done);
+
+  if (Tok.is(MMToken::RBrace))
+    consumeToken();
+  else {
+    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_rbrace);
+    Diags.Report(LBraceLoc, diag::note_mmap_lbrace_match);
+    HadError = true;
+  }
+
+  // We're done parsing this module. Pop back to the previous module.
+  ActiveModule = PreviousActiveModule;
+}
+
+/// \brief Parse a requires declaration.
+///
+///   requires-declaration:
+///     'requires' feature-list
+///
+///   feature-list:
+///     identifier ',' feature-list
+///     identifier
+void ModuleMapParser::parseRequiresDecl() {
+  assert(Tok.is(MMToken::RequiresKeyword));
+
+  // Parse 'requires' keyword.
+  consumeToken();
+
+  // Parse the feature-list.
+  do {
+    if (!Tok.is(MMToken::Identifier)) {
+      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_feature);
+      HadError = true;
+      return;
+    }
+
+    // Consume the feature name.
+    std::string Feature = Tok.getString();
+    consumeToken();
+
+    // Add this feature.
+    ActiveModule->addRequirement(Feature, Map.LangOpts, *Map.Target);
+
+    if (!Tok.is(MMToken::Comma))
+      break;
+
+    // Consume the comma.
+    consumeToken();
+  } while (true);
+}
+
+/// \brief Append to \p Path the set of paths needed to get to the
+/// subframework in which the given module lives.
+///
+/// The outermost framework in the module's parent chain contributes nothing;
+/// every framework nested below it contributes "Frameworks/<Name>.framework",
+/// outermost first.
+static void appendSubframeworkPaths(Module *Mod,
+                                    llvm::SmallVectorImpl<char> &Path) {
+  // Collect the framework names from the given module to the top-level module.
+  llvm::SmallVector<StringRef, 2> Paths;
+  for (; Mod; Mod = Mod->Parent) {
+    if (Mod->IsFramework)
+      Paths.push_back(Mod->Name);
+  }
+
+  if (Paths.empty())
+    return;
+
+  // Add Frameworks/Name.framework for each subframework. Paths.back() is the
+  // outermost framework and is deliberately skipped.
+  for (unsigned I = Paths.size() - 1; I != 0; --I) {
+    llvm::sys::path::append(Path, "Frameworks");
+    llvm::sys::path::append(Path, Paths[I-1] + ".framework");
+  }
+}
+
+/// \brief Determine whether the given file name is the name of a builtin
+/// header, supplied by Clang to replace, override, or augment existing system
+/// headers.
+static bool isBuiltinHeader(StringRef FileName) {
+  return llvm::StringSwitch<bool>(FileName)
+      .Case("float.h", true)
+      .Case("iso646.h", true)
+      .Case("limits.h", true)
+      .Case("stdalign.h", true)
+      .Case("stdarg.h", true)
+      .Case("stdbool.h", true)
+      .Case("stddef.h", true)
+      .Case("stdint.h", true)
+      .Case("tgmath.h", true)
+      .Case("unwind.h", true)
+      .Default(false);
+}
+
+/// \brief Parse a header declaration.
+///
+///   header-declaration:
+///     'umbrella'[opt] 'header' string-literal
+///
+/// \param UmbrellaLoc the location of the 'umbrella' keyword if this is an
+/// umbrella header, or an invalid location for an ordinary header.
+void ModuleMapParser::parseHeaderDecl(SourceLocation UmbrellaLoc) {
+  assert(Tok.is(MMToken::HeaderKeyword));
+  consumeToken();
+
+  bool Umbrella = UmbrellaLoc.isValid();
+
+  // Parse the header name.
+  if (!Tok.is(MMToken::StringLiteral)) {
+    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_header)
+      << "header";
+    HadError = true;
+    return;
+  }
+  std::string FileName = Tok.getString();
+  SourceLocation FileNameLoc = consumeToken();
+
+  // Check whether we already have an umbrella.
+  if (Umbrella && ActiveModule->Umbrella) {
+    Diags.Report(FileNameLoc, diag::err_mmap_umbrella_clash)
+      << ActiveModule->getFullModuleName();
+    HadError = true;
+    return;
+  }
+
+  // Look for this file.
+  const FileEntry *File = 0;
+  const FileEntry *BuiltinFile = 0;
+  SmallString<128> PathName;
+  if (llvm::sys::path::is_absolute(FileName)) {
+    PathName = FileName;
+    File = SourceMgr.getFileManager().getFile(PathName);
+  } else if (const DirectoryEntry *Dir = getOverriddenHeaderSearchDir()) {
+    PathName = Dir->getName();
+    llvm::sys::path::append(PathName, FileName);
+    File = SourceMgr.getFileManager().getFile(PathName);
+  } else {
+    // Search for the header file within the search directory.
+    PathName = Directory->getName();
+
+    if (ActiveModule->isPartOfFramework()) {
+      appendSubframeworkPaths(ActiveModule, PathName);
+
+      // Remember the framework-relative prefix *after* the subframework
+      // components have been appended, so that the PrivateHeaders lookup
+      // below searches the same (sub)framework as the Headers lookup.
+      unsigned PathLength = PathName.size();
+
+      // Check whether this file is in the public headers.
+      llvm::sys::path::append(PathName, "Headers");
+      llvm::sys::path::append(PathName, FileName);
+      File = SourceMgr.getFileManager().getFile(PathName);
+
+      if (!File) {
+        // Check whether this file is in the private headers.
+        PathName.resize(PathLength);
+        llvm::sys::path::append(PathName, "PrivateHeaders");
+        llvm::sys::path::append(PathName, FileName);
+        File = SourceMgr.getFileManager().getFile(PathName);
+      }
+    } else {
+      // Lookup for normal headers.
+      llvm::sys::path::append(PathName, FileName);
+      File = SourceMgr.getFileManager().getFile(PathName);
+
+      // If this is a system module with a top-level header, this header
+      // may have a counterpart (or replacement) in the set of headers
+      // supplied by Clang. Find that builtin header.
+      if (ActiveModule->IsSystem && !Umbrella && BuiltinIncludeDir &&
+          BuiltinIncludeDir != Directory && isBuiltinHeader(FileName)) {
+        SmallString<128> BuiltinPathName(BuiltinIncludeDir->getName());
+        llvm::sys::path::append(BuiltinPathName, FileName);
+        BuiltinFile = SourceMgr.getFileManager().getFile(BuiltinPathName);
+
+        // If Clang supplies this header but the underlying system does not,
+        // just silently swap in our builtin version. Otherwise, we'll end
+        // up adding both (later).
+        if (!File && BuiltinFile) {
+          File = BuiltinFile;
+          BuiltinFile = 0;
+        }
+      }
+    }
+  }
+
+  // FIXME: We shouldn't be eagerly stat'ing every file named in a module map.
+  // Come up with a lazy way to do this.
+  if (File) {
+    if (const Module *OwningModule = Map.Headers[File]) {
+      Diags.Report(FileNameLoc, diag::err_mmap_header_conflict)
+        << FileName << OwningModule->getFullModuleName();
+      HadError = true;
+    } else if (Umbrella) {
+      const DirectoryEntry *UmbrellaDir = File->getDir();
+      if ((OwningModule = Map.UmbrellaDirs[UmbrellaDir])) {
+        Diags.Report(UmbrellaLoc, diag::err_mmap_umbrella_clash)
+          << OwningModule->getFullModuleName();
+        HadError = true;
+      } else {
+        // Record this umbrella header.
+        Map.setUmbrellaHeader(ActiveModule, File);
+      }
+    } else {
+      // Record this header.
+      Map.addHeader(ActiveModule, File);
+
+      // If there is a builtin counterpart to this file, add it now.
+      if (BuiltinFile)
+        Map.addHeader(ActiveModule, BuiltinFile);
+    }
+  } else {
+    Diags.Report(FileNameLoc, diag::err_mmap_header_not_found)
+      << Umbrella << FileName;
+    HadError = true;
+  }
+}
+
+/// \brief Parse an umbrella directory declaration.
+///
+///   umbrella-dir-declaration:
+///     umbrella string-literal
+///
+/// \param UmbrellaLoc the location of the already-consumed 'umbrella'
+/// keyword.
+void ModuleMapParser::parseUmbrellaDirDecl(SourceLocation UmbrellaLoc) {
+  // Parse the directory name.
+  if (!Tok.is(MMToken::StringLiteral)) {
+    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_header)
+      << "umbrella";
+    HadError = true;
+    return;
+  }
+
+  std::string DirName = Tok.getString();
+  SourceLocation DirNameLoc = consumeToken();
+
+  // Check whether we already have an umbrella.
+  if (ActiveModule->Umbrella) {
+    Diags.Report(DirNameLoc, diag::err_mmap_umbrella_clash)
+      << ActiveModule->getFullModuleName();
+    HadError = true;
+    return;
+  }
+
+  // Look for this directory; relative names are resolved against the
+  // directory containing the module map.
+  const DirectoryEntry *Dir = 0;
+  if (llvm::sys::path::is_absolute(DirName))
+    Dir = SourceMgr.getFileManager().getDirectory(DirName);
+  else {
+    SmallString<128> PathName;
+    PathName = Directory->getName();
+    llvm::sys::path::append(PathName, DirName);
+    Dir = SourceMgr.getFileManager().getDirectory(PathName);
+  }
+
+  if (!Dir) {
+    Diags.Report(DirNameLoc, diag::err_mmap_umbrella_dir_not_found)
+      << DirName;
+    HadError = true;
+    return;
+  }
+
+  if (Module *OwningModule = Map.UmbrellaDirs[Dir]) {
+    Diags.Report(UmbrellaLoc, diag::err_mmap_umbrella_clash)
+      << OwningModule->getFullModuleName();
+    HadError = true;
+    return;
+  }
+
+  // Record this umbrella directory.
+  Map.setUmbrellaDir(ActiveModule, Dir);
+}
+
+/// \brief Parse a module export declaration.
+///
+///   export-declaration:
+///     'export' wildcard-module-id
+///
+///   wildcard-module-id:
+///     identifier
+///     '*'
+///     identifier '.' wildcard-module-id
+///
+/// The parsed export is only recorded in ActiveModule->UnresolvedExports; it
+/// is not resolved to a module here.
+void ModuleMapParser::parseExportDecl() {
+  assert(Tok.is(MMToken::ExportKeyword));
+  SourceLocation ExportLoc = consumeToken();
+
+  // Parse the module-id with an optional wildcard at the end.
+  ModuleId ParsedModuleId;
+  bool Wildcard = false;
+  do {
+    if (Tok.is(MMToken::Identifier)) {
+      ParsedModuleId.push_back(std::make_pair(Tok.getString(),
+                                              Tok.getLocation()));
+      consumeToken();
+
+      if (Tok.is(MMToken::Period)) {
+        consumeToken();
+        continue;
+      }
+
+      break;
+    }
+
+    if(Tok.is(MMToken::Star)) {
+      Wildcard = true;
+      consumeToken();
+      break;
+    }
+
+    Diags.Report(Tok.getLocation(), diag::err_mmap_export_module_id);
+    HadError = true;
+    return;
+  } while (true);
+
+  Module::UnresolvedExportDecl Unresolved = {
+    ExportLoc, ParsedModuleId, Wildcard
+  };
+  ActiveModule->UnresolvedExports.push_back(Unresolved);
+}
+
+/// \brief Parse the remainder of an inferred submodule declaration, starting
+/// at the '*' that stands in for the module name.
+///
+/// The only member accepted inside the braces is "export *". The declaration
+/// is rejected (and its body skipped) when there is no active module, when
+/// the active module has no umbrella directory, or when the active module
+/// already infers submodules.
+///
+/// \param Explicit whether the declaration was introduced with 'explicit'.
+void ModuleMapParser::parseInferredSubmoduleDecl(bool Explicit) {
+  assert(Tok.is(MMToken::Star));
+  SourceLocation StarLoc = consumeToken();
+  bool Failed = false;
+
+  // Inferred modules must be submodules.
+  if (!ActiveModule) {
+    Diags.Report(StarLoc, diag::err_mmap_top_level_inferred_submodule);
+    Failed = true;
+  }
+
+  // Inferred modules must have umbrella directories.
+  if (!Failed && !ActiveModule->getUmbrellaDir()) {
+    Diags.Report(StarLoc, diag::err_mmap_inferred_no_umbrella);
+    Failed = true;
+  }
+
+  // Check for redefinition of an inferred module.
+  if (!Failed && ActiveModule->InferSubmodules) {
+    Diags.Report(StarLoc, diag::err_mmap_inferred_redef);
+    if (ActiveModule->InferredSubmoduleLoc.isValid())
+      Diags.Report(ActiveModule->InferredSubmoduleLoc,
+                   diag::note_mmap_prev_definition);
+    Failed = true;
+  }
+
+  // If there were any problems with this inferred submodule, skip its body.
+  if (Failed) {
+    if (Tok.is(MMToken::LBrace)) {
+      consumeToken();
+      skipUntil(MMToken::RBrace);
+      if (Tok.is(MMToken::RBrace))
+        consumeToken();
+    }
+    HadError = true;
+    return;
+  }
+
+  // Note that we have an inferred submodule.
+  ActiveModule->InferSubmodules = true;
+  ActiveModule->InferredSubmoduleLoc = StarLoc;
+  ActiveModule->InferExplicitSubmodules = Explicit;
+
+  // Parse the opening brace.
+  if (!Tok.is(MMToken::LBrace)) {
+    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_lbrace_wildcard);
+    HadError = true;
+    return;
+  }
+  SourceLocation LBraceLoc = consumeToken();
+
+  // Parse the body of the inferred submodule.
+  bool Done = false;
+  do {
+    switch (Tok.Kind) {
+    case MMToken::EndOfFile:
+    case MMToken::RBrace:
+      Done = true;
+      break;
+
+    case MMToken::ExportKeyword: {
+      consumeToken();
+      if (Tok.is(MMToken::Star))
+        ActiveModule->InferExportWildcard = true;
+      else
+        Diags.Report(Tok.getLocation(),
+                     diag::err_mmap_expected_export_wildcard);
+      consumeToken();
+      break;
+    }
+
+    case MMToken::ExplicitKeyword:
+    case MMToken::ModuleKeyword:
+    case MMToken::HeaderKeyword:
+    case MMToken::UmbrellaKeyword:
+    default:
+      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_wildcard_member);
+      consumeToken();
+      break;
+    }
+  } while (!Done);
+
+  if (Tok.is(MMToken::RBrace))
+    consumeToken();
+  else {
+    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_rbrace);
+    Diags.Report(LBraceLoc, diag::note_mmap_lbrace_match);
+    HadError = true;
+  }
+}
+
+/// \brief If there is a specific header search directory due the presence
+/// of an umbrella directory, retrieve that directory. Otherwise, returns null.
+///
+/// The search starts at the active module and moves up through its parents;
+/// it stops without a result at the first framework module that has no
+/// umbrella directory.
+const DirectoryEntry *ModuleMapParser::getOverriddenHeaderSearchDir() {
+  for (Module *Mod = ActiveModule; Mod; Mod = Mod->Parent) {
+    // If we have an umbrella directory, use that.
+    if (Mod->hasUmbrellaDir())
+      return Mod->getUmbrellaDir();
+
+    // If we have a framework directory, stop looking.
+    if (Mod->IsFramework)
+      return 0;
+  }
+
+  return 0;
+}
+
+/// \brief Parse a module map file.
+///
+///   module-map-file:
+///     module-declaration*
+///
+/// Any token that cannot start a module declaration is diagnosed and
+/// skipped.
+///
+/// \returns true if an error occurred while parsing, false otherwise.
+bool ModuleMapParser::parseModuleMapFile() {
+  do {
+    // Every token kind is listed explicitly (no default label), so each kind
+    // is handled by name.
+    switch (Tok.Kind) {
+    case MMToken::EndOfFile:
+      return HadError;
+
+    case MMToken::ExplicitKeyword:
+    case MMToken::ModuleKeyword:
+    case MMToken::FrameworkKeyword:
+      parseModuleDecl();
+      break;
+
+    case MMToken::Comma:
+    case MMToken::ExportKeyword:
+    case MMToken::HeaderKeyword:
+    case MMToken::Identifier:
+    case MMToken::LBrace:
+    case MMToken::LSquare:
+    case MMToken::Period:
+    case MMToken::RBrace:
+    case MMToken::RSquare:
+    case MMToken::RequiresKeyword:
+    case MMToken::Star:
+    case MMToken::StringLiteral:
+    case MMToken::UmbrellaKeyword:
+      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_module);
+      HadError = true;
+      consumeToken();
+      break;
+    }
+  } while (true);
+}
+
+/// \brief Load and parse the given module map file, recording its contents
+/// in this module map.
+///
+/// The diagnostic client is notified of the start and end of the file around
+/// the parse.
+///
+/// \returns true if the file's buffer could not be obtained or if parsing
+/// reported an error, false otherwise.
+bool ModuleMap::parseModuleMapFile(const FileEntry *File) {
+  assert(Target != 0 && "Missing target information");
+  FileID ID = SourceMgr->createFileID(File, SourceLocation(), SrcMgr::C_User);
+  const llvm::MemoryBuffer *Buffer = SourceMgr->getBuffer(ID);
+  if (!Buffer)
+    return true;
+
+  // Parse this module map file, lexing from the buffer that was just checked
+  // rather than fetching it a second time.
+  Lexer L(ID, Buffer, *SourceMgr, MMapLangOpts);
+  Diags->getClient()->BeginSourceFile(MMapLangOpts);
+  ModuleMapParser Parser(L, *SourceMgr, *Diags, *this, File->getDir(),
+                         BuiltinIncludeDir);
+  bool Result = Parser.parseModuleMapFile();
+  Diags->getClient()->EndSourceFile();
+
+  return Result;
+}
diff --git a/contrib/llvm/tools/clang/lib/Lex/PPCaching.cpp b/contrib/llvm/tools/clang/lib/Lex/PPCaching.cpp index 986341b..6f4c189 100644 --- a/contrib/llvm/tools/clang/lib/Lex/PPCaching.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/PPCaching.cpp @@ -42,6 +42,7 @@ void Preprocessor::Backtrack() { && "EnableBacktrackAtThisPos was not called!"); CachedLexPos = BacktrackPositions.back(); BacktrackPositions.pop_back(); + recomputeCurLexerKind(); } void Preprocessor::CachingLex(Token &Result) { @@ -56,17 +57,21 @@ void Preprocessor::CachingLex(Token &Result) { ExitCachingLexMode(); Lex(Result); - if (!isBacktrackEnabled()) { + if (isBacktrackEnabled()) { + // Cache the lexed token. + EnterCachingLexMode(); + CachedTokens.push_back(Result); + ++CachedLexPos; + return; + } + + if (CachedLexPos < CachedTokens.size()) { + EnterCachingLexMode(); + } else { // All cached tokens were consumed. CachedTokens.clear(); CachedLexPos = 0; - return; } - - // Cache the lexed token. 
- EnterCachingLexMode(); - CachedTokens.push_back(Result); - ++CachedLexPos; } void Preprocessor::EnterCachingLexMode() { @@ -74,8 +79,7 @@ void Preprocessor::EnterCachingLexMode() { return; PushIncludeMacroStack(); - if (CurLexerKind != CLK_LexAfterModuleImport) - CurLexerKind = CLK_CachingLexer; + CurLexerKind = CLK_CachingLexer; } diff --git a/contrib/llvm/tools/clang/lib/Lex/PPCallbacks.cpp b/contrib/llvm/tools/clang/lib/Lex/PPCallbacks.cpp new file mode 100644 index 0000000..952b926 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Lex/PPCallbacks.cpp @@ -0,0 +1,14 @@ +//===--- PPCallbacks.cpp - Callbacks for Preprocessor actions ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/PPCallbacks.h" + +using namespace clang; + +void PPChainedCallbacks::anchor() { } diff --git a/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp b/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp index de50c75..625a204 100644 --- a/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp @@ -22,6 +22,7 @@ #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" #include "llvm/ADT/APInt.h" +#include "llvm/Support/ErrorHandling.h" using namespace clang; //===----------------------------------------------------------------------===// @@ -119,8 +120,15 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) { std::string Spelling = getSpelling(MacroNameTok, &Invalid); if (Invalid) return; - + const IdentifierInfo &Info = Identifiers.get(Spelling); + + // Allow #defining |and| and friends in microsoft mode. 
+ if (Info.isCPlusPlusOperatorKeyword() && getLangOpts().MicrosoftMode) { + MacroNameTok.setIdentifierInfo(getIdentifierInfo(Spelling)); + return; + } + if (Info.isCPlusPlusOperatorKeyword()) // C++ 2.5p2: Alternative tokens behave the same as its primary token // except for their spellings. @@ -173,7 +181,7 @@ void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) { // trouble than it is worth to insert /**/ and check that there is no /**/ // in the range also. FixItHint Hint; - if ((Features.GNUMode || Features.C99 || Features.CPlusPlus) && + if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) && !CurTokenLexer) Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//"); Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint; @@ -305,9 +313,6 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true, /*foundnonskip*/false, /*foundelse*/false); - - if (Callbacks) - Callbacks->Endif(); } } else if (Directive[0] == 'e') { StringRef Sub = Directive.substr(1); @@ -320,8 +325,11 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, assert(!InCond && "Can't be skipping if not in a conditional!"); // If we popped the outermost skipping block, we're done skipping! - if (!CondInfo.WasSkipping) + if (!CondInfo.WasSkipping) { + if (Callbacks) + Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc); break; + } } else if (Sub == "lse") { // "else". // #else directive in a skipping conditional. If not in some other // skipping conditional, and if #else hasn't already been seen, enter it @@ -334,14 +342,13 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // Note that we've seen a #else in this conditional. CondInfo.FoundElse = true; - if (Callbacks) - Callbacks->Else(); - // If the conditional is at the top level, and the #if block wasn't // entered, enter the #else block now. 
if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) { CondInfo.FoundNonSkip = true; CheckEndOfDirective("else"); + if (Callbacks) + Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc); break; } else { DiscardUntilEndOfDirective(); // C99 6.10p4. @@ -370,12 +377,13 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // If this is a #elif with a #else before it, report the error. if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_elif_after_else); - if (Callbacks) - Callbacks->Elif(SourceRange(ConditionalBegin, ConditionalEnd)); - // If this condition is true, enter it! if (ShouldEnter) { CondInfo.FoundNonSkip = true; + if (Callbacks) + Callbacks->Elif(Tok.getLocation(), + SourceRange(ConditionalBegin, ConditionalEnd), + CondInfo.IfLoc); break; } } @@ -486,7 +494,8 @@ const FileEntry *Preprocessor::LookupFile( const DirectoryLookup *&CurDir, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, - StringRef *SuggestedModule) { + Module **SuggestedModule, + bool SkipCache) { // If the header lookup mechanism may be relative to the current file, pass in // info about where the current file is. const FileEntry *CurFileEnt = 0; @@ -510,7 +519,7 @@ const FileEntry *Preprocessor::LookupFile( CurDir = CurDirLookup; const FileEntry *FE = HeaderInfo.LookupFile( Filename, isAngled, FromDir, CurDir, CurFileEnt, - SearchPath, RelativePath, SuggestedModule); + SearchPath, RelativePath, SuggestedModule, SkipCache); if (FE) return FE; // Otherwise, see if this is a subframework header. If so, this is relative @@ -575,9 +584,25 @@ void Preprocessor::HandleDirective(Token &Result) { // A(abc // #warning blah // def) - // If so, the user is relying on non-portable behavior, emit a diagnostic. - if (InMacroArgs) + // If so, the user is relying on undefined behavior, emit a diagnostic. Do + // not support this for #include-like directives, since that can result in + // terrible diagnostics, and does not work in GCC. 
+ if (InMacroArgs) { + if (IdentifierInfo *II = Result.getIdentifierInfo()) { + switch (II->getPPKeywordID()) { + case tok::pp_include: + case tok::pp_import: + case tok::pp_include_next: + case tok::pp___include_macros: + Diag(Result, diag::err_embedded_include) << II->getName(); + DiscardUntilEndOfDirective(); + return; + default: + break; + } + } Diag(Result, diag::ext_embedded_directive); + } TryAgain: switch (Result.getKind()) { @@ -594,7 +619,7 @@ TryAgain: setCodeCompletionReached(); return; case tok::numeric_constant: // # 7 GNU line marker directive. - if (getLangOptions().AsmPreprocessor) + if (getLangOpts().AsmPreprocessor) break; // # 4 is not a preprocessor directive in .S files. return HandleDigitDirective(Result); default: @@ -664,8 +689,15 @@ TryAgain: //isExtension = true; // FIXME: implement #unassert break; - case tok::pp___export_macro__: - return HandleMacroExportDirective(Result); + case tok::pp___public_macro: + if (getLangOpts().Modules) + return HandleMacroPublicDirective(Result); + break; + + case tok::pp___private_macro: + if (getLangOpts().Modules) + return HandleMacroPrivateDirective(Result); + break; } break; } @@ -674,7 +706,7 @@ TryAgain: // directives. This is important because # may be a comment or introduce // various pseudo-ops. Just return the # token and push back the following // token to be lexed next time. - if (getLangOptions().AsmPreprocessor) { + if (getLangOpts().AsmPreprocessor) { Token *Toks = new Token[2]; // Return the # and the token after it. Toks[0] = SavedHash; @@ -713,7 +745,7 @@ static bool GetLineValue(Token &DigitTok, unsigned &Val, return true; } - llvm::SmallString<64> IntegerBuffer; + SmallString<64> IntegerBuffer; IntegerBuffer.resize(DigitTok.getLength()); const char *DigitTokBegin = &IntegerBuffer[0]; bool Invalid = false; @@ -773,11 +805,11 @@ void Preprocessor::HandleLineDirective(Token &Tok) { // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a // number greater than 2147483647". 
C90 requires that the line # be <= 32767. unsigned LineLimit = 32768U; - if (Features.C99 || Features.CPlusPlus0x) + if (LangOpts.C99 || LangOpts.CPlusPlus0x) LineLimit = 2147483648U; if (LineNo >= LineLimit) Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit; - else if (Features.CPlusPlus0x && LineNo >= 32768U) + else if (LangOpts.CPlusPlus0x && LineNo >= 32768U) Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big); int FilenameID = -1; @@ -790,8 +822,10 @@ void Preprocessor::HandleLineDirective(Token &Tok) { ; // ok else if (StrTok.isNot(tok::string_literal)) { Diag(StrTok, diag::err_pp_line_invalid_filename); - DiscardUntilEndOfDirective(); - return; + return DiscardUntilEndOfDirective(); + } else if (StrTok.hasUDSuffix()) { + Diag(StrTok, diag::err_invalid_string_udl); + return DiscardUntilEndOfDirective(); } else { // Parse and validate the string, converting it into a unique ID. StringLiteralParser Literal(&StrTok, 1, *this); @@ -925,6 +959,9 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) { else if (StrTok.isNot(tok::string_literal)) { Diag(StrTok, diag::err_pp_linemarker_invalid_filename); return DiscardUntilEndOfDirective(); + } else if (StrTok.hasUDSuffix()) { + Diag(StrTok, diag::err_invalid_string_udl); + return DiscardUntilEndOfDirective(); } else { // Parse and validate the string, converting it into a unique ID. StringLiteralParser Literal(&StrTok, 1, *this); @@ -982,10 +1019,18 @@ void Preprocessor::HandleUserDiagnosticDirective(Token &Tok, // collapse multiple consequtive white space between tokens, but this isn't // specified by the standard. std::string Message = CurLexer->ReadToEndOfLine(); + + // Find the first non-whitespace character, so that we can make the + // diagnostic more succinct. 
+ StringRef Msg(Message); + size_t i = Msg.find_first_not_of(' '); + if (i < Msg.size()) + Msg = Msg.substr(i); + if (isWarning) - Diag(Tok, diag::pp_hash_warning) << Message; + Diag(Tok, diag::pp_hash_warning) << Msg; else - Diag(Tok, diag::err_pp_hash_error) << Message; + Diag(Tok, diag::err_pp_hash_error) << Msg; } /// HandleIdentSCCSDirective - Handle a #ident/#sccs directive. @@ -1007,6 +1052,11 @@ void Preprocessor::HandleIdentSCCSDirective(Token &Tok) { return; } + if (StrTok.hasUDSuffix()) { + Diag(StrTok, diag::err_invalid_string_udl); + return DiscardUntilEndOfDirective(); + } + // Verify that there is nothing after the string, other than EOD. CheckEndOfDirective("ident"); @@ -1018,8 +1068,8 @@ void Preprocessor::HandleIdentSCCSDirective(Token &Tok) { } } -/// \brief Handle a #__export_macro__ directive. -void Preprocessor::HandleMacroExportDirective(Token &Tok) { +/// \brief Handle a #public directive. +void Preprocessor::HandleMacroPublicDirective(Token &Tok) { Token MacroNameTok; ReadMacroName(MacroNameTok, 2); @@ -1027,21 +1077,52 @@ void Preprocessor::HandleMacroExportDirective(Token &Tok) { if (MacroNameTok.is(tok::eod)) return; - // Check to see if this is the last token on the #__export_macro__ line. - CheckEndOfDirective("__export_macro__"); + // Check to see if this is the last token on the #__public_macro line. + CheckEndOfDirective("__public_macro"); // Okay, we finally have a valid identifier to undef. MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo()); // If the macro is not defined, this is an error. if (MI == 0) { - Diag(MacroNameTok, diag::err_pp_export_non_macro) + Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << MacroNameTok.getIdentifierInfo(); return; } // Note that this macro has now been exported. 
- MI->setExportLocation(MacroNameTok.getLocation()); + MI->setVisibility(/*IsPublic=*/true, MacroNameTok.getLocation()); + + // If this macro definition came from a PCH file, mark it + // as having changed since serialization. + if (MI->isFromAST()) + MI->setChangedAfterLoad(); +} + +/// \brief Handle a #private directive. +void Preprocessor::HandleMacroPrivateDirective(Token &Tok) { + Token MacroNameTok; + ReadMacroName(MacroNameTok, 2); + + // Error reading macro name? If so, diagnostic already issued. + if (MacroNameTok.is(tok::eod)) + return; + + // Check to see if this is the last token on the #__private_macro line. + CheckEndOfDirective("__private_macro"); + + // Okay, we finally have a valid identifier to undef. + MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo()); + + // If the macro is not defined, this is an error. + if (MI == 0) { + Diag(MacroNameTok, diag::err_pp_visibility_non_macro) + << MacroNameTok.getIdentifierInfo(); + return; + } + + // Note that this macro has now been marked private. + MI->setVisibility(/*IsPublic=*/false, MacroNameTok.getLocation()); // If this macro definition came from a PCH file, mark it // as having changed since serialization. @@ -1109,7 +1190,7 @@ bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc, /// false if the > was found, otherwise it returns true if it finds and consumes /// the EOD marker. bool Preprocessor::ConcatenateIncludeName( - llvm::SmallString<128> &FilenameBuffer, + SmallString<128> &FilenameBuffer, SourceLocation &End) { Token CurTok; @@ -1171,9 +1252,10 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, CurPPLexer->LexIncludeFilename(FilenameTok); // Reserve a buffer to get the spelling. 
- llvm::SmallString<128> FilenameBuffer; + SmallString<128> FilenameBuffer; StringRef Filename; SourceLocation End; + SourceLocation CharEnd; // the end of this directive, in characters switch (FilenameTok.getKind()) { case tok::eod: @@ -1184,6 +1266,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, case tok::string_literal: Filename = getSpelling(FilenameTok, FilenameBuffer); End = FilenameTok.getLocation(); + CharEnd = End.getLocWithOffset(Filename.size()); break; case tok::less: @@ -1193,6 +1276,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, if (ConcatenateIncludeName(FilenameBuffer, End)) return; // Found <eod> but no ">"? Diagnostic already emitted. Filename = FilenameBuffer.str(); + CharEnd = getLocForEndOfToken(End); break; default: Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); @@ -1200,6 +1284,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, return; } + StringRef OriginalFilename = Filename; bool isAngled = GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename); // If GetIncludeFilenameSpelling set the start ptr to null, there was an @@ -1230,38 +1315,128 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, PragmaARCCFCodeAuditedLoc = SourceLocation(); } + if (HeaderInfo.HasIncludeAliasMap()) { + // Map the filename with the brackets still attached. If the name doesn't + // map to anything, fall back on the filename we've already gotten the + // spelling for. + StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename); + if (!NewName.empty()) + Filename = NewName; + } + // Search include directories. const DirectoryLookup *CurDir; - llvm::SmallString<1024> SearchPath; - llvm::SmallString<1024> RelativePath; + SmallString<1024> SearchPath; + SmallString<1024> RelativePath; // We get the raw path only if we have 'Callbacks' to which we later pass // the path. 
- StringRef SuggestedModule; + Module *SuggestedModule = 0; const FileEntry *File = LookupFile( Filename, isAngled, LookupFrom, CurDir, Callbacks ? &SearchPath : NULL, Callbacks ? &RelativePath : NULL, - AutoModuleImport? &SuggestedModule : 0); + getLangOpts().Modules? &SuggestedModule : 0); - // If we are supposed to import a module rather than including the header, - // do so now. - if (!SuggestedModule.empty()) { - TheModuleLoader.loadModule(IncludeTok.getLocation(), - Identifiers.get(SuggestedModule), - FilenameTok.getLocation()); - return; - } - - // Notify the callback object that we've seen an inclusion directive. - if (Callbacks) + if (Callbacks) { + if (!File) { + // Give the clients a chance to recover. + SmallString<128> RecoveryPath; + if (Callbacks->FileNotFound(Filename, RecoveryPath)) { + if (const DirectoryEntry *DE = FileMgr.getDirectory(RecoveryPath)) { + // Add the recovery path to the list of search paths. + DirectoryLookup DL(DE, SrcMgr::C_User, true, false); + HeaderInfo.AddSearchPath(DL, isAngled); + + // Try the lookup again, skipping the cache. + File = LookupFile(Filename, isAngled, LookupFrom, CurDir, 0, 0, + getLangOpts().Modules? &SuggestedModule : 0, + /*SkipCache*/true); + } + } + } + + // Notify the callback object that we've seen an inclusion directive. Callbacks->InclusionDirective(HashLoc, IncludeTok, Filename, isAngled, File, End, SearchPath, RelativePath); - + } + if (File == 0) { if (!SuppressIncludeNotFoundError) Diag(FilenameTok, diag::err_pp_file_not_found) << Filename; return; } + // If we are supposed to import a module rather than including the header, + // do so now. + if (SuggestedModule) { + // Compute the module access path corresponding to this module. + // FIXME: Should we have a second loadModule() overload to avoid this + // extra lookup step? 
+ llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path; + for (Module *Mod = SuggestedModule; Mod; Mod = Mod->Parent) + Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name), + FilenameTok.getLocation())); + std::reverse(Path.begin(), Path.end()); + + // Warn that we're replacing the include/import with a module import. + SmallString<128> PathString; + for (unsigned I = 0, N = Path.size(); I != N; ++I) { + if (I) + PathString += '.'; + PathString += Path[I].first->getName(); + } + int IncludeKind = 0; + + switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) { + case tok::pp_include: + IncludeKind = 0; + break; + + case tok::pp_import: + IncludeKind = 1; + break; + + case tok::pp_include_next: + IncludeKind = 2; + break; + + case tok::pp___include_macros: + IncludeKind = 3; + break; + + default: + llvm_unreachable("unknown include directive kind"); + } + + // Determine whether we are actually building the module that this + // include directive maps to. + bool BuildingImportedModule + = Path[0].first->getName() == getLangOpts().CurrentModule; + + if (!BuildingImportedModule && getLangOpts().ObjC2) { + // If we're not building the imported module, warn that we're going + // to automatically turn this inclusion directive into a module import. + // We only do this in Objective-C, where we have a module-import syntax. + CharSourceRange ReplaceRange(SourceRange(HashLoc, CharEnd), + /*IsTokenRange=*/false); + Diag(HashLoc, diag::warn_auto_module_import) + << IncludeKind << PathString + << FixItHint::CreateReplacement(ReplaceRange, + "@__experimental_modules_import " + PathString.str().str() + ";"); + } + + // Load the module. + // If this was an #__include_macros directive, only make macros visible. + Module::NameVisibilityKind Visibility + = (IncludeKind == 3)? 
Module::MacrosVisible : Module::AllVisible; + Module *Imported + = TheModuleLoader.loadModule(IncludeTok.getLocation(), Path, Visibility, + /*IsIncludeDirective=*/true); + + // If this header isn't part of the module we're building, we're done. + if (!BuildingImportedModule && Imported) + return; + } + // The #included file will be considered to be a system header if either it is // in a system include directory, or if the #includer is a system include // header. @@ -1278,8 +1453,12 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, } // Look up the file, create a File ID for it. - FileID FID = SourceMgr.createFileID(File, FilenameTok.getLocation(), - FileCharacter); + SourceLocation IncludePos = End; + // If the filename string was the result of macro expansions, set the include + // position on the file where it will be included and after the expansions. + if (IncludePos.isMacroID()) + IncludePos = SourceMgr.getExpansionRange(IncludePos).second; + FileID FID = SourceMgr.createFileID(File, IncludePos, FileCharacter); assert(!FID.isInvalid() && "Expected valid file ID"); // Finally, if all is good, enter the new file! @@ -1309,13 +1488,29 @@ void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc, return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup); } +/// HandleMicrosoftImportDirective - Implements #import for Microsoft Mode +void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) { + // The Microsoft #import directive takes a type library and generates header + // files from it, and includes those. This is beyond the scope of what clang + // does, so we ignore it and error out. However, #import can optionally have + // trailing attributes that span multiple lines. We're going to eat those + // so we can continue processing from there. + Diag(Tok, diag::err_pp_import_directive_ms ); + + // Read tokens until we get to the end of the directive. 
Note that the + // directive can be split over multiple lines using the backslash character. + DiscardUntilEndOfDirective(); +} + /// HandleImportDirective - Implements #import. /// void Preprocessor::HandleImportDirective(SourceLocation HashLoc, Token &ImportTok) { - if (!Features.ObjC1) // #import is standard for ObjC. + if (!LangOpts.ObjC1) { // #import is standard for ObjC. + if (LangOpts.MicrosoftMode) + return HandleMicrosoftImportDirective(ImportTok); Diag(ImportTok, diag::ext_pp_import_directive); - + } return HandleIncludeDirective(HashLoc, ImportTok, 0, true); } @@ -1354,10 +1549,9 @@ void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc, /// definition has just been read. Lex the rest of the arguments and the /// closing ), updating MI with what we learn. Return true if an error occurs /// parsing the arg list. -bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) { +bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI, Token &Tok) { SmallVector<IdentifierInfo*, 32> Arguments; - Token Tok; while (1) { LexUnexpandedToken(Tok); switch (Tok.getKind()) { @@ -1369,8 +1563,8 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) { Diag(Tok, diag::err_pp_expected_ident_in_arg_list); return true; case tok::ellipsis: // #define X(... -> C99 varargs - if (!Features.C99) - Diag(Tok, Features.CPlusPlus0x ? + if (!LangOpts.C99) + Diag(Tok, LangOpts.CPlusPlus0x ? diag::warn_cxx98_compat_variadic_macro : diag::ext_variadic_macro); @@ -1476,7 +1670,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { } else if (Tok.is(tok::l_paren)) { // This is a function-like macro definition. Read the argument list. MI->setIsFunctionLike(); - if (ReadMacroDefinitionArgList(MI)) { + if (ReadMacroDefinitionArgList(MI, LastTok)) { // Forget about MI. ReleaseMacroInfo(MI); // Throw away the rest of the line. 
@@ -1496,7 +1690,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { // Read the first token after the arg list for down below. LexUnexpandedToken(Tok); - } else if (Features.C99 || Features.CPlusPlus0x) { + } else if (LangOpts.C99 || LangOpts.CPlusPlus0x) { // C99 requires whitespace between the macro definition and the body. Emit // a diagnostic for something like "#define X+". Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name); @@ -1561,7 +1755,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { // the '#' because '#' is often a comment character. However, change // the kind of the token to tok::unknown so that the preprocessor isn't // confused. - if (getLangOptions().AsmPreprocessor && Tok.isNot(tok::eod)) { + if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) { LastTok.setKind(tok::unknown); } else { Diag(Tok, diag::err_pp_stringize_not_parameter); @@ -1732,6 +1926,13 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, if (MI) // Mark it used. markMacroAsUsed(MI); + if (Callbacks) { + if (isIfndef) + Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok); + else + Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok); + } + // Should we include the stuff contained by this directive? if (!MI == isIfndef) { // Yes, remember that we are inside a conditional, then lex the next token. @@ -1744,13 +1945,6 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, /*Foundnonskip*/false, /*FoundElse*/false); } - - if (Callbacks) { - if (isIfndef) - Callbacks->Ifndef(MacroNameTok); - else - Callbacks->Ifdef(MacroNameTok); - } } /// HandleIfDirective - Implements the #if directive. @@ -1774,6 +1968,10 @@ void Preprocessor::HandleIfDirective(Token &IfToken, CurPPLexer->MIOpt.EnterTopLevelConditional(); } + if (Callbacks) + Callbacks->If(IfToken.getLocation(), + SourceRange(ConditionalBegin, ConditionalEnd)); + // Should we include the stuff contained by this directive? 
if (ConditionalTrue) { // Yes, remember that we are inside a conditional, then lex the next token. @@ -1784,9 +1982,6 @@ void Preprocessor::HandleIfDirective(Token &IfToken, SkipExcludedConditionalBlock(IfToken.getLocation(), /*Foundnonskip*/false, /*FoundElse*/false); } - - if (Callbacks) - Callbacks->If(SourceRange(ConditionalBegin, ConditionalEnd)); } /// HandleEndifDirective - Implements the #endif directive. @@ -1812,7 +2007,7 @@ void Preprocessor::HandleEndifDirective(Token &EndifToken) { "This code should only be reachable in the non-skipping case!"); if (Callbacks) - Callbacks->Endif(); + Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc); } /// HandleElseDirective - Implements the #else directive. @@ -1836,12 +2031,12 @@ void Preprocessor::HandleElseDirective(Token &Result) { // If this is a #else with a #else before it, report the error. if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else); + if (Callbacks) + Callbacks->Else(Result.getLocation(), CI.IfLoc); + // Finally, skip the rest of the contents of this block. SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true, /*FoundElse*/true, Result.getLocation()); - - if (Callbacks) - Callbacks->Else(); } /// HandleElifDirective - Implements the #elif directive. @@ -1868,12 +2063,13 @@ void Preprocessor::HandleElifDirective(Token &ElifToken) { // If this is a #elif with a #else before it, report the error. if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else); + + if (Callbacks) + Callbacks->Elif(ElifToken.getLocation(), + SourceRange(ConditionalBegin, ConditionalEnd), CI.IfLoc); // Finally, skip the rest of the contents of this block. 
SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true, /*FoundElse*/CI.FoundElse, ElifToken.getLocation()); - - if (Callbacks) - Callbacks->Elif(SourceRange(ConditionalBegin, ConditionalEnd)); } diff --git a/contrib/llvm/tools/clang/lib/Lex/PPExpressions.cpp b/contrib/llvm/tools/clang/lib/Lex/PPExpressions.cpp index 20f624a..7cac63e 100644 --- a/contrib/llvm/tools/clang/lib/Lex/PPExpressions.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/PPExpressions.cpp @@ -197,7 +197,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, PP.Diag(PeekTok, diag::err_pp_expected_value_in_expr); return true; case tok::numeric_constant: { - llvm::SmallString<64> IntegerBuffer; + SmallString<64> IntegerBuffer; bool NumberInvalid = false; StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer, &NumberInvalid); @@ -215,9 +215,13 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, } assert(Literal.isIntegerLiteral() && "Unknown ppnumber"); + // Complain about, and drop, any ud-suffix. + if (Literal.hasUDSuffix()) + PP.Diag(PeekTok, diag::err_pp_invalid_udl) << /*integer*/1; + // long long is a C99 feature. - if (!PP.getLangOptions().C99 && Literal.isLongLong) - PP.Diag(PeekTok, PP.getLangOptions().CPlusPlus0x ? + if (!PP.getLangOpts().C99 && Literal.isLongLong) + PP.Diag(PeekTok, PP.getLangOpts().CPlusPlus0x ? diag::warn_cxx98_compat_longlong : diag::ext_longlong); // Parse the integer literal into Result. @@ -251,7 +255,11 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, case tok::wide_char_constant: { // L'x' case tok::utf16_char_constant: // u'x' case tok::utf32_char_constant: // U'x' - llvm::SmallString<32> CharBuffer; + // Complain about, and drop, any ud-suffix. 
+ if (PeekTok.hasUDSuffix()) + PP.Diag(PeekTok, diag::err_pp_invalid_udl) << /*character*/0; + + SmallString<32> CharBuffer; bool CharInvalid = false; StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid); if (CharInvalid) @@ -282,7 +290,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, Val = Literal.getValue(); // Set the signedness. UTF-16 and UTF-32 are always unsigned if (!Literal.isUTF16() && !Literal.isUTF32()) - Val.setIsUnsigned(!PP.getLangOptions().CharIsSigned); + Val.setIsUnsigned(!PP.getLangOpts().CharIsSigned); if (Result.Val.getBitWidth() > Val.getBitWidth()) { Result.Val = Val.extend(Result.Val.getBitWidth()); @@ -646,7 +654,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, case tok::comma: // Comma is invalid in pp expressions in c89/c++ mode, but is valid in C99 // if not being evaluated. - if (!PP.getLangOptions().C99 || ValueLive) + if (!PP.getLangOpts().C99 || ValueLive) PP.Diag(OpLoc, diag::ext_pp_comma_expr) << LHS.getRange() << RHS.getRange(); Res = RHS.Val; // LHS = LHS,RHS -> RHS. 
@@ -703,8 +711,6 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, LHS.Val = Res; LHS.setEnd(RHS.getRange().getEnd()); } - - return false; } /// EvaluateDirectiveExpression - Evaluate an integer constant expression that diff --git a/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp b/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp index 25a98ae..b6689df 100644 --- a/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp @@ -16,8 +16,12 @@ #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/LexDiagnostic.h" +#include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/PathV2.h" +#include "llvm/ADT/StringSwitch.h" using namespace clang; PPCallbacks::~PPCallbacks() {} @@ -198,6 +202,31 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, CurLexerKind = CLK_TokenLexer; } +/// \brief Compute the relative path that names the given file relative to +/// the given directory. +static void computeRelativePath(FileManager &FM, const DirectoryEntry *Dir, + const FileEntry *File, + SmallString<128> &Result) { + Result.clear(); + + StringRef FilePath = File->getDir()->getName(); + StringRef Path = FilePath; + while (!Path.empty()) { + if (const DirectoryEntry *CurDir = FM.getDirectory(Path)) { + if (CurDir == Dir) { + Result = FilePath.substr(Path.size()); + llvm::sys::path::append(Result, + llvm::sys::path::filename(File->getName())); + return; + } + } + + Path = llvm::sys::path::parent_path(Path); + } + + Result = File->getName(); +} + /// HandleEndOfFile - This callback is invoked when the lexer hits the end of /// the current file. This either returns the EOF token or pops a level off /// the include stack and keeps going. 
@@ -216,8 +245,11 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { } } - // Complain about reaching an EOF within arc_cf_code_audited. - if (PragmaARCCFCodeAuditedLoc.isValid()) { + // Complain about reaching a true EOF within arc_cf_code_audited. + // We don't want to complain about reaching the end of a macro + // instantiation or a _Pragma. + if (PragmaARCCFCodeAuditedLoc.isValid() && + !isEndOfMacro && !(CurLexer && CurLexer->Is_PragmaLexer)) { Diag(PragmaARCCFCodeAuditedLoc, diag::err_pp_eof_in_arc_cf_code_audited); // Recover by leaving immediately. @@ -296,15 +328,17 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { CurLexer->BufferPtr = EndPos; CurLexer->FormTokenWithChars(Result, EndPos, tok::eof); - // We're done with the #included file. - CurLexer.reset(); + if (!isIncrementalProcessingEnabled()) + // We're done with lexing. + CurLexer.reset(); } else { assert(CurPTHLexer && "Got EOF but no current lexer set!"); CurPTHLexer->getEOF(Result); CurPTHLexer.reset(); } - - CurPPLexer = 0; + + if (!isIncrementalProcessingEnabled()) + CurPPLexer = 0; // This is the end of the top-level file. 'WarnUnusedMacroLocs' has collected // all macro locations that we need to warn because they are not used. @@ -312,6 +346,48 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { I=WarnUnusedMacroLocs.begin(), E=WarnUnusedMacroLocs.end(); I!=E; ++I) Diag(*I, diag::pp_macro_not_used); + // If we are building a module that has an umbrella header, make sure that + // each of the headers within the directory covered by the umbrella header + // was actually included by the umbrella header. 
+ if (Module *Mod = getCurrentModule()) { + if (Mod->getUmbrellaHeader()) { + SourceLocation StartLoc + = SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID()); + + if (getDiagnostics().getDiagnosticLevel( + diag::warn_uncovered_module_header, + StartLoc) != DiagnosticsEngine::Ignored) { + ModuleMap &ModMap = getHeaderSearchInfo().getModuleMap(); + typedef llvm::sys::fs::recursive_directory_iterator + recursive_directory_iterator; + const DirectoryEntry *Dir = Mod->getUmbrellaDir(); + llvm::error_code EC; + for (recursive_directory_iterator Entry(Dir->getName(), EC), End; + Entry != End && !EC; Entry.increment(EC)) { + using llvm::StringSwitch; + + // Check whether this entry has an extension typically associated with + // headers. + if (!StringSwitch<bool>(llvm::sys::path::extension(Entry->path())) + .Cases(".h", ".H", ".hh", ".hpp", true) + .Default(false)) + continue; + + if (const FileEntry *Header = getFileManager().getFile(Entry->path())) + if (!getSourceManager().hasFileInfo(Header)) { + if (!ModMap.isHeaderInUnavailableModule(Header)) { + // Find the relative path that would access this header. 
+ SmallString<128> RelativePath; + computeRelativePath(FileMgr, Dir, Header, RelativePath); + Diag(StartLoc, diag::warn_uncovered_module_header) + << RelativePath; + } + } + } + } + } + } + return true; } diff --git a/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp b/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp index e10c95c..fe70585 100644 --- a/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp @@ -24,7 +24,7 @@ #include "clang/Lex/LiteralSupport.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Config/config.h" +#include "llvm/Config/llvm-config.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/ErrorHandling.h" #include <cstdio> @@ -47,13 +47,18 @@ MacroInfo *Preprocessor::getInfoForMacro(IdentifierInfo *II) const { /// setMacroInfo - Specify a macro for this identifier. /// -void Preprocessor::setMacroInfo(IdentifierInfo *II, MacroInfo *MI) { +void Preprocessor::setMacroInfo(IdentifierInfo *II, MacroInfo *MI, + bool LoadedFromAST) { if (MI) { Macros[II] = MI; II->setHasMacroDefinition(true); + if (II->isFromAST() && !LoadedFromAST) + II->setChangedSinceDeserialization(); } else if (II->hasMacroDefinition()) { Macros.erase(II); II->setHasMacroDefinition(false); + if (II->isFromAST() && !LoadedFromAST) + II->setChangedSinceDeserialization(); } } @@ -96,7 +101,7 @@ void Preprocessor::RegisterBuiltinMacros() { Ident__has_warning = RegisterBuiltinMacro(*this, "__has_warning"); // Microsoft Extensions. - if (Features.MicrosoftExt) + if (LangOpts.MicrosoftExt) Ident__pragma = RegisterBuiltinMacro(*this, "__pragma"); else Ident__pragma = 0; @@ -112,6 +117,11 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI, // If the token isn't an identifier, it's always literally expanded. if (II == 0) return true; + // If the information about this identifier is out of date, update it from + // the external source. 
+ if (II->isOutOfDate()) + PP.getExternalSource()->updateOutOfDateIdentifier(*II); + // If the identifier is a macro, and if that macro is enabled, it may be // expanded so it's not a trivial expansion. if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled() && @@ -296,8 +306,10 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // unexpandable. if (IdentifierInfo *NewII = Identifier.getIdentifierInfo()) { if (MacroInfo *NewMI = getMacroInfo(NewII)) - if (!NewMI->isEnabled() || NewMI == MI) + if (!NewMI->isEnabled() || NewMI == MI) { Identifier.setFlag(Token::DisableExpand); + Diag(Identifier, diag::pp_disabled_macro_expansion); + } } // Since this is not an identifier token, it can't be macro expanded, so @@ -421,8 +433,8 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, // Empty arguments are standard in C99 and C++0x, and are supported as an extension in // other modes. - if (ArgTokens.size() == ArgTokenStart && !Features.C99) - Diag(Tok, Features.CPlusPlus0x ? + if (ArgTokens.size() == ArgTokenStart && !LangOpts.C99) + Diag(Tok, LangOpts.CPlusPlus0x ? diag::warn_cxx98_compat_empty_fnmacro_arg : diag::ext_empty_fnmacro_arg); @@ -576,9 +588,15 @@ static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc, /// HasFeature - Return true if we recognize and implement the feature /// specified by the identifier as a standard language feature. static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { - const LangOptions &LangOpts = PP.getLangOptions(); + const LangOptions &LangOpts = PP.getLangOpts(); + StringRef Feature = II->getName(); - return llvm::StringSwitch<bool>(II->getName()) + // Normalize the feature name, __foo__ becomes foo. 
+ if (Feature.startswith("__") && Feature.endswith("__") && Feature.size() >= 4) + Feature = Feature.substr(2, Feature.size() - 4); + + return llvm::StringSwitch<bool>(Feature) + .Case("address_sanitizer", LangOpts.AddressSanitizer) .Case("attribute_analyzer_noreturn", true) .Case("attribute_availability", true) .Case("attribute_cf_returns_not_retained", true) @@ -603,48 +621,60 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("objc_arc", LangOpts.ObjCAutoRefCount) .Case("objc_arc_weak", LangOpts.ObjCAutoRefCount && LangOpts.ObjCRuntimeHasWeak) + .Case("objc_default_synthesize_properties", LangOpts.ObjC2) .Case("objc_fixed_enum", LangOpts.ObjC2) .Case("objc_instancetype", LangOpts.ObjC2) + .Case("objc_modules", LangOpts.ObjC2 && LangOpts.Modules) .Case("objc_nonfragile_abi", LangOpts.ObjCNonFragileABI) .Case("objc_weak_class", LangOpts.ObjCNonFragileABI) .Case("ownership_holds", true) .Case("ownership_returns", true) .Case("ownership_takes", true) - // C1X features - .Case("c_alignas", LangOpts.C1X) - .Case("c_generic_selections", LangOpts.C1X) - .Case("c_static_assert", LangOpts.C1X) - // C++0x features + .Case("objc_bool", true) + .Case("objc_subscripting", LangOpts.ObjCNonFragileABI) + .Case("objc_array_literals", LangOpts.ObjC2) + .Case("objc_dictionary_literals", LangOpts.ObjC2) + .Case("arc_cf_code_audited", true) + // C11 features + .Case("c_alignas", LangOpts.C11) + .Case("c_atomic", LangOpts.C11) + .Case("c_generic_selections", LangOpts.C11) + .Case("c_static_assert", LangOpts.C11) + // C++11 features .Case("cxx_access_control_sfinae", LangOpts.CPlusPlus0x) .Case("cxx_alias_templates", LangOpts.CPlusPlus0x) .Case("cxx_alignas", LangOpts.CPlusPlus0x) + .Case("cxx_atomic", LangOpts.CPlusPlus0x) .Case("cxx_attributes", LangOpts.CPlusPlus0x) .Case("cxx_auto_type", LangOpts.CPlusPlus0x) - //.Case("cxx_constexpr", false); + .Case("cxx_constexpr", LangOpts.CPlusPlus0x) .Case("cxx_decltype", LangOpts.CPlusPlus0x) + 
.Case("cxx_decltype_incomplete_return_types", LangOpts.CPlusPlus0x) .Case("cxx_default_function_template_args", LangOpts.CPlusPlus0x) + .Case("cxx_defaulted_functions", LangOpts.CPlusPlus0x) .Case("cxx_delegating_constructors", LangOpts.CPlusPlus0x) .Case("cxx_deleted_functions", LangOpts.CPlusPlus0x) .Case("cxx_explicit_conversions", LangOpts.CPlusPlus0x) - //.Case("cxx_generalized_initializers", LangOpts.CPlusPlus0x) + .Case("cxx_generalized_initializers", LangOpts.CPlusPlus0x) .Case("cxx_implicit_moves", LangOpts.CPlusPlus0x) //.Case("cxx_inheriting_constructors", false) .Case("cxx_inline_namespaces", LangOpts.CPlusPlus0x) - //.Case("cxx_lambdas", false) + .Case("cxx_lambdas", LangOpts.CPlusPlus0x) + .Case("cxx_local_type_template_args", LangOpts.CPlusPlus0x) .Case("cxx_nonstatic_member_init", LangOpts.CPlusPlus0x) .Case("cxx_noexcept", LangOpts.CPlusPlus0x) .Case("cxx_nullptr", LangOpts.CPlusPlus0x) .Case("cxx_override_control", LangOpts.CPlusPlus0x) .Case("cxx_range_for", LangOpts.CPlusPlus0x) - //.Case("cxx_raw_string_literals", false) + .Case("cxx_raw_string_literals", LangOpts.CPlusPlus0x) .Case("cxx_reference_qualified_functions", LangOpts.CPlusPlus0x) .Case("cxx_rvalue_references", LangOpts.CPlusPlus0x) .Case("cxx_strong_enums", LangOpts.CPlusPlus0x) .Case("cxx_static_assert", LangOpts.CPlusPlus0x) .Case("cxx_trailing_return", LangOpts.CPlusPlus0x) - //.Case("cxx_unicode_literals", false) - //.Case("cxx_unrestricted_unions", false) - //.Case("cxx_user_literals", false) + .Case("cxx_unicode_literals", LangOpts.CPlusPlus0x) + .Case("cxx_unrestricted_unions", LangOpts.CPlusPlus0x) + .Case("cxx_user_literals", LangOpts.CPlusPlus0x) .Case("cxx_variadic_templates", LangOpts.CPlusPlus0x) // Type traits .Case("has_nothrow_assign", LangOpts.CPlusPlus) @@ -668,6 +698,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { PP.getIdentifierInfo("__is_empty")->getTokenID() != tok::identifier) .Case("is_enum", LangOpts.CPlusPlus) + 
.Case("is_final", LangOpts.CPlusPlus) .Case("is_literal", LangOpts.CPlusPlus) .Case("is_standard_layout", LangOpts.CPlusPlus) // __is_pod is available only if the horrible @@ -680,8 +711,11 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { != tok::identifier) .Case("is_polymorphic", LangOpts.CPlusPlus) .Case("is_trivial", LangOpts.CPlusPlus) + .Case("is_trivially_assignable", LangOpts.CPlusPlus) + .Case("is_trivially_constructible", LangOpts.CPlusPlus) .Case("is_trivially_copyable", LangOpts.CPlusPlus) .Case("is_union", LangOpts.CPlusPlus) + .Case("modules", LangOpts.Modules) .Case("tls", PP.getTargetInfo().isTLSSupported()) .Case("underlying_type", LangOpts.CPlusPlus) .Default(false); @@ -700,19 +734,28 @@ static bool HasExtension(const Preprocessor &PP, const IdentifierInfo *II) { DiagnosticsEngine::Ext_Error) return false; - const LangOptions &LangOpts = PP.getLangOptions(); + const LangOptions &LangOpts = PP.getLangOpts(); + StringRef Extension = II->getName(); + + // Normalize the extension name, __foo__ becomes foo. + if (Extension.startswith("__") && Extension.endswith("__") && + Extension.size() >= 4) + Extension = Extension.substr(2, Extension.size() - 4); // Because we inherit the feature list from HasFeature, this string switch // must be less restrictive than HasFeature's. - return llvm::StringSwitch<bool>(II->getName()) - // C1X features supported by other languages as extensions. + return llvm::StringSwitch<bool>(Extension) + // C11 features supported by other languages as extensions. .Case("c_alignas", true) + .Case("c_atomic", true) .Case("c_generic_selections", true) .Case("c_static_assert", true) // C++0x features supported by other languages as extensions. 
+ .Case("cxx_atomic", LangOpts.CPlusPlus) .Case("cxx_deleted_functions", LangOpts.CPlusPlus) .Case("cxx_explicit_conversions", LangOpts.CPlusPlus) .Case("cxx_inline_namespaces", LangOpts.CPlusPlus) + .Case("cxx_local_type_template_args", LangOpts.CPlusPlus) .Case("cxx_nonstatic_member_init", LangOpts.CPlusPlus) .Case("cxx_override_control", LangOpts.CPlusPlus) .Case("cxx_range_for", LangOpts.CPlusPlus) @@ -724,7 +767,12 @@ static bool HasExtension(const Preprocessor &PP, const IdentifierInfo *II) { /// HasAttribute - Return true if we recognize and implement the attribute /// specified by the given identifier. static bool HasAttribute(const IdentifierInfo *II) { - return llvm::StringSwitch<bool>(II->getName()) + StringRef Name = II->getName(); + // Normalize the attribute name, __foo__ becomes foo. + if (Name.startswith("__") && Name.endswith("__") && Name.size() >= 4) + Name = Name.substr(2, Name.size() - 4); + + return llvm::StringSwitch<bool>(Name) #include "clang/Lex/AttrSpellings.inc" .Default(false); } @@ -753,7 +801,7 @@ static bool EvaluateHasIncludeCommon(Token &Tok, PP.getCurrentLexer()->LexIncludeFilename(Tok); // Reserve a buffer to get the spelling. - llvm::SmallString<128> FilenameBuffer; + SmallString<128> FilenameBuffer; StringRef Filename; SourceLocation EndLoc; @@ -784,6 +832,16 @@ static bool EvaluateHasIncludeCommon(Token &Tok, return false; } + // Get ')'. + PP.LexNonComment(Tok); + + // Ensure we have a trailing ). + if (Tok.isNot(tok::r_paren)) { + PP.Diag(Tok.getLocation(), diag::err_pp_missing_rparen) << II->getName(); + PP.Diag(LParenLoc, diag::note_matching) << "("; + return false; + } + bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename); // If GetIncludeFilenameSpelling set the start ptr to null, there was an // error. @@ -795,20 +853,8 @@ static bool EvaluateHasIncludeCommon(Token &Tok, const FileEntry *File = PP.LookupFile(Filename, isAngled, LookupFrom, CurDir, NULL, NULL, NULL); - // Get the result value. 
Result = true means the file exists. - bool Result = File != 0; - - // Get ')'. - PP.LexNonComment(Tok); - - // Ensure we have a trailing ). - if (Tok.isNot(tok::r_paren)) { - PP.Diag(Tok.getLocation(), diag::err_pp_missing_rparen) << II->getName(); - PP.Diag(LParenLoc, diag::note_matching) << "("; - return false; - } - - return Result; + // Get the result value. A result of true means the file exists. + return File != 0; } /// EvaluateHasInclude - Process a '__has_include("path")' expression. @@ -855,7 +901,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { ++NumBuiltinMacroExpanded; - llvm::SmallString<128> TmpBuffer; + SmallString<128> TmpBuffer; llvm::raw_svector_ostream OS(TmpBuffer); // Set up the return result. @@ -902,7 +948,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { } // Escape this filename. Turn '\' -> '\\' '"' -> '\"' - llvm::SmallString<128> FN; + SmallString<128> FN; if (PLoc.isValid()) { FN += PLoc.getFilename(); Lexer::Stringify(FN); @@ -1010,7 +1056,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { } OS << (int)Value; - Tok.setKind(tok::numeric_constant); + if (IsValid) + Tok.setKind(tok::numeric_constant); } else if (II == Ident__has_include || II == Ident__has_include_next) { // The argument to these two builtins should be a parenthesized @@ -1049,6 +1096,9 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // from macro expansion. SmallVector<Token, 4> StrToks; while (Tok.is(tok::string_literal)) { + // Complain about, and drop, any ud-suffix. 
+ if (Tok.hasUDSuffix()) + Diag(Tok, diag::err_invalid_string_udl); StrToks.push_back(Tok); LexUnexpandedToken(Tok); } diff --git a/contrib/llvm/tools/clang/lib/Lex/PTHLexer.cpp b/contrib/llvm/tools/clang/lib/Lex/PTHLexer.cpp index e0c4cf0..f104f96 100644 --- a/contrib/llvm/tools/clang/lib/Lex/PTHLexer.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/PTHLexer.cpp @@ -438,7 +438,7 @@ static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) { PTHManager *PTHManager::Create(const std::string &file, DiagnosticsEngine &Diags) { // Memory map the PTH file. - llvm::OwningPtr<llvm::MemoryBuffer> File; + OwningPtr<llvm::MemoryBuffer> File; if (llvm::MemoryBuffer::getFile(file, File)) { // FIXME: Add ec.message() to this diag. @@ -488,7 +488,7 @@ PTHManager *PTHManager::Create(const std::string &file, return 0; // FIXME: Proper error diagnostic? } - llvm::OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg)); + OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg)); // Warn if the PTH file is empty. We still want to create a PTHManager // as the PTH could be used with -include-pth. @@ -514,7 +514,7 @@ PTHManager *PTHManager::Create(const std::string &file, return 0; } - llvm::OwningPtr<PTHStringIdLookup> SL(PTHStringIdLookup::Create(StringIdTable, + OwningPtr<PTHStringIdLookup> SL(PTHStringIdLookup::Create(StringIdTable, BufBeg)); // Get the location of the spelling cache. diff --git a/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp b/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp index f6532c2..e2a192b 100644 --- a/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp @@ -115,10 +115,61 @@ void Preprocessor::HandlePragmaDirective(unsigned Introducer) { DiscardUntilEndOfDirective(); } +namespace { +/// \brief Helper class for \see Preprocessor::Handle_Pragma. 
+class LexingFor_PragmaRAII { + Preprocessor &PP; + bool InMacroArgPreExpansion; + bool Failed; + Token &OutTok; + Token PragmaTok; + +public: + LexingFor_PragmaRAII(Preprocessor &PP, bool InMacroArgPreExpansion, + Token &Tok) + : PP(PP), InMacroArgPreExpansion(InMacroArgPreExpansion), + Failed(false), OutTok(Tok) { + if (InMacroArgPreExpansion) { + PragmaTok = OutTok; + PP.EnableBacktrackAtThisPos(); + } + } + + ~LexingFor_PragmaRAII() { + if (InMacroArgPreExpansion) { + if (Failed) { + PP.CommitBacktrackedTokens(); + } else { + PP.Backtrack(); + OutTok = PragmaTok; + } + } + } + + void failed() { + Failed = true; + } +}; +} + /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then /// return the first token after the directive. The _Pragma token has just /// been read into 'Tok'. void Preprocessor::Handle_Pragma(Token &Tok) { + + // This works differently if we are pre-expanding a macro argument. + // In that case we don't actually "activate" the pragma now, we only lex it + // until we are sure it is lexically correct and then we backtrack so that + // we activate the pragma whenever we encounter the tokens again in the token + // stream. This ensures that we will activate it in the correct location + // or that we will ignore it if it never enters the token stream, e.g: + // + // #define EMPTY(x) + // #define INACTIVE(x) EMPTY(x) + // INACTIVE(_Pragma("clang diagnostic ignored \"-Wconversion\"")) + + LexingFor_PragmaRAII _PragmaLexing(*this, InMacroArgPreExpansion, Tok); + // Remember the pragma token location. SourceLocation PragmaLoc = Tok.getLocation(); @@ -126,27 +177,45 @@ void Preprocessor::Handle_Pragma(Token &Tok) { Lex(Tok); if (Tok.isNot(tok::l_paren)) { Diag(PragmaLoc, diag::err__Pragma_malformed); - return; + return _PragmaLexing.failed(); } // Read the '"..."'. 
Lex(Tok); if (Tok.isNot(tok::string_literal) && Tok.isNot(tok::wide_string_literal)) { Diag(PragmaLoc, diag::err__Pragma_malformed); - return; + // Skip this token, and the ')', if present. + if (Tok.isNot(tok::r_paren)) + Lex(Tok); + if (Tok.is(tok::r_paren)) + Lex(Tok); + return _PragmaLexing.failed(); + } + + if (Tok.hasUDSuffix()) { + Diag(Tok, diag::err_invalid_string_udl); + // Skip this token, and the ')', if present. + Lex(Tok); + if (Tok.is(tok::r_paren)) + Lex(Tok); + return _PragmaLexing.failed(); } // Remember the string. - std::string StrVal = getSpelling(Tok); + Token StrTok = Tok; // Read the ')'. Lex(Tok); if (Tok.isNot(tok::r_paren)) { Diag(PragmaLoc, diag::err__Pragma_malformed); - return; + return _PragmaLexing.failed(); } + if (InMacroArgPreExpansion) + return; + SourceLocation RParenLoc = Tok.getLocation(); + std::string StrVal = getSpelling(StrTok); // The _Pragma is lexically sound. Destringize according to C99 6.10.9.1: // "The string literal is destringized by deleting the L prefix, if present, @@ -304,6 +373,8 @@ void Preprocessor::HandlePragmaPoison(Token &PoisonTok) { // Finally, poison it! II->setIsPoisoned(); + if (II->isFromAST()) + II->setChangedSinceDeserialization(); } } @@ -351,7 +422,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { return; // Reserve a buffer to get the spelling. 
- llvm::SmallString<128> FilenameBuffer; + SmallString<128> FilenameBuffer; bool Invalid = false; StringRef Filename = getSpelling(FilenameTok, FilenameBuffer, &Invalid); if (Invalid) @@ -440,6 +511,8 @@ void Preprocessor::HandlePragmaComment(Token &Tok) { // "foo " "bar" "Baz" SmallVector<Token, 4> StrToks; while (Tok.is(tok::string_literal)) { + if (Tok.hasUDSuffix()) + Diag(Tok, diag::err_invalid_string_udl); StrToks.push_back(Tok); Lex(Tok); } @@ -516,6 +589,8 @@ void Preprocessor::HandlePragmaMessage(Token &Tok) { // "foo " "bar" "Baz" SmallVector<Token, 4> StrToks; while (Tok.is(tok::string_literal)) { + if (Tok.hasUDSuffix()) + Diag(Tok, diag::err_invalid_string_udl); StrToks.push_back(Tok); Lex(Tok); } @@ -575,6 +650,11 @@ IdentifierInfo *Preprocessor::ParsePragmaPushOrPopMacro(Token &Tok) { return 0; } + if (Tok.hasUDSuffix()) { + Diag(Tok, diag::err_invalid_string_udl); + return 0; + } + // Remember the macro string. std::string StrVal = getSpelling(Tok); @@ -661,6 +741,111 @@ void Preprocessor::HandlePragmaPopMacro(Token &PopMacroTok) { } } +void Preprocessor::HandlePragmaIncludeAlias(Token &Tok) { + // We will either get a quoted filename or a bracketed filename, and we + // have to track which we got. The first filename is the source name, + // and the second name is the mapped filename. If the first is quoted, + // the second must be as well (cannot mix and match quotes and brackets). 
+ + // Get the open paren + Lex(Tok); + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, diag::warn_pragma_include_alias_expected) << "("; + return; + } + + // We expect either a quoted string literal, or a bracketed name + Token SourceFilenameTok; + CurPPLexer->LexIncludeFilename(SourceFilenameTok); + if (SourceFilenameTok.is(tok::eod)) { + // The diagnostic has already been handled + return; + } + + StringRef SourceFileName; + SmallString<128> FileNameBuffer; + if (SourceFilenameTok.is(tok::string_literal) || + SourceFilenameTok.is(tok::angle_string_literal)) { + SourceFileName = getSpelling(SourceFilenameTok, FileNameBuffer); + } else if (SourceFilenameTok.is(tok::less)) { + // This could be a path instead of just a name + FileNameBuffer.push_back('<'); + SourceLocation End; + if (ConcatenateIncludeName(FileNameBuffer, End)) + return; // Diagnostic already emitted + SourceFileName = FileNameBuffer.str(); + } else { + Diag(Tok, diag::warn_pragma_include_alias_expected_filename); + return; + } + FileNameBuffer.clear(); + + // Now we expect a comma, followed by another include name + Lex(Tok); + if (Tok.isNot(tok::comma)) { + Diag(Tok, diag::warn_pragma_include_alias_expected) << ","; + return; + } + + Token ReplaceFilenameTok; + CurPPLexer->LexIncludeFilename(ReplaceFilenameTok); + if (ReplaceFilenameTok.is(tok::eod)) { + // The diagnostic has already been handled + return; + } + + StringRef ReplaceFileName; + if (ReplaceFilenameTok.is(tok::string_literal) || + ReplaceFilenameTok.is(tok::angle_string_literal)) { + ReplaceFileName = getSpelling(ReplaceFilenameTok, FileNameBuffer); + } else if (ReplaceFilenameTok.is(tok::less)) { + // This could be a path instead of just a name + FileNameBuffer.push_back('<'); + SourceLocation End; + if (ConcatenateIncludeName(FileNameBuffer, End)) + return; // Diagnostic already emitted + ReplaceFileName = FileNameBuffer.str(); + } else { + Diag(Tok, diag::warn_pragma_include_alias_expected_filename); + return; + } + + // Finally, we 
expect the closing paren + Lex(Tok); + if (Tok.isNot(tok::r_paren)) { + Diag(Tok, diag::warn_pragma_include_alias_expected) << ")"; + return; + } + + // Now that we have the source and target filenames, we need to make sure + // they're both of the same type (angled vs non-angled) + StringRef OriginalSource = SourceFileName; + + bool SourceIsAngled = + GetIncludeFilenameSpelling(SourceFilenameTok.getLocation(), + SourceFileName); + bool ReplaceIsAngled = + GetIncludeFilenameSpelling(ReplaceFilenameTok.getLocation(), + ReplaceFileName); + if (!SourceFileName.empty() && !ReplaceFileName.empty() && + (SourceIsAngled != ReplaceIsAngled)) { + unsigned int DiagID; + if (SourceIsAngled) + DiagID = diag::warn_pragma_include_alias_mismatch_angle; + else + DiagID = diag::warn_pragma_include_alias_mismatch_quote; + + Diag(SourceFilenameTok.getLocation(), DiagID) + << SourceFileName + << ReplaceFileName; + + return; + } + + // Now we can let the include handler know about this mapping + getHeaderSearchInfo().AddIncludeAlias(OriginalSource, ReplaceFileName); +} + /// AddPragmaHandler - Add the specified pragma handler to the preprocessor. /// If 'Namespace' is non-null, then it is a token required to exist on the /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". @@ -712,8 +897,10 @@ void Preprocessor::RemovePragmaHandler(StringRef Namespace, // If this is a non-default namespace and it is now empty, remove // it. - if (NS != PragmaHandlers && NS->IsEmpty()) + if (NS != PragmaHandlers && NS->IsEmpty()) { PragmaHandlers->RemovePragmaHandler(NS); + delete NS; + } } bool Preprocessor::LexOnOffSwitch(tok::OnOffSwitch &Result) { @@ -939,6 +1126,15 @@ struct PragmaCommentHandler : public PragmaHandler { } }; +/// PragmaIncludeAliasHandler - "#pragma include_alias("...")". 
+struct PragmaIncludeAliasHandler : public PragmaHandler { + PragmaIncludeAliasHandler() : PragmaHandler("include_alias") {} + virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + Token &IncludeAliasTok) { + PP.HandlePragmaIncludeAlias(IncludeAliasTok); + } +}; + /// PragmaMessageHandler - "#pragma message("...")". struct PragmaMessageHandler : public PragmaHandler { PragmaMessageHandler() : PragmaHandler("message") {} @@ -1089,7 +1285,8 @@ void Preprocessor::RegisterBuiltinPragmas() { AddPragmaHandler("STDC", new PragmaSTDC_UnknownHandler()); // MS extensions. - if (Features.MicrosoftExt) { + if (LangOpts.MicrosoftExt) { AddPragmaHandler(new PragmaCommentHandler()); + AddPragmaHandler(new PragmaIncludeAliasHandler()); } } diff --git a/contrib/llvm/tools/clang/lib/Lex/PreprocessingRecord.cpp b/contrib/llvm/tools/clang/lib/Lex/PreprocessingRecord.cpp index 2816609..89d19fd 100644 --- a/contrib/llvm/tools/clang/lib/Lex/PreprocessingRecord.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/PreprocessingRecord.cpp @@ -38,10 +38,13 @@ InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec, } PreprocessingRecord::PreprocessingRecord(SourceManager &SM, - bool IncludeNestedMacroExpansions) - : SourceMgr(SM), IncludeNestedMacroExpansions(IncludeNestedMacroExpansions), + bool RecordConditionalDirectives) + : SourceMgr(SM), + RecordCondDirectives(RecordConditionalDirectives), CondDirectiveNextIdx(0), ExternalSource(0) { + if (RecordCondDirectives) + CondDirectiveStack.push_back(CondDirectiveNextIdx++); } /// \brief Returns a pair of [Begin, End) iterators of preprocessed entities @@ -49,35 +52,108 @@ PreprocessingRecord::PreprocessingRecord(SourceManager &SM, std::pair<PreprocessingRecord::iterator, PreprocessingRecord::iterator> PreprocessingRecord::getPreprocessedEntitiesInRange(SourceRange Range) { if (Range.isInvalid()) - return std::make_pair(iterator(this, 0), iterator(this, 0)); - 
assert(!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),Range.getBegin())); + return std::make_pair(iterator(), iterator()); + + if (CachedRangeQuery.Range == Range) { + return std::make_pair(iterator(this, CachedRangeQuery.Result.first), + iterator(this, CachedRangeQuery.Result.second)); + } + + std::pair<PPEntityID, PPEntityID> + Res = getPreprocessedEntitiesInRangeSlow(Range); + + CachedRangeQuery.Range = Range; + CachedRangeQuery.Result = Res; + + return std::make_pair(iterator(this, Res.first), iterator(this, Res.second)); +} + +static bool isPreprocessedEntityIfInFileID(PreprocessedEntity *PPE, FileID FID, + SourceManager &SM) { + assert(!FID.isInvalid()); + if (!PPE) + return false; + + SourceLocation Loc = PPE->getSourceRange().getBegin(); + if (Loc.isInvalid()) + return false; + + if (SM.isInFileID(SM.getFileLoc(Loc), FID)) + return true; + else + return false; +} + +/// \brief Returns true if the preprocessed entity that \arg PPEI iterator +/// points to is coming from the file \arg FID. +/// +/// Can be used to avoid implicit deserializations of preallocated +/// preprocessed entities if we only care about entities of a specific file +/// and not from files #included in the range given at +/// \see getPreprocessedEntitiesInRange. +bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) { + if (FID.isInvalid()) + return false; + + PPEntityID PPID = PPEI.Position; + if (PPID < 0) { + assert(unsigned(-PPID-1) < LoadedPreprocessedEntities.size() && + "Out-of bounds loaded preprocessed entity"); + assert(ExternalSource && "No external source to load from"); + unsigned LoadedIndex = LoadedPreprocessedEntities.size()+PPID; + if (PreprocessedEntity *PPE = LoadedPreprocessedEntities[LoadedIndex]) + return isPreprocessedEntityIfInFileID(PPE, FID, SourceMgr); + + // See if the external source can see if the entity is in the file without + // deserializing it. 
+ llvm::Optional<bool> + IsInFile = ExternalSource->isPreprocessedEntityInFileID(LoadedIndex, FID); + if (IsInFile.hasValue()) + return IsInFile.getValue(); + + // The external source did not provide a definite answer, go and deserialize + // the entity to check it. + return isPreprocessedEntityIfInFileID( + getLoadedPreprocessedEntity(LoadedIndex), + FID, SourceMgr); + } + + assert(unsigned(PPID) < PreprocessedEntities.size() && + "Out-of bounds local preprocessed entity"); + return isPreprocessedEntityIfInFileID(PreprocessedEntities[PPID], + FID, SourceMgr); +} +/// \brief Returns a pair of [Begin, End) iterators of preprocessed entities +/// that source range \arg R encompasses. +std::pair<PreprocessingRecord::PPEntityID, PreprocessingRecord::PPEntityID> +PreprocessingRecord::getPreprocessedEntitiesInRangeSlow(SourceRange Range) { + assert(Range.isValid()); + assert(!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),Range.getBegin())); + std::pair<unsigned, unsigned> Local = findLocalPreprocessedEntitiesInRange(Range); - + // Check if range spans local entities. if (!ExternalSource || SourceMgr.isLocalSourceLocation(Range.getBegin())) - return std::make_pair(iterator(this, Local.first), - iterator(this, Local.second)); - + return std::make_pair(Local.first, Local.second); + std::pair<unsigned, unsigned> Loaded = ExternalSource->findPreprocessedEntitiesInRange(Range); - + // Check if range spans local entities. if (Loaded.first == Loaded.second) - return std::make_pair(iterator(this, Local.first), - iterator(this, Local.second)); - + return std::make_pair(Local.first, Local.second); + unsigned TotalLoaded = LoadedPreprocessedEntities.size(); - + // Check if range spans loaded entities. 
if (Local.first == Local.second) - return std::make_pair(iterator(this, int(Loaded.first)-TotalLoaded), - iterator(this, int(Loaded.second)-TotalLoaded)); - + return std::make_pair(int(Loaded.first)-TotalLoaded, + int(Loaded.second)-TotalLoaded); + // Range spands loaded and local entities. - return std::make_pair(iterator(this, int(Loaded.first)-TotalLoaded), - iterator(this, Local.second)); + return std::make_pair(int(Loaded.first)-TotalLoaded, Local.second); } std::pair<unsigned, unsigned> @@ -168,33 +244,58 @@ unsigned PreprocessingRecord::findEndLocalPreprocessedEntity( return I - PreprocessedEntities.begin(); } -void PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) { +PreprocessingRecord::PPEntityID +PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) { assert(Entity); SourceLocation BeginLoc = Entity->getSourceRange().getBegin(); - + + if (!isa<class InclusionDirective>(Entity)) { + assert((PreprocessedEntities.empty() || + !SourceMgr.isBeforeInTranslationUnit(BeginLoc, + PreprocessedEntities.back()->getSourceRange().getBegin())) && + "a macro directive was encountered out-of-order"); + PreprocessedEntities.push_back(Entity); + return getPPEntityID(PreprocessedEntities.size()-1, /*isLoaded=*/false); + } + // Check normal case, this entity begin location is after the previous one. if (PreprocessedEntities.empty() || !SourceMgr.isBeforeInTranslationUnit(BeginLoc, PreprocessedEntities.back()->getSourceRange().getBegin())) { PreprocessedEntities.push_back(Entity); - return; + return getPPEntityID(PreprocessedEntities.size()-1, /*isLoaded=*/false); } - // The entity's location is not after the previous one; this can happen rarely - // e.g. with "#include MACRO". - // Iterate the entities vector in reverse until we find the right place to - // insert the new entity. 
- for (std::vector<PreprocessedEntity *>::iterator - RI = PreprocessedEntities.end(), Begin = PreprocessedEntities.begin(); - RI != Begin; --RI) { - std::vector<PreprocessedEntity *>::iterator I = RI; + // The entity's location is not after the previous one; this can happen with + // include directives that form the filename using macros, e.g: + // "#include MACRO(STUFF)". + + typedef std::vector<PreprocessedEntity *>::iterator pp_iter; + + // Usually there are few macro expansions when defining the filename, do a + // linear search for a few entities. + unsigned count = 0; + for (pp_iter RI = PreprocessedEntities.end(), + Begin = PreprocessedEntities.begin(); + RI != Begin && count < 4; --RI, ++count) { + pp_iter I = RI; --I; if (!SourceMgr.isBeforeInTranslationUnit(BeginLoc, (*I)->getSourceRange().getBegin())) { - PreprocessedEntities.insert(RI, Entity); - return; + pp_iter insertI = PreprocessedEntities.insert(RI, Entity); + return getPPEntityID(insertI - PreprocessedEntities.begin(), + /*isLoaded=*/false); } } + + // Linear search unsuccessful. Do a binary search. + pp_iter I = std::upper_bound(PreprocessedEntities.begin(), + PreprocessedEntities.end(), + BeginLoc, + PPEntityComp<&SourceRange::getBegin>(SourceMgr)); + pp_iter insertI = PreprocessedEntities.insert(I, Entity); + return getPPEntityID(insertI - PreprocessedEntities.begin(), + /*isLoaded=*/false); } void PreprocessingRecord::SetExternalSource( @@ -258,7 +359,8 @@ MacroDefinition *PreprocessingRecord::findMacroDefinition(const MacroInfo *MI) { void PreprocessingRecord::MacroExpands(const Token &Id, const MacroInfo* MI, SourceRange Range) { - if (!IncludeNestedMacroExpansions && Id.getLocation().isMacroID()) + // We don't record nested macro expansions. 
+ if (Id.getLocation().isMacroID()) return; if (MI->isBuiltinMacro()) @@ -274,17 +376,12 @@ void PreprocessingRecord::MacroDefined(const Token &Id, SourceRange R(MI->getDefinitionLoc(), MI->getDefinitionEndLoc()); MacroDefinition *Def = new (*this) MacroDefinition(Id.getIdentifierInfo(), R); - addPreprocessedEntity(Def); - MacroDefinitions[MI] = getPPEntityID(PreprocessedEntities.size()-1, - /*isLoaded=*/false); + MacroDefinitions[MI] = addPreprocessedEntity(Def); } void PreprocessingRecord::MacroUndefined(const Token &Id, const MacroInfo *MI) { - llvm::DenseMap<const MacroInfo *, PPEntityID>::iterator Pos - = MacroDefinitions.find(MI); - if (Pos != MacroDefinitions.end()) - MacroDefinitions.erase(Pos); + MacroDefinitions.erase(MI); } void PreprocessingRecord::InclusionDirective( @@ -317,7 +414,6 @@ void PreprocessingRecord::InclusionDirective( default: llvm_unreachable("Unknown include directive kind"); - return; } clang::InclusionDirective *ID @@ -326,6 +422,95 @@ void PreprocessingRecord::InclusionDirective( addPreprocessedEntity(ID); } +bool PreprocessingRecord::rangeIntersectsConditionalDirective( + SourceRange Range) const { + if (Range.isInvalid()) + return false; + + CondDirectiveLocsTy::const_iterator + low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(), + Range.getBegin(), CondDirectiveLoc::Comp(SourceMgr)); + if (low == CondDirectiveLocs.end()) + return false; + + if (SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), low->getLoc())) + return false; + + CondDirectiveLocsTy::const_iterator + upp = std::upper_bound(low, CondDirectiveLocs.end(), + Range.getEnd(), CondDirectiveLoc::Comp(SourceMgr)); + unsigned uppIdx; + if (upp != CondDirectiveLocs.end()) + uppIdx = upp->getIdx(); + else + uppIdx = 0; + + return low->getIdx() != uppIdx; +} + +unsigned PreprocessingRecord::findCondDirectiveIdx(SourceLocation Loc) const { + if (Loc.isInvalid()) + return 0; + + CondDirectiveLocsTy::const_iterator + low = 
std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(), + Loc, CondDirectiveLoc::Comp(SourceMgr)); + if (low == CondDirectiveLocs.end()) + return 0; + return low->getIdx(); +} + +void PreprocessingRecord::addCondDirectiveLoc(CondDirectiveLoc DirLoc) { + // Ignore directives in system headers. + if (SourceMgr.isInSystemHeader(DirLoc.getLoc())) + return; + + assert(CondDirectiveLocs.empty() || + SourceMgr.isBeforeInTranslationUnit(CondDirectiveLocs.back().getLoc(), + DirLoc.getLoc())); + CondDirectiveLocs.push_back(DirLoc); +} + +void PreprocessingRecord::If(SourceLocation Loc, SourceRange ConditionRange) { + if (RecordCondDirectives) { + addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); + CondDirectiveStack.push_back(CondDirectiveNextIdx++); + } +} + +void PreprocessingRecord::Ifdef(SourceLocation Loc, const Token &MacroNameTok) { + if (RecordCondDirectives) { + addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); + CondDirectiveStack.push_back(CondDirectiveNextIdx++); + } +} + +void PreprocessingRecord::Ifndef(SourceLocation Loc,const Token &MacroNameTok) { + if (RecordCondDirectives) { + addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); + CondDirectiveStack.push_back(CondDirectiveNextIdx++); + } +} + +void PreprocessingRecord::Elif(SourceLocation Loc, SourceRange ConditionRange, + SourceLocation IfLoc) { + if (RecordCondDirectives) + addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); +} + +void PreprocessingRecord::Else(SourceLocation Loc, SourceLocation IfLoc) { + if (RecordCondDirectives) + addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); +} + +void PreprocessingRecord::Endif(SourceLocation Loc, SourceLocation IfLoc) { + if (RecordCondDirectives) { + addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); + assert(!CondDirectiveStack.empty()); + CondDirectiveStack.pop_back(); + } +} + size_t PreprocessingRecord::getTotalMemory() const 
{ return BumpAlloc.getTotalMemory() + llvm::capacity_in_bytes(MacroDefinitions) diff --git a/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp b/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp index 31662ad..06e5685 100644 --- a/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp @@ -40,7 +40,7 @@ #include "clang/Basic/FileManager.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/APFloat.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Capacity.h" @@ -54,18 +54,19 @@ Preprocessor::Preprocessor(DiagnosticsEngine &diags, LangOptions &opts, HeaderSearch &Headers, ModuleLoader &TheModuleLoader, IdentifierInfoLookup* IILookup, bool OwnsHeaders, - bool DelayInitialization) - : Diags(&diags), Features(opts), Target(target),FileMgr(Headers.getFileMgr()), + bool DelayInitialization, + bool IncrProcessing) + : Diags(&diags), LangOpts(opts), Target(target),FileMgr(Headers.getFileMgr()), SourceMgr(SM), HeaderInfo(Headers), TheModuleLoader(TheModuleLoader), - ExternalSource(0), - Identifiers(opts, IILookup), CodeComplete(0), + ExternalSource(0), Identifiers(opts, IILookup), + IncrementalProcessing(IncrProcessing), CodeComplete(0), CodeCompletionFile(0), CodeCompletionOffset(0), CodeCompletionReached(0), SkipMainFilePreamble(0, true), CurPPLexer(0), CurDirLookup(0), CurLexerKind(CLK_Lexer), Callbacks(0), MacroArgCache(0), Record(0), MIChainHead(0), MICache(0) { OwnsHeaderSearch = OwnsHeaders; - + if (!DelayInitialization) { assert(Target && "Must provide target information for PP initialization"); Initialize(*Target); @@ -74,9 +75,6 @@ Preprocessor::Preprocessor(DiagnosticsEngine &diags, LangOptions &opts, Preprocessor::~Preprocessor() { assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); - assert(((MacroExpandingLexersStack.empty() && MacroExpandedTokens.empty()) || - 
isCodeCompletionReached()) && - "Preprocessor::HandleEndOfTokenLexer should have cleared those"); while (!IncludeMacroStack.empty()) { delete IncludeMacroStack.back().TheLexer; @@ -133,11 +131,11 @@ void Preprocessor::Initialize(const TargetInfo &Target) { KeepComments = false; KeepMacroComments = false; SuppressIncludeNotFoundError = false; - AutoModuleImport = false; // Macro expansion is enabled. DisableMacroExpansion = false; InMacroArgs = false; + InMacroArgPreExpansion = false; NumCachedTokenLexers = 0; CachedLexPos = 0; @@ -157,7 +155,7 @@ void Preprocessor::Initialize(const TargetInfo &Target) { // Initialize builtin macros like __LINE__ and friends. RegisterBuiltinMacros(); - if(Features.Borland) { + if(LangOpts.Borland) { Ident__exception_info = getIdentifierInfo("_exception_info"); Ident___exception_info = getIdentifierInfo("__exception_info"); Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); @@ -171,7 +169,9 @@ void Preprocessor::Initialize(const TargetInfo &Target) { Ident__exception_info = Ident__exception_code = Ident__abnormal_termination = 0; Ident___exception_info = Ident___exception_code = Ident___abnormal_termination = 0; Ident_GetExceptionInfo = Ident_GetExceptionCode = Ident_AbnormalTermination = 0; - } + } + + HeaderInfo.setTarget(Target); } void Preprocessor::setPTHManager(PTHManager* pm) { @@ -270,6 +270,17 @@ Preprocessor::macro_end(bool IncludeExternalMacros) const { return Macros.end(); } +void Preprocessor::recomputeCurLexerKind() { + if (CurLexer) + CurLexerKind = CLK_Lexer; + else if (CurPTHLexer) + CurLexerKind = CLK_PTHLexer; + else if (CurTokenLexer) + CurLexerKind = CLK_TokenLexer; + else + CurLexerKind = CLK_CachingLexer; +} + bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, unsigned CompleteLine, unsigned CompleteColumn) { @@ -372,7 +383,12 @@ void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok, Tok.setLiteralData(DestPtr); } - +Module *Preprocessor::getCurrentModule() 
{ + if (getLangOpts().CurrentModule.empty()) + return 0; + + return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); +} //===----------------------------------------------------------------------===// // Preprocessor Initialization Methods @@ -388,19 +404,23 @@ void Preprocessor::EnterMainSourceFile() { assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); FileID MainFileID = SourceMgr.getMainFileID(); - // Enter the main file source buffer. - EnterSourceFile(MainFileID, 0, SourceLocation()); - - // If we've been asked to skip bytes in the main file (e.g., as part of a - // precompiled preamble), do so now. - if (SkipMainFilePreamble.first > 0) - CurLexer->SkipBytes(SkipMainFilePreamble.first, - SkipMainFilePreamble.second); + // If MainFileID is loaded it means we loaded an AST file, no need to enter + // a main file. + if (!SourceMgr.isLoadedFileID(MainFileID)) { + // Enter the main file source buffer. + EnterSourceFile(MainFileID, 0, SourceLocation()); - // Tell the header info that the main file was entered. If the file is later - // #imported, it won't be re-entered. - if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) - HeaderInfo.IncrementIncludeCount(FE); + // If we've been asked to skip bytes in the main file (e.g., as part of a + // precompiled preamble), do so now. + if (SkipMainFilePreamble.first > 0) + CurLexer->SkipBytes(SkipMainFilePreamble.first, + SkipMainFilePreamble.second); + + // Tell the header info that the main file was entered. If the file is later + // #imported, it won't be re-entered. + if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) + HeaderInfo.IncrementIncludeCount(FE); + } // Preprocess Predefines to populate the initial preprocessor state. llvm::MemoryBuffer *SB = @@ -437,7 +457,7 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { Identifier.getLength())); } else { // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 
- llvm::SmallString<64> IdentifierBuffer; + SmallString<64> IdentifierBuffer; StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); II = getIdentifierInfo(CleanedStr); } @@ -492,6 +512,13 @@ void Preprocessor::HandleIdentifier(Token &Identifier) { IdentifierInfo &II = *Identifier.getIdentifierInfo(); + // If the information about this identifier is out of date, update it from + // the external source. + if (II.isOutOfDate()) { + ExternalSource->updateOutOfDateIdentifier(II); + Identifier.setKind(II.getTokenID()); + } + // If this identifier was poisoned, and if it was not produced from a macro // expansion, emit an error. if (II.isPoisoned() && CurPPLexer) { @@ -500,8 +527,10 @@ void Preprocessor::HandleIdentifier(Token &Identifier) { // If this is a macro to be expanded, do it. if (MacroInfo *MI = getMacroInfo(&II)) { - if (!DisableMacroExpansion && !Identifier.isExpandDisabled()) { - if (MI->isEnabled()) { + if (!DisableMacroExpansion) { + if (Identifier.isExpandDisabled()) { + Diag(Identifier, diag::pp_disabled_macro_expansion); + } else if (MI->isEnabled()) { if (!HandleMacroExpandedIdentifier(Identifier, MI)) return; } else { @@ -509,6 +538,7 @@ void Preprocessor::HandleIdentifier(Token &Identifier) { // expanded, even if it's in a context where it could be expanded in the // future. Identifier.setFlag(Token::DisableExpand); + Diag(Identifier, diag::pp_disabled_macro_expansion); } } } @@ -537,43 +567,59 @@ void Preprocessor::HandleIdentifier(Token &Identifier) { if (II.isExtensionToken() && !DisableMacroExpansion) Diag(Identifier, diag::ext_token_used); - // If this is the '__import_module__' keyword, note that the next token - // indicates a module name. - if (II.getTokenID() == tok::kw___import_module__ && - !InMacroArgs && !DisableMacroExpansion) { + // If this is the '__experimental_modules_import' contextual keyword, note + // that the next token indicates a module name. 
+ // + // Note that we do not treat '__experimental_modules_import' as a contextual + // keyword when we're in a caching lexer, because caching lexers only get + // used in contexts where import declarations are disallowed. + if (II.isModulesImport() && !InMacroArgs && !DisableMacroExpansion && + getLangOpts().Modules && CurLexerKind != CLK_CachingLexer) { ModuleImportLoc = Identifier.getLocation(); + ModuleImportPath.clear(); + ModuleImportExpectsIdentifier = true; CurLexerKind = CLK_LexAfterModuleImport; } } -/// \brief Lex a token following the __import_module__ keyword. +/// \brief Lex a token following the 'import' contextual keyword. +/// void Preprocessor::LexAfterModuleImport(Token &Result) { // Figure out what kind of lexer we actually have. - if (CurLexer) - CurLexerKind = CLK_Lexer; - else if (CurPTHLexer) - CurLexerKind = CLK_PTHLexer; - else if (CurTokenLexer) - CurLexerKind = CLK_TokenLexer; - else - CurLexerKind = CLK_CachingLexer; + recomputeCurLexerKind(); // Lex the next token. Lex(Result); // The token sequence // - // __import_module__ identifier + // import identifier (. identifier)* // - // indicates a module import directive. We already saw the __import_module__ - // keyword, so now we're looking for the identifier. - if (Result.getKind() != tok::identifier) + // indicates a module import directive. We already saw the 'import' + // contextual keyword, so now we're looking for the identifiers. + if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { + // We expected to see an identifier here, and we did; continue handling + // identifiers. + ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), + Result.getLocation())); + ModuleImportExpectsIdentifier = false; + CurLexerKind = CLK_LexAfterModuleImport; return; + } - // Load the module. - (void)TheModuleLoader.loadModule(ModuleImportLoc, - *Result.getIdentifierInfo(), - Result.getLocation()); + // If we're expecting a '.' 
or a ';', and we got a '.', then wait until we + // see the next identifier. + if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { + ModuleImportExpectsIdentifier = true; + CurLexerKind = CLK_LexAfterModuleImport; + return; + } + + // If we have a non-empty module path, load the named module. + if (!ModuleImportPath.empty()) + (void)TheModuleLoader.loadModule(ModuleImportLoc, ModuleImportPath, + Module::MacrosVisible, + /*IsIncludeDirective=*/false); } void Preprocessor::AddCommentHandler(CommentHandler *Handler) { @@ -610,12 +656,11 @@ CommentHandler::~CommentHandler() { } CodeCompletionHandler::~CodeCompletionHandler() { } -void Preprocessor::createPreprocessingRecord( - bool IncludeNestedMacroExpansions) { +void Preprocessor::createPreprocessingRecord(bool RecordConditionalDirectives) { if (Record) return; Record = new PreprocessingRecord(getSourceManager(), - IncludeNestedMacroExpansions); + RecordConditionalDirectives); addPPCallbacks(Record); } diff --git a/contrib/llvm/tools/clang/lib/Lex/PreprocessorLexer.cpp b/contrib/llvm/tools/clang/lib/Lex/PreprocessorLexer.cpp index 0da9ef5..a72bbca 100644 --- a/contrib/llvm/tools/clang/lib/Lex/PreprocessorLexer.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/PreprocessorLexer.cpp @@ -17,6 +17,8 @@ #include "clang/Basic/SourceManager.h" using namespace clang; +void PreprocessorLexer::anchor() { } + PreprocessorLexer::PreprocessorLexer(Preprocessor *pp, FileID fid) : PP(pp), FID(fid), InitialNumSLocEntries(0), ParsingPreprocessorDirective(false), diff --git a/contrib/llvm/tools/clang/lib/Lex/TokenConcatenation.cpp b/contrib/llvm/tools/clang/lib/Lex/TokenConcatenation.cpp index dc6d686..84a46ed 100644 --- a/contrib/llvm/tools/clang/lib/Lex/TokenConcatenation.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/TokenConcatenation.cpp @@ -45,7 +45,7 @@ static bool IsStringPrefix(StringRef Str, bool CPlusPlus0x) { /// IsIdentifierStringPrefix - Return true if the spelling of the token /// is literally 'L', 'u', 'U', 
or 'u8'. Including raw versions. bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const { - const LangOptions &LangOpts = PP.getLangOptions(); + const LangOptions &LangOpts = PP.getLangOpts(); if (!Tok.needsCleaning()) { if (Tok.getLength() < 1 || Tok.getLength() > 3) @@ -85,6 +85,19 @@ TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) { TokenInfo[tok::hash ] |= aci_custom_firstchar; TokenInfo[tok::arrow ] |= aci_custom_firstchar; + // These tokens have custom code in C++11 mode. + if (PP.getLangOpts().CPlusPlus0x) { + TokenInfo[tok::string_literal ] |= aci_custom; + TokenInfo[tok::wide_string_literal ] |= aci_custom; + TokenInfo[tok::utf8_string_literal ] |= aci_custom; + TokenInfo[tok::utf16_string_literal] |= aci_custom; + TokenInfo[tok::utf32_string_literal] |= aci_custom; + TokenInfo[tok::char_constant ] |= aci_custom; + TokenInfo[tok::wide_char_constant ] |= aci_custom; + TokenInfo[tok::utf16_char_constant ] |= aci_custom; + TokenInfo[tok::utf32_char_constant ] |= aci_custom; + } + // These tokens change behavior if followed by an '='. TokenInfo[tok::amp ] |= aci_avoid_equal; // &= TokenInfo[tok::plus ] |= aci_avoid_equal; // += @@ -179,12 +192,32 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, switch (PrevKind) { default: llvm_unreachable("InitAvoidConcatTokenInfo built wrong"); - return true; case tok::raw_identifier: llvm_unreachable("tok::raw_identifier in non-raw lexing mode!"); - return true; + case tok::string_literal: + case tok::wide_string_literal: + case tok::utf8_string_literal: + case tok::utf16_string_literal: + case tok::utf32_string_literal: + case tok::char_constant: + case tok::wide_char_constant: + case tok::utf16_char_constant: + case tok::utf32_char_constant: + if (!PP.getLangOpts().CPlusPlus0x) + return false; + + // In C++11, a string or character literal followed by an identifier is a + // single token. 
+ if (Tok.getIdentifierInfo()) + return true; + + // A ud-suffix is an identifier. If the previous token ends with one, treat + // it as an identifier. + if (!PrevTok.hasUDSuffix()) + return false; + // FALL THROUGH. case tok::identifier: // id+id or id+number or id+L"foo". // id+'.'... will not append. if (Tok.is(tok::numeric_constant)) @@ -203,13 +236,15 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, // Otherwise, this is a narrow character or string. If the *identifier* // is a literal 'L', 'u8', 'u' or 'U', avoid pasting L "foo" -> L"foo". return IsIdentifierStringPrefix(PrevTok); + case tok::numeric_constant: return isalnum(FirstChar) || Tok.is(tok::numeric_constant) || - FirstChar == '+' || FirstChar == '-' || FirstChar == '.'; + FirstChar == '+' || FirstChar == '-' || FirstChar == '.' || + (PP.getLangOpts().CPlusPlus0x && FirstChar == '_'); case tok::period: // ..., .*, .1234 return (FirstChar == '.' && PrevPrevTok.is(tok::period)) || isdigit(FirstChar) || - (PP.getLangOptions().CPlusPlus && FirstChar == '*'); + (PP.getLangOpts().CPlusPlus && FirstChar == '*'); case tok::amp: // && return FirstChar == '&'; case tok::plus: // ++ @@ -228,10 +263,10 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, return FirstChar == '>' || FirstChar == ':'; case tok::colon: // ::, :> return FirstChar == '>' || - (PP.getLangOptions().CPlusPlus && FirstChar == ':'); + (PP.getLangOpts().CPlusPlus && FirstChar == ':'); case tok::hash: // ##, #@, %:%: return FirstChar == '#' || FirstChar == '@' || FirstChar == '%'; case tok::arrow: // ->* - return PP.getLangOptions().CPlusPlus && FirstChar == '*'; + return PP.getLangOpts().CPlusPlus && FirstChar == '*'; } } diff --git a/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp b/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp index a580544..696754c 100644 --- a/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp @@ -17,7 +17,7 @@ #include 
"clang/Lex/Preprocessor.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/LexDiagnostic.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallString.h" using namespace clang; @@ -450,7 +450,7 @@ void TokenLexer::Lex(Token &Tok) { /// are more ## after it, chomp them iteratively. Return the result as Tok. /// If this returns true, the caller should immediately return the token. bool TokenLexer::PasteTokens(Token &Tok) { - llvm::SmallString<128> Buffer; + SmallString<128> Buffer; const char *ResultTokStrPtr = 0; SourceLocation StartLoc = Tok.getLocation(); SourceLocation PasteOpLoc; @@ -527,7 +527,7 @@ bool TokenLexer::PasteTokens(Token &Tok) { // Make a lexer to lex this string from. Lex just this one token. // Make a lexer object so that we lex and expand the paste result. Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID), - PP.getLangOptions(), ScratchBufStart, + PP.getLangOpts(), ScratchBufStart, ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen); // Lex a token in raw mode. This way it won't look up identifiers @@ -546,14 +546,14 @@ bool TokenLexer::PasteTokens(Token &Tok) { if (isInvalid) { // Test for the Microsoft extension of /##/ turning into // here on the // error path. - if (PP.getLangOptions().MicrosoftExt && Tok.is(tok::slash) && + if (PP.getLangOpts().MicrosoftExt && Tok.is(tok::slash) && RHS.is(tok::slash)) { HandleMicrosoftCommentPaste(Tok); return true; } // Do not emit the error when preprocessing assembler code. - if (!PP.getLangOptions().AsmPreprocessor) { + if (!PP.getLangOpts().AsmPreprocessor) { // Explicitly convert the token location to have proper expansion // information so that the user knows where it came from. SourceManager &SM = PP.getSourceManager(); @@ -563,7 +563,7 @@ bool TokenLexer::PasteTokens(Token &Tok) { // error to a warning that defaults to an error. This allows // disabling it. PP.Diag(Loc, - PP.getLangOptions().MicrosoftExt ? diag::err_pp_bad_paste_ms + PP.getLangOpts().MicrosoftExt ? 
diag::err_pp_bad_paste_ms : diag::err_pp_bad_paste) << Buffer.str(); } |