Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen')
66 files changed, 14375 insertions, 4963 deletions
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h index a65f270..530a7ef 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h @@ -18,20 +18,25 @@ namespace llvm { class Value; class LLVMContext; class DataLayout; + class Type; } namespace clang { class ASTContext; class TargetInfo; - namespace CodeGen { - class ABIArgInfo; - class Address; - class CGCXXABI; - class CGFunctionInfo; - class CodeGenFunction; - class CodeGenTypes; - } +namespace CodeGen { + class ABIArgInfo; + class Address; + class CGCXXABI; + class CGFunctionInfo; + class CodeGenFunction; + class CodeGenTypes; + class SwiftABIInfo; + +namespace swiftcall { + class SwiftAggLowering; +} // FIXME: All of this stuff should be part of the target interface // somehow. It is currently here because it is not clear how to factor @@ -55,6 +60,8 @@ namespace clang { virtual ~ABIInfo(); + virtual bool supportsSwift() const { return false; } + CodeGen::CGCXXABI &getCXXABI() const; ASTContext &getContext() const; llvm::LLVMContext &getVMContext() const; @@ -85,6 +92,8 @@ namespace clang { CodeGen::Address VAListAddr, QualType Ty) const = 0; + bool isAndroid() const; + /// Emit the target dependent code to load a value of /// \arg Ty from the \c __builtin_ms_va_list pointed to by \arg VAListAddr. virtual CodeGen::Address EmitMSVAArg(CodeGen::CodeGenFunction &CGF, @@ -110,7 +119,35 @@ namespace clang { CodeGen::ABIArgInfo getNaturalAlignIndirectInReg(QualType Ty, bool Realign = false) const; + + + }; + + /// A refining implementation of ABIInfo for targets that support swiftcall. + /// + /// If we find ourselves wanting multiple such refinements, they'll probably + /// be independent refinements, and we should probably find another way + /// to do it than simple inheritance. 
+ class SwiftABIInfo : public ABIInfo { + public: + SwiftABIInfo(CodeGen::CodeGenTypes &cgt) : ABIInfo(cgt) {} + + bool supportsSwift() const final override { return true; } + + virtual bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef<llvm::Type*> types, + bool asReturnValue) const = 0; + + virtual bool isLegalVectorTypeForSwift(CharUnits totalSize, + llvm::Type *eltTy, + unsigned elts) const; + + static bool classof(const ABIInfo *info) { + return info->supportsSwift(); + } }; + +} // end namespace CodeGen } // end namespace clang #endif diff --git a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp index 6d746c2..165b6dd 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp @@ -16,19 +16,21 @@ #include "clang/Frontend/Utils.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeWriterPass.h" +#include "llvm/Bitcode/ReaderWriter.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/FunctionInfo.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/SubtargetFeature.h" -#include "llvm/Object/FunctionIndexObjectFile.h" +#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/TargetRegistry.h" @@ -42,6 +44,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include <memory> using namespace clang; @@ -58,9 +61,7 @@ class EmitAssemblyHelper { Timer CodeGenerationTime; - mutable legacy::PassManager *CodeGenPasses; - mutable legacy::PassManager *PerModulePasses; - mutable legacy::FunctionPassManager *PerFunctionPasses; + std::unique_ptr<raw_pwrite_stream> OS; private: TargetIRAnalysis getTargetIRAnalysis() const { @@ -70,70 +71,44 @@ private: return TargetIRAnalysis(); } - legacy::PassManager *getCodeGenPasses() const { - if (!CodeGenPasses) { - CodeGenPasses = new legacy::PassManager(); - CodeGenPasses->add( - createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - } - return CodeGenPasses; - } - - legacy::PassManager *getPerModulePasses() const { - if (!PerModulePasses) { - PerModulePasses = new legacy::PassManager(); - PerModulePasses->add( - createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - } - return PerModulePasses; - } - - legacy::FunctionPassManager *getPerFunctionPasses() const { - if (!PerFunctionPasses) { - PerFunctionPasses = new legacy::FunctionPassManager(TheModule); - PerFunctionPasses->add( - createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - } - return PerFunctionPasses; - } + /// Set LLVM command line options passed through -backend-option. + void setCommandLineOpts(); - void CreatePasses(FunctionInfoIndex *FunctionIndex); + void CreatePasses(legacy::PassManager &MPM, legacy::FunctionPassManager &FPM, + ModuleSummaryIndex *ModuleSummary); /// Generates the TargetMachine. - /// Returns Null if it is unable to create the target machine. 
+ /// Leaves TM unchanged if it is unable to create the target machine. /// Some of our clang tests specify triples which are not built /// into clang. This is okay because these tests check the generated /// IR, and they require DataLayout which depends on the triple. /// In this case, we allow this method to fail and not report an error. /// When MustCreateTM is used, we print an error if we are unable to load /// the requested target. - TargetMachine *CreateTargetMachine(bool MustCreateTM); + void CreateTargetMachine(bool MustCreateTM); /// Add passes necessary to emit assembly or LLVM IR. /// /// \return True on success. - bool AddEmitPasses(BackendAction Action, raw_pwrite_stream &OS); + bool AddEmitPasses(legacy::PassManager &CodeGenPasses, BackendAction Action, + raw_pwrite_stream &OS); public: EmitAssemblyHelper(DiagnosticsEngine &_Diags, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, const LangOptions &LOpts, Module *M) : Diags(_Diags), CodeGenOpts(CGOpts), TargetOpts(TOpts), LangOpts(LOpts), - TheModule(M), CodeGenerationTime("Code Generation Time"), - CodeGenPasses(nullptr), PerModulePasses(nullptr), - PerFunctionPasses(nullptr) {} + TheModule(M), CodeGenerationTime("Code Generation Time") {} ~EmitAssemblyHelper() { - delete CodeGenPasses; - delete PerModulePasses; - delete PerFunctionPasses; if (CodeGenOpts.DisableFree) BuryPointer(std::move(TM)); } std::unique_ptr<TargetMachine> TM; - void EmitAssembly(BackendAction Action, raw_pwrite_stream *OS); + void EmitAssembly(BackendAction Action, + std::unique_ptr<raw_pwrite_stream> OS); }; // We need this wrapper to access LangOpts and CGOpts from extension functions @@ -172,8 +147,19 @@ static void addAddDiscriminatorsPass(const PassManagerBuilder &Builder, PM.add(createAddDiscriminatorsPass()); } +static void addCleanupPassesForSampleProfiler( + const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { + // instcombine is needed before sample profile annotation because it converts + // certain function calls to be inlinable. simplifycfg and sroa are needed + // before instcombine for necessary preparation. E.g. load store is eliminated + // properly so that instcombine will not introduce unecessary liverange. 
+ PM.add(createCFGSimplificationPass()); + PM.add(createSROAPass()); + PM.add(createInstructionCombiningPass()); +} + static void addBoundsCheckingPass(const PassManagerBuilder &Builder, - legacy::PassManagerBase &PM) { + legacy::PassManagerBase &PM) { PM.add(createBoundsCheckingPass()); } @@ -189,6 +175,7 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder, Opts.TraceBB = CGOpts.SanitizeCoverageTraceBB; Opts.TraceCmp = CGOpts.SanitizeCoverageTraceCmp; Opts.Use8bitCounters = CGOpts.SanitizeCoverage8bitCounters; + Opts.TracePC = CGOpts.SanitizeCoverageTracePC; PM.add(createSanitizerCoverageModulePass(Opts)); } @@ -198,14 +185,17 @@ static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, static_cast<const PassManagerBuilderWrapper&>(Builder); const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Address); - PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/false, Recover)); + bool UseAfterScope = CGOpts.SanitizeAddressUseAfterScope; + PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover, + UseAfterScope)); PM.add(createAddressSanitizerModulePass(/*CompileKernel*/false, Recover)); } static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { - PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/true, - /*Recover*/true)); + PM.add(createAddressSanitizerFunctionPass( + /*CompileKernel*/ true, + /*Recover*/ true, /*UseAfterScope*/ false)); PM.add(createAddressSanitizerModulePass(/*CompileKernel*/true, /*Recover*/true)); } @@ -243,6 +233,19 @@ static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder, PM.add(createDataFlowSanitizerPass(LangOpts.SanitizerBlacklistFiles)); } +static void addEfficiencySanitizerPass(const PassManagerBuilder &Builder, + legacy::PassManagerBase &PM) { + const PassManagerBuilderWrapper &BuilderWrapper = + static_cast<const PassManagerBuilderWrapper&>(Builder); + const LangOptions &LangOpts = BuilderWrapper.getLangOpts(); + EfficiencySanitizerOptions Opts; + if (LangOpts.Sanitize.has(SanitizerKind::EfficiencyCacheFrag)) + Opts.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag; + else if (LangOpts.Sanitize.has(SanitizerKind::EfficiencyWorkingSet)) + Opts.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet; + PM.add(createEfficiencySanitizerPass(Opts)); +} + static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, const CodeGenOptions &CodeGenOpts) { TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple); @@ -277,7 +280,9 @@ static void addSymbolRewriterPass(const CodeGenOptions &Opts, MPM->add(createRewriteSymbolsPass(DL)); } -void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { +void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, + legacy::FunctionPassManager &FPM, + ModuleSummaryIndex *ModuleSummary) { if (CodeGenOpts.DisableLLVMPasses) return; @@ -300,7 +305,8 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { switch (Inlining) { case CodeGenOptions::NoInlining: break; - case CodeGenOptions::NormalInlining: { + case CodeGenOptions::NormalInlining: + case CodeGenOptions::OnlyHintInlining: { PMBuilder.Inliner = createFunctionInliningPass(OptLevel, CodeGenOpts.OptimizeSize); break; @@ -321,22 +327,28 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP; PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; - 
PMBuilder.DisableUnitAtATime = !CodeGenOpts.UnitAtATime; PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions; + PMBuilder.PrepareForThinLTO = CodeGenOpts.EmitSummaryIndex; PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO; PMBuilder.RerollLoops = CodeGenOpts.RerollLoops; - legacy::PassManager *MPM = getPerModulePasses(); - // If we are performing a ThinLTO importing compile, invoke the LTO - // pipeline and pass down the in-memory function index. - if (FunctionIndex) { - PMBuilder.FunctionIndex = FunctionIndex; - PMBuilder.populateLTOPassManager(*MPM); + // pipeline and pass down the in-memory module summary index. + if (ModuleSummary) { + PMBuilder.ModuleSummary = ModuleSummary; + PMBuilder.populateThinLTOPassManager(MPM); return; } + // Add target-specific passes that need to run as early as possible. + if (TM) + PMBuilder.addExtension( + PassManagerBuilder::EP_EarlyAsPossible, + [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) { + TM->addEarlyAsPossiblePasses(PM); + }); + PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible, addAddDiscriminatorsPass); @@ -401,15 +413,20 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { addDataFlowSanitizerPass); } + if (LangOpts.Sanitize.hasOneOf(SanitizerKind::Efficiency)) { + PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast, + addEfficiencySanitizerPass); + PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, + addEfficiencySanitizerPass); + } + // Set up the per-function pass manager. - legacy::FunctionPassManager *FPM = getPerFunctionPasses(); if (CodeGenOpts.VerifyModule) - FPM->add(createVerifierPass()); - PMBuilder.populateFunctionPassManager(*FPM); + FPM.add(createVerifierPass()); // Set up the per-module pass manager. 
if (!CodeGenOpts.RewriteMapFiles.empty()) - addSymbolRewriterPass(CodeGenOpts, MPM); + addSymbolRewriterPass(CodeGenOpts, &MPM); if (!CodeGenOpts.DisableGCov && (CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes)) { @@ -424,25 +441,56 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { Options.FunctionNamesInData = !CodeGenOpts.CoverageNoFunctionNamesInData; Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody; - MPM->add(createGCOVProfilerPass(Options)); - if (CodeGenOpts.getDebugInfo() == CodeGenOptions::NoDebugInfo) - MPM->add(createStripSymbolsPass(true)); + MPM.add(createGCOVProfilerPass(Options)); + if (CodeGenOpts.getDebugInfo() == codegenoptions::NoDebugInfo) + MPM.add(createStripSymbolsPass(true)); } - if (CodeGenOpts.ProfileInstrGenerate) { + if (CodeGenOpts.hasProfileClangInstr()) { InstrProfOptions Options; Options.NoRedZone = CodeGenOpts.DisableRedZone; Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput; - MPM->add(createInstrProfilingPass(Options)); + MPM.add(createInstrProfilingLegacyPass(Options)); + } + if (CodeGenOpts.hasProfileIRInstr()) { + if (!CodeGenOpts.InstrProfileOutput.empty()) + PMBuilder.PGOInstrGen = CodeGenOpts.InstrProfileOutput; + else + PMBuilder.PGOInstrGen = "default.profraw"; + } + if (CodeGenOpts.hasProfileIRUse()) + PMBuilder.PGOInstrUse = CodeGenOpts.ProfileInstrumentUsePath; + + if (!CodeGenOpts.SampleProfileFile.empty()) { + MPM.add(createPruneEHPass()); + MPM.add(createSampleProfileLoaderPass(CodeGenOpts.SampleProfileFile)); + PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible, + addCleanupPassesForSampleProfiler); } - if (!CodeGenOpts.SampleProfileFile.empty()) - MPM->add(createSampleProfileLoaderPass(CodeGenOpts.SampleProfileFile)); + PMBuilder.populateFunctionPassManager(FPM); + PMBuilder.populateModulePassManager(MPM); +} - PMBuilder.populateModulePassManager(*MPM); +void EmitAssemblyHelper::setCommandLineOpts() { + SmallVector<const char *, 16> BackendArgs; + BackendArgs.push_back("clang"); // Fake program name. + if (!CodeGenOpts.DebugPass.empty()) { + BackendArgs.push_back("-debug-pass"); + BackendArgs.push_back(CodeGenOpts.DebugPass.c_str()); + } + if (!CodeGenOpts.LimitFloatPrecision.empty()) { + BackendArgs.push_back("-limit-float-precision"); + BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str()); + } + for (const std::string &BackendOption : CodeGenOpts.BackendOptions) + BackendArgs.push_back(BackendOption.c_str()); + BackendArgs.push_back(nullptr); + llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1, + BackendArgs.data()); } -TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { +void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { // Create the TargetMachine for generating code. std::string Error; std::string Triple = TheModule->getTargetTriple(); @@ -450,7 +498,7 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { if (!TheTarget) { if (MustCreateTM) Diags.Report(diag::err_fe_unable_to_create_target) << Error; - return nullptr; + return; } unsigned CodeModel = @@ -464,27 +512,11 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { assert(CodeModel != ~0u && "invalid code model!"); llvm::CodeModel::Model CM = static_cast<llvm::CodeModel::Model>(CodeModel); - SmallVector<const char *, 16> BackendArgs; - BackendArgs.push_back("clang"); // Fake program name. 
- if (!CodeGenOpts.DebugPass.empty()) { - BackendArgs.push_back("-debug-pass"); - BackendArgs.push_back(CodeGenOpts.DebugPass.c_str()); - } - if (!CodeGenOpts.LimitFloatPrecision.empty()) { - BackendArgs.push_back("-limit-float-precision"); - BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str()); - } - for (const std::string &BackendOption : CodeGenOpts.BackendOptions) - BackendArgs.push_back(BackendOption.c_str()); - BackendArgs.push_back(nullptr); - llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1, - BackendArgs.data()); - std::string FeaturesStr = llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ","); // Keep this synced with the equivalent code in tools/driver/cc1as_main.cpp. - llvm::Reloc::Model RM = llvm::Reloc::Default; + llvm::Optional<llvm::Reloc::Model> RM; if (CodeGenOpts.RelocationModel == "static") { RM = llvm::Reloc::Static; } else if (CodeGenOpts.RelocationModel == "pic") { @@ -539,38 +571,29 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { Options.UseInitArray = CodeGenOpts.UseInitArray; Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS; Options.CompressDebugSections = CodeGenOpts.CompressDebugSections; + Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations; // Set EABI version. - Options.EABIVersion = llvm::StringSwitch<llvm::EABI>(CodeGenOpts.EABIVersion) + Options.EABIVersion = llvm::StringSwitch<llvm::EABI>(TargetOpts.EABIVersion) .Case("4", llvm::EABI::EABI4) .Case("5", llvm::EABI::EABI5) .Case("gnu", llvm::EABI::GNU) .Default(llvm::EABI::Default); + if (LangOpts.SjLjExceptions) + Options.ExceptionModel = llvm::ExceptionHandling::SjLj; + Options.LessPreciseFPMADOption = CodeGenOpts.LessPreciseFPMAD; Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath; Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath; Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS; Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath; Options.StackAlignmentOverride = CodeGenOpts.StackAlignment; - Options.PositionIndependentExecutable = LangOpts.PIELevel != 0; Options.FunctionSections = CodeGenOpts.FunctionSections; Options.DataSections = CodeGenOpts.DataSections; Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames; Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; - switch (CodeGenOpts.getDebuggerTuning()) { - case CodeGenOptions::DebuggerKindGDB: - Options.DebuggerTuning = llvm::DebuggerKind::GDB; - break; - case CodeGenOptions::DebuggerKindLLDB: - Options.DebuggerTuning = llvm::DebuggerKind::LLDB; - break; - case CodeGenOptions::DebuggerKindSCE: - Options.DebuggerTuning = llvm::DebuggerKind::SCE; - break; - default: - break; - } + Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels; @@ -582,24 +605,18 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose; Options.MCOptions.ABIName = TargetOpts.ABI; - TargetMachine *TM = TheTarget->createTargetMachine(Triple, TargetOpts.CPU, - FeaturesStr, Options, - RM, CM, OptLevel); - - return TM; + TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr, + Options, RM, CM, OptLevel)); } -bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action, +bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, + BackendAction Action, raw_pwrite_stream &OS) { - - // Create the code generator passes. 
- legacy::PassManager *PM = getCodeGenPasses(); - // Add LibraryInfo. llvm::Triple TargetTriple(TheModule->getTargetTriple()); std::unique_ptr<TargetLibraryInfoImpl> TLII( createTLII(TargetTriple, CodeGenOpts)); - PM->add(new TargetLibraryInfoWrapperPass(*TLII)); + CodeGenPasses.add(new TargetLibraryInfoWrapperPass(*TLII)); // Normal mode, emit a .s or .o file by running the code generator. Note, // this also adds codegenerator level optimization passes. @@ -615,9 +632,9 @@ bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action, // "codegen" passes so that it isn't run multiple times when there is // inlining happening. if (CodeGenOpts.OptimizationLevel > 0) - PM->add(createObjCARCContractPass()); + CodeGenPasses.add(createObjCARCContractPass()); - if (TM->addPassesToEmitFile(*PM, OS, CGFT, + if (TM->addPassesToEmitFile(CodeGenPasses, OS, CGFT, /*DisableVerify=*/!CodeGenOpts.VerifyModule)) { Diags.Report(diag::err_fe_unable_to_interface_with_target); return false; @@ -627,14 +644,15 @@ bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action, } void EmitAssemblyHelper::EmitAssembly(BackendAction Action, - raw_pwrite_stream *OS) { + std::unique_ptr<raw_pwrite_stream> OS) { TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr); + setCommandLineOpts(); + bool UsesCodeGen = (Action != Backend_EmitNothing && Action != Backend_EmitBC && Action != Backend_EmitLL); - if (!TM) - TM.reset(CreateTargetMachine(UsesCodeGen)); + CreateTargetMachine(UsesCodeGen); if (UsesCodeGen && !TM) return; @@ -644,41 +662,54 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, // If we are performing a ThinLTO importing compile, load the function // index into memory and pass it into CreatePasses, which will add it // to the PassManagerBuilder and invoke LTO passes. 
- std::unique_ptr<FunctionInfoIndex> FunctionIndex; + std::unique_ptr<ModuleSummaryIndex> ModuleSummary; if (!CodeGenOpts.ThinLTOIndexFile.empty()) { - ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr = - llvm::getFunctionIndexForFile(CodeGenOpts.ThinLTOIndexFile, - [&](const DiagnosticInfo &DI) { - TheModule->getContext().diagnose(DI); - }); + ErrorOr<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr = + llvm::getModuleSummaryIndexForFile( + CodeGenOpts.ThinLTOIndexFile, [&](const DiagnosticInfo &DI) { + TheModule->getContext().diagnose(DI); + }); if (std::error_code EC = IndexOrErr.getError()) { std::string Error = EC.message(); errs() << "Error loading index file '" << CodeGenOpts.ThinLTOIndexFile << "': " << Error << "\n"; return; } - FunctionIndex = std::move(IndexOrErr.get()); - assert(FunctionIndex && "Expected non-empty function index"); + ModuleSummary = std::move(IndexOrErr.get()); + assert(ModuleSummary && "Expected non-empty module summary index"); } - CreatePasses(FunctionIndex.get()); + legacy::PassManager PerModulePasses; + PerModulePasses.add( + createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); + + legacy::FunctionPassManager PerFunctionPasses(TheModule); + PerFunctionPasses.add( + createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); + + CreatePasses(PerModulePasses, PerFunctionPasses, ModuleSummary.get()); + + legacy::PassManager CodeGenPasses; + CodeGenPasses.add( + createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); switch (Action) { case Backend_EmitNothing: break; case Backend_EmitBC: - getPerModulePasses()->add(createBitcodeWriterPass( - *OS, CodeGenOpts.EmitLLVMUseLists, CodeGenOpts.EmitFunctionSummary)); + PerModulePasses.add(createBitcodeWriterPass( + *OS, CodeGenOpts.EmitLLVMUseLists, CodeGenOpts.EmitSummaryIndex, + CodeGenOpts.EmitSummaryIndex)); break; case Backend_EmitLL: - getPerModulePasses()->add( + PerModulePasses.add( createPrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists)); break; default: - if (!AddEmitPasses(Action, *OS)) + if (!AddEmitPasses(CodeGenPasses, Action, *OS)) return; } @@ -688,46 +719,165 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, // Run passes. For now we do all passes at once, but eventually we // would like to have the option of streaming code generation. 
- if (PerFunctionPasses) { + { PrettyStackTraceString CrashInfo("Per-function optimization"); - PerFunctionPasses->doInitialization(); + PerFunctionPasses.doInitialization(); for (Function &F : *TheModule) if (!F.isDeclaration()) - PerFunctionPasses->run(F); - PerFunctionPasses->doFinalization(); + PerFunctionPasses.run(F); + PerFunctionPasses.doFinalization(); } - if (PerModulePasses) { + { PrettyStackTraceString CrashInfo("Per-module optimization passes"); - PerModulePasses->run(*TheModule); + PerModulePasses.run(*TheModule); } - if (CodeGenPasses) { + { PrettyStackTraceString CrashInfo("Code generation"); - CodeGenPasses->run(*TheModule); + CodeGenPasses.run(*TheModule); } } void clang::EmitBackendOutput(DiagnosticsEngine &Diags, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, - const LangOptions &LOpts, StringRef TDesc, + const LangOptions &LOpts, const llvm::DataLayout &TDesc, Module *M, BackendAction Action, - raw_pwrite_stream *OS) { + std::unique_ptr<raw_pwrite_stream> OS) { EmitAssemblyHelper AsmHelper(Diags, CGOpts, TOpts, LOpts, M); - AsmHelper.EmitAssembly(Action, OS); + AsmHelper.EmitAssembly(Action, std::move(OS)); - // If an optional clang TargetInfo description string was passed in, use it to - // verify the LLVM TargetMachine's DataLayout. - if (AsmHelper.TM && !TDesc.empty()) { + // Verify clang's TargetInfo DataLayout against the LLVM TargetMachine's + // DataLayout. + if (AsmHelper.TM) { std::string DLDesc = M->getDataLayout().getStringRepresentation(); - if (DLDesc != TDesc) { + if (DLDesc != TDesc.getStringRepresentation()) { unsigned DiagID = Diags.getCustomDiagID( DiagnosticsEngine::Error, "backend data layout '%0' does not match " "expected target description '%1'"); - Diags.Report(DiagID) << DLDesc << TDesc; + Diags.Report(DiagID) << DLDesc << TDesc.getStringRepresentation(); + } + } +} + +static const char* getSectionNameForBitcode(const Triple &T) { + switch (T.getObjectFormat()) { + case Triple::MachO: + return "__LLVM,__bitcode"; + case Triple::COFF: + case Triple::ELF: + case Triple::UnknownObjectFormat: + return ".llvmbc"; + } + llvm_unreachable("Unimplemented ObjectFormatType"); +} + +static const char* getSectionNameForCommandline(const Triple &T) { + switch (T.getObjectFormat()) { + case Triple::MachO: + return "__LLVM,__cmdline"; + case Triple::COFF: + case Triple::ELF: + case Triple::UnknownObjectFormat: + return ".llvmcmd"; + } + llvm_unreachable("Unimplemented ObjectFormatType"); +} + +// With -fembed-bitcode, save a copy of the llvm IR as data in the +// __LLVM,__bitcode section. +void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts, + llvm::MemoryBufferRef Buf) { + if (CGOpts.getEmbedBitcode() == CodeGenOptions::Embed_Off) + return; + + // Save llvm.compiler.used and remote it. + SmallVector<Constant*, 2> UsedArray; + SmallSet<GlobalValue*, 4> UsedGlobals; + Type *UsedElementType = Type::getInt8Ty(M->getContext())->getPointerTo(0); + GlobalVariable *Used = collectUsedGlobalVariables(*M, UsedGlobals, true); + for (auto *GV : UsedGlobals) { + if (GV->getName() != "llvm.embedded.module" && + GV->getName() != "llvm.cmdline") + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + } + if (Used) + Used->eraseFromParent(); + + // Embed the bitcode for the llvm module. + std::string Data; + ArrayRef<uint8_t> ModuleData; + Triple T(M->getTargetTriple()); + // Create a constant that contains the bitcode. 
+ // In case of embedding a marker, ignore the input Buf and use the empty + // ArrayRef. It is also legal to create a bitcode marker even Buf is empty. + if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Marker) { + if (!isBitcode((const unsigned char *)Buf.getBufferStart(), + (const unsigned char *)Buf.getBufferEnd())) { + // If the input is LLVM Assembly, bitcode is produced by serializing + // the module. Use-lists order need to be perserved in this case. + llvm::raw_string_ostream OS(Data); + llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true); + ModuleData = + ArrayRef<uint8_t>((const uint8_t *)OS.str().data(), OS.str().size()); + } else + // If the input is LLVM bitcode, write the input byte stream directly. + ModuleData = ArrayRef<uint8_t>((const uint8_t *)Buf.getBufferStart(), + Buf.getBufferSize()); + } + llvm::Constant *ModuleConstant = + llvm::ConstantDataArray::get(M->getContext(), ModuleData); + llvm::GlobalVariable *GV = new llvm::GlobalVariable( + *M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage, + ModuleConstant); + GV->setSection(getSectionNameForBitcode(T)); + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + if (llvm::GlobalVariable *Old = + M->getGlobalVariable("llvm.embedded.module", true)) { + assert(Old->hasOneUse() && + "llvm.embedded.module can only be used once in llvm.compiler.used"); + GV->takeName(Old); + Old->eraseFromParent(); + } else { + GV->setName("llvm.embedded.module"); + } + + // Skip if only bitcode needs to be embedded. + if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Bitcode) { + // Embed command-line options. + ArrayRef<uint8_t> CmdData(const_cast<uint8_t *>(CGOpts.CmdArgs.data()), + CGOpts.CmdArgs.size()); + llvm::Constant *CmdConstant = + llvm::ConstantDataArray::get(M->getContext(), CmdData); + GV = new llvm::GlobalVariable(*M, CmdConstant->getType(), true, + llvm::GlobalValue::PrivateLinkage, + CmdConstant); + GV->setSection(getSectionNameForCommandline(T)); + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + if (llvm::GlobalVariable *Old = + M->getGlobalVariable("llvm.cmdline", true)) { + assert(Old->hasOneUse() && + "llvm.cmdline can only be used once in llvm.compiler.used"); + GV->takeName(Old); + Old->eraseFromParent(); + } else { + GV->setName("llvm.cmdline"); } } + + if (UsedArray.empty()) + return; + + // Recreate llvm.compiler.used. + ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size()); + auto *NewUsed = new GlobalVariable( + *M, ATy, false, llvm::GlobalValue::AppendingLinkage, + llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used"); + NewUsed->setSection("llvm.metadata"); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp index 24de30b..7b747c1 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp @@ -79,7 +79,7 @@ namespace { auto Offset = OrigBFI.Offset % C.toBits(lvalue.getAlignment()); AtomicSizeInBits = C.toBits( C.toCharUnitsFromBits(Offset + OrigBFI.Size + C.getCharWidth() - 1) - .RoundUpToAlignment(lvalue.getAlignment())); + .alignTo(lvalue.getAlignment())); auto VoidPtrAddr = CGF.EmitCastToVoidPtr(lvalue.getBitFieldPointer()); auto OffsetInChars = (C.toCharUnitsFromBits(OrigBFI.Offset) / lvalue.getAlignment()) * @@ -221,11 +221,13 @@ namespace { /// \param IsWeak true if atomic operation is weak, false otherwise. 
/// \returns Pair of values: previous value from storage (value type) and /// boolean flag (i1 type) with true if success and false otherwise. - std::pair<RValue, llvm::Value *> EmitAtomicCompareExchange( - RValue Expected, RValue Desired, - llvm::AtomicOrdering Success = llvm::SequentiallyConsistent, - llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent, - bool IsWeak = false); + std::pair<RValue, llvm::Value *> + EmitAtomicCompareExchange(RValue Expected, RValue Desired, + llvm::AtomicOrdering Success = + llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering Failure = + llvm::AtomicOrdering::SequentiallyConsistent, + bool IsWeak = false); /// \brief Emits atomic update. /// \param AO Atomic ordering. @@ -241,11 +243,6 @@ namespace { /// Materialize an atomic r-value in atomic-layout memory. Address materializeRValue(RValue rvalue) const; - /// \brief Translates LLVM atomic ordering to GNU atomic ordering for - /// libcalls. - static AtomicExpr::AtomicOrderingKind - translateAtomicOrdering(const llvm::AtomicOrdering AO); - /// \brief Creates temp alloca for intermediate operations on atomic value. Address CreateTempAlloca() const; private: @@ -260,13 +257,17 @@ namespace { /// \brief Emits atomic compare-and-exchange op as a libcall. llvm::Value *EmitAtomicCompareExchangeLibcall( llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr, - llvm::AtomicOrdering Success = llvm::SequentiallyConsistent, - llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent); + llvm::AtomicOrdering Success = + llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering Failure = + llvm::AtomicOrdering::SequentiallyConsistent); /// \brief Emits atomic compare-and-exchange op as LLVM instruction. std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeOp( llvm::Value *ExpectedVal, llvm::Value *DesiredVal, - llvm::AtomicOrdering Success = llvm::SequentiallyConsistent, - llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent, + llvm::AtomicOrdering Success = + llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering Failure = + llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak = false); /// \brief Emit atomic update as libcalls. void @@ -286,25 +287,6 @@ namespace { }; } -AtomicExpr::AtomicOrderingKind -AtomicInfo::translateAtomicOrdering(const llvm::AtomicOrdering AO) { - switch (AO) { - case llvm::Unordered: - case llvm::NotAtomic: - case llvm::Monotonic: - return AtomicExpr::AO_ABI_memory_order_relaxed; - case llvm::Acquire: - return AtomicExpr::AO_ABI_memory_order_acquire; - case llvm::Release: - return AtomicExpr::AO_ABI_memory_order_release; - case llvm::AcquireRelease: - return AtomicExpr::AO_ABI_memory_order_acq_rel; - case llvm::SequentiallyConsistent: - return AtomicExpr::AO_ABI_memory_order_seq_cst; - } - llvm_unreachable("Unhandled AtomicOrdering"); -} - Address AtomicInfo::CreateTempAlloca() const { Address TempAlloca = CGF.CreateMemTemp( (LVal.isBitField() && ValueSizeInBits > AtomicSizeInBits) ? 
ValueTy @@ -323,8 +305,7 @@ static RValue emitAtomicLibcall(CodeGenFunction &CGF, QualType resultType, CallArgList &args) { const CGFunctionInfo &fnInfo = - CGF.CGM.getTypes().arrangeFreeFunctionCall(resultType, args, - FunctionType::ExtInfo(), RequiredArgs::All); + CGF.CGM.getTypes().arrangeBuiltinFunctionCall(resultType, args); llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo); llvm::Constant *fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName); return CGF.EmitCall(fnInfo, fn, ReturnValueSlot(), args); @@ -422,33 +403,39 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak, /// instructions to cope with the provided (but possibly only dynamically known) /// FailureOrder. static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, - bool IsWeak, Address Dest, - Address Ptr, Address Val1, - Address Val2, + bool IsWeak, Address Dest, Address Ptr, + Address Val1, Address Val2, llvm::Value *FailureOrderVal, uint64_t Size, llvm::AtomicOrdering SuccessOrder) { llvm::AtomicOrdering FailureOrder; if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) { - switch (FO->getSExtValue()) { - default: - FailureOrder = llvm::Monotonic; - break; - case AtomicExpr::AO_ABI_memory_order_consume: - case AtomicExpr::AO_ABI_memory_order_acquire: - FailureOrder = llvm::Acquire; - break; - case AtomicExpr::AO_ABI_memory_order_seq_cst: - FailureOrder = llvm::SequentiallyConsistent; - break; - } - if (FailureOrder >= SuccessOrder) { - // Don't assert on undefined behaviour. + auto FOS = FO->getSExtValue(); + if (!llvm::isValidAtomicOrderingCABI(FOS)) + FailureOrder = llvm::AtomicOrdering::Monotonic; + else + switch ((llvm::AtomicOrderingCABI)FOS) { + case llvm::AtomicOrderingCABI::relaxed: + case llvm::AtomicOrderingCABI::release: + case llvm::AtomicOrderingCABI::acq_rel: + FailureOrder = llvm::AtomicOrdering::Monotonic; + break; + case llvm::AtomicOrderingCABI::consume: + case llvm::AtomicOrderingCABI::acquire: + FailureOrder = llvm::AtomicOrdering::Acquire; + break; + case llvm::AtomicOrderingCABI::seq_cst: + FailureOrder = llvm::AtomicOrdering::SequentiallyConsistent; + break; + } + if (isStrongerThan(FailureOrder, SuccessOrder)) { + // Don't assert on undefined behavior "failure argument shall be no + // stronger than the success argument". 
FailureOrder = - llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder); + llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder); } - emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, - SuccessOrder, FailureOrder); + emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder, + FailureOrder); return; } @@ -456,9 +443,10 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, llvm::BasicBlock *MonotonicBB = nullptr, *AcquireBB = nullptr, *SeqCstBB = nullptr; MonotonicBB = CGF.createBasicBlock("monotonic_fail", CGF.CurFn); - if (SuccessOrder != llvm::Monotonic && SuccessOrder != llvm::Release) + if (SuccessOrder != llvm::AtomicOrdering::Monotonic && + SuccessOrder != llvm::AtomicOrdering::Release) AcquireBB = CGF.createBasicBlock("acquire_fail", CGF.CurFn); - if (SuccessOrder == llvm::SequentiallyConsistent) + if (SuccessOrder == llvm::AtomicOrdering::SequentiallyConsistent) SeqCstBB = CGF.createBasicBlock("seqcst_fail", CGF.CurFn); llvm::BasicBlock *ContBB = CGF.createBasicBlock("atomic.continue", CGF.CurFn); @@ -472,25 +460,25 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, // doesn't fold to a constant for the ordering. CGF.Builder.SetInsertPoint(MonotonicBB); emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, - Size, SuccessOrder, llvm::Monotonic); + Size, SuccessOrder, llvm::AtomicOrdering::Monotonic); CGF.Builder.CreateBr(ContBB); if (AcquireBB) { CGF.Builder.SetInsertPoint(AcquireBB); emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, - Size, SuccessOrder, llvm::Acquire); + Size, SuccessOrder, llvm::AtomicOrdering::Acquire); CGF.Builder.CreateBr(ContBB); - SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_consume), + SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::consume), AcquireBB); - SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acquire), + SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire), AcquireBB); } if (SeqCstBB) { CGF.Builder.SetInsertPoint(SeqCstBB); - emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, - Size, SuccessOrder, llvm::SequentiallyConsistent); + emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder, + llvm::AtomicOrdering::SequentiallyConsistent); CGF.Builder.CreateBr(ContBB); - SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_seq_cst), + SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst), SeqCstBB); } @@ -1037,40 +1025,39 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { E->getOp() == AtomicExpr::AO__atomic_load_n; if (isa<llvm::ConstantInt>(Order)) { - int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); - switch (ord) { - case AtomicExpr::AO_ABI_memory_order_relaxed: - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Monotonic); - break; - case AtomicExpr::AO_ABI_memory_order_consume: - case AtomicExpr::AO_ABI_memory_order_acquire: - if (IsStore) - break; // Avoid crashing on code with undefined behavior - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Acquire); - break; - case AtomicExpr::AO_ABI_memory_order_release: - if (IsLoad) - break; // Avoid crashing on code with undefined behavior - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Release); - break; - case AtomicExpr::AO_ABI_memory_order_acq_rel: - if (IsLoad || IsStore) - break; // Avoid crashing on code with undefined behavior - 
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::AcquireRelease); - break; - case AtomicExpr::AO_ABI_memory_order_seq_cst: - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::SequentiallyConsistent); - break; - default: // invalid order - // We should not ever get here normally, but it's hard to - // enforce that in general. - break; - } + auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); + // We should not ever get to a case where the ordering isn't a valid C ABI + // value, but it's hard to enforce that in general. + if (llvm::isValidAtomicOrderingCABI(ord)) + switch ((llvm::AtomicOrderingCABI)ord) { + case llvm::AtomicOrderingCABI::relaxed: + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::Monotonic); + break; + case llvm::AtomicOrderingCABI::consume: + case llvm::AtomicOrderingCABI::acquire: + if (IsStore) + break; // Avoid crashing on code with undefined behavior + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::Acquire); + break; + case llvm::AtomicOrderingCABI::release: + if (IsLoad) + break; // Avoid crashing on code with undefined behavior + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::Release); + break; + case llvm::AtomicOrderingCABI::acq_rel: + if (IsLoad || IsStore) + break; // Avoid crashing on code with undefined behavior + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::AcquireRelease); + break; + case llvm::AtomicOrderingCABI::seq_cst: + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::SequentiallyConsistent); + break; + } if (RValTy->isVoidType()) return RValue::get(nullptr); @@ -1105,39 +1092,39 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // Emit all the different atomics Builder.SetInsertPoint(MonotonicBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Monotonic); + Size, llvm::AtomicOrdering::Monotonic); Builder.CreateBr(ContBB); if (!IsStore) { Builder.SetInsertPoint(AcquireBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Acquire); + Size, llvm::AtomicOrdering::Acquire); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_consume), + SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume), AcquireBB); - SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acquire), + SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire), AcquireBB); } if (!IsLoad) { Builder.SetInsertPoint(ReleaseBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Release); + Size, llvm::AtomicOrdering::Release); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_release), + SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release), ReleaseBB); } if (!IsLoad && !IsStore) { Builder.SetInsertPoint(AcqRelBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::AcquireRelease); + Size, llvm::AtomicOrdering::AcquireRelease); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acq_rel), + SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel), AcqRelBB); } Builder.SetInsertPoint(SeqCstBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::SequentiallyConsistent); + Size, 
llvm::AtomicOrdering::SequentiallyConsistent); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_seq_cst), + SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst), SeqCstBB); // Cleanup and return @@ -1257,9 +1244,9 @@ void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddForLoaded, CGF.getContext().VoidPtrTy); Args.add(RValue::get(CGF.EmitCastToVoidPtr(AddForLoaded)), CGF.getContext().VoidPtrTy); - Args.add(RValue::get( - llvm::ConstantInt::get(CGF.IntTy, translateAtomicOrdering(AO))), - CGF.getContext().IntTy); + Args.add( + RValue::get(llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(AO))), + CGF.getContext().IntTy); emitAtomicLibcall(CGF, "__atomic_load", CGF.getContext().VoidTy, Args); } @@ -1287,28 +1274,21 @@ bool CodeGenFunction::LValueIsSuitableForInlineAtomic(LValue LV) { bool IsVolatile = LV.isVolatile() || hasVolatileMember(LV.getType()); // An atomic is inline if we don't need to use a libcall. bool AtomicIsInline = !AI.shouldUseLibcall(); + // MSVC doesn't seem to do this for types wider than a pointer. + if (getContext().getTypeSize(LV.getType()) > + getContext().getTypeSize(getContext().getIntPtrType())) + return false; return IsVolatile && AtomicIsInline; } -/// An type is a candidate for having its loads and stores be made atomic if -/// we are operating under /volatile:ms *and* we know the access is volatile and -/// performing such an operation can be performed without a libcall. -bool CodeGenFunction::typeIsSuitableForInlineAtomic(QualType Ty, - bool IsVolatile) const { - // An atomic is inline if we don't need to use a libcall (e.g. it is builtin). - bool AtomicIsInline = getContext().getTargetInfo().hasBuiltinAtomic( - getContext().getTypeSize(Ty), getContext().getTypeAlign(Ty)); - return CGM.getCodeGenOpts().MSVolatile && IsVolatile && AtomicIsInline; -} - RValue CodeGenFunction::EmitAtomicLoad(LValue LV, SourceLocation SL, AggValueSlot Slot) { llvm::AtomicOrdering AO; bool IsVolatile = LV.isVolatileQualified(); if (LV.getType()->isAtomicType()) { - AO = llvm::SequentiallyConsistent; + AO = llvm::AtomicOrdering::SequentiallyConsistent; } else { - AO = llvm::Acquire; + AO = llvm::AtomicOrdering::Acquire; IsVolatile = true; } return EmitAtomicLoad(LV, SL, AO, IsVolatile, Slot); @@ -1462,11 +1442,11 @@ AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr, CGF.getContext().VoidPtrTy); Args.add(RValue::get(CGF.EmitCastToVoidPtr(DesiredAddr)), CGF.getContext().VoidPtrTy); - Args.add(RValue::get(llvm::ConstantInt::get( - CGF.IntTy, translateAtomicOrdering(Success))), + Args.add(RValue::get( + llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(Success))), CGF.getContext().IntTy); - Args.add(RValue::get(llvm::ConstantInt::get( - CGF.IntTy, translateAtomicOrdering(Failure))), + Args.add(RValue::get( + llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(Failure))), CGF.getContext().IntTy); auto SuccessFailureRVal = emitAtomicLibcall(CGF, "__atomic_compare_exchange", CGF.getContext().BoolTy, Args); @@ -1477,8 +1457,9 @@ AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr, std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange( RValue Expected, RValue Desired, llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure, bool IsWeak) { - if (Failure >= Success) - // Don't assert on undefined behavior. + if (isStrongerThan(Failure, Success)) + // Don't assert on undefined behavior "failure argument shall be no stronger + // than the success argument". 
Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(Success); // Check whether we should use a library call. @@ -1727,9 +1708,9 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue lvalue, bool IsVolatile = lvalue.isVolatileQualified(); llvm::AtomicOrdering AO; if (lvalue.getType()->isAtomicType()) { - AO = llvm::SequentiallyConsistent; + AO = llvm::AtomicOrdering::SequentiallyConsistent; } else { - AO = llvm::Release; + AO = llvm::AtomicOrdering::Release; IsVolatile = true; } return EmitAtomicStore(rvalue, lvalue, AO, IsVolatile, isInit); @@ -1772,9 +1753,9 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, getContext().VoidPtrTy); args.add(RValue::get(EmitCastToVoidPtr(srcAddr.getPointer())), getContext().VoidPtrTy); - args.add(RValue::get(llvm::ConstantInt::get( - IntTy, AtomicInfo::translateAtomicOrdering(AO))), - getContext().IntTy); + args.add( + RValue::get(llvm::ConstantInt::get(IntTy, (int)llvm::toCABI(AO))), + getContext().IntTy); emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args); return; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp index ba2941e..e3658ab 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp @@ -125,10 +125,15 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, llvm::Constant *init = llvm::ConstantStruct::getAnon(elements); + unsigned AddrSpace = 0; + if (C.getLangOpts().OpenCL) + AddrSpace = C.getTargetAddressSpace(LangAS::opencl_constant); llvm::GlobalVariable *global = new llvm::GlobalVariable(CGM.getModule(), init->getType(), true, llvm::GlobalValue::InternalLinkage, - init, "__block_descriptor_tmp"); + init, "__block_descriptor_tmp", nullptr, + llvm::GlobalValue::NotThreadLocal, + AddrSpace); return llvm::ConstantExpr::getBitCast(global, CGM.getBlockDescriptorType()); } @@ -262,6 +267,11 @@ static bool isSafeForCXXConstantCapture(QualType type) { static llvm::Constant *tryCaptureAsConstant(CodeGenModule &CGM, CodeGenFunction *CGF, const VarDecl *var) { + // Return if this is a function paramter. We shouldn't try to + // rematerialize default arguments of function parameters. + if (isa<ParmVarDecl>(var)) + return nullptr; + QualType type = var->getType(); // We can only do this if the variable is const. @@ -508,7 +518,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, // At this point, we just have to add padding if the end align still // isn't aligned right. if (endAlign < maxFieldAlign) { - CharUnits newBlockSize = blockSize.RoundUpToAlignment(maxFieldAlign); + CharUnits newBlockSize = blockSize.alignTo(maxFieldAlign); CharUnits padding = newBlockSize - blockSize; // If we haven't yet added any fields, remember that there was an @@ -775,35 +785,34 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // Compute the address of the thing we're going to move into the // block literal. Address src = Address::invalid(); - if (BlockInfo && CI.isNested()) { - // We need to use the capture from the enclosing block. - const CGBlockInfo::Capture &enclosingCapture = - BlockInfo->getCapture(variable); - - // This is a [[type]]*, except that a byref entry wil just be an i8**. 
- src = Builder.CreateStructGEP(LoadBlockStruct(), - enclosingCapture.getIndex(), - enclosingCapture.getOffset(), - "block.capture.addr"); - } else if (blockDecl->isConversionFromLambda()) { + + if (blockDecl->isConversionFromLambda()) { // The lambda capture in a lambda's conversion-to-block-pointer is // special; we'll simply emit it directly. src = Address::invalid(); - } else { - // Just look it up in the locals map, which will give us back a - // [[type]]*. If that doesn't work, do the more elaborate DRE - // emission. - auto it = LocalDeclMap.find(variable); - if (it != LocalDeclMap.end()) { - src = it->second; + } else if (CI.isByRef()) { + if (BlockInfo && CI.isNested()) { + // We need to use the capture from the enclosing block. + const CGBlockInfo::Capture &enclosingCapture = + BlockInfo->getCapture(variable); + + // This is a [[type]]*, except that a byref entry wil just be an i8**. + src = Builder.CreateStructGEP(LoadBlockStruct(), + enclosingCapture.getIndex(), + enclosingCapture.getOffset(), + "block.capture.addr"); } else { - DeclRefExpr declRef( - const_cast<VarDecl *>(variable), - /*RefersToEnclosingVariableOrCapture*/ CI.isNested(), type, - VK_LValue, SourceLocation()); - src = EmitDeclRefLValue(&declRef).getAddress(); + auto I = LocalDeclMap.find(variable); + assert(I != LocalDeclMap.end()); + src = I->second; } - } + } else { + DeclRefExpr declRef(const_cast<VarDecl *>(variable), + /*RefersToEnclosingVariableOrCapture*/ CI.isNested(), + type.getNonReferenceType(), VK_LValue, + SourceLocation()); + src = EmitDeclRefLValue(&declRef).getAddress(); + }; // For byrefs, we just write the pointer to the byref struct into // the block field. There's no need to chase the forwarding @@ -837,8 +846,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // If it's a reference variable, copy the reference into the block field. } else if (type->isReferenceType()) { - llvm::Value *ref = Builder.CreateLoad(src, "ref.val"); - Builder.CreateStore(ref, blockField); + Builder.CreateStore(src.getPointer(), blockField); // If this is an ARC __strong block-pointer variable, don't do a // block copy. @@ -924,7 +932,10 @@ llvm::Type *CodeGenModule::getBlockDescriptorType() { UnsignedLongTy, UnsignedLongTy, nullptr); // Now form a pointer to that. - BlockDescriptorType = llvm::PointerType::getUnqual(BlockDescriptorType); + unsigned AddrSpace = 0; + if (getLangOpts().OpenCL) + AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_constant); + BlockDescriptorType = llvm::PointerType::get(BlockDescriptorType, AddrSpace); return BlockDescriptorType; } @@ -1109,8 +1120,8 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D, } if (CGDebugInfo *DI = getDebugInfo()) { - if (CGM.getCodeGenOpts().getDebugInfo() - >= CodeGenOptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::LimitedDebugInfo) { DI->setLocation(D->getLocation()); DI->EmitDeclareOfBlockLiteralArgVariable(*BlockInfo, arg, argNum, localAddr, Builder); @@ -1174,9 +1185,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, // Create the function declaration. 
const FunctionProtoType *fnType = blockInfo.getBlockExpr()->getFunctionType(); - const CGFunctionInfo &fnInfo = CGM.getTypes().arrangeFreeFunctionDeclaration( - fnType->getReturnType(), args, fnType->getExtInfo(), - fnType->isVariadic()); + const CGFunctionInfo &fnInfo = + CGM.getTypes().arrangeBlockFunctionDeclaration(fnType, args); if (CGM.ReturnSlotInterferesWithArgs(fnInfo)) blockInfo.UsesStret = true; @@ -1260,8 +1270,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, const VarDecl *variable = CI.getVariable(); DI->EmitLocation(Builder, variable->getLocation()); - if (CGM.getCodeGenOpts().getDebugInfo() - >= CodeGenOptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::LimitedDebugInfo) { const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); if (capture.isConstant()) { auto addr = LocalDeclMap.find(variable)->second; @@ -1329,8 +1339,8 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { C.VoidPtrTy); args.push_back(&srcDecl); - const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false); + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); // FIXME: it would be nice if these were mergeable with things with // identical semantics. @@ -1505,8 +1515,8 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { C.VoidPtrTy); args.push_back(&srcDecl); - const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false); + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); // FIXME: We'd like to put these into a mergable by content, with // internal linkage. @@ -1791,8 +1801,8 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, Context.VoidPtrTy); args.push_back(&src); - const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeFreeFunctionDeclaration( - R, args, FunctionType::ExtInfo(), /*variadic=*/false); + const CGFunctionInfo &FI = + CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args); llvm::FunctionType *LTy = CGF.CGM.getTypes().GetFunctionType(FI); @@ -1864,8 +1874,8 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, Context.VoidPtrTy); args.push_back(&src); - const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeFreeFunctionDeclaration( - R, args, FunctionType::ExtInfo(), /*variadic=*/false); + const CGFunctionInfo &FI = + CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args); llvm::FunctionType *LTy = CGF.CGM.getTypes().GetFunctionType(FI); @@ -2108,7 +2118,7 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) { bool packed = false; CharUnits varAlign = getContext().getDeclAlign(D); - CharUnits varOffset = size.RoundUpToAlignment(varAlign); + CharUnits varOffset = size.alignTo(varAlign); // We may have to insert padding. if (varOffset != size) { @@ -2285,9 +2295,36 @@ void CodeGenFunction::enterByrefCleanup(const AutoVarEmission &emission) { /// Adjust the declaration of something from the blocks API. 
static void configureBlocksRuntimeObject(CodeGenModule &CGM, llvm::Constant *C) { - if (!CGM.getLangOpts().BlocksRuntimeOptional) return; - auto *GV = cast<llvm::GlobalValue>(C->stripPointerCasts()); + + if (CGM.getTarget().getTriple().isOSBinFormatCOFF()) { + IdentifierInfo &II = CGM.getContext().Idents.get(C->getName()); + TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + + assert((isa<llvm::Function>(C->stripPointerCasts()) || + isa<llvm::GlobalVariable>(C->stripPointerCasts())) && + "expected Function or GlobalVariable"); + + const NamedDecl *ND = nullptr; + for (const auto &Result : DC->lookup(&II)) + if ((ND = dyn_cast<FunctionDecl>(Result)) || + (ND = dyn_cast<VarDecl>(Result))) + break; + + // TODO: support static blocks runtime + if (GV->isDeclaration() && (!ND || !ND->hasAttr<DLLExportAttr>())) { + GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + } else { + GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + } + } + + if (!CGM.getLangOpts().BlocksRuntimeOptional) + return; + if (GV->isDeclaration() && GV->hasExternalLinkage()) GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); } @@ -2335,5 +2372,5 @@ llvm::Constant *CodeGenModule::getNSConcreteStackBlock() { Int8PtrTy->getPointerTo(), nullptr); configureBlocksRuntimeObject(*this, NSConcreteStackBlock); - return NSConcreteStackBlock; + return NSConcreteStackBlock; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuilder.h b/contrib/llvm/tools/clang/lib/CodeGen/CGBuilder.h index 489f341..027435d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuilder.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuilder.h @@ -10,6 +10,7 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGBUILDER_H #define LLVM_CLANG_LIB_CODEGEN_CGBUILDER_H +#include "llvm/IR/DataLayout.h" #include "llvm/IR/IRBuilder.h" #include "Address.h" #include "CodeGenTypeCache.h" @@ -22,9 +23,7 @@ class CodeGenFunction; /// \brief This is an IRBuilder insertion helper that forwards to /// CodeGenFunction::InsertHelper, which adds necessary metadata to /// instructions. -template <bool PreserveNames> -class CGBuilderInserter - : protected llvm::IRBuilderDefaultInserter<PreserveNames> { +class CGBuilderInserter : protected llvm::IRBuilderDefaultInserter { public: CGBuilderInserter() = default; explicit CGBuilderInserter(CodeGenFunction *CGF) : CGF(CGF) {} @@ -38,17 +37,10 @@ private: CodeGenFunction *CGF = nullptr; }; -// Don't preserve names on values in an optimized build. 
-#ifdef NDEBUG -#define PreserveNames false -#else -#define PreserveNames true -#endif - -typedef CGBuilderInserter<PreserveNames> CGBuilderInserterTy; +typedef CGBuilderInserter CGBuilderInserterTy; -typedef llvm::IRBuilder<PreserveNames, llvm::ConstantFolder, - CGBuilderInserterTy> CGBuilderBaseTy; +typedef llvm::IRBuilder<llvm::ConstantFolder, CGBuilderInserterTy> + CGBuilderBaseTy; class CGBuilderTy : public CGBuilderBaseTy { /// Storing a reference to the type cache here makes it a lot easier @@ -194,6 +186,12 @@ public: Addr.getPointer(), Index, Name), Addr.getAlignment().alignmentAtOffset(Offset)); } + Address CreateStructGEP(Address Addr, unsigned Index, + const llvm::StructLayout *Layout, + const llvm::Twine &Name = "") { + auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); + return CreateStructGEP(Addr, Index, Offset, Name); + } /// Given /// %addr = [n x T]* ... @@ -298,8 +296,6 @@ public: } }; -#undef PreserveNames - } // end namespace CodeGen } // end namespace clang diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp index 787ac53..a5fc531 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" #include <sstream> using namespace clang; @@ -105,9 +106,8 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::Type *ValueType = Args[1]->getType(); Args[1] = EmitToInt(CGF, Args[1], T, IntType); - llvm::Value *Result = - CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], - llvm::SequentiallyConsistent); + llvm::Value *Result = CGF.Builder.CreateAtomicRMW( + Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); return EmitFromInt(CGF, Result, T, ValueType); } @@ -167,9 +167,8 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, Args[1] = EmitToInt(CGF, Args[1], T, IntType); Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); - llvm::Value *Result = - CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], - llvm::SequentiallyConsistent); + llvm::Value *Result = CGF.Builder.CreateAtomicRMW( + Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); if (Invert) Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, @@ -206,9 +205,9 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, Args[1] = EmitToInt(CGF, Args[1], T, IntType); Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); - Value *Pair = CGF.Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], - llvm::SequentiallyConsistent, - llvm::SequentiallyConsistent); + Value *Pair = CGF.Builder.CreateAtomicCmpXchg( + Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering::SequentiallyConsistent); if (ReturnBool) // Extract boolean success flag and zext it to int. return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), @@ -219,6 +218,51 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, ValueType); } +// Emit a simple mangled intrinsic that has 1 argument and a return type +// matching the argument type. 
+static Value *emitUnaryBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, Src0); +} + +// Emit an intrinsic that has 2 operands of the same type as its result. +static Value *emitBinaryBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, { Src0, Src1 }); +} + +// Emit an intrinsic that has 3 operands of the same type as its result. +static Value *emitTernaryBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); +} + +// Emit an intrinsic that has 1 float or double operand, and 1 integer. +static Value *emitFPIntBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, {Src0, Src1}); +} + /// EmitFAbs - Emit a call to @llvm.fabs(). static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); @@ -248,8 +292,8 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { if (CGF.getTarget().isBigEndian()) { Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); V = CGF.Builder.CreateLShr(V, ShiftCst); - } - // We are truncating value in order to extract the higher-order + } + // We are truncating value in order to extract the higher-order // double, which we will be using to extract the sign from. 
IntTy = llvm::IntegerType::get(C, Width); V = CGF.Builder.CreateTrunc(V, IntTy); @@ -288,6 +332,17 @@ static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, return CGF.Builder.CreateExtractValue(Tmp, 0); } +static Value *emitRangedBuiltin(CodeGenFunction &CGF, + unsigned IntrinsicID, + int low, int high) { + llvm::MDBuilder MDHelper(CGF.getLLVMContext()); + llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); + llvm::Instruction *Call = CGF.Builder.CreateCall(F); + Call->setMetadata(llvm::LLVMContext::MD_range, RNode); + return Call; +} + namespace { struct WidthAndSignedness { unsigned Width; @@ -465,9 +520,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_fabs: case Builtin::BI__builtin_fabsf: case Builtin::BI__builtin_fabsl: { - Value *Arg1 = EmitScalarExpr(E->getArg(0)); - Value *Result = EmitFAbs(*this, Arg1); - return RValue::get(Result); + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); } case Builtin::BI__builtin_fmod: case Builtin::BI__builtin_fmodf: @@ -477,7 +530,51 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod"); return RValue::get(Result); } - + case Builtin::BI__builtin_copysign: + case Builtin::BI__builtin_copysignf: + case Builtin::BI__builtin_copysignl: { + return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); + } + case Builtin::BI__builtin_ceil: + case Builtin::BI__builtin_ceilf: + case Builtin::BI__builtin_ceill: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); + } + case Builtin::BI__builtin_floor: + case Builtin::BI__builtin_floorf: + case Builtin::BI__builtin_floorl: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); + } + case Builtin::BI__builtin_trunc: + case Builtin::BI__builtin_truncf: + case Builtin::BI__builtin_truncl: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); + } + case Builtin::BI__builtin_rint: + case Builtin::BI__builtin_rintf: + case Builtin::BI__builtin_rintl: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); + } + case Builtin::BI__builtin_nearbyint: + case Builtin::BI__builtin_nearbyintf: + case Builtin::BI__builtin_nearbyintl: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); + } + case Builtin::BI__builtin_round: + case Builtin::BI__builtin_roundf: + case Builtin::BI__builtin_roundl: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); + } + case Builtin::BI__builtin_fmin: + case Builtin::BI__builtin_fminf: + case Builtin::BI__builtin_fminl: { + return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); + } + case Builtin::BI__builtin_fmax: + case Builtin::BI__builtin_fmaxf: + case Builtin::BI__builtin_fmaxl: { + return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); + } case Builtin::BI__builtin_conj: case Builtin::BI__builtin_conjf: case Builtin::BI__builtin_conjl: { @@ -645,10 +742,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_bswap16: case Builtin::BI__builtin_bswap32: case Builtin::BI__builtin_bswap64: { - Value *ArgValue = EmitScalarExpr(E->getArg(0)); - llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType); - return RValue::get(Builder.CreateCall(F, ArgValue)); + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); + } + case 
Builtin::BI__builtin_bitreverse8: + case Builtin::BI__builtin_bitreverse16: + case Builtin::BI__builtin_bitreverse32: + case Builtin::BI__builtin_bitreverse64: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); } case Builtin::BI__builtin_object_size: { unsigned Type = @@ -751,13 +851,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); } - case Builtin::BI__builtin_isinf: { - // isinf(x) --> fabs(x) == infinity + case Builtin::BI__builtin_isinf: + case Builtin::BI__builtin_isfinite: { + // isinf(x) --> fabs(x) == infinity + // isfinite(x) --> fabs(x) != infinity + // x != NaN via the ordered compare in either case. Value *V = EmitScalarExpr(E->getArg(0)); - V = EmitFAbs(*this, V); - - V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf"); - return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); + Value *Fabs = EmitFAbs(*this, V); + Constant *Infinity = ConstantFP::getInfinity(V->getType()); + CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf) + ? CmpInst::FCMP_OEQ + : CmpInst::FCMP_ONE; + Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf"); + return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType()))); } case Builtin::BI__builtin_isinf_sign: { @@ -795,19 +901,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); } - case Builtin::BI__builtin_isfinite: { - // isfinite(x) --> x == x && fabs(x) != infinity; - Value *V = EmitScalarExpr(E->getArg(0)); - Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); - - Value *Abs = EmitFAbs(*this, V); - Value *IsNotInf = - Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); - - V = Builder.CreateAnd(Eq, IsNotInf, "and"); - return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); - } - case Builtin::BI__builtin_fpclassify: { Value *V = EmitScalarExpr(E->getArg(5)); llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); @@ -1258,7 +1351,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::StoreInst *Store = Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, StoreSize); - Store->setAtomic(llvm::Release); + Store->setAtomic(llvm::AtomicOrdering::Release); return RValue::get(nullptr); } @@ -1270,7 +1363,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // any way to safely use it... but in practice, it mostly works // to use it with non-atomic loads and stores to get acquire/release // semantics. 
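// --- Illustrative aside (not part of the diff): the combined isinf/isfinite
// lowering above folds both checks into one fabs plus a single *ordered*
// compare against infinity; an ordered FCMP_OEQ/FCMP_ONE is already false for
// NaN, so the old separate "x == x" test is unnecessary. A minimal standalone
// sketch of that equivalence (the ordered compare is modelled here with an
// explicit isnan test; all names are illustrative):
#include <cassert>
#include <cmath>
#include <limits>

static bool foldedIsInf(double x) {
  // FCMP_OEQ against +inf: NaN compares false, -inf folds to +inf via fabs.
  return std::fabs(x) == std::numeric_limits<double>::infinity();
}

static bool foldedIsFinite(double x) {
  // FCMP_ONE is an ordered compare, so NaN must yield false as well.
  return !std::isnan(x) &&
         std::fabs(x) != std::numeric_limits<double>::infinity();
}

int main() {
  const double inf = std::numeric_limits<double>::infinity();
  const double nan = std::numeric_limits<double>::quiet_NaN();
  for (double v : {0.0, -3.5, inf, -inf, nan}) {
    assert(foldedIsInf(v) == std::isinf(v));
    assert(foldedIsFinite(v) == std::isfinite(v));
  }
  return 0;
}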
- Builder.CreateFence(llvm::SequentiallyConsistent); + Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); return RValue::get(nullptr); } @@ -1294,9 +1387,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), getContext().VoidPtrTy); const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args, - FunctionType::ExtInfo(), - RequiredArgs::All); + CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args); @@ -1320,30 +1411,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, switch (ord) { case 0: // memory_order_relaxed default: // invalid order - Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - Ptr, NewVal, - llvm::Monotonic); + Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, + llvm::AtomicOrdering::Monotonic); break; - case 1: // memory_order_consume - case 2: // memory_order_acquire - Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - Ptr, NewVal, - llvm::Acquire); + case 1: // memory_order_consume + case 2: // memory_order_acquire + Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, + llvm::AtomicOrdering::Acquire); break; - case 3: // memory_order_release - Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - Ptr, NewVal, - llvm::Release); + case 3: // memory_order_release + Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, + llvm::AtomicOrdering::Release); break; - case 4: // memory_order_acq_rel - Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - Ptr, NewVal, - llvm::AcquireRelease); + case 4: // memory_order_acq_rel + + Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, + llvm::AtomicOrdering::AcquireRelease); break; - case 5: // memory_order_seq_cst - Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - Ptr, NewVal, - llvm::SequentiallyConsistent); + case 5: // memory_order_seq_cst + Result = Builder.CreateAtomicRMW( + llvm::AtomicRMWInst::Xchg, Ptr, NewVal, + llvm::AtomicOrdering::SequentiallyConsistent); break; } Result->setVolatile(Volatile); @@ -1360,9 +1448,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, createBasicBlock("seqcst", CurFn) }; llvm::AtomicOrdering Orders[5] = { - llvm::Monotonic, llvm::Acquire, llvm::Release, - llvm::AcquireRelease, llvm::SequentiallyConsistent - }; + llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, + llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, + llvm::AtomicOrdering::SequentiallyConsistent}; Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); @@ -1406,13 +1494,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, switch (ord) { case 0: // memory_order_relaxed default: // invalid order - Store->setOrdering(llvm::Monotonic); + Store->setOrdering(llvm::AtomicOrdering::Monotonic); break; case 3: // memory_order_release - Store->setOrdering(llvm::Release); + Store->setOrdering(llvm::AtomicOrdering::Release); break; case 5: // memory_order_seq_cst - Store->setOrdering(llvm::SequentiallyConsistent); + Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); break; } return RValue::get(nullptr); @@ -1426,8 +1514,8 @@ 
RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, createBasicBlock("seqcst", CurFn) }; llvm::AtomicOrdering Orders[3] = { - llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent - }; + llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, + llvm::AtomicOrdering::SequentiallyConsistent}; Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); @@ -1466,16 +1554,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, break; case 1: // memory_order_consume case 2: // memory_order_acquire - Builder.CreateFence(llvm::Acquire, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); break; case 3: // memory_order_release - Builder.CreateFence(llvm::Release, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); break; case 4: // memory_order_acq_rel - Builder.CreateFence(llvm::AcquireRelease, Scope); + Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); break; case 5: // memory_order_seq_cst - Builder.CreateFence(llvm::SequentiallyConsistent, Scope); + Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, + Scope); break; } return RValue::get(nullptr); @@ -1492,23 +1581,23 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); Builder.SetInsertPoint(AcquireBB); - Builder.CreateFence(llvm::Acquire, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(1), AcquireBB); SI->addCase(Builder.getInt32(2), AcquireBB); Builder.SetInsertPoint(ReleaseBB); - Builder.CreateFence(llvm::Release, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(3), ReleaseBB); Builder.SetInsertPoint(AcqRelBB); - Builder.CreateFence(llvm::AcquireRelease, Scope); + Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(4), AcqRelBB); Builder.SetInsertPoint(SeqCstBB); - Builder.CreateFence(llvm::SequentiallyConsistent, Scope); + Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(5), SeqCstBB); @@ -1794,7 +1883,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, break; } - + llvm::Value *Carry; llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); Builder.CreateStore(Sum, SumOutPtr); @@ -1839,9 +1928,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Value *Comparand = Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); - auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, - SequentiallyConsistent, - SequentiallyConsistent); + auto Result = + Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, + AtomicOrdering::SequentiallyConsistent, + AtomicOrdering::SequentiallyConsistent); Result->setVolatile(true); return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, @@ -1853,44 +1943,47 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(1)), - SequentiallyConsistent, - SequentiallyConsistent); + AtomicOrdering::SequentiallyConsistent, + AtomicOrdering::SequentiallyConsistent); CXI->setVolatile(true); return RValue::get(Builder.CreateExtractValue(CXI, 0)); } case Builtin::BI_InterlockedIncrement: 
{ + llvm::Type *IntTy = ConvertType(E->getType()); AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( AtomicRMWInst::Add, EmitScalarExpr(E->getArg(0)), - ConstantInt::get(Int32Ty, 1), - llvm::SequentiallyConsistent); + ConstantInt::get(IntTy, 1), + llvm::AtomicOrdering::SequentiallyConsistent); RMWI->setVolatile(true); - return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1))); + return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1))); } case Builtin::BI_InterlockedDecrement: { + llvm::Type *IntTy = ConvertType(E->getType()); AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( AtomicRMWInst::Sub, EmitScalarExpr(E->getArg(0)), - ConstantInt::get(Int32Ty, 1), - llvm::SequentiallyConsistent); + ConstantInt::get(IntTy, 1), + llvm::AtomicOrdering::SequentiallyConsistent); RMWI->setVolatile(true); - return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1))); + return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1))); } case Builtin::BI_InterlockedExchangeAdd: { AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( AtomicRMWInst::Add, EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), - llvm::SequentiallyConsistent); + llvm::AtomicOrdering::SequentiallyConsistent); RMWI->setVolatile(true); return RValue::get(RMWI); } case Builtin::BI__readfsdword: { + llvm::Type *IntTy = ConvertType(E->getType()); Value *IntToPtr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), - llvm::PointerType::get(CGM.Int32Ty, 257)); + llvm::PointerType::get(IntTy, 257)); LoadInst *Load = - Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true); + Builder.CreateDefaultAlignedLoad(IntToPtr, /*isVolatile=*/true); return RValue::get(Load); } @@ -1963,6 +2056,323 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); break; } + + // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions + case Builtin::BIread_pipe: + case Builtin::BIwrite_pipe: { + Value *Arg0 = EmitScalarExpr(E->getArg(0)), + *Arg1 = EmitScalarExpr(E->getArg(1)); + + // Type of the generic packet parameter. + unsigned GenericAS = + getContext().getTargetAddressSpace(LangAS::opencl_generic); + llvm::Type *I8PTy = llvm::PointerType::get( + llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); + + // Testing which overloaded version we should generate the call for. + if (2U == E->getNumArgs()) { + const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" + : "__write_pipe_2"; + // Creating a generic function type to be able to call with any builtin or + // user defined type. + llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy}; + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); + Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction(FTy, Name), {Arg0, BCast})); + } else { + assert(4 == E->getNumArgs() && + "Illegal number of parameters to pipe function"); + const char *Name = (BuiltinID == Builtin::BIread_pipe) ? 
"__read_pipe_4" + : "__write_pipe_4"; + + llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy}; + Value *Arg2 = EmitScalarExpr(E->getArg(2)), + *Arg3 = EmitScalarExpr(E->getArg(3)); + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); + Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); + // We know the third argument is an integer type, but we may need to cast + // it to i32. + if (Arg2->getType() != Int32Ty) + Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, Arg2, BCast})); + } + } + // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write + // functions + case Builtin::BIreserve_read_pipe: + case Builtin::BIreserve_write_pipe: + case Builtin::BIwork_group_reserve_read_pipe: + case Builtin::BIwork_group_reserve_write_pipe: + case Builtin::BIsub_group_reserve_read_pipe: + case Builtin::BIsub_group_reserve_write_pipe: { + // Composing the mangled name for the function. + const char *Name; + if (BuiltinID == Builtin::BIreserve_read_pipe) + Name = "__reserve_read_pipe"; + else if (BuiltinID == Builtin::BIreserve_write_pipe) + Name = "__reserve_write_pipe"; + else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) + Name = "__work_group_reserve_read_pipe"; + else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) + Name = "__work_group_reserve_write_pipe"; + else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) + Name = "__sub_group_reserve_read_pipe"; + else + Name = "__sub_group_reserve_write_pipe"; + + Value *Arg0 = EmitScalarExpr(E->getArg(0)), + *Arg1 = EmitScalarExpr(E->getArg(1)); + llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); + + // Building the generic function prototype. + llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty}; + llvm::FunctionType *FTy = llvm::FunctionType::get( + ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); + // We know the second argument is an integer type, but we may need to cast + // it to i32. + if (Arg1->getType() != Int32Ty) + Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1})); + } + // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write + // functions + case Builtin::BIcommit_read_pipe: + case Builtin::BIcommit_write_pipe: + case Builtin::BIwork_group_commit_read_pipe: + case Builtin::BIwork_group_commit_write_pipe: + case Builtin::BIsub_group_commit_read_pipe: + case Builtin::BIsub_group_commit_write_pipe: { + const char *Name; + if (BuiltinID == Builtin::BIcommit_read_pipe) + Name = "__commit_read_pipe"; + else if (BuiltinID == Builtin::BIcommit_write_pipe) + Name = "__commit_write_pipe"; + else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) + Name = "__work_group_commit_read_pipe"; + else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) + Name = "__work_group_commit_write_pipe"; + else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) + Name = "__sub_group_commit_read_pipe"; + else + Name = "__sub_group_commit_write_pipe"; + + Value *Arg0 = EmitScalarExpr(E->getArg(0)), + *Arg1 = EmitScalarExpr(E->getArg(1)); + + // Building the generic function prototype. 
+ llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType()}; + llvm::FunctionType *FTy = + llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), + llvm::ArrayRef<llvm::Type *>(ArgTys), false); + + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1})); + } + // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions + case Builtin::BIget_pipe_num_packets: + case Builtin::BIget_pipe_max_packets: { + const char *Name; + if (BuiltinID == Builtin::BIget_pipe_num_packets) + Name = "__get_pipe_num_packets"; + else + Name = "__get_pipe_max_packets"; + + // Building the generic function prototype. + Value *Arg0 = EmitScalarExpr(E->getArg(0)); + llvm::Type *ArgTys[] = {Arg0->getType()}; + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); + + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0})); + } + + // OpenCL v2.0 s6.13.9 - Address space qualifier functions. + case Builtin::BIto_global: + case Builtin::BIto_local: + case Builtin::BIto_private: { + auto Arg0 = EmitScalarExpr(E->getArg(0)); + auto NewArgT = llvm::PointerType::get(Int8Ty, + CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); + auto NewRetT = llvm::PointerType::get(Int8Ty, + CGM.getContext().getTargetAddressSpace( + E->getType()->getPointeeType().getAddressSpace())); + auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); + llvm::Value *NewArg; + if (Arg0->getType()->getPointerAddressSpace() != + NewArgT->getPointerAddressSpace()) + NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); + else + NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); + auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); + auto NewCall = + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); + return RValue::get(Builder.CreateBitOrPointerCast(NewCall, + ConvertType(E->getType()))); + } + + // OpenCL v2.0, s6.13.17 - Enqueue kernel function. + // It contains four different overload formats specified in Table 6.13.17.1. + case Builtin::BIenqueue_kernel: { + StringRef Name; // Generated function call name + unsigned NumArgs = E->getNumArgs(); + + llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); + llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy); + + llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); + llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); + llvm::Value *Range = EmitScalarExpr(E->getArg(2)); + + if (NumArgs == 4) { + // The most basic form of the call with parameters: + // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) + Name = "__enqueue_kernel_basic"; + llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy}; + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); + + llvm::Value *Block = + Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); + + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block})); + } + assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); + + // Could have events and/or vaargs. + if (E->getArg(3)->getType()->isBlockPointerType()) { + // No events passed, but has variadic arguments. + Name = "__enqueue_kernel_vaargs"; + llvm::Value *Block = + Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); + // Create a vector of the arguments, as well as a constant value to + // express to the runtime the number of variadic arguments. 
+ std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block, + ConstantInt::get(IntTy, NumArgs - 4)}; + std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy, + IntTy}; + + // Add the variadics. + for (unsigned I = 4; I < NumArgs; ++I) { + llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I)); + unsigned TypeSizeInBytes = + getContext() + .getTypeSizeInChars(E->getArg(I)->getType()) + .getQuantity(); + Args.push_back(TypeSizeInBytes < 4 + ? Builder.CreateZExt(ArgSize, Int32Ty) + : ArgSize); + } + + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + llvm::ArrayRef<llvm::Value *>(Args))); + } + // Any calls now have event arguments passed. + if (NumArgs >= 7) { + llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); + unsigned AS4 = + E->getArg(4)->getType()->isArrayType() + ? E->getArg(4)->getType().getAddressSpace() + : E->getArg(4)->getType()->getPointeeType().getAddressSpace(); + llvm::Type *EventPtrAS4Ty = + EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS4)); + unsigned AS5 = + E->getArg(5)->getType()->getPointeeType().getAddressSpace(); + llvm::Type *EventPtrAS5Ty = + EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5)); + + llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3)); + llvm::Value *EventList = + E->getArg(4)->getType()->isArrayType() + ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() + : EmitScalarExpr(E->getArg(4)); + llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); + llvm::Value *Block = + Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy); + + std::vector<llvm::Type *> ArgTys = { + QueueTy, Int32Ty, RangeTy, Int32Ty, + EventPtrAS4Ty, EventPtrAS5Ty, Int8PtrTy}; + std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, + EventList, ClkEvent, Block}; + + if (NumArgs == 7) { + // Has events but no variadics. + Name = "__enqueue_kernel_basic_events"; + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + llvm::ArrayRef<llvm::Value *>(Args))); + } + // Has event info and variadics + // Pass the number of variadics to the runtime function too. + Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); + ArgTys.push_back(Int32Ty); + Name = "__enqueue_kernel_events_vaargs"; + + // Add the variadics. + for (unsigned I = 7; I < NumArgs; ++I) { + llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I)); + unsigned TypeSizeInBytes = + getContext() + .getTypeSizeInChars(E->getArg(I)->getType()) + .getQuantity(); + Args.push_back(TypeSizeInBytes < 4 + ? Builder.CreateZExt(ArgSize, Int32Ty) + : ArgSize); + } + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + llvm::ArrayRef<llvm::Value *>(Args))); + } + } + // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block + // parameter. 
+ case Builtin::BIget_kernel_work_group_size: { + Value *Arg = EmitScalarExpr(E->getArg(0)); + Arg = Builder.CreateBitCast(Arg, Int8PtrTy); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, Int8PtrTy, false), + "__get_kernel_work_group_size_impl"), + Arg)); + } + case Builtin::BIget_kernel_preferred_work_group_size_multiple: { + Value *Arg = EmitScalarExpr(E->getArg(0)); + Arg = Builder.CreateBitCast(Arg, Int8PtrTy); + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, Int8PtrTy, false), + "__get_kernel_preferred_work_group_multiple_impl"), + Arg)); + } + case Builtin::BIprintf: + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) + return EmitCUDADevicePrintfCallExpr(E, ReturnValue); + break; + case Builtin::BI__builtin_canonicalize: + case Builtin::BI__builtin_canonicalizef: + case Builtin::BI__builtin_canonicalizel: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); + + case Builtin::BI__builtin_thread_pointer: { + if (!getContext().getTargetInfo().isTLSSupported()) + CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); + // Fall through - it's already mapped to the intrinsic by GCCBuiltin. + break; + } } // If this is an alias for a lib function (e.g. __builtin_sin), emit @@ -2155,7 +2565,7 @@ static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, } Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { - unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements(); + unsigned nElts = V->getType()->getVectorNumElements(); Value* SV = llvm::ConstantVector::getSplat(nElts, C); return Builder.CreateShuffleVector(V, V, SV, "lane"); } @@ -3073,14 +3483,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vext_v: case NEON::BI__builtin_neon_vextq_v: { int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) - Indices.push_back(ConstantInt::get(Int32Ty, i+CV)); + Indices.push_back(i+CV); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Value *SV = llvm::ConstantVector::get(Indices); - return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext"); + return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext"); } case NEON::BI__builtin_neon_vfma_v: case NEON::BI__builtin_neon_vfmaq_v: { @@ -3278,14 +3687,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { - Indices.push_back(Builder.getInt32(i+vi)); - Indices.push_back(Builder.getInt32(i+e+vi)); + Indices.push_back(i+vi); + Indices.push_back(i+e+vi); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -3307,13 +3715,12 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) - Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); + 
Indices.push_back(2*i+vi); Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -3326,14 +3733,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { - Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); - Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); + Indices.push_back((i + vi*e) >> 1); + Indices.push_back(((i + vi*e) >> 1)+e); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -3381,19 +3787,19 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, if (ExtOp) TblOps.push_back(ExtOp); - // Build a vector containing sequential number like (0, 1, 2, ..., 15) - SmallVector<Constant*, 16> Indices; + // Build a vector containing sequential number like (0, 1, 2, ..., 15) + SmallVector<uint32_t, 16> Indices; llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { - Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i)); - Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1)); + Indices.push_back(2*i); + Indices.push_back(2*i+1); } - Value *SV = llvm::ConstantVector::get(Indices); int PairPos = 0, End = Ops.size() - 1; while (PairPos < End) { TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], - Ops[PairPos+1], SV, Name)); + Ops[PairPos+1], Indices, + Name)); PairPos += 2; } @@ -3402,13 +3808,13 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, if (PairPos == End) { Value *ZeroTbl = ConstantAggregateZero::get(TblTy); TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], - ZeroTbl, SV, Name)); + ZeroTbl, Indices, Name)); } Function *TblF; TblOps.push_back(IndexOp); TblF = CGF.CGM.getIntrinsic(IntID, ResTy); - + return CGF.EmitNeonCall(TblF, TblOps, Name); } @@ -3452,7 +3858,9 @@ Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType, bool IsRead) { + llvm::Type *ValueType, + bool IsRead, + StringRef SysReg = "") { // write and register intrinsics only support 32 and 64 bit operations. 
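// --- Illustrative aside (not part of the diff): the vtrn/vuzp/vzip cases
// above (and their AArch64 twins later in this file) now pass the shuffle
// masks as plain uint32_t arrays instead of ConstantVector operands; the index
// formulas themselves are unchanged. A standalone sketch of what those
// formulas produce for a 4-element vector (names are illustrative):
#include <cstdio>
#include <vector>

static void print(const char *name, const std::vector<unsigned> &idx) {
  std::printf("%s:", name);
  for (unsigned v : idx)
    std::printf(" %u", v);
  std::printf("\n");
}

int main() {
  const unsigned e = 4;                  // elements per input vector
  for (unsigned vi = 0; vi != 2; ++vi) { // each builtin emits two shuffles
    std::vector<unsigned> trn, uzp, zip;
    for (unsigned i = 0; i != e; i += 2) {
      trn.push_back(i + vi);             // vtrn: 0 4 2 6 / 1 5 3 7
      trn.push_back(i + e + vi);
    }
    for (unsigned i = 0; i != e; ++i)
      uzp.push_back(2 * i + vi);         // vuzp: 0 2 4 6 / 1 3 5 7
    for (unsigned i = 0; i != e; i += 2) {
      zip.push_back((i + vi * e) >> 1);  // vzip: 0 4 1 5 / 2 6 3 7
      zip.push_back(((i + vi * e) >> 1) + e);
    }
    print("vtrn", trn);
    print("vuzp", uzp);
    print("vzip", zip);
  }
  return 0;
}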
assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) && "Unsupported size for register."); @@ -3461,8 +3869,10 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, CodeGen::CodeGenModule &CGM = CGF.CGM; LLVMContext &Context = CGM.getLLVMContext(); - const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); - StringRef SysReg = cast<StringLiteral>(SysRegStrExpr)->getString(); + if (SysReg.empty()) { + const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); + SysReg = cast<StringLiteral>(SysRegStrExpr)->getString(); + } llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); @@ -3602,6 +4012,74 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); } + if (BuiltinID == ARM::BI__builtin_arm_mcrr || + BuiltinID == ARM::BI__builtin_arm_mcrr2) { + Function *F; + + switch (BuiltinID) { + default: llvm_unreachable("unexpected builtin"); + case ARM::BI__builtin_arm_mcrr: + F = CGM.getIntrinsic(Intrinsic::arm_mcrr); + break; + case ARM::BI__builtin_arm_mcrr2: + F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); + break; + } + + // MCRR{2} instruction has 5 operands but + // the intrinsic has 4 because Rt and Rt2 + // are represented as a single unsigned 64 + // bit integer in the intrinsic definition + // but internally it's represented as 2 32 + // bit integers. + + Value *Coproc = EmitScalarExpr(E->getArg(0)); + Value *Opc1 = EmitScalarExpr(E->getArg(1)); + Value *RtAndRt2 = EmitScalarExpr(E->getArg(2)); + Value *CRm = EmitScalarExpr(E->getArg(3)); + + Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); + Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty); + Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1); + Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty); + + return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); + } + + if (BuiltinID == ARM::BI__builtin_arm_mrrc || + BuiltinID == ARM::BI__builtin_arm_mrrc2) { + Function *F; + + switch (BuiltinID) { + default: llvm_unreachable("unexpected builtin"); + case ARM::BI__builtin_arm_mrrc: + F = CGM.getIntrinsic(Intrinsic::arm_mrrc); + break; + case ARM::BI__builtin_arm_mrrc2: + F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); + break; + } + + Value *Coproc = EmitScalarExpr(E->getArg(0)); + Value *Opc1 = EmitScalarExpr(E->getArg(1)); + Value *CRm = EmitScalarExpr(E->getArg(2)); + Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm}); + + // Returns an unsigned 64 bit integer, represented + // as two 32 bit integers. + + Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); + Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); + Rt = Builder.CreateZExt(Rt, Int64Ty); + Rt1 = Builder.CreateZExt(Rt1, Int64Ty); + + Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); + RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); + RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); + + return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); + } + if (BuiltinID == ARM::BI__builtin_arm_ldrexd || ((BuiltinID == ARM::BI__builtin_arm_ldrex || BuiltinID == ARM::BI__builtin_arm_ldaex) && @@ -3914,7 +4392,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // the first argument, but the LLVM intrinsic expects it as the third one. case ARM::BI_MoveToCoprocessor: case ARM::BI_MoveToCoprocessor2: { - Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? + Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? 
Intrinsic::arm_mcr : Intrinsic::arm_mcr2); return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], Ops[3], Ops[4], Ops[5]}); @@ -4478,11 +4956,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F); } - if (BuiltinID == AArch64::BI__builtin_thread_pointer) { - Function *F = CGM.getIntrinsic(Intrinsic::aarch64_thread_pointer); - return Builder.CreateCall(F); - } - // CRC32 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { @@ -5150,22 +5623,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); } - case NEON::BI__builtin_neon_vfms_v: - case NEON::BI__builtin_neon_vfmsq_v: { // Only used for FP types - // FIXME: probably remove when we no longer support aarch64_simd.h - // (arm_neon.h delegates to vfma). - - // The ARM builtins (and instructions) have the addend as the first - // operand, but the 'fma' intrinsics have it last. Swap it around here. - Value *Subtrahend = Ops[0]; - Value *Multiplicand = Ops[2]; - Ops[0] = Multiplicand; - Ops[2] = Subtrahend; - Ops[1] = Builder.CreateBitCast(Ops[1], VTy); - Ops[1] = Builder.CreateFNeg(Ops[1]); - Int = Intrinsic::fma; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls"); - } case NEON::BI__builtin_neon_vmull_v: // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; @@ -5988,14 +6445,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { - Indices.push_back(ConstantInt::get(Int32Ty, i+vi)); - Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi)); + Indices.push_back(i+vi); + Indices.push_back(i+e+vi); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -6008,13 +6464,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) - Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); + Indices.push_back(2*i+vi); Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -6027,14 +6482,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { - Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); - Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); + Indices.push_back((i + vi*e) >> 1); + Indices.push_back(((i + vi*e) >> 1)+e); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, 
Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -6110,6 +6564,118 @@ BuildVector(ArrayRef<llvm::Value*> Ops) { return Result; } +// Convert the mask from an integer type to a vector of i1. +static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, + unsigned NumElts) { + + llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), + cast<IntegerType>(Mask->getType())->getBitWidth()); + Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); + + // If we have less than 8 elements, then the starting mask was an i8 and + // we need to extract down to the right number of elements. + if (NumElts < 8) { + uint32_t Indices[4]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i; + MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, + makeArrayRef(Indices, NumElts), + "extract"); + } + return MaskVec; +} + +static Value *EmitX86MaskedStore(CodeGenFunction &CGF, + SmallVectorImpl<Value *> &Ops, + unsigned Align) { + // Cast the pointer to right type. + Ops[0] = CGF.Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Ops[1]->getType())); + + // If the mask is all ones just emit a regular store. + if (const auto *C = dyn_cast<Constant>(Ops[2])) + if (C->isAllOnesValue()) + return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align); + + Value *MaskVec = getMaskVecValue(CGF, Ops[2], + Ops[1]->getType()->getVectorNumElements()); + + return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec); +} + +static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, + SmallVectorImpl<Value *> &Ops, unsigned Align) { + // Cast the pointer to right type. + Ops[0] = CGF.Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Ops[1]->getType())); + + // If the mask is all ones just emit a regular store. + if (const auto *C = dyn_cast<Constant>(Ops[2])) + if (C->isAllOnesValue()) + return CGF.Builder.CreateAlignedLoad(Ops[0], Align); + + Value *MaskVec = getMaskVecValue(CGF, Ops[2], + Ops[1]->getType()->getVectorNumElements()); + + return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); +} + +static Value *EmitX86Select(CodeGenFunction &CGF, + Value *Mask, Value *Op0, Value *Op1) { + + // If the mask is all ones just return first argument. + if (const auto *C = dyn_cast<Constant>(Mask)) + if (C->isAllOnesValue()) + return Op0; + + Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements()); + + return CGF.Builder.CreateSelect(Mask, Op0, Op1); +} + +static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, + bool Signed, SmallVectorImpl<Value *> &Ops) { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + Value *Cmp; + + if (CC == 3) { + Cmp = Constant::getNullValue( + llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); + } else if (CC == 7) { + Cmp = Constant::getAllOnesValue( + llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); + } else { + ICmpInst::Predicate Pred; + switch (CC) { + default: llvm_unreachable("Unknown condition code"); + case 0: Pred = ICmpInst::ICMP_EQ; break; + case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; + case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; + case 4: Pred = ICmpInst::ICMP_NE; break; + case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; + case 6: Pred = Signed ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; + } + Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); + } + + const auto *C = dyn_cast<Constant>(Ops.back()); + if (!C || !C->isAllOnesValue()) + Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts)); + + if (NumElts < 8) { + uint32_t Indices[8]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i; + for (unsigned i = NumElts; i != 8; ++i) + Indices[i] = i % NumElts + NumElts; + Cmp = CGF.Builder.CreateShuffleVector( + Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); + } + return CGF.Builder.CreateBitCast(Cmp, + IntegerType::get(CGF.getLLVMContext(), + std::max(NumElts, 8U))); +} + Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { if (BuiltinID == X86::BI__builtin_ms_va_start || @@ -6160,6 +6726,31 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); } + // These exist so that the builtin that takes an immediate can be bounds + // checked by clang to avoid passing bad immediates to the backend. Since + // AVX has a larger immediate than SSE we would need separate builtins to + // do the different bounds checking. Rather than create a clang specific + // SSE only builtin, this implements eight separate builtins to match gcc + // implementation. + auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) { + Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); + llvm::Function *F = CGM.getIntrinsic(ID); + return Builder.CreateCall(F, Ops); + }; + + // For the vector forms of FP comparisons, translate the builtins directly to + // IR. + // TODO: The builtins could be removed if the SSE header files used vector + // extension comparisons directly (vector ordered/unordered may need + // additional support via __builtin_isnan()). 
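// --- Illustrative aside (not part of the diff): getMaskVecValue and
// EmitX86Select above implement the AVX-512 write-mask convention -- bit i of
// the k-register value becomes lane i of an i1 vector, and the select keeps
// the fallthrough lane wherever that bit is clear. A scalar model of that
// behaviour (the real code stays entirely in LLVM IR; names are illustrative):
#include <cassert>
#include <cstdint>

// result[i] = mask bit i ? op0[i] : op1[i]
static void maskedSelect(uint8_t mask, const int *op0, const int *op1,
                         int *result, unsigned numElts) {
  for (unsigned i = 0; i != numElts; ++i)
    result[i] = ((mask >> i) & 1) ? op0[i] : op1[i];
}

int main() {
  int a[4] = {10, 11, 12, 13};
  int b[4] = {0, 0, 0, 0};
  int r[4];
  maskedSelect(0b0101, a, b, r, 4); // only lanes 0 and 2 are taken from a
  assert(r[0] == 10 && r[1] == 0 && r[2] == 12 && r[3] == 0);
  return 0;
}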
+ auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) { + Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); + llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); + llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); + Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); + return Builder.CreateBitCast(Sext, FPVecTy); + }; + switch (BuiltinID) { default: return nullptr; case X86::BI__builtin_cpu_supports: { @@ -6188,6 +6779,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, AVX512F, BMI, BMI2, + AES, + PCLMUL, + AVX512VL, + AVX512BW, + AVX512DQ, + AVX512CD, + AVX512ER, + AVX512PF, + AVX512VBMI, + AVX512IFMA, MAX }; @@ -6198,6 +6799,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, .Case("sse", X86Features::SSE) .Case("sse2", X86Features::SSE2) .Case("sse3", X86Features::SSE3) + .Case("ssse3", X86Features::SSSE3) .Case("sse4.1", X86Features::SSE4_1) .Case("sse4.2", X86Features::SSE4_2) .Case("avx", X86Features::AVX) @@ -6209,6 +6811,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, .Case("avx512f", X86Features::AVX512F) .Case("bmi", X86Features::BMI) .Case("bmi2", X86Features::BMI2) + .Case("aes", X86Features::AES) + .Case("pclmul", X86Features::PCLMUL) + .Case("avx512vl", X86Features::AVX512VL) + .Case("avx512bw", X86Features::AVX512BW) + .Case("avx512dq", X86Features::AVX512DQ) + .Case("avx512cd", X86Features::AVX512CD) + .Case("avx512er", X86Features::AVX512ER) + .Case("avx512pf", X86Features::AVX512PF) + .Case("avx512vbmi", X86Features::AVX512VBMI) + .Case("avx512ifma", X86Features::AVX512IFMA) .Default(X86Features::MAX); assert(Feature != X86Features::MAX && "Invalid feature!"); @@ -6237,7 +6849,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Check the value of the bit corresponding to the feature requested. 
Value *Bitset = Builder.CreateAnd( - Features, llvm::ConstantInt::get(Int32Ty, 1 << Feature)); + Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature)); return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); } case X86::BI_mm_prefetch: { @@ -6312,6 +6924,78 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops.push_back(Mlo); return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); } + case X86::BI__builtin_ia32_storedqudi128_mask: + case X86::BI__builtin_ia32_storedqusi128_mask: + case X86::BI__builtin_ia32_storedquhi128_mask: + case X86::BI__builtin_ia32_storedquqi128_mask: + case X86::BI__builtin_ia32_storeupd128_mask: + case X86::BI__builtin_ia32_storeups128_mask: + case X86::BI__builtin_ia32_storedqudi256_mask: + case X86::BI__builtin_ia32_storedqusi256_mask: + case X86::BI__builtin_ia32_storedquhi256_mask: + case X86::BI__builtin_ia32_storedquqi256_mask: + case X86::BI__builtin_ia32_storeupd256_mask: + case X86::BI__builtin_ia32_storeups256_mask: + case X86::BI__builtin_ia32_storedqudi512_mask: + case X86::BI__builtin_ia32_storedqusi512_mask: + case X86::BI__builtin_ia32_storedquhi512_mask: + case X86::BI__builtin_ia32_storedquqi512_mask: + case X86::BI__builtin_ia32_storeupd512_mask: + case X86::BI__builtin_ia32_storeups512_mask: + return EmitX86MaskedStore(*this, Ops, 1); + + case X86::BI__builtin_ia32_movdqa32store128_mask: + case X86::BI__builtin_ia32_movdqa64store128_mask: + case X86::BI__builtin_ia32_storeaps128_mask: + case X86::BI__builtin_ia32_storeapd128_mask: + case X86::BI__builtin_ia32_movdqa32store256_mask: + case X86::BI__builtin_ia32_movdqa64store256_mask: + case X86::BI__builtin_ia32_storeaps256_mask: + case X86::BI__builtin_ia32_storeapd256_mask: + case X86::BI__builtin_ia32_movdqa32store512_mask: + case X86::BI__builtin_ia32_movdqa64store512_mask: + case X86::BI__builtin_ia32_storeaps512_mask: + case X86::BI__builtin_ia32_storeapd512_mask: { + unsigned Align = + getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); + return EmitX86MaskedStore(*this, Ops, Align); + } + case X86::BI__builtin_ia32_loadups128_mask: + case X86::BI__builtin_ia32_loadups256_mask: + case X86::BI__builtin_ia32_loadups512_mask: + case X86::BI__builtin_ia32_loadupd128_mask: + case X86::BI__builtin_ia32_loadupd256_mask: + case X86::BI__builtin_ia32_loadupd512_mask: + case X86::BI__builtin_ia32_loaddquqi128_mask: + case X86::BI__builtin_ia32_loaddquqi256_mask: + case X86::BI__builtin_ia32_loaddquqi512_mask: + case X86::BI__builtin_ia32_loaddquhi128_mask: + case X86::BI__builtin_ia32_loaddquhi256_mask: + case X86::BI__builtin_ia32_loaddquhi512_mask: + case X86::BI__builtin_ia32_loaddqusi128_mask: + case X86::BI__builtin_ia32_loaddqusi256_mask: + case X86::BI__builtin_ia32_loaddqusi512_mask: + case X86::BI__builtin_ia32_loaddqudi128_mask: + case X86::BI__builtin_ia32_loaddqudi256_mask: + case X86::BI__builtin_ia32_loaddqudi512_mask: + return EmitX86MaskedLoad(*this, Ops, 1); + + case X86::BI__builtin_ia32_loadaps128_mask: + case X86::BI__builtin_ia32_loadaps256_mask: + case X86::BI__builtin_ia32_loadaps512_mask: + case X86::BI__builtin_ia32_loadapd128_mask: + case X86::BI__builtin_ia32_loadapd256_mask: + case X86::BI__builtin_ia32_loadapd512_mask: + case X86::BI__builtin_ia32_movdqa32load128_mask: + case X86::BI__builtin_ia32_movdqa32load256_mask: + case X86::BI__builtin_ia32_movdqa32load512_mask: + case X86::BI__builtin_ia32_movdqa64load128_mask: + case X86::BI__builtin_ia32_movdqa64load256_mask: + case 
X86::BI__builtin_ia32_movdqa64load512_mask: { + unsigned Align = + getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); + return EmitX86MaskedLoad(*this, Ops, Align); + } case X86::BI__builtin_ia32_storehps: case X86::BI__builtin_ia32_storelps: { llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); @@ -6330,103 +7014,50 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case X86::BI__builtin_ia32_palignr128: - case X86::BI__builtin_ia32_palignr256: { + case X86::BI__builtin_ia32_palignr256: + case X86::BI__builtin_ia32_palignr128_mask: + case X86::BI__builtin_ia32_palignr256_mask: + case X86::BI__builtin_ia32_palignr512_mask: { unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); - unsigned NumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); assert(NumElts % 16 == 0); - unsigned NumLanes = NumElts / 16; - unsigned NumLaneElts = NumElts / NumLanes; // If palignr is shifting the pair of vectors more than the size of two // lanes, emit zero. - if (ShiftVal >= (2 * NumLaneElts)) + if (ShiftVal >= 32) return llvm::Constant::getNullValue(ConvertType(E->getType())); // If palignr is shifting the pair of input vectors more than one lane, // but less than two lanes, convert to shifting in zeroes. - if (ShiftVal > NumLaneElts) { - ShiftVal -= NumLaneElts; + if (ShiftVal > 16) { + ShiftVal -= 16; Ops[1] = Ops[0]; Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); } - uint32_t Indices[32]; + uint32_t Indices[64]; // 256-bit palignr operates on 128-bit lanes so we need to handle that - for (unsigned l = 0; l != NumElts; l += NumLaneElts) { - for (unsigned i = 0; i != NumLaneElts; ++i) { + for (unsigned l = 0; l != NumElts; l += 16) { + for (unsigned i = 0; i != 16; ++i) { unsigned Idx = ShiftVal + i; - if (Idx >= NumLaneElts) - Idx += NumElts - NumLaneElts; // End of lane, switch operand. + if (Idx >= 16) + Idx += NumElts - 16; // End of lane, switch operand. Indices[l + i] = Idx + l; } } - Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), - makeArrayRef(Indices, NumElts)); - return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); - } - case X86::BI__builtin_ia32_pslldqi256: { - // Shift value is in bits so divide by 8. - unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3; + Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0], + makeArrayRef(Indices, NumElts), + "palignr"); - // If pslldq is shifting the vector more than 15 bytes, emit zero. - if (shiftVal >= 16) - return llvm::Constant::getNullValue(ConvertType(E->getType())); - - uint32_t Indices[32]; - // 256-bit pslldq operates on 128-bit lanes so we need to handle that - for (unsigned l = 0; l != 32; l += 16) { - for (unsigned i = 0; i != 16; ++i) { - unsigned Idx = 32 + i - shiftVal; - if (Idx < 32) Idx -= 16; // end of lane, switch operand. - Indices[l + i] = Idx + l; - } - } - - llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32); - Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); - Value *Zero = llvm::Constant::getNullValue(VecTy); + // If this isn't a masked builtin, just return the align operation. 
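// --- Illustrative aside (not part of the diff): the unified palignr lowering
// above works per 16-byte lane and emits a single shuffle -- index ShiftVal+i
// reads from the low operand, and any index past byte 15 is redirected into
// the other operand (which has already been replaced by zeros when ShiftVal
// exceeds 16, or the whole result zeroed when ShiftVal >= 32). A standalone
// model of the per-lane index computation (names are illustrative):
#include <cstdio>

static void palignrLane(unsigned shiftVal, unsigned numElts,
                        unsigned out[16]) {
  for (unsigned i = 0; i != 16; ++i) {
    unsigned idx = shiftVal + i;
    if (idx >= 16)
      idx += numElts - 16; // end of lane, switch to the other shuffle operand
    out[i] = idx;
  }
}

int main() {
  unsigned idx[16];
  palignrLane(4, 16, idx); // 128-bit case: NumElts == 16
  for (unsigned v : idx)
    std::printf("%u ", v); // prints 4..15 followed by 16..19
  std::printf("\n");
  return 0;
}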
+ if (Ops.size() == 3) + return Align; - Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); - SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq"); - llvm::Type *ResultType = ConvertType(E->getType()); - return Builder.CreateBitCast(SV, ResultType, "cast"); + return EmitX86Select(*this, Ops[4], Align, Ops[3]); } - case X86::BI__builtin_ia32_psrldqi256: { - // Shift value is in bits so divide by 8. - unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3; - - // If psrldq is shifting the vector more than 15 bytes, emit zero. - if (shiftVal >= 16) - return llvm::Constant::getNullValue(ConvertType(E->getType())); - uint32_t Indices[32]; - // 256-bit psrldq operates on 128-bit lanes so we need to handle that - for (unsigned l = 0; l != 32; l += 16) { - for (unsigned i = 0; i != 16; ++i) { - unsigned Idx = i + shiftVal; - if (Idx >= 16) Idx += 16; // end of lane, switch operand. - Indices[l + i] = Idx + l; - } - } - - llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32); - Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); - Value *Zero = llvm::Constant::getNullValue(VecTy); - - Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); - SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq"); - llvm::Type *ResultType = ConvertType(E->getType()); - return Builder.CreateBitCast(SV, ResultType, "cast"); - } - case X86::BI__builtin_ia32_movntps: - case X86::BI__builtin_ia32_movntps256: - case X86::BI__builtin_ia32_movntpd: - case X86::BI__builtin_ia32_movntpd256: - case X86::BI__builtin_ia32_movntdq: - case X86::BI__builtin_ia32_movntdq256: case X86::BI__builtin_ia32_movnti: case X86::BI__builtin_ia32_movnti64: { llvm::MDNode *Node = llvm::MDNode::get( @@ -6439,17 +7070,156 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC); SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); - // If the operand is an integer, we can't assume alignment. Otherwise, - // assume natural alignment. - QualType ArgTy = E->getArg(1)->getType(); - unsigned Align; - if (ArgTy->isIntegerType()) - Align = 1; - else - Align = getContext().getTypeSizeInChars(ArgTy).getQuantity(); - SI->setAlignment(Align); + // No alignment for scalar intrinsic store. + SI->setAlignment(1); + return SI; + } + case X86::BI__builtin_ia32_movntsd: + case X86::BI__builtin_ia32_movntss: { + llvm::MDNode *Node = llvm::MDNode::get( + getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); + + // Extract the 0'th element of the source vector. + Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract"); + + // Convert the type of the pointer to a pointer to the stored type. + Value *BC = Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Scl->getType()), + "cast"); + + // Unaligned nontemporal store of the scalar value. 
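// A minimal sketch of what the palignr index computation above produces,
// assuming the usual SSSE3 wrapper _mm_alignr_epi8 routes to
// __builtin_ia32_palignr128.  For the 128-bit form with ShiftVal = 4 the
// indices are {4,5,...,15,16,17,18,19}: elements 4..15 come from Ops[1]
// (the second source) and 16..19 wrap into Ops[0], i.e. bytes 4..19 of the
// two-vector concatenation, matching the documented alignr semantics.
#include <immintrin.h>

__m128i alignr_demo(__m128i hi, __m128i lo) {
  // Result byte i == byte (i + 4) of the 32-byte concatenation
  // (lo in the low 16 bytes, hi in the high 16 bytes).
  return _mm_alignr_epi8(hi, lo, 4);
}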
+ StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC); + SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); + SI->setAlignment(1); return SI; } + + case X86::BI__builtin_ia32_selectb_128: + case X86::BI__builtin_ia32_selectb_256: + case X86::BI__builtin_ia32_selectb_512: + case X86::BI__builtin_ia32_selectw_128: + case X86::BI__builtin_ia32_selectw_256: + case X86::BI__builtin_ia32_selectw_512: + case X86::BI__builtin_ia32_selectd_128: + case X86::BI__builtin_ia32_selectd_256: + case X86::BI__builtin_ia32_selectd_512: + case X86::BI__builtin_ia32_selectq_128: + case X86::BI__builtin_ia32_selectq_256: + case X86::BI__builtin_ia32_selectq_512: + case X86::BI__builtin_ia32_selectps_128: + case X86::BI__builtin_ia32_selectps_256: + case X86::BI__builtin_ia32_selectps_512: + case X86::BI__builtin_ia32_selectpd_128: + case X86::BI__builtin_ia32_selectpd_256: + case X86::BI__builtin_ia32_selectpd_512: + return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); + case X86::BI__builtin_ia32_pcmpeqb128_mask: + case X86::BI__builtin_ia32_pcmpeqb256_mask: + case X86::BI__builtin_ia32_pcmpeqb512_mask: + case X86::BI__builtin_ia32_pcmpeqw128_mask: + case X86::BI__builtin_ia32_pcmpeqw256_mask: + case X86::BI__builtin_ia32_pcmpeqw512_mask: + case X86::BI__builtin_ia32_pcmpeqd128_mask: + case X86::BI__builtin_ia32_pcmpeqd256_mask: + case X86::BI__builtin_ia32_pcmpeqd512_mask: + case X86::BI__builtin_ia32_pcmpeqq128_mask: + case X86::BI__builtin_ia32_pcmpeqq256_mask: + case X86::BI__builtin_ia32_pcmpeqq512_mask: + return EmitX86MaskedCompare(*this, 0, false, Ops); + case X86::BI__builtin_ia32_pcmpgtb128_mask: + case X86::BI__builtin_ia32_pcmpgtb256_mask: + case X86::BI__builtin_ia32_pcmpgtb512_mask: + case X86::BI__builtin_ia32_pcmpgtw128_mask: + case X86::BI__builtin_ia32_pcmpgtw256_mask: + case X86::BI__builtin_ia32_pcmpgtw512_mask: + case X86::BI__builtin_ia32_pcmpgtd128_mask: + case X86::BI__builtin_ia32_pcmpgtd256_mask: + case X86::BI__builtin_ia32_pcmpgtd512_mask: + case X86::BI__builtin_ia32_pcmpgtq128_mask: + case X86::BI__builtin_ia32_pcmpgtq256_mask: + case X86::BI__builtin_ia32_pcmpgtq512_mask: + return EmitX86MaskedCompare(*this, 6, true, Ops); + case X86::BI__builtin_ia32_cmpb128_mask: + case X86::BI__builtin_ia32_cmpb256_mask: + case X86::BI__builtin_ia32_cmpb512_mask: + case X86::BI__builtin_ia32_cmpw128_mask: + case X86::BI__builtin_ia32_cmpw256_mask: + case X86::BI__builtin_ia32_cmpw512_mask: + case X86::BI__builtin_ia32_cmpd128_mask: + case X86::BI__builtin_ia32_cmpd256_mask: + case X86::BI__builtin_ia32_cmpd512_mask: + case X86::BI__builtin_ia32_cmpq128_mask: + case X86::BI__builtin_ia32_cmpq256_mask: + case X86::BI__builtin_ia32_cmpq512_mask: { + unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; + return EmitX86MaskedCompare(*this, CC, true, Ops); + } + case X86::BI__builtin_ia32_ucmpb128_mask: + case X86::BI__builtin_ia32_ucmpb256_mask: + case X86::BI__builtin_ia32_ucmpb512_mask: + case X86::BI__builtin_ia32_ucmpw128_mask: + case X86::BI__builtin_ia32_ucmpw256_mask: + case X86::BI__builtin_ia32_ucmpw512_mask: + case X86::BI__builtin_ia32_ucmpd128_mask: + case X86::BI__builtin_ia32_ucmpd256_mask: + case X86::BI__builtin_ia32_ucmpd512_mask: + case X86::BI__builtin_ia32_ucmpq128_mask: + case X86::BI__builtin_ia32_ucmpq256_mask: + case X86::BI__builtin_ia32_ucmpq512_mask: { + unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; + return EmitX86MaskedCompare(*this, CC, false, Ops); + } + + case X86::BI__builtin_ia32_vplzcntd_128_mask: + 
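// A minimal sketch of source-level forms that reach the select and
// mask-compare cases above, assuming the AVX-512 wrappers in <immintrin.h>
// route through these builtins (e.g. _mm512_cmpeq_epi32_mask ->
// __builtin_ia32_pcmpeqd512_mask, _mm512_mask_mov_epi32 ->
// __builtin_ia32_selectd_512).  EmitX86MaskedCompare packs a vector icmp
// into the scalar mask; EmitX86Select turns the scalar mask back into
// <N x i1> and emits a plain IR select.
#include <immintrin.h>

__mmask16 eq_mask(__m512i a, __m512i b) {
  return _mm512_cmpeq_epi32_mask(a, b);   // icmp eq <16 x i32>, packed to i16
}

__m512i pick(__mmask16 k, __m512i x, __m512i y) {
  return _mm512_mask_mov_epi32(y, k, x);  // lane-wise: k ? x : y
}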
case X86::BI__builtin_ia32_vplzcntd_256_mask: + case X86::BI__builtin_ia32_vplzcntd_512_mask: + case X86::BI__builtin_ia32_vplzcntq_128_mask: + case X86::BI__builtin_ia32_vplzcntq_256_mask: + case X86::BI__builtin_ia32_vplzcntq_512_mask: { + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); + return EmitX86Select(*this, Ops[2], + Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}), + Ops[1]); + } + + // TODO: Handle 64/512-bit vector widths of min/max. + case X86::BI__builtin_ia32_pmaxsb128: + case X86::BI__builtin_ia32_pmaxsw128: + case X86::BI__builtin_ia32_pmaxsd128: + case X86::BI__builtin_ia32_pmaxsb256: + case X86::BI__builtin_ia32_pmaxsw256: + case X86::BI__builtin_ia32_pmaxsd256: { + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]); + return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); + } + case X86::BI__builtin_ia32_pmaxub128: + case X86::BI__builtin_ia32_pmaxuw128: + case X86::BI__builtin_ia32_pmaxud128: + case X86::BI__builtin_ia32_pmaxub256: + case X86::BI__builtin_ia32_pmaxuw256: + case X86::BI__builtin_ia32_pmaxud256: { + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]); + return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); + } + case X86::BI__builtin_ia32_pminsb128: + case X86::BI__builtin_ia32_pminsw128: + case X86::BI__builtin_ia32_pminsd128: + case X86::BI__builtin_ia32_pminsb256: + case X86::BI__builtin_ia32_pminsw256: + case X86::BI__builtin_ia32_pminsd256: { + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]); + return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); + } + case X86::BI__builtin_ia32_pminub128: + case X86::BI__builtin_ia32_pminuw128: + case X86::BI__builtin_ia32_pminud128: + case X86::BI__builtin_ia32_pminub256: + case X86::BI__builtin_ia32_pminuw256: + case X86::BI__builtin_ia32_pminud256: { + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]); + return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); + } + // 3DNow! case X86::BI__builtin_ia32_pswapdsf: case X86::BI__builtin_ia32_pswapdsi: { @@ -6492,154 +7262,107 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops[0]); return Builder.CreateExtractValue(Call, 1); } - // SSE comparison intrisics + + // SSE packed comparison intrinsics case X86::BI__builtin_ia32_cmpeqps: + case X86::BI__builtin_ia32_cmpeqpd: + return getVectorFCmpIR(CmpInst::FCMP_OEQ); case X86::BI__builtin_ia32_cmpltps: + case X86::BI__builtin_ia32_cmpltpd: + return getVectorFCmpIR(CmpInst::FCMP_OLT); case X86::BI__builtin_ia32_cmpleps: + case X86::BI__builtin_ia32_cmplepd: + return getVectorFCmpIR(CmpInst::FCMP_OLE); case X86::BI__builtin_ia32_cmpunordps: + case X86::BI__builtin_ia32_cmpunordpd: + return getVectorFCmpIR(CmpInst::FCMP_UNO); case X86::BI__builtin_ia32_cmpneqps: + case X86::BI__builtin_ia32_cmpneqpd: + return getVectorFCmpIR(CmpInst::FCMP_UNE); case X86::BI__builtin_ia32_cmpnltps: + case X86::BI__builtin_ia32_cmpnltpd: + return getVectorFCmpIR(CmpInst::FCMP_UGE); case X86::BI__builtin_ia32_cmpnleps: + case X86::BI__builtin_ia32_cmpnlepd: + return getVectorFCmpIR(CmpInst::FCMP_UGT); case X86::BI__builtin_ia32_cmpordps: + case X86::BI__builtin_ia32_cmpordpd: + return getVectorFCmpIR(CmpInst::FCMP_ORD); + case X86::BI__builtin_ia32_cmpps: + case X86::BI__builtin_ia32_cmpps256: + case X86::BI__builtin_ia32_cmppd: + case X86::BI__builtin_ia32_cmppd256: { + unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); + // If this one of the SSE immediates, we can use native IR. 
+ if (CC < 8) { + FCmpInst::Predicate Pred; + switch (CC) { + case 0: Pred = FCmpInst::FCMP_OEQ; break; + case 1: Pred = FCmpInst::FCMP_OLT; break; + case 2: Pred = FCmpInst::FCMP_OLE; break; + case 3: Pred = FCmpInst::FCMP_UNO; break; + case 4: Pred = FCmpInst::FCMP_UNE; break; + case 5: Pred = FCmpInst::FCMP_UGE; break; + case 6: Pred = FCmpInst::FCMP_UGT; break; + case 7: Pred = FCmpInst::FCMP_ORD; break; + } + return getVectorFCmpIR(Pred); + } + + // We can't handle 8-31 immediates with native IR, use the intrinsic. + Intrinsic::ID ID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_cmpps: + ID = Intrinsic::x86_sse_cmp_ps; + break; + case X86::BI__builtin_ia32_cmpps256: + ID = Intrinsic::x86_avx_cmp_ps_256; + break; + case X86::BI__builtin_ia32_cmppd: + ID = Intrinsic::x86_sse2_cmp_pd; + break; + case X86::BI__builtin_ia32_cmppd256: + ID = Intrinsic::x86_avx_cmp_pd_256; + break; + } + + return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + } + + // SSE scalar comparison intrinsics case X86::BI__builtin_ia32_cmpeqss: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); case X86::BI__builtin_ia32_cmpltss: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); case X86::BI__builtin_ia32_cmpless: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); case X86::BI__builtin_ia32_cmpunordss: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); case X86::BI__builtin_ia32_cmpneqss: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); case X86::BI__builtin_ia32_cmpnltss: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); case X86::BI__builtin_ia32_cmpnless: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); case X86::BI__builtin_ia32_cmpordss: - case X86::BI__builtin_ia32_cmpeqpd: - case X86::BI__builtin_ia32_cmpltpd: - case X86::BI__builtin_ia32_cmplepd: - case X86::BI__builtin_ia32_cmpunordpd: - case X86::BI__builtin_ia32_cmpneqpd: - case X86::BI__builtin_ia32_cmpnltpd: - case X86::BI__builtin_ia32_cmpnlepd: - case X86::BI__builtin_ia32_cmpordpd: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); case X86::BI__builtin_ia32_cmpeqsd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); case X86::BI__builtin_ia32_cmpltsd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); case X86::BI__builtin_ia32_cmplesd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); case X86::BI__builtin_ia32_cmpunordsd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); case X86::BI__builtin_ia32_cmpneqsd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4); case X86::BI__builtin_ia32_cmpnltsd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); case X86::BI__builtin_ia32_cmpnlesd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); case X86::BI__builtin_ia32_cmpordsd: - // These exist so that the builtin that takes an immediate can be bounds - // checked by clang to avoid passing bad immediates to the backend. Since - // AVX has a larger immediate than SSE we would need separate builtins to - // do the different bounds checking. Rather than create a clang specific - // SSE only builtin, this implements eight separate builtins to match gcc - // implementation. - - // Choose the immediate. 
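// A minimal sketch of calls that reach the immediate-driven cmpps/cmppd path
// above, assuming the AVX wrappers in <immintrin.h>.  Immediates 0-7 now
// lower to a native IR fcmp (via getVectorFCmpIR); immediates 8-31, which
// only AVX encodes, still go through the target intrinsic.
#include <immintrin.h>

__m256 lt(__m256 a, __m256 b) {
  return _mm256_cmp_ps(a, b, _CMP_LT_OS);  // imm 1 -> fcmp olt
}

__m256 nge(__m256 a, __m256 b) {
  return _mm256_cmp_ps(a, b, _CMP_NGE_US); // imm 9 -> llvm.x86.avx.cmp.ps.256
}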
- unsigned Imm; - switch (BuiltinID) { - default: llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_cmpeqps: - case X86::BI__builtin_ia32_cmpeqss: - case X86::BI__builtin_ia32_cmpeqpd: - case X86::BI__builtin_ia32_cmpeqsd: - Imm = 0; - break; - case X86::BI__builtin_ia32_cmpltps: - case X86::BI__builtin_ia32_cmpltss: - case X86::BI__builtin_ia32_cmpltpd: - case X86::BI__builtin_ia32_cmpltsd: - Imm = 1; - break; - case X86::BI__builtin_ia32_cmpleps: - case X86::BI__builtin_ia32_cmpless: - case X86::BI__builtin_ia32_cmplepd: - case X86::BI__builtin_ia32_cmplesd: - Imm = 2; - break; - case X86::BI__builtin_ia32_cmpunordps: - case X86::BI__builtin_ia32_cmpunordss: - case X86::BI__builtin_ia32_cmpunordpd: - case X86::BI__builtin_ia32_cmpunordsd: - Imm = 3; - break; - case X86::BI__builtin_ia32_cmpneqps: - case X86::BI__builtin_ia32_cmpneqss: - case X86::BI__builtin_ia32_cmpneqpd: - case X86::BI__builtin_ia32_cmpneqsd: - Imm = 4; - break; - case X86::BI__builtin_ia32_cmpnltps: - case X86::BI__builtin_ia32_cmpnltss: - case X86::BI__builtin_ia32_cmpnltpd: - case X86::BI__builtin_ia32_cmpnltsd: - Imm = 5; - break; - case X86::BI__builtin_ia32_cmpnleps: - case X86::BI__builtin_ia32_cmpnless: - case X86::BI__builtin_ia32_cmpnlepd: - case X86::BI__builtin_ia32_cmpnlesd: - Imm = 6; - break; - case X86::BI__builtin_ia32_cmpordps: - case X86::BI__builtin_ia32_cmpordss: - case X86::BI__builtin_ia32_cmpordpd: - case X86::BI__builtin_ia32_cmpordsd: - Imm = 7; - break; - } - - // Choose the intrinsic ID. - const char *name; - Intrinsic::ID ID; - switch (BuiltinID) { - default: llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_cmpeqps: - case X86::BI__builtin_ia32_cmpltps: - case X86::BI__builtin_ia32_cmpleps: - case X86::BI__builtin_ia32_cmpunordps: - case X86::BI__builtin_ia32_cmpneqps: - case X86::BI__builtin_ia32_cmpnltps: - case X86::BI__builtin_ia32_cmpnleps: - case X86::BI__builtin_ia32_cmpordps: - name = "cmpps"; - ID = Intrinsic::x86_sse_cmp_ps; - break; - case X86::BI__builtin_ia32_cmpeqss: - case X86::BI__builtin_ia32_cmpltss: - case X86::BI__builtin_ia32_cmpless: - case X86::BI__builtin_ia32_cmpunordss: - case X86::BI__builtin_ia32_cmpneqss: - case X86::BI__builtin_ia32_cmpnltss: - case X86::BI__builtin_ia32_cmpnless: - case X86::BI__builtin_ia32_cmpordss: - name = "cmpss"; - ID = Intrinsic::x86_sse_cmp_ss; - break; - case X86::BI__builtin_ia32_cmpeqpd: - case X86::BI__builtin_ia32_cmpltpd: - case X86::BI__builtin_ia32_cmplepd: - case X86::BI__builtin_ia32_cmpunordpd: - case X86::BI__builtin_ia32_cmpneqpd: - case X86::BI__builtin_ia32_cmpnltpd: - case X86::BI__builtin_ia32_cmpnlepd: - case X86::BI__builtin_ia32_cmpordpd: - name = "cmppd"; - ID = Intrinsic::x86_sse2_cmp_pd; - break; - case X86::BI__builtin_ia32_cmpeqsd: - case X86::BI__builtin_ia32_cmpltsd: - case X86::BI__builtin_ia32_cmplesd: - case X86::BI__builtin_ia32_cmpunordsd: - case X86::BI__builtin_ia32_cmpneqsd: - case X86::BI__builtin_ia32_cmpnltsd: - case X86::BI__builtin_ia32_cmpnlesd: - case X86::BI__builtin_ia32_cmpordsd: - name = "cmpsd"; - ID = Intrinsic::x86_sse2_cmp_sd; - break; - } - - Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); - llvm::Function *F = CGM.getIntrinsic(ID); - return Builder.CreateCall(F, Ops, name); + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); } } @@ -6812,6 +7535,16 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(ID, ResultType); return Builder.CreateCall(F, X); } + + // Absolute value + 
case PPC::BI__builtin_vsx_xvabsdp: + case PPC::BI__builtin_vsx_xvabssp: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); + return Builder.CreateCall(F, X); + } + // FMA variations case PPC::BI__builtin_vsx_xvmaddadp: case PPC::BI__builtin_vsx_xvmaddasp: @@ -6851,44 +7584,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } } -// Emit an intrinsic that has 1 float or double. -static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, Src0); -} - -// Emit an intrinsic that has 3 float or double operands. -static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); - - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, {Src0, Src1, Src2}); -} - -// Emit an intrinsic that has 1 float or double operand, and 1 integer. -static Value *emitFPIntBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, {Src0, Src1}); -} - Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (BuiltinID) { - case AMDGPU::BI__builtin_amdgpu_div_scale: - case AMDGPU::BI__builtin_amdgpu_div_scalef: { + case AMDGPU::BI__builtin_amdgcn_div_scale: + case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out // argument. 
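// A minimal sketch of the new PPC "absolute value" cases just above, when
// targeting PowerPC with VSX: __builtin_vsx_xvabsdp / xvabssp now lower to
// the generic llvm.fabs intrinsic on the vector type rather than a
// target-specific intrinsic.
typedef double v2f64 __attribute__((vector_size(16)));

v2f64 vabs(v2f64 v) {
  return __builtin_vsx_xvabsdp(v);  // -> call <2 x double> @llvm.fabs.v2f64
}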
@@ -6898,7 +7598,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Value *Y = EmitScalarExpr(E->getArg(1)); llvm::Value *Z = EmitScalarExpr(E->getArg(2)); - llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale, + llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, X->getType()); llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); @@ -6913,40 +7613,85 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Builder.CreateStore(FlagExt, FlagOutPtr); return Result; } - case AMDGPU::BI__builtin_amdgpu_div_fmas: - case AMDGPU::BI__builtin_amdgpu_div_fmasf: { + case AMDGPU::BI__builtin_amdgcn_div_fmas: + case AMDGPU::BI__builtin_amdgcn_div_fmasf: { llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); - llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas, + llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, Src0->getType()); llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); } - case AMDGPU::BI__builtin_amdgpu_div_fixup: - case AMDGPU::BI__builtin_amdgpu_div_fixupf: - return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup); - case AMDGPU::BI__builtin_amdgpu_trig_preop: - case AMDGPU::BI__builtin_amdgpu_trig_preopf: - return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop); - case AMDGPU::BI__builtin_amdgpu_rcp: - case AMDGPU::BI__builtin_amdgpu_rcpf: - return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp); - case AMDGPU::BI__builtin_amdgpu_rsq: - case AMDGPU::BI__builtin_amdgpu_rsqf: - return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq); - case AMDGPU::BI__builtin_amdgpu_rsq_clamped: - case AMDGPU::BI__builtin_amdgpu_rsq_clampedf: - return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped); - case AMDGPU::BI__builtin_amdgpu_ldexp: - case AMDGPU::BI__builtin_amdgpu_ldexpf: - return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp); - case AMDGPU::BI__builtin_amdgpu_class: - case AMDGPU::BI__builtin_amdgpu_classf: - return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class); - default: + case AMDGPU::BI__builtin_amdgcn_div_fixup: + case AMDGPU::BI__builtin_amdgcn_div_fixupf: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); + case AMDGPU::BI__builtin_amdgcn_trig_preop: + case AMDGPU::BI__builtin_amdgcn_trig_preopf: + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); + case AMDGPU::BI__builtin_amdgcn_rcp: + case AMDGPU::BI__builtin_amdgcn_rcpf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); + case AMDGPU::BI__builtin_amdgcn_rsq: + case AMDGPU::BI__builtin_amdgcn_rsqf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); + case AMDGPU::BI__builtin_amdgcn_rsq_clamp: + case AMDGPU::BI__builtin_amdgcn_rsq_clampf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); + case AMDGPU::BI__builtin_amdgcn_sinf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); + case AMDGPU::BI__builtin_amdgcn_cosf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); + case AMDGPU::BI__builtin_amdgcn_log_clampf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); + case AMDGPU::BI__builtin_amdgcn_ldexp: + case AMDGPU::BI__builtin_amdgcn_ldexpf: + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); + case AMDGPU::BI__builtin_amdgcn_frexp_mant: + case 
AMDGPU::BI__builtin_amdgcn_frexp_mantf: { + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); + } + case AMDGPU::BI__builtin_amdgcn_frexp_exp: + case AMDGPU::BI__builtin_amdgcn_frexp_expf: { + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp); + } + case AMDGPU::BI__builtin_amdgcn_fract: + case AMDGPU::BI__builtin_amdgcn_fractf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); + case AMDGPU::BI__builtin_amdgcn_lerp: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); + case AMDGPU::BI__builtin_amdgcn_class: + case AMDGPU::BI__builtin_amdgcn_classf: + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); + + case AMDGPU::BI__builtin_amdgcn_read_exec: { + CallInst *CI = cast<CallInst>( + EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); + CI->setConvergent(); + return CI; + } + + // amdgcn workitem + case AMDGPU::BI__builtin_amdgcn_workitem_id_x: + return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); + case AMDGPU::BI__builtin_amdgcn_workitem_id_y: + return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); + case AMDGPU::BI__builtin_amdgcn_workitem_id_z: + return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); + + // r600 intrinsics + case AMDGPU::BI__builtin_r600_recipsqrt_ieee: + case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: + return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); + case AMDGPU::BI__builtin_r600_read_tidig_x: + return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); + case AMDGPU::BI__builtin_r600_read_tidig_y: + return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); + case AMDGPU::BI__builtin_r600_read_tidig_z: + return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); + default: return nullptr; } } @@ -7196,6 +7941,17 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { + auto MakeLdg = [&](unsigned IntrinsicID) { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + AlignmentSource AlignSource; + clang::CharUnits Align = + getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource); + return Builder.CreateCall( + CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), + Ptr->getType()}), + {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); + }; + switch (BuiltinID) { case NVPTX::BI__nvvm_atom_add_gen_i: case NVPTX::BI__nvvm_atom_add_gen_l: @@ -7264,6 +8020,56 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(FnALAF32, {Ptr, Val}); } + case NVPTX::BI__nvvm_atom_inc_gen_ui: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Value *Val = EmitScalarExpr(E->getArg(1)); + Value *FnALI32 = + CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); + return Builder.CreateCall(FnALI32, {Ptr, Val}); + } + + case NVPTX::BI__nvvm_atom_dec_gen_ui: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Value *Val = EmitScalarExpr(E->getArg(1)); + Value *FnALD32 = + CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); + return Builder.CreateCall(FnALD32, {Ptr, Val}); + } + + case NVPTX::BI__nvvm_ldg_c: + case NVPTX::BI__nvvm_ldg_c2: + case NVPTX::BI__nvvm_ldg_c4: + case NVPTX::BI__nvvm_ldg_s: + case NVPTX::BI__nvvm_ldg_s2: + case NVPTX::BI__nvvm_ldg_s4: + case NVPTX::BI__nvvm_ldg_i: + case NVPTX::BI__nvvm_ldg_i2: + case NVPTX::BI__nvvm_ldg_i4: + case NVPTX::BI__nvvm_ldg_l: + case 
NVPTX::BI__nvvm_ldg_ll: + case NVPTX::BI__nvvm_ldg_ll2: + case NVPTX::BI__nvvm_ldg_uc: + case NVPTX::BI__nvvm_ldg_uc2: + case NVPTX::BI__nvvm_ldg_uc4: + case NVPTX::BI__nvvm_ldg_us: + case NVPTX::BI__nvvm_ldg_us2: + case NVPTX::BI__nvvm_ldg_us4: + case NVPTX::BI__nvvm_ldg_ui: + case NVPTX::BI__nvvm_ldg_ui2: + case NVPTX::BI__nvvm_ldg_ui4: + case NVPTX::BI__nvvm_ldg_ul: + case NVPTX::BI__nvvm_ldg_ull: + case NVPTX::BI__nvvm_ldg_ull2: + // PTX Interoperability section 2.2: "For a vector with an even number of + // elements, its alignment is set to number of elements times the alignment + // of its member: n*alignof(t)." + return MakeLdg(Intrinsic::nvvm_ldg_global_i); + case NVPTX::BI__nvvm_ldg_f: + case NVPTX::BI__nvvm_ldg_f2: + case NVPTX::BI__nvvm_ldg_f4: + case NVPTX::BI__nvvm_ldg_d: + case NVPTX::BI__nvvm_ldg_d2: + return MakeLdg(Intrinsic::nvvm_ldg_global_f); default: return nullptr; } @@ -7272,9 +8078,9 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (BuiltinID) { - case WebAssembly::BI__builtin_wasm_memory_size: { + case WebAssembly::BI__builtin_wasm_current_memory: { llvm::Type *ResultType = ConvertType(E->getType()); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType); return Builder.CreateCall(Callee); } case WebAssembly::BI__builtin_wasm_grow_memory: { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDABuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDABuiltin.cpp new file mode 100644 index 0000000..ea3b888 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDABuiltin.cpp @@ -0,0 +1,117 @@ +//===----- CGCUDABuiltin.cpp - Codegen for CUDA builtins ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Generates code for built-in CUDA calls which are not runtime-specific. +// (Runtime-specific codegen lives in CGCUDARuntime.) +// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "clang/Basic/Builtins.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instruction.h" +#include "llvm/Support/MathExtras.h" + +using namespace clang; +using namespace CodeGen; + +static llvm::Function *GetVprintfDeclaration(llvm::Module &M) { + llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()), + llvm::Type::getInt8PtrTy(M.getContext())}; + llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( + llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); + + if (auto* F = M.getFunction("vprintf")) { + // Our CUDA system header declares vprintf with the right signature, so + // nobody else should have been able to declare vprintf with a bogus + // signature. + assert(F->getFunctionType() == VprintfFuncType); + return F; + } + + // vprintf doesn't already exist; create a declaration and insert it into the + // module. + return llvm::Function::Create( + VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M); +} + +// Transforms a call to printf into a call to the NVPTX vprintf syscall (which +// isn't particularly special; it's invoked just like a regular function). 
+// vprintf takes two args: A format string, and a pointer to a buffer containing +// the varargs. +// +// For example, the call +// +// printf("format string", arg1, arg2, arg3); +// +// is converted into something resembling +// +// struct Tmp { +// Arg1 a1; +// Arg2 a2; +// Arg3 a3; +// }; +// char* buf = alloca(sizeof(Tmp)); +// *(Tmp*)buf = {a1, a2, a3}; +// vprintf("format string", buf); +// +// buf is aligned to the max of {alignof(Arg1), ...}. Furthermore, each of the +// args is itself aligned to its preferred alignment. +// +// Note that by the time this function runs, E's args have already undergone the +// standard C vararg promotion (short -> int, float -> double, etc.). +RValue +CodeGenFunction::EmitCUDADevicePrintfCallExpr(const CallExpr *E, + ReturnValueSlot ReturnValue) { + assert(getLangOpts().CUDA); + assert(getLangOpts().CUDAIsDevice); + assert(E->getBuiltinCallee() == Builtin::BIprintf); + assert(E->getNumArgs() >= 1); // printf always has at least one arg. + + const llvm::DataLayout &DL = CGM.getDataLayout(); + llvm::LLVMContext &Ctx = CGM.getLLVMContext(); + + CallArgList Args; + EmitCallArgs(Args, + E->getDirectCallee()->getType()->getAs<FunctionProtoType>(), + E->arguments(), E->getDirectCallee(), + /* ParamsToSkip = */ 0); + + // We don't know how to emit non-scalar varargs. + if (std::any_of(Args.begin() + 1, Args.end(), + [](const CallArg &A) { return !A.RV.isScalar(); })) { + CGM.ErrorUnsupported(E, "non-scalar arg to printf"); + return RValue::get(llvm::ConstantInt::get(IntTy, 0)); + } + + // Construct and fill the args buffer that we'll pass to vprintf. + llvm::Value *BufferPtr; + if (Args.size() <= 1) { + // If there are no args, pass a null pointer to vprintf. + BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx)); + } else { + llvm::SmallVector<llvm::Type *, 8> ArgTypes; + for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) + ArgTypes.push_back(Args[I].RV.getScalarVal()->getType()); + llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args"); + llvm::Value *Alloca = CreateTempAlloca(AllocaTy); + + for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) { + llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1); + llvm::Value *Arg = Args[I].RV.getScalarVal(); + Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlignment(Arg->getType())); + } + BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx)); + } + + // Invoke vprintf and return. + llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule()); + return RValue::get( + Builder.CreateCall(VprintfFunc, {Args[0].RV.getScalarVal(), BufferPtr})); +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp index 045e19b..6a04d4e 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp @@ -38,6 +38,7 @@ private: llvm::Module &TheModule; /// Keeps track of kernel launch stubs emitted in this module llvm::SmallVector<llvm::Function *, 16> EmittedKernels; + llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars; /// Keeps track of variables containing handles of GPU binaries. Populated by /// ModuleCtorFunction() and used to create corresponding cleanup calls in /// ModuleDtorFunction() @@ -47,7 +48,7 @@ private: llvm::Constant *getLaunchFn() const; /// Creates a function to register all kernel stubs generated in this module. 
- llvm::Function *makeRegisterKernelsFn(); + llvm::Function *makeRegisterGlobalsFn(); /// Helper function that generates a constant string and returns a pointer to /// the start of the string. The result of this function can be used anywhere @@ -68,6 +69,10 @@ public: CGNVCUDARuntime(CodeGenModule &CGM); void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override; + void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) override { + DeviceVars.push_back(std::make_pair(&Var, Flags)); + } + /// Creates module constructor function llvm::Function *makeModuleCtorFunction() override; /// Creates module destructor function @@ -93,10 +98,7 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const { // cudaError_t cudaSetupArgument(void *, size_t, size_t) - std::vector<llvm::Type*> Params; - Params.push_back(VoidPtrTy); - Params.push_back(SizeTy); - Params.push_back(SizeTy); + llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy}; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy, Params, false), "cudaSetupArgument"); @@ -158,19 +160,28 @@ void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF, CGF.EmitBlock(EndBlock); } -/// Creates internal function to register all kernel stubs generated in this -/// module with the CUDA runtime. +/// Creates a function that sets up state on the host side for CUDA objects that +/// have a presence on both the host and device sides. Specifically, registers +/// the host side of kernel functions and device global variables with the CUDA +/// runtime. /// \code -/// void __cuda_register_kernels(void** GpuBinaryHandle) { +/// void __cuda_register_globals(void** GpuBinaryHandle) { /// __cudaRegisterFunction(GpuBinaryHandle,Kernel0,...); /// ... /// __cudaRegisterFunction(GpuBinaryHandle,KernelM,...); +/// __cudaRegisterVar(GpuBinaryHandle, GlobalVar0, ...); +/// ... +/// __cudaRegisterVar(GpuBinaryHandle, GlobalVarN, ...); /// } /// \endcode -llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() { +llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { + // No need to register anything + if (EmittedKernels.empty() && DeviceVars.empty()) + return nullptr; + llvm::Function *RegisterKernelsFunc = llvm::Function::Create( llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), - llvm::GlobalValue::InternalLinkage, "__cuda_register_kernels", &TheModule); + llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule); llvm::BasicBlock *EntryBB = llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc); CGBuilderTy Builder(CGM, Context); @@ -178,7 +189,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() { // void __cudaRegisterFunction(void **, const char *, char *, const char *, // int, uint3*, uint3*, dim3*, dim3*, int*) - std::vector<llvm::Type *> RegisterFuncParams = { + llvm::Type *RegisterFuncParams[] = { VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()}; llvm::Constant *RegisterFunc = CGM.CreateRuntimeFunction( @@ -186,18 +197,44 @@ llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() { "__cudaRegisterFunction"); // Extract GpuBinaryHandle passed as the first argument passed to - // __cuda_register_kernels() and generate __cudaRegisterFunction() call for + // __cuda_register_globals() and generate __cudaRegisterFunction() call for // each emitted kernel. 
llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin(); for (llvm::Function *Kernel : EmittedKernels) { llvm::Constant *KernelName = makeConstantString(Kernel->getName()); llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy); - llvm::Value *args[] = { + llvm::Value *Args[] = { &GpuBinaryHandlePtr, Builder.CreateBitCast(Kernel, VoidPtrTy), KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), NullPtr, NullPtr, NullPtr, NullPtr, llvm::ConstantPointerNull::get(IntTy->getPointerTo())}; - Builder.CreateCall(RegisterFunc, args); + Builder.CreateCall(RegisterFunc, Args); + } + + // void __cudaRegisterVar(void **, char *, char *, const char *, + // int, int, int, int) + llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy, + CharPtrTy, IntTy, IntTy, + IntTy, IntTy}; + llvm::Constant *RegisterVar = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, RegisterVarParams, false), + "__cudaRegisterVar"); + for (auto &Pair : DeviceVars) { + llvm::GlobalVariable *Var = Pair.first; + unsigned Flags = Pair.second; + llvm::Constant *VarName = makeConstantString(Var->getName()); + uint64_t VarSize = + CGM.getDataLayout().getTypeAllocSize(Var->getValueType()); + llvm::Value *Args[] = { + &GpuBinaryHandlePtr, + Builder.CreateBitCast(Var, VoidPtrTy), + VarName, + VarName, + llvm::ConstantInt::get(IntTy, (Flags & ExternDeviceVar) ? 1 : 0), + llvm::ConstantInt::get(IntTy, VarSize), + llvm::ConstantInt::get(IntTy, (Flags & ConstantDeviceVar) ? 1 : 0), + llvm::ConstantInt::get(IntTy, 0)}; + Builder.CreateCall(RegisterVar, Args); } Builder.CreateRetVoid(); @@ -208,15 +245,19 @@ llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() { /// \code /// void __cuda_module_ctor(void*) { /// Handle0 = __cudaRegisterFatBinary(GpuBinaryBlob0); -/// __cuda_register_kernels(Handle0); +/// __cuda_register_globals(Handle0); /// ... /// HandleN = __cudaRegisterFatBinary(GpuBinaryBlobN); -/// __cuda_register_kernels(HandleN); +/// __cuda_register_globals(HandleN); /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { - // void __cuda_register_kernels(void* handle); - llvm::Function *RegisterKernelsFunc = makeRegisterKernelsFn(); + // No need to generate ctors/dtors if there are no GPU binaries. + if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty()) + return nullptr; + + // void __cuda_register_globals(void* handle); + llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn(); // void ** __cudaRegisterFatBinary(void *); llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false), @@ -259,6 +300,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { TheModule, FatbinWrapperTy, true, llvm::GlobalValue::InternalLinkage, llvm::ConstantStruct::get(FatbinWrapperTy, Values), "__cuda_fatbin_wrapper"); + // NVIDIA's cuobjdump looks for fatbins in this section. 
+ FatbinWrapper->setSection(".nvFatBinSegment"); // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper); llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall( @@ -270,8 +313,9 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle, CGM.getPointerAlign()); - // Call __cuda_register_kernels(GpuBinaryHandle); - CtorBuilder.CreateCall(RegisterKernelsFunc, RegisterFatbinCall); + // Call __cuda_register_globals(GpuBinaryHandle); + if (RegisterGlobalsFunc) + CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall); // Save GpuBinaryHandle so we can unregister it in destructor. GpuBinaryHandles.push_back(GpuBinaryHandle); @@ -291,6 +335,10 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() { + // No need for destructor if we don't have handles to unregister. + if (GpuBinaryHandles.empty()) + return nullptr; + // void __cudaUnregisterFatBinary(void ** handle); llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDARuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDARuntime.h index dcacf97..0168f4f 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDARuntime.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDARuntime.h @@ -18,6 +18,7 @@ namespace llvm { class Function; +class GlobalVariable; } namespace clang { @@ -37,6 +38,12 @@ protected: CodeGenModule &CGM; public: + // Global variable properties that must be passed to CUDA runtime. + enum DeviceVarFlags { + ExternDeviceVar = 0x01, // extern + ConstantDeviceVar = 0x02, // __constant__ + }; + CGCUDARuntime(CodeGenModule &CGM) : CGM(CGM) {} virtual ~CGCUDARuntime(); @@ -46,6 +53,7 @@ public: /// Emits a kernel launch stub. virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0; + virtual void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) = 0; /// Constructs and returns a module initialization function or nullptr if it's /// not needed. Must be called after all kernels have been emitted. diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp index 6847df9..40f1bc4 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp @@ -164,7 +164,7 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, // members with attribute "AlwaysInline" and expect no reference to // be generated. It is desirable to reenable this optimisation after // corresponding LLVM changes. - Replacements[MangledName] = Aliasee; + addReplacement(MangledName, Aliasee); return false; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h index 3f240b1..9e10ec0 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h @@ -106,6 +106,16 @@ public: virtual bool hasMostDerivedReturn(GlobalDecl GD) const { return false; } + /// Returns true if the target allows calling a function through a pointer + /// with a different signature than the actual function (or equivalently, + /// bitcasting a function or function pointer to a different function type). + /// In principle in the most general case this could depend on the target, the + /// calling convention, and the actual types of the arguments and return + /// value. 
Here it just means whether the signature mismatch could *ever* be + /// allowed; in other words, does the target do strict checking of signatures + /// for all calls. + virtual bool canCallMismatchedFunctionType() const { return true; } + /// If the C++ ABI requires the given type be returned in a particular way, /// this method sets RetAI and returns true. virtual bool classifyReturnType(CGFunctionInfo &FI) const = 0; @@ -326,6 +336,12 @@ public: virtual void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy, FunctionArgList &Params) = 0; + /// Get the ABI-specific "this" parameter adjustment to apply in the prologue + /// of a virtual function. + virtual CharUnits getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) { + return CharUnits::Zero(); + } + /// Perform ABI-specific "this" parameter adjustment in a virtual function /// prologue. virtual llvm::Value *adjustThisParameterInVirtualFunctionPrologue( diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp index 9359850..242b596 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp @@ -14,6 +14,7 @@ #include "CGCall.h" #include "ABIInfo.h" +#include "CGBlocks.h" #include "CGCXXABI.h" #include "CGCleanup.h" #include "CodeGenFunction.h" @@ -25,9 +26,11 @@ #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/CodeGen/SwiftCallingConv.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -39,7 +42,7 @@ using namespace CodeGen; /***/ -static unsigned ClangCallConvToLLVMCallConv(CallingConv CC) { +unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { switch (CC) { default: return llvm::CallingConv::C; case CC_X86StdCall: return llvm::CallingConv::X86_StdCall; @@ -55,7 +58,10 @@ static unsigned ClangCallConvToLLVMCallConv(CallingConv CC) { // TODO: Add support for __vectorcall to LLVM. case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall; case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC; - case CC_SpirKernel: return llvm::CallingConv::SPIR_KERNEL; + case CC_OpenCLKernel: return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv(); + case CC_PreserveMost: return llvm::CallingConv::PreserveMost; + case CC_PreserveAll: return llvm::CallingConv::PreserveAll; + case CC_Swift: return llvm::CallingConv::Swift; } } @@ -90,15 +96,25 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) { return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(), /*instanceMethod=*/false, /*chainCall=*/false, None, - FTNP->getExtInfo(), RequiredArgs(0)); + FTNP->getExtInfo(), {}, RequiredArgs(0)); } /// Adds the formal paramaters in FPT to the given prefix. If any parameter in /// FPT has pass_object_size attrs, then we'll add parameters for those, too. static void appendParameterTypes(const CodeGenTypes &CGT, SmallVectorImpl<CanQualType> &prefix, - const CanQual<FunctionProtoType> &FPT, + SmallVectorImpl<FunctionProtoType::ExtParameterInfo> ¶mInfos, + CanQual<FunctionProtoType> FPT, const FunctionDecl *FD) { + // Fill out paramInfos. 
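// A minimal sketch of the situation canCallMismatchedFunctionType() (above)
// describes: calling a function through a pointer whose prototype does not
// match the definition.  Most targets tolerate the bitcast; an ABI returning
// false here forces CodeGen to avoid emitting such mismatched calls directly.
void callee(int x);

void caller(void) {
  void (*fp)(long) = (void (*)(long))callee;  // deliberately mismatched type
  fp(42L);                                    // relies on the target allowing it
}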
+ if (FPT->hasExtParameterInfos() || !paramInfos.empty()) { + assert(paramInfos.size() <= prefix.size()); + auto protoParamInfos = FPT->getExtParameterInfos(); + paramInfos.reserve(prefix.size() + protoParamInfos.size()); + paramInfos.resize(prefix.size()); + paramInfos.append(protoParamInfos.begin(), protoParamInfos.end()); + } + // Fast path: unknown target. if (FD == nullptr) { prefix.append(FPT->param_type_begin(), FPT->param_type_end()); @@ -125,13 +141,17 @@ arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod, SmallVectorImpl<CanQualType> &prefix, CanQual<FunctionProtoType> FTP, const FunctionDecl *FD) { - RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, prefix.size()); + SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; + RequiredArgs Required = + RequiredArgs::forPrototypePlus(FTP, prefix.size(), FD); // FIXME: Kill copy. - appendParameterTypes(CGT, prefix, FTP, FD); + appendParameterTypes(CGT, prefix, paramInfos, FTP, FD); CanQualType resultType = FTP->getReturnType().getUnqualifiedType(); + return CGT.arrangeLLVMFunctionInfo(resultType, instanceMethod, /*chainCall=*/false, prefix, - FTP->getExtInfo(), required); + FTP->getExtInfo(), paramInfos, + Required); } /// Arrange the argument and result information for a value of the @@ -173,6 +193,12 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { if (D->hasAttr<SysVABIAttr>()) return IsWindows ? CC_X86_64SysV : CC_C; + if (D->hasAttr<PreserveMostAttr>()) + return CC_PreserveMost; + + if (D->hasAttr<PreserveAllAttr>()) + return CC_PreserveAll; + return CC_C; } @@ -219,16 +245,33 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) { return arrangeFreeFunctionType(prototype, MD); } +bool CodeGenTypes::inheritingCtorHasParams( + const InheritedConstructor &Inherited, CXXCtorType Type) { + // Parameters are unnecessary if we're constructing a base class subobject + // and the inherited constructor lives in a virtual base. + return Type == Ctor_Complete || + !Inherited.getShadowDecl()->constructsVirtualBase() || + !Target.getCXXABI().hasConstructorVariants(); + } + const CGFunctionInfo & CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, StructorType Type) { SmallVector<CanQualType, 16> argTypes; + SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; argTypes.push_back(GetThisType(Context, MD->getParent())); + bool PassParams = true; + GlobalDecl GD; if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) { GD = GlobalDecl(CD, toCXXCtorType(Type)); + + // A base class inheriting constructor doesn't get forwarded arguments + // needed to construct a virtual base (or base class thereof). + if (auto Inherited = CD->getInheritedConstructor()) + PassParams = inheritingCtorHasParams(Inherited, toCXXCtorType(Type)); } else { auto *DD = dyn_cast<CXXDestructorDecl>(MD); GD = GlobalDecl(DD, toCXXDtorType(Type)); @@ -237,12 +280,14 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, CanQual<FunctionProtoType> FTP = GetFormalType(MD); // Add the formal parameters. - appendParameterTypes(*this, argTypes, FTP, MD); + if (PassParams) + appendParameterTypes(*this, argTypes, paramInfos, FTP, MD); TheCXXABI.buildStructorSignature(MD, Type, argTypes); RequiredArgs required = - (MD->isVariadic() ? RequiredArgs(argTypes.size()) : RequiredArgs::All); + (PassParams && MD->isVariadic() ? 
RequiredArgs(argTypes.size()) + : RequiredArgs::All); FunctionType::ExtInfo extInfo = FTP->getExtInfo(); CanQualType resultType = TheCXXABI.HasThisReturn(GD) @@ -252,7 +297,53 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, : Context.VoidTy; return arrangeLLVMFunctionInfo(resultType, /*instanceMethod=*/true, /*chainCall=*/false, argTypes, extInfo, - required); + paramInfos, required); +} + +static SmallVector<CanQualType, 16> +getArgTypesForCall(ASTContext &ctx, const CallArgList &args) { + SmallVector<CanQualType, 16> argTypes; + for (auto &arg : args) + argTypes.push_back(ctx.getCanonicalParamType(arg.Ty)); + return argTypes; +} + +static SmallVector<CanQualType, 16> +getArgTypesForDeclaration(ASTContext &ctx, const FunctionArgList &args) { + SmallVector<CanQualType, 16> argTypes; + for (auto &arg : args) + argTypes.push_back(ctx.getCanonicalParamType(arg->getType())); + return argTypes; +} + +static void addExtParameterInfosForCall( + llvm::SmallVectorImpl<FunctionProtoType::ExtParameterInfo> ¶mInfos, + const FunctionProtoType *proto, + unsigned prefixArgs, + unsigned totalArgs) { + assert(proto->hasExtParameterInfos()); + assert(paramInfos.size() <= prefixArgs); + assert(proto->getNumParams() + prefixArgs <= totalArgs); + + // Add default infos for any prefix args that don't already have infos. + paramInfos.resize(prefixArgs); + + // Add infos for the prototype. + auto protoInfos = proto->getExtParameterInfos(); + paramInfos.append(protoInfos.begin(), protoInfos.end()); + + // Add default infos for the variadic arguments. + paramInfos.resize(totalArgs); +} + +static llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> +getExtParameterInfosForCall(const FunctionProtoType *proto, + unsigned prefixArgs, unsigned totalArgs) { + llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> result; + if (proto->hasExtParameterInfos()) { + addExtParameterInfosForCall(result, proto, prefixArgs, totalArgs); + } + return result; } /// Arrange a call to a C++ method, passing the given arguments. @@ -267,7 +358,7 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, ArgTypes.push_back(Context.getCanonicalParamType(Arg.Ty)); CanQual<FunctionProtoType> FPT = GetFormalType(D); - RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs); + RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs, D); GlobalDecl GD(D, CtorKind); CanQualType ResultType = TheCXXABI.HasThisReturn(GD) ? 
ArgTypes.front() @@ -276,9 +367,11 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, : Context.VoidTy; FunctionType::ExtInfo Info = FPT->getExtInfo(); + auto ParamInfos = getExtParameterInfosForCall(FPT.getTypePtr(), 1 + ExtraArgs, + ArgTypes.size()); return arrangeLLVMFunctionInfo(ResultType, /*instanceMethod=*/true, /*chainCall=*/false, ArgTypes, Info, - Required); + ParamInfos, Required); } /// Arrange the argument and result information for the declaration or @@ -299,7 +392,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>(); return arrangeLLVMFunctionInfo( noProto->getReturnType(), /*instanceMethod=*/false, - /*chainCall=*/false, None, noProto->getExtInfo(), RequiredArgs::All); + /*chainCall=*/false, None, noProto->getExtInfo(), {},RequiredArgs::All); } assert(isa<FunctionProtoType>(FTy)); @@ -328,7 +421,7 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, argTys.push_back(Context.getCanonicalParamType(receiverType)); argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType())); // FIXME: Kill copy? - for (const auto *I : MD->params()) { + for (const auto *I : MD->parameters()) { argTys.push_back(Context.getCanonicalParamType(I->getType())); } @@ -345,7 +438,18 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, return arrangeLLVMFunctionInfo( GetReturnType(MD->getReturnType()), /*instanceMethod=*/false, - /*chainCall=*/false, argTys, einfo, required); + /*chainCall=*/false, argTys, einfo, {}, required); +} + +const CGFunctionInfo & +CodeGenTypes::arrangeUnprototypedObjCMessageSend(QualType returnType, + const CallArgList &args) { + auto argTypes = getArgTypesForCall(Context, args); + FunctionType::ExtInfo einfo; + + return arrangeLLVMFunctionInfo( + GetReturnType(returnType), /*instanceMethod=*/false, + /*chainCall=*/false, argTypes, einfo, {}, RequiredArgs::All); } const CGFunctionInfo & @@ -374,7 +478,7 @@ CodeGenTypes::arrangeMSMemberPointerThunk(const CXXMethodDecl *MD) { CanQualType ArgTys[] = { GetThisType(Context, MD->getParent()) }; return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false, /*chainCall=*/false, ArgTys, - FTP->getExtInfo(), RequiredArgs(1)); + FTP->getExtInfo(), {}, RequiredArgs(1)); } const CGFunctionInfo & @@ -394,7 +498,8 @@ CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD, /*IsVariadic=*/false, /*IsCXXMethod=*/true); return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/true, /*chainCall=*/false, ArgTys, - FunctionType::ExtInfo(CC), RequiredArgs::All); + FunctionType::ExtInfo(CC), {}, + RequiredArgs::All); } /// Arrange a call as unto a free function, except possibly with an @@ -408,6 +513,8 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, bool chainCall) { assert(args.size() >= numExtraRequiredArgs); + llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; + // In most cases, there are no optional arguments. 
RequiredArgs required = RequiredArgs::All; @@ -417,6 +524,10 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, if (proto->isVariadic()) required = RequiredArgs(proto->getNumParams() + numExtraRequiredArgs); + if (proto->hasExtParameterInfos()) + addExtParameterInfosForCall(paramInfos, proto, numExtraRequiredArgs, + args.size()); + // If we don't have a prototype at all, but we're supposed to // explicitly use the variadic convention for unprototyped calls, // treat all of the arguments as required but preserve the nominal @@ -433,7 +544,8 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, argTypes.push_back(CGT.getContext().getCanonicalParamType(arg.Ty)); return CGT.arrangeLLVMFunctionInfo(GetReturnType(fnType->getReturnType()), /*instanceMethod=*/false, chainCall, - argTypes, fnType->getExtInfo(), required); + argTypes, fnType->getExtInfo(), paramInfos, + required); } /// Figure out the rules for calling a function with the given formal @@ -448,7 +560,7 @@ CodeGenTypes::arrangeFreeFunctionCall(const CallArgList &args, chainCall ? 1 : 0, chainCall); } -/// A block function call is essentially a free-function call with an +/// A block function is essentially a free function with an /// extra implicit argument. const CGFunctionInfo & CodeGenTypes::arrangeBlockFunctionCall(const CallArgList &args, @@ -458,54 +570,99 @@ CodeGenTypes::arrangeBlockFunctionCall(const CallArgList &args, } const CGFunctionInfo & -CodeGenTypes::arrangeFreeFunctionCall(QualType resultType, - const CallArgList &args, - FunctionType::ExtInfo info, - RequiredArgs required) { +CodeGenTypes::arrangeBlockFunctionDeclaration(const FunctionProtoType *proto, + const FunctionArgList ¶ms) { + auto paramInfos = getExtParameterInfosForCall(proto, 1, params.size()); + auto argTypes = getArgTypesForDeclaration(Context, params); + + return arrangeLLVMFunctionInfo( + GetReturnType(proto->getReturnType()), + /*instanceMethod*/ false, /*chainCall*/ false, argTypes, + proto->getExtInfo(), paramInfos, + RequiredArgs::forPrototypePlus(proto, 1, nullptr)); +} + +const CGFunctionInfo & +CodeGenTypes::arrangeBuiltinFunctionCall(QualType resultType, + const CallArgList &args) { // FIXME: Kill copy. SmallVector<CanQualType, 16> argTypes; for (const auto &Arg : args) argTypes.push_back(Context.getCanonicalParamType(Arg.Ty)); return arrangeLLVMFunctionInfo( GetReturnType(resultType), /*instanceMethod=*/false, - /*chainCall=*/false, argTypes, info, required); + /*chainCall=*/false, argTypes, FunctionType::ExtInfo(), + /*paramInfos=*/ {}, RequiredArgs::All); } -/// Arrange a call to a C++ method, passing the given arguments. const CGFunctionInfo & -CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args, - const FunctionProtoType *FPT, - RequiredArgs required) { - // FIXME: Kill copy. 
- SmallVector<CanQualType, 16> argTypes; - for (const auto &Arg : args) - argTypes.push_back(Context.getCanonicalParamType(Arg.Ty)); +CodeGenTypes::arrangeBuiltinFunctionDeclaration(QualType resultType, + const FunctionArgList &args) { + auto argTypes = getArgTypesForDeclaration(Context, args); + + return arrangeLLVMFunctionInfo( + GetReturnType(resultType), /*instanceMethod=*/false, /*chainCall=*/false, + argTypes, FunctionType::ExtInfo(), {}, RequiredArgs::All); +} - FunctionType::ExtInfo info = FPT->getExtInfo(); +const CGFunctionInfo & +CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType, + ArrayRef<CanQualType> argTypes) { return arrangeLLVMFunctionInfo( - GetReturnType(FPT->getReturnType()), /*instanceMethod=*/true, - /*chainCall=*/false, argTypes, info, required); + resultType, /*instanceMethod=*/false, /*chainCall=*/false, + argTypes, FunctionType::ExtInfo(), {}, RequiredArgs::All); } -const CGFunctionInfo &CodeGenTypes::arrangeFreeFunctionDeclaration( - QualType resultType, const FunctionArgList &args, - const FunctionType::ExtInfo &info, bool isVariadic) { +/// Arrange a call to a C++ method, passing the given arguments. +const CGFunctionInfo & +CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args, + const FunctionProtoType *proto, + RequiredArgs required) { + unsigned numRequiredArgs = + (proto->isVariadic() ? required.getNumRequiredArgs() : args.size()); + unsigned numPrefixArgs = numRequiredArgs - proto->getNumParams(); + auto paramInfos = + getExtParameterInfosForCall(proto, numPrefixArgs, args.size()); + // FIXME: Kill copy. - SmallVector<CanQualType, 16> argTypes; - for (auto Arg : args) - argTypes.push_back(Context.getCanonicalParamType(Arg->getType())); + auto argTypes = getArgTypesForCall(Context, args); - RequiredArgs required = - (isVariadic ? 
RequiredArgs(args.size()) : RequiredArgs::All); + FunctionType::ExtInfo info = proto->getExtInfo(); return arrangeLLVMFunctionInfo( - GetReturnType(resultType), /*instanceMethod=*/false, - /*chainCall=*/false, argTypes, info, required); + GetReturnType(proto->getReturnType()), /*instanceMethod=*/true, + /*chainCall=*/false, argTypes, info, paramInfos, required); } const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() { return arrangeLLVMFunctionInfo( getContext().VoidTy, /*instanceMethod=*/false, /*chainCall=*/false, - None, FunctionType::ExtInfo(), RequiredArgs::All); + None, FunctionType::ExtInfo(), {}, RequiredArgs::All); +} + +const CGFunctionInfo & +CodeGenTypes::arrangeCall(const CGFunctionInfo &signature, + const CallArgList &args) { + assert(signature.arg_size() <= args.size()); + if (signature.arg_size() == args.size()) + return signature; + + SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; + auto sigParamInfos = signature.getExtParameterInfos(); + if (!sigParamInfos.empty()) { + paramInfos.append(sigParamInfos.begin(), sigParamInfos.end()); + paramInfos.resize(args.size()); + } + + auto argTypes = getArgTypesForCall(Context, args); + + assert(signature.getRequiredArgs().allowsOptionalArgs()); + return arrangeLLVMFunctionInfo(signature.getReturnType(), + signature.isInstanceMethod(), + signature.isChainCall(), + argTypes, + signature.getExtInfo(), + paramInfos, + signature.getRequiredArgs()); } /// Arrange the argument and result information for an abstract value @@ -517,25 +674,26 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, bool chainCall, ArrayRef<CanQualType> argTypes, FunctionType::ExtInfo info, + ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos, RequiredArgs required) { assert(std::all_of(argTypes.begin(), argTypes.end(), std::mem_fun_ref(&CanQualType::isCanonicalAsParam))); - unsigned CC = ClangCallConvToLLVMCallConv(info.getCC()); - // Lookup or create unique function info. llvm::FoldingSetNodeID ID; - CGFunctionInfo::Profile(ID, instanceMethod, chainCall, info, required, - resultType, argTypes); + CGFunctionInfo::Profile(ID, instanceMethod, chainCall, info, paramInfos, + required, resultType, argTypes); void *insertPos = nullptr; CGFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos); if (FI) return *FI; + unsigned CC = ClangCallConvToLLVMCallConv(info.getCC()); + // Construct the function info. We co-allocate the ArgInfos. FI = CGFunctionInfo::create(CC, instanceMethod, chainCall, info, - resultType, argTypes, required); + paramInfos, resultType, argTypes, required); FunctionInfos.InsertNode(FI, insertPos); bool inserted = FunctionsBeingProcessed.insert(FI).second; @@ -543,7 +701,11 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, assert(inserted && "Recursively being processed?"); // Compute ABI information. - getABIInfo().computeInfo(*FI); + if (info.getCC() != CC_Swift) { + getABIInfo().computeInfo(*FI); + } else { + swiftcall::computeABIInfo(CGM, *FI); + } // Loop over all of the computed argument and return value info. 
If any of // them are direct or extend without a specified coerce type, specify the @@ -566,11 +728,16 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, bool instanceMethod, bool chainCall, const FunctionType::ExtInfo &info, + ArrayRef<ExtParameterInfo> paramInfos, CanQualType resultType, ArrayRef<CanQualType> argTypes, RequiredArgs required) { - void *buffer = operator new(sizeof(CGFunctionInfo) + - sizeof(ArgInfo) * (argTypes.size() + 1)); + assert(paramInfos.empty() || paramInfos.size() == argTypes.size()); + + void *buffer = + operator new(totalSizeToAlloc<ArgInfo, ExtParameterInfo>( + argTypes.size() + 1, paramInfos.size())); + CGFunctionInfo *FI = new(buffer) CGFunctionInfo(); FI->CallingConvention = llvmCC; FI->EffectiveCallingConvention = llvmCC; @@ -585,9 +752,12 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, FI->ArgStruct = nullptr; FI->ArgStructAlign = 0; FI->NumArgs = argTypes.size(); + FI->HasExtParameterInfos = !paramInfos.empty(); FI->getArgsBuffer()[0].type = resultType; for (unsigned i = 0, e = argTypes.size(); i != e; ++i) FI->getArgsBuffer()[i + 1].type = argTypes[i]; + for (unsigned i = 0, e = paramInfos.size(); i != e; ++i) + FI->getExtParameterInfosBuffer()[i] = paramInfos[i]; return FI; } @@ -634,7 +804,8 @@ struct RecordExpansion : TypeExpansion { RecordExpansion(SmallVector<const CXXBaseSpecifier *, 1> &&Bases, SmallVector<const FieldDecl *, 1> &&Fields) - : TypeExpansion(TEK_Record), Bases(Bases), Fields(Fields) {} + : TypeExpansion(TEK_Record), Bases(std::move(Bases)), + Fields(std::move(Fields)) {} static bool classof(const TypeExpansion *TE) { return TE->Kind == TEK_Record; } @@ -773,7 +944,7 @@ static void forConstantArrayExpansion(CodeGenFunction &CGF, } void CodeGenFunction::ExpandTypeFromArgs( - QualType Ty, LValue LV, SmallVectorImpl<llvm::Argument *>::iterator &AI) { + QualType Ty, LValue LV, SmallVectorImpl<llvm::Value *>::iterator &AI) { assert(LV.isSimple() && "Unexpected non-simple lvalue during struct expansion."); @@ -798,7 +969,7 @@ void CodeGenFunction::ExpandTypeFromArgs( } for (auto FD : RExp->Fields) { // FIXME: What are the right qualifiers here? - LValue SubLV = EmitLValueForField(LV, FD); + LValue SubLV = EmitLValueForFieldInitialization(LV, FD); ExpandTypeFromArgs(FD->getType(), SubLV, AI); } } else if (isa<ComplexExpansion>(Exp.get())) { @@ -1220,11 +1391,13 @@ void ClangToLLVMArgMapping::construct(const ASTContext &Context, // ignore and inalloca doesn't have matching LLVM parameters. 
IRArgs.NumberOfArgs = 0; break; - case ABIArgInfo::Expand: { + case ABIArgInfo::CoerceAndExpand: + IRArgs.NumberOfArgs = AI.getCoerceAndExpandTypeSequence().size(); + break; + case ABIArgInfo::Expand: IRArgs.NumberOfArgs = getExpansionSize(ArgType, Context); break; } - } if (IRArgs.NumberOfArgs > 0) { IRArgs.FirstArgIndex = IRArgNo; @@ -1323,6 +1496,10 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { case ABIArgInfo::Ignore: resultType = llvm::Type::getVoidTy(getLLVMContext()); break; + + case ABIArgInfo::CoerceAndExpand: + resultType = retAI.getUnpaddedCoerceAndExpandType(); + break; } ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI, true); @@ -1390,6 +1567,15 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { break; } + case ABIArgInfo::CoerceAndExpand: { + auto ArgTypesIter = ArgTypes.begin() + FirstIRArg; + for (auto EltTy : ArgInfo.getCoerceAndExpandTypeSequence()) { + *ArgTypesIter++ = EltTy; + } + assert(ArgTypesIter == ArgTypes.begin() + FirstIRArg + NumIRArgs); + break; + } + case ABIArgInfo::Expand: auto ArgTypesIter = ArgTypes.begin() + FirstIRArg; getExpandedTypes(it->type, ArgTypesIter); @@ -1450,6 +1636,7 @@ void CodeGenModule::ConstructAttributeList( const Decl *TargetDecl = CalleeInfo.getCalleeDecl(); + bool HasAnyX86InterruptAttr = false; // FIXME: handle sseregparm someday... if (TargetDecl) { if (TargetDecl->hasAttr<ReturnsTwiceAttr>()) @@ -1487,6 +1674,7 @@ void CodeGenModule::ConstructAttributeList( if (TargetDecl->hasAttr<ReturnsNonNullAttr>()) RetAttrs.addAttribute(llvm::Attribute::NonNull); + HasAnyX86InterruptAttr = TargetDecl->hasAttr<AnyX86InterruptAttr>(); HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>(); } @@ -1526,10 +1714,11 @@ void CodeGenModule::ConstructAttributeList( } bool DisableTailCalls = - CodeGenOpts.DisableTailCalls || + CodeGenOpts.DisableTailCalls || HasAnyX86InterruptAttr || (TargetDecl && TargetDecl->hasAttr<DisableTailCallsAttr>()); - FuncAttrs.addAttribute("disable-tail-calls", - llvm::toStringRef(DisableTailCalls)); + FuncAttrs.addAttribute( + "disable-tail-calls", + llvm::toStringRef(DisableTailCalls)); FuncAttrs.addAttribute("less-precise-fpmad", llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD)); @@ -1543,9 +1732,13 @@ void CodeGenModule::ConstructAttributeList( llvm::toStringRef(CodeGenOpts.SoftFloat)); FuncAttrs.addAttribute("stack-protector-buffer-size", llvm::utostr(CodeGenOpts.SSPBufferSize)); + FuncAttrs.addAttribute("no-signed-zeros-fp-math", + llvm::toStringRef(CodeGenOpts.NoSignedZeros)); if (CodeGenOpts.StackRealignment) FuncAttrs.addAttribute("stackrealign"); + if (CodeGenOpts.Backchain) + FuncAttrs.addAttribute("backchain"); // Add target-cpu and target-features attributes to functions. If // we have a decl for the function and it has a target attribute then @@ -1594,6 +1787,18 @@ void CodeGenModule::ConstructAttributeList( } } + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + // Conservatively, mark all functions and calls in CUDA as convergent + // (meaning, they may call an intrinsically convergent op, such as + // __syncthreads(), and so can't have certain optimizations applied around + // them). LLVM will remove this attribute where it safely can. + FuncAttrs.addAttribute(llvm::Attribute::Convergent); + + // Respect -fcuda-flush-denormals-to-zero. 
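  // For illustration only (not part of this change): in CUDA device
  // compilation every function is conservatively marked 'convergent' above,
  // and the driver-level flag is mapped onto the "nvptx-f32ftz" function
  // attribute just below. A hedged example invocation, assuming the flag
  // spellings available in this release of the driver:
  //
  //   clang++ -x cuda --cuda-gpu-arch=sm_35 -fcuda-flush-denormals-to-zero \
  //       -c kernel.cu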
+ if (getLangOpts().CUDADeviceFlushDenormalsToZero) + FuncAttrs.addAttribute("nvptx-f32ftz", "true"); + } + ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI); QualType RetTy = FI.getReturnType(); @@ -1620,6 +1825,9 @@ void CodeGenModule::ConstructAttributeList( break; } + case ABIArgInfo::CoerceAndExpand: + break; + case ABIArgInfo::Expand: llvm_unreachable("Invalid ABI kind for return argument"); } @@ -1639,10 +1847,13 @@ void CodeGenModule::ConstructAttributeList( getLLVMContext(), llvm::AttributeSet::ReturnIndex, RetAttrs)); } + bool hasUsedSRet = false; + // Attach attributes to sret. if (IRFunctionArgs.hasSRetArg()) { llvm::AttrBuilder SRETAttrs; SRETAttrs.addAttribute(llvm::Attribute::StructRet); + hasUsedSRet = true; if (RetAI.getInReg()) SRETAttrs.addAttribute(llvm::Attribute::InReg); PAL.push_back(llvm::AttributeSet::get( @@ -1727,7 +1938,8 @@ void CodeGenModule::ConstructAttributeList( } case ABIArgInfo::Ignore: case ABIArgInfo::Expand: - continue; + case ABIArgInfo::CoerceAndExpand: + break; case ABIArgInfo::InAlloca: // inalloca disables readnone and readonly. @@ -1745,6 +1957,41 @@ void CodeGenModule::ConstructAttributeList( Attrs.addAttribute(llvm::Attribute::NonNull); } + switch (FI.getExtParameterInfo(ArgNo).getABI()) { + case ParameterABI::Ordinary: + break; + + case ParameterABI::SwiftIndirectResult: { + // Add 'sret' if we haven't already used it for something, but + // only if the result is void. + if (!hasUsedSRet && RetTy->isVoidType()) { + Attrs.addAttribute(llvm::Attribute::StructRet); + hasUsedSRet = true; + } + + // Add 'noalias' in either case. + Attrs.addAttribute(llvm::Attribute::NoAlias); + + // Add 'dereferenceable' and 'alignment'. + auto PTy = ParamType->getPointeeType(); + if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) { + auto info = getContext().getTypeInfoInChars(PTy); + Attrs.addDereferenceableAttr(info.first.getQuantity()); + Attrs.addAttribute(llvm::Attribute::getWithAlignment(getLLVMContext(), + info.second.getQuantity())); + } + break; + } + + case ParameterABI::SwiftErrorResult: + Attrs.addAttribute(llvm::Attribute::SwiftError); + break; + + case ParameterABI::SwiftContext: + Attrs.addAttribute(llvm::Attribute::SwiftSelf); + break; + } + if (Attrs.hasAttributes()) { unsigned FirstIRArg, NumIRArgs; std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); @@ -1810,6 +2057,18 @@ static const NonNullAttr *getNonNullAttr(const Decl *FD, const ParmVarDecl *PVD, return nullptr; } +namespace { + struct CopyBackSwiftError final : EHScopeStack::Cleanup { + Address Temp; + Address Arg; + CopyBackSwiftError(Address temp, Address arg) : Temp(temp), Arg(arg) {} + void Emit(CodeGenFunction &CGF, Flags flags) override { + llvm::Value *errorValue = CGF.Builder.CreateLoad(Temp); + CGF.Builder.CreateStore(errorValue, Arg); + } + }; +} + void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, llvm::Function *Fn, const FunctionArgList &Args) { @@ -1835,7 +2094,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), FI); // Flattened function arguments. - SmallVector<llvm::Argument *, 16> FnArgs; + SmallVector<llvm::Value *, 16> FnArgs; FnArgs.reserve(IRFunctionArgs.totalIRArgs()); for (auto &Arg : Fn->args()) { FnArgs.push_back(&Arg); @@ -1856,7 +2115,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // Name the struct return parameter. 
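  // For illustration only (not part of this change): the ParameterABI switch
  // above maps the Swift parameter kinds onto LLVM IR attributes:
  // swift_indirect_result becomes 'sret' (when still available) plus
  // 'noalias', swift_error_result becomes 'swifterror', and swift_context
  // becomes 'swiftself'. A rough sketch of a resulting IR signature, assuming
  // the swiftcc/swiftself/swifterror spellings in this LLVM import:
  //
  //   declare swiftcc void @work(i32, i8* swiftself, i8** swifterror)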
if (IRFunctionArgs.hasSRetArg()) { - auto AI = FnArgs[IRFunctionArgs.getSRetArgNo()]; + auto AI = cast<llvm::Argument>(FnArgs[IRFunctionArgs.getSRetArgNo()]); AI->setName("agg.result"); AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), AI->getArgNo() + 1, llvm::Attribute::NoAlias)); @@ -1944,8 +2203,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, ArgI.getCoerceToType() == ConvertType(Ty) && ArgI.getDirectOffset() == 0) { assert(NumIRArgs == 1); - auto AI = FnArgs[FirstIRArg]; - llvm::Value *V = AI; + llvm::Value *V = FnArgs[FirstIRArg]; + auto AI = cast<llvm::Argument>(V); if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) { if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(), @@ -2014,6 +2273,25 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, AI->getArgNo() + 1, llvm::Attribute::NoAlias)); + // LLVM expects swifterror parameters to be used in very restricted + // ways. Copy the value into a less-restricted temporary. + if (FI.getExtParameterInfo(ArgNo).getABI() + == ParameterABI::SwiftErrorResult) { + QualType pointeeTy = Ty->getPointeeType(); + assert(pointeeTy->isPointerType()); + Address temp = + CreateMemTemp(pointeeTy, getPointerAlign(), "swifterror.temp"); + Address arg = Address(V, getContext().getTypeAlignInChars(pointeeTy)); + llvm::Value *incomingErrorValue = Builder.CreateLoad(arg); + Builder.CreateStore(incomingErrorValue, temp); + V = temp.getPointer(); + + // Push a cleanup to copy the value back at the end of the function. + // The convention does not guarantee that the value will be written + // back if the function exits with an unwind exception. + EHStack.pushCleanup<CopyBackSwiftError>(NormalCleanup, temp, arg); + } + // Ensure the argument is the correct type. if (V->getType() != ArgI.getCoerceToType()) V = Builder.CreateBitCast(V, ArgI.getCoerceToType()); @@ -2100,6 +2378,29 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, break; } + case ABIArgInfo::CoerceAndExpand: { + // Reconstruct into a temporary. + Address alloca = CreateMemTemp(Ty, getContext().getDeclAlign(Arg)); + ArgVals.push_back(ParamValue::forIndirect(alloca)); + + auto coercionType = ArgI.getCoerceAndExpandType(); + alloca = Builder.CreateElementBitCast(alloca, coercionType); + auto layout = CGM.getDataLayout().getStructLayout(coercionType); + + unsigned argIndex = FirstIRArg; + for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { + llvm::Type *eltType = coercionType->getElementType(i); + if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) + continue; + + auto eltAddr = Builder.CreateStructGEP(alloca, i, layout); + auto elt = FnArgs[argIndex++]; + Builder.CreateStore(elt, eltAddr); + } + assert(argIndex == FirstIRArg + NumIRArgs); + break; + } + case ABIArgInfo::Expand: { // If this structure was expanded into multiple arguments then // we need to create a temporary and reconstruct it from the @@ -2462,9 +2763,26 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, // In ARC, end functions that return a retainable type with a call // to objc_autoreleaseReturnValue. if (AutoreleaseResult) { +#ifndef NDEBUG + // Type::isObjCRetainabletype has to be called on a QualType that hasn't + // been stripped of the typedefs, so we cannot use RetTy here. Get the + // original return type of FunctionDecl, CurCodeDecl, and BlockDecl from + // CurCodeDecl or BlockInfo. 
+ QualType RT; + + if (auto *FD = dyn_cast<FunctionDecl>(CurCodeDecl)) + RT = FD->getReturnType(); + else if (auto *MD = dyn_cast<ObjCMethodDecl>(CurCodeDecl)) + RT = MD->getReturnType(); + else if (isa<BlockDecl>(CurCodeDecl)) + RT = BlockInfo->BlockExpression->getFunctionType()->getReturnType(); + else + llvm_unreachable("Unexpected function/method type"); + assert(getLangOpts().ObjCAutoRefCount && !FI.isReturnsRetained() && - RetTy->isObjCRetainableType()); + RT->isObjCRetainableType()); +#endif RV = emitAutoreleaseOfResult(*this, RV); } @@ -2473,6 +2791,40 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, case ABIArgInfo::Ignore: break; + case ABIArgInfo::CoerceAndExpand: { + auto coercionType = RetAI.getCoerceAndExpandType(); + auto layout = CGM.getDataLayout().getStructLayout(coercionType); + + // Load all of the coerced elements out into results. + llvm::SmallVector<llvm::Value*, 4> results; + Address addr = Builder.CreateElementBitCast(ReturnValue, coercionType); + for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { + auto coercedEltType = coercionType->getElementType(i); + if (ABIArgInfo::isPaddingForCoerceAndExpand(coercedEltType)) + continue; + + auto eltAddr = Builder.CreateStructGEP(addr, i, layout); + auto elt = Builder.CreateLoad(eltAddr); + results.push_back(elt); + } + + // If we have one result, it's the single direct result type. + if (results.size() == 1) { + RV = results[0]; + + // Otherwise, we need to make a first-class aggregate. + } else { + // Construct a return type that lacks padding elements. + llvm::Type *returnType = RetAI.getUnpaddedCoerceAndExpandType(); + + RV = llvm::UndefValue::get(returnType); + for (unsigned i = 0, e = results.size(); i != e; ++i) { + RV = Builder.CreateInsertValue(RV, results[i], i); + } + } + break; + } + case ABIArgInfo::Expand: llvm_unreachable("Invalid ABI kind for return argument"); } @@ -2536,23 +2888,15 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, QualType type = param->getType(); - // For the most part, we just need to load the alloca, except: - // 1) aggregate r-values are actually pointers to temporaries, and - // 2) references to non-scalars are pointers directly to the aggregate. - // I don't know why references to scalars are different here. - if (const ReferenceType *ref = type->getAs<ReferenceType>()) { - if (!hasScalarEvaluationKind(ref->getPointeeType())) - return args.add(RValue::getAggregate(local), type); - - // Locals which are references to scalars are represented - // with allocas holding the pointer. - return args.add(RValue::get(Builder.CreateLoad(local)), type); - } - assert(!isInAllocaArgument(CGM.getCXXABI(), type) && "cannot emit delegate call arguments for inalloca arguments!"); - args.add(convertTempToRValue(local, type, loc), type); + // For the most part, we just need to load the alloca, except that + // aggregate r-values are actually pointers to temporaries. 
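  // For illustration only (not part of this change): the new CoerceAndExpand
  // kind used in the return-value handling above splits an aggregate into the
  // non-padding elements of a target-chosen coercion struct and moves each
  // element as a separate IR value. For example, a record such as
  //
  //   struct Pair { float f; int i; };
  //
  // might be given the coercion type { float, i32 }: both elements are loaded
  // from (or stored back into) the coerced temporary, while any padding
  // elements the target inserts for alignment are skipped. The concrete
  // coercion type is chosen by the target/swiftcall lowering, so treat the
  // struct above as a sketch only.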
+ if (type->isReferenceType()) + args.add(RValue::get(Builder.CreateLoad(local)), type); + else + args.add(convertTempToRValue(local, type, loc), type); } static bool isProvablyNull(llvm::Value *addr) { @@ -2863,10 +3207,10 @@ void CodeGenFunction::EmitCallArgs( size_t CallArgsStart = Args.size(); for (int I = ArgTypes.size() - 1; I >= 0; --I) { CallExpr::const_arg_iterator Arg = ArgRange.begin() + I; + MaybeEmitImplicitObjectSize(I, *Arg); EmitCallArg(Args, *Arg, ArgTypes[I]); EmitNonNullArgCheck(Args.back().RV, ArgTypes[I], (*Arg)->getExprLoc(), CalleeDecl, ParamsToSkip + I); - MaybeEmitImplicitObjectSize(I, *Arg); } // Un-reverse the arguments we just evaluated so they match up with the LLVM @@ -3046,24 +3390,13 @@ CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, return EmitRuntimeCall(callee, None, name); } -/// Emits a simple call (never an invoke) to the given runtime -/// function. -llvm::CallInst * -CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, - ArrayRef<llvm::Value*> args, - const llvm::Twine &name) { - llvm::CallInst *call = Builder.CreateCall(callee, args, name); - call->setCallingConv(getRuntimeCC()); - return call; -} - // Calls which may throw must have operand bundles indicating which funclet // they are nested within. static void -getBundlesForFunclet(llvm::Value *Callee, - llvm::Instruction *CurrentFuncletPad, +getBundlesForFunclet(llvm::Value *Callee, llvm::Instruction *CurrentFuncletPad, SmallVectorImpl<llvm::OperandBundleDef> &BundleList) { - // There is no need for a funclet operand bundle if we aren't inside a funclet. + // There is no need for a funclet operand bundle if we aren't inside a + // funclet. if (!CurrentFuncletPad) return; @@ -3075,6 +3408,19 @@ getBundlesForFunclet(llvm::Value *Callee, BundleList.emplace_back("funclet", CurrentFuncletPad); } +/// Emits a simple call (never an invoke) to the given runtime function. +llvm::CallInst * +CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, + ArrayRef<llvm::Value*> args, + const llvm::Twine &name) { + SmallVector<llvm::OperandBundleDef, 1> BundleList; + getBundlesForFunclet(callee, CurrentFuncletPad, BundleList); + + llvm::CallInst *call = Builder.CreateCall(callee, args, BundleList, name); + call->setCallingConv(getRuntimeCC()); + return call; +} + /// Emits a call or invoke to the given noreturn runtime function. void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, ArrayRef<llvm::Value*> args) { @@ -3098,8 +3444,7 @@ void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, } } -/// Emits a call or invoke instruction to the given nullary runtime -/// function. +/// Emits a call or invoke instruction to the given nullary runtime function. 
llvm::CallSite CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::Value *callee, const Twine &name) { @@ -3123,13 +3468,16 @@ CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee, ArrayRef<llvm::Value *> Args, const Twine &Name) { llvm::BasicBlock *InvokeDest = getInvokeDest(); + SmallVector<llvm::OperandBundleDef, 1> BundleList; + getBundlesForFunclet(Callee, CurrentFuncletPad, BundleList); llvm::Instruction *Inst; if (!InvokeDest) - Inst = Builder.CreateCall(Callee, Args, Name); + Inst = Builder.CreateCall(Callee, Args, BundleList, Name); else { llvm::BasicBlock *ContBB = createBasicBlock("invoke.cont"); - Inst = Builder.CreateInvoke(Callee, ContBB, InvokeDest, Args, Name); + Inst = Builder.CreateInvoke(Callee, ContBB, InvokeDest, Args, BundleList, + Name); EmitBlock(ContBB); } @@ -3208,7 +3556,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // alloca to hold the result, unless one is given to us. Address SRetPtr = Address::invalid(); size_t UnusedReturnSize = 0; - if (RetAI.isIndirect() || RetAI.isInAlloca()) { + if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { if (!ReturnValue.isNull()) { SRetPtr = ReturnValue.getValue(); } else { @@ -3222,12 +3570,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } if (IRFunctionArgs.hasSRetArg()) { IRCallArgs[IRFunctionArgs.getSRetArgNo()] = SRetPtr.getPointer(); - } else { + } else if (RetAI.isInAlloca()) { Address Addr = createInAllocaStructGEP(RetAI.getInAllocaFieldIndex()); Builder.CreateStore(SRetPtr.getPointer(), Addr); } } + Address swiftErrorTemp = Address::invalid(); + Address swiftErrorArg = Address::invalid(); + assert(CallInfo.arg_size() == CallArgs.size() && "Mismatch between function signature & arguments."); unsigned ArgNo = 0; @@ -3334,6 +3685,25 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, else V = Builder.CreateLoad(RV.getAggregateAddress()); + // Implement swifterror by copying into a new swifterror argument. + // We'll write back in the normal path out of the call. + if (CallInfo.getExtParameterInfo(ArgNo).getABI() + == ParameterABI::SwiftErrorResult) { + assert(!swiftErrorTemp.isValid() && "multiple swifterror args"); + + QualType pointeeTy = I->Ty->getPointeeType(); + swiftErrorArg = + Address(V, getContext().getTypeAlignInChars(pointeeTy)); + + swiftErrorTemp = + CreateMemTemp(pointeeTy, getPointerAlign(), "swifterror.temp"); + V = swiftErrorTemp.getPointer(); + cast<llvm::AllocaInst>(V)->setSwiftError(true); + + llvm::Value *errorValue = Builder.CreateLoad(swiftErrorArg); + Builder.CreateStore(errorValue, swiftErrorTemp); + } + // We might have to widen integers, but we should never truncate. 
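  // For illustration only (not part of this change): the swifterror handling
  // above copies the caller's error slot into a fresh alloca marked with
  // setSwiftError(true), passes that alloca to the callee, and copies the
  // value back on the normal return path later in this function. A sketch of
  // a source-level declaration that reaches this path, assuming the
  // swift_error_result spelling and its requirement of a preceding
  // swift_context parameter:
  //
  //   void mayFail(void *ctx __attribute__((swift_context)),
  //                void **err __attribute__((swift_error_result)))
  //       __attribute__((swiftcall));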
if (ArgInfo.getCoerceToType() != V->getType() && V->getType()->isIntegerTy()) @@ -3344,6 +3714,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (FirstIRArg < IRFuncTy->getNumParams() && V->getType() != IRFuncTy->getParamType(FirstIRArg)) V = Builder.CreateBitCast(V, IRFuncTy->getParamType(FirstIRArg)); + IRCallArgs[FirstIRArg] = V; break; } @@ -3402,6 +3773,51 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, break; } + case ABIArgInfo::CoerceAndExpand: { + auto coercionType = ArgInfo.getCoerceAndExpandType(); + auto layout = CGM.getDataLayout().getStructLayout(coercionType); + + llvm::Value *tempSize = nullptr; + Address addr = Address::invalid(); + if (RV.isAggregate()) { + addr = RV.getAggregateAddress(); + } else { + assert(RV.isScalar()); // complex should always just be direct + + llvm::Type *scalarType = RV.getScalarVal()->getType(); + auto scalarSize = CGM.getDataLayout().getTypeAllocSize(scalarType); + auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType); + + tempSize = llvm::ConstantInt::get(CGM.Int64Ty, scalarSize); + + // Materialize to a temporary. + addr = CreateTempAlloca(RV.getScalarVal()->getType(), + CharUnits::fromQuantity(std::max(layout->getAlignment(), + scalarAlign))); + EmitLifetimeStart(scalarSize, addr.getPointer()); + + Builder.CreateStore(RV.getScalarVal(), addr); + } + + addr = Builder.CreateElementBitCast(addr, coercionType); + + unsigned IRArgPos = FirstIRArg; + for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { + llvm::Type *eltType = coercionType->getElementType(i); + if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; + Address eltAddr = Builder.CreateStructGEP(addr, i, layout); + llvm::Value *elt = Builder.CreateLoad(eltAddr); + IRCallArgs[IRArgPos++] = elt; + } + assert(IRArgPos == FirstIRArg + NumIRArgs); + + if (tempSize) { + EmitLifetimeEnd(tempSize, addr.getPointer()); + } + + break; + } + case ABIArgInfo::Expand: unsigned IRArgPos = FirstIRArg; ExpandTypeToArgs(I->Ty, RV, IRFuncTy, IRCallArgs, IRArgPos); @@ -3541,6 +3957,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, CS.setAttributes(Attrs); CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); + // Insert instrumentation or attach profile metadata at indirect call sites. + // For more details, see the comment before the definition of + // IPVK_IndirectCallTarget in InstrProfData.inc. + if (!CS.getCalledFunction()) + PGO.valueProfile(Builder, llvm::IPVK_IndirectCallTarget, + CS.getInstruction(), Callee); + // In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC // optimizer it can aggressively ignore unwind edges. if (CGM.getLangOpts().ObjCAutoRefCount) @@ -3567,9 +3990,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } llvm::Instruction *CI = CS.getInstruction(); - if (Builder.isNamePreserving() && !CI->getType()->isVoidTy()) + if (!CI->getType()->isVoidTy()) CI->setName("call"); + // Perform the swifterror writeback. + if (swiftErrorTemp.isValid()) { + llvm::Value *errorResult = Builder.CreateLoad(swiftErrorTemp); + Builder.CreateStore(errorResult, swiftErrorArg); + } + // Emit any writebacks immediately. Arguably this should happen // after any return-value munging. 
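  // For illustration only (not part of this change): the PGO.valueProfile
  // call added above instruments indirect call sites so the profile records
  // which targets are actually reached; with such a profile applied, the
  // optimizer can speculatively promote hot indirect calls. A hedged example
  // of the usual workflow, assuming the standard instrumentation flags:
  //
  //   clang++ -O2 -fprofile-instr-generate app.cpp -o app
  //   ./app && llvm-profdata merge default.profraw -o app.profdata
  //   clang++ -O2 -fprofile-instr-use=app.profdata app.cpp -o app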
if (CallArgs.hasWritebacks()) @@ -3587,6 +4016,31 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, RValue Ret = [&] { switch (RetAI.getKind()) { + case ABIArgInfo::CoerceAndExpand: { + auto coercionType = RetAI.getCoerceAndExpandType(); + auto layout = CGM.getDataLayout().getStructLayout(coercionType); + + Address addr = SRetPtr; + addr = Builder.CreateElementBitCast(addr, coercionType); + + assert(CI->getType() == RetAI.getUnpaddedCoerceAndExpandType()); + bool requiresExtract = isa<llvm::StructType>(CI->getType()); + + unsigned unpaddedIndex = 0; + for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { + llvm::Type *eltType = coercionType->getElementType(i); + if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; + Address eltAddr = Builder.CreateStructGEP(addr, i, layout); + llvm::Value *elt = CI; + if (requiresExtract) + elt = Builder.CreateExtractValue(elt, unpaddedIndex++); + else + assert(unpaddedIndex == 0); + Builder.CreateStore(elt, eltAddr); + } + // FALLTHROUGH + } + case ABIArgInfo::InAlloca: case ABIArgInfo::Indirect: { RValue ret = convertTempToRValue(SRetPtr, RetTy, SourceLocation()); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp index 2e566de..7ed891f 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp @@ -26,6 +26,7 @@ #include "clang/Frontend/CodeGenOptions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Metadata.h" +#include "llvm/Transforms/Utils/SanitizerStats.h" using namespace clang; using namespace CodeGen; @@ -94,7 +95,7 @@ CodeGenModule::getDynamicOffsetAlignment(CharUnits actualBaseAlign, // unless we someday add some sort of attribute to change the // assumed alignment of 'this'. So our goal here is pretty much // just to allow the user to explicitly say that a pointer is - // under-aligned and then safely access its fields and v-tables. + // under-aligned and then safely access its fields and vtables. if (actualBaseAlign >= expectedBaseAlign) { return expectedTargetAlign; } @@ -745,7 +746,7 @@ static void EmitMemberInitializer(CodeGenFunction &CGF, ArrayRef<VarDecl *> ArrayIndexes; if (MemberInit->getNumArrayIndices()) - ArrayIndexes = MemberInit->getArrayIndexes(); + ArrayIndexes = MemberInit->getArrayIndices(); CGF.EmitInitializerForField(Field, LHS, MemberInit->getInit(), ArrayIndexes); } @@ -986,7 +987,7 @@ namespace { CodeGenFunction &CGF; SanitizerSet OldSanOpts; }; -} +} // end anonymous namespace namespace { class FieldMemcpyizer { @@ -1071,7 +1072,6 @@ namespace { const CXXRecordDecl *ClassDecl; private: - void emitMemcpyIR(Address DestPtr, Address SrcPtr, CharUnits Size) { llvm::PointerType *DPT = DestPtr.getType(); llvm::Type *DBP = @@ -1087,13 +1087,12 @@ namespace { } void addInitialField(FieldDecl *F) { - FirstField = F; - LastField = F; - FirstFieldOffset = RecLayout.getFieldOffset(F->getFieldIndex()); - LastFieldOffset = FirstFieldOffset; - LastAddedFieldIndex = F->getFieldIndex(); - return; - } + FirstField = F; + LastField = F; + FirstFieldOffset = RecLayout.getFieldOffset(F->getFieldIndex()); + LastFieldOffset = FirstFieldOffset; + LastAddedFieldIndex = F->getFieldIndex(); + } void addNextField(FieldDecl *F) { // For the most part, the following invariant will hold: @@ -1127,7 +1126,6 @@ namespace { class ConstructorMemcpyizer : public FieldMemcpyizer { private: - /// Get source argument for copy constructor. Returns null if not a copy /// constructor. 
static const VarDecl *getTrivialCopySource(CodeGenFunction &CGF, @@ -1232,7 +1230,6 @@ namespace { class AssignmentMemcpyizer : public FieldMemcpyizer { private: - // Returns the memcpyable field copied by the given statement, if one // exists. Otherwise returns null. FieldDecl *getMemcpyableField(Stmt *S) { @@ -1306,7 +1303,6 @@ namespace { SmallVector<Stmt*, 16> AggregatedStmts; public: - AssignmentMemcpyizer(CodeGenFunction &CGF, const CXXMethodDecl *AD, FunctionArgList &Args) : FieldMemcpyizer(CGF, AD->getParent(), Args[Args.size() - 1]), @@ -1607,6 +1603,7 @@ void CodeGenFunction::emitImplicitAssignmentOperatorBody(FunctionArgList &Args) LexicalScope Scope(*this, RootCS->getSourceRange()); + incrementProfileCounter(RootCS); AssignmentMemcpyizer AM(*this, AssignOp, Args); for (auto *I : RootCS->body()) AM.emitAssignment(I); @@ -1628,6 +1625,7 @@ namespace { struct CallDtorDeleteConditional final : EHScopeStack::Cleanup { llvm::Value *ShouldDeleteCondition; + public: CallDtorDeleteConditional(llvm::Value *ShouldDeleteCondition) : ShouldDeleteCondition(ShouldDeleteCondition) { @@ -1917,7 +1915,7 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, /// \param zeroInitialize true if each element should be /// zero-initialized before it is constructed void CodeGenFunction::EmitCXXAggrConstructorCall( - const CXXConstructorDecl *ctor, const ConstantArrayType *arrayType, + const CXXConstructorDecl *ctor, const ArrayType *arrayType, Address arrayBegin, const CXXConstructExpr *E, bool zeroInitialize) { QualType elementType; llvm::Value *numElements = @@ -2050,6 +2048,62 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, bool ForVirtualBase, bool Delegating, Address This, const CXXConstructExpr *E) { + CallArgList Args; + + // Push the this ptr. + Args.add(RValue::get(This.getPointer()), D->getThisType(getContext())); + + // If this is a trivial constructor, emit a memcpy now before we lose + // the alignment information on the argument. + // FIXME: It would be better to preserve alignment information into CallArg. + if (isMemcpyEquivalentSpecialMember(D)) { + assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor"); + + const Expr *Arg = E->getArg(0); + QualType SrcTy = Arg->getType(); + Address Src = EmitLValue(Arg).getAddress(); + QualType DestTy = getContext().getTypeDeclType(D->getParent()); + EmitAggregateCopyCtor(This, Src, DestTy, SrcTy); + return; + } + + // Add the rest of the user-supplied arguments. + const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); + EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor()); + + EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args); +} + +static bool canEmitDelegateCallArgs(CodeGenFunction &CGF, + const CXXConstructorDecl *Ctor, + CXXCtorType Type, CallArgList &Args) { + // We can't forward a variadic call. + if (Ctor->isVariadic()) + return false; + + if (CGF.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) { + // If the parameters are callee-cleanup, it's not safe to forward. + for (auto *P : Ctor->parameters()) + if (P->getType().isDestructedType()) + return false; + + // Likewise if they're inalloca. + const CGFunctionInfo &Info = + CGF.CGM.getTypes().arrangeCXXConstructorCall(Args, Ctor, Type, 0); + if (Info.usesInAlloca()) + return false; + } + + // Anything else should be OK. 
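  // For illustration only (not part of this change): the
  // canEmitDelegateCallArgs check above guards the new handling of C++11
  // inheriting constructors: when the arguments cannot simply be forwarded
  // (variadic constructors, callee-cleanup or inalloca parameters), the
  // inheriting constructor is emitted inline instead. A small, self-contained
  // example that exercises CXXInheritedCtorInitExpr:
  //
  //   struct Base { Base(int, double); };
  //   struct Derived : Base { using Base::Base; };  // inheriting constructor
  //   Derived d(1, 2.0);                            // constructs Base(1, 2.0)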
+ return true; +} + +void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, + CXXCtorType Type, + bool ForVirtualBase, + bool Delegating, + Address This, + CallArgList &Args) { const CXXRecordDecl *ClassDecl = D->getParent(); // C++11 [class.mfct.non-static]p2: @@ -2060,7 +2114,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, This.getPointer(), getContext().getRecordType(ClassDecl)); if (D->isTrivial() && D->isDefaultConstructor()) { - assert(E->getNumArgs() == 0 && "trivial default ctor with args"); + assert(Args.size() == 1 && "trivial default ctor with args"); return; } @@ -2068,24 +2122,24 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, // union copy constructor, we must emit a memcpy, because the AST does not // model that copy. if (isMemcpyEquivalentSpecialMember(D)) { - assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor"); + assert(Args.size() == 2 && "unexpected argcount for trivial ctor"); - const Expr *Arg = E->getArg(0); - QualType SrcTy = Arg->getType(); - Address Src = EmitLValue(Arg).getAddress(); + QualType SrcTy = D->getParamDecl(0)->getType().getNonReferenceType(); + Address Src(Args[1].RV.getScalarVal(), getNaturalTypeAlignment(SrcTy)); QualType DestTy = getContext().getTypeDeclType(ClassDecl); EmitAggregateCopyCtor(This, Src, DestTy, SrcTy); return; } - CallArgList Args; - - // Push the this ptr. - Args.add(RValue::get(This.getPointer()), D->getThisType(getContext())); - - // Add the rest of the user-supplied arguments. - const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); - EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor()); + // Check whether we can actually emit the constructor before trying to do so. + if (auto Inherited = D->getInheritedConstructor()) { + if (getTypes().inheritingCtorHasParams(Inherited, Type) && + !canEmitDelegateCallArgs(*this, D, Type, Args)) { + EmitInlinedInheritingCXXConstructorCall(D, Type, ForVirtualBase, + Delegating, Args); + return; + } + } // Insert any ABI-specific implicit constructor arguments. unsigned ExtraArgs = CGM.getCXXABI().addImplicitConstructorArgs( @@ -2115,6 +2169,95 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, EmitVTableAssumptionLoads(ClassDecl, This); } +void CodeGenFunction::EmitInheritedCXXConstructorCall( + const CXXConstructorDecl *D, bool ForVirtualBase, Address This, + bool InheritedFromVBase, const CXXInheritedCtorInitExpr *E) { + CallArgList Args; + CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType(getContext()), + /*NeedsCopy=*/false); + + // Forward the parameters. + if (InheritedFromVBase && + CGM.getTarget().getCXXABI().hasConstructorVariants()) { + // Nothing to do; this construction is not responsible for constructing + // the base class containing the inherited constructor. + // FIXME: Can we just pass undef's for the remaining arguments if we don't + // have constructor variants? + Args.push_back(ThisArg); + } else if (!CXXInheritedCtorInitExprArgs.empty()) { + // The inheriting constructor was inlined; just inject its arguments. + assert(CXXInheritedCtorInitExprArgs.size() >= D->getNumParams() && + "wrong number of parameters for inherited constructor call"); + Args = CXXInheritedCtorInitExprArgs; + Args[0] = ThisArg; + } else { + // The inheriting constructor was not inlined. Emit delegating arguments. 
+ Args.push_back(ThisArg); + const auto *OuterCtor = cast<CXXConstructorDecl>(CurCodeDecl); + assert(OuterCtor->getNumParams() == D->getNumParams()); + assert(!OuterCtor->isVariadic() && "should have been inlined"); + + for (const auto *Param : OuterCtor->parameters()) { + assert(getContext().hasSameUnqualifiedType( + OuterCtor->getParamDecl(Param->getFunctionScopeIndex())->getType(), + Param->getType())); + EmitDelegateCallArg(Args, Param, E->getLocation()); + + // Forward __attribute__(pass_object_size). + if (Param->hasAttr<PassObjectSizeAttr>()) { + auto *POSParam = SizeArguments[Param]; + assert(POSParam && "missing pass_object_size value for forwarding"); + EmitDelegateCallArg(Args, POSParam, E->getLocation()); + } + } + } + + EmitCXXConstructorCall(D, Ctor_Base, ForVirtualBase, /*Delegating*/false, + This, Args); +} + +void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall( + const CXXConstructorDecl *Ctor, CXXCtorType CtorType, bool ForVirtualBase, + bool Delegating, CallArgList &Args) { + InlinedInheritingConstructorScope Scope(*this, GlobalDecl(Ctor, CtorType)); + + // Save the arguments to be passed to the inherited constructor. + CXXInheritedCtorInitExprArgs = Args; + + FunctionArgList Params; + QualType RetType = BuildFunctionArgList(CurGD, Params); + FnRetTy = RetType; + + // Insert any ABI-specific implicit constructor arguments. + CGM.getCXXABI().addImplicitConstructorArgs(*this, Ctor, CtorType, + ForVirtualBase, Delegating, Args); + + // Emit a simplified prolog. We only need to emit the implicit params. + assert(Args.size() >= Params.size() && "too few arguments for call"); + for (unsigned I = 0, N = Args.size(); I != N; ++I) { + if (I < Params.size() && isa<ImplicitParamDecl>(Params[I])) { + const RValue &RV = Args[I].RV; + assert(!RV.isComplex() && "complex indirect params not supported"); + ParamValue Val = RV.isScalar() + ? ParamValue::forDirect(RV.getScalarVal()) + : ParamValue::forIndirect(RV.getAggregateAddress()); + EmitParmDecl(*Params[I], Val, I + 1); + } + } + + // Create a return value slot if the ABI implementation wants one. + // FIXME: This is dumb, we should ask the ABI not to try to set the return + // value instead. + if (!RetType->isVoidType()) + ReturnValue = CreateIRTemp(RetType, "retval.inhctor"); + + CGM.getCXXABI().EmitInstanceFunctionProlog(*this); + CXXThisValue = CXXABIThisValue; + + // Directly emit the constructor initializers. 
+ EmitCtorPrologue(Ctor, CtorType, Params); +} + void CodeGenFunction::EmitVTableAssumptionLoad(const VPtr &Vptr, Address This) { llvm::Value *VTableGlobal = CGM.getCXXABI().getVTableAddressPoint(Vptr.Base, Vptr.VTableClass); @@ -2147,19 +2290,6 @@ void CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D, Address This, Address Src, const CXXConstructExpr *E) { - if (isMemcpyEquivalentSpecialMember(D)) { - assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor"); - assert(D->isCopyOrMoveConstructor() && - "trivial 1-arg ctor not a copy/move ctor"); - EmitAggregateCopyCtor(This, Src, - getContext().getTypeDeclType(D->getParent()), - (*E->arg_begin())->getType()); - return; - } - llvm::Value *Callee = CGM.getAddrOfCXXStructor(D, StructorType::Complete); - assert(D->isInstance() && - "Trying to emit a member call expr on a static method!"); - const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); CallArgList Args; @@ -2177,8 +2307,7 @@ CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D, EmitCallArgs(Args, FPT, drop_begin(E->arguments(), 1), E->getConstructor(), /*ParamsToSkip*/ 1); - EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, RequiredArgs::All), - Callee, ReturnValueSlot(), Args, D); + EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args); } void @@ -2192,21 +2321,17 @@ CodeGenFunction::EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor, assert(I != E && "no parameters to constructor"); // this - DelegateArgs.add(RValue::get(LoadCXXThis()), (*I)->getType()); + Address This = LoadCXXThisAddress(); + DelegateArgs.add(RValue::get(This.getPointer()), (*I)->getType()); ++I; - // vtt - if (llvm::Value *VTT = GetVTTParameter(GlobalDecl(Ctor, CtorType), - /*ForVirtualBase=*/false, - /*Delegating=*/true)) { - QualType VoidPP = getContext().getPointerType(getContext().VoidPtrTy); - DelegateArgs.add(RValue::get(VTT), VoidPP); - - if (CGM.getCXXABI().NeedsVTTParameter(CurGD)) { - assert(I != E && "cannot skip vtt parameter, already done with args"); - assert((*I)->getType() == VoidPP && "skipping parameter not of vtt type"); - ++I; - } + // FIXME: The location of the VTT parameter in the parameter list is + // specific to the Itanium ABI and shouldn't be hardcoded here. + if (CGM.getCXXABI().NeedsVTTParameter(CurGD)) { + assert(I != E && "cannot skip vtt parameter, already done with args"); + assert((*I)->getType()->isPointerType() && + "skipping parameter not of vtt type"); + ++I; } // Explicit arguments. 
@@ -2216,11 +2341,8 @@ CodeGenFunction::EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor, EmitDelegateCallArg(DelegateArgs, param, Loc); } - llvm::Value *Callee = - CGM.getAddrOfCXXStructor(Ctor, getFromCtorType(CtorType)); - EmitCall(CGM.getTypes() - .arrangeCXXStructorDeclaration(Ctor, getFromCtorType(CtorType)), - Callee, ReturnValueSlot(), DelegateArgs, Ctor); + EmitCXXConstructorCall(Ctor, CtorType, /*ForVirtualBase=*/false, + /*Delegating=*/true, This, DelegateArgs); } namespace { @@ -2289,7 +2411,7 @@ namespace { /*Delegating=*/false, Addr); } }; -} +} // end anonymous namespace void CodeGenFunction::PushDestructorCleanup(const CXXDestructorDecl *D, Address Addr) { @@ -2487,15 +2609,35 @@ LeastDerivedClassWithSameLayout(const CXXRecordDecl *RD) { RD->bases_begin()->getType()->getAsCXXRecordDecl()); } -void CodeGenFunction::EmitVTablePtrCheckForCall(const CXXMethodDecl *MD, +void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, + llvm::Value *VTable, + SourceLocation Loc) { + if (CGM.getCodeGenOpts().WholeProgramVTables && + CGM.HasHiddenLTOVisibility(RD)) { + llvm::Metadata *MD = + CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); + llvm::Value *TypeId = + llvm::MetadataAsValue::get(CGM.getLLVMContext(), MD); + + llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy); + llvm::Value *TypeTest = + Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::type_test), + {CastedVTable, TypeId}); + Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::assume), TypeTest); + } + + if (SanOpts.has(SanitizerKind::CFIVCall)) + EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); +} + +void CodeGenFunction::EmitVTablePtrCheckForCall(const CXXRecordDecl *RD, llvm::Value *VTable, CFITypeCheckKind TCK, SourceLocation Loc) { - const CXXRecordDecl *ClassDecl = MD->getParent(); if (!SanOpts.has(SanitizerKind::CFICastStrict)) - ClassDecl = LeastDerivedClassWithSameLayout(ClassDecl); + RD = LeastDerivedClassWithSameLayout(RD); - EmitVTablePtrCheck(ClassDecl, VTable, TCK, Loc); + EmitVTablePtrCheck(RD, VTable, TCK, Loc); } void CodeGenFunction::EmitVTablePtrCheckForCast(QualType T, @@ -2547,26 +2689,41 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, llvm::Value *VTable, CFITypeCheckKind TCK, SourceLocation Loc) { - if (CGM.IsCFIBlacklistedRecord(RD)) + if (!CGM.getCodeGenOpts().SanitizeCfiCrossDso && + !CGM.HasHiddenLTOVisibility(RD)) + return; + + std::string TypeName = RD->getQualifiedNameAsString(); + if (getContext().getSanitizerBlacklist().isBlacklistedType(TypeName)) return; SanitizerScope SanScope(this); + llvm::SanitizerStatKind SSK; + switch (TCK) { + case CFITCK_VCall: + SSK = llvm::SanStat_CFI_VCall; + break; + case CFITCK_NVCall: + SSK = llvm::SanStat_CFI_NVCall; + break; + case CFITCK_DerivedCast: + SSK = llvm::SanStat_CFI_DerivedCast; + break; + case CFITCK_UnrelatedCast: + SSK = llvm::SanStat_CFI_UnrelatedCast; + break; + case CFITCK_ICall: + llvm_unreachable("not expecting CFITCK_ICall"); + } + EmitSanitizerStatReport(SSK); llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); - llvm::Value *BitSetName = llvm::MetadataAsValue::get(getLLVMContext(), MD); + llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD); llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy); - llvm::Value *BitSetTest = - Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::bitset_test), - {CastedVTable, BitSetName}); - - if 
(CGM.getCodeGenOpts().SanitizeCfiCrossDso) { - if (auto TypeId = CGM.CreateCfiIdForTypeMetadata(MD)) { - EmitCfiSlowPathCheck(BitSetTest, TypeId, CastedVTable); - return; - } - } + llvm::Value *TypeTest = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedVTable, TypeId}); SanitizerMask M; switch (TCK) { @@ -2582,15 +2739,70 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, case CFITCK_UnrelatedCast: M = SanitizerKind::CFIUnrelatedCast; break; + case CFITCK_ICall: + llvm_unreachable("not expecting CFITCK_ICall"); } llvm::Constant *StaticData[] = { + llvm::ConstantInt::get(Int8Ty, TCK), EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(QualType(RD->getTypeForDecl(), 0)), - llvm::ConstantInt::get(Int8Ty, TCK), }; - EmitCheck(std::make_pair(BitSetTest, M), "cfi_bad_type", StaticData, - CastedVTable); + + auto CrossDsoTypeId = CGM.CreateCrossDsoCfiTypeId(MD); + if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && CrossDsoTypeId) { + EmitCfiSlowPathCheck(M, TypeTest, CrossDsoTypeId, CastedVTable, StaticData); + return; + } + + if (CGM.getCodeGenOpts().SanitizeTrap.has(M)) { + EmitTrapCheck(TypeTest); + return; + } + + llvm::Value *AllVtables = llvm::MetadataAsValue::get( + CGM.getLLVMContext(), + llvm::MDString::get(CGM.getLLVMContext(), "all-vtables")); + llvm::Value *ValidVtable = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedVTable, AllVtables}); + EmitCheck(std::make_pair(TypeTest, M), "cfi_check_fail", StaticData, + {CastedVTable, ValidVtable}); +} + +bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) { + if (!CGM.getCodeGenOpts().WholeProgramVTables || + !SanOpts.has(SanitizerKind::CFIVCall) || + !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall) || + !CGM.HasHiddenLTOVisibility(RD)) + return false; + + std::string TypeName = RD->getQualifiedNameAsString(); + return !getContext().getSanitizerBlacklist().isBlacklistedType(TypeName); +} + +llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad( + const CXXRecordDecl *RD, llvm::Value *VTable, uint64_t VTableByteOffset) { + SanitizerScope SanScope(this); + + EmitSanitizerStatReport(llvm::SanStat_CFI_VCall); + + llvm::Metadata *MD = + CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); + llvm::Value *TypeId = llvm::MetadataAsValue::get(CGM.getLLVMContext(), MD); + + llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy); + llvm::Value *CheckedLoad = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_checked_load), + {CastedVTable, llvm::ConstantInt::get(Int32Ty, VTableByteOffset), + TypeId}); + llvm::Value *CheckResult = Builder.CreateExtractValue(CheckedLoad, 1); + + EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall), + "cfi_check_fail", nullptr, nullptr); + + return Builder.CreateBitCast( + Builder.CreateExtractValue(CheckedLoad, 0), + cast<llvm::PointerType>(VTable->getType())->getElementType()); } // FIXME: Ideally Expr::IgnoreParenNoopCasts should do this, but it doesn't do @@ -2731,7 +2943,7 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() { CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType); // Add the rest of the parameters. 
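  // For illustration only (not part of this change): the vtable checks above
  // now emit the llvm.type.test / llvm.type.checked.load intrinsics instead
  // of the older bitset intrinsics, so virtual calls can be checked and, under
  // whole-program vtable optimization, devirtualized. A hedged example of the
  // flags that enable this path, assuming the spellings in this release:
  //
  //   clang++ -O2 -flto -fvisibility=hidden -fwhole-program-vtables \
  //       -fsanitize=cfi-vcall app.cpp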
- for (auto param : BD->params()) + for (auto param : BD->parameters()) EmitDelegateCallArg(CallArgs, param, param->getLocStart()); assert(!Lambda->isGenericLambda() && @@ -2761,7 +2973,7 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { CallArgs.add(RValue::get(ThisPtr), ThisType); // Add the rest of the parameters. - for (auto Param : MD->params()) + for (auto Param : MD->parameters()) EmitDelegateCallArg(CallArgs, Param, Param->getLocStart()); const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator(); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp index ba7dcf7..b3278b3 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp @@ -112,7 +112,7 @@ RValue DominatingValue<RValue>::saved_type::restore(CodeGenFunction &CGF) { /// Push an entry of the given size onto this protected-scope stack. char *EHScopeStack::allocate(size_t Size) { - Size = llvm::RoundUpToAlignment(Size, ScopeStackAlignment); + Size = llvm::alignTo(Size, ScopeStackAlignment); if (!StartOfBuffer) { unsigned Capacity = 1024; while (Capacity < Size) Capacity *= 2; @@ -143,7 +143,7 @@ char *EHScopeStack::allocate(size_t Size) { } void EHScopeStack::deallocate(size_t Size) { - StartOfData += llvm::RoundUpToAlignment(Size, ScopeStackAlignment); + StartOfData += llvm::alignTo(Size, ScopeStackAlignment); } bool EHScopeStack::containsOnlyLifetimeMarkers( @@ -157,6 +157,20 @@ bool EHScopeStack::containsOnlyLifetimeMarkers( return true; } +bool EHScopeStack::requiresLandingPad() const { + for (stable_iterator si = getInnermostEHScope(); si != stable_end(); ) { + // Skip lifetime markers. + if (auto *cleanup = dyn_cast<EHCleanupScope>(&*find(si))) + if (cleanup->isLifetimeMarker()) { + si = cleanup->getEnclosingEHScope(); + continue; + } + return true; + } + + return false; +} + EHScopeStack::stable_iterator EHScopeStack::getInnermostActiveNormalCleanup() const { for (stable_iterator si = getInnermostNormalCleanup(), se = stable_end(); @@ -174,6 +188,7 @@ void *EHScopeStack::pushCleanup(CleanupKind Kind, size_t Size) { bool IsNormalCleanup = Kind & NormalCleanup; bool IsEHCleanup = Kind & EHCleanup; bool IsActive = !(Kind & InactiveCleanup); + bool IsLifetimeMarker = Kind & LifetimeMarker; EHCleanupScope *Scope = new (Buffer) EHCleanupScope(IsNormalCleanup, IsEHCleanup, @@ -186,6 +201,8 @@ void *EHScopeStack::pushCleanup(CleanupKind Kind, size_t Size) { InnermostNormalCleanup = stable_begin(); if (IsEHCleanup) InnermostEHScope = stable_begin(); + if (IsLifetimeMarker) + Scope->setLifetimeMarker(); return Scope->getCleanupBuffer(); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.h b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.h index 909f00b..98d01b1 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.h @@ -86,11 +86,6 @@ protected: /// The amount of extra storage needed by the Cleanup. /// Always a multiple of the scope-stack alignment. unsigned CleanupSize : 12; - - /// The number of fixups required by enclosing scopes (not including - /// this one). If this is the top cleanup scope, all the fixups - /// from this index onwards belong to this scope. 
- unsigned FixupDepth : 32 - 18 - NumCommonBits; // currently 12 }; class FilterBitFields { @@ -188,6 +183,7 @@ public: EHScopeStack::stable_iterator enclosingEHScope) : EHScope(Catch, enclosingEHScope) { CatchBits.NumHandlers = numHandlers; + assert(CatchBits.NumHandlers == numHandlers && "NumHandlers overflow?"); } unsigned getNumHandlers() const { @@ -263,6 +259,11 @@ class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) EHCleanupScope : public EHScope { }; mutable struct ExtInfo *ExtInfo; + /// The number of fixups required by enclosing scopes (not including + /// this one). If this is the top cleanup scope, all the fixups + /// from this index onwards belong to this scope. + unsigned FixupDepth; + struct ExtInfo &getExtInfo() { if (!ExtInfo) ExtInfo = new struct ExtInfo(); return *ExtInfo; @@ -288,8 +289,9 @@ public: unsigned cleanupSize, unsigned fixupDepth, EHScopeStack::stable_iterator enclosingNormal, EHScopeStack::stable_iterator enclosingEH) - : EHScope(EHScope::Cleanup, enclosingEH), EnclosingNormal(enclosingNormal), - NormalBlock(nullptr), ActiveFlag(nullptr), ExtInfo(nullptr) { + : EHScope(EHScope::Cleanup, enclosingEH), + EnclosingNormal(enclosingNormal), NormalBlock(nullptr), + ActiveFlag(nullptr), ExtInfo(nullptr), FixupDepth(fixupDepth) { CleanupBits.IsNormalCleanup = isNormal; CleanupBits.IsEHCleanup = isEH; CleanupBits.IsActive = isActive; @@ -297,7 +299,6 @@ public: CleanupBits.TestFlagInNormalCleanup = false; CleanupBits.TestFlagInEHCleanup = false; CleanupBits.CleanupSize = cleanupSize; - CleanupBits.FixupDepth = fixupDepth; assert(CleanupBits.CleanupSize == cleanupSize && "cleanup size overflow"); } @@ -343,7 +344,7 @@ public: return CleanupBits.TestFlagInEHCleanup; } - unsigned getFixupDepth() const { return CleanupBits.FixupDepth; } + unsigned getFixupDepth() const { return FixupDepth; } EHScopeStack::stable_iterator getEnclosingNormalCleanup() const { return EnclosingNormal; } @@ -451,6 +452,7 @@ public: EHFilterScope(unsigned numFilters) : EHScope(Filter, EHScopeStack::stable_end()) { FilterBits.NumFilters = numFilters; + assert(FilterBits.NumFilters == numFilters && "NumFilters overflow"); } static size_t getSizeForNumFilters(unsigned numFilters) { @@ -540,7 +542,7 @@ public: Size = EHPadEndScope::getSize(); break; } - Ptr += llvm::RoundUpToAlignment(Size, ScopeStackAlignment); + Ptr += llvm::alignTo(Size, ScopeStackAlignment); return *this; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp index 5df8519..0607a51 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp @@ -13,6 +13,7 @@ #include "CGDebugInfo.h" #include "CGBlocks.h" +#include "CGRecordLayout.h" #include "CGCXXABI.h" #include "CGObjCRuntime.h" #include "CodeGenFunction.h" @@ -168,10 +169,10 @@ llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context, } // Check namespace. 
- if (const NamespaceDecl *NSDecl = dyn_cast<NamespaceDecl>(Context)) + if (const auto *NSDecl = dyn_cast<NamespaceDecl>(Context)) return getOrCreateNameSpace(NSDecl); - if (const RecordDecl *RDecl = dyn_cast<RecordDecl>(Context)) + if (const auto *RDecl = dyn_cast<RecordDecl>(Context)) if (!RDecl->isDependentType()) return getOrCreateType(CGM.getContext().getTypeDeclType(RDecl), getOrCreateMainFile()); @@ -184,30 +185,32 @@ StringRef CGDebugInfo::getFunctionName(const FunctionDecl *FD) { FunctionTemplateSpecializationInfo *Info = FD->getTemplateSpecializationInfo(); - if (!Info && FII && !CGM.getCodeGenOpts().EmitCodeView) + // Emit the unqualified name in normal operation. LLVM and the debugger can + // compute the fully qualified name from the scope chain. If we're only + // emitting line table info, there won't be any scope chains, so emit the + // fully qualified name here so that stack traces are more accurate. + // FIXME: Do this when emitting DWARF as well as when emitting CodeView after + // evaluating the size impact. + bool UseQualifiedName = DebugKind == codegenoptions::DebugLineTablesOnly && + CGM.getCodeGenOpts().EmitCodeView; + + if (!Info && FII && !UseQualifiedName) return FII->getName(); - // Otherwise construct human readable name for debug info. SmallString<128> NS; llvm::raw_svector_ostream OS(NS); PrintingPolicy Policy(CGM.getLangOpts()); - - if (CGM.getCodeGenOpts().EmitCodeView) { - // Print a fully qualified name like MSVC would. - Policy.MSVCFormatting = true; - FD->printQualifiedName(OS, Policy); - } else { - // Print the unqualified name with some template arguments. This is what - // DWARF-based debuggers expect. + Policy.MSVCFormatting = CGM.getCodeGenOpts().EmitCodeView; + if (!UseQualifiedName) FD->printName(OS); - // Add any template specialization args. - if (Info) { - const TemplateArgumentList *TArgs = Info->TemplateArguments; - const TemplateArgument *Args = TArgs->data(); - unsigned NumArgs = TArgs->size(); - TemplateSpecializationType::PrintTemplateArgumentList(OS, Args, NumArgs, - Policy); - } + else + FD->printQualifiedName(OS, Policy); + + // Add any template specialization args. + if (Info) { + const TemplateArgumentList *TArgs = Info->TemplateArguments; + TemplateSpecializationType::PrintTemplateArgumentList(OS, TArgs->asArray(), + Policy); } // Copy this name on the side and use its reference. @@ -219,21 +222,18 @@ StringRef CGDebugInfo::getObjCMethodName(const ObjCMethodDecl *OMD) { llvm::raw_svector_ostream OS(MethodName); OS << (OMD->isInstanceMethod() ? 
'-' : '+') << '['; const DeclContext *DC = OMD->getDeclContext(); - if (const ObjCImplementationDecl *OID = - dyn_cast<const ObjCImplementationDecl>(DC)) { + if (const auto *OID = dyn_cast<ObjCImplementationDecl>(DC)) { OS << OID->getName(); - } else if (const ObjCInterfaceDecl *OID = - dyn_cast<const ObjCInterfaceDecl>(DC)) { + } else if (const auto *OID = dyn_cast<ObjCInterfaceDecl>(DC)) { OS << OID->getName(); - } else if (const ObjCCategoryDecl *OC = dyn_cast<ObjCCategoryDecl>(DC)) { + } else if (const auto *OC = dyn_cast<ObjCCategoryDecl>(DC)) { if (OC->IsClassExtension()) { OS << OC->getClassInterface()->getName(); } else { - OS << ((const NamedDecl *)OC)->getIdentifier()->getNameStart() << '(' + OS << OC->getIdentifier()->getNameStart() << '(' << OC->getIdentifier()->getNameStart() << ')'; } - } else if (const ObjCCategoryImplDecl *OCD = - dyn_cast<const ObjCCategoryImplDecl>(DC)) { + } else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) { OS << ((const NamedDecl *)OCD)->getIdentifier()->getNameStart() << '(' << OCD->getIdentifier()->getNameStart() << ')'; } else if (isa<ObjCProtocolDecl>(DC)) { @@ -254,20 +254,56 @@ StringRef CGDebugInfo::getSelectorName(Selector S) { } StringRef CGDebugInfo::getClassName(const RecordDecl *RD) { - // quick optimization to avoid having to intern strings that are already - // stored reliably elsewhere - if (!isa<ClassTemplateSpecializationDecl>(RD)) - return RD->getName(); - - SmallString<128> Name; - { + if (isa<ClassTemplateSpecializationDecl>(RD)) { + SmallString<128> Name; llvm::raw_svector_ostream OS(Name); RD->getNameForDiagnostic(OS, CGM.getContext().getPrintingPolicy(), /*Qualified*/ false); + + // Copy this name on the side and use its reference. + return internString(Name); } - // Copy this name on the side and use its reference. - return internString(Name); + // quick optimization to avoid having to intern strings that are already + // stored reliably elsewhere + if (const IdentifierInfo *II = RD->getIdentifier()) + return II->getName(); + + // The CodeView printer in LLVM wants to see the names of unnamed types: it is + // used to reconstruct the fully qualified type names. + if (CGM.getCodeGenOpts().EmitCodeView) { + if (const TypedefNameDecl *D = RD->getTypedefNameForAnonDecl()) { + assert(RD->getDeclContext() == D->getDeclContext() && + "Typedef should not be in another decl context!"); + assert(D->getDeclName().getAsIdentifierInfo() && + "Typedef was not named!"); + return D->getDeclName().getAsIdentifierInfo()->getName(); + } + + if (CGM.getLangOpts().CPlusPlus) { + StringRef Name; + + ASTContext &Context = CGM.getContext(); + if (const DeclaratorDecl *DD = Context.getDeclaratorForUnnamedTagDecl(RD)) + // Anonymous types without a name for linkage purposes have their + // declarator mangled in if they have one. + Name = DD->getName(); + else if (const TypedefNameDecl *TND = + Context.getTypedefNameForUnnamedTagDecl(RD)) + // Anonymous types without a name for linkage purposes have their + // associate typedef mangled in if they have one. 
+ Name = TND->getName(); + + if (!Name.empty()) { + SmallString<256> UnnamedType("<unnamed-type-"); + UnnamedType += Name; + UnnamedType += '>'; + return internString(UnnamedType); + } + } + } + + return StringRef(); } llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { @@ -383,6 +419,8 @@ void CGDebugInfo::CreateCompileUnit() { LangTag = llvm::dwarf::DW_LANG_C_plus_plus; } else if (LO.ObjC1) { LangTag = llvm::dwarf::DW_LANG_ObjC; + } else if (LO.RenderScript) { + LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript; } else if (LO.C99) { LangTag = llvm::dwarf::DW_LANG_C99; } else { @@ -396,16 +434,27 @@ void CGDebugInfo::CreateCompileUnit() { if (LO.ObjC1) RuntimeVers = LO.ObjCRuntime.isNonFragile() ? 2 : 1; + llvm::DICompileUnit::DebugEmissionKind EmissionKind; + switch (DebugKind) { + case codegenoptions::NoDebugInfo: + case codegenoptions::LocTrackingOnly: + EmissionKind = llvm::DICompileUnit::NoDebug; + break; + case codegenoptions::DebugLineTablesOnly: + EmissionKind = llvm::DICompileUnit::LineTablesOnly; + break; + case codegenoptions::LimitedDebugInfo: + case codegenoptions::FullDebugInfo: + EmissionKind = llvm::DICompileUnit::FullDebug; + break; + } + // Create new compile unit. // FIXME - Eliminate TheCU. TheCU = DBuilder.createCompileUnit( LangTag, remapDIPath(MainFileName), remapDIPath(getCurrentDirname()), Producer, LO.Optimize, CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers, - CGM.getCodeGenOpts().SplitDwarfFile, - DebugKind <= CodeGenOptions::DebugLineTablesOnly - ? llvm::DIBuilder::LineTablesOnly - : llvm::DIBuilder::FullDebug, - 0 /* DWOid */, DebugKind != CodeGenOptions::LocTrackingOnly); + CGM.getCodeGenOpts().SplitDwarfFile, EmissionKind, 0 /* DWOid */); } llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { @@ -463,39 +512,11 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return SelTy; } - case BuiltinType::OCLImage1d: - return getOrCreateStructPtrType("opencl_image1d_t", OCLImage1dDITy); - case BuiltinType::OCLImage1dArray: - return getOrCreateStructPtrType("opencl_image1d_array_t", - OCLImage1dArrayDITy); - case BuiltinType::OCLImage1dBuffer: - return getOrCreateStructPtrType("opencl_image1d_buffer_t", - OCLImage1dBufferDITy); - case BuiltinType::OCLImage2d: - return getOrCreateStructPtrType("opencl_image2d_t", OCLImage2dDITy); - case BuiltinType::OCLImage2dArray: - return getOrCreateStructPtrType("opencl_image2d_array_t", - OCLImage2dArrayDITy); - case BuiltinType::OCLImage2dDepth: - return getOrCreateStructPtrType("opencl_image2d_depth_t", - OCLImage2dDepthDITy); - case BuiltinType::OCLImage2dArrayDepth: - return getOrCreateStructPtrType("opencl_image2d_array_depth_t", - OCLImage2dArrayDepthDITy); - case BuiltinType::OCLImage2dMSAA: - return getOrCreateStructPtrType("opencl_image2d_msaa_t", - OCLImage2dMSAADITy); - case BuiltinType::OCLImage2dArrayMSAA: - return getOrCreateStructPtrType("opencl_image2d_array_msaa_t", - OCLImage2dArrayMSAADITy); - case BuiltinType::OCLImage2dMSAADepth: - return getOrCreateStructPtrType("opencl_image2d_msaa_depth_t", - OCLImage2dMSAADepthDITy); - case BuiltinType::OCLImage2dArrayMSAADepth: - return getOrCreateStructPtrType("opencl_image2d_array_msaa_depth_t", - OCLImage2dArrayMSAADepthDITy); - case BuiltinType::OCLImage3d: - return getOrCreateStructPtrType("opencl_image3d_t", OCLImage3dDITy); +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + case BuiltinType::Id: \ + return getOrCreateStructPtrType("opencl_" #ImgType "_" #Suffix "_t", \ + SingletonId); +#include 
"clang/Basic/OpenCLImageTypes.def" case BuiltinType::OCLSampler: return DBuilder.createBasicType( "opencl_sampler_t", CGM.getContext().getTypeSize(BT), @@ -545,7 +566,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { case BuiltinType::Half: case BuiltinType::Float: case BuiltinType::LongDouble: + case BuiltinType::Float128: case BuiltinType::Double: + // FIXME: For targets where long double and __float128 have the same size, + // they are currently indistinguishable in the debugger without some + // special treatment. However, there is currently no consensus on encoding + // and this should be updated once a DWARF encoding exists for distinct + // floating point types of the same size. Encoding = llvm::dwarf::DW_ATE_float; break; } @@ -660,10 +687,6 @@ static SmallString<256> getUniqueTagTypeName(const TagType *Ty, if (!hasCXXMangling(TD, TheCU) || !TD->isExternallyVisible()) return FullName; - // Microsoft Mangler does not have support for mangleCXXRTTIName yet. - if (CGM.getTarget().getCXXABI().isMicrosoft()) - return FullName; - // TODO: This is using the RTTI name. Is there a better way to get // a unique string for a type? llvm::raw_svector_ostream Out(FullName); @@ -817,10 +840,10 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, /*qualified*/ false); TemplateSpecializationType::PrintTemplateArgumentList( - OS, Ty->getArgs(), Ty->getNumArgs(), + OS, Ty->template_arguments(), CGM.getContext().getPrintingPolicy()); - TypeAliasDecl *AliasDecl = cast<TypeAliasTemplateDecl>( + auto *AliasDecl = cast<TypeAliasTemplateDecl>( Ty->getTemplateName().getAsTemplateDecl())->getTemplatedDecl(); SourceLocation Loc = AliasDecl->getLocation(); @@ -842,6 +865,39 @@ llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty, getDeclContextDescriptor(Ty->getDecl())); } +static unsigned getDwarfCC(CallingConv CC) { + switch (CC) { + case CC_C: + // Avoid emitting DW_AT_calling_convention if the C convention was used. + return 0; + + case CC_X86StdCall: + return llvm::dwarf::DW_CC_BORLAND_stdcall; + case CC_X86FastCall: + return llvm::dwarf::DW_CC_BORLAND_msfastcall; + case CC_X86ThisCall: + return llvm::dwarf::DW_CC_BORLAND_thiscall; + case CC_X86VectorCall: + return llvm::dwarf::DW_CC_LLVM_vectorcall; + case CC_X86Pascal: + return llvm::dwarf::DW_CC_BORLAND_pascal; + + // FIXME: Create new DW_CC_ codes for these calling conventions. + case CC_X86_64Win64: + case CC_X86_64SysV: + case CC_AAPCS: + case CC_AAPCS_VFP: + case CC_IntelOclBicc: + case CC_SpirFunction: + case CC_OpenCLKernel: + case CC_Swift: + case CC_PreserveMost: + case CC_PreserveAll: + return 0; + } + return 0; +} + llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty, llvm::DIFile *Unit) { SmallVector<llvm::Metadata *, 16> EltTys; @@ -853,15 +909,16 @@ llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty, // otherwise emit it as a variadic function. 
if (isa<FunctionNoProtoType>(Ty)) EltTys.push_back(DBuilder.createUnspecifiedParameter()); - else if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(Ty)) { - for (unsigned i = 0, e = FPT->getNumParams(); i != e; ++i) - EltTys.push_back(getOrCreateType(FPT->getParamType(i), Unit)); + else if (const auto *FPT = dyn_cast<FunctionProtoType>(Ty)) { + for (const QualType &ParamType : FPT->param_types()) + EltTys.push_back(getOrCreateType(ParamType, Unit)); if (FPT->isVariadic()) EltTys.push_back(DBuilder.createUnspecifiedParameter()); } llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys); - return DBuilder.createSubroutineType(EltTypeArray); + return DBuilder.createSubroutineType(EltTypeArray, 0, + getDwarfCC(Ty->getCallConv())); } /// Convert an AccessSpecifier into the corresponding DINode flag. @@ -890,10 +947,38 @@ static unsigned getAccessFlag(AccessSpecifier Access, const RecordDecl *RD) { llvm_unreachable("unexpected access enumerator"); } -llvm::DIType *CGDebugInfo::createFieldType( - StringRef name, QualType type, uint64_t sizeInBitsOverride, - SourceLocation loc, AccessSpecifier AS, uint64_t offsetInBits, - llvm::DIFile *tunit, llvm::DIScope *scope, const RecordDecl *RD) { +llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl, + llvm::DIScope *RecordTy, + const RecordDecl *RD) { + StringRef Name = BitFieldDecl->getName(); + QualType Ty = BitFieldDecl->getType(); + SourceLocation Loc = BitFieldDecl->getLocation(); + llvm::DIFile *VUnit = getOrCreateFile(Loc); + llvm::DIType *DebugType = getOrCreateType(Ty, VUnit); + + // Get the location for the field. + llvm::DIFile *File = getOrCreateFile(Loc); + unsigned Line = getLineNumber(Loc); + + const CGBitFieldInfo &BitFieldInfo = + CGM.getTypes().getCGRecordLayout(RD).getBitFieldInfo(BitFieldDecl); + uint64_t SizeInBits = BitFieldInfo.Size; + assert(SizeInBits > 0 && "found named 0-width bitfield"); + unsigned AlignInBits = CGM.getContext().getTypeAlign(Ty); + uint64_t StorageOffsetInBits = + CGM.getContext().toBits(BitFieldInfo.StorageOffset); + uint64_t OffsetInBits = StorageOffsetInBits + BitFieldInfo.Offset; + unsigned Flags = getAccessFlag(BitFieldDecl->getAccess(), RD); + return DBuilder.createBitFieldMemberType( + RecordTy, Name, File, Line, SizeInBits, AlignInBits, OffsetInBits, + StorageOffsetInBits, Flags, DebugType); +} + +llvm::DIType * +CGDebugInfo::createFieldType(StringRef name, QualType type, SourceLocation loc, + AccessSpecifier AS, uint64_t offsetInBits, + llvm::DIFile *tunit, llvm::DIScope *scope, + const RecordDecl *RD) { llvm::DIType *debugType = getOrCreateType(type, tunit); // Get the location for the field. 
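createBitFieldType above derives the DWARF bit offset from the record layout: the byte offset of the storage unit, converted to bits, plus the field's offset inside that unit. Restated on plain integers, assuming a little-endian layout so offsets count from the low end:

    #include <cassert>
    #include <cstdint>

    struct BitFieldInfoExample {
      uint64_t StorageOffsetInBytes; // byte offset of the storage unit in the record
      uint64_t OffsetInStorageUnit;  // bit offset of the field within that unit
      uint64_t SizeInBits;           // width of the field
    };

    uint64_t dwarfBitOffset(const BitFieldInfoExample &Info) {
      uint64_t StorageOffsetInBits = Info.StorageOffsetInBytes * 8; // toBits()
      return StorageOffsetInBits + Info.OffsetInStorageUnit;
    }

    int main() {
      // Models  struct { unsigned a : 3; unsigned b : 5; }  with one 4-byte unit:
      BitFieldInfoExample B{/*StorageOffsetInBytes=*/0,
                            /*OffsetInStorageUnit=*/3,
                            /*SizeInBits=*/5};
      assert(dwarfBitOffset(B) == 3); // 'b' starts at bit 3 of the record
      return 0;
    }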
@@ -906,9 +991,6 @@ llvm::DIType *CGDebugInfo::createFieldType( TypeInfo TI = CGM.getContext().getTypeInfo(type); SizeInBits = TI.Width; AlignInBits = TI.Align; - - if (sizeInBitsOverride) - SizeInBits = sizeInBitsOverride; } unsigned flags = getAccessFlag(AS, RD); @@ -930,19 +1012,15 @@ void CGDebugInfo::CollectRecordLambdaFields( I != E; ++I, ++Field, ++fieldno) { const LambdaCapture &C = *I; if (C.capturesVariable()) { + SourceLocation Loc = C.getLocation(); + assert(!Field->isBitField() && "lambdas don't have bitfield members!"); VarDecl *V = C.getCapturedVar(); - llvm::DIFile *VUnit = getOrCreateFile(C.getLocation()); StringRef VName = V->getName(); - uint64_t SizeInBitsOverride = 0; - if (Field->isBitField()) { - SizeInBitsOverride = Field->getBitWidthValue(CGM.getContext()); - assert(SizeInBitsOverride && "found named 0-width bitfield"); - } - llvm::DIType *fieldType = createFieldType( - VName, Field->getType(), SizeInBitsOverride, C.getLocation(), - Field->getAccess(), layout.getFieldOffset(fieldno), VUnit, RecordTy, - CXXDecl); - elements.push_back(fieldType); + llvm::DIFile *VUnit = getOrCreateFile(Loc); + llvm::DIType *FieldType = createFieldType( + VName, Field->getType(), Loc, Field->getAccess(), + layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl); + elements.push_back(FieldType); } else if (C.capturesThis()) { // TODO: Need to handle 'this' in some way by probably renaming the // this of the lambda class and having a field member of 'this' or @@ -952,7 +1030,7 @@ void CGDebugInfo::CollectRecordLambdaFields( llvm::DIFile *VUnit = getOrCreateFile(f->getLocation()); QualType type = f->getType(); llvm::DIType *fieldType = createFieldType( - "this", type, 0, f->getLocation(), f->getAccess(), + "this", type, f->getLocation(), f->getAccess(), layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl); elements.push_back(fieldType); @@ -1000,24 +1078,23 @@ void CGDebugInfo::CollectRecordNormalField( if (name.empty() && !type->isRecordType()) return; - uint64_t SizeInBitsOverride = 0; + llvm::DIType *FieldType; if (field->isBitField()) { - SizeInBitsOverride = field->getBitWidthValue(CGM.getContext()); - assert(SizeInBitsOverride && "found named 0-width bitfield"); + FieldType = createBitFieldType(field, RecordTy, RD); + } else { + FieldType = + createFieldType(name, type, field->getLocation(), field->getAccess(), + OffsetInBits, tunit, RecordTy, RD); } - llvm::DIType *fieldType = - createFieldType(name, type, SizeInBitsOverride, field->getLocation(), - field->getAccess(), OffsetInBits, tunit, RecordTy, RD); - - elements.push_back(fieldType); + elements.push_back(FieldType); } void CGDebugInfo::CollectRecordFields( const RecordDecl *record, llvm::DIFile *tunit, SmallVectorImpl<llvm::Metadata *> &elements, llvm::DICompositeType *RecordTy) { - const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(record); + const auto *CXXDecl = dyn_cast<CXXRecordDecl>(record); if (CXXDecl && CXXDecl->isLambda()) CollectRecordLambdaFields(CXXDecl, elements, RecordTy); @@ -1031,6 +1108,8 @@ void CGDebugInfo::CollectRecordFields( // the corresponding declarations in the source program. 
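CollectRecordLambdaFields walks a lambda's captures because each capture is a field of the compiler-generated closure class, so its debug info is emitted field by field like any other record. A rough hand-written model of such a closure (the real type is unnamed and compiler-internal):

    // Approximation only: the actual closure type is generated by the compiler.
    struct HandWrittenClosure {
      int x;   // by-value capture: the field holds a copy
      int &y;  // by-reference capture: the field refers to the enclosing local
      int operator()() const { return x + y; }
    };

    int useLambda(int x) {
      int y = 2;
      auto f = [x, &y] { return x + y; }; // closure layout resembles the struct
      return f() + HandWrittenClosure{x, y}();
    }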
for (const auto *I : record->decls()) if (const auto *V = dyn_cast<VarDecl>(I)) { + if (V->hasAttr<NoDebugAttr>()) + continue; // Reuse the existing static member declaration if one exists auto MI = StaticDataMemberCache.find(V->getCanonicalDecl()); if (MI != StaticDataMemberCache.end()) { @@ -1112,13 +1191,14 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType( if (Func->getExtProtoInfo().RefQualifier == RQ_RValue) Flags |= llvm::DINode::FlagRValueReference; - return DBuilder.createSubroutineType(EltTypeArray, Flags); + return DBuilder.createSubroutineType(EltTypeArray, Flags, + getDwarfCC(Func->getCallConv())); } /// isFunctionLocalClass - Return true if CXXRecordDecl is defined /// inside a function. static bool isFunctionLocalClass(const CXXRecordDecl *RD) { - if (const CXXRecordDecl *NRD = dyn_cast<CXXRecordDecl>(RD->getDeclContext())) + if (const auto *NRD = dyn_cast<CXXRecordDecl>(RD->getDeclContext())) return isFunctionLocalClass(NRD); if (isa<FunctionDecl>(RD->getDeclContext())) return true; @@ -1136,6 +1216,11 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( // Since a single ctor/dtor corresponds to multiple functions, it doesn't // make sense to give a single ctor/dtor a linkage name. StringRef MethodLinkageName; + // FIXME: 'isFunctionLocalClass' seems like an arbitrary/unintentional + // property to use here. It may've been intended to model "is non-external + // type" but misses cases of non-function-local but non-external classes such + // as those in anonymous namespaces as well as the reverse - external types + // that are function local, such as those in (non-local) inline functions. if (!IsCtorOrDtor && !isFunctionLocalClass(Method->getParent())) MethodLinkageName = CGM.getMangledName(Method); @@ -1151,6 +1236,8 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( llvm::DIType *ContainingType = nullptr; unsigned Virtuality = 0; unsigned VIndex = 0; + unsigned Flags = 0; + int ThisAdjustment = 0; if (Method->isVirtual()) { if (Method->isPure()) @@ -1158,26 +1245,45 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( else Virtuality = llvm::dwarf::DW_VIRTUALITY_virtual; - // It doesn't make sense to give a virtual destructor a vtable index, - // since a single destructor has two entries in the vtable. - // FIXME: Add proper support for debug info for virtual calls in - // the Microsoft ABI, where we may use multiple vptrs to make a vftable - // lookup if we have multiple or virtual inheritance. - if (!isa<CXXDestructorDecl>(Method) && - !CGM.getTarget().getCXXABI().isMicrosoft()) - VIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(Method); + if (CGM.getTarget().getCXXABI().isItaniumFamily()) { + // It doesn't make sense to give a virtual destructor a vtable index, + // since a single destructor has two entries in the vtable. + if (!isa<CXXDestructorDecl>(Method)) + VIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(Method); + } else { + // Emit MS ABI vftable information. There is only one entry for the + // deleting dtor. + const auto *DD = dyn_cast<CXXDestructorDecl>(Method); + GlobalDecl GD = DD ? GlobalDecl(DD, Dtor_Deleting) : GlobalDecl(Method); + MicrosoftVTableContext::MethodVFTableLocation ML = + CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD); + VIndex = ML.Index; + + // CodeView only records the vftable offset in the class that introduces + // the virtual method. 
This is possible because, unlike Itanium, the MS + // C++ ABI does not include all virtual methods from non-primary bases in + // the vtable for the most derived class. For example, if C inherits from + // A and B, C's primary vftable will not include B's virtual methods. + if (Method->begin_overridden_methods() == Method->end_overridden_methods()) + Flags |= llvm::DINode::FlagIntroducedVirtual; + + // The 'this' adjustment accounts for both the virtual and non-virtual + // portions of the adjustment. Presumably the debugger only uses it when + // it knows the dynamic type of an object. + ThisAdjustment = CGM.getCXXABI() + .getVirtualFunctionPrologueThisAdjustment(GD) + .getQuantity(); + } ContainingType = RecordTy; } - unsigned Flags = 0; if (Method->isImplicit()) Flags |= llvm::DINode::FlagArtificial; Flags |= getAccessFlag(Method->getAccess(), Method->getParent()); - if (const CXXConstructorDecl *CXXC = dyn_cast<CXXConstructorDecl>(Method)) { + if (const auto *CXXC = dyn_cast<CXXConstructorDecl>(Method)) { if (CXXC->isExplicit()) Flags |= llvm::DINode::FlagExplicit; - } else if (const CXXConversionDecl *CXXC = - dyn_cast<CXXConversionDecl>(Method)) { + } else if (const auto *CXXC = dyn_cast<CXXConversionDecl>(Method)) { if (CXXC->isExplicit()) Flags |= llvm::DINode::FlagExplicit; } @@ -1191,9 +1297,9 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( llvm::DINodeArray TParamsArray = CollectFunctionTemplateParams(Method, Unit); llvm::DISubprogram *SP = DBuilder.createMethod( RecordTy, MethodName, MethodLinkageName, MethodDefUnit, MethodLine, - MethodTy, /*isLocalToUnit=*/false, - /* isDefinition=*/false, Virtuality, VIndex, ContainingType, Flags, - CGM.getLangOpts().Optimize, TParamsArray.get()); + MethodTy, /*isLocalToUnit=*/false, /*isDefinition=*/false, Virtuality, + VIndex, ThisAdjustment, ContainingType, Flags, CGM.getLangOpts().Optimize, + TParamsArray.get()); SPCache[Method->getCanonicalDecl()].reset(SP); @@ -1246,7 +1352,7 @@ void CGDebugInfo::CollectCXXBases(const CXXRecordDecl *RD, llvm::DIFile *Unit, unsigned BFlags = 0; uint64_t BaseOffset; - const CXXRecordDecl *Base = + const auto *Base = cast<CXXRecordDecl>(BI.getType()->getAs<RecordType>()->getDecl()); if (BI.isVirtual()) { @@ -1334,8 +1440,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, llvm::Constant *V = nullptr; // Special case member data pointer null values since they're actually -1 // instead of zero. - if (const MemberPointerType *MPT = - dyn_cast<MemberPointerType>(T.getTypePtr())) + if (const auto *MPT = dyn_cast<MemberPointerType>(T.getTypePtr())) // But treat member function pointers as simple zero integers because // it's easier than having a special case in LLVM's CodeGen. 
If LLVM // CodeGen grows handling for values of non-null member function @@ -1346,7 +1451,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, if (!V) V = llvm::ConstantInt::get(CGM.Int8Ty, 0); TemplateParams.push_back(DBuilder.createTemplateValueParameter( - TheCU, Name, TTy, cast<llvm::Constant>(V))); + TheCU, Name, TTy, V)); } break; case TemplateArgument::Template: TemplateParams.push_back(DBuilder.createTemplateTemplateParameter( @@ -1367,7 +1472,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, assert(V && "Expression in template argument isn't constant"); llvm::DIType *TTy = getOrCreateType(T, Unit); TemplateParams.push_back(DBuilder.createTemplateValueParameter( - TheCU, Name, TTy, cast<llvm::Constant>(V->stripPointerCasts()))); + TheCU, Name, TTy, V->stripPointerCasts())); } break; // And the following should never occur: case TemplateArgument::TemplateExpansion: @@ -1446,7 +1551,7 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit, llvm::DIType *CGDebugInfo::getOrCreateRecordType(QualType RTy, SourceLocation Loc) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); llvm::DIType *T = getOrCreateType(RTy, getOrCreateFile(Loc)); return T; } @@ -1458,22 +1563,17 @@ llvm::DIType *CGDebugInfo::getOrCreateInterfaceType(QualType D, llvm::DIType *CGDebugInfo::getOrCreateStandaloneType(QualType D, SourceLocation Loc) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); assert(!D.isNull() && "null type"); llvm::DIType *T = getOrCreateType(D, getOrCreateFile(Loc)); assert(T && "could not create debug info for type"); - // Composite types with UIDs were already retained by DIBuilder - // because they are only referenced by name in the IR. 
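The special case for null member data pointers reflects the Itanium C++ ABI, where a pointer to data member is stored as the member's byte offset; offset 0 names a real member, so null is encoded as -1. A small probe of that encoding, valid only on Itanium-ABI targets:

    #include <cassert>
    #include <cstring>

    struct S { int first; };

    int main() {
      int S::*null_mp = nullptr;
      unsigned char bytes[sizeof(null_mp)];
      std::memcpy(bytes, &null_mp, sizeof(null_mp));
      for (unsigned char b : bytes)
        assert(b == 0xFF); // all-ones, i.e. the -1 null encoding
      return 0;
    }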
- if (auto *CTy = dyn_cast<llvm::DICompositeType>(T)) - if (!CTy->getIdentifier().empty()) - return T; RetainedTypes.push_back(D.getAsOpaquePtr()); return T; } void CGDebugInfo::completeType(const EnumDecl *ED) { - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; QualType Ty = CGM.getContext().getEnumType(ED); void *TyPtr = Ty.getAsOpaquePtr(); @@ -1486,16 +1586,16 @@ void CGDebugInfo::completeType(const EnumDecl *ED) { } void CGDebugInfo::completeType(const RecordDecl *RD) { - if (DebugKind > CodeGenOptions::LimitedDebugInfo || + if (DebugKind > codegenoptions::LimitedDebugInfo || !CGM.getLangOpts().CPlusPlus) completeRequiredType(RD); } void CGDebugInfo::completeRequiredType(const RecordDecl *RD) { - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; - if (const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) + if (const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) if (CXXDecl->isDynamicClass()) return; @@ -1509,7 +1609,7 @@ void CGDebugInfo::completeRequiredType(const RecordDecl *RD) { } void CGDebugInfo::completeClassData(const RecordDecl *RD) { - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; QualType Ty = CGM.getContext().getRecordType(RD); void *TyPtr = Ty.getAsOpaquePtr(); @@ -1523,23 +1623,38 @@ void CGDebugInfo::completeClassData(const RecordDecl *RD) { static bool hasExplicitMemberDefinition(CXXRecordDecl::method_iterator I, CXXRecordDecl::method_iterator End) { - for (; I != End; ++I) - if (FunctionDecl *Tmpl = I->getInstantiatedFromMemberFunction()) + for (CXXMethodDecl *MD : llvm::make_range(I, End)) + if (FunctionDecl *Tmpl = MD->getInstantiatedFromMemberFunction()) if (!Tmpl->isImplicit() && Tmpl->isThisDeclarationADefinition() && - !I->getMemberSpecializationInfo()->isExplicitSpecialization()) + !MD->getMemberSpecializationInfo()->isExplicitSpecialization()) return true; return false; } -static bool shouldOmitDefinition(CodeGenOptions::DebugInfoKind DebugKind, - bool DebugTypeExtRefs, - const RecordDecl *RD, +/// Does a type definition exist in an imported clang module? +static bool isDefinedInClangModule(const RecordDecl *RD) { + if (!RD || !RD->isFromASTFile()) + return false; + if (!RD->isExternallyVisible() && RD->getName().empty()) + return false; + if (auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) { + assert(CXXDecl->isCompleteDefinition() && "incomplete record definition"); + if (CXXDecl->getTemplateSpecializationKind() != TSK_Undeclared) + // Make sure the instantiation is actually in a module. + if (CXXDecl->field_begin() != CXXDecl->field_end()) + return CXXDecl->field_begin()->isFromASTFile(); + } + + return true; +} + +static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind, + bool DebugTypeExtRefs, const RecordDecl *RD, const LangOptions &LangOpts) { - // Does the type exist in an imported clang module? 
- if (DebugTypeExtRefs && RD->isFromASTFile() && RD->getDefinition()) - return true; + if (DebugTypeExtRefs && isDefinedInClangModule(RD->getDefinition())) + return true; - if (DebugKind > CodeGenOptions::LimitedDebugInfo) + if (DebugKind > codegenoptions::LimitedDebugInfo) return false; if (!LangOpts.CPlusPlus) @@ -1548,7 +1663,7 @@ static bool shouldOmitDefinition(CodeGenOptions::DebugInfoKind DebugKind, if (!RD->isCompleteDefinitionRequired()) return true; - const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD); + const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD); if (!CXXDecl) return false; @@ -1557,8 +1672,7 @@ static bool shouldOmitDefinition(CodeGenOptions::DebugInfoKind DebugKind, return true; TemplateSpecializationKind Spec = TSK_Undeclared; - if (const ClassTemplateSpecializationDecl *SD = - dyn_cast<ClassTemplateSpecializationDecl>(RD)) + if (const auto *SD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) Spec = SD->getSpecializationKind(); if (Spec == TSK_ExplicitInstantiationDeclaration && @@ -1600,7 +1714,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) { if (!D || !D->isCompleteDefinition()) return FwdDecl; - if (const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) + if (const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) CollectContainingType(CXXDecl, FwdDecl); // Push the struct on region stack. @@ -1615,7 +1729,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) { // gdb tests will depend on a certain ordering at printout. The debug // information offsets are still correct if we merge them all together // though. - const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD); + const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD); if (CXXDecl) { CollectCXXBases(CXXDecl, DefUnit, EltTys, FwdDecl); CollectVTableInfo(CXXDecl, DefUnit, EltTys); @@ -1676,8 +1790,11 @@ llvm::DIType *CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, if (!ID) return nullptr; - // Return a forward declaration if this type was imported from a clang module. - if (DebugTypeExtRefs && ID->isFromASTFile() && ID->getDefinition()) + // Return a forward declaration if this type was imported from a clang module, + // and this is not the compile unit with the implementation of the type (which + // may contain hidden ivars). + if (DebugTypeExtRefs && ID->isFromASTFile() && ID->getDefinition() && + !ID->getImplementation()) return DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, ID->getName(), getDeclContextDescriptor(ID), Unit, 0); @@ -1739,11 +1856,14 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, bool IsRootModule = M ? !M->Parent : true; if (CreateSkeletonCU && IsRootModule) { + // PCH files don't have a signature field in the control block, + // but LLVM detects skeleton CUs by looking for a non-zero DWO id. + uint64_t Signature = Mod.getSignature() ? 
Mod.getSignature() : ~1ULL; llvm::DIBuilder DIB(CGM.getModule()); DIB.createCompileUnit(TheCU->getSourceLanguage(), Mod.getModuleName(), Mod.getPath(), TheCU->getProducer(), true, StringRef(), 0, Mod.getASTFile(), - llvm::DIBuilder::FullDebug, Mod.getSignature()); + llvm::DICompileUnit::FullDebug, Signature); DIB.finalize(); } llvm::DIModule *Parent = @@ -1942,7 +2062,7 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) { uint64_t Align; // FIXME: make getTypeAlign() aware of VLAs and incomplete array types - if (const VariableArrayType *VAT = dyn_cast<VariableArrayType>(Ty)) { + if (const auto *VAT = dyn_cast<VariableArrayType>(Ty)) { Size = 0; Align = CGM.getContext().getTypeAlign(CGM.getContext().getBaseElementType(VAT)); @@ -1975,7 +2095,7 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) { // int x[0]; // }; int64_t Count = -1; // Count == -1 is an unbounded array. - if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(Ty)) + if (const auto *CAT = dyn_cast<ConstantArrayType>(Ty)) Count = CAT->getSize().getZExtValue(); // FIXME: Verify this is right for VLAs. @@ -2003,12 +2123,35 @@ llvm::DIType *CGDebugInfo::CreateType(const RValueReferenceType *Ty, llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty, llvm::DIFile *U) { - uint64_t Size = - !Ty->isIncompleteType() ? CGM.getContext().getTypeSize(Ty) : 0; + unsigned Flags = 0; + uint64_t Size = 0; + + if (!Ty->isIncompleteType()) { + Size = CGM.getContext().getTypeSize(Ty); + + // Set the MS inheritance model. There is no flag for the unspecified model. + if (CGM.getTarget().getCXXABI().isMicrosoft()) { + switch (Ty->getMostRecentCXXRecordDecl()->getMSInheritanceModel()) { + case MSInheritanceAttr::Keyword_single_inheritance: + Flags |= llvm::DINode::FlagSingleInheritance; + break; + case MSInheritanceAttr::Keyword_multiple_inheritance: + Flags |= llvm::DINode::FlagMultipleInheritance; + break; + case MSInheritanceAttr::Keyword_virtual_inheritance: + Flags |= llvm::DINode::FlagVirtualInheritance; + break; + case MSInheritanceAttr::Keyword_unspecified_inheritance: + break; + } + } + } + llvm::DIType *ClassType = getOrCreateType(QualType(Ty->getClass(), 0), U); if (Ty->isMemberDataPointerType()) return DBuilder.createMemberPointerType( - getOrCreateType(Ty->getPointeeType(), U), ClassType, Size); + getOrCreateType(Ty->getPointeeType(), U), ClassType, Size, /*Align=*/0, + Flags); const FunctionProtoType *FPT = Ty->getPointeeType()->getAs<FunctionProtoType>(); @@ -2016,7 +2159,7 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty, getOrCreateInstanceMethodType(CGM.getContext().getPointerType(QualType( Ty->getClass(), FPT->getTypeQuals())), FPT, U), - ClassType, Size); + ClassType, Size, /*Align=*/0, Flags); } llvm::DIType *CGDebugInfo::CreateType(const AtomicType *Ty, llvm::DIFile *U) { @@ -2048,13 +2191,23 @@ llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) { // If this is just a forward declaration, construct an appropriately // marked node and just return it. if (isImportedFromModule || !ED->getDefinition()) { + // Note that it is possible for enums to be created as part of + // their own declcontext. In this case a FwdDecl will be created + // twice. This doesn't cause a problem because both FwdDecls are + // entered into the ReplaceMap: finalize() will replace the first + // FwdDecl with the second and then replace the second with + // complete type. 
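The inheritance-model flags added to CreateType for member pointer types matter because, under the Microsoft C++ ABI, the representation and size of a pointer to member depend on the inheritance model of the class, and a debugger needs the model to decode the value. A small illustration; the printed sizes are target- and ABI-dependent, and Itanium-ABI targets use one fixed representation instead:

    #include <cstdio>

    struct Single { void f(); };
    struct Base1  { void g(); };
    struct Base2  { void h(); };
    struct Multiple : Base1, Base2 { void f(); };
    struct Virtual  : virtual Base1 { void f(); };

    int main() {
      // Under the MS ABI these typically differ; under Itanium they are equal.
      std::printf("single inheritance:   %zu bytes\n", sizeof(void (Single::*)()));
      std::printf("multiple inheritance: %zu bytes\n", sizeof(void (Multiple::*)()));
      std::printf("virtual inheritance:  %zu bytes\n", sizeof(void (Virtual::*)()));
      return 0;
    }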
llvm::DIScope *EDContext = getDeclContextDescriptor(ED); llvm::DIFile *DefUnit = getOrCreateFile(ED->getLocation()); + llvm::TempDIScope TmpContext(DBuilder.createReplaceableCompositeType( + llvm::dwarf::DW_TAG_enumeration_type, "", TheCU, DefUnit, 0)); + unsigned Line = getLineNumber(ED->getLocation()); StringRef EDName = ED->getName(); llvm::DIType *RetTy = DBuilder.createReplaceableCompositeType( llvm::dwarf::DW_TAG_enumeration_type, EDName, EDContext, DefUnit, Line, 0, Size, Align, llvm::DINode::FlagFwdDecl, FullName); + ReplaceMap.emplace_back( std::piecewise_construct, std::make_tuple(Ty), std::make_tuple(static_cast<llvm::Metadata *>(RetTy))); @@ -2168,7 +2321,7 @@ llvm::DIType *CGDebugInfo::getTypeOrNull(QualType Ty) { void CGDebugInfo::completeTemplateDefinition( const ClassTemplateSpecializationDecl &SD) { - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; completeClassData(&SD); @@ -2220,8 +2373,12 @@ llvm::DIModule *CGDebugInfo::getParentModuleOrNull(const Decl *D) { // option. FullSourceLoc Loc(D->getLocation(), CGM.getContext().getSourceManager()); if (Module *M = ClangModuleMap->inferModuleFromLocation(Loc)) { + // This is a (sub-)module. auto Info = ExternalASTSource::ASTSourceDescriptor(*M); return getOrCreateModuleRef(Info, /*SkeletonCU=*/false); + } else { + // This the precompiled header being built. + return getOrCreateModuleRef(PCHDescriptor, /*SkeletonCU=*/false); } } @@ -2369,11 +2526,34 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { getTagForRecord(RD), RDName, RDContext, DefUnit, Line, 0, Size, Align, 0, FullName); + // Elements of composite types usually have back to the type, creating + // uniquing cycles. Distinct nodes are more efficient. + switch (RealDecl->getTag()) { + default: + llvm_unreachable("invalid composite type tag"); + + case llvm::dwarf::DW_TAG_array_type: + case llvm::dwarf::DW_TAG_enumeration_type: + // Array elements and most enumeration elements don't have back references, + // so they don't tend to be involved in uniquing cycles and there is some + // chance of merging them when linking together two modules. Only make + // them distinct if they are ODR-uniqued. + if (FullName.empty()) + break; + + case llvm::dwarf::DW_TAG_structure_type: + case llvm::dwarf::DW_TAG_union_type: + case llvm::dwarf::DW_TAG_class_type: + // Immediatley resolve to a distinct node. + RealDecl = + llvm::MDNode::replaceWithDistinct(llvm::TempDICompositeType(RealDecl)); + break; + } + RegionMap[Ty->getDecl()].reset(RealDecl); TypeCache[QualType(Ty, 0).getAsOpaquePtr()].reset(RealDecl); - if (const ClassTemplateSpecializationDecl *TSpecial = - dyn_cast<ClassTemplateSpecializationDecl>(RD)) + if (const auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD)) DBuilder.replaceArrays(RealDecl, llvm::DINodeArray(), CollectCXXTemplateParams(TSpecial, DefUnit)); return RealDecl; @@ -2420,7 +2600,7 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit, llvm::DIScope *&FDContext, llvm::DINodeArray &TParamsArray, unsigned &Flags) { - const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); + const auto *FD = cast<FunctionDecl>(GD.getDecl()); Name = getFunctionName(FD); // Use mangled name as linkage name for C/C++ functions. 
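Context for the linkage-name handling that follows: a C++ function's debug entry can carry both the source name and the mangled linkage name, and the linkage name is dropped when it would only repeat the source name. The mangled spellings in the comments below are the usual Itanium-ABI forms, written by hand for illustration:

    extern "C" int plain(int x) { return x; }    // linkage name equals "plain", so it is omitted
    int overloaded(int x) { return x; }          // typically mangled as _Z10overloadedi
    int overloaded(double x) { return (int)x; }  // typically mangled as _Z10overloadedd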
if (FD->hasPrototype()) { @@ -2430,13 +2610,12 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit, // No need to replicate the linkage name if it isn't different from the // subprogram name, no need to have it at all unless coverage is enabled or // debug is set to more than just line tables. - if (LinkageName == Name || - (!CGM.getCodeGenOpts().EmitGcovArcs && - !CGM.getCodeGenOpts().EmitGcovNotes && - DebugKind <= CodeGenOptions::DebugLineTablesOnly)) + if (LinkageName == Name || (!CGM.getCodeGenOpts().EmitGcovArcs && + !CGM.getCodeGenOpts().EmitGcovNotes && + DebugKind <= codegenoptions::DebugLineTablesOnly)) LinkageName = StringRef(); - if (DebugKind >= CodeGenOptions::LimitedDebugInfo) { + if (DebugKind >= codegenoptions::LimitedDebugInfo) { if (const NamespaceDecl *NSDecl = dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext())) FDContext = getOrCreateNameSpace(NSDecl); @@ -2513,15 +2692,15 @@ CGDebugInfo::getFunctionForwardDeclaration(const FunctionDecl *FD) { SmallVector<QualType, 16> ArgTypes; for (const ParmVarDecl *Parm: FD->parameters()) ArgTypes.push_back(Parm->getType()); - QualType FnType = - CGM.getContext().getFunctionType(FD->getReturnType(), ArgTypes, - FunctionProtoType::ExtProtoInfo()); + CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv(); + QualType FnType = CGM.getContext().getFunctionType( + FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC)); llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl( DContext, Name, LinkageName, Unit, Line, getOrCreateFunctionType(FD, FnType, Unit), !FD->isExternallyVisible(), /* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize, TParamsArray.get(), getFunctionDeclaration(FD)); - const FunctionDecl *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl()); + const auto *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl()); FwdDeclReplaceMap.emplace_back(std::piecewise_construct, std::make_tuple(CanonDecl), std::make_tuple(SP)); @@ -2553,7 +2732,7 @@ llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) { // we would otherwise do to get a type for a pointee. (forward declarations in // limited debug info, full definitions (if the type definition is available) // in unlimited debug info) - if (const TypeDecl *TD = dyn_cast<TypeDecl>(D)) + if (const auto *TD = dyn_cast<TypeDecl>(D)) return getOrCreateType(CGM.getContext().getTypeDeclType(TD), getOrCreateFile(TD->getLocation())); auto I = DeclCache.find(D->getCanonicalDecl()); @@ -2563,7 +2742,7 @@ llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) { // No definition for now. Emit a forward definition that might be // merged with a potential upcoming definition. 
- if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) + if (const auto *FD = dyn_cast<FunctionDecl>(D)) return getFunctionForwardDeclaration(FD); else if (const auto *VD = dyn_cast<VarDecl>(D)) return getGlobalVariableForwardDeclaration(VD); @@ -2572,10 +2751,10 @@ llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) { } llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) { - if (!D || DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (!D || DebugKind <= codegenoptions::DebugLineTablesOnly) return nullptr; - const FunctionDecl *FD = dyn_cast<FunctionDecl>(D); + const auto *FD = dyn_cast<FunctionDecl>(D); if (!FD) return nullptr; @@ -2584,8 +2763,7 @@ llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) { auto MI = SPCache.find(FD->getCanonicalDecl()); if (MI == SPCache.end()) { - if (const CXXMethodDecl *MD = - dyn_cast<CXXMethodDecl>(FD->getCanonicalDecl())) { + if (const auto *MD = dyn_cast<CXXMethodDecl>(FD->getCanonicalDecl())) { return CreateCXXMemberFunction(MD, getOrCreateFile(MD->getLocation()), cast<llvm::DICompositeType>(S)); } @@ -2612,14 +2790,18 @@ llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) { llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile *F) { - if (!D || DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (!D || DebugKind <= codegenoptions::DebugLineTablesOnly) // Create fake but valid subroutine type. Otherwise -verify would fail, and // subprogram DIE will miss DW_AT_decl_file and DW_AT_decl_line fields. return DBuilder.createSubroutineType(DBuilder.getOrCreateTypeArray(None)); - if (const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) + if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) return getOrCreateMethodType(Method, F); - if (const ObjCMethodDecl *OMethod = dyn_cast<ObjCMethodDecl>(D)) { + + const auto *FTy = FnType->getAs<FunctionType>(); + CallingConv CC = FTy ? FTy->getCallConv() : CallingConv::CC_C; + + if (const auto *OMethod = dyn_cast<ObjCMethodDecl>(D)) { // Add "self" and "_cmd" SmallVector<llvm::Metadata *, 16> Elts; @@ -2645,28 +2827,28 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D, Elts.push_back(DBuilder.createArtificialType( getOrCreateType(CGM.getContext().getObjCSelType(), F))); // Get rest of the arguments. - for (const auto *PI : OMethod->params()) + for (const auto *PI : OMethod->parameters()) Elts.push_back(getOrCreateType(PI->getType(), F)); // Variadic methods need a special marker at the end of the type list. if (OMethod->isVariadic()) Elts.push_back(DBuilder.createUnspecifiedParameter()); llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts); - return DBuilder.createSubroutineType(EltTypeArray); + return DBuilder.createSubroutineType(EltTypeArray, 0, getDwarfCC(CC)); } // Handle variadic function types; they need an additional // unspecified parameter. 
- if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) + if (const auto *FD = dyn_cast<FunctionDecl>(D)) if (FD->isVariadic()) { SmallVector<llvm::Metadata *, 16> EltTys; EltTys.push_back(getOrCreateType(FD->getReturnType(), F)); - if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(FnType)) - for (unsigned i = 0, e = FPT->getNumParams(); i != e; ++i) - EltTys.push_back(getOrCreateType(FPT->getParamType(i), F)); + if (const auto *FPT = dyn_cast<FunctionProtoType>(FnType)) + for (QualType ParamType : FPT->param_types()) + EltTys.push_back(getOrCreateType(ParamType, F)); EltTys.push_back(DBuilder.createUnspecifiedParameter()); llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys); - return DBuilder.createSubroutineType(EltTypeArray); + return DBuilder.createSubroutineType(EltTypeArray, 0, getDwarfCC(CC)); } return cast<llvm::DISubroutineType>(getOrCreateType(FnType, F)); @@ -2691,7 +2873,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, if (!HasDecl) { // Use llvm function name. LinkageName = Fn->getName(); - } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { + } else if (const auto *FD = dyn_cast<FunctionDecl>(D)) { // If there is a subprogram for this function available then use it. auto FI = SPCache.find(FD->getCanonicalDecl()); if (FI != SPCache.end()) { @@ -2704,7 +2886,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, } collectFunctionDeclProps(GD, Unit, Name, LinkageName, FDContext, TParamsArray, Flags); - } else if (const ObjCMethodDecl *OMD = dyn_cast<ObjCMethodDecl>(D)) { + } else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) { Name = getObjCMethodName(OMD); Flags |= llvm::DINode::FlagPrototyped; } else { @@ -2712,7 +2894,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, Name = Fn->getName(); Flags |= llvm::DINode::FlagPrototyped; } - if (!Name.empty() && Name[0] == '\01') + if (Name.startswith("\01")) Name = Name.substr(1); if (!HasDecl || D->isImplicit()) { @@ -2731,7 +2913,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, // are emitted as CU level entities by the backend. llvm::DISubprogram *SP = DBuilder.createFunction( FDContext, Name, LinkageName, Unit, LineNo, - getOrCreateFunctionType(D, FnType, Unit), Fn->hasInternalLinkage(), + getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(), true /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize, TParamsArray.get(), getFunctionDeclaration(D)); Fn->setSubprogram(SP); @@ -2739,7 +2921,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, // code for the initialization of globals. Do not record these decls // as they will overwrite the actual VarDecl Decl in the cache. if (HasDecl && isa<FunctionDecl>(D)) - DeclCache[D->getCanonicalDecl()].reset(static_cast<llvm::Metadata *>(SP)); + DeclCache[D->getCanonicalDecl()].reset(SP); // Push the function onto the lexical block stack. LexicalBlockStack.emplace_back(SP); @@ -2765,7 +2947,7 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, // If there is a DISubprogram for this function available then use it. 
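The variadic branch above appends an unspecified-parameters marker after the fixed parameters, which is how a prototype such as the following is described in the subroutine type:

    #include <cstdarg>

    // Fixed parameter 'count' is listed normally; the trailing '...' becomes a
    // DW_TAG_unspecified_parameters entry in the debug type.
    int sumInts(int count, ...) {
      va_list ap;
      va_start(ap, count);
      int total = 0;
      for (int i = 0; i < count; ++i)
        total += va_arg(ap, int);
      va_end(ap);
      return total;
    }

    int main() { return sumInts(3, 1, 2, 3) == 6 ? 0 : 1; }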
collectFunctionDeclProps(GD, Unit, Name, LinkageName, FDContext, TParamsArray, Flags); - } else if (const ObjCMethodDecl *OMD = dyn_cast<ObjCMethodDecl>(D)) { + } else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) { Name = getObjCMethodName(OMD); Flags |= llvm::DINode::FlagPrototyped; } else { @@ -2783,11 +2965,11 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, unsigned LineNo = getLineNumber(Loc); unsigned ScopeLine = 0; - DBuilder.createFunction(FDContext, Name, LinkageName, Unit, LineNo, - getOrCreateFunctionType(D, FnType, Unit), - false /*internalLinkage*/, true /*definition*/, - ScopeLine, Flags, CGM.getLangOpts().Optimize, - TParamsArray.get(), getFunctionDeclaration(D)); + DBuilder.retainType(DBuilder.createFunction( + FDContext, Name, LinkageName, Unit, LineNo, + getOrCreateFunctionType(D, FnType, Unit), false /*internalLinkage*/, + false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize, + TParamsArray.get(), getFunctionDeclaration(D))); } void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) { @@ -2820,7 +3002,7 @@ void CGDebugInfo::EmitLexicalBlockStart(CGBuilderTy &Builder, Builder.SetCurrentDebugLocation(llvm::DebugLoc::get( getLineNumber(Loc), getColumnNumber(Loc), LexicalBlockStack.back())); - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; // Create a new lexical block and push it on the stack. @@ -2834,7 +3016,7 @@ void CGDebugInfo::EmitLexicalBlockEnd(CGBuilderTy &Builder, // Provide an entry in the line table for the end of the block. EmitLocation(Builder, Loc); - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; LexicalBlockStack.pop_back(); @@ -2896,8 +3078,7 @@ llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, CGM.getTarget().getPointerAlign(0))) { CharUnits FieldOffsetInBytes = CGM.getContext().toCharUnitsFromBits(FieldOffset); - CharUnits AlignedOffsetInBytes = - FieldOffsetInBytes.RoundUpToAlignment(Align); + CharUnits AlignedOffsetInBytes = FieldOffsetInBytes.alignTo(Align); CharUnits NumPaddingBytes = AlignedOffsetInBytes - FieldOffsetInBytes; if (NumPaddingBytes.isPositive()) { @@ -2930,8 +3111,10 @@ llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, llvm::Optional<unsigned> ArgNo, CGBuilderTy &Builder) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); + if (VD->hasAttr<NoDebugAttr>()) + return; bool Unwritten = VD->isImplicit() || (isa<Decl>(VD->getDeclContext()) && @@ -2969,7 +3152,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, // otherwise it is 'self' or 'this'. 
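The early return on NoDebugAttr added to EmitDeclare means locals carrying the nodebug attribute get no variable entry at all. A minimal clang-oriented example; the attribute spelling below is the GNU-style one clang accepts on variable declarations:

    void locals() {
      int traced = 1;                            // gets a DW_TAG_variable entry
      __attribute__((nodebug)) int untraced = 2; // skipped by the early return
      (void)traced;
      (void)untraced;
    }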
if (isa<ImplicitParamDecl>(VD) && ArgNo && *ArgNo == 1) Flags |= llvm::DINode::FlagObjectPointer; - if (llvm::Argument *Arg = dyn_cast<llvm::Argument>(Storage)) + if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) if (Arg->getType()->isPointerTy() && !Arg->hasByValAttr() && !VD->getType()->isPointerType()) Expr.push_back(llvm::dwarf::DW_OP_deref); @@ -3005,10 +3188,10 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, return; } else if (isa<VariableArrayType>(VD->getType())) Expr.push_back(llvm::dwarf::DW_OP_deref); - } else if (const RecordType *RT = dyn_cast<RecordType>(VD->getType())) { + } else if (const auto *RT = dyn_cast<RecordType>(VD->getType())) { // If VD is an anonymous union then Storage represents value for // all union fields. - const RecordDecl *RD = cast<RecordDecl>(RT->getDecl()); + const auto *RD = cast<RecordDecl>(RT->getDecl()); if (RD->isUnion() && RD->isAnonymousStructOrUnion()) { // GDB has trouble finding local variables in anonymous unions, so we emit // artifical local variables for each of the members. @@ -3056,7 +3239,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, void CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage, CGBuilderTy &Builder) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); EmitDeclare(VD, Storage, llvm::None, Builder); } @@ -3071,11 +3254,13 @@ llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy, void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( const VarDecl *VD, llvm::Value *Storage, CGBuilderTy &Builder, const CGBlockInfo &blockInfo, llvm::Instruction *InsertPoint) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); if (Builder.GetInsertBlock() == nullptr) return; + if (VD->hasAttr<NoDebugAttr>()) + return; bool isByRef = VD->hasAttr<BlocksAttr>(); @@ -3139,7 +3324,7 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( void CGDebugInfo::EmitDeclareOfArgVariable(const VarDecl *VD, llvm::Value *AI, unsigned ArgNo, CGBuilderTy &Builder) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); EmitDeclare(VD, AI, ArgNo, Builder); } @@ -3158,7 +3343,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, unsigned ArgNo, llvm::Value *LocalAddr, CGBuilderTy &Builder) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); ASTContext &C = CGM.getContext(); const BlockDecl *blockDecl = block.getBlockDecl(); @@ -3175,25 +3360,25 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, CGM.getDataLayout().getStructLayout(block.StructureType); SmallVector<llvm::Metadata *, 16> fields; - fields.push_back(createFieldType("__isa", C.VoidPtrTy, 0, loc, AS_public, + fields.push_back(createFieldType("__isa", C.VoidPtrTy, loc, AS_public, blockLayout->getElementOffsetInBits(0), tunit, tunit)); - fields.push_back(createFieldType("__flags", C.IntTy, 0, loc, AS_public, + fields.push_back(createFieldType("__flags", C.IntTy, loc, AS_public, blockLayout->getElementOffsetInBits(1), tunit, tunit)); - fields.push_back(createFieldType("__reserved", C.IntTy, 0, loc, AS_public, + fields.push_back(createFieldType("__reserved", C.IntTy, loc, AS_public, blockLayout->getElementOffsetInBits(2), tunit, 
tunit)); auto *FnTy = block.getBlockExpr()->getFunctionType(); auto FnPtrType = CGM.getContext().getPointerType(FnTy->desugar()); - fields.push_back(createFieldType("__FuncPtr", FnPtrType, 0, loc, AS_public, + fields.push_back(createFieldType("__FuncPtr", FnPtrType, loc, AS_public, blockLayout->getElementOffsetInBits(3), tunit, tunit)); fields.push_back(createFieldType( "__descriptor", C.getPointerType(block.NeedsCopyDispose ? C.getBlockDescriptorExtendedType() : C.getBlockDescriptorType()), - 0, loc, AS_public, blockLayout->getElementOffsetInBits(4), tunit, tunit)); + loc, AS_public, blockLayout->getElementOffsetInBits(4), tunit, tunit)); // We want to sort the captures by offset, not because DWARF // requires this, but because we're paranoid about debuggers. @@ -3227,19 +3412,22 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, // Sort by offset. llvm::array_pod_sort(chunks.begin(), chunks.end()); - for (SmallVectorImpl<BlockLayoutChunk>::iterator i = chunks.begin(), - e = chunks.end(); - i != e; ++i) { - uint64_t offsetInBits = i->OffsetInBits; - const BlockDecl::Capture *capture = i->Capture; + for (const BlockLayoutChunk &Chunk : chunks) { + uint64_t offsetInBits = Chunk.OffsetInBits; + const BlockDecl::Capture *capture = Chunk.Capture; // If we have a null capture, this must be the C++ 'this' capture. if (!capture) { - const CXXMethodDecl *method = - cast<CXXMethodDecl>(blockDecl->getNonClosureContext()); - QualType type = method->getThisType(C); + QualType type; + if (auto *Method = + cast_or_null<CXXMethodDecl>(blockDecl->getNonClosureContext())) + type = Method->getThisType(C); + else if (auto *RDecl = dyn_cast<CXXRecordDecl>(blockDecl->getParent())) + type = QualType(RDecl->getTypeForDecl(), 0); + else + llvm_unreachable("unexpected block declcontext"); - fields.push_back(createFieldType("this", type, 0, loc, AS_public, + fields.push_back(createFieldType("this", type, loc, AS_public, offsetInBits, tunit, tunit)); continue; } @@ -3259,7 +3447,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, DBuilder.createMemberType(tunit, name, tunit, line, PtrInfo.Width, PtrInfo.Align, offsetInBits, 0, fieldType); } else { - fieldType = createFieldType(name, variable->getType(), 0, loc, AS_public, + fieldType = createFieldType(name, variable->getType(), loc, AS_public, offsetInBits, tunit, tunit); } fields.push_back(fieldType); @@ -3328,8 +3516,7 @@ llvm::DIGlobalVariable *CGDebugInfo::CollectAnonRecordDecls( // Ignore unnamed fields, but recurse into anonymous records. if (FieldName.empty()) { - const RecordType *RT = dyn_cast<RecordType>(Field->getType()); - if (RT) + if (const auto *RT = dyn_cast<RecordType>(Field->getType())) GV = CollectAnonRecordDecls(RT->getDecl(), Unit, LineNo, LinkageName, Var, DContext); continue; @@ -3337,14 +3524,16 @@ llvm::DIGlobalVariable *CGDebugInfo::CollectAnonRecordDecls( // Use VarDecl's Tag, Scope and Line number. GV = DBuilder.createGlobalVariable(DContext, FieldName, LinkageName, Unit, LineNo, FieldTy, - Var->hasInternalLinkage(), Var, nullptr); + Var->hasLocalLinkage(), Var, nullptr); } return GV; } void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, const VarDecl *D) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); + if (D->hasAttr<NoDebugAttr>()) + return; // Create global variable debug descriptor. 
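The anonymous-union handling mentioned above exists because the union's members share one storage location but are looked up by their own names; emitting an artificial local per member lets gdb resolve each name. The construct in question:

    void sharedStorage() {
      union {
        int   i;
        float f;
      }; // anonymous union: members are injected into the enclosing scope
      i = 42;   // debug info gets an artificial local named "i"
      f = 1.5f; // ... and another named "f" over the same storage
    }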
llvm::DIFile *Unit = nullptr; llvm::DIScope *DContext = nullptr; @@ -3368,21 +3557,23 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, } else { GV = DBuilder.createGlobalVariable( DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit), - Var->hasInternalLinkage(), Var, + Var->hasLocalLinkage(), Var, getOrCreateStaticDataMemberDeclarationOrNull(D)); } - DeclCache[D->getCanonicalDecl()].reset(static_cast<llvm::Metadata *>(GV)); + DeclCache[D->getCanonicalDecl()].reset(GV); } void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, llvm::Constant *Init) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); + if (VD->hasAttr<NoDebugAttr>()) + return; // Create the descriptor for the variable. llvm::DIFile *Unit = getOrCreateFile(VD->getLocation()); StringRef Name = VD->getName(); llvm::DIType *Ty = getOrCreateType(VD->getType(), Unit); - if (const EnumConstantDecl *ECD = dyn_cast<EnumConstantDecl>(VD)) { - const EnumDecl *ED = cast<EnumDecl>(ECD->getDeclContext()); + if (const auto *ECD = dyn_cast<EnumConstantDecl>(VD)) { + const auto *ED = cast<EnumDecl>(ECD->getDeclContext()); assert(isa<EnumType>(ED->getTypeForDecl()) && "Enum without EnumType?"); Ty = getOrCreateType(QualType(ED->getTypeForDecl(), 0), Unit); } @@ -3400,6 +3591,9 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, auto *RD = cast<RecordDecl>(VarD->getDeclContext()); getDeclContextDescriptor(VarD); // Ensure that the type is retained even though it's otherwise unreferenced. + // + // FIXME: This is probably unnecessary, since Ty should reference RD + // through its scope. RetainedTypes.push_back( CGM.getContext().getRecordType(RD).getAsOpaquePtr()); return; @@ -3423,7 +3617,7 @@ llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) { } void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) { - if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo) + if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) return; const NamespaceDecl *NSDecl = UD.getNominatedNamespace(); if (!NSDecl->isAnonymousNamespace() || @@ -3436,13 +3630,23 @@ void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) { } void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { - if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo) + if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) return; assert(UD.shadow_size() && "We shouldn't be codegening an invalid UsingDecl containing no decls"); // Emitting one decl is sufficient - debuggers can detect that this is an // overloaded name & provide lookup for all the overloads. 
const UsingShadowDecl &USD = **UD.shadow_begin(); + + // FIXME: Skip functions with undeduced auto return type for now since we + // don't currently have the plumbing for separate declarations & definitions + // of free functions and mismatched types (auto in the declaration, concrete + // return type in the definition) + if (const auto *FD = dyn_cast<FunctionDecl>(USD.getUnderlyingDecl())) + if (const auto *AT = + FD->getType()->getAs<FunctionProtoType>()->getContainedAutoType()) + if (AT->getDeducedType().isNull()) + return; if (llvm::DINode *Target = getDeclarationOrDefinition(USD.getUnderlyingDecl())) DBuilder.createImportedDeclaration( @@ -3451,6 +3655,8 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { } void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { + if (CGM.getCodeGenOpts().getDebuggerTuning() != llvm::DebuggerKind::LLDB) + return; if (Module *M = ID.getImportedModule()) { auto Info = ExternalASTSource::ASTSourceDescriptor(*M); DBuilder.createImportedDeclaration( @@ -3462,13 +3668,13 @@ void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { llvm::DIImportedEntity * CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) { - if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo) + if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) return nullptr; auto &VH = NamespaceAliasCache[&NA]; if (VH) return cast<llvm::DIImportedEntity>(VH); llvm::DIImportedEntity *R; - if (const NamespaceAliasDecl *Underlying = + if (const auto *Underlying = dyn_cast<NamespaceAliasDecl>(NA.getAliasedNamespace())) // This could cache & dedup here rather than relying on metadata deduping. R = DBuilder.createImportedDeclaration( @@ -3557,7 +3763,7 @@ void CGDebugInfo::finalize() { } void CGDebugInfo::EmitExplicitCastType(QualType Ty) { - if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo) + if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) return; if (auto *DieTy = getOrCreateType(Ty, getOrCreateMainFile())) diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h index a68dd33f..366dd81 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h @@ -16,6 +16,7 @@ #include "CGBuilder.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExternalASTSource.h" #include "clang/AST/Type.h" #include "clang/Basic/SourceLocation.h" #include "clang/Frontend/CodeGenOptions.h" @@ -52,28 +53,20 @@ class CGDebugInfo { friend class ApplyDebugLocation; friend class SaveAndRestoreLocation; CodeGenModule &CGM; - const CodeGenOptions::DebugInfoKind DebugKind; + const codegenoptions::DebugInfoKind DebugKind; bool DebugTypeExtRefs; llvm::DIBuilder DBuilder; llvm::DICompileUnit *TheCU = nullptr; ModuleMap *ClangModuleMap = nullptr; + ExternalASTSource::ASTSourceDescriptor PCHDescriptor; SourceLocation CurLoc; llvm::DIType *VTablePtrType = nullptr; llvm::DIType *ClassTy = nullptr; llvm::DICompositeType *ObjTy = nullptr; llvm::DIType *SelTy = nullptr; - llvm::DIType *OCLImage1dDITy = nullptr; - llvm::DIType *OCLImage1dArrayDITy = nullptr; - llvm::DIType *OCLImage1dBufferDITy = nullptr; - llvm::DIType *OCLImage2dDITy = nullptr; - llvm::DIType *OCLImage2dArrayDITy = nullptr; - llvm::DIType *OCLImage2dDepthDITy = nullptr; - llvm::DIType *OCLImage2dArrayDepthDITy = nullptr; - llvm::DIType *OCLImage2dMSAADITy = nullptr; - llvm::DIType *OCLImage2dArrayMSAADITy = nullptr; - llvm::DIType 
*OCLImage2dMSAADepthDITy = nullptr; - llvm::DIType *OCLImage2dArrayMSAADepthDITy = nullptr; - llvm::DIType *OCLImage3dDITy = nullptr; +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + llvm::DIType *SingletonId = nullptr; +#include "clang/Basic/OpenCLImageTypes.def" llvm::DIType *OCLEventDITy = nullptr; llvm::DIType *OCLClkEventDITy = nullptr; llvm::DIType *OCLQueueDITy = nullptr; @@ -107,7 +100,7 @@ class CGDebugInfo { /// compilation. std::vector<std::pair<const TagType *, llvm::TrackingMDRef>> ReplaceMap; - /// Cache of replaceable forward declarartions (functions and + /// Cache of replaceable forward declarations (functions and /// variables) to RAUW at the end of compilation. std::vector<std::pair<const DeclaratorDecl *, llvm::TrackingMDRef>> FwdDeclReplaceMap; @@ -239,11 +232,16 @@ class CGDebugInfo { llvm::DIFile *F); llvm::DIType *createFieldType(StringRef name, QualType type, - uint64_t sizeInBitsOverride, SourceLocation loc, - AccessSpecifier AS, uint64_t offsetInBits, - llvm::DIFile *tunit, llvm::DIScope *scope, + SourceLocation loc, AccessSpecifier AS, + uint64_t offsetInBits, llvm::DIFile *tunit, + llvm::DIScope *scope, const RecordDecl *RD = nullptr); + /// Create new bit field member. + llvm::DIType *createBitFieldType(const FieldDecl *BitFieldDecl, + llvm::DIScope *RecordTy, + const RecordDecl *RD); + /// Helpers for collecting fields of a record. /// @{ void CollectRecordLambdaFields(const CXXRecordDecl *CXXDecl, @@ -275,6 +273,8 @@ public: void finalize(); + /// Module debugging: Support for building PCMs. + /// @{ /// Set the main CU's DwoId field to \p Signature. void setDwoId(uint64_t Signature); @@ -283,6 +283,14 @@ public: /// the module of origin of each Decl. void setModuleMap(ModuleMap &MMap) { ClangModuleMap = &MMap; } + /// When generating debug information for a clang module or + /// precompiled header, this module map will be used to determine + /// the module of origin of each Decl. + void setPCHDescriptor(ExternalASTSource::ASTSourceDescriptor PCH) { + PCHDescriptor = PCH; + } + /// @} + /// Update the current source location. If \arg loc is invalid it is /// ignored. 
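The per-image-type cache members above are now generated from clang/Basic/OpenCLImageTypes.def via an X-macro instead of being spelled out one by one. A minimal standalone sketch of the same pattern (hypothetical SHAPE list kept in a macro so the sketch stays in one file; the real code #includes the .def file):

#include <cstdio>

#define SHAPE_TYPES(X) \
  X(Circle)            \
  X(Square)            \
  X(Triangle)

struct Registry {
  // One member per list entry, generated exactly like the SingletonId fields.
#define DECLARE_MEMBER(Name) const char *Name##Ty = nullptr;
  SHAPE_TYPES(DECLARE_MEMBER)
#undef DECLARE_MEMBER
};

int main() {
  Registry r;
  r.CircleTy = "circle";          // this member only exists via the macro
  std::printf("%s\n", r.CircleTy);
}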
void setLocation(SourceLocation Loc); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp index b78e80d..89407cd 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp @@ -16,11 +16,13 @@ #include "CGCleanup.h" #include "CGDebugInfo.h" #include "CGOpenCLRuntime.h" +#include "CGOpenMPRuntime.h" #include "CodeGenModule.h" #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclOpenMP.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" @@ -29,10 +31,10 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" + using namespace clang; using namespace CodeGen; - void CodeGenFunction::EmitDecl(const Decl &D) { switch (D.getKind()) { case Decl::BuiltinTemplate: @@ -71,6 +73,8 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::ObjCImplementation: case Decl::ObjCProperty: case Decl::ObjCCompatibleAlias: + case Decl::PragmaComment: + case Decl::PragmaDetectMismatch: case Decl::AccessSpec: case Decl::LinkageSpec: case Decl::ObjCPropertyImpl: @@ -81,6 +85,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::Captured: case Decl::ClassScopeFunctionSpecialization: case Decl::UsingShadow: + case Decl::ConstructorUsingShadow: case Decl::ObjCTypeParam: llvm_unreachable("Declaration should not be in declstmts!"); case Decl::Function: // void X(); @@ -92,6 +97,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::Label: // __label__ x; case Decl::Import: case Decl::OMPThreadPrivate: + case Decl::OMPCapturedExpr: case Decl::Empty: // None of these decls require codegen support. return; @@ -115,6 +121,9 @@ void CodeGenFunction::EmitDecl(const Decl &D) { return EmitVarDecl(VD); } + case Decl::OMPDeclareReduction: + return CGM.EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(&D), this); + case Decl::Typedef: // typedef int X; case Decl::TypeAlias: { // using X = int; [C++0x] const TypedefNameDecl &TD = cast<TypedefNameDecl>(D); @@ -363,8 +372,15 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, llvm::GlobalVariable *var = cast<llvm::GlobalVariable>(addr->stripPointerCasts()); + + // CUDA's local and local static __shared__ variables should not + // have any non-empty initializers. This is ensured by Sema. + // Whatever initializer such variable may have when it gets here is + // a no-op and should not be emitted. + bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + D.hasAttr<CUDASharedAttr>(); // If this value has an initializer, emit it. - if (D.getInit()) + if (D.getInit() && !isCudaSharedVar) var = AddInitializerToStaticVarDecl(D, var); var->setAlignment(alignment.getQuantity()); @@ -394,7 +410,7 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, // Emit global variable debug descriptor for static vars. CGDebugInfo *DI = getDebugInfo(); if (DI && - CGM.getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) { + CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) { DI->setLocation(D.getLocation()); DI->EmitGlobalVariable(var, &D); } @@ -513,20 +529,7 @@ namespace { CGF.EmitCall(FnInfo, CleanupFn, ReturnValueSlot(), Args); } }; - - /// A cleanup to call @llvm.lifetime.end. 
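The isCudaSharedVar special case above covers device-side __shared__ locals: Sema already guarantees they carry no meaningful initializer, so none is emitted. A source-level illustration of the declaration kind involved (CUDA C++, illustrative only; builds with nvcc or clang in CUDA mode):

__global__ void kernel(float *out) {
  // Local static-duration __shared__ variable: any (empty) initializer still
  // attached to it at CodeGen time is a no-op and is skipped.
  __shared__ float tile[256];
  tile[threadIdx.x] = out[threadIdx.x];
  __syncthreads();
  out[threadIdx.x] = tile[255 - threadIdx.x];
}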
- class CallLifetimeEnd final : public EHScopeStack::Cleanup { - llvm::Value *Addr; - llvm::Value *Size; - public: - CallLifetimeEnd(Address addr, llvm::Value *size) - : Addr(addr.getPointer()), Size(size) {} - - void Emit(CodeGenFunction &CGF, Flags flags) override { - CGF.EmitLifetimeEnd(Size, Addr); - } - }; -} +} // end anonymous namespace /// EmitAutoVarWithLifetime - Does the setup required for an automatic /// variable with lifetime. @@ -644,7 +647,6 @@ static bool tryEmitARCCopyWeakInit(CodeGenFunction &CGF, } init = castExpr->getSubExpr(); - continue; } return false; } @@ -665,10 +667,10 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, EmitStoreThroughLValue(RValue::get(value), lvalue, true); return; } - + if (const CXXDefaultInitExpr *DIE = dyn_cast<CXXDefaultInitExpr>(init)) init = DIE->getExpr(); - + // If we're emitting a value with lifetime, we have to do the // initialization *before* we leave the cleanup scopes. if (const ExprWithCleanups *ewc = dyn_cast<ExprWithCleanups>(init)) { @@ -715,8 +717,7 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, llvm_unreachable("present but none"); case Qualifiers::OCL_ExplicitNone: - // nothing to do - value = EmitScalarExpr(init); + value = EmitARCUnsafeUnretainedScalarExpr(init); break; case Qualifiers::OCL_Strong: { @@ -819,7 +820,7 @@ static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init, } return true; } - + if (llvm::ConstantDataSequential *CDS = dyn_cast<llvm::ConstantDataSequential>(Init)) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { @@ -848,9 +849,9 @@ static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc, Builder.CreateDefaultAlignedStore(Init, Loc, isVolatile); return; } - - if (llvm::ConstantDataSequential *CDS = - dyn_cast<llvm::ConstantDataSequential>(Init)) { + + if (llvm::ConstantDataSequential *CDS = + dyn_cast<llvm::ConstantDataSequential>(Init)) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { llvm::Constant *Elt = CDS->getElementAsConstant(i); @@ -877,7 +878,6 @@ static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc, } } - /// shouldUseMemSetPlusStoresToInitialize - Decide whether we should use memset /// plus some stores to initialize a local variable instead of using a memcpy /// from a constant global. It is beneficial to use memset if the global is all @@ -907,18 +907,29 @@ void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D) { EmitAutoVarCleanups(emission); } +/// shouldEmitLifetimeMarkers - Decide whether we need emit the life-time +/// markers. +static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts, + const LangOptions &LangOpts) { + // Asan uses markers for use-after-scope checks. + if (CGOpts.SanitizeAddressUseAfterScope) + return true; + + // Disable lifetime markers in msan builds. + // FIXME: Remove this when msan works with lifetime markers. + if (LangOpts.Sanitize.has(SanitizerKind::Memory)) + return false; + + // For now, only in optimized builds. + return CGOpts.OptimizationLevel != 0; +} + /// Emit a lifetime.begin marker if some criteria are satisfied. /// \return a pointer to the temporary size Value if a marker was emitted, null /// otherwise llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size, llvm::Value *Addr) { - // For now, only in optimized builds. - if (CGM.getCodeGenOpts().OptimizationLevel == 0) - return nullptr; - - // Disable lifetime markers in msan builds. 
- // FIXME: Remove this when msan works with lifetime markers. - if (getLangOpts().Sanitize.has(SanitizerKind::Memory)) + if (!shouldEmitLifetimeMarkers(CGM.getCodeGenOpts(), getLangOpts())) return nullptr; llvm::Value *SizeV = llvm::ConstantInt::get(Int64Ty, Size); @@ -1086,8 +1097,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Emit debug info for local var declaration. if (HaveInsertPoint()) if (CGDebugInfo *DI = getDebugInfo()) { - if (CGM.getCodeGenOpts().getDebugInfo() - >= CodeGenOptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::LimitedDebugInfo) { DI->setLocation(D.getLocation()); DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder); } @@ -1163,6 +1174,7 @@ bool CodeGenFunction::isTrivialInitializer(const Expr *Init) { return false; } + void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { assert(emission.Variable && "emission was not valid!"); @@ -1250,7 +1262,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { llvm::GlobalValue::PrivateLinkage, constant, Name); GV->setAlignment(Loc.getAlignment().getQuantity()); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); Address SrcPtr = Address(GV, Loc.getAlignment()); if (SrcPtr.getType() != BP) @@ -1381,13 +1393,10 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) { // Make sure we call @llvm.lifetime.end. This needs to happen // *last*, so the cleanup needs to be pushed *first*. - if (emission.useLifetimeMarkers()) { - EHStack.pushCleanup<CallLifetimeEnd>(NormalCleanup, + if (emission.useLifetimeMarkers()) + EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, emission.getAllocatedAddress(), emission.getSizeForLifetimeMarkers()); - EHCleanupScope &cleanup = cast<EHCleanupScope>(*EHStack.begin()); - cleanup.setLifetimeMarker(); - } // Check the type for a cleanup. if (QualType::DestructionKind dtorKind = D.getType().isDestructedType()) @@ -1662,7 +1671,7 @@ namespace { ElementType, ElementAlign, Destroyer); } }; -} +} // end anonymous namespace /// pushIrregularPartialArrayCleanup - Push an EH cleanup to destroy /// already-constructed elements of the given array. The cleanup @@ -1731,7 +1740,7 @@ namespace { CGF.EmitARCRelease(Param, Precise); } }; -} +} // end anonymous namespace /// Emit an alloca (or GlobalValue depending on target) /// for the specified parameter and set up LocalDeclMap. @@ -1852,8 +1861,8 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, // Emit debug info for param declaration. 
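The cleanup reordering above relies on the EH cleanup stack being LIFO: the lifetime.end cleanup is pushed before the destructor cleanup so that it runs after it. A minimal standalone model of that ordering (plain C++, only to show the LIFO behaviour; EHScopeStack itself is far richer):

#include <cstdio>
#include <functional>
#include <vector>

struct CleanupStack {
  std::vector<std::function<void()>> cleanups;
  void push(std::function<void()> fn) { cleanups.push_back(std::move(fn)); }
  ~CleanupStack() {
    // Run cleanups in reverse order of pushes, like scope exit does.
    for (auto it = cleanups.rbegin(); it != cleanups.rend(); ++it)
      (*it)();
  }
};

int main() {
  CleanupStack scope;
  scope.push([] { std::puts("lifetime.end"); }); // pushed first...
  scope.push([] { std::puts("run dtor"); });     // ...so the dtor runs first
}                                                // prints: run dtor, lifetime.end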
if (CGDebugInfo *DI = getDebugInfo()) { - if (CGM.getCodeGenOpts().getDebugInfo() - >= CodeGenOptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::LimitedDebugInfo) { DI->EmitDeclareOfArgVariable(&D, DeclPtr.getPointer(), ArgNo, Builder); } } @@ -1861,3 +1870,10 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, if (D.hasAttr<AnnotateAttr>()) EmitVarAnnotations(&D, DeclPtr.getPointer()); } + +void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, + CodeGenFunction *CGF) { + if (!LangOpts.OpenMP || (!LangOpts.EmitAllDecls && !D->isUsed())) + return; + getOpenMPRuntime().emitUserDefinedReduction(CGF, D); +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp index adba731..89d142e 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp @@ -86,13 +86,21 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, llvm::Constant *function; llvm::Constant *argument; - // Special-case non-array C++ destructors, where there's a function - // with the right signature that we can just call. - const CXXRecordDecl *record = nullptr; - if (dtorKind == QualType::DK_cxx_destructor && - (record = type->getAsCXXRecordDecl())) { - assert(!record->hasTrivialDestructor()); - CXXDestructorDecl *dtor = record->getDestructor(); + // Special-case non-array C++ destructors, if they have the right signature. + // Under some ABIs, destructors return this instead of void, and cannot be + // passed directly to __cxa_atexit if the target does not allow this mismatch. + const CXXRecordDecl *Record = type->getAsCXXRecordDecl(); + bool CanRegisterDestructor = + Record && (!CGM.getCXXABI().HasThisReturn( + GlobalDecl(Record->getDestructor(), Dtor_Complete)) || + CGM.getCXXABI().canCallMismatchedFunctionType()); + // If __cxa_atexit is disabled via a flag, a different helper function is + // generated elsewhere which uses atexit instead, and it takes the destructor + // directly. + bool UsingExternalHelper = !CGM.getCodeGenOpts().CXAAtExit; + if (Record && (CanRegisterDestructor || UsingExternalHelper)) { + assert(!Record->hasTrivialDestructor()); + CXXDestructorDecl *dtor = Record->getDestructor(); function = CGM.getAddrOfCXXStructor(dtor, StructorType::Complete); argument = llvm::ConstantExpr::getBitCast( @@ -304,6 +312,17 @@ void CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, llvm::GlobalVariable *Addr, bool PerformInit) { + + // According to E.2.3.1 in CUDA-7.5 Programming guide: __device__, + // __constant__ and __shared__ variables defined in namespace scope, + // that are of class type, cannot have a non-empty constructor. All + // the checks have been done in Sema by now. Whatever initializers + // are allowed are empty and we just need to ignore them here. + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || + D->hasAttr<CUDASharedAttr>())) + return; + // Check if we've already initialized this decl. 
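EmitDeclDestroy above decides whether a destructor can be handed straight to __cxa_atexit or must go through an atexit-style helper (for example when the ABI makes destructors return 'this'). A minimal standalone sketch of the helper-style registration, using std::atexit; the real lowering prefers __cxa_atexit with __dso_handle when available:

#include <cstdio>
#include <cstdlib>
#include <new>

struct Guard {
  ~Guard() { std::puts("Guard destroyed at exit"); }
};

alignas(Guard) static unsigned char storage[sizeof(Guard)];

static void destroy_guard() {
  reinterpret_cast<Guard *>(storage)->~Guard();
}

int main() {
  new (storage) Guard();       // dynamic initialization of the static object
  std::atexit(destroy_guard);  // register teardown, analogous to __cxa_atexit
}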
auto I = DelayedCXXInitPosition.find(D); if (I != DelayedCXXInitPosition.end() && I->second == ~0U) @@ -587,8 +606,8 @@ llvm::Function *CodeGenFunction::generateDestroyHelper( getContext().VoidPtrTy); args.push_back(&dst); - const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - getContext().VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false); + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction( FTy, "__cxx_global_array_dtor", FI, VD->getLocation()); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp index fce2e75..4a7dc42 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp @@ -686,8 +686,10 @@ llvm::BasicBlock *CodeGenFunction::getInvokeDestImpl() { assert(EHStack.requiresLandingPad()); assert(!EHStack.empty()); - // If exceptions are disabled, there are usually no landingpads. However, when - // SEH is enabled, functions using SEH still get landingpads. + // If exceptions are disabled and SEH is not in use, then there is no invoke + // destination. SEH "works" even if exceptions are off. In practice, this + // means that C++ destructors and other EH cleanups don't run, which is + // consistent with MSVC's behavior. const LangOptions &LO = CGM.getLangOpts(); if (!LO.Exceptions) { if (!LO.Borland && !LO.MicrosoftExt) @@ -1326,11 +1328,13 @@ llvm::BasicBlock *CodeGenFunction::getTerminateHandler() { TerminateHandler = createBasicBlock("terminate.handler"); Builder.SetInsertPoint(TerminateHandler); llvm::Value *Exn = nullptr; + SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad( + CurrentFuncletPad); if (EHPersonality::get(*this).usesFuncletPads()) { llvm::Value *ParentPad = CurrentFuncletPad; if (!ParentPad) ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext()); - Builder.CreateCleanupPad(ParentPad); + CurrentFuncletPad = Builder.CreateCleanupPad(ParentPad); } else { if (getLangOpts().CPlusPlus) Exn = getExceptionFromSlot(); @@ -1422,12 +1426,8 @@ struct PerformSEHFinally final : EHScopeStack::Cleanup { Args.add(RValue::get(FP), ArgTys[1]); // Arrange a two-arg function info and type. 
- FunctionProtoType::ExtProtoInfo EPI; - const auto *FPT = cast<FunctionProtoType>( - Context.getFunctionType(Context.VoidTy, ArgTys, EPI)); const CGFunctionInfo &FnInfo = - CGM.getTypes().arrangeFreeFunctionCall(Args, FPT, - /*chainCall=*/false); + CGM.getTypes().arrangeBuiltinFunctionCall(Context.VoidTy, Args); CGF.EmitCall(FnInfo, OutlinedFinally, ReturnValueSlot(), Args); } @@ -1623,14 +1623,13 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, SmallString<128> Name; { llvm::raw_svector_ostream OS(Name); - const Decl *ParentCodeDecl = ParentCGF.CurCodeDecl; - const NamedDecl *Parent = dyn_cast_or_null<NamedDecl>(ParentCodeDecl); - assert(Parent && "FIXME: handle unnamed decls (lambdas, blocks) with SEH"); + const FunctionDecl *ParentSEHFn = ParentCGF.CurSEHParent; + assert(ParentSEHFn && "No CurSEHParent!"); MangleContext &Mangler = CGM.getCXXABI().getMangleContext(); if (IsFilter) - Mangler.mangleSEHFilterExpression(Parent, OS); + Mangler.mangleSEHFilterExpression(ParentSEHFn, OS); else - Mangler.mangleSEHFinallyBlock(Parent, OS); + Mangler.mangleSEHFinallyBlock(ParentSEHFn, OS); } FunctionArgList Args; @@ -1656,8 +1655,8 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, QualType RetTy = IsFilter ? getContext().LongTy : getContext().VoidTy; llvm::Function *ParentFn = ParentCGF.CurFn; - const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionDeclaration( - RetTy, Args, FunctionType::ExtInfo(), /*isVariadic=*/false); + const CGFunctionInfo &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(RetTy, Args); llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); llvm::Function *Fn = llvm::Function::Create( @@ -1677,6 +1676,7 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args, OutlinedStmt->getLocStart(), OutlinedStmt->getLocStart()); + CurSEHParent = ParentCGF.CurSEHParent; CGM.SetLLVMFunctionAttributes(nullptr, FnInfo, CurFn); EmitCapturedLocals(ParentCGF, OutlinedStmt, IsFilter); @@ -1708,12 +1708,6 @@ CodeGenFunction::GenerateSEHFinallyFunction(CodeGenFunction &ParentCGF, const Stmt *FinallyBlock = Finally.getBlock(); startOutlinedSEHHelper(ParentCGF, false, FinallyBlock); - // Mark finally block calls as nounwind and noinline to make LLVM's job a - // little easier. - // FIXME: Remove these restrictions in the future. - CurFn->addFnAttr(llvm::Attribute::NoUnwind); - CurFn->addFnAttr(llvm::Attribute::NoInline); - // Emit the original filter expression, convert to i32, and return. 
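The helpers above outline SEH filter and __finally blocks into separate functions, now mangled from CurSEHParent rather than the nearest NamedDecl. A source-level illustration of the construct being outlined (Microsoft SEH extension; builds with MSVC or clang in MS-compatibility mode; do_work is an assumed external function):

extern "C" void do_work();   // may fault or return early

void demo() {
  __try {
    do_work();
  } __finally {
    // This block is what gets outlined into a separate "finally" helper so it
    // can be invoked on both the normal path and the exceptional unwind path.
  }
}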
EmitStmt(FinallyBlock); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp index 507ce3d..5f3b290 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp @@ -11,13 +11,14 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" #include "CGCXXABI.h" #include "CGCall.h" +#include "CGCleanup.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" #include "CGRecordLayout.h" +#include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" @@ -32,6 +33,8 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Path.h" +#include "llvm/Transforms/Utils/SanitizerStats.h" using namespace clang; using namespace CodeGen; @@ -65,8 +68,6 @@ Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, /// block. llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, const Twine &Name) { - if (!Builder.isNamePreserving()) - return new llvm::AllocaInst(Ty, nullptr, "", AllocaInsertPt); return new llvm::AllocaInst(Ty, nullptr, Name, AllocaInsertPt); } @@ -361,9 +362,16 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { ConvertTypeForMem(E->getType()) ->getPointerTo(Object.getAddressSpace())), Object.getAlignment()); - // We should not have emitted the initializer for this temporary as a - // constant. - assert(!Var->hasInitializer()); + + // createReferenceTemporary will promote the temporary to a global with a + // constant initializer if it can. It can only do this to a value of + // ARC-manageable type if the value is global and therefore "immune" to + // ref-counting operations. Therefore we have no need to emit either a + // dynamic initialization or a cleanup and we can just return the address + // of the temporary. 
+ if (Var->hasInitializer()) + return MakeAddrLValue(Object, M->getType(), AlignmentSource::Decl); + Var->setInitializer(CGM.EmitNullConstant(E->getType())); } LValue RefTempDst = MakeAddrLValue(Object, M->getType(), @@ -416,6 +424,23 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { EmitAnyExprToMem(E, Object, Qualifiers(), /*IsInit*/true); } } else { + switch (M->getStorageDuration()) { + case SD_Automatic: + case SD_FullExpression: + if (auto *Size = EmitLifetimeStart( + CGM.getDataLayout().getTypeAllocSize(Object.getElementType()), + Object.getPointer())) { + if (M->getStorageDuration() == SD_Automatic) + pushCleanupAfterFullExpr<CallLifetimeEnd>(NormalEHLifetimeMarker, + Object, Size); + else + pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Object, + Size); + } + break; + default: + break; + } EmitAnyExprToMem(E, Object, Qualifiers(), /*IsInit*/true); } pushTemporaryCleanup(*this, M, E, Object); @@ -577,7 +602,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, if (Checks.size() > 0) { llvm::Constant *StaticData[] = { - EmitCheckSourceLocation(Loc), + EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(Ty), llvm::ConstantInt::get(SizeTy, AlignVal), llvm::ConstantInt::get(Int8Ty, TCK) @@ -824,7 +849,8 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, getNaturalPointeeTypeAlignment(E->getType(), Source)); } - if (SanOpts.has(SanitizerKind::CFIUnrelatedCast)) { + if (SanOpts.has(SanitizerKind::CFIUnrelatedCast) && + CE->getCastKind() == CK_BitCast) { if (auto PT = E->getType()->getAs<PointerType>()) EmitVTablePtrCheckForCast(PT->getPointeeType(), Addr.getPointer(), /*MayBeNull=*/true, @@ -1265,10 +1291,10 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, } // Atomic operations have to be done on integral types. 
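The switch over getStorageDuration() above distinguishes temporaries that die at the end of the full-expression from reference-lifetime-extended ones, and places the lifetime.end cleanup accordingly. A source-level illustration of the two cases (types and functions are illustrative):

struct Big { int data[32]; };

static Big make() { return Big(); }
static int use(const Big &b) { return b.data[0]; }

int demo() {
  // SD_FullExpression: the temporary dies at the ';', so lifetime.end is a
  // full-expression cleanup.
  int a = use(make());

  // SD_Automatic: binding to a reference extends the temporary to the end of
  // the enclosing scope, so lifetime.end is pushed as a scope cleanup.
  const Big &b = make();
  return a + b.data[0];
}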
- if (Ty->isAtomicType() || typeIsSuitableForInlineAtomic(Ty, Volatile)) { - LValue lvalue = + LValue AtomicLValue = LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo); - return EmitAtomicLoad(lvalue, Loc).getScalarVal(); + if (Ty->isAtomicType() || LValueIsSuitableForInlineAtomic(AtomicLValue)) { + return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal(); } llvm::LoadInst *Load = Builder.CreateLoad(Addr, Volatile); @@ -1376,12 +1402,11 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, Value = EmitToMemory(Value, Ty); + LValue AtomicLValue = + LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo); if (Ty->isAtomicType() || - (!isInit && typeIsSuitableForInlineAtomic(Ty, Volatile))) { - EmitAtomicStore(RValue::get(Value), - LValue::MakeAddr(Addr, Ty, getContext(), - AlignSource, TBAAInfo), - isInit); + (!isInit && LValueIsSuitableForInlineAtomic(AtomicLValue))) { + EmitAtomicStore(RValue::get(Value), AtomicLValue, isInit); return; } @@ -1733,8 +1758,7 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, if (const VectorType *VTy = Dst.getType()->getAs<VectorType>()) { unsigned NumSrcElts = VTy->getNumElements(); - unsigned NumDstElts = - cast<llvm::VectorType>(Vec->getType())->getNumElements(); + unsigned NumDstElts = Vec->getType()->getVectorNumElements(); if (NumDstElts == NumSrcElts) { // Use shuffle vector is the src and destination are the same number of // elements and restore the vector mask since it is on the side it will be @@ -1947,6 +1971,21 @@ LValue CodeGenFunction::EmitLoadOfReferenceLValue(Address RefAddr, return MakeAddrLValue(Addr, RefTy->getPointeeType(), Source); } +Address CodeGenFunction::EmitLoadOfPointer(Address Ptr, + const PointerType *PtrTy, + AlignmentSource *Source) { + llvm::Value *Addr = Builder.CreateLoad(Ptr); + return Address(Addr, getNaturalTypeAlignment(PtrTy->getPointeeType(), Source, + /*forPointeeType=*/true)); +} + +LValue CodeGenFunction::EmitLoadOfPointerLValue(Address PtrAddr, + const PointerType *PtrTy) { + AlignmentSource Source; + Address Addr = EmitLoadOfPointer(PtrAddr, PtrTy, &Source); + return MakeAddrLValue(Addr, PtrTy->getPointeeType(), Source); +} + static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, const Expr *E, const VarDecl *VD) { QualType T = E->getType(); @@ -2066,12 +2105,11 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { if (auto *FD = LambdaCaptureFields.lookup(VD)) return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue); else if (CapturedStmtInfo) { - auto it = LocalDeclMap.find(VD); - if (it != LocalDeclMap.end()) { - if (auto RefTy = VD->getType()->getAs<ReferenceType>()) { - return EmitLoadOfReferenceLValue(it->second, RefTy); - } - return MakeAddrLValue(it->second, T); + auto I = LocalDeclMap.find(VD); + if (I != LocalDeclMap.end()) { + if (auto RefTy = VD->getType()->getAs<ReferenceType>()) + return EmitLoadOfReferenceLValue(I->second, RefTy); + return MakeAddrLValue(I->second, T); } LValue CapLVal = EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD), @@ -2210,13 +2248,15 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { return LV; } - assert(E->getSubExpr()->getType()->isAnyComplexType()); + QualType T = ExprTy->castAs<ComplexType>()->getElementType(); Address Component = (E->getOpcode() == UO_Real ? 
emitAddrOfRealComponent(LV.getAddress(), LV.getType()) : emitAddrOfImagComponent(LV.getAddress(), LV.getType())); - return MakeAddrLValue(Component, ExprTy, LV.getAlignmentSource()); + LValue ElemLV = MakeAddrLValue(Component, T, LV.getAlignmentSource()); + ElemLV.getQuals().addQualifiers(LV.getQuals()); + return ElemLV; } case UO_PreInc: case UO_PreDec: { @@ -2302,7 +2342,7 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) { auto *GV = new llvm::GlobalVariable( CGM.getModule(), Descriptor->getType(), /*isConstant=*/true, llvm::GlobalVariable::PrivateLinkage, Descriptor); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); CGM.getSanitizerMetadata()->disableSanitizerForGlobal(GV); // Remember the descriptor for this type. @@ -2352,7 +2392,33 @@ llvm::Constant *CodeGenFunction::EmitCheckSourceLocation(SourceLocation Loc) { PresumedLoc PLoc = getContext().getSourceManager().getPresumedLoc(Loc); if (PLoc.isValid()) { - auto FilenameGV = CGM.GetAddrOfConstantCString(PLoc.getFilename(), ".src"); + StringRef FilenameString = PLoc.getFilename(); + + int PathComponentsToStrip = + CGM.getCodeGenOpts().EmitCheckPathComponentsToStrip; + if (PathComponentsToStrip < 0) { + assert(PathComponentsToStrip != INT_MIN); + int PathComponentsToKeep = -PathComponentsToStrip; + auto I = llvm::sys::path::rbegin(FilenameString); + auto E = llvm::sys::path::rend(FilenameString); + while (I != E && --PathComponentsToKeep) + ++I; + + FilenameString = FilenameString.substr(I - E); + } else if (PathComponentsToStrip > 0) { + auto I = llvm::sys::path::begin(FilenameString); + auto E = llvm::sys::path::end(FilenameString); + while (I != E && PathComponentsToStrip--) + ++I; + + if (I != E) + FilenameString = + FilenameString.substr(I - llvm::sys::path::begin(FilenameString)); + else + FilenameString = llvm::sys::path::filename(FilenameString); + } + + auto FilenameGV = CGM.GetAddrOfConstantCString(FilenameString, ".src"); CGM.getSanitizerMetadata()->disableSanitizerForGlobal( cast<llvm::GlobalVariable>(FilenameGV.getPointer())); Filename = FilenameGV.getPointer(); @@ -2483,24 +2549,26 @@ void CodeGenFunction::EmitCheck( Branch->setMetadata(llvm::LLVMContext::MD_prof, Node); EmitBlock(Handlers); - // Emit handler arguments and create handler function type. - llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); - auto *InfoPtr = - new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, - llvm::GlobalVariable::PrivateLinkage, Info); - InfoPtr->setUnnamedAddr(true); - CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); - + // Handler functions take an i8* pointing to the (handler-specific) static + // information block, followed by a sequence of intptr_t arguments + // representing operand values. SmallVector<llvm::Value *, 4> Args; SmallVector<llvm::Type *, 4> ArgTypes; Args.reserve(DynamicArgs.size() + 1); ArgTypes.reserve(DynamicArgs.size() + 1); - // Handler functions take an i8* pointing to the (handler-specific) static - // information block, followed by a sequence of intptr_t arguments - // representing operand values. - Args.push_back(Builder.CreateBitCast(InfoPtr, Int8PtrTy)); - ArgTypes.push_back(Int8PtrTy); + // Emit handler arguments and create handler function type. 
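EmitCheckSourceLocation above now honours a strip-path-components option: a positive count drops that many leading components (falling back to the bare filename if everything would be dropped), a negative count keeps only that many trailing components. A simplified, self-contained model of that behaviour using plain '/'-separated strings (the real code walks llvm::sys::path iterators, so exact component boundaries may differ):

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

static std::string stripPathComponents(const std::string &path, int n) {
  std::vector<std::string> parts;
  std::string piece;
  for (char c : path) {
    if (c == '/') { if (!piece.empty()) parts.push_back(piece); piece.clear(); }
    else piece += c;
  }
  if (!piece.empty()) parts.push_back(piece);
  if (parts.empty()) return path;

  size_t first = 0;
  if (n < 0) {
    size_t keep = std::min(parts.size(), static_cast<size_t>(-n));
    first = parts.size() - keep;            // keep the last |n| components
  } else if (n > 0) {
    first = std::min(parts.size() - 1,      // keep at least the filename
                     static_cast<size_t>(n));
  }

  std::string out;
  for (size_t i = first; i < parts.size(); ++i)
    out += (out.empty() ? "" : "/") + parts[i];
  return out;
}

int main() {
  std::printf("%s\n", stripPathComponents("src/lib/foo.c", 1).c_str());  // lib/foo.c
  std::printf("%s\n", stripPathComponents("src/lib/foo.c", -1).c_str()); // foo.c
}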
+ if (!StaticArgs.empty()) { + llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); + auto *InfoPtr = + new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, + llvm::GlobalVariable::PrivateLinkage, Info); + InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); + Args.push_back(Builder.CreateBitCast(InfoPtr, Int8PtrTy)); + ArgTypes.push_back(Int8PtrTy); + } + for (size_t i = 0, n = DynamicArgs.size(); i != n; ++i) { Args.push_back(EmitCheckValue(DynamicArgs[i])); ArgTypes.push_back(IntPtrTy); @@ -2532,10 +2600,9 @@ void CodeGenFunction::EmitCheck( EmitBlock(Cont); } -void CodeGenFunction::EmitCfiSlowPathCheck(llvm::Value *Cond, - llvm::ConstantInt *TypeId, - llvm::Value *Ptr) { - auto &Ctx = getLLVMContext(); +void CodeGenFunction::EmitCfiSlowPathCheck( + SanitizerMask Kind, llvm::Value *Cond, llvm::ConstantInt *TypeId, + llvm::Value *Ptr, ArrayRef<llvm::Constant *> StaticArgs) { llvm::BasicBlock *Cont = createBasicBlock("cfi.cont"); llvm::BasicBlock *CheckBB = createBasicBlock("cfi.slowpath"); @@ -2547,19 +2614,122 @@ void CodeGenFunction::EmitCfiSlowPathCheck(llvm::Value *Cond, EmitBlock(CheckBB); - llvm::Constant *SlowPathFn = CGM.getModule().getOrInsertFunction( - "__cfi_slowpath", - llvm::FunctionType::get( - llvm::Type::getVoidTy(Ctx), - {llvm::Type::getInt64Ty(Ctx), - llvm::PointerType::getUnqual(llvm::Type::getInt8Ty(Ctx))}, - false)); - llvm::CallInst *CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr}); + bool WithDiag = !CGM.getCodeGenOpts().SanitizeTrap.has(Kind); + + llvm::CallInst *CheckCall; + if (WithDiag) { + llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); + auto *InfoPtr = + new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, + llvm::GlobalVariable::PrivateLinkage, Info); + InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); + + llvm::Constant *SlowPathDiagFn = CGM.getModule().getOrInsertFunction( + "__cfi_slowpath_diag", + llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy}, + false)); + CheckCall = Builder.CreateCall( + SlowPathDiagFn, + {TypeId, Ptr, Builder.CreateBitCast(InfoPtr, Int8PtrTy)}); + } else { + llvm::Constant *SlowPathFn = CGM.getModule().getOrInsertFunction( + "__cfi_slowpath", + llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy}, false)); + CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr}); + } + CheckCall->setDoesNotThrow(); EmitBlock(Cont); } +// This function is basically a switch over the CFI failure kind, which is +// extracted from CFICheckFailData (1st function argument). Each case is either +// llvm.trap or a call to one of the two runtime handlers, based on +// -fsanitize-trap and -fsanitize-recover settings. Default case (invalid +// failure kind) traps, but this should really never happen. CFICheckFailData +// can be nullptr if the calling module has -fsanitize-trap behavior for this +// check kind; in this case __cfi_check_fail traps as well. 
+void CodeGenFunction::EmitCfiCheckFail() { + SanitizerScope SanScope(this); + FunctionArgList Args; + ImplicitParamDecl ArgData(getContext(), nullptr, SourceLocation(), nullptr, + getContext().VoidPtrTy); + ImplicitParamDecl ArgAddr(getContext(), nullptr, SourceLocation(), nullptr, + getContext().VoidPtrTy); + Args.push_back(&ArgData); + Args.push_back(&ArgAddr); + + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, Args); + + llvm::Function *F = llvm::Function::Create( + llvm::FunctionType::get(VoidTy, {VoidPtrTy, VoidPtrTy}, false), + llvm::GlobalValue::WeakODRLinkage, "__cfi_check_fail", &CGM.getModule()); + F->setVisibility(llvm::GlobalValue::HiddenVisibility); + + StartFunction(GlobalDecl(), CGM.getContext().VoidTy, F, FI, Args, + SourceLocation()); + + llvm::Value *Data = + EmitLoadOfScalar(GetAddrOfLocalVar(&ArgData), /*Volatile=*/false, + CGM.getContext().VoidPtrTy, ArgData.getLocation()); + llvm::Value *Addr = + EmitLoadOfScalar(GetAddrOfLocalVar(&ArgAddr), /*Volatile=*/false, + CGM.getContext().VoidPtrTy, ArgAddr.getLocation()); + + // Data == nullptr means the calling module has trap behaviour for this check. + llvm::Value *DataIsNotNullPtr = + Builder.CreateICmpNE(Data, llvm::ConstantPointerNull::get(Int8PtrTy)); + EmitTrapCheck(DataIsNotNullPtr); + + llvm::StructType *SourceLocationTy = + llvm::StructType::get(VoidPtrTy, Int32Ty, Int32Ty, nullptr); + llvm::StructType *CfiCheckFailDataTy = + llvm::StructType::get(Int8Ty, SourceLocationTy, VoidPtrTy, nullptr); + + llvm::Value *V = Builder.CreateConstGEP2_32( + CfiCheckFailDataTy, + Builder.CreatePointerCast(Data, CfiCheckFailDataTy->getPointerTo(0)), 0, + 0); + Address CheckKindAddr(V, getIntAlign()); + llvm::Value *CheckKind = Builder.CreateLoad(CheckKindAddr); + + llvm::Value *AllVtables = llvm::MetadataAsValue::get( + CGM.getLLVMContext(), + llvm::MDString::get(CGM.getLLVMContext(), "all-vtables")); + llvm::Value *ValidVtable = Builder.CreateZExt( + Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::type_test), + {Addr, AllVtables}), + IntPtrTy); + + const std::pair<int, SanitizerMask> CheckKinds[] = { + {CFITCK_VCall, SanitizerKind::CFIVCall}, + {CFITCK_NVCall, SanitizerKind::CFINVCall}, + {CFITCK_DerivedCast, SanitizerKind::CFIDerivedCast}, + {CFITCK_UnrelatedCast, SanitizerKind::CFIUnrelatedCast}, + {CFITCK_ICall, SanitizerKind::CFIICall}}; + + SmallVector<std::pair<llvm::Value *, SanitizerMask>, 5> Checks; + for (auto CheckKindMaskPair : CheckKinds) { + int Kind = CheckKindMaskPair.first; + SanitizerMask Mask = CheckKindMaskPair.second; + llvm::Value *Cond = + Builder.CreateICmpNE(CheckKind, llvm::ConstantInt::get(Int8Ty, Kind)); + if (CGM.getLangOpts().Sanitize.has(Mask)) + EmitCheck(std::make_pair(Cond, Mask), "cfi_check_fail", {}, + {Data, Addr, ValidVtable}); + else + EmitTrapCheck(Cond); + } + + FinishFunction(); + // The only reference to this function will be created during LTO link. + // Make sure it survives until then. 
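The generated __cfi_check_fail above is, conceptually, a null check on the static data followed by a per-kind dispatch that either traps or calls a diagnostic handler, depending on the -fsanitize-trap/-fsanitize-recover settings. A rough standalone model of that control flow (handle_cfi_failure is a hypothetical stand-in for the runtime handlers; the real function is emitted as IR, not written as source, and folds the sanitizer settings into each case):

extern "C" void handle_cfi_failure(int kind, void *addr);  // hypothetical

enum CfiTypeCheckKind {
  CFITCK_VCall = 0,
  CFITCK_NVCall,
  CFITCK_DerivedCast,
  CFITCK_UnrelatedCast,
  CFITCK_ICall,
};

struct CfiCheckFailData {
  unsigned char CheckKind;
  // source location and type descriptor follow in the real layout
};

extern "C" void cfi_check_fail_model(CfiCheckFailData *Data, void *Addr) {
  // Data == nullptr means the caller was built to trap for this check kind.
  if (!Data)
    __builtin_trap();

  switch (Data->CheckKind) {
  case CFITCK_VCall:
  case CFITCK_NVCall:
  case CFITCK_DerivedCast:
  case CFITCK_UnrelatedCast:
  case CFITCK_ICall:
    handle_cfi_failure(Data->CheckKind, Addr);  // diagnose (and maybe recover)
    break;
  default:
    __builtin_trap();                           // invalid kind: should not happen
  }
}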
+ CGM.addUsedGlobal(F); +} + void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked) { llvm::BasicBlock *Cont = createBasicBlock("cont"); @@ -2827,22 +2997,55 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, return LV; } +static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, + AlignmentSource &AlignSource, + QualType BaseTy, QualType ElTy, + bool IsLowerBound) { + LValue BaseLVal; + if (auto *ASE = dyn_cast<OMPArraySectionExpr>(Base->IgnoreParenImpCasts())) { + BaseLVal = CGF.EmitOMPArraySectionExpr(ASE, IsLowerBound); + if (BaseTy->isArrayType()) { + Address Addr = BaseLVal.getAddress(); + AlignSource = BaseLVal.getAlignmentSource(); + + // If the array type was an incomplete type, we need to make sure + // the decay ends up being the right type. + llvm::Type *NewTy = CGF.ConvertType(BaseTy); + Addr = CGF.Builder.CreateElementBitCast(Addr, NewTy); + + // Note that VLA pointers are always decayed, so we don't need to do + // anything here. + if (!BaseTy->isVariableArrayType()) { + assert(isa<llvm::ArrayType>(Addr.getElementType()) && + "Expected pointer to array"); + Addr = CGF.Builder.CreateStructGEP(Addr, 0, CharUnits::Zero(), + "arraydecay"); + } + + return CGF.Builder.CreateElementBitCast(Addr, + CGF.ConvertTypeForMem(ElTy)); + } + CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &AlignSource); + return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress()), Align); + } + return CGF.EmitPointerWithAlignment(Base, &AlignSource); +} + LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, bool IsLowerBound) { - LValue Base; + QualType BaseTy; if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->getBase()->IgnoreParenImpCasts())) - Base = EmitOMPArraySectionExpr(ASE, IsLowerBound); + BaseTy = OMPArraySectionExpr::getBaseOriginalType(ASE); else - Base = EmitLValue(E->getBase()); - QualType BaseTy = Base.getType(); - llvm::Value *Idx = nullptr; + BaseTy = E->getBase()->getType(); QualType ResultExprTy; if (auto *AT = getContext().getAsArrayType(BaseTy)) ResultExprTy = AT->getElementType(); else ResultExprTy = BaseTy->getPointeeType(); - if (IsLowerBound || (!IsLowerBound && E->getColonLoc().isInvalid())) { + llvm::Value *Idx = nullptr; + if (IsLowerBound || E->getColonLoc().isInvalid()) { // Requesting lower bound or upper bound, but without provided length and // without ':' symbol for the default length -> length = 1. // Idx = LowerBound ?: 0; @@ -2853,9 +3056,9 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, } else Idx = llvm::ConstantInt::getNullValue(IntPtrTy); } else { - // Try to emit length or lower bound as constant. If this is possible, 1 is - // subtracted from constant length or lower bound. Otherwise, emit LLVM IR - // (LB + Len) - 1. + // Try to emit length or lower bound as constant. If this is possible, 1 + // is subtracted from constant length or lower bound. Otherwise, emit LLVM + // IR (LB + Len) - 1. auto &C = CGM.getContext(); auto *Length = E->getLength(); llvm::APSInt ConstLength; @@ -2901,12 +3104,15 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, Idx = llvm::ConstantInt::get(IntPtrTy, ConstLength + ConstLowerBound); } else { // Idx = ArraySize - 1; - if (auto *VAT = C.getAsVariableArrayType(BaseTy)) { + QualType ArrayTy = BaseTy->isPointerType() + ? 
E->getBase()->IgnoreParenImpCasts()->getType() + : BaseTy; + if (auto *VAT = C.getAsVariableArrayType(ArrayTy)) { Length = VAT->getSizeExpr(); if (Length->isIntegerConstantExpr(ConstLength, C)) Length = nullptr; } else { - auto *CAT = C.getAsConstantArrayType(BaseTy); + auto *CAT = C.getAsConstantArrayType(ArrayTy); ConstLength = CAT->getSize(); } if (Length) { @@ -2925,52 +3131,56 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, } assert(Idx); - llvm::Value *EltPtr; - QualType FixedSizeEltType = ResultExprTy; + Address EltPtr = Address::invalid(); + AlignmentSource AlignSource; if (auto *VLA = getContext().getAsVariableArrayType(ResultExprTy)) { + // The base must be a pointer, which is not an aggregate. Emit + // it. It needs to be emitted first in case it's what captures + // the VLA bounds. + Address Base = + emitOMPArraySectionBase(*this, E->getBase(), AlignSource, BaseTy, + VLA->getElementType(), IsLowerBound); // The element count here is the total number of non-VLA elements. - llvm::Value *numElements = getVLASize(VLA).first; - FixedSizeEltType = getFixedSizeElementType(getContext(), VLA); + llvm::Value *NumElements = getVLASize(VLA).first; // Effectively, the multiply by the VLA size is part of the GEP. // GEP indexes are signed, and scaling an index isn't permitted to // signed-overflow, so we use the same semantics for our explicit // multiply. We suppress this if overflow is not undefined behavior. - if (getLangOpts().isSignedOverflowDefined()) { - Idx = Builder.CreateMul(Idx, numElements); - EltPtr = Builder.CreateGEP(Base.getPointer(), Idx, "arrayidx"); - } else { - Idx = Builder.CreateNSWMul(Idx, numElements); - EltPtr = Builder.CreateInBoundsGEP(Base.getPointer(), Idx, "arrayidx"); - } - } else if (BaseTy->isConstantArrayType()) { - llvm::Value *ArrayPtr = Base.getPointer(); - llvm::Value *Zero = llvm::ConstantInt::getNullValue(IntPtrTy); - llvm::Value *Args[] = {Zero, Idx}; - if (getLangOpts().isSignedOverflowDefined()) - EltPtr = Builder.CreateGEP(ArrayPtr, Args, "arrayidx"); + Idx = Builder.CreateMul(Idx, NumElements); else - EltPtr = Builder.CreateInBoundsGEP(ArrayPtr, Args, "arrayidx"); - } else { - // The base must be a pointer, which is not an aggregate. Emit it. - if (getLangOpts().isSignedOverflowDefined()) - EltPtr = Builder.CreateGEP(Base.getPointer(), Idx, "arrayidx"); + Idx = Builder.CreateNSWMul(Idx, NumElements); + EltPtr = emitArraySubscriptGEP(*this, Base, Idx, VLA->getElementType(), + !getLangOpts().isSignedOverflowDefined()); + } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) { + // If this is A[i] where A is an array, the frontend will have decayed the + // base to be a ArrayToPointerDecay implicit cast. While correct, it is + // inefficient at -O0 to emit a "gep A, 0, 0" when codegen'ing it, then a + // "gep x, i" here. Emit one "gep A, 0, i". + assert(Array->getType()->isArrayType() && + "Array to pointer decay must have array source type!"); + LValue ArrayLV; + // For simple multidimensional array indexing, set the 'accessed' flag for + // better bounds-checking of the base expression. + if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Array)) + ArrayLV = EmitArraySubscriptExpr(ASE, /*Accessed*/ true); else - EltPtr = Builder.CreateInBoundsGEP(Base.getPointer(), Idx, "arrayidx"); - } - - CharUnits EltAlign = - Base.getAlignment().alignmentOfArrayElement( - getContext().getTypeSizeInChars(FixedSizeEltType)); - - // Limit the alignment to that of the result type. 
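The array-section lowering above handles OpenMP sections over pointers, constant arrays and VLAs, including the defaulted lower-bound/length cases folded into the Idx computation. A source-level illustration of the sections involved (requires -fopenmp; illustrative only):

void scale(int *p, int n) {
  // p[0:n] is an array section: lower bound 0, length n.
#pragma omp target map(tofrom : p[0:n])
  for (int i = 0; i < n; ++i)
    p[i] *= 2;
}

void demo() {
  int a[100] = {};
  // a[10:20] covers elements a[10] .. a[29].
#pragma omp target map(tofrom : a[10:20])
  for (int i = 10; i < 30; ++i)
    a[i] *= 2;
}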
- LValue LV = MakeAddrLValue(Address(EltPtr, EltAlign), ResultExprTy, - Base.getAlignmentSource()); + ArrayLV = EmitLValue(Array); - LV.getQuals().setAddressSpace(BaseTy.getAddressSpace()); + // Propagate the alignment from the array itself to the result. + EltPtr = emitArraySubscriptGEP( + *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, + ResultExprTy, !getLangOpts().isSignedOverflowDefined()); + AlignSource = ArrayLV.getAlignmentSource(); + } else { + Address Base = emitOMPArraySectionBase(*this, E->getBase(), AlignSource, + BaseTy, ResultExprTy, IsLowerBound); + EltPtr = emitArraySubscriptGEP(*this, Base, Idx, ResultExprTy, + !getLangOpts().isSignedOverflowDefined()); + } - return LV; + return MakeAddrLValue(EltPtr, ResultExprTy, AlignSource); } LValue CodeGenFunction:: @@ -3508,6 +3718,10 @@ RValue CodeGenFunction::EmitRValueForField(LValue LV, case TEK_Aggregate: return FieldLV.asAggregateRValue(); case TEK_Scalar: + // This routine is used to load fields one-by-one to perform a copy, so + // don't load reference fields. + if (FD->getType()->isReferenceType()) + return RValue::get(FieldLV.getPointer()); return EmitLoadOfLValue(FieldLV, Loc); } llvm_unreachable("bad evaluation kind"); @@ -3851,25 +4065,28 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, llvm::Value *Callee, if (SanOpts.has(SanitizerKind::CFIICall) && (!TargetDecl || !isa<FunctionDecl>(TargetDecl))) { SanitizerScope SanScope(this); + EmitSanitizerStatReport(llvm::SanStat_CFI_ICall); llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(QualType(FnType, 0)); - llvm::Value *BitSetName = llvm::MetadataAsValue::get(getLLVMContext(), MD); + llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD); llvm::Value *CastedCallee = Builder.CreateBitCast(Callee, Int8PtrTy); - llvm::Value *BitSetTest = - Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::bitset_test), - {CastedCallee, BitSetName}); + llvm::Value *TypeTest = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedCallee, TypeId}); - auto TypeId = CGM.CreateCfiIdForTypeMetadata(MD); - if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && TypeId) { - EmitCfiSlowPathCheck(BitSetTest, TypeId, CastedCallee); + auto CrossDsoTypeId = CGM.CreateCrossDsoCfiTypeId(MD); + llvm::Constant *StaticData[] = { + llvm::ConstantInt::get(Int8Ty, CFITCK_ICall), + EmitCheckSourceLocation(E->getLocStart()), + EmitCheckTypeDescriptor(QualType(FnType, 0)), + }; + if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && CrossDsoTypeId) { + EmitCfiSlowPathCheck(SanitizerKind::CFIICall, TypeTest, CrossDsoTypeId, + CastedCallee, StaticData); } else { - llvm::Constant *StaticData[] = { - EmitCheckSourceLocation(E->getLocStart()), - EmitCheckTypeDescriptor(QualType(FnType, 0)), - }; - EmitCheck(std::make_pair(BitSetTest, SanitizerKind::CFIICall), - "cfi_bad_icall", StaticData, CastedCallee); + EmitCheck(std::make_pair(TypeTest, SanitizerKind::CFIICall), + "cfi_check_fail", StaticData, + {CastedCallee, llvm::UndefValue::get(IntPtrTy)}); } } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp index a4547a9..6d18843 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp @@ -175,6 +175,7 @@ public: } void VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E); void VisitCXXConstructExpr(const CXXConstructExpr *E); + void VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E); void VisitLambdaExpr(LambdaExpr *E); void 
VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E); void VisitExprWithCleanups(ExprWithCleanups *E); @@ -967,12 +968,9 @@ void AggExprEmitter::VisitVAArgExpr(VAArgExpr *VE) { Address ArgValue = Address::invalid(); Address ArgPtr = CGF.EmitVAArg(VE, ArgValue); + // If EmitVAArg fails, emit an error. if (!ArgPtr.isValid()) { - // If EmitVAArg fails, we fall back to the LLVM instruction. - llvm::Value *Val = Builder.CreateVAArg(ArgValue.getPointer(), - CGF.ConvertType(VE->getType())); - if (!Dest.isIgnored()) - Builder.CreateStore(Val, Dest.getAddress()); + CGF.ErrorUnsupported(VE, "aggregate va_arg expression"); return; } @@ -1001,6 +999,14 @@ AggExprEmitter::VisitCXXConstructExpr(const CXXConstructExpr *E) { CGF.EmitCXXConstructExpr(E, Slot); } +void AggExprEmitter::VisitCXXInheritedCtorInitExpr( + const CXXInheritedCtorInitExpr *E) { + AggValueSlot Slot = EnsureSlot(E->getType()); + CGF.EmitInheritedCXXConstructorCall( + E->getConstructor(), E->constructsVBase(), Slot.getAddress(), + E->inheritedFromVBase(), E); +} + void AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) { AggValueSlot Slot = EnsureSlot(E->getType()); @@ -1174,6 +1180,38 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { unsigned NumInitElements = E->getNumInits(); RecordDecl *record = E->getType()->castAs<RecordType>()->getDecl(); + // We'll need to enter cleanup scopes in case any of the element + // initializers throws an exception. + SmallVector<EHScopeStack::stable_iterator, 16> cleanups; + llvm::Instruction *cleanupDominator = nullptr; + + unsigned curInitIndex = 0; + + // Emit initialization of base classes. + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(record)) { + assert(E->getNumInits() >= CXXRD->getNumBases() && + "missing initializer for base class"); + for (auto &Base : CXXRD->bases()) { + assert(!Base.isVirtual() && "should not see vbases here"); + auto *BaseRD = Base.getType()->getAsCXXRecordDecl(); + Address V = CGF.GetAddressOfDirectBaseInCompleteClass( + Dest.getAddress(), CXXRD, BaseRD, + /*isBaseVirtual*/ false); + AggValueSlot AggSlot = + AggValueSlot::forAddr(V, Qualifiers(), + AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased); + CGF.EmitAggExpr(E->getInit(curInitIndex++), AggSlot); + + if (QualType::DestructionKind dtorKind = + Base.getType().isDestructedType()) { + CGF.pushDestroy(dtorKind, V, Base.getType()); + cleanups.push_back(CGF.EHStack.stable_begin()); + } + } + } + // Prepare a 'this' for CXXDefaultInitExprs. CodeGenFunction::FieldConstructionScope FCS(CGF, Dest.getAddress()); @@ -1207,14 +1245,8 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { return; } - // We'll need to enter cleanup scopes in case any of the member - // initializers throw an exception. - SmallVector<EHScopeStack::stable_iterator, 16> cleanups; - llvm::Instruction *cleanupDominator = nullptr; - // Here we iterate over the fields; this makes it simpler to both // default-initialize fields and skip over unnamed fields. - unsigned curInitIndex = 0; for (const auto *field : record->fields()) { // We're done once we hit the flexible array member. 
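The new loop above emits initializers for the base-class subobjects of an aggregate before its fields, and registers destructor cleanups for bases already constructed in case a later initializer throws. A source-level illustration of the init lists this applies to (aggregate initialization with bases, accepted under -std=c++1z/C++17):

struct Base {
  int b;
};

struct Derived : Base {   // still an aggregate in C++17
  int d;
};

// The braced list provides one initializer for the Base subobject, then the
// fields; the emitter walks the bases first, in exactly this order.
Derived d = { {1}, 2 };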
if (field->getType()->isIncompleteArrayType()) @@ -1320,6 +1352,10 @@ static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) { CharUnits NumNonZeroBytes = CharUnits::Zero(); unsigned ILEElement = 0; + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(SD)) + while (ILEElement != CXXRD->getNumBases()) + NumNonZeroBytes += + GetNumNonZeroBytesInInit(ILE->getInit(ILEElement++), CGF); for (const auto *Field : SD->fields()) { // We're done once we hit the flexible array member or run out of // InitListExpr elements. diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp index 604cde7..eec2ace 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp @@ -24,10 +24,11 @@ using namespace clang; using namespace CodeGen; -static RequiredArgs commonEmitCXXMemberOrOperatorCall( - CodeGenFunction &CGF, const CXXMethodDecl *MD, llvm::Value *Callee, - ReturnValueSlot ReturnValue, llvm::Value *This, llvm::Value *ImplicitParam, - QualType ImplicitParamTy, const CallExpr *CE, CallArgList &Args) { +static RequiredArgs +commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, + llvm::Value *This, llvm::Value *ImplicitParam, + QualType ImplicitParamTy, const CallExpr *CE, + CallArgList &Args) { assert(CE == nullptr || isa<CXXMemberCallExpr>(CE) || isa<CXXOperatorCallExpr>(CE)); assert(MD->isInstance() && @@ -53,7 +54,7 @@ static RequiredArgs commonEmitCXXMemberOrOperatorCall( } const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); - RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size()); + RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size(), MD); // And the rest of the call args. 
if (CE) { @@ -76,21 +77,20 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorCall( const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); CallArgList Args; RequiredArgs required = commonEmitCXXMemberOrOperatorCall( - *this, MD, Callee, ReturnValue, This, ImplicitParam, ImplicitParamTy, CE, - Args); + *this, MD, This, ImplicitParam, ImplicitParamTy, CE, Args); return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required), Callee, ReturnValue, Args, MD); } -RValue CodeGenFunction::EmitCXXStructorCall( - const CXXMethodDecl *MD, llvm::Value *Callee, ReturnValueSlot ReturnValue, - llvm::Value *This, llvm::Value *ImplicitParam, QualType ImplicitParamTy, - const CallExpr *CE, StructorType Type) { +RValue CodeGenFunction::EmitCXXDestructorCall( + const CXXDestructorDecl *DD, llvm::Value *Callee, llvm::Value *This, + llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE, + StructorType Type) { CallArgList Args; - commonEmitCXXMemberOrOperatorCall(*this, MD, Callee, ReturnValue, This, - ImplicitParam, ImplicitParamTy, CE, Args); - return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(MD, Type), - Callee, ReturnValue, Args, MD); + commonEmitCXXMemberOrOperatorCall(*this, DD, This, ImplicitParam, + ImplicitParamTy, CE, Args); + return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(DD, Type), + Callee, ReturnValueSlot(), Args, DD); } static CXXRecordDecl *getCXXRecord(const Expr *E) { @@ -259,7 +259,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (SanOpts.has(SanitizerKind::CFINVCall) && MD->getParent()->isDynamicClass()) { llvm::Value *VTable = GetVTablePtr(This, Int8PtrTy, MD->getParent()); - EmitVTablePtrCheckForCall(MD, VTable, CFITCK_NVCall, CE->getLocStart()); + EmitVTablePtrCheckForCall(MD->getParent(), VTable, CFITCK_NVCall, + CE->getLocStart()); } if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier) @@ -273,7 +274,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (MD->isVirtual()) { This = CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall( - *this, MD, This, UseVirtualCall); + *this, CalleeDecl, This, UseVirtualCall); } return EmitCXXMemberOrOperatorCall(MD, Callee, ReturnValue, This.getPointer(), @@ -323,10 +324,11 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, // Push the this ptr. Args.add(RValue::get(ThisPtrForCall), ThisType); - RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, 1); - + RequiredArgs required = + RequiredArgs::forPrototypePlus(FPT, 1, /*FD=*/nullptr); + // And the rest of the call args - EmitCallArgs(Args, FPT, E->arguments(), E->getDirectCallee()); + EmitCallArgs(Args, FPT, E->arguments()); return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required), Callee, ReturnValue, Args); } @@ -369,6 +371,9 @@ static void EmitNullBaseClassInitialization(CodeGenFunction &CGF, std::vector<CharUnits> VBPtrOffsets = CGF.CGM.getCXXABI().getVBPtrOffsets(Base); for (CharUnits VBPtrOffset : VBPtrOffsets) { + // Stop before we hit any virtual base pointers located in virtual bases. 
+ if (VBPtrOffset >= NVSize) + break; std::pair<CharUnits, CharUnits> LastStore = Stores.pop_back_val(); CharUnits LastStoreOffset = LastStore.first; CharUnits LastStoreSize = LastStore.second; @@ -471,8 +476,8 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E, } } - if (const ConstantArrayType *arrayType - = getContext().getAsConstantArrayType(E->getType())) { + if (const ArrayType *arrayType + = getContext().getAsArrayType(E->getType())) { EmitCXXAggrConstructorCall(CD, arrayType, Dest.getAddress(), E); } else { CXXCtorType Type = Ctor_Complete; @@ -1010,15 +1015,18 @@ void CodeGenFunction::EmitNewArrayInitializer( if (auto *ILE = dyn_cast<InitListExpr>(Init)) { if (const RecordType *RType = ILE->getType()->getAs<RecordType>()) { if (RType->getDecl()->isStruct()) { - unsigned NumFields = 0; + unsigned NumElements = 0; + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RType->getDecl())) + NumElements = CXXRD->getNumBases(); for (auto *Field : RType->getDecl()->fields()) if (!Field->isUnnamedBitfield()) - ++NumFields; - if (ILE->getNumInits() == NumFields) + ++NumElements; + // FIXME: Recurse into nested InitListExprs. + if (ILE->getNumInits() == NumElements) for (unsigned i = 0, e = ILE->getNumInits(); i != e; ++i) if (!isa<ImplicitValueInitExpr>(ILE->getInit(i))) - --NumFields; - if (ILE->getNumInits() == NumFields && TryMemsetInitialization()) + --NumElements; + if (ILE->getNumInits() == NumElements && TryMemsetInitialization()) return; } } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp index ee049f1..803b399 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp @@ -111,7 +111,7 @@ AppendBytes(CharUnits FieldOffsetInChars, llvm::Constant *InitCst) { // Round up the field offset to the alignment of the field type. CharUnits AlignedNextFieldOffsetInChars = - NextFieldOffsetInChars.RoundUpToAlignment(FieldAlignment); + NextFieldOffsetInChars.alignTo(FieldAlignment); if (AlignedNextFieldOffsetInChars < FieldOffsetInChars) { // We need to append padding. @@ -121,7 +121,7 @@ AppendBytes(CharUnits FieldOffsetInChars, llvm::Constant *InitCst) { "Did not add enough padding!"); AlignedNextFieldOffsetInChars = - NextFieldOffsetInChars.RoundUpToAlignment(FieldAlignment); + NextFieldOffsetInChars.alignTo(FieldAlignment); } if (AlignedNextFieldOffsetInChars > FieldOffsetInChars) { @@ -162,8 +162,8 @@ void ConstStructBuilder::AppendBitField(const FieldDecl *Field, if (FieldOffset > NextFieldOffsetInBits) { // We need to add padding. CharUnits PadSize = Context.toCharUnitsFromBits( - llvm::RoundUpToAlignment(FieldOffset - NextFieldOffsetInBits, - Context.getTargetInfo().getCharAlign())); + llvm::alignTo(FieldOffset - NextFieldOffsetInBits, + Context.getTargetInfo().getCharAlign())); AppendPadding(PadSize); } @@ -334,7 +334,7 @@ void ConstStructBuilder::ConvertStructToPacked() { CharUnits ElementAlign = CharUnits::fromQuantity( CGM.getDataLayout().getABITypeAlignment(C->getType())); CharUnits AlignedElementOffsetInChars = - ElementOffsetInChars.RoundUpToAlignment(ElementAlign); + ElementOffsetInChars.alignTo(ElementAlign); if (AlignedElementOffsetInChars > ElementOffsetInChars) { // We need some padding. @@ -368,7 +368,14 @@ bool ConstStructBuilder::Build(InitListExpr *ILE) { unsigned FieldNo = 0; unsigned ElementNo = 0; - + + // Bail out if we have base classes. 
We could support these, but they only + // arise in C++1z where we will have already constant folded most interesting + // cases. FIXME: There are still a few more cases we can handle this way. + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + if (CXXRD->getNumBases()) + return false; + for (RecordDecl::field_iterator Field = RD->field_begin(), FieldEnd = RD->field_end(); Field != FieldEnd; ++Field, ++FieldNo) { // If this is a union, skip all the fields that aren't being initialized. @@ -508,13 +515,12 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { } else { // Append tail padding if necessary. CharUnits LLVMSizeInChars = - NextFieldOffsetInChars.RoundUpToAlignment(LLVMStructAlignment); + NextFieldOffsetInChars.alignTo(LLVMStructAlignment); if (LLVMSizeInChars != LayoutSizeInChars) AppendTailPadding(LayoutSizeInChars); - LLVMSizeInChars = - NextFieldOffsetInChars.RoundUpToAlignment(LLVMStructAlignment); + LLVMSizeInChars = NextFieldOffsetInChars.alignTo(LLVMStructAlignment); // Check if we need to convert the struct to a packed struct. if (NextFieldOffsetInChars <= LayoutSizeInChars && @@ -526,8 +532,7 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { "Converting to packed did not help!"); } - LLVMSizeInChars = - NextFieldOffsetInChars.RoundUpToAlignment(LLVMStructAlignment); + LLVMSizeInChars = NextFieldOffsetInChars.alignTo(LLVMStructAlignment); assert(LayoutSizeInChars == LLVMSizeInChars && "Tail padding mismatch!"); @@ -546,8 +551,9 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { llvm::Constant *Result = llvm::ConstantStruct::get(STy, Elements); - assert(NextFieldOffsetInChars.RoundUpToAlignment(getAlignment(Result)) == - getSizeInChars(Result) && "Size mismatch!"); + assert(NextFieldOffsetInChars.alignTo(getAlignment(Result)) == + getSizeInChars(Result) && + "Size mismatch!"); return Result; } @@ -758,6 +764,12 @@ public: return Visit(DIE->getExpr()); } + llvm::Constant *VisitExprWithCleanups(ExprWithCleanups *E) { + if (!E->cleanupsHaveSideEffects()) + return Visit(E->getSubExpr()); + return nullptr; + } + llvm::Constant *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E) { return Visit(E->GetTemporaryExpr()); } @@ -1125,6 +1137,13 @@ bool ConstStructBuilder::Build(ConstExprEmitter *Emitter, unsigned FieldNo = -1; unsigned ElementNo = 0; + // Bail out if we have base classes. We could support these, but they only + // arise in C++1z where we will have already constant folded most interesting + // cases. FIXME: There are still a few more cases we can handle this way. + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + if (CXXRD->getNumBases()) + return false; + for (FieldDecl *Field : RD->fields()) { ++FieldNo; @@ -1301,8 +1320,14 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, // Convert to the appropriate type; this could be an lvalue for // an integer. - if (isa<llvm::PointerType>(DestTy)) + if (isa<llvm::PointerType>(DestTy)) { + // Convert the integer to a pointer-sized integer before converting it + // to a pointer. + C = llvm::ConstantExpr::getIntegerCast( + C, getDataLayout().getIntPtrType(DestTy), + /*isSigned=*/false); return llvm::ConstantExpr::getIntToPtr(C, DestTy); + } // If the types don't match this should only be a truncate. 
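Several hunks in CGExprConstant.cpp above are a mechanical rename from CharUnits::RoundUpToAlignment to CharUnits::alignTo (and llvm::RoundUpToAlignment to llvm::alignTo). A standalone sketch of the rounding both spellings perform, for readers checking that the rename is behavior-preserving (this helper is illustrative, not the LLVM implementation):

#include <cstdint>

// Round value up to the next multiple of align (align must be non-zero).
constexpr uint64_t alignTo(uint64_t value, uint64_t align) {
  return (value + align - 1) / align * align;
}

static_assert(alignTo(13, 8) == 16, "13 rounds up to the next 8-byte boundary");
static_assert(alignTo(16, 8) == 16, "already-aligned values are unchanged");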
if (C->getType() != DestTy) diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp index 268e796..120dacf 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp @@ -818,7 +818,7 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, "Splatted expr doesn't match with vector element type?"); // Splat the element across to all elements - unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements(); + unsigned NumElements = DstTy->getVectorNumElements(); return Builder.CreateVectorSplat(NumElements, Src, "splat"); } @@ -984,8 +984,7 @@ Value *ScalarExprEmitter::VisitExpr(Expr *E) { Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) { // Vector Mask Case - if (E->getNumSubExprs() == 2 || - (E->getNumSubExprs() == 3 && E->getExpr(2)->getType()->isVectorType())) { + if (E->getNumSubExprs() == 2) { Value *LHS = CGF.EmitScalarExpr(E->getExpr(0)); Value *RHS = CGF.EmitScalarExpr(E->getExpr(1)); Value *Mask; @@ -993,22 +992,7 @@ Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) { llvm::VectorType *LTy = cast<llvm::VectorType>(LHS->getType()); unsigned LHSElts = LTy->getNumElements(); - if (E->getNumSubExprs() == 3) { - Mask = CGF.EmitScalarExpr(E->getExpr(2)); - - // Shuffle LHS & RHS into one input vector. - SmallVector<llvm::Constant*, 32> concat; - for (unsigned i = 0; i != LHSElts; ++i) { - concat.push_back(Builder.getInt32(2*i)); - concat.push_back(Builder.getInt32(2*i+1)); - } - - Value* CV = llvm::ConstantVector::get(concat); - LHS = Builder.CreateShuffleVector(LHS, RHS, CV, "concat"); - LHSElts *= 2; - } else { - Mask = RHS; - } + Mask = RHS; llvm::VectorType *MTy = cast<llvm::VectorType>(Mask->getType()); @@ -1366,8 +1350,9 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { QualType DestTy = CE->getType(); CastKind Kind = CE->getCastKind(); - if (!DestTy->isVoidType()) - TestAndClearIgnoreResultAssign(); + // These cases are generally not written to ignore the result of + // evaluating their sub-expressions, so we clear this now. + bool Ignored = TestAndClearIgnoreResultAssign(); // Since almost all cast kinds apply to scalars, this switch doesn't have // a default case, so the compiler will warn on a missing case. The cases @@ -1410,7 +1395,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } case CK_AddressSpaceConversion: { Value *Src = Visit(const_cast<Expr*>(E)); - return Builder.CreateAddrSpaceCast(Src, ConvertType(DestTy)); + // Since target may map different address spaces in AST to the same address + // space, an address space conversion may end up as a bitcast. 
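A sketch of the distinction the address-space comment above is making, in plain IRBuilder terms (the helper name is invented): an addrspacecast between identical address spaces is invalid IR, so when the target maps two source-language address spaces onto one LLVM address space the conversion must degrade to a bitcast, which is exactly the choice CreatePointerBitCastOrAddrSpaceCast makes.

#include "llvm/IR/IRBuilder.h"

// Illustrative helper (not clang code): pick the right cast for a pointer
// conversion whose source and destination address spaces may or may not
// differ after the target's address-space mapping has been applied.
llvm::Value *castAcrossAddressSpaces(llvm::IRBuilder<> &Builder,
                                     llvm::Value *Src,
                                     llvm::PointerType *DstTy) {
  // Emits a bitcast when the address spaces match, addrspacecast otherwise.
  return Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DstTy, "conv");
}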
+ return Builder.CreatePointerBitCastOrAddrSpaceCast(Src, + ConvertType(DestTy)); } case CK_AtomicToNonAtomic: case CK_NonAtomicToAtomic: @@ -1494,11 +1482,8 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { return CGF.EmitARCRetainScalarExpr(E); case CK_ARCConsumeObject: return CGF.EmitObjCConsumeObject(E->getType(), Visit(E)); - case CK_ARCReclaimReturnedObject: { - llvm::Value *value = Visit(E); - value = CGF.EmitARCRetainAutoreleasedReturnValue(value); - return CGF.EmitObjCConsumeObject(E->getType(), value); - } + case CK_ARCReclaimReturnedObject: + return CGF.EmitARCReclaimReturnedObject(E, /*allowUnsafe*/ Ignored); case CK_ARCExtendBlockObject: return CGF.EmitARCExtendBlockObject(E); @@ -1544,7 +1529,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { llvm::Type *DstTy = ConvertType(DestTy); Value *Elt = Visit(const_cast<Expr*>(E)); // Splat the element across to all elements - unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements(); + unsigned NumElements = DstTy->getVectorNumElements(); return Builder.CreateVectorSplat(NumElements, Elt, "splat"); } @@ -1654,13 +1639,14 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type); if (isPre) { Builder.CreateStore(True, LV.getAddress(), LV.isVolatileQualified()) - ->setAtomic(llvm::SequentiallyConsistent); + ->setAtomic(llvm::AtomicOrdering::SequentiallyConsistent); return Builder.getTrue(); } // For atomic bool increment, we just store true and return it for // preincrement, do an atomic swap with true for postincrement - return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - LV.getPointer(), True, llvm::SequentiallyConsistent); + return Builder.CreateAtomicRMW( + llvm::AtomicRMWInst::Xchg, LV.getPointer(), True, + llvm::AtomicOrdering::SequentiallyConsistent); } // Special case for atomic increment / decrement on integers, emit // atomicrmw instructions. We skip this if we want to be doing overflow @@ -1677,7 +1663,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::Value *amt = CGF.EmitToMemory( llvm::ConstantInt::get(ConvertType(type), 1, true), type); llvm::Value *old = Builder.CreateAtomicRMW(aop, - LV.getPointer(), amt, llvm::SequentiallyConsistent); + LV.getPointer(), amt, llvm::AtomicOrdering::SequentiallyConsistent); return isPre ? Builder.CreateBinOp(op, old, amt) : old; } value = EmitLoadOfLValue(LV, E->getExprLoc()); @@ -1794,15 +1780,19 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, amt = llvm::ConstantFP::get(VMContext, llvm::APFloat(static_cast<double>(amount))); else { - // Remaining types are either Half or LongDouble. Convert from float. + // Remaining types are Half, LongDouble or __float128. Convert from float. llvm::APFloat F(static_cast<float>(amount)); bool ignored; + const llvm::fltSemantics *FS; // Don't use getFloatTypeSemantics because Half isn't // necessarily represented using the "half" LLVM type. - F.convert(value->getType()->isHalfTy() - ? CGF.getTarget().getHalfFormat() - : CGF.getTarget().getLongDoubleFormat(), - llvm::APFloat::rmTowardZero, &ignored); + if (value->getType()->isFP128Ty()) + FS = &CGF.getTarget().getFloat128Format(); + else if (value->getType()->isHalfTy()) + FS = &CGF.getTarget().getHalfFormat(); + else + FS = &CGF.getTarget().getLongDoubleFormat(); + F.convert(*FS, llvm::APFloat::rmTowardZero, &ignored); amt = llvm::ConstantFP::get(VMContext, F); } value = Builder.CreateFAdd(value, amt, isInc ? 
"inc" : "dec"); @@ -2159,7 +2149,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( E->getExprLoc()), LHSTy); Builder.CreateAtomicRMW(aop, LHSLV.getPointer(), amt, - llvm::SequentiallyConsistent); + llvm::AtomicOrdering::SequentiallyConsistent); return LHSLV; } } @@ -2716,7 +2706,8 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { RHS = Builder.CreateIntCast(RHS, Ops.LHS->getType(), false, "sh_prom"); bool SanitizeBase = CGF.SanOpts.has(SanitizerKind::ShiftBase) && - Ops.Ty->hasSignedIntegerRepresentation(); + Ops.Ty->hasSignedIntegerRepresentation() && + !CGF.getLangOpts().isSignedOverflowDefined(); bool SanitizeExponent = CGF.SanOpts.has(SanitizerKind::ShiftExponent); // OpenCL 6.3j: shift values are effectively % word size of LHS. if (CGF.getLangOpts().OpenCL) @@ -2993,15 +2984,17 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { std::tie(LHS, RHS) = CGF.EmitARCStoreAutoreleasing(E); break; + case Qualifiers::OCL_ExplicitNone: + std::tie(LHS, RHS) = CGF.EmitARCStoreUnsafeUnretained(E, Ignore); + break; + case Qualifiers::OCL_Weak: RHS = Visit(E->getRHS()); LHS = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); RHS = CGF.EmitARCStoreWeak(LHS.getAddress(), RHS, Ignore); break; - // No reason to do any of these differently. case Qualifiers::OCL_None: - case Qualifiers::OCL_ExplicitNone: // __block variables need to have the rhs evaluated first, plus // this should improve codegen just a little. RHS = Visit(E->getRHS()); @@ -3366,9 +3359,11 @@ Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) { llvm::Type *ArgTy = ConvertType(VE->getType()); - // If EmitVAArg fails, we fall back to the LLVM instruction. - if (!ArgPtr.isValid()) - return Builder.CreateVAArg(ArgValue.getPointer(), ArgTy); + // If EmitVAArg fails, emit an error. + if (!ArgPtr.isValid()) { + CGF.ErrorUnsupported(VE, "va_arg expression"); + return llvm::UndefValue::get(ArgTy); + } // FIXME Volatility. llvm::Value *Val = Builder.CreateLoad(ArgPtr); @@ -3388,50 +3383,48 @@ Value *ScalarExprEmitter::VisitBlockExpr(const BlockExpr *block) { return CGF.EmitBlockLiteral(block); } +// Convert a vec3 to vec4, or vice versa. +static Value *ConvertVec3AndVec4(CGBuilderTy &Builder, CodeGenFunction &CGF, + Value *Src, unsigned NumElementsDst) { + llvm::Value *UnV = llvm::UndefValue::get(Src->getType()); + SmallVector<llvm::Constant*, 4> Args; + Args.push_back(Builder.getInt32(0)); + Args.push_back(Builder.getInt32(1)); + Args.push_back(Builder.getInt32(2)); + if (NumElementsDst == 4) + Args.push_back(llvm::UndefValue::get(CGF.Int32Ty)); + llvm::Constant *Mask = llvm::ConstantVector::get(Args); + return Builder.CreateShuffleVector(Src, UnV, Mask); +} + Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { Value *Src = CGF.EmitScalarExpr(E->getSrcExpr()); llvm::Type *DstTy = ConvertType(E->getType()); - // Going from vec4->vec3 or vec3->vec4 is a special case and requires - // a shuffle vector instead of a bitcast. llvm::Type *SrcTy = Src->getType(); - if (isa<llvm::VectorType>(DstTy) && isa<llvm::VectorType>(SrcTy)) { - unsigned numElementsDst = cast<llvm::VectorType>(DstTy)->getNumElements(); - unsigned numElementsSrc = cast<llvm::VectorType>(SrcTy)->getNumElements(); - if ((numElementsDst == 3 && numElementsSrc == 4) - || (numElementsDst == 4 && numElementsSrc == 3)) { - - - // In the case of going from int4->float3, a bitcast is needed before - // doing a shuffle. 
- llvm::Type *srcElemTy = - cast<llvm::VectorType>(SrcTy)->getElementType(); - llvm::Type *dstElemTy = - cast<llvm::VectorType>(DstTy)->getElementType(); - - if ((srcElemTy->isIntegerTy() && dstElemTy->isFloatTy()) - || (srcElemTy->isFloatTy() && dstElemTy->isIntegerTy())) { - // Create a float type of the same size as the source or destination. - llvm::VectorType *newSrcTy = llvm::VectorType::get(dstElemTy, - numElementsSrc); - - Src = Builder.CreateBitCast(Src, newSrcTy, "astypeCast"); - } - - llvm::Value *UnV = llvm::UndefValue::get(Src->getType()); - - SmallVector<llvm::Constant*, 3> Args; - Args.push_back(Builder.getInt32(0)); - Args.push_back(Builder.getInt32(1)); - Args.push_back(Builder.getInt32(2)); - - if (numElementsDst == 4) - Args.push_back(llvm::UndefValue::get(CGF.Int32Ty)); - - llvm::Constant *Mask = llvm::ConstantVector::get(Args); + unsigned NumElementsSrc = isa<llvm::VectorType>(SrcTy) ? + cast<llvm::VectorType>(SrcTy)->getNumElements() : 0; + unsigned NumElementsDst = isa<llvm::VectorType>(DstTy) ? + cast<llvm::VectorType>(DstTy)->getNumElements() : 0; + + // Going from vec3 to non-vec3 is a special case and requires a shuffle + // vector to get a vec4, then a bitcast if the target type is different. + if (NumElementsSrc == 3 && NumElementsDst != 3) { + Src = ConvertVec3AndVec4(Builder, CGF, Src, 4); + Src = Builder.CreateBitCast(Src, DstTy); + Src->setName("astype"); + return Src; + } - return Builder.CreateShuffleVector(Src, UnV, Mask, "astype"); - } + // Going from non-vec3 to vec3 is a special case and requires a bitcast + // to vec4 if the original type is not vec4, then a shuffle vector to + // get a vec3. + if (NumElementsSrc != 3 && NumElementsDst == 3) { + auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4); + Src = Builder.CreateBitCast(Src, Vec4Ty); + Src = ConvertVec3AndVec4(Builder, CGF, Src, 3); + Src->setName("astype"); + return Src; } return Builder.CreateBitCast(Src, DstTy, "astype"); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp index 0afe7db..51474f1 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp @@ -19,12 +19,15 @@ using namespace clang::CodeGen; using namespace llvm; -static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) { +static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs, + llvm::DebugLoc Location) { if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && Attrs.VectorizeEnable == LoopAttributes::Unspecified && - Attrs.UnrollEnable == LoopAttributes::Unspecified) + Attrs.UnrollEnable == LoopAttributes::Unspecified && + Attrs.DistributeEnable == LoopAttributes::Unspecified && + !Location) return nullptr; SmallVector<Metadata *, 4> Args; @@ -32,6 +35,10 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) { auto TempNode = MDNode::getTemporary(Ctx, None); Args.push_back(TempNode.get()); + // If we have a valid debug location for the loop, add it. 
+ if (Location) + Args.push_back(Location.getAsMDNode()); + // Setting vectorize.width if (Attrs.VectorizeWidth > 0) { Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.width"), @@ -78,6 +85,14 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) { Args.push_back(MDNode::get(Ctx, Vals)); } + if (Attrs.DistributeEnable != LoopAttributes::Unspecified) { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"), + ConstantAsMetadata::get(ConstantInt::get( + Type::getInt1Ty(Ctx), (Attrs.DistributeEnable == + LoopAttributes::Enable)))}; + Args.push_back(MDNode::get(Ctx, Vals)); + } + // Set the first operand to itself. MDNode *LoopID = MDNode::get(Ctx, Args); LoopID->replaceOperandWith(0, LoopID); @@ -87,7 +102,8 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) { LoopAttributes::LoopAttributes(bool IsParallel) : IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified), UnrollEnable(LoopAttributes::Unspecified), VectorizeWidth(0), - InterleaveCount(0), UnrollCount(0) {} + InterleaveCount(0), UnrollCount(0), + DistributeEnable(LoopAttributes::Unspecified) {} void LoopAttributes::clear() { IsParallel = false; @@ -98,37 +114,60 @@ void LoopAttributes::clear() { UnrollEnable = LoopAttributes::Unspecified; } -LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs) +LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, + llvm::DebugLoc Location) : LoopID(nullptr), Header(Header), Attrs(Attrs) { - LoopID = createMetadata(Header->getContext(), Attrs); + LoopID = createMetadata(Header->getContext(), Attrs, Location); } -void LoopInfoStack::push(BasicBlock *Header) { - Active.push_back(LoopInfo(Header, StagedAttrs)); +void LoopInfoStack::push(BasicBlock *Header, llvm::DebugLoc Location) { + Active.push_back(LoopInfo(Header, StagedAttrs, Location)); // Clear the attributes so nested loops do not inherit them. StagedAttrs.clear(); } void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, - ArrayRef<const clang::Attr *> Attrs) { + ArrayRef<const clang::Attr *> Attrs, + llvm::DebugLoc Location) { // Identify loop hint attributes from Attrs. for (const auto *Attr : Attrs) { const LoopHintAttr *LH = dyn_cast<LoopHintAttr>(Attr); + const OpenCLUnrollHintAttr *OpenCLHint = + dyn_cast<OpenCLUnrollHintAttr>(Attr); // Skip non loop hint attributes - if (!LH) + if (!LH && !OpenCLHint) { continue; + } - auto *ValueExpr = LH->getValue(); + LoopHintAttr::OptionType Option = LoopHintAttr::Unroll; + LoopHintAttr::LoopHintState State = LoopHintAttr::Disable; unsigned ValueInt = 1; - if (ValueExpr) { - llvm::APSInt ValueAPS = ValueExpr->EvaluateKnownConstInt(Ctx); - ValueInt = ValueAPS.getSExtValue(); - } + // Translate opencl_unroll_hint attribute argument to + // equivalent LoopHintAttr enums. + // OpenCL v2.0 s6.11.5: + // 0 - full unroll (no argument). + // 1 - disable unroll. + // other positive integer n - unroll by n. 
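A standalone restatement of the mapping described in the comment above (a hypothetical helper; the real translation into LoopHintAttr enums follows in the hunk below):

// opencl_unroll_hint argument -> requested unrolling behaviour:
//   0 -> full unroll, 1 -> disable unrolling, n > 1 -> unroll by n.
enum class UnrollRequest { Full, Disable, Count };

struct OpenCLUnrollTranslation {
  UnrollRequest request;
  unsigned count;   // only meaningful when request == UnrollRequest::Count
};

OpenCLUnrollTranslation translateOpenCLUnrollHint(unsigned hint) {
  if (hint == 0) return {UnrollRequest::Full, 0};
  if (hint == 1) return {UnrollRequest::Disable, 0};
  return {UnrollRequest::Count, hint};
}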
+ if (OpenCLHint) { + ValueInt = OpenCLHint->getUnrollHint(); + if (ValueInt == 0) { + State = LoopHintAttr::Full; + } else if (ValueInt != 1) { + Option = LoopHintAttr::UnrollCount; + State = LoopHintAttr::Numeric; + } + } else if (LH) { + auto *ValueExpr = LH->getValue(); + if (ValueExpr) { + llvm::APSInt ValueAPS = ValueExpr->EvaluateKnownConstInt(Ctx); + ValueInt = ValueAPS.getSExtValue(); + } - LoopHintAttr::OptionType Option = LH->getOption(); - LoopHintAttr::LoopHintState State = LH->getState(); + Option = LH->getOption(); + State = LH->getState(); + } switch (State) { case LoopHintAttr::Disable: switch (Option) { @@ -143,6 +182,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::Unroll: setUnrollState(LoopAttributes::Disable); break; + case LoopHintAttr::Distribute: + setDistributeState(false); + break; case LoopHintAttr::UnrollCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: @@ -159,6 +201,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::Unroll: setUnrollState(LoopAttributes::Enable); break; + case LoopHintAttr::Distribute: + setDistributeState(true); + break; case LoopHintAttr::UnrollCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: @@ -178,6 +223,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::UnrollCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: + case LoopHintAttr::Distribute: llvm_unreachable("Options cannot be used to assume mem safety."); break; } @@ -192,6 +238,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::UnrollCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: + case LoopHintAttr::Distribute: llvm_unreachable("Options cannot be used with 'full' hint."); break; } @@ -210,6 +257,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::Unroll: case LoopHintAttr::Vectorize: case LoopHintAttr::Interleave: + case LoopHintAttr::Distribute: llvm_unreachable("Options cannot be assigned a value."); break; } @@ -218,7 +266,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, } /// Stage the attributes. - push(Header); + push(Header, Location); } void LoopInfoStack::pop() { @@ -237,7 +285,7 @@ void LoopInfoStack::InsertHelper(Instruction *I) const { if (TerminatorInst *TI = dyn_cast<TerminatorInst>(I)) { for (unsigned i = 0, ie = TI->getNumSuccessors(); i < ie; ++i) if (TI->getSuccessor(i) == L.getHeader()) { - TI->setMetadata("llvm.loop", L.getLoopID()); + TI->setMetadata(llvm::LLVMContext::MD_loop, L.getLoopID()); break; } return; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h index ec33906..a0111ed 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h @@ -18,6 +18,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Value.h" #include "llvm/Support/Compiler.h" @@ -57,13 +58,17 @@ struct LoopAttributes { /// \brief llvm.unroll. unsigned UnrollCount; + + /// \brief Value for llvm.loop.distribute.enable metadata. + LVEnableState DistributeEnable; }; /// \brief Information used when generating a structured loop. class LoopInfo { public: /// \brief Construct a new LoopInfo for the loop with entry Header. 
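The new LoopHintAttr::Distribute cases above feed the llvm.loop.distribute.enable metadata added earlier in this file. A user-level example of the source construct they correspond to, assuming the "#pragma clang loop distribute" spelling that these hint attributes are parsed from (the example itself is not from the patch):

// Hypothetical user code: requests loop distribution, which CodeGen records
// as llvm.loop.distribute.enable = true on the loop's branch metadata.
void saxpy_into(float *x, float *y, float *z, float a, int n) {
#pragma clang loop distribute(enable)
  for (int i = 0; i < n; ++i) {
    z[i] = a * x[i] + y[i];
  }
}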
- LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs); + LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs, + llvm::DebugLoc Location); /// \brief Get the loop id metadata for this loop. llvm::MDNode *getLoopID() const { return LoopID; } @@ -95,12 +100,14 @@ public: /// \brief Begin a new structured loop. The set of staged attributes will be /// applied to the loop and then cleared. - void push(llvm::BasicBlock *Header); + void push(llvm::BasicBlock *Header, + llvm::DebugLoc Location = llvm::DebugLoc()); /// \brief Begin a new structured loop. Stage attributes from the Attrs list. /// The staged attributes are applied to the loop and then cleared. void push(llvm::BasicBlock *Header, clang::ASTContext &Ctx, - llvm::ArrayRef<const Attr *> Attrs); + llvm::ArrayRef<const Attr *> Attrs, + llvm::DebugLoc Location = llvm::DebugLoc()); /// \brief End the current loop. void pop(); @@ -126,6 +133,12 @@ public: Enable ? LoopAttributes::Enable : LoopAttributes::Disable; } + /// \brief Set the next pushed loop as a distribution candidate. + void setDistributeState(bool Enable = true) { + StagedAttrs.DistributeEnable = + Enable ? LoopAttributes::Enable : LoopAttributes::Disable; + } + /// \brief Set the next pushed loop unroll state. void setUnrollState(const LoopAttributes::LVEnableState &State) { StagedAttrs.UnrollEnable = State; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp index 2d5991b..db894ce 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp @@ -590,9 +590,7 @@ static void emitStructGetterCall(CodeGenFunction &CGF, ObjCIvarDecl *ivar, args.add(RValue::get(CGF.Builder.getInt1(hasStrong)), Context.BoolTy); llvm::Value *fn = CGF.CGM.getObjCRuntime().GetGetStructFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(Context.VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall(CGF.getTypes().arrangeBuiltinFunctionCall(Context.VoidTy, args), fn, ReturnValueSlot(), args); } @@ -856,10 +854,8 @@ static void emitCPPObjectAtomicGetterCall(CodeGenFunction &CGF, llvm::Value *copyCppAtomicObjectFn = CGF.CGM.getObjCRuntime().GetCppAtomicObjectGetFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy, - args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall( + CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), copyCppAtomicObjectFn, ReturnValueSlot(), args); } @@ -901,21 +897,29 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // Currently, all atomic accesses have to be through integer // types, so there's no point in trying to pick a prettier type. - llvm::Type *bitcastType = - llvm::Type::getIntNTy(getLLVMContext(), - getContext().toBits(strategy.getIvarSize())); + uint64_t ivarSize = getContext().toBits(strategy.getIvarSize()); + llvm::Type *bitcastType = llvm::Type::getIntNTy(getLLVMContext(), ivarSize); bitcastType = bitcastType->getPointerTo(); // addrspace 0 okay // Perform an atomic load. This does not impose ordering constraints. Address ivarAddr = LV.getAddress(); ivarAddr = Builder.CreateBitCast(ivarAddr, bitcastType); llvm::LoadInst *load = Builder.CreateLoad(ivarAddr, "load"); - load->setAtomic(llvm::Unordered); + load->setAtomic(llvm::AtomicOrdering::Unordered); // Store that value into the return address. 
Doing this with a // bitcast is likely to produce some pretty ugly IR, but it's not // the *most* terrible thing in the world. - Builder.CreateStore(load, Builder.CreateBitCast(ReturnValue, bitcastType)); + llvm::Type *retTy = ConvertType(getterMethod->getReturnType()); + uint64_t retTySize = CGM.getDataLayout().getTypeSizeInBits(retTy); + llvm::Value *ivarVal = load; + if (ivarSize > retTySize) { + llvm::Type *newTy = llvm::Type::getIntNTy(getLLVMContext(), retTySize); + ivarVal = Builder.CreateTrunc(load, newTy); + bitcastType = newTy->getPointerTo(); + } + Builder.CreateStore(ivarVal, + Builder.CreateBitCast(ReturnValue, bitcastType)); // Make sure we don't do an autorelease. AutoreleaseResult = false; @@ -950,8 +954,7 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // runtime already should have computed it to build the function. llvm::Instruction *CallInstruction; RValue RV = EmitCall( - getTypes().arrangeFreeFunctionCall( - propType, args, FunctionType::ExtInfo(), RequiredArgs::All), + getTypes().arrangeBuiltinFunctionCall(propType, args), getPropertyFn, ReturnValueSlot(), args, CGCalleeInfo(), &CallInstruction); if (llvm::CallInst *call = dyn_cast<llvm::CallInst>(CallInstruction)) @@ -1015,7 +1018,6 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, AutoreleaseResult = false; } - value = Builder.CreateBitCast(value, ConvertType(propType)); value = Builder.CreateBitCast( value, ConvertType(GetterMethodDecl->getReturnType())); } @@ -1067,10 +1069,8 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD, args.add(RValue::get(CGF.Builder.getFalse()), CGF.getContext().BoolTy); llvm::Value *copyStructFn = CGF.CGM.getObjCRuntime().GetSetStructFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy, - args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall( + CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), copyStructFn, ReturnValueSlot(), args); } @@ -1105,10 +1105,8 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF, llvm::Value *copyCppAtomicObjectFn = CGF.CGM.getObjCRuntime().GetCppAtomicObjectSetFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy, - args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall( + CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), copyCppAtomicObjectFn, ReturnValueSlot(), args); } @@ -1192,7 +1190,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, // Perform an atomic store. There are no memory ordering requirements. 
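Throughout this patch (here and in CGExprScalar.cpp above) the bare llvm::Unordered / llvm::SequentiallyConsistent constants are respelled through the scoped llvm::AtomicOrdering enum. A minimal IRBuilder sketch of the new spelling used by the atomic property accessors (an illustrative helper, not clang code):

#include "llvm/IR/IRBuilder.h"

// Emit a store and mark it atomic with unordered semantics: atomicity without
// imposing any memory-ordering constraints, matching the accessor code above.
llvm::StoreInst *emitUnorderedAtomicStore(llvm::IRBuilder<> &Builder,
                                          llvm::Value *Val, llvm::Value *Addr) {
  llvm::StoreInst *Store = Builder.CreateStore(Val, Addr);
  Store->setAtomic(llvm::AtomicOrdering::Unordered);
  return Store;
}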
llvm::StoreInst *store = Builder.CreateStore(load, ivarAddr); - store->setAtomic(llvm::Unordered); + store->setAtomic(llvm::AtomicOrdering::Unordered); return; } @@ -1238,9 +1236,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, if (setOptimizedPropertyFn) { args.add(RValue::get(arg), getContext().getObjCIdType()); args.add(RValue::get(ivarOffset), getContext().getPointerDiffType()); - EmitCall(getTypes().arrangeFreeFunctionCall(getContext().VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + EmitCall(getTypes().arrangeBuiltinFunctionCall(getContext().VoidTy, args), setOptimizedPropertyFn, ReturnValueSlot(), args); } else { args.add(RValue::get(ivarOffset), getContext().getPointerDiffType()); @@ -1251,9 +1247,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, getContext().BoolTy); // FIXME: We shouldn't need to get the function info here, the runtime // already should have computed it to build the function. - EmitCall(getTypes().arrangeFreeFunctionCall(getContext().VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + EmitCall(getTypes().arrangeBuiltinFunctionCall(getContext().VoidTy, args), setPropertyFn, ReturnValueSlot(), args); } @@ -1498,6 +1492,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ ArrayType::Normal, 0); Address ItemsPtr = CreateMemTemp(ItemsTy, "items.ptr"); + RunCleanupsScope ForScope(*this); + // Emit the collection pointer. In ARC, we do a retain. llvm::Value *Collection; if (getLangOpts().ObjCAutoRefCount) { @@ -1610,9 +1606,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ Args2.add(RValue::get(V), getContext().getObjCIdType()); // FIXME: We shouldn't need to get the function info here, the runtime already // should have computed it to build the function. - EmitCall(CGM.getTypes().arrangeFreeFunctionCall(getContext().VoidTy, Args2, - FunctionType::ExtInfo(), - RequiredArgs::All), + EmitCall( + CGM.getTypes().arrangeBuiltinFunctionCall(getContext().VoidTy, Args2), EnumerationMutationFn, ReturnValueSlot(), Args2); // Otherwise, or if the mutation function returns, just continue. @@ -1739,10 +1734,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ if (DI) DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd()); - // Leave the cleanup we entered in ARC. - if (getLangOpts().ObjCAutoRefCount) - PopCleanupBlock(); - + ForScope.ForceCleanup(); EmitBlock(LoopEnd.getBlock()); } @@ -1980,20 +1972,14 @@ llvm::Value *CodeGenFunction::EmitARCRetainBlock(llvm::Value *value, return result; } -/// Retain the given object which is the result of a function call. -/// call i8* \@objc_retainAutoreleasedReturnValue(i8* %value) -/// -/// Yes, this function name is one character away from a different -/// call with completely different semantics. -llvm::Value * -CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { +static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) { // Fetch the void(void) inline asm which marks that we're going to - // retain the autoreleased return value. + // do something with the autoreleased return value. 
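emitAutoreleasedReturnValueMarker, factored out here so that both the retain and the new unsafe-claim entry points can share it, either calls the marker asm directly at -O0 or, as the rest of this hunk shows, records the assembly string once as module-level named metadata. A standalone sketch of that metadata path (the helper name is invented):

#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

// Record the target's marker assembly once per module so the optimizer and
// backend can recover it without a marker call at every site.
void recordRetainAutoreleasedRVMarker(llvm::Module &M, llvm::StringRef Assembly) {
  llvm::LLVMContext &Ctx = M.getContext();
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata(
      "clang.arc.retainAutoreleasedReturnValueMarker");
  if (MD->getNumOperands() == 0)
    MD->addOperand(
        llvm::MDNode::get(Ctx, llvm::MDString::get(Ctx, Assembly)));
}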
llvm::InlineAsm *&marker - = CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker; + = CGF.CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker; if (!marker) { StringRef assembly - = CGM.getTargetCodeGenInfo() + = CGF.CGM.getTargetCodeGenInfo() .getARCRetainAutoreleasedReturnValueMarker(); // If we have an empty assembly string, there's nothing to do. @@ -2001,9 +1987,9 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { // Otherwise, at -O0, build an inline asm that we're going to call // in a moment. - } else if (CGM.getCodeGenOpts().OptimizationLevel == 0) { + } else if (CGF.CGM.getCodeGenOpts().OptimizationLevel == 0) { llvm::FunctionType *type = - llvm::FunctionType::get(VoidTy, /*variadic*/false); + llvm::FunctionType::get(CGF.VoidTy, /*variadic*/false); marker = llvm::InlineAsm::get(type, assembly, "", /*sideeffects*/ true); @@ -2012,25 +1998,50 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { // optimizer to pick up. } else { llvm::NamedMDNode *metadata = - CGM.getModule().getOrInsertNamedMetadata( + CGF.CGM.getModule().getOrInsertNamedMetadata( "clang.arc.retainAutoreleasedReturnValueMarker"); assert(metadata->getNumOperands() <= 1); if (metadata->getNumOperands() == 0) { - metadata->addOperand(llvm::MDNode::get( - getLLVMContext(), llvm::MDString::get(getLLVMContext(), assembly))); + auto &ctx = CGF.getLLVMContext(); + metadata->addOperand(llvm::MDNode::get(ctx, + llvm::MDString::get(ctx, assembly))); } } } // Call the marker asm if we made one, which we do only at -O0. if (marker) - Builder.CreateCall(marker); + CGF.Builder.CreateCall(marker); +} +/// Retain the given object which is the result of a function call. +/// call i8* \@objc_retainAutoreleasedReturnValue(i8* %value) +/// +/// Yes, this function name is one character away from a different +/// call with completely different semantics. +llvm::Value * +CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { + emitAutoreleasedReturnValueMarker(*this); return emitARCValueOperation(*this, value, - CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, + CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, "objc_retainAutoreleasedReturnValue"); } +/// Claim a possibly-autoreleased return value at +0. This is only +/// valid to do in contexts which do not rely on the retain to keep +/// the object valid for for all of its uses; for example, when +/// the value is ignored, or when it is being assigned to an +/// __unsafe_unretained variable. +/// +/// call i8* \@objc_unsafeClaimAutoreleasedReturnValue(i8* %value) +llvm::Value * +CodeGenFunction::EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value) { + emitAutoreleasedReturnValueMarker(*this); + return emitARCValueOperation(*this, value, + CGM.getObjCEntrypoints().objc_unsafeClaimAutoreleasedReturnValue, + "objc_unsafeClaimAutoreleasedReturnValue"); +} + /// Release the given object. /// call void \@objc_release(i8* %value) void CodeGenFunction::EmitARCRelease(llvm::Value *value, @@ -2446,25 +2457,22 @@ static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF, return tryEmitARCRetainLoadOfScalar(CGF, CGF.EmitLValue(e), type); } -static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF, - llvm::Value *value); +typedef llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + llvm::Value *value)> + ValueTransform; -/// Given that the given expression is some sort of call (which does -/// not return retained), emit a retain following it. 
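The ValueTransform typedef above threads small callables into emitARCOperationAfterCall via llvm::function_ref, a non-owning callable reference that is cheap to pass and safe for arguments that do not outlive the call. A minimal standalone illustration of the pattern (the names here are invented, not clang's):

#include "llvm/ADT/STLExtras.h"   // llvm::function_ref

// Apply one of two caller-supplied transforms, mirroring how
// emitARCOperationAfterCall picks between "doAfterCall" and "doFallback".
using IntTransform = llvm::function_ref<int(int)>;

int transformResult(int value, bool isCallResult, IntTransform doAfterCall,
                    IntTransform doFallback) {
  return isCallResult ? doAfterCall(value) : doFallback(value);
}

// Usage: transformResult(7, true, [](int v) { return v + 1; },
//                        [](int v) { return v; });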
-static llvm::Value *emitARCRetainCall(CodeGenFunction &CGF, const Expr *e) { - llvm::Value *value = CGF.EmitScalarExpr(e); - return emitARCRetainAfterCall(CGF, value); -} - -static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF, - llvm::Value *value) { +/// Insert code immediately after a call. +static llvm::Value *emitARCOperationAfterCall(CodeGenFunction &CGF, + llvm::Value *value, + ValueTransform doAfterCall, + ValueTransform doFallback) { if (llvm::CallInst *call = dyn_cast<llvm::CallInst>(value)) { CGBuilderTy::InsertPoint ip = CGF.Builder.saveIP(); // Place the retain immediately following the call. CGF.Builder.SetInsertPoint(call->getParent(), ++llvm::BasicBlock::iterator(call)); - value = CGF.EmitARCRetainAutoreleasedReturnValue(value); + value = doAfterCall(CGF, value); CGF.Builder.restoreIP(ip); return value; @@ -2474,7 +2482,7 @@ static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF, // Place the retain at the beginning of the normal destination block. llvm::BasicBlock *BB = invoke->getNormalDest(); CGF.Builder.SetInsertPoint(BB, BB->begin()); - value = CGF.EmitARCRetainAutoreleasedReturnValue(value); + value = doAfterCall(CGF, value); CGF.Builder.restoreIP(ip); return value; @@ -2483,7 +2491,7 @@ static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF, // the operand. } else if (llvm::BitCastInst *bitcast = dyn_cast<llvm::BitCastInst>(value)) { llvm::Value *operand = bitcast->getOperand(0); - operand = emitARCRetainAfterCall(CGF, operand); + operand = emitARCOperationAfterCall(CGF, operand, doAfterCall, doFallback); bitcast->setOperand(0, operand); return bitcast; @@ -2491,7 +2499,46 @@ static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF, } else { // Retain using the non-block variant: we never need to do a copy // of a block that's been returned to us. - return CGF.EmitARCRetainNonBlock(value); + return doFallback(CGF, value); + } +} + +/// Given that the given expression is some sort of call (which does +/// not return retained), emit a retain following it. +static llvm::Value *emitARCRetainCallResult(CodeGenFunction &CGF, + const Expr *e) { + llvm::Value *value = CGF.EmitScalarExpr(e); + return emitARCOperationAfterCall(CGF, value, + [](CodeGenFunction &CGF, llvm::Value *value) { + return CGF.EmitARCRetainAutoreleasedReturnValue(value); + }, + [](CodeGenFunction &CGF, llvm::Value *value) { + return CGF.EmitARCRetainNonBlock(value); + }); +} + +/// Given that the given expression is some sort of call (which does +/// not return retained), perform an unsafeClaim following it. +static llvm::Value *emitARCUnsafeClaimCallResult(CodeGenFunction &CGF, + const Expr *e) { + llvm::Value *value = CGF.EmitScalarExpr(e); + return emitARCOperationAfterCall(CGF, value, + [](CodeGenFunction &CGF, llvm::Value *value) { + return CGF.EmitARCUnsafeClaimAutoreleasedReturnValue(value); + }, + [](CodeGenFunction &CGF, llvm::Value *value) { + return value; + }); +} + +llvm::Value *CodeGenFunction::EmitARCReclaimReturnedObject(const Expr *E, + bool allowUnsafeClaim) { + if (allowUnsafeClaim && + CGM.getLangOpts().ObjCRuntime.hasARCUnsafeClaimAutoreleasedReturnValue()) { + return emitARCUnsafeClaimCallResult(*this, E); + } else { + llvm::Value *value = emitARCRetainCallResult(*this, E); + return EmitObjCConsumeObject(E->getType(), value); } } @@ -2531,17 +2578,52 @@ static bool shouldEmitSeparateBlockRetain(const Expr *e) { return true; } -/// Try to emit a PseudoObjectExpr at +1. 
+namespace { +/// A CRTP base class for emitting expressions of retainable object +/// pointer type in ARC. +template <typename Impl, typename Result> class ARCExprEmitter { +protected: + CodeGenFunction &CGF; + Impl &asImpl() { return *static_cast<Impl*>(this); } + + ARCExprEmitter(CodeGenFunction &CGF) : CGF(CGF) {} + +public: + Result visit(const Expr *e); + Result visitCastExpr(const CastExpr *e); + Result visitPseudoObjectExpr(const PseudoObjectExpr *e); + Result visitBinaryOperator(const BinaryOperator *e); + Result visitBinAssign(const BinaryOperator *e); + Result visitBinAssignUnsafeUnretained(const BinaryOperator *e); + Result visitBinAssignAutoreleasing(const BinaryOperator *e); + Result visitBinAssignWeak(const BinaryOperator *e); + Result visitBinAssignStrong(const BinaryOperator *e); + + // Minimal implementation: + // Result visitLValueToRValue(const Expr *e) + // Result visitConsumeObject(const Expr *e) + // Result visitExtendBlockObject(const Expr *e) + // Result visitReclaimReturnedObject(const Expr *e) + // Result visitCall(const Expr *e) + // Result visitExpr(const Expr *e) + // + // Result emitBitCast(Result result, llvm::Type *resultType) + // llvm::Value *getValueOfResult(Result result) +}; +} + +/// Try to emit a PseudoObjectExpr under special ARC rules. /// /// This massively duplicates emitPseudoObjectRValue. -static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF, - const PseudoObjectExpr *E) { +template <typename Impl, typename Result> +Result +ARCExprEmitter<Impl,Result>::visitPseudoObjectExpr(const PseudoObjectExpr *E) { SmallVector<CodeGenFunction::OpaqueValueMappingData, 4> opaques; // Find the result expression. const Expr *resultExpr = E->getResultExpr(); assert(resultExpr); - TryEmitResult result; + Result result; for (PseudoObjectExpr::const_semantics_iterator i = E->semantics_begin(), e = E->semantics_end(); i != e; ++i) { @@ -2557,8 +2639,9 @@ static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF, // expression, try to evaluate the source as +1. if (ov == resultExpr) { assert(!OVMA::shouldBindAsLValue(ov)); - result = tryEmitARCRetainScalarExpr(CGF, ov->getSourceExpr()); - opaqueData = OVMA::bind(CGF, ov, RValue::get(result.getPointer())); + result = asImpl().visit(ov->getSourceExpr()); + opaqueData = OVMA::bind(CGF, ov, + RValue::get(asImpl().getValueOfResult(result))); // Otherwise, just bind it. } else { @@ -2569,7 +2652,7 @@ static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF, // Otherwise, if the expression is the result, evaluate it // and remember the result. } else if (semantic == resultExpr) { - result = tryEmitARCRetainScalarExpr(CGF, semantic); + result = asImpl().visit(semantic); // Otherwise, evaluate the expression in an ignored context. } else { @@ -2584,146 +2667,240 @@ static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF, return result; } -static TryEmitResult -tryEmitARCRetainScalarExpr(CodeGenFunction &CGF, const Expr *e) { +template <typename Impl, typename Result> +Result ARCExprEmitter<Impl,Result>::visitCastExpr(const CastExpr *e) { + switch (e->getCastKind()) { + + // No-op casts don't change the type, so we just ignore them. + case CK_NoOp: + return asImpl().visit(e->getSubExpr()); + + // These casts can change the type. 
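ARCExprEmitter above uses the curiously recurring template pattern: the base class owns the shared traversal (visit, visitCastExpr, visitBinAssign, ...) and reaches the derived emitter's customization points through asImpl(), so the +1 retain emitter and the later +0 unsafe-unretained emitter share one walker with no virtual dispatch. A self-contained sketch of that structure (names invented):

#include <iostream>

template <typename Impl, typename Result> class ExprWalkerBase {
protected:
  Impl &asImpl() { return *static_cast<Impl *>(this); }

public:
  // Shared driver: the dispatch logic lives once, in the base.
  Result visit(bool isCall) {
    return isCall ? asImpl().visitCall() : asImpl().visitExpr();
  }
};

// A concrete emitter supplies just the leaf behaviours.
struct RetainWalker : ExprWalkerBase<RetainWalker, int> {
  int visitCall() { return 1; }   // e.g. "result is retained"
  int visitExpr() { return 0; }   // e.g. "result is not retained"
};

int main() {
  RetainWalker W;
  std::cout << W.visit(true) << ' ' << W.visit(false) << '\n';  // prints "1 0"
}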
+ case CK_CPointerToObjCPointerCast: + case CK_BlockPointerToObjCPointerCast: + case CK_AnyPointerToBlockPointerCast: + case CK_BitCast: { + llvm::Type *resultType = CGF.ConvertType(e->getType()); + assert(e->getSubExpr()->getType()->hasPointerRepresentation()); + Result result = asImpl().visit(e->getSubExpr()); + return asImpl().emitBitCast(result, resultType); + } + + // Handle some casts specially. + case CK_LValueToRValue: + return asImpl().visitLValueToRValue(e->getSubExpr()); + case CK_ARCConsumeObject: + return asImpl().visitConsumeObject(e->getSubExpr()); + case CK_ARCExtendBlockObject: + return asImpl().visitExtendBlockObject(e->getSubExpr()); + case CK_ARCReclaimReturnedObject: + return asImpl().visitReclaimReturnedObject(e->getSubExpr()); + + // Otherwise, use the default logic. + default: + return asImpl().visitExpr(e); + } +} + +template <typename Impl, typename Result> +Result +ARCExprEmitter<Impl,Result>::visitBinaryOperator(const BinaryOperator *e) { + switch (e->getOpcode()) { + case BO_Comma: + CGF.EmitIgnoredExpr(e->getLHS()); + CGF.EnsureInsertPoint(); + return asImpl().visit(e->getRHS()); + + case BO_Assign: + return asImpl().visitBinAssign(e); + + default: + return asImpl().visitExpr(e); + } +} + +template <typename Impl, typename Result> +Result ARCExprEmitter<Impl,Result>::visitBinAssign(const BinaryOperator *e) { + switch (e->getLHS()->getType().getObjCLifetime()) { + case Qualifiers::OCL_ExplicitNone: + return asImpl().visitBinAssignUnsafeUnretained(e); + + case Qualifiers::OCL_Weak: + return asImpl().visitBinAssignWeak(e); + + case Qualifiers::OCL_Autoreleasing: + return asImpl().visitBinAssignAutoreleasing(e); + + case Qualifiers::OCL_Strong: + return asImpl().visitBinAssignStrong(e); + + case Qualifiers::OCL_None: + return asImpl().visitExpr(e); + } + llvm_unreachable("bad ObjC ownership qualifier"); +} + +/// The default rule for __unsafe_unretained emits the RHS recursively, +/// stores into the unsafe variable, and propagates the result outward. +template <typename Impl, typename Result> +Result ARCExprEmitter<Impl,Result>:: + visitBinAssignUnsafeUnretained(const BinaryOperator *e) { + // Recursively emit the RHS. + // For __block safety, do this before emitting the LHS. + Result result = asImpl().visit(e->getRHS()); + + // Perform the store. + LValue lvalue = + CGF.EmitCheckedLValue(e->getLHS(), CodeGenFunction::TCK_Store); + CGF.EmitStoreThroughLValue(RValue::get(asImpl().getValueOfResult(result)), + lvalue); + + return result; +} + +template <typename Impl, typename Result> +Result +ARCExprEmitter<Impl,Result>::visitBinAssignAutoreleasing(const BinaryOperator *e) { + return asImpl().visitExpr(e); +} + +template <typename Impl, typename Result> +Result +ARCExprEmitter<Impl,Result>::visitBinAssignWeak(const BinaryOperator *e) { + return asImpl().visitExpr(e); +} + +template <typename Impl, typename Result> +Result +ARCExprEmitter<Impl,Result>::visitBinAssignStrong(const BinaryOperator *e) { + return asImpl().visitExpr(e); +} + +/// The general expression-emission logic. +template <typename Impl, typename Result> +Result ARCExprEmitter<Impl,Result>::visit(const Expr *e) { // We should *never* see a nested full-expression here, because if // we fail to emit at +1, our caller must not retain after we close - // out the full-expression. + // out the full-expression. This isn't as important in the unsafe + // emitter. assert(!isa<ExprWithCleanups>(e)); - // The desired result type, if it differs from the type of the - // ultimate opaque expression. 
- llvm::Type *resultType = nullptr; - - while (true) { - e = e->IgnoreParens(); - - // There's a break at the end of this if-chain; anything - // that wants to keep looping has to explicitly continue. - if (const CastExpr *ce = dyn_cast<CastExpr>(e)) { - switch (ce->getCastKind()) { - // No-op casts don't change the type, so we just ignore them. - case CK_NoOp: - e = ce->getSubExpr(); - continue; - - case CK_LValueToRValue: { - TryEmitResult loadResult - = tryEmitARCRetainLoadOfScalar(CGF, ce->getSubExpr()); - if (resultType) { - llvm::Value *value = loadResult.getPointer(); - value = CGF.Builder.CreateBitCast(value, resultType); - loadResult.setPointer(value); - } - return loadResult; - } + // Look through parens, __extension__, generic selection, etc. + e = e->IgnoreParens(); - // These casts can change the type, so remember that and - // soldier on. We only need to remember the outermost such - // cast, though. - case CK_CPointerToObjCPointerCast: - case CK_BlockPointerToObjCPointerCast: - case CK_AnyPointerToBlockPointerCast: - case CK_BitCast: - if (!resultType) - resultType = CGF.ConvertType(ce->getType()); - e = ce->getSubExpr(); - assert(e->getType()->hasPointerRepresentation()); - continue; - - // For consumptions, just emit the subexpression and thus elide - // the retain/release pair. - case CK_ARCConsumeObject: { - llvm::Value *result = CGF.EmitScalarExpr(ce->getSubExpr()); - if (resultType) result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, true); - } + // Handle certain kinds of casts. + if (const CastExpr *ce = dyn_cast<CastExpr>(e)) { + return asImpl().visitCastExpr(ce); - // Block extends are net +0. Naively, we could just recurse on - // the subexpression, but actually we need to ensure that the - // value is copied as a block, so there's a little filter here. - case CK_ARCExtendBlockObject: { - llvm::Value *result; // will be a +0 value + // Handle the comma operator. + } else if (auto op = dyn_cast<BinaryOperator>(e)) { + return asImpl().visitBinaryOperator(op); - // If we can't safely assume the sub-expression will produce a - // block-copied value, emit the sub-expression at +0. - if (shouldEmitSeparateBlockRetain(ce->getSubExpr())) { - result = CGF.EmitScalarExpr(ce->getSubExpr()); + // TODO: handle conditional operators here - // Otherwise, try to emit the sub-expression at +1 recursively. - } else { - TryEmitResult subresult - = tryEmitARCRetainScalarExpr(CGF, ce->getSubExpr()); - result = subresult.getPointer(); - - // If that produced a retained value, just use that, - // possibly casting down. - if (subresult.getInt()) { - if (resultType) - result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, true); - } + // For calls and message sends, use the retained-call logic. + // Delegate inits are a special case in that they're the only + // returns-retained expression that *isn't* surrounded by + // a consume. + } else if (isa<CallExpr>(e) || + (isa<ObjCMessageExpr>(e) && + !cast<ObjCMessageExpr>(e)->isDelegateInitCall())) { + return asImpl().visitCall(e); - // Otherwise it's +0. - } + // Look through pseudo-object expressions. + } else if (const PseudoObjectExpr *pseudo = dyn_cast<PseudoObjectExpr>(e)) { + return asImpl().visitPseudoObjectExpr(pseudo); + } - // Retain the object as a block, then cast down. 
- result = CGF.EmitARCRetainBlock(result, /*mandatory*/ true); - if (resultType) result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, true); - } + return asImpl().visitExpr(e); +} - // For reclaims, emit the subexpression as a retained call and - // skip the consumption. - case CK_ARCReclaimReturnedObject: { - llvm::Value *result = emitARCRetainCall(CGF, ce->getSubExpr()); - if (resultType) result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, true); - } +namespace { - default: - break; - } +/// An emitter for +1 results. +struct ARCRetainExprEmitter : + public ARCExprEmitter<ARCRetainExprEmitter, TryEmitResult> { - // Skip __extension__. - } else if (const UnaryOperator *op = dyn_cast<UnaryOperator>(e)) { - if (op->getOpcode() == UO_Extension) { - e = op->getSubExpr(); - continue; - } + ARCRetainExprEmitter(CodeGenFunction &CGF) : ARCExprEmitter(CGF) {} + + llvm::Value *getValueOfResult(TryEmitResult result) { + return result.getPointer(); + } - // For calls and message sends, use the retained-call logic. - // Delegate inits are a special case in that they're the only - // returns-retained expression that *isn't* surrounded by - // a consume. - } else if (isa<CallExpr>(e) || - (isa<ObjCMessageExpr>(e) && - !cast<ObjCMessageExpr>(e)->isDelegateInitCall())) { - llvm::Value *result = emitARCRetainCall(CGF, e); - if (resultType) result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, true); - - // Look through pseudo-object expressions. - } else if (const PseudoObjectExpr *pseudo = dyn_cast<PseudoObjectExpr>(e)) { - TryEmitResult result - = tryEmitARCRetainPseudoObject(CGF, pseudo); - if (resultType) { - llvm::Value *value = result.getPointer(); - value = CGF.Builder.CreateBitCast(value, resultType); - result.setPointer(value); + TryEmitResult emitBitCast(TryEmitResult result, llvm::Type *resultType) { + llvm::Value *value = result.getPointer(); + value = CGF.Builder.CreateBitCast(value, resultType); + result.setPointer(value); + return result; + } + + TryEmitResult visitLValueToRValue(const Expr *e) { + return tryEmitARCRetainLoadOfScalar(CGF, e); + } + + /// For consumptions, just emit the subexpression and thus elide + /// the retain/release pair. + TryEmitResult visitConsumeObject(const Expr *e) { + llvm::Value *result = CGF.EmitScalarExpr(e); + return TryEmitResult(result, true); + } + + /// Block extends are net +0. Naively, we could just recurse on + /// the subexpression, but actually we need to ensure that the + /// value is copied as a block, so there's a little filter here. + TryEmitResult visitExtendBlockObject(const Expr *e) { + llvm::Value *result; // will be a +0 value + + // If we can't safely assume the sub-expression will produce a + // block-copied value, emit the sub-expression at +0. + if (shouldEmitSeparateBlockRetain(e)) { + result = CGF.EmitScalarExpr(e); + + // Otherwise, try to emit the sub-expression at +1 recursively. + } else { + TryEmitResult subresult = asImpl().visit(e); + + // If that produced a retained value, just use that. + if (subresult.getInt()) { + return subresult; } - return result; + + // Otherwise it's +0. + result = subresult.getPointer(); } - // Conservatively halt the search at any other expression kind. - break; + // Retain the object as a block. 
+ result = CGF.EmitARCRetainBlock(result, /*mandatory*/ true); + return TryEmitResult(result, true); } - // We didn't find an obvious production, so emit what we've got and - // tell the caller that we didn't manage to retain. - llvm::Value *result = CGF.EmitScalarExpr(e); - if (resultType) result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, false); + /// For reclaims, emit the subexpression as a retained call and + /// skip the consumption. + TryEmitResult visitReclaimReturnedObject(const Expr *e) { + llvm::Value *result = emitARCRetainCallResult(CGF, e); + return TryEmitResult(result, true); + } + + /// When we have an undecorated call, retroactively do a claim. + TryEmitResult visitCall(const Expr *e) { + llvm::Value *result = emitARCRetainCallResult(CGF, e); + return TryEmitResult(result, true); + } + + // TODO: maybe special-case visitBinAssignWeak? + + TryEmitResult visitExpr(const Expr *e) { + // We didn't find an obvious production, so emit what we've got and + // tell the caller that we didn't manage to retain. + llvm::Value *result = CGF.EmitScalarExpr(e); + return TryEmitResult(result, false); + } +}; +} + +static TryEmitResult +tryEmitARCRetainScalarExpr(CodeGenFunction &CGF, const Expr *e) { + return ARCRetainExprEmitter(CGF).visit(e); } static llvm::Value *emitARCRetainLoadOfScalar(CodeGenFunction &CGF, @@ -2807,6 +2984,96 @@ llvm::Value *CodeGenFunction::EmitObjCThrowOperand(const Expr *expr) { return EmitScalarExpr(expr); } +namespace { + +/// An emitter for assigning into an __unsafe_unretained context. +struct ARCUnsafeUnretainedExprEmitter : + public ARCExprEmitter<ARCUnsafeUnretainedExprEmitter, llvm::Value*> { + + ARCUnsafeUnretainedExprEmitter(CodeGenFunction &CGF) : ARCExprEmitter(CGF) {} + + llvm::Value *getValueOfResult(llvm::Value *value) { + return value; + } + + llvm::Value *emitBitCast(llvm::Value *value, llvm::Type *resultType) { + return CGF.Builder.CreateBitCast(value, resultType); + } + + llvm::Value *visitLValueToRValue(const Expr *e) { + return CGF.EmitScalarExpr(e); + } + + /// For consumptions, just emit the subexpression and perform the + /// consumption like normal. + llvm::Value *visitConsumeObject(const Expr *e) { + llvm::Value *value = CGF.EmitScalarExpr(e); + return CGF.EmitObjCConsumeObject(e->getType(), value); + } + + /// No special logic for block extensions. (This probably can't + /// actually happen in this emitter, though.) + llvm::Value *visitExtendBlockObject(const Expr *e) { + return CGF.EmitARCExtendBlockObject(e); + } + + /// For reclaims, perform an unsafeClaim if that's enabled. + llvm::Value *visitReclaimReturnedObject(const Expr *e) { + return CGF.EmitARCReclaimReturnedObject(e, /*unsafe*/ true); + } + + /// When we have an undecorated call, just emit it without adding + /// the unsafeClaim. + llvm::Value *visitCall(const Expr *e) { + return CGF.EmitScalarExpr(e); + } + + /// Just do normal scalar emission in the default case. + llvm::Value *visitExpr(const Expr *e) { + return CGF.EmitScalarExpr(e); + } +}; +} + +static llvm::Value *emitARCUnsafeUnretainedScalarExpr(CodeGenFunction &CGF, + const Expr *e) { + return ARCUnsafeUnretainedExprEmitter(CGF).visit(e); +} + +/// EmitARCUnsafeUnretainedScalarExpr - Semantically equivalent to +/// immediately releasing the resut of EmitARCRetainScalarExpr, but +/// avoiding any spurious retains, including by performing reclaims +/// with objc_unsafeClaimAutoreleasedReturnValue. 
+llvm::Value *CodeGenFunction::EmitARCUnsafeUnretainedScalarExpr(const Expr *e) { + // Look through full-expressions. + if (const ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(e)) { + enterFullExpression(cleanups); + RunCleanupsScope scope(*this); + return emitARCUnsafeUnretainedScalarExpr(*this, cleanups->getSubExpr()); + } + + return emitARCUnsafeUnretainedScalarExpr(*this, e); +} + +std::pair<LValue,llvm::Value*> +CodeGenFunction::EmitARCStoreUnsafeUnretained(const BinaryOperator *e, + bool ignored) { + // Evaluate the RHS first. If we're ignoring the result, assume + // that we can emit at an unsafe +0. + llvm::Value *value; + if (ignored) { + value = EmitARCUnsafeUnretainedScalarExpr(e->getRHS()); + } else { + value = EmitScalarExpr(e->getRHS()); + } + + // Emit the LHS and perform the store. + LValue lvalue = EmitLValue(e->getLHS()); + EmitStoreOfScalar(value, lvalue); + + return std::pair<LValue,llvm::Value*>(std::move(lvalue), value); +} + std::pair<LValue,llvm::Value*> CodeGenFunction::EmitARCStoreStrong(const BinaryOperator *e, bool ignored) { @@ -2935,8 +3202,8 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy); args.push_back(&srcDecl); - const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, args, FunctionType::ExtInfo(), RequiredArgs::All); + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); @@ -3016,8 +3283,8 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy); args.push_back(&srcDecl); - const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, args, FunctionType::ExtInfo(), RequiredArgs::All); + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp index f0af3e9..caafef8 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp @@ -35,11 +35,9 @@ #include "llvm/Support/Compiler.h" #include <cstdarg> - using namespace clang; using namespace CodeGen; - namespace { /// Class that lazily initialises the runtime function. Avoids inserting the /// types and the function declaration into a module if they're not used, and @@ -161,6 +159,7 @@ protected: /// runtime provides some LLVM passes that can use this to do things like /// automatic IMP caching and speculative inlining. unsigned msgSendMDKind; + /// Helper function that generates a constant string and returns a pointer to /// the start of the string. The result of this function can be used anywhere /// where the C code specifies const char*. @@ -170,6 +169,7 @@ protected: return llvm::ConstantExpr::getGetElementPtr(Array.getElementType(), Array.getPointer(), Zeros); } + /// Emits a linkonce_odr string, whose name is the prefix followed by the /// string value. This allows the linker to combine the strings between /// different modules. Used for EH typeinfo names, selector strings, and a @@ -186,6 +186,7 @@ protected: return llvm::ConstantExpr::getGetElementPtr(ConstStr->getValueType(), ConstStr, Zeros); } + /// Generates a global structure, initialized by the elements in the vector. 
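[Editorial illustration, not part of the patch: the new ARCRetainExprEmitter and ARCUnsafeUnretainedExprEmitter above share one expression walk through the CRTP base ARCExprEmitter, whose visit() defers each node to the derived class via asImpl(). A minimal sketch of that dispatch shape, using simplified stand-in types rather than the real clang classes:]

    // Minimal stand-in for an expression node kind.
    enum class ExprKind { Call, Other };

    template <typename Impl, typename Result>
    struct ExprEmitterBase {
      // CRTP: the base owns the traversal, the derived class owns the per-node work.
      Impl &asImpl() { return *static_cast<Impl *>(this); }

      Result visit(ExprKind k) {
        if (k == ExprKind::Call)
          return asImpl().visitCall(k);
        return asImpl().visitExpr(k);   // conservative fallback
      }
    };

    // One derived emitter produces "+1" (retained) results ...
    struct RetainEmitterSketch : ExprEmitterBase<RetainEmitterSketch, bool> {
      bool visitCall(ExprKind) { return true; }   // calls yield a retained value
      bool visitExpr(ExprKind) { return false; }  // anything else stays +0
    };

[The payoff visible in the diff is that the +1 emitter and the __unsafe_unretained emitter differ only in their visit methods, while the cast/comma/call/pseudo-object classification lives in a single place.]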
/// The element types must match the types of the structure elements in the /// first argument. @@ -201,6 +202,7 @@ protected: GV->setAlignment(Align.getQuantity()); return GV; } + /// Generates a global array. The vector must contain the same number of /// elements that the array type declares, of the type specified as the array /// element type. @@ -216,6 +218,7 @@ protected: GV->setAlignment(Align.getQuantity()); return GV; } + /// Generates a global array, inferring the array type from the specified /// element type and the size of the initialiser. llvm::GlobalVariable *MakeGlobalArray(llvm::Type *Ty, @@ -227,6 +230,7 @@ protected: llvm::ArrayType *ArrayTy = llvm::ArrayType::get(Ty, V.size()); return MakeGlobal(ArrayTy, V, Align, Name, linkage); } + /// Returns a property name and encoding string. llvm::Constant *MakePropertyEncodingString(const ObjCPropertyDecl *PD, const Decl *Container) { @@ -245,6 +249,7 @@ protected: } return MakeConstantString(PD->getNameAsString()); } + /// Push the property attributes into two structure fields. void PushPropertyAttributes(std::vector<llvm::Constant*> &Fields, ObjCPropertyDecl *property, bool isSynthesized=true, bool @@ -273,6 +278,7 @@ protected: Fields.push_back(llvm::ConstantInt::get(Int8Ty, 0)); Fields.push_back(llvm::ConstantInt::get(Int8Ty, 0)); } + /// Ensures that the value has the required type, by inserting a bitcast if /// required. This function lets us avoid inserting bitcasts that are /// redundant. @@ -284,12 +290,14 @@ protected: if (V.getType() == Ty) return V; return B.CreateBitCast(V, Ty); } + // Some zeros used for GEPs in lots of places. llvm::Constant *Zeros[2]; /// Null pointer value. Mainly used as a terminator in various arrays. llvm::Constant *NULLPtr; /// LLVM context. llvm::LLVMContext &VMContext; + private: /// Placeholder for the class. Lots of things refer to the class before we've /// actually emitted it. We use this alias as a placeholder, and then replace @@ -360,7 +368,6 @@ protected: LazyRuntimeFunction SyncExitFn; private: - /// Function called if fast enumeration detects that the collection is /// modified during the update. LazyRuntimeFunction EnumerationMutationFn; @@ -385,7 +392,7 @@ private: /// Objective-C 1 property structures when targeting the GCC runtime or it /// will abort. const int ProtocolVersion; -private: + /// Generates an instance variable list structure. This is a structure /// containing a size and an array of structures containing instance variable /// metadata. This is used purely for introspection in the fragile ABI. In @@ -393,6 +400,7 @@ private: llvm::Constant *GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames, ArrayRef<llvm::Constant *> IvarTypes, ArrayRef<llvm::Constant *> IvarOffsets); + /// Generates a method list structure. This is a structure containing a size /// and an array of structures containing method metadata. /// @@ -403,23 +411,28 @@ private: ArrayRef<Selector> MethodSels, ArrayRef<llvm::Constant *> MethodTypes, bool isClassMethodList); + /// Emits an empty protocol. This is used for \@protocol() where no protocol /// is found. The runtime will (hopefully) fix up the pointer to refer to the /// real protocol. llvm::Constant *GenerateEmptyProtocol(const std::string &ProtocolName); + /// Generates a list of property metadata structures. This follows the same /// pattern as method and instance variable metadata lists. 
llvm::Constant *GeneratePropertyList(const ObjCImplementationDecl *OID, SmallVectorImpl<Selector> &InstanceMethodSels, SmallVectorImpl<llvm::Constant*> &InstanceMethodTypes); + /// Generates a list of referenced protocols. Classes, categories, and /// protocols all use this structure. llvm::Constant *GenerateProtocolList(ArrayRef<std::string> Protocols); + /// To ensure that all protocols are seen by the runtime, we add a category on /// a class defined in the runtime, declaring no methods, but adopting the /// protocols. This is a horribly ugly hack, but it allows us to collect all /// of the protocols without changing the ABI. void GenerateProtocolHolderCategory(); + /// Generates a class structure. llvm::Constant *GenerateClassStructure( llvm::Constant *MetaClass, @@ -436,25 +449,31 @@ private: llvm::Constant *StrongIvarBitmap, llvm::Constant *WeakIvarBitmap, bool isMeta=false); + /// Generates a method list. This is used by protocols to define the required /// and optional methods. llvm::Constant *GenerateProtocolMethodList( ArrayRef<llvm::Constant *> MethodNames, ArrayRef<llvm::Constant *> MethodTypes); + /// Returns a selector with the specified type encoding. An empty string is /// used to return an untyped selector (with the types field set to NULL). llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, const std::string &TypeEncoding); + /// Returns the variable used to store the offset of an instance variable. llvm::GlobalVariable *ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar); /// Emits a reference to a class. This allows the linker to object if there /// is no class of the matching name. + protected: void EmitClassRef(const std::string &className); + /// Emits a pointer to the named class virtual llvm::Value *GetClassNamed(CodeGenFunction &CGF, const std::string &Name, bool isWeak); + /// Looks up the method for sending a message to the specified object. This /// mechanism differs between the GCC and GNU runtimes, so this method must be /// overridden in subclasses. @@ -463,6 +482,7 @@ protected: llvm::Value *cmd, llvm::MDNode *node, MessageSendInfo &MSI) = 0; + /// Looks up the method for sending a message to a superclass. This /// mechanism differs between the GCC and GNU runtimes, so this method must /// be overridden in subclasses. @@ -470,6 +490,7 @@ protected: Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) = 0; + /// Libobjc2 uses a bitfield representation where small(ish) bitfields are /// stored in a 64-bit value with the low bit set to 1 and the remaining 63 /// bits set to their values, LSB first, while larger ones are stored in a @@ -482,6 +503,7 @@ protected: /// a bitfield with the 64th bit set will be (int64_t)&{ 2, [0, 1<<31] }, /// while a bitfield / with the 63rd bit set will be 1<<64. llvm::Constant *MakeBitField(ArrayRef<bool> bits); + public: CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, unsigned protocolClassVersion); @@ -569,11 +591,12 @@ public: return NULLPtr; } - llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + llvm::GlobalVariable *GetClassGlobal(StringRef Name, bool Weak = false) override { return nullptr; } }; + /// Class representing the legacy GCC Objective-C ABI. This is the default when /// -fobjc-nonfragile-abi is not specified. /// @@ -590,6 +613,7 @@ class CGObjCGCC : public CGObjCGNU { /// structure describing the receiver and the class, and a selector as /// arguments. Returns the IMP for the corresponding method. 
LazyRuntimeFunction MsgLookupSuperFn; + protected: llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver, llvm::Value *cmd, llvm::MDNode *node, @@ -602,23 +626,26 @@ protected: imp->setMetadata(msgSendMDKind, node); return imp.getInstruction(); } + llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) override { - CGBuilderTy &Builder = CGF.Builder; - llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper, - PtrToObjCSuperTy).getPointer(), cmd}; - return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); - } - public: - CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) { - // IMP objc_msg_lookup(id, SEL); - MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, - nullptr); - // IMP objc_msg_lookup_super(struct objc_super*, SEL); - MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, - PtrToObjCSuperTy, SelectorTy, nullptr); - } + CGBuilderTy &Builder = CGF.Builder; + llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper, + PtrToObjCSuperTy).getPointer(), cmd}; + return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); + } + +public: + CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) { + // IMP objc_msg_lookup(id, SEL); + MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, + nullptr); + // IMP objc_msg_lookup_super(struct objc_super*, SEL); + MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, + PtrToObjCSuperTy, SelectorTy, nullptr); + } }; + /// Class used when targeting the new GNUstep runtime ABI. class CGObjCGNUstep : public CGObjCGNU { /// The slot lookup function. Returns a pointer to a cacheable structure @@ -646,8 +673,10 @@ class CGObjCGNUstep : public CGObjCGNU { /// Type of an slot structure pointer. This is returned by the various /// lookup functions. llvm::Type *SlotTy; + public: llvm::Constant *GetEHType(QualType T) override; + protected: llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver, llvm::Value *cmd, llvm::MDNode *node, @@ -689,6 +718,7 @@ class CGObjCGNUstep : public CGObjCGNU { Receiver = Builder.CreateLoad(ReceiverPtr, true); return imp; } + llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) override { @@ -702,6 +732,7 @@ class CGObjCGNUstep : public CGObjCGNU { return Builder.CreateAlignedLoad(Builder.CreateStructGEP(nullptr, slot, 4), CGF.getPointerAlign()); } + public: CGObjCGNUstep(CodeGenModule &Mod) : CGObjCGNU(Mod, 9, 3) { const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime; @@ -753,6 +784,7 @@ class CGObjCGNUstep : public CGObjCGNU { CxxAtomicObjectGetFn.init(&CGM, "objc_getCppObjectAtomic", VoidTy, PtrTy, PtrTy, PtrTy, nullptr); } + llvm::Constant *GetCppAtomicObjectGetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. @@ -760,6 +792,7 @@ class CGObjCGNUstep : public CGObjCGNU { VersionTuple(1, 7)); return CxxAtomicObjectGetFn; } + llvm::Constant *GetCppAtomicObjectSetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. 
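[Editorial illustration, not part of the patch: LookupIMP/LookupIMPSuper above model the GCC/GNU runtime's two-step dispatch, where objc_msg_lookup(id, SEL) returns an IMP that the caller then invokes. Roughly, at the source level (opaque typedefs added only so the sketch compiles on its own):]

    typedef struct objc_object *id;
    typedef struct objc_selector *SEL;
    typedef id (*IMP)(id, SEL, ...);

    extern "C" IMP objc_msg_lookup(id receiver, SEL op);

    id send(id receiver, SEL sel) {
      IMP imp = objc_msg_lookup(receiver, sel); // first call: find the method
      return imp(receiver, sel);                // second call: invoke the IMP
    }

[objc_msg_lookup_super follows the same pattern, except that it takes a struct objc_super* describing both the receiver and the class to start the search from.]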
@@ -767,6 +800,7 @@ class CGObjCGNUstep : public CGObjCGNU { VersionTuple(1, 7)); return CxxAtomicObjectSetFn; } + llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, bool copy) override { // The optimised property functions omit the GC check, and so are not @@ -821,32 +855,29 @@ protected: llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) override { - CGBuilderTy &Builder = CGF.Builder; - llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper.getPointer(), - PtrToObjCSuperTy), cmd}; + CGBuilderTy &Builder = CGF.Builder; + llvm::Value *lookupArgs[] = { + EnforceType(Builder, ObjCSuper.getPointer(), PtrToObjCSuperTy), cmd, + }; - if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) - return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFnSRet, lookupArgs); - else - return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); - } + if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) + return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFnSRet, lookupArgs); + else + return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); + } - llvm::Value *GetClassNamed(CodeGenFunction &CGF, - const std::string &Name, bool isWeak) override { + llvm::Value *GetClassNamed(CodeGenFunction &CGF, const std::string &Name, + bool isWeak) override { if (isWeak) return CGObjCGNU::GetClassNamed(CGF, Name, isWeak); EmitClassRef(Name); - std::string SymbolName = "_OBJC_CLASS_" + Name; - llvm::GlobalVariable *ClassSymbol = TheModule.getGlobalVariable(SymbolName); - if (!ClassSymbol) ClassSymbol = new llvm::GlobalVariable(TheModule, LongTy, false, llvm::GlobalValue::ExternalLinkage, nullptr, SymbolName); - return ClassSymbol; } @@ -865,7 +896,6 @@ public: }; } // end anonymous namespace - /// Emits a reference to a dummy variable which is emitted with each class. /// This ensures that a linker error will be generated when trying to link /// together modules where a referenced class is not defined. @@ -1021,8 +1051,7 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, } llvm::Value *CGObjCGNU::GetClassNamed(CodeGenFunction &CGF, - const std::string &Name, - bool isWeak) { + const std::string &Name, bool isWeak) { llvm::Constant *ClassName = MakeConstantString(Name); // With the incompatible ABI, this will need to be replaced with a direct // reference to the class symbol. For the compatible nonfragile ABI we are @@ -1044,15 +1073,48 @@ llvm::Value *CGObjCGNU::GetClassNamed(CodeGenFunction &CGF, // techniques can modify the name -> class mapping. 
llvm::Value *CGObjCGNU::GetClass(CodeGenFunction &CGF, const ObjCInterfaceDecl *OID) { - return GetClassNamed(CGF, OID->getNameAsString(), OID->isWeakImported()); + auto *Value = + GetClassNamed(CGF, OID->getNameAsString(), OID->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) { + if (auto *ClassSymbol = dyn_cast<llvm::GlobalVariable>(Value)) { + auto DLLStorage = llvm::GlobalValue::DefaultStorageClass; + if (OID->hasAttr<DLLExportAttr>()) + DLLStorage = llvm::GlobalValue::DLLExportStorageClass; + else if (OID->hasAttr<DLLImportAttr>()) + DLLStorage = llvm::GlobalValue::DLLImportStorageClass; + ClassSymbol->setDLLStorageClass(DLLStorage); + } + } + return Value; } + llvm::Value *CGObjCGNU::EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) { - return GetClassNamed(CGF, "NSAutoreleasePool", false); + auto *Value = GetClassNamed(CGF, "NSAutoreleasePool", false); + if (CGM.getTriple().isOSBinFormatCOFF()) { + if (auto *ClassSymbol = dyn_cast<llvm::GlobalVariable>(Value)) { + IdentifierInfo &II = CGF.CGM.getContext().Idents.get("NSAutoreleasePool"); + TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + + const VarDecl *VD = nullptr; + for (const auto &Result : DC->lookup(&II)) + if ((VD = dyn_cast<VarDecl>(Result))) + break; + + auto DLLStorage = llvm::GlobalValue::DefaultStorageClass; + if (!VD || VD->hasAttr<DLLImportAttr>()) + DLLStorage = llvm::GlobalValue::DLLImportStorageClass; + else if (VD->hasAttr<DLLExportAttr>()) + DLLStorage = llvm::GlobalValue::DLLExportStorageClass; + + ClassSymbol->setDLLStorageClass(DLLStorage); + } + } + return Value; } llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel, const std::string &TypeEncoding) { - SmallVectorImpl<TypedSelector> &Types = SelectorTable[Sel]; llvm::GlobalAlias *SelValue = nullptr; @@ -1247,8 +1309,6 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, } llvm::Value *cmd = GetSelector(CGF, Sel); - - CallArgList ActualArgs; ActualArgs.add(RValue::get(EnforceType(Builder, Receiver, IdTy)), ASTIdTy); @@ -1497,21 +1557,17 @@ GenerateMethodList(StringRef ClassName, IMPTy, //Method pointer nullptr); std::vector<llvm::Constant*> Methods; - std::vector<llvm::Constant*> Elements; for (unsigned int i = 0, e = MethodTypes.size(); i < e; ++i) { - Elements.clear(); llvm::Constant *Method = TheModule.getFunction(SymbolNameForMethod(ClassName, CategoryName, MethodSels[i], isClassMethodList)); assert(Method && "Can't generate metadata for method that doesn't exist"); llvm::Constant *C = MakeConstantString(MethodSels[i].getAsString()); - Elements.push_back(C); - Elements.push_back(MethodTypes[i]); Method = llvm::ConstantExpr::getBitCast(Method, IMPTy); - Elements.push_back(Method); - Methods.push_back(llvm::ConstantStruct::get(ObjCMethodTy, Elements)); + Methods.push_back( + llvm::ConstantStruct::get(ObjCMethodTy, {C, MethodTypes[i], Method})); } // Array of method structures @@ -1554,23 +1610,18 @@ GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames, IntTy, nullptr); std::vector<llvm::Constant*> Ivars; - std::vector<llvm::Constant*> Elements; for (unsigned int i = 0, e = IvarNames.size() ; i < e ; i++) { - Elements.clear(); - Elements.push_back(IvarNames[i]); - Elements.push_back(IvarTypes[i]); - Elements.push_back(IvarOffsets[i]); - Ivars.push_back(llvm::ConstantStruct::get(ObjCIvarTy, Elements)); + Ivars.push_back(llvm::ConstantStruct::get( + ObjCIvarTy, {IvarNames[i], IvarTypes[i], IvarOffsets[i]})); } // Array of 
method structures llvm::ArrayType *ObjCIvarArrayTy = llvm::ArrayType::get(ObjCIvarTy, IvarNames.size()); - - Elements.clear(); - Elements.push_back(llvm::ConstantInt::get(IntTy, (int)IvarNames.size())); - Elements.push_back(llvm::ConstantArray::get(ObjCIvarArrayTy, Ivars)); + llvm::Constant *Elements[] = { + llvm::ConstantInt::get(IntTy, (int)IvarNames.size()), + llvm::ConstantArray::get(ObjCIvarArrayTy, Ivars)}; // Structure containing array and array count llvm::StructType *ObjCIvarListTy = llvm::StructType::get(IntTy, ObjCIvarArrayTy, @@ -1682,12 +1733,9 @@ GenerateProtocolMethodList(ArrayRef<llvm::Constant *> MethodNames, PtrToInt8Ty, nullptr); std::vector<llvm::Constant*> Methods; - std::vector<llvm::Constant*> Elements; for (unsigned int i = 0, e = MethodTypes.size() ; i < e ; i++) { - Elements.clear(); - Elements.push_back(MethodNames[i]); - Elements.push_back(MethodTypes[i]); - Methods.push_back(llvm::ConstantStruct::get(ObjCMethodDescTy, Elements)); + Methods.push_back(llvm::ConstantStruct::get( + ObjCMethodDescTy, {MethodNames[i], MethodTypes[i]})); } llvm::ArrayType *ObjCMethodArrayTy = llvm::ArrayType::get(ObjCMethodDescTy, MethodNames.size()); @@ -1762,17 +1810,13 @@ llvm::Constant *CGObjCGNU::GenerateEmptyProtocol( MethodList->getType(), MethodList->getType(), nullptr); - std::vector<llvm::Constant*> Elements; // The isa pointer must be set to a magic number so the runtime knows it's // the correct layout. - Elements.push_back(llvm::ConstantExpr::getIntToPtr( - llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy)); - Elements.push_back(MakeConstantString(ProtocolName, ".objc_protocol_name")); - Elements.push_back(ProtocolList); - Elements.push_back(MethodList); - Elements.push_back(MethodList); - Elements.push_back(MethodList); - Elements.push_back(MethodList); + llvm::Constant *Elements[] = { + llvm::ConstantExpr::getIntToPtr( + llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy), + MakeConstantString(ProtocolName, ".objc_protocol_name"), ProtocolList, + MethodList, MethodList, MethodList, MethodList}; return MakeGlobal(ProtocolTy, Elements, CGM.getPointerAlign(), ".objc_protocol"); } @@ -1849,7 +1893,7 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { // Add all of the property methods need adding to the method list and to the // property metadata list. - for (auto *property : PD->properties()) { + for (auto *property : PD->instance_properties()) { std::vector<llvm::Constant*> Fields; Fields.push_back(MakePropertyEncodingString(property, nullptr)); @@ -1920,19 +1964,14 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { PropertyList->getType(), OptionalPropertyList->getType(), nullptr); - std::vector<llvm::Constant*> Elements; // The isa pointer must be set to a magic number so the runtime knows it's // the correct layout. 
- Elements.push_back(llvm::ConstantExpr::getIntToPtr( - llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy)); - Elements.push_back(MakeConstantString(ProtocolName, ".objc_protocol_name")); - Elements.push_back(ProtocolList); - Elements.push_back(InstanceMethodList); - Elements.push_back(ClassMethodList); - Elements.push_back(OptionalInstanceMethodList); - Elements.push_back(OptionalClassMethodList); - Elements.push_back(PropertyList); - Elements.push_back(OptionalPropertyList); + llvm::Constant *Elements[] = { + llvm::ConstantExpr::getIntToPtr( + llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy), + MakeConstantString(ProtocolName, ".objc_protocol_name"), ProtocolList, + InstanceMethodList, ClassMethodList, OptionalInstanceMethodList, + OptionalClassMethodList, PropertyList, OptionalPropertyList}; ExistingProtocols[ProtocolName] = llvm::ConstantExpr::getBitCast(MakeGlobal(ProtocolTy, Elements, CGM.getPointerAlign(), ".objc_protocol"), IdTy); @@ -2058,20 +2097,20 @@ void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) { E = Protos.end(); I != E; ++I) Protocols.push_back((*I)->getNameAsString()); - std::vector<llvm::Constant*> Elements; - Elements.push_back(MakeConstantString(CategoryName)); - Elements.push_back(MakeConstantString(ClassName)); - // Instance method list - Elements.push_back(llvm::ConstantExpr::getBitCast(GenerateMethodList( - ClassName, CategoryName, InstanceMethodSels, InstanceMethodTypes, - false), PtrTy)); - // Class method list - Elements.push_back(llvm::ConstantExpr::getBitCast(GenerateMethodList( - ClassName, CategoryName, ClassMethodSels, ClassMethodTypes, true), - PtrTy)); - // Protocol list - Elements.push_back(llvm::ConstantExpr::getBitCast( - GenerateProtocolList(Protocols), PtrTy)); + llvm::Constant *Elements[] = { + MakeConstantString(CategoryName), MakeConstantString(ClassName), + // Instance method list + llvm::ConstantExpr::getBitCast( + GenerateMethodList(ClassName, CategoryName, InstanceMethodSels, + InstanceMethodTypes, false), + PtrTy), + // Class method list + llvm::ConstantExpr::getBitCast(GenerateMethodList(ClassName, CategoryName, + ClassMethodSels, + ClassMethodTypes, true), + PtrTy), + // Protocol list + llvm::ConstantExpr::getBitCast(GenerateProtocolList(Protocols), PtrTy)}; Categories.push_back(llvm::ConstantExpr::getBitCast( MakeGlobal(llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty, PtrTy, PtrTy, PtrTy, nullptr), Elements, CGM.getPointerAlign()), @@ -2167,18 +2206,19 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { // Get the class name ObjCInterfaceDecl *ClassDecl = - const_cast<ObjCInterfaceDecl *>(OID->getClassInterface()); + const_cast<ObjCInterfaceDecl *>(OID->getClassInterface()); std::string ClassName = ClassDecl->getNameAsString(); + // Emit the symbol that is used to generate linker errors if this class is // referenced in other modules but not declared. std::string classSymbolName = "__objc_class_name_" + ClassName; - if (llvm::GlobalVariable *symbol = - TheModule.getGlobalVariable(classSymbolName)) { + if (auto *symbol = TheModule.getGlobalVariable(classSymbolName)) { symbol->setInitializer(llvm::ConstantInt::get(LongTy, 0)); } else { new llvm::GlobalVariable(TheModule, LongTy, false, - llvm::GlobalValue::ExternalLinkage, llvm::ConstantInt::get(LongTy, 0), - classSymbolName); + llvm::GlobalValue::ExternalLinkage, + llvm::ConstantInt::get(LongTy, 0), + classSymbolName); } // Get the size of instances. 
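[Editorial illustration, not part of the patch: the Elements[] initializer above now builds the GNU runtime's protocol record in one shot. An approximate C++ view of the layout it populates, with descriptive field names of my own rather than names taken from the runtime headers; the isa slot deliberately carries the integer ProtocolVersion so the runtime can recognize the layout:]

    struct ObjCProtocolRecordSketch {
      void *isa;                       // really (id)(uintptr_t)ProtocolVersion
      const char *name;                // the ".objc_protocol_name" string
      void *protocol_list;             // protocols this protocol adopts
      void *instance_methods;          // required instance methods
      void *class_methods;             // required class methods
      void *optional_instance_methods;
      void *optional_class_methods;
      void *properties;                // PropertyList
      void *optional_properties;       // OptionalPropertyList
    };

[GenerateEmptyProtocol emits the shorter seven-field form, reusing a single empty method list for all four method-list slots.]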
@@ -2256,7 +2296,6 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { MakeGlobalArray(PtrToIntTy, IvarOffsetValues, CGM.getPointerAlign(), ".ivar.offsets"); - // Collect information about instance methods SmallVector<Selector, 16> InstanceMethodSels; SmallVector<llvm::Constant*, 16> InstanceMethodTypes; @@ -2270,7 +2309,6 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { llvm::Constant *Properties = GeneratePropertyList(OID, InstanceMethodSels, InstanceMethodTypes); - // Collect information about class methods SmallVector<Selector, 16> ClassMethodSels; SmallVector<llvm::Constant*, 16> ClassMethodTypes; @@ -2343,19 +2381,35 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { ++ivarIndex; } llvm::Constant *ZeroPtr = llvm::ConstantInt::get(IntPtrTy, 0); + //Generate metaclass for class methods - llvm::Constant *MetaClassStruct = GenerateClassStructure(NULLPtr, - NULLPtr, 0x12L, ClassName.c_str(), nullptr, Zeros[0], GenerateIvarList( - empty, empty, empty), ClassMethodList, NULLPtr, - NULLPtr, NULLPtr, ZeroPtr, ZeroPtr, true); + llvm::Constant *MetaClassStruct = GenerateClassStructure( + NULLPtr, NULLPtr, 0x12L, ClassName.c_str(), nullptr, Zeros[0], + GenerateIvarList(empty, empty, empty), ClassMethodList, NULLPtr, NULLPtr, + NULLPtr, ZeroPtr, ZeroPtr, true); + if (CGM.getTriple().isOSBinFormatCOFF()) { + auto Storage = llvm::GlobalValue::DefaultStorageClass; + if (OID->getClassInterface()->hasAttr<DLLImportAttr>()) + Storage = llvm::GlobalValue::DLLImportStorageClass; + else if (OID->getClassInterface()->hasAttr<DLLExportAttr>()) + Storage = llvm::GlobalValue::DLLExportStorageClass; + cast<llvm::GlobalValue>(MetaClassStruct)->setDLLStorageClass(Storage); + } // Generate the class structure - llvm::Constant *ClassStruct = - GenerateClassStructure(MetaClassStruct, SuperClass, 0x11L, - ClassName.c_str(), nullptr, - llvm::ConstantInt::get(LongTy, instanceSize), IvarList, - MethodList, GenerateProtocolList(Protocols), IvarOffsetArray, - Properties, StrongIvarBitmap, WeakIvarBitmap); + llvm::Constant *ClassStruct = GenerateClassStructure( + MetaClassStruct, SuperClass, 0x11L, ClassName.c_str(), nullptr, + llvm::ConstantInt::get(LongTy, instanceSize), IvarList, MethodList, + GenerateProtocolList(Protocols), IvarOffsetArray, Properties, + StrongIvarBitmap, WeakIvarBitmap); + if (CGM.getTriple().isOSBinFormatCOFF()) { + auto Storage = llvm::GlobalValue::DefaultStorageClass; + if (OID->getClassInterface()->hasAttr<DLLImportAttr>()) + Storage = llvm::GlobalValue::DLLImportStorageClass; + else if (OID->getClassInterface()->hasAttr<DLLExportAttr>()) + Storage = llvm::GlobalValue::DLLExportStorageClass; + cast<llvm::GlobalValue>(ClassStruct)->setDLLStorageClass(Storage); + } // Resolve the class aliases, if they exist. 
if (ClassPtrAlias) { @@ -2376,7 +2430,6 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { Classes.push_back(ClassStruct); } - llvm::Function *CGObjCGNU::ModuleInitFunction() { // Only emit an ObjC load function if no Objective-C stuff has been called if (Classes.empty() && Categories.empty() && ConstantStrings.empty() && @@ -2651,12 +2704,15 @@ llvm::Constant *CGObjCGNU::GetOptimizedPropertySetFunction(bool atomic, llvm::Constant *CGObjCGNU::GetGetStructFunction() { return GetStructPropertyFn; } + llvm::Constant *CGObjCGNU::GetSetStructFunction() { return SetStructPropertyFn; } + llvm::Constant *CGObjCGNU::GetCppAtomicObjectGetFunction() { return nullptr; } + llvm::Constant *CGObjCGNU::GetCppAtomicObjectSetFunction() { return nullptr; } @@ -2685,7 +2741,6 @@ void CGObjCGNU::EmitTryStmt(CodeGenFunction &CGF, // In Objective-C++ mode, we actually emit something equivalent to the C++ // exception handler. EmitTryCatchStmt(CGF, S, EnterCatchFn, ExitCatchFn, ExceptionReThrowFn); - return ; } void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF, @@ -2800,7 +2855,7 @@ llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable( // to replace it with the real version for a library. In non-PIC code you // must compile with the fragile ABI if you want to use ivars from a // GCC-compiled class. - if (CGM.getLangOpts().PICLevel || CGM.getLangOpts().PIELevel) { + if (CGM.getLangOpts().PICLevel) { llvm::GlobalVariable *IvarOffsetGV = new llvm::GlobalVariable(TheModule, Int32Ty, false, llvm::GlobalValue::PrivateLinkage, OffsetGuess, Name+".guess"); @@ -2848,7 +2903,12 @@ llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF, const ObjCIvarDecl *Ivar) { if (CGM.getLangOpts().ObjCRuntime.isNonFragile()) { Interface = FindIvarInterface(CGM.getContext(), Interface, Ivar); - if (RuntimeVersion < 10) + + // The MSVC linker cannot have a single global defined as LinkOnceAnyLinkage + // and ExternalLinkage, so create a reference to the ivar global and rely on + // the definition being created as part of GenerateClass. 
+ if (RuntimeVersion < 10 || + CGF.CGM.getTarget().getTriple().isKnownWindowsMSVCEnvironment()) return CGF.Builder.CreateZExtOrBitCast( CGF.Builder.CreateDefaultAlignedLoad(CGF.Builder.CreateAlignedLoad( ObjCIvarOffsetVariable(Interface, Ivar), diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp index e30b287..5ab9fc4 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp @@ -236,17 +236,14 @@ public: CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // id objc_getProperty (id, SEL, ptrdiff_t, bool) - SmallVector<CanQualType,4> Params; CanQualType IdType = Ctx.getCanonicalParamType(Ctx.getObjCIdType()); CanQualType SelType = Ctx.getCanonicalParamType(Ctx.getObjCSelType()); - Params.push_back(IdType); - Params.push_back(SelType); - Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified()); - Params.push_back(Ctx.BoolTy); + CanQualType Params[] = { + IdType, SelType, + Ctx.getPointerDiffType()->getCanonicalTypeUnqualified(), Ctx.BoolTy}; llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo( - IdType, false, false, Params, FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(IdType, Params)); return CGM.CreateRuntimeFunction(FTy, "objc_getProperty"); } @@ -254,19 +251,18 @@ public: CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // void objc_setProperty (id, SEL, ptrdiff_t, id, bool, bool) - SmallVector<CanQualType,6> Params; CanQualType IdType = Ctx.getCanonicalParamType(Ctx.getObjCIdType()); CanQualType SelType = Ctx.getCanonicalParamType(Ctx.getObjCSelType()); - Params.push_back(IdType); - Params.push_back(SelType); - Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified()); - Params.push_back(IdType); - Params.push_back(Ctx.BoolTy); - Params.push_back(Ctx.BoolTy); + CanQualType Params[] = { + IdType, + SelType, + Ctx.getPointerDiffType()->getCanonicalTypeUnqualified(), + IdType, + Ctx.BoolTy, + Ctx.BoolTy}; llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo( - Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params)); return CGM.CreateRuntimeFunction(FTy, "objc_setProperty"); } @@ -290,9 +286,8 @@ public: Params.push_back(IdType); Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified()); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo( - Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params)); const char *name; if (atomic && copy) name = "objc_setProperty_atomic_copy"; @@ -317,9 +312,8 @@ public: Params.push_back(Ctx.BoolTy); Params.push_back(Ctx.BoolTy); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo( - Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params)); return CGM.CreateRuntimeFunction(FTy, "objc_copyStruct"); } @@ -336,10 +330,8 @@ public: Params.push_back(Ctx.VoidPtrTy); Params.push_back(Ctx.VoidPtrTy); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false, false, - Params, - 
FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params)); return CGM.CreateRuntimeFunction(FTy, "objc_copyCppObjectAtomic"); } @@ -350,12 +342,25 @@ public: SmallVector<CanQualType,1> Params; Params.push_back(Ctx.getCanonicalParamType(Ctx.getObjCIdType())); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo( - Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params)); return CGM.CreateRuntimeFunction(FTy, "objc_enumerationMutation"); } + llvm::Constant *getLookUpClassFn() { + CodeGen::CodeGenTypes &Types = CGM.getTypes(); + ASTContext &Ctx = CGM.getContext(); + // Class objc_lookUpClass (const char *) + SmallVector<CanQualType,1> Params; + Params.push_back( + Ctx.getCanonicalType(Ctx.getPointerType(Ctx.CharTy.withConst()))); + llvm::FunctionType *FTy = + Types.GetFunctionType(Types.arrangeBuiltinFunctionDeclaration( + Ctx.getCanonicalType(Ctx.getObjCClassType()), + Params)); + return CGM.CreateRuntimeFunction(FTy, "objc_lookUpClass"); + } + /// GcReadWeakFn -- LLVM objc_read_weak (id *src) function. llvm::Constant *getGcReadWeakFn() { // id objc_read_weak (id *) @@ -576,7 +581,6 @@ public: return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.Int32Ty, params, false), "objc_exception_match"); - } /// SetJmpFn - LLVM _setjmp function. @@ -600,7 +604,6 @@ public: /// modern abi class ObjCNonFragileABITypesHelper : public ObjCCommonTypesHelper { public: - // MethodListnfABITy - LLVM for struct _method_list_t llvm::StructType *MethodListnfABITy; @@ -967,7 +970,8 @@ protected: llvm::Constant *EmitPropertyList(Twine Name, const Decl *Container, const ObjCContainerDecl *OCD, - const ObjCCommonTypesHelper &ObjCTypes); + const ObjCCommonTypesHelper &ObjCTypes, + bool IsClassProperty); /// EmitProtocolMethodTypes - Generate the array of extended method type /// strings. The return value has type Int8PtrPtrTy. @@ -981,13 +985,20 @@ protected: SmallVectorImpl<llvm::Constant*> &Properties, const Decl *Container, const ObjCProtocolDecl *Proto, - const ObjCCommonTypesHelper &ObjCTypes); + const ObjCCommonTypesHelper &ObjCTypes, + bool IsClassProperty); /// GetProtocolRef - Return a reference to the internal protocol /// description, creating an empty one if it has not been /// defined. The return value has type ProtocolPtrTy. llvm::Constant *GetProtocolRef(const ObjCProtocolDecl *PD); + /// Return a reference to the given Class using runtime calls rather than + /// by a symbol reference. + llvm::Value *EmitClassRefViaRuntime(CodeGenFunction &CGF, + const ObjCInterfaceDecl *ID, + ObjCCommonTypesHelper &ObjCTypes); + public: /// CreateMetadataVar - Create a global variable with internal /// linkage for use by the Objective-C runtime. @@ -1079,7 +1090,8 @@ private: /// has type ClassExtensionPtrTy. llvm::Constant *EmitClassExtension(const ObjCImplementationDecl *ID, CharUnits instanceSize, - bool hasMRCWeakIvars); + bool hasMRCWeakIvars, + bool isClassProperty); /// EmitClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy, /// for the given class. @@ -1119,9 +1131,8 @@ private: /// EmitMethodList - Emit the method list for the given /// implementation. The return value has type MethodListPtrTy. 
- llvm::Constant *EmitMethodList(Twine Name, - const char *Section, - ArrayRef<llvm::Constant*> Methods); + llvm::Constant *EmitMethodList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods); /// EmitMethodDescList - Emit a method description list for a list of /// method declarations. @@ -1134,9 +1145,8 @@ private: /// - begin, end: The method list to output. /// /// The return value has type MethodDescriptionListPtrTy. - llvm::Constant *EmitMethodDescList(Twine Name, - const char *Section, - ArrayRef<llvm::Constant*> Methods); + llvm::Constant *EmitMethodDescList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods); /// GetOrEmitProtocol - Get the protocol object for the given /// declaration, emitting it if necessary. The return value has type @@ -1255,7 +1265,7 @@ public: /// GetClassGlobal - Return the global variable for the Objective-C /// class of the given name. - llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + llvm::GlobalVariable *GetClassGlobal(StringRef Name, bool Weak = false) override { llvm_unreachable("CGObjCMac::GetClassGlobal"); } @@ -1293,9 +1303,8 @@ private: /// AddModuleClassList - Add the given list of class pointers to the /// module with the provided symbol and section names. - void AddModuleClassList(ArrayRef<llvm::GlobalValue*> Container, - const char *SymbolName, - const char *SectionName); + void AddModuleClassList(ArrayRef<llvm::GlobalValue *> Container, + StringRef SymbolName, StringRef SectionName); llvm::GlobalVariable * BuildClassRoTInitializer(unsigned flags, unsigned InstanceStart, @@ -1314,9 +1323,8 @@ private: /// EmitMethodList - Emit the method list for the given /// implementation. The return value has type MethodListnfABITy. - llvm::Constant *EmitMethodList(Twine Name, - const char *Section, - ArrayRef<llvm::Constant*> Methods); + llvm::Constant *EmitMethodList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods); /// EmitIvarList - Emit the ivar list for the given /// implementation. If ForClass is true the list of class ivars /// (i.e. metaclass ivars) is emitted, otherwise the list of @@ -1357,7 +1365,7 @@ private: /// GetClassGlobal - Return the global variable for the Objective-C /// class of the given name. 
- llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + llvm::GlobalVariable *GetClassGlobal(StringRef Name, bool Weak = false) override; /// EmitClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy, @@ -1398,13 +1406,9 @@ private: llvm::Constant *GetInterfaceEHType(const ObjCInterfaceDecl *ID, bool ForDefinition); - const char *getMetaclassSymbolPrefix() const { - return "OBJC_METACLASS_$_"; - } + StringRef getMetaclassSymbolPrefix() const { return "OBJC_METACLASS_$_"; } - const char *getClassSymbolPrefix() const { - return "OBJC_CLASS_$_"; - } + StringRef getClassSymbolPrefix() const { return "OBJC_CLASS_$_"; } void GetClassSizeInfo(const ObjCImplementationDecl *OID, uint32_t &InstanceStart, @@ -1506,12 +1510,15 @@ public: llvm::Constant *GetSetStructFunction() override { return ObjCTypes.getCopyStructFn(); } + llvm::Constant *GetGetStructFunction() override { return ObjCTypes.getCopyStructFn(); } + llvm::Constant *GetCppAtomicObjectSetFunction() override { return ObjCTypes.getCppAtomicObjectFunction(); } + llvm::Constant *GetCppAtomicObjectGetFunction() override { return ObjCTypes.getCppAtomicObjectFunction(); } @@ -1934,7 +1941,7 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, // Emit a null-check if there's a consumed argument other than the receiver. bool RequiresNullCheck = false; if (ReceiverCanBeNull && CGM.getLangOpts().ObjCAutoRefCount && Method) { - for (const auto *ParamDecl : Method->params()) { + for (const auto *ParamDecl : Method->parameters()) { if (ParamDecl->hasAttr<NSConsumedAttr>()) { if (!nullReturn.NullBB) nullReturn.init(CGF, Arg0); @@ -2027,6 +2034,7 @@ namespace { bool IsDisordered = false; llvm::SmallVector<IvarInfo, 8> IvarsInfo; + public: IvarLayoutBuilder(CodeGenModule &CGM, CharUnits instanceBegin, CharUnits instanceEnd, bool forStrongLayout) @@ -2062,7 +2070,7 @@ namespace { printf("\n"); } }; -} +} // end anonymous namespace llvm::Constant *CGObjCCommonMac::BuildGCBlockLayout(CodeGenModule &CGM, const CGBlockInfo &blockInfo) { @@ -2141,7 +2149,6 @@ void IvarLayoutBuilder::visitBlock(const CGBlockInfo &blockInfo) { } } - /// getBlockCaptureLifetime - This routine returns life time of the captured /// block variable for the purpose of block layout meta-data generation. FQT is /// the type of the variable captured in the block. 
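[Editorial illustration, not part of the patch: the getters above now arrange their types through arrangeBuiltinFunctionDeclaration, but the runtime entry points they declare are unchanged. Plain prototypes matching the comments in the surrounding code; the parameter names are mine, and the opaque typedefs exist only so the sketch compiles on its own:]

    #include <stddef.h>

    typedef struct objc_object *id;
    typedef struct objc_selector *SEL;
    typedef struct objc_class *Class;

    extern "C" id objc_getProperty(id self, SEL _cmd, ptrdiff_t offset, bool atomic);
    extern "C" void objc_setProperty(id self, SEL _cmd, ptrdiff_t offset, id value,
                                     bool atomic, bool copy);
    extern "C" Class objc_lookUpClass(const char *name);

[getLookUpClassFn is the new addition here: declaring class lookup by name is what later lets a class reference be resolved through a runtime call instead of a symbol reference.]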
@@ -2629,7 +2636,6 @@ llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, return getBitmapBlockLayout(false); } - llvm::Constant *CGObjCCommonMac::BuildByrefLayout(CodeGen::CodeGenModule &CGM, QualType T) { assert(CGM.getLangOpts().getGC() == LangOptions::NonGC); @@ -2677,6 +2683,25 @@ llvm::Constant *CGObjCCommonMac::GetProtocolRef(const ObjCProtocolDecl *PD) { return GetOrEmitProtocolRef(PD); } +llvm::Value *CGObjCCommonMac::EmitClassRefViaRuntime( + CodeGenFunction &CGF, + const ObjCInterfaceDecl *ID, + ObjCCommonTypesHelper &ObjCTypes) { + llvm::Constant *lookUpClassFn = ObjCTypes.getLookUpClassFn(); + + llvm::Value *className = + CGF.CGM.GetAddrOfConstantCString(ID->getObjCRuntimeNameAsString()) + .getPointer(); + ASTContext &ctx = CGF.CGM.getContext(); + className = + CGF.Builder.CreateBitCast(className, + CGF.ConvertType( + ctx.getPointerType(ctx.CharTy.withConst()))); + llvm::CallInst *call = CGF.Builder.CreateCall(lookUpClassFn, className); + call->setDoesNotThrow(); + return call; +} + /* // Objective-C 1.0 extensions struct _objc_protocol { @@ -2798,6 +2823,7 @@ llvm::Constant *CGObjCMac::GetOrEmitProtocolRef(const ObjCProtocolDecl *PD) { struct objc_method_description_list *optional_class_methods; struct objc_property_list *instance_properties; const char ** extendedMethodTypes; + struct objc_property_list *class_properties; }; */ llvm::Constant * @@ -2816,13 +2842,16 @@ CGObjCMac::EmitProtocolExtension(const ObjCProtocolDecl *PD, "__OBJC,__cat_cls_meth,regular,no_dead_strip", OptClassMethods), EmitPropertyList("OBJC_$_PROP_PROTO_LIST_" + PD->getName(), nullptr, PD, - ObjCTypes), + ObjCTypes, false), EmitProtocolMethodTypes("OBJC_PROTOCOL_METHOD_TYPES_" + PD->getName(), - MethodTypesExt, ObjCTypes)}; + MethodTypesExt, ObjCTypes), + EmitPropertyList("OBJC_$_CLASS_PROP_PROTO_LIST_" + PD->getName(), nullptr, + PD, ObjCTypes, true)}; // Return null if no extension bits are used. if (Values[1]->isNullValue() && Values[2]->isNullValue() && - Values[3]->isNullValue() && Values[4]->isNullValue()) + Values[3]->isNullValue() && Values[4]->isNullValue() && + Values[5]->isNullValue()) return llvm::Constant::getNullValue(ObjCTypes.ProtocolExtensionPtrTy); llvm::Constant *Init = @@ -2878,10 +2907,15 @@ PushProtocolProperties(llvm::SmallPtrSet<const IdentifierInfo*,16> &PropertySet, SmallVectorImpl<llvm::Constant *> &Properties, const Decl *Container, const ObjCProtocolDecl *Proto, - const ObjCCommonTypesHelper &ObjCTypes) { + const ObjCCommonTypesHelper &ObjCTypes, + bool IsClassProperty) { for (const auto *P : Proto->protocols()) - PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes); + PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes, + IsClassProperty); + for (const auto *PD : Proto->properties()) { + if (IsClassProperty != PD->isClassProperty()) + continue; if (!PropertySet.insert(PD->getIdentifier()).second) continue; llvm::Constant *Prop[] = { @@ -2907,7 +2941,17 @@ PushProtocolProperties(llvm::SmallPtrSet<const IdentifierInfo*,16> &PropertySet, llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, const Decl *Container, const ObjCContainerDecl *OCD, - const ObjCCommonTypesHelper &ObjCTypes) { + const ObjCCommonTypesHelper &ObjCTypes, + bool IsClassProperty) { + if (IsClassProperty) { + // Make this entry NULL for OS X with deployment target < 10.11, for iOS + // with deployment target < 9.0. 
+ const llvm::Triple &Triple = CGM.getTarget().getTriple(); + if ((Triple.isMacOSX() && Triple.isMacOSXVersionLT(10, 11)) || + (Triple.isiOS() && Triple.isOSVersionLT(9))) + return llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); + } + SmallVector<llvm::Constant *, 16> Properties; llvm::SmallPtrSet<const IdentifierInfo*, 16> PropertySet; @@ -2919,10 +2963,15 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, if (const ObjCInterfaceDecl *OID = dyn_cast<ObjCInterfaceDecl>(OCD)) for (const ObjCCategoryDecl *ClassExt : OID->known_extensions()) for (auto *PD : ClassExt->properties()) { + if (IsClassProperty != PD->isClassProperty()) + continue; PropertySet.insert(PD->getIdentifier()); AddProperty(PD); } + for (const auto *PD : OCD->properties()) { + if (IsClassProperty != PD->isClassProperty()) + continue; // Don't emit duplicate metadata for properties that were already in a // class extension. if (!PropertySet.insert(PD->getIdentifier()).second) @@ -2932,11 +2981,13 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, if (const ObjCInterfaceDecl *OID = dyn_cast<ObjCInterfaceDecl>(OCD)) { for (const auto *P : OID->all_referenced_protocols()) - PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes); + PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes, + IsClassProperty); } else if (const ObjCCategoryDecl *CD = dyn_cast<ObjCCategoryDecl>(OCD)) { for (const auto *P : CD->protocols()) - PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes); + PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes, + IsClassProperty); } // Return null for empty list. @@ -3001,8 +3052,8 @@ CGObjCMac::GetMethodDescriptionConstant(const ObjCMethodDecl *MD) { } llvm::Constant * -CGObjCMac::EmitMethodDescList(Twine Name, const char *Section, - ArrayRef<llvm::Constant*> Methods) { +CGObjCMac::EmitMethodDescList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods) { // Return null for empty list. if (Methods.empty()) return llvm::Constant::getNullValue(ObjCTypes.MethodDescriptionListPtrTy); @@ -3029,6 +3080,7 @@ CGObjCMac::EmitMethodDescList(Twine Name, const char *Section, struct _objc_protocol_list *protocols; uint32_t size; // <rdar://4585769> struct _objc_property_list *instance_properties; + struct _objc_property_list *class_properties; }; */ void CGObjCMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { @@ -3055,7 +3107,7 @@ void CGObjCMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { // Class methods should always be defined. ClassMethods.push_back(GetMethodConstant(I)); - llvm::Constant *Values[7]; + llvm::Constant *Values[8]; Values[0] = GetClassName(OCD->getName()); Values[1] = GetClassName(Interface->getObjCRuntimeNameAsString()); LazySymbols.insert(Interface->getIdentifier()); @@ -3077,9 +3129,12 @@ void CGObjCMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { // If there is no category @interface then there can be no properties. 
if (Category) { Values[6] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ExtName.str(), - OCD, Category, ObjCTypes); + OCD, Category, ObjCTypes, false); + Values[7] = EmitPropertyList("\01l_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(), + OCD, Category, ObjCTypes, true); } else { Values[6] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); + Values[7] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); } llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.CategoryTy, @@ -3274,7 +3329,8 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) { Values[ 8] = llvm::Constant::getNullValue(ObjCTypes.CachePtrTy); Values[ 9] = Protocols; Values[10] = BuildStrongIvarLayout(ID, CharUnits::Zero(), Size); - Values[11] = EmitClassExtension(ID, Size, hasMRCWeak); + Values[11] = EmitClassExtension(ID, Size, hasMRCWeak, + false/*isClassProperty*/); llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ClassTy, Values); std::string Name("OBJC_CLASS_"); @@ -3338,8 +3394,9 @@ llvm::Constant *CGObjCMac::EmitMetaClass(const ObjCImplementationDecl *ID, Values[ 9] = Protocols; // ivar_layout for metaclass is always NULL. Values[10] = llvm::Constant::getNullValue(ObjCTypes.Int8PtrTy); - // The class extension is always unused for metaclasses. - Values[11] = llvm::Constant::getNullValue(ObjCTypes.ClassExtensionPtrTy); + // The class extension is used to store class properties for metaclasses. + Values[11] = EmitClassExtension(ID, CharUnits::Zero(), false/*hasMRCWeak*/, + true/*isClassProperty*/); llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ClassTy, Values); @@ -3413,19 +3470,28 @@ llvm::Value *CGObjCMac::EmitSuperClassRef(const ObjCInterfaceDecl *ID) { */ llvm::Constant * CGObjCMac::EmitClassExtension(const ObjCImplementationDecl *ID, - CharUnits InstanceSize, bool hasMRCWeakIvars) { + CharUnits InstanceSize, bool hasMRCWeakIvars, + bool isClassProperty) { uint64_t Size = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ClassExtensionTy); llvm::Constant *Values[3]; Values[0] = llvm::ConstantInt::get(ObjCTypes.IntTy, Size); - Values[1] = BuildWeakIvarLayout(ID, CharUnits::Zero(), InstanceSize, - hasMRCWeakIvars); - Values[2] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ID->getName(), - ID, ID->getClassInterface(), ObjCTypes); + if (isClassProperty) { + llvm::Type *PtrTy = CGM.Int8PtrTy; + Values[1] = llvm::Constant::getNullValue(PtrTy); + } else + Values[1] = BuildWeakIvarLayout(ID, CharUnits::Zero(), InstanceSize, + hasMRCWeakIvars); + if (isClassProperty) + Values[2] = EmitPropertyList("\01l_OBJC_$_CLASS_PROP_LIST_" + ID->getName(), + ID, ID->getClassInterface(), ObjCTypes, true); + else + Values[2] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ID->getName(), + ID, ID->getClassInterface(), ObjCTypes, false); // Return null if no extension bits are used. - if (Values[1]->isNullValue() && Values[2]->isNullValue()) + if ((!Values[1] || Values[1]->isNullValue()) && Values[2]->isNullValue()) return llvm::Constant::getNullValue(ObjCTypes.ClassExtensionPtrTy); llvm::Constant *Init = @@ -3530,9 +3596,8 @@ llvm::Constant *CGObjCMac::GetMethodConstant(const ObjCMethodDecl *MD) { return llvm::ConstantStruct::get(ObjCTypes.MethodTy, Method); } -llvm::Constant *CGObjCMac::EmitMethodList(Twine Name, - const char *Section, - ArrayRef<llvm::Constant*> Methods) { +llvm::Constant *CGObjCMac::EmitMethodList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods) { // Return null for empty list. 
if (Methods.empty()) return llvm::Constant::getNullValue(ObjCTypes.MethodListPtrTy); @@ -3607,6 +3672,7 @@ llvm::Constant *CGObjCMac::GetOptimizedPropertySetFunction(bool atomic, llvm::Constant *CGObjCMac::GetGetStructFunction() { return ObjCTypes.getCopyStructFn(); } + llvm::Constant *CGObjCMac::GetSetStructFunction() { return ObjCTypes.getCopyStructFn(); } @@ -3614,6 +3680,7 @@ llvm::Constant *CGObjCMac::GetSetStructFunction() { llvm::Constant *CGObjCMac::GetCppAtomicObjectGetFunction() { return ObjCTypes.getCppAtomicObjectFunction(); } + llvm::Constant *CGObjCMac::GetCppAtomicObjectSetFunction() { return ObjCTypes.getCppAtomicObjectFunction(); } @@ -3711,7 +3778,7 @@ namespace { void emitWriteHazard(); void emitHazardsInNewBlocks(); }; -} +} // end anonymous namespace /// Create the fragile-ABI read and write hazards based on the current /// state of the function, which is presumed to be immediately prior @@ -4332,7 +4399,6 @@ void CGObjCMac::EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF, llvm::Value *args[] = { src, dst.getPointer() }; CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignWeakFn(), args, "weakassign"); - return; } /// EmitObjCGlobalAssign - Code gen for assigning to a __strong object. @@ -4358,7 +4424,6 @@ void CGObjCMac::EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF, else CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignThreadLocalFn(), args, "threadlocalassign"); - return; } /// EmitObjCIvarAssign - Code gen for assigning to a __strong object. @@ -4380,7 +4445,6 @@ void CGObjCMac::EmitObjCIvarAssign(CodeGen::CodeGenFunction &CGF, dst = CGF.Builder.CreateBitCast(dst, ObjCTypes.PtrObjectPtrTy); llvm::Value *args[] = { src, dst.getPointer(), ivarOffset }; CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignIvarFn(), args); - return; } /// EmitObjCStrongCastAssign - Code gen for assigning to a __strong cast object. @@ -4401,7 +4465,6 @@ void CGObjCMac::EmitObjCStrongCastAssign(CodeGen::CodeGenFunction &CGF, llvm::Value *args[] = { src, dst.getPointer() }; CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignStrongCastFn(), args, "strongassign"); - return; } void CGObjCMac::EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, @@ -4455,7 +4518,8 @@ enum ImageInfoFlags { // A flag indicating that the module has no instances of a @synthesize of a // superclass variable. <rdar://problem/6803242> eImageInfo_CorrectedSynthesize = (1 << 4), // This flag is no longer set by clang. - eImageInfo_ImageIsSimulated = (1 << 5) + eImageInfo_ImageIsSimulated = (1 << 5), + eImageInfo_ClassProperties = (1 << 6) }; void CGObjCCommonMac::EmitImageInfo() { @@ -4507,6 +4571,10 @@ void CGObjCCommonMac::EmitImageInfo() { Triple.getArch() == llvm::Triple::x86_64)) Mod.addModuleFlag(llvm::Module::Error, "Objective-C Is Simulated", eImageInfo_ImageIsSimulated); + + // Indicate whether we are generating class properties. + Mod.addModuleFlag(llvm::Module::Error, "Objective-C Class Properties", + eImageInfo_ClassProperties); } // struct objc_module { @@ -4602,6 +4670,11 @@ llvm::Value *CGObjCMac::EmitClassRefFromId(CodeGenFunction &CGF, llvm::Value *CGObjCMac::EmitClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID) { + // If the class has the objc_runtime_visible attribute, we need to + // use the Objective-C runtime to get the class. 
+ if (ID->hasAttr<ObjCRuntimeVisibleAttr>()) + return EmitClassRefViaRuntime(CGF, ID, ObjCTypes); + return EmitClassRefFromId(CGF, ID->getIdentifier()); } @@ -4954,7 +5027,7 @@ CGObjCCommonMac::BuildIvarLayout(const ObjCImplementationDecl *OMD, baseOffset = CharUnits::Zero(); } - baseOffset = baseOffset.RoundUpToAlignment(CGM.getPointerAlign()); + baseOffset = baseOffset.alignTo(CGM.getPointerAlign()); } else { CGM.getContext().DeepCollectObjCIvars(OI, true, ivars); @@ -5131,9 +5204,8 @@ void CGObjCMac::FinishModule() { } CGObjCNonFragileABIMac::CGObjCNonFragileABIMac(CodeGen::CodeGenModule &cgm) - : CGObjCCommonMac(cgm), - ObjCTypes(cgm) { - ObjCEmptyCacheVar = ObjCEmptyVtableVar = nullptr; + : CGObjCCommonMac(cgm), ObjCTypes(cgm), ObjCEmptyCacheVar(nullptr), + ObjCEmptyVtableVar(nullptr) { ObjCABI = 2; } @@ -5223,7 +5295,6 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm) // struct _objc_cache * CacheTy = llvm::StructType::create(VMContext, "struct._objc_cache"); CachePtrTy = llvm::PointerType::getUnqual(CacheTy); - } ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) @@ -5256,12 +5327,13 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // struct _objc_method_description_list *optional_class_methods; // struct _objc_property_list *instance_properties; // const char ** extendedMethodTypes; + // struct _objc_property_list *class_properties; // } ProtocolExtensionTy = llvm::StructType::create("struct._objc_protocol_extension", IntTy, MethodDescriptionListPtrTy, MethodDescriptionListPtrTy, PropertyListPtrTy, - Int8PtrPtrTy, nullptr); + Int8PtrPtrTy, PropertyListPtrTy, nullptr); // struct _objc_protocol_extension * ProtocolExtensionPtrTy = llvm::PointerType::getUnqual(ProtocolExtensionTy); @@ -5359,14 +5431,17 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // char *class_name; // struct _objc_method_list *instance_method; // struct _objc_method_list *class_method; + // struct _objc_protocol_list *protocols; // uint32_t size; // sizeof(struct _objc_category) // struct _objc_property_list *instance_properties;// category's @property + // struct _objc_property_list *class_properties; // } CategoryTy = llvm::StructType::create("struct._objc_category", Int8PtrTy, Int8PtrTy, MethodListPtrTy, MethodListPtrTy, ProtocolListPtrTy, - IntTy, PropertyListPtrTy, nullptr); + IntTy, PropertyListPtrTy, PropertyListPtrTy, + nullptr); // Global metadata structures @@ -5405,7 +5480,6 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) llvm::StructType::create("struct._objc_exception_data", llvm::ArrayType::get(CGM.Int32Ty,SetJmpBufferSize), StackPtrTy, nullptr); - } ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModule &cgm) @@ -5434,6 +5508,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // const uint32_t flags; // = 0 // const char ** extendedMethodTypes; // const char *demangledName; + // const struct _prop_list_t * class_properties; // } // Holder for struct _protocol_list_t * @@ -5446,7 +5521,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul MethodListnfABIPtrTy, MethodListnfABIPtrTy, MethodListnfABIPtrTy, MethodListnfABIPtrTy, PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy, - Int8PtrTy, + Int8PtrTy, PropertyListPtrTy, nullptr); // struct _protocol_t* @@ -5539,6 +5614,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // const struct _method_list_t * const class_methods; // const struct 
_protocol_list_t * const protocols; // const struct _prop_list_t * const properties; + // const struct _prop_list_t * const class_properties; + // const uint32_t size; // } CategorynfABITy = llvm::StructType::create("struct._category_t", Int8PtrTy, ClassnfABIPtrTy, @@ -5546,6 +5623,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul MethodListnfABIPtrTy, ProtocolListnfABIPtrTy, PropertyListPtrTy, + PropertyListPtrTy, + IntTy, nullptr); // New types for nonfragile abi messaging. @@ -5609,10 +5688,9 @@ llvm::Function *CGObjCNonFragileABIMac::ModuleInitFunction() { return nullptr; } -void CGObjCNonFragileABIMac:: -AddModuleClassList(ArrayRef<llvm::GlobalValue*> Container, - const char *SymbolName, - const char *SectionName) { +void CGObjCNonFragileABIMac::AddModuleClassList( + ArrayRef<llvm::GlobalValue *> Container, StringRef SymbolName, + StringRef SectionName) { unsigned NumClasses = Container.size(); if (!NumClasses) @@ -5814,13 +5892,16 @@ llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassRoTInitializer( if (flags & NonFragileABI_Class_Meta) { Values[ 7] = llvm::Constant::getNullValue(ObjCTypes.IvarListnfABIPtrTy); Values[ 8] = GetIvarLayoutName(nullptr, ObjCTypes); - Values[ 9] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); + Values[ 9] = EmitPropertyList( + "\01l_OBJC_$_CLASS_PROP_LIST_" + ID->getObjCRuntimeNameAsString(), + ID, ID->getClassInterface(), ObjCTypes, true); } else { Values[ 7] = EmitIvarList(ID); Values[ 8] = BuildWeakIvarLayout(ID, beginInstance, endInstance, hasMRCWeak); - Values[ 9] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ID->getObjCRuntimeNameAsString(), - ID, ID->getClassInterface(), ObjCTypes); + Values[ 9] = EmitPropertyList( + "\01l_OBJC_$_PROP_LIST_" + ID->getObjCRuntimeNameAsString(), + ID, ID->getClassInterface(), ObjCTypes, false); } llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ClassRonfABITy, Values); @@ -5870,8 +5951,9 @@ llvm::GlobalVariable *CGObjCNonFragileABIMac::BuildClassMetaData( GV->setSection("__DATA, __objc_data"); GV->setAlignment( CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassnfABITy)); - if (HiddenVisibility) - GV->setVisibility(llvm::GlobalValue::HiddenVisibility); + if (!CGM.getTriple().isOSBinFormatCOFF()) + if (HiddenVisibility) + GV->setVisibility(llvm::GlobalValue::HiddenVisibility); return GV; } @@ -5896,49 +5978,60 @@ void CGObjCNonFragileABIMac::GetClassSizeInfo(const ObjCImplementationDecl *OID, InstanceStart = RL.getFieldOffset(0) / CGM.getContext().getCharWidth(); } +static llvm::GlobalValue::DLLStorageClassTypes getStorage(CodeGenModule &CGM, + StringRef Name) { + IdentifierInfo &II = CGM.getContext().Idents.get(Name); + TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + + const VarDecl *VD = nullptr; + for (const auto &Result : DC->lookup(&II)) + if ((VD = dyn_cast<VarDecl>(Result))) + break; + + if (!VD) + return llvm::GlobalValue::DLLImportStorageClass; + if (VD->hasAttr<DLLExportAttr>()) + return llvm::GlobalValue::DLLExportStorageClass; + if (VD->hasAttr<DLLImportAttr>()) + return llvm::GlobalValue::DLLImportStorageClass; + return llvm::GlobalValue::DefaultStorageClass; +} + void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { - std::string ClassName = ID->getObjCRuntimeNameAsString(); if (!ObjCEmptyCacheVar) { - ObjCEmptyCacheVar = new llvm::GlobalVariable( - CGM.getModule(), - ObjCTypes.CacheTy, - false, - 
llvm::GlobalValue::ExternalLinkage, - nullptr, - "_objc_empty_cache"); - - // Make this entry NULL for any iOS device target, any iOS simulator target, - // OS X with deployment target 10.9 or later. + ObjCEmptyCacheVar = + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.CacheTy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, + "_objc_empty_cache"); + if (CGM.getTriple().isOSBinFormatCOFF()) + ObjCEmptyCacheVar->setDLLStorageClass(getStorage(CGM, "_objc_empty_cache")); + + // Only OS X with deployment version <10.9 use the empty vtable symbol const llvm::Triple &Triple = CGM.getTarget().getTriple(); - if (Triple.isiOS() || Triple.isWatchOS() || - (Triple.isMacOSX() && !Triple.isMacOSXVersionLT(10, 9))) - // This entry will be null. - ObjCEmptyVtableVar = nullptr; - else - ObjCEmptyVtableVar = new llvm::GlobalVariable( - CGM.getModule(), - ObjCTypes.ImpnfABITy, - false, - llvm::GlobalValue::ExternalLinkage, - nullptr, - "_objc_empty_vtable"); - } - assert(ID->getClassInterface() && - "CGObjCNonFragileABIMac::GenerateClass - class is 0"); + if (Triple.isMacOSX() && Triple.isMacOSXVersionLT(10, 9)) + ObjCEmptyVtableVar = + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ImpnfABITy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, + "_objc_empty_vtable"); + } + // FIXME: Is this correct (that meta class size is never computed)? uint32_t InstanceStart = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ClassnfABITy); uint32_t InstanceSize = InstanceStart; uint32_t flags = NonFragileABI_Class_Meta; - llvm::SmallString<64> ObjCMetaClassName(getMetaclassSymbolPrefix()); - llvm::SmallString<64> ObjCClassName(getClassSymbolPrefix()); - llvm::SmallString<64> TClassName; llvm::GlobalVariable *SuperClassGV, *IsAGV; + StringRef ClassName = ID->getObjCRuntimeNameAsString(); + const auto *CI = ID->getClassInterface(); + assert(CI && "CGObjCNonFragileABIMac::GenerateClass - class is 0"); + // Build the flags for the metaclass. - bool classIsHidden = - ID->getClassInterface()->getVisibility() == HiddenVisibility; + bool classIsHidden = (CGM.getTriple().isOSBinFormatCOFF()) + ? 
!CI->hasAttr<DLLExportAttr>() + : CI->getVisibility() == HiddenVisibility; if (classIsHidden) flags |= NonFragileABI_Class_Hidden; @@ -5947,45 +6040,59 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { if (ID->hasNonZeroConstructors() || ID->hasDestructors()) { flags |= NonFragileABI_Class_HasCXXStructors; if (!ID->hasNonZeroConstructors()) - flags |= NonFragileABI_Class_HasCXXDestructorOnly; + flags |= NonFragileABI_Class_HasCXXDestructorOnly; } - if (!ID->getClassInterface()->getSuperClass()) { + if (!CI->getSuperClass()) { // class is root flags |= NonFragileABI_Class_Root; - TClassName = ObjCClassName; - TClassName += ClassName; - SuperClassGV = GetClassGlobal(TClassName.str(), - ID->getClassInterface()->isWeakImported()); - TClassName = ObjCMetaClassName; - TClassName += ClassName; - IsAGV = GetClassGlobal(TClassName.str(), - ID->getClassInterface()->isWeakImported()); + + SuperClassGV = GetClassGlobal((getClassSymbolPrefix() + ClassName).str(), + CI->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (CI->hasAttr<DLLImportAttr>()) + SuperClassGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + + IsAGV = GetClassGlobal((getMetaclassSymbolPrefix() + ClassName).str(), + CI->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (CI->hasAttr<DLLImportAttr>()) + IsAGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); } else { // Has a root. Current class is not a root. const ObjCInterfaceDecl *Root = ID->getClassInterface(); while (const ObjCInterfaceDecl *Super = Root->getSuperClass()) Root = Super; - TClassName = ObjCMetaClassName ; - TClassName += Root->getObjCRuntimeNameAsString(); - IsAGV = GetClassGlobal(TClassName.str(), + + const auto *Super = CI->getSuperClass(); + StringRef RootClassName = Root->getObjCRuntimeNameAsString(); + StringRef SuperClassName = Super->getObjCRuntimeNameAsString(); + + IsAGV = GetClassGlobal((getMetaclassSymbolPrefix() + RootClassName).str(), Root->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (Root->hasAttr<DLLImportAttr>()) + IsAGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); // work on super class metadata symbol. 
- TClassName = ObjCMetaClassName; - TClassName += ID->getClassInterface()->getSuperClass()->getObjCRuntimeNameAsString(); - SuperClassGV = GetClassGlobal( - TClassName.str(), - ID->getClassInterface()->getSuperClass()->isWeakImported()); - } - llvm::GlobalVariable *CLASS_RO_GV = BuildClassRoTInitializer(flags, - InstanceStart, - InstanceSize,ID); - TClassName = ObjCMetaClassName; - TClassName += ClassName; - llvm::GlobalVariable *MetaTClass = BuildClassMetaData( - TClassName.str(), IsAGV, SuperClassGV, CLASS_RO_GV, classIsHidden, - ID->getClassInterface()->isWeakImported()); + SuperClassGV = + GetClassGlobal((getMetaclassSymbolPrefix() + SuperClassName).str(), + Super->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (Super->hasAttr<DLLImportAttr>()) + SuperClassGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + } + + llvm::GlobalVariable *CLASS_RO_GV = + BuildClassRoTInitializer(flags, InstanceStart, InstanceSize, ID); + + llvm::GlobalVariable *MetaTClass = + BuildClassMetaData((getMetaclassSymbolPrefix() + ClassName).str(), IsAGV, + SuperClassGV, CLASS_RO_GV, classIsHidden, + CI->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (CI->hasAttr<DLLExportAttr>()) + MetaTClass->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); DefinedMetaClasses.push_back(MetaTClass); // Metadata for the class @@ -6006,34 +6113,38 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { flags |= NonFragileABI_Class_HasCXXDestructorOnly; } - if (hasObjCExceptionAttribute(CGM.getContext(), ID->getClassInterface())) + if (hasObjCExceptionAttribute(CGM.getContext(), CI)) flags |= NonFragileABI_Class_Exception; - if (!ID->getClassInterface()->getSuperClass()) { + if (!CI->getSuperClass()) { flags |= NonFragileABI_Class_Root; SuperClassGV = nullptr; } else { // Has a root. Current class is not a root. - TClassName = ObjCClassName; - TClassName += ID->getClassInterface()->getSuperClass()->getObjCRuntimeNameAsString(); - SuperClassGV = GetClassGlobal( - TClassName.str(), - ID->getClassInterface()->getSuperClass()->isWeakImported()); + const auto *Super = CI->getSuperClass(); + StringRef SuperClassName = Super->getObjCRuntimeNameAsString(); + + SuperClassGV = + GetClassGlobal((getClassSymbolPrefix() + SuperClassName).str(), + Super->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (Super->hasAttr<DLLImportAttr>()) + SuperClassGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); } + GetClassSizeInfo(ID, InstanceStart, InstanceSize); - CLASS_RO_GV = BuildClassRoTInitializer(flags, - InstanceStart, - InstanceSize, - ID); + CLASS_RO_GV = + BuildClassRoTInitializer(flags, InstanceStart, InstanceSize, ID); - TClassName = ObjCClassName; - TClassName += ClassName; llvm::GlobalVariable *ClassMD = - BuildClassMetaData(TClassName.str(), MetaTClass, SuperClassGV, CLASS_RO_GV, - classIsHidden, - ID->getClassInterface()->isWeakImported()); + BuildClassMetaData((getClassSymbolPrefix() + ClassName).str(), MetaTClass, + SuperClassGV, CLASS_RO_GV, classIsHidden, + CI->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (CI->hasAttr<DLLExportAttr>()) + ClassMD->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); DefinedClasses.push_back(ClassMD); - ImplementedClasses.push_back(ID->getClassInterface()); + ImplementedClasses.push_back(CI); // Determine if this class is also "non-lazy". 
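On COFF targets the GenerateClass changes above repeat the same small pattern for each metadata global: a DLL storage class is only set when the object format is COFF and the referenced interface carries a dllimport (or dllexport) attribute. Pulled out as a standalone helper it would look roughly like this; a hypothetical refactoring sketch, not code from the diff:

    #include "clang/AST/Attr.h"
    #include "clang/AST/DeclObjC.h"
    #include "llvm/ADT/Triple.h"
    #include "llvm/IR/GlobalVariable.h"

    // Mark a metadata global dllimport when the interface it refers to is
    // itself dllimport and we are emitting for a COFF target.
    static void markImportedForCOFF(llvm::GlobalVariable *GV,
                                    const clang::ObjCInterfaceDecl *CI,
                                    const llvm::Triple &Triple) {
      if (Triple.isOSBinFormatCOFF() && CI->hasAttr<clang::DLLImportAttr>())
        GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
    }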
if (ImplementationIsNonLazy(ID)) @@ -6041,7 +6152,7 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { // Force the definition of the EHType if necessary. if (flags & NonFragileABI_Class_Exception) - GetInterfaceEHType(ID->getClassInterface(), true); + GetInterfaceEHType(CI, true); // Make sure method definition entries are all clear for next implementation. MethodDefinitions.clear(); } @@ -6093,6 +6204,8 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, /// const struct _method_list_t * const class_methods; /// const struct _protocol_list_t * const protocols; /// const struct _prop_list_t * const properties; +/// const struct _prop_list_t * const class_properties; +/// const uint32_t size; /// } /// void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { @@ -6107,7 +6220,7 @@ void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { llvm::SmallString<64> ExtClassName(getClassSymbolPrefix()); ExtClassName += Interface->getObjCRuntimeNameAsString(); - llvm::Constant *Values[6]; + llvm::Constant *Values[8]; Values[0] = GetClassName(OCD->getIdentifier()->getName()); // meta-class entry symbol llvm::GlobalVariable *ClassGV = @@ -6156,12 +6269,18 @@ void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { Category->protocol_begin(), Category->protocol_end()); Values[5] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ExtName.str(), - OCD, Category, ObjCTypes); + OCD, Category, ObjCTypes, false); + Values[6] = EmitPropertyList("\01l_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(), + OCD, Category, ObjCTypes, true); } else { Values[4] = llvm::Constant::getNullValue(ObjCTypes.ProtocolListnfABIPtrTy); Values[5] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); + Values[6] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); } + unsigned Size = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.CategorynfABITy); + Values[7] = llvm::ConstantInt::get(ObjCTypes.IntTy, Size); + llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.CategorynfABITy, Values); @@ -6210,9 +6329,8 @@ llvm::Constant *CGObjCNonFragileABIMac::GetMethodConstant( /// } /// llvm::Constant * -CGObjCNonFragileABIMac::EmitMethodList(Twine Name, - const char *Section, - ArrayRef<llvm::Constant*> Methods) { +CGObjCNonFragileABIMac::EmitMethodList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods) { // Return null for empty list. 
if (Methods.empty()) return llvm::Constant::getNullValue(ObjCTypes.MethodListnfABIPtrTy); @@ -6242,18 +6360,28 @@ CGObjCNonFragileABIMac::EmitMethodList(Twine Name, llvm::GlobalVariable * CGObjCNonFragileABIMac::ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar) { - const ObjCInterfaceDecl *Container = Ivar->getContainingInterface(); llvm::SmallString<64> Name("OBJC_IVAR_$_"); Name += Container->getObjCRuntimeNameAsString(); Name += "."; Name += Ivar->getName(); - llvm::GlobalVariable *IvarOffsetGV = - CGM.getModule().getGlobalVariable(Name); - if (!IvarOffsetGV) - IvarOffsetGV = new llvm::GlobalVariable( - CGM.getModule(), ObjCTypes.IvarOffsetVarTy, false, - llvm::GlobalValue::ExternalLinkage, nullptr, Name.str()); + llvm::GlobalVariable *IvarOffsetGV = CGM.getModule().getGlobalVariable(Name); + if (!IvarOffsetGV) { + IvarOffsetGV = + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.IvarOffsetVarTy, + false, llvm::GlobalValue::ExternalLinkage, + nullptr, Name.str()); + if (CGM.getTriple().isOSBinFormatCOFF()) { + bool IsPrivateOrPackage = + Ivar->getAccessControl() == ObjCIvarDecl::Private || + Ivar->getAccessControl() == ObjCIvarDecl::Package; + + if (ID->hasAttr<DLLExportAttr>() && !IsPrivateOrPackage) + IvarOffsetGV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + else if (ID->hasAttr<DLLImportAttr>()) + IvarOffsetGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + } + } return IvarOffsetGV; } @@ -6267,14 +6395,17 @@ CGObjCNonFragileABIMac::EmitIvarOffsetVar(const ObjCInterfaceDecl *ID, IvarOffsetGV->setAlignment( CGM.getDataLayout().getABITypeAlignment(ObjCTypes.IvarOffsetVarTy)); - // FIXME: This matches gcc, but shouldn't the visibility be set on the use as - // well (i.e., in ObjCIvarOffsetVariable). - if (Ivar->getAccessControl() == ObjCIvarDecl::Private || - Ivar->getAccessControl() == ObjCIvarDecl::Package || - ID->getVisibility() == HiddenVisibility) - IvarOffsetGV->setVisibility(llvm::GlobalValue::HiddenVisibility); - else - IvarOffsetGV->setVisibility(llvm::GlobalValue::DefaultVisibility); + if (!CGM.getTriple().isOSBinFormatCOFF()) { + // FIXME: This matches gcc, but shouldn't the visibility be set on the use + // as well (i.e., in ObjCIvarOffsetVariable). + if (Ivar->getAccessControl() == ObjCIvarDecl::Private || + Ivar->getAccessControl() == ObjCIvarDecl::Package || + ID->getVisibility() == HiddenVisibility) + IvarOffsetGV->setVisibility(llvm::GlobalValue::HiddenVisibility); + else + IvarOffsetGV->setVisibility(llvm::GlobalValue::DefaultVisibility); + } + IvarOffsetGV->setSection("__DATA, __objc_ivar"); return IvarOffsetGV; } @@ -6361,7 +6492,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef( const ObjCProtocolDecl *PD) { llvm::GlobalVariable *&Entry = Protocols[PD->getIdentifier()]; - if (!Entry) { + if (!Entry) // We use the initializer as a marker of whether this is a forward // reference or not. At module finalization we add the empty // contents for protocols which were referenced but never defined. 
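The COFF logic added to ObjCIvarOffsetVariable encodes a small rule: an exported class's OBJC_IVAR_$_ offset variables are themselves dllexported unless the ivar is private or package, while an imported class's offset variables are dllimported. The same decision restated as a standalone predicate, purely as a distillation of the code above:

    enum class OffsetVarStorage { Export, Import, Default };

    OffsetVarStorage storageForIvarOffset(bool classIsDLLExport,
                                          bool classIsDLLImport,
                                          bool ivarIsPrivateOrPackage) {
      if (classIsDLLExport && !ivarIsPrivateOrPackage)
        return OffsetVarStorage::Export;
      if (classIsDLLImport)
        return OffsetVarStorage::Import;
      return OffsetVarStorage::Default;
    }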
@@ -6370,8 +6501,6 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef( false, llvm::GlobalValue::ExternalLinkage, nullptr, "\01l_OBJC_PROTOCOL_$_" + PD->getObjCRuntimeNameAsString()); - Entry->setSection("__DATA,__datacoal_nt,coalesced"); - } return Entry; } @@ -6391,6 +6520,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef( /// const uint32_t flags; // = 0 /// const char ** extendedMethodTypes; /// const char *demangledName; +/// const struct _prop_list_t * class_properties; /// } /// @endcode /// @@ -6442,7 +6572,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( MethodTypesExt.insert(MethodTypesExt.end(), OptMethodTypesExt.begin(), OptMethodTypesExt.end()); - llvm::Constant *Values[12]; + llvm::Constant *Values[13]; // isa is NULL Values[0] = llvm::Constant::getNullValue(ObjCTypes.ObjectPtrTy); Values[1] = GetClassName(PD->getObjCRuntimeNameAsString()); @@ -6466,8 +6596,9 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( + PD->getObjCRuntimeNameAsString(), "__DATA, __objc_const", OptClassMethods); - Values[7] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + PD->getObjCRuntimeNameAsString(), - nullptr, PD, ObjCTypes); + Values[7] = EmitPropertyList( + "\01l_OBJC_$_PROP_LIST_" + PD->getObjCRuntimeNameAsString(), + nullptr, PD, ObjCTypes, false); uint32_t Size = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ProtocolnfABITy); Values[8] = llvm::ConstantInt::get(ObjCTypes.IntTy, Size); @@ -6477,6 +6608,10 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( MethodTypesExt, ObjCTypes); // const char *demangledName; Values[11] = llvm::Constant::getNullValue(ObjCTypes.Int8PtrTy); + + Values[12] = EmitPropertyList( + "\01l_OBJC_$_CLASS_PROP_LIST_" + PD->getObjCRuntimeNameAsString(), + nullptr, PD, ObjCTypes, true); llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ProtocolnfABITy, Values); @@ -6492,7 +6627,6 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( "\01l_OBJC_PROTOCOL_$_" + PD->getObjCRuntimeNameAsString()); Entry->setAlignment( CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABITy)); - Entry->setSection("__DATA,__datacoal_nt,coalesced"); Protocols[PD->getIdentifier()] = Entry; } @@ -6640,7 +6774,7 @@ static void appendSelectorForMessageRefTable(std::string &buffer, } } -/// Emit a "v-table" message send. We emit a weak hidden-visibility +/// Emit a "vtable" message send. We emit a weak hidden-visibility /// struct, initially containing the selector pointer and a pointer to /// a "fixup" variant of the appropriate objc_msgSend. To call, we /// load and call the function pointer, passing the address of the @@ -6734,7 +6868,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, bool requiresnullCheck = false; if (CGM.getLangOpts().ObjCAutoRefCount && method) - for (const auto *ParamDecl : method->params()) { + for (const auto *ParamDecl : method->parameters()) { if (ParamDecl->hasAttr<NSConsumedAttr>()) { if (!nullReturn.NullBB) nullReturn.init(CGF, arg0); @@ -6783,7 +6917,7 @@ CGObjCNonFragileABIMac::GenerateMessageSend(CodeGen::CodeGenFunction &CGF, } llvm::GlobalVariable * -CGObjCNonFragileABIMac::GetClassGlobal(const std::string &Name, bool Weak) { +CGObjCNonFragileABIMac::GetClassGlobal(StringRef Name, bool Weak) { llvm::GlobalValue::LinkageTypes L = Weak ? 
llvm::GlobalValue::ExternalWeakLinkage : llvm::GlobalValue::ExternalLinkage; @@ -6806,9 +6940,8 @@ llvm::Value *CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF, llvm::GlobalVariable *&Entry = ClassReferences[II]; if (!Entry) { - std::string ClassName( - getClassSymbolPrefix() + - (ID ? ID->getObjCRuntimeNameAsString() : II->getName()).str()); + StringRef Name = ID ? ID->getObjCRuntimeNameAsString() : II->getName(); + std::string ClassName = (getClassSymbolPrefix() + Name).str(); llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName, Weak); Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, false, llvm::GlobalValue::PrivateLinkage, @@ -6822,6 +6955,11 @@ llvm::Value *CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF, llvm::Value *CGObjCNonFragileABIMac::EmitClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID) { + // If the class has the objc_runtime_visible attribute, we need to + // use the Objective-C runtime to get the class. + if (ID->hasAttr<ObjCRuntimeVisibleAttr>()) + return EmitClassRefViaRuntime(CGF, ID, ObjCTypes); + return EmitClassRefFromId(CGF, ID->getIdentifier(), ID->isWeakImported(), ID); } @@ -7100,27 +7238,28 @@ CGObjCNonFragileABIMac::EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF, llvm::Constant * CGObjCNonFragileABIMac::GetEHType(QualType T) { // There's a particular fixed type info for 'id'. - if (T->isObjCIdType() || - T->isObjCQualifiedIdType()) { - llvm::Constant *IDEHType = - CGM.getModule().getGlobalVariable("OBJC_EHTYPE_id"); - if (!IDEHType) + if (T->isObjCIdType() || T->isObjCQualifiedIdType()) { + auto *IDEHType = CGM.getModule().getGlobalVariable("OBJC_EHTYPE_id"); + if (!IDEHType) { IDEHType = - new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, - false, - llvm::GlobalValue::ExternalLinkage, - nullptr, "OBJC_EHTYPE_id"); + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, + "OBJC_EHTYPE_id"); + if (CGM.getTriple().isOSBinFormatCOFF()) + IDEHType->setDLLStorageClass(getStorage(CGM, "OBJC_EHTYPE_id")); + } return IDEHType; } // All other types should be Objective-C interface pointer types. - const ObjCObjectPointerType *PT = - T->getAs<ObjCObjectPointerType>(); + const ObjCObjectPointerType *PT = T->getAs<ObjCObjectPointerType>(); assert(PT && "Invalid @catch type."); + const ObjCInterfaceType *IT = PT->getInterfaceType(); assert(IT && "Invalid @catch type."); + return GetInterfaceEHType(IT->getDecl(), false); -} +} void CGObjCNonFragileABIMac::EmitTryStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtTryStmt &S) { @@ -7153,6 +7292,7 @@ llvm::Constant * CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, bool ForDefinition) { llvm::GlobalVariable * &Entry = EHTypeReferences[ID->getIdentifier()]; + StringRef ClassName = ID->getObjCRuntimeNameAsString(); // If we don't need a definition, return the entry if found or check // if we use an external reference. @@ -7162,38 +7302,43 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, // If this type (or a super class) has the __objc_exception__ // attribute, emit an external reference. 
- if (hasObjCExceptionAttribute(CGM.getContext(), ID)) - return Entry = - new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, - llvm::GlobalValue::ExternalLinkage, - nullptr, - ("OBJC_EHTYPE_$_" + - ID->getObjCRuntimeNameAsString())); + if (hasObjCExceptionAttribute(CGM.getContext(), ID)) { + std::string EHTypeName = ("OBJC_EHTYPE_$_" + ClassName).str(); + Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, + false, llvm::GlobalValue::ExternalLinkage, + nullptr, EHTypeName); + if (CGM.getTriple().isOSBinFormatCOFF()) { + if (ID->hasAttr<DLLExportAttr>()) + Entry->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + else if (ID->hasAttr<DLLImportAttr>()) + Entry->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + } + return Entry; + } } - // Otherwise we need to either make a new entry or fill in the - // initializer. + // Otherwise we need to either make a new entry or fill in the initializer. assert((!Entry || !Entry->hasInitializer()) && "Duplicate EHType definition"); - llvm::SmallString<64> ClassName(getClassSymbolPrefix()); - ClassName += ID->getObjCRuntimeNameAsString(); + std::string VTableName = "objc_ehtype_vtable"; - llvm::GlobalVariable *VTableGV = - CGM.getModule().getGlobalVariable(VTableName); - if (!VTableGV) - VTableGV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.Int8PtrTy, - false, - llvm::GlobalValue::ExternalLinkage, - nullptr, VTableName); + auto *VTableGV = CGM.getModule().getGlobalVariable(VTableName); + if (!VTableGV) { + VTableGV = + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.Int8PtrTy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, + VTableName); + if (CGM.getTriple().isOSBinFormatCOFF()) + VTableGV->setDLLStorageClass(getStorage(CGM, VTableName)); + } llvm::Value *VTableIdx = llvm::ConstantInt::get(CGM.Int32Ty, 2); - llvm::Constant *Values[] = { llvm::ConstantExpr::getGetElementPtr(VTableGV->getValueType(), VTableGV, VTableIdx), GetClassName(ID->getObjCRuntimeNameAsString()), - GetClassGlobal(ClassName.str())}; - llvm::Constant *Init = - llvm::ConstantStruct::get(ObjCTypes.EHTypeTy, Values); + GetClassGlobal((getClassSymbolPrefix() + ClassName).str()), + }; + llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.EHTypeTy, Values); llvm::GlobalValue::LinkageTypes L = ForDefinition ? 
llvm::GlobalValue::ExternalLinkage @@ -7201,24 +7346,25 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, if (Entry) { Entry->setInitializer(Init); } else { - llvm::SmallString<64> EHTYPEName("OBJC_EHTYPE_$_"); - EHTYPEName += ID->getObjCRuntimeNameAsString(); - Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, - L, - Init, - EHTYPEName.str()); + Entry = + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, L, + Init, ("OBJC_EHTYPE_$_" + ClassName).str()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (hasObjCExceptionAttribute(CGM.getContext(), ID)) + if (ID->hasAttr<DLLExportAttr>()) + Entry->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } assert(Entry->getLinkage() == L); - if (ID->getVisibility() == HiddenVisibility) - Entry->setVisibility(llvm::GlobalValue::HiddenVisibility); - Entry->setAlignment(CGM.getDataLayout().getABITypeAlignment( - ObjCTypes.EHTypeTy)); + if (!CGM.getTriple().isOSBinFormatCOFF()) + if (ID->getVisibility() == HiddenVisibility) + Entry->setVisibility(llvm::GlobalValue::HiddenVisibility); + + const auto &DL = CGM.getDataLayout(); + Entry->setAlignment(DL.getABITypeAlignment(ObjCTypes.EHTypeTy)); if (ForDefinition) Entry->setSection("__DATA,__objc_const"); - else - Entry->setSection("__DATA,__datacoal_nt,coalesced"); return Entry; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp index 7be9ae9..0caf6d9 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp @@ -120,9 +120,8 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, uint64_t BitOffset = FieldBitOffset % CGF.CGM.getContext().getCharWidth(); uint64_t AlignmentBits = CGF.CGM.getTarget().getCharAlign(); uint64_t BitFieldSize = Ivar->getBitWidthValue(CGF.getContext()); - CharUnits StorageSize = - CGF.CGM.getContext().toCharUnitsFromBits( - llvm::RoundUpToAlignment(BitOffset + BitFieldSize, AlignmentBits)); + CharUnits StorageSize = CGF.CGM.getContext().toCharUnitsFromBits( + llvm::alignTo(BitOffset + BitFieldSize, AlignmentBits)); CharUnits Alignment = CGF.CGM.getContext().toCharUnitsFromBits(AlignmentBits); // Allocate a new CGBitFieldInfo object to describe this access. @@ -364,25 +363,15 @@ CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method, llvm::PointerType *signatureType = CGM.getTypes().GetFunctionType(signature)->getPointerTo(); - // If that's not variadic, there's no need to recompute the ABI - // arrangement. - if (!signature.isVariadic()) - return MessageSendInfo(signature, signatureType); + const CGFunctionInfo &signatureForCall = + CGM.getTypes().arrangeCall(signature, callArgs); - // Otherwise, there is. - FunctionType::ExtInfo einfo = signature.getExtInfo(); - const CGFunctionInfo &argsInfo = - CGM.getTypes().arrangeFreeFunctionCall(resultType, callArgs, einfo, - signature.getRequiredArgs()); - - return MessageSendInfo(argsInfo, signatureType); + return MessageSendInfo(signatureForCall, signatureType); } // There's no method; just use a default CC. const CGFunctionInfo &argsInfo = - CGM.getTypes().arrangeFreeFunctionCall(resultType, callArgs, - FunctionType::ExtInfo(), - RequiredArgs::All); + CGM.getTypes().arrangeUnprototypedObjCMessageSend(resultType, callArgs); // Derive the signature to call from that. 
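These hunks also pick up the LLVM rename of RoundUpToAlignment to alignTo (and CharUnits::RoundUpToAlignment to CharUnits::alignTo); the semantics are unchanged, the value is still rounded up to the next multiple of the alignment. A minimal check of that behaviour with illustrative values:

    #include <cassert>
    #include "llvm/Support/MathExtras.h"

    int main() {
      assert(llvm::alignTo(13, 8) == 16);   // rounds up to the next multiple of 8
      assert(llvm::alignTo(16, 8) == 16);   // already-aligned values are unchanged
      return 0;
    }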
llvm::PointerType *signatureType = diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.h index 28d88dd..6c33059 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.h @@ -280,7 +280,7 @@ public: virtual llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM, QualType T) = 0; - virtual llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + virtual llvm::GlobalVariable *GetClassGlobal(StringRef Name, bool Weak = false) = 0; struct MessageSendInfo { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp index 6866789..38aebea 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp @@ -40,46 +40,12 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { default: llvm_unreachable("Unexpected opencl builtin type!"); return nullptr; - case BuiltinType::OCLImage1d: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image1d_t"), ImgAddrSpc); - case BuiltinType::OCLImage1dArray: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image1d_array_t"), ImgAddrSpc); - case BuiltinType::OCLImage1dBuffer: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image1d_buffer_t"), ImgAddrSpc); - case BuiltinType::OCLImage2d: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image2d_t"), ImgAddrSpc); - case BuiltinType::OCLImage2dArray: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image2d_array_t"), ImgAddrSpc); - case BuiltinType::OCLImage2dDepth: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_depth_t"), ImgAddrSpc); - case BuiltinType::OCLImage2dArrayDepth: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_array_depth_t"), +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + case BuiltinType::Id: \ + return llvm::PointerType::get( \ + llvm::StructType::create(Ctx, "opencl." #ImgType "_" #Suffix "_t"), \ ImgAddrSpc); - case BuiltinType::OCLImage2dMSAA: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_msaa_t"), ImgAddrSpc); - case BuiltinType::OCLImage2dArrayMSAA: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_array_msaa_t"), - ImgAddrSpc); - case BuiltinType::OCLImage2dMSAADepth: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_msaa_depth_t"), - ImgAddrSpc); - case BuiltinType::OCLImage2dArrayMSAADepth: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_array_msaa_depth_t"), - ImgAddrSpc); - case BuiltinType::OCLImage3d: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image3d_t"), ImgAddrSpc); +#include "clang/Basic/OpenCLImageTypes.def" case BuiltinType::OCLSampler: return llvm::IntegerType::get(Ctx, 32); case BuiltinType::OCLEvent: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 5cfacac..6a0edbe 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -72,6 +72,8 @@ public: /// \return LValue for thread id variable. This LValue always has type int32*. 
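In the CGOpenCLRuntime.cpp hunk above, the long per-image-type switch collapses into a single IMAGE_TYPE macro expanded by including clang/Basic/OpenCLImageTypes.def. The same X-macro technique in miniature, with the .def contents inlined and the entry names invented for the example:

    #include <cstdio>

    // Stand-in for the .def file: just a list of IMAGE_TYPE(...) invocations.
    #define EXAMPLE_IMAGE_TYPES \
      IMAGE_TYPE(image1d, OCLImage1dRO, OCLImage1dROTy, read_only, ro) \
      IMAGE_TYPE(image2d, OCLImage2dWO, OCLImage2dWOTy, write_only, wo)

    // Stand-in for the switch above: each entry becomes one "case".
    #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
      std::puts("opencl." #ImgType "_" #Suffix "_t");

    int main() {
      EXAMPLE_IMAGE_TYPES   // prints opencl.image1d_ro_t and opencl.image2d_wo_t
      return 0;
    }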
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); + virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} + CGOpenMPRegionKind getRegionKind() const { return RegionKind; } OpenMPDirectiveKind getDirectiveKind() const { return Kind; } @@ -82,6 +84,8 @@ public: return Info->getKind() == CR_OpenMP; } + ~CGOpenMPRegionInfo() override = default; + protected: CGOpenMPRegionKind RegionKind; RegionCodeGenTy CodeGen; @@ -90,7 +94,7 @@ protected: }; /// \brief API for captured statement code generation in OpenMP constructs. -class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { +class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, @@ -100,6 +104,7 @@ public: ThreadIDVar(ThreadIDVar) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } + /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } @@ -120,16 +125,65 @@ private: }; /// \brief API for captured statement code generation in OpenMP constructs. -class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { +class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: + class UntiedTaskActionTy final : public PrePostActionTy { + bool Untied; + const VarDecl *PartIDVar; + const RegionCodeGenTy UntiedCodeGen; + llvm::SwitchInst *UntiedSwitch = nullptr; + + public: + UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, + const RegionCodeGenTy &UntiedCodeGen) + : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} + void Enter(CodeGenFunction &CGF) override { + if (Untied) { + // Emit task switching point. 
+ auto PartIdLVal = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(PartIDVar), + PartIDVar->getType()->castAs<PointerType>()); + auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); + auto *DoneBB = CGF.createBasicBlock(".untied.done."); + UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); + CGF.EmitBlock(DoneBB); + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); + UntiedSwitch->addCase(CGF.Builder.getInt32(0), + CGF.Builder.GetInsertBlock()); + emitUntiedSwitch(CGF); + } + } + void emitUntiedSwitch(CodeGenFunction &CGF) const { + if (Untied) { + auto PartIdLVal = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(PartIDVar), + PartIDVar->getType()->castAs<PointerType>()); + CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), + PartIdLVal); + UntiedCodeGen(CGF); + CodeGenFunction::JumpDest CurPoint = + CGF.getJumpDestInCurrentScope(".untied.next."); + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); + UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), + CGF.Builder.GetInsertBlock()); + CGF.EmitBranchThroughCleanup(CurPoint); + CGF.EmitBlock(CurPoint.getBlock()); + } + } + unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } + }; CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, - OpenMPDirectiveKind Kind, bool HasCancel) + OpenMPDirectiveKind Kind, bool HasCancel, + const UntiedTaskActionTy &Action) : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), - ThreadIDVar(ThreadIDVar) { + ThreadIDVar(ThreadIDVar), Action(Action) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } + /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } @@ -140,6 +194,10 @@ public: /// \brief Get the name of the capture helper. StringRef getHelperName() const override { return ".omp_outlined."; } + void emitUntiedSwitch(CodeGenFunction &CGF) override { + Action.emitUntiedSwitch(CGF); + } + static bool classof(const CGCapturedStmtInfo *Info) { return CGOpenMPRegionInfo::classof(Info) && cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == @@ -150,6 +208,8 @@ private: /// \brief A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; + /// Action for emitting code for untied tasks. + const UntiedTaskActionTy &Action; }; /// \brief API for inlined captured statement code generation in OpenMP @@ -162,12 +222,14 @@ public: : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), OldCSI(OldCSI), OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} + // \brief Retrieve the value of the context parameter. llvm::Value *getContextValue() const override { if (OuterRegionInfo) return OuterRegionInfo->getContextValue(); llvm_unreachable("No context value for inlined OpenMP region"); } + void setContextValue(llvm::Value *V) override { if (OuterRegionInfo) { OuterRegionInfo->setContextValue(V); @@ -175,6 +237,7 @@ public: } llvm_unreachable("No context value for inlined OpenMP region"); } + /// \brief Lookup the captured field decl for a variable. const FieldDecl *lookup(const VarDecl *VD) const override { if (OuterRegionInfo) @@ -183,11 +246,13 @@ public: // captured variables, we can use the original one. 
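UntiedTaskActionTy gives an untied task a resumable body: on entry the stored part id is switched on, and at every task scheduling point emitUntiedSwitch records the next part id and returns to the runtime so the task can later resume at that point. Roughly, the outlined task entry ends up shaped like this; a sketch with simplified types, not the literal generated IR:

    struct TaskPartId { int part_id; };   // simplified stand-in for the real task data

    void untiedTaskEntrySketch(TaskPartId *task) {
      switch (task->part_id) {     // the switch emitted in Enter() (".untied.jmp." cases)
      case 0: break;               // first execution starts at the beginning
      case 1: goto part1;
      default: return;             // ".untied.done."
      }

      /* ... task body up to the first task scheduling point ... */

      task->part_id = 1;           // emitUntiedSwitch(): remember where to resume
      return;                      // hand control back to the runtime

    part1:
      /* ... remainder of the task body ... */
      return;
    }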
return nullptr; } + FieldDecl *getThisFieldDecl() const override { if (OuterRegionInfo) return OuterRegionInfo->getThisFieldDecl(); return nullptr; } + /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { @@ -203,6 +268,11 @@ public: llvm_unreachable("No helper name for inlined OpenMP construct"); } + void emitUntiedSwitch(CodeGenFunction &CGF) override { + if (OuterRegionInfo) + OuterRegionInfo->emitUntiedSwitch(CGF); + } + CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } static bool classof(const CGCapturedStmtInfo *Info) { @@ -210,6 +280,8 @@ public: cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; } + ~CGOpenMPInlinedRegionInfo() override = default; + private: /// \brief CodeGen info about outer OpenMP region. CodeGenFunction::CGCapturedStmtInfo *OldCSI; @@ -221,7 +293,7 @@ private: /// captured fields. The name of the target region has to be unique in a given /// application so it is provided by the client, because only the client has /// the information to generate that. -class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo { +class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { public: CGOpenMPTargetRegionInfo(const CapturedStmt &CS, const RegionCodeGenTy &CodeGen, StringRef HelperName) @@ -245,9 +317,75 @@ private: StringRef HelperName; }; +static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { + llvm_unreachable("No codegen for expressions"); +} +/// \brief API for generation of expressions captured in a innermost OpenMP +/// region. +class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { +public: + CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) + : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, + OMPD_unknown, + /*HasCancel=*/false), + PrivScope(CGF) { + // Make sure the globals captured in the provided statement are local by + // using the privatization logic. We assume the same variable is not + // captured more than once. + for (auto &C : CS.captures()) { + if (!C.capturesVariable() && !C.capturesVariableByCopy()) + continue; + + const VarDecl *VD = C.getCapturedVar(); + if (VD->isLocalVarDeclOrParm()) + continue; + + DeclRefExpr DRE(const_cast<VarDecl *>(VD), + /*RefersToEnclosingVariableOrCapture=*/false, + VD->getType().getNonReferenceType(), VK_LValue, + SourceLocation()); + PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address { + return CGF.EmitLValue(&DRE).getAddress(); + }); + } + (void)PrivScope.Privatize(); + } + + /// \brief Lookup the captured field decl for a variable. + const FieldDecl *lookup(const VarDecl *VD) const override { + if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) + return FD; + return nullptr; + } + + /// \brief Emit the captured statement body. + void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { + llvm_unreachable("No body for expressions"); + } + + /// \brief Get a variable or parameter for storing global thread id + /// inside OpenMP construct. + const VarDecl *getThreadIDVariable() const override { + llvm_unreachable("No thread id for expressions"); + } + + /// \brief Get the name of the capture helper. + StringRef getHelperName() const override { + llvm_unreachable("No helper name for expressions"); + } + + static bool classof(const CGCapturedStmtInfo *Info) { return false; } + +private: + /// Private scope to capture global variables. 
+ CodeGenFunction::OMPPrivateScope PrivScope; +}; + /// \brief RAII for emitting code of OpenMP constructs. class InlinedOpenMPRegionRAII { CodeGenFunction &CGF; + llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; + FieldDecl *LambdaThisCaptureField = nullptr; public: /// \brief Constructs region for combined constructs. @@ -260,30 +398,306 @@ public: // Start emission for the construct. CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); + std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); + LambdaThisCaptureField = CGF.LambdaThisCaptureField; + CGF.LambdaThisCaptureField = nullptr; } + ~InlinedOpenMPRegionRAII() { // Restore original CapturedStmtInfo only if we're done with code emission. auto *OldCSI = cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); delete CGF.CapturedStmtInfo; CGF.CapturedStmtInfo = OldCSI; + std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); + CGF.LambdaThisCaptureField = LambdaThisCaptureField; + } +}; + +/// \brief Values for bit flags used in the ident_t to describe the fields. +/// All enumeric elements are named and described in accordance with the code +/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h +enum OpenMPLocationFlags { + /// \brief Use trampoline for internal microtask. + OMP_IDENT_IMD = 0x01, + /// \brief Use c-style ident structure. + OMP_IDENT_KMPC = 0x02, + /// \brief Atomic reduction option for kmpc_reduce. + OMP_ATOMIC_REDUCE = 0x10, + /// \brief Explicit 'barrier' directive. + OMP_IDENT_BARRIER_EXPL = 0x20, + /// \brief Implicit barrier in code. + OMP_IDENT_BARRIER_IMPL = 0x40, + /// \brief Implicit barrier in 'for' directive. + OMP_IDENT_BARRIER_IMPL_FOR = 0x40, + /// \brief Implicit barrier in 'sections' directive. + OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, + /// \brief Implicit barrier in 'single' directive. + OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 +}; + +/// \brief Describes ident structure that describes a source location. +/// All descriptions are taken from +/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h +/// Original structure: +/// typedef struct ident { +/// kmp_int32 reserved_1; /**< might be used in Fortran; +/// see above */ +/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; +/// KMP_IDENT_KMPC identifies this union +/// member */ +/// kmp_int32 reserved_2; /**< not really used in Fortran any more; +/// see above */ +///#if USE_ITT_BUILD +/// /* but currently used for storing +/// region-specific ITT */ +/// /* contextual information. */ +///#endif /* USE_ITT_BUILD */ +/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for +/// C++ */ +/// char const *psource; /**< String describing the source location. +/// The string is composed of semi-colon separated +// fields which describe the source file, +/// the function and a pair of line numbers that +/// delimit the construct. +/// */ +/// } ident_t; +enum IdentFieldIndex { + /// \brief might be used in Fortran + IdentField_Reserved_1, + /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. + IdentField_Flags, + /// \brief Not really used in Fortran any more + IdentField_Reserved_2, + /// \brief Source[4] in Fortran, do not use for C++ + IdentField_Reserved_3, + /// \brief String describing the source location. The string is composed of + /// semi-colon separated fields which describe the source file, the function + /// and a pair of line numbers that delimit the construct. 
+ IdentField_PSource +}; + +/// \brief Schedule types for 'omp for' loops (these enumerators are taken from +/// the enum sched_type in kmp.h). +enum OpenMPSchedType { + /// \brief Lower bound for default (unordered) versions. + OMP_sch_lower = 32, + OMP_sch_static_chunked = 33, + OMP_sch_static = 34, + OMP_sch_dynamic_chunked = 35, + OMP_sch_guided_chunked = 36, + OMP_sch_runtime = 37, + OMP_sch_auto = 38, + /// static with chunk adjustment (e.g., simd) + OMP_sch_static_balanced_chunked = 45, + /// \brief Lower bound for 'ordered' versions. + OMP_ord_lower = 64, + OMP_ord_static_chunked = 65, + OMP_ord_static = 66, + OMP_ord_dynamic_chunked = 67, + OMP_ord_guided_chunked = 68, + OMP_ord_runtime = 69, + OMP_ord_auto = 70, + OMP_sch_default = OMP_sch_static, + /// \brief dist_schedule types + OMP_dist_sch_static_chunked = 91, + OMP_dist_sch_static = 92, + /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. + /// Set if the monotonic schedule modifier was present. + OMP_sch_modifier_monotonic = (1 << 29), + /// Set if the nonmonotonic schedule modifier was present. + OMP_sch_modifier_nonmonotonic = (1 << 30), +}; + +enum OpenMPRTLFunction { + /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, + /// kmpc_micro microtask, ...); + OMPRTL__kmpc_fork_call, + /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, + /// kmp_int32 global_tid, void *data, size_t size, void ***cache); + OMPRTL__kmpc_threadprivate_cached, + /// \brief Call to void __kmpc_threadprivate_register( ident_t *, + /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); + OMPRTL__kmpc_threadprivate_register, + // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); + OMPRTL__kmpc_global_thread_num, + // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *crit); + OMPRTL__kmpc_critical, + // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 + // global_tid, kmp_critical_name *crit, uintptr_t hint); + OMPRTL__kmpc_critical_with_hint, + // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *crit); + OMPRTL__kmpc_end_critical, + // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 + // global_tid); + OMPRTL__kmpc_cancel_barrier, + // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_barrier, + // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_for_static_fini, + // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + OMPRTL__kmpc_serialized_parallel, + // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + OMPRTL__kmpc_end_serialized_parallel, + // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_threads); + OMPRTL__kmpc_push_num_threads, + // Call to void __kmpc_flush(ident_t *loc); + OMPRTL__kmpc_flush, + // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); + OMPRTL__kmpc_master, + // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); + OMPRTL__kmpc_end_master, + // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, + // int end_part); + OMPRTL__kmpc_omp_taskyield, + // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); + OMPRTL__kmpc_single, + // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); + OMPRTL__kmpc_end_single, + // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, + 
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + // kmp_routine_entry_t *task_entry); + OMPRTL__kmpc_omp_task_alloc, + // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * + // new_task); + OMPRTL__kmpc_omp_task, + // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, + // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), + // kmp_int32 didit); + OMPRTL__kmpc_copyprivate, + // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void + // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); + OMPRTL__kmpc_reduce, + // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 + // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, + // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name + // *lck); + OMPRTL__kmpc_reduce_nowait, + // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *lck); + OMPRTL__kmpc_end_reduce, + // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *lck); + OMPRTL__kmpc_end_reduce_nowait, + // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, + // kmp_task_t * new_task); + OMPRTL__kmpc_omp_task_begin_if0, + // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, + // kmp_task_t * new_task); + OMPRTL__kmpc_omp_task_complete_if0, + // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_ordered, + // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_end_ordered, + // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 + // global_tid); + OMPRTL__kmpc_omp_taskwait, + // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_taskgroup, + // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_end_taskgroup, + // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, + // int proc_bind); + OMPRTL__kmpc_push_proc_bind, + // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 + // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t + // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); + OMPRTL__kmpc_omp_task_with_deps, + // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 + // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 + // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); + OMPRTL__kmpc_omp_wait_deps, + // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 + // global_tid, kmp_int32 cncl_kind); + OMPRTL__kmpc_cancellationpoint, + // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 cncl_kind); + OMPRTL__kmpc_cancel, + // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_teams, kmp_int32 thread_limit); + OMPRTL__kmpc_push_num_teams, + // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro + // microtask, ...); + OMPRTL__kmpc_fork_teams, + // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int + // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int + // sched, kmp_uint64 grainsize, void *task_dup); + OMPRTL__kmpc_taskloop, + // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 + // num_dims, struct kmp_dim *dims); + 
OMPRTL__kmpc_doacross_init, + // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); + OMPRTL__kmpc_doacross_fini, + // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 + // *vec); + OMPRTL__kmpc_doacross_post, + // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 + // *vec); + OMPRTL__kmpc_doacross_wait, + + // + // Offloading related calls + // + // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t + // *arg_types); + OMPRTL__tgt_target, + // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + OMPRTL__tgt_target_teams, + // Call to void __tgt_register_lib(__tgt_bin_desc *desc); + OMPRTL__tgt_register_lib, + // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); + OMPRTL__tgt_unregister_lib, + // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + OMPRTL__tgt_target_data_begin, + // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + OMPRTL__tgt_target_data_end, + // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + OMPRTL__tgt_target_data_update, +}; + +/// A basic class for pre|post-action for advanced codegen sequence for OpenMP +/// region. +class CleanupTy final : public EHScopeStack::Cleanup { + PrePostActionTy *Action; + +public: + explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { + if (!CGF.HaveInsertPoint()) + return; + Action->Exit(CGF); } }; } // anonymous namespace -static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr, - QualType Ty) { - AlignmentSource Source; - CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source); - return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align), - Ty->getPointeeType(), Source); +void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { + CodeGenFunction::RunCleanupsScope Scope(CGF); + if (PrePostAction) { + CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); + Callback(CodeGen, CGF, *PrePostAction); + } else { + PrePostActionTy Action; + Callback(CodeGen, CGF, Action); + } } LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { - return emitLoadOfPointerLValue(CGF, - CGF.GetAddrOfLocalVar(getThreadIDVariable()), - getThreadIDVariable()->getType()); + return CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(getThreadIDVariable()), + getThreadIDVariable()->getType()->castAs<PointerType>()); } void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { @@ -295,10 +709,7 @@ void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { // The point of exit cannot be a branch out of the structured block. // longjmp() and throw() must not violate the entry/exit criteria. 
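The OpenMPRTLFunction list above enumerates the libomp entry points this file emits calls to. The first of them, __kmpc_fork_call, is how a parallel region is launched: the region body is outlined into a kmpc_micro-style function and the captured variables are forwarded as trailing arguments. A self-contained sketch of that shape, with the ident_t argument elided and the integer types simplified; it is not meant to be linked against a real runtime:

    #include <cstdarg>

    typedef void kmpc_micro(int *global_tid, int *bound_tid, ...);
    extern "C" void __kmpc_fork_call(void *loc, int argc, kmpc_micro *microtask, ...);

    // Outlined body of '#pragma omp parallel shared(n)'.
    static void outlinedParallelBody(int *global_tid, int *bound_tid, ...) {
      va_list ap;
      va_start(ap, bound_tid);
      int *n = va_arg(ap, int *);   // the single shared variable
      va_end(ap);
      (void)global_tid; (void)bound_tid;
      ++*n;                         // stand-in for the real region body
    }

    void emitParallelSketch(int *n) {
      // What the runtime library call conceptually looks like for the directive:
      __kmpc_fork_call(/*loc=*/nullptr, /*argc=*/1, outlinedParallelBody, n);
    }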
CGF.EHStack.pushTerminate(); - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - CodeGen(CGF); - } + CodeGen(CGF); CGF.EHStack.popTerminate(); } @@ -310,16 +721,11 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( } CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) - : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr), - OffloadEntriesInfoManager(CGM) { + : CGM(CGM), OffloadEntriesInfoManager(CGM) { IdentTy = llvm::StructType::create( "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, CGM.Int8PtrTy /* psource */, nullptr); - // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) - llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), - llvm::PointerType::getUnqual(CGM.Int32Ty)}; - Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); loadOffloadInfoMetadata(); @@ -329,6 +735,90 @@ void CGOpenMPRuntime::clear() { InternalVars.clear(); } +static llvm::Function * +emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, + const Expr *CombinerInitializer, const VarDecl *In, + const VarDecl *Out, bool IsCombiner) { + // void .omp_combiner.(Ty *in, Ty *out); + auto &C = CGM.getContext(); + QualType PtrTy = C.getPointerType(Ty).withRestrict(); + FunctionArgList Args; + ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), + /*Id=*/nullptr, PtrTy); + ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), + /*Id=*/nullptr, PtrTy); + Args.push_back(&OmpOutParm); + Args.push_back(&OmpInParm); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create( + FnTy, llvm::GlobalValue::InternalLinkage, + IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + Fn->addFnAttr(llvm::Attribute::AlwaysInline); + CodeGenFunction CGF(CGM); + // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. + // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + CodeGenFunction::OMPPrivateScope Scope(CGF); + Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); + Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address { + return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) + .getAddress(); + }); + Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); + Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address { + return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) + .getAddress(); + }); + (void)Scope.Privatize(); + CGF.EmitIgnoredExpr(CombinerInitializer); + Scope.ForceCleanup(); + CGF.FinishFunction(); + return Fn; +} + +void CGOpenMPRuntime::emitUserDefinedReduction( + CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { + if (UDRMap.count(D) > 0) + return; + auto &C = CGM.getContext(); + if (!In || !Out) { + In = &C.Idents.get("omp_in"); + Out = &C.Idents.get("omp_out"); + } + llvm::Function *Combiner = emitCombinerOrInitializer( + CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()), + cast<VarDecl>(D->lookup(Out).front()), + /*IsCombiner=*/true); + llvm::Function *Initializer = nullptr; + if (auto *Init = D->getInitializer()) { + if (!Priv || !Orig) { + Priv = &C.Idents.get("omp_priv"); + Orig = &C.Idents.get("omp_orig"); + } + Initializer = emitCombinerOrInitializer( + CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()), + cast<VarDecl>(D->lookup(Priv).front()), + /*IsCombiner=*/false); + } + UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer))); + if (CGF) { + auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); + Decls.second.push_back(D); + } +} + +std::pair<llvm::Function *, llvm::Function *> +CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { + auto I = UDRMap.find(D); + if (I != UDRMap.end()) + return I->second; + emitUserDefinedReduction(/*CGF=*/nullptr, D); + return UDRMap.lookup(D); +} + // Layout information for ident_t. static CharUnits getIdentAlign(CodeGenModule &CGM) { return CGM.getPointerAlign(); @@ -337,18 +827,18 @@ static CharUnits getIdentSize(CodeGenModule &CGM) { assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); return CharUnits::fromQuantity(16) + CGM.getPointerSize(); } -static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) { +static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) { // All the fields except the last are i32, so this works beautifully. 
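As a usage reference (plain OpenMP 4.0 syntax, not generated code), this is the kind of declare-reduction construct that emitUserDefinedReduction and emitCombinerOrInitializer above lower: the combiner and initializer expressions become internal .omp_combiner./.omp_initializer. functions in which omp_in/omp_out (and omp_orig/omp_priv) are remapped to pointer parameters. The reduction identifier "plus" and the function below are illustrative.

#pragma omp declare reduction(plus : long : omp_out += omp_in) \
    initializer(omp_priv = 0)

long sum(const long *v, int n) {
  long s = 0;
#pragma omp parallel for reduction(plus : s)
  for (int i = 0; i < n; ++i)
    s += v[i];
  return s;
}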
return unsigned(Field) * CharUnits::fromQuantity(4); } static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, - CGOpenMPRuntime::IdentFieldIndex Field, + IdentFieldIndex Field, const llvm::Twine &Name = "") { auto Offset = getOffsetOfIdentField(Field); return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); } -llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( +llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { assert(ThreadIDVar->getType()->isPointerType() && @@ -370,19 +860,39 @@ llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + const VarDecl *PartIDVar, const VarDecl *TaskTVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + bool Tied, unsigned &NumberOfParts) { + auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, + PrePostActionTy &) { + auto *ThreadID = getThreadID(CGF, D.getLocStart()); + auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); + llvm::Value *TaskArgs[] = { + UpLoc, ThreadID, + CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), + TaskTVar->getType()->castAs<PointerType>()) + .getPointer()}; + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); + }; + CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, + UntiedCodeGen); + CodeGen.setAction(Action); assert(!ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 for tasks"); auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); + auto *TD = dyn_cast<OMPTaskDirective>(&D); CodeGenFunction CGF(CGM, true); CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, - cast<OMPTaskDirective>(D).hasCancel()); + TD ? TD->hasCancel() : false, Action); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - return CGF.GenerateCapturedStmtFunction(*CS); + auto *Res = CGF.GenerateCapturedStmtFunction(*CS); + if (!Tied) + NumberOfParts = Action.getNumberOfParts(); + return Res; } -Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { +Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { CharUnits Align = getIdentAlign(CGM); llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); if (!Entry) { @@ -399,7 +909,7 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { auto DefaultOpenMPLocation = new llvm::GlobalVariable( CGM.getModule(), IdentTy, /*isConstant*/ true, llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); - DefaultOpenMPLocation->setUnnamedAddr(true); + DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); DefaultOpenMPLocation->setAlignment(Align.getQuantity()); llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); @@ -415,9 +925,10 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPLocationFlags Flags) { + unsigned Flags) { + Flags |= OMP_IDENT_KMPC; // If no debug info is generated - return global default location. 
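A standalone sketch (field order taken from the ident_t fields built in the CGOpenMPRuntime constructor above; the C struct itself is an assumption for illustration) of why getOffsetOfIdentField can return Field * 4: every field before psource is a 32-bit integer, matching getIdentSize's "16 bytes plus one pointer".

#include <cstddef>
#include <cstdint>
#include <cstdio>

struct ident_t {
  int32_t reserved_1;
  int32_t flags;
  int32_t reserved_2;
  int32_t reserved_3;
  const char *psource;  // source location string
};

int main() {
  std::printf("flags at %zu, psource at %zu, size %zu\n",
              offsetof(ident_t, flags), offsetof(ident_t, psource),
              sizeof(ident_t));  // 4, 16, and 16 + pointer size on LP64
}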
- if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || + if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || Loc.isInvalid()) return getOrCreateDefaultLocation(Flags).getPointer(); @@ -517,20 +1028,34 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { assert(CGF.CurFn && "No function in current CodeGenFunction."); if (OpenMPLocThreadIDMap.count(CGF.CurFn)) OpenMPLocThreadIDMap.erase(CGF.CurFn); + if (FunctionUDRMap.count(CGF.CurFn) > 0) { + for(auto *D : FunctionUDRMap[CGF.CurFn]) { + UDRMap.erase(D); + } + FunctionUDRMap.erase(CGF.CurFn); + } } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { + if (!IdentTy) { + } return llvm::PointerType::getUnqual(IdentTy); } llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { + if (!Kmpc_MicroTy) { + // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) + llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), + llvm::PointerType::getUnqual(CGM.Int32Ty)}; + Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); + } return llvm::PointerType::getUnqual(Kmpc_MicroTy); } llvm::Constant * -CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { +CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Constant *RTLFn = nullptr; - switch (Function) { + switch (static_cast<OpenMPRTLFunction>(Function)) { case OMPRTL__kmpc_fork_call: { // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro // microtask, ...); @@ -927,6 +1452,86 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); break; } + case OMPRTL__kmpc_push_num_teams: { + // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, + // kmp_int32 num_teams, kmp_int32 num_threads) + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); + break; + } + case OMPRTL__kmpc_fork_teams: { + // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro + // microtask, ...); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + getKmpc_MicroPointerTy()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); + break; + } + case OMPRTL__kmpc_taskloop: { + // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int + // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int + // sched, kmp_uint64 grainsize, void *task_dup); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), + CGM.IntTy, + CGM.VoidPtrTy, + CGM.IntTy, + CGM.Int64Ty->getPointerTo(), + CGM.Int64Ty->getPointerTo(), + CGM.Int64Ty, + CGM.IntTy, + CGM.IntTy, + CGM.Int64Ty, + CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); + break; + } + case OMPRTL__kmpc_doacross_init: { + // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 + // num_dims, struct kmp_dim *dims); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), + CGM.Int32Ty, + CGM.Int32Ty, + CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = 
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); + break; + } + case OMPRTL__kmpc_doacross_fini: { + // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); + break; + } + case OMPRTL__kmpc_doacross_post: { + // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 + // *vec); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.Int64Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); + break; + } + case OMPRTL__kmpc_doacross_wait: { + // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 + // *vec); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.Int64Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); + break; + } case OMPRTL__tgt_target: { // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t @@ -943,6 +1548,24 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); break; } + case OMPRTL__tgt_target_teams: { + // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo(), + CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); + break; + } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = @@ -963,30 +1586,53 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); break; } + case OMPRTL__tgt_target_data_begin: { + // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); + break; + } + case OMPRTL__tgt_target_data_end: { + // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); + 
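For context (standard OpenMP 4.5 device constructs; the function and variable names are illustrative), this is the source-level pattern that ends up calling the __tgt_target_data_begin/__tgt_target_data_end and __tgt_target_teams entry points declared above:

void scale(float *a, int n) {
#pragma omp target data map(tofrom : a[0:n])      // __tgt_target_data_begin / _end
  {
#pragma omp target teams distribute parallel for  // __tgt_target_teams
    for (int i = 0; i < n; ++i)
      a[i] *= 2.0f;
  }
}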
break; + } + case OMPRTL__tgt_target_data_update: { + // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); + break; + } } + assert(RTLFn && "Unable to find OpenMP runtime function"); return RTLFn; } -static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) { - auto &C = CGF.getContext(); - llvm::Value *Size = nullptr; - auto SizeInChars = C.getTypeSizeInChars(Ty); - if (SizeInChars.isZero()) { - // getTypeSizeInChars() returns 0 for a VLA. - while (auto *VAT = C.getAsVariableArrayType(Ty)) { - llvm::Value *ArraySize; - std::tie(ArraySize, Ty) = CGF.getVLASize(VAT); - Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize; - } - SizeInChars = C.getTypeSizeInChars(Ty); - assert(!SizeInChars.isZero()); - Size = CGF.Builder.CreateNUWMul( - Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity())); - } else - Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()); - return Size; -} - llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && @@ -1144,9 +1790,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( /*Id=*/nullptr, CGM.getContext().VoidPtrTy); Args.push_back(&Dst); - auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), - /*isVariadic=*/false); + auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( + CGM.getContext().VoidPtrTy, Args); auto FTy = CGM.getTypes().GetFunctionType(FI); auto Fn = CGM.CreateGlobalInitOrDestructFunction( FTy, ".__kmpc_global_ctor_.", FI, Loc); @@ -1176,14 +1821,16 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( /*Id=*/nullptr, CGM.getContext().VoidPtrTy); Args.push_back(&Dst); - auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), - /*isVariadic=*/false); + auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( + CGM.getContext().VoidTy, Args); auto FTy = CGM.getTypes().GetFunctionType(FI); auto Fn = CGM.CreateGlobalInitOrDestructFunction( FTy, ".__kmpc_global_dtor_.", FI, Loc); + auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, SourceLocation()); + // Create a scope with an artificial location for the body of this function. + auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); auto ArgVal = DtorCGF.EmitLoadOfScalar( DtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); @@ -1251,12 +1898,10 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, // the condition and the dead arm of the if/else. bool CondConstant; if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { - CodeGenFunction::RunCleanupsScope Scope(CGF); - if (CondConstant) { + if (CondConstant) ThenGen(CGF); - } else { + else ElseGen(CGF); - } return; } @@ -1269,26 +1914,16 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, // Emit the 'then' code. 
CGF.EmitBlock(ThenBlock); - { - CodeGenFunction::RunCleanupsScope ThenScope(CGF); - ThenGen(CGF); - } + ThenGen(CGF); CGF.EmitBranch(ContBlock); // Emit the 'else' code if present. - { - // There is no need to emit line number for unconditional branch. - auto NL = ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBlock(ElseBlock); - } - { - CodeGenFunction::RunCleanupsScope ThenScope(CGF); - ElseGen(CGF); - } - { - // There is no need to emit line number for unconditional branch. - auto NL = ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBranch(ContBlock); - } + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(ElseBlock); + ElseGen(CGF); + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBranch(ContBlock); // Emit the continuation block for code after the if. CGF.EmitBlock(ContBlock, /*IsFinished=*/true); } @@ -1300,34 +1935,36 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, if (!CGF.HaveInsertPoint()) return; auto *RTLoc = emitUpdateLocation(CGF, Loc); - auto &&ThenGen = [this, OutlinedFn, CapturedVars, - RTLoc](CodeGenFunction &CGF) { + auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, + PrePostActionTy &) { // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); + auto &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { RTLoc, CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars - CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; + CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; llvm::SmallVector<llvm::Value *, 16> RealArgs; RealArgs.append(std::begin(Args), std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); - auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); + auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); CGF.EmitRuntimeCall(RTLFn, RealArgs); }; - auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc, - Loc](CodeGenFunction &CGF) { - auto ThreadID = getThreadID(CGF, Loc); + auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, + PrePostActionTy &) { + auto &RT = CGF.CGM.getOpenMPRuntime(); + auto ThreadID = RT.getThreadID(CGF, Loc); // Build calls: // __kmpc_serialized_parallel(&Loc, GTid); llvm::Value *Args[] = {RTLoc, ThreadID}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), - Args); + CGF.EmitRuntimeCall( + RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); // OutlinedFn(>id, &zero, CapturedStruct); - auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc); + auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); Address ZeroAddr = - CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), - /*Name*/ ".zero.addr"); + CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), + /*Name*/ ".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); @@ -1336,15 +1973,16 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); // __kmpc_end_serialized_parallel(&Loc, GTid); - llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID}; + llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 
EndArgs); + RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), + EndArgs); }; - if (IfCond) { + if (IfCond) emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); - ThenGen(CGF); + else { + RegionCodeGenTy ThenRCG(ThenGen); + ThenRCG(CGF); } } @@ -1397,20 +2035,39 @@ llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { } namespace { -template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup { - llvm::Value *Callee; - llvm::Value *Args[N]; +/// Common pre(post)-action for different OpenMP constructs. +class CommonActionTy final : public PrePostActionTy { + llvm::Value *EnterCallee; + ArrayRef<llvm::Value *> EnterArgs; + llvm::Value *ExitCallee; + ArrayRef<llvm::Value *> ExitArgs; + bool Conditional; + llvm::BasicBlock *ContBlock = nullptr; public: - CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs) - : Callee(Callee) { - assert(CleanupArgs.size() == N); - std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args)); + CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, + llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, + bool Conditional = false) + : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), + ExitArgs(ExitArgs), Conditional(Conditional) {} + void Enter(CodeGenFunction &CGF) override { + llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); + if (Conditional) { + llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); + auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); + ContBlock = CGF.createBasicBlock("omp_if.end"); + // Generate the branch (If-stmt) + CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); + CGF.EmitBlock(ThenBlock); + } } - void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { - if (!CGF.HaveInsertPoint()) - return; - CGF.EmitRuntimeCall(Callee, Args); + void Done(CodeGenFunction &CGF) { + // Emit the rest of blocks/branches + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } + void Exit(CodeGenFunction &CGF) override { + CGF.EmitRuntimeCall(ExitCallee, ExitArgs); } }; } // anonymous namespace @@ -1425,45 +2082,22 @@ void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, // Prepare arguments and build a call to __kmpc_critical if (!CGF.HaveInsertPoint()) return; - CodeGenFunction::RunCleanupsScope Scope(CGF); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), getCriticalRegionLock(CriticalName)}; + llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), + std::end(Args)); if (Hint) { - llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args), - std::end(Args)); - auto *HintVal = CGF.EmitScalarExpr(Hint); - ArgsWithHint.push_back( - CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false)); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint), - ArgsWithHint); - } else - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); - // Build a call to __kmpc_end_critical - CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical), - llvm::makeArrayRef(Args)); + EnterArgs.push_back(CGF.Builder.CreateIntCast( + CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); + } + CommonActionTy Action( + createRuntimeFunction(Hint ? 
OMPRTL__kmpc_critical_with_hint + : OMPRTL__kmpc_critical), + EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); + CriticalOpGen.setAction(Action); emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); } -static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, - OpenMPDirectiveKind Kind, SourceLocation Loc, - const RegionCodeGenTy &BodyOpGen) { - llvm::Value *CallBool = CGF.EmitScalarConversion( - IfCond, - CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), - CGF.getContext().BoolTy, Loc); - - auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); - auto *ContBlock = CGF.createBasicBlock("omp_if.end"); - // Generate the branch (If-stmt) - CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); - CGF.EmitBlock(ThenBlock); - CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen); - // Emit the rest of bblocks/branches - CGF.EmitBranch(ContBlock); - CGF.EmitBlock(ContBlock, true); -} - void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) { @@ -1475,18 +2109,12 @@ void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, // } // Prepare arguments and build a call to __kmpc_master llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - auto *IsMaster = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); - typedef CallEndCleanup<std::extent<decltype(Args)>::value> - MasterCallEndCleanup; - emitIfStmt( - CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void { - CodeGenFunction::RunCleanupsScope Scope(CGF); - CGF.EHStack.pushCleanup<MasterCallEndCleanup>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master), - llvm::makeArrayRef(Args)); - MasterOpGen(CGF); - }); + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, + createRuntimeFunction(OMPRTL__kmpc_end_master), Args, + /*Conditional=*/true); + MasterOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_master, MasterOpGen); + Action.Done(CGF); } void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, @@ -1498,6 +2126,8 @@ void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); + if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); } void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, @@ -1509,16 +2139,12 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, // TaskgroupOpGen(); // __kmpc_end_taskgroup(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_taskgroup - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args); - // Build a call to __kmpc_end_taskgroup - CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), - llvm::makeArrayRef(Args)); - emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); - } + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, + createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), + Args); + TaskgroupOpGen.setAction(Action); + 
emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); } /// Given an array of pointers to variables, project the address of a @@ -1549,9 +2175,7 @@ static llvm::Value *emitCopyprivateCopyFunction( C.VoidPtrTy); Args.push_back(&LHSArg); Args.push_back(&RHSArg); - FunctionType::ExtInfo EI; - auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, Args, EI, /*isVariadic=*/false); + auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, ".omp.copyprivate.copy_func", &CGM.getModule()); @@ -1616,22 +2240,16 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, } // Prepare arguments and build a call to __kmpc_single llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - auto *IsSingle = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); - typedef CallEndCleanup<std::extent<decltype(Args)>::value> - SingleCallEndCleanup; - emitIfStmt( - CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void { - CodeGenFunction::RunCleanupsScope Scope(CGF); - CGF.EHStack.pushCleanup<SingleCallEndCleanup>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single), - llvm::makeArrayRef(Args)); - SingleOpGen(CGF); - if (DidIt.isValid()) { - // did_it = 1; - CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); - } - }); + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, + createRuntimeFunction(OMPRTL__kmpc_end_single), Args, + /*Conditional=*/true); + SingleOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_single, SingleOpGen); + if (DidIt.isValid()) { + // did_it = 1; + CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); + } + Action.Done(CGF); // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, // <copy_func>, did_it); if (DidIt.isValid()) { @@ -1655,7 +2273,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, auto *CpyFn = emitCopyprivateCopyFunction( CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); - auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy); + auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy); Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, CGF.VoidPtrTy); @@ -1681,14 +2299,14 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, // OrderedOpGen(); // __kmpc_end_ordered(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_ordered - CodeGenFunction::RunCleanupsScope Scope(CGF); if (IsThreads) { llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args); - // Build a call to __kmpc_end_ordered - CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered), - llvm::makeArrayRef(Args)); + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, + createRuntimeFunction(OMPRTL__kmpc_end_ordered), + Args); + OrderedOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); + return; } emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); } @@ -1700,21 +2318,17 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, return; // Build call __kmpc_cancel_barrier(loc, thread_id); // Build call __kmpc_barrier(loc, thread_id); - OpenMPLocationFlags 
Flags = OMP_IDENT_KMPC; - if (Kind == OMPD_for) { - Flags = - static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR); - } else if (Kind == OMPD_sections) { - Flags = static_cast<OpenMPLocationFlags>(Flags | - OMP_IDENT_BARRIER_IMPL_SECTIONS); - } else if (Kind == OMPD_single) { - Flags = - static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE); - } else if (Kind == OMPD_barrier) { - Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL); - } else { - Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL); - } + unsigned Flags; + if (Kind == OMPD_for) + Flags = OMP_IDENT_BARRIER_IMPL_FOR; + else if (Kind == OMPD_sections) + Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; + else if (Kind == OMPD_single) + Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; + else if (Kind == OMPD_barrier) + Flags = OMP_IDENT_BARRIER_EXPL; + else + Flags = OMP_IDENT_BARRIER_IMPL; // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, // thread_id); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), @@ -1745,28 +2359,6 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); } -/// \brief Schedule types for 'omp for' loops (these enumerators are taken from -/// the enum sched_type in kmp.h). -enum OpenMPSchedType { - /// \brief Lower bound for default (unordered) versions. - OMP_sch_lower = 32, - OMP_sch_static_chunked = 33, - OMP_sch_static = 34, - OMP_sch_dynamic_chunked = 35, - OMP_sch_guided_chunked = 36, - OMP_sch_runtime = 37, - OMP_sch_auto = 38, - /// \brief Lower bound for 'ordered' versions. - OMP_ord_lower = 64, - OMP_ord_static_chunked = 65, - OMP_ord_static = 66, - OMP_ord_dynamic_chunked = 67, - OMP_ord_guided_chunked = 68, - OMP_ord_runtime = 69, - OMP_ord_auto = 70, - OMP_sch_default = OMP_sch_static, -}; - /// \brief Map the OpenMP loop schedule to the runtime enumeration. static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered) { @@ -1789,12 +2381,26 @@ static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, llvm_unreachable("Unexpected runtime schedule"); } +/// \brief Map the OpenMP distribute schedule to the runtime enumeration. +static OpenMPSchedType +getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { + // only static is allowed for dist_schedule + return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; +} + bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const { auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); return Schedule == OMP_sch_static; } +bool CGOpenMPRuntime::isStaticNonchunked( + OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { + auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); + return Schedule == OMP_dist_sch_static; +} + + bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { auto Schedule = getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); @@ -1802,19 +2408,57 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { return Schedule != OMP_sch_static; } +static int addMonoNonMonoModifier(OpenMPSchedType Schedule, + OpenMPScheduleClauseModifier M1, + OpenMPScheduleClauseModifier M2) { + int Modifier = 0; + switch (M1) { + case OMPC_SCHEDULE_MODIFIER_monotonic: + Modifier = OMP_sch_modifier_monotonic; + break; + case OMPC_SCHEDULE_MODIFIER_nonmonotonic: + Modifier = OMP_sch_modifier_nonmonotonic; + break; + case OMPC_SCHEDULE_MODIFIER_simd: + if (Schedule == OMP_sch_static_chunked) + Schedule = OMP_sch_static_balanced_chunked; + break; + case OMPC_SCHEDULE_MODIFIER_last: + case OMPC_SCHEDULE_MODIFIER_unknown: + break; + } + switch (M2) { + case OMPC_SCHEDULE_MODIFIER_monotonic: + Modifier = OMP_sch_modifier_monotonic; + break; + case OMPC_SCHEDULE_MODIFIER_nonmonotonic: + Modifier = OMP_sch_modifier_nonmonotonic; + break; + case OMPC_SCHEDULE_MODIFIER_simd: + if (Schedule == OMP_sch_static_chunked) + Schedule = OMP_sch_static_balanced_chunked; + break; + case OMPC_SCHEDULE_MODIFIER_last: + case OMPC_SCHEDULE_MODIFIER_unknown: + break; + } + return Schedule | Modifier; +} + void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPScheduleClauseKind ScheduleKind, + const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, llvm::Value *UB, llvm::Value *Chunk) { if (!CGF.HaveInsertPoint()) return; OpenMPSchedType Schedule = - getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); + getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); assert(Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && - Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)); + Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && + Schedule != OMP_sch_static_balanced_chunked)); // Call __kmpc_dispatch_init( // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, // kmp_int[32|64] lower, kmp_int[32|64] upper, @@ -1824,59 +2468,94 @@ void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, if (Chunk == nullptr) Chunk = CGF.Builder.getIntN(IVSize, 1); llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), - getThreadID(CGF, Loc), - CGF.Builder.getInt32(Schedule), // Schedule type - CGF.Builder.getIntN(IVSize, 0), // Lower - UB, // Upper - CGF.Builder.getIntN(IVSize, 1), // Stride - Chunk // Chunk + emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), + CGF.Builder.getInt32(addMonoNonMonoModifier( + Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type + CGF.Builder.getIntN(IVSize, 0), // Lower + UB, // Upper + CGF.Builder.getIntN(IVSize, 1), // Stride + Chunk // Chunk }; CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); } +static void emitForStaticInitCall( + CodeGenFunction &CGF, llvm::Value 
*UpdateLocation, llvm::Value *ThreadId, + llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, + OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, + unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB, + Address ST, llvm::Value *Chunk) { + if (!CGF.HaveInsertPoint()) + return; + + assert(!Ordered); + assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || + Schedule == OMP_dist_sch_static || + Schedule == OMP_dist_sch_static_chunked); + + // Call __kmpc_for_static_init( + // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, + // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, + // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, + // kmp_int[32|64] incr, kmp_int[32|64] chunk); + if (Chunk == nullptr) { + assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || + Schedule == OMP_dist_sch_static) && + "expected static non-chunked schedule"); + // If the Chunk was not specified in the clause - use default value 1. + Chunk = CGF.Builder.getIntN(IVSize, 1); + } else { + assert((Schedule == OMP_sch_static_chunked || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static_chunked || + Schedule == OMP_dist_sch_static_chunked) && + "expected static chunked schedule"); + } + llvm::Value *Args[] = { + UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier( + Schedule, M1, M2)), // Schedule type + IL.getPointer(), // &isLastIter + LB.getPointer(), // &LB + UB.getPointer(), // &UB + ST.getPointer(), // &Stride + CGF.Builder.getIntN(IVSize, 1), // Incr + Chunk // Chunk + }; + CGF.EmitRuntimeCall(ForStaticInitFunction, Args); +} + void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPScheduleClauseKind ScheduleKind, + const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, Address IL, Address LB, Address UB, Address ST, llvm::Value *Chunk) { - if (!CGF.HaveInsertPoint()) - return; - OpenMPSchedType Schedule = - getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); - assert(!Ordered); - assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || - Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked); - - // Call __kmpc_for_static_init( - // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, - // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, - // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, - // kmp_int[32|64] incr, kmp_int[32|64] chunk); - if (Chunk == nullptr) { - assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) && - "expected static non-chunked schedule"); - // If the Chunk was not specified in the clause - use default value 1. 
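For reference (plain OpenMP 4.5 schedule-clause syntax; the loop bodies are illustrative), these are the source-level schedule clauses whose kinds and monotonic/nonmonotonic/simd modifiers addMonoNonMonoModifier above folds into the runtime schedule value; the simd modifier is what switches a static chunked schedule to the balanced-chunked variant.

void loops(float *a, int n) {
#pragma omp parallel
  {
#pragma omp for schedule(nonmonotonic : dynamic, 64)
    for (int i = 0; i < n; ++i)
      a[i] += 1.0f;
#pragma omp for schedule(simd : static, 8)
    for (int i = 0; i < n; ++i)
      a[i] *= 2.0f;
  }
}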
- Chunk = CGF.Builder.getIntN(IVSize, 1); - } else { - assert((Schedule == OMP_sch_static_chunked || - Schedule == OMP_ord_static_chunked) && - "expected static chunked schedule"); - } - llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), - getThreadID(CGF, Loc), - CGF.Builder.getInt32(Schedule), // Schedule type - IL.getPointer(), // &isLastIter - LB.getPointer(), // &LB - UB.getPointer(), // &UB - ST.getPointer(), // &Stride - CGF.Builder.getIntN(IVSize, 1), // Incr - Chunk // Chunk - }; - CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args); + OpenMPSchedType ScheduleNum = + getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); + auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); + auto *ThreadId = getThreadID(CGF, Loc); + auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); + emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, + ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize, + Ordered, IL, LB, UB, ST, Chunk); +} + +void CGOpenMPRuntime::emitDistributeStaticInit( + CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, + bool Ordered, Address IL, Address LB, Address UB, Address ST, + llvm::Value *Chunk) { + OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr); + auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); + auto *ThreadId = getThreadID(CGF, Loc); + auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); + emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, + ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, + OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB, + UB, ST, Chunk); } void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, @@ -1884,8 +2563,7 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), - getThreadID(CGF, Loc)}; + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), Args); } @@ -1897,8 +2575,7 @@ void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), - getThreadID(CGF, Loc)}; + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); } @@ -1912,7 +2589,8 @@ llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, // kmp_int[32|64] *p_stride); llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc), + emitUpdateLocation(CGF, Loc), + getThreadID(CGF, Loc), IL.getPointer(), // &isLastIter LB.getPointer(), // &Lower UB.getPointer(), // &Upper @@ -1991,8 +2669,18 @@ enum KmpTaskTFields { KmpTaskTRoutine, /// \brief Partition id for the untied tasks. KmpTaskTPartId, - /// \brief Function with call of destructors for private variables. - KmpTaskTDestructors, + /// Function with call of destructors for private variables. + Data1, + /// Task priority. + Data2, + /// (Taskloops only) Lower bound. + KmpTaskTLowerBound, + /// (Taskloops only) Upper bound. 
+ KmpTaskTUpperBound, + /// (Taskloops only) Stride. + KmpTaskTStride, + /// (Taskloops only) Is last iteration flag. + KmpTaskTLastIter, }; } // anonymous namespace @@ -2005,11 +2693,11 @@ bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - unsigned ColNum, unsigned Order) { + unsigned Order) { assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " "only required for the device " "code generation."); - OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] = + OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr); ++OffloadingEntriesNum; } @@ -2017,30 +2705,27 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - unsigned ColNum, llvm::Constant *Addr, - llvm::Constant *ID) { + llvm::Constant *Addr, llvm::Constant *ID) { // If we are emitting code for a target, the entry is already initialized, // only has to be registered. if (CGM.getLangOpts().OpenMPIsDevice) { - assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, - ColNum) && + assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && "Entry must exist."); - auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName] - [LineNum][ColNum]; + auto &Entry = + OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; assert(Entry.isValid() && "Entry not initialized!"); Entry.setAddress(Addr); Entry.setID(ID); return; } else { OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID); - OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] = - Entry; + OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; } } bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( - unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - unsigned ColNum) const { + unsigned DeviceID, unsigned FileID, StringRef ParentName, + unsigned LineNum) const { auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); if (PerDevice == OffloadEntriesTargetRegion.end()) return false; @@ -2053,11 +2738,8 @@ bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( auto PerLine = PerParentName->second.find(LineNum); if (PerLine == PerParentName->second.end()) return false; - auto PerColumn = PerLine->second.find(ColNum); - if (PerColumn == PerLine->second.end()) - return false; // Fail if this entry is already registered. 
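As a usage reference (OpenMP 4.5 taskloop syntax; grainsize value and function name are illustrative), this is the construct behind the taskloop-only kmp_task_t fields (lower bound, upper bound, stride, last-iteration flag) listed in the KmpTaskTFields enum above and the __kmpc_taskloop entry point:

void tl(float *a, int n) {
#pragma omp parallel
#pragma omp single
#pragma omp taskloop grainsize(64)
  for (int i = 0; i < n; ++i)
    a[i] += 1.0f;
}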
- if (PerColumn->second.getAddress() || PerColumn->second.getID()) + if (PerLine->second.getAddress() || PerLine->second.getID()) return false; return true; } @@ -2069,8 +2751,7 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( for (auto &F : D.second) for (auto &P : F.second) for (auto &L : P.second) - for (auto &C : L.second) - Action(D.first, F.first, P.first(), L.first, C.first, C.second); + Action(D.first, F.first, P.first(), L.first, L.second); } /// \brief Create a Ctor/Dtor-like function whose body is emitted through @@ -2087,9 +2768,7 @@ createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, CodeGenFunction CGF(CGM); GlobalDecl(); - auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, Args, FunctionType::ExtInfo(), - /*isVariadic=*/false); + auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto FTy = CGM.getTypes().GetFunctionType(FI); auto *Fn = CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation()); @@ -2123,11 +2802,11 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0, + llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, ".omp_offloading.entries_begin"); llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0, + llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, ".omp_offloading.entries_end"); // Create all device images @@ -2139,10 +2818,11 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { StringRef T = Devices[i].getTriple(); auto *ImgBegin = new llvm::GlobalVariable( M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, - /*Initializer=*/0, Twine(".omp_offloading.img_start.") + Twine(T)); + /*Initializer=*/nullptr, + Twine(".omp_offloading.img_start.") + Twine(T)); auto *ImgEnd = new llvm::GlobalVariable( M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, - /*Initializer=*/0, Twine(".omp_offloading.img_end.") + Twine(T)); + /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T)); llvm::Constant *Dev = llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd, @@ -2160,7 +2840,7 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { M, DeviceImagesInitTy, /*isConstant=*/true, llvm::GlobalValue::InternalLinkage, DeviceImagesInit, ".omp_offloading.device_images"); - DeviceImages->setUnnamedAddr(true); + DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // This is a Zero array to be used in the creation of the constant expressions llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), @@ -2190,12 +2870,14 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { IdentInfo, C.CharTy); auto *UnRegFn = createOffloadingBinaryDescriptorFunction( - CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF) { + CGM, ".omp_offloading.descriptor_unreg", + [&](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), Desc); }); auto *RegFn = createOffloadingBinaryDescriptorFunction( - CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF) { + CGM, ".omp_offloading.descriptor_reg", + [&](CodeGenFunction &CGF, 
PrePostActionTy &) { CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); @@ -2203,15 +2885,16 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { return RegFn; } -void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name, - uint64_t Size) { +void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID, + llvm::Constant *Addr, uint64_t Size) { + StringRef Name = Addr->getName(); auto *TgtOffloadEntryType = cast<llvm::StructType>( CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); llvm::LLVMContext &C = CGM.getModule().getContext(); llvm::Module &M = CGM.getModule(); // Make sure the address has the right type. - llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(Addr, CGM.VoidPtrTy); + llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy); // Create constant string with the name. llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); @@ -2220,7 +2903,7 @@ void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name, new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, llvm::GlobalValue::InternalLinkage, StrPtrInit, ".omp_offloading.entry_name"); - Str->setUnnamedAddr(true); + Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); // Create the entry struct. @@ -2236,7 +2919,6 @@ void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name, // We can't have any padding between symbols, so we need to have 1-byte // alignment. Entry->setAlignment(1); - return; } void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { @@ -2272,7 +2954,6 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Create function that emits metadata for each target region entry; auto &&TargetRegionMetadataEmitter = [&]( unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, - unsigned Column, OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { llvm::SmallVector<llvm::Metadata *, 32> Ops; // Generate metadata for target regions. Each entry of this metadata @@ -2282,15 +2963,13 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // - Entry 2 -> File ID of the file where the entry was identified. // - Entry 3 -> Mangled name of the function where the entry was identified. // - Entry 4 -> Line in the file where the entry was identified. - // - Entry 5 -> Column in the file where the entry was identified. - // - Entry 6 -> Order the entry was created. + // - Entry 5 -> Order the entry was created. // The first element of the metadata node is the kind. Ops.push_back(getMDInt(E.getKind())); Ops.push_back(getMDInt(DeviceID)); Ops.push_back(getMDInt(FileID)); Ops.push_back(getMDString(ParentName)); Ops.push_back(getMDInt(Line)); - Ops.push_back(getMDInt(Column)); Ops.push_back(getMDInt(E.getOrder())); // Save this entry in the right position of the ordered entries array. 
@@ -2310,7 +2989,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { E)) { assert(CE->getID() && CE->getAddress() && "Entry ID and Addr are invalid!"); - createOffloadEntry(CE->getID(), CE->getAddress()->getName(), /*Size=*/0); + createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0); } else llvm_unreachable("Unsupported entry kind."); } @@ -2365,7 +3044,7 @@ void CGOpenMPRuntime::loadOffloadInfoMetadata() { OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2), /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4), - /*Column=*/getMDInt(5), /*Order=*/getMDInt(6)); + /*Order=*/getMDInt(5)); break; } } @@ -2509,21 +3188,45 @@ createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { } static RecordDecl * -createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty, +createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, + QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy) { auto &C = CGM.getContext(); // Build struct kmp_task_t { // void * shareds; // kmp_routine_entry_t routine; // kmp_int32 part_id; - // kmp_routine_entry_t destructors; + // kmp_cmplrdata_t data1; + // kmp_cmplrdata_t data2; + // For taskloops additional fields: + // kmp_uint64 lb; + // kmp_uint64 ub; + // kmp_int64 st; + // kmp_int32 liter; // }; + auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); + UD->startDefinition(); + addFieldToRecordDecl(C, UD, KmpInt32Ty); + addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); + UD->completeDefinition(); + QualType KmpCmplrdataTy = C.getRecordType(UD); auto *RD = C.buildImplicitRecord("kmp_task_t"); RD->startDefinition(); addFieldToRecordDecl(C, RD, C.VoidPtrTy); addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); addFieldToRecordDecl(C, RD, KmpInt32Ty); - addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); + addFieldToRecordDecl(C, RD, KmpCmplrdataTy); + addFieldToRecordDecl(C, RD, KmpCmplrdataTy); + if (isOpenMPTaskLoopDirective(Kind)) { + QualType KmpUInt64Ty = + CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); + QualType KmpInt64Ty = + CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); + addFieldToRecordDecl(C, RD, KmpUInt64Ty); + addFieldToRecordDecl(C, RD, KmpUInt64Ty); + addFieldToRecordDecl(C, RD, KmpInt64Ty); + addFieldToRecordDecl(C, RD, KmpInt32Ty); + } RD->completeDefinition(); return RD; } @@ -2550,14 +3253,17 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, /// argument. 
/// \code /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { -/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, +/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, +/// For taskloops: +/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, /// tt->shareds); /// return 0; /// } /// \endcode static llvm::Value * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, - QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, + OpenMPDirectiveKind Kind, QualType KmpInt32Ty, + QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Value *TaskFunction, llvm::Value *TaskPrivatesMap) { @@ -2569,10 +3275,8 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, KmpTaskTWithPrivatesPtrQTy.withRestrict()); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); - FunctionType::ExtInfo Info; auto &TaskEntryFnInfo = - CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, - /*isVariadic=*/false); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); auto *TaskEntry = llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, @@ -2583,11 +3287,15 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, + // tt, + // For taskloops: + // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, // tt->task_data.shareds); auto *GtidParam = CGF.EmitLoadOfScalar( CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); - LValue TDBase = emitLoadOfPointerLValue( - CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy); + LValue TDBase = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(&TaskTypeArg), + KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); auto *KmpTaskTWithPrivatesQTyRD = cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); LValue Base = @@ -2595,7 +3303,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); - auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal(); + auto *PartidParam = PartIdLVal.getPointer(); auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); @@ -2609,12 +3317,37 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( PrivatesLVal.getPointer(), CGF.VoidPtrTy); - } else { + } else PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + + llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, + TaskPrivatesMap, + CGF.Builder + .CreatePointerBitCastOrAddrSpaceCast( + TDBase.getAddress(), CGF.VoidPtrTy) + .getPointer()}; + SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), + std::end(CommonArgs)); + if (isOpenMPTaskLoopDirective(Kind)) { + auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); + auto LBLVal = CGF.EmitLValueForField(Base, *LBFI); + auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, 
Loc).getScalarVal(); + auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); + auto UBLVal = CGF.EmitLValueForField(Base, *UBFI); + auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal(); + auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); + auto StLVal = CGF.EmitLValueForField(Base, *StFI); + auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal(); + auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); + auto LILVal = CGF.EmitLValueForField(Base, *LIFI); + auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); + CallArgs.push_back(LBParam); + CallArgs.push_back(UBParam); + CallArgs.push_back(StParam); + CallArgs.push_back(LIParam); } + CallArgs.push_back(SharedsParam); - llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam, - TaskPrivatesMap, SharedsParam}; CGF.EmitCallOrInvoke(TaskFunction, CallArgs); CGF.EmitStoreThroughLValue( RValue::get(CGF.Builder.getInt32(/*C=*/0)), @@ -2638,8 +3371,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, Args.push_back(&TaskTypeArg); FunctionType::ExtInfo Info; auto &DestructorFnInfo = - CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, - /*isVariadic=*/false); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); auto *DestructorFn = llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, @@ -2651,8 +3383,9 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, Args); - LValue Base = emitLoadOfPointerLValue( - CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy); + LValue Base = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(&TaskTypeArg), + KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); auto *KmpTaskTWithPrivatesQTyRD = cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); @@ -2682,6 +3415,7 @@ static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, ArrayRef<const Expr *> PrivateVars, ArrayRef<const Expr *> FirstprivateVars, + ArrayRef<const Expr *> LastprivateVars, QualType PrivatesQTy, ArrayRef<PrivateDataTy> Privates) { auto &C = CGM.getContext(); @@ -2712,10 +3446,18 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, PrivateVarsPos[VD] = Counter; ++Counter; } - FunctionType::ExtInfo Info; + for (auto *E: LastprivateVars) { + Args.push_back(ImplicitParamDecl::Create( + C, /*DC=*/nullptr, Loc, + /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) + .withConst() + .withRestrict())); + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + PrivateVarsPos[VD] = Counter; + ++Counter; + } auto &TaskPrivatesMapFnInfo = - CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info, - /*isVariadic=*/false); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *TaskPrivatesMapTy = CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); auto *TaskPrivatesMap = llvm::Function::Create( @@ -2730,16 +3472,17 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, TaskPrivatesMapFnInfo, Args); // *privi = &.privates.privi; - LValue Base = emitLoadOfPointerLValue( - CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType()); + LValue Base = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 
+ TaskPrivatesArg.getType()->castAs<PointerType>()); auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); Counter = 0; for (auto *Field : PrivatesQTyRD->fields()) { auto FieldLVal = CGF.EmitLValueForField(Base, Field); auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); - auto RefLoadLVal = - emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType()); + auto RefLoadLVal = CGF.EmitLoadOfPointerLValue( + RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); ++Counter; } @@ -2752,23 +3495,199 @@ static int array_pod_sort_comparator(const PrivateDataTy *P1, return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); } -void CGOpenMPRuntime::emitTaskCall( - CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, - bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, - llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, - const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, - ArrayRef<const Expr *> PrivateCopies, - ArrayRef<const Expr *> FirstprivateVars, - ArrayRef<const Expr *> FirstprivateCopies, - ArrayRef<const Expr *> FirstprivateInits, - ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) { - if (!CGF.HaveInsertPoint()) - return; +/// Emit initialization for private variables in task-based directives. +static void emitPrivatesInit(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + Address KmpTaskSharedsPtr, LValue TDBase, + const RecordDecl *KmpTaskTWithPrivatesQTyRD, + QualType SharedsTy, QualType SharedsPtrTy, + const OMPTaskDataTy &Data, + ArrayRef<PrivateDataTy> Privates, bool ForDup) { + auto &C = CGF.getContext(); + auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); + LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); + LValue SrcBase; + if (!Data.FirstprivateVars.empty()) { + SrcBase = CGF.MakeAddrLValue( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), + SharedsTy); + } + CodeGenFunction::CGCapturedStmtInfo CapturesInfo( + cast<CapturedStmt>(*D.getAssociatedStmt())); + FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); + for (auto &&Pair : Privates) { + auto *VD = Pair.second.PrivateCopy; + auto *Init = VD->getAnyInitializer(); + if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && + !CGF.isTrivialInitializer(Init)))) { + LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); + if (auto *Elem = Pair.second.PrivateElemInit) { + auto *OriginalVD = Pair.second.Original; + auto *SharedField = CapturesInfo.lookup(OriginalVD); + auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); + SharedRefLValue = CGF.MakeAddrLValue( + Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), + SharedRefLValue.getType(), AlignmentSource::Decl); + QualType Type = OriginalVD->getType(); + if (Type->isArrayType()) { + // Initialize firstprivate array. + if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { + // Perform simple memcpy. + CGF.EmitAggregateAssign(PrivateLValue.getAddress(), + SharedRefLValue.getAddress(), Type); + } else { + // Initialize firstprivate array using element-by-element + // intialization. 
+ CGF.EmitOMPAggregateAssign( + PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, + [&CGF, Elem, Init, &CapturesInfo](Address DestElement, + Address SrcElement) { + // Clean up any temporaries needed by the initialization. + CodeGenFunction::OMPPrivateScope InitScope(CGF); + InitScope.addPrivate( + Elem, [SrcElement]() -> Address { return SrcElement; }); + (void)InitScope.Privatize(); + // Emit initialization for single element. + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( + CGF, &CapturesInfo); + CGF.EmitAnyExprToMem(Init, DestElement, + Init->getType().getQualifiers(), + /*IsInitializer=*/false); + }); + } + } else { + CodeGenFunction::OMPPrivateScope InitScope(CGF); + InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { + return SharedRefLValue.getAddress(); + }); + (void)InitScope.Privatize(); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); + CGF.EmitExprAsInit(Init, VD, PrivateLValue, + /*capturedByInit=*/false); + } + } else + CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); + } + ++FI; + } +} + +/// Check if duplication function is required for taskloops. +static bool checkInitIsRequired(CodeGenFunction &CGF, + ArrayRef<PrivateDataTy> Privates) { + bool InitRequired = false; + for (auto &&Pair : Privates) { + auto *VD = Pair.second.PrivateCopy; + auto *Init = VD->getAnyInitializer(); + InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && + !CGF.isTrivialInitializer(Init)); + } + return InitRequired; +} + + +/// Emit task_dup function (for initialization of +/// private/firstprivate/lastprivate vars and last_iter flag) +/// \code +/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int +/// lastpriv) { +/// // setup lastprivate flag +/// task_dst->last = lastpriv; +/// // could be constructor calls here... 
+/// } +/// \endcode +static llvm::Value * +emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, + const OMPExecutableDirective &D, + QualType KmpTaskTWithPrivatesPtrQTy, + const RecordDecl *KmpTaskTWithPrivatesQTyRD, + const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, + QualType SharedsPtrTy, const OMPTaskDataTy &Data, + ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, + /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); + ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, + /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); + ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, + /*Id=*/nullptr, C.IntTy); + Args.push_back(&DstArg); + Args.push_back(&SrcArg); + Args.push_back(&LastprivArg); + auto &TaskDupFnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); + auto *TaskDup = + llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage, + ".omp_task_dup.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args); + + LValue TDBase = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(&DstArg), + KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); + // task_dst->liter = lastpriv; + if (WithLastIter) { + auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); + LValue Base = CGF.EmitLValueForField( + TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); + LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); + llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( + CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); + CGF.EmitStoreOfScalar(Lastpriv, LILVal); + } + + // Emit initial values for private copies (if any). + assert(!Privates.empty()); + Address KmpTaskSharedsPtr = Address::invalid(); + if (!Data.FirstprivateVars.empty()) { + LValue TDBase = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(&SrcArg), + KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); + LValue Base = CGF.EmitLValueForField( + TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); + KmpTaskSharedsPtr = Address( + CGF.EmitLoadOfScalar(CGF.EmitLValueForField( + Base, *std::next(KmpTaskTQTyRD->field_begin(), + KmpTaskTShareds)), + Loc), + CGF.getNaturalTypeAlignment(SharedsTy)); + } + emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, + SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); + CGF.FinishFunction(); + return TaskDup; +} + +/// Checks if destructor function is required to be generated. +/// \return true if cleanups are required, false otherwise. 
+static bool +checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { + bool NeedsCleanup = false; + auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); + auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); + for (auto *FD : PrivateRD->fields()) { + NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); + if (NeedsCleanup) + break; + } + return NeedsCleanup; +} + +CGOpenMPRuntime::TaskResultTy +CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D, + llvm::Value *TaskFunction, QualType SharedsTy, + Address Shareds, const OMPTaskDataTy &Data) { auto &C = CGM.getContext(); - llvm::SmallVector<PrivateDataTy, 8> Privates; + llvm::SmallVector<PrivateDataTy, 4> Privates; // Aggregate privates and sort them by the alignment. - auto I = PrivateCopies.begin(); - for (auto *E : PrivateVars) { + auto I = Data.PrivateCopies.begin(); + for (auto *E : Data.PrivateVars) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Privates.push_back(std::make_pair( C.getDeclAlign(VD), @@ -2776,16 +3695,26 @@ void CGOpenMPRuntime::emitTaskCall( /*PrivateElemInit=*/nullptr))); ++I; } - I = FirstprivateCopies.begin(); - auto IElemInitRef = FirstprivateInits.begin(); - for (auto *E : FirstprivateVars) { + I = Data.FirstprivateCopies.begin(); + auto IElemInitRef = Data.FirstprivateInits.begin(); + for (auto *E : Data.FirstprivateVars) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Privates.push_back(std::make_pair( C.getDeclAlign(VD), PrivateHelpersTy( VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())))); - ++I, ++IElemInitRef; + ++I; + ++IElemInitRef; + } + I = Data.LastprivateCopies.begin(); + for (auto *E : Data.LastprivateVars) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Privates.push_back(std::make_pair( + C.getDeclAlign(VD), + PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), + /*PrivateElemInit=*/nullptr))); + ++I; } llvm::array_pod_sort(Privates.begin(), Privates.end(), array_pod_sort_comparator); @@ -2794,8 +3723,8 @@ void CGOpenMPRuntime::emitTaskCall( emitKmpRoutineEntryT(KmpInt32Ty); // Build type kmp_task_t (if not built yet). if (KmpTaskTQTy.isNull()) { - KmpTaskTQTy = C.getRecordType( - createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy)); + KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( + CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); } auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); // Build particular struct kmp_task_t for the given task. @@ -2806,7 +3735,7 @@ void CGOpenMPRuntime::emitTaskCall( C.getPointerType(KmpTaskTWithPrivatesQTy); auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); - auto *KmpTaskTWithPrivatesTySize = getTypeSize(CGF, KmpTaskTWithPrivatesQTy); + auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy); QualType SharedsPtrTy = C.getPointerType(SharedsTy); // Emit initial values for private copies (if any). 
@@ -2818,7 +3747,8 @@ void CGOpenMPRuntime::emitTaskCall( if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); TaskPrivatesMap = emitTaskPrivateMappingFunction( - CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates); + CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, + FI->getType(), Privates); TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskPrivatesMap, TaskPrivatesMapTy); } else { @@ -2828,8 +3758,9 @@ void CGOpenMPRuntime::emitTaskCall( // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, // kmp_task_t *tt); auto *TaskEntry = emitProxyTaskFunction( - CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, - KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); + CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, + KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, + TaskPrivatesMap); // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, @@ -2837,15 +3768,27 @@ void CGOpenMPRuntime::emitTaskCall( // Task flags. Format is taken from // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, // description of kmp_tasking_flags struct. - const unsigned TiedFlag = 0x1; - const unsigned FinalFlag = 0x2; - unsigned Flags = Tied ? TiedFlag : 0; + enum { + TiedFlag = 0x1, + FinalFlag = 0x2, + DestructorsFlag = 0x8, + PriorityFlag = 0x20 + }; + unsigned Flags = Data.Tied ? TiedFlag : 0; + bool NeedsCleanup = false; + if (!Privates.empty()) { + NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); + if (NeedsCleanup) + Flags = Flags | DestructorsFlag; + } + if (Data.Priority.getInt()) + Flags = Flags | PriorityFlag; auto *TaskFlags = - Final.getPointer() - ? CGF.Builder.CreateSelect(Final.getPointer(), + Data.Final.getPointer() + ? CGF.Builder.CreateSelect(Data.Final.getPointer(), CGF.Builder.getInt32(FinalFlag), CGF.Builder.getInt32(/*C=*/0)) - : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0); + : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), @@ -2875,96 +3818,71 @@ void CGOpenMPRuntime::emitTaskCall( CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); } // Emit initial values for private copies (if any). 
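The flags word handed to __kmpc_omp_task_alloc is assembled from the bits introduced in the enum above. A minimal sketch of that combination, assuming the 'final' value is known at compile time (the generated code also handles the dynamic case with a select); the helper name is purely illustrative:

// Illustrative helper (not part of the patch); bit values match the enum above
// and the kmp_tasking_flags description referenced in kmp.h.
enum TaskFlagBits {
  TiedFlag        = 0x1,   // tied task
  FinalFlag       = 0x2,   // 'final' clause evaluated to true
  DestructorsFlag = 0x8,   // private copies need the destructor thunk
  PriorityFlag    = 0x20,  // 'priority' clause present
};

static unsigned buildTaskAllocFlags(bool Tied, bool Final, bool NeedsCleanup,
                                    bool HasPriority) {
  unsigned Flags = Tied ? TiedFlag : 0;
  if (Final)
    Flags |= FinalFlag;
  if (NeedsCleanup)
    Flags |= DestructorsFlag;
  if (HasPriority)
    Flags |= PriorityFlag;
  return Flags;
}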
- bool NeedsCleanup = false; + TaskResultTy Result; if (!Privates.empty()) { - auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); - auto PrivatesBase = CGF.EmitLValueForField(Base, *FI); - FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); - LValue SharedsBase; - if (!FirstprivateVars.empty()) { - SharedsBase = CGF.MakeAddrLValue( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), - SharedsTy); - } - CodeGenFunction::CGCapturedStmtInfo CapturesInfo( - cast<CapturedStmt>(*D.getAssociatedStmt())); - for (auto &&Pair : Privates) { - auto *VD = Pair.second.PrivateCopy; - auto *Init = VD->getAnyInitializer(); - LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); - if (Init) { - if (auto *Elem = Pair.second.PrivateElemInit) { - auto *OriginalVD = Pair.second.Original; - auto *SharedField = CapturesInfo.lookup(OriginalVD); - auto SharedRefLValue = - CGF.EmitLValueForField(SharedsBase, SharedField); - SharedRefLValue = CGF.MakeAddrLValue( - Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), - SharedRefLValue.getType(), AlignmentSource::Decl); - QualType Type = OriginalVD->getType(); - if (Type->isArrayType()) { - // Initialize firstprivate array. - if (!isa<CXXConstructExpr>(Init) || - CGF.isTrivialInitializer(Init)) { - // Perform simple memcpy. - CGF.EmitAggregateAssign(PrivateLValue.getAddress(), - SharedRefLValue.getAddress(), Type); - } else { - // Initialize firstprivate array using element-by-element - // intialization. - CGF.EmitOMPAggregateAssign( - PrivateLValue.getAddress(), SharedRefLValue.getAddress(), - Type, [&CGF, Elem, Init, &CapturesInfo]( - Address DestElement, Address SrcElement) { - // Clean up any temporaries needed by the initialization. - CodeGenFunction::OMPPrivateScope InitScope(CGF); - InitScope.addPrivate(Elem, [SrcElement]() -> Address { - return SrcElement; - }); - (void)InitScope.Privatize(); - // Emit initialization for single element. - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( - CGF, &CapturesInfo); - CGF.EmitAnyExprToMem(Init, DestElement, - Init->getType().getQualifiers(), - /*IsInitializer=*/false); - }); - } - } else { - CodeGenFunction::OMPPrivateScope InitScope(CGF); - InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { - return SharedRefLValue.getAddress(); - }); - (void)InitScope.Privatize(); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); - CGF.EmitExprAsInit(Init, VD, PrivateLValue, - /*capturedByInit=*/false); - } - } else { - CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); - } - } - NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType(); - ++FI; + emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, + SharedsTy, SharedsPtrTy, Data, Privates, + /*ForDup=*/false); + if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && + (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { + Result.TaskDupFn = emitTaskDupFunction( + CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, + KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, + /*WithLastIter=*/!Data.LastprivateVars.empty()); } } + // Fields of union "kmp_cmplrdata_t" for destructors and priority. + enum { Priority = 0, Destructors = 1 }; // Provide pointer to function with destructors for privates. - llvm::Value *DestructorFn = - NeedsCleanup ? 
emitDestructorsFunction(CGM, Loc, KmpInt32Ty, - KmpTaskTWithPrivatesPtrQTy, - KmpTaskTWithPrivatesQTy) - : llvm::ConstantPointerNull::get( - cast<llvm::PointerType>(KmpRoutineEntryPtrTy)); - LValue Destructor = CGF.EmitLValueForField( - TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors)); - CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - DestructorFn, KmpRoutineEntryPtrTy), - Destructor); + auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); + auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl(); + if (NeedsCleanup) { + llvm::Value *DestructorFn = emitDestructorsFunction( + CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, + KmpTaskTWithPrivatesQTy); + LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); + LValue DestructorsLV = CGF.EmitLValueForField( + Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); + CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + DestructorFn, KmpRoutineEntryPtrTy), + DestructorsLV); + } + // Set priority. + if (Data.Priority.getInt()) { + LValue Data2LV = CGF.EmitLValueForField( + TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); + LValue PriorityLV = CGF.EmitLValueForField( + Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); + CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); + } + Result.NewTask = NewTask; + Result.TaskEntry = TaskEntry; + Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; + Result.TDBase = TDBase; + Result.KmpTaskTQTyRD = KmpTaskTQTyRD; + return Result; +} + +void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D, + llvm::Value *TaskFunction, + QualType SharedsTy, Address Shareds, + const Expr *IfCond, + const OMPTaskDataTy &Data) { + if (!CGF.HaveInsertPoint()) + return; + TaskResultTy Result = + emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); + llvm::Value *NewTask = Result.NewTask; + llvm::Value *TaskEntry = Result.TaskEntry; + llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; + LValue TDBase = Result.TDBase; + RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; + auto &C = CGM.getContext(); // Process list of dependences. Address DependenciesArray = Address::invalid(); - unsigned NumDependencies = Dependences.size(); + unsigned NumDependencies = Data.Dependences.size(); if (NumDependencies) { // Dependence kind for RTL. 
enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 }; @@ -2981,18 +3899,18 @@ void CGOpenMPRuntime::emitTaskCall( addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); KmpDependInfoRD->completeDefinition(); KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); - } else { + } else KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); - } CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); // Define type kmp_depend_info[<Dependences.size()>]; QualType KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_depend_info[<Dependences.size()>] deps; - DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy); + DependenciesArray = + CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); for (unsigned i = 0; i < NumDependencies; ++i) { - const Expr *E = Dependences[i].second; + const Expr *E = Data.Dependences[i].second; auto Addr = CGF.EmitLValue(E); llvm::Value *Size; QualType Ty = E->getType(); @@ -3006,7 +3924,7 @@ void CGOpenMPRuntime::emitTaskCall( llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); } else - Size = getTypeSize(CGF, Ty); + Size = CGF.getTypeSize(Ty); auto Base = CGF.MakeAddrLValue( CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), KmpDependInfoTy); @@ -3022,7 +3940,7 @@ void CGOpenMPRuntime::emitTaskCall( CGF.EmitStoreOfScalar(Size, LenLVal); // deps[i].flags = <Dependences[i].first>; RTLDependenceKindTy DepKind; - switch (Dependences[i].first) { + switch (Data.Dependences[i].first) { case OMPC_DEPEND_in: DepKind = DepIn; break; @@ -3048,8 +3966,6 @@ void CGOpenMPRuntime::emitTaskCall( // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() // libcall. - // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t - // *new_task); // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence @@ -3067,19 +3983,26 @@ void CGOpenMPRuntime::emitTaskCall( DepTaskArgs[5] = CGF.Builder.getInt32(0); DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } - auto &&ThenCodeGen = [this, NumDependencies, - &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) { - // TODO: add check for untied tasks. 
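Each element of the ".dep.arr.addr" dependence array built above is filled with an address, a byte length, and a kind flag. A hedged C view of that record follows; the field names come from the deps[i].* comments, while the concrete integer types are assumptions rather than a quote of the runtime's kmp.h.

#include <cstddef>
#include <cstdint>

// Hedged sketch of one kmp_depend_info element as populated above.
enum RTLDependenceKindSketch : uint8_t {
  DepIn    = 0x01,  // depend(in: ...)
  DepInOut = 0x03,  // depend(out: ...) / depend(inout: ...), per the switch above
};

struct kmp_depend_info_sketch {
  intptr_t base_addr;  // deps[i].base_addr = address of the dependence item
  size_t   len;        // deps[i].len = byte size of the dependence item
  uint8_t  flags;      // deps[i].flags = DepIn or DepInOut
};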
+ auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD, + NumDependencies, &TaskArgs, + &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { + if (!Data.Tied) { + auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); + auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); + CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); + } if (NumDependencies) { - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), - DepTaskArgs); + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); } else { CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); } + // Check if parent region is untied and build return for untied task; + if (auto *Region = + dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); }; - typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value> - IfCallEndCleanup; llvm::Value *DepWaitTaskArgs[6]; if (NumDependencies) { @@ -3090,40 +4013,111 @@ void CGOpenMPRuntime::emitTaskCall( DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } - auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, - NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) { + auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, + NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF, + PrePostActionTy &) { + auto &RT = CGF.CGM.getOpenMPRuntime(); CodeGenFunction::RunCleanupsScope LocalScope(CGF); // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info // is specified. 
if (NumDependencies) - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), + CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), DepWaitTaskArgs); + // Call proxy_task_entry(gtid, new_task); + auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy]( + CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; + CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); + }; + // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), - TaskArgs); // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task); - CGF.EHStack.pushCleanup<IfCallEndCleanup>( - NormalAndEHCleanup, - createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), - llvm::makeArrayRef(TaskArgs)); - - // Call proxy_task_entry(gtid, new_task); - llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; - CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); + RegionCodeGenTy RCG(CodeGen); + CommonActionTy Action( + RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, + RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); + RCG.setAction(Action); + RCG(CGF); }; - if (IfCond) { + if (IfCond) emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); - } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); - ThenCodeGen(CGF); + else { + RegionCodeGenTy ThenRCG(ThenCodeGen); + ThenRCG(CGF); } } +void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPLoopDirective &D, + llvm::Value *TaskFunction, + QualType SharedsTy, Address Shareds, + const Expr *IfCond, + const OMPTaskDataTy &Data) { + if (!CGF.HaveInsertPoint()) + return; + TaskResultTy Result = + emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); + // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() + // libcall. 
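For readability, the runtime entry point whose argument list emitTaskLoopCall assembles can be written out as the prototype below. It is transcribed from the signature comment that follows; the opaque struct declarations and fixed-width typedefs are added only to make the sketch self-contained and may not match the runtime's own headers exactly.

#include <cstdint>

// Hedged transcription of the __kmpc_taskloop signature described in the
// comment below; ident_t and kmp_task_t are the runtime's opaque types.
struct ident_t;
struct kmp_task_t;
typedef uint64_t kmp_uint64;
typedef int64_t  kmp_int64;

extern "C" void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task,
                                int if_val, kmp_uint64 *lb, kmp_uint64 *ub,
                                kmp_int64 st, int nogroup,
                                int sched,  // 0 = none, 1 = grainsize, 2 = num_tasks
                                kmp_uint64 grainsize, void *task_dup);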
+ // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int + // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int + // sched, kmp_uint64 grainsize, void *task_dup); + llvm::Value *ThreadID = getThreadID(CGF, Loc); + llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *IfVal; + if (IfCond) { + IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, + /*isSigned=*/true); + } else + IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); + + LValue LBLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); + auto *LBVar = + cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); + CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), + /*IsInitializer=*/true); + LValue UBLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); + auto *UBVar = + cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); + CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), + /*IsInitializer=*/true); + LValue StLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); + auto *StVar = + cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); + CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), + /*IsInitializer=*/true); + enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; + llvm::Value *TaskArgs[] = { + UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(), + UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()), + llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0), + llvm::ConstantInt::getSigned( + CGF.IntTy, Data.Schedule.getPointer() + ? Data.Schedule.getInt() ? NumTasks : Grainsize + : NoSchedule), + Data.Schedule.getPointer() + ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, + /*isSigned=*/false) + : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), + Result.TaskDupFn + ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn, + CGF.VoidPtrTy) + : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); +} + /// \brief Emit reduction operation for each element of array (required for /// array sections) LHS op = RHS. /// \param Type Type of array. @@ -3204,6 +4198,26 @@ static void EmitOMPAggregateReduction( CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } +/// Emit reduction combiner. If the combiner is a simple expression emit it as +/// is, otherwise consider it as combiner of UDR decl and emit it as a call of +/// UDR combiner function. 
+static void emitReductionCombiner(CodeGenFunction &CGF, + const Expr *ReductionOp) { + if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) + if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) + if (auto *DRE = + dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) + if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { + std::pair<llvm::Function *, llvm::Function *> Reduction = + CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); + RValue Func = RValue::get(Reduction.first); + CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); + CGF.EmitIgnoredExpr(ReductionOp); + return; + } + CGF.EmitIgnoredExpr(ReductionOp); +} + static llvm::Value *emitReductionFunction(CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, @@ -3220,9 +4234,7 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM, C.VoidPtrTy); Args.push_back(&LHSArg); Args.push_back(&RHSArg); - FunctionType::ExtInfo EI; - auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, Args, EI, /*isVariadic=*/false); + auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, ".omp.reduction.reduction_func", &CGM.getModule()); @@ -3255,17 +4267,16 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM, return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); }); QualType PrivTy = (*IPriv)->getType(); - if (PrivTy->isArrayType()) { + if (PrivTy->isVariablyModifiedType()) { // Get array size and emit VLA type. ++Idx; Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); + auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); + auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); CodeGenFunction::OpaqueValueMapping OpaqueMap( - CGF, - cast<OpaqueValueExpr>( - CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()), - RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); + CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); CGF.EmitVariablyModifiedType(PrivTy); } } @@ -3278,20 +4289,42 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM, // Emit reduction for array section. auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, - [=](CodeGenFunction &CGF, const Expr *, - const Expr *, - const Expr *) { CGF.EmitIgnoredExpr(E); }); + EmitOMPAggregateReduction( + CGF, (*IPriv)->getType(), LHSVar, RHSVar, + [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { + emitReductionCombiner(CGF, E); + }); } else // Emit reduction for array subscript or single variable. - CGF.EmitIgnoredExpr(E); - ++IPriv, ++ILHS, ++IRHS; + emitReductionCombiner(CGF, E); + ++IPriv; + ++ILHS; + ++IRHS; } Scope.ForceCleanup(); CGF.FinishFunction(); return Fn; } +static void emitSingleReductionCombiner(CodeGenFunction &CGF, + const Expr *ReductionOp, + const Expr *PrivateRef, + const DeclRefExpr *LHS, + const DeclRefExpr *RHS) { + if (PrivateRef->getType()->isArrayType()) { + // Emit reduction for array section. 
+ auto *LHSVar = cast<VarDecl>(LHS->getDecl()); + auto *RHSVar = cast<VarDecl>(RHS->getDecl()); + EmitOMPAggregateReduction( + CGF, PrivateRef->getType(), LHSVar, RHSVar, + [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { + emitReductionCombiner(CGF, ReductionOp); + }); + } else + // Emit reduction for array subscript or single variable. + emitReductionCombiner(CGF, ReductionOp); +} + void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, @@ -3343,16 +4376,11 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { - if ((*IPriv)->getType()->isArrayType()) { - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction( - CGF, (*IPriv)->getType(), LHSVar, RHSVar, - [=](CodeGenFunction &CGF, const Expr *, const Expr *, - const Expr *) { CGF.EmitIgnoredExpr(E); }); - } else - CGF.EmitIgnoredExpr(E); - ++IPriv, ++ILHS, ++IRHS; + emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), + cast<DeclRefExpr>(*IRHS)); + ++IPriv; + ++ILHS; + ++IRHS; } return; } @@ -3361,7 +4389,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; auto Size = RHSExprs.size(); for (auto *E : Privates) { - if (E->getType()->isArrayType()) + if (E->getType()->isVariablyModifiedType()) // Reserve place for array size. ++Size; } @@ -3380,20 +4408,18 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), Elem); - if ((*IPriv)->getType()->isArrayType()) { + if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. ++Idx; Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); - CGF.Builder.CreateStore( - CGF.Builder.CreateIntToPtr( - CGF.Builder.CreateIntCast( - CGF.getVLASize(CGF.getContext().getAsVariableArrayType( - (*IPriv)->getType())) - .first, - CGF.SizeTy, /*isSigned=*/false), - CGF.VoidPtrTy), - Elem); + llvm::Value *Size = CGF.Builder.CreateIntCast( + CGF.getVLASize( + CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) + .first, + CGF.SizeTy, /*isSigned=*/false); + CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), + Elem); } } @@ -3407,11 +4433,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, // 4. 
Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), // RedList, reduce_func, &<lock>); - auto *IdentTLoc = emitUpdateLocation( - CGF, Loc, - static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE)); + auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); auto *ThreadId = getThreadID(CGF, Loc); - auto *ReductionArrayTySize = getTypeSize(CGF, ReductionArrayTy); + auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(), CGF.VoidPtrTy); @@ -3443,38 +4467,33 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); CGF.EmitBlock(Case1BB); - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); - llvm::Value *EndArgs[] = { - IdentTLoc, // ident_t *<loc> - ThreadId, // i32 <gtid> - Lock // kmp_critical_name *&<lock> - }; - CGF.EHStack - .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( - NormalAndEHCleanup, - createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait - : OMPRTL__kmpc_end_reduce), - llvm::makeArrayRef(EndArgs)); + // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); + llvm::Value *EndArgs[] = { + IdentTLoc, // ident_t *<loc> + ThreadId, // i32 <gtid> + Lock // kmp_critical_name *&<lock> + }; + auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( + CodeGenFunction &CGF, PrePostActionTy &Action) { auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { - if ((*IPriv)->getType()->isArrayType()) { - // Emit reduction for array section. - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction( - CGF, (*IPriv)->getType(), LHSVar, RHSVar, - [=](CodeGenFunction &CGF, const Expr *, const Expr *, - const Expr *) { CGF.EmitIgnoredExpr(E); }); - } else - // Emit reduction for array subscript or single variable. - CGF.EmitIgnoredExpr(E); - ++IPriv, ++ILHS, ++IRHS; + emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), + cast<DeclRefExpr>(*IRHS)); + ++IPriv; + ++ILHS; + ++IRHS; } - } + }; + RegionCodeGenTy RCG(CodeGen); + CommonActionTy Action( + nullptr, llvm::None, + createRuntimeFunction(WithNowait ? 
OMPRTL__kmpc_end_reduce_nowait + : OMPRTL__kmpc_end_reduce), + EndArgs); + RCG.setAction(Action); + RCG(CGF); CGF.EmitBranch(DefaultBB); @@ -3487,103 +4506,113 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); CGF.EmitBlock(Case2BB); - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - if (!WithNowait) { - // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); - llvm::Value *EndArgs[] = { - IdentTLoc, // ident_t *<loc> - ThreadId, // i32 <gtid> - Lock // kmp_critical_name *&<lock> - }; - CGF.EHStack - .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( - NormalAndEHCleanup, - createRuntimeFunction(OMPRTL__kmpc_end_reduce), - llvm::makeArrayRef(EndArgs)); - } + auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps]( + CodeGenFunction &CGF, PrePostActionTy &Action) { auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); auto IPriv = Privates.begin(); for (auto *E : ReductionOps) { - const Expr *XExpr = nullptr; - const Expr *EExpr = nullptr; - const Expr *UpExpr = nullptr; - BinaryOperatorKind BO = BO_Comma; - if (auto *BO = dyn_cast<BinaryOperator>(E)) { - if (BO->getOpcode() == BO_Assign) { - XExpr = BO->getLHS(); - UpExpr = BO->getRHS(); - } + const Expr *XExpr = nullptr; + const Expr *EExpr = nullptr; + const Expr *UpExpr = nullptr; + BinaryOperatorKind BO = BO_Comma; + if (auto *BO = dyn_cast<BinaryOperator>(E)) { + if (BO->getOpcode() == BO_Assign) { + XExpr = BO->getLHS(); + UpExpr = BO->getRHS(); } - // Try to emit update expression as a simple atomic. - auto *RHSExpr = UpExpr; - if (RHSExpr) { - // Analyze RHS part of the whole expression. - if (auto *ACO = dyn_cast<AbstractConditionalOperator>( - RHSExpr->IgnoreParenImpCasts())) { - // If this is a conditional operator, analyze its condition for - // min/max reduction operator. - RHSExpr = ACO->getCond(); - } - if (auto *BORHS = - dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { - EExpr = BORHS->getRHS(); - BO = BORHS->getOpcode(); - } + } + // Try to emit update expression as a simple atomic. + auto *RHSExpr = UpExpr; + if (RHSExpr) { + // Analyze RHS part of the whole expression. + if (auto *ACO = dyn_cast<AbstractConditionalOperator>( + RHSExpr->IgnoreParenImpCasts())) { + // If this is a conditional operator, analyze its condition for + // min/max reduction operator. + RHSExpr = ACO->getCond(); } - if (XExpr) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto &&AtomicRedGen = [this, BO, VD, IPriv, - Loc](CodeGenFunction &CGF, const Expr *XExpr, - const Expr *EExpr, const Expr *UpExpr) { - LValue X = CGF.EmitLValue(XExpr); - RValue E; - if (EExpr) - E = CGF.EmitAnyExpr(EExpr); - CGF.EmitOMPAtomicSimpleUpdateExpr( - X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, - [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) { - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate( - VD, [&CGF, VD, XRValue, Loc]() -> Address { - Address LHSTemp = CGF.CreateMemTemp(VD->getType()); - CGF.emitOMPSimpleStore( - CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, - VD->getType().getNonReferenceType(), Loc); - return LHSTemp; - }); - (void)PrivateScope.Privatize(); - return CGF.EmitAnyExpr(UpExpr); - }); - }; - if ((*IPriv)->getType()->isArrayType()) { - // Emit atomic reduction for array section. 
- auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, - AtomicRedGen, XExpr, EExpr, UpExpr); - } else - // Emit atomic reduction for array subscript or single variable. - AtomicRedGen(CGF, XExpr, EExpr, UpExpr); - } else { - // Emit as a critical region. - auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *, - const Expr *, const Expr *) { - emitCriticalRegion( - CGF, ".atomic_reduction", - [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc); - }; - if ((*IPriv)->getType()->isArrayType()) { - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, - CritRedGen); - } else - CritRedGen(CGF, nullptr, nullptr, nullptr); + if (auto *BORHS = + dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { + EExpr = BORHS->getRHS(); + BO = BORHS->getOpcode(); } - ++ILHS, ++IRHS, ++IPriv; + } + if (XExpr) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + auto &&AtomicRedGen = [BO, VD, IPriv, + Loc](CodeGenFunction &CGF, const Expr *XExpr, + const Expr *EExpr, const Expr *UpExpr) { + LValue X = CGF.EmitLValue(XExpr); + RValue E; + if (EExpr) + E = CGF.EmitAnyExpr(EExpr); + CGF.EmitOMPAtomicSimpleUpdateExpr( + X, E, BO, /*IsXLHSInRHSPart=*/true, + llvm::AtomicOrdering::Monotonic, Loc, + [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate( + VD, [&CGF, VD, XRValue, Loc]() -> Address { + Address LHSTemp = CGF.CreateMemTemp(VD->getType()); + CGF.emitOMPSimpleStore( + CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, + VD->getType().getNonReferenceType(), Loc); + return LHSTemp; + }); + (void)PrivateScope.Privatize(); + return CGF.EmitAnyExpr(UpExpr); + }); + }; + if ((*IPriv)->getType()->isArrayType()) { + // Emit atomic reduction for array section. + auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, + AtomicRedGen, XExpr, EExpr, UpExpr); + } else + // Emit atomic reduction for array subscript or single variable. + AtomicRedGen(CGF, XExpr, EExpr, UpExpr); + } else { + // Emit as a critical region. 
+ auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, + const Expr *, const Expr *) { + auto &RT = CGF.CGM.getOpenMPRuntime(); + RT.emitCriticalRegion( + CGF, ".atomic_reduction", + [=](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + emitReductionCombiner(CGF, E); + }, + Loc); + }; + if ((*IPriv)->getType()->isArrayType()) { + auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, + CritRedGen); + } else + CritRedGen(CGF, nullptr, nullptr, nullptr); + } + ++ILHS; + ++IRHS; + ++IPriv; } - } + }; + RegionCodeGenTy AtomicRCG(AtomicCodeGen); + if (!WithNowait) { + // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); + llvm::Value *EndArgs[] = { + IdentTLoc, // ident_t *<loc> + ThreadId, // i32 <gtid> + Lock // kmp_critical_name *&<lock> + }; + CommonActionTy Action(nullptr, llvm::None, + createRuntimeFunction(OMPRTL__kmpc_end_reduce), + EndArgs); + AtomicRCG.setAction(Action); + AtomicRCG(CGF); + } else + AtomicRCG(CGF); CGF.EmitBranch(DefaultBB); CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); @@ -3598,6 +4627,8 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; // Ignore return result until untied tasks are supported. CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); + if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); } void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, @@ -3618,7 +4649,7 @@ enum RTCancelKind { CancelSections = 3, CancelTaskgroup = 4 }; -} +} // anonymous namespace static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { RTCancelKind CancelKind = CancelNoreq; @@ -3680,14 +4711,15 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, // kmp_int32 cncl_kind); if (auto *OMPRegionInfo = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { - auto &&ThenGen = [this, Loc, CancelRegion, - OMPRegionInfo](CodeGenFunction &CGF) { + auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, + PrePostActionTy &) { + auto &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), + RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; // Ignore return result until untied tasks are supported. 
- auto *Result = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args); + auto *Result = CGF.EmitRuntimeCall( + RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); // if (__kmpc_cancel()) { // __kmpc_cancel_barrier(); // exit from construct; @@ -3698,7 +4730,7 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); CGF.EmitBlock(ExitBB); // __kmpc_cancel_barrier(); - emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); + RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); // exit from construct; auto CancelDest = CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); @@ -3706,18 +4738,21 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitBlock(ContBB, /*IsFinished=*/true); }; if (IfCond) - emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {}); - else - ThenGen(CGF); + emitOMPIfClause(CGF, IfCond, ThenGen, + [](CodeGenFunction &, PrePostActionTy &) {}); + else { + RegionCodeGenTy ThenRCG(ThenGen); + ThenRCG(CGF); + } } } /// \brief Obtain information that uniquely identifies a target entry. This -/// consists of the file and device IDs as well as line and column numbers -/// associated with the relevant entry source location. +/// consists of the file and device IDs as well as line number associated with +/// the relevant entry source location. static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, unsigned &DeviceID, unsigned &FileID, - unsigned &LineNum, unsigned &ColumnNum) { + unsigned &LineNum) { auto &SM = C.getSourceManager(); @@ -3737,49 +4772,45 @@ static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, DeviceID = ID.getDevice(); FileID = ID.getFile(); LineNum = PLoc.getLine(); - ColumnNum = PLoc.getColumn(); - return; } void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry) { - + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { assert(!ParentName.empty() && "Invalid target region parent name!"); - const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); - - // Emit target region as a standalone region. - auto &&CodeGen = [&CS](CodeGenFunction &CGF) { - CGF.EmitStmt(CS.getCapturedStmt()); - }; + emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, + IsOffloadEntry, CodeGen); +} - // Create a unique name for the proxy/entry function that using the source - // location information of the current target region. The name will be - // something like: +void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( + const OMPExecutableDirective &D, StringRef ParentName, + llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + // Create a unique name for the entry function using the source location + // information of the current target region. The name will be something like: // - // .omp_offloading.DD_FFFF.PP.lBB.cCC + // __omp_offloading_DD_FFFF_PP_lBB // // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the - // mangled name of the function that encloses the target region, BB is the - // line number of the target region, and CC is the column number of the target - // region. + // mangled name of the function that encloses the target region and BB is the + // line number of the target region. 
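As a concrete illustration of the naming scheme just described: with hypothetical values DeviceID=0x23, FileID=0xbeef, ParentName="_Z3foov", and Line=42, the generated entry function would be named __omp_offloading_23_beef__Z3foov_l42. A minimal standalone sketch of the same formatting:

#include <cstdio>
#include <string>

// Illustrative only: mirrors the "__omp_offloading_DD_FFFF_PP_lBB" format with
// made-up IDs; the real values come from getTargetEntryUniqueInfo().
std::string sampleOffloadEntryName() {
  char Buf[128];
  std::snprintf(Buf, sizeof(Buf), "__omp_offloading_%x_%x_%s_l%u",
                0x23u, 0xbeefu, "_Z3foov", 42u);
  return std::string(Buf);
}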
unsigned DeviceID; unsigned FileID; unsigned Line; - unsigned Column; getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, - Line, Column); + Line); SmallString<64> EntryFnName; { llvm::raw_svector_ostream OS(EntryFnName); - OS << ".omp_offloading" << llvm::format(".%x", DeviceID) - << llvm::format(".%x.", FileID) << ParentName << ".l" << Line << ".c" - << Column; + OS << "__omp_offloading" << llvm::format("_%x", DeviceID) + << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; } + const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + CodeGenFunction CGF(CGM, true); CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); @@ -3813,18 +4844,122 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction( // Register the information for the entry associated with this target region. OffloadEntriesInfoManager.registerTargetRegionEntryInfo( - DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID); - return; + DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID); } -void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - llvm::Value *OutlinedFn, - llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device, - ArrayRef<llvm::Value *> CapturedVars) { - if (!CGF.HaveInsertPoint()) - return; +/// discard all CompoundStmts intervening between two constructs +static const Stmt *ignoreCompoundStmts(const Stmt *Body) { + while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) + Body = CS->body_front(); + + return Body; +} + +/// \brief Emit the num_teams clause of an enclosed teams directive at the +/// target region scope. If there is no teams directive associated with the +/// target directive, or if there is no num_teams clause associated with the +/// enclosed teams directive, return nullptr. +static llvm::Value * +emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { + + assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " + "teams directive expected to be " + "emitted only for the host!"); + + // FIXME: For the moment we do not support combined directives with target and + // teams, so we do not expect to get any num_teams clause in the provided + // directive. Once we support that, this assertion can be replaced by the + // actual emission of the clause expression. + assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr && + "Not expecting clause in directive."); + + // If the current target region has a teams region enclosed, we need to get + // the number of teams to pass to the runtime function call. This is done + // by generating the expression in a inlined region. This is required because + // the expression is captured in the enclosing target environment when the + // teams directive is not combined with target. + + const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + + // FIXME: Accommodate other combined directives with teams when they become + // available. 
+ if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( + ignoreCompoundStmts(CS.getCapturedStmt()))) { + if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); + return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty, + /*IsSigned=*/true); + } + + // If we have an enclosed teams directive but no num_teams clause we use + // the default value 0. + return CGF.Builder.getInt32(0); + } + + // No teams associated with the directive. + return nullptr; +} + +/// \brief Emit the thread_limit clause of an enclosed teams directive at the +/// target region scope. If there is no teams directive associated with the +/// target directive, or if there is no thread_limit clause associated with the +/// enclosed teams directive, return nullptr. +static llvm::Value * +emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { + + assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " + "teams directive expected to be " + "emitted only for the host!"); + + // FIXME: For the moment we do not support combined directives with target and + // teams, so we do not expect to get any thread_limit clause in the provided + // directive. Once we support that, this assertion can be replaced by the + // actual emission of the clause expression. + assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr && + "Not expecting clause in directive."); + + // If the current target region has a teams region enclosed, we need to get + // the thread limit to pass to the runtime function call. This is done + // by generating the expression in a inlined region. This is required because + // the expression is captured in the enclosing target environment when the + // teams directive is not combined with target. + + const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + + // FIXME: Accommodate other combined directives with teams when they become + // available. + if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( + ignoreCompoundStmts(CS.getCapturedStmt()))) { + if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); + return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, + /*IsSigned=*/true); + } + + // If we have an enclosed teams directive but no thread_limit clause we use + // the default value 0. + return CGF.Builder.getInt32(0); + } + + // No teams associated with the directive. + return nullptr; +} + +namespace { +// \brief Utility to handle information from clauses associated with a given +// construct that use mappable expressions (e.g. 'map' clause, 'to' clause). +// It provides a convenient interface to obtain the information and generate +// code for that information. +class MappableExprsHandler { +public: /// \brief Values for bit flags used to specify the mapping type for /// offloading. enum OpenMPOffloadMappingFlags { @@ -3832,105 +4967,806 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, OMP_MAP_TO = 0x01, /// \brief Allocate memory on the device and move data from device to host. OMP_MAP_FROM = 0x02, - /// \brief The element passed to the device is a pointer. 
- OMP_MAP_PTR = 0x20, + /// \brief Always perform the requested mapping action on the element, even + /// if it was already mapped before. + OMP_MAP_ALWAYS = 0x04, + /// \brief Delete the element from the device environment, ignoring the + /// current reference count associated with the element. + OMP_MAP_DELETE = 0x08, + /// \brief The element being mapped is a pointer, therefore the pointee + /// should be mapped as well. + OMP_MAP_IS_PTR = 0x10, + /// \brief This flags signals that an argument is the first one relating to + /// a map/private clause expression. For some cases a single + /// map/privatization results in multiple arguments passed to the runtime + /// library. + OMP_MAP_FIRST_REF = 0x20, + /// \brief This flag signals that the reference being passed is a pointer to + /// private data. + OMP_MAP_PRIVATE_PTR = 0x80, /// \brief Pass the element to the device by value. - OMP_MAP_BYCOPY = 0x80, + OMP_MAP_PRIVATE_VAL = 0x100, }; - enum OpenMPOffloadingReservedDeviceIDs { - /// \brief Device ID if the device was not defined, runtime should get it - /// from environment variables in the spec. - OMP_DEVICEID_UNDEF = -1, - }; + typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; + typedef SmallVector<unsigned, 16> MapFlagsArrayTy; + +private: + /// \brief Directive from where the map clauses were extracted. + const OMPExecutableDirective &Directive; + + /// \brief Function the directive is being generated for. + CodeGenFunction &CGF; + + /// \brief Set of all first private variables in the current directive. + llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; + + llvm::Value *getExprTypeSize(const Expr *E) const { + auto ExprTy = E->getType().getCanonicalType(); + + // Reference types are ignored for mapping purposes. + if (auto *RefTy = ExprTy->getAs<ReferenceType>()) + ExprTy = RefTy->getPointeeType().getCanonicalType(); + + // Given that an array section is considered a built-in type, we need to + // do the calculation based on the length of the section instead of relying + // on CGF.getTypeSize(E->getType()). + if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { + QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( + OAE->getBase()->IgnoreParenImpCasts()) + .getCanonicalType(); + + // If there is no length associated with the expression, that means we + // are using the whole length of the base. + if (!OAE->getLength() && OAE->getColonLoc().isValid()) + return CGF.getTypeSize(BaseTy); + + llvm::Value *ElemSize; + if (auto *PTy = BaseTy->getAs<PointerType>()) + ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); + else { + auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); + assert(ATy && "Expecting array type if not a pointer type."); + ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); + } + + // If we don't have a length at this point, that is because we have an + // array section with a single element. + if (!OAE->getLength()) + return ElemSize; + + auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); + LengthVal = + CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); + return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); + } + return CGF.getTypeSize(ExprTy); + } + + /// \brief Return the corresponding bits for a given map clause modifier. Add + /// a flag marking the map as a pointer if requested. Add a flag marking the + /// map as the first one of a series of maps that relate to the same map + /// expression. 
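As a quick sanity check of how these bits combine (a standalone mirror of the constants above, not the enum itself), consider a hypothetical map(always, tofrom: x): the first entry emitted for the capture of x carries the to/from/always bits plus the first-reference marker, while a follow-up entry reached through a pointer in the same expression carries the pointer bit instead.

enum : unsigned {
  MAP_TO = 0x01,        // mirrors OMP_MAP_TO
  MAP_FROM = 0x02,      // mirrors OMP_MAP_FROM
  MAP_ALWAYS = 0x04,    // mirrors OMP_MAP_ALWAYS
  MAP_IS_PTR = 0x10,    // mirrors OMP_MAP_IS_PTR
  MAP_FIRST_REF = 0x20, // mirrors OMP_MAP_FIRST_REF
};
// map(always, tofrom: x), first entry for the capture of 'x':
static_assert((MAP_TO | MAP_FROM | MAP_ALWAYS | MAP_FIRST_REF) == 0x27,
              "to|from|always|first-ref");
// Later entry of the same expression reached through a pointer:
static_assert((MAP_TO | MAP_FROM | MAP_ALWAYS | MAP_IS_PTR) == 0x17,
              "to|from|always|is-ptr");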
+ unsigned getMapTypeBits(OpenMPMapClauseKind MapType, + OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, + bool AddIsFirstFlag) const { + unsigned Bits = 0u; + switch (MapType) { + case OMPC_MAP_alloc: + case OMPC_MAP_release: + // alloc and release is the default behavior in the runtime library, i.e. + // if we don't pass any bits alloc/release that is what the runtime is + // going to do. Therefore, we don't need to signal anything for these two + // type modifiers. + break; + case OMPC_MAP_to: + Bits = OMP_MAP_TO; + break; + case OMPC_MAP_from: + Bits = OMP_MAP_FROM; + break; + case OMPC_MAP_tofrom: + Bits = OMP_MAP_TO | OMP_MAP_FROM; + break; + case OMPC_MAP_delete: + Bits = OMP_MAP_DELETE; + break; + default: + llvm_unreachable("Unexpected map type!"); + break; + } + if (AddPtrFlag) + Bits |= OMP_MAP_IS_PTR; + if (AddIsFirstFlag) + Bits |= OMP_MAP_FIRST_REF; + if (MapTypeModifier == OMPC_MAP_always) + Bits |= OMP_MAP_ALWAYS; + return Bits; + } + + /// \brief Return true if the provided expression is a final array section. A + /// final array section, is one whose length can't be proved to be one. + bool isFinalArraySectionExpression(const Expr *E) const { + auto *OASE = dyn_cast<OMPArraySectionExpr>(E); + + // It is not an array section and therefore not a unity-size one. + if (!OASE) + return false; + + // An array section with no colon always refer to a single element. + if (OASE->getColonLoc().isInvalid()) + return false; + + auto *Length = OASE->getLength(); + + // If we don't have a length we have to check if the array has size 1 + // for this dimension. Also, we should always expect a length if the + // base type is pointer. + if (!Length) { + auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType( + OASE->getBase()->IgnoreParenImpCasts()) + .getCanonicalType(); + if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) + return ATy->getSize().getSExtValue() != 1; + // If we don't have a constant dimension length, we have to consider + // the current section as having any size, so it is not necessarily + // unitary. If it happen to be unity size, that's user fault. + return true; + } + + // Check if the length evaluates to 1. + llvm::APSInt ConstLength; + if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) + return true; // Can have more that size 1. + + return ConstLength.getSExtValue() != 1; + } + + /// \brief Generate the base pointers, section pointers, sizes and map type + /// bits for the provided map type, map modifier, and expression components. + /// \a IsFirstComponent should be set to true if the provided set of + /// components is the first associated with a capture. + void generateInfoForComponentList( + OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, + OMPClauseMappableExprCommon::MappableExprComponentListRef Components, + MapValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, + MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, + bool IsFirstComponentList) const { + + // The following summarizes what has to be generated for each map and the + // types bellow. The generated information is expressed in this order: + // base pointer, section pointer, size, flags + // (to add to the ones that come from the map type and modifier). 
+ // + // double d; + // int i[100]; + // float *p; + // + // struct S1 { + // int i; + // float f[50]; + // } + // struct S2 { + // int i; + // float f[50]; + // S1 s; + // double *p; + // struct S2 *ps; + // } + // S2 s; + // S2 *ps; + // + // map(d) + // &d, &d, sizeof(double), noflags + // + // map(i) + // &i, &i, 100*sizeof(int), noflags + // + // map(i[1:23]) + // &i(=&i[0]), &i[1], 23*sizeof(int), noflags + // + // map(p) + // &p, &p, sizeof(float*), noflags + // + // map(p[1:24]) + // p, &p[1], 24*sizeof(float), noflags + // + // map(s) + // &s, &s, sizeof(S2), noflags + // + // map(s.i) + // &s, &(s.i), sizeof(int), noflags + // + // map(s.s.f) + // &s, &(s.i.f), 50*sizeof(int), noflags + // + // map(s.p) + // &s, &(s.p), sizeof(double*), noflags + // + // map(s.p[:22], s.a s.b) + // &s, &(s.p), sizeof(double*), noflags + // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag + // + // map(s.ps) + // &s, &(s.ps), sizeof(S2*), noflags + // + // map(s.ps->s.i) + // &s, &(s.ps), sizeof(S2*), noflags + // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag + // + // map(s.ps->ps) + // &s, &(s.ps), sizeof(S2*), noflags + // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag + // + // map(s.ps->ps->ps) + // &s, &(s.ps), sizeof(S2*), noflags + // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // + // map(s.ps->ps->s.f[:22]) + // &s, &(s.ps), sizeof(S2*), noflags + // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag + // + // map(ps) + // &ps, &ps, sizeof(S2*), noflags + // + // map(ps->i) + // ps, &(ps->i), sizeof(int), noflags + // + // map(ps->s.f) + // ps, &(ps->s.f[0]), 50*sizeof(float), noflags + // + // map(ps->p) + // ps, &(ps->p), sizeof(double*), noflags + // + // map(ps->p[:22]) + // ps, &(ps->p), sizeof(double*), noflags + // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag + // + // map(ps->ps) + // ps, &(ps->ps), sizeof(S2*), noflags + // + // map(ps->ps->s.i) + // ps, &(ps->ps), sizeof(S2*), noflags + // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag + // + // map(ps->ps->ps) + // ps, &(ps->ps), sizeof(S2*), noflags + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // + // map(ps->ps->ps->ps) + // ps, &(ps->ps), sizeof(S2*), noflags + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // + // map(ps->ps->ps->s.f[:22]) + // ps, &(ps->ps), sizeof(S2*), noflags + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + + // extra_flag + + // Track if the map information being generated is the first for a capture. + bool IsCaptureFirstInfo = IsFirstComponentList; + + // Scan the components from the base to the complete expression. + auto CI = Components.rbegin(); + auto CE = Components.rend(); + auto I = CI; + + // Track if the map information being generated is the first for a list of + // components. + bool IsExpressionFirstInfo = true; + llvm::Value *BP = nullptr; + + if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) { + // The base is the 'this' pointer. The content of the pointer is going + // to be the base of the field being mapped. + BP = CGF.EmitScalarExpr(ME->getBase()); + } else { + // The base is the reference to the variable. + // BP = &Var. 
+ BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression())) + .getPointer(); + + // If the variable is a pointer and is being dereferenced (i.e. is not + // the last component), the base has to be the pointer itself, not its + // reference. + if (I->getAssociatedDeclaration()->getType()->isAnyPointerType() && + std::next(I) != CE) { + auto PtrAddr = CGF.MakeNaturalAlignAddrLValue( + BP, I->getAssociatedDeclaration()->getType()); + BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(), + I->getAssociatedDeclaration() + ->getType() + ->getAs<PointerType>()) + .getPointer(); + + // We do not need to generate individual map information for the + // pointer, it can be associated with the combined storage. + ++I; + } + } + + for (; I != CE; ++I) { + auto Next = std::next(I); + + // We need to generate the addresses and sizes if this is the last + // component, if the component is a pointer or if it is an array section + // whose length can't be proved to be one. If this is a pointer, it + // becomes the base address for the following components. + + // A final array section, is one whose length can't be proved to be one. + bool IsFinalArraySection = + isFinalArraySectionExpression(I->getAssociatedExpression()); + + // Get information on whether the element is a pointer. Have to do a + // special treatment for array sections given that they are built-in + // types. + const auto *OASE = + dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); + bool IsPointer = + (OASE && + OMPArraySectionExpr::getBaseOriginalType(OASE) + .getCanonicalType() + ->isAnyPointerType()) || + I->getAssociatedExpression()->getType()->isAnyPointerType(); + + if (Next == CE || IsPointer || IsFinalArraySection) { + + // If this is not the last component, we expect the pointer to be + // associated with an array expression or member expression. + assert((Next == CE || + isa<MemberExpr>(Next->getAssociatedExpression()) || + isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || + isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && + "Unexpected expression"); + + // Save the base we are currently using. + BasePointers.push_back(BP); + + auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer(); + auto *Size = getExprTypeSize(I->getAssociatedExpression()); + + Pointers.push_back(LB); + Sizes.push_back(Size); + // We need to add a pointer flag for each map that comes from the + // same expression except for the first one. We also need to signal + // this map is the first one that relates with the current capture + // (there is a set of entries for each capture). + Types.push_back(getMapTypeBits(MapType, MapTypeModifier, + !IsExpressionFirstInfo, + IsCaptureFirstInfo)); + + // If we have a final array section, we are done with this expression. + if (IsFinalArraySection) + break; + + // The pointer becomes the base for the next element. + if (Next != CE) + BP = LB; + + IsExpressionFirstInfo = false; + IsCaptureFirstInfo = false; + continue; + } + } + } + + /// \brief Return the adjusted map modifiers if the declaration a capture + /// refers to appears in a first-private clause. This is expected to be used + /// only with directives that start with 'target'. + unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap, + unsigned CurrentModifiers) { + assert(Cap.capturesVariable() && "Expected capture by reference only!"); + + // A first private variable captured by reference will use only the + // 'private ptr' and 'map to' flag. 
Return the right flags if the captured + // declaration is known as first-private in this handler. + if (FirstPrivateDecls.count(Cap.getCapturedVar())) + return MappableExprsHandler::OMP_MAP_PRIVATE_PTR | + MappableExprsHandler::OMP_MAP_TO; + + // We didn't modify anything. + return CurrentModifiers; + } + +public: + MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) + : Directive(Dir), CGF(CGF) { + // Extract firstprivate clause information. + for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) + for (const auto *D : C->varlists()) + FirstPrivateDecls.insert( + cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); + } + + /// \brief Generate all the base pointers, section pointers, sizes and map + /// types for the extracted mappable expressions. + void generateAllInfo(MapValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types) const { + BasePointers.clear(); + Pointers.clear(); + Sizes.clear(); + Types.clear(); + + struct MapInfo { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components; + OpenMPMapClauseKind MapType; + OpenMPMapClauseKind MapTypeModifier; + }; + + // We have to process the component lists that relate with the same + // declaration in a single chunk so that we can generate the map flags + // correctly. Therefore, we organize all lists in a map. + llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info; + + // Helper function to fill the information map for the different supported + // clauses. + auto &&InfoGen = + [&Info](const ValueDecl *D, + OMPClauseMappableExprCommon::MappableExprComponentListRef L, + OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier) { + const ValueDecl *VD = + D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; + Info[VD].push_back({L, MapType, MapModifier}); + }; + + for (auto *C : Directive.getClausesOfKind<OMPMapClause>()) + for (auto L : C->component_lists()) + InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier()); + for (auto *C : Directive.getClausesOfKind<OMPToClause>()) + for (auto L : C->component_lists()) + InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown); + for (auto *C : Directive.getClausesOfKind<OMPFromClause>()) + for (auto L : C->component_lists()) + InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown); + + for (auto &M : Info) { + // We need to know when we generate information for the first component + // associated with a capture, because the mapping flags depend on it. + bool IsFirstComponentList = true; + for (MapInfo &L : M.second) { + assert(!L.Components.empty() && + "Not expecting declaration with no component lists."); + generateInfoForComponentList(L.MapType, L.MapTypeModifier, L.Components, + BasePointers, Pointers, Sizes, Types, + IsFirstComponentList); + IsFirstComponentList = false; + } + } + } + + /// \brief Generate the base pointers, section pointers, sizes and map types + /// associated to a given capture. + void generateInfoForCapture(const CapturedStmt::Capture *Cap, + MapValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, + MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types) const { + assert(!Cap->capturesVariableArrayType() && + "Not expecting to generate map info for a variable array type!"); + + BasePointers.clear(); + Pointers.clear(); + Sizes.clear(); + Types.clear(); + + const ValueDecl *VD = + Cap->capturesThis() + ? 
nullptr + : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl()); + + // We need to know when we generating information for the first component + // associated with a capture, because the mapping flags depend on it. + bool IsFirstComponentList = true; + for (auto *C : Directive.getClausesOfKind<OMPMapClause>()) + for (auto L : C->decl_component_lists(VD)) { + assert(L.first == VD && + "We got information for the wrong declaration??"); + assert(!L.second.empty() && + "Not expecting declaration with no component lists."); + generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), + L.second, BasePointers, Pointers, Sizes, + Types, IsFirstComponentList); + IsFirstComponentList = false; + } + + return; + } + + /// \brief Generate the default map information for a given capture \a CI, + /// record field declaration \a RI and captured value \a CV. + void generateDefaultMapInfo( + const CapturedStmt::Capture &CI, const FieldDecl &RI, llvm::Value *CV, + MappableExprsHandler::MapValuesArrayTy &CurBasePointers, + MappableExprsHandler::MapValuesArrayTy &CurPointers, + MappableExprsHandler::MapValuesArrayTy &CurSizes, + MappableExprsHandler::MapFlagsArrayTy &CurMapTypes) { + + // Do the default mapping. + if (CI.capturesThis()) { + CurBasePointers.push_back(CV); + CurPointers.push_back(CV); + const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); + CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); + // Default map type. + CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM); + } else if (CI.capturesVariableByCopy()) { + CurBasePointers.push_back(CV); + CurPointers.push_back(CV); + if (!RI.getType()->isAnyPointerType()) { + // We have to signal to the runtime captures passed by value that are + // not pointers. + CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL); + CurSizes.push_back(CGF.getTypeSize(RI.getType())); + } else { + // Pointers are implicitly mapped with a zero size and no flags + // (other than first map that is added for all implicit maps). + CurMapTypes.push_back(0u); + CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); + } + } else { + assert(CI.capturesVariable() && "Expected captured reference."); + CurBasePointers.push_back(CV); + CurPointers.push_back(CV); + + const ReferenceType *PtrTy = + cast<ReferenceType>(RI.getType().getTypePtr()); + QualType ElementType = PtrTy->getPointeeType(); + CurSizes.push_back(CGF.getTypeSize(ElementType)); + // The default map type for a scalar/complex type is 'to' because by + // default the value doesn't have to be retrieved. For an aggregate + // type, the default is 'tofrom'. + CurMapTypes.push_back(ElementType->isAggregateType() + ? (MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM) + : MappableExprsHandler::OMP_MAP_TO); + + // If we have a capture by reference we may need to add the private + // pointer flag if the base declaration shows in some first-private + // clause. + CurMapTypes.back() = + adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back()); + } + // Every default map produces a single argument, so, it is always the + // first one. + CurMapTypes.back() |= MappableExprsHandler::OMP_MAP_FIRST_REF; + } +}; + +enum OpenMPOffloadingReservedDeviceIDs { + /// \brief Device ID if the device was not defined, runtime should get it + /// from environment variables in the spec. 
+ OMP_DEVICEID_UNDEF = -1, +}; +} // anonymous namespace + +/// \brief Emit the arrays used to pass the captures and map information to the +/// offloading runtime library. If there is no map or capture information, +/// return nullptr by reference. +static void +emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray, + llvm::Value *&PointersArray, llvm::Value *&SizesArray, + llvm::Value *&MapTypesArray, + MappableExprsHandler::MapValuesArrayTy &BasePointers, + MappableExprsHandler::MapValuesArrayTy &Pointers, + MappableExprsHandler::MapValuesArrayTy &Sizes, + MappableExprsHandler::MapFlagsArrayTy &MapTypes) { + auto &CGM = CGF.CGM; + auto &Ctx = CGF.getContext(); + + BasePointersArray = PointersArray = SizesArray = MapTypesArray = nullptr; + + if (unsigned PointerNumVal = BasePointers.size()) { + // Detect if we have any capture size requiring runtime evaluation of the + // size so that a constant array could be eventually used. + bool hasRuntimeEvaluationCaptureSize = false; + for (auto *S : Sizes) + if (!isa<llvm::Constant>(S)) { + hasRuntimeEvaluationCaptureSize = true; + break; + } + + llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); + QualType PointerArrayType = + Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, + /*IndexTypeQuals=*/0); + + BasePointersArray = + CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); + PointersArray = + CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); + + // If we don't have any VLA types or other types that require runtime + // evaluation, we can use a constant array for the map sizes, otherwise we + // need to fill up the arrays as we do for the pointers. + if (hasRuntimeEvaluationCaptureSize) { + QualType SizeArrayType = Ctx.getConstantArrayType( + Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, + /*IndexTypeQuals=*/0); + SizesArray = + CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); + } else { + // We expect all the sizes to be constant, so we collect them to create + // a constant array. + SmallVector<llvm::Constant *, 16> ConstSizes; + for (auto S : Sizes) + ConstSizes.push_back(cast<llvm::Constant>(S)); + + auto *SizesArrayInit = llvm::ConstantArray::get( + llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); + auto *SizesArrayGbl = new llvm::GlobalVariable( + CGM.getModule(), SizesArrayInit->getType(), + /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, + SizesArrayInit, ".offload_sizes"); + SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + SizesArray = SizesArrayGbl; + } + + // The map types are always constant so we don't need to generate code to + // fill arrays. Instead, we create an array constant. 
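To make the constant-vs-runtime size distinction concrete, a hypothetical caller: only when at least one mapped size cannot be folded to a constant (here the dst[0:n] section) does this path fill a stack-allocated sizes array with stores; with only fixed-size captures, .offload_sizes becomes a private constant global instead.

void copy_out(int n, float *dst) {
  float fixed[64];
  #pragma omp target map(to: fixed) map(from: dst[0:n])
  {
    dst[0] = fixed[0];
  }
  // sizeof(fixed) is a compile-time constant, but the dst[0:n] section is
  // n * sizeof(float), so the sizes array must be materialized at run time.
}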
+ llvm::Constant *MapTypesArrayInit = + llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); + auto *MapTypesArrayGbl = new llvm::GlobalVariable( + CGM.getModule(), MapTypesArrayInit->getType(), + /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, + MapTypesArrayInit, ".offload_maptypes"); + MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + MapTypesArray = MapTypesArrayGbl; + + for (unsigned i = 0; i < PointerNumVal; ++i) { + llvm::Value *BPVal = BasePointers[i]; + if (BPVal->getType()->isPointerTy()) + BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); + else { + assert(BPVal->getType()->isIntegerTy() && + "If not a pointer, the value type must be an integer."); + BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); + } + llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, + 0, i); + Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); + CGF.Builder.CreateStore(BPVal, BPAddr); + + llvm::Value *PVal = Pointers[i]; + if (PVal->getType()->isPointerTy()) + PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); + else { + assert(PVal->getType()->isIntegerTy() && + "If not a pointer, the value type must be an integer."); + PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); + } + llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0, + i); + Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); + CGF.Builder.CreateStore(PVal, PAddr); + + if (hasRuntimeEvaluationCaptureSize) { + llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, + /*Idx0=*/0, + /*Idx1=*/i); + Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); + CGF.Builder.CreateStore( + CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true), + SAddr); + } + } + } +} +/// \brief Emit the arguments to be passed to the runtime library based on the +/// arrays of pointers, sizes and map types. 
+static void emitOffloadingArraysArgument( + CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, + llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, + llvm::Value *&MapTypesArrayArg, llvm::Value *BasePointersArray, + llvm::Value *PointersArray, llvm::Value *SizesArray, + llvm::Value *MapTypesArray, unsigned NumElems) { + auto &CGM = CGF.CGM; + if (NumElems) { + BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), BasePointersArray, + /*Idx0=*/0, /*Idx1=*/0); + PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), PointersArray, + /*Idx0=*/0, + /*Idx1=*/0); + SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.SizeTy, NumElems), SizesArray, + /*Idx0=*/0, /*Idx1=*/0); + MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.Int32Ty, NumElems), MapTypesArray, + /*Idx0=*/0, + /*Idx1=*/0); + } else { + BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); + PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); + SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); + MapTypesArrayArg = + llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); + } +} + +void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + llvm::Value *OutlinedFn, + llvm::Value *OutlinedFnID, + const Expr *IfCond, const Expr *Device, + ArrayRef<llvm::Value *> CapturedVars) { + if (!CGF.HaveInsertPoint()) + return; assert(OutlinedFn && "Invalid outlined function!"); auto &Ctx = CGF.getContext(); - // Fill up the arrays with the all the captured variables. - SmallVector<llvm::Value *, 16> BasePointers; - SmallVector<llvm::Value *, 16> Pointers; - SmallVector<llvm::Value *, 16> Sizes; - SmallVector<unsigned, 16> MapTypes; + // Fill up the arrays with all the captured variables. + MappableExprsHandler::MapValuesArrayTy KernelArgs; + MappableExprsHandler::MapValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; - bool hasVLACaptures = false; + MappableExprsHandler::MapValuesArrayTy CurBasePointers; + MappableExprsHandler::MapValuesArrayTy CurPointers; + MappableExprsHandler::MapValuesArrayTy CurSizes; + MappableExprsHandler::MapFlagsArrayTy CurMapTypes; + + // Get mappable expression information. + MappableExprsHandler MEHandler(D, CGF); const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); auto RI = CS.getCapturedRecordDecl()->field_begin(); - // auto II = CS.capture_init_begin(); auto CV = CapturedVars.begin(); for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), CE = CS.capture_end(); CI != CE; ++CI, ++RI, ++CV) { StringRef Name; QualType Ty; - llvm::Value *BasePointer; - llvm::Value *Pointer; - llvm::Value *Size; - unsigned MapType; - // VLA sizes are passed to the outlined region by copy. + CurBasePointers.clear(); + CurPointers.clear(); + CurSizes.clear(); + CurMapTypes.clear(); + + // VLA sizes are passed to the outlined region by copy and do not have map + // information associated. if (CI->capturesVariableArrayType()) { - BasePointer = Pointer = *CV; - Size = getTypeSize(CGF, RI->getType()); + CurBasePointers.push_back(*CV); + CurPointers.push_back(*CV); + CurSizes.push_back(CGF.getTypeSize(RI->getType())); // Copy to the device as an argument. No need to retrieve it. 
- MapType = OMP_MAP_BYCOPY; - hasVLACaptures = true; - } else if (CI->capturesThis()) { - BasePointer = Pointer = *CV; - const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr()); - Size = getTypeSize(CGF, PtrTy->getPointeeType()); - // Default map type. - MapType = OMP_MAP_TO | OMP_MAP_FROM; - } else if (CI->capturesVariableByCopy()) { - MapType = OMP_MAP_BYCOPY; - if (!RI->getType()->isAnyPointerType()) { - // If the field is not a pointer, we need to save the actual value and - // load it as a void pointer. - auto DstAddr = CGF.CreateMemTemp( - Ctx.getUIntPtrType(), - Twine(CI->getCapturedVar()->getName()) + ".casted"); - LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); - - auto *SrcAddrVal = CGF.EmitScalarConversion( - DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), - Ctx.getPointerType(RI->getType()), SourceLocation()); - LValue SrcLV = - CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType()); - - // Store the value using the source type pointer. - CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV); - - // Load the value using the destination type pointer. - BasePointer = Pointer = - CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); - } else { - MapType |= OMP_MAP_PTR; - BasePointer = Pointer = *CV; - } - Size = getTypeSize(CGF, RI->getType()); + CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL | + MappableExprsHandler::OMP_MAP_FIRST_REF); } else { - assert(CI->capturesVariable() && "Expected captured reference."); - BasePointer = Pointer = *CV; - - const ReferenceType *PtrTy = - cast<ReferenceType>(RI->getType().getTypePtr()); - QualType ElementType = PtrTy->getPointeeType(); - Size = getTypeSize(CGF, ElementType); - // The default map type for a scalar/complex type is 'to' because by - // default the value doesn't have to be retrieved. For an aggregate type, - // the default is 'tofrom'. - MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM) - : OMP_MAP_TO; - if (ElementType->isAnyPointerType()) - MapType |= OMP_MAP_PTR; + // If we have any information in the map clause, we use it, otherwise we + // just do a default mapping. + MEHandler.generateInfoForCapture(CI, CurBasePointers, CurPointers, + CurSizes, CurMapTypes); + if (CurBasePointers.empty()) + MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, + CurPointers, CurSizes, CurMapTypes); } - - BasePointers.push_back(BasePointer); - Pointers.push_back(Pointer); - Sizes.push_back(Size); - MapTypes.push_back(MapType); + // We expect to have at least an element of information for this capture. + assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!"); + assert(CurBasePointers.size() == CurPointers.size() && + CurBasePointers.size() == CurSizes.size() && + CurBasePointers.size() == CurMapTypes.size() && + "Inconsistent map information sizes!"); + + // The kernel args are always the first elements of the base pointers + // associated with a capture. + KernelArgs.push_back(CurBasePointers.front()); + // We need to append the results of this capture to what we already have. + BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); + Pointers.append(CurPointers.begin(), CurPointers.end()); + Sizes.append(CurSizes.begin(), CurSizes.end()); + MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); } // Keep track on whether the host function has to be executed. 
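A hypothetical region with no map clauses, annotated with the defaults that generateDefaultMapInfo above would pick for each capture (every default entry also gets the first-reference bit, since it is the only entry for its capture):

struct Point { double x, y, z; };

void kernel(Point &p, float *buf) {
  int scale = 2;
  // No map clauses: default mapping applies.
  #pragma omp target
  {
    // scale : captured by copy, non-pointer     -> passed as a private value
    // buf   : captured by copy, pointer         -> zero size, no extra flags
    // p     : captured by reference, aggregate  -> mapped to + from
    p.x = scale + buf[0];
  }
}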
@@ -3943,128 +5779,22 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, OffloadError); // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, - hasVLACaptures, Device, OutlinedFnID, OffloadError, - OffloadErrorQType](CodeGenFunction &CGF) { - unsigned PointerNumVal = BasePointers.size(); - llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); + auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device, + OutlinedFnID, OffloadError, OffloadErrorQType, + &D](CodeGenFunction &CGF, PrePostActionTy &) { + auto &RT = CGF.CGM.getOpenMPRuntime(); + // Emit the offloading arrays. llvm::Value *BasePointersArray; llvm::Value *PointersArray; llvm::Value *SizesArray; llvm::Value *MapTypesArray; - - if (PointerNumVal) { - llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); - QualType PointerArrayType = Ctx.getConstantArrayType( - Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, - /*IndexTypeQuals=*/0); - - BasePointersArray = - CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); - PointersArray = - CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); - - // If we don't have any VLA types, we can use a constant array for the map - // sizes, otherwise we need to fill up the arrays as we do for the - // pointers. - if (hasVLACaptures) { - QualType SizeArrayType = Ctx.getConstantArrayType( - Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, - /*IndexTypeQuals=*/0); - SizesArray = - CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); - } else { - // We expect all the sizes to be constant, so we collect them to create - // a constant array. - SmallVector<llvm::Constant *, 16> ConstSizes; - for (auto S : Sizes) - ConstSizes.push_back(cast<llvm::Constant>(S)); - - auto *SizesArrayInit = llvm::ConstantArray::get( - llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); - auto *SizesArrayGbl = new llvm::GlobalVariable( - CGM.getModule(), SizesArrayInit->getType(), - /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, - SizesArrayInit, ".offload_sizes"); - SizesArrayGbl->setUnnamedAddr(true); - SizesArray = SizesArrayGbl; - } - - // The map types are always constant so we don't need to generate code to - // fill arrays. Instead, we create an array constant. 
- llvm::Constant *MapTypesArrayInit = - llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); - auto *MapTypesArrayGbl = new llvm::GlobalVariable( - CGM.getModule(), MapTypesArrayInit->getType(), - /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, - MapTypesArrayInit, ".offload_maptypes"); - MapTypesArrayGbl->setUnnamedAddr(true); - MapTypesArray = MapTypesArrayGbl; - - for (unsigned i = 0; i < PointerNumVal; ++i) { - - llvm::Value *BPVal = BasePointers[i]; - if (BPVal->getType()->isPointerTy()) - BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); - else { - assert(BPVal->getType()->isIntegerTy() && - "If not a pointer, the value type must be an integer."); - BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); - } - llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), - BasePointersArray, 0, i); - Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); - CGF.Builder.CreateStore(BPVal, BPAddr); - - llvm::Value *PVal = Pointers[i]; - if (PVal->getType()->isPointerTy()) - PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); - else { - assert(PVal->getType()->isIntegerTy() && - "If not a pointer, the value type must be an integer."); - PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); - } - llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, - 0, i); - Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); - CGF.Builder.CreateStore(PVal, PAddr); - - if (hasVLACaptures) { - llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, - /*Idx0=*/0, - /*Idx1=*/i); - Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); - CGF.Builder.CreateStore(CGF.Builder.CreateIntCast( - Sizes[i], CGM.SizeTy, /*isSigned=*/true), - SAddr); - } - } - - BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, - /*Idx0=*/0, /*Idx1=*/0); - PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, - /*Idx0=*/0, - /*Idx1=*/0); - SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, - /*Idx0=*/0, /*Idx1=*/0); - MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray, - /*Idx0=*/0, - /*Idx1=*/0); - - } else { - BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); - MapTypesArray = - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); - } + emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray, + MapTypesArray, BasePointers, Pointers, Sizes, + MapTypes); + emitOffloadingArraysArgument(CGF, BasePointersArray, PointersArray, + SizesArray, MapTypesArray, BasePointersArray, + PointersArray, SizesArray, MapTypesArray, + BasePointers.size()); // On top of the arrays that were filled up, the target offloading call // takes as arguments the device id as well as the host pointer. 
The host @@ -4082,23 +5812,48 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, llvm::Value *DeviceID; if (Device) DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGM.Int32Ty, /*isSigned=*/true); + CGF.Int32Ty, /*isSigned=*/true); else DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); - llvm::Value *OffloadingArgs[] = { - DeviceID, OutlinedFnID, PointerNum, BasePointersArray, - PointersArray, SizesArray, MapTypesArray}; - auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), - OffloadingArgs); + // Emit the number of elements in the offloading arrays. + llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size()); + + // Return value of the runtime offloading call. + llvm::Value *Return; + + auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D); + auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D); + + // If we have NumTeams defined this means that we have an enclosed teams + // region. Therefore we also expect to have ThreadLimit defined. These two + // values should be defined in the presence of a teams directive, regardless + // of having any clauses associated. If the user is using teams but no + // clauses, these two values will be the default that should be passed to + // the runtime library - a 32-bit integer with the value zero. + if (NumTeams) { + assert(ThreadLimit && "Thread limit expression should be available along " + "with number of teams."); + llvm::Value *OffloadingArgs[] = { + DeviceID, OutlinedFnID, PointerNum, + BasePointersArray, PointersArray, SizesArray, + MapTypesArray, NumTeams, ThreadLimit}; + Return = CGF.EmitRuntimeCall( + RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); + } else { + llvm::Value *OffloadingArgs[] = { + DeviceID, OutlinedFnID, PointerNum, BasePointersArray, + PointersArray, SizesArray, MapTypesArray}; + Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target), + OffloadingArgs); + } CGF.EmitStoreOfScalar(Return, OffloadError); }; // Notify that the host version must be executed. - auto &&ElseGen = [this, OffloadError, - OffloadErrorQType](CodeGenFunction &CGF) { - CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u), + auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u), OffloadError); }; @@ -4107,15 +5862,15 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // regardless of the conditional in the if clause if, e.g., the user do not // specify target triples. if (OutlinedFnID) { - if (IfCond) { + if (IfCond) emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); - ThenGen(CGF); + else { + RegionCodeGenTy ThenRCG(ThenGen); + ThenRCG(CGF); } } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); - ElseGen(CGF); + RegionCodeGenTy ElseRCG(ElseGen); + ElseRCG(CGF); } // Check the error code and execute the host version if required. 
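To tie this lambda together, a hypothetical source-level view of the host code generated for a target region with if and device clauses; err, entry_id, n_args, host_outlined_fn and kernel_args below are placeholder names, and a nested teams construct would switch the launch to __tgt_target_teams with num_teams and thread_limit appended as two extra trailing arguments.

void run(int n, float *v, bool use_gpu) {
  #pragma omp target if(use_gpu) device(1) map(tofrom: v[0:n])
  for (int i = 0; i < n; ++i)
    v[i] *= 2.0f;
  // Host-side control flow sketched above (pseudo-code in comments):
  //   if (use_gpu)
  //     err = __tgt_target(/*device=*/1, entry_id, n_args,
  //                        baseptrs, ptrs, sizes, maptypes);
  //   else
  //     err = -1;                      // skip the offload attempt entirely
  //   if (err != 0)
  //     host_outlined_fn(kernel_args); // fall back to the host version
}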
@@ -4126,11 +5881,10 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); CGF.EmitBlock(OffloadFailedBlock); - CGF.Builder.CreateCall(OutlinedFn, BasePointers); + CGF.Builder.CreateCall(OutlinedFn, KernelArgs); CGF.EmitBranch(OffloadContBlock); CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); - return; } void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, @@ -4148,26 +5902,27 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, unsigned DeviceID; unsigned FileID; unsigned Line; - unsigned Column; getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID, - FileID, Line, Column); + FileID, Line); // Is this a target region that should not be emitted as an entry point? If // so just signal we are done with this target region. - if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo( - DeviceID, FileID, ParentName, Line, Column)) + if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, + ParentName, Line)) return; llvm::Function *Fn; llvm::Constant *Addr; - emitTargetOutlinedFunction(*E, ParentName, Fn, Addr, - /*isOffloadEntry=*/true); + std::tie(Fn, Addr) = + CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( + CGM, cast<OMPTargetDirective>(*E), ParentName, + /*isOffloadEntry=*/true); assert(Fn && Addr && "Target region emission failed."); return; } if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) { - if (!E->getAssociatedStmt()) + if (!E->hasAssociatedStmt()) return; scanForTargetRegionsFunctions( @@ -4183,8 +5938,6 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, // Keep looking for target regions recursively. for (auto *II : S->children()) scanForTargetRegionsFunctions(II, ParentName); - - return; } bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { @@ -4249,3 +6002,594 @@ llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { // compilation unit. return createOffloadingBinaryDescriptorRegistration(); } + +void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars) { + if (!CGF.HaveInsertPoint()) + return; + + auto *RTLoc = emitUpdateLocation(CGF, Loc); + CodeGenFunction::RunCleanupsScope Scope(CGF); + + // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); + llvm::Value *Args[] = { + RTLoc, + CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars + CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; + llvm::SmallVector<llvm::Value *, 16> RealArgs; + RealArgs.append(std::begin(Args), std::end(Args)); + RealArgs.append(CapturedVars.begin(), CapturedVars.end()); + + auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); + CGF.EmitRuntimeCall(RTLFn, RealArgs); +} + +void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, + const Expr *NumTeams, + const Expr *ThreadLimit, + SourceLocation Loc) { + if (!CGF.HaveInsertPoint()) + return; + + auto *RTLoc = emitUpdateLocation(CGF, Loc); + + llvm::Value *NumTeamsVal = + (NumTeams) + ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), + CGF.CGM.Int32Ty, /* isSigned = */ true) + : CGF.Builder.getInt32(0); + + llvm::Value *ThreadLimitVal = + (ThreadLimit) + ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), + CGF.CGM.Int32Ty, /* isSigned = */ true) + : CGF.Builder.getInt32(0); + + // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) + llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, + ThreadLimitVal}; + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), + PushNumTeamsArgs); +} + +void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + const Expr *IfCond, + const Expr *Device, + const RegionCodeGenTy &CodeGen) { + + if (!CGF.HaveInsertPoint()) + return; + + llvm::Value *BasePointersArray = nullptr; + llvm::Value *PointersArray = nullptr; + llvm::Value *SizesArray = nullptr; + llvm::Value *MapTypesArray = nullptr; + unsigned NumOfPtrs = 0; + + // Generate the code for the opening of the data environment. Capture all the + // arguments of the runtime call by reference because they are used in the + // closing of the region. + auto &&BeginThenGen = [&D, &CGF, &BasePointersArray, &PointersArray, + &SizesArray, &MapTypesArray, Device, + &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) { + // Fill up the arrays with all the mapped variables. + MappableExprsHandler::MapValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; + + // Get map clause information. + MappableExprsHandler MCHandler(D, CGF); + MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); + NumOfPtrs = BasePointers.size(); + + // Fill up the arrays and create the arguments. + emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray, + MapTypesArray, BasePointers, Pointers, Sizes, + MapTypes); + + llvm::Value *BasePointersArrayArg = nullptr; + llvm::Value *PointersArrayArg = nullptr; + llvm::Value *SizesArrayArg = nullptr; + llvm::Value *MapTypesArrayArg = nullptr; + emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, + SizesArrayArg, MapTypesArrayArg, + BasePointersArray, PointersArray, SizesArray, + MapTypesArray, NumOfPtrs); + + // Emit device ID if any. + llvm::Value *DeviceID = nullptr; + if (Device) + DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), + CGF.Int32Ty, /*isSigned=*/true); + else + DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + + // Emit the number of elements in the offloading arrays. + auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs); + + llvm::Value *OffloadingArgs[] = { + DeviceID, PointerNum, BasePointersArrayArg, + PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; + auto &RT = CGF.CGM.getOpenMPRuntime(); + CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin), + OffloadingArgs); + }; + + // Generate code for the closing of the data region. 
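An illustrative source-level view of the bracketing generated here (device id and argument counts are hypothetical): the begin callback maps the listed sections, the enclosed code is emitted inline, and the matching end callback is issued with the same arrays.

void stage(int n, float *a, float *b) {
  #pragma omp target data device(0) map(to: a[0:n]) map(from: b[0:n])
  {
    // On entry: __tgt_target_data_begin(0, 2, baseptrs, ptrs, sizes, maptypes)
    #pragma omp target map(to: a[0:n]) map(from: b[0:n])
    for (int i = 0; i < n; ++i)
      b[i] = a[i] + 1.0f;
    // On exit:  __tgt_target_data_end(0, 2, baseptrs, ptrs, sizes, maptypes)
  }
}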
+ auto &&EndThenGen = [&CGF, &BasePointersArray, &PointersArray, &SizesArray, + &MapTypesArray, Device, + &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) { + assert(BasePointersArray && PointersArray && SizesArray && MapTypesArray && + NumOfPtrs && "Invalid data environment closing arguments."); + + llvm::Value *BasePointersArrayArg = nullptr; + llvm::Value *PointersArrayArg = nullptr; + llvm::Value *SizesArrayArg = nullptr; + llvm::Value *MapTypesArrayArg = nullptr; + emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, + SizesArrayArg, MapTypesArrayArg, + BasePointersArray, PointersArray, SizesArray, + MapTypesArray, NumOfPtrs); + + // Emit device ID if any. + llvm::Value *DeviceID = nullptr; + if (Device) + DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), + CGF.Int32Ty, /*isSigned=*/true); + else + DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + + // Emit the number of elements in the offloading arrays. + auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs); + + llvm::Value *OffloadingArgs[] = { + DeviceID, PointerNum, BasePointersArrayArg, + PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; + auto &RT = CGF.CGM.getOpenMPRuntime(); + CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end), + OffloadingArgs); + }; + + // In the event we get an if clause, we don't have to take any action on the + // else side. + auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; + + if (IfCond) { + emitOMPIfClause(CGF, IfCond, BeginThenGen, ElseGen); + } else { + RegionCodeGenTy BeginThenRCG(BeginThenGen); + BeginThenRCG(CGF); + } + + CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, CodeGen); + + if (IfCond) { + emitOMPIfClause(CGF, IfCond, EndThenGen, ElseGen); + } else { + RegionCodeGenTy EndThenRCG(EndThenGen); + EndThenRCG(CGF); + } +} + +void CGOpenMPRuntime::emitTargetDataStandAloneCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, + const Expr *Device) { + if (!CGF.HaveInsertPoint()) + return; + + assert((isa<OMPTargetEnterDataDirective>(D) || + isa<OMPTargetExitDataDirective>(D) || + isa<OMPTargetUpdateDirective>(D)) && + "Expecting either target enter, exit data, or update directives."); + + // Generate the code for the opening of the data environment. + auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) { + // Fill up the arrays with all the mapped variables. + MappableExprsHandler::MapValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; + + // Get map clause information. + MappableExprsHandler MEHandler(D, CGF); + MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); + + llvm::Value *BasePointersArrayArg = nullptr; + llvm::Value *PointersArrayArg = nullptr; + llvm::Value *SizesArrayArg = nullptr; + llvm::Value *MapTypesArrayArg = nullptr; + + // Fill up the arrays and create the arguments. + emitOffloadingArrays(CGF, BasePointersArrayArg, PointersArrayArg, + SizesArrayArg, MapTypesArrayArg, BasePointers, + Pointers, Sizes, MapTypes); + emitOffloadingArraysArgument( + CGF, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, + MapTypesArrayArg, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, + MapTypesArrayArg, BasePointers.size()); + + // Emit device ID if any. 
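For the standalone data directives handled by this function, a short source-form translation table (hypothetical example; the comments name the runtime entry point selected by the directive-kind switch below):

void phases(int n, double *grid) {
  #pragma omp target enter data map(to: grid[0:n])   // __tgt_target_data_begin
  // ... target regions reusing the device copy of 'grid' ...
  #pragma omp target update from(grid[0:n])          // __tgt_target_data_update
  #pragma omp target exit data map(from: grid[0:n])  // __tgt_target_data_end
}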
+ llvm::Value *DeviceID = nullptr; + if (Device) + DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), + CGF.Int32Ty, /*isSigned=*/true); + else + DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + + // Emit the number of elements in the offloading arrays. + auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); + + llvm::Value *OffloadingArgs[] = { + DeviceID, PointerNum, BasePointersArrayArg, + PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; + + auto &RT = CGF.CGM.getOpenMPRuntime(); + // Select the right runtime function call for each expected standalone + // directive. + OpenMPRTLFunction RTLFn; + switch (D.getDirectiveKind()) { + default: + llvm_unreachable("Unexpected standalone target data directive."); + break; + case OMPD_target_enter_data: + RTLFn = OMPRTL__tgt_target_data_begin; + break; + case OMPD_target_exit_data: + RTLFn = OMPRTL__tgt_target_data_end; + break; + case OMPD_target_update: + RTLFn = OMPRTL__tgt_target_data_update; + break; + } + CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs); + }; + + // In the event we get an if clause, we don't have to take any action on the + // else side. + auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; + + if (IfCond) { + emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); + } else { + RegionCodeGenTy ThenGenRCG(ThenGen); + ThenGenRCG(CGF); + } +} + +namespace { + /// Kind of parameter in a function with 'declare simd' directive. + enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; + /// Attribute set of the parameter. + struct ParamAttrTy { + ParamKindTy Kind = Vector; + llvm::APSInt StrideOrArg; + llvm::APSInt Alignment; + }; +} // namespace + +static unsigned evaluateCDTSize(const FunctionDecl *FD, + ArrayRef<ParamAttrTy> ParamAttrs) { + // Every vector variant of a SIMD-enabled function has a vector length (VLEN). + // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument + // of that clause. The VLEN value must be power of 2. + // In other case the notion of the function`s "characteristic data type" (CDT) + // is used to compute the vector length. + // CDT is defined in the following order: + // a) For non-void function, the CDT is the return type. + // b) If the function has any non-uniform, non-linear parameters, then the + // CDT is the type of the first such parameter. + // c) If the CDT determined by a) or b) above is struct, union, or class + // type which is pass-by-value (except for the type that maps to the + // built-in complex data type), the characteristic data type is int. + // d) If none of the above three cases is applicable, the CDT is int. + // The VLEN is then determined based on the CDT and the size of vector + // register of that ISA for which current vector version is generated. The + // VLEN is computed using the formula below: + // VLEN = sizeof(vector_register) / sizeof(CDT), + // where vector register size specified in section 3.2.1 Registers and the + // Stack Frame of original AMD64 ABI document. 
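A worked example of the x86 vector-variant mangling implemented just below (the declaration and clause values are hypothetical; C linkage is assumed so the IR name matches the source name):

extern "C" {
#pragma omp declare simd simdlen(4) notinbranch uniform(p) linear(i:2)
float scale(float *p, int i);
}
// 'notinbranch' keeps only the unmasked ('N') variants, so the definition is
// tagged with one attribute per ISA:
//   _ZGVbN4ul2_scale   (SSE,    128-bit registers)
//   _ZGVcN4ul2_scale   (AVX,    256-bit registers)
//   _ZGVdN4ul2_scale   (AVX2,   256-bit registers)
//   _ZGVeN4ul2_scale   (AVX512, 512-bit registers)
// where 'u' encodes the uniform pointer, 'l2' the linear parameter with
// stride 2, and 4 is the simdlen value (so no CDT-based VLEN is computed).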
+ QualType RetType = FD->getReturnType(); + if (RetType.isNull()) + return 0; + ASTContext &C = FD->getASTContext(); + QualType CDT; + if (!RetType.isNull() && !RetType->isVoidType()) + CDT = RetType; + else { + unsigned Offset = 0; + if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) { + if (ParamAttrs[Offset].Kind == Vector) + CDT = C.getPointerType(C.getRecordType(MD->getParent())); + ++Offset; + } + if (CDT.isNull()) { + for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { + if (ParamAttrs[I + Offset].Kind == Vector) { + CDT = FD->getParamDecl(I)->getType(); + break; + } + } + } + } + if (CDT.isNull()) + CDT = C.IntTy; + CDT = CDT->getCanonicalTypeUnqualified(); + if (CDT->isRecordType() || CDT->isUnionType()) + CDT = C.IntTy; + return C.getTypeSize(CDT); +} + +static void +emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, + llvm::APSInt VLENVal, + ArrayRef<ParamAttrTy> ParamAttrs, + OMPDeclareSimdDeclAttr::BranchStateTy State) { + struct ISADataTy { + char ISA; + unsigned VecRegSize; + }; + ISADataTy ISAData[] = { + { + 'b', 128 + }, // SSE + { + 'c', 256 + }, // AVX + { + 'd', 256 + }, // AVX2 + { + 'e', 512 + }, // AVX512 + }; + llvm::SmallVector<char, 2> Masked; + switch (State) { + case OMPDeclareSimdDeclAttr::BS_Undefined: + Masked.push_back('N'); + Masked.push_back('M'); + break; + case OMPDeclareSimdDeclAttr::BS_Notinbranch: + Masked.push_back('N'); + break; + case OMPDeclareSimdDeclAttr::BS_Inbranch: + Masked.push_back('M'); + break; + } + for (auto Mask : Masked) { + for (auto &Data : ISAData) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << "_ZGV" << Data.ISA << Mask; + if (!VLENVal) { + Out << llvm::APSInt::getUnsigned(Data.VecRegSize / + evaluateCDTSize(FD, ParamAttrs)); + } else + Out << VLENVal; + for (auto &ParamAttr : ParamAttrs) { + switch (ParamAttr.Kind){ + case LinearWithVarStride: + Out << 's' << ParamAttr.StrideOrArg; + break; + case Linear: + Out << 'l'; + if (!!ParamAttr.StrideOrArg) + Out << ParamAttr.StrideOrArg; + break; + case Uniform: + Out << 'u'; + break; + case Vector: + Out << 'v'; + break; + } + if (!!ParamAttr.Alignment) + Out << 'a' << ParamAttr.Alignment; + } + Out << '_' << Fn->getName(); + Fn->addFnAttr(Out.str()); + } + } +} + +void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, + llvm::Function *Fn) { + ASTContext &C = CGM.getContext(); + FD = FD->getCanonicalDecl(); + // Map params to their positions in function decl. + llvm::DenseMap<const Decl *, unsigned> ParamPositions; + if (isa<CXXMethodDecl>(FD)) + ParamPositions.insert({FD, 0}); + unsigned ParamPos = ParamPositions.size(); + for (auto *P : FD->parameters()) { + ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); + ++ParamPos; + } + for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { + llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); + // Mark uniform parameters. + for (auto *E : Attr->uniforms()) { + E = E->IgnoreParenImpCasts(); + unsigned Pos; + if (isa<CXXThisExpr>(E)) + Pos = ParamPositions[FD]; + else { + auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) + ->getCanonicalDecl(); + Pos = ParamPositions[PVD]; + } + ParamAttrs[Pos].Kind = Uniform; + } + // Get alignment info. 
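// Worked example of the resulting attributes (hypothetical function, assuming
// C linkage so that Fn->getName() is plain "foo"):
//   #pragma omp declare simd uniform(n)
//   float foo(float x, int n);
// The CDT is float (32 bits) and no branch state is given, so the loop in
// emitX86DeclareSimdFunction above adds both unmasked ('N') and masked ('M')
// variants for every ISA, e.g.
//   _ZGVbN4vu_foo   _ZGVbM4vu_foo    (SSE,    VLEN = 128/32 = 4)
//   _ZGVcN8vu_foo   _ZGVcM8vu_foo    (AVX,    VLEN = 256/32 = 8)
//   _ZGVdN8vu_foo   _ZGVdM8vu_foo    (AVX2,   VLEN = 256/32 = 8)
//   _ZGVeN16vu_foo  _ZGVeM16vu_foo   (AVX512, VLEN = 512/32 = 16)
// with 'v' encoding the vector parameter x and 'u' the uniform parameter n.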
+ auto NI = Attr->alignments_begin(); + for (auto *E : Attr->aligneds()) { + E = E->IgnoreParenImpCasts(); + unsigned Pos; + QualType ParmTy; + if (isa<CXXThisExpr>(E)) { + Pos = ParamPositions[FD]; + ParmTy = E->getType(); + } else { + auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) + ->getCanonicalDecl(); + Pos = ParamPositions[PVD]; + ParmTy = PVD->getType(); + } + ParamAttrs[Pos].Alignment = + (*NI) ? (*NI)->EvaluateKnownConstInt(C) + : llvm::APSInt::getUnsigned( + C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) + .getQuantity()); + ++NI; + } + // Mark linear parameters. + auto SI = Attr->steps_begin(); + auto MI = Attr->modifiers_begin(); + for (auto *E : Attr->linears()) { + E = E->IgnoreParenImpCasts(); + unsigned Pos; + if (isa<CXXThisExpr>(E)) + Pos = ParamPositions[FD]; + else { + auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) + ->getCanonicalDecl(); + Pos = ParamPositions[PVD]; + } + auto &ParamAttr = ParamAttrs[Pos]; + ParamAttr.Kind = Linear; + if (*SI) { + if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, + Expr::SE_AllowSideEffects)) { + if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { + if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { + ParamAttr.Kind = LinearWithVarStride; + ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( + ParamPositions[StridePVD->getCanonicalDecl()]); + } + } + } + } + ++SI; + ++MI; + } + llvm::APSInt VLENVal; + if (const Expr *VLEN = Attr->getSimdlen()) + VLENVal = VLEN->EvaluateKnownConstInt(C); + OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); + if (CGM.getTriple().getArch() == llvm::Triple::x86 || + CGM.getTriple().getArch() == llvm::Triple::x86_64) + emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); + } +} + +namespace { +/// Cleanup action for doacross support. +class DoacrossCleanupTy final : public EHScopeStack::Cleanup { +public: + static const int DoacrossFinArgs = 2; + +private: + llvm::Value *RTLFn; + llvm::Value *Args[DoacrossFinArgs]; + +public: + DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) + : RTLFn(RTLFn) { + assert(CallArgs.size() == DoacrossFinArgs); + std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); + } + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { + if (!CGF.HaveInsertPoint()) + return; + CGF.EmitRuntimeCall(RTLFn, Args); + } +}; +} // namespace + +void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, + const OMPLoopDirective &D) { + if (!CGF.HaveInsertPoint()) + return; + + ASTContext &C = CGM.getContext(); + QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); + RecordDecl *RD; + if (KmpDimTy.isNull()) { + // Build struct kmp_dim { // loop bounds info casted to kmp_int64 + // kmp_int64 lo; // lower + // kmp_int64 up; // upper + // kmp_int64 st; // stride + // }; + RD = C.buildImplicitRecord("kmp_dim"); + RD->startDefinition(); + addFieldToRecordDecl(C, RD, Int64Ty); + addFieldToRecordDecl(C, RD, Int64Ty); + addFieldToRecordDecl(C, RD, Int64Ty); + RD->completeDefinition(); + KmpDimTy = C.getRecordType(RD); + } else + RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); + + Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); + CGF.EmitNullInitialization(DimsAddr, KmpDimTy); + enum { LowerFD = 0, UpperFD, StrideFD }; + // Fill dims with data. 
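// Source-level sketch of the construct this supports (illustrative; standard
// OpenMP 4.5 doacross syntax, one loop dimension as emitted below):
//   #pragma omp for ordered(1)
//   for (int i = 1; i < n; ++i) {
//     #pragma omp ordered depend(sink: i - 1)  // __kmpc_doacross_wait (see
//                                              // emitDoacrossOrdered below)
//     a[i] = a[i - 1] + 1;
//     #pragma omp ordered depend(source)       // __kmpc_doacross_post
//   }
// emitDoacrossInit brackets the loop with __kmpc_doacross_init and
// __kmpc_doacross_fini and passes the loop bounds (upper = number of
// iterations, stride = 1) in the kmp_dim descriptor filled in here.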
+ LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy); + // dims.upper = num_iterations; + LValue UpperLVal = + CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD)); + llvm::Value *NumIterVal = CGF.EmitScalarConversion( + CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(), + Int64Ty, D.getNumIterations()->getExprLoc()); + CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); + // dims.stride = 1; + LValue StrideLVal = + CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD)); + CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), + StrideLVal); + + // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, + // kmp_int32 num_dims, struct kmp_dim * dims); + llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()), + getThreadID(CGF, D.getLocStart()), + llvm::ConstantInt::getSigned(CGM.Int32Ty, 1), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + DimsAddr.getPointer(), CGM.VoidPtrTy)}; + + llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); + CGF.EmitRuntimeCall(RTLFn, Args); + llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { + emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())}; + llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); + CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, + llvm::makeArrayRef(FiniArgs)); +} + +void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDependClause *C) { + QualType Int64Ty = + CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); + const Expr *CounterVal = C->getCounterValue(); + assert(CounterVal); + llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal), + CounterVal->getType(), Int64Ty, + CounterVal->getExprLoc()); + Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr"); + CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty); + llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()), + getThreadID(CGF, C->getLocStart()), + CntAddr.getPointer()}; + llvm::Value *RTLFn; + if (C->getDependencyKind() == OMPC_DEPEND_source) + RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); + else { + assert(C->getDependencyKind() == OMPC_DEPEND_sink); + RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); + } + CGF.EmitRuntimeCall(RTLFn, Args); +} + diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h index b325637..270de8d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -14,18 +14,19 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H +#include "CGValue.h" #include "clang/AST/Type.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" +#include "llvm/IR/Function.h" #include "llvm/IR/ValueHandle.h" namespace llvm { class ArrayType; class Constant; -class Function; class FunctionType; class GlobalVariable; class StructType; @@ -36,222 +37,116 @@ class Value; namespace clang { class Expr; class GlobalDecl; +class OMPDependClause; class OMPExecutableDirective; +class OMPLoopDirective; class VarDecl; +class OMPDeclareReductionDecl; +class IdentifierInfo; namespace CodeGen { class Address; class CodeGenFunction; class 
CodeGenModule; -typedef llvm::function_ref<void(CodeGenFunction &)> RegionCodeGenTy; +/// A basic class for pre|post-action for advanced codegen sequence for OpenMP +/// region. +class PrePostActionTy { +public: + explicit PrePostActionTy() {} + virtual void Enter(CodeGenFunction &CGF) {} + virtual void Exit(CodeGenFunction &CGF) {} + virtual ~PrePostActionTy() {} +}; -class CGOpenMPRuntime { -private: - enum OpenMPRTLFunction { - /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, - /// kmpc_micro microtask, ...); - OMPRTL__kmpc_fork_call, - /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, - /// kmp_int32 global_tid, void *data, size_t size, void ***cache); - OMPRTL__kmpc_threadprivate_cached, - /// \brief Call to void __kmpc_threadprivate_register( ident_t *, - /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); - OMPRTL__kmpc_threadprivate_register, - // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); - OMPRTL__kmpc_global_thread_num, - // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *crit); - OMPRTL__kmpc_critical, - // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 - // global_tid, kmp_critical_name *crit, uintptr_t hint); - OMPRTL__kmpc_critical_with_hint, - // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *crit); - OMPRTL__kmpc_end_critical, - // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 - // global_tid); - OMPRTL__kmpc_cancel_barrier, - // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_barrier, - // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_for_static_fini, - // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - OMPRTL__kmpc_serialized_parallel, - // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - OMPRTL__kmpc_end_serialized_parallel, - // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, - // kmp_int32 num_threads); - OMPRTL__kmpc_push_num_threads, - // Call to void __kmpc_flush(ident_t *loc); - OMPRTL__kmpc_flush, - // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); - OMPRTL__kmpc_master, - // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); - OMPRTL__kmpc_end_master, - // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, - // int end_part); - OMPRTL__kmpc_omp_taskyield, - // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); - OMPRTL__kmpc_single, - // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); - OMPRTL__kmpc_end_single, - // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, - // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, - // kmp_routine_entry_t *task_entry); - OMPRTL__kmpc_omp_task_alloc, - // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * - // new_task); - OMPRTL__kmpc_omp_task, - // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, - // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), - // kmp_int32 didit); - OMPRTL__kmpc_copyprivate, - // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, - // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void - // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); - OMPRTL__kmpc_reduce, - // Call to kmp_int32 
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 - // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, - // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name - // *lck); - OMPRTL__kmpc_reduce_nowait, - // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *lck); - OMPRTL__kmpc_end_reduce, - // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *lck); - OMPRTL__kmpc_end_reduce_nowait, - // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, - // kmp_task_t * new_task); - OMPRTL__kmpc_omp_task_begin_if0, - // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, - // kmp_task_t * new_task); - OMPRTL__kmpc_omp_task_complete_if0, - // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_ordered, - // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_end_ordered, - // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 - // global_tid); - OMPRTL__kmpc_omp_taskwait, - // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_taskgroup, - // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_end_taskgroup, - // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, - // int proc_bind); - OMPRTL__kmpc_push_proc_bind, - // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 - // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t - // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); - OMPRTL__kmpc_omp_task_with_deps, - // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 - // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 - // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); - OMPRTL__kmpc_omp_wait_deps, - // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 - // global_tid, kmp_int32 cncl_kind); - OMPRTL__kmpc_cancellationpoint, - // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, - // kmp_int32 cncl_kind); - OMPRTL__kmpc_cancel, - - // - // Offloading related calls - // - // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t - // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t - // *arg_types); - OMPRTL__tgt_target, - // Call to void __tgt_register_lib(__tgt_bin_desc *desc); - OMPRTL__tgt_register_lib, - // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); - OMPRTL__tgt_unregister_lib, - }; +/// Class provides a way to call simple version of codegen for OpenMP region, or +/// an advanced with possible pre|post-actions in codegen. +class RegionCodeGenTy final { + intptr_t CodeGen; + typedef void (*CodeGenTy)(intptr_t, CodeGenFunction &, PrePostActionTy &); + CodeGenTy Callback; + mutable PrePostActionTy *PrePostAction; + RegionCodeGenTy() = delete; + RegionCodeGenTy &operator=(const RegionCodeGenTy &) = delete; + template <typename Callable> + static void CallbackFn(intptr_t CodeGen, CodeGenFunction &CGF, + PrePostActionTy &Action) { + return (*reinterpret_cast<Callable *>(CodeGen))(CGF, Action); + } - /// \brief Values for bit flags used in the ident_t to describe the fields. - /// All enumeric elements are named and described in accordance with the code - /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h - enum OpenMPLocationFlags { - /// \brief Use trampoline for internal microtask. 
- OMP_IDENT_IMD = 0x01, - /// \brief Use c-style ident structure. - OMP_IDENT_KMPC = 0x02, - /// \brief Atomic reduction option for kmpc_reduce. - OMP_ATOMIC_REDUCE = 0x10, - /// \brief Explicit 'barrier' directive. - OMP_IDENT_BARRIER_EXPL = 0x20, - /// \brief Implicit barrier in code. - OMP_IDENT_BARRIER_IMPL = 0x40, - /// \brief Implicit barrier in 'for' directive. - OMP_IDENT_BARRIER_IMPL_FOR = 0x40, - /// \brief Implicit barrier in 'sections' directive. - OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, - /// \brief Implicit barrier in 'single' directive. - OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 - }; +public: + template <typename Callable> + RegionCodeGenTy( + Callable &&CodeGen, + typename std::enable_if< + !std::is_same<typename std::remove_reference<Callable>::type, + RegionCodeGenTy>::value>::type * = nullptr) + : CodeGen(reinterpret_cast<intptr_t>(&CodeGen)), + Callback(CallbackFn<typename std::remove_reference<Callable>::type>), + PrePostAction(nullptr) {} + void setAction(PrePostActionTy &Action) const { PrePostAction = &Action; } + void operator()(CodeGenFunction &CGF) const; +}; + +struct OMPTaskDataTy final { + SmallVector<const Expr *, 4> PrivateVars; + SmallVector<const Expr *, 4> PrivateCopies; + SmallVector<const Expr *, 4> FirstprivateVars; + SmallVector<const Expr *, 4> FirstprivateCopies; + SmallVector<const Expr *, 4> FirstprivateInits; + SmallVector<const Expr *, 4> LastprivateVars; + SmallVector<const Expr *, 4> LastprivateCopies; + SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4> Dependences; + llvm::PointerIntPair<llvm::Value *, 1, bool> Final; + llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule; + llvm::PointerIntPair<llvm::Value *, 1, bool> Priority; + unsigned NumberOfParts = 0; + bool Tied = true; + bool Nogroup = false; +}; + +class CGOpenMPRuntime { +protected: CodeGenModule &CGM; + + /// \brief Creates offloading entry for the provided entry ID \a ID, + /// address \a Addr and size \a Size. + virtual void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, + uint64_t Size); + + /// \brief Helper to emit outlined function for 'target' directive. + /// \param D Directive to emit. + /// \param ParentName Name of the function that encloses the target region. + /// \param OutlinedFn Outlined function value to be defined by this call. + /// \param OutlinedFnID Outlined function ID value to be defined by this call. + /// \param IsOffloadEntry True if the outlined function is an offload entry. + /// \param CodeGen Lambda codegen specific to an accelerator device. + /// An oulined function may not be an entry if, e.g. the if clause always + /// evaluates to false. + virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, + StringRef ParentName, + llvm::Function *&OutlinedFn, + llvm::Constant *&OutlinedFnID, + bool IsOffloadEntry, + const RegionCodeGenTy &CodeGen); + +private: /// \brief Default const ident_t object used for initialization of all other /// ident_t objects. - llvm::Constant *DefaultOpenMPPSource; + llvm::Constant *DefaultOpenMPPSource = nullptr; /// \brief Map of flags and corresponding default locations. typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy; OpenMPDefaultLocMapTy OpenMPDefaultLocMap; - Address getOrCreateDefaultLocation(OpenMPLocationFlags Flags); + Address getOrCreateDefaultLocation(unsigned Flags); -public: - /// \brief Describes ident structure that describes a source location. 
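// Minimal usage sketch for the RegionCodeGenTy/PrePostActionTy pair introduced
// above (hypothetical call site; it mirrors how CGOpenMPRuntime.cpp uses it):
//   auto &&CodeGen = [](CodeGenFunction &CGF, PrePostActionTy &Action) {
//     // ... emit the body of the region ...
//   };
//   RegionCodeGenTy RCG(CodeGen);  // wraps the lambda without allocation
//   RCG.setAction(SomeAction);     // optionally attach Enter/Exit callbacks
//   RCG(CGF);                      // invokes the wrapped callback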
- /// All descriptions are taken from - /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h - /// Original structure: - /// typedef struct ident { - /// kmp_int32 reserved_1; /**< might be used in Fortran; - /// see above */ - /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; - /// KMP_IDENT_KMPC identifies this union - /// member */ - /// kmp_int32 reserved_2; /**< not really used in Fortran any more; - /// see above */ - ///#if USE_ITT_BUILD - /// /* but currently used for storing - /// region-specific ITT */ - /// /* contextual information. */ - ///#endif /* USE_ITT_BUILD */ - /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for - /// C++ */ - /// char const *psource; /**< String describing the source location. - /// The string is composed of semi-colon separated - // fields which describe the source file, - /// the function and a pair of line numbers that - /// delimit the construct. - /// */ - /// } ident_t; - enum IdentFieldIndex { - /// \brief might be used in Fortran - IdentField_Reserved_1, - /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. - IdentField_Flags, - /// \brief Not really used in Fortran any more - IdentField_Reserved_2, - /// \brief Source[4] in Fortran, do not use for C++ - IdentField_Reserved_3, - /// \brief String describing the source location. The string is composed of - /// semi-colon separated fields which describe the source file, the function - /// and a pair of line numbers that delimit the construct. - IdentField_PSource - }; -private: - llvm::StructType *IdentTy; + llvm::StructType *IdentTy = nullptr; /// \brief Map for SourceLocation and OpenMP runtime library debug locations. typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDebugLocMapTy; OpenMPDebugLocMapTy OpenMPDebugLocMap; /// \brief The type for a microtask which gets passed to __kmpc_fork_call(). /// Original representation is: /// typedef void (kmpc_micro)(kmp_int32 global_tid, kmp_int32 bound_tid,...); - llvm::FunctionType *Kmpc_MicroTy; + llvm::FunctionType *Kmpc_MicroTy = nullptr; /// \brief Stores debug location and ThreadID for the function. struct DebugLocThreadIdTy { llvm::Value *DebugLoc; @@ -261,6 +156,20 @@ private: typedef llvm::DenseMap<llvm::Function *, DebugLocThreadIdTy> OpenMPLocThreadIDMapTy; OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap; + /// Map of UDRs and corresponding combiner/initializer. + typedef llvm::DenseMap<const OMPDeclareReductionDecl *, + std::pair<llvm::Function *, llvm::Function *>> + UDRMapTy; + UDRMapTy UDRMap; + /// Map of functions and locally defined UDRs. 
+ typedef llvm::DenseMap<llvm::Function *, + SmallVector<const OMPDeclareReductionDecl *, 4>> + FunctionUDRMapTy; + FunctionUDRMapTy FunctionUDRMap; + IdentifierInfo *In = nullptr; + IdentifierInfo *Out = nullptr; + IdentifierInfo *Priv = nullptr; + IdentifierInfo *Orig = nullptr; /// \brief Type kmp_critical_name, originally defined as typedef kmp_int32 /// kmp_critical_name[8]; llvm::ArrayType *KmpCriticalNameTy; @@ -272,7 +181,7 @@ private: llvm::StringMap<llvm::AssertingVH<llvm::Constant>, llvm::BumpPtrAllocator> InternalVars; /// \brief Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); - llvm::Type *KmpRoutineEntryPtrTy; + llvm::Type *KmpRoutineEntryPtrTy = nullptr; QualType KmpRoutineEntryPtrQTy; /// \brief Type typedef struct kmp_task { /// void * shareds; /**< pointer to block of pointers to @@ -293,6 +202,12 @@ private: /// } flags; /// } kmp_depend_info_t; QualType KmpDependInfoTy; + /// struct kmp_dim { // loop bounds info casted to kmp_int64 + /// kmp_int64 lo; // lower + /// kmp_int64 up; // upper + /// kmp_int64 st; // stride + /// }; + QualType KmpDimTy; /// \brief Type struct __tgt_offload_entry{ /// void *addr; // Pointer to the offload entry info. /// // (function or global) @@ -402,30 +317,27 @@ private: /// \brief Initialize target region entry. void initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - unsigned ColNum, unsigned Order); + unsigned Order); /// \brief Register target region entry. void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - unsigned ColNum, llvm::Constant *Addr, + llvm::Constant *Addr, llvm::Constant *ID); /// \brief Return true if a target region entry with the provided /// information exists. bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, - StringRef ParentName, unsigned LineNum, - unsigned ColNum) const; + StringRef ParentName, unsigned LineNum) const; /// brief Applies action \a Action on all registered entries. typedef llvm::function_ref<void(unsigned, unsigned, StringRef, unsigned, - unsigned, OffloadEntryInfoTargetRegion &)> + OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy; void actOnTargetRegionEntriesInfo( const OffloadTargetRegionEntryInfoActTy &Action); private: // Storage for target region entries kind. The storage is to be indexed by - // file ID, device ID, parent function name, lane number, and column number. + // file ID, device ID, parent function name and line number. typedef llvm::DenseMap<unsigned, OffloadEntryInfoTargetRegion> - OffloadEntriesTargetRegionPerColumn; - typedef llvm::DenseMap<unsigned, OffloadEntriesTargetRegionPerColumn> OffloadEntriesTargetRegionPerLine; typedef llvm::StringMap<OffloadEntriesTargetRegionPerLine> OffloadEntriesTargetRegionPerParentName; @@ -442,10 +354,6 @@ private: /// compilation unit. The function that does the registration is returned. llvm::Function *createOffloadingBinaryDescriptorRegistration(); - /// \brief Creates offloading entry for the provided address \a Addr, - /// name \a Name and size \a Size. - void createOffloadEntry(llvm::Constant *Addr, StringRef Name, uint64_t Size); - /// \brief Creates all the offload entries in the current compilation unit /// along with the associated metadata. void createOffloadEntriesAndInfoMetadata(); @@ -476,7 +384,7 @@ private: /// \param Flags Flags for OpenMP location. 
/// llvm::Value *emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPLocationFlags Flags = OMP_IDENT_KMPC); + unsigned Flags = 0); /// \brief Returns pointer to ident_t type. llvm::Type *getIdentTyPointerTy(); @@ -487,7 +395,7 @@ private: /// \brief Returns specified OpenMP runtime function. /// \param Function OpenMP runtime function. /// \return Specified function. - llvm::Constant *createRuntimeFunction(OpenMPRTLFunction Function); + llvm::Constant *createRuntimeFunction(unsigned Function); /// \brief Returns __kmpc_for_static_init_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. @@ -530,7 +438,7 @@ private: const llvm::Twine &Name); /// \brief Set of threadprivate variables with the generated initializer. - llvm::DenseSet<const VarDecl *> ThreadPrivateWithDefinition; + llvm::SmallPtrSet<const VarDecl *, 4> ThreadPrivateWithDefinition; /// \brief Emits initialization code for the threadprivate variables. /// \param VDAddr Address of the global variable \a VD. @@ -549,11 +457,52 @@ private: /// llvm::Value *getCriticalRegionLock(StringRef CriticalName); + struct TaskResultTy { + llvm::Value *NewTask = nullptr; + llvm::Value *TaskEntry = nullptr; + llvm::Value *NewTaskNewTaskTTy = nullptr; + LValue TDBase; + RecordDecl *KmpTaskTQTyRD = nullptr; + llvm::Value *TaskDupFn = nullptr; + }; + /// Emit task region for the task directive. The task region is emitted in + /// several steps: + /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 + /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the + /// function: + /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { + /// TaskFunction(gtid, tt->part_id, tt->shareds); + /// return 0; + /// } + /// 2. Copy a list of shared variables to field shareds of the resulting + /// structure kmp_task_t returned by the previous call (if any). + /// 3. Copy a pointer to destructions function to field destructions of the + /// resulting structure kmp_task_t. + /// \param D Current task directive. + /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 + /// /*part_id*/, captured_struct */*__context*/); + /// \param SharedsTy A type which contains references the shared variables. + /// \param Shareds Context with the list of shared variables from the \p + /// TaskFunction. + /// \param Data Additional data for task generation like tiednsee, final + /// state, list of privates etc. + TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D, + llvm::Value *TaskFunction, QualType SharedsTy, + Address Shareds, const OMPTaskDataTy &Data); + public: explicit CGOpenMPRuntime(CodeGenModule &CGM); virtual ~CGOpenMPRuntime() {} virtual void clear(); + /// Emit code for the specified user defined reduction construct. + virtual void emitUserDefinedReduction(CodeGenFunction *CGF, + const OMPDeclareReductionDecl *D); + /// Get combiner/initializer for the specified user-defined reduction, if any. + virtual std::pair<llvm::Function *, llvm::Function *> + getUserDefinedReduction(const OMPDeclareReductionDecl *D); /// \brief Emits outlined function for the specified OpenMP parallel directive /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, /// kmp_int32 BoundID, struct context_vars*). 
@@ -562,22 +511,30 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - virtual llvm::Value *emitParallelOutlinedFunction( + virtual llvm::Value *emitParallelOrTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); /// \brief Emits outlined function for the OpenMP task directive \a D. This - /// outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32 - /// PartID, struct context_vars*). + /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t* + /// TaskT). /// \param D OpenMP directive. /// \param ThreadIDVar Variable for thread id in the current OpenMP region. + /// \param PartIDVar Variable for partition id in the current OpenMP untied + /// task region. + /// \param TaskTVar Variable for task_t argument. /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. + /// \param Tied true if task is generated for tied task, false otherwise. + /// \param NumberOfParts Number of parts in untied task. Ignored for tied + /// tasks. /// virtual llvm::Value *emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); + const VarDecl *PartIDVar, const VarDecl *TaskTVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + bool Tied, unsigned &NumberOfParts); /// \brief Cleans up references to the objects in finished function. /// @@ -664,6 +621,14 @@ public: virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const; + /// \brief Check if the specified \a ScheduleKind is static non-chunked. + /// This kind of distribute directive is emitted without outer loop. + /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause. + /// \param Chunked True if chunk is specified in the clause. + /// + virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind, + bool Chunked) const; + /// \brief Check if the specified \a ScheduleKind is dynamic. /// This kind of worksharing directive is emitted without outer loop. /// \param ScheduleKind Schedule Kind specified in the 'schedule' clause. @@ -671,9 +636,9 @@ public: virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const; virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPScheduleClauseKind SchedKind, - unsigned IVSize, bool IVSigned, - bool Ordered, llvm::Value *UB, + const OpenMPScheduleTy &ScheduleKind, + unsigned IVSize, bool IVSigned, bool Ordered, + llvm::Value *UB, llvm::Value *Chunk = nullptr); /// \brief Call the appropriate runtime routine to initialize it before start @@ -685,7 +650,7 @@ public: /// /// \param CGF Reference to current CodeGenFunction. /// \param Loc Clang source location. - /// \param SchedKind Schedule kind, specified by the 'schedule' clause. + /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. /// \param IVSize Size of the iteration variable in bits. /// \param IVSigned Sign of the interation variable. /// \param Ordered true if loop is ordered, false otherwise. 
@@ -701,12 +666,36 @@ public: /// For the default (nullptr) value, the chunk 1 will be used. /// virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPScheduleClauseKind SchedKind, + const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, - Address IL, Address LB, - Address UB, Address ST, + Address IL, Address LB, Address UB, Address ST, llvm::Value *Chunk = nullptr); + /// + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// \param SchedKind Schedule kind, specified by the 'dist_schedule' clause. + /// \param IVSize Size of the iteration variable in bits. + /// \param IVSigned Sign of the interation variable. + /// \param Ordered true if loop is ordered, false otherwise. + /// \param IL Address of the output variable in which the flag of the + /// last iteration is returned. + /// \param LB Address of the output variable in which the lower iteration + /// number is returned. + /// \param UB Address of the output variable in which the upper iteration + /// number is returned. + /// \param ST Address of the output variable in which the stride value is + /// returned nesessary to generated the static_chunked scheduled loop. + /// \param Chunk Value of the chunk for the static_chunked scheduled loop. + /// For the default (nullptr) value, the chunk 1 will be used. + /// + virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDistScheduleClauseKind SchedKind, + unsigned IVSize, bool IVSigned, + bool Ordered, Address IL, Address LB, + Address UB, Address ST, + llvm::Value *Chunk = nullptr); + /// \brief Call the appropriate runtime routine to notify that we finished /// iteration of the ordered loop with the dynamic scheduling. /// @@ -807,12 +796,6 @@ public: /// kmp_task_t *new_task), where new_task is a resulting structure from /// previous items. /// \param D Current task directive. - /// \param Tied true if the task is tied (the task is tied to the thread that - /// can suspend its task region), false - untied (the task is not tied to any - /// thread). - /// \param Final Contains either constant bool value, or llvm::Value * of i1 - /// type for final clause. If the value is true, the task forces all of its - /// child tasks to become final and included tasks. /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 /// /*part_id*/, captured_struct */*__context*/); /// \param SharedsTy A type which contains references the shared variables. @@ -820,29 +803,47 @@ public: /// TaskFunction. /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr /// otherwise. - /// \param PrivateVars List of references to private variables for the task - /// directive. - /// \param PrivateCopies List of private copies for each private variable in - /// \p PrivateVars. - /// \param FirstprivateVars List of references to private variables for the - /// task directive. - /// \param FirstprivateCopies List of private copies for each private variable - /// in \p FirstprivateVars. - /// \param FirstprivateInits List of references to auto generated variables - /// used for initialization of a single array element. Used if firstprivate - /// variable is of array type. - /// \param Dependences List of dependences for the 'task' construct, including - /// original expression and dependency type. 
- virtual void emitTaskCall( - CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, - bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, + /// \param Data Additional data for task generation like tiednsee, final + /// state, list of privates etc. + virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D, + llvm::Value *TaskFunction, QualType SharedsTy, + Address Shareds, const Expr *IfCond, + const OMPTaskDataTy &Data); + + /// Emit task region for the taskloop directive. The taskloop region is + /// emitted in several steps: + /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 + /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the + /// function: + /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { + /// TaskFunction(gtid, tt->part_id, tt->shareds); + /// return 0; + /// } + /// 2. Copy a list of shared variables to field shareds of the resulting + /// structure kmp_task_t returned by the previous call (if any). + /// 3. Copy a pointer to destructions function to field destructions of the + /// resulting structure kmp_task_t. + /// 4. Emit a call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t + /// *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int + /// nogroup, int sched, kmp_uint64 grainsize, void *task_dup ), where new_task + /// is a resulting structure from + /// previous items. + /// \param D Current task directive. + /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 + /// /*part_id*/, captured_struct */*__context*/); + /// \param SharedsTy A type which contains references the shared variables. + /// \param Shareds Context with the list of shared variables from the \p + /// TaskFunction. + /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr + /// otherwise. + /// \param Data Additional data for task generation like tiednsee, final + /// state, list of privates etc. + virtual void emitTaskLoopCall( + CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, - const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, - ArrayRef<const Expr *> PrivateCopies, - ArrayRef<const Expr *> FirstprivateVars, - ArrayRef<const Expr *> FirstprivateCopies, - ArrayRef<const Expr *> FirstprivateInits, - ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences); + const Expr *IfCond, const OMPTaskDataTy &Data); /// \brief Emit code for the directive that does not require outlining. /// @@ -926,13 +927,15 @@ public: /// \param OutlinedFn Outlined function value to be defined by this call. /// \param OutlinedFnID Outlined function ID value to be defined by this call. /// \param IsOffloadEntry True if the outlined function is an offload entry. + /// \param CodeGen Code generation sequence for the \a D directive. /// An oulined function may not be an entry if, e.g. the if clause always /// evaluates to false. virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry); + bool IsOffloadEntry, + const RegionCodeGenTy &CodeGen); /// \brief Emit the target offloading code associated with \a D. 
The emitted /// code attempts offloading the execution to the device, an the event of @@ -972,6 +975,68 @@ public: /// was emitted in the current module and return the function that registers /// it. virtual llvm::Function *emitRegistrationFunction(); + + /// \brief Emits code for teams call of the \a OutlinedFn with + /// variables captured in a record which address is stored in \a + /// CapturedStruct. + /// \param OutlinedFn Outlined function to be run by team masters. Type of + /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). + /// \param CapturedVars A pointer to the record with the references to + /// variables used in \a OutlinedFn function. + /// + virtual void emitTeamsCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + SourceLocation Loc, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars); + + /// \brief Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 + /// global_tid, kmp_int32 num_teams, kmp_int32 thread_limit) to generate code + /// for num_teams clause. + /// \param NumTeams An integer expression of teams. + /// \param ThreadLimit An integer expression of threads. + virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, + const Expr *ThreadLimit, SourceLocation Loc); + + /// \brief Emit the target data mapping code associated with \a D. + /// \param D Directive to emit. + /// \param IfCond Expression evaluated in if clause associated with the target + /// directive, or null if no if clause is used. + /// \param Device Expression evaluated in device clause associated with the + /// target directive, or null if no device clause is used. + /// \param CodeGen Function that emits the enclosed region. + virtual void emitTargetDataCalls(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + const Expr *IfCond, const Expr *Device, + const RegionCodeGenTy &CodeGen); + + /// \brief Emit the data mapping/movement code associated with the directive + /// \a D that should be of the form 'target [{enter|exit} data | update]'. + /// \param D Directive to emit. + /// \param IfCond Expression evaluated in if clause associated with the target + /// directive, or null if no if clause is used. + /// \param Device Expression evaluated in device clause associated with the + /// target directive, or null if no device clause is used. + virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + const Expr *IfCond, + const Expr *Device); + + /// Marks function \a Fn with properly mangled versions of vector functions. + /// \param FD Function marked as 'declare simd'. + /// \param Fn LLVM function that must be marked with 'declare simd' + /// attributes. + virtual void emitDeclareSimdFunction(const FunctionDecl *FD, + llvm::Function *Fn); + + /// Emit initialization for doacross loop nesting support. + /// \param D Loop-based construct used in doacross nesting construct. + virtual void emitDoacrossInit(CodeGenFunction &CGF, + const OMPLoopDirective &D); + + /// Emit code for doacross ordered directive with 'depend' clause. + /// \param C 'depend' clause with 'sink|source' dependency kind. 
+ virtual void emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDependClause *C); }; } // namespace CodeGen diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp new file mode 100644 index 0000000..d64f6df --- /dev/null +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -0,0 +1,396 @@ +//===---- CGOpenMPRuntimeNVPTX.cpp - Interface to OpenMP NVPTX Runtimes ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This provides a class for OpenMP runtime code generation specialized to NVPTX +// targets. +// +//===----------------------------------------------------------------------===// + +#include "CGOpenMPRuntimeNVPTX.h" +#include "clang/AST/DeclOpenMP.h" +#include "CodeGenFunction.h" +#include "clang/AST/StmtOpenMP.h" + +using namespace clang; +using namespace CodeGen; + +/// \brief Get the GPU warp size. +llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXWarpSize(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + return Bld.CreateCall( + llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), + llvm::None, "nvptx_warp_size"); +} + +/// \brief Get the id of the current thread on the GPU. +llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXThreadID(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + return Bld.CreateCall( + llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x), + llvm::None, "nvptx_tid"); +} + +// \brief Get the maximum number of threads in a block of the GPU. +llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXNumThreads(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + return Bld.CreateCall( + llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x), + llvm::None, "nvptx_num_threads"); +} + +/// \brief Get barrier to synchronize all threads in a block. +void CGOpenMPRuntimeNVPTX::getNVPTXCTABarrier(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + Bld.CreateCall(llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::nvvm_barrier0)); +} + +// \brief Synchronize all GPU threads in a block. +void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) { + getNVPTXCTABarrier(CGF); +} + +/// \brief Get the thread id of the OMP master thread. +/// The master thread id is the first thread (lane) of the last warp in the +/// GPU block. Warp size is assumed to be some power of 2. +/// Thread id is 0 indexed. +/// E.g: If NumThreads is 33, master id is 32. +/// If NumThreads is 64, master id is 32. +/// If NumThreads is 1024, master id is 992. +llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Value *NumThreads = getNVPTXNumThreads(CGF); + + // We assume that the warp size is a power of 2. 
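// Equivalently, in scalar form: master_tid = (num_threads - 1) & ~(warp_size - 1).
// With a warp size of 32 (the value reported by current NVPTX targets) this
// reproduces the examples in the comment above:
//   (33 - 1)   & ~31 == 32
//   (64 - 1)   & ~31 == 32
//   (1024 - 1) & ~31 == 992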
+ llvm::Value *Mask = Bld.CreateSub(getNVPTXWarpSize(CGF), Bld.getInt32(1)); + + return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)), + Bld.CreateNot(Mask), "master_tid"); +} + +namespace { +enum OpenMPRTLFunctionNVPTX { + /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle, + /// kmp_int32 thread_limit); + OMPRTL_NVPTX__kmpc_kernel_init, +}; + +// NVPTX Address space +enum ADDRESS_SPACE { + ADDRESS_SPACE_SHARED = 3, +}; +} // namespace + +CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState( + CodeGenModule &CGM) + : WorkerFn(nullptr), CGFI(nullptr) { + createWorkerFunction(CGM); +} + +void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction( + CodeGenModule &CGM) { + // Create an worker function with no arguments. + CGFI = &CGM.getTypes().arrangeNullaryFunction(); + + WorkerFn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(*CGFI), llvm::GlobalValue::InternalLinkage, + /* placeholder */ "_worker", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI); + WorkerFn->setLinkage(llvm::GlobalValue::InternalLinkage); + WorkerFn->addFnAttr(llvm::Attribute::NoInline); +} + +void CGOpenMPRuntimeNVPTX::initializeEnvironment() { + // + // Initialize master-worker control state in shared memory. + // + + auto DL = CGM.getDataLayout(); + ActiveWorkers = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int32Ty, /*isConstant=*/false, + llvm::GlobalValue::CommonLinkage, + llvm::Constant::getNullValue(CGM.Int32Ty), "__omp_num_threads", 0, + llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED); + ActiveWorkers->setAlignment(DL.getPrefTypeAlignment(CGM.Int32Ty)); + + WorkID = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int64Ty, /*isConstant=*/false, + llvm::GlobalValue::CommonLinkage, + llvm::Constant::getNullValue(CGM.Int64Ty), "__tgt_work_id", 0, + llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED); + WorkID->setAlignment(DL.getPrefTypeAlignment(CGM.Int64Ty)); +} + +void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) { + auto &Ctx = CGM.getContext(); + + CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); + CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {}); + emitWorkerLoop(CGF, WST); + CGF.FinishFunction(); +} + +void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, + WorkerFunctionState &WST) { + // + // The workers enter this loop and wait for parallel work from the master. + // When the master encounters a parallel region it sets up the work + variable + // arguments, and wakes up the workers. The workers first check to see if + // they are required for the parallel region, i.e., within the # of requested + // parallel threads. The activated workers load the variable arguments and + // execute the parallel work. + // + + CGBuilderTy &Bld = CGF.Builder; + + llvm::BasicBlock *AwaitBB = CGF.createBasicBlock(".await.work"); + llvm::BasicBlock *SelectWorkersBB = CGF.createBasicBlock(".select.workers"); + llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute.parallel"); + llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".terminate.parallel"); + llvm::BasicBlock *BarrierBB = CGF.createBasicBlock(".barrier.parallel"); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); + + CGF.EmitBranch(AwaitBB); + + // Workers wait for work from master. + CGF.EmitBlock(AwaitBB); + // Wait for parallel work + syncCTAThreads(CGF); + // On termination condition (workid == 0), exit loop. 
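// In outline, the control flow emitted below behaves like the following
// illustrative pseudocode, where __tgt_work_id and __omp_num_threads are the
// shared globals created in initializeEnvironment:
//   for (;;) {
//     barrier();                        // wait for work from the master
//     if (__tgt_work_id == 0) break;    // master signalled termination
//     if (tid < __omp_num_threads) {
//       // execute the parallel work (still a TODO at this point)
//     }
//     barrier();                        // rejoin before the next round
//   }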
+ llvm::Value *ShouldTerminate = Bld.CreateICmpEQ( + Bld.CreateAlignedLoad(WorkID, WorkID->getAlignment()), + llvm::Constant::getNullValue(WorkID->getType()->getElementType()), + "should_terminate"); + Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB); + + // Activate requested workers. + CGF.EmitBlock(SelectWorkersBB); + llvm::Value *ThreadID = getNVPTXThreadID(CGF); + llvm::Value *ActiveThread = Bld.CreateICmpSLT( + ThreadID, + Bld.CreateAlignedLoad(ActiveWorkers, ActiveWorkers->getAlignment()), + "active_thread"); + Bld.CreateCondBr(ActiveThread, ExecuteBB, BarrierBB); + + // Signal start of parallel region. + CGF.EmitBlock(ExecuteBB); + // TODO: Add parallel work. + + // Signal end of parallel region. + CGF.EmitBlock(TerminateBB); + CGF.EmitBranch(BarrierBB); + + // All active and inactive workers wait at a barrier after parallel region. + CGF.EmitBlock(BarrierBB); + // Barrier after parallel region. + syncCTAThreads(CGF); + CGF.EmitBranch(AwaitBB); + + // Exit target region. + CGF.EmitBlock(ExitBB); +} + +// Setup NVPTX threads for master-worker OpenMP scheme. +void CGOpenMPRuntimeNVPTX::emitEntryHeader(CodeGenFunction &CGF, + EntryFunctionState &EST, + WorkerFunctionState &WST) { + CGBuilderTy &Bld = CGF.Builder; + + // Get the master thread id. + llvm::Value *MasterID = getMasterThreadID(CGF); + // Current thread's identifier. + llvm::Value *ThreadID = getNVPTXThreadID(CGF); + + // Setup BBs in entry function. + llvm::BasicBlock *WorkerCheckBB = CGF.createBasicBlock(".check.for.worker"); + llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker"); + llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); + EST.ExitBB = CGF.createBasicBlock(".exit"); + + // The head (master thread) marches on while its body of companion threads in + // the warp go to sleep. + llvm::Value *ShouldDie = + Bld.CreateICmpUGT(ThreadID, MasterID, "excess_in_master_warp"); + Bld.CreateCondBr(ShouldDie, EST.ExitBB, WorkerCheckBB); + + // Select worker threads... + CGF.EmitBlock(WorkerCheckBB); + llvm::Value *IsWorker = Bld.CreateICmpULT(ThreadID, MasterID, "is_worker"); + Bld.CreateCondBr(IsWorker, WorkerBB, MasterBB); + + // ... and send to worker loop, awaiting parallel invocation. + CGF.EmitBlock(WorkerBB); + CGF.EmitCallOrInvoke(WST.WorkerFn, llvm::None); + CGF.EmitBranch(EST.ExitBB); + + // Only master thread executes subsequent serial code. + CGF.EmitBlock(MasterBB); + + // First action in sequential region: + // Initialize the state of the OpenMP runtime library on the GPU. + llvm::Value *Args[] = {Bld.getInt32(/*OmpHandle=*/0), getNVPTXThreadID(CGF)}; + CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), + Args); +} + +void CGOpenMPRuntimeNVPTX::emitEntryFooter(CodeGenFunction &CGF, + EntryFunctionState &EST) { + CGBuilderTy &Bld = CGF.Builder; + llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".termination.notifier"); + CGF.EmitBranch(TerminateBB); + + CGF.EmitBlock(TerminateBB); + // Signal termination condition. + Bld.CreateAlignedStore( + llvm::Constant::getNullValue(WorkID->getType()->getElementType()), WorkID, + WorkID->getAlignment()); + // Barrier to terminate worker threads. + syncCTAThreads(CGF); + // Master thread jumps to exit point. + CGF.EmitBranch(EST.ExitBB); + + CGF.EmitBlock(EST.ExitBB); +} + +/// \brief Returns specified OpenMP runtime function for the current OpenMP +/// implementation. Specialized for the NVPTX device. +/// \param Function OpenMP runtime function. +/// \return Specified function. 
+llvm::Constant * +CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { + llvm::Constant *RTLFn = nullptr; + switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) { + case OMPRTL_NVPTX__kmpc_kernel_init: { + // Build void __kmpc_kernel_init(kmp_int32 omp_handle, + // kmp_int32 thread_limit); + llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init"); + break; + } + } + return RTLFn; +} + +void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID, + llvm::Constant *Addr, + uint64_t Size) { + auto *F = dyn_cast<llvm::Function>(Addr); + // TODO: Add support for global variables on the device after declare target + // support. + if (!F) + return; + llvm::Module *M = F->getParent(); + llvm::LLVMContext &Ctx = M->getContext(); + + // Get "nvvm.annotations" metadata node + llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); + + llvm::Metadata *MDVals[] = { + llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "kernel"), + llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))}; + // Append metadata to nvvm.annotations + MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); +} + +void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction( + const OMPExecutableDirective &D, StringRef ParentName, + llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + if (!IsOffloadEntry) // Nothing to do. + return; + + assert(!ParentName.empty() && "Invalid target region parent name!"); + + EntryFunctionState EST; + WorkerFunctionState WST(CGM); + + // Emit target region as a standalone region. + class NVPTXPrePostActionTy : public PrePostActionTy { + CGOpenMPRuntimeNVPTX &RT; + CGOpenMPRuntimeNVPTX::EntryFunctionState &EST; + CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST; + + public: + NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT, + CGOpenMPRuntimeNVPTX::EntryFunctionState &EST, + CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST) + : RT(RT), EST(EST), WST(WST) {} + void Enter(CodeGenFunction &CGF) override { + RT.emitEntryHeader(CGF, EST, WST); + } + void Exit(CodeGenFunction &CGF) override { RT.emitEntryFooter(CGF, EST); } + } Action(*this, EST, WST); + CodeGen.setAction(Action); + emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, + IsOffloadEntry, CodeGen); + + // Create the worker function + emitWorkerFunction(WST); + + // Now change the name of the worker function to correspond to this target + // region's entry function. + WST.WorkerFn->setName(OutlinedFn->getName() + "_worker"); +} + +CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) + : CGOpenMPRuntime(CGM), ActiveWorkers(nullptr), WorkID(nullptr) { + if (!CGM.getLangOpts().OpenMPIsDevice) + llvm_unreachable("OpenMP NVPTX can only handle device code."); + + // Called once per module during initialization. 
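// For reference, the annotation added by createOffloadEntry above appears in
// textual IR roughly as follows (illustrative; @outlined_target_fn is a
// placeholder for the outlined entry's actual symbol and signature):
//   !nvvm.annotations = !{!0}
//   !0 = !{void ()* @outlined_target_fn, !"kernel", i32 1}
// which is how the NVPTX backend recognizes the outlined entry as a kernel.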
+ initializeEnvironment(); +} + +void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF, + const Expr *NumTeams, + const Expr *ThreadLimit, + SourceLocation Loc) {} + +llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOrTeamsOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + + llvm::Function *OutlinedFun = nullptr; + if (isa<OMPTeamsDirective>(D)) { + llvm::Value *OutlinedFunVal = + CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( + D, ThreadIDVar, InnermostKind, CodeGen); + OutlinedFun = cast<llvm::Function>(OutlinedFunVal); + OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); + } else + llvm_unreachable("parallel directive is not yet supported for nvptx " + "backend."); + + return OutlinedFun; +} + +void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars) { + if (!CGF.HaveInsertPoint()) + return; + + Address ZeroAddr = + CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), + /*Name*/ ".zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; + OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); + CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h new file mode 100644 index 0000000..a6c64b2 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -0,0 +1,179 @@ +//===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This provides a class for OpenMP runtime code generation specialized to NVPTX +// targets. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H +#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H + +#include "CGOpenMPRuntime.h" +#include "CodeGenFunction.h" +#include "clang/AST/StmtOpenMP.h" +#include "llvm/IR/CallSite.h" + +namespace clang { +namespace CodeGen { + +class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { +public: + class EntryFunctionState { + public: + llvm::BasicBlock *ExitBB; + + EntryFunctionState() : ExitBB(nullptr){}; + }; + + class WorkerFunctionState { + public: + llvm::Function *WorkerFn; + const CGFunctionInfo *CGFI; + + WorkerFunctionState(CodeGenModule &CGM); + + private: + void createWorkerFunction(CodeGenModule &CGM); + }; + + /// \brief Helper for target entry function. Guide the master and worker + /// threads to their respective locations. + void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, + WorkerFunctionState &WST); + + /// \brief Signal termination of OMP execution. + void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); + +private: + // + // NVPTX calls. + // + + /// \brief Get the GPU warp size. + llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF); + + /// \brief Get the id of the current thread on the GPU. 
+  llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF);
+
+  /// \brief Get the maximum number of threads in a block of the GPU.
+  llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF);
+
+  /// \brief Get barrier to synchronize all threads in a block.
+  void getNVPTXCTABarrier(CodeGenFunction &CGF);
+
+  /// \brief Synchronize all GPU threads in a block.
+  void syncCTAThreads(CodeGenFunction &CGF);
+
+  //
+  // OMP calls.
+  //
+
+  /// \brief Get the thread id of the OMP master thread.
+  /// The master thread id is the first thread (lane) of the last warp in the
+  /// GPU block. Warp size is assumed to be some power of 2.
+  /// Thread id is 0-indexed.
+  /// E.g.: If NumThreads is 33, master id is 32.
+  ///       If NumThreads is 64, master id is 32.
+  ///       If NumThreads is 1024, master id is 992.
+  llvm::Value *getMasterThreadID(CodeGenFunction &CGF);
+
+  //
+  // Private state and methods.
+  //
+
+  // Master-worker control state.
+  // Number of requested OMP threads in parallel region.
+  llvm::GlobalVariable *ActiveWorkers;
+  // Outlined function for the workers to execute.
+  llvm::GlobalVariable *WorkID;
+
+  /// \brief Initialize master-worker control state.
+  void initializeEnvironment();
+
+  /// \brief Emit the worker function for the current target region.
+  void emitWorkerFunction(WorkerFunctionState &WST);
+
+  /// \brief Helper for worker function. Emit body of worker loop.
+  void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST);
+
+  /// \brief Returns the specified OpenMP runtime function for the current
+  /// OpenMP implementation, specialized for the NVPTX device.
+  /// \param Function OpenMP runtime function.
+  /// \return Specified function.
+  llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
+
+  //
+  // Base class overrides.
+  //
+
+  /// \brief Creates offloading entry for the provided entry ID \a ID,
+  /// address \a Addr and size \a Size.
+  void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
+                          uint64_t Size) override;
+
+  /// \brief Emit outlined function for 'target' directive on the NVPTX
+  /// device.
+  /// \param D Directive to emit.
+  /// \param ParentName Name of the function that encloses the target region.
+  /// \param OutlinedFn Outlined function value to be defined by this call.
+  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+  /// \param IsOffloadEntry True if the outlined function is an offload entry.
+  /// An outlined function may not be an entry if, e.g., the 'if' clause always
+  /// evaluates to false.
+  void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
+                                  StringRef ParentName,
+                                  llvm::Function *&OutlinedFn,
+                                  llvm::Constant *&OutlinedFnID,
+                                  bool IsOffloadEntry,
+                                  const RegionCodeGenTy &CodeGen) override;
+
+public:
+  explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
+
+  /// \brief In the general case this function would emit a call to the OpenMP
+  /// runtime function kmpc_push_num_teams. In the NVPTX backend it is not
+  /// needed, as these numbers are obtained through the PTX grid and block
+  /// configuration.
+  /// \param NumTeams An integer expression of teams.
+  /// \param ThreadLimit An integer expression of threads.
+  void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
+                          const Expr *ThreadLimit, SourceLocation Loc) override;
+
+  /// \brief Emits an outlined function for the specified OpenMP parallel or
+  /// teams directive \a D; for 'teams' the outlined function is marked
+  /// always-inline. This outlined function has type void(*)(kmp_int32 *ThreadID,
+  /// kmp_int32 BoundID, struct context_vars*).
+ /// \param D OpenMP directive. + /// \param ThreadIDVar Variable for thread id in the current OpenMP region. + /// \param InnermostKind Kind of innermost directive (for simple directives it + /// is a directive itself, for combined - its innermost directive). + /// \param CodeGen Code generation sequence for the \a D directive. + llvm::Value * + emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) override; + + /// \brief Emits code for teams call of the \a OutlinedFn with + /// variables captured in a record which address is stored in \a + /// CapturedStruct. + /// \param OutlinedFn Outlined function to be run by team masters. Type of + /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). + /// \param CapturedVars A pointer to the record with the references to + /// variables used in \a OutlinedFn function. + /// + void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, + SourceLocation Loc, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars) override; +}; + +} // CodeGen namespace. +} // clang namespace. + +#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayout.h b/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayout.h index d4ad33e..7b9c27d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayout.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayout.h @@ -11,7 +11,7 @@ #define LLVM_CLANG_LIB_CODEGEN_CGRECORDLAYOUT_H #include "clang/AST/CharUnits.h" -#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/DerivedTypes.h" diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index 375b59c..7d530a2 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -121,7 +121,7 @@ struct CGRecordLowering { /// \brief Wraps llvm::Type::getIntNTy with some implicit arguments. llvm::Type *getIntNType(uint64_t NumBits) { return llvm::Type::getIntNTy(Types.getLLVMContext(), - (unsigned)llvm::RoundUpToAlignment(NumBits, 8)); + (unsigned)llvm::alignTo(NumBits, 8)); } /// \brief Gets an llvm type of size NumBytes and alignment 1. llvm::Type *getByteArrayType(CharUnits NumBytes) { @@ -555,7 +555,7 @@ void CGRecordLowering::clipTailPadding() { if (Member->Offset < Tail) { assert(Prior->Kind == MemberInfo::Field && !Prior->FD && "Only storage fields have tail padding!"); - Prior->Data = getByteArrayType(bitsToCharUnits(llvm::RoundUpToAlignment( + Prior->Data = getByteArrayType(bitsToCharUnits(llvm::alignTo( cast<llvm::IntegerType>(Prior->Data)->getIntegerBitWidth(), 8))); } if (Member->Data) @@ -609,8 +609,8 @@ void CGRecordLowering::insertPadding() { CharUnits Offset = Member->Offset; assert(Offset >= Size); // Insert padding if we need to. - if (Offset != Size.RoundUpToAlignment(Packed ? CharUnits::One() : - getAlignment(Member->Data))) + if (Offset != + Size.alignTo(Packed ? 
CharUnits::One() : getAlignment(Member->Data))) Padding.push_back(std::make_pair(Size, Offset - Size)); Size = Offset + getSize(Member->Data); } @@ -842,7 +842,7 @@ void CGRecordLayout::print(raw_ostream &OS) const { OS << "]>\n"; } -void CGRecordLayout::dump() const { +LLVM_DUMP_METHOD void CGRecordLayout::dump() const { print(llvm::errs()); } @@ -855,6 +855,6 @@ void CGBitFieldInfo::print(raw_ostream &OS) const { << " StorageOffset:" << StorageOffset.getQuantity() << ">"; } -void CGBitFieldInfo::dump() const { +LLVM_DUMP_METHOD void CGBitFieldInfo::dump() const { print(llvm::errs()); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp index cc4fa2e..d815863 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp @@ -256,15 +256,45 @@ void CodeGenFunction::EmitStmt(const Stmt *S) { case Stmt::OMPTargetDataDirectiveClass: EmitOMPTargetDataDirective(cast<OMPTargetDataDirective>(*S)); break; + case Stmt::OMPTargetEnterDataDirectiveClass: + EmitOMPTargetEnterDataDirective(cast<OMPTargetEnterDataDirective>(*S)); + break; + case Stmt::OMPTargetExitDataDirectiveClass: + EmitOMPTargetExitDataDirective(cast<OMPTargetExitDataDirective>(*S)); + break; + case Stmt::OMPTargetParallelDirectiveClass: + EmitOMPTargetParallelDirective(cast<OMPTargetParallelDirective>(*S)); + break; + case Stmt::OMPTargetParallelForDirectiveClass: + EmitOMPTargetParallelForDirective(cast<OMPTargetParallelForDirective>(*S)); + break; case Stmt::OMPTaskLoopDirectiveClass: EmitOMPTaskLoopDirective(cast<OMPTaskLoopDirective>(*S)); break; case Stmt::OMPTaskLoopSimdDirectiveClass: EmitOMPTaskLoopSimdDirective(cast<OMPTaskLoopSimdDirective>(*S)); break; -case Stmt::OMPDistributeDirectiveClass: + case Stmt::OMPDistributeDirectiveClass: EmitOMPDistributeDirective(cast<OMPDistributeDirective>(*S)); - break; + break; + case Stmt::OMPTargetUpdateDirectiveClass: + EmitOMPTargetUpdateDirective(cast<OMPTargetUpdateDirective>(*S)); + break; + case Stmt::OMPDistributeParallelForDirectiveClass: + EmitOMPDistributeParallelForDirective( + cast<OMPDistributeParallelForDirective>(*S)); + break; + case Stmt::OMPDistributeParallelForSimdDirectiveClass: + EmitOMPDistributeParallelForSimdDirective( + cast<OMPDistributeParallelForSimdDirective>(*S)); + break; + case Stmt::OMPDistributeSimdDirectiveClass: + EmitOMPDistributeSimdDirective(cast<OMPDistributeSimdDirective>(*S)); + break; + case Stmt::OMPTargetParallelForSimdDirectiveClass: + EmitOMPTargetParallelForSimdDirective( + cast<OMPTargetParallelForSimdDirective>(*S)); + break; } } @@ -542,13 +572,17 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { // unequal to 0. The condition must be a scalar type. LexicalScope ConditionScope(*this, S.getCond()->getSourceRange()); + if (S.getInit()) + EmitStmt(S.getInit()); + if (S.getConditionVariable()) EmitAutoVarDecl(*S.getConditionVariable()); // If the condition constant folds and can be elided, try to avoid emitting // the condition and the dead arm of the if/else. bool CondConstant; - if (ConstantFoldsToSimpleInteger(S.getCond(), CondConstant)) { + if (ConstantFoldsToSimpleInteger(S.getCond(), CondConstant, + S.isConstexpr())) { // Figure out which block (then or else) is executed. const Stmt *Executed = S.getThen(); const Stmt *Skipped = S.getElse(); @@ -557,7 +591,7 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { // If the skipped block has no labels in it, just emit the executed block. 
// This avoids emitting dead code and simplifies the CFG substantially. - if (!ContainsLabel(Skipped)) { + if (S.isConstexpr() || !ContainsLabel(Skipped)) { if (CondConstant) incrementProfileCounter(&S); if (Executed) { @@ -617,7 +651,8 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S, JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond"); EmitBlock(LoopHeader.getBlock()); - LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), WhileAttrs); + LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), WhileAttrs, + Builder.getCurrentDebugLocation()); // Create an exit block for when the condition fails, which will // also become the break target. @@ -708,7 +743,8 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S, // Emit the body of the loop. llvm::BasicBlock *LoopBody = createBasicBlock("do.body"); - LoopStack.push(LoopBody, CGM.getContext(), DoAttrs); + LoopStack.push(LoopBody, CGM.getContext(), DoAttrs, + Builder.getCurrentDebugLocation()); EmitBlockWithFallThrough(LoopBody, &S); { @@ -760,6 +796,8 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, LexicalScope ForScope(*this, S.getSourceRange()); + llvm::DebugLoc DL = Builder.getCurrentDebugLocation(); + // Evaluate the first part before the loop. if (S.getInit()) EmitStmt(S.getInit()); @@ -771,7 +809,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, llvm::BasicBlock *CondBlock = Continue.getBlock(); EmitBlock(CondBlock); - LoopStack.push(CondBlock, CGM.getContext(), ForAttrs); + LoopStack.push(CondBlock, CGM.getContext(), ForAttrs, DL); // If the for loop doesn't have an increment we can just use the // condition as the continue block. Otherwise we'll need to create @@ -856,9 +894,12 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S, LexicalScope ForScope(*this, S.getSourceRange()); + llvm::DebugLoc DL = Builder.getCurrentDebugLocation(); + // Evaluate the first pieces before the loop. EmitStmt(S.getRangeStmt()); - EmitStmt(S.getBeginEndStmt()); + EmitStmt(S.getBeginStmt()); + EmitStmt(S.getEndStmt()); // Start the loop with a block that tests the condition. // If there's an increment, the continue scope will be overwritten @@ -866,7 +907,7 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S, llvm::BasicBlock *CondBlock = createBasicBlock("for.cond"); EmitBlock(CondBlock); - LoopStack.push(CondBlock, CGM.getContext(), ForAttrs); + LoopStack.push(CondBlock, CGM.getContext(), ForAttrs, DL); // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. @@ -1147,7 +1188,7 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { // If the body of the case is just a 'break', try to not emit an empty block. // If we're profiling or we're not optimizing, leave the block in for better // debug and coverage analysis. 
- if (!CGM.getCodeGenOpts().ProfileInstrGenerate && + if (!CGM.getCodeGenOpts().hasProfileClangInstr() && CGM.getCodeGenOpts().OptimizationLevel > 0 && isa<BreakStmt>(S.getSubStmt())) { JumpDest Block = BreakContinueStack.back().BreakBlock; @@ -1194,7 +1235,7 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { if (SwitchWeights) SwitchWeights->push_back(getProfileCount(NextCase)); - if (CGM.getCodeGenOpts().ProfileInstrGenerate) { + if (CGM.getCodeGenOpts().hasProfileClangInstr()) { CaseDest = createBasicBlock("sw.bb"); EmitBlockWithFallThrough(CaseDest, &S); } @@ -1208,6 +1249,14 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { } void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S) { + // If there is no enclosing switch instance that we're aware of, then this + // default statement can be elided. This situation only happens when we've + // constant-folded the switch. + if (!SwitchInsn) { + EmitStmt(S.getSubStmt()); + return; + } + llvm::BasicBlock *DefaultBlock = SwitchInsn->getDefaultDest(); assert(DefaultBlock->empty() && "EmitDefaultStmt: Default block already defined?"); @@ -1274,6 +1323,10 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, // Handle this as two cases: we might be looking for the SwitchCase (if so // the skipped statements must be skippable) or we might already have it. CompoundStmt::const_body_iterator I = CS->body_begin(), E = CS->body_end(); + bool StartedInLiveCode = FoundCase; + unsigned StartSize = ResultStmts.size(); + + // If we've not found the case yet, scan through looking for it. if (Case) { // Keep track of whether we see a skipped declaration. The code could be // using the declaration even if it is skipped, so we can't optimize out @@ -1283,7 +1336,7 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, // If we're looking for the case, just see if we can skip each of the // substatements. for (; Case && I != E; ++I) { - HadSkippedDecl |= isa<DeclStmt>(*I); + HadSkippedDecl |= CodeGenFunction::mightAddDeclToScope(*I); switch (CollectStatementsForCase(*I, Case, FoundCase, ResultStmts)) { case CSFC_Failure: return CSFC_Failure; @@ -1319,11 +1372,19 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, break; } } + + if (!FoundCase) + return CSFC_Success; + + assert(!HadSkippedDecl && "fallthrough after skipping decl"); } // If we have statements in our range, then we know that the statements are // live and need to be added to the set of statements we're tracking. + bool AnyDecls = false; for (; I != E; ++I) { + AnyDecls |= CodeGenFunction::mightAddDeclToScope(*I); + switch (CollectStatementsForCase(*I, nullptr, FoundCase, ResultStmts)) { case CSFC_Failure: return CSFC_Failure; case CSFC_FallThrough: @@ -1341,7 +1402,24 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, } } - return Case ? CSFC_Success : CSFC_FallThrough; + // If we're about to fall out of a scope without hitting a 'break;', we + // can't perform the optimization if there were any decls in that scope + // (we'd lose their end-of-lifetime). + if (AnyDecls) { + // If the entire compound statement was live, there's one more thing we + // can try before giving up: emit the whole thing as a single statement. + // We can do that unless the statement contains a 'break;'. + // FIXME: Such a break must be at the end of a construct within this one. + // We could emit this by just ignoring the BreakStmts entirely. 
+ if (StartedInLiveCode && !CodeGenFunction::containsBreak(S)) { + ResultStmts.resize(StartSize); + ResultStmts.push_back(S); + } else { + return CSFC_Failure; + } + } + + return CSFC_FallThrough; } // Okay, this is some other statement that we don't handle explicitly, like a @@ -1438,6 +1516,9 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { incrementProfileCounter(Case); RunCleanupsScope ExecutedScope(*this); + if (S.getInit()) + EmitStmt(S.getInit()); + // Emit the condition variable if needed inside the entire cleanup scope // used by this special case for constant folded switches. if (S.getConditionVariable()) @@ -1465,6 +1546,10 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { JumpDest SwitchExit = getJumpDestInCurrentScope("sw.epilog"); RunCleanupsScope ConditionScope(*this); + + if (S.getInit()) + EmitStmt(S.getInit()); + if (S.getConditionVariable()) EmitAutoVarDecl(*S.getConditionVariable()); llvm::Value *CondV = EmitScalarExpr(S.getCond()); @@ -1537,16 +1622,13 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { // If the switch has a condition wrapped by __builtin_unpredictable, // create metadata that specifies that the switch is unpredictable. // Don't bother if not optimizing because that metadata would not be used. - if (CGM.getCodeGenOpts().OptimizationLevel != 0) { - if (const CallExpr *Call = dyn_cast<CallExpr>(S.getCond())) { - const Decl *TargetDecl = Call->getCalleeDecl(); - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) { - if (FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { - llvm::MDBuilder MDHelper(getLLVMContext()); - SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable, - MDHelper.createUnpredictable()); - } - } + auto *Call = dyn_cast<CallExpr>(S.getCond()); + if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) { + auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl()); + if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { + llvm::MDBuilder MDHelper(getLLVMContext()); + SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable, + MDHelper.createUnpredictable()); } } @@ -2035,6 +2117,14 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { llvm::ConstantAsMetadata::get(Loc))); } + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + // Conservatively, mark all inline asm blocks in CUDA as convergent + // (meaning, they may call an intrinsically convergent op, such as bar.sync, + // and so can't have certain optimizations applied around them). + Result->addAttribute(llvm::AttributeSet::FunctionIndex, + llvm::Attribute::Convergent); + } + // Extract all of the register value results from the asm. std::vector<llvm::Value*> RegResults; if (ResultRegTypes.size() == 1) { @@ -2147,8 +2237,7 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) { // Create the function declaration. 
FunctionType::ExtInfo ExtInfo; const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo, - /*IsVariadic=*/false); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Function *F = diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp index d0ee891..d214340 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -11,15 +11,117 @@ // //===----------------------------------------------------------------------===// +#include "CGCleanup.h" #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/AST/DeclOpenMP.h" +#include "llvm/IR/CallSite.h" using namespace clang; using namespace CodeGen; +namespace { +/// Lexical scope for OpenMP executable constructs, that handles correct codegen +/// for captured expressions. +class OMPLexicalScope final : public CodeGenFunction::LexicalScope { + void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { + for (const auto *C : S.clauses()) { + if (auto *CPI = OMPClauseWithPreInit::get(C)) { + if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) + CGF.EmitVarDecl(cast<VarDecl>(*I)); + else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); + } + } + } + } + } + } + CodeGenFunction::OMPPrivateScope InlinedShareds; + + static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { + return CGF.LambdaCaptureFields.lookup(VD) || + (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || + (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl)); + } + +public: + OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S, + bool AsInlined = false) + : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), + InlinedShareds(CGF) { + emitPreInitStmt(CGF, S); + if (AsInlined) { + if (S.hasAssociatedStmt()) { + auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); + for (auto &C : CS->captures()) { + if (C.capturesVariable() || C.capturesVariableByCopy()) { + auto *VD = C.getCapturedVar(); + DeclRefExpr DRE(const_cast<VarDecl *>(VD), + isCapturedVar(CGF, VD) || + (CGF.CapturedStmtInfo && + InlinedShareds.isGlobalVarCaptured(VD)), + VD->getType().getNonReferenceType(), VK_LValue, + SourceLocation()); + InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { + return CGF.EmitLValue(&DRE).getAddress(); + }); + } + } + (void)InlinedShareds.Privatize(); + } + } + } +}; + +/// Private scope for OpenMP loop-based directives, that supports capturing +/// of used expression from loop statement. 
+class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { + void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { + if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) { + if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) { + for (const auto *I : PreInits->decls()) + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } + } + } + +public: + OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S) + : CodeGenFunction::RunCleanupsScope(CGF) { + emitPreInitStmt(CGF, S); + } +}; + +} // namespace + +llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { + auto &C = getContext(); + llvm::Value *Size = nullptr; + auto SizeInChars = C.getTypeSizeInChars(Ty); + if (SizeInChars.isZero()) { + // getTypeSizeInChars() returns 0 for a VLA. + while (auto *VAT = C.getAsVariableArrayType(Ty)) { + llvm::Value *ArraySize; + std::tie(ArraySize, Ty) = getVLASize(VAT); + Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize; + } + SizeInChars = C.getTypeSizeInChars(Ty); + if (SizeInChars.isZero()) + return llvm::ConstantInt::get(SizeTy, /*V=*/0); + Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); + } else + Size = CGM.getSize(SizeInChars); + return Size; +} + void CodeGenFunction::GenerateOpenMPCapturedVars( const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { const RecordDecl *RD = S.getCapturedRecordDecl(); @@ -34,10 +136,33 @@ void CodeGenFunction::GenerateOpenMPCapturedVars( CapturedVars.push_back(Val); } else if (CurCap->capturesThis()) CapturedVars.push_back(CXXThisValue); - else if (CurCap->capturesVariableByCopy()) - CapturedVars.push_back( - EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal()); - else { + else if (CurCap->capturesVariableByCopy()) { + llvm::Value *CV = + EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal(); + + // If the field is not a pointer, we need to save the actual value + // and load it as a void pointer. + if (!CurField->getType()->isAnyPointerType()) { + auto &Ctx = getContext(); + auto DstAddr = CreateMemTemp( + Ctx.getUIntPtrType(), + Twine(CurCap->getCapturedVar()->getName()) + ".casted"); + LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); + + auto *SrcAddrVal = EmitScalarConversion( + DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), + Ctx.getPointerType(CurField->getType()), SourceLocation()); + LValue SrcLV = + MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType()); + + // Store the value using the source type pointer. + EmitStoreThroughLValue(RValue::get(CV), SrcLV); + + // Load the value using the destination type pointer. 
+ CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); + } + CapturedVars.push_back(CV); + } else { assert(CurCap->capturesVariable() && "Expected capture by reference."); CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); } @@ -107,8 +232,15 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { assert(I->capturesVariableArrayType()); II = &getContext().Idents.get("vla"); } - if (ArgType->isVariablyModifiedType()) - ArgType = getContext().getVariableArrayDecayedType(ArgType); + if (ArgType->isVariablyModifiedType()) { + bool IsReference = ArgType->isLValueReferenceType(); + ArgType = + getContext().getCanonicalParamType(ArgType.getNonReferenceType()); + if (IsReference && !ArgType->isPointerType()) { + ArgType = getContext().getLValueReferenceType( + ArgType, /*SpelledAsLValue=*/false); + } + } Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr, FD->getLocation(), II, ArgType)); ++I; @@ -120,8 +252,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { // Create the function declaration. FunctionType::ExtInfo ExtInfo; const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo, - /*IsVariadic=*/false); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Function *F = llvm::Function::Create( @@ -141,7 +272,8 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { // use the value that we get from the arguments. if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt])); - ++Cnt, ++I; + ++Cnt; + ++I; continue; } @@ -162,8 +294,14 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { QualType VarTy = Var->getType(); Address ArgAddr = ArgLVal.getAddress(); if (!VarTy->isReferenceType()) { - ArgAddr = EmitLoadOfReference( - ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); + if (ArgLVal.getType()->isLValueReferenceType()) { + ArgAddr = EmitLoadOfReference( + ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); + } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { + assert(ArgLVal.getType()->isPointerType()); + ArgAddr = EmitLoadOfPointer( + ArgAddr, ArgLVal.getType()->castAs<PointerType>()); + } } setAddrOfLocalVar( Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var))); @@ -172,17 +310,17 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { "Not expecting a captured pointer."); auto *Var = I->getCapturedVar(); QualType VarTy = Var->getType(); - setAddrOfLocalVar(I->getCapturedVar(), - castValueFromUintptr(*this, FD->getType(), - Args[Cnt]->getName(), ArgLVal, - VarTy->isReferenceType())); + setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(), + Args[Cnt]->getName(), ArgLVal, + VarTy->isReferenceType())); } else { // If 'this' is captured, load it into CXXThisValue. assert(I->capturesThis()); CXXThisValue = EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal(); } - ++Cnt, ++I; + ++Cnt; + ++I; } PGO.assignRegionCounters(GlobalDecl(CD), F); @@ -256,12 +394,77 @@ void CodeGenFunction::EmitOMPAggregateAssign( EmitBlock(DoneBB, /*IsFinished=*/true); } +/// Check if the combiner is a call to UDR combiner and if it is so return the +/// UDR decl used for reduction. 
+static const OMPDeclareReductionDecl * +getReductionInit(const Expr *ReductionOp) { + if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) + if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) + if (auto *DRE = + dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) + if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) + return DRD; + return nullptr; +} + +static void emitInitWithReductionInitializer(CodeGenFunction &CGF, + const OMPDeclareReductionDecl *DRD, + const Expr *InitOp, + Address Private, Address Original, + QualType Ty) { + if (DRD->getInitializer()) { + std::pair<llvm::Function *, llvm::Function *> Reduction = + CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); + auto *CE = cast<CallExpr>(InitOp); + auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); + const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); + const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); + auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); + auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), + [=]() -> Address { return Private; }); + PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), + [=]() -> Address { return Original; }); + (void)PrivateScope.Privatize(); + RValue Func = RValue::get(Reduction.second); + CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); + CGF.EmitIgnoredExpr(InitOp); + } else { + llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); + auto *GV = new llvm::GlobalVariable( + CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, Init, ".init"); + LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); + RValue InitRVal; + switch (CGF.getEvaluationKind(Ty)) { + case TEK_Scalar: + InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); + break; + case TEK_Complex: + InitRVal = + RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); + break; + case TEK_Aggregate: + InitRVal = RValue::getAggregate(LV.getAddress()); + break; + } + OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); + CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); + CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), + /*IsInitializer=*/false); + } +} + /// \brief Emit initialization of arrays of complex types. /// \param DestAddr Address of the array. /// \param Type Type of array. /// \param Init Initial expression of array. +/// \param SrcAddr Address of the original array. static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, - QualType Type, const Expr *Init) { + QualType Type, const Expr *Init, + Address SrcAddr = Address::invalid()) { + auto *DRD = getReductionInit(Init); // Perform element-by-element initialization. QualType ElementTy; @@ -270,7 +473,13 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); DestAddr = CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); + if (DRD) + SrcAddr = + CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); + llvm::Value *SrcBegin = nullptr; + if (DRD) + SrcBegin = SrcAddr.getPointer(); auto DestBegin = DestAddr.getPointer(); // Cast from pointer to array type to pointer to single element. 
auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); @@ -287,6 +496,16 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); + llvm::PHINode *SrcElementPHI = nullptr; + Address SrcElementCurrent = Address::invalid(); + if (DRD) { + SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, + "omp.arraycpy.srcElementPast"); + SrcElementPHI->addIncoming(SrcBegin, EntryBB); + SrcElementCurrent = + Address(SrcElementPHI, + SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + } llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); DestElementPHI->addIncoming(DestBegin, EntryBB); @@ -297,8 +516,19 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, // Emit copy. { CodeGenFunction::RunCleanupsScope InitScope(CGF); - CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), - /*IsInitializer=*/false); + if (DRD && (DRD->getInitializer() || !Init)) { + emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, + SrcElementCurrent, ElementTy); + } else + CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), + /*IsInitializer=*/false); + } + + if (DRD) { + // Shift the address forward by one element. + auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( + SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); } // Shift the address forward by one element. @@ -356,24 +586,42 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { if (!HaveInsertPoint()) return false; + bool FirstprivateIsLastprivate = false; + llvm::DenseSet<const VarDecl *> Lastprivates; + for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { + for (const auto *D : C->varlists()) + Lastprivates.insert( + cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); + } llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; + CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt())); for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { auto IRef = C->varlist_begin(); auto InitsRef = C->inits().begin(); for (auto IInit : C->private_copies()) { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); - if (EmittedAsFirstprivate.count(OrigVD) == 0) { - EmittedAsFirstprivate.insert(OrigVD); + bool ThisFirstprivateIsLastprivate = + Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; + auto *CapFD = CapturesInfo.lookup(OrigVD); + auto *FD = CapturedStmtInfo->lookup(OrigVD); + if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) && + !FD->getType()->isReferenceType()) { + EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); + ++IRef; + ++InitsRef; + continue; + } + FirstprivateIsLastprivate = + FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; + if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); bool IsRegistered; - DeclRefExpr DRE( - const_cast<VarDecl *>(OrigVD), - /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( - OrigVD) != nullptr, - (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); + DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), + /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, + 
(*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); Address OriginalAddr = EmitLValue(&DRE).getAddress(); - QualType Type = OrigVD->getType(); + QualType Type = VD->getType(); if (Type->isArrayType()) { // Emit VarDecl with copy init for arrays. // Get the address of the original variable captured in current @@ -420,10 +668,11 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, // Silence the warning about unused variable. (void)IsRegistered; } - ++IRef, ++InitsRef; + ++IRef; + ++InitsRef; } } - return !EmittedAsFirstprivate.empty(); + return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); } void CodeGenFunction::EmitOMPPrivateClause( @@ -470,7 +719,6 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); QualType Type = VD->getType(); if (CopiedVars.insert(VD->getCanonicalDecl()).second) { - // Get the address of the master variable. If we are emitting code with // TLS support, the address is passed from the master as field in the // captured declaration. @@ -525,15 +773,27 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( if (!HaveInsertPoint()) return false; bool HasAtLeastOneLastprivate = false; + llvm::DenseSet<const VarDecl *> SIMDLCVs; + if (isOpenMPSimdDirective(D.getDirectiveKind())) { + auto *LoopDirective = cast<OMPLoopDirective>(&D); + for (auto *C : LoopDirective->counters()) { + SIMDLCVs.insert( + cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); + } + } llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { HasAtLeastOneLastprivate = true; + if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) + break; auto IRef = C->varlist_begin(); auto IDestRef = C->destination_exprs().begin(); for (auto *IInit : C->private_copies()) { // Keep the address of the original variable for future update at the end // of the loop. auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + // Taskloops do not require additional initialization, it is done in + // runtime support library. if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address { @@ -547,27 +807,28 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( // Check if the variable is also a firstprivate: in this case IInit is // not generated. Initialization of this variable will happen in codegen // for 'firstprivate' clause. - if (IInit) { + if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); - bool IsRegistered = - PrivateScope.addPrivate(OrigVD, [&]() -> Address { - // Emit private VarDecl with copy init. - EmitDecl(*VD); - return GetAddrOfLocalVar(VD); - }); + bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { + // Emit private VarDecl with copy init. 
+ EmitDecl(*VD); + return GetAddrOfLocalVar(VD); + }); assert(IsRegistered && "lastprivate var already registered as private"); (void)IsRegistered; } } - ++IRef, ++IDestRef; + ++IRef; + ++IDestRef; } } return HasAtLeastOneLastprivate; } void CodeGenFunction::EmitOMPLastprivateClauseFinal( - const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) { + const OMPExecutableDirective &D, bool NoFinals, + llvm::Value *IsLastIterCond) { if (!HaveInsertPoint()) return; // Emit following code: @@ -584,16 +845,20 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); EmitBlock(ThenBB); } - llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates; + llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; + llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates; if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { auto IC = LoopDirective->counters().begin(); for (auto F : LoopDirective->finals()) { - auto *D = cast<DeclRefExpr>(*IC)->getDecl()->getCanonicalDecl(); - LoopCountersAndUpdates[D] = F; + auto *D = + cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl(); + if (NoFinals) + AlreadyEmittedVars.insert(D); + else + LoopCountersAndUpdates[D] = F; ++IC; } } - llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { auto IRef = C->varlist_begin(); auto ISrcRef = C->source_exprs().begin(); @@ -606,8 +871,8 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( // If lastprivate variable is a loop control variable for loop-based // directive, update its value before copyin back to original // variable. - if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) - EmitIgnoredExpr(UpExpr); + if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) + EmitIgnoredExpr(FinalExpr); auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); // Get the address of the original variable. 
@@ -624,11 +889,61 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( ++ISrcRef; ++IDestRef; } + if (auto *PostUpdate = C->getPostUpdateExpr()) + EmitIgnoredExpr(PostUpdate); } if (IsLastIterCond) EmitBlock(DoneBB, /*IsFinished=*/true); } +static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, + LValue BaseLV, llvm::Value *Addr) { + Address Tmp = Address::invalid(); + Address TopTmp = Address::invalid(); + Address MostTopTmp = Address::invalid(); + BaseTy = BaseTy.getNonReferenceType(); + while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && + !CGF.getContext().hasSameType(BaseTy, ElTy)) { + Tmp = CGF.CreateMemTemp(BaseTy); + if (TopTmp.isValid()) + CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); + else + MostTopTmp = Tmp; + TopTmp = Tmp; + BaseTy = BaseTy->getPointeeType(); + } + llvm::Type *Ty = BaseLV.getPointer()->getType(); + if (Tmp.isValid()) + Ty = Tmp.getElementType(); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); + if (Tmp.isValid()) { + CGF.Builder.CreateStore(Addr, Tmp); + return MostTopTmp; + } + return Address(Addr, BaseLV.getAlignment()); +} + +static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, + LValue BaseLV) { + BaseTy = BaseTy.getNonReferenceType(); + while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && + !CGF.getContext().hasSameType(BaseTy, ElTy)) { + if (auto *PtrTy = BaseTy->getAs<PointerType>()) + BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); + else { + BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), + BaseTy->castAs<ReferenceType>()); + } + BaseTy = BaseTy->getPointeeType(); + } + return CGF.MakeAddrLValue( + Address( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()), + BaseLV.getAlignment()), + BaseLV.getType(), BaseLV.getAlignmentSource()); +} + void CodeGenFunction::EmitOMPReductionClauseInit( const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { @@ -638,10 +953,12 @@ void CodeGenFunction::EmitOMPReductionClauseInit( auto ILHS = C->lhs_exprs().begin(); auto IRHS = C->rhs_exprs().begin(); auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); for (auto IRef : C->varlists()) { auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); + auto *DRD = getReductionInit(*IRed); if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) { auto *Base = OASE->getBase()->IgnoreParenImpCasts(); while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) @@ -654,21 +971,9 @@ void CodeGenFunction::EmitOMPReductionClauseInit( auto OASELValueUB = EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); auto OriginalBaseLValue = EmitLValue(DE); - auto BaseLValue = OriginalBaseLValue; - auto *Zero = Builder.getInt64(/*C=*/0); - llvm::SmallVector<llvm::Value *, 4> Indexes; - Indexes.push_back(Zero); - auto *ItemTy = - OASELValueLB.getPointer()->getType()->getPointerElementType(); - auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType(); - while (Ty != ItemTy) { - Indexes.push_back(Zero); - Ty = Ty->getPointerElementType(); - } - BaseLValue = MakeAddrLValue( - Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes), - OASELValueLB.getAlignment()), - OASELValueLB.getType(), OASELValueLB.getAlignmentSource()); + LValue BaseLValue = + loadToBegin(*this, 
OrigVD->getType(), OASELValueLB.getType(), + OriginalBaseLValue); // Store the address of the original variable associated with the LHS // implicit variable. PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address { @@ -676,8 +981,8 @@ void CodeGenFunction::EmitOMPReductionClauseInit( }); // Emit reduction copy. bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, PrivateVD, BaseLValue, OASELValueLB, OASELValueUB, - OriginalBaseLValue]() -> Address { + OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB, + OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address { // Emit VarDecl with copy init for arrays. // Get the address of the original variable captured in current // captured region. @@ -695,15 +1000,17 @@ void CodeGenFunction::EmitOMPReductionClauseInit( auto Emission = EmitAutoVarAlloca(*PrivateVD); auto Addr = Emission.getAllocatedAddress(); auto *Init = PrivateVD->getInit(); - EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init); + EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), + DRD ? *IRed : Init, + OASELValueLB.getAddress()); EmitAutoVarCleanups(Emission); // Emit private VarDecl with reduction init. auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), OASELValueLB.getPointer()); auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast( - Ptr, OriginalBaseLValue.getPointer()->getType()); - return Address(Ptr, OriginalBaseLValue.getAlignment()); + return castToBase(*this, OrigVD->getType(), + OASELValueLB.getType(), OriginalBaseLValue, + Ptr); }); assert(IsRegistered && "private var already registered as private"); // Silence the warning about unused variable. @@ -719,21 +1026,8 @@ void CodeGenFunction::EmitOMPReductionClauseInit( auto *OrigVD = cast<VarDecl>(DE->getDecl()); auto ASELValue = EmitLValue(ASE); auto OriginalBaseLValue = EmitLValue(DE); - auto BaseLValue = OriginalBaseLValue; - auto *Zero = Builder.getInt64(/*C=*/0); - llvm::SmallVector<llvm::Value *, 4> Indexes; - Indexes.push_back(Zero); - auto *ItemTy = - ASELValue.getPointer()->getType()->getPointerElementType(); - auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType(); - while (Ty != ItemTy) { - Indexes.push_back(Zero); - Ty = Ty->getPointerElementType(); - } - BaseLValue = MakeAddrLValue( - Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes), - ASELValue.getAlignment()), - ASELValue.getType(), ASELValue.getAlignmentSource()); + LValue BaseLValue = loadToBegin( + *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue); // Store the address of the original variable associated with the LHS // implicit variable. PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address { @@ -741,49 +1035,114 @@ void CodeGenFunction::EmitOMPReductionClauseInit( }); // Emit reduction copy. bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, PrivateVD, BaseLValue, ASELValue, - OriginalBaseLValue]() -> Address { + OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue, + OriginalBaseLValue, DRD, IRed]() -> Address { // Emit private VarDecl with reduction init. 
- EmitDecl(*PrivateVD); - auto Addr = GetAddrOfLocalVar(PrivateVD); + AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); + auto Addr = Emission.getAllocatedAddress(); + if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { + emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, + ASELValue.getAddress(), + ASELValue.getType()); + } else + EmitAutoVarInit(Emission); + EmitAutoVarCleanups(Emission); auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), ASELValue.getPointer()); auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast( - Ptr, OriginalBaseLValue.getPointer()->getType()); - return Address(Ptr, OriginalBaseLValue.getAlignment()); + return castToBase(*this, OrigVD->getType(), ASELValue.getType(), + OriginalBaseLValue, Ptr); }); assert(IsRegistered && "private var already registered as private"); // Silence the warning about unused variable. (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); + PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { + return Builder.CreateElementBitCast( + GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()), + "rhs.begin"); }); } else { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address { + QualType Type = PrivateVD->getType(); + if (getContext().getAsArrayType(Type)) { + // Store the address of the original variable associated with the LHS + // implicit variable. DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), CapturedStmtInfo->lookup(OrigVD) != nullptr, IRef->getType(), VK_LValue, IRef->getExprLoc()); - return EmitLValue(&DRE).getAddress(); - }); - // Emit reduction copy. - bool IsRegistered = - PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address { - // Emit private VarDecl with reduction init. - EmitDecl(*PrivateVD); - return GetAddrOfLocalVar(PrivateVD); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); - }); + Address OriginalAddr = EmitLValue(&DRE).getAddress(); + PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr, + LHSVD]() -> Address { + OriginalAddr = Builder.CreateElementBitCast( + OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); + return OriginalAddr; + }); + bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { + if (Type->isVariablyModifiedType()) { + CodeGenFunction::OpaqueValueMapping OpaqueMap( + *this, cast<OpaqueValueExpr>( + getContext() + .getAsVariableArrayType(PrivateVD->getType()) + ->getSizeExpr()), + RValue::get( + getTypeSize(OrigVD->getType().getNonReferenceType()))); + EmitVariablyModifiedType(Type); + } + auto Emission = EmitAutoVarAlloca(*PrivateVD); + auto Addr = Emission.getAllocatedAddress(); + auto *Init = PrivateVD->getInit(); + EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), + DRD ? *IRed : Init, OriginalAddr); + EmitAutoVarCleanups(Emission); + return Emission.getAllocatedAddress(); + }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. 
+ (void)IsRegistered; + PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { + return Builder.CreateElementBitCast( + GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), "rhs.begin"); + }); + } else { + // Store the address of the original variable associated with the LHS + // implicit variable. + Address OriginalAddr = Address::invalid(); + PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef, + &OriginalAddr]() -> Address { + DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), + CapturedStmtInfo->lookup(OrigVD) != nullptr, + IRef->getType(), VK_LValue, IRef->getExprLoc()); + OriginalAddr = EmitLValue(&DRE).getAddress(); + return OriginalAddr; + }); + // Emit reduction copy. + bool IsRegistered = PrivateScope.addPrivate( + OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address { + // Emit private VarDecl with reduction init. + AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); + auto Addr = Emission.getAllocatedAddress(); + if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { + emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, + OriginalAddr, + PrivateVD->getType()); + } else + EmitAutoVarInit(Emission); + EmitAutoVarCleanups(Emission); + return Addr; + }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { + return GetAddrOfLocalVar(PrivateVD); + }); + } } - ++ILHS, ++IRHS, ++IPriv; + ++ILHS; + ++IRHS; + ++IPriv; + ++IRed; } } } @@ -816,15 +1175,39 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( } } +static void emitPostUpdateForReductionClause( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { + if (!CGF.HaveInsertPoint()) + return; + llvm::BasicBlock *DoneBB = nullptr; + for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { + if (auto *PostUpdate = C->getPostUpdateExpr()) { + if (!DoneBB) { + if (auto *Cond = CondGen(CGF)) { + // If the first post-update expression is found, emit conditional + // block if it was requested. + auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); + DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); + CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); + CGF.EmitBlock(ThenBB); + } + } + CGF.EmitIgnoredExpr(PostUpdate); + } + } + if (DoneBB) + CGF.EmitBlock(DoneBB, /*IsFinished=*/true); +} + static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, const OMPExecutableDirective &S, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - llvm::SmallVector<llvm::Value *, 16> CapturedVars; - CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); - auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( - S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). 
+ emitParallelOrTeamsOutlinedFunction(S, + *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), @@ -833,7 +1216,7 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, CGF, NumThreads, NumThreadsClause->getLocStart()); } if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { - CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); + CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); CGF.CGM.getOpenMPRuntime().emitProcBindClause( CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); } @@ -845,22 +1228,24 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, break; } } + + OMPLexicalScope Scope(CGF, S); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, CapturedVars, IfCond); } void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); // Emit parallel region as a standalone region. - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPPrivateScope PrivateScope(CGF); bool Copyins = CGF.EmitOMPCopyinClause(S); - bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope); - if (Copyins || Firstprivates) { + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + if (Copyins) { // Emit implicit barrier to synchronize threads and avoid data races on - // initialization of firstprivate variables or propagation master's thread - // values of threadprivate variables to local instances of that variables - // of all other implicit threads. + // propagation master's thread values of threadprivate variables to local + // instances of that variables of all other implicit threads. CGF.CGM.getOpenMPRuntime().emitBarrierCall( CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); @@ -872,6 +1257,8 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CGF.EmitOMPReductionClauseFinal(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); + emitPostUpdateForReductionClause( + *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, @@ -883,9 +1270,8 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, } // Update the linear variables. for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { - for (auto U : C->updates()) { + for (auto *U : C->updates()) EmitIgnoredExpr(U); - } } // On a continue in the body, jump to the end. @@ -908,7 +1294,7 @@ void CodeGenFunction::EmitOMPInnerLoop( // Start the loop with a block that tests the condition. auto CondBlock = createBasicBlock("omp.inner.for.cond"); EmitBlock(CondBlock); - LoopStack.push(CondBlock); + LoopStack.push(CondBlock, Builder.getCurrentDebugLocation()); // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. @@ -950,19 +1336,21 @@ void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { return; // Emit inits for the linear variables. 
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { - for (auto Init : C->inits()) { + for (auto *Init : C->inits()) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); - auto *OrigVD = cast<VarDecl>( - cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())->getDecl()); - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - VD->getInit()->getType(), VK_LValue, - VD->getInit()->getExprLoc()); - AutoVarEmission Emission = EmitAutoVarAlloca(*VD); - EmitExprAsInit(&DRE, VD, - MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()), - /*capturedByInit=*/false); - EmitAutoVarCleanups(Emission); + if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { + AutoVarEmission Emission = EmitAutoVarAlloca(*VD); + auto *OrigVD = cast<VarDecl>(Ref->getDecl()); + DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), + CapturedStmtInfo->lookup(OrigVD) != nullptr, + VD->getInit()->getType(), VK_LValue, + VD->getInit()->getExprLoc()); + EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), + VD->getType()), + /*capturedByInit=*/false); + EmitAutoVarCleanups(Emission); + } else + EmitVarDecl(*VD); } // Emit the linear steps for the linear clauses. // If a step is not constant, it is pre-calculated before the loop. @@ -975,27 +1363,42 @@ void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { } } -static void emitLinearClauseFinal(CodeGenFunction &CGF, - const OMPLoopDirective &D) { - if (!CGF.HaveInsertPoint()) +void CodeGenFunction::EmitOMPLinearClauseFinal( + const OMPLoopDirective &D, + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { + if (!HaveInsertPoint()) return; + llvm::BasicBlock *DoneBB = nullptr; // Emit the final values of the linear variables. for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { auto IC = C->varlist_begin(); - for (auto F : C->finals()) { + for (auto *F : C->finals()) { + if (!DoneBB) { + if (auto *Cond = CondGen(*this)) { + // If the first post-update expression is found, emit conditional + // block if it was requested. 
+ auto *ThenBB = createBasicBlock(".omp.linear.pu"); + DoneBB = createBasicBlock(".omp.linear.pu.done"); + Builder.CreateCondBr(Cond, ThenBB, DoneBB); + EmitBlock(ThenBB); + } + } auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, + CapturedStmtInfo->lookup(OrigVD) != nullptr, (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); - Address OrigAddr = CGF.EmitLValue(&DRE).getAddress(); - CodeGenFunction::OMPPrivateScope VarScope(CGF); - VarScope.addPrivate(OrigVD, - [OrigAddr]() -> Address { return OrigAddr; }); + Address OrigAddr = EmitLValue(&DRE).getAddress(); + CodeGenFunction::OMPPrivateScope VarScope(*this); + VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; }); (void)VarScope.Privatize(); - CGF.EmitIgnoredExpr(F); + EmitIgnoredExpr(F); ++IC; } + if (auto *PostUpdate = C->getPostUpdateExpr()) + EmitIgnoredExpr(PostUpdate); } + if (DoneBB) + EmitBlock(DoneBB, /*IsFinished=*/true); } static void emitAlignedClause(CodeGenFunction &CGF, @@ -1031,25 +1434,34 @@ static void emitAlignedClause(CodeGenFunction &CGF, } } -static void emitPrivateLoopCounters(CodeGenFunction &CGF, - CodeGenFunction::OMPPrivateScope &LoopScope, - ArrayRef<Expr *> Counters, - ArrayRef<Expr *> PrivateCounters) { - if (!CGF.HaveInsertPoint()) +void CodeGenFunction::EmitOMPPrivateLoopCounters( + const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { + if (!HaveInsertPoint()) return; - auto I = PrivateCounters.begin(); - for (auto *E : Counters) { + auto I = S.private_counters().begin(); + for (auto *E : S.counters()) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); - Address Addr = Address::invalid(); - (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { + (void)LoopScope.addPrivate(VD, [&]() -> Address { // Emit var without initialization. - auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD); - CGF.EmitAutoVarCleanups(VarEmission); - Addr = VarEmission.getAllocatedAddress(); - return Addr; + if (!LocalDeclMap.count(PrivateVD)) { + auto VarEmission = EmitAutoVarAlloca(*PrivateVD); + EmitAutoVarCleanups(VarEmission); + } + DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), + /*RefersToEnclosingVariableOrCapture=*/false, + (*I)->getType(), VK_LValue, (*I)->getExprLoc()); + return EmitLValue(&DRE).getAddress(); }); - (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; }); + if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || + VD->hasGlobalStorage()) { + (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { + DeclRefExpr DRE(const_cast<VarDecl *>(VD), + LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), + E->getType(), VK_LValue, E->getExprLoc()); + return EmitLValue(&DRE).getAddress(); + }); + } ++I; } } @@ -1061,8 +1473,7 @@ static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, return; { CodeGenFunction::OMPPrivateScope PreCondScope(CGF); - emitPrivateLoopCounters(CGF, PreCondScope, S.counters(), - S.private_counters()); + CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); (void)PreCondScope.Privatize(); // Get initial values of real counters. 
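
For reference, what the linear-clause finalization above means at the source level: after the loop, the original variable receives its final linear value. A small sketch, with step and trip count chosen arbitrarily (build with -fopenmp):

  #include <cstdio>

  int main() {
    int j = 10;
    #pragma omp simd linear(j : 2)
    for (int i = 0; i < 8; ++i) {
      // in iteration i the private j behaves as 10 + i * 2
    }
    // EmitOMPLinearClauseFinal is the piece that stores the final value back:
    std::printf("j = %d\n", j); // 26 with clang's finalization (10 + 8 * 2)
    return 0;
  }
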
for (auto I : S.inits()) { @@ -1073,25 +1484,35 @@ static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); } -static void -emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D, - CodeGenFunction::OMPPrivateScope &PrivateScope) { - if (!CGF.HaveInsertPoint()) +void CodeGenFunction::EmitOMPLinearClause( + const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { + if (!HaveInsertPoint()) return; + llvm::DenseSet<const VarDecl *> SIMDLCVs; + if (isOpenMPSimdDirective(D.getDirectiveKind())) { + auto *LoopDirective = cast<OMPLoopDirective>(&D); + for (auto *C : LoopDirective->counters()) { + SIMDLCVs.insert( + cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); + } + } for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { auto CurPrivate = C->privates().begin(); for (auto *E : C->varlists()) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); - bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { - // Emit private VarDecl with copy init. - CGF.EmitVarDecl(*PrivateVD); - return CGF.GetAddrOfLocalVar(PrivateVD); - }); - assert(IsRegistered && "linear var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; + if (!SIMDLCVs.count(VD->getCanonicalDecl())) { + bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { + // Emit private VarDecl with copy init. + EmitVarDecl(*PrivateVD); + return GetAddrOfLocalVar(PrivateVD); + }); + assert(IsRegistered && "linear var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + } else + EmitVarDecl(*PrivateVD); ++CurPrivate; } } @@ -1132,17 +1553,39 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, emitSimdlenSafelenClause(*this, D, IsMonotonic); } -void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) { +void CodeGenFunction::EmitOMPSimdFinal( + const OMPLoopDirective &D, + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { if (!HaveInsertPoint()) return; + llvm::BasicBlock *DoneBB = nullptr; auto IC = D.counters().begin(); + auto IPC = D.private_counters().begin(); for (auto F : D.finals()) { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); - if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) { - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); - Address OrigAddr = EmitLValue(&DRE).getAddress(); + auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); + auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); + if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) || + OrigVD->hasGlobalStorage() || CED) { + if (!DoneBB) { + if (auto *Cond = CondGen(*this)) { + // If the first post-update expression is found, emit conditional + // block if it was requested. 
+ auto *ThenBB = createBasicBlock(".omp.final.then"); + DoneBB = createBasicBlock(".omp.final.done"); + Builder.CreateCondBr(Cond, ThenBB, DoneBB); + EmitBlock(ThenBB); + } + } + Address OrigAddr = Address::invalid(); + if (CED) + OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); + else { + DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), + /*RefersToEnclosingVariableOrCapture=*/false, + (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); + OrigAddr = EmitLValue(&DRE).getAddress(); + } OMPPrivateScope VarScope(*this); VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; }); @@ -1150,12 +1593,15 @@ void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) { EmitIgnoredExpr(F); } ++IC; + ++IPC; } - emitLinearClauseFinal(*this, D); + if (DoneBB) + EmitBlock(DoneBB, /*IsFinished=*/true); } void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); // if (PreCond) { // for (IV in 0..LastIteration) BODY; // <Final counter/linear vars updates>; @@ -1198,15 +1644,14 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { emitAlignedClause(CGF, S); CGF.EmitOMPLinearClauseInit(S); - bool HasLastprivateClause; { OMPPrivateScope LoopScope(CGF); - emitPrivateLoopCounters(CGF, LoopScope, S.counters(), - S.private_counters()); - emitPrivateLinearVars(CGF, S, LoopScope); + CGF.EmitOMPPrivateLoopCounters(S, LoopScope); + CGF.EmitOMPLinearClause(S, LoopScope); CGF.EmitOMPPrivateClause(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); - HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); + bool HasLastprivateClause = + CGF.EmitOMPLastprivateClauseInit(S, LoopScope); (void)LoopScope.Privatize(); CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), @@ -1215,104 +1660,42 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGF.EmitStopPoint(&S); }, [](CodeGenFunction &) {}); + CGF.EmitOMPSimdFinal( + S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); // Emit final copy of the lastprivate variables at the end of loops. - if (HasLastprivateClause) { - CGF.EmitOMPLastprivateClauseFinal(S); - } + if (HasLastprivateClause) + CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); CGF.EmitOMPReductionClauseFinal(S); + emitPostUpdateForReductionClause( + CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } - CGF.EmitOMPSimdFinal(S); + CGF.EmitOMPLinearClauseFinal( + S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); // Emit: if (PreCond) - end. if (ContBlock) { CGF.EmitBranch(ContBlock); CGF.EmitBlock(ContBlock, true); } }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } -void CodeGenFunction::EmitOMPForOuterLoop( - OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic, +void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { auto &RT = CGM.getOpenMPRuntime(); - // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 
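
A hand-written sketch, in plain C++ rather than clang API calls, of the control-flow shape the EmitOMPSimdDirective lowering above produces for something like '#pragma omp simd lastprivate(x)': the loop body and the final copies are both guarded by the precondition block:

  #include <cstdio>

  int main() {
    const int n = 8;
    int x = 0;
    int x_priv = 0;                   // stands in for the privatized lastprivate copy
    if (0 < n) {                      // omp.precond.then
      for (int iv = 0; iv < n; ++iv)  // omp.inner.for
        x_priv = iv;
      x = x_priv;                     // final copy-out (EmitOMPLastprivateClauseFinal)
    }                                 // omp.precond.end
    std::printf("x = %d\n", x);       // prints 7
    return 0;
  }
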
- const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind); - - assert((Ordered || - !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) && - "static non-chunked schedule does not need outer loop"); - - // Emit outer loop. - // - // OpenMP [2.7.1, Loop Construct, Description, table 2-1] - // When schedule(dynamic,chunk_size) is specified, the iterations are - // distributed to threads in the team in chunks as the threads request them. - // Each thread executes a chunk of iterations, then requests another chunk, - // until no chunks remain to be distributed. Each chunk contains chunk_size - // iterations, except for the last chunk to be distributed, which may have - // fewer iterations. When no chunk_size is specified, it defaults to 1. - // - // When schedule(guided,chunk_size) is specified, the iterations are assigned - // to threads in the team in chunks as the executing threads request them. - // Each thread executes a chunk of iterations, then requests another chunk, - // until no chunks remain to be assigned. For a chunk_size of 1, the size of - // each chunk is proportional to the number of unassigned iterations divided - // by the number of threads in the team, decreasing to 1. For a chunk_size - // with value k (greater than 1), the size of each chunk is determined in the - // same way, with the restriction that the chunks do not contain fewer than k - // iterations (except for the last chunk to be assigned, which may have fewer - // than k iterations). - // - // When schedule(auto) is specified, the decision regarding scheduling is - // delegated to the compiler and/or runtime system. The programmer gives the - // implementation the freedom to choose any possible mapping of iterations to - // threads in the team. - // - // When schedule(runtime) is specified, the decision regarding scheduling is - // deferred until run time, and the schedule and chunk size are taken from the - // run-sched-var ICV. If the ICV is set to auto, the schedule is - // implementation defined - // - // while(__kmpc_dispatch_next(&LB, &UB)) { - // idx = LB; - // while (idx <= UB) { BODY; ++idx; - // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. - // } // inner loop - // } - // - // OpenMP [2.7.1, Loop Construct, Description, table 2-1] - // When schedule(static, chunk_size) is specified, iterations are divided into - // chunks of size chunk_size, and the chunks are assigned to the threads in - // the team in a round-robin fashion in the order of the thread number. - // - // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { - // while (idx <= UB) { BODY; ++idx; } // inner loop - // LB = LB + ST; - // UB = UB + ST; - // } - // - const Expr *IVExpr = S.getIterationVariable(); const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - if (DynamicOrOrdered) { - llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); - RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, Ordered, UBVal, Chunk); - } else { - RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk); - } - auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); // Start the loop with a block that tests the condition. 
auto CondBlock = createBasicBlock("omp.dispatch.cond"); EmitBlock(CondBlock); - LoopStack.push(CondBlock); + LoopStack.push(CondBlock, Builder.getCurrentDebugLocation()); llvm::Value *BoolCondVal = nullptr; if (!DynamicOrOrdered) { @@ -1323,8 +1706,8 @@ void CodeGenFunction::EmitOMPForOuterLoop( // IV < UB BoolCondVal = EvaluateExprAsBool(S.getCond()); } else { - BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, - IL, LB, UB, ST); + BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL, + LB, UB, ST); } // If there are any cleanups between here and the loop-exit scope, @@ -1384,8 +1767,167 @@ void CodeGenFunction::EmitOMPForOuterLoop( EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. - if (!DynamicOrOrdered) - RT.emitForStaticFinish(*this, S.getLocEnd()); + auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { + if (!DynamicOrOrdered) + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); + }; + OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); +} + +void CodeGenFunction::EmitOMPForOuterLoop( + const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { + auto &RT = CGM.getOpenMPRuntime(); + + // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). + const bool DynamicOrOrdered = + Ordered || RT.isDynamic(ScheduleKind.Schedule); + + assert((Ordered || + !RT.isStaticNonchunked(ScheduleKind.Schedule, + /*Chunked=*/Chunk != nullptr)) && + "static non-chunked schedule does not need outer loop"); + + // Emit outer loop. + // + // OpenMP [2.7.1, Loop Construct, Description, table 2-1] + // When schedule(dynamic,chunk_size) is specified, the iterations are + // distributed to threads in the team in chunks as the threads request them. + // Each thread executes a chunk of iterations, then requests another chunk, + // until no chunks remain to be distributed. Each chunk contains chunk_size + // iterations, except for the last chunk to be distributed, which may have + // fewer iterations. When no chunk_size is specified, it defaults to 1. + // + // When schedule(guided,chunk_size) is specified, the iterations are assigned + // to threads in the team in chunks as the executing threads request them. + // Each thread executes a chunk of iterations, then requests another chunk, + // until no chunks remain to be assigned. For a chunk_size of 1, the size of + // each chunk is proportional to the number of unassigned iterations divided + // by the number of threads in the team, decreasing to 1. For a chunk_size + // with value k (greater than 1), the size of each chunk is determined in the + // same way, with the restriction that the chunks do not contain fewer than k + // iterations (except for the last chunk to be assigned, which may have fewer + // than k iterations). + // + // When schedule(auto) is specified, the decision regarding scheduling is + // delegated to the compiler and/or runtime system. The programmer gives the + // implementation the freedom to choose any possible mapping of iterations to + // threads in the team. + // + // When schedule(runtime) is specified, the decision regarding scheduling is + // deferred until run time, and the schedule and chunk size are taken from the + // run-sched-var ICV. 
If the ICV is set to auto, the schedule is + // implementation defined + // + // while(__kmpc_dispatch_next(&LB, &UB)) { + // idx = LB; + // while (idx <= UB) { BODY; ++idx; + // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. + // } // inner loop + // } + // + // OpenMP [2.7.1, Loop Construct, Description, table 2-1] + // When schedule(static, chunk_size) is specified, iterations are divided into + // chunks of size chunk_size, and the chunks are assigned to the threads in + // the team in a round-robin fashion in the order of the thread number. + // + // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { + // while (idx <= UB) { BODY; ++idx; } // inner loop + // LB = LB + ST; + // UB = UB + ST; + // } + // + + const Expr *IVExpr = S.getIterationVariable(); + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); + const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); + + if (DynamicOrOrdered) { + llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); + RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, + IVSigned, Ordered, UBVal, Chunk); + } else { + RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, + Ordered, IL, LB, UB, ST, Chunk); + } + + EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB, + ST, IL, Chunk); +} + +void CodeGenFunction::EmitOMPDistributeOuterLoop( + OpenMPDistScheduleClauseKind ScheduleKind, + const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { + + auto &RT = CGM.getOpenMPRuntime(); + + // Emit outer loop. + // Same behavior as a OMPForOuterLoop, except that schedule cannot be + // dynamic + // + + const Expr *IVExpr = S.getIterationVariable(); + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); + const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); + + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, + IVSize, IVSigned, /* Ordered = */ false, + IL, LB, UB, ST, Chunk); + + EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, + S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk); +} + +void CodeGenFunction::EmitOMPDistributeParallelForDirective( + const OMPDistributeParallelForDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_distribute_parallel_for, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for, + /*HasCancel=*/false); + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); +} + +void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( + const OMPDistributeParallelForSimdDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_distribute_parallel_for_simd, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); +} + +void CodeGenFunction::EmitOMPDistributeSimdDirective( + const OMPDistributeSimdDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_distribute_simd, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + CGF.EmitStmt( + 
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); +} + +void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( + const OMPTargetParallelForSimdDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_target_parallel_for_simd, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); } /// \brief Emit a helper variable and return corresponding lvalue. @@ -1408,42 +1950,6 @@ namespace { }; } // namespace -static std::pair<llvm::Value * /*Chunk*/, ScheduleKindModifiersTy> -emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S, - bool OuterRegion) { - // Detect the loop schedule kind and chunk. - auto ScheduleKind = OMPC_SCHEDULE_unknown; - OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown; - OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown; - llvm::Value *Chunk = nullptr; - if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { - ScheduleKind = C->getScheduleKind(); - M1 = C->getFirstScheduleModifier(); - M2 = C->getSecondScheduleModifier(); - if (const auto *Ch = C->getChunkSize()) { - if (auto *ImpRef = cast_or_null<DeclRefExpr>(C->getHelperChunkSize())) { - if (OuterRegion) { - const VarDecl *ImpVar = cast<VarDecl>(ImpRef->getDecl()); - CGF.EmitVarDecl(*ImpVar); - CGF.EmitStoreThroughLValue( - CGF.EmitAnyExpr(Ch), - CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(ImpVar), - ImpVar->getType())); - } else { - Ch = ImpRef; - } - } - if (!C->getHelperChunkSize() || !OuterRegion) { - Chunk = CGF.EmitScalarExpr(Ch); - Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(), - S.getIterationVariable()->getType(), - S.getLocStart()); - } - } - } - return std::make_pair(Chunk, ScheduleKindModifiersTy(ScheduleKind, M1, M2)); -} - bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { // Emit the loop iteration variable. auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); @@ -1464,6 +1970,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { bool HasLastprivateClause; // Check pre-condition. { + OMPLoopScope PreInitScope(*this, S); // Skip the entire loop if we don't meet the precondition. // If the condition constant folds and can be elided, avoid emitting the // whole loop. @@ -1481,24 +1988,34 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { incrementProfileCounter(&S); } + bool Ordered = false; + if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { + if (OrderedClause->getNumForLoops()) + RT.emitDoacrossInit(*this, S); + else + Ordered = true; + } + + llvm::DenseSet<const Expr *> EmittedFinals; emitAlignedClause(*this, S); EmitOMPLinearClauseInit(S); + // Emit helper vars inits. + LValue LB = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); + LValue ST = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); + LValue IL = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); + // Emit 'then' code. { - // Emit helper vars inits. 
- LValue LB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); - LValue UB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); - LValue ST = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); - LValue IL = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); - OMPPrivateScope LoopScope(*this); if (EmitOMPFirstprivateClause(S, LoopScope)) { // Emit implicit barrier to synchronize threads and avoid data races on - // initialization of firstprivate variables. + // initialization of firstprivate variables and post-update of + // lastprivate variables. CGM.getOpenMPRuntime().emitBarrierCall( *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); @@ -1506,28 +2023,31 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { EmitOMPPrivateClause(S, LoopScope); HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); EmitOMPReductionClauseInit(S, LoopScope); - emitPrivateLoopCounters(*this, LoopScope, S.counters(), - S.private_counters()); - emitPrivateLinearVars(*this, S, LoopScope); + EmitOMPPrivateLoopCounters(S, LoopScope); + EmitOMPLinearClause(S, LoopScope); (void)LoopScope.Privatize(); // Detect the loop schedule kind and chunk. - llvm::Value *Chunk; - OpenMPScheduleClauseKind ScheduleKind; - auto ScheduleInfo = - emitScheduleClause(*this, S, /*OuterRegion=*/false); - Chunk = ScheduleInfo.first; - ScheduleKind = ScheduleInfo.second.Kind; - const OpenMPScheduleClauseModifier M1 = ScheduleInfo.second.M1; - const OpenMPScheduleClauseModifier M2 = ScheduleInfo.second.M2; + llvm::Value *Chunk = nullptr; + OpenMPScheduleTy ScheduleKind; + if (auto *C = S.getSingleClause<OMPScheduleClause>()) { + ScheduleKind.Schedule = C->getScheduleKind(); + ScheduleKind.M1 = C->getFirstScheduleModifier(); + ScheduleKind.M2 = C->getSecondScheduleModifier(); + if (const auto *Ch = C->getChunkSize()) { + Chunk = EmitScalarExpr(Ch); + Chunk = EmitScalarConversion(Chunk, Ch->getType(), + S.getIterationVariable()->getType(), + S.getLocStart()); + } + } const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr; // OpenMP 4.5, 2.7.1 Loop Construct, Description. // If the static schedule kind is specified or if the ordered clause is // specified, and if no monotonic modifier is specified, the effect will // be as if the monotonic modifier was specified. - if (RT.isStaticNonchunked(ScheduleKind, + if (RT.isStaticNonchunked(ScheduleKind.Schedule, /* Chunked */ Chunk != nullptr) && !Ordered) { if (isOpenMPSimdDirective(S.getDirectiveKind())) @@ -1557,28 +2077,46 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { [](CodeGenFunction &) {}); EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. 
- RT.emitForStaticFinish(*this, S.getLocStart()); + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); + }; + OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); } else { - const bool IsMonotonic = Ordered || - ScheduleKind == OMPC_SCHEDULE_static || - ScheduleKind == OMPC_SCHEDULE_unknown || - M1 == OMPC_SCHEDULE_MODIFIER_monotonic || - M2 == OMPC_SCHEDULE_MODIFIER_monotonic; + const bool IsMonotonic = + Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || + ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || + ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || + ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), Chunk); } + if (isOpenMPSimdDirective(S.getDirectiveKind())) { + EmitOMPSimdFinal(S, + [&](CodeGenFunction &CGF) -> llvm::Value * { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); + } EmitOMPReductionClauseFinal(S); + // Emit post-update of the reduction variables if IsLastIter != 0. + emitPostUpdateForReductionClause( + *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); // Emit final copy of the lastprivate variables if IsLastIter != 0. if (HasLastprivateClause) EmitOMPLastprivateClauseFinal( - S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); - } - if (isOpenMPSimdDirective(S.getDirectiveKind())) { - EmitOMPSimdFinal(S); + S, isOpenMPSimdDirective(S.getDirectiveKind()), + Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); } + EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); // We're now done with the loop, so jump to the continuation block. if (ContBlock) { EmitBranch(ContBlock); @@ -1589,13 +2127,17 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { } void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); bool HasLastprivates = false; - auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, + PrePostActionTy &) { + OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); HasLastprivates = CGF.EmitOMPWorksharingLoop(S); }; - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, - S.hasCancel()); + { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, + S.hasCancel()); + } // Emit an implicit barrier at the end. 
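
A source-level loop that exercises the two paths above: a non-static (or chunked) schedule routes codegen through EmitOMPForOuterLoop's dispatch loop, and the lastprivate copy-out is guarded by the is-last-iteration flag (IL). Illustrative only; build with -fopenmp:

  #include <cstdio>

  int main() {
    const int n = 16;
    int last = -1;
    #pragma omp parallel for schedule(dynamic, 4) lastprivate(last)
    for (int i = 0; i < n; ++i)
      last = i; // only the thread that executed i == n-1 copies its value out
    std::printf("last = %d\n", last); // prints 15
    return 0;
  }
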
if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { @@ -1604,12 +2146,15 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { } void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); bool HasLastprivates = false; - auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, + PrePostActionTy &) { HasLastprivates = CGF.EmitOMPWorksharingLoop(S); }; - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); + { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); + } // Emit an implicit barrier at the end. if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { @@ -1626,12 +2171,12 @@ static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, return LVal; } -OpenMPDirectiveKind -CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { +void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); auto *CS = dyn_cast<CompoundStmt>(Stmt); bool HasLastprivates = false; - auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, + PrePostActionTy &) { auto &C = CGF.CGM.getContext(); auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Emit helper vars inits. @@ -1697,7 +2242,8 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CodeGenFunction::OMPPrivateScope LoopScope(CGF); if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { // Emit implicit barrier to synchronize threads and avoid data races on - // initialization of firstprivate variables. + // initialization of firstprivate variables and post-update of lastprivate + // variables. CGF.CGM.getOpenMPRuntime().emitBarrierCall( CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); @@ -1708,8 +2254,10 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { (void)LoopScope.Privatize(); // Emit static non-chunked loop. + OpenMPScheduleTy ScheduleKind; + ScheduleKind.Schedule = OMPC_SCHEDULE_static; CGF.CGM.getOpenMPRuntime().emitForStaticInit( - CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, + CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), UB.getAddress(), ST.getAddress()); // UB = min(UB, GlobalUB); @@ -1723,14 +2271,24 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, [](CodeGenFunction &) {}); // Tell the runtime we are done. - CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart()); + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); + }; + CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); CGF.EmitOMPReductionClauseFinal(S); + // Emit post-update of the reduction variables if IsLastIter != 0. + emitPostUpdateForReductionClause( + CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); // Emit final copy of the lastprivate variables if IsLastIter != 0. 
if (HasLastprivates) CGF.EmitOMPLastprivateClauseFinal( - S, CGF.Builder.CreateIsNotNull( - CGF.EmitLoadOfScalar(IL, S.getLocStart()))); + S, /*NoFinals=*/false, + CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart()))); }; bool HasCancel = false; @@ -1738,6 +2296,7 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { HasCancel = OSD->hasCancel(); else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) HasCancel = OPSD->hasCancel(); + OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, HasCancel); // Emit barrier for lastprivates only if 'sections' directive has 'nowait' @@ -1749,23 +2308,25 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_unknown); } - return OMPD_sections; } void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); - OpenMPDirectiveKind EmittedAs = EmitSections(S); + { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + EmitSections(S); + } // Emit an implicit barrier at the end. if (!S.getSingleClause<OMPNowaitClause>()) { - CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs); + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), + OMPD_sections); } } void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, S.hasCancel()); } @@ -1776,8 +2337,7 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { llvm::SmallVector<const Expr *, 8> SrcExprs; llvm::SmallVector<const Expr *, 8> AssignmentOps; // Check if there are any 'copyprivate' clauses associated with this - // 'single' - // construct. + // 'single' construct. 
// Build a list of copyprivate variables along with helper expressions // (<source>, <destination>, <destination>=<source> expressions) for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { @@ -1788,24 +2348,24 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { AssignmentOps.append(C->assignment_ops().begin(), C->assignment_ops().end()); } - LexicalScope Scope(*this, S.getSourceRange()); // Emit code for 'single' region along with 'copyprivate' clauses - bool HasFirstprivates; - auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) { - CodeGenFunction::OMPPrivateScope SingleScope(CGF); - HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope); + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope SingleScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); CGF.EmitOMPPrivateClause(S, SingleScope); (void)SingleScope.Privatize(); - CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; - CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), - CopyprivateVars, DestExprs, SrcExprs, - AssignmentOps); + { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), + CopyprivateVars, DestExprs, + SrcExprs, AssignmentOps); + } // Emit an implicit barrier at the end (to avoid data race on firstprivate // init or if no 'nowait' clause was specified and no 'copyprivate' clause). - if ((!S.getSingleClause<OMPNowaitClause>() || HasFirstprivates) && - CopyprivateVars.empty()) { + if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { CGM.getOpenMPRuntime().emitBarrierCall( *this, S.getLocStart(), S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); @@ -1813,21 +2373,23 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { } void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); } void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; Expr *Hint = nullptr; if (auto *HintClause = S.getSingleClause<OMPHintClause>()) Hint = HintClause->getHint(); + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitCriticalRegion(*this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart(), Hint); @@ -1837,9 +2399,8 @@ void CodeGenFunction::EmitOMPParallelForDirective( const OMPParallelForDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. 
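
A usage sketch for the copyprivate plumbing above, i.e. the <source>, <destination>, <destination> = <source> triples collected for emitSingleRegion. Illustrative only; build with -fopenmp:

  #include <cstdio>

  int main() {
    int token = 0;
    #pragma omp parallel firstprivate(token)
    {
      #pragma omp single copyprivate(token)
      token = 42;                         // one thread produces the value...
      // ...and copyprivate broadcasts it into every thread's private copy.
      std::printf("token = %d\n", token); // each thread prints 42
    }
    return 0;
  }
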
- LexicalScope Scope(*this, S.getSourceRange()); - (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); CGF.EmitOMPWorksharingLoop(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); @@ -1849,9 +2410,7 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( const OMPParallelForSimdDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. - LexicalScope Scope(*this, S.getSourceRange()); - (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitOMPWorksharingLoop(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); @@ -1861,93 +2420,148 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( const OMPParallelSectionsDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'sections' directive. - LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { - (void)CGF.EmitSections(S); + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitSections(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); } -void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { +void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, + const RegionCodeGenTy &BodyGen, + const TaskGenTy &TaskGen, + OMPTaskDataTy &Data) { // Emit outlined function for task construct. - LexicalScope Scope(*this, S.getSourceRange()); auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - auto CapturedStruct = GenerateCapturedStmtArgument(*CS); auto *I = CS->getCapturedDecl()->param_begin(); auto *PartId = std::next(I); + auto *TaskT = std::next(I, 4); + // Check if the task is final + if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { + // If the condition constant folds and can be elided, try to avoid emitting + // the condition and the dead arm of the if/else. + auto *Cond = Clause->getCondition(); + bool CondConstant; + if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) + Data.Final.setInt(CondConstant); + else + Data.Final.setPointer(EvaluateExprAsBool(Cond)); + } else { + // By default the task is not final. + Data.Final.setInt(/*IntVal=*/false); + } + // Check if the task has 'priority' clause. + if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { + // Runtime currently does not support codegen for priority clause argument. + // TODO: Add codegen for priority clause arg when runtime lib support it. + auto *Prio = Clause->getPriority(); + Data.Priority.setInt(Prio); + Data.Priority.setPointer(EmitScalarConversion( + EmitScalarExpr(Prio), Prio->getType(), + getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), + Prio->getExprLoc())); + } // The first function argument for tasks is a thread id, the second one is a // part id (0 for tied tasks, >=0 for untied task). llvm::DenseSet<const VarDecl *> EmittedAsPrivate; // Get list of private variables. 
- llvm::SmallVector<const Expr *, 8> PrivateVars; - llvm::SmallVector<const Expr *, 8> PrivateCopies; for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { auto IRef = C->varlist_begin(); for (auto *IInit : C->private_copies()) { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { - PrivateVars.push_back(*IRef); - PrivateCopies.push_back(IInit); + Data.PrivateVars.push_back(*IRef); + Data.PrivateCopies.push_back(IInit); } ++IRef; } } EmittedAsPrivate.clear(); // Get list of firstprivate variables. - llvm::SmallVector<const Expr *, 8> FirstprivateVars; - llvm::SmallVector<const Expr *, 8> FirstprivateCopies; - llvm::SmallVector<const Expr *, 8> FirstprivateInits; for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { auto IRef = C->varlist_begin(); auto IElemInitRef = C->inits().begin(); for (auto *IInit : C->private_copies()) { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { - FirstprivateVars.push_back(*IRef); - FirstprivateCopies.push_back(IInit); - FirstprivateInits.push_back(*IElemInitRef); + Data.FirstprivateVars.push_back(*IRef); + Data.FirstprivateCopies.push_back(IInit); + Data.FirstprivateInits.push_back(*IElemInitRef); } - ++IRef, ++IElemInitRef; + ++IRef; + ++IElemInitRef; } } - // Build list of dependences. - llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8> - Dependences; - for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { - for (auto *IRef : C->varlists()) { - Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); + // Get list of lastprivate variables (for taskloops). + llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; + for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { + auto IRef = C->varlist_begin(); + auto ID = C->destination_exprs().begin(); + for (auto *IInit : C->private_copies()) { + auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { + Data.LastprivateVars.push_back(*IRef); + Data.LastprivateCopies.push_back(IInit); + } + LastprivateDstsOrigs.insert( + {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), + cast<DeclRefExpr>(*IRef)}); + ++IRef; + ++ID; } } - auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars]( - CodeGenFunction &CGF) { + // Build list of dependences. + for (const auto *C : S.getClausesOfKind<OMPDependClause>()) + for (auto *IRef : C->varlists()) + Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); + auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs]( + CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. - auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); OMPPrivateScope Scope(CGF); - if (!PrivateVars.empty() || !FirstprivateVars.empty()) { + if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || + !Data.LastprivateVars.empty()) { auto *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); auto *PrivatesPtr = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); // Map privates. 
- llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> - PrivatePtrs; + llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; llvm::SmallVector<llvm::Value *, 16> CallArgs; CallArgs.push_back(PrivatesPtr); - for (auto *E : PrivateVars) { + for (auto *E : Data.PrivateVars) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Address PrivatePtr = CGF.CreateMemTemp( + CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr.getPointer()); + } + for (auto *E : Data.FirstprivateVars) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Address PrivatePtr = - CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), + ".firstpriv.ptr.addr"); PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); CallArgs.push_back(PrivatePtr.getPointer()); } - for (auto *E : FirstprivateVars) { + for (auto *E : Data.LastprivateVars) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Address PrivatePtr = - CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), + ".lastpriv.ptr.addr"); PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); CallArgs.push_back(PrivatePtr.getPointer()); } CGF.EmitRuntimeCall(CopyFn, CallArgs); + for (auto &&Pair : LastprivateDstsOrigs) { + auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); + DeclRefExpr DRE( + const_cast<VarDecl *>(OrigVD), + /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup( + OrigVD) != nullptr, + Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); + Scope.addPrivate(Pair.first, [&CGF, &DRE]() { + return CGF.EmitLValue(&DRE).getAddress(); + }); + } for (auto &&Pair : PrivatePtrs) { Address Replacement(CGF.Builder.CreateLoad(Pair.second), CGF.getContext().getDeclAlign(Pair.first)); @@ -1955,30 +2569,21 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { } } (void)Scope.Privatize(); - if (*PartId) { - // TODO: emit code for untied tasks. - } - CGF.EmitStmt(CS->getCapturedStmt()); + + Action.Enter(CGF); + BodyGen(CGF); }; - auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( - S, *I, OMPD_task, CodeGen); - // Check if we should emit tied or untied task. - bool Tied = !S.getSingleClause<OMPUntiedClause>(); - // Check if the task is final - llvm::PointerIntPair<llvm::Value *, 1, bool> Final; - if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { - // If the condition constant folds and can be elided, try to avoid emitting - // the condition and the dead arm of the if/else. - auto *Cond = Clause->getCondition(); - bool CondConstant; - if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) - Final.setInt(CondConstant); - else - Final.setPointer(EvaluateExprAsBool(Cond)); - } else { - // By default the task is not final. - Final.setInt(/*IntVal=*/false); - } + auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, + Data.NumberOfParts); + OMPLexicalScope Scope(*this, S); + TaskGen(*this, OutlinedFn, Data); +} + +void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { + // Emit outlined function for task construct. 
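
The private-pointer remapping above (.priv.ptr.addr / .firstpriv.ptr.addr and the CopyFn call) is what backs the usual capture-at-creation behavior of task firstprivates. A small sketch; build with -fopenmp:

  #include <cstdio>

  int main() {
    int captured = 7;
    #pragma omp parallel
    #pragma omp single
    {
      #pragma omp task firstprivate(captured)
      std::printf("task sees %d\n", captured); // 7: copied when the task is created
      captured = 99;                           // later write; the task's copy is unaffected
      #pragma omp taskwait
    }
    return 0;
  }
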
+ auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + auto CapturedStruct = GenerateCapturedStmtArgument(*CS); auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); const Expr *IfCond = nullptr; for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { @@ -1988,10 +2593,21 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { break; } } - CGM.getOpenMPRuntime().emitTaskCall( - *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy, - CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars, - FirstprivateCopies, FirstprivateInits, Dependences); + + OMPTaskDataTy Data; + // Check if we should emit tied or untied task. + Data.Tied = !S.getSingleClause<OMPUntiedClause>(); + auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt(CS->getCapturedStmt()); + }; + auto &&TaskGen = [&S, SharedsTy, CapturedStruct, + IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, + const OMPTaskDataTy &Data) { + CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn, + SharedsTy, CapturedStruct, IfCond, + Data); + }; + EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); } void CodeGenFunction::EmitOMPTaskyieldDirective( @@ -2009,10 +2625,11 @@ void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { void CodeGenFunction::EmitOMPTaskgroupDirective( const OMPTaskgroupDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); } @@ -2026,9 +2643,130 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { }(), S.getLocStart()); } +void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { + // Emit the loop iteration variable. + auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); + auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); + EmitVarDecl(*IVDecl); + + // Emit the iterations count variable. + // If it is not a variable, Sema decided to calculate iterations count on each + // iteration (e.g., it is foldable into a constant). + if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { + EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); + // Emit calculation of the iterations count. + EmitIgnoredExpr(S.getCalcLastIteration()); + } + + auto &RT = CGM.getOpenMPRuntime(); + + // Check pre-condition. + { + OMPLoopScope PreInitScope(*this, S); + // Skip the entire loop if we don't meet the precondition. + // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return; + } else { + auto *ThenBlock = createBasicBlock("omp.precond.then"); + ContBlock = createBasicBlock("omp.precond.end"); + emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, + getProfileCount(&S)); + EmitBlock(ThenBlock); + incrementProfileCounter(&S); + } + + // Emit 'then' code. + { + // Emit helper vars inits. 
+ LValue LB = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); + LValue ST = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); + LValue IL = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); + + OMPPrivateScope LoopScope(*this); + EmitOMPPrivateLoopCounters(S, LoopScope); + (void)LoopScope.Privatize(); + + // Detect the distribute schedule kind and chunk. + llvm::Value *Chunk = nullptr; + OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; + if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { + ScheduleKind = C->getDistScheduleKind(); + if (const auto *Ch = C->getChunkSize()) { + Chunk = EmitScalarExpr(Ch); + Chunk = EmitScalarConversion(Chunk, Ch->getType(), + S.getIterationVariable()->getType(), + S.getLocStart()); + } + } + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); + const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); + + // OpenMP [2.10.8, distribute Construct, Description] + // If dist_schedule is specified, kind must be static. If specified, + // iterations are divided into chunks of size chunk_size, chunks are + // assigned to the teams of the league in a round-robin fashion in the + // order of the team number. When no chunk_size is specified, the + // iteration space is divided into chunks that are approximately equal + // in size, and at most one chunk is distributed to each team of the + // league. The size of the chunks is unspecified in this case. + if (RT.isStaticNonchunked(ScheduleKind, + /* Chunked */ Chunk != nullptr)) { + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, + IVSize, IVSigned, /* Ordered = */ false, + IL.getAddress(), LB.getAddress(), + UB.getAddress(), ST.getAddress()); + auto LoopExit = + getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); + // UB = min(UB, GlobalUB); + EmitIgnoredExpr(S.getEnsureUpperBound()); + // IV = LB; + EmitIgnoredExpr(S.getInit()); + // while (idx <= UB) { BODY; ++idx; } + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), + S.getInc(), + [&S, LoopExit](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, LoopExit); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + EmitBlock(LoopExit.getBlock()); + // Tell the runtime we are done. + RT.emitForStaticFinish(*this, S.getLocStart()); + } else { + // Emit the outer loop, which requests its work chunk [LB..UB] from + // runtime and runs the inner loop to process it. + EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, + LB.getAddress(), UB.getAddress(), ST.getAddress(), + IL.getAddress(), Chunk); + } + } + + // We're now done with the loop, so jump to the continuation block. 
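
For reference, the dist_schedule handling above corresponds to source like the following, where chunks of iterations are handed to the teams of the league round-robin. A sketch only; whether it runs on a device or falls back to the host depends on the toolchain (build with -fopenmp):

  #include <cstdio>

  int main() {
    const int n = 16;
    int out[16] = {0};
    #pragma omp target teams distribute dist_schedule(static, 4) map(tofrom : out)
    for (int i = 0; i < n; ++i)
      out[i] = i * i; // each team fills the 4-iteration chunks it was assigned
    std::printf("out[15] = %d\n", out[15]); // prints 225
    return 0;
  }
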
+ if (ContBlock) { + EmitBranch(ContBlock); + EmitBlock(ContBlock, true); + } + } +} + void CodeGenFunction::EmitOMPDistributeDirective( const OMPDistributeDirective &S) { - llvm_unreachable("CodeGen for 'omp distribute' is not supported yet."); + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S); + }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, + false); } static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, @@ -2042,11 +2780,14 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, } void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { - if (!S.getAssociatedStmt()) + if (!S.getAssociatedStmt()) { + for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) + CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); return; - LexicalScope Scope(*this, S.getSourceRange()); + } auto *C = S.getSingleClause<OMPSIMDClause>(); - auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, + PrePostActionTy &Action) { if (C) { auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); llvm::SmallVector<llvm::Value *, 16> CapturedVars; @@ -2054,10 +2795,12 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); } else { + Action.Enter(CGF); CGF.EmitStmt( cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); } }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); } @@ -2104,8 +2847,9 @@ static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, if (LVal.isGlobalReg()) { CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); } else { - CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent - : llvm::Monotonic, + CGF.EmitAtomicStore(RVal, LVal, + IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent + : llvm::AtomicOrdering::Monotonic, LVal.isVolatile(), /*IsInit=*/false); } } @@ -2138,10 +2882,11 @@ static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, LValue VLValue = CGF.EmitLValue(V); RValue Res = XLValue.isGlobalReg() ? CGF.EmitLoadOfLValue(XLValue, Loc) - : CGF.EmitAtomicLoad(XLValue, Loc, - IsSeqCst ? llvm::SequentiallyConsistent - : llvm::Monotonic, - XLValue.isVolatile()); + : CGF.EmitAtomicLoad( + XLValue, Loc, + IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent + : llvm::AtomicOrdering::Monotonic, + XLValue.isVolatile()); // OpenMP, 2.12.6, atomic Construct // Any atomic construct with a seq_cst clause forces the atomically // performed operation to include an implicit flush operation without a @@ -2297,7 +3042,8 @@ static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); LValue XLValue = CGF.EmitLValue(X); RValue ExprRValue = CGF.EmitAnyExpr(E); - auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; + auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent + : llvm::AtomicOrdering::Monotonic; auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); auto *XRValExpr = IsXLHSInRHSPart ? 
LHS : RHS; @@ -2346,7 +3092,8 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, LValue VLValue = CGF.EmitLValue(V); LValue XLValue = CGF.EmitLValue(X); RValue ExprRValue = CGF.EmitAnyExpr(E); - auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; + auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent + : llvm::AtomicOrdering::Monotonic; QualType NewVValType; if (UE) { // 'x' is updated with some additional value. @@ -2472,6 +3219,13 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_nogroup: case OMPC_num_tasks: case OMPC_hint: + case OMPC_dist_schedule: + case OMPC_defaultmap: + case OMPC_uniform: + case OMPC_to: + case OMPC_from: + case OMPC_use_device_ptr: + case OMPC_is_device_ptr: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } @@ -2501,18 +3255,39 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { } } - LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, + PrePostActionTy &) { CGF.EmitStopPoint(CS); EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), S.getV(), S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart()); }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); } +std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/> +CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( + CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName, + bool IsOffloadEntry) { + llvm::Function *OutlinedFn = nullptr; + llvm::Constant *OutlinedFnID = nullptr; + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + (void)PrivateScope.Privatize(); + + Action.Enter(CGF); + CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); + return std::make_pair(OutlinedFn, OutlinedFnID); +} + void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); llvm::SmallVector<llvm::Value *, 16> CapturedVars; @@ -2558,15 +3333,50 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { ParentName = CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); - CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, - IsOffloadEntry); - + std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction( + CGM, S, ParentName, IsOffloadEntry); + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, CapturedVars); } -void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { - llvm_unreachable("CodeGen for 'omp teams' is not supported yet."); +static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) { + auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). 
+ emitParallelOrTeamsOutlinedFunction(S, + *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + + const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S); + const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>(); + const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>(); + if (NT || TL) { + Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; + Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; + + CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, + S.getLocStart()); + } + + OMPLexicalScope Scope(CGF, S); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); + CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, + CapturedVars); +} + +void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { + // Emit parallel region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); } void CodeGenFunction::EmitOMPCancellationPointDirective( @@ -2590,37 +3400,261 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { CodeGenFunction::JumpDest CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { - if (Kind == OMPD_parallel || Kind == OMPD_task) + if (Kind == OMPD_parallel || Kind == OMPD_task || + Kind == OMPD_target_parallel) return ReturnBlock; assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || - Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for); - return BreakContinueStack.back().BreakBlock; + Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || + Kind == OMPD_distribute_parallel_for || + Kind == OMPD_target_parallel_for); + return OMPCancelStack.getExitBlock(); } // Generate the instructions for '#pragma omp target data' directive. void CodeGenFunction::EmitOMPTargetDataDirective( const OMPTargetDataDirective &S) { - // emit the code inside the construct for now + // The target data enclosed region is implemented just by emitting the + // statement. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }; + + // If we don't have target devices, don't bother emitting the data mapping + // code. + if (CGM.getLangOpts().OMPTargetTriples.empty()) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_data, + CodeGen); + return; + } + + // Check if we have any if clause associated with the directive. + const Expr *IfCond = nullptr; + if (auto *C = S.getSingleClause<OMPIfClause>()) + IfCond = C->getCondition(); + + // Check if we have any device clause associated with the directive. + const Expr *Device = nullptr; + if (auto *C = S.getSingleClause<OMPDeviceClause>()) + Device = C->getDevice(); + + CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, CodeGen); +} + +void CodeGenFunction::EmitOMPTargetEnterDataDirective( + const OMPTargetEnterDataDirective &S) { + // If we don't have target devices, don't bother emitting the data mapping + // code. 
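emitCommonOMPTeamsDirective above only forwards num_teams and thread_limit to the runtime when one of the clauses is present. A hedged, source-level sketch of the directive it handles (identifiers below are invented for illustration):

    // Illustrative sketch only: the num_teams/thread_limit expressions are the
    // ones read via getSingleClause<OMPNumTeamsClause>() and
    // getSingleClause<OMPThreadLimitClause>() above.
    void init(int *a, int n) {
    #pragma omp target teams num_teams(8) thread_limit(64) map(tofrom: a[0:n])
    #pragma omp distribute
      for (int i = 0; i < n; ++i)
        a[i] = 0;
    }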
+ if (CGM.getLangOpts().OMPTargetTriples.empty()) + return; + + // Check if we have any if clause associated with the directive. + const Expr *IfCond = nullptr; + if (auto *C = S.getSingleClause<OMPIfClause>()) + IfCond = C->getCondition(); + + // Check if we have any device clause associated with the directive. + const Expr *Device = nullptr; + if (auto *C = S.getSingleClause<OMPDeviceClause>()) + Device = C->getDevice(); + + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); +} + +void CodeGenFunction::EmitOMPTargetExitDataDirective( + const OMPTargetExitDataDirective &S) { + // If we don't have target devices, don't bother emitting the data mapping + // code. + if (CGM.getLangOpts().OMPTargetTriples.empty()) + return; + + // Check if we have any if clause associated with the directive. + const Expr *IfCond = nullptr; + if (auto *C = S.getSingleClause<OMPIfClause>()) + IfCond = C->getCondition(); + + // Check if we have any device clause associated with the directive. + const Expr *Device = nullptr; + if (auto *C = S.getSingleClause<OMPDeviceClause>()) + Device = C->getDevice(); + + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); +} + +void CodeGenFunction::EmitOMPTargetParallelDirective( + const OMPTargetParallelDirective &S) { + // TODO: codegen for target parallel. +} + +void CodeGenFunction::EmitOMPTargetParallelForDirective( + const OMPTargetParallelForDirective &S) { + // TODO: codegen for target parallel for. +} + +/// Emit a helper variable and return corresponding lvalue. +static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, + const ImplicitParamDecl *PVD, + CodeGenFunction::OMPPrivateScope &Privates) { + auto *VDecl = cast<VarDecl>(Helper->getDecl()); + Privates.addPrivate( + VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); }); +} + +void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { + assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); + // Emit outlined function for task construct. auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_data, - [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); + auto CapturedStruct = GenerateCapturedStmtArgument(*CS); + auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + const Expr *IfCond = nullptr; + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_taskloop) { + IfCond = C->getCondition(); + break; + } + } + + OMPTaskDataTy Data; + // Check if taskloop must be emitted without taskgroup. + Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); + // TODO: Check if we should emit tied or untied task. + Data.Tied = true; + // Set scheduling for taskloop + if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) { + // grainsize clause + Data.Schedule.setInt(/*IntVal=*/false); + Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); + } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) { + // num_tasks clause + Data.Schedule.setInt(/*IntVal=*/true); + Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); + } + + auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { + // if (PreCond) { + // for (IV in 0..LastIteration) BODY; + // <Final counter/linear vars updates>; + // } + // + + // Emit: if (PreCond) - begin. 
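The enter/exit data handlers are no-ops when no offload targets are registered and otherwise hand the optional if and device expressions to a standalone runtime call. A sketch of the matching user-level directives (function and variable names are illustrative only):

    // Illustrative sketch only: standalone data-mapping directives of the kind
    // emitTargetDataStandAloneCall receives, together with the optional
    // if()/device() expressions extracted above.
    void process(double *buf, int n, int dev) {
    #pragma omp target enter data map(to: buf[0:n]) device(dev) if(n > 1024)
      // ... target regions using buf execute here ...
    #pragma omp target exit data map(from: buf[0:n]) device(dev) if(n > 1024)
    }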
+ // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + OMPLoopScope PreInitScope(CGF, S); + if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return; + } else { + auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); + ContBlock = CGF.createBasicBlock("taskloop.if.end"); + emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, + CGF.getProfileCount(&S)); + CGF.EmitBlock(ThenBlock); + CGF.incrementProfileCounter(&S); + } + + if (isOpenMPSimdDirective(S.getDirectiveKind())) + CGF.EmitOMPSimdInit(S); + + OMPPrivateScope LoopScope(CGF); + // Emit helper vars inits. + enum { LowerBound = 5, UpperBound, Stride, LastIter }; + auto *I = CS->getCapturedDecl()->param_begin(); + auto *LBP = std::next(I, LowerBound); + auto *UBP = std::next(I, UpperBound); + auto *STP = std::next(I, Stride); + auto *LIP = std::next(I, LastIter); + mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, + LoopScope); + mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, + LoopScope); + mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); + mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, + LoopScope); + CGF.EmitOMPPrivateLoopCounters(S, LoopScope); + bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); + (void)LoopScope.Privatize(); + // Emit the loop iteration variable. + const Expr *IVExpr = S.getIterationVariable(); + const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); + CGF.EmitVarDecl(*IVDecl); + CGF.EmitIgnoredExpr(S.getInit()); + + // Emit the iterations count variable. + // If it is not a variable, Sema decided to calculate iterations count on + // each iteration (e.g., it is foldable into a constant). + if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { + CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); + // Emit calculation of the iterations count. + CGF.EmitIgnoredExpr(S.getCalcLastIteration()); + } + + CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), + S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, JumpDest()); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + // Emit: if (PreCond) - end. + if (ContBlock) { + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } + // Emit final copy of the lastprivate variables if IsLastIter != 0. 
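For taskloop, Data.Schedule is filled from a grainsize or num_tasks clause (the two are mutually exclusive), and the guarded epilogue above copies lastprivate values back only from the task that executed the last iteration. A rough source-level example (names invented):

    // Illustrative sketch only.
    void tail(const int *v, int n, int *out) {
      int last = 0;
    #pragma omp taskloop grainsize(256) lastprivate(last)
      for (int i = 0; i < n; ++i)
        last = v[i];        // final value survives via the lastprivate copy
      *out = last;
    }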
+ if (HasLastprivateClause) { + CGF.EmitOMPLastprivateClauseFinal( + S, isOpenMPSimdDirective(S.getDirectiveKind()), + CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( + CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, + (*LIP)->getType(), S.getLocStart()))); + } + }; + auto &&TaskGen = [&S, SharedsTy, CapturedStruct, + IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, + const OMPTaskDataTy &Data) { + auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S, + OutlinedFn, SharedsTy, + CapturedStruct, IfCond, Data); + }; + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, + CodeGen); + }; + EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); } void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { - // emit the code inside the construct for now - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_taskloop, - [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); + EmitOMPTaskLoopBasedDirective(S); } void CodeGenFunction::EmitOMPTaskLoopSimdDirective( const OMPTaskLoopSimdDirective &S) { - // emit the code inside the construct for now - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_taskloop_simd, - [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); + EmitOMPTaskLoopBasedDirective(S); } +// Generate the instructions for '#pragma omp target update' directive. +void CodeGenFunction::EmitOMPTargetUpdateDirective( + const OMPTargetUpdateDirective &S) { + // If we don't have target devices, don't bother emitting the data mapping + // code. + if (CGM.getLangOpts().OMPTargetTriples.empty()) + return; + + // Check if we have any if clause associated with the directive. + const Expr *IfCond = nullptr; + if (auto *C = S.getSingleClause<OMPIfClause>()) + IfCond = C->getCondition(); + + // Check if we have any device clause associated with the directive. 
+ const Expr *Device = nullptr; + if (auto *C = S.getSingleClause<OMPDeviceClause>()) + Device = C->getDevice(); + + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGVTT.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGVTT.cpp index 4fb76710..5b90ee6 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGVTT.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGVTT.cpp @@ -44,7 +44,7 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT, const CXXRecordDecl *RD) { VTTBuilder Builder(CGM.getContext(), RD, /*GenerateDefinition=*/true); - llvm::Type *Int8PtrTy = CGM.Int8PtrTy, *Int64Ty = CGM.Int64Ty; + llvm::Type *Int8PtrTy = CGM.Int8PtrTy, *Int32Ty = CGM.Int32Ty; llvm::ArrayType *ArrayType = llvm::ArrayType::get(Int8PtrTy, Builder.getVTTComponents().size()); @@ -75,8 +75,8 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT, } llvm::Value *Idxs[] = { - llvm::ConstantInt::get(Int64Ty, 0), - llvm::ConstantInt::get(Int64Ty, AddressPoint) + llvm::ConstantInt::get(Int32Ty, 0), + llvm::ConstantInt::get(Int32Ty, AddressPoint) }; llvm::Constant *Init = llvm::ConstantExpr::getInBoundsGetElementPtr( @@ -121,7 +121,7 @@ llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) { llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(Name, ArrayType, llvm::GlobalValue::ExternalLinkage); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); return GV; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp index a40aab2..9570550 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp @@ -156,9 +156,7 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn, // Clone to thunk. llvm::ValueToValueMapTy VMap; - llvm::Function *NewFn = llvm::CloneFunction(BaseFn, VMap, - /*ModuleLevelChanges=*/false); - CGM.getModule().getFunctionList().push_back(NewFn); + llvm::Function *NewFn = llvm::CloneFunction(BaseFn, VMap); Fn->replaceAllUsesWith(NewFn); NewFn->takeName(Fn); Fn->eraseFromParent(); @@ -286,15 +284,14 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Value *Callee, CGM.getCXXABI().adjustCallArgsForDestructorThunk(*this, CurGD, CallArgs); // Add the rest of the arguments. 
- for (const ParmVarDecl *PD : MD->params()) + for (const ParmVarDecl *PD : MD->parameters()) EmitDelegateCallArg(CallArgs, PD, PD->getLocStart()); const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>(); #ifndef NDEBUG - const CGFunctionInfo &CallFnInfo = - CGM.getTypes().arrangeCXXMethodCall(CallArgs, FPT, - RequiredArgs::forPrototypePlus(FPT, 1)); + const CGFunctionInfo &CallFnInfo = CGM.getTypes().arrangeCXXMethodCall( + CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1, MD)); assert(CallFnInfo.getRegParm() == CurFnInfo->getRegParm() && CallFnInfo.isNoReturn() == CurFnInfo->isNoReturn() && CallFnInfo.getCallingConvention() == CurFnInfo->getCallingConvention()); @@ -607,6 +604,8 @@ llvm::Constant *CodeGenVTables::CreateVTableInitializer( llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); StringRef PureCallName = CGM.getCXXABI().GetPureVirtualCallName(); PureVirtualFn = CGM.CreateRuntimeFunction(Ty, PureCallName); + if (auto *F = dyn_cast<llvm::Function>(PureVirtualFn)) + F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); PureVirtualFn = llvm::ConstantExpr::getBitCast(PureVirtualFn, CGM.Int8PtrTy); } @@ -618,6 +617,8 @@ llvm::Constant *CodeGenVTables::CreateVTableInitializer( StringRef DeletedCallName = CGM.getCXXABI().GetDeletedVirtualCallName(); DeletedVirtualFn = CGM.CreateRuntimeFunction(Ty, DeletedCallName); + if (auto *F = dyn_cast<llvm::Function>(DeletedVirtualFn)) + F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); DeletedVirtualFn = llvm::ConstantExpr::getBitCast(DeletedVirtualFn, CGM.Int8PtrTy); } @@ -696,7 +697,7 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, CGM.setGlobalVisibility(VTable, RD); // V-tables are always unnamed_addr. - VTable->setUnnamedAddr(true); + VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); llvm::Constant *RTTI = CGM.GetAddrOfRTTIDescriptor( CGM.getContext().getTagDeclType(Base.getBase())); @@ -708,7 +709,7 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, VTLayout->getNumVTableThunks(), RTTI); VTable->setInitializer(Init); - CGM.EmitVTableBitSetEntries(VTable, *VTLayout.get()); + CGM.EmitVTableTypeMetadata(VTable, *VTLayout.get()); return VTable; } @@ -719,7 +720,7 @@ static bool shouldEmitAvailableExternallyVTable(const CodeGenModule &CGM, CGM.getCXXABI().canSpeculativelyEmitVTable(RD); } -/// Compute the required linkage of the v-table for the given class. +/// Compute the required linkage of the vtable for the given class. /// /// Note that we only call this at the end of the translation unit. llvm::GlobalVariable::LinkageTypes @@ -793,6 +794,10 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) { return DiscardableODRLinkage; case TSK_ExplicitInstantiationDeclaration: + // Explicit instantiations in MSVC do not provide vtables, so we must emit + // our own. + if (getTarget().getCXXABI().isMicrosoft()) + return DiscardableODRLinkage; return shouldEmitAvailableExternallyVTable(*this, RD) ? llvm::GlobalVariable::AvailableExternallyLinkage : llvm::GlobalVariable::ExternalLinkage; @@ -804,7 +809,7 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) { llvm_unreachable("Invalid TemplateSpecializationKind!"); } -/// This is a callback from Sema to tell us that that a particular v-table is +/// This is a callback from Sema to tell us that that a particular vtable is /// required to be emitted in this translation unit. 
/// /// This is only called for vtables that _must_ be emitted (mainly due to key @@ -832,38 +837,43 @@ CodeGenVTables::GenerateClassData(const CXXRecordDecl *RD) { /// the translation unit. /// /// The only semantic restriction here is that the object file should -/// not contain a v-table definition when that v-table is defined +/// not contain a vtable definition when that vtable is defined /// strongly elsewhere. Otherwise, we'd just like to avoid emitting -/// v-tables when unnecessary. +/// vtables when unnecessary. bool CodeGenVTables::isVTableExternal(const CXXRecordDecl *RD) { assert(RD->isDynamicClass() && "Non-dynamic classes have no VTable."); + // We always synthesize vtables if they are needed in the MS ABI. MSVC doesn't + // emit them even if there is an explicit template instantiation. + if (CGM.getTarget().getCXXABI().isMicrosoft()) + return false; + // If we have an explicit instantiation declaration (and not a - // definition), the v-table is defined elsewhere. + // definition), the vtable is defined elsewhere. TemplateSpecializationKind TSK = RD->getTemplateSpecializationKind(); if (TSK == TSK_ExplicitInstantiationDeclaration) return true; // Otherwise, if the class is an instantiated template, the - // v-table must be defined here. + // vtable must be defined here. if (TSK == TSK_ImplicitInstantiation || TSK == TSK_ExplicitInstantiationDefinition) return false; // Otherwise, if the class doesn't have a key function (possibly - // anymore), the v-table must be defined here. + // anymore), the vtable must be defined here. const CXXMethodDecl *keyFunction = CGM.getContext().getCurrentKeyFunction(RD); if (!keyFunction) return false; // Otherwise, if we don't have a definition of the key function, the - // v-table must be defined somewhere else. + // vtable must be defined somewhere else. return !keyFunction->hasBody(); } /// Given that we're currently at the end of the translation unit, and -/// we've emitted a reference to the v-table for this class, should -/// we define that v-table? +/// we've emitted a reference to the vtable for this class, should +/// we define that vtable? static bool shouldEmitVTableAtEndOfTranslationUnit(CodeGenModule &CGM, const CXXRecordDecl *RD) { // If vtable is internal then it has to be done. @@ -875,7 +885,7 @@ static bool shouldEmitVTableAtEndOfTranslationUnit(CodeGenModule &CGM, } /// Given that at some point we emitted a reference to one or more -/// v-tables, and that we are now at the end of the translation unit, +/// vtables, and that we are now at the end of the translation unit, /// decide whether we should emit them. 
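The explicit-instantiation rule in isVTableExternal corresponds to the usual C++ pattern below; under the Itanium ABI the vtable is expected from the translation unit holding the explicit instantiation definition, while the new Microsoft ABI check makes Clang synthesize it locally instead. The class and function names are invented for illustration:

    // Illustrative sketch only.
    template <typename T> struct Widget {
      virtual ~Widget() {}
      virtual T get() const { return T(); }
    };
    extern template struct Widget<int>;   // explicit instantiation declaration:
                                          // Itanium ABI treats the vtable as external
    Widget<int> *make() { return new Widget<int>(); }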
void CodeGenModule::EmitDeferredVTables() { #ifndef NDEBUG @@ -889,25 +899,47 @@ void CodeGenModule::EmitDeferredVTables() { VTables.GenerateClassData(RD); assert(savedSize == DeferredVTables.size() && - "deferred extra v-tables during v-table emission?"); + "deferred extra vtables during vtable emission?"); DeferredVTables.clear(); } -bool CodeGenModule::IsCFIBlacklistedRecord(const CXXRecordDecl *RD) { - if (RD->hasAttr<UuidAttr>() && - getContext().getSanitizerBlacklist().isBlacklistedType("attr:uuid")) +bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { + LinkageInfo LV = RD->getLinkageAndVisibility(); + if (!isExternallyVisible(LV.getLinkage())) return true; - return getContext().getSanitizerBlacklist().isBlacklistedType( - RD->getQualifiedNameAsString()); + if (RD->hasAttr<LTOVisibilityPublicAttr>() || RD->hasAttr<UuidAttr>()) + return false; + + if (getTriple().isOSBinFormatCOFF()) { + if (RD->hasAttr<DLLExportAttr>() || RD->hasAttr<DLLImportAttr>()) + return false; + } else { + if (LV.getVisibility() != HiddenVisibility) + return false; + } + + if (getCodeGenOpts().LTOVisibilityPublicStd) { + const DeclContext *DC = RD; + while (1) { + auto *D = cast<Decl>(DC); + DC = DC->getParent(); + if (isa<TranslationUnitDecl>(DC->getRedeclContext())) { + if (auto *ND = dyn_cast<NamespaceDecl>(D)) + if (const IdentifierInfo *II = ND->getIdentifier()) + if (II->isStr("std") || II->isStr("stdext")) + return false; + break; + } + } + } + + return true; } -void CodeGenModule::EmitVTableBitSetEntries(llvm::GlobalVariable *VTable, - const VTableLayout &VTLayout) { - if (!LangOpts.Sanitize.has(SanitizerKind::CFIVCall) && - !LangOpts.Sanitize.has(SanitizerKind::CFINVCall) && - !LangOpts.Sanitize.has(SanitizerKind::CFIDerivedCast) && - !LangOpts.Sanitize.has(SanitizerKind::CFIUnrelatedCast)) +void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, + const VTableLayout &VTLayout) { + if (!getCodeGenOpts().PrepareForLTO) return; CharUnits PointerWidth = @@ -916,12 +948,8 @@ void CodeGenModule::EmitVTableBitSetEntries(llvm::GlobalVariable *VTable, typedef std::pair<const CXXRecordDecl *, unsigned> BSEntry; std::vector<BSEntry> BitsetEntries; // Create a bit set entry for each address point. - for (auto &&AP : VTLayout.getAddressPoints()) { - if (IsCFIBlacklistedRecord(AP.first.getBase())) - continue; - + for (auto &&AP : VTLayout.getAddressPoints()) BitsetEntries.push_back(std::make_pair(AP.first.getBase(), AP.second)); - } // Sort the bit set entries for determinism. std::sort(BitsetEntries.begin(), BitsetEntries.end(), @@ -949,10 +977,7 @@ void CodeGenModule::EmitVTableBitSetEntries(llvm::GlobalVariable *VTable, return E1.second < E2.second; }); - llvm::NamedMDNode *BitsetsMD = - getModule().getOrInsertNamedMetadata("llvm.bitsets"); for (auto BitsetEntry : BitsetEntries) - CreateVTableBitSetEntry(BitsetsMD, VTable, - PointerWidth * BitsetEntry.second, - BitsetEntry.first); + AddVTableTypeMetadata(VTable, PointerWidth * BitsetEntry.second, + BitsetEntry.first); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h b/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h index 3ccc4cd..53a376d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h @@ -445,7 +445,7 @@ class AggValueSlot { // Qualifiers Qualifiers Quals; - unsigned short Alignment; + unsigned Alignment; /// DestructedFlag - This is set to true if some external code is /// responsible for setting up a destructor for the slot. 
Otherwise diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp index 643c996..166f44f 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp @@ -26,45 +26,41 @@ using namespace clang; using namespace CodeGen; -CodeGenABITypes::CodeGenABITypes(ASTContext &C, llvm::Module &M, - CoverageSourceInfo *CoverageInfo) - : CGO(new CodeGenOptions), HSO(new HeaderSearchOptions), - PPO(new PreprocessorOptions), - CGM(new CodeGen::CodeGenModule(C, *HSO, *PPO, *CGO, M, C.getDiagnostics(), - CoverageInfo)) {} - -// Explicitly out-of-line because ~CodeGenModule() is private but -// CodeGenABITypes.h is part of clang's API. -CodeGenABITypes::~CodeGenABITypes() = default; - const CGFunctionInfo & -CodeGenABITypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, - QualType receiverType) { - return CGM->getTypes().arrangeObjCMessageSendSignature(MD, receiverType); +CodeGen::arrangeObjCMessageSendSignature(CodeGenModule &CGM, + const ObjCMethodDecl *MD, + QualType receiverType) { + return CGM.getTypes().arrangeObjCMessageSendSignature(MD, receiverType); } const CGFunctionInfo & -CodeGenABITypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> Ty, - const FunctionDecl *FD) { - return CGM->getTypes().arrangeFreeFunctionType(Ty, FD); +CodeGen::arrangeFreeFunctionType(CodeGenModule &CGM, + CanQual<FunctionProtoType> Ty, + const FunctionDecl *FD) { + return CGM.getTypes().arrangeFreeFunctionType(Ty, FD); } const CGFunctionInfo & -CodeGenABITypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> Ty) { - return CGM->getTypes().arrangeFreeFunctionType(Ty); +CodeGen::arrangeFreeFunctionType(CodeGenModule &CGM, + CanQual<FunctionNoProtoType> Ty) { + return CGM.getTypes().arrangeFreeFunctionType(Ty); } const CGFunctionInfo & -CodeGenABITypes::arrangeCXXMethodType(const CXXRecordDecl *RD, - const FunctionProtoType *FTP, - const CXXMethodDecl *MD) { - return CGM->getTypes().arrangeCXXMethodType(RD, FTP, MD); +CodeGen::arrangeCXXMethodType(CodeGenModule &CGM, + const CXXRecordDecl *RD, + const FunctionProtoType *FTP, + const CXXMethodDecl *MD) { + return CGM.getTypes().arrangeCXXMethodType(RD, FTP, MD); } -const CGFunctionInfo &CodeGenABITypes::arrangeFreeFunctionCall( - CanQualType returnType, ArrayRef<CanQualType> argTypes, - FunctionType::ExtInfo info, RequiredArgs args) { - return CGM->getTypes().arrangeLLVMFunctionInfo( +const CGFunctionInfo & +CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM, + CanQualType returnType, + ArrayRef<CanQualType> argTypes, + FunctionType::ExtInfo info, + RequiredArgs args) { + return CGM.getTypes().arrangeLLVMFunctionInfo( returnType, /*IsInstanceMethod=*/false, /*IsChainCall=*/false, argTypes, - info, args); + info, {}, args); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp index 0a670ab..49738a2 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp @@ -46,14 +46,13 @@ namespace clang { const CodeGenOptions &CodeGenOpts; const TargetOptions &TargetOpts; const LangOptions &LangOpts; - raw_pwrite_stream *AsmOutStream; + std::unique_ptr<raw_pwrite_stream> AsmOutStream; ASTContext *Context; Timer LLVMIRGeneration; std::unique_ptr<CodeGenerator> Gen; - std::unique_ptr<llvm::Module> TheModule; SmallVector<std::pair<unsigned, std::unique_ptr<llvm::Module>>, 4> LinkModules; 
@@ -69,11 +68,12 @@ namespace clang { const TargetOptions &TargetOpts, const LangOptions &LangOpts, bool TimePasses, const std::string &InFile, const SmallVectorImpl<std::pair<unsigned, llvm::Module *>> &LinkModules, - raw_pwrite_stream *OS, LLVMContext &C, + std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C, CoverageSourceInfo *CoverageInfo = nullptr) : Diags(Diags), Action(Action), CodeGenOpts(CodeGenOpts), - TargetOpts(TargetOpts), LangOpts(LangOpts), AsmOutStream(OS), - Context(nullptr), LLVMIRGeneration("LLVM IR Generation Time"), + TargetOpts(TargetOpts), LangOpts(LangOpts), + AsmOutStream(std::move(OS)), Context(nullptr), + LLVMIRGeneration("LLVM IR Generation Time"), Gen(CreateLLVMCodeGen(Diags, InFile, HeaderSearchOpts, PPOpts, CodeGenOpts, C, CoverageInfo)) { llvm::TimePassesIsEnabled = TimePasses; @@ -81,7 +81,10 @@ namespace clang { this->LinkModules.push_back( std::make_pair(I.first, std::unique_ptr<llvm::Module>(I.second))); } - std::unique_ptr<llvm::Module> takeModule() { return std::move(TheModule); } + llvm::Module *getModule() const { return Gen->GetModule(); } + std::unique_ptr<llvm::Module> takeModule() { + return std::unique_ptr<llvm::Module>(Gen->ReleaseModule()); + } void releaseLinkModules() { for (auto &I : LinkModules) I.second.release(); @@ -101,8 +104,6 @@ namespace clang { Gen->Initialize(Ctx); - TheModule.reset(Gen->GetModule()); - if (llvm::TimePassesIsEnabled) LLVMIRGeneration.stopTimer(); } @@ -123,14 +124,14 @@ namespace clang { return true; } - void HandleInlineMethodDefinition(CXXMethodDecl *D) override { + void HandleInlineFunctionDefinition(FunctionDecl *D) override { PrettyStackTraceDecl CrashInfo(D, SourceLocation(), Context->getSourceManager(), - "LLVM IR generation of inline method"); + "LLVM IR generation of inline function"); if (llvm::TimePassesIsEnabled) LLVMIRGeneration.startTimer(); - Gen->HandleInlineMethodDefinition(D); + Gen->HandleInlineFunctionDefinition(D); if (llvm::TimePassesIsEnabled) LLVMIRGeneration.stopTimer(); @@ -149,25 +150,12 @@ namespace clang { } // Silently ignore if we weren't initialized for some reason. - if (!TheModule) + if (!getModule()) return; - // Make sure IR generation is happy with the module. This is released by - // the module provider. - llvm::Module *M = Gen->ReleaseModule(); - if (!M) { - // The module has been released by IR gen on failures, do not double - // free. - TheModule.release(); - return; - } - - assert(TheModule.get() == M && - "Unexpected module change during IR generation"); - // Install an inline asm handler so that diagnostics get printed through // our diagnostics hooks. 
- LLVMContext &Ctx = TheModule->getContext(); + LLVMContext &Ctx = getModule()->getContext(); LLVMContext::InlineAsmDiagHandlerTy OldHandler = Ctx.getInlineAsmDiagnosticHandler(); void *OldContext = Ctx.getInlineAsmDiagnosticContext(); @@ -182,13 +170,15 @@ namespace clang { for (auto &I : LinkModules) { unsigned LinkFlags = I.first; CurLinkModule = I.second.get(); - if (Linker::linkModules(*M, std::move(I.second), LinkFlags)) + if (Linker::linkModules(*getModule(), std::move(I.second), LinkFlags)) return; } + EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef()); + EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts, - C.getTargetInfo().getDataLayoutString(), - TheModule.get(), Action, AsmOutStream); + C.getTargetInfo().getDataLayout(), + getModule(), Action, std::move(AsmOutStream)); Ctx.setInlineAsmDiagnosticHandler(OldHandler, OldContext); @@ -210,21 +200,12 @@ namespace clang { Gen->CompleteTentativeDefinition(D); } - void HandleVTable(CXXRecordDecl *RD) override { - Gen->HandleVTable(RD); - } - - void HandleLinkerOptionPragma(llvm::StringRef Opts) override { - Gen->HandleLinkerOptionPragma(Opts); + void AssignInheritanceModel(CXXRecordDecl *RD) override { + Gen->AssignInheritanceModel(RD); } - void HandleDetectMismatch(llvm::StringRef Name, - llvm::StringRef Value) override { - Gen->HandleDetectMismatch(Name, Value); - } - - void HandleDependentLibrary(llvm::StringRef Opts) override { - Gen->HandleDependentLibrary(Opts); + void HandleVTable(CXXRecordDecl *RD) override { + Gen->HandleVTable(RD); } static void InlineAsmDiagHandler(const llvm::SMDiagnostic &SM,void *Context, @@ -238,6 +219,13 @@ namespace clang { ((BackendConsumer *)Context)->DiagnosticHandlerImpl(DI); } + /// Get the best possible source location to represent a diagnostic that + /// may have associated debug info. + const FullSourceLoc + getBestLocationFromDebugLoc(const llvm::DiagnosticInfoWithDebugLocBase &D, + bool &BadDebugInfo, StringRef &Filename, + unsigned &Line, unsigned &Column) const; + void InlineAsmDiagHandler2(const llvm::SMDiagnostic &, SourceLocation LocCookie); @@ -250,6 +238,8 @@ namespace clang { /// \return True if the diagnostic has been successfully reported, false /// otherwise. bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D); + /// \brief Specialized handler for unsupported backend feature diagnostic. + void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D); /// \brief Specialized handlers for optimization remarks. /// Note that these handlers only accept remarks and they always handle /// them. @@ -426,25 +416,21 @@ BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) { return false; if (const Decl *ND = Gen->GetDeclForMangledName(D.getFunction().getName())) { + // FIXME: Shouldn't need to truncate to uint32_t Diags.Report(ND->getASTContext().getFullLoc(ND->getLocation()), diag::warn_fe_frame_larger_than) - << D.getStackSize() << Decl::castToDeclContext(ND); + << static_cast<uint32_t>(D.getStackSize()) << Decl::castToDeclContext(ND); return true; } return false; } -void BackendConsumer::EmitOptimizationMessage( - const llvm::DiagnosticInfoOptimizationBase &D, unsigned DiagID) { - // We only support warnings and remarks. 
- assert(D.getSeverity() == llvm::DS_Remark || - D.getSeverity() == llvm::DS_Warning); - +const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( + const llvm::DiagnosticInfoWithDebugLocBase &D, bool &BadDebugInfo, StringRef &Filename, + unsigned &Line, unsigned &Column) const { SourceManager &SourceMgr = Context->getSourceManager(); FileManager &FileMgr = SourceMgr.getFileManager(); - StringRef Filename; - unsigned Line, Column; SourceLocation DILoc; if (D.isLocationAvailable()) { @@ -455,6 +441,7 @@ void BackendConsumer::EmitOptimizationMessage( // source manager, so pass 1 if Column is not set. DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1); } + BadDebugInfo = DILoc.isInvalid(); } // If a location isn't available, try to approximate it using the associated @@ -463,18 +450,63 @@ void BackendConsumer::EmitOptimizationMessage( FullSourceLoc Loc(DILoc, SourceMgr); if (Loc.isInvalid()) if (const Decl *FD = Gen->GetDeclForMangledName(D.getFunction().getName())) - Loc = FD->getASTContext().getFullLoc(FD->getBodyRBrace()); + Loc = FD->getASTContext().getFullLoc(FD->getLocation()); + + if (DILoc.isInvalid() && D.isLocationAvailable()) + // If we were not able to translate the file:line:col information + // back to a SourceLocation, at least emit a note stating that + // we could not translate this location. This can happen in the + // case of #line directives. + Diags.Report(Loc, diag::note_fe_backend_invalid_loc) + << Filename << Line << Column; + + return Loc; +} + +void BackendConsumer::UnsupportedDiagHandler( + const llvm::DiagnosticInfoUnsupported &D) { + // We only support errors. + assert(D.getSeverity() == llvm::DS_Error); + + StringRef Filename; + unsigned Line, Column; + bool BadDebugInfo; + FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, + Line, Column); + + Diags.Report(Loc, diag::err_fe_backend_unsupported) << D.getMessage().str(); + + if (BadDebugInfo) + // If we were not able to translate the file:line:col information + // back to a SourceLocation, at least emit a note stating that + // we could not translate this location. This can happen in the + // case of #line directives. + Diags.Report(Loc, diag::note_fe_backend_invalid_loc) + << Filename << Line << Column; +} + +void BackendConsumer::EmitOptimizationMessage( + const llvm::DiagnosticInfoOptimizationBase &D, unsigned DiagID) { + // We only support warnings and remarks. + assert(D.getSeverity() == llvm::DS_Remark || + D.getSeverity() == llvm::DS_Warning); + + StringRef Filename; + unsigned Line, Column; + bool BadDebugInfo = false; + FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, + Line, Column); Diags.Report(Loc, DiagID) << AddFlagValue(D.getPassName() ? D.getPassName() : "") << D.getMsg().str(); - if (DILoc.isInvalid() && D.isLocationAvailable()) + if (BadDebugInfo) // If we were not able to translate the file:line:col information // back to a SourceLocation, at least emit a note stating that // we could not translate this location. This can happen in the // case of #line directives. - Diags.Report(Loc, diag::note_fe_backend_optimization_remark_invalid_loc) + Diags.Report(Loc, diag::note_fe_backend_invalid_loc) << Filename << Line << Column; } @@ -504,7 +536,7 @@ void BackendConsumer::OptimizationRemarkHandler( // llvm::DiagnosticInfo::AlwasyPrint or if the -Rpass-analysis flag has a // regular expression that matches the name of the pass name in \p D. 
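In practice these handlers fire either when a remark is marked always-print or when the user opts in with a matching pattern; for example, building with -Rpass-analysis=loop-vectorize (and, analogously, -Rpass=<regex> or -Rpass-missed=<regex>) turns the corresponding backend remarks into regular Clang diagnostics at the source locations recovered above.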
- if (D.getPassName() == llvm::DiagnosticInfo::AlwaysPrint || + if (D.shouldAlwaysPrint() || (CodeGenOpts.OptimizationRemarkAnalysisPattern && CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName()))) EmitOptimizationMessage( @@ -517,7 +549,7 @@ void BackendConsumer::OptimizationRemarkHandler( // llvm::DiagnosticInfo::AlwasyPrint or if the -Rpass-analysis flag has a // regular expression that matches the name of the pass name in \p D. - if (D.getPassName() == llvm::DiagnosticInfo::AlwaysPrint || + if (D.shouldAlwaysPrint() || (CodeGenOpts.OptimizationRemarkAnalysisPattern && CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName()))) EmitOptimizationMessage( @@ -530,7 +562,7 @@ void BackendConsumer::OptimizationRemarkHandler( // llvm::DiagnosticInfo::AlwasyPrint or if the -Rpass-analysis flag has a // regular expression that matches the name of the pass name in \p D. - if (D.getPassName() == llvm::DiagnosticInfo::AlwaysPrint || + if (D.shouldAlwaysPrint() || (CodeGenOpts.OptimizationRemarkAnalysisPattern && CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName()))) EmitOptimizationMessage( @@ -599,6 +631,9 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) { // handler. OptimizationFailureHandler(cast<DiagnosticInfoOptimizationFailure>(DI)); return; + case llvm::DK_Unsupported: + UnsupportedDiagHandler(cast<DiagnosticInfoUnsupported>(DI)); + return; default: // Plugin IDs are not bound to any value as they are set dynamically. ComputeDiagRemarkID(Severity, backend_plugin, DiagID); @@ -657,7 +692,7 @@ llvm::LLVMContext *CodeGenAction::takeLLVMContext() { return VMContext; } -static raw_pwrite_stream * +static std::unique_ptr<raw_pwrite_stream> GetOutputStream(CompilerInstance &CI, StringRef InFile, BackendAction Action) { switch (Action) { case Backend_EmitAssembly: @@ -680,7 +715,7 @@ GetOutputStream(CompilerInstance &CI, StringRef InFile, BackendAction Action) { std::unique_ptr<ASTConsumer> CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { BackendAction BA = static_cast<BackendAction>(Act); - raw_pwrite_stream *OS = GetOutputStream(CI, InFile, BA); + std::unique_ptr<raw_pwrite_stream> OS = GetOutputStream(CI, InFile, BA); if (BA != Backend_EmitNothing && !OS) return nullptr; @@ -720,7 +755,7 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(), CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, LinkModules, - OS, *VMContext, CoverageInfo)); + std::move(OS), *VMContext, CoverageInfo)); BEConsumer = Result.get(); return std::move(Result); } @@ -729,6 +764,22 @@ static void BitcodeInlineAsmDiagHandler(const llvm::SMDiagnostic &SM, void *Context, unsigned LocCookie) { SM.print(nullptr, llvm::errs()); + + auto Diags = static_cast<DiagnosticsEngine *>(Context); + unsigned DiagID; + switch (SM.getKind()) { + case llvm::SourceMgr::DK_Error: + DiagID = diag::err_fe_inline_asm; + break; + case llvm::SourceMgr::DK_Warning: + DiagID = diag::warn_fe_inline_asm; + break; + case llvm::SourceMgr::DK_Note: + DiagID = diag::note_fe_inline_asm; + break; + } + + Diags->Report(DiagID).AddString("cannot compile inline asm"); } void CodeGenAction::ExecuteAction() { @@ -736,7 +787,8 @@ void CodeGenAction::ExecuteAction() { if (getCurrentFileKind() == IK_LLVM_IR) { BackendAction BA = static_cast<BackendAction>(Act); CompilerInstance &CI = getCompilerInstance(); - raw_pwrite_stream 
*OS = GetOutputStream(CI, getCurrentFile(), BA); + std::unique_ptr<raw_pwrite_stream> OS = + GetOutputStream(CI, getCurrentFile(), BA); if (BA != Backend_EmitNothing && !OS) return; @@ -747,6 +799,11 @@ void CodeGenAction::ExecuteAction() { if (Invalid) return; + // For ThinLTO backend invocations, ensure that the context + // merges types based on ODR identifiers. + if (!CI.getCodeGenOpts().ThinLTOIndexFile.empty()) + VMContext->enableDebugTypeODRUniquing(); + llvm::SMDiagnostic Err; TheModule = parseIR(MainFile->getMemBufferRef(), Err, *VMContext); if (!TheModule) { @@ -779,11 +836,16 @@ void CodeGenAction::ExecuteAction() { TheModule->setTargetTriple(TargetOpts.Triple); } + EmbedBitcode(TheModule.get(), CI.getCodeGenOpts(), + MainFile->getMemBufferRef()); + LLVMContext &Ctx = TheModule->getContext(); - Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler); + Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler, + &CI.getDiagnostics()); + EmitBackendOutput(CI.getDiagnostics(), CI.getCodeGenOpts(), TargetOpts, - CI.getLangOpts(), CI.getTarget().getDataLayoutString(), - TheModule.get(), BA, OS); + CI.getLangOpts(), CI.getTarget().getDataLayout(), + TheModule.get(), BA, std::move(OS)); return; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp index e38ff0a..11e4ad9 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp @@ -25,6 +25,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/StmtCXX.h" +#include "clang/AST/StmtObjC.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" @@ -397,10 +398,17 @@ bool CodeGenFunction::ShouldInstrumentFunction() { return true; } +/// ShouldXRayInstrument - Return true if the current function should be +/// instrumented with XRay nop sleds. +bool CodeGenFunction::ShouldXRayInstrumentFunction() const { + return CGM.getCodeGenOpts().XRayInstrumentFunctions; +} + /// EmitFunctionInstrumentation - Emit LLVM code to call the specified /// instrumentation function with the current function and the call site, if /// function instrumentation is enabled. void CodeGenFunction::EmitFunctionInstrumentation(const char *Fn) { + auto NL = ApplyDebugLocation::CreateArtificial(*this); // void __cyg_profile_func_{enter,exit} (void *this_fn, void *call_site); llvm::PointerType *PointerTy = Int8PtrTy; llvm::Type *ProfileFuncArgs[] = { PointerTy, PointerTy }; @@ -429,12 +437,28 @@ void CodeGenFunction::EmitMCountInstrumentation() { EmitNounwindRuntimeCall(MCountFn); } +// Returns the address space id that should be produced to the +// kernel_arg_addr_space metadata. This is always fixed to the ids +// as specified in the SPIR 2.0 specification in order to differentiate +// for example in clGetKernelArgInfo() implementation between the address +// spaces with targets without unique mapping to the OpenCL address spaces +// (basically all single AS CPUs). +static unsigned ArgInfoAddressSpace(unsigned LangAS) { + switch (LangAS) { + case LangAS::opencl_global: return 1; + case LangAS::opencl_constant: return 2; + case LangAS::opencl_local: return 3; + case LangAS::opencl_generic: return 4; // Not in SPIR 2.0 specs. + default: + return 0; // Assume private. + } +} + // OpenCL v1.2 s5.6.4.6 allows the compiler to store kernel argument // information in the program executable. 
The argument information stored // includes the argument name, its type, the address and access qualifiers used. static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, CodeGenModule &CGM, llvm::LLVMContext &Context, - SmallVector<llvm::Metadata *, 5> &kernelMDArgs, CGBuilderTy &Builder, ASTContext &ASTCtx) { // Create MDNodes that represent the kernel arg metadata. // Each MDNode is a list in the form of "key", N number of values which is @@ -444,28 +468,21 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, // MDNode for the kernel argument address space qualifiers. SmallVector<llvm::Metadata *, 8> addressQuals; - addressQuals.push_back(llvm::MDString::get(Context, "kernel_arg_addr_space")); // MDNode for the kernel argument access qualifiers (images only). SmallVector<llvm::Metadata *, 8> accessQuals; - accessQuals.push_back(llvm::MDString::get(Context, "kernel_arg_access_qual")); // MDNode for the kernel argument type names. SmallVector<llvm::Metadata *, 8> argTypeNames; - argTypeNames.push_back(llvm::MDString::get(Context, "kernel_arg_type")); // MDNode for the kernel argument base type names. SmallVector<llvm::Metadata *, 8> argBaseTypeNames; - argBaseTypeNames.push_back( - llvm::MDString::get(Context, "kernel_arg_base_type")); // MDNode for the kernel argument type qualifiers. SmallVector<llvm::Metadata *, 8> argTypeQuals; - argTypeQuals.push_back(llvm::MDString::get(Context, "kernel_arg_type_qual")); // MDNode for the kernel argument names. SmallVector<llvm::Metadata *, 8> argNames; - argNames.push_back(llvm::MDString::get(Context, "kernel_arg_name")); for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) { const ParmVarDecl *parm = FD->getParamDecl(i); @@ -477,7 +494,7 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, // Get address qualifier. addressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32( - ASTCtx.getTargetAddressSpace(pointeeTy.getAddressSpace())))); + ArgInfoAddressSpace(pointeeTy.getAddressSpace())))); // Get argument type name. std::string typeName = @@ -514,8 +531,7 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, uint32_t AddrSpc = 0; bool isPipe = ty->isPipeType(); if (ty->isImageType() || isPipe) - AddrSpc = - CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); + AddrSpc = ArgInfoAddressSpace(LangAS::opencl_global); addressQuals.push_back( llvm::ConstantAsMetadata::get(Builder.getInt32(AddrSpc))); @@ -523,7 +539,8 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, // Get argument type name. 
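As a rough illustration (OpenCL C, with a kernel and parameter names invented here), a kernel such as the one below would now carry the argument metadata directly on the function: kernel_arg_addr_space of 1 (global pointer), 3 (local pointer) and 1 again (images are reported in the global space by the code above), and kernel_arg_access_qual of "none", "none", "read_only".

    // Illustrative sketch only.
    kernel void blur(global float *dst,
                     local  float *tile,
                     read_only image2d_t src) {
      // ...
    }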
std::string typeName; if (isPipe) - typeName = cast<PipeType>(ty)->getElementType().getAsString(Policy); + typeName = ty.getCanonicalType()->getAs<PipeType>()->getElementType() + .getAsString(Policy); else typeName = ty.getUnqualifiedType().getAsString(Policy); @@ -536,8 +553,9 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, std::string baseTypeName; if (isPipe) - baseTypeName = - cast<PipeType>(ty)->getElementType().getCanonicalType().getAsString(Policy); + baseTypeName = ty.getCanonicalType()->getAs<PipeType>() + ->getElementType().getCanonicalType() + .getAsString(Policy); else baseTypeName = ty.getUnqualifiedType().getCanonicalType().getAsString(Policy); @@ -561,15 +579,14 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, argTypeQuals.push_back(llvm::MDString::get(Context, typeQuals)); // Get image and pipe access qualifier: - // FIXME: now image and pipe share the same access qualifier maybe we can - // refine it to OpenCL access qualifier and also handle write_read if (ty->isImageType()|| ty->isPipeType()) { - const OpenCLImageAccessAttr *A = parm->getAttr<OpenCLImageAccessAttr>(); + const OpenCLAccessAttr *A = parm->getAttr<OpenCLAccessAttr>(); if (A && A->isWriteOnly()) accessQuals.push_back(llvm::MDString::get(Context, "write_only")); + else if (A && A->isReadWrite()) + accessQuals.push_back(llvm::MDString::get(Context, "read_write")); else accessQuals.push_back(llvm::MDString::get(Context, "read_only")); - // FIXME: what about read_write? } else accessQuals.push_back(llvm::MDString::get(Context, "none")); @@ -577,13 +594,19 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, argNames.push_back(llvm::MDString::get(Context, parm->getName())); } - kernelMDArgs.push_back(llvm::MDNode::get(Context, addressQuals)); - kernelMDArgs.push_back(llvm::MDNode::get(Context, accessQuals)); - kernelMDArgs.push_back(llvm::MDNode::get(Context, argTypeNames)); - kernelMDArgs.push_back(llvm::MDNode::get(Context, argBaseTypeNames)); - kernelMDArgs.push_back(llvm::MDNode::get(Context, argTypeQuals)); + Fn->setMetadata("kernel_arg_addr_space", + llvm::MDNode::get(Context, addressQuals)); + Fn->setMetadata("kernel_arg_access_qual", + llvm::MDNode::get(Context, accessQuals)); + Fn->setMetadata("kernel_arg_type", + llvm::MDNode::get(Context, argTypeNames)); + Fn->setMetadata("kernel_arg_base_type", + llvm::MDNode::get(Context, argBaseTypeNames)); + Fn->setMetadata("kernel_arg_type_qual", + llvm::MDNode::get(Context, argTypeQuals)); if (CGM.getCodeGenOpts().EmitOpenCLArgMetadata) - kernelMDArgs.push_back(llvm::MDNode::get(Context, argNames)); + Fn->setMetadata("kernel_arg_name", + llvm::MDNode::get(Context, argNames)); } void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, @@ -594,11 +617,7 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::LLVMContext &Context = getLLVMContext(); - SmallVector<llvm::Metadata *, 5> kernelMDArgs; - kernelMDArgs.push_back(llvm::ConstantAsMetadata::get(Fn)); - - GenOpenCLArgMetadata(FD, Fn, CGM, Context, kernelMDArgs, Builder, - getContext()); + GenOpenCLArgMetadata(FD, Fn, CGM, Context, Builder, getContext()); if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) { QualType hintQTy = A->getTypeHint(); @@ -607,37 +626,29 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, hintQTy->isSignedIntegerType() || (hintEltQTy && hintEltQTy->getElementType()->isSignedIntegerType()); llvm::Metadata *attrMDArgs[] = { - 
llvm::MDString::get(Context, "vec_type_hint"), llvm::ConstantAsMetadata::get(llvm::UndefValue::get( CGM.getTypes().ConvertType(A->getTypeHint()))), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( llvm::IntegerType::get(Context, 32), llvm::APInt(32, (uint64_t)(isSignedInteger ? 1 : 0))))}; - kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs)); + Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, attrMDArgs)); } if (const WorkGroupSizeHintAttr *A = FD->getAttr<WorkGroupSizeHintAttr>()) { llvm::Metadata *attrMDArgs[] = { - llvm::MDString::get(Context, "work_group_size_hint"), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))}; - kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs)); + Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, attrMDArgs)); } if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) { llvm::Metadata *attrMDArgs[] = { - llvm::MDString::get(Context, "reqd_work_group_size"), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))}; - kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs)); + Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, attrMDArgs)); } - - llvm::MDNode *kernelMDNode = llvm::MDNode::get(Context, kernelMDArgs); - llvm::NamedMDNode *OpenCLKernelMetadata = - CGM.getModule().getOrInsertNamedMetadata("opencl.kernels"); - OpenCLKernelMetadata->addOperand(kernelMDNode); } /// Determine whether the function F ends with a return stmt. @@ -670,6 +681,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, DidCallStackSave = false; CurCodeDecl = D; + if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) + if (FD->usesSEHTry()) + CurSEHParent = FD; CurFuncDecl = (D ? D->getNonClosureContext() : nullptr); FnRetTy = RetTy; CurFn = Fn; @@ -695,20 +709,46 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, if (SanOpts.has(SanitizerKind::SafeStack)) Fn->addFnAttr(llvm::Attribute::SafeStack); + // Apply xray attributes to the function (as a string, for now) + if (D && ShouldXRayInstrumentFunction()) { + if (const auto *XRayAttr = D->getAttr<XRayInstrumentAttr>()) { + if (XRayAttr->alwaysXRayInstrument()) + Fn->addFnAttr("function-instrument", "xray-always"); + if (XRayAttr->neverXRayInstrument()) + Fn->addFnAttr("function-instrument", "xray-never"); + } else { + Fn->addFnAttr( + "xray-instruction-threshold", + llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold)); + } + } + // Pass inline keyword to optimizer if it appears explicitly on any // declaration. Also, in the case of -fno-inline attach NoInline - // attribute to all function that are not marked AlwaysInline. + // attribute to all functions that are not marked AlwaysInline, or + // to all functions that are not marked inline or implicitly inline + // in the case of -finline-hint-functions. 
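A hedged sketch of the source-level markers this logic reacts to (function names invented; the GNU attribute spelling is used). Under -fno-inline every function without always_inline gets NoInline; under -finline-hint-functions only functions declared (or implicitly) inline keep their inlining eligibility; and the XRay attribute is lowered to the "function-instrument"="xray-always" string attribute:

    // Illustrative sketch only.
    inline int fast_path(int x) { return x + 1; }   // inline redecl -> InlineHint
    int plain_path(int x) { return x - 1; }         // -finline-hint-functions -> NoInline
    __attribute__((xray_always_instrument))
    void on_request() {}                            // "function-instrument"="xray-always"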
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { - if (!CGM.getCodeGenOpts().NoInline) { + const CodeGenOptions& CodeGenOpts = CGM.getCodeGenOpts(); + if (!CodeGenOpts.NoInline) { for (auto RI : FD->redecls()) if (RI->isInlineSpecified()) { Fn->addFnAttr(llvm::Attribute::InlineHint); break; } + if (CodeGenOpts.getInlining() == CodeGenOptions::OnlyHintInlining && + !FD->isInlined() && !Fn->hasFnAttribute(llvm::Attribute::InlineHint)) + Fn->addFnAttr(llvm::Attribute::NoInline); } else if (!FD->hasAttr<AlwaysInlineAttr>()) Fn->addFnAttr(llvm::Attribute::NoInline); + if (CGM.getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>()) + CGM.getOpenMPRuntime().emitDeclareSimdFunction(FD, Fn); } + // Add no-jump-tables value. + Fn->addFnAttr("no-jump-tables", + llvm::toStringRef(CGM.getCodeGenOpts().NoUseJumpTables)); + if (getLangOpts().OpenCL) { // Add metadata for a kernel function. if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) @@ -745,9 +785,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, // later. Don't create this with the builder, because we don't want it // folded. llvm::Value *Undef = llvm::UndefValue::get(Int32Ty); - AllocaInsertPt = new llvm::BitCastInst(Undef, Int32Ty, "", EntryBB); - if (Builder.isNamePreserving()) - AllocaInsertPt->setName("allocapt"); + AllocaInsertPt = new llvm::BitCastInst(Undef, Int32Ty, "allocapt", EntryBB); ReturnBlock = getJumpDestInCurrentScope("return"); @@ -755,15 +793,18 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, // Emit subprogram debug descriptor. if (CGDebugInfo *DI = getDebugInfo()) { + // Reconstruct the type from the argument list so that implicit parameters, + // such as 'this' and 'vtt', show up in the debug info. Preserve the calling + // convention. + CallingConv CC = CallingConv::CC_C; + if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) + if (const auto *SrcFnTy = FD->getType()->getAs<FunctionType>()) + CC = SrcFnTy->getCallConv(); SmallVector<QualType, 16> ArgTypes; - for (FunctionArgList::const_iterator i = Args.begin(), e = Args.end(); - i != e; ++i) { - ArgTypes.push_back((*i)->getType()); - } - - QualType FnType = - getContext().getFunctionType(RetTy, ArgTypes, - FunctionProtoType::ExtProtoInfo()); + for (const VarDecl *VD : Args) + ArgTypes.push_back(VD->getType()); + QualType FnType = getContext().getFunctionType( + RetTy, ArgTypes, FunctionProtoType::ExtProtoInfo(CC)); DI->EmitFunctionStart(GD, Loc, StartLoc, FnType, CurFn, Builder); } @@ -823,10 +864,22 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, MD->getParent()->getCaptureFields(LambdaCaptureFields, LambdaThisCaptureField); if (LambdaThisCaptureField) { - // If this lambda captures this, load it. - LValue ThisLValue = EmitLValueForLambdaField(LambdaThisCaptureField); - CXXThisValue = EmitLoadOfLValue(ThisLValue, - SourceLocation()).getScalarVal(); + // If the lambda captures the object referred to by '*this' - either by + // value or by reference, make sure CXXThisValue points to the correct + // object. + + // Get the lvalue for the field (which is a copy of the enclosing object + // or contains the address of the enclosing object). + LValue ThisFieldLValue = EmitLValueForLambdaField(LambdaThisCaptureField); + if (!LambdaThisCaptureField->getType()->isPointerType()) { + // If the enclosing object was captured by value, just use its address. + CXXThisValue = ThisFieldLValue.getAddress().getPointer(); + } else { + // Load the lvalue pointed to by the field, since '*this' was captured + // by reference. 
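The two branches above correspond to the two ways a lambda can capture the enclosing object: C++17's [*this] stores a copy of the object in the closure (a non-pointer field, so its own address is used), while [this] stores the pointer, which must be loaded. A small sketch (type and member names invented):

    // Illustrative sketch only (requires C++17 for [*this]).
    struct Counter {
      int n = 0;
      auto byCopy() { return [*this] { return n; }; }  // field is Counter: use its address
      auto byRef()  { return [this]  { return n; }; }  // field is Counter*: load it
    };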
+ CXXThisValue = + EmitLoadOfLValue(ThisFieldLValue, SourceLocation()).getScalarVal(); + } } for (auto *FD : MD->getParent()->fields()) { if (FD->hasCapturedVLAType()) { @@ -883,7 +936,7 @@ void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args, void CodeGenFunction::EmitBlockWithFallThrough(llvm::BasicBlock *BB, const Stmt *S) { llvm::BasicBlock *SkipCountBB = nullptr; - if (HaveInsertPoint() && CGM.getCodeGenOpts().ProfileInstrGenerate) { + if (HaveInsertPoint() && CGM.getCodeGenOpts().hasProfileClangInstr()) { // When instrumenting for profiling, the fallthrough to certain // statements needs to skip over the instrumentation code so that we // get an accurate count. @@ -904,7 +957,7 @@ void CodeGenFunction::EmitBlockWithFallThrough(llvm::BasicBlock *BB, static void TryMarkNoThrow(llvm::Function *F) { // LLVM treats 'nounwind' on a function as part of the type, so we // can't do this on functions that can be overwritten. - if (F->mayBeOverridden()) return; + if (F->isInterposable()) return; for (llvm::BasicBlock &BB : *F) for (llvm::Instruction &I : BB) @@ -914,18 +967,11 @@ static void TryMarkNoThrow(llvm::Function *F) { F->setDoesNotThrow(); } -void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, - const CGFunctionInfo &FnInfo) { +QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD, + FunctionArgList &Args) { const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); - - // Check if we should generate debug info for this function. - if (FD->hasAttr<NoDebugAttr>()) - DebugInfo = nullptr; // disable debug info indefinitely for this function - - FunctionArgList Args; QualType ResTy = FD->getReturnType(); - CurGD = GD; const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD); if (MD && MD->isInstance()) { if (CGM.getCXXABI().HasThisReturn(GD)) @@ -935,22 +981,48 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, CGM.getCXXABI().buildThisParam(*this, Args); } - for (auto *Param : FD->params()) { - Args.push_back(Param); - if (!Param->hasAttr<PassObjectSizeAttr>()) - continue; - - IdentifierInfo *NoID = nullptr; - auto *Implicit = ImplicitParamDecl::Create( - getContext(), Param->getDeclContext(), Param->getLocation(), NoID, - getContext().getSizeType()); - SizeArguments[Param] = Implicit; - Args.push_back(Implicit); + // The base version of an inheriting constructor whose constructed base is a + // virtual base is not passed any arguments (because it doesn't actually call + // the inherited constructor). 
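// Illustrative sketch, not part of the patch: the two capture forms the
// LambdaThisCaptureField code distinguishes. With [*this] (C++17) the capture
// field is a copy of the enclosing object, so its address becomes CXXThisValue
// directly; with [this] the field is a pointer and must be loaded first.
struct Widget {
  int n = 0;
  auto byCopy() { return [*this] { return n; }; } // field type: Widget
  auto byRef()  { return [this]  { return n; }; } // field type: Widget *
};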
+ bool PassedParams = true; + if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(FD)) + if (auto Inherited = CD->getInheritedConstructor()) + PassedParams = + getTypes().inheritingCtorHasParams(Inherited, GD.getCtorType()); + + if (PassedParams) { + for (auto *Param : FD->parameters()) { + Args.push_back(Param); + if (!Param->hasAttr<PassObjectSizeAttr>()) + continue; + + IdentifierInfo *NoID = nullptr; + auto *Implicit = ImplicitParamDecl::Create( + getContext(), Param->getDeclContext(), Param->getLocation(), NoID, + getContext().getSizeType()); + SizeArguments[Param] = Implicit; + Args.push_back(Implicit); + } } if (MD && (isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD))) CGM.getCXXABI().addImplicitStructorParams(*this, ResTy, Args); + return ResTy; +} + +void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, + const CGFunctionInfo &FnInfo) { + const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); + CurGD = GD; + + FunctionArgList Args; + QualType ResTy = BuildFunctionArgList(GD, Args); + + // Check if we should generate debug info for this function. + if (FD->hasAttr<NoDebugAttr>()) + DebugInfo = nullptr; // disable debug info indefinitely for this function + SourceRange BodyRange; if (Stmt *Body = FD->getBody()) BodyRange = Body->getSourceRange(); CurEHLocation = BodyRange.getEnd(); @@ -1088,14 +1160,37 @@ bool CodeGenFunction::containsBreak(const Stmt *S) { return false; } +bool CodeGenFunction::mightAddDeclToScope(const Stmt *S) { + if (!S) return false; + + // Some statement kinds add a scope and thus never add a decl to the current + // scope. Note, this list is longer than the list of statements that might + // have an unscoped decl nested within them, but this way is conservatively + // correct even if more statement kinds are added. + if (isa<IfStmt>(S) || isa<SwitchStmt>(S) || isa<WhileStmt>(S) || + isa<DoStmt>(S) || isa<ForStmt>(S) || isa<CompoundStmt>(S) || + isa<CXXForRangeStmt>(S) || isa<CXXTryStmt>(S) || + isa<ObjCForCollectionStmt>(S) || isa<ObjCAtTryStmt>(S)) + return false; + + if (isa<DeclStmt>(S)) + return true; + + for (const Stmt *SubStmt : S->children()) + if (mightAddDeclToScope(SubStmt)) + return true; + + return false; +} /// ConstantFoldsToSimpleInteger - If the specified expression does not fold /// to a constant, or if it does but contains a label, return false. If it /// constant folds return true and set the boolean result in Result. bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, - bool &ResultBool) { + bool &ResultBool, + bool AllowLabels) { llvm::APSInt ResultInt; - if (!ConstantFoldsToSimpleInteger(Cond, ResultInt)) + if (!ConstantFoldsToSimpleInteger(Cond, ResultInt, AllowLabels)) return false; ResultBool = ResultInt.getBoolValue(); @@ -1105,15 +1200,16 @@ bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, /// ConstantFoldsToSimpleInteger - If the specified expression does not fold /// to a constant, or if it does but contains a label, return false. If it /// constant folds return true and set the folded value. -bool CodeGenFunction:: -ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &ResultInt) { +bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, + llvm::APSInt &ResultInt, + bool AllowLabels) { // FIXME: Rename and handle conversion of other evaluatable things // to bool. llvm::APSInt Int; if (!Cond->EvaluateAsInt(Int, getContext())) return false; // Not foldable, not integer or not fully evaluatable. 
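// Illustrative sketch, not part of the patch: a pass_object_size parameter,
// for which BuildFunctionArgList appends a hidden size argument (recorded in
// SizeArguments) immediately after the user-visible parameter.
void fill(char *buf __attribute__((pass_object_size(0))), char v);
// Callers implicitly pass __builtin_object_size(buf, 0) for the hidden
// argument.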
- if (CodeGenFunction::ContainsLabel(Cond)) + if (!AllowLabels && CodeGenFunction::ContainsLabel(Cond)) return false; // Contains a label. ResultInt = Int; @@ -1297,15 +1393,12 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, // create metadata that specifies that the branch is unpredictable. // Don't bother if not optimizing because that metadata would not be used. llvm::MDNode *Unpredictable = nullptr; - if (CGM.getCodeGenOpts().OptimizationLevel != 0) { - if (const CallExpr *Call = dyn_cast<CallExpr>(Cond)) { - const Decl *TargetDecl = Call->getCalleeDecl(); - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) { - if (FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { - llvm::MDBuilder MDHelper(getLLVMContext()); - Unpredictable = MDHelper.createUnpredictable(); - } - } + auto *Call = dyn_cast<CallExpr>(Cond); + if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) { + auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl()); + if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { + llvm::MDBuilder MDHelper(getLLVMContext()); + Unpredictable = MDHelper.createUnpredictable(); } } @@ -1764,7 +1857,7 @@ void CodeGenFunction::EmitDeclRefExprDbgValue(const DeclRefExpr *E, llvm::Constant *Init) { assert (Init && "Invalid DeclRefExpr initializer!"); if (CGDebugInfo *Dbg = getDebugInfo()) - if (CGM.getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) + if (CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) Dbg->EmitGlobalVariable(E->getDecl(), Init); } @@ -1860,26 +1953,14 @@ void CodeGenFunction::InsertHelper(llvm::Instruction *I, CGM.getSanitizerMetadata()->disableSanitizerForInstruction(I); } -template <bool PreserveNames> -void CGBuilderInserter<PreserveNames>::InsertHelper( +void CGBuilderInserter::InsertHelper( llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, llvm::BasicBlock::iterator InsertPt) const { - llvm::IRBuilderDefaultInserter<PreserveNames>::InsertHelper(I, Name, BB, - InsertPt); + llvm::IRBuilderDefaultInserter::InsertHelper(I, Name, BB, InsertPt); if (CGF) CGF->InsertHelper(I, Name, BB, InsertPt); } -#ifdef NDEBUG -#define PreserveNames false -#else -#define PreserveNames true -#endif -template void CGBuilderInserter<PreserveNames>::InsertHelper( - llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, - llvm::BasicBlock::iterator InsertPt) const; -#undef PreserveNames - static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures, CodeGenModule &CGM, const FunctionDecl *FD, std::string &FirstMissing) { @@ -1956,3 +2037,12 @@ void CodeGenFunction::checkTargetFeatures(const CallExpr *E, << FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature; } } + +void CodeGenFunction::EmitSanitizerStatReport(llvm::SanitizerStatKind SSK) { + if (!CGM.getCodeGenOpts().SanitizeStats) + return; + + llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint()); + IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation()); + CGM.getSanStats().create(IRB, SSK); +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h index 4803b13..fb19a26 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h @@ -36,6 +36,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/SanitizerStats.h" namespace 
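// Illustrative sketch, not part of the patch: the builtin recognized by
// EmitBranchOnBoolExpr; at -O1 and above the branch it guards is tagged with
// !unpredictable metadata.
int clamp_negative(int x) {
  if (__builtin_unpredictable(x < 0))
    return 0;
  return x;
}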
llvm { class BasicBlock; @@ -67,7 +68,6 @@ class ObjCMethodDecl; class ObjCImplementationDecl; class ObjCPropertyImplDecl; class TargetInfo; -class TargetCodeGenInfo; class VarDecl; class ObjCForCollectionStmt; class ObjCAtTryStmt; @@ -85,6 +85,9 @@ class BlockByrefHelpers; class BlockByrefInfo; class BlockFlags; class BlockFieldFlags; +class RegionCodeGenTy; +class TargetCodeGenInfo; +struct OMPTaskDataTy; /// The kind of evaluation to perform on values of a particular /// type. Basically, is the code in CGExprScalar, CGExprComplex, or @@ -188,6 +191,8 @@ public: CXXThisFieldDecl = *Field; else if (I->capturesVariable()) CaptureFields[I->getCapturedVar()] = *Field; + else if (I->capturesVariableByCopy()) + CaptureFields[I->getCapturedVar()] = *Field; } } @@ -275,6 +280,8 @@ public: /// potentially set the return value. bool SawAsmBlock; + const FunctionDecl *CurSEHParent = nullptr; + /// True if the current function is an outlined SEH helper. This can be a /// finally block or filter expression. bool IsOutlinedSEHHelper; @@ -295,6 +302,19 @@ public: llvm::Instruction *CurrentFuncletPad = nullptr; + class CallLifetimeEnd final : public EHScopeStack::Cleanup { + llvm::Value *Addr; + llvm::Value *Size; + + public: + CallLifetimeEnd(Address addr, llvm::Value *size) + : Addr(addr.getPointer()), Size(size) {} + + void Emit(CodeGenFunction &CGF, Flags flags) override { + CGF.EmitLifetimeEnd(Size, Addr); + } + }; + /// Header for data within LifetimeExtendedCleanupStack. struct LifetimeExtendedCleanupHeader { /// The size of the following cleanup object. @@ -637,6 +657,11 @@ public: ForceCleanup(); } + /// Checks if the global variable is captured in current function. + bool isGlobalVarCaptured(const VarDecl *VD) const { + return !VD->isLocalVarDeclOrParm() && CGF.LocalDeclMap.count(VD) > 0; + } + private: /// Copy all the entries in the source map over the corresponding /// entries in the destination, which must exist. @@ -940,6 +965,94 @@ private: }; SmallVector<BreakContinue, 8> BreakContinueStack; + /// Handles cancellation exit points in OpenMP-related constructs. + class OpenMPCancelExitStack { + /// Tracks cancellation exit point and join point for cancel-related exit + /// and normal exit. + struct CancelExit { + CancelExit() = default; + CancelExit(OpenMPDirectiveKind Kind, JumpDest ExitBlock, + JumpDest ContBlock) + : Kind(Kind), ExitBlock(ExitBlock), ContBlock(ContBlock) {} + OpenMPDirectiveKind Kind = OMPD_unknown; + /// true if the exit block has been emitted already by the special + /// emitExit() call, false if the default codegen is used. + bool HasBeenEmitted = false; + JumpDest ExitBlock; + JumpDest ContBlock; + }; + + SmallVector<CancelExit, 8> Stack; + + public: + OpenMPCancelExitStack() : Stack(1) {} + ~OpenMPCancelExitStack() = default; + /// Fetches the exit block for the current OpenMP construct. + JumpDest getExitBlock() const { return Stack.back().ExitBlock; } + /// Emits exit block with special codegen procedure specific for the related + /// OpenMP construct + emits code for normal construct cleanup. 
+ void emitExit(CodeGenFunction &CGF, OpenMPDirectiveKind Kind, + const llvm::function_ref<void(CodeGenFunction &)> &CodeGen) { + if (Stack.back().Kind == Kind && getExitBlock().isValid()) { + assert(CGF.getOMPCancelDestination(Kind).isValid()); + assert(CGF.HaveInsertPoint()); + assert(!Stack.back().HasBeenEmitted); + auto IP = CGF.Builder.saveAndClearIP(); + CGF.EmitBlock(Stack.back().ExitBlock.getBlock()); + CodeGen(CGF); + CGF.EmitBranchThroughCleanup(Stack.back().ContBlock); + CGF.Builder.restoreIP(IP); + Stack.back().HasBeenEmitted = true; + } + CodeGen(CGF); + } + /// Enter the cancel supporting \a Kind construct. + /// \param Kind OpenMP directive that supports cancel constructs. + /// \param HasCancel true, if the construct has inner cancel directive, + /// false otherwise. + void enter(CodeGenFunction &CGF, OpenMPDirectiveKind Kind, bool HasCancel) { + Stack.push_back({Kind, + HasCancel ? CGF.getJumpDestInCurrentScope("cancel.exit") + : JumpDest(), + HasCancel ? CGF.getJumpDestInCurrentScope("cancel.cont") + : JumpDest()}); + } + /// Emits default exit point for the cancel construct (if the special one + /// has not be used) + join point for cancel/normal exits. + void exit(CodeGenFunction &CGF) { + if (getExitBlock().isValid()) { + assert(CGF.getOMPCancelDestination(Stack.back().Kind).isValid()); + bool HaveIP = CGF.HaveInsertPoint(); + if (!Stack.back().HasBeenEmitted) { + if (HaveIP) + CGF.EmitBranchThroughCleanup(Stack.back().ContBlock); + CGF.EmitBlock(Stack.back().ExitBlock.getBlock()); + CGF.EmitBranchThroughCleanup(Stack.back().ContBlock); + } + CGF.EmitBlock(Stack.back().ContBlock.getBlock()); + if (!HaveIP) { + CGF.Builder.CreateUnreachable(); + CGF.Builder.ClearInsertionPoint(); + } + } + Stack.pop_back(); + } + }; + OpenMPCancelExitStack OMPCancelStack; + + /// Controls insertion of cancellation exit blocks in worksharing constructs. + class OMPCancelStackRAII { + CodeGenFunction &CGF; + + public: + OMPCancelStackRAII(CodeGenFunction &CGF, OpenMPDirectiveKind Kind, + bool HasCancel) + : CGF(CGF) { + CGF.OMPCancelStack.enter(CGF, Kind, HasCancel); + } + ~OMPCancelStackRAII() { CGF.OMPCancelStack.exit(CGF); } + }; + CodeGenPGO PGO; /// Calculate branch weights appropriate for PGO data @@ -951,7 +1064,7 @@ private: public: /// Increment the profiler's counter for the given statement. 
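// Illustrative sketch, not part of the patch: the source pattern
// OpenMPCancelExitStack models -- a worksharing loop with an inner
// cancellation point, lowered through the "cancel.exit"/"cancel.cont" blocks.
extern bool failed(int i);
extern void work(int i);
void run(int n) {
#pragma omp parallel for
  for (int i = 0; i < n; ++i) {
    if (failed(i)) {
#pragma omp cancel for
    }
    work(i);
  }
}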
void incrementProfileCounter(const Stmt *S) { - if (CGM.getCodeGenOpts().ProfileInstrGenerate) + if (CGM.getCodeGenOpts().hasProfileClangInstr()) PGO.emitCounterIncrement(Builder, S); PGO.setCurrentStmt(S); } @@ -1053,6 +1166,61 @@ public: CharUnits OldCXXThisAlignment; }; + class InlinedInheritingConstructorScope { + public: + InlinedInheritingConstructorScope(CodeGenFunction &CGF, GlobalDecl GD) + : CGF(CGF), OldCurGD(CGF.CurGD), OldCurFuncDecl(CGF.CurFuncDecl), + OldCurCodeDecl(CGF.CurCodeDecl), + OldCXXABIThisDecl(CGF.CXXABIThisDecl), + OldCXXABIThisValue(CGF.CXXABIThisValue), + OldCXXThisValue(CGF.CXXThisValue), + OldCXXABIThisAlignment(CGF.CXXABIThisAlignment), + OldCXXThisAlignment(CGF.CXXThisAlignment), + OldReturnValue(CGF.ReturnValue), OldFnRetTy(CGF.FnRetTy), + OldCXXInheritedCtorInitExprArgs( + std::move(CGF.CXXInheritedCtorInitExprArgs)) { + CGF.CurGD = GD; + CGF.CurFuncDecl = CGF.CurCodeDecl = + cast<CXXConstructorDecl>(GD.getDecl()); + CGF.CXXABIThisDecl = nullptr; + CGF.CXXABIThisValue = nullptr; + CGF.CXXThisValue = nullptr; + CGF.CXXABIThisAlignment = CharUnits(); + CGF.CXXThisAlignment = CharUnits(); + CGF.ReturnValue = Address::invalid(); + CGF.FnRetTy = QualType(); + CGF.CXXInheritedCtorInitExprArgs.clear(); + } + ~InlinedInheritingConstructorScope() { + CGF.CurGD = OldCurGD; + CGF.CurFuncDecl = OldCurFuncDecl; + CGF.CurCodeDecl = OldCurCodeDecl; + CGF.CXXABIThisDecl = OldCXXABIThisDecl; + CGF.CXXABIThisValue = OldCXXABIThisValue; + CGF.CXXThisValue = OldCXXThisValue; + CGF.CXXABIThisAlignment = OldCXXABIThisAlignment; + CGF.CXXThisAlignment = OldCXXThisAlignment; + CGF.ReturnValue = OldReturnValue; + CGF.FnRetTy = OldFnRetTy; + CGF.CXXInheritedCtorInitExprArgs = + std::move(OldCXXInheritedCtorInitExprArgs); + } + + private: + CodeGenFunction &CGF; + GlobalDecl OldCurGD; + const Decl *OldCurFuncDecl; + const Decl *OldCurCodeDecl; + ImplicitParamDecl *OldCXXABIThisDecl; + llvm::Value *OldCXXABIThisValue; + llvm::Value *OldCXXThisValue; + CharUnits OldCXXABIThisAlignment; + CharUnits OldCXXThisAlignment; + Address OldReturnValue; + QualType OldFnRetTy; + CallArgList OldCXXInheritedCtorInitExprArgs; + }; + private: /// CXXThisDecl - When generating code for a C++ member function, /// this will hold the implicit 'this' declaration. @@ -1066,6 +1234,10 @@ private: /// this expression. Address CXXDefaultInitExprThis = Address::invalid(); + /// The values of function arguments to use when evaluating + /// CXXInheritedCtorInitExprs within this context. + CallArgList CXXInheritedCtorInitExprArgs; + /// CXXStructorImplicitParamDecl - When generating code for a constructor or /// destructor, this will hold the implicit argument (e.g. VTT). ImplicitParamDecl *CXXStructorImplicitParamDecl; @@ -1149,10 +1321,7 @@ public: return getInvokeDestImpl(); } - bool currentFunctionUsesSEHTry() const { - const auto *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl); - return FD && FD->usesSEHTry(); - } + bool currentFunctionUsesSEHTry() const { return CurSEHParent != nullptr; } const TargetInfo &getTarget() const { return Target; } llvm::LLVMContext &getLLVMContext() { return CGM.getLLVMContext(); } @@ -1292,6 +1461,8 @@ public: const BlockByrefInfo &getBlockByrefInfo(const VarDecl *var); + QualType BuildFunctionArgList(GlobalDecl GD, FunctionArgList &Args); + void GenerateCode(GlobalDecl GD, llvm::Function *Fn, const CGFunctionInfo &FnInfo); /// \brief Emit code for the start of a function. 
@@ -1388,6 +1559,7 @@ public: CFITCK_NVCall, CFITCK_DerivedCast, CFITCK_UnrelatedCast, + CFITCK_ICall, }; /// \brief Derived is the presumed address of an object of type T after a @@ -1399,14 +1571,29 @@ public: /// EmitVTablePtrCheckForCall - Virtual method MD is being called via VTable. /// If vptr CFI is enabled, emit a check that VTable is valid. - void EmitVTablePtrCheckForCall(const CXXMethodDecl *MD, llvm::Value *VTable, + void EmitVTablePtrCheckForCall(const CXXRecordDecl *RD, llvm::Value *VTable, CFITypeCheckKind TCK, SourceLocation Loc); /// EmitVTablePtrCheck - Emit a check that VTable is a valid virtual table for - /// RD using llvm.bitset.test. + /// RD using llvm.type.test. void EmitVTablePtrCheck(const CXXRecordDecl *RD, llvm::Value *VTable, CFITypeCheckKind TCK, SourceLocation Loc); + /// If whole-program virtual table optimization is enabled, emit an assumption + /// that VTable is a member of RD's type identifier. Or, if vptr CFI is + /// enabled, emit a check that VTable is a member of RD's type identifier. + void EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, + llvm::Value *VTable, SourceLocation Loc); + + /// Returns whether we should perform a type checked load when loading a + /// virtual function for virtual calls to members of RD. This is generally + /// true when both vcall CFI and whole-program-vtables are enabled. + bool ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD); + + /// Emit a type checked load from the given vtable. + llvm::Value *EmitVTableTypeCheckedLoad(const CXXRecordDecl *RD, llvm::Value *VTable, + uint64_t VTableByteOffset); + /// CanDevirtualizeMemberFunctionCalls - Checks whether virtual calls on given /// expr can be devirtualized. bool CanDevirtualizeMemberFunctionCall(const Expr *Base, @@ -1422,6 +1609,10 @@ public: /// instrumented with __cyg_profile_func_* calls bool ShouldInstrumentFunction(); + /// ShouldXRayInstrument - Return true if the current function should be + /// instrumented with XRay nop sleds. + bool ShouldXRayInstrumentFunction() const; + /// EmitFunctionInstrumentation - Emit LLVM code to call the specified /// instrumentation function with the current function and the call site, if /// function instrumentation is enabled. @@ -1572,6 +1763,10 @@ public: AlignmentSource *Source = nullptr); LValue EmitLoadOfReferenceLValue(Address Ref, const ReferenceType *RefTy); + Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, + AlignmentSource *Source = nullptr); + LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy); + /// CreateTempAlloca - This creates a alloca and inserts it into the entry /// block. The caller is responsible for setting an appropriate alignment on /// the alloca. @@ -1845,10 +2040,32 @@ public: void EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor, const FunctionArgList &Args); + /// Emit a call to an inheriting constructor (that is, one that invokes a + /// constructor inherited from a base class) by inlining its definition. This + /// is necessary if the ABI does not support forwarding the arguments to the + /// base class constructor (because they're variadic or similar). + void EmitInlinedInheritingCXXConstructorCall(const CXXConstructorDecl *Ctor, + CXXCtorType CtorType, + bool ForVirtualBase, + bool Delegating, + CallArgList &Args); + + /// Emit a call to a constructor inherited from a base class, passing the + /// current constructor's arguments along unmodified (without even making + /// a copy). 
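// Illustrative sketch, not part of the patch: a straightforward inherited
// constructor. When the ABI can forward the caller's arguments unchanged,
// the forwarding EmitInheritedCXXConstructorCall declared below applies;
// variadic or otherwise non-forwardable signatures go through the inlined
// EmitInlinedInheritingCXXConstructorCall path instead.
struct Base { Base(int a, double b); };
struct Derived : Base { using Base::Base; }; // Derived(int, double) inherited
void make() { Derived d(1, 2.0); }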
+ void EmitInheritedCXXConstructorCall(const CXXConstructorDecl *D, + bool ForVirtualBase, Address This, + bool InheritedFromVBase, + const CXXInheritedCtorInitExpr *E); + void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, Address This, const CXXConstructExpr *E); + void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, + bool ForVirtualBase, bool Delegating, + Address This, CallArgList &Args); + /// Emit assumption load for all bases. Requires to be be called only on /// most-derived class and not under construction of the object. void EmitVTableAssumptionLoads(const CXXRecordDecl *ClassDecl, Address This); @@ -1861,7 +2078,7 @@ public: const CXXConstructExpr *E); void EmitCXXAggrConstructorCall(const CXXConstructorDecl *D, - const ConstantArrayType *ArrayTy, + const ArrayType *ArrayTy, Address ArrayPtr, const CXXConstructExpr *E, bool ZeroInitialization = false); @@ -2204,6 +2421,8 @@ public: void EmitCXXForRangeStmt(const CXXForRangeStmt &S, ArrayRef<const Attr *> Attrs = None); + /// Returns calculated size of the specified type. + llvm::Value *getTypeSize(QualType Ty); LValue InitCapturedStruct(const CapturedStmt &S); llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K); llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S); @@ -2294,7 +2513,17 @@ public: /// it is the last iteration of the loop code in associated directive, or to /// 'i1 false' otherwise. If this item is nullptr, no final check is required. void EmitOMPLastprivateClauseFinal(const OMPExecutableDirective &D, + bool NoFinals, llvm::Value *IsLastIterCond = nullptr); + /// Emit initial code for linear clauses. + void EmitOMPLinearClause(const OMPLoopDirective &D, + CodeGenFunction::OMPPrivateScope &PrivateScope); + /// Emit final code for linear clauses. + /// \param CondGen Optional conditional code for final part of codegen for + /// linear clause. + void EmitOMPLinearClauseFinal( + const OMPLoopDirective &D, + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen); /// \brief Emit initial code for reduction variables. Creates reduction copies /// and initializes them with the values according to OpenMP standard. /// @@ -2315,6 +2544,14 @@ public: /// \param D Directive (possibly) with the 'linear' clause. 
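// Illustrative sketch, not part of the patch: a simd loop with a linear
// clause, the case served by the EmitOMPLinearClause* hooks above (a
// privatized copy of j inside the loop plus a guarded final copy-out).
void scale(float *a, int n) {
  int j = 0;
#pragma omp simd linear(j : 1)
  for (int i = 0; i < n; ++i) {
    a[j] *= 2.0f;
    j += 1;
  }
}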
void EmitOMPLinearClauseInit(const OMPLoopDirective &D); + typedef const llvm::function_ref<void(CodeGenFunction & /*CGF*/, + llvm::Value * /*OutlinedFn*/, + const OMPTaskDataTy & /*Data*/)> + TaskGenTy; + void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, + const RegionCodeGenTy &BodyGen, + const TaskGenTy &TaskGen, OMPTaskDataTy &Data); + void EmitOMPParallelDirective(const OMPParallelDirective &S); void EmitOMPSimdDirective(const OMPSimdDirective &S); void EmitOMPForDirective(const OMPForDirective &S); @@ -2337,14 +2574,36 @@ public: void EmitOMPAtomicDirective(const OMPAtomicDirective &S); void EmitOMPTargetDirective(const OMPTargetDirective &S); void EmitOMPTargetDataDirective(const OMPTargetDataDirective &S); + void EmitOMPTargetEnterDataDirective(const OMPTargetEnterDataDirective &S); + void EmitOMPTargetExitDataDirective(const OMPTargetExitDataDirective &S); + void EmitOMPTargetUpdateDirective(const OMPTargetUpdateDirective &S); + void EmitOMPTargetParallelDirective(const OMPTargetParallelDirective &S); + void + EmitOMPTargetParallelForDirective(const OMPTargetParallelForDirective &S); void EmitOMPTeamsDirective(const OMPTeamsDirective &S); void EmitOMPCancellationPointDirective(const OMPCancellationPointDirective &S); void EmitOMPCancelDirective(const OMPCancelDirective &S); + void EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S); void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S); void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S); void EmitOMPDistributeDirective(const OMPDistributeDirective &S); - + void EmitOMPDistributeLoop(const OMPDistributeDirective &S); + void EmitOMPDistributeParallelForDirective( + const OMPDistributeParallelForDirective &S); + void EmitOMPDistributeParallelForSimdDirective( + const OMPDistributeParallelForSimdDirective &S); + void EmitOMPDistributeSimdDirective(const OMPDistributeSimdDirective &S); + void EmitOMPTargetParallelForSimdDirective( + const OMPTargetParallelForSimdDirective &S); + + /// Emit outlined function for the target directive. + static std::pair<llvm::Function * /*OutlinedFn*/, + llvm::Constant * /*OutlinedFnID*/> + EmitOMPTargetDirectiveOutlinedFunction(CodeGenModule &CGM, + const OMPTargetDirective &S, + StringRef ParentName, + bool IsOffloadEntry); /// \brief Emit inner loop of the worksharing/simd construct. /// /// \param S Directive, for which the inner loop must be emitted. @@ -2362,24 +2621,35 @@ public: const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen); JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind); + /// Emit initial code for loop counters of loop-based directives. + void EmitOMPPrivateLoopCounters(const OMPLoopDirective &S, + OMPPrivateScope &LoopScope); private: - /// Helpers for the OpenMP loop directives. void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false); - void EmitOMPSimdFinal(const OMPLoopDirective &D); + void EmitOMPSimdFinal( + const OMPLoopDirective &D, + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen); /// \brief Emit code for the worksharing loop-based directive. /// \return true, if this construct has any lastprivate clause, false - /// otherwise. 
bool EmitOMPWorksharingLoop(const OMPLoopDirective &S); - void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, + void EmitOMPOuterLoop(bool IsMonotonic, bool DynamicOrOrdered, + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); + void EmitOMPForOuterLoop(const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); + void EmitOMPDistributeOuterLoop( + OpenMPDistScheduleClauseKind ScheduleKind, + const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); /// \brief Emit code for sections directive. - OpenMPDirectiveKind EmitSections(const OMPExecutableDirective &S); + void EmitSections(const OMPExecutableDirective &S); public: @@ -2430,7 +2700,6 @@ public: void EmitAtomicInit(Expr *E, LValue lvalue); bool LValueIsSuitableForInlineAtomic(LValue Src); - bool typeIsSuitableForInlineAtomic(QualType Ty, bool IsVolatile) const; RValue EmitAtomicLoad(LValue LV, SourceLocation SL, AggValueSlot Slot = AggValueSlot::ignored()); @@ -2446,8 +2715,10 @@ public: std::pair<RValue, llvm::Value *> EmitAtomicCompareExchange( LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, - llvm::AtomicOrdering Success = llvm::SequentiallyConsistent, - llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent, + llvm::AtomicOrdering Success = + llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering Failure = + llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak = false, AggValueSlot Slot = AggValueSlot::ignored()); void EmitAtomicUpdate(LValue LVal, llvm::AtomicOrdering AO, @@ -2680,11 +2951,10 @@ public: ReturnValueSlot ReturnValue, llvm::Value *This, llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *E); - RValue EmitCXXStructorCall(const CXXMethodDecl *MD, llvm::Value *Callee, - ReturnValueSlot ReturnValue, llvm::Value *This, - llvm::Value *ImplicitParam, - QualType ImplicitParamTy, const CallExpr *E, - StructorType Type); + RValue EmitCXXDestructorCall(const CXXDestructorDecl *DD, llvm::Value *Callee, + llvm::Value *This, llvm::Value *ImplicitParam, + QualType ImplicitParamTy, const CallExpr *E, + StructorType Type); RValue EmitCXXMemberCallExpr(const CXXMemberCallExpr *E, ReturnValueSlot ReturnValue); RValue EmitCXXMemberOrOperatorMemberCallExpr(const CallExpr *CE, @@ -2708,6 +2978,8 @@ public: RValue EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E, ReturnValueSlot ReturnValue); + RValue EmitCUDADevicePrintfCallExpr(const CallExpr *E, + ReturnValueSlot ReturnValue); RValue EmitBuiltinExpr(const FunctionDecl *FD, unsigned BuiltinID, const CallExpr *E, @@ -2798,19 +3070,25 @@ public: llvm::Value *EmitARCAutoreleaseReturnValue(llvm::Value *value); llvm::Value *EmitARCRetainAutoreleaseReturnValue(llvm::Value *value); llvm::Value *EmitARCRetainAutoreleasedReturnValue(llvm::Value *value); + llvm::Value *EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value); std::pair<LValue,llvm::Value*> EmitARCStoreAutoreleasing(const BinaryOperator *e); std::pair<LValue,llvm::Value*> EmitARCStoreStrong(const BinaryOperator *e, bool ignored); + std::pair<LValue,llvm::Value*> + EmitARCStoreUnsafeUnretained(const BinaryOperator *e, bool ignored); llvm::Value *EmitObjCThrowOperand(const Expr *expr); llvm::Value *EmitObjCConsumeObject(QualType T, llvm::Value *Ptr); llvm::Value 
*EmitObjCExtendObjectLifetime(QualType T, llvm::Value *Ptr); llvm::Value *EmitARCExtendBlockObject(const Expr *expr); + llvm::Value *EmitARCReclaimReturnedObject(const Expr *e, + bool allowUnsafeClaim); llvm::Value *EmitARCRetainScalarExpr(const Expr *expr); llvm::Value *EmitARCRetainAutoreleaseScalarExpr(const Expr *expr); + llvm::Value *EmitARCUnsafeUnretainedScalarExpr(const Expr *expr); void EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values); @@ -2973,17 +3251,23 @@ public: /// If the statement (recursively) contains a switch or loop with a break /// inside of it, this is fine. static bool containsBreak(const Stmt *S); + + /// Determine if the given statement might introduce a declaration into the + /// current scope, by being a (possibly-labelled) DeclStmt. + static bool mightAddDeclToScope(const Stmt *S); /// ConstantFoldsToSimpleInteger - If the specified expression does not fold /// to a constant, or if it does but contains a label, return false. If it /// constant folds return true and set the boolean result in Result. - bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result); + bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, + bool AllowLabels = false); /// ConstantFoldsToSimpleInteger - If the specified expression does not fold /// to a constant, or if it does but contains a label, return false. If it /// constant folds return true and set the folded value. - bool ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &Result); - + bool ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &Result, + bool AllowLabels = false); + /// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an /// if statement) to the specified blocks. Based on the condition, this might /// try to simplify the codegen of the conditional based on the branch. @@ -3013,8 +3297,9 @@ public: /// \brief Emit a slow path cross-DSO CFI check which calls __cfi_slowpath /// if Cond if false. - void EmitCfiSlowPathCheck(llvm::Value *Cond, llvm::ConstantInt *TypeId, - llvm::Value *Ptr); + void EmitCfiSlowPathCheck(SanitizerMask Kind, llvm::Value *Cond, + llvm::ConstantInt *TypeId, llvm::Value *Ptr, + ArrayRef<llvm::Constant *> StaticArgs); /// \brief Create a basic block that will call the trap intrinsic, and emit a /// conditional branch to it, for the -ftrapv checks. @@ -3024,6 +3309,9 @@ public: /// "trap-func-name" if specified. llvm::CallInst *EmitTrapCall(llvm::Intrinsic::ID IntrID); + /// \brief Emit a cross-DSO CFI failure handling function. + void EmitCfiCheckFail(); + /// \brief Create a check for a function parameter that may potentially be /// declared as non-null. void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, @@ -3062,7 +3350,7 @@ private: /// /// \param AI - The first function argument of the expansion. 
void ExpandTypeFromArgs(QualType Ty, LValue Dst, - SmallVectorImpl<llvm::Argument *>::iterator &AI); + SmallVectorImpl<llvm::Value *>::iterator &AI); /// ExpandTypeToArgs - Expand an RValue \arg RV, with the LLVM type for \arg /// Ty, into individual arguments on the provided vector \arg IRCallArgs, @@ -3189,6 +3477,8 @@ public: Address EmitPointerWithAlignment(const Expr *Addr, AlignmentSource *Source = nullptr); + void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK); + private: QualType getVarArgType(const Expr *Arg); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp index 97b1662..0161cfb 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp @@ -20,6 +20,7 @@ #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGOpenMPRuntime.h" +#include "CGOpenMPRuntimeNVPTX.h" #include "CodeGenFunction.h" #include "CodeGenPGO.h" #include "CodeGenTBAA.h" @@ -86,17 +87,8 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, : Context(C), LangOpts(C.getLangOpts()), HeaderSearchOpts(HSO), PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags), Target(C.getTargetInfo()), ABI(createCXXABI(*this)), - VMContext(M.getContext()), TBAA(nullptr), TheTargetCodeGenInfo(nullptr), - Types(*this), VTables(*this), ObjCRuntime(nullptr), - OpenCLRuntime(nullptr), OpenMPRuntime(nullptr), CUDARuntime(nullptr), - DebugInfo(nullptr), ObjCData(nullptr), - NoObjCARCExceptionsMetadata(nullptr), PGOReader(nullptr), - CFConstantStringClassRef(nullptr), ConstantStringClassRef(nullptr), - NSConstantStringType(nullptr), NSConcreteGlobalBlock(nullptr), - NSConcreteStackBlock(nullptr), BlockObjectAssign(nullptr), - BlockObjectDispose(nullptr), BlockDescriptorType(nullptr), - GenericBlockLiteralType(nullptr), LifetimeStartFn(nullptr), - LifetimeEndFn(nullptr), SanitizerMD(new SanitizerMetadata(*this)) { + VMContext(M.getContext()), Types(*this), VTables(*this), + SanitizerMD(new SanitizerMetadata(*this)) { // Initialize the type cache. llvm::LLVMContext &LLVMContext = M.getContext(); @@ -132,29 +124,30 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, // Enable TBAA unless it's suppressed. ThreadSanitizer needs TBAA even at O0. if (LangOpts.Sanitize.has(SanitizerKind::Thread) || (!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0)) - TBAA = new CodeGenTBAA(Context, VMContext, CodeGenOpts, getLangOpts(), - getCXXABI().getMangleContext()); + TBAA.reset(new CodeGenTBAA(Context, VMContext, CodeGenOpts, getLangOpts(), + getCXXABI().getMangleContext())); // If debug info or coverage generation is enabled, create the CGDebugInfo // object. 
- if (CodeGenOpts.getDebugInfo() != CodeGenOptions::NoDebugInfo || - CodeGenOpts.EmitGcovArcs || - CodeGenOpts.EmitGcovNotes) - DebugInfo = new CGDebugInfo(*this); + if (CodeGenOpts.getDebugInfo() != codegenoptions::NoDebugInfo || + CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes) + DebugInfo.reset(new CGDebugInfo(*this)); Block.GlobalUniqueCount = 0; if (C.getLangOpts().ObjC1) - ObjCData = new ObjCEntrypoints(); + ObjCData.reset(new ObjCEntrypoints()); - if (!CodeGenOpts.InstrProfileInput.empty()) { - auto ReaderOrErr = - llvm::IndexedInstrProfReader::create(CodeGenOpts.InstrProfileInput); - if (std::error_code EC = ReaderOrErr.getError()) { + if (CodeGenOpts.hasProfileClangUse()) { + auto ReaderOrErr = llvm::IndexedInstrProfReader::create( + CodeGenOpts.ProfileInstrumentUsePath); + if (auto E = ReaderOrErr.takeError()) { unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "Could not read profile %0: %1"); - getDiags().Report(DiagID) << CodeGenOpts.InstrProfileInput - << EC.message(); + llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) { + getDiags().Report(DiagID) << CodeGenOpts.ProfileInstrumentUsePath + << EI.message(); + }); } else PGOReader = std::move(ReaderOrErr.get()); } @@ -165,16 +158,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, CoverageMapping.reset(new CoverageMappingModuleGen(*this, *CoverageInfo)); } -CodeGenModule::~CodeGenModule() { - delete ObjCRuntime; - delete OpenCLRuntime; - delete OpenMPRuntime; - delete CUDARuntime; - delete TheTargetCodeGenInfo; - delete TBAA; - delete DebugInfo; - delete ObjCData; -} +CodeGenModule::~CodeGenModule() {} void CodeGenModule::createObjCRuntime() { // This is just isGNUFamily(), but we want to force implementors of @@ -183,29 +167,42 @@ void CodeGenModule::createObjCRuntime() { case ObjCRuntime::GNUstep: case ObjCRuntime::GCC: case ObjCRuntime::ObjFW: - ObjCRuntime = CreateGNUObjCRuntime(*this); + ObjCRuntime.reset(CreateGNUObjCRuntime(*this)); return; case ObjCRuntime::FragileMacOSX: case ObjCRuntime::MacOSX: case ObjCRuntime::iOS: case ObjCRuntime::WatchOS: - ObjCRuntime = CreateMacObjCRuntime(*this); + ObjCRuntime.reset(CreateMacObjCRuntime(*this)); return; } llvm_unreachable("bad runtime kind"); } void CodeGenModule::createOpenCLRuntime() { - OpenCLRuntime = new CGOpenCLRuntime(*this); + OpenCLRuntime.reset(new CGOpenCLRuntime(*this)); } void CodeGenModule::createOpenMPRuntime() { - OpenMPRuntime = new CGOpenMPRuntime(*this); + // Select a specialized code generation class based on the target, if any. + // If it does not exist use the default implementation. + switch (getTarget().getTriple().getArch()) { + + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + assert(getLangOpts().OpenMPIsDevice && + "OpenMP NVPTX is only prepared to deal with device code."); + OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this)); + break; + default: + OpenMPRuntime.reset(new CGOpenMPRuntime(*this)); + break; + } } void CodeGenModule::createCUDARuntime() { - CUDARuntime = CreateNVCUDARuntime(*this); + CUDARuntime.reset(CreateNVCUDARuntime(*this)); } void CodeGenModule::addReplacement(StringRef Name, llvm::Constant *C) { @@ -259,20 +256,21 @@ void CodeGenModule::applyGlobalValReplacements() { // This is only used in aliases that we created and we know they have a // linear structure. 
-static const llvm::GlobalObject *getAliasedGlobal(const llvm::GlobalAlias &GA) { - llvm::SmallPtrSet<const llvm::GlobalAlias*, 4> Visited; - const llvm::Constant *C = &GA; +static const llvm::GlobalObject *getAliasedGlobal( + const llvm::GlobalIndirectSymbol &GIS) { + llvm::SmallPtrSet<const llvm::GlobalIndirectSymbol*, 4> Visited; + const llvm::Constant *C = &GIS; for (;;) { C = C->stripPointerCasts(); if (auto *GO = dyn_cast<llvm::GlobalObject>(C)) return GO; // stripPointerCasts will not walk over weak aliases. - auto *GA2 = dyn_cast<llvm::GlobalAlias>(C); - if (!GA2) + auto *GIS2 = dyn_cast<llvm::GlobalIndirectSymbol>(C); + if (!GIS2) return nullptr; - if (!Visited.insert(GA2).second) + if (!Visited.insert(GIS2).second) return nullptr; - C = GA2->getAliasee(); + C = GIS2->getIndirectSymbol(); } } @@ -284,20 +282,35 @@ void CodeGenModule::checkAliases() { DiagnosticsEngine &Diags = getDiags(); for (const GlobalDecl &GD : Aliases) { const auto *D = cast<ValueDecl>(GD.getDecl()); - const AliasAttr *AA = D->getAttr<AliasAttr>(); + SourceLocation Location; + bool IsIFunc = D->hasAttr<IFuncAttr>(); + if (const Attr *A = D->getDefiningAttr()) + Location = A->getLocation(); + else + llvm_unreachable("Not an alias or ifunc?"); StringRef MangledName = getMangledName(GD); llvm::GlobalValue *Entry = GetGlobalValue(MangledName); - auto *Alias = cast<llvm::GlobalAlias>(Entry); + auto *Alias = cast<llvm::GlobalIndirectSymbol>(Entry); const llvm::GlobalValue *GV = getAliasedGlobal(*Alias); if (!GV) { Error = true; - Diags.Report(AA->getLocation(), diag::err_cyclic_alias); + Diags.Report(Location, diag::err_cyclic_alias) << IsIFunc; } else if (GV->isDeclaration()) { Error = true; - Diags.Report(AA->getLocation(), diag::err_alias_to_undefined); + Diags.Report(Location, diag::err_alias_to_undefined) + << IsIFunc << IsIFunc; + } else if (IsIFunc) { + // Check resolver function type. + llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>( + GV->getType()->getPointerElementType()); + assert(FTy); + if (!FTy->getReturnType()->isPointerTy()) + Diags.Report(Location, diag::err_ifunc_resolver_return); + if (FTy->getNumParams()) + Diags.Report(Location, diag::err_ifunc_resolver_params); } - llvm::Constant *Aliasee = Alias->getAliasee(); + llvm::Constant *Aliasee = Alias->getIndirectSymbol(); llvm::GlobalValue *AliaseeGV; if (auto CE = dyn_cast<llvm::ConstantExpr>(Aliasee)) AliaseeGV = cast<llvm::GlobalValue>(CE->getOperand(0)); @@ -308,7 +321,7 @@ void CodeGenModule::checkAliases() { StringRef AliasSection = SA->getName(); if (AliasSection != AliaseeGV->getSection()) Diags.Report(SA->getLocation(), diag::warn_alias_with_section) - << AliasSection; + << AliasSection << IsIFunc << IsIFunc; } // We have to handle alias to weak aliases in here. LLVM itself disallows @@ -316,13 +329,13 @@ void CodeGenModule::checkAliases() { // compatibility with gcc we implement it by just pointing the alias // to its aliasee's aliasee. We also warn, since the user is probably // expecting the link to be weak. 
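// Illustrative sketch, not part of the patch: an ifunc whose resolver passes
// the checks added to checkAliases above -- it returns a pointer and takes no
// parameters (ELF targets only; example names are hypothetical).
static int add_generic(int x, int y) { return x + y; }
static void *resolve_add(void) { return (void *)&add_generic; }
int add(int x, int y) __attribute__((ifunc("resolve_add")));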
- if (auto GA = dyn_cast<llvm::GlobalAlias>(AliaseeGV)) { - if (GA->mayBeOverridden()) { - Diags.Report(AA->getLocation(), diag::warn_alias_to_weak_alias) - << GV->getName() << GA->getName(); + if (auto GA = dyn_cast<llvm::GlobalIndirectSymbol>(AliaseeGV)) { + if (GA->isInterposable()) { + Diags.Report(Location, diag::warn_alias_to_weak_alias) + << GV->getName() << GA->getName() << IsIFunc; Aliasee = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( - GA->getAliasee(), Alias->getType()); - Alias->setAliasee(Aliasee); + GA->getIndirectSymbol(), Alias->getType()); + Alias->setIndirectSymbol(Aliasee); } } } @@ -332,7 +345,7 @@ void CodeGenModule::checkAliases() { for (const GlobalDecl &GD : Aliases) { StringRef MangledName = getMangledName(GD); llvm::GlobalValue *Entry = GetGlobalValue(MangledName); - auto *Alias = cast<llvm::GlobalAlias>(Entry); + auto *Alias = dyn_cast<llvm::GlobalIndirectSymbol>(Entry); Alias->replaceAllUsesWith(llvm::UndefValue::get(Alias->getType())); Alias->eraseFromParent(); } @@ -380,7 +393,7 @@ void CodeGenModule::Release() { OpenMPRuntime->emitRegistrationFunction()) AddGlobalCtor(OpenMPRegistrationFunction, 0); if (PGOReader) { - getModule().setMaximumFunctionCount(PGOReader->getMaximumFunctionCount()); + getModule().setProfileSummary(PGOReader->getSummary().getMD(VMContext)); if (PGOStats.hasDiagnostics()) PGOStats.reportDiagnostics(getDiags(), getCodeGenOpts().MainFileName); } @@ -391,7 +404,11 @@ void CodeGenModule::Release() { EmitDeferredUnusedCoverageMappings(); if (CoverageMapping) CoverageMapping->emit(); + if (CodeGenOpts.SanitizeCfiCrossDso) + CodeGenFunction(*this).EmitCfiCheckFail(); emitLLVMUsed(); + if (SanStats) + SanStats->finish(); if (CodeGenOpts.Autolink && (Context.getLangOpts().Modules || !LinkerOptionsMetadata.empty())) { @@ -452,16 +469,19 @@ void CodeGenModule::Release() { getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1); } - if (uint32_t PLevel = Context.getLangOpts().PICLevel) { - llvm::PICLevel::Level PL = llvm::PICLevel::Default; - switch (PLevel) { - case 0: break; - case 1: PL = llvm::PICLevel::Small; break; - case 2: PL = llvm::PICLevel::Large; break; - default: llvm_unreachable("Invalid PIC Level"); - } + if (LangOpts.CUDAIsDevice && getTarget().getTriple().isNVPTX()) { + // Indicate whether __nvvm_reflect should be configured to flush denormal + // floating point values to 0. (This corresponds to its "__CUDA_FTZ" + // property.) + getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz", + LangOpts.CUDADeviceFlushDenormalsToZero ? 1 : 0); + } - getModule().setPICLevel(PL); + if (uint32_t PLevel = Context.getLangOpts().PICLevel) { + assert(PLevel < 3 && "Invalid PIC Level"); + getModule().setPICLevel(static_cast<llvm::PICLevel::Level>(PLevel)); + if (Context.getLangOpts().PIE) + getModule().setPIELevel(static_cast<llvm::PIELevel::Level>(PLevel)); } SimplifyPersonality(); @@ -485,6 +505,11 @@ void CodeGenModule::UpdateCompletedType(const TagDecl *TD) { Types.UpdateCompletedType(TD); } +void CodeGenModule::RefreshTypeCacheForClass(const CXXRecordDecl *RD) { + // Make sure that this type is translated. 
+ Types.RefreshTypeCacheForClass(RD); +} + llvm::MDNode *CodeGenModule::getTBAAInfo(QualType QTy) { if (!TBAA) return nullptr; @@ -740,6 +765,15 @@ CodeGenModule::getFunctionLinkage(GlobalDecl GD) { : llvm::GlobalValue::LinkOnceODRLinkage; } + if (isa<CXXConstructorDecl>(D) && + cast<CXXConstructorDecl>(D)->isInheritingConstructor() && + Context.getTargetInfo().getCXXABI().isMicrosoft()) { + // Our approach to inheriting constructors is fundamentally different from + // that used by the MS ABI, so keep our inheriting constructor thunks + // internal rather than trying to pick an unambiguous mangling for them. + return llvm::GlobalValue::InternalLinkage; + } + return getLLVMLinkageForDeclarator(D, Linkage, /*isConstantVariable=*/false); } @@ -762,8 +796,7 @@ void CodeGenModule::setFunctionDLLStorageClass(GlobalDecl GD, llvm::Function *F) F->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass); } -llvm::ConstantInt * -CodeGenModule::CreateCfiIdForTypeMetadata(llvm::Metadata *MD) { +llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { llvm::MDString *MDS = dyn_cast<llvm::MDString>(MD); if (!MDS) return nullptr; @@ -882,12 +915,6 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, F->removeFnAttr(llvm::Attribute::InlineHint); } - if (isa<CXXConstructorDecl>(D) || isa<CXXDestructorDecl>(D)) - F->setUnnamedAddr(true); - else if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) - if (MD->isVirtual()) - F->setUnnamedAddr(true); - unsigned alignment = D->getMaxAlignment() / Context.getCharWidth(); if (alignment) F->setAlignment(alignment); @@ -970,8 +997,8 @@ static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV, } } -void CodeGenModule::CreateFunctionBitSetEntry(const FunctionDecl *FD, - llvm::Function *F) { +void CodeGenModule::CreateFunctionTypeMetadata(const FunctionDecl *FD, + llvm::Function *F) { // Only if we are checking indirect calls. if (!LangOpts.Sanitize.has(SanitizerKind::CFIICall)) return; @@ -992,25 +1019,13 @@ void CodeGenModule::CreateFunctionBitSetEntry(const FunctionDecl *FD, return; } - llvm::NamedMDNode *BitsetsMD = - getModule().getOrInsertNamedMetadata("llvm.bitsets"); - llvm::Metadata *MD = CreateMetadataIdentifierForType(FD->getType()); - llvm::Metadata *BitsetOps[] = { - MD, llvm::ConstantAsMetadata::get(F), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int64Ty, 0))}; - BitsetsMD->addOperand(llvm::MDTuple::get(getLLVMContext(), BitsetOps)); + F->addTypeMetadata(0, MD); // Emit a hash-based bit set entry for cross-DSO calls. - if (CodeGenOpts.SanitizeCfiCrossDso) { - if (auto TypeId = CreateCfiIdForTypeMetadata(MD)) { - llvm::Metadata *BitsetOps2[] = { - llvm::ConstantAsMetadata::get(TypeId), - llvm::ConstantAsMetadata::get(F), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int64Ty, 0))}; - BitsetsMD->addOperand(llvm::MDTuple::get(getLLVMContext(), BitsetOps2)); - } - } + if (CodeGenOpts.SanitizeCfiCrossDso) + if (auto CrossDsoTypeId = CreateCrossDsoCfiTypeId(MD)) + F->addTypeMetadata(0, llvm::ConstantAsMetadata::get(CrossDsoTypeId)); } void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, @@ -1049,13 +1064,29 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, if (const SectionAttr *SA = FD->getAttr<SectionAttr>()) F->setSection(SA->getName()); - // A replaceable global allocation function does not act like a builtin by - // default, only if it is invoked by a new-expression or delete-expression. 
- if (FD->isReplaceableGlobalAllocationFunction()) + if (FD->isReplaceableGlobalAllocationFunction()) { + // A replaceable global allocation function does not act like a builtin by + // default, only if it is invoked by a new-expression or delete-expression. F->addAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::NoBuiltin); - CreateFunctionBitSetEntry(FD, F); + // A sane operator new returns a non-aliasing pointer. + // FIXME: Also add NonNull attribute to the return value + // for the non-nothrow forms? + auto Kind = FD->getDeclName().getCXXOverloadedOperator(); + if (getCodeGenOpts().AssumeSaneOperatorNew && + (Kind == OO_New || Kind == OO_Array_New)) + F->addAttribute(llvm::AttributeSet::ReturnIndex, + llvm::Attribute::NoAlias); + } + + if (isa<CXXConstructorDecl>(FD) || isa<CXXDestructorDecl>(FD)) + F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + else if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) + if (MD->isVirtual()) + F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + + CreateFunctionTypeMetadata(FD, F); } void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) { @@ -1227,13 +1258,13 @@ void CodeGenModule::EmitDeferred() { if (!DeferredVTables.empty()) { EmitDeferredVTables(); - // Emitting a v-table doesn't directly cause more v-tables to + // Emitting a vtable doesn't directly cause more vtables to // become deferred, although it can cause functions to be - // emitted that then need those v-tables. + // emitted that then need those vtables. assert(DeferredVTables.empty()); } - // Stop if we're out of both deferred v-tables and deferred declarations. + // Stop if we're out of both deferred vtables and deferred declarations. if (DeferredDeclsToEmit.empty()) return; @@ -1244,19 +1275,23 @@ void CodeGenModule::EmitDeferred() { for (DeferredGlobal &G : CurDeclsToEmit) { GlobalDecl D = G.GD; - llvm::GlobalValue *GV = G.GV; G.GV = nullptr; // We should call GetAddrOfGlobal with IsForDefinition set to true in order // to get GlobalValue with exactly the type we need, not something that // might had been created for another decl with the same mangled name but // different type. - // FIXME: Support for variables is not implemented yet. - if (isa<FunctionDecl>(D.getDecl())) - GV = cast<llvm::GlobalValue>(GetAddrOfGlobal(D, /*IsForDefinition=*/true)); - else - if (!GV) - GV = GetGlobalValue(getMangledName(D)); + llvm::GlobalValue *GV = dyn_cast<llvm::GlobalValue>( + GetAddrOfGlobal(D, /*IsForDefinition=*/true)); + + // In case of different address spaces, we may still get a cast, even with + // IsForDefinition equal to true. Query mangled names table to get + // GlobalValue. + if (!GV) + GV = GetGlobalValue(getMangledName(D)); + + // Make sure GetGlobalValue returned non-null. + assert(GV); // Check to see if we've already emitted this. This is necessary // for a couple of reasons: first, decls can end up in the @@ -1264,7 +1299,7 @@ void CodeGenModule::EmitDeferred() { // up with definitions in unusual ways (e.g. by an extern inline // function acquiring a strong function redefinition). Just // ignore these cases. - if (GV && !GV->isDeclaration()) + if (!GV->isDeclaration()) continue; // Otherwise, emit the definition and move on to the next one. 
@@ -1304,7 +1339,7 @@ llvm::Constant *CodeGenModule::EmitAnnotationString(StringRef Str) { new llvm::GlobalVariable(getModule(), s->getType(), true, llvm::GlobalValue::PrivateLinkage, s, ".str"); gv->setSection(AnnotationSection); - gv->setUnnamedAddr(true); + gv->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); AStr = gv; return gv; } @@ -1412,6 +1447,12 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { // Implicit template instantiations may change linkage if they are later // explicitly instantiated, so they should not be emitted eagerly. return false; + if (const auto *VD = dyn_cast<VarDecl>(Global)) + if (Context.getInlineVariableDefinitionKind(VD) == + ASTContext::InlineVariableDefinitionKind::WeakUnknown) + // A definition of an inline constexpr static data member may change + // linkage later if it's redeclared outside the class. + return false; // If OpenMP is enabled and threadprivates must be generated like TLS, delay // codegen for global variables, because they may be marked as threadprivate. if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS && @@ -1425,12 +1466,12 @@ ConstantAddress CodeGenModule::GetAddrOfUuidDescriptor( const CXXUuidofExpr* E) { // Sema has verified that IIDSource has a __declspec(uuid()), and that its // well-formed. - StringRef Uuid = E->getUuidAsStringRef(Context); + StringRef Uuid = E->getUuidStr(); std::string Name = "_GUID_" + Uuid.lower(); std::replace(Name.begin(), Name.end(), '-', '_'); - // Contains a 32-bit field. - CharUnits Alignment = CharUnits::fromQuantity(4); + // The UUID descriptor should be pointer aligned. + CharUnits Alignment = CharUnits::fromQuantity(PointerAlignInBytes); // Look for an existing global. if (llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name)) @@ -1491,6 +1532,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { if (Global->hasAttr<AliasAttr>()) return EmitAliasDefinition(GD); + // IFunc like an alias whose value is resolved at runtime by calling resolver. + if (Global->hasAttr<IFuncAttr>()) + return emitIFuncDefinition(GD); + // If this is CUDA, be selective about which declarations we emit. if (LangOpts.CUDA) { if (LangOpts.CUDAIsDevice) { @@ -1500,18 +1545,32 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { !Global->hasAttr<CUDASharedAttr>()) return; } else { - if (!Global->hasAttr<CUDAHostAttr>() && ( - Global->hasAttr<CUDADeviceAttr>() || - Global->hasAttr<CUDAConstantAttr>() || - Global->hasAttr<CUDASharedAttr>())) + // We need to emit host-side 'shadows' for all global + // device-side variables because the CUDA runtime needs their + // size and host-side address in order to provide access to + // their device-side incarnations. + + // So device-only functions are the only things we skip. + if (isa<FunctionDecl>(Global) && !Global->hasAttr<CUDAHostAttr>() && + Global->hasAttr<CUDADeviceAttr>()) return; + + assert((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) && + "Expected Variable or Function"); } } - // If this is OpenMP device, check if it is legal to emit this global - // normally. - if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD)) - return; + if (LangOpts.OpenMP) { + // If this is OpenMP device, check if it is legal to emit this global + // normally. + if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD)) + return; + if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Global)) { + if (MustBeEmitted(Global)) + EmitOMPDeclareReduction(DRD); + return; + } + } // Ignore declarations, they will be emitted on their first use. 
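// Illustrative sketch, not part of the patch: an implicitly-inline constexpr
// static data member. Until a redeclaration outside the class is seen its
// definition kind is WeakUnknown, which is why MayBeEmittedEagerly now
// declines to emit it eagerly.
struct Limits {
  static constexpr int max = 42; // an inline variable in C++17
};
// A later out-of-line redeclaration would make the definition strong:
// constexpr int Limits::max;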
if (const auto *FD = dyn_cast<FunctionDecl>(Global)) { @@ -1533,10 +1592,23 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } else { const auto *VD = cast<VarDecl>(Global); assert(VD->isFileVarDecl() && "Cannot emit local var decl as global."); - - if (VD->isThisDeclarationADefinition() != VarDecl::Definition && - !Context.isMSStaticDataMemberInlineDefinition(VD)) + // We need to emit device-side global CUDA variables even if a + // variable does not have a definition -- we still need to define + // host-side shadow for it. + bool MustEmitForCuda = LangOpts.CUDA && !LangOpts.CUDAIsDevice && + !VD->hasDefinition() && + (VD->hasAttr<CUDAConstantAttr>() || + VD->hasAttr<CUDADeviceAttr>()); + if (!MustEmitForCuda && + VD->isThisDeclarationADefinition() != VarDecl::Definition && + !Context.isMSStaticDataMemberInlineDefinition(VD)) { + // If this declaration may have caused an inline variable definition to + // change linkage, make sure that it's emitted. + if (Context.getInlineVariableDefinitionKind(VD) == + ASTContext::InlineVariableDefinitionKind::Strong) + GetAddrOfGlobalVar(VD); return; + } } // Defer code generation to first use when possible, e.g. if this is an inline @@ -1690,7 +1762,7 @@ void CodeGenModule::CompleteDIClassType(const CXXMethodDecl* D) { return; if (CGDebugInfo *DI = getModuleDebugInfo()) - if (getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) { + if (getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) { const auto *ThisPtr = cast<PointerType>(D->getThisType(getContext())); DI->getOrCreateRecordType(ThisPtr->getPointeeType(), D->getLocation()); } @@ -1730,7 +1802,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { } if (const auto *VD = dyn_cast<VarDecl>(D)) - return EmitGlobalVarDefinition(VD); + return EmitGlobalVarDefinition(VD, !VD->hasDefinition()); llvm_unreachable("Invalid argument to EmitGlobalDefinition()"); } @@ -1771,8 +1843,8 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, // error. if (IsForDefinition && !Entry->isDeclaration()) { GlobalDecl OtherGD; - // Check that GD is not yet in ExplicitDefinitions is required to make - // sure that we issue an error only once. + // Check that GD is not yet in DiagnosedConflictingDefinitions is required + // to make sure that we issue an error only once. if (lookupRepresentativeDecl(MangledName, OtherGD) && (GD.getCanonicalDecl().getDecl() != OtherGD.getCanonicalDecl().getDecl()) && @@ -1982,10 +2054,15 @@ bool CodeGenModule::isTypeConstant(QualType Ty, bool ExcludeCtor) { /// /// If D is non-null, it specifies a decl that correspond to this. This is used /// to set the attributes on the global when it is first created. +/// +/// If IsForDefinition is true, it is guranteed that an actual global with +/// type Ty will be returned, not conversion of a variable with the same +/// mangled name but some other type. llvm::Constant * CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::PointerType *Ty, - const VarDecl *D) { + const VarDecl *D, + bool IsForDefinition) { // Lookup the entry, lazily creating it if necessary. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (Entry) { @@ -2001,11 +2078,34 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, if (Entry->getType() == Ty) return Entry; + // If there are two attempts to define the same mangled name, issue an + // error. 
+ if (IsForDefinition && !Entry->isDeclaration()) { + GlobalDecl OtherGD; + const VarDecl *OtherD; + + // Check that D is not yet in DiagnosedConflictingDefinitions is required + // to make sure that we issue an error only once. + if (D && lookupRepresentativeDecl(MangledName, OtherGD) && + (D->getCanonicalDecl() != OtherGD.getCanonicalDecl().getDecl()) && + (OtherD = dyn_cast<VarDecl>(OtherGD.getDecl())) && + OtherD->hasInit() && + DiagnosedConflictingDefinitions.insert(D).second) { + getDiags().Report(D->getLocation(), + diag::err_duplicate_mangled_name); + getDiags().Report(OtherGD.getDecl()->getLocation(), + diag::note_previous_definition); + } + } + // Make sure the result is of the correct type. if (Entry->getType()->getAddressSpace() != Ty->getAddressSpace()) return llvm::ConstantExpr::getAddrSpaceCast(Entry, Ty); - return llvm::ConstantExpr::getBitCast(Entry, Ty); + // (If global is requested for a definition, we always need to create a new + // global, not just return a bitcast.) + if (!IsForDefinition) + return llvm::ConstantExpr::getBitCast(Entry, Ty); } unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace()); @@ -2014,6 +2114,20 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::GlobalValue::ExternalLinkage, nullptr, MangledName, nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace); + // If we already created a global with the same mangled name (but different + // type) before, take its name and remove it from its parent. + if (Entry) { + GV->takeName(Entry); + + if (!Entry->use_empty()) { + llvm::Constant *NewPtrForOldDecl = + llvm::ConstantExpr::getBitCast(GV, Entry->getType()); + Entry->replaceAllUsesWith(NewPtrForOldDecl); + } + + Entry->eraseFromParent(); + } + // This is the first use or definition of a mangled name. If there is a // deferred decl with this name, remember that we need to emit it at the end // of the file. @@ -2086,7 +2200,8 @@ CodeGenModule::GetAddrOfGlobal(GlobalDecl GD, return GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/false, IsForDefinition); } else - return GetAddrOfGlobalVar(cast<VarDecl>(GD.getDecl())); + return GetAddrOfGlobalVar(cast<VarDecl>(GD.getDecl()), /*Ty=*/nullptr, + IsForDefinition); } llvm::GlobalVariable * @@ -2134,9 +2249,12 @@ CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name, /// GetAddrOfGlobalVar - Return the llvm::Constant for the address of the /// given global variable. If Ty is non-null and if the global doesn't exist, /// then it will be created with the specified type instead of whatever the -/// normal requested type would be. +/// normal requested type would be. If IsForDefinition is true, it is guranteed +/// that an actual global with type Ty will be returned, not conversion of a +/// variable with the same mangled name but some other type. 
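A hypothetical C-level situation in which the same mangled name is first created with one type and later needs a definition with another, which is what the takeName/replaceAllUsesWith path above handles:

  extern int Table[];           // first reference: created with an incomplete array type
  int Table[4] = {1, 2, 3, 4};  // definition: requires a fresh global of the complete type
  // The old global gives up its name, its uses are rewritten through a bitcast
  // to the new global, and the old one is erased from the module.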
llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D, - llvm::Type *Ty) { + llvm::Type *Ty, + bool IsForDefinition) { assert(D->hasGlobalStorage() && "Not a global variable"); QualType ASTTy = D->getType(); if (!Ty) @@ -2146,7 +2264,7 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D, llvm::PointerType::get(Ty, getContext().getTargetAddressSpace(ASTTy)); StringRef MangledName = getMangledName(D); - return GetOrCreateLLVMGlobal(MangledName, PTy, D); + return GetOrCreateLLVMGlobal(MangledName, PTy, D, IsForDefinition); } /// CreateRuntimeVariable - Create a new runtime global variable with the @@ -2160,15 +2278,20 @@ CodeGenModule::CreateRuntimeVariable(llvm::Type *Ty, void CodeGenModule::EmitTentativeDefinition(const VarDecl *D) { assert(!D->getInit() && "Cannot emit definite definitions here!"); - if (!MustBeEmitted(D)) { - // If we have not seen a reference to this variable yet, place it - // into the deferred declarations table to be emitted if needed - // later. - StringRef MangledName = getMangledName(D); - if (!GetGlobalValue(MangledName)) { + StringRef MangledName = getMangledName(D); + llvm::GlobalValue *GV = GetGlobalValue(MangledName); + + // We already have a definition, not declaration, with the same mangled name. + // Emitting of declaration is not required (and actually overwrites emitted + // definition). + if (GV && !GV->isDeclaration()) + return; + + // If we have not seen a reference to this variable yet, place it into the + // deferred declarations table to be emitted if needed later. + if (!MustBeEmitted(D) && !GV) { DeferredDecls[MangledName] = D; return; - } } // The tentative definition is the only definition. @@ -2182,7 +2305,7 @@ CharUnits CodeGenModule::GetTargetTypeStoreSize(llvm::Type *Ty) const { unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D, unsigned AddrSpace) { - if (LangOpts.CUDA && LangOpts.CUDAIsDevice) { + if (D && LangOpts.CUDA && LangOpts.CUDAIsDevice) { if (D->hasAttr<CUDAConstantAttr>()) AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_constant); else if (D->hasAttr<CUDASharedAttr>()) @@ -2259,7 +2382,9 @@ void CodeGenModule::maybeSetTrivialComdat(const Decl &D, GO.setComdat(TheModule.getOrInsertComdat(GO.getName())); } -void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { +/// Pass IsTentative as true if you want to create a tentative definition. +void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, + bool IsTentative) { llvm::Constant *Init = nullptr; QualType ASTTy = D->getType(); CXXRecordDecl *RD = ASTTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); @@ -2269,18 +2394,13 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { const VarDecl *InitDecl; const Expr *InitExpr = D->getAnyInitializer(InitDecl); - // CUDA E.2.4.1 "__shared__ variables cannot have an initialization as part - // of their declaration." - if (getLangOpts().CPlusPlus && getLangOpts().CUDAIsDevice - && D->hasAttr<CUDASharedAttr>()) { - if (InitExpr) { - const auto *C = dyn_cast<CXXConstructExpr>(InitExpr); - if (C == nullptr || !C->getConstructor()->hasTrivialBody()) - Error(D->getLocation(), - "__shared__ variable cannot have an initialization."); - } + // CUDA E.2.4.1 "__shared__ variables cannot have an initialization + // as part of their declaration." Sema has already checked for + // error cases, so we just need to set Init to UndefValue. 
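A hypothetical C translation unit showing the tentative-definition case that EmitTentativeDefinition now guards against: once a real definition with the same mangled name exists, re-emitting the declaration must not clobber it.

  int counter;     // tentative definition
  int counter;     // still tentative; emitted once, zero-initialized
  int limit = 10;  // real definition
  int limit;       // later tentative definition: must not overwrite the emitted initializer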
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + D->hasAttr<CUDASharedAttr>()) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); - } else if (!InitExpr) { + else if (!InitExpr) { // This is a tentative definition; tentative definitions are // implicitly initialized with { 0 }. // @@ -2318,7 +2438,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { } llvm::Type* InitType = Init->getType(); - llvm::Constant *Entry = GetAddrOfGlobalVar(D, InitType); + llvm::Constant *Entry = + GetAddrOfGlobalVar(D, InitType, /*IsForDefinition=*/!IsTentative); // Strip off a bitcast if we got one back. if (auto *CE = dyn_cast<llvm::ConstantExpr>(Entry)) { @@ -2350,7 +2471,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { Entry->setName(StringRef()); // Make a new global with the correct type, this is now guaranteed to work. - GV = cast<llvm::GlobalVariable>(GetAddrOfGlobalVar(D, InitType)); + GV = cast<llvm::GlobalVariable>( + GetAddrOfGlobalVar(D, InitType, /*IsForDefinition=*/!IsTentative)); // Replace all uses of the old global with the new global llvm::Constant *NewPtrForOldDecl = @@ -2366,6 +2488,10 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { if (D->hasAttr<AnnotateAttr>()) AddGlobalAnnotations(D, GV); + // Set the llvm linkage type as appropriate. + llvm::GlobalValue::LinkageTypes Linkage = + getLLVMLinkageVarDefinition(D, GV->isConstant()); + // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on // the device. [...]" // CUDA B.2.2 "The __constant__ qualifier, optionally used together with @@ -2373,9 +2499,34 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { // Is accessible from all the threads within the grid and from the host // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize() // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())." - if (GV && LangOpts.CUDA && LangOpts.CUDAIsDevice && - (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>())) { - GV->setExternallyInitialized(true); + if (GV && LangOpts.CUDA) { + if (LangOpts.CUDAIsDevice) { + if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) + GV->setExternallyInitialized(true); + } else { + // Host-side shadows of external declarations of device-side + // global variables become internal definitions. These have to + // be internal in order to prevent name conflicts with global + // host variables with the same name in a different TUs. + if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) { + Linkage = llvm::GlobalValue::InternalLinkage; + + // Shadow variables and their properties must be registered + // with CUDA runtime. + unsigned Flags = 0; + if (!D->hasDefinition()) + Flags |= CGCUDARuntime::ExternDeviceVar; + if (D->hasAttr<CUDAConstantAttr>()) + Flags |= CGCUDARuntime::ConstantDeviceVar; + getCUDARuntime().registerDeviceVar(*GV, Flags); + } else if (D->hasAttr<CUDASharedAttr>()) + // __shared__ variables are odd. Shadows do get created, but + // they are not registered with the CUDA runtime, so they + // can't really be used to access their device-side + // counterparts. It's not clear yet whether it's nvcc's bug or + // a feature, but we've got to do the same for compatibility. + Linkage = llvm::GlobalValue::InternalLinkage; + } } GV->setInitializer(Init); @@ -2392,9 +2543,6 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { GV->setAlignment(getContext().getDeclAlign(D).getQuantity()); - // Set the llvm linkage type as appropriate. 
- llvm::GlobalValue::LinkageTypes Linkage = - getLLVMLinkageVarDefinition(D, GV->isConstant()); // On Darwin, if the normal linkage of a C++ thread_local variable is // LinkOnce or Weak, we keep the normal linkage to prevent multiple @@ -2439,7 +2587,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { // Emit global variable debug information. if (CGDebugInfo *DI = getModuleDebugInfo()) - if (getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) + if (getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) DI->EmitGlobalVariable(GV, D); } @@ -2474,7 +2622,7 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context, if (shouldBeInCOMDAT(CGM, *D)) return true; - // Declarations with a required alignment do not have common linakge in MSVC + // Declarations with a required alignment do not have common linkage in MSVC // mode. if (Context.getTargetInfo().getCXXABI().isMicrosoft()) { if (D->hasAttr<AlignedAttr>()) @@ -2535,9 +2683,18 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator( // explicit instantiations can occur in multiple translation units // and must all be equivalent. However, we are not allowed to // throw away these explicit instantiations. - if (Linkage == GVA_StrongODR) - return !Context.getLangOpts().AppleKext ? llvm::Function::WeakODRLinkage - : llvm::Function::ExternalLinkage; + // + // We don't currently support CUDA device code spread out across multiple TUs, + // so say that CUDA templates are either external (for kernels) or internal. + // This lets llvm perform aggressive inter-procedural optimizations. + if (Linkage == GVA_StrongODR) { + if (Context.getLangOpts().AppleKext) + return llvm::Function::ExternalLinkage; + if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) + return D->hasAttr<CUDAGlobalAttr>() ? llvm::Function::ExternalLinkage + : llvm::Function::InternalLinkage; + return llvm::Function::WeakODRLinkage; + } // C++ doesn't have tentative definitions and thus cannot have common // linkage. @@ -2694,6 +2851,10 @@ static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, } void CodeGenModule::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) { + auto DK = VD->isThisDeclarationADefinition(); + if (DK == VarDecl::Definition && VD->hasAttr<DLLImportAttr>()) + return; + TemplateSpecializationKind TSK = VD->getTemplateSpecializationKind(); // If we have a definition, this might be a deferred decl. If the // instantiation is explicit, make sure we emit it at the end. 
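A hypothetical explicit instantiation of the kind classified as GVA_StrongODR above; under CUDA device compilation the patch lowers it with internal linkage (or external for __global__ kernels) rather than the usual weak_odr:

  template <typename T> T square(T x) { return x * x; }
  template int square<int>(int);  // explicit instantiation definition (GVA_StrongODR)
  // Ordinary C++: emitted weak_odr so duplicate instantiations across TUs merge.
  // CUDA device code (single-TU assumption): emitted internal to enable IPO.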
@@ -2757,7 +2918,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { StringRef MangledName = getMangledName(GD); if (AA->getAliasee() == MangledName) { - Diags.Report(AA->getLocation(), diag::err_cyclic_alias); + Diags.Report(AA->getLocation(), diag::err_cyclic_alias) << 0; return; } @@ -2788,7 +2949,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { if (Entry) { if (GA->getAliasee() == Entry) { - Diags.Report(AA->getLocation(), diag::err_cyclic_alias); + Diags.Report(AA->getLocation(), diag::err_cyclic_alias) << 0; return; } @@ -2825,6 +2986,65 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { setAliasAttributes(D, GA); } +void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) { + const auto *D = cast<ValueDecl>(GD.getDecl()); + const IFuncAttr *IFA = D->getAttr<IFuncAttr>(); + assert(IFA && "Not an ifunc?"); + + StringRef MangledName = getMangledName(GD); + + if (IFA->getResolver() == MangledName) { + Diags.Report(IFA->getLocation(), diag::err_cyclic_alias) << 1; + return; + } + + // Report an error if some definition overrides ifunc. + llvm::GlobalValue *Entry = GetGlobalValue(MangledName); + if (Entry && !Entry->isDeclaration()) { + GlobalDecl OtherGD; + if (lookupRepresentativeDecl(MangledName, OtherGD) && + DiagnosedConflictingDefinitions.insert(GD).second) { + Diags.Report(D->getLocation(), diag::err_duplicate_mangled_name); + Diags.Report(OtherGD.getDecl()->getLocation(), + diag::note_previous_definition); + } + return; + } + + Aliases.push_back(GD); + + llvm::Type *DeclTy = getTypes().ConvertTypeForMem(D->getType()); + llvm::Constant *Resolver = + GetOrCreateLLVMFunction(IFA->getResolver(), DeclTy, GD, + /*ForVTable=*/false); + llvm::GlobalIFunc *GIF = + llvm::GlobalIFunc::create(DeclTy, 0, llvm::Function::ExternalLinkage, + "", Resolver, &getModule()); + if (Entry) { + if (GIF->getResolver() == Entry) { + Diags.Report(IFA->getLocation(), diag::err_cyclic_alias) << 1; + return; + } + assert(Entry->isDeclaration()); + + // If there is a declaration in the module, then we had an extern followed + // by the ifunc, as in: + // extern int test(); + // ... + // int test() __attribute__((ifunc("resolver"))); + // + // Remove it and replace uses of it with the ifunc. + GIF->takeName(Entry); + + Entry->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GIF, + Entry->getType())); + Entry->eraseFromParent(); + } else + GIF->setName(MangledName); + + SetCommonAttributes(D, GIF); +} + llvm::Function *CodeGenModule::getIntrinsic(unsigned IID, ArrayRef<llvm::Type*> Tys) { return llvm::Intrinsic::getDeclaration(&getModule(), (llvm::Intrinsic::ID)IID, @@ -2889,19 +3109,40 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { llvm::Constant *Zero = llvm::Constant::getNullValue(Int32Ty); llvm::Constant *Zeros[] = { Zero, Zero }; llvm::Value *V; - + // If we don't already have it, get __CFConstantStringClassReference. 
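A hypothetical source pattern handled by the emitIFuncDefinition hunk above: an extern declaration followed by an ifunc redeclaration naming a resolver defined in the same TU (all identifiers here are made up).

  extern "C" int test_impl(void) { return 0; }
  extern "C" int (*resolve_test(void))(void) { return &test_impl; }  // the resolver
  extern "C" int test(void);                                         // plain declaration seen first
  extern "C" int test(void) __attribute__((ifunc("resolve_test")));  // replaced by a GlobalIFunc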
if (!CFConstantStringClassRef) { llvm::Type *Ty = getTypes().ConvertType(getContext().IntTy); Ty = llvm::ArrayType::get(Ty, 0); - llvm::Constant *GV = CreateRuntimeVariable(Ty, - "__CFConstantStringClassReference"); + llvm::Constant *GV = + CreateRuntimeVariable(Ty, "__CFConstantStringClassReference"); + + if (getTarget().getTriple().isOSBinFormatCOFF()) { + IdentifierInfo &II = getContext().Idents.get(GV->getName()); + TranslationUnitDecl *TUDecl = getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + llvm::GlobalValue *CGV = cast<llvm::GlobalValue>(GV); + + const VarDecl *VD = nullptr; + for (const auto &Result : DC->lookup(&II)) + if ((VD = dyn_cast<VarDecl>(Result))) + break; + + if (!VD || !VD->hasAttr<DLLExportAttr>()) { + CGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + CGV->setLinkage(llvm::GlobalValue::ExternalLinkage); + } else { + CGV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + CGV->setLinkage(llvm::GlobalValue::ExternalLinkage); + } + } + // Decay array -> ptr V = llvm::ConstantExpr::getGetElementPtr(Ty, GV, Zeros); CFConstantStringClassRef = V; - } - else + } else { V = CFConstantStringClassRef; + } QualType CFTy = getContext().getCFConstantStringType(); @@ -2914,8 +3155,8 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { // Flags. llvm::Type *Ty = getTypes().ConvertType(getContext().UnsignedIntTy); - Fields[1] = isUTF16 ? llvm::ConstantInt::get(Ty, 0x07d0) : - llvm::ConstantInt::get(Ty, 0x07C8); + Fields[1] = isUTF16 ? llvm::ConstantInt::get(Ty, 0x07d0) + : llvm::ConstantInt::get(Ty, 0x07C8); // String pointer. llvm::Constant *C = nullptr; @@ -2933,21 +3174,20 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { auto *GV = new llvm::GlobalVariable(getModule(), C->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, C, ".str"); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't enforce the target's minimum global alignment, since the only use // of the string is via this class initializer. - // FIXME: We set the section explicitly to avoid a bug in ld64 224.1. Without - // it LLVM can merge the string with a non unnamed_addr one during LTO. Doing - // that changes the section it ends in, which surprises ld64. - if (isUTF16) { - CharUnits Align = getContext().getTypeAlignInChars(getContext().ShortTy); - GV->setAlignment(Align.getQuantity()); - GV->setSection("__TEXT,__ustring"); - } else { - CharUnits Align = getContext().getTypeAlignInChars(getContext().CharTy); - GV->setAlignment(Align.getQuantity()); - GV->setSection("__TEXT,__cstring,cstring_literals"); - } + CharUnits Align = isUTF16 + ? getContext().getTypeAlignInChars(getContext().ShortTy) + : getContext().getTypeAlignInChars(getContext().CharTy); + GV->setAlignment(Align.getQuantity()); + + // FIXME: We set the section explicitly to avoid a bug in ld64 224.1. + // Without it LLVM can merge the string with a non unnamed_addr one during + // LTO. Doing that changes the section it ends in, which surprises ld64. + if (getTarget().getTriple().isOSBinFormatMachO()) + GV->setSection(isUTF16 ? "__TEXT,__ustring" + : "__TEXT,__cstring,cstring_literals"); // String. 
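For orientation, a hypothetical C++ mirror of the four fields assembled above for a CFString literal such as CFSTR("hello"):

  struct CFConstantStringShape {
    const void *isa;      // __CFConstantStringClassReference (dllimport'ed on COFF)
    unsigned    flags;    // 0x7C8 for 8-bit storage, 0x7D0 for UTF-16
    const void *payload;  // the ".str" global: unnamed_addr, Mach-O section set explicitly
    long        length;   // number of characters
  };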
Fields[2] = @@ -2968,8 +3208,18 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { GV = new llvm::GlobalVariable(getModule(), C->getType(), true, llvm::GlobalVariable::PrivateLinkage, C, "_unnamed_cfstring_"); - GV->setSection("__DATA,__cfstring"); GV->setAlignment(Alignment.getQuantity()); + switch (getTarget().getTriple().getObjectFormat()) { + case llvm::Triple::UnknownObjectFormat: + llvm_unreachable("unknown file format"); + case llvm::Triple::COFF: + case llvm::Triple::ELF: + GV->setSection("cfstring"); + break; + case llvm::Triple::MachO: + GV->setSection("__DATA,__cfstring"); + break; + } Entry.second = GV; return ConstantAddress(GV, Alignment); @@ -3062,7 +3312,7 @@ CodeGenModule::GetAddrOfConstantString(const StringLiteral *Literal) { auto *GV = new llvm::GlobalVariable(getModule(), C->getType(), isConstant, Linkage, C, ".str"); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't enforce the target's minimum global alignment, since the only use // of the string is via this class initializer. CharUnits Align = getContext().getTypeAlignInChars(getContext().CharTy); @@ -3181,7 +3431,7 @@ GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT, M, C->getType(), !CGM.getLangOpts().WritableStrings, LT, C, GlobalName, nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace); GV->setAlignment(Alignment.getQuantity()); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); if (GV->isWeakForLinker()) { assert(CGM.supportsCOMDAT() && "Only COFF uses weak string literals"); GV->setComdat(M.getOrInsertComdat(GV->getName())); @@ -3528,6 +3778,12 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { case Decl::Namespace: EmitNamespace(cast<NamespaceDecl>(D)); break; + case Decl::CXXRecord: + // Emit any static data members, they may be definitions. + for (auto *I : cast<CXXRecordDecl>(D)->decls()) + if (isa<VarDecl>(I) || isa<CXXRecordDecl>(I)) + EmitTopLevelDecl(I); + break; // No code generation needed. case Decl::UsingShadow: case Decl::ClassTemplate: @@ -3595,7 +3851,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { ObjCRuntime->GenerateClass(OMD); // Emit global variable debug information. if (CGDebugInfo *DI = getModuleDebugInfo()) - if (getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) + if (getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) DI->getOrCreateInterfaceType(getContext().getObjCInterfaceType( OMD->getClassInterface()), OMD->getLocation()); break; @@ -3611,6 +3867,31 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { ObjCRuntime->RegisterAlias(cast<ObjCCompatibleAliasDecl>(D)); break; + case Decl::PragmaComment: { + const auto *PCD = cast<PragmaCommentDecl>(D); + switch (PCD->getCommentKind()) { + case PCK_Unknown: + llvm_unreachable("unexpected pragma comment kind"); + case PCK_Linker: + AppendLinkerOptions(PCD->getArg()); + break; + case PCK_Lib: + AddDependentLib(PCD->getArg()); + break; + case PCK_Compiler: + case PCK_ExeStr: + case PCK_User: + break; // We ignore all of these. 
+ } + break; + } + + case Decl::PragmaDetectMismatch: { + const auto *PDMD = cast<PragmaDetectMismatchDecl>(D); + AddDetectMismatch(PDMD->getName(), PDMD->getValue()); + break; + } + case Decl::LinkageSpec: EmitLinkageSpec(cast<LinkageSpecDecl>(D)); break; @@ -3653,6 +3934,10 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { break; } + case Decl::OMPDeclareReduction: + EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(D)); + break; + default: // Make sure we handled everything we should, every other kind is a // non-top-level decl. FIXME: Would be nice to have an isTopLevelDeclKind @@ -3775,6 +4060,10 @@ static void EmitGlobalDeclMetadata(CodeGenModule &CGM, /// to such functions with an unmangled name from inline assembly within the /// same translation unit. void CodeGenModule::EmitStaticExternCAliases() { + // Don't do anything if we're generating CUDA device code -- the NVPTX + // assembly target doesn't support aliases. + if (Context.getTargetInfo().getTriple().isNVPTX()) + return; for (auto &I : StaticExternCValues) { IdentifierInfo *Name = I.first; llvm::GlobalValue *Val = I.second; @@ -3955,27 +4244,35 @@ llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) { return InternalId; } -void CodeGenModule::CreateVTableBitSetEntry(llvm::NamedMDNode *BitsetsMD, - llvm::GlobalVariable *VTable, - CharUnits Offset, - const CXXRecordDecl *RD) { +/// Returns whether this module needs the "all-vtables" type identifier. +bool CodeGenModule::NeedAllVtablesTypeId() const { + // Returns true if at least one of vtable-based CFI checkers is enabled and + // is not in the trapping mode. + return ((LangOpts.Sanitize.has(SanitizerKind::CFIVCall) && + !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIVCall)) || + (LangOpts.Sanitize.has(SanitizerKind::CFINVCall) && + !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFINVCall)) || + (LangOpts.Sanitize.has(SanitizerKind::CFIDerivedCast) && + !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIDerivedCast)) || + (LangOpts.Sanitize.has(SanitizerKind::CFIUnrelatedCast) && + !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIUnrelatedCast))); +} + +void CodeGenModule::AddVTableTypeMetadata(llvm::GlobalVariable *VTable, + CharUnits Offset, + const CXXRecordDecl *RD) { llvm::Metadata *MD = CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); - llvm::Metadata *BitsetOps[] = { - MD, llvm::ConstantAsMetadata::get(VTable), - llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(Int64Ty, Offset.getQuantity()))}; - BitsetsMD->addOperand(llvm::MDTuple::get(getLLVMContext(), BitsetOps)); + VTable->addTypeMetadata(Offset.getQuantity(), MD); - if (CodeGenOpts.SanitizeCfiCrossDso) { - if (auto TypeId = CreateCfiIdForTypeMetadata(MD)) { - llvm::Metadata *BitsetOps2[] = { - llvm::ConstantAsMetadata::get(TypeId), - llvm::ConstantAsMetadata::get(VTable), - llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(Int64Ty, Offset.getQuantity()))}; - BitsetsMD->addOperand(llvm::MDTuple::get(getLLVMContext(), BitsetOps2)); - } + if (CodeGenOpts.SanitizeCfiCrossDso) + if (auto CrossDsoTypeId = CreateCrossDsoCfiTypeId(MD)) + VTable->addTypeMetadata(Offset.getQuantity(), + llvm::ConstantAsMetadata::get(CrossDsoTypeId)); + + if (NeedAllVtablesTypeId()) { + llvm::Metadata *MD = llvm::MDString::get(getLLVMContext(), "all-vtables"); + VTable->addTypeMetadata(Offset.getQuantity(), MD); } } @@ -4007,3 +4304,10 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, Target.getTargetOpts().Features); } } + +llvm::SanitizerStatReport 
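Hypothetical pragmas of the kinds now routed through the PragmaComment and PragmaDetectMismatch cases above (the library and option strings are placeholders):

  #pragma comment(lib, "ws2_32")                    // becomes a dependent-library entry
  #pragma comment(linker, "/include:_some_symbol")  // appended to the "Linker Options" metadata
  #pragma detect_mismatch("my_abi_version", "2")    // recorded so mismatched objects fail to link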
&CodeGenModule::getSanStats() { + if (!SanStats) + SanStats = llvm::make_unique<llvm::SanitizerStatReport>(&getModule()); + + return *SanStats; +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h index fdb4d78..9490499 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h @@ -21,6 +21,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclOpenMP.h" #include "clang/AST/GlobalDecl.h" #include "clang/AST/Mangle.h" #include "clang/Basic/ABI.h" @@ -33,6 +34,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Transforms/Utils/SanitizerStats.h" namespace llvm { class Module; @@ -47,7 +49,6 @@ class IndexedInstrProfReader; } namespace clang { -class TargetCodeGenInfo; class ASTContext; class AtomicType; class FunctionDecl; @@ -91,6 +92,7 @@ class CGCUDARuntime; class BlockFieldFlags; class FunctionArgList; class CoverageMappingModuleGen; +class TargetCodeGenInfo; struct OrderGlobalInits { unsigned int priority; @@ -165,6 +167,9 @@ struct ObjCEntrypoints { /// id objc_storeWeak(id*, id); llvm::Constant *objc_storeWeak; + /// id objc_unsafeClaimAutoreleasedReturnValue(id); + llvm::Constant *objc_unsafeClaimAutoreleasedReturnValue; + /// A void(void) inline asm to use to mark that the return value of /// a call will be immediately retain. llvm::InlineAsm *retainAutoreleasedReturnValueMarker; @@ -268,9 +273,9 @@ private: std::unique_ptr<CGCXXABI> ABI; llvm::LLVMContext &VMContext; - CodeGenTBAA *TBAA; + std::unique_ptr<CodeGenTBAA> TBAA; - mutable const TargetCodeGenInfo *TheTargetCodeGenInfo; + mutable std::unique_ptr<TargetCodeGenInfo> TheTargetCodeGenInfo; // This should not be moved earlier, since its initialization depends on some // of the previous reference members being already initialized and also checks @@ -280,15 +285,16 @@ private: /// Holds information about C++ vtables. CodeGenVTables VTables; - CGObjCRuntime* ObjCRuntime; - CGOpenCLRuntime* OpenCLRuntime; - CGOpenMPRuntime* OpenMPRuntime; - CGCUDARuntime* CUDARuntime; - CGDebugInfo* DebugInfo; - ObjCEntrypoints *ObjCData; - llvm::MDNode *NoObjCARCExceptionsMetadata; + std::unique_ptr<CGObjCRuntime> ObjCRuntime; + std::unique_ptr<CGOpenCLRuntime> OpenCLRuntime; + std::unique_ptr<CGOpenMPRuntime> OpenMPRuntime; + std::unique_ptr<CGCUDARuntime> CUDARuntime; + std::unique_ptr<CGDebugInfo> DebugInfo; + std::unique_ptr<ObjCEntrypoints> ObjCData; + llvm::MDNode *NoObjCARCExceptionsMetadata = nullptr; std::unique_ptr<llvm::IndexedInstrProfReader> PGOReader; InstrProfStats PGOStats; + std::unique_ptr<llvm::SanitizerStatReport> SanStats; // A set of references that have only been seen via a weakref so far. This is // used to remove the weak of the reference if we ever see a direct reference @@ -429,8 +435,8 @@ private: llvm::WeakVH ConstantStringClassRef; /// \brief The LLVM type corresponding to NSConstantString. - llvm::StructType *NSConstantStringType; - + llvm::StructType *NSConstantStringType = nullptr; + /// \brief The type used to describe the state of a fast enumeration in /// Objective-C's for..in loop. 
QualType ObjCFastEnumerationStateType; @@ -450,24 +456,24 @@ private: /// @name Cache for Blocks Runtime Globals /// @{ - llvm::Constant *NSConcreteGlobalBlock; - llvm::Constant *NSConcreteStackBlock; + llvm::Constant *NSConcreteGlobalBlock = nullptr; + llvm::Constant *NSConcreteStackBlock = nullptr; - llvm::Constant *BlockObjectAssign; - llvm::Constant *BlockObjectDispose; + llvm::Constant *BlockObjectAssign = nullptr; + llvm::Constant *BlockObjectDispose = nullptr; - llvm::Type *BlockDescriptorType; - llvm::Type *GenericBlockLiteralType; + llvm::Type *BlockDescriptorType = nullptr; + llvm::Type *GenericBlockLiteralType = nullptr; struct { int GlobalUniqueCount; } Block; /// void @llvm.lifetime.start(i64 %size, i8* nocapture <ptr>) - llvm::Constant *LifetimeStartFn; + llvm::Constant *LifetimeStartFn = nullptr; /// void @llvm.lifetime.end(i64 %size, i8* nocapture <ptr>) - llvm::Constant *LifetimeEndFn; + llvm::Constant *LifetimeEndFn = nullptr; GlobalDecl initializedGlobalDecl; @@ -583,7 +589,7 @@ public: TypeDescriptorMap[Ty] = C; } - CGDebugInfo *getModuleDebugInfo() { return DebugInfo; } + CGDebugInfo *getModuleDebugInfo() { return DebugInfo.get(); } llvm::MDNode *getNoObjCARCExceptionsMetadata() { if (!NoObjCARCExceptionsMetadata) @@ -696,11 +702,14 @@ public: unsigned GetGlobalVarAddressSpace(const VarDecl *D, unsigned AddrSpace); /// Return the llvm::Constant for the address of the given global variable. - /// If Ty is non-null and if the global doesn't exist, then it will be greated + /// If Ty is non-null and if the global doesn't exist, then it will be created /// with the specified type instead of whatever the normal requested type - /// would be. + /// would be. If IsForDefinition is true, it is guranteed that an actual + /// global with type Ty will be returned, not conversion of a variable with + /// the same mangled name but some other type. llvm::Constant *GetAddrOfGlobalVar(const VarDecl *D, - llvm::Type *Ty = nullptr); + llvm::Type *Ty = nullptr, + bool IsForDefinition = false); /// Return the address of the given function. If Ty is non-null, then this /// function will use the specified type if it has to create it. @@ -989,6 +998,8 @@ public: void EmitVTable(CXXRecordDecl *Class); + void RefreshTypeCacheForClass(const CXXRecordDecl *Class); + /// \brief Appends Opts to the "Linker Options" metadata value. void AppendLinkerOptions(StringRef Opts); @@ -1098,34 +1109,42 @@ public: /// \param D Threadprivate declaration. void EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D); - /// Returns whether the given record is blacklisted from control flow - /// integrity checks. - bool IsCFIBlacklistedRecord(const CXXRecordDecl *RD); + /// \brief Emit a code for declare reduction construct. + void EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, + CodeGenFunction *CGF = nullptr); + + /// Returns whether the given record has hidden LTO visibility and therefore + /// may participate in (single-module) CFI and whole-program vtable + /// optimization. + bool HasHiddenLTOVisibility(const CXXRecordDecl *RD); - /// Emit bit set entries for the given vtable using the given layout if - /// vptr CFI is enabled. - void EmitVTableBitSetEntries(llvm::GlobalVariable *VTable, - const VTableLayout &VTLayout); + /// Emit type metadata for the given vtable using the given layout. + void EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, + const VTableLayout &VTLayout); - /// Generate a cross-DSO type identifier for type. 
- llvm::ConstantInt *CreateCfiIdForTypeMetadata(llvm::Metadata *MD); + /// Generate a cross-DSO type identifier for MD. + llvm::ConstantInt *CreateCrossDsoCfiTypeId(llvm::Metadata *MD); /// Create a metadata identifier for the given type. This may either be an /// MDString (for external identifiers) or a distinct unnamed MDNode (for /// internal identifiers). llvm::Metadata *CreateMetadataIdentifierForType(QualType T); - /// Create a bitset entry for the given function and add it to BitsetsMD. - void CreateFunctionBitSetEntry(const FunctionDecl *FD, llvm::Function *F); + /// Create and attach type metadata to the given function. + void CreateFunctionTypeMetadata(const FunctionDecl *FD, llvm::Function *F); - /// Create a bitset entry for the given vtable and add it to BitsetsMD. - void CreateVTableBitSetEntry(llvm::NamedMDNode *BitsetsMD, - llvm::GlobalVariable *VTable, CharUnits Offset, - const CXXRecordDecl *RD); + /// Returns whether this module needs the "all-vtables" type identifier. + bool NeedAllVtablesTypeId() const; + + /// Create and attach type metadata for the given vtable. + void AddVTableTypeMetadata(llvm::GlobalVariable *VTable, CharUnits Offset, + const CXXRecordDecl *RD); /// \breif Get the declaration of std::terminate for the platform. llvm::Constant *getTerminateFn(); + llvm::SanitizerStatReport &getSanStats(); + private: llvm::Constant * GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D, @@ -1136,7 +1155,8 @@ private: llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName, llvm::PointerType *PTy, - const VarDecl *D); + const VarDecl *D, + bool IsForDefinition = false); void setNonAliasAttributes(const Decl *D, llvm::GlobalObject *GO); @@ -1147,8 +1167,9 @@ private: void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr); void EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV); - void EmitGlobalVarDefinition(const VarDecl *D); + void EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative = false); void EmitAliasDefinition(GlobalDecl GD); + void emitIFuncDefinition(GlobalDecl GD); void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D); void EmitObjCIvarInitializations(ObjCImplementationDecl *D); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp index 2c0d93b..4eefdd7 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp @@ -18,11 +18,14 @@ #include "clang/AST/StmtVisitor.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" -#include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/Endian.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MD5.h" +static llvm::cl::opt<bool> EnableValueProfiling( + "enable-value-profiling", llvm::cl::ZeroOrMore, + llvm::cl::desc("Enable value profiling"), llvm::cl::init(false)); + using namespace clang; using namespace CodeGen; @@ -34,12 +37,14 @@ void CodeGenPGO::setFuncName(StringRef Name, PGOReader ? PGOReader->getVersion() : llvm::IndexedInstrProf::Version); // If we're generating a profile, create a variable for the name. - if (CGM.getCodeGenOpts().ProfileInstrGenerate) + if (CGM.getCodeGenOpts().hasProfileClangInstr()) FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName); } void CodeGenPGO::setFuncName(llvm::Function *Fn) { setFuncName(Fn->getName(), Fn->getLinkage()); + // Create PGOFuncName meta data. 
+ llvm::createPGOFuncNameMetadata(*Fn, FuncName); } namespace { @@ -406,7 +411,8 @@ struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> { RecordStmtCount(S); Visit(S->getLoopVarStmt()); Visit(S->getRangeStmt()); - Visit(S->getBeginEndStmt()); + Visit(S->getBeginStmt()); + Visit(S->getEndStmt()); uint64_t ParentCount = CurrentCount; BreakContinueStack.push_back(BreakContinue()); @@ -607,7 +613,7 @@ uint64_t PGOHash::finalize() { void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) { const Decl *D = GD.getDecl(); - bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate; + bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr(); llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader(); if (!InstrumentRegions && !PGOReader) return; @@ -653,12 +659,18 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) { FunctionHash = Walker.Hash.finalize(); } -void CodeGenPGO::emitCounterRegionMapping(const Decl *D) { +bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) { if (SkipCoverageMapping) - return; - // Don't map the functions inside the system headers + return true; + + // Don't map the functions in system headers. + const auto &SM = CGM.getContext().getSourceManager(); auto Loc = D->getBody()->getLocStart(); - if (CGM.getContext().getSourceManager().isInSystemHeader(Loc)) + return SM.isInSystemHeader(Loc); +} + +void CodeGenPGO::emitCounterRegionMapping(const Decl *D) { + if (skipRegionMappingForDecl(D)) return; std::string CoverageMapping; @@ -679,11 +691,7 @@ void CodeGenPGO::emitCounterRegionMapping(const Decl *D) { void CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name, llvm::GlobalValue::LinkageTypes Linkage) { - if (SkipCoverageMapping) - return; - // Don't map the functions inside the system headers - auto Loc = D->getBody()->getLocStart(); - if (CGM.getContext().getSourceManager().isInSystemHeader(Loc)) + if (skipRegionMappingForDecl(D)) return; std::string CoverageMapping; @@ -726,7 +734,7 @@ CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader, } void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) { - if (!CGM.getCodeGenOpts().ProfileInstrGenerate || !RegionCounterMap) + if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap) return; if (!Builder.GetInsertBlock()) return; @@ -740,21 +748,76 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) { Builder.getInt32(Counter)}); } +// This method either inserts a call to the profile run-time during +// instrumentation or puts profile data into metadata for PGO use. 
+void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind, + llvm::Instruction *ValueSite, llvm::Value *ValuePtr) { + + if (!EnableValueProfiling) + return; + + if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock()) + return; + + if (isa<llvm::Constant>(ValuePtr)) + return; + + bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr(); + if (InstrumentValueSites && RegionCounterMap) { + auto BuilderInsertPoint = Builder.saveIP(); + Builder.SetInsertPoint(ValueSite); + llvm::Value *Args[5] = { + llvm::ConstantExpr::getBitCast(FuncNameVar, Builder.getInt8PtrTy()), + Builder.getInt64(FunctionHash), + Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()), + Builder.getInt32(ValueKind), + Builder.getInt32(NumValueSites[ValueKind]++) + }; + Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args); + Builder.restoreIP(BuilderInsertPoint); + return; + } + + llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader(); + if (PGOReader && haveRegionCounts()) { + // We record the top most called three functions at each call site. + // Profile metadata contains "VP" string identifying this metadata + // as value profiling data, then a uint32_t value for the value profiling + // kind, a uint64_t value for the total number of times the call is + // executed, followed by the function hash and execution count (uint64_t) + // pairs for each function. + if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind)) + return; + + llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord, + (llvm::InstrProfValueKind)ValueKind, + NumValueSites[ValueKind]); + + NumValueSites[ValueKind]++; + } +} + void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader, bool IsInMainFile) { CGM.getPGOStats().addVisited(IsInMainFile); RegionCounts.clear(); - if (std::error_code EC = - PGOReader->getFunctionCounts(FuncName, FunctionHash, RegionCounts)) { - if (EC == llvm::instrprof_error::unknown_function) + llvm::Expected<llvm::InstrProfRecord> RecordExpected = + PGOReader->getInstrProfRecord(FuncName, FunctionHash); + if (auto E = RecordExpected.takeError()) { + auto IPE = llvm::InstrProfError::take(std::move(E)); + if (IPE == llvm::instrprof_error::unknown_function) CGM.getPGOStats().addMissing(IsInMainFile); - else if (EC == llvm::instrprof_error::hash_mismatch) + else if (IPE == llvm::instrprof_error::hash_mismatch) CGM.getPGOStats().addMismatched(IsInMainFile); - else if (EC == llvm::instrprof_error::malformed) + else if (IPE == llvm::instrprof_error::malformed) // TODO: Consider a more specific warning for this case. CGM.getPGOStats().addMismatched(IsInMainFile); - RegionCounts.clear(); + return; } + ProfRecord = + llvm::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get())); + RegionCounts = ProfRecord->Counts; } /// \brief Calculate what to divide by to scale weights. 
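A hypothetical indirect call site of the kind the new valueProfile hook instruments with instrprof_value_profile, and later annotates with "VP" metadata naming the hottest targets:

  using Handler = int (*)(int);
  int dispatch(Handler h, int v) {
    return h(v);  // the run-time target of 'h' is the profiled value at this site
  }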
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h index 6bf29ec..d03f235 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h @@ -19,7 +19,9 @@ #include "CodeGenTypes.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/MemoryBuffer.h" +#include <array> #include <memory> namespace clang { @@ -32,10 +34,12 @@ private: std::string FuncName; llvm::GlobalVariable *FuncNameVar; + std::array <unsigned, llvm::IPVK_Last + 1> NumValueSites; unsigned NumRegionCounters; uint64_t FunctionHash; std::unique_ptr<llvm::DenseMap<const Stmt *, unsigned>> RegionCounterMap; std::unique_ptr<llvm::DenseMap<const Stmt *, uint64_t>> StmtCountMap; + std::unique_ptr<llvm::InstrProfRecord> ProfRecord; std::vector<uint64_t> RegionCounts; uint64_t CurrentRegionCount; /// \brief A flag that is set to true when this function doesn't need @@ -44,8 +48,8 @@ private: public: CodeGenPGO(CodeGenModule &CGM) - : CGM(CGM), NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0), - SkipCoverageMapping(false) {} + : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0), + FunctionHash(0), CurrentRegionCount(0), SkipCoverageMapping(false) {} /// Whether or not we have PGO region data for the current function. This is /// false both when we have no data at all and when our data has been @@ -87,6 +91,9 @@ public: /// for an unused declaration. void emitEmptyCounterMapping(const Decl *D, StringRef FuncName, llvm::GlobalValue::LinkageTypes Linkage); + // Insert instrumentation or attach profile metadata at value sites + void valueProfile(CGBuilderTy &Builder, uint32_t ValueKind, + llvm::Instruction *ValueSite, llvm::Value *ValuePtr); private: void setFuncName(llvm::Function *Fn); void setFuncName(StringRef Name, llvm::GlobalValue::LinkageTypes Linkage); @@ -96,6 +103,7 @@ private: llvm::Function *Fn); void loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader, bool IsInMainFile); + bool skipRegionMappingForDecl(const Decl *D); void emitCounterRegionMapping(const Decl *D); public: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp index c3c925c..04224e7 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -44,8 +44,12 @@ llvm::MDNode *CodeGenTBAA::getRoot() { // if our LLVM IR is linked with LLVM IR from a different front-end // (or a different version of this front-end), their TBAA trees will // remain distinct, and the optimizer will treat them conservatively. 
- if (!Root) - Root = MDHelper.createTBAARoot("Simple C/C++ TBAA"); + if (!Root) { + if (Features.CPlusPlus) + Root = MDHelper.createTBAARoot("Simple C++ TBAA"); + else + Root = MDHelper.createTBAARoot("Simple C/C++ TBAA"); + } return Root; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.h index 632cadd..ddb063d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.h @@ -15,14 +15,11 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CODEGENTBAA_H #define LLVM_CLANG_LIB_CODEGEN_CODEGENTBAA_H +#include "clang/AST/Type.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/MDBuilder.h" - -namespace llvm { - class LLVMContext; - class MDNode; -} +#include "llvm/IR/Metadata.h" namespace clang { class ASTContext; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp index 09d9bf1..ebe55c7 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp @@ -272,6 +272,17 @@ void CodeGenTypes::UpdateCompletedType(const TagDecl *TD) { DI->completeType(RD); } +void CodeGenTypes::RefreshTypeCacheForClass(const CXXRecordDecl *RD) { + QualType T = Context.getRecordType(RD); + T = Context.getCanonicalType(T); + + const Type *Ty = T.getTypePtr(); + if (RecordsWithOpaqueMemberPointers.count(Ty)) { + TypeCache.clear(); + RecordsWithOpaqueMemberPointers.clear(); + } +} + static llvm::Type *getTypeForFormat(llvm::LLVMContext &VMContext, const llvm::fltSemantics &format, bool UseNativeHalf = false) { @@ -438,6 +449,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::Float: case BuiltinType::Double: case BuiltinType::LongDouble: + case BuiltinType::Float128: ResultType = getTypeForFormat(getLLVMContext(), Context.getFloatTypeSemantics(T), /* UseNativeHalf = */ false); @@ -453,18 +465,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { ResultType = llvm::IntegerType::get(getLLVMContext(), 128); break; - case BuiltinType::OCLImage1d: - case BuiltinType::OCLImage1dArray: - case BuiltinType::OCLImage1dBuffer: - case BuiltinType::OCLImage2d: - case BuiltinType::OCLImage2dArray: - case BuiltinType::OCLImage2dDepth: - case BuiltinType::OCLImage2dArrayDepth: - case BuiltinType::OCLImage2dMSAA: - case BuiltinType::OCLImage2dArrayMSAA: - case BuiltinType::OCLImage2dMSAADepth: - case BuiltinType::OCLImage2dArrayMSAADepth: - case BuiltinType::OCLImage3d: +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + case BuiltinType::Id: +#include "clang/Basic/OpenCLImageTypes.def" case BuiltinType::OCLSampler: case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: @@ -603,10 +606,13 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { } case Type::MemberPointer: { - if (!getCXXABI().isMemberPointerConvertible(cast<MemberPointerType>(Ty))) - return llvm::StructType::create(getLLVMContext()); - ResultType = - getCXXABI().ConvertMemberPointerType(cast<MemberPointerType>(Ty)); + auto *MPTy = cast<MemberPointerType>(Ty); + if (!getCXXABI().isMemberPointerConvertible(MPTy)) { + RecordsWithOpaqueMemberPointers.insert(MPTy->getClass()); + ResultType = llvm::StructType::create(getLLVMContext()); + } else { + ResultType = getCXXABI().ConvertMemberPointerType(MPTy); + } break; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h index a96f23c4..5796ab8 
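A hypothetical Microsoft-ABI sequence motivating RecordsWithOpaqueMemberPointers and RefreshTypeCacheForClass above: while the class is incomplete its member-pointer representation is unknown, so an opaque type is cached and must be flushed once the inheritance model is settled.

  struct S;             // incomplete: inheritance model not yet known under the MS ABI
  int S::*FieldPtr;     // lowered to an opaque LLVM struct type and cached
  struct S { int x; };  // completing S fixes the representation; the cache is refreshed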
100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h @@ -31,7 +31,6 @@ class StructType; } namespace clang { -class ABIInfo; class ASTContext; template <typename> class CanQual; class CXXConstructorDecl; @@ -51,6 +50,7 @@ class Type; typedef CanQual<Type> CanQualType; namespace CodeGen { +class ABIInfo; class CGCXXABI; class CGRecordLayout; class CodeGenModule; @@ -162,6 +162,10 @@ class CodeGenTypes { /// corresponding llvm::Type. llvm::DenseMap<const Type *, llvm::Type *> TypeCache; + llvm::SmallSet<const Type *, 8> RecordsWithOpaqueMemberPointers; + + unsigned ClangCallConvToLLVMCallConv(CallingConv CC); + public: CodeGenTypes(CodeGenModule &cgm); ~CodeGenTypes(); @@ -203,6 +207,11 @@ public: bool isFuncTypeConvertible(const FunctionType *FT); bool isFuncParamTypeConvertible(QualType Ty); + /// Determine if a C++ inheriting constructor should have parameters matching + /// those of its inherited constructor. + bool inheritingCtorHasParams(const InheritedConstructor &Inherited, + CXXCtorType Type); + /// GetFunctionTypeForVTable - Get the LLVM function type for use in a vtable, /// given a CXXMethodDecl. If the method to has an incomplete return type, /// and/or incomplete argument types, this will return the opaque type. @@ -214,9 +223,9 @@ public: /// replace the 'opaque' type we previously made for it if applicable. void UpdateCompletedType(const TagDecl *TD); - /// getNullaryFunctionInfo - Get the function info for a void() - /// function with standard CC. - const CGFunctionInfo &arrangeNullaryFunction(); + /// \brief Remove stale types from the type cache when an inheritance model + /// gets assigned to a class. + void RefreshTypeCacheForClass(const CXXRecordDecl *RD); // The arrangement methods are split into three families: // - those meant to drive the signature and prologue/epilogue @@ -239,16 +248,55 @@ public: // this for compatibility reasons. const CGFunctionInfo &arrangeGlobalDeclaration(GlobalDecl GD); + + /// Given a function info for a declaration, return the function info + /// for a call with the given arguments. + /// + /// Often this will be able to simply return the declaration info. + const CGFunctionInfo &arrangeCall(const CGFunctionInfo &declFI, + const CallArgList &args); + + /// Free functions are functions that are compatible with an ordinary + /// C function pointer type. const CGFunctionInfo &arrangeFunctionDeclaration(const FunctionDecl *FD); + const CGFunctionInfo &arrangeFreeFunctionCall(const CallArgList &Args, + const FunctionType *Ty, + bool ChainCall); + const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionProtoType> Ty, + const FunctionDecl *FD); + const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionNoProtoType> Ty); + + /// A nullary function is a freestanding function of type 'void ()'. + /// This method works for both calls and declarations. + const CGFunctionInfo &arrangeNullaryFunction(); + + /// A builtin function is a freestanding function using the default + /// C conventions. 
+ const CGFunctionInfo & + arrangeBuiltinFunctionDeclaration(QualType resultType, + const FunctionArgList &args); const CGFunctionInfo & - arrangeFreeFunctionDeclaration(QualType ResTy, const FunctionArgList &Args, - const FunctionType::ExtInfo &Info, - bool isVariadic); + arrangeBuiltinFunctionDeclaration(CanQualType resultType, + ArrayRef<CanQualType> argTypes); + const CGFunctionInfo &arrangeBuiltinFunctionCall(QualType resultType, + const CallArgList &args); + /// Objective-C methods are C functions with some implicit parameters. const CGFunctionInfo &arrangeObjCMethodDeclaration(const ObjCMethodDecl *MD); const CGFunctionInfo &arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, QualType receiverType); + const CGFunctionInfo &arrangeUnprototypedObjCMessageSend( + QualType returnType, + const CallArgList &args); + + /// Block invocation functions are C functions with an implicit parameter. + const CGFunctionInfo &arrangeBlockFunctionDeclaration( + const FunctionProtoType *type, + const FunctionArgList &args); + const CGFunctionInfo &arrangeBlockFunctionCall(const CallArgList &args, + const FunctionType *type); + /// C++ methods have some special rules and also have implicit parameters. const CGFunctionInfo &arrangeCXXMethodDeclaration(const CXXMethodDecl *MD); const CGFunctionInfo &arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, StructorType Type); @@ -256,15 +304,6 @@ public: const CXXConstructorDecl *D, CXXCtorType CtorKind, unsigned ExtraArgs); - const CGFunctionInfo &arrangeFreeFunctionCall(const CallArgList &Args, - const FunctionType *Ty, - bool ChainCall); - const CGFunctionInfo &arrangeFreeFunctionCall(QualType ResTy, - const CallArgList &args, - FunctionType::ExtInfo info, - RequiredArgs required); - const CGFunctionInfo &arrangeBlockFunctionCall(const CallArgList &args, - const FunctionType *type); const CGFunctionInfo &arrangeCXXMethodCall(const CallArgList &args, const FunctionProtoType *type, @@ -272,9 +311,6 @@ public: const CGFunctionInfo &arrangeMSMemberPointerThunk(const CXXMethodDecl *MD); const CGFunctionInfo &arrangeMSCtorClosure(const CXXConstructorDecl *CD, CXXCtorType CT); - const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionProtoType> Ty, - const FunctionDecl *FD); - const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionNoProtoType> Ty); const CGFunctionInfo &arrangeCXXMethodType(const CXXRecordDecl *RD, const FunctionProtoType *FTP, const CXXMethodDecl *MD); @@ -290,6 +326,7 @@ public: bool chainCall, ArrayRef<CanQualType> argTypes, FunctionType::ExtInfo info, + ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos, RequiredArgs args); /// \brief Compute a new LLVM record layout object for the given record. 
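A hypothetical inheriting-constructor example relevant to the inheritingCtorHasParams query added in this CodeGenTypes.h hunk: the ABI decides whether Derived's inherited constructor gets its own parameter list or simply forwards to Base's.

  struct Base { Base(int, double); };
  struct Derived : Base {
    using Base::Base;  // Derived(int, double) is an inheriting constructor
  };
  Derived d(1, 2.0);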
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp index 03e22cd..b011a0f 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -15,10 +15,12 @@ #include "CodeGenFunction.h" #include "clang/AST/StmtVisitor.h" #include "clang/Lex/Lexer.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Optional.h" -#include "llvm/ProfileData/CoverageMapping.h" -#include "llvm/ProfileData/CoverageMappingReader.h" -#include "llvm/ProfileData/CoverageMappingWriter.h" +#include "llvm/ProfileData/Coverage/CoverageMapping.h" +#include "llvm/ProfileData/Coverage/CoverageMappingReader.h" +#include "llvm/ProfileData/Coverage/CoverageMappingWriter.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/FileSystem.h" @@ -128,6 +130,16 @@ public: return strcmp(SM.getBufferName(SM.getSpellingLoc(Loc)), "<built-in>") == 0; } + /// \brief Check whether \c Loc is included or expanded from \c Parent. + bool isNestedIn(SourceLocation Loc, FileID Parent) { + do { + Loc = getIncludeOrExpansionLoc(Loc); + if (Loc.isInvalid()) + return false; + } while (!SM.isInFileID(Loc, Parent)); + return true; + } + /// \brief Get the start of \c S ignoring macro arguments and builtin macros. SourceLocation getStart(const Stmt *S) { SourceLocation Loc = S->getLocStart(); @@ -152,14 +164,17 @@ public: void gatherFileIDs(SmallVectorImpl<unsigned> &Mapping) { FileIDMapping.clear(); - SmallVector<FileID, 8> Visited; + llvm::SmallSet<FileID, 8> Visited; SmallVector<std::pair<SourceLocation, unsigned>, 8> FileLocs; for (const auto &Region : SourceRegions) { SourceLocation Loc = Region.getStartLoc(); FileID File = SM.getFileID(Loc); - if (std::find(Visited.begin(), Visited.end(), File) != Visited.end()) + if (!Visited.insert(File).second) + continue; + + // Do not map FileID's associated with system headers. + if (SM.isInSystemHeader(SM.getSpellingLoc(Loc))) continue; - Visited.push_back(File); unsigned Depth = 0; for (SourceLocation Parent = getIncludeOrExpansionLoc(Loc); @@ -191,12 +206,6 @@ public: return None; } - /// \brief Return true if the given clang's file id has a corresponding - /// coverage file id. - bool hasExistingCoverageFileID(FileID File) const { - return FileIDMapping.count(File); - } - /// \brief Gather all the regions that were skipped by the preprocessor /// using the constructs like #if. void gatherSkippedRegions() { @@ -246,6 +255,10 @@ public: SourceLocation LocStart = Region.getStartLoc(); assert(SM.getFileID(LocStart).isValid() && "region in invalid file"); + // Ignore regions from system headers. + if (SM.isInSystemHeader(SM.getSpellingLoc(LocStart))) + continue; + auto CovFileID = getCoverageFileID(LocStart); // Ignore regions that don't have a file, such as builtin macros. if (!CovFileID) @@ -309,7 +322,27 @@ struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder { if (!D->hasBody()) return; auto Body = D->getBody(); - SourceRegions.emplace_back(Counter(), getStart(Body), getEnd(Body)); + SourceLocation Start = getStart(Body); + SourceLocation End = getEnd(Body); + if (!SM.isWrittenInSameFile(Start, End)) { + // Walk up to find the common ancestor. + // Correct the locations accordingly. 
+ FileID StartFileID = SM.getFileID(Start); + FileID EndFileID = SM.getFileID(End); + while (StartFileID != EndFileID && !isNestedIn(End, StartFileID)) { + Start = getIncludeOrExpansionLoc(Start); + assert(Start.isValid() && + "Declaration start location not nested within a known region"); + StartFileID = SM.getFileID(Start); + } + while (StartFileID != EndFileID) { + End = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(End)); + assert(End.isValid() && + "Declaration end location not nested within a known region"); + EndFileID = SM.getFileID(End); + } + } + SourceRegions.emplace_back(Counter(), Start, End); } /// \brief Write the mapping data to the output stream @@ -318,6 +351,9 @@ struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder { gatherFileIDs(FileIDMapping); emitSourceRegions(); + if (MappingRegions.empty()) + return; + CoverageMappingWriter Writer(FileIDMapping, None, MappingRegions); Writer.write(OS); } @@ -356,10 +392,6 @@ struct CounterCoverageMappingBuilder return addCounters(addCounters(C1, C2), C3); } - Counter addCounters(Counter C1, Counter C2, Counter C3, Counter C4) { - return addCounters(addCounters(C1, C2, C3), C4); - } - /// \brief Return the region counter for the given statement. /// /// This should only be called on statements that have a dedicated counter. @@ -433,31 +465,43 @@ struct CounterCoverageMappingBuilder Visit(S); Counter ExitCount = getRegion().getCounter(); popRegions(Index); + + // The statement may be spanned by an expansion. Make sure we handle a file + // exit out of this expansion before moving to the next statement. + if (SM.isBeforeInTranslationUnit(getStart(S), S->getLocStart())) + MostRecentLocation = getEnd(S); + return ExitCount; } + /// \brief Check whether a region with bounds \c StartLoc and \c EndLoc + /// is already added to \c SourceRegions. + bool isRegionAlreadyAdded(SourceLocation StartLoc, SourceLocation EndLoc) { + return SourceRegions.rend() != + std::find_if(SourceRegions.rbegin(), SourceRegions.rend(), + [&](const SourceMappingRegion &Region) { + return Region.getStartLoc() == StartLoc && + Region.getEndLoc() == EndLoc; + }); + } + /// \brief Adjust the most recently visited location to \c EndLoc. /// /// This should be used after visiting any statements in non-source order. void adjustForOutOfOrderTraversal(SourceLocation EndLoc) { MostRecentLocation = EndLoc; - // Avoid adding duplicate regions if we have a completed region on the top - // of the stack and are adjusting to the end of a virtual file. + // The code region for a whole macro is created in handleFileExit() when + // it detects exiting of the virtual file of that macro. If we visited + // statements in non-source order, we might already have such a region + // added, for example, if a body of a loop is divided among multiple + // macros. Avoid adding duplicate regions in such case. if (getRegion().hasEndLoc() && - MostRecentLocation == getEndOfFileOrMacro(MostRecentLocation)) + MostRecentLocation == getEndOfFileOrMacro(MostRecentLocation) && + isRegionAlreadyAdded(getStartOfFileOrMacro(MostRecentLocation), + MostRecentLocation)) MostRecentLocation = getIncludeOrExpansionLoc(MostRecentLocation); } - /// \brief Check whether \c Loc is included or expanded from \c Parent. - bool isNestedIn(SourceLocation Loc, FileID Parent) { - do { - Loc = getIncludeOrExpansionLoc(Loc); - if (Loc.isInvalid()) - return false; - } while (!SM.isInFileID(Loc, Parent)); - return true; - } - /// \brief Adjust regions and state when \c NewLoc exits a file. 
/// /// If moving from our most recently tracked location to \c NewLoc exits any @@ -563,6 +607,9 @@ struct CounterCoverageMappingBuilder emitExpansionRegions(); gatherSkippedRegions(); + if (MappingRegions.empty()) + return; + CoverageMappingWriter Writer(VirtualFileMapping, Builder.getExpressions(), MappingRegions); Writer.write(OS); @@ -579,6 +626,11 @@ struct CounterCoverageMappingBuilder void VisitDecl(const Decl *D) { Stmt *Body = D->getBody(); + + // Do not propagate region counts into system headers. + if (Body && SM.isInSystemHeader(SM.getSpellingLoc(getStart(Body)))) + return; + propagateCounts(getRegionCounter(Body), Body); } @@ -769,7 +821,9 @@ struct CounterCoverageMappingBuilder BreakContinueStack.back().ContinueCount, BC.ContinueCount); Counter ExitCount = getRegionCounter(S); - pushRegion(ExitCount); + SourceLocation ExitLoc = getEnd(S); + pushRegion(ExitCount, getStart(S), ExitLoc); + handleFileExit(ExitLoc); } void VisitSwitchCase(const SwitchCase *S) { @@ -822,7 +876,12 @@ struct CounterCoverageMappingBuilder void VisitCXXTryStmt(const CXXTryStmt *S) { extendRegion(S); - Visit(S->getTryBlock()); + // Handle macros that generate the "try" but not the rest. + extendRegion(S->getTryBlock()); + + Counter ParentCount = getRegion().getCounter(); + propagateCounts(ParentCount, S->getTryBlock()); + for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I) Visit(S->getHandler(I)); @@ -911,7 +970,7 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName, void CoverageMappingModuleGen::addFunctionMappingRecord( llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash, - const std::string &CoverageMapping, bool isUsed) { + const std::string &CoverageMapping, bool IsUsed) { llvm::LLVMContext &Ctx = CGM.getLLVMContext(); if (!FunctionRecordTy) { #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) LLVMType, @@ -929,10 +988,10 @@ void CoverageMappingModuleGen::addFunctionMappingRecord( }; FunctionRecords.push_back(llvm::ConstantStruct::get( FunctionRecordTy, makeArrayRef(FunctionRecordVals))); - if (!isUsed) + if (!IsUsed) FunctionNames.push_back( llvm::ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx))); - CoverageMappings += CoverageMapping; + CoverageMappings.push_back(CoverageMapping); if (CGM.getCodeGenOpts().DumpCoverageMapping) { // Dump the coverage mapping data for this function by decoding the @@ -978,8 +1037,10 @@ void CoverageMappingModuleGen::emit() { std::string FilenamesAndCoverageMappings; llvm::raw_string_ostream OS(FilenamesAndCoverageMappings); CoverageFilenamesSectionWriter(FilenameRefs).write(OS); - OS << CoverageMappings; - size_t CoverageMappingSize = CoverageMappings.size(); + std::string RawCoverageMappings = + llvm::join(CoverageMappings.begin(), CoverageMappings.end(), ""); + OS << RawCoverageMappings; + size_t CoverageMappingSize = RawCoverageMappings.size(); size_t FilenamesSize = OS.str().size() - CoverageMappingSize; // Append extra zeroes if necessary to ensure that the size of the filenames // and coverage mappings is a multiple of 8. @@ -1035,7 +1096,7 @@ void CoverageMappingModuleGen::emit() { // to pass the list of names referenced to codegen. 
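The addFunctionMappingRecord and emit() hunks above stop concatenating coverage mappings eagerly: each function's mapping is stored as its own string and only joined (llvm::join with an empty separator) when the combined coverage blob is built, after which the filenames-plus-mappings data is padded with zero bytes to a multiple of 8. A small standalone sketch of that size arithmetic, with std::accumulate standing in for llvm::join and made-up payload strings:

    #include <iostream>
    #include <numeric>
    #include <string>
    #include <vector>

    int main() {
      // Per-function coverage mappings, kept as separate strings until emission.
      std::vector<std::string> coverageMappings = {"AB", "CDE"};
      std::string filenames = "main.cpp;a.h"; // stand-in for the encoded filenames

      // Concatenate only at emit time.
      std::string rawMappings = std::accumulate(
          coverageMappings.begin(), coverageMappings.end(), std::string());

      std::string blob = filenames + rawMappings;

      // Pad so the combined filenames + mappings size is a multiple of 8,
      // mirroring the "append extra zeroes" step in emit().
      if (size_t rem = blob.size() % 8)
        blob.append(8 - rem, '\0');

      std::cout << "mappings: " << rawMappings.size() << " bytes, padded blob: "
                << blob.size() << " bytes\n"; // the 17-byte blob rounds up to 24
    }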
new llvm::GlobalVariable(CGM.getModule(), NamesArrTy, true, llvm::GlobalValue::InternalLinkage, NamesArrVal, - llvm::getCoverageNamesVarName()); + llvm::getCoverageUnusedNamesVarName()); } } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h index 9ae2bcf..c202fe8 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h @@ -56,7 +56,7 @@ class CoverageMappingModuleGen { std::vector<llvm::Constant *> FunctionRecords; std::vector<llvm::Constant *> FunctionNames; llvm::StructType *FunctionRecordTy; - std::string CoverageMappings; + std::vector<std::string> CoverageMappings; public: CoverageMappingModuleGen(CodeGenModule &CGM, CoverageSourceInfo &SourceInfo) @@ -72,7 +72,7 @@ public: StringRef FunctionNameValue, uint64_t FunctionHash, const std::string &CoverageMapping, - bool isUsed = true); + bool IsUsed = true); /// \brief Emit the coverage mapping data for a translation unit. void emit(); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h b/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h index 85cd154..4717a66 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h @@ -89,7 +89,10 @@ enum CleanupKind : unsigned { InactiveCleanup = 0x4, InactiveEHCleanup = EHCleanup | InactiveCleanup, InactiveNormalCleanup = NormalCleanup | InactiveCleanup, - InactiveNormalAndEHCleanup = NormalAndEHCleanup | InactiveCleanup + InactiveNormalAndEHCleanup = NormalAndEHCleanup | InactiveCleanup, + + LifetimeMarker = 0x8, + NormalEHLifetimeMarker = LifetimeMarker | NormalAndEHCleanup, }; /// A stack of scopes which respond to exceptions, including cleanups @@ -341,9 +344,7 @@ public: /// Determines whether the exception-scopes stack is empty. bool empty() const { return StartOfData == EndOfBuffer; } - bool requiresLandingPad() const { - return InnermostEHScope != stable_end(); - } + bool requiresLandingPad() const; /// Determines whether there are any normal cleanups on the stack. bool hasNormalCleanups() const { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp index e02c8dc..6051594 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -154,17 +154,9 @@ public: Address Ptr, QualType ElementType, const CXXDestructorDecl *Dtor) override; - /// Itanium says that an _Unwind_Exception has to be "double-word" - /// aligned (and thus the end of it is also so-aligned), meaning 16 - /// bytes. Of course, that was written for the actual Itanium, - /// which is a 64-bit platform. Classically, the ABI doesn't really - /// specify the alignment on other platforms, but in practice - /// libUnwind declares the struct with __attribute__((aligned)), so - /// we assume that alignment here. (It's generally 16 bytes, but - /// some targets overwrite it.) 
CharUnits getAlignmentOfExnObject() { - auto align = CGM.getContext().getTargetDefaultAlignForAttributeAligned(); - return CGM.getContext().toCharUnitsFromBits(align); + unsigned Align = CGM.getContext().getTargetInfo().getExnObjectAlignment(); + return CGM.getContext().toCharUnitsFromBits(Align); } void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) override; @@ -451,6 +443,7 @@ private: (isa<CXXDestructorDecl>(GD.getDecl()) && GD.getDtorType() != Dtor_Deleting); } + bool canCallMismatchedFunctionType() const override { return false; } }; } @@ -1496,7 +1489,8 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, DC->getParent()->isTranslationUnit()) EmitFundamentalRTTIDescriptors(); - CGM.EmitVTableBitSetEntries(VTable, VTLayout); + if (!VTable->isDeclarationForLinker()) + CGM.EmitVTableTypeMetadata(VTable, VTLayout); } bool ItaniumCXXABI::isVirtualOffsetNeededForVTableField( @@ -1528,8 +1522,8 @@ ItaniumCXXABI::getVTableAddressPoint(BaseSubobject Base, .getVTableLayout(VTableClass) .getAddressPoint(Base); llvm::Value *Indices[] = { - llvm::ConstantInt::get(CGM.Int64Ty, 0), - llvm::ConstantInt::get(CGM.Int64Ty, AddressPoint) + llvm::ConstantInt::get(CGM.Int32Ty, 0), + llvm::ConstantInt::get(CGM.Int32Ty, AddressPoint) }; return llvm::ConstantExpr::getInBoundsGetElementPtr(VTable->getValueType(), @@ -1568,7 +1562,7 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, if (VTable) return VTable; - // Queue up this v-table for possible deferred emission. + // Queue up this vtable for possible deferred emission. CGM.addDeferredVTable(RD); SmallString<256> Name; @@ -1581,7 +1575,7 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, VTable = CGM.CreateOrReplaceCXXRuntimeVariable( Name, ArrayType, llvm::GlobalValue::ExternalLinkage); - VTable->setUnnamedAddr(true); + VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); if (RD->hasAttr<DLLImportAttr>()) VTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); @@ -1601,14 +1595,18 @@ llvm::Value *ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, auto *MethodDecl = cast<CXXMethodDecl>(GD.getDecl()); llvm::Value *VTable = CGF.GetVTablePtr(This, Ty, MethodDecl->getParent()); - if (CGF.SanOpts.has(SanitizerKind::CFIVCall)) - CGF.EmitVTablePtrCheckForCall(MethodDecl, VTable, - CodeGenFunction::CFITCK_VCall, Loc); - uint64_t VTableIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(GD); - llvm::Value *VFuncPtr = - CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfn"); - return CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign()); + if (CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) { + return CGF.EmitVTableTypeCheckedLoad( + MethodDecl->getParent(), VTable, + VTableIndex * CGM.getContext().getTargetInfo().getPointerWidth(0) / 8); + } else { + CGF.EmitTypeMetadataCodeForVCall(MethodDecl->getParent(), VTable, Loc); + + llvm::Value *VFuncPtr = + CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfn"); + return CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign()); + } } llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall( @@ -1913,10 +1911,18 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, bool shouldPerformInit) { CGBuilderTy &Builder = CGF.Builder; - // We only need to use thread-safe statics for local non-TLS variables; - // global initialization is always single-threaded. 
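In the getVirtualFunctionPointer hunk above, the type-checked-load path addresses the virtual function by a byte offset computed as VTableIndex times the target pointer width divided by 8, rather than a typed GEP over slot indices. A small sketch of that arithmetic, with a plain array of function pointers standing in for the vtable (the names and layout are illustrative only, not the real Itanium vtable layout):

    #include <cstdint>
    #include <cstring>
    #include <iostream>

    // Toy "vtable": an array of function pointers, one per virtual slot.
    static void f0() { std::cout << "slot 0\n"; }
    static void f1() { std::cout << "slot 1\n"; }
    static void f2() { std::cout << "slot 2\n"; }
    using Fn = void (*)();
    static Fn vtable[] = {f0, f1, f2};

    int main() {
      const unsigned pointerWidthBits = sizeof(void *) * 8; // e.g. 64
      const unsigned vtableIndex = 2;

      // Same arithmetic as VTableIndex * getPointerWidth(0) / 8.
      const std::uint64_t byteOffset = vtableIndex * pointerWidthBits / 8;
      std::cout << "byte offset of slot " << vtableIndex << ": " << byteOffset
                << "\n"; // 16 on a 64-bit target

      // Loading through the byte offset selects the same slot as indexing.
      Fn fn;
      std::memcpy(&fn, reinterpret_cast<const char *>(vtable) + byteOffset,
                  sizeof(fn));
      fn(); // prints "slot 2"
    }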
+ // Inline variables that weren't instantiated from variable templates have + // partially-ordered initialization within their translation unit. + bool NonTemplateInline = + D.isInline() && + !isTemplateInstantiation(D.getTemplateSpecializationKind()); + + // We only need to use thread-safe statics for local non-TLS variables and + // inline variables; other global initialization is always single-threaded + // or (through lazy dynamic loading in multiple threads) unsequenced. bool threadsafe = getContext().getLangOpts().ThreadsafeStatics && - D.isLocalVarDecl() && !D.getTLSKind(); + (D.isLocalVarDecl() || NonTemplateInline) && + !D.getTLSKind(); // If we have a global variable with internal linkage and thread-safe statics // are disabled, we can just let the guard variable be of type i8. @@ -1970,7 +1976,11 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, if (!D.isLocalVarDecl() && C && CGM.getTarget().getTriple().isOSBinFormatELF()) { guard->setComdat(C); - CGF.CurFn->setComdat(C); + // An inline variable's guard function is run from the per-TU + // initialization function, not via a dedicated global ctor function, so + // we can't put it in a comdat. + if (!NonTemplateInline) + CGF.CurFn->setComdat(C); } else if (CGM.supportsCOMDAT() && guard->isWeakForLinker()) { guard->setComdat(CGM.getModule().getOrInsertComdat(guard->getName())); } @@ -2008,7 +2018,7 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, // // In LLVM, we do this by marking the load Acquire. if (threadsafe) - LI->setAtomic(llvm::Acquire); + LI->setAtomic(llvm::AtomicOrdering::Acquire); // For ARM, we should only check the first bit, rather than the entire byte: // @@ -2178,17 +2188,28 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD, getMangleContext().mangleItaniumThreadLocalWrapper(VD, Out); } + // FIXME: If VD is a definition, we should regenerate the function attributes + // before returning. if (llvm::Value *V = CGM.getModule().getNamedValue(WrapperName)) return cast<llvm::Function>(V); - llvm::Type *RetTy = Val->getType(); - if (VD->getType()->isReferenceType()) - RetTy = RetTy->getPointerElementType(); + QualType RetQT = VD->getType(); + if (RetQT->isReferenceType()) + RetQT = RetQT.getNonReferenceType(); + + const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( + getContext().getPointerType(RetQT), FunctionArgList()); - llvm::FunctionType *FnTy = llvm::FunctionType::get(RetTy, false); + llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *Wrapper = llvm::Function::Create(FnTy, getThreadLocalWrapperLinkage(VD, CGM), WrapperName.str(), &CGM.getModule()); + + CGM.SetLLVMFunctionAttributes(nullptr, FI, Wrapper); + + if (VD->hasDefinition()) + CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper); + // Always resolve references to the wrapper at link time. if (!Wrapper->hasLocalLinkage() && !(isThreadWrapperReplaceable(VD, CGM) && !llvm::GlobalVariable::isLinkOnceLinkage(Wrapper->getLinkage()) && @@ -2227,6 +2248,11 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( CodeGenFunction(CGM) .GenerateCXXGlobalInitFunc(InitFunc, CXXThreadLocalInits, Address(Guard, GuardAlign)); + // On Darwin platforms, use CXX_FAST_TLS calling convention. 
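The hunks above extend thread-safe guarded initialization to non-template inline variables and keep the first check of the guard as an acquire load (now spelled llvm::AtomicOrdering::Acquire). The overall protocol can be modeled, under the assumption that std::atomic and std::mutex are acceptable stand-ins for the guard byte and for __cxa_guard_acquire / __cxa_guard_release, roughly as follows:

    #include <atomic>
    #include <iostream>
    #include <mutex>

    // Stand-ins for the guard variable and the runtime's internal guard lock.
    static std::atomic<unsigned char> guard{0};
    static std::mutex guardLock;
    static int value; // the static being lazily initialized

    static int expensiveInit() {
      std::cout << "running dynamic initializer once\n";
      return 42;
    }

    int &getStatic() {
      // Fast path: acquire-load the "initialized" bit, matching the Acquire
      // ordering placed on the guard load in EmitGuardedInit.
      if (guard.load(std::memory_order_acquire) == 0) {
        std::lock_guard<std::mutex> lock(guardLock); // ~ __cxa_guard_acquire
        if (guard.load(std::memory_order_relaxed) == 0) {
          value = expensiveInit();
          guard.store(1, std::memory_order_release); // ~ __cxa_guard_release
        }
      }
      return value;
    }

    int main() {
      getStatic();
      std::cout << getStatic() << "\n"; // the initializer ran only once
    }

This is the scenario the NonTemplateInline change above addresses: an inline variable may be initialized from several translation units concurrently, so its guard needs the same protocol as a function-local static.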
+ if (CGM.getTarget().getTriple().isOSDarwin()) { + InitFunc->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); + InitFunc->addFnAttr(llvm::Attribute::NoUnwind); + } } for (const VarDecl *VD : CXXThreadLocals) { llvm::GlobalVariable *Var = @@ -2264,6 +2290,8 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( Init = llvm::Function::Create( FnTy, llvm::GlobalVariable::ExternalWeakLinkage, InitFnName.str(), &CGM.getModule()); + const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); + CGM.SetLLVMFunctionAttributes(nullptr, FI, cast<llvm::Function>(Init)); } if (Init) @@ -2274,8 +2302,11 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( llvm::BasicBlock *Entry = llvm::BasicBlock::Create(Context, "", Wrapper); CGBuilderTy Builder(CGM, Entry); if (InitIsInitFunc) { - if (Init) - Builder.CreateCall(Init); + if (Init) { + llvm::CallInst *CallVal = Builder.CreateCall(Init); + if (isThreadWrapperReplaceable(VD, CGM)) + CallVal->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); + } } else { // Don't know whether we have an init function. Call it if it exists. llvm::Value *Have = Builder.CreateIsNotNull(Init); @@ -2491,6 +2522,11 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { // long, unsigned long, long long, unsigned long long, float, double, // long double, char16_t, char32_t, and the IEEE 754r decimal and // half-precision floating point types. + // + // GCC also emits RTTI for __int128. + // FIXME: We do not emit RTTI information for decimal types here. + + // Types added here must also be added to EmitFundamentalRTTIDescriptors. switch (Ty->getKind()) { case BuiltinType::Void: case BuiltinType::NullPtr: @@ -2513,29 +2549,23 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::Float: case BuiltinType::Double: case BuiltinType::LongDouble: + case BuiltinType::Float128: case BuiltinType::Char16: case BuiltinType::Char32: case BuiltinType::Int128: case BuiltinType::UInt128: - case BuiltinType::OCLImage1d: - case BuiltinType::OCLImage1dArray: - case BuiltinType::OCLImage1dBuffer: - case BuiltinType::OCLImage2d: - case BuiltinType::OCLImage2dArray: - case BuiltinType::OCLImage2dDepth: - case BuiltinType::OCLImage2dArrayDepth: - case BuiltinType::OCLImage2dMSAA: - case BuiltinType::OCLImage2dArrayMSAA: - case BuiltinType::OCLImage2dMSAADepth: - case BuiltinType::OCLImage2dArrayMSAADepth: - case BuiltinType::OCLImage3d: + return true; + +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + case BuiltinType::Id: +#include "clang/Basic/OpenCLImageTypes.def" case BuiltinType::OCLSampler: case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: case BuiltinType::OCLNDRange: case BuiltinType::OCLReserveID: - return true; + return false; case BuiltinType::Dependent: #define BUILTIN_TYPE(Id, SingletonId) @@ -2864,7 +2894,7 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM, llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force) { // We want to operate on the canonical type. - Ty = CGM.getContext().getCanonicalType(Ty); + Ty = Ty.getCanonicalType(); // Check if we've already emitted an RTTI descriptor for this type. SmallString<256> Name; @@ -3327,6 +3357,7 @@ void ItaniumCXXABI::EmitFundamentalRTTIDescriptor(QualType Type) { } void ItaniumCXXABI::EmitFundamentalRTTIDescriptors() { + // Types added here must also be added to TypeInfoIsInStandardLibrary. 
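The TypeInfoIsInStandardLibrary hunk above replaces the hand-written list of OpenCL image builtins with an IMAGE_TYPE X-macro expanded from clang/Basic/OpenCLImageTypes.def, so new image types added to the .def file flow into the switch automatically. The technique itself, shown with a self-contained sketch (the Shape list below is invented; in clang the list lives in a separate .def file pulled in with #include):

    #include <iostream>

    // Stands in for the contents of a .def file.
    #define SHAPE_TYPES  \
      SHAPE_TYPE(Circle) \
      SHAPE_TYPE(Square) \
      SHAPE_TYPE(Triangle)

    // First expansion: one enumerator per entry.
    enum class Shape {
    #define SHAPE_TYPE(Name) Name,
      SHAPE_TYPES
    #undef SHAPE_TYPE
    };

    // Second expansion: one switch case per entry, the same way the diff
    // generates "case BuiltinType::Id:" for every OpenCL image type.
    const char *shapeName(Shape s) {
      switch (s) {
    #define SHAPE_TYPE(Name) \
      case Shape::Name:      \
        return #Name;
        SHAPE_TYPES
    #undef SHAPE_TYPE
      }
      return "unknown";
    }

    int main() { std::cout << shapeName(Shape::Square) << "\n"; }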
QualType FundamentalTypes[] = { getContext().VoidTy, getContext().NullPtrTy, getContext().BoolTy, getContext().WCharTy, @@ -3335,10 +3366,11 @@ void ItaniumCXXABI::EmitFundamentalRTTIDescriptors() { getContext().UnsignedShortTy, getContext().IntTy, getContext().UnsignedIntTy, getContext().LongTy, getContext().UnsignedLongTy, getContext().LongLongTy, - getContext().UnsignedLongLongTy, getContext().HalfTy, + getContext().UnsignedLongLongTy, getContext().Int128Ty, + getContext().UnsignedInt128Ty, getContext().HalfTy, getContext().FloatTy, getContext().DoubleTy, - getContext().LongDoubleTy, getContext().Char16Ty, - getContext().Char32Ty, + getContext().LongDoubleTy, getContext().Float128Ty, + getContext().Char16Ty, getContext().Char32Ty }; for (const QualType &FundamentalType : FundamentalTypes) EmitFundamentalRTTIDescriptor(FundamentalType); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 93210d5..41cd53c 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -254,8 +254,8 @@ public: CXXDtorType Type, bool ForVirtualBase, bool Delegating, Address This) override; - void emitVTableBitSetEntries(VPtrInfo *Info, const CXXRecordDecl *RD, - llvm::GlobalVariable *VTable); + void emitVTableTypeMetadata(VPtrInfo *Info, const CXXRecordDecl *RD, + llvm::GlobalVariable *VTable); void emitVTableDefinitions(CodeGenVTables &CGVT, const CXXRecordDecl *RD) override; @@ -551,7 +551,7 @@ private: return llvm::Constant::getAllOnesValue(CGM.IntTy); } - CharUnits getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD); + CharUnits getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) override; void GetNullMemberPointerFields(const MemberPointerType *MPT, @@ -942,7 +942,6 @@ MicrosoftCXXABI::performBaseAdjustment(CodeGenFunction &CGF, Address Value, llvm::Value *Offset = GetVirtualBaseClassOffset(CGF, Value, SrcDecl, PolymorphicBase); llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(Value.getPointer(), Offset); - Offset = CGF.Builder.CreateTrunc(Offset, CGF.Int32Ty); CharUnits VBaseAlign = CGF.CGM.getVBaseAlignment(Value.getAlignment(), SrcDecl, PolymorphicBase); return std::make_pair(Address(Ptr, VBaseAlign), Offset); @@ -976,8 +975,8 @@ llvm::Value *MicrosoftCXXABI::EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy, Address ThisPtr, llvm::Type *StdTypeInfoPtrTy) { - llvm::Value *Offset; - std::tie(ThisPtr, Offset) = performBaseAdjustment(CGF, ThisPtr, SrcRecordTy); + std::tie(ThisPtr, std::ignore) = + performBaseAdjustment(CGF, ThisPtr, SrcRecordTy); auto Typeid = emitRTtypeidCall(CGF, ThisPtr.getPointer()).getInstruction(); return CGF.Builder.CreateBitCast(Typeid, StdTypeInfoPtrTy); } @@ -1002,6 +1001,7 @@ llvm::Value *MicrosoftCXXABI::EmitDynamicCastCall( llvm::Value *Offset; std::tie(This, Offset) = performBaseAdjustment(CGF, This, SrcRecordTy); llvm::Value *ThisPtr = This.getPointer(); + Offset = CGF.Builder.CreateTrunc(Offset, CGF.Int32Ty); // PVOID __RTDynamicCast( // PVOID inptr, @@ -1025,8 +1025,7 @@ llvm::Value * MicrosoftCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value, QualType SrcRecordTy, QualType DestTy) { - llvm::Value *Offset; - std::tie(Value, Offset) = performBaseAdjustment(CGF, Value, SrcRecordTy); + std::tie(Value, std::ignore) = performBaseAdjustment(CGF, Value, SrcRecordTy); // PVOID __RTCastToVoid( // PVOID inptr) @@ -1152,16 +1151,14 @@ void MicrosoftCXXABI::initializeHiddenVirtualInheritanceMembers( 
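The EmitTypeid and EmitDynamicCastToVoid hunks above stop binding an Offset local they never use and instead discard that half of performBaseAdjustment's result with std::ignore. The idiom in isolation, with a made-up adjust() helper returning a {value, offset} pair:

    #include <iostream>
    #include <tuple>
    #include <utility>

    // Stand-in for performBaseAdjustment: returns an adjusted value plus the
    // offset that was applied to produce it.
    static std::pair<int, long> adjust(int value) { return {value + 16, 16}; }

    int main() {
      int adjusted;
      long offset;

      // When both results are needed, bind both (as EmitDynamicCastCall does).
      std::tie(adjusted, offset) = adjust(100);
      std::cout << adjusted << " (offset " << offset << ")\n";

      // When only the adjusted value matters, discard the offset explicitly
      // rather than keeping a dead local around.
      std::tie(adjusted, std::ignore) = adjust(200);
      std::cout << adjusted << "\n";
    }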
llvm::Value *VBaseOffset = GetVirtualBaseClassOffset(CGF, getThisAddress(CGF), RD, I->first); - // FIXME: it doesn't look right that we SExt in GetVirtualBaseClassOffset() - // just to Trunc back immediately. - VBaseOffset = Builder.CreateTruncOrBitCast(VBaseOffset, CGF.Int32Ty); uint64_t ConstantVBaseOffset = Layout.getVBaseClassOffset(I->first).getQuantity(); // vtorDisp_for_vbase = vbptr[vbase_idx] - offsetof(RD, vbase). llvm::Value *VtorDispValue = Builder.CreateSub( - VBaseOffset, llvm::ConstantInt::get(CGM.Int32Ty, ConstantVBaseOffset), + VBaseOffset, llvm::ConstantInt::get(CGM.PtrDiffTy, ConstantVBaseOffset), "vtordisp.value"); + VtorDispValue = Builder.CreateTruncOrBitCast(VtorDispValue, CGF.Int32Ty); if (!Int8This) Int8This = Builder.CreateBitCast(getThisValue(CGF), @@ -1467,16 +1464,18 @@ unsigned MicrosoftCXXABI::addImplicitConstructorArgs( // Add the 'most_derived' argument second if we are variadic or last if not. const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); - llvm::Value *MostDerivedArg = - llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete); - RValue RV = RValue::get(MostDerivedArg); - if (MostDerivedArg) { - if (FPT->isVariadic()) - Args.insert(Args.begin() + 1, - CallArg(RV, getContext().IntTy, /*needscopy=*/false)); - else - Args.add(RV, getContext().IntTy); + llvm::Value *MostDerivedArg; + if (Delegating) { + MostDerivedArg = getStructorImplicitParamValue(CGF); + } else { + MostDerivedArg = llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete); } + RValue RV = RValue::get(MostDerivedArg); + if (FPT->isVariadic()) + Args.insert(Args.begin() + 1, + CallArg(RV, getContext().IntTy, /*needscopy=*/false)); + else + Args.add(RV, getContext().IntTy); return 1; // Added one arg. } @@ -1494,24 +1493,18 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF, This, false); } - CGF.EmitCXXStructorCall(DD, Callee, ReturnValueSlot(), This.getPointer(), - /*ImplicitParam=*/nullptr, - /*ImplicitParamTy=*/QualType(), nullptr, - getFromDtorType(Type)); + CGF.EmitCXXDestructorCall(DD, Callee, This.getPointer(), + /*ImplicitParam=*/nullptr, + /*ImplicitParamTy=*/QualType(), nullptr, + getFromDtorType(Type)); } -void MicrosoftCXXABI::emitVTableBitSetEntries(VPtrInfo *Info, - const CXXRecordDecl *RD, - llvm::GlobalVariable *VTable) { - if (!getContext().getLangOpts().Sanitize.has(SanitizerKind::CFIVCall) && - !getContext().getLangOpts().Sanitize.has(SanitizerKind::CFINVCall) && - !getContext().getLangOpts().Sanitize.has(SanitizerKind::CFIDerivedCast) && - !getContext().getLangOpts().Sanitize.has(SanitizerKind::CFIUnrelatedCast)) +void MicrosoftCXXABI::emitVTableTypeMetadata(VPtrInfo *Info, + const CXXRecordDecl *RD, + llvm::GlobalVariable *VTable) { + if (!CGM.getCodeGenOpts().PrepareForLTO) return; - llvm::NamedMDNode *BitsetsMD = - CGM.getModule().getOrInsertNamedMetadata("llvm.bitsets"); - // The location of the first virtual function pointer in the virtual table, // aka the "address point" on Itanium. This is at offset 0 if RTTI is // disabled, or sizeof(void*) if RTTI is enabled. @@ -1522,15 +1515,13 @@ void MicrosoftCXXABI::emitVTableBitSetEntries(VPtrInfo *Info, : CharUnits::Zero(); if (Info->PathToBaseWithVPtr.empty()) { - if (!CGM.IsCFIBlacklistedRecord(RD)) - CGM.CreateVTableBitSetEntry(BitsetsMD, VTable, AddressPoint, RD); + CGM.AddVTableTypeMetadata(VTable, AddressPoint, RD); return; } // Add a bitset entry for the least derived base belonging to this vftable. 
- if (!CGM.IsCFIBlacklistedRecord(Info->PathToBaseWithVPtr.back())) - CGM.CreateVTableBitSetEntry(BitsetsMD, VTable, AddressPoint, - Info->PathToBaseWithVPtr.back()); + CGM.AddVTableTypeMetadata(VTable, AddressPoint, + Info->PathToBaseWithVPtr.back()); // Add a bitset entry for each derived class that is laid out at the same // offset as the least derived base. @@ -1548,13 +1539,12 @@ void MicrosoftCXXABI::emitVTableBitSetEntries(VPtrInfo *Info, Offset = VBI->second.VBaseOffset; if (!Offset.isZero()) return; - if (!CGM.IsCFIBlacklistedRecord(DerivedRD)) - CGM.CreateVTableBitSetEntry(BitsetsMD, VTable, AddressPoint, DerivedRD); + CGM.AddVTableTypeMetadata(VTable, AddressPoint, DerivedRD); } // Finally do the same for the most derived class. - if (Info->FullOffsetInMDC.isZero() && !CGM.IsCFIBlacklistedRecord(RD)) - CGM.CreateVTableBitSetEntry(BitsetsMD, VTable, AddressPoint, RD); + if (Info->FullOffsetInMDC.isZero()) + CGM.AddVTableTypeMetadata(VTable, AddressPoint, RD); } void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, @@ -1567,12 +1557,14 @@ void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, if (VTable->hasInitializer()) continue; - llvm::Constant *RTTI = getContext().getLangOpts().RTTIData - ? getMSCompleteObjectLocator(RD, Info) - : nullptr; - const VTableLayout &VTLayout = VFTContext.getVFTableLayout(RD, Info->FullOffsetInMDC); + + llvm::Constant *RTTI = nullptr; + if (any_of(VTLayout.vtable_components(), + [](const VTableComponent &VTC) { return VTC.isRTTIKind(); })) + RTTI = getMSCompleteObjectLocator(RD, Info); + llvm::Constant *Init = CGVT.CreateVTableInitializer( RD, VTLayout.vtable_component_begin(), VTLayout.getNumVTableComponents(), VTLayout.vtable_thunk_begin(), @@ -1580,7 +1572,7 @@ void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, VTable->setInitializer(Init); - emitVTableBitSetEntries(Info, RD, VTable); + emitVTableTypeMetadata(Info, RD, VTable); } } @@ -1642,7 +1634,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, if (DeferredVFTables.insert(RD).second) { // We haven't processed this record type before. - // Queue up this v-table for possible deferred emission. + // Queue up this vtable for possible deferred emission. CGM.addDeferredVTable(RD); #ifndef NDEBUG @@ -1671,7 +1663,16 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, SmallString<256> VFTableName; mangleVFTableName(getMangleContext(), RD, VFPtr, VFTableName); - llvm::GlobalValue::LinkageTypes VFTableLinkage = CGM.getVTableLinkage(RD); + // Classes marked __declspec(dllimport) need vftables generated on the + // import-side in order to support features like constexpr. No other + // translation unit relies on the emission of the local vftable, translation + // units are expected to generate them as needed. + // + // Because of this unique behavior, we maintain this logic here instead of + // getVTableLinkage. + llvm::GlobalValue::LinkageTypes VFTableLinkage = + RD->hasAttr<DLLImportAttr>() ? 
llvm::GlobalValue::LinkOnceODRLinkage + : CGM.getVTableLinkage(RD); bool VFTableComesFromAnotherTU = llvm::GlobalValue::isAvailableExternallyLinkage(VFTableLinkage) || llvm::GlobalValue::isExternalLinkage(VFTableLinkage); @@ -1705,7 +1706,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, VTable = new llvm::GlobalVariable(CGM.getModule(), VTableType, /*isConstant=*/true, VTableLinkage, /*Initializer=*/nullptr, VTableName); - VTable->setUnnamedAddr(true); + VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); llvm::Comdat *C = nullptr; if (!VFTableComesFromAnotherTU && @@ -1733,7 +1734,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, /*AddressSpace=*/0, VFTableLinkage, VFTableName.str(), VTableGEP, &CGM.getModule()); - VFTable->setUnnamedAddr(true); + VFTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); } else { // We don't need a GlobalAlias to be a symbol for the VTable if we won't // be referencing any RTTI data. @@ -1744,9 +1745,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, if (C) VTable->setComdat(C); - if (RD->hasAttr<DLLImportAttr>()) - VFTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); - else if (RD->hasAttr<DLLExportAttr>()) + if (RD->hasAttr<DLLExportAttr>()) VFTable->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); VFTablesMap[ID] = VFTable; @@ -1813,13 +1812,20 @@ llvm::Value *MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, MicrosoftVTableContext::MethodVFTableLocation ML = CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD); - if (CGF.SanOpts.has(SanitizerKind::CFIVCall)) - CGF.EmitVTablePtrCheck(getClassAtVTableLocation(getContext(), GD, ML), - VTable, CodeGenFunction::CFITCK_VCall, Loc); - llvm::Value *VFuncPtr = - Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn"); - return Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign()); + if (CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) { + return CGF.EmitVTableTypeCheckedLoad( + getClassAtVTableLocation(getContext(), GD, ML), VTable, + ML.Index * CGM.getContext().getTargetInfo().getPointerWidth(0) / 8); + } else { + if (CGM.getCodeGenOpts().PrepareForLTO) + CGF.EmitTypeMetadataCodeForVCall( + getClassAtVTableLocation(getContext(), GD, ML), VTable, Loc); + + llvm::Value *VFuncPtr = + Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn"); + return Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign()); + } } llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall( @@ -1843,10 +1849,9 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall( DtorType == Dtor_Deleting); This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true); - RValue RV = CGF.EmitCXXStructorCall(Dtor, Callee, ReturnValueSlot(), - This.getPointer(), - ImplicitParam, Context.IntTy, CE, - StructorType::Deleting); + RValue RV = + CGF.EmitCXXDestructorCall(Dtor, Callee, This.getPointer(), ImplicitParam, + Context.IntTy, CE, StructorType::Deleting); return RV.getScalarVal(); } @@ -1916,7 +1921,7 @@ llvm::Function *MicrosoftCXXABI::EmitVirtualMemPtrThunk( ThunkFn->addFnAttr("thunk"); // These thunks can be compared, so they are not unnamed. - ThunkFn->setUnnamedAddr(false); + ThunkFn->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None); // Start codegen. 
CodeGenFunction CGF(CGM); @@ -1973,7 +1978,7 @@ MicrosoftCXXABI::getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD, "vbtable with this name already exists: mangling bug?"); llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(Name, VBTableType, Linkage); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); if (RD->hasAttr<DLLImportAttr>()) GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); @@ -2030,6 +2035,9 @@ void MicrosoftCXXABI::emitVBTableDefinition(const VPtrInfo &VBT, llvm::ArrayType::get(CGM.IntTy, Offsets.size()); llvm::Constant *Init = llvm::ConstantArray::get(VBTableType, Offsets); GV->setInitializer(Init); + + if (RD->hasAttr<DLLImportAttr>()) + GV->setLinkage(llvm::GlobalVariable::AvailableExternallyLinkage); } llvm::Value *MicrosoftCXXABI::performThisAdjustment(CodeGenFunction &CGF, @@ -2302,7 +2310,7 @@ struct ResetGuardBit final : EHScopeStack::Cleanup { CGBuilderTy &Builder = CGF.Builder; llvm::LoadInst *LI = Builder.CreateLoad(Guard); llvm::ConstantInt *Mask = - llvm::ConstantInt::get(CGF.IntTy, ~(1U << GuardNum)); + llvm::ConstantInt::get(CGF.IntTy, ~(1ULL << GuardNum)); Builder.CreateStore(Builder.CreateAnd(LI, Mask), Guard); } }; @@ -2415,7 +2423,7 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, // } // Test our bit from the guard variable. - llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1U << GuardNum); + llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1ULL << GuardNum); llvm::LoadInst *LI = Builder.CreateLoad(GuardAddr); llvm::Value *IsInitialized = Builder.CreateICmpNE(Builder.CreateAnd(LI, Bit), Zero); @@ -3631,7 +3639,8 @@ MSRTTIBuilder::getCompleteObjectLocator(const VPtrInfo *Info) { } static QualType decomposeTypeForEH(ASTContext &Context, QualType T, - bool &IsConst, bool &IsVolatile) { + bool &IsConst, bool &IsVolatile, + bool &IsUnaligned) { T = Context.getExceptionObjectType(T); // C++14 [except.handle]p3: @@ -3641,10 +3650,12 @@ static QualType decomposeTypeForEH(ASTContext &Context, QualType T, // - a qualification conversion IsConst = false; IsVolatile = false; + IsUnaligned = false; QualType PointeeType = T->getPointeeType(); if (!PointeeType.isNull()) { IsConst = PointeeType.isConstQualified(); IsVolatile = PointeeType.isVolatileQualified(); + IsUnaligned = PointeeType.getQualifiers().hasUnaligned(); } // Member pointer types like "const int A::*" are represented by having RTTI @@ -3667,8 +3678,9 @@ MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type, // TypeDescriptors for exceptions never have qualified pointer types, // qualifiers are stored seperately in order to support qualification // conversions. 
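The ResetGuardBit and guard-bit-test hunks above widen the shifted constant from 1U to 1ULL. With a 32-bit 1U, shift counts of 32 or more are undefined, and even for small counts the complement ~(1U << n) only has 32 significant bits, so it zero-extends instead of filling the upper half of a wider mask. A tiny sketch of the arithmetic difference (the 64-bit view is just for illustration; the actual guard width depends on the target):

    #include <cstdint>
    #include <iostream>

    int main() {
      const unsigned guardNum = 3;

      // 32-bit constant: ~(1U << n) is computed in 32 bits and then
      // zero-extended, leaving the upper 32 bits of the mask clear.
      std::uint64_t narrowMask = ~(1U << guardNum);

      // 64-bit constant: every other bit of the full 64-bit word stays set.
      std::uint64_t wideMask = ~(1ULL << guardNum);

      std::cout << std::hex << narrowMask << "\n"; // fffffff7
      std::cout << std::hex << wideMask << "\n";   // fffffffffffffff7
    }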
- bool IsConst, IsVolatile; - Type = decomposeTypeForEH(getContext(), Type, IsConst, IsVolatile); + bool IsConst, IsVolatile, IsUnaligned; + Type = + decomposeTypeForEH(getContext(), Type, IsConst, IsVolatile, IsUnaligned); bool IsReference = CatchHandlerType->isReferenceType(); @@ -3677,6 +3689,8 @@ MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type, Flags |= 1; if (IsVolatile) Flags |= 2; + if (IsUnaligned) + Flags |= 4; if (IsReference) Flags |= 8; @@ -3961,7 +3975,7 @@ llvm::Constant *MicrosoftCXXABI::getCatchableType(QualType T, auto *GV = new llvm::GlobalVariable( CGM.getModule(), CTType, /*Constant=*/true, getLinkageForRTTI(T), llvm::ConstantStruct::get(CTType, Fields), MangledName); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); GV->setSection(".xdata"); if (GV->isWeakForLinker()) GV->setComdat(CGM.getModule().getOrInsertComdat(GV->getName())); @@ -4079,7 +4093,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getCatchableTypeArray(QualType T) { CTA = new llvm::GlobalVariable( CGM.getModule(), CTAType, /*Constant=*/true, getLinkageForRTTI(T), llvm::ConstantStruct::get(CTAType, Fields), MangledName); - CTA->setUnnamedAddr(true); + CTA->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); CTA->setSection(".xdata"); if (CTA->isWeakForLinker()) CTA->setComdat(CGM.getModule().getOrInsertComdat(CTA->getName())); @@ -4087,8 +4101,8 @@ llvm::GlobalVariable *MicrosoftCXXABI::getCatchableTypeArray(QualType T) { } llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) { - bool IsConst, IsVolatile; - T = decomposeTypeForEH(getContext(), T, IsConst, IsVolatile); + bool IsConst, IsVolatile, IsUnaligned; + T = decomposeTypeForEH(getContext(), T, IsConst, IsVolatile, IsUnaligned); // The CatchableTypeArray enumerates the various (CV-unqualified) types that // the exception object may be caught as. @@ -4104,8 +4118,8 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) { SmallString<256> MangledName; { llvm::raw_svector_ostream Out(MangledName); - getMangleContext().mangleCXXThrowInfo(T, IsConst, IsVolatile, NumEntries, - Out); + getMangleContext().mangleCXXThrowInfo(T, IsConst, IsVolatile, IsUnaligned, + NumEntries, Out); } // Reuse a previously generated ThrowInfo if we have generated an appropriate @@ -4121,6 +4135,8 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) { Flags |= 1; if (IsVolatile) Flags |= 2; + if (IsUnaligned) + Flags |= 4; // The cleanup-function (a destructor) must be called when the exception // object's lifetime ends. 
@@ -4146,7 +4162,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) { auto *GV = new llvm::GlobalVariable( CGM.getModule(), TIType, /*Constant=*/true, getLinkageForRTTI(T), llvm::ConstantStruct::get(TIType, Fields), StringRef(MangledName)); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); GV->setSection(".xdata"); if (GV->isWeakForLinker()) GV->setComdat(CGM.getModule().getOrInsertComdat(GV->getName())); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp index 0be5c55..952d162 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp @@ -25,7 +25,9 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include <memory> + using namespace clang; +using namespace CodeGen; namespace { class CodeGeneratorImpl : public CodeGenerator { @@ -36,13 +38,21 @@ namespace { const CodeGenOptions CodeGenOpts; // Intentionally copied in. unsigned HandlingTopLevelDecls; + + /// Use this when emitting decls to block re-entrant decl emission. It will + /// emit all deferred decls on scope exit. Set EmitDeferred to false if decl + /// emission must be deferred longer, like at the end of a tag definition. struct HandlingTopLevelDeclRAII { CodeGeneratorImpl &Self; - HandlingTopLevelDeclRAII(CodeGeneratorImpl &Self) : Self(Self) { + bool EmitDeferred; + HandlingTopLevelDeclRAII(CodeGeneratorImpl &Self, + bool EmitDeferred = true) + : Self(Self), EmitDeferred(EmitDeferred) { ++Self.HandlingTopLevelDecls; } ~HandlingTopLevelDeclRAII() { - if (--Self.HandlingTopLevelDecls == 0) + unsigned Level = --Self.HandlingTopLevelDecls; + if (Level == 0 && EmitDeferred) Self.EmitDeferredDecls(); } }; @@ -57,15 +67,16 @@ namespace { SmallVector<CXXMethodDecl *, 8> DeferredInlineMethodDefinitions; public: - CodeGeneratorImpl(DiagnosticsEngine &diags, const std::string &ModuleName, + CodeGeneratorImpl(DiagnosticsEngine &diags, llvm::StringRef ModuleName, const HeaderSearchOptions &HSO, const PreprocessorOptions &PPO, const CodeGenOptions &CGO, llvm::LLVMContext &C, CoverageSourceInfo *CoverageInfo = nullptr) : Diags(diags), Ctx(nullptr), HeaderSearchOpts(HSO), PreprocessorOpts(PPO), CodeGenOpts(CGO), HandlingTopLevelDecls(0), - CoverageInfo(CoverageInfo), - M(new llvm::Module(ModuleName, C)) {} + CoverageInfo(CoverageInfo), M(new llvm::Module(ModuleName, C)) { + C.setDiscardValueNames(CGO.DiscardValueNames); + } ~CodeGeneratorImpl() override { // There should normally not be any leftover inline method definitions. 
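The HandlingTopLevelDeclRAII change above adds an EmitDeferred flag so that re-entrant decl handling (for example, decl emission triggered during PCH deserialization, as later hunks in this file note) can keep work deferred even when the nesting count drops back to zero. The counting-RAII shape, modeled with invented names (Emitter, HandlingRAII) and std::function work items standing in for deferred decls:

    #include <functional>
    #include <iostream>
    #include <vector>

    class Emitter {
      unsigned HandlingTopLevelDecls = 0;
      std::vector<std::function<void()>> Deferred;

    public:
      // Bump a nesting counter on entry; when the outermost scope exits and
      // flushing is allowed, run everything deferred in the meantime.
      struct HandlingRAII {
        Emitter &Self;
        bool EmitDeferred;
        HandlingRAII(Emitter &Self, bool EmitDeferred = true)
            : Self(Self), EmitDeferred(EmitDeferred) {
          ++Self.HandlingTopLevelDecls;
        }
        ~HandlingRAII() {
          if (--Self.HandlingTopLevelDecls == 0 && EmitDeferred)
            Self.flushDeferred();
        }
      };

      void defer(std::function<void()> work) {
        Deferred.push_back(std::move(work));
      }

      void flushDeferred() {
        for (auto &work : Deferred)
          work();
        Deferred.clear();
      }

      void handleDecl(const char *name) {
        HandlingRAII guard(*this); // nested calls flush only at the outermost exit
        std::cout << "handling " << name << "\n";
        defer([name] { std::cout << "deferred emission of " << name << "\n"; });
      }
    };

    int main() {
      Emitter emitter;
      emitter.handleDecl("foo"); // flushes when this outermost call returns
      {
        // A scope that must not trigger emission, like HandleTagDeclDefinition.
        Emitter::HandlingRAII block(emitter, /*EmitDeferred=*/false);
        emitter.handleDecl("bar"); // nested: nothing flushes here
      } // the count hits zero, but EmitDeferred is false, so "bar" waits
      emitter.flushDeferred();
    }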
@@ -73,11 +84,19 @@ namespace { Diags.hasErrorOccurred()); } - llvm::Module* GetModule() override { + CodeGenModule &CGM() { + return *Builder; + } + + llvm::Module *GetModule() { return M.get(); } - const Decl *GetDeclForMangledName(StringRef MangledName) override { + llvm::Module *ReleaseModule() { + return M.release(); + } + + const Decl *GetDeclForMangledName(StringRef MangledName) { GlobalDecl Result; if (!Builder->lookupRepresentativeDecl(MangledName, Result)) return nullptr; @@ -92,19 +111,23 @@ namespace { return D; } - llvm::Module *ReleaseModule() override { return M.release(); } + llvm::Constant *GetAddrOfGlobal(GlobalDecl global, bool isForDefinition) { + return Builder->GetAddrOfGlobal(global, isForDefinition); + } void Initialize(ASTContext &Context) override { Ctx = &Context; M->setTargetTriple(Ctx->getTargetInfo().getTriple().getTriple()); - M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString()); + M->setDataLayout(Ctx->getTargetInfo().getDataLayout()); Builder.reset(new CodeGen::CodeGenModule(Context, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, *M, Diags, CoverageInfo)); - for (size_t i = 0, e = CodeGenOpts.DependentLibraries.size(); i < e; ++i) - HandleDependentLibrary(CodeGenOpts.DependentLibraries[i]); + for (auto &&Lib : CodeGenOpts.DependentLibraries) + Builder->AddDependentLib(Lib); + for (auto &&Opt : CodeGenOpts.LinkerOptions) + Builder->AppendLinkerOptions(Opt); } void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override { @@ -140,12 +163,23 @@ namespace { DeferredInlineMethodDefinitions.clear(); } - void HandleInlineMethodDefinition(CXXMethodDecl *D) override { + void HandleInlineFunctionDefinition(FunctionDecl *D) override { if (Diags.hasErrorOccurred()) return; assert(D->doesThisDeclarationHaveABody()); + // Handle friend functions. + if (D->isInIdentifierNamespace(Decl::IDNS_OrdinaryFriend)) { + if (Ctx->getTargetInfo().getCXXABI().isMicrosoft() + && !D->getLexicalDeclContext()->isDependentContext()) + Builder->EmitTopLevelDecl(D); + return; + } + + // Otherwise, must be a method. + auto MD = cast<CXXMethodDecl>(D); + // We may want to emit this definition. However, that decision might be // based on computing the linkage, and we have to defer that in case we // are inside of something that will change the method's final linkage, @@ -154,13 +188,13 @@ namespace { // void bar(); // void foo() { bar(); } // } A; - DeferredInlineMethodDefinitions.push_back(D); + DeferredInlineMethodDefinitions.push_back(MD); // Provide some coverage mapping even for methods that aren't emitted. // Don't do this for templated classes though, as they may not be // instantiable. - if (!D->getParent()->getDescribedClassTemplate()) - Builder->AddDeferredUnusedCoverageMapping(D); + if (!MD->getParent()->getDescribedClassTemplate()) + Builder->AddDeferredUnusedCoverageMapping(MD); } /// HandleTagDeclDefinition - This callback is invoked each time a TagDecl @@ -171,6 +205,10 @@ namespace { if (Diags.hasErrorOccurred()) return; + // Don't allow re-entrant calls to CodeGen triggered by PCH + // deserialization to emit deferred decls. + HandlingTopLevelDeclRAII HandlingDecl(*this, /*EmitDeferred=*/false); + Builder->UpdateCompletedType(D); // For MSVC compatibility, treat declarations of static data members with @@ -185,27 +223,50 @@ namespace { } } } + // For OpenMP emit declare reduction functions, if required. 
+ if (Ctx->getLangOpts().OpenMP) { + for (Decl *Member : D->decls()) { + if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Member)) { + if (Ctx->DeclMustBeEmitted(DRD)) + Builder->EmitGlobal(DRD); + } + } + } } void HandleTagDeclRequiredDefinition(const TagDecl *D) override { if (Diags.hasErrorOccurred()) return; + // Don't allow re-entrant calls to CodeGen triggered by PCH + // deserialization to emit deferred decls. + HandlingTopLevelDeclRAII HandlingDecl(*this, /*EmitDeferred=*/false); + if (CodeGen::CGDebugInfo *DI = Builder->getModuleDebugInfo()) if (const RecordDecl *RD = dyn_cast<RecordDecl>(D)) DI->completeRequiredType(RD); } void HandleTranslationUnit(ASTContext &Ctx) override { + // Release the Builder when there is no error. + if (!Diags.hasErrorOccurred() && Builder) + Builder->Release(); + + // If there are errors before or when releasing the Builder, reset + // the module to stop here before invoking the backend. if (Diags.hasErrorOccurred()) { if (Builder) Builder->clear(); M.reset(); return; } + } - if (Builder) - Builder->Release(); + void AssignInheritanceModel(CXXRecordDecl *RD) override { + if (Diags.hasErrorOccurred()) + return; + + Builder->RefreshTypeCacheForClass(RD); } void CompleteTentativeDefinition(VarDecl *D) override { @@ -221,26 +282,35 @@ namespace { Builder->EmitVTable(RD); } + }; +} - void HandleLinkerOptionPragma(llvm::StringRef Opts) override { - Builder->AppendLinkerOptions(Opts); - } +void CodeGenerator::anchor() { } - void HandleDetectMismatch(llvm::StringRef Name, - llvm::StringRef Value) override { - Builder->AddDetectMismatch(Name, Value); - } +CodeGenModule &CodeGenerator::CGM() { + return static_cast<CodeGeneratorImpl*>(this)->CGM(); +} - void HandleDependentLibrary(llvm::StringRef Lib) override { - Builder->AddDependentLib(Lib); - } - }; +llvm::Module *CodeGenerator::GetModule() { + return static_cast<CodeGeneratorImpl*>(this)->GetModule(); } -void CodeGenerator::anchor() { } +llvm::Module *CodeGenerator::ReleaseModule() { + return static_cast<CodeGeneratorImpl*>(this)->ReleaseModule(); +} + +const Decl *CodeGenerator::GetDeclForMangledName(llvm::StringRef name) { + return static_cast<CodeGeneratorImpl*>(this)->GetDeclForMangledName(name); +} + +llvm::Constant *CodeGenerator::GetAddrOfGlobal(GlobalDecl global, + bool isForDefinition) { + return static_cast<CodeGeneratorImpl*>(this) + ->GetAddrOfGlobal(global, isForDefinition); +} CodeGenerator *clang::CreateLLVMCodeGen( - DiagnosticsEngine &Diags, const std::string &ModuleName, + DiagnosticsEngine &Diags, llvm::StringRef ModuleName, const HeaderSearchOptions &HeaderSearchOpts, const PreprocessorOptions &PreprocessorOpts, const CodeGenOptions &CGO, llvm::LLVMContext &C, CoverageSourceInfo *CoverageInfo) { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index f385e53..de40e41 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -19,8 +19,8 @@ #include "clang/CodeGen/BackendUtil.h" #include "clang/Frontend/CodeGenOptions.h" #include "clang/Frontend/CompilerInstance.h" -#include "clang/Lex/Preprocessor.h" #include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/Preprocessor.h" #include "clang/Serialization/ASTWriter.h" #include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitstreamReader.h" @@ -31,8 +31,10 @@ #include "llvm/IR/Module.h" #include "llvm/Object/COFF.h" 
#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Path.h" #include "llvm/Support/TargetRegistry.h" #include <memory> +#include <utility> using namespace clang; @@ -42,6 +44,7 @@ namespace { class PCHContainerGenerator : public ASTConsumer { DiagnosticsEngine &Diags; const std::string MainFileName; + const std::string OutputFileName; ASTContext *Ctx; ModuleMap &MMap; const HeaderSearchOptions &HeaderSearchOpts; @@ -52,17 +55,15 @@ class PCHContainerGenerator : public ASTConsumer { std::unique_ptr<llvm::LLVMContext> VMContext; std::unique_ptr<llvm::Module> M; std::unique_ptr<CodeGen::CodeGenModule> Builder; - raw_pwrite_stream *OS; + std::unique_ptr<raw_pwrite_stream> OS; std::shared_ptr<PCHBuffer> Buffer; /// Visit every type and emit debug info for it. struct DebugTypeVisitor : public RecursiveASTVisitor<DebugTypeVisitor> { clang::CodeGen::CGDebugInfo &DI; ASTContext &Ctx; - bool SkipTagDecls; - DebugTypeVisitor(clang::CodeGen::CGDebugInfo &DI, ASTContext &Ctx, - bool SkipTagDecls) - : DI(DI), Ctx(Ctx), SkipTagDecls(SkipTagDecls) {} + DebugTypeVisitor(clang::CodeGen::CGDebugInfo &DI, ASTContext &Ctx) + : DI(DI), Ctx(Ctx) {} /// Determine whether this type can be represented in DWARF. static bool CanRepresent(const Type *Ty) { @@ -80,7 +81,8 @@ class PCHContainerGenerator : public ASTConsumer { // TagDecls may be deferred until after all decls have been merged and we // know the complete type. Pure forward declarations will be skipped, but // they don't need to be emitted into the module anyway. - if (SkipTagDecls && isa<TagDecl>(D)) + if (auto *TD = dyn_cast<TagDecl>(D)) + if (!TD->isCompleteDefinition()) return true; QualType QualTy = Ctx.getTypeDeclType(D); @@ -103,7 +105,7 @@ class PCHContainerGenerator : public ASTConsumer { return true; SmallVector<QualType, 16> ArgTypes; - for (auto i : D->params()) + for (auto i : D->parameters()) ArgTypes.push_back(i->getType()); QualType RetTy = D->getReturnType(); QualType FnTy = Ctx.getFunctionType(RetTy, ArgTypes, @@ -122,7 +124,7 @@ class PCHContainerGenerator : public ASTConsumer { ArgTypes.push_back(D->getSelfType(Ctx, D->getClassInterface(), selfIsPseudoStrong, selfIsConsumed)); ArgTypes.push_back(Ctx.getObjCSelType()); - for (auto i : D->params()) + for (auto i : D->parameters()) ArgTypes.push_back(i->getType()); QualType RetTy = D->getReturnType(); QualType FnTy = Ctx.getFunctionType(RetTy, ArgTypes, @@ -136,20 +138,22 @@ class PCHContainerGenerator : public ASTConsumer { public: PCHContainerGenerator(CompilerInstance &CI, const std::string &MainFileName, const std::string &OutputFileName, - raw_pwrite_stream *OS, + std::unique_ptr<raw_pwrite_stream> OS, std::shared_ptr<PCHBuffer> Buffer) - : Diags(CI.getDiagnostics()), Ctx(nullptr), + : Diags(CI.getDiagnostics()), MainFileName(MainFileName), + OutputFileName(OutputFileName), Ctx(nullptr), MMap(CI.getPreprocessor().getHeaderSearchInfo().getModuleMap()), HeaderSearchOpts(CI.getHeaderSearchOpts()), PreprocessorOpts(CI.getPreprocessorOpts()), - TargetOpts(CI.getTargetOpts()), LangOpts(CI.getLangOpts()), OS(OS), - Buffer(Buffer) { + TargetOpts(CI.getTargetOpts()), LangOpts(CI.getLangOpts()), + OS(std::move(OS)), Buffer(std::move(Buffer)) { // The debug info output isn't affected by CodeModel and // ThreadModel, but the backend expects them to be nonempty. 
CodeGenOpts.CodeModel = "default"; CodeGenOpts.ThreadModel = "single"; CodeGenOpts.DebugTypeExtRefs = true; - CodeGenOpts.setDebugInfo(CodeGenOptions::FullDebugInfo); + CodeGenOpts.setDebugInfo(codegenoptions::FullDebugInfo); + CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning()); } ~PCHContainerGenerator() override = default; @@ -160,10 +164,15 @@ public: Ctx = &Context; VMContext.reset(new llvm::LLVMContext()); M.reset(new llvm::Module(MainFileName, *VMContext)); - M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString()); + M->setDataLayout(Ctx->getTargetInfo().getDataLayout()); Builder.reset(new CodeGen::CodeGenModule( *Ctx, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, *M, Diags)); - Builder->getModuleDebugInfo()->setModuleMap(MMap); + + // Prepare CGDebugInfo to emit debug info for a clang module. + auto *DI = Builder->getModuleDebugInfo(); + StringRef ModuleName = llvm::sys::path::filename(MainFileName); + DI->setPCHDescriptor({ModuleName, "", OutputFileName, ~1ULL}); + DI->setModuleMap(MMap); } bool HandleTopLevelDecl(DeclGroupRef D) override { @@ -173,7 +182,7 @@ public: // Collect debug info for all decls in this group. for (auto *I : D) if (!I->isFromASTFile()) { - DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx, true); + DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx); DTV.TraverseDecl(I); } return true; @@ -190,7 +199,20 @@ public: if (D->isFromASTFile()) return; - DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx, false); + // Anonymous tag decls are deferred until we are building their declcontext. + if (D->getName().empty()) + return; + + // Defer tag decls until their declcontext is complete. + auto *DeclCtx = D->getDeclContext(); + while (DeclCtx) { + if (auto *D = dyn_cast<TagDecl>(DeclCtx)) + if (!D->isCompleteDefinition()) + return; + DeclCtx = DeclCtx->getParent(); + } + + DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx); DTV.TraverseDecl(D); Builder->UpdateCompletedType(D); } @@ -215,8 +237,12 @@ public: return; M->setTargetTriple(Ctx.getTargetInfo().getTriple().getTriple()); - M->setDataLayout(Ctx.getTargetInfo().getDataLayoutString()); - Builder->getModuleDebugInfo()->setDwoId(Buffer->Signature); + M->setDataLayout(Ctx.getTargetInfo().getDataLayout()); + + // PCH files don't have a signature field in the control block, + // but LLVM detects DWO CUs by looking for a non-zero DWO id. + uint64_t Signature = Buffer->Signature ? Buffer->Signature : ~1ULL; + Builder->getModuleDebugInfo()->setDwoId(Signature); // Finalize the Builder. if (Builder) @@ -255,20 +281,18 @@ public: DEBUG({ // Print the IR for the PCH container to the debug output. llvm::SmallString<0> Buffer; - llvm::raw_svector_ostream OS(Buffer); - clang::EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts, - Ctx.getTargetInfo().getDataLayoutString(), - M.get(), BackendAction::Backend_EmitLL, &OS); + clang::EmitBackendOutput( + Diags, CodeGenOpts, TargetOpts, LangOpts, + Ctx.getTargetInfo().getDataLayout(), M.get(), + BackendAction::Backend_EmitLL, + llvm::make_unique<llvm::raw_svector_ostream>(Buffer)); llvm::dbgs() << Buffer; }); // Use the LLVM backend to emit the pch container. clang::EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts, - Ctx.getTargetInfo().getDataLayoutString(), - M.get(), BackendAction::Backend_EmitObj, OS); - - // Make sure the pch container hits disk. - OS->flush(); + Ctx.getTargetInfo().getDataLayout(), M.get(), + BackendAction::Backend_EmitObj, std::move(OS)); // Free the memory for the temporary buffer. 
llvm::SmallVector<char, 0> Empty; @@ -281,10 +305,11 @@ public: std::unique_ptr<ASTConsumer> ObjectFilePCHContainerWriter::CreatePCHContainerGenerator( CompilerInstance &CI, const std::string &MainFileName, - const std::string &OutputFileName, llvm::raw_pwrite_stream *OS, + const std::string &OutputFileName, + std::unique_ptr<llvm::raw_pwrite_stream> OS, std::shared_ptr<PCHBuffer> Buffer) const { - return llvm::make_unique<PCHContainerGenerator>(CI, MainFileName, - OutputFileName, OS, Buffer); + return llvm::make_unique<PCHContainerGenerator>( + CI, MainFileName, OutputFileName, std::move(OS), Buffer); } void ObjectFilePCHContainerReader::ExtractPCH( diff --git a/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp b/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp new file mode 100644 index 0000000..6c20f8c --- /dev/null +++ b/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp @@ -0,0 +1,830 @@ +//===--- SwiftCallingConv.cpp - Lowering for the Swift calling convention -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the abstract lowering for the Swift calling convention. +// +//===----------------------------------------------------------------------===// + +#include "clang/CodeGen/SwiftCallingConv.h" +#include "clang/Basic/TargetInfo.h" +#include "CodeGenModule.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace CodeGen; +using namespace swiftcall; + +static const SwiftABIInfo &getSwiftABIInfo(CodeGenModule &CGM) { + return cast<SwiftABIInfo>(CGM.getTargetCodeGenInfo().getABIInfo()); +} + +static bool isPowerOf2(unsigned n) { + return n == (n & -n); +} + +/// Given two types with the same size, try to find a common type. +static llvm::Type *getCommonType(llvm::Type *first, llvm::Type *second) { + assert(first != second); + + // Allow pointers to merge with integers, but prefer the integer type. + if (first->isIntegerTy()) { + if (second->isPointerTy()) return first; + } else if (first->isPointerTy()) { + if (second->isIntegerTy()) return second; + if (second->isPointerTy()) return first; + + // Allow two vectors to be merged (given that they have the same size). + // This assumes that we never have two different vector register sets. + } else if (auto firstVecTy = dyn_cast<llvm::VectorType>(first)) { + if (auto secondVecTy = dyn_cast<llvm::VectorType>(second)) { + if (auto commonTy = getCommonType(firstVecTy->getElementType(), + secondVecTy->getElementType())) { + return (commonTy == firstVecTy->getElementType() ? first : second); + } + } + } + + return nullptr; +} + +static CharUnits getTypeStoreSize(CodeGenModule &CGM, llvm::Type *type) { + return CharUnits::fromQuantity(CGM.getDataLayout().getTypeStoreSize(type)); +} + +void SwiftAggLowering::addTypedData(QualType type, CharUnits begin) { + // Deal with various aggregate types as special cases: + + // Record types. + if (auto recType = type->getAs<RecordType>()) { + addTypedData(recType->getDecl(), begin); + + // Array types. + } else if (type->isArrayType()) { + // Incomplete array types (flexible array members?) don't provide + // data to lay out, and the other cases shouldn't be possible. 
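The new SwiftCallingConv.cpp above defines isPowerOf2 with the classic identity n == (n & -n): in two's-complement arithmetic, n & -n isolates the lowest set bit, so the comparison holds exactly when n has at most one bit set. A standalone check of the identity (note that, as written, n == 0 also passes):

    #include <iostream>

    static bool isPowerOf2(unsigned n) { return n == (n & -n); }

    int main() {
      // n & -n keeps only the lowest set bit: 12 & -12 == 4, 8 & -8 == 8.
      for (unsigned n : {0u, 1u, 2u, 3u, 4u, 6u, 8u, 12u, 64u})
        std::cout << n << (isPowerOf2(n) ? " is" : " is not")
                  << " a power of two (n & -n = " << (n & -n) << ")\n";
    }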
+ auto arrayType = CGM.getContext().getAsConstantArrayType(type); + if (!arrayType) return; + + QualType eltType = arrayType->getElementType(); + auto eltSize = CGM.getContext().getTypeSizeInChars(eltType); + for (uint64_t i = 0, e = arrayType->getSize().getZExtValue(); i != e; ++i) { + addTypedData(eltType, begin + i * eltSize); + } + + // Complex types. + } else if (auto complexType = type->getAs<ComplexType>()) { + auto eltType = complexType->getElementType(); + auto eltSize = CGM.getContext().getTypeSizeInChars(eltType); + auto eltLLVMType = CGM.getTypes().ConvertType(eltType); + addTypedData(eltLLVMType, begin, begin + eltSize); + addTypedData(eltLLVMType, begin + eltSize, begin + 2 * eltSize); + + // Member pointer types. + } else if (type->getAs<MemberPointerType>()) { + // Just add it all as opaque. + addOpaqueData(begin, begin + CGM.getContext().getTypeSizeInChars(type)); + + // Everything else is scalar and should not convert as an LLVM aggregate. + } else { + // We intentionally convert as !ForMem because we want to preserve + // that a type was an i1. + auto llvmType = CGM.getTypes().ConvertType(type); + addTypedData(llvmType, begin); + } +} + +void SwiftAggLowering::addTypedData(const RecordDecl *record, CharUnits begin) { + addTypedData(record, begin, CGM.getContext().getASTRecordLayout(record)); +} + +void SwiftAggLowering::addTypedData(const RecordDecl *record, CharUnits begin, + const ASTRecordLayout &layout) { + // Unions are a special case. + if (record->isUnion()) { + for (auto field : record->fields()) { + if (field->isBitField()) { + addBitFieldData(field, begin, 0); + } else { + addTypedData(field->getType(), begin); + } + } + return; + } + + // Note that correctness does not rely on us adding things in + // their actual order of layout; it's just somewhat more efficient + // for the builder. + + // With that in mind, add "early" C++ data. + auto cxxRecord = dyn_cast<CXXRecordDecl>(record); + if (cxxRecord) { + // - a v-table pointer, if the class adds its own + if (layout.hasOwnVFPtr()) { + addTypedData(CGM.Int8PtrTy, begin); + } + + // - non-virtual bases + for (auto &baseSpecifier : cxxRecord->bases()) { + if (baseSpecifier.isVirtual()) continue; + + auto baseRecord = baseSpecifier.getType()->getAsCXXRecordDecl(); + addTypedData(baseRecord, begin + layout.getBaseClassOffset(baseRecord)); + } + + // - a vbptr if the class adds its own + if (layout.hasOwnVBPtr()) { + addTypedData(CGM.Int8PtrTy, begin + layout.getVBPtrOffset()); + } + } + + // Add fields. + for (auto field : record->fields()) { + auto fieldOffsetInBits = layout.getFieldOffset(field->getFieldIndex()); + if (field->isBitField()) { + addBitFieldData(field, begin, fieldOffsetInBits); + } else { + addTypedData(field->getType(), + begin + CGM.getContext().toCharUnitsFromBits(fieldOffsetInBits)); + } + } + + // Add "late" C++ data: + if (cxxRecord) { + // - virtual bases + for (auto &vbaseSpecifier : cxxRecord->vbases()) { + auto baseRecord = vbaseSpecifier.getType()->getAsCXXRecordDecl(); + addTypedData(baseRecord, begin + layout.getVBaseClassOffset(baseRecord)); + } + } +} + +void SwiftAggLowering::addBitFieldData(const FieldDecl *bitfield, + CharUnits recordBegin, + uint64_t bitfieldBitBegin) { + assert(bitfield->isBitField()); + auto &ctx = CGM.getContext(); + auto width = bitfield->getBitWidthValue(ctx); + + // We can ignore zero-width bit-fields. + if (width == 0) return; + + // toCharUnitsFromBits rounds down. 
+ CharUnits bitfieldByteBegin = ctx.toCharUnitsFromBits(bitfieldBitBegin); + + // Find the offset of the last byte that is partially occupied by the + // bit-field; since we otherwise expect exclusive ends, the end is the + // next byte. + uint64_t bitfieldBitLast = bitfieldBitBegin + width - 1; + CharUnits bitfieldByteEnd = + ctx.toCharUnitsFromBits(bitfieldBitLast) + CharUnits::One(); + addOpaqueData(recordBegin + bitfieldByteBegin, + recordBegin + bitfieldByteEnd); +} + +void SwiftAggLowering::addTypedData(llvm::Type *type, CharUnits begin) { + assert(type && "didn't provide type for typed data"); + addTypedData(type, begin, begin + getTypeStoreSize(CGM, type)); +} + +void SwiftAggLowering::addTypedData(llvm::Type *type, + CharUnits begin, CharUnits end) { + assert(type && "didn't provide type for typed data"); + assert(getTypeStoreSize(CGM, type) == end - begin); + + // Legalize vector types. + if (auto vecTy = dyn_cast<llvm::VectorType>(type)) { + SmallVector<llvm::Type*, 4> componentTys; + legalizeVectorType(CGM, end - begin, vecTy, componentTys); + assert(componentTys.size() >= 1); + + // Walk the initial components. + for (size_t i = 0, e = componentTys.size(); i != e - 1; ++i) { + llvm::Type *componentTy = componentTys[i]; + auto componentSize = getTypeStoreSize(CGM, componentTy); + assert(componentSize < end - begin); + addLegalTypedData(componentTy, begin, begin + componentSize); + begin += componentSize; + } + + return addLegalTypedData(componentTys.back(), begin, end); + } + + // Legalize integer types. + if (auto intTy = dyn_cast<llvm::IntegerType>(type)) { + if (!isLegalIntegerType(CGM, intTy)) + return addOpaqueData(begin, end); + } + + // All other types should be legal. + return addLegalTypedData(type, begin, end); +} + +void SwiftAggLowering::addLegalTypedData(llvm::Type *type, + CharUnits begin, CharUnits end) { + // Require the type to be naturally aligned. + if (!begin.isZero() && !begin.isMultipleOf(getNaturalAlignment(CGM, type))) { + + // Try splitting vector types. + if (auto vecTy = dyn_cast<llvm::VectorType>(type)) { + auto split = splitLegalVectorType(CGM, end - begin, vecTy); + auto eltTy = split.first; + auto numElts = split.second; + + auto eltSize = (end - begin) / numElts; + assert(eltSize == getTypeStoreSize(CGM, eltTy)); + for (size_t i = 0, e = numElts; i != e; ++i) { + addLegalTypedData(eltTy, begin, begin + eltSize); + begin += eltSize; + } + assert(begin == end); + return; + } + + return addOpaqueData(begin, end); + } + + addEntry(type, begin, end); +} + +void SwiftAggLowering::addEntry(llvm::Type *type, + CharUnits begin, CharUnits end) { + assert((!type || + (!isa<llvm::StructType>(type) && !isa<llvm::ArrayType>(type))) && + "cannot add aggregate-typed data"); + assert(!type || begin.isMultipleOf(getNaturalAlignment(CGM, type))); + + // Fast path: we can just add entries to the end. + if (Entries.empty() || Entries.back().End <= begin) { + Entries.push_back({begin, end, type}); + return; + } + + // Find the first existing entry that ends after the start of the new data. + // TODO: do a binary search if Entries is big enough for it to matter. + size_t index = Entries.size() - 1; + while (index != 0) { + if (Entries[index - 1].End <= begin) break; + --index; + } + + // The entry ends after the start of the new data. + // If the entry starts after the end of the new data, there's no conflict. 
+ if (Entries[index].Begin >= end) { + // This insertion is potentially O(n), but the way we generally build + // these layouts makes that unlikely to matter: we'd need a union of + // several very large types. + Entries.insert(Entries.begin() + index, {begin, end, type}); + return; + } + + // Otherwise, the ranges overlap. The new range might also overlap + // with later ranges. +restartAfterSplit: + + // Simplest case: an exact overlap. + if (Entries[index].Begin == begin && Entries[index].End == end) { + // If the types match exactly, great. + if (Entries[index].Type == type) return; + + // If either type is opaque, make the entry opaque and return. + if (Entries[index].Type == nullptr) { + return; + } else if (type == nullptr) { + Entries[index].Type = nullptr; + return; + } + + // If they disagree in an ABI-agnostic way, just resolve the conflict + // arbitrarily. + if (auto entryType = getCommonType(Entries[index].Type, type)) { + Entries[index].Type = entryType; + return; + } + + // Otherwise, make the entry opaque. + Entries[index].Type = nullptr; + return; + } + + // Okay, we have an overlapping conflict of some sort. + + // If we have a vector type, split it. + if (auto vecTy = dyn_cast_or_null<llvm::VectorType>(type)) { + auto eltTy = vecTy->getElementType(); + CharUnits eltSize = (end - begin) / vecTy->getNumElements(); + assert(eltSize == getTypeStoreSize(CGM, eltTy)); + for (unsigned i = 0, e = vecTy->getNumElements(); i != e; ++i) { + addEntry(eltTy, begin, begin + eltSize); + begin += eltSize; + } + assert(begin == end); + return; + } + + // If the entry is a vector type, split it and try again. + if (Entries[index].Type && Entries[index].Type->isVectorTy()) { + splitVectorEntry(index); + goto restartAfterSplit; + } + + // Okay, we have no choice but to make the existing entry opaque. + + Entries[index].Type = nullptr; + + // Stretch the start of the entry to the beginning of the range. + if (begin < Entries[index].Begin) { + Entries[index].Begin = begin; + assert(index == 0 || begin >= Entries[index - 1].End); + } + + // Stretch the end of the entry to the end of the range; but if we run + // into the start of the next entry, just leave the range there and repeat. + while (end > Entries[index].End) { + assert(Entries[index].Type == nullptr); + + // If the range doesn't overlap the next entry, we're done. + if (index == Entries.size() - 1 || end <= Entries[index + 1].Begin) { + Entries[index].End = end; + break; + } + + // Otherwise, stretch to the start of the next entry. + Entries[index].End = Entries[index + 1].Begin; + + // Continue with the next entry. + index++; + + // This entry needs to be made opaque if it is not already. + if (Entries[index].Type == nullptr) + continue; + + // Split vector entries unless we completely subsume them. + if (Entries[index].Type->isVectorTy() && + end < Entries[index].End) { + splitVectorEntry(index); + } + + // Make the entry opaque. + Entries[index].Type = nullptr; + } +} + +/// Replace the entry of vector type at offset 'index' with a sequence +/// of its component vectors. 
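+/// For example, on a target where <2 x float> is a legal 8-byte vector, a
+/// <4 x float> entry becomes two <2 x float> entries; if no smaller vector
+/// is legal, it becomes four scalar float entries.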
+void SwiftAggLowering::splitVectorEntry(unsigned index) { + auto vecTy = cast<llvm::VectorType>(Entries[index].Type); + auto split = splitLegalVectorType(CGM, Entries[index].getWidth(), vecTy); + + auto eltTy = split.first; + CharUnits eltSize = getTypeStoreSize(CGM, eltTy); + auto numElts = split.second; + Entries.insert(&Entries[index + 1], numElts - 1, StorageEntry()); + + CharUnits begin = Entries[index].Begin; + for (unsigned i = 0; i != numElts; ++i) { + Entries[index].Type = eltTy; + Entries[index].Begin = begin; + Entries[index].End = begin + eltSize; + begin += eltSize; + } +} + +/// Given a power-of-two unit size, return the offset of the aligned unit +/// of that size which contains the given offset. +/// +/// In other words, round down to the nearest multiple of the unit size. +static CharUnits getOffsetAtStartOfUnit(CharUnits offset, CharUnits unitSize) { + assert(isPowerOf2(unitSize.getQuantity())); + auto unitMask = ~(unitSize.getQuantity() - 1); + return CharUnits::fromQuantity(offset.getQuantity() & unitMask); +} + +static bool areBytesInSameUnit(CharUnits first, CharUnits second, + CharUnits chunkSize) { + return getOffsetAtStartOfUnit(first, chunkSize) + == getOffsetAtStartOfUnit(second, chunkSize); +} + +void SwiftAggLowering::finish() { + if (Entries.empty()) { + Finished = true; + return; + } + + // We logically split the layout down into a series of chunks of this size, + // which is generally the size of a pointer. + const CharUnits chunkSize = getMaximumVoluntaryIntegerSize(CGM); + + // First pass: if two entries share a chunk, make them both opaque + // and stretch one to meet the next. + bool hasOpaqueEntries = (Entries[0].Type == nullptr); + for (size_t i = 1, e = Entries.size(); i != e; ++i) { + if (areBytesInSameUnit(Entries[i - 1].End - CharUnits::One(), + Entries[i].Begin, chunkSize)) { + Entries[i - 1].Type = nullptr; + Entries[i].Type = nullptr; + Entries[i - 1].End = Entries[i].Begin; + hasOpaqueEntries = true; + + } else if (Entries[i].Type == nullptr) { + hasOpaqueEntries = true; + } + } + + // The rest of the algorithm leaves non-opaque entries alone, so if we + // have no opaque entries, we're done. + if (!hasOpaqueEntries) { + Finished = true; + return; + } + + // Okay, move the entries to a temporary and rebuild Entries. + auto orig = std::move(Entries); + assert(Entries.empty()); + + for (size_t i = 0, e = orig.size(); i != e; ++i) { + // Just copy over non-opaque entries. + if (orig[i].Type != nullptr) { + Entries.push_back(orig[i]); + continue; + } + + // Scan forward to determine the full extent of the next opaque range. + // We know from the first pass that only contiguous ranges will overlap + // the same aligned chunk. + auto begin = orig[i].Begin; + auto end = orig[i].End; + while (i + 1 != e && + orig[i + 1].Type == nullptr && + end == orig[i + 1].Begin) { + end = orig[i + 1].End; + i++; + } + + // Add an entry per intersected chunk. + do { + // Find the smallest aligned storage unit in the maximal aligned + // storage unit containing 'begin' that contains all the bytes in + // the intersection between the range and this chunk. + CharUnits localBegin = begin; + CharUnits chunkBegin = getOffsetAtStartOfUnit(localBegin, chunkSize); + CharUnits chunkEnd = chunkBegin + chunkSize; + CharUnits localEnd = std::min(end, chunkEnd); + + // Just do a simple loop over ever-increasing unit sizes. 
+ CharUnits unitSize = CharUnits::One(); + CharUnits unitBegin, unitEnd; + for (; ; unitSize *= 2) { + assert(unitSize <= chunkSize); + unitBegin = getOffsetAtStartOfUnit(localBegin, unitSize); + unitEnd = unitBegin + unitSize; + if (unitEnd >= localEnd) break; + } + + // Add an entry for this unit. + auto entryTy = + llvm::IntegerType::get(CGM.getLLVMContext(), + CGM.getContext().toBits(unitSize)); + Entries.push_back({unitBegin, unitEnd, entryTy}); + + // The next chunk starts where this chunk left off. + begin = localEnd; + } while (begin != end); + } + + // Okay, finally finished. + Finished = true; +} + +void SwiftAggLowering::enumerateComponents(EnumerationCallback callback) const { + assert(Finished && "haven't yet finished lowering"); + + for (auto &entry : Entries) { + callback(entry.Begin, entry.Type); + } +} + +std::pair<llvm::StructType*, llvm::Type*> +SwiftAggLowering::getCoerceAndExpandTypes() const { + assert(Finished && "haven't yet finished lowering"); + + auto &ctx = CGM.getLLVMContext(); + + if (Entries.empty()) { + auto type = llvm::StructType::get(ctx); + return { type, type }; + } + + SmallVector<llvm::Type*, 8> elts; + CharUnits lastEnd = CharUnits::Zero(); + bool hasPadding = false; + bool packed = false; + for (auto &entry : Entries) { + if (entry.Begin != lastEnd) { + auto paddingSize = entry.Begin - lastEnd; + assert(!paddingSize.isNegative()); + + auto padding = llvm::ArrayType::get(llvm::Type::getInt8Ty(ctx), + paddingSize.getQuantity()); + elts.push_back(padding); + hasPadding = true; + } + + if (!packed && !entry.Begin.isMultipleOf( + CharUnits::fromQuantity( + CGM.getDataLayout().getABITypeAlignment(entry.Type)))) + packed = true; + + elts.push_back(entry.Type); + lastEnd = entry.End; + } + + // We don't need to adjust 'packed' to deal with possible tail padding + // because we never do that kind of access through the coercion type. + auto coercionType = llvm::StructType::get(ctx, elts, packed); + + llvm::Type *unpaddedType = coercionType; + if (hasPadding) { + elts.clear(); + for (auto &entry : Entries) { + elts.push_back(entry.Type); + } + if (elts.size() == 1) { + unpaddedType = elts[0]; + } else { + unpaddedType = llvm::StructType::get(ctx, elts, /*packed*/ false); + } + } else if (Entries.size() == 1) { + unpaddedType = Entries[0].Type; + } + + return { coercionType, unpaddedType }; +} + +bool SwiftAggLowering::shouldPassIndirectly(bool asReturnValue) const { + assert(Finished && "haven't yet finished lowering"); + + // Empty types don't need to be passed indirectly. + if (Entries.empty()) return false; + + CharUnits totalSize = Entries.back().End; + + // Avoid copying the array of types when there's just a single element. + if (Entries.size() == 1) { + return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(totalSize, + Entries.back().Type, + asReturnValue); + } + + SmallVector<llvm::Type*, 8> componentTys; + componentTys.reserve(Entries.size()); + for (auto &entry : Entries) { + componentTys.push_back(entry.Type); + } + return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(totalSize, + componentTys, + asReturnValue); +} + +CharUnits swiftcall::getMaximumVoluntaryIntegerSize(CodeGenModule &CGM) { + // Currently always the size of an ordinary pointer. + return CGM.getContext().toCharUnitsFromBits( + CGM.getContext().getTargetInfo().getPointerWidth(0)); +} + +CharUnits swiftcall::getNaturalAlignment(CodeGenModule &CGM, llvm::Type *type) { + // For Swift's purposes, this is always just the store size of the type + // rounded up to a power of 2. 
+ auto size = (unsigned long long) getTypeStoreSize(CGM, type).getQuantity(); + if (!isPowerOf2(size)) { + size = 1ULL << (llvm::findLastSet(size, llvm::ZB_Undefined) + 1); + } + assert(size >= CGM.getDataLayout().getABITypeAlignment(type)); + return CharUnits::fromQuantity(size); +} + +bool swiftcall::isLegalIntegerType(CodeGenModule &CGM, + llvm::IntegerType *intTy) { + auto size = intTy->getBitWidth(); + switch (size) { + case 1: + case 8: + case 16: + case 32: + case 64: + // Just assume that the above are always legal. + return true; + + case 128: + return CGM.getContext().getTargetInfo().hasInt128Type(); + + default: + return false; + } +} + +bool swiftcall::isLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize, + llvm::VectorType *vectorTy) { + return isLegalVectorType(CGM, vectorSize, vectorTy->getElementType(), + vectorTy->getNumElements()); +} + +bool swiftcall::isLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize, + llvm::Type *eltTy, unsigned numElts) { + assert(numElts > 1 && "illegal vector length"); + return getSwiftABIInfo(CGM) + .isLegalVectorTypeForSwift(vectorSize, eltTy, numElts); +} + +std::pair<llvm::Type*, unsigned> +swiftcall::splitLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize, + llvm::VectorType *vectorTy) { + auto numElts = vectorTy->getNumElements(); + auto eltTy = vectorTy->getElementType(); + + // Try to split the vector type in half. + if (numElts >= 4 && isPowerOf2(numElts)) { + if (isLegalVectorType(CGM, vectorSize / 2, eltTy, numElts / 2)) + return {llvm::VectorType::get(eltTy, numElts / 2), 2}; + } + + return {eltTy, numElts}; +} + +void swiftcall::legalizeVectorType(CodeGenModule &CGM, CharUnits origVectorSize, + llvm::VectorType *origVectorTy, + llvm::SmallVectorImpl<llvm::Type*> &components) { + // If it's already a legal vector type, use it. + if (isLegalVectorType(CGM, origVectorSize, origVectorTy)) { + components.push_back(origVectorTy); + return; + } + + // Try to split the vector into legal subvectors. + auto numElts = origVectorTy->getNumElements(); + auto eltTy = origVectorTy->getElementType(); + assert(numElts != 1); + + // The largest size that we're still considering making subvectors of. + // Always a power of 2. + unsigned logCandidateNumElts = llvm::findLastSet(numElts, llvm::ZB_Undefined); + unsigned candidateNumElts = 1U << logCandidateNumElts; + assert(candidateNumElts <= numElts && candidateNumElts * 2 > numElts); + + // Minor optimization: don't check the legality of this exact size twice. + if (candidateNumElts == numElts) { + logCandidateNumElts--; + candidateNumElts >>= 1; + } + + CharUnits eltSize = (origVectorSize / numElts); + CharUnits candidateSize = eltSize * candidateNumElts; + + // The sensibility of this algorithm relies on the fact that we never + // have a legal non-power-of-2 vector size without having the power of 2 + // also be legal. + while (logCandidateNumElts > 0) { + assert(candidateNumElts == 1U << logCandidateNumElts); + assert(candidateNumElts <= numElts); + assert(candidateSize == eltSize * candidateNumElts); + + // Skip illegal vector sizes. + if (!isLegalVectorType(CGM, candidateSize, eltTy, candidateNumElts)) { + logCandidateNumElts--; + candidateNumElts /= 2; + candidateSize /= 2; + continue; + } + + // Add the right number of vectors of this size. 
+ auto numVecs = numElts >> logCandidateNumElts; + components.append(numVecs, llvm::VectorType::get(eltTy, candidateNumElts)); + numElts -= (numVecs << logCandidateNumElts); + + if (numElts == 0) return; + + // It's possible that the number of elements remaining will be legal. + // This can happen with e.g. <7 x float> when <3 x float> is legal. + // This only needs to be separately checked if it's not a power of 2. + if (numElts > 2 && !isPowerOf2(numElts) && + isLegalVectorType(CGM, eltSize * numElts, eltTy, numElts)) { + components.push_back(llvm::VectorType::get(eltTy, numElts)); + return; + } + + // Bring vecSize down to something no larger than numElts. + do { + logCandidateNumElts--; + candidateNumElts /= 2; + candidateSize /= 2; + } while (candidateNumElts > numElts); + } + + // Otherwise, just append a bunch of individual elements. + components.append(numElts, eltTy); +} + +bool swiftcall::shouldPassCXXRecordIndirectly(CodeGenModule &CGM, + const CXXRecordDecl *record) { + // Following a recommendation from Richard Smith, pass a C++ type + // indirectly only if the destructor is non-trivial or *all* of the + // copy/move constructors are deleted or non-trivial. + + if (record->hasNonTrivialDestructor()) + return true; + + // It would be nice if this were summarized on the CXXRecordDecl. + for (auto ctor : record->ctors()) { + if (ctor->isCopyOrMoveConstructor() && !ctor->isDeleted() && + ctor->isTrivial()) { + return false; + } + } + + return true; +} + +static ABIArgInfo classifyExpandedType(SwiftAggLowering &lowering, + bool forReturn, + CharUnits alignmentForIndirect) { + if (lowering.empty()) { + return ABIArgInfo::getIgnore(); + } else if (lowering.shouldPassIndirectly(forReturn)) { + return ABIArgInfo::getIndirect(alignmentForIndirect, /*byval*/ false); + } else { + auto types = lowering.getCoerceAndExpandTypes(); + return ABIArgInfo::getCoerceAndExpand(types.first, types.second); + } +} + +static ABIArgInfo classifyType(CodeGenModule &CGM, CanQualType type, + bool forReturn) { + if (auto recordType = dyn_cast<RecordType>(type)) { + auto record = recordType->getDecl(); + auto &layout = CGM.getContext().getASTRecordLayout(record); + + if (auto cxxRecord = dyn_cast<CXXRecordDecl>(record)) { + if (shouldPassCXXRecordIndirectly(CGM, cxxRecord)) + return ABIArgInfo::getIndirect(layout.getAlignment(), /*byval*/ false); + } + + SwiftAggLowering lowering(CGM); + lowering.addTypedData(recordType->getDecl(), CharUnits::Zero(), layout); + lowering.finish(); + + return classifyExpandedType(lowering, forReturn, layout.getAlignment()); + } + + // Just assume that all of our target ABIs can support returning at least + // two integer or floating-point values. + if (isa<ComplexType>(type)) { + return (forReturn ? ABIArgInfo::getDirect() : ABIArgInfo::getExpand()); + } + + // Vector types may need to be legalized. + if (isa<VectorType>(type)) { + SwiftAggLowering lowering(CGM); + lowering.addTypedData(type, CharUnits::Zero()); + lowering.finish(); + + CharUnits alignment = CGM.getContext().getTypeAlignInChars(type); + return classifyExpandedType(lowering, forReturn, alignment); + } + + // Member pointer types need to be expanded, but it's a simple form of + // expansion that 'Direct' can handle. Note that CanBeFlattened should be + // true for this to work. + + // 'void' needs to be ignored. + if (type->isVoidType()) { + return ABIArgInfo::getIgnore(); + } + + // Everything else can be passed directly. 
+ return ABIArgInfo::getDirect(); +} + +ABIArgInfo swiftcall::classifyReturnType(CodeGenModule &CGM, CanQualType type) { + return classifyType(CGM, type, /*forReturn*/ true); +} + +ABIArgInfo swiftcall::classifyArgumentType(CodeGenModule &CGM, + CanQualType type) { + return classifyType(CGM, type, /*forReturn*/ false); +} + +void swiftcall::computeABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) { + auto &retInfo = FI.getReturnInfo(); + retInfo = classifyReturnType(CGM, FI.getReturnType()); + + for (unsigned i = 0, e = FI.arg_size(); i != e; ++i) { + auto &argInfo = FI.arg_begin()[i]; + argInfo.info = classifyArgumentType(CGM, argInfo.type); + } +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp index 3d1ddef..bc03616 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp @@ -19,6 +19,7 @@ #include "CodeGenFunction.h" #include "clang/AST/RecordLayout.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/CodeGen/SwiftCallingConv.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" @@ -68,6 +69,46 @@ Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, ABIInfo::~ABIInfo() {} +/// Does the given lowering require more than the given number of +/// registers when expanded? +/// +/// This is intended to be the basis of a reasonable basic implementation +/// of should{Pass,Return}IndirectlyForSwift. +/// +/// For most targets, a limit of four total registers is reasonable; this +/// limits the amount of code required in order to move around the value +/// in case it wasn't produced immediately prior to the call by the caller +/// (or wasn't produced in exactly the right registers) or isn't used +/// immediately within the callee. But some targets may need to further +/// limit the register count due to an inability to support that many +/// return registers. +static bool occupiesMoreThan(CodeGenTypes &cgt, + ArrayRef<llvm::Type*> scalarTypes, + unsigned maxAllRegisters) { + unsigned intCount = 0, fpCount = 0; + for (llvm::Type *type : scalarTypes) { + if (type->isPointerTy()) { + intCount++; + } else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) { + auto ptrWidth = cgt.getTarget().getPointerWidth(0); + intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth; + } else { + assert(type->isVectorTy() || type->isFloatingPointTy()); + fpCount++; + } + } + + return (intCount + fpCount > maxAllRegisters); +} + +bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize, + llvm::Type *eltTy, + unsigned numElts) const { + // The default implementation of this assumes that the target guarantees + // 128-bit SIMD support but nothing more. 
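+  // That is, accept vectors larger than 8 bytes and no larger than 16 bytes
+  // (e.g. <4 x float>, but not <2 x float> or <8 x float>).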
+ return (vectorSize.getQuantity() > 8 && vectorSize.getQuantity() <= 16); +} + static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT, CGCXXABI &CXXABI) { const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()); @@ -117,6 +158,8 @@ const TargetInfo &ABIInfo::getTarget() const { return CGT.getTarget(); } +bool ABIInfo:: isAndroid() const { return getTarget().getTriple().isAndroid(); } + bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { return false; } @@ -130,7 +173,7 @@ bool ABIInfo::shouldSignExtUnsignedType(QualType Ty) const { return false; } -void ABIArgInfo::dump() const { +LLVM_DUMP_METHOD void ABIArgInfo::dump() const { raw_ostream &OS = llvm::errs(); OS << "(ABIArgInfo Kind="; switch (TheKind) { @@ -158,6 +201,10 @@ void ABIArgInfo::dump() const { case Expand: OS << "Expand"; break; + case CoerceAndExpand: + OS << "CoerceAndExpand Type="; + getCoerceAndExpandType()->print(OS); + break; } OS << ")\n"; } @@ -217,7 +264,7 @@ static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF, } // Advance the pointer past the argument, then store that back. - CharUnits FullDirectSize = DirectSize.RoundUpToAlignment(SlotSize); + CharUnits FullDirectSize = DirectSize.alignTo(SlotSize); llvm::Value *NextPtr = CGF.Builder.CreateConstInBoundsByteGEP(Addr.getPointer(), FullDirectSize, "argp.next"); @@ -225,7 +272,8 @@ static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF, // If the argument is smaller than a slot, and this is a big-endian // target, the argument will be right-adjusted in its slot. - if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian()) { + if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian() && + !DirectTy->isStructTy()) { Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize - DirectSize); } @@ -324,6 +372,9 @@ TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib, Opt += Lib; } +unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const { + return llvm::CallingConv::C; +} static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); /// isEmptyField - Return true iff a the field is "empty", that is it @@ -364,7 +415,7 @@ static bool isEmptyField(ASTContext &Context, const FieldDecl *FD, static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) { const RecordType *RT = T->getAs<RecordType>(); if (!RT) - return 0; + return false; const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return false; @@ -456,73 +507,55 @@ static const Type *isSingleElementStruct(QualType T, ASTContext &Context) { return Found; } -static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { - // Treat complex types as the element type. - if (const ComplexType *CTy = Ty->getAs<ComplexType>()) - Ty = CTy->getElementType(); - - // Check for a type which we know has a simple scalar argument-passing - // convention without any padding. (We're specifically looking for 32 - // and 64-bit integer and integer-equivalents, float, and double.) - if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() && - !Ty->isEnumeralType() && !Ty->isBlockPointerType()) - return false; - - uint64_t Size = Context.getTypeSize(Ty); - return Size == 32 || Size == 64; -} - -/// canExpandIndirectArgument - Test whether an argument type which is to be -/// passed indirectly (on the stack) would have the equivalent layout if it was -/// expanded into separate arguments. If so, we prefer to do the latter to avoid -/// inhibiting optimizations. 
-/// -// FIXME: This predicate is missing many cases, currently it just follows -// llvm-gcc (checks that all fields are 32-bit or 64-bit primitive types). We -// should probably make this smarter, or better yet make the LLVM backend -// capable of handling it. -static bool canExpandIndirectArgument(QualType Ty, ASTContext &Context) { - // We can only expand structure types. - const RecordType *RT = Ty->getAs<RecordType>(); - if (!RT) - return false; - - // We can only expand (C) structures. - // - // FIXME: This needs to be generalized to handle classes as well. - const RecordDecl *RD = RT->getDecl(); - if (!RD->isStruct()) - return false; - - // We try to expand CLike CXXRecordDecl. - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - if (!CXXRD->isCLike()) - return false; - } - - uint64_t Size = 0; - - for (const auto *FD : RD->fields()) { - if (!is32Or64BitBasicType(FD->getType(), Context)) - return false; +namespace { +Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty, + const ABIArgInfo &AI) { + // This default implementation defers to the llvm backend's va_arg + // instruction. It can handle only passing arguments directly + // (typically only handled in the backend for primitive types), or + // aggregates passed indirectly by pointer (NOTE: if the "byval" + // flag has ABI impact in the callee, this implementation cannot + // work.) + + // Only a few cases are covered here at the moment -- those needed + // by the default abi. + llvm::Value *Val; + + if (AI.isIndirect()) { + assert(!AI.getPaddingType() && + "Unexpected PaddingType seen in arginfo in generic VAArg emitter!"); + assert( + !AI.getIndirectRealign() && + "Unexpected IndirectRealign seen in arginfo in generic VAArg emitter!"); + + auto TyInfo = CGF.getContext().getTypeInfoInChars(Ty); + CharUnits TyAlignForABI = TyInfo.second; + + llvm::Type *BaseTy = + llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty)); + llvm::Value *Addr = + CGF.Builder.CreateVAArg(VAListAddr.getPointer(), BaseTy); + return Address(Addr, TyAlignForABI); + } else { + assert((AI.isDirect() || AI.isExtend()) && + "Unexpected ArgInfo Kind in generic VAArg emitter!"); - // FIXME: Reject bit-fields wholesale; there are two problems, we don't know - // how to expand them yet, and the predicate for telling if a bitfield still - // counts as "basic" is more complicated than what we were doing previously. - if (FD->isBitField()) - return false; + assert(!AI.getInReg() && + "Unexpected InReg seen in arginfo in generic VAArg emitter!"); + assert(!AI.getPaddingType() && + "Unexpected PaddingType seen in arginfo in generic VAArg emitter!"); + assert(!AI.getDirectOffset() && + "Unexpected DirectOffset seen in arginfo in generic VAArg emitter!"); + assert(!AI.getCoerceToType() && + "Unexpected CoerceToType seen in arginfo in generic VAArg emitter!"); - Size += Context.getTypeSize(FD->getType()); + Address Temp = CGF.CreateMemTemp(Ty, "varet"); + Val = CGF.Builder.CreateVAArg(VAListAddr.getPointer(), CGF.ConvertType(Ty)); + CGF.Builder.CreateStore(Val, Temp); + return Temp; } - - // Make sure there are not any holes in the struct. - if (Size != Context.getTypeSize(Ty)) - return false; - - return true; } -namespace { /// DefaultABIInfo - The default implementation for ABI specific /// details. 
This implementation provides information which results in /// self-consistent and sensible LLVM IR generation, but does not @@ -542,7 +575,9 @@ public: } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; + QualType Ty) const override { + return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty)); + } }; class DefaultTargetCodeGenInfo : public TargetCodeGenInfo { @@ -551,11 +586,6 @@ public: : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} }; -Address DefaultABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - return Address::invalid(); -} - ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); @@ -607,13 +637,17 @@ private: ABIArgInfo classifyArgumentType(QualType Ty) const; // DefaultABIInfo's classifyReturnType and classifyArgumentType are - // non-virtual, but computeInfo is virtual, so we overload that. + // non-virtual, but computeInfo and EmitVAArg are virtual, so we + // overload them. void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &Arg : FI.arguments()) Arg.info = classifyArgumentType(Arg.type); } + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; }; class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo { @@ -665,6 +699,14 @@ ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const { return DefaultABIInfo::classifyReturnType(RetTy); } +Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect=*/ false, + getContext().getTypeInfoInChars(Ty), + CharUnits::fromQuantity(4), + /*AllowHigherAlign=*/ true); +} + //===----------------------------------------------------------------------===// // le32/PNaCl bitcode ABI Implementation // @@ -700,7 +742,13 @@ void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const { Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { - return Address::invalid(); + // The PNaCL ABI is a bit odd, in that varargs don't use normal + // function classification. Structs get passed directly for varargs + // functions, through a rewriting transform in + // pnacl-llvm/lib/Transforms/NaCl/ExpandVarArgs.cpp, which allows + // this target to actually support a va_arg instructions with an + // aggregate type, unlike other targets. + return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()); } /// \brief Classify argument of given type \p Ty. @@ -797,7 +845,7 @@ struct CCState { }; /// X86_32ABIInfo - The X86-32 ABI information. -class X86_32ABIInfo : public ABIInfo { +class X86_32ABIInfo : public SwiftABIInfo { enum Class { Integer, Float @@ -849,6 +897,8 @@ class X86_32ABIInfo : public ABIInfo { bool &NeedsPadding) const; bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const; + bool canExpandIndirectArgument(QualType Ty) const; + /// \brief Rewrite the function info so that all memory arguments use /// inalloca. 
void rewriteWithInAlloca(CGFunctionInfo &FI) const; @@ -866,12 +916,22 @@ public: X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, unsigned NumRegisterParameters, bool SoftFloatABI) - : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), + : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), IsRetSmallStructInRegABI(RetSmallStructInRegABI), IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), DefaultNumRegisterParameters(NumRegisterParameters) {} + + bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + // LLVM's x86-32 lowering currently only assigns up to three + // integer registers and three fp registers. Oddly, it'll use up to + // four vector registers for vectors, but those can overlap with the + // scalar registers. + return occupiesMoreThan(CGT, scalars, /*total*/ 3); + } }; class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { @@ -920,6 +980,11 @@ public: ('T' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } + + StringRef getARCRetainAutoreleasedReturnValueMarker() const override { + return "movl\t%ebp, %ebp" + "\t\t## marker for objc_retainAutoreleaseReturnValue"; + } }; } @@ -1054,6 +1119,72 @@ bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty, return true; } +static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { + // Treat complex types as the element type. + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) + Ty = CTy->getElementType(); + + // Check for a type which we know has a simple scalar argument-passing + // convention without any padding. (We're specifically looking for 32 + // and 64-bit integer and integer-equivalents, float, and double.) + if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() && + !Ty->isEnumeralType() && !Ty->isBlockPointerType()) + return false; + + uint64_t Size = Context.getTypeSize(Ty); + return Size == 32 || Size == 64; +} + +/// Test whether an argument type which is to be passed indirectly (on the +/// stack) would have the equivalent layout if it was expanded into separate +/// arguments. If so, we prefer to do the latter to avoid inhibiting +/// optimizations. +bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { + // We can only expand structure types. + const RecordType *RT = Ty->getAs<RecordType>(); + if (!RT) + return false; + const RecordDecl *RD = RT->getDecl(); + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + if (!IsWin32StructABI ) { + // On non-Windows, we have to conservatively match our old bitcode + // prototypes in order to be ABI-compatible at the bitcode level. + if (!CXXRD->isCLike()) + return false; + } else { + // Don't do this for dynamic classes. + if (CXXRD->isDynamicClass()) + return false; + // Don't do this if there are any non-empty bases. + for (const CXXBaseSpecifier &Base : CXXRD->bases()) { + if (!isEmptyRecord(getContext(), Base.getType(), /*AllowArrays=*/true)) + return false; + } + } + } + + uint64_t Size = 0; + + for (const auto *FD : RD->fields()) { + // Scalar arguments on the stack get 4 byte alignment on x86. If the + // argument is smaller than 32-bits, expanding the struct will create + // alignment padding. 
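+    // e.g. a struct containing a 'char' or 'short' field cannot be expanded.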
+ if (!is32Or64BitBasicType(FD->getType(), getContext())) + return false; + + // FIXME: Reject bit-fields wholesale; there are two problems, we don't know + // how to expand them yet, and the predicate for telling if a bitfield still + // counts as "basic" is more complicated than what we were doing previously. + if (FD->isBitField()) + return false; + + Size += getContext().getTypeSize(FD->getType()); + } + + // We can do this if there was no alignment padding. + return Size == getContext().getTypeSize(Ty); +} + ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const { // If the return value is indirect, then the hidden argument is consuming one // integer register. @@ -1114,6 +1245,10 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType()) return getIndirectReturnResult(RetTy, State); + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), RetTy, true)) + return ABIArgInfo::getIgnore(); + // Small structures which are register sized are generally returned // in a register. if (shouldReturnTypeInRegister(RetTy, getContext())) { @@ -1266,6 +1401,12 @@ bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const { bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg, bool &NeedsPadding) const { + // On Windows, aggregates other than HFAs are never passed in registers, and + // they do not consume register slots. Homogenous floating-point aggregates + // (HFAs) have already been dealt with at this point. + if (IsWin32StructABI && isAggregateTypeForABI(Ty)) + return false; + NeedsPadding = false; InReg = !IsMCUABI; @@ -1339,23 +1480,19 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } if (isAggregateTypeForABI(Ty)) { - if (RT) { - // Structs are always byval on win32, regardless of what they contain. - if (IsWin32StructABI) - return getIndirectResult(Ty, true, State); + // Structures with flexible arrays are always indirect. + // FIXME: This should not be byval! + if (RT && RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectResult(Ty, true, State); - // Structures with flexible arrays are always indirect. - if (RT->getDecl()->hasFlexibleArrayMember()) - return getIndirectResult(Ty, true, State); - } - - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), Ty, true)) + // Ignore empty structs/unions on non-Windows. + if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); llvm::LLVMContext &LLVMContext = getVMContext(); llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); - bool NeedsPadding, InReg; + bool NeedsPadding = false; + bool InReg; if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) { unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32); @@ -1373,9 +1510,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, // optimizations. // Don't do this for the MCU if there are still free integer registers // (see X86_64 ABI for full explanation). 
- if (getContext().getTypeSize(Ty) <= 4*32 && - canExpandIndirectArgument(Ty, getContext()) && - (!IsMCUABI || State.FreeRegs == 0)) + if (getContext().getTypeSize(Ty) <= 4 * 32 && + (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty)) return ABIArgInfo::getExpandWithPadding( State.CC == llvm::CallingConv::X86_FastCall || State.CC == llvm::CallingConv::X86_VectorCall, @@ -1474,7 +1610,7 @@ X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields, // Insert padding bytes to respect alignment. CharUnits FieldEnd = StackOffset; - StackOffset = FieldEnd.RoundUpToAlignment(FieldAlign); + StackOffset = FieldEnd.alignTo(FieldAlign); if (StackOffset != FieldEnd) { CharUnits NumBytes = StackOffset - FieldEnd; llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext()); @@ -1495,10 +1631,14 @@ static bool isArgInAlloca(const ABIArgInfo &Info) { return false; case ABIArgInfo::Direct: case ABIArgInfo::Extend: - case ABIArgInfo::Expand: if (Info.getInReg()) return false; return true; + case ABIArgInfo::Expand: + case ABIArgInfo::CoerceAndExpand: + // These are aggregate types which are never passed in registers when + // inalloca is involved. + return true; } llvm_unreachable("invalid enum"); } @@ -1609,6 +1749,10 @@ void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, llvm::AttributeSet::FunctionIndex, B)); } + if (FD->hasAttr<AnyX86InterruptAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + Fn->setCallingConv(llvm::CallingConv::X86_INTR); + } } } @@ -1675,7 +1819,7 @@ static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) { } /// X86_64ABIInfo - The X86_64 ABI information. -class X86_64ABIInfo : public ABIInfo { +class X86_64ABIInfo : public SwiftABIInfo { enum Class { Integer = 0, SSE, @@ -1779,6 +1923,17 @@ class X86_64ABIInfo : public ABIInfo { return !getTarget().getTriple().isOSDarwin(); } + /// GCC classifies <1 x long long> as SSE but compatibility with older clang + // compilers require us to classify it as INTEGER. + bool classifyIntegerMMXAsSSE() const { + const llvm::Triple &Triple = getTarget().getTriple(); + if (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::PS4) + return false; + if (Triple.isOSFreeBSD() && Triple.getOSMajorVersion() >= 10) + return false; + return true; + } + X86AVXABILevel AVXLevel; // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on // 64-bit hardware. @@ -1786,7 +1941,7 @@ class X86_64ABIInfo : public ABIInfo { public: X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) : - ABIInfo(CGT), AVXLevel(AVXLevel), + SwiftABIInfo(CGT), AVXLevel(AVXLevel), Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) { } @@ -1813,6 +1968,12 @@ public: bool has64BitPointers() const { return Has64BitPointers; } + + bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } }; /// WinX86_64ABIInfo - The Windows X86_64 ABI information. 
@@ -1914,6 +2075,16 @@ public: ('T' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (FD->hasAttr<AnyX86InterruptAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + Fn->setCallingConv(llvm::CallingConv::X86_INTR); + } + } + } }; class PS4TargetCodeGenInfo : public X86_64TargetCodeGenInfo { @@ -2031,6 +2202,13 @@ void WinX86_64TargetCodeGenInfo::setTargetAttributes(const Decl *D, CodeGen::CodeGenModule &CGM) const { TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (FD->hasAttr<AnyX86InterruptAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + Fn->setCallingConv(llvm::CallingConv::X86_INTR); + } + } + addStackProbeSizeTargetAttribute(D, GV, CGM); } } @@ -2203,15 +2381,20 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, if (EB_Lo != EB_Hi) Hi = Lo; } else if (Size == 64) { + QualType ElementType = VT->getElementType(); + // gcc passes <1 x double> in memory. :( - if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double)) + if (ElementType->isSpecificBuiltinType(BuiltinType::Double)) return; - // gcc passes <1 x long long> as INTEGER. - if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::LongLong) || - VT->getElementType()->isSpecificBuiltinType(BuiltinType::ULongLong) || - VT->getElementType()->isSpecificBuiltinType(BuiltinType::Long) || - VT->getElementType()->isSpecificBuiltinType(BuiltinType::ULong)) + // gcc passes <1 x long long> as SSE but clang used to unconditionally + // pass them as integer. For platforms where clang is the de facto + // platform compiler, we must continue to use integer. + if (!classifyIntegerMMXAsSSE() && + (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) || + ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) || + ElementType->isSpecificBuiltinType(BuiltinType::Long) || + ElementType->isSpecificBuiltinType(BuiltinType::ULong))) Current = Integer; else Current = SSE; @@ -2775,7 +2958,7 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, // the second element at offset 8. Check for this: unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo); unsigned HiAlign = TD.getABITypeAlignment(Hi); - unsigned HiStart = llvm::RoundUpToAlignment(LoSize, HiAlign); + unsigned HiStart = llvm::alignTo(LoSize, HiAlign); assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!"); // To handle this, we have to increase the size of the low part so that the @@ -3473,13 +3656,15 @@ public: } +// TODO: this implementation is now likely redundant with +// DefaultABIInfo::EmitVAArg. Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, QualType Ty) const { const unsigned OverflowLimit = 8; if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { // TODO: Implement this. For now ignore. (void)CTy; - return Address::invalid(); + return Address::invalid(); // FIXME? 
} // struct __va_list_tag { @@ -3578,7 +3763,7 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, CharUnits Size; if (!isIndirect) { auto TypeInfo = CGF.getContext().getTypeInfoInChars(Ty); - Size = TypeInfo.first.RoundUpToAlignment(OverflowAreaAlign); + Size = TypeInfo.first.alignTo(OverflowAreaAlign); } else { Size = CGF.getPointerSize(); } @@ -3663,7 +3848,7 @@ PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, namespace { /// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information. -class PPC64_SVR4_ABIInfo : public DefaultABIInfo { +class PPC64_SVR4_ABIInfo : public ABIInfo { public: enum ABIKind { ELFv1 = 0, @@ -3674,6 +3859,7 @@ private: static const unsigned GPRBits = 64; ABIKind Kind; bool HasQPX; + bool IsSoftFloatABI; // A vector of float or double will be promoted to <4 x f32> or <4 x f64> and // will be passed in a QPX register. @@ -3704,8 +3890,10 @@ private: } public: - PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX) - : DefaultABIInfo(CGT), Kind(Kind), HasQPX(HasQPX) {} + PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX, + bool SoftFloatABI) + : ABIInfo(CGT), Kind(Kind), HasQPX(HasQPX), + IsSoftFloatABI(SoftFloatABI) {} bool isPromotableTypeForABI(QualType Ty) const; CharUnits getParamTypeAlignment(QualType Ty) const; @@ -3753,8 +3941,10 @@ class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo { public: PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT, - PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX) - : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX)) {} + PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX, + bool SoftFloatABI) + : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX, + SoftFloatABI)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. @@ -3945,8 +4135,19 @@ bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base, // agree in both total size and mode (float vs. vector) are // treated as being equivalent here. const Type *TyPtr = Ty.getTypePtr(); - if (!Base) + if (!Base) { Base = TyPtr; + // If it's a non-power-of-2 vector, its size is already a power-of-2, + // so make sure to widen it explicitly. + if (const VectorType *VT = Base->getAs<VectorType>()) { + QualType EltTy = VT->getElementType(); + unsigned NumElements = + getContext().getTypeSize(VT) / getContext().getTypeSize(EltTy); + Base = getContext() + .getVectorType(EltTy, NumElements, VT->getVectorKind()) + .getTypePtr(); + } + } if (Base->isVectorType() != TyPtr->isVectorType() || getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr)) @@ -3961,8 +4162,11 @@ bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { if (BT->getKind() == BuiltinType::Float || BT->getKind() == BuiltinType::Double || - BT->getKind() == BuiltinType::LongDouble) + BT->getKind() == BuiltinType::LongDouble) { + if (IsSoftFloatABI) + return false; return true; + } } if (const VectorType *VT = Ty->getAs<VectorType>()) { if (getContext().getTypeSize(VT) == 128 || IsQPXVectorTy(Ty)) @@ -4029,13 +4233,13 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const { // Types up to 8 bytes are passed as integer type (which will be // properly aligned in the argument save area doubleword). 
if (Bits <= GPRBits) - CoerceTy = llvm::IntegerType::get(getVMContext(), - llvm::RoundUpToAlignment(Bits, 8)); + CoerceTy = + llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); // Larger types are passed as arrays, with the base type selected // according to the required alignment in the save area. else { uint64_t RegBits = ABIAlign * 8; - uint64_t NumRegs = llvm::RoundUpToAlignment(Bits, RegBits) / RegBits; + uint64_t NumRegs = llvm::alignTo(Bits, RegBits) / RegBits; llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits); CoerceTy = llvm::ArrayType::get(RegTy, NumRegs); } @@ -4095,8 +4299,8 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits); CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, nullptr); } else - CoerceTy = llvm::IntegerType::get(getVMContext(), - llvm::RoundUpToAlignment(Bits, 8)); + CoerceTy = + llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); return ABIArgInfo::getDirect(CoerceTy); } @@ -4220,7 +4424,7 @@ PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, namespace { -class AArch64ABIInfo : public ABIInfo { +class AArch64ABIInfo : public SwiftABIInfo { public: enum ABIKind { AAPCS = 0, @@ -4231,7 +4435,8 @@ private: ABIKind Kind; public: - AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) {} + AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) + : SwiftABIInfo(CGT), Kind(Kind) {} private: ABIKind getABIKind() const { return Kind; } @@ -4264,6 +4469,12 @@ private: return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) : EmitAAPCSVAArg(VAListAddr, Ty, CGF); } + + bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } }; class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -4289,6 +4500,11 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { // Handle illegal vector types here. if (isIllegalVectorType(Ty)) { uint64_t Size = getContext().getTypeSize(Ty); + // Android promotes <2 x i8> to i16, not i32 + if (isAndroid() && (Size <= 16)) { + llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext()); + return ABIArgInfo::getDirect(ResType); + } if (Size <= 32) { llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); @@ -4409,8 +4625,8 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { // Check whether VT is legal. unsigned NumElements = VT->getNumElements(); uint64_t Size = getContext().getTypeSize(VT); - // NumElements should be power of 2 between 1 and 16. - if ((NumElements & (NumElements - 1)) != 0 || NumElements > 16) + // NumElements should be power of 2. + if (!llvm::isPowerOf2_32(NumElements)) return true; return Size != 64 && (Size != 128 || NumElements == 1); } @@ -4489,7 +4705,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); reg_top_index = 1; // field number for __gr_top reg_top_offset = CharUnits::fromQuantity(8); - RegSize = llvm::RoundUpToAlignment(RegSize, 8); + RegSize = llvm::alignTo(RegSize, 8); } else { // 4 is the field number of __vr_offs. 
reg_offs_p = @@ -4659,7 +4875,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, if (IsIndirect) StackSize = StackSlotSize; else - StackSize = TyInfo.first.RoundUpToAlignment(StackSlotSize); + StackSize = TyInfo.first.alignTo(StackSlotSize); llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize); llvm::Value *NewStack = @@ -4699,7 +4915,7 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, // illegal vector types. Lower VAArg here for these cases and use // the LLVM va_arg instruction for everything else. if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty)) - return Address::invalid(); + return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()); CharUnits SlotSize = CharUnits::fromQuantity(8); @@ -4733,7 +4949,7 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, namespace { -class ARMABIInfo : public ABIInfo { +class ARMABIInfo : public SwiftABIInfo { public: enum ABIKind { APCS = 0, @@ -4746,7 +4962,8 @@ private: ABIKind Kind; public: - ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) { + ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) + : SwiftABIInfo(CGT), Kind(_Kind) { setCCs(); } @@ -4757,6 +4974,8 @@ public: case llvm::Triple::EABIHF: case llvm::Triple::GNUEABI: case llvm::Triple::GNUEABIHF: + case llvm::Triple::MuslEABI: + case llvm::Triple::MuslEABIHF: return true; default: return false; @@ -4767,17 +4986,13 @@ public: switch (getTarget().getTriple().getEnvironment()) { case llvm::Triple::EABIHF: case llvm::Triple::GNUEABIHF: + case llvm::Triple::MuslEABIHF: return true; default: return false; } } - bool isAndroid() const { - return (getTarget().getTriple().getEnvironment() == - llvm::Triple::Android); - } - ABIKind getABIKind() const { return Kind; } private: @@ -4797,6 +5012,12 @@ private: llvm::CallingConv::ID getLLVMDefaultCC() const; llvm::CallingConv::ID getABIDefaultCC() const; void setCCs(); + + bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { @@ -4877,6 +5098,16 @@ public: void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; + + void getDependentLibraryOption(llvm::StringRef Lib, + llvm::SmallString<24> &Opt) const override { + Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); + } + + void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, + llvm::SmallString<32> &Opt) const override { + Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; + } }; void WindowsARMTargetCodeGenInfo::setTargetAttributes( @@ -4906,7 +5137,7 @@ void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { /// Return the default calling convention that LLVM will use. llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const { // The default calling convention that LLVM will infer. - if (isEABIHF() || getTarget().getTriple().isWatchOS()) + if (isEABIHF() || getTarget().getTriple().isWatchABI()) return llvm::CallingConv::ARM_AAPCS_VFP; else if (isEABI()) return llvm::CallingConv::ARM_AAPCS; @@ -4988,7 +5219,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, // __fp16 gets passed as if it were an int or float, but with the top 16 bits // unspecified. This is not done for OpenCL as it handles the half type // natively, and does not need to interwork with AAPCS code. 
- if (Ty->isHalfType() && !getContext().getLangOpts().OpenCL) { + if (Ty->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) { llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); @@ -5180,7 +5411,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, // __fp16 gets returned as if it were an int or float, but with the top 16 // bits unspecified. This is not done for OpenCL as it handles the half type // natively, and does not need to interwork with AAPCS code. - if (RetTy->isHalfType() && !getContext().getLangOpts().OpenCL) { + if (RetTy->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) { llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); @@ -5257,7 +5488,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, } else if (Size <= 128 && getABIKind() == AAPCS16_VFP) { llvm::Type *Int32Ty = llvm::Type::getInt32Ty(getVMContext()); llvm::Type *CoerceTy = - llvm::ArrayType::get(Int32Ty, llvm::RoundUpToAlignment(Size, 32) / 32); + llvm::ArrayType::get(Int32Ty, llvm::alignTo(Size, 32) / 32); return ABIArgInfo::getDirect(CoerceTy); } @@ -5513,12 +5744,12 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name, namespace { -class SystemZABIInfo : public ABIInfo { +class SystemZABIInfo : public SwiftABIInfo { bool HasVector; public: SystemZABIInfo(CodeGenTypes &CGT, bool HV) - : ABIInfo(CGT), HasVector(HV) {} + : SwiftABIInfo(CGT), HasVector(HV) {} bool isPromotableIntegerType(QualType Ty) const; bool isCompoundType(QualType Ty) const; @@ -5538,6 +5769,12 @@ public: Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; + + bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } }; class SystemZTargetCodeGenInfo : public TargetCodeGenInfo { @@ -6067,8 +6304,8 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), (uint64_t)StackAlignInBytes); - unsigned CurrOffset = llvm::RoundUpToAlignment(Offset, Align); - Offset = CurrOffset + llvm::RoundUpToAlignment(TySize, Align * 8) / 8; + unsigned CurrOffset = llvm::alignTo(Offset, Align); + Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; if (isAggregateTypeForABI(Ty) || Ty->isVectorType()) { // Ignore empty aggregates. @@ -6465,6 +6702,132 @@ Address HexagonABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, } //===----------------------------------------------------------------------===// +// Lanai ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { +class LanaiABIInfo : public DefaultABIInfo { +public: + LanaiABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + + bool shouldUseInReg(QualType Ty, CCState &State) const; + + void computeInfo(CGFunctionInfo &FI) const override { + CCState State(FI.getCallingConvention()); + // Lanai uses 4 registers to pass arguments unless the function has the + // regparm attribute set. 
+ if (FI.getHasRegParm()) { + State.FreeRegs = FI.getRegParm(); + } else { + State.FreeRegs = 4; + } + + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type, State); + } + + ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const; + ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; +}; +} // end anonymous namespace + +bool LanaiABIInfo::shouldUseInReg(QualType Ty, CCState &State) const { + unsigned Size = getContext().getTypeSize(Ty); + unsigned SizeInRegs = llvm::alignTo(Size, 32U) / 32U; + + if (SizeInRegs == 0) + return false; + + if (SizeInRegs > State.FreeRegs) { + State.FreeRegs = 0; + return false; + } + + State.FreeRegs -= SizeInRegs; + + return true; +} + +ABIArgInfo LanaiABIInfo::getIndirectResult(QualType Ty, bool ByVal, + CCState &State) const { + if (!ByVal) { + if (State.FreeRegs) { + --State.FreeRegs; // Non-byval indirects just use one pointer. + return getNaturalAlignIndirectInReg(Ty); + } + return getNaturalAlignIndirect(Ty, false); + } + + // Compute the byval alignment. + const unsigned MinABIStackAlignInBytes = 4; + unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, + /*Realign=*/TypeAlign > + MinABIStackAlignInBytes); +} + +ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty, + CCState &State) const { + // Check with the C++ ABI first. + const RecordType *RT = Ty->getAs<RecordType>(); + if (RT) { + CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); + if (RAA == CGCXXABI::RAA_Indirect) { + return getIndirectResult(Ty, /*ByVal=*/false, State); + } else if (RAA == CGCXXABI::RAA_DirectInMemory) { + return getNaturalAlignIndirect(Ty, /*ByRef=*/true); + } + } + + if (isAggregateTypeForABI(Ty)) { + // Structures with flexible arrays are always indirect. + if (RT && RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectResult(Ty, /*ByVal=*/true, State); + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + llvm::LLVMContext &LLVMContext = getVMContext(); + unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; + if (SizeInRegs <= State.FreeRegs) { + llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); + SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32); + llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); + State.FreeRegs -= SizeInRegs; + return ABIArgInfo::getDirectInReg(Result); + } else { + State.FreeRegs = 0; + } + return getIndirectResult(Ty, true, State); + } + + // Treat an enum type as its underlying type. 
+ if (const auto *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + bool InReg = shouldUseInReg(Ty, State); + if (Ty->isPromotableIntegerType()) { + if (InReg) + return ABIArgInfo::getDirectInReg(); + return ABIArgInfo::getExtend(); + } + if (InReg) + return ABIArgInfo::getDirectInReg(); + return ABIArgInfo::getDirect(); +} + +namespace { +class LanaiTargetCodeGenInfo : public TargetCodeGenInfo { +public: + LanaiTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) + : TargetCodeGenInfo(new LanaiABIInfo(CGT)) {} +}; +} + +//===----------------------------------------------------------------------===// // AMDGPU ABI Implementation //===----------------------------------------------------------------------===// @@ -6476,6 +6839,7 @@ public: : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; + unsigned getOpenCLKernelCallingConv() const override; }; } @@ -6504,6 +6868,53 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( } +unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { + return llvm::CallingConv::AMDGPU_KERNEL; +} + +//===----------------------------------------------------------------------===// +// SPARC v8 ABI Implementation. +// Based on the SPARC Compliance Definition version 2.4.1. +// +// Ensures that complex values are passed in registers. +// +namespace { +class SparcV8ABIInfo : public DefaultABIInfo { +public: + SparcV8ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + +private: + ABIArgInfo classifyReturnType(QualType RetTy) const; + void computeInfo(CGFunctionInfo &FI) const override; +}; +} // end anonymous namespace + + +ABIArgInfo +SparcV8ABIInfo::classifyReturnType(QualType Ty) const { + if (Ty->isAnyComplexType()) { + return ABIArgInfo::getDirect(); + } + else { + return DefaultABIInfo::classifyReturnType(Ty); + } +} + +void SparcV8ABIInfo::computeInfo(CGFunctionInfo &FI) const { + + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &Arg : FI.arguments()) + Arg.info = classifyArgumentType(Arg.type); +} + +namespace { +class SparcV8TargetCodeGenInfo : public TargetCodeGenInfo { +public: + SparcV8TargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(new SparcV8ABIInfo(CGT)) {} +}; +} // end anonymous namespace + //===----------------------------------------------------------------------===// // SPARC v9 ABI Implementation. // Based on the SPARC Compliance Definition version 2.4.1. @@ -6569,7 +6980,7 @@ private: return; // Finish the current 64-bit word. - uint64_t Aligned = llvm::RoundUpToAlignment(Size, 64); + uint64_t Aligned = llvm::alignTo(Size, 64); if (Aligned > Size && Aligned <= ToSize) { Elems.push_back(llvm::IntegerType::get(Context, Aligned - Size)); Size = Aligned; @@ -6686,7 +7097,7 @@ SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const { CoerceBuilder CB(getVMContext(), getDataLayout()); CB.addStruct(0, StrTy); - CB.pad(llvm::RoundUpToAlignment(CB.DL.getTypeSizeInBits(StrTy), 64)); + CB.pad(llvm::alignTo(CB.DL.getTypeSizeInBits(StrTy), 64)); // Try to use the original type for coercion. llvm::Type *CoerceTy = CB.isUsableType(StrTy) ? 
StrTy : CB.getType(); @@ -6716,6 +7127,7 @@ Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, CharUnits Stride; switch (AI.getKind()) { case ABIArgInfo::Expand: + case ABIArgInfo::CoerceAndExpand: case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); @@ -6728,7 +7140,7 @@ Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, case ABIArgInfo::Direct: { auto AllocSize = getDataLayout().getTypeAllocSize(AI.getCoerceToType()); - Stride = CharUnits::fromQuantity(AllocSize).RoundUpToAlignment(SlotSize); + Stride = CharUnits::fromQuantity(AllocSize).alignTo(SlotSize); ArgAddr = Addr; break; } @@ -6924,6 +7336,8 @@ public: } // End anonymous namespace. +// TODO: this implementation is likely now redundant with the default +// EmitVAArg. Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { CGBuilderTy &Builder = CGF.Builder; @@ -6944,6 +7358,7 @@ Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, CharUnits ArgSize = CharUnits::Zero(); switch (AI.getKind()) { case ABIArgInfo::Expand: + case ABIArgInfo::CoerceAndExpand: case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); case ABIArgInfo::Ignore: @@ -6955,7 +7370,7 @@ Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, Val = Builder.CreateBitCast(AP, ArgPtrTy); ArgSize = CharUnits::fromQuantity( getDataLayout().getTypeAllocSize(AI.getCoerceToType())); - ArgSize = ArgSize.RoundUpToAlignment(SlotSize); + ArgSize = ArgSize.alignTo(SlotSize); break; case ABIArgInfo::Indirect: Val = Builder.CreateElementBitCast(AP, ArgPtrTy); @@ -7086,15 +7501,59 @@ void XCoreTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, SmallStringEnc Enc; if (getTypeString(Enc, D, CGM, TSC)) { llvm::LLVMContext &Ctx = CGM.getModule().getContext(); - llvm::SmallVector<llvm::Metadata *, 2> MDVals; - MDVals.push_back(llvm::ConstantAsMetadata::get(GV)); - MDVals.push_back(llvm::MDString::get(Ctx, Enc.str())); + llvm::Metadata *MDVals[] = {llvm::ConstantAsMetadata::get(GV), + llvm::MDString::get(Ctx, Enc.str())}; llvm::NamedMDNode *MD = CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings"); MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } } +//===----------------------------------------------------------------------===// +// SPIR ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { +class SPIRTargetCodeGenInfo : public TargetCodeGenInfo { +public: + SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) + : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} + void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const override; + unsigned getOpenCLKernelCallingConv() const override; +}; +} // End anonymous namespace. + +/// Emit SPIR specific metadata: OpenCL and SPIR version. +void SPIRTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const { + llvm::LLVMContext &Ctx = CGM.getModule().getContext(); + llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx); + llvm::Module &M = CGM.getModule(); + // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the + // opencl.spir.version named metadata. 
+ llvm::Metadata *SPIRVerElts[] = { + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 2)), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 0))}; + llvm::NamedMDNode *SPIRVerMD = + M.getOrInsertNamedMetadata("opencl.spir.version"); + SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts)); + // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the + // opencl.ocl.version named metadata node. + llvm::Metadata *OCLVerElts[] = { + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + Int32Ty, (CGM.getLangOpts().OpenCLVersion % 100) / 10))}; + llvm::NamedMDNode *OCLVerMD = + M.getOrInsertNamedMetadata("opencl.ocl.version"); + OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts)); +} + +unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { + return llvm::CallingConv::SPIR_KERNEL; +} + static bool appendType(SmallStringEnc &Enc, QualType QType, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC); @@ -7436,29 +7895,35 @@ const llvm::Triple &CodeGenModule::getTriple() const { } bool CodeGenModule::supportsCOMDAT() const { - return !getTriple().isOSBinFormatMachO(); + return getTriple().supportsCOMDAT(); } const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { if (TheTargetCodeGenInfo) return *TheTargetCodeGenInfo; + // Helper to set the unique_ptr while still keeping the return value. + auto SetCGInfo = [&](TargetCodeGenInfo *P) -> const TargetCodeGenInfo & { + this->TheTargetCodeGenInfo.reset(P); + return *P; + }; + const llvm::Triple &Triple = getTarget().getTriple(); switch (Triple.getArch()) { default: - return *(TheTargetCodeGenInfo = new DefaultTargetCodeGenInfo(Types)); + return SetCGInfo(new DefaultTargetCodeGenInfo(Types)); case llvm::Triple::le32: - return *(TheTargetCodeGenInfo = new PNaClTargetCodeGenInfo(Types)); + return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); case llvm::Triple::mips: case llvm::Triple::mipsel: if (Triple.getOS() == llvm::Triple::NaCl) - return *(TheTargetCodeGenInfo = new PNaClTargetCodeGenInfo(Types)); - return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, true)); + return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); + return SetCGInfo(new MIPSTargetCodeGenInfo(Types, true)); case llvm::Triple::mips64: case llvm::Triple::mips64el: - return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false)); + return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false)); case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: { @@ -7466,78 +7931,79 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { if (getTarget().getABI() == "darwinpcs") Kind = AArch64ABIInfo::DarwinPCS; - return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types, Kind)); + return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind)); } case llvm::Triple::wasm32: case llvm::Triple::wasm64: - return *(TheTargetCodeGenInfo = new WebAssemblyTargetCodeGenInfo(Types)); + return SetCGInfo(new WebAssemblyTargetCodeGenInfo(Types)); case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: - case llvm::Triple::thumbeb: - { - if (Triple.getOS() == llvm::Triple::Win32) { - TheTargetCodeGenInfo = - new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP); - return *TheTargetCodeGenInfo; - } - - ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS; - StringRef ABIStr = getTarget().getABI(); - if (ABIStr == "apcs-gnu") - Kind = ARMABIInfo::APCS; - else if (ABIStr == 
"aapcs16") - Kind = ARMABIInfo::AAPCS16_VFP; - else if (CodeGenOpts.FloatABI == "hard" || - (CodeGenOpts.FloatABI != "soft" && - Triple.getEnvironment() == llvm::Triple::GNUEABIHF)) - Kind = ARMABIInfo::AAPCS_VFP; - - return *(TheTargetCodeGenInfo = new ARMTargetCodeGenInfo(Types, Kind)); + case llvm::Triple::thumbeb: { + if (Triple.getOS() == llvm::Triple::Win32) { + return SetCGInfo( + new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP)); } + ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS; + StringRef ABIStr = getTarget().getABI(); + if (ABIStr == "apcs-gnu") + Kind = ARMABIInfo::APCS; + else if (ABIStr == "aapcs16") + Kind = ARMABIInfo::AAPCS16_VFP; + else if (CodeGenOpts.FloatABI == "hard" || + (CodeGenOpts.FloatABI != "soft" && + (Triple.getEnvironment() == llvm::Triple::GNUEABIHF || + Triple.getEnvironment() == llvm::Triple::MuslEABIHF || + Triple.getEnvironment() == llvm::Triple::EABIHF))) + Kind = ARMABIInfo::AAPCS_VFP; + + return SetCGInfo(new ARMTargetCodeGenInfo(Types, Kind)); + } + case llvm::Triple::ppc: - return *(TheTargetCodeGenInfo = - new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft")); + return SetCGInfo( + new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft")); case llvm::Triple::ppc64: if (Triple.isOSBinFormatELF()) { PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1; if (getTarget().getABI() == "elfv2") Kind = PPC64_SVR4_ABIInfo::ELFv2; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; + bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; - return *(TheTargetCodeGenInfo = - new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX)); + return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX, + IsSoftFloat)); } else - return *(TheTargetCodeGenInfo = new PPC64TargetCodeGenInfo(Types)); + return SetCGInfo(new PPC64TargetCodeGenInfo(Types)); case llvm::Triple::ppc64le: { assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!"); PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2; if (getTarget().getABI() == "elfv1" || getTarget().getABI() == "elfv1-qpx") Kind = PPC64_SVR4_ABIInfo::ELFv1; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; + bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; - return *(TheTargetCodeGenInfo = - new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX)); + return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX, + IsSoftFloat)); } case llvm::Triple::nvptx: case llvm::Triple::nvptx64: - return *(TheTargetCodeGenInfo = new NVPTXTargetCodeGenInfo(Types)); + return SetCGInfo(new NVPTXTargetCodeGenInfo(Types)); case llvm::Triple::msp430: - return *(TheTargetCodeGenInfo = new MSP430TargetCodeGenInfo(Types)); + return SetCGInfo(new MSP430TargetCodeGenInfo(Types)); case llvm::Triple::systemz: { bool HasVector = getTarget().getABI() == "vector"; - return *(TheTargetCodeGenInfo = new SystemZTargetCodeGenInfo(Types, - HasVector)); + return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector)); } case llvm::Triple::tce: - return *(TheTargetCodeGenInfo = new TCETargetCodeGenInfo(Types)); + return SetCGInfo(new TCETargetCodeGenInfo(Types)); case llvm::Triple::x86: { bool IsDarwinVectorABI = Triple.isOSDarwin(); @@ -7546,44 +8012,49 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing(); if (Triple.getOS() == llvm::Triple::Win32) { - return *(TheTargetCodeGenInfo = new WinX86_32TargetCodeGenInfo( - Types, IsDarwinVectorABI, RetSmallStructInRegABI, - IsWin32FloatStructABI, 
CodeGenOpts.NumRegisterParameters)); + return SetCGInfo(new WinX86_32TargetCodeGenInfo( + Types, IsDarwinVectorABI, RetSmallStructInRegABI, + IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters)); } else { - return *(TheTargetCodeGenInfo = new X86_32TargetCodeGenInfo( - Types, IsDarwinVectorABI, RetSmallStructInRegABI, - IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters, - CodeGenOpts.FloatABI == "soft")); + return SetCGInfo(new X86_32TargetCodeGenInfo( + Types, IsDarwinVectorABI, RetSmallStructInRegABI, + IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters, + CodeGenOpts.FloatABI == "soft")); } } case llvm::Triple::x86_64: { StringRef ABI = getTarget().getABI(); - X86AVXABILevel AVXLevel = (ABI == "avx512" ? X86AVXABILevel::AVX512 : - ABI == "avx" ? X86AVXABILevel::AVX : - X86AVXABILevel::None); + X86AVXABILevel AVXLevel = + (ABI == "avx512" + ? X86AVXABILevel::AVX512 + : ABI == "avx" ? X86AVXABILevel::AVX : X86AVXABILevel::None); switch (Triple.getOS()) { case llvm::Triple::Win32: - return *(TheTargetCodeGenInfo = - new WinX86_64TargetCodeGenInfo(Types, AVXLevel)); + return SetCGInfo(new WinX86_64TargetCodeGenInfo(Types, AVXLevel)); case llvm::Triple::PS4: - return *(TheTargetCodeGenInfo = - new PS4TargetCodeGenInfo(Types, AVXLevel)); + return SetCGInfo(new PS4TargetCodeGenInfo(Types, AVXLevel)); default: - return *(TheTargetCodeGenInfo = - new X86_64TargetCodeGenInfo(Types, AVXLevel)); + return SetCGInfo(new X86_64TargetCodeGenInfo(Types, AVXLevel)); } } case llvm::Triple::hexagon: - return *(TheTargetCodeGenInfo = new HexagonTargetCodeGenInfo(Types)); + return SetCGInfo(new HexagonTargetCodeGenInfo(Types)); + case llvm::Triple::lanai: + return SetCGInfo(new LanaiTargetCodeGenInfo(Types)); case llvm::Triple::r600: - return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types)); + return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types)); case llvm::Triple::amdgcn: - return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types)); + return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types)); + case llvm::Triple::sparc: + return SetCGInfo(new SparcV8TargetCodeGenInfo(Types)); case llvm::Triple::sparcv9: - return *(TheTargetCodeGenInfo = new SparcV9TargetCodeGenInfo(Types)); + return SetCGInfo(new SparcV9TargetCodeGenInfo(Types)); case llvm::Triple::xcore: - return *(TheTargetCodeGenInfo = new XCoreTargetCodeGenInfo(Types)); + return SetCGInfo(new XCoreTargetCodeGenInfo(Types)); + case llvm::Triple::spir: + case llvm::Triple::spir64: + return SetCGInfo(new SPIRTargetCodeGenInfo(Types)); } } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h index 87b4704..e463825 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h @@ -29,15 +29,14 @@ class Value; } namespace clang { -class ABIInfo; class Decl; namespace CodeGen { +class ABIInfo; class CallArgList; class CodeGenModule; class CodeGenFunction; class CGFunctionInfo; -} /// TargetCodeGenInfo - This class organizes various target-specific /// codegeneration issues, like target-specific attributes, builtins and so @@ -218,7 +217,12 @@ public: virtual void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, llvm::SmallString<32> &Opt) const {} + + /// Get LLVM calling convention for OpenCL kernel. + virtual unsigned getOpenCLKernelCallingConv() const; }; + +} // namespace CodeGen } // namespace clang #endif // LLVM_CLANG_LIB_CODEGEN_TARGETINFO_H |
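
The hunks above introduce two extension points that new targets hook into: each architecture now registers its TargetCodeGenInfo through the SetCGInfo helper in CodeGenModule::getTargetCodeGenInfo(), and TargetCodeGenInfo grows a getOpenCLKernelCallingConv() hook, which AMDGPU and SPIR override to return AMDGPU_KERNEL and SPIR_KERNEL respectively while other targets keep the base implementation. The sketch below is illustrative only and is not part of the change: "Foo" is a hypothetical target, and the code assumes it sits in TargetInfo.cpp where DefaultABIInfo, TargetCodeGenInfo and the LLVM calling-convention enums are already visible, following the same pattern the patch uses for Lanai, SPIR and AMDGPU.

namespace {
// Hypothetical target, shown only to illustrate the registration pattern
// added by this change; it does not exist in the tree.
class FooTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  FooTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
      : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}

  // New hook added by this change; SPIR returns SPIR_KERNEL and AMDGPU
  // returns AMDGPU_KERNEL above. A target without a dedicated kernel
  // calling convention would simply not override this.
  unsigned getOpenCLKernelCallingConv() const override {
    return llvm::CallingConv::SPIR_KERNEL; // placeholder choice
  }
};
} // end anonymous namespace

// Registration in CodeGenModule::getTargetCodeGenInfo() would follow the
// SetCGInfo pattern used for every case in the switch:
//
//   case llvm::Triple::foo:                        // hypothetical triple
//     return SetCGInfo(new FooTargetCodeGenInfo(Types));

As a usage note, the repeated RoundUpToAlignment -> alignTo renames in these hunks are mechanical: llvm::alignTo(Size, 32) rounds Size up to the next multiple of 32, so expressions such as llvm::alignTo(Size, 32) / 32 still compute the number of 32-bit registers an argument occupies.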