Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen')
64 files changed, 10556 insertions, 4023 deletions
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h index ac31dfd..575506d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h @@ -10,6 +10,7 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_ABIINFO_H #define LLVM_CLANG_LIB_CODEGEN_ABIINFO_H +#include "clang/AST/CharUnits.h" #include "clang/AST/Type.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Type.h" @@ -23,6 +24,7 @@ namespace llvm { namespace clang { class ASTContext; + class CodeGenOptions; class TargetInfo; namespace CodeGen { @@ -67,6 +69,7 @@ namespace swiftcall { llvm::LLVMContext &getVMContext() const; const llvm::DataLayout &getDataLayout() const; const TargetInfo &getTarget() const; + const CodeGenOptions &getCodeGenOpts() const; /// Return the calling convention to use for system runtime /// functions. @@ -148,7 +151,6 @@ namespace swiftcall { return info->supportsSwift(); } }; - } // end namespace CodeGen } // end namespace clang diff --git a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp index d2ce6ea..513896d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp @@ -35,7 +35,6 @@ #include "llvm/LTO/LTOBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/SubtargetFeature.h" -#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" @@ -50,10 +49,12 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include <memory> using namespace clang; @@ -61,6 +62,9 @@ using namespace llvm; namespace { +// Default filename used for profile generation. +static constexpr StringLiteral DefaultProfileGenName = "default_%m.profraw"; + class EmitAssemblyHelper { DiagnosticsEngine &Diags; const HeaderSearchOptions &HSOpts; @@ -73,7 +77,6 @@ class EmitAssemblyHelper { std::unique_ptr<raw_pwrite_stream> OS; -private: TargetIRAnalysis getTargetIRAnalysis() const { if (TM) return TM->getTargetIRAnalysis(); @@ -81,9 +84,6 @@ private: return TargetIRAnalysis(); } - /// Set LLVM command line options passed through -backend-option. - void setCommandLineOpts(); - void CreatePasses(legacy::PassManager &MPM, legacy::FunctionPassManager &FPM); /// Generates the TargetMachine. @@ -130,16 +130,20 @@ public: // that we add to the PassManagerBuilder. 
class PassManagerBuilderWrapper : public PassManagerBuilder { public: - PassManagerBuilderWrapper(const CodeGenOptions &CGOpts, + PassManagerBuilderWrapper(const Triple &TargetTriple, + const CodeGenOptions &CGOpts, const LangOptions &LangOpts) - : PassManagerBuilder(), CGOpts(CGOpts), LangOpts(LangOpts) {} + : PassManagerBuilder(), TargetTriple(TargetTriple), CGOpts(CGOpts), + LangOpts(LangOpts) {} + const Triple &getTargetTriple() const { return TargetTriple; } const CodeGenOptions &getCGOpts() const { return CGOpts; } const LangOptions &getLangOpts() const { return LangOpts; } + private: + const Triple &TargetTriple; const CodeGenOptions &CGOpts; const LangOptions &LangOpts; }; - } static void addObjCARCAPElimPass(const PassManagerBuilder &Builder, PassManagerBase &PM) { @@ -183,19 +187,42 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder, Opts.Use8bitCounters = CGOpts.SanitizeCoverage8bitCounters; Opts.TracePC = CGOpts.SanitizeCoverageTracePC; Opts.TracePCGuard = CGOpts.SanitizeCoverageTracePCGuard; + Opts.NoPrune = CGOpts.SanitizeCoverageNoPrune; + Opts.Inline8bitCounters = CGOpts.SanitizeCoverageInline8bitCounters; PM.add(createSanitizerCoverageModulePass(Opts)); } +// Check if ASan should use GC-friendly instrumentation for globals. +// First of all, there is no point if -fdata-sections is off (expect for MachO, +// where this is not a factor). Also, on ELF this feature requires an assembler +// extension that only works with -integrated-as at the moment. +static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) { + if (!CGOpts.SanitizeAddressGlobalsDeadStripping) + return false; + switch (T.getObjectFormat()) { + case Triple::MachO: + case Triple::COFF: + return true; + case Triple::ELF: + return CGOpts.DataSections && !CGOpts.DisableIntegratedAS; + default: + return false; + } +} + static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { const PassManagerBuilderWrapper &BuilderWrapper = static_cast<const PassManagerBuilderWrapper&>(Builder); + const Triple &T = BuilderWrapper.getTargetTriple(); const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Address); bool UseAfterScope = CGOpts.SanitizeAddressUseAfterScope; + bool UseGlobalsGC = asanUseGlobalsGC(T, CGOpts); PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover, UseAfterScope)); - PM.add(createAddressSanitizerModulePass(/*CompileKernel*/false, Recover)); + PM.add(createAddressSanitizerModulePass(/*CompileKernel*/ false, Recover, + UseGlobalsGC)); } static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder, @@ -262,7 +289,7 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, TLII->disableAllFunctions(); else { // Disable individual libc/libm calls in TargetLibraryInfo. 
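Editorial aside, not part of the patch: the decision that asanUseGlobalsGC encodes above is easy to skim past, so here it is restated as a minimal self-contained C++ sketch. The types ObjFormat and SketchOpts are invented stand-ins for llvm::Triple's object format and the relevant clang::CodeGenOptions fields; the flag spellings in the comments are the usual driver options that feed those fields and are given for orientation only.

// Stand-ins for the real clang/LLVM types used by asanUseGlobalsGC.
enum class ObjFormat { MachO, COFF, ELF, Wasm, Unknown };

struct SketchOpts {
  bool GlobalsDeadStripping; // -fsanitize-address-globals-dead-stripping
  bool DataSections;         // -fdata-sections
  bool DisableIntegratedAS;  // -fno-integrated-as
};

// Mirrors the logic above: Mach-O and COFF can always use the GC-friendly
// instrumentation; ELF additionally needs data sections and the integrated
// assembler; anything else opts out.
static bool sketchUseGlobalsGC(ObjFormat Format, const SketchOpts &Opts) {
  if (!Opts.GlobalsDeadStripping)
    return false;
  switch (Format) {
  case ObjFormat::MachO:
  case ObjFormat::COFF:
    return true;
  case ObjFormat::ELF:
    return Opts.DataSections && !Opts.DisableIntegratedAS;
  default:
    return false;
  }
}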
- LibFunc::Func F; + LibFunc F; for (auto &FuncName : CodeGenOpts.getNoBuiltinFuncs()) if (TLII->getLibFunc(FuncName, F)) TLII->setUnavailable(F); @@ -292,6 +319,140 @@ static void addSymbolRewriterPass(const CodeGenOptions &Opts, MPM->add(createRewriteSymbolsPass(DL)); } +static CodeGenOpt::Level getCGOptLevel(const CodeGenOptions &CodeGenOpts) { + switch (CodeGenOpts.OptimizationLevel) { + default: + llvm_unreachable("Invalid optimization level!"); + case 0: + return CodeGenOpt::None; + case 1: + return CodeGenOpt::Less; + case 2: + return CodeGenOpt::Default; // O2/Os/Oz + case 3: + return CodeGenOpt::Aggressive; + } +} + +static llvm::CodeModel::Model getCodeModel(const CodeGenOptions &CodeGenOpts) { + unsigned CodeModel = + llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel) + .Case("small", llvm::CodeModel::Small) + .Case("kernel", llvm::CodeModel::Kernel) + .Case("medium", llvm::CodeModel::Medium) + .Case("large", llvm::CodeModel::Large) + .Case("default", llvm::CodeModel::Default) + .Default(~0u); + assert(CodeModel != ~0u && "invalid code model!"); + return static_cast<llvm::CodeModel::Model>(CodeModel); +} + +static llvm::Reloc::Model getRelocModel(const CodeGenOptions &CodeGenOpts) { + // Keep this synced with the equivalent code in + // lib/Frontend/CompilerInvocation.cpp + llvm::Optional<llvm::Reloc::Model> RM; + RM = llvm::StringSwitch<llvm::Reloc::Model>(CodeGenOpts.RelocationModel) + .Case("static", llvm::Reloc::Static) + .Case("pic", llvm::Reloc::PIC_) + .Case("ropi", llvm::Reloc::ROPI) + .Case("rwpi", llvm::Reloc::RWPI) + .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI) + .Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC); + assert(RM.hasValue() && "invalid PIC model!"); + return *RM; +} + +static TargetMachine::CodeGenFileType getCodeGenFileType(BackendAction Action) { + if (Action == Backend_EmitObj) + return TargetMachine::CGFT_ObjectFile; + else if (Action == Backend_EmitMCNull) + return TargetMachine::CGFT_Null; + else { + assert(Action == Backend_EmitAssembly && "Invalid action!"); + return TargetMachine::CGFT_AssemblyFile; + } +} + +static void initTargetOptions(llvm::TargetOptions &Options, + const CodeGenOptions &CodeGenOpts, + const clang::TargetOptions &TargetOpts, + const LangOptions &LangOpts, + const HeaderSearchOptions &HSOpts) { + Options.ThreadModel = + llvm::StringSwitch<llvm::ThreadModel::Model>(CodeGenOpts.ThreadModel) + .Case("posix", llvm::ThreadModel::POSIX) + .Case("single", llvm::ThreadModel::Single); + + // Set float ABI type. + assert((CodeGenOpts.FloatABI == "soft" || CodeGenOpts.FloatABI == "softfp" || + CodeGenOpts.FloatABI == "hard" || CodeGenOpts.FloatABI.empty()) && + "Invalid Floating Point ABI!"); + Options.FloatABIType = + llvm::StringSwitch<llvm::FloatABI::ABIType>(CodeGenOpts.FloatABI) + .Case("soft", llvm::FloatABI::Soft) + .Case("softfp", llvm::FloatABI::Soft) + .Case("hard", llvm::FloatABI::Hard) + .Default(llvm::FloatABI::Default); + + // Set FP fusion mode. + switch (LangOpts.getDefaultFPContractMode()) { + case LangOptions::FPC_Off: + // Preserve any contraction performed by the front-end. (Strict performs + // splitting of the muladd instrinsic in the backend.) 
+ Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; + break; + case LangOptions::FPC_On: + Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; + break; + case LangOptions::FPC_Fast: + Options.AllowFPOpFusion = llvm::FPOpFusion::Fast; + break; + } + + Options.UseInitArray = CodeGenOpts.UseInitArray; + Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS; + Options.CompressDebugSections = CodeGenOpts.getCompressDebugSections(); + Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations; + + // Set EABI version. + Options.EABIVersion = TargetOpts.EABIVersion; + + if (LangOpts.SjLjExceptions) + Options.ExceptionModel = llvm::ExceptionHandling::SjLj; + + Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath; + Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath; + Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS; + Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath; + Options.StackAlignmentOverride = CodeGenOpts.StackAlignment; + Options.FunctionSections = CodeGenOpts.FunctionSections; + Options.DataSections = CodeGenOpts.DataSections; + Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames; + Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; + Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); + + if (CodeGenOpts.EnableSplitDwarf) + Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; + Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; + Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels; + Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm; + Options.MCOptions.MCNoExecStack = CodeGenOpts.NoExecStack; + Options.MCOptions.MCIncrementalLinkerCompatible = + CodeGenOpts.IncrementalLinkerCompatible; + Options.MCOptions.MCPIECopyRelocations = CodeGenOpts.PIECopyRelocations; + Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings; + Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose; + Options.MCOptions.PreserveAsmComments = CodeGenOpts.PreserveAsmComments; + Options.MCOptions.ABIName = TargetOpts.ABI; + for (const auto &Entry : HSOpts.UserEntries) + if (!Entry.IsFramework && + (Entry.Group == frontend::IncludeDirGroup::Quoted || + Entry.Group == frontend::IncludeDirGroup::Angled || + Entry.Group == frontend::IncludeDirGroup::System)) + Options.MCOptions.IASSearchPaths.push_back( + Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path); +} + void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, legacy::FunctionPassManager &FPM) { // Handle disabling of all LLVM passes, where we want to preserve the @@ -299,8 +460,6 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, if (CodeGenOpts.DisableLLVMPasses) return; - PassManagerBuilderWrapper PMBuilder(CodeGenOpts, LangOpts); - // Figure out TargetLibraryInfo. This needs to be added to MPM and FPM // manually (and not via PMBuilder), since some passes (eg. InstrProfiling) // are inserted before PMBuilder ones - they'd get the default-constructed @@ -309,6 +468,8 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, std::unique_ptr<TargetLibraryInfoImpl> TLII( createTLII(TargetTriple, CodeGenOpts)); + PassManagerBuilderWrapper PMBuilder(TargetTriple, CodeGenOpts, LangOpts); + // At O0 and O1 we only run the always inliner which is more efficient. At // higher optimization levels we run the normal inliner. 
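Editorial aside, not part of the patch: the FP-contraction mapping in initTargetOptions above is easy to misread because FPC_Off no longer selects Strict. A short restatement with one example follows; the function name fma_like is invented for illustration.

// With contraction now decided by the front-end
// (LangOptions::getDefaultFPContractMode), the switch above arranges:
//   FPC_Off  -> FPOpFusion::Standard  // keep any llvm.fmuladd the FE emitted,
//                                     // but let the backend add no new fusion
//   FPC_On   -> FPOpFusion::Standard
//   FPC_Fast -> FPOpFusion::Fast      // backend may fuse mul+add freely
// The old mapping of FPC_Off to FPOpFusion::Strict would have split such
// intrinsics back apart in the backend, which is exactly what the new comment
// above warns about.
double fma_like(double a, double b, double c) {
  // Under -ffp-contract=on the front-end may emit llvm.fmuladd for this
  // multiply-add; FPOpFusion::Standard then merely preserves that choice
  // during instruction selection.
  return a * b + c;
}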
if (CodeGenOpts.OptimizationLevel <= 1) { @@ -316,13 +477,17 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, !CodeGenOpts.DisableLifetimeMarkers); PMBuilder.Inliner = createAlwaysInlinerLegacyPass(InsertLifetimeIntrinsics); } else { + // We do not want to inline hot callsites for SamplePGO module-summary build + // because profile annotation will happen again in ThinLTO backend, and we + // want the IR of the hot path to match the profile. PMBuilder.Inliner = createFunctionInliningPass( - CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize); + CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize, + (!CodeGenOpts.SampleProfileFile.empty() && + CodeGenOpts.EmitSummaryIndex)); } PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel; PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize; - PMBuilder.BBVectorize = CodeGenOpts.VectorizeBB; PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP; PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; @@ -334,16 +499,13 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, MPM.add(new TargetLibraryInfoWrapperPass(*TLII)); - // Add target-specific passes that need to run as early as possible. if (TM) - PMBuilder.addExtension( - PassManagerBuilder::EP_EarlyAsPossible, - [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) { - TM->addEarlyAsPossiblePasses(PM); - }); + TM->adjustPassManager(PMBuilder); - PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible, - addAddDiscriminatorsPass); + if (CodeGenOpts.DebugInfoForProfiling || + !CodeGenOpts.SampleProfileFile.empty()) + PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible, + addAddDiscriminatorsPass); // In ObjC ARC mode, add the main ARC optimization passes. if (LangOpts.ObjCAutoRefCount) { @@ -454,7 +616,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, if (!CodeGenOpts.InstrProfileOutput.empty()) PMBuilder.PGOInstrGen = CodeGenOpts.InstrProfileOutput; else - PMBuilder.PGOInstrGen = "default_%m.profraw"; + PMBuilder.PGOInstrGen = DefaultProfileGenName; } if (CodeGenOpts.hasProfileIRUse()) PMBuilder.PGOInstrUse = CodeGenOpts.ProfileInstrumentUsePath; @@ -466,7 +628,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, PMBuilder.populateModulePassManager(MPM); } -void EmitAssemblyHelper::setCommandLineOpts() { +static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) { SmallVector<const char *, 16> BackendArgs; BackendArgs.push_back("clang"); // Fake program name. if (!CodeGenOpts.DebugPass.empty()) { @@ -495,126 +657,14 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { return; } - unsigned CodeModel = - llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel) - .Case("small", llvm::CodeModel::Small) - .Case("kernel", llvm::CodeModel::Kernel) - .Case("medium", llvm::CodeModel::Medium) - .Case("large", llvm::CodeModel::Large) - .Case("default", llvm::CodeModel::Default) - .Default(~0u); - assert(CodeModel != ~0u && "invalid code model!"); - llvm::CodeModel::Model CM = static_cast<llvm::CodeModel::Model>(CodeModel); - + llvm::CodeModel::Model CM = getCodeModel(CodeGenOpts); std::string FeaturesStr = llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ","); - - // Keep this synced with the equivalent code in tools/driver/cc1as_main.cpp. 
- llvm::Optional<llvm::Reloc::Model> RM; - RM = llvm::StringSwitch<llvm::Reloc::Model>(CodeGenOpts.RelocationModel) - .Case("static", llvm::Reloc::Static) - .Case("pic", llvm::Reloc::PIC_) - .Case("ropi", llvm::Reloc::ROPI) - .Case("rwpi", llvm::Reloc::RWPI) - .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI) - .Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC); - assert(RM.hasValue() && "invalid PIC model!"); - - CodeGenOpt::Level OptLevel; - switch (CodeGenOpts.OptimizationLevel) { - default: - llvm_unreachable("Invalid optimization level!"); - case 0: - OptLevel = CodeGenOpt::None; - break; - case 1: - OptLevel = CodeGenOpt::Less; - break; - case 2: - OptLevel = CodeGenOpt::Default; - break; // O2/Os/Oz - case 3: - OptLevel = CodeGenOpt::Aggressive; - break; - } + llvm::Reloc::Model RM = getRelocModel(CodeGenOpts); + CodeGenOpt::Level OptLevel = getCGOptLevel(CodeGenOpts); llvm::TargetOptions Options; - - Options.ThreadModel = - llvm::StringSwitch<llvm::ThreadModel::Model>(CodeGenOpts.ThreadModel) - .Case("posix", llvm::ThreadModel::POSIX) - .Case("single", llvm::ThreadModel::Single); - - // Set float ABI type. - assert((CodeGenOpts.FloatABI == "soft" || CodeGenOpts.FloatABI == "softfp" || - CodeGenOpts.FloatABI == "hard" || CodeGenOpts.FloatABI.empty()) && - "Invalid Floating Point ABI!"); - Options.FloatABIType = - llvm::StringSwitch<llvm::FloatABI::ABIType>(CodeGenOpts.FloatABI) - .Case("soft", llvm::FloatABI::Soft) - .Case("softfp", llvm::FloatABI::Soft) - .Case("hard", llvm::FloatABI::Hard) - .Default(llvm::FloatABI::Default); - - // Set FP fusion mode. - switch (CodeGenOpts.getFPContractMode()) { - case CodeGenOptions::FPC_Off: - Options.AllowFPOpFusion = llvm::FPOpFusion::Strict; - break; - case CodeGenOptions::FPC_On: - Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; - break; - case CodeGenOptions::FPC_Fast: - Options.AllowFPOpFusion = llvm::FPOpFusion::Fast; - break; - } - - Options.UseInitArray = CodeGenOpts.UseInitArray; - Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS; - Options.CompressDebugSections = CodeGenOpts.CompressDebugSections; - Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations; - - // Set EABI version. 
- Options.EABIVersion = llvm::StringSwitch<llvm::EABI>(TargetOpts.EABIVersion) - .Case("4", llvm::EABI::EABI4) - .Case("5", llvm::EABI::EABI5) - .Case("gnu", llvm::EABI::GNU) - .Default(llvm::EABI::Default); - - if (LangOpts.SjLjExceptions) - Options.ExceptionModel = llvm::ExceptionHandling::SjLj; - - Options.LessPreciseFPMADOption = CodeGenOpts.LessPreciseFPMAD; - Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath; - Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath; - Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS; - Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath; - Options.StackAlignmentOverride = CodeGenOpts.StackAlignment; - Options.FunctionSections = CodeGenOpts.FunctionSections; - Options.DataSections = CodeGenOpts.DataSections; - Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames; - Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; - Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); - - Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; - Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels; - Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm; - Options.MCOptions.MCNoExecStack = CodeGenOpts.NoExecStack; - Options.MCOptions.MCIncrementalLinkerCompatible = - CodeGenOpts.IncrementalLinkerCompatible; - Options.MCOptions.MCPIECopyRelocations = CodeGenOpts.PIECopyRelocations; - Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings; - Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose; - Options.MCOptions.PreserveAsmComments = CodeGenOpts.PreserveAsmComments; - Options.MCOptions.ABIName = TargetOpts.ABI; - for (const auto &Entry : HSOpts.UserEntries) - if (!Entry.IsFramework && - (Entry.Group == frontend::IncludeDirGroup::Quoted || - Entry.Group == frontend::IncludeDirGroup::Angled || - Entry.Group == frontend::IncludeDirGroup::System)) - Options.MCOptions.IASSearchPaths.push_back( - Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path); - + initTargetOptions(Options, CodeGenOpts, TargetOpts, LangOpts, HSOpts); TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr, Options, RM, CM, OptLevel)); } @@ -630,13 +680,7 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, // Normal mode, emit a .s or .o file by running the code generator. Note, // this also adds codegenerator level optimization passes. - TargetMachine::CodeGenFileType CGFT = TargetMachine::CGFT_AssemblyFile; - if (Action == Backend_EmitObj) - CGFT = TargetMachine::CGFT_ObjectFile; - else if (Action == Backend_EmitMCNull) - CGFT = TargetMachine::CGFT_Null; - else - assert(Action == Backend_EmitAssembly && "Invalid action!"); + TargetMachine::CodeGenFileType CGFT = getCodeGenFileType(Action); // Add ObjC ARC final-cleanup optimizations. This is done as part of the // "codegen" passes so that it isn't run multiple times when there is @@ -657,7 +701,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) { TimeRegion Region(llvm::TimePassesIsEnabled ? 
&CodeGenerationTime : nullptr); - setCommandLineOpts(); + setCommandLineOpts(CodeGenOpts); bool UsesCodeGen = (Action != Backend_EmitNothing && Action != Backend_EmitBC && @@ -683,14 +727,31 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, CodeGenPasses.add( createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); + std::unique_ptr<raw_fd_ostream> ThinLinkOS; + switch (Action) { case Backend_EmitNothing: break; case Backend_EmitBC: - PerModulePasses.add(createBitcodeWriterPass( - *OS, CodeGenOpts.EmitLLVMUseLists, CodeGenOpts.EmitSummaryIndex, - CodeGenOpts.EmitSummaryIndex)); + if (CodeGenOpts.EmitSummaryIndex) { + if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { + std::error_code EC; + ThinLinkOS.reset(new llvm::raw_fd_ostream( + CodeGenOpts.ThinLinkBitcodeFile, EC, + llvm::sys::fs::F_None)); + if (EC) { + Diags.Report(diag::err_fe_unable_to_open_output) << CodeGenOpts.ThinLinkBitcodeFile + << EC.message(); + return; + } + } + PerModulePasses.add( + createWriteThinLTOBitcodePass(*OS, ThinLinkOS.get())); + } + else + PerModulePasses.add( + createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists)); break; case Backend_EmitLL: @@ -769,7 +830,7 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) { void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) { TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr); - setCommandLineOpts(); + setCommandLineOpts(CodeGenOpts); // The new pass manager always makes a target machine available to passes // during construction. @@ -779,7 +840,28 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( return; TheModule->setDataLayout(TM->createDataLayout()); - PassBuilder PB(TM.get()); + PGOOptions PGOOpt; + + // -fprofile-generate. + PGOOpt.RunProfileGen = CodeGenOpts.hasProfileIRInstr(); + if (PGOOpt.RunProfileGen) + PGOOpt.ProfileGenFile = CodeGenOpts.InstrProfileOutput.empty() ? + DefaultProfileGenName : CodeGenOpts.InstrProfileOutput; + + // -fprofile-use. + if (CodeGenOpts.hasProfileIRUse()) + PGOOpt.ProfileUseFile = CodeGenOpts.ProfileInstrumentUsePath; + + if (!CodeGenOpts.SampleProfileFile.empty()) + PGOOpt.SampleProfileFile = CodeGenOpts.SampleProfileFile; + + // Only pass a PGO options struct if -fprofile-generate or + // -fprofile-use were passed on the cmdline. + PassBuilder PB(TM.get(), + (PGOOpt.RunProfileGen || + !PGOOpt.ProfileUseFile.empty() || + !PGOOpt.SampleProfileFile.empty()) ? + Optional<PGOOptions>(PGOOpt) : None); LoopAnalysisManager LAM; FunctionAnalysisManager FAM; @@ -796,20 +878,34 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( PB.registerLoopAnalyses(LAM); PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); - ModulePassManager MPM; + ModulePassManager MPM(CodeGenOpts.DebugPassManager); if (!CodeGenOpts.DisableLLVMPasses) { + bool IsThinLTO = CodeGenOpts.EmitSummaryIndex; + bool IsLTO = CodeGenOpts.PrepareForLTO; + if (CodeGenOpts.OptimizationLevel == 0) { // Build a minimal pipeline based on the semantics required by Clang, // which is just that always inlining occurs. MPM.addPass(AlwaysInlinerPass()); + if (IsThinLTO) + MPM.addPass(NameAnonGlobalPass()); } else { - // Otherwise, use the default pass pipeline. We also have to map our - // optimization levels into one of the distinct levels used to configure - // the pipeline. + // Map our optimization levels into one of the distinct levels used to + // configure the pipeline. 
PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts); - MPM = PB.buildPerModuleDefaultPipeline(Level); + if (IsThinLTO) { + MPM = PB.buildThinLTOPreLinkDefaultPipeline( + Level, CodeGenOpts.DebugPassManager); + MPM.addPass(NameAnonGlobalPass()); + } else if (IsLTO) { + MPM = PB.buildLTOPreLinkDefaultPipeline(Level, + CodeGenOpts.DebugPassManager); + } else { + MPM = PB.buildPerModuleDefaultPipeline(Level, + CodeGenOpts.DebugPassManager); + } } } @@ -817,6 +913,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // create that pass manager here and use it as needed below. legacy::PassManager CodeGenPasses; bool NeedCodeGen = false; + Optional<raw_fd_ostream> ThinLinkOS; // Append any output we need to the pass manager. switch (Action) { @@ -824,9 +921,24 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( break; case Backend_EmitBC: - MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, - CodeGenOpts.EmitSummaryIndex, - CodeGenOpts.EmitSummaryIndex)); + if (CodeGenOpts.EmitSummaryIndex) { + if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { + std::error_code EC; + ThinLinkOS.emplace(CodeGenOpts.ThinLinkBitcodeFile, EC, + llvm::sys::fs::F_None); + if (EC) { + Diags.Report(diag::err_fe_unable_to_open_output) + << CodeGenOpts.ThinLinkBitcodeFile << EC.message(); + return; + } + } + MPM.addPass( + ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &*ThinLinkOS : nullptr)); + } else { + MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, + CodeGenOpts.EmitSummaryIndex, + CodeGenOpts.EmitSummaryIndex)); + } break; case Backend_EmitLL: @@ -861,21 +973,50 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( } } +Expected<BitcodeModule> clang::FindThinLTOModule(MemoryBufferRef MBRef) { + Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(MBRef); + if (!BMsOrErr) + return BMsOrErr.takeError(); + + // The bitcode file may contain multiple modules, we want the one that is + // marked as being the ThinLTO module. + for (BitcodeModule &BM : *BMsOrErr) { + Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo(); + if (LTOInfo && LTOInfo->IsThinLTO) + return BM; + } + + return make_error<StringError>("Could not find module summary", + inconvertibleErrorCode()); +} + static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, - std::unique_ptr<raw_pwrite_stream> OS) { - StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>> + const HeaderSearchOptions &HeaderOpts, + const CodeGenOptions &CGOpts, + const clang::TargetOptions &TOpts, + const LangOptions &LOpts, + std::unique_ptr<raw_pwrite_stream> OS, + std::string SampleProfile, + BackendAction Action) { + StringMap<DenseMap<GlobalValue::GUID, GlobalValueSummary *>> ModuleToDefinedGVSummaries; CombinedIndex->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + setCommandLineOpts(CGOpts); + // We can simply import the values mentioned in the combined index, since // we should only invoke this using the individual indexes written out // via a WriteIndexesThinBackend. FunctionImporter::ImportMapTy ImportList; for (auto &GlobalList : *CombinedIndex) { + // Ignore entries for undefined references. 
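Editorial aside, not part of the patch: pulling together the branches added above, this is how the LTO-related CodeGenOptions select a pipeline and a bitcode writer. The driver flag spellings in parentheses are the usual ones and are stated as an orientation aid, not something this diff shows.

// New pass manager, optimization enabled:
//   EmitSummaryIndex (-flto=thin) -> buildThinLTOPreLinkDefaultPipeline(Level)
//                                    followed by NameAnonGlobalPass()
//   PrepareForLTO    (-flto)      -> buildLTOPreLinkDefaultPipeline(Level)
//   neither                       -> buildPerModuleDefaultPipeline(Level)
// At -O0 only AlwaysInlinerPass runs, plus NameAnonGlobalPass when
// EmitSummaryIndex is set.
//
// Backend_EmitBC, in both pass managers:
//   EmitSummaryIndex -> ThinLTO bitcode writer, with an optional second stream
//                       for the minimized "thin link" file when
//                       CodeGenOpts.ThinLinkBitcodeFile (-fthin-link-bitcode=)
//                       is non-empty
//   otherwise        -> plain BitcodeWriterPass / createBitcodeWriterPass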
+ if (GlobalList.second.SummaryList.empty()) + continue; + auto GUID = GlobalList.first; - assert(GlobalList.second.size() == 1 && + assert(GlobalList.second.SummaryList.size() == 1 && "Expected individual combined index to have one summary per GUID"); - auto &Summary = GlobalList.second[0]; + auto &Summary = GlobalList.second.SummaryList[0]; // Skip the summaries for the importing module. These are included to // e.g. record required linkage changes. if (Summary->modulePath() == M->getModuleIdentifier()) @@ -897,32 +1038,15 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, return; } - Expected<std::vector<BitcodeModule>> BMsOrErr = - getBitcodeModuleList(**MBOrErr); - if (!BMsOrErr) { - handleAllErrors(BMsOrErr.takeError(), [&](ErrorInfoBase &EIB) { + Expected<BitcodeModule> BMOrErr = FindThinLTOModule(**MBOrErr); + if (!BMOrErr) { + handleAllErrors(BMOrErr.takeError(), [&](ErrorInfoBase &EIB) { errs() << "Error loading imported file '" << I.first() << "': " << EIB.message() << '\n'; }); return; } - - // The bitcode file may contain multiple modules, we want the one with a - // summary. - bool FoundModule = false; - for (BitcodeModule &BM : *BMsOrErr) { - Expected<bool> HasSummary = BM.hasSummary(); - if (HasSummary && *HasSummary) { - ModuleMap.insert({I.first(), BM}); - FoundModule = true; - break; - } - } - if (!FoundModule) { - errs() << "Error loading imported file '" << I.first() - << "': Could not find module summary\n"; - return; - } + ModuleMap.insert({I.first(), *BMOrErr}); OwnedImports.push_back(std::move(*MBOrErr)); } @@ -930,6 +1054,36 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, return llvm::make_unique<lto::NativeObjectStream>(std::move(OS)); }; lto::Config Conf; + Conf.CPU = TOpts.CPU; + Conf.CodeModel = getCodeModel(CGOpts); + Conf.MAttrs = TOpts.Features; + Conf.RelocModel = getRelocModel(CGOpts); + Conf.CGOptLevel = getCGOptLevel(CGOpts); + initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts); + Conf.SampleProfile = std::move(SampleProfile); + Conf.UseNewPM = CGOpts.ExperimentalNewPassManager; + switch (Action) { + case Backend_EmitNothing: + Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) { + return false; + }; + break; + case Backend_EmitLL: + Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) { + M->print(*OS, nullptr, CGOpts.EmitLLVMUseLists); + return false; + }; + break; + case Backend_EmitBC: + Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) { + WriteBitcodeToFile(M, *OS, CGOpts.EmitLLVMUseLists); + return false; + }; + break; + default: + Conf.CGFileType = getCodeGenFileType(Action); + break; + } if (Error E = thinBackend( Conf, 0, AddStream, *M, *CombinedIndex, ImportList, ModuleToDefinedGVSummaries[M->getModuleIdentifier()], ModuleMap)) { @@ -952,7 +1106,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, // into memory and pass it into runThinLTOBackend, which will run the // function importer and invoke LTO passes. Expected<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr = - llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile); + llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile, + /*IgnoreEmptyThinLTOIndexFile*/true); if (!IndexOrErr) { logAllUnhandledErrors(IndexOrErr.takeError(), errs(), "Error loading index file '" + @@ -965,7 +1120,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, // of an error). 
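Editorial aside, not part of the patch: the switch above that installs Conf.PreCodeGenModuleHook relies on the lto::Config convention that a module hook returning false aborts further processing of that module; that convention is recalled from LLVM's lto::Config documentation rather than shown in this diff, so treat it as an assumption. The rest of the mapping is read directly from the code above.

// Backend_EmitNothing: hook does nothing and returns false, so thinBackend
//                      stops before code generation.
// Backend_EmitLL:      hook prints textual IR to OS, then returns false.
// Backend_EmitBC:      hook writes bitcode to OS, then returns false.
// other actions:       no hook; Conf.CGFileType = getCodeGenFileType(Action)
//                      selects assembly, object, or null output as usual.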
bool DoThinLTOBackend = CombinedIndex != nullptr; if (DoThinLTOBackend) { - runThinLTOBackend(CombinedIndex.get(), M, std::move(OS)); + runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts, + LOpts, std::move(OS), CGOpts.SampleProfileFile, Action); return; } } @@ -996,6 +1152,7 @@ static const char* getSectionNameForBitcode(const Triple &T) { return "__LLVM,__bitcode"; case Triple::COFF: case Triple::ELF: + case Triple::Wasm: case Triple::UnknownObjectFormat: return ".llvmbc"; } @@ -1008,6 +1165,7 @@ static const char* getSectionNameForCommandline(const Triple &T) { return "__LLVM,__cmdline"; case Triple::COFF: case Triple::ELF: + case Triple::Wasm: case Triple::UnknownObjectFormat: return ".llvmcmd"; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp index 9287e46..a6e6fec 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp @@ -95,7 +95,7 @@ namespace { BFI.StorageOffset += OffsetInChars; LVal = LValue::MakeBitfield(Address(Addr, lvalue.getAlignment()), BFI, lvalue.getType(), - lvalue.getAlignmentSource()); + lvalue.getBaseInfo()); LVal.setTBAAInfo(lvalue.getTBAAInfo()); AtomicTy = C.getIntTypeForBitwidth(AtomicSizeInBits, OrigBFI.IsSigned); if (AtomicTy.isNull()) { @@ -203,7 +203,7 @@ namespace { addr = CGF.Builder.CreateStructGEP(addr, 0, CharUnits()); return LValue::MakeAddr(addr, getValueType(), CGF.getContext(), - LVal.getAlignmentSource(), LVal.getTBAAInfo()); + LVal.getBaseInfo(), LVal.getTBAAInfo()); } /// \brief Emits atomic load. @@ -1181,15 +1181,15 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr, if (LVal.isBitField()) return CGF.EmitLoadOfBitfieldLValue( LValue::MakeBitfield(addr, LVal.getBitFieldInfo(), LVal.getType(), - LVal.getAlignmentSource())); + LVal.getBaseInfo()), loc); if (LVal.isVectorElt()) return CGF.EmitLoadOfLValue( LValue::MakeVectorElt(addr, LVal.getVectorIdx(), LVal.getType(), - LVal.getAlignmentSource()), loc); + LVal.getBaseInfo()), loc); assert(LVal.isExtVectorElt()); return CGF.EmitLoadOfExtVectorElementLValue(LValue::MakeExtVectorElt( addr, LVal.getExtVectorElts(), LVal.getType(), - LVal.getAlignmentSource())); + LVal.getBaseInfo())); } RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal, @@ -1506,26 +1506,26 @@ EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue OldRVal, UpdateLVal = LValue::MakeBitfield(Ptr, AtomicLVal.getBitFieldInfo(), AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getBaseInfo()); DesiredLVal = LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(), AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getBaseInfo()); } else if (AtomicLVal.isVectorElt()) { UpdateLVal = LValue::MakeVectorElt(Ptr, AtomicLVal.getVectorIdx(), AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getBaseInfo()); DesiredLVal = LValue::MakeVectorElt( DesiredAddr, AtomicLVal.getVectorIdx(), AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getBaseInfo()); } else { assert(AtomicLVal.isExtVectorElt()); UpdateLVal = LValue::MakeExtVectorElt(Ptr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getBaseInfo()); DesiredLVal = LValue::MakeExtVectorElt( DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getBaseInfo()); } UpdateLVal.setTBAAInfo(AtomicLVal.getTBAAInfo()); 
DesiredLVal.setTBAAInfo(AtomicLVal.getTBAAInfo()); @@ -1612,17 +1612,17 @@ static void EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, DesiredLVal = LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(), AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getBaseInfo()); } else if (AtomicLVal.isVectorElt()) { DesiredLVal = LValue::MakeVectorElt(DesiredAddr, AtomicLVal.getVectorIdx(), AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getBaseInfo()); } else { assert(AtomicLVal.isExtVectorElt()); DesiredLVal = LValue::MakeExtVectorElt( DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getBaseInfo()); } DesiredLVal.setTBAAInfo(AtomicLVal.getTBAAInfo()); // Store new value in the corresponding memory area diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp index b250b9a..1810489 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp @@ -16,7 +16,7 @@ #include "CGObjCRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "ConstantBuilder.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/DeclObjC.h" #include "llvm/ADT/SmallSet.h" #include "llvm/IR/CallSite.h" @@ -266,7 +266,7 @@ static bool isSafeForCXXConstantCapture(QualType type) { static llvm::Constant *tryCaptureAsConstant(CodeGenModule &CGM, CodeGenFunction *CGF, const VarDecl *var) { - // Return if this is a function paramter. We shouldn't try to + // Return if this is a function parameter. We shouldn't try to // rematerialize default arguments of function parameters. if (isa<ParmVarDecl>(var)) return nullptr; @@ -318,6 +318,19 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info, elementTypes.push_back(CGM.getBlockDescriptorType()); } +static QualType getCaptureFieldType(const CodeGenFunction &CGF, + const BlockDecl::Capture &CI) { + const VarDecl *VD = CI.getVariable(); + + // If the variable is captured by an enclosing block or lambda expression, + // use the type of the capture field. + if (CGF.BlockInfo && CI.isNested()) + return CGF.BlockInfo->getCapture(VD).fieldType(); + if (auto *FD = CGF.LambdaCaptureFields.lookup(VD)) + return FD->getType(); + return VD->getType(); +} + /// Compute the layout of the given block. Attempts to lay the block /// out with minimal space requirements. static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, @@ -432,15 +445,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, } } - QualType VT = variable->getType(); - - // If the variable is captured by an enclosing block or lambda expression, - // use the type of the capture field. - if (CGF->BlockInfo && CI.isNested()) - VT = CGF->BlockInfo->getCapture(variable).fieldType(); - else if (auto *FD = CGF->LambdaCaptureFields.lookup(variable)) - VT = FD->getType(); - + QualType VT = getCaptureFieldType(*CGF, CI); CharUnits size = C.getTypeSizeInChars(VT); CharUnits align = C.getDeclAlign(variable); @@ -606,15 +611,25 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) { if (capture.isConstant()) continue; // Ignore objects that aren't destructed. 
- QualType::DestructionKind dtorKind = - variable->getType().isDestructedType(); + QualType VT = getCaptureFieldType(CGF, CI); + QualType::DestructionKind dtorKind = VT.isDestructedType(); if (dtorKind == QualType::DK_none) continue; CodeGenFunction::Destroyer *destroyer; // Block captures count as local values and have imprecise semantics. // They also can't be arrays, so need to worry about that. - if (dtorKind == QualType::DK_objc_strong_lifetime) { + // + // For const-qualified captures, emit clang.arc.use to ensure the captured + // object doesn't get released while we are still depending on its validity + // within the block. + if (VT.isConstQualified() && + VT.getObjCLifetime() == Qualifiers::OCL_Strong && + CGF.CGM.getCodeGenOpts().OptimizationLevel != 0) { + assert(CGF.CGM.getLangOpts().ObjCAutoRefCount && + "expected ObjC ARC to be enabled"); + destroyer = CodeGenFunction::emitARCIntrinsicUse; + } else if (dtorKind == QualType::DK_objc_strong_lifetime) { destroyer = CodeGenFunction::destroyARCStrongImprecise; } else { destroyer = CGF.getDestroyer(dtorKind); @@ -634,7 +649,7 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) { if (useArrayEHCleanup) cleanupKind = InactiveNormalAndEHCleanup; - CGF.pushDestroy(cleanupKind, addr, variable->getType(), + CGF.pushDestroy(cleanupKind, addr, VT, destroyer, useArrayEHCleanup); // Remember where that cleanup was. @@ -718,7 +733,12 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // Otherwise, we have to emit this as a local block. - llvm::Constant *isa = CGM.getNSConcreteStackBlock(); + llvm::Constant *isa = + (!CGM.getContext().getLangOpts().OpenCL) + ? CGM.getNSConcreteStackBlock() + : CGM.getNullPointer(VoidPtrPtrTy, + CGM.getContext().getPointerType( + QualType(CGM.getContext().VoidPtrTy))); isa = llvm::ConstantExpr::getBitCast(isa, VoidPtrTy); // Build the block descriptor. @@ -856,6 +876,13 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { } else if (type->isReferenceType()) { Builder.CreateStore(src.getPointer(), blockField); + // If type is const-qualified, copy the value into the block field. + } else if (type.isConstQualified() && + type.getObjCLifetime() == Qualifiers::OCL_Strong && + CGM.getCodeGenOpts().OptimizationLevel != 0) { + llvm::Value *value = Builder.CreateLoad(src, "captured"); + Builder.CreateStore(value, blockField); + // If this is an ARC __strong block-pointer variable, don't do a // block copy. // @@ -876,9 +903,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { } else { // Fake up a new variable so that EmitScalarInit doesn't think // we're referring to the variable in its own initializer. - ImplicitParamDecl blockFieldPseudoVar(getContext(), /*DC*/ nullptr, - SourceLocation(), /*name*/ nullptr, - type); + ImplicitParamDecl BlockFieldPseudoVar(getContext(), type, + ImplicitParamDecl::Other); // We use one of these or the other depending on whether the // reference is nested. @@ -891,8 +917,9 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // FIXME: Pass a specific location for the expr init so that the store is // attributed to a reasonable location - otherwise it may be attributed to // locations of subexpressions in the initialization. 
- EmitExprAsInit(&l2r, &blockFieldPseudoVar, - MakeAddrLValue(blockField, type, AlignmentSource::Decl), + LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); + EmitExprAsInit(&l2r, &BlockFieldPseudoVar, + MakeAddrLValue(blockField, type, BaseInfo), /*captured by init*/ false); } @@ -906,9 +933,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // Cast to the converted block-pointer type, which happens (somewhat // unfortunately) to be a pointer to function type. - llvm::Value *result = - Builder.CreateBitCast(blockAddr.getPointer(), - ConvertType(blockInfo.getBlockExpr()->getType())); + llvm::Value *result = Builder.CreatePointerCast( + blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType())); return result; } @@ -935,9 +961,8 @@ llvm::Type *CodeGenModule::getBlockDescriptorType() { // const char *signature; // the block signature // const char *layout; // reserved // }; - BlockDescriptorType = - llvm::StructType::create("struct.__block_descriptor", - UnsignedLongTy, UnsignedLongTy, nullptr); + BlockDescriptorType = llvm::StructType::create( + "struct.__block_descriptor", UnsignedLongTy, UnsignedLongTy); // Now form a pointer to that. unsigned AddrSpace = 0; @@ -961,9 +986,8 @@ llvm::Type *CodeGenModule::getGenericBlockLiteralType() { // struct __block_descriptor *__descriptor; // }; GenericBlockLiteralType = - llvm::StructType::create("struct.__block_literal_generic", - VoidPtrTy, IntTy, IntTy, VoidPtrTy, - BlockDescPtrTy, nullptr); + llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy, + IntTy, IntTy, VoidPtrTy, BlockDescPtrTy); return GenericBlockLiteralType; } @@ -976,21 +1000,41 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); // Get a pointer to the generic block literal. + // For OpenCL we generate generic AS void ptr to be able to reuse the same + // block definition for blocks with captures generated as private AS local + // variables and without captures generated as global AS program scope + // variables. + unsigned AddrSpace = 0; + if (getLangOpts().OpenCL) + AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic); + llvm::Type *BlockLiteralTy = - llvm::PointerType::getUnqual(CGM.getGenericBlockLiteralType()); + llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace); // Bitcast the callee to a block literal. - BlockPtr = Builder.CreateBitCast(BlockPtr, BlockLiteralTy, "block.literal"); + BlockPtr = + Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); // Get the function pointer from the literal. llvm::Value *FuncPtr = Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3); - BlockPtr = Builder.CreateBitCast(BlockPtr, VoidPtrTy); // Add the block literal. 
CallArgList Args; - Args.add(RValue::get(BlockPtr), getContext().VoidPtrTy); + + QualType VoidPtrQualTy = getContext().VoidPtrTy; + llvm::Type *GenericVoidPtrTy = VoidPtrTy; + if (getLangOpts().OpenCL) { + GenericVoidPtrTy = Builder.getInt8PtrTy( + getContext().getTargetAddressSpace(LangAS::opencl_generic)); + VoidPtrQualTy = + getContext().getPointerType(getContext().getAddrSpaceQualType( + getContext().VoidTy, LangAS::opencl_generic)); + } + + BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy); + Args.add(RValue::get(BlockPtr), VoidPtrQualTy); QualType FnType = BPT->getPointeeType(); @@ -1097,7 +1141,11 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, auto fields = builder.beginStruct(); // isa - fields.add(CGM.getNSConcreteGlobalBlock()); + fields.add((!CGM.getContext().getLangOpts().OpenCL) + ? CGM.getNSConcreteGlobalBlock() + : CGM.getNullPointer(CGM.VoidPtrPtrTy, + CGM.getContext().getPointerType(QualType( + CGM.getContext().VoidPtrTy)))); // __flags BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE; @@ -1114,16 +1162,19 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, // Descriptor fields.add(buildBlockDescriptor(CGM, blockInfo)); - llvm::Constant *literal = - fields.finishAndCreateGlobal("__block_literal_global", - blockInfo.BlockAlign, - /*constant*/ true); + unsigned AddrSpace = 0; + if (CGM.getContext().getLangOpts().OpenCL) + AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); + + llvm::Constant *literal = fields.finishAndCreateGlobal( + "__block_literal_global", blockInfo.BlockAlign, + /*constant*/ true, llvm::GlobalVariable::InternalLinkage, AddrSpace); // Return a constant of the appropriately-casted type. llvm::Type *RequiredType = CGM.getTypes().ConvertType(blockInfo.getBlockExpr()->getType()); llvm::Constant *Result = - llvm::ConstantExpr::getBitCast(literal, RequiredType); + llvm::ConstantExpr::getPointerCast(literal, RequiredType); CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result); return Result; } @@ -1155,9 +1206,13 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D, // Instead of messing around with LocalDeclMap, just set the value // directly as BlockPointer. - BlockPointer = Builder.CreateBitCast(arg, - BlockInfo->StructureType->getPointerTo(), - "block"); + BlockPointer = Builder.CreatePointerCast( + arg, + BlockInfo->StructureType->getPointerTo( + getContext().getLangOpts().OpenCL + ? getContext().getTargetAddressSpace(LangAS::opencl_generic) + : 0), + "block"); } Address CodeGenFunction::LoadBlockStruct() { @@ -1196,11 +1251,21 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, // The first argument is the block pointer. Just take it as a void* // and cast it later. QualType selfTy = getContext().VoidPtrTy; + + // For OpenCL passed block pointer can be private AS local variable or + // global AS program scope variable (for the case with and without captures). + // Generic AS is used therefore to be able to accommodate both private and + // generic AS in one implementation. 
+ if (getLangOpts().OpenCL) + selfTy = getContext().getPointerType(getContext().getAddrSpaceQualType( + getContext().VoidTy, LangAS::opencl_generic)); + IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor"); - ImplicitParamDecl selfDecl(getContext(), const_cast<BlockDecl*>(blockDecl), - SourceLocation(), II, selfTy); - args.push_back(&selfDecl); + ImplicitParamDecl SelfDecl(getContext(), const_cast<BlockDecl *>(blockDecl), + SourceLocation(), II, selfTy, + ImplicitParamDecl::ObjCSelf); + args.push_back(&SelfDecl); // Now add the rest of the parameters. args.append(blockDecl->param_begin(), blockDecl->param_end()); @@ -1323,23 +1388,102 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, return fn; } -/* - notes.push_back(HelperInfo()); - HelperInfo ¬e = notes.back(); - note.index = capture.getIndex(); - note.RequiresCopying = (ci->hasCopyExpr() || BlockRequiresCopying(type)); - note.cxxbar_import = ci->getCopyExpr(); - - if (ci->isByRef()) { - note.flag = BLOCK_FIELD_IS_BYREF; - if (type.isObjCGCWeak()) - note.flag |= BLOCK_FIELD_IS_WEAK; - } else if (type->isBlockPointerType()) { - note.flag = BLOCK_FIELD_IS_BLOCK; - } else { - note.flag = BLOCK_FIELD_IS_OBJECT; - } - */ +namespace { + +/// Represents a type of copy/destroy operation that should be performed for an +/// entity that's captured by a block. +enum class BlockCaptureEntityKind { + CXXRecord, // Copy or destroy + ARCWeak, + ARCStrong, + BlockObject, // Assign or release + None +}; + +/// Represents a captured entity that requires extra operations in order for +/// this entity to be copied or destroyed correctly. +struct BlockCaptureManagedEntity { + BlockCaptureEntityKind Kind; + BlockFieldFlags Flags; + const BlockDecl::Capture &CI; + const CGBlockInfo::Capture &Capture; + + BlockCaptureManagedEntity(BlockCaptureEntityKind Type, BlockFieldFlags Flags, + const BlockDecl::Capture &CI, + const CGBlockInfo::Capture &Capture) + : Kind(Type), Flags(Flags), CI(CI), Capture(Capture) {} +}; + +} // end anonymous namespace + +static std::pair<BlockCaptureEntityKind, BlockFieldFlags> +computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, + const LangOptions &LangOpts) { + if (CI.getCopyExpr()) { + assert(!CI.isByRef()); + // don't bother computing flags + return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags()); + } + BlockFieldFlags Flags; + if (CI.isByRef()) { + Flags = BLOCK_FIELD_IS_BYREF; + if (T.isObjCGCWeak()) + Flags |= BLOCK_FIELD_IS_WEAK; + return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); + } + if (!T->isObjCRetainableType()) + // For all other types, the memcpy is fine. + return std::make_pair(BlockCaptureEntityKind::None, Flags); + + Flags = BLOCK_FIELD_IS_OBJECT; + bool isBlockPointer = T->isBlockPointerType(); + if (isBlockPointer) + Flags = BLOCK_FIELD_IS_BLOCK; + + // Special rules for ARC captures: + Qualifiers QS = T.getQualifiers(); + + // We need to register __weak direct captures with the runtime. + if (QS.getObjCLifetime() == Qualifiers::OCL_Weak) + return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags); + + // We need to retain the copied value for __strong direct captures. + if (QS.getObjCLifetime() == Qualifiers::OCL_Strong) { + // If it's a block pointer, we have to copy the block and + // assign that to the destination pointer, so we might as + // well use _Block_object_assign. Otherwise we can avoid that. + return std::make_pair(!isBlockPointer ? 
BlockCaptureEntityKind::ARCStrong + : BlockCaptureEntityKind::BlockObject, + Flags); + } + + // Non-ARC captures of retainable pointers are strong and + // therefore require a call to _Block_object_assign. + if (!QS.getObjCLifetime() && !LangOpts.ObjCAutoRefCount) + return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); + + // Otherwise the memcpy is fine. + return std::make_pair(BlockCaptureEntityKind::None, Flags); +} + +/// Find the set of block captures that need to be explicitly copied or destroy. +static void findBlockCapturedManagedEntities( + const CGBlockInfo &BlockInfo, const LangOptions &LangOpts, + SmallVectorImpl<BlockCaptureManagedEntity> &ManagedCaptures, + llvm::function_ref<std::pair<BlockCaptureEntityKind, BlockFieldFlags>( + const BlockDecl::Capture &, QualType, const LangOptions &)> + Predicate) { + for (const auto &CI : BlockInfo.getBlockDecl()->captures()) { + const VarDecl *Variable = CI.getVariable(); + const CGBlockInfo::Capture &Capture = BlockInfo.getCapture(Variable); + if (Capture.isConstant()) + continue; + + auto Info = Predicate(CI, Variable->getType(), LangOpts); + if (Info.first != BlockCaptureEntityKind::None) + ManagedCaptures.emplace_back(Info.first, Info.second, CI, Capture); + } +} /// Generate the copy-helper function for a block closure object: /// static void block_copy_helper(block_t *dst, block_t *src); @@ -1354,12 +1498,12 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { ASTContext &C = getContext(); FunctionArgList args; - ImplicitParamDecl dstDecl(getContext(), nullptr, SourceLocation(), nullptr, - C.VoidPtrTy); - args.push_back(&dstDecl); - ImplicitParamDecl srcDecl(getContext(), nullptr, SourceLocation(), nullptr, - C.VoidPtrTy); - args.push_back(&srcDecl); + ImplicitParamDecl DstDecl(getContext(), C.VoidPtrTy, + ImplicitParamDecl::Other); + args.push_back(&DstDecl); + ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy, + ImplicitParamDecl::Other); + args.push_back(&SrcDecl); const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); @@ -1391,86 +1535,36 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { auto AL = ApplyDebugLocation::CreateArtificial(*this); llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo(); - Address src = GetAddrOfLocalVar(&srcDecl); + Address src = GetAddrOfLocalVar(&SrcDecl); src = Address(Builder.CreateLoad(src), blockInfo.BlockAlign); src = Builder.CreateBitCast(src, structPtrTy, "block.source"); - Address dst = GetAddrOfLocalVar(&dstDecl); + Address dst = GetAddrOfLocalVar(&DstDecl); dst = Address(Builder.CreateLoad(dst), blockInfo.BlockAlign); dst = Builder.CreateBitCast(dst, structPtrTy, "block.dest"); - const BlockDecl *blockDecl = blockInfo.getBlockDecl(); - - for (const auto &CI : blockDecl->captures()) { - const VarDecl *variable = CI.getVariable(); - QualType type = variable->getType(); - - const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); - if (capture.isConstant()) continue; - - const Expr *copyExpr = CI.getCopyExpr(); - BlockFieldFlags flags; - - bool useARCWeakCopy = false; - bool useARCStrongCopy = false; - - if (copyExpr) { - assert(!CI.isByRef()); - // don't bother computing flags + SmallVector<BlockCaptureManagedEntity, 4> CopiedCaptures; + findBlockCapturedManagedEntities(blockInfo, getLangOpts(), CopiedCaptures, + computeCopyInfoForBlockCapture); - } else if (CI.isByRef()) { - flags = BLOCK_FIELD_IS_BYREF; - if (type.isObjCGCWeak()) - flags |= 
BLOCK_FIELD_IS_WEAK; - - } else if (type->isObjCRetainableType()) { - flags = BLOCK_FIELD_IS_OBJECT; - bool isBlockPointer = type->isBlockPointerType(); - if (isBlockPointer) - flags = BLOCK_FIELD_IS_BLOCK; - - // Special rules for ARC captures: - Qualifiers qs = type.getQualifiers(); - - // We need to register __weak direct captures with the runtime. - if (qs.getObjCLifetime() == Qualifiers::OCL_Weak) { - useARCWeakCopy = true; - - // We need to retain the copied value for __strong direct captures. - } else if (qs.getObjCLifetime() == Qualifiers::OCL_Strong) { - // If it's a block pointer, we have to copy the block and - // assign that to the destination pointer, so we might as - // well use _Block_object_assign. Otherwise we can avoid that. - if (!isBlockPointer) - useARCStrongCopy = true; - - // Non-ARC captures of retainable pointers are strong and - // therefore require a call to _Block_object_assign. - } else if (!qs.getObjCLifetime() && !getLangOpts().ObjCAutoRefCount) { - // fall through - - // Otherwise the memcpy is fine. - } else { - continue; - } - - // For all other types, the memcpy is fine. - } else { - continue; - } + for (const auto &CopiedCapture : CopiedCaptures) { + const BlockDecl::Capture &CI = CopiedCapture.CI; + const CGBlockInfo::Capture &capture = CopiedCapture.Capture; + BlockFieldFlags flags = CopiedCapture.Flags; unsigned index = capture.getIndex(); Address srcField = Builder.CreateStructGEP(src, index, capture.getOffset()); Address dstField = Builder.CreateStructGEP(dst, index, capture.getOffset()); // If there's an explicit copy expression, we do that. - if (copyExpr) { - EmitSynthesizedCXXCopyCtor(dstField, srcField, copyExpr); - } else if (useARCWeakCopy) { + if (CI.getCopyExpr()) { + assert(CopiedCapture.Kind == BlockCaptureEntityKind::CXXRecord); + EmitSynthesizedCXXCopyCtor(dstField, srcField, CI.getCopyExpr()); + } else if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCWeak) { EmitARCCopyWeak(dstField, srcField); } else { llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src"); - if (useARCStrongCopy) { + if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCStrong) { // At -O0, store null into the destination field (so that the // storeStrong doesn't over-release) and then call storeStrong. // This is a workaround to not having an initStrong call. 
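Editorial aside, not part of the patch: the copy- and destroy-helper rewrites above share one shape, classify each capture once, then loop only over the captures that actually need work. Below is a minimal self-contained sketch of that shape; CaptureStub and ManagedCapture are invented stand-ins for BlockDecl::Capture and CGBlockInfo::Capture, and the callback plays the role of computeCopyInfoForBlockCapture or computeDestroyInfoForBlockCapture.

#include <functional>
#include <utility>
#include <vector>

enum class EntityKind { CXXRecord, ARCWeak, ARCStrong, BlockObject, None };

// Only the properties the sketch needs are modeled; the real classifiers also
// look at by-ref-ness, ObjC lifetime qualifiers, and copy expressions.
struct CaptureStub {
  bool IsConstant; // constant captures are materialized inline and skipped
  bool IsByRef;
  bool IsRetainable;
};

struct ManagedCapture {
  EntityKind Kind;
  unsigned Flags;
  const CaptureStub *Capture;
};

// Mirrors findBlockCapturedManagedEntities: each helper passes its own
// classification callback, and only captures whose kind is not None survive
// into the per-capture loop of the generated helper.
static void collectManagedCaptures(
    const std::vector<CaptureStub> &Captures,
    const std::function<std::pair<EntityKind, unsigned>(const CaptureStub &)>
        &Classify,
    std::vector<ManagedCapture> &Out) {
  for (const CaptureStub &C : Captures) {
    if (C.IsConstant)
      continue;
    std::pair<EntityKind, unsigned> Info = Classify(C);
    if (Info.first != EntityKind::None)
      Out.push_back({Info.first, Info.second, &C});
  }
}

In the patch itself the loop bodies in GenerateCopyHelperFunction and GenerateDestroyHelperFunction then switch on the returned kind (CXXRecord, ARCWeak, ARCStrong, BlockObject) to pick the emission strategy.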
@@ -1491,6 +1585,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent(); } } else { + assert(CopiedCapture.Kind == BlockCaptureEntityKind::BlockObject); srcValue = Builder.CreateBitCast(srcValue, VoidPtrTy); llvm::Value *dstAddr = Builder.CreateBitCast(dstField.getPointer(), VoidPtrTy); @@ -1498,6 +1593,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { dstAddr, srcValue, llvm::ConstantInt::get(Int32Ty, flags.getBitMask()) }; + const VarDecl *variable = CI.getVariable(); bool copyCanThrow = false; if (CI.isByRef() && variable->getType()->getAsCXXRecordDecl()) { const Expr *copyExpr = @@ -1521,6 +1617,52 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); } +static std::pair<BlockCaptureEntityKind, BlockFieldFlags> +computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, + const LangOptions &LangOpts) { + BlockFieldFlags Flags; + if (CI.isByRef()) { + Flags = BLOCK_FIELD_IS_BYREF; + if (T.isObjCGCWeak()) + Flags |= BLOCK_FIELD_IS_WEAK; + return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); + } + + if (const CXXRecordDecl *Record = T->getAsCXXRecordDecl()) { + if (Record->hasTrivialDestructor()) + return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags()); + return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags()); + } + + // Other types don't need to be destroy explicitly. + if (!T->isObjCRetainableType()) + return std::make_pair(BlockCaptureEntityKind::None, Flags); + + Flags = BLOCK_FIELD_IS_OBJECT; + if (T->isBlockPointerType()) + Flags = BLOCK_FIELD_IS_BLOCK; + + // Special rules for ARC captures. + Qualifiers QS = T.getQualifiers(); + + // Use objc_storeStrong for __strong direct captures; the + // dynamic tools really like it when we do this. + if (QS.getObjCLifetime() == Qualifiers::OCL_Strong) + return std::make_pair(BlockCaptureEntityKind::ARCStrong, Flags); + + // Support __weak direct captures. + if (QS.getObjCLifetime() == Qualifiers::OCL_Weak) + return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags); + + // Non-ARC captures are strong, and we need to use + // _Block_object_dispose. + if (!QS.hasObjCLifetime() && !LangOpts.ObjCAutoRefCount) + return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); + + // Otherwise, we have nothing to do. 
+ return std::make_pair(BlockCaptureEntityKind::None, Flags); +} + /// Generate the destroy-helper function for a block closure object: /// static void block_destroy_helper(block_t *theBlock); /// @@ -1533,9 +1675,9 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { ASTContext &C = getContext(); FunctionArgList args; - ImplicitParamDecl srcDecl(getContext(), nullptr, SourceLocation(), nullptr, - C.VoidPtrTy); - args.push_back(&srcDecl); + ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy, + ImplicitParamDecl::Other); + args.push_back(&SrcDecl); const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); @@ -1566,83 +1708,43 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo(); - Address src = GetAddrOfLocalVar(&srcDecl); + Address src = GetAddrOfLocalVar(&SrcDecl); src = Address(Builder.CreateLoad(src), blockInfo.BlockAlign); src = Builder.CreateBitCast(src, structPtrTy, "block"); - const BlockDecl *blockDecl = blockInfo.getBlockDecl(); - CodeGenFunction::RunCleanupsScope cleanups(*this); - for (const auto &CI : blockDecl->captures()) { - const VarDecl *variable = CI.getVariable(); - QualType type = variable->getType(); + SmallVector<BlockCaptureManagedEntity, 4> DestroyedCaptures; + findBlockCapturedManagedEntities(blockInfo, getLangOpts(), DestroyedCaptures, + computeDestroyInfoForBlockCapture); - const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); - if (capture.isConstant()) continue; - - BlockFieldFlags flags; - const CXXDestructorDecl *dtor = nullptr; - - bool useARCWeakDestroy = false; - bool useARCStrongDestroy = false; - - if (CI.isByRef()) { - flags = BLOCK_FIELD_IS_BYREF; - if (type.isObjCGCWeak()) - flags |= BLOCK_FIELD_IS_WEAK; - } else if (const CXXRecordDecl *record = type->getAsCXXRecordDecl()) { - if (record->hasTrivialDestructor()) - continue; - dtor = record->getDestructor(); - } else if (type->isObjCRetainableType()) { - flags = BLOCK_FIELD_IS_OBJECT; - if (type->isBlockPointerType()) - flags = BLOCK_FIELD_IS_BLOCK; - - // Special rules for ARC captures. - Qualifiers qs = type.getQualifiers(); - - // Use objc_storeStrong for __strong direct captures; the - // dynamic tools really like it when we do this. - if (qs.getObjCLifetime() == Qualifiers::OCL_Strong) { - useARCStrongDestroy = true; - - // Support __weak direct captures. - } else if (qs.getObjCLifetime() == Qualifiers::OCL_Weak) { - useARCWeakDestroy = true; - - // Non-ARC captures are strong, and we need to use _Block_object_dispose. - } else if (!qs.hasObjCLifetime() && !getLangOpts().ObjCAutoRefCount) { - // fall through - - // Otherwise, we have nothing to do. - } else { - continue; - } - } else { - continue; - } + for (const auto &DestroyedCapture : DestroyedCaptures) { + const BlockDecl::Capture &CI = DestroyedCapture.CI; + const CGBlockInfo::Capture &capture = DestroyedCapture.Capture; + BlockFieldFlags flags = DestroyedCapture.Flags; Address srcField = Builder.CreateStructGEP(src, capture.getIndex(), capture.getOffset()); - // If there's an explicit copy expression, we do that. - if (dtor) { - PushDestructorCleanup(dtor, srcField); + // If the captured record has a destructor then call it. 
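Before the per-capture dispatch continues below, a hedged summary of what computeDestroyInfoForBlockCapture above yields for some typical captures (ARC assumed where noted; the flag values follow directly from the code above):

    // Illustrative mapping only.
    //   __block int counter;  -> BlockObject, BLOCK_FIELD_IS_BYREF
    //   id obj;                -> ARCStrong,   BLOCK_FIELD_IS_OBJECT   (ARC)
    //   id obj;                -> BlockObject, BLOCK_FIELD_IS_OBJECT   (non-ARC)
    //   __weak id weakObj;     -> ARCWeak,     BLOCK_FIELD_IS_OBJECT   (ARC)
    //   void (^blk)(void);     -> ARCStrong,   BLOCK_FIELD_IS_BLOCK    (ARC)
    //   std::string s;         -> CXXRecord, no flags (nontrivial destructor)
    //   int n;                 -> None, nothing to destroy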
+ if (DestroyedCapture.Kind == BlockCaptureEntityKind::CXXRecord) { + const auto *Dtor = + CI.getVariable()->getType()->getAsCXXRecordDecl()->getDestructor(); + PushDestructorCleanup(Dtor, srcField); - // If this is a __weak capture, emit the release directly. - } else if (useARCWeakDestroy) { + // If this is a __weak capture, emit the release directly. + } else if (DestroyedCapture.Kind == BlockCaptureEntityKind::ARCWeak) { EmitARCDestroyWeak(srcField); // Destroy strong objects with a call if requested. - } else if (useARCStrongDestroy) { + } else if (DestroyedCapture.Kind == BlockCaptureEntityKind::ARCStrong) { EmitARCDestroyStrong(srcField, ARCImpreciseLifetime); // Otherwise we call _Block_object_dispose. It wouldn't be too // hard to just emit this as a cleanup if we wanted to make sure // that things were done in reverse. } else { + assert(DestroyedCapture.Kind == BlockCaptureEntityKind::BlockObject); llvm::Value *value = Builder.CreateLoad(srcField); value = Builder.CreateBitCast(value, VoidPtrTy); BuildBlockRelease(value, flags); @@ -1815,13 +1917,13 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, QualType R = Context.VoidTy; FunctionArgList args; - ImplicitParamDecl dst(CGF.getContext(), nullptr, SourceLocation(), nullptr, - Context.VoidPtrTy); - args.push_back(&dst); + ImplicitParamDecl Dst(CGF.getContext(), Context.VoidPtrTy, + ImplicitParamDecl::Other); + args.push_back(&Dst); - ImplicitParamDecl src(CGF.getContext(), nullptr, SourceLocation(), nullptr, - Context.VoidPtrTy); - args.push_back(&src); + ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy, + ImplicitParamDecl::Other); + args.push_back(&Src); const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args); @@ -1852,7 +1954,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, llvm::Type *byrefPtrType = byrefInfo.Type->getPointerTo(0); // dst->x - Address destField = CGF.GetAddrOfLocalVar(&dst); + Address destField = CGF.GetAddrOfLocalVar(&Dst); destField = Address(CGF.Builder.CreateLoad(destField), byrefInfo.ByrefAlignment); destField = CGF.Builder.CreateBitCast(destField, byrefPtrType); @@ -1860,7 +1962,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, "dest-object"); // src->x - Address srcField = CGF.GetAddrOfLocalVar(&src); + Address srcField = CGF.GetAddrOfLocalVar(&Src); srcField = Address(CGF.Builder.CreateLoad(srcField), byrefInfo.ByrefAlignment); srcField = CGF.Builder.CreateBitCast(srcField, byrefPtrType); @@ -1892,9 +1994,9 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, QualType R = Context.VoidTy; FunctionArgList args; - ImplicitParamDecl src(CGF.getContext(), nullptr, SourceLocation(), nullptr, - Context.VoidPtrTy); - args.push_back(&src); + ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy, + ImplicitParamDecl::Other); + args.push_back(&Src); const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args); @@ -1923,7 +2025,7 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, CGF.StartFunction(FD, R, Fn, FI, args); if (generator.needsDispose()) { - Address addr = CGF.GetAddrOfLocalVar(&src); + Address addr = CGF.GetAddrOfLocalVar(&Src); addr = Address(CGF.Builder.CreateLoad(addr), byrefInfo.ByrefAlignment); auto byrefPtrType = byrefInfo.Type->getPointerTo(0); addr = CGF.Builder.CreateBitCast(addr, byrefPtrType); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp index 
b3d02f1..f3527b0 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp @@ -420,10 +420,11 @@ getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { llvm::Value * CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, - llvm::IntegerType *ResType) { + llvm::IntegerType *ResType, + llvm::Value *EmittedE) { uint64_t ObjectSize; if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) - return emitBuiltinObjectSize(E, Type, ResType); + return emitBuiltinObjectSize(E, Type, ResType, EmittedE); return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); } @@ -432,9 +433,14 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, /// - A llvm::Argument (if E is a param with the pass_object_size attribute on /// it) /// - A call to the @llvm.objectsize intrinsic +/// +/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null +/// and we wouldn't otherwise try to reference a pass_object_size parameter, +/// we'll call @llvm.objectsize on EmittedE, rather than emitting E. llvm::Value * CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, - llvm::IntegerType *ResType) { + llvm::IntegerType *ResType, + llvm::Value *EmittedE) { // We need to reference an argument if the pointer is a parameter with the // pass_object_size attribute. if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { @@ -457,16 +463,20 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't // evaluate E for side-effects. In either case, we shouldn't lower to // @llvm.objectsize. - if (Type == 3 || E->HasSideEffects(getContext())) + if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext()))) return getDefaultBuiltinObjectSizeResult(Type, ResType); - // LLVM only supports 0 and 2, make sure that we pass along that - // as a boolean. - auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1); - // FIXME: Get right address space. - llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)}; - Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys); - return Builder.CreateCall(F, {EmitScalarExpr(E), CI}); + Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E); + assert(Ptr->getType()->isPointerTy() && + "Non-pointer passed to __builtin_object_size?"); + + Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); + + // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. + Value *Min = Builder.getInt1((Type & 2) != 0); + // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
+ Value *NullIsUnknown = Builder.getTrue(); + return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown}); } // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we @@ -482,10 +492,12 @@ enum class CodeGenFunction::MSVCIntrin { _InterlockedIncrement, _InterlockedOr, _InterlockedXor, + _interlockedbittestandset, + __fastfail, }; Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, - const CallExpr *E) { + const CallExpr *E) { switch (BuiltinID) { case MSVCIntrin::_BitScanForward: case MSVCIntrin::_BitScanReverse: { @@ -548,6 +560,22 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, case MSVCIntrin::_InterlockedXor: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); + case MSVCIntrin::_interlockedbittestandset: { + llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); + llvm::Value *Bit = EmitScalarExpr(E->getArg(1)); + AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( + AtomicRMWInst::Or, Addr, + Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit), + llvm::AtomicOrdering::SequentiallyConsistent); + // Shift the relevant bit to the least significant position, truncate to + // the result type, and test the low bit. + llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit); + llvm::Value *Truncated = + Builder.CreateTrunc(Shifted, ConvertType(E->getType())); + return Builder.CreateAnd(Truncated, + ConstantInt::get(Truncated->getType(), 1)); + } + case MSVCIntrin::_InterlockedDecrement: { llvm::Type *IntTy = ConvertType(E->getType()); AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( @@ -566,6 +594,37 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, llvm::AtomicOrdering::SequentiallyConsistent); return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)); } + + case MSVCIntrin::__fastfail: { + // Request immediate process termination from the kernel. The instruction + // sequences to do this are documented on MSDN: + // https://msdn.microsoft.com/en-us/library/dn774154.aspx + llvm::Triple::ArchType ISA = getTarget().getTriple().getArch(); + StringRef Asm, Constraints; + switch (ISA) { + default: + ErrorUnsupported(E, "__fastfail call for this architecture"); + break; + case llvm::Triple::x86: + case llvm::Triple::x86_64: + Asm = "int $$0x29"; + Constraints = "{cx}"; + break; + case llvm::Triple::thumb: + Asm = "udf #251"; + Constraints = "{r0}"; + break; + } + llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); + llvm::InlineAsm *IA = + llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); + llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoReturn); + CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); + CS.setAttributes(NoReturnAttr); + return CS.getInstruction(); + } } llvm_unreachable("Incorrect MSVC intrinsic!"); } @@ -932,7 +991,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // We pass this builtin onto the optimizer so that it can figure out the // object size in more complex cases. 
- return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType)); + return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, + /*EmittedE=*/nullptr)); } case Builtin::BI__builtin_prefetch: { Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); @@ -1750,12 +1810,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__atomic_signal_fence: case Builtin::BI__c11_atomic_thread_fence: case Builtin::BI__c11_atomic_signal_fence: { - llvm::SynchronizationScope Scope; + llvm::SyncScope::ID SSID; if (BuiltinID == Builtin::BI__atomic_signal_fence || BuiltinID == Builtin::BI__c11_atomic_signal_fence) - Scope = llvm::SingleThread; + SSID = llvm::SyncScope::SingleThread; else - Scope = llvm::CrossThread; + SSID = llvm::SyncScope::System; Value *Order = EmitScalarExpr(E->getArg(0)); if (isa<llvm::ConstantInt>(Order)) { int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); @@ -1765,17 +1825,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, break; case 1: // memory_order_consume case 2: // memory_order_acquire - Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); break; case 3: // memory_order_release - Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); break; case 4: // memory_order_acq_rel - Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); + Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); break; case 5: // memory_order_seq_cst - Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, - Scope); + Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); break; } return RValue::get(nullptr); @@ -1792,23 +1851,23 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); Builder.SetInsertPoint(AcquireBB); - Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(1), AcquireBB); SI->addCase(Builder.getInt32(2), AcquireBB); Builder.SetInsertPoint(ReleaseBB); - Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(3), ReleaseBB); Builder.SetInsertPoint(AcqRelBB); - Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); + Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(4), AcqRelBB); Builder.SetInsertPoint(SeqCstBB); - Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope); + Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(5), SeqCstBB); @@ -2195,16 +2254,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI_InterlockedXor16: case Builtin::BI_InterlockedXor: return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); - case Builtin::BI__readfsdword: { - llvm::Type *IntTy = ConvertType(E->getType()); - Value *IntToPtr = - Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), - llvm::PointerType::get(IntTy, 257)); - LoadInst *Load = Builder.CreateAlignedLoad( - IntTy, IntToPtr, getContext().getTypeAlignInChars(E->getType())); - Load->setVolatile(true); - return RValue::get(Load); - } + case 
Builtin::BI_interlockedbittestandset: + return RValue::get( + EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E)); case Builtin::BI__exception_code: case Builtin::BI_exception_code: @@ -2218,9 +2270,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI_setjmpex: { if (getTarget().getTriple().isOSMSVCRT()) { llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; - llvm::AttributeSet ReturnsTwiceAttr = - AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, - llvm::Attribute::ReturnsTwice); + llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::ReturnsTwice); llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), "_setjmpex", ReturnsTwiceAttr, /*Local=*/true); @@ -2238,9 +2290,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, } case Builtin::BI_setjmp: { if (getTarget().getTriple().isOSMSVCRT()) { - llvm::AttributeSet ReturnsTwiceAttr = - AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, - llvm::Attribute::ReturnsTwice); + llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::ReturnsTwice); llvm::Value *Buf = Builder.CreateBitOrPointerCast( EmitScalarExpr(E->getArg(0)), Int8PtrTy); llvm::CallSite CS; @@ -2276,6 +2328,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, break; } + case Builtin::BI__fastfail: + return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); + case Builtin::BI__builtin_coro_size: { auto & Context = getContext(); auto SizeTy = Context.getSizeType(); @@ -2492,25 +2547,36 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, unsigned NumArgs = E->getNumArgs(); llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); - llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy); + llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + getContext().getTargetAddressSpace(LangAS::opencl_generic)); llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); - llvm::Value *Range = EmitScalarExpr(E->getArg(2)); + LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); + llvm::Value *Range = NDRangeL.getAddress().getPointer(); + llvm::Type *RangeTy = NDRangeL.getAddress().getType(); if (NumArgs == 4) { // The most basic form of the call with parameters: // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) Name = "__enqueue_kernel_basic"; - llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy}; + llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); - llvm::Value *Block = - Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); + llvm::Value *Block = Builder.CreatePointerCast( + EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); - return RValue::get(Builder.CreateCall( - CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block})); + AttrBuilder B; + B.addAttribute(Attribute::ByVal); + llvm::AttributeList ByValAttrSet = + llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); + + auto RTCall = + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), + {Queue, Flags, Range, Block}); + RTCall->setAttributes(ByValAttrSet); + return RValue::get(RTCall); } assert(NumArgs >= 5 && "Invalid enqueue_kernel 
signature"); @@ -2518,14 +2584,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, if (E->getArg(3)->getType()->isBlockPointerType()) { // No events passed, but has variadic arguments. Name = "__enqueue_kernel_vaargs"; - llvm::Value *Block = - Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); + llvm::Value *Block = Builder.CreatePointerCast( + EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); // Create a vector of the arguments, as well as a constant value to // express to the runtime the number of variadic arguments. std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block, ConstantInt::get(IntTy, NumArgs - 4)}; - std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy, - IntTy}; + std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, + GenericVoidPtrTy, IntTy}; // Each of the following arguments specifies the size of the corresponding // argument passed to the enqueued block. @@ -2555,12 +2621,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Convert to generic address space. EventList = Builder.CreatePointerCast(EventList, EventPtrTy); ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); - llvm::Value *Block = - Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy); + llvm::Value *Block = Builder.CreatePointerCast( + EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy); - std::vector<llvm::Type *> ArgTys = {QueueTy, Int32Ty, RangeTy, - Int32Ty, EventPtrTy, EventPtrTy, - Int8PtrTy}; + std::vector<llvm::Type *> ArgTys = { + QueueTy, Int32Ty, RangeTy, Int32Ty, + EventPtrTy, EventPtrTy, GenericVoidPtrTy}; std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, EventList, ClkEvent, Block}; @@ -2592,30 +2658,35 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), llvm::ArrayRef<llvm::Value *>(Args))); } + LLVM_FALLTHROUGH; } // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block // parameter. 
case Builtin::BIget_kernel_work_group_size: { + llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + getContext().getTargetAddressSpace(LangAS::opencl_generic)); Value *Arg = EmitScalarExpr(E->getArg(0)); - Arg = Builder.CreateBitCast(Arg, Int8PtrTy); - return RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, Int8PtrTy, false), - "__get_kernel_work_group_size_impl"), - Arg)); + Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), + "__get_kernel_work_group_size_impl"), + Arg)); } case Builtin::BIget_kernel_preferred_work_group_size_multiple: { + llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + getContext().getTargetAddressSpace(LangAS::opencl_generic)); Value *Arg = EmitScalarExpr(E->getArg(0)); - Arg = Builder.CreateBitCast(Arg, Int8PtrTy); + Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); return RValue::get(Builder.CreateCall( CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, Int8PtrTy, false), + llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), "__get_kernel_preferred_work_group_multiple_impl"), Arg)); } case Builtin::BIprintf: - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) - return EmitCUDADevicePrintfCallExpr(E, ReturnValue); + if (getTarget().getTriple().isNVPTX()) + return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); break; case Builtin::BI__builtin_canonicalize: case Builtin::BI__builtin_canonicalizef: @@ -2680,7 +2751,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Push a clang.arc.use cleanup for each object in RetainableOperands. The // cleanup will cause the use to appear after the final log call, keeping - // the object valid while it’s held in the log buffer. Note that if there’s + // the object valid while it's held in the log buffer. Note that if there's // a release cleanup on the object, it will already be active; since // cleanups are emitted in reverse order, the use will occur before the // object is released. @@ -2698,6 +2769,59 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(ConstantInt::get(ConvertType(E->getType()), Layout.size().getQuantity())); } + + case Builtin::BI__xray_customevent: { + if (!ShouldXRayInstrumentFunction()) + return RValue::getIgnored(); + if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) { + if (XRayAttr->neverXRayInstrument()) + return RValue::getIgnored(); + } + Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent); + auto FTy = F->getFunctionType(); + auto Arg0 = E->getArg(0); + auto Arg0Val = EmitScalarExpr(Arg0); + auto Arg0Ty = Arg0->getType(); + auto PTy0 = FTy->getParamType(0); + if (PTy0 != Arg0Val->getType()) { + if (Arg0Ty->isArrayType()) + Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer(); + else + Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0); + } + auto Arg1 = EmitScalarExpr(E->getArg(1)); + auto PTy1 = FTy->getParamType(1); + if (PTy1 != Arg1->getType()) + Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1); + return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); + } + + case Builtin::BI__builtin_ms_va_start: + case Builtin::BI__builtin_ms_va_end: + return RValue::get( + EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), + BuiltinID == Builtin::BI__builtin_ms_va_start)); + + case Builtin::BI__builtin_ms_va_copy: { + // Lower this manually. 
We can't reliably determine whether or not any + // given va_copy() is for a Win64 va_list from the calling convention + // alone, because it's legal to do this from a System V ABI function. + // With opaque pointer types, we won't have enough information in LLVM + // IR to determine this from the argument types, either. Best to do it + // now, while we have enough information. + Address DestAddr = EmitMSVAListRef(E->getArg(0)); + Address SrcAddr = EmitMSVAListRef(E->getArg(1)); + + llvm::Type *BPP = Int8PtrPtrTy; + + DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), + DestAddr.getAlignment()); + SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), + SrcAddr.getAlignment()); + + Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); + return RValue::get(Builder.CreateStore(ArgPtr, DestAddr)); + } } // If this is an alias for a lib function (e.g. __builtin_sin), emit @@ -3716,6 +3840,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vcalt_v: case NEON::BI__builtin_neon_vcaltq_v: std::swap(Ops[0], Ops[1]); + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vcage_v: case NEON::BI__builtin_neon_vcageq_v: case NEON::BI__builtin_neon_vcagt_v: @@ -4474,7 +4599,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd); - llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr); + llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty); Address Tmp = CreateMemTemp(E->getArg(0)->getType()); Value *Val = EmitScalarExpr(E->getArg(0)); @@ -4959,6 +5084,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vsri_n_v: case NEON::BI__builtin_neon_vsriq_n_v: rightShift = true; + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vsli_n_v: case NEON::BI__builtin_neon_vsliq_n_v: Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); @@ -5304,7 +5430,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp); - llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr); + llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty); Address Tmp = CreateMemTemp(E->getArg(0)->getType()); EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); @@ -7115,33 +7241,15 @@ static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, return EmitX86Select(CGF, Ops[3], Res, Ops[2]); } +static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, + llvm::Type *DstTy) { + unsigned NumberOfElements = DstTy->getVectorNumElements(); + Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); + return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); +} + Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { - if (BuiltinID == X86::BI__builtin_ms_va_start || - BuiltinID == X86::BI__builtin_ms_va_end) - return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), - BuiltinID == X86::BI__builtin_ms_va_start); - if (BuiltinID == X86::BI__builtin_ms_va_copy) { - // Lower this manually. We can't reliably determine whether or not any - // given va_copy() is for a Win64 va_list from the calling convention - // alone, because it's legal to do this from a System V ABI function. 
- // With opaque pointer types, we won't have enough information in LLVM - // IR to determine this from the argument types, either. Best to do it - // now, while we have enough information. - Address DestAddr = EmitMSVAListRef(E->getArg(0)); - Address SrcAddr = EmitMSVAListRef(E->getArg(1)); - - llvm::Type *BPP = Int8PtrPtrTy; - - DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), - DestAddr.getAlignment()); - SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), - SrcAddr.getAlignment()); - - Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); - return Builder.CreateStore(ArgPtr, DestAddr); - } - SmallVector<Value*, 4> Ops; // Find out if any arguments are required to be integer constant expressions. @@ -7228,39 +7336,44 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, AVX512PF, AVX512VBMI, AVX512IFMA, + AVX5124VNNIW, // TODO implement this fully + AVX5124FMAPS, // TODO implement this fully + AVX512VPOPCNTDQ, MAX }; - X86Features Feature = StringSwitch<X86Features>(FeatureStr) - .Case("cmov", X86Features::CMOV) - .Case("mmx", X86Features::MMX) - .Case("popcnt", X86Features::POPCNT) - .Case("sse", X86Features::SSE) - .Case("sse2", X86Features::SSE2) - .Case("sse3", X86Features::SSE3) - .Case("ssse3", X86Features::SSSE3) - .Case("sse4.1", X86Features::SSE4_1) - .Case("sse4.2", X86Features::SSE4_2) - .Case("avx", X86Features::AVX) - .Case("avx2", X86Features::AVX2) - .Case("sse4a", X86Features::SSE4_A) - .Case("fma4", X86Features::FMA4) - .Case("xop", X86Features::XOP) - .Case("fma", X86Features::FMA) - .Case("avx512f", X86Features::AVX512F) - .Case("bmi", X86Features::BMI) - .Case("bmi2", X86Features::BMI2) - .Case("aes", X86Features::AES) - .Case("pclmul", X86Features::PCLMUL) - .Case("avx512vl", X86Features::AVX512VL) - .Case("avx512bw", X86Features::AVX512BW) - .Case("avx512dq", X86Features::AVX512DQ) - .Case("avx512cd", X86Features::AVX512CD) - .Case("avx512er", X86Features::AVX512ER) - .Case("avx512pf", X86Features::AVX512PF) - .Case("avx512vbmi", X86Features::AVX512VBMI) - .Case("avx512ifma", X86Features::AVX512IFMA) - .Default(X86Features::MAX); + X86Features Feature = + StringSwitch<X86Features>(FeatureStr) + .Case("cmov", X86Features::CMOV) + .Case("mmx", X86Features::MMX) + .Case("popcnt", X86Features::POPCNT) + .Case("sse", X86Features::SSE) + .Case("sse2", X86Features::SSE2) + .Case("sse3", X86Features::SSE3) + .Case("ssse3", X86Features::SSSE3) + .Case("sse4.1", X86Features::SSE4_1) + .Case("sse4.2", X86Features::SSE4_2) + .Case("avx", X86Features::AVX) + .Case("avx2", X86Features::AVX2) + .Case("sse4a", X86Features::SSE4_A) + .Case("fma4", X86Features::FMA4) + .Case("xop", X86Features::XOP) + .Case("fma", X86Features::FMA) + .Case("avx512f", X86Features::AVX512F) + .Case("bmi", X86Features::BMI) + .Case("bmi2", X86Features::BMI2) + .Case("aes", X86Features::AES) + .Case("pclmul", X86Features::PCLMUL) + .Case("avx512vl", X86Features::AVX512VL) + .Case("avx512bw", X86Features::AVX512BW) + .Case("avx512dq", X86Features::AVX512DQ) + .Case("avx512cd", X86Features::AVX512CD) + .Case("avx512er", X86Features::AVX512ER) + .Case("avx512pf", X86Features::AVX512PF) + .Case("avx512vbmi", X86Features::AVX512VBMI) + .Case("avx512ifma", X86Features::AVX512IFMA) + .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ) + .Default(X86Features::MAX); assert(Feature != X86Features::MAX && "Invalid feature!"); // Matching the struct layout from the compiler-rt/libgcc structure that is @@ -7269,8 +7382,8 @@ Value 
*CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // unsigned int __cpu_type; // unsigned int __cpu_subtype; // unsigned int __cpu_features[1]; - llvm::Type *STy = llvm::StructType::get( - Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr); + llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, + llvm::ArrayType::get(Int32Ty, 1)); // Grab the global __cpu_model. llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); @@ -7321,7 +7434,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_undef128: case X86::BI__builtin_ia32_undef256: case X86::BI__builtin_ia32_undef512: - return UndefValue::get(ConvertType(E->getType())); + // The x86 definition of "undef" is not the same as the LLVM definition + // (PR32176). We leave optimizing away an unnecessary zero constant to the + // IR optimizer and backend. + // TODO: If we had a "freeze" IR instruction to generate a fixed undef + // value, we should use that here instead of a zero. + return llvm::Constant::getNullValue(ConvertType(E->getType())); case X86::BI__builtin_ia32_vec_init_v8qi: case X86::BI__builtin_ia32_vec_init_v4hi: case X86::BI__builtin_ia32_vec_init_v2si: @@ -7408,6 +7526,26 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_storesd128_mask: { return EmitX86MaskedStore(*this, Ops, 16); } + case X86::BI__builtin_ia32_vpopcntd_512: + case X86::BI__builtin_ia32_vpopcntq_512: { + llvm::Type *ResultType = ConvertType(E->getType()); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); + return Builder.CreateCall(F, Ops); + } + case X86::BI__builtin_ia32_cvtmask2b128: + case X86::BI__builtin_ia32_cvtmask2b256: + case X86::BI__builtin_ia32_cvtmask2b512: + case X86::BI__builtin_ia32_cvtmask2w128: + case X86::BI__builtin_ia32_cvtmask2w256: + case X86::BI__builtin_ia32_cvtmask2w512: + case X86::BI__builtin_ia32_cvtmask2d128: + case X86::BI__builtin_ia32_cvtmask2d256: + case X86::BI__builtin_ia32_cvtmask2d512: + case X86::BI__builtin_ia32_cvtmask2q128: + case X86::BI__builtin_ia32_cvtmask2q256: + case X86::BI__builtin_ia32_cvtmask2q512: + return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); + case X86::BI__builtin_ia32_movdqa32store128_mask: case X86::BI__builtin_ia32_movdqa64store128_mask: case X86::BI__builtin_ia32_storeaps128_mask: @@ -7788,6 +7926,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } // We can't handle 8-31 immediates with native IR, use the intrinsic. + // Except for predicates that create constants. Intrinsic::ID ID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); @@ -7795,12 +7934,32 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, ID = Intrinsic::x86_sse_cmp_ps; break; case X86::BI__builtin_ia32_cmpps256: + // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector + // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... + if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { + Value *Constant = (CC == 0xf || CC == 0x1f) ? 
+ llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) : + llvm::Constant::getNullValue(Builder.getInt32Ty()); + Value *Vec = Builder.CreateVectorSplat( + Ops[0]->getType()->getVectorNumElements(), Constant); + return Builder.CreateBitCast(Vec, Ops[0]->getType()); + } ID = Intrinsic::x86_avx_cmp_ps_256; break; case X86::BI__builtin_ia32_cmppd: ID = Intrinsic::x86_sse2_cmp_pd; break; case X86::BI__builtin_ia32_cmppd256: + // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector + // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... + if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { + Value *Constant = (CC == 0xf || CC == 0x1f) ? + llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) : + llvm::Constant::getNullValue(Builder.getInt64Ty()); + Value *Vec = Builder.CreateVectorSplat( + Ops[0]->getType()->getVectorNumElements(), Constant); + return Builder.CreateBitCast(Vec, Ops[0]->getType()); + } ID = Intrinsic::x86_avx_cmp_pd_256; break; } @@ -7881,13 +8040,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__faststorefence: { return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, - llvm::CrossThread); + llvm::SyncScope::System); } case X86::BI_ReadWriteBarrier: case X86::BI_ReadBarrier: case X86::BI_WriteBarrier: { return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, - llvm::SingleThread); + llvm::SyncScope::SingleThread); } case X86::BI_BitScanForward: case X86::BI_BitScanForward64: @@ -7922,6 +8081,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // instruction, but it will create a memset that won't be optimized away. return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true); } + case X86::BI__ud2: + // llvm.trap makes a ud2a instruction on x86. + return EmitTrapCall(Intrinsic::trap); + case X86::BI__int2c: { + // This syscall signals a driver assertion failure in x86 NT kernels. 
+ llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); + llvm::InlineAsm *IA = + llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true); + llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoReturn); + CallSite CS = Builder.CreateCall(IA); + CS.setAttributes(NoReturnAttr); + return CS.getInstruction(); + } + case X86::BI__readfsbyte: + case X86::BI__readfsword: + case X86::BI__readfsdword: + case X86::BI__readfsqword: { + llvm::Type *IntTy = ConvertType(E->getType()); + Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), + llvm::PointerType::get(IntTy, 257)); + LoadInst *Load = Builder.CreateAlignedLoad( + IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); + Load->setVolatile(true); + return Load; + } + case X86::BI__readgsbyte: + case X86::BI__readgsword: + case X86::BI__readgsdword: + case X86::BI__readgsqword: { + llvm::Type *IntTy = ConvertType(E->getType()); + Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), + llvm::PointerType::get(IntTy, 256)); + LoadInst *Load = Builder.CreateAlignedLoad( + IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); + Load->setVolatile(true); + return Load; + } } } @@ -8279,6 +8477,80 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, Ops); } } + + case PPC::BI__builtin_vsx_xxpermdi: { + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); + assert(ArgCI && "Third arg must be constant integer!"); + + unsigned Index = ArgCI->getZExtValue(); + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); + + // Element zero comes from the first input vector and element one comes from + // the second. The element indices within each vector are numbered in big + // endian order so the shuffle mask must be adjusted for this on little + // endian platforms (i.e. index is complemented and source vector reversed). + unsigned ElemIdx0; + unsigned ElemIdx1; + if (getTarget().isLittleEndian()) { + ElemIdx0 = (~Index & 1) + 2; + ElemIdx1 = (~Index & 2) >> 1; + } else { // BigEndian + ElemIdx0 = (Index & 2) >> 1; + ElemIdx1 = 2 + (Index & 1); + } + + Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0), + ConstantInt::get(Int32Ty, ElemIdx1)}; + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + + Value *ShuffleCall = + Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); + QualType BIRetType = E->getType(); + auto RetTy = ConvertType(BIRetType); + return Builder.CreateBitCast(ShuffleCall, RetTy); + } + + case PPC::BI__builtin_vsx_xxsldwi: { + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); + assert(ArgCI && "Third argument must be a compile time constant"); + unsigned Index = ArgCI->getZExtValue() & 0x3; + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); + Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4)); + + // Create a shuffle mask + unsigned ElemIdx0; + unsigned ElemIdx1; + unsigned ElemIdx2; + unsigned ElemIdx3; + if (getTarget().isLittleEndian()) { + // Little endian element N comes from element 8+N-Index of the + // concatenated wide vector (of course, using modulo arithmetic on + // the total number of elements). 
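As a worked example of the index arithmetic assigned just below (Index is masked to 0-3 above, and element numbers refer to the 8-element concatenation of the two source vectors):

    // Index = 1:
    //   big endian:    {ElemIdx0..ElemIdx3} = {1, 2, 3, 4}
    //   little endian: {(8-1)%8, (9-1)%8, (10-1)%8, (11-1)%8} = {7, 0, 1, 2}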
+ ElemIdx0 = (8 - Index) % 8; + ElemIdx1 = (9 - Index) % 8; + ElemIdx2 = (10 - Index) % 8; + ElemIdx3 = (11 - Index) % 8; + } else { + // Big endian ElemIdx<N> = Index + N + ElemIdx0 = Index; + ElemIdx1 = Index + 1; + ElemIdx2 = Index + 2; + ElemIdx3 = Index + 3; + } + + Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0), + ConstantInt::get(Int32Ty, ElemIdx1), + ConstantInt::get(Int32Ty, ElemIdx2), + ConstantInt::get(Int32Ty, ElemIdx3)}; + + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + Value *ShuffleCall = + Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); + QualType BIRetType = E->getType(); + auto RetTy = ConvertType(BIRetType); + return Builder.CreateBitCast(ShuffleCall, RetTy); + } } } @@ -8326,6 +8598,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_swizzle: return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); + case AMDGPU::BI__builtin_amdgcn_mov_dpp: { + llvm::SmallVector<llvm::Value *, 5> Args; + for (unsigned I = 0; I != 5; ++I) + Args.push_back(EmitScalarExpr(E->getArg(I))); + Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp, + Args[0]->getType()); + return Builder.CreateCall(F, Args); + } case AMDGPU::BI__builtin_amdgcn_div_fixup: case AMDGPU::BI__builtin_amdgcn_div_fixupf: case AMDGPU::BI__builtin_amdgcn_div_fixuph: @@ -8391,7 +8671,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_classf: case AMDGPU::BI__builtin_amdgcn_classh: return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); - + case AMDGPU::BI__builtin_amdgcn_fmed3f: + case AMDGPU::BI__builtin_amdgcn_fmed3h: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); case AMDGPU::BI__builtin_amdgcn_read_exec: { CallInst *CI = cast<CallInst>( EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); @@ -8510,12 +8792,14 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {X, Undef}); } + case SystemZ::BI__builtin_s390_vfsqsb: case SystemZ::BI__builtin_s390_vfsqdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); return Builder.CreateCall(F, X); } + case SystemZ::BI__builtin_s390_vfmasb: case SystemZ::BI__builtin_s390_vfmadb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8524,6 +8808,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); return Builder.CreateCall(F, {X, Y, Z}); } + case SystemZ::BI__builtin_s390_vfmssb: case SystemZ::BI__builtin_s390_vfmsdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8533,12 +8818,35 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); } + case SystemZ::BI__builtin_s390_vfnmasb: + case SystemZ::BI__builtin_s390_vfnmadb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + Value *Z = EmitScalarExpr(E->getArg(2)); + Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); + Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); + return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub"); + } + 
case SystemZ::BI__builtin_s390_vfnmssb: + case SystemZ::BI__builtin_s390_vfnmsdb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + Value *Z = EmitScalarExpr(E->getArg(2)); + Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); + Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); + Value *NegZ = Builder.CreateFSub(Zero, Z, "sub"); + return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ})); + } + case SystemZ::BI__builtin_s390_vflpsb: case SystemZ::BI__builtin_s390_vflpdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); return Builder.CreateCall(F, X); } + case SystemZ::BI__builtin_s390_vflnsb: case SystemZ::BI__builtin_s390_vflndb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8546,6 +8854,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub"); } + case SystemZ::BI__builtin_s390_vfisb: case SystemZ::BI__builtin_s390_vfidb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8555,8 +8864,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); (void)IsConstM4; (void)IsConstM5; - // Check whether this instance of vfidb can be represented via a LLVM - // standard intrinsic. We only support some combinations of M4 and M5. + // Check whether this instance can be represented via a LLVM standard + // intrinsic. We only support some combinations of M4 and M5. Intrinsic::ID ID = Intrinsic::not_intrinsic; switch (M4.getZExtValue()) { default: break; @@ -8581,11 +8890,76 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(ID, ResultType); return Builder.CreateCall(F, X); } - Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb); + switch (BuiltinID) { + case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break; + case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break; + default: llvm_unreachable("Unknown BuiltinID"); + } + Function *F = CGM.getIntrinsic(ID); Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); return Builder.CreateCall(F, {X, M4Value, M5Value}); } + case SystemZ::BI__builtin_s390_vfmaxsb: + case SystemZ::BI__builtin_s390_vfmaxdb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + // Constant-fold the M4 mask argument. + llvm::APSInt M4; + bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); + assert(IsConstM4 && "Constant arg isn't actually constant?"); + (void)IsConstM4; + // Check whether this instance can be represented via a LLVM standard + // intrinsic. We only support some values of M4. 
+ Intrinsic::ID ID = Intrinsic::not_intrinsic; + switch (M4.getZExtValue()) { + default: break; + case 4: ID = Intrinsic::maxnum; break; + } + if (ID != Intrinsic::not_intrinsic) { + Function *F = CGM.getIntrinsic(ID, ResultType); + return Builder.CreateCall(F, {X, Y}); + } + switch (BuiltinID) { + case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break; + case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break; + default: llvm_unreachable("Unknown BuiltinID"); + } + Function *F = CGM.getIntrinsic(ID); + Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); + return Builder.CreateCall(F, {X, Y, M4Value}); + } + case SystemZ::BI__builtin_s390_vfminsb: + case SystemZ::BI__builtin_s390_vfmindb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + // Constant-fold the M4 mask argument. + llvm::APSInt M4; + bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); + assert(IsConstM4 && "Constant arg isn't actually constant?"); + (void)IsConstM4; + // Check whether this instance can be represented via a LLVM standard + // intrinsic. We only support some values of M4. + Intrinsic::ID ID = Intrinsic::not_intrinsic; + switch (M4.getZExtValue()) { + default: break; + case 4: ID = Intrinsic::minnum; break; + } + if (ID != Intrinsic::not_intrinsic) { + Function *F = CGM.getIntrinsic(ID, ResultType); + return Builder.CreateCall(F, {X, Y}); + } + switch (BuiltinID) { + case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break; + case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break; + default: llvm_unreachable("Unknown BuiltinID"); + } + Function *F = CGM.getIntrinsic(ID); + Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); + return Builder.CreateCall(F, {X, Y, M4Value}); + } // Vector intrisincs that output the post-instruction CC value. 
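A brief summary of the M4 handling in the vec_max/vec_min lowering just above (hedged; only the mapping visible in this hunk):

    // M4 == 4  -> @llvm.maxnum / @llvm.minnum, so the optimizer can reason about it
    // other M4 -> Intrinsic::s390_vfmax*/s390_vfmin* with M4 passed through unchanged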
@@ -8652,10 +9026,14 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, INTRINSIC_WITH_CC(s390_vstrczhs); INTRINSIC_WITH_CC(s390_vstrczfs); + INTRINSIC_WITH_CC(s390_vfcesbs); INTRINSIC_WITH_CC(s390_vfcedbs); + INTRINSIC_WITH_CC(s390_vfchsbs); INTRINSIC_WITH_CC(s390_vfchdbs); + INTRINSIC_WITH_CC(s390_vfchesbs); INTRINSIC_WITH_CC(s390_vfchedbs); + INTRINSIC_WITH_CC(s390_vftcisb); INTRINSIC_WITH_CC(s390_vftcidb); #undef INTRINSIC_WITH_CC @@ -8669,9 +9047,8 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { auto MakeLdg = [&](unsigned IntrinsicID) { Value *Ptr = EmitScalarExpr(E->getArg(0)); - AlignmentSource AlignSource; clang::CharUnits Align = - getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource); + getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); return Builder.CreateCall( CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), Ptr->getType()}), @@ -8923,6 +9300,16 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType()); return Builder.CreateCall(Callee, X); } + case WebAssembly::BI__builtin_wasm_throw: { + Value *Tag = EmitScalarExpr(E->getArg(0)); + Value *Obj = EmitScalarExpr(E->getArg(1)); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); + return Builder.CreateCall(Callee, {Tag, Obj}); + } + case WebAssembly::BI__builtin_wasm_rethrow: { + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow); + return Builder.CreateCall(Callee); + } default: return nullptr; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp index 83febcb..d24ef0a 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp @@ -15,7 +15,7 @@ #include "CGCUDARuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "ConstantBuilder.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/Decl.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" @@ -265,7 +265,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { "__cudaRegisterFatBinary"); // struct { int magic, int version, void * gpu_binary, void * dont_care }; llvm::StructType *FatbinWrapperTy = - llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy, nullptr); + llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy); llvm::Function *ModuleCtorFunc = llvm::Function::Create( llvm::FunctionType::get(VoidTy, VoidPtrTy, false), diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp index 59010f4..0f3141a 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp @@ -256,7 +256,7 @@ llvm::Constant *CodeGenModule::getAddrOfCXXStructor( return GetOrCreateLLVMFunction( getMangledName(GD), FnType, GD, /*ForVTable=*/false, DontDefer, - /*isThunk=*/false, /*ExtraAttrs=*/llvm::AttributeSet(), IsForDefinition); + /*isThunk=*/false, /*ExtraAttrs=*/llvm::AttributeList(), IsForDefinition); } static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF, diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.cpp index df75a7d..0332586 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.cpp @@ -30,38 +30,9 @@ void CGCXXABI::ErrorUnsupportedABI(CodeGenFunction &CGF, StringRef S) 
{ } bool CGCXXABI::canCopyArgument(const CXXRecordDecl *RD) const { - // If RD has a non-trivial move or copy constructor, we cannot copy the - // argument. - if (RD->hasNonTrivialCopyConstructor() || RD->hasNonTrivialMoveConstructor()) - return false; - - // If RD has a non-trivial destructor, we cannot copy the argument. - if (RD->hasNonTrivialDestructor()) - return false; - // We can only copy the argument if there exists at least one trivial, // non-deleted copy or move constructor. - // FIXME: This assumes that all lazily declared copy and move constructors are - // not deleted. This assumption might not be true in some corner cases. - bool CopyDeleted = false; - bool MoveDeleted = false; - for (const CXXConstructorDecl *CD : RD->ctors()) { - if (CD->isCopyConstructor() || CD->isMoveConstructor()) { - assert(CD->isTrivial()); - // We had at least one undeleted trivial copy or move ctor. Return - // directly. - if (!CD->isDeleted()) - return true; - if (CD->isCopyConstructor()) - CopyDeleted = true; - else - MoveDeleted = true; - } - } - - // If all trivial copy and move constructors are deleted, we cannot copy the - // argument. - return !(CopyDeleted && MoveDeleted); + return RD->canPassInRegisters(); } llvm::Constant *CGCXXABI::GetBogusMemberPointer(QualType T) { @@ -159,10 +130,10 @@ void CGCXXABI::buildThisParam(CodeGenFunction &CGF, FunctionArgList ¶ms) { // FIXME: I'm not entirely sure I like using a fake decl just for code // generation. Maybe we can come up with a better way? - ImplicitParamDecl *ThisDecl - = ImplicitParamDecl::Create(CGM.getContext(), nullptr, MD->getLocation(), - &CGM.getContext().Idents.get("this"), - MD->getThisType(CGM.getContext())); + auto *ThisDecl = ImplicitParamDecl::Create( + CGM.getContext(), nullptr, MD->getLocation(), + &CGM.getContext().Idents.get("this"), MD->getThisType(CGM.getContext()), + ImplicitParamDecl::CXXThis); params.push_back(ThisDecl); CGF.CXXABIThisDecl = ThisDecl; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h index d53fd4c..7b912e3 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h @@ -291,11 +291,26 @@ public: /// Emit constructor variants required by this ABI. virtual void EmitCXXConstructors(const CXXConstructorDecl *D) = 0; + /// Notes how many arguments were added to the beginning (Prefix) and ending + /// (Suffix) of an arg list. + /// + /// Note that Prefix actually refers to the number of args *after* the first + /// one: `this` arguments always come first. + struct AddedStructorArgs { + unsigned Prefix = 0; + unsigned Suffix = 0; + AddedStructorArgs() = default; + AddedStructorArgs(unsigned P, unsigned S) : Prefix(P), Suffix(S) {} + static AddedStructorArgs prefix(unsigned N) { return {N, 0}; } + static AddedStructorArgs suffix(unsigned N) { return {0, N}; } + }; + /// Build the signature of the given constructor or destructor variant by /// adding any required parameters. For convenience, ArgTys has been /// initialized with the type of 'this'. 
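A hedged sketch of how an ABI implementation might use the new AddedStructorArgs type (the ABI class and the extra flag below are hypothetical, purely to illustrate the prefix/suffix distinction):

    // Hypothetical ABI: one implicit int flag right after 'this', nothing appended.
    CGCXXABI::AddedStructorArgs
    SomeABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
                                    SmallVectorImpl<CanQualType> &ArgTys) {
      if (isa<CXXConstructorDecl>(MD) && T == StructorType::Complete) {
        ArgTys.insert(ArgTys.begin() + 1, getContext().IntTy); // after 'this'
        return AddedStructorArgs::prefix(1);
      }
      return AddedStructorArgs();
    }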
- virtual void buildStructorSignature(const CXXMethodDecl *MD, StructorType T, - SmallVectorImpl<CanQualType> &ArgTys) = 0; + virtual AddedStructorArgs + buildStructorSignature(const CXXMethodDecl *MD, StructorType T, + SmallVectorImpl<CanQualType> &ArgTys) = 0; /// Returns true if the given destructor type should be emitted as a linkonce /// delegating thunk, regardless of whether the dtor is defined in this TU or @@ -355,9 +370,9 @@ public: /// Add any ABI-specific implicit arguments needed to call a constructor. /// - /// \return The number of args added to the call, which is typically zero or - /// one. - virtual unsigned + /// \return The number of arguments added at the beginning and end of the + /// call, which is typically zero or one. + virtual AddedStructorArgs addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, CallArgList &Args) = 0; @@ -377,7 +392,7 @@ public: isVirtualOffsetNeededForVTableField(CodeGenFunction &CGF, CodeGenFunction::VPtr Vptr) = 0; - /// Checks if ABI requires to initilize vptrs for given dynamic class. + /// Checks if ABI requires to initialize vptrs for given dynamic class. virtual bool doStructorsInitializeVPtrs(const CXXRecordDecl *VTableClass) = 0; /// Get the address point of the vtable for the given base subobject. diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp index c7c61e0..316bf44 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp @@ -50,7 +50,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { case CC_X86FastCall: return llvm::CallingConv::X86_FastCall; case CC_X86RegCall: return llvm::CallingConv::X86_RegCall; case CC_X86ThisCall: return llvm::CallingConv::X86_ThisCall; - case CC_X86_64Win64: return llvm::CallingConv::X86_64_Win64; + case CC_Win64: return llvm::CallingConv::Win64; case CC_X86_64SysV: return llvm::CallingConv::X86_64_SysV; case CC_AAPCS: return llvm::CallingConv::ARM_AAPCS; case CC_AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; @@ -101,39 +101,64 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) { FTNP->getExtInfo(), {}, RequiredArgs(0)); } -/// Adds the formal paramaters in FPT to the given prefix. If any parameter in +static void addExtParameterInfosForCall( + llvm::SmallVectorImpl<FunctionProtoType::ExtParameterInfo> ¶mInfos, + const FunctionProtoType *proto, + unsigned prefixArgs, + unsigned totalArgs) { + assert(proto->hasExtParameterInfos()); + assert(paramInfos.size() <= prefixArgs); + assert(proto->getNumParams() + prefixArgs <= totalArgs); + + paramInfos.reserve(totalArgs); + + // Add default infos for any prefix args that don't already have infos. + paramInfos.resize(prefixArgs); + + // Add infos for the prototype. + for (const auto &ParamInfo : proto->getExtParameterInfos()) { + paramInfos.push_back(ParamInfo); + // pass_object_size params have no parameter info. + if (ParamInfo.hasPassObjectSize()) + paramInfos.emplace_back(); + } + + assert(paramInfos.size() <= totalArgs && + "Did we forget to insert pass_object_size args?"); + // Add default infos for the variadic and/or suffix arguments. + paramInfos.resize(totalArgs); +} + +/// Adds the formal parameters in FPT to the given prefix. If any parameter in /// FPT has pass_object_size attrs, then we'll add parameters for those, too. 
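Concretely, the pass_object_size expansion described above means a declaration like the following (illustrative) grows an extra size_t slot in the lowered parameter list:

    // Source-level declaration (example only):
    void fill(void *buf __attribute__((pass_object_size(0))), int value);
    // Parameter types appendParameterTypes() builds, in order:
    //   { void * /*buf*/, size_t /*buf's object size*/, int /*value*/ }
    // The extra size_t slot receives a default-constructed ExtParameterInfo entry.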
static void appendParameterTypes(const CodeGenTypes &CGT, SmallVectorImpl<CanQualType> &prefix, SmallVectorImpl<FunctionProtoType::ExtParameterInfo> ¶mInfos, - CanQual<FunctionProtoType> FPT, - const FunctionDecl *FD) { - // Fill out paramInfos. - if (FPT->hasExtParameterInfos() || !paramInfos.empty()) { - assert(paramInfos.size() <= prefix.size()); - auto protoParamInfos = FPT->getExtParameterInfos(); - paramInfos.reserve(prefix.size() + protoParamInfos.size()); - paramInfos.resize(prefix.size()); - paramInfos.append(protoParamInfos.begin(), protoParamInfos.end()); - } - - // Fast path: unknown target. - if (FD == nullptr) { + CanQual<FunctionProtoType> FPT) { + // Fast path: don't touch param info if we don't need to. + if (!FPT->hasExtParameterInfos()) { + assert(paramInfos.empty() && + "We have paramInfos, but the prototype doesn't?"); prefix.append(FPT->param_type_begin(), FPT->param_type_end()); return; } - // In the vast majority cases, we'll have precisely FPT->getNumParams() + unsigned PrefixSize = prefix.size(); + // In the vast majority of cases, we'll have precisely FPT->getNumParams() // parameters; the only thing that can change this is the presence of // pass_object_size. So, we preallocate for the common case. prefix.reserve(prefix.size() + FPT->getNumParams()); - assert(FD->getNumParams() == FPT->getNumParams()); + auto ExtInfos = FPT->getExtParameterInfos(); + assert(ExtInfos.size() == FPT->getNumParams()); for (unsigned I = 0, E = FPT->getNumParams(); I != E; ++I) { prefix.push_back(FPT->getParamType(I)); - if (FD->getParamDecl(I)->hasAttr<PassObjectSizeAttr>()) + if (ExtInfos[I].hasPassObjectSize()) prefix.push_back(CGT.getContext().getSizeType()); } + + addExtParameterInfosForCall(paramInfos, FPT.getTypePtr(), PrefixSize, + prefix.size()); } /// Arrange the LLVM function layout for a value of the given function @@ -147,7 +172,7 @@ arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod, RequiredArgs Required = RequiredArgs::forPrototypePlus(FTP, prefix.size(), FD); // FIXME: Kill copy. - appendParameterTypes(CGT, prefix, paramInfos, FTP, FD); + appendParameterTypes(CGT, prefix, paramInfos, FTP); CanQualType resultType = FTP->getReturnType().getUnqualifiedType(); return CGT.arrangeLLVMFunctionInfo(resultType, instanceMethod, @@ -193,7 +218,7 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { return CC_IntelOclBicc; if (D->hasAttr<MSABIAttr>()) - return IsWindows ? CC_C : CC_X86_64Win64; + return IsWindows ? CC_C : CC_Win64; if (D->hasAttr<SysVABIAttr>()) return IsWindows ? CC_X86_64SysV : CC_C; @@ -286,9 +311,19 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, // Add the formal parameters. if (PassParams) - appendParameterTypes(*this, argTypes, paramInfos, FTP, MD); - - TheCXXABI.buildStructorSignature(MD, Type, argTypes); + appendParameterTypes(*this, argTypes, paramInfos, FTP); + + CGCXXABI::AddedStructorArgs AddedArgs = + TheCXXABI.buildStructorSignature(MD, Type, argTypes); + if (!paramInfos.empty()) { + // Note: prefix implies after the first param. + if (AddedArgs.Prefix) + paramInfos.insert(paramInfos.begin() + 1, AddedArgs.Prefix, + FunctionProtoType::ExtParameterInfo{}); + if (AddedArgs.Suffix) + paramInfos.append(AddedArgs.Suffix, + FunctionProtoType::ExtParameterInfo{}); + } RequiredArgs required = (PassParams && MD->isVariadic() ? 
RequiredArgs(argTypes.size()) @@ -321,26 +356,6 @@ getArgTypesForDeclaration(ASTContext &ctx, const FunctionArgList &args) { return argTypes; } -static void addExtParameterInfosForCall( - llvm::SmallVectorImpl<FunctionProtoType::ExtParameterInfo> ¶mInfos, - const FunctionProtoType *proto, - unsigned prefixArgs, - unsigned totalArgs) { - assert(proto->hasExtParameterInfos()); - assert(paramInfos.size() <= prefixArgs); - assert(proto->getNumParams() + prefixArgs <= totalArgs); - - // Add default infos for any prefix args that don't already have infos. - paramInfos.resize(prefixArgs); - - // Add infos for the prototype. - auto protoInfos = proto->getExtParameterInfos(); - paramInfos.append(protoInfos.begin(), protoInfos.end()); - - // Add default infos for the variadic arguments. - paramInfos.resize(totalArgs); -} - static llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> getExtParameterInfosForCall(const FunctionProtoType *proto, unsigned prefixArgs, unsigned totalArgs) { @@ -352,18 +367,31 @@ getExtParameterInfosForCall(const FunctionProtoType *proto, } /// Arrange a call to a C++ method, passing the given arguments. +/// +/// ExtraPrefixArgs is the number of ABI-specific args passed after the `this` +/// parameter. +/// ExtraSuffixArgs is the number of ABI-specific args passed at the end of +/// args. +/// PassProtoArgs indicates whether `args` has args for the parameters in the +/// given CXXConstructorDecl. const CGFunctionInfo & CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, const CXXConstructorDecl *D, CXXCtorType CtorKind, - unsigned ExtraArgs) { + unsigned ExtraPrefixArgs, + unsigned ExtraSuffixArgs, + bool PassProtoArgs) { // FIXME: Kill copy. SmallVector<CanQualType, 16> ArgTypes; for (const auto &Arg : args) ArgTypes.push_back(Context.getCanonicalParamType(Arg.Ty)); + // +1 for implicit this, which should always be args[0]. + unsigned TotalPrefixArgs = 1 + ExtraPrefixArgs; + CanQual<FunctionProtoType> FPT = GetFormalType(D); - RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs, D); + RequiredArgs Required = + RequiredArgs::forPrototypePlus(FPT, TotalPrefixArgs + ExtraSuffixArgs, D); GlobalDecl GD(D, CtorKind); CanQualType ResultType = TheCXXABI.HasThisReturn(GD) ? ArgTypes.front() @@ -372,8 +400,14 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, : Context.VoidTy; FunctionType::ExtInfo Info = FPT->getExtInfo(); - auto ParamInfos = getExtParameterInfosForCall(FPT.getTypePtr(), 1 + ExtraArgs, - ArgTypes.size()); + llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> ParamInfos; + // If the prototype args are elided, we should only have ABI-specific args, + // which never have param info. + if (PassProtoArgs && FPT->hasExtParameterInfos()) { + // ABI-specific suffix arguments are treated the same as variadic arguments. + addExtParameterInfosForCall(ParamInfos, FPT.getTypePtr(), TotalPrefixArgs, + ArgTypes.size()); + } return arrangeLLVMFunctionInfo(ResultType, /*instanceMethod=*/true, /*chainCall=*/false, ArgTypes, Info, ParamInfos, Required); @@ -617,15 +651,20 @@ CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType, } /// Arrange a call to a C++ method, passing the given arguments. +/// +/// numPrefixArgs is the number of ABI-specific prefix arguments we have. It +/// does not count `this`. const CGFunctionInfo & CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args, const FunctionProtoType *proto, - RequiredArgs required) { - unsigned numRequiredArgs = - (proto->isVariadic() ? 
required.getNumRequiredArgs() : args.size()); - unsigned numPrefixArgs = numRequiredArgs - proto->getNumParams(); + RequiredArgs required, + unsigned numPrefixArgs) { + assert(numPrefixArgs + 1 <= args.size() && + "Emitting a call with less args than the required prefix?"); + // Add one to account for `this`. It's a bit awkward here, but we don't count + // `this` in similar places elsewhere. auto paramInfos = - getExtParameterInfosForCall(proto, numPrefixArgs, args.size()); + getExtParameterInfosForCall(proto, numPrefixArgs + 1, args.size()); // FIXME: Kill copy. auto argTypes = getArgTypesForCall(Context, args); @@ -668,6 +707,12 @@ CodeGenTypes::arrangeCall(const CGFunctionInfo &signature, signature.getRequiredArgs()); } +namespace clang { +namespace CodeGen { +void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI); +} +} + /// Arrange the argument and result information for an abstract value /// of a given function type. This is the method which all of the /// above functions ultimately defer to. @@ -680,7 +725,7 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos, RequiredArgs required) { assert(std::all_of(argTypes.begin(), argTypes.end(), - std::mem_fun_ref(&CanQualType::isCanonicalAsParam))); + [](CanQualType T) { return T.isCanonicalAsParam(); })); // Lookup or create unique function info. llvm::FoldingSetNodeID ID; @@ -702,12 +747,16 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, bool inserted = FunctionsBeingProcessed.insert(FI).second; (void)inserted; assert(inserted && "Recursively being processed?"); - + // Compute ABI information. - if (info.getCC() != CC_Swift) { - getABIInfo().computeInfo(*FI); - } else { + if (CC == llvm::CallingConv::SPIR_KERNEL) { + // Force target independent argument handling for the host visible + // kernel functions. + computeSPIRKernelABIInfo(CGM, *FI); + } else if (info.getCC() == CC_Swift) { swiftcall::computeABIInfo(CGM, *FI); + } else { + getABIInfo().computeInfo(*FI); } // Loop over all of the computed argument and return value info. If any of @@ -749,6 +798,7 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, FI->ChainCall = chainCall; FI->NoReturn = info.getNoReturn(); FI->ReturnsRetained = info.getProducesResult(); + FI->NoCallerSavedRegs = info.getNoCallerSavedRegs(); FI->Required = required; FI->HasRegParm = info.getHasRegParm(); FI->RegParm = info.getRegParm(); @@ -1247,7 +1297,7 @@ static void CreateCoercedStore(llvm::Value *Src, // If store is legal, just bitcast the src pointer. if (SrcSize <= DstSize) { - Dst = CGF.Builder.CreateBitCast(Dst, llvm::PointerType::getUnqual(SrcTy)); + Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy); BuildAggStore(CGF, Src, Dst, DstIsVolatile); } else { // Otherwise do coercion through memory. This is stupid, but @@ -1547,9 +1597,10 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { case ABIArgInfo::Indirect: { assert(NumIRArgs == 1); - // indirect arguments are always on the stack, which is addr space #0. + // indirect arguments are always on the stack, which is alloca addr space. 
llvm::Type *LTy = ConvertTypeForMem(it->type); - ArgTypes[FirstIRArg] = LTy->getPointerTo(); + ArgTypes[FirstIRArg] = LTy->getPointerTo( + CGM.getDataLayout().getAllocaAddrSpace()); break; } @@ -1620,15 +1671,111 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx, FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } +void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, + bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs) { + // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed. + if (!HasOptnone) { + if (CodeGenOpts.OptimizeSize) + FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize); + if (CodeGenOpts.OptimizeSize == 2) + FuncAttrs.addAttribute(llvm::Attribute::MinSize); + } + + if (CodeGenOpts.DisableRedZone) + FuncAttrs.addAttribute(llvm::Attribute::NoRedZone); + if (CodeGenOpts.NoImplicitFloat) + FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat); + + if (AttrOnCallSite) { + // Attributes that should go on the call site only. + if (!CodeGenOpts.SimplifyLibCalls || + CodeGenOpts.isNoBuiltinFunc(Name.data())) + FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin); + if (!CodeGenOpts.TrapFuncName.empty()) + FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName); + } else { + // Attributes that should go on the function, but not the call site. + if (!CodeGenOpts.DisableFPElim) { + FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); + } else if (CodeGenOpts.OmitLeafFramePointer) { + FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); + FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); + } else { + FuncAttrs.addAttribute("no-frame-pointer-elim", "true"); + FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); + } + + FuncAttrs.addAttribute("less-precise-fpmad", + llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD)); + + if (!CodeGenOpts.FPDenormalMode.empty()) + FuncAttrs.addAttribute("denormal-fp-math", CodeGenOpts.FPDenormalMode); + + FuncAttrs.addAttribute("no-trapping-math", + llvm::toStringRef(CodeGenOpts.NoTrappingMath)); + + // TODO: Are these all needed? + // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags. + FuncAttrs.addAttribute("no-infs-fp-math", + llvm::toStringRef(CodeGenOpts.NoInfsFPMath)); + FuncAttrs.addAttribute("no-nans-fp-math", + llvm::toStringRef(CodeGenOpts.NoNaNsFPMath)); + FuncAttrs.addAttribute("unsafe-fp-math", + llvm::toStringRef(CodeGenOpts.UnsafeFPMath)); + FuncAttrs.addAttribute("use-soft-float", + llvm::toStringRef(CodeGenOpts.SoftFloat)); + FuncAttrs.addAttribute("stack-protector-buffer-size", + llvm::utostr(CodeGenOpts.SSPBufferSize)); + FuncAttrs.addAttribute("no-signed-zeros-fp-math", + llvm::toStringRef(CodeGenOpts.NoSignedZeros)); + FuncAttrs.addAttribute( + "correctly-rounded-divide-sqrt-fp-math", + llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt)); + + // TODO: Reciprocal estimate codegen options should apply to instructions? 
+ std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals; + if (!Recips.empty()) + FuncAttrs.addAttribute("reciprocal-estimates", + llvm::join(Recips.begin(), Recips.end(), ",")); + + if (CodeGenOpts.StackRealignment) + FuncAttrs.addAttribute("stackrealign"); + if (CodeGenOpts.Backchain) + FuncAttrs.addAttribute("backchain"); + } + + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + // Conservatively, mark all functions and calls in CUDA as convergent + // (meaning, they may call an intrinsically convergent op, such as + // __syncthreads(), and so can't have certain optimizations applied around + // them). LLVM will remove this attribute where it safely can. + FuncAttrs.addAttribute(llvm::Attribute::Convergent); + + // Exceptions aren't supported in CUDA device code. + FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); + + // Respect -fcuda-flush-denormals-to-zero. + if (getLangOpts().CUDADeviceFlushDenormalsToZero) + FuncAttrs.addAttribute("nvptx-f32ftz", "true"); + } +} + +void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) { + llvm::AttrBuilder FuncAttrs; + ConstructDefaultFnAttrList(F.getName(), + F.hasFnAttribute(llvm::Attribute::OptimizeNone), + /* AttrOnCallsite = */ false, FuncAttrs); + F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs); +} + void CodeGenModule::ConstructAttributeList( StringRef Name, const CGFunctionInfo &FI, CGCalleeInfo CalleeInfo, - AttributeListType &PAL, unsigned &CallingConv, bool AttrOnCallSite) { + llvm::AttributeList &AttrList, unsigned &CallingConv, bool AttrOnCallSite) { llvm::AttrBuilder FuncAttrs; llvm::AttrBuilder RetAttrs; - bool HasOptnone = false; CallingConv = FI.getEffectiveCallingConvention(); - if (FI.isNoReturn()) FuncAttrs.addAttribute(llvm::Attribute::NoReturn); @@ -1639,7 +1786,7 @@ void CodeGenModule::ConstructAttributeList( const Decl *TargetDecl = CalleeInfo.getCalleeDecl(); - bool HasAnyX86InterruptAttr = false; + bool HasOptnone = false; // FIXME: handle sseregparm someday... if (TargetDecl) { if (TargetDecl->hasAttr<ReturnsTwiceAttr>()) @@ -1648,6 +1795,8 @@ void CodeGenModule::ConstructAttributeList( FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); if (TargetDecl->hasAttr<NoReturnAttr>()) FuncAttrs.addAttribute(llvm::Attribute::NoReturn); + if (TargetDecl->hasAttr<ColdAttr>()) + FuncAttrs.addAttribute(llvm::Attribute::Cold); if (TargetDecl->hasAttr<NoDuplicateAttr>()) FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate); if (TargetDecl->hasAttr<ConvergentAttr>()) @@ -1678,8 +1827,9 @@ void CodeGenModule::ConstructAttributeList( RetAttrs.addAttribute(llvm::Attribute::NoAlias); if (TargetDecl->hasAttr<ReturnsNonNullAttr>()) RetAttrs.addAttribute(llvm::Attribute::NonNull); + if (TargetDecl->hasAttr<AnyX86NoCallerSavedRegistersAttr>()) + FuncAttrs.addAttribute("no_caller_saved_registers"); - HasAnyX86InterruptAttr = TargetDecl->hasAttr<AnyX86InterruptAttr>(); HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>(); if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) { Optional<unsigned> NumElemsParam; @@ -1691,86 +1841,19 @@ void CodeGenModule::ConstructAttributeList( } } - // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed. 
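// For orientation only: with frame pointers kept (-fno-omit-frame-pointer) and
// otherwise default options, the helper above typically yields IR function
// attributes along these lines; the exact set depends on CodeGenOpts.
//
//   attributes #0 = { "no-frame-pointer-elim"="true"
//                     "no-frame-pointer-elim-non-leaf"
//                     "less-precise-fpmad"="false" "no-trapping-math"="false"
//                     "stack-protector-buffer-size"="8" ... }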
- if (!HasOptnone) { - if (CodeGenOpts.OptimizeSize) - FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize); - if (CodeGenOpts.OptimizeSize == 2) - FuncAttrs.addAttribute(llvm::Attribute::MinSize); - } + ConstructDefaultFnAttrList(Name, HasOptnone, AttrOnCallSite, FuncAttrs); - if (CodeGenOpts.DisableRedZone) - FuncAttrs.addAttribute(llvm::Attribute::NoRedZone); - if (CodeGenOpts.NoImplicitFloat) - FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat); if (CodeGenOpts.EnableSegmentedStacks && !(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>())) FuncAttrs.addAttribute("split-stack"); - if (AttrOnCallSite) { - // Attributes that should go on the call site only. - if (!CodeGenOpts.SimplifyLibCalls || - CodeGenOpts.isNoBuiltinFunc(Name.data())) - FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin); - if (!CodeGenOpts.TrapFuncName.empty()) - FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName); - } else { - // Attributes that should go on the function, but not the call site. - if (!CodeGenOpts.DisableFPElim) { - FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); - } else if (CodeGenOpts.OmitLeafFramePointer) { - FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); - FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); - } else { - FuncAttrs.addAttribute("no-frame-pointer-elim", "true"); - FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); - } - + if (!AttrOnCallSite) { bool DisableTailCalls = - CodeGenOpts.DisableTailCalls || HasAnyX86InterruptAttr || - (TargetDecl && TargetDecl->hasAttr<DisableTailCallsAttr>()); - FuncAttrs.addAttribute( - "disable-tail-calls", - llvm::toStringRef(DisableTailCalls)); - - FuncAttrs.addAttribute("less-precise-fpmad", - llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD)); - - if (!CodeGenOpts.FPDenormalMode.empty()) - FuncAttrs.addAttribute("denormal-fp-math", - CodeGenOpts.FPDenormalMode); - - FuncAttrs.addAttribute("no-trapping-math", - llvm::toStringRef(CodeGenOpts.NoTrappingMath)); - - // TODO: Are these all needed? - // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags. - FuncAttrs.addAttribute("no-infs-fp-math", - llvm::toStringRef(CodeGenOpts.NoInfsFPMath)); - FuncAttrs.addAttribute("no-nans-fp-math", - llvm::toStringRef(CodeGenOpts.NoNaNsFPMath)); - FuncAttrs.addAttribute("unsafe-fp-math", - llvm::toStringRef(CodeGenOpts.UnsafeFPMath)); - FuncAttrs.addAttribute("use-soft-float", - llvm::toStringRef(CodeGenOpts.SoftFloat)); - FuncAttrs.addAttribute("stack-protector-buffer-size", - llvm::utostr(CodeGenOpts.SSPBufferSize)); - FuncAttrs.addAttribute("no-signed-zeros-fp-math", - llvm::toStringRef(CodeGenOpts.NoSignedZeros)); - FuncAttrs.addAttribute( - "correctly-rounded-divide-sqrt-fp-math", - llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt)); - - // TODO: Reciprocal estimate codegen options should apply to instructions? - std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals; - if (!Recips.empty()) - FuncAttrs.addAttribute("reciprocal-estimates", - llvm::join(Recips.begin(), Recips.end(), ",")); - - if (CodeGenOpts.StackRealignment) - FuncAttrs.addAttribute("stackrealign"); - if (CodeGenOpts.Backchain) - FuncAttrs.addAttribute("backchain"); + CodeGenOpts.DisableTailCalls || + (TargetDecl && (TargetDecl->hasAttr<DisableTailCallsAttr>() || + TargetDecl->hasAttr<AnyX86InterruptAttr>())); + FuncAttrs.addAttribute("disable-tail-calls", + llvm::toStringRef(DisableTailCalls)); // Add target-cpu and target-features attributes to functions. 
If // we have a decl for the function and it has a target attribute then @@ -1794,8 +1877,8 @@ void CodeGenModule::ConstructAttributeList( // the function. const auto *TD = FD->getAttr<TargetAttr>(); TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse(); - if (ParsedAttr.second != "") - TargetCPU = ParsedAttr.second; + if (ParsedAttr.Architecture != "") + TargetCPU = ParsedAttr.Architecture; if (TargetCPU != "") FuncAttrs.addAttribute("target-cpu", TargetCPU); if (!Features.empty()) { @@ -1819,21 +1902,6 @@ void CodeGenModule::ConstructAttributeList( } } - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { - // Conservatively, mark all functions and calls in CUDA as convergent - // (meaning, they may call an intrinsically convergent op, such as - // __syncthreads(), and so can't have certain optimizations applied around - // them). LLVM will remove this attribute where it safely can. - FuncAttrs.addAttribute(llvm::Attribute::Convergent); - - // Exceptions aren't supported in CUDA device code. - FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); - - // Respect -fcuda-flush-denormals-to-zero. - if (getLangOpts().CUDADeviceFlushDenormalsToZero) - FuncAttrs.addAttribute("nvptx-f32ftz", "true"); - } - ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI); QualType RetTy = FI.getReturnType(); @@ -1876,13 +1944,8 @@ void CodeGenModule::ConstructAttributeList( RetAttrs.addAttribute(llvm::Attribute::NonNull); } - // Attach return attributes. - if (RetAttrs.hasAttributes()) { - PAL.push_back(llvm::AttributeSet::get( - getLLVMContext(), llvm::AttributeSet::ReturnIndex, RetAttrs)); - } - bool hasUsedSRet = false; + SmallVector<llvm::AttributeSet, 4> ArgAttrs(IRFunctionArgs.totalIRArgs()); // Attach attributes to sret. if (IRFunctionArgs.hasSRetArg()) { @@ -1891,16 +1954,16 @@ void CodeGenModule::ConstructAttributeList( hasUsedSRet = true; if (RetAI.getInReg()) SRETAttrs.addAttribute(llvm::Attribute::InReg); - PAL.push_back(llvm::AttributeSet::get( - getLLVMContext(), IRFunctionArgs.getSRetArgNo() + 1, SRETAttrs)); + ArgAttrs[IRFunctionArgs.getSRetArgNo()] = + llvm::AttributeSet::get(getLLVMContext(), SRETAttrs); } // Attach attributes to inalloca argument. if (IRFunctionArgs.hasInallocaArg()) { llvm::AttrBuilder Attrs; Attrs.addAttribute(llvm::Attribute::InAlloca); - PAL.push_back(llvm::AttributeSet::get( - getLLVMContext(), IRFunctionArgs.getInallocaArgNo() + 1, Attrs)); + ArgAttrs[IRFunctionArgs.getInallocaArgNo()] = + llvm::AttributeSet::get(getLLVMContext(), Attrs); } unsigned ArgNo = 0; @@ -1913,10 +1976,12 @@ void CodeGenModule::ConstructAttributeList( // Add attribute for padding argument, if necessary. 
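// Illustrative effect of the ParsedTargetAttr field rename above: for a
// declaration such as
//
//   __attribute__((target("arch=haswell"))) void resample();
//
// ParsedAttr.Architecture holds "haswell", which becomes the "target-cpu"
// function attribute, while any listed features flow into "target-features".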
if (IRFunctionArgs.hasPaddingArg(ArgNo)) { - if (AI.getPaddingInReg()) - PAL.push_back(llvm::AttributeSet::get( - getLLVMContext(), IRFunctionArgs.getPaddingArgNo(ArgNo) + 1, - llvm::Attribute::InReg)); + if (AI.getPaddingInReg()) { + ArgAttrs[IRFunctionArgs.getPaddingArgNo(ArgNo)] = + llvm::AttributeSet::get( + getLLVMContext(), + llvm::AttrBuilder().addAttribute(llvm::Attribute::InReg)); + } } // 'restrict' -> 'noalias' is done in EmitFunctionProlog when we @@ -2031,17 +2096,15 @@ void CodeGenModule::ConstructAttributeList( unsigned FirstIRArg, NumIRArgs; std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); for (unsigned i = 0; i < NumIRArgs; i++) - PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), - FirstIRArg + i + 1, Attrs)); + ArgAttrs[FirstIRArg + i] = + llvm::AttributeSet::get(getLLVMContext(), Attrs); } } assert(ArgNo == FI.arg_size()); - if (FuncAttrs.hasAttributes()) - PAL.push_back(llvm:: - AttributeSet::get(getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - FuncAttrs)); + AttrList = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeSet::get(getLLVMContext(), FuncAttrs), + llvm::AttributeSet::get(getLLVMContext(), RetAttrs), ArgAttrs); } /// An argument came in as a promoted argument; demote it back to its @@ -2152,8 +2215,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, if (IRFunctionArgs.hasSRetArg()) { auto AI = cast<llvm::Argument>(FnArgs[IRFunctionArgs.getSRetArgNo()]); AI->setName("agg.result"); - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), AI->getArgNo() + 1, - llvm::Attribute::NoAlias)); + AI->addAttr(llvm::Attribute::NoAlias); } // Track if we received the parameter as a pointer (indirect, byval, or @@ -2244,9 +2306,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) { if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(), PVD->getFunctionScopeIndex())) - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, - llvm::Attribute::NonNull)); + AI->addAttr(llvm::Attribute::NonNull); QualType OTy = PVD->getOriginalType(); if (const auto *ArrTy = @@ -2263,12 +2323,9 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, llvm::AttrBuilder Attrs; Attrs.addDereferenceableAttr( getContext().getTypeSizeInChars(ETy).getQuantity()*ArrSize); - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, Attrs)); + AI->addAttrs(Attrs); } else if (getContext().getTargetAddressSpace(ETy) == 0) { - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, - llvm::Attribute::NonNull)); + AI->addAttr(llvm::Attribute::NonNull); } } } else if (const auto *ArrTy = @@ -2278,35 +2335,26 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // we know that it must be nonnull. 
if (ArrTy->getSizeModifier() == VariableArrayType::Static && !getContext().getTargetAddressSpace(ArrTy->getElementType())) - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, - llvm::Attribute::NonNull)); + AI->addAttr(llvm::Attribute::NonNull); } const auto *AVAttr = PVD->getAttr<AlignValueAttr>(); if (!AVAttr) if (const auto *TOTy = dyn_cast<TypedefType>(OTy)) AVAttr = TOTy->getDecl()->getAttr<AlignValueAttr>(); - if (AVAttr) { + if (AVAttr) { llvm::Value *AlignmentValue = EmitScalarExpr(AVAttr->getAlignment()); llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(AlignmentValue); - unsigned Alignment = - std::min((unsigned) AlignmentCI->getZExtValue(), - +llvm::Value::MaximumAlignment); - - llvm::AttrBuilder Attrs; - Attrs.addAlignmentAttr(Alignment); - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, Attrs)); + unsigned Alignment = std::min((unsigned)AlignmentCI->getZExtValue(), + +llvm::Value::MaximumAlignment); + AI->addAttrs(llvm::AttrBuilder().addAlignmentAttr(Alignment)); } } if (Arg->getType().isRestrictQualified()) - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, - llvm::Attribute::NoAlias)); + AI->addAttr(llvm::Attribute::NoAlias); // LLVM expects swifterror parameters to be used in very restricted // ways. Copy the value into a less-restricted temporary. @@ -2364,8 +2412,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, Address AddrToStoreInto = Address::invalid(); if (SrcSize <= DstSize) { - AddrToStoreInto = - Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(STy)); + AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy); } else { AddrToStoreInto = CreateTempAlloca(STy, Alloca.getAlignment(), "coerce"); @@ -2858,19 +2905,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, llvm::Instruction *Ret; if (RV) { - if (CurCodeDecl && SanOpts.has(SanitizerKind::ReturnsNonnullAttribute)) { - if (auto RetNNAttr = CurCodeDecl->getAttr<ReturnsNonNullAttr>()) { - SanitizerScope SanScope(this); - llvm::Value *Cond = Builder.CreateICmpNE( - RV, llvm::Constant::getNullValue(RV->getType())); - llvm::Constant *StaticData[] = { - EmitCheckSourceLocation(EndLoc), - EmitCheckSourceLocation(RetNNAttr->getLocation()), - }; - EmitCheck(std::make_pair(Cond, SanitizerKind::ReturnsNonnullAttribute), - SanitizerHandler::NonnullReturn, StaticData, None); - } - } + EmitReturnValueCheck(RV); Ret = Builder.CreateRet(RV); } else { Ret = Builder.CreateRetVoid(); @@ -2880,6 +2915,65 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, Ret->setDebugLoc(std::move(RetDbgLoc)); } +void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV) { + // A current decl may not be available when emitting vtable thunks. + if (!CurCodeDecl) + return; + + ReturnsNonNullAttr *RetNNAttr = nullptr; + if (SanOpts.has(SanitizerKind::ReturnsNonnullAttribute)) + RetNNAttr = CurCodeDecl->getAttr<ReturnsNonNullAttr>(); + + if (!RetNNAttr && !requiresReturnValueNullabilityCheck()) + return; + + // Prefer the returns_nonnull attribute if it's present. 
+ SourceLocation AttrLoc; + SanitizerMask CheckKind; + SanitizerHandler Handler; + if (RetNNAttr) { + assert(!requiresReturnValueNullabilityCheck() && + "Cannot check nullability and the nonnull attribute"); + AttrLoc = RetNNAttr->getLocation(); + CheckKind = SanitizerKind::ReturnsNonnullAttribute; + Handler = SanitizerHandler::NonnullReturn; + } else { + if (auto *DD = dyn_cast<DeclaratorDecl>(CurCodeDecl)) + if (auto *TSI = DD->getTypeSourceInfo()) + if (auto FTL = TSI->getTypeLoc().castAs<FunctionTypeLoc>()) + AttrLoc = FTL.getReturnLoc().findNullabilityLoc(); + CheckKind = SanitizerKind::NullabilityReturn; + Handler = SanitizerHandler::NullabilityReturn; + } + + SanitizerScope SanScope(this); + + // Make sure the "return" source location is valid. If we're checking a + // nullability annotation, make sure the preconditions for the check are met. + llvm::BasicBlock *Check = createBasicBlock("nullcheck"); + llvm::BasicBlock *NoCheck = createBasicBlock("no.nullcheck"); + llvm::Value *SLocPtr = Builder.CreateLoad(ReturnLocation, "return.sloc.load"); + llvm::Value *CanNullCheck = Builder.CreateIsNotNull(SLocPtr); + if (requiresReturnValueNullabilityCheck()) + CanNullCheck = + Builder.CreateAnd(CanNullCheck, RetValNullabilityPrecondition); + Builder.CreateCondBr(CanNullCheck, Check, NoCheck); + EmitBlock(Check); + + // Now do the null check. + llvm::Value *Cond = Builder.CreateIsNotNull(RV); + llvm::Constant *StaticData[] = {EmitCheckSourceLocation(AttrLoc)}; + llvm::Value *DynamicData[] = {SLocPtr}; + EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, DynamicData); + + EmitBlock(NoCheck); + +#ifndef NDEBUG + // The return location should not be used after the check has been emitted. + ReturnLocation = Address::invalid(); +#endif +} + static bool isInAllocaArgument(CGCXXABI &ABI, QualType type) { const CXXRecordDecl *RD = type->getAsCXXRecordDecl(); return RD && ABI.getRecordArgABI(RD) == CGCXXABI::RAA_DirectInMemory; @@ -3188,50 +3282,63 @@ void CallArgList::freeArgumentMemory(CodeGenFunction &CGF) const { void CodeGenFunction::EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, - const FunctionDecl *FD, + AbstractCallee AC, unsigned ParmNum) { - if (!SanOpts.has(SanitizerKind::NonnullAttribute) || !FD) + if (!AC.getDecl() || !(SanOpts.has(SanitizerKind::NonnullAttribute) || + SanOpts.has(SanitizerKind::NullabilityArg))) return; - auto PVD = ParmNum < FD->getNumParams() ? FD->getParamDecl(ParmNum) : nullptr; + + // The param decl may be missing in a variadic function. + auto PVD = ParmNum < AC.getNumParams() ? AC.getParamDecl(ParmNum) : nullptr; unsigned ArgNo = PVD ? PVD->getFunctionScopeIndex() : ParmNum; - auto NNAttr = getNonNullAttr(FD, PVD, ArgType, ArgNo); - if (!NNAttr) + + // Prefer the nonnull attribute if it's present. 
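// Source-level triggers for EmitReturnValueCheck above, assuming the matching
// sanitizers are enabled; the declarations are illustrative.
//
//   __attribute__((returns_nonnull)) char *dup_name(void);  // -fsanitize=returns-nonnull-attribute
//   int *_Nonnull find_slot(void);                           // -fsanitize=nullability-return
//
// In both cases the returned pointer is null-checked in the "nullcheck" block
// emitted above, and the check is skipped via "no.nullcheck" when its
// preconditions do not hold.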
+ const NonNullAttr *NNAttr = nullptr; + if (SanOpts.has(SanitizerKind::NonnullAttribute)) + NNAttr = getNonNullAttr(AC.getDecl(), PVD, ArgType, ArgNo); + + bool CanCheckNullability = false; + if (SanOpts.has(SanitizerKind::NullabilityArg) && !NNAttr && PVD) { + auto Nullability = PVD->getType()->getNullability(getContext()); + CanCheckNullability = Nullability && + *Nullability == NullabilityKind::NonNull && + PVD->getTypeSourceInfo(); + } + + if (!NNAttr && !CanCheckNullability) return; + + SourceLocation AttrLoc; + SanitizerMask CheckKind; + SanitizerHandler Handler; + if (NNAttr) { + AttrLoc = NNAttr->getLocation(); + CheckKind = SanitizerKind::NonnullAttribute; + Handler = SanitizerHandler::NonnullArg; + } else { + AttrLoc = PVD->getTypeSourceInfo()->getTypeLoc().findNullabilityLoc(); + CheckKind = SanitizerKind::NullabilityArg; + Handler = SanitizerHandler::NullabilityArg; + } + SanitizerScope SanScope(this); assert(RV.isScalar()); llvm::Value *V = RV.getScalarVal(); llvm::Value *Cond = Builder.CreateICmpNE(V, llvm::Constant::getNullValue(V->getType())); llvm::Constant *StaticData[] = { - EmitCheckSourceLocation(ArgLoc), - EmitCheckSourceLocation(NNAttr->getLocation()), + EmitCheckSourceLocation(ArgLoc), EmitCheckSourceLocation(AttrLoc), llvm::ConstantInt::get(Int32Ty, ArgNo + 1), }; - EmitCheck(std::make_pair(Cond, SanitizerKind::NonnullAttribute), - SanitizerHandler::NonnullArg, StaticData, None); + EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, None); } void CodeGenFunction::EmitCallArgs( CallArgList &Args, ArrayRef<QualType> ArgTypes, llvm::iterator_range<CallExpr::const_arg_iterator> ArgRange, - const FunctionDecl *CalleeDecl, unsigned ParamsToSkip, - EvaluationOrder Order) { + AbstractCallee AC, unsigned ParamsToSkip, EvaluationOrder Order) { assert((int)ArgTypes.size() == (ArgRange.end() - ArgRange.begin())); - auto MaybeEmitImplicitObjectSize = [&](unsigned I, const Expr *Arg) { - if (CalleeDecl == nullptr || I >= CalleeDecl->getNumParams()) - return; - auto *PS = CalleeDecl->getParamDecl(I)->getAttr<PassObjectSizeAttr>(); - if (PS == nullptr) - return; - - const auto &Context = getContext(); - auto SizeTy = Context.getSizeType(); - auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); - llvm::Value *V = evaluateOrEmitBuiltinObjectSize(Arg, PS->getType(), T); - Args.add(RValue::get(V), SizeTy); - }; - // We *have* to evaluate arguments from right to left in the MS C++ ABI, // because arguments are destroyed left to right in the callee. As a special // case, there are certain language constructs that require left-to-right @@ -3242,6 +3349,27 @@ void CodeGenFunction::EmitCallArgs( ? Order == EvaluationOrder::ForceLeftToRight : Order != EvaluationOrder::ForceRightToLeft; + auto MaybeEmitImplicitObjectSize = [&](unsigned I, const Expr *Arg, + RValue EmittedArg) { + if (!AC.hasFunctionDecl() || I >= AC.getNumParams()) + return; + auto *PS = AC.getParamDecl(I)->getAttr<PassObjectSizeAttr>(); + if (PS == nullptr) + return; + + const auto &Context = getContext(); + auto SizeTy = Context.getSizeType(); + auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); + assert(EmittedArg.getScalarVal() && "We emitted nothing for the arg?"); + llvm::Value *V = evaluateOrEmitBuiltinObjectSize(Arg, PS->getType(), T, + EmittedArg.getScalarVal()); + Args.add(RValue::get(V), SizeTy); + // If we're emitting args in reverse, be sure to do so with + // pass_object_size, as well. 
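// Matching source-level triggers for the argument check above; declarations
// are illustrative.
//
//   void use(void *p) __attribute__((nonnull));  // -fsanitize=nonnull-attribute
//   void store(int *_Nonnull dst, int v);        // -fsanitize=nullability-arg
//
// When both could apply, the nonnull attribute wins, mirroring the CheckKind
// selection above.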
+ if (!LeftToRight) + std::swap(Args.back(), *(&Args.back() - 1)); + }; + // Insert a stack save if we're going to need any inalloca args. bool HasInAllocaArgs = false; if (CGM.getTarget().getCXXABI().isMicrosoft()) { @@ -3259,11 +3387,28 @@ void CodeGenFunction::EmitCallArgs( for (unsigned I = 0, E = ArgTypes.size(); I != E; ++I) { unsigned Idx = LeftToRight ? I : E - I - 1; CallExpr::const_arg_iterator Arg = ArgRange.begin() + Idx; - if (!LeftToRight) MaybeEmitImplicitObjectSize(Idx, *Arg); + unsigned InitialArgSize = Args.size(); + // If *Arg is an ObjCIndirectCopyRestoreExpr, check that either the types of + // the argument and parameter match or the objc method is parameterized. + assert((!isa<ObjCIndirectCopyRestoreExpr>(*Arg) || + getContext().hasSameUnqualifiedType((*Arg)->getType(), + ArgTypes[Idx]) || + (isa<ObjCMethodDecl>(AC.getDecl()) && + isObjCMethodWithTypeParams(cast<ObjCMethodDecl>(AC.getDecl())))) && + "Argument and parameter types don't match"); EmitCallArg(Args, *Arg, ArgTypes[Idx]); - EmitNonNullArgCheck(Args.back().RV, ArgTypes[Idx], (*Arg)->getExprLoc(), - CalleeDecl, ParamsToSkip + Idx); - if (LeftToRight) MaybeEmitImplicitObjectSize(Idx, *Arg); + // In particular, we depend on it being the last arg in Args, and the + // objectsize bits depend on there only being one arg if !LeftToRight. + assert(InitialArgSize + 1 == Args.size() && + "The code below depends on only adding one arg per EmitCallArg"); + (void)InitialArgSize; + RValue RVArg = Args.back().RV; + EmitNonNullArgCheck(RVArg, ArgTypes[Idx], (*Arg)->getExprLoc(), AC, + ParamsToSkip + Idx); + // @llvm.objectsize should never have side-effects and shouldn't need + // destruction/cleanups, so we can safely "emit" it after its arg, + // regardless of right-to-leftness + MaybeEmitImplicitObjectSize(Idx, *Arg, RVArg); } if (!LeftToRight) { @@ -3311,7 +3456,6 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, if (const ObjCIndirectCopyRestoreExpr *CRE = dyn_cast<ObjCIndirectCopyRestoreExpr>(E)) { assert(getLangOpts().ObjCAutoRefCount); - assert(getContext().hasSameUnqualifiedType(E->getType(), type)); return emitWritebackArg(*this, args, CRE); } @@ -3571,12 +3715,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Address ArgMemory = Address::invalid(); const llvm::StructLayout *ArgMemoryLayout = nullptr; if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) { - ArgMemoryLayout = CGM.getDataLayout().getStructLayout(ArgStruct); + const llvm::DataLayout &DL = CGM.getDataLayout(); + ArgMemoryLayout = DL.getStructLayout(ArgStruct); llvm::Instruction *IP = CallArgs.getStackBase(); llvm::AllocaInst *AI; if (IP) { IP = IP->getNextNode(); - AI = new llvm::AllocaInst(ArgStruct, "argmem", IP); + AI = new llvm::AllocaInst(ArgStruct, DL.getAllocaAddrSpace(), + "argmem", IP); } else { AI = CreateTempAlloca(ArgStruct, "argmem"); } @@ -3675,7 +3821,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, assert(NumIRArgs == 1); if (RV.isScalar() || RV.isComplex()) { // Make a temporary alloca to pass the argument. 
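// Why the swap above is needed: with right-to-left emission the size value is
// pushed after its pointer and the whole list is later un-reversed, which
// would leave the pair as {size, pointer}. A worked trace, with names
// illustrative:
//
//   void f(char *p __attribute__((pass_object_size(0))), int x);
//   f(buf, 1);
//   // right-to-left emission: 1, buf, size(buf)  -> swap -> 1, size(buf), buf
//   // after un-reversing:     buf, size(buf), 1  (matches the arranged layout)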
- Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign()); + Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), + "indirect-arg-temp", false); IRCallArgs[FirstIRArg] = Addr.getPointer(); LValue argLV = MakeAddrLValue(Addr, I->Ty); @@ -3704,7 +3851,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, < Align.getQuantity()) || (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) { // Create an aligned temporary, and copy to it. - Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign()); + Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), + "byval-temp", false); IRCallArgs[FirstIRArg] = AI.getPointer(); EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified()); } else { @@ -3972,13 +4120,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Compute the calling convention and attributes. unsigned CallingConv; - CodeGen::AttributeListType AttributeList; + llvm::AttributeList Attrs; CGM.ConstructAttributeList(CalleePtr->getName(), CallInfo, - Callee.getAbstractInfo(), - AttributeList, CallingConv, + Callee.getAbstractInfo(), Attrs, CallingConv, /*AttrOnCallSite=*/true); - llvm::AttributeSet Attrs = llvm::AttributeSet::get(getLLVMContext(), - AttributeList); // Apply some call-site-specific attributes. // TODO: work this into building the attribute set. @@ -3989,15 +4134,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, !(Callee.getAbstractInfo().getCalleeDecl() && Callee.getAbstractInfo().getCalleeDecl()->hasAttr<NoInlineAttr>())) { Attrs = - Attrs.addAttribute(getLLVMContext(), - llvm::AttributeSet::FunctionIndex, + Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::AlwaysInline); } // Disable inlining inside SEH __try blocks. if (isSEHTryScope()) { Attrs = - Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex, + Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NoInline); } @@ -4014,7 +4158,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, CannotThrow = true; } else { // Otherwise, nounwind call sites will never throw. - CannotThrow = Attrs.hasAttribute(llvm::AttributeSet::FunctionIndex, + CannotThrow = Attrs.hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoUnwind); } llvm::BasicBlock *InvokeDest = CannotThrow ? 
nullptr : getInvokeDest(); @@ -4127,6 +4271,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Builder.CreateStore(elt, eltAddr); } // FALLTHROUGH + LLVM_FALLTHROUGH; } case ABIArgInfo::InAlloca: @@ -4210,6 +4355,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(Alignment); EmitAlignmentAssumption(Ret.getScalarVal(), AlignmentCI->getZExtValue(), OffsetValue); + } else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) { + llvm::Value *ParamVal = + CallArgs[AA->getParamIndex() - 1].RV.getScalarVal(); + EmitAlignmentAssumption(Ret.getScalarVal(), ParamVal); } } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.h b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.h index 031ce83..7e10407 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.h @@ -25,10 +25,10 @@ #include "ABIInfo.h" namespace llvm { - class AttributeSet; - class Function; - class Type; - class Value; +class AttributeList; +class Function; +class Type; +class Value; } namespace clang { @@ -39,28 +39,27 @@ namespace clang { class VarDecl; namespace CodeGen { - typedef SmallVector<llvm::AttributeSet, 8> AttributeListType; - /// Abstract information about a function or function prototype. - class CGCalleeInfo { - /// \brief The function prototype of the callee. - const FunctionProtoType *CalleeProtoTy; - /// \brief The function declaration of the callee. - const Decl *CalleeDecl; - - public: - explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl(nullptr) {} - CGCalleeInfo(const FunctionProtoType *calleeProtoTy, const Decl *calleeDecl) - : CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {} - CGCalleeInfo(const FunctionProtoType *calleeProtoTy) - : CalleeProtoTy(calleeProtoTy), CalleeDecl(nullptr) {} - CGCalleeInfo(const Decl *calleeDecl) - : CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {} - - const FunctionProtoType *getCalleeFunctionProtoType() const { - return CalleeProtoTy; - } - const Decl *getCalleeDecl() const { return CalleeDecl; } +/// Abstract information about a function or function prototype. +class CGCalleeInfo { + /// \brief The function prototype of the callee. + const FunctionProtoType *CalleeProtoTy; + /// \brief The function declaration of the callee. + const Decl *CalleeDecl; + +public: + explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl(nullptr) {} + CGCalleeInfo(const FunctionProtoType *calleeProtoTy, const Decl *calleeDecl) + : CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {} + CGCalleeInfo(const FunctionProtoType *calleeProtoTy) + : CalleeProtoTy(calleeProtoTy), CalleeDecl(nullptr) {} + CGCalleeInfo(const Decl *calleeDecl) + : CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {} + + const FunctionProtoType *getCalleeFunctionProtoType() const { + return CalleeProtoTy; + } + const Decl *getCalleeDecl() const { return CalleeDecl; } }; /// All available information about a concrete callee. diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp index 05d0567..50d702c 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp @@ -129,14 +129,14 @@ Address CodeGenFunction::EmitCXXMemberDataPointerAddress(const Expr *E, Address base, llvm::Value *memberPtr, const MemberPointerType *memberPtrType, - AlignmentSource *alignSource) { + LValueBaseInfo *BaseInfo) { // Ask the ABI to compute the actual address. 
llvm::Value *ptr = CGM.getCXXABI().EmitMemberDataPointerAddress(*this, E, base, memberPtr, memberPtrType); QualType memberType = memberPtrType->getPointeeType(); - CharUnits memberAlign = getNaturalTypeAlignment(memberType, alignSource); + CharUnits memberAlign = getNaturalTypeAlignment(memberType, BaseInfo); memberAlign = CGM.getDynamicOffsetAlignment(base.getAlignment(), memberPtrType->getClass()->getAsCXXRecordDecl(), @@ -309,8 +309,10 @@ Address CodeGenFunction::GetAddressOfBaseClass( // just do a bitcast; null checks are unnecessary. if (NonVirtualOffset.isZero() && !VBase) { if (sanitizePerformTypeCheck()) { + SanitizerSet SkippedChecks; + SkippedChecks.set(SanitizerKind::Null, !NullCheckValue); EmitTypeCheck(TCK_Upcast, Loc, Value.getPointer(), - DerivedTy, DerivedAlign, !NullCheckValue); + DerivedTy, DerivedAlign, SkippedChecks); } return Builder.CreateBitCast(Value, BasePtrTy); } @@ -331,8 +333,10 @@ Address CodeGenFunction::GetAddressOfBaseClass( } if (sanitizePerformTypeCheck()) { + SanitizerSet SkippedChecks; + SkippedChecks.set(SanitizerKind::Null, true); EmitTypeCheck(VBase ? TCK_UpcastToVirtualBase : TCK_Upcast, Loc, - Value.getPointer(), DerivedTy, DerivedAlign, true); + Value.getPointer(), DerivedTy, DerivedAlign, SkippedChecks); } // Compute the virtual offset. @@ -685,7 +689,8 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, LValue LHS, /// complete-to-base constructor delegation optimization, i.e. /// emitting the complete constructor as a simple call to the base /// constructor. -static bool IsConstructorDelegationValid(const CXXConstructorDecl *Ctor) { +bool CodeGenFunction::IsConstructorDelegationValid( + const CXXConstructorDecl *Ctor) { // Currently we disable the optimization for classes with virtual // bases because (1) the addresses of parameter variables need to be @@ -1131,10 +1136,11 @@ namespace { RHS = EC->getSubExpr(); if (!RHS) return nullptr; - MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS); - if (dyn_cast<FieldDecl>(ME2->getMemberDecl()) != Field) - return nullptr; - return Field; + if (MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS)) { + if (ME2->getMemberDecl() == Field) + return Field; + } + return nullptr; } else if (CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(S)) { CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MCE->getCalleeDecl()); if (!(MD && isMemcpyEquivalentSpecialMember(MD))) @@ -1384,6 +1390,20 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { const CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(CurGD.getDecl()); CXXDtorType DtorType = CurGD.getDtorType(); + // For an abstract class, non-base destructors are never used (and can't + // be emitted in general, because vbase dtors may not have been validated + // by Sema), but the Itanium ABI doesn't make them optional and Clang may + // in fact emit references to them from other compilations, so emit them + // as functions containing a trap instruction. + if (DtorType != Dtor_Base && Dtor->getParent()->isAbstract()) { + llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap); + TrapCall->setDoesNotReturn(); + TrapCall->setDoesNotThrow(); + Builder.CreateUnreachable(); + Builder.ClearInsertionPoint(); + return; + } + Stmt *Body = Dtor->getBody(); if (Body) incrementProfileCounter(Body); @@ -1416,9 +1436,7 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { // we'd introduce *two* handler blocks. In the Microsoft ABI, we // always delegate because we might not have a definition in this TU. 
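// Illustrative trigger for the trap path added above; the types are made up.
// An abstract class can never be the most-derived object, so its complete and
// deleting destructor variants (D1/D0 under the Itanium ABI) can never run,
// yet other TUs may still emit references to them.
//
//   struct Abstract : virtual SomeBase {
//     virtual void f() = 0;
//     ~Abstract();
//   };
//
// With this change those variants become a trap followed by unreachable
// rather than full destructor bodies.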
switch (DtorType) { - case Dtor_Comdat: - llvm_unreachable("not expecting a COMDAT"); - + case Dtor_Comdat: llvm_unreachable("not expecting a COMDAT"); case Dtor_Deleting: llvm_unreachable("already handled deleting case"); case Dtor_Complete: @@ -1433,7 +1451,9 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { /*Delegating=*/false, LoadCXXThisAddress()); break; } + // Fallthrough: act like we're in the base variant. + LLVM_FALLTHROUGH; case Dtor_Base: assert(Body); @@ -1950,7 +1970,11 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, // Add the rest of the user-supplied arguments. const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); - EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor()); + EvaluationOrder Order = E->isListInitialization() + ? EvaluationOrder::ForceLeftToRight + : EvaluationOrder::Default; + EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor(), + /*ParamsToSkip*/ 0, Order); EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args); } @@ -1970,7 +1994,7 @@ static bool canEmitDelegateCallArgs(CodeGenFunction &CGF, // Likewise if they're inalloca. const CGFunctionInfo &Info = - CGF.CGM.getTypes().arrangeCXXConstructorCall(Args, Ctor, Type, 0); + CGF.CGM.getTypes().arrangeCXXConstructorCall(Args, Ctor, Type, 0, 0); if (Info.usesInAlloca()) return false; } @@ -2012,10 +2036,11 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, return; } + bool PassPrototypeArgs = true; // Check whether we can actually emit the constructor before trying to do so. if (auto Inherited = D->getInheritedConstructor()) { - if (getTypes().inheritingCtorHasParams(Inherited, Type) && - !canEmitDelegateCallArgs(*this, D, Type, Args)) { + PassPrototypeArgs = getTypes().inheritingCtorHasParams(Inherited, Type); + if (PassPrototypeArgs && !canEmitDelegateCallArgs(*this, D, Type, Args)) { EmitInlinedInheritingCXXConstructorCall(D, Type, ForVirtualBase, Delegating, Args); return; @@ -2023,14 +2048,15 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, } // Insert any ABI-specific implicit constructor arguments. - unsigned ExtraArgs = CGM.getCXXABI().addImplicitConstructorArgs( - *this, D, Type, ForVirtualBase, Delegating, Args); + CGCXXABI::AddedStructorArgs ExtraArgs = + CGM.getCXXABI().addImplicitConstructorArgs(*this, D, Type, ForVirtualBase, + Delegating, Args); // Emit the call. llvm::Constant *CalleePtr = CGM.getAddrOfCXXStructor(D, getFromCtorType(Type)); - const CGFunctionInfo &Info = - CGM.getTypes().arrangeCXXConstructorCall(Args, D, Type, ExtraArgs); + const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall( + Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs); CGCallee Callee = CGCallee::forDirect(CalleePtr, D); EmitCall(Info, Callee, ReturnValueSlot(), Args); @@ -2102,7 +2128,9 @@ void CodeGenFunction::EmitInheritedCXXConstructorCall( void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall( const CXXConstructorDecl *Ctor, CXXCtorType CtorType, bool ForVirtualBase, bool Delegating, CallArgList &Args) { - InlinedInheritingConstructorScope Scope(*this, GlobalDecl(Ctor, CtorType)); + GlobalDecl GD(Ctor, CtorType); + InlinedInheritingConstructorScope Scope(*this, GD); + ApplyInlineDebugLocation DebugScope(*this, GD); // Save the arguments to be passed to the inherited constructor. 
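// Why the EvaluationOrder change above matters; the types are illustrative.
// Braced initialization requires left-to-right evaluation of the arguments,
// even under ABIs that otherwise prefer right-to-left emission:
//
//   S s{first(), second()};  // first() must be evaluated before second()
//   S t(first(), second());  // parenthesized form: order remains unspecified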
CXXInheritedCtorInitExprArgs = Args; @@ -2688,79 +2716,6 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad( cast<llvm::PointerType>(VTable->getType())->getElementType()); } -bool -CodeGenFunction::CanDevirtualizeMemberFunctionCall(const Expr *Base, - const CXXMethodDecl *MD) { - // When building with -fapple-kext, all calls must go through the vtable since - // the kernel linker can do runtime patching of vtables. - if (getLangOpts().AppleKext) - return false; - - // If the member function is marked 'final', we know that it can't be - // overridden and can therefore devirtualize it unless it's pure virtual. - if (MD->hasAttr<FinalAttr>()) - return !MD->isPure(); - - // If the base expression (after skipping derived-to-base conversions) is a - // class prvalue, then we can devirtualize. - Base = Base->getBestDynamicClassTypeExpr(); - if (Base->isRValue() && Base->getType()->isRecordType()) - return true; - - // If we don't even know what we would call, we can't devirtualize. - const CXXRecordDecl *BestDynamicDecl = Base->getBestDynamicClassType(); - if (!BestDynamicDecl) - return false; - - // There may be a method corresponding to MD in a derived class. - const CXXMethodDecl *DevirtualizedMethod = - MD->getCorrespondingMethodInClass(BestDynamicDecl); - - // If that method is pure virtual, we can't devirtualize. If this code is - // reached, the result would be UB, not a direct call to the derived class - // function, and we can't assume the derived class function is defined. - if (DevirtualizedMethod->isPure()) - return false; - - // If that method is marked final, we can devirtualize it. - if (DevirtualizedMethod->hasAttr<FinalAttr>()) - return true; - - // Similarly, if the class itself is marked 'final' it can't be overridden - // and we can therefore devirtualize the member function call. - if (BestDynamicDecl->hasAttr<FinalAttr>()) - return true; - - if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Base)) { - if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl())) { - // This is a record decl. We know the type and can devirtualize it. - return VD->getType()->isRecordType(); - } - - return false; - } - - // We can devirtualize calls on an object accessed by a class member access - // expression, since by C++11 [basic.life]p6 we know that it can't refer to - // a derived class object constructed in the same location. - if (const MemberExpr *ME = dyn_cast<MemberExpr>(Base)) - if (const ValueDecl *VD = dyn_cast<ValueDecl>(ME->getMemberDecl())) - return VD->getType()->isRecordType(); - - // Likewise for calls on an object accessed by a (non-reference) pointer to - // member access. - if (auto *BO = dyn_cast<BinaryOperator>(Base)) { - if (BO->isPtrMemOp()) { - auto *MPT = BO->getRHS()->getType()->castAs<MemberPointerType>(); - if (MPT->getPointeeType()->isRecordType()) - return true; - } - } - - // We can't devirtualize the call. 
- return false; -} - void CodeGenFunction::EmitForwardingCallToLambda( const CXXMethodDecl *callOperator, CallArgList &callArgs) { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp index 3666858..b5453bc 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp @@ -51,8 +51,7 @@ DominatingValue<RValue>::saved_type::save(CodeGenFunction &CGF, RValue rv) { if (rv.isComplex()) { CodeGenFunction::ComplexPairTy V = rv.getComplexVal(); llvm::Type *ComplexTy = - llvm::StructType::get(V.first->getType(), V.second->getType(), - (void*) nullptr); + llvm::StructType::get(V.first->getType(), V.second->getType()); Address addr = CGF.CreateDefaultAlignTempAlloca(ComplexTy, "saved-complex"); CGF.Builder.CreateStore(V.first, CGF.Builder.CreateStructGEP(addr, 0, CharUnits())); @@ -418,11 +417,15 @@ void CodeGenFunction::ResolveBranchFixups(llvm::BasicBlock *Block) { } /// Pops cleanup blocks until the given savepoint is reached. -void CodeGenFunction::PopCleanupBlocks(EHScopeStack::stable_iterator Old) { +void CodeGenFunction::PopCleanupBlocks( + EHScopeStack::stable_iterator Old, + std::initializer_list<llvm::Value **> ValuesToReload) { assert(Old.isValid()); + bool HadBranches = false; while (EHStack.stable_begin() != Old) { EHCleanupScope &Scope = cast<EHCleanupScope>(*EHStack.begin()); + HadBranches |= Scope.hasBranches(); // As long as Old strictly encloses the scope's enclosing normal // cleanup, we're going to emit another normal cleanup which @@ -432,14 +435,48 @@ void CodeGenFunction::PopCleanupBlocks(EHScopeStack::stable_iterator Old) { PopCleanupBlock(FallThroughIsBranchThrough); } + + // If we didn't have any branches, the insertion point before cleanups must + // dominate the current insertion point and we don't need to reload any + // values. + if (!HadBranches) + return; + + // Spill and reload all values that the caller wants to be live at the current + // insertion point. + for (llvm::Value **ReloadedValue : ValuesToReload) { + auto *Inst = dyn_cast_or_null<llvm::Instruction>(*ReloadedValue); + if (!Inst) + continue; + + // Don't spill static allocas, they dominate all cleanups. These are created + // by binding a reference to a local variable or temporary. + auto *AI = dyn_cast<llvm::AllocaInst>(Inst); + if (AI && AI->isStaticAlloca()) + continue; + + Address Tmp = + CreateDefaultAlignTempAlloca(Inst->getType(), "tmp.exprcleanup"); + + // Find an insertion point after Inst and spill it to the temporary. + llvm::BasicBlock::iterator InsertBefore; + if (auto *Invoke = dyn_cast<llvm::InvokeInst>(Inst)) + InsertBefore = Invoke->getNormalDest()->getFirstInsertionPt(); + else + InsertBefore = std::next(Inst->getIterator()); + CGBuilderTy(CGM, &*InsertBefore).CreateStore(Inst, Tmp); + + // Reload the value at the current insertion point. + *ReloadedValue = Builder.CreateLoad(Tmp); + } } /// Pops cleanup blocks until the given savepoint is reached, then add the /// cleanups from the given savepoint in the lifetime-extended cleanups stack. -void -CodeGenFunction::PopCleanupBlocks(EHScopeStack::stable_iterator Old, - size_t OldLifetimeExtendedSize) { - PopCleanupBlocks(Old); +void CodeGenFunction::PopCleanupBlocks( + EHScopeStack::stable_iterator Old, size_t OldLifetimeExtendedSize, + std::initializer_list<llvm::Value **> ValuesToReload) { + PopCleanupBlocks(Old, ValuesToReload); // Move our deferred cleanups onto the EH stack. 
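// Shape of the problem the spill/reload above solves, with an illustrative
// call: when the popped cleanups contain branches, the block that defined a
// value no longer dominates the current insertion point, so the value is
// stored to a temporary next to its defining instruction and reloaded here.
//
//   llvm::Value *Result = ...;            // created before the cleanups
//   PopCleanupBlocks(ScopeDepth, {&Result});
//   // Result now refers to the reloaded value and is safe to use from here on.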
for (size_t I = OldLifetimeExtendedSize,
@@ -578,7 +615,7 @@ static void destroyOptimisticNormalEntry(CodeGenFunction &CGF,
llvm::SwitchInst *si = cast<llvm::SwitchInst>(use.getUser());
if (si->getNumCases() == 1 && si->getDefaultDest() == unreachableBB) {
// Replace the switch with a branch.
- llvm::BranchInst::Create(si->case_begin().getCaseSuccessor(), si);
+ llvm::BranchInst::Create(si->case_begin()->getCaseSuccessor(), si);
// The switch operand is a load from the cleanup-dest alloca.
llvm::LoadInst *condition = cast<llvm::LoadInst>(si->getCondition());
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCoroutine.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCoroutine.cpp index 2fdb127..a65faa6 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCoroutine.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCoroutine.cpp
@@ -11,29 +11,70 @@
//
//===----------------------------------------------------------------------===//
+#include "CGCleanup.h"
#include "CodeGenFunction.h"
+#include "llvm/ADT/ScopeExit.h"
#include "clang/AST/StmtCXX.h"
+#include "clang/AST/StmtVisitor.h"
using namespace clang;
using namespace CodeGen;
-namespace clang {
-namespace CodeGen {
+using llvm::Value;
+using llvm::BasicBlock;
+
+namespace {
+enum class AwaitKind { Init, Normal, Yield, Final };
+static constexpr llvm::StringLiteral AwaitKindStr[] = {"init", "await", "yield",
"final"};
+}
+
+struct clang::CodeGen::CGCoroData {
+ // The kind of the current await expression and how many
+ // await/yield expressions have been encountered so far.
+ // These are used to generate pretty labels for await expressions in LLVM IR.
+ AwaitKind CurrentAwaitKind = AwaitKind::Init;
+ unsigned AwaitNum = 0;
+ unsigned YieldNum = 0;
+
+ // How many co_return statements are in the coroutine. Used to decide whether
+ // we need to add an equivalent of co_return; at the end of the user-authored body.
+ unsigned CoreturnCount = 0;
+
+ // A branch to this block is emitted when the coroutine needs to suspend.
+ llvm::BasicBlock *SuspendBB = nullptr;
+
+ // Stores the jump destination just before the coroutine memory is freed.
+ // This is the destination that every suspend point jumps to for the cleanup
+ // branch.
+ CodeGenFunction::JumpDest CleanupJD;
+
+ // Stores the jump destination just before the final suspend. The co_return
+ // statements jump to this point after calling the return_xxx promise member.
+ CodeGenFunction::JumpDest FinalJD;
-struct CGCoroData {
// Stores the llvm.coro.id emitted in the function so that we can supply it
// as the first argument to coro.begin, coro.alloc and coro.free intrinsics.
// Note: llvm.coro.id returns a token that cannot be directly expressed in a
// builtin.
llvm::CallInst *CoroId = nullptr;
+
+ // Stores the llvm.coro.begin emitted in the function so that we can replace
+ // all coro.frame intrinsics with the SSA value of coro.begin, which returns
+ // the address of the coroutine frame of the current coroutine.
+ llvm::CallInst *CoroBegin = nullptr;
+
+ // Stores the last emitted coro.free for the deallocate expressions; we use it
+ // to wrap the dealloc code with if(auto mem = coro.free) dealloc(mem).
+ llvm::CallInst *LastCoroFree = nullptr;
+
// If coro.id came from the builtin, remember the expression to give better
// diagnostic. If CoroIdExpr is nullptr, the coro.id was created by
// EmitCoroutineBody.
CallExpr const *CoroIdExpr = nullptr;
};
-}
-}
+// Defining these here allows us to keep CGCoroData private to this file.
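As a rough, editorial illustration of how CurrentAwaitKind, AwaitNum and YieldNum drive the "pretty labels" mentioned above (the coroutine and its return type below are hypothetical; only the label shapes matter), a coroutine such as

    generator<int> counter() {   // hypothetical coroutine return type
      co_yield 1;                // suspend point prefixed "yield"  (the first one gets no numeric suffix)
      co_yield 2;                // suspend point prefixed "yield2"
      co_await tick();           // suspend point prefixed "await"
    }

also gets the implicit suspend points prefixed "init" and "final"; the emission code later in this file appends ".ready", ".suspend" and ".cleanup" to these prefixes when naming each suspend point's basic blocks.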
clang::CodeGen::CodeGenFunction::CGCoroInfo::CGCoroInfo() {} CodeGenFunction::CGCoroInfo::~CGCoroInfo() {} @@ -59,19 +100,528 @@ static void createCoroData(CodeGenFunction &CGF, CurCoro.Data->CoroIdExpr = CoroIdExpr; } +// Synthesize a pretty name for a suspend point. +static SmallString<32> buildSuspendPrefixStr(CGCoroData &Coro, AwaitKind Kind) { + unsigned No = 0; + switch (Kind) { + case AwaitKind::Init: + case AwaitKind::Final: + break; + case AwaitKind::Normal: + No = ++Coro.AwaitNum; + break; + case AwaitKind::Yield: + No = ++Coro.YieldNum; + break; + } + SmallString<32> Prefix(AwaitKindStr[static_cast<unsigned>(Kind)]); + if (No > 1) { + Twine(No).toVector(Prefix); + } + return Prefix; +} + +// Emit suspend expression which roughly looks like: +// +// auto && x = CommonExpr(); +// if (!x.await_ready()) { +// llvm_coro_save(); +// x.await_suspend(...); (*) +// llvm_coro_suspend(); (**) +// } +// x.await_resume(); +// +// where the result of the entire expression is the result of x.await_resume() +// +// (*) If x.await_suspend return type is bool, it allows to veto a suspend: +// if (x.await_suspend(...)) +// llvm_coro_suspend(); +// +// (**) llvm_coro_suspend() encodes three possible continuations as +// a switch instruction: +// +// %where-to = call i8 @llvm.coro.suspend(...) +// switch i8 %where-to, label %coro.ret [ ; jump to epilogue to suspend +// i8 0, label %yield.ready ; go here when resumed +// i8 1, label %yield.cleanup ; go here when destroyed +// ] +// +// See llvm's docs/Coroutines.rst for more details. +// +namespace { + struct LValueOrRValue { + LValue LV; + RValue RV; + }; +} +static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro, + CoroutineSuspendExpr const &S, + AwaitKind Kind, AggValueSlot aggSlot, + bool ignoreResult, bool forLValue) { + auto *E = S.getCommonExpr(); + + auto Binder = + CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E); + auto UnbindOnExit = llvm::make_scope_exit([&] { Binder.unbind(CGF); }); + + auto Prefix = buildSuspendPrefixStr(Coro, Kind); + BasicBlock *ReadyBlock = CGF.createBasicBlock(Prefix + Twine(".ready")); + BasicBlock *SuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend")); + BasicBlock *CleanupBlock = CGF.createBasicBlock(Prefix + Twine(".cleanup")); + + // If expression is ready, no need to suspend. + CGF.EmitBranchOnBoolExpr(S.getReadyExpr(), ReadyBlock, SuspendBlock, 0); + + // Otherwise, emit suspend logic. + CGF.EmitBlock(SuspendBlock); + + auto &Builder = CGF.Builder; + llvm::Function *CoroSave = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_save); + auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy); + auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr}); + + auto *SuspendRet = CGF.EmitScalarExpr(S.getSuspendExpr()); + if (SuspendRet != nullptr) { + // Veto suspension if requested by bool returning await_suspend. + assert(SuspendRet->getType()->isIntegerTy(1) && + "Sema should have already checked that it is void or bool"); + BasicBlock *RealSuspendBlock = + CGF.createBasicBlock(Prefix + Twine(".suspend.bool")); + CGF.Builder.CreateCondBr(SuspendRet, RealSuspendBlock, ReadyBlock); + SuspendBlock = RealSuspendBlock; + CGF.EmitBlock(RealSuspendBlock); + } + + // Emit the suspend point. 
+ const bool IsFinalSuspend = (Kind == AwaitKind::Final); + llvm::Function *CoroSuspend = + CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_suspend); + auto *SuspendResult = Builder.CreateCall( + CoroSuspend, {SaveCall, Builder.getInt1(IsFinalSuspend)}); + + // Create a switch capturing three possible continuations. + auto *Switch = Builder.CreateSwitch(SuspendResult, Coro.SuspendBB, 2); + Switch->addCase(Builder.getInt8(0), ReadyBlock); + Switch->addCase(Builder.getInt8(1), CleanupBlock); + + // Emit cleanup for this suspend point. + CGF.EmitBlock(CleanupBlock); + CGF.EmitBranchThroughCleanup(Coro.CleanupJD); + + // Emit await_resume expression. + CGF.EmitBlock(ReadyBlock); + LValueOrRValue Res; + if (forLValue) + Res.LV = CGF.EmitLValue(S.getResumeExpr()); + else + Res.RV = CGF.EmitAnyExpr(S.getResumeExpr(), aggSlot, ignoreResult); + return Res; +} + +RValue CodeGenFunction::EmitCoawaitExpr(const CoawaitExpr &E, + AggValueSlot aggSlot, + bool ignoreResult) { + return emitSuspendExpression(*this, *CurCoro.Data, E, + CurCoro.Data->CurrentAwaitKind, aggSlot, + ignoreResult, /*forLValue*/false).RV; +} +RValue CodeGenFunction::EmitCoyieldExpr(const CoyieldExpr &E, + AggValueSlot aggSlot, + bool ignoreResult) { + return emitSuspendExpression(*this, *CurCoro.Data, E, AwaitKind::Yield, + aggSlot, ignoreResult, /*forLValue*/false).RV; +} + +void CodeGenFunction::EmitCoreturnStmt(CoreturnStmt const &S) { + ++CurCoro.Data->CoreturnCount; + EmitStmt(S.getPromiseCall()); + EmitBranchThroughCleanup(CurCoro.Data->FinalJD); +} + + +#ifndef NDEBUG +static QualType getCoroutineSuspendExprReturnType(const ASTContext &Ctx, + const CoroutineSuspendExpr *E) { + const auto *RE = E->getResumeExpr(); + // Is it possible for RE to be a CXXBindTemporaryExpr wrapping + // a MemberCallExpr? + assert(isa<CallExpr>(RE) && "unexpected suspend expression type"); + return cast<CallExpr>(RE)->getCallReturnType(Ctx); +} +#endif + +LValue +CodeGenFunction::EmitCoawaitLValue(const CoawaitExpr *E) { + assert(getCoroutineSuspendExprReturnType(getContext(), E)->isReferenceType() && + "Can't have a scalar return unless the return type is a " + "reference type!"); + return emitSuspendExpression(*this, *CurCoro.Data, *E, + CurCoro.Data->CurrentAwaitKind, AggValueSlot::ignored(), + /*ignoreResult*/false, /*forLValue*/true).LV; +} + +LValue +CodeGenFunction::EmitCoyieldLValue(const CoyieldExpr *E) { + assert(getCoroutineSuspendExprReturnType(getContext(), E)->isReferenceType() && + "Can't have a scalar return unless the return type is a " + "reference type!"); + return emitSuspendExpression(*this, *CurCoro.Data, *E, + AwaitKind::Yield, AggValueSlot::ignored(), + /*ignoreResult*/false, /*forLValue*/true).LV; +} + +// Hunts for the parameter reference in the parameter copy/move declaration. +namespace { +struct GetParamRef : public StmtVisitor<GetParamRef> { +public: + DeclRefExpr *Expr = nullptr; + GetParamRef() {} + void VisitDeclRefExpr(DeclRefExpr *E) { + assert(Expr == nullptr && "multilple declref in param move"); + Expr = E; + } + void VisitStmt(Stmt *S) { + for (auto *C : S->children()) { + if (C) + Visit(C); + } + } +}; +} + +// This class replaces references to parameters to their copies by changing +// the addresses in CGF.LocalDeclMap and restoring back the original values in +// its destructor. 
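To make the bool-returning await_suspend ("veto") path above concrete, here is a minimal, purely illustrative awaiter, assuming the coroutines-TS coroutine_handle of this era; when await_suspend returns false, the conditional branch emitted above goes straight to the ".ready" block, llvm.coro.suspend is skipped, and await_resume() is called immediately:

    #include <experimental/coroutine>

    // Hypothetical awaiter; not part of this patch.
    struct maybe_suspend {
      bool really_suspend;
      bool await_ready() const noexcept { return false; }
      bool await_suspend(std::experimental::coroutine_handle<>) const noexcept {
        return really_suspend;   // returning false vetoes the suspend
      }
      int await_resume() const noexcept { return 42; }  // result of the co_await expression
    };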
+ +namespace { + struct ParamReferenceReplacerRAII { + CodeGenFunction::DeclMapTy SavedLocals; + CodeGenFunction::DeclMapTy& LocalDeclMap; + + ParamReferenceReplacerRAII(CodeGenFunction::DeclMapTy &LocalDeclMap) + : LocalDeclMap(LocalDeclMap) {} + + void addCopy(DeclStmt const *PM) { + // Figure out what param it refers to. + + assert(PM->isSingleDecl()); + VarDecl const*VD = static_cast<VarDecl const*>(PM->getSingleDecl()); + Expr const *InitExpr = VD->getInit(); + GetParamRef Visitor; + Visitor.Visit(const_cast<Expr*>(InitExpr)); + assert(Visitor.Expr); + auto *DREOrig = cast<DeclRefExpr>(Visitor.Expr); + auto *PD = DREOrig->getDecl(); + + auto it = LocalDeclMap.find(PD); + assert(it != LocalDeclMap.end() && "parameter is not found"); + SavedLocals.insert({ PD, it->second }); + + auto copyIt = LocalDeclMap.find(VD); + assert(copyIt != LocalDeclMap.end() && "parameter copy is not found"); + it->second = copyIt->getSecond(); + } + + ~ParamReferenceReplacerRAII() { + for (auto&& SavedLocal : SavedLocals) { + LocalDeclMap.insert({SavedLocal.first, SavedLocal.second}); + } + } + }; +} + +// For WinEH exception representation backend needs to know what funclet coro.end +// belongs to. That information is passed in a funclet bundle. +static SmallVector<llvm::OperandBundleDef, 1> +getBundlesForCoroEnd(CodeGenFunction &CGF) { + SmallVector<llvm::OperandBundleDef, 1> BundleList; + + if (llvm::Instruction *EHPad = CGF.CurrentFuncletPad) + BundleList.emplace_back("funclet", EHPad); + + return BundleList; +} + +namespace { +// We will insert coro.end to cut any of the destructors for objects that +// do not need to be destroyed once the coroutine is resumed. +// See llvm/docs/Coroutines.rst for more details about coro.end. +struct CallCoroEnd final : public EHScopeStack::Cleanup { + void Emit(CodeGenFunction &CGF, Flags flags) override { + auto &CGM = CGF.CGM; + auto *NullPtr = llvm::ConstantPointerNull::get(CGF.Int8PtrTy); + llvm::Function *CoroEndFn = CGM.getIntrinsic(llvm::Intrinsic::coro_end); + // See if we have a funclet bundle to associate coro.end with. (WinEH) + auto Bundles = getBundlesForCoroEnd(CGF); + auto *CoroEnd = CGF.Builder.CreateCall( + CoroEndFn, {NullPtr, CGF.Builder.getTrue()}, Bundles); + if (Bundles.empty()) { + // Otherwise, (landingpad model), create a conditional branch that leads + // either to a cleanup block or a block with EH resume instruction. + auto *ResumeBB = CGF.getEHResumeBlock(/*cleanup=*/true); + auto *CleanupContBB = CGF.createBasicBlock("cleanup.cont"); + CGF.Builder.CreateCondBr(CoroEnd, ResumeBB, CleanupContBB); + CGF.EmitBlock(CleanupContBB); + } + } +}; +} + +namespace { +// Make sure to call coro.delete on scope exit. +struct CallCoroDelete final : public EHScopeStack::Cleanup { + Stmt *Deallocate; + + // Emit "if (coro.free(CoroId, CoroBegin)) Deallocate;" + + // Note: That deallocation will be emitted twice: once for a normal exit and + // once for exceptional exit. This usage is safe because Deallocate does not + // contain any declarations. The SubStmtBuilder::makeNewAndDeleteExpr() + // builds a single call to a deallocation function which is safe to emit + // multiple times. + void Emit(CodeGenFunction &CGF, Flags) override { + // Remember the current point, as we are going to emit deallocation code + // first to get to coro.free instruction that is an argument to a delete + // call. 
+ BasicBlock *SaveInsertBlock = CGF.Builder.GetInsertBlock();
+
+ auto *FreeBB = CGF.createBasicBlock("coro.free");
+ CGF.EmitBlock(FreeBB);
+ CGF.EmitStmt(Deallocate);
+
+ auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free");
+ CGF.EmitBlock(AfterFreeBB);
+
+ // We should have captured coro.free from the emission of deallocate.
+ auto *CoroFree = CGF.CurCoro.Data->LastCoroFree;
+ if (!CoroFree) {
+ CGF.CGM.Error(Deallocate->getLocStart(),
+ "Deallocation expression does not refer to coro.free");
+ return;
+ }
+
+ // Get back to the block we were in originally and move coro.free there.
+ auto *InsertPt = SaveInsertBlock->getTerminator();
+ CoroFree->moveBefore(InsertPt);
+ CGF.Builder.SetInsertPoint(InsertPt);
+
+ // Add if (auto *mem = coro.free) Deallocate;
+ auto *NullPtr = llvm::ConstantPointerNull::get(CGF.Int8PtrTy);
+ auto *Cond = CGF.Builder.CreateICmpNE(CoroFree, NullPtr);
+ CGF.Builder.CreateCondBr(Cond, FreeBB, AfterFreeBB);
+
+ // No longer need the old terminator.
+ InsertPt->eraseFromParent();
+ CGF.Builder.SetInsertPoint(AfterFreeBB);
+ }
+ explicit CallCoroDelete(Stmt *DeallocStmt) : Deallocate(DeallocStmt) {}
+};
+}
+
+namespace {
+struct GetReturnObjectManager {
+ CodeGenFunction &CGF;
+ CGBuilderTy &Builder;
+ const CoroutineBodyStmt &S;
+
+ Address GroActiveFlag;
+ CodeGenFunction::AutoVarEmission GroEmission;
+
+ GetReturnObjectManager(CodeGenFunction &CGF, const CoroutineBodyStmt &S)
+ : CGF(CGF), Builder(CGF.Builder), S(S), GroActiveFlag(Address::invalid()),
+ GroEmission(CodeGenFunction::AutoVarEmission::invalid()) {}
+
+ // The gro variable has to outlive the coroutine frame and the coroutine
+ // promise, but it can only be initialized after the coroutine promise was
+ // created; thus, we split its emission in two parts. EmitGroAlloca emits an
+ // alloca and sets up the cleanups. Later, when the coroutine promise is
+ // available, we initialize the gro and set the flag that the cleanup is now
+ // active.
+
+ void EmitGroAlloca() {
+ auto *GroDeclStmt = dyn_cast<DeclStmt>(S.getResultDecl());
+ if (!GroDeclStmt) {
+ // If get_return_object returns void, no need to do an alloca.
+ return;
+ }
+
+ auto *GroVarDecl = cast<VarDecl>(GroDeclStmt->getSingleDecl());
+
+ // Set the GRO flag to indicate that it is not initialized yet.
+ GroActiveFlag =
+ CGF.CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(), "gro.active");
+ Builder.CreateStore(Builder.getFalse(), GroActiveFlag);
+
+ GroEmission = CGF.EmitAutoVarAlloca(*GroVarDecl);
+
+ // Remember the top of EHStack before emitting the cleanup.
+ auto old_top = CGF.EHStack.stable_begin();
+ CGF.EmitAutoVarCleanups(GroEmission);
+ auto top = CGF.EHStack.stable_begin();
+
+ // Make the cleanup conditional on gro.active.
+ for (auto b = CGF.EHStack.find(top), e = CGF.EHStack.find(old_top);
+ b != e; b++) {
+ if (auto *Cleanup = dyn_cast<EHCleanupScope>(&*b)) {
+ assert(!Cleanup->hasActiveFlag() && "cleanup already has active flag?");
+ Cleanup->setActiveFlag(GroActiveFlag);
+ Cleanup->setTestFlagInEHCleanup();
+ Cleanup->setTestFlagInNormalCleanup();
+ }
+ }
+ }
+
+ void EmitGroInit() {
+ if (!GroActiveFlag.isValid()) {
+ // No Gro variable was allocated. Simply emit the call to
+ // get_return_object.
+ CGF.EmitStmt(S.getResultDecl()); + return; + } + + CGF.EmitAutoVarInit(GroEmission); + Builder.CreateStore(Builder.getTrue(), GroActiveFlag); + } +}; +} + +static void emitBodyAndFallthrough(CodeGenFunction &CGF, + const CoroutineBodyStmt &S, Stmt *Body) { + CGF.EmitStmt(Body); + const bool CanFallthrough = CGF.Builder.GetInsertBlock(); + if (CanFallthrough) + if (Stmt *OnFallthrough = S.getFallthroughHandler()) + CGF.EmitStmt(OnFallthrough); +} + void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); auto &TI = CGM.getContext().getTargetInfo(); unsigned NewAlign = TI.getNewAlign() / TI.getCharWidth(); + auto *EntryBB = Builder.GetInsertBlock(); + auto *AllocBB = createBasicBlock("coro.alloc"); + auto *InitBB = createBasicBlock("coro.init"); + auto *FinalBB = createBasicBlock("coro.final"); + auto *RetBB = createBasicBlock("coro.ret"); + auto *CoroId = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::coro_id), {Builder.getInt32(NewAlign), NullPtr, NullPtr, NullPtr}); createCoroData(*this, CurCoro, CoroId); + CurCoro.Data->SuspendBB = RetBB; + + // Backend is allowed to elide memory allocations, to help it, emit + // auto mem = coro.alloc() ? 0 : ... allocation code ...; + auto *CoroAlloc = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::coro_alloc), {CoroId}); + + Builder.CreateCondBr(CoroAlloc, AllocBB, InitBB); + + EmitBlock(AllocBB); + auto *AllocateCall = EmitScalarExpr(S.getAllocate()); + auto *AllocOrInvokeContBB = Builder.GetInsertBlock(); + + // Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided. + if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) { + auto *RetOnFailureBB = createBasicBlock("coro.ret.on.failure"); + + // See if allocation was successful. + auto *NullPtr = llvm::ConstantPointerNull::get(Int8PtrTy); + auto *Cond = Builder.CreateICmpNE(AllocateCall, NullPtr); + Builder.CreateCondBr(Cond, InitBB, RetOnFailureBB); + + // If not, return OnAllocFailure object. + EmitBlock(RetOnFailureBB); + EmitStmt(RetOnAllocFailure); + } + else { + Builder.CreateBr(InitBB); + } + + EmitBlock(InitBB); + + // Pass the result of the allocation to coro.begin. + auto *Phi = Builder.CreatePHI(VoidPtrTy, 2); + Phi->addIncoming(NullPtr, EntryBB); + Phi->addIncoming(AllocateCall, AllocOrInvokeContBB); + auto *CoroBegin = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi}); + CurCoro.Data->CoroBegin = CoroBegin; + + GetReturnObjectManager GroManager(*this, S); + GroManager.EmitGroAlloca(); + + CurCoro.Data->CleanupJD = getJumpDestInCurrentScope(RetBB); + { + ParamReferenceReplacerRAII ParamReplacer(LocalDeclMap); + CodeGenFunction::RunCleanupsScope ResumeScope(*this); + EHStack.pushCleanup<CallCoroDelete>(NormalAndEHCleanup, S.getDeallocate()); + + // Create parameter copies. We do it before creating a promise, since an + // evolution of coroutine TS may allow promise constructor to observe + // parameter copies. + for (auto *PM : S.getParamMoves()) { + EmitStmt(PM); + ParamReplacer.addCopy(cast<DeclStmt>(PM)); + // TODO: if(CoroParam(...)) need to surround ctor and dtor + // for the copy, so that llvm can elide it if the copy is + // not needed. + } - EmitScalarExpr(S.getAllocate()); - // FIXME: Emit the rest of the coroutine. 
- EmitStmt(S.getDeallocate()); + EmitStmt(S.getPromiseDeclStmt()); + + Address PromiseAddr = GetAddrOfLocalVar(S.getPromiseDecl()); + auto *PromiseAddrVoidPtr = + new llvm::BitCastInst(PromiseAddr.getPointer(), VoidPtrTy, "", CoroId); + // Update CoroId to refer to the promise. We could not do it earlier because + // promise local variable was not emitted yet. + CoroId->setArgOperand(1, PromiseAddrVoidPtr); + + // Now we have the promise, initialize the GRO + GroManager.EmitGroInit(); + + EHStack.pushCleanup<CallCoroEnd>(EHCleanup); + + CurCoro.Data->CurrentAwaitKind = AwaitKind::Init; + EmitStmt(S.getInitSuspendStmt()); + CurCoro.Data->FinalJD = getJumpDestInCurrentScope(FinalBB); + + CurCoro.Data->CurrentAwaitKind = AwaitKind::Normal; + + if (auto *OnException = S.getExceptionHandler()) { + auto Loc = S.getLocStart(); + CXXCatchStmt Catch(Loc, /*exDecl=*/nullptr, OnException); + auto *TryStmt = CXXTryStmt::Create(getContext(), Loc, S.getBody(), &Catch); + + EnterCXXTryStmt(*TryStmt); + emitBodyAndFallthrough(*this, S, TryStmt->getTryBlock()); + ExitCXXTryStmt(*TryStmt); + } + else { + emitBodyAndFallthrough(*this, S, S.getBody()); + } + + // See if we need to generate final suspend. + const bool CanFallthrough = Builder.GetInsertBlock(); + const bool HasCoreturns = CurCoro.Data->CoreturnCount > 0; + if (CanFallthrough || HasCoreturns) { + EmitBlock(FinalBB); + CurCoro.Data->CurrentAwaitKind = AwaitKind::Final; + EmitStmt(S.getFinalSuspendStmt()); + } else { + // We don't need FinalBB. Emit it to make sure the block is deleted. + EmitBlock(FinalBB, /*IsFinished=*/true); + } + } + + EmitBlock(RetBB); + // Emit coro.end before getReturnStmt (and parameter destructors), since + // resume and destroy parts of the coroutine should not include them. + llvm::Function *CoroEnd = CGM.getIntrinsic(llvm::Intrinsic::coro_end); + Builder.CreateCall(CoroEnd, {NullPtr, Builder.getFalse()}); + + if (Stmt *Ret = S.getReturnStmt()) + EmitStmt(Ret); } // Emit coroutine intrinsic and patch up arguments of the token type. @@ -81,6 +631,17 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, switch (IID) { default: break; + // The coro.frame builtin is replaced with an SSA value of the coro.begin + // intrinsic. + case llvm::Intrinsic::coro_frame: { + if (CurCoro.Data && CurCoro.Data->CoroBegin) { + return RValue::get(CurCoro.Data->CoroBegin); + } + CGM.Error(E->getLocStart(), "this builtin expect that __builtin_coro_begin " + "has been used earlier in this function"); + auto NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); + return RValue::get(NullPtr); + } // The following three intrinsics take a token parameter referring to a token // returned by earlier call to @llvm.coro.id. Since we cannot represent it in // builtins, we patch it up here. @@ -94,6 +655,7 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, CGM.Error(E->getLocStart(), "this builtin expect that __builtin_coro_id has" " been used earlier in this function"); // Fallthrough to the next case to add TokenNone as the first argument. + LLVM_FALLTHROUGH; } // @llvm.coro.suspend takes a token parameter. Add token 'none' as the first // argument. @@ -107,10 +669,22 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, llvm::Value *F = CGM.getIntrinsic(IID); llvm::CallInst *Call = Builder.CreateCall(F, Args); + // Note: The following code is to enable to emit coro.id and coro.begin by + // hand to experiment with coroutines in C. // If we see @llvm.coro.id remember it in the CoroData. 
We will update // coro.alloc, coro.begin and coro.free intrinsics to refer to it. if (IID == llvm::Intrinsic::coro_id) { createCoroData(*this, CurCoro, Call, E); } + else if (IID == llvm::Intrinsic::coro_begin) { + if (CurCoro.Data) + CurCoro.Data->CoroBegin = Call; + } + else if (IID == llvm::Intrinsic::coro_free) { + // Remember the last coro_free as we need it to build the conditional + // deletion of the coroutine frame. + if (CurCoro.Data) + CurCoro.Data->LastCoroFree = Call; + } return RValue::get(Call); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp index 12a6803..18b1d10 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp @@ -107,8 +107,8 @@ void ApplyDebugLocation::init(SourceLocation TemporaryLocation, // Construct a location that has a valid scope, but no line info. assert(!DI->LexicalBlockStack.empty()); - CGF->Builder.SetCurrentDebugLocation( - llvm::DebugLoc::get(0, 0, DI->LexicalBlockStack.back())); + CGF->Builder.SetCurrentDebugLocation(llvm::DebugLoc::get( + 0, 0, DI->LexicalBlockStack.back(), DI->getInlinedAt())); } ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF, const Expr *E) @@ -134,6 +134,30 @@ ApplyDebugLocation::~ApplyDebugLocation() { CGF->Builder.SetCurrentDebugLocation(std::move(OriginalLocation)); } +ApplyInlineDebugLocation::ApplyInlineDebugLocation(CodeGenFunction &CGF, + GlobalDecl InlinedFn) + : CGF(&CGF) { + if (!CGF.getDebugInfo()) { + this->CGF = nullptr; + return; + } + auto &DI = *CGF.getDebugInfo(); + SavedLocation = DI.getLocation(); + assert((DI.getInlinedAt() == + CGF.Builder.getCurrentDebugLocation()->getInlinedAt()) && + "CGDebugInfo and IRBuilder are out of sync"); + + DI.EmitInlineFunctionStart(CGF.Builder, InlinedFn); +} + +ApplyInlineDebugLocation::~ApplyInlineDebugLocation() { + if (!CGF) + return; + auto &DI = *CGF->getDebugInfo(); + DI.EmitInlineFunctionEnd(CGF->Builder); + DI.EmitLocation(CGF->Builder, SavedLocation); +} + void CGDebugInfo::setLocation(SourceLocation Loc) { // If the new location isn't valid return. if (Loc.isInvalid()) @@ -185,7 +209,7 @@ llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context, // Check namespace. if (const auto *NSDecl = dyn_cast<NamespaceDecl>(Context)) - return getOrCreateNameSpace(NSDecl); + return getOrCreateNamespace(NSDecl); if (const auto *RDecl = dyn_cast<RecordDecl>(Context)) if (!RDecl->isDependentType()) @@ -249,8 +273,8 @@ StringRef CGDebugInfo::getObjCMethodName(const ObjCMethodDecl *OMD) { << OC->getIdentifier()->getNameStart() << ')'; } } else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) { - OS << ((const NamedDecl *)OCD)->getIdentifier()->getNameStart() << '(' - << OCD->getIdentifier()->getNameStart() << ')'; + OS << OCD->getClassInterface()->getName() << '(' + << OCD->getName() << ')'; } else if (isa<ObjCProtocolDecl>(DC)) { // We can extract the type of the class from the self pointer. if (ImplicitParamDecl *SelfDecl = OMD->getSelfDecl()) { @@ -504,12 +528,15 @@ void CGDebugInfo::CreateCompileUnit() { // Create new compile unit. // FIXME - Eliminate TheCU. 
TheCU = DBuilder.createCompileUnit( - LangTag, DBuilder.createFile(remapDIPath(MainFileName), - remapDIPath(getCurrentDirname()), CSKind, - Checksum), + LangTag, + DBuilder.createFile(remapDIPath(MainFileName), + remapDIPath(getCurrentDirname()), CSKind, Checksum), Producer, LO.Optimize, CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers, - CGM.getCodeGenOpts().SplitDwarfFile, EmissionKind, 0 /* DWOid */, - CGM.getCodeGenOpts().SplitDwarfInlining); + CGM.getCodeGenOpts().EnableSplitDwarf + ? "" + : CGM.getCodeGenOpts().SplitDwarfFile, + EmissionKind, 0 /* DWOid */, CGM.getCodeGenOpts().SplitDwarfInlining, + CGM.getCodeGenOpts().DebugInfoForProfiling); } llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { @@ -581,8 +608,6 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return getOrCreateStructPtrType("opencl_clk_event_t", OCLClkEventDITy); case BuiltinType::OCLQueue: return getOrCreateStructPtrType("opencl_queue_t", OCLQueueDITy); - case BuiltinType::OCLNDRange: - return getOrCreateStructPtrType("opencl_ndrange_t", OCLNDRangeDITy); case BuiltinType::OCLReserveID: return getOrCreateStructPtrType("opencl_reserve_id_t", OCLReserveIDDITy); @@ -793,17 +818,19 @@ llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag, // Bit size, align and offset of the type. // Size is always the size of a pointer. We can't use getTypeSize here // because that does not return the correct value for references. - unsigned AS = CGM.getContext().getTargetAddressSpace(PointeeTy); - uint64_t Size = CGM.getTarget().getPointerWidth(AS); + unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(PointeeTy); + uint64_t Size = CGM.getTarget().getPointerWidth(AddressSpace); auto Align = getTypeAlignIfRequired(Ty, CGM.getContext()); + Optional<unsigned> DWARFAddressSpace = + CGM.getTarget().getDWARFAddressSpace(AddressSpace); if (Tag == llvm::dwarf::DW_TAG_reference_type || Tag == llvm::dwarf::DW_TAG_rvalue_reference_type) return DBuilder.createReferenceType(Tag, getOrCreateType(PointeeTy, Unit), - Size, Align); + Size, Align, DWARFAddressSpace); else return DBuilder.createPointerType(getOrCreateType(PointeeTy, Unit), Size, - Align); + Align, DWARFAddressSpace); } llvm::DIType *CGDebugInfo::getOrCreateStructPtrType(StringRef Name, @@ -929,7 +956,7 @@ static unsigned getDwarfCC(CallingConv CC) { return llvm::dwarf::DW_CC_BORLAND_pascal; // FIXME: Create new DW_CC_ codes for these calling conventions. - case CC_X86_64Win64: + case CC_Win64: case CC_X86_64SysV: case CC_AAPCS: case CC_AAPCS_VFP: @@ -1014,7 +1041,13 @@ llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl, assert(SizeInBits > 0 && "found named 0-width bitfield"); uint64_t StorageOffsetInBits = CGM.getContext().toBits(BitFieldInfo.StorageOffset); - uint64_t OffsetInBits = StorageOffsetInBits + BitFieldInfo.Offset; + uint64_t Offset = BitFieldInfo.Offset; + // The bit offsets for big endian machines are reversed for big + // endian target, compensate for that as the DIDerivedType requires + // un-reversed offsets. 
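As a quick numeric check of the compensation computed just below (hypothetical values): a 3-bit field recorded at Offset 5 within a 32-bit storage unit on a big-endian target yields an un-reversed offset of StorageSize - Size - Offset = 32 - 3 - 5 = 24, so OffsetInBits becomes StorageOffsetInBits + 24.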
+ if (CGM.getDataLayout().isBigEndian()) + Offset = BitFieldInfo.StorageSize - BitFieldInfo.Size - Offset; + uint64_t OffsetInBits = StorageOffsetInBits + Offset; llvm::DINode::DIFlags Flags = getAccessFlag(BitFieldDecl->getAccess(), RD); return DBuilder.createBitFieldMemberType( RecordTy, Name, File, Line, SizeInBits, OffsetInBits, StorageOffsetInBits, @@ -1608,8 +1641,13 @@ llvm::DIType *CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile *Unit) { llvm::DITypeRefArray SElements = DBuilder.getOrCreateTypeArray(STy); llvm::DIType *SubTy = DBuilder.createSubroutineType(SElements); unsigned Size = Context.getTypeSize(Context.VoidPtrTy); + unsigned VtblPtrAddressSpace = CGM.getTarget().getVtblPtrAddressSpace(); + Optional<unsigned> DWARFAddressSpace = + CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace); + llvm::DIType *vtbl_ptr_type = - DBuilder.createPointerType(SubTy, Size, 0, "__vtbl_ptr_type"); + DBuilder.createPointerType(SubTy, Size, 0, DWARFAddressSpace, + "__vtbl_ptr_type"); VTablePtrType = DBuilder.createPointerType(vtbl_ptr_type, Size); return VTablePtrType; } @@ -1648,10 +1686,14 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit, unsigned VSlotCount = VFTLayout.vtable_components().size() - CGM.getLangOpts().RTTIData; unsigned VTableWidth = PtrWidth * VSlotCount; + unsigned VtblPtrAddressSpace = CGM.getTarget().getVtblPtrAddressSpace(); + Optional<unsigned> DWARFAddressSpace = + CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace); // Create a very wide void* type and insert it directly in the element list. llvm::DIType *VTableType = - DBuilder.createPointerType(nullptr, VTableWidth, 0, "__vtbl_ptr_type"); + DBuilder.createPointerType(nullptr, VTableWidth, 0, DWARFAddressSpace, + "__vtbl_ptr_type"); EltTys.push_back(VTableType); // The vptr is a pointer to this special vtable type. @@ -1714,7 +1756,27 @@ void CGDebugInfo::completeType(const RecordDecl *RD) { completeRequiredType(RD); } +/// Return true if the class or any of its methods are marked dllimport. +static bool isClassOrMethodDLLImport(const CXXRecordDecl *RD) { + if (RD->hasAttr<DLLImportAttr>()) + return true; + for (const CXXMethodDecl *MD : RD->methods()) + if (MD->hasAttr<DLLImportAttr>()) + return true; + return false; +} + void CGDebugInfo::completeClassData(const RecordDecl *RD) { + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + if (CXXRD->isDynamicClass() && + CGM.getVTableLinkage(CXXRD) == + llvm::GlobalValue::AvailableExternallyLinkage && + !isClassOrMethodDLLImport(CXXRD)) + return; + completeClass(RD); +} + +void CGDebugInfo::completeClass(const RecordDecl *RD) { if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; QualType Ty = CGM.getContext().getRecordType(RD); @@ -1760,22 +1822,16 @@ static bool isDefinedInClangModule(const RecordDecl *RD) { return true; } -/// Return true if the class or any of its methods are marked dllimport. 
-static bool isClassOrMethodDLLImport(const CXXRecordDecl *RD) { - if (RD->hasAttr<DLLImportAttr>()) - return true; - for (const CXXMethodDecl *MD : RD->methods()) - if (MD->hasAttr<DLLImportAttr>()) - return true; - return false; -} - static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind, bool DebugTypeExtRefs, const RecordDecl *RD, const LangOptions &LangOpts) { if (DebugTypeExtRefs && isDefinedInClangModule(RD->getDefinition())) return true; + if (auto *ES = RD->getASTContext().getExternalSource()) + if (ES->hasExternalDefinitions(RD) == ExternalASTSource::EK_Always) + return true; + if (DebugKind > codegenoptions::LimitedDebugInfo) return false; @@ -2009,7 +2065,11 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, if (CreateSkeletonCU && IsRootModule) { // PCH files don't have a signature field in the control block, // but LLVM detects skeleton CUs by looking for a non-zero DWO id. - uint64_t Signature = Mod.getSignature() ? Mod.getSignature() : ~1ULL; + // We use the lower 64 bits for debug info. + uint64_t Signature = + Mod.getSignature() + ? (uint64_t)Mod.getSignature()[1] << 32 | Mod.getSignature()[0] + : ~1ULL; llvm::DIBuilder DIB(CGM.getModule()); DIB.createCompileUnit(TheCU->getSourceLanguage(), DIB.createFile(Mod.getModuleName(), Mod.getPath()), @@ -2408,6 +2468,21 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) { FullName); } +llvm::DIMacro *CGDebugInfo::CreateMacro(llvm::DIMacroFile *Parent, + unsigned MType, SourceLocation LineLoc, + StringRef Name, StringRef Value) { + unsigned Line = LineLoc.isInvalid() ? 0 : getLineNumber(LineLoc); + return DBuilder.createMacro(Parent, Line, MType, Name, Value); +} + +llvm::DIMacroFile *CGDebugInfo::CreateTempMacroFile(llvm::DIMacroFile *Parent, + SourceLocation LineLoc, + SourceLocation FileLoc) { + llvm::DIFile *FName = getOrCreateFile(FileLoc); + unsigned Line = LineLoc.isInvalid() ? 0 : getLineNumber(LineLoc); + return DBuilder.createTempMacroFile(Parent, Line, FName); +} + static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) { Qualifiers Quals; do { @@ -2451,8 +2526,9 @@ static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) { case Type::SubstTemplateTypeParm: T = cast<SubstTemplateTypeParmType>(T)->getReplacementType(); break; - case Type::Auto: { - QualType DT = cast<AutoType>(T)->getDeducedType(); + case Type::Auto: + case Type::DeducedTemplateSpecialization: { + QualType DT = cast<DeducedType>(T)->getDeducedType(); assert(!DT.isNull() && "Undeduced types shouldn't reach here."); T = DT; break; @@ -2488,11 +2564,17 @@ void CGDebugInfo::completeTemplateDefinition( const ClassTemplateSpecializationDecl &SD) { if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; + completeUnusedClass(SD); +} - completeClassData(&SD); +void CGDebugInfo::completeUnusedClass(const CXXRecordDecl &D) { + if (DebugKind <= codegenoptions::DebugLineTablesOnly) + return; + + completeClassData(&D); // In case this type has no member function definitions being emitted, ensure // it is retained - RetainedTypes.push_back(CGM.getContext().getRecordType(&SD).getAsOpaquePtr()); + RetainedTypes.push_back(CGM.getContext().getRecordType(&D).getAsOpaquePtr()); } llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) { @@ -2537,7 +2619,7 @@ llvm::DIModule *CGDebugInfo::getParentModuleOrNull(const Decl *D) { // best to make this behavior a command line or debugger tuning // option. 
FullSourceLoc Loc(D->getLocation(), CGM.getContext().getSourceManager()); - if (Module *M = ClangModuleMap->inferModuleFromLocation(Loc)) { + if (Module *M = D->getOwningModule()) { // This is a (sub-)module. auto Info = ExternalASTSource::ASTSourceDescriptor(*M); return getOrCreateModuleRef(Info, /*SkeletonCU=*/false); @@ -2618,6 +2700,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::Attributed: case Type::Adjusted: case Type::Decayed: + case Type::DeducedTemplateSpecialization: case Type::Elaborated: case Type::Paren: case Type::SubstTemplateTypeParm: @@ -2704,6 +2787,7 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { // them distinct if they are ODR-uniqued. if (FullName.empty()) break; + LLVM_FALLTHROUGH; case llvm::dwarf::DW_TAG_structure_type: case llvm::dwarf::DW_TAG_union_type: @@ -2774,16 +2858,17 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit, } // No need to replicate the linkage name if it isn't different from the // subprogram name, no need to have it at all unless coverage is enabled or - // debug is set to more than just line tables. + // debug is set to more than just line tables or extra debug info is needed. if (LinkageName == Name || (!CGM.getCodeGenOpts().EmitGcovArcs && !CGM.getCodeGenOpts().EmitGcovNotes && + !CGM.getCodeGenOpts().DebugInfoForProfiling && DebugKind <= codegenoptions::DebugLineTablesOnly)) LinkageName = StringRef(); if (DebugKind >= codegenoptions::LimitedDebugInfo) { if (const NamespaceDecl *NSDecl = dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext())) - FDContext = getOrCreateNameSpace(NSDecl); + FDContext = getOrCreateNamespace(NSDecl); else if (const RecordDecl *RDecl = dyn_cast_or_null<RecordDecl>(FD->getDeclContext())) { llvm::DIScope *Mod = getParentModuleOrNull(RDecl); @@ -2844,28 +2929,40 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit, VDContext = getContextDescriptor(cast<Decl>(DC), Mod ? Mod : TheCU); } -llvm::DISubprogram * -CGDebugInfo::getFunctionForwardDeclaration(const FunctionDecl *FD) { +llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD, + bool Stub) { llvm::DINodeArray TParamsArray; StringRef Name, LinkageName; llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; - SourceLocation Loc = FD->getLocation(); + SourceLocation Loc = GD.getDecl()->getLocation(); llvm::DIFile *Unit = getOrCreateFile(Loc); llvm::DIScope *DContext = Unit; unsigned Line = getLineNumber(Loc); - - collectFunctionDeclProps(FD, Unit, Name, LinkageName, DContext, + collectFunctionDeclProps(GD, Unit, Name, LinkageName, DContext, TParamsArray, Flags); + auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()); + // Build function type. 
SmallVector<QualType, 16> ArgTypes; - for (const ParmVarDecl *Parm: FD->parameters()) - ArgTypes.push_back(Parm->getType()); + if (FD) + for (const ParmVarDecl *Parm : FD->parameters()) + ArgTypes.push_back(Parm->getType()); CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv(); QualType FnType = CGM.getContext().getFunctionType( FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC)); + if (Stub) { + return DBuilder.createFunction( + DContext, Name, LinkageName, Unit, Line, + getOrCreateFunctionType(GD.getDecl(), FnType, Unit), + !FD->isExternallyVisible(), + /* isDefinition = */ true, 0, Flags, CGM.getLangOpts().Optimize, + TParamsArray.get(), getFunctionDeclaration(FD)); + } + llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl( DContext, Name, LinkageName, Unit, Line, - getOrCreateFunctionType(FD, FnType, Unit), !FD->isExternallyVisible(), + getOrCreateFunctionType(GD.getDecl(), FnType, Unit), + !FD->isExternallyVisible(), /* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize, TParamsArray.get(), getFunctionDeclaration(FD)); const auto *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl()); @@ -2875,6 +2972,16 @@ CGDebugInfo::getFunctionForwardDeclaration(const FunctionDecl *FD) { return SP; } +llvm::DISubprogram * +CGDebugInfo::getFunctionForwardDeclaration(GlobalDecl GD) { + return getFunctionFwdDeclOrStub(GD, /* Stub = */ false); +} + +llvm::DISubprogram * +CGDebugInfo::getFunctionStub(GlobalDecl GD) { + return getFunctionFwdDeclOrStub(GD, /* Stub = */ true); +} + llvm::DIGlobalVariable * CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) { QualType T; @@ -3146,6 +3253,27 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, TParamsArray.get(), getFunctionDeclaration(D))); } +void CGDebugInfo::EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD) { + const auto *FD = cast<FunctionDecl>(GD.getDecl()); + // If there is a subprogram for this function available then use it. 
+ auto FI = SPCache.find(FD->getCanonicalDecl()); + llvm::DISubprogram *SP = nullptr; + if (FI != SPCache.end()) + SP = dyn_cast_or_null<llvm::DISubprogram>(FI->second); + if (!SP || !SP->isDefinition()) + SP = getFunctionStub(GD); + FnBeginRegionCount.push_back(LexicalBlockStack.size()); + LexicalBlockStack.emplace_back(SP); + setInlinedAt(Builder.getCurrentDebugLocation()); + EmitLocation(Builder, FD->getLocation()); +} + +void CGDebugInfo::EmitInlineFunctionEnd(CGBuilderTy &Builder) { + assert(CurInlinedAt && "unbalanced inline scope stack"); + EmitFunctionEnd(Builder, nullptr); + setInlinedAt(llvm::DebugLoc(CurInlinedAt).getInlinedAt()); +} + void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) { // Update our current location setLocation(Loc); @@ -3155,7 +3283,7 @@ void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) { llvm::MDNode *Scope = LexicalBlockStack.back(); Builder.SetCurrentDebugLocation(llvm::DebugLoc::get( - getLineNumber(CurLoc), getColumnNumber(CurLoc), Scope)); + getLineNumber(CurLoc), getColumnNumber(CurLoc), Scope, CurInlinedAt)); } void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) { @@ -3167,14 +3295,29 @@ void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) { getColumnNumber(CurLoc))); } +void CGDebugInfo::AppendAddressSpaceXDeref( + unsigned AddressSpace, + SmallVectorImpl<int64_t> &Expr) const { + Optional<unsigned> DWARFAddressSpace = + CGM.getTarget().getDWARFAddressSpace(AddressSpace); + if (!DWARFAddressSpace) + return; + + Expr.push_back(llvm::dwarf::DW_OP_constu); + Expr.push_back(DWARFAddressSpace.getValue()); + Expr.push_back(llvm::dwarf::DW_OP_swap); + Expr.push_back(llvm::dwarf::DW_OP_xderef); +} + void CGDebugInfo::EmitLexicalBlockStart(CGBuilderTy &Builder, SourceLocation Loc) { // Set our current location. setLocation(Loc); // Emit a line table change for the current location inside the new scope. - Builder.SetCurrentDebugLocation(llvm::DebugLoc::get( - getLineNumber(Loc), getColumnNumber(Loc), LexicalBlockStack.back())); + Builder.SetCurrentDebugLocation( + llvm::DebugLoc::get(getLineNumber(Loc), getColumnNumber(Loc), + LexicalBlockStack.back(), CurInlinedAt)); if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; @@ -3196,7 +3339,7 @@ void CGDebugInfo::EmitLexicalBlockEnd(CGBuilderTy &Builder, LexicalBlockStack.pop_back(); } -void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder) { +void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder, llvm::Function *Fn) { assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); unsigned RCount = FnBeginRegionCount.back(); assert(RCount <= LexicalBlockStack.size() && "Region stack mismatch"); @@ -3208,6 +3351,9 @@ void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder) { LexicalBlockStack.pop_back(); } FnBeginRegionCount.pop_back(); + + if (Fn && Fn->getSubprogram()) + DBuilder.finalizeSubprogram(Fn->getSubprogram()); } llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, @@ -3316,56 +3462,45 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, Line = getLineNumber(VD->getLocation()); Column = getColumnNumber(VD->getLocation()); } - SmallVector<int64_t, 9> Expr; + SmallVector<int64_t, 13> Expr; llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; if (VD->isImplicit()) Flags |= llvm::DINode::FlagArtificial; auto Align = getDeclAlignIfRequired(VD, CGM.getContext()); - // If this is the first argument and it is implicit then - // give it an object pointer flag. 
- // FIXME: There has to be a better way to do this, but for static - // functions there won't be an implicit param at arg1 and - // otherwise it is 'self' or 'this'. - if (isa<ImplicitParamDecl>(VD) && ArgNo && *ArgNo == 1) - Flags |= llvm::DINode::FlagObjectPointer; - if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) - if (Arg->getType()->isPointerTy() && !Arg->hasByValAttr() && - !VD->getType()->isPointerType()) - Expr.push_back(llvm::dwarf::DW_OP_deref); + unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(VD->getType()); + AppendAddressSpaceXDeref(AddressSpace, Expr); - auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back()); + // If this is implicit parameter of CXXThis or ObjCSelf kind, then give it an + // object pointer flag. + if (const auto *IPD = dyn_cast<ImplicitParamDecl>(VD)) { + if (IPD->getParameterKind() == ImplicitParamDecl::CXXThis || + IPD->getParameterKind() == ImplicitParamDecl::ObjCSelf) + Flags |= llvm::DINode::FlagObjectPointer; + } + // Note: Older versions of clang used to emit byval references with an extra + // DW_OP_deref, because they referenced the IR arg directly instead of + // referencing an alloca. Newer versions of LLVM don't treat allocas + // differently from other function arguments when used in a dbg.declare. + auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back()); StringRef Name = VD->getName(); if (!Name.empty()) { if (VD->hasAttr<BlocksAttr>()) { + // Here, we need an offset *into* the alloca. CharUnits offset = CharUnits::fromQuantity(32); - Expr.push_back(llvm::dwarf::DW_OP_plus); + Expr.push_back(llvm::dwarf::DW_OP_plus_uconst); // offset of __forwarding field offset = CGM.getContext().toCharUnitsFromBits( CGM.getTarget().getPointerWidth(0)); Expr.push_back(offset.getQuantity()); Expr.push_back(llvm::dwarf::DW_OP_deref); - Expr.push_back(llvm::dwarf::DW_OP_plus); + Expr.push_back(llvm::dwarf::DW_OP_plus_uconst); // offset of x field offset = CGM.getContext().toCharUnitsFromBits(XOffset); Expr.push_back(offset.getQuantity()); - - // Create the descriptor for the variable. - auto *D = ArgNo - ? DBuilder.createParameterVariable(Scope, VD->getName(), - *ArgNo, Unit, Line, Ty) - : DBuilder.createAutoVariable(Scope, VD->getName(), Unit, - Line, Ty, Align); - - // Insert an llvm.dbg.declare into the current block. - DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr), - llvm::DebugLoc::get(Line, Column, Scope), - Builder.GetInsertBlock()); - return; - } else if (isa<VariableArrayType>(VD->getType())) - Expr.push_back(llvm::dwarf::DW_OP_deref); + } } else if (const auto *RT = dyn_cast<RecordType>(VD->getType())) { // If VD is an anonymous union then Storage represents value for // all union fields. @@ -3393,9 +3528,10 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, Flags | llvm::DINode::FlagArtificial, FieldAlign); // Insert an llvm.dbg.declare into the current block. - DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr), - llvm::DebugLoc::get(Line, Column, Scope), - Builder.GetInsertBlock()); + DBuilder.insertDeclare( + Storage, D, DBuilder.createExpression(Expr), + llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), + Builder.GetInsertBlock()); } } } @@ -3411,7 +3547,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, // Insert an llvm.dbg.declare into the current block. 
DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr), - llvm::DebugLoc::get(Line, Column, Scope), + llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), Builder.GetInsertBlock()); } @@ -3453,8 +3589,9 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( // Self is passed along as an implicit non-arg variable in a // block. Mark it as the object pointer. - if (isa<ImplicitParamDecl>(VD) && VD->getName() == "self") - Ty = CreateSelfType(VD->getType(), Ty); + if (const auto *IPD = dyn_cast<ImplicitParamDecl>(VD)) + if (IPD->getParameterKind() == ImplicitParamDecl::ObjCSelf) + Ty = CreateSelfType(VD->getType(), Ty); // Get location information. unsigned Line = getLineNumber(VD->getLocation()); @@ -3467,19 +3604,18 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( ->getElementOffset(blockInfo.getCapture(VD).getIndex())); SmallVector<int64_t, 9> addr; - if (isa<llvm::AllocaInst>(Storage)) - addr.push_back(llvm::dwarf::DW_OP_deref); - addr.push_back(llvm::dwarf::DW_OP_plus); + addr.push_back(llvm::dwarf::DW_OP_deref); + addr.push_back(llvm::dwarf::DW_OP_plus_uconst); addr.push_back(offset.getQuantity()); if (isByRef) { addr.push_back(llvm::dwarf::DW_OP_deref); - addr.push_back(llvm::dwarf::DW_OP_plus); + addr.push_back(llvm::dwarf::DW_OP_plus_uconst); // offset of __forwarding field offset = CGM.getContext().toCharUnitsFromBits(target.getPointerSizeInBits(0)); addr.push_back(offset.getQuantity()); addr.push_back(llvm::dwarf::DW_OP_deref); - addr.push_back(llvm::dwarf::DW_OP_plus); + addr.push_back(llvm::dwarf::DW_OP_plus_uconst); // offset of x field offset = CGM.getContext().toCharUnitsFromBits(XOffset); addr.push_back(offset.getQuantity()); @@ -3492,13 +3628,13 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( Line, Ty, false, llvm::DINode::FlagZero, Align); // Insert an llvm.dbg.declare into the current block. - auto DL = llvm::DebugLoc::get(Line, Column, LexicalBlockStack.back()); + auto DL = + llvm::DebugLoc::get(Line, Column, LexicalBlockStack.back(), CurInlinedAt); + auto *Expr = DBuilder.createExpression(addr); if (InsertPoint) - DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(addr), DL, - InsertPoint); + DBuilder.insertDeclare(Storage, D, Expr, DL, InsertPoint); else - DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(addr), DL, - Builder.GetInsertBlock()); + DBuilder.insertDeclare(Storage, D, Expr, DL, Builder.GetInsertBlock()); } void CGDebugInfo::EmitDeclareOfArgVariable(const VarDecl *VD, llvm::Value *AI, @@ -3660,12 +3796,13 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, // Insert an llvm.dbg.value into the current block. DBuilder.insertDbgValueIntrinsic( LocalAddr, 0, debugVar, DBuilder.createExpression(), - llvm::DebugLoc::get(line, column, scope), Builder.GetInsertBlock()); + llvm::DebugLoc::get(line, column, scope, CurInlinedAt), + Builder.GetInsertBlock()); } // Insert an llvm.dbg.declare into the current block. 
DBuilder.insertDeclare(Arg, debugVar, DBuilder.createExpression(), - llvm::DebugLoc::get(line, column, scope), + llvm::DebugLoc::get(line, column, scope, CurInlinedAt), Builder.GetInsertBlock()); } @@ -3747,9 +3884,16 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, GVE = CollectAnonRecordDecls(RD, Unit, LineNo, LinkageName, Var, DContext); } else { auto Align = getDeclAlignIfRequired(D, CGM.getContext()); + + SmallVector<int64_t, 4> Expr; + unsigned AddressSpace = + CGM.getContext().getTargetAddressSpace(D->getType()); + AppendAddressSpaceXDeref(AddressSpace, Expr); + GVE = DBuilder.createGlobalVariableExpression( DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit), - Var->hasLocalLinkage(), /*Expr=*/nullptr, + Var->hasLocalLinkage(), + Expr.empty() ? nullptr : DBuilder.createExpression(Expr), getOrCreateStaticDataMemberDeclarationOrNull(D), Align); Var->addDebugInfo(GVE); } @@ -3826,10 +3970,10 @@ void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) { const NamespaceDecl *NSDecl = UD.getNominatedNamespace(); if (!NSDecl->isAnonymousNamespace() || CGM.getCodeGenOpts().DebugExplicitImport) { + auto Loc = UD.getLocation(); DBuilder.createImportedModule( getCurrentContextDescriptor(cast<Decl>(UD.getDeclContext())), - getOrCreateNameSpace(NSDecl), - getLineNumber(UD.getLocation())); + getOrCreateNamespace(NSDecl), getOrCreateFile(Loc), getLineNumber(Loc)); } } @@ -3852,10 +3996,12 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { if (AT->getDeducedType().isNull()) return; if (llvm::DINode *Target = - getDeclarationOrDefinition(USD.getUnderlyingDecl())) + getDeclarationOrDefinition(USD.getUnderlyingDecl())) { + auto Loc = USD.getLocation(); DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(USD.getDeclContext())), Target, - getLineNumber(USD.getLocation())); + getOrCreateFile(Loc), getLineNumber(Loc)); + } } void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { @@ -3863,10 +4009,11 @@ void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { return; if (Module *M = ID.getImportedModule()) { auto Info = ExternalASTSource::ASTSourceDescriptor(*M); + auto Loc = ID.getLocation(); DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(ID.getDeclContext())), - getOrCreateModuleRef(Info, DebugTypeExtRefs), - getLineNumber(ID.getLocation())); + getOrCreateModuleRef(Info, DebugTypeExtRefs), getOrCreateFile(Loc), + getLineNumber(Loc)); } } @@ -3878,35 +4025,37 @@ CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) { if (VH) return cast<llvm::DIImportedEntity>(VH); llvm::DIImportedEntity *R; + auto Loc = NA.getLocation(); if (const auto *Underlying = dyn_cast<NamespaceAliasDecl>(NA.getAliasedNamespace())) // This could cache & dedup here rather than relying on metadata deduping. 
R = DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())), - EmitNamespaceAlias(*Underlying), getLineNumber(NA.getLocation()), - NA.getName()); + EmitNamespaceAlias(*Underlying), getOrCreateFile(Loc), + getLineNumber(Loc), NA.getName()); else R = DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())), - getOrCreateNameSpace(cast<NamespaceDecl>(NA.getAliasedNamespace())), - getLineNumber(NA.getLocation()), NA.getName()); + getOrCreateNamespace(cast<NamespaceDecl>(NA.getAliasedNamespace())), + getOrCreateFile(Loc), getLineNumber(Loc), NA.getName()); VH.reset(R); return R; } llvm::DINamespace * -CGDebugInfo::getOrCreateNameSpace(const NamespaceDecl *NSDecl) { - NSDecl = NSDecl->getCanonicalDecl(); - auto I = NameSpaceCache.find(NSDecl); - if (I != NameSpaceCache.end()) +CGDebugInfo::getOrCreateNamespace(const NamespaceDecl *NSDecl) { + // Don't canonicalize the NamespaceDecl here: The DINamespace will be uniqued + // if necessary, and this way multiple declarations of the same namespace in + // different parent modules stay distinct. + auto I = NamespaceCache.find(NSDecl); + if (I != NamespaceCache.end()) return cast<llvm::DINamespace>(I->second); - unsigned LineNo = getLineNumber(NSDecl->getLocation()); - llvm::DIFile *FileD = getOrCreateFile(NSDecl->getLocation()); llvm::DIScope *Context = getDeclContextDescriptor(NSDecl); - llvm::DINamespace *NS = DBuilder.createNameSpace( - Context, NSDecl->getName(), FileD, LineNo, NSDecl->isInline()); - NameSpaceCache[NSDecl].reset(NS); + // Don't trust the context if it is a DIModule (see comment above). + llvm::DINamespace *NS = + DBuilder.createNameSpace(Context, NSDecl->getName(), NSDecl->isInline()); + NamespaceCache[NSDecl].reset(NS); return NS; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h index ac2e8dd..39249c7 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h @@ -61,6 +61,7 @@ class CGDebugInfo { ModuleMap *ClangModuleMap = nullptr; ExternalASTSource::ASTSourceDescriptor PCHDescriptor; SourceLocation CurLoc; + llvm::MDNode *CurInlinedAt = nullptr; llvm::DIType *VTablePtrType = nullptr; llvm::DIType *ClassTy = nullptr; llvm::DICompositeType *ObjTy = nullptr; @@ -124,7 +125,7 @@ class CGDebugInfo { /// Cache declarations relevant to DW_TAG_imported_declarations (C++ /// using declarations) that aren't covered by other more specific caches. llvm::DenseMap<const Decl *, llvm::TrackingMDRef> DeclCache; - llvm::DenseMap<const NamespaceDecl *, llvm::TrackingMDRef> NameSpaceCache; + llvm::DenseMap<const NamespaceDecl *, llvm::TrackingMDRef> NamespaceCache; llvm::DenseMap<const NamespaceAliasDecl *, llvm::TrackingMDRef> NamespaceAliasCache; llvm::DenseMap<const Decl *, llvm::TypedTrackingMDRef<llvm::DIDerivedType>> @@ -193,8 +194,9 @@ class CGDebugInfo { getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile *F); /// \return debug info descriptor for vtable. llvm::DIType *getOrCreateVTablePtrType(llvm::DIFile *F); + /// \return namespace descriptor for the given namespace decl. 
- llvm::DINamespace *getOrCreateNameSpace(const NamespaceDecl *N);
+ llvm::DINamespace *getOrCreateNamespace(const NamespaceDecl *N);
llvm::DIType *CreatePointerLikeType(llvm::dwarf::Tag Tag, const Type *Ty, QualType PointeeTy, llvm::DIFile *F);
llvm::DIType *getOrCreateStructPtrType(StringRef Name, llvm::DIType *&Cache);
@@ -292,6 +294,15 @@ class CGDebugInfo {
/// Create a new lexical block node and push it on the stack.
void CreateLexicalBlock(SourceLocation Loc);
+ /// If target-specific LLVM \p AddressSpace directly maps to target-specific
+ /// DWARF address space, appends extended dereferencing mechanism to complex
+ /// expression \p Expr. Otherwise, does nothing.
+ ///
+ /// Extended dereferencing mechanism has the following format:
+ /// DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
+ void AppendAddressSpaceXDeref(unsigned AddressSpace,
+ SmallVectorImpl<int64_t> &Expr) const;
+
public: CGDebugInfo(CodeGenModule &CGM); ~CGDebugInfo();
@@ -320,6 +331,17 @@ public:
/// ignored.
void setLocation(SourceLocation Loc);
+ /// Return the current source location. This does not necessarily correspond
+ /// to the IRBuilder's current DebugLoc.
+ SourceLocation getLocation() const { return CurLoc; }
+
+ /// Update the current inline scope. All subsequent calls to \p EmitLocation
+ /// will create a location with this inlinedAt field.
+ void setInlinedAt(llvm::MDNode *InlinedAt) { CurInlinedAt = InlinedAt; }
+
+ /// \return the current inline scope.
+ llvm::MDNode *getInlinedAt() const { return CurInlinedAt; }
+
// Converts a SourceLocation to a DebugLoc
llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Loc);
@@ -336,11 +358,16 @@ public:
SourceLocation ScopeLoc, QualType FnType, llvm::Function *Fn, CGBuilderTy &Builder);
+ /// Start a new scope for an inlined function.
+ void EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD);
+ /// End an inlined function scope.
+ void EmitInlineFunctionEnd(CGBuilderTy &Builder);
+
/// Emit debug info for a function declaration.
void EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, QualType FnType);
/// Constructs the debug code for exiting a function.
- void EmitFunctionEnd(CGBuilderTy &Builder);
+ void EmitFunctionEnd(CGBuilderTy &Builder, llvm::Function *Fn);
/// Emit metadata to indicate the beginning of a new lexical block
/// and push the block onto the stack.
@@ -409,9 +436,21 @@ public:
void completeType(const RecordDecl *RD);
void completeRequiredType(const RecordDecl *RD);
void completeClassData(const RecordDecl *RD);
+ void completeClass(const RecordDecl *RD);
void completeTemplateDefinition(const ClassTemplateSpecializationDecl &SD);
-
+ void completeUnusedClass(const CXXRecordDecl &D);
+
+ /// Create debug info for a macro defined by a #define directive or a macro
+ /// undefined by a #undef directive.
+ llvm::DIMacro *CreateMacro(llvm::DIMacroFile *Parent, unsigned MType,
+ SourceLocation LineLoc, StringRef Name,
+ StringRef Value);
+
+ /// Create debug info for a file referenced by an #include directive.
+ llvm::DIMacroFile *CreateTempMacroFile(llvm::DIMacroFile *Parent,
+ SourceLocation LineLoc,
+ SourceLocation FileLoc);
private:
/// Emit call to llvm.dbg.declare for a variable declaration.
void EmitDeclare(const VarDecl *decl, llvm::Value *AI,
@@ -491,11 +530,18 @@ private:
llvm::DIDerivedType *
getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D);
+ /// Helper that either creates a forward declaration or a stub.
+ llvm::DISubprogram *getFunctionFwdDeclOrStub(GlobalDecl GD, bool Stub);
+ /// Create a subprogram describing the forward declaration
- /// represented in the given FunctionDecl.
- llvm::DISubprogram *getFunctionForwardDeclaration(const FunctionDecl *FD);
+ /// represented in the given FunctionDecl wrapped in a GlobalDecl.
+ llvm::DISubprogram *getFunctionForwardDeclaration(GlobalDecl GD);
+
+ /// Create a DISubprogram describing the function
+ /// represented in the given FunctionDecl wrapped in a GlobalDecl.
+ llvm::DISubprogram *getFunctionStub(GlobalDecl GD);
- /// Create a global variable describing the forward decalration
+ /// Create a global variable describing the forward declaration
/// represented in the given VarDecl.
llvm::DIGlobalVariable *
getGlobalVariableForwardDeclaration(const VarDecl *VD);
@@ -622,6 +668,20 @@ public:
};
+/// A scoped helper to set the current debug location to an inlined location.
+class ApplyInlineDebugLocation {
+ SourceLocation SavedLocation;
+ CodeGenFunction *CGF;
+
+public:
+ /// Set up the CodeGenFunction's DebugInfo to produce inline locations for the
+ /// function \p InlinedFn. The current debug location becomes the inlined call
+ /// site of the inlined function.
+ ApplyInlineDebugLocation(CodeGenFunction &CGF, GlobalDecl InlinedFn);
+ /// Restore everything back to the original state.
+ ~ApplyInlineDebugLocation();
+};
+
} // namespace CodeGen
} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp
index 0a88b23..2351786 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp
@@ -11,14 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#include "CodeGenFunction.h"
#include "CGBlocks.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGOpenCLRuntime.h"
#include "CGOpenMPRuntime.h"
+#include "CodeGenFunction.h"
#include "CodeGenModule.h"
+#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/Decl.h"
@@ -50,6 +51,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
case Decl::TemplateTypeParm:
case Decl::UnresolvedUsingValue:
case Decl::NonTypeTemplateParm:
+ case Decl::CXXDeductionGuide:
case Decl::CXXMethod:
case Decl::CXXConstructor:
case Decl::CXXDestructor:
@@ -151,7 +153,14 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
/// EmitVarDecl - This method handles emission of any variable declaration
/// inside a function, including static vars etc.
void CodeGenFunction::EmitVarDecl(const VarDecl &D) {
- if (D.isStaticLocal()) {
+ if (D.hasExternalStorage())
+ // Don't emit it now, allow it to be emitted lazily on its first use.
+ return;
+
+ // Some function-scope variable does not have static storage but still
+ // needs to be emitted like a static variable, e.g. a function-scope
+ // variable in constant address space in OpenCL.
+ if (D.getStorageDuration() != SD_Automatic) {
llvm::GlobalValue::LinkageTypes Linkage =
CGM.getLLVMLinkageVarDefinition(&D, /*isConstant=*/false);
@@ -162,10 +171,6 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) {
return EmitStaticVarDecl(D, Linkage);
}
- if (D.hasExternalStorage())
- // Don't emit it now, allow it to be emitted lazily on its first use.
- return; - if (D.getType().getAddressSpace() == LangAS::opencl_local) return CGM.getOpenCLRuntime().EmitWorkGroupLocalVarDecl(*this, D); @@ -216,8 +221,8 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( Name = getStaticDeclName(*this, D); llvm::Type *LTy = getTypes().ConvertTypeForMem(Ty); - unsigned AddrSpace = - GetGlobalVarAddressSpace(&D, getContext().getTargetAddressSpace(Ty)); + unsigned AS = GetGlobalVarAddressSpace(&D); + unsigned TargetAS = getContext().getTargetAddressSpace(AS); // Local address space cannot have an initializer. llvm::Constant *Init = nullptr; @@ -226,12 +231,9 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( else Init = llvm::UndefValue::get(LTy); - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(getModule(), LTy, - Ty.isConstant(getContext()), Linkage, - Init, Name, nullptr, - llvm::GlobalVariable::NotThreadLocal, - AddrSpace); + llvm::GlobalVariable *GV = new llvm::GlobalVariable( + getModule(), LTy, Ty.isConstant(getContext()), Linkage, Init, Name, + nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS); GV->setAlignment(getContext().getDeclAlign(&D).getQuantity()); setGlobalVisibility(GV, &D); @@ -249,11 +251,12 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( } // Make sure the result is of the correct type. - unsigned ExpectedAddrSpace = getContext().getTargetAddressSpace(Ty); + unsigned ExpectedAS = Ty.getAddressSpace(); llvm::Constant *Addr = GV; - if (AddrSpace != ExpectedAddrSpace) { - llvm::PointerType *PTy = llvm::PointerType::get(LTy, ExpectedAddrSpace); - Addr = llvm::ConstantExpr::getAddrSpaceCast(GV, PTy); + if (AS != ExpectedAS) { + Addr = getTargetCodeGenInfo().performAddrSpaceCast( + *this, GV, AS, ExpectedAS, + LTy->getPointerTo(getContext().getTargetAddressSpace(ExpectedAS))); } setStaticLocalDeclAddress(&D, Addr); @@ -401,6 +404,13 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, if (D.hasAttr<AnnotateAttr>()) CGM.AddGlobalAnnotations(&D, var); + if (auto *SA = D.getAttr<PragmaClangBSSSectionAttr>()) + var->addAttribute("bss-section", SA->getName()); + if (auto *SA = D.getAttr<PragmaClangDataSectionAttr>()) + var->addAttribute("data-section", SA->getName()); + if (auto *SA = D.getAttr<PragmaClangRodataSectionAttr>()) + var->addAttribute("rodata-section", SA->getName()); + if (const SectionAttr *SA = D.getAttr<SectionAttr>()) var->setSection(SA->getName()); @@ -671,6 +681,27 @@ static void drillIntoBlockVariable(CodeGenFunction &CGF, lvalue.setAddress(CGF.emitBlockByrefAddress(lvalue.getAddress(), var)); } +void CodeGenFunction::EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, + SourceLocation Loc) { + if (!SanOpts.has(SanitizerKind::NullabilityAssign)) + return; + + auto Nullability = LHS.getType()->getNullability(getContext()); + if (!Nullability || *Nullability != NullabilityKind::NonNull) + return; + + // Check if the right hand side of the assignment is nonnull, if the left + // hand side must be nonnull. + SanitizerScope SanScope(this); + llvm::Value *IsNotNull = Builder.CreateIsNotNull(RHS); + llvm::Constant *StaticData[] = { + EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(LHS.getType()), + llvm::ConstantInt::get(Int8Ty, 0), // The LogAlignment info is unused. 
+ llvm::ConstantInt::get(Int8Ty, TCK_NonnullAssign)}; + EmitCheck({{IsNotNull, SanitizerKind::NullabilityAssign}}, + SanitizerHandler::TypeMismatch, StaticData, RHS); +} + void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit) { Qualifiers::ObjCLifetime lifetime = lvalue.getObjCLifetime(); @@ -678,6 +709,7 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, llvm::Value *value = EmitScalarExpr(init); if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D)); + EmitNullabilityCheck(lvalue, value, init->getExprLoc()); EmitStoreThroughLValue(RValue::get(value), lvalue, true); return; } @@ -766,6 +798,8 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D)); + EmitNullabilityCheck(lvalue, value, init->getExprLoc()); + // If the variable might have been accessed by its initializer, we // might have to initialize with a barrier. We have to do this for // both __weak and __strong, but __weak got filtered out above. @@ -899,7 +933,7 @@ llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size, return nullptr; llvm::Value *SizeV = llvm::ConstantInt::get(Int64Ty, Size); - Addr = Builder.CreateBitCast(Addr, Int8PtrTy); + Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy); llvm::CallInst *C = Builder.CreateCall(CGM.getLLVMLifetimeStartFn(), {SizeV, Addr}); C->setDoesNotThrow(); @@ -907,7 +941,7 @@ llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size, } void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) { - Addr = Builder.CreateBitCast(Addr, Int8PtrTy); + Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy); llvm::CallInst *C = Builder.CreateCall(CGM.getLLVMLifetimeEndFn(), {Size, Addr}); C->setDoesNotThrow(); @@ -918,6 +952,7 @@ void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) { CodeGenFunction::AutoVarEmission CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { QualType Ty = D.getType(); + assert(Ty.getAddressSpace() == LangAS::Default); AutoVarEmission emission(D); @@ -1010,8 +1045,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Create the alloca. Note that we set the name separately from // building the instruction so that it's there even in no-asserts // builds. - address = CreateTempAlloca(allocaTy, allocaAlignment); - address.getPointer()->setName(D.getName()); + address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName()); // Don't emit lifetime markers for MSVC catch parameters. The lifetime of // the catch parameter starts in the catchpad instruction, and we can't @@ -1022,11 +1056,21 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Emit a lifetime intrinsic if meaningful. There's no point in doing this // if we don't have a valid insertion point (?). if (HaveInsertPoint() && !IsMSCatchParam) { - // goto or switch-case statements can break lifetime into several - // regions which need more efforts to handle them correctly. PR28267 - // This is rare case, but it's better just omit intrinsics than have - // them incorrectly placed. - if (!Bypasses.IsBypassed(&D)) { + // If there's a jump into the lifetime of this variable, its lifetime + // gets broken up into several regions in IR, which requires more work + // to handle correctly. For now, just omit the intrinsics; this is a + // rare case, and it's better to just be conservatively correct. + // PR28267. 
+ // + // We have to do this in all language modes if there's a jump past the + // declaration. We also have to do it in C if there's a jump to an + // earlier point in the current block because non-VLA lifetimes begin as + // soon as the containing block is entered, not when its variables + // actually come into scope; suppressing the lifetime annotations + // completely in this case is unnecessarily pessimistic, but again, this + // is rare. + if (!Bypasses.IsBypassed(&D) && + !(!getLangOpts().CPlusPlus && hasLabelBeenSeenInCurrentScope())) { uint64_t size = CGM.getDataLayout().getTypeAllocSize(allocaTy); emission.SizeForLifetimeMarkers = EmitLifetimeStart(size, address.getPointer()); @@ -1061,10 +1105,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { llvm::Type *llvmTy = ConvertTypeForMem(elementType); // Allocate memory for the array. - llvm::AllocaInst *vla = Builder.CreateAlloca(llvmTy, elementCount, "vla"); - vla->setAlignment(alignment.getQuantity()); - - address = Address(vla, alignment); + address = CreateTempAlloca(llvmTy, alignment, "vla", elementCount); } setAddrOfLocalVar(&D, address); @@ -1083,6 +1124,12 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { if (D.hasAttr<AnnotateAttr>()) EmitVarAnnotations(&D, address.getPointer()); + // Make sure we call @llvm.lifetime.end. + if (emission.useLifetimeMarkers()) + EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, + emission.getAllocatedAddress(), + emission.getSizeForLifetimeMarkers()); + return emission; } @@ -1373,13 +1420,6 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) { const VarDecl &D = *emission.Variable; - // Make sure we call @llvm.lifetime.end. This needs to happen - // *last*, so the cleanup needs to be pushed *first*. - if (emission.useLifetimeMarkers()) - EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, - emission.getAllocatedAddress(), - emission.getSizeForLifetimeMarkers()); - // Check the type for a cleanup. if (QualType::DestructionKind dtorKind = D.getType().isDestructedType()) emitAutoVarTypeCleanup(emission, dtorKind); @@ -1691,17 +1731,19 @@ void CodeGenFunction::pushRegularPartialArrayCleanup(llvm::Value *arrayBegin, /// Lazily declare the @llvm.lifetime.start intrinsic. llvm::Constant *CodeGenModule::getLLVMLifetimeStartFn() { - if (LifetimeStartFn) return LifetimeStartFn; + if (LifetimeStartFn) + return LifetimeStartFn; LifetimeStartFn = llvm::Intrinsic::getDeclaration(&getModule(), - llvm::Intrinsic::lifetime_start); + llvm::Intrinsic::lifetime_start, AllocaInt8PtrTy); return LifetimeStartFn; } /// Lazily declare the @llvm.lifetime.end intrinsic. llvm::Constant *CodeGenModule::getLLVMLifetimeEndFn() { - if (LifetimeEndFn) return LifetimeEndFn; + if (LifetimeEndFn) + return LifetimeEndFn; LifetimeEndFn = llvm::Intrinsic::getDeclaration(&getModule(), - llvm::Intrinsic::lifetime_end); + llvm::Intrinsic::lifetime_end, AllocaInt8PtrTy); return LifetimeEndFn; } @@ -1816,6 +1858,10 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, lt = Qualifiers::OCL_ExplicitNone; } + // Load objects passed indirectly. 
+ if (Arg.isIndirect() && !ArgVal) + ArgVal = Builder.CreateLoad(DeclPtr); + if (lt == Qualifiers::OCL_Strong) { if (!isConsumed) { if (CGM.getCodeGenOpts().OptimizationLevel == 0) { @@ -1869,6 +1915,19 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, if (D.hasAttr<AnnotateAttr>()) EmitVarAnnotations(&D, DeclPtr.getPointer()); + + // We can only check return value nullability if all arguments to the + // function satisfy their nullability preconditions. This makes it necessary + // to emit null checks for args in the function body itself. + if (requiresReturnValueNullabilityCheck()) { + auto Nullability = Ty->getNullability(getContext()); + if (Nullability && *Nullability == NullabilityKind::NonNull) { + SanitizerScope SanScope(this); + RetValNullabilityPrecondition = + Builder.CreateAnd(RetValNullabilityPrecondition, + Builder.CreateIsNotNull(Arg.getAnyValue())); + } + } } void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp index f56e182..d8768be 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp @@ -237,7 +237,7 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD, llvm::FunctionType::get(IntTy, dtorStub->getType(), false); llvm::Constant *atexit = - CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeSet(), + CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeList(), /*Local=*/true); if (llvm::Function *atexitFn = dyn_cast<llvm::Function>(atexit)) atexitFn->setDoesNotThrow(); @@ -571,9 +571,10 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, FinishFunction(); } -void CodeGenFunction::GenerateCXXGlobalDtorsFunc(llvm::Function *Fn, - const std::vector<std::pair<llvm::WeakVH, llvm::Constant*> > - &DtorsAndObjects) { +void CodeGenFunction::GenerateCXXGlobalDtorsFunc( + llvm::Function *Fn, + const std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>> + &DtorsAndObjects) { { auto NL = ApplyDebugLocation::CreateEmpty(*this); StartFunction(GlobalDecl(), getContext().VoidTy, Fn, @@ -602,9 +603,9 @@ llvm::Function *CodeGenFunction::generateDestroyHelper( Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray, const VarDecl *VD) { FunctionArgList args; - ImplicitParamDecl dst(getContext(), nullptr, SourceLocation(), nullptr, - getContext().VoidPtrTy); - args.push_back(&dst); + ImplicitParamDecl Dst(getContext(), getContext().VoidPtrTy, + ImplicitParamDecl::Other); + args.push_back(&Dst); const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, args); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp index f908bf2..40ae092 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp @@ -180,8 +180,8 @@ static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T, // The GCC runtime's personality function inherently doesn't support // mixed EH. Use the C++ personality just to avoid returning null. 
case ObjCRuntime::GCC: - case ObjCRuntime::ObjFW: // XXX: this will change soon - return EHPersonality::GNU_ObjC; + case ObjCRuntime::ObjFW: + return getObjCPersonality(T, L); case ObjCRuntime::GNUstep: return EHPersonality::GNU_ObjCXX; } @@ -231,7 +231,7 @@ static llvm::Constant *getPersonalityFn(CodeGenModule &CGM, const EHPersonality &Personality) { return CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.Int32Ty, true), Personality.PersonalityFn, - llvm::AttributeSet(), /*Local=*/true); + llvm::AttributeList(), /*Local=*/true); } static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM, @@ -765,8 +765,8 @@ llvm::BasicBlock *CodeGenFunction::EmitLandingPad() { llvm::BasicBlock *lpad = createBasicBlock("lpad"); EmitBlock(lpad); - llvm::LandingPadInst *LPadInst = Builder.CreateLandingPad( - llvm::StructType::get(Int8PtrTy, Int32Ty, nullptr), 0); + llvm::LandingPadInst *LPadInst = + Builder.CreateLandingPad(llvm::StructType::get(Int8PtrTy, Int32Ty), 0); llvm::Value *LPadExn = Builder.CreateExtractValue(LPadInst, 0); Builder.CreateStore(LPadExn, getExceptionSlot()); @@ -1310,8 +1310,8 @@ llvm::BasicBlock *CodeGenFunction::getTerminateLandingPad() { if (!CurFn->hasPersonalityFn()) CurFn->setPersonalityFn(getOpaquePersonalityFn(CGM, Personality)); - llvm::LandingPadInst *LPadInst = Builder.CreateLandingPad( - llvm::StructType::get(Int8PtrTy, Int32Ty, nullptr), 0); + llvm::LandingPadInst *LPadInst = + Builder.CreateLandingPad(llvm::StructType::get(Int8PtrTy, Int32Ty), 0); LPadInst->addClause(getCatchAllValue(*this)); llvm::Value *Exn = nullptr; @@ -1387,8 +1387,7 @@ llvm::BasicBlock *CodeGenFunction::getEHResumeBlock(bool isCleanup) { llvm::Value *Exn = getExceptionFromSlot(); llvm::Value *Sel = getSelectorFromSlot(); - llvm::Type *LPadType = llvm::StructType::get(Exn->getType(), - Sel->getType(), nullptr); + llvm::Type *LPadType = llvm::StructType::get(Exn->getType(), Sel->getType()); llvm::Value *LPadVal = llvm::UndefValue::get(LPadType); LPadVal = Builder.CreateInsertValue(LPadVal, Exn, 0, "lpad.val"); LPadVal = Builder.CreateInsertValue(LPadVal, Sel, 1, "lpad.val"); @@ -1650,39 +1649,29 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, // parameters. Win32 filters take no parameters. if (IsFilter) { Args.push_back(ImplicitParamDecl::Create( - getContext(), nullptr, StartLoc, + getContext(), /*DC=*/nullptr, StartLoc, &getContext().Idents.get("exception_pointers"), - getContext().VoidPtrTy)); + getContext().VoidPtrTy, ImplicitParamDecl::Other)); } else { Args.push_back(ImplicitParamDecl::Create( - getContext(), nullptr, StartLoc, + getContext(), /*DC=*/nullptr, StartLoc, &getContext().Idents.get("abnormal_termination"), - getContext().UnsignedCharTy)); + getContext().UnsignedCharTy, ImplicitParamDecl::Other)); } Args.push_back(ImplicitParamDecl::Create( - getContext(), nullptr, StartLoc, - &getContext().Idents.get("frame_pointer"), getContext().VoidPtrTy)); + getContext(), /*DC=*/nullptr, StartLoc, + &getContext().Idents.get("frame_pointer"), getContext().VoidPtrTy, + ImplicitParamDecl::Other)); } QualType RetTy = IsFilter ? 
getContext().LongTy : getContext().VoidTy; - llvm::Function *ParentFn = ParentCGF.CurFn; const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(RetTy, Args); llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); llvm::Function *Fn = llvm::Function::Create( FnTy, llvm::GlobalValue::InternalLinkage, Name.str(), &CGM.getModule()); - // The filter is either in the same comdat as the function, or it's internal. - if (llvm::Comdat *C = ParentFn->getComdat()) { - Fn->setComdat(C); - } else if (ParentFn->hasWeakLinkage() || ParentFn->hasLinkOnceLinkage()) { - llvm::Comdat *C = CGM.getModule().getOrInsertComdat(ParentFn->getName()); - ParentFn->setComdat(C); - Fn->setComdat(C); - } else { - Fn->setLinkage(llvm::GlobalValue::InternalLinkage); - } IsOutlinedSEHHelper = true; @@ -1758,7 +1747,7 @@ void CodeGenFunction::EmitSEHExceptionCodeSave(CodeGenFunction &ParentCGF, // }; // int exceptioncode = exception_pointers->ExceptionRecord->ExceptionCode; llvm::Type *RecordTy = CGM.Int32Ty->getPointerTo(); - llvm::Type *PtrsTy = llvm::StructType::get(RecordTy, CGM.VoidPtrTy, nullptr); + llvm::Type *PtrsTy = llvm::StructType::get(RecordTy, CGM.VoidPtrTy); llvm::Value *Ptrs = Builder.CreateBitCast(SEHInfo, PtrsTy->getPointerTo()); llvm::Value *Rec = Builder.CreateStructGEP(PtrsTy, Ptrs, 0); Rec = Builder.CreateAlignedLoad(Rec, getPointerAlign()); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp index e5e34a5..63c7b3d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp @@ -61,17 +61,39 @@ llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) { /// CreateTempAlloca - This creates a alloca and inserts it into the entry /// block. Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, - const Twine &Name) { - auto Alloca = CreateTempAlloca(Ty, Name); + const Twine &Name, + llvm::Value *ArraySize, + bool CastToDefaultAddrSpace) { + auto Alloca = CreateTempAlloca(Ty, Name, ArraySize); Alloca->setAlignment(Align.getQuantity()); - return Address(Alloca, Align); -} - -/// CreateTempAlloca - This creates a alloca and inserts it into the entry -/// block. + llvm::Value *V = Alloca; + // Alloca always returns a pointer in alloca address space, which may + // be different from the type defined by the language. For example, + // in C++ the auto variables are in the default address space. Therefore + // cast alloca to the default address space when necessary. + if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) { + auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default); + auto CurIP = Builder.saveIP(); + Builder.SetInsertPoint(AllocaInsertPt); + V = getTargetHooks().performAddrSpaceCast( + *this, V, getASTAllocaAddressSpace(), LangAS::Default, + Ty->getPointerTo(DestAddrSpace), /*non-null*/ true); + Builder.restoreIP(CurIP); + } + + return Address(V, Align); +} + +/// CreateTempAlloca - This creates an alloca and inserts it into the entry +/// block if \p ArraySize is nullptr, otherwise inserts it at the current +/// insertion point of the builder. 
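// Editor's illustrative sketch (not part of the patch): the two call shapes the
// extended CreateTempAlloca overload documented above supports, mirroring call
// sites elsewhere in this change; ElemTy, Align and ElementCount are assumed
// placeholders.
//
//   // ArraySize == nullptr: the alloca is placed in the entry block and, when
//   // the target allocates in a non-default address space, the returned
//   // Address has already been cast back to LangAS::Default.
//   Address Tmp = CreateTempAlloca(ElemTy, Align, "tmp");
//
//   // Non-null ArraySize (the VLA path in EmitAutoVarAlloca): the alloca is
//   // emitted at the builder's current insertion point instead.
//   Address VLA = CreateTempAlloca(ElemTy, Align, "vla", ElementCount);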
llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, - const Twine &Name) { - return new llvm::AllocaInst(Ty, nullptr, Name, AllocaInsertPt); + const Twine &Name, + llvm::Value *ArraySize) { + if (ArraySize) + return Builder.CreateAlloca(Ty, ArraySize, Name); + return new llvm::AllocaInst(Ty, CGM.getDataLayout().getAllocaAddrSpace(), + ArraySize, Name, AllocaInsertPt); } /// CreateDefaultAlignTempAlloca - This creates an alloca with the @@ -98,14 +120,18 @@ Address CodeGenFunction::CreateIRTemp(QualType Ty, const Twine &Name) { return CreateTempAlloca(ConvertType(Ty), Align, Name); } -Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name) { +Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name, + bool CastToDefaultAddrSpace) { // FIXME: Should we prefer the preferred type alignment here? - return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name); + return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name, + CastToDefaultAddrSpace); } Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align, - const Twine &Name) { - return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name); + const Twine &Name, + bool CastToDefaultAddrSpace) { + return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name, nullptr, + CastToDefaultAddrSpace); } /// EvaluateExprAsBool - Perform the usual unary conversions on the specified @@ -315,9 +341,10 @@ pushTemporaryCleanup(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M, } } -static Address -createReferenceTemporary(CodeGenFunction &CGF, - const MaterializeTemporaryExpr *M, const Expr *Inner) { +static Address createReferenceTemporary(CodeGenFunction &CGF, + const MaterializeTemporaryExpr *M, + const Expr *Inner) { + auto &TCG = CGF.getTargetHooks(); switch (M->getStorageDuration()) { case SD_FullExpression: case SD_Automatic: { @@ -330,13 +357,24 @@ createReferenceTemporary(CodeGenFunction &CGF, (Ty->isArrayType() || Ty->isRecordType()) && CGF.CGM.isTypeConstant(Ty, true)) if (llvm::Constant *Init = CGF.CGM.EmitConstantExpr(Inner, Ty, &CGF)) { - auto *GV = new llvm::GlobalVariable( - CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, Init, ".ref.tmp"); - CharUnits alignment = CGF.getContext().getTypeAlignInChars(Ty); - GV->setAlignment(alignment.getQuantity()); - // FIXME: Should we put the new global into a COMDAT? - return Address(GV, alignment); + if (auto AddrSpace = CGF.getTarget().getConstantAddressSpace()) { + auto AS = AddrSpace.getValue(); + auto *GV = new llvm::GlobalVariable( + CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, Init, ".ref.tmp", nullptr, + llvm::GlobalValue::NotThreadLocal, + CGF.getContext().getTargetAddressSpace(AS)); + CharUnits alignment = CGF.getContext().getTypeAlignInChars(Ty); + GV->setAlignment(alignment.getQuantity()); + llvm::Constant *C = GV; + if (AS != LangAS::Default) + C = TCG.performAddrSpaceCast( + CGF.CGM, GV, AS, LangAS::Default, + GV->getValueType()->getPointerTo( + CGF.getContext().getTargetAddressSpace(LangAS::Default))); + // FIXME: Should we put the new global into a COMDAT? + return Address(C, alignment); + } } return CGF.CreateMemTemp(Ty, "ref.tmp"); } @@ -373,12 +411,14 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { // dynamic initialization or a cleanup and we can just return the address // of the temporary. 
if (Var->hasInitializer()) - return MakeAddrLValue(Object, M->getType(), AlignmentSource::Decl); + return MakeAddrLValue(Object, M->getType(), + LValueBaseInfo(AlignmentSource::Decl, false)); Var->setInitializer(CGM.EmitNullConstant(E->getType())); } LValue RefTempDst = MakeAddrLValue(Object, M->getType(), - AlignmentSource::Decl); + LValueBaseInfo(AlignmentSource::Decl, + false)); switch (getEvaluationKind(E->getType())) { default: llvm_unreachable("expected scalar or aggregate expression"); @@ -415,9 +455,11 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { // Create and initialize the reference temporary. Address Object = createReferenceTemporary(*this, M, E); - if (auto *Var = dyn_cast<llvm::GlobalVariable>(Object.getPointer())) { + if (auto *Var = dyn_cast<llvm::GlobalVariable>( + Object.getPointer()->stripPointerCasts())) { Object = Address(llvm::ConstantExpr::getBitCast( - Var, ConvertTypeForMem(E->getType())->getPointerTo()), + cast<llvm::Constant>(Object.getPointer()), + ConvertTypeForMem(E->getType())->getPointerTo()), Object.getAlignment()); // If the temporary is a global and has a constant initializer or is a // constant temporary that we promoted to a global, we may have already @@ -464,7 +506,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { case SubobjectAdjustment::FieldAdjustment: { LValue LV = MakeAddrLValue(Object, E->getType(), - AlignmentSource::Decl); + LValueBaseInfo(AlignmentSource::Decl, false)); LV = EmitLValueForField(LV, Adjustment.Field); assert(LV.isSimple() && "materialized temporary field is not a simple lvalue"); @@ -481,7 +523,8 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { } } - return MakeAddrLValue(Object, M->getType(), AlignmentSource::Decl); + return MakeAddrLValue(Object, M->getType(), + LValueBaseInfo(AlignmentSource::Decl, false)); } RValue @@ -534,7 +577,8 @@ bool CodeGenFunction::sanitizePerformTypeCheck() const { void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *Ptr, QualType Ty, - CharUnits Alignment, bool SkipNullCheck) { + CharUnits Alignment, + SanitizerSet SkippedChecks) { if (!sanitizePerformTypeCheck()) return; @@ -544,31 +588,52 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, if (Ptr->getType()->getPointerAddressSpace()) return; + // Don't check pointers to volatile data. The behavior here is implementation- + // defined. + if (Ty.isVolatileQualified()) + return; + SanitizerScope SanScope(this); SmallVector<std::pair<llvm::Value *, SanitizerMask>, 3> Checks; llvm::BasicBlock *Done = nullptr; + // Quickly determine whether we have a pointer to an alloca. It's possible + // to skip null checks, and some alignment checks, for these pointers. This + // can reduce compile-time significantly. + auto PtrToAlloca = + dyn_cast<llvm::AllocaInst>(Ptr->stripPointerCastsNoFollowAliases()); + bool AllowNullPointers = TCK == TCK_DowncastPointer || TCK == TCK_Upcast || TCK == TCK_UpcastToVirtualBase; if ((SanOpts.has(SanitizerKind::Null) || AllowNullPointers) && - !SkipNullCheck) { + !SkippedChecks.has(SanitizerKind::Null) && !PtrToAlloca) { // The glvalue must not be an empty glvalue. llvm::Value *IsNonNull = Builder.CreateIsNotNull(Ptr); - if (AllowNullPointers) { - // When performing pointer casts, it's OK if the value is null. - // Skip the remaining checks in that case. 
- Done = createBasicBlock("null"); - llvm::BasicBlock *Rest = createBasicBlock("not.null"); - Builder.CreateCondBr(IsNonNull, Rest, Done); - EmitBlock(Rest); - } else { - Checks.push_back(std::make_pair(IsNonNull, SanitizerKind::Null)); + // The IR builder can constant-fold the null check if the pointer points to + // a constant. + bool PtrIsNonNull = + IsNonNull == llvm::ConstantInt::getTrue(getLLVMContext()); + + // Skip the null check if the pointer is known to be non-null. + if (!PtrIsNonNull) { + if (AllowNullPointers) { + // When performing pointer casts, it's OK if the value is null. + // Skip the remaining checks in that case. + Done = createBasicBlock("null"); + llvm::BasicBlock *Rest = createBasicBlock("not.null"); + Builder.CreateCondBr(IsNonNull, Rest, Done); + EmitBlock(Rest); + } else { + Checks.push_back(std::make_pair(IsNonNull, SanitizerKind::Null)); + } } } - if (SanOpts.has(SanitizerKind::ObjectSize) && !Ty->isIncompleteType()) { + if (SanOpts.has(SanitizerKind::ObjectSize) && + !SkippedChecks.has(SanitizerKind::ObjectSize) && + !Ty->isIncompleteType()) { uint64_t Size = getContext().getTypeSizeInChars(Ty).getQuantity(); // The glvalue must refer to a large enough storage region. @@ -578,22 +643,25 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Type *Tys[2] = { IntPtrTy, Int8PtrTy }; llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, Tys); llvm::Value *Min = Builder.getFalse(); + llvm::Value *NullIsUnknown = Builder.getFalse(); llvm::Value *CastAddr = Builder.CreateBitCast(Ptr, Int8PtrTy); - llvm::Value *LargeEnough = - Builder.CreateICmpUGE(Builder.CreateCall(F, {CastAddr, Min}), - llvm::ConstantInt::get(IntPtrTy, Size)); + llvm::Value *LargeEnough = Builder.CreateICmpUGE( + Builder.CreateCall(F, {CastAddr, Min, NullIsUnknown}), + llvm::ConstantInt::get(IntPtrTy, Size)); Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize)); } uint64_t AlignVal = 0; - if (SanOpts.has(SanitizerKind::Alignment)) { + if (SanOpts.has(SanitizerKind::Alignment) && + !SkippedChecks.has(SanitizerKind::Alignment)) { AlignVal = Alignment.getQuantity(); if (!Ty->isIncompleteType() && !AlignVal) AlignVal = getContext().getTypeAlignInChars(Ty).getQuantity(); // The glvalue must be suitably aligned. - if (AlignVal) { + if (AlignVal > 1 && + (!PtrToAlloca || PtrToAlloca->getAlignment() < AlignVal)) { llvm::Value *Align = Builder.CreateAnd(Builder.CreatePtrToInt(Ptr, IntPtrTy), llvm::ConstantInt::get(IntPtrTy, AlignVal - 1)); @@ -624,6 +692,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, // or call a non-static member function CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); if (SanOpts.has(SanitizerKind::Vptr) && + !SkippedChecks.has(SanitizerKind::Vptr) && (TCK == TCK_MemberAccess || TCK == TCK_MemberCall || TCK == TCK_DowncastPointer || TCK == TCK_DowncastReference || TCK == TCK_UpcastToVirtualBase) && @@ -825,7 +894,7 @@ void CodeGenModule::EmitExplicitCastExprType(const ExplicitCastExpr *E, /// EmitPointerWithAlignment - Given an expression of pointer type, try to /// derive a more accurate bound on the alignment of the pointer. Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, - AlignmentSource *Source) { + LValueBaseInfo *BaseInfo) { // We allow this with ObjC object pointers because of fragile ABIs. 
assert(E->getType()->isPointerType() || E->getType()->isObjCObjectPointerType()); @@ -844,16 +913,20 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, if (PtrTy->getPointeeType()->isVoidType()) break; - AlignmentSource InnerSource; - Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), &InnerSource); - if (Source) *Source = InnerSource; + LValueBaseInfo InnerInfo; + Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), &InnerInfo); + if (BaseInfo) *BaseInfo = InnerInfo; // If this is an explicit bitcast, and the source l-value is // opaque, honor the alignment of the casted-to type. if (isa<ExplicitCastExpr>(CE) && - InnerSource != AlignmentSource::Decl) { - Addr = Address(Addr.getPointer(), - getNaturalPointeeTypeAlignment(E->getType(), Source)); + InnerInfo.getAlignmentSource() != AlignmentSource::Decl) { + LValueBaseInfo ExpInfo; + CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), + &ExpInfo); + if (BaseInfo) + BaseInfo->mergeForCast(ExpInfo); + Addr = Address(Addr.getPointer(), Align); } if (SanOpts.has(SanitizerKind::CFIUnrelatedCast) && @@ -871,12 +944,12 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, // Array-to-pointer decay. case CK_ArrayToPointerDecay: - return EmitArrayToPointerDecay(CE->getSubExpr(), Source); + return EmitArrayToPointerDecay(CE->getSubExpr(), BaseInfo); // Derived-to-base conversions. case CK_UncheckedDerivedToBase: case CK_DerivedToBase: { - Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), Source); + Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo); auto Derived = CE->getSubExpr()->getType()->getPointeeCXXRecordDecl(); return GetAddressOfBaseClass(Addr, Derived, CE->path_begin(), CE->path_end(), @@ -895,7 +968,7 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) { if (UO->getOpcode() == UO_AddrOf) { LValue LV = EmitLValue(UO->getSubExpr()); - if (Source) *Source = LV.getAlignmentSource(); + if (BaseInfo) *BaseInfo = LV.getBaseInfo(); return LV.getAddress(); } } @@ -903,7 +976,7 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, // TODO: conditional operators, comma. // Otherwise, use the alignment of the type. - CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), Source); + CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), BaseInfo); return Address(EmitScalarExpr(E), Align); } @@ -947,15 +1020,47 @@ LValue CodeGenFunction::EmitUnsupportedLValue(const Expr *E, E->getType()); } +bool CodeGenFunction::IsWrappedCXXThis(const Expr *Obj) { + const Expr *Base = Obj; + while (!isa<CXXThisExpr>(Base)) { + // The result of a dynamic_cast can be null. 
+ if (isa<CXXDynamicCastExpr>(Base)) + return false; + + if (const auto *CE = dyn_cast<CastExpr>(Base)) { + Base = CE->getSubExpr(); + } else if (const auto *PE = dyn_cast<ParenExpr>(Base)) { + Base = PE->getSubExpr(); + } else if (const auto *UO = dyn_cast<UnaryOperator>(Base)) { + if (UO->getOpcode() == UO_Extension) + Base = UO->getSubExpr(); + else + return false; + } else { + return false; + } + } + return true; +} + LValue CodeGenFunction::EmitCheckedLValue(const Expr *E, TypeCheckKind TCK) { LValue LV; if (SanOpts.has(SanitizerKind::ArrayBounds) && isa<ArraySubscriptExpr>(E)) LV = EmitArraySubscriptExpr(cast<ArraySubscriptExpr>(E), /*Accessed*/true); else LV = EmitLValue(E); - if (!isa<DeclRefExpr>(E) && !LV.isBitField() && LV.isSimple()) + if (!isa<DeclRefExpr>(E) && !LV.isBitField() && LV.isSimple()) { + SanitizerSet SkippedChecks; + if (const auto *ME = dyn_cast<MemberExpr>(E)) { + bool IsBaseCXXThis = IsWrappedCXXThis(ME->getBase()); + if (IsBaseCXXThis) + SkippedChecks.set(SanitizerKind::Alignment, true); + if (IsBaseCXXThis || isa<DeclRefExpr>(ME->getBase())) + SkippedChecks.set(SanitizerKind::Null, true); + } EmitTypeCheck(TCK, E->getExprLoc(), LV.getPointer(), - E->getType(), LV.getAlignment()); + E->getType(), LV.getAlignment(), SkippedChecks); + } return LV; } @@ -1033,7 +1138,19 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { const auto *cleanups = cast<ExprWithCleanups>(E); enterFullExpression(cleanups); RunCleanupsScope Scope(*this); - return EmitLValue(cleanups->getSubExpr()); + LValue LV = EmitLValue(cleanups->getSubExpr()); + if (LV.isSimple()) { + // Defend against branches out of gnu statement expressions surrounded by + // cleanups. + llvm::Value *V = LV.getPointer(); + Scope.ForceCleanup({&V}); + return LValue::MakeAddr(Address(V, LV.getAlignment()), LV.getType(), + getContext(), LV.getBaseInfo(), + LV.getTBAAInfo()); + } + // FIXME: Is it possible to create an ExprWithCleanups that produces a + // bitfield lvalue or some other non-simple lvalue? 
+ return LV; } case Expr::CXXDefaultArgExprClass: @@ -1085,6 +1202,11 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { case Expr::MaterializeTemporaryExprClass: return EmitMaterializeTemporaryExpr(cast<MaterializeTemporaryExpr>(E)); + + case Expr::CoawaitExprClass: + return EmitCoawaitLValue(cast<CoawaitExpr>(E)); + case Expr::CoyieldExprClass: + return EmitCoyieldLValue(cast<CoyieldExpr>(E)); } } @@ -1203,7 +1325,7 @@ CodeGenFunction::tryEmitAsConstant(DeclRefExpr *refExpr) { llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue, SourceLocation Loc) { return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), - lvalue.getType(), Loc, lvalue.getAlignmentSource(), + lvalue.getType(), Loc, lvalue.getBaseInfo(), lvalue.getTBAAInfo(), lvalue.getTBAABaseType(), lvalue.getTBAAOffset(), lvalue.isNontemporal()); @@ -1265,40 +1387,89 @@ llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) { return MDHelper.createRange(Min, End); } +bool CodeGenFunction::EmitScalarRangeCheck(llvm::Value *Value, QualType Ty, + SourceLocation Loc) { + bool HasBoolCheck = SanOpts.has(SanitizerKind::Bool); + bool HasEnumCheck = SanOpts.has(SanitizerKind::Enum); + if (!HasBoolCheck && !HasEnumCheck) + return false; + + bool IsBool = hasBooleanRepresentation(Ty) || + NSAPI(CGM.getContext()).isObjCBOOLType(Ty); + bool NeedsBoolCheck = HasBoolCheck && IsBool; + bool NeedsEnumCheck = HasEnumCheck && Ty->getAs<EnumType>(); + if (!NeedsBoolCheck && !NeedsEnumCheck) + return false; + + // Single-bit booleans don't need to be checked. Special-case this to avoid + // a bit width mismatch when handling bitfield values. This is handled by + // EmitFromMemory for the non-bitfield case. + if (IsBool && + cast<llvm::IntegerType>(Value->getType())->getBitWidth() == 1) + return false; + + llvm::APInt Min, End; + if (!getRangeForType(*this, Ty, Min, End, /*StrictEnums=*/true, IsBool)) + return true; + + SanitizerScope SanScope(this); + llvm::Value *Check; + --End; + if (!Min) { + Check = Builder.CreateICmpULE( + Value, llvm::ConstantInt::get(getLLVMContext(), End)); + } else { + llvm::Value *Upper = Builder.CreateICmpSLE( + Value, llvm::ConstantInt::get(getLLVMContext(), End)); + llvm::Value *Lower = Builder.CreateICmpSGE( + Value, llvm::ConstantInt::get(getLLVMContext(), Min)); + Check = Builder.CreateAnd(Upper, Lower); + } + llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc), + EmitCheckTypeDescriptor(Ty)}; + SanitizerMask Kind = + NeedsEnumCheck ? SanitizerKind::Enum : SanitizerKind::Bool; + EmitCheck(std::make_pair(Check, Kind), SanitizerHandler::LoadInvalidValue, + StaticArgs, EmitCheckValue(Value)); + return true; +} + llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, - AlignmentSource AlignSource, + LValueBaseInfo BaseInfo, llvm::MDNode *TBAAInfo, QualType TBAABaseType, uint64_t TBAAOffset, bool isNontemporal) { - // For better performance, handle vector loads differently. - if (Ty->isVectorType()) { - const llvm::Type *EltTy = Addr.getElementType(); - - const auto *VTy = cast<llvm::VectorType>(EltTy); - - // Handle vectors of size 3 like size 4 for better performance. - if (VTy->getNumElements() == 3) { - - // Bitcast to vec4 type. - llvm::VectorType *vec4Ty = llvm::VectorType::get(VTy->getElementType(), - 4); - Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4"); - // Now load value. - llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4"); - - // Shuffle vector to get vec3. 
- V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty), - {0, 1, 2}, "extractVec"); - return EmitFromMemory(V, Ty); + if (!CGM.getCodeGenOpts().PreserveVec3Type) { + // For better performance, handle vector loads differently. + if (Ty->isVectorType()) { + const llvm::Type *EltTy = Addr.getElementType(); + + const auto *VTy = cast<llvm::VectorType>(EltTy); + + // Handle vectors of size 3 like size 4 for better performance. + if (VTy->getNumElements() == 3) { + + // Bitcast to vec4 type. + llvm::VectorType *vec4Ty = + llvm::VectorType::get(VTy->getElementType(), 4); + Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4"); + // Now load value. + llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4"); + + // Shuffle vector to get vec3. + V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty), + {0, 1, 2}, "extractVec"); + return EmitFromMemory(V, Ty); + } } } // Atomic operations have to be done on integral types. LValue AtomicLValue = - LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo); + LValue::MakeAddr(Addr, Ty, getContext(), BaseInfo, TBAAInfo); if (Ty->isAtomicType() || LValueIsSuitableForInlineAtomic(AtomicLValue)) { return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal(); } @@ -1310,42 +1481,17 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); } if (TBAAInfo) { - llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, - TBAAOffset); - if (TBAAPath) - CGM.DecorateInstructionWithTBAA(Load, TBAAPath, - false /*ConvertTypeToTag*/); + bool MayAlias = BaseInfo.getMayAlias(); + llvm::MDNode *TBAA = MayAlias + ? CGM.getTBAAInfo(getContext().CharTy) + : CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset); + if (TBAA) + CGM.DecorateInstructionWithTBAA(Load, TBAA, MayAlias); } - bool IsBool = hasBooleanRepresentation(Ty) || - NSAPI(CGM.getContext()).isObjCBOOLType(Ty); - bool NeedsBoolCheck = SanOpts.has(SanitizerKind::Bool) && IsBool; - bool NeedsEnumCheck = - SanOpts.has(SanitizerKind::Enum) && Ty->getAs<EnumType>(); - if (NeedsBoolCheck || NeedsEnumCheck) { - SanitizerScope SanScope(this); - llvm::APInt Min, End; - if (getRangeForType(*this, Ty, Min, End, /*StrictEnums=*/true, IsBool)) { - --End; - llvm::Value *Check; - if (!Min) - Check = Builder.CreateICmpULE( - Load, llvm::ConstantInt::get(getLLVMContext(), End)); - else { - llvm::Value *Upper = Builder.CreateICmpSLE( - Load, llvm::ConstantInt::get(getLLVMContext(), End)); - llvm::Value *Lower = Builder.CreateICmpSGE( - Load, llvm::ConstantInt::get(getLLVMContext(), Min)); - Check = Builder.CreateAnd(Upper, Lower); - } - llvm::Constant *StaticArgs[] = { - EmitCheckSourceLocation(Loc), - EmitCheckTypeDescriptor(Ty) - }; - SanitizerMask Kind = NeedsEnumCheck ? SanitizerKind::Enum : SanitizerKind::Bool; - EmitCheck(std::make_pair(Check, Kind), SanitizerHandler::LoadInvalidValue, - StaticArgs, EmitCheckValue(Load)); - } + if (EmitScalarRangeCheck(Load, Ty, Loc)) { + // In order to prevent the optimizer from throwing away the check, don't + // attach range metadata to the load. 
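// Editor's illustrative example (assumed, not from the patch): a load that the
// refactored EmitScalarRangeCheck above instruments when -fsanitize=enum is
// enabled. Because the check is emitted, the code deliberately falls through
// without attaching !range metadata, so the optimizer cannot fold the check
// away; without the sanitizer, the OptimizationLevel > 0 branch below still
// adds the range metadata as before.
enum Color { Red, Green, Blue };
Color read_color(Color *p) { return *p; } // checked load, no !range metadata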
} else if (CGM.getCodeGenOpts().OptimizationLevel > 0) if (llvm::MDNode *RangeInfo = getRangeForLoadFromType(Ty)) Load->setMetadata(llvm::LLVMContext::MD_range, RangeInfo); @@ -1380,37 +1526,38 @@ llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) { void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, - AlignmentSource AlignSource, + LValueBaseInfo BaseInfo, llvm::MDNode *TBAAInfo, bool isInit, QualType TBAABaseType, uint64_t TBAAOffset, bool isNontemporal) { - // Handle vectors differently to get better performance. - if (Ty->isVectorType()) { - llvm::Type *SrcTy = Value->getType(); - auto *VecTy = cast<llvm::VectorType>(SrcTy); - // Handle vec3 special. - if (VecTy->getNumElements() == 3) { - // Our source is a vec3, do a shuffle vector to make it a vec4. - llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1), - Builder.getInt32(2), - llvm::UndefValue::get(Builder.getInt32Ty())}; - llvm::Value *MaskV = llvm::ConstantVector::get(Mask); - Value = Builder.CreateShuffleVector(Value, - llvm::UndefValue::get(VecTy), - MaskV, "extractVec"); - SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4); - } - if (Addr.getElementType() != SrcTy) { - Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp"); + if (!CGM.getCodeGenOpts().PreserveVec3Type) { + // Handle vectors differently to get better performance. + if (Ty->isVectorType()) { + llvm::Type *SrcTy = Value->getType(); + auto *VecTy = dyn_cast<llvm::VectorType>(SrcTy); + // Handle vec3 special. + if (VecTy && VecTy->getNumElements() == 3) { + // Our source is a vec3, do a shuffle vector to make it a vec4. + llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1), + Builder.getInt32(2), + llvm::UndefValue::get(Builder.getInt32Ty())}; + llvm::Value *MaskV = llvm::ConstantVector::get(Mask); + Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy), + MaskV, "extractVec"); + SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4); + } + if (Addr.getElementType() != SrcTy) { + Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp"); + } } } Value = EmitToMemory(Value, Ty); LValue AtomicLValue = - LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo); + LValue::MakeAddr(Addr, Ty, getContext(), BaseInfo, TBAAInfo); if (Ty->isAtomicType() || (!isInit && LValueIsSuitableForInlineAtomic(AtomicLValue))) { EmitAtomicStore(RValue::get(Value), AtomicLValue, isInit); @@ -1425,18 +1572,19 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); } if (TBAAInfo) { - llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, - TBAAOffset); - if (TBAAPath) - CGM.DecorateInstructionWithTBAA(Store, TBAAPath, - false /*ConvertTypeToTag*/); + bool MayAlias = BaseInfo.getMayAlias(); + llvm::MDNode *TBAA = MayAlias + ? 
CGM.getTBAAInfo(getContext().CharTy) + : CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset); + if (TBAA) + CGM.DecorateInstructionWithTBAA(Store, TBAA, MayAlias); } } void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue, bool isInit) { EmitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), - lvalue.getType(), lvalue.getAlignmentSource(), + lvalue.getType(), lvalue.getBaseInfo(), lvalue.getTBAAInfo(), isInit, lvalue.getTBAABaseType(), lvalue.getTBAAOffset(), lvalue.isNontemporal()); } @@ -1487,10 +1635,11 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) { return EmitLoadOfGlobalRegLValue(LV); assert(LV.isBitField() && "Unknown LValue type!"); - return EmitLoadOfBitfieldLValue(LV); + return EmitLoadOfBitfieldLValue(LV, Loc); } -RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV) { +RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV, + SourceLocation Loc) { const CGBitFieldInfo &Info = LV.getBitFieldInfo(); // Get the output type. @@ -1515,7 +1664,7 @@ RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV) { "bf.clear"); } Val = Builder.CreateIntCast(Val, ResLTy, Info.IsSigned, "bf.cast"); - + EmitScalarRangeCheck(Val, LV.getType(), Loc); return RValue::get(Val); } @@ -1967,38 +2116,39 @@ static LValue EmitThreadPrivateVarDeclLValue( llvm::Type *RealVarTy, SourceLocation Loc) { Addr = CGF.CGM.getOpenMPRuntime().getAddrOfThreadPrivate(CGF, VD, Addr, Loc); Addr = CGF.Builder.CreateElementBitCast(Addr, RealVarTy); - return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); + LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); + return CGF.MakeAddrLValue(Addr, T, BaseInfo); } Address CodeGenFunction::EmitLoadOfReference(Address Addr, const ReferenceType *RefTy, - AlignmentSource *Source) { + LValueBaseInfo *BaseInfo) { llvm::Value *Ptr = Builder.CreateLoad(Addr); return Address(Ptr, getNaturalTypeAlignment(RefTy->getPointeeType(), - Source, /*forPointee*/ true)); - + BaseInfo, /*forPointee*/ true)); } LValue CodeGenFunction::EmitLoadOfReferenceLValue(Address RefAddr, const ReferenceType *RefTy) { - AlignmentSource Source; - Address Addr = EmitLoadOfReference(RefAddr, RefTy, &Source); - return MakeAddrLValue(Addr, RefTy->getPointeeType(), Source); + LValueBaseInfo BaseInfo; + Address Addr = EmitLoadOfReference(RefAddr, RefTy, &BaseInfo); + return MakeAddrLValue(Addr, RefTy->getPointeeType(), BaseInfo); } Address CodeGenFunction::EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, - AlignmentSource *Source) { + LValueBaseInfo *BaseInfo) { llvm::Value *Addr = Builder.CreateLoad(Ptr); - return Address(Addr, getNaturalTypeAlignment(PtrTy->getPointeeType(), Source, + return Address(Addr, getNaturalTypeAlignment(PtrTy->getPointeeType(), + BaseInfo, /*forPointeeType=*/true)); } LValue CodeGenFunction::EmitLoadOfPointerLValue(Address PtrAddr, const PointerType *PtrTy) { - AlignmentSource Source; - Address Addr = EmitLoadOfPointer(PtrAddr, PtrTy, &Source); - return MakeAddrLValue(Addr, PtrTy->getPointeeType(), Source); + LValueBaseInfo BaseInfo; + Address Addr = EmitLoadOfPointer(PtrAddr, PtrTy, &BaseInfo); + return MakeAddrLValue(Addr, PtrTy->getPointeeType(), BaseInfo); } static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, @@ -2024,7 +2174,8 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, if (auto RefTy = VD->getType()->getAs<ReferenceType>()) { LV = CGF.EmitLoadOfReferenceLValue(Addr, RefTy); } else { - LV = CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); + LValueBaseInfo 
BaseInfo(AlignmentSource::Decl, false); + LV = CGF.MakeAddrLValue(Addr, T, BaseInfo); } setObjCGCLValueClass(CGF.getContext(), E, LV); return LV; @@ -2058,7 +2209,8 @@ static LValue EmitFunctionDeclLValue(CodeGenFunction &CGF, const Expr *E, const FunctionDecl *FD) { llvm::Value *V = EmitFunctionDeclPointer(CGF.CGM, FD); CharUnits Alignment = CGF.getContext().getDeclAlign(FD); - return CGF.MakeAddrLValue(V, E->getType(), Alignment, AlignmentSource::Decl); + LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); + return CGF.MakeAddrLValue(V, E->getType(), Alignment, BaseInfo); } static LValue EmitCapturedFieldLValue(CodeGenFunction &CGF, const FieldDecl *FD, @@ -2123,8 +2275,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // Should we be using the alignment of the constant pointer we emitted? CharUnits Alignment = getNaturalTypeAlignment(E->getType(), nullptr, /*pointee*/ true); - - return MakeAddrLValue(Address(Val, Alignment), T, AlignmentSource::Decl); + LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); + return MakeAddrLValue(Address(Val, Alignment), T, BaseInfo); } // Check for captured variables. @@ -2141,14 +2293,16 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { LValue CapLVal = EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD), CapturedStmtInfo->getContextValue()); + bool MayAlias = CapLVal.getBaseInfo().getMayAlias(); return MakeAddrLValue( Address(CapLVal.getPointer(), getContext().getDeclAlign(VD)), - CapLVal.getType(), AlignmentSource::Decl); + CapLVal.getType(), LValueBaseInfo(AlignmentSource::Decl, MayAlias)); } assert(isa<BlockDecl>(CurCodeDecl)); Address addr = GetAddrOfBlockDecl(VD, VD->hasAttr<BlocksAttr>()); - return MakeAddrLValue(addr, T, AlignmentSource::Decl); + LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); + return MakeAddrLValue(addr, T, BaseInfo); } } @@ -2162,7 +2316,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { if (ND->hasAttr<WeakRefAttr>()) { const auto *VD = cast<ValueDecl>(ND); ConstantAddress Aliasee = CGM.GetWeakRefReference(VD); - return MakeAddrLValue(Aliasee, T, AlignmentSource::Decl); + return MakeAddrLValue(Aliasee, T, + LValueBaseInfo(AlignmentSource::Decl, false)); } if (const auto *VD = dyn_cast<VarDecl>(ND)) { @@ -2208,7 +2363,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { if (auto RefTy = VD->getType()->getAs<ReferenceType>()) { LV = EmitLoadOfReferenceLValue(addr, RefTy); } else { - LV = MakeAddrLValue(addr, T, AlignmentSource::Decl); + LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); + LV = MakeAddrLValue(addr, T, BaseInfo); } bool isLocalStorage = VD->hasLocalStorage(); @@ -2253,9 +2409,9 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { QualType T = E->getSubExpr()->getType()->getPointeeType(); assert(!T.isNull() && "CodeGenFunction::EmitUnaryOpLValue: Illegal type"); - AlignmentSource AlignSource; - Address Addr = EmitPointerWithAlignment(E->getSubExpr(), &AlignSource); - LValue LV = MakeAddrLValue(Addr, T, AlignSource); + LValueBaseInfo BaseInfo; + Address Addr = EmitPointerWithAlignment(E->getSubExpr(), &BaseInfo); + LValue LV = MakeAddrLValue(Addr, T, BaseInfo); LV.getQuals().setAddressSpace(ExprTy.getAddressSpace()); // We should not generate __weak write barrier on indirect reference @@ -2287,7 +2443,7 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { (E->getOpcode() == UO_Real ? 
emitAddrOfRealComponent(LV.getAddress(), LV.getType()) : emitAddrOfImagComponent(LV.getAddress(), LV.getType())); - LValue ElemLV = MakeAddrLValue(Component, T, LV.getAlignmentSource()); + LValue ElemLV = MakeAddrLValue(Component, T, LV.getBaseInfo()); ElemLV.getQuals().addQualifiers(LV.getQuals()); return ElemLV; } @@ -2307,12 +2463,14 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { LValue CodeGenFunction::EmitStringLiteralLValue(const StringLiteral *E) { return MakeAddrLValue(CGM.GetAddrOfConstantStringFromLiteral(E), - E->getType(), AlignmentSource::Decl); + E->getType(), + LValueBaseInfo(AlignmentSource::Decl, false)); } LValue CodeGenFunction::EmitObjCEncodeExprLValue(const ObjCEncodeExpr *E) { return MakeAddrLValue(CGM.GetAddrOfConstantStringFromObjCEncode(E), - E->getType(), AlignmentSource::Decl); + E->getType(), + LValueBaseInfo(AlignmentSource::Decl, false)); } LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { @@ -2324,6 +2482,7 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { StringRef NameItems[] = { PredefinedExpr::getIdentTypeName(E->getIdentType()), FnName}; std::string GVName = llvm::join(NameItems, NameItems + 2, "."); + LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); if (auto *BD = dyn_cast<BlockDecl>(CurCodeDecl)) { std::string Name = SL->getString(); if (!Name.empty()) { @@ -2332,14 +2491,14 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { if (Discriminator) Name += "_" + Twine(Discriminator + 1).str(); auto C = CGM.GetAddrOfConstantCString(Name, GVName.c_str()); - return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl); + return MakeAddrLValue(C, E->getType(), BaseInfo); } else { auto C = CGM.GetAddrOfConstantCString(FnName, GVName.c_str()); - return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl); + return MakeAddrLValue(C, E->getType(), BaseInfo); } } auto C = CGM.GetAddrOfConstantStringFromLiteral(SL, GVName); - return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl); + return MakeAddrLValue(C, E->getType(), BaseInfo); } /// Emit a type description suitable for use by a runtime sanitizer library. The @@ -2545,8 +2704,8 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF, llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction( FnType, FnName, - llvm::AttributeSet::get(CGF.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, B), + llvm::AttributeList::get(CGF.getLLVMContext(), + llvm::AttributeList::FunctionIndex, B), /*Local=*/true); llvm::CallInst *HandlerCall = CGF.EmitNounwindRuntimeCall(Fn, FnArgs); if (!MayReturn) { @@ -2709,6 +2868,24 @@ void CodeGenFunction::EmitCfiSlowPathCheck( EmitBlock(Cont); } +// Emit a stub for __cfi_check function so that the linker knows about this +// symbol in LTO mode. +void CodeGenFunction::EmitCfiCheckStub() { + llvm::Module *M = &CGM.getModule(); + auto &Ctx = M->getContext(); + llvm::Function *F = llvm::Function::Create( + llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy}, false), + llvm::GlobalValue::WeakAnyLinkage, "__cfi_check", M); + llvm::BasicBlock *BB = llvm::BasicBlock::Create(Ctx, "entry", F); + // FIXME: consider emitting an intrinsic call like + // call void @llvm.cfi_check(i64 %0, i8* %1, i8* %2) + // which can be lowered in CrossDSOCFI pass to the actual contents of + // __cfi_check. This would allow inlining of __cfi_check calls. 
+ llvm::CallInst::Create( + llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::trap), "", BB); + llvm::ReturnInst::Create(Ctx, nullptr, BB); +} + // This function is basically a switch over the CFI failure kind, which is // extracted from CFICheckFailData (1st function argument). Each case is either // llvm.trap or a call to one of the two runtime handlers, based on @@ -2719,10 +2896,10 @@ void CodeGenFunction::EmitCfiSlowPathCheck( void CodeGenFunction::EmitCfiCheckFail() { SanitizerScope SanScope(this); FunctionArgList Args; - ImplicitParamDecl ArgData(getContext(), nullptr, SourceLocation(), nullptr, - getContext().VoidPtrTy); - ImplicitParamDecl ArgAddr(getContext(), nullptr, SourceLocation(), nullptr, - getContext().VoidPtrTy); + ImplicitParamDecl ArgData(getContext(), getContext().VoidPtrTy, + ImplicitParamDecl::Other); + ImplicitParamDecl ArgAddr(getContext(), getContext().VoidPtrTy, + ImplicitParamDecl::Other); Args.push_back(&ArgData); Args.push_back(&ArgAddr); @@ -2750,9 +2927,9 @@ void CodeGenFunction::EmitCfiCheckFail() { EmitTrapCheck(DataIsNotNullPtr); llvm::StructType *SourceLocationTy = - llvm::StructType::get(VoidPtrTy, Int32Ty, Int32Ty, nullptr); + llvm::StructType::get(VoidPtrTy, Int32Ty, Int32Ty); llvm::StructType *CfiCheckFailDataTy = - llvm::StructType::get(Int8Ty, SourceLocationTy, VoidPtrTy, nullptr); + llvm::StructType::get(Int8Ty, SourceLocationTy, VoidPtrTy); llvm::Value *V = Builder.CreateConstGEP2_32( CfiCheckFailDataTy, @@ -2821,21 +2998,21 @@ llvm::CallInst *CodeGenFunction::EmitTrapCall(llvm::Intrinsic::ID IntrID) { if (!CGM.getCodeGenOpts().TrapFuncName.empty()) { auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name", CGM.getCodeGenOpts().TrapFuncName); - TrapCall->addAttribute(llvm::AttributeSet::FunctionIndex, A); + TrapCall->addAttribute(llvm::AttributeList::FunctionIndex, A); } return TrapCall; } Address CodeGenFunction::EmitArrayToPointerDecay(const Expr *E, - AlignmentSource *AlignSource) { + LValueBaseInfo *BaseInfo) { assert(E->getType()->isArrayType() && "Array to pointer decay must have array source type!"); // Expressions of array type can't be bitfields or vector elements. LValue LV = EmitLValue(E); Address Addr = LV.getAddress(); - if (AlignSource) *AlignSource = LV.getAlignmentSource(); + if (BaseInfo) *BaseInfo = LV.getBaseInfo(); // If the array type was an incomplete type, we need to make sure // the decay ends up being the right type. @@ -2874,9 +3051,13 @@ static llvm::Value *emitArraySubscriptGEP(CodeGenFunction &CGF, llvm::Value *ptr, ArrayRef<llvm::Value*> indices, bool inbounds, + bool signedIndices, + SourceLocation loc, const llvm::Twine &name = "arrayidx") { if (inbounds) { - return CGF.Builder.CreateInBoundsGEP(ptr, indices, name); + return CGF.EmitCheckedInBoundsGEP(ptr, indices, signedIndices, + CodeGenFunction::NotSubtraction, loc, + name); } else { return CGF.Builder.CreateGEP(ptr, indices, name); } @@ -2907,8 +3088,9 @@ static QualType getFixedSizeElementType(const ASTContext &ctx, } static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, - ArrayRef<llvm::Value*> indices, + ArrayRef<llvm::Value *> indices, QualType eltType, bool inbounds, + bool signedIndices, SourceLocation loc, const llvm::Twine &name = "arrayidx") { // All the indices except that last must be zero. 
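For context on the signedIndices and loc parameters being threaded into emitArraySubscriptGEP here: EmitCheckedInBoundsGEP (presumably backing a pointer-overflow style UBSan check) needs to know whether any index expression had a signed type, because only then can the computed element address legitimately sit below the base pointer. A minimal illustration at the user-source level — plain C++, not CodeGen code, and the sanitizer flag name is an assumption:

    // Compiled with something like -fsanitize=pointer-overflow, both subscripts
    // below are lowered through the checked in-bounds GEP added in this patch.
    #include <cstddef>

    int forward(const unsigned char *buf, std::size_t i) {
      return buf[i];    // unsigned index: the offset can only move the pointer up
    }

    int backward(const unsigned char *end, std::ptrdiff_t i) {
      return end[-i];   // signed index: the offset may be negative, so the check
                        // must allow the address to land below 'end'
    }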
#ifndef NDEBUG @@ -2928,8 +3110,8 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, CharUnits eltAlign = getArrayElementAlign(addr.getAlignment(), indices.back(), eltSize); - llvm::Value *eltPtr = - emitArraySubscriptGEP(CGF, addr.getPointer(), indices, inbounds, name); + llvm::Value *eltPtr = emitArraySubscriptGEP( + CGF, addr.getPointer(), indices, inbounds, signedIndices, loc, name); return Address(eltPtr, eltAlign); } @@ -2939,6 +3121,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // in lexical order (this complexity is, sadly, required by C++17). llvm::Value *IdxPre = (E->getLHS() == E->getIdx()) ? EmitScalarExpr(E->getIdx()) : nullptr; + bool SignedIndices = false; auto EmitIdxAfterBase = [&, IdxPre](bool Promote) -> llvm::Value * { auto *Idx = IdxPre; if (E->getLHS() != E->getIdx()) { @@ -2948,6 +3131,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, QualType IdxTy = E->getIdx()->getType(); bool IdxSigned = IdxTy->isSignedIntegerOrEnumerationType(); + SignedIndices |= IdxSigned; if (SanOpts.has(SanitizerKind::ArrayBounds)) EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, Accessed); @@ -2970,7 +3154,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, assert(LHS.isSimple() && "Can only subscript lvalue vectors here!"); return LValue::MakeVectorElt(LHS.getAddress(), Idx, E->getBase()->getType(), - LHS.getAlignmentSource()); + LHS.getBaseInfo()); } // All the other cases basically behave like simple offsetting. @@ -2982,18 +3166,19 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, Address Addr = EmitExtVectorElementLValue(LV); QualType EltType = LV.getType()->castAs<VectorType>()->getElementType(); - Addr = emitArraySubscriptGEP(*this, Addr, Idx, EltType, /*inbounds*/ true); - return MakeAddrLValue(Addr, EltType, LV.getAlignmentSource()); + Addr = emitArraySubscriptGEP(*this, Addr, Idx, EltType, /*inbounds*/ true, + SignedIndices, E->getExprLoc()); + return MakeAddrLValue(Addr, EltType, LV.getBaseInfo()); } - AlignmentSource AlignSource; + LValueBaseInfo BaseInfo; Address Addr = Address::invalid(); if (const VariableArrayType *vla = getContext().getAsVariableArrayType(E->getType())) { // The base must be a pointer, which is not an aggregate. Emit // it. It needs to be emitted first in case it's what captures // the VLA bounds. - Addr = EmitPointerWithAlignment(E->getBase(), &AlignSource); + Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); // The element count here is the total number of non-VLA elements. @@ -3010,13 +3195,14 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, } Addr = emitArraySubscriptGEP(*this, Addr, Idx, vla->getElementType(), - !getLangOpts().isSignedOverflowDefined()); + !getLangOpts().isSignedOverflowDefined(), + SignedIndices, E->getExprLoc()); } else if (const ObjCObjectType *OIT = E->getType()->getAs<ObjCObjectType>()){ // Indexing over an interface, as in "NSString *P; P[4];" // Emit the base pointer. 
- Addr = EmitPointerWithAlignment(E->getBase(), &AlignSource); + Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); CharUnits InterfaceSize = getContext().getTypeSizeInChars(OIT); @@ -3036,7 +3222,8 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, CharUnits EltAlign = getArrayElementAlign(Addr.getAlignment(), Idx, InterfaceSize); llvm::Value *EltPtr = - emitArraySubscriptGEP(*this, Addr.getPointer(), ScaledIdx, false); + emitArraySubscriptGEP(*this, Addr.getPointer(), ScaledIdx, false, + SignedIndices, E->getExprLoc()); Addr = Address(EltPtr, EltAlign); // Cast back. @@ -3058,20 +3245,21 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, auto *Idx = EmitIdxAfterBase(/*Promote*/true); // Propagate the alignment from the array itself to the result. - Addr = emitArraySubscriptGEP(*this, ArrayLV.getAddress(), - {CGM.getSize(CharUnits::Zero()), Idx}, - E->getType(), - !getLangOpts().isSignedOverflowDefined()); - AlignSource = ArrayLV.getAlignmentSource(); + Addr = emitArraySubscriptGEP( + *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, + E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices, + E->getExprLoc()); + BaseInfo = ArrayLV.getBaseInfo(); } else { // The base must be a pointer; emit it with an estimate of its alignment. - Addr = EmitPointerWithAlignment(E->getBase(), &AlignSource); + Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); Addr = emitArraySubscriptGEP(*this, Addr, Idx, E->getType(), - !getLangOpts().isSignedOverflowDefined()); + !getLangOpts().isSignedOverflowDefined(), + SignedIndices, E->getExprLoc()); } - LValue LV = MakeAddrLValue(Addr, E->getType(), AlignSource); + LValue LV = MakeAddrLValue(Addr, E->getType(), BaseInfo); // TODO: Preserve/extend path TBAA metadata? @@ -3084,7 +3272,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, } static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, - AlignmentSource &AlignSource, + LValueBaseInfo &BaseInfo, QualType BaseTy, QualType ElTy, bool IsLowerBound) { LValue BaseLVal; @@ -3092,7 +3280,7 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, BaseLVal = CGF.EmitOMPArraySectionExpr(ASE, IsLowerBound); if (BaseTy->isArrayType()) { Address Addr = BaseLVal.getAddress(); - AlignSource = BaseLVal.getAlignmentSource(); + BaseInfo = BaseLVal.getBaseInfo(); // If the array type was an incomplete type, we need to make sure // the decay ends up being the right type. 
@@ -3111,20 +3299,17 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, return CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(ElTy)); } - CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &AlignSource); + LValueBaseInfo TypeInfo; + CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &TypeInfo); + BaseInfo.mergeForCast(TypeInfo); return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress()), Align); } - return CGF.EmitPointerWithAlignment(Base, &AlignSource); + return CGF.EmitPointerWithAlignment(Base, &BaseInfo); } LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, bool IsLowerBound) { - QualType BaseTy; - if (auto *ASE = - dyn_cast<OMPArraySectionExpr>(E->getBase()->IgnoreParenImpCasts())) - BaseTy = OMPArraySectionExpr::getBaseOriginalType(ASE); - else - BaseTy = E->getBase()->getType(); + QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(E->getBase()); QualType ResultExprTy; if (auto *AT = getContext().getAsArrayType(BaseTy)) ResultExprTy = AT->getElementType(); @@ -3218,13 +3403,13 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, assert(Idx); Address EltPtr = Address::invalid(); - AlignmentSource AlignSource; + LValueBaseInfo BaseInfo; if (auto *VLA = getContext().getAsVariableArrayType(ResultExprTy)) { // The base must be a pointer, which is not an aggregate. Emit // it. It needs to be emitted first in case it's what captures // the VLA bounds. Address Base = - emitOMPArraySectionBase(*this, E->getBase(), AlignSource, BaseTy, + emitOMPArraySectionBase(*this, E->getBase(), BaseInfo, BaseTy, VLA->getElementType(), IsLowerBound); // The element count here is the total number of non-VLA elements. llvm::Value *NumElements = getVLASize(VLA).first; @@ -3238,7 +3423,8 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, else Idx = Builder.CreateNSWMul(Idx, NumElements); EltPtr = emitArraySubscriptGEP(*this, Base, Idx, VLA->getElementType(), - !getLangOpts().isSignedOverflowDefined()); + !getLangOpts().isSignedOverflowDefined(), + /*SignedIndices=*/false, E->getExprLoc()); } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) { // If this is A[i] where A is an array, the frontend will have decayed the // base to be a ArrayToPointerDecay implicit cast. While correct, it is @@ -3257,16 +3443,18 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, // Propagate the alignment from the array itself to the result. 
EltPtr = emitArraySubscriptGEP( *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, - ResultExprTy, !getLangOpts().isSignedOverflowDefined()); - AlignSource = ArrayLV.getAlignmentSource(); + ResultExprTy, !getLangOpts().isSignedOverflowDefined(), + /*SignedIndices=*/false, E->getExprLoc()); + BaseInfo = ArrayLV.getBaseInfo(); } else { - Address Base = emitOMPArraySectionBase(*this, E->getBase(), AlignSource, + Address Base = emitOMPArraySectionBase(*this, E->getBase(), BaseInfo, BaseTy, ResultExprTy, IsLowerBound); EltPtr = emitArraySubscriptGEP(*this, Base, Idx, ResultExprTy, - !getLangOpts().isSignedOverflowDefined()); + !getLangOpts().isSignedOverflowDefined(), + /*SignedIndices=*/false, E->getExprLoc()); } - return MakeAddrLValue(EltPtr, ResultExprTy, AlignSource); + return MakeAddrLValue(EltPtr, ResultExprTy, BaseInfo); } LValue CodeGenFunction:: @@ -3278,10 +3466,10 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { if (E->isArrow()) { // If it is a pointer to a vector, emit the address and form an lvalue with // it. - AlignmentSource AlignSource; - Address Ptr = EmitPointerWithAlignment(E->getBase(), &AlignSource); + LValueBaseInfo BaseInfo; + Address Ptr = EmitPointerWithAlignment(E->getBase(), &BaseInfo); const PointerType *PT = E->getBase()->getType()->getAs<PointerType>(); - Base = MakeAddrLValue(Ptr, PT->getPointeeType(), AlignSource); + Base = MakeAddrLValue(Ptr, PT->getPointeeType(), BaseInfo); Base.getQuals().removeObjCGCAttr(); } else if (E->getBase()->isGLValue()) { // Otherwise, if the base is an lvalue ( as in the case of foo.x.x), @@ -3298,7 +3486,7 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { Address VecMem = CreateMemTemp(E->getBase()->getType()); Builder.CreateStore(Vec, VecMem); Base = MakeAddrLValue(VecMem, E->getBase()->getType(), - AlignmentSource::Decl); + LValueBaseInfo(AlignmentSource::Decl, false)); } QualType type = @@ -3312,7 +3500,7 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { llvm::Constant *CV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); return LValue::MakeExtVectorElt(Base.getAddress(), CV, type, - Base.getAlignmentSource()); + Base.getBaseInfo()); } assert(Base.isExtVectorElt() && "Can only subscript lvalue vec elts here!"); @@ -3323,20 +3511,26 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { CElts.push_back(BaseElts->getAggregateElement(Indices[i])); llvm::Constant *CV = llvm::ConstantVector::get(CElts); return LValue::MakeExtVectorElt(Base.getExtVectorAddress(), CV, type, - Base.getAlignmentSource()); + Base.getBaseInfo()); } LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { Expr *BaseExpr = E->getBase(); - // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a scalar. 
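Most hunks in this file mechanically swap the bare AlignmentSource argument for an LValueBaseInfo. Its definition is not part of this section; judging purely from the call sites here, it pairs the alignment source with a may-alias bit (and grows a mergeForCast helper used in the OMP array-section hunk above). A sketch of the shape implied by this usage — inferred, not the actual CGValue.h definition, and the enumerators are assumptions:

    // Sketch only: what the call sites in this diff appear to rely on.
    enum class AlignmentSource { Decl, AttributedType, Type };

    class LValueBaseInfo {
      AlignmentSource AlignSource;
      bool MayAlias;
    public:
      explicit LValueBaseInfo(AlignmentSource Source = AlignmentSource::Type,
                              bool Alias = false)
          : AlignSource(Source), MayAlias(Alias) {}
      AlignmentSource getAlignmentSource() const { return AlignSource; }
      void setAlignmentSource(AlignmentSource Source) { AlignSource = Source; }
      bool getMayAlias() const { return MayAlias; }
      void setMayAlias(bool Alias) { MayAlias = Alias; }
    };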
LValue BaseLV; if (E->isArrow()) { - AlignmentSource AlignSource; - Address Addr = EmitPointerWithAlignment(BaseExpr, &AlignSource); + LValueBaseInfo BaseInfo; + Address Addr = EmitPointerWithAlignment(BaseExpr, &BaseInfo); QualType PtrTy = BaseExpr->getType()->getPointeeType(); - EmitTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy); - BaseLV = MakeAddrLValue(Addr, PtrTy, AlignSource); + SanitizerSet SkippedChecks; + bool IsBaseCXXThis = IsWrappedCXXThis(BaseExpr); + if (IsBaseCXXThis) + SkippedChecks.set(SanitizerKind::Alignment, true); + if (IsBaseCXXThis || isa<DeclRefExpr>(BaseExpr)) + SkippedChecks.set(SanitizerKind::Null, true); + EmitTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy, + /*Alignment=*/CharUnits::Zero(), SkippedChecks); + BaseLV = MakeAddrLValue(Addr, PtrTy, BaseInfo); } else BaseLV = EmitCheckedLValue(BaseExpr, TCK_MemberAccess); @@ -3394,10 +3588,37 @@ static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base, return CGF.Builder.CreateStructGEP(base, idx, offset, field->getName()); } +static bool hasAnyVptr(const QualType Type, const ASTContext &Context) { + const auto *RD = Type.getTypePtr()->getAsCXXRecordDecl(); + if (!RD) + return false; + + if (RD->isDynamicClass()) + return true; + + for (const auto &Base : RD->bases()) + if (hasAnyVptr(Base.getType(), Context)) + return true; + + for (const FieldDecl *Field : RD->fields()) + if (hasAnyVptr(Field->getType(), Context)) + return true; + + return false; +} + LValue CodeGenFunction::EmitLValueForField(LValue base, const FieldDecl *field) { + LValueBaseInfo BaseInfo = base.getBaseInfo(); AlignmentSource fieldAlignSource = - getFieldAlignmentSource(base.getAlignmentSource()); + getFieldAlignmentSource(BaseInfo.getAlignmentSource()); + LValueBaseInfo FieldBaseInfo(fieldAlignSource, BaseInfo.getMayAlias()); + + QualType type = field->getType(); + const RecordDecl *rec = field->getParent(); + if (rec->isUnion() || rec->hasAttr<MayAliasAttr>() || type->isVectorType()) + FieldBaseInfo.setMayAlias(true); + bool mayAlias = FieldBaseInfo.getMayAlias(); if (field->isBitField()) { const CGRecordLayout &RL = @@ -3417,14 +3638,9 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, QualType fieldType = field->getType().withCVRQualifiers(base.getVRQualifiers()); - return LValue::MakeBitfield(Addr, Info, fieldType, fieldAlignSource); + return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo); } - const RecordDecl *rec = field->getParent(); - QualType type = field->getType(); - - bool mayAlias = rec->hasAttr<MayAliasAttr>(); - Address addr = base.getAddress(); unsigned cvr = base.getVRQualifiers(); bool TBAAPath = CGM.getCodeGenOpts().StructPathTBAA; @@ -3433,6 +3649,14 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, assert(!type->isReferenceType() && "union has reference member"); // TODO: handle path-aware TBAA for union. TBAAPath = false; + + const auto FieldType = field->getType(); + if (CGM.getCodeGenOpts().StrictVTablePointers && + hasAnyVptr(FieldType, getContext())) + // Because unions can easily skip invariant.barriers, we need to add + // a barrier every time CXXRecord field with vptr is referenced. + addr = Address(Builder.CreateInvariantGroupBarrier(addr.getPointer()), + addr.getAlignment()); } else { // For structs, we GEP to the field that the record layout suggests. 
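The hasAnyVptr helper and the invariant.group barrier on union member accesses just above target code like the following, where one union member is destroyed and a different polymorphic member is constructed in the same storage; without the barrier, -fstrict-vtable-pointers could reuse a vptr load made through the old member. Illustrative user code, not part of the patch:

    #include <new>

    struct A { virtual int f() { return 1; } };
    struct B { virtual int f() { return 2; } };

    union U {
      A a;
      B b;
      U() {}
      ~U() {}
    };

    int use(U &u) {
      new (&u.a) A;
      int x = u.a.f();   // vptr load, eligible for invariant.group reuse
      u.a.~A();
      new (&u.b) B;      // the same bytes now hold a B
      int y = u.b.f();   // the barrier on the union field access keeps this
                         // call from seeing A's cached vptr
      return x + y;
    }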
addr = emitAddrOfFieldStorage(*this, addr, field); @@ -3458,7 +3682,8 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, type = refType->getPointeeType(); CharUnits alignment = - getNaturalTypeAlignment(type, &fieldAlignSource, /*pointee*/ true); + getNaturalTypeAlignment(type, &FieldBaseInfo, /*pointee*/ true); + FieldBaseInfo.setMayAlias(false); addr = Address(load, alignment); // Qualifiers on the struct don't apply to the referencee, and @@ -3479,7 +3704,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, if (field->hasAttr<AnnotateAttr>()) addr = EmitFieldAnnotations(field, addr); - LValue LV = MakeAddrLValue(addr, type, fieldAlignSource); + LValue LV = MakeAddrLValue(addr, type, FieldBaseInfo); LV.getQuals().addCVRQualifiers(cvr); if (TBAAPath) { const ASTRecordLayout &Layout = @@ -3520,14 +3745,18 @@ CodeGenFunction::EmitLValueForFieldInitialization(LValue Base, V = Builder.CreateElementBitCast(V, llvmType, Field->getName()); // TODO: access-path TBAA? - auto FieldAlignSource = getFieldAlignmentSource(Base.getAlignmentSource()); - return MakeAddrLValue(V, FieldType, FieldAlignSource); + LValueBaseInfo BaseInfo = Base.getBaseInfo(); + LValueBaseInfo FieldBaseInfo( + getFieldAlignmentSource(BaseInfo.getAlignmentSource()), + BaseInfo.getMayAlias()); + return MakeAddrLValue(V, FieldType, FieldBaseInfo); } LValue CodeGenFunction::EmitCompoundLiteralLValue(const CompoundLiteralExpr *E){ + LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); if (E->isFileScope()) { ConstantAddress GlobalPtr = CGM.GetAddrOfConstantCompoundLiteral(E); - return MakeAddrLValue(GlobalPtr, E->getType(), AlignmentSource::Decl); + return MakeAddrLValue(GlobalPtr, E->getType(), BaseInfo); } if (E->getType()->isVariablyModifiedType()) // make sure to emit the VLA size. 
@@ -3535,7 +3764,7 @@ LValue CodeGenFunction::EmitCompoundLiteralLValue(const CompoundLiteralExpr *E){ Address DeclPtr = CreateMemTemp(E->getType(), ".compoundliteral"); const Expr *InitExpr = E->getInitializer(); - LValue Result = MakeAddrLValue(DeclPtr, E->getType(), AlignmentSource::Decl); + LValue Result = MakeAddrLValue(DeclPtr, E->getType(), BaseInfo); EmitAnyExprToMem(InitExpr, DeclPtr, E->getType().getQualifiers(), /*Init*/ true); @@ -3632,8 +3861,12 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { phi->addIncoming(rhs->getPointer(), rhsBlock); Address result(phi, std::min(lhs->getAlignment(), rhs->getAlignment())); AlignmentSource alignSource = - std::max(lhs->getAlignmentSource(), rhs->getAlignmentSource()); - return MakeAddrLValue(result, expr->getType(), alignSource); + std::max(lhs->getBaseInfo().getAlignmentSource(), + rhs->getBaseInfo().getAlignmentSource()); + bool MayAlias = lhs->getBaseInfo().getMayAlias() || + rhs->getBaseInfo().getMayAlias(); + return MakeAddrLValue(result, expr->getType(), + LValueBaseInfo(alignSource, MayAlias)); } else { assert((lhs || rhs) && "both operands of glvalue conditional are throw-expressions?"); @@ -3731,7 +3964,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { This, DerivedClassDecl, E->path_begin(), E->path_end(), /*NullCheckValue=*/false, E->getExprLoc()); - return MakeAddrLValue(Base, E->getType(), LV.getAlignmentSource()); + return MakeAddrLValue(Base, E->getType(), LV.getBaseInfo()); } case CK_ToUnion: return EmitAggExprToLValue(E); @@ -3758,7 +3991,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { /*MayBeNull=*/false, CFITCK_DerivedCast, E->getLocStart()); - return MakeAddrLValue(Derived, E->getType(), LV.getAlignmentSource()); + return MakeAddrLValue(Derived, E->getType(), LV.getBaseInfo()); } case CK_LValueBitCast: { // This must be a reinterpret_cast (or c-style equivalent). 
@@ -3774,13 +4007,13 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { /*MayBeNull=*/false, CFITCK_UnrelatedCast, E->getLocStart()); - return MakeAddrLValue(V, E->getType(), LV.getAlignmentSource()); + return MakeAddrLValue(V, E->getType(), LV.getBaseInfo()); } case CK_ObjCObjectLValueCast: { LValue LV = EmitLValue(E->getSubExpr()); Address V = Builder.CreateElementBitCast(LV.getAddress(), ConvertType(E->getType())); - return MakeAddrLValue(V, E->getType(), LV.getAlignmentSource()); + return MakeAddrLValue(V, E->getType(), LV.getBaseInfo()); } case CK_ZeroToOCLQueue: llvm_unreachable("NULL to OpenCL queue lvalue cast is not valid"); @@ -3949,6 +4182,8 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { RValue RV = EmitAnyExpr(E->getRHS()); LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store); + if (RV.isScalar()) + EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc()); EmitStoreThroughLValue(RV, LV); return LV; } @@ -3967,7 +4202,7 @@ LValue CodeGenFunction::EmitCallExprLValue(const CallExpr *E) { if (!RV.isScalar()) return MakeAddrLValue(RV.getAggregateAddress(), E->getType(), - AlignmentSource::Decl); + LValueBaseInfo(AlignmentSource::Decl, false)); assert(E->getCallReturnType(getContext())->isReferenceType() && "Can't have a scalar return unless the return type is a " @@ -3987,7 +4222,7 @@ LValue CodeGenFunction::EmitCXXConstructLValue(const CXXConstructExpr *E) { AggValueSlot Slot = CreateAggTemp(E->getType()); EmitCXXConstructExpr(E, Slot); return MakeAddrLValue(Slot.getAddress(), E->getType(), - AlignmentSource::Decl); + LValueBaseInfo(AlignmentSource::Decl, false)); } LValue @@ -4002,7 +4237,7 @@ Address CodeGenFunction::EmitCXXUuidofExpr(const CXXUuidofExpr *E) { LValue CodeGenFunction::EmitCXXUuidofLValue(const CXXUuidofExpr *E) { return MakeAddrLValue(EmitCXXUuidofExpr(E), E->getType(), - AlignmentSource::Decl); + LValueBaseInfo(AlignmentSource::Decl, false)); } LValue @@ -4012,7 +4247,7 @@ CodeGenFunction::EmitCXXBindTemporaryLValue(const CXXBindTemporaryExpr *E) { EmitAggExpr(E->getSubExpr(), Slot); EmitCXXTemporary(E->getTemporary(), E->getType(), Slot.getAddress()); return MakeAddrLValue(Slot.getAddress(), E->getType(), - AlignmentSource::Decl); + LValueBaseInfo(AlignmentSource::Decl, false)); } LValue @@ -4020,7 +4255,7 @@ CodeGenFunction::EmitLambdaLValue(const LambdaExpr *E) { AggValueSlot Slot = CreateAggTemp(E->getType(), "temp.lvalue"); EmitLambdaExpr(E, Slot); return MakeAddrLValue(Slot.getAddress(), E->getType(), - AlignmentSource::Decl); + LValueBaseInfo(AlignmentSource::Decl, false)); } LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) { @@ -4028,7 +4263,7 @@ LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) { if (!RV.isScalar()) return MakeAddrLValue(RV.getAggregateAddress(), E->getType(), - AlignmentSource::Decl); + LValueBaseInfo(AlignmentSource::Decl, false)); assert(E->getMethodDecl()->getReturnType()->isReferenceType() && "Can't have a scalar return unless the return type is a " @@ -4040,7 +4275,8 @@ LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) { LValue CodeGenFunction::EmitObjCSelectorLValue(const ObjCSelectorExpr *E) { Address V = CGM.getObjCRuntime().GetAddrOfSelector(*this, E->getSelector()); - return MakeAddrLValue(V, E->getType(), AlignmentSource::Decl); + return MakeAddrLValue(V, E->getType(), + LValueBaseInfo(AlignmentSource::Decl, false)); } llvm::Value *CodeGenFunction::EmitIvarOffset(const ObjCInterfaceDecl *Interface, 
@@ -4084,7 +4320,7 @@ LValue CodeGenFunction::EmitStmtExprLValue(const StmtExpr *E) { // Can only get l-value for message expression returning aggregate type RValue RV = EmitAnyExprToTemp(E); return MakeAddrLValue(RV.getAggregateAddress(), E->getType(), - AlignmentSource::Decl); + LValueBaseInfo(AlignmentSource::Decl, false)); } RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee, @@ -4276,12 +4512,11 @@ EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E) { const MemberPointerType *MPT = E->getRHS()->getType()->getAs<MemberPointerType>(); - AlignmentSource AlignSource; + LValueBaseInfo BaseInfo; Address MemberAddr = - EmitCXXMemberDataPointerAddress(E, BaseAddr, OffsetV, MPT, - &AlignSource); + EmitCXXMemberDataPointerAddress(E, BaseAddr, OffsetV, MPT, &BaseInfo); - return MakeAddrLValue(MemberAddr, MPT->getPointeeType(), AlignSource); + return MakeAddrLValue(MemberAddr, MPT->getPointeeType(), BaseInfo); } /// Given the address of a temporary variable, produce an r-value of @@ -4289,7 +4524,8 @@ EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E) { RValue CodeGenFunction::convertTempToRValue(Address addr, QualType type, SourceLocation loc) { - LValue lvalue = MakeAddrLValue(addr, type, AlignmentSource::Decl); + LValue lvalue = MakeAddrLValue(addr, type, + LValueBaseInfo(AlignmentSource::Decl, false)); switch (getEvaluationKind(type)) { case TEK_Complex: return RValue::getComplex(EmitLoadOfComplex(lvalue, loc)); @@ -4344,9 +4580,9 @@ static LValueOrRValue emitPseudoObjectExpr(CodeGenFunction &CGF, if (ov == resultExpr && ov->isRValue() && !forLValue && CodeGenFunction::hasAggregateEvaluationKind(ov->getType())) { CGF.EmitAggExpr(ov->getSourceExpr(), slot); - + LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); LValue LV = CGF.MakeAddrLValue(slot.getAddress(), ov->getType(), - AlignmentSource::Decl); + BaseInfo); opaqueData = OVMA::bind(CGF, ov, LV); result.RV = slot.asRValue(); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp index 0092447..a05a088 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp @@ -111,6 +111,13 @@ public: void VisitGenericSelectionExpr(GenericSelectionExpr *GE) { Visit(GE->getResultExpr()); } + void VisitCoawaitExpr(CoawaitExpr *E) { + CGF.EmitCoawaitExpr(*E, Dest, IsResultUnused); + } + void VisitCoyieldExpr(CoyieldExpr *E) { + CGF.EmitCoyieldExpr(*E, Dest, IsResultUnused); + } + void VisitUnaryCoawait(UnaryOperator *E) { Visit(E->getSubExpr()); } void VisitUnaryExtension(UnaryOperator *E) { Visit(E->getSubExpr()); } void VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *E) { return Visit(E->getReplacement()); @@ -505,12 +512,20 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, currentElement->addIncoming(element, entryBB); // Emit the actual filler expression. - LValue elementLV = - CGF.MakeAddrLValue(Address(currentElement, elementAlign), elementType); - if (filler) - EmitInitializationToLValue(filler, elementLV); - else - EmitNullInitializationToLValue(elementLV); + { + // C++1z [class.temporary]p5: + // when a default constructor is called to initialize an element of + // an array with no corresponding initializer [...] 
the destruction of + // every temporary created in a default argument is sequenced before + // the construction of the next array element, if any + CodeGenFunction::RunCleanupsScope CleanupsScope(CGF); + LValue elementLV = + CGF.MakeAddrLValue(Address(currentElement, elementAlign), elementType); + if (filler) + EmitInitializationToLValue(filler, elementLV); + else + EmitNullInitializationToLValue(elementLV); + } // Move on to the next element. llvm::Value *nextElement = @@ -1267,7 +1282,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // Store the initializer into the field. EmitInitializationToLValue(E->getInit(curInitIndex++), LV); } else { - // We're out of initalizers; default-initialize to null + // We're out of initializers; default-initialize to null EmitNullInitializationToLValue(LV); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp index 71c8fb8..ab17024 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp @@ -24,7 +24,15 @@ using namespace clang; using namespace CodeGen; -static RequiredArgs +namespace { +struct MemberCallInfo { + RequiredArgs ReqArgs; + // Number of prefix arguments for the call. Ignores the `this` pointer. + unsigned PrefixSize; +}; +} + +static MemberCallInfo commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, llvm::Value *This, llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE, @@ -48,6 +56,7 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size(), MD); + unsigned PrefixSize = Args.size() - 1; // And the rest of the call args. if (RtlArgs) { @@ -65,7 +74,7 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, FPT->getNumParams() == 0 && "No CallExpr specified for function with non-zero number of arguments"); } - return required; + return {required, PrefixSize}; } RValue CodeGenFunction::EmitCXXMemberOrOperatorCall( @@ -75,9 +84,10 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorCall( const CallExpr *CE, CallArgList *RtlArgs) { const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); CallArgList Args; - RequiredArgs required = commonEmitCXXMemberOrOperatorCall( + MemberCallInfo CallInfo = commonEmitCXXMemberOrOperatorCall( *this, MD, This, ImplicitParam, ImplicitParamTy, CE, Args, RtlArgs); - auto &FnInfo = CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required); + auto &FnInfo = CGM.getTypes().arrangeCXXMethodCall( + Args, FPT, CallInfo.ReqArgs, CallInfo.PrefixSize); return EmitCall(FnInfo, Callee, ReturnValue, Args); } @@ -189,7 +199,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( bool CanUseVirtualCall = MD->isVirtual() && !HasQualifier; const CXXMethodDecl *DevirtualizedMethod = nullptr; - if (CanUseVirtualCall && CanDevirtualizeMemberFunctionCall(Base, MD)) { + if (CanUseVirtualCall && + MD->getDevirtualizedMethod(Base, getLangOpts().AppleKext)) { const CXXRecordDecl *BestDynamicDecl = Base->getBestDynamicClassType(); DevirtualizedMethod = MD->getCorrespondingMethodInClass(BestDynamicDecl); assert(DevirtualizedMethod); @@ -290,10 +301,20 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (CE) CallLoc = CE->getExprLoc(); - EmitTypeCheck(isa<CXXConstructorDecl>(CalleeDecl) - ? 
CodeGenFunction::TCK_ConstructorCall - : CodeGenFunction::TCK_MemberCall, - CallLoc, This.getPointer(), C.getRecordType(CalleeDecl->getParent())); + SanitizerSet SkippedChecks; + if (const auto *CMCE = dyn_cast<CXXMemberCallExpr>(CE)) { + auto *IOA = CMCE->getImplicitObjectArgument(); + bool IsImplicitObjectCXXThis = IsWrappedCXXThis(IOA); + if (IsImplicitObjectCXXThis) + SkippedChecks.set(SanitizerKind::Alignment, true); + if (IsImplicitObjectCXXThis || isa<DeclRefExpr>(IOA)) + SkippedChecks.set(SanitizerKind::Null, true); + } + EmitTypeCheck( + isa<CXXConstructorDecl>(CalleeDecl) ? CodeGenFunction::TCK_ConstructorCall + : CodeGenFunction::TCK_MemberCall, + CallLoc, This.getPointer(), C.getRecordType(CalleeDecl->getParent()), + /*Alignment=*/CharUnits::Zero(), SkippedChecks); // FIXME: Uses of 'MD' past this point need to be audited. We may need to use // 'CalleeDecl' instead. @@ -420,7 +441,8 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, // And the rest of the call args EmitCallArgs(Args, FPT, E->arguments()); - return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required), + return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required, + /*PrefixSize=*/0), Callee, ReturnValue, Args); } @@ -659,7 +681,10 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, // Emit the array size expression. // We multiply the size of all dimensions for NumElements. // e.g for 'int[2][3]', ElemType is 'int' and NumElements is 6. - numElements = CGF.EmitScalarExpr(e->getArraySize()); + numElements = CGF.CGM.EmitConstantExpr(e->getArraySize(), + CGF.getContext().getSizeType(), &CGF); + if (!numElements) + numElements = CGF.EmitScalarExpr(e->getArraySize()); assert(isa<llvm::IntegerType>(numElements->getType())); // The number of elements can be have an arbitrary integer type; @@ -1256,10 +1281,10 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF, Fn && Fn->hasFnAttribute(llvm::Attribute::NoBuiltin)) { // FIXME: Add addAttribute to CallSite. if (llvm::CallInst *CI = dyn_cast<llvm::CallInst>(CallOrInvoke)) - CI->addAttribute(llvm::AttributeSet::FunctionIndex, + CI->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::Builtin); else if (llvm::InvokeInst *II = dyn_cast<llvm::InvokeInst>(CallOrInvoke)) - II->addAttribute(llvm::AttributeSet::FunctionIndex, + II->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::Builtin); else llvm_unreachable("unexpected kind of call instruction"); @@ -1507,13 +1532,13 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { assert(E->getNumPlacementArgs() == 1); const Expr *arg = *E->placement_arguments().begin(); - AlignmentSource alignSource; - allocation = EmitPointerWithAlignment(arg, &alignSource); + LValueBaseInfo BaseInfo; + allocation = EmitPointerWithAlignment(arg, &BaseInfo); // The pointer expression will, in many cases, be an opaque void*. // In these cases, discard the computed alignment and use the // formal alignment of the allocated type. - if (alignSource != AlignmentSource::Decl) + if (BaseInfo.getAlignmentSource() != AlignmentSource::Decl) allocation = Address(allocation.getPointer(), allocAlign); // Set up allocatorArgs for the call to operator delete if it's not @@ -1560,7 +1585,7 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { // FIXME: Why do we not pass a CalleeDecl here? 
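Returning to the SkippedChecks logic added for member calls near the top of this hunk (and for EmitMemberExpr earlier in this file): when the object of a member access or call is `this` or a plain named variable, the null check (and, for `this`, the alignment check) is treated as redundant and skipped. A hedged illustration of the intended distinction:

    struct S {
      int v = 0;
      int get() const { return v; }
      int twice() const {
        return get() + get();   // object is 'this': null/alignment checks dropped
      }
    };

    S make();                   // hypothetical factory, only here for contrast

    int read(S &s) {
      int a = s.get();          // object is a plain DeclRefExpr: null check dropped
      int b = make().get();     // object is a computed temporary: checks remain
      return a + b;
    }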
EmitCallArgs(allocatorArgs, allocatorType, E->placement_arguments(), - /*CalleeDecl*/nullptr, /*ParamsToSkip*/ParamsToSkip); + /*AC*/AbstractCallee(), /*ParamsToSkip*/ParamsToSkip); RValue RV = EmitNewDeleteCall(*this, allocator, allocatorType, allocatorArgs); @@ -1634,8 +1659,9 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { // Passing pointer through invariant.group.barrier to avoid propagation of // vptrs information which may be included in previous type. + // To not break LTO with different optimizations levels, we do it regardless + // of optimization level. if (CGM.getCodeGenOpts().StrictVTablePointers && - CGM.getCodeGenOpts().OptimizationLevel > 0 && allocator->isReservedGlobalPlacementOperator()) result = Address(Builder.CreateInvariantGroupBarrier(result.getPointer()), result.getAlignment()); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp index 59bc9cd..9809723 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp @@ -110,6 +110,16 @@ public: VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *PE) { return Visit(PE->getReplacement()); } + ComplexPairTy VisitCoawaitExpr(CoawaitExpr *S) { + return CGF.EmitCoawaitExpr(*S).getComplexVal(); + } + ComplexPairTy VisitCoyieldExpr(CoyieldExpr *S) { + return CGF.EmitCoyieldExpr(*S).getComplexVal(); + } + ComplexPairTy VisitUnaryCoawait(const UnaryOperator *E) { + return Visit(E->getSubExpr()); + } + // l-values. ComplexPairTy VisitDeclRefExpr(DeclRefExpr *E) { @@ -198,7 +208,11 @@ public: ComplexPairTy VisitExprWithCleanups(ExprWithCleanups *E) { CGF.enterFullExpression(E); CodeGenFunction::RunCleanupsScope Scope(CGF); - return Visit(E->getSubExpr()); + ComplexPairTy Vals = Visit(E->getSubExpr()); + // Defend against dominance problems caused by jumps out of expression + // evaluation through the shared cleanup block. + Scope.ForceCleanup({&Vals.first, &Vals.second}); + return Vals; } ComplexPairTy VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *E) { assert(E->getType()->isAnyComplexType() && "Expected complex type!"); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp index 3db15c6..6b72774 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp @@ -201,7 +201,7 @@ void ConstStructBuilder::AppendBitField(const FieldDecl *Field, unsigned NewFieldWidth = FieldSize - BitsInPreviousByte; if (CGM.getDataLayout().isBigEndian()) { - Tmp = Tmp.lshr(NewFieldWidth); + Tmp.lshrInPlace(NewFieldWidth); Tmp = Tmp.trunc(BitsInPreviousByte); // We want the remaining high bits. @@ -210,7 +210,7 @@ void ConstStructBuilder::AppendBitField(const FieldDecl *Field, Tmp = Tmp.trunc(BitsInPreviousByte); // We want the remaining low bits. - FieldValue = FieldValue.lshr(BitsInPreviousByte); + FieldValue.lshrInPlace(BitsInPreviousByte); FieldValue = FieldValue.trunc(NewFieldWidth); } } @@ -273,7 +273,7 @@ void ConstStructBuilder::AppendBitField(const FieldDecl *Field, // We want the low bits. 
Tmp = FieldValue.trunc(CharWidth); - FieldValue = FieldValue.lshr(CharWidth); + FieldValue.lshrInPlace(CharWidth); } Elements.push_back(llvm::ConstantInt::get(CGM.getLLVMContext(), Tmp)); @@ -1361,9 +1361,8 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, Value.getComplexIntImag()); // FIXME: the target may want to specify that this is packed. - llvm::StructType *STy = llvm::StructType::get(Complex[0]->getType(), - Complex[1]->getType(), - nullptr); + llvm::StructType *STy = + llvm::StructType::get(Complex[0]->getType(), Complex[1]->getType()); return llvm::ConstantStruct::get(STy, Complex); } case APValue::Float: { @@ -1384,9 +1383,8 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, Value.getComplexFloatImag()); // FIXME: the target may want to specify that this is packed. - llvm::StructType *STy = llvm::StructType::get(Complex[0]->getType(), - Complex[1]->getType(), - nullptr); + llvm::StructType *STy = + llvm::StructType::get(Complex[0]->getType(), Complex[1]->getType()); return llvm::ConstantStruct::get(STy, Complex); } case APValue::Vector: { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp index 1b85c45..1170b01 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "CodeGenFunction.h" +#include "CGCleanup.h" #include "CGCXXABI.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" @@ -24,10 +25,12 @@ #include "clang/AST/StmtVisitor.h" #include "clang/Basic/TargetInfo.h" #include "clang/Frontend/CodeGenOptions.h" +#include "llvm/ADT/Optional.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" @@ -42,13 +45,85 @@ using llvm::Value; //===----------------------------------------------------------------------===// namespace { + +/// Determine whether the given binary operation may overflow. +/// Sets \p Result to the value of the operation for BO_Add, BO_Sub, BO_Mul, +/// and signed BO_{Div,Rem}. For these opcodes, and for unsigned BO_{Div,Rem}, +/// the returned overflow check is precise. The returned value is 'true' for +/// all other opcodes, to be conservative. +bool mayHaveIntegerOverflow(llvm::ConstantInt *LHS, llvm::ConstantInt *RHS, + BinaryOperator::Opcode Opcode, bool Signed, + llvm::APInt &Result) { + // Assume overflow is possible, unless we can prove otherwise. + bool Overflow = true; + const auto &LHSAP = LHS->getValue(); + const auto &RHSAP = RHS->getValue(); + if (Opcode == BO_Add) { + if (Signed) + Result = LHSAP.sadd_ov(RHSAP, Overflow); + else + Result = LHSAP.uadd_ov(RHSAP, Overflow); + } else if (Opcode == BO_Sub) { + if (Signed) + Result = LHSAP.ssub_ov(RHSAP, Overflow); + else + Result = LHSAP.usub_ov(RHSAP, Overflow); + } else if (Opcode == BO_Mul) { + if (Signed) + Result = LHSAP.smul_ov(RHSAP, Overflow); + else + Result = LHSAP.umul_ov(RHSAP, Overflow); + } else if (Opcode == BO_Div || Opcode == BO_Rem) { + if (Signed && !RHS->isZero()) + Result = LHSAP.sdiv_ov(RHSAP, Overflow); + else + return false; + } + return Overflow; +} + struct BinOpInfo { Value *LHS; Value *RHS; QualType Ty; // Computation Type. 
BinaryOperator::Opcode Opcode; // Opcode of BinOp to perform - bool FPContractable; + FPOptions FPFeatures; const Expr *E; // Entire expr, for error unsupported. May not be binop. + + /// Check if the binop can result in integer overflow. + bool mayHaveIntegerOverflow() const { + // Without constant input, we can't rule out overflow. + auto *LHSCI = dyn_cast<llvm::ConstantInt>(LHS); + auto *RHSCI = dyn_cast<llvm::ConstantInt>(RHS); + if (!LHSCI || !RHSCI) + return true; + + llvm::APInt Result; + return ::mayHaveIntegerOverflow( + LHSCI, RHSCI, Opcode, Ty->hasSignedIntegerRepresentation(), Result); + } + + /// Check if the binop computes a division or a remainder. + bool isDivremOp() const { + return Opcode == BO_Div || Opcode == BO_Rem || Opcode == BO_DivAssign || + Opcode == BO_RemAssign; + } + + /// Check if the binop can result in an integer division by zero. + bool mayHaveIntegerDivisionByZero() const { + if (isDivremOp()) + if (auto *CI = dyn_cast<llvm::ConstantInt>(RHS)) + return CI->isZero(); + return true; + } + + /// Check if the binop can result in a float division by zero. + bool mayHaveFloatDivisionByZero() const { + if (isDivremOp()) + if (auto *CFP = dyn_cast<llvm::ConstantFP>(RHS)) + return CFP->isZero(); + return true; + } }; static bool MustVisitNullValue(const Expr *E) { @@ -58,6 +133,83 @@ static bool MustVisitNullValue(const Expr *E) { return E->getType()->isNullPtrType(); } +/// If \p E is a widened promoted integer, get its base (unpromoted) type. +static llvm::Optional<QualType> getUnwidenedIntegerType(const ASTContext &Ctx, + const Expr *E) { + const Expr *Base = E->IgnoreImpCasts(); + if (E == Base) + return llvm::None; + + QualType BaseTy = Base->getType(); + if (!BaseTy->isPromotableIntegerType() || + Ctx.getTypeSize(BaseTy) >= Ctx.getTypeSize(E->getType())) + return llvm::None; + + return BaseTy; +} + +/// Check if \p E is a widened promoted integer. +static bool IsWidenedIntegerOp(const ASTContext &Ctx, const Expr *E) { + return getUnwidenedIntegerType(Ctx, E).hasValue(); +} + +/// Check if we can skip the overflow check for \p Op. +static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) { + assert((isa<UnaryOperator>(Op.E) || isa<BinaryOperator>(Op.E)) && + "Expected a unary or binary operator"); + + // If the binop has constant inputs and we can prove there is no overflow, + // we can elide the overflow check. + if (!Op.mayHaveIntegerOverflow()) + return true; + + // If a unary op has a widened operand, the op cannot overflow. + if (const auto *UO = dyn_cast<UnaryOperator>(Op.E)) + return IsWidenedIntegerOp(Ctx, UO->getSubExpr()); + + // We usually don't need overflow checks for binops with widened operands. + // Multiplication with promoted unsigned operands is a special case. + const auto *BO = cast<BinaryOperator>(Op.E); + auto OptionalLHSTy = getUnwidenedIntegerType(Ctx, BO->getLHS()); + if (!OptionalLHSTy) + return false; + + auto OptionalRHSTy = getUnwidenedIntegerType(Ctx, BO->getRHS()); + if (!OptionalRHSTy) + return false; + + QualType LHSTy = *OptionalLHSTy; + QualType RHSTy = *OptionalRHSTy; + + // This is the simple case: binops without unsigned multiplication, and with + // widened operands. No overflow check is needed here. + if ((Op.Opcode != BO_Mul && Op.Opcode != BO_MulAssign) || + !LHSTy->isUnsignedIntegerType() || !RHSTy->isUnsignedIntegerType()) + return true; + + // For unsigned multiplication the overflow check can be elided if either one + // of the unpromoted types are less than half the size of the promoted type. 
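A worked instance of that rule, independent of the CodeGen types: with the usual promotion of 8-bit unsigned operands to a 32-bit int, 2*8 < 32 and 255*255 = 65025 fits comfortably, so the multiplication can never overflow and the check may be elided; with 16-bit operands, 2*16 is not strictly less than 32 and 65535*65535 really does exceed INT_MAX, so the check has to stay.

    static_assert(255ull * 255ull == 65025ull && 65025ull <= 0x7FFFFFFFull,
                  "two promoted uint8_t operands can never overflow a 32-bit int");
    static_assert(65535ull * 65535ull > 0x7FFFFFFFull,
                  "two promoted uint16_t operands can overflow, so no elision");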
+ unsigned PromotedSize = Ctx.getTypeSize(Op.E->getType()); + return (2 * Ctx.getTypeSize(LHSTy)) < PromotedSize || + (2 * Ctx.getTypeSize(RHSTy)) < PromotedSize; +} + +/// Update the FastMathFlags of LLVM IR from the FPOptions in LangOptions. +static void updateFastMathFlags(llvm::FastMathFlags &FMF, + FPOptions FPFeatures) { + FMF.setAllowContract(FPFeatures.allowFPContractAcrossStatement()); +} + +/// Propagate fast-math flags from \p Op to the instruction in \p V. +static Value *propagateFMFlags(Value *V, const BinOpInfo &Op) { + if (auto *I = dyn_cast<llvm::Instruction>(V)) { + llvm::FastMathFlags FMF = I->getFastMathFlags(); + updateFastMathFlags(FMF, Op.FPFeatures); + I->setFastMathFlags(FMF); + } + return V; +} + class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, Value*> { CodeGenFunction &CGF; @@ -221,6 +373,15 @@ public: Value *VisitGenericSelectionExpr(GenericSelectionExpr *GE) { return Visit(GE->getResultExpr()); } + Value *VisitCoawaitExpr(CoawaitExpr *S) { + return CGF.EmitCoawaitExpr(*S).getScalarVal(); + } + Value *VisitCoyieldExpr(CoyieldExpr *S) { + return CGF.EmitCoyieldExpr(*S).getScalarVal(); + } + Value *VisitUnaryCoawait(const UnaryOperator *E) { + return Visit(E->getSubExpr()); + } // Leaves. Value *VisitIntegerLiteral(const IntegerLiteral *E) { @@ -300,6 +461,24 @@ public: return V; } + Value *VisitObjCAvailabilityCheckExpr(ObjCAvailabilityCheckExpr *E) { + VersionTuple Version = E->getVersion(); + + // If we're checking for a platform older than our minimum deployment + // target, we can fold the check away. + if (Version <= CGF.CGM.getTarget().getPlatformMinVersion()) + return llvm::ConstantInt::get(Builder.getInt1Ty(), 1); + + Optional<unsigned> Min = Version.getMinor(), SMin = Version.getSubminor(); + llvm::Value *Args[] = { + llvm::ConstantInt::get(CGF.CGM.Int32Ty, Version.getMajor()), + llvm::ConstantInt::get(CGF.CGM.Int32Ty, Min ? *Min : 0), + llvm::ConstantInt::get(CGF.CGM.Int32Ty, SMin ? *SMin : 0), + }; + + return CGF.EmitBuiltinAvailable(Args); + } + Value *VisitArraySubscriptExpr(ArraySubscriptExpr *E); Value *VisitShuffleVectorExpr(ShuffleVectorExpr *E); Value *VisitConvertVectorExpr(ConvertVectorExpr *E); @@ -405,11 +584,7 @@ public: return CGF.LoadCXXThis(); } - Value *VisitExprWithCleanups(ExprWithCleanups *E) { - CGF.enterFullExpression(E); - CodeGenFunction::RunCleanupsScope Scope(CGF); - return Visit(E->getSubExpr()); - } + Value *VisitExprWithCleanups(ExprWithCleanups *E); Value *VisitCXXNewExpr(const CXXNewExpr *E) { return CGF.EmitCXXNewExpr(E); } @@ -464,16 +639,21 @@ public: return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul"); // Fall through. case LangOptions::SOB_Trapping: + if (CanElideOverflowCheck(CGF.getContext(), Ops)) + return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul"); return EmitOverflowCheckedBinOp(Ops); } } if (Ops.Ty->isUnsignedIntegerType() && - CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) + CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && + !CanElideOverflowCheck(CGF.getContext(), Ops)) return EmitOverflowCheckedBinOp(Ops); - if (Ops.LHS->getType()->isFPOrFPVectorTy()) - return Builder.CreateFMul(Ops.LHS, Ops.RHS, "mul"); + if (Ops.LHS->getType()->isFPOrFPVectorTy()) { + Value *V = Builder.CreateFMul(Ops.LHS, Ops.RHS, "mul"); + return propagateFMFlags(V, Ops); + } return Builder.CreateMul(Ops.LHS, Ops.RHS, "mul"); } /// Create a binary op that checks for overflow. 
@@ -1414,10 +1594,9 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } // Since target may map different address spaces in AST to the same address // space, an address space conversion may end up as a bitcast. - auto *Src = Visit(E); - return CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast(CGF, Src, - E->getType(), - DestTy); + return CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast( + CGF, Visit(E), E->getType()->getPointeeType().getAddressSpace(), + DestTy->getPointeeType().getAddressSpace(), ConvertType(DestTy)); } case CK_AtomicToNonAtomic: case CK_NonAtomicToAtomic: @@ -1616,6 +1795,16 @@ Value *ScalarExprEmitter::VisitStmtExpr(const StmtExpr *E) { E->getExprLoc()); } +Value *ScalarExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) { + CGF.enterFullExpression(E); + CodeGenFunction::RunCleanupsScope Scope(CGF); + Value *V = Visit(E->getSubExpr()); + // Defend against dominance problems caused by jumps out of expression + // evaluation through the shared cleanup block. + Scope.ForceCleanup({&V}); + return V; +} + //===----------------------------------------------------------------------===// // Unary Operators //===----------------------------------------------------------------------===// @@ -1627,7 +1816,7 @@ static BinOpInfo createBinOpInfoFromIncDec(const UnaryOperator *E, BinOp.RHS = llvm::ConstantInt::get(InVal->getType(), 1, false); BinOp.Ty = E->getType(); BinOp.Opcode = IsInc ? BO_Add : BO_Sub; - BinOp.FPContractable = false; + // FIXME: once UnaryOperator carries FPFeatures, copy it here. BinOp.E = E; return BinOp; } @@ -1645,6 +1834,8 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior( return Builder.CreateNSWAdd(InVal, Amount, Name); // Fall through. case LangOptions::SOB_Trapping: + if (IsWidenedIntegerOp(CGF.getContext(), E->getSubExpr())) + return Builder.CreateNSWAdd(InVal, Amount, Name); return EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, InVal, IsInc)); } llvm_unreachable("Unknown SignedOverflowBehaviorTy"); @@ -1660,6 +1851,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::Value *input; int amount = (isInc ? 1 : -1); + bool isSubtraction = !isInc; if (const AtomicType *atomicTy = type->getAs<AtomicType>()) { type = atomicTy->getValueType(); @@ -1749,7 +1941,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, numElts, "vla.inc"); else - value = Builder.CreateInBoundsGEP(value, numElts, "vla.inc"); + value = CGF.EmitCheckedInBoundsGEP( + value, numElts, /*SignedIndices=*/false, isSubtraction, + E->getExprLoc(), "vla.inc"); // Arithmetic on function pointers (!) is just +-1. } else if (type->isFunctionType()) { @@ -1759,7 +1953,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, amt, "incdec.funcptr"); else - value = Builder.CreateInBoundsGEP(value, amt, "incdec.funcptr"); + value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false, + isSubtraction, E->getExprLoc(), + "incdec.funcptr"); value = Builder.CreateBitCast(value, input->getType()); // For everything else, we can just do a simple increment. 
@@ -1768,7 +1964,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, amt, "incdec.ptr"); else - value = Builder.CreateInBoundsGEP(value, amt, "incdec.ptr"); + value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false, + isSubtraction, E->getExprLoc(), + "incdec.ptr"); } // Vector increment/decrement. @@ -1849,7 +2047,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, sizeValue, "incdec.objptr"); else - value = Builder.CreateInBoundsGEP(value, sizeValue, "incdec.objptr"); + value = CGF.EmitCheckedInBoundsGEP(value, sizeValue, + /*SignedIndices=*/false, isSubtraction, + E->getExprLoc(), "incdec.objptr"); value = Builder.CreateBitCast(value, input->getType()); } @@ -1891,7 +2091,7 @@ Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) { BinOp.LHS = llvm::Constant::getNullValue(BinOp.RHS->getType()); BinOp.Ty = E->getType(); BinOp.Opcode = BO_Sub; - BinOp.FPContractable = false; + // FIXME: once UnaryOperator carries FPFeatures, copy it here. BinOp.E = E; return EmitSub(BinOp); } @@ -2112,7 +2312,7 @@ BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) { Result.RHS = Visit(E->getRHS()); Result.Ty = E->getType(); Result.Opcode = E->getOpcode(); - Result.FPContractable = E->isFPContractable(); + Result.FPFeatures = E->getFPFeatures(); Result.E = E; return Result; } @@ -2132,7 +2332,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( OpInfo.RHS = Visit(E->getRHS()); OpInfo.Ty = E->getComputationResultType(); OpInfo.Opcode = E->getOpcode(); - OpInfo.FPContractable = E->isFPContractable(); + OpInfo.FPFeatures = E->getFPFeatures(); OpInfo.E = E; // Load/convert the LHS. LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); @@ -2263,8 +2463,11 @@ void ScalarExprEmitter::EmitUndefinedBehaviorIntegerDivAndRemCheck( SanitizerKind::IntegerDivideByZero)); } + const auto *BO = cast<BinaryOperator>(Ops.E); if (CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow) && - Ops.Ty->hasSignedIntegerRepresentation()) { + Ops.Ty->hasSignedIntegerRepresentation() && + !IsWidenedIntegerOp(CGF.getContext(), BO->getLHS()) && + Ops.mayHaveIntegerOverflow()) { llvm::IntegerType *Ty = cast<llvm::IntegerType>(Zero->getType()); llvm::Value *IntMin = @@ -2287,11 +2490,13 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) { CodeGenFunction::SanitizerScope SanScope(&CGF); if ((CGF.SanOpts.has(SanitizerKind::IntegerDivideByZero) || CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) && - Ops.Ty->isIntegerType()) { + Ops.Ty->isIntegerType() && + (Ops.mayHaveIntegerDivisionByZero() || Ops.mayHaveIntegerOverflow())) { llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, true); } else if (CGF.SanOpts.has(SanitizerKind::FloatDivideByZero) && - Ops.Ty->isRealFloatingType()) { + Ops.Ty->isRealFloatingType() && + Ops.mayHaveFloatDivisionByZero()) { llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); llvm::Value *NonZero = Builder.CreateFCmpUNE(Ops.RHS, Zero); EmitBinOpCheck(std::make_pair(NonZero, SanitizerKind::FloatDivideByZero), @@ -2324,12 +2529,13 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) { Value *ScalarExprEmitter::EmitRem(const BinOpInfo &Ops) { // Rem in C can't be a floating point type: C99 6.5.5p2. 
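The mayHaveIntegerDivisionByZero/mayHaveIntegerOverflow guards added to EmitDiv above (and to EmitRem just below) mean the sanitizer block is only materialized when the undefined behaviour could actually occur. Illustrative, assuming -fsanitize=integer-divide-by-zero,signed-integer-overflow:

    enum { Bins = 16 };

    int bucket_width() {
      return 4096 / Bins;        // both operands constant, provably no UB:
                                 // the whole check block is skipped
    }

    int bucket(int value, int width) {
      return value / width;      // runtime divisor and possible INT_MIN / -1:
                                 // the checks remain
    }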
- if (CGF.SanOpts.has(SanitizerKind::IntegerDivideByZero)) { + if ((CGF.SanOpts.has(SanitizerKind::IntegerDivideByZero) || + CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) && + Ops.Ty->isIntegerType() && + (Ops.mayHaveIntegerDivisionByZero() || Ops.mayHaveIntegerOverflow())) { CodeGenFunction::SanitizerScope SanScope(&CGF); llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); - - if (Ops.Ty->isIntegerType()) - EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, false); + EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, false); } if (Ops.Ty->hasUnsignedIntegerRepresentation()) @@ -2369,6 +2575,7 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) { if (isSigned) OpID |= 1; + CodeGenFunction::SanitizerScope SanScope(&CGF); llvm::Type *opTy = CGF.CGM.getTypes().ConvertType(Ops.Ty); llvm::Function *intrinsic = CGF.CGM.getIntrinsic(IID, opTy); @@ -2384,7 +2591,6 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) { // If the signed-integer-overflow sanitizer is enabled, emit a call to its // runtime. Otherwise, this is a -ftrapv check, so just emit a trap. if (!isSigned || CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) { - CodeGenFunction::SanitizerScope SanScope(&CGF); llvm::Value *NotOverflow = Builder.CreateNot(overflow); SanitizerMask Kind = isSigned ? SanitizerKind::SignedIntegerOverflow : SanitizerKind::UnsignedIntegerOverflow; @@ -2460,13 +2666,14 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, std::swap(pointerOperand, indexOperand); } + bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType(); + unsigned width = cast<llvm::IntegerType>(index->getType())->getBitWidth(); auto &DL = CGF.CGM.getDataLayout(); auto PtrTy = cast<llvm::PointerType>(pointer->getType()); if (width != DL.getTypeSizeInBits(PtrTy)) { // Zero-extend or sign-extend the pointer value according to // whether the index is signed or not. - bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType(); index = CGF.Builder.CreateIntCast(index, DL.getIntPtrType(PtrTy), isSigned, "idx.ext"); } @@ -2510,7 +2717,9 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, pointer = CGF.Builder.CreateGEP(pointer, index, "add.ptr"); } else { index = CGF.Builder.CreateNSWMul(index, numElements, "vla.index"); - pointer = CGF.Builder.CreateInBoundsGEP(pointer, index, "add.ptr"); + pointer = + CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction, + op.E->getExprLoc(), "add.ptr"); } return pointer; } @@ -2527,7 +2736,8 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, if (CGF.getLangOpts().isSignedOverflowDefined()) return CGF.Builder.CreateGEP(pointer, index, "add.ptr"); - return CGF.Builder.CreateInBoundsGEP(pointer, index, "add.ptr"); + return CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction, + op.E->getExprLoc(), "add.ptr"); } // Construct an fmuladd intrinsic to represent a fused mul-add of MulOp and @@ -2577,12 +2787,7 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op, "Only fadd/fsub can be the root of an fmuladd."); // Check whether this op is marked as fusable. - if (!op.FPContractable) - return nullptr; - - // Check whether -ffp-contract=on. (If -ffp-contract=off/fast, fusing is - // either disabled, or handled entirely by the LLVM backend). - if (CGF.CGM.getCodeGenOpts().getFPContractMode() != CodeGenOptions::FPC_On) + if (!op.FPFeatures.allowFPContractWithinStatement()) return nullptr; // We have a potentially fusable op. 
Look for a mul on one of the operands. @@ -2605,7 +2810,7 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op, Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { if (op.LHS->getType()->isPointerTy() || op.RHS->getType()->isPointerTy()) - return emitPointerArithmetic(CGF, op, /*subtraction*/ false); + return emitPointerArithmetic(CGF, op, CodeGenFunction::NotSubtraction); if (op.Ty->isSignedIntegerOrEnumerationType()) { switch (CGF.getLangOpts().getSignedOverflowBehavior()) { @@ -2616,12 +2821,15 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { return Builder.CreateNSWAdd(op.LHS, op.RHS, "add"); // Fall through. case LangOptions::SOB_Trapping: + if (CanElideOverflowCheck(CGF.getContext(), op)) + return Builder.CreateNSWAdd(op.LHS, op.RHS, "add"); return EmitOverflowCheckedBinOp(op); } } if (op.Ty->isUnsignedIntegerType() && - CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) + CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && + !CanElideOverflowCheck(CGF.getContext(), op)) return EmitOverflowCheckedBinOp(op); if (op.LHS->getType()->isFPOrFPVectorTy()) { @@ -2629,7 +2837,8 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder)) return FMulAdd; - return Builder.CreateFAdd(op.LHS, op.RHS, "add"); + Value *V = Builder.CreateFAdd(op.LHS, op.RHS, "add"); + return propagateFMFlags(V, op); } return Builder.CreateAdd(op.LHS, op.RHS, "add"); @@ -2647,19 +2856,23 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { return Builder.CreateNSWSub(op.LHS, op.RHS, "sub"); // Fall through. case LangOptions::SOB_Trapping: + if (CanElideOverflowCheck(CGF.getContext(), op)) + return Builder.CreateNSWSub(op.LHS, op.RHS, "sub"); return EmitOverflowCheckedBinOp(op); } } if (op.Ty->isUnsignedIntegerType() && - CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) + CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && + !CanElideOverflowCheck(CGF.getContext(), op)) return EmitOverflowCheckedBinOp(op); if (op.LHS->getType()->isFPOrFPVectorTy()) { // Try to form an fmuladd. if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true)) return FMulAdd; - return Builder.CreateFSub(op.LHS, op.RHS, "sub"); + Value *V = Builder.CreateFSub(op.LHS, op.RHS, "sub"); + return propagateFMFlags(V, op); } return Builder.CreateSub(op.LHS, op.RHS, "sub"); @@ -2668,7 +2881,7 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { // If the RHS is not a pointer, then we have normal pointer // arithmetic. if (!op.RHS->getType()->isPointerTy()) - return emitPointerArithmetic(CGF, op, /*subtraction*/ true); + return emitPointerArithmetic(CGF, op, CodeGenFunction::IsSubtraction); // Otherwise, this is a pointer subtraction. 
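With BinOpInfo now carrying full FPFeatures, fusing a*b + c into an fmuladd is keyed on allowFPContractWithinStatement() rather than on a global -ffp-contract mode check. As a minimal, self-contained illustration (ordinary C++, independent of the codegen above) of why contraction is observable: the fused form rounds once, which std::fma models explicitly.

#include <cmath>
#include <cstdio>

int main() {
  double eps = std::ldexp(1.0, -52);           // 2^-52
  double a = 1.0 + eps, b = 1.0 - eps, c = -1.0;
  double unfused = a * b + c;        // product rounds to 1.0, so the sum is 0.0
  double fused = std::fma(a, b, c);  // single rounding: exactly -0x1p-104
  std::printf("unfused=%a fused=%a\n", unfused, fused);
}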
@@ -2751,8 +2964,8 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { isa<llvm::IntegerType>(Ops.LHS->getType())) { CodeGenFunction::SanitizerScope SanScope(&CGF); SmallVector<std::pair<Value *, SanitizerMask>, 2> Checks; - llvm::Value *WidthMinusOne = GetWidthMinusOneValue(Ops.LHS, RHS); - llvm::Value *ValidExponent = Builder.CreateICmpULE(RHS, WidthMinusOne); + llvm::Value *WidthMinusOne = GetWidthMinusOneValue(Ops.LHS, Ops.RHS); + llvm::Value *ValidExponent = Builder.CreateICmpULE(Ops.RHS, WidthMinusOne); if (SanitizeExponent) { Checks.push_back( @@ -2767,12 +2980,14 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { llvm::BasicBlock *Cont = CGF.createBasicBlock("cont"); llvm::BasicBlock *CheckShiftBase = CGF.createBasicBlock("check"); Builder.CreateCondBr(ValidExponent, CheckShiftBase, Cont); + llvm::Value *PromotedWidthMinusOne = + (RHS == Ops.RHS) ? WidthMinusOne + : GetWidthMinusOneValue(Ops.LHS, RHS); CGF.EmitBlock(CheckShiftBase); - llvm::Value *BitsShiftedOff = - Builder.CreateLShr(Ops.LHS, - Builder.CreateSub(WidthMinusOne, RHS, "shl.zeros", - /*NUW*/true, /*NSW*/true), - "shl.check"); + llvm::Value *BitsShiftedOff = Builder.CreateLShr( + Ops.LHS, Builder.CreateSub(PromotedWidthMinusOne, RHS, "shl.zeros", + /*NUW*/ true, /*NSW*/ true), + "shl.check"); if (CGF.getLangOpts().CPlusPlus) { // In C99, we are not permitted to shift a 1 bit into the sign bit. // Under C++11's rules, shifting a 1 bit into the sign bit is @@ -3038,10 +3253,12 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { // because the result is altered by the store, i.e., [C99 6.5.16p1] // 'An assignment expression has the value of the left operand after // the assignment...'. - if (LHS.isBitField()) + if (LHS.isBitField()) { CGF.EmitStoreThroughBitfieldLValue(RValue::get(RHS), LHS, &RHS); - else + } else { + CGF.EmitNullabilityCheck(LHS, RHS, E->getExprLoc()); CGF.EmitStoreThroughLValue(RValue::get(RHS), LHS); + } } // If the result is clearly ignored, return now. @@ -3327,9 +3544,11 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { // safe to evaluate the LHS and RHS unconditionally. if (isCheapEnoughToEvaluateUnconditionally(lhsExpr, CGF) && isCheapEnoughToEvaluateUnconditionally(rhsExpr, CGF)) { - CGF.incrementProfileCounter(E); - llvm::Value *CondV = CGF.EvaluateExprAsBool(condExpr); + llvm::Value *StepV = Builder.CreateZExtOrBitCast(CondV, CGF.Int64Ty); + + CGF.incrementProfileCounter(E, StepV); + llvm::Value *LHS = Visit(lhsExpr); llvm::Value *RHS = Visit(rhsExpr); if (!LHS) { @@ -3491,8 +3710,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { // vector to get a vec4, then a bitcast if the target type is different. if (NumElementsSrc == 3 && NumElementsDst != 3) { Src = ConvertVec3AndVec4(Builder, CGF, Src, 4); - Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, - DstTy); + + if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) { + Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, + DstTy); + } + Src->setName("astype"); return Src; } @@ -3501,9 +3724,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { // to vec4 if the original type is not vec4, then a shuffle vector to // get a vec3. 
if (NumElementsSrc != 3 && NumElementsDst == 3) { - auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4); - Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, - Vec4Ty); + if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) { + auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4); + Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, + Vec4Ty); + } + Src = ConvertVec3AndVec4(Builder, CGF, Src, 3); Src->setName("astype"); return Src; @@ -3626,3 +3852,136 @@ LValue CodeGenFunction::EmitCompoundAssignmentLValue( llvm_unreachable("Unhandled compound assignment operator"); } + +Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, + ArrayRef<Value *> IdxList, + bool SignedIndices, + bool IsSubtraction, + SourceLocation Loc, + const Twine &Name) { + Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name); + + // If the pointer overflow sanitizer isn't enabled, do nothing. + if (!SanOpts.has(SanitizerKind::PointerOverflow)) + return GEPVal; + + // If the GEP has already been reduced to a constant, leave it be. + if (isa<llvm::Constant>(GEPVal)) + return GEPVal; + + // Only check for overflows in the default address space. + if (GEPVal->getType()->getPointerAddressSpace()) + return GEPVal; + + auto *GEP = cast<llvm::GEPOperator>(GEPVal); + assert(GEP->isInBounds() && "Expected inbounds GEP"); + + SanitizerScope SanScope(this); + auto &VMContext = getLLVMContext(); + const auto &DL = CGM.getDataLayout(); + auto *IntPtrTy = DL.getIntPtrType(GEP->getPointerOperandType()); + + // Grab references to the signed add/mul overflow intrinsics for intptr_t. + auto *Zero = llvm::ConstantInt::getNullValue(IntPtrTy); + auto *SAddIntrinsic = + CGM.getIntrinsic(llvm::Intrinsic::sadd_with_overflow, IntPtrTy); + auto *SMulIntrinsic = + CGM.getIntrinsic(llvm::Intrinsic::smul_with_overflow, IntPtrTy); + + // The total (signed) byte offset for the GEP. + llvm::Value *TotalOffset = nullptr; + // The offset overflow flag - true if the total offset overflows. + llvm::Value *OffsetOverflows = Builder.getFalse(); + + /// Return the result of the given binary operation. + auto eval = [&](BinaryOperator::Opcode Opcode, llvm::Value *LHS, + llvm::Value *RHS) -> llvm::Value * { + assert((Opcode == BO_Add || Opcode == BO_Mul) && "Can't eval binop"); + + // If the operands are constants, return a constant result. + if (auto *LHSCI = dyn_cast<llvm::ConstantInt>(LHS)) { + if (auto *RHSCI = dyn_cast<llvm::ConstantInt>(RHS)) { + llvm::APInt N; + bool HasOverflow = mayHaveIntegerOverflow(LHSCI, RHSCI, Opcode, + /*Signed=*/true, N); + if (HasOverflow) + OffsetOverflows = Builder.getTrue(); + return llvm::ConstantInt::get(VMContext, N); + } + } + + // Otherwise, compute the result with checked arithmetic. + auto *ResultAndOverflow = Builder.CreateCall( + (Opcode == BO_Add) ? SAddIntrinsic : SMulIntrinsic, {LHS, RHS}); + OffsetOverflows = Builder.CreateOr( + Builder.CreateExtractValue(ResultAndOverflow, 1), OffsetOverflows); + return Builder.CreateExtractValue(ResultAndOverflow, 0); + }; + + // Determine the total byte offset by looking at each GEP operand. + for (auto GTI = llvm::gep_type_begin(GEP), GTE = llvm::gep_type_end(GEP); + GTI != GTE; ++GTI) { + llvm::Value *LocalOffset; + auto *Index = GTI.getOperand(); + // Compute the local offset contributed by this indexing step: + if (auto *STy = GTI.getStructTypeOrNull()) { + // For struct indexing, the local offset is the byte position of the + // specified field. 
+ unsigned FieldNo = cast<llvm::ConstantInt>(Index)->getZExtValue(); + LocalOffset = llvm::ConstantInt::get( + IntPtrTy, DL.getStructLayout(STy)->getElementOffset(FieldNo)); + } else { + // Otherwise this is array-like indexing. The local offset is the index + // multiplied by the element size. + auto *ElementSize = llvm::ConstantInt::get( + IntPtrTy, DL.getTypeAllocSize(GTI.getIndexedType())); + auto *IndexS = Builder.CreateIntCast(Index, IntPtrTy, /*isSigned=*/true); + LocalOffset = eval(BO_Mul, ElementSize, IndexS); + } + + // If this is the first offset, set it as the total offset. Otherwise, add + // the local offset into the running total. + if (!TotalOffset || TotalOffset == Zero) + TotalOffset = LocalOffset; + else + TotalOffset = eval(BO_Add, TotalOffset, LocalOffset); + } + + // Common case: if the total offset is zero, don't emit a check. + if (TotalOffset == Zero) + return GEPVal; + + // Now that we've computed the total offset, add it to the base pointer (with + // wrapping semantics). + auto *IntPtr = Builder.CreatePtrToInt(GEP->getPointerOperand(), IntPtrTy); + auto *ComputedGEP = Builder.CreateAdd(IntPtr, TotalOffset); + + // The GEP is valid if: + // 1) The total offset doesn't overflow, and + // 2) The sign of the difference between the computed address and the base + // pointer matches the sign of the total offset. + llvm::Value *ValidGEP; + auto *NoOffsetOverflow = Builder.CreateNot(OffsetOverflows); + if (SignedIndices) { + auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); + auto *PosOrZeroOffset = Builder.CreateICmpSGE(TotalOffset, Zero); + llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr); + ValidGEP = Builder.CreateAnd( + Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid), + NoOffsetOverflow); + } else if (!SignedIndices && !IsSubtraction) { + auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); + ValidGEP = Builder.CreateAnd(PosOrZeroValid, NoOffsetOverflow); + } else { + auto *NegOrZeroValid = Builder.CreateICmpULE(ComputedGEP, IntPtr); + ValidGEP = Builder.CreateAnd(NegOrZeroValid, NoOffsetOverflow); + } + + llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc)}; + // Pass the computed GEP to the runtime to avoid emitting poisoned arguments. + llvm::Value *DynamicArgs[] = {IntPtr, ComputedGEP}; + EmitCheck(std::make_pair(ValidGEP, SanitizerKind::PointerOverflow), + SanitizerHandler::PointerOverflow, StaticArgs, DynamicArgs); + + return GEPVal; +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDABuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGGPUBuiltin.cpp index 44dd003..48156b1 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDABuiltin.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGGPUBuiltin.cpp @@ -1,4 +1,4 @@ -//===----- CGCUDABuiltin.cpp - Codegen for CUDA builtins ------------------===// +//===------ CGGPUBuiltin.cpp - Codegen for GPU builtins -------------------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// Generates code for built-in CUDA calls which are not runtime-specific. -// (Runtime-specific codegen lives in CGCUDARuntime.) +// Generates code for built-in GPU calls which are not runtime-specific. +// (Runtime-specific codegen lives in programming model specific files.) 
// //===----------------------------------------------------------------------===// @@ -67,10 +67,9 @@ static llvm::Function *GetVprintfDeclaration(llvm::Module &M) { // Note that by the time this function runs, E's args have already undergone the // standard C vararg promotion (short -> int, float -> double, etc.). RValue -CodeGenFunction::EmitCUDADevicePrintfCallExpr(const CallExpr *E, - ReturnValueSlot ReturnValue) { - assert(getLangOpts().CUDA); - assert(getLangOpts().CUDAIsDevice); +CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, + ReturnValueSlot ReturnValue) { + assert(getTarget().getTriple().isNVPTX()); assert(E->getBuiltinCallee() == Builtin::BIprintf); assert(E->getNumArgs() >= 1); // printf always has at least one arg. diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp index 932b8a1..90fcad26 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp @@ -1,4 +1,4 @@ -//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// +//===---- CGObjC.cpp - Emit LLVM Code for Objective-C ---------------------===// // // The LLVM Compiler Infrastructure // @@ -117,10 +117,24 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, const ObjCArrayLiteral *ALE = dyn_cast<ObjCArrayLiteral>(E); if (!ALE) DLE = cast<ObjCDictionaryLiteral>(E); - - // Compute the type of the array we're initializing. + + // Optimize empty collections by referencing constants, when available. uint64_t NumElements = ALE ? ALE->getNumElements() : DLE->getNumElements(); + if (NumElements == 0 && CGM.getLangOpts().ObjCRuntime.hasEmptyCollections()) { + StringRef ConstantName = ALE ? "__NSArray0__" : "__NSDictionary0__"; + QualType IdTy(CGM.getContext().getObjCIdType()); + llvm::Constant *Constant = + CGM.CreateRuntimeVariable(ConvertType(IdTy), ConstantName); + LValue LV = MakeNaturalAlignAddrLValue(Constant, IdTy); + llvm::Value *Ptr = EmitLoadOfScalar(LV, E->getLocStart()); + cast<llvm::LoadInst>(Ptr)->setMetadata( + CGM.getModule().getMDKindID("invariant.load"), + llvm::MDNode::get(getLLVMContext(), None)); + return Builder.CreateBitCast(Ptr, ConvertType(E->getType())); + } + + // Compute the type of the array we're initializing. 
llvm::APInt APNumElements(Context.getTypeSize(Context.getSizeType()), NumElements); QualType ElementType = Context.getObjCIdType().withConst(); @@ -148,7 +162,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, const Expr *Rhs = ALE->getElement(i); LValue LV = MakeAddrLValue( Builder.CreateConstArrayGEP(Objects, i, getPointerSize()), - ElementType, AlignmentSource::Decl); + ElementType, LValueBaseInfo(AlignmentSource::Decl, false)); llvm::Value *value = EmitScalarExpr(Rhs); EmitStoreThroughLValue(RValue::get(value), LV, true); @@ -160,7 +174,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, const Expr *Key = DLE->getKeyValueElement(i).Key; LValue KeyLV = MakeAddrLValue( Builder.CreateConstArrayGEP(Keys, i, getPointerSize()), - ElementType, AlignmentSource::Decl); + ElementType, LValueBaseInfo(AlignmentSource::Decl, false)); llvm::Value *keyValue = EmitScalarExpr(Key); EmitStoreThroughLValue(RValue::get(keyValue), KeyLV, /*isInit=*/true); @@ -168,7 +182,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, const Expr *Value = DLE->getKeyValueElement(i).Value; LValue ValueLV = MakeAddrLValue( Builder.CreateConstArrayGEP(Objects, i, getPointerSize()), - ElementType, AlignmentSource::Decl); + ElementType, LValueBaseInfo(AlignmentSource::Decl, false)); llvm::Value *valueValue = EmitScalarExpr(Value); EmitStoreThroughLValue(RValue::get(valueValue), ValueLV, /*isInit=*/true); if (TrackNeededObjects) { @@ -427,7 +441,7 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, QualType ResultType = method ? method->getReturnType() : E->getType(); CallArgList Args; - EmitCallArgs(Args, method, E->arguments()); + EmitCallArgs(Args, method, E->arguments(), /*AC*/AbstractCallee(method)); // For delegate init calls in ARC, do an unsafe store of null into // self. This represents the call taking direct ownership of that @@ -1316,7 +1330,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, BinaryOperator assign(&ivarRef, finalArg, BO_Assign, ivarRef.getType(), VK_RValue, OK_Ordinary, - SourceLocation(), false); + SourceLocation(), FPOptions()); EmitStmt(&assign); } @@ -1469,6 +1483,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ if (DI) DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin()); + RunCleanupsScope ForScope(*this); + // The local variable comes into scope immediately. AutoVarEmission variable = AutoVarEmission::invalid(); if (const DeclStmt *SD = dyn_cast<DeclStmt>(S.getElement())) @@ -1499,8 +1515,6 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ ArrayType::Normal, 0); Address ItemsPtr = CreateMemTemp(ItemsTy, "items.ptr"); - RunCleanupsScope ForScope(*this); - // Emit the collection pointer. In ARC, we do a retain. 
llvm::Value *Collection; if (getLangOpts().ObjCAutoRefCount) { @@ -1802,26 +1816,45 @@ void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) { } +static bool IsForwarding(StringRef Name) { + return llvm::StringSwitch<bool>(Name) + .Cases("objc_autoreleaseReturnValue", // ARCInstKind::AutoreleaseRV + "objc_autorelease", // ARCInstKind::Autorelease + "objc_retainAutoreleaseReturnValue", // ARCInstKind::FusedRetainAutoreleaseRV + "objc_retainAutoreleasedReturnValue", // ARCInstKind::RetainRV + "objc_retainAutorelease", // ARCInstKind::FusedRetainAutorelease + "objc_retainedObject", // ARCInstKind::NoopCast + "objc_retain", // ARCInstKind::Retain + "objc_unretainedObject", // ARCInstKind::NoopCast + "objc_unretainedPointer", // ARCInstKind::NoopCast + "objc_unsafeClaimAutoreleasedReturnValue", // ARCInstKind::ClaimRV + true) + .Default(false); +} + static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM, - llvm::FunctionType *type, - StringRef fnName) { - llvm::Constant *fn = CGM.CreateRuntimeFunction(type, fnName); + llvm::FunctionType *FTy, + StringRef Name) { + llvm::Constant *RTF = CGM.CreateRuntimeFunction(FTy, Name); - if (llvm::Function *f = dyn_cast<llvm::Function>(fn)) { + if (auto *F = dyn_cast<llvm::Function>(RTF)) { // If the target runtime doesn't naturally support ARC, emit weak // references to the runtime support library. We don't really // permit this to fail, but we need a particular relocation style. if (!CGM.getLangOpts().ObjCRuntime.hasNativeARC() && !CGM.getTriple().isOSBinFormatCOFF()) { - f->setLinkage(llvm::Function::ExternalWeakLinkage); - } else if (fnName == "objc_retain" || fnName == "objc_release") { + F->setLinkage(llvm::Function::ExternalWeakLinkage); + } else if (Name == "objc_retain" || Name == "objc_release") { // If we have Native ARC, set nonlazybind attribute for these APIs for // performance. 
- f->addFnAttr(llvm::Attribute::NonLazyBind); + F->addFnAttr(llvm::Attribute::NonLazyBind); } + + if (IsForwarding(Name)) + F->arg_begin()->addAttr(llvm::Attribute::Returned); } - return fn; + return RTF; } /// Perform an operation having the signature @@ -1832,7 +1865,8 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, llvm::Constant *&fn, StringRef fnName, bool isTailCall = false) { - if (isa<llvm::ConstantPointerNull>(value)) return value; + if (isa<llvm::ConstantPointerNull>(value)) + return value; if (!fn) { llvm::FunctionType *fnType = @@ -2381,6 +2415,12 @@ void CodeGenFunction::destroyARCWeak(CodeGenFunction &CGF, CGF.EmitARCDestroyWeak(addr); } +void CodeGenFunction::emitARCIntrinsicUse(CodeGenFunction &CGF, Address addr, + QualType type) { + llvm::Value *value = CGF.Builder.CreateLoad(addr); + CGF.EmitARCIntrinsicUse(value); +} + namespace { struct CallObjCAutoreleasePoolObject final : EHScopeStack::Cleanup { llvm::Value *Token; @@ -3206,10 +3246,12 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( SrcTy = C.getPointerType(SrcTy); FunctionArgList args; - ImplicitParamDecl dstDecl(getContext(), FD, SourceLocation(), nullptr,DestTy); - args.push_back(&dstDecl); - ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy); - args.push_back(&srcDecl); + ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, + DestTy, ImplicitParamDecl::Other); + args.push_back(&DstDecl); + ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, + SrcTy, ImplicitParamDecl::Other); + args.push_back(&SrcDecl); const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); @@ -3225,12 +3267,12 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( StartFunction(FD, C.VoidTy, Fn, FI, args); - DeclRefExpr DstExpr(&dstDecl, false, DestTy, + DeclRefExpr DstExpr(&DstDecl, false, DestTy, VK_RValue, SourceLocation()); UnaryOperator DST(&DstExpr, UO_Deref, DestTy->getPointeeType(), VK_LValue, OK_Ordinary, SourceLocation()); - DeclRefExpr SrcExpr(&srcDecl, false, SrcTy, + DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy, VK_RValue, SourceLocation()); UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(), VK_LValue, OK_Ordinary, SourceLocation()); @@ -3239,7 +3281,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( CallExpr *CalleeExp = cast<CallExpr>(PID->getSetterCXXAssignment()); CXXOperatorCallExpr TheCall(C, OO_Equal, CalleeExp->getCallee(), Args, DestTy->getPointeeType(), - VK_LValue, SourceLocation(), false); + VK_LValue, SourceLocation(), FPOptions()); EmitStmt(&TheCall); @@ -3287,10 +3329,12 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( SrcTy = C.getPointerType(SrcTy); FunctionArgList args; - ImplicitParamDecl dstDecl(getContext(), FD, SourceLocation(), nullptr,DestTy); - args.push_back(&dstDecl); - ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy); - args.push_back(&srcDecl); + ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, + DestTy, ImplicitParamDecl::Other); + args.push_back(&DstDecl); + ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, + SrcTy, ImplicitParamDecl::Other); + args.push_back(&SrcDecl); const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); @@ -3305,7 +3349,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( StartFunction(FD, C.VoidTy, Fn, FI, args); - DeclRefExpr SrcExpr(&srcDecl, false, 
SrcTy, + DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy, VK_RValue, SourceLocation()); UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(), @@ -3331,7 +3375,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( CXXConstExpr->getConstructionKind(), SourceRange()); - DeclRefExpr DstExpr(&dstDecl, false, DestTy, + DeclRefExpr DstExpr(&DstDecl, false, DestTy, VK_RValue, SourceLocation()); RValue DV = EmitAnyExpr(&DstExpr); @@ -3375,5 +3419,54 @@ CodeGenFunction::EmitBlockCopyAndAutorelease(llvm::Value *Block, QualType Ty) { return Val; } +llvm::Value * +CodeGenFunction::EmitBuiltinAvailable(ArrayRef<llvm::Value *> Args) { + assert(Args.size() == 3 && "Expected 3 argument here!"); + + if (!CGM.IsOSVersionAtLeastFn) { + llvm::FunctionType *FTy = + llvm::FunctionType::get(Int32Ty, {Int32Ty, Int32Ty, Int32Ty}, false); + CGM.IsOSVersionAtLeastFn = + CGM.CreateRuntimeFunction(FTy, "__isOSVersionAtLeast"); + } + + llvm::Value *CallRes = + EmitNounwindRuntimeCall(CGM.IsOSVersionAtLeastFn, Args); + + return Builder.CreateICmpNE(CallRes, llvm::Constant::getNullValue(Int32Ty)); +} + +void CodeGenModule::emitAtAvailableLinkGuard() { + if (!IsOSVersionAtLeastFn) + return; + // @available requires CoreFoundation only on Darwin. + if (!Target.getTriple().isOSDarwin()) + return; + // Add -framework CoreFoundation to the linker commands. We still want to + // emit the core foundation reference down below because otherwise if + // CoreFoundation is not used in the code, the linker won't link the + // framework. + auto &Context = getLLVMContext(); + llvm::Metadata *Args[2] = {llvm::MDString::get(Context, "-framework"), + llvm::MDString::get(Context, "CoreFoundation")}; + LinkerOptionsMetadata.push_back(llvm::MDNode::get(Context, Args)); + // Emit a reference to a symbol from CoreFoundation to ensure that + // CoreFoundation is linked into the final binary. 
+ llvm::FunctionType *FTy = + llvm::FunctionType::get(Int32Ty, {VoidPtrTy}, false); + llvm::Constant *CFFunc = + CreateRuntimeFunction(FTy, "CFBundleGetVersionNumber"); + + llvm::FunctionType *CheckFTy = llvm::FunctionType::get(VoidTy, {}, false); + llvm::Function *CFLinkCheckFunc = cast<llvm::Function>(CreateBuiltinFunction( + CheckFTy, "__clang_at_available_requires_core_foundation_framework")); + CFLinkCheckFunc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + CFLinkCheckFunc->setVisibility(llvm::GlobalValue::HiddenVisibility); + CodeGenFunction CGF(*this); + CGF.Builder.SetInsertPoint(CGF.createBasicBlock("", CFLinkCheckFunc)); + CGF.EmitNounwindRuntimeCall(CFFunc, llvm::Constant::getNullValue(VoidPtrTy)); + CGF.Builder.CreateUnreachable(); + addCompilerUsedGlobal(CFLinkCheckFunc); +} CGObjCRuntime::~CGObjCRuntime() {} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp index fa2b3d8..c8b8be7 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp @@ -18,7 +18,7 @@ #include "CGCleanup.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "ConstantBuilder.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" @@ -34,7 +34,6 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Compiler.h" -#include <cstdarg> using namespace clang; using namespace CodeGen; @@ -58,18 +57,19 @@ public: /// Initialises the lazy function with the name, return type, and the types /// of the arguments. - LLVM_END_WITH_NULL - void init(CodeGenModule *Mod, const char *name, llvm::Type *RetTy, ...) { + template <typename... Tys> + void init(CodeGenModule *Mod, const char *name, llvm::Type *RetTy, + Tys *... Types) { CGM = Mod; FunctionName = name; Function = nullptr; - std::vector<llvm::Type *> ArgTys; - va_list Args; - va_start(Args, RetTy); - while (llvm::Type *ArgTy = va_arg(Args, llvm::Type *)) - ArgTys.push_back(ArgTy); - va_end(Args); - FTy = llvm::FunctionType::get(RetTy, ArgTys, false); + if(sizeof...(Tys)) { + SmallVector<llvm::Type *, 8> ArgTys({Types...}); + FTy = llvm::FunctionType::get(RetTy, ArgTys, false); + } + else { + FTy = llvm::FunctionType::get(RetTy, None, false); + } } llvm::FunctionType *getType() { return FTy; } @@ -603,11 +603,10 @@ protected: public: CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) { // IMP objc_msg_lookup(id, SEL); - MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, - nullptr); + MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy); // IMP objc_msg_lookup_super(struct objc_super*, SEL); MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, - PtrToObjCSuperTy, SelectorTy, nullptr); + PtrToObjCSuperTy, SelectorTy); } }; @@ -663,7 +662,7 @@ class CGObjCGNUstep : public CGObjCGNU { } // The lookup function is guaranteed not to capture the receiver pointer. 
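The EmitBuiltinAvailable helper added in CGObjC.cpp above lowers @available checks to a call to __isOSVersionAtLeast(major, minor, subminor) whose result is compared against zero, while emitAtAvailableLinkGuard adds -framework CoreFoundation and a CFBundleGetVersionNumber reference on Darwin so the version check can resolve. A small stand-in for the comparison that call presumably performs (hypothetical helper, not the runtime implementation):

#include <tuple>

// 'running' is the host OS version at run time, 'wanted' the triple written
// in @available(...); std::tuple compares lexicographically, i.e. major,
// then minor, then subminor.
bool isOSVersionAtLeast(std::tuple<int, int, int> running,
                        std::tuple<int, int, int> wanted) {
  return running >= wanted;
}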
- LookupFn->setDoesNotCapture(1); + LookupFn->addParamAttr(0, llvm::Attribute::NoCapture); llvm::Value *args[] = { EnforceType(Builder, ReceiverPtr.getPointer(), PtrToIdTy), @@ -702,52 +701,51 @@ class CGObjCGNUstep : public CGObjCGNU { CGObjCGNUstep(CodeGenModule &Mod) : CGObjCGNU(Mod, 9, 3) { const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime; - llvm::StructType *SlotStructTy = llvm::StructType::get(PtrTy, - PtrTy, PtrTy, IntTy, IMPTy, nullptr); + llvm::StructType *SlotStructTy = + llvm::StructType::get(PtrTy, PtrTy, PtrTy, IntTy, IMPTy); SlotTy = llvm::PointerType::getUnqual(SlotStructTy); // Slot_t objc_msg_lookup_sender(id *receiver, SEL selector, id sender); SlotLookupFn.init(&CGM, "objc_msg_lookup_sender", SlotTy, PtrToIdTy, - SelectorTy, IdTy, nullptr); + SelectorTy, IdTy); // Slot_t objc_msg_lookup_super(struct objc_super*, SEL); SlotLookupSuperFn.init(&CGM, "objc_slot_lookup_super", SlotTy, - PtrToObjCSuperTy, SelectorTy, nullptr); + PtrToObjCSuperTy, SelectorTy); // If we're in ObjC++ mode, then we want to make if (CGM.getLangOpts().CPlusPlus) { llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); // void *__cxa_begin_catch(void *e) - EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy, nullptr); + EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy); // void __cxa_end_catch(void) - ExitCatchFn.init(&CGM, "__cxa_end_catch", VoidTy, nullptr); + ExitCatchFn.init(&CGM, "__cxa_end_catch", VoidTy); // void _Unwind_Resume_or_Rethrow(void*) ExceptionReThrowFn.init(&CGM, "_Unwind_Resume_or_Rethrow", VoidTy, - PtrTy, nullptr); + PtrTy); } else if (R.getVersion() >= VersionTuple(1, 7)) { llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); // id objc_begin_catch(void *e) - EnterCatchFn.init(&CGM, "objc_begin_catch", IdTy, PtrTy, nullptr); + EnterCatchFn.init(&CGM, "objc_begin_catch", IdTy, PtrTy); // void objc_end_catch(void) - ExitCatchFn.init(&CGM, "objc_end_catch", VoidTy, nullptr); + ExitCatchFn.init(&CGM, "objc_end_catch", VoidTy); // void _Unwind_Resume_or_Rethrow(void*) - ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy, - PtrTy, nullptr); + ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy, PtrTy); } llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); SetPropertyAtomic.init(&CGM, "objc_setProperty_atomic", VoidTy, IdTy, - SelectorTy, IdTy, PtrDiffTy, nullptr); + SelectorTy, IdTy, PtrDiffTy); SetPropertyAtomicCopy.init(&CGM, "objc_setProperty_atomic_copy", VoidTy, - IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr); + IdTy, SelectorTy, IdTy, PtrDiffTy); SetPropertyNonAtomic.init(&CGM, "objc_setProperty_nonatomic", VoidTy, - IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr); + IdTy, SelectorTy, IdTy, PtrDiffTy); SetPropertyNonAtomicCopy.init(&CGM, "objc_setProperty_nonatomic_copy", - VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr); + VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy); // void objc_setCppObjectAtomic(void *dest, const void *src, void // *helper); CxxAtomicObjectSetFn.init(&CGM, "objc_setCppObjectAtomic", VoidTy, PtrTy, - PtrTy, PtrTy, nullptr); + PtrTy, PtrTy); // void objc_getCppObjectAtomic(void *dest, const void *src, void // *helper); CxxAtomicObjectGetFn.init(&CGM, "objc_getCppObjectAtomic", VoidTy, PtrTy, - PtrTy, PtrTy, nullptr); + PtrTy, PtrTy); } llvm::Constant *GetCppAtomicObjectGetFunction() override { @@ -849,14 +847,14 @@ protected: public: CGObjCObjFW(CodeGenModule &Mod): CGObjCGNU(Mod, 9, 3) { // IMP objc_msg_lookup(id, SEL); - MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, nullptr); + 
MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy); MsgLookupFnSRet.init(&CGM, "objc_msg_lookup_stret", IMPTy, IdTy, - SelectorTy, nullptr); + SelectorTy); // IMP objc_msg_lookup_super(struct objc_super*, SEL); MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, - PtrToObjCSuperTy, SelectorTy, nullptr); + PtrToObjCSuperTy, SelectorTy); MsgLookupSuperFnSRet.init(&CGM, "objc_msg_lookup_super_stret", IMPTy, - PtrToObjCSuperTy, SelectorTy, nullptr); + PtrToObjCSuperTy, SelectorTy); } }; } // end anonymous namespace @@ -945,35 +943,34 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, } PtrToIdTy = llvm::PointerType::getUnqual(IdTy); - ObjCSuperTy = llvm::StructType::get(IdTy, IdTy, nullptr); + ObjCSuperTy = llvm::StructType::get(IdTy, IdTy); PtrToObjCSuperTy = llvm::PointerType::getUnqual(ObjCSuperTy); llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); // void objc_exception_throw(id); - ExceptionThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, nullptr); - ExceptionReThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, nullptr); + ExceptionThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy); + ExceptionReThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy); // int objc_sync_enter(id); - SyncEnterFn.init(&CGM, "objc_sync_enter", IntTy, IdTy, nullptr); + SyncEnterFn.init(&CGM, "objc_sync_enter", IntTy, IdTy); // int objc_sync_exit(id); - SyncExitFn.init(&CGM, "objc_sync_exit", IntTy, IdTy, nullptr); + SyncExitFn.init(&CGM, "objc_sync_exit", IntTy, IdTy); // void objc_enumerationMutation (id) - EnumerationMutationFn.init(&CGM, "objc_enumerationMutation", VoidTy, - IdTy, nullptr); + EnumerationMutationFn.init(&CGM, "objc_enumerationMutation", VoidTy, IdTy); // id objc_getProperty(id, SEL, ptrdiff_t, BOOL) GetPropertyFn.init(&CGM, "objc_getProperty", IdTy, IdTy, SelectorTy, - PtrDiffTy, BoolTy, nullptr); + PtrDiffTy, BoolTy); // void objc_setProperty(id, SEL, ptrdiff_t, id, BOOL, BOOL) SetPropertyFn.init(&CGM, "objc_setProperty", VoidTy, IdTy, SelectorTy, - PtrDiffTy, IdTy, BoolTy, BoolTy, nullptr); + PtrDiffTy, IdTy, BoolTy, BoolTy); // void objc_setPropertyStruct(void*, void*, ptrdiff_t, BOOL, BOOL) - GetStructPropertyFn.init(&CGM, "objc_getPropertyStruct", VoidTy, PtrTy, PtrTy, - PtrDiffTy, BoolTy, BoolTy, nullptr); + GetStructPropertyFn.init(&CGM, "objc_getPropertyStruct", VoidTy, PtrTy, PtrTy, + PtrDiffTy, BoolTy, BoolTy); // void objc_setPropertyStruct(void*, void*, ptrdiff_t, BOOL, BOOL) - SetStructPropertyFn.init(&CGM, "objc_setPropertyStruct", VoidTy, PtrTy, PtrTy, - PtrDiffTy, BoolTy, BoolTy, nullptr); + SetStructPropertyFn.init(&CGM, "objc_setPropertyStruct", VoidTy, PtrTy, PtrTy, + PtrDiffTy, BoolTy, BoolTy); // IMP type llvm::Type *IMPArgs[] = { IdTy, SelectorTy }; @@ -997,21 +994,19 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, // Get functions needed in GC mode // id objc_assign_ivar(id, id, ptrdiff_t); - IvarAssignFn.init(&CGM, "objc_assign_ivar", IdTy, IdTy, IdTy, PtrDiffTy, - nullptr); + IvarAssignFn.init(&CGM, "objc_assign_ivar", IdTy, IdTy, IdTy, PtrDiffTy); // id objc_assign_strongCast (id, id*) StrongCastAssignFn.init(&CGM, "objc_assign_strongCast", IdTy, IdTy, - PtrToIdTy, nullptr); + PtrToIdTy); // id objc_assign_global(id, id*); - GlobalAssignFn.init(&CGM, "objc_assign_global", IdTy, IdTy, PtrToIdTy, - nullptr); + GlobalAssignFn.init(&CGM, "objc_assign_global", IdTy, IdTy, PtrToIdTy); // id objc_assign_weak(id, id*); - WeakAssignFn.init(&CGM, "objc_assign_weak", IdTy, IdTy, 
PtrToIdTy, nullptr); + WeakAssignFn.init(&CGM, "objc_assign_weak", IdTy, IdTy, PtrToIdTy); // id objc_read_weak(id*); - WeakReadFn.init(&CGM, "objc_read_weak", IdTy, PtrToIdTy, nullptr); + WeakReadFn.init(&CGM, "objc_read_weak", IdTy, PtrToIdTy); // void *objc_memmove_collectable(void*, void *, size_t); MemMoveFn.init(&CGM, "objc_memmove_collectable", PtrTy, PtrTy, PtrTy, - SizeTy, nullptr); + SizeTy); } } @@ -1317,7 +1312,7 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, } } // Cast the pointer to a simplified version of the class structure - llvm::Type *CastTy = llvm::StructType::get(IdTy, IdTy, nullptr); + llvm::Type *CastTy = llvm::StructType::get(IdTy, IdTy); ReceiverClass = Builder.CreateBitCast(ReceiverClass, llvm::PointerType::getUnqual(CastTy)); // Get the superclass pointer @@ -1326,8 +1321,8 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, ReceiverClass = Builder.CreateAlignedLoad(ReceiverClass, CGF.getPointerAlign()); // Construct the structure used to look up the IMP - llvm::StructType *ObjCSuperTy = llvm::StructType::get( - Receiver->getType(), IdTy, nullptr); + llvm::StructType *ObjCSuperTy = + llvm::StructType::get(Receiver->getType(), IdTy); // FIXME: Is this really supposed to be a dynamic alloca? Address ObjCSuper = Address(Builder.CreateAlloca(ObjCSuperTy), @@ -1565,11 +1560,8 @@ GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames, IvarList.addInt(IntTy, (int)IvarNames.size()); // Get the ivar structure type. - llvm::StructType *ObjCIvarTy = llvm::StructType::get( - PtrToInt8Ty, - PtrToInt8Ty, - IntTy, - nullptr); + llvm::StructType *ObjCIvarTy = + llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty, IntTy); // Array of ivar structures. auto Ivars = IvarList.beginArray(ObjCIvarTy); @@ -1611,7 +1603,7 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure( // anyway; the classes will still work with the GNU runtime, they will just // be ignored. 
llvm::StructType *ClassTy = llvm::StructType::get( - PtrToInt8Ty, // isa + PtrToInt8Ty, // isa PtrToInt8Ty, // super_class PtrToInt8Ty, // name LongTy, // version @@ -1620,18 +1612,18 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure( IVars->getType(), // ivars Methods->getType(), // methods // These are all filled in by the runtime, so we pretend - PtrTy, // dtable - PtrTy, // subclass_list - PtrTy, // sibling_class - PtrTy, // protocols - PtrTy, // gc_object_type + PtrTy, // dtable + PtrTy, // subclass_list + PtrTy, // sibling_class + PtrTy, // protocols + PtrTy, // gc_object_type // New ABI: LongTy, // abi_version IvarOffsets->getType(), // ivar_offsets Properties->getType(), // properties IntPtrTy, // strong_pointers - IntPtrTy, // weak_pointers - nullptr); + IntPtrTy // weak_pointers + ); ConstantInitBuilder Builder(CGM); auto Elements = Builder.beginStruct(ClassTy); @@ -2207,7 +2199,7 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { IvarNames.push_back(MakeConstantString(IVD->getNameAsString())); // Get the type encoding for this ivar std::string TypeStr; - Context.getObjCEncodingForType(IVD->getType(), TypeStr); + Context.getObjCEncodingForType(IVD->getType(), TypeStr, IVD); IvarTypes.push_back(MakeConstantString(TypeStr)); // Get the offset uint64_t BaseOffset = ComputeIvarBaseOffset(CGM, OID, IVD); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp index 7219592..98435fe 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp @@ -17,7 +17,7 @@ #include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "ConstantBuilder.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" @@ -64,13 +64,11 @@ private: // Add the non-lazy-bind attribute, since objc_msgSend is likely to // be called a lot. llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; - return - CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, - params, true), - "objc_msgSend", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NonLazyBind)); + return CGM.CreateRuntimeFunction( + llvm::FunctionType::get(ObjectPtrTy, params, true), "objc_msgSend", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NonLazyBind)); } /// void objc_msgSend_stret (id, SEL, ...) @@ -107,8 +105,8 @@ private: llvm::Constant *getMessageSendFp2retFn() const { llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; llvm::Type *longDoubleType = llvm::Type::getX86_FP80Ty(VMContext); - llvm::Type *resultType = - llvm::StructType::get(longDoubleType, longDoubleType, nullptr); + llvm::Type *resultType = + llvm::StructType::get(longDoubleType, longDoubleType); return CGM.CreateRuntimeFunction(llvm::FunctionType::get(resultType, params, true), @@ -310,7 +308,7 @@ public: SmallVector<CanQualType,5> Params; Params.push_back(Ctx.VoidPtrTy); Params.push_back(Ctx.VoidPtrTy); - Params.push_back(Ctx.LongTy); + Params.push_back(Ctx.getSizeType()); Params.push_back(Ctx.BoolTy); Params.push_back(Ctx.BoolTy); llvm::FunctionType *FTy = @@ -589,13 +587,11 @@ public: llvm::Constant *getSetJmpFn() { // This is specifically the prototype for x86. 
llvm::Type *params[] = { CGM.Int32Ty->getPointerTo() }; - return - CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.Int32Ty, - params, false), - "_setjmp", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NonLazyBind)); + return CGM.CreateRuntimeFunction( + llvm::FunctionType::get(CGM.Int32Ty, params, false), "_setjmp", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NonLazyBind)); } public: @@ -890,7 +886,7 @@ protected: /// Cached reference to the class for constant strings. This value has type /// int * but is actually an Obj-C class pointer. - llvm::WeakVH ConstantStringClassRef; + llvm::WeakTrackingVH ConstantStringClassRef; /// \brief The LLVM type corresponding to NSConstantString. llvm::StructType *NSConstantStringType = nullptr; @@ -1008,6 +1004,8 @@ protected: const ObjCInterfaceDecl *ID, ObjCCommonTypesHelper &ObjCTypes); + std::string GetSectionName(StringRef Section, StringRef MachOAttributes); + public: /// CreateMetadataVar - Create a global variable with internal /// linkage for use by the Objective-C runtime. @@ -1680,7 +1678,10 @@ struct NullReturnState { /// Complete the null-return operation. It is valid to call this /// regardless of whether 'init' has been called. - RValue complete(CodeGenFunction &CGF, RValue result, QualType resultType, + RValue complete(CodeGenFunction &CGF, + ReturnValueSlot returnSlot, + RValue result, + QualType resultType, const CallArgList &CallArgs, const ObjCMethodDecl *Method) { // If we never had to do a null-check, just use the raw result. @@ -1747,7 +1748,8 @@ struct NullReturnState { // memory or (2) agg values in registers. if (result.isAggregate()) { assert(result.isAggregate() && "null init of non-aggregate result?"); - CGF.EmitNullInitialization(result.getAggregateAddress(), resultType); + if (!returnSlot.isUnused()) + CGF.EmitNullInitialization(result.getAggregateAddress(), resultType); if (contBB) CGF.EmitBlock(contBB); return result; } @@ -2119,11 +2121,11 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, } } - NullReturnState nullReturn; + bool RequiresNullCheck = false; llvm::Constant *Fn = nullptr; if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) { - if (ReceiverCanBeNull) nullReturn.init(CGF, Arg0); + if (ReceiverCanBeNull) RequiresNullCheck = true; Fn = (ObjCABI == 2) ? ObjCTypes.getSendStretFn2(IsSuper) : ObjCTypes.getSendStretFn(IsSuper); } else if (CGM.ReturnTypeUsesFPRet(ResultType)) { @@ -2136,23 +2138,30 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, // arm64 uses objc_msgSend for stret methods and yet null receiver check // must be made for it. if (ReceiverCanBeNull && CGM.ReturnTypeUsesSRet(MSI.CallInfo)) - nullReturn.init(CGF, Arg0); + RequiresNullCheck = true; Fn = (ObjCABI == 2) ? ObjCTypes.getSendFn2(IsSuper) : ObjCTypes.getSendFn(IsSuper); } + // We don't need to emit a null check to zero out an indirect result if the + // result is ignored. + if (Return.isUnused()) + RequiresNullCheck = false; + // Emit a null-check if there's a consumed argument other than the receiver. 
- bool RequiresNullCheck = false; - if (ReceiverCanBeNull && CGM.getLangOpts().ObjCAutoRefCount && Method) { + if (!RequiresNullCheck && CGM.getLangOpts().ObjCAutoRefCount && Method) { for (const auto *ParamDecl : Method->parameters()) { if (ParamDecl->hasAttr<NSConsumedAttr>()) { - if (!nullReturn.NullBB) - nullReturn.init(CGF, Arg0); RequiresNullCheck = true; break; } } } + + NullReturnState nullReturn; + if (RequiresNullCheck) { + nullReturn.init(CGF, Arg0); + } llvm::Instruction *CallSite; Fn = llvm::ConstantExpr::getBitCast(Fn, MSI.MessengerType); @@ -2166,7 +2175,7 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, llvm::CallSite(CallSite).setDoesNotReturn(); } - return nullReturn.complete(CGF, rvalue, ResultType, CallArgs, + return nullReturn.complete(CGF, Return, rvalue, ResultType, CallArgs, RequiresNullCheck ? Method : nullptr); } @@ -4790,6 +4799,27 @@ llvm::Value *CGObjCMac::EmitIvarOffset(CodeGen::CodeGenFunction &CGF, /* *** Private Interface *** */ +std::string CGObjCCommonMac::GetSectionName(StringRef Section, + StringRef MachOAttributes) { + switch (CGM.getTriple().getObjectFormat()) { + default: + llvm_unreachable("unexpected object file format"); + case llvm::Triple::MachO: { + if (MachOAttributes.empty()) + return ("__DATA," + Section).str(); + return ("__DATA," + Section + "," + MachOAttributes).str(); + } + case llvm::Triple::ELF: + assert(Section.substr(0, 2) == "__" && + "expected the name to begin with __"); + return Section.substr(2).str(); + case llvm::Triple::COFF: + assert(Section.substr(0, 2) == "__" && + "expected the name to begin with __"); + return ("." + Section.substr(2) + "$B").str(); + } +} + /// EmitImageInfo - Emit the image info marker used to encode some module /// level information. /// @@ -4813,9 +4843,10 @@ enum ImageInfoFlags { void CGObjCCommonMac::EmitImageInfo() { unsigned version = 0; // Version is unused? - const char *Section = (ObjCABI == 1) ? - "__OBJC, __image_info,regular" : - "__DATA, __objc_imageinfo, regular, no_dead_strip"; + std::string Section = + (ObjCABI == 1) + ? "__OBJC,__image_info,regular" + : GetSectionName("__objc_imageinfo", "regular,no_dead_strip"); // Generate module-level named metadata to convey this information to the // linker and code-gen. @@ -4826,7 +4857,7 @@ void CGObjCCommonMac::EmitImageInfo() { Mod.addModuleFlag(llvm::Module::Error, "Objective-C Image Info Version", version); Mod.addModuleFlag(llvm::Module::Error, "Objective-C Image Info Section", - llvm::MDString::get(VMContext,Section)); + llvm::MDString::get(VMContext, Section)); if (CGM.getLangOpts().getGC() == LangOptions::NonGC) { // Non-GC overrides those files which specify GC. 
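GetSectionName centralizes how Objective-C metadata section names are spelled per object-file format, replacing the hard-coded "__DATA, __objc_..." strings. The mapping can be exercised in isolation; the values below mirror the switch added above:

#include <cassert>
#include <string>

enum class ObjFormat { MachO, ELF, COFF };

std::string sectionName(ObjFormat F, const std::string &Section,
                        const std::string &MachOAttributes) {
  switch (F) {
  case ObjFormat::MachO:
    return MachOAttributes.empty()
               ? "__DATA," + Section
               : "__DATA," + Section + "," + MachOAttributes;
  case ObjFormat::ELF:
    return Section.substr(2);              // drop the leading "__"
  case ObjFormat::COFF:
    return "." + Section.substr(2) + "$B";
  }
  return {};
}

int main() {
  assert(sectionName(ObjFormat::MachO, "__objc_imageinfo",
                     "regular,no_dead_strip") ==
         "__DATA,__objc_imageinfo,regular,no_dead_strip");
  assert(sectionName(ObjFormat::ELF, "__objc_classlist", "") ==
         "objc_classlist");
  assert(sectionName(ObjFormat::COFF, "__objc_classlist", "") ==
         ".objc_classlist$B");
}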
@@ -5510,17 +5541,15 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm) // char *name; // char *attributes; // } - PropertyTy = llvm::StructType::create("struct._prop_t", - Int8PtrTy, Int8PtrTy, nullptr); + PropertyTy = llvm::StructType::create("struct._prop_t", Int8PtrTy, Int8PtrTy); // struct _prop_list_t { // uint32_t entsize; // sizeof(struct _prop_t) // uint32_t count_of_properties; // struct _prop_t prop_list[count_of_properties]; // } - PropertyListTy = - llvm::StructType::create("struct._prop_list_t", IntTy, IntTy, - llvm::ArrayType::get(PropertyTy, 0), nullptr); + PropertyListTy = llvm::StructType::create( + "struct._prop_list_t", IntTy, IntTy, llvm::ArrayType::get(PropertyTy, 0)); // struct _prop_list_t * PropertyListPtrTy = llvm::PointerType::getUnqual(PropertyListTy); @@ -5529,9 +5558,8 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm) // char *method_type; // char *_imp; // } - MethodTy = llvm::StructType::create("struct._objc_method", - SelectorPtrTy, Int8PtrTy, Int8PtrTy, - nullptr); + MethodTy = llvm::StructType::create("struct._objc_method", SelectorPtrTy, + Int8PtrTy, Int8PtrTy); // struct _objc_cache * CacheTy = llvm::StructType::create(VMContext, "struct._objc_cache"); @@ -5544,17 +5572,16 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // SEL name; // char *types; // } - MethodDescriptionTy = - llvm::StructType::create("struct._objc_method_description", - SelectorPtrTy, Int8PtrTy, nullptr); + MethodDescriptionTy = llvm::StructType::create( + "struct._objc_method_description", SelectorPtrTy, Int8PtrTy); // struct _objc_method_description_list { // int count; // struct _objc_method_description[1]; // } - MethodDescriptionListTy = llvm::StructType::create( - "struct._objc_method_description_list", IntTy, - llvm::ArrayType::get(MethodDescriptionTy, 0), nullptr); + MethodDescriptionListTy = + llvm::StructType::create("struct._objc_method_description_list", IntTy, + llvm::ArrayType::get(MethodDescriptionTy, 0)); // struct _objc_method_description_list * MethodDescriptionListPtrTy = @@ -5570,11 +5597,10 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // const char ** extendedMethodTypes; // struct _objc_property_list *class_properties; // } - ProtocolExtensionTy = - llvm::StructType::create("struct._objc_protocol_extension", - IntTy, MethodDescriptionListPtrTy, - MethodDescriptionListPtrTy, PropertyListPtrTy, - Int8PtrPtrTy, PropertyListPtrTy, nullptr); + ProtocolExtensionTy = llvm::StructType::create( + "struct._objc_protocol_extension", IntTy, MethodDescriptionListPtrTy, + MethodDescriptionListPtrTy, PropertyListPtrTy, Int8PtrPtrTy, + PropertyListPtrTy); // struct _objc_protocol_extension * ProtocolExtensionPtrTy = llvm::PointerType::getUnqual(ProtocolExtensionTy); @@ -5586,10 +5612,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) ProtocolListTy = llvm::StructType::create(VMContext, "struct._objc_protocol_list"); - ProtocolListTy->setBody(llvm::PointerType::getUnqual(ProtocolListTy), - LongTy, - llvm::ArrayType::get(ProtocolTy, 0), - nullptr); + ProtocolListTy->setBody(llvm::PointerType::getUnqual(ProtocolListTy), LongTy, + llvm::ArrayType::get(ProtocolTy, 0)); // struct _objc_protocol { // struct _objc_protocol_extension *isa; @@ -5600,9 +5624,7 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // } ProtocolTy->setBody(ProtocolExtensionPtrTy, Int8PtrTy, llvm::PointerType::getUnqual(ProtocolListTy), - MethodDescriptionListPtrTy, - 
MethodDescriptionListPtrTy, - nullptr); + MethodDescriptionListPtrTy, MethodDescriptionListPtrTy); // struct _objc_protocol_list * ProtocolListPtrTy = llvm::PointerType::getUnqual(ProtocolListTy); @@ -5616,8 +5638,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // char *ivar_type; // int ivar_offset; // } - IvarTy = llvm::StructType::create("struct._objc_ivar", - Int8PtrTy, Int8PtrTy, IntTy, nullptr); + IvarTy = llvm::StructType::create("struct._objc_ivar", Int8PtrTy, Int8PtrTy, + IntTy); // struct _objc_ivar_list * IvarListTy = @@ -5630,9 +5652,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) MethodListPtrTy = llvm::PointerType::getUnqual(MethodListTy); // struct _objc_class_extension * - ClassExtensionTy = - llvm::StructType::create("struct._objc_class_extension", - IntTy, Int8PtrTy, PropertyListPtrTy, nullptr); + ClassExtensionTy = llvm::StructType::create( + "struct._objc_class_extension", IntTy, Int8PtrTy, PropertyListPtrTy); ClassExtensionPtrTy = llvm::PointerType::getUnqual(ClassExtensionTy); ClassTy = llvm::StructType::create(VMContext, "struct._objc_class"); @@ -5652,18 +5673,9 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // struct _objc_class_ext *ext; // }; ClassTy->setBody(llvm::PointerType::getUnqual(ClassTy), - llvm::PointerType::getUnqual(ClassTy), - Int8PtrTy, - LongTy, - LongTy, - LongTy, - IvarListPtrTy, - MethodListPtrTy, - CachePtrTy, - ProtocolListPtrTy, - Int8PtrTy, - ClassExtensionPtrTy, - nullptr); + llvm::PointerType::getUnqual(ClassTy), Int8PtrTy, LongTy, + LongTy, LongTy, IvarListPtrTy, MethodListPtrTy, CachePtrTy, + ProtocolListPtrTy, Int8PtrTy, ClassExtensionPtrTy); ClassPtrTy = llvm::PointerType::getUnqual(ClassTy); @@ -5677,12 +5689,10 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // struct _objc_property_list *instance_properties;// category's @property // struct _objc_property_list *class_properties; // } - CategoryTy = - llvm::StructType::create("struct._objc_category", - Int8PtrTy, Int8PtrTy, MethodListPtrTy, - MethodListPtrTy, ProtocolListPtrTy, - IntTy, PropertyListPtrTy, PropertyListPtrTy, - nullptr); + CategoryTy = llvm::StructType::create( + "struct._objc_category", Int8PtrTy, Int8PtrTy, MethodListPtrTy, + MethodListPtrTy, ProtocolListPtrTy, IntTy, PropertyListPtrTy, + PropertyListPtrTy); // Global metadata structures @@ -5693,10 +5703,9 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // short cat_def_cnt; // char *defs[cls_def_cnt + cat_def_cnt]; // } - SymtabTy = - llvm::StructType::create("struct._objc_symtab", - LongTy, SelectorPtrTy, ShortTy, ShortTy, - llvm::ArrayType::get(Int8PtrTy, 0), nullptr); + SymtabTy = llvm::StructType::create("struct._objc_symtab", LongTy, + SelectorPtrTy, ShortTy, ShortTy, + llvm::ArrayType::get(Int8PtrTy, 0)); SymtabPtrTy = llvm::PointerType::getUnqual(SymtabTy); // struct _objc_module { @@ -5705,10 +5714,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // char *name; // struct _objc_symtab* symtab; // } - ModuleTy = - llvm::StructType::create("struct._objc_module", - LongTy, LongTy, Int8PtrTy, SymtabPtrTy, nullptr); - + ModuleTy = llvm::StructType::create("struct._objc_module", LongTy, LongTy, + Int8PtrTy, SymtabPtrTy); // FIXME: This is the size of the setjmp buffer and should be target // specific. 18 is what's used on 32-bit X86. 
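The trailing nullptr arguments removed throughout these type definitions were sentinels for the old variadic llvm::StructType::get/create overloads, just as the va_list-based runtime-function init() in CGObjCGNU.cpp earlier in this diff gave way to a variadic template; in both cases the element types now arrive with a fixed arity known at compile time. A tiny sketch of the underlying pattern (stand-in Type, not the LLVM API):

#include <vector>

struct Type {};  // stand-in for llvm::Type

// The parameter pack carries the arity, so no terminating nullptr is needed;
// in this sketch, passing one out of habit fails template deduction and does
// not compile.
template <typename... Tys>
std::vector<Type *> structBody(Tys *...Elements) {
  return {Elements...};
}

// Usage: structBody(&isa, &superclass, &cache) -- three elements, no sentinel.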
@@ -5717,10 +5724,9 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // Exceptions llvm::Type *StackPtrTy = llvm::ArrayType::get(CGM.Int8PtrTy, 4); - ExceptionDataTy = - llvm::StructType::create("struct._objc_exception_data", - llvm::ArrayType::get(CGM.Int32Ty,SetJmpBufferSize), - StackPtrTy, nullptr); + ExceptionDataTy = llvm::StructType::create( + "struct._objc_exception_data", + llvm::ArrayType::get(CGM.Int32Ty, SetJmpBufferSize), StackPtrTy); } ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModule &cgm) @@ -5731,8 +5737,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // struct _objc_method method_list[method_count]; // } MethodListnfABITy = - llvm::StructType::create("struct.__method_list_t", IntTy, IntTy, - llvm::ArrayType::get(MethodTy, 0), nullptr); + llvm::StructType::create("struct.__method_list_t", IntTy, IntTy, + llvm::ArrayType::get(MethodTy, 0)); // struct method_list_t * MethodListnfABIPtrTy = llvm::PointerType::getUnqual(MethodListnfABITy); @@ -5756,14 +5762,12 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul ProtocolListnfABITy = llvm::StructType::create(VMContext, "struct._objc_protocol_list"); - ProtocolnfABITy = - llvm::StructType::create("struct._protocol_t", ObjectPtrTy, Int8PtrTy, - llvm::PointerType::getUnqual(ProtocolListnfABITy), - MethodListnfABIPtrTy, MethodListnfABIPtrTy, - MethodListnfABIPtrTy, MethodListnfABIPtrTy, - PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy, - Int8PtrTy, PropertyListPtrTy, - nullptr); + ProtocolnfABITy = llvm::StructType::create( + "struct._protocol_t", ObjectPtrTy, Int8PtrTy, + llvm::PointerType::getUnqual(ProtocolListnfABITy), MethodListnfABIPtrTy, + MethodListnfABIPtrTy, MethodListnfABIPtrTy, MethodListnfABIPtrTy, + PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy, Int8PtrTy, + PropertyListPtrTy); // struct _protocol_t* ProtocolnfABIPtrTy = llvm::PointerType::getUnqual(ProtocolnfABITy); @@ -5773,8 +5777,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // struct _protocol_t *[protocol_count]; // } ProtocolListnfABITy->setBody(LongTy, - llvm::ArrayType::get(ProtocolnfABIPtrTy, 0), - nullptr); + llvm::ArrayType::get(ProtocolnfABIPtrTy, 0)); // struct _objc_protocol_list* ProtocolListnfABIPtrTy = llvm::PointerType::getUnqual(ProtocolListnfABITy); @@ -5788,7 +5791,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // } IvarnfABITy = llvm::StructType::create( "struct._ivar_t", llvm::PointerType::getUnqual(IvarOffsetVarTy), - Int8PtrTy, Int8PtrTy, IntTy, IntTy, nullptr); + Int8PtrTy, Int8PtrTy, IntTy, IntTy); // struct _ivar_list_t { // uint32 entsize; // sizeof(struct _ivar_t) @@ -5796,8 +5799,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // struct _iver_t list[count]; // } IvarListnfABITy = - llvm::StructType::create("struct._ivar_list_t", IntTy, IntTy, - llvm::ArrayType::get(IvarnfABITy, 0), nullptr); + llvm::StructType::create("struct._ivar_list_t", IntTy, IntTy, + llvm::ArrayType::get(IvarnfABITy, 0)); IvarListnfABIPtrTy = llvm::PointerType::getUnqual(IvarListnfABITy); @@ -5816,13 +5819,10 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // } // FIXME. Add 'reserved' field in 64bit abi mode! 
- ClassRonfABITy = llvm::StructType::create("struct._class_ro_t", - IntTy, IntTy, IntTy, Int8PtrTy, - Int8PtrTy, MethodListnfABIPtrTy, - ProtocolListnfABIPtrTy, - IvarListnfABIPtrTy, - Int8PtrTy, PropertyListPtrTy, - nullptr); + ClassRonfABITy = llvm::StructType::create( + "struct._class_ro_t", IntTy, IntTy, IntTy, Int8PtrTy, Int8PtrTy, + MethodListnfABIPtrTy, ProtocolListnfABIPtrTy, IvarListnfABIPtrTy, + Int8PtrTy, PropertyListPtrTy); // ImpnfABITy - LLVM for id (*)(id, SEL, ...) llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; @@ -5839,11 +5839,9 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul ClassnfABITy = llvm::StructType::create(VMContext, "struct._class_t"); ClassnfABITy->setBody(llvm::PointerType::getUnqual(ClassnfABITy), - llvm::PointerType::getUnqual(ClassnfABITy), - CachePtrTy, + llvm::PointerType::getUnqual(ClassnfABITy), CachePtrTy, llvm::PointerType::getUnqual(ImpnfABITy), - llvm::PointerType::getUnqual(ClassRonfABITy), - nullptr); + llvm::PointerType::getUnqual(ClassRonfABITy)); // LLVM for struct _class_t * ClassnfABIPtrTy = llvm::PointerType::getUnqual(ClassnfABITy); @@ -5858,15 +5856,10 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // const struct _prop_list_t * const class_properties; // const uint32_t size; // } - CategorynfABITy = llvm::StructType::create("struct._category_t", - Int8PtrTy, ClassnfABIPtrTy, - MethodListnfABIPtrTy, - MethodListnfABIPtrTy, - ProtocolListnfABIPtrTy, - PropertyListPtrTy, - PropertyListPtrTy, - IntTy, - nullptr); + CategorynfABITy = llvm::StructType::create( + "struct._category_t", Int8PtrTy, ClassnfABIPtrTy, MethodListnfABIPtrTy, + MethodListnfABIPtrTy, ProtocolListnfABIPtrTy, PropertyListPtrTy, + PropertyListPtrTy, IntTy); // New types for nonfragile abi messaging. CodeGen::CodeGenTypes &Types = CGM.getTypes(); @@ -5903,9 +5896,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // SUPER_IMP messenger; // SEL name; // }; - SuperMessageRefTy = - llvm::StructType::create("struct._super_message_ref_t", - ImpnfABITy, SelectorPtrTy, nullptr); + SuperMessageRefTy = llvm::StructType::create("struct._super_message_ref_t", + ImpnfABITy, SelectorPtrTy); // SuperMessageRefPtrTy - LLVM for struct _super_message_ref_t* SuperMessageRefPtrTy = llvm::PointerType::getUnqual(SuperMessageRefTy); @@ -5916,10 +5908,9 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // const char* name; // c++ typeinfo string // Class cls; // }; - EHTypeTy = - llvm::StructType::create("struct._objc_typeinfo", - llvm::PointerType::getUnqual(Int8PtrTy), - Int8PtrTy, ClassnfABIPtrTy, nullptr); + EHTypeTy = llvm::StructType::create("struct._objc_typeinfo", + llvm::PointerType::getUnqual(Int8PtrTy), + Int8PtrTy, ClassnfABIPtrTy); EHTypePtrTy = llvm::PointerType::getUnqual(EHTypeTy); } @@ -5974,17 +5965,21 @@ void CGObjCNonFragileABIMac::FinishNonFragileABIModule() { } AddModuleClassList(DefinedClasses, "OBJC_LABEL_CLASS_$", - "__DATA, __objc_classlist, regular, no_dead_strip"); + GetSectionName("__objc_classlist", + "regular,no_dead_strip")); AddModuleClassList(DefinedNonLazyClasses, "OBJC_LABEL_NONLAZY_CLASS_$", - "__DATA, __objc_nlclslist, regular, no_dead_strip"); + GetSectionName("__objc_nlclslist", + "regular,no_dead_strip")); // Build list of all implemented category addresses in array // L_OBJC_LABEL_CATEGORY_$. 
AddModuleClassList(DefinedCategories, "OBJC_LABEL_CATEGORY_$", - "__DATA, __objc_catlist, regular, no_dead_strip"); + GetSectionName("__objc_catlist", + "regular,no_dead_strip")); AddModuleClassList(DefinedNonLazyCategories, "OBJC_LABEL_NONLAZY_CATEGORY_$", - "__DATA, __objc_nlcatlist, regular, no_dead_strip"); + GetSectionName("__objc_nlcatlist", + "regular,no_dead_strip")); EmitImageInfo(); } @@ -6397,15 +6392,15 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, llvm::GlobalVariable *PTGV = CGM.getModule().getGlobalVariable(ProtocolName); if (PTGV) return CGF.Builder.CreateAlignedLoad(PTGV, Align); - PTGV = new llvm::GlobalVariable( - CGM.getModule(), - Init->getType(), false, - llvm::GlobalValue::WeakAnyLinkage, - Init, - ProtocolName); - PTGV->setSection("__DATA, __objc_protorefs, coalesced, no_dead_strip"); + PTGV = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false, + llvm::GlobalValue::WeakAnyLinkage, Init, + ProtocolName); + PTGV->setSection(GetSectionName("__objc_protorefs", + "coalesced,no_dead_strip")); PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility); PTGV->setAlignment(Align.getQuantity()); + if (!CGM.getTriple().isOSBinFormatMachO()) + PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolName)); CGM.addCompilerUsedGlobal(PTGV); return CGF.Builder.CreateAlignedLoad(PTGV, Align); } @@ -6862,8 +6857,8 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolRef)); PTGV->setAlignment( CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABIPtrTy)); - if (CGM.getTriple().isOSBinFormatMachO()) - PTGV->setSection("__DATA, __objc_protolist, coalesced, no_dead_strip"); + PTGV->setSection(GetSectionName("__objc_protolist", + "coalesced,no_dead_strip")); PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility); CGM.addCompilerUsedGlobal(PTGV); return Entry; @@ -7059,7 +7054,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, /*constant*/ false, llvm::GlobalValue::WeakAnyLinkage); messageRef->setVisibility(llvm::GlobalValue::HiddenVisibility); - messageRef->setSection("__DATA, __objc_msgrefs, coalesced"); + messageRef->setSection(GetSectionName("__objc_msgrefs", "coalesced")); } bool requiresnullCheck = false; @@ -7089,7 +7084,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, CGCallee callee(CGCalleeInfo(), calleePtr); RValue result = CGF.EmitCall(MSI.CallInfo, callee, returnSlot, args); - return nullReturn.complete(CGF, result, resultType, formalArgs, + return nullReturn.complete(CGF, returnSlot, result, resultType, formalArgs, requiresnullCheck ? 
method : nullptr); } @@ -7170,7 +7165,8 @@ CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF, false, llvm::GlobalValue::PrivateLinkage, ClassGV, "OBJC_CLASSLIST_REFERENCES_$_"); Entry->setAlignment(Align.getQuantity()); - Entry->setSection("__DATA, __objc_classrefs, regular, no_dead_strip"); + Entry->setSection(GetSectionName("__objc_classrefs", + "regular,no_dead_strip")); CGM.addCompilerUsedGlobal(Entry); } return CGF.Builder.CreateAlignedLoad(Entry, Align); @@ -7204,7 +7200,8 @@ CGObjCNonFragileABIMac::EmitSuperClassRef(CodeGenFunction &CGF, false, llvm::GlobalValue::PrivateLinkage, ClassGV, "OBJC_CLASSLIST_SUP_REFS_$_"); Entry->setAlignment(Align.getQuantity()); - Entry->setSection("__DATA, __objc_superrefs, regular, no_dead_strip"); + Entry->setSection(GetSectionName("__objc_superrefs", + "regular,no_dead_strip")); CGM.addCompilerUsedGlobal(Entry); } return CGF.Builder.CreateAlignedLoad(Entry, Align); @@ -7226,7 +7223,8 @@ llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CodeGenFunction &CGF, MetaClassGV, "OBJC_CLASSLIST_SUP_REFS_$_"); Entry->setAlignment(Align.getQuantity()); - Entry->setSection("__DATA, __objc_superrefs, regular, no_dead_strip"); + Entry->setSection(GetSectionName("__objc_superrefs", + "regular,no_dead_strip")); CGM.addCompilerUsedGlobal(Entry); } @@ -7322,7 +7320,8 @@ Address CGObjCNonFragileABIMac::EmitSelectorAddr(CodeGenFunction &CGF, false, llvm::GlobalValue::PrivateLinkage, Casted, "OBJC_SELECTOR_REFERENCES_"); Entry->setExternallyInitialized(true); - Entry->setSection("__DATA, __objc_selrefs, literal_pointers, no_dead_strip"); + Entry->setSection(GetSectionName("__objc_selrefs", + "literal_pointers,no_dead_strip")); Entry->setAlignment(Align.getQuantity()); CGM.addCompilerUsedGlobal(Entry); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp index 3da7ed2..4cfddcb 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp @@ -26,61 +26,27 @@ using namespace clang; using namespace CodeGen; -static uint64_t LookupFieldBitOffset(CodeGen::CodeGenModule &CGM, - const ObjCInterfaceDecl *OID, - const ObjCImplementationDecl *ID, - const ObjCIvarDecl *Ivar) { - const ObjCInterfaceDecl *Container = Ivar->getContainingInterface(); - - // FIXME: We should eliminate the need to have ObjCImplementationDecl passed - // in here; it should never be necessary because that should be the lexical - // decl context for the ivar. - - // If we know have an implementation (and the ivar is in it) then - // look up in the implementation layout. - const ASTRecordLayout *RL; - if (ID && declaresSameEntity(ID->getClassInterface(), Container)) - RL = &CGM.getContext().getASTObjCImplementationLayout(ID); - else - RL = &CGM.getContext().getASTObjCInterfaceLayout(Container); - - // Compute field index. - // - // FIXME: The index here is closely tied to how ASTContext::getObjCLayout is - // implemented. This should be fixed to get the information from the layout - // directly. 
- unsigned Index = 0; - - for (const ObjCIvarDecl *IVD = Container->all_declared_ivar_begin(); - IVD; IVD = IVD->getNextIvar()) { - if (Ivar == IVD) - break; - ++Index; - } - assert(Index < RL->getFieldCount() && "Ivar is not inside record layout!"); - - return RL->getFieldOffset(Index); -} - uint64_t CGObjCRuntime::ComputeIvarBaseOffset(CodeGen::CodeGenModule &CGM, const ObjCInterfaceDecl *OID, const ObjCIvarDecl *Ivar) { - return LookupFieldBitOffset(CGM, OID, nullptr, Ivar) / - CGM.getContext().getCharWidth(); + return CGM.getContext().lookupFieldBitOffset(OID, nullptr, Ivar) / + CGM.getContext().getCharWidth(); } uint64_t CGObjCRuntime::ComputeIvarBaseOffset(CodeGen::CodeGenModule &CGM, const ObjCImplementationDecl *OID, const ObjCIvarDecl *Ivar) { - return LookupFieldBitOffset(CGM, OID->getClassInterface(), OID, Ivar) / - CGM.getContext().getCharWidth(); + return CGM.getContext().lookupFieldBitOffset(OID->getClassInterface(), OID, + Ivar) / + CGM.getContext().getCharWidth(); } unsigned CGObjCRuntime::ComputeBitfieldBitOffset( CodeGen::CodeGenModule &CGM, const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar) { - return LookupFieldBitOffset(CGM, ID, ID->getImplementation(), Ivar); + return CGM.getContext().lookupFieldBitOffset(ID, ID->getImplementation(), + Ivar); } LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, @@ -90,7 +56,11 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, unsigned CVRQualifiers, llvm::Value *Offset) { // Compute (type*) ( (char *) BaseValue + Offset) - QualType IvarTy = Ivar->getType().withCVRQualifiers(CVRQualifiers); + QualType InterfaceTy{OID->getTypeForDecl(), 0}; + QualType ObjectPtrTy = + CGF.CGM.getContext().getObjCObjectPointerType(InterfaceTy); + QualType IvarTy = + Ivar->getUsageType(ObjectPtrTy).withCVRQualifiers(CVRQualifiers); llvm::Type *LTy = CGF.CGM.getTypes().ConvertTypeForMem(IvarTy); llvm::Value *V = CGF.Builder.CreateBitCast(BaseValue, CGF.Int8PtrTy); V = CGF.Builder.CreateInBoundsGEP(V, Offset, "add.ptr"); @@ -115,7 +85,8 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, // Note, there is a subtle invariant here: we can only call this routine on // non-synthesized ivars but we may be called for synthesized ivars. However, // a synthesized ivar can never be a bit-field, so this is safe. 
- uint64_t FieldBitOffset = LookupFieldBitOffset(CGF.CGM, OID, nullptr, Ivar); + uint64_t FieldBitOffset = + CGF.CGM.getContext().lookupFieldBitOffset(OID, nullptr, Ivar); uint64_t BitOffset = FieldBitOffset % CGF.CGM.getContext().getCharWidth(); uint64_t AlignmentBits = CGF.CGM.getTarget().getCharAlign(); uint64_t BitFieldSize = Ivar->getBitWidthValue(CGF.getContext()); @@ -138,7 +109,8 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, Addr = CGF.Builder.CreateElementBitCast(Addr, llvm::Type::getIntNTy(CGF.getLLVMContext(), Info->StorageSize)); - return LValue::MakeBitfield(Addr, *Info, IvarTy, AlignmentSource::Decl); + return LValue::MakeBitfield(Addr, *Info, IvarTy, + LValueBaseInfo(AlignmentSource::Decl, false)); } namespace { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp index 9062936..db02c63 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp @@ -58,9 +58,6 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { case BuiltinType::OCLQueue: return llvm::PointerType::get( llvm::StructType::create(Ctx, "opencl.queue_t"), 0); - case BuiltinType::OCLNDRange: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.ndrange_t"), 0); case BuiltinType::OCLReserveID: return llvm::PointerType::get( llvm::StructType::create(Ctx, "opencl.reserve_id_t"), 0); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 4025217..9f8aa6c 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -15,7 +15,7 @@ #include "CGCleanup.h" #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" -#include "ConstantBuilder.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/Decl.h" #include "clang/AST/StmtOpenMP.h" #include "llvm/ADT/ArrayRef.h" @@ -264,6 +264,13 @@ public: return nullptr; } + /// \brief Get an LValue for the current ThreadID variable. + LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { + if (OuterRegionInfo) + return OuterRegionInfo->getThreadIDVariableLValue(CGF); + llvm_unreachable("No LValue for inlined OpenMP construct"); + } + /// \brief Get the name of the capture helper. StringRef getHelperName() const override { if (auto *OuterRegionInfo = getOldCSI()) @@ -643,6 +650,12 @@ enum OpenMPRTLFunction { // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 // *vec); OMPRTL__kmpc_doacross_wait, + // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void + // *data); + OMPRTL__kmpc_task_reduction_init, + // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void + // *d); + OMPRTL__kmpc_task_reduction_get_th_data, // // Offloading related calls @@ -697,6 +710,419 @@ void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { } } +/// Check if the combiner is a call to UDR combiner and if it is so return the +/// UDR decl used for reduction. 
+static const OMPDeclareReductionDecl * +getReductionInit(const Expr *ReductionOp) { + if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) + if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) + if (auto *DRE = + dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) + if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) + return DRD; + return nullptr; +} + +static void emitInitWithReductionInitializer(CodeGenFunction &CGF, + const OMPDeclareReductionDecl *DRD, + const Expr *InitOp, + Address Private, Address Original, + QualType Ty) { + if (DRD->getInitializer()) { + std::pair<llvm::Function *, llvm::Function *> Reduction = + CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); + auto *CE = cast<CallExpr>(InitOp); + auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); + const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); + const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); + auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); + auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), + [=]() -> Address { return Private; }); + PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), + [=]() -> Address { return Original; }); + (void)PrivateScope.Privatize(); + RValue Func = RValue::get(Reduction.second); + CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); + CGF.EmitIgnoredExpr(InitOp); + } else { + llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); + auto *GV = new llvm::GlobalVariable( + CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, Init, ".init"); + LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); + RValue InitRVal; + switch (CGF.getEvaluationKind(Ty)) { + case TEK_Scalar: + InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); + break; + case TEK_Complex: + InitRVal = + RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); + break; + case TEK_Aggregate: + InitRVal = RValue::getAggregate(LV.getAddress()); + break; + } + OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); + CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); + CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), + /*IsInitializer=*/false); + } +} + +/// \brief Emit initialization of arrays of complex types. +/// \param DestAddr Address of the array. +/// \param Type Type of array. +/// \param Init Initial expression of array. +/// \param SrcAddr Address of the original array. +static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, + QualType Type, bool EmitDeclareReductionInit, + const Expr *Init, + const OMPDeclareReductionDecl *DRD, + Address SrcAddr = Address::invalid()) { + // Perform element-by-element initialization. + QualType ElementTy; + + // Drill down to the base element type on both arrays. + auto ArrayTy = Type->getAsArrayTypeUnsafe(); + auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); + DestAddr = + CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); + if (DRD) + SrcAddr = + CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); + + llvm::Value *SrcBegin = nullptr; + if (DRD) + SrcBegin = SrcAddr.getPointer(); + auto DestBegin = DestAddr.getPointer(); + // Cast from pointer to array type to pointer to single element. + auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); + // The basic structure here is a while-do loop. 
+ auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); + auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); + auto IsEmpty = + CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); + CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); + + // Enter the loop body, making that address the current address. + auto EntryBB = CGF.Builder.GetInsertBlock(); + CGF.EmitBlock(BodyBB); + + CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); + + llvm::PHINode *SrcElementPHI = nullptr; + Address SrcElementCurrent = Address::invalid(); + if (DRD) { + SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, + "omp.arraycpy.srcElementPast"); + SrcElementPHI->addIncoming(SrcBegin, EntryBB); + SrcElementCurrent = + Address(SrcElementPHI, + SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + } + llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( + DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); + DestElementPHI->addIncoming(DestBegin, EntryBB); + Address DestElementCurrent = + Address(DestElementPHI, + DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + + // Emit copy. + { + CodeGenFunction::RunCleanupsScope InitScope(CGF); + if (EmitDeclareReductionInit) { + emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, + SrcElementCurrent, ElementTy); + } else + CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), + /*IsInitializer=*/false); + } + + if (DRD) { + // Shift the address forward by one element. + auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( + SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); + } + + // Shift the address forward by one element. + auto DestElementNext = CGF.Builder.CreateConstGEP1_32( + DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + // Check whether we've reached the end. + auto Done = + CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); + CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); + DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); + + // Done. + CGF.EmitBlock(DoneBB, /*IsFinished=*/true); +} + +LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { + if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) + return CGF.EmitOMPArraySectionExpr(OASE); + if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) + return CGF.EmitLValue(ASE); + auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), + CGF.CapturedStmtInfo && + CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, + E->getType(), VK_LValue, E->getExprLoc()); + // Store the address of the original variable associated with the LHS + // implicit variable. + return CGF.EmitLValue(&DRE); +} + +LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, + const Expr *E) { + if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) + return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); + return LValue(); +} + +void ReductionCodeGen::emitAggregateInitialization( + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + const OMPDeclareReductionDecl *DRD) { + // Emit VarDecl with copy init for arrays. + // Get the address of the original variable captured in current + // captured region. 
+ auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + bool EmitDeclareReductionInit = + DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); + EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), + EmitDeclareReductionInit, + EmitDeclareReductionInit ? ClausesData[N].ReductionOp + : PrivateVD->getInit(), + DRD, SharedLVal.getAddress()); +} + +ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, + ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> ReductionOps) { + ClausesData.reserve(Shareds.size()); + SharedAddresses.reserve(Shareds.size()); + Sizes.reserve(Shareds.size()); + BaseDecls.reserve(Shareds.size()); + auto IPriv = Privates.begin(); + auto IRed = ReductionOps.begin(); + for (const auto *Ref : Shareds) { + ClausesData.emplace_back(Ref, *IPriv, *IRed); + std::advance(IPriv, 1); + std::advance(IRed, 1); + } +} + +void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { + assert(SharedAddresses.size() == N && + "Number of generated lvalues must be exactly N."); + SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref), + emitSharedLValueUB(CGF, ClausesData[N].Ref)); +} + +void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); + if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { + Sizes.emplace_back( + CGF.getTypeSize( + SharedAddresses[N].first.getType().getNonReferenceType()), + nullptr); + return; + } + llvm::Value *Size; + llvm::Value *SizeInChars; + llvm::Type *ElemType = + cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) + ->getElementType(); + auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); + if (AsArraySection) { + Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), + SharedAddresses[N].first.getPointer()); + Size = CGF.Builder.CreateNUWAdd( + Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); + SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); + } else { + SizeInChars = CGF.getTypeSize( + SharedAddresses[N].first.getType().getNonReferenceType()); + Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); + } + Sizes.emplace_back(SizeInChars, Size); + CodeGenFunction::OpaqueValueMapping OpaqueMap( + CGF, + cast<OpaqueValueExpr>( + CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), + RValue::get(Size)); + CGF.EmitVariablyModifiedType(PrivateType); +} + +void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, + llvm::Value *Size) { + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); + if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { + assert(!Size && !Sizes[N].second && + "Size should be nullptr for non-variably modified redution " + "items."); + return; + } + CodeGenFunction::OpaqueValueMapping OpaqueMap( + CGF, + cast<OpaqueValueExpr>( + CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), + RValue::get(Size)); + CGF.EmitVariablyModifiedType(PrivateType); +} + +void ReductionCodeGen::emitInitialization( + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) 
{ + assert(SharedAddresses.size() > N && "No variable was generated"); + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + auto *DRD = getReductionInit(ClausesData[N].ReductionOp); + QualType PrivateType = PrivateVD->getType(); + PrivateAddr = CGF.Builder.CreateElementBitCast( + PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); + QualType SharedType = SharedAddresses[N].first.getType(); + SharedLVal = CGF.MakeAddrLValue( + CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), + CGF.ConvertTypeForMem(SharedType)), + SharedType, SharedAddresses[N].first.getBaseInfo()); + if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) || + CGF.getContext().getAsArrayType(PrivateVD->getType())) { + emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); + } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { + emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, + PrivateAddr, SharedLVal.getAddress(), + SharedLVal.getType()); + } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && + !CGF.isTrivialInitializer(PrivateVD->getInit())) { + CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, + PrivateVD->getType().getQualifiers(), + /*IsInitializer=*/false); + } +} + +bool ReductionCodeGen::needCleanups(unsigned N) { + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); + return DTorKind != QualType::DK_none; +} + +void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr) { + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); + if (needCleanups(N)) { + PrivateAddr = CGF.Builder.CreateElementBitCast( + PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); + CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); + } +} + +static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, + LValue BaseLV) { + BaseTy = BaseTy.getNonReferenceType(); + while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && + !CGF.getContext().hasSameType(BaseTy, ElTy)) { + if (auto *PtrTy = BaseTy->getAs<PointerType>()) + BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); + else { + BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), + BaseTy->castAs<ReferenceType>()); + } + BaseTy = BaseTy->getPointeeType(); + } + return CGF.MakeAddrLValue( + CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), + CGF.ConvertTypeForMem(ElTy)), + BaseLV.getType(), BaseLV.getBaseInfo()); +} + +static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, + llvm::Type *BaseLVType, CharUnits BaseLVAlignment, + llvm::Value *Addr) { + Address Tmp = Address::invalid(); + Address TopTmp = Address::invalid(); + Address MostTopTmp = Address::invalid(); + BaseTy = BaseTy.getNonReferenceType(); + while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && + !CGF.getContext().hasSameType(BaseTy, ElTy)) { + Tmp = CGF.CreateMemTemp(BaseTy); + if (TopTmp.isValid()) + CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); + else + MostTopTmp = Tmp; + TopTmp = Tmp; + BaseTy = BaseTy->getPointeeType(); + } + llvm::Type *Ty = BaseLVType; + if (Tmp.isValid()) + Ty = Tmp.getElementType(); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); + if (Tmp.isValid()) { + 
CGF.Builder.CreateStore(Addr, Tmp); + return MostTopTmp; + } + return Address(Addr, BaseLVAlignment); +} + +Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr) { + const DeclRefExpr *DE; + const VarDecl *OrigVD = nullptr; + if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) { + auto *Base = OASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) + Base = TempOASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + DE = cast<DeclRefExpr>(Base); + OrigVD = cast<VarDecl>(DE->getDecl()); + } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) { + auto *Base = ASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + DE = cast<DeclRefExpr>(Base); + OrigVD = cast<VarDecl>(DE->getDecl()); + } + if (OrigVD) { + BaseDecls.emplace_back(OrigVD); + auto OriginalBaseLValue = CGF.EmitLValue(DE); + LValue BaseLValue = + loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), + OriginalBaseLValue); + llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( + BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); + llvm::Value *Ptr = + CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment); + return castToBase(CGF, OrigVD->getType(), + SharedAddresses[N].first.getType(), + OriginalBaseLValue.getPointer()->getType(), + OriginalBaseLValue.getAlignment(), Ptr); + } + BaseDecls.emplace_back( + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); + return PrivateAddr; +} + +bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { + auto *DRD = getReductionInit(ClausesData[N].ReductionOp); + return DRD && DRD->getInitializer(); +} + LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { return CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(getThreadIDVariable()), @@ -720,7 +1146,7 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( CodeGenFunction &CGF) { return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), getThreadIDVariable()->getType(), - AlignmentSource::Decl); + LValueBaseInfo(AlignmentSource::Decl, false)); } CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) @@ -728,7 +1154,7 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) IdentTy = llvm::StructType::create( "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, - CGM.Int8PtrTy /* psource */, nullptr); + CGM.Int8PtrTy /* psource */); KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); loadOffloadInfoMetadata(); @@ -747,9 +1173,9 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, QualType PtrTy = C.getPointerType(Ty).withRestrict(); FunctionArgList Args; ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), - /*Id=*/nullptr, PtrTy); + /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), - /*Id=*/nullptr, PtrTy); + /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); Args.push_back(&OmpOutParm); Args.push_back(&OmpInParm); auto &FnInfo = @@ -760,6 +1186,7 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, IsCombiner ? ".omp_combiner." 
: ".omp_initializer.", &CGM.getModule()); CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); Fn->removeFnAttr(llvm::Attribute::NoInline); + Fn->removeFnAttr(llvm::Attribute::OptimizeNone); Fn->addFnAttr(llvm::Attribute::AlwaysInline); CodeGenFunction CGF(CGM); // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. @@ -842,12 +1269,12 @@ static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); } -llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( - const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { +static llvm::Value *emitParallelOrTeamsOutlinedFunction( + CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, + const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, + const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { assert(ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 *"); - const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); CodeGenFunction CGF(CGM, true); bool HasCancel = false; if (auto *OPD = dyn_cast<OMPParallelDirective>(&D)) @@ -857,11 +1284,27 @@ llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, - HasCancel, getOutlinedHelperName()); + HasCancel, OutlinedHelperName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); return CGF.GenerateOpenMPCapturedStmtFunction(*CS); } +llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); + return emitParallelOrTeamsOutlinedFunction( + CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); +} + +llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); + return emitParallelOrTeamsOutlinedFunction( + CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); +} + llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, @@ -1537,6 +1980,26 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); break; } + case OMPRTL__kmpc_task_reduction_init: { + // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void + // *data); + llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = + CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); + break; + } + case OMPRTL__kmpc_task_reduction_get_th_data: { + // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void + // *d); + llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, 
/*Name=*/"__kmpc_task_reduction_get_th_data"); + break; + } case OMPRTL__tgt_target: { // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t @@ -1791,8 +2254,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( // threadprivate copy of the variable VD CodeGenFunction CtorCGF(CGM); FunctionArgList Args; - ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, CGM.getContext().VoidPtrTy); + ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy, + ImplicitParamDecl::Other); Args.push_back(&Dst); auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( @@ -1822,8 +2285,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( // of the variable VD CodeGenFunction DtorCGF(CGM); FunctionArgList Args; - ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, CGM.getContext().VoidPtrTy); + ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy, + ImplicitParamDecl::Other); Args.push_back(&Dst); auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( @@ -1887,6 +2350,27 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; } +Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, + QualType VarType, + StringRef Name) { + llvm::Twine VarName(Name, ".artificial."); + llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); + llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName); + llvm::Value *Args[] = { + emitUpdateLocation(CGF, SourceLocation()), + getThreadID(CGF, SourceLocation()), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), + CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, + /*IsSigned=*/false), + getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")}; + return Address( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), + VarLVType->getPointerTo(/*AddrSpace=*/0)), + CGM.getPointerAlign()); +} + /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen /// function. 
Here is the logic: /// if (Cond) { @@ -2174,10 +2658,8 @@ static llvm::Value *emitCopyprivateCopyFunction( auto &C = CGM.getContext(); // void copy_func(void *LHSArg, void *RHSArg); FunctionArgList Args; - ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, - C.VoidPtrTy); - ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, - C.VoidPtrTy); + ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); Args.push_back(&LHSArg); Args.push_back(&RHSArg); auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); @@ -2450,16 +2932,14 @@ static int addMonoNonMonoModifier(OpenMPSchedType Schedule, return Schedule | Modifier; } -void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, - SourceLocation Loc, - const OpenMPScheduleTy &ScheduleKind, - unsigned IVSize, bool IVSigned, - bool Ordered, llvm::Value *UB, - llvm::Value *Chunk) { +void CGOpenMPRuntime::emitForDispatchInit( + CodeGenFunction &CGF, SourceLocation Loc, + const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, + bool Ordered, const DispatchRTInput &DispatchValues) { if (!CGF.HaveInsertPoint()) return; - OpenMPSchedType Schedule = - getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); + OpenMPSchedType Schedule = getRuntimeSchedule( + ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); assert(Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && @@ -2470,14 +2950,14 @@ void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, // kmp_int[32|64] stride, kmp_int[32|64] chunk); // If the Chunk was not specified in the clause - use default value 1. - if (Chunk == nullptr) - Chunk = CGF.Builder.getIntN(IVSize, 1); + llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk + : CGF.Builder.getIntN(IVSize, 1); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.getInt32(addMonoNonMonoModifier( Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type - CGF.Builder.getIntN(IVSize, 0), // Lower - UB, // Upper + DispatchValues.LB, // Lower + DispatchValues.UB, // Upper CGF.Builder.getIntN(IVSize, 1), // Stride Chunk // Chunk }; @@ -2686,6 +3166,8 @@ enum KmpTaskTFields { KmpTaskTStride, /// (Taskloops only) Is last iteration flag. KmpTaskTLastIter, + /// (Taskloops only) Reduction data. + KmpTaskTReductions, }; } // anonymous namespace @@ -2770,8 +3252,7 @@ createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, const RegionCodeGenTy &Codegen) { auto &C = CGM.getContext(); FunctionArgList Args; - ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, C.VoidPtrTy); + ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); Args.push_back(&DummyPtr); CodeGenFunction CGF(CGM); @@ -2874,7 +3355,7 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var"); ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(), - IdentInfo, C.CharTy); + IdentInfo, C.CharTy, ImplicitParamDecl::Other); auto *UnRegFn = createOffloadingBinaryDescriptorFunction( CGM, ".omp_offloading.descriptor_unreg", @@ -2889,6 +3370,19 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { Desc); CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); }); + if (CGM.supportsCOMDAT()) { + // It is sufficient to call registration function only once, so create a + // COMDAT group for registration/unregistration functions and associated + // data. That would reduce startup time and code size. Registration + // function serves as a COMDAT group key. + auto ComdatKey = M.getOrInsertComdat(RegFn->getName()); + RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); + RegFn->setComdat(ComdatKey); + UnRegFn->setComdat(ComdatKey); + DeviceImages->setComdat(ComdatKey); + Desc->setComdat(ComdatKey); + } return RegFn; } @@ -2958,7 +3452,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Create the offloading info metadata node. llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); - // Auxiliar methods to create metadata values and strings. + // Auxiliary methods to create metadata values and strings. auto getMDInt = [&](unsigned v) { return llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v)); @@ -3225,6 +3719,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, // kmp_uint64 ub; // kmp_int64 st; // kmp_int32 liter; + // void * reductions; // }; auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); UD->startDefinition(); @@ -3248,6 +3743,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, addFieldToRecordDecl(C, RD, KmpUInt64Ty); addFieldToRecordDecl(C, RD, KmpInt64Ty); addFieldToRecordDecl(C, RD, KmpInt32Ty); + addFieldToRecordDecl(C, RD, C.VoidPtrTy); } RD->completeDefinition(); return RD; @@ -3278,7 +3774,7 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, /// For taskloops: /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, -/// tt->shareds); +/// tt->reductions, tt->shareds); /// return 0; /// } /// \endcode @@ -3291,10 +3787,11 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, llvm::Value *TaskPrivatesMap) { auto &C = CGM.getContext(); FunctionArgList Args; - ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); - ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, - KmpTaskTWithPrivatesPtrQTy.withRestrict()); + ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, + ImplicitParamDecl::Other); + ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + KmpTaskTWithPrivatesPtrQTy.withRestrict(), + ImplicitParamDecl::Other); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); auto &TaskEntryFnInfo = @@ -3363,10 +3860,14 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); auto LILVal = CGF.EmitLValueForField(Base, *LIFI); auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); + auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); + auto RLVal = 
CGF.EmitLValueForField(Base, *RFI); + auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal(); CallArgs.push_back(LBParam); CallArgs.push_back(UBParam); CallArgs.push_back(StParam); CallArgs.push_back(LIParam); + CallArgs.push_back(RParam); } CallArgs.push_back(SharedsParam); @@ -3385,10 +3886,11 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, QualType KmpTaskTWithPrivatesQTy) { auto &C = CGM.getContext(); FunctionArgList Args; - ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); - ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, - KmpTaskTWithPrivatesPtrQTy.withRestrict()); + ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, + ImplicitParamDecl::Other); + ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + KmpTaskTWithPrivatesPtrQTy.withRestrict(), + ImplicitParamDecl::Other); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); FunctionType::ExtInfo Info; @@ -3444,36 +3946,40 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, FunctionArgList Args; ImplicitParamDecl TaskPrivatesArg( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.getPointerType(PrivatesQTy).withConst().withRestrict()); + C.getPointerType(PrivatesQTy).withConst().withRestrict(), + ImplicitParamDecl::Other); Args.push_back(&TaskPrivatesArg); llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; unsigned Counter = 1; for (auto *E: PrivateVars) { Args.push_back(ImplicitParamDecl::Create( - C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) - .withConst() - .withRestrict())); + C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.getPointerType(C.getPointerType(E->getType())) + .withConst() + .withRestrict(), + ImplicitParamDecl::Other)); auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; } for (auto *E : FirstprivateVars) { Args.push_back(ImplicitParamDecl::Create( - C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) - .withConst() - .withRestrict())); + C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.getPointerType(C.getPointerType(E->getType())) + .withConst() + .withRestrict(), + ImplicitParamDecl::Other)); auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; } for (auto *E: LastprivateVars) { Args.push_back(ImplicitParamDecl::Create( - C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) - .withConst() - .withRestrict())); + C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.getPointerType(C.getPointerType(E->getType())) + .withConst() + .withRestrict(), + ImplicitParamDecl::Other)); auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; @@ -3488,6 +3994,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, TaskPrivatesMapFnInfo); TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); + TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); CodeGenFunction CGF(CGM); CGF.disableDebugInfo(); @@ -3551,7 +4058,9 @@ static void emitPrivatesInit(CodeGenFunction &CGF, auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); SharedRefLValue = CGF.MakeAddrLValue( Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), - SharedRefLValue.getType(), AlignmentSource::Decl); + 
SharedRefLValue.getType(), + LValueBaseInfo(AlignmentSource::Decl, + SharedRefLValue.getBaseInfo().getMayAlias())); QualType Type = OriginalVD->getType(); if (Type->isArrayType()) { // Initialize firstprivate array. @@ -3561,7 +4070,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF, SharedRefLValue.getAddress(), Type); } else { // Initialize firstprivate array using element-by-element - // intialization. + // initialization. CGF.EmitOMPAggregateAssign( PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, [&CGF, Elem, Init, &CapturesInfo](Address DestElement, @@ -3630,12 +4139,14 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { auto &C = CGM.getContext(); FunctionArgList Args; - ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); - ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); - ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, C.IntTy); + ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + KmpTaskTWithPrivatesPtrQTy, + ImplicitParamDecl::Other); + ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + KmpTaskTWithPrivatesPtrQTy, + ImplicitParamDecl::Other); + ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, + ImplicitParamDecl::Other); Args.push_back(&DstArg); Args.push_back(&SrcArg); Args.push_back(&LastprivArg); @@ -3745,9 +4256,20 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, // Build type kmp_routine_entry_t (if not built yet). emitKmpRoutineEntryT(KmpInt32Ty); // Build type kmp_task_t (if not built yet). - if (KmpTaskTQTy.isNull()) { - KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( - CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); + if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { + if (SavedKmpTaskloopTQTy.isNull()) { + SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( + CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); + } + KmpTaskTQTy = SavedKmpTaskloopTQTy; + } else if (D.getDirectiveKind() == OMPD_task) { + assert(D.getDirectiveKind() == OMPD_task && + "Expected taskloop or task directive"); + if (SavedKmpTaskTQTy.isNull()) { + SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( + CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); + } + KmpTaskTQTy = SavedKmpTaskTQTy; } auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); // Build particular struct kmp_task_t for the given task. @@ -3764,9 +4286,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, // Emit initial values for private copies (if any). 
llvm::Value *TaskPrivatesMap = nullptr; auto *TaskPrivatesMapTy = - std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(), - 3) - ->getType(); + std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType(); if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); TaskPrivatesMap = emitTaskPrivateMappingFunction( @@ -4006,8 +4526,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, DepTaskArgs[5] = CGF.Builder.getInt32(0); DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } - auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD, - NumDependencies, &TaskArgs, + auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, + &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { if (!Data.Tied) { auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); @@ -4121,11 +4641,27 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), /*IsInitializer=*/true); + // Store reductions address. + LValue RedLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); + if (Data.Reductions) + CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); + else { + CGF.EmitNullInitialization(RedLVal.getAddress(), + CGF.getContext().VoidPtrTy); + } enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; llvm::Value *TaskArgs[] = { - UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(), - UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()), - llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0), + UpLoc, + ThreadID, + Result.NewTask, + IfVal, + LBLVal.getPointer(), + UBLVal.getPointer(), + CGF.EmitLoadOfScalar(StLVal, SourceLocation()), + llvm::ConstantInt::getNullValue( + CGF.IntTy), // Always 0 because taskgroup emitted by the compiler llvm::ConstantInt::getSigned( CGF.IntTy, Data.Schedule.getPointer() ? Data.Schedule.getInt() ? NumTasks : Grainsize @@ -4134,10 +4670,9 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, /*isSigned=*/false) : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), - Result.TaskDupFn - ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn, - CGF.VoidPtrTy) - : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; + Result.TaskDupFn ? 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Result.TaskDupFn, CGF.VoidPtrTy) + : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); } @@ -4241,20 +4776,16 @@ static void emitReductionCombiner(CodeGenFunction &CGF, CGF.EmitIgnoredExpr(ReductionOp); } -static llvm::Value *emitReductionFunction(CodeGenModule &CGM, - llvm::Type *ArgsType, - ArrayRef<const Expr *> Privates, - ArrayRef<const Expr *> LHSExprs, - ArrayRef<const Expr *> RHSExprs, - ArrayRef<const Expr *> ReductionOps) { +llvm::Value *CGOpenMPRuntime::emitReductionFunction( + CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps) { auto &C = CGM.getContext(); // void reduction_func(void *LHSArg, void *RHSArg); FunctionArgList Args; - ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, - C.VoidPtrTy); - ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, - C.VoidPtrTy); + ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); Args.push_back(&LHSArg); Args.push_back(&RHSArg); auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); @@ -4329,11 +4860,11 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM, return Fn; } -static void emitSingleReductionCombiner(CodeGenFunction &CGF, - const Expr *ReductionOp, - const Expr *PrivateRef, - const DeclRefExpr *LHS, - const DeclRefExpr *RHS) { +void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, + const Expr *ReductionOp, + const Expr *PrivateRef, + const DeclRefExpr *LHS, + const DeclRefExpr *RHS) { if (PrivateRef->getType()->isArrayType()) { // Emit reduction for array section. 
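For a single scalar reduction(+ : x) item, the 'void reduction_func(void *LHSArg, void *RHSArg)' helper that emitReductionFunction builds above reduces to something like the following plain C++. Only the argument layout, an array of per-item element pointers on each side, is taken from the code; the function and variable names are hypothetical:

    // reduction_func: combine the thread-private copies into the LHS items.
    static void reduction_func(void *LHSArg, void *RHSArg) {
      // Each argument is an array of pointers, one slot per reduction item.
      void **LHSPtrs = static_cast<void **>(LHSArg);
      void **RHSPtrs = static_cast<void **>(RHSArg);

      // Item 0: an 'int' reduced with '+'.
      int *lhs0 = static_cast<int *>(LHSPtrs[0]);
      int *rhs0 = static_cast<int *>(RHSPtrs[0]);
      *lhs0 = *lhs0 + *rhs0; // LHS = LHS op RHS, as emitted by the combiner
    }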
auto *LHSVar = cast<VarDecl>(LHS->getDecl()); @@ -4353,9 +4884,13 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps, - bool WithNowait, bool SimpleReduction) { + ReductionOptionsTy Options) { if (!CGF.HaveInsertPoint()) return; + + bool WithNowait = Options.WithNowait; + bool SimpleReduction = Options.SimpleReduction; + // Next code should be emitted for reduction: // // static kmp_critical_name lock = { 0 }; @@ -4497,12 +5032,13 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, }; auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( CodeGenFunction &CGF, PrePostActionTy &Action) { + auto &RT = CGF.CGM.getOpenMPRuntime(); auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { - emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), - cast<DeclRefExpr>(*IRHS)); + RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), + cast<DeclRefExpr>(*IRHS)); ++IPriv; ++ILHS; ++IRHS; @@ -4562,7 +5098,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, } if (XExpr) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto &&AtomicRedGen = [BO, VD, IPriv, + auto &&AtomicRedGen = [BO, VD, Loc](CodeGenFunction &CGF, const Expr *XExpr, const Expr *EExpr, const Expr *UpExpr) { LValue X = CGF.EmitLValue(XExpr); @@ -4572,7 +5108,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitOMPAtomicSimpleUpdateExpr( X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::AtomicOrdering::Monotonic, Loc, - [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) { + [&CGF, UpExpr, VD, Loc](RValue XRValue) { CodeGenFunction::OMPPrivateScope PrivateScope(CGF); PrivateScope.addPrivate( VD, [&CGF, VD, XRValue, Loc]() -> Address { @@ -4640,6 +5176,353 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); } +/// Generates unique name for artificial threadprivate variables. +/// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N> +static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc, + unsigned N) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << Prefix << "." 
<< Loc.getRawEncoding() << "_" << N; + return Out.str(); +} + +/// Emits reduction initializer function: +/// \code +/// void @.red_init(void* %arg) { +/// %0 = bitcast void* %arg to <type>* +/// store <type> <init>, <type>* %0 +/// ret void +/// } +/// \endcode +static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, + SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N) { + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.emplace_back(&Param); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + ".red_init.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + Address PrivateAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&Param), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + llvm::Value *Size = nullptr; + // If the size of the reduction item is non-constant, load it from global + // threadprivate variable. + if (RCG.getSizes(N).second) { + Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + Size = + CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), SourceLocation()); + } + RCG.emitAggregateType(CGF, N, Size); + LValue SharedLVal; + // If initializer uses initializer from declare reduction construct, emit a + // pointer to the address of the original reduction item (reuired by reduction + // initializer) + if (RCG.usesReductionInitializer(N)) { + Address SharedAddr = + CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().VoidPtrTy, + generateUniqueName("reduction", Loc, N)); + SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); + } else { + SharedLVal = CGF.MakeNaturalAlignAddrLValue( + llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + CGM.getContext().VoidPtrTy); + } + // Emit the initializer: + // %0 = bitcast void* %arg to <type>* + // store <type> <init>, <type>* %0 + RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, + [](CodeGenFunction &) { return false; }); + CGF.FinishFunction(); + return Fn; +} + +/// Emits reduction combiner function: +/// \code +/// void @.red_comb(void* %arg0, void* %arg1) { +/// %lhs = bitcast void* %arg0 to <type>* +/// %rhs = bitcast void* %arg1 to <type>* +/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) +/// store <type> %2, <type>* %lhs +/// ret void +/// } +/// \endcode +static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, + SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N, + const Expr *ReductionOp, + const Expr *LHS, const Expr *RHS, + const Expr *PrivateRef) { + auto &C = CGM.getContext(); + auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); + auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); + FunctionArgList Args; + ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.emplace_back(&ParamInOut); + Args.emplace_back(&ParamIn); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = 
llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + ".red_comb.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + llvm::Value *Size = nullptr; + // If the size of the reduction item is non-constant, load it from global + // threadprivate variable. + if (RCG.getSizes(N).second) { + Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + Size = + CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), SourceLocation()); + } + RCG.emitAggregateType(CGF, N, Size); + // Remap lhs and rhs variables to the addresses of the function arguments. + // %lhs = bitcast void* %arg0 to <type>* + // %rhs = bitcast void* %arg1 to <type>* + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address { + // Pull out the pointer to the variable. + Address PtrAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&ParamInOut), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + return CGF.Builder.CreateElementBitCast( + PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); + }); + PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address { + // Pull out the pointer to the variable. + Address PtrAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&ParamIn), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + return CGF.Builder.CreateElementBitCast( + PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); + }); + PrivateScope.Privatize(); + // Emit the combiner body: + // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) + // store <type> %2, <type>* %lhs + CGM.getOpenMPRuntime().emitSingleReductionCombiner( + CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), + cast<DeclRefExpr>(RHS)); + CGF.FinishFunction(); + return Fn; +} + +/// Emits reduction finalizer function: +/// \code +/// void @.red_fini(void* %arg) { +/// %0 = bitcast void* %arg to <type>* +/// <destroy>(<type>* %0) +/// ret void +/// } +/// \endcode +static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, + SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N) { + if (!RCG.needCleanups(N)) + return nullptr; + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.emplace_back(&Param); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + ".red_fini.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + Address PrivateAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&Param), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + llvm::Value *Size = nullptr; + // If the size of the reduction item is non-constant, load it from global + // threadprivate variable. 
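Taken together, the three emitters produce callbacks that, for a trivial int item reduced with '+', behave roughly like the sketch below. It illustrates the \code blocks in the comments above rather than the emitted IR, and the names are invented for the example:

    // .red_init.: store the identity of the operation into the private copy.
    static void red_init(void *arg) {
      *static_cast<int *>(arg) = 0; // '+' identity
    }

    // .red_comb.: combine the incoming copy into the in/out copy.
    static void red_comb(void *inout, void *in) {
      int *lhs = static_cast<int *>(inout);
      int *rhs = static_cast<int *>(in);
      *lhs = *lhs + *rhs;
    }

    // .red_fini.: run cleanups for the private copy.  For a trivially
    // destructible int there is nothing to do, which is exactly the case where
    // emitReduceFiniFunction returns nullptr instead of emitting a function.
    static void red_fini(void * /*arg*/) {}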
+ if (RCG.getSizes(N).second) { + Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + Size = + CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), SourceLocation()); + } + RCG.emitAggregateType(CGF, N, Size); + // Emit the finalizer body: + // <destroy>(<type>* %0) + RCG.emitCleanups(CGF, N, PrivateAddr); + CGF.FinishFunction(); + return Fn; +} + +llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( + CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { + if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) + return nullptr; + + // Build typedef struct: + // kmp_task_red_input { + // void *reduce_shar; // shared reduction item + // size_t reduce_size; // size of data item + // void *reduce_init; // data initialization routine + // void *reduce_fini; // data finalization routine + // void *reduce_comb; // data combiner routine + // kmp_task_red_flags_t flags; // flags for additional info from compiler + // } kmp_task_red_input_t; + ASTContext &C = CGM.getContext(); + auto *RD = C.buildImplicitRecord("kmp_task_red_input_t"); + RD->startDefinition(); + const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); + const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *FlagsFD = addFieldToRecordDecl( + C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); + RD->completeDefinition(); + QualType RDType = C.getRecordType(RD); + unsigned Size = Data.ReductionVars.size(); + llvm::APInt ArraySize(/*numBits=*/64, Size); + QualType ArrayRDType = C.getConstantArrayType( + RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); + // kmp_task_red_input_t .rd_input.[Size]; + Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); + ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, + Data.ReductionOps); + for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { + // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; + llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), + llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; + llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( + TaskRedInput.getPointer(), Idxs, + /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, + ".rd_input.gep."); + LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); + // ElemLVal.reduce_shar = &Shareds[Cnt]; + LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); + RCG.emitSharedLValue(CGF, Cnt); + llvm::Value *CastedShared = + CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); + CGF.EmitStoreOfScalar(CastedShared, SharedLVal); + RCG.emitAggregateType(CGF, Cnt); + llvm::Value *SizeValInChars; + llvm::Value *SizeVal; + std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); + // We use delayed creation/initialization for VLAs, array sections and + // custom reduction initializations. It is required because runtime does not + // provide the way to pass the sizes of VLAs/array sections to + // initializer/combiner/finalizer functions and does not pass the pointer to + // original reduction item to the initializer. 
Instead threadprivate global + // variables are used to store these values and use them in the functions. + bool DelayedCreation = !!SizeVal; + SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, + /*isSigned=*/false); + LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); + CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); + // ElemLVal.reduce_init = init; + LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); + llvm::Value *InitAddr = + CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); + CGF.EmitStoreOfScalar(InitAddr, InitLVal); + DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); + // ElemLVal.reduce_fini = fini; + LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); + llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); + llvm::Value *FiniAddr = Fini + ? CGF.EmitCastToVoidPtr(Fini) + : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); + CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); + // ElemLVal.reduce_comb = comb; + LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); + llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( + CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], + RHSExprs[Cnt], Data.ReductionCopies[Cnt])); + CGF.EmitStoreOfScalar(CombAddr, CombLVal); + // ElemLVal.flags = 0; + LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); + if (DelayedCreation) { + CGF.EmitStoreOfScalar( + llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true), + FlagsLVal); + } else + CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); + } + // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void + // *data); + llvm::Value *Args[] = { + CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, + /*isSigned=*/true), + llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), + CGM.VoidPtrTy)}; + return CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); +} + +void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, + SourceLocation Loc, + ReductionCodeGen &RCG, + unsigned N) { + auto Sizes = RCG.getSizes(N); + // Emit threadprivate global variable if the type is non-constant + // (Sizes.second = nullptr). + if (Sizes.second) { + llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, + /*isSigned=*/false); + Address SizeAddr = getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); + } + // Store address of the original reduction item if custom initializer is used. 
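On the host side, the .rd_input. array built by emitTaskReductionInit corresponds to roughly the following. The struct mirrors the implicit kmp_task_red_input_t record defined above, and the extern declaration repeats the signature quoted in the code; the helper function and its parameters are made up for the example:

    #include <cstddef>

    struct kmp_task_red_input_t {
      void *reduce_shar;       // shared reduction item
      std::size_t reduce_size; // size of the data item
      void *reduce_init;       // data initialization routine
      void *reduce_fini;       // data finalization routine
      void *reduce_comb;       // data combiner routine
      unsigned flags;          // 1 requests delayed creation (VLAs, custom init)
    };

    extern "C" void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);

    // Register one scalar item and return the taskgroup's reduction handle.
    void *registerOneItem(int gtid, int &orig_x, void (*init)(void *),
                          void (*fini)(void *), void (*comb)(void *, void *)) {
      kmp_task_red_input_t rd_input[1];
      rd_input[0].reduce_shar = &orig_x;
      rd_input[0].reduce_size = sizeof(orig_x);
      rd_input[0].reduce_init = reinterpret_cast<void *>(init);
      rd_input[0].reduce_fini = reinterpret_cast<void *>(fini);
      rd_input[0].reduce_comb = reinterpret_cast<void *>(comb);
      rd_input[0].flags = 0; // constant size, default initializer
      return __kmpc_task_reduction_init(gtid, /*num_data=*/1, rd_input);
    }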
+ if (RCG.usesReductionInitializer(N)) { + Address SharedAddr = getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().VoidPtrTy, + generateUniqueName("reduction", Loc, N)); + CGF.Builder.CreateStore( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), + SharedAddr, /*IsVolatile=*/false); + } +} + +Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, + SourceLocation Loc, + llvm::Value *ReductionsPtr, + LValue SharedLVal) { + // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void + // *d); + llvm::Value *Args[] = { + CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, + /*isSigned=*/true), + ReductionsPtr, + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), + CGM.VoidPtrTy)}; + return Address( + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), + SharedLVal.getAlignment()); +} + void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) @@ -4874,25 +5757,45 @@ static const Stmt *ignoreCompoundStmts(const Stmt *Body) { return Body; } -/// \brief Emit the num_teams clause of an enclosed teams directive at the -/// target region scope. If there is no teams directive associated with the -/// target directive, or if there is no num_teams clause associated with the -/// enclosed teams directive, return nullptr. +/// Emit the number of teams for a target directive. Inspect the num_teams +/// clause associated with a teams construct combined or closely nested +/// with the target directive. +/// +/// Emit a team of size one for directives such as 'target parallel' that +/// have no associated teams construct. +/// +/// Otherwise, return nullptr. static llvm::Value * -emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, - CodeGenFunction &CGF, - const OMPExecutableDirective &D) { +emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " "teams directive expected to be " "emitted only for the host!"); - // FIXME: For the moment we do not support combined directives with target and - // teams, so we do not expect to get any num_teams clause in the provided - // directive. Once we support that, this assertion can be replaced by the - // actual emission of the clause expression. - assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr && - "Not expecting clause in directive."); + auto &Bld = CGF.Builder; + + // If the target directive is combined with a teams directive: + // Return the value in the num_teams clause, if any. + // Otherwise, return 0 to denote the runtime default. + if (isOpenMPTeamsDirective(D.getDirectiveKind())) { + if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) { + CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); + auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), + /*IgnoreResultAssign*/ true); + return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, + /*IsSigned=*/true); + } + + // The default value is 0. + return Bld.getInt32(0); + } + + // If the target directive is combined with a parallel directive but not a + // teams directive, start one team. + if (isOpenMPParallelDirective(D.getDirectiveKind())) + return Bld.getInt32(1); // If the current target region has a teams region enclosed, we need to get // the number of teams to pass to the runtime function call. 
This is done @@ -4910,38 +5813,92 @@ emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, CGOpenMPInnerExprInfo CGInfo(CGF, CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); - return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty, - /*IsSigned=*/true); + return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, + /*IsSigned=*/true); } // If we have an enclosed teams directive but no num_teams clause we use // the default value 0. - return CGF.Builder.getInt32(0); + return Bld.getInt32(0); } // No teams associated with the directive. return nullptr; } -/// \brief Emit the thread_limit clause of an enclosed teams directive at the -/// target region scope. If there is no teams directive associated with the -/// target directive, or if there is no thread_limit clause associated with the -/// enclosed teams directive, return nullptr. +/// Emit the number of threads for a target directive. Inspect the +/// thread_limit clause associated with a teams construct combined or closely +/// nested with the target directive. +/// +/// Emit the num_threads clause for directives such as 'target parallel' that +/// have no associated teams construct. +/// +/// Otherwise, return nullptr. static llvm::Value * -emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, - CodeGenFunction &CGF, - const OMPExecutableDirective &D) { +emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " "teams directive expected to be " "emitted only for the host!"); - // FIXME: For the moment we do not support combined directives with target and - // teams, so we do not expect to get any thread_limit clause in the provided - // directive. Once we support that, this assertion can be replaced by the - // actual emission of the clause expression. - assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr && - "Not expecting clause in directive."); + auto &Bld = CGF.Builder; + + // + // If the target directive is combined with a teams directive: + // Return the value in the thread_limit clause, if any. + // + // If the target directive is combined with a parallel directive: + // Return the value in the num_threads clause, if any. + // + // If both clauses are set, select the minimum of the two. + // + // If neither teams or parallel combined directives set the number of threads + // in a team, return 0 to denote the runtime default. + // + // If this is not a teams directive return nullptr. 
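The clause-selection rules spelled out above amount to a small decision procedure. A minimal sketch in plain C++, where 0 stands for the runtime default and -1 stands in for the nullptr result (helper names are invented; the real code works on llvm::Value*):

    #include <algorithm>

    // emitNumTeamsForTargetDirective, for combined directives: 0 means
    // "runtime default", -1 makes the caller fall back to scanning for an
    // enclosed teams region.
    int numTeamsForTarget(bool isTeams, bool isParallel, bool hasNumTeamsClause,
                          int numTeamsClauseValue) {
      if (isTeams)
        return hasNumTeamsClause ? numTeamsClauseValue : 0;
      if (isParallel)
        return 1; // e.g. 'target parallel': exactly one team
      return -1;
    }

    // emitNumThreadsForTargetDirective: thread_limit and num_threads are
    // combined by taking the smaller of the two when both are present.
    int numThreadsForTarget(bool hasThreadLimit, int threadLimit,
                            bool hasNumThreads, int numThreads) {
      if (hasThreadLimit && hasNumThreads)
        return std::min(numThreads, threadLimit);
      if (hasNumThreads)
        return numThreads;
      if (hasThreadLimit)
        return threadLimit;
      return 0; // runtime default
    }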
+ + if (isOpenMPTeamsDirective(D.getDirectiveKind()) || + isOpenMPParallelDirective(D.getDirectiveKind())) { + llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0); + llvm::Value *NumThreadsVal = nullptr; + llvm::Value *ThreadLimitVal = nullptr; + + if (const auto *ThreadLimitClause = + D.getSingleClause<OMPThreadLimitClause>()) { + CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); + auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), + /*IgnoreResultAssign*/ true); + ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, + /*IsSigned=*/true); + } + + if (const auto *NumThreadsClause = + D.getSingleClause<OMPNumThreadsClause>()) { + CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); + llvm::Value *NumThreads = + CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), + /*IgnoreResultAssign*/ true); + NumThreadsVal = + Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); + } + + // Select the lesser of thread_limit and num_threads. + if (NumThreadsVal) + ThreadLimitVal = ThreadLimitVal + ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal, + ThreadLimitVal), + NumThreadsVal, ThreadLimitVal) + : NumThreadsVal; + + // Set default value passed to the runtime if either teams or a target + // parallel type directive is found but no clause is specified. + if (!ThreadLimitVal) + ThreadLimitVal = DefaultThreadLimitVal; + + return ThreadLimitVal; + } // If the current target region has a teams region enclosed, we need to get // the thread limit to pass to the runtime function call. This is done @@ -5494,7 +6451,7 @@ public: // We have to process the component lists that relate with the same // declaration in a single chunk so that we can generate the map flags // correctly. Therefore, we organize all lists in a map. - llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info; + llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; // Helper function to fill the information map for the different supported // clauses. 
@@ -5818,16 +6775,11 @@ emitOffloadingArrays(CodeGenFunction &CGF, for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) { llvm::Value *BPVal = *BasePointers[i]; - if (BPVal->getType()->isPointerTy()) - BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); - else { - assert(BPVal->getType()->isIntegerTy() && - "If not a pointer, the value type must be an integer."); - BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); - } llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), Info.BasePointersArray, 0, i); + BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); CGF.Builder.CreateStore(BPVal, BPAddr); @@ -5836,16 +6788,11 @@ emitOffloadingArrays(CodeGenFunction &CGF, Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr)); llvm::Value *PVal = Pointers[i]; - if (PVal->getType()->isPointerTy()) - PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); - else { - assert(PVal->getType()->isIntegerTy() && - "If not a pointer, the value type must be an integer."); - PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); - } llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), Info.PointersArray, 0, i); + P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); CGF.Builder.CreateStore(PVal, PAddr); @@ -5984,8 +6931,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, OffloadError); // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device, - OutlinedFnID, OffloadError, OffloadErrorQType, + auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device, + OutlinedFnID, OffloadError, &D](CodeGenFunction &CGF, PrePostActionTy &) { auto &RT = CGF.CGM.getOpenMPRuntime(); // Emit the offloading arrays. @@ -6021,24 +6968,50 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // Return value of the runtime offloading call. llvm::Value *Return; - auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D); - auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D); + auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D); + auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D); - // If we have NumTeams defined this means that we have an enclosed teams - // region. Therefore we also expect to have ThreadLimit defined. These two - // values should be defined in the presence of a teams directive, regardless - // of having any clauses associated. If the user is using teams but no - // clauses, these two values will be the default that should be passed to - // the runtime library - a 32-bit integer with the value zero. + // The target region is an outlined function launched by the runtime + // via calls __tgt_target() or __tgt_target_teams(). + // + // __tgt_target() launches a target region with one team and one thread, + // executing a serial region. This master thread may in turn launch + // more threads within its team upon encountering a parallel region, + // however, no additional teams can be launched on the device. + // + // __tgt_target_teams() launches a target region with one or more teams, + // each with one or more threads. 
This call is required for target + // constructs such as: + // 'target teams' + // 'target' / 'teams' + // 'target teams distribute parallel for' + // 'target parallel' + // and so on. + // + // Note that on the host and CPU targets, the runtime implementation of + // these calls simply call the outlined function without forking threads. + // The outlined functions themselves have runtime calls to + // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by + // the compiler in emitTeamsCall() and emitParallelCall(). + // + // In contrast, on the NVPTX target, the implementation of + // __tgt_target_teams() launches a GPU kernel with the requested number + // of teams and threads so no additional calls to the runtime are required. if (NumTeams) { - assert(ThreadLimit && "Thread limit expression should be available along " - "with number of teams."); + // If we have NumTeams defined this means that we have an enclosed teams + // region. Therefore we also expect to have NumThreads defined. These two + // values should be defined in the presence of a teams directive, + // regardless of having any clauses associated. If the user is using teams + // but no clauses, these two values will be the default that should be + // passed to the runtime library - a 32-bit integer with the value zero. + assert(NumThreads && "Thread limit expression should be available along " + "with number of teams."); llvm::Value *OffloadingArgs[] = { DeviceID, OutlinedFnID, PointerNum, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, Info.MapTypesArray, NumTeams, - ThreadLimit}; + NumThreads}; Return = CGF.EmitRuntimeCall( RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); } else { @@ -6095,17 +7068,18 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, if (!S) return; - // If we find a OMP target directive, codegen the outline function and - // register the result. - // FIXME: Add other directives with target when they become supported. - bool isTargetDirective = isa<OMPTargetDirective>(S); + // Codegen OMP target directives that offload compute to the device. + bool requiresDeviceCodegen = + isa<OMPExecutableDirective>(S) && + isOpenMPTargetExecutionDirective( + cast<OMPExecutableDirective>(S)->getDirectiveKind()); - if (isTargetDirective) { - auto *E = cast<OMPExecutableDirective>(S); + if (requiresDeviceCodegen) { + auto &E = *cast<OMPExecutableDirective>(S); unsigned DeviceID; unsigned FileID; unsigned Line; - getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID, + getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID, FileID, Line); // Is this a target region that should not be emitted as an entry point? 
If @@ -6114,13 +7088,22 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, ParentName, Line)) return; - llvm::Function *Fn; - llvm::Constant *Addr; - std::tie(Fn, Addr) = - CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( - CGM, cast<OMPTargetDirective>(*E), ParentName, - /*isOffloadEntry=*/true); - assert(Fn && Addr && "Target region emission failed."); + switch (S->getStmtClass()) { + case Stmt::OMPTargetDirectiveClass: + CodeGenFunction::EmitOMPTargetDeviceFunction( + CGM, ParentName, cast<OMPTargetDirective>(*S)); + break; + case Stmt::OMPTargetParallelDirectiveClass: + CodeGenFunction::EmitOMPTargetParallelDeviceFunction( + CGM, ParentName, cast<OMPTargetParallelDirective>(*S)); + break; + case Stmt::OMPTargetTeamsDirectiveClass: + CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( + CGM, ParentName, cast<OMPTargetTeamsDirective>(*S)); + break; + default: + llvm_unreachable("Unknown target directive for OpenMP device codegen."); + } return; } @@ -6182,7 +7165,7 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { } } - // If we are in target mode we do not emit any global (declare target is not + // If we are in target mode, we do not emit any global (declare target is not // implemented yet). Therefore we signal that GD was processed in this case. return true; } @@ -6271,8 +7254,8 @@ void CGOpenMPRuntime::emitTargetDataCalls( // Generate the code for the opening of the data environment. Capture all the // arguments of the runtime call by reference because they are used in the // closing of the region. - auto &&BeginThenGen = [&D, &CGF, Device, &Info, &CodeGen, &NoPrivAction]( - CodeGenFunction &CGF, PrePostActionTy &) { + auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF, + PrePostActionTy &) { // Fill up the arrays with all the mapped variables. MappableExprsHandler::MapBaseValuesArrayTy BasePointers; MappableExprsHandler::MapValuesArrayTy Pointers; @@ -6318,8 +7301,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( }; // Generate code for the closing of the data region. - auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF, - PrePostActionTy &) { + auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) { assert(Info.isValid() && "Invalid data environment closing arguments."); llvm::Value *BasePointersArrayArg = nullptr; @@ -6397,7 +7379,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( "Expecting either target enter, exit data, or update directives."); // Generate the code for the opening of the data environment. - auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) { // Fill up the arrays with all the mapped variables. 
MappableExprsHandler::MapBaseValuesArrayTy BasePointers; MappableExprsHandler::MapValuesArrayTy Pointers; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h index 61ddc70..185c01d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -96,15 +96,106 @@ struct OMPTaskDataTy final { SmallVector<const Expr *, 4> FirstprivateInits; SmallVector<const Expr *, 4> LastprivateVars; SmallVector<const Expr *, 4> LastprivateCopies; + SmallVector<const Expr *, 4> ReductionVars; + SmallVector<const Expr *, 4> ReductionCopies; + SmallVector<const Expr *, 4> ReductionOps; SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4> Dependences; llvm::PointerIntPair<llvm::Value *, 1, bool> Final; llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule; llvm::PointerIntPair<llvm::Value *, 1, bool> Priority; + llvm::Value *Reductions = nullptr; unsigned NumberOfParts = 0; bool Tied = true; bool Nogroup = false; }; +/// Class intended to support codegen of all kinds of reduction clauses. +class ReductionCodeGen { +private: + /// Data required for codegen of reduction clauses. + struct ReductionData { + /// Reference to the original shared item. + const Expr *Ref = nullptr; + /// Helper expression for generation of private copy. + const Expr *Private = nullptr; + /// Helper expression for generation of the reduction operation. + const Expr *ReductionOp = nullptr; + ReductionData(const Expr *Ref, const Expr *Private, const Expr *ReductionOp) + : Ref(Ref), Private(Private), ReductionOp(ReductionOp) {} + }; + /// List of reduction-based clauses. + SmallVector<ReductionData, 4> ClausesData; + + /// List of addresses of original shared variables/expressions. + SmallVector<std::pair<LValue, LValue>, 4> SharedAddresses; + /// Sizes of the reduction items in chars. + SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4> Sizes; + /// Base declarations for the reduction items. + SmallVector<const VarDecl *, 4> BaseDecls; + + /// Emits lvalue for shared expression. + LValue emitSharedLValue(CodeGenFunction &CGF, const Expr *E); + /// Emits upper bound for shared expression (if array section). + LValue emitSharedLValueUB(CodeGenFunction &CGF, const Expr *E); + /// Performs aggregate initialization. + /// \param N Number of reduction item in the common list. + /// \param PrivateAddr Address of the corresponding private item. + /// \param SharedLVal Address of the original shared variable. + /// \param DRD Declare reduction construct used for reduction item. + void emitAggregateInitialization(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr, LValue SharedLVal, + const OMPDeclareReductionDecl *DRD); + +public: + ReductionCodeGen(ArrayRef<const Expr *> Shareds, + ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> ReductionOps); + /// Emits lvalue for a reduction item. + /// \param N Number of the reduction item. + void emitSharedLValue(CodeGenFunction &CGF, unsigned N); + /// Emits the code for the variable-modified type, if required. + /// \param N Number of the reduction item. + void emitAggregateType(CodeGenFunction &CGF, unsigned N); + /// Emits the code for the variable-modified type, if required. + /// \param N Number of the reduction item. + /// \param Size Size of the type in chars. + void emitAggregateType(CodeGenFunction &CGF, unsigned N, llvm::Value *Size); + /// Performs initialization of the private copy for the reduction item.
+ /// \param N Number of the reduction item. + /// \param PrivateAddr Address of the corresponding private item. + /// \param DefaultInit Default initialization sequence that should be + /// performed if no reduction specific initialization is found. + /// \param SharedLVal Address of the original shared variable. + void + emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, + LValue SharedLVal, + llvm::function_ref<bool(CodeGenFunction &)> DefaultInit); + /// Returns true if the private copy requires cleanups. + bool needCleanups(unsigned N); + /// Emits cleanup code for the reduction item. + /// \param N Number of the reduction item. + /// \param PrivateAddr Address of the corresponding private item. + void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr); + /// Adjusts \p PrivatedAddr for using instead of the original variable + /// address in normal operations. + /// \param N Number of the reduction item. + /// \param PrivateAddr Address of the corresponding private item. + Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr); + /// Returns LValue for the reduction item. + LValue getSharedLValue(unsigned N) const { return SharedAddresses[N].first; } + /// Returns the size of the reduction item (in chars and total number of + /// elements in the item), or nullptr, if the size is a constant. + std::pair<llvm::Value *, llvm::Value *> getSizes(unsigned N) const { + return Sizes[N]; + } + /// Returns the base declaration of the reduction item. + const VarDecl *getBaseDecl(unsigned N) const { return BaseDecls[N]; } + /// Returns true if the initialization of the reduction item uses initializer + /// from declare reduction construct. + bool usesReductionInitializer(unsigned N) const; +}; + class CGOpenMPRuntime { protected: CodeGenModule &CGM; @@ -121,7 +212,7 @@ protected: /// \param OutlinedFnID Outlined function ID value to be defined by this call. /// \param IsOffloadEntry True if the outlined function is an offload entry. /// \param CodeGen Lambda codegen specific to an accelerator device. - /// An oulined function may not be an entry if, e.g. the if clause always + /// An outlined function may not be an entry if, e.g. the if clause always /// evaluates to false. virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, @@ -222,6 +313,10 @@ private: /// deconstructors of firstprivate C++ objects */ /// } kmp_task_t; QualType KmpTaskTQTy; + /// Saved kmp_task_t for task directive. + QualType SavedKmpTaskTQTy; + /// Saved kmp_task_t for taskloop-based directive. + QualType SavedKmpTaskloopTQTy; /// \brief Type typedef struct kmp_depend_info { /// kmp_intptr_t base_addr; /// size_t len; @@ -527,6 +622,7 @@ public: /// Get combiner/initializer for the specified user-defined reduction, if any. virtual std::pair<llvm::Function *, llvm::Function *> getUserDefinedReduction(const OMPDeclareReductionDecl *D); + /// \brief Emits outlined function for the specified OpenMP parallel directive /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, /// kmp_int32 BoundID, struct context_vars*). @@ -535,7 +631,19 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. 
- virtual llvm::Value *emitParallelOrTeamsOutlinedFunction( + virtual llvm::Value *emitParallelOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); + + /// \brief Emits outlined function for the specified OpenMP teams directive + /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, + /// kmp_int32 BoundID, struct context_vars*). + /// \param D OpenMP directive. + /// \param ThreadIDVar Variable for thread id in the current OpenMP region. + /// \param InnermostKind Kind of innermost directive (for simple directives it + /// is a directive itself, for combined - its innermost directive). + /// \param CodeGen Code generation sequence for the \a D directive. + virtual llvm::Value *emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); @@ -659,16 +767,50 @@ public: /// virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const; + /// struct with the values to be passed to the dispatch runtime function + struct DispatchRTInput { + /// Loop lower bound + llvm::Value *LB = nullptr; + /// Loop upper bound + llvm::Value *UB = nullptr; + /// Chunk size specified using 'schedule' clause (nullptr if chunk + /// was not specified) + llvm::Value *Chunk = nullptr; + DispatchRTInput() = default; + DispatchRTInput(llvm::Value *LB, llvm::Value *UB, llvm::Value *Chunk) + : LB(LB), UB(UB), Chunk(Chunk) {} + }; + + /// Call the appropriate runtime routine to initialize it before start + /// of loop. + + /// This is used for non static scheduled types and when the ordered + /// clause is present on the loop construct. + /// Depending on the loop schedule, it is necessary to call some runtime + /// routine before start of the OpenMP loop to get the loop upper / lower + /// bounds \a LB and \a UB and stride \a ST. + /// + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. + /// \param IVSize Size of the iteration variable in bits. + /// \param IVSigned Sign of the iteration variable. + /// \param Ordered true if loop is ordered, false otherwise. + /// \param DispatchValues struct containing llvm values for lower bound, upper + /// bound, and chunk expression. + /// For the default (nullptr) value, the chunk 1 will be used. + /// virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, - llvm::Value *UB, - llvm::Value *Chunk = nullptr); + const DispatchRTInput &DispatchValues); /// \brief Call the appropriate runtime routine to initialize it before start /// of loop. /// - /// Depending on the loop schedule, it is nesessary to call some runtime + /// This is used only in case of static schedule, when the user did not + /// specify a ordered clause on the loop construct. + /// Depending on the loop schedule, it is necessary to call some runtime /// routine before start of the OpenMP loop to get the loop upper / lower /// bounds \a LB and \a UB and stride \a ST. /// @@ -676,7 +818,7 @@ public: /// \param Loc Clang source location. /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. 
/// \param Ordered true if loop is ordered, false otherwise. /// \param IL Address of the output variable in which the flag of the /// last iteration is returned. @@ -685,7 +827,7 @@ public: /// \param UB Address of the output variable in which the upper iteration /// number is returned. /// \param ST Address of the output variable in which the stride value is - /// returned nesessary to generated the static_chunked scheduled loop. + /// returned necessary to generated the static_chunked scheduled loop. /// \param Chunk Value of the chunk for the static_chunked scheduled loop. /// For the default (nullptr) value, the chunk 1 will be used. /// @@ -700,7 +842,7 @@ public: /// \param Loc Clang source location. /// \param SchedKind Schedule kind, specified by the 'dist_schedule' clause. /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// \param Ordered true if loop is ordered, false otherwise. /// \param IL Address of the output variable in which the flag of the /// last iteration is returned. @@ -709,7 +851,7 @@ public: /// \param UB Address of the output variable in which the upper iteration /// number is returned. /// \param ST Address of the output variable in which the stride value is - /// returned nesessary to generated the static_chunked scheduled loop. + /// returned necessary to generated the static_chunked scheduled loop. /// \param Chunk Value of the chunk for the static_chunked scheduled loop. /// For the default (nullptr) value, the chunk 1 will be used. /// @@ -726,7 +868,7 @@ public: /// \param CGF Reference to current CodeGenFunction. /// \param Loc Clang source location. /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, @@ -745,7 +887,7 @@ public: /// kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, /// kmp_int[32|64] *p_stride); /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// \param IL Address of the output variable in which the flag of the /// last iteration is returned. /// \param LB Address of the output variable in which the lower iteration @@ -797,6 +939,14 @@ public: SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF = nullptr); + /// Creates artificial threadprivate variable with name \p Name and type \p + /// VarType. + /// \param VarType Type of the artificial threadprivate variable. + /// \param Name Name of the artificial threadprivate variable. + virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, + QualType VarType, + StringRef Name); + /// \brief Emit flush of the variables specified in 'omp flush' directive. /// \param Vars List of variables to flush. virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars, @@ -880,6 +1030,32 @@ public: OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel = false); + + /// Emits reduction function. + /// \param ArgsType Array type containing pointers to reduction variables. + /// \param Privates List of private copies for original reduction arguments. + /// \param LHSExprs List of LHS in \a ReductionOps reduction operations. + /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. 
+ /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' + /// or 'operator binop(LHS, RHS)'. + llvm::Value *emitReductionFunction(CodeGenModule &CGM, llvm::Type *ArgsType, + ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps); + + /// Emits single reduction combiner + void emitSingleReductionCombiner(CodeGenFunction &CGF, + const Expr *ReductionOp, + const Expr *PrivateRef, + const DeclRefExpr *LHS, + const DeclRefExpr *RHS); + + struct ReductionOptionsTy { + bool WithNowait; + bool SimpleReduction; + OpenMPDirectiveKind ReductionKind; + }; /// \brief Emit a code for reduction clause. Next code should be emitted for /// reduction: /// \code @@ -916,14 +1092,63 @@ public: /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' /// or 'operator binop(LHS, RHS)'. - /// \param WithNowait true if parent directive has also nowait clause, false - /// otherwise. + /// \param Options List of options for reduction codegen: + /// WithNowait true if parent directive has also nowait clause, false + /// otherwise. + /// SimpleReduction Emit reduction operation only. Used for omp simd + /// directive on the host. + /// ReductionKind The kind of reduction to perform. virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps, - bool WithNowait, bool SimpleReduction); + ReductionOptionsTy Options); + + /// Emit a code for initialization of task reduction clause. Next code + /// should be emitted for reduction: + /// \code + /// + /// _task_red_item_t red_data[n]; + /// ... + /// red_data[i].shar = &origs[i]; + /// red_data[i].size = sizeof(origs[i]); + /// red_data[i].f_init = (void*)RedInit<i>; + /// red_data[i].f_fini = (void*)RedDest<i>; + /// red_data[i].f_comb = (void*)RedOp<i>; + /// red_data[i].flags = <Flag_i>; + /// ... + /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data); + /// \endcode + /// + /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations. + /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations. + /// \param Data Additional data for task generation like tiedness, final + /// state, list of privates, reductions etc. + virtual llvm::Value *emitTaskReductionInit(CodeGenFunction &CGF, + SourceLocation Loc, + ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, + const OMPTaskDataTy &Data); + + /// Required to resolve existing problems in the runtime. Emits threadprivate + /// variables to store the size of the VLAs/array sections for + /// initializer/combiner/finalizer functions + emits threadprivate variable to + /// store the pointer to the original reduction item for the custom + /// initializer defined by declare reduction construct. + /// \param RCG Allows to reuse an existing data for the reductions. + /// \param N Reduction item for which fixups must be emitted. + virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N); + + /// Get the address of `void *` type of the privatue copy of the reduction + /// item specified by the \p SharedLVal. + /// \param ReductionsPtr Pointer to the reduction data returned by the + /// emitTaskReductionInit function. 
+ /// \param SharedLVal Address of the original reduction item. + virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *ReductionsPtr, + LValue SharedLVal); /// \brief Emit code for 'taskwait' directive. virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc); @@ -952,7 +1177,7 @@ public: /// \param OutlinedFnID Outlined function ID value to be defined by this call. /// \param IsOffloadEntry True if the outlined function is an offload entry. /// \param CodeGen Code generation sequence for the \a D directive. - /// An oulined function may not be an entry if, e.g. the if clause always + /// An outlined function may not be an entry if, e.g. the if clause always /// evaluates to false. virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, @@ -991,7 +1216,7 @@ public: virtual bool emitTargetGlobalVariable(GlobalDecl GD); /// \brief Emit the global \a GD if it is meaningful for the target. Returns - /// if it was emitted succesfully. + /// if it was emitted successfully. /// \param GD Global to scan. virtual bool emitTargetGlobal(GlobalDecl GD); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 6a6d832..3ced05d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -26,6 +26,11 @@ enum OpenMPRTLFunctionNVPTX { OMPRTL_NVPTX__kmpc_kernel_init, /// \brief Call to void __kmpc_kernel_deinit(); OMPRTL_NVPTX__kmpc_kernel_deinit, + /// \brief Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, + /// short RequiresOMPRuntime, short RequiresDataSharing); + OMPRTL_NVPTX__kmpc_spmd_kernel_init, + /// \brief Call to void __kmpc_spmd_kernel_deinit(); + OMPRTL_NVPTX__kmpc_spmd_kernel_deinit, /// \brief Call to void __kmpc_kernel_prepare_parallel(void /// *outlined_function); OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, @@ -39,6 +44,30 @@ enum OpenMPRTLFunctionNVPTX { /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 /// global_tid); OMPRTL_NVPTX__kmpc_end_serialized_parallel, + /// \brief Call to int32_t __kmpc_shuffle_int32(int32_t element, + /// int16_t lane_offset, int16_t warp_size); + OMPRTL_NVPTX__kmpc_shuffle_int32, + /// \brief Call to int64_t __kmpc_shuffle_int64(int64_t element, + /// int16_t lane_offset, int16_t warp_size); + OMPRTL_NVPTX__kmpc_shuffle_int64, + /// \brief Call to __kmpc_nvptx_parallel_reduce_nowait(kmp_int32 + /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data, + /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + /// lane_offset, int16_t shortCircuit), + /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); + OMPRTL_NVPTX__kmpc_parallel_reduce_nowait, + /// \brief Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, + /// int32_t num_vars, size_t reduce_size, void *reduce_data, + /// void (*kmp_ShuffleReductFctPtr)(void *rhs, int16_t lane_id, int16_t + /// lane_offset, int16_t shortCircuit), + /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), + /// void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad, + /// int32_t index, int32_t width), + /// void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad, int32_t + /// index, int32_t width, int32_t reduce)) + OMPRTL_NVPTX__kmpc_teams_reduce_nowait, + /// \brief Call to __kmpc_nvptx_end_reduce_nowait(int32_t 
global_tid); + OMPRTL_NVPTX__kmpc_end_reduce_nowait }; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. @@ -76,6 +105,47 @@ public: CGF.EmitRuntimeCall(ExitCallee, ExitArgs); } }; + +// A class to track the execution mode when codegening directives within +// a target region. The appropriate mode (generic/spmd) is set on entry +// to the target region and used by containing directives such as 'parallel' +// to emit optimized code. +class ExecutionModeRAII { +private: + CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode; + CGOpenMPRuntimeNVPTX::ExecutionMode &Mode; + +public: + ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode, + CGOpenMPRuntimeNVPTX::ExecutionMode NewMode) + : Mode(Mode) { + SavedMode = Mode; + Mode = NewMode; + } + ~ExecutionModeRAII() { Mode = SavedMode; } +}; + +/// GPU Configuration: This information can be derived from cuda registers, +/// however, providing compile time constants helps generate more efficient +/// code. For all practical purposes this is fine because the configuration +/// is the same for all known NVPTX architectures. +enum MachineConfiguration : unsigned { + WarpSize = 32, + /// Number of bits required to represent a lane identifier, which is + /// computed as log_2(WarpSize). + LaneIDBits = 5, + LaneIDMask = WarpSize - 1, + + /// Global memory alignment for performance. + GlobalMemoryAlignment = 256, +}; + +enum NamedBarrier : unsigned { + /// Synchronize on this barrier #ID using a named barrier primitive. + /// Only the subset of active threads in a parallel region arrive at the + /// barrier. + NB_Parallel = 1, +}; } // anonymous namespace /// Get the GPU warp size. @@ -96,6 +166,23 @@ static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) { llvm::None, "nvptx_tid"); } +/// Get the id of the warp in the block. +/// We assume that the warp size is 32, which is always the case +/// on the NVPTX device, to generate more efficient code. +static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + return Bld.CreateAShr(getNVPTXThreadID(CGF), LaneIDBits, "nvptx_warp_id"); +} + +/// Get the id of the current lane in the Warp. +/// We assume that the warp size is 32, which is always the case +/// on the NVPTX device, to generate more efficient code. +static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + return Bld.CreateAnd(getNVPTXThreadID(CGF), Bld.getInt32(LaneIDMask), + "nvptx_lane_id"); +} + /// Get the maximum number of threads in a block of the GPU. static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; @@ -112,16 +199,37 @@ static void getNVPTXCTABarrier(CodeGenFunction &CGF) { &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0)); } +/// Get barrier #ID to synchronize selected (multiple of warp size) threads in +/// a CTA. +static void getNVPTXBarrier(CodeGenFunction &CGF, int ID, + llvm::Value *NumThreads) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads}; + Bld.CreateCall(llvm::Intrinsic::getDeclaration(&CGF.CGM.getModule(), + llvm::Intrinsic::nvvm_barrier), + Args); +} + /// Synchronize all GPU threads in a block. static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); } +/// Synchronize worker threads in a parallel region. 
+static void syncParallelThreads(CodeGenFunction &CGF, llvm::Value *NumThreads) { + return getNVPTXBarrier(CGF, NB_Parallel, NumThreads); +} + /// Get the value of the thread_limit clause in the teams directive. -/// The runtime encodes thread_limit in the launch parameter, always starting -/// thread_limit+warpSize threads per team. -static llvm::Value *getThreadLimit(CodeGenFunction &CGF) { +/// For the 'generic' execution mode, the runtime encodes thread_limit in +/// the launch parameters, always starting thread_limit+warpSize threads per +/// CTA. The threads in the last warp are reserved for master execution. +/// For the 'spmd' execution mode, all threads in a CTA are part of the team. +static llvm::Value *getThreadLimit(CodeGenFunction &CGF, + bool IsInSpmdExecutionMode = false) { CGBuilderTy &Bld = CGF.Builder; - return Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF), - "thread_limit"); + return IsInSpmdExecutionMode + ? getNVPTXNumThreads(CGF) + : Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF), + "thread_limit"); } /// Get the thread id of the OMP master thread. @@ -159,12 +267,34 @@ void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction( CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI); } +bool CGOpenMPRuntimeNVPTX::isInSpmdExecutionMode() const { + return CurrentExecutionMode == CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd; +} + +static CGOpenMPRuntimeNVPTX::ExecutionMode +getExecutionModeForDirective(CodeGenModule &CGM, + const OMPExecutableDirective &D) { + OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); + switch (DirectiveKind) { + case OMPD_target: + case OMPD_target_teams: + return CGOpenMPRuntimeNVPTX::ExecutionMode::Generic; + case OMPD_target_parallel: + return CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd; + default: + llvm_unreachable("Unsupported directive on NVPTX device."); + } + llvm_unreachable("Unsupported directive on NVPTX device."); +} + void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + ExecutionModeRAII ModeRAII(CurrentExecutionMode, + CGOpenMPRuntimeNVPTX::ExecutionMode::Generic); EntryFunctionState EST; WorkerFunctionState WST(CGM); Work.clear(); @@ -252,6 +382,94 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF, EST.ExitBB = nullptr; } +void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D, + StringRef ParentName, + llvm::Function *&OutlinedFn, + llvm::Constant *&OutlinedFnID, + bool IsOffloadEntry, + const RegionCodeGenTy &CodeGen) { + ExecutionModeRAII ModeRAII(CurrentExecutionMode, + CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd); + EntryFunctionState EST; + + // Emit target region as a standalone region. 
+ class NVPTXPrePostActionTy : public PrePostActionTy { + CGOpenMPRuntimeNVPTX &RT; + CGOpenMPRuntimeNVPTX::EntryFunctionState &EST; + const OMPExecutableDirective &D; + + public: + NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT, + CGOpenMPRuntimeNVPTX::EntryFunctionState &EST, + const OMPExecutableDirective &D) + : RT(RT), EST(EST), D(D) {} + void Enter(CodeGenFunction &CGF) override { + RT.emitSpmdEntryHeader(CGF, EST, D); + } + void Exit(CodeGenFunction &CGF) override { + RT.emitSpmdEntryFooter(CGF, EST); + } + } Action(*this, EST, D); + CodeGen.setAction(Action); + emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, + IsOffloadEntry, CodeGen); + return; +} + +void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader( + CodeGenFunction &CGF, EntryFunctionState &EST, + const OMPExecutableDirective &D) { + auto &Bld = CGF.Builder; + + // Setup BBs in entry function. + llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute"); + EST.ExitBB = CGF.createBasicBlock(".exit"); + + // Initialize the OMP state in the runtime; called by all active threads. + // TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters + // based on code analysis of the target region. + llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSpmdExecutionMode=*/true), + /*RequiresOMPRuntime=*/Bld.getInt16(1), + /*RequiresDataSharing=*/Bld.getInt16(1)}; + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args); + CGF.EmitBranch(ExecuteBB); + + CGF.EmitBlock(ExecuteBB); +} + +void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF, + EntryFunctionState &EST) { + if (!EST.ExitBB) + EST.ExitBB = CGF.createBasicBlock(".exit"); + + llvm::BasicBlock *OMPDeInitBB = CGF.createBasicBlock(".omp.deinit"); + CGF.EmitBranch(OMPDeInitBB); + + CGF.EmitBlock(OMPDeInitBB); + // DeInitialize the OMP state in the runtime; called by all active threads. + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_deinit), None); + CGF.EmitBranch(EST.ExitBB); + + CGF.EmitBlock(EST.ExitBB); + EST.ExitBB = nullptr; +} + +// Create a unique global variable to indicate the execution mode of this target +// region. The execution mode is either 'generic', or 'spmd' depending on the +// target directive. This variable is picked up by the offload library to setup +// the device appropriately before kernel launch. If the execution mode is +// 'generic', the runtime reserves one warp for the master, otherwise, all +// warps participate in parallel work. 
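For the 'spmd' case described here, the entry header and footer emitted above bracket the target region with init/deinit calls issued by every thread in the CTA. Roughly, the resulting kernel has the following shape (a pseudo-C sketch only; blockDim.x stands in for getNVPTXNumThreads, and the two flags are the conservative constants noted in the TODO):

    // Illustrative shape of an SPMD target kernel (not part of the patch).
    __global__ void target_region_spmd(/* captured arguments */) {
      // .execute: every thread initializes the OpenMP runtime state.
      __kmpc_spmd_kernel_init(/*thread_limit=*/blockDim.x,
                              /*RequiresOMPRuntime=*/1,
                              /*RequiresDataSharing=*/1);
      /* ... target region body, executed by all threads ... */
      // .omp.deinit: every thread releases the runtime state.
      __kmpc_spmd_kernel_deinit();
      // .exit: fall through to kernel return.
    }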
+static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, + CGOpenMPRuntimeNVPTX::ExecutionMode Mode) { + (void)new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, + llvm::GlobalValue::WeakAnyLinkage, + llvm::ConstantInt::get(CGM.Int8Ty, Mode), Name + Twine("_exec_mode")); +} + void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) { auto &Ctx = CGM.getContext(); @@ -385,6 +603,22 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit"); break; } + case OMPRTL_NVPTX__kmpc_spmd_kernel_init: { + // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, + // short RequiresOMPRuntime, short RequiresDataSharing); + llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init"); + break; + } + case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: { + // Build void __kmpc_spmd_kernel_deinit(); + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit"); + break; + } case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { /// Build void __kmpc_kernel_prepare_parallel( /// void *outlined_function); @@ -428,6 +662,103 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); break; } + case OMPRTL_NVPTX__kmpc_shuffle_int32: { + // Build int32_t __kmpc_shuffle_int32(int32_t element, + // int16_t lane_offset, int16_t warp_size); + llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32"); + break; + } + case OMPRTL_NVPTX__kmpc_shuffle_int64: { + // Build int64_t __kmpc_shuffle_int64(int64_t element, + // int16_t lane_offset, int16_t warp_size); + llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64"); + break; + } + case OMPRTL_NVPTX__kmpc_parallel_reduce_nowait: { + // Build int32_t kmpc_nvptx_parallel_reduce_nowait(kmp_int32 global_tid, + // kmp_int32 num_vars, size_t reduce_size, void* reduce_data, + // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + // lane_offset, int16_t Algorithm Version), + // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num)); + llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, + CGM.Int16Ty, CGM.Int16Ty}; + auto *ShuffleReduceFnTy = + llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, + /*isVarArg=*/false); + llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; + auto *InterWarpCopyFnTy = + llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, + /*isVarArg=*/false); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.SizeTy, + CGM.VoidPtrTy, + ShuffleReduceFnTy->getPointerTo(), + InterWarpCopyFnTy->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait"); + break; + } + case 
OMPRTL_NVPTX__kmpc_teams_reduce_nowait: { + // Build int32_t __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, + // int32_t num_vars, size_t reduce_size, void *reduce_data, + // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + // lane_offset, int16_t shortCircuit), + // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), + // void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad, + // int32_t index, int32_t width), + // void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad, + // int32_t index, int32_t width, int32_t reduce)) + llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, + CGM.Int16Ty, CGM.Int16Ty}; + auto *ShuffleReduceFnTy = + llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, + /*isVarArg=*/false); + llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; + auto *InterWarpCopyFnTy = + llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, + /*isVarArg=*/false); + llvm::Type *CopyToScratchpadTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, + CGM.Int32Ty, CGM.Int32Ty}; + auto *CopyToScratchpadFnTy = + llvm::FunctionType::get(CGM.VoidTy, CopyToScratchpadTypeParams, + /*isVarArg=*/false); + llvm::Type *LoadReduceTypeParams[] = { + CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.Int32Ty, CGM.Int32Ty, CGM.Int32Ty}; + auto *LoadReduceFnTy = + llvm::FunctionType::get(CGM.VoidTy, LoadReduceTypeParams, + /*isVarArg=*/false); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.SizeTy, + CGM.VoidPtrTy, + ShuffleReduceFnTy->getPointerTo(), + InterWarpCopyFnTy->getPointerTo(), + CopyToScratchpadFnTy->getPointerTo(), + LoadReduceFnTy->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait"); + break; + } + case OMPRTL_NVPTX__kmpc_end_reduce_nowait: { + // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid); + llvm::Type *TypeParams[] = {CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait"); + break; + } } return RTLFn; } @@ -463,39 +794,75 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction( assert(!ParentName.empty() && "Invalid target region parent name!"); - emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, - CodeGen); + CGOpenMPRuntimeNVPTX::ExecutionMode Mode = + getExecutionModeForDirective(CGM, D); + switch (Mode) { + case CGOpenMPRuntimeNVPTX::ExecutionMode::Generic: + emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, + CodeGen); + break; + case CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd: + emitSpmdKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, + CodeGen); + break; + case CGOpenMPRuntimeNVPTX::ExecutionMode::Unknown: + llvm_unreachable( + "Unknown programming model for OpenMP directive on NVPTX target."); + } + + setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode); } CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) - : CGOpenMPRuntime(CGM) { + : CGOpenMPRuntime(CGM), CurrentExecutionMode(ExecutionMode::Unknown) { if (!CGM.getLangOpts().OpenMPIsDevice) llvm_unreachable("OpenMP NVPTX can only handle device code."); } +void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF, + OpenMPProcBindClauseKind ProcBind, + SourceLocation Loc) { + // Do nothing in case of Spmd mode and L0 
parallel. + // TODO: If in Spmd mode and L1 parallel emit the clause. + if (isInSpmdExecutionMode()) + return; + + CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc); +} + +void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF, + llvm::Value *NumThreads, + SourceLocation Loc) { + // Do nothing in case of Spmd mode and L0 parallel. + // TODO: If in Spmd mode and L1 parallel emit the clause. + if (isInSpmdExecutionMode()) + return; + + CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc); +} + void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) {} -llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOrTeamsOutlinedFunction( +llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + return CGOpenMPRuntime::emitParallelOutlinedFunction(D, ThreadIDVar, + InnermostKind, CodeGen); +} - llvm::Function *OutlinedFun = nullptr; - if (isa<OMPTeamsDirective>(D)) { - llvm::Value *OutlinedFunVal = - CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( - D, ThreadIDVar, InnermostKind, CodeGen); - OutlinedFun = cast<llvm::Function>(OutlinedFunVal); - OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); - OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); - } else { - llvm::Value *OutlinedFunVal = - CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( - D, ThreadIDVar, InnermostKind, CodeGen); - OutlinedFun = cast<llvm::Function>(OutlinedFunVal); - } +llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + + llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction( + D, ThreadIDVar, InnermostKind, CodeGen); + llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal); + OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); + OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); + OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); return OutlinedFun; } @@ -525,7 +892,10 @@ void CGOpenMPRuntimeNVPTX::emitParallelCall( if (!CGF.HaveInsertPoint()) return; - emitGenericParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); + if (isInSpmdExecutionMode()) + emitSpmdParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); + else + emitGenericParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); } void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( @@ -533,8 +903,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { llvm::Function *Fn = cast<llvm::Function>(OutlinedFn); - auto &&L0ParallelGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF, - PrePostActionTy &) { + auto &&L0ParallelGen = [this, Fn](CodeGenFunction &CGF, PrePostActionTy &) { CGBuilderTy &Bld = CGF.Builder; // Prepare for parallel region. Indicate the outlined function. 
@@ -565,8 +934,8 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
 auto &&SeqGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF,
 PrePostActionTy &) {
- auto &&CodeGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF,
- PrePostActionTy &Action) {
+ auto &&CodeGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
 Action.Enter(CGF);
 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
@@ -596,3 +965,1276 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
 ThenRCG(CGF);
 }
 }
+
+void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
+ CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
+ // Just call the outlined function to execute the parallel region.
+ // OutlinedFn(&GTid, &zero, CapturedStruct);
+ //
+ // TODO: Do something with IfCond when support for the 'if' clause
+ // is added on Spmd target directives.
+ llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
+ OutlinedFnArgs.push_back(
+ llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+ OutlinedFnArgs.push_back(
+ llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+ OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+ CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
+}
+
+/// This function creates calls to one of two shuffle functions to copy
+/// variables between lanes in a warp.
+static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
+ QualType ElemTy,
+ llvm::Value *Elem,
+ llvm::Value *Offset) {
+ auto &CGM = CGF.CGM;
+ auto &C = CGM.getContext();
+ auto &Bld = CGF.Builder;
+ CGOpenMPRuntimeNVPTX &RT =
+ *(static_cast<CGOpenMPRuntimeNVPTX *>(&CGM.getOpenMPRuntime()));
+
+ unsigned Size = CGM.getContext().getTypeSizeInChars(ElemTy).getQuantity();
+ assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction.");
+
+ OpenMPRTLFunctionNVPTX ShuffleFn = Size <= 4
+ ? OMPRTL_NVPTX__kmpc_shuffle_int32
+ : OMPRTL_NVPTX__kmpc_shuffle_int64;
+
+ // Cast all types to 32- or 64-bit values before calling shuffle routines.
+ auto CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty;
+ auto *ElemCast = Bld.CreateSExtOrBitCast(Elem, CastTy);
+ auto *WarpSize = CGF.EmitScalarConversion(
+ getNVPTXWarpSize(CGF), C.getIntTypeForBitwidth(32, /* Signed */ true),
+ C.getIntTypeForBitwidth(16, /* Signed */ true), SourceLocation());
+
+ auto *ShuffledVal =
+ CGF.EmitRuntimeCall(RT.createNVPTXRuntimeFunction(ShuffleFn),
+ {ElemCast, Offset, WarpSize});
+
+ return Bld.CreateTruncOrBitCast(ShuffledVal, CGF.ConvertTypeForMem(ElemTy));
+}
+
+namespace {
+enum CopyAction : unsigned {
+ // RemoteLaneToThread: Copy over a Reduce list from a remote lane in
+ // the warp using shuffle instructions.
+ RemoteLaneToThread,
+ // ThreadCopy: Make a copy of a Reduce list on the thread's stack.
+ ThreadCopy,
+ // ThreadToScratchpad: Copy a team-reduced array to the scratchpad.
+ ThreadToScratchpad,
+ // ScratchpadToThread: Copy from a scratchpad array in global memory
+ // containing team-reduced data to a thread's stack.
+ ScratchpadToThread,
+};
+} // namespace
+
+struct CopyOptionsTy {
+ llvm::Value *RemoteLaneOffset;
+ llvm::Value *ScratchpadIndex;
+ llvm::Value *ScratchpadWidth;
+};
+
+/// Emit instructions to copy a Reduce list, which contains partially
+/// aggregated values, in the specified direction.
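When the copy direction is RemoteLaneToThread, each element is pulled from the remote lane through the shuffle helper above; per element this amounts to a widen/shuffle/narrow pattern, sketched below in C for a 4-byte element (elem_ty and the variable names are illustrative, not part of the patch):

    // Widen the element, shuffle it across lanes, then narrow it back.
    int32_t widened  = (int32_t)elem;                     // CreateSExtOrBitCast
    int32_t shuffled = __kmpc_shuffle_int32(widened,
                                            lane_offset,  // int16_t
                                            (int16_t)32); // warp size on NVPTX
    elem_ty result   = (elem_ty)shuffled;                 // CreateTruncOrBitCast
    // Elements of 5 to 8 bytes take the same path via __kmpc_shuffle_int64.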
+static void emitReductionListCopy( + CopyAction Action, CodeGenFunction &CGF, QualType ReductionArrayTy, + ArrayRef<const Expr *> Privates, Address SrcBase, Address DestBase, + CopyOptionsTy CopyOptions = {nullptr, nullptr, nullptr}) { + + auto &CGM = CGF.CGM; + auto &C = CGM.getContext(); + auto &Bld = CGF.Builder; + + auto *RemoteLaneOffset = CopyOptions.RemoteLaneOffset; + auto *ScratchpadIndex = CopyOptions.ScratchpadIndex; + auto *ScratchpadWidth = CopyOptions.ScratchpadWidth; + + // Iterates, element-by-element, through the source Reduce list and + // make a copy. + unsigned Idx = 0; + unsigned Size = Privates.size(); + for (auto &Private : Privates) { + Address SrcElementAddr = Address::invalid(); + Address DestElementAddr = Address::invalid(); + Address DestElementPtrAddr = Address::invalid(); + // Should we shuffle in an element from a remote lane? + bool ShuffleInElement = false; + // Set to true to update the pointer in the dest Reduce list to a + // newly created element. + bool UpdateDestListPtr = false; + // Increment the src or dest pointer to the scratchpad, for each + // new element. + bool IncrScratchpadSrc = false; + bool IncrScratchpadDest = false; + + switch (Action) { + case RemoteLaneToThread: { + // Step 1.1: Get the address for the src element in the Reduce list. + Address SrcElementPtrAddr = + Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); + llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar( + SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + SrcElementAddr = + Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType())); + + // Step 1.2: Create a temporary to store the element in the destination + // Reduce list. + DestElementPtrAddr = + Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize()); + DestElementAddr = + CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element"); + ShuffleInElement = true; + UpdateDestListPtr = true; + break; + } + case ThreadCopy: { + // Step 1.1: Get the address for the src element in the Reduce list. + Address SrcElementPtrAddr = + Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); + llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar( + SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + SrcElementAddr = + Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType())); + + // Step 1.2: Get the address for dest element. The destination + // element has already been created on the thread's stack. + DestElementPtrAddr = + Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize()); + llvm::Value *DestElementPtr = + CGF.EmitLoadOfScalar(DestElementPtrAddr, /*Volatile=*/false, + C.VoidPtrTy, SourceLocation()); + Address DestElemAddr = + Address(DestElementPtr, C.getTypeAlignInChars(Private->getType())); + DestElementAddr = Bld.CreateElementBitCast( + DestElemAddr, CGF.ConvertTypeForMem(Private->getType())); + break; + } + case ThreadToScratchpad: { + // Step 1.1: Get the address for the src element in the Reduce list. + Address SrcElementPtrAddr = + Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); + llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar( + SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + SrcElementAddr = + Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType())); + + // Step 1.2: Get the address for dest element: + // address = base + index * ElementSizeInChars. 
+ unsigned ElementSizeInChars = + C.getTypeSizeInChars(Private->getType()).getQuantity(); + auto *CurrentOffset = + Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars), + ScratchpadIndex); + auto *ScratchPadElemAbsolutePtrVal = + Bld.CreateAdd(DestBase.getPointer(), CurrentOffset); + ScratchPadElemAbsolutePtrVal = + Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); + Address ScratchpadPtr = + Address(ScratchPadElemAbsolutePtrVal, + C.getTypeAlignInChars(Private->getType())); + DestElementAddr = Bld.CreateElementBitCast( + ScratchpadPtr, CGF.ConvertTypeForMem(Private->getType())); + IncrScratchpadDest = true; + break; + } + case ScratchpadToThread: { + // Step 1.1: Get the address for the src element in the scratchpad. + // address = base + index * ElementSizeInChars. + unsigned ElementSizeInChars = + C.getTypeSizeInChars(Private->getType()).getQuantity(); + auto *CurrentOffset = + Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars), + ScratchpadIndex); + auto *ScratchPadElemAbsolutePtrVal = + Bld.CreateAdd(SrcBase.getPointer(), CurrentOffset); + ScratchPadElemAbsolutePtrVal = + Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); + SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal, + C.getTypeAlignInChars(Private->getType())); + IncrScratchpadSrc = true; + + // Step 1.2: Create a temporary to store the element in the destination + // Reduce list. + DestElementPtrAddr = + Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize()); + DestElementAddr = + CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element"); + UpdateDestListPtr = true; + break; + } + } + + // Regardless of src and dest of copy, we emit the load of src + // element as this is required in all directions + SrcElementAddr = Bld.CreateElementBitCast( + SrcElementAddr, CGF.ConvertTypeForMem(Private->getType())); + llvm::Value *Elem = + CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false, + Private->getType(), SourceLocation()); + + // Now that all active lanes have read the element in the + // Reduce list, shuffle over the value from the remote lane. + if (ShuffleInElement) { + Elem = createRuntimeShuffleFunction(CGF, Private->getType(), Elem, + RemoteLaneOffset); + } + + // Store the source element value to the dest element address. + CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false, + Private->getType()); + + // Step 3.1: Modify reference in dest Reduce list as needed. + // Modifying the reference in Reduce list to point to the newly + // created element. The element is live in the current function + // scope and that of functions it invokes (i.e., reduce_function). + // RemoteReduceData[i] = (void*)&RemoteElem + if (UpdateDestListPtr) { + CGF.EmitStoreOfScalar(Bld.CreatePointerBitCastOrAddrSpaceCast( + DestElementAddr.getPointer(), CGF.VoidPtrTy), + DestElementPtrAddr, /*Volatile=*/false, + C.VoidPtrTy); + } + + // Step 4.1: Increment SrcBase/DestBase so that it points to the starting + // address of the next element in scratchpad memory, unless we're currently + // processing the last one. Memory alignment is also taken care of here. + if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) { + llvm::Value *ScratchpadBasePtr = + IncrScratchpadDest ? 
DestBase.getPointer() : SrcBase.getPointer(); + unsigned ElementSizeInChars = + C.getTypeSizeInChars(Private->getType()).getQuantity(); + ScratchpadBasePtr = Bld.CreateAdd( + ScratchpadBasePtr, + Bld.CreateMul(ScratchpadWidth, llvm::ConstantInt::get( + CGM.SizeTy, ElementSizeInChars))); + + // Take care of global memory alignment for performance + ScratchpadBasePtr = Bld.CreateSub(ScratchpadBasePtr, + llvm::ConstantInt::get(CGM.SizeTy, 1)); + ScratchpadBasePtr = Bld.CreateSDiv( + ScratchpadBasePtr, + llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment)); + ScratchpadBasePtr = Bld.CreateAdd(ScratchpadBasePtr, + llvm::ConstantInt::get(CGM.SizeTy, 1)); + ScratchpadBasePtr = Bld.CreateMul( + ScratchpadBasePtr, + llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment)); + + if (IncrScratchpadDest) + DestBase = Address(ScratchpadBasePtr, CGF.getPointerAlign()); + else /* IncrScratchpadSrc = true */ + SrcBase = Address(ScratchpadBasePtr, CGF.getPointerAlign()); + } + + Idx++; + } +} + +/// This function emits a helper that loads data from the scratchpad array +/// and (optionally) reduces it with the input operand. +/// +/// load_and_reduce(local, scratchpad, index, width, should_reduce) +/// reduce_data remote; +/// for elem in remote: +/// remote.elem = Scratchpad[elem_id][index] +/// if (should_reduce) +/// local = local @ remote +/// else +/// local = remote +static llvm::Value * +emitReduceScratchpadFunction(CodeGenModule &CGM, + ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy, llvm::Value *ReduceFn) { + auto &C = CGM.getContext(); + auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true); + + // Destination of the copy. + ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + // Base address of the scratchpad array, with each element storing a + // Reduce list per team. + ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + // A source index into the scratchpad array. + ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other); + // Row width of an element in the scratchpad array, typically + // the number of teams. + ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other); + // If should_reduce == 1, then it's load AND reduce, + // If should_reduce == 0 (or otherwise), then it only loads (+ copy). + // The latter case is used for initialization. + ImplicitParamDecl ShouldReduceArg(C, Int32Ty, ImplicitParamDecl::Other); + + FunctionArgList Args; + Args.push_back(&ReduceListArg); + Args.push_back(&ScratchPadArg); + Args.push_back(&IndexArg); + Args.push_back(&WidthArg); + Args.push_back(&ShouldReduceArg); + + auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + "_omp_reduction_load_and_reduce", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI); + CodeGenFunction CGF(CGM); + // We don't need debug information in this function as nothing here refers to + // user code. + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + + auto &Bld = CGF.Builder; + + // Get local Reduce list pointer. 
+ Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); + Address ReduceListAddr( + Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, + C.VoidPtrTy, SourceLocation()), + CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), + CGF.getPointerAlign()); + + Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg); + llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar( + AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + + Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg); + llvm::Value *IndexVal = + Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, + Int32Ty, SourceLocation()), + CGM.SizeTy, /*isSigned=*/true); + + Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg); + llvm::Value *WidthVal = + Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false, + Int32Ty, SourceLocation()), + CGM.SizeTy, /*isSigned=*/true); + + Address AddrShouldReduceArg = CGF.GetAddrOfLocalVar(&ShouldReduceArg); + llvm::Value *ShouldReduceVal = CGF.EmitLoadOfScalar( + AddrShouldReduceArg, /*Volatile=*/false, Int32Ty, SourceLocation()); + + // The absolute ptr address to the base addr of the next element to copy. + llvm::Value *CumulativeElemBasePtr = + Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy); + Address SrcDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign()); + + // Create a Remote Reduce list to store the elements read from the + // scratchpad array. + Address RemoteReduceList = + CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_red_list"); + + // Assemble remote Reduce list from scratchpad array. + emitReductionListCopy(ScratchpadToThread, CGF, ReductionArrayTy, Privates, + SrcDataAddr, RemoteReduceList, + {/*RemoteLaneOffset=*/nullptr, + /*ScratchpadIndex=*/IndexVal, + /*ScratchpadWidth=*/WidthVal}); + + llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); + llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); + llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); + + auto CondReduce = Bld.CreateICmpEQ(ShouldReduceVal, Bld.getInt32(1)); + Bld.CreateCondBr(CondReduce, ThenBB, ElseBB); + + CGF.EmitBlock(ThenBB); + // We should reduce with the local Reduce list. + // reduce_function(LocalReduceList, RemoteReduceList) + llvm::Value *LocalDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + ReduceListAddr.getPointer(), CGF.VoidPtrTy); + llvm::Value *RemoteDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + RemoteReduceList.getPointer(), CGF.VoidPtrTy); + CGF.EmitCallOrInvoke(ReduceFn, {LocalDataPtr, RemoteDataPtr}); + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(ElseBB); + // No reduction; just copy: + // Local Reduce list = Remote Reduce list. + emitReductionListCopy(ThreadCopy, CGF, ReductionArrayTy, Privates, + RemoteReduceList, ReduceListAddr); + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(MergeBB); + + CGF.FinishFunction(); + return Fn; +} + +/// This function emits a helper that stores reduced data from the team +/// master to a scratchpad array in global memory. +/// +/// for elem in Reduce List: +/// scratchpad[elem_id][index] = elem +/// +static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM, + ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy) { + + auto &C = CGM.getContext(); + auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true); + + // Source of the copy. 
+ ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + // Base address of the scratchpad array, with each element storing a + // Reduce list per team. + ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + // A destination index into the scratchpad array, typically the team + // identifier. + ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other); + // Row width of an element in the scratchpad array, typically + // the number of teams. + ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other); + + FunctionArgList Args; + Args.push_back(&ReduceListArg); + Args.push_back(&ScratchPadArg); + Args.push_back(&IndexArg); + Args.push_back(&WidthArg); + + auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + "_omp_reduction_copy_to_scratchpad", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI); + CodeGenFunction CGF(CGM); + // We don't need debug information in this function as nothing here refers to + // user code. + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + + auto &Bld = CGF.Builder; + + Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); + Address SrcDataAddr( + Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, + C.VoidPtrTy, SourceLocation()), + CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), + CGF.getPointerAlign()); + + Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg); + llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar( + AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + + Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg); + llvm::Value *IndexVal = + Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, + Int32Ty, SourceLocation()), + CGF.SizeTy, /*isSigned=*/true); + + Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg); + llvm::Value *WidthVal = + Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false, + Int32Ty, SourceLocation()), + CGF.SizeTy, /*isSigned=*/true); + + // The absolute ptr address to the base addr of the next element to copy. + llvm::Value *CumulativeElemBasePtr = + Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy); + Address DestDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign()); + + emitReductionListCopy(ThreadToScratchpad, CGF, ReductionArrayTy, Privates, + SrcDataAddr, DestDataAddr, + {/*RemoteLaneOffset=*/nullptr, + /*ScratchpadIndex=*/IndexVal, + /*ScratchpadWidth=*/WidthVal}); + + CGF.FinishFunction(); + return Fn; +} + +/// This function emits a helper that gathers Reduce lists from the first +/// lane of every active warp to lanes in the first warp. +/// +/// void inter_warp_copy_func(void* reduce_data, num_warps) +/// shared smem[warp_size]; +/// For all data entries D in reduce_data: +/// If (I am the first lane in each warp) +/// Copy my local D to smem[warp_id] +/// sync +/// if (I am the first warp) +/// Copy smem[thread_id] to my local D +/// sync +static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, + ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy) { + auto &C = CGM.getContext(); + auto &M = CGM.getModule(); + + // ReduceList: thread local Reduce list. 
+ // At the stage of the computation when this function is called, partially + // aggregated values reside in the first lane of every active warp. + ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + // NumWarps: number of warps active in the parallel region. This could + // be smaller than 32 (max warps in a CTA) for partial block reduction. + ImplicitParamDecl NumWarpsArg(C, + C.getIntTypeForBitwidth(32, /* Signed */ true), + ImplicitParamDecl::Other); + FunctionArgList Args; + Args.push_back(&ReduceListArg); + Args.push_back(&NumWarpsArg); + + auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + "_omp_reduction_inter_warp_copy_func", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI); + CodeGenFunction CGF(CGM); + // We don't need debug information in this function as nothing here refers to + // user code. + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + + auto &Bld = CGF.Builder; + + // This array is used as a medium to transfer, one reduce element at a time, + // the data from the first lane of every warp to lanes in the first warp + // in order to perform the final step of a reduction in a parallel region + // (reduction across warps). The array is placed in NVPTX __shared__ memory + // for reduced latency, as well as to have a distinct copy for concurrently + // executing target regions. The array is declared with common linkage so + // as to be shared across compilation units. + const char *TransferMediumName = + "__openmp_nvptx_data_transfer_temporary_storage"; + llvm::GlobalVariable *TransferMedium = + M.getGlobalVariable(TransferMediumName); + if (!TransferMedium) { + auto *Ty = llvm::ArrayType::get(CGM.Int64Ty, WarpSize); + unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared); + TransferMedium = new llvm::GlobalVariable( + M, Ty, + /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage, + llvm::Constant::getNullValue(Ty), TransferMediumName, + /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, + SharedAddressSpace); + } + + // Get the CUDA thread id of the current OpenMP thread on the GPU. + auto *ThreadID = getNVPTXThreadID(CGF); + // nvptx_lane_id = nvptx_id % warpsize + auto *LaneID = getNVPTXLaneID(CGF); + // nvptx_warp_id = nvptx_id / warpsize + auto *WarpID = getNVPTXWarpID(CGF); + + Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); + Address LocalReduceList( + Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, + C.VoidPtrTy, SourceLocation()), + CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), + CGF.getPointerAlign()); + + unsigned Idx = 0; + for (auto &Private : Privates) { + // + // Warp master copies reduce element to transfer medium in __shared__ + // memory. 
+ // + llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); + llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); + llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); + + // if (lane_id == 0) + auto IsWarpMaster = + Bld.CreateICmpEQ(LaneID, Bld.getInt32(0), "warp_master"); + Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB); + CGF.EmitBlock(ThenBB); + + // Reduce element = LocalReduceList[i] + Address ElemPtrPtrAddr = + Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); + llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( + ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + // elemptr = (type[i]*)(elemptrptr) + Address ElemPtr = + Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType())); + ElemPtr = Bld.CreateElementBitCast( + ElemPtr, CGF.ConvertTypeForMem(Private->getType())); + // elem = *elemptr + llvm::Value *Elem = CGF.EmitLoadOfScalar( + ElemPtr, /*Volatile=*/false, Private->getType(), SourceLocation()); + + // Get pointer to location in transfer medium. + // MediumPtr = &medium[warp_id] + llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP( + TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID}); + Address MediumPtr(MediumPtrVal, C.getTypeAlignInChars(Private->getType())); + // Casting to actual data type. + // MediumPtr = (type[i]*)MediumPtrAddr; + MediumPtr = Bld.CreateElementBitCast( + MediumPtr, CGF.ConvertTypeForMem(Private->getType())); + + //*MediumPtr = elem + Bld.CreateStore(Elem, MediumPtr); + + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(ElseBB); + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(MergeBB); + + Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg); + llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar( + AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, SourceLocation()); + + auto *NumActiveThreads = Bld.CreateNSWMul( + NumWarpsVal, getNVPTXWarpSize(CGF), "num_active_threads"); + // named_barrier_sync(ParallelBarrierID, num_active_threads) + syncParallelThreads(CGF, NumActiveThreads); + + // + // Warp 0 copies reduce element from transfer medium. + // + llvm::BasicBlock *W0ThenBB = CGF.createBasicBlock("then"); + llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else"); + llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont"); + + // Up to 32 threads in warp 0 are active. 
+ auto IsActiveThread = + Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread"); + Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB); + + CGF.EmitBlock(W0ThenBB); + + // SrcMediumPtr = &medium[tid] + llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP( + TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID}); + Address SrcMediumPtr(SrcMediumPtrVal, + C.getTypeAlignInChars(Private->getType())); + // SrcMediumVal = *SrcMediumPtr; + SrcMediumPtr = Bld.CreateElementBitCast( + SrcMediumPtr, CGF.ConvertTypeForMem(Private->getType())); + llvm::Value *SrcMediumValue = CGF.EmitLoadOfScalar( + SrcMediumPtr, /*Volatile=*/false, Private->getType(), SourceLocation()); + + // TargetElemPtr = (type[i]*)(SrcDataAddr[i]) + Address TargetElemPtrPtr = + Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); + llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar( + TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + Address TargetElemPtr = + Address(TargetElemPtrVal, C.getTypeAlignInChars(Private->getType())); + TargetElemPtr = Bld.CreateElementBitCast( + TargetElemPtr, CGF.ConvertTypeForMem(Private->getType())); + + // *TargetElemPtr = SrcMediumVal; + CGF.EmitStoreOfScalar(SrcMediumValue, TargetElemPtr, /*Volatile=*/false, + Private->getType()); + Bld.CreateBr(W0MergeBB); + + CGF.EmitBlock(W0ElseBB); + Bld.CreateBr(W0MergeBB); + + CGF.EmitBlock(W0MergeBB); + + // While warp 0 copies values from transfer medium, all other warps must + // wait. + syncParallelThreads(CGF, NumActiveThreads); + Idx++; + } + + CGF.FinishFunction(); + return Fn; +} + +/// Emit a helper that reduces data across two OpenMP threads (lanes) +/// in the same warp. It uses shuffle instructions to copy over data from +/// a remote lane's stack. The reduction algorithm performed is specified +/// by the fourth parameter. +/// +/// Algorithm Versions. +/// Full Warp Reduce (argument value 0): +/// This algorithm assumes that all 32 lanes are active and gathers +/// data from these 32 lanes, producing a single resultant value. +/// Contiguous Partial Warp Reduce (argument value 1): +/// This algorithm assumes that only a *contiguous* subset of lanes +/// are active. This happens for the last warp in a parallel region +/// when the user specified num_threads is not an integer multiple of +/// 32. This contiguous subset always starts with the zeroth lane. +/// Partial Warp Reduce (argument value 2): +/// This algorithm gathers data from any number of lanes at any position. +/// All reduced values are stored in the lowest possible lane. The set +/// of problems every algorithm addresses is a super set of those +/// addressable by algorithms with a lower version number. Overhead +/// increases as algorithm version increases. +/// +/// Terminology +/// Reduce element: +/// Reduce element refers to the individual data field with primitive +/// data types to be combined and reduced across threads. +/// Reduce list: +/// Reduce list refers to a collection of local, thread-private +/// reduce elements. +/// Remote Reduce list: +/// Remote Reduce list refers to a collection of remote (relative to +/// the current thread) reduce elements. +/// +/// We distinguish between three states of threads that are important to +/// the implementation of this function. +/// Alive threads: +/// Threads in a warp executing the SIMT instruction, as distinguished from +/// threads that are inactive due to divergent control flow. 
+/// Active threads: +/// The minimal set of threads that has to be alive upon entry to this +/// function. The computation is correct iff active threads are alive. +/// Some threads are alive but they are not active because they do not +/// contribute to the computation in any useful manner. Turning them off +/// may introduce control flow overheads without any tangible benefits. +/// Effective threads: +/// In order to comply with the argument requirements of the shuffle +/// function, we must keep all lanes holding data alive. But at most +/// half of them perform value aggregation; we refer to this half of +/// threads as effective. The other half is simply handing off their +/// data. +/// +/// Procedure +/// Value shuffle: +/// In this step active threads transfer data from higher lane positions +/// in the warp to lower lane positions, creating Remote Reduce list. +/// Value aggregation: +/// In this step, effective threads combine their thread local Reduce list +/// with Remote Reduce list and store the result in the thread local +/// Reduce list. +/// Value copy: +/// In this step, we deal with the assumption made by algorithm 2 +/// (i.e. contiguity assumption). When we have an odd number of lanes +/// active, say 2k+1, only k threads will be effective and therefore k +/// new values will be produced. However, the Reduce list owned by the +/// (2k+1)th thread is ignored in the value aggregation. Therefore +/// we copy the Reduce list from the (2k+1)th lane to (k+1)th lane so +/// that the contiguity assumption still holds. +static llvm::Value * +emitShuffleAndReduceFunction(CodeGenModule &CGM, + ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy, llvm::Value *ReduceFn) { + auto &C = CGM.getContext(); + + // Thread local Reduce list used to host the values of data to be reduced. + ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + // Current lane id; could be logical. + ImplicitParamDecl LaneIDArg(C, C.ShortTy, ImplicitParamDecl::Other); + // Offset of the remote source lane relative to the current lane. + ImplicitParamDecl RemoteLaneOffsetArg(C, C.ShortTy, + ImplicitParamDecl::Other); + // Algorithm version. This is expected to be known at compile time. + ImplicitParamDecl AlgoVerArg(C, C.ShortTy, ImplicitParamDecl::Other); + FunctionArgList Args; + Args.push_back(&ReduceListArg); + Args.push_back(&LaneIDArg); + Args.push_back(&RemoteLaneOffsetArg); + Args.push_back(&AlgoVerArg); + + auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + "_omp_reduction_shuffle_and_reduce_func", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); + CodeGenFunction CGF(CGM); + // We don't need debug information in this function as nothing here refers to + // user code. 
+ CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + + auto &Bld = CGF.Builder; + + Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); + Address LocalReduceList( + Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, + C.VoidPtrTy, SourceLocation()), + CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), + CGF.getPointerAlign()); + + Address AddrLaneIDArg = CGF.GetAddrOfLocalVar(&LaneIDArg); + llvm::Value *LaneIDArgVal = CGF.EmitLoadOfScalar( + AddrLaneIDArg, /*Volatile=*/false, C.ShortTy, SourceLocation()); + + Address AddrRemoteLaneOffsetArg = CGF.GetAddrOfLocalVar(&RemoteLaneOffsetArg); + llvm::Value *RemoteLaneOffsetArgVal = CGF.EmitLoadOfScalar( + AddrRemoteLaneOffsetArg, /*Volatile=*/false, C.ShortTy, SourceLocation()); + + Address AddrAlgoVerArg = CGF.GetAddrOfLocalVar(&AlgoVerArg); + llvm::Value *AlgoVerArgVal = CGF.EmitLoadOfScalar( + AddrAlgoVerArg, /*Volatile=*/false, C.ShortTy, SourceLocation()); + + // Create a local thread-private variable to host the Reduce list + // from a remote lane. + Address RemoteReduceList = + CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_reduce_list"); + + // This loop iterates through the list of reduce elements and copies, + // element by element, from a remote lane in the warp to RemoteReduceList, + // hosted on the thread's stack. + emitReductionListCopy(RemoteLaneToThread, CGF, ReductionArrayTy, Privates, + LocalReduceList, RemoteReduceList, + {/*RemoteLaneOffset=*/RemoteLaneOffsetArgVal, + /*ScratchpadIndex=*/nullptr, + /*ScratchpadWidth=*/nullptr}); + + // The actions to be performed on the Remote Reduce list is dependent + // on the algorithm version. + // + // if (AlgoVer==0) || (AlgoVer==1 && (LaneId < Offset)) || (AlgoVer==2 && + // LaneId % 2 == 0 && Offset > 0): + // do the reduction value aggregation + // + // The thread local variable Reduce list is mutated in place to host the + // reduced data, which is the aggregated value produced from local and + // remote lanes. + // + // Note that AlgoVer is expected to be a constant integer known at compile + // time. + // When AlgoVer==0, the first conjunction evaluates to true, making + // the entire predicate true during compile time. + // When AlgoVer==1, the second conjunction has only the second part to be + // evaluated during runtime. Other conjunctions evaluates to false + // during compile time. + // When AlgoVer==2, the third conjunction has only the second part to be + // evaluated during runtime. Other conjunctions evaluates to false + // during compile time. 
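Collapsed into C, the predicate spelled out in this comment and materialized by the CreateICmp/CreateAnd/CreateOr calls just below reads (a sketch, using the argument values loaded above):

    // Aggregate the remote Reduce list into the local one when:
    bool do_reduce = (AlgoVer == 0) ||
                     (AlgoVer == 1 && LaneId < RemoteLaneOffset) ||
                     (AlgoVer == 2 && (LaneId & 1) == 0 && RemoteLaneOffset > 0);
    // Algorithm 1 only: lanes at or past the offset copy the remote list instead.
    bool do_copy   = (AlgoVer == 1 && LaneId >= RemoteLaneOffset);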
+ auto CondAlgo0 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(0)); + + auto Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1)); + auto CondAlgo1 = Bld.CreateAnd( + Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal)); + + auto Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2)); + auto CondAlgo2 = Bld.CreateAnd( + Algo2, + Bld.CreateICmpEQ(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1)), + Bld.getInt16(0))); + CondAlgo2 = Bld.CreateAnd( + CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0))); + + auto CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1); + CondReduce = Bld.CreateOr(CondReduce, CondAlgo2); + + llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); + llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); + llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); + Bld.CreateCondBr(CondReduce, ThenBB, ElseBB); + + CGF.EmitBlock(ThenBB); + // reduce_function(LocalReduceList, RemoteReduceList) + llvm::Value *LocalReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + LocalReduceList.getPointer(), CGF.VoidPtrTy); + llvm::Value *RemoteReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + RemoteReduceList.getPointer(), CGF.VoidPtrTy); + CGF.EmitCallOrInvoke(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr}); + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(ElseBB); + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(MergeBB); + + // if (AlgoVer==1 && (LaneId >= Offset)) copy Remote Reduce list to local + // Reduce list. + Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1)); + auto CondCopy = Bld.CreateAnd( + Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal)); + + llvm::BasicBlock *CpyThenBB = CGF.createBasicBlock("then"); + llvm::BasicBlock *CpyElseBB = CGF.createBasicBlock("else"); + llvm::BasicBlock *CpyMergeBB = CGF.createBasicBlock("ifcont"); + Bld.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB); + + CGF.EmitBlock(CpyThenBB); + emitReductionListCopy(ThreadCopy, CGF, ReductionArrayTy, Privates, + RemoteReduceList, LocalReduceList); + Bld.CreateBr(CpyMergeBB); + + CGF.EmitBlock(CpyElseBB); + Bld.CreateBr(CpyMergeBB); + + CGF.EmitBlock(CpyMergeBB); + + CGF.FinishFunction(); + return Fn; +} + +/// +/// Design of OpenMP reductions on the GPU +/// +/// Consider a typical OpenMP program with one or more reduction +/// clauses: +/// +/// float foo; +/// double bar; +/// #pragma omp target teams distribute parallel for \ +/// reduction(+:foo) reduction(*:bar) +/// for (int i = 0; i < N; i++) { +/// foo += A[i]; bar *= B[i]; +/// } +/// +/// where 'foo' and 'bar' are reduced across all OpenMP threads in +/// all teams. In our OpenMP implementation on the NVPTX device an +/// OpenMP team is mapped to a CUDA threadblock and OpenMP threads +/// within a team are mapped to CUDA threads within a threadblock. +/// Our goal is to efficiently aggregate values across all OpenMP +/// threads such that: +/// +/// - the compiler and runtime are logically concise, and +/// - the reduction is performed efficiently in a hierarchical +/// manner as follows: within OpenMP threads in the same warp, +/// across warps in a threadblock, and finally across teams on +/// the NVPTX device. +/// +/// Introduction to Decoupling +/// +/// We would like to decouple the compiler and the runtime so that the +/// latter is ignorant of the reduction variables (number, data types) +/// and the reduction operators. This allows a simpler interface +/// and implementation while still attaining good performance. 
+/// +/// Pseudocode for the aforementioned OpenMP program generated by the +/// compiler is as follows: +/// +/// 1. Create private copies of reduction variables on each OpenMP +/// thread: 'foo_private', 'bar_private' +/// 2. Each OpenMP thread reduces the chunk of 'A' and 'B' assigned +/// to it and writes the result in 'foo_private' and 'bar_private' +/// respectively. +/// 3. Call the OpenMP runtime on the GPU to reduce within a team +/// and store the result on the team master: +/// +/// __kmpc_nvptx_parallel_reduce_nowait(..., +/// reduceData, shuffleReduceFn, interWarpCpyFn) +/// +/// where: +/// struct ReduceData { +/// double *foo; +/// double *bar; +/// } reduceData +/// reduceData.foo = &foo_private +/// reduceData.bar = &bar_private +/// +/// 'shuffleReduceFn' and 'interWarpCpyFn' are pointers to two +/// auxiliary functions generated by the compiler that operate on +/// variables of type 'ReduceData'. They aid the runtime perform +/// algorithmic steps in a data agnostic manner. +/// +/// 'shuffleReduceFn' is a pointer to a function that reduces data +/// of type 'ReduceData' across two OpenMP threads (lanes) in the +/// same warp. It takes the following arguments as input: +/// +/// a. variable of type 'ReduceData' on the calling lane, +/// b. its lane_id, +/// c. an offset relative to the current lane_id to generate a +/// remote_lane_id. The remote lane contains the second +/// variable of type 'ReduceData' that is to be reduced. +/// d. an algorithm version parameter determining which reduction +/// algorithm to use. +/// +/// 'shuffleReduceFn' retrieves data from the remote lane using +/// efficient GPU shuffle intrinsics and reduces, using the +/// algorithm specified by the 4th parameter, the two operands +/// element-wise. The result is written to the first operand. +/// +/// Different reduction algorithms are implemented in different +/// runtime functions, all calling 'shuffleReduceFn' to perform +/// the essential reduction step. Therefore, based on the 4th +/// parameter, this function behaves slightly differently to +/// cooperate with the runtime to ensure correctness under +/// different circumstances. +/// +/// 'InterWarpCpyFn' is a pointer to a function that transfers +/// reduced variables across warps. It tunnels, through CUDA +/// shared memory, the thread-private data of type 'ReduceData' +/// from lane 0 of each warp to a lane in the first warp. +/// 4. Call the OpenMP runtime on the GPU to reduce across teams. +/// The last team writes the global reduced value to memory. +/// +/// ret = __kmpc_nvptx_teams_reduce_nowait(..., +/// reduceData, shuffleReduceFn, interWarpCpyFn, +/// scratchpadCopyFn, loadAndReduceFn) +/// +/// 'scratchpadCopyFn' is a helper that stores reduced +/// data from the team master to a scratchpad array in +/// global memory. +/// +/// 'loadAndReduceFn' is a helper that loads data from +/// the scratchpad array and reduces it with the input +/// operand. +/// +/// These compiler generated functions hide address +/// calculation and alignment information from the runtime. +/// 5. if ret == 1: +/// The team master of the last team stores the reduced +/// result to the globals in memory. 
+/// foo += reduceData.foo; bar *= reduceData.bar
+///
+///
+/// Warp Reduction Algorithms
+///
+/// On the warp level, we have three algorithms implemented in the
+/// OpenMP runtime depending on the number of active lanes:
+///
+/// Full Warp Reduction
+///
+/// The reduce algorithm within a warp where all lanes are active
+/// is implemented in the runtime as follows:
+///
+/// full_warp_reduce(void *reduce_data,
+/// kmp_ShuffleReductFctPtr ShuffleReduceFn) {
+/// for (int offset = WARPSIZE/2; offset > 0; offset /= 2)
+/// ShuffleReduceFn(reduce_data, 0, offset, 0);
+/// }
+///
+/// The algorithm completes in log(2, WARPSIZE) steps.
+///
+/// 'ShuffleReduceFn' is used here with lane_id set to 0 because it is
+/// not used; therefore we save instructions by not retrieving lane_id
+/// from the corresponding special registers. The 4th parameter, which
+/// represents the version of the algorithm being used, is set to 0 to
+/// signify full warp reduction.
+///
+/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
+///
+/// #reduce_elem refers to an element in the local lane's data structure
+/// #remote_elem is retrieved from a remote lane
+/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
+/// reduce_elem = reduce_elem REDUCE_OP remote_elem;
+///
+/// Contiguous Partial Warp Reduction
+///
+/// This reduce algorithm is used within a warp where only the first
+/// 'n' (n <= WARPSIZE) lanes are active. It is typically used when the
+/// number of OpenMP threads in a parallel region is not a multiple of
+/// WARPSIZE. The algorithm is implemented in the runtime as follows:
+///
+/// void
+/// contiguous_partial_reduce(void *reduce_data,
+/// kmp_ShuffleReductFctPtr ShuffleReduceFn,
+/// int size, int lane_id) {
+/// int curr_size;
+/// int offset;
+/// curr_size = size;
+/// offset = curr_size/2;
+/// while (offset>0) {
+/// ShuffleReduceFn(reduce_data, lane_id, offset, 1);
+/// curr_size = (curr_size+1)/2;
+/// offset = curr_size/2;
+/// }
+/// }
+///
+/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
+///
+/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
+/// if (lane_id < offset)
+/// reduce_elem = reduce_elem REDUCE_OP remote_elem
+/// else
+/// reduce_elem = remote_elem
+///
+/// This algorithm assumes that the data to be reduced are located in a
+/// contiguous subset of lanes starting from the first. When there is
+/// an odd number of active lanes, the data in the last lane is not
+/// aggregated with any other lane's data but is instead copied over.
+///
+/// Dispersed Partial Warp Reduction
+///
+/// This algorithm is used within a warp when any discontiguous subset of
+/// lanes are active. It is used to implement the reduction operation
+/// across lanes in an OpenMP simd region or in a nested parallel region.
+///
+/// void
+/// dispersed_partial_reduce(void *reduce_data,
+/// kmp_ShuffleReductFctPtr ShuffleReduceFn) {
+/// int size, remote_id;
+/// int logical_lane_id = number_of_active_lanes_before_me() * 2;
+/// do {
+/// remote_id = next_active_lane_id_right_after_me();
+/// # the above function returns 0 if no active lane
+/// # is present right after the current lane.
+/// size = number_of_active_lanes_in_this_warp();
+/// logical_lane_id /= 2;
+/// ShuffleReduceFn(reduce_data, logical_lane_id,
+/// remote_id-1-threadIdx.x, 2);
+/// } while (logical_lane_id % 2 == 0 && size > 1);
+/// }
+///
+/// There is no assumption made about the initial state of the reduction.
+///
+/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
+///
+/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
+/// if (lane_id % 2 == 0 && offset > 0)
+///   reduce_elem = reduce_elem REDUCE_OP remote_elem
+/// else
+///   reduce_elem = remote_elem
+///
+///
+/// Intra-Team Reduction
+///
+/// This function, as implemented in the runtime call
+/// '__kmpc_nvptx_parallel_reduce_nowait', aggregates data across OpenMP
+/// threads in a team. It first reduces within a warp using the
+/// aforementioned algorithms. We then proceed to gather all such
+/// reduced values at the first warp.
+///
+/// The runtime makes use of the function 'InterWarpCpyFn', which copies
+/// data from each of the "warp masters" (zeroth lane of each warp, where
+/// warp-reduced data is held) to the zeroth warp. This step reduces (in
+/// a mathematical sense) the problem of reduction across warp masters in
+/// a block to the problem of warp reduction.
+///
+///
+/// Inter-Team Reduction
+///
+/// Once a team has reduced its data to a single value, it is stored in
+/// a global scratchpad array. Since each team has a distinct slot, this
+/// can be done without locking.
+///
+/// The last team to write to the scratchpad array proceeds to reduce the
+/// scratchpad array. One or more workers in the last team use the helper
+/// 'loadAndReduceFn' to load and reduce values from the array, i.e.,
+/// the k'th worker reduces every k'th element.
+///
+/// Finally, a call is made to '__kmpc_nvptx_parallel_reduce_nowait' to
+/// reduce across workers and compute a globally reduced value.
+///
+void CGOpenMPRuntimeNVPTX::emitReduction(
+    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
+    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
+    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
+  if (!CGF.HaveInsertPoint())
+    return;
+
+  bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind);
+  bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind);
+  // FIXME: Add support for simd reduction.
+  assert((TeamsReduction || ParallelReduction) &&
+         "Invalid reduction selection in emitReduction.");
+
+  auto &C = CGM.getContext();
+
+  // 1. Build a list of reduction variables.
+  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
+  auto Size = RHSExprs.size();
+  for (auto *E : Privates) {
+    if (E->getType()->isVariablyModifiedType())
+      // Reserve place for array size.
+      ++Size;
+  }
+  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
+  QualType ReductionArrayTy =
+      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
+                             /*IndexTypeQuals=*/0);
+  Address ReductionList =
+      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
+  auto IPriv = Privates.begin();
+  unsigned Idx = 0;
+  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
+    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
+                                                   CGF.getPointerSize());
+    CGF.Builder.CreateStore(
+        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
+        Elem);
+    if ((*IPriv)->getType()->isVariablyModifiedType()) {
+      // Store array size.
+ ++Idx; + Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, + CGF.getPointerSize()); + llvm::Value *Size = CGF.Builder.CreateIntCast( + CGF.getVLASize( + CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) + .first, + CGF.SizeTy, /*isSigned=*/false); + CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), + Elem); + } + } + + // 2. Emit reduce_func(). + auto *ReductionFn = emitReductionFunction( + CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, + LHSExprs, RHSExprs, ReductionOps); + + // 4. Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList), + // RedList, shuffle_reduce_func, interwarp_copy_func); + auto *ThreadId = getThreadID(CGF, Loc); + auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); + auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + ReductionList.getPointer(), CGF.VoidPtrTy); + + auto *ShuffleAndReduceFn = emitShuffleAndReduceFunction( + CGM, Privates, ReductionArrayTy, ReductionFn); + auto *InterWarpCopyFn = + emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy); + + llvm::Value *Res = nullptr; + if (ParallelReduction) { + llvm::Value *Args[] = {ThreadId, + CGF.Builder.getInt32(RHSExprs.size()), + ReductionArrayTySize, + RL, + ShuffleAndReduceFn, + InterWarpCopyFn}; + + Res = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_reduce_nowait), + Args); + } + + if (TeamsReduction) { + auto *ScratchPadCopyFn = + emitCopyToScratchpad(CGM, Privates, ReductionArrayTy); + auto *LoadAndReduceFn = emitReduceScratchpadFunction( + CGM, Privates, ReductionArrayTy, ReductionFn); + + llvm::Value *Args[] = {ThreadId, + CGF.Builder.getInt32(RHSExprs.size()), + ReductionArrayTySize, + RL, + ShuffleAndReduceFn, + InterWarpCopyFn, + ScratchPadCopyFn, + LoadAndReduceFn}; + Res = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_teams_reduce_nowait), + Args); + } + + // 5. Build switch(res) + auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); + auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1); + + // 6. Build case 1: where we have reduced values in the master + // thread in each team. 
+ // __kmpc_end_reduce{_nowait}(<gtid>); + // break; + auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); + SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); + CGF.EmitBlock(Case1BB); + + // Add emission of __kmpc_end_reduce{_nowait}(<gtid>); + llvm::Value *EndArgs[] = {ThreadId}; + auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps, + this](CodeGenFunction &CGF, PrePostActionTy &Action) { + auto IPriv = Privates.begin(); + auto ILHS = LHSExprs.begin(); + auto IRHS = RHSExprs.begin(); + for (auto *E : ReductionOps) { + emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), + cast<DeclRefExpr>(*IRHS)); + ++IPriv; + ++ILHS; + ++IRHS; + } + }; + RegionCodeGenTy RCG(CodeGen); + NVPTXActionTy Action( + nullptr, llvm::None, + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), + EndArgs); + RCG.setAction(Action); + RCG(CGF); + CGF.EmitBranch(DefaultBB); + CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 4010b46..ae25e94 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -43,6 +43,8 @@ private: void createWorkerFunction(CodeGenModule &CGM); }; + bool isInSpmdExecutionMode() const; + /// \brief Emit the worker function for the current target region. void emitWorkerFunction(WorkerFunctionState &WST); @@ -58,11 +60,12 @@ private: /// function. void emitGenericEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); - /// \brief Returns specified OpenMP runtime function for the current OpenMP - /// implementation. Specialized for the NVPTX device. - /// \param Function OpenMP runtime function. - /// \return Specified function. - llvm::Constant *createNVPTXRuntimeFunction(unsigned Function); + /// \brief Helper for Spmd mode target directive's entry function. + void emitSpmdEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, + const OMPExecutableDirective &D); + + /// \brief Signal termination of Spmd mode execution. + void emitSpmdEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); // // Base class overrides. @@ -87,6 +90,22 @@ private: llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen); + /// \brief Emit outlined function specialized for the Single Program + /// Multiple Data programming model for applicable target directives on the + /// NVPTX device. + /// \param D Directive to emit. + /// \param ParentName Name of the function that encloses the target region. + /// \param OutlinedFn Outlined function value to be defined by this call. + /// \param OutlinedFnID Outlined function ID value to be defined by this call. + /// \param IsOffloadEntry True if the outlined function is an offload entry. + /// \param CodeGen Object containing the target statements. + /// An outlined function may not be an entry if, e.g. the if clause always + /// evaluates to false. + void emitSpmdKernel(const OMPExecutableDirective &D, StringRef ParentName, + llvm::Function *&OutlinedFn, + llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, + const RegionCodeGenTy &CodeGen); + /// \brief Emit outlined function for 'target' directive on the NVPTX /// device. /// \param D Directive to emit. 
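For orientation, the SPMD/generic split that the emitSpmdKernel documentation above refers to can be pictured with two ordinary OpenMP C++ target regions. This is only an illustrative sketch under made-up function names; which directives Clang actually lowers through the SPMD path is decided by the codegen in this patch, not by anything in the snippet.

#include <cstddef>

// A combined construct like 'target parallel for' starts every GPU thread on
// the same parallel loop, which is the shape the SPMD entry points are for.
void scaleArray(double *A, std::size_t N, double Factor) {
#pragma omp target parallel for map(tofrom : A[0:N])
  for (std::size_t I = 0; I < N; ++I)
    A[I] *= Factor;
}

// A plain 'target' region with serial work before an inner 'parallel' needs
// the generic fork-join scheme: the team master runs the serial part while
// the remaining threads wait to be released as workers.
void adjustThenScale(double *A, std::size_t N, double Factor) {
#pragma omp target map(tofrom : A[0:N])
  {
    A[0] += 1.0; // serial part, executed by the initial (master) thread only
#pragma omp parallel for
    for (std::size_t I = 1; I < N; ++I)
      A[I] *= Factor;
  }
}

The first shape is what emitSpmdKernel is meant for; the second relies on the generic entry header/footer and the worker function machinery described earlier in this header.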
@@ -118,6 +137,22 @@ private: ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond); + /// \brief Emits code for parallel or serial call of the \a OutlinedFn with + /// variables captured in a record which address is stored in \a + /// CapturedStruct. + /// This call is for a parallel directive within an SPMD target directive. + /// \param OutlinedFn Outlined function to be run in parallel threads. Type of + /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). + /// \param CapturedVars A pointer to the record with the references to + /// variables used in \a OutlinedFn function. + /// \param IfCond Condition in the associated 'if' clause, if it was + /// specified, nullptr otherwise. + /// + void emitSpmdParallelCall(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars, + const Expr *IfCond); + protected: /// \brief Get the function name of an outlined region. // The name can be customized depending on the target. @@ -129,6 +164,20 @@ protected: public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); + /// \brief Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 + /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. + virtual void emitProcBindClause(CodeGenFunction &CGF, + OpenMPProcBindClauseKind ProcBind, + SourceLocation Loc) override; + + /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 + /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads' + /// clause. + /// \param NumThreads An integer value of threads. + virtual void emitNumThreadsClause(CodeGenFunction &CGF, + llvm::Value *NumThreads, + SourceLocation Loc) override; + /// \brief This function ought to emit, in the general case, a call to // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed // as these numbers are obtained through the PTX grid and block configuration. @@ -138,7 +187,22 @@ public: const Expr *ThreadLimit, SourceLocation Loc) override; /// \brief Emits inlined function for the specified OpenMP parallel - // directive but an inlined function for teams. + // directive. + /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, + /// kmp_int32 BoundID, struct context_vars*). + /// \param D OpenMP directive. + /// \param ThreadIDVar Variable for thread id in the current OpenMP region. + /// \param InnermostKind Kind of innermost directive (for simple directives it + /// is a directive itself, for combined - its innermost directive). + /// \param CodeGen Code generation sequence for the \a D directive. + llvm::Value * + emitParallelOutlinedFunction(const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) override; + + /// \brief Emits inlined function for the specified OpenMP teams + // directive. /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, /// kmp_int32 BoundID, struct context_vars*). /// \param D OpenMP directive. @@ -147,10 +211,10 @@ public: /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. 
llvm::Value * - emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) override; + emitTeamsOutlinedFunction(const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) override; /// \brief Emits code for teams call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a @@ -177,6 +241,50 @@ public: llvm::Value *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) override; + + /// Emit a code for reduction clause. + /// + /// \param Privates List of private copies for original reduction arguments. + /// \param LHSExprs List of LHS in \a ReductionOps reduction operations. + /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. + /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' + /// or 'operator binop(LHS, RHS)'. + /// \param Options List of options for reduction codegen: + /// WithNowait true if parent directive has also nowait clause, false + /// otherwise. + /// SimpleReduction Emit reduction operation only. Used for omp simd + /// directive on the host. + /// ReductionKind The kind of reduction to perform. + virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, + ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps, + ReductionOptionsTy Options) override; + + /// Returns specified OpenMP runtime function for the current OpenMP + /// implementation. Specialized for the NVPTX device. + /// \param Function OpenMP runtime function. + /// \return Specified function. + llvm::Constant *createNVPTXRuntimeFunction(unsigned Function); + + /// Target codegen is specialized based on two programming models: the + /// 'generic' fork-join model of OpenMP, and a more GPU efficient 'spmd' + /// model for constructs like 'target parallel' that support it. + enum ExecutionMode { + /// Single Program Multiple Data. + Spmd, + /// Generic codegen to support fork-join model. + Generic, + Unknown, + }; + +private: + // Track the execution mode when codegening directives within a target + // region. The appropriate mode (generic/spmd) is set on entry to the + // target region and used by containing directives such as 'parallel' + // to emit optimized code. + ExecutionMode CurrentExecutionMode; }; } // CodeGen namespace. diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp index 8370607..a13c386 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp @@ -145,7 +145,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S) { EmitCoroutineBody(cast<CoroutineBodyStmt>(*S)); break; case Stmt::CoreturnStmtClass: - CGM.ErrorUnsupported(S, "coroutine"); + EmitCoreturnStmt(cast<CoreturnStmt>(*S)); break; case Stmt::CapturedStmtClass: { const CapturedStmt *CS = cast<CapturedStmt>(S); @@ -1024,6 +1024,18 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) { /// if the function returns void, or may be missing one if the function returns /// non-void. Fun stuff :). 
void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { + if (requiresReturnValueCheck()) { + llvm::Constant *SLoc = EmitCheckSourceLocation(S.getLocStart()); + auto *SLocPtr = + new llvm::GlobalVariable(CGM.getModule(), SLoc->getType(), false, + llvm::GlobalVariable::PrivateLinkage, SLoc); + SLocPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + CGM.getSanitizerMetadata()->disableSanitizerForGlobal(SLocPtr); + assert(ReturnLocation.isValid() && "No valid return location"); + Builder.CreateStore(Builder.CreateBitCast(SLocPtr, Int8PtrTy), + ReturnLocation); + } + // Returning from an outlined SEH helper is UB, and we already warn on it. if (IsOutlinedSEHHelper) { Builder.CreateUnreachable(); @@ -1166,7 +1178,7 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) { if (Rem) Rem--; SwitchInsn->addCase(Builder.getInt(LHS), CaseDest); - LHS++; + ++LHS; } return; } @@ -2127,16 +2139,16 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { llvm::InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect, /* IsAlignStack */ false, AsmDialect); llvm::CallInst *Result = Builder.CreateCall(IA, Args); - Result->addAttribute(llvm::AttributeSet::FunctionIndex, + Result->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoUnwind); // Attach readnone and readonly attributes. if (!HasSideEffect) { if (ReadNone) - Result->addAttribute(llvm::AttributeSet::FunctionIndex, + Result->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ReadNone); else if (ReadOnly) - Result->addAttribute(llvm::AttributeSet::FunctionIndex, + Result->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ReadOnly); } @@ -2157,7 +2169,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Conservatively, mark all inline asm blocks in CUDA as convergent // (meaning, they may call an intrinsically convergent op, such as bar.sync, // and so can't have certain optimizations applied around them). - Result->addAttribute(llvm::AttributeSet::FunctionIndex, + Result->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::Convergent); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp index 39e1cdf..cf430f8 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -26,7 +26,7 @@ using namespace CodeGen; namespace { /// Lexical scope for OpenMP executable constructs, that handles correct codegen /// for captured expressions. -class OMPLexicalScope final : public CodeGenFunction::LexicalScope { +class OMPLexicalScope : public CodeGenFunction::LexicalScope { void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { for (const auto *C : S.clauses()) { if (auto *CPI = OMPClauseWithPreInit::get(C)) { @@ -54,10 +54,11 @@ class OMPLexicalScope final : public CodeGenFunction::LexicalScope { public: OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S, - bool AsInlined = false) + bool AsInlined = false, bool EmitPreInitStmt = true) : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), InlinedShareds(CGF) { - emitPreInitStmt(CGF, S); + if (EmitPreInitStmt) + emitPreInitStmt(CGF, S); if (AsInlined) { if (S.hasAssociatedStmt()) { auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); @@ -81,6 +82,39 @@ public: } }; +/// Lexical scope for OpenMP parallel construct, that handles correct codegen +/// for captured expressions. 
+class OMPParallelScope final : public OMPLexicalScope {
+  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
+    OpenMPDirectiveKind Kind = S.getDirectiveKind();
+    return !(isOpenMPTargetExecutionDirective(Kind) ||
+             isOpenMPLoopBoundSharingDirective(Kind)) &&
+           isOpenMPParallelDirective(Kind);
+  }
+
+public:
+  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
+      : OMPLexicalScope(CGF, S,
+                        /*AsInlined=*/false,
+                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
+};
+
+/// Lexical scope for OpenMP teams construct, that handles correct codegen
+/// for captured expressions.
+class OMPTeamsScope final : public OMPLexicalScope {
+  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
+    OpenMPDirectiveKind Kind = S.getDirectiveKind();
+    return !isOpenMPTargetExecutionDirective(Kind) &&
+           isOpenMPTeamsDirective(Kind);
+  }
+
+public:
+  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
+      : OMPLexicalScope(CGF, S,
+                        /*AsInlined=*/false,
+                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
+};
+
 /// Private scope for OpenMP loop-based directives, that supports capturing
 /// of used expression from loop statement.
 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
@@ -194,21 +228,58 @@ static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
   return TmpAddr;
 }
 
-llvm::Function *
-CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
-  assert(
-      CapturedStmtInfo &&
-      "CapturedStmtInfo should be set when generating the captured function");
-  const CapturedDecl *CD = S.getCapturedDecl();
-  const RecordDecl *RD = S.getCapturedRecordDecl();
+static QualType getCanonicalParamType(ASTContext &C, QualType T) {
+  if (T->isLValueReferenceType()) {
+    return C.getLValueReferenceType(
+        getCanonicalParamType(C, T.getNonReferenceType()),
+        /*SpelledAsLValue=*/false);
+  }
+  if (T->isPointerType())
+    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
+  return C.getCanonicalParamType(T);
+}
+
+namespace {
+  /// Contains required data for proper outlined function codegen.
+  struct FunctionOptions {
+    /// Captured statement for which the function is generated.
+    const CapturedStmt *S = nullptr;
+    /// true if cast to/from UIntPtr is required for variables captured by
+    /// value.
+    bool UIntPtrCastRequired = true;
+    /// true if only casted arguments must be registered as local args or VLA
+    /// sizes.
+    bool RegisterCastedArgsOnly = false;
+    /// Name of the generated function.
+    StringRef FunctionName;
+    explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
+                             bool RegisterCastedArgsOnly,
+                             StringRef FunctionName)
+        : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
+          RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
+          FunctionName(FunctionName) {}
+  };
+}
+
+static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue(
+    CodeGenFunction &CGF, FunctionArgList &Args,
+    llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>>
+        &LocalAddrs,
+    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
+        &VLASizes,
+    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
+  const CapturedDecl *CD = FO.S->getCapturedDecl();
+  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
   assert(CD->hasBody() && "missing CapturedDecl body");
+  CXXThisValue = nullptr;
   // Build the argument list.
+ CodeGenModule &CGM = CGF.CGM; ASTContext &Ctx = CGM.getContext(); - FunctionArgList Args; + bool HasUIntPtrArgs = false; Args.append(CD->param_begin(), std::next(CD->param_begin(), CD->getContextParamPosition())); - auto I = S.captures().begin(); + auto I = FO.S->captures().begin(); for (auto *FD : RD->fields()) { QualType ArgType = FD->getType(); IdentifierInfo *II = nullptr; @@ -220,29 +291,26 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { // deal with pointers. We can pass in the same way the VLA type sizes to the // outlined function. if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || - I->capturesVariableArrayType()) - ArgType = Ctx.getUIntPtrType(); + I->capturesVariableArrayType()) { + HasUIntPtrArgs = true; + if (FO.UIntPtrCastRequired) + ArgType = Ctx.getUIntPtrType(); + } if (I->capturesVariable() || I->capturesVariableByCopy()) { CapVar = I->getCapturedVar(); II = CapVar->getIdentifier(); } else if (I->capturesThis()) - II = &getContext().Idents.get("this"); + II = &Ctx.Idents.get("this"); else { assert(I->capturesVariableArrayType()); - II = &getContext().Idents.get("vla"); - } - if (ArgType->isVariablyModifiedType()) { - bool IsReference = ArgType->isLValueReferenceType(); - ArgType = - getContext().getCanonicalParamType(ArgType.getNonReferenceType()); - if (IsReference && !ArgType->isPointerType()) { - ArgType = getContext().getLValueReferenceType( - ArgType, /*SpelledAsLValue=*/false); - } + II = &Ctx.Idents.get("vla"); } - Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr, - FD->getLocation(), II, ArgType)); + if (ArgType->isVariablyModifiedType()) + ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType()); + Args.push_back(ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, + FD->getLocation(), II, ArgType, + ImplicitParamDecl::Other)); ++I; } Args.append( @@ -255,89 +323,166 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); - llvm::Function *F = llvm::Function::Create( - FuncLLVMTy, llvm::GlobalValue::InternalLinkage, - CapturedStmtInfo->getHelperName(), &CGM.getModule()); + llvm::Function *F = + llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, + FO.FunctionName, &CGM.getModule()); CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); if (CD->isNothrow()) F->addFnAttr(llvm::Attribute::NoUnwind); // Generate the function. - StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), - CD->getBody()->getLocStart()); + CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), + CD->getBody()->getLocStart()); unsigned Cnt = CD->getContextParamPosition(); - I = S.captures().begin(); + I = FO.S->captures().begin(); for (auto *FD : RD->fields()) { // If we are capturing a pointer by copy we don't need to do anything, just // use the value that we get from the arguments. if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { const VarDecl *CurVD = I->getCapturedVar(); - Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]); + Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); // If the variable is a reference we need to materialize it here. 
if (CurVD->getType()->isReferenceType()) { - Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(), - ".materialized_ref"); - EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false, - CurVD->getType()); + Address RefAddr = CGF.CreateMemTemp( + CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref"); + CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, + /*Volatile=*/false, CurVD->getType()); LocalAddr = RefAddr; } - setAddrOfLocalVar(CurVD, LocalAddr); + if (!FO.RegisterCastedArgsOnly) + LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}}); ++Cnt; ++I; continue; } - LValue ArgLVal = - MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(), - AlignmentSource::Decl); + LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); + LValue ArgLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(Args[Cnt]), + Args[Cnt]->getType(), BaseInfo); if (FD->hasCapturedVLAType()) { - LValue CastedArgLVal = - MakeAddrLValue(castValueFromUintptr(*this, FD->getType(), - Args[Cnt]->getName(), ArgLVal), - FD->getType(), AlignmentSource::Decl); + if (FO.UIntPtrCastRequired) { + ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(), + Args[Cnt]->getName(), + ArgLVal), + FD->getType(), BaseInfo); + } auto *ExprArg = - EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal(); + CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal(); auto VAT = FD->getCapturedVLAType(); - VLASizeMap[VAT->getSizeExpr()] = ExprArg; + VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}}); } else if (I->capturesVariable()) { auto *Var = I->getCapturedVar(); QualType VarTy = Var->getType(); Address ArgAddr = ArgLVal.getAddress(); if (!VarTy->isReferenceType()) { if (ArgLVal.getType()->isLValueReferenceType()) { - ArgAddr = EmitLoadOfReference( + ArgAddr = CGF.EmitLoadOfReference( ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { assert(ArgLVal.getType()->isPointerType()); - ArgAddr = EmitLoadOfPointer( + ArgAddr = CGF.EmitLoadOfPointer( ArgAddr, ArgLVal.getType()->castAs<PointerType>()); } } - setAddrOfLocalVar( - Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var))); + if (!FO.RegisterCastedArgsOnly) { + LocalAddrs.insert( + {Args[Cnt], + {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}}); + } } else if (I->capturesVariableByCopy()) { assert(!FD->getType()->isAnyPointerType() && "Not expecting a captured pointer."); auto *Var = I->getCapturedVar(); QualType VarTy = Var->getType(); - setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(), - Args[Cnt]->getName(), ArgLVal, - VarTy->isReferenceType())); + LocalAddrs.insert( + {Args[Cnt], + {Var, + FO.UIntPtrCastRequired + ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(), + ArgLVal, VarTy->isReferenceType()) + : ArgLVal.getAddress()}}); } else { // If 'this' is captured, load it into CXXThisValue. assert(I->capturesThis()); - CXXThisValue = - EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal(); + CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()) + .getScalarVal(); + LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}}); } ++Cnt; ++I; } + return {F, HasUIntPtrArgs}; +} + +llvm::Function * +CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { + assert( + CapturedStmtInfo && + "CapturedStmtInfo should be set when generating the captured function"); + const CapturedDecl *CD = S.getCapturedDecl(); + // Build the argument list. 
+ bool NeedWrapperFunction = + getDebugInfo() && + CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo; + FunctionArgList Args; + llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; + llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; + FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, + CapturedStmtInfo->getHelperName()); + llvm::Function *F; + bool HasUIntPtrArgs; + std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue( + *this, Args, LocalAddrs, VLASizes, CXXThisValue, FO); + for (const auto &LocalAddrPair : LocalAddrs) { + if (LocalAddrPair.second.first) { + setAddrOfLocalVar(LocalAddrPair.second.first, + LocalAddrPair.second.second); + } + } + for (const auto &VLASizePair : VLASizes) + VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; PGO.assignRegionCounters(GlobalDecl(CD), F); CapturedStmtInfo->EmitBody(*this, CD->getBody()); FinishFunction(CD->getBodyRBrace()); - - return F; + if (!NeedWrapperFunction || !HasUIntPtrArgs) + return F; + + FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, + /*RegisterCastedArgsOnly=*/true, + ".nondebug_wrapper."); + CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); + WrapperCGF.disableDebugInfo(); + Args.clear(); + LocalAddrs.clear(); + VLASizes.clear(); + llvm::Function *WrapperF = + emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, + WrapperCGF.CXXThisValue, WrapperFO).first; + LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); + llvm::SmallVector<llvm::Value *, 4> CallArgs; + for (const auto *Arg : Args) { + llvm::Value *CallArg; + auto I = LocalAddrs.find(Arg); + if (I != LocalAddrs.end()) { + LValue LV = + WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo); + CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); + } else { + auto EI = VLASizes.find(Arg); + if (EI != VLASizes.end()) + CallArg = EI->second.second; + else { + LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), + Arg->getType(), BaseInfo); + CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); + } + } + CallArgs.emplace_back(CallArg); + } + WrapperCGF.Builder.CreateCall(F, CallArgs); + WrapperCGF.FinishFunction(); + return WrapperF; } //===----------------------------------------------------------------------===// @@ -404,156 +549,6 @@ void CodeGenFunction::EmitOMPAggregateAssign( EmitBlock(DoneBB, /*IsFinished=*/true); } -/// Check if the combiner is a call to UDR combiner and if it is so return the -/// UDR decl used for reduction. 
-static const OMPDeclareReductionDecl * -getReductionInit(const Expr *ReductionOp) { - if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) - if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) - if (auto *DRE = - dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) - if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) - return DRD; - return nullptr; -} - -static void emitInitWithReductionInitializer(CodeGenFunction &CGF, - const OMPDeclareReductionDecl *DRD, - const Expr *InitOp, - Address Private, Address Original, - QualType Ty) { - if (DRD->getInitializer()) { - std::pair<llvm::Function *, llvm::Function *> Reduction = - CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); - auto *CE = cast<CallExpr>(InitOp); - auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); - const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); - const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); - auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); - auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), - [=]() -> Address { return Private; }); - PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), - [=]() -> Address { return Original; }); - (void)PrivateScope.Privatize(); - RValue Func = RValue::get(Reduction.second); - CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); - CGF.EmitIgnoredExpr(InitOp); - } else { - llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); - auto *GV = new llvm::GlobalVariable( - CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, Init, ".init"); - LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); - RValue InitRVal; - switch (CGF.getEvaluationKind(Ty)) { - case TEK_Scalar: - InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); - break; - case TEK_Complex: - InitRVal = - RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); - break; - case TEK_Aggregate: - InitRVal = RValue::getAggregate(LV.getAddress()); - break; - } - OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); - CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); - CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), - /*IsInitializer=*/false); - } -} - -/// \brief Emit initialization of arrays of complex types. -/// \param DestAddr Address of the array. -/// \param Type Type of array. -/// \param Init Initial expression of array. -/// \param SrcAddr Address of the original array. -static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, - QualType Type, const Expr *Init, - Address SrcAddr = Address::invalid()) { - auto *DRD = getReductionInit(Init); - // Perform element-by-element initialization. - QualType ElementTy; - - // Drill down to the base element type on both arrays. - auto ArrayTy = Type->getAsArrayTypeUnsafe(); - auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); - DestAddr = - CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); - if (DRD) - SrcAddr = - CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); - - llvm::Value *SrcBegin = nullptr; - if (DRD) - SrcBegin = SrcAddr.getPointer(); - auto DestBegin = DestAddr.getPointer(); - // Cast from pointer to array type to pointer to single element. - auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); - // The basic structure here is a while-do loop. 
- auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); - auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); - auto IsEmpty = - CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); - CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); - - // Enter the loop body, making that address the current address. - auto EntryBB = CGF.Builder.GetInsertBlock(); - CGF.EmitBlock(BodyBB); - - CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); - - llvm::PHINode *SrcElementPHI = nullptr; - Address SrcElementCurrent = Address::invalid(); - if (DRD) { - SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, - "omp.arraycpy.srcElementPast"); - SrcElementPHI->addIncoming(SrcBegin, EntryBB); - SrcElementCurrent = - Address(SrcElementPHI, - SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); - } - llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( - DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); - DestElementPHI->addIncoming(DestBegin, EntryBB); - Address DestElementCurrent = - Address(DestElementPHI, - DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); - - // Emit copy. - { - CodeGenFunction::RunCleanupsScope InitScope(CGF); - if (DRD && (DRD->getInitializer() || !Init)) { - emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, - SrcElementCurrent, ElementTy); - } else - CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), - /*IsInitializer=*/false); - } - - if (DRD) { - // Shift the address forward by one element. - auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( - SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); - SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); - } - - // Shift the address forward by one element. - auto DestElementNext = CGF.Builder.CreateConstGEP1_32( - DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); - // Check whether we've reached the end. - auto Done = - CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); - CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); - DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); - - // Done. 
- CGF.EmitBlock(DoneBB, /*IsFinished=*/true); -} - void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy) { @@ -906,259 +901,111 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( EmitBlock(DoneBB, /*IsFinished=*/true); } -static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, - LValue BaseLV, llvm::Value *Addr) { - Address Tmp = Address::invalid(); - Address TopTmp = Address::invalid(); - Address MostTopTmp = Address::invalid(); - BaseTy = BaseTy.getNonReferenceType(); - while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && - !CGF.getContext().hasSameType(BaseTy, ElTy)) { - Tmp = CGF.CreateMemTemp(BaseTy); - if (TopTmp.isValid()) - CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); - else - MostTopTmp = Tmp; - TopTmp = Tmp; - BaseTy = BaseTy->getPointeeType(); - } - llvm::Type *Ty = BaseLV.getPointer()->getType(); - if (Tmp.isValid()) - Ty = Tmp.getElementType(); - Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); - if (Tmp.isValid()) { - CGF.Builder.CreateStore(Addr, Tmp); - return MostTopTmp; - } - return Address(Addr, BaseLV.getAlignment()); -} - -static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, - LValue BaseLV) { - BaseTy = BaseTy.getNonReferenceType(); - while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && - !CGF.getContext().hasSameType(BaseTy, ElTy)) { - if (auto *PtrTy = BaseTy->getAs<PointerType>()) - BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); - else { - BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), - BaseTy->castAs<ReferenceType>()); - } - BaseTy = BaseTy->getPointeeType(); - } - return CGF.MakeAddrLValue( - Address( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()), - BaseLV.getAlignment()), - BaseLV.getType(), BaseLV.getAlignmentSource()); -} - void CodeGenFunction::EmitOMPReductionClauseInit( const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { if (!HaveInsertPoint()) return; + SmallVector<const Expr *, 4> Shareds; + SmallVector<const Expr *, 4> Privates; + SmallVector<const Expr *, 4> ReductionOps; + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); auto IPriv = C->privates().begin(); auto IRed = C->reduction_ops().begin(); - for (auto IRef : C->varlists()) { - auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); - auto *DRD = getReductionInit(*IRed); - if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) { - auto *Base = OASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) - Base = TempOASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) - Base = TempASE->getBase()->IgnoreParenImpCasts(); - auto *DE = cast<DeclRefExpr>(Base); - auto *OrigVD = cast<VarDecl>(DE->getDecl()); - auto OASELValueLB = EmitOMPArraySectionExpr(OASE); - auto OASELValueUB = - EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); - auto OriginalBaseLValue = EmitLValue(DE); - LValue BaseLValue = - loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(), - 
OriginalBaseLValue); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address { - return OASELValueLB.getAddress(); - }); - // Emit reduction copy. - bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB, - OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address { - // Emit VarDecl with copy init for arrays. - // Get the address of the original variable captured in current - // captured region. - auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(), - OASELValueLB.getPointer()); - Size = Builder.CreateNUWAdd( - Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); - CodeGenFunction::OpaqueValueMapping OpaqueMap( - *this, cast<OpaqueValueExpr>( - getContext() - .getAsVariableArrayType(PrivateVD->getType()) - ->getSizeExpr()), - RValue::get(Size)); - EmitVariablyModifiedType(PrivateVD->getType()); - auto Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - auto *Init = PrivateVD->getInit(); - EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), - DRD ? *IRed : Init, - OASELValueLB.getAddress()); - EmitAutoVarCleanups(Emission); - // Emit private VarDecl with reduction init. - auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), - OASELValueLB.getPointer()); - auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - return castToBase(*this, OrigVD->getType(), - OASELValueLB.getType(), OriginalBaseLValue, - Ptr); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); - }); - } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) { - auto *Base = ASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) - Base = TempASE->getBase()->IgnoreParenImpCasts(); - auto *DE = cast<DeclRefExpr>(Base); - auto *OrigVD = cast<VarDecl>(DE->getDecl()); - auto ASELValue = EmitLValue(ASE); - auto OriginalBaseLValue = EmitLValue(DE); - LValue BaseLValue = loadToBegin( - *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address { - return ASELValue.getAddress(); - }); - // Emit reduction copy. - bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue, - OriginalBaseLValue, DRD, IRed]() -> Address { - // Emit private VarDecl with reduction init. - AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { - emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, - ASELValue.getAddress(), - ASELValue.getType()); - } else - EmitAutoVarInit(Emission); - EmitAutoVarCleanups(Emission); - auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), - ASELValue.getPointer()); - auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - return castToBase(*this, OrigVD->getType(), ASELValue.getType(), - OriginalBaseLValue, Ptr); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. 
- (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { - return Builder.CreateElementBitCast( - GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()), - "rhs.begin"); - }); - } else { - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); - QualType Type = PrivateVD->getType(); - if (getContext().getAsArrayType(Type)) { - // Store the address of the original variable associated with the LHS - // implicit variable. - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - IRef->getType(), VK_LValue, IRef->getExprLoc()); - Address OriginalAddr = EmitLValue(&DRE).getAddress(); - PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr, - LHSVD]() -> Address { - OriginalAddr = Builder.CreateElementBitCast( - OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); - return OriginalAddr; - }); - bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { - if (Type->isVariablyModifiedType()) { - CodeGenFunction::OpaqueValueMapping OpaqueMap( - *this, cast<OpaqueValueExpr>( - getContext() - .getAsVariableArrayType(PrivateVD->getType()) - ->getSizeExpr()), - RValue::get( - getTypeSize(OrigVD->getType().getNonReferenceType()))); - EmitVariablyModifiedType(Type); - } - auto Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - auto *Init = PrivateVD->getInit(); - EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), - DRD ? *IRed : Init, OriginalAddr); - EmitAutoVarCleanups(Emission); - return Emission.getAllocatedAddress(); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { - return Builder.CreateElementBitCast( - GetAddrOfLocalVar(PrivateVD), - ConvertTypeForMem(RHSVD->getType()), "rhs.begin"); - }); - } else { - // Store the address of the original variable associated with the LHS - // implicit variable. - Address OriginalAddr = Address::invalid(); - PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef, - &OriginalAddr]() -> Address { - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - IRef->getType(), VK_LValue, IRef->getExprLoc()); - OriginalAddr = EmitLValue(&DRE).getAddress(); - return OriginalAddr; - }); - // Emit reduction copy. - bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address { - // Emit private VarDecl with reduction init. - AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { - emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, - OriginalAddr, - PrivateVD->getType()); - } else - EmitAutoVarInit(Emission); - EmitAutoVarCleanups(Emission); - return Addr; - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. 
- (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); - }); - } + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const auto *Ref : C->varlists()) { + Shareds.emplace_back(Ref); + Privates.emplace_back(*IPriv); + ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); + unsigned Count = 0; + auto ILHS = LHSs.begin(); + auto IRHS = RHSs.begin(); + auto IPriv = Privates.begin(); + for (const auto *IRef : Shareds) { + auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); + // Emit private VarDecl with reduction init. + RedCG.emitSharedLValue(*this, Count); + RedCG.emitAggregateType(*this, Count); + auto Emission = EmitAutoVarAlloca(*PrivateVD); + RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), + RedCG.getSharedLValue(Count), + [&Emission](CodeGenFunction &CGF) { + CGF.EmitAutoVarInit(Emission); + return true; + }); + EmitAutoVarCleanups(Emission); + Address BaseAddr = RedCG.adjustPrivateAddress( + *this, Count, Emission.getAllocatedAddress()); + bool IsRegistered = PrivateScope.addPrivate( + RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + + auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + if (isa<OMPArraySectionExpr>(IRef)) { + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { + return RedCG.getSharedLValue(Count).getAddress(); + }); + PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { + return GetAddrOfLocalVar(PrivateVD); + }); + } else if (isa<ArraySubscriptExpr>(IRef)) { + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { + return RedCG.getSharedLValue(Count).getAddress(); + }); + PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { + return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), + "rhs.begin"); + }); + } else { + QualType Type = PrivateVD->getType(); + bool IsArray = getContext().getAsArrayType(Type) != nullptr; + Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(); + // Store the address of the original variable associated with the LHS + // implicit variable. + if (IsArray) { + OriginalAddr = Builder.CreateElementBitCast( + OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); } - ++ILHS; - ++IRHS; - ++IPriv; - ++IRed; + PrivateScope.addPrivate( + LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; }); + PrivateScope.addPrivate( + RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address { + return IsArray + ? 
Builder.CreateElementBitCast( + GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), "rhs.begin") + : GetAddrOfLocalVar(PrivateVD); + }); } + ++ILHS; + ++IRHS; + ++IPriv; + ++Count; } } void CodeGenFunction::EmitOMPReductionClauseFinal( - const OMPExecutableDirective &D) { + const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { if (!HaveInsertPoint()) return; llvm::SmallVector<const Expr *, 8> Privates; @@ -1174,14 +1021,15 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); } if (HasAtLeastOneReduction) { + bool WithNowait = D.getSingleClause<OMPNowaitClause>() || + isOpenMPParallelDirective(D.getDirectiveKind()) || + D.getDirectiveKind() == OMPD_simd; + bool SimpleReduction = D.getDirectiveKind() == OMPD_simd; // Emit nowait reduction if nowait clause is present or directive is a // parallel directive (it always has implicit barrier). CGM.getOpenMPRuntime().emitReduction( *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps, - D.getSingleClause<OMPNowaitClause>() || - isOpenMPParallelDirective(D.getDirectiveKind()) || - D.getDirectiveKind() == OMPD_simd, - D.getDirectiveKind() == OMPD_simd); + {WithNowait, SimpleReduction, ReductionKind}); } } @@ -1210,14 +1058,23 @@ static void emitPostUpdateForReductionClause( CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } -static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, - const OMPExecutableDirective &S, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) { - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). - emitParallelOrTeamsOutlinedFunction(S, - *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); +namespace { +/// Codegen lambda for appending distribute lower and upper bounds to outlined +/// parallel function. This is necessary for combined constructs such as +/// 'distribute parallel for' +typedef llvm::function_ref<void(CodeGenFunction &, + const OMPExecutableDirective &, + llvm::SmallVectorImpl<llvm::Value *> &)> + CodeGenBoundParametersTy; +} // anonymous namespace + +static void emitCommonOMPParallelDirective( + CodeGenFunction &CGF, const OMPExecutableDirective &S, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const CodeGenBoundParametersTy &CodeGenBoundParameters) { + const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); + auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( + S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), @@ -1239,13 +1096,22 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, } } - OMPLexicalScope Scope(CGF, S); + OMPParallelScope Scope(CGF, S); llvm::SmallVector<llvm::Value *, 16> CapturedVars; + // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk + // lower and upper bounds with the pragma 'for' chunking mechanism. 
+ // The following lambda takes care of appending the lower and upper bound + // parameters when necessary + CodeGenBoundParameters(CGF, S, CapturedVars); CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, CapturedVars, IfCond); } +static void emitEmptyBoundParameters(CodeGenFunction &, + const OMPExecutableDirective &, + llvm::SmallVectorImpl<llvm::Value *> &) {} + void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // Emit parallel region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { @@ -1264,9 +1130,10 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - CGF.EmitOMPReductionClauseFinal(S); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; - emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, + emitEmptyBoundParameters); emitPostUpdateForReductionClause( *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } @@ -1343,12 +1210,14 @@ void CodeGenFunction::EmitOMPInnerLoop( EmitBlock(LoopExit.getBlock()); } -void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { +bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { if (!HaveInsertPoint()) - return; + return false; // Emit inits for the linear variables. + bool HasLinears = false; for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { for (auto *Init : C->inits()) { + HasLinears = true; auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { AutoVarEmission Emission = EmitAutoVarAlloca(*VD); @@ -1373,6 +1242,7 @@ void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { EmitIgnoredExpr(CS); } } + return HasLinears; } void CodeGenFunction::EmitOMPLinearClauseFinal( @@ -1611,6 +1481,13 @@ void CodeGenFunction::EmitOMPSimdFinal( EmitBlock(DoneBB, /*IsFinished=*/true); } +static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, + const OMPLoopDirective &S, + CodeGenFunction::JumpDest LoopExit) { + CGF.EmitOMPLoopBody(S, LoopExit); + CGF.EmitStopPoint(&S); +} + void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPLoopScope PreInitScope(CGF, S); @@ -1655,7 +1532,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGF.EmitOMPSimdInit(S); emitAlignedClause(CGF, S); - CGF.EmitOMPLinearClauseInit(S); + (void)CGF.EmitOMPLinearClauseInit(S); { OMPPrivateScope LoopScope(CGF); CGF.EmitOMPPrivateLoopCounters(S, LoopScope); @@ -1677,7 +1554,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { // Emit final copy of the lastprivate variables at the end of loops. 
if (HasLastprivateClause) CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); - CGF.EmitOMPReductionClauseFinal(S); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); emitPostUpdateForReductionClause( CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } @@ -1693,9 +1570,12 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } -void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, - const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { +void CodeGenFunction::EmitOMPOuterLoop( + bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, + CodeGenFunction::OMPPrivateScope &LoopScope, + const CodeGenFunction::OMPLoopArguments &LoopArgs, + const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, + const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { auto &RT = CGM.getOpenMPRuntime(); const Expr *IVExpr = S.getIterationVariable(); @@ -1713,15 +1593,18 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, llvm::Value *BoolCondVal = nullptr; if (!DynamicOrOrdered) { - // UB = min(UB, GlobalUB) - EmitIgnoredExpr(S.getEnsureUpperBound()); + // UB = min(UB, GlobalUB) or + // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. + // 'distribute parallel for') + EmitIgnoredExpr(LoopArgs.EUB); // IV = LB - EmitIgnoredExpr(S.getInit()); + EmitIgnoredExpr(LoopArgs.Init); // IV < UB - BoolCondVal = EvaluateExprAsBool(S.getCond()); + BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); } else { - BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL, - LB, UB, ST); + BoolCondVal = + RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, + LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); } // If there are any cleanups between here and the loop-exit scope, @@ -1741,7 +1624,7 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, // Emit "IV = LB" (in case of static schedule, we have already calculated new // LB for loop condition and emitted it above). if (DynamicOrOrdered) - EmitIgnoredExpr(S.getInit()); + EmitIgnoredExpr(LoopArgs.Init); // Create a block for the increment. auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); @@ -1755,24 +1638,27 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, EmitOMPSimdInit(S, IsMonotonic); SourceLocation Loc = S.getLocStart(); - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), - [&S, LoopExit](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, LoopExit); - CGF.EmitStopPoint(&S); - }, - [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) { - if (Ordered) { - CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd( - CGF, Loc, IVSize, IVSigned); - } - }); + + // when 'distribute' is not combined with a 'for': + // while (idx <= UB) { BODY; ++idx; } + // when 'distribute' is combined with a 'for' + // (e.g. 
'distribute parallel for') + // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } + EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, + [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { + CodeGenLoop(CGF, S, LoopExit); + }, + [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { + CodeGenOrdered(CGF, Loc, IVSize, IVSigned); + }); EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); if (!DynamicOrOrdered) { // Emit "LB = LB + Stride", "UB = UB + Stride". - EmitIgnoredExpr(S.getNextLowerBound()); - EmitIgnoredExpr(S.getNextUpperBound()); + EmitIgnoredExpr(LoopArgs.NextLB); + EmitIgnoredExpr(LoopArgs.NextUB); } EmitBranch(CondBlock); @@ -1791,7 +1677,8 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, void CodeGenFunction::EmitOMPForOuterLoop( const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { + const OMPLoopArguments &LoopArgs, + const CodeGenDispatchBoundsTy &CGDispatchBounds) { auto &RT = CGM.getOpenMPRuntime(); // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). @@ -1800,7 +1687,7 @@ void CodeGenFunction::EmitOMPForOuterLoop( assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, - /*Chunked=*/Chunk != nullptr)) && + LoopArgs.Chunk != nullptr)) && "static non-chunked schedule does not need outer loop"); // Emit outer loop. @@ -1858,22 +1745,46 @@ void CodeGenFunction::EmitOMPForOuterLoop( const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); if (DynamicOrOrdered) { - llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); + auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); + llvm::Value *LBVal = DispatchBounds.first; + llvm::Value *UBVal = DispatchBounds.second; + CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, + LoopArgs.Chunk}; RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, - IVSigned, Ordered, UBVal, Chunk); + IVSigned, Ordered, DipatchRTInputValues); } else { RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, - Ordered, IL, LB, UB, ST, Chunk); + Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, + LoopArgs.ST, LoopArgs.Chunk); } - EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB, - ST, IL, Chunk); + auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, + const unsigned IVSize, + const bool IVSigned) { + if (Ordered) { + CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, + IVSigned); + } + }; + + OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, + LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); + OuterLoopArgs.IncExpr = S.getInc(); + OuterLoopArgs.Init = S.getInit(); + OuterLoopArgs.Cond = S.getCond(); + OuterLoopArgs.NextLB = S.getNextLowerBound(); + OuterLoopArgs.NextUB = S.getNextUpperBound(); + EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, + emitOMPLoopBodyWithStopPoint, CodeGenOrdered); } +static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, + const unsigned IVSize, const bool IVSigned) {} + void CodeGenFunction::EmitOMPDistributeOuterLoop( - OpenMPDistScheduleClauseKind ScheduleKind, - const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { + OpenMPDistScheduleClauseKind 
ScheduleKind, const OMPLoopDirective &S, + OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, + const CodeGenLoopTy &CodeGenLoopContent) { auto &RT = CGM.getOpenMPRuntime(); @@ -1886,26 +1797,159 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop( const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, /* Ordered = */ false, - IL, LB, UB, ST, Chunk); + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, + IVSigned, /* Ordered = */ false, LoopArgs.IL, + LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, + LoopArgs.Chunk); + + // for combined 'distribute' and 'for' the increment expression of distribute + // is store in DistInc. For 'distribute' alone, it is in Inc. + Expr *IncExpr; + if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) + IncExpr = S.getDistInc(); + else + IncExpr = S.getInc(); + + // this routine is shared by 'omp distribute parallel for' and + // 'omp distribute': select the right EUB expression depending on the + // directive + OMPLoopArguments OuterLoopArgs; + OuterLoopArgs.LB = LoopArgs.LB; + OuterLoopArgs.UB = LoopArgs.UB; + OuterLoopArgs.ST = LoopArgs.ST; + OuterLoopArgs.IL = LoopArgs.IL; + OuterLoopArgs.Chunk = LoopArgs.Chunk; + OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedEnsureUpperBound() + : S.getEnsureUpperBound(); + OuterLoopArgs.IncExpr = IncExpr; + OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedInit() + : S.getInit(); + OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedCond() + : S.getCond(); + OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedNextLowerBound() + : S.getNextLowerBound(); + OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedNextUpperBound() + : S.getNextUpperBound(); + + EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, + LoopScope, OuterLoopArgs, CodeGenLoopContent, + emitEmptyOrdered); +} + +/// Emit a helper variable and return corresponding lvalue. +static LValue EmitOMPHelperVar(CodeGenFunction &CGF, + const DeclRefExpr *Helper) { + auto VDecl = cast<VarDecl>(Helper->getDecl()); + CGF.EmitVarDecl(*VDecl); + return CGF.EmitLValue(Helper); +} + +static std::pair<LValue, LValue> +emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, + const OMPExecutableDirective &S) { + const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); + LValue LB = + EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); + + // When composing 'distribute' with 'for' (e.g. as in 'distribute + // parallel for') we need to use the 'distribute' + // chunk lower and upper bounds rather than the whole loop iteration + // space. These are parameters to the outlined function for 'parallel' + // and we copy the bounds of the previous schedule into the + // the current ones. 
+ LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); + LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); + llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation()); + PrevLBVal = CGF.EmitScalarConversion( + PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), + LS.getIterationVariable()->getType(), SourceLocation()); + llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation()); + PrevUBVal = CGF.EmitScalarConversion( + PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), + LS.getIterationVariable()->getType(), SourceLocation()); + + CGF.EmitStoreOfScalar(PrevLBVal, LB); + CGF.EmitStoreOfScalar(PrevUBVal, UB); + + return {LB, UB}; +} + +/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then +/// we need to use the LB and UB expressions generated by the worksharing +/// code generation support, whereas in non combined situations we would +/// just emit 0 and the LastIteration expression +/// This function is necessary due to the difference of the LB and UB +/// types for the RT emission routines for 'for_static_init' and +/// 'for_dispatch_init' +static std::pair<llvm::Value *, llvm::Value *> +emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + Address LB, Address UB) { + const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); + const Expr *IVExpr = LS.getIterationVariable(); + // when implementing a dynamic schedule for a 'for' combined with a + // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop + // is not normalized as each team only executes its own assigned + // distribute chunk + QualType IteratorTy = IVExpr->getType(); + llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, + SourceLocation()); + llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, + SourceLocation()); + return {LBVal, UBVal}; +} + +static void emitDistributeParallelForDistributeInnerBoundParams( + CodeGenFunction &CGF, const OMPExecutableDirective &S, + llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { + const auto &Dir = cast<OMPLoopDirective>(S); + LValue LB = + CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); + auto LBCast = CGF.Builder.CreateIntCast( + CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); + CapturedVars.push_back(LBCast); + LValue UB = + CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); + + auto UBCast = CGF.Builder.CreateIntCast( + CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); + CapturedVars.push_back(UBCast); +} + +static void +emitInnerParallelForWhenCombined(CodeGenFunction &CGF, + const OMPLoopDirective &S, + CodeGenFunction::JumpDest LoopExit) { + auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, + PrePostActionTy &) { + CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), + emitDistributeParallelForInnerBounds, + emitDistributeParallelForDispatchBounds); + }; - EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, - S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk); + emitCommonOMPParallelDirective( + CGF, S, OMPD_for, CGInlinedWorksharingLoop, + emitDistributeParallelForDistributeInnerBoundParams); } void CodeGenFunction::EmitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; 
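[Editor's aside, not part of the patch] emitInnerParallelForWhenCombined and emitDistributeParallelForDistributeInnerBoundParams exist so that, in a combined construct, each team's 'distribute' chunk becomes the iteration space of the nested 'for'. A minimal source-level example of the construct this path handles:

    void saxpy(float a, const float *x, float *y, int n) {
    #pragma omp target teams distribute parallel for
      for (int i = 0; i < n; ++i)
        y[i] = a * x[i] + y[i];
      // each team receives a distribute chunk [PrevLB, PrevUB]; the inner
      // 'for' then schedules only that sub-range across the team's threads
    }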
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_distribute_parallel_for, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for, - /*HasCancel=*/false); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); + OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, + /*HasCancel=*/false); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, + /*HasCancel=*/false); } void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( @@ -2003,15 +2047,6 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( }); } -void CodeGenFunction::EmitOMPTargetTeamsDirective( - const OMPTargetTeamsDirective &S) { - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_teams, [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( const OMPTargetTeamsDistributeDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective( @@ -2052,14 +2087,6 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( }); } -/// \brief Emit a helper variable and return corresponding lvalue. -static LValue EmitOMPHelperVar(CodeGenFunction &CGF, - const DeclRefExpr *Helper) { - auto VDecl = cast<VarDecl>(Helper->getDecl()); - CGF.EmitVarDecl(*VDecl); - return CGF.EmitLValue(Helper); -} - namespace { struct ScheduleKindModifiersTy { OpenMPScheduleClauseKind Kind; @@ -2072,7 +2099,10 @@ namespace { }; } // namespace -bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { +bool CodeGenFunction::EmitOMPWorksharingLoop( + const OMPLoopDirective &S, Expr *EUB, + const CodeGenLoopBoundsTy &CodeGenLoopBounds, + const CodeGenDispatchBoundsTy &CGDispatchBounds) { // Emit the loop iteration variable. auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); @@ -2120,12 +2150,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { llvm::DenseSet<const Expr *> EmittedFinals; emitAlignedClause(*this, S); - EmitOMPLinearClauseInit(S); + bool HasLinears = EmitOMPLinearClauseInit(S); // Emit helper vars inits. - LValue LB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); - LValue UB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); + + std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); + LValue LB = Bounds.first; + LValue UB = Bounds.second; LValue ST = EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); LValue IL = @@ -2134,7 +2164,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { // Emit 'then' code. { OMPPrivateScope LoopScope(*this); - if (EmitOMPFirstprivateClause(S, LoopScope)) { + if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { // Emit implicit barrier to synchronize threads and avoid data races on // initialization of firstprivate variables and post-update of // lastprivate variables. @@ -2211,9 +2241,11 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. 
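[Editor's aside, not part of the patch] For readers unfamiliar with the LB/UB/ST bookkeeping in this worksharing path, the following self-contained sketch (arbitrary constants, no OpenMP runtime involved) mimics how a static, chunked schedule hands out [LB..UB] ranges and then advances the lower bound by the stride, which is roughly what the NextLowerBound/NextUpperBound expressions in the outer loop express:

    #include <algorithm>
    #include <cstdio>

    int main() {
      const int NumThreads = 4, Chunk = 3, LastIteration = 19;
      for (int Tid = 0; Tid < NumThreads; ++Tid) {
        const int Stride = NumThreads * Chunk;  // ST
        for (int LB = Tid * Chunk; LB <= LastIteration; LB += Stride) {
          // UB = min(UB, GlobalUB): clip the chunk to the iteration space
          int UB = std::min(LB + Chunk - 1, LastIteration);
          std::printf("thread %d runs [%d..%d]\n", Tid, LB, UB);
          // next chunk: LB advances by the stride; UB is recomputed from it
        }
      }
      return 0;
    }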
+ const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), + ST.getAddress(), IL.getAddress(), + Chunk, EUB); EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, - LB.getAddress(), UB.getAddress(), ST.getAddress(), - IL.getAddress(), Chunk); + LoopArguments, CGDispatchBounds); } if (isOpenMPSimdDirective(S.getDirectiveKind())) { EmitOMPSimdFinal(S, @@ -2222,7 +2254,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { CGF.EmitLoadOfScalar(IL, S.getLocStart())); }); } - EmitOMPReductionClauseFinal(S); + EmitOMPReductionClauseFinal( + S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) + ? /*Parallel and Simd*/ OMPD_parallel_for_simd + : /*Parallel only*/ OMPD_parallel); // Emit post-update of the reduction variables if IsLastIter != 0. emitPostUpdateForReductionClause( *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { @@ -2248,12 +2283,42 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { return HasLastprivateClause; } +/// The following two functions generate expressions for the loop lower +/// and upper bounds in case of static and dynamic (dispatch) schedule +/// of the associated 'for' or 'distribute' loop. +static std::pair<LValue, LValue> +emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { + const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); + LValue LB = + EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); + return {LB, UB}; +} + +/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not +/// consider the lower and upper bound expressions generated by the +/// worksharing loop support, but we use 0 and the iteration space size as +/// constants +static std::pair<llvm::Value *, llvm::Value *> +emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, + Address LB, Address UB) { + const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); + const Expr *IVExpr = LS.getIterationVariable(); + const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); + llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); + llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); + return {LBVal, UBVal}; +} + void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); - HasLastprivates = CGF.EmitOMPWorksharingLoop(S); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); }; { OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2271,7 +2336,9 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { - HasLastprivates = CGF.EmitOMPWorksharingLoop(S); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); }; { OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2320,8 +2387,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); // Generate condition for loop. 
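[Editor's aside, not part of the patch] The EmitSections hunk here lowers a 'sections' region roughly as a statically scheduled loop over section indices feeding a switch, which is why it builds the IV <= UB condition and pre-increment below. A minimal source example:

    #include <cstdio>

    void run_sections() {
    #pragma omp parallel sections
      {
    #pragma omp section
        std::printf("section 0\n");  // conceptually one case of the generated switch
    #pragma omp section
        std::printf("section 1\n");  // conceptually the next case
      }
    }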
BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, - OK_Ordinary, S.getLocStart(), - /*fpContractable=*/false); + OK_Ordinary, S.getLocStart(), FPOptions()); // Increment for loop counter. UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, S.getLocStart()); @@ -2397,7 +2463,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); }; CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); - CGF.EmitOMPReductionClauseFinal(S); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); // Emit post-update of the reduction variables if IsLastIter != 0. emitPostUpdateForReductionClause( CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { @@ -2523,9 +2589,11 @@ void CodeGenFunction::EmitOMPParallelForDirective( // directives: 'parallel' with 'for' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); - CGF.EmitOMPWorksharingLoop(S); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); }; - emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, + emitEmptyBoundParameters); } void CodeGenFunction::EmitOMPParallelForSimdDirective( @@ -2533,9 +2601,11 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitOMPWorksharingLoop(S); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); }; - emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, + emitEmptyBoundParameters); } void CodeGenFunction::EmitOMPParallelSectionsDirective( @@ -2545,7 +2615,8 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitSections(S); }; - emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, + emitEmptyBoundParameters); } void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, @@ -2629,11 +2700,32 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, ++ID; } } + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const auto *Ref : C->varlists()) { + Data.ReductionVars.emplace_back(Ref); + Data.ReductionCopies.emplace_back(*IPriv); + Data.ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( + *this, S.getLocStart(), LHSs, RHSs, Data); // Build list of dependences. 
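[Editor's aside, not part of the patch] The block above gathers reduction variables, their private copies and combiner ops from reduction clauses on task-generating constructs and registers them through emitTaskReductionInit. A hedged example of the OpenMP 5.0 task-reduction source feature this serves (assuming a runtime with task reduction support; names are illustrative):

    int sum_tasks(const int *v, int n) {
      int sum = 0;
    #pragma omp parallel
    #pragma omp single
    #pragma omp taskgroup task_reduction(+ : sum)
      for (int i = 0; i < n; ++i) {
    #pragma omp task in_reduction(+ : sum) firstprivate(i)  // each task captures its own i
        sum += v[i];
      }
      return sum;
    }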
for (const auto *C : S.getClausesOfKind<OMPDependClause>()) for (auto *IRef : C->varlists()) Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); - auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs]( + auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs]( CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. OMPPrivateScope Scope(CGF); @@ -2688,6 +2780,34 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); } } + if (Data.Reductions) { + OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true); + ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies, + Data.ReductionOps); + llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9))); + for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { + RedCG.emitSharedLValue(CGF, Cnt); + RedCG.emitAggregateType(CGF, Cnt); + Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( + CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); + Replacement = + Address(CGF.EmitScalarConversion( + Replacement.getPointer(), CGF.getContext().VoidPtrTy, + CGF.getContext().getPointerType( + Data.ReductionCopies[Cnt]->getType()), + SourceLocation()), + Replacement.getAlignment()); + Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); + Scope.addPrivate(RedCG.getBaseDecl(Cnt), + [Replacement]() { return Replacement; }); + // FIXME: This must removed once the runtime library is fixed. + // Emit required threadprivate variables for + // initilizer/combiner/finalizer. + CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), + RedCG, Cnt); + } + } (void)Scope.Privatize(); Action.Enter(CGF); @@ -2763,7 +2883,9 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { }(), S.getLocStart()); } -void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { +void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, + const CodeGenLoopTy &CodeGenLoop, + Expr *IncExpr) { // Emit the loop iteration variable. auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); @@ -2804,10 +2926,17 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { // Emit 'then' code. { // Emit helper vars inits. - LValue LB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); - LValue UB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); + + LValue LB = EmitOMPHelperVar( + *this, cast<DeclRefExpr>( + (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedLowerBoundVariable() + : S.getLowerBoundVariable()))); + LValue UB = EmitOMPHelperVar( + *this, cast<DeclRefExpr>( + (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedUpperBoundVariable() + : S.getUpperBoundVariable()))); LValue ST = EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); LValue IL = @@ -2859,15 +2988,25 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); - EmitIgnoredExpr(S.getEnsureUpperBound()); + EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? 
S.getCombinedEnsureUpperBound() + : S.getEnsureUpperBound()); // IV = LB; - EmitIgnoredExpr(S.getInit()); + EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedInit() + : S.getInit()); + + Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedCond() + : S.getCond(); + + // for distribute alone, codegen // while (idx <= UB) { BODY; ++idx; } - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), - S.getInc(), - [&S, LoopExit](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, LoopExit); - CGF.EmitStopPoint(&S); + // when combined with 'for' (e.g. as in 'distribute parallel for') + // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, + [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { + CodeGenLoop(CGF, S, LoopExit); }, [](CodeGenFunction &) {}); EmitBlock(LoopExit.getBlock()); @@ -2876,9 +3015,11 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { } else { // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. - EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, - LB.getAddress(), UB.getAddress(), ST.getAddress(), - IL.getAddress(), Chunk); + const OMPLoopArguments LoopArguments = { + LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), + Chunk}; + EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, + CodeGenLoop); } // Emit final copy of the lastprivate variables if IsLastIter != 0. @@ -2900,7 +3041,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { void CodeGenFunction::EmitOMPDistributeDirective( const OMPDistributeDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitOMPDistributeLoop(S); + + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, @@ -3250,7 +3392,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, NewVValType = XRValExpr->getType(); auto *ERValExpr = IsXLHSInRHSPart ? 
RHS : LHS; auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, - IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue { + IsPostfixUpdate](RValue XRValue) -> RValue { CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); RValue Res = CGF.EmitAnyExpr(UE); @@ -3277,7 +3419,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, NewVValType = X->getType().getNonReferenceType(); ExprRValue = convertToType(CGF, ExprRValue, E->getType(), X->getType().getNonReferenceType(), Loc); - auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue { + auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue { NewVVal = XRValue; return ExprRValue; }; @@ -3327,6 +3469,7 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_firstprivate: case OMPC_lastprivate: case OMPC_reduction: + case OMPC_task_reduction: case OMPC_safelen: case OMPC_simdlen: case OMPC_collapse: @@ -3404,41 +3547,24 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); } -std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/> -CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( - CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName, - bool IsOffloadEntry) { - llvm::Function *OutlinedFn = nullptr; - llvm::Constant *OutlinedFnID = nullptr; - auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { - OMPPrivateScope PrivateScope(CGF); - (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); - CGF.EmitOMPPrivateClause(S, PrivateScope); - (void)PrivateScope.Privatize(); - - Action.Enter(CGF); - CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }; - // Emit target region as a standalone region. - CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); - return std::make_pair(OutlinedFn, OutlinedFnID); -} - -void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { +static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + const RegionCodeGenTy &CodeGen) { + assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); + CodeGenModule &CGM = CGF.CGM; const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); - llvm::SmallVector<llvm::Value *, 16> CapturedVars; - GenerateOpenMPCapturedVars(CS, CapturedVars); - llvm::Function *Fn = nullptr; llvm::Constant *FnID = nullptr; - // Check if we have any if clause associated with the directive. const Expr *IfCond = nullptr; - - if (auto *C = S.getSingleClause<OMPIfClause>()) { - IfCond = C->getCondition(); + // Check for the at most one if clause associated with the target region. + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_target) { + IfCond = C->getCondition(); + break; + } } // Check if we have any device clause associated with the directive. 
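[Editor's aside, not part of the patch] emitCommonOMPTargetDirective now walks all if clauses and honours the directive-name modifier before picking up the device clause. An example of the source syntax involved (condition and device number are placeholders):

    void offload(float *x, int n, int dev, bool big_enough) {
    #pragma omp target if(target: big_enough) device(dev) map(tofrom: x[0:n])
      for (int i = 0; i < n; ++i)
        x[i] *= 2.0f;
      // if the if() condition constant-folds to false, the code above sets
      // IsOffloadEntry to false and no offload entry is registered
    }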
@@ -3453,43 +3579,76 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { bool IsOffloadEntry = true; if (IfCond) { bool Val; - if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) + if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) IsOffloadEntry = false; } if (CGM.getLangOpts().OMPTargetTriples.empty()) IsOffloadEntry = false; - assert(CurFuncDecl && "No parent declaration for target region!"); + assert(CGF.CurFuncDecl && "No parent declaration for target region!"); StringRef ParentName; // In case we have Ctors/Dtors we use the complete type variant to produce // the mangling of the device outlined kernel. - if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl)) + if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); - else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl)) + else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); else ParentName = - CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); + CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); - std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction( - CGM, S, ParentName, IsOffloadEntry); - OMPLexicalScope Scope(*this, S); - CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, + IsOffloadEntry, CodeGen); + OMPLexicalScope Scope(CGF, S); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); + CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, CapturedVars); } +static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, + PrePostActionTy &Action) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + (void)PrivateScope.Privatize(); + + Action.Enter(CGF); + CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); +} + +void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, + StringRef ParentName, + const OMPTargetDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, const OMPExecutableDirective &S, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). 
- emitParallelOrTeamsOutlinedFunction(S, - *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); + auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( + S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); - const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S); - const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>(); - const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>(); + const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>(); + const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>(); if (NT || TL) { Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; @@ -3498,7 +3657,7 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, S.getLocStart()); } - OMPLexicalScope Scope(CGF, S); + OMPTeamsScope Scope(CGF, S); llvm::SmallVector<llvm::Value *, 16> CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, @@ -3511,10 +3670,47 @@ void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { OMPPrivateScope PrivateScope(CGF); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); + emitPostUpdateForReductionClause( + *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); +} + +static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, + const OMPTargetTeamsDirective &S) { + auto *CS = S.getCapturedStmt(OMPD_teams); + Action.Enter(CGF); + auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { + // TODO: Add support for clauses. + CGF.EmitStmt(CS->getCapturedStmt()); + }; + emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); +} + +void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsRegion(CGF, Action, S); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetTeamsDirective( + const OMPTargetTeamsDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsRegion(CGF, Action, S); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); } void CodeGenFunction::EmitOMPCancellationPointDirective( @@ -3740,9 +3936,48 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective( CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } +static void emitTargetParallelRegion(CodeGenFunction &CGF, + const OMPTargetParallelDirective &S, + PrePostActionTy &Action) { + // Get the captured statement associated with the 'parallel' region. 
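[Editor's aside, not part of the patch] With the changes above, 'teams' now initializes and finalizes reduction clauses, and 'target teams' gets its own outlined device function. A small example reaching these paths:

    int count_positive(const int *v, int n) {
      int cnt = 0;
    #pragma omp target map(to: v[0:n]) map(tofrom: cnt)
    #pragma omp teams reduction(+ : cnt)
    #pragma omp distribute parallel for reduction(+ : cnt)
      for (int i = 0; i < n; ++i)
        if (v[i] > 0) ++cnt;
      return cnt;
    }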
+ auto *CS = S.getCapturedStmt(OMPD_parallel); + Action.Enter(CGF); + auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + // TODO: Add support for clauses. + CGF.EmitStmt(CS->getCapturedStmt()); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); + }; + emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, + emitEmptyBoundParameters); + emitPostUpdateForReductionClause( + CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + void CodeGenFunction::EmitOMPTargetParallelDirective( const OMPTargetParallelDirective &S) { - // TODO: codegen for target parallel. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); } void CodeGenFunction::EmitOMPTargetParallelForDirective( @@ -3884,7 +4119,18 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, CodeGen); }; - EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + if (Data.Nogroup) + EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + else { + CGM.getOpenMPRuntime().emitTaskgroupRegion( + *this, + [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + }, + S.getLocStart()); + } } void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp index 1a09830..64b6d0d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp @@ -14,7 +14,7 @@ #include "CGCXXABI.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "ConstantBuilder.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/RecordLayout.h" #include "clang/CodeGen/CGFunctionInfo.h" @@ -284,6 +284,9 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, if (isa<CXXDestructorDecl>(MD)) CGM.getCXXABI().adjustCallArgsForDestructorThunk(*this, CurGD, CallArgs); +#ifndef NDEBUG + unsigned PrefixArgs = CallArgs.size() - 1; +#endif // Add the rest of the arguments. 
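[Editor's aside, not part of the patch] On the CGVTables side, the thunk-emission path now records roughly how many ABI-prefix arguments precede the user parameters (PrefixArgs) so the debug-only consistency check can pass that count to arrangeCXXMethodCall. As a reminder, this is the kind of C++ that produces such this-adjusting thunks:

    struct A { virtual void f() {} virtual ~A() {} };
    struct B { virtual void g() {} virtual ~B() {} };
    struct C : A, B {
      void g() override {}  // a call through B* reaches C::g via a thunk that
                            // adjusts 'this' from the B subobject back to C
    };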
for (const ParmVarDecl *PD : MD->parameters()) EmitDelegateCallArg(CallArgs, PD, SourceLocation()); @@ -292,7 +295,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, #ifndef NDEBUG const CGFunctionInfo &CallFnInfo = CGM.getTypes().arrangeCXXMethodCall( - CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1, MD)); + CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1, MD), PrefixArgs); assert(CallFnInfo.getRegParm() == CurFnInfo->getRegParm() && CallFnInfo.isNoReturn() == CurFnInfo->isNoReturn() && CallFnInfo.getCallingConvention() == CurFnInfo->getCallingConvention()); @@ -376,12 +379,9 @@ void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD, // Apply the standard set of call attributes. unsigned CallingConv; - CodeGen::AttributeListType AttributeList; - CGM.ConstructAttributeList(CalleePtr->getName(), - *CurFnInfo, MD, AttributeList, + llvm::AttributeList Attrs; + CGM.ConstructAttributeList(CalleePtr->getName(), *CurFnInfo, MD, Attrs, CallingConv, /*AttrOnCallSite=*/true); - llvm::AttributeSet Attrs = - llvm::AttributeSet::get(getLLVMContext(), AttributeList); Call->setAttributes(Attrs); Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); @@ -744,9 +744,10 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) { switch (keyFunction->getTemplateSpecializationKind()) { case TSK_Undeclared: case TSK_ExplicitSpecialization: - assert((def || CodeGenOpts.OptimizationLevel > 0) && - "Shouldn't query vtable linkage without key function or " - "optimizations"); + assert((def || CodeGenOpts.OptimizationLevel > 0 || + CodeGenOpts.getDebugInfo() != codegenoptions::NoDebugInfo) && + "Shouldn't query vtable linkage without key function, " + "optimizations, or debug info"); if (!def && CodeGenOpts.OptimizationLevel > 0) return llvm::GlobalVariable::AvailableExternallyLinkage; @@ -900,6 +901,8 @@ void CodeGenModule::EmitDeferredVTables() { for (const CXXRecordDecl *RD : DeferredVTables) if (shouldEmitVTableAtEndOfTranslationUnit(*this, RD)) VTables.GenerateClassData(RD); + else if (shouldOpportunisticallyEmitVTables()) + OpportunisticVTables.push_back(RD); assert(savedSize == DeferredVTables.size() && "deferred extra vtables during vtable emission?"); @@ -942,7 +945,7 @@ bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, const VTableLayout &VTLayout) { - if (!getCodeGenOpts().PrepareForLTO) + if (!getCodeGenOpts().LTOUnit) return; CharUnits PointerWidth = diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h b/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h index 53a376d..b768eb8 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h @@ -146,6 +146,25 @@ static inline AlignmentSource getFieldAlignmentSource(AlignmentSource Source) { return AlignmentSource::Decl; } +class LValueBaseInfo { + AlignmentSource AlignSource; + bool MayAlias; + +public: + explicit LValueBaseInfo(AlignmentSource Source = AlignmentSource::Type, + bool Alias = false) + : AlignSource(Source), MayAlias(Alias) {} + AlignmentSource getAlignmentSource() const { return AlignSource; } + void setAlignmentSource(AlignmentSource Source) { AlignSource = Source; } + bool getMayAlias() const { return MayAlias; } + void setMayAlias(bool Alias) { MayAlias = Alias; } + + void mergeForCast(const LValueBaseInfo &Info) { + setAlignmentSource(Info.getAlignmentSource()); + setMayAlias(getMayAlias() || Info.getMayAlias()); + } +}; + /// 
LValue - This represents an lvalue references. Because C/C++ allow /// bitfields, this is not a simple LLVM pointer, it may be a pointer plus a /// bitrange. @@ -200,7 +219,7 @@ class LValue { // to make the default bitfield pattern all-zeroes. bool ImpreciseLifetime : 1; - unsigned AlignSource : 2; + LValueBaseInfo BaseInfo; // This flag shows if a nontemporal load/stores should be used when accessing // this lvalue. @@ -218,7 +237,7 @@ class LValue { private: void Initialize(QualType Type, Qualifiers Quals, - CharUnits Alignment, AlignmentSource AlignSource, + CharUnits Alignment, LValueBaseInfo BaseInfo, llvm::MDNode *TBAAInfo = nullptr) { assert((!Alignment.isZero() || Type->isIncompleteType()) && "initializing l-value with zero alignment!"); @@ -227,7 +246,7 @@ private: this->Alignment = Alignment.getQuantity(); assert(this->Alignment == Alignment.getQuantity() && "Alignment exceeds allowed max!"); - this->AlignSource = unsigned(AlignSource); + this->BaseInfo = BaseInfo; // Initialize Objective-C flags. this->Ivar = this->ObjIsArray = this->NonGC = this->GlobalObjCRef = false; @@ -316,12 +335,8 @@ public: CharUnits getAlignment() const { return CharUnits::fromQuantity(Alignment); } void setAlignment(CharUnits A) { Alignment = A.getQuantity(); } - AlignmentSource getAlignmentSource() const { - return AlignmentSource(AlignSource); - } - void setAlignmentSource(AlignmentSource Source) { - AlignSource = unsigned(Source); - } + LValueBaseInfo getBaseInfo() const { return BaseInfo; } + void setBaseInfo(LValueBaseInfo Info) { BaseInfo = Info; } // simple lvalue llvm::Value *getPointer() const { @@ -370,7 +385,7 @@ public: static LValue MakeAddr(Address address, QualType type, ASTContext &Context, - AlignmentSource alignSource, + LValueBaseInfo BaseInfo, llvm::MDNode *TBAAInfo = nullptr) { Qualifiers qs = type.getQualifiers(); qs.setObjCGCAttr(Context.getObjCGCAttrKind(type)); @@ -379,29 +394,29 @@ public: R.LVType = Simple; assert(address.getPointer()->getType()->isPointerTy()); R.V = address.getPointer(); - R.Initialize(type, qs, address.getAlignment(), alignSource, TBAAInfo); + R.Initialize(type, qs, address.getAlignment(), BaseInfo, TBAAInfo); return R; } static LValue MakeVectorElt(Address vecAddress, llvm::Value *Idx, - QualType type, AlignmentSource alignSource) { + QualType type, LValueBaseInfo BaseInfo) { LValue R; R.LVType = VectorElt; R.V = vecAddress.getPointer(); R.VectorIdx = Idx; R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(), - alignSource); + BaseInfo); return R; } static LValue MakeExtVectorElt(Address vecAddress, llvm::Constant *Elts, - QualType type, AlignmentSource alignSource) { + QualType type, LValueBaseInfo BaseInfo) { LValue R; R.LVType = ExtVectorElt; R.V = vecAddress.getPointer(); R.VectorElts = Elts; R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(), - alignSource); + BaseInfo); return R; } @@ -414,12 +429,12 @@ public: static LValue MakeBitfield(Address Addr, const CGBitFieldInfo &Info, QualType type, - AlignmentSource alignSource) { + LValueBaseInfo BaseInfo) { LValue R; R.LVType = BitField; R.V = Addr.getPointer(); R.BitFieldInfo = &Info; - R.Initialize(type, type.getQualifiers(), Addr.getAlignment(), alignSource); + R.Initialize(type, type.getQualifiers(), Addr.getAlignment(), BaseInfo); return R; } @@ -428,7 +443,7 @@ public: R.LVType = GlobalReg; R.V = Reg.getPointer(); R.Initialize(type, type.getQualifiers(), Reg.getAlignment(), - AlignmentSource::Decl); + LValueBaseInfo(AlignmentSource::Decl, false)); return R; } diff 
--git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp index 166f44f..0735a9c 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp @@ -64,3 +64,19 @@ CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM, returnType, /*IsInstanceMethod=*/false, /*IsChainCall=*/false, argTypes, info, {}, args); } + +llvm::FunctionType * +CodeGen::convertFreeFunctionType(CodeGenModule &CGM, const FunctionDecl *FD) { + assert(FD != nullptr && "Expected a non-null function declaration!"); + llvm::Type *T = CGM.getTypes().ConvertFunctionType(FD->getType(), FD); + + if (auto FT = dyn_cast<llvm::FunctionType>(T)) + return FT; + + return nullptr; +} + +llvm::Type * +CodeGen::convertTypeForMemory(CodeGenModule &CGM, QualType T) { + return CGM.getTypes().ConvertTypeForMem(T); +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp index 5f74141..4f03de5 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp @@ -7,7 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "clang/CodeGen/CodeGenAction.h" +#include "CodeGenModule.h" #include "CoverageMappingGen.h" +#include "MacroPPCallbacks.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" @@ -16,15 +19,16 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/BackendUtil.h" -#include "clang/CodeGen/CodeGenAction.h" #include "clang/CodeGen/ModuleBuilder.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Lex/Preprocessor.h" #include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" @@ -35,12 +39,16 @@ #include "llvm/Support/Timer.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/YAMLTraits.h" +#include "llvm/Transforms/IPO/Internalize.h" + #include <memory> using namespace clang; using namespace llvm; namespace clang { class BackendConsumer : public ASTConsumer { + using LinkModule = CodeGenAction::LinkModule; + virtual void anchor(); DiagnosticsEngine &Diags; BackendAction Action; @@ -61,43 +69,39 @@ namespace clang { std::unique_ptr<CodeGenerator> Gen; - SmallVector<std::pair<unsigned, std::unique_ptr<llvm::Module>>, 4> - LinkModules; + SmallVector<LinkModule, 4> LinkModules; // This is here so that the diagnostic printer knows the module a diagnostic // refers to. 
llvm::Module *CurLinkModule = nullptr; public: - BackendConsumer( - BackendAction Action, DiagnosticsEngine &Diags, - const HeaderSearchOptions &HeaderSearchOpts, - const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, - const TargetOptions &TargetOpts, const LangOptions &LangOpts, - bool TimePasses, const std::string &InFile, - const SmallVectorImpl<std::pair<unsigned, llvm::Module *>> &LinkModules, - std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C, - CoverageSourceInfo *CoverageInfo = nullptr) + BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, + const HeaderSearchOptions &HeaderSearchOpts, + const PreprocessorOptions &PPOpts, + const CodeGenOptions &CodeGenOpts, + const TargetOptions &TargetOpts, + const LangOptions &LangOpts, bool TimePasses, + const std::string &InFile, + SmallVector<LinkModule, 4> LinkModules, + std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C, + CoverageSourceInfo *CoverageInfo = nullptr) : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), AsmOutStream(std::move(OS)), Context(nullptr), LLVMIRGeneration("irgen", "LLVM IR Generation Time"), LLVMIRGenerationRefCount(0), Gen(CreateLLVMCodeGen(Diags, InFile, HeaderSearchOpts, PPOpts, - CodeGenOpts, C, CoverageInfo)) { + CodeGenOpts, C, CoverageInfo)), + LinkModules(std::move(LinkModules)) { llvm::TimePassesIsEnabled = TimePasses; - for (auto &I : LinkModules) - this->LinkModules.push_back( - std::make_pair(I.first, std::unique_ptr<llvm::Module>(I.second))); } llvm::Module *getModule() const { return Gen->GetModule(); } std::unique_ptr<llvm::Module> takeModule() { return std::unique_ptr<llvm::Module>(Gen->ReleaseModule()); } - void releaseLinkModules() { - for (auto &I : LinkModules) - I.second.release(); - } + + CodeGenerator *getCodeGenerator() { return Gen.get(); } void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override { Gen->HandleCXXStaticMemberVarInstantiation(VD); @@ -159,6 +163,35 @@ namespace clang { HandleTopLevelDecl(D); } + // Links each entry in LinkModules into our module. Returns true on error. 
+ bool LinkInModules() { + for (auto &LM : LinkModules) { + if (LM.PropagateAttrs) + for (Function &F : *LM.Module) + Gen->CGM().AddDefaultFnAttrs(F); + + CurLinkModule = LM.Module.get(); + + bool Err; + if (LM.Internalize) { + Err = Linker::linkModules( + *getModule(), std::move(LM.Module), LM.LinkFlags, + [](llvm::Module &M, const llvm::StringSet<> &GVS) { + internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) { + return !GV.hasName() || (GVS.count(GV.getName()) == 0); + }); + }); + } else { + Err = Linker::linkModules(*getModule(), std::move(LM.Module), + LM.LinkFlags); + } + + if (Err) + return true; + } + return false; // success + } + void HandleTranslationUnit(ASTContext &C) override { { PrettyStackTraceString CrashInfo("Per-file LLVM IR generation"); @@ -195,7 +228,10 @@ namespace clang { Ctx.getDiagnosticHandler(); void *OldDiagnosticContext = Ctx.getDiagnosticContext(); Ctx.setDiagnosticHandler(DiagnosticHandler, this); - Ctx.setDiagnosticHotnessRequested(CodeGenOpts.DiagnosticsWithHotness); + Ctx.setDiagnosticsHotnessRequested(CodeGenOpts.DiagnosticsWithHotness); + if (CodeGenOpts.DiagnosticsHotnessThreshold != 0) + Ctx.setDiagnosticsHotnessThreshold( + CodeGenOpts.DiagnosticsHotnessThreshold); std::unique_ptr<llvm::tool_output_file> OptRecordFile; if (!CodeGenOpts.OptRecordFile.empty()) { @@ -213,16 +249,12 @@ namespace clang { llvm::make_unique<yaml::Output>(OptRecordFile->os())); if (CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone) - Ctx.setDiagnosticHotnessRequested(true); + Ctx.setDiagnosticsHotnessRequested(true); } - // Link LinkModule into this module if present, preserving its validity. - for (auto &I : LinkModules) { - unsigned LinkFlags = I.first; - CurLinkModule = I.second.get(); - if (Linker::linkModules(*getModule(), std::move(I.second), LinkFlags)) - return; - } + // Link each LinkModule into our module. + if (LinkInModules()) + return; EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef()); @@ -275,7 +307,7 @@ namespace clang { /// Get the best possible source location to represent a diagnostic that /// may have associated debug info. const FullSourceLoc - getBestLocationFromDebugLoc(const llvm::DiagnosticInfoWithDebugLocBase &D, + getBestLocationFromDebugLoc(const llvm::DiagnosticInfoWithLocationBase &D, bool &BadDebugInfo, StringRef &Filename, unsigned &Line, unsigned &Column) const; @@ -298,9 +330,8 @@ namespace clang { /// them. void EmitOptimizationMessage(const llvm::DiagnosticInfoOptimizationBase &D, unsigned DiagID); - void OptimizationRemarkHandler(const llvm::OptimizationRemark &D); - void OptimizationRemarkHandler(const llvm::OptimizationRemarkMissed &D); - void OptimizationRemarkHandler(const llvm::OptimizationRemarkAnalysis &D); + void + OptimizationRemarkHandler(const llvm::DiagnosticInfoOptimizationBase &D); void OptimizationRemarkHandler( const llvm::OptimizationRemarkAnalysisFPCommute &D); void OptimizationRemarkHandler( @@ -308,7 +339,7 @@ namespace clang { void OptimizationFailureHandler( const llvm::DiagnosticInfoOptimizationFailure &D); }; - + void BackendConsumer::anchor() {} } @@ -377,7 +408,7 @@ void BackendConsumer::InlineAsmDiagHandler2(const llvm::SMDiagnostic &D, // code. 
if (LocCookie.isValid()) { Diags.Report(LocCookie, DiagID).AddString(Message); - + if (D.getLoc().isValid()) { DiagnosticBuilder B = Diags.Report(Loc, diag::note_fe_inline_asm_here); // Convert the SMDiagnostic ranges into SourceRange and attach them @@ -390,7 +421,7 @@ void BackendConsumer::InlineAsmDiagHandler2(const llvm::SMDiagnostic &D, } return; } - + // Otherwise, report the backend issue as occurring in the generated .s file. // If Loc is invalid, we still need to report the issue, it just gets no // location info. @@ -477,8 +508,8 @@ BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) { } const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( - const llvm::DiagnosticInfoWithDebugLocBase &D, bool &BadDebugInfo, StringRef &Filename, - unsigned &Line, unsigned &Column) const { + const llvm::DiagnosticInfoWithLocationBase &D, bool &BadDebugInfo, + StringRef &Filename, unsigned &Line, unsigned &Column) const { SourceManager &SourceMgr = Context->getSourceManager(); FileManager &FileMgr = SourceMgr.getFileManager(); SourceLocation DILoc; @@ -520,9 +551,9 @@ void BackendConsumer::UnsupportedDiagHandler( StringRef Filename; unsigned Line, Column; - bool BadDebugInfo; - FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, - Line, Column); + bool BadDebugInfo = false; + FullSourceLoc Loc = + getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column); Diags.Report(Loc, diag::err_fe_backend_unsupported) << D.getMessage().str(); @@ -544,8 +575,8 @@ void BackendConsumer::EmitOptimizationMessage( StringRef Filename; unsigned Line, Column; bool BadDebugInfo = false; - FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, - Line, Column); + FullSourceLoc Loc = + getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column); std::string Msg; raw_string_ostream MsgStream(Msg); @@ -568,36 +599,34 @@ void BackendConsumer::EmitOptimizationMessage( } void BackendConsumer::OptimizationRemarkHandler( - const llvm::OptimizationRemark &D) { - // Optimization remarks are active only if the -Rpass flag has a regular - // expression that matches the name of the pass name in \p D. - if (CodeGenOpts.OptimizationRemarkPattern && - CodeGenOpts.OptimizationRemarkPattern->match(D.getPassName())) - EmitOptimizationMessage(D, diag::remark_fe_backend_optimization_remark); -} - -void BackendConsumer::OptimizationRemarkHandler( - const llvm::OptimizationRemarkMissed &D) { - // Missed optimization remarks are active only if the -Rpass-missed - // flag has a regular expression that matches the name of the pass - // name in \p D. - if (CodeGenOpts.OptimizationRemarkMissedPattern && - CodeGenOpts.OptimizationRemarkMissedPattern->match(D.getPassName())) - EmitOptimizationMessage(D, - diag::remark_fe_backend_optimization_remark_missed); -} - -void BackendConsumer::OptimizationRemarkHandler( - const llvm::OptimizationRemarkAnalysis &D) { - // Optimization analysis remarks are active if the pass name is set to - // llvm::DiagnosticInfo::AlwasyPrint or if the -Rpass-analysis flag has a - // regular expression that matches the name of the pass name in \p D. 
- - if (D.shouldAlwaysPrint() || - (CodeGenOpts.OptimizationRemarkAnalysisPattern && - CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName()))) - EmitOptimizationMessage( - D, diag::remark_fe_backend_optimization_remark_analysis); + const llvm::DiagnosticInfoOptimizationBase &D) { + if (D.isPassed()) { + // Optimization remarks are active only if the -Rpass flag has a regular + // expression that matches the name of the pass name in \p D. + if (CodeGenOpts.OptimizationRemarkPattern && + CodeGenOpts.OptimizationRemarkPattern->match(D.getPassName())) + EmitOptimizationMessage(D, diag::remark_fe_backend_optimization_remark); + } else if (D.isMissed()) { + // Missed optimization remarks are active only if the -Rpass-missed + // flag has a regular expression that matches the name of the pass + // name in \p D. + if (CodeGenOpts.OptimizationRemarkMissedPattern && + CodeGenOpts.OptimizationRemarkMissedPattern->match(D.getPassName())) + EmitOptimizationMessage( + D, diag::remark_fe_backend_optimization_remark_missed); + } else { + assert(D.isAnalysis() && "Unknown remark type"); + + bool ShouldAlwaysPrint = false; + if (auto *ORA = dyn_cast<llvm::OptimizationRemarkAnalysis>(&D)) + ShouldAlwaysPrint = ORA->shouldAlwaysPrint(); + + if (ShouldAlwaysPrint || + (CodeGenOpts.OptimizationRemarkAnalysisPattern && + CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName()))) + EmitOptimizationMessage( + D, diag::remark_fe_backend_optimization_remark_analysis); + } } void BackendConsumer::OptimizationRemarkHandler( @@ -680,6 +709,21 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) { // handler. There is no generic way of emitting them. OptimizationRemarkHandler(cast<OptimizationRemarkAnalysisAliasing>(DI)); return; + case llvm::DK_MachineOptimizationRemark: + // Optimization remarks are always handled completely by this + // handler. There is no generic way of emitting them. + OptimizationRemarkHandler(cast<MachineOptimizationRemark>(DI)); + return; + case llvm::DK_MachineOptimizationRemarkMissed: + // Optimization remarks are always handled completely by this + // handler. There is no generic way of emitting them. + OptimizationRemarkHandler(cast<MachineOptimizationRemarkMissed>(DI)); + return; + case llvm::DK_MachineOptimizationRemarkAnalysis: + // Optimization remarks are always handled completely by this + // handler. There is no generic way of emitting them. + OptimizationRemarkHandler(cast<MachineOptimizationRemarkAnalysis>(DI)); + return; case llvm::DK_OptimizationFailure: // Optimization failures are always handled completely by this // handler. @@ -729,10 +773,6 @@ void CodeGenAction::EndSourceFileAction() { if (!getCompilerInstance().hasASTConsumer()) return; - // Take back ownership of link modules we passed to consumer. - if (!LinkModules.empty()) - BEConsumer->releaseLinkModules(); - // Steal the module from the consumer. TheModule = BEConsumer->takeModule(); } @@ -775,13 +815,12 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { // Load bitcode modules to link with, if we need to. 
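
// A simplified model (hypothetical helper, not the BackendConsumer code) of
// the decision made by the unified OptimizationRemarkHandler above: a single
// handler now receives any DiagnosticInfoOptimizationBase and keys off
// isPassed()/isMissed()/isAnalysis() instead of one overload per remark
// class.  The pattern fields model the -Rpass / -Rpass-missed /
// -Rpass-analysis regexes (Clang uses llvm::Regex; std::regex here is only a
// stand-in).
#include <memory>
#include <regex>
#include <string>

enum class RemarkKind { Passed, Missed, Analysis };

struct RemarkFilters {
  std::unique_ptr<std::regex> PassedPat;   // -Rpass=<regex>
  std::unique_ptr<std::regex> MissedPat;   // -Rpass-missed=<regex>
  std::unique_ptr<std::regex> AnalysisPat; // -Rpass-analysis=<regex>
};

static bool shouldEmitRemark(RemarkKind K, const std::string &PassName,
                             bool ShouldAlwaysPrint, const RemarkFilters &F) {
  switch (K) {
  case RemarkKind::Passed:
    return F.PassedPat && std::regex_search(PassName, *F.PassedPat);
  case RemarkKind::Missed:
    return F.MissedPat && std::regex_search(PassName, *F.MissedPat);
  case RemarkKind::Analysis:
    // Analysis remarks can also be forced by the pass itself.
    return ShouldAlwaysPrint ||
           (F.AnalysisPat && std::regex_search(PassName, *F.AnalysisPat));
  }
  return false;
}
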
if (LinkModules.empty()) - for (auto &I : CI.getCodeGenOpts().LinkBitcodeFiles) { - const std::string &LinkBCFile = I.second; - - auto BCBuf = CI.getFileManager().getBufferForFile(LinkBCFile); + for (const CodeGenOptions::BitcodeFileToLink &F : + CI.getCodeGenOpts().LinkBitcodeFiles) { + auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename); if (!BCBuf) { CI.getDiagnostics().Report(diag::err_cannot_open_file) - << LinkBCFile << BCBuf.getError().message(); + << F.Filename << BCBuf.getError().message(); LinkModules.clear(); return nullptr; } @@ -791,12 +830,13 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { if (!ModuleOrErr) { handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { CI.getDiagnostics().Report(diag::err_cannot_open_file) - << LinkBCFile << EIB.message(); + << F.Filename << EIB.message(); }); LinkModules.clear(); return nullptr; } - addLinkModule(ModuleOrErr.get().release(), I.first); + LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs, + F.Internalize, F.LinkFlags}); } CoverageSourceInfo *CoverageInfo = nullptr; @@ -810,9 +850,20 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { std::unique_ptr<BackendConsumer> Result(new BackendConsumer( BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(), - CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, LinkModules, - std::move(OS), *VMContext, CoverageInfo)); + CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, + std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo)); BEConsumer = Result.get(); + + // Enable generating macro debug info only when debug info is not disabled and + // also macro debug info is enabled. + if (CI.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo && + CI.getCodeGenOpts().MacroDebugInfo) { + std::unique_ptr<PPCallbacks> Callbacks = + llvm::make_unique<MacroPPCallbacks>(BEConsumer->getCodeGenerator(), + CI.getPreprocessor()); + CI.getPreprocessor().addPPCallbacks(std::move(Callbacks)); + } + return std::move(Result); } @@ -838,9 +889,65 @@ static void BitcodeInlineAsmDiagHandler(const llvm::SMDiagnostic &SM, Diags->Report(DiagID).AddString("cannot compile inline asm"); } +std::unique_ptr<llvm::Module> CodeGenAction::loadModule(MemoryBufferRef MBRef) { + CompilerInstance &CI = getCompilerInstance(); + SourceManager &SM = CI.getSourceManager(); + + // For ThinLTO backend invocations, ensure that the context + // merges types based on ODR identifiers. We also need to read + // the correct module out of a multi-module bitcode file. 
+ if (!CI.getCodeGenOpts().ThinLTOIndexFile.empty()) { + VMContext->enableDebugTypeODRUniquing(); + + auto DiagErrors = [&](Error E) -> std::unique_ptr<llvm::Module> { + unsigned DiagID = + CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0"); + handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { + CI.getDiagnostics().Report(DiagID) << EIB.message(); + }); + return {}; + }; + + Expected<llvm::BitcodeModule> BMOrErr = FindThinLTOModule(MBRef); + if (!BMOrErr) + return DiagErrors(BMOrErr.takeError()); + + Expected<std::unique_ptr<llvm::Module>> MOrErr = + BMOrErr->parseModule(*VMContext); + if (!MOrErr) + return DiagErrors(MOrErr.takeError()); + return std::move(*MOrErr); + } + + llvm::SMDiagnostic Err; + if (std::unique_ptr<llvm::Module> M = parseIR(MBRef, Err, *VMContext)) + return M; + + // Translate from the diagnostic info to the SourceManager location if + // available. + // TODO: Unify this with ConvertBackendLocation() + SourceLocation Loc; + if (Err.getLineNo() > 0) { + assert(Err.getColumnNo() >= 0); + Loc = SM.translateFileLineCol(SM.getFileEntryForID(SM.getMainFileID()), + Err.getLineNo(), Err.getColumnNo() + 1); + } + + // Strip off a leading diagnostic code if there is one. + StringRef Msg = Err.getMessage(); + if (Msg.startswith("error: ")) + Msg = Msg.substr(7); + + unsigned DiagID = + CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0"); + + CI.getDiagnostics().Report(Loc, DiagID) << Msg; + return {}; +} + void CodeGenAction::ExecuteAction() { // If this is an IR file, we have to treat it specially. - if (getCurrentFileKind() == IK_LLVM_IR) { + if (getCurrentFileKind().getLanguage() == InputKind::LLVM_IR) { BackendAction BA = static_cast<BackendAction>(Act); CompilerInstance &CI = getCompilerInstance(); std::unique_ptr<raw_pwrite_stream> OS = @@ -855,35 +962,10 @@ void CodeGenAction::ExecuteAction() { if (Invalid) return; - // For ThinLTO backend invocations, ensure that the context - // merges types based on ODR identifiers. - if (!CI.getCodeGenOpts().ThinLTOIndexFile.empty()) - VMContext->enableDebugTypeODRUniquing(); - - llvm::SMDiagnostic Err; - TheModule = parseIR(MainFile->getMemBufferRef(), Err, *VMContext); - if (!TheModule) { - // Translate from the diagnostic info to the SourceManager location if - // available. - // TODO: Unify this with ConvertBackendLocation() - SourceLocation Loc; - if (Err.getLineNo() > 0) { - assert(Err.getColumnNo() >= 0); - Loc = SM.translateFileLineCol(SM.getFileEntryForID(FID), - Err.getLineNo(), Err.getColumnNo() + 1); - } - - // Strip off a leading diagnostic code if there is one. 
- StringRef Msg = Err.getMessage(); - if (Msg.startswith("error: ")) - Msg = Msg.substr(7); - - unsigned DiagID = - CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0"); - - CI.getDiagnostics().Report(Loc, DiagID) << Msg; + TheModule = loadModule(*MainFile); + if (!TheModule) return; - } + const TargetOptions &TargetOpts = CI.getTargetOpts(); if (TheModule->getTargetTriple() != TargetOpts.Triple) { CI.getDiagnostics().Report(SourceLocation(), diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp index e142a21..c23b25e 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp @@ -22,6 +22,7 @@ #include "CodeGenPGO.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/ASTLambda.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/StmtCXX.h" @@ -45,15 +46,15 @@ static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts, if (CGOpts.DisableLifetimeMarkers) return false; - // Asan uses markers for use-after-scope checks. - if (CGOpts.SanitizeAddressUseAfterScope) - return true; - // Disable lifetime markers in msan builds. // FIXME: Remove this when msan works with lifetime markers. if (LangOpts.Sanitize.has(SanitizerKind::Memory)) return false; + // Asan uses markers for use-after-scope checks. + if (CGOpts.SanitizeAddressUseAfterScope) + return true; + // For now, only in optimized builds. return CGOpts.OptimizationLevel != 0; } @@ -117,25 +118,27 @@ CodeGenFunction::~CodeGenFunction() { } CharUnits CodeGenFunction::getNaturalPointeeTypeAlignment(QualType T, - AlignmentSource *Source) { - return getNaturalTypeAlignment(T->getPointeeType(), Source, + LValueBaseInfo *BaseInfo) { + return getNaturalTypeAlignment(T->getPointeeType(), BaseInfo, /*forPointee*/ true); } CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T, - AlignmentSource *Source, + LValueBaseInfo *BaseInfo, bool forPointeeType) { // Honor alignment typedef attributes even on incomplete types. // We also honor them straight for C++ class types, even as pointees; // there's an expressivity gap here. 
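
// A standalone restatement of the decision order in shouldEmitLifetimeMarkers()
// after the reordering above: an explicit disable wins, then the MSan opt-out,
// then the ASan use-after-scope opt-in, and otherwise markers are emitted only
// in optimized builds.  MarkerOpts is a simplified stand-in for the
// CodeGenOptions/LangOptions inputs.
struct MarkerOpts {
  bool DisableLifetimeMarkers;
  bool SanitizeMemory;               // -fsanitize=memory
  bool SanitizeAddressUseAfterScope; // -fsanitize-address-use-after-scope
  unsigned OptimizationLevel;
};

static bool shouldEmitLifetimeMarkersSketch(const MarkerOpts &O) {
  if (O.DisableLifetimeMarkers)
    return false;
  if (O.SanitizeMemory)                // MSan does not cope with markers yet.
    return false;
  if (O.SanitizeAddressUseAfterScope)  // ASan needs them for scope checks.
    return true;
  return O.OptimizationLevel != 0;     // Otherwise, only in optimized builds.
}
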
if (auto TT = T->getAs<TypedefType>()) { if (auto Align = TT->getDecl()->getMaxAlignment()) { - if (Source) *Source = AlignmentSource::AttributedType; + if (BaseInfo) + *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType, false); return getContext().toCharUnitsFromBits(Align); } } - if (Source) *Source = AlignmentSource::Type; + if (BaseInfo) + *BaseInfo = LValueBaseInfo(AlignmentSource::Type, false); CharUnits Alignment; if (T->isIncompleteType()) { @@ -149,6 +152,8 @@ CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T, Alignment = CGM.getClassPointerAlignment(RD); } else { Alignment = getContext().getTypeAlignInChars(T); + if (T.getQualifiers().hasUnaligned()) + Alignment = CharUnits::One(); } // Cap to the global maximum type alignment unless the alignment @@ -163,9 +168,9 @@ CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T, } LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) { - AlignmentSource AlignSource; - CharUnits Alignment = getNaturalTypeAlignment(T, &AlignSource); - return LValue::MakeAddr(Address(V, Alignment), T, getContext(), AlignSource, + LValueBaseInfo BaseInfo; + CharUnits Alignment = getNaturalTypeAlignment(T, &BaseInfo); + return LValue::MakeAddr(Address(V, Alignment), T, getContext(), BaseInfo, CGM.getTBAAInfo(T)); } @@ -173,9 +178,9 @@ LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) { /// construct an l-value with the natural pointee alignment of T. LValue CodeGenFunction::MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T) { - AlignmentSource AlignSource; - CharUnits Align = getNaturalTypeAlignment(T, &AlignSource, /*pointee*/ true); - return MakeAddrLValue(Address(V, Align), T, AlignSource); + LValueBaseInfo BaseInfo; + CharUnits Align = getNaturalTypeAlignment(T, &BaseInfo, /*pointee*/ true); + return MakeAddrLValue(Address(V, Align), T, BaseInfo); } @@ -200,7 +205,8 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) { llvm_unreachable("non-canonical or dependent type in IR-generation"); case Type::Auto: - llvm_unreachable("undeduced auto type in IR-generation"); + case Type::DeducedTemplateSpecialization: + llvm_unreachable("undeduced type in IR-generation"); // Various scalar types. case Type::Builtin: @@ -343,7 +349,7 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { // Emit debug descriptor for function end. if (CGDebugInfo *DI = getDebugInfo()) - DI->EmitFunctionEnd(Builder); + DI->EmitFunctionEnd(Builder, CurFn); // Reset the debug location to that of the simple 'return' expression, if any // rather than that of the end of the function's scope '}'. @@ -607,11 +613,6 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, argBaseTypeNames.push_back(llvm::MDString::get(Context, baseTypeName)); - // Get argument type qualifiers: - if (ty.isConstQualified()) - typeQuals = "const"; - if (ty.isVolatileQualified()) - typeQuals += typeQuals.empty() ? 
"volatile" : " volatile"; if (isPipe) typeQuals = "pipe"; } @@ -660,34 +661,42 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, GenOpenCLArgMetadata(FD, Fn, CGM, Context, Builder, getContext()); if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) { - QualType hintQTy = A->getTypeHint(); - const ExtVectorType *hintEltQTy = hintQTy->getAs<ExtVectorType>(); - bool isSignedInteger = - hintQTy->isSignedIntegerType() || - (hintEltQTy && hintEltQTy->getElementType()->isSignedIntegerType()); - llvm::Metadata *attrMDArgs[] = { + QualType HintQTy = A->getTypeHint(); + const ExtVectorType *HintEltQTy = HintQTy->getAs<ExtVectorType>(); + bool IsSignedInteger = + HintQTy->isSignedIntegerType() || + (HintEltQTy && HintEltQTy->getElementType()->isSignedIntegerType()); + llvm::Metadata *AttrMDArgs[] = { llvm::ConstantAsMetadata::get(llvm::UndefValue::get( CGM.getTypes().ConvertType(A->getTypeHint()))), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( llvm::IntegerType::get(Context, 32), - llvm::APInt(32, (uint64_t)(isSignedInteger ? 1 : 0))))}; - Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, attrMDArgs)); + llvm::APInt(32, (uint64_t)(IsSignedInteger ? 1 : 0))))}; + Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, AttrMDArgs)); } if (const WorkGroupSizeHintAttr *A = FD->getAttr<WorkGroupSizeHintAttr>()) { - llvm::Metadata *attrMDArgs[] = { + llvm::Metadata *AttrMDArgs[] = { llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))}; - Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, attrMDArgs)); + Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, AttrMDArgs)); } if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) { - llvm::Metadata *attrMDArgs[] = { + llvm::Metadata *AttrMDArgs[] = { llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))}; - Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, attrMDArgs)); + Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, AttrMDArgs)); + } + + if (const OpenCLIntelReqdSubGroupSizeAttr *A = + FD->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) { + llvm::Metadata *AttrMDArgs[] = { + llvm::ConstantAsMetadata::get(Builder.getInt32(A->getSubGroupSize()))}; + Fn->setMetadata("intel_reqd_sub_group_size", + llvm::MDNode::get(Context, AttrMDArgs)); } } @@ -707,6 +716,11 @@ static bool endsWithReturn(const Decl* F) { return false; } +static void markAsIgnoreThreadCheckingAtRuntime(llvm::Function *Fn) { + Fn->addFnAttr("sanitize_thread_no_checking_at_run_time"); + Fn->removeFnAttr(llvm::Attribute::SanitizeThread); +} + void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, @@ -750,16 +764,19 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, Fn->addFnAttr(llvm::Attribute::SafeStack); // Ignore TSan memory acesses from within ObjC/ObjC++ dealloc, initialize, - // .cxx_destruct and all of their calees at run time. + // .cxx_destruct, __destroy_helper_block_ and all of their calees at run time. 
if (SanOpts.has(SanitizerKind::Thread)) { if (const auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(D)) { IdentifierInfo *II = OMD->getSelector().getIdentifierInfoForSlot(0); if (OMD->getMethodFamily() == OMF_dealloc || OMD->getMethodFamily() == OMF_initialize || (OMD->getSelector().isUnarySelector() && II->isStr(".cxx_destruct"))) { - Fn->addFnAttr("sanitize_thread_no_checking_at_run_time"); - Fn->removeFnAttr(llvm::Attribute::SanitizeThread); + markAsIgnoreThreadCheckingAtRuntime(Fn); } + } else if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) { + IdentifierInfo *II = FD->getIdentifier(); + if (II && II->isStr("__destroy_helper_block_")) + markAsIgnoreThreadCheckingAtRuntime(Fn); } } @@ -770,10 +787,15 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, Fn->addFnAttr("function-instrument", "xray-always"); if (XRayAttr->neverXRayInstrument()) Fn->addFnAttr("function-instrument", "xray-never"); + if (const auto *LogArgs = D->getAttr<XRayLogArgsAttr>()) { + Fn->addFnAttr("xray-log-args", + llvm::utostr(LogArgs->getArgumentCount())); + } } else { - Fn->addFnAttr( - "xray-instruction-threshold", - llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold)); + if (!CGM.imbueXRayAttrs(Fn, Loc)) + Fn->addFnAttr( + "xray-instruction-threshold", + llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold)); } } @@ -807,6 +829,18 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, } } + // If we're checking nullability, we need to know whether we can check the + // return value. Initialize the flag to 'true' and refine it in EmitParmDecl. + if (SanOpts.has(SanitizerKind::NullabilityReturn)) { + auto Nullability = FnRetTy->getNullability(getContext()); + if (Nullability && *Nullability == NullabilityKind::NonNull) { + if (!(SanOpts.has(SanitizerKind::ReturnsNonnullAttribute) && + CurCodeDecl && CurCodeDecl->getAttr<ReturnsNonNullAttr>())) + RetValNullabilityPrecondition = + llvm::ConstantInt::getTrue(getLLVMContext()); + } + } + // If we're in C++ mode and the function name is "main", it is guaranteed // to be norecurse by the standard (3.6.1.3 "The function main shall not be // used within a program"). @@ -827,6 +861,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, Builder.SetInsertPoint(EntryBB); + // If we're checking the return value, allocate space for a pointer to a + // precise source location of the checked return statement. + if (requiresReturnValueCheck()) { + ReturnLocation = CreateDefaultAlignTempAlloca(Int8PtrTy, "return.sloc.ptr"); + InitTempAlloca(ReturnLocation, llvm::ConstantPointerNull::get(Int8PtrTy)); + } + // Emit subprogram debug descriptor. if (CGDebugInfo *DI = getDebugInfo()) { // Reconstruct the type from the argument list so that implicit parameters, @@ -851,8 +892,14 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, // inlining, we just add an attribute to insert a mcount call in backend. // The attribute "counting-function" is set to mcount function name which is // architecture dependent. - if (CGM.getCodeGenOpts().InstrumentForProfiling) - Fn->addFnAttr("counting-function", getTarget().getMCountName()); + if (CGM.getCodeGenOpts().InstrumentForProfiling) { + if (CGM.getCodeGenOpts().CallFEntry) + Fn->addFnAttr("fentry-call", "true"); + else { + if (!CurFuncDecl || !CurFuncDecl->hasAttr<NoInstrumentFunctionAttr>()) + Fn->addFnAttr("counting-function", getTarget().getMCountName()); + } + } if (RetTy->isVoidType()) { // Void type; nothing to return. 
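
// A plain restatement (hypothetical helper, not the Clang code) of the
// profiling-instrumentation choice made above: with -pg style profiling
// enabled, either an "fentry-call" attribute is added (so the backend emits
// the call before the prologue) or, unless the function carries
// no_instrument_function, a "counting-function" attribute naming the target's
// mcount symbol.
#include <string>
#include <utility>

struct ProfOpts {
  bool InstrumentForProfiling; // -pg
  bool CallFEntry;             // -mfentry
};

// Returns the attribute to add as a (key, value) pair; an empty key means
// no attribute is added.
static std::pair<std::string, std::string>
pickCountingAttr(const ProfOpts &O, bool HasNoInstrumentAttr,
                 const std::string &MCountName) {
  if (!O.InstrumentForProfiling)
    return {};
  if (O.CallFEntry)
    return {"fentry-call", "true"};
  if (!HasNoInstrumentAttr)
    return {"counting-function", MCountName};
  return {};
}
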
@@ -935,6 +982,27 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, // fast register allocator would be happier... CXXThisValue = CXXABIThisValue; } + + // Check the 'this' pointer once per function, if it's available. + if (CXXABIThisValue) { + SanitizerSet SkippedChecks; + SkippedChecks.set(SanitizerKind::ObjectSize, true); + QualType ThisTy = MD->getThisType(getContext()); + + // If this is the call operator of a lambda with no capture-default, it + // may have a static invoker function, which may call this operator with + // a null 'this' pointer. + if (isLambdaCallOperator(MD) && + cast<CXXRecordDecl>(MD->getParent())->getLambdaCaptureDefault() == + LCD_None) + SkippedChecks.set(SanitizerKind::Null, true); + + EmitTypeCheck(isa<CXXConstructorDecl>(MD) ? TCK_ConstructorCall + : TCK_MemberCall, + Loc, CXXABIThisValue, ThisTy, + getContext().getTypeAlignInChars(ThisTy->getPointeeType()), + SkippedChecks); + } } // If any of the arguments have a variably modified type, make sure to @@ -1036,10 +1104,9 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD, if (!Param->hasAttr<PassObjectSizeAttr>()) continue; - IdentifierInfo *NoID = nullptr; auto *Implicit = ImplicitParamDecl::Create( - getContext(), Param->getDeclContext(), Param->getLocation(), NoID, - getContext().getSizeType()); + getContext(), Param->getDeclContext(), Param->getLocation(), + /*Id=*/nullptr, getContext().getSizeType(), ImplicitParamDecl::Other); SizeArguments[Param] = Implicit; Args.push_back(Implicit); } @@ -1076,8 +1143,13 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, if (FD->hasAttr<NoDebugAttr>()) DebugInfo = nullptr; // disable debug info indefinitely for this function + // The function might not have a body if we're generating thunks for a + // function declaration. SourceRange BodyRange; - if (Stmt *Body = FD->getBody()) BodyRange = Body->getSourceRange(); + if (Stmt *Body = FD->getBody()) + BodyRange = Body->getSourceRange(); + else + BodyRange = FD->getLocation(); CurEHLocation = BodyRange.getEnd(); // Use the location of the start of the function to determine where @@ -1891,6 +1963,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { case Type::Typedef: case Type::Decltype: case Type::Auto: + case Type::DeducedTemplateSpecialization: // Stop walking: nothing to do. return; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h index 5861340..6a1fa48 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h @@ -115,9 +115,12 @@ enum TypeEvaluationKind { SANITIZER_CHECK(MissingReturn, missing_return, 0) \ SANITIZER_CHECK(MulOverflow, mul_overflow, 0) \ SANITIZER_CHECK(NegateOverflow, negate_overflow, 0) \ + SANITIZER_CHECK(NullabilityArg, nullability_arg, 0) \ + SANITIZER_CHECK(NullabilityReturn, nullability_return, 1) \ SANITIZER_CHECK(NonnullArg, nonnull_arg, 0) \ - SANITIZER_CHECK(NonnullReturn, nonnull_return, 0) \ + SANITIZER_CHECK(NonnullReturn, nonnull_return, 1) \ SANITIZER_CHECK(OutOfBounds, out_of_bounds, 0) \ + SANITIZER_CHECK(PointerOverflow, pointer_overflow, 0) \ SANITIZER_CHECK(ShiftOutOfBounds, shift_out_of_bounds, 0) \ SANITIZER_CHECK(SubOverflow, sub_overflow, 0) \ SANITIZER_CHECK(TypeMismatch, type_mismatch, 1) \ @@ -173,6 +176,25 @@ public: // because of jumps. 
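
// Why the block above skips the SanitizerKind::Null check for the call
// operator of a lambda without a capture-default: a captureless lambda is
// convertible to a plain function pointer, and (per the comment above) the
// static "invoker" implementing that conversion may reach operator() with a
// null 'this'.  A minimal, standard-C++ illustration of the conversion:
static int callThroughInvoker() {
  int (*fp)(int) = [](int x) { return x + 1; }; // conversion uses the invoker
  return fp(2); // operator() may be entered without a real lambda object
}
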
VarBypassDetector Bypasses; + // CodeGen lambda for loops and support for ordered clause + typedef llvm::function_ref<void(CodeGenFunction &, const OMPLoopDirective &, + JumpDest)> + CodeGenLoopTy; + typedef llvm::function_ref<void(CodeGenFunction &, SourceLocation, + const unsigned, const bool)> + CodeGenOrderedTy; + + // Codegen lambda for loop bounds in worksharing loop constructs + typedef llvm::function_ref<std::pair<LValue, LValue>( + CodeGenFunction &, const OMPExecutableDirective &S)> + CodeGenLoopBoundsTy; + + // Codegen lambda for loop bounds in dispatch-based loop implementation + typedef llvm::function_ref<std::pair<llvm::Value *, llvm::Value *>( + CodeGenFunction &, const OMPExecutableDirective &S, Address LB, + Address UB)> + CodeGenDispatchBoundsTy; + /// \brief CGBuilder insert helper. This function is called after an /// instruction is created using Builder. void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, @@ -212,6 +234,13 @@ public: /// value. This is invalid iff the function has no return value. Address ReturnValue; + /// Return true if a label was seen in the current scope. + bool hasLabelBeenSeenInCurrentScope() const { + if (CurLexicalScope) + return CurLexicalScope->hasLabels(); + return !LabelMap.empty(); + } + /// AllocaInsertPoint - This is an instruction in the entry block before which /// we prefer to insert allocas. llvm::AssertingVH<llvm::Instruction> AllocaInsertPt; @@ -298,6 +327,31 @@ public: ~CGCapturedStmtRAII() { CGF.CapturedStmtInfo = PrevCapturedStmtInfo; } }; + /// An abstract representation of regular/ObjC call/message targets. + class AbstractCallee { + /// The function declaration of the callee. + const Decl *CalleeDecl; + + public: + AbstractCallee() : CalleeDecl(nullptr) {} + AbstractCallee(const FunctionDecl *FD) : CalleeDecl(FD) {} + AbstractCallee(const ObjCMethodDecl *OMD) : CalleeDecl(OMD) {} + bool hasFunctionDecl() const { + return dyn_cast_or_null<FunctionDecl>(CalleeDecl); + } + const Decl *getDecl() const { return CalleeDecl; } + unsigned getNumParams() const { + if (const auto *FD = dyn_cast<FunctionDecl>(CalleeDecl)) + return FD->getNumParams(); + return cast<ObjCMethodDecl>(CalleeDecl)->param_size(); + } + const ParmVarDecl *getParamDecl(unsigned I) const { + if (const auto *FD = dyn_cast<FunctionDecl>(CalleeDecl)) + return FD->getParamDecl(I); + return *(cast<ObjCMethodDecl>(CalleeDecl)->param_begin() + I); + } + }; + /// \brief Sanitizers enabled for this function. SanitizerSet SanOpts; @@ -548,14 +602,10 @@ public: CGF.DidCallStackSave = false; } - /// \brief Exit this cleanup scope, emitting any accumulated - /// cleanups. + /// \brief Exit this cleanup scope, emitting any accumulated cleanups. ~RunCleanupsScope() { - if (PerformCleanup) { - CGF.DidCallStackSave = OldDidCallStackSave; - CGF.PopCleanupBlocks(CleanupStackDepth, - LifetimeExtendedCleanupStackSize); - } + if (PerformCleanup) + ForceCleanup(); } /// \brief Determine whether this scope requires any cleanups. @@ -565,11 +615,15 @@ public: /// \brief Force the emission of cleanups now, instead of waiting /// until this object is destroyed. - void ForceCleanup() { + /// \param ValuesToReload - A list of values that need to be available at + /// the insertion point after cleanup emission. If cleanup emission created + /// a shared cleanup block, these value pointers will be rewritten. + /// Otherwise, they not will be modified. 
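
// The CodeGen*Ty typedefs above are llvm::function_ref aliases: a
// function_ref is a non-owning view of a callable, cheap to pass by value but
// valid only while the referenced callable is alive.  A minimal usage sketch
// with hypothetical names (not the OpenMP codegen itself):
#include "llvm/ADT/STLExtras.h"

static int runTwice(llvm::function_ref<int(int)> Fn, int Seed) {
  return Fn(Fn(Seed)); // just forwards through the non-owning view
}

static int functionRefDemo() {
  int Bias = 3;
  // The lambda outlives the call, so handing out a function_ref to it is safe.
  return runTwice([&Bias](int V) { return V + Bias; }, 1); // == 7
}
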
+ void ForceCleanup(std::initializer_list<llvm::Value**> ValuesToReload = {}) { assert(PerformCleanup && "Already forced cleanup"); CGF.DidCallStackSave = OldDidCallStackSave; - CGF.PopCleanupBlocks(CleanupStackDepth, - LifetimeExtendedCleanupStackSize); + CGF.PopCleanupBlocks(CleanupStackDepth, LifetimeExtendedCleanupStackSize, + ValuesToReload); PerformCleanup = false; } }; @@ -620,6 +674,10 @@ public: rescopeLabels(); } + bool hasLabels() const { + return !Labels.empty(); + } + void rescopeLabels(); }; @@ -727,13 +785,17 @@ public: /// \brief Takes the old cleanup stack size and emits the cleanup blocks /// that have been added. - void PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize); + void + PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize, + std::initializer_list<llvm::Value **> ValuesToReload = {}); /// \brief Takes the old cleanup stack size and emits the cleanup blocks /// that have been added, then adds all lifetime-extended cleanups from /// the given position to the stack. - void PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize, - size_t OldLifetimeExtendedStackSize); + void + PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize, + size_t OldLifetimeExtendedStackSize, + std::initializer_list<llvm::Value **> ValuesToReload = {}); void ResolveBranchFixups(llvm::BasicBlock *Target); @@ -1054,7 +1116,7 @@ private: auto IP = CGF.Builder.saveAndClearIP(); CGF.EmitBlock(Stack.back().ExitBlock.getBlock()); CodeGen(CGF); - CGF.EmitBranchThroughCleanup(Stack.back().ContBlock); + CGF.EmitBranch(Stack.back().ContBlock.getBlock()); CGF.Builder.restoreIP(IP); Stack.back().HasBeenEmitted = true; } @@ -1116,10 +1178,11 @@ private: uint64_t LoopCount); public: - /// Increment the profiler's counter for the given statement. - void incrementProfileCounter(const Stmt *S) { + /// Increment the profiler's counter for the given statement by \p StepV. + /// If \p StepV is null, the default increment is 1. + void incrementProfileCounter(const Stmt *S, llvm::Value *StepV = nullptr) { if (CGM.getCodeGenOpts().hasProfileClangInstr()) - PGO.emitCounterIncrement(Builder, S); + PGO.emitCounterIncrement(Builder, S, StepV); PGO.setCurrentStmt(S); } @@ -1334,6 +1397,27 @@ private: /// information about the layout of the variable. llvm::DenseMap<const ValueDecl *, BlockByrefInfo> BlockByrefInfos; + /// Used by -fsanitize=nullability-return to determine whether the return + /// value can be checked. + llvm::Value *RetValNullabilityPrecondition = nullptr; + + /// Check if -fsanitize=nullability-return instrumentation is required for + /// this function. + bool requiresReturnValueNullabilityCheck() const { + return RetValNullabilityPrecondition; + } + + /// Used to store precise source locations for return statements by the + /// runtime return value checks. + Address ReturnLocation = Address::invalid(); + + /// Check if the return value of this function requires sanitization. + bool requiresReturnValueCheck() const { + return requiresReturnValueNullabilityCheck() || + (SanOpts.has(SanitizerKind::ReturnsNonnullAttribute) && + CurCodeDecl && CurCodeDecl->getAttr<ReturnsNonNullAttr>()); + } + llvm::BasicBlock *TerminateLandingPad; llvm::BasicBlock *TerminateHandler; llvm::BasicBlock *TrapBB; @@ -1341,16 +1425,8 @@ private: /// True if we need emit the life-time markers. const bool ShouldEmitLifetimeMarkers; - /// Add a kernel metadata node to the named metadata node 'opencl.kernels'. 
- /// In the kernel metadata node, reference the kernel function and metadata - /// nodes for its optional attribute qualifiers (OpenCL 1.1 6.7.2): - /// - A node for the vec_type_hint(<type>) qualifier contains string - /// "vec_type_hint", an undefined value of the <type> data type, - /// and a Boolean that is true if the <type> is integer and signed. - /// - A node for the work_group_size_hint(X,Y,Z) qualifier contains string - /// "work_group_size_hint", and three 32-bit integers X, Y and Z. - /// - A node for the reqd_work_group_size(X,Y,Z) qualifier contains string - /// "reqd_work_group_size", and three 32-bit integers X, Y and Z. + /// Add OpenCL kernel arg metadata and the kernel attribute meatadata to + /// the function metadata. void EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn); @@ -1403,6 +1479,9 @@ public: const TargetInfo &getTarget() const { return Target; } llvm::LLVMContext &getLLVMContext() { return CGM.getLLVMContext(); } + const TargetCodeGenInfo &getTargetHooks() const { + return CGM.getTargetCodeGenInfo(); + } //===--------------------------------------------------------------------===// // Cleanups @@ -1553,6 +1632,8 @@ public: SourceLocation Loc = SourceLocation(), SourceLocation StartLoc = SourceLocation()); + static bool IsConstructorDelegationValid(const CXXConstructorDecl *Ctor); + void EmitConstructorBody(FunctionArgList &Args); void EmitDestructorBody(FunctionArgList &Args); void emitImplicitAssignmentOperatorBody(FunctionArgList &Args); @@ -1671,11 +1752,6 @@ public: llvm::Value *EmitVTableTypeCheckedLoad(const CXXRecordDecl *RD, llvm::Value *VTable, uint64_t VTableByteOffset); - /// CanDevirtualizeMemberFunctionCalls - Checks whether virtual calls on given - /// expr can be devirtualized. - bool CanDevirtualizeMemberFunctionCall(const Expr *Base, - const CXXMethodDecl *MD); - /// EnterDtorCleanups - Enter the cleanups necessary to complete the /// given phase of destruction for a destructor. The end result /// should call destructors on members and base classes in reverse @@ -1710,6 +1786,9 @@ public: void EmitFunctionEpilog(const CGFunctionInfo &FI, bool EmitRetDbgLoc, SourceLocation EndLoc); + /// Emit a test that checks if the return value \p RV is nonnull. + void EmitReturnValueCheck(llvm::Value *RV); + /// EmitStartEHSpec - Emit the start of the exception spec. 
void EmitStartEHSpec(const Decl *D); @@ -1817,40 +1896,65 @@ public: //===--------------------------------------------------------------------===// LValue MakeAddrLValue(Address Addr, QualType T, - AlignmentSource AlignSource = AlignmentSource::Type) { - return LValue::MakeAddr(Addr, T, getContext(), AlignSource, + LValueBaseInfo BaseInfo = + LValueBaseInfo(AlignmentSource::Type)) { + return LValue::MakeAddr(Addr, T, getContext(), BaseInfo, CGM.getTBAAInfo(T)); } LValue MakeAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, - AlignmentSource AlignSource = AlignmentSource::Type) { + LValueBaseInfo BaseInfo = + LValueBaseInfo(AlignmentSource::Type)) { return LValue::MakeAddr(Address(V, Alignment), T, getContext(), - AlignSource, CGM.getTBAAInfo(T)); + BaseInfo, CGM.getTBAAInfo(T)); } LValue MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T); LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T); CharUnits getNaturalTypeAlignment(QualType T, - AlignmentSource *Source = nullptr, + LValueBaseInfo *BaseInfo = nullptr, bool forPointeeType = false); CharUnits getNaturalPointeeTypeAlignment(QualType T, - AlignmentSource *Source = nullptr); + LValueBaseInfo *BaseInfo = nullptr); Address EmitLoadOfReference(Address Ref, const ReferenceType *RefTy, - AlignmentSource *Source = nullptr); + LValueBaseInfo *BaseInfo = nullptr); LValue EmitLoadOfReferenceLValue(Address Ref, const ReferenceType *RefTy); Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, - AlignmentSource *Source = nullptr); + LValueBaseInfo *BaseInfo = nullptr); LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy); - /// CreateTempAlloca - This creates a alloca and inserts it into the entry - /// block. The caller is responsible for setting an appropriate alignment on + /// CreateTempAlloca - This creates an alloca and inserts it into the entry + /// block if \p ArraySize is nullptr, otherwise inserts it at the current + /// insertion point of the builder. The caller is responsible for setting an + /// appropriate alignment on /// the alloca. - llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, - const Twine &Name = "tmp"); + /// + /// \p ArraySize is the number of array elements to be allocated if it + /// is not nullptr. + /// + /// LangAS::Default is the address space of pointers to local variables and + /// temporaries, as exposed in the source language. In certain + /// configurations, this is not the same as the alloca address space, and a + /// cast is needed to lift the pointer from the alloca AS into + /// LangAS::Default. This can happen when the target uses a restricted + /// address space for the stack but the source language requires + /// LangAS::Default to be a generic address space. The latter condition is + /// common for most programming languages; OpenCL is an exception in that + /// LangAS::Default is the private address space, which naturally maps + /// to the stack. + /// + /// Because the address of a temporary is often exposed to the program in + /// various ways, this function will perform the cast by default. The cast + /// may be avoided by passing false as \p CastToDefaultAddrSpace; this is + /// more efficient if the caller knows that the address will not be exposed. 
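
// A sketch of the cast described in the comment above, written against the
// plain IR-building API (illustrative only; the actual wiring lives in the
// CreateTempAlloca overloads).  On a target whose data layout puts allocas in
// a non-zero address space, the temporary is created there and then
// addrspacecast back to the address space the source language expects for
// pointers to locals.  The helper name and parameters are assumptions.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

static llvm::Value *makeCastedTemp(llvm::IRBuilder<> &Builder, llvm::Type *Ty,
                                   unsigned AllocaAS, unsigned DefaultAS) {
  // Alloca in the target's alloca address space...
  llvm::AllocaInst *Tmp =
      Builder.Insert(new llvm::AllocaInst(Ty, AllocaAS, "tmp"));
  if (AllocaAS == DefaultAS)
    return Tmp;
  // ...then lift the pointer into the language's default address space.
  return Builder.CreateAddrSpaceCast(
      Tmp, llvm::PointerType::get(Ty, DefaultAS), "tmp.ascast");
}
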
+ llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp", + llvm::Value *ArraySize = nullptr); Address CreateTempAlloca(llvm::Type *Ty, CharUnits align, - const Twine &Name = "tmp"); + const Twine &Name = "tmp", + llvm::Value *ArraySize = nullptr, + bool CastToDefaultAddrSpace = true); /// CreateDefaultAlignedTempAlloca - This creates an alloca with the /// default ABI alignment of the given LLVM type. @@ -1885,9 +1989,12 @@ public: Address CreateIRTemp(QualType T, const Twine &Name = "tmp"); /// CreateMemTemp - Create a temporary memory object of the given type, with - /// appropriate alignment. - Address CreateMemTemp(QualType T, const Twine &Name = "tmp"); - Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp"); + /// appropriate alignment. Cast it to the default address space if + /// \p CastToDefaultAddrSpace is true. + Address CreateMemTemp(QualType T, const Twine &Name = "tmp", + bool CastToDefaultAddrSpace = true); + Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp", + bool CastToDefaultAddrSpace = true); /// CreateAggTemp - Create a temporary memory object for the given /// aggregate type. @@ -1928,7 +2035,7 @@ public: /// pointer to a char. Address EmitMSVAListRef(const Expr *E); - /// EmitAnyExprToTemp - Similary to EmitAnyExpr(), however, the result will + /// EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will /// always be accessible even if no aggregate location is provided. RValue EmitAnyExprToTemp(const Expr *E); @@ -2019,6 +2126,9 @@ public: llvm::BlockAddress *GetAddrOfLabel(const LabelDecl *L); llvm::BasicBlock *GetIndirectGotoBlock(); + /// Check if \p E is a C++ "this" pointer wrapped in value-preserving casts. + static bool IsWrappedCXXThis(const Expr *E); + /// EmitNullInitialization - Generate code to set a value of the given type to /// null, If the type contains data member pointers, they will be initialized /// to -1 in accordance with the Itanium C++ ABI. @@ -2230,7 +2340,9 @@ public: TCK_Upcast, /// Checking the operand of a cast to a virtual base object. Must be an /// object within its lifetime. - TCK_UpcastToVirtualBase + TCK_UpcastToVirtualBase, + /// Checking the value assigned to a _Nonnull pointer. Must not be null. + TCK_NonnullAssign }; /// \brief Whether any type-checking sanitizers are enabled. If \c false, @@ -2241,7 +2353,7 @@ public: /// appropriate size and alignment for an object of type \p Type. void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *V, QualType Type, CharUnits Alignment = CharUnits::Zero(), - bool SkipNullCheck = false); + SanitizerSet SkippedChecks = SanitizerSet()); /// \brief Emit a check that \p Base points into an array object, which /// we can access at index \p Index. 
\p Accessed should be \c false if we @@ -2401,6 +2513,12 @@ public: PeepholeProtection protectFromPeepholes(RValue rvalue); void unprotectFromPeepholes(PeepholeProtection protection); + void EmitAlignmentAssumption(llvm::Value *PtrValue, llvm::Value *Alignment, + llvm::Value *OffsetValue = nullptr) { + Builder.CreateAlignmentAssumption(CGM.getDataLayout(), PtrValue, Alignment, + OffsetValue); + } + //===--------------------------------------------------------------------===// // Statement Emission //===--------------------------------------------------------------------===// @@ -2463,6 +2581,15 @@ public: void EmitObjCAutoreleasePoolStmt(const ObjCAutoreleasePoolStmt &S); void EmitCoroutineBody(const CoroutineBodyStmt &S); + void EmitCoreturnStmt(const CoreturnStmt &S); + RValue EmitCoawaitExpr(const CoawaitExpr &E, + AggValueSlot aggSlot = AggValueSlot::ignored(), + bool ignoreResult = false); + LValue EmitCoawaitLValue(const CoawaitExpr *E); + RValue EmitCoyieldExpr(const CoyieldExpr &E, + AggValueSlot aggSlot = AggValueSlot::ignored(), + bool ignoreResult = false); + LValue EmitCoyieldLValue(const CoyieldExpr *E); RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID); void EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false); @@ -2627,12 +2754,16 @@ public: /// the end of the directive. /// /// \param D Directive that has at least one 'reduction' directives. - void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D); + /// \param ReductionKind The kind of reduction to perform. + void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D, + const OpenMPDirectiveKind ReductionKind); /// \brief Emit initial code for linear variables. Creates private copies /// and initializes them with the values according to OpenMP standard. /// /// \param D Directive (possibly) with the 'linear' clause. - void EmitOMPLinearClauseInit(const OMPLoopDirective &D); + /// \return true if at least one linear variable is found that should be + /// initialized with the value of the original variable, false otherwise. + bool EmitOMPLinearClauseInit(const OMPLoopDirective &D); typedef const llvm::function_ref<void(CodeGenFunction & /*CGF*/, llvm::Value * /*OutlinedFn*/, @@ -2678,7 +2809,6 @@ public: void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S); void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S); void EmitOMPDistributeDirective(const OMPDistributeDirective &S); - void EmitOMPDistributeLoop(const OMPDistributeDirective &S); void EmitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective &S); void EmitOMPDistributeParallelForSimdDirective( @@ -2704,13 +2834,16 @@ public: void EmitOMPTargetTeamsDistributeSimdDirective( const OMPTargetTeamsDistributeSimdDirective &S); - /// Emit outlined function for the target directive. - static std::pair<llvm::Function * /*OutlinedFn*/, - llvm::Constant * /*OutlinedFnID*/> - EmitOMPTargetDirectiveOutlinedFunction(CodeGenModule &CGM, - const OMPTargetDirective &S, - StringRef ParentName, - bool IsOffloadEntry); + /// Emit device code for the target directive. + static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, + StringRef ParentName, + const OMPTargetDirective &S); + static void + EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelDirective &S); + static void + EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDirective &S); /// \brief Emit inner loop of the worksharing/simd construct. 
/// /// \param S Directive, for which the inner loop must be emitted. @@ -2732,32 +2865,78 @@ public: void EmitOMPPrivateLoopCounters(const OMPLoopDirective &S, OMPPrivateScope &LoopScope); + /// Helper for the OpenMP loop directives. + void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); + + /// \brief Emit code for the worksharing loop-based directive. + /// \return true, if this construct has any lastprivate clause, false - + /// otherwise. + bool EmitOMPWorksharingLoop(const OMPLoopDirective &S, Expr *EUB, + const CodeGenLoopBoundsTy &CodeGenLoopBounds, + const CodeGenDispatchBoundsTy &CGDispatchBounds); + private: /// Helpers for blocks llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info); /// Helpers for the OpenMP loop directives. - void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false); void EmitOMPSimdFinal( const OMPLoopDirective &D, const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen); - /// \brief Emit code for the worksharing loop-based directive. - /// \return true, if this construct has any lastprivate clause, false - - /// otherwise. - bool EmitOMPWorksharingLoop(const OMPLoopDirective &S); - void EmitOMPOuterLoop(bool IsMonotonic, bool DynamicOrOrdered, - const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); + + void EmitOMPDistributeLoop(const OMPLoopDirective &S, + const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr); + + /// struct with the values to be passed to the OpenMP loop-related functions + struct OMPLoopArguments { + /// loop lower bound + Address LB = Address::invalid(); + /// loop upper bound + Address UB = Address::invalid(); + /// loop stride + Address ST = Address::invalid(); + /// isLastIteration argument for runtime functions + Address IL = Address::invalid(); + /// Chunk value generated by sema + llvm::Value *Chunk = nullptr; + /// EnsureUpperBound + Expr *EUB = nullptr; + /// IncrementExpression + Expr *IncExpr = nullptr; + /// Loop initialization + Expr *Init = nullptr; + /// Loop exit condition + Expr *Cond = nullptr; + /// Update of LB after a whole chunk has been executed + Expr *NextLB = nullptr; + /// Update of UB after a whole chunk has been executed + Expr *NextUB = nullptr; + OMPLoopArguments() = default; + OMPLoopArguments(Address LB, Address UB, Address ST, Address IL, + llvm::Value *Chunk = nullptr, Expr *EUB = nullptr, + Expr *IncExpr = nullptr, Expr *Init = nullptr, + Expr *Cond = nullptr, Expr *NextLB = nullptr, + Expr *NextUB = nullptr) + : LB(LB), UB(UB), ST(ST), IL(IL), Chunk(Chunk), EUB(EUB), + IncExpr(IncExpr), Init(Init), Cond(Cond), NextLB(NextLB), + NextUB(NextUB) {} + }; + void EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, + const OMPLoopArguments &LoopArgs, + const CodeGenLoopTy &CodeGenLoop, + const CodeGenOrderedTy &CodeGenOrdered); void EmitOMPForOuterLoop(const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, const OMPLoopDirective &S, - OMPPrivateScope &LoopScope, bool Ordered, Address LB, - Address UB, Address ST, Address IL, - llvm::Value *Chunk); - void EmitOMPDistributeOuterLoop( - OpenMPDistScheduleClauseKind ScheduleKind, - const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); + OMPPrivateScope &LoopScope, bool Ordered, + const OMPLoopArguments &LoopArgs, + const 
CodeGenDispatchBoundsTy &CGDispatchBounds); + void EmitOMPDistributeOuterLoop(OpenMPDistScheduleClauseKind ScheduleKind, + const OMPLoopDirective &S, + OMPPrivateScope &LoopScope, + const OMPLoopArguments &LoopArgs, + const CodeGenLoopTy &CodeGenLoopContent); /// \brief Emit code for sections directive. void EmitSections(const OMPExecutableDirective &S); @@ -2843,13 +3022,20 @@ public: /// representation to its value representation. llvm::Value *EmitFromMemory(llvm::Value *Value, QualType Ty); + /// Check if the scalar \p Value is within the valid range for the given + /// type \p Ty. + /// + /// Returns true if a check is needed (even if the range is unknown). + bool EmitScalarRangeCheck(llvm::Value *Value, QualType Ty, + SourceLocation Loc); + /// EmitLoadOfScalar - Load a scalar value from an address, taking /// care to appropriately convert from the memory representation to /// the LLVM value representation. llvm::Value *EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, - AlignmentSource AlignSource = - AlignmentSource::Type, + LValueBaseInfo BaseInfo = + LValueBaseInfo(AlignmentSource::Type), llvm::MDNode *TBAAInfo = nullptr, QualType TBAABaseTy = QualType(), uint64_t TBAAOffset = 0, @@ -2866,7 +3052,8 @@ public: /// the LLVM value representation. void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, - AlignmentSource AlignSource = AlignmentSource::Type, + LValueBaseInfo BaseInfo = + LValueBaseInfo(AlignmentSource::Type), llvm::MDNode *TBAAInfo = nullptr, bool isInit = false, QualType TBAABaseTy = QualType(), uint64_t TBAAOffset = 0, bool isNontemporal = false); @@ -2883,7 +3070,7 @@ public: /// rvalue, returning the rvalue. RValue EmitLoadOfLValue(LValue V, SourceLocation Loc); RValue EmitLoadOfExtVectorElementLValue(LValue V); - RValue EmitLoadOfBitfieldLValue(LValue LV); + RValue EmitLoadOfBitfieldLValue(LValue LV, SourceLocation Loc); RValue EmitLoadOfGlobalRegLValue(LValue LV); /// EmitStoreThroughLValue - Store the specified rvalue into the specified @@ -2939,7 +3126,7 @@ public: RValue EmitRValueForField(LValue LV, const FieldDecl *FD, SourceLocation Loc); Address EmitArrayToPointerDecay(const Expr *Array, - AlignmentSource *AlignSource = nullptr); + LValueBaseInfo *BaseInfo = nullptr); class ConstantEmission { llvm::PointerIntPair<llvm::Constant*, 1, bool> ValueAndIsReference; @@ -3080,7 +3267,7 @@ public: Address EmitCXXMemberDataPointerAddress(const Expr *E, Address base, llvm::Value *memberPtr, const MemberPointerType *memberPtrType, - AlignmentSource *AlignSource = nullptr); + LValueBaseInfo *BaseInfo = nullptr); RValue EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, ReturnValueSlot ReturnValue); @@ -3092,8 +3279,8 @@ public: RValue EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E, ReturnValueSlot ReturnValue); - RValue EmitCUDADevicePrintfCallExpr(const CallExpr *E, - ReturnValueSlot ReturnValue); + RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, + ReturnValueSlot ReturnValue); RValue EmitBuiltinExpr(const FunctionDecl *FD, unsigned BuiltinID, const CallExpr *E, @@ -3149,6 +3336,8 @@ private: public: llvm::Value *EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E); + llvm::Value *EmitBuiltinAvailable(ArrayRef<llvm::Value *> Args); + llvm::Value *EmitObjCProtocolExpr(const ObjCProtocolExpr *E); llvm::Value *EmitObjCStringLiteral(const ObjCStringLiteral *E); llvm::Value *EmitObjCBoxedExpr(const ObjCBoxedExpr *E); @@ -3215,6 +3404,7 @@ public: static Destroyer destroyARCStrongImprecise; 
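
// A simplified model (plain C++, not the CodeGen implementation) of what the
// EmitScalarRangeCheck declaration above is for: loads of bool and enum
// values can be checked against the type's valid representation range, e.g.
// {0, 1} for bool; for an enum the range would come from its enumerators, or
// be unknown.  ScalarRange and isInRange are illustrative names.
#include <cstdint>

struct ScalarRange {
  uint64_t Min, Max; // inclusive range of valid representation values
};

static bool isInRange(uint64_t LoadedBits, ScalarRange R) {
  return LoadedBits >= R.Min && LoadedBits <= R.Max;
}
// A bool loaded as the byte 0x02 fails the check: isInRange(2, {0, 1}) == false.
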
static Destroyer destroyARCStrongPrecise; static Destroyer destroyARCWeak; + static Destroyer emitARCIntrinsicUse; void EmitObjCAutoreleasePoolPop(llvm::Value *Ptr); llvm::Value *EmitObjCAutoreleasePoolPush(); @@ -3316,9 +3506,10 @@ public: /// GenerateCXXGlobalDtorsFunc - Generates code for destroying global /// variables. - void GenerateCXXGlobalDtorsFunc(llvm::Function *Fn, - const std::vector<std::pair<llvm::WeakVH, - llvm::Constant*> > &DtorsAndObjects); + void GenerateCXXGlobalDtorsFunc( + llvm::Function *Fn, + const std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>> + &DtorsAndObjects); void GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, const VarDecl *D, @@ -3396,6 +3587,26 @@ public: void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount); + /// Given an assignment `*LHS = RHS`, emit a test that checks if \p RHS is + /// nonnull, if \p LHS is marked _Nonnull. + void EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, SourceLocation Loc); + + /// An enumeration which makes it easier to specify whether or not an + /// operation is a subtraction. + enum { NotSubtraction = false, IsSubtraction = true }; + + /// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to + /// detect undefined behavior when the pointer overflow sanitizer is enabled. + /// \p SignedIndices indicates whether any of the GEP indices are signed. + /// \p IsSubtraction indicates whether the expression used to form the GEP + /// is a subtraction. + llvm::Value *EmitCheckedInBoundsGEP(llvm::Value *Ptr, + ArrayRef<llvm::Value *> IdxList, + bool SignedIndices, + bool IsSubtraction, + SourceLocation Loc, + const Twine &Name = ""); + /// \brief Emit a description of a type in a format suitable for passing to /// a runtime sanitizer handler. llvm::Constant *EmitCheckTypeDescriptor(QualType T); @@ -3429,13 +3640,16 @@ public: /// "trap-func-name" if specified. llvm::CallInst *EmitTrapCall(llvm::Intrinsic::ID IntrID); + /// \brief Emit a stub for the cross-DSO CFI check function. + void EmitCfiCheckStub(); + /// \brief Emit a cross-DSO CFI failure handling function. void EmitCfiCheckFail(); /// \brief Create a check for a function parameter that may potentially be /// declared as non-null. void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, - const FunctionDecl *FD, unsigned ParmNum); + AbstractCallee AC, unsigned ParmNum); /// EmitCallArg - Emit a single call argument. void EmitCallArg(CallArgList &args, const Expr *E, QualType ArgType); @@ -3490,14 +3704,18 @@ private: /// \brief Attempts to statically evaluate the object size of E. If that /// fails, emits code to figure the size of E out for us. This is /// pass_object_size aware. + /// + /// If EmittedExpr is non-null, this will use that instead of re-emitting E. llvm::Value *evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, - llvm::IntegerType *ResType); + llvm::IntegerType *ResType, + llvm::Value *EmittedE); /// \brief Emits the size of E, as required by __builtin_object_size. This /// function is aware of pass_object_size parameters, and will act accordingly /// if E is a parameter with the pass_object_size attribute. 
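
// The EmitCheckedInBoundsGEP declaration above backs the pointer overflow
// sanitizer mentioned in its comment: a GEP is flagged when the byte offset
// applied to the base address wraps around.  A plain-integer model of the
// wraparound test (not the IR that is actually emitted):
#include <cstdint>

static bool gepWouldOverflow(uintptr_t Base, intptr_t ByteOffset) {
  uintptr_t Result = Base + static_cast<uintptr_t>(ByteOffset); // mod 2^N
  if (ByteOffset >= 0)
    return Result < Base; // adding a non-negative offset moved us backwards
  return Result > Base;   // subtracting moved us forwards
}
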
llvm::Value *emitBuiltinObjectSize(const Expr *E, unsigned Type, - llvm::IntegerType *ResType); + llvm::IntegerType *ResType, + llvm::Value *EmittedE); public: #ifndef NDEBUG @@ -3533,7 +3751,7 @@ public: template <typename T> void EmitCallArgs(CallArgList &Args, const T *CallArgTypeInfo, llvm::iterator_range<CallExpr::const_arg_iterator> ArgRange, - const FunctionDecl *CalleeDecl = nullptr, + AbstractCallee AC = AbstractCallee(), unsigned ParamsToSkip = 0, EvaluationOrder Order = EvaluationOrder::Default) { SmallVector<QualType, 16> ArgTypes; @@ -3575,48 +3793,40 @@ public: for (auto *A : llvm::make_range(Arg, ArgRange.end())) ArgTypes.push_back(CallArgTypeInfo ? getVarArgType(A) : A->getType()); - EmitCallArgs(Args, ArgTypes, ArgRange, CalleeDecl, ParamsToSkip, Order); + EmitCallArgs(Args, ArgTypes, ArgRange, AC, ParamsToSkip, Order); } void EmitCallArgs(CallArgList &Args, ArrayRef<QualType> ArgTypes, llvm::iterator_range<CallExpr::const_arg_iterator> ArgRange, - const FunctionDecl *CalleeDecl = nullptr, + AbstractCallee AC = AbstractCallee(), unsigned ParamsToSkip = 0, EvaluationOrder Order = EvaluationOrder::Default); - /// EmitPointerWithAlignment - Given an expression with a pointer - /// type, emit the value and compute our best estimate of the - /// alignment of the pointee. + /// EmitPointerWithAlignment - Given an expression with a pointer type, + /// emit the value and compute our best estimate of the alignment of the + /// pointee. /// - /// Note that this function will conservatively fall back on the type - /// when it doesn't + /// \param BaseInfo - If non-null, this will be initialized with + /// information about the source of the alignment and the may-alias + /// attribute. Note that this function will conservatively fall back on + /// the type when it doesn't recognize the expression and may-alias will + /// be set to false. /// - /// \param Source - If non-null, this will be initialized with - /// information about the source of the alignment. Note that this - /// function will conservatively fall back on the type when it - /// doesn't recognize the expression, which means that sometimes - /// - /// a worst-case One - /// reasonable way to use this information is when there's a - /// language guarantee that the pointer must be aligned to some - /// stricter value, and we're simply trying to ensure that - /// sufficiently obvious uses of under-aligned objects don't get - /// miscompiled; for example, a placement new into the address of - /// a local variable. In such a case, it's quite reasonable to - /// just ignore the returned alignment when it isn't from an - /// explicit source. + /// One reasonable way to use this information is when there's a language + /// guarantee that the pointer must be aligned to some stricter value, and + /// we're simply trying to ensure that sufficiently obvious uses of under- + /// aligned objects don't get miscompiled; for example, a placement new + /// into the address of a local variable. In such a case, it's quite + /// reasonable to just ignore the returned alignment when it isn't from an + /// explicit source. 
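
// The comment above cites placement new into a local as the motivating case:
// an expression-based alignment estimate for the pointer operand may be
// weaker than what the program actually guarantees, so callers with a
// language-level guarantee can reasonably ignore a non-explicit result.  A
// concrete, standard-C++ illustration (types chosen for the example only):
#include <new>

struct alignas(16) Vec4 { float v[4]; };

static void placementNewDemo() {
  alignas(Vec4) unsigned char buf[sizeof(Vec4)];
  // The declared alignment of 'buf' is what makes this placement new valid;
  // looking only at the expression, the pointee is just an unsigned char
  // array, so an inferred alignment would be overly conservative.
  Vec4 *p = new (buf) Vec4{};
  p->~Vec4();
}
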
Address EmitPointerWithAlignment(const Expr *Addr, - AlignmentSource *Source = nullptr); + LValueBaseInfo *BaseInfo = nullptr); void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK); private: QualType getVarArgType(const Expr *Arg); - const TargetCodeGenInfo &getTargetHooks() const { - return CGM.getTargetCodeGenInfo(); - } - void EmitDeclMetadata(); BlockByrefHelpers *buildByrefHelpers(llvm::StructType &byrefType, diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp index 3600543..5561d45 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp @@ -24,7 +24,6 @@ #include "CodeGenFunction.h" #include "CodeGenPGO.h" #include "CodeGenTBAA.h" -#include "ConstantBuilder.h" #include "CoverageMappingGen.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" @@ -42,9 +41,11 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Version.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/Frontend/CodeGenOptions.h" #include "clang/Sema/SemaDiagnostic.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -111,6 +112,9 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, C.getTargetInfo().getMaxPointerWidth()); Int8PtrTy = Int8Ty->getPointerTo(0); Int8PtrPtrTy = Int8PtrTy->getPointerTo(0); + AllocaInt8PtrTy = Int8Ty->getPointerTo( + M.getDataLayout().getAllocaAddrSpace()); + ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace(); RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC(); BuiltinCC = getTargetCodeGenInfo().getABIInfo().getBuiltinCC(); @@ -367,13 +371,18 @@ void InstrProfStats::reportDiagnostics(DiagnosticsEngine &Diags, if (MainFile.empty()) MainFile = "<stdin>"; Diags.Report(diag::warn_profile_data_unprofiled) << MainFile; - } else - Diags.Report(diag::warn_profile_data_out_of_date) << Visited << Missing - << Mismatched; + } else { + if (Mismatched > 0) + Diags.Report(diag::warn_profile_data_out_of_date) << Visited << Mismatched; + + if (Missing > 0) + Diags.Report(diag::warn_profile_data_missing) << Visited << Missing; + } } void CodeGenModule::Release() { EmitDeferred(); + EmitVTablesOpportunistically(); applyGlobalValReplacements(); applyReplacements(); checkAliases(); @@ -392,8 +401,11 @@ void CodeGenModule::Release() { } if (OpenMPRuntime) if (llvm::Function *OpenMPRegistrationFunction = - OpenMPRuntime->emitRegistrationFunction()) - AddGlobalCtor(OpenMPRegistrationFunction, 0); + OpenMPRuntime->emitRegistrationFunction()) { + auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ? 
+ OpenMPRegistrationFunction : nullptr; + AddGlobalCtor(OpenMPRegistrationFunction, 0, ComdatKey); + } if (PGOReader) { getModule().setProfileSummary(PGOReader->getSummary().getMD(VMContext)); if (PGOStats.hasDiagnostics()) @@ -406,8 +418,11 @@ void CodeGenModule::Release() { EmitDeferredUnusedCoverageMappings(); if (CoverageMapping) CoverageMapping->emit(); - if (CodeGenOpts.SanitizeCfiCrossDso) + if (CodeGenOpts.SanitizeCfiCrossDso) { CodeGenFunction(*this).EmitCfiCheckFail(); + CodeGenFunction(*this).EmitCfiCheckStub(); + } + emitAtAvailableLinkGuard(); emitLLVMUsed(); if (SanStats) SanStats->finish(); @@ -416,6 +431,12 @@ void CodeGenModule::Release() { (Context.getLangOpts().Modules || !LinkerOptionsMetadata.empty())) { EmitModuleLinkOptions(); } + + // Record mregparm value now so it is visible through rest of codegen. + if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86) + getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters", + CodeGenOpts.NumRegisterParameters); + if (CodeGenOpts.DwarfVersion) { // We actually want the latest version when there are conflicts. // We can change from Warning to Latest if such mode is supported. @@ -449,18 +470,24 @@ void CodeGenModule::Release() { getModule().addModuleFlag(llvm::Module::Warning, "Debug Info Version", llvm::DEBUG_METADATA_VERSION); + // Width of wchar_t in bytes + uint64_t WCharWidth = + Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity(); + assert((LangOpts.ShortWChar || + llvm::TargetLibraryInfoImpl::getTargetWCharSize(Target.getTriple()) == + Target.getWCharWidth() / 8) && + "LLVM wchar_t size out of sync"); + // We need to record the widths of enums and wchar_t, so that we can generate - // the correct build attributes in the ARM backend. + // the correct build attributes in the ARM backend. wchar_size is also used by + // TargetLibraryInfo. + getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth); + llvm::Triple::ArchType Arch = Context.getTargetInfo().getTriple().getArch(); if ( Arch == llvm::Triple::arm || Arch == llvm::Triple::armeb || Arch == llvm::Triple::thumb || Arch == llvm::Triple::thumbeb) { - // Width of wchar_t in bytes - uint64_t WCharWidth = - Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity(); - getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth); - // The minimum width of an enum in bytes uint64_t EnumWidth = Context.getLangOpts().ShortEnums ? 1 : 4; getModule().addModuleFlag(llvm::Module::Error, "min_enum_size", EnumWidth); @@ -479,6 +506,26 @@ void CodeGenModule::Release() { LangOpts.CUDADeviceFlushDenormalsToZero ? 1 : 0); } + // Emit OpenCL specific module metadata: OpenCL/SPIR version. + if (LangOpts.OpenCL) { + EmitOpenCLMetadata(); + // Emit SPIR version. + if (getTriple().getArch() == llvm::Triple::spir || + getTriple().getArch() == llvm::Triple::spir64) { + // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the + // opencl.spir.version named metadata. + llvm::Metadata *SPIRVerElts[] = { + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + Int32Ty, LangOpts.OpenCLVersion / 100)), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + Int32Ty, (LangOpts.OpenCLVersion / 100 > 1) ? 
0 : 2))}; + llvm::NamedMDNode *SPIRVerMD = + TheModule.getOrInsertNamedMetadata("opencl.spir.version"); + llvm::LLVMContext &Ctx = TheModule.getContext(); + SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts)); + } + } + if (uint32_t PLevel = Context.getLangOpts().PICLevel) { assert(PLevel < 3 && "Invalid PIC Level"); getModule().setPICLevel(static_cast<llvm::PICLevel::Level>(PLevel)); @@ -502,6 +549,20 @@ void CodeGenModule::Release() { EmitTargetMetadata(); } +void CodeGenModule::EmitOpenCLMetadata() { + // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the + // opencl.ocl.version named metadata node. + llvm::Metadata *OCLVerElts[] = { + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + Int32Ty, LangOpts.OpenCLVersion / 100)), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + Int32Ty, (LangOpts.OpenCLVersion % 100) / 10))}; + llvm::NamedMDNode *OCLVerMD = + TheModule.getOrInsertNamedMetadata("opencl.ocl.version"); + llvm::LLVMContext &Ctx = TheModule.getContext(); + OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts)); +} + void CodeGenModule::UpdateCompletedType(const TagDecl *TD) { // Make sure that this type is translated. Types.UpdateCompletedType(TD); @@ -554,12 +615,8 @@ void CodeGenModule::DecorateInstructionWithTBAA(llvm::Instruction *Inst, void CodeGenModule::DecorateInstructionWithInvariantGroup( llvm::Instruction *I, const CXXRecordDecl *RD) { - llvm::Metadata *MD = CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); - auto *MetaDataNode = dyn_cast<llvm::MDNode>(MD); - // Check if we have to wrap MDString in MDNode. - if (!MetaDataNode) - MetaDataNode = llvm::MDNode::get(getLLVMContext(), MD); - I->setMetadata(llvm::LLVMContext::MD_invariant_group, MetaDataNode); + I->setMetadata(llvm::LLVMContext::MD_invariant_group, + llvm::MDNode::get(getLLVMContext(), {})); } void CodeGenModule::Error(SourceLocation loc, StringRef message) { @@ -740,7 +797,7 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) { // Get the type of a ctor entry, { i32, void ()*, i8* }. llvm::StructType *CtorStructTy = llvm::StructType::get( - Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy, nullptr); + Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy); // Construct the constructor and destructor arrays. ConstantInitBuilder builder(*this); @@ -830,10 +887,9 @@ void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D, const CGFunctionInfo &Info, llvm::Function *F) { unsigned CallingConv; - AttributeListType AttributeList; - ConstructAttributeList(F->getName(), Info, D, AttributeList, CallingConv, - false); - F->setAttributes(llvm::AttributeSet::get(getLLVMContext(), AttributeList)); + llvm::AttributeList PAL; + ConstructAttributeList(F->getName(), Info, D, PAL, CallingConv, false); + F->setAttributes(PAL); F->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); } @@ -882,14 +938,20 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) B.addAttribute(llvm::Attribute::NoInline); - F->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get( - F->getContext(), - llvm::AttributeSet::FunctionIndex, B)); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); return; } - if (D->hasAttr<OptimizeNoneAttr>()) { + // Track whether we need to add the optnone LLVM attribute, + // starting with the default for this optimization level. 
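A worked example of the version arithmetic behind the OpenCL/SPIR metadata emitted above (values illustrative):

    // For -cl-std=CL1.2, LangOpts.OpenCLVersion == 120:
    //   major = 120 / 100        -> 1
    //   minor = (120 % 100) / 10 -> 2   => opencl.ocl.version records {1, 2}
    // For -cl-std=CL2.0, OpenCLVersion == 200 => {2, 0}.
    // The SPIR minor digit uses (OpenCLVersion / 100 > 1) ? 0 : 2, so
    // opencl.spir.version records {1, 2} for OpenCL 1.x and {2, 0} for 2.0.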
+ bool ShouldAddOptNone = + !CodeGenOpts.DisableO0ImplyOptNone && CodeGenOpts.OptimizationLevel == 0; + // We can't add optnone in the following cases, it won't pass the verifier. + ShouldAddOptNone &= !D->hasAttr<MinSizeAttr>(); + ShouldAddOptNone &= !F->hasFnAttribute(llvm::Attribute::AlwaysInline); + ShouldAddOptNone &= !D->hasAttr<AlwaysInlineAttr>(); + + if (ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) { B.addAttribute(llvm::Attribute::OptimizeNone); // OptimizeNone implies noinline; we should not be inlining such functions. @@ -943,7 +1005,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // function. if (!D->hasAttr<OptimizeNoneAttr>()) { if (D->hasAttr<ColdAttr>()) { - B.addAttribute(llvm::Attribute::OptimizeForSize); + if (!ShouldAddOptNone) + B.addAttribute(llvm::Attribute::OptimizeForSize); B.addAttribute(llvm::Attribute::Cold); } @@ -951,9 +1014,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, B.addAttribute(llvm::Attribute::MinSize); } - F->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get( - F->getContext(), llvm::AttributeSet::FunctionIndex, B)); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); unsigned alignment = D->getMaxAlignment() / Context.getCharWidth(); if (alignment) @@ -999,9 +1060,25 @@ void CodeGenModule::setNonAliasAttributes(const Decl *D, llvm::GlobalObject *GO) { SetCommonAttributes(D, GO); - if (D) + if (D) { + if (auto *GV = dyn_cast<llvm::GlobalVariable>(GO)) { + if (auto *SA = D->getAttr<PragmaClangBSSSectionAttr>()) + GV->addAttribute("bss-section", SA->getName()); + if (auto *SA = D->getAttr<PragmaClangDataSectionAttr>()) + GV->addAttribute("data-section", SA->getName()); + if (auto *SA = D->getAttr<PragmaClangRodataSectionAttr>()) + GV->addAttribute("rodata-section", SA->getName()); + } + + if (auto *F = dyn_cast<llvm::Function>(GO)) { + if (auto *SA = D->getAttr<PragmaClangTextSectionAttr>()) + if (!D->getAttr<SectionAttr>()) + F->addFnAttr("implicit-section-name", SA->getName()); + } + if (const SectionAttr *SA = D->getAttr<SectionAttr>()) GO->setSection(SA->getName()); + } getTargetCodeGenInfo().setTargetAttributes(D, GO, *this); } @@ -1021,7 +1098,7 @@ static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV, const NamedDecl *ND) { // Set linkage and visibility in case we never see a definition. LinkageInfo LV = ND->getLinkageAndVisibility(); - if (LV.getLinkage() != ExternalLinkage) { + if (!isExternallyVisible(LV.getLinkage())) { // Don't set internal linkage on declarations. } else { if (ND->hasAttr<DLLImportAttr>()) { @@ -1029,7 +1106,6 @@ static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV, GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); } else if (ND->hasAttr<DLLExportAttr>()) { GV->setLinkage(llvm::GlobalValue::ExternalLinkage); - GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } else if (ND->hasAttr<WeakAttr>() || ND->isWeakImported()) { // "extern_weak" is overloaded in LLVM; we probably should have // separate linkage types for this. 
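The bss-section/data-section/rodata-section and implicit-section-name attributes added in setNonAliasAttributes above are the IR side of the #pragma clang section extension. A small usage sketch (section names invented for illustration):

    #pragma clang section bss=".fast_bss" data=".fast_data" rodata=".fast_ro"
    int zeroed_counter;              // carries the "bss-section" attribute
    int initialized = 42;            // "data-section"
    const int table[3] = {1, 2, 3};  // "rodata-section"
    #pragma clang section bss="" data="" rodata=""   // back to defaults

    #pragma clang section text=".fast_text"
    void hot_path() {}   // "implicit-section-name", unless the function also
                         // carries an explicit __attribute__((section(...)))
    #pragma clang section text=""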
@@ -1101,13 +1177,17 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, setLinkageAndVisibilityForGV(F, FD); + if (FD->getAttr<PragmaClangTextSectionAttr>()) { + F->addFnAttr("implicit-section-name"); + } + if (const SectionAttr *SA = FD->getAttr<SectionAttr>()) F->setSection(SA->getName()); if (FD->isReplaceableGlobalAllocationFunction()) { // A replaceable global allocation function does not act like a builtin by // default, only if it is invoked by a new-expression or delete-expression. - F->addAttribute(llvm::AttributeSet::FunctionIndex, + F->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoBuiltin); // A sane operator new returns a non-aliasing pointer. @@ -1116,7 +1196,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, auto Kind = FD->getDeclName().getCXXOverloadedOperator(); if (getCodeGenOpts().AssumeSaneOperatorNew && (Kind == OO_New || Kind == OO_Array_New)) - F->addAttribute(llvm::AttributeSet::ReturnIndex, + F->addAttribute(llvm::AttributeList::ReturnIndex, llvm::Attribute::NoAlias); } @@ -1145,7 +1225,7 @@ void CodeGenModule::addCompilerUsedGlobal(llvm::GlobalValue *GV) { } static void emitUsed(CodeGenModule &CGM, StringRef Name, - std::vector<llvm::WeakVH> &List) { + std::vector<llvm::WeakTrackingVH> &List) { // Don't create llvm.used if there is no need. if (List.empty()) return; @@ -1197,7 +1277,7 @@ void CodeGenModule::AddDependentLib(StringRef Lib) { /// \brief Add link options implied by the given module, including modules /// it depends on, using a postorder walk. static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, - SmallVectorImpl<llvm::Metadata *> &Metadata, + SmallVectorImpl<llvm::MDNode *> &Metadata, llvm::SmallPtrSet<Module *, 16> &Visited) { // Import this module's parent. if (Mod->Parent && Visited.insert(Mod->Parent).second) { @@ -1285,7 +1365,7 @@ void CodeGenModule::EmitModuleLinkOptions() { // Add link options for all of the imported modules in reverse topological // order. We don't do anything to try to order import link flags with respect // to linker options inserted by things like #pragma comment(). - SmallVector<llvm::Metadata *, 16> MetadataArgs; + SmallVector<llvm::MDNode *, 16> MetadataArgs; Visited.clear(); for (Module *M : LinkModules) if (Visited.insert(M).second) @@ -1294,9 +1374,9 @@ void CodeGenModule::EmitModuleLinkOptions() { LinkerOptionsMetadata.append(MetadataArgs.begin(), MetadataArgs.end()); // Add the linker options metadata flag. - getModule().addModuleFlag(llvm::Module::AppendUnique, "Linker Options", - llvm::MDNode::get(getLLVMContext(), - LinkerOptionsMetadata)); + auto *NMD = getModule().getOrInsertNamedMetadata("llvm.linker.options"); + for (auto *MD : LinkerOptionsMetadata) + NMD->addOperand(MD); } void CodeGenModule::EmitDeferred() { @@ -1319,13 +1399,10 @@ void CodeGenModule::EmitDeferred() { // Grab the list of decls to emit. If EmitGlobalDefinition schedules more // work, it will not interfere with this. 
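The EmitModuleLinkOptions change above switches linker directives from the "Linker Options" module flag to the llvm.linker.options named metadata node; the source-level constructs feeding it are unchanged. A hedged sketch for an MSVC-style target (library name invented):

    // Each directive becomes one operand of the llvm.linker.options named
    // metadata rather than an element of the old "Linker Options" module flag.
    #pragma comment(lib, "ws2_32")   // roughly "/DEFAULTLIB:ws2_32.lib" on COFF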
- std::vector<DeferredGlobal> CurDeclsToEmit; + std::vector<GlobalDecl> CurDeclsToEmit; CurDeclsToEmit.swap(DeferredDeclsToEmit); - for (DeferredGlobal &G : CurDeclsToEmit) { - GlobalDecl D = G.GD; - G.GV = nullptr; - + for (GlobalDecl &D : CurDeclsToEmit) { // We should call GetAddrOfGlobal with IsForDefinition set to true in order // to get GlobalValue with exactly the type we need, not something that // might had been created for another decl with the same mangled name but @@ -1364,6 +1441,24 @@ void CodeGenModule::EmitDeferred() { } } +void CodeGenModule::EmitVTablesOpportunistically() { + // Try to emit external vtables as available_externally if they have emitted + // all inlined virtual functions. It runs after EmitDeferred() and therefore + // is not allowed to create new references to things that need to be emitted + // lazily. Note that it also uses fact that we eagerly emitting RTTI. + + assert((OpportunisticVTables.empty() || shouldOpportunisticallyEmitVTables()) + && "Only emit opportunistic vtables with optimizations"); + + for (const CXXRecordDecl *RD : OpportunisticVTables) { + assert(getVTables().isVTableExternal(RD) && + "This queue should only contain external vtables"); + if (getCXXABI().canSpeculativelyEmitVTable(RD)) + VTables.GenerateClassData(RD); + } + OpportunisticVTables.clear(); +} + void CodeGenModule::EmitGlobalAnnotations() { if (Annotations.empty()) return; @@ -1482,6 +1577,34 @@ bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV, return false; } +bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, + StringRef Category) const { + if (!LangOpts.XRayInstrument) + return false; + const auto &XRayFilter = getContext().getXRayFilter(); + using ImbueAttr = XRayFunctionFilter::ImbueAttribute; + auto Attr = XRayFunctionFilter::ImbueAttribute::NONE; + if (Loc.isValid()) + Attr = XRayFilter.shouldImbueLocation(Loc, Category); + if (Attr == ImbueAttr::NONE) + Attr = XRayFilter.shouldImbueFunction(Fn->getName()); + switch (Attr) { + case ImbueAttr::NONE: + return false; + case ImbueAttr::ALWAYS: + Fn->addFnAttr("function-instrument", "xray-always"); + break; + case ImbueAttr::ALWAYS_ARG1: + Fn->addFnAttr("function-instrument", "xray-always"); + Fn->addFnAttr("xray-log-args", "1"); + break; + case ImbueAttr::NEVER: + Fn->addFnAttr("function-instrument", "xray-never"); + break; + } + return true; +} + bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) { // Never defer when EmitAllDecls is specified. if (LangOpts.EmitAllDecls) @@ -1678,13 +1801,13 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } StringRef MangledName = getMangledName(GD); - if (llvm::GlobalValue *GV = GetGlobalValue(MangledName)) { + if (GetGlobalValue(MangledName) != nullptr) { // The value has already been used and should therefore be emitted. - addDeferredDeclToEmit(GV, GD); + addDeferredDeclToEmit(GD); } else if (MustBeEmitted(Global)) { // The value must be emitted, but cannot be emitted eagerly. assert(!MayBeEmittedEagerly(Global)); - addDeferredDeclToEmit(/*GV=*/nullptr, GD); + addDeferredDeclToEmit(GD); } else { // Otherwise, remember that we saw a deferred decl with this name. The // first use of the mangled name will cause it to move into @@ -1693,6 +1816,16 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } } +// Check if T is a class type with a destructor that's not dllimport. 
+static bool HasNonDllImportDtor(QualType T) { + if (const auto *RT = T->getBaseElementTypeUnsafe()->getAs<RecordType>()) + if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl())) + if (RD->getDestructor() && !RD->getDestructor()->hasAttr<DLLImportAttr>()) + return true; + + return false; +} + namespace { struct FunctionIsDirectlyRecursive : public RecursiveASTVisitor<FunctionIsDirectlyRecursive> { @@ -1726,6 +1859,7 @@ namespace { } }; + // Make sure we're not referencing non-imported vars or functions. struct DLLImportFunctionVisitor : public RecursiveASTVisitor<DLLImportFunctionVisitor> { bool SafeToInline = true; @@ -1733,12 +1867,25 @@ namespace { bool shouldVisitImplicitCode() const { return true; } bool VisitVarDecl(VarDecl *VD) { - // A thread-local variable cannot be imported. - SafeToInline = !VD->getTLSKind(); + if (VD->getTLSKind()) { + // A thread-local variable cannot be imported. + SafeToInline = false; + return SafeToInline; + } + + // A variable definition might imply a destructor call. + if (VD->isThisDeclarationADefinition()) + SafeToInline = !HasNonDllImportDtor(VD->getType()); + + return SafeToInline; + } + + bool VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) { + if (const auto *D = E->getTemporary()->getDestructor()) + SafeToInline = D->hasAttr<DLLImportAttr>(); return SafeToInline; } - // Make sure we're not referencing non-imported vars or functions. bool VisitDeclRefExpr(DeclRefExpr *E) { ValueDecl *VD = E->getDecl(); if (isa<FunctionDecl>(VD)) @@ -1747,14 +1894,28 @@ namespace { SafeToInline = !V->hasGlobalStorage() || V->hasAttr<DLLImportAttr>(); return SafeToInline; } + bool VisitCXXConstructExpr(CXXConstructExpr *E) { SafeToInline = E->getConstructor()->hasAttr<DLLImportAttr>(); return SafeToInline; } + + bool VisitCXXMemberCallExpr(CXXMemberCallExpr *E) { + CXXMethodDecl *M = E->getMethodDecl(); + if (!M) { + // Call through a pointer to member function. This is safe to inline. + SafeToInline = true; + } else { + SafeToInline = M->hasAttr<DLLImportAttr>(); + } + return SafeToInline; + } + bool VisitCXXDeleteExpr(CXXDeleteExpr *E) { SafeToInline = E->getOperatorDelete()->hasAttr<DLLImportAttr>(); return SafeToInline; } + bool VisitCXXNewExpr(CXXNewExpr *E) { SafeToInline = E->getOperatorNew()->hasAttr<DLLImportAttr>(); return SafeToInline; @@ -1783,16 +1944,6 @@ CodeGenModule::isTriviallyRecursive(const FunctionDecl *FD) { return Walker.Result; } -// Check if T is a class type with a destructor that's not dllimport. -static bool HasNonDllImportDtor(QualType T) { - if (const RecordType *RT = dyn_cast<RecordType>(T)) - if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl())) - if (RD->getDestructor() && !RD->getDestructor()->hasAttr<DLLImportAttr>()) - return true; - - return false; -} - bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { if (getFunctionLinkage(GD) != llvm::Function::AvailableExternallyLinkage) return true; @@ -1828,20 +1979,8 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { return !isTriviallyRecursive(F); } -/// If the type for the method's class was generated by -/// CGDebugInfo::createContextChain(), the cache contains only a -/// limited DIType without any declarations. Since EmitFunctionStart() -/// needs to find the canonical declaration for each method, we need -/// to construct the complete type prior to emitting the method. 
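The extra DLLImportFunctionVisitor cases above tighten when the body of a dllimport inline function may be emitted available_externally: a variable whose type has a non-dllimport destructor, a bound temporary, a called member function, and operator new/delete now all have to resolve to imported symbols. An illustrative example (types invented for the sketch):

    struct Local { ~Local(); };              // destructor is not dllimport

    struct __declspec(dllimport) Widget {
      int size() const { return n_; }        // safe: only touches 'this'
      int scaled() const {
        Local guard;                         // needs ~Local(), which is not
        return size() * 2;                   // imported, so this body is not
      }                                      // safe to emit locally
      int n_;
    };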
-void CodeGenModule::CompleteDIClassType(const CXXMethodDecl* D) { - if (!D->isInstance()) - return; - - if (CGDebugInfo *DI = getModuleDebugInfo()) - if (getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) { - const auto *ThisPtr = cast<PointerType>(D->getThisType(getContext())); - DI->getOrCreateRecordType(ThisPtr->getPointeeType(), D->getLocation()); - } +bool CodeGenModule::shouldOpportunisticallyEmitVTables() { + return CodeGenOpts.OptimizationLevel > 0; } void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { @@ -1858,7 +1997,6 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { return; if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) { - CompleteDIClassType(Method); // Make sure to emit the definition(s) before we emit the thunks. // This is necessary for the generation of certain thunks. if (const auto *CD = dyn_cast<CXXConstructorDecl>(Method)) @@ -1893,13 +2031,10 @@ static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, /// /// If D is non-null, it specifies a decl that correspond to this. This is used /// to set the attributes on the function when it is first created. -llvm::Constant * -CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, - llvm::Type *Ty, - GlobalDecl GD, bool ForVTable, - bool DontDefer, bool IsThunk, - llvm::AttributeSet ExtraAttrs, - ForDefinition_t IsForDefinition) { +llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( + StringRef MangledName, llvm::Type *Ty, GlobalDecl GD, bool ForVTable, + bool DontDefer, bool IsThunk, llvm::AttributeList ExtraAttrs, + ForDefinition_t IsForDefinition) { const Decl *D = GD.getDecl(); // Lookup the entry, lazily creating it if necessary. @@ -1989,12 +2124,9 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, assert(F->getName() == MangledName && "name was uniqued!"); if (D) SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk); - if (ExtraAttrs.hasAttributes(llvm::AttributeSet::FunctionIndex)) { - llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeSet::FunctionIndex); - F->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get(VMContext, - llvm::AttributeSet::FunctionIndex, - B)); + if (ExtraAttrs.hasAttributes(llvm::AttributeList::FunctionIndex)) { + llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeList::FunctionIndex); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); } if (!DontDefer) { @@ -2004,7 +2136,7 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, if (D && isa<CXXDestructorDecl>(D) && getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D), GD.getDtorType())) - addDeferredDeclToEmit(F, GD); + addDeferredDeclToEmit(GD); // This is the first use or definition of a mangled name. If there is a // deferred decl with this name, remember that we need to emit it at the end @@ -2014,7 +2146,7 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, // Move the potentially referenced deferred decl to the // DeferredDeclsToEmit list, and remove it from DeferredDecls (since we // don't need it anymore). 
- addDeferredDeclToEmit(F, DDI->second); + addDeferredDeclToEmit(DDI->second); DeferredDecls.erase(DDI); // Otherwise, there are cases we have to worry about where we're @@ -2034,7 +2166,7 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, FD = FD->getPreviousDecl()) { if (isa<CXXRecordDecl>(FD->getLexicalDeclContext())) { if (FD->doesThisDeclarationHaveABody()) { - addDeferredDeclToEmit(F, GD.getWithDecl(FD)); + addDeferredDeclToEmit(GD.getWithDecl(FD)); break; } } @@ -2069,7 +2201,7 @@ llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD, StringRef MangledName = getMangledName(GD); return GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer, - /*IsThunk=*/false, llvm::AttributeSet(), + /*IsThunk=*/false, llvm::AttributeList(), IsForDefinition); } @@ -2115,7 +2247,7 @@ GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) { /// type and name. llvm::Constant * CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, - llvm::AttributeSet ExtraAttrs, + llvm::AttributeList ExtraAttrs, bool Local) { llvm::Constant *C = GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false, @@ -2143,9 +2275,8 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, /// CreateBuiltinFunction - Create a new builtin function with the specified /// type and name. llvm::Constant * -CodeGenModule::CreateBuiltinFunction(llvm::FunctionType *FTy, - StringRef Name, - llvm::AttributeSet ExtraAttrs) { +CodeGenModule::CreateBuiltinFunction(llvm::FunctionType *FTy, StringRef Name, + llvm::AttributeList ExtraAttrs) { llvm::Constant *C = GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false, /*DontDefer=*/false, /*IsThunk=*/false, ExtraAttrs); @@ -2236,11 +2367,13 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, return llvm::ConstantExpr::getBitCast(Entry, Ty); } - unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace()); + auto AddrSpace = GetGlobalVarAddressSpace(D); + auto TargetAddrSpace = getContext().getTargetAddressSpace(AddrSpace); + auto *GV = new llvm::GlobalVariable( getModule(), Ty->getElementType(), false, llvm::GlobalValue::ExternalLinkage, nullptr, MangledName, nullptr, - llvm::GlobalVariable::NotThreadLocal, AddrSpace); + llvm::GlobalVariable::NotThreadLocal, TargetAddrSpace); // If we already created a global with the same mangled name (but different // type) before, take its name and remove it from its parent. @@ -2263,7 +2396,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, if (DDI != DeferredDecls.end()) { // Move the potentially referenced deferred decl to the DeferredDeclsToEmit // list, and remove it from DeferredDecls (since we don't need it anymore). - addDeferredDeclToEmit(GV, DDI->second); + addDeferredDeclToEmit(DDI->second); DeferredDecls.erase(DDI); } @@ -2297,8 +2430,15 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, GV->setSection(".cp.rodata"); } - if (AddrSpace != Ty->getAddressSpace()) - return llvm::ConstantExpr::getAddrSpaceCast(GV, Ty); + auto ExpectedAS = + D ? D->getType().getAddressSpace() + : static_cast<unsigned>(LangOpts.OpenCL ? 
LangAS::opencl_global + : LangAS::Default); + assert(getContext().getTargetAddressSpace(ExpectedAS) == + Ty->getPointerAddressSpace()); + if (AddrSpace != ExpectedAS) + return getTargetCodeGenInfo().performAddrSpaceCast(*this, GV, AddrSpace, + ExpectedAS, Ty); return GV; } @@ -2432,18 +2572,28 @@ CharUnits CodeGenModule::GetTargetTypeStoreSize(llvm::Type *Ty) const { getDataLayout().getTypeStoreSizeInBits(Ty)); } -unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D, - unsigned AddrSpace) { - if (D && LangOpts.CUDA && LangOpts.CUDAIsDevice) { - if (D->hasAttr<CUDAConstantAttr>()) - AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_constant); - else if (D->hasAttr<CUDASharedAttr>()) - AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_shared); +unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { + unsigned AddrSpace; + if (LangOpts.OpenCL) { + AddrSpace = D ? D->getType().getAddressSpace() + : static_cast<unsigned>(LangAS::opencl_global); + assert(AddrSpace == LangAS::opencl_global || + AddrSpace == LangAS::opencl_constant || + AddrSpace == LangAS::opencl_local || + AddrSpace >= LangAS::FirstTargetAddressSpace); + return AddrSpace; + } + + if (LangOpts.CUDA && LangOpts.CUDAIsDevice) { + if (D && D->hasAttr<CUDAConstantAttr>()) + return LangAS::cuda_constant; + else if (D && D->hasAttr<CUDASharedAttr>()) + return LangAS::cuda_shared; else - AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_device); + return LangAS::cuda_device; } - return AddrSpace; + return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D); } template<typename SomeDecl> @@ -2596,10 +2746,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // "extern int x[];") and then a definition of a different type (e.g. // "int x[10];"). This also happens when an initializer has a different type // from the type of the global (this happens with unions). - if (!GV || - GV->getType()->getElementType() != InitType || + if (!GV || GV->getType()->getElementType() != InitType || GV->getType()->getAddressSpace() != - GetGlobalVarAddressSpace(D, getContext().getTargetAddressSpace(ASTTy))) { + getContext().getTargetAddressSpace(GetGlobalVarAddressSpace(D))) { // Move the old entry aside so that we'll create a new one. Entry->setName(StringRef()); @@ -2751,6 +2900,14 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context, if (D->hasAttr<SectionAttr>()) return true; + // A variable cannot be both common and exist in a section. + // We dont try to determine which is the right section in the front-end. + // If no specialized section name is applicable, it will resort to default. + if (D->hasAttr<PragmaClangBSSSectionAttr>() || + D->hasAttr<PragmaClangDataSectionAttr>() || + D->hasAttr<PragmaClangRodataSectionAttr>()) + return true; + // Thread local vars aren't considered common linkage. if (D->getTLSKind()) return true; @@ -2803,7 +2960,7 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator( // We are guaranteed to have a strong definition somewhere else, // so we can use available_externally linkage. if (Linkage == GVA_AvailableExternally) - return llvm::Function::AvailableExternallyLinkage; + return llvm::GlobalValue::AvailableExternallyLinkage; // Note that Apple's kernel linker doesn't support symbol // coalescing, so we need to avoid linkonce and weak linkages there. @@ -2897,14 +3054,8 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, continue; // Get the call site's attribute list. 
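GetGlobalVarAddressSpace now returns a language-level (AST) address space, and the GetOrCreateLLVMGlobal hunk above maps it to the target address space, inserting performAddrSpaceCast when the two differ. On the CUDA device path the choice is driven by the declaration's attribute; a sketch using the usual CUDA macro spellings (illustrative):

    // Compiled with -fcuda-is-device:
    __constant__ float coeffs[16];   // takes the LangAS::cuda_constant branch
    __device__   float scratch[64];  // LangAS::cuda_device
    // __shared__ declarations take the LangAS::cuda_shared branch the same way.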
- SmallVector<llvm::AttributeSet, 8> newAttrs; - llvm::AttributeSet oldAttrs = callSite.getAttributes(); - - // Collect any return attributes from the call. - if (oldAttrs.hasAttributes(llvm::AttributeSet::ReturnIndex)) - newAttrs.push_back( - llvm::AttributeSet::get(newFn->getContext(), - oldAttrs.getRetAttributes())); + SmallVector<llvm::AttributeSet, 8> newArgAttrs; + llvm::AttributeList oldAttrs = callSite.getAttributes(); // If the function was passed too few arguments, don't transform. unsigned newNumArgs = newFn->arg_size(); @@ -2914,27 +3065,19 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, // If any of the types mismatch, we don't transform. unsigned argNo = 0; bool dontTransform = false; - for (llvm::Function::arg_iterator ai = newFn->arg_begin(), - ae = newFn->arg_end(); ai != ae; ++ai, ++argNo) { - if (callSite.getArgument(argNo)->getType() != ai->getType()) { + for (llvm::Argument &A : newFn->args()) { + if (callSite.getArgument(argNo)->getType() != A.getType()) { dontTransform = true; break; } // Add any parameter attributes. - if (oldAttrs.hasAttributes(argNo + 1)) - newAttrs. - push_back(llvm:: - AttributeSet::get(newFn->getContext(), - oldAttrs.getParamAttributes(argNo + 1))); + newArgAttrs.push_back(oldAttrs.getParamAttributes(argNo)); + argNo++; } if (dontTransform) continue; - if (oldAttrs.hasAttributes(llvm::AttributeSet::FunctionIndex)) - newAttrs.push_back(llvm::AttributeSet::get(newFn->getContext(), - oldAttrs.getFnAttributes())); - // Okay, we can transform this. Create the new call instruction and copy // over the required information. newArgs.append(callSite.arg_begin(), callSite.arg_begin() + argNo); @@ -2958,8 +3101,9 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, if (!newCall->getType()->isVoidTy()) newCall->takeName(callSite.getInstruction()); - newCall.setAttributes( - llvm::AttributeSet::get(newFn->getContext(), newAttrs)); + newCall.setAttributes(llvm::AttributeList::get( + newFn->getContext(), oldAttrs.getFnAttributes(), + oldAttrs.getRetAttributes(), newArgAttrs)); newCall.setCallingConv(callSite.getCallingConv()); // Finally, remove the old call, replacing any uses with the new one. @@ -3341,6 +3485,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { llvm_unreachable("unknown file format"); case llvm::Triple::COFF: case llvm::Triple::ELF: + case llvm::Triple::Wasm: GV->setSection("cfstring"); break; case llvm::Triple::MachO: @@ -3612,20 +3757,26 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( Linkage = llvm::GlobalVariable::InternalLinkage; } } - unsigned AddrSpace = GetGlobalVarAddressSpace( - VD, getContext().getTargetAddressSpace(MaterializedType)); + unsigned AddrSpace = + VD ? 
GetGlobalVarAddressSpace(VD) : MaterializedType.getAddressSpace(); + auto TargetAS = getContext().getTargetAddressSpace(AddrSpace); auto *GV = new llvm::GlobalVariable( getModule(), Type, Constant, Linkage, InitialValue, Name.c_str(), - /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, - AddrSpace); + /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS); setGlobalVisibility(GV, VD); GV->setAlignment(Align.getQuantity()); if (supportsCOMDAT() && GV->isWeakForLinker()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); if (VD->getTLSKind()) setTLSMode(GV, *VD); - MaterializedGlobalTemporaryMap[E] = GV; - return ConstantAddress(GV, Align); + llvm::Constant *CV = GV; + if (AddrSpace != LangAS::Default) + CV = getTargetCodeGenInfo().performAddrSpaceCast( + *this, GV, AddrSpace, LangAS::Default, + Type->getPointerTo( + getContext().getTargetAddressSpace(LangAS::Default))); + MaterializedGlobalTemporaryMap[E] = CV; + return ConstantAddress(CV, Align); } /// EmitObjCPropertyImplementations - Emit information for synthesized @@ -3767,11 +3918,16 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { AddDeferredUnusedCoverageMapping(D); break; + case Decl::CXXDeductionGuide: + // Function-like, but does not result in code emission. + break; + case Decl::Var: case Decl::Decomposition: // Skip variable templates if (cast<VarDecl>(D)->getDescribedVarTemplate()) return; + LLVM_FALLTHROUGH; case Decl::VarTemplateSpecialization: EmitGlobal(cast<VarDecl>(D)); if (auto *DD = dyn_cast<DecompositionDecl>(D)) @@ -3790,6 +3946,11 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { EmitDeclContext(cast<NamespaceDecl>(D)); break; case Decl::CXXRecord: + if (DebugInfo) { + if (auto *ES = D->getASTContext().getExternalSource()) + if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never) + DebugInfo->completeUnusedClass(cast<CXXRecordDecl>(*D)); + } // Emit any static data members, they may be definitions. for (auto *I : cast<CXXRecordDecl>(D)->decls()) if (isa<VarDecl>(I) || isa<CXXRecordDecl>(I)) @@ -4338,18 +4499,19 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, // Make a copy of the features as passed on the command line into the // beginning of the additional features from the function to override. - ParsedAttr.first.insert(ParsedAttr.first.begin(), + ParsedAttr.Features.insert(ParsedAttr.Features.begin(), Target.getTargetOpts().FeaturesAsWritten.begin(), Target.getTargetOpts().FeaturesAsWritten.end()); - if (ParsedAttr.second != "") - TargetCPU = ParsedAttr.second; + if (ParsedAttr.Architecture != "") + TargetCPU = ParsedAttr.Architecture ; // Now populate the feature map, first with the TargetCPU which is either // the default or a new one from the target attribute string. Then we'll use // the passed in features (FeaturesAsWritten) along with the new ones from // the attribute. 
- Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, ParsedAttr.first); + Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, + ParsedAttr.Features); } else { Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Target.getTargetOpts().Features); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h index 36f6785..b162e72 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h @@ -28,6 +28,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/Module.h" #include "clang/Basic/SanitizerBlacklist.h" +#include "clang/Basic/XRayLists.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -314,14 +315,9 @@ private: /// This is a list of deferred decls which we have seen that *are* actually /// referenced. These get code generated when the module is done. - struct DeferredGlobal { - DeferredGlobal(llvm::GlobalValue *GV, GlobalDecl GD) : GV(GV), GD(GD) {} - llvm::TrackingVH<llvm::GlobalValue> GV; - GlobalDecl GD; - }; - std::vector<DeferredGlobal> DeferredDeclsToEmit; - void addDeferredDeclToEmit(llvm::GlobalValue *GV, GlobalDecl GD) { - DeferredDeclsToEmit.emplace_back(GV, GD); + std::vector<GlobalDecl> DeferredDeclsToEmit; + void addDeferredDeclToEmit(GlobalDecl GD) { + DeferredDeclsToEmit.emplace_back(GD); } /// List of alias we have emitted. Used to make sure that what they point to @@ -345,11 +341,14 @@ private: /// A queue of (optional) vtables to consider emitting. std::vector<const CXXRecordDecl*> DeferredVTables; + /// A queue of (optional) vtables that may be emitted opportunistically. + std::vector<const CXXRecordDecl *> OpportunisticVTables; + /// List of global values which are required to be present in the object file; /// bitcast to i8*. This is used for forcing visibility of symbols which may /// otherwise be optimized out. - std::vector<llvm::WeakVH> LLVMUsed; - std::vector<llvm::WeakVH> LLVMCompilerUsed; + std::vector<llvm::WeakTrackingVH> LLVMUsed; + std::vector<llvm::WeakTrackingVH> LLVMCompilerUsed; /// Store the list of global constructors and their respective priorities to /// be emitted when the translation unit is complete. @@ -420,7 +419,7 @@ private: SmallVector<GlobalInitData, 8> PrioritizedCXXGlobalInits; /// Global destructor functions and arguments that need to run on termination. - std::vector<std::pair<llvm::WeakVH,llvm::Constant*> > CXXGlobalDtors; + std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>> CXXGlobalDtors; /// \brief The complete set of modules that has been imported. llvm::SetVector<clang::Module *> ImportedModules; @@ -430,14 +429,14 @@ private: llvm::SmallPtrSet<clang::Module *, 16> EmittedModuleInitializers; /// \brief A vector of metadata strings. - SmallVector<llvm::Metadata *, 16> LinkerOptionsMetadata; + SmallVector<llvm::MDNode *, 16> LinkerOptionsMetadata; /// @name Cache for Objective-C runtime types /// @{ /// Cached reference to the class for constant strings. This value has type /// int * but is actually an Obj-C class pointer. - llvm::WeakVH CFConstantStringClassRef; + llvm::WeakTrackingVH CFConstantStringClassRef; /// \brief The type used to describe the state of a fast enumeration in /// Objective-C's for..in loop. 
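The ParsedAttr.Features / ParsedAttr.Architecture renaming at the end of the CodeGenModule.cpp hunk above is the per-function target-attribute path of getFunctionFeatureMap; the command-line features are inserted in front, so the attribute's own entries win on conflict. Illustrative usage:

    // getFunctionFeatureMap() sees Architecture == "haswell" (the new CPU for
    // this one function) plus the avx2 feature; the -target-feature flags from
    // the command line are prepended, so the attribute's entries take
    // precedence for this function only.
    __attribute__((target("arch=haswell,avx2")))
    void fast_kernel(float *dst, const float *src, int n);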
@@ -454,7 +453,7 @@ private: bool isTriviallyRecursive(const FunctionDecl *F); bool shouldEmitFunction(GlobalDecl GD); - + bool shouldOpportunisticallyEmitVTables(); /// Map used to be sure we don't emit the same CompoundLiteral twice. llvm::DenseMap<const CompoundLiteralExpr *, llvm::GlobalVariable *> EmittedCompoundLiterals; @@ -546,6 +545,10 @@ public: return *ObjCData; } + // Version checking function, used to implement ObjC's @available: + // i32 @__isOSVersionAtLeast(i32, i32, i32) + llvm::Constant *IsOSVersionAtLeastFn = nullptr; + InstrProfStats &getPGOStats() { return PGOStats; } llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); } @@ -707,11 +710,15 @@ public: SourceLocation Loc = SourceLocation(), bool TLS = false); - /// Return the address space of the underlying global variable for D, as + /// Return the AST address space of the underlying global variable for D, as /// determined by its declaration. Normally this is the same as the address /// space of D's type, but in CUDA, address spaces are associated with - /// declarations, not types. - unsigned GetGlobalVarAddressSpace(const VarDecl *D, unsigned AddrSpace); + /// declarations, not types. If D is nullptr, return the default address + /// space for global variable. + /// + /// For languages without explicit address spaces, if D has default address + /// space, target-specific global or constant address space may be returned. + unsigned GetGlobalVarAddressSpace(const VarDecl *D); /// Return the llvm::Constant for the address of the given global variable. /// If Ty is non-null and if the global doesn't exist, then it will be created @@ -906,14 +913,13 @@ public: /// Create a new runtime function with the specified type and name. llvm::Constant * CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, - llvm::AttributeSet ExtraAttrs = llvm::AttributeSet(), + llvm::AttributeList ExtraAttrs = llvm::AttributeList(), bool Local = false); /// Create a new compiler builtin function with the specified type and name. - llvm::Constant *CreateBuiltinFunction(llvm::FunctionType *Ty, - StringRef Name, - llvm::AttributeSet ExtraAttrs = - llvm::AttributeSet()); + llvm::Constant * + CreateBuiltinFunction(llvm::FunctionType *Ty, StringRef Name, + llvm::AttributeList ExtraAttrs = llvm::AttributeList()); /// Create a new runtime global variable with the specified type and name. llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty, StringRef Name); @@ -1016,11 +1022,31 @@ public: /// \param CalleeInfo - The callee information these attributes are being /// constructed for. If valid, the attributes applied to this decl may /// contribute to the function attributes and calling convention. - /// \param PAL [out] - On return, the attribute list to use. + /// \param Attrs [out] - On return, the attribute list to use. /// \param CallingConv [out] - On return, the LLVM calling convention to use. void ConstructAttributeList(StringRef Name, const CGFunctionInfo &Info, - CGCalleeInfo CalleeInfo, AttributeListType &PAL, - unsigned &CallingConv, bool AttrOnCallSite); + CGCalleeInfo CalleeInfo, + llvm::AttributeList &Attrs, unsigned &CallingConv, + bool AttrOnCallSite); + + /// Adds attributes to F according to our CodeGenOptions and LangOptions, as + /// though we had emitted it ourselves. We remove any attributes on F that + /// conflict with the attributes we add here. + /// + /// This is useful for adding attrs to bitcode modules that you want to link + /// with but don't control, such as CUDA's libdevice. 
When linking with such + /// a bitcode library, you might want to set e.g. its functions' + /// "unsafe-fp-math" attribute to match the attr of the functions you're + /// codegen'ing. Otherwise, LLVM will interpret the bitcode module's lack of + /// unsafe-fp-math attrs as tantamount to unsafe-fp-math=false, and then LLVM + /// will propagate unsafe-fp-math=false up to every transitive caller of a + /// function in the bitcode library! + /// + /// With the exception of fast-math attrs, this will only make the attributes + /// on the function more conservative. But it's unsafe to call this on a + /// function which relies on particular fast-math attributes for correctness. + /// It's up to you to ensure that this is safe. + void AddDefaultFnAttrs(llvm::Function &F); // Fills in the supplied string map with the set of target features for the // passed in function. @@ -1036,13 +1062,14 @@ public: void RefreshTypeCacheForClass(const CXXRecordDecl *Class); - /// \brief Appends Opts to the "Linker Options" metadata value. + /// \brief Appends Opts to the "llvm.linker.options" metadata value. void AppendLinkerOptions(StringRef Opts); /// \brief Appends a detect mismatch command to the linker options. void AddDetectMismatch(StringRef Name, StringRef Value); - /// \brief Appends a dependent lib to the "Linker Options" metadata value. + /// \brief Appends a dependent lib to the "llvm.linker.options" metadata + /// value. void AddDependentLib(StringRef Lib); llvm::GlobalVariable::LinkageTypes getFunctionLinkage(GlobalDecl GD); @@ -1103,6 +1130,12 @@ public: QualType Ty, StringRef Category = StringRef()) const; + /// Imbue XRay attributes to a function, applying the always/never attribute + /// lists in the process. Returns true if we did imbue attributes this way, + /// false otherwise. + bool imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, + StringRef Category = StringRef()) const; + SanitizerMetadata *getSanitizerMetadata() { return SanitizerMD.get(); } @@ -1176,7 +1209,7 @@ public: void AddVTableTypeMetadata(llvm::GlobalVariable *VTable, CharUnits Offset, const CXXRecordDecl *RD); - /// \breif Get the declaration of std::terminate for the platform. + /// \brief Get the declaration of std::terminate for the platform. llvm::Constant *getTerminateFn(); llvm::SanitizerStatReport &getSanStats(); @@ -1190,12 +1223,11 @@ public: llvm::Constant *getNullPointer(llvm::PointerType *T, QualType QT); private: - llvm::Constant * - GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D, - bool ForVTable, bool DontDefer = false, - bool IsThunk = false, - llvm::AttributeSet ExtraAttrs = llvm::AttributeSet(), - ForDefinition_t IsForDefinition = NotForDefinition); + llvm::Constant *GetOrCreateLLVMFunction( + StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, + bool DontDefer = false, bool IsThunk = false, + llvm::AttributeList ExtraAttrs = llvm::AttributeList(), + ForDefinition_t IsForDefinition = NotForDefinition); llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName, llvm::PointerType *PTy, @@ -1222,7 +1254,6 @@ private: void EmitDeclContext(const DeclContext *DC); void EmitLinkageSpec(const LinkageSpecDecl *D); - void CompleteDIClassType(const CXXMethodDecl* D); /// \brief Emit the function that initializes C++ thread_local variables. void EmitCXXThreadLocalInitFunc(); @@ -1255,6 +1286,12 @@ private: /// Emit any needed decls for which code generation was deferred. 
void EmitDeferred(); + /// Try to emit external vtables as available_externally if they have emitted + /// all inlined virtual functions. It runs after EmitDeferred() and therefore + /// is not allowed to create new references to things that need to be emitted + /// lazily. + void EmitVTablesOpportunistically(); + /// Call replaceAllUsesWith on all pairs in Replacements. void applyReplacements(); @@ -1266,6 +1303,10 @@ private: /// Emit any vtables which we deferred and still have a use for. void EmitDeferredVTables(); + /// Emit a dummy function that reference a CoreFoundation symbol when + /// @available is used on Darwin. + void emitAtAvailableLinkGuard(); + /// Emit the llvm.used and llvm.compiler.used metadata. void emitLLVMUsed(); @@ -1284,6 +1325,9 @@ private: /// Emits target specific Metadata for global declarations. void EmitTargetMetadata(); + /// Emits OpenCL specific Metadata e.g. OpenCL version. + void EmitOpenCLMetadata(); + /// Emit the llvm.gcov metadata used to tell LLVM where to emit the .gcno and /// .gcda files in a way that persists in .bc files. void EmitCoverageFile(); @@ -1304,6 +1348,12 @@ private: /// Check whether we can use a "simpler", more core exceptions personality /// function. void SimplifyPersonality(); + + /// Helper function for ConstructAttributeList and AddDefaultFnAttrs. + /// Constructs an AttrList for a function with the given properties. + void ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, + bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs); }; } // end namespace CodeGen } // end namespace clang diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp index c6c3fa4..c3d66c1 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp @@ -612,11 +612,14 @@ uint64_t PGOHash::finalize() { llvm::MD5::MD5Result Result; MD5.final(Result); using namespace llvm::support; - return endian::read<uint64_t, little, unaligned>(Result); + return Result.low(); } void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) { const Decl *D = GD.getDecl(); + if (!D->hasBody()) + return; + bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr(); llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader(); if (!InstrumentRegions && !PGOReader) @@ -626,12 +629,14 @@ void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) { // Constructors and destructors may be represented by several functions in IR. // If so, instrument only base variant, others are implemented by delegation // to the base one, it would be counted twice otherwise. - if (CGM.getTarget().getCXXABI().hasConstructorVariants() && - ((isa<CXXConstructorDecl>(GD.getDecl()) && - GD.getCtorType() != Ctor_Base) || - (isa<CXXDestructorDecl>(GD.getDecl()) && - GD.getDtorType() != Dtor_Base))) { + if (CGM.getTarget().getCXXABI().hasConstructorVariants()) { + if (isa<CXXDestructorDecl>(D) && GD.getDtorType() != Dtor_Base) return; + + if (const auto *CCD = dyn_cast<CXXConstructorDecl>(D)) + if (GD.getCtorType() != Ctor_Base && + CodeGenFunction::IsConstructorDelegationValid(CCD)) + return; } CGM.ClearUnusedCoverageMapping(D); setFuncName(Fn); @@ -664,7 +669,7 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) { } bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) { - if (SkipCoverageMapping) + if (!D->getBody()) return true; // Don't map the functions in system headers. 
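The assignRegionCounters change above instruments only the base (C2/D2) variant when the other variants merely delegate to it, so the delegated work is not counted twice. A sketch of the case it targets, in ABIs with constructor variants:

    struct S {
      S();           // emitted as complete (C1) and base (C2) constructors;
      virtual ~S();  // when C1 validly delegates to C2, only C2 gets profile
    };               // counters, and likewise only the D2 destructor variant.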
@@ -737,7 +742,8 @@ CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader, Fn->setEntryCount(FunctionCount); } -void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) { +void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, + llvm::Value *StepV) { if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap) return; if (!Builder.GetInsertBlock()) @@ -745,11 +751,18 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) { unsigned Counter = (*RegionCounterMap)[S]; auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext()); - Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment), - {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), - Builder.getInt64(FunctionHash), - Builder.getInt32(NumRegionCounters), - Builder.getInt32(Counter)}); + + llvm::Value *Args[] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), + Builder.getInt64(FunctionHash), + Builder.getInt32(NumRegionCounters), + Builder.getInt32(Counter), StepV}; + if (!StepV) + Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment), + makeArrayRef(Args, 4)); + else + Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step), + makeArrayRef(Args)); } // This method either inserts a call to the profile run-time during diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h index 4f229cd..0759e65 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h @@ -40,14 +40,11 @@ private: std::unique_ptr<llvm::InstrProfRecord> ProfRecord; std::vector<uint64_t> RegionCounts; uint64_t CurrentRegionCount; - /// \brief A flag that is set to true when this function doesn't need - /// to have coverage mapping data. - bool SkipCoverageMapping; public: CodeGenPGO(CodeGenModule &CGM) - : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0), - FunctionHash(0), CurrentRegionCount(0), SkipCoverageMapping(false) {} + : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0), FunctionHash(0), + CurrentRegionCount(0) {} /// Whether or not we have PGO region data for the current function. This is /// false both when we have no data at all and when our data has been @@ -105,7 +102,8 @@ private: void emitCounterRegionMapping(const Decl *D); public: - void emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S); + void emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, + llvm::Value *StepV); /// Return the region count for the counter at the given index. uint64_t getRegionCount(const Stmt *S) { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp index 04224e7..8a75a55 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -139,6 +139,12 @@ CodeGenTBAA::getTBAAInfo(QualType QTy) { } } + // C++1z [basic.lval]p10: "If a program attempts to access the stored value of + // an object through a glvalue of other than one of the following types the + // behavior is undefined: [...] a char, unsigned char, or std::byte type." + if (Ty->isStdByteType()) + return MetadataCache[Ty] = getChar(); + // Handle pointers. // TODO: Implement C++'s type "similarity" and consider dis-"similar" // pointers distinct. 
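The CodeGenTBAA change above gives std::byte the same TBAA node as char ("omnipotent char"), matching the C++17 aliasing rule it quotes. A short illustration, well-formed under that rule:

    #include <cstddef>

    // Reading an object's representation through std::byte must be allowed to
    // alias any type, exactly like unsigned char, hence the shared TBAA node.
    unsigned first_byte(const float &f) {
      return static_cast<unsigned>(*reinterpret_cast<const std::byte *>(&f));
    }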
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypeCache.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypeCache.h index 47e26bc..450eab4 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypeCache.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypeCache.h @@ -60,6 +60,12 @@ struct CodeGenTypeCache { llvm::PointerType *Int8PtrPtrTy; }; + /// void* in alloca address space + union { + llvm::PointerType *AllocaVoidPtrTy; + llvm::PointerType *AllocaInt8PtrTy; + }; + /// The size and alignment of the builtin C type 'int'. This comes /// up enough in various ABI lowering tasks to be worth pre-computing. union { @@ -88,6 +94,8 @@ struct CodeGenTypeCache { unsigned char SizeAlignInBytes; }; + unsigned ASTAllocaAddressSpace; + CharUnits getSizeSize() const { return CharUnits::fromQuantity(SizeSizeInBytes); } @@ -105,6 +113,8 @@ struct CodeGenTypeCache { llvm::CallingConv::ID getRuntimeCC() const { return RuntimeCC; } llvm::CallingConv::ID BuiltinCC; llvm::CallingConv::ID getBuiltinCC() const { return BuiltinCC; } + + unsigned getASTAllocaAddressSpace() const { return ASTAllocaAddressSpace; } }; } // end namespace CodeGen diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp index adb40c8..9306c4f 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp @@ -44,6 +44,10 @@ CodeGenTypes::~CodeGenTypes() { delete &*I++; } +const CodeGenOptions &CodeGenTypes::getCodeGenOpts() const { + return CGM.getCodeGenOpts(); +} + void CodeGenTypes::addRecordTypeName(const RecordDecl *RD, llvm::StructType *Ty, StringRef suffix) { @@ -472,7 +476,6 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: - case BuiltinType::OCLNDRange: case BuiltinType::OCLReserveID: ResultType = CGM.getOpenCLRuntime().convertOpenCLSpecificType(Ty); break; @@ -487,10 +490,11 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { break; } case Type::Auto: - llvm_unreachable("Unexpected undeduced auto type!"); + case Type::DeducedTemplateSpecialization: + llvm_unreachable("Unexpected undeduced type!"); case Type::Complex: { llvm::Type *EltTy = ConvertType(cast<ComplexType>(Ty)->getElementType()); - ResultType = llvm::StructType::get(EltTy, EltTy, nullptr); + ResultType = llvm::StructType::get(EltTy, EltTy); break; } case Type::LValueReference: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h index 2ce6591..9d0e3de 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h @@ -178,6 +178,7 @@ public: const TargetInfo &getTarget() const { return Target; } CGCXXABI &getCXXABI() const { return TheCXXABI; } llvm::LLVMContext &getLLVMContext() { return TheModule.getContext(); } + const CodeGenOptions &getCodeGenOpts() const; /// ConvertType - Convert type T into a llvm::Type. 
llvm::Type *ConvertType(QualType T); @@ -303,11 +304,14 @@ public: const CGFunctionInfo &arrangeCXXConstructorCall(const CallArgList &Args, const CXXConstructorDecl *D, CXXCtorType CtorKind, - unsigned ExtraArgs); + unsigned ExtraPrefixArgs, + unsigned ExtraSuffixArgs, + bool PassProtoArgs = true); const CGFunctionInfo &arrangeCXXMethodCall(const CallArgList &args, const FunctionProtoType *type, - RequiredArgs required); + RequiredArgs required, + unsigned numPrefixArgs); const CGFunctionInfo &arrangeMSMemberPointerThunk(const CXXMethodDecl *MD); const CGFunctionInfo &arrangeMSCtorClosure(const CXXConstructorDecl *CD, CXXCtorType CT); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ConstantBuilder.h b/contrib/llvm/tools/clang/lib/CodeGen/ConstantBuilder.h deleted file mode 100644 index 40b34a9..0000000 --- a/contrib/llvm/tools/clang/lib/CodeGen/ConstantBuilder.h +++ /dev/null @@ -1,444 +0,0 @@ -//===----- ConstantBuilder.h - Builder for LLVM IR constants ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class provides a convenient interface for building complex -// global initializers. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_CODEGEN_CONSTANTBUILDER_H -#define LLVM_CLANG_LIB_CODEGEN_CONSTANTBUILDER_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Constants.h" - -#include "CodeGenModule.h" - -#include <vector> - -namespace clang { -namespace CodeGen { - -class ConstantStructBuilder; -class ConstantArrayBuilder; - -/// A convenience builder class for complex constant initializers, -/// especially for anonymous global structures used by various language -/// runtimes. 
-/// -/// The basic usage pattern is expected to be something like: -/// ConstantInitBuilder builder(CGM); -/// auto toplevel = builder.beginStruct(); -/// toplevel.addInt(CGM.SizeTy, widgets.size()); -/// auto widgetArray = builder.beginArray(); -/// for (auto &widget : widgets) { -/// auto widgetDesc = widgetArray.beginStruct(); -/// widgetDesc.addInt(CGM.SizeTy, widget.getPower()); -/// widgetDesc.add(CGM.GetAddrOfConstantString(widget.getName())); -/// widgetDesc.add(CGM.GetAddrOfGlobal(widget.getInitializerDecl())); -/// widgetArray.add(widgetDesc.finish()); -/// } -/// toplevel.add(widgetArray.finish()); -/// auto global = toplevel.finishAndCreateGlobal("WIDGET_LIST", Align, -/// /*constant*/ true); -class ConstantInitBuilder { - struct SelfReference { - llvm::GlobalVariable *Dummy; - llvm::SmallVector<llvm::Constant*, 4> Indices; - - SelfReference(llvm::GlobalVariable *dummy) : Dummy(dummy) {} - }; - CodeGenModule &CGM; - llvm::SmallVector<llvm::Constant*, 16> Buffer; - std::vector<SelfReference> SelfReferences; - bool Frozen = false; - -public: - explicit ConstantInitBuilder(CodeGenModule &CGM) : CGM(CGM) {} - - ~ConstantInitBuilder() { - assert(Buffer.empty() && "didn't claim all values out of buffer"); - } - - class AggregateBuilderBase { - protected: - ConstantInitBuilder &Builder; - AggregateBuilderBase *Parent; - size_t Begin; - bool Finished = false; - bool Frozen = false; - - llvm::SmallVectorImpl<llvm::Constant*> &getBuffer() { - return Builder.Buffer; - } - - const llvm::SmallVectorImpl<llvm::Constant*> &getBuffer() const { - return Builder.Buffer; - } - - AggregateBuilderBase(ConstantInitBuilder &builder, - AggregateBuilderBase *parent) - : Builder(builder), Parent(parent), Begin(builder.Buffer.size()) { - if (parent) { - assert(!parent->Frozen && "parent already has child builder active"); - parent->Frozen = true; - } else { - assert(!builder.Frozen && "builder already has child builder active"); - builder.Frozen = true; - } - } - - ~AggregateBuilderBase() { - assert(Finished && "didn't finish aggregate builder"); - } - - void markFinished() { - assert(!Frozen && "child builder still active"); - assert(!Finished && "builder already finished"); - Finished = true; - if (Parent) { - assert(Parent->Frozen && - "parent not frozen while child builder active"); - Parent->Frozen = false; - } else { - assert(Builder.Frozen && - "builder not frozen while child builder active"); - Builder.Frozen = false; - } - } - - public: - // Not copyable. - AggregateBuilderBase(const AggregateBuilderBase &) = delete; - AggregateBuilderBase &operator=(const AggregateBuilderBase &) = delete; - - // Movable, mostly to allow returning. But we have to write this out - // properly to satisfy the assert in the destructor. - AggregateBuilderBase(AggregateBuilderBase &&other) - : Builder(other.Builder), Parent(other.Parent), Begin(other.Begin), - Finished(other.Finished), Frozen(other.Frozen) { - other.Finished = false; - } - AggregateBuilderBase &operator=(AggregateBuilderBase &&other) = delete; - - /// Abandon this builder completely. - void abandon() { - markFinished(); - auto &buffer = Builder.Buffer; - buffer.erase(buffer.begin() + Begin, buffer.end()); - } - - /// Add a new value to this initializer. 
- void add(llvm::Constant *value) { - assert(value && "adding null value to constant initializer"); - assert(!Finished && "cannot add more values after finishing builder"); - assert(!Frozen && "cannot add values while subbuilder is active"); - Builder.Buffer.push_back(value); - } - - /// Add an integer value of type size_t. - void addSize(CharUnits size) { - add(Builder.CGM.getSize(size)); - } - - /// Add an integer value of a specific type. - void addInt(llvm::IntegerType *intTy, uint64_t value, - bool isSigned = false) { - add(llvm::ConstantInt::get(intTy, value, isSigned)); - } - - /// Add a null pointer of a specific type. - void addNullPointer(llvm::PointerType *ptrTy) { - add(llvm::ConstantPointerNull::get(ptrTy)); - } - - /// Add a bitcast of a value to a specific type. - void addBitCast(llvm::Constant *value, llvm::Type *type) { - add(llvm::ConstantExpr::getBitCast(value, type)); - } - - /// Add a bunch of new values to this initializer. - void addAll(ArrayRef<llvm::Constant *> values) { - assert(!Finished && "cannot add more values after finishing builder"); - assert(!Frozen && "cannot add values while subbuilder is active"); - Builder.Buffer.append(values.begin(), values.end()); - } - - /// An opaque class to hold the abstract position of a placeholder. - class PlaceholderPosition { - size_t Index; - friend class AggregateBuilderBase; - PlaceholderPosition(size_t index) : Index(index) {} - }; - - /// Add a placeholder value to the structure. The returned position - /// can be used to set the value later; it will not be invalidated by - /// any intermediate operations except (1) filling the same position or - /// (2) finishing the entire builder. - /// - /// This is useful for emitting certain kinds of structure which - /// contain some sort of summary field, generaly a count, before any - /// of the data. By emitting a placeholder first, the structure can - /// be emitted eagerly. - PlaceholderPosition addPlaceholder() { - assert(!Finished && "cannot add more values after finishing builder"); - assert(!Frozen && "cannot add values while subbuilder is active"); - Builder.Buffer.push_back(nullptr); - return Builder.Buffer.size() - 1; - } - - /// Fill a previously-added placeholder. - void fillPlaceholderWithInt(PlaceholderPosition position, - llvm::IntegerType *type, uint64_t value, - bool isSigned = false) { - fillPlaceholder(position, llvm::ConstantInt::get(type, value, isSigned)); - } - - /// Fill a previously-added placeholder. - void fillPlaceholder(PlaceholderPosition position, llvm::Constant *value) { - assert(!Finished && "cannot change values after finishing builder"); - assert(!Frozen && "cannot add values while subbuilder is active"); - llvm::Constant *&slot = Builder.Buffer[position.Index]; - assert(slot == nullptr && "placeholder already filled"); - slot = value; - } - - /// Produce an address which will eventually point to the the next - /// position to be filled. This is computed with an indexed - /// getelementptr rather than by computing offsets. - /// - /// The returned pointer will have type T*, where T is the given - /// position. - llvm::Constant *getAddrOfCurrentPosition(llvm::Type *type) { - // Make a global variable. We will replace this with a GEP to this - // position after installing the initializer. 
- auto dummy = - new llvm::GlobalVariable(Builder.CGM.getModule(), type, true, - llvm::GlobalVariable::PrivateLinkage, - nullptr, ""); - Builder.SelfReferences.emplace_back(dummy); - auto &entry = Builder.SelfReferences.back(); - (void) getGEPIndicesToCurrentPosition(entry.Indices); - return dummy; - } - - ArrayRef<llvm::Constant*> getGEPIndicesToCurrentPosition( - llvm::SmallVectorImpl<llvm::Constant*> &indices) { - getGEPIndicesTo(indices, Builder.Buffer.size()); - return indices; - } - - ConstantArrayBuilder beginArray(llvm::Type *eltTy = nullptr); - ConstantStructBuilder beginStruct(llvm::StructType *structTy = nullptr); - - private: - void getGEPIndicesTo(llvm::SmallVectorImpl<llvm::Constant*> &indices, - size_t position) const { - // Recurse on the parent builder if present. - if (Parent) { - Parent->getGEPIndicesTo(indices, Begin); - - // Otherwise, add an index to drill into the first level of pointer. - } else { - assert(indices.empty()); - indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty, 0)); - } - - assert(position >= Begin); - // We have to use i32 here because struct GEPs demand i32 indices. - // It's rather unlikely to matter in practice. - indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty, - position - Begin)); - } - }; - - template <class Impl> - class AggregateBuilder : public AggregateBuilderBase { - protected: - AggregateBuilder(ConstantInitBuilder &builder, - AggregateBuilderBase *parent) - : AggregateBuilderBase(builder, parent) {} - - Impl &asImpl() { return *static_cast<Impl*>(this); } - - public: - /// Given that this builder was created by beginning an array or struct - /// component on the given parent builder, finish the array/struct - /// component and add it to the parent. - /// - /// It is an intentional choice that the parent is passed in explicitly - /// despite it being redundant with information already kept in the - /// builder. This aids in readability by making it easier to find the - /// places that add components to a builder, as well as "bookending" - /// the sub-builder more explicitly. - void finishAndAddTo(AggregateBuilderBase &parent) { - assert(Parent == &parent && "adding to non-parent builder"); - parent.add(asImpl().finishImpl()); - } - - /// Given that this builder was created by beginning an array or struct - /// directly on a ConstantInitBuilder, finish the array/struct and - /// create a global variable with it as the initializer. - template <class... As> - llvm::GlobalVariable *finishAndCreateGlobal(As &&...args) { - assert(!Parent && "finishing non-root builder"); - return Builder.createGlobal(asImpl().finishImpl(), - std::forward<As>(args)...); - } - - /// Given that this builder was created by beginning an array or struct - /// directly on a ConstantInitBuilder, finish the array/struct and - /// set it as the initializer of the given global variable. 
- void finishAndSetAsInitializer(llvm::GlobalVariable *global) { - assert(!Parent && "finishing non-root builder"); - return Builder.setGlobalInitializer(global, asImpl().finishImpl()); - } - }; - - ConstantArrayBuilder beginArray(llvm::Type *eltTy = nullptr); - - ConstantStructBuilder beginStruct(llvm::StructType *structTy = nullptr); - -private: - llvm::GlobalVariable *createGlobal(llvm::Constant *initializer, - const llvm::Twine &name, - CharUnits alignment, - bool constant = false, - llvm::GlobalValue::LinkageTypes linkage - = llvm::GlobalValue::InternalLinkage, - unsigned addressSpace = 0) { - auto GV = new llvm::GlobalVariable(CGM.getModule(), - initializer->getType(), - constant, - linkage, - initializer, - name, - /*insert before*/ nullptr, - llvm::GlobalValue::NotThreadLocal, - addressSpace); - GV->setAlignment(alignment.getQuantity()); - resolveSelfReferences(GV); - return GV; - } - - void setGlobalInitializer(llvm::GlobalVariable *GV, - llvm::Constant *initializer) { - GV->setInitializer(initializer); - resolveSelfReferences(GV); - } - - void resolveSelfReferences(llvm::GlobalVariable *GV) { - for (auto &entry : SelfReferences) { - llvm::Constant *resolvedReference = - llvm::ConstantExpr::getInBoundsGetElementPtr( - GV->getValueType(), GV, entry.Indices); - entry.Dummy->replaceAllUsesWith(resolvedReference); - entry.Dummy->eraseFromParent(); - } - } -}; - -/// A helper class of ConstantInitBuilder, used for building constant -/// array initializers. -class ConstantArrayBuilder - : public ConstantInitBuilder::AggregateBuilder<ConstantArrayBuilder> { - llvm::Type *EltTy; - friend class ConstantInitBuilder; - template <class Impl> friend class ConstantInitBuilder::AggregateBuilder; - ConstantArrayBuilder(ConstantInitBuilder &builder, - AggregateBuilderBase *parent, llvm::Type *eltTy) - : AggregateBuilder(builder, parent), EltTy(eltTy) {} -public: - size_t size() const { - assert(!Finished); - assert(!Frozen); - assert(Begin <= getBuffer().size()); - return getBuffer().size() - Begin; - } - - bool empty() const { - return size() == 0; - } - -private: - /// Form an array constant from the values that have been added to this - /// builder. - llvm::Constant *finishImpl() { - markFinished(); - - auto &buffer = getBuffer(); - assert((Begin < buffer.size() || - (Begin == buffer.size() && EltTy)) - && "didn't add any array elements without element type"); - auto elts = llvm::makeArrayRef(buffer).slice(Begin); - auto eltTy = EltTy ? EltTy : elts[0]->getType(); - auto type = llvm::ArrayType::get(eltTy, elts.size()); - auto constant = llvm::ConstantArray::get(type, elts); - buffer.erase(buffer.begin() + Begin, buffer.end()); - return constant; - } -}; - -inline ConstantArrayBuilder -ConstantInitBuilder::beginArray(llvm::Type *eltTy) { - return ConstantArrayBuilder(*this, nullptr, eltTy); -} - -inline ConstantArrayBuilder -ConstantInitBuilder::AggregateBuilderBase::beginArray(llvm::Type *eltTy) { - return ConstantArrayBuilder(Builder, this, eltTy); -} - -/// A helper class of ConstantInitBuilder, used for building constant -/// struct initializers. -class ConstantStructBuilder - : public ConstantInitBuilder::AggregateBuilder<ConstantStructBuilder> { - llvm::StructType *Ty; - friend class ConstantInitBuilder; - template <class Impl> friend class ConstantInitBuilder::AggregateBuilder; - ConstantStructBuilder(ConstantInitBuilder &builder, - AggregateBuilderBase *parent, llvm::StructType *ty) - : AggregateBuilder(builder, parent), Ty(ty) {} - - /// Finish the struct. 
- llvm::Constant *finishImpl() { - markFinished(); - - auto &buffer = getBuffer(); - assert(Begin < buffer.size() && "didn't add any struct elements?"); - auto elts = llvm::makeArrayRef(buffer).slice(Begin); - - llvm::Constant *constant; - if (Ty) { - constant = llvm::ConstantStruct::get(Ty, elts); - } else { - constant = llvm::ConstantStruct::getAnon(elts, /*packed*/ false); - } - - buffer.erase(buffer.begin() + Begin, buffer.end()); - return constant; - } -}; - -inline ConstantStructBuilder -ConstantInitBuilder::beginStruct(llvm::StructType *structTy) { - return ConstantStructBuilder(*this, nullptr, structTy); -} - -inline ConstantStructBuilder -ConstantInitBuilder::AggregateBuilderBase::beginStruct( - llvm::StructType *structTy) { - return ConstantStructBuilder(Builder, this, structTy); -} - -} // end namespace CodeGen -} // end namespace clang - -#endif diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ConstantInitBuilder.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ConstantInitBuilder.cpp new file mode 100644 index 0000000..7f8d809 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/CodeGen/ConstantInitBuilder.cpp @@ -0,0 +1,280 @@ +//===--- ConstantInitBuilder.cpp - Global initializer builder -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines out-of-line routines for building initializers for +// global variables, in particular the kind of globals that are implicitly +// introduced by various language ABIs. +// +//===----------------------------------------------------------------------===// + +#include "clang/CodeGen/ConstantInitBuilder.h" +#include "CodeGenModule.h" + +using namespace clang; +using namespace CodeGen; + +llvm::Type *ConstantInitFuture::getType() const { + assert(Data && "dereferencing null future"); + if (Data.is<llvm::Constant*>()) { + return Data.get<llvm::Constant*>()->getType(); + } else { + return Data.get<ConstantInitBuilderBase*>()->Buffer[0]->getType(); + } +} + +void ConstantInitFuture::abandon() { + assert(Data && "abandoning null future"); + if (auto builder = Data.dyn_cast<ConstantInitBuilderBase*>()) { + builder->abandon(0); + } + Data = nullptr; +} + +void ConstantInitFuture::installInGlobal(llvm::GlobalVariable *GV) { + assert(Data && "installing null future"); + if (Data.is<llvm::Constant*>()) { + GV->setInitializer(Data.get<llvm::Constant*>()); + } else { + auto &builder = *Data.get<ConstantInitBuilderBase*>(); + assert(builder.Buffer.size() == 1); + builder.setGlobalInitializer(GV, builder.Buffer[0]); + builder.Buffer.clear(); + Data = nullptr; + } +} + +ConstantInitFuture +ConstantInitBuilderBase::createFuture(llvm::Constant *initializer) { + assert(Buffer.empty() && "buffer not current empty"); + Buffer.push_back(initializer); + return ConstantInitFuture(this); +} + +// Only used in this file. 
+inline ConstantInitFuture::ConstantInitFuture(ConstantInitBuilderBase *builder) + : Data(builder) { + assert(!builder->Frozen); + assert(builder->Buffer.size() == 1); + assert(builder->Buffer[0] != nullptr); +} + +llvm::GlobalVariable * +ConstantInitBuilderBase::createGlobal(llvm::Constant *initializer, + const llvm::Twine &name, + CharUnits alignment, + bool constant, + llvm::GlobalValue::LinkageTypes linkage, + unsigned addressSpace) { + auto GV = new llvm::GlobalVariable(CGM.getModule(), + initializer->getType(), + constant, + linkage, + initializer, + name, + /*insert before*/ nullptr, + llvm::GlobalValue::NotThreadLocal, + addressSpace); + GV->setAlignment(alignment.getQuantity()); + resolveSelfReferences(GV); + return GV; +} + +void ConstantInitBuilderBase::setGlobalInitializer(llvm::GlobalVariable *GV, + llvm::Constant *initializer){ + GV->setInitializer(initializer); + + if (!SelfReferences.empty()) + resolveSelfReferences(GV); +} + +void ConstantInitBuilderBase::resolveSelfReferences(llvm::GlobalVariable *GV) { + for (auto &entry : SelfReferences) { + llvm::Constant *resolvedReference = + llvm::ConstantExpr::getInBoundsGetElementPtr( + GV->getValueType(), GV, entry.Indices); + auto dummy = entry.Dummy; + dummy->replaceAllUsesWith(resolvedReference); + dummy->eraseFromParent(); + } + SelfReferences.clear(); +} + +void ConstantInitBuilderBase::abandon(size_t newEnd) { + // Remove all the entries we've added. + Buffer.erase(Buffer.begin() + newEnd, Buffer.end()); + + // If we're abandoning all the way to the beginning, destroy + // all the self-references, because we might not get another + // opportunity. + if (newEnd == 0) { + for (auto &entry : SelfReferences) { + auto dummy = entry.Dummy; + dummy->replaceAllUsesWith(llvm::UndefValue::get(dummy->getType())); + dummy->eraseFromParent(); + } + SelfReferences.clear(); + } +} + +void ConstantAggregateBuilderBase::addSize(CharUnits size) { + add(Builder.CGM.getSize(size)); +} + +llvm::Constant * +ConstantAggregateBuilderBase::getRelativeOffset(llvm::IntegerType *offsetType, + llvm::Constant *target) { + // Compute the address of the relative-address slot. + auto base = getAddrOfCurrentPosition(offsetType); + + // Subtract. + base = llvm::ConstantExpr::getPtrToInt(base, Builder.CGM.IntPtrTy); + target = llvm::ConstantExpr::getPtrToInt(target, Builder.CGM.IntPtrTy); + llvm::Constant *offset = llvm::ConstantExpr::getSub(target, base); + + // Truncate to the relative-address type if necessary. + if (Builder.CGM.IntPtrTy != offsetType) { + offset = llvm::ConstantExpr::getTrunc(offset, offsetType); + } + + return offset; +} + +llvm::Constant * +ConstantAggregateBuilderBase::getAddrOfCurrentPosition(llvm::Type *type) { + // Make a global variable. We will replace this with a GEP to this + // position after installing the initializer. + auto dummy = + new llvm::GlobalVariable(Builder.CGM.getModule(), type, true, + llvm::GlobalVariable::PrivateLinkage, + nullptr, ""); + Builder.SelfReferences.emplace_back(dummy); + auto &entry = Builder.SelfReferences.back(); + (void) getGEPIndicesToCurrentPosition(entry.Indices); + return dummy; +} + +void ConstantAggregateBuilderBase::getGEPIndicesTo( + llvm::SmallVectorImpl<llvm::Constant*> &indices, + size_t position) const { + // Recurse on the parent builder if present. + if (Parent) { + Parent->getGEPIndicesTo(indices, Begin); + + // Otherwise, add an index to drill into the first level of pointer. 
+ } else { + assert(indices.empty()); + indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty, 0)); + } + + assert(position >= Begin); + // We have to use i32 here because struct GEPs demand i32 indices. + // It's rather unlikely to matter in practice. + indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty, + position - Begin)); +} + +ConstantAggregateBuilderBase::PlaceholderPosition +ConstantAggregateBuilderBase::addPlaceholderWithSize(llvm::Type *type) { + // Bring the offset up to the last field. + CharUnits offset = getNextOffsetFromGlobal(); + + // Create the placeholder. + auto position = addPlaceholder(); + + // Advance the offset past that field. + auto &layout = Builder.CGM.getDataLayout(); + if (!Packed) + offset = offset.alignTo(CharUnits::fromQuantity( + layout.getABITypeAlignment(type))); + offset += CharUnits::fromQuantity(layout.getTypeStoreSize(type)); + + CachedOffsetEnd = Builder.Buffer.size(); + CachedOffsetFromGlobal = offset; + + return position; +} + +CharUnits ConstantAggregateBuilderBase::getOffsetFromGlobalTo(size_t end) const{ + size_t cacheEnd = CachedOffsetEnd; + assert(cacheEnd <= end); + + // Fast path: if the cache is valid, just use it. + if (cacheEnd == end) { + return CachedOffsetFromGlobal; + } + + // If the cached range ends before the index at which the current + // aggregate starts, recurse for the parent. + CharUnits offset; + if (cacheEnd < Begin) { + assert(cacheEnd == 0); + assert(Parent && "Begin != 0 for root builder"); + cacheEnd = Begin; + offset = Parent->getOffsetFromGlobalTo(Begin); + } else { + offset = CachedOffsetFromGlobal; + } + + // Perform simple layout on the elements in cacheEnd..<end. + if (cacheEnd != end) { + auto &layout = Builder.CGM.getDataLayout(); + do { + llvm::Constant *element = Builder.Buffer[cacheEnd]; + assert(element != nullptr && + "cannot compute offset when a placeholder is present"); + llvm::Type *elementType = element->getType(); + if (!Packed) + offset = offset.alignTo(CharUnits::fromQuantity( + layout.getABITypeAlignment(elementType))); + offset += CharUnits::fromQuantity(layout.getTypeStoreSize(elementType)); + } while (++cacheEnd != end); + } + + // Cache and return. 
+ CachedOffsetEnd = cacheEnd; + CachedOffsetFromGlobal = offset; + return offset; +} + +llvm::Constant *ConstantAggregateBuilderBase::finishArray(llvm::Type *eltTy) { + markFinished(); + + auto &buffer = getBuffer(); + assert((Begin < buffer.size() || + (Begin == buffer.size() && eltTy)) + && "didn't add any array elements without element type"); + auto elts = llvm::makeArrayRef(buffer).slice(Begin); + if (!eltTy) eltTy = elts[0]->getType(); + auto type = llvm::ArrayType::get(eltTy, elts.size()); + auto constant = llvm::ConstantArray::get(type, elts); + buffer.erase(buffer.begin() + Begin, buffer.end()); + return constant; +} + +llvm::Constant * +ConstantAggregateBuilderBase::finishStruct(llvm::StructType *ty) { + markFinished(); + + auto &buffer = getBuffer(); + auto elts = llvm::makeArrayRef(buffer).slice(Begin); + + if (ty == nullptr && elts.empty()) + ty = llvm::StructType::get(Builder.CGM.getLLVMContext(), {}, Packed); + + llvm::Constant *constant; + if (ty) { + assert(ty->isPacked() == Packed); + constant = llvm::ConstantStruct::get(ty, elts); + } else { + constant = llvm::ConstantStruct::getAnon(elts, Packed); + } + + buffer.erase(buffer.begin() + Begin, buffer.end()); + return constant; +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp index 5bc9e50..a102347 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -961,12 +961,10 @@ struct CounterCoverageMappingBuilder } }; -bool isMachO(const CodeGenModule &CGM) { - return CGM.getTarget().getTriple().isOSBinFormatMachO(); -} - -StringRef getCoverageSection(const CodeGenModule &CGM) { - return llvm::getInstrProfCoverageSectionName(isMachO(CGM)); +std::string getCoverageSection(const CodeGenModule &CGM) { + return llvm::getInstrProfSectionName( + llvm::IPSK_covmap, + CGM.getContext().getTargetInfo().getTriple().getObjectFormat()); } std::string normalizeFilename(StringRef Filename) { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h b/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h index 2435830..c7bdeac 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h @@ -202,7 +202,7 @@ public: template <std::size_t... Is> T restore(CodeGenFunction &CGF, llvm::index_sequence<Is...>) { // It's important that the restores are emitted in order. The braced init - // list guarentees that. + // list guarantees that. return T{DominatingValue<As>::restore(CGF, std::get<Is>(Saved))...}; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp index f7a8dd6..bd4cb9a 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -24,8 +24,8 @@ #include "CGVTables.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "ConstantBuilder.h" #include "TargetInfo.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/Mangle.h" #include "clang/AST/Type.h" #include "clang/AST/StmtCXX.h" @@ -62,12 +62,20 @@ public: bool classifyReturnType(CGFunctionInfo &FI) const override; + bool passClassIndirect(const CXXRecordDecl *RD) const { + // Clang <= 4 used the pre-C++11 rule, which ignores move operations. + // The PS4 platform ABI follows the behavior of Clang 3.2. 
+ if (CGM.getCodeGenOpts().getClangABICompat() <= + CodeGenOptions::ClangABI::Ver4 || + CGM.getTriple().getOS() == llvm::Triple::PS4) + return RD->hasNonTrivialDestructor() || + RD->hasNonTrivialCopyConstructor(); + return !canCopyArgument(RD); + } + RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override { - // Structures with either a non-trivial destructor or a non-trivial - // copy constructor are always indirect. - // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared - // special members. - if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor()) + // If C++ prohibits us from making a copy, pass by address. + if (passClassIndirect(RD)) return RAA_Indirect; return RAA_Default; } @@ -207,8 +215,9 @@ public: void EmitCXXConstructors(const CXXConstructorDecl *D) override; - void buildStructorSignature(const CXXMethodDecl *MD, StructorType T, - SmallVectorImpl<CanQualType> &ArgTys) override; + AddedStructorArgs + buildStructorSignature(const CXXMethodDecl *MD, StructorType T, + SmallVectorImpl<CanQualType> &ArgTys) override; bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor, CXXDtorType DT) const override { @@ -225,11 +234,10 @@ public: void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override; - unsigned addImplicitConstructorArgs(CodeGenFunction &CGF, - const CXXConstructorDecl *D, - CXXCtorType Type, bool ForVirtualBase, - bool Delegating, - CallArgList &Args) override; + AddedStructorArgs + addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D, + CXXCtorType Type, bool ForVirtualBase, + bool Delegating, CallArgList &Args) override; void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, bool ForVirtualBase, @@ -366,20 +374,30 @@ public: void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override; private: - bool hasAnyVirtualInlineFunction(const CXXRecordDecl *RD) const { - const auto &VtableLayout = - CGM.getItaniumVTableContext().getVTableLayout(RD); - - for (const auto &VtableComponent : VtableLayout.vtable_components()) { - // Skip empty slot. - if (!VtableComponent.isUsedFunctionPointerKind()) - continue; - - const CXXMethodDecl *Method = VtableComponent.getFunctionDecl(); - if (Method->getCanonicalDecl()->isInlined()) - return true; - } - return false; + bool hasAnyUnusedVirtualInlineFunction(const CXXRecordDecl *RD) const { + const auto &VtableLayout = + CGM.getItaniumVTableContext().getVTableLayout(RD); + + for (const auto &VtableComponent : VtableLayout.vtable_components()) { + // Skip empty slot. + if (!VtableComponent.isUsedFunctionPointerKind()) + continue; + + const CXXMethodDecl *Method = VtableComponent.getFunctionDecl(); + if (!Method->getCanonicalDecl()->isInlined()) + continue; + + StringRef Name = CGM.getMangledName(VtableComponent.getGlobalDecl()); + auto *Entry = CGM.GetGlobalValue(Name); + // This checks if virtual inline function has already been emitted. + // Note that it is possible that this inline function would be emitted + // after trying to emit vtable speculatively. Because of this we do + // an extra pass after emitting all deferred vtables to find and emit + // these vtables opportunistically. 
+ if (!Entry || Entry->isDeclaration()) + return true; + } + return false; } bool isVTableHidden(const CXXRecordDecl *RD) const { @@ -499,7 +517,7 @@ llvm::Type * ItaniumCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) { if (MPT->isMemberDataPointer()) return CGM.PtrDiffTy; - return llvm::StructType::get(CGM.PtrDiffTy, CGM.PtrDiffTy, nullptr); + return llvm::StructType::get(CGM.PtrDiffTy, CGM.PtrDiffTy); } /// In the Itanium and ARM ABIs, method pointers have the form: @@ -988,10 +1006,8 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const { if (!RD) return false; - // Return indirectly if we have a non-trivial copy ctor or non-trivial dtor. - // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared - // special members. - if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor()) { + // If C++ prohibits us from making a copy, return by address. + if (passClassIndirect(RD)) { auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false); return true; @@ -1134,8 +1150,8 @@ static llvm::Constant *getItaniumDynamicCastFn(CodeGenFunction &CGF) { // Mark the function as nounwind readonly. llvm::Attribute::AttrKind FuncAttrs[] = { llvm::Attribute::NoUnwind, llvm::Attribute::ReadOnly }; - llvm::AttributeSet Attrs = llvm::AttributeSet::get( - CGF.getLLVMContext(), llvm::AttributeSet::FunctionIndex, FuncAttrs); + llvm::AttributeList Attrs = llvm::AttributeList::get( + CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs); return CGF.CGM.CreateRuntimeFunction(FTy, "__dynamic_cast", Attrs); } @@ -1353,7 +1369,7 @@ void ItaniumCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) { } } -void +CGCXXABI::AddedStructorArgs ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, SmallVectorImpl<CanQualType> &ArgTys) { ASTContext &Context = getContext(); @@ -1362,9 +1378,12 @@ ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, // These are Clang types, so we don't need to worry about sret yet. // Check if we need to add a VTT parameter (which has type void **). 
- if (T == StructorType::Base && MD->getParent()->getNumVBases() != 0) + if (T == StructorType::Base && MD->getParent()->getNumVBases() != 0) { ArgTys.insert(ArgTys.begin() + 1, Context.getPointerType(Context.VoidPtrTy)); + return AddedStructorArgs::prefix(1); + } + return AddedStructorArgs{}; } void ItaniumCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) { @@ -1395,9 +1414,9 @@ void ItaniumCXXABI::addImplicitStructorParams(CodeGenFunction &CGF, // FIXME: avoid the fake decl QualType T = Context.getPointerType(Context.VoidPtrTy); - ImplicitParamDecl *VTTDecl - = ImplicitParamDecl::Create(Context, nullptr, MD->getLocation(), - &Context.Idents.get("vtt"), T); + auto *VTTDecl = ImplicitParamDecl::Create( + Context, /*DC=*/nullptr, MD->getLocation(), &Context.Idents.get("vtt"), + T, ImplicitParamDecl::CXXVTT); Params.insert(Params.begin() + 1, VTTDecl); getStructorImplicitParamDecl(CGF) = VTTDecl; } @@ -1429,11 +1448,11 @@ void ItaniumCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue); } -unsigned ItaniumCXXABI::addImplicitConstructorArgs( +CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs( CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, CallArgList &Args) { if (!NeedsVTTParameter(GlobalDecl(D, Type))) - return 0; + return AddedStructorArgs{}; // Insert the implicit 'vtt' argument as the second argument. llvm::Value *VTT = @@ -1441,7 +1460,7 @@ unsigned ItaniumCXXABI::addImplicitConstructorArgs( QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy); Args.insert(Args.begin() + 1, CallArg(RValue::get(VTT), VTTTy, /*needscopy=*/false)); - return 1; // Added one arg. + return AddedStructorArgs::prefix(1); // Added one arg. } void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF, @@ -1684,11 +1703,11 @@ bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const { if (CGM.getLangOpts().AppleKext) return false; - // If we don't have any inline virtual functions, and if vtable is not hidden, - // then we are safe to emit available_externally copy of vtable. + // If we don't have any not emitted inline virtual function, and if vtable is + // not hidden, then we are safe to emit available_externally copy of vtable. // FIXME we can still emit a copy of the vtable if we // can emit definition of the inline functions. 
- return !hasAnyVirtualInlineFunction(RD) && !isVTableHidden(RD); + return !hasAnyUnusedVirtualInlineFunction(RD) && !isVTableHidden(RD); } static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF, Address InitialPtr, @@ -1907,10 +1926,11 @@ static llvm::Constant *getGuardAcquireFn(CodeGenModule &CGM, llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.getTypes().ConvertType(CGM.getContext().IntTy), GuardPtrTy, /*isVarArg=*/false); - return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_acquire", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind)); + return CGM.CreateRuntimeFunction( + FTy, "__cxa_guard_acquire", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind)); } static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM, @@ -1918,10 +1938,11 @@ static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM, // void __cxa_guard_release(__guard *guard_object); llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false); - return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_release", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind)); + return CGM.CreateRuntimeFunction( + FTy, "__cxa_guard_release", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind)); } static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM, @@ -1929,10 +1950,11 @@ static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM, // void __cxa_guard_abort(__guard *guard_object); llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false); - return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_abort", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind)); + return CGM.CreateRuntimeFunction( + FTy, "__cxa_guard_abort", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind)); } namespace { @@ -2015,10 +2037,11 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, // The ABI says: "It is suggested that it be emitted in the same COMDAT // group as the associated data object." In practice, this doesn't work for - // non-ELF object formats, so only do it for ELF. + // non-ELF and non-Wasm object formats, so only do it for ELF and Wasm. llvm::Comdat *C = var->getComdat(); if (!D.isLocalVarDecl() && C && - CGM.getTarget().getTriple().isOSBinFormatELF()) { + (CGM.getTarget().getTriple().isOSBinFormatELF() || + CGM.getTarget().getTriple().isOSBinFormatWasm())) { guard->setComdat(C); // An inline variable's guard function is run from the per-TU // initialization function, not via a dedicated global ctor function, so @@ -2161,7 +2184,9 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, // Create a variable that binds the atexit to this shared object. llvm::Constant *handle = - CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle"); + CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle"); + auto *GV = cast<llvm::GlobalValue>(handle->stripPointerCasts()); + GV->setVisibility(llvm::GlobalValue::HiddenVisibility); llvm::Value *args[] = { llvm::ConstantExpr::getBitCast(dtor, dtorTy), @@ -2567,6 +2592,9 @@ ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) { if (!GV) { // Create a new global variable. 
+ // Note for the future: If we would ever like to do deferred emission of + // RTTI, check if emitting vtables opportunistically need any adjustment. + GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy, /*Constant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr, @@ -2634,7 +2662,6 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: - case BuiltinType::OCLNDRange: case BuiltinType::OCLReserveID: return false; @@ -2711,7 +2738,9 @@ static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM, // function. bool IsDLLImport = RD->hasAttr<DLLImportAttr>(); if (CGM.getVTables().isVTableExternal(RD)) - return IsDLLImport ? false : true; + return IsDLLImport && !CGM.getTriple().isWindowsItaniumEnvironment() + ? false + : true; if (IsDLLImport) return true; @@ -2814,7 +2843,8 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { llvm_unreachable("References shouldn't get here"); case Type::Auto: - llvm_unreachable("Undeduced auto type shouldn't get here"); + case Type::DeducedTemplateSpecialization: + llvm_unreachable("Undeduced type shouldn't get here"); case Type::Pipe: llvm_unreachable("Pipe types shouldn't get here"); @@ -2935,6 +2965,8 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM, return llvm::GlobalValue::InternalLinkage; case VisibleNoLinkage: + case ModuleInternalLinkage: + case ModuleLinkage: case ExternalLinkage: // RTTI is not enabled, which means that this type info struct is going // to be used for exception handling. Give it linkonce_odr linkage. @@ -2946,7 +2978,8 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM, if (RD->hasAttr<WeakAttr>()) return llvm::GlobalValue::WeakODRLinkage; if (CGM.getTriple().isWindowsItaniumEnvironment()) - if (RD->hasAttr<DLLImportAttr>()) + if (RD->hasAttr<DLLImportAttr>() && + ShouldUseExternalRTTIDescriptor(CGM, Ty)) return llvm::GlobalValue::ExternalLinkage; if (RD->isDynamicClass()) { llvm::GlobalValue::LinkageTypes LT = CGM.getVTableLinkage(RD); @@ -3044,7 +3077,8 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force, llvm_unreachable("References shouldn't get here"); case Type::Auto: - llvm_unreachable("Undeduced auto type shouldn't get here"); + case Type::DeducedTemplateSpecialization: + llvm_unreachable("Undeduced type shouldn't get here"); case Type::Pipe: llvm_unreachable("Pipe type shouldn't get here"); @@ -3158,7 +3192,8 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force, if (DLLExport || (RD && RD->hasAttr<DLLExportAttr>())) { TypeName->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); - } else if (CGM.getLangOpts().RTTI && RD && RD->hasAttr<DLLImportAttr>()) { + } else if (RD && RD->hasAttr<DLLImportAttr>() && + ShouldUseExternalRTTIDescriptor(CGM, Ty)) { TypeName->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); @@ -3534,8 +3569,9 @@ static StructorCodegen getCodegenToUse(CodeGenModule &CGM, return StructorCodegen::RAUW; if (llvm::GlobalValue::isWeakForLinker(Linkage)) { - // Only ELF supports COMDATs with arbitrary names (C5/D5). - if (CGM.getTarget().getTriple().isOSBinFormatELF()) + // Only ELF and wasm support COMDATs with arbitrary names (C5/D5). 
+ if (CGM.getTarget().getTriple().isOSBinFormatELF() || + CGM.getTarget().getTriple().isOSBinFormatWasm()) return StructorCodegen::COMDAT; return StructorCodegen::Emit; } @@ -3919,9 +3955,8 @@ void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF, static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) { llvm::FunctionType *fnTy = llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); - llvm::Constant *fnRef = - CGM.CreateRuntimeFunction(fnTy, "__clang_call_terminate", - llvm::AttributeSet(), /*Local=*/true); + llvm::Constant *fnRef = CGM.CreateRuntimeFunction( + fnTy, "__clang_call_terminate", llvm::AttributeList(), /*Local=*/true); llvm::Function *fn = dyn_cast<llvm::Function>(fnRef); if (fn && fn->empty()) { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.cpp b/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.cpp new file mode 100644 index 0000000..a6f21d8 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.cpp @@ -0,0 +1,208 @@ +//===--- MacroPPCallbacks.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains implementation for the macro preprocessors callbacks. +// +//===----------------------------------------------------------------------===// + +#include "MacroPPCallbacks.h" +#include "CGDebugInfo.h" +#include "clang/CodeGen/ModuleBuilder.h" +#include "clang/Parse/Parser.h" + +using namespace clang; + +void MacroPPCallbacks::writeMacroDefinition(const IdentifierInfo &II, + const MacroInfo &MI, + Preprocessor &PP, raw_ostream &Name, + raw_ostream &Value) { + Name << II.getName(); + + if (MI.isFunctionLike()) { + Name << '('; + if (!MI.param_empty()) { + MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end(); + for (; AI + 1 != E; ++AI) { + Name << (*AI)->getName(); + Name << ','; + } + + // Last argument. + if ((*AI)->getName() == "__VA_ARGS__") + Name << "..."; + else + Name << (*AI)->getName(); + } + + if (MI.isGNUVarargs()) + // #define foo(x...) 
+ Name << "..."; + + Name << ')'; + } + + SmallString<128> SpellingBuffer; + bool First = true; + for (const auto &T : MI.tokens()) { + if (!First && T.hasLeadingSpace()) + Value << ' '; + + Value << PP.getSpelling(T, SpellingBuffer); + First = false; + } +} + +MacroPPCallbacks::MacroPPCallbacks(CodeGenerator *Gen, Preprocessor &PP) + : Gen(Gen), PP(PP), Status(NoScope) {} + +// This is the expected flow of enter/exit compiler and user files: +// - Main File Enter +// - <built-in> file enter +// {Compiler macro definitions} - (Line=0, no scope) +// - (Optional) <command line> file enter +// {Command line macro definitions} - (Line=0, no scope) +// - (Optional) <command line> file exit +// {Command line file includes} - (Line=0, Main file scope) +// {macro definitions and file includes} - (Line!=0, Parent scope) +// - <built-in> file exit +// {User code macro definitions and file includes} - (Line!=0, Parent scope) + +llvm::DIMacroFile *MacroPPCallbacks::getCurrentScope() { + if (Status == MainFileScope || Status == CommandLineIncludeScope) + return Scopes.back(); + return nullptr; +} + +SourceLocation MacroPPCallbacks::getCorrectLocation(SourceLocation Loc) { + if (Status == MainFileScope || EnteredCommandLineIncludeFiles) + return Loc; + + // While parsing skipped files, location of macros is invalid. + // Invalid location represents line zero. + return SourceLocation(); +} + +static bool isBuiltinFile(SourceManager &SM, SourceLocation Loc) { + StringRef Filename(SM.getPresumedLoc(Loc).getFilename()); + return Filename.equals("<built-in>"); +} + +static bool isCommandLineFile(SourceManager &SM, SourceLocation Loc) { + StringRef Filename(SM.getPresumedLoc(Loc).getFilename()); + return Filename.equals("<command line>"); +} + +void MacroPPCallbacks::updateStatusToNextScope() { + switch (Status) { + case NoScope: + Status = InitializedScope; + break; + case InitializedScope: + Status = BuiltinScope; + break; + case BuiltinScope: + Status = CommandLineIncludeScope; + break; + case CommandLineIncludeScope: + Status = MainFileScope; + break; + case MainFileScope: + llvm_unreachable("There is no next scope, already in the final scope"); + } +} + +void MacroPPCallbacks::FileEntered(SourceLocation Loc) { + SourceLocation LineLoc = getCorrectLocation(LastHashLoc); + switch (Status) { + case NoScope: + updateStatusToNextScope(); + break; + case InitializedScope: + updateStatusToNextScope(); + return; + case BuiltinScope: + if (isCommandLineFile(PP.getSourceManager(), Loc)) + return; + updateStatusToNextScope(); + LLVM_FALLTHROUGH; + case CommandLineIncludeScope: + EnteredCommandLineIncludeFiles++; + break; + case MainFileScope: + break; + } + + Scopes.push_back(Gen->getCGDebugInfo()->CreateTempMacroFile(getCurrentScope(), + LineLoc, Loc)); +} + +void MacroPPCallbacks::FileExited(SourceLocation Loc) { + switch (Status) { + default: + llvm_unreachable("Do not expect to exit a file from current scope"); + case BuiltinScope: + if (!isBuiltinFile(PP.getSourceManager(), Loc)) + // Skip next scope and change status to MainFileScope. + Status = MainFileScope; + return; + case CommandLineIncludeScope: + if (!EnteredCommandLineIncludeFiles) { + updateStatusToNextScope(); + return; + } + EnteredCommandLineIncludeFiles--; + break; + case MainFileScope: + break; + } + + Scopes.pop_back(); +} + +void MacroPPCallbacks::FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID) { + // Only care about enter file or exit file changes. 
+ if (Reason == EnterFile) + FileEntered(Loc); + else if (Reason == ExitFile) + FileExited(Loc); +} + +void MacroPPCallbacks::InclusionDirective( + SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, + bool IsAngled, CharSourceRange FilenameRange, const FileEntry *File, + StringRef SearchPath, StringRef RelativePath, const Module *Imported) { + + // Record the line location of the current included file. + LastHashLoc = HashLoc; +} + +void MacroPPCallbacks::MacroDefined(const Token &MacroNameTok, + const MacroDirective *MD) { + IdentifierInfo *Id = MacroNameTok.getIdentifierInfo(); + SourceLocation location = getCorrectLocation(MacroNameTok.getLocation()); + std::string NameBuffer, ValueBuffer; + llvm::raw_string_ostream Name(NameBuffer); + llvm::raw_string_ostream Value(ValueBuffer); + writeMacroDefinition(*Id, *MD->getMacroInfo(), PP, Name, Value); + Gen->getCGDebugInfo()->CreateMacro(getCurrentScope(), + llvm::dwarf::DW_MACINFO_define, location, + Name.str(), Value.str()); +} + +void MacroPPCallbacks::MacroUndefined(const Token &MacroNameTok, + const MacroDefinition &MD, + const MacroDirective *Undef) { + IdentifierInfo *Id = MacroNameTok.getIdentifierInfo(); + SourceLocation location = getCorrectLocation(MacroNameTok.getLocation()); + Gen->getCGDebugInfo()->CreateMacro(getCurrentScope(), + llvm::dwarf::DW_MACINFO_undef, location, + Id->getName(), ""); +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.h b/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.h new file mode 100644 index 0000000..e117f96 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.h @@ -0,0 +1,117 @@ +//===--- MacroPPCallbacks.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines implementation for the macro preprocessors callbacks. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/PPCallbacks.h" + +namespace llvm { +class DIMacroFile; +class DIMacroNode; +} +namespace clang { +class Preprocessor; +class MacroInfo; +class CodeGenerator; + +class MacroPPCallbacks : public PPCallbacks { + /// A pointer to code generator, where debug info generator can be found. + CodeGenerator *Gen; + + /// Preprocessor. + Preprocessor &PP; + + /// Location of recent included file, used for line number. + SourceLocation LastHashLoc; + + /// Counts current number of command line included files, which were entered + /// and were not exited yet. + int EnteredCommandLineIncludeFiles = 0; + + enum FileScopeStatus { + NoScope = 0, // Scope is not initialized yet. + InitializedScope, // Main file scope is initialized but not set yet. + BuiltinScope, // <built-in> and <command line> file scopes. + CommandLineIncludeScope, // Included file, from <command line> file, scope. + MainFileScope // Main file scope. + }; + FileScopeStatus Status; + + /// Parent contains all entered files that were not exited yet according to + /// the inclusion order. + llvm::SmallVector<llvm::DIMacroFile *, 4> Scopes; + + /// Get current DIMacroFile scope. + /// \return current DIMacroFile scope or nullptr if there is no such scope. + llvm::DIMacroFile *getCurrentScope(); + + /// Get current line location or invalid location. + /// \param Loc current line location. 
+ /// \return current line location \p `Loc`, or invalid location if it's in a + /// skipped file scope. + SourceLocation getCorrectLocation(SourceLocation Loc); + + /// Use the passed preprocessor to write the macro name and value from the + /// given macro info and identifier info into the given \p `Name` and \p + /// `Value` output streams. + /// + /// \param II Identifier info, used to get the Macro name. + /// \param MI Macro info, used to get the Macro argumets and values. + /// \param PP Preprocessor. + /// \param [out] Name Place holder for returned macro name and arguments. + /// \param [out] Value Place holder for returned macro value. + static void writeMacroDefinition(const IdentifierInfo &II, + const MacroInfo &MI, Preprocessor &PP, + raw_ostream &Name, raw_ostream &Value); + + /// Update current file scope status to next file scope. + void updateStatusToNextScope(); + + /// Handle the case when entering a file. + /// + /// \param Loc Indicates the new location. + void FileEntered(SourceLocation Loc); + + /// Handle the case when exiting a file. + /// + /// \param Loc Indicates the new location. + void FileExited(SourceLocation Loc); + +public: + MacroPPCallbacks(CodeGenerator *Gen, Preprocessor &PP); + + /// Callback invoked whenever a source file is entered or exited. + /// + /// \param Loc Indicates the new location. + /// \param PrevFID the file that was exited if \p Reason is ExitFile. + void FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID = FileID()) override; + + /// Callback invoked whenever a directive (#xxx) is processed. + void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, + StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, const FileEntry *File, + StringRef SearchPath, StringRef RelativePath, + const Module *Imported) override; + + /// Hook called whenever a macro definition is seen. + void MacroDefined(const Token &MacroNameTok, + const MacroDirective *MD) override; + + /// Hook called whenever a macro \#undef is seen. + /// + /// MD is released immediately following this callback. + void MacroUndefined(const Token &MacroNameTok, const MacroDefinition &MD, + const MacroDirective *Undef) override; +}; + +} // end namespace clang diff --git a/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 38df455..1bd2937 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -19,8 +19,8 @@ #include "CGVTables.h" #include "CodeGenModule.h" #include "CodeGenTypes.h" -#include "ConstantBuilder.h" #include "TargetInfo.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/StmtCXX.h" @@ -206,8 +206,9 @@ public: // lacks a definition for the destructor, non-base destructors must always // delegate to or alias the base destructor. - void buildStructorSignature(const CXXMethodDecl *MD, StructorType T, - SmallVectorImpl<CanQualType> &ArgTys) override; + AddedStructorArgs + buildStructorSignature(const CXXMethodDecl *MD, StructorType T, + SmallVectorImpl<CanQualType> &ArgTys) override; /// Non-base dtors should be emitted as delegating thunks in this ABI. 
bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor, @@ -248,11 +249,10 @@ public: void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override; - unsigned addImplicitConstructorArgs(CodeGenFunction &CGF, - const CXXConstructorDecl *D, - CXXCtorType Type, bool ForVirtualBase, - bool Delegating, - CallArgList &Args) override; + AddedStructorArgs + addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D, + CXXCtorType Type, bool ForVirtualBase, + bool Delegating, CallArgList &Args) override; void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, bool ForVirtualBase, @@ -819,46 +819,44 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const { return RAA_Default; case llvm::Triple::x86_64: - // Win64 passes objects with non-trivial copy ctors indirectly. - if (RD->hasNonTrivialCopyConstructor()) - return RAA_Indirect; - - // If an object has a destructor, we'd really like to pass it indirectly + // If a class has a destructor, we'd really like to pass it indirectly // because it allows us to elide copies. Unfortunately, MSVC makes that // impossible for small types, which it will pass in a single register or // stack slot. Most objects with dtors are large-ish, so handle that early. // We can't call out all large objects as being indirect because there are // multiple x64 calling conventions and the C++ ABI code shouldn't dictate // how we pass large POD types. + // + // Note: This permits small classes with nontrivial destructors to be + // passed in registers, which is non-conforming. if (RD->hasNonTrivialDestructor() && getContext().getTypeSize(RD->getTypeForDecl()) > 64) return RAA_Indirect; - // If this is true, the implicit copy constructor that Sema would have - // created would not be deleted. FIXME: We should provide a more direct way - // for CodeGen to ask whether the constructor was deleted. - if (!RD->hasUserDeclaredCopyConstructor() && - !RD->hasUserDeclaredMoveConstructor() && - !RD->needsOverloadResolutionForMoveConstructor() && - !RD->hasUserDeclaredMoveAssignment() && - !RD->needsOverloadResolutionForMoveAssignment()) - return RAA_Default; - - // Otherwise, Sema should have created an implicit copy constructor if - // needed. - assert(!RD->needsImplicitCopyConstructor()); - - // We have to make sure the trivial copy constructor isn't deleted. - for (const CXXConstructorDecl *CD : RD->ctors()) { - if (CD->isCopyConstructor()) { - assert(CD->isTrivial()); - // We had at least one undeleted trivial copy ctor. Return directly. - if (!CD->isDeleted()) - return RAA_Default; + // If a class has at least one non-deleted, trivial copy constructor, it + // is passed according to the C ABI. Otherwise, it is passed indirectly. + // + // Note: This permits classes with non-trivial copy or move ctors to be + // passed in registers, so long as they *also* have a trivial copy ctor, + // which is non-conforming. + if (RD->needsImplicitCopyConstructor()) { + // If the copy ctor has not yet been declared, we can read its triviality + // off the AST. + if (!RD->defaultedCopyConstructorIsDeleted() && + RD->hasTrivialCopyConstructor()) + return RAA_Default; + } else { + // Otherwise, we need to find the copy constructor(s) and ask. + for (const CXXConstructorDecl *CD : RD->ctors()) { + if (CD->isCopyConstructor()) { + // We had at least one nondeleted trivial copy ctor. Return directly. + if (!CD->isDeleted() && CD->isTrivial()) + return RAA_Default; + } } } - // The trivial copy constructor was deleted. 
Return indirectly. + // We have no trivial, non-deleted copy constructor. return RAA_Indirect; } @@ -1261,17 +1259,19 @@ void MicrosoftCXXABI::EmitVBPtrStores(CodeGenFunction &CGF, } } -void +CGCXXABI::AddedStructorArgs MicrosoftCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, SmallVectorImpl<CanQualType> &ArgTys) { + AddedStructorArgs Added; // TODO: 'for base' flag if (T == StructorType::Deleting) { // The scalar deleting destructor takes an implicit int parameter. ArgTys.push_back(getContext().IntTy); + ++Added.Suffix; } auto *CD = dyn_cast<CXXConstructorDecl>(MD); if (!CD) - return; + return Added; // All parameters are already in place except is_most_derived, which goes // after 'this' if it's variadic and last if it's not. @@ -1279,11 +1279,16 @@ MicrosoftCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, const CXXRecordDecl *Class = CD->getParent(); const FunctionProtoType *FPT = CD->getType()->castAs<FunctionProtoType>(); if (Class->getNumVBases()) { - if (FPT->isVariadic()) + if (FPT->isVariadic()) { ArgTys.insert(ArgTys.begin() + 1, getContext().IntTy); - else + ++Added.Prefix; + } else { ArgTys.push_back(getContext().IntTy); + ++Added.Suffix; + } } + + return Added; } void MicrosoftCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) { @@ -1406,11 +1411,10 @@ void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF, const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl()); assert(isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD)); if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) { - ImplicitParamDecl *IsMostDerived - = ImplicitParamDecl::Create(Context, nullptr, - CGF.CurGD.getDecl()->getLocation(), - &Context.Idents.get("is_most_derived"), - Context.IntTy); + auto *IsMostDerived = ImplicitParamDecl::Create( + Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(), + &Context.Idents.get("is_most_derived"), Context.IntTy, + ImplicitParamDecl::Other); // The 'most_derived' parameter goes second if the ctor is variadic and last // if it's not. Dtors can't be variadic. const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); @@ -1420,11 +1424,10 @@ void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF, Params.push_back(IsMostDerived); getStructorImplicitParamDecl(CGF) = IsMostDerived; } else if (isDeletingDtor(CGF.CurGD)) { - ImplicitParamDecl *ShouldDelete - = ImplicitParamDecl::Create(Context, nullptr, - CGF.CurGD.getDecl()->getLocation(), - &Context.Idents.get("should_call_delete"), - Context.IntTy); + auto *ShouldDelete = ImplicitParamDecl::Create( + Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(), + &Context.Idents.get("should_call_delete"), Context.IntTy, + ImplicitParamDecl::Other); Params.push_back(ShouldDelete); getStructorImplicitParamDecl(CGF) = ShouldDelete; } @@ -1493,14 +1496,14 @@ void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { } } -unsigned MicrosoftCXXABI::addImplicitConstructorArgs( +CGCXXABI::AddedStructorArgs MicrosoftCXXABI::addImplicitConstructorArgs( CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, CallArgList &Args) { assert(Type == Ctor_Complete || Type == Ctor_Base); // Check if we need a 'most_derived' parameter. if (!D->getParent()->getNumVBases()) - return 0; + return AddedStructorArgs{}; // Add the 'most_derived' argument second if we are variadic or last if not. 
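A minimal stand-alone sketch of the placement rule stated in the comment above, using a plain std::vector and an invented helper name in place of the real CallArgList and AddedStructorArgs machinery: for a variadic constructor the implicit 'is_most_derived' flag must land right after 'this' (a prefix argument, reported as AddedStructorArgs::prefix(1)), otherwise it is appended last (suffix(1)).

#include <cassert>
#include <string>
#include <vector>

// Toy model only; the real code inserts a CallArg into a CallArgList.
static void addMostDerivedArg(std::vector<std::string> &Args, bool IsVariadic) {
  if (IsVariadic)
    Args.insert(Args.begin() + 1, "is_most_derived"); // right after 'this'
  else
    Args.push_back("is_most_derived");                // after all real args
}

int main() {
  std::vector<std::string> Variadic = {"this", "a", "b"};
  addMostDerivedArg(Variadic, /*IsVariadic=*/true);
  assert(Variadic[1] == "is_most_derived");

  std::vector<std::string> Normal = {"this", "a", "b"};
  addMostDerivedArg(Normal, /*IsVariadic=*/false);
  assert(Normal.back() == "is_most_derived");
  return 0;
}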
const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); @@ -1511,13 +1514,13 @@ unsigned MicrosoftCXXABI::addImplicitConstructorArgs( MostDerivedArg = llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete); } RValue RV = RValue::get(MostDerivedArg); - if (FPT->isVariadic()) + if (FPT->isVariadic()) { Args.insert(Args.begin() + 1, CallArg(RV, getContext().IntTy, /*needscopy=*/false)); - else - Args.add(RV, getContext().IntTy); - - return 1; // Added one arg. + return AddedStructorArgs::prefix(1); + } + Args.add(RV, getContext().IntTy); + return AddedStructorArgs::suffix(1); } void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF, @@ -1554,7 +1557,7 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF, void MicrosoftCXXABI::emitVTableTypeMetadata(const VPtrInfo &Info, const CXXRecordDecl *RD, llvm::GlobalVariable *VTable) { - if (!CGM.getCodeGenOpts().PrepareForLTO) + if (!CGM.getCodeGenOpts().LTOUnit) return; // The location of the first virtual function pointer in the virtual table, @@ -2203,9 +2206,8 @@ static void emitGlobalDtorWithTLRegDtor(CodeGenFunction &CGF, const VarDecl &VD, llvm::FunctionType *TLRegDtorTy = llvm::FunctionType::get( CGF.IntTy, DtorStub->getType(), /*IsVarArg=*/false); - llvm::Constant *TLRegDtor = - CGF.CGM.CreateRuntimeFunction(TLRegDtorTy, "__tlregdtor", - llvm::AttributeSet(), /*Local=*/true); + llvm::Constant *TLRegDtor = CGF.CGM.CreateRuntimeFunction( + TLRegDtorTy, "__tlregdtor", llvm::AttributeList(), /*Local=*/true); if (llvm::Function *TLRegDtorFn = dyn_cast<llvm::Function>(TLRegDtor)) TLRegDtorFn->setDoesNotThrow(); @@ -2301,9 +2303,9 @@ static llvm::Constant *getInitThreadHeaderFn(CodeGenModule &CGM) { CGM.IntTy->getPointerTo(), /*isVarArg=*/false); return CGM.CreateRuntimeFunction( FTy, "_Init_thread_header", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind), + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind), /*Local=*/true); } @@ -2313,9 +2315,9 @@ static llvm::Constant *getInitThreadFooterFn(CodeGenModule &CGM) { CGM.IntTy->getPointerTo(), /*isVarArg=*/false); return CGM.CreateRuntimeFunction( FTy, "_Init_thread_footer", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind), + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind), /*Local=*/true); } @@ -2325,9 +2327,9 @@ static llvm::Constant *getInitThreadAbortFn(CodeGenModule &CGM) { CGM.IntTy->getPointerTo(), /*isVarArg=*/false); return CGM.CreateRuntimeFunction( FTy, "_Init_thread_abort", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind), + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind), /*Local=*/true); } @@ -3421,6 +3423,8 @@ static llvm::GlobalValue::LinkageTypes getLinkageForRTTI(QualType Ty) { return llvm::GlobalValue::InternalLinkage; case VisibleNoLinkage: + case ModuleInternalLinkage: + case ModuleLinkage: case ExternalLinkage: return llvm::GlobalValue::LinkOnceODRLinkage; } @@ -3713,7 +3717,7 @@ CatchTypeInfo MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type, QualType CatchHandlerType) { // TypeDescriptors for exceptions never have qualified pointer types, - // qualifiers are stored seperately in order to support qualification + // qualifiers are stored 
separately in order to support qualification // conversions. bool IsConst, IsVolatile, IsUnaligned; Type = @@ -3750,6 +3754,9 @@ llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) { if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName)) return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); + // Note for the future: If we would ever like to do deferred emission of + // RTTI, check if emitting vtables opportunistically need any adjustment. + // Compute the fields for the TypeDescriptor. SmallString<256> TypeInfoString; { @@ -3866,18 +3873,21 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, // Following the 'this' pointer is a reference to the source object that we // are copying from. ImplicitParamDecl SrcParam( - getContext(), nullptr, SourceLocation(), &getContext().Idents.get("src"), + getContext(), /*DC=*/nullptr, SourceLocation(), + &getContext().Idents.get("src"), getContext().getLValueReferenceType(RecordTy, - /*SpelledAsLValue=*/true)); + /*SpelledAsLValue=*/true), + ImplicitParamDecl::Other); if (IsCopy) FunctionArgs.push_back(&SrcParam); // Constructors for classes which utilize virtual bases have an additional // parameter which indicates whether or not it is being delegated to by a more // derived constructor. - ImplicitParamDecl IsMostDerived(getContext(), nullptr, SourceLocation(), + ImplicitParamDecl IsMostDerived(getContext(), /*DC=*/nullptr, + SourceLocation(), &getContext().Idents.get("is_most_derived"), - getContext().IntTy); + getContext().IntTy, ImplicitParamDecl::Other); // Only add the parameter to the list if thie class has virtual bases. if (RD->getNumVBases() > 0) FunctionArgs.push_back(&IsMostDerived); @@ -3918,16 +3928,16 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, CGF.EmitCallArgs(Args, FPT, llvm::makeArrayRef(ArgVec), CD, IsCopy ? 1 : 0); // Insert any ABI-specific implicit constructor arguments. - unsigned ExtraArgs = addImplicitConstructorArgs(CGF, CD, Ctor_Complete, - /*ForVirtualBase=*/false, - /*Delegating=*/false, Args); - + AddedStructorArgs ExtraArgs = + addImplicitConstructorArgs(CGF, CD, Ctor_Complete, + /*ForVirtualBase=*/false, + /*Delegating=*/false, Args); // Call the destructor with our arguments. llvm::Constant *CalleePtr = CGM.getAddrOfCXXStructor(CD, StructorType::Complete); CGCallee Callee = CGCallee::forDirect(CalleePtr, CD); const CGFunctionInfo &CalleeInfo = CGM.getTypes().arrangeCXXConstructorCall( - Args, CD, Ctor_Complete, ExtraArgs); + Args, CD, Ctor_Complete, ExtraArgs.Prefix, ExtraArgs.Suffix); CGF.EmitCall(CalleeInfo, Callee, ReturnValueSlot(), Args); Cleanups.ForceCleanup(); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp index f925c25..fc64285 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp @@ -92,6 +92,10 @@ namespace { return M.get(); } + CGDebugInfo *getCGDebugInfo() { + return Builder->getModuleDebugInfo(); + } + llvm::Module *ReleaseModule() { return M.release(); } @@ -193,7 +197,7 @@ namespace { // Provide some coverage mapping even for methods that aren't emitted. // Don't do this for templated classes though, as they may not be // instantiable. 
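The deferred-coverage check below moves from getDescribedClassTemplate() to isDependentContext(), which also covers members of classes nested inside a template. A small illustration of the kind of method the broader predicate presumably catches; the type names here are made up:

// Inner does not itself describe a class template, so the old check let its
// methods through, yet they are still dependent and not instantiable as-is.
template <typename T> struct Outer {
  struct Inner {
    void touch(T value) { (void)value; }
  };
};

// A non-dependent method like this one remains eligible for a deferred
// unused-coverage mapping.
struct Plain {
  void touch(int value) { (void)value; }
};

int main() {
  Outer<int>::Inner{}.touch(1);
  Plain{}.touch(2);
  return 0;
}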
- if (!MD->getParent()->getDescribedClassTemplate()) + if (!MD->getParent()->isDependentContext()) Builder->AddDeferredUnusedCoverageMapping(MD); } @@ -299,6 +303,10 @@ llvm::Module *CodeGenerator::ReleaseModule() { return static_cast<CodeGeneratorImpl*>(this)->ReleaseModule(); } +CGDebugInfo *CodeGenerator::getCGDebugInfo() { + return static_cast<CodeGeneratorImpl*>(this)->getCGDebugInfo(); +} + const Decl *CodeGenerator::GetDeclForMangledName(llvm::StringRef name) { return static_cast<CodeGeneratorImpl*>(this)->GetDeclForMangledName(name); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index 754f996..d0760b9 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -152,6 +152,9 @@ public: CodeGenOpts.CodeModel = "default"; CodeGenOpts.ThreadModel = "single"; CodeGenOpts.DebugTypeExtRefs = true; + // When building a module MainFileName is the name of the modulemap file. + CodeGenOpts.MainFileName = + LangOpts.CurrentModule.empty() ? MainFileName : LangOpts.CurrentModule; CodeGenOpts.setDebugInfo(codegenoptions::FullDebugInfo); CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning()); } @@ -171,7 +174,8 @@ public: // Prepare CGDebugInfo to emit debug info for a clang module. auto *DI = Builder->getModuleDebugInfo(); StringRef ModuleName = llvm::sys::path::filename(MainFileName); - DI->setPCHDescriptor({ModuleName, "", OutputFileName, ~1ULL}); + DI->setPCHDescriptor({ModuleName, "", OutputFileName, + ASTFileSignature{{{~0U, ~0U, ~0U, ~0U, ~1U}}}}); DI->setModuleMap(MMap); } @@ -241,7 +245,11 @@ public: // PCH files don't have a signature field in the control block, // but LLVM detects DWO CUs by looking for a non-zero DWO id. - uint64_t Signature = Buffer->Signature ? Buffer->Signature : ~1ULL; + // We use the lower 64 bits for debug info. + uint64_t Signature = + Buffer->Signature + ? (uint64_t)Buffer->Signature[1] << 32 | Buffer->Signature[0] + : ~1ULL; Builder->getModuleDebugInfo()->setDwoId(Signature); // Finalize the Builder. 
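The PCH container hunk above now folds the first two 32-bit words of the new ASTFileSignature into the 64-bit DWO id. A minimal sketch of that packing; the helper name makeDwoId is invented for illustration:

#include <cassert>
#include <cstdint>

// ~1ULL is the sentinel used when no signature is present, since LLVM
// detects DWO CUs by looking for a non-zero id.
static uint64_t makeDwoId(const uint32_t Words[2], bool HasSignature) {
  return HasSignature ? (uint64_t)Words[1] << 32 | Words[0] : ~1ULL;
}

int main() {
  const uint32_t Sig[2] = {0xdeadbeef, 0x00c0ffee};
  assert(makeDwoId(Sig, true) == 0x00c0ffeedeadbeefULL);
  assert(makeDwoId(Sig, false) == ~1ULL);
  return 0;
}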
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp b/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp index 0bfe30a..fc8e36d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp @@ -57,6 +57,10 @@ static CharUnits getTypeStoreSize(CodeGenModule &CGM, llvm::Type *type) { return CharUnits::fromQuantity(CGM.getDataLayout().getTypeStoreSize(type)); } +static CharUnits getTypeAllocSize(CodeGenModule &CGM, llvm::Type *type) { + return CharUnits::fromQuantity(CGM.getDataLayout().getTypeAllocSize(type)); +} + void SwiftAggLowering::addTypedData(QualType type, CharUnits begin) { // Deal with various aggregate types as special cases: @@ -542,7 +546,9 @@ SwiftAggLowering::getCoerceAndExpandTypes() const { packed = true; elts.push_back(entry.Type); - lastEnd = entry.End; + + lastEnd = entry.Begin + getTypeAllocSize(CGM, entry.Type); + assert(entry.End <= lastEnd); } // We don't need to adjust 'packed' to deal with possible tail padding diff --git a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp index d2fc388..ece3a40 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp @@ -183,7 +183,11 @@ const TargetInfo &ABIInfo::getTarget() const { return CGT.getTarget(); } -bool ABIInfo:: isAndroid() const { return getTarget().getTriple().isAndroid(); } +const CodeGenOptions &ABIInfo::getCodeGenOpts() const { + return CGT.getCodeGenOpts(); +} + +bool ABIInfo::isAndroid() const { return getTarget().getTriple().isAndroid(); } bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { return false; @@ -398,7 +402,17 @@ TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib, } unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const { - return llvm::CallingConv::C; + // OpenCL kernels are called via an explicit runtime API with arguments + // set with clSetKernelArg(), not as normal sub-functions. + // Return SPIR_KERNEL by default as the kernel calling convention to + // ensure the fingerprint is fixed such way that each OpenCL argument + // gets one matching argument in the produced kernel function argument + // list to enable feasible implementation of clSetKernelArg() with + // aggregates etc. In case we would use the default C calling conv here, + // clSetKernelArg() might break depending on the target-specific + // conventions; different targets might split structs passed as values + // to multiple function arguments etc. + return llvm::CallingConv::SPIR_KERNEL; } llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM, @@ -406,13 +420,32 @@ llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule & return llvm::ConstantPointerNull::get(T); } +unsigned TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const { + assert(!CGM.getLangOpts().OpenCL && + !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + return D ? 
D->getType().getAddressSpace() + : static_cast<unsigned>(LangAS::Default); +} + llvm::Value *TargetCodeGenInfo::performAddrSpaceCast( - CodeGen::CodeGenFunction &CGF, llvm::Value *Src, QualType SrcTy, - QualType DestTy) const { + CodeGen::CodeGenFunction &CGF, llvm::Value *Src, unsigned SrcAddr, + unsigned DestAddr, llvm::Type *DestTy, bool isNonNull) const { + // Since target may map different address spaces in AST to the same address + // space, an address space conversion may end up as a bitcast. + if (auto *C = dyn_cast<llvm::Constant>(Src)) + return performAddrSpaceCast(CGF.CGM, C, SrcAddr, DestAddr, DestTy); + return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DestTy); +} + +llvm::Constant * +TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src, + unsigned SrcAddr, unsigned DestAddr, + llvm::Type *DestTy) const { // Since target may map different address spaces in AST to the same address // space, an address space conversion may end up as a bitcast. - return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, - CGF.ConvertType(DestTy)); + return llvm::ConstantExpr::getPointerCast(Src, DestTy); } static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); @@ -942,8 +975,7 @@ class X86_32ABIInfo : public SwiftABIInfo { Class classify(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const; ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; - ABIArgInfo reclassifyHvaArgType(QualType RetTy, CCState &State, - const ABIArgInfo& current) const; + /// \brief Updates the number of available free registers, returns /// true if any registers were allocated. bool updateFreeRegs(QualType Ty, CCState &State) const; @@ -1197,6 +1229,39 @@ static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { return Size == 32 || Size == 64; } +static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD, + uint64_t &Size) { + for (const auto *FD : RD->fields()) { + // Scalar arguments on the stack get 4 byte alignment on x86. If the + // argument is smaller than 32-bits, expanding the struct will create + // alignment padding. + if (!is32Or64BitBasicType(FD->getType(), Context)) + return false; + + // FIXME: Reject bit-fields wholesale; there are two problems, we don't know + // how to expand them yet, and the predicate for telling if a bitfield still + // counts as "basic" is more complicated than what we were doing previously. + if (FD->isBitField()) + return false; + + Size += Context.getTypeSize(FD->getType()); + } + return true; +} + +static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD, + uint64_t &Size) { + // Don't do this if there are any non-empty bases. + for (const CXXBaseSpecifier &Base : RD->bases()) { + if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(), + Size)) + return false; + } + if (!addFieldSizes(Context, RD, Size)) + return false; + return true; +} + /// Test whether an argument type which is to be passed indirectly (on the /// stack) would have the equivalent layout if it was expanded into separate /// arguments. 
If so, we prefer to do the latter to avoid inhibiting @@ -1207,8 +1272,9 @@ bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { if (!RT) return false; const RecordDecl *RD = RT->getDecl(); + uint64_t Size = 0; if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - if (!IsWin32StructABI ) { + if (!IsWin32StructABI) { // On non-Windows, we have to conservatively match our old bitcode // prototypes in order to be ABI-compatible at the bitcode level. if (!CXXRD->isCLike()) @@ -1217,30 +1283,12 @@ bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { // Don't do this for dynamic classes. if (CXXRD->isDynamicClass()) return false; - // Don't do this if there are any non-empty bases. - for (const CXXBaseSpecifier &Base : CXXRD->bases()) { - if (!isEmptyRecord(getContext(), Base.getType(), /*AllowArrays=*/true)) - return false; - } } - } - - uint64_t Size = 0; - - for (const auto *FD : RD->fields()) { - // Scalar arguments on the stack get 4 byte alignment on x86. If the - // argument is smaller than 32-bits, expanding the struct will create - // alignment padding. - if (!is32Or64BitBasicType(FD->getType(), getContext())) + if (!addBaseAndFieldSizes(getContext(), CXXRD, Size)) return false; - - // FIXME: Reject bit-fields wholesale; there are two problems, we don't know - // how to expand them yet, and the predicate for telling if a bitfield still - // counts as "basic" is more complicated than what we were doing previously. - if (FD->isBitField()) + } else { + if (!addFieldSizes(getContext(), RD, Size)) return false; - - Size += getContext().getTypeSize(FD->getType()); } // We can do this if there was no alignment padding. @@ -1511,27 +1559,6 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const { return true; } -ABIArgInfo -X86_32ABIInfo::reclassifyHvaArgType(QualType Ty, CCState &State, - const ABIArgInfo ¤t) const { - // Assumes vectorCall calling convention. - const Type *Base = nullptr; - uint64_t NumElts = 0; - - if (!Ty->isBuiltinType() && !Ty->isVectorType() && - isHomogeneousAggregate(Ty, Base, NumElts)) { - if (State.FreeSSERegs >= NumElts) { - // HVA types get passed directly in registers if there is room. - State.FreeSSERegs -= NumElts; - return getDirectX86Hva(); - } - // If there's no room, the HVA gets passed as normal indirect - // structure. - return getIndirectResult(Ty, /*ByVal=*/false, State); - } - return current; -} - ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State) const { // FIXME: Set alignment on indirect arguments. @@ -1550,35 +1577,20 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } } - // vectorcall adds the concept of a homogenous vector aggregate, similar - // to other targets, regcall uses some of the HVA rules. + // Regcall uses the concept of a homogenous vector aggregate, similar + // to other targets. 
const Type *Base = nullptr; uint64_t NumElts = 0; - if ((State.CC == llvm::CallingConv::X86_VectorCall || - State.CC == llvm::CallingConv::X86_RegCall) && + if (State.CC == llvm::CallingConv::X86_RegCall && isHomogeneousAggregate(Ty, Base, NumElts)) { - if (State.CC == llvm::CallingConv::X86_RegCall) { - if (State.FreeSSERegs >= NumElts) { - State.FreeSSERegs -= NumElts; - if (Ty->isBuiltinType() || Ty->isVectorType()) - return ABIArgInfo::getDirect(); - return ABIArgInfo::getExpand(); - - } - return getIndirectResult(Ty, /*ByVal=*/false, State); - } else if (State.CC == llvm::CallingConv::X86_VectorCall) { - if (State.FreeSSERegs >= NumElts && (Ty->isBuiltinType() || Ty->isVectorType())) { - // Actual floating-point types get registers first time through if - // there is registers available - State.FreeSSERegs -= NumElts; + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + if (Ty->isBuiltinType() || Ty->isVectorType()) return ABIArgInfo::getDirect(); - } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) { - // HVA Types only get registers after everything else has been - // set, so it gets set as indirect for now. - return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty)); - } + return ABIArgInfo::getExpand(); } + return getIndirectResult(Ty, /*ByVal=*/false, State); } if (isAggregateTypeForABI(Ty)) { @@ -1659,31 +1671,53 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State, bool &UsedInAlloca) const { - // Vectorcall only allows the first 6 parameters to be passed in registers, - // and homogeneous vector aggregates are only put into registers as a second - // priority. - unsigned Count = 0; - CCState ZeroState = State; - ZeroState.FreeRegs = ZeroState.FreeSSERegs = 0; - // HVAs must be done as a second priority for registers, so the deferred - // items are dealt with by going through the pattern a second time. + // Vectorcall x86 works subtly different than in x64, so the format is + // a bit different than the x64 version. First, all vector types (not HVAs) + // are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5 registers. + // This differs from the x64 implementation, where the first 6 by INDEX get + // registers. + // After that, integers AND HVAs are assigned Left to Right in the same pass. + // Integers are passed as ECX/EDX if one is available (in order). HVAs will + // first take up the remaining YMM/XMM registers. If insufficient registers + // remain but an integer register (ECX/EDX) is available, it will be passed + // in that, else, on the stack. for (auto &I : FI.arguments()) { - if (Count < VectorcallMaxParamNumAsReg) - I.info = classifyArgumentType(I.type, State); - else - // Parameters after the 6th cannot be passed in registers, - // so pretend there are no registers left for them. - I.info = classifyArgumentType(I.type, ZeroState); - UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); - ++Count; + // First pass do all the vector types. 
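A rough stand-alone sketch of the two-pass assignment described in the comment above; the enum names, the Arg record, and assignVectorcall are invented stand-ins (the real code classifies QualTypes and tracks State.FreeSSERegs). Plain vector arguments claim SSE registers first, then HVAs take whatever remains or fall back to indirect passing, while everything else is classified by the usual rules.

#include <cassert>
#include <vector>

enum class Kind { Vector, HVA, Other };
enum class Loc { SSEReg, Indirect, Default, Unset };

struct Arg { Kind K; unsigned Elts; Loc Where; };

static void assignVectorcall(std::vector<Arg> &Args, unsigned FreeSSE) {
  // Pass 1: only plain vector types compete for registers.
  for (Arg &A : Args)
    if (A.K == Kind::Vector && FreeSSE >= A.Elts) {
      FreeSSE -= A.Elts;
      A.Where = Loc::SSEReg;
    }
  // Pass 2: HVAs pick up the leftover SSE registers; everything else is
  // left to the normal classification (ECX/EDX, stack, ...).
  for (Arg &A : Args) {
    if (A.Where != Loc::Unset)
      continue;
    if (A.K == Kind::HVA && FreeSSE >= A.Elts) {
      FreeSSE -= A.Elts;
      A.Where = Loc::SSEReg;
    } else if (A.K == Kind::HVA) {
      A.Where = Loc::Indirect;
    } else {
      A.Where = Loc::Default;
    }
  }
}

int main() {
  std::vector<Arg> Args = {{Kind::HVA, 4, Loc::Unset},
                           {Kind::Vector, 1, Loc::Unset},
                           {Kind::Other, 1, Loc::Unset}};
  assignVectorcall(Args, /*FreeSSE=*/6);
  assert(Args[1].Where == Loc::SSEReg); // the vector was served first
  assert(Args[0].Where == Loc::SSEReg); // the HVA still fit afterwards
  assert(Args[2].Where == Loc::Default);
  return 0;
}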
+ const Type *Base = nullptr; + uint64_t NumElts = 0; + const QualType& Ty = I.type; + if ((Ty->isVectorType() || Ty->isBuiltinType()) && + isHomogeneousAggregate(Ty, Base, NumElts)) { + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + I.info = ABIArgInfo::getDirect(); + } else { + I.info = classifyArgumentType(Ty, State); + } + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + } } - Count = 0; - // Go through the arguments a second time to get HVAs registers if there - // are still some available. + for (auto &I : FI.arguments()) { - if (Count < VectorcallMaxParamNumAsReg) - I.info = reclassifyHvaArgType(I.type, State, I.info); - ++Count; + // Second pass, do the rest! + const Type *Base = nullptr; + uint64_t NumElts = 0; + const QualType& Ty = I.type; + bool IsHva = isHomogeneousAggregate(Ty, Base, NumElts); + + if (IsHva && !Ty->isVectorType() && !Ty->isBuiltinType()) { + // Assign true HVAs (non vector/native FP types). + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + I.info = getDirectX86Hva(); + } else { + I.info = getIndirectResult(Ty, /*ByVal=*/false, State); + } + } else if (!IsHva) { + // Assign all Non-HVAs, so this will exclude Vector/FP args. + I.info = classifyArgumentType(Ty, State); + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + } } } @@ -1885,10 +1919,7 @@ void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, // Now add the 'alignstack' attribute with a value of 16. llvm::AttrBuilder B; B.addStackAlignmentAttr(16); - Fn->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - B)); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); @@ -2068,9 +2099,14 @@ class X86_64ABIInfo : public SwiftABIInfo { return !getTarget().getTriple().isOSDarwin(); } - /// GCC classifies <1 x long long> as SSE but compatibility with older clang - // compilers require us to classify it as INTEGER. + /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to + /// classify it as INTEGER (for compatibility with older clang compilers). bool classifyIntegerMMXAsSSE() const { + // Clang <= 3.8 did not do this. + if (getCodeGenOpts().getClangABICompat() <= + CodeGenOptions::ClangABI::Ver3_8) + return false; + const llvm::Triple &Triple = getTarget().getTriple(); if (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::PS4) return false; @@ -3146,8 +3182,7 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, } } - llvm::StructType *Result = llvm::StructType::get(Lo, Hi, nullptr); - + llvm::StructType *Result = llvm::StructType::get(Lo, Hi); // Verify that the second element is at an 8-byte offset. 
assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 && @@ -3222,8 +3257,7 @@ classifyReturnType(QualType RetTy) const { case ComplexX87: assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification."); ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()), - llvm::Type::getX86_FP80Ty(getVMContext()), - nullptr); + llvm::Type::getX86_FP80Ty(getVMContext())); break; } @@ -3719,7 +3753,7 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo, CharUnits::fromQuantity(16)); llvm::Type *DoubleTy = CGF.DoubleTy; - llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy, nullptr); + llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy); llvm::Value *V; Address Tmp = CGF.CreateMemTemp(Ty); Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); @@ -3881,6 +3915,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, bool IsRegCall) const { unsigned Count = 0; for (auto &I : FI.arguments()) { + // Vectorcall in x64 only permits the first 6 arguments to be passed + // as XMM/YMM registers. if (Count < VectorcallMaxParamNumAsReg) I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall); else { @@ -3893,11 +3929,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, ++Count; } - Count = 0; for (auto &I : FI.arguments()) { - if (Count < VectorcallMaxParamNumAsReg) - I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info); - ++Count; + I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info); } } @@ -4624,7 +4657,7 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { llvm::Type *CoerceTy; if (Bits > GPRBits) { CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits); - CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, nullptr); + CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy); } else CoerceTy = llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); @@ -4761,7 +4794,8 @@ class AArch64ABIInfo : public SwiftABIInfo { public: enum ABIKind { AAPCS = 0, - DarwinPCS + DarwinPCS, + Win64 }; private: @@ -4799,10 +4833,14 @@ private: Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override { - return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) - : EmitAAPCSVAArg(VAListAddr, Ty, CGF); + return Kind == Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty) + : isDarwinPCS() ? 
EmitDarwinVAArg(VAListAddr, Ty, CGF) + : EmitAAPCSVAArg(VAListAddr, Ty, CGF); } + Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + bool shouldPassIndirectlyForSwift(CharUnits totalSize, ArrayRef<llvm::Type*> scalars, bool asReturnValue) const override { @@ -4811,6 +4849,9 @@ private: bool isSwiftErrorInRegister() const override { return true; } + + bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, + unsigned elts) const override; }; class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -4819,7 +4860,7 @@ public: : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {} StringRef getARCRetainAutoreleasedReturnValueMarker() const override { - return "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue"; + return "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"; } int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { @@ -4877,10 +4918,16 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { // Empty records are always ignored on Darwin, but actually passed in C++ mode // elsewhere for GNU compatibility. - if (isEmptyRecord(getContext(), Ty, true)) { + uint64_t Size = getContext().getTypeSize(Ty); + bool IsEmpty = isEmptyRecord(getContext(), Ty, true); + if (IsEmpty || Size == 0) { if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS()) return ABIArgInfo::getIgnore(); + // GNU C mode. The only argument that gets ignored is an empty one with size + // 0. + if (IsEmpty && Size == 0) + return ABIArgInfo::getIgnore(); return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); } @@ -4893,7 +4940,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { } // Aggregates <= 16 bytes are passed directly in registers or on the stack. - uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 128) { // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of // same size and alignment. @@ -4901,7 +4947,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { return coerceToIntArray(Ty, getContext(), getVMContext()); } unsigned Alignment = getContext().getTypeAlign(Ty); - Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes + Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. @@ -4933,7 +4979,8 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { : ABIArgInfo::getDirect()); } - if (isEmptyRecord(getContext(), RetTy, true)) + uint64_t Size = getContext().getTypeSize(RetTy); + if (isEmptyRecord(getContext(), RetTy, true) || Size == 0) return ABIArgInfo::getIgnore(); const Type *Base = nullptr; @@ -4943,7 +4990,6 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getDirect(); // Aggregates <= 16 bytes are returned directly in registers or on the stack. - uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 128) { // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of // same size and alignment. 
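The replacement of the hand-rolled "64 * ((Size + 63) / 64)" with llvm::alignTo(Size, 64) in this hunk is a pure readability change; both round a bit-size up to the next multiple of 64. A tiny check of that equivalence, with roundUpTo64 as a local stand-in for llvm::alignTo:

#include <cassert>
#include <cstdint>

static uint64_t roundUpTo64(uint64_t SizeInBits) {
  return (SizeInBits + 63) / 64 * 64; // what llvm::alignTo(SizeInBits, 64) computes
}

int main() {
  assert(roundUpTo64(1) == 64);
  assert(roundUpTo64(64) == 64);
  assert(roundUpTo64(65) == 128);
  assert(roundUpTo64(120) == 128); // e.g. a 15-byte aggregate widens to two i64s
  return 0;
}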
@@ -4951,7 +4997,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { return coerceToIntArray(RetTy, getContext(), getVMContext()); } unsigned Alignment = getContext().getTypeAlign(RetTy); - Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes + Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. @@ -4979,6 +5025,17 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { return false; } +bool AArch64ABIInfo::isLegalVectorTypeForSwift(CharUnits totalSize, + llvm::Type *eltTy, + unsigned elts) const { + if (!llvm::isPowerOf2_32(elts)) + return false; + if (totalSize.getQuantity() != 8 && + (totalSize.getQuantity() != 16 || elts == 1)) + return false; + return true; +} + bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS64 must have base types of a floating // point type or a short-vector type. This is the same as the 32-bit ABI, @@ -5289,6 +5346,14 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, TyInfo, SlotSize, /*AllowHigherAlign*/ true); } +Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, + CGF.getContext().getTypeInfoInChars(Ty), + CharUnits::fromQuantity(8), + /*allowHigherAlign*/ false); +} + //===----------------------------------------------------------------------===// // ARM ABI Implementation //===----------------------------------------------------------------------===// @@ -5367,6 +5432,8 @@ private: bool isSwiftErrorInRegister() const override { return true; } + bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, + unsigned elts) const override; }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { @@ -5433,10 +5500,7 @@ public: // the backend to perform a realignment as part of the function prologue. llvm::AttrBuilder B; B.addStackAlignmentAttr(8); - Fn->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - B)); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); } }; @@ -5518,17 +5582,14 @@ void ARMABIInfo::setCCs() { // AAPCS apparently requires runtime support functions to be soft-float, but // that's almost certainly for historic reasons (Thumb1 not supporting VFP // most likely). It's more convenient for AAPCS16_VFP to be hard-float. - switch (getABIKind()) { - case APCS: - case AAPCS16_VFP: - if (abiCC != getLLVMDefaultCC()) + + // The Run-time ABI for the ARM Architecture section 4.1.2 requires + // AEABI-complying FP helper functions to use the base AAPCS. + // These AEABI functions are expanded in the ARM llvm backend, all the builtin + // support functions emitted by clang such as the _Complex helpers follow the + // abiCC. 
+ if (abiCC != getLLVMDefaultCC()) BuiltinCC = abiCC; - break; - case AAPCS: - case AAPCS_VFP: - BuiltinCC = llvm::CallingConv::ARM_AAPCS; - break; - } } ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, @@ -5882,6 +5943,20 @@ bool ARMABIInfo::isIllegalVectorType(QualType Ty) const { return false; } +bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize, + llvm::Type *eltTy, + unsigned numElts) const { + if (!llvm::isPowerOf2_32(numElts)) + return false; + unsigned size = getDataLayout().getTypeStoreSizeInBits(eltTy); + if (size > 64) + return false; + if (vectorSize.getQuantity() != 8 && + (vectorSize.getQuantity() != 16 || numElts == 1)) + return false; + return true; +} + bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS-VFP must have base types of float, // double, or 64-bit or 128-bit vectors. @@ -6545,6 +6620,11 @@ public: Fn->addFnAttr("nomips16"); } + if (FD->hasAttr<MicroMipsAttr>()) + Fn->addFnAttr("micromips"); + else if (FD->hasAttr<NoMicroMipsAttr>()) + Fn->addFnAttr("nomicromips"); + const MipsInterruptAttr *Attr = FD->getAttr<MipsInterruptAttr>(); if (!Attr) return; @@ -6884,6 +6964,31 @@ MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, } //===----------------------------------------------------------------------===// +// AVR ABI Implementation. +//===----------------------------------------------------------------------===// + +namespace { +class AVRTargetCodeGenInfo : public TargetCodeGenInfo { +public: + AVRTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(new DefaultABIInfo(CGT)) { } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + const auto *FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) return; + auto *Fn = cast<llvm::Function>(GV); + + if (FD->getAttr<AVRInterruptAttr>()) + Fn->addFnAttr("interrupt"); + + if (FD->getAttr<AVRSignalAttr>()) + Fn->addFnAttr("signal"); + } +}; +} + +//===----------------------------------------------------------------------===// // TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults. // Currently subclassed only to implement custom OpenCL C function attribute // handling. @@ -6997,13 +7102,13 @@ ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const { ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + // Ignore empty records. 
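Hoisting the getRecordArgABI check above the empty-record check matters for C++ types that have no data members but still carry a non-trivial special member. A hedged user-level illustration of the kind of type affected (the exact lowering depends on the target, so treat this as a sketch):

// Guard is "empty" as far as isEmptyRecord is concerned, but its non-trivial
// destructor gives it a record ABI that must be honoured before the
// empty-record early exit gets a chance to ignore the argument.
struct Guard {
  ~Guard() {} // non-trivial destructor, no data members
};

void take(Guard) {}

int main() {
  take(Guard{});
  return 0;
}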
if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - uint64_t Size = getContext().getTypeSize(Ty); if (Size > 64) return getNaturalAlignIndirect(Ty, /*ByVal=*/true); @@ -7246,11 +7351,16 @@ public: llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const override; + + unsigned getASTAllocaAddressSpace() const override { + return LangAS::FirstTargetAddressSpace + + getABIInfo().getDataLayout().getAllocaAddrSpace(); + } + unsigned getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const override; }; } -static void appendOpenCLVersionMD (CodeGen::CodeGenModule &CGM); - void AMDGPUTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, @@ -7261,9 +7371,14 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( llvm::Function *F = cast<llvm::Function>(GV); - if (const auto *Attr = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) { - unsigned Min = Attr->getMin(); - unsigned Max = Attr->getMax(); + const auto *ReqdWGS = M.getLangOpts().OpenCL ? + FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr; + const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); + if (ReqdWGS || FlatWGS) { + unsigned Min = FlatWGS ? FlatWGS->getMin() : 0; + unsigned Max = FlatWGS ? FlatWGS->getMax() : 0; + if (ReqdWGS && Min == 0 && Max == 0) + Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); if (Min != 0) { assert(Min <= Max && "Min must be less than or equal Max"); @@ -7302,8 +7417,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } - - appendOpenCLVersionMD(M); } unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { @@ -7328,6 +7441,31 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer( llvm::ConstantPointerNull::get(NPT), PT); } +unsigned +AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const { + assert(!CGM.getLangOpts().OpenCL && + !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + unsigned DefaultGlobalAS = + LangAS::FirstTargetAddressSpace + + CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); + if (!D) + return DefaultGlobalAS; + + unsigned AddrSpace = D->getType().getAddressSpace(); + assert(AddrSpace == LangAS::Default || + AddrSpace >= LangAS::FirstTargetAddressSpace); + if (AddrSpace != LangAS::Default) + return AddrSpace; + + if (CGM.isTypeConstant(D->getType(), false)) { + if (auto ConstAS = CGM.getTarget().getConstantAddressSpace()) + return ConstAS.getValue(); + } + return DefaultGlobalAS; +} + //===----------------------------------------------------------------------===// // SPARC v8 ABI Implementation. // Based on the SPARC Compliance Definition version 2.4.1. @@ -7974,45 +8112,18 @@ class SPIRTargetCodeGenInfo : public TargetCodeGenInfo { public: SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} - void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; }; + } // End anonymous namespace. -/// Emit SPIR specific metadata: OpenCL and SPIR version. 
-void SPIRTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { - llvm::LLVMContext &Ctx = CGM.getModule().getContext(); - llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx); - llvm::Module &M = CGM.getModule(); - // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the - // opencl.spir.version named metadata. - llvm::Metadata *SPIRVerElts[] = { - llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, (CGM.getLangOpts().OpenCLVersion / 100 > 1) ? 0 : 2))}; - llvm::NamedMDNode *SPIRVerMD = - M.getOrInsertNamedMetadata("opencl.spir.version"); - SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts)); - appendOpenCLVersionMD(CGM); -} - -static void appendOpenCLVersionMD(CodeGen::CodeGenModule &CGM) { - llvm::LLVMContext &Ctx = CGM.getModule().getContext(); - llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx); - llvm::Module &M = CGM.getModule(); - // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the - // opencl.ocl.version named metadata node. - llvm::Metadata *OCLVerElts[] = { - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, (CGM.getLangOpts().OpenCLVersion % 100) / 10))}; - llvm::NamedMDNode *OCLVerMD = - M.getOrInsertNamedMetadata("opencl.ocl.version"); - OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts)); +namespace clang { +namespace CodeGen { +void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) { + DefaultABIInfo SPIRABI(CGM.getTypes()); + SPIRABI.computeInfo(FI); +} +} } unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { @@ -8386,11 +8497,16 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::mips64el: return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false)); + case llvm::Triple::avr: + return SetCGInfo(new AVRTargetCodeGenInfo(Types)); + case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: { AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS; if (getTarget().getABI() == "darwinpcs") Kind = AArch64ABIInfo::DarwinPCS; + else if (Triple.isOSWindows()) + Kind = AArch64ABIInfo::Win64; return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind)); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h index 223d6d0..952ef96 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h @@ -229,13 +229,36 @@ public: virtual llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const; + /// Get target favored AST address space of a global variable for languages + /// other than OpenCL and CUDA. + /// If \p D is nullptr, returns the default target favored address space + /// for global variable. + virtual unsigned getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const; + + /// Get the AST address space for alloca. + virtual unsigned getASTAllocaAddressSpace() const { return LangAS::Default; } + /// Perform address space cast of an expression of pointer type. /// \param V is the LLVM value to be casted to another address space. - /// \param SrcTy is the QualType of \p V. - /// \param DestTy is the destination QualType. + /// \param SrcAddr is the language address space of \p V. 
+ /// \param DestAddr is the targeted language address space. + /// \param DestTy is the destination LLVM pointer type. + /// \param IsNonNull is the flag indicating \p V is known to be non null. virtual llvm::Value *performAddrSpaceCast(CodeGen::CodeGenFunction &CGF, - llvm::Value *V, QualType SrcTy, QualType DestTy) const; + llvm::Value *V, unsigned SrcAddr, + unsigned DestAddr, + llvm::Type *DestTy, + bool IsNonNull = false) const; + /// Perform address space cast of a constant expression of pointer type. + /// \param V is the LLVM constant to be casted to another address space. + /// \param SrcAddr is the language address space of \p V. + /// \param DestAddr is the targeted language address space. + /// \param DestTy is the destination LLVM pointer type. + virtual llvm::Constant * + performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *V, unsigned SrcAddr, + unsigned DestAddr, llvm::Type *DestTy) const; }; } // namespace CodeGen
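To close, a stand-alone sketch (plain LLVM C++ API, compiled and linked against LLVM; not the clang hooks themselves) of what the constant overload of performAddrSpaceCast boils down to by default: ConstantExpr::getPointerCast folds the conversion, emitting an addrspacecast only when the two address spaces actually differ and a plain bitcast otherwise.

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("addrspacecast-sketch", Ctx);

  // An external i8 global living in address space 1.
  Type *I8 = Type::getInt8Ty(Ctx);
  auto *GV = new GlobalVariable(M, I8, /*isConstant=*/false,
                                GlobalValue::ExternalLinkage,
                                /*Initializer=*/nullptr, "g",
                                /*InsertBefore=*/nullptr,
                                GlobalValue::NotThreadLocal,
                                /*AddressSpace=*/1);

  // The constant path: fold the cast into a constant expression.
  Constant *InAS0 = ConstantExpr::getPointerCast(
      GV, PointerType::get(I8, /*AddressSpace=*/0));
  InAS0->print(outs());
  outs() << "\n";
  return 0;
}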