Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen')
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp | 534
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp | 28
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp | 512
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp | 659
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.cpp | 39
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h | 27
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp | 669
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCall.h | 49
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp | 143
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp | 53
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCoroutine.cpp | 590
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp | 385
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h | 74
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp | 143
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp | 15
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp | 43
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp | 774
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp | 29
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp | 62
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp | 16
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp | 16
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp | 479
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGGPUBuiltin.cpp (renamed from contrib/llvm/tools/clang/lib/CodeGen/CGCUDABuiltin.cpp) | 13
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp | 157
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp | 140
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp | 309
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp | 60
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp | 3
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp | 1334
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h | 257
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 1698
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 128
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp | 24
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp | 1482
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp | 25
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGValue.h | 51
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp | 16
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp | 312
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp | 165
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h | 446
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp | 464
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h | 120
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp | 39
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h | 10
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp | 6
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypeCache.h | 10
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp | 10
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h | 8
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/ConstantBuilder.h | 444
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/ConstantInitBuilder.cpp | 280
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp | 10
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp | 177
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.cpp | 208
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.h | 117
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp | 168
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp | 10
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp | 12
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp | 8
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp | 478
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h | 29
64 files changed, 10556 insertions, 4023 deletions
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h
index ac31dfd..575506d 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h
@@ -10,6 +10,7 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_ABIINFO_H
#define LLVM_CLANG_LIB_CODEGEN_ABIINFO_H
+#include "clang/AST/CharUnits.h"
#include "clang/AST/Type.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Type.h"
@@ -23,6 +24,7 @@ namespace llvm {
namespace clang {
class ASTContext;
+ class CodeGenOptions;
class TargetInfo;
namespace CodeGen {
@@ -67,6 +69,7 @@ namespace swiftcall {
llvm::LLVMContext &getVMContext() const;
const llvm::DataLayout &getDataLayout() const;
const TargetInfo &getTarget() const;
+ const CodeGenOptions &getCodeGenOpts() const;
/// Return the calling convention to use for system runtime
/// functions.
@@ -148,7 +151,6 @@ namespace swiftcall {
return info->supportsSwift();
}
};
-
} // end namespace CodeGen
} // end namespace clang
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp
index d2ce6ea..513896d 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp
@@ -35,7 +35,6 @@
#include "llvm/LTO/LTOBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -50,10 +49,12 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include <memory>
using namespace clang;
@@ -61,6 +62,9 @@ using namespace llvm;
namespace {
+// Default filename used for profile generation.
+static constexpr StringLiteral DefaultProfileGenName = "default_%m.profraw";
+
class EmitAssemblyHelper {
DiagnosticsEngine &Diags;
const HeaderSearchOptions &HSOpts;
@@ -73,7 +77,6 @@ class EmitAssemblyHelper {
std::unique_ptr<raw_pwrite_stream> OS;
-private:
TargetIRAnalysis getTargetIRAnalysis() const {
if (TM)
return TM->getTargetIRAnalysis();
@@ -81,9 +84,6 @@ private:
return TargetIRAnalysis();
}
- /// Set LLVM command line options passed through -backend-option.
- void setCommandLineOpts();
-
void CreatePasses(legacy::PassManager &MPM, legacy::FunctionPassManager &FPM);
/// Generates the TargetMachine.
@@ -130,16 +130,20 @@ public:
// that we add to the PassManagerBuilder.
class PassManagerBuilderWrapper : public PassManagerBuilder {
public:
- PassManagerBuilderWrapper(const CodeGenOptions &CGOpts,
+ PassManagerBuilderWrapper(const Triple &TargetTriple,
+ const CodeGenOptions &CGOpts,
const LangOptions &LangOpts)
- : PassManagerBuilder(), CGOpts(CGOpts), LangOpts(LangOpts) {}
+ : PassManagerBuilder(), TargetTriple(TargetTriple), CGOpts(CGOpts),
+ LangOpts(LangOpts) {}
+ const Triple &getTargetTriple() const { return TargetTriple; }
const CodeGenOptions &getCGOpts() const { return CGOpts; }
const LangOptions &getLangOpts() const { return LangOpts; }
+
private:
+ const Triple &TargetTriple;
const CodeGenOptions &CGOpts;
const LangOptions &LangOpts;
};
-
}
static void addObjCARCAPElimPass(const PassManagerBuilder &Builder, PassManagerBase &PM) {
@@ -183,19 +187,42 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
Opts.Use8bitCounters = CGOpts.SanitizeCoverage8bitCounters;
Opts.TracePC = CGOpts.SanitizeCoverageTracePC;
Opts.TracePCGuard = CGOpts.SanitizeCoverageTracePCGuard;
+ Opts.NoPrune = CGOpts.SanitizeCoverageNoPrune;
+ Opts.Inline8bitCounters = CGOpts.SanitizeCoverageInline8bitCounters;
PM.add(createSanitizerCoverageModulePass(Opts));
}
+// Check if ASan should use GC-friendly instrumentation for globals.
+// First of all, there is no point if -fdata-sections is off (expect for MachO,
+// where this is not a factor). Also, on ELF this feature requires an assembler
+// extension that only works with -integrated-as at the moment.
+static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) {
+ if (!CGOpts.SanitizeAddressGlobalsDeadStripping)
+ return false;
+ switch (T.getObjectFormat()) {
+ case Triple::MachO:
+ case Triple::COFF:
+ return true;
+ case Triple::ELF:
+ return CGOpts.DataSections && !CGOpts.DisableIntegratedAS;
+ default:
+ return false;
+ }
+}
+
static void addAddressSanitizerPasses(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
const PassManagerBuilderWrapper &BuilderWrapper =
static_cast<const PassManagerBuilderWrapper&>(Builder);
+ const Triple &T = BuilderWrapper.getTargetTriple();
const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Address);
bool UseAfterScope = CGOpts.SanitizeAddressUseAfterScope;
+ bool UseGlobalsGC = asanUseGlobalsGC(T, CGOpts);
PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover,
UseAfterScope));
- PM.add(createAddressSanitizerModulePass(/*CompileKernel*/false, Recover));
+ PM.add(createAddressSanitizerModulePass(/*CompileKernel*/ false, Recover,
+ UseGlobalsGC));
}
static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder,
@@ -262,7 +289,7 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
TLII->disableAllFunctions();
else {
// Disable individual libc/libm calls in TargetLibraryInfo.
- LibFunc::Func F;
+ LibFunc F;
for (auto &FuncName : CodeGenOpts.getNoBuiltinFuncs())
if (TLII->getLibFunc(FuncName, F))
TLII->setUnavailable(F);
@@ -292,6 +319,140 @@ static void addSymbolRewriterPass(const CodeGenOptions &Opts,
MPM->add(createRewriteSymbolsPass(DL));
}
+static CodeGenOpt::Level getCGOptLevel(const CodeGenOptions &CodeGenOpts) {
+ switch (CodeGenOpts.OptimizationLevel) {
+ default:
+ llvm_unreachable("Invalid optimization level!");
+ case 0:
+ return CodeGenOpt::None;
+ case 1:
+ return CodeGenOpt::Less;
+ case 2:
+ return CodeGenOpt::Default; // O2/Os/Oz
+ case 3:
+ return CodeGenOpt::Aggressive;
+ }
+}
+
+static llvm::CodeModel::Model getCodeModel(const CodeGenOptions &CodeGenOpts) {
+ unsigned CodeModel =
+ llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel)
+ .Case("small", llvm::CodeModel::Small)
+ .Case("kernel", llvm::CodeModel::Kernel)
+ .Case("medium", llvm::CodeModel::Medium)
+ .Case("large", llvm::CodeModel::Large)
+ .Case("default", llvm::CodeModel::Default)
+ .Default(~0u);
+ assert(CodeModel != ~0u && "invalid code model!");
+ return static_cast<llvm::CodeModel::Model>(CodeModel);
+}
+
+static llvm::Reloc::Model getRelocModel(const CodeGenOptions &CodeGenOpts) {
+ // Keep this synced with the equivalent code in
+ // lib/Frontend/CompilerInvocation.cpp
+ llvm::Optional<llvm::Reloc::Model> RM;
+ RM = llvm::StringSwitch<llvm::Reloc::Model>(CodeGenOpts.RelocationModel)
+ .Case("static", llvm::Reloc::Static)
+ .Case("pic", llvm::Reloc::PIC_)
+ .Case("ropi", llvm::Reloc::ROPI)
+ .Case("rwpi", llvm::Reloc::RWPI)
+ .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI)
+ .Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC);
+ assert(RM.hasValue() && "invalid PIC model!");
+ return *RM;
+}
+
+static TargetMachine::CodeGenFileType getCodeGenFileType(BackendAction Action) {
+ if (Action == Backend_EmitObj)
+ return TargetMachine::CGFT_ObjectFile;
+ else if (Action == Backend_EmitMCNull)
+ return TargetMachine::CGFT_Null;
+ else {
+ assert(Action == Backend_EmitAssembly && "Invalid action!");
+ return TargetMachine::CGFT_AssemblyFile;
+ }
+}
+
+static void initTargetOptions(llvm::TargetOptions &Options,
+ const CodeGenOptions &CodeGenOpts,
+ const clang::TargetOptions &TargetOpts,
+ const LangOptions &LangOpts,
+ const HeaderSearchOptions &HSOpts) {
+ Options.ThreadModel =
+ llvm::StringSwitch<llvm::ThreadModel::Model>(CodeGenOpts.ThreadModel)
+ .Case("posix", llvm::ThreadModel::POSIX)
+ .Case("single", llvm::ThreadModel::Single);
+
+ // Set float ABI type.
+ assert((CodeGenOpts.FloatABI == "soft" || CodeGenOpts.FloatABI == "softfp" ||
+ CodeGenOpts.FloatABI == "hard" || CodeGenOpts.FloatABI.empty()) &&
+ "Invalid Floating Point ABI!");
+ Options.FloatABIType =
+ llvm::StringSwitch<llvm::FloatABI::ABIType>(CodeGenOpts.FloatABI)
+ .Case("soft", llvm::FloatABI::Soft)
+ .Case("softfp", llvm::FloatABI::Soft)
+ .Case("hard", llvm::FloatABI::Hard)
+ .Default(llvm::FloatABI::Default);
+
+ // Set FP fusion mode.
+ switch (LangOpts.getDefaultFPContractMode()) {
+ case LangOptions::FPC_Off:
+ // Preserve any contraction performed by the front-end. (Strict performs
+ // splitting of the muladd instrinsic in the backend.)
+ Options.AllowFPOpFusion = llvm::FPOpFusion::Standard;
+ break;
+ case LangOptions::FPC_On:
+ Options.AllowFPOpFusion = llvm::FPOpFusion::Standard;
+ break;
+ case LangOptions::FPC_Fast:
+ Options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
+ break;
+ }
+
+ Options.UseInitArray = CodeGenOpts.UseInitArray;
+ Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS;
+ Options.CompressDebugSections = CodeGenOpts.getCompressDebugSections();
+ Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations;
+
+ // Set EABI version.
+ Options.EABIVersion = TargetOpts.EABIVersion;
+
+ if (LangOpts.SjLjExceptions)
+ Options.ExceptionModel = llvm::ExceptionHandling::SjLj;
+
+ Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath;
+ Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath;
+ Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS;
+ Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath;
+ Options.StackAlignmentOverride = CodeGenOpts.StackAlignment;
+ Options.FunctionSections = CodeGenOpts.FunctionSections;
+ Options.DataSections = CodeGenOpts.DataSections;
+ Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames;
+ Options.EmulatedTLS = CodeGenOpts.EmulatedTLS;
+ Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning();
+
+ if (CodeGenOpts.EnableSplitDwarf)
+ Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile;
+ Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
+ Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels;
+ Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm;
+ Options.MCOptions.MCNoExecStack = CodeGenOpts.NoExecStack;
+ Options.MCOptions.MCIncrementalLinkerCompatible =
+ CodeGenOpts.IncrementalLinkerCompatible;
+ Options.MCOptions.MCPIECopyRelocations = CodeGenOpts.PIECopyRelocations;
+ Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings;
+ Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose;
+ Options.MCOptions.PreserveAsmComments = CodeGenOpts.PreserveAsmComments;
+ Options.MCOptions.ABIName = TargetOpts.ABI;
+ for (const auto &Entry : HSOpts.UserEntries)
+ if (!Entry.IsFramework &&
+ (Entry.Group == frontend::IncludeDirGroup::Quoted ||
+ Entry.Group == frontend::IncludeDirGroup::Angled ||
+ Entry.Group == frontend::IncludeDirGroup::System))
+ Options.MCOptions.IASSearchPaths.push_back(
+ Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path);
+}
+
void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
legacy::FunctionPassManager &FPM) {
// Handle disabling of all LLVM passes, where we want to preserve the
@@ -299,8 +460,6 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
if (CodeGenOpts.DisableLLVMPasses)
return;
- PassManagerBuilderWrapper PMBuilder(CodeGenOpts, LangOpts);
-
// Figure out TargetLibraryInfo. This needs to be added to MPM and FPM
// manually (and not via PMBuilder), since some passes (eg. InstrProfiling)
// are inserted before PMBuilder ones - they'd get the default-constructed
@@ -309,6 +468,8 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
std::unique_ptr<TargetLibraryInfoImpl> TLII(
createTLII(TargetTriple, CodeGenOpts));
+ PassManagerBuilderWrapper PMBuilder(TargetTriple, CodeGenOpts, LangOpts);
+
// At O0 and O1 we only run the always inliner which is more efficient. At
// higher optimization levels we run the normal inliner.
if (CodeGenOpts.OptimizationLevel <= 1) {
@@ -316,13 +477,17 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
!CodeGenOpts.DisableLifetimeMarkers);
PMBuilder.Inliner = createAlwaysInlinerLegacyPass(InsertLifetimeIntrinsics);
} else {
+ // We do not want to inline hot callsites for SamplePGO module-summary build
+ // because profile annotation will happen again in ThinLTO backend, and we
+ // want the IR of the hot path to match the profile.
PMBuilder.Inliner = createFunctionInliningPass(
- CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize);
+ CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize,
+ (!CodeGenOpts.SampleProfileFile.empty() &&
+ CodeGenOpts.EmitSummaryIndex));
}
PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel;
PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize;
- PMBuilder.BBVectorize = CodeGenOpts.VectorizeBB;
PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP;
PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop;
@@ -334,16 +499,13 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
MPM.add(new TargetLibraryInfoWrapperPass(*TLII));
- // Add target-specific passes that need to run as early as possible.
if (TM)
- PMBuilder.addExtension(
- PassManagerBuilder::EP_EarlyAsPossible,
- [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
- TM->addEarlyAsPossiblePasses(PM);
- });
+ TM->adjustPassManager(PMBuilder);
- PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible,
- addAddDiscriminatorsPass);
+ if (CodeGenOpts.DebugInfoForProfiling ||
+ !CodeGenOpts.SampleProfileFile.empty())
+ PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible,
+ addAddDiscriminatorsPass);
// In ObjC ARC mode, add the main ARC optimization passes.
if (LangOpts.ObjCAutoRefCount) {
@@ -454,7 +616,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
if (!CodeGenOpts.InstrProfileOutput.empty())
PMBuilder.PGOInstrGen = CodeGenOpts.InstrProfileOutput;
else
- PMBuilder.PGOInstrGen = "default_%m.profraw";
+ PMBuilder.PGOInstrGen = DefaultProfileGenName;
}
if (CodeGenOpts.hasProfileIRUse())
PMBuilder.PGOInstrUse = CodeGenOpts.ProfileInstrumentUsePath;
@@ -466,7 +628,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
PMBuilder.populateModulePassManager(MPM);
}
-void EmitAssemblyHelper::setCommandLineOpts() {
+static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) {
SmallVector<const char *, 16> BackendArgs;
BackendArgs.push_back("clang"); // Fake program name.
if (!CodeGenOpts.DebugPass.empty()) {
@@ -495,126 +657,14 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
return;
}
- unsigned CodeModel =
- llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel)
- .Case("small", llvm::CodeModel::Small)
- .Case("kernel", llvm::CodeModel::Kernel)
- .Case("medium", llvm::CodeModel::Medium)
- .Case("large", llvm::CodeModel::Large)
- .Case("default", llvm::CodeModel::Default)
- .Default(~0u);
- assert(CodeModel != ~0u && "invalid code model!");
- llvm::CodeModel::Model CM = static_cast<llvm::CodeModel::Model>(CodeModel);
-
+ llvm::CodeModel::Model CM = getCodeModel(CodeGenOpts);
std::string FeaturesStr =
llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ",");
-
- // Keep this synced with the equivalent code in tools/driver/cc1as_main.cpp.
- llvm::Optional<llvm::Reloc::Model> RM;
- RM = llvm::StringSwitch<llvm::Reloc::Model>(CodeGenOpts.RelocationModel)
- .Case("static", llvm::Reloc::Static)
- .Case("pic", llvm::Reloc::PIC_)
- .Case("ropi", llvm::Reloc::ROPI)
- .Case("rwpi", llvm::Reloc::RWPI)
- .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI)
- .Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC);
- assert(RM.hasValue() && "invalid PIC model!");
-
- CodeGenOpt::Level OptLevel;
- switch (CodeGenOpts.OptimizationLevel) {
- default:
- llvm_unreachable("Invalid optimization level!");
- case 0:
- OptLevel = CodeGenOpt::None;
- break;
- case 1:
- OptLevel = CodeGenOpt::Less;
- break;
- case 2:
- OptLevel = CodeGenOpt::Default;
- break; // O2/Os/Oz
- case 3:
- OptLevel = CodeGenOpt::Aggressive;
- break;
- }
+ llvm::Reloc::Model RM = getRelocModel(CodeGenOpts);
+ CodeGenOpt::Level OptLevel = getCGOptLevel(CodeGenOpts);
llvm::TargetOptions Options;
-
- Options.ThreadModel =
- llvm::StringSwitch<llvm::ThreadModel::Model>(CodeGenOpts.ThreadModel)
- .Case("posix", llvm::ThreadModel::POSIX)
- .Case("single", llvm::ThreadModel::Single);
-
- // Set float ABI type.
- assert((CodeGenOpts.FloatABI == "soft" || CodeGenOpts.FloatABI == "softfp" ||
- CodeGenOpts.FloatABI == "hard" || CodeGenOpts.FloatABI.empty()) &&
- "Invalid Floating Point ABI!");
- Options.FloatABIType =
- llvm::StringSwitch<llvm::FloatABI::ABIType>(CodeGenOpts.FloatABI)
- .Case("soft", llvm::FloatABI::Soft)
- .Case("softfp", llvm::FloatABI::Soft)
- .Case("hard", llvm::FloatABI::Hard)
- .Default(llvm::FloatABI::Default);
-
- // Set FP fusion mode.
- switch (CodeGenOpts.getFPContractMode()) {
- case CodeGenOptions::FPC_Off:
- Options.AllowFPOpFusion = llvm::FPOpFusion::Strict;
- break;
- case CodeGenOptions::FPC_On:
- Options.AllowFPOpFusion = llvm::FPOpFusion::Standard;
- break;
- case CodeGenOptions::FPC_Fast:
- Options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
- break;
- }
-
- Options.UseInitArray = CodeGenOpts.UseInitArray;
- Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS;
- Options.CompressDebugSections = CodeGenOpts.CompressDebugSections;
- Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations;
-
- // Set EABI version.
- Options.EABIVersion = llvm::StringSwitch<llvm::EABI>(TargetOpts.EABIVersion)
- .Case("4", llvm::EABI::EABI4)
- .Case("5", llvm::EABI::EABI5)
- .Case("gnu", llvm::EABI::GNU)
- .Default(llvm::EABI::Default);
-
- if (LangOpts.SjLjExceptions)
- Options.ExceptionModel = llvm::ExceptionHandling::SjLj;
-
- Options.LessPreciseFPMADOption = CodeGenOpts.LessPreciseFPMAD;
- Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath;
- Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath;
- Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS;
- Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath;
- Options.StackAlignmentOverride = CodeGenOpts.StackAlignment;
- Options.FunctionSections = CodeGenOpts.FunctionSections;
- Options.DataSections = CodeGenOpts.DataSections;
- Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames;
- Options.EmulatedTLS = CodeGenOpts.EmulatedTLS;
- Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning();
-
- Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
- Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels;
- Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm;
- Options.MCOptions.MCNoExecStack = CodeGenOpts.NoExecStack;
- Options.MCOptions.MCIncrementalLinkerCompatible =
- CodeGenOpts.IncrementalLinkerCompatible;
- Options.MCOptions.MCPIECopyRelocations = CodeGenOpts.PIECopyRelocations;
- Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings;
- Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose;
- Options.MCOptions.PreserveAsmComments = CodeGenOpts.PreserveAsmComments;
- Options.MCOptions.ABIName = TargetOpts.ABI;
- for (const auto &Entry : HSOpts.UserEntries)
- if (!Entry.IsFramework &&
- (Entry.Group == frontend::IncludeDirGroup::Quoted ||
- Entry.Group == frontend::IncludeDirGroup::Angled ||
- Entry.Group == frontend::IncludeDirGroup::System))
- Options.MCOptions.IASSearchPaths.push_back(
- Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path);
-
+ initTargetOptions(Options, CodeGenOpts, TargetOpts, LangOpts, HSOpts);
TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr,
Options, RM, CM, OptLevel));
}
@@ -630,13 +680,7 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses,
// Normal mode, emit a .s or .o file by running the code generator. Note,
// this also adds codegenerator level optimization passes.
- TargetMachine::CodeGenFileType CGFT = TargetMachine::CGFT_AssemblyFile;
- if (Action == Backend_EmitObj)
- CGFT = TargetMachine::CGFT_ObjectFile;
- else if (Action == Backend_EmitMCNull)
- CGFT = TargetMachine::CGFT_Null;
- else
- assert(Action == Backend_EmitAssembly && "Invalid action!");
+ TargetMachine::CodeGenFileType CGFT = getCodeGenFileType(Action);
// Add ObjC ARC final-cleanup optimizations. This is done as part of the
// "codegen" passes so that it isn't run multiple times when there is
@@ -657,7 +701,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
std::unique_ptr<raw_pwrite_stream> OS) {
TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr);
- setCommandLineOpts();
+ setCommandLineOpts(CodeGenOpts);
bool UsesCodeGen = (Action != Backend_EmitNothing &&
Action != Backend_EmitBC &&
@@ -683,14 +727,31 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
CodeGenPasses.add(
createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
+ std::unique_ptr<raw_fd_ostream> ThinLinkOS;
+
switch (Action) {
case Backend_EmitNothing:
break;
case Backend_EmitBC:
- PerModulePasses.add(createBitcodeWriterPass(
- *OS, CodeGenOpts.EmitLLVMUseLists, CodeGenOpts.EmitSummaryIndex,
- CodeGenOpts.EmitSummaryIndex));
+ if (CodeGenOpts.EmitSummaryIndex) {
+ if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
+ std::error_code EC;
+ ThinLinkOS.reset(new llvm::raw_fd_ostream(
+ CodeGenOpts.ThinLinkBitcodeFile, EC,
+ llvm::sys::fs::F_None));
+ if (EC) {
+ Diags.Report(diag::err_fe_unable_to_open_output) << CodeGenOpts.ThinLinkBitcodeFile
+ << EC.message();
+ return;
+ }
+ }
+ PerModulePasses.add(
+ createWriteThinLTOBitcodePass(*OS, ThinLinkOS.get()));
+ }
+ else
+ PerModulePasses.add(
+ createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists));
break;
case Backend_EmitLL:
@@ -769,7 +830,7 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) {
TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr);
- setCommandLineOpts();
+ setCommandLineOpts(CodeGenOpts);
// The new pass manager always makes a target machine available to passes
// during construction.
@@ -779,7 +840,28 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
return;
TheModule->setDataLayout(TM->createDataLayout());
- PassBuilder PB(TM.get());
+ PGOOptions PGOOpt;
+
+ // -fprofile-generate.
+ PGOOpt.RunProfileGen = CodeGenOpts.hasProfileIRInstr();
+ if (PGOOpt.RunProfileGen)
+ PGOOpt.ProfileGenFile = CodeGenOpts.InstrProfileOutput.empty() ?
+ DefaultProfileGenName : CodeGenOpts.InstrProfileOutput;
+
+ // -fprofile-use.
+ if (CodeGenOpts.hasProfileIRUse())
+ PGOOpt.ProfileUseFile = CodeGenOpts.ProfileInstrumentUsePath;
+
+ if (!CodeGenOpts.SampleProfileFile.empty())
+ PGOOpt.SampleProfileFile = CodeGenOpts.SampleProfileFile;
+
+ // Only pass a PGO options struct if -fprofile-generate or
+ // -fprofile-use were passed on the cmdline.
+ PassBuilder PB(TM.get(),
+ (PGOOpt.RunProfileGen ||
+ !PGOOpt.ProfileUseFile.empty() ||
+ !PGOOpt.SampleProfileFile.empty()) ?
+ Optional<PGOOptions>(PGOOpt) : None);
LoopAnalysisManager LAM;
FunctionAnalysisManager FAM;
@@ -796,20 +878,34 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
PB.registerLoopAnalyses(LAM);
PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
- ModulePassManager MPM;
+ ModulePassManager MPM(CodeGenOpts.DebugPassManager);
if (!CodeGenOpts.DisableLLVMPasses) {
+ bool IsThinLTO = CodeGenOpts.EmitSummaryIndex;
+ bool IsLTO = CodeGenOpts.PrepareForLTO;
+
if (CodeGenOpts.OptimizationLevel == 0) {
// Build a minimal pipeline based on the semantics required by Clang,
// which is just that always inlining occurs.
MPM.addPass(AlwaysInlinerPass());
+ if (IsThinLTO)
+ MPM.addPass(NameAnonGlobalPass());
} else {
- // Otherwise, use the default pass pipeline. We also have to map our
- // optimization levels into one of the distinct levels used to configure
- // the pipeline.
+ // Map our optimization levels into one of the distinct levels used to
+ // configure the pipeline.
PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts);
- MPM = PB.buildPerModuleDefaultPipeline(Level);
+ if (IsThinLTO) {
+ MPM = PB.buildThinLTOPreLinkDefaultPipeline(
+ Level, CodeGenOpts.DebugPassManager);
+ MPM.addPass(NameAnonGlobalPass());
+ } else if (IsLTO) {
+ MPM = PB.buildLTOPreLinkDefaultPipeline(Level,
+ CodeGenOpts.DebugPassManager);
+ } else {
+ MPM = PB.buildPerModuleDefaultPipeline(Level,
+ CodeGenOpts.DebugPassManager);
+ }
}
}
@@ -817,6 +913,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
// create that pass manager here and use it as needed below.
legacy::PassManager CodeGenPasses;
bool NeedCodeGen = false;
+ Optional<raw_fd_ostream> ThinLinkOS;
// Append any output we need to the pass manager.
switch (Action) {
@@ -824,9 +921,24 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
break;
case Backend_EmitBC:
- MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
- CodeGenOpts.EmitSummaryIndex,
- CodeGenOpts.EmitSummaryIndex));
+ if (CodeGenOpts.EmitSummaryIndex) {
+ if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
+ std::error_code EC;
+ ThinLinkOS.emplace(CodeGenOpts.ThinLinkBitcodeFile, EC,
+ llvm::sys::fs::F_None);
+ if (EC) {
+ Diags.Report(diag::err_fe_unable_to_open_output)
+ << CodeGenOpts.ThinLinkBitcodeFile << EC.message();
+ return;
+ }
+ }
+ MPM.addPass(
+ ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &*ThinLinkOS : nullptr));
+ } else {
+ MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
+ CodeGenOpts.EmitSummaryIndex,
+ CodeGenOpts.EmitSummaryIndex));
+ }
break;
case Backend_EmitLL:
@@ -861,21 +973,50 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
}
}
+Expected<BitcodeModule> clang::FindThinLTOModule(MemoryBufferRef MBRef) {
+ Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(MBRef);
+ if (!BMsOrErr)
+ return BMsOrErr.takeError();
+
+ // The bitcode file may contain multiple modules, we want the one that is
+ // marked as being the ThinLTO module.
+ for (BitcodeModule &BM : *BMsOrErr) {
+ Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo();
+ if (LTOInfo && LTOInfo->IsThinLTO)
+ return BM;
+ }
+
+ return make_error<StringError>("Could not find module summary",
+ inconvertibleErrorCode());
+}
+
static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
- std::unique_ptr<raw_pwrite_stream> OS) {
- StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>>
+ const HeaderSearchOptions &HeaderOpts,
+ const CodeGenOptions &CGOpts,
+ const clang::TargetOptions &TOpts,
+ const LangOptions &LOpts,
+ std::unique_ptr<raw_pwrite_stream> OS,
+ std::string SampleProfile,
+ BackendAction Action) {
+ StringMap<DenseMap<GlobalValue::GUID, GlobalValueSummary *>>
ModuleToDefinedGVSummaries;
CombinedIndex->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
+ setCommandLineOpts(CGOpts);
+
// We can simply import the values mentioned in the combined index, since
// we should only invoke this using the individual indexes written out
// via a WriteIndexesThinBackend.
FunctionImporter::ImportMapTy ImportList;
for (auto &GlobalList : *CombinedIndex) {
+ // Ignore entries for undefined references.
+ if (GlobalList.second.SummaryList.empty())
+ continue;
+
auto GUID = GlobalList.first;
- assert(GlobalList.second.size() == 1 &&
+ assert(GlobalList.second.SummaryList.size() == 1 &&
"Expected individual combined index to have one summary per GUID");
- auto &Summary = GlobalList.second[0];
+ auto &Summary = GlobalList.second.SummaryList[0];
// Skip the summaries for the importing module. These are included to
// e.g. record required linkage changes.
if (Summary->modulePath() == M->getModuleIdentifier())
@@ -897,32 +1038,15 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
return;
}
- Expected<std::vector<BitcodeModule>> BMsOrErr =
- getBitcodeModuleList(**MBOrErr);
- if (!BMsOrErr) {
- handleAllErrors(BMsOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+ Expected<BitcodeModule> BMOrErr = FindThinLTOModule(**MBOrErr);
+ if (!BMOrErr) {
+ handleAllErrors(BMOrErr.takeError(), [&](ErrorInfoBase &EIB) {
errs() << "Error loading imported file '" << I.first()
<< "': " << EIB.message() << '\n';
});
return;
}
-
- // The bitcode file may contain multiple modules, we want the one with a
- // summary.
- bool FoundModule = false;
- for (BitcodeModule &BM : *BMsOrErr) {
- Expected<bool> HasSummary = BM.hasSummary();
- if (HasSummary && *HasSummary) {
- ModuleMap.insert({I.first(), BM});
- FoundModule = true;
- break;
- }
- }
- if (!FoundModule) {
- errs() << "Error loading imported file '" << I.first()
- << "': Could not find module summary\n";
- return;
- }
+ ModuleMap.insert({I.first(), *BMOrErr});
OwnedImports.push_back(std::move(*MBOrErr));
}
@@ -930,6 +1054,36 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
return llvm::make_unique<lto::NativeObjectStream>(std::move(OS));
};
lto::Config Conf;
+ Conf.CPU = TOpts.CPU;
+ Conf.CodeModel = getCodeModel(CGOpts);
+ Conf.MAttrs = TOpts.Features;
+ Conf.RelocModel = getRelocModel(CGOpts);
+ Conf.CGOptLevel = getCGOptLevel(CGOpts);
+ initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
+ Conf.SampleProfile = std::move(SampleProfile);
+ Conf.UseNewPM = CGOpts.ExperimentalNewPassManager;
+ switch (Action) {
+ case Backend_EmitNothing:
+ Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) {
+ return false;
+ };
+ break;
+ case Backend_EmitLL:
+ Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) {
+ M->print(*OS, nullptr, CGOpts.EmitLLVMUseLists);
+ return false;
+ };
+ break;
+ case Backend_EmitBC:
+ Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) {
+ WriteBitcodeToFile(M, *OS, CGOpts.EmitLLVMUseLists);
+ return false;
+ };
+ break;
+ default:
+ Conf.CGFileType = getCodeGenFileType(Action);
+ break;
+ }
if (Error E = thinBackend(
Conf, 0, AddStream, *M, *CombinedIndex, ImportList,
ModuleToDefinedGVSummaries[M->getModuleIdentifier()], ModuleMap)) {
@@ -952,7 +1106,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
// into memory and pass it into runThinLTOBackend, which will run the
// function importer and invoke LTO passes.
Expected<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
- llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile);
+ llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile,
+ /*IgnoreEmptyThinLTOIndexFile*/true);
if (!IndexOrErr) {
logAllUnhandledErrors(IndexOrErr.takeError(), errs(),
"Error loading index file '" +
@@ -965,7 +1120,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
// of an error).
bool DoThinLTOBackend = CombinedIndex != nullptr;
if (DoThinLTOBackend) {
- runThinLTOBackend(CombinedIndex.get(), M, std::move(OS));
+ runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts,
+ LOpts, std::move(OS), CGOpts.SampleProfileFile, Action);
return;
}
}
@@ -996,6 +1152,7 @@ static const char* getSectionNameForBitcode(const Triple &T) {
return "__LLVM,__bitcode";
case Triple::COFF:
case Triple::ELF:
+ case Triple::Wasm:
case Triple::UnknownObjectFormat:
return ".llvmbc";
}
@@ -1008,6 +1165,7 @@ static const char* getSectionNameForCommandline(const Triple &T) {
return "__LLVM,__cmdline";
case Triple::COFF:
case Triple::ELF:
+ case Triple::Wasm:
case Triple::UnknownObjectFormat:
return ".llvmcmd";
}
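Note (not part of the patch): a minimal sketch of the post-patch Backend_EmitBC path in BackendUtil.cpp, consolidating the +/- lines of the hunk above. It assumes the same CodeGenOpts, Diags, OS, and PerModulePasses objects used in that hunk. With a module summary index requested (CodeGenOpts.EmitSummaryIndex, i.e. ThinLTO compiles), bitcode is now written by createWriteThinLTOBitcodePass, optionally also producing a separate thin-link file when CodeGenOpts.ThinLinkBitcodeFile is set; otherwise the plain bitcode writer is used.

    // Sketch of the resulting code, simplified from the diff hunk above.
    std::unique_ptr<llvm::raw_fd_ostream> ThinLinkOS;
    if (CodeGenOpts.EmitSummaryIndex) {                  // ThinLTO compile
      if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {    // thin-link output requested
        std::error_code EC;
        ThinLinkOS.reset(new llvm::raw_fd_ostream(CodeGenOpts.ThinLinkBitcodeFile,
                                                  EC, llvm::sys::fs::F_None));
        if (EC) {
          Diags.Report(diag::err_fe_unable_to_open_output)
              << CodeGenOpts.ThinLinkBitcodeFile << EC.message();
          return;
        }
      }
      // Writes the ThinLTO module to OS and, if ThinLinkOS is set, the
      // minimized thin-link bitcode alongside it.
      PerModulePasses.add(createWriteThinLTOBitcodePass(*OS, ThinLinkOS.get()));
    } else {
      // Regular bitcode emission without a summary index.
      PerModulePasses.add(createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists));
    }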
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp
index 9287e46..a6e6fec 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp
@@ -95,7 +95,7 @@ namespace {
BFI.StorageOffset += OffsetInChars;
LVal = LValue::MakeBitfield(Address(Addr, lvalue.getAlignment()),
BFI, lvalue.getType(),
- lvalue.getAlignmentSource());
+ lvalue.getBaseInfo());
LVal.setTBAAInfo(lvalue.getTBAAInfo());
AtomicTy = C.getIntTypeForBitwidth(AtomicSizeInBits, OrigBFI.IsSigned);
if (AtomicTy.isNull()) {
@@ -203,7 +203,7 @@ namespace {
addr = CGF.Builder.CreateStructGEP(addr, 0, CharUnits());
return LValue::MakeAddr(addr, getValueType(), CGF.getContext(),
- LVal.getAlignmentSource(), LVal.getTBAAInfo());
+ LVal.getBaseInfo(), LVal.getTBAAInfo());
}
/// \brief Emits atomic load.
@@ -1181,15 +1181,15 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr,
if (LVal.isBitField())
return CGF.EmitLoadOfBitfieldLValue(
LValue::MakeBitfield(addr, LVal.getBitFieldInfo(), LVal.getType(),
- LVal.getAlignmentSource()));
+ LVal.getBaseInfo()), loc);
if (LVal.isVectorElt())
return CGF.EmitLoadOfLValue(
LValue::MakeVectorElt(addr, LVal.getVectorIdx(), LVal.getType(),
- LVal.getAlignmentSource()), loc);
+ LVal.getBaseInfo()), loc);
assert(LVal.isExtVectorElt());
return CGF.EmitLoadOfExtVectorElementLValue(LValue::MakeExtVectorElt(
addr, LVal.getExtVectorElts(), LVal.getType(),
- LVal.getAlignmentSource()));
+ LVal.getBaseInfo()));
}
RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
@@ -1506,26 +1506,26 @@ EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue OldRVal,
UpdateLVal =
LValue::MakeBitfield(Ptr, AtomicLVal.getBitFieldInfo(),
AtomicLVal.getType(),
- AtomicLVal.getAlignmentSource());
+ AtomicLVal.getBaseInfo());
DesiredLVal =
LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(),
AtomicLVal.getType(),
- AtomicLVal.getAlignmentSource());
+ AtomicLVal.getBaseInfo());
} else if (AtomicLVal.isVectorElt()) {
UpdateLVal = LValue::MakeVectorElt(Ptr, AtomicLVal.getVectorIdx(),
AtomicLVal.getType(),
- AtomicLVal.getAlignmentSource());
+ AtomicLVal.getBaseInfo());
DesiredLVal = LValue::MakeVectorElt(
DesiredAddr, AtomicLVal.getVectorIdx(), AtomicLVal.getType(),
- AtomicLVal.getAlignmentSource());
+ AtomicLVal.getBaseInfo());
} else {
assert(AtomicLVal.isExtVectorElt());
UpdateLVal = LValue::MakeExtVectorElt(Ptr, AtomicLVal.getExtVectorElts(),
AtomicLVal.getType(),
- AtomicLVal.getAlignmentSource());
+ AtomicLVal.getBaseInfo());
DesiredLVal = LValue::MakeExtVectorElt(
DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(),
- AtomicLVal.getAlignmentSource());
+ AtomicLVal.getBaseInfo());
}
UpdateLVal.setTBAAInfo(AtomicLVal.getTBAAInfo());
DesiredLVal.setTBAAInfo(AtomicLVal.getTBAAInfo());
@@ -1612,17 +1612,17 @@ static void EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics,
DesiredLVal =
LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(),
AtomicLVal.getType(),
- AtomicLVal.getAlignmentSource());
+ AtomicLVal.getBaseInfo());
} else if (AtomicLVal.isVectorElt()) {
DesiredLVal =
LValue::MakeVectorElt(DesiredAddr, AtomicLVal.getVectorIdx(),
AtomicLVal.getType(),
- AtomicLVal.getAlignmentSource());
+ AtomicLVal.getBaseInfo());
} else {
assert(AtomicLVal.isExtVectorElt());
DesiredLVal = LValue::MakeExtVectorElt(
DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(),
- AtomicLVal.getAlignmentSource());
+ AtomicLVal.getBaseInfo());
}
DesiredLVal.setTBAAInfo(AtomicLVal.getTBAAInfo());
// Store new value in the corresponding memory area
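Note (not part of the patch): the CGAtomic.cpp changes are mechanical — each LValue::Make* call now forwards the original l-value's full LValueBaseInfo instead of only its AlignmentSource. A minimal sketch of the new construction pattern, using the constructor and MakeAddr signature visible elsewhere in this patch (e.g. the MakeAddrLValue call in CGBlocks.cpp below); the surrounding Addr, Ty, and CGF names are placeholders:

    // Build the base info explicitly, then hand it to the LValue factories.
    LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
    LValue LV = LValue::MakeAddr(Addr, Ty, CGF.getContext(), BaseInfo,
                                 /*TBAAInfo=*/nullptr);
    // Consumers now read LV.getBaseInfo() rather than LV.getAlignmentSource().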
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp
index b250b9a..1810489 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp
@@ -16,7 +16,7 @@
#include "CGObjCRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
-#include "ConstantBuilder.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/DeclObjC.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/CallSite.h"
@@ -266,7 +266,7 @@ static bool isSafeForCXXConstantCapture(QualType type) {
static llvm::Constant *tryCaptureAsConstant(CodeGenModule &CGM,
CodeGenFunction *CGF,
const VarDecl *var) {
- // Return if this is a function paramter. We shouldn't try to
+ // Return if this is a function parameter. We shouldn't try to
// rematerialize default arguments of function parameters.
if (isa<ParmVarDecl>(var))
return nullptr;
@@ -318,6 +318,19 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info,
elementTypes.push_back(CGM.getBlockDescriptorType());
}
+static QualType getCaptureFieldType(const CodeGenFunction &CGF,
+ const BlockDecl::Capture &CI) {
+ const VarDecl *VD = CI.getVariable();
+
+ // If the variable is captured by an enclosing block or lambda expression,
+ // use the type of the capture field.
+ if (CGF.BlockInfo && CI.isNested())
+ return CGF.BlockInfo->getCapture(VD).fieldType();
+ if (auto *FD = CGF.LambdaCaptureFields.lookup(VD))
+ return FD->getType();
+ return VD->getType();
+}
+
/// Compute the layout of the given block. Attempts to lay the block
/// out with minimal space requirements.
static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
@@ -432,15 +445,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
}
}
- QualType VT = variable->getType();
-
- // If the variable is captured by an enclosing block or lambda expression,
- // use the type of the capture field.
- if (CGF->BlockInfo && CI.isNested())
- VT = CGF->BlockInfo->getCapture(variable).fieldType();
- else if (auto *FD = CGF->LambdaCaptureFields.lookup(variable))
- VT = FD->getType();
-
+ QualType VT = getCaptureFieldType(*CGF, CI);
CharUnits size = C.getTypeSizeInChars(VT);
CharUnits align = C.getDeclAlign(variable);
@@ -606,15 +611,25 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) {
if (capture.isConstant()) continue;
// Ignore objects that aren't destructed.
- QualType::DestructionKind dtorKind =
- variable->getType().isDestructedType();
+ QualType VT = getCaptureFieldType(CGF, CI);
+ QualType::DestructionKind dtorKind = VT.isDestructedType();
if (dtorKind == QualType::DK_none) continue;
CodeGenFunction::Destroyer *destroyer;
// Block captures count as local values and have imprecise semantics.
// They also can't be arrays, so need to worry about that.
- if (dtorKind == QualType::DK_objc_strong_lifetime) {
+ //
+ // For const-qualified captures, emit clang.arc.use to ensure the captured
+ // object doesn't get released while we are still depending on its validity
+ // within the block.
+ if (VT.isConstQualified() &&
+ VT.getObjCLifetime() == Qualifiers::OCL_Strong &&
+ CGF.CGM.getCodeGenOpts().OptimizationLevel != 0) {
+ assert(CGF.CGM.getLangOpts().ObjCAutoRefCount &&
+ "expected ObjC ARC to be enabled");
+ destroyer = CodeGenFunction::emitARCIntrinsicUse;
+ } else if (dtorKind == QualType::DK_objc_strong_lifetime) {
destroyer = CodeGenFunction::destroyARCStrongImprecise;
} else {
destroyer = CGF.getDestroyer(dtorKind);
@@ -634,7 +649,7 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) {
if (useArrayEHCleanup)
cleanupKind = InactiveNormalAndEHCleanup;
- CGF.pushDestroy(cleanupKind, addr, variable->getType(),
+ CGF.pushDestroy(cleanupKind, addr, VT,
destroyer, useArrayEHCleanup);
// Remember where that cleanup was.
@@ -718,7 +733,12 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// Otherwise, we have to emit this as a local block.
- llvm::Constant *isa = CGM.getNSConcreteStackBlock();
+ llvm::Constant *isa =
+ (!CGM.getContext().getLangOpts().OpenCL)
+ ? CGM.getNSConcreteStackBlock()
+ : CGM.getNullPointer(VoidPtrPtrTy,
+ CGM.getContext().getPointerType(
+ QualType(CGM.getContext().VoidPtrTy)));
isa = llvm::ConstantExpr::getBitCast(isa, VoidPtrTy);
// Build the block descriptor.
@@ -856,6 +876,13 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
} else if (type->isReferenceType()) {
Builder.CreateStore(src.getPointer(), blockField);
+ // If type is const-qualified, copy the value into the block field.
+ } else if (type.isConstQualified() &&
+ type.getObjCLifetime() == Qualifiers::OCL_Strong &&
+ CGM.getCodeGenOpts().OptimizationLevel != 0) {
+ llvm::Value *value = Builder.CreateLoad(src, "captured");
+ Builder.CreateStore(value, blockField);
+
// If this is an ARC __strong block-pointer variable, don't do a
// block copy.
//
@@ -876,9 +903,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
} else {
// Fake up a new variable so that EmitScalarInit doesn't think
// we're referring to the variable in its own initializer.
- ImplicitParamDecl blockFieldPseudoVar(getContext(), /*DC*/ nullptr,
- SourceLocation(), /*name*/ nullptr,
- type);
+ ImplicitParamDecl BlockFieldPseudoVar(getContext(), type,
+ ImplicitParamDecl::Other);
// We use one of these or the other depending on whether the
// reference is nested.
@@ -891,8 +917,9 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// FIXME: Pass a specific location for the expr init so that the store is
// attributed to a reasonable location - otherwise it may be attributed to
// locations of subexpressions in the initialization.
- EmitExprAsInit(&l2r, &blockFieldPseudoVar,
- MakeAddrLValue(blockField, type, AlignmentSource::Decl),
+ LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+ EmitExprAsInit(&l2r, &BlockFieldPseudoVar,
+ MakeAddrLValue(blockField, type, BaseInfo),
/*captured by init*/ false);
}
@@ -906,9 +933,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// Cast to the converted block-pointer type, which happens (somewhat
// unfortunately) to be a pointer to function type.
- llvm::Value *result =
- Builder.CreateBitCast(blockAddr.getPointer(),
- ConvertType(blockInfo.getBlockExpr()->getType()));
+ llvm::Value *result = Builder.CreatePointerCast(
+ blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType()));
return result;
}
@@ -935,9 +961,8 @@ llvm::Type *CodeGenModule::getBlockDescriptorType() {
// const char *signature; // the block signature
// const char *layout; // reserved
// };
- BlockDescriptorType =
- llvm::StructType::create("struct.__block_descriptor",
- UnsignedLongTy, UnsignedLongTy, nullptr);
+ BlockDescriptorType = llvm::StructType::create(
+ "struct.__block_descriptor", UnsignedLongTy, UnsignedLongTy);
// Now form a pointer to that.
unsigned AddrSpace = 0;
@@ -961,9 +986,8 @@ llvm::Type *CodeGenModule::getGenericBlockLiteralType() {
// struct __block_descriptor *__descriptor;
// };
GenericBlockLiteralType =
- llvm::StructType::create("struct.__block_literal_generic",
- VoidPtrTy, IntTy, IntTy, VoidPtrTy,
- BlockDescPtrTy, nullptr);
+ llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
+ IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
return GenericBlockLiteralType;
}
@@ -976,21 +1000,41 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
// Get a pointer to the generic block literal.
+ // For OpenCL we generate generic AS void ptr to be able to reuse the same
+ // block definition for blocks with captures generated as private AS local
+ // variables and without captures generated as global AS program scope
+ // variables.
+ unsigned AddrSpace = 0;
+ if (getLangOpts().OpenCL)
+ AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
+
llvm::Type *BlockLiteralTy =
- llvm::PointerType::getUnqual(CGM.getGenericBlockLiteralType());
+ llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
// Bitcast the callee to a block literal.
- BlockPtr = Builder.CreateBitCast(BlockPtr, BlockLiteralTy, "block.literal");
+ BlockPtr =
+ Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
// Get the function pointer from the literal.
llvm::Value *FuncPtr =
Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3);
- BlockPtr = Builder.CreateBitCast(BlockPtr, VoidPtrTy);
// Add the block literal.
CallArgList Args;
- Args.add(RValue::get(BlockPtr), getContext().VoidPtrTy);
+
+ QualType VoidPtrQualTy = getContext().VoidPtrTy;
+ llvm::Type *GenericVoidPtrTy = VoidPtrTy;
+ if (getLangOpts().OpenCL) {
+ GenericVoidPtrTy = Builder.getInt8PtrTy(
+ getContext().getTargetAddressSpace(LangAS::opencl_generic));
+ VoidPtrQualTy =
+ getContext().getPointerType(getContext().getAddrSpaceQualType(
+ getContext().VoidTy, LangAS::opencl_generic));
+ }
+
+ BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy);
+ Args.add(RValue::get(BlockPtr), VoidPtrQualTy);
QualType FnType = BPT->getPointeeType();
@@ -1097,7 +1141,11 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
auto fields = builder.beginStruct();
// isa
- fields.add(CGM.getNSConcreteGlobalBlock());
+ fields.add((!CGM.getContext().getLangOpts().OpenCL)
+ ? CGM.getNSConcreteGlobalBlock()
+ : CGM.getNullPointer(CGM.VoidPtrPtrTy,
+ CGM.getContext().getPointerType(QualType(
+ CGM.getContext().VoidPtrTy))));
// __flags
BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE;
@@ -1114,16 +1162,19 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
// Descriptor
fields.add(buildBlockDescriptor(CGM, blockInfo));
- llvm::Constant *literal =
- fields.finishAndCreateGlobal("__block_literal_global",
- blockInfo.BlockAlign,
- /*constant*/ true);
+ unsigned AddrSpace = 0;
+ if (CGM.getContext().getLangOpts().OpenCL)
+ AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_global);
+
+ llvm::Constant *literal = fields.finishAndCreateGlobal(
+ "__block_literal_global", blockInfo.BlockAlign,
+ /*constant*/ true, llvm::GlobalVariable::InternalLinkage, AddrSpace);
// Return a constant of the appropriately-casted type.
llvm::Type *RequiredType =
CGM.getTypes().ConvertType(blockInfo.getBlockExpr()->getType());
llvm::Constant *Result =
- llvm::ConstantExpr::getBitCast(literal, RequiredType);
+ llvm::ConstantExpr::getPointerCast(literal, RequiredType);
CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result);
return Result;
}
@@ -1155,9 +1206,13 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
// Instead of messing around with LocalDeclMap, just set the value
// directly as BlockPointer.
- BlockPointer = Builder.CreateBitCast(arg,
- BlockInfo->StructureType->getPointerTo(),
- "block");
+ BlockPointer = Builder.CreatePointerCast(
+ arg,
+ BlockInfo->StructureType->getPointerTo(
+ getContext().getLangOpts().OpenCL
+ ? getContext().getTargetAddressSpace(LangAS::opencl_generic)
+ : 0),
+ "block");
}
Address CodeGenFunction::LoadBlockStruct() {
@@ -1196,11 +1251,21 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
// The first argument is the block pointer. Just take it as a void*
// and cast it later.
QualType selfTy = getContext().VoidPtrTy;
+
+ // For OpenCL passed block pointer can be private AS local variable or
+ // global AS program scope variable (for the case with and without captures).
+ // Generic AS is used therefore to be able to accommodate both private and
+ // generic AS in one implementation.
+ if (getLangOpts().OpenCL)
+ selfTy = getContext().getPointerType(getContext().getAddrSpaceQualType(
+ getContext().VoidTy, LangAS::opencl_generic));
+
IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor");
- ImplicitParamDecl selfDecl(getContext(), const_cast<BlockDecl*>(blockDecl),
- SourceLocation(), II, selfTy);
- args.push_back(&selfDecl);
+ ImplicitParamDecl SelfDecl(getContext(), const_cast<BlockDecl *>(blockDecl),
+ SourceLocation(), II, selfTy,
+ ImplicitParamDecl::ObjCSelf);
+ args.push_back(&SelfDecl);
// Now add the rest of the parameters.
args.append(blockDecl->param_begin(), blockDecl->param_end());
@@ -1323,23 +1388,102 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
return fn;
}
-/*
- notes.push_back(HelperInfo());
- HelperInfo &note = notes.back();
- note.index = capture.getIndex();
- note.RequiresCopying = (ci->hasCopyExpr() || BlockRequiresCopying(type));
- note.cxxbar_import = ci->getCopyExpr();
-
- if (ci->isByRef()) {
- note.flag = BLOCK_FIELD_IS_BYREF;
- if (type.isObjCGCWeak())
- note.flag |= BLOCK_FIELD_IS_WEAK;
- } else if (type->isBlockPointerType()) {
- note.flag = BLOCK_FIELD_IS_BLOCK;
- } else {
- note.flag = BLOCK_FIELD_IS_OBJECT;
- }
- */
+namespace {
+
+/// Represents a type of copy/destroy operation that should be performed for an
+/// entity that's captured by a block.
+enum class BlockCaptureEntityKind {
+ CXXRecord, // Copy or destroy
+ ARCWeak,
+ ARCStrong,
+ BlockObject, // Assign or release
+ None
+};
+
+/// Represents a captured entity that requires extra operations in order for
+/// this entity to be copied or destroyed correctly.
+struct BlockCaptureManagedEntity {
+ BlockCaptureEntityKind Kind;
+ BlockFieldFlags Flags;
+ const BlockDecl::Capture &CI;
+ const CGBlockInfo::Capture &Capture;
+
+ BlockCaptureManagedEntity(BlockCaptureEntityKind Type, BlockFieldFlags Flags,
+ const BlockDecl::Capture &CI,
+ const CGBlockInfo::Capture &Capture)
+ : Kind(Type), Flags(Flags), CI(CI), Capture(Capture) {}
+};
+
+} // end anonymous namespace
+
+static std::pair<BlockCaptureEntityKind, BlockFieldFlags>
+computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
+ const LangOptions &LangOpts) {
+ if (CI.getCopyExpr()) {
+ assert(!CI.isByRef());
+ // don't bother computing flags
+ return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags());
+ }
+ BlockFieldFlags Flags;
+ if (CI.isByRef()) {
+ Flags = BLOCK_FIELD_IS_BYREF;
+ if (T.isObjCGCWeak())
+ Flags |= BLOCK_FIELD_IS_WEAK;
+ return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
+ }
+ if (!T->isObjCRetainableType())
+ // For all other types, the memcpy is fine.
+ return std::make_pair(BlockCaptureEntityKind::None, Flags);
+
+ Flags = BLOCK_FIELD_IS_OBJECT;
+ bool isBlockPointer = T->isBlockPointerType();
+ if (isBlockPointer)
+ Flags = BLOCK_FIELD_IS_BLOCK;
+
+ // Special rules for ARC captures:
+ Qualifiers QS = T.getQualifiers();
+
+ // We need to register __weak direct captures with the runtime.
+ if (QS.getObjCLifetime() == Qualifiers::OCL_Weak)
+ return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags);
+
+ // We need to retain the copied value for __strong direct captures.
+ if (QS.getObjCLifetime() == Qualifiers::OCL_Strong) {
+ // If it's a block pointer, we have to copy the block and
+ // assign that to the destination pointer, so we might as
+ // well use _Block_object_assign. Otherwise we can avoid that.
+ return std::make_pair(!isBlockPointer ? BlockCaptureEntityKind::ARCStrong
+ : BlockCaptureEntityKind::BlockObject,
+ Flags);
+ }
+
+ // Non-ARC captures of retainable pointers are strong and
+ // therefore require a call to _Block_object_assign.
+ if (!QS.getObjCLifetime() && !LangOpts.ObjCAutoRefCount)
+ return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
+
+ // Otherwise the memcpy is fine.
+ return std::make_pair(BlockCaptureEntityKind::None, Flags);
+}
+
+/// Find the set of block captures that need to be explicitly copied or destroy.
+static void findBlockCapturedManagedEntities(
+ const CGBlockInfo &BlockInfo, const LangOptions &LangOpts,
+ SmallVectorImpl<BlockCaptureManagedEntity> &ManagedCaptures,
+ llvm::function_ref<std::pair<BlockCaptureEntityKind, BlockFieldFlags>(
+ const BlockDecl::Capture &, QualType, const LangOptions &)>
+ Predicate) {
+ for (const auto &CI : BlockInfo.getBlockDecl()->captures()) {
+ const VarDecl *Variable = CI.getVariable();
+ const CGBlockInfo::Capture &Capture = BlockInfo.getCapture(Variable);
+ if (Capture.isConstant())
+ continue;
+
+ auto Info = Predicate(CI, Variable->getType(), LangOpts);
+ if (Info.first != BlockCaptureEntityKind::None)
+ ManagedCaptures.emplace_back(Info.first, Info.second, CI, Capture);
+ }
+}
/// Generate the copy-helper function for a block closure object:
/// static void block_copy_helper(block_t *dst, block_t *src);
@@ -1354,12 +1498,12 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
ASTContext &C = getContext();
FunctionArgList args;
- ImplicitParamDecl dstDecl(getContext(), nullptr, SourceLocation(), nullptr,
- C.VoidPtrTy);
- args.push_back(&dstDecl);
- ImplicitParamDecl srcDecl(getContext(), nullptr, SourceLocation(), nullptr,
- C.VoidPtrTy);
- args.push_back(&srcDecl);
+ ImplicitParamDecl DstDecl(getContext(), C.VoidPtrTy,
+ ImplicitParamDecl::Other);
+ args.push_back(&DstDecl);
+ ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy,
+ ImplicitParamDecl::Other);
+ args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
@@ -1391,86 +1535,36 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
auto AL = ApplyDebugLocation::CreateArtificial(*this);
llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
- Address src = GetAddrOfLocalVar(&srcDecl);
+ Address src = GetAddrOfLocalVar(&SrcDecl);
src = Address(Builder.CreateLoad(src), blockInfo.BlockAlign);
src = Builder.CreateBitCast(src, structPtrTy, "block.source");
- Address dst = GetAddrOfLocalVar(&dstDecl);
+ Address dst = GetAddrOfLocalVar(&DstDecl);
dst = Address(Builder.CreateLoad(dst), blockInfo.BlockAlign);
dst = Builder.CreateBitCast(dst, structPtrTy, "block.dest");
- const BlockDecl *blockDecl = blockInfo.getBlockDecl();
-
- for (const auto &CI : blockDecl->captures()) {
- const VarDecl *variable = CI.getVariable();
- QualType type = variable->getType();
-
- const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
- if (capture.isConstant()) continue;
-
- const Expr *copyExpr = CI.getCopyExpr();
- BlockFieldFlags flags;
-
- bool useARCWeakCopy = false;
- bool useARCStrongCopy = false;
-
- if (copyExpr) {
- assert(!CI.isByRef());
- // don't bother computing flags
+ SmallVector<BlockCaptureManagedEntity, 4> CopiedCaptures;
+ findBlockCapturedManagedEntities(blockInfo, getLangOpts(), CopiedCaptures,
+ computeCopyInfoForBlockCapture);
- } else if (CI.isByRef()) {
- flags = BLOCK_FIELD_IS_BYREF;
- if (type.isObjCGCWeak())
- flags |= BLOCK_FIELD_IS_WEAK;
-
- } else if (type->isObjCRetainableType()) {
- flags = BLOCK_FIELD_IS_OBJECT;
- bool isBlockPointer = type->isBlockPointerType();
- if (isBlockPointer)
- flags = BLOCK_FIELD_IS_BLOCK;
-
- // Special rules for ARC captures:
- Qualifiers qs = type.getQualifiers();
-
- // We need to register __weak direct captures with the runtime.
- if (qs.getObjCLifetime() == Qualifiers::OCL_Weak) {
- useARCWeakCopy = true;
-
- // We need to retain the copied value for __strong direct captures.
- } else if (qs.getObjCLifetime() == Qualifiers::OCL_Strong) {
- // If it's a block pointer, we have to copy the block and
- // assign that to the destination pointer, so we might as
- // well use _Block_object_assign. Otherwise we can avoid that.
- if (!isBlockPointer)
- useARCStrongCopy = true;
-
- // Non-ARC captures of retainable pointers are strong and
- // therefore require a call to _Block_object_assign.
- } else if (!qs.getObjCLifetime() && !getLangOpts().ObjCAutoRefCount) {
- // fall through
-
- // Otherwise the memcpy is fine.
- } else {
- continue;
- }
-
- // For all other types, the memcpy is fine.
- } else {
- continue;
- }
+ for (const auto &CopiedCapture : CopiedCaptures) {
+ const BlockDecl::Capture &CI = CopiedCapture.CI;
+ const CGBlockInfo::Capture &capture = CopiedCapture.Capture;
+ BlockFieldFlags flags = CopiedCapture.Flags;
unsigned index = capture.getIndex();
Address srcField = Builder.CreateStructGEP(src, index, capture.getOffset());
Address dstField = Builder.CreateStructGEP(dst, index, capture.getOffset());
// If there's an explicit copy expression, we do that.
- if (copyExpr) {
- EmitSynthesizedCXXCopyCtor(dstField, srcField, copyExpr);
- } else if (useARCWeakCopy) {
+ if (CI.getCopyExpr()) {
+ assert(CopiedCapture.Kind == BlockCaptureEntityKind::CXXRecord);
+ EmitSynthesizedCXXCopyCtor(dstField, srcField, CI.getCopyExpr());
+ } else if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCWeak) {
EmitARCCopyWeak(dstField, srcField);
} else {
llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src");
- if (useARCStrongCopy) {
+ if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCStrong) {
// At -O0, store null into the destination field (so that the
// storeStrong doesn't over-release) and then call storeStrong.
// This is a workaround to not having an initStrong call.
@@ -1491,6 +1585,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent();
}
} else {
+ assert(CopiedCapture.Kind == BlockCaptureEntityKind::BlockObject);
srcValue = Builder.CreateBitCast(srcValue, VoidPtrTy);
llvm::Value *dstAddr =
Builder.CreateBitCast(dstField.getPointer(), VoidPtrTy);
@@ -1498,6 +1593,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
dstAddr, srcValue, llvm::ConstantInt::get(Int32Ty, flags.getBitMask())
};
+ const VarDecl *variable = CI.getVariable();
bool copyCanThrow = false;
if (CI.isByRef() && variable->getType()->getAsCXXRecordDecl()) {
const Expr *copyExpr =
@@ -1521,6 +1617,52 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
}
+static std::pair<BlockCaptureEntityKind, BlockFieldFlags>
+computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
+ const LangOptions &LangOpts) {
+ BlockFieldFlags Flags;
+ if (CI.isByRef()) {
+ Flags = BLOCK_FIELD_IS_BYREF;
+ if (T.isObjCGCWeak())
+ Flags |= BLOCK_FIELD_IS_WEAK;
+ return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
+ }
+
+ if (const CXXRecordDecl *Record = T->getAsCXXRecordDecl()) {
+ if (Record->hasTrivialDestructor())
+ return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags());
+ return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags());
+ }
+
+ // Other types don't need to be destroyed explicitly.
+ if (!T->isObjCRetainableType())
+ return std::make_pair(BlockCaptureEntityKind::None, Flags);
+
+ Flags = BLOCK_FIELD_IS_OBJECT;
+ if (T->isBlockPointerType())
+ Flags = BLOCK_FIELD_IS_BLOCK;
+
+ // Special rules for ARC captures.
+ Qualifiers QS = T.getQualifiers();
+
+ // Use objc_storeStrong for __strong direct captures; the
+ // dynamic tools really like it when we do this.
+ if (QS.getObjCLifetime() == Qualifiers::OCL_Strong)
+ return std::make_pair(BlockCaptureEntityKind::ARCStrong, Flags);
+
+ // Support __weak direct captures.
+ if (QS.getObjCLifetime() == Qualifiers::OCL_Weak)
+ return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags);
+
+ // Non-ARC captures are strong, and we need to use
+ // _Block_object_dispose.
+ if (!QS.hasObjCLifetime() && !LangOpts.ObjCAutoRefCount)
+ return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags);
+
+ // Otherwise, we have nothing to do.
+ return std::make_pair(BlockCaptureEntityKind::None, Flags);
+}
+
/// Generate the destroy-helper function for a block closure object:
/// static void block_destroy_helper(block_t *theBlock);
///
@@ -1533,9 +1675,9 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
ASTContext &C = getContext();
FunctionArgList args;
- ImplicitParamDecl srcDecl(getContext(), nullptr, SourceLocation(), nullptr,
- C.VoidPtrTy);
- args.push_back(&srcDecl);
+ ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy,
+ ImplicitParamDecl::Other);
+ args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
@@ -1566,83 +1708,43 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
- Address src = GetAddrOfLocalVar(&srcDecl);
+ Address src = GetAddrOfLocalVar(&SrcDecl);
src = Address(Builder.CreateLoad(src), blockInfo.BlockAlign);
src = Builder.CreateBitCast(src, structPtrTy, "block");
- const BlockDecl *blockDecl = blockInfo.getBlockDecl();
-
CodeGenFunction::RunCleanupsScope cleanups(*this);
- for (const auto &CI : blockDecl->captures()) {
- const VarDecl *variable = CI.getVariable();
- QualType type = variable->getType();
+ SmallVector<BlockCaptureManagedEntity, 4> DestroyedCaptures;
+ findBlockCapturedManagedEntities(blockInfo, getLangOpts(), DestroyedCaptures,
+ computeDestroyInfoForBlockCapture);
- const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
- if (capture.isConstant()) continue;
-
- BlockFieldFlags flags;
- const CXXDestructorDecl *dtor = nullptr;
-
- bool useARCWeakDestroy = false;
- bool useARCStrongDestroy = false;
-
- if (CI.isByRef()) {
- flags = BLOCK_FIELD_IS_BYREF;
- if (type.isObjCGCWeak())
- flags |= BLOCK_FIELD_IS_WEAK;
- } else if (const CXXRecordDecl *record = type->getAsCXXRecordDecl()) {
- if (record->hasTrivialDestructor())
- continue;
- dtor = record->getDestructor();
- } else if (type->isObjCRetainableType()) {
- flags = BLOCK_FIELD_IS_OBJECT;
- if (type->isBlockPointerType())
- flags = BLOCK_FIELD_IS_BLOCK;
-
- // Special rules for ARC captures.
- Qualifiers qs = type.getQualifiers();
-
- // Use objc_storeStrong for __strong direct captures; the
- // dynamic tools really like it when we do this.
- if (qs.getObjCLifetime() == Qualifiers::OCL_Strong) {
- useARCStrongDestroy = true;
-
- // Support __weak direct captures.
- } else if (qs.getObjCLifetime() == Qualifiers::OCL_Weak) {
- useARCWeakDestroy = true;
-
- // Non-ARC captures are strong, and we need to use _Block_object_dispose.
- } else if (!qs.hasObjCLifetime() && !getLangOpts().ObjCAutoRefCount) {
- // fall through
-
- // Otherwise, we have nothing to do.
- } else {
- continue;
- }
- } else {
- continue;
- }
+ for (const auto &DestroyedCapture : DestroyedCaptures) {
+ const BlockDecl::Capture &CI = DestroyedCapture.CI;
+ const CGBlockInfo::Capture &capture = DestroyedCapture.Capture;
+ BlockFieldFlags flags = DestroyedCapture.Flags;
Address srcField =
Builder.CreateStructGEP(src, capture.getIndex(), capture.getOffset());
- // If there's an explicit copy expression, we do that.
- if (dtor) {
- PushDestructorCleanup(dtor, srcField);
+ // If the captured record has a destructor then call it.
+ if (DestroyedCapture.Kind == BlockCaptureEntityKind::CXXRecord) {
+ const auto *Dtor =
+ CI.getVariable()->getType()->getAsCXXRecordDecl()->getDestructor();
+ PushDestructorCleanup(Dtor, srcField);
- // If this is a __weak capture, emit the release directly.
- } else if (useARCWeakDestroy) {
+ // If this is a __weak capture, emit the release directly.
+ } else if (DestroyedCapture.Kind == BlockCaptureEntityKind::ARCWeak) {
EmitARCDestroyWeak(srcField);
// Destroy strong objects with a call if requested.
- } else if (useARCStrongDestroy) {
+ } else if (DestroyedCapture.Kind == BlockCaptureEntityKind::ARCStrong) {
EmitARCDestroyStrong(srcField, ARCImpreciseLifetime);
// Otherwise we call _Block_object_dispose. It wouldn't be too
// hard to just emit this as a cleanup if we wanted to make sure
// that things were done in reverse.
} else {
+ assert(DestroyedCapture.Kind == BlockCaptureEntityKind::BlockObject);
llvm::Value *value = Builder.CreateLoad(srcField);
value = Builder.CreateBitCast(value, VoidPtrTy);
BuildBlockRelease(value, flags);
@@ -1815,13 +1917,13 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
QualType R = Context.VoidTy;
FunctionArgList args;
- ImplicitParamDecl dst(CGF.getContext(), nullptr, SourceLocation(), nullptr,
- Context.VoidPtrTy);
- args.push_back(&dst);
+ ImplicitParamDecl Dst(CGF.getContext(), Context.VoidPtrTy,
+ ImplicitParamDecl::Other);
+ args.push_back(&Dst);
- ImplicitParamDecl src(CGF.getContext(), nullptr, SourceLocation(), nullptr,
- Context.VoidPtrTy);
- args.push_back(&src);
+ ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy,
+ ImplicitParamDecl::Other);
+ args.push_back(&Src);
const CGFunctionInfo &FI =
CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args);
@@ -1852,7 +1954,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
llvm::Type *byrefPtrType = byrefInfo.Type->getPointerTo(0);
// dst->x
- Address destField = CGF.GetAddrOfLocalVar(&dst);
+ Address destField = CGF.GetAddrOfLocalVar(&Dst);
destField = Address(CGF.Builder.CreateLoad(destField),
byrefInfo.ByrefAlignment);
destField = CGF.Builder.CreateBitCast(destField, byrefPtrType);
@@ -1860,7 +1962,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
"dest-object");
// src->x
- Address srcField = CGF.GetAddrOfLocalVar(&src);
+ Address srcField = CGF.GetAddrOfLocalVar(&Src);
srcField = Address(CGF.Builder.CreateLoad(srcField),
byrefInfo.ByrefAlignment);
srcField = CGF.Builder.CreateBitCast(srcField, byrefPtrType);
@@ -1892,9 +1994,9 @@ generateByrefDisposeHelper(CodeGenFunction &CGF,
QualType R = Context.VoidTy;
FunctionArgList args;
- ImplicitParamDecl src(CGF.getContext(), nullptr, SourceLocation(), nullptr,
- Context.VoidPtrTy);
- args.push_back(&src);
+ ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy,
+ ImplicitParamDecl::Other);
+ args.push_back(&Src);
const CGFunctionInfo &FI =
CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args);
@@ -1923,7 +2025,7 @@ generateByrefDisposeHelper(CodeGenFunction &CGF,
CGF.StartFunction(FD, R, Fn, FI, args);
if (generator.needsDispose()) {
- Address addr = CGF.GetAddrOfLocalVar(&src);
+ Address addr = CGF.GetAddrOfLocalVar(&Src);
addr = Address(CGF.Builder.CreateLoad(addr), byrefInfo.ByrefAlignment);
auto byrefPtrType = byrefInfo.Type->getPointerTo(0);
addr = CGF.Builder.CreateBitCast(addr, byrefPtrType);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp
index b3d02f1..f3527b0 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp
@@ -420,10 +420,11 @@ getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
llvm::Value *
CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
- llvm::IntegerType *ResType) {
+ llvm::IntegerType *ResType,
+ llvm::Value *EmittedE) {
uint64_t ObjectSize;
if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
- return emitBuiltinObjectSize(E, Type, ResType);
+ return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
}
@@ -432,9 +433,14 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
/// it)
/// - A call to the @llvm.objectsize intrinsic
+///
+/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
+/// and we wouldn't otherwise try to reference a pass_object_size parameter,
+/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
llvm::Value *
CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
- llvm::IntegerType *ResType) {
+ llvm::IntegerType *ResType,
+ llvm::Value *EmittedE) {
// We need to reference an argument if the pointer is a parameter with the
// pass_object_size attribute.
if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
@@ -457,16 +463,20 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
// LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
// evaluate E for side-effects. In either case, we shouldn't lower to
// @llvm.objectsize.
- if (Type == 3 || E->HasSideEffects(getContext()))
+ if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
return getDefaultBuiltinObjectSizeResult(Type, ResType);
- // LLVM only supports 0 and 2, make sure that we pass along that
- // as a boolean.
- auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1);
- // FIXME: Get right address space.
- llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)};
- Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
- return Builder.CreateCall(F, {EmitScalarExpr(E), CI});
+ Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
+ assert(Ptr->getType()->isPointerTy() &&
+ "Non-pointer passed to __builtin_object_size?");
+
+ Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
+
+ // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
+ Value *Min = Builder.getInt1((Type & 2) != 0);
+ // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
+ Value *NullIsUnknown = Builder.getTrue();
+ return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
}
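// Illustrative usage, an aside rather than part of this diff: a plain call now
// lowers to @llvm.objectsize with the extra null-is-unknown flag, while a
// pass_object_size parameter short-circuits to the implicit size argument and
// never reaches the intrinsic.
#include <stddef.h>
size_t remaining(char *p) {
  return __builtin_object_size(p, 0);
  // -> call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 false, i1 true)
}
size_t known_size(void *buf __attribute__((pass_object_size(0)))) {
  return __builtin_object_size(buf, 0);  // reads the hidden size parameter
}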
// Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we
@@ -482,10 +492,12 @@ enum class CodeGenFunction::MSVCIntrin {
_InterlockedIncrement,
_InterlockedOr,
_InterlockedXor,
+ _interlockedbittestandset,
+ __fastfail,
};
Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
- const CallExpr *E) {
+ const CallExpr *E) {
switch (BuiltinID) {
case MSVCIntrin::_BitScanForward:
case MSVCIntrin::_BitScanReverse: {
@@ -548,6 +560,22 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
case MSVCIntrin::_InterlockedXor:
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
+ case MSVCIntrin::_interlockedbittestandset: {
+ llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
+ AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
+ AtomicRMWInst::Or, Addr,
+ Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
+ llvm::AtomicOrdering::SequentiallyConsistent);
+ // Shift the relevant bit to the least significant position, truncate to
+ // the result type, and test the low bit.
+ llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
+ llvm::Value *Truncated =
+ Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
+ return Builder.CreateAnd(Truncated,
+ ConstantInt::get(Truncated->getType(), 1));
+ }
+
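// A small model of the semantics emitted above (an illustration, not MSVC's
// implementation): an atomic fetch-or of the selected bit, returning that
// bit's previous value.
static unsigned char bittestandset_model(long volatile *Base, long Bit) {
  long Old = __atomic_fetch_or(Base, 1L << Bit, __ATOMIC_SEQ_CST);
  return (unsigned char)((Old >> Bit) & 1);
}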
case MSVCIntrin::_InterlockedDecrement: {
llvm::Type *IntTy = ConvertType(E->getType());
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
@@ -566,6 +594,37 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
llvm::AtomicOrdering::SequentiallyConsistent);
return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
}
+
+ case MSVCIntrin::__fastfail: {
+ // Request immediate process termination from the kernel. The instruction
+ // sequences to do this are documented on MSDN:
+ // https://msdn.microsoft.com/en-us/library/dn774154.aspx
+ llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
+ StringRef Asm, Constraints;
+ switch (ISA) {
+ default:
+ ErrorUnsupported(E, "__fastfail call for this architecture");
+ break;
+ case llvm::Triple::x86:
+ case llvm::Triple::x86_64:
+ Asm = "int $$0x29";
+ Constraints = "{cx}";
+ break;
+ case llvm::Triple::thumb:
+ Asm = "udf #251";
+ Constraints = "{r0}";
+ break;
+ }
+ llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
+ llvm::InlineAsm *IA =
+ llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
+ llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
+ getLLVMContext(), llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoReturn);
+ CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
+ CS.setAttributes(NoReturnAttr);
+ return CS.getInstruction();
+ }
}
llvm_unreachable("Incorrect MSVC intrinsic!");
}
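// Illustrative usage (assumption): __fastfail, declared in MSVC's <intrin.h>,
// takes a FAST_FAIL_* code and does not return. With the lowering above, x86
// and x86-64 place the code in ecx and execute "int 0x29"; Thumb uses
// "udf #251" with the code in r0.
//
//   __fastfail(7);  // e.g. FAST_FAIL_FATAL_APP_EXIT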
@@ -932,7 +991,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// We pass this builtin onto the optimizer so that it can figure out the
// object size in more complex cases.
- return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType));
+ return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
+ /*EmittedE=*/nullptr));
}
case Builtin::BI__builtin_prefetch: {
Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
@@ -1750,12 +1810,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__atomic_signal_fence:
case Builtin::BI__c11_atomic_thread_fence:
case Builtin::BI__c11_atomic_signal_fence: {
- llvm::SynchronizationScope Scope;
+ llvm::SyncScope::ID SSID;
if (BuiltinID == Builtin::BI__atomic_signal_fence ||
BuiltinID == Builtin::BI__c11_atomic_signal_fence)
- Scope = llvm::SingleThread;
+ SSID = llvm::SyncScope::SingleThread;
else
- Scope = llvm::CrossThread;
+ SSID = llvm::SyncScope::System;
Value *Order = EmitScalarExpr(E->getArg(0));
if (isa<llvm::ConstantInt>(Order)) {
int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
@@ -1765,17 +1825,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
break;
case 1: // memory_order_consume
case 2: // memory_order_acquire
- Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
break;
case 3: // memory_order_release
- Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
break;
case 4: // memory_order_acq_rel
- Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
break;
case 5: // memory_order_seq_cst
- Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
- Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
break;
}
return RValue::get(nullptr);
@@ -1792,23 +1851,23 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
Builder.SetInsertPoint(AcquireBB);
- Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(1), AcquireBB);
SI->addCase(Builder.getInt32(2), AcquireBB);
Builder.SetInsertPoint(ReleaseBB);
- Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(3), ReleaseBB);
Builder.SetInsertPoint(AcqRelBB);
- Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(4), AcqRelBB);
Builder.SetInsertPoint(SeqCstBB);
- Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(5), SeqCstBB);
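// A compact model of the mapping above (an aside, not part of the patch):
// thread fences use the default system scope, signal fences the single-thread
// scope, and memory_order_relaxed emits no fence at all.
void fence_examples(void) {
  __atomic_thread_fence(__ATOMIC_SEQ_CST);  // seq_cst fence, system scope
  __atomic_signal_fence(__ATOMIC_ACQUIRE);  // acquire fence, single-thread scope
  __atomic_thread_fence(__ATOMIC_RELAXED);  // no fence emitted
}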
@@ -2195,16 +2254,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI_InterlockedXor16:
case Builtin::BI_InterlockedXor:
return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
- case Builtin::BI__readfsdword: {
- llvm::Type *IntTy = ConvertType(E->getType());
- Value *IntToPtr =
- Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
- llvm::PointerType::get(IntTy, 257));
- LoadInst *Load = Builder.CreateAlignedLoad(
- IntTy, IntToPtr, getContext().getTypeAlignInChars(E->getType()));
- Load->setVolatile(true);
- return RValue::get(Load);
- }
+ case Builtin::BI_interlockedbittestandset:
+ return RValue::get(
+ EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
case Builtin::BI__exception_code:
case Builtin::BI_exception_code:
@@ -2218,9 +2270,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI_setjmpex: {
if (getTarget().getTriple().isOSMSVCRT()) {
llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
- llvm::AttributeSet ReturnsTwiceAttr =
- AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::ReturnsTwice);
+ llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
+ getLLVMContext(), llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::ReturnsTwice);
llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
"_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
@@ -2238,9 +2290,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
}
case Builtin::BI_setjmp: {
if (getTarget().getTriple().isOSMSVCRT()) {
- llvm::AttributeSet ReturnsTwiceAttr =
- AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::ReturnsTwice);
+ llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
+ getLLVMContext(), llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::ReturnsTwice);
llvm::Value *Buf = Builder.CreateBitOrPointerCast(
EmitScalarExpr(E->getArg(0)), Int8PtrTy);
llvm::CallSite CS;
@@ -2276,6 +2328,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
break;
}
+ case Builtin::BI__fastfail:
+ return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
+
case Builtin::BI__builtin_coro_size: {
auto & Context = getContext();
auto SizeTy = Context.getSizeType();
@@ -2492,25 +2547,36 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
unsigned NumArgs = E->getNumArgs();
llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
- llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy);
+ llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
+ getContext().getTargetAddressSpace(LangAS::opencl_generic));
llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
- llvm::Value *Range = EmitScalarExpr(E->getArg(2));
+ LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
+ llvm::Value *Range = NDRangeL.getAddress().getPointer();
+ llvm::Type *RangeTy = NDRangeL.getAddress().getType();
if (NumArgs == 4) {
// The most basic form of the call with parameters:
// queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
Name = "__enqueue_kernel_basic";
- llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy};
+ llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
llvm::FunctionType *FTy = llvm::FunctionType::get(
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
- llvm::Value *Block =
- Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
+ llvm::Value *Block = Builder.CreatePointerCast(
+ EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
- return RValue::get(Builder.CreateCall(
- CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block}));
+ AttrBuilder B;
+ B.addAttribute(Attribute::ByVal);
+ llvm::AttributeList ByValAttrSet =
+ llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
+
+ auto RTCall =
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
+ {Queue, Flags, Range, Block});
+ RTCall->setAttributes(ByValAttrSet);
+ return RValue::get(RTCall);
}
assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
@@ -2518,14 +2584,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
if (E->getArg(3)->getType()->isBlockPointerType()) {
// No events passed, but has variadic arguments.
Name = "__enqueue_kernel_vaargs";
- llvm::Value *Block =
- Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
+ llvm::Value *Block = Builder.CreatePointerCast(
+ EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
// Create a vector of the arguments, as well as a constant value to
// express to the runtime the number of variadic arguments.
std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
ConstantInt::get(IntTy, NumArgs - 4)};
- std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy,
- IntTy};
+ std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy,
+ GenericVoidPtrTy, IntTy};
// Each of the following arguments specifies the size of the corresponding
// argument passed to the enqueued block.
@@ -2555,12 +2621,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// Convert to generic address space.
EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
- llvm::Value *Block =
- Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy);
+ llvm::Value *Block = Builder.CreatePointerCast(
+ EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);
- std::vector<llvm::Type *> ArgTys = {QueueTy, Int32Ty, RangeTy,
- Int32Ty, EventPtrTy, EventPtrTy,
- Int8PtrTy};
+ std::vector<llvm::Type *> ArgTys = {
+ QueueTy, Int32Ty, RangeTy, Int32Ty,
+ EventPtrTy, EventPtrTy, GenericVoidPtrTy};
std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
EventList, ClkEvent, Block};
@@ -2592,30 +2658,35 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
llvm::ArrayRef<llvm::Value *>(Args)));
}
+ LLVM_FALLTHROUGH;
}
// OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
// parameter.
case Builtin::BIget_kernel_work_group_size: {
+ llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
+ getContext().getTargetAddressSpace(LangAS::opencl_generic));
Value *Arg = EmitScalarExpr(E->getArg(0));
- Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
- return RValue::get(
- Builder.CreateCall(CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, Int8PtrTy, false),
- "__get_kernel_work_group_size_impl"),
- Arg));
+ Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
+ return RValue::get(Builder.CreateCall(
+ CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
+ "__get_kernel_work_group_size_impl"),
+ Arg));
}
case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
+ llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
+ getContext().getTargetAddressSpace(LangAS::opencl_generic));
Value *Arg = EmitScalarExpr(E->getArg(0));
- Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
+ Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
return RValue::get(Builder.CreateCall(
CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, Int8PtrTy, false),
+ llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
"__get_kernel_preferred_work_group_multiple_impl"),
Arg));
}
case Builtin::BIprintf:
- if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
- return EmitCUDADevicePrintfCallExpr(E, ReturnValue);
+ if (getTarget().getTriple().isNVPTX())
+ return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
break;
case Builtin::BI__builtin_canonicalize:
case Builtin::BI__builtin_canonicalizef:
@@ -2680,7 +2751,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// Push a clang.arc.use cleanup for each object in RetainableOperands. The
// cleanup will cause the use to appear after the final log call, keeping
- // the object valid while it’s held in the log buffer. Note that if there’s
+ // the object valid while it's held in the log buffer. Note that if there's
// a release cleanup on the object, it will already be active; since
// cleanups are emitted in reverse order, the use will occur before the
// object is released.
@@ -2698,6 +2769,59 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(ConstantInt::get(ConvertType(E->getType()),
Layout.size().getQuantity()));
}
+
+ case Builtin::BI__xray_customevent: {
+ if (!ShouldXRayInstrumentFunction())
+ return RValue::getIgnored();
+ if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) {
+ if (XRayAttr->neverXRayInstrument())
+ return RValue::getIgnored();
+ }
+ Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
+ auto FTy = F->getFunctionType();
+ auto Arg0 = E->getArg(0);
+ auto Arg0Val = EmitScalarExpr(Arg0);
+ auto Arg0Ty = Arg0->getType();
+ auto PTy0 = FTy->getParamType(0);
+ if (PTy0 != Arg0Val->getType()) {
+ if (Arg0Ty->isArrayType())
+ Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
+ else
+ Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
+ }
+ auto Arg1 = EmitScalarExpr(E->getArg(1));
+ auto PTy1 = FTy->getParamType(1);
+ if (PTy1 != Arg1->getType())
+ Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
+ return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
+ }
+
+ case Builtin::BI__builtin_ms_va_start:
+ case Builtin::BI__builtin_ms_va_end:
+ return RValue::get(
+ EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
+ BuiltinID == Builtin::BI__builtin_ms_va_start));
+
+ case Builtin::BI__builtin_ms_va_copy: {
+ // Lower this manually. We can't reliably determine whether or not any
+ // given va_copy() is for a Win64 va_list from the calling convention
+ // alone, because it's legal to do this from a System V ABI function.
+ // With opaque pointer types, we won't have enough information in LLVM
+ // IR to determine this from the argument types, either. Best to do it
+ // now, while we have enough information.
+ Address DestAddr = EmitMSVAListRef(E->getArg(0));
+ Address SrcAddr = EmitMSVAListRef(E->getArg(1));
+
+ llvm::Type *BPP = Int8PtrPtrTy;
+
+ DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
+ DestAddr.getAlignment());
+ SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
+ SrcAddr.getAlignment());
+
+ Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
+ return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
+ }
}
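// Minimal model (assumption, using Clang's __builtin_ms_va_list, which is a
// plain char *): a Win64 va_list is a single pointer, so the manual lowering
// above amounts to one pointer copy, independent of the enclosing function's
// calling convention.
void msva_copy_model(__builtin_ms_va_list *dst, __builtin_ms_va_list src) {
  *dst = src;  // matches the load/store pair emitted above
}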
// If this is an alias for a lib function (e.g. __builtin_sin), emit
@@ -3716,6 +3840,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vcalt_v:
case NEON::BI__builtin_neon_vcaltq_v:
std::swap(Ops[0], Ops[1]);
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vcage_v:
case NEON::BI__builtin_neon_vcageq_v:
case NEON::BI__builtin_neon_vcagt_v:
@@ -4474,7 +4599,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
? Intrinsic::arm_stlexd
: Intrinsic::arm_strexd);
- llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
+ llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
Address Tmp = CreateMemTemp(E->getArg(0)->getType());
Value *Val = EmitScalarExpr(E->getArg(0));
@@ -4959,6 +5084,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vsri_n_v:
case NEON::BI__builtin_neon_vsriq_n_v:
rightShift = true;
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vsli_n_v:
case NEON::BI__builtin_neon_vsliq_n_v:
Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
@@ -5304,7 +5430,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
? Intrinsic::aarch64_stlxp
: Intrinsic::aarch64_stxp);
- llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
+ llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
Address Tmp = CreateMemTemp(E->getArg(0)->getType());
EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
@@ -7115,33 +7241,15 @@ static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
}
+static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
+ llvm::Type *DstTy) {
+ unsigned NumberOfElements = DstTy->getVectorNumElements();
+ Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
+ return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
+}
+
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
- if (BuiltinID == X86::BI__builtin_ms_va_start ||
- BuiltinID == X86::BI__builtin_ms_va_end)
- return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
- BuiltinID == X86::BI__builtin_ms_va_start);
- if (BuiltinID == X86::BI__builtin_ms_va_copy) {
- // Lower this manually. We can't reliably determine whether or not any
- // given va_copy() is for a Win64 va_list from the calling convention
- // alone, because it's legal to do this from a System V ABI function.
- // With opaque pointer types, we won't have enough information in LLVM
- // IR to determine this from the argument types, either. Best to do it
- // now, while we have enough information.
- Address DestAddr = EmitMSVAListRef(E->getArg(0));
- Address SrcAddr = EmitMSVAListRef(E->getArg(1));
-
- llvm::Type *BPP = Int8PtrPtrTy;
-
- DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
- DestAddr.getAlignment());
- SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
- SrcAddr.getAlignment());
-
- Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
- return Builder.CreateStore(ArgPtr, DestAddr);
- }
-
SmallVector<Value*, 4> Ops;
// Find out if any arguments are required to be integer constant expressions.
@@ -7228,39 +7336,44 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
AVX512PF,
AVX512VBMI,
AVX512IFMA,
+ AVX5124VNNIW, // TODO implement this fully
+ AVX5124FMAPS, // TODO implement this fully
+ AVX512VPOPCNTDQ,
MAX
};
- X86Features Feature = StringSwitch<X86Features>(FeatureStr)
- .Case("cmov", X86Features::CMOV)
- .Case("mmx", X86Features::MMX)
- .Case("popcnt", X86Features::POPCNT)
- .Case("sse", X86Features::SSE)
- .Case("sse2", X86Features::SSE2)
- .Case("sse3", X86Features::SSE3)
- .Case("ssse3", X86Features::SSSE3)
- .Case("sse4.1", X86Features::SSE4_1)
- .Case("sse4.2", X86Features::SSE4_2)
- .Case("avx", X86Features::AVX)
- .Case("avx2", X86Features::AVX2)
- .Case("sse4a", X86Features::SSE4_A)
- .Case("fma4", X86Features::FMA4)
- .Case("xop", X86Features::XOP)
- .Case("fma", X86Features::FMA)
- .Case("avx512f", X86Features::AVX512F)
- .Case("bmi", X86Features::BMI)
- .Case("bmi2", X86Features::BMI2)
- .Case("aes", X86Features::AES)
- .Case("pclmul", X86Features::PCLMUL)
- .Case("avx512vl", X86Features::AVX512VL)
- .Case("avx512bw", X86Features::AVX512BW)
- .Case("avx512dq", X86Features::AVX512DQ)
- .Case("avx512cd", X86Features::AVX512CD)
- .Case("avx512er", X86Features::AVX512ER)
- .Case("avx512pf", X86Features::AVX512PF)
- .Case("avx512vbmi", X86Features::AVX512VBMI)
- .Case("avx512ifma", X86Features::AVX512IFMA)
- .Default(X86Features::MAX);
+ X86Features Feature =
+ StringSwitch<X86Features>(FeatureStr)
+ .Case("cmov", X86Features::CMOV)
+ .Case("mmx", X86Features::MMX)
+ .Case("popcnt", X86Features::POPCNT)
+ .Case("sse", X86Features::SSE)
+ .Case("sse2", X86Features::SSE2)
+ .Case("sse3", X86Features::SSE3)
+ .Case("ssse3", X86Features::SSSE3)
+ .Case("sse4.1", X86Features::SSE4_1)
+ .Case("sse4.2", X86Features::SSE4_2)
+ .Case("avx", X86Features::AVX)
+ .Case("avx2", X86Features::AVX2)
+ .Case("sse4a", X86Features::SSE4_A)
+ .Case("fma4", X86Features::FMA4)
+ .Case("xop", X86Features::XOP)
+ .Case("fma", X86Features::FMA)
+ .Case("avx512f", X86Features::AVX512F)
+ .Case("bmi", X86Features::BMI)
+ .Case("bmi2", X86Features::BMI2)
+ .Case("aes", X86Features::AES)
+ .Case("pclmul", X86Features::PCLMUL)
+ .Case("avx512vl", X86Features::AVX512VL)
+ .Case("avx512bw", X86Features::AVX512BW)
+ .Case("avx512dq", X86Features::AVX512DQ)
+ .Case("avx512cd", X86Features::AVX512CD)
+ .Case("avx512er", X86Features::AVX512ER)
+ .Case("avx512pf", X86Features::AVX512PF)
+ .Case("avx512vbmi", X86Features::AVX512VBMI)
+ .Case("avx512ifma", X86Features::AVX512IFMA)
+ .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
+ .Default(X86Features::MAX);
assert(Feature != X86Features::MAX && "Invalid feature!");
// Matching the struct layout from the compiler-rt/libgcc structure that is
@@ -7269,8 +7382,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// unsigned int __cpu_type;
// unsigned int __cpu_subtype;
// unsigned int __cpu_features[1];
- llvm::Type *STy = llvm::StructType::get(
- Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
+ llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
+ llvm::ArrayType::get(Int32Ty, 1));
// Grab the global __cpu_model.
llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
@@ -7321,7 +7434,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_undef128:
case X86::BI__builtin_ia32_undef256:
case X86::BI__builtin_ia32_undef512:
- return UndefValue::get(ConvertType(E->getType()));
+ // The x86 definition of "undef" is not the same as the LLVM definition
+ // (PR32176). We leave optimizing away an unnecessary zero constant to the
+ // IR optimizer and backend.
+ // TODO: If we had a "freeze" IR instruction to generate a fixed undef
+ // value, we should use that here instead of a zero.
+ return llvm::Constant::getNullValue(ConvertType(E->getType()));
case X86::BI__builtin_ia32_vec_init_v8qi:
case X86::BI__builtin_ia32_vec_init_v4hi:
case X86::BI__builtin_ia32_vec_init_v2si:
@@ -7408,6 +7526,26 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_storesd128_mask: {
return EmitX86MaskedStore(*this, Ops, 16);
}
+ case X86::BI__builtin_ia32_vpopcntd_512:
+ case X86::BI__builtin_ia32_vpopcntq_512: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
+ return Builder.CreateCall(F, Ops);
+ }
+ case X86::BI__builtin_ia32_cvtmask2b128:
+ case X86::BI__builtin_ia32_cvtmask2b256:
+ case X86::BI__builtin_ia32_cvtmask2b512:
+ case X86::BI__builtin_ia32_cvtmask2w128:
+ case X86::BI__builtin_ia32_cvtmask2w256:
+ case X86::BI__builtin_ia32_cvtmask2w512:
+ case X86::BI__builtin_ia32_cvtmask2d128:
+ case X86::BI__builtin_ia32_cvtmask2d256:
+ case X86::BI__builtin_ia32_cvtmask2d512:
+ case X86::BI__builtin_ia32_cvtmask2q128:
+ case X86::BI__builtin_ia32_cvtmask2q256:
+ case X86::BI__builtin_ia32_cvtmask2q512:
+ return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
+
case X86::BI__builtin_ia32_movdqa32store128_mask:
case X86::BI__builtin_ia32_movdqa64store128_mask:
case X86::BI__builtin_ia32_storeaps128_mask:
@@ -7788,6 +7926,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
// We can't handle 8-31 immediates with native IR, use the intrinsic.
+ // Except for predicates that create constants.
Intrinsic::ID ID;
switch (BuiltinID) {
default: llvm_unreachable("Unsupported intrinsic!");
@@ -7795,12 +7934,32 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
ID = Intrinsic::x86_sse_cmp_ps;
break;
case X86::BI__builtin_ia32_cmpps256:
+ // _CMP_TRUE_UQ and _CMP_TRUE_US produce an all-ones vector on any input,
+ // while _CMP_FALSE_OQ and _CMP_FALSE_OS produce an all-zeros vector.
+ if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
+ Value *Constant = (CC == 0xf || CC == 0x1f) ?
+ llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
+ llvm::Constant::getNullValue(Builder.getInt32Ty());
+ Value *Vec = Builder.CreateVectorSplat(
+ Ops[0]->getType()->getVectorNumElements(), Constant);
+ return Builder.CreateBitCast(Vec, Ops[0]->getType());
+ }
ID = Intrinsic::x86_avx_cmp_ps_256;
break;
case X86::BI__builtin_ia32_cmppd:
ID = Intrinsic::x86_sse2_cmp_pd;
break;
case X86::BI__builtin_ia32_cmppd256:
+ // _CMP_TRUE_UQ and _CMP_TRUE_US produce an all-ones vector on any input,
+ // while _CMP_FALSE_OQ and _CMP_FALSE_OS produce an all-zeros vector.
+ if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
+ Value *Constant = (CC == 0xf || CC == 0x1f) ?
+ llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
+ llvm::Constant::getNullValue(Builder.getInt64Ty());
+ Value *Vec = Builder.CreateVectorSplat(
+ Ops[0]->getType()->getVectorNumElements(), Constant);
+ return Builder.CreateBitCast(Vec, Ops[0]->getType());
+ }
ID = Intrinsic::x86_avx_cmp_pd_256;
break;
}
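// Worked example (assumption): the folded immediates correspond to the
// <immintrin.h> predicates _CMP_FALSE_OQ (0x0B), _CMP_TRUE_UQ (0x0F),
// _CMP_FALSE_OS (0x1B) and _CMP_TRUE_US (0x1F).
#include <immintrin.h>
__m256 always_true(__m256 a, __m256 b) {
  return _mm256_cmp_ps(a, b, _CMP_TRUE_UQ);   // folds to an all-ones splat
}
__m256 always_false(__m256 a, __m256 b) {
  return _mm256_cmp_ps(a, b, _CMP_FALSE_OQ);  // folds to zero
}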
@@ -7881,13 +8040,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__faststorefence: {
return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
- llvm::CrossThread);
+ llvm::SyncScope::System);
}
case X86::BI_ReadWriteBarrier:
case X86::BI_ReadBarrier:
case X86::BI_WriteBarrier: {
return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
- llvm::SingleThread);
+ llvm::SyncScope::SingleThread);
}
case X86::BI_BitScanForward:
case X86::BI_BitScanForward64:
@@ -7922,6 +8081,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// instruction, but it will create a memset that won't be optimized away.
return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
}
+ case X86::BI__ud2:
+ // llvm.trap makes a ud2a instruction on x86.
+ return EmitTrapCall(Intrinsic::trap);
+ case X86::BI__int2c: {
+ // This syscall signals a driver assertion failure in x86 NT kernels.
+ llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
+ llvm::InlineAsm *IA =
+ llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
+ llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
+ getLLVMContext(), llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoReturn);
+ CallSite CS = Builder.CreateCall(IA);
+ CS.setAttributes(NoReturnAttr);
+ return CS.getInstruction();
+ }
+ case X86::BI__readfsbyte:
+ case X86::BI__readfsword:
+ case X86::BI__readfsdword:
+ case X86::BI__readfsqword: {
+ llvm::Type *IntTy = ConvertType(E->getType());
+ Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
+ llvm::PointerType::get(IntTy, 257));
+ LoadInst *Load = Builder.CreateAlignedLoad(
+ IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
+ Load->setVolatile(true);
+ return Load;
+ }
+ case X86::BI__readgsbyte:
+ case X86::BI__readgsword:
+ case X86::BI__readgsdword:
+ case X86::BI__readgsqword: {
+ llvm::Type *IntTy = ConvertType(E->getType());
+ Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
+ llvm::PointerType::get(IntTy, 256));
+ LoadInst *Load = Builder.CreateAlignedLoad(
+ IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
+ Load->setVolatile(true);
+ return Load;
+ }
}
}
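// Illustrative usage (assumption): these intrinsics perform volatile loads
// through the FS segment (modeled as LLVM address space 257) or the GS
// segment (address space 256). For example, on x86-64 Windows the PEB pointer
// sits at GS:[0x60] in the TEB:
//
//   void *peb = (void *)__readgsqword(0x60);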
@@ -8279,6 +8477,80 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, Ops);
}
}
+
+ case PPC::BI__builtin_vsx_xxpermdi: {
+ ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+ assert(ArgCI && "Third arg must be constant integer!");
+
+ unsigned Index = ArgCI->getZExtValue();
+ Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
+ Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
+
+ // Element zero comes from the first input vector and element one comes from
+ // the second. The element indices within each vector are numbered in big
+ // endian order so the shuffle mask must be adjusted for this on little
+ // endian platforms (i.e. index is complemented and source vector reversed).
+ unsigned ElemIdx0;
+ unsigned ElemIdx1;
+ if (getTarget().isLittleEndian()) {
+ ElemIdx0 = (~Index & 1) + 2;
+ ElemIdx1 = (~Index & 2) >> 1;
+ } else { // BigEndian
+ ElemIdx0 = (Index & 2) >> 1;
+ ElemIdx1 = 2 + (Index & 1);
+ }
+
+ Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
+ ConstantInt::get(Int32Ty, ElemIdx1)};
+ Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
+
+ Value *ShuffleCall =
+ Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
+ QualType BIRetType = E->getType();
+ auto RetTy = ConvertType(BIRetType);
+ return Builder.CreateBitCast(ShuffleCall, RetTy);
+ }
+
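// Worked example (assumption): in big-endian element numbering,
// __builtin_vsx_xxpermdi(A, B, I) yields { A[(I >> 1) & 1], B[I & 1] }, so
// I == 0 selects { A[0], B[0] } and I == 3 selects { A[1], B[1] }. The
// little-endian branch above complements the index and swaps the shuffle
// sources to arrive at the same logical result.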
+ case PPC::BI__builtin_vsx_xxsldwi: {
+ ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+ assert(ArgCI && "Third argument must be a compile time constant");
+ unsigned Index = ArgCI->getZExtValue() & 0x3;
+ Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
+ Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
+
+ // Create a shuffle mask
+ unsigned ElemIdx0;
+ unsigned ElemIdx1;
+ unsigned ElemIdx2;
+ unsigned ElemIdx3;
+ if (getTarget().isLittleEndian()) {
+ // Little endian element N comes from element 8+N-Index of the
+ // concatenated wide vector (of course, using modulo arithmetic on
+ // the total number of elements).
+ ElemIdx0 = (8 - Index) % 8;
+ ElemIdx1 = (9 - Index) % 8;
+ ElemIdx2 = (10 - Index) % 8;
+ ElemIdx3 = (11 - Index) % 8;
+ } else {
+ // Big endian ElemIdx<N> = Index + N
+ ElemIdx0 = Index;
+ ElemIdx1 = Index + 1;
+ ElemIdx2 = Index + 2;
+ ElemIdx3 = Index + 3;
+ }
+
+ Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
+ ConstantInt::get(Int32Ty, ElemIdx1),
+ ConstantInt::get(Int32Ty, ElemIdx2),
+ ConstantInt::get(Int32Ty, ElemIdx3)};
+
+ Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
+ Value *ShuffleCall =
+ Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
+ QualType BIRetType = E->getType();
+ auto RetTy = ConvertType(BIRetType);
+ return Builder.CreateBitCast(ShuffleCall, RetTy);
+ }
}
}
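// Worked example (assumption): in big-endian element numbering,
// __builtin_vsx_xxsldwi(A, B, I) returns the four-word window starting at
// word I of the concatenation A:B, i.e. result[n] = (A:B)[I + n]; for I == 1
// that is { A[1], A[2], A[3], B[0] }. The little-endian branch above remaps
// the indices modulo 8 to preserve the same logical behavior.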
@@ -8326,6 +8598,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
+ case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
+ llvm::SmallVector<llvm::Value *, 5> Args;
+ for (unsigned I = 0; I != 5; ++I)
+ Args.push_back(EmitScalarExpr(E->getArg(I)));
+ Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
+ Args[0]->getType());
+ return Builder.CreateCall(F, Args);
+ }
case AMDGPU::BI__builtin_amdgcn_div_fixup:
case AMDGPU::BI__builtin_amdgcn_div_fixupf:
case AMDGPU::BI__builtin_amdgcn_div_fixuph:
@@ -8391,7 +8671,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_classf:
case AMDGPU::BI__builtin_amdgcn_classh:
return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
-
+ case AMDGPU::BI__builtin_amdgcn_fmed3f:
+ case AMDGPU::BI__builtin_amdgcn_fmed3h:
+ return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
case AMDGPU::BI__builtin_amdgcn_read_exec: {
CallInst *CI = cast<CallInst>(
EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
@@ -8510,12 +8792,14 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {X, Undef});
}
+ case SystemZ::BI__builtin_s390_vfsqsb:
case SystemZ::BI__builtin_s390_vfsqdb: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *X = EmitScalarExpr(E->getArg(0));
Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
return Builder.CreateCall(F, X);
}
+ case SystemZ::BI__builtin_s390_vfmasb:
case SystemZ::BI__builtin_s390_vfmadb: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *X = EmitScalarExpr(E->getArg(0));
@@ -8524,6 +8808,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
return Builder.CreateCall(F, {X, Y, Z});
}
+ case SystemZ::BI__builtin_s390_vfmssb:
case SystemZ::BI__builtin_s390_vfmsdb: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *X = EmitScalarExpr(E->getArg(0));
@@ -8533,12 +8818,35 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
}
+ case SystemZ::BI__builtin_s390_vfnmasb:
+ case SystemZ::BI__builtin_s390_vfnmadb: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *X = EmitScalarExpr(E->getArg(0));
+ Value *Y = EmitScalarExpr(E->getArg(1));
+ Value *Z = EmitScalarExpr(E->getArg(2));
+ Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
+ Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
+ return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
+ }
+ case SystemZ::BI__builtin_s390_vfnmssb:
+ case SystemZ::BI__builtin_s390_vfnmsdb: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *X = EmitScalarExpr(E->getArg(0));
+ Value *Y = EmitScalarExpr(E->getArg(1));
+ Value *Z = EmitScalarExpr(E->getArg(2));
+ Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
+ Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
+ Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
+ return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
+ }
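// Quick reference (derived from the lowerings above): the fused forms compute
//   vfma(x, y, z)  =   x*y + z      vfms(x, y, z)  =   x*y - z
//   vfnma(x, y, z) = -(x*y + z)     vfnms(x, y, z) = -(x*y - z)
// with the single-precision (vfmasb, ...) and double-precision (vfmadb, ...)
// builtins sharing the same lowering through @llvm.fma.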
+ case SystemZ::BI__builtin_s390_vflpsb:
case SystemZ::BI__builtin_s390_vflpdb: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *X = EmitScalarExpr(E->getArg(0));
Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
return Builder.CreateCall(F, X);
}
+ case SystemZ::BI__builtin_s390_vflnsb:
case SystemZ::BI__builtin_s390_vflndb: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *X = EmitScalarExpr(E->getArg(0));
@@ -8546,6 +8854,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
}
+ case SystemZ::BI__builtin_s390_vfisb:
case SystemZ::BI__builtin_s390_vfidb: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *X = EmitScalarExpr(E->getArg(0));
@@ -8555,8 +8864,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
(void)IsConstM4; (void)IsConstM5;
- // Check whether this instance of vfidb can be represented via a LLVM
- // standard intrinsic. We only support some combinations of M4 and M5.
+ // Check whether this instance can be represented via an LLVM standard
+ // intrinsic. We only support some combinations of M4 and M5.
Intrinsic::ID ID = Intrinsic::not_intrinsic;
switch (M4.getZExtValue()) {
default: break;
@@ -8581,11 +8890,76 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(ID, ResultType);
return Builder.CreateCall(F, X);
}
- Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
+ switch (BuiltinID) {
+ case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
+ case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
+ default: llvm_unreachable("Unknown BuiltinID");
+ }
+ Function *F = CGM.getIntrinsic(ID);
Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
return Builder.CreateCall(F, {X, M4Value, M5Value});
}
+ case SystemZ::BI__builtin_s390_vfmaxsb:
+ case SystemZ::BI__builtin_s390_vfmaxdb: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *X = EmitScalarExpr(E->getArg(0));
+ Value *Y = EmitScalarExpr(E->getArg(1));
+ // Constant-fold the M4 mask argument.
+ llvm::APSInt M4;
+ bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
+ assert(IsConstM4 && "Constant arg isn't actually constant?");
+ (void)IsConstM4;
+ // Check whether this instance can be represented via an LLVM standard
+ // intrinsic. We only support some values of M4.
+ Intrinsic::ID ID = Intrinsic::not_intrinsic;
+ switch (M4.getZExtValue()) {
+ default: break;
+ case 4: ID = Intrinsic::maxnum; break;
+ }
+ if (ID != Intrinsic::not_intrinsic) {
+ Function *F = CGM.getIntrinsic(ID, ResultType);
+ return Builder.CreateCall(F, {X, Y});
+ }
+ switch (BuiltinID) {
+ case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
+ case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
+ default: llvm_unreachable("Unknown BuiltinID");
+ }
+ Function *F = CGM.getIntrinsic(ID);
+ Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
+ return Builder.CreateCall(F, {X, Y, M4Value});
+ }
+ case SystemZ::BI__builtin_s390_vfminsb:
+ case SystemZ::BI__builtin_s390_vfmindb: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *X = EmitScalarExpr(E->getArg(0));
+ Value *Y = EmitScalarExpr(E->getArg(1));
+ // Constant-fold the M4 mask argument.
+ llvm::APSInt M4;
+ bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
+ assert(IsConstM4 && "Constant arg isn't actually constant?");
+ (void)IsConstM4;
+ // Check whether this instance can be represented via an LLVM standard
+ // intrinsic. We only support some values of M4.
+ Intrinsic::ID ID = Intrinsic::not_intrinsic;
+ switch (M4.getZExtValue()) {
+ default: break;
+ case 4: ID = Intrinsic::minnum; break;
+ }
+ if (ID != Intrinsic::not_intrinsic) {
+ Function *F = CGM.getIntrinsic(ID, ResultType);
+ return Builder.CreateCall(F, {X, Y});
+ }
+ switch (BuiltinID) {
+ case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
+ case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
+ default: llvm_unreachable("Unknown BuiltinID");
+ }
+ Function *F = CGM.getIntrinsic(ID);
+ Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
+ return Builder.CreateCall(F, {X, Y, M4Value});
+ }
// Vector intrisincs that output the post-instruction CC value.
@@ -8652,10 +9026,14 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
INTRINSIC_WITH_CC(s390_vstrczhs);
INTRINSIC_WITH_CC(s390_vstrczfs);
+ INTRINSIC_WITH_CC(s390_vfcesbs);
INTRINSIC_WITH_CC(s390_vfcedbs);
+ INTRINSIC_WITH_CC(s390_vfchsbs);
INTRINSIC_WITH_CC(s390_vfchdbs);
+ INTRINSIC_WITH_CC(s390_vfchesbs);
INTRINSIC_WITH_CC(s390_vfchedbs);
+ INTRINSIC_WITH_CC(s390_vftcisb);
INTRINSIC_WITH_CC(s390_vftcidb);
#undef INTRINSIC_WITH_CC
@@ -8669,9 +9047,8 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
auto MakeLdg = [&](unsigned IntrinsicID) {
Value *Ptr = EmitScalarExpr(E->getArg(0));
- AlignmentSource AlignSource;
clang::CharUnits Align =
- getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource);
+ getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
return Builder.CreateCall(
CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
Ptr->getType()}),
@@ -8923,6 +9300,16 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
return Builder.CreateCall(Callee, X);
}
+ case WebAssembly::BI__builtin_wasm_throw: {
+ Value *Tag = EmitScalarExpr(E->getArg(0));
+ Value *Obj = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
+ return Builder.CreateCall(Callee, {Tag, Obj});
+ }
+ case WebAssembly::BI__builtin_wasm_rethrow: {
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
+ return Builder.CreateCall(Callee);
+ }
default:
return nullptr;
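For reference, a hedged source-level view of the two new WebAssembly builtins handled above. The exact parameter types come from the builtin definitions, which are not part of this hunk; judging from the arguments emitted here, throw takes a tag plus an exception object and rethrow takes nothing. This compiles only for a wasm target with exception handling enabled.

    // Illustration only; assumes the tag is an integer and the object a
    // pointer, matching the two scalar arguments emitted for wasm_throw above.
    void raise(void *exn_obj) {
      __builtin_wasm_throw(0, exn_obj);   // lowers to llvm.wasm.throw(tag, obj)
    }
    void reraise() {
      __builtin_wasm_rethrow();           // lowers to llvm.wasm.rethrow()
    }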
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp
index 83febcb..d24ef0a 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp
@@ -15,7 +15,7 @@
#include "CGCUDARuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
-#include "ConstantBuilder.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
@@ -265,7 +265,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
"__cudaRegisterFatBinary");
// struct { int magic, int version, void * gpu_binary, void * dont_care };
llvm::StructType *FatbinWrapperTy =
- llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy, nullptr);
+ llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
llvm::Function *ModuleCtorFunc = llvm::Function::Create(
llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
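This hunk tracks the LLVM API change in this import where the variadic llvm::StructType::get overload no longer takes a trailing nullptr sentinel. A small self-contained sketch of the same four-field wrapper struct, with the old form shown in a comment:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    // Sketch: the wrapper struct built with the sentinel-free overload.
    static llvm::StructType *makeFatbinWrapperTy(llvm::LLVMContext &Ctx) {
      llvm::Type *IntTy = llvm::Type::getInt32Ty(Ctx);
      llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(Ctx);
      // Previously: StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy, nullptr);
      return llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
    }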
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp
index 59010f4..0f3141a 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp
@@ -256,7 +256,7 @@ llvm::Constant *CodeGenModule::getAddrOfCXXStructor(
return GetOrCreateLLVMFunction(
getMangledName(GD), FnType, GD, /*ForVTable=*/false, DontDefer,
- /*isThunk=*/false, /*ExtraAttrs=*/llvm::AttributeSet(), IsForDefinition);
+ /*isThunk=*/false, /*ExtraAttrs=*/llvm::AttributeList(), IsForDefinition);
}
static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF,
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.cpp
index df75a7d..0332586 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.cpp
@@ -30,38 +30,9 @@ void CGCXXABI::ErrorUnsupportedABI(CodeGenFunction &CGF, StringRef S) {
}
bool CGCXXABI::canCopyArgument(const CXXRecordDecl *RD) const {
- // If RD has a non-trivial move or copy constructor, we cannot copy the
- // argument.
- if (RD->hasNonTrivialCopyConstructor() || RD->hasNonTrivialMoveConstructor())
- return false;
-
- // If RD has a non-trivial destructor, we cannot copy the argument.
- if (RD->hasNonTrivialDestructor())
- return false;
-
// We can only copy the argument if there exists at least one trivial,
// non-deleted copy or move constructor.
- // FIXME: This assumes that all lazily declared copy and move constructors are
- // not deleted. This assumption might not be true in some corner cases.
- bool CopyDeleted = false;
- bool MoveDeleted = false;
- for (const CXXConstructorDecl *CD : RD->ctors()) {
- if (CD->isCopyConstructor() || CD->isMoveConstructor()) {
- assert(CD->isTrivial());
- // We had at least one undeleted trivial copy or move ctor. Return
- // directly.
- if (!CD->isDeleted())
- return true;
- if (CD->isCopyConstructor())
- CopyDeleted = true;
- else
- MoveDeleted = true;
- }
- }
-
- // If all trivial copy and move constructors are deleted, we cannot copy the
- // argument.
- return !(CopyDeleted && MoveDeleted);
+ return RD->canPassInRegisters();
}
llvm::Constant *CGCXXABI::GetBogusMemberPointer(QualType T) {
@@ -159,10 +130,10 @@ void CGCXXABI::buildThisParam(CodeGenFunction &CGF, FunctionArgList &params) {
// FIXME: I'm not entirely sure I like using a fake decl just for code
// generation. Maybe we can come up with a better way?
- ImplicitParamDecl *ThisDecl
- = ImplicitParamDecl::Create(CGM.getContext(), nullptr, MD->getLocation(),
- &CGM.getContext().Idents.get("this"),
- MD->getThisType(CGM.getContext()));
+ auto *ThisDecl = ImplicitParamDecl::Create(
+ CGM.getContext(), nullptr, MD->getLocation(),
+ &CGM.getContext().Idents.get("this"), MD->getThisType(CGM.getContext()),
+ ImplicitParamDecl::CXXThis);
params.push_back(ThisDecl);
CGF.CXXABIThisDecl = ThisDecl;
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h
index d53fd4c..7b912e3 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h
@@ -291,11 +291,26 @@ public:
/// Emit constructor variants required by this ABI.
virtual void EmitCXXConstructors(const CXXConstructorDecl *D) = 0;
+ /// Notes how many arguments were added to the beginning (Prefix) and end
+ /// (Suffix) of an arg list.
+ ///
+ /// Note that Prefix actually refers to the number of args *after* the first
+ /// one: `this` arguments always come first.
+ struct AddedStructorArgs {
+ unsigned Prefix = 0;
+ unsigned Suffix = 0;
+ AddedStructorArgs() = default;
+ AddedStructorArgs(unsigned P, unsigned S) : Prefix(P), Suffix(S) {}
+ static AddedStructorArgs prefix(unsigned N) { return {N, 0}; }
+ static AddedStructorArgs suffix(unsigned N) { return {0, N}; }
+ };
+
/// Build the signature of the given constructor or destructor variant by
/// adding any required parameters. For convenience, ArgTys has been
/// initialized with the type of 'this'.
- virtual void buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
- SmallVectorImpl<CanQualType> &ArgTys) = 0;
+ virtual AddedStructorArgs
+ buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
+ SmallVectorImpl<CanQualType> &ArgTys) = 0;
/// Returns true if the given destructor type should be emitted as a linkonce
/// delegating thunk, regardless of whether the dtor is defined in this TU or
@@ -355,9 +370,9 @@ public:
/// Add any ABI-specific implicit arguments needed to call a constructor.
///
- /// \return The number of args added to the call, which is typically zero or
- /// one.
- virtual unsigned
+ /// \return The number of arguments added at the beginning and end of the
+ /// call, which is typically zero or one.
+ virtual AddedStructorArgs
addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D,
CXXCtorType Type, bool ForVirtualBase,
bool Delegating, CallArgList &Args) = 0;
@@ -377,7 +392,7 @@ public:
isVirtualOffsetNeededForVTableField(CodeGenFunction &CGF,
CodeGenFunction::VPtr Vptr) = 0;
- /// Checks if ABI requires to initilize vptrs for given dynamic class.
+ /// Checks if ABI requires to initialize vptrs for given dynamic class.
virtual bool doStructorsInitializeVPtrs(const CXXRecordDecl *VTableClass) = 0;
/// Get the address point of the vtable for the given base subobject.
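A hedged sketch of how an ABI implementation might use the new AddedStructorArgs result, for instance to report one implicit argument inserted right after `this` (the way a VTT-style argument would be counted). The helper is hypothetical and assumes clang's CodeGen headers are in scope:

    // Illustration of the AddedStructorArgs contract described above:
    // Prefix counts args added immediately after `this`, Suffix counts args
    // appended at the end of the argument list.
    CGCXXABI::AddedStructorArgs reportOneImplicitPrefixArg() {
      return CGCXXABI::AddedStructorArgs::prefix(1);
    }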
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp
index c7c61e0..316bf44 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp
@@ -50,7 +50,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
case CC_X86FastCall: return llvm::CallingConv::X86_FastCall;
case CC_X86RegCall: return llvm::CallingConv::X86_RegCall;
case CC_X86ThisCall: return llvm::CallingConv::X86_ThisCall;
- case CC_X86_64Win64: return llvm::CallingConv::X86_64_Win64;
+ case CC_Win64: return llvm::CallingConv::Win64;
case CC_X86_64SysV: return llvm::CallingConv::X86_64_SysV;
case CC_AAPCS: return llvm::CallingConv::ARM_AAPCS;
case CC_AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP;
@@ -101,39 +101,64 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) {
FTNP->getExtInfo(), {}, RequiredArgs(0));
}
-/// Adds the formal paramaters in FPT to the given prefix. If any parameter in
+static void addExtParameterInfosForCall(
+ llvm::SmallVectorImpl<FunctionProtoType::ExtParameterInfo> &paramInfos,
+ const FunctionProtoType *proto,
+ unsigned prefixArgs,
+ unsigned totalArgs) {
+ assert(proto->hasExtParameterInfos());
+ assert(paramInfos.size() <= prefixArgs);
+ assert(proto->getNumParams() + prefixArgs <= totalArgs);
+
+ paramInfos.reserve(totalArgs);
+
+ // Add default infos for any prefix args that don't already have infos.
+ paramInfos.resize(prefixArgs);
+
+ // Add infos for the prototype.
+ for (const auto &ParamInfo : proto->getExtParameterInfos()) {
+ paramInfos.push_back(ParamInfo);
+ // pass_object_size params have no parameter info.
+ if (ParamInfo.hasPassObjectSize())
+ paramInfos.emplace_back();
+ }
+
+ assert(paramInfos.size() <= totalArgs &&
+ "Did we forget to insert pass_object_size args?");
+ // Add default infos for the variadic and/or suffix arguments.
+ paramInfos.resize(totalArgs);
+}
+
+/// Adds the formal parameters in FPT to the given prefix. If any parameter in
/// FPT has pass_object_size attrs, then we'll add parameters for those, too.
static void appendParameterTypes(const CodeGenTypes &CGT,
SmallVectorImpl<CanQualType> &prefix,
SmallVectorImpl<FunctionProtoType::ExtParameterInfo> &paramInfos,
- CanQual<FunctionProtoType> FPT,
- const FunctionDecl *FD) {
- // Fill out paramInfos.
- if (FPT->hasExtParameterInfos() || !paramInfos.empty()) {
- assert(paramInfos.size() <= prefix.size());
- auto protoParamInfos = FPT->getExtParameterInfos();
- paramInfos.reserve(prefix.size() + protoParamInfos.size());
- paramInfos.resize(prefix.size());
- paramInfos.append(protoParamInfos.begin(), protoParamInfos.end());
- }
-
- // Fast path: unknown target.
- if (FD == nullptr) {
+ CanQual<FunctionProtoType> FPT) {
+ // Fast path: don't touch param info if we don't need to.
+ if (!FPT->hasExtParameterInfos()) {
+ assert(paramInfos.empty() &&
+ "We have paramInfos, but the prototype doesn't?");
prefix.append(FPT->param_type_begin(), FPT->param_type_end());
return;
}
- // In the vast majority cases, we'll have precisely FPT->getNumParams()
+ unsigned PrefixSize = prefix.size();
+ // In the vast majority of cases, we'll have precisely FPT->getNumParams()
// parameters; the only thing that can change this is the presence of
// pass_object_size. So, we preallocate for the common case.
prefix.reserve(prefix.size() + FPT->getNumParams());
- assert(FD->getNumParams() == FPT->getNumParams());
+ auto ExtInfos = FPT->getExtParameterInfos();
+ assert(ExtInfos.size() == FPT->getNumParams());
for (unsigned I = 0, E = FPT->getNumParams(); I != E; ++I) {
prefix.push_back(FPT->getParamType(I));
- if (FD->getParamDecl(I)->hasAttr<PassObjectSizeAttr>())
+ if (ExtInfos[I].hasPassObjectSize())
prefix.push_back(CGT.getContext().getSizeType());
}
+
+ addExtParameterInfosForCall(paramInfos, FPT.getTypePtr(), PrefixSize,
+ prefix.size());
}
/// Arrange the LLVM function layout for a value of the given function
@@ -147,7 +172,7 @@ arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod,
RequiredArgs Required =
RequiredArgs::forPrototypePlus(FTP, prefix.size(), FD);
// FIXME: Kill copy.
- appendParameterTypes(CGT, prefix, paramInfos, FTP, FD);
+ appendParameterTypes(CGT, prefix, paramInfos, FTP);
CanQualType resultType = FTP->getReturnType().getUnqualifiedType();
return CGT.arrangeLLVMFunctionInfo(resultType, instanceMethod,
@@ -193,7 +218,7 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) {
return CC_IntelOclBicc;
if (D->hasAttr<MSABIAttr>())
- return IsWindows ? CC_C : CC_X86_64Win64;
+ return IsWindows ? CC_C : CC_Win64;
if (D->hasAttr<SysVABIAttr>())
return IsWindows ? CC_X86_64SysV : CC_C;
@@ -286,9 +311,19 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD,
// Add the formal parameters.
if (PassParams)
- appendParameterTypes(*this, argTypes, paramInfos, FTP, MD);
-
- TheCXXABI.buildStructorSignature(MD, Type, argTypes);
+ appendParameterTypes(*this, argTypes, paramInfos, FTP);
+
+ CGCXXABI::AddedStructorArgs AddedArgs =
+ TheCXXABI.buildStructorSignature(MD, Type, argTypes);
+ if (!paramInfos.empty()) {
+ // Note: prefix implies after the first param.
+ if (AddedArgs.Prefix)
+ paramInfos.insert(paramInfos.begin() + 1, AddedArgs.Prefix,
+ FunctionProtoType::ExtParameterInfo{});
+ if (AddedArgs.Suffix)
+ paramInfos.append(AddedArgs.Suffix,
+ FunctionProtoType::ExtParameterInfo{});
+ }
RequiredArgs required =
(PassParams && MD->isVariadic() ? RequiredArgs(argTypes.size())
@@ -321,26 +356,6 @@ getArgTypesForDeclaration(ASTContext &ctx, const FunctionArgList &args) {
return argTypes;
}
-static void addExtParameterInfosForCall(
- llvm::SmallVectorImpl<FunctionProtoType::ExtParameterInfo> &paramInfos,
- const FunctionProtoType *proto,
- unsigned prefixArgs,
- unsigned totalArgs) {
- assert(proto->hasExtParameterInfos());
- assert(paramInfos.size() <= prefixArgs);
- assert(proto->getNumParams() + prefixArgs <= totalArgs);
-
- // Add default infos for any prefix args that don't already have infos.
- paramInfos.resize(prefixArgs);
-
- // Add infos for the prototype.
- auto protoInfos = proto->getExtParameterInfos();
- paramInfos.append(protoInfos.begin(), protoInfos.end());
-
- // Add default infos for the variadic arguments.
- paramInfos.resize(totalArgs);
-}
-
static llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16>
getExtParameterInfosForCall(const FunctionProtoType *proto,
unsigned prefixArgs, unsigned totalArgs) {
@@ -352,18 +367,31 @@ getExtParameterInfosForCall(const FunctionProtoType *proto,
}
/// Arrange a call to a C++ method, passing the given arguments.
+///
+/// ExtraPrefixArgs is the number of ABI-specific args passed after the `this`
+/// parameter.
+/// ExtraSuffixArgs is the number of ABI-specific args passed at the end of
+/// args.
+/// PassProtoArgs indicates whether `args` has args for the parameters in the
+/// given CXXConstructorDecl.
const CGFunctionInfo &
CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
const CXXConstructorDecl *D,
CXXCtorType CtorKind,
- unsigned ExtraArgs) {
+ unsigned ExtraPrefixArgs,
+ unsigned ExtraSuffixArgs,
+ bool PassProtoArgs) {
// FIXME: Kill copy.
SmallVector<CanQualType, 16> ArgTypes;
for (const auto &Arg : args)
ArgTypes.push_back(Context.getCanonicalParamType(Arg.Ty));
+ // +1 for implicit this, which should always be args[0].
+ unsigned TotalPrefixArgs = 1 + ExtraPrefixArgs;
+
CanQual<FunctionProtoType> FPT = GetFormalType(D);
- RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs, D);
+ RequiredArgs Required =
+ RequiredArgs::forPrototypePlus(FPT, TotalPrefixArgs + ExtraSuffixArgs, D);
GlobalDecl GD(D, CtorKind);
CanQualType ResultType = TheCXXABI.HasThisReturn(GD)
? ArgTypes.front()
@@ -372,8 +400,14 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
: Context.VoidTy;
FunctionType::ExtInfo Info = FPT->getExtInfo();
- auto ParamInfos = getExtParameterInfosForCall(FPT.getTypePtr(), 1 + ExtraArgs,
- ArgTypes.size());
+ llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> ParamInfos;
+ // If the prototype args are elided, we should only have ABI-specific args,
+ // which never have param info.
+ if (PassProtoArgs && FPT->hasExtParameterInfos()) {
+ // ABI-specific suffix arguments are treated the same as variadic arguments.
+ addExtParameterInfosForCall(ParamInfos, FPT.getTypePtr(), TotalPrefixArgs,
+ ArgTypes.size());
+ }
return arrangeLLVMFunctionInfo(ResultType, /*instanceMethod=*/true,
/*chainCall=*/false, ArgTypes, Info,
ParamInfos, Required);
@@ -617,15 +651,20 @@ CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType,
}
/// Arrange a call to a C++ method, passing the given arguments.
+///
+/// numPrefixArgs is the number of ABI-specific prefix arguments we have. It
+/// does not count `this`.
const CGFunctionInfo &
CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args,
const FunctionProtoType *proto,
- RequiredArgs required) {
- unsigned numRequiredArgs =
- (proto->isVariadic() ? required.getNumRequiredArgs() : args.size());
- unsigned numPrefixArgs = numRequiredArgs - proto->getNumParams();
+ RequiredArgs required,
+ unsigned numPrefixArgs) {
+ assert(numPrefixArgs + 1 <= args.size() &&
+ "Emitting a call with less args than the required prefix?");
+ // Add one to account for `this`. It's a bit awkward here, but we don't count
+ // `this` in similar places elsewhere.
auto paramInfos =
- getExtParameterInfosForCall(proto, numPrefixArgs, args.size());
+ getExtParameterInfosForCall(proto, numPrefixArgs + 1, args.size());
// FIXME: Kill copy.
auto argTypes = getArgTypesForCall(Context, args);
@@ -668,6 +707,12 @@ CodeGenTypes::arrangeCall(const CGFunctionInfo &signature,
signature.getRequiredArgs());
}
+namespace clang {
+namespace CodeGen {
+void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI);
+}
+}
+
/// Arrange the argument and result information for an abstract value
/// of a given function type. This is the method which all of the
/// above functions ultimately defer to.
@@ -680,7 +725,7 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos,
RequiredArgs required) {
assert(std::all_of(argTypes.begin(), argTypes.end(),
- std::mem_fun_ref(&CanQualType::isCanonicalAsParam)));
+ [](CanQualType T) { return T.isCanonicalAsParam(); }));
// Lookup or create unique function info.
llvm::FoldingSetNodeID ID;
@@ -702,12 +747,16 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
bool inserted = FunctionsBeingProcessed.insert(FI).second;
(void)inserted;
assert(inserted && "Recursively being processed?");
-
+
// Compute ABI information.
- if (info.getCC() != CC_Swift) {
- getABIInfo().computeInfo(*FI);
- } else {
+ if (CC == llvm::CallingConv::SPIR_KERNEL) {
+ // Force target independent argument handling for the host visible
+ // kernel functions.
+ computeSPIRKernelABIInfo(CGM, *FI);
+ } else if (info.getCC() == CC_Swift) {
swiftcall::computeABIInfo(CGM, *FI);
+ } else {
+ getABIInfo().computeInfo(*FI);
}
// Loop over all of the computed argument and return value info. If any of
@@ -749,6 +798,7 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC,
FI->ChainCall = chainCall;
FI->NoReturn = info.getNoReturn();
FI->ReturnsRetained = info.getProducesResult();
+ FI->NoCallerSavedRegs = info.getNoCallerSavedRegs();
FI->Required = required;
FI->HasRegParm = info.getHasRegParm();
FI->RegParm = info.getRegParm();
@@ -1247,7 +1297,7 @@ static void CreateCoercedStore(llvm::Value *Src,
// If store is legal, just bitcast the src pointer.
if (SrcSize <= DstSize) {
- Dst = CGF.Builder.CreateBitCast(Dst, llvm::PointerType::getUnqual(SrcTy));
+ Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy);
BuildAggStore(CGF, Src, Dst, DstIsVolatile);
} else {
// Otherwise do coercion through memory. This is stupid, but
@@ -1547,9 +1597,10 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
case ABIArgInfo::Indirect: {
assert(NumIRArgs == 1);
- // indirect arguments are always on the stack, which is addr space #0.
+ // indirect arguments are always on the stack, which is the alloca addr space.
llvm::Type *LTy = ConvertTypeForMem(it->type);
- ArgTypes[FirstIRArg] = LTy->getPointerTo();
+ ArgTypes[FirstIRArg] = LTy->getPointerTo(
+ CGM.getDataLayout().getAllocaAddrSpace());
break;
}
@@ -1620,15 +1671,111 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx,
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
}
+void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
+ bool AttrOnCallSite,
+ llvm::AttrBuilder &FuncAttrs) {
+ // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
+ if (!HasOptnone) {
+ if (CodeGenOpts.OptimizeSize)
+ FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize);
+ if (CodeGenOpts.OptimizeSize == 2)
+ FuncAttrs.addAttribute(llvm::Attribute::MinSize);
+ }
+
+ if (CodeGenOpts.DisableRedZone)
+ FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
+ if (CodeGenOpts.NoImplicitFloat)
+ FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
+
+ if (AttrOnCallSite) {
+ // Attributes that should go on the call site only.
+ if (!CodeGenOpts.SimplifyLibCalls ||
+ CodeGenOpts.isNoBuiltinFunc(Name.data()))
+ FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin);
+ if (!CodeGenOpts.TrapFuncName.empty())
+ FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName);
+ } else {
+ // Attributes that should go on the function, but not the call site.
+ if (!CodeGenOpts.DisableFPElim) {
+ FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
+ } else if (CodeGenOpts.OmitLeafFramePointer) {
+ FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
+ FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
+ } else {
+ FuncAttrs.addAttribute("no-frame-pointer-elim", "true");
+ FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
+ }
+
+ FuncAttrs.addAttribute("less-precise-fpmad",
+ llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
+
+ if (!CodeGenOpts.FPDenormalMode.empty())
+ FuncAttrs.addAttribute("denormal-fp-math", CodeGenOpts.FPDenormalMode);
+
+ FuncAttrs.addAttribute("no-trapping-math",
+ llvm::toStringRef(CodeGenOpts.NoTrappingMath));
+
+ // TODO: Are these all needed?
+ // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
+ FuncAttrs.addAttribute("no-infs-fp-math",
+ llvm::toStringRef(CodeGenOpts.NoInfsFPMath));
+ FuncAttrs.addAttribute("no-nans-fp-math",
+ llvm::toStringRef(CodeGenOpts.NoNaNsFPMath));
+ FuncAttrs.addAttribute("unsafe-fp-math",
+ llvm::toStringRef(CodeGenOpts.UnsafeFPMath));
+ FuncAttrs.addAttribute("use-soft-float",
+ llvm::toStringRef(CodeGenOpts.SoftFloat));
+ FuncAttrs.addAttribute("stack-protector-buffer-size",
+ llvm::utostr(CodeGenOpts.SSPBufferSize));
+ FuncAttrs.addAttribute("no-signed-zeros-fp-math",
+ llvm::toStringRef(CodeGenOpts.NoSignedZeros));
+ FuncAttrs.addAttribute(
+ "correctly-rounded-divide-sqrt-fp-math",
+ llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt));
+
+ // TODO: Reciprocal estimate codegen options should apply to instructions?
+ std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals;
+ if (!Recips.empty())
+ FuncAttrs.addAttribute("reciprocal-estimates",
+ llvm::join(Recips.begin(), Recips.end(), ","));
+
+ if (CodeGenOpts.StackRealignment)
+ FuncAttrs.addAttribute("stackrealign");
+ if (CodeGenOpts.Backchain)
+ FuncAttrs.addAttribute("backchain");
+ }
+
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+ // Conservatively, mark all functions and calls in CUDA as convergent
+ // (meaning, they may call an intrinsically convergent op, such as
+ // __syncthreads(), and so can't have certain optimizations applied around
+ // them). LLVM will remove this attribute where it safely can.
+ FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+
+ // Exceptions aren't supported in CUDA device code.
+ FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
+
+ // Respect -fcuda-flush-denormals-to-zero.
+ if (getLangOpts().CUDADeviceFlushDenormalsToZero)
+ FuncAttrs.addAttribute("nvptx-f32ftz", "true");
+ }
+}
+
+void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) {
+ llvm::AttrBuilder FuncAttrs;
+ ConstructDefaultFnAttrList(F.getName(),
+ F.hasFnAttribute(llvm::Attribute::OptimizeNone),
+ /* AttrOnCallSite = */ false, FuncAttrs);
+ F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs);
+}
+
void CodeGenModule::ConstructAttributeList(
StringRef Name, const CGFunctionInfo &FI, CGCalleeInfo CalleeInfo,
- AttributeListType &PAL, unsigned &CallingConv, bool AttrOnCallSite) {
+ llvm::AttributeList &AttrList, unsigned &CallingConv, bool AttrOnCallSite) {
llvm::AttrBuilder FuncAttrs;
llvm::AttrBuilder RetAttrs;
- bool HasOptnone = false;
CallingConv = FI.getEffectiveCallingConvention();
-
if (FI.isNoReturn())
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
@@ -1639,7 +1786,7 @@ void CodeGenModule::ConstructAttributeList(
const Decl *TargetDecl = CalleeInfo.getCalleeDecl();
- bool HasAnyX86InterruptAttr = false;
+ bool HasOptnone = false;
// FIXME: handle sseregparm someday...
if (TargetDecl) {
if (TargetDecl->hasAttr<ReturnsTwiceAttr>())
@@ -1648,6 +1795,8 @@ void CodeGenModule::ConstructAttributeList(
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
if (TargetDecl->hasAttr<NoReturnAttr>())
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
+ if (TargetDecl->hasAttr<ColdAttr>())
+ FuncAttrs.addAttribute(llvm::Attribute::Cold);
if (TargetDecl->hasAttr<NoDuplicateAttr>())
FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
if (TargetDecl->hasAttr<ConvergentAttr>())
@@ -1678,8 +1827,9 @@ void CodeGenModule::ConstructAttributeList(
RetAttrs.addAttribute(llvm::Attribute::NoAlias);
if (TargetDecl->hasAttr<ReturnsNonNullAttr>())
RetAttrs.addAttribute(llvm::Attribute::NonNull);
+ if (TargetDecl->hasAttr<AnyX86NoCallerSavedRegistersAttr>())
+ FuncAttrs.addAttribute("no_caller_saved_registers");
- HasAnyX86InterruptAttr = TargetDecl->hasAttr<AnyX86InterruptAttr>();
HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {
Optional<unsigned> NumElemsParam;
@@ -1691,86 +1841,19 @@ void CodeGenModule::ConstructAttributeList(
}
}
- // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
- if (!HasOptnone) {
- if (CodeGenOpts.OptimizeSize)
- FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize);
- if (CodeGenOpts.OptimizeSize == 2)
- FuncAttrs.addAttribute(llvm::Attribute::MinSize);
- }
+ ConstructDefaultFnAttrList(Name, HasOptnone, AttrOnCallSite, FuncAttrs);
- if (CodeGenOpts.DisableRedZone)
- FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
- if (CodeGenOpts.NoImplicitFloat)
- FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
if (CodeGenOpts.EnableSegmentedStacks &&
!(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>()))
FuncAttrs.addAttribute("split-stack");
- if (AttrOnCallSite) {
- // Attributes that should go on the call site only.
- if (!CodeGenOpts.SimplifyLibCalls ||
- CodeGenOpts.isNoBuiltinFunc(Name.data()))
- FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin);
- if (!CodeGenOpts.TrapFuncName.empty())
- FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName);
- } else {
- // Attributes that should go on the function, but not the call site.
- if (!CodeGenOpts.DisableFPElim) {
- FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
- } else if (CodeGenOpts.OmitLeafFramePointer) {
- FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
- FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
- } else {
- FuncAttrs.addAttribute("no-frame-pointer-elim", "true");
- FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
- }
-
+ if (!AttrOnCallSite) {
bool DisableTailCalls =
- CodeGenOpts.DisableTailCalls || HasAnyX86InterruptAttr ||
- (TargetDecl && TargetDecl->hasAttr<DisableTailCallsAttr>());
- FuncAttrs.addAttribute(
- "disable-tail-calls",
- llvm::toStringRef(DisableTailCalls));
-
- FuncAttrs.addAttribute("less-precise-fpmad",
- llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
-
- if (!CodeGenOpts.FPDenormalMode.empty())
- FuncAttrs.addAttribute("denormal-fp-math",
- CodeGenOpts.FPDenormalMode);
-
- FuncAttrs.addAttribute("no-trapping-math",
- llvm::toStringRef(CodeGenOpts.NoTrappingMath));
-
- // TODO: Are these all needed?
- // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
- FuncAttrs.addAttribute("no-infs-fp-math",
- llvm::toStringRef(CodeGenOpts.NoInfsFPMath));
- FuncAttrs.addAttribute("no-nans-fp-math",
- llvm::toStringRef(CodeGenOpts.NoNaNsFPMath));
- FuncAttrs.addAttribute("unsafe-fp-math",
- llvm::toStringRef(CodeGenOpts.UnsafeFPMath));
- FuncAttrs.addAttribute("use-soft-float",
- llvm::toStringRef(CodeGenOpts.SoftFloat));
- FuncAttrs.addAttribute("stack-protector-buffer-size",
- llvm::utostr(CodeGenOpts.SSPBufferSize));
- FuncAttrs.addAttribute("no-signed-zeros-fp-math",
- llvm::toStringRef(CodeGenOpts.NoSignedZeros));
- FuncAttrs.addAttribute(
- "correctly-rounded-divide-sqrt-fp-math",
- llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt));
-
- // TODO: Reciprocal estimate codegen options should apply to instructions?
- std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals;
- if (!Recips.empty())
- FuncAttrs.addAttribute("reciprocal-estimates",
- llvm::join(Recips.begin(), Recips.end(), ","));
-
- if (CodeGenOpts.StackRealignment)
- FuncAttrs.addAttribute("stackrealign");
- if (CodeGenOpts.Backchain)
- FuncAttrs.addAttribute("backchain");
+ CodeGenOpts.DisableTailCalls ||
+ (TargetDecl && (TargetDecl->hasAttr<DisableTailCallsAttr>() ||
+ TargetDecl->hasAttr<AnyX86InterruptAttr>()));
+ FuncAttrs.addAttribute("disable-tail-calls",
+ llvm::toStringRef(DisableTailCalls));
// Add target-cpu and target-features attributes to functions. If
// we have a decl for the function and it has a target attribute then
@@ -1794,8 +1877,8 @@ void CodeGenModule::ConstructAttributeList(
// the function.
const auto *TD = FD->getAttr<TargetAttr>();
TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse();
- if (ParsedAttr.second != "")
- TargetCPU = ParsedAttr.second;
+ if (ParsedAttr.Architecture != "")
+ TargetCPU = ParsedAttr.Architecture;
if (TargetCPU != "")
FuncAttrs.addAttribute("target-cpu", TargetCPU);
if (!Features.empty()) {
@@ -1819,21 +1902,6 @@ void CodeGenModule::ConstructAttributeList(
}
}
- if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
- // Conservatively, mark all functions and calls in CUDA as convergent
- // (meaning, they may call an intrinsically convergent op, such as
- // __syncthreads(), and so can't have certain optimizations applied around
- // them). LLVM will remove this attribute where it safely can.
- FuncAttrs.addAttribute(llvm::Attribute::Convergent);
-
- // Exceptions aren't supported in CUDA device code.
- FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
-
- // Respect -fcuda-flush-denormals-to-zero.
- if (getLangOpts().CUDADeviceFlushDenormalsToZero)
- FuncAttrs.addAttribute("nvptx-f32ftz", "true");
- }
-
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
QualType RetTy = FI.getReturnType();
@@ -1876,13 +1944,8 @@ void CodeGenModule::ConstructAttributeList(
RetAttrs.addAttribute(llvm::Attribute::NonNull);
}
- // Attach return attributes.
- if (RetAttrs.hasAttributes()) {
- PAL.push_back(llvm::AttributeSet::get(
- getLLVMContext(), llvm::AttributeSet::ReturnIndex, RetAttrs));
- }
-
bool hasUsedSRet = false;
+ SmallVector<llvm::AttributeSet, 4> ArgAttrs(IRFunctionArgs.totalIRArgs());
// Attach attributes to sret.
if (IRFunctionArgs.hasSRetArg()) {
@@ -1891,16 +1954,16 @@ void CodeGenModule::ConstructAttributeList(
hasUsedSRet = true;
if (RetAI.getInReg())
SRETAttrs.addAttribute(llvm::Attribute::InReg);
- PAL.push_back(llvm::AttributeSet::get(
- getLLVMContext(), IRFunctionArgs.getSRetArgNo() + 1, SRETAttrs));
+ ArgAttrs[IRFunctionArgs.getSRetArgNo()] =
+ llvm::AttributeSet::get(getLLVMContext(), SRETAttrs);
}
// Attach attributes to inalloca argument.
if (IRFunctionArgs.hasInallocaArg()) {
llvm::AttrBuilder Attrs;
Attrs.addAttribute(llvm::Attribute::InAlloca);
- PAL.push_back(llvm::AttributeSet::get(
- getLLVMContext(), IRFunctionArgs.getInallocaArgNo() + 1, Attrs));
+ ArgAttrs[IRFunctionArgs.getInallocaArgNo()] =
+ llvm::AttributeSet::get(getLLVMContext(), Attrs);
}
unsigned ArgNo = 0;
@@ -1913,10 +1976,12 @@ void CodeGenModule::ConstructAttributeList(
// Add attribute for padding argument, if necessary.
if (IRFunctionArgs.hasPaddingArg(ArgNo)) {
- if (AI.getPaddingInReg())
- PAL.push_back(llvm::AttributeSet::get(
- getLLVMContext(), IRFunctionArgs.getPaddingArgNo(ArgNo) + 1,
- llvm::Attribute::InReg));
+ if (AI.getPaddingInReg()) {
+ ArgAttrs[IRFunctionArgs.getPaddingArgNo(ArgNo)] =
+ llvm::AttributeSet::get(
+ getLLVMContext(),
+ llvm::AttrBuilder().addAttribute(llvm::Attribute::InReg));
+ }
}
// 'restrict' -> 'noalias' is done in EmitFunctionProlog when we
@@ -2031,17 +2096,15 @@ void CodeGenModule::ConstructAttributeList(
unsigned FirstIRArg, NumIRArgs;
std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
for (unsigned i = 0; i < NumIRArgs; i++)
- PAL.push_back(llvm::AttributeSet::get(getLLVMContext(),
- FirstIRArg + i + 1, Attrs));
+ ArgAttrs[FirstIRArg + i] =
+ llvm::AttributeSet::get(getLLVMContext(), Attrs);
}
}
assert(ArgNo == FI.arg_size());
- if (FuncAttrs.hasAttributes())
- PAL.push_back(llvm::
- AttributeSet::get(getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- FuncAttrs));
+ AttrList = llvm::AttributeList::get(
+ getLLVMContext(), llvm::AttributeSet::get(getLLVMContext(), FuncAttrs),
+ llvm::AttributeSet::get(getLLVMContext(), RetAttrs), ArgAttrs);
}
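The net result of this rewrite is a one-shot construction of an llvm::AttributeList from function, return, and per-argument attribute sets, replacing the old vector of index-tagged AttributeSets. A compact sketch of that shape (the NoUnwind attribute is just an example; Ctx and NumIRArgs are parameters standing in for the real context and IR argument count):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/LLVMContext.h"
    static llvm::AttributeList buildAttrs(llvm::LLVMContext &Ctx,
                                          unsigned NumIRArgs) {
      llvm::AttrBuilder FnAttrs, RetAttrs;
      FnAttrs.addAttribute(llvm::Attribute::NoUnwind);  // example fn attribute
      llvm::SmallVector<llvm::AttributeSet, 4> ArgAttrs(NumIRArgs);
      return llvm::AttributeList::get(
          Ctx, llvm::AttributeSet::get(Ctx, FnAttrs),   // function attributes
          llvm::AttributeSet::get(Ctx, RetAttrs),       // return attributes
          ArgAttrs);                                    // one set per IR arg
    }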
/// An argument came in as a promoted argument; demote it back to its
@@ -2152,8 +2215,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
if (IRFunctionArgs.hasSRetArg()) {
auto AI = cast<llvm::Argument>(FnArgs[IRFunctionArgs.getSRetArgNo()]);
AI->setName("agg.result");
- AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), AI->getArgNo() + 1,
- llvm::Attribute::NoAlias));
+ AI->addAttr(llvm::Attribute::NoAlias);
}
// Track if we received the parameter as a pointer (indirect, byval, or
@@ -2244,9 +2306,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) {
if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(),
PVD->getFunctionScopeIndex()))
- AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
- AI->getArgNo() + 1,
- llvm::Attribute::NonNull));
+ AI->addAttr(llvm::Attribute::NonNull);
QualType OTy = PVD->getOriginalType();
if (const auto *ArrTy =
@@ -2263,12 +2323,9 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
llvm::AttrBuilder Attrs;
Attrs.addDereferenceableAttr(
getContext().getTypeSizeInChars(ETy).getQuantity()*ArrSize);
- AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
- AI->getArgNo() + 1, Attrs));
+ AI->addAttrs(Attrs);
} else if (getContext().getTargetAddressSpace(ETy) == 0) {
- AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
- AI->getArgNo() + 1,
- llvm::Attribute::NonNull));
+ AI->addAttr(llvm::Attribute::NonNull);
}
}
} else if (const auto *ArrTy =
@@ -2278,35 +2335,26 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// we know that it must be nonnull.
if (ArrTy->getSizeModifier() == VariableArrayType::Static &&
!getContext().getTargetAddressSpace(ArrTy->getElementType()))
- AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
- AI->getArgNo() + 1,
- llvm::Attribute::NonNull));
+ AI->addAttr(llvm::Attribute::NonNull);
}
const auto *AVAttr = PVD->getAttr<AlignValueAttr>();
if (!AVAttr)
if (const auto *TOTy = dyn_cast<TypedefType>(OTy))
AVAttr = TOTy->getDecl()->getAttr<AlignValueAttr>();
- if (AVAttr) {
+ if (AVAttr) {
llvm::Value *AlignmentValue =
EmitScalarExpr(AVAttr->getAlignment());
llvm::ConstantInt *AlignmentCI =
cast<llvm::ConstantInt>(AlignmentValue);
- unsigned Alignment =
- std::min((unsigned) AlignmentCI->getZExtValue(),
- +llvm::Value::MaximumAlignment);
-
- llvm::AttrBuilder Attrs;
- Attrs.addAlignmentAttr(Alignment);
- AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
- AI->getArgNo() + 1, Attrs));
+ unsigned Alignment = std::min((unsigned)AlignmentCI->getZExtValue(),
+ +llvm::Value::MaximumAlignment);
+ AI->addAttrs(llvm::AttrBuilder().addAlignmentAttr(Alignment));
}
}
if (Arg->getType().isRestrictQualified())
- AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
- AI->getArgNo() + 1,
- llvm::Attribute::NoAlias));
+ AI->addAttr(llvm::Attribute::NoAlias);
// LLVM expects swifterror parameters to be used in very restricted
// ways. Copy the value into a less-restricted temporary.
@@ -2364,8 +2412,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
Address AddrToStoreInto = Address::invalid();
if (SrcSize <= DstSize) {
- AddrToStoreInto =
- Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(STy));
+ AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
} else {
AddrToStoreInto =
CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
@@ -2858,19 +2905,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
llvm::Instruction *Ret;
if (RV) {
- if (CurCodeDecl && SanOpts.has(SanitizerKind::ReturnsNonnullAttribute)) {
- if (auto RetNNAttr = CurCodeDecl->getAttr<ReturnsNonNullAttr>()) {
- SanitizerScope SanScope(this);
- llvm::Value *Cond = Builder.CreateICmpNE(
- RV, llvm::Constant::getNullValue(RV->getType()));
- llvm::Constant *StaticData[] = {
- EmitCheckSourceLocation(EndLoc),
- EmitCheckSourceLocation(RetNNAttr->getLocation()),
- };
- EmitCheck(std::make_pair(Cond, SanitizerKind::ReturnsNonnullAttribute),
- SanitizerHandler::NonnullReturn, StaticData, None);
- }
- }
+ EmitReturnValueCheck(RV);
Ret = Builder.CreateRet(RV);
} else {
Ret = Builder.CreateRetVoid();
@@ -2880,6 +2915,65 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
Ret->setDebugLoc(std::move(RetDbgLoc));
}
+void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV) {
+ // A current decl may not be available when emitting vtable thunks.
+ if (!CurCodeDecl)
+ return;
+
+ ReturnsNonNullAttr *RetNNAttr = nullptr;
+ if (SanOpts.has(SanitizerKind::ReturnsNonnullAttribute))
+ RetNNAttr = CurCodeDecl->getAttr<ReturnsNonNullAttr>();
+
+ if (!RetNNAttr && !requiresReturnValueNullabilityCheck())
+ return;
+
+ // Prefer the returns_nonnull attribute if it's present.
+ SourceLocation AttrLoc;
+ SanitizerMask CheckKind;
+ SanitizerHandler Handler;
+ if (RetNNAttr) {
+ assert(!requiresReturnValueNullabilityCheck() &&
+ "Cannot check nullability and the nonnull attribute");
+ AttrLoc = RetNNAttr->getLocation();
+ CheckKind = SanitizerKind::ReturnsNonnullAttribute;
+ Handler = SanitizerHandler::NonnullReturn;
+ } else {
+ if (auto *DD = dyn_cast<DeclaratorDecl>(CurCodeDecl))
+ if (auto *TSI = DD->getTypeSourceInfo())
+ if (auto FTL = TSI->getTypeLoc().castAs<FunctionTypeLoc>())
+ AttrLoc = FTL.getReturnLoc().findNullabilityLoc();
+ CheckKind = SanitizerKind::NullabilityReturn;
+ Handler = SanitizerHandler::NullabilityReturn;
+ }
+
+ SanitizerScope SanScope(this);
+
+ // Make sure the "return" source location is valid. If we're checking a
+ // nullability annotation, make sure the preconditions for the check are met.
+ llvm::BasicBlock *Check = createBasicBlock("nullcheck");
+ llvm::BasicBlock *NoCheck = createBasicBlock("no.nullcheck");
+ llvm::Value *SLocPtr = Builder.CreateLoad(ReturnLocation, "return.sloc.load");
+ llvm::Value *CanNullCheck = Builder.CreateIsNotNull(SLocPtr);
+ if (requiresReturnValueNullabilityCheck())
+ CanNullCheck =
+ Builder.CreateAnd(CanNullCheck, RetValNullabilityPrecondition);
+ Builder.CreateCondBr(CanNullCheck, Check, NoCheck);
+ EmitBlock(Check);
+
+ // Now do the null check.
+ llvm::Value *Cond = Builder.CreateIsNotNull(RV);
+ llvm::Constant *StaticData[] = {EmitCheckSourceLocation(AttrLoc)};
+ llvm::Value *DynamicData[] = {SLocPtr};
+ EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, DynamicData);
+
+ EmitBlock(NoCheck);
+
+#ifndef NDEBUG
+ // The return location should not be used after the check has been emitted.
+ ReturnLocation = Address::invalid();
+#endif
+}
+
static bool isInAllocaArgument(CGCXXABI &ABI, QualType type) {
const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
return RD && ABI.getRecordArgABI(RD) == CGCXXABI::RAA_DirectInMemory;
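At the source level, the new EmitReturnValueCheck covers both the returns_nonnull attribute and Clang's nullability annotations. A hedged example of code that the -fsanitize=nullability-return check added here would flag at run time:

    // Illustration only: with -fsanitize=nullability-return, returning a null
    // pointer from a function whose return type is annotated _Nonnull triggers
    // the NullabilityReturn handler emitted above.
    int *_Nonnull pick(int *p, bool use) {
      return use ? p : nullptr;   // nullptr here is diagnosed at run time
    }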
@@ -3188,50 +3282,63 @@ void CallArgList::freeArgumentMemory(CodeGenFunction &CGF) const {
void CodeGenFunction::EmitNonNullArgCheck(RValue RV, QualType ArgType,
SourceLocation ArgLoc,
- const FunctionDecl *FD,
+ AbstractCallee AC,
unsigned ParmNum) {
- if (!SanOpts.has(SanitizerKind::NonnullAttribute) || !FD)
+ if (!AC.getDecl() || !(SanOpts.has(SanitizerKind::NonnullAttribute) ||
+ SanOpts.has(SanitizerKind::NullabilityArg)))
return;
- auto PVD = ParmNum < FD->getNumParams() ? FD->getParamDecl(ParmNum) : nullptr;
+
+ // The param decl may be missing in a variadic function.
+ auto PVD = ParmNum < AC.getNumParams() ? AC.getParamDecl(ParmNum) : nullptr;
unsigned ArgNo = PVD ? PVD->getFunctionScopeIndex() : ParmNum;
- auto NNAttr = getNonNullAttr(FD, PVD, ArgType, ArgNo);
- if (!NNAttr)
+
+ // Prefer the nonnull attribute if it's present.
+ const NonNullAttr *NNAttr = nullptr;
+ if (SanOpts.has(SanitizerKind::NonnullAttribute))
+ NNAttr = getNonNullAttr(AC.getDecl(), PVD, ArgType, ArgNo);
+
+ bool CanCheckNullability = false;
+ if (SanOpts.has(SanitizerKind::NullabilityArg) && !NNAttr && PVD) {
+ auto Nullability = PVD->getType()->getNullability(getContext());
+ CanCheckNullability = Nullability &&
+ *Nullability == NullabilityKind::NonNull &&
+ PVD->getTypeSourceInfo();
+ }
+
+ if (!NNAttr && !CanCheckNullability)
return;
+
+ SourceLocation AttrLoc;
+ SanitizerMask CheckKind;
+ SanitizerHandler Handler;
+ if (NNAttr) {
+ AttrLoc = NNAttr->getLocation();
+ CheckKind = SanitizerKind::NonnullAttribute;
+ Handler = SanitizerHandler::NonnullArg;
+ } else {
+ AttrLoc = PVD->getTypeSourceInfo()->getTypeLoc().findNullabilityLoc();
+ CheckKind = SanitizerKind::NullabilityArg;
+ Handler = SanitizerHandler::NullabilityArg;
+ }
+
SanitizerScope SanScope(this);
assert(RV.isScalar());
llvm::Value *V = RV.getScalarVal();
llvm::Value *Cond =
Builder.CreateICmpNE(V, llvm::Constant::getNullValue(V->getType()));
llvm::Constant *StaticData[] = {
- EmitCheckSourceLocation(ArgLoc),
- EmitCheckSourceLocation(NNAttr->getLocation()),
+ EmitCheckSourceLocation(ArgLoc), EmitCheckSourceLocation(AttrLoc),
llvm::ConstantInt::get(Int32Ty, ArgNo + 1),
};
- EmitCheck(std::make_pair(Cond, SanitizerKind::NonnullAttribute),
- SanitizerHandler::NonnullArg, StaticData, None);
+ EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, None);
}
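The argument-side counterpart, also added here, checks call sites against _Nonnull parameter annotations under -fsanitize=nullability-arg. A small hedged example:

    // Illustration only: under -fsanitize=nullability-arg, passing a null
    // pointer for a _Nonnull parameter runs the NullabilityArg handler wired
    // up above.
    void consume(int *_Nonnull p) { (void)p; }
    void caller() {
      consume(nullptr);   // flagged at run time by the nullability-arg check
    }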
void CodeGenFunction::EmitCallArgs(
CallArgList &Args, ArrayRef<QualType> ArgTypes,
llvm::iterator_range<CallExpr::const_arg_iterator> ArgRange,
- const FunctionDecl *CalleeDecl, unsigned ParamsToSkip,
- EvaluationOrder Order) {
+ AbstractCallee AC, unsigned ParamsToSkip, EvaluationOrder Order) {
assert((int)ArgTypes.size() == (ArgRange.end() - ArgRange.begin()));
- auto MaybeEmitImplicitObjectSize = [&](unsigned I, const Expr *Arg) {
- if (CalleeDecl == nullptr || I >= CalleeDecl->getNumParams())
- return;
- auto *PS = CalleeDecl->getParamDecl(I)->getAttr<PassObjectSizeAttr>();
- if (PS == nullptr)
- return;
-
- const auto &Context = getContext();
- auto SizeTy = Context.getSizeType();
- auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
- llvm::Value *V = evaluateOrEmitBuiltinObjectSize(Arg, PS->getType(), T);
- Args.add(RValue::get(V), SizeTy);
- };
-
// We *have* to evaluate arguments from right to left in the MS C++ ABI,
// because arguments are destroyed left to right in the callee. As a special
// case, there are certain language constructs that require left-to-right
@@ -3242,6 +3349,27 @@ void CodeGenFunction::EmitCallArgs(
? Order == EvaluationOrder::ForceLeftToRight
: Order != EvaluationOrder::ForceRightToLeft;
+ auto MaybeEmitImplicitObjectSize = [&](unsigned I, const Expr *Arg,
+ RValue EmittedArg) {
+ if (!AC.hasFunctionDecl() || I >= AC.getNumParams())
+ return;
+ auto *PS = AC.getParamDecl(I)->getAttr<PassObjectSizeAttr>();
+ if (PS == nullptr)
+ return;
+
+ const auto &Context = getContext();
+ auto SizeTy = Context.getSizeType();
+ auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
+ assert(EmittedArg.getScalarVal() && "We emitted nothing for the arg?");
+ llvm::Value *V = evaluateOrEmitBuiltinObjectSize(Arg, PS->getType(), T,
+ EmittedArg.getScalarVal());
+ Args.add(RValue::get(V), SizeTy);
+ // If we're emitting args in reverse, be sure to do so with
+ // pass_object_size, as well.
+ if (!LeftToRight)
+ std::swap(Args.back(), *(&Args.back() - 1));
+ };
+
// Insert a stack save if we're going to need any inalloca args.
bool HasInAllocaArgs = false;
if (CGM.getTarget().getCXXABI().isMicrosoft()) {
@@ -3259,11 +3387,28 @@ void CodeGenFunction::EmitCallArgs(
for (unsigned I = 0, E = ArgTypes.size(); I != E; ++I) {
unsigned Idx = LeftToRight ? I : E - I - 1;
CallExpr::const_arg_iterator Arg = ArgRange.begin() + Idx;
- if (!LeftToRight) MaybeEmitImplicitObjectSize(Idx, *Arg);
+ unsigned InitialArgSize = Args.size();
+ // If *Arg is an ObjCIndirectCopyRestoreExpr, check that either the types of
+ // the argument and parameter match or the objc method is parameterized.
+ assert((!isa<ObjCIndirectCopyRestoreExpr>(*Arg) ||
+ getContext().hasSameUnqualifiedType((*Arg)->getType(),
+ ArgTypes[Idx]) ||
+ (isa<ObjCMethodDecl>(AC.getDecl()) &&
+ isObjCMethodWithTypeParams(cast<ObjCMethodDecl>(AC.getDecl())))) &&
+ "Argument and parameter types don't match");
EmitCallArg(Args, *Arg, ArgTypes[Idx]);
- EmitNonNullArgCheck(Args.back().RV, ArgTypes[Idx], (*Arg)->getExprLoc(),
- CalleeDecl, ParamsToSkip + Idx);
- if (LeftToRight) MaybeEmitImplicitObjectSize(Idx, *Arg);
+ // In particular, we depend on it being the last arg in Args, and the
+ // objectsize bits depend on there only being one arg if !LeftToRight.
+ assert(InitialArgSize + 1 == Args.size() &&
+ "The code below depends on only adding one arg per EmitCallArg");
+ (void)InitialArgSize;
+ RValue RVArg = Args.back().RV;
+ EmitNonNullArgCheck(RVArg, ArgTypes[Idx], (*Arg)->getExprLoc(), AC,
+ ParamsToSkip + Idx);
+ // @llvm.objectsize should never have side-effects and shouldn't need
+ // destruction/cleanups, so we can safely "emit" it after its arg,
+ // regardless of right-to-leftness
+ MaybeEmitImplicitObjectSize(Idx, *Arg, RVArg);
}
if (!LeftToRight) {
@@ -3311,7 +3456,6 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
if (const ObjCIndirectCopyRestoreExpr *CRE
= dyn_cast<ObjCIndirectCopyRestoreExpr>(E)) {
assert(getLangOpts().ObjCAutoRefCount);
- assert(getContext().hasSameUnqualifiedType(E->getType(), type));
return emitWritebackArg(*this, args, CRE);
}
@@ -3571,12 +3715,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Address ArgMemory = Address::invalid();
const llvm::StructLayout *ArgMemoryLayout = nullptr;
if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) {
- ArgMemoryLayout = CGM.getDataLayout().getStructLayout(ArgStruct);
+ const llvm::DataLayout &DL = CGM.getDataLayout();
+ ArgMemoryLayout = DL.getStructLayout(ArgStruct);
llvm::Instruction *IP = CallArgs.getStackBase();
llvm::AllocaInst *AI;
if (IP) {
IP = IP->getNextNode();
- AI = new llvm::AllocaInst(ArgStruct, "argmem", IP);
+ AI = new llvm::AllocaInst(ArgStruct, DL.getAllocaAddrSpace(),
+ "argmem", IP);
} else {
AI = CreateTempAlloca(ArgStruct, "argmem");
}
@@ -3675,7 +3821,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
assert(NumIRArgs == 1);
if (RV.isScalar() || RV.isComplex()) {
// Make a temporary alloca to pass the argument.
- Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign());
+ Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
+ "indirect-arg-temp", false);
IRCallArgs[FirstIRArg] = Addr.getPointer();
LValue argLV = MakeAddrLValue(Addr, I->Ty);
@@ -3704,7 +3851,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
< Align.getQuantity()) ||
(ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
// Create an aligned temporary, and copy to it.
- Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign());
+ Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
+ "byval-temp", false);
IRCallArgs[FirstIRArg] = AI.getPointer();
EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified());
} else {
@@ -3972,13 +4120,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Compute the calling convention and attributes.
unsigned CallingConv;
- CodeGen::AttributeListType AttributeList;
+ llvm::AttributeList Attrs;
CGM.ConstructAttributeList(CalleePtr->getName(), CallInfo,
- Callee.getAbstractInfo(),
- AttributeList, CallingConv,
+ Callee.getAbstractInfo(), Attrs, CallingConv,
/*AttrOnCallSite=*/true);
- llvm::AttributeSet Attrs = llvm::AttributeSet::get(getLLVMContext(),
- AttributeList);
// Apply some call-site-specific attributes.
// TODO: work this into building the attribute set.
@@ -3989,15 +4134,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
!(Callee.getAbstractInfo().getCalleeDecl() &&
Callee.getAbstractInfo().getCalleeDecl()->hasAttr<NoInlineAttr>())) {
Attrs =
- Attrs.addAttribute(getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
+ Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex,
llvm::Attribute::AlwaysInline);
}
// Disable inlining inside SEH __try blocks.
if (isSEHTryScope()) {
Attrs =
- Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
+ Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoInline);
}
@@ -4014,7 +4158,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
CannotThrow = true;
} else {
// Otherwise, nounwind call sites will never throw.
- CannotThrow = Attrs.hasAttribute(llvm::AttributeSet::FunctionIndex,
+ CannotThrow = Attrs.hasAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind);
}
llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest();
@@ -4127,6 +4271,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Builder.CreateStore(elt, eltAddr);
}
// FALLTHROUGH
+ LLVM_FALLTHROUGH;
}
case ABIArgInfo::InAlloca:
@@ -4210,6 +4355,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(Alignment);
EmitAlignmentAssumption(Ret.getScalarVal(), AlignmentCI->getZExtValue(),
OffsetValue);
+ } else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) {
+ llvm::Value *ParamVal =
+ CallArgs[AA->getParamIndex() - 1].RV.getScalarVal();
+ EmitAlignmentAssumption(Ret.getScalarVal(), ParamVal);
}
}
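The last hunk teaches EmitCall about alloc_align: when the callee carries that attribute, the returned pointer's alignment is assumed from the runtime value of the designated parameter. A hedged source-level example; the attribute's 1-based parameter index matches the `AA->getParamIndex() - 1` lookup above:

    // Illustration only: alloc_align(2) says the 2nd parameter holds the
    // alignment of the returned pointer; the change above emits an alignment
    // assumption on the call's result from that runtime value.
    void *aligned_alloc_like(unsigned long size, unsigned long align)
        __attribute__((alloc_align(2)));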
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.h b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.h
index 031ce83..7e10407 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.h
@@ -25,10 +25,10 @@
#include "ABIInfo.h"
namespace llvm {
- class AttributeSet;
- class Function;
- class Type;
- class Value;
+class AttributeList;
+class Function;
+class Type;
+class Value;
}
namespace clang {
@@ -39,28 +39,27 @@ namespace clang {
class VarDecl;
namespace CodeGen {
- typedef SmallVector<llvm::AttributeSet, 8> AttributeListType;
- /// Abstract information about a function or function prototype.
- class CGCalleeInfo {
- /// \brief The function prototype of the callee.
- const FunctionProtoType *CalleeProtoTy;
- /// \brief The function declaration of the callee.
- const Decl *CalleeDecl;
-
- public:
- explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl(nullptr) {}
- CGCalleeInfo(const FunctionProtoType *calleeProtoTy, const Decl *calleeDecl)
- : CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {}
- CGCalleeInfo(const FunctionProtoType *calleeProtoTy)
- : CalleeProtoTy(calleeProtoTy), CalleeDecl(nullptr) {}
- CGCalleeInfo(const Decl *calleeDecl)
- : CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {}
-
- const FunctionProtoType *getCalleeFunctionProtoType() const {
- return CalleeProtoTy;
- }
- const Decl *getCalleeDecl() const { return CalleeDecl; }
+/// Abstract information about a function or function prototype.
+class CGCalleeInfo {
+ /// \brief The function prototype of the callee.
+ const FunctionProtoType *CalleeProtoTy;
+ /// \brief The function declaration of the callee.
+ const Decl *CalleeDecl;
+
+public:
+ explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl(nullptr) {}
+ CGCalleeInfo(const FunctionProtoType *calleeProtoTy, const Decl *calleeDecl)
+ : CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {}
+ CGCalleeInfo(const FunctionProtoType *calleeProtoTy)
+ : CalleeProtoTy(calleeProtoTy), CalleeDecl(nullptr) {}
+ CGCalleeInfo(const Decl *calleeDecl)
+ : CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {}
+
+ const FunctionProtoType *getCalleeFunctionProtoType() const {
+ return CalleeProtoTy;
+ }
+ const Decl *getCalleeDecl() const { return CalleeDecl; }
};
/// All available information about a concrete callee.
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp
index 05d0567..50d702c 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp
@@ -129,14 +129,14 @@ Address
CodeGenFunction::EmitCXXMemberDataPointerAddress(const Expr *E, Address base,
llvm::Value *memberPtr,
const MemberPointerType *memberPtrType,
- AlignmentSource *alignSource) {
+ LValueBaseInfo *BaseInfo) {
// Ask the ABI to compute the actual address.
llvm::Value *ptr =
CGM.getCXXABI().EmitMemberDataPointerAddress(*this, E, base,
memberPtr, memberPtrType);
QualType memberType = memberPtrType->getPointeeType();
- CharUnits memberAlign = getNaturalTypeAlignment(memberType, alignSource);
+ CharUnits memberAlign = getNaturalTypeAlignment(memberType, BaseInfo);
memberAlign =
CGM.getDynamicOffsetAlignment(base.getAlignment(),
memberPtrType->getClass()->getAsCXXRecordDecl(),
@@ -309,8 +309,10 @@ Address CodeGenFunction::GetAddressOfBaseClass(
// just do a bitcast; null checks are unnecessary.
if (NonVirtualOffset.isZero() && !VBase) {
if (sanitizePerformTypeCheck()) {
+ SanitizerSet SkippedChecks;
+ SkippedChecks.set(SanitizerKind::Null, !NullCheckValue);
EmitTypeCheck(TCK_Upcast, Loc, Value.getPointer(),
- DerivedTy, DerivedAlign, !NullCheckValue);
+ DerivedTy, DerivedAlign, SkippedChecks);
}
return Builder.CreateBitCast(Value, BasePtrTy);
}
@@ -331,8 +333,10 @@ Address CodeGenFunction::GetAddressOfBaseClass(
}
if (sanitizePerformTypeCheck()) {
+ SanitizerSet SkippedChecks;
+ SkippedChecks.set(SanitizerKind::Null, true);
EmitTypeCheck(VBase ? TCK_UpcastToVirtualBase : TCK_Upcast, Loc,
- Value.getPointer(), DerivedTy, DerivedAlign, true);
+ Value.getPointer(), DerivedTy, DerivedAlign, SkippedChecks);
}
// Compute the virtual offset.
@@ -685,7 +689,8 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, LValue LHS,
/// complete-to-base constructor delegation optimization, i.e.
/// emitting the complete constructor as a simple call to the base
/// constructor.
-static bool IsConstructorDelegationValid(const CXXConstructorDecl *Ctor) {
+bool CodeGenFunction::IsConstructorDelegationValid(
+ const CXXConstructorDecl *Ctor) {
// Currently we disable the optimization for classes with virtual
// bases because (1) the addresses of parameter variables need to be
@@ -1131,10 +1136,11 @@ namespace {
RHS = EC->getSubExpr();
if (!RHS)
return nullptr;
- MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS);
- if (dyn_cast<FieldDecl>(ME2->getMemberDecl()) != Field)
- return nullptr;
- return Field;
+ if (MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS)) {
+ if (ME2->getMemberDecl() == Field)
+ return Field;
+ }
+ return nullptr;
} else if (CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(S)) {
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MCE->getCalleeDecl());
if (!(MD && isMemcpyEquivalentSpecialMember(MD)))
@@ -1384,6 +1390,20 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
const CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(CurGD.getDecl());
CXXDtorType DtorType = CurGD.getDtorType();
+ // For an abstract class, non-base destructors are never used (and can't
+ // be emitted in general, because vbase dtors may not have been validated
+ // by Sema), but the Itanium ABI doesn't make them optional and Clang may
+ // in fact emit references to them from other compilations, so emit them
+ // as functions containing a trap instruction.
+ if (DtorType != Dtor_Base && Dtor->getParent()->isAbstract()) {
+ llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap);
+ TrapCall->setDoesNotReturn();
+ TrapCall->setDoesNotThrow();
+ Builder.CreateUnreachable();
+ Builder.ClearInsertionPoint();
+ return;
+ }
+
Stmt *Body = Dtor->getBody();
if (Body)
incrementProfileCounter(Body);
@@ -1416,9 +1436,7 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
// we'd introduce *two* handler blocks. In the Microsoft ABI, we
// always delegate because we might not have a definition in this TU.
switch (DtorType) {
- case Dtor_Comdat:
- llvm_unreachable("not expecting a COMDAT");
-
+ case Dtor_Comdat: llvm_unreachable("not expecting a COMDAT");
case Dtor_Deleting: llvm_unreachable("already handled deleting case");
case Dtor_Complete:
@@ -1433,7 +1451,9 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
/*Delegating=*/false, LoadCXXThisAddress());
break;
}
+
// Fallthrough: act like we're in the base variant.
+ LLVM_FALLTHROUGH;
case Dtor_Base:
assert(Body);
@@ -1950,7 +1970,11 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
// Add the rest of the user-supplied arguments.
const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>();
- EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor());
+ EvaluationOrder Order = E->isListInitialization()
+ ? EvaluationOrder::ForceLeftToRight
+ : EvaluationOrder::Default;
+ EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor(),
+ /*ParamsToSkip*/ 0, Order);
EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args);
}
@@ -1970,7 +1994,7 @@ static bool canEmitDelegateCallArgs(CodeGenFunction &CGF,
// Likewise if they're inalloca.
const CGFunctionInfo &Info =
- CGF.CGM.getTypes().arrangeCXXConstructorCall(Args, Ctor, Type, 0);
+ CGF.CGM.getTypes().arrangeCXXConstructorCall(Args, Ctor, Type, 0, 0);
if (Info.usesInAlloca())
return false;
}
@@ -2012,10 +2036,11 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
return;
}
+ bool PassPrototypeArgs = true;
// Check whether we can actually emit the constructor before trying to do so.
if (auto Inherited = D->getInheritedConstructor()) {
- if (getTypes().inheritingCtorHasParams(Inherited, Type) &&
- !canEmitDelegateCallArgs(*this, D, Type, Args)) {
+ PassPrototypeArgs = getTypes().inheritingCtorHasParams(Inherited, Type);
+ if (PassPrototypeArgs && !canEmitDelegateCallArgs(*this, D, Type, Args)) {
EmitInlinedInheritingCXXConstructorCall(D, Type, ForVirtualBase,
Delegating, Args);
return;
@@ -2023,14 +2048,15 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
}
// Insert any ABI-specific implicit constructor arguments.
- unsigned ExtraArgs = CGM.getCXXABI().addImplicitConstructorArgs(
- *this, D, Type, ForVirtualBase, Delegating, Args);
+ CGCXXABI::AddedStructorArgs ExtraArgs =
+ CGM.getCXXABI().addImplicitConstructorArgs(*this, D, Type, ForVirtualBase,
+ Delegating, Args);
// Emit the call.
llvm::Constant *CalleePtr =
CGM.getAddrOfCXXStructor(D, getFromCtorType(Type));
- const CGFunctionInfo &Info =
- CGM.getTypes().arrangeCXXConstructorCall(Args, D, Type, ExtraArgs);
+ const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall(
+ Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs);
CGCallee Callee = CGCallee::forDirect(CalleePtr, D);
EmitCall(Info, Callee, ReturnValueSlot(), Args);
@@ -2102,7 +2128,9 @@ void CodeGenFunction::EmitInheritedCXXConstructorCall(
void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall(
const CXXConstructorDecl *Ctor, CXXCtorType CtorType, bool ForVirtualBase,
bool Delegating, CallArgList &Args) {
- InlinedInheritingConstructorScope Scope(*this, GlobalDecl(Ctor, CtorType));
+ GlobalDecl GD(Ctor, CtorType);
+ InlinedInheritingConstructorScope Scope(*this, GD);
+ ApplyInlineDebugLocation DebugScope(*this, GD);
// Save the arguments to be passed to the inherited constructor.
CXXInheritedCtorInitExprArgs = Args;
@@ -2688,79 +2716,6 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad(
cast<llvm::PointerType>(VTable->getType())->getElementType());
}
-bool
-CodeGenFunction::CanDevirtualizeMemberFunctionCall(const Expr *Base,
- const CXXMethodDecl *MD) {
- // When building with -fapple-kext, all calls must go through the vtable since
- // the kernel linker can do runtime patching of vtables.
- if (getLangOpts().AppleKext)
- return false;
-
- // If the member function is marked 'final', we know that it can't be
- // overridden and can therefore devirtualize it unless it's pure virtual.
- if (MD->hasAttr<FinalAttr>())
- return !MD->isPure();
-
- // If the base expression (after skipping derived-to-base conversions) is a
- // class prvalue, then we can devirtualize.
- Base = Base->getBestDynamicClassTypeExpr();
- if (Base->isRValue() && Base->getType()->isRecordType())
- return true;
-
- // If we don't even know what we would call, we can't devirtualize.
- const CXXRecordDecl *BestDynamicDecl = Base->getBestDynamicClassType();
- if (!BestDynamicDecl)
- return false;
-
- // There may be a method corresponding to MD in a derived class.
- const CXXMethodDecl *DevirtualizedMethod =
- MD->getCorrespondingMethodInClass(BestDynamicDecl);
-
- // If that method is pure virtual, we can't devirtualize. If this code is
- // reached, the result would be UB, not a direct call to the derived class
- // function, and we can't assume the derived class function is defined.
- if (DevirtualizedMethod->isPure())
- return false;
-
- // If that method is marked final, we can devirtualize it.
- if (DevirtualizedMethod->hasAttr<FinalAttr>())
- return true;
-
- // Similarly, if the class itself is marked 'final' it can't be overridden
- // and we can therefore devirtualize the member function call.
- if (BestDynamicDecl->hasAttr<FinalAttr>())
- return true;
-
- if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Base)) {
- if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
- // This is a record decl. We know the type and can devirtualize it.
- return VD->getType()->isRecordType();
- }
-
- return false;
- }
-
- // We can devirtualize calls on an object accessed by a class member access
- // expression, since by C++11 [basic.life]p6 we know that it can't refer to
- // a derived class object constructed in the same location.
- if (const MemberExpr *ME = dyn_cast<MemberExpr>(Base))
- if (const ValueDecl *VD = dyn_cast<ValueDecl>(ME->getMemberDecl()))
- return VD->getType()->isRecordType();
-
- // Likewise for calls on an object accessed by a (non-reference) pointer to
- // member access.
- if (auto *BO = dyn_cast<BinaryOperator>(Base)) {
- if (BO->isPtrMemOp()) {
- auto *MPT = BO->getRHS()->getType()->castAs<MemberPointerType>();
- if (MPT->getPointeeType()->isRecordType())
- return true;
- }
- }
-
- // We can't devirtualize the call.
- return false;
-}
-
void CodeGenFunction::EmitForwardingCallToLambda(
const CXXMethodDecl *callOperator,
CallArgList &callArgs) {
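
A small C++ sketch (illustrative only, not from the patch) of the destructor case handled by the new early exit in EmitDestructorBody above: for an abstract class, the complete-object destructor can never run on a most-derived object, yet the Itanium ABI still requires the symbol, so it is now emitted as a trapping stub.

struct Abstract {                 // sketch only: class names are hypothetical
  virtual void f() = 0;           // abstract: no most-derived object can exist
  virtual ~Abstract();
};
Abstract::~Abstract() {}          // base (D2) destructor: emitted normally;
                                  // complete (D1) destructor: body becomes
                                  //   call @llvm.trap() ; unreachable
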
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp
index 3666858..b5453bc 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp
@@ -51,8 +51,7 @@ DominatingValue<RValue>::saved_type::save(CodeGenFunction &CGF, RValue rv) {
if (rv.isComplex()) {
CodeGenFunction::ComplexPairTy V = rv.getComplexVal();
llvm::Type *ComplexTy =
- llvm::StructType::get(V.first->getType(), V.second->getType(),
- (void*) nullptr);
+ llvm::StructType::get(V.first->getType(), V.second->getType());
Address addr = CGF.CreateDefaultAlignTempAlloca(ComplexTy, "saved-complex");
CGF.Builder.CreateStore(V.first,
CGF.Builder.CreateStructGEP(addr, 0, CharUnits()));
@@ -418,11 +417,15 @@ void CodeGenFunction::ResolveBranchFixups(llvm::BasicBlock *Block) {
}
/// Pops cleanup blocks until the given savepoint is reached.
-void CodeGenFunction::PopCleanupBlocks(EHScopeStack::stable_iterator Old) {
+void CodeGenFunction::PopCleanupBlocks(
+ EHScopeStack::stable_iterator Old,
+ std::initializer_list<llvm::Value **> ValuesToReload) {
assert(Old.isValid());
+ bool HadBranches = false;
while (EHStack.stable_begin() != Old) {
EHCleanupScope &Scope = cast<EHCleanupScope>(*EHStack.begin());
+ HadBranches |= Scope.hasBranches();
// As long as Old strictly encloses the scope's enclosing normal
// cleanup, we're going to emit another normal cleanup which
@@ -432,14 +435,48 @@ void CodeGenFunction::PopCleanupBlocks(EHScopeStack::stable_iterator Old) {
PopCleanupBlock(FallThroughIsBranchThrough);
}
+
+ // If we didn't have any branches, the insertion point before cleanups must
+ // dominate the current insertion point and we don't need to reload any
+ // values.
+ if (!HadBranches)
+ return;
+
+ // Spill and reload all values that the caller wants to be live at the current
+ // insertion point.
+ for (llvm::Value **ReloadedValue : ValuesToReload) {
+ auto *Inst = dyn_cast_or_null<llvm::Instruction>(*ReloadedValue);
+ if (!Inst)
+ continue;
+
+ // Don't spill static allocas; they dominate all cleanups. These are created
+ // by binding a reference to a local variable or temporary.
+ auto *AI = dyn_cast<llvm::AllocaInst>(Inst);
+ if (AI && AI->isStaticAlloca())
+ continue;
+
+ Address Tmp =
+ CreateDefaultAlignTempAlloca(Inst->getType(), "tmp.exprcleanup");
+
+ // Find an insertion point after Inst and spill it to the temporary.
+ llvm::BasicBlock::iterator InsertBefore;
+ if (auto *Invoke = dyn_cast<llvm::InvokeInst>(Inst))
+ InsertBefore = Invoke->getNormalDest()->getFirstInsertionPt();
+ else
+ InsertBefore = std::next(Inst->getIterator());
+ CGBuilderTy(CGM, &*InsertBefore).CreateStore(Inst, Tmp);
+
+ // Reload the value at the current insertion point.
+ *ReloadedValue = Builder.CreateLoad(Tmp);
+ }
}
/// Pops cleanup blocks until the given savepoint is reached, then add the
/// cleanups from the given savepoint in the lifetime-extended cleanups stack.
-void
-CodeGenFunction::PopCleanupBlocks(EHScopeStack::stable_iterator Old,
- size_t OldLifetimeExtendedSize) {
- PopCleanupBlocks(Old);
+void CodeGenFunction::PopCleanupBlocks(
+ EHScopeStack::stable_iterator Old, size_t OldLifetimeExtendedSize,
+ std::initializer_list<llvm::Value **> ValuesToReload) {
+ PopCleanupBlocks(Old, ValuesToReload);
// Move our deferred cleanups onto the EH stack.
for (size_t I = OldLifetimeExtendedSize,
@@ -578,7 +615,7 @@ static void destroyOptimisticNormalEntry(CodeGenFunction &CGF,
llvm::SwitchInst *si = cast<llvm::SwitchInst>(use.getUser());
if (si->getNumCases() == 1 && si->getDefaultDest() == unreachableBB) {
// Replace the switch with a branch.
- llvm::BranchInst::Create(si->case_begin().getCaseSuccessor(), si);
+ llvm::BranchInst::Create(si->case_begin()->getCaseSuccessor(), si);
// The switch operand is a load from the cleanup-dest alloca.
llvm::LoadInst *condition = cast<llvm::LoadInst>(si->getCondition());
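
A rough, hypothetical illustration of why PopCleanupBlocks now accepts ValuesToReload: when the cleanups being popped had branches into them, a value computed before the cleanups may no longer dominate the insertion point afterwards, so it is spilled to a "tmp.exprcleanup" alloca right after it is produced and reloaded once the scope is popped. Source along these lines could exercise that path (the exact trigger depends on the caller passing values to reload):

struct Guard { ~Guard(); };       // non-trivial cleanup pushed on the EH stack
int pick(const Guard &, bool);

int choose(bool b) {
  // The int produced by pick() exists before the Guard cleanup is emitted.
  // If the cleanup scope ends up with branches, the call result is stored to a
  // temporary alloca right after the call and reloaded after the cleanups, so
  // the reloaded value is what reaches the return.
  return pick(Guard(), b);
}
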
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCoroutine.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCoroutine.cpp
index 2fdb127..a65faa6 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCoroutine.cpp
@@ -11,29 +11,70 @@
//
//===----------------------------------------------------------------------===//
+#include "CGCleanup.h"
#include "CodeGenFunction.h"
+#include "llvm/ADT/ScopeExit.h"
#include "clang/AST/StmtCXX.h"
+#include "clang/AST/StmtVisitor.h"
using namespace clang;
using namespace CodeGen;
-namespace clang {
-namespace CodeGen {
+using llvm::Value;
+using llvm::BasicBlock;
+
+namespace {
+enum class AwaitKind { Init, Normal, Yield, Final };
+static constexpr llvm::StringLiteral AwaitKindStr[] = {"init", "await", "yield",
+ "final"};
+}
+
+struct clang::CodeGen::CGCoroData {
+ // What is the current await expression kind and how many
+ // await/yield expressions were encountered so far.
+ // These are used to generate pretty labels for await expressions in LLVM IR.
+ AwaitKind CurrentAwaitKind = AwaitKind::Init;
+ unsigned AwaitNum = 0;
+ unsigned YieldNum = 0;
+
+ // How many co_return statements are in the coroutine. Used to decide whether
+ // we need to add co_return; equivalent at the end of the user authored body.
+ unsigned CoreturnCount = 0;
+
+ // A branch to this block is emitted when the coroutine needs to suspend.
+ llvm::BasicBlock *SuspendBB = nullptr;
+
+ // Stores the jump destination just before the coroutine memory is freed.
+ // This is the destination that every suspend point jumps to for the cleanup
+ // branch.
+ CodeGenFunction::JumpDest CleanupJD;
+
+ // Stores the jump destination just before the final suspend. The co_return
+ // statements jump to this point after calling the return_xxx promise member.
+ CodeGenFunction::JumpDest FinalJD;
-struct CGCoroData {
// Stores the llvm.coro.id emitted in the function so that we can supply it
// as the first argument to coro.begin, coro.alloc and coro.free intrinsics.
// Note: llvm.coro.id returns a token that cannot be directly expressed in a
// builtin.
llvm::CallInst *CoroId = nullptr;
+
+ // Stores the llvm.coro.begin emitted in the function so that we can replace
+ // all coro.frame intrinsics with direct SSA value of coro.begin that returns
+ // the address of the coroutine frame of the current coroutine.
+ llvm::CallInst *CoroBegin = nullptr;
+
+ // Stores the last emitted coro.free for the deallocate expressions; we use it
+ // to wrap dealloc code with if(auto mem = coro.free) dealloc(mem).
+ llvm::CallInst *LastCoroFree = nullptr;
+
// If coro.id came from the builtin, remember the expression to give better
// diagnostic. If CoroIdExpr is nullptr, the coro.id was created by
// EmitCoroutineBody.
CallExpr const *CoroIdExpr = nullptr;
};
-}
-}
+// Defining these here allows us to keep CGCoroData private to this file.
clang::CodeGen::CodeGenFunction::CGCoroInfo::CGCoroInfo() {}
CodeGenFunction::CGCoroInfo::~CGCoroInfo() {}
@@ -59,19 +100,528 @@ static void createCoroData(CodeGenFunction &CGF,
CurCoro.Data->CoroIdExpr = CoroIdExpr;
}
+// Synthesize a pretty name for a suspend point.
+static SmallString<32> buildSuspendPrefixStr(CGCoroData &Coro, AwaitKind Kind) {
+ unsigned No = 0;
+ switch (Kind) {
+ case AwaitKind::Init:
+ case AwaitKind::Final:
+ break;
+ case AwaitKind::Normal:
+ No = ++Coro.AwaitNum;
+ break;
+ case AwaitKind::Yield:
+ No = ++Coro.YieldNum;
+ break;
+ }
+ SmallString<32> Prefix(AwaitKindStr[static_cast<unsigned>(Kind)]);
+ if (No > 1) {
+ Twine(No).toVector(Prefix);
+ }
+ return Prefix;
+}
+
+// Emit suspend expression which roughly looks like:
+//
+// auto && x = CommonExpr();
+// if (!x.await_ready()) {
+// llvm_coro_save();
+// x.await_suspend(...); (*)
+// llvm_coro_suspend(); (**)
+// }
+// x.await_resume();
+//
+// where the result of the entire expression is the result of x.await_resume()
+//
+// (*) If x.await_suspend's return type is bool, it can veto a suspend:
+// if (x.await_suspend(...))
+// llvm_coro_suspend();
+//
+// (**) llvm_coro_suspend() encodes three possible continuations as
+// a switch instruction:
+//
+// %where-to = call i8 @llvm.coro.suspend(...)
+// switch i8 %where-to, label %coro.ret [ ; jump to epilogue to suspend
+// i8 0, label %yield.ready ; go here when resumed
+// i8 1, label %yield.cleanup ; go here when destroyed
+// ]
+//
+// See llvm's docs/Coroutines.rst for more details.
+//
+namespace {
+ struct LValueOrRValue {
+ LValue LV;
+ RValue RV;
+ };
+}
+static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro,
+ CoroutineSuspendExpr const &S,
+ AwaitKind Kind, AggValueSlot aggSlot,
+ bool ignoreResult, bool forLValue) {
+ auto *E = S.getCommonExpr();
+
+ auto Binder =
+ CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E);
+ auto UnbindOnExit = llvm::make_scope_exit([&] { Binder.unbind(CGF); });
+
+ auto Prefix = buildSuspendPrefixStr(Coro, Kind);
+ BasicBlock *ReadyBlock = CGF.createBasicBlock(Prefix + Twine(".ready"));
+ BasicBlock *SuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend"));
+ BasicBlock *CleanupBlock = CGF.createBasicBlock(Prefix + Twine(".cleanup"));
+
+ // If expression is ready, no need to suspend.
+ CGF.EmitBranchOnBoolExpr(S.getReadyExpr(), ReadyBlock, SuspendBlock, 0);
+
+ // Otherwise, emit suspend logic.
+ CGF.EmitBlock(SuspendBlock);
+
+ auto &Builder = CGF.Builder;
+ llvm::Function *CoroSave = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_save);
+ auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy);
+ auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr});
+
+ auto *SuspendRet = CGF.EmitScalarExpr(S.getSuspendExpr());
+ if (SuspendRet != nullptr) {
+ // Veto suspension if requested by bool returning await_suspend.
+ assert(SuspendRet->getType()->isIntegerTy(1) &&
+ "Sema should have already checked that it is void or bool");
+ BasicBlock *RealSuspendBlock =
+ CGF.createBasicBlock(Prefix + Twine(".suspend.bool"));
+ CGF.Builder.CreateCondBr(SuspendRet, RealSuspendBlock, ReadyBlock);
+ SuspendBlock = RealSuspendBlock;
+ CGF.EmitBlock(RealSuspendBlock);
+ }
+
+ // Emit the suspend point.
+ const bool IsFinalSuspend = (Kind == AwaitKind::Final);
+ llvm::Function *CoroSuspend =
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_suspend);
+ auto *SuspendResult = Builder.CreateCall(
+ CoroSuspend, {SaveCall, Builder.getInt1(IsFinalSuspend)});
+
+ // Create a switch capturing three possible continuations.
+ auto *Switch = Builder.CreateSwitch(SuspendResult, Coro.SuspendBB, 2);
+ Switch->addCase(Builder.getInt8(0), ReadyBlock);
+ Switch->addCase(Builder.getInt8(1), CleanupBlock);
+
+ // Emit cleanup for this suspend point.
+ CGF.EmitBlock(CleanupBlock);
+ CGF.EmitBranchThroughCleanup(Coro.CleanupJD);
+
+ // Emit await_resume expression.
+ CGF.EmitBlock(ReadyBlock);
+ LValueOrRValue Res;
+ if (forLValue)
+ Res.LV = CGF.EmitLValue(S.getResumeExpr());
+ else
+ Res.RV = CGF.EmitAnyExpr(S.getResumeExpr(), aggSlot, ignoreResult);
+ return Res;
+}
+
+RValue CodeGenFunction::EmitCoawaitExpr(const CoawaitExpr &E,
+ AggValueSlot aggSlot,
+ bool ignoreResult) {
+ return emitSuspendExpression(*this, *CurCoro.Data, E,
+ CurCoro.Data->CurrentAwaitKind, aggSlot,
+ ignoreResult, /*forLValue*/false).RV;
+}
+RValue CodeGenFunction::EmitCoyieldExpr(const CoyieldExpr &E,
+ AggValueSlot aggSlot,
+ bool ignoreResult) {
+ return emitSuspendExpression(*this, *CurCoro.Data, E, AwaitKind::Yield,
+ aggSlot, ignoreResult, /*forLValue*/false).RV;
+}
+
+void CodeGenFunction::EmitCoreturnStmt(CoreturnStmt const &S) {
+ ++CurCoro.Data->CoreturnCount;
+ EmitStmt(S.getPromiseCall());
+ EmitBranchThroughCleanup(CurCoro.Data->FinalJD);
+}
+
+
+#ifndef NDEBUG
+static QualType getCoroutineSuspendExprReturnType(const ASTContext &Ctx,
+ const CoroutineSuspendExpr *E) {
+ const auto *RE = E->getResumeExpr();
+ // Is it possible for RE to be a CXXBindTemporaryExpr wrapping
+ // a MemberCallExpr?
+ assert(isa<CallExpr>(RE) && "unexpected suspend expression type");
+ return cast<CallExpr>(RE)->getCallReturnType(Ctx);
+}
+#endif
+
+LValue
+CodeGenFunction::EmitCoawaitLValue(const CoawaitExpr *E) {
+ assert(getCoroutineSuspendExprReturnType(getContext(), E)->isReferenceType() &&
+ "Can't have a scalar return unless the return type is a "
+ "reference type!");
+ return emitSuspendExpression(*this, *CurCoro.Data, *E,
+ CurCoro.Data->CurrentAwaitKind, AggValueSlot::ignored(),
+ /*ignoreResult*/false, /*forLValue*/true).LV;
+}
+
+LValue
+CodeGenFunction::EmitCoyieldLValue(const CoyieldExpr *E) {
+ assert(getCoroutineSuspendExprReturnType(getContext(), E)->isReferenceType() &&
+ "Can't have a scalar return unless the return type is a "
+ "reference type!");
+ return emitSuspendExpression(*this, *CurCoro.Data, *E,
+ AwaitKind::Yield, AggValueSlot::ignored(),
+ /*ignoreResult*/false, /*forLValue*/true).LV;
+}
+
+// Hunts for the parameter reference in the parameter copy/move declaration.
+namespace {
+struct GetParamRef : public StmtVisitor<GetParamRef> {
+public:
+ DeclRefExpr *Expr = nullptr;
+ GetParamRef() {}
+ void VisitDeclRefExpr(DeclRefExpr *E) {
+ assert(Expr == nullptr && "multiple declref in param move");
+ Expr = E;
+ }
+ void VisitStmt(Stmt *S) {
+ for (auto *C : S->children()) {
+ if (C)
+ Visit(C);
+ }
+ }
+};
+}
+
+// This class replaces references to parameters with their copies by changing
+// the addresses in CGF.LocalDeclMap and restoring the original values in its
+// destructor.
+
+namespace {
+ struct ParamReferenceReplacerRAII {
+ CodeGenFunction::DeclMapTy SavedLocals;
+ CodeGenFunction::DeclMapTy& LocalDeclMap;
+
+ ParamReferenceReplacerRAII(CodeGenFunction::DeclMapTy &LocalDeclMap)
+ : LocalDeclMap(LocalDeclMap) {}
+
+ void addCopy(DeclStmt const *PM) {
+ // Figure out what param it refers to.
+
+ assert(PM->isSingleDecl());
+ VarDecl const*VD = static_cast<VarDecl const*>(PM->getSingleDecl());
+ Expr const *InitExpr = VD->getInit();
+ GetParamRef Visitor;
+ Visitor.Visit(const_cast<Expr*>(InitExpr));
+ assert(Visitor.Expr);
+ auto *DREOrig = cast<DeclRefExpr>(Visitor.Expr);
+ auto *PD = DREOrig->getDecl();
+
+ auto it = LocalDeclMap.find(PD);
+ assert(it != LocalDeclMap.end() && "parameter is not found");
+ SavedLocals.insert({ PD, it->second });
+
+ auto copyIt = LocalDeclMap.find(VD);
+ assert(copyIt != LocalDeclMap.end() && "parameter copy is not found");
+ it->second = copyIt->getSecond();
+ }
+
+ ~ParamReferenceReplacerRAII() {
+ for (auto&& SavedLocal : SavedLocals) {
+ LocalDeclMap.insert({SavedLocal.first, SavedLocal.second});
+ }
+ }
+ };
+}
+
+// For the WinEH exception representation, the backend needs to know which
+// funclet coro.end belongs to. That information is passed in a funclet bundle.
+static SmallVector<llvm::OperandBundleDef, 1>
+getBundlesForCoroEnd(CodeGenFunction &CGF) {
+ SmallVector<llvm::OperandBundleDef, 1> BundleList;
+
+ if (llvm::Instruction *EHPad = CGF.CurrentFuncletPad)
+ BundleList.emplace_back("funclet", EHPad);
+
+ return BundleList;
+}
+
+namespace {
+// We will insert coro.end to cut any of the destructors for objects that
+// do not need to be destroyed once the coroutine is resumed.
+// See llvm/docs/Coroutines.rst for more details about coro.end.
+struct CallCoroEnd final : public EHScopeStack::Cleanup {
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
+ auto &CGM = CGF.CGM;
+ auto *NullPtr = llvm::ConstantPointerNull::get(CGF.Int8PtrTy);
+ llvm::Function *CoroEndFn = CGM.getIntrinsic(llvm::Intrinsic::coro_end);
+ // See if we have a funclet bundle to associate coro.end with. (WinEH)
+ auto Bundles = getBundlesForCoroEnd(CGF);
+ auto *CoroEnd = CGF.Builder.CreateCall(
+ CoroEndFn, {NullPtr, CGF.Builder.getTrue()}, Bundles);
+ if (Bundles.empty()) {
+ // Otherwise, (landingpad model), create a conditional branch that leads
+ // either to a cleanup block or a block with EH resume instruction.
+ auto *ResumeBB = CGF.getEHResumeBlock(/*cleanup=*/true);
+ auto *CleanupContBB = CGF.createBasicBlock("cleanup.cont");
+ CGF.Builder.CreateCondBr(CoroEnd, ResumeBB, CleanupContBB);
+ CGF.EmitBlock(CleanupContBB);
+ }
+ }
+};
+}
+
+namespace {
+// Make sure to call coro.delete on scope exit.
+struct CallCoroDelete final : public EHScopeStack::Cleanup {
+ Stmt *Deallocate;
+
+ // Emit "if (coro.free(CoroId, CoroBegin)) Deallocate;"
+
+ // Note: That deallocation will be emitted twice: once for a normal exit and
+ // once for exceptional exit. This usage is safe because Deallocate does not
+ // contain any declarations. The SubStmtBuilder::makeNewAndDeleteExpr()
+ // builds a single call to a deallocation function which is safe to emit
+ // multiple times.
+ void Emit(CodeGenFunction &CGF, Flags) override {
+ // Remember the current point, as we are going to emit deallocation code
+ // first to get to coro.free instruction that is an argument to a delete
+ // call.
+ BasicBlock *SaveInsertBlock = CGF.Builder.GetInsertBlock();
+
+ auto *FreeBB = CGF.createBasicBlock("coro.free");
+ CGF.EmitBlock(FreeBB);
+ CGF.EmitStmt(Deallocate);
+
+ auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free");
+ CGF.EmitBlock(AfterFreeBB);
+
+ // We should have captured coro.free from the emission of deallocate.
+ auto *CoroFree = CGF.CurCoro.Data->LastCoroFree;
+ if (!CoroFree) {
+ CGF.CGM.Error(Deallocate->getLocStart(),
+ "Deallocation expressoin does not refer to coro.free");
+ return;
+ }
+
+ // Get back to the block we were in originally and move coro.free there.
+ auto *InsertPt = SaveInsertBlock->getTerminator();
+ CoroFree->moveBefore(InsertPt);
+ CGF.Builder.SetInsertPoint(InsertPt);
+
+ // Add if (auto *mem = coro.free) Deallocate;
+ auto *NullPtr = llvm::ConstantPointerNull::get(CGF.Int8PtrTy);
+ auto *Cond = CGF.Builder.CreateICmpNE(CoroFree, NullPtr);
+ CGF.Builder.CreateCondBr(Cond, FreeBB, AfterFreeBB);
+
+ // No longer need old terminator.
+ InsertPt->eraseFromParent();
+ CGF.Builder.SetInsertPoint(AfterFreeBB);
+ }
+ explicit CallCoroDelete(Stmt *DeallocStmt) : Deallocate(DeallocStmt) {}
+};
+}
+
+namespace {
+struct GetReturnObjectManager {
+ CodeGenFunction &CGF;
+ CGBuilderTy &Builder;
+ const CoroutineBodyStmt &S;
+
+ Address GroActiveFlag;
+ CodeGenFunction::AutoVarEmission GroEmission;
+
+ GetReturnObjectManager(CodeGenFunction &CGF, const CoroutineBodyStmt &S)
+ : CGF(CGF), Builder(CGF.Builder), S(S), GroActiveFlag(Address::invalid()),
+ GroEmission(CodeGenFunction::AutoVarEmission::invalid()) {}
+
+ // The gro variable has to outlive the coroutine frame and the coroutine
+ // promise, but it can only be initialized after the coroutine promise was
+ // created; thus, we split its emission into two parts. EmitGroAlloca emits an
+ // alloca and sets up the cleanups. Later, when the coroutine promise is
+ // available, we initialize the gro and set the flag that the cleanup is now
+ // active.
+
+ void EmitGroAlloca() {
+ auto *GroDeclStmt = dyn_cast<DeclStmt>(S.getResultDecl());
+ if (!GroDeclStmt) {
+ // If get_return_object returns void, no need to do an alloca.
+ return;
+ }
+
+ auto *GroVarDecl = cast<VarDecl>(GroDeclStmt->getSingleDecl());
+
+ // Set GRO flag that it is not initialized yet
+ GroActiveFlag =
+ CGF.CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(), "gro.active");
+ Builder.CreateStore(Builder.getFalse(), GroActiveFlag);
+
+ GroEmission = CGF.EmitAutoVarAlloca(*GroVarDecl);
+
+ // Remember the top of EHStack before emitting the cleanup.
+ auto old_top = CGF.EHStack.stable_begin();
+ CGF.EmitAutoVarCleanups(GroEmission);
+ auto top = CGF.EHStack.stable_begin();
+
+ // Make the cleanup conditional on gro.active
+ for (auto b = CGF.EHStack.find(top), e = CGF.EHStack.find(old_top);
+ b != e; b++) {
+ if (auto *Cleanup = dyn_cast<EHCleanupScope>(&*b)) {
+ assert(!Cleanup->hasActiveFlag() && "cleanup already has active flag?");
+ Cleanup->setActiveFlag(GroActiveFlag);
+ Cleanup->setTestFlagInEHCleanup();
+ Cleanup->setTestFlagInNormalCleanup();
+ }
+ }
+ }
+
+ void EmitGroInit() {
+ if (!GroActiveFlag.isValid()) {
+ // No Gro variable was allocated. Simply emit the call to
+ // get_return_object.
+ CGF.EmitStmt(S.getResultDecl());
+ return;
+ }
+
+ CGF.EmitAutoVarInit(GroEmission);
+ Builder.CreateStore(Builder.getTrue(), GroActiveFlag);
+ }
+};
+}
+
+static void emitBodyAndFallthrough(CodeGenFunction &CGF,
+ const CoroutineBodyStmt &S, Stmt *Body) {
+ CGF.EmitStmt(Body);
+ const bool CanFallthrough = CGF.Builder.GetInsertBlock();
+ if (CanFallthrough)
+ if (Stmt *OnFallthrough = S.getFallthroughHandler())
+ CGF.EmitStmt(OnFallthrough);
+}
+
void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy());
auto &TI = CGM.getContext().getTargetInfo();
unsigned NewAlign = TI.getNewAlign() / TI.getCharWidth();
+ auto *EntryBB = Builder.GetInsertBlock();
+ auto *AllocBB = createBasicBlock("coro.alloc");
+ auto *InitBB = createBasicBlock("coro.init");
+ auto *FinalBB = createBasicBlock("coro.final");
+ auto *RetBB = createBasicBlock("coro.ret");
+
auto *CoroId = Builder.CreateCall(
CGM.getIntrinsic(llvm::Intrinsic::coro_id),
{Builder.getInt32(NewAlign), NullPtr, NullPtr, NullPtr});
createCoroData(*this, CurCoro, CoroId);
+ CurCoro.Data->SuspendBB = RetBB;
+
+ // The backend is allowed to elide memory allocations; to help it, emit
+ // auto mem = coro.alloc() ? 0 : ... allocation code ...;
+ auto *CoroAlloc = Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::coro_alloc), {CoroId});
+
+ Builder.CreateCondBr(CoroAlloc, AllocBB, InitBB);
+
+ EmitBlock(AllocBB);
+ auto *AllocateCall = EmitScalarExpr(S.getAllocate());
+ auto *AllocOrInvokeContBB = Builder.GetInsertBlock();
+
+ // Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided.
+ if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) {
+ auto *RetOnFailureBB = createBasicBlock("coro.ret.on.failure");
+
+ // See if allocation was successful.
+ auto *NullPtr = llvm::ConstantPointerNull::get(Int8PtrTy);
+ auto *Cond = Builder.CreateICmpNE(AllocateCall, NullPtr);
+ Builder.CreateCondBr(Cond, InitBB, RetOnFailureBB);
+
+ // If not, return OnAllocFailure object.
+ EmitBlock(RetOnFailureBB);
+ EmitStmt(RetOnAllocFailure);
+ }
+ else {
+ Builder.CreateBr(InitBB);
+ }
+
+ EmitBlock(InitBB);
+
+ // Pass the result of the allocation to coro.begin.
+ auto *Phi = Builder.CreatePHI(VoidPtrTy, 2);
+ Phi->addIncoming(NullPtr, EntryBB);
+ Phi->addIncoming(AllocateCall, AllocOrInvokeContBB);
+ auto *CoroBegin = Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi});
+ CurCoro.Data->CoroBegin = CoroBegin;
+
+ GetReturnObjectManager GroManager(*this, S);
+ GroManager.EmitGroAlloca();
+
+ CurCoro.Data->CleanupJD = getJumpDestInCurrentScope(RetBB);
+ {
+ ParamReferenceReplacerRAII ParamReplacer(LocalDeclMap);
+ CodeGenFunction::RunCleanupsScope ResumeScope(*this);
+ EHStack.pushCleanup<CallCoroDelete>(NormalAndEHCleanup, S.getDeallocate());
+
+ // Create parameter copies. We do it before creating a promise, since an
+ // evolution of coroutine TS may allow promise constructor to observe
+ // parameter copies.
+ for (auto *PM : S.getParamMoves()) {
+ EmitStmt(PM);
+ ParamReplacer.addCopy(cast<DeclStmt>(PM));
+ // TODO: if(CoroParam(...)) need to surround ctor and dtor
+ // for the copy, so that llvm can elide it if the copy is
+ // not needed.
+ }
- EmitScalarExpr(S.getAllocate());
- // FIXME: Emit the rest of the coroutine.
- EmitStmt(S.getDeallocate());
+ EmitStmt(S.getPromiseDeclStmt());
+
+ Address PromiseAddr = GetAddrOfLocalVar(S.getPromiseDecl());
+ auto *PromiseAddrVoidPtr =
+ new llvm::BitCastInst(PromiseAddr.getPointer(), VoidPtrTy, "", CoroId);
+ // Update CoroId to refer to the promise. We could not do it earlier because
+ // promise local variable was not emitted yet.
+ CoroId->setArgOperand(1, PromiseAddrVoidPtr);
+
+ // Now we have the promise, initialize the GRO
+ GroManager.EmitGroInit();
+
+ EHStack.pushCleanup<CallCoroEnd>(EHCleanup);
+
+ CurCoro.Data->CurrentAwaitKind = AwaitKind::Init;
+ EmitStmt(S.getInitSuspendStmt());
+ CurCoro.Data->FinalJD = getJumpDestInCurrentScope(FinalBB);
+
+ CurCoro.Data->CurrentAwaitKind = AwaitKind::Normal;
+
+ if (auto *OnException = S.getExceptionHandler()) {
+ auto Loc = S.getLocStart();
+ CXXCatchStmt Catch(Loc, /*exDecl=*/nullptr, OnException);
+ auto *TryStmt = CXXTryStmt::Create(getContext(), Loc, S.getBody(), &Catch);
+
+ EnterCXXTryStmt(*TryStmt);
+ emitBodyAndFallthrough(*this, S, TryStmt->getTryBlock());
+ ExitCXXTryStmt(*TryStmt);
+ }
+ else {
+ emitBodyAndFallthrough(*this, S, S.getBody());
+ }
+
+ // See if we need to generate final suspend.
+ const bool CanFallthrough = Builder.GetInsertBlock();
+ const bool HasCoreturns = CurCoro.Data->CoreturnCount > 0;
+ if (CanFallthrough || HasCoreturns) {
+ EmitBlock(FinalBB);
+ CurCoro.Data->CurrentAwaitKind = AwaitKind::Final;
+ EmitStmt(S.getFinalSuspendStmt());
+ } else {
+ // We don't need FinalBB. Emit it to make sure the block is deleted.
+ EmitBlock(FinalBB, /*IsFinished=*/true);
+ }
+ }
+
+ EmitBlock(RetBB);
+ // Emit coro.end before getReturnStmt (and parameter destructors), since
+ // resume and destroy parts of the coroutine should not include them.
+ llvm::Function *CoroEnd = CGM.getIntrinsic(llvm::Intrinsic::coro_end);
+ Builder.CreateCall(CoroEnd, {NullPtr, Builder.getFalse()});
+
+ if (Stmt *Ret = S.getReturnStmt())
+ EmitStmt(Ret);
}
// Emit coroutine intrinsic and patch up arguments of the token type.
@@ -81,6 +631,17 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E,
switch (IID) {
default:
break;
+ // The coro.frame builtin is replaced with an SSA value of the coro.begin
+ // intrinsic.
+ case llvm::Intrinsic::coro_frame: {
+ if (CurCoro.Data && CurCoro.Data->CoroBegin) {
+ return RValue::get(CurCoro.Data->CoroBegin);
+ }
+ CGM.Error(E->getLocStart(), "this builtin expect that __builtin_coro_begin "
+ "has been used earlier in this function");
+ auto NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy());
+ return RValue::get(NullPtr);
+ }
// The following three intrinsics take a token parameter referring to a token
// returned by earlier call to @llvm.coro.id. Since we cannot represent it in
// builtins, we patch it up here.
@@ -94,6 +655,7 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E,
CGM.Error(E->getLocStart(), "this builtin expect that __builtin_coro_id has"
" been used earlier in this function");
// Fallthrough to the next case to add TokenNone as the first argument.
+ LLVM_FALLTHROUGH;
}
// @llvm.coro.suspend takes a token parameter. Add token 'none' as the first
// argument.
@@ -107,10 +669,22 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E,
llvm::Value *F = CGM.getIntrinsic(IID);
llvm::CallInst *Call = Builder.CreateCall(F, Args);
+ // Note: The following code makes it possible to emit coro.id and coro.begin
+ // by hand, to experiment with coroutines in C.
// If we see @llvm.coro.id remember it in the CoroData. We will update
// coro.alloc, coro.begin and coro.free intrinsics to refer to it.
if (IID == llvm::Intrinsic::coro_id) {
createCoroData(*this, CurCoro, Call, E);
}
+ else if (IID == llvm::Intrinsic::coro_begin) {
+ if (CurCoro.Data)
+ CurCoro.Data->CoroBegin = Call;
+ }
+ else if (IID == llvm::Intrinsic::coro_free) {
+ // Remember the last coro_free as we need it to build the conditional
+ // deletion of the coroutine frame.
+ if (CurCoro.Data)
+ CurCoro.Data->LastCoroFree = Call;
+ }
return RValue::get(Call);
}
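
To summarize the coroutine lowering implemented above, a minimal sketch (task and its awaitables are assumed user-defined types, not part of this patch), with the rough IR skeleton EmitCoroutineBody produces noted in comments:

task example() {                    // assumed coroutine return type
  co_await ready_awaitable{};       // "await.ready"/".suspend"/".cleanup" blocks
  co_yield 1;                       // "yield..." blocks, numbered per suspend point
  co_return;                        // branches to FinalJD, then the final suspend
}
// Frame setup emitted around the user body, roughly:
//   %id   = call token @llvm.coro.id(i32 <new-align>, i8* %promise, i8* null, i8* null)
//   %need = call i1 @llvm.coro.alloc(token %id)      ; skip allocation if elided
//   ; operator new / return-on-allocation-failure handling
//   %hdl  = call i8* @llvm.coro.begin(token %id, i8* %mem)
//   ; init suspend, body (wrapped in a try when an exception handler is present),
//   ; final suspend, then:
//   call i1 @llvm.coro.end(i8* null, i1 false)
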
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp
index 12a6803..18b1d10 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -107,8 +107,8 @@ void ApplyDebugLocation::init(SourceLocation TemporaryLocation,
// Construct a location that has a valid scope, but no line info.
assert(!DI->LexicalBlockStack.empty());
- CGF->Builder.SetCurrentDebugLocation(
- llvm::DebugLoc::get(0, 0, DI->LexicalBlockStack.back()));
+ CGF->Builder.SetCurrentDebugLocation(llvm::DebugLoc::get(
+ 0, 0, DI->LexicalBlockStack.back(), DI->getInlinedAt()));
}
ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF, const Expr *E)
@@ -134,6 +134,30 @@ ApplyDebugLocation::~ApplyDebugLocation() {
CGF->Builder.SetCurrentDebugLocation(std::move(OriginalLocation));
}
+ApplyInlineDebugLocation::ApplyInlineDebugLocation(CodeGenFunction &CGF,
+ GlobalDecl InlinedFn)
+ : CGF(&CGF) {
+ if (!CGF.getDebugInfo()) {
+ this->CGF = nullptr;
+ return;
+ }
+ auto &DI = *CGF.getDebugInfo();
+ SavedLocation = DI.getLocation();
+ assert((DI.getInlinedAt() ==
+ CGF.Builder.getCurrentDebugLocation()->getInlinedAt()) &&
+ "CGDebugInfo and IRBuilder are out of sync");
+
+ DI.EmitInlineFunctionStart(CGF.Builder, InlinedFn);
+}
+
+ApplyInlineDebugLocation::~ApplyInlineDebugLocation() {
+ if (!CGF)
+ return;
+ auto &DI = *CGF->getDebugInfo();
+ DI.EmitInlineFunctionEnd(CGF->Builder);
+ DI.EmitLocation(CGF->Builder, SavedLocation);
+}
+
void CGDebugInfo::setLocation(SourceLocation Loc) {
// If the new location isn't valid return.
if (Loc.isInvalid())
@@ -185,7 +209,7 @@ llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context,
// Check namespace.
if (const auto *NSDecl = dyn_cast<NamespaceDecl>(Context))
- return getOrCreateNameSpace(NSDecl);
+ return getOrCreateNamespace(NSDecl);
if (const auto *RDecl = dyn_cast<RecordDecl>(Context))
if (!RDecl->isDependentType())
@@ -249,8 +273,8 @@ StringRef CGDebugInfo::getObjCMethodName(const ObjCMethodDecl *OMD) {
<< OC->getIdentifier()->getNameStart() << ')';
}
} else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) {
- OS << ((const NamedDecl *)OCD)->getIdentifier()->getNameStart() << '('
- << OCD->getIdentifier()->getNameStart() << ')';
+ OS << OCD->getClassInterface()->getName() << '('
+ << OCD->getName() << ')';
} else if (isa<ObjCProtocolDecl>(DC)) {
// We can extract the type of the class from the self pointer.
if (ImplicitParamDecl *SelfDecl = OMD->getSelfDecl()) {
@@ -504,12 +528,15 @@ void CGDebugInfo::CreateCompileUnit() {
// Create new compile unit.
// FIXME - Eliminate TheCU.
TheCU = DBuilder.createCompileUnit(
- LangTag, DBuilder.createFile(remapDIPath(MainFileName),
- remapDIPath(getCurrentDirname()), CSKind,
- Checksum),
+ LangTag,
+ DBuilder.createFile(remapDIPath(MainFileName),
+ remapDIPath(getCurrentDirname()), CSKind, Checksum),
Producer, LO.Optimize, CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers,
- CGM.getCodeGenOpts().SplitDwarfFile, EmissionKind, 0 /* DWOid */,
- CGM.getCodeGenOpts().SplitDwarfInlining);
+ CGM.getCodeGenOpts().EnableSplitDwarf
+ ? ""
+ : CGM.getCodeGenOpts().SplitDwarfFile,
+ EmissionKind, 0 /* DWOid */, CGM.getCodeGenOpts().SplitDwarfInlining,
+ CGM.getCodeGenOpts().DebugInfoForProfiling);
}
llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
@@ -581,8 +608,6 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return getOrCreateStructPtrType("opencl_clk_event_t", OCLClkEventDITy);
case BuiltinType::OCLQueue:
return getOrCreateStructPtrType("opencl_queue_t", OCLQueueDITy);
- case BuiltinType::OCLNDRange:
- return getOrCreateStructPtrType("opencl_ndrange_t", OCLNDRangeDITy);
case BuiltinType::OCLReserveID:
return getOrCreateStructPtrType("opencl_reserve_id_t", OCLReserveIDDITy);
@@ -793,17 +818,19 @@ llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag,
// Bit size, align and offset of the type.
// Size is always the size of a pointer. We can't use getTypeSize here
// because that does not return the correct value for references.
- unsigned AS = CGM.getContext().getTargetAddressSpace(PointeeTy);
- uint64_t Size = CGM.getTarget().getPointerWidth(AS);
+ unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(PointeeTy);
+ uint64_t Size = CGM.getTarget().getPointerWidth(AddressSpace);
auto Align = getTypeAlignIfRequired(Ty, CGM.getContext());
+ Optional<unsigned> DWARFAddressSpace =
+ CGM.getTarget().getDWARFAddressSpace(AddressSpace);
if (Tag == llvm::dwarf::DW_TAG_reference_type ||
Tag == llvm::dwarf::DW_TAG_rvalue_reference_type)
return DBuilder.createReferenceType(Tag, getOrCreateType(PointeeTy, Unit),
- Size, Align);
+ Size, Align, DWARFAddressSpace);
else
return DBuilder.createPointerType(getOrCreateType(PointeeTy, Unit), Size,
- Align);
+ Align, DWARFAddressSpace);
}
llvm::DIType *CGDebugInfo::getOrCreateStructPtrType(StringRef Name,
@@ -929,7 +956,7 @@ static unsigned getDwarfCC(CallingConv CC) {
return llvm::dwarf::DW_CC_BORLAND_pascal;
// FIXME: Create new DW_CC_ codes for these calling conventions.
- case CC_X86_64Win64:
+ case CC_Win64:
case CC_X86_64SysV:
case CC_AAPCS:
case CC_AAPCS_VFP:
@@ -1014,7 +1041,13 @@ llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl,
assert(SizeInBits > 0 && "found named 0-width bitfield");
uint64_t StorageOffsetInBits =
CGM.getContext().toBits(BitFieldInfo.StorageOffset);
- uint64_t OffsetInBits = StorageOffsetInBits + BitFieldInfo.Offset;
+ uint64_t Offset = BitFieldInfo.Offset;
+ // Bit offsets for bit-fields are reversed on big endian targets; compensate
+ // for that, as DIDerivedType requires un-reversed offsets.
+ if (CGM.getDataLayout().isBigEndian())
+ Offset = BitFieldInfo.StorageSize - BitFieldInfo.Size - Offset;
+ uint64_t OffsetInBits = StorageOffsetInBits + Offset;
llvm::DINode::DIFlags Flags = getAccessFlag(BitFieldDecl->getAccess(), RD);
return DBuilder.createBitFieldMemberType(
RecordTy, Name, File, Line, SizeInBits, OffsetInBits, StorageOffsetInBits,
@@ -1608,8 +1641,13 @@ llvm::DIType *CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile *Unit) {
llvm::DITypeRefArray SElements = DBuilder.getOrCreateTypeArray(STy);
llvm::DIType *SubTy = DBuilder.createSubroutineType(SElements);
unsigned Size = Context.getTypeSize(Context.VoidPtrTy);
+ unsigned VtblPtrAddressSpace = CGM.getTarget().getVtblPtrAddressSpace();
+ Optional<unsigned> DWARFAddressSpace =
+ CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace);
+
llvm::DIType *vtbl_ptr_type =
- DBuilder.createPointerType(SubTy, Size, 0, "__vtbl_ptr_type");
+ DBuilder.createPointerType(SubTy, Size, 0, DWARFAddressSpace,
+ "__vtbl_ptr_type");
VTablePtrType = DBuilder.createPointerType(vtbl_ptr_type, Size);
return VTablePtrType;
}
@@ -1648,10 +1686,14 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit,
unsigned VSlotCount =
VFTLayout.vtable_components().size() - CGM.getLangOpts().RTTIData;
unsigned VTableWidth = PtrWidth * VSlotCount;
+ unsigned VtblPtrAddressSpace = CGM.getTarget().getVtblPtrAddressSpace();
+ Optional<unsigned> DWARFAddressSpace =
+ CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace);
// Create a very wide void* type and insert it directly in the element list.
llvm::DIType *VTableType =
- DBuilder.createPointerType(nullptr, VTableWidth, 0, "__vtbl_ptr_type");
+ DBuilder.createPointerType(nullptr, VTableWidth, 0, DWARFAddressSpace,
+ "__vtbl_ptr_type");
EltTys.push_back(VTableType);
// The vptr is a pointer to this special vtable type.
@@ -1714,7 +1756,27 @@ void CGDebugInfo::completeType(const RecordDecl *RD) {
completeRequiredType(RD);
}
+/// Return true if the class or any of its methods are marked dllimport.
+static bool isClassOrMethodDLLImport(const CXXRecordDecl *RD) {
+ if (RD->hasAttr<DLLImportAttr>())
+ return true;
+ for (const CXXMethodDecl *MD : RD->methods())
+ if (MD->hasAttr<DLLImportAttr>())
+ return true;
+ return false;
+}
+
void CGDebugInfo::completeClassData(const RecordDecl *RD) {
+ if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+ if (CXXRD->isDynamicClass() &&
+ CGM.getVTableLinkage(CXXRD) ==
+ llvm::GlobalValue::AvailableExternallyLinkage &&
+ !isClassOrMethodDLLImport(CXXRD))
+ return;
+ completeClass(RD);
+}
+
+void CGDebugInfo::completeClass(const RecordDecl *RD) {
if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
QualType Ty = CGM.getContext().getRecordType(RD);
@@ -1760,22 +1822,16 @@ static bool isDefinedInClangModule(const RecordDecl *RD) {
return true;
}
-/// Return true if the class or any of its methods are marked dllimport.
-static bool isClassOrMethodDLLImport(const CXXRecordDecl *RD) {
- if (RD->hasAttr<DLLImportAttr>())
- return true;
- for (const CXXMethodDecl *MD : RD->methods())
- if (MD->hasAttr<DLLImportAttr>())
- return true;
- return false;
-}
-
static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind,
bool DebugTypeExtRefs, const RecordDecl *RD,
const LangOptions &LangOpts) {
if (DebugTypeExtRefs && isDefinedInClangModule(RD->getDefinition()))
return true;
+ if (auto *ES = RD->getASTContext().getExternalSource())
+ if (ES->hasExternalDefinitions(RD) == ExternalASTSource::EK_Always)
+ return true;
+
if (DebugKind > codegenoptions::LimitedDebugInfo)
return false;
@@ -2009,7 +2065,11 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod,
if (CreateSkeletonCU && IsRootModule) {
// PCH files don't have a signature field in the control block,
// but LLVM detects skeleton CUs by looking for a non-zero DWO id.
- uint64_t Signature = Mod.getSignature() ? Mod.getSignature() : ~1ULL;
+ // We use the lower 64 bits for debug info.
+ uint64_t Signature =
+ Mod.getSignature()
+ ? (uint64_t)Mod.getSignature()[1] << 32 | Mod.getSignature()[0]
+ : ~1ULL;
llvm::DIBuilder DIB(CGM.getModule());
DIB.createCompileUnit(TheCU->getSourceLanguage(),
DIB.createFile(Mod.getModuleName(), Mod.getPath()),
@@ -2408,6 +2468,21 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) {
FullName);
}
+llvm::DIMacro *CGDebugInfo::CreateMacro(llvm::DIMacroFile *Parent,
+ unsigned MType, SourceLocation LineLoc,
+ StringRef Name, StringRef Value) {
+ unsigned Line = LineLoc.isInvalid() ? 0 : getLineNumber(LineLoc);
+ return DBuilder.createMacro(Parent, Line, MType, Name, Value);
+}
+
+llvm::DIMacroFile *CGDebugInfo::CreateTempMacroFile(llvm::DIMacroFile *Parent,
+ SourceLocation LineLoc,
+ SourceLocation FileLoc) {
+ llvm::DIFile *FName = getOrCreateFile(FileLoc);
+ unsigned Line = LineLoc.isInvalid() ? 0 : getLineNumber(LineLoc);
+ return DBuilder.createTempMacroFile(Parent, Line, FName);
+}
+
static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) {
Qualifiers Quals;
do {
@@ -2451,8 +2526,9 @@ static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) {
case Type::SubstTemplateTypeParm:
T = cast<SubstTemplateTypeParmType>(T)->getReplacementType();
break;
- case Type::Auto: {
- QualType DT = cast<AutoType>(T)->getDeducedType();
+ case Type::Auto:
+ case Type::DeducedTemplateSpecialization: {
+ QualType DT = cast<DeducedType>(T)->getDeducedType();
assert(!DT.isNull() && "Undeduced types shouldn't reach here.");
T = DT;
break;
@@ -2488,11 +2564,17 @@ void CGDebugInfo::completeTemplateDefinition(
const ClassTemplateSpecializationDecl &SD) {
if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
+ completeUnusedClass(SD);
+}
- completeClassData(&SD);
+void CGDebugInfo::completeUnusedClass(const CXXRecordDecl &D) {
+ if (DebugKind <= codegenoptions::DebugLineTablesOnly)
+ return;
+
+ completeClassData(&D);
// In case this type has no member function definitions being emitted, ensure
// it is retained
- RetainedTypes.push_back(CGM.getContext().getRecordType(&SD).getAsOpaquePtr());
+ RetainedTypes.push_back(CGM.getContext().getRecordType(&D).getAsOpaquePtr());
}
llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) {
@@ -2537,7 +2619,7 @@ llvm::DIModule *CGDebugInfo::getParentModuleOrNull(const Decl *D) {
// best to make this behavior a command line or debugger tuning
// option.
FullSourceLoc Loc(D->getLocation(), CGM.getContext().getSourceManager());
- if (Module *M = ClangModuleMap->inferModuleFromLocation(Loc)) {
+ if (Module *M = D->getOwningModule()) {
// This is a (sub-)module.
auto Info = ExternalASTSource::ASTSourceDescriptor(*M);
return getOrCreateModuleRef(Info, /*SkeletonCU=*/false);
@@ -2618,6 +2700,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) {
case Type::Attributed:
case Type::Adjusted:
case Type::Decayed:
+ case Type::DeducedTemplateSpecialization:
case Type::Elaborated:
case Type::Paren:
case Type::SubstTemplateTypeParm:
@@ -2704,6 +2787,7 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
// them distinct if they are ODR-uniqued.
if (FullName.empty())
break;
+ LLVM_FALLTHROUGH;
case llvm::dwarf::DW_TAG_structure_type:
case llvm::dwarf::DW_TAG_union_type:
@@ -2774,16 +2858,17 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit,
}
// No need to replicate the linkage name if it isn't different from the
// subprogram name, no need to have it at all unless coverage is enabled or
- // debug is set to more than just line tables.
+ // debug is set to more than just line tables or extra debug info is needed.
if (LinkageName == Name || (!CGM.getCodeGenOpts().EmitGcovArcs &&
!CGM.getCodeGenOpts().EmitGcovNotes &&
+ !CGM.getCodeGenOpts().DebugInfoForProfiling &&
DebugKind <= codegenoptions::DebugLineTablesOnly))
LinkageName = StringRef();
if (DebugKind >= codegenoptions::LimitedDebugInfo) {
if (const NamespaceDecl *NSDecl =
dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext()))
- FDContext = getOrCreateNameSpace(NSDecl);
+ FDContext = getOrCreateNamespace(NSDecl);
else if (const RecordDecl *RDecl =
dyn_cast_or_null<RecordDecl>(FD->getDeclContext())) {
llvm::DIScope *Mod = getParentModuleOrNull(RDecl);
@@ -2844,28 +2929,40 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
VDContext = getContextDescriptor(cast<Decl>(DC), Mod ? Mod : TheCU);
}
-llvm::DISubprogram *
-CGDebugInfo::getFunctionForwardDeclaration(const FunctionDecl *FD) {
+llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
+ bool Stub) {
llvm::DINodeArray TParamsArray;
StringRef Name, LinkageName;
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
- SourceLocation Loc = FD->getLocation();
+ SourceLocation Loc = GD.getDecl()->getLocation();
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *DContext = Unit;
unsigned Line = getLineNumber(Loc);
-
- collectFunctionDeclProps(FD, Unit, Name, LinkageName, DContext,
+ collectFunctionDeclProps(GD, Unit, Name, LinkageName, DContext,
TParamsArray, Flags);
+ auto *FD = dyn_cast<FunctionDecl>(GD.getDecl());
+
// Build function type.
SmallVector<QualType, 16> ArgTypes;
- for (const ParmVarDecl *Parm: FD->parameters())
- ArgTypes.push_back(Parm->getType());
+ if (FD)
+ for (const ParmVarDecl *Parm : FD->parameters())
+ ArgTypes.push_back(Parm->getType());
CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv();
QualType FnType = CGM.getContext().getFunctionType(
FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC));
+ if (Stub) {
+ return DBuilder.createFunction(
+ DContext, Name, LinkageName, Unit, Line,
+ getOrCreateFunctionType(GD.getDecl(), FnType, Unit),
+ !FD->isExternallyVisible(),
+ /* isDefinition = */ true, 0, Flags, CGM.getLangOpts().Optimize,
+ TParamsArray.get(), getFunctionDeclaration(FD));
+ }
+
llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl(
DContext, Name, LinkageName, Unit, Line,
- getOrCreateFunctionType(FD, FnType, Unit), !FD->isExternallyVisible(),
+ getOrCreateFunctionType(GD.getDecl(), FnType, Unit),
+ !FD->isExternallyVisible(),
/* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize,
TParamsArray.get(), getFunctionDeclaration(FD));
const auto *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl());
@@ -2875,6 +2972,16 @@ CGDebugInfo::getFunctionForwardDeclaration(const FunctionDecl *FD) {
return SP;
}
+llvm::DISubprogram *
+CGDebugInfo::getFunctionForwardDeclaration(GlobalDecl GD) {
+ return getFunctionFwdDeclOrStub(GD, /* Stub = */ false);
+}
+
+llvm::DISubprogram *
+CGDebugInfo::getFunctionStub(GlobalDecl GD) {
+ return getFunctionFwdDeclOrStub(GD, /* Stub = */ true);
+}
+
llvm::DIGlobalVariable *
CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) {
QualType T;
@@ -3146,6 +3253,27 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc,
TParamsArray.get(), getFunctionDeclaration(D)));
}
+void CGDebugInfo::EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD) {
+ const auto *FD = cast<FunctionDecl>(GD.getDecl());
+ // If there is a subprogram for this function available then use it.
+ auto FI = SPCache.find(FD->getCanonicalDecl());
+ llvm::DISubprogram *SP = nullptr;
+ if (FI != SPCache.end())
+ SP = dyn_cast_or_null<llvm::DISubprogram>(FI->second);
+ if (!SP || !SP->isDefinition())
+ SP = getFunctionStub(GD);
+ FnBeginRegionCount.push_back(LexicalBlockStack.size());
+ LexicalBlockStack.emplace_back(SP);
+ setInlinedAt(Builder.getCurrentDebugLocation());
+ EmitLocation(Builder, FD->getLocation());
+}
+
+void CGDebugInfo::EmitInlineFunctionEnd(CGBuilderTy &Builder) {
+ assert(CurInlinedAt && "unbalanced inline scope stack");
+ EmitFunctionEnd(Builder, nullptr);
+ setInlinedAt(llvm::DebugLoc(CurInlinedAt).getInlinedAt());
+}
+
void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) {
// Update our current location
setLocation(Loc);
@@ -3155,7 +3283,7 @@ void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) {
llvm::MDNode *Scope = LexicalBlockStack.back();
Builder.SetCurrentDebugLocation(llvm::DebugLoc::get(
- getLineNumber(CurLoc), getColumnNumber(CurLoc), Scope));
+ getLineNumber(CurLoc), getColumnNumber(CurLoc), Scope, CurInlinedAt));
}
void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) {
@@ -3167,14 +3295,29 @@ void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) {
getColumnNumber(CurLoc)));
}
+void CGDebugInfo::AppendAddressSpaceXDeref(
+ unsigned AddressSpace,
+ SmallVectorImpl<int64_t> &Expr) const {
+ Optional<unsigned> DWARFAddressSpace =
+ CGM.getTarget().getDWARFAddressSpace(AddressSpace);
+ if (!DWARFAddressSpace)
+ return;
+
+ Expr.push_back(llvm::dwarf::DW_OP_constu);
+ Expr.push_back(DWARFAddressSpace.getValue());
+ Expr.push_back(llvm::dwarf::DW_OP_swap);
+ Expr.push_back(llvm::dwarf::DW_OP_xderef);
+}
+
void CGDebugInfo::EmitLexicalBlockStart(CGBuilderTy &Builder,
SourceLocation Loc) {
// Set our current location.
setLocation(Loc);
// Emit a line table change for the current location inside the new scope.
- Builder.SetCurrentDebugLocation(llvm::DebugLoc::get(
- getLineNumber(Loc), getColumnNumber(Loc), LexicalBlockStack.back()));
+ Builder.SetCurrentDebugLocation(
+ llvm::DebugLoc::get(getLineNumber(Loc), getColumnNumber(Loc),
+ LexicalBlockStack.back(), CurInlinedAt));
if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
@@ -3196,7 +3339,7 @@ void CGDebugInfo::EmitLexicalBlockEnd(CGBuilderTy &Builder,
LexicalBlockStack.pop_back();
}
-void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder) {
+void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder, llvm::Function *Fn) {
assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!");
unsigned RCount = FnBeginRegionCount.back();
assert(RCount <= LexicalBlockStack.size() && "Region stack mismatch");
@@ -3208,6 +3351,9 @@ void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder) {
LexicalBlockStack.pop_back();
}
FnBeginRegionCount.pop_back();
+
+ if (Fn && Fn->getSubprogram())
+ DBuilder.finalizeSubprogram(Fn->getSubprogram());
}
llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
@@ -3316,56 +3462,45 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
Line = getLineNumber(VD->getLocation());
Column = getColumnNumber(VD->getLocation());
}
- SmallVector<int64_t, 9> Expr;
+ SmallVector<int64_t, 13> Expr;
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
if (VD->isImplicit())
Flags |= llvm::DINode::FlagArtificial;
auto Align = getDeclAlignIfRequired(VD, CGM.getContext());
- // If this is the first argument and it is implicit then
- // give it an object pointer flag.
- // FIXME: There has to be a better way to do this, but for static
- // functions there won't be an implicit param at arg1 and
- // otherwise it is 'self' or 'this'.
- if (isa<ImplicitParamDecl>(VD) && ArgNo && *ArgNo == 1)
- Flags |= llvm::DINode::FlagObjectPointer;
- if (auto *Arg = dyn_cast<llvm::Argument>(Storage))
- if (Arg->getType()->isPointerTy() && !Arg->hasByValAttr() &&
- !VD->getType()->isPointerType())
- Expr.push_back(llvm::dwarf::DW_OP_deref);
+ unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(VD->getType());
+ AppendAddressSpaceXDeref(AddressSpace, Expr);
- auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back());
+ // If this is implicit parameter of CXXThis or ObjCSelf kind, then give it an
+ // object pointer flag.
+ if (const auto *IPD = dyn_cast<ImplicitParamDecl>(VD)) {
+ if (IPD->getParameterKind() == ImplicitParamDecl::CXXThis ||
+ IPD->getParameterKind() == ImplicitParamDecl::ObjCSelf)
+ Flags |= llvm::DINode::FlagObjectPointer;
+ }
+ // Note: Older versions of clang used to emit byval references with an extra
+ // DW_OP_deref, because they referenced the IR arg directly instead of
+ // referencing an alloca. Newer versions of LLVM don't treat allocas
+ // differently from other function arguments when used in a dbg.declare.
+ auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back());
StringRef Name = VD->getName();
if (!Name.empty()) {
if (VD->hasAttr<BlocksAttr>()) {
+ // Here, we need an offset *into* the alloca.
CharUnits offset = CharUnits::fromQuantity(32);
- Expr.push_back(llvm::dwarf::DW_OP_plus);
+ Expr.push_back(llvm::dwarf::DW_OP_plus_uconst);
// offset of __forwarding field
offset = CGM.getContext().toCharUnitsFromBits(
CGM.getTarget().getPointerWidth(0));
Expr.push_back(offset.getQuantity());
Expr.push_back(llvm::dwarf::DW_OP_deref);
- Expr.push_back(llvm::dwarf::DW_OP_plus);
+ Expr.push_back(llvm::dwarf::DW_OP_plus_uconst);
// offset of x field
offset = CGM.getContext().toCharUnitsFromBits(XOffset);
Expr.push_back(offset.getQuantity());
-
- // Create the descriptor for the variable.
- auto *D = ArgNo
- ? DBuilder.createParameterVariable(Scope, VD->getName(),
- *ArgNo, Unit, Line, Ty)
- : DBuilder.createAutoVariable(Scope, VD->getName(), Unit,
- Line, Ty, Align);
-
- // Insert an llvm.dbg.declare into the current block.
- DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
- llvm::DebugLoc::get(Line, Column, Scope),
- Builder.GetInsertBlock());
- return;
- } else if (isa<VariableArrayType>(VD->getType()))
- Expr.push_back(llvm::dwarf::DW_OP_deref);
+ }
} else if (const auto *RT = dyn_cast<RecordType>(VD->getType())) {
// If VD is an anonymous union then Storage represents value for
// all union fields.
@@ -3393,9 +3528,10 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
Flags | llvm::DINode::FlagArtificial, FieldAlign);
// Insert an llvm.dbg.declare into the current block.
- DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
- llvm::DebugLoc::get(Line, Column, Scope),
- Builder.GetInsertBlock());
+ DBuilder.insertDeclare(
+ Storage, D, DBuilder.createExpression(Expr),
+ llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt),
+ Builder.GetInsertBlock());
}
}
}
@@ -3411,7 +3547,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
// Insert an llvm.dbg.declare into the current block.
DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
- llvm::DebugLoc::get(Line, Column, Scope),
+ llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt),
Builder.GetInsertBlock());
}
@@ -3453,8 +3589,9 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(
// Self is passed along as an implicit non-arg variable in a
// block. Mark it as the object pointer.
- if (isa<ImplicitParamDecl>(VD) && VD->getName() == "self")
- Ty = CreateSelfType(VD->getType(), Ty);
+ if (const auto *IPD = dyn_cast<ImplicitParamDecl>(VD))
+ if (IPD->getParameterKind() == ImplicitParamDecl::ObjCSelf)
+ Ty = CreateSelfType(VD->getType(), Ty);
// Get location information.
unsigned Line = getLineNumber(VD->getLocation());
@@ -3467,19 +3604,18 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(
->getElementOffset(blockInfo.getCapture(VD).getIndex()));
SmallVector<int64_t, 9> addr;
- if (isa<llvm::AllocaInst>(Storage))
- addr.push_back(llvm::dwarf::DW_OP_deref);
- addr.push_back(llvm::dwarf::DW_OP_plus);
+ addr.push_back(llvm::dwarf::DW_OP_deref);
+ addr.push_back(llvm::dwarf::DW_OP_plus_uconst);
addr.push_back(offset.getQuantity());
if (isByRef) {
addr.push_back(llvm::dwarf::DW_OP_deref);
- addr.push_back(llvm::dwarf::DW_OP_plus);
+ addr.push_back(llvm::dwarf::DW_OP_plus_uconst);
// offset of __forwarding field
offset =
CGM.getContext().toCharUnitsFromBits(target.getPointerSizeInBits(0));
addr.push_back(offset.getQuantity());
addr.push_back(llvm::dwarf::DW_OP_deref);
- addr.push_back(llvm::dwarf::DW_OP_plus);
+ addr.push_back(llvm::dwarf::DW_OP_plus_uconst);
// offset of x field
offset = CGM.getContext().toCharUnitsFromBits(XOffset);
addr.push_back(offset.getQuantity());
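Read in sequence, the operations pushed above spell out the debugger's path to a block-captured variable: load the block pointer, add the capture's offset inside the block literal, and, for a __block (byref) capture, follow the __forwarding pointer before adding the offset of the variable within the byref structure. The DW_OP_plus to DW_OP_plus_uconst switch reflects that these operands are unsigned constants, not stack values.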
@@ -3492,13 +3628,13 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(
Line, Ty, false, llvm::DINode::FlagZero, Align);
// Insert an llvm.dbg.declare into the current block.
- auto DL = llvm::DebugLoc::get(Line, Column, LexicalBlockStack.back());
+ auto DL =
+ llvm::DebugLoc::get(Line, Column, LexicalBlockStack.back(), CurInlinedAt);
+ auto *Expr = DBuilder.createExpression(addr);
if (InsertPoint)
- DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(addr), DL,
- InsertPoint);
+ DBuilder.insertDeclare(Storage, D, Expr, DL, InsertPoint);
else
- DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(addr), DL,
- Builder.GetInsertBlock());
+ DBuilder.insertDeclare(Storage, D, Expr, DL, Builder.GetInsertBlock());
}
void CGDebugInfo::EmitDeclareOfArgVariable(const VarDecl *VD, llvm::Value *AI,
@@ -3660,12 +3796,13 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
// Insert an llvm.dbg.value into the current block.
DBuilder.insertDbgValueIntrinsic(
LocalAddr, 0, debugVar, DBuilder.createExpression(),
- llvm::DebugLoc::get(line, column, scope), Builder.GetInsertBlock());
+ llvm::DebugLoc::get(line, column, scope, CurInlinedAt),
+ Builder.GetInsertBlock());
}
// Insert an llvm.dbg.declare into the current block.
DBuilder.insertDeclare(Arg, debugVar, DBuilder.createExpression(),
- llvm::DebugLoc::get(line, column, scope),
+ llvm::DebugLoc::get(line, column, scope, CurInlinedAt),
Builder.GetInsertBlock());
}
@@ -3747,9 +3884,16 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
GVE = CollectAnonRecordDecls(RD, Unit, LineNo, LinkageName, Var, DContext);
} else {
auto Align = getDeclAlignIfRequired(D, CGM.getContext());
+
+ SmallVector<int64_t, 4> Expr;
+ unsigned AddressSpace =
+ CGM.getContext().getTargetAddressSpace(D->getType());
+ AppendAddressSpaceXDeref(AddressSpace, Expr);
+
GVE = DBuilder.createGlobalVariableExpression(
DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit),
- Var->hasLocalLinkage(), /*Expr=*/nullptr,
+ Var->hasLocalLinkage(),
+ Expr.empty() ? nullptr : DBuilder.createExpression(Expr),
getOrCreateStaticDataMemberDeclarationOrNull(D), Align);
Var->addDebugInfo(GVE);
}
@@ -3826,10 +3970,10 @@ void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) {
const NamespaceDecl *NSDecl = UD.getNominatedNamespace();
if (!NSDecl->isAnonymousNamespace() ||
CGM.getCodeGenOpts().DebugExplicitImport) {
+ auto Loc = UD.getLocation();
DBuilder.createImportedModule(
getCurrentContextDescriptor(cast<Decl>(UD.getDeclContext())),
- getOrCreateNameSpace(NSDecl),
- getLineNumber(UD.getLocation()));
+ getOrCreateNamespace(NSDecl), getOrCreateFile(Loc), getLineNumber(Loc));
}
}
@@ -3852,10 +3996,12 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) {
if (AT->getDeducedType().isNull())
return;
if (llvm::DINode *Target =
- getDeclarationOrDefinition(USD.getUnderlyingDecl()))
+ getDeclarationOrDefinition(USD.getUnderlyingDecl())) {
+ auto Loc = USD.getLocation();
DBuilder.createImportedDeclaration(
getCurrentContextDescriptor(cast<Decl>(USD.getDeclContext())), Target,
- getLineNumber(USD.getLocation()));
+ getOrCreateFile(Loc), getLineNumber(Loc));
+ }
}
void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) {
@@ -3863,10 +4009,11 @@ void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) {
return;
if (Module *M = ID.getImportedModule()) {
auto Info = ExternalASTSource::ASTSourceDescriptor(*M);
+ auto Loc = ID.getLocation();
DBuilder.createImportedDeclaration(
getCurrentContextDescriptor(cast<Decl>(ID.getDeclContext())),
- getOrCreateModuleRef(Info, DebugTypeExtRefs),
- getLineNumber(ID.getLocation()));
+ getOrCreateModuleRef(Info, DebugTypeExtRefs), getOrCreateFile(Loc),
+ getLineNumber(Loc));
}
}
@@ -3878,35 +4025,37 @@ CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) {
if (VH)
return cast<llvm::DIImportedEntity>(VH);
llvm::DIImportedEntity *R;
+ auto Loc = NA.getLocation();
if (const auto *Underlying =
dyn_cast<NamespaceAliasDecl>(NA.getAliasedNamespace()))
// This could cache & dedup here rather than relying on metadata deduping.
R = DBuilder.createImportedDeclaration(
getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())),
- EmitNamespaceAlias(*Underlying), getLineNumber(NA.getLocation()),
- NA.getName());
+ EmitNamespaceAlias(*Underlying), getOrCreateFile(Loc),
+ getLineNumber(Loc), NA.getName());
else
R = DBuilder.createImportedDeclaration(
getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())),
- getOrCreateNameSpace(cast<NamespaceDecl>(NA.getAliasedNamespace())),
- getLineNumber(NA.getLocation()), NA.getName());
+ getOrCreateNamespace(cast<NamespaceDecl>(NA.getAliasedNamespace())),
+ getOrCreateFile(Loc), getLineNumber(Loc), NA.getName());
VH.reset(R);
return R;
}
llvm::DINamespace *
-CGDebugInfo::getOrCreateNameSpace(const NamespaceDecl *NSDecl) {
- NSDecl = NSDecl->getCanonicalDecl();
- auto I = NameSpaceCache.find(NSDecl);
- if (I != NameSpaceCache.end())
+CGDebugInfo::getOrCreateNamespace(const NamespaceDecl *NSDecl) {
+ // Don't canonicalize the NamespaceDecl here: The DINamespace will be uniqued
+ // if necessary, and this way multiple declarations of the same namespace in
+ // different parent modules stay distinct.
+ auto I = NamespaceCache.find(NSDecl);
+ if (I != NamespaceCache.end())
return cast<llvm::DINamespace>(I->second);
- unsigned LineNo = getLineNumber(NSDecl->getLocation());
- llvm::DIFile *FileD = getOrCreateFile(NSDecl->getLocation());
llvm::DIScope *Context = getDeclContextDescriptor(NSDecl);
- llvm::DINamespace *NS = DBuilder.createNameSpace(
- Context, NSDecl->getName(), FileD, LineNo, NSDecl->isInline());
- NameSpaceCache[NSDecl].reset(NS);
+ // Don't trust the context if it is a DIModule (see comment above).
+ llvm::DINamespace *NS =
+ DBuilder.createNameSpace(Context, NSDecl->getName(), NSDecl->isInline());
+ NamespaceCache[NSDecl].reset(NS);
return NS;
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h
index ac2e8dd..39249c7 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h
@@ -61,6 +61,7 @@ class CGDebugInfo {
ModuleMap *ClangModuleMap = nullptr;
ExternalASTSource::ASTSourceDescriptor PCHDescriptor;
SourceLocation CurLoc;
+ llvm::MDNode *CurInlinedAt = nullptr;
llvm::DIType *VTablePtrType = nullptr;
llvm::DIType *ClassTy = nullptr;
llvm::DICompositeType *ObjTy = nullptr;
@@ -124,7 +125,7 @@ class CGDebugInfo {
/// Cache declarations relevant to DW_TAG_imported_declarations (C++
/// using declarations) that aren't covered by other more specific caches.
llvm::DenseMap<const Decl *, llvm::TrackingMDRef> DeclCache;
- llvm::DenseMap<const NamespaceDecl *, llvm::TrackingMDRef> NameSpaceCache;
+ llvm::DenseMap<const NamespaceDecl *, llvm::TrackingMDRef> NamespaceCache;
llvm::DenseMap<const NamespaceAliasDecl *, llvm::TrackingMDRef>
NamespaceAliasCache;
llvm::DenseMap<const Decl *, llvm::TypedTrackingMDRef<llvm::DIDerivedType>>
@@ -193,8 +194,9 @@ class CGDebugInfo {
getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile *F);
/// \return debug info descriptor for vtable.
llvm::DIType *getOrCreateVTablePtrType(llvm::DIFile *F);
+
/// \return namespace descriptor for the given namespace decl.
- llvm::DINamespace *getOrCreateNameSpace(const NamespaceDecl *N);
+ llvm::DINamespace *getOrCreateNamespace(const NamespaceDecl *N);
llvm::DIType *CreatePointerLikeType(llvm::dwarf::Tag Tag, const Type *Ty,
QualType PointeeTy, llvm::DIFile *F);
llvm::DIType *getOrCreateStructPtrType(StringRef Name, llvm::DIType *&Cache);
@@ -292,6 +294,15 @@ class CGDebugInfo {
/// Create a new lexical block node and push it on the stack.
void CreateLexicalBlock(SourceLocation Loc);
+  /// If target-specific LLVM \p AddressSpace directly maps to a
+  /// target-specific DWARF address space, appends the extended dereferencing
+  /// mechanism to the complex expression \p Expr. Otherwise, does nothing.
+  ///
+  /// The extended dereferencing mechanism has the following format:
+ /// DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
+ void AppendAddressSpaceXDeref(unsigned AddressSpace,
+ SmallVectorImpl<int64_t> &Expr) const;
+
public:
CGDebugInfo(CodeGenModule &CGM);
~CGDebugInfo();
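For orientation, a minimal sketch of what the AppendAddressSpaceXDeref helper declared above could look like on the implementation side; it assumes the target exposes a getDWARFAddressSpace-style query, and that accessor name is an assumption rather than something this diff shows:

    void CGDebugInfo::AppendAddressSpaceXDeref(
        unsigned AddressSpace, SmallVectorImpl<int64_t> &Expr) const {
      // Only targets that define a DWARF address space mapping get the
      // extended dereferencing sequence; otherwise leave Expr untouched.
      Optional<unsigned> DWARFAddressSpace =
          CGM.getTarget().getDWARFAddressSpace(AddressSpace);
      if (!DWARFAddressSpace)
        return;
      Expr.push_back(llvm::dwarf::DW_OP_constu);
      Expr.push_back(DWARFAddressSpace.getValue());
      Expr.push_back(llvm::dwarf::DW_OP_swap);
      Expr.push_back(llvm::dwarf::DW_OP_xderef);
    }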
@@ -320,6 +331,17 @@ public:
/// ignored.
void setLocation(SourceLocation Loc);
+ /// Return the current source location. This does not necessarily correspond
+ /// to the IRBuilder's current DebugLoc.
+ SourceLocation getLocation() const { return CurLoc; }
+
+ /// Update the current inline scope. All subsequent calls to \p EmitLocation
+ /// will create a location with this inlinedAt field.
+ void setInlinedAt(llvm::MDNode *InlinedAt) { CurInlinedAt = InlinedAt; }
+
+ /// \return the current inline scope.
+ llvm::MDNode *getInlinedAt() const { return CurInlinedAt; }
+
// Converts a SourceLocation to a DebugLoc
llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Loc);
@@ -336,11 +358,16 @@ public:
SourceLocation ScopeLoc, QualType FnType,
llvm::Function *Fn, CGBuilderTy &Builder);
+ /// Start a new scope for an inlined function.
+ void EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD);
+ /// End an inlined function scope.
+ void EmitInlineFunctionEnd(CGBuilderTy &Builder);
+
/// Emit debug info for a function declaration.
void EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, QualType FnType);
/// Constructs the debug code for exiting a function.
- void EmitFunctionEnd(CGBuilderTy &Builder);
+ void EmitFunctionEnd(CGBuilderTy &Builder, llvm::Function *Fn);
/// Emit metadata to indicate the beginning of a new lexical block
/// and push the block onto the stack.
@@ -409,9 +436,21 @@ public:
void completeType(const RecordDecl *RD);
void completeRequiredType(const RecordDecl *RD);
void completeClassData(const RecordDecl *RD);
+ void completeClass(const RecordDecl *RD);
void completeTemplateDefinition(const ClassTemplateSpecializationDecl &SD);
-
+ void completeUnusedClass(const CXXRecordDecl &D);
+
+ /// Create debug info for a macro defined by a #define directive or a macro
+ /// undefined by a #undef directive.
+ llvm::DIMacro *CreateMacro(llvm::DIMacroFile *Parent, unsigned MType,
+ SourceLocation LineLoc, StringRef Name,
+ StringRef Value);
+
+ /// Create debug info for a file referenced by an #include directive.
+ llvm::DIMacroFile *CreateTempMacroFile(llvm::DIMacroFile *Parent,
+ SourceLocation LineLoc,
+ SourceLocation FileLoc);
private:
/// Emit call to llvm.dbg.declare for a variable declaration.
void EmitDeclare(const VarDecl *decl, llvm::Value *AI,
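A hedged illustration of how the new macro hooks might be driven from a preprocessor callback (the DI pointer, ParentFile, and HashLoc are hypothetical; only the CreateMacro signature comes from this diff):

    // #define BUFSIZE 1024  ->  record a DW_MACINFO_define entry
    DI->CreateMacro(ParentFile, llvm::dwarf::DW_MACINFO_define, HashLoc,
                    "BUFSIZE", "1024");
    // #undef BUFSIZE        ->  record a DW_MACINFO_undef entry
    DI->CreateMacro(ParentFile, llvm::dwarf::DW_MACINFO_undef, HashLoc,
                    "BUFSIZE", StringRef());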
@@ -491,11 +530,18 @@ private:
llvm::DIDerivedType *
getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D);
+ /// Helper that either creates a forward declaration or a stub.
+ llvm::DISubprogram *getFunctionFwdDeclOrStub(GlobalDecl GD, bool Stub);
+
/// Create a subprogram describing the forward declaration
- /// represented in the given FunctionDecl.
- llvm::DISubprogram *getFunctionForwardDeclaration(const FunctionDecl *FD);
+ /// represented in the given FunctionDecl wrapped in a GlobalDecl.
+ llvm::DISubprogram *getFunctionForwardDeclaration(GlobalDecl GD);
+
+ /// Create a DISubprogram describing the function
+ /// represented in the given FunctionDecl wrapped in a GlobalDecl.
+ llvm::DISubprogram *getFunctionStub(GlobalDecl GD);
- /// Create a global variable describing the forward decalration
+ /// Create a global variable describing the forward declaration
/// represented in the given VarDecl.
llvm::DIGlobalVariable *
getGlobalVariableForwardDeclaration(const VarDecl *VD);
@@ -622,6 +668,20 @@ public:
};
+/// A scoped helper to set the current debug location to an inlined location.
+class ApplyInlineDebugLocation {
+ SourceLocation SavedLocation;
+ CodeGenFunction *CGF;
+
+public:
+ /// Set up the CodeGenFunction's DebugInfo to produce inline locations for the
+ /// function \p InlinedFn. The current debug location becomes the inlined call
+ /// site of the inlined function.
+ ApplyInlineDebugLocation(CodeGenFunction &CGF, GlobalDecl InlinedFn);
+  /// Restore everything back to the original state.
+ ~ApplyInlineDebugLocation();
+};
+
} // namespace CodeGen
} // namespace clang
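The ApplyInlineDebugLocation helper added above is an RAII guard; a minimal usage sketch follows (the enclosing function is hypothetical, not part of this change):

    void emitPseudoInlinedBody(CodeGenFunction &CGF, GlobalDecl InlinedFn) {
      // The current debug location becomes the call site; everything emitted
      // while the guard is alive carries an inlinedAt scope for InlinedFn.
      ApplyInlineDebugLocation InlineScope(CGF, InlinedFn);
      // ... emit the body of InlinedFn here ...
    } // the destructor restores the previous location and inline scope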
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp
index 0a88b23..2351786 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp
@@ -11,14 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#include "CodeGenFunction.h"
#include "CGBlocks.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGOpenCLRuntime.h"
#include "CGOpenMPRuntime.h"
+#include "CodeGenFunction.h"
#include "CodeGenModule.h"
+#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/Decl.h"
@@ -50,6 +51,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
case Decl::TemplateTypeParm:
case Decl::UnresolvedUsingValue:
case Decl::NonTypeTemplateParm:
+ case Decl::CXXDeductionGuide:
case Decl::CXXMethod:
case Decl::CXXConstructor:
case Decl::CXXDestructor:
@@ -151,7 +153,14 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
/// EmitVarDecl - This method handles emission of any variable declaration
/// inside a function, including static vars etc.
void CodeGenFunction::EmitVarDecl(const VarDecl &D) {
- if (D.isStaticLocal()) {
+ if (D.hasExternalStorage())
+ // Don't emit it now, allow it to be emitted lazily on its first use.
+ return;
+
+  // Some function-scope variables do not have static storage but still need
+  // to be emitted like static variables, e.g. a function-scope variable in
+  // the constant address space in OpenCL.
+ if (D.getStorageDuration() != SD_Automatic) {
llvm::GlobalValue::LinkageTypes Linkage =
CGM.getLLVMLinkageVarDefinition(&D, /*isConstant=*/false);
@@ -162,10 +171,6 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) {
return EmitStaticVarDecl(D, Linkage);
}
- if (D.hasExternalStorage())
- // Don't emit it now, allow it to be emitted lazily on its first use.
- return;
-
if (D.getType().getAddressSpace() == LangAS::opencl_local)
return CGM.getOpenCLRuntime().EmitWorkGroupLocalVarDecl(*this, D);
@@ -216,8 +221,8 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
Name = getStaticDeclName(*this, D);
llvm::Type *LTy = getTypes().ConvertTypeForMem(Ty);
- unsigned AddrSpace =
- GetGlobalVarAddressSpace(&D, getContext().getTargetAddressSpace(Ty));
+ unsigned AS = GetGlobalVarAddressSpace(&D);
+ unsigned TargetAS = getContext().getTargetAddressSpace(AS);
// Local address space cannot have an initializer.
llvm::Constant *Init = nullptr;
@@ -226,12 +231,9 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
else
Init = llvm::UndefValue::get(LTy);
- llvm::GlobalVariable *GV =
- new llvm::GlobalVariable(getModule(), LTy,
- Ty.isConstant(getContext()), Linkage,
- Init, Name, nullptr,
- llvm::GlobalVariable::NotThreadLocal,
- AddrSpace);
+ llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+ getModule(), LTy, Ty.isConstant(getContext()), Linkage, Init, Name,
+ nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS);
GV->setAlignment(getContext().getDeclAlign(&D).getQuantity());
setGlobalVisibility(GV, &D);
@@ -249,11 +251,12 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
}
// Make sure the result is of the correct type.
- unsigned ExpectedAddrSpace = getContext().getTargetAddressSpace(Ty);
+ unsigned ExpectedAS = Ty.getAddressSpace();
llvm::Constant *Addr = GV;
- if (AddrSpace != ExpectedAddrSpace) {
- llvm::PointerType *PTy = llvm::PointerType::get(LTy, ExpectedAddrSpace);
- Addr = llvm::ConstantExpr::getAddrSpaceCast(GV, PTy);
+ if (AS != ExpectedAS) {
+ Addr = getTargetCodeGenInfo().performAddrSpaceCast(
+ *this, GV, AS, ExpectedAS,
+ LTy->getPointerTo(getContext().getTargetAddressSpace(ExpectedAS)));
}
setStaticLocalDeclAddress(&D, Addr);
@@ -401,6 +404,13 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D,
if (D.hasAttr<AnnotateAttr>())
CGM.AddGlobalAnnotations(&D, var);
+ if (auto *SA = D.getAttr<PragmaClangBSSSectionAttr>())
+ var->addAttribute("bss-section", SA->getName());
+ if (auto *SA = D.getAttr<PragmaClangDataSectionAttr>())
+ var->addAttribute("data-section", SA->getName());
+ if (auto *SA = D.getAttr<PragmaClangRodataSectionAttr>())
+ var->addAttribute("rodata-section", SA->getName());
+
if (const SectionAttr *SA = D.getAttr<SectionAttr>())
var->setSection(SA->getName());
@@ -671,6 +681,27 @@ static void drillIntoBlockVariable(CodeGenFunction &CGF,
lvalue.setAddress(CGF.emitBlockByrefAddress(lvalue.getAddress(), var));
}
+void CodeGenFunction::EmitNullabilityCheck(LValue LHS, llvm::Value *RHS,
+ SourceLocation Loc) {
+ if (!SanOpts.has(SanitizerKind::NullabilityAssign))
+ return;
+
+ auto Nullability = LHS.getType()->getNullability(getContext());
+ if (!Nullability || *Nullability != NullabilityKind::NonNull)
+ return;
+
+  // If the left hand side must be nonnull, check that the right hand side of
+  // the assignment is nonnull.
+ SanitizerScope SanScope(this);
+ llvm::Value *IsNotNull = Builder.CreateIsNotNull(RHS);
+ llvm::Constant *StaticData[] = {
+ EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(LHS.getType()),
+ llvm::ConstantInt::get(Int8Ty, 0), // The LogAlignment info is unused.
+ llvm::ConstantInt::get(Int8Ty, TCK_NonnullAssign)};
+ EmitCheck({{IsNotNull, SanitizerKind::NullabilityAssign}},
+ SanitizerHandler::TypeMismatch, StaticData, RHS);
+}
+
void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
LValue lvalue, bool capturedByInit) {
Qualifiers::ObjCLifetime lifetime = lvalue.getObjCLifetime();
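To make the new check concrete, a small source-level sketch of what EmitNullabilityCheck guards when the nullability-assign sanitizer is enabled (illustrative only; not taken from the patch):

    int *_Nonnull guarded;

    void assign(int *maybe_null) {
      // The store is instrumented: if maybe_null turns out to be null at run
      // time, the sanitizer reports it through the TypeMismatch handler.
      guarded = maybe_null;
    }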
@@ -678,6 +709,7 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
llvm::Value *value = EmitScalarExpr(init);
if (capturedByInit)
drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D));
+ EmitNullabilityCheck(lvalue, value, init->getExprLoc());
EmitStoreThroughLValue(RValue::get(value), lvalue, true);
return;
}
@@ -766,6 +798,8 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D));
+ EmitNullabilityCheck(lvalue, value, init->getExprLoc());
+
// If the variable might have been accessed by its initializer, we
// might have to initialize with a barrier. We have to do this for
// both __weak and __strong, but __weak got filtered out above.
@@ -899,7 +933,7 @@ llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size,
return nullptr;
llvm::Value *SizeV = llvm::ConstantInt::get(Int64Ty, Size);
- Addr = Builder.CreateBitCast(Addr, Int8PtrTy);
+ Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy);
llvm::CallInst *C =
Builder.CreateCall(CGM.getLLVMLifetimeStartFn(), {SizeV, Addr});
C->setDoesNotThrow();
@@ -907,7 +941,7 @@ llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size,
}
void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) {
- Addr = Builder.CreateBitCast(Addr, Int8PtrTy);
+ Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy);
llvm::CallInst *C =
Builder.CreateCall(CGM.getLLVMLifetimeEndFn(), {Size, Addr});
C->setDoesNotThrow();
@@ -918,6 +952,7 @@ void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) {
CodeGenFunction::AutoVarEmission
CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
QualType Ty = D.getType();
+ assert(Ty.getAddressSpace() == LangAS::Default);
AutoVarEmission emission(D);
@@ -1010,8 +1045,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// Create the alloca. Note that we set the name separately from
// building the instruction so that it's there even in no-asserts
// builds.
- address = CreateTempAlloca(allocaTy, allocaAlignment);
- address.getPointer()->setName(D.getName());
+ address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName());
// Don't emit lifetime markers for MSVC catch parameters. The lifetime of
// the catch parameter starts in the catchpad instruction, and we can't
@@ -1022,11 +1056,21 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// Emit a lifetime intrinsic if meaningful. There's no point in doing this
// if we don't have a valid insertion point (?).
if (HaveInsertPoint() && !IsMSCatchParam) {
- // goto or switch-case statements can break lifetime into several
- // regions which need more efforts to handle them correctly. PR28267
- // This is rare case, but it's better just omit intrinsics than have
- // them incorrectly placed.
- if (!Bypasses.IsBypassed(&D)) {
+ // If there's a jump into the lifetime of this variable, its lifetime
+ // gets broken up into several regions in IR, which requires more work
+ // to handle correctly. For now, just omit the intrinsics; this is a
+ // rare case, and it's better to just be conservatively correct.
+ // PR28267.
+ //
+ // We have to do this in all language modes if there's a jump past the
+ // declaration. We also have to do it in C if there's a jump to an
+ // earlier point in the current block because non-VLA lifetimes begin as
+ // soon as the containing block is entered, not when its variables
+ // actually come into scope; suppressing the lifetime annotations
+ // completely in this case is unnecessarily pessimistic, but again, this
+ // is rare.
+ if (!Bypasses.IsBypassed(&D) &&
+ !(!getLangOpts().CPlusPlus && hasLabelBeenSeenInCurrentScope())) {
uint64_t size = CGM.getDataLayout().getTypeAllocSize(allocaTy);
emission.SizeForLifetimeMarkers =
EmitLifetimeStart(size, address.getPointer());
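A short C example of the bypass case described in the comment above, where control can enter a variable's scope without passing its declaration, so the lifetime intrinsics are simply omitted (a sketch, not part of the patch):

    void f(int cond) {
      if (cond)
        goto skip;   // jumps past the declaration below (PR28267-style case)
      int buf[16];   // would otherwise get llvm.lifetime.start/end markers
      buf[0] = 0;
    skip:
      return;
    }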
@@ -1061,10 +1105,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
llvm::Type *llvmTy = ConvertTypeForMem(elementType);
// Allocate memory for the array.
- llvm::AllocaInst *vla = Builder.CreateAlloca(llvmTy, elementCount, "vla");
- vla->setAlignment(alignment.getQuantity());
-
- address = Address(vla, alignment);
+ address = CreateTempAlloca(llvmTy, alignment, "vla", elementCount);
}
setAddrOfLocalVar(&D, address);
@@ -1083,6 +1124,12 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
if (D.hasAttr<AnnotateAttr>())
EmitVarAnnotations(&D, address.getPointer());
+ // Make sure we call @llvm.lifetime.end.
+ if (emission.useLifetimeMarkers())
+ EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker,
+ emission.getAllocatedAddress(),
+ emission.getSizeForLifetimeMarkers());
+
return emission;
}
@@ -1373,13 +1420,6 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) {
const VarDecl &D = *emission.Variable;
- // Make sure we call @llvm.lifetime.end. This needs to happen
- // *last*, so the cleanup needs to be pushed *first*.
- if (emission.useLifetimeMarkers())
- EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker,
- emission.getAllocatedAddress(),
- emission.getSizeForLifetimeMarkers());
-
// Check the type for a cleanup.
if (QualType::DestructionKind dtorKind = D.getType().isDestructedType())
emitAutoVarTypeCleanup(emission, dtorKind);
@@ -1691,17 +1731,19 @@ void CodeGenFunction::pushRegularPartialArrayCleanup(llvm::Value *arrayBegin,
/// Lazily declare the @llvm.lifetime.start intrinsic.
llvm::Constant *CodeGenModule::getLLVMLifetimeStartFn() {
- if (LifetimeStartFn) return LifetimeStartFn;
+ if (LifetimeStartFn)
+ return LifetimeStartFn;
LifetimeStartFn = llvm::Intrinsic::getDeclaration(&getModule(),
- llvm::Intrinsic::lifetime_start);
+ llvm::Intrinsic::lifetime_start, AllocaInt8PtrTy);
return LifetimeStartFn;
}
/// Lazily declare the @llvm.lifetime.end intrinsic.
llvm::Constant *CodeGenModule::getLLVMLifetimeEndFn() {
- if (LifetimeEndFn) return LifetimeEndFn;
+ if (LifetimeEndFn)
+ return LifetimeEndFn;
LifetimeEndFn = llvm::Intrinsic::getDeclaration(&getModule(),
- llvm::Intrinsic::lifetime_end);
+ llvm::Intrinsic::lifetime_end, AllocaInt8PtrTy);
return LifetimeEndFn;
}
@@ -1816,6 +1858,10 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
lt = Qualifiers::OCL_ExplicitNone;
}
+ // Load objects passed indirectly.
+ if (Arg.isIndirect() && !ArgVal)
+ ArgVal = Builder.CreateLoad(DeclPtr);
+
if (lt == Qualifiers::OCL_Strong) {
if (!isConsumed) {
if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
@@ -1869,6 +1915,19 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
if (D.hasAttr<AnnotateAttr>())
EmitVarAnnotations(&D, DeclPtr.getPointer());
+
+ // We can only check return value nullability if all arguments to the
+ // function satisfy their nullability preconditions. This makes it necessary
+ // to emit null checks for args in the function body itself.
+ if (requiresReturnValueNullabilityCheck()) {
+ auto Nullability = Ty->getNullability(getContext());
+ if (Nullability && *Nullability == NullabilityKind::NonNull) {
+ SanitizerScope SanScope(this);
+ RetValNullabilityPrecondition =
+ Builder.CreateAnd(RetValNullabilityPrecondition,
+ Builder.CreateIsNotNull(Arg.getAnyValue()));
+ }
+ }
}
void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D,
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp
index f56e182..d8768be 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -237,7 +237,7 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD,
llvm::FunctionType::get(IntTy, dtorStub->getType(), false);
llvm::Constant *atexit =
- CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeSet(),
+ CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeList(),
/*Local=*/true);
if (llvm::Function *atexitFn = dyn_cast<llvm::Function>(atexit))
atexitFn->setDoesNotThrow();
@@ -571,9 +571,10 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
FinishFunction();
}
-void CodeGenFunction::GenerateCXXGlobalDtorsFunc(llvm::Function *Fn,
- const std::vector<std::pair<llvm::WeakVH, llvm::Constant*> >
- &DtorsAndObjects) {
+void CodeGenFunction::GenerateCXXGlobalDtorsFunc(
+ llvm::Function *Fn,
+ const std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>>
+ &DtorsAndObjects) {
{
auto NL = ApplyDebugLocation::CreateEmpty(*this);
StartFunction(GlobalDecl(), getContext().VoidTy, Fn,
@@ -602,9 +603,9 @@ llvm::Function *CodeGenFunction::generateDestroyHelper(
Address addr, QualType type, Destroyer *destroyer,
bool useEHCleanupForArray, const VarDecl *VD) {
FunctionArgList args;
- ImplicitParamDecl dst(getContext(), nullptr, SourceLocation(), nullptr,
- getContext().VoidPtrTy);
- args.push_back(&dst);
+ ImplicitParamDecl Dst(getContext(), getContext().VoidPtrTy,
+ ImplicitParamDecl::Other);
+ args.push_back(&Dst);
const CGFunctionInfo &FI =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, args);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp
index f908bf2..40ae092 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp
@@ -180,8 +180,8 @@ static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T,
// The GCC runtime's personality function inherently doesn't support
// mixed EH. Use the C++ personality just to avoid returning null.
case ObjCRuntime::GCC:
- case ObjCRuntime::ObjFW: // XXX: this will change soon
- return EHPersonality::GNU_ObjC;
+ case ObjCRuntime::ObjFW:
+ return getObjCPersonality(T, L);
case ObjCRuntime::GNUstep:
return EHPersonality::GNU_ObjCXX;
}
@@ -231,7 +231,7 @@ static llvm::Constant *getPersonalityFn(CodeGenModule &CGM,
const EHPersonality &Personality) {
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.Int32Ty, true),
Personality.PersonalityFn,
- llvm::AttributeSet(), /*Local=*/true);
+ llvm::AttributeList(), /*Local=*/true);
}
static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM,
@@ -765,8 +765,8 @@ llvm::BasicBlock *CodeGenFunction::EmitLandingPad() {
llvm::BasicBlock *lpad = createBasicBlock("lpad");
EmitBlock(lpad);
- llvm::LandingPadInst *LPadInst = Builder.CreateLandingPad(
- llvm::StructType::get(Int8PtrTy, Int32Ty, nullptr), 0);
+ llvm::LandingPadInst *LPadInst =
+ Builder.CreateLandingPad(llvm::StructType::get(Int8PtrTy, Int32Ty), 0);
llvm::Value *LPadExn = Builder.CreateExtractValue(LPadInst, 0);
Builder.CreateStore(LPadExn, getExceptionSlot());
@@ -1310,8 +1310,8 @@ llvm::BasicBlock *CodeGenFunction::getTerminateLandingPad() {
if (!CurFn->hasPersonalityFn())
CurFn->setPersonalityFn(getOpaquePersonalityFn(CGM, Personality));
- llvm::LandingPadInst *LPadInst = Builder.CreateLandingPad(
- llvm::StructType::get(Int8PtrTy, Int32Ty, nullptr), 0);
+ llvm::LandingPadInst *LPadInst =
+ Builder.CreateLandingPad(llvm::StructType::get(Int8PtrTy, Int32Ty), 0);
LPadInst->addClause(getCatchAllValue(*this));
llvm::Value *Exn = nullptr;
@@ -1387,8 +1387,7 @@ llvm::BasicBlock *CodeGenFunction::getEHResumeBlock(bool isCleanup) {
llvm::Value *Exn = getExceptionFromSlot();
llvm::Value *Sel = getSelectorFromSlot();
- llvm::Type *LPadType = llvm::StructType::get(Exn->getType(),
- Sel->getType(), nullptr);
+ llvm::Type *LPadType = llvm::StructType::get(Exn->getType(), Sel->getType());
llvm::Value *LPadVal = llvm::UndefValue::get(LPadType);
LPadVal = Builder.CreateInsertValue(LPadVal, Exn, 0, "lpad.val");
LPadVal = Builder.CreateInsertValue(LPadVal, Sel, 1, "lpad.val");
@@ -1650,39 +1649,29 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF,
// parameters. Win32 filters take no parameters.
if (IsFilter) {
Args.push_back(ImplicitParamDecl::Create(
- getContext(), nullptr, StartLoc,
+ getContext(), /*DC=*/nullptr, StartLoc,
&getContext().Idents.get("exception_pointers"),
- getContext().VoidPtrTy));
+ getContext().VoidPtrTy, ImplicitParamDecl::Other));
} else {
Args.push_back(ImplicitParamDecl::Create(
- getContext(), nullptr, StartLoc,
+ getContext(), /*DC=*/nullptr, StartLoc,
&getContext().Idents.get("abnormal_termination"),
- getContext().UnsignedCharTy));
+ getContext().UnsignedCharTy, ImplicitParamDecl::Other));
}
Args.push_back(ImplicitParamDecl::Create(
- getContext(), nullptr, StartLoc,
- &getContext().Idents.get("frame_pointer"), getContext().VoidPtrTy));
+ getContext(), /*DC=*/nullptr, StartLoc,
+ &getContext().Idents.get("frame_pointer"), getContext().VoidPtrTy,
+ ImplicitParamDecl::Other));
}
QualType RetTy = IsFilter ? getContext().LongTy : getContext().VoidTy;
- llvm::Function *ParentFn = ParentCGF.CurFn;
const CGFunctionInfo &FnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(RetTy, Args);
llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
llvm::Function *Fn = llvm::Function::Create(
FnTy, llvm::GlobalValue::InternalLinkage, Name.str(), &CGM.getModule());
- // The filter is either in the same comdat as the function, or it's internal.
- if (llvm::Comdat *C = ParentFn->getComdat()) {
- Fn->setComdat(C);
- } else if (ParentFn->hasWeakLinkage() || ParentFn->hasLinkOnceLinkage()) {
- llvm::Comdat *C = CGM.getModule().getOrInsertComdat(ParentFn->getName());
- ParentFn->setComdat(C);
- Fn->setComdat(C);
- } else {
- Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
- }
IsOutlinedSEHHelper = true;
@@ -1758,7 +1747,7 @@ void CodeGenFunction::EmitSEHExceptionCodeSave(CodeGenFunction &ParentCGF,
// };
// int exceptioncode = exception_pointers->ExceptionRecord->ExceptionCode;
llvm::Type *RecordTy = CGM.Int32Ty->getPointerTo();
- llvm::Type *PtrsTy = llvm::StructType::get(RecordTy, CGM.VoidPtrTy, nullptr);
+ llvm::Type *PtrsTy = llvm::StructType::get(RecordTy, CGM.VoidPtrTy);
llvm::Value *Ptrs = Builder.CreateBitCast(SEHInfo, PtrsTy->getPointerTo());
llvm::Value *Rec = Builder.CreateStructGEP(PtrsTy, Ptrs, 0);
Rec = Builder.CreateAlignedLoad(Rec, getPointerAlign());
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp
index e5e34a5..63c7b3d 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp
@@ -61,17 +61,39 @@ llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) {
/// CreateTempAlloca - This creates a alloca and inserts it into the entry
/// block.
Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
- const Twine &Name) {
- auto Alloca = CreateTempAlloca(Ty, Name);
+ const Twine &Name,
+ llvm::Value *ArraySize,
+ bool CastToDefaultAddrSpace) {
+ auto Alloca = CreateTempAlloca(Ty, Name, ArraySize);
Alloca->setAlignment(Align.getQuantity());
- return Address(Alloca, Align);
-}
-
-/// CreateTempAlloca - This creates a alloca and inserts it into the entry
-/// block.
+ llvm::Value *V = Alloca;
+ // Alloca always returns a pointer in alloca address space, which may
+ // be different from the type defined by the language. For example,
+ // in C++ the auto variables are in the default address space. Therefore
+ // cast alloca to the default address space when necessary.
+ if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) {
+ auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default);
+ auto CurIP = Builder.saveIP();
+ Builder.SetInsertPoint(AllocaInsertPt);
+ V = getTargetHooks().performAddrSpaceCast(
+ *this, V, getASTAllocaAddressSpace(), LangAS::Default,
+ Ty->getPointerTo(DestAddrSpace), /*non-null*/ true);
+ Builder.restoreIP(CurIP);
+ }
+
+ return Address(V, Align);
+}
+
+/// CreateTempAlloca - This creates an alloca and inserts it into the entry
+/// block if \p ArraySize is nullptr, otherwise inserts it at the current
+/// insertion point of the builder.
llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty,
- const Twine &Name) {
- return new llvm::AllocaInst(Ty, nullptr, Name, AllocaInsertPt);
+ const Twine &Name,
+ llvm::Value *ArraySize) {
+ if (ArraySize)
+ return Builder.CreateAlloca(Ty, ArraySize, Name);
+ return new llvm::AllocaInst(Ty, CGM.getDataLayout().getAllocaAddrSpace(),
+ ArraySize, Name, AllocaInsertPt);
}
/// CreateDefaultAlignTempAlloca - This creates an alloca with the
@@ -98,14 +120,18 @@ Address CodeGenFunction::CreateIRTemp(QualType Ty, const Twine &Name) {
return CreateTempAlloca(ConvertType(Ty), Align, Name);
}
-Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name) {
+Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name,
+ bool CastToDefaultAddrSpace) {
// FIXME: Should we prefer the preferred type alignment here?
- return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name);
+ return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name,
+ CastToDefaultAddrSpace);
}
Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align,
- const Twine &Name) {
- return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name);
+ const Twine &Name,
+ bool CastToDefaultAddrSpace) {
+ return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name, nullptr,
+ CastToDefaultAddrSpace);
}
/// EvaluateExprAsBool - Perform the usual unary conversions on the specified
@@ -315,9 +341,10 @@ pushTemporaryCleanup(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M,
}
}
-static Address
-createReferenceTemporary(CodeGenFunction &CGF,
- const MaterializeTemporaryExpr *M, const Expr *Inner) {
+static Address createReferenceTemporary(CodeGenFunction &CGF,
+ const MaterializeTemporaryExpr *M,
+ const Expr *Inner) {
+ auto &TCG = CGF.getTargetHooks();
switch (M->getStorageDuration()) {
case SD_FullExpression:
case SD_Automatic: {
@@ -330,13 +357,24 @@ createReferenceTemporary(CodeGenFunction &CGF,
(Ty->isArrayType() || Ty->isRecordType()) &&
CGF.CGM.isTypeConstant(Ty, true))
if (llvm::Constant *Init = CGF.CGM.EmitConstantExpr(Inner, Ty, &CGF)) {
- auto *GV = new llvm::GlobalVariable(
- CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
- llvm::GlobalValue::PrivateLinkage, Init, ".ref.tmp");
- CharUnits alignment = CGF.getContext().getTypeAlignInChars(Ty);
- GV->setAlignment(alignment.getQuantity());
- // FIXME: Should we put the new global into a COMDAT?
- return Address(GV, alignment);
+ if (auto AddrSpace = CGF.getTarget().getConstantAddressSpace()) {
+ auto AS = AddrSpace.getValue();
+ auto *GV = new llvm::GlobalVariable(
+ CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
+ llvm::GlobalValue::PrivateLinkage, Init, ".ref.tmp", nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ CGF.getContext().getTargetAddressSpace(AS));
+ CharUnits alignment = CGF.getContext().getTypeAlignInChars(Ty);
+ GV->setAlignment(alignment.getQuantity());
+ llvm::Constant *C = GV;
+ if (AS != LangAS::Default)
+ C = TCG.performAddrSpaceCast(
+ CGF.CGM, GV, AS, LangAS::Default,
+ GV->getValueType()->getPointerTo(
+ CGF.getContext().getTargetAddressSpace(LangAS::Default)));
+ // FIXME: Should we put the new global into a COMDAT?
+ return Address(C, alignment);
+ }
}
return CGF.CreateMemTemp(Ty, "ref.tmp");
}
@@ -373,12 +411,14 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
// dynamic initialization or a cleanup and we can just return the address
// of the temporary.
if (Var->hasInitializer())
- return MakeAddrLValue(Object, M->getType(), AlignmentSource::Decl);
+ return MakeAddrLValue(Object, M->getType(),
+ LValueBaseInfo(AlignmentSource::Decl, false));
Var->setInitializer(CGM.EmitNullConstant(E->getType()));
}
LValue RefTempDst = MakeAddrLValue(Object, M->getType(),
- AlignmentSource::Decl);
+ LValueBaseInfo(AlignmentSource::Decl,
+ false));
switch (getEvaluationKind(E->getType())) {
default: llvm_unreachable("expected scalar or aggregate expression");
@@ -415,9 +455,11 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
// Create and initialize the reference temporary.
Address Object = createReferenceTemporary(*this, M, E);
- if (auto *Var = dyn_cast<llvm::GlobalVariable>(Object.getPointer())) {
+ if (auto *Var = dyn_cast<llvm::GlobalVariable>(
+ Object.getPointer()->stripPointerCasts())) {
Object = Address(llvm::ConstantExpr::getBitCast(
- Var, ConvertTypeForMem(E->getType())->getPointerTo()),
+ cast<llvm::Constant>(Object.getPointer()),
+ ConvertTypeForMem(E->getType())->getPointerTo()),
Object.getAlignment());
// If the temporary is a global and has a constant initializer or is a
// constant temporary that we promoted to a global, we may have already
@@ -464,7 +506,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
case SubobjectAdjustment::FieldAdjustment: {
LValue LV = MakeAddrLValue(Object, E->getType(),
- AlignmentSource::Decl);
+ LValueBaseInfo(AlignmentSource::Decl, false));
LV = EmitLValueForField(LV, Adjustment.Field);
assert(LV.isSimple() &&
"materialized temporary field is not a simple lvalue");
@@ -481,7 +523,8 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
}
}
- return MakeAddrLValue(Object, M->getType(), AlignmentSource::Decl);
+ return MakeAddrLValue(Object, M->getType(),
+ LValueBaseInfo(AlignmentSource::Decl, false));
}
RValue
@@ -534,7 +577,8 @@ bool CodeGenFunction::sanitizePerformTypeCheck() const {
void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
llvm::Value *Ptr, QualType Ty,
- CharUnits Alignment, bool SkipNullCheck) {
+ CharUnits Alignment,
+ SanitizerSet SkippedChecks) {
if (!sanitizePerformTypeCheck())
return;
@@ -544,31 +588,52 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
if (Ptr->getType()->getPointerAddressSpace())
return;
+ // Don't check pointers to volatile data. The behavior here is implementation-
+ // defined.
+ if (Ty.isVolatileQualified())
+ return;
+
SanitizerScope SanScope(this);
SmallVector<std::pair<llvm::Value *, SanitizerMask>, 3> Checks;
llvm::BasicBlock *Done = nullptr;
+ // Quickly determine whether we have a pointer to an alloca. It's possible
+ // to skip null checks, and some alignment checks, for these pointers. This
+ // can reduce compile-time significantly.
+ auto PtrToAlloca =
+ dyn_cast<llvm::AllocaInst>(Ptr->stripPointerCastsNoFollowAliases());
+
bool AllowNullPointers = TCK == TCK_DowncastPointer || TCK == TCK_Upcast ||
TCK == TCK_UpcastToVirtualBase;
if ((SanOpts.has(SanitizerKind::Null) || AllowNullPointers) &&
- !SkipNullCheck) {
+ !SkippedChecks.has(SanitizerKind::Null) && !PtrToAlloca) {
// The glvalue must not be an empty glvalue.
llvm::Value *IsNonNull = Builder.CreateIsNotNull(Ptr);
- if (AllowNullPointers) {
- // When performing pointer casts, it's OK if the value is null.
- // Skip the remaining checks in that case.
- Done = createBasicBlock("null");
- llvm::BasicBlock *Rest = createBasicBlock("not.null");
- Builder.CreateCondBr(IsNonNull, Rest, Done);
- EmitBlock(Rest);
- } else {
- Checks.push_back(std::make_pair(IsNonNull, SanitizerKind::Null));
+ // The IR builder can constant-fold the null check if the pointer points to
+ // a constant.
+ bool PtrIsNonNull =
+ IsNonNull == llvm::ConstantInt::getTrue(getLLVMContext());
+
+ // Skip the null check if the pointer is known to be non-null.
+ if (!PtrIsNonNull) {
+ if (AllowNullPointers) {
+ // When performing pointer casts, it's OK if the value is null.
+ // Skip the remaining checks in that case.
+ Done = createBasicBlock("null");
+ llvm::BasicBlock *Rest = createBasicBlock("not.null");
+ Builder.CreateCondBr(IsNonNull, Rest, Done);
+ EmitBlock(Rest);
+ } else {
+ Checks.push_back(std::make_pair(IsNonNull, SanitizerKind::Null));
+ }
}
}
- if (SanOpts.has(SanitizerKind::ObjectSize) && !Ty->isIncompleteType()) {
+ if (SanOpts.has(SanitizerKind::ObjectSize) &&
+ !SkippedChecks.has(SanitizerKind::ObjectSize) &&
+ !Ty->isIncompleteType()) {
uint64_t Size = getContext().getTypeSizeInChars(Ty).getQuantity();
// The glvalue must refer to a large enough storage region.
@@ -578,22 +643,25 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
llvm::Type *Tys[2] = { IntPtrTy, Int8PtrTy };
llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, Tys);
llvm::Value *Min = Builder.getFalse();
+ llvm::Value *NullIsUnknown = Builder.getFalse();
llvm::Value *CastAddr = Builder.CreateBitCast(Ptr, Int8PtrTy);
- llvm::Value *LargeEnough =
- Builder.CreateICmpUGE(Builder.CreateCall(F, {CastAddr, Min}),
- llvm::ConstantInt::get(IntPtrTy, Size));
+ llvm::Value *LargeEnough = Builder.CreateICmpUGE(
+ Builder.CreateCall(F, {CastAddr, Min, NullIsUnknown}),
+ llvm::ConstantInt::get(IntPtrTy, Size));
Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize));
}
uint64_t AlignVal = 0;
- if (SanOpts.has(SanitizerKind::Alignment)) {
+ if (SanOpts.has(SanitizerKind::Alignment) &&
+ !SkippedChecks.has(SanitizerKind::Alignment)) {
AlignVal = Alignment.getQuantity();
if (!Ty->isIncompleteType() && !AlignVal)
AlignVal = getContext().getTypeAlignInChars(Ty).getQuantity();
// The glvalue must be suitably aligned.
- if (AlignVal) {
+ if (AlignVal > 1 &&
+ (!PtrToAlloca || PtrToAlloca->getAlignment() < AlignVal)) {
llvm::Value *Align =
Builder.CreateAnd(Builder.CreatePtrToInt(Ptr, IntPtrTy),
llvm::ConstantInt::get(IntPtrTy, AlignVal - 1));
@@ -624,6 +692,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
// or call a non-static member function
CXXRecordDecl *RD = Ty->getAsCXXRecordDecl();
if (SanOpts.has(SanitizerKind::Vptr) &&
+ !SkippedChecks.has(SanitizerKind::Vptr) &&
(TCK == TCK_MemberAccess || TCK == TCK_MemberCall ||
TCK == TCK_DowncastPointer || TCK == TCK_DowncastReference ||
TCK == TCK_UpcastToVirtualBase) &&
@@ -825,7 +894,7 @@ void CodeGenModule::EmitExplicitCastExprType(const ExplicitCastExpr *E,
/// EmitPointerWithAlignment - Given an expression of pointer type, try to
/// derive a more accurate bound on the alignment of the pointer.
Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
- AlignmentSource *Source) {
+ LValueBaseInfo *BaseInfo) {
// We allow this with ObjC object pointers because of fragile ABIs.
assert(E->getType()->isPointerType() ||
E->getType()->isObjCObjectPointerType());
@@ -844,16 +913,20 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
if (PtrTy->getPointeeType()->isVoidType())
break;
- AlignmentSource InnerSource;
- Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), &InnerSource);
- if (Source) *Source = InnerSource;
+ LValueBaseInfo InnerInfo;
+ Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), &InnerInfo);
+ if (BaseInfo) *BaseInfo = InnerInfo;
// If this is an explicit bitcast, and the source l-value is
// opaque, honor the alignment of the casted-to type.
if (isa<ExplicitCastExpr>(CE) &&
- InnerSource != AlignmentSource::Decl) {
- Addr = Address(Addr.getPointer(),
- getNaturalPointeeTypeAlignment(E->getType(), Source));
+ InnerInfo.getAlignmentSource() != AlignmentSource::Decl) {
+ LValueBaseInfo ExpInfo;
+ CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(),
+ &ExpInfo);
+ if (BaseInfo)
+ BaseInfo->mergeForCast(ExpInfo);
+ Addr = Address(Addr.getPointer(), Align);
}
if (SanOpts.has(SanitizerKind::CFIUnrelatedCast) &&
@@ -871,12 +944,12 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
// Array-to-pointer decay.
case CK_ArrayToPointerDecay:
- return EmitArrayToPointerDecay(CE->getSubExpr(), Source);
+ return EmitArrayToPointerDecay(CE->getSubExpr(), BaseInfo);
// Derived-to-base conversions.
case CK_UncheckedDerivedToBase:
case CK_DerivedToBase: {
- Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), Source);
+ Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo);
auto Derived = CE->getSubExpr()->getType()->getPointeeCXXRecordDecl();
return GetAddressOfBaseClass(Addr, Derived,
CE->path_begin(), CE->path_end(),
@@ -895,7 +968,7 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) {
if (UO->getOpcode() == UO_AddrOf) {
LValue LV = EmitLValue(UO->getSubExpr());
- if (Source) *Source = LV.getAlignmentSource();
+ if (BaseInfo) *BaseInfo = LV.getBaseInfo();
return LV.getAddress();
}
}
@@ -903,7 +976,7 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
// TODO: conditional operators, comma.
// Otherwise, use the alignment of the type.
- CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), Source);
+ CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), BaseInfo);
return Address(EmitScalarExpr(E), Align);
}
@@ -947,15 +1020,47 @@ LValue CodeGenFunction::EmitUnsupportedLValue(const Expr *E,
E->getType());
}
+bool CodeGenFunction::IsWrappedCXXThis(const Expr *Obj) {
+ const Expr *Base = Obj;
+ while (!isa<CXXThisExpr>(Base)) {
+ // The result of a dynamic_cast can be null.
+ if (isa<CXXDynamicCastExpr>(Base))
+ return false;
+
+ if (const auto *CE = dyn_cast<CastExpr>(Base)) {
+ Base = CE->getSubExpr();
+ } else if (const auto *PE = dyn_cast<ParenExpr>(Base)) {
+ Base = PE->getSubExpr();
+ } else if (const auto *UO = dyn_cast<UnaryOperator>(Base)) {
+ if (UO->getOpcode() == UO_Extension)
+ Base = UO->getSubExpr();
+ else
+ return false;
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
LValue CodeGenFunction::EmitCheckedLValue(const Expr *E, TypeCheckKind TCK) {
LValue LV;
if (SanOpts.has(SanitizerKind::ArrayBounds) && isa<ArraySubscriptExpr>(E))
LV = EmitArraySubscriptExpr(cast<ArraySubscriptExpr>(E), /*Accessed*/true);
else
LV = EmitLValue(E);
- if (!isa<DeclRefExpr>(E) && !LV.isBitField() && LV.isSimple())
+ if (!isa<DeclRefExpr>(E) && !LV.isBitField() && LV.isSimple()) {
+ SanitizerSet SkippedChecks;
+ if (const auto *ME = dyn_cast<MemberExpr>(E)) {
+ bool IsBaseCXXThis = IsWrappedCXXThis(ME->getBase());
+ if (IsBaseCXXThis)
+ SkippedChecks.set(SanitizerKind::Alignment, true);
+ if (IsBaseCXXThis || isa<DeclRefExpr>(ME->getBase()))
+ SkippedChecks.set(SanitizerKind::Null, true);
+ }
EmitTypeCheck(TCK, E->getExprLoc(), LV.getPointer(),
- E->getType(), LV.getAlignment());
+ E->getType(), LV.getAlignment(), SkippedChecks);
+ }
return LV;
}
@@ -1033,7 +1138,19 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
const auto *cleanups = cast<ExprWithCleanups>(E);
enterFullExpression(cleanups);
RunCleanupsScope Scope(*this);
- return EmitLValue(cleanups->getSubExpr());
+ LValue LV = EmitLValue(cleanups->getSubExpr());
+ if (LV.isSimple()) {
+    // Defend against branches out of GNU statement expressions surrounded by
+ // cleanups.
+ llvm::Value *V = LV.getPointer();
+ Scope.ForceCleanup({&V});
+ return LValue::MakeAddr(Address(V, LV.getAlignment()), LV.getType(),
+ getContext(), LV.getBaseInfo(),
+ LV.getTBAAInfo());
+ }
+ // FIXME: Is it possible to create an ExprWithCleanups that produces a
+ // bitfield lvalue or some other non-simple lvalue?
+ return LV;
}
case Expr::CXXDefaultArgExprClass:
@@ -1085,6 +1202,11 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
case Expr::MaterializeTemporaryExprClass:
return EmitMaterializeTemporaryExpr(cast<MaterializeTemporaryExpr>(E));
+
+ case Expr::CoawaitExprClass:
+ return EmitCoawaitLValue(cast<CoawaitExpr>(E));
+ case Expr::CoyieldExprClass:
+ return EmitCoyieldLValue(cast<CoyieldExpr>(E));
}
}
@@ -1203,7 +1325,7 @@ CodeGenFunction::tryEmitAsConstant(DeclRefExpr *refExpr) {
llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue,
SourceLocation Loc) {
return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(),
- lvalue.getType(), Loc, lvalue.getAlignmentSource(),
+ lvalue.getType(), Loc, lvalue.getBaseInfo(),
lvalue.getTBAAInfo(),
lvalue.getTBAABaseType(), lvalue.getTBAAOffset(),
lvalue.isNontemporal());
@@ -1265,40 +1387,89 @@ llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) {
return MDHelper.createRange(Min, End);
}
+bool CodeGenFunction::EmitScalarRangeCheck(llvm::Value *Value, QualType Ty,
+ SourceLocation Loc) {
+ bool HasBoolCheck = SanOpts.has(SanitizerKind::Bool);
+ bool HasEnumCheck = SanOpts.has(SanitizerKind::Enum);
+ if (!HasBoolCheck && !HasEnumCheck)
+ return false;
+
+ bool IsBool = hasBooleanRepresentation(Ty) ||
+ NSAPI(CGM.getContext()).isObjCBOOLType(Ty);
+ bool NeedsBoolCheck = HasBoolCheck && IsBool;
+ bool NeedsEnumCheck = HasEnumCheck && Ty->getAs<EnumType>();
+ if (!NeedsBoolCheck && !NeedsEnumCheck)
+ return false;
+
+ // Single-bit booleans don't need to be checked. Special-case this to avoid
+ // a bit width mismatch when handling bitfield values. This is handled by
+ // EmitFromMemory for the non-bitfield case.
+ if (IsBool &&
+ cast<llvm::IntegerType>(Value->getType())->getBitWidth() == 1)
+ return false;
+
+ llvm::APInt Min, End;
+ if (!getRangeForType(*this, Ty, Min, End, /*StrictEnums=*/true, IsBool))
+ return true;
+
+ SanitizerScope SanScope(this);
+ llvm::Value *Check;
+ --End;
+ if (!Min) {
+ Check = Builder.CreateICmpULE(
+ Value, llvm::ConstantInt::get(getLLVMContext(), End));
+ } else {
+ llvm::Value *Upper = Builder.CreateICmpSLE(
+ Value, llvm::ConstantInt::get(getLLVMContext(), End));
+ llvm::Value *Lower = Builder.CreateICmpSGE(
+ Value, llvm::ConstantInt::get(getLLVMContext(), Min));
+ Check = Builder.CreateAnd(Upper, Lower);
+ }
+ llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc),
+ EmitCheckTypeDescriptor(Ty)};
+ SanitizerMask Kind =
+ NeedsEnumCheck ? SanitizerKind::Enum : SanitizerKind::Bool;
+ EmitCheck(std::make_pair(Check, Kind), SanitizerHandler::LoadInvalidValue,
+ StaticArgs, EmitCheckValue(Value));
+ return true;
+}
+
llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
QualType Ty,
SourceLocation Loc,
- AlignmentSource AlignSource,
+ LValueBaseInfo BaseInfo,
llvm::MDNode *TBAAInfo,
QualType TBAABaseType,
uint64_t TBAAOffset,
bool isNontemporal) {
- // For better performance, handle vector loads differently.
- if (Ty->isVectorType()) {
- const llvm::Type *EltTy = Addr.getElementType();
-
- const auto *VTy = cast<llvm::VectorType>(EltTy);
-
- // Handle vectors of size 3 like size 4 for better performance.
- if (VTy->getNumElements() == 3) {
-
- // Bitcast to vec4 type.
- llvm::VectorType *vec4Ty = llvm::VectorType::get(VTy->getElementType(),
- 4);
- Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4");
- // Now load value.
- llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");
-
- // Shuffle vector to get vec3.
- V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty),
- {0, 1, 2}, "extractVec");
- return EmitFromMemory(V, Ty);
+ if (!CGM.getCodeGenOpts().PreserveVec3Type) {
+ // For better performance, handle vector loads differently.
+ if (Ty->isVectorType()) {
+ const llvm::Type *EltTy = Addr.getElementType();
+
+ const auto *VTy = cast<llvm::VectorType>(EltTy);
+
+ // Handle vectors of size 3 like size 4 for better performance.
+ if (VTy->getNumElements() == 3) {
+
+ // Bitcast to vec4 type.
+ llvm::VectorType *vec4Ty =
+ llvm::VectorType::get(VTy->getElementType(), 4);
+ Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4");
+ // Now load value.
+ llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");
+
+ // Shuffle vector to get vec3.
+ V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty),
+ {0, 1, 2}, "extractVec");
+ return EmitFromMemory(V, Ty);
+ }
}
}
// Atomic operations have to be done on integral types.
LValue AtomicLValue =
- LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo);
+ LValue::MakeAddr(Addr, Ty, getContext(), BaseInfo, TBAAInfo);
if (Ty->isAtomicType() || LValueIsSuitableForInlineAtomic(AtomicLValue)) {
return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal();
}
@@ -1310,42 +1481,17 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
}
if (TBAAInfo) {
- llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
- TBAAOffset);
- if (TBAAPath)
- CGM.DecorateInstructionWithTBAA(Load, TBAAPath,
- false /*ConvertTypeToTag*/);
+ bool MayAlias = BaseInfo.getMayAlias();
+ llvm::MDNode *TBAA = MayAlias
+ ? CGM.getTBAAInfo(getContext().CharTy)
+ : CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset);
+ if (TBAA)
+ CGM.DecorateInstructionWithTBAA(Load, TBAA, MayAlias);
}
- bool IsBool = hasBooleanRepresentation(Ty) ||
- NSAPI(CGM.getContext()).isObjCBOOLType(Ty);
- bool NeedsBoolCheck = SanOpts.has(SanitizerKind::Bool) && IsBool;
- bool NeedsEnumCheck =
- SanOpts.has(SanitizerKind::Enum) && Ty->getAs<EnumType>();
- if (NeedsBoolCheck || NeedsEnumCheck) {
- SanitizerScope SanScope(this);
- llvm::APInt Min, End;
- if (getRangeForType(*this, Ty, Min, End, /*StrictEnums=*/true, IsBool)) {
- --End;
- llvm::Value *Check;
- if (!Min)
- Check = Builder.CreateICmpULE(
- Load, llvm::ConstantInt::get(getLLVMContext(), End));
- else {
- llvm::Value *Upper = Builder.CreateICmpSLE(
- Load, llvm::ConstantInt::get(getLLVMContext(), End));
- llvm::Value *Lower = Builder.CreateICmpSGE(
- Load, llvm::ConstantInt::get(getLLVMContext(), Min));
- Check = Builder.CreateAnd(Upper, Lower);
- }
- llvm::Constant *StaticArgs[] = {
- EmitCheckSourceLocation(Loc),
- EmitCheckTypeDescriptor(Ty)
- };
- SanitizerMask Kind = NeedsEnumCheck ? SanitizerKind::Enum : SanitizerKind::Bool;
- EmitCheck(std::make_pair(Check, Kind), SanitizerHandler::LoadInvalidValue,
- StaticArgs, EmitCheckValue(Load));
- }
+ if (EmitScalarRangeCheck(Load, Ty, Loc)) {
+ // In order to prevent the optimizer from throwing away the check, don't
+ // attach range metadata to the load.
} else if (CGM.getCodeGenOpts().OptimizationLevel > 0)
if (llvm::MDNode *RangeInfo = getRangeForLoadFromType(Ty))
Load->setMetadata(llvm::LLVMContext::MD_range, RangeInfo);
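The open-coded bool/enum range sanitizer above is folded into EmitScalarRangeCheck, and when a check is emitted the load intentionally gets no !range metadata, so the optimizer cannot use that metadata to delete the check. A hedged example of a load that -fsanitize=enum would instrument (the function name is made up):

    #include <cstring>

    enum Color { Red, Green, Blue };

    Color load_color(const void *p) {
      Color c;
      std::memcpy(&c, p, sizeof c);   // may smuggle in an out-of-range value
      return c;                       // the scalar load of 'c' carries the range check
    }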
@@ -1380,37 +1526,38 @@ llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
bool Volatile, QualType Ty,
- AlignmentSource AlignSource,
+ LValueBaseInfo BaseInfo,
llvm::MDNode *TBAAInfo,
bool isInit, QualType TBAABaseType,
uint64_t TBAAOffset,
bool isNontemporal) {
- // Handle vectors differently to get better performance.
- if (Ty->isVectorType()) {
- llvm::Type *SrcTy = Value->getType();
- auto *VecTy = cast<llvm::VectorType>(SrcTy);
- // Handle vec3 special.
- if (VecTy->getNumElements() == 3) {
- // Our source is a vec3, do a shuffle vector to make it a vec4.
- llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1),
- Builder.getInt32(2),
- llvm::UndefValue::get(Builder.getInt32Ty())};
- llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
- Value = Builder.CreateShuffleVector(Value,
- llvm::UndefValue::get(VecTy),
- MaskV, "extractVec");
- SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
- }
- if (Addr.getElementType() != SrcTy) {
- Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp");
+ if (!CGM.getCodeGenOpts().PreserveVec3Type) {
+ // Handle vectors differently to get better performance.
+ if (Ty->isVectorType()) {
+ llvm::Type *SrcTy = Value->getType();
+ auto *VecTy = dyn_cast<llvm::VectorType>(SrcTy);
+ // Handle vec3 special.
+ if (VecTy && VecTy->getNumElements() == 3) {
+ // Our source is a vec3, do a shuffle vector to make it a vec4.
+ llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1),
+ Builder.getInt32(2),
+ llvm::UndefValue::get(Builder.getInt32Ty())};
+ llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
+ Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy),
+ MaskV, "extractVec");
+ SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
+ }
+ if (Addr.getElementType() != SrcTy) {
+ Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp");
+ }
}
}
Value = EmitToMemory(Value, Ty);
LValue AtomicLValue =
- LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo);
+ LValue::MakeAddr(Addr, Ty, getContext(), BaseInfo, TBAAInfo);
if (Ty->isAtomicType() ||
(!isInit && LValueIsSuitableForInlineAtomic(AtomicLValue))) {
EmitAtomicStore(RValue::get(Value), AtomicLValue, isInit);
@@ -1425,18 +1572,19 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
}
if (TBAAInfo) {
- llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
- TBAAOffset);
- if (TBAAPath)
- CGM.DecorateInstructionWithTBAA(Store, TBAAPath,
- false /*ConvertTypeToTag*/);
+ bool MayAlias = BaseInfo.getMayAlias();
+ llvm::MDNode *TBAA = MayAlias
+ ? CGM.getTBAAInfo(getContext().CharTy)
+ : CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset);
+ if (TBAA)
+ CGM.DecorateInstructionWithTBAA(Store, TBAA, MayAlias);
}
}
void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue,
bool isInit) {
EmitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(),
- lvalue.getType(), lvalue.getAlignmentSource(),
+ lvalue.getType(), lvalue.getBaseInfo(),
lvalue.getTBAAInfo(), isInit, lvalue.getTBAABaseType(),
lvalue.getTBAAOffset(), lvalue.isNontemporal());
}
@@ -1487,10 +1635,11 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
return EmitLoadOfGlobalRegLValue(LV);
assert(LV.isBitField() && "Unknown LValue type!");
- return EmitLoadOfBitfieldLValue(LV);
+ return EmitLoadOfBitfieldLValue(LV, Loc);
}
-RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV) {
+RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV,
+ SourceLocation Loc) {
const CGBitFieldInfo &Info = LV.getBitFieldInfo();
// Get the output type.
@@ -1515,7 +1664,7 @@ RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV) {
"bf.clear");
}
Val = Builder.CreateIntCast(Val, ResLTy, Info.IsSigned, "bf.cast");
-
+ EmitScalarRangeCheck(Val, LV.getType(), Loc);
return RValue::get(Val);
}
@@ -1967,38 +2116,39 @@ static LValue EmitThreadPrivateVarDeclLValue(
llvm::Type *RealVarTy, SourceLocation Loc) {
Addr = CGF.CGM.getOpenMPRuntime().getAddrOfThreadPrivate(CGF, VD, Addr, Loc);
Addr = CGF.Builder.CreateElementBitCast(Addr, RealVarTy);
- return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
+ LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+ return CGF.MakeAddrLValue(Addr, T, BaseInfo);
}
Address CodeGenFunction::EmitLoadOfReference(Address Addr,
const ReferenceType *RefTy,
- AlignmentSource *Source) {
+ LValueBaseInfo *BaseInfo) {
llvm::Value *Ptr = Builder.CreateLoad(Addr);
return Address(Ptr, getNaturalTypeAlignment(RefTy->getPointeeType(),
- Source, /*forPointee*/ true));
-
+ BaseInfo, /*forPointee*/ true));
}
LValue CodeGenFunction::EmitLoadOfReferenceLValue(Address RefAddr,
const ReferenceType *RefTy) {
- AlignmentSource Source;
- Address Addr = EmitLoadOfReference(RefAddr, RefTy, &Source);
- return MakeAddrLValue(Addr, RefTy->getPointeeType(), Source);
+ LValueBaseInfo BaseInfo;
+ Address Addr = EmitLoadOfReference(RefAddr, RefTy, &BaseInfo);
+ return MakeAddrLValue(Addr, RefTy->getPointeeType(), BaseInfo);
}
Address CodeGenFunction::EmitLoadOfPointer(Address Ptr,
const PointerType *PtrTy,
- AlignmentSource *Source) {
+ LValueBaseInfo *BaseInfo) {
llvm::Value *Addr = Builder.CreateLoad(Ptr);
- return Address(Addr, getNaturalTypeAlignment(PtrTy->getPointeeType(), Source,
+ return Address(Addr, getNaturalTypeAlignment(PtrTy->getPointeeType(),
+ BaseInfo,
/*forPointeeType=*/true));
}
LValue CodeGenFunction::EmitLoadOfPointerLValue(Address PtrAddr,
const PointerType *PtrTy) {
- AlignmentSource Source;
- Address Addr = EmitLoadOfPointer(PtrAddr, PtrTy, &Source);
- return MakeAddrLValue(Addr, PtrTy->getPointeeType(), Source);
+ LValueBaseInfo BaseInfo;
+ Address Addr = EmitLoadOfPointer(PtrAddr, PtrTy, &BaseInfo);
+ return MakeAddrLValue(Addr, PtrTy->getPointeeType(), BaseInfo);
}
static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
@@ -2024,7 +2174,8 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
if (auto RefTy = VD->getType()->getAs<ReferenceType>()) {
LV = CGF.EmitLoadOfReferenceLValue(Addr, RefTy);
} else {
- LV = CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
+ LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+ LV = CGF.MakeAddrLValue(Addr, T, BaseInfo);
}
setObjCGCLValueClass(CGF.getContext(), E, LV);
return LV;
@@ -2058,7 +2209,8 @@ static LValue EmitFunctionDeclLValue(CodeGenFunction &CGF,
const Expr *E, const FunctionDecl *FD) {
llvm::Value *V = EmitFunctionDeclPointer(CGF.CGM, FD);
CharUnits Alignment = CGF.getContext().getDeclAlign(FD);
- return CGF.MakeAddrLValue(V, E->getType(), Alignment, AlignmentSource::Decl);
+ LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+ return CGF.MakeAddrLValue(V, E->getType(), Alignment, BaseInfo);
}
static LValue EmitCapturedFieldLValue(CodeGenFunction &CGF, const FieldDecl *FD,
@@ -2123,8 +2275,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
// Should we be using the alignment of the constant pointer we emitted?
CharUnits Alignment = getNaturalTypeAlignment(E->getType(), nullptr,
/*pointee*/ true);
-
- return MakeAddrLValue(Address(Val, Alignment), T, AlignmentSource::Decl);
+ LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+ return MakeAddrLValue(Address(Val, Alignment), T, BaseInfo);
}
// Check for captured variables.
@@ -2141,14 +2293,16 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
LValue CapLVal =
EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD),
CapturedStmtInfo->getContextValue());
+ bool MayAlias = CapLVal.getBaseInfo().getMayAlias();
return MakeAddrLValue(
Address(CapLVal.getPointer(), getContext().getDeclAlign(VD)),
- CapLVal.getType(), AlignmentSource::Decl);
+ CapLVal.getType(), LValueBaseInfo(AlignmentSource::Decl, MayAlias));
}
assert(isa<BlockDecl>(CurCodeDecl));
Address addr = GetAddrOfBlockDecl(VD, VD->hasAttr<BlocksAttr>());
- return MakeAddrLValue(addr, T, AlignmentSource::Decl);
+ LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+ return MakeAddrLValue(addr, T, BaseInfo);
}
}
@@ -2162,7 +2316,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
if (ND->hasAttr<WeakRefAttr>()) {
const auto *VD = cast<ValueDecl>(ND);
ConstantAddress Aliasee = CGM.GetWeakRefReference(VD);
- return MakeAddrLValue(Aliasee, T, AlignmentSource::Decl);
+ return MakeAddrLValue(Aliasee, T,
+ LValueBaseInfo(AlignmentSource::Decl, false));
}
if (const auto *VD = dyn_cast<VarDecl>(ND)) {
@@ -2208,7 +2363,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
if (auto RefTy = VD->getType()->getAs<ReferenceType>()) {
LV = EmitLoadOfReferenceLValue(addr, RefTy);
} else {
- LV = MakeAddrLValue(addr, T, AlignmentSource::Decl);
+ LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+ LV = MakeAddrLValue(addr, T, BaseInfo);
}
bool isLocalStorage = VD->hasLocalStorage();
@@ -2253,9 +2409,9 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) {
QualType T = E->getSubExpr()->getType()->getPointeeType();
assert(!T.isNull() && "CodeGenFunction::EmitUnaryOpLValue: Illegal type");
- AlignmentSource AlignSource;
- Address Addr = EmitPointerWithAlignment(E->getSubExpr(), &AlignSource);
- LValue LV = MakeAddrLValue(Addr, T, AlignSource);
+ LValueBaseInfo BaseInfo;
+ Address Addr = EmitPointerWithAlignment(E->getSubExpr(), &BaseInfo);
+ LValue LV = MakeAddrLValue(Addr, T, BaseInfo);
LV.getQuals().setAddressSpace(ExprTy.getAddressSpace());
// We should not generate __weak write barrier on indirect reference
@@ -2287,7 +2443,7 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) {
(E->getOpcode() == UO_Real
? emitAddrOfRealComponent(LV.getAddress(), LV.getType())
: emitAddrOfImagComponent(LV.getAddress(), LV.getType()));
- LValue ElemLV = MakeAddrLValue(Component, T, LV.getAlignmentSource());
+ LValue ElemLV = MakeAddrLValue(Component, T, LV.getBaseInfo());
ElemLV.getQuals().addQualifiers(LV.getQuals());
return ElemLV;
}
@@ -2307,12 +2463,14 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) {
LValue CodeGenFunction::EmitStringLiteralLValue(const StringLiteral *E) {
return MakeAddrLValue(CGM.GetAddrOfConstantStringFromLiteral(E),
- E->getType(), AlignmentSource::Decl);
+ E->getType(),
+ LValueBaseInfo(AlignmentSource::Decl, false));
}
LValue CodeGenFunction::EmitObjCEncodeExprLValue(const ObjCEncodeExpr *E) {
return MakeAddrLValue(CGM.GetAddrOfConstantStringFromObjCEncode(E),
- E->getType(), AlignmentSource::Decl);
+ E->getType(),
+ LValueBaseInfo(AlignmentSource::Decl, false));
}
LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
@@ -2324,6 +2482,7 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
StringRef NameItems[] = {
PredefinedExpr::getIdentTypeName(E->getIdentType()), FnName};
std::string GVName = llvm::join(NameItems, NameItems + 2, ".");
+ LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
if (auto *BD = dyn_cast<BlockDecl>(CurCodeDecl)) {
std::string Name = SL->getString();
if (!Name.empty()) {
@@ -2332,14 +2491,14 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
if (Discriminator)
Name += "_" + Twine(Discriminator + 1).str();
auto C = CGM.GetAddrOfConstantCString(Name, GVName.c_str());
- return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl);
+ return MakeAddrLValue(C, E->getType(), BaseInfo);
} else {
auto C = CGM.GetAddrOfConstantCString(FnName, GVName.c_str());
- return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl);
+ return MakeAddrLValue(C, E->getType(), BaseInfo);
}
}
auto C = CGM.GetAddrOfConstantStringFromLiteral(SL, GVName);
- return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl);
+ return MakeAddrLValue(C, E->getType(), BaseInfo);
}
/// Emit a type description suitable for use by a runtime sanitizer library. The
@@ -2545,8 +2704,8 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF,
llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(
FnType, FnName,
- llvm::AttributeSet::get(CGF.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex, B),
+ llvm::AttributeList::get(CGF.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex, B),
/*Local=*/true);
llvm::CallInst *HandlerCall = CGF.EmitNounwindRuntimeCall(Fn, FnArgs);
if (!MayReturn) {
@@ -2709,6 +2868,24 @@ void CodeGenFunction::EmitCfiSlowPathCheck(
EmitBlock(Cont);
}
+// Emit a stub for the __cfi_check function so that the linker knows about this
+// symbol in LTO mode.
+void CodeGenFunction::EmitCfiCheckStub() {
+ llvm::Module *M = &CGM.getModule();
+ auto &Ctx = M->getContext();
+ llvm::Function *F = llvm::Function::Create(
+ llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy}, false),
+ llvm::GlobalValue::WeakAnyLinkage, "__cfi_check", M);
+ llvm::BasicBlock *BB = llvm::BasicBlock::Create(Ctx, "entry", F);
+ // FIXME: consider emitting an intrinsic call like
+ // call void @llvm.cfi_check(i64 %0, i8* %1, i8* %2)
+ // which can be lowered in CrossDSOCFI pass to the actual contents of
+ // __cfi_check. This would allow inlining of __cfi_check calls.
+ llvm::CallInst::Create(
+ llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::trap), "", BB);
+ llvm::ReturnInst::Create(Ctx, nullptr, BB);
+}
+
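The stub gives every module built for cross-DSO CFI a weak definition of __cfi_check, so the symbol resolves during LTO even before the CrossDSOCFI pass fills in the real dispatch body. In C terms it corresponds roughly to the following, sketched from the FunctionType above; the parameter names are assumptions:

    #include <stdint.h>

    // Weak placeholder only; LLVM's CrossDSOCFI pass synthesizes the real body.
    extern "C" __attribute__((weak))
    void __cfi_check(uint64_t CallSiteTypeId, void *Addr, void *DiagData) {
      (void)CallSiteTypeId; (void)Addr; (void)DiagData;
      __builtin_trap();
    }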
// This function is basically a switch over the CFI failure kind, which is
// extracted from CFICheckFailData (1st function argument). Each case is either
// llvm.trap or a call to one of the two runtime handlers, based on
@@ -2719,10 +2896,10 @@ void CodeGenFunction::EmitCfiSlowPathCheck(
void CodeGenFunction::EmitCfiCheckFail() {
SanitizerScope SanScope(this);
FunctionArgList Args;
- ImplicitParamDecl ArgData(getContext(), nullptr, SourceLocation(), nullptr,
- getContext().VoidPtrTy);
- ImplicitParamDecl ArgAddr(getContext(), nullptr, SourceLocation(), nullptr,
- getContext().VoidPtrTy);
+ ImplicitParamDecl ArgData(getContext(), getContext().VoidPtrTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl ArgAddr(getContext(), getContext().VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.push_back(&ArgData);
Args.push_back(&ArgAddr);
@@ -2750,9 +2927,9 @@ void CodeGenFunction::EmitCfiCheckFail() {
EmitTrapCheck(DataIsNotNullPtr);
llvm::StructType *SourceLocationTy =
- llvm::StructType::get(VoidPtrTy, Int32Ty, Int32Ty, nullptr);
+ llvm::StructType::get(VoidPtrTy, Int32Ty, Int32Ty);
llvm::StructType *CfiCheckFailDataTy =
- llvm::StructType::get(Int8Ty, SourceLocationTy, VoidPtrTy, nullptr);
+ llvm::StructType::get(Int8Ty, SourceLocationTy, VoidPtrTy);
llvm::Value *V = Builder.CreateConstGEP2_32(
CfiCheckFailDataTy,
@@ -2821,21 +2998,21 @@ llvm::CallInst *CodeGenFunction::EmitTrapCall(llvm::Intrinsic::ID IntrID) {
if (!CGM.getCodeGenOpts().TrapFuncName.empty()) {
auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name",
CGM.getCodeGenOpts().TrapFuncName);
- TrapCall->addAttribute(llvm::AttributeSet::FunctionIndex, A);
+ TrapCall->addAttribute(llvm::AttributeList::FunctionIndex, A);
}
return TrapCall;
}
Address CodeGenFunction::EmitArrayToPointerDecay(const Expr *E,
- AlignmentSource *AlignSource) {
+ LValueBaseInfo *BaseInfo) {
assert(E->getType()->isArrayType() &&
"Array to pointer decay must have array source type!");
// Expressions of array type can't be bitfields or vector elements.
LValue LV = EmitLValue(E);
Address Addr = LV.getAddress();
- if (AlignSource) *AlignSource = LV.getAlignmentSource();
+ if (BaseInfo) *BaseInfo = LV.getBaseInfo();
// If the array type was an incomplete type, we need to make sure
// the decay ends up being the right type.
@@ -2874,9 +3051,13 @@ static llvm::Value *emitArraySubscriptGEP(CodeGenFunction &CGF,
llvm::Value *ptr,
ArrayRef<llvm::Value*> indices,
bool inbounds,
+ bool signedIndices,
+ SourceLocation loc,
const llvm::Twine &name = "arrayidx") {
if (inbounds) {
- return CGF.Builder.CreateInBoundsGEP(ptr, indices, name);
+ return CGF.EmitCheckedInBoundsGEP(ptr, indices, signedIndices,
+ CodeGenFunction::NotSubtraction, loc,
+ name);
} else {
return CGF.Builder.CreateGEP(ptr, indices, name);
}
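Routing in-bounds subscripts through EmitCheckedInBoundsGEP, with the signedness of the index threaded in, is what allows UBSan's pointer-overflow style checks to verify that the address arithmetic itself does not wrap. A trivial example of a subscript that now carries SignedIndices=true:

    // 'i' has signed type, so the generated GEP goes through
    // EmitCheckedInBoundsGEP and can be checked for address wrap-around.
    int read_elt(int *base, int i) {
      return base[i];
    }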
@@ -2907,8 +3088,9 @@ static QualType getFixedSizeElementType(const ASTContext &ctx,
}
static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr,
- ArrayRef<llvm::Value*> indices,
+ ArrayRef<llvm::Value *> indices,
QualType eltType, bool inbounds,
+ bool signedIndices, SourceLocation loc,
const llvm::Twine &name = "arrayidx") {
// All the indices except that last must be zero.
#ifndef NDEBUG
@@ -2928,8 +3110,8 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr,
CharUnits eltAlign =
getArrayElementAlign(addr.getAlignment(), indices.back(), eltSize);
- llvm::Value *eltPtr =
- emitArraySubscriptGEP(CGF, addr.getPointer(), indices, inbounds, name);
+ llvm::Value *eltPtr = emitArraySubscriptGEP(
+ CGF, addr.getPointer(), indices, inbounds, signedIndices, loc, name);
return Address(eltPtr, eltAlign);
}
@@ -2939,6 +3121,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
// in lexical order (this complexity is, sadly, required by C++17).
llvm::Value *IdxPre =
(E->getLHS() == E->getIdx()) ? EmitScalarExpr(E->getIdx()) : nullptr;
+ bool SignedIndices = false;
auto EmitIdxAfterBase = [&, IdxPre](bool Promote) -> llvm::Value * {
auto *Idx = IdxPre;
if (E->getLHS() != E->getIdx()) {
@@ -2948,6 +3131,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
QualType IdxTy = E->getIdx()->getType();
bool IdxSigned = IdxTy->isSignedIntegerOrEnumerationType();
+ SignedIndices |= IdxSigned;
if (SanOpts.has(SanitizerKind::ArrayBounds))
EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, Accessed);
@@ -2970,7 +3154,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
assert(LHS.isSimple() && "Can only subscript lvalue vectors here!");
return LValue::MakeVectorElt(LHS.getAddress(), Idx,
E->getBase()->getType(),
- LHS.getAlignmentSource());
+ LHS.getBaseInfo());
}
// All the other cases basically behave like simple offsetting.
@@ -2982,18 +3166,19 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
Address Addr = EmitExtVectorElementLValue(LV);
QualType EltType = LV.getType()->castAs<VectorType>()->getElementType();
- Addr = emitArraySubscriptGEP(*this, Addr, Idx, EltType, /*inbounds*/ true);
- return MakeAddrLValue(Addr, EltType, LV.getAlignmentSource());
+ Addr = emitArraySubscriptGEP(*this, Addr, Idx, EltType, /*inbounds*/ true,
+ SignedIndices, E->getExprLoc());
+ return MakeAddrLValue(Addr, EltType, LV.getBaseInfo());
}
- AlignmentSource AlignSource;
+ LValueBaseInfo BaseInfo;
Address Addr = Address::invalid();
if (const VariableArrayType *vla =
getContext().getAsVariableArrayType(E->getType())) {
// The base must be a pointer, which is not an aggregate. Emit
// it. It needs to be emitted first in case it's what captures
// the VLA bounds.
- Addr = EmitPointerWithAlignment(E->getBase(), &AlignSource);
+ Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo);
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
// The element count here is the total number of non-VLA elements.
@@ -3010,13 +3195,14 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
}
Addr = emitArraySubscriptGEP(*this, Addr, Idx, vla->getElementType(),
- !getLangOpts().isSignedOverflowDefined());
+ !getLangOpts().isSignedOverflowDefined(),
+ SignedIndices, E->getExprLoc());
} else if (const ObjCObjectType *OIT = E->getType()->getAs<ObjCObjectType>()){
// Indexing over an interface, as in "NSString *P; P[4];"
// Emit the base pointer.
- Addr = EmitPointerWithAlignment(E->getBase(), &AlignSource);
+ Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo);
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
CharUnits InterfaceSize = getContext().getTypeSizeInChars(OIT);
@@ -3036,7 +3222,8 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
CharUnits EltAlign =
getArrayElementAlign(Addr.getAlignment(), Idx, InterfaceSize);
llvm::Value *EltPtr =
- emitArraySubscriptGEP(*this, Addr.getPointer(), ScaledIdx, false);
+ emitArraySubscriptGEP(*this, Addr.getPointer(), ScaledIdx, false,
+ SignedIndices, E->getExprLoc());
Addr = Address(EltPtr, EltAlign);
// Cast back.
@@ -3058,20 +3245,21 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
// Propagate the alignment from the array itself to the result.
- Addr = emitArraySubscriptGEP(*this, ArrayLV.getAddress(),
- {CGM.getSize(CharUnits::Zero()), Idx},
- E->getType(),
- !getLangOpts().isSignedOverflowDefined());
- AlignSource = ArrayLV.getAlignmentSource();
+ Addr = emitArraySubscriptGEP(
+ *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx},
+ E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices,
+ E->getExprLoc());
+ BaseInfo = ArrayLV.getBaseInfo();
} else {
// The base must be a pointer; emit it with an estimate of its alignment.
- Addr = EmitPointerWithAlignment(E->getBase(), &AlignSource);
+ Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo);
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
Addr = emitArraySubscriptGEP(*this, Addr, Idx, E->getType(),
- !getLangOpts().isSignedOverflowDefined());
+ !getLangOpts().isSignedOverflowDefined(),
+ SignedIndices, E->getExprLoc());
}
- LValue LV = MakeAddrLValue(Addr, E->getType(), AlignSource);
+ LValue LV = MakeAddrLValue(Addr, E->getType(), BaseInfo);
// TODO: Preserve/extend path TBAA metadata?
@@ -3084,7 +3272,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
}
static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base,
- AlignmentSource &AlignSource,
+ LValueBaseInfo &BaseInfo,
QualType BaseTy, QualType ElTy,
bool IsLowerBound) {
LValue BaseLVal;
@@ -3092,7 +3280,7 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base,
BaseLVal = CGF.EmitOMPArraySectionExpr(ASE, IsLowerBound);
if (BaseTy->isArrayType()) {
Address Addr = BaseLVal.getAddress();
- AlignSource = BaseLVal.getAlignmentSource();
+ BaseInfo = BaseLVal.getBaseInfo();
// If the array type was an incomplete type, we need to make sure
// the decay ends up being the right type.
@@ -3111,20 +3299,17 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base,
return CGF.Builder.CreateElementBitCast(Addr,
CGF.ConvertTypeForMem(ElTy));
}
- CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &AlignSource);
+ LValueBaseInfo TypeInfo;
+ CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &TypeInfo);
+ BaseInfo.mergeForCast(TypeInfo);
return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress()), Align);
}
- return CGF.EmitPointerWithAlignment(Base, &AlignSource);
+ return CGF.EmitPointerWithAlignment(Base, &BaseInfo);
}
LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
bool IsLowerBound) {
- QualType BaseTy;
- if (auto *ASE =
- dyn_cast<OMPArraySectionExpr>(E->getBase()->IgnoreParenImpCasts()))
- BaseTy = OMPArraySectionExpr::getBaseOriginalType(ASE);
- else
- BaseTy = E->getBase()->getType();
+ QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(E->getBase());
QualType ResultExprTy;
if (auto *AT = getContext().getAsArrayType(BaseTy))
ResultExprTy = AT->getElementType();
@@ -3218,13 +3403,13 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
assert(Idx);
Address EltPtr = Address::invalid();
- AlignmentSource AlignSource;
+ LValueBaseInfo BaseInfo;
if (auto *VLA = getContext().getAsVariableArrayType(ResultExprTy)) {
// The base must be a pointer, which is not an aggregate. Emit
// it. It needs to be emitted first in case it's what captures
// the VLA bounds.
Address Base =
- emitOMPArraySectionBase(*this, E->getBase(), AlignSource, BaseTy,
+ emitOMPArraySectionBase(*this, E->getBase(), BaseInfo, BaseTy,
VLA->getElementType(), IsLowerBound);
// The element count here is the total number of non-VLA elements.
llvm::Value *NumElements = getVLASize(VLA).first;
@@ -3238,7 +3423,8 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
else
Idx = Builder.CreateNSWMul(Idx, NumElements);
EltPtr = emitArraySubscriptGEP(*this, Base, Idx, VLA->getElementType(),
- !getLangOpts().isSignedOverflowDefined());
+ !getLangOpts().isSignedOverflowDefined(),
+ /*SignedIndices=*/false, E->getExprLoc());
} else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) {
// If this is A[i] where A is an array, the frontend will have decayed the
// base to be a ArrayToPointerDecay implicit cast. While correct, it is
@@ -3257,16 +3443,18 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
// Propagate the alignment from the array itself to the result.
EltPtr = emitArraySubscriptGEP(
*this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx},
- ResultExprTy, !getLangOpts().isSignedOverflowDefined());
- AlignSource = ArrayLV.getAlignmentSource();
+ ResultExprTy, !getLangOpts().isSignedOverflowDefined(),
+ /*SignedIndices=*/false, E->getExprLoc());
+ BaseInfo = ArrayLV.getBaseInfo();
} else {
- Address Base = emitOMPArraySectionBase(*this, E->getBase(), AlignSource,
+ Address Base = emitOMPArraySectionBase(*this, E->getBase(), BaseInfo,
BaseTy, ResultExprTy, IsLowerBound);
EltPtr = emitArraySubscriptGEP(*this, Base, Idx, ResultExprTy,
- !getLangOpts().isSignedOverflowDefined());
+ !getLangOpts().isSignedOverflowDefined(),
+ /*SignedIndices=*/false, E->getExprLoc());
}
- return MakeAddrLValue(EltPtr, ResultExprTy, AlignSource);
+ return MakeAddrLValue(EltPtr, ResultExprTy, BaseInfo);
}
LValue CodeGenFunction::
@@ -3278,10 +3466,10 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) {
if (E->isArrow()) {
// If it is a pointer to a vector, emit the address and form an lvalue with
// it.
- AlignmentSource AlignSource;
- Address Ptr = EmitPointerWithAlignment(E->getBase(), &AlignSource);
+ LValueBaseInfo BaseInfo;
+ Address Ptr = EmitPointerWithAlignment(E->getBase(), &BaseInfo);
const PointerType *PT = E->getBase()->getType()->getAs<PointerType>();
- Base = MakeAddrLValue(Ptr, PT->getPointeeType(), AlignSource);
+ Base = MakeAddrLValue(Ptr, PT->getPointeeType(), BaseInfo);
Base.getQuals().removeObjCGCAttr();
} else if (E->getBase()->isGLValue()) {
// Otherwise, if the base is an lvalue ( as in the case of foo.x.x),
@@ -3298,7 +3486,7 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) {
Address VecMem = CreateMemTemp(E->getBase()->getType());
Builder.CreateStore(Vec, VecMem);
Base = MakeAddrLValue(VecMem, E->getBase()->getType(),
- AlignmentSource::Decl);
+ LValueBaseInfo(AlignmentSource::Decl, false));
}
QualType type =
@@ -3312,7 +3500,7 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) {
llvm::Constant *CV =
llvm::ConstantDataVector::get(getLLVMContext(), Indices);
return LValue::MakeExtVectorElt(Base.getAddress(), CV, type,
- Base.getAlignmentSource());
+ Base.getBaseInfo());
}
assert(Base.isExtVectorElt() && "Can only subscript lvalue vec elts here!");
@@ -3323,20 +3511,26 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) {
CElts.push_back(BaseElts->getAggregateElement(Indices[i]));
llvm::Constant *CV = llvm::ConstantVector::get(CElts);
return LValue::MakeExtVectorElt(Base.getExtVectorAddress(), CV, type,
- Base.getAlignmentSource());
+ Base.getBaseInfo());
}
LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) {
Expr *BaseExpr = E->getBase();
-
// If this is s.x, emit s as an lvalue. If it is s->x, emit s as a scalar.
LValue BaseLV;
if (E->isArrow()) {
- AlignmentSource AlignSource;
- Address Addr = EmitPointerWithAlignment(BaseExpr, &AlignSource);
+ LValueBaseInfo BaseInfo;
+ Address Addr = EmitPointerWithAlignment(BaseExpr, &BaseInfo);
QualType PtrTy = BaseExpr->getType()->getPointeeType();
- EmitTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy);
- BaseLV = MakeAddrLValue(Addr, PtrTy, AlignSource);
+ SanitizerSet SkippedChecks;
+ bool IsBaseCXXThis = IsWrappedCXXThis(BaseExpr);
+ if (IsBaseCXXThis)
+ SkippedChecks.set(SanitizerKind::Alignment, true);
+ if (IsBaseCXXThis || isa<DeclRefExpr>(BaseExpr))
+ SkippedChecks.set(SanitizerKind::Null, true);
+ EmitTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy,
+ /*Alignment=*/CharUnits::Zero(), SkippedChecks);
+ BaseLV = MakeAddrLValue(Addr, PtrTy, BaseInfo);
} else
BaseLV = EmitCheckedLValue(BaseExpr, TCK_MemberAccess);
@@ -3394,10 +3588,37 @@ static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base,
return CGF.Builder.CreateStructGEP(base, idx, offset, field->getName());
}
+static bool hasAnyVptr(const QualType Type, const ASTContext &Context) {
+ const auto *RD = Type.getTypePtr()->getAsCXXRecordDecl();
+ if (!RD)
+ return false;
+
+ if (RD->isDynamicClass())
+ return true;
+
+ for (const auto &Base : RD->bases())
+ if (hasAnyVptr(Base.getType(), Context))
+ return true;
+
+ for (const FieldDecl *Field : RD->fields())
+ if (hasAnyVptr(Field->getType(), Context))
+ return true;
+
+ return false;
+}
+
LValue CodeGenFunction::EmitLValueForField(LValue base,
const FieldDecl *field) {
+ LValueBaseInfo BaseInfo = base.getBaseInfo();
AlignmentSource fieldAlignSource =
- getFieldAlignmentSource(base.getAlignmentSource());
+ getFieldAlignmentSource(BaseInfo.getAlignmentSource());
+ LValueBaseInfo FieldBaseInfo(fieldAlignSource, BaseInfo.getMayAlias());
+
+ QualType type = field->getType();
+ const RecordDecl *rec = field->getParent();
+ if (rec->isUnion() || rec->hasAttr<MayAliasAttr>() || type->isVectorType())
+ FieldBaseInfo.setMayAlias(true);
+ bool mayAlias = FieldBaseInfo.getMayAlias();
if (field->isBitField()) {
const CGRecordLayout &RL =
@@ -3417,14 +3638,9 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
QualType fieldType =
field->getType().withCVRQualifiers(base.getVRQualifiers());
- return LValue::MakeBitfield(Addr, Info, fieldType, fieldAlignSource);
+ return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo);
}
- const RecordDecl *rec = field->getParent();
- QualType type = field->getType();
-
- bool mayAlias = rec->hasAttr<MayAliasAttr>();
-
Address addr = base.getAddress();
unsigned cvr = base.getVRQualifiers();
bool TBAAPath = CGM.getCodeGenOpts().StructPathTBAA;
@@ -3433,6 +3649,14 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
assert(!type->isReferenceType() && "union has reference member");
// TODO: handle path-aware TBAA for union.
TBAAPath = false;
+
+ const auto FieldType = field->getType();
+ if (CGM.getCodeGenOpts().StrictVTablePointers &&
+ hasAnyVptr(FieldType, getContext()))
+ // Because unions can easily skip invariant.group barriers, we need to add
+ // a barrier every time a CXXRecord field with a vptr is referenced.
+ addr = Address(Builder.CreateInvariantGroupBarrier(addr.getPointer()),
+ addr.getAlignment());
} else {
// For structs, we GEP to the field that the record layout suggests.
addr = emitAddrOfFieldStorage(*this, addr, field);
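Unions make it legal to reuse the same storage for different dynamic types, which would let the invariant.group assumptions of -fstrict-vtable-pointers carry a stale vptr from one member to another; hence the barrier on every access to a union field that contains a vptr anywhere in it. A small example of the shape hasAnyVptr looks for (type names are made up):

    struct Virt { virtual void f() {} };

    union U {
      Virt v;        // has a vptr, so accesses to U::v get a barrier
      int  i;
      U() : i(0) {}
      ~U() {}
    };

    void touch(U &u) {
      // Assuming 'v' is the active member: under -fstrict-vtable-pointers the
      // vptr loaded here must not be reused from a different active member.
      u.v.f();
    }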
@@ -3458,7 +3682,8 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
type = refType->getPointeeType();
CharUnits alignment =
- getNaturalTypeAlignment(type, &fieldAlignSource, /*pointee*/ true);
+ getNaturalTypeAlignment(type, &FieldBaseInfo, /*pointee*/ true);
+ FieldBaseInfo.setMayAlias(false);
addr = Address(load, alignment);
// Qualifiers on the struct don't apply to the referencee, and
@@ -3479,7 +3704,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
if (field->hasAttr<AnnotateAttr>())
addr = EmitFieldAnnotations(field, addr);
- LValue LV = MakeAddrLValue(addr, type, fieldAlignSource);
+ LValue LV = MakeAddrLValue(addr, type, FieldBaseInfo);
LV.getQuals().addCVRQualifiers(cvr);
if (TBAAPath) {
const ASTRecordLayout &Layout =
@@ -3520,14 +3745,18 @@ CodeGenFunction::EmitLValueForFieldInitialization(LValue Base,
V = Builder.CreateElementBitCast(V, llvmType, Field->getName());
// TODO: access-path TBAA?
- auto FieldAlignSource = getFieldAlignmentSource(Base.getAlignmentSource());
- return MakeAddrLValue(V, FieldType, FieldAlignSource);
+ LValueBaseInfo BaseInfo = Base.getBaseInfo();
+ LValueBaseInfo FieldBaseInfo(
+ getFieldAlignmentSource(BaseInfo.getAlignmentSource()),
+ BaseInfo.getMayAlias());
+ return MakeAddrLValue(V, FieldType, FieldBaseInfo);
}
LValue CodeGenFunction::EmitCompoundLiteralLValue(const CompoundLiteralExpr *E){
+ LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
if (E->isFileScope()) {
ConstantAddress GlobalPtr = CGM.GetAddrOfConstantCompoundLiteral(E);
- return MakeAddrLValue(GlobalPtr, E->getType(), AlignmentSource::Decl);
+ return MakeAddrLValue(GlobalPtr, E->getType(), BaseInfo);
}
if (E->getType()->isVariablyModifiedType())
// make sure to emit the VLA size.
@@ -3535,7 +3764,7 @@ LValue CodeGenFunction::EmitCompoundLiteralLValue(const CompoundLiteralExpr *E){
Address DeclPtr = CreateMemTemp(E->getType(), ".compoundliteral");
const Expr *InitExpr = E->getInitializer();
- LValue Result = MakeAddrLValue(DeclPtr, E->getType(), AlignmentSource::Decl);
+ LValue Result = MakeAddrLValue(DeclPtr, E->getType(), BaseInfo);
EmitAnyExprToMem(InitExpr, DeclPtr, E->getType().getQualifiers(),
/*Init*/ true);
@@ -3632,8 +3861,12 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) {
phi->addIncoming(rhs->getPointer(), rhsBlock);
Address result(phi, std::min(lhs->getAlignment(), rhs->getAlignment()));
AlignmentSource alignSource =
- std::max(lhs->getAlignmentSource(), rhs->getAlignmentSource());
- return MakeAddrLValue(result, expr->getType(), alignSource);
+ std::max(lhs->getBaseInfo().getAlignmentSource(),
+ rhs->getBaseInfo().getAlignmentSource());
+ bool MayAlias = lhs->getBaseInfo().getMayAlias() ||
+ rhs->getBaseInfo().getMayAlias();
+ return MakeAddrLValue(result, expr->getType(),
+ LValueBaseInfo(alignSource, MayAlias));
} else {
assert((lhs || rhs) &&
"both operands of glvalue conditional are throw-expressions?");
@@ -3731,7 +3964,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
This, DerivedClassDecl, E->path_begin(), E->path_end(),
/*NullCheckValue=*/false, E->getExprLoc());
- return MakeAddrLValue(Base, E->getType(), LV.getAlignmentSource());
+ return MakeAddrLValue(Base, E->getType(), LV.getBaseInfo());
}
case CK_ToUnion:
return EmitAggExprToLValue(E);
@@ -3758,7 +3991,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
/*MayBeNull=*/false,
CFITCK_DerivedCast, E->getLocStart());
- return MakeAddrLValue(Derived, E->getType(), LV.getAlignmentSource());
+ return MakeAddrLValue(Derived, E->getType(), LV.getBaseInfo());
}
case CK_LValueBitCast: {
// This must be a reinterpret_cast (or c-style equivalent).
@@ -3774,13 +4007,13 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
/*MayBeNull=*/false,
CFITCK_UnrelatedCast, E->getLocStart());
- return MakeAddrLValue(V, E->getType(), LV.getAlignmentSource());
+ return MakeAddrLValue(V, E->getType(), LV.getBaseInfo());
}
case CK_ObjCObjectLValueCast: {
LValue LV = EmitLValue(E->getSubExpr());
Address V = Builder.CreateElementBitCast(LV.getAddress(),
ConvertType(E->getType()));
- return MakeAddrLValue(V, E->getType(), LV.getAlignmentSource());
+ return MakeAddrLValue(V, E->getType(), LV.getBaseInfo());
}
case CK_ZeroToOCLQueue:
llvm_unreachable("NULL to OpenCL queue lvalue cast is not valid");
@@ -3949,6 +4182,8 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) {
RValue RV = EmitAnyExpr(E->getRHS());
LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store);
+ if (RV.isScalar())
+ EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc());
EmitStoreThroughLValue(RV, LV);
return LV;
}
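The added EmitNullabilityCheck covers assignments used as l-values, so that under the nullability sanitizers (for example -fsanitize=nullability-assign) storing a possibly-null scalar into a _Nonnull object is caught at run time. A rough illustration; the identifiers are illustrative:

    // The store through 'dest' targets an 'int * _Nonnull' l-value, so the
    // sanitizer checks that 'src' is non-null before the assignment.
    void store(int * _Nonnull *dest, int *src) {
      *dest = src;
    }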
@@ -3967,7 +4202,7 @@ LValue CodeGenFunction::EmitCallExprLValue(const CallExpr *E) {
if (!RV.isScalar())
return MakeAddrLValue(RV.getAggregateAddress(), E->getType(),
- AlignmentSource::Decl);
+ LValueBaseInfo(AlignmentSource::Decl, false));
assert(E->getCallReturnType(getContext())->isReferenceType() &&
"Can't have a scalar return unless the return type is a "
@@ -3987,7 +4222,7 @@ LValue CodeGenFunction::EmitCXXConstructLValue(const CXXConstructExpr *E) {
AggValueSlot Slot = CreateAggTemp(E->getType());
EmitCXXConstructExpr(E, Slot);
return MakeAddrLValue(Slot.getAddress(), E->getType(),
- AlignmentSource::Decl);
+ LValueBaseInfo(AlignmentSource::Decl, false));
}
LValue
@@ -4002,7 +4237,7 @@ Address CodeGenFunction::EmitCXXUuidofExpr(const CXXUuidofExpr *E) {
LValue CodeGenFunction::EmitCXXUuidofLValue(const CXXUuidofExpr *E) {
return MakeAddrLValue(EmitCXXUuidofExpr(E), E->getType(),
- AlignmentSource::Decl);
+ LValueBaseInfo(AlignmentSource::Decl, false));
}
LValue
@@ -4012,7 +4247,7 @@ CodeGenFunction::EmitCXXBindTemporaryLValue(const CXXBindTemporaryExpr *E) {
EmitAggExpr(E->getSubExpr(), Slot);
EmitCXXTemporary(E->getTemporary(), E->getType(), Slot.getAddress());
return MakeAddrLValue(Slot.getAddress(), E->getType(),
- AlignmentSource::Decl);
+ LValueBaseInfo(AlignmentSource::Decl, false));
}
LValue
@@ -4020,7 +4255,7 @@ CodeGenFunction::EmitLambdaLValue(const LambdaExpr *E) {
AggValueSlot Slot = CreateAggTemp(E->getType(), "temp.lvalue");
EmitLambdaExpr(E, Slot);
return MakeAddrLValue(Slot.getAddress(), E->getType(),
- AlignmentSource::Decl);
+ LValueBaseInfo(AlignmentSource::Decl, false));
}
LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) {
@@ -4028,7 +4263,7 @@ LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) {
if (!RV.isScalar())
return MakeAddrLValue(RV.getAggregateAddress(), E->getType(),
- AlignmentSource::Decl);
+ LValueBaseInfo(AlignmentSource::Decl, false));
assert(E->getMethodDecl()->getReturnType()->isReferenceType() &&
"Can't have a scalar return unless the return type is a "
@@ -4040,7 +4275,8 @@ LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) {
LValue CodeGenFunction::EmitObjCSelectorLValue(const ObjCSelectorExpr *E) {
Address V =
CGM.getObjCRuntime().GetAddrOfSelector(*this, E->getSelector());
- return MakeAddrLValue(V, E->getType(), AlignmentSource::Decl);
+ return MakeAddrLValue(V, E->getType(),
+ LValueBaseInfo(AlignmentSource::Decl, false));
}
llvm::Value *CodeGenFunction::EmitIvarOffset(const ObjCInterfaceDecl *Interface,
@@ -4084,7 +4320,7 @@ LValue CodeGenFunction::EmitStmtExprLValue(const StmtExpr *E) {
// Can only get l-value for message expression returning aggregate type
RValue RV = EmitAnyExprToTemp(E);
return MakeAddrLValue(RV.getAggregateAddress(), E->getType(),
- AlignmentSource::Decl);
+ LValueBaseInfo(AlignmentSource::Decl, false));
}
RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee,
@@ -4276,12 +4512,11 @@ EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E) {
const MemberPointerType *MPT
= E->getRHS()->getType()->getAs<MemberPointerType>();
- AlignmentSource AlignSource;
+ LValueBaseInfo BaseInfo;
Address MemberAddr =
- EmitCXXMemberDataPointerAddress(E, BaseAddr, OffsetV, MPT,
- &AlignSource);
+ EmitCXXMemberDataPointerAddress(E, BaseAddr, OffsetV, MPT, &BaseInfo);
- return MakeAddrLValue(MemberAddr, MPT->getPointeeType(), AlignSource);
+ return MakeAddrLValue(MemberAddr, MPT->getPointeeType(), BaseInfo);
}
/// Given the address of a temporary variable, produce an r-value of
@@ -4289,7 +4524,8 @@ EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E) {
RValue CodeGenFunction::convertTempToRValue(Address addr,
QualType type,
SourceLocation loc) {
- LValue lvalue = MakeAddrLValue(addr, type, AlignmentSource::Decl);
+ LValue lvalue = MakeAddrLValue(addr, type,
+ LValueBaseInfo(AlignmentSource::Decl, false));
switch (getEvaluationKind(type)) {
case TEK_Complex:
return RValue::getComplex(EmitLoadOfComplex(lvalue, loc));
@@ -4344,9 +4580,9 @@ static LValueOrRValue emitPseudoObjectExpr(CodeGenFunction &CGF,
if (ov == resultExpr && ov->isRValue() && !forLValue &&
CodeGenFunction::hasAggregateEvaluationKind(ov->getType())) {
CGF.EmitAggExpr(ov->getSourceExpr(), slot);
-
+ LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
LValue LV = CGF.MakeAddrLValue(slot.getAddress(), ov->getType(),
- AlignmentSource::Decl);
+ BaseInfo);
opaqueData = OVMA::bind(CGF, ov, LV);
result.RV = slot.asRValue();
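Most of the churn in this file is mechanical: every place that used to pass a bare AlignmentSource now passes an LValueBaseInfo, which bundles the alignment source with a may-alias bit (the bit that selects the char TBAA node in the load/store paths above). The class itself is declared in CGValue.h and is not part of these hunks; the following sketch is only what its uses here imply, and the method bodies, especially mergeForCast, are assumptions:

    enum class AlignmentSource { Decl, AttributedType, Type };

    class LValueBaseInfo {
      AlignmentSource AlignSource;
      bool MayAlias;
    public:
      explicit LValueBaseInfo(AlignmentSource Source = AlignmentSource::Type,
                              bool Alias = false)
          : AlignSource(Source), MayAlias(Alias) {}
      AlignmentSource getAlignmentSource() const { return AlignSource; }
      bool getMayAlias() const { return MayAlias; }
      void setMayAlias(bool Alias) { MayAlias = Alias; }
      // Merging through a cast takes the new alignment source and keeps the
      // may-alias bit sticky.
      void mergeForCast(const LValueBaseInfo &Info) {
        AlignSource = Info.getAlignmentSource();
        MayAlias = MayAlias || Info.getMayAlias();
      }
    };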
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp
index 0092447..a05a088 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp
@@ -111,6 +111,13 @@ public:
void VisitGenericSelectionExpr(GenericSelectionExpr *GE) {
Visit(GE->getResultExpr());
}
+ void VisitCoawaitExpr(CoawaitExpr *E) {
+ CGF.EmitCoawaitExpr(*E, Dest, IsResultUnused);
+ }
+ void VisitCoyieldExpr(CoyieldExpr *E) {
+ CGF.EmitCoyieldExpr(*E, Dest, IsResultUnused);
+ }
+ void VisitUnaryCoawait(UnaryOperator *E) { Visit(E->getSubExpr()); }
void VisitUnaryExtension(UnaryOperator *E) { Visit(E->getSubExpr()); }
void VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *E) {
return Visit(E->getReplacement());
@@ -505,12 +512,20 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
currentElement->addIncoming(element, entryBB);
// Emit the actual filler expression.
- LValue elementLV =
- CGF.MakeAddrLValue(Address(currentElement, elementAlign), elementType);
- if (filler)
- EmitInitializationToLValue(filler, elementLV);
- else
- EmitNullInitializationToLValue(elementLV);
+ {
+ // C++1z [class.temporary]p5:
+ // when a default constructor is called to initialize an element of
+ // an array with no corresponding initializer [...] the destruction of
+ // every temporary created in a default argument is sequenced before
+ // the construction of the next array element, if any
+ CodeGenFunction::RunCleanupsScope CleanupsScope(CGF);
+ LValue elementLV =
+ CGF.MakeAddrLValue(Address(currentElement, elementAlign), elementType);
+ if (filler)
+ EmitInitializationToLValue(filler, elementLV);
+ else
+ EmitNullInitializationToLValue(elementLV);
+ }
// Move on to the next element.
llvm::Value *nextElement =
@@ -1267,7 +1282,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// Store the initializer into the field.
EmitInitializationToLValue(E->getInit(curInitIndex++), LV);
} else {
- // We're out of initalizers; default-initialize to null
+ // We're out of initializers; default-initialize to null
EmitNullInitializationToLValue(LV);
}
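The new RunCleanupsScope implements the quoted [class.temporary]p5 rule: any temporary created while emitting one array element's initializer (typically via a default argument) is destroyed before the next element is constructed. A minimal case that exercises it; the type names are made up:

    struct Tag { Tag() {} ~Tag() {} };

    struct Elem {
      // The default argument creates a Tag temporary for every element.
      Elem(const Tag &t = Tag()) { (void)t; }
    };

    void f() {
      // Each Tag temporary is now destroyed before the next Elem is
      // constructed, rather than all of them living to the end of the init.
      Elem arr[3] = {};
      (void)arr;
    }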
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp
index 71c8fb8..ab17024 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp
@@ -24,7 +24,15 @@
using namespace clang;
using namespace CodeGen;
-static RequiredArgs
+namespace {
+struct MemberCallInfo {
+ RequiredArgs ReqArgs;
+ // Number of prefix arguments for the call. Ignores the `this` pointer.
+ unsigned PrefixSize;
+};
+}
+
+static MemberCallInfo
commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD,
llvm::Value *This, llvm::Value *ImplicitParam,
QualType ImplicitParamTy, const CallExpr *CE,
@@ -48,6 +56,7 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD,
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size(), MD);
+ unsigned PrefixSize = Args.size() - 1;
// And the rest of the call args.
if (RtlArgs) {
@@ -65,7 +74,7 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD,
FPT->getNumParams() == 0 &&
"No CallExpr specified for function with non-zero number of arguments");
}
- return required;
+ return {required, PrefixSize};
}
RValue CodeGenFunction::EmitCXXMemberOrOperatorCall(
@@ -75,9 +84,10 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorCall(
const CallExpr *CE, CallArgList *RtlArgs) {
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
CallArgList Args;
- RequiredArgs required = commonEmitCXXMemberOrOperatorCall(
+ MemberCallInfo CallInfo = commonEmitCXXMemberOrOperatorCall(
*this, MD, This, ImplicitParam, ImplicitParamTy, CE, Args, RtlArgs);
- auto &FnInfo = CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required);
+ auto &FnInfo = CGM.getTypes().arrangeCXXMethodCall(
+ Args, FPT, CallInfo.ReqArgs, CallInfo.PrefixSize);
return EmitCall(FnInfo, Callee, ReturnValue, Args);
}
@@ -189,7 +199,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
bool CanUseVirtualCall = MD->isVirtual() && !HasQualifier;
const CXXMethodDecl *DevirtualizedMethod = nullptr;
- if (CanUseVirtualCall && CanDevirtualizeMemberFunctionCall(Base, MD)) {
+ if (CanUseVirtualCall &&
+ MD->getDevirtualizedMethod(Base, getLangOpts().AppleKext)) {
const CXXRecordDecl *BestDynamicDecl = Base->getBestDynamicClassType();
DevirtualizedMethod = MD->getCorrespondingMethodInClass(BestDynamicDecl);
assert(DevirtualizedMethod);
@@ -290,10 +301,20 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
if (CE)
CallLoc = CE->getExprLoc();
- EmitTypeCheck(isa<CXXConstructorDecl>(CalleeDecl)
- ? CodeGenFunction::TCK_ConstructorCall
- : CodeGenFunction::TCK_MemberCall,
- CallLoc, This.getPointer(), C.getRecordType(CalleeDecl->getParent()));
+ SanitizerSet SkippedChecks;
+ if (const auto *CMCE = dyn_cast<CXXMemberCallExpr>(CE)) {
+ auto *IOA = CMCE->getImplicitObjectArgument();
+ bool IsImplicitObjectCXXThis = IsWrappedCXXThis(IOA);
+ if (IsImplicitObjectCXXThis)
+ SkippedChecks.set(SanitizerKind::Alignment, true);
+ if (IsImplicitObjectCXXThis || isa<DeclRefExpr>(IOA))
+ SkippedChecks.set(SanitizerKind::Null, true);
+ }
+ EmitTypeCheck(
+ isa<CXXConstructorDecl>(CalleeDecl) ? CodeGenFunction::TCK_ConstructorCall
+ : CodeGenFunction::TCK_MemberCall,
+ CallLoc, This.getPointer(), C.getRecordType(CalleeDecl->getParent()),
+ /*Alignment=*/CharUnits::Zero(), SkippedChecks);
// FIXME: Uses of 'MD' past this point need to be audited. We may need to use
// 'CalleeDecl' instead.
@@ -420,7 +441,8 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E,
// And the rest of the call args
EmitCallArgs(Args, FPT, E->arguments());
- return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required),
+ return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required,
+ /*PrefixSize=*/0),
Callee, ReturnValue, Args);
}
@@ -659,7 +681,10 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF,
// Emit the array size expression.
// We multiply the size of all dimensions for NumElements.
// e.g for 'int[2][3]', ElemType is 'int' and NumElements is 6.
- numElements = CGF.EmitScalarExpr(e->getArraySize());
+ numElements = CGF.CGM.EmitConstantExpr(e->getArraySize(),
+ CGF.getContext().getSizeType(), &CGF);
+ if (!numElements)
+ numElements = CGF.EmitScalarExpr(e->getArraySize());
assert(isa<llvm::IntegerType>(numElements->getType()));
// The number of elements can have an arbitrary integer type;
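Folding the array size through EmitConstantExpr first means a constant multidimensional allocation no longer emits a chain of runtime multiplies for the element count. For example:

    int *make() {
      // NumElements is now emitted directly as the constant 6 (2 * 3).
      auto *p = new int[2][3];
      return &p[0][0];
    }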
@@ -1256,10 +1281,10 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF,
Fn && Fn->hasFnAttribute(llvm::Attribute::NoBuiltin)) {
// FIXME: Add addAttribute to CallSite.
if (llvm::CallInst *CI = dyn_cast<llvm::CallInst>(CallOrInvoke))
- CI->addAttribute(llvm::AttributeSet::FunctionIndex,
+ CI->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::Builtin);
else if (llvm::InvokeInst *II = dyn_cast<llvm::InvokeInst>(CallOrInvoke))
- II->addAttribute(llvm::AttributeSet::FunctionIndex,
+ II->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::Builtin);
else
llvm_unreachable("unexpected kind of call instruction");
@@ -1507,13 +1532,13 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
assert(E->getNumPlacementArgs() == 1);
const Expr *arg = *E->placement_arguments().begin();
- AlignmentSource alignSource;
- allocation = EmitPointerWithAlignment(arg, &alignSource);
+ LValueBaseInfo BaseInfo;
+ allocation = EmitPointerWithAlignment(arg, &BaseInfo);
// The pointer expression will, in many cases, be an opaque void*.
// In these cases, discard the computed alignment and use the
// formal alignment of the allocated type.
- if (alignSource != AlignmentSource::Decl)
+ if (BaseInfo.getAlignmentSource() != AlignmentSource::Decl)
allocation = Address(allocation.getPointer(), allocAlign);
// Set up allocatorArgs for the call to operator delete if it's not
@@ -1560,7 +1585,7 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
// FIXME: Why do we not pass a CalleeDecl here?
EmitCallArgs(allocatorArgs, allocatorType, E->placement_arguments(),
- /*CalleeDecl*/nullptr, /*ParamsToSkip*/ParamsToSkip);
+ /*AC*/AbstractCallee(), /*ParamsToSkip*/ParamsToSkip);
RValue RV =
EmitNewDeleteCall(*this, allocator, allocatorType, allocatorArgs);
@@ -1634,8 +1659,9 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
// Passing pointer through invariant.group.barrier to avoid propagation of
// vptrs information which may be included in previous type.
+ // To not break LTO with different optimization levels, we do it regardless
+ // of optimization level.
if (CGM.getCodeGenOpts().StrictVTablePointers &&
- CGM.getCodeGenOpts().OptimizationLevel > 0 &&
allocator->isReservedGlobalPlacementOperator())
result = Address(Builder.CreateInvariantGroupBarrier(result.getPointer()),
result.getAlignment());
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp
index 59bc9cd..9809723 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp
@@ -110,6 +110,16 @@ public:
VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *PE) {
return Visit(PE->getReplacement());
}
+ ComplexPairTy VisitCoawaitExpr(CoawaitExpr *S) {
+ return CGF.EmitCoawaitExpr(*S).getComplexVal();
+ }
+ ComplexPairTy VisitCoyieldExpr(CoyieldExpr *S) {
+ return CGF.EmitCoyieldExpr(*S).getComplexVal();
+ }
+ ComplexPairTy VisitUnaryCoawait(const UnaryOperator *E) {
+ return Visit(E->getSubExpr());
+ }
+
// l-values.
ComplexPairTy VisitDeclRefExpr(DeclRefExpr *E) {
@@ -198,7 +208,11 @@ public:
ComplexPairTy VisitExprWithCleanups(ExprWithCleanups *E) {
CGF.enterFullExpression(E);
CodeGenFunction::RunCleanupsScope Scope(CGF);
- return Visit(E->getSubExpr());
+ ComplexPairTy Vals = Visit(E->getSubExpr());
+ // Defend against dominance problems caused by jumps out of expression
+ // evaluation through the shared cleanup block.
+ Scope.ForceCleanup({&Vals.first, &Vals.second});
+ return Vals;
}
ComplexPairTy VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *E) {
assert(E->getType()->isAnyComplexType() && "Expected complex type!");
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp
index 3db15c6..6b72774 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp
@@ -201,7 +201,7 @@ void ConstStructBuilder::AppendBitField(const FieldDecl *Field,
unsigned NewFieldWidth = FieldSize - BitsInPreviousByte;
if (CGM.getDataLayout().isBigEndian()) {
- Tmp = Tmp.lshr(NewFieldWidth);
+ Tmp.lshrInPlace(NewFieldWidth);
Tmp = Tmp.trunc(BitsInPreviousByte);
// We want the remaining high bits.
@@ -210,7 +210,7 @@ void ConstStructBuilder::AppendBitField(const FieldDecl *Field,
Tmp = Tmp.trunc(BitsInPreviousByte);
// We want the remaining low bits.
- FieldValue = FieldValue.lshr(BitsInPreviousByte);
+ FieldValue.lshrInPlace(BitsInPreviousByte);
FieldValue = FieldValue.trunc(NewFieldWidth);
}
}
@@ -273,7 +273,7 @@ void ConstStructBuilder::AppendBitField(const FieldDecl *Field,
// We want the low bits.
Tmp = FieldValue.trunc(CharWidth);
- FieldValue = FieldValue.lshr(CharWidth);
+ FieldValue.lshrInPlace(CharWidth);
}
Elements.push_back(llvm::ConstantInt::get(CGM.getLLVMContext(), Tmp));
@@ -1361,9 +1361,8 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value,
Value.getComplexIntImag());
// FIXME: the target may want to specify that this is packed.
- llvm::StructType *STy = llvm::StructType::get(Complex[0]->getType(),
- Complex[1]->getType(),
- nullptr);
+ llvm::StructType *STy =
+ llvm::StructType::get(Complex[0]->getType(), Complex[1]->getType());
return llvm::ConstantStruct::get(STy, Complex);
}
case APValue::Float: {
@@ -1384,9 +1383,8 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value,
Value.getComplexFloatImag());
// FIXME: the target may want to specify that this is packed.
- llvm::StructType *STy = llvm::StructType::get(Complex[0]->getType(),
- Complex[1]->getType(),
- nullptr);
+ llvm::StructType *STy =
+ llvm::StructType::get(Complex[0]->getType(), Complex[1]->getType());
return llvm::ConstantStruct::get(STy, Complex);
}
case APValue::Vector: {
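The dropped nullptr arguments were terminators for the old varargs form of llvm::StructType::get; current LLVM takes the element types as a plain argument list (or an ArrayRef), so the sentinel has to go. A minimal sketch, assuming the usual LLVM headers are available:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    llvm::StructType *makeComplexTy(llvm::LLVMContext &Ctx) {
      llvm::Type *Elt = llvm::Type::getDoubleTy(Ctx);
      // New form: no trailing nullptr terminator.
      return llvm::StructType::get(Elt, Elt);
    }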
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp
index 1b85c45..1170b01 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "CodeGenFunction.h"
+#include "CGCleanup.h"
#include "CGCXXABI.h"
#include "CGDebugInfo.h"
#include "CGObjCRuntime.h"
@@ -24,10 +25,12 @@
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
@@ -42,13 +45,85 @@ using llvm::Value;
//===----------------------------------------------------------------------===//
namespace {
+
+/// Determine whether the given binary operation may overflow.
+/// Sets \p Result to the value of the operation for BO_Add, BO_Sub, BO_Mul,
+/// and signed BO_{Div,Rem}. For these opcodes, and for unsigned BO_{Div,Rem},
+/// the returned overflow check is precise. The returned value is 'true' for
+/// all other opcodes, to be conservative.
+bool mayHaveIntegerOverflow(llvm::ConstantInt *LHS, llvm::ConstantInt *RHS,
+ BinaryOperator::Opcode Opcode, bool Signed,
+ llvm::APInt &Result) {
+ // Assume overflow is possible, unless we can prove otherwise.
+ bool Overflow = true;
+ const auto &LHSAP = LHS->getValue();
+ const auto &RHSAP = RHS->getValue();
+ if (Opcode == BO_Add) {
+ if (Signed)
+ Result = LHSAP.sadd_ov(RHSAP, Overflow);
+ else
+ Result = LHSAP.uadd_ov(RHSAP, Overflow);
+ } else if (Opcode == BO_Sub) {
+ if (Signed)
+ Result = LHSAP.ssub_ov(RHSAP, Overflow);
+ else
+ Result = LHSAP.usub_ov(RHSAP, Overflow);
+ } else if (Opcode == BO_Mul) {
+ if (Signed)
+ Result = LHSAP.smul_ov(RHSAP, Overflow);
+ else
+ Result = LHSAP.umul_ov(RHSAP, Overflow);
+ } else if (Opcode == BO_Div || Opcode == BO_Rem) {
+ if (Signed && !RHS->isZero())
+ Result = LHSAP.sdiv_ov(RHSAP, Overflow);
+ else
+ return false;
+ }
+ return Overflow;
+}
+
struct BinOpInfo {
Value *LHS;
Value *RHS;
QualType Ty; // Computation Type.
BinaryOperator::Opcode Opcode; // Opcode of BinOp to perform
- bool FPContractable;
+ FPOptions FPFeatures;
const Expr *E; // Entire expr, for error unsupported. May not be binop.
+
+ /// Check if the binop can result in integer overflow.
+ bool mayHaveIntegerOverflow() const {
+ // Without constant input, we can't rule out overflow.
+ auto *LHSCI = dyn_cast<llvm::ConstantInt>(LHS);
+ auto *RHSCI = dyn_cast<llvm::ConstantInt>(RHS);
+ if (!LHSCI || !RHSCI)
+ return true;
+
+ llvm::APInt Result;
+ return ::mayHaveIntegerOverflow(
+ LHSCI, RHSCI, Opcode, Ty->hasSignedIntegerRepresentation(), Result);
+ }
+
+ /// Check if the binop computes a division or a remainder.
+ bool isDivremOp() const {
+ return Opcode == BO_Div || Opcode == BO_Rem || Opcode == BO_DivAssign ||
+ Opcode == BO_RemAssign;
+ }
+
+ /// Check if the binop can result in an integer division by zero.
+ bool mayHaveIntegerDivisionByZero() const {
+ if (isDivremOp())
+ if (auto *CI = dyn_cast<llvm::ConstantInt>(RHS))
+ return CI->isZero();
+ return true;
+ }
+
+ /// Check if the binop can result in a float division by zero.
+ bool mayHaveFloatDivisionByZero() const {
+ if (isDivremOp())
+ if (auto *CFP = dyn_cast<llvm::ConstantFP>(RHS))
+ return CFP->isZero();
+ return true;
+ }
};
static bool MustVisitNullValue(const Expr *E) {
@@ -58,6 +133,83 @@ static bool MustVisitNullValue(const Expr *E) {
return E->getType()->isNullPtrType();
}
+/// If \p E is a widened promoted integer, get its base (unpromoted) type.
+static llvm::Optional<QualType> getUnwidenedIntegerType(const ASTContext &Ctx,
+ const Expr *E) {
+ const Expr *Base = E->IgnoreImpCasts();
+ if (E == Base)
+ return llvm::None;
+
+ QualType BaseTy = Base->getType();
+ if (!BaseTy->isPromotableIntegerType() ||
+ Ctx.getTypeSize(BaseTy) >= Ctx.getTypeSize(E->getType()))
+ return llvm::None;
+
+ return BaseTy;
+}
+
+/// Check if \p E is a widened promoted integer.
+static bool IsWidenedIntegerOp(const ASTContext &Ctx, const Expr *E) {
+ return getUnwidenedIntegerType(Ctx, E).hasValue();
+}
+
+/// Check if we can skip the overflow check for \p Op.
+static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) {
+ assert((isa<UnaryOperator>(Op.E) || isa<BinaryOperator>(Op.E)) &&
+ "Expected a unary or binary operator");
+
+ // If the binop has constant inputs and we can prove there is no overflow,
+ // we can elide the overflow check.
+ if (!Op.mayHaveIntegerOverflow())
+ return true;
+
+ // If a unary op has a widened operand, the op cannot overflow.
+ if (const auto *UO = dyn_cast<UnaryOperator>(Op.E))
+ return IsWidenedIntegerOp(Ctx, UO->getSubExpr());
+
+ // We usually don't need overflow checks for binops with widened operands.
+ // Multiplication with promoted unsigned operands is a special case.
+ const auto *BO = cast<BinaryOperator>(Op.E);
+ auto OptionalLHSTy = getUnwidenedIntegerType(Ctx, BO->getLHS());
+ if (!OptionalLHSTy)
+ return false;
+
+ auto OptionalRHSTy = getUnwidenedIntegerType(Ctx, BO->getRHS());
+ if (!OptionalRHSTy)
+ return false;
+
+ QualType LHSTy = *OptionalLHSTy;
+ QualType RHSTy = *OptionalRHSTy;
+
+ // This is the simple case: binops without unsigned multiplication, and with
+ // widened operands. No overflow check is needed here.
+ if ((Op.Opcode != BO_Mul && Op.Opcode != BO_MulAssign) ||
+ !LHSTy->isUnsignedIntegerType() || !RHSTy->isUnsignedIntegerType())
+ return true;
+
+ // For unsigned multiplication the overflow check can be elided if either one
+ // of the unpromoted types is less than half the size of the promoted type.
+ unsigned PromotedSize = Ctx.getTypeSize(Op.E->getType());
+ return (2 * Ctx.getTypeSize(LHSTy)) < PromotedSize ||
+ (2 * Ctx.getTypeSize(RHSTy)) < PromotedSize;
+}
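As a concrete illustration of the unsigned-multiplication rule above (an example of the language semantics, not of the codegen itself): 8-bit unsigned operands are promoted to 32-bit int before the multiply, and since 2 * 8 < 32 the product can never overflow the promoted type, so the sanitizer check may be skipped.

// Sketch: the promoted multiply below cannot overflow.
#include <cstdio>

int main() {
  unsigned char a = 255, b = 255;
  int product = a * b; // operands promoted to int; at most 255 * 255 = 65025
  printf("%d\n", product);
  return 0;
}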
+
+/// Update the FastMathFlags of LLVM IR from the FPOptions in LangOptions.
+static void updateFastMathFlags(llvm::FastMathFlags &FMF,
+ FPOptions FPFeatures) {
+ FMF.setAllowContract(FPFeatures.allowFPContractAcrossStatement());
+}
+
+/// Propagate fast-math flags from \p Op to the instruction in \p V.
+static Value *propagateFMFlags(Value *V, const BinOpInfo &Op) {
+ if (auto *I = dyn_cast<llvm::Instruction>(V)) {
+ llvm::FastMathFlags FMF = I->getFastMathFlags();
+ updateFastMathFlags(FMF, Op.FPFeatures);
+ I->setFastMathFlags(FMF);
+ }
+ return V;
+}
+
class ScalarExprEmitter
: public StmtVisitor<ScalarExprEmitter, Value*> {
CodeGenFunction &CGF;
@@ -221,6 +373,15 @@ public:
Value *VisitGenericSelectionExpr(GenericSelectionExpr *GE) {
return Visit(GE->getResultExpr());
}
+ Value *VisitCoawaitExpr(CoawaitExpr *S) {
+ return CGF.EmitCoawaitExpr(*S).getScalarVal();
+ }
+ Value *VisitCoyieldExpr(CoyieldExpr *S) {
+ return CGF.EmitCoyieldExpr(*S).getScalarVal();
+ }
+ Value *VisitUnaryCoawait(const UnaryOperator *E) {
+ return Visit(E->getSubExpr());
+ }
// Leaves.
Value *VisitIntegerLiteral(const IntegerLiteral *E) {
@@ -300,6 +461,24 @@ public:
return V;
}
+ Value *VisitObjCAvailabilityCheckExpr(ObjCAvailabilityCheckExpr *E) {
+ VersionTuple Version = E->getVersion();
+
+ // If we're checking for a platform older than our minimum deployment
+ // target, we can fold the check away.
+ if (Version <= CGF.CGM.getTarget().getPlatformMinVersion())
+ return llvm::ConstantInt::get(Builder.getInt1Ty(), 1);
+
+ Optional<unsigned> Min = Version.getMinor(), SMin = Version.getSubminor();
+ llvm::Value *Args[] = {
+ llvm::ConstantInt::get(CGF.CGM.Int32Ty, Version.getMajor()),
+ llvm::ConstantInt::get(CGF.CGM.Int32Ty, Min ? *Min : 0),
+ llvm::ConstantInt::get(CGF.CGM.Int32Ty, SMin ? *SMin : 0),
+ };
+
+ return CGF.EmitBuiltinAvailable(Args);
+ }
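Sketched in source terms (a simplification of the lowering above, assuming the deployment target is older than the requested version): the availability check becomes a call to the __isOSVersionAtLeast runtime helper with the major, minor, and subminor components, compared against zero.

// Sketch: approximate expansion of `if (@available(macOS 10.12, *))`.
// The helper is provided by compiler-rt on Darwin.
extern "C" int __isOSVersionAtLeast(int Major, int Minor, int Subminor);

static bool atLeastMacOS10_12() {
  return __isOSVersionAtLeast(10, 12, 0) != 0;
}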
+
Value *VisitArraySubscriptExpr(ArraySubscriptExpr *E);
Value *VisitShuffleVectorExpr(ShuffleVectorExpr *E);
Value *VisitConvertVectorExpr(ConvertVectorExpr *E);
@@ -405,11 +584,7 @@ public:
return CGF.LoadCXXThis();
}
- Value *VisitExprWithCleanups(ExprWithCleanups *E) {
- CGF.enterFullExpression(E);
- CodeGenFunction::RunCleanupsScope Scope(CGF);
- return Visit(E->getSubExpr());
- }
+ Value *VisitExprWithCleanups(ExprWithCleanups *E);
Value *VisitCXXNewExpr(const CXXNewExpr *E) {
return CGF.EmitCXXNewExpr(E);
}
@@ -464,16 +639,21 @@ public:
return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul");
// Fall through.
case LangOptions::SOB_Trapping:
+ if (CanElideOverflowCheck(CGF.getContext(), Ops))
+ return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul");
return EmitOverflowCheckedBinOp(Ops);
}
}
if (Ops.Ty->isUnsignedIntegerType() &&
- CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow))
+ CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) &&
+ !CanElideOverflowCheck(CGF.getContext(), Ops))
return EmitOverflowCheckedBinOp(Ops);
- if (Ops.LHS->getType()->isFPOrFPVectorTy())
- return Builder.CreateFMul(Ops.LHS, Ops.RHS, "mul");
+ if (Ops.LHS->getType()->isFPOrFPVectorTy()) {
+ Value *V = Builder.CreateFMul(Ops.LHS, Ops.RHS, "mul");
+ return propagateFMFlags(V, Ops);
+ }
return Builder.CreateMul(Ops.LHS, Ops.RHS, "mul");
}
/// Create a binary op that checks for overflow.
@@ -1414,10 +1594,9 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
}
// Since target may map different address spaces in AST to the same address
// space, an address space conversion may end up as a bitcast.
- auto *Src = Visit(E);
- return CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast(CGF, Src,
- E->getType(),
- DestTy);
+ return CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast(
+ CGF, Visit(E), E->getType()->getPointeeType().getAddressSpace(),
+ DestTy->getPointeeType().getAddressSpace(), ConvertType(DestTy));
}
case CK_AtomicToNonAtomic:
case CK_NonAtomicToAtomic:
@@ -1616,6 +1795,16 @@ Value *ScalarExprEmitter::VisitStmtExpr(const StmtExpr *E) {
E->getExprLoc());
}
+Value *ScalarExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) {
+ CGF.enterFullExpression(E);
+ CodeGenFunction::RunCleanupsScope Scope(CGF);
+ Value *V = Visit(E->getSubExpr());
+ // Defend against dominance problems caused by jumps out of expression
+ // evaluation through the shared cleanup block.
+ Scope.ForceCleanup({&V});
+ return V;
+}
+
//===----------------------------------------------------------------------===//
// Unary Operators
//===----------------------------------------------------------------------===//
@@ -1627,7 +1816,7 @@ static BinOpInfo createBinOpInfoFromIncDec(const UnaryOperator *E,
BinOp.RHS = llvm::ConstantInt::get(InVal->getType(), 1, false);
BinOp.Ty = E->getType();
BinOp.Opcode = IsInc ? BO_Add : BO_Sub;
- BinOp.FPContractable = false;
+ // FIXME: once UnaryOperator carries FPFeatures, copy it here.
BinOp.E = E;
return BinOp;
}
@@ -1645,6 +1834,8 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior(
return Builder.CreateNSWAdd(InVal, Amount, Name);
// Fall through.
case LangOptions::SOB_Trapping:
+ if (IsWidenedIntegerOp(CGF.getContext(), E->getSubExpr()))
+ return Builder.CreateNSWAdd(InVal, Amount, Name);
return EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, InVal, IsInc));
}
llvm_unreachable("Unknown SignedOverflowBehaviorTy");
@@ -1660,6 +1851,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
llvm::Value *input;
int amount = (isInc ? 1 : -1);
+ bool isSubtraction = !isInc;
if (const AtomicType *atomicTy = type->getAs<AtomicType>()) {
type = atomicTy->getValueType();
@@ -1749,7 +1941,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
if (CGF.getLangOpts().isSignedOverflowDefined())
value = Builder.CreateGEP(value, numElts, "vla.inc");
else
- value = Builder.CreateInBoundsGEP(value, numElts, "vla.inc");
+ value = CGF.EmitCheckedInBoundsGEP(
+ value, numElts, /*SignedIndices=*/false, isSubtraction,
+ E->getExprLoc(), "vla.inc");
// Arithmetic on function pointers (!) is just +-1.
} else if (type->isFunctionType()) {
@@ -1759,7 +1953,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
if (CGF.getLangOpts().isSignedOverflowDefined())
value = Builder.CreateGEP(value, amt, "incdec.funcptr");
else
- value = Builder.CreateInBoundsGEP(value, amt, "incdec.funcptr");
+ value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false,
+ isSubtraction, E->getExprLoc(),
+ "incdec.funcptr");
value = Builder.CreateBitCast(value, input->getType());
// For everything else, we can just do a simple increment.
@@ -1768,7 +1964,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
if (CGF.getLangOpts().isSignedOverflowDefined())
value = Builder.CreateGEP(value, amt, "incdec.ptr");
else
- value = Builder.CreateInBoundsGEP(value, amt, "incdec.ptr");
+ value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false,
+ isSubtraction, E->getExprLoc(),
+ "incdec.ptr");
}
// Vector increment/decrement.
@@ -1849,7 +2047,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
if (CGF.getLangOpts().isSignedOverflowDefined())
value = Builder.CreateGEP(value, sizeValue, "incdec.objptr");
else
- value = Builder.CreateInBoundsGEP(value, sizeValue, "incdec.objptr");
+ value = CGF.EmitCheckedInBoundsGEP(value, sizeValue,
+ /*SignedIndices=*/false, isSubtraction,
+ E->getExprLoc(), "incdec.objptr");
value = Builder.CreateBitCast(value, input->getType());
}
@@ -1891,7 +2091,7 @@ Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) {
BinOp.LHS = llvm::Constant::getNullValue(BinOp.RHS->getType());
BinOp.Ty = E->getType();
BinOp.Opcode = BO_Sub;
- BinOp.FPContractable = false;
+ // FIXME: once UnaryOperator carries FPFeatures, copy it here.
BinOp.E = E;
return EmitSub(BinOp);
}
@@ -2112,7 +2312,7 @@ BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) {
Result.RHS = Visit(E->getRHS());
Result.Ty = E->getType();
Result.Opcode = E->getOpcode();
- Result.FPContractable = E->isFPContractable();
+ Result.FPFeatures = E->getFPFeatures();
Result.E = E;
return Result;
}
@@ -2132,7 +2332,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
OpInfo.RHS = Visit(E->getRHS());
OpInfo.Ty = E->getComputationResultType();
OpInfo.Opcode = E->getOpcode();
- OpInfo.FPContractable = E->isFPContractable();
+ OpInfo.FPFeatures = E->getFPFeatures();
OpInfo.E = E;
// Load/convert the LHS.
LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
@@ -2263,8 +2463,11 @@ void ScalarExprEmitter::EmitUndefinedBehaviorIntegerDivAndRemCheck(
SanitizerKind::IntegerDivideByZero));
}
+ const auto *BO = cast<BinaryOperator>(Ops.E);
if (CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow) &&
- Ops.Ty->hasSignedIntegerRepresentation()) {
+ Ops.Ty->hasSignedIntegerRepresentation() &&
+ !IsWidenedIntegerOp(CGF.getContext(), BO->getLHS()) &&
+ Ops.mayHaveIntegerOverflow()) {
llvm::IntegerType *Ty = cast<llvm::IntegerType>(Zero->getType());
llvm::Value *IntMin =
@@ -2287,11 +2490,13 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
CodeGenFunction::SanitizerScope SanScope(&CGF);
if ((CGF.SanOpts.has(SanitizerKind::IntegerDivideByZero) ||
CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) &&
- Ops.Ty->isIntegerType()) {
+ Ops.Ty->isIntegerType() &&
+ (Ops.mayHaveIntegerDivisionByZero() || Ops.mayHaveIntegerOverflow())) {
llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty));
EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, true);
} else if (CGF.SanOpts.has(SanitizerKind::FloatDivideByZero) &&
- Ops.Ty->isRealFloatingType()) {
+ Ops.Ty->isRealFloatingType() &&
+ Ops.mayHaveFloatDivisionByZero()) {
llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty));
llvm::Value *NonZero = Builder.CreateFCmpUNE(Ops.RHS, Zero);
EmitBinOpCheck(std::make_pair(NonZero, SanitizerKind::FloatDivideByZero),
@@ -2324,12 +2529,13 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
Value *ScalarExprEmitter::EmitRem(const BinOpInfo &Ops) {
// Rem in C can't be a floating point type: C99 6.5.5p2.
- if (CGF.SanOpts.has(SanitizerKind::IntegerDivideByZero)) {
+ if ((CGF.SanOpts.has(SanitizerKind::IntegerDivideByZero) ||
+ CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) &&
+ Ops.Ty->isIntegerType() &&
+ (Ops.mayHaveIntegerDivisionByZero() || Ops.mayHaveIntegerOverflow())) {
CodeGenFunction::SanitizerScope SanScope(&CGF);
llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty));
-
- if (Ops.Ty->isIntegerType())
- EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, false);
+ EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, false);
}
if (Ops.Ty->hasUnsignedIntegerRepresentation())
@@ -2369,6 +2575,7 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) {
if (isSigned)
OpID |= 1;
+ CodeGenFunction::SanitizerScope SanScope(&CGF);
llvm::Type *opTy = CGF.CGM.getTypes().ConvertType(Ops.Ty);
llvm::Function *intrinsic = CGF.CGM.getIntrinsic(IID, opTy);
@@ -2384,7 +2591,6 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) {
// If the signed-integer-overflow sanitizer is enabled, emit a call to its
// runtime. Otherwise, this is a -ftrapv check, so just emit a trap.
if (!isSigned || CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) {
- CodeGenFunction::SanitizerScope SanScope(&CGF);
llvm::Value *NotOverflow = Builder.CreateNot(overflow);
SanitizerMask Kind = isSigned ? SanitizerKind::SignedIntegerOverflow
: SanitizerKind::UnsignedIntegerOverflow;
@@ -2460,13 +2666,14 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
std::swap(pointerOperand, indexOperand);
}
+ bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType();
+
unsigned width = cast<llvm::IntegerType>(index->getType())->getBitWidth();
auto &DL = CGF.CGM.getDataLayout();
auto PtrTy = cast<llvm::PointerType>(pointer->getType());
if (width != DL.getTypeSizeInBits(PtrTy)) {
// Zero-extend or sign-extend the pointer value according to
// whether the index is signed or not.
- bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType();
index = CGF.Builder.CreateIntCast(index, DL.getIntPtrType(PtrTy), isSigned,
"idx.ext");
}
@@ -2510,7 +2717,9 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
pointer = CGF.Builder.CreateGEP(pointer, index, "add.ptr");
} else {
index = CGF.Builder.CreateNSWMul(index, numElements, "vla.index");
- pointer = CGF.Builder.CreateInBoundsGEP(pointer, index, "add.ptr");
+ pointer =
+ CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction,
+ op.E->getExprLoc(), "add.ptr");
}
return pointer;
}
@@ -2527,7 +2736,8 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
if (CGF.getLangOpts().isSignedOverflowDefined())
return CGF.Builder.CreateGEP(pointer, index, "add.ptr");
- return CGF.Builder.CreateInBoundsGEP(pointer, index, "add.ptr");
+ return CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction,
+ op.E->getExprLoc(), "add.ptr");
}
// Construct an fmuladd intrinsic to represent a fused mul-add of MulOp and
@@ -2577,12 +2787,7 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op,
"Only fadd/fsub can be the root of an fmuladd.");
// Check whether this op is marked as fusable.
- if (!op.FPContractable)
- return nullptr;
-
- // Check whether -ffp-contract=on. (If -ffp-contract=off/fast, fusing is
- // either disabled, or handled entirely by the LLVM backend).
- if (CGF.CGM.getCodeGenOpts().getFPContractMode() != CodeGenOptions::FPC_On)
+ if (!op.FPFeatures.allowFPContractWithinStatement())
return nullptr;
// We have a potentially fusable op. Look for a mul on one of the operands.
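The gate above now keys off the expression's own FPOptions rather than the global codegen flag. As an aside on what contraction means numerically (illustration only, independent of this change): fusing a*b+c into a single fma removes the intermediate rounding, so the fused and unfused forms can differ in the last bits.

// Sketch: one rounding (fma) vs. two roundings (mul then add).
#include <cmath>
#include <cstdio>

int main() {
  double a = 1.0 + 0x1p-27, b = 1.0 - 0x1p-27, c = -1.0;
  double separate = a * b + c;      // may or may not be contracted by the compiler
  double fused = std::fma(a, b, c); // exactly (a*b)+c with a single rounding
  // Commonly prints 0 vs. about -5.6e-17, depending on -ffp-contract.
  printf("%.17g %.17g\n", separate, fused);
  return 0;
}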
@@ -2605,7 +2810,7 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op,
Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
if (op.LHS->getType()->isPointerTy() ||
op.RHS->getType()->isPointerTy())
- return emitPointerArithmetic(CGF, op, /*subtraction*/ false);
+ return emitPointerArithmetic(CGF, op, CodeGenFunction::NotSubtraction);
if (op.Ty->isSignedIntegerOrEnumerationType()) {
switch (CGF.getLangOpts().getSignedOverflowBehavior()) {
@@ -2616,12 +2821,15 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
return Builder.CreateNSWAdd(op.LHS, op.RHS, "add");
// Fall through.
case LangOptions::SOB_Trapping:
+ if (CanElideOverflowCheck(CGF.getContext(), op))
+ return Builder.CreateNSWAdd(op.LHS, op.RHS, "add");
return EmitOverflowCheckedBinOp(op);
}
}
if (op.Ty->isUnsignedIntegerType() &&
- CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow))
+ CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) &&
+ !CanElideOverflowCheck(CGF.getContext(), op))
return EmitOverflowCheckedBinOp(op);
if (op.LHS->getType()->isFPOrFPVectorTy()) {
@@ -2629,7 +2837,8 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder))
return FMulAdd;
- return Builder.CreateFAdd(op.LHS, op.RHS, "add");
+ Value *V = Builder.CreateFAdd(op.LHS, op.RHS, "add");
+ return propagateFMFlags(V, op);
}
return Builder.CreateAdd(op.LHS, op.RHS, "add");
@@ -2647,19 +2856,23 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
return Builder.CreateNSWSub(op.LHS, op.RHS, "sub");
// Fall through.
case LangOptions::SOB_Trapping:
+ if (CanElideOverflowCheck(CGF.getContext(), op))
+ return Builder.CreateNSWSub(op.LHS, op.RHS, "sub");
return EmitOverflowCheckedBinOp(op);
}
}
if (op.Ty->isUnsignedIntegerType() &&
- CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow))
+ CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) &&
+ !CanElideOverflowCheck(CGF.getContext(), op))
return EmitOverflowCheckedBinOp(op);
if (op.LHS->getType()->isFPOrFPVectorTy()) {
// Try to form an fmuladd.
if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true))
return FMulAdd;
- return Builder.CreateFSub(op.LHS, op.RHS, "sub");
+ Value *V = Builder.CreateFSub(op.LHS, op.RHS, "sub");
+ return propagateFMFlags(V, op);
}
return Builder.CreateSub(op.LHS, op.RHS, "sub");
@@ -2668,7 +2881,7 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
// If the RHS is not a pointer, then we have normal pointer
// arithmetic.
if (!op.RHS->getType()->isPointerTy())
- return emitPointerArithmetic(CGF, op, /*subtraction*/ true);
+ return emitPointerArithmetic(CGF, op, CodeGenFunction::IsSubtraction);
// Otherwise, this is a pointer subtraction.
@@ -2751,8 +2964,8 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) {
isa<llvm::IntegerType>(Ops.LHS->getType())) {
CodeGenFunction::SanitizerScope SanScope(&CGF);
SmallVector<std::pair<Value *, SanitizerMask>, 2> Checks;
- llvm::Value *WidthMinusOne = GetWidthMinusOneValue(Ops.LHS, RHS);
- llvm::Value *ValidExponent = Builder.CreateICmpULE(RHS, WidthMinusOne);
+ llvm::Value *WidthMinusOne = GetWidthMinusOneValue(Ops.LHS, Ops.RHS);
+ llvm::Value *ValidExponent = Builder.CreateICmpULE(Ops.RHS, WidthMinusOne);
if (SanitizeExponent) {
Checks.push_back(
@@ -2767,12 +2980,14 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) {
llvm::BasicBlock *Cont = CGF.createBasicBlock("cont");
llvm::BasicBlock *CheckShiftBase = CGF.createBasicBlock("check");
Builder.CreateCondBr(ValidExponent, CheckShiftBase, Cont);
+ llvm::Value *PromotedWidthMinusOne =
+ (RHS == Ops.RHS) ? WidthMinusOne
+ : GetWidthMinusOneValue(Ops.LHS, RHS);
CGF.EmitBlock(CheckShiftBase);
- llvm::Value *BitsShiftedOff =
- Builder.CreateLShr(Ops.LHS,
- Builder.CreateSub(WidthMinusOne, RHS, "shl.zeros",
- /*NUW*/true, /*NSW*/true),
- "shl.check");
+ llvm::Value *BitsShiftedOff = Builder.CreateLShr(
+ Ops.LHS, Builder.CreateSub(PromotedWidthMinusOne, RHS, "shl.zeros",
+ /*NUW*/ true, /*NSW*/ true),
+ "shl.check");
if (CGF.getLangOpts().CPlusPlus) {
// In C99, we are not permitted to shift a 1 bit into the sign bit.
// Under C++11's rules, shifting a 1 bit into the sign bit is
@@ -3038,10 +3253,12 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) {
// because the result is altered by the store, i.e., [C99 6.5.16p1]
// 'An assignment expression has the value of the left operand after
// the assignment...'.
- if (LHS.isBitField())
+ if (LHS.isBitField()) {
CGF.EmitStoreThroughBitfieldLValue(RValue::get(RHS), LHS, &RHS);
- else
+ } else {
+ CGF.EmitNullabilityCheck(LHS, RHS, E->getExprLoc());
CGF.EmitStoreThroughLValue(RValue::get(RHS), LHS);
+ }
}
// If the result is clearly ignored, return now.
@@ -3327,9 +3544,11 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
// safe to evaluate the LHS and RHS unconditionally.
if (isCheapEnoughToEvaluateUnconditionally(lhsExpr, CGF) &&
isCheapEnoughToEvaluateUnconditionally(rhsExpr, CGF)) {
- CGF.incrementProfileCounter(E);
-
llvm::Value *CondV = CGF.EvaluateExprAsBool(condExpr);
+ llvm::Value *StepV = Builder.CreateZExtOrBitCast(CondV, CGF.Int64Ty);
+
+ CGF.incrementProfileCounter(E, StepV);
+
llvm::Value *LHS = Visit(lhsExpr);
llvm::Value *RHS = Visit(rhsExpr);
if (!LHS) {
@@ -3491,8 +3710,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
// vector to get a vec4, then a bitcast if the target type is different.
if (NumElementsSrc == 3 && NumElementsDst != 3) {
Src = ConvertVec3AndVec4(Builder, CGF, Src, 4);
- Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
- DstTy);
+
+ if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) {
+ Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
+ DstTy);
+ }
+
Src->setName("astype");
return Src;
}
@@ -3501,9 +3724,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
// to vec4 if the original type is not vec4, then a shuffle vector to
// get a vec3.
if (NumElementsSrc != 3 && NumElementsDst == 3) {
- auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4);
- Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
- Vec4Ty);
+ if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) {
+ auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4);
+ Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
+ Vec4Ty);
+ }
+
Src = ConvertVec3AndVec4(Builder, CGF, Src, 3);
Src->setName("astype");
return Src;
@@ -3626,3 +3852,136 @@ LValue CodeGenFunction::EmitCompoundAssignmentLValue(
llvm_unreachable("Unhandled compound assignment operator");
}
+
+Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr,
+ ArrayRef<Value *> IdxList,
+ bool SignedIndices,
+ bool IsSubtraction,
+ SourceLocation Loc,
+ const Twine &Name) {
+ Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name);
+
+ // If the pointer overflow sanitizer isn't enabled, do nothing.
+ if (!SanOpts.has(SanitizerKind::PointerOverflow))
+ return GEPVal;
+
+ // If the GEP has already been reduced to a constant, leave it be.
+ if (isa<llvm::Constant>(GEPVal))
+ return GEPVal;
+
+ // Only check for overflows in the default address space.
+ if (GEPVal->getType()->getPointerAddressSpace())
+ return GEPVal;
+
+ auto *GEP = cast<llvm::GEPOperator>(GEPVal);
+ assert(GEP->isInBounds() && "Expected inbounds GEP");
+
+ SanitizerScope SanScope(this);
+ auto &VMContext = getLLVMContext();
+ const auto &DL = CGM.getDataLayout();
+ auto *IntPtrTy = DL.getIntPtrType(GEP->getPointerOperandType());
+
+ // Grab references to the signed add/mul overflow intrinsics for intptr_t.
+ auto *Zero = llvm::ConstantInt::getNullValue(IntPtrTy);
+ auto *SAddIntrinsic =
+ CGM.getIntrinsic(llvm::Intrinsic::sadd_with_overflow, IntPtrTy);
+ auto *SMulIntrinsic =
+ CGM.getIntrinsic(llvm::Intrinsic::smul_with_overflow, IntPtrTy);
+
+ // The total (signed) byte offset for the GEP.
+ llvm::Value *TotalOffset = nullptr;
+ // The offset overflow flag - true if the total offset overflows.
+ llvm::Value *OffsetOverflows = Builder.getFalse();
+
+ /// Return the result of the given binary operation.
+ auto eval = [&](BinaryOperator::Opcode Opcode, llvm::Value *LHS,
+ llvm::Value *RHS) -> llvm::Value * {
+ assert((Opcode == BO_Add || Opcode == BO_Mul) && "Can't eval binop");
+
+ // If the operands are constants, return a constant result.
+ if (auto *LHSCI = dyn_cast<llvm::ConstantInt>(LHS)) {
+ if (auto *RHSCI = dyn_cast<llvm::ConstantInt>(RHS)) {
+ llvm::APInt N;
+ bool HasOverflow = mayHaveIntegerOverflow(LHSCI, RHSCI, Opcode,
+ /*Signed=*/true, N);
+ if (HasOverflow)
+ OffsetOverflows = Builder.getTrue();
+ return llvm::ConstantInt::get(VMContext, N);
+ }
+ }
+
+ // Otherwise, compute the result with checked arithmetic.
+ auto *ResultAndOverflow = Builder.CreateCall(
+ (Opcode == BO_Add) ? SAddIntrinsic : SMulIntrinsic, {LHS, RHS});
+ OffsetOverflows = Builder.CreateOr(
+ Builder.CreateExtractValue(ResultAndOverflow, 1), OffsetOverflows);
+ return Builder.CreateExtractValue(ResultAndOverflow, 0);
+ };
+
+ // Determine the total byte offset by looking at each GEP operand.
+ for (auto GTI = llvm::gep_type_begin(GEP), GTE = llvm::gep_type_end(GEP);
+ GTI != GTE; ++GTI) {
+ llvm::Value *LocalOffset;
+ auto *Index = GTI.getOperand();
+ // Compute the local offset contributed by this indexing step:
+ if (auto *STy = GTI.getStructTypeOrNull()) {
+ // For struct indexing, the local offset is the byte position of the
+ // specified field.
+ unsigned FieldNo = cast<llvm::ConstantInt>(Index)->getZExtValue();
+ LocalOffset = llvm::ConstantInt::get(
+ IntPtrTy, DL.getStructLayout(STy)->getElementOffset(FieldNo));
+ } else {
+ // Otherwise this is array-like indexing. The local offset is the index
+ // multiplied by the element size.
+ auto *ElementSize = llvm::ConstantInt::get(
+ IntPtrTy, DL.getTypeAllocSize(GTI.getIndexedType()));
+ auto *IndexS = Builder.CreateIntCast(Index, IntPtrTy, /*isSigned=*/true);
+ LocalOffset = eval(BO_Mul, ElementSize, IndexS);
+ }
+
+ // If this is the first offset, set it as the total offset. Otherwise, add
+ // the local offset into the running total.
+ if (!TotalOffset || TotalOffset == Zero)
+ TotalOffset = LocalOffset;
+ else
+ TotalOffset = eval(BO_Add, TotalOffset, LocalOffset);
+ }
+
+ // Common case: if the total offset is zero, don't emit a check.
+ if (TotalOffset == Zero)
+ return GEPVal;
+
+ // Now that we've computed the total offset, add it to the base pointer (with
+ // wrapping semantics).
+ auto *IntPtr = Builder.CreatePtrToInt(GEP->getPointerOperand(), IntPtrTy);
+ auto *ComputedGEP = Builder.CreateAdd(IntPtr, TotalOffset);
+
+ // The GEP is valid if:
+ // 1) The total offset doesn't overflow, and
+ // 2) The sign of the difference between the computed address and the base
+ // pointer matches the sign of the total offset.
+ llvm::Value *ValidGEP;
+ auto *NoOffsetOverflow = Builder.CreateNot(OffsetOverflows);
+ if (SignedIndices) {
+ auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
+ auto *PosOrZeroOffset = Builder.CreateICmpSGE(TotalOffset, Zero);
+ llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr);
+ ValidGEP = Builder.CreateAnd(
+ Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid),
+ NoOffsetOverflow);
+ } else if (!SignedIndices && !IsSubtraction) {
+ auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
+ ValidGEP = Builder.CreateAnd(PosOrZeroValid, NoOffsetOverflow);
+ } else {
+ auto *NegOrZeroValid = Builder.CreateICmpULE(ComputedGEP, IntPtr);
+ ValidGEP = Builder.CreateAnd(NegOrZeroValid, NoOffsetOverflow);
+ }
+
+ llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc)};
+ // Pass the computed GEP to the runtime to avoid emitting poisoned arguments.
+ llvm::Value *DynamicArgs[] = {IntPtr, ComputedGEP};
+ EmitCheck(std::make_pair(ValidGEP, SanitizerKind::PointerOverflow),
+ SanitizerHandler::PointerOverflow, StaticArgs, DynamicArgs);
+
+ return GEPVal;
+}
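A simplified standalone model of the validity condition emitted above (illustration only; the real check also ORs in overflow detected while computing the byte offset itself):

// Sketch: base-plus-offset must move in the direction the offset implies.
#include <cstdint>
#include <cstdio>

static bool gepIsValid(uint64_t Base, int64_t Offset, bool SignedIndices,
                       bool IsSubtraction) {
  uint64_t Computed = Base + static_cast<uint64_t>(Offset); // wrapping add
  if (SignedIndices)
    return Offset >= 0 ? Computed >= Base : Computed < Base;
  if (!IsSubtraction)
    return Computed >= Base; // unsigned index added: must not wrap past the end
  return Computed <= Base;   // unsigned index subtracted: must not wrap below
}

int main() {
  printf("%d\n", gepIsValid(~0ull - 8, 64, /*SignedIndices=*/false,
                            /*IsSubtraction=*/false)); // 0: the address wraps
  return 0;
}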
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDABuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGGPUBuiltin.cpp
index 44dd003..48156b1 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDABuiltin.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -1,4 +1,4 @@
-//===----- CGCUDABuiltin.cpp - Codegen for CUDA builtins ------------------===//
+//===------ CGGPUBuiltin.cpp - Codegen for GPU builtins -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// Generates code for built-in CUDA calls which are not runtime-specific.
-// (Runtime-specific codegen lives in CGCUDARuntime.)
+// Generates code for built-in GPU calls which are not runtime-specific.
+// (Runtime-specific codegen lives in programming-model-specific files.)
//
//===----------------------------------------------------------------------===//
@@ -67,10 +67,9 @@ static llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
// Note that by the time this function runs, E's args have already undergone the
// standard C vararg promotion (short -> int, float -> double, etc.).
RValue
-CodeGenFunction::EmitCUDADevicePrintfCallExpr(const CallExpr *E,
- ReturnValueSlot ReturnValue) {
- assert(getLangOpts().CUDA);
- assert(getLangOpts().CUDAIsDevice);
+CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
+ ReturnValueSlot ReturnValue) {
+ assert(getTarget().getTriple().isNVPTX());
assert(E->getBuiltinCallee() == Builtin::BIprintf);
assert(E->getNumArgs() >= 1); // printf always has at least one arg.
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp
index 932b8a1..90fcad26 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp
@@ -1,4 +1,4 @@
-//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
+//===---- CGObjC.cpp - Emit LLVM Code for Objective-C ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -117,10 +117,24 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
const ObjCArrayLiteral *ALE = dyn_cast<ObjCArrayLiteral>(E);
if (!ALE)
DLE = cast<ObjCDictionaryLiteral>(E);
-
- // Compute the type of the array we're initializing.
+
+ // Optimize empty collections by referencing constants, when available.
uint64_t NumElements =
ALE ? ALE->getNumElements() : DLE->getNumElements();
+ if (NumElements == 0 && CGM.getLangOpts().ObjCRuntime.hasEmptyCollections()) {
+ StringRef ConstantName = ALE ? "__NSArray0__" : "__NSDictionary0__";
+ QualType IdTy(CGM.getContext().getObjCIdType());
+ llvm::Constant *Constant =
+ CGM.CreateRuntimeVariable(ConvertType(IdTy), ConstantName);
+ LValue LV = MakeNaturalAlignAddrLValue(Constant, IdTy);
+ llvm::Value *Ptr = EmitLoadOfScalar(LV, E->getLocStart());
+ cast<llvm::LoadInst>(Ptr)->setMetadata(
+ CGM.getModule().getMDKindID("invariant.load"),
+ llvm::MDNode::get(getLLVMContext(), None));
+ return Builder.CreateBitCast(Ptr, ConvertType(E->getType()));
+ }
+
+ // Compute the type of the array we're initializing.
llvm::APInt APNumElements(Context.getTypeSize(Context.getSizeType()),
NumElements);
QualType ElementType = Context.getObjCIdType().withConst();
@@ -148,7 +162,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
const Expr *Rhs = ALE->getElement(i);
LValue LV = MakeAddrLValue(
Builder.CreateConstArrayGEP(Objects, i, getPointerSize()),
- ElementType, AlignmentSource::Decl);
+ ElementType, LValueBaseInfo(AlignmentSource::Decl, false));
llvm::Value *value = EmitScalarExpr(Rhs);
EmitStoreThroughLValue(RValue::get(value), LV, true);
@@ -160,7 +174,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
const Expr *Key = DLE->getKeyValueElement(i).Key;
LValue KeyLV = MakeAddrLValue(
Builder.CreateConstArrayGEP(Keys, i, getPointerSize()),
- ElementType, AlignmentSource::Decl);
+ ElementType, LValueBaseInfo(AlignmentSource::Decl, false));
llvm::Value *keyValue = EmitScalarExpr(Key);
EmitStoreThroughLValue(RValue::get(keyValue), KeyLV, /*isInit=*/true);
@@ -168,7 +182,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
const Expr *Value = DLE->getKeyValueElement(i).Value;
LValue ValueLV = MakeAddrLValue(
Builder.CreateConstArrayGEP(Objects, i, getPointerSize()),
- ElementType, AlignmentSource::Decl);
+ ElementType, LValueBaseInfo(AlignmentSource::Decl, false));
llvm::Value *valueValue = EmitScalarExpr(Value);
EmitStoreThroughLValue(RValue::get(valueValue), ValueLV, /*isInit=*/true);
if (TrackNeededObjects) {
@@ -427,7 +441,7 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E,
QualType ResultType = method ? method->getReturnType() : E->getType();
CallArgList Args;
- EmitCallArgs(Args, method, E->arguments());
+ EmitCallArgs(Args, method, E->arguments(), /*AC*/AbstractCallee(method));
// For delegate init calls in ARC, do an unsafe store of null into
// self. This represents the call taking direct ownership of that
@@ -1316,7 +1330,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
BinaryOperator assign(&ivarRef, finalArg, BO_Assign,
ivarRef.getType(), VK_RValue, OK_Ordinary,
- SourceLocation(), false);
+ SourceLocation(), FPOptions());
EmitStmt(&assign);
}
@@ -1469,6 +1483,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
if (DI)
DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());
+ RunCleanupsScope ForScope(*this);
+
// The local variable comes into scope immediately.
AutoVarEmission variable = AutoVarEmission::invalid();
if (const DeclStmt *SD = dyn_cast<DeclStmt>(S.getElement()))
@@ -1499,8 +1515,6 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
ArrayType::Normal, 0);
Address ItemsPtr = CreateMemTemp(ItemsTy, "items.ptr");
- RunCleanupsScope ForScope(*this);
-
// Emit the collection pointer. In ARC, we do a retain.
llvm::Value *Collection;
if (getLangOpts().ObjCAutoRefCount) {
@@ -1802,26 +1816,45 @@ void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) {
}
+static bool IsForwarding(StringRef Name) {
+ return llvm::StringSwitch<bool>(Name)
+ .Cases("objc_autoreleaseReturnValue", // ARCInstKind::AutoreleaseRV
+ "objc_autorelease", // ARCInstKind::Autorelease
+ "objc_retainAutoreleaseReturnValue", // ARCInstKind::FusedRetainAutoreleaseRV
+ "objc_retainAutoreleasedReturnValue", // ARCInstKind::RetainRV
+ "objc_retainAutorelease", // ARCInstKind::FusedRetainAutorelease
+ "objc_retainedObject", // ARCInstKind::NoopCast
+ "objc_retain", // ARCInstKind::Retain
+ "objc_unretainedObject", // ARCInstKind::NoopCast
+ "objc_unretainedPointer", // ARCInstKind::NoopCast
+ "objc_unsafeClaimAutoreleasedReturnValue", // ARCInstKind::ClaimRV
+ true)
+ .Default(false);
+}
+
static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM,
- llvm::FunctionType *type,
- StringRef fnName) {
- llvm::Constant *fn = CGM.CreateRuntimeFunction(type, fnName);
+ llvm::FunctionType *FTy,
+ StringRef Name) {
+ llvm::Constant *RTF = CGM.CreateRuntimeFunction(FTy, Name);
- if (llvm::Function *f = dyn_cast<llvm::Function>(fn)) {
+ if (auto *F = dyn_cast<llvm::Function>(RTF)) {
// If the target runtime doesn't naturally support ARC, emit weak
// references to the runtime support library. We don't really
// permit this to fail, but we need a particular relocation style.
if (!CGM.getLangOpts().ObjCRuntime.hasNativeARC() &&
!CGM.getTriple().isOSBinFormatCOFF()) {
- f->setLinkage(llvm::Function::ExternalWeakLinkage);
- } else if (fnName == "objc_retain" || fnName == "objc_release") {
+ F->setLinkage(llvm::Function::ExternalWeakLinkage);
+ } else if (Name == "objc_retain" || Name == "objc_release") {
// If we have Native ARC, set nonlazybind attribute for these APIs for
// performance.
- f->addFnAttr(llvm::Attribute::NonLazyBind);
+ F->addFnAttr(llvm::Attribute::NonLazyBind);
}
+
+ if (IsForwarding(Name))
+ F->arg_begin()->addAttr(llvm::Attribute::Returned);
}
- return fn;
+ return RTF;
}
/// Perform an operation having the signature
@@ -1832,7 +1865,8 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF,
llvm::Constant *&fn,
StringRef fnName,
bool isTailCall = false) {
- if (isa<llvm::ConstantPointerNull>(value)) return value;
+ if (isa<llvm::ConstantPointerNull>(value))
+ return value;
if (!fn) {
llvm::FunctionType *fnType =
@@ -2381,6 +2415,12 @@ void CodeGenFunction::destroyARCWeak(CodeGenFunction &CGF,
CGF.EmitARCDestroyWeak(addr);
}
+void CodeGenFunction::emitARCIntrinsicUse(CodeGenFunction &CGF, Address addr,
+ QualType type) {
+ llvm::Value *value = CGF.Builder.CreateLoad(addr);
+ CGF.EmitARCIntrinsicUse(value);
+}
+
namespace {
struct CallObjCAutoreleasePoolObject final : EHScopeStack::Cleanup {
llvm::Value *Token;
@@ -3206,10 +3246,12 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
SrcTy = C.getPointerType(SrcTy);
FunctionArgList args;
- ImplicitParamDecl dstDecl(getContext(), FD, SourceLocation(), nullptr,DestTy);
- args.push_back(&dstDecl);
- ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy);
- args.push_back(&srcDecl);
+ ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
+ DestTy, ImplicitParamDecl::Other);
+ args.push_back(&DstDecl);
+ ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
+ SrcTy, ImplicitParamDecl::Other);
+ args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
@@ -3225,12 +3267,12 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
StartFunction(FD, C.VoidTy, Fn, FI, args);
- DeclRefExpr DstExpr(&dstDecl, false, DestTy,
+ DeclRefExpr DstExpr(&DstDecl, false, DestTy,
VK_RValue, SourceLocation());
UnaryOperator DST(&DstExpr, UO_Deref, DestTy->getPointeeType(),
VK_LValue, OK_Ordinary, SourceLocation());
- DeclRefExpr SrcExpr(&srcDecl, false, SrcTy,
+ DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy,
VK_RValue, SourceLocation());
UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
VK_LValue, OK_Ordinary, SourceLocation());
@@ -3239,7 +3281,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
CallExpr *CalleeExp = cast<CallExpr>(PID->getSetterCXXAssignment());
CXXOperatorCallExpr TheCall(C, OO_Equal, CalleeExp->getCallee(),
Args, DestTy->getPointeeType(),
- VK_LValue, SourceLocation(), false);
+ VK_LValue, SourceLocation(), FPOptions());
EmitStmt(&TheCall);
@@ -3287,10 +3329,12 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
SrcTy = C.getPointerType(SrcTy);
FunctionArgList args;
- ImplicitParamDecl dstDecl(getContext(), FD, SourceLocation(), nullptr,DestTy);
- args.push_back(&dstDecl);
- ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy);
- args.push_back(&srcDecl);
+ ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
+ DestTy, ImplicitParamDecl::Other);
+ args.push_back(&DstDecl);
+ ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
+ SrcTy, ImplicitParamDecl::Other);
+ args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
@@ -3305,7 +3349,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
StartFunction(FD, C.VoidTy, Fn, FI, args);
- DeclRefExpr SrcExpr(&srcDecl, false, SrcTy,
+ DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy,
VK_RValue, SourceLocation());
UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
@@ -3331,7 +3375,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
CXXConstExpr->getConstructionKind(),
SourceRange());
- DeclRefExpr DstExpr(&dstDecl, false, DestTy,
+ DeclRefExpr DstExpr(&DstDecl, false, DestTy,
VK_RValue, SourceLocation());
RValue DV = EmitAnyExpr(&DstExpr);
@@ -3375,5 +3419,54 @@ CodeGenFunction::EmitBlockCopyAndAutorelease(llvm::Value *Block, QualType Ty) {
return Val;
}
+llvm::Value *
+CodeGenFunction::EmitBuiltinAvailable(ArrayRef<llvm::Value *> Args) {
+ assert(Args.size() == 3 && "Expected 3 argument here!");
+
+ if (!CGM.IsOSVersionAtLeastFn) {
+ llvm::FunctionType *FTy =
+ llvm::FunctionType::get(Int32Ty, {Int32Ty, Int32Ty, Int32Ty}, false);
+ CGM.IsOSVersionAtLeastFn =
+ CGM.CreateRuntimeFunction(FTy, "__isOSVersionAtLeast");
+ }
+
+ llvm::Value *CallRes =
+ EmitNounwindRuntimeCall(CGM.IsOSVersionAtLeastFn, Args);
+
+ return Builder.CreateICmpNE(CallRes, llvm::Constant::getNullValue(Int32Ty));
+}
+
+void CodeGenModule::emitAtAvailableLinkGuard() {
+ if (!IsOSVersionAtLeastFn)
+ return;
+ // @available requires CoreFoundation only on Darwin.
+ if (!Target.getTriple().isOSDarwin())
+ return;
+ // Add -framework CoreFoundation to the linker commands. We still want to
+ // emit the CoreFoundation reference below because otherwise, if
+ // CoreFoundation is not used in the code, the linker won't link the
+ // framework.
+ auto &Context = getLLVMContext();
+ llvm::Metadata *Args[2] = {llvm::MDString::get(Context, "-framework"),
+ llvm::MDString::get(Context, "CoreFoundation")};
+ LinkerOptionsMetadata.push_back(llvm::MDNode::get(Context, Args));
+ // Emit a reference to a symbol from CoreFoundation to ensure that
+ // CoreFoundation is linked into the final binary.
+ llvm::FunctionType *FTy =
+ llvm::FunctionType::get(Int32Ty, {VoidPtrTy}, false);
+ llvm::Constant *CFFunc =
+ CreateRuntimeFunction(FTy, "CFBundleGetVersionNumber");
+
+ llvm::FunctionType *CheckFTy = llvm::FunctionType::get(VoidTy, {}, false);
+ llvm::Function *CFLinkCheckFunc = cast<llvm::Function>(CreateBuiltinFunction(
+ CheckFTy, "__clang_at_available_requires_core_foundation_framework"));
+ CFLinkCheckFunc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+ CFLinkCheckFunc->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ CodeGenFunction CGF(*this);
+ CGF.Builder.SetInsertPoint(CGF.createBasicBlock("", CFLinkCheckFunc));
+ CGF.EmitNounwindRuntimeCall(CFFunc, llvm::Constant::getNullValue(VoidPtrTy));
+ CGF.Builder.CreateUnreachable();
+ addCompilerUsedGlobal(CFLinkCheckFunc);
+}
CGObjCRuntime::~CGObjCRuntime() {}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp
index fa2b3d8..c8b8be7 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp
@@ -18,7 +18,7 @@
#include "CGCleanup.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
-#include "ConstantBuilder.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclObjC.h"
@@ -34,7 +34,6 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Compiler.h"
-#include <cstdarg>
using namespace clang;
using namespace CodeGen;
@@ -58,18 +57,19 @@ public:
/// Initialises the lazy function with the name, return type, and the types
/// of the arguments.
- LLVM_END_WITH_NULL
- void init(CodeGenModule *Mod, const char *name, llvm::Type *RetTy, ...) {
+ template <typename... Tys>
+ void init(CodeGenModule *Mod, const char *name, llvm::Type *RetTy,
+ Tys *... Types) {
CGM = Mod;
FunctionName = name;
Function = nullptr;
- std::vector<llvm::Type *> ArgTys;
- va_list Args;
- va_start(Args, RetTy);
- while (llvm::Type *ArgTy = va_arg(Args, llvm::Type *))
- ArgTys.push_back(ArgTy);
- va_end(Args);
- FTy = llvm::FunctionType::get(RetTy, ArgTys, false);
+ if (sizeof...(Tys)) {
+ SmallVector<llvm::Type *, 8> ArgTys({Types...});
+ FTy = llvm::FunctionType::get(RetTy, ArgTys, false);
+ } else {
+ FTy = llvm::FunctionType::get(RetTy, None, false);
+ }
}
llvm::FunctionType *getType() { return FTy; }
@@ -603,11 +603,10 @@ protected:
public:
CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) {
// IMP objc_msg_lookup(id, SEL);
- MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy,
- nullptr);
+ MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy);
// IMP objc_msg_lookup_super(struct objc_super*, SEL);
MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy,
- PtrToObjCSuperTy, SelectorTy, nullptr);
+ PtrToObjCSuperTy, SelectorTy);
}
};
@@ -663,7 +662,7 @@ class CGObjCGNUstep : public CGObjCGNU {
}
// The lookup function is guaranteed not to capture the receiver pointer.
- LookupFn->setDoesNotCapture(1);
+ LookupFn->addParamAttr(0, llvm::Attribute::NoCapture);
llvm::Value *args[] = {
EnforceType(Builder, ReceiverPtr.getPointer(), PtrToIdTy),
@@ -702,52 +701,51 @@ class CGObjCGNUstep : public CGObjCGNU {
CGObjCGNUstep(CodeGenModule &Mod) : CGObjCGNU(Mod, 9, 3) {
const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime;
- llvm::StructType *SlotStructTy = llvm::StructType::get(PtrTy,
- PtrTy, PtrTy, IntTy, IMPTy, nullptr);
+ llvm::StructType *SlotStructTy =
+ llvm::StructType::get(PtrTy, PtrTy, PtrTy, IntTy, IMPTy);
SlotTy = llvm::PointerType::getUnqual(SlotStructTy);
// Slot_t objc_msg_lookup_sender(id *receiver, SEL selector, id sender);
SlotLookupFn.init(&CGM, "objc_msg_lookup_sender", SlotTy, PtrToIdTy,
- SelectorTy, IdTy, nullptr);
+ SelectorTy, IdTy);
// Slot_t objc_msg_lookup_super(struct objc_super*, SEL);
SlotLookupSuperFn.init(&CGM, "objc_slot_lookup_super", SlotTy,
- PtrToObjCSuperTy, SelectorTy, nullptr);
+ PtrToObjCSuperTy, SelectorTy);
// If we're in ObjC++ mode, then we want to make
if (CGM.getLangOpts().CPlusPlus) {
llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
// void *__cxa_begin_catch(void *e)
- EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy, nullptr);
+ EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy);
// void __cxa_end_catch(void)
- ExitCatchFn.init(&CGM, "__cxa_end_catch", VoidTy, nullptr);
+ ExitCatchFn.init(&CGM, "__cxa_end_catch", VoidTy);
// void _Unwind_Resume_or_Rethrow(void*)
ExceptionReThrowFn.init(&CGM, "_Unwind_Resume_or_Rethrow", VoidTy,
- PtrTy, nullptr);
+ PtrTy);
} else if (R.getVersion() >= VersionTuple(1, 7)) {
llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
// id objc_begin_catch(void *e)
- EnterCatchFn.init(&CGM, "objc_begin_catch", IdTy, PtrTy, nullptr);
+ EnterCatchFn.init(&CGM, "objc_begin_catch", IdTy, PtrTy);
// void objc_end_catch(void)
- ExitCatchFn.init(&CGM, "objc_end_catch", VoidTy, nullptr);
+ ExitCatchFn.init(&CGM, "objc_end_catch", VoidTy);
// void _Unwind_Resume_or_Rethrow(void*)
- ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy,
- PtrTy, nullptr);
+ ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy, PtrTy);
}
llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
SetPropertyAtomic.init(&CGM, "objc_setProperty_atomic", VoidTy, IdTy,
- SelectorTy, IdTy, PtrDiffTy, nullptr);
+ SelectorTy, IdTy, PtrDiffTy);
SetPropertyAtomicCopy.init(&CGM, "objc_setProperty_atomic_copy", VoidTy,
- IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr);
+ IdTy, SelectorTy, IdTy, PtrDiffTy);
SetPropertyNonAtomic.init(&CGM, "objc_setProperty_nonatomic", VoidTy,
- IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr);
+ IdTy, SelectorTy, IdTy, PtrDiffTy);
SetPropertyNonAtomicCopy.init(&CGM, "objc_setProperty_nonatomic_copy",
- VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr);
+ VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy);
// void objc_setCppObjectAtomic(void *dest, const void *src, void
// *helper);
CxxAtomicObjectSetFn.init(&CGM, "objc_setCppObjectAtomic", VoidTy, PtrTy,
- PtrTy, PtrTy, nullptr);
+ PtrTy, PtrTy);
// void objc_getCppObjectAtomic(void *dest, const void *src, void
// *helper);
CxxAtomicObjectGetFn.init(&CGM, "objc_getCppObjectAtomic", VoidTy, PtrTy,
- PtrTy, PtrTy, nullptr);
+ PtrTy, PtrTy);
}
llvm::Constant *GetCppAtomicObjectGetFunction() override {
@@ -849,14 +847,14 @@ protected:
public:
CGObjCObjFW(CodeGenModule &Mod): CGObjCGNU(Mod, 9, 3) {
// IMP objc_msg_lookup(id, SEL);
- MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, nullptr);
+ MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy);
MsgLookupFnSRet.init(&CGM, "objc_msg_lookup_stret", IMPTy, IdTy,
- SelectorTy, nullptr);
+ SelectorTy);
// IMP objc_msg_lookup_super(struct objc_super*, SEL);
MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy,
- PtrToObjCSuperTy, SelectorTy, nullptr);
+ PtrToObjCSuperTy, SelectorTy);
MsgLookupSuperFnSRet.init(&CGM, "objc_msg_lookup_super_stret", IMPTy,
- PtrToObjCSuperTy, SelectorTy, nullptr);
+ PtrToObjCSuperTy, SelectorTy);
}
};
} // end anonymous namespace
@@ -945,35 +943,34 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
}
PtrToIdTy = llvm::PointerType::getUnqual(IdTy);
- ObjCSuperTy = llvm::StructType::get(IdTy, IdTy, nullptr);
+ ObjCSuperTy = llvm::StructType::get(IdTy, IdTy);
PtrToObjCSuperTy = llvm::PointerType::getUnqual(ObjCSuperTy);
llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
// void objc_exception_throw(id);
- ExceptionThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, nullptr);
- ExceptionReThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, nullptr);
+ ExceptionThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy);
+ ExceptionReThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy);
// int objc_sync_enter(id);
- SyncEnterFn.init(&CGM, "objc_sync_enter", IntTy, IdTy, nullptr);
+ SyncEnterFn.init(&CGM, "objc_sync_enter", IntTy, IdTy);
// int objc_sync_exit(id);
- SyncExitFn.init(&CGM, "objc_sync_exit", IntTy, IdTy, nullptr);
+ SyncExitFn.init(&CGM, "objc_sync_exit", IntTy, IdTy);
// void objc_enumerationMutation (id)
- EnumerationMutationFn.init(&CGM, "objc_enumerationMutation", VoidTy,
- IdTy, nullptr);
+ EnumerationMutationFn.init(&CGM, "objc_enumerationMutation", VoidTy, IdTy);
// id objc_getProperty(id, SEL, ptrdiff_t, BOOL)
GetPropertyFn.init(&CGM, "objc_getProperty", IdTy, IdTy, SelectorTy,
- PtrDiffTy, BoolTy, nullptr);
+ PtrDiffTy, BoolTy);
// void objc_setProperty(id, SEL, ptrdiff_t, id, BOOL, BOOL)
SetPropertyFn.init(&CGM, "objc_setProperty", VoidTy, IdTy, SelectorTy,
- PtrDiffTy, IdTy, BoolTy, BoolTy, nullptr);
+ PtrDiffTy, IdTy, BoolTy, BoolTy);
// void objc_setPropertyStruct(void*, void*, ptrdiff_t, BOOL, BOOL)
- GetStructPropertyFn.init(&CGM, "objc_getPropertyStruct", VoidTy, PtrTy, PtrTy,
- PtrDiffTy, BoolTy, BoolTy, nullptr);
+ GetStructPropertyFn.init(&CGM, "objc_getPropertyStruct", VoidTy, PtrTy, PtrTy,
+ PtrDiffTy, BoolTy, BoolTy);
// void objc_setPropertyStruct(void*, void*, ptrdiff_t, BOOL, BOOL)
- SetStructPropertyFn.init(&CGM, "objc_setPropertyStruct", VoidTy, PtrTy, PtrTy,
- PtrDiffTy, BoolTy, BoolTy, nullptr);
+ SetStructPropertyFn.init(&CGM, "objc_setPropertyStruct", VoidTy, PtrTy, PtrTy,
+ PtrDiffTy, BoolTy, BoolTy);
// IMP type
llvm::Type *IMPArgs[] = { IdTy, SelectorTy };
@@ -997,21 +994,19 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
// Get functions needed in GC mode
// id objc_assign_ivar(id, id, ptrdiff_t);
- IvarAssignFn.init(&CGM, "objc_assign_ivar", IdTy, IdTy, IdTy, PtrDiffTy,
- nullptr);
+ IvarAssignFn.init(&CGM, "objc_assign_ivar", IdTy, IdTy, IdTy, PtrDiffTy);
// id objc_assign_strongCast (id, id*)
StrongCastAssignFn.init(&CGM, "objc_assign_strongCast", IdTy, IdTy,
- PtrToIdTy, nullptr);
+ PtrToIdTy);
// id objc_assign_global(id, id*);
- GlobalAssignFn.init(&CGM, "objc_assign_global", IdTy, IdTy, PtrToIdTy,
- nullptr);
+ GlobalAssignFn.init(&CGM, "objc_assign_global", IdTy, IdTy, PtrToIdTy);
// id objc_assign_weak(id, id*);
- WeakAssignFn.init(&CGM, "objc_assign_weak", IdTy, IdTy, PtrToIdTy, nullptr);
+ WeakAssignFn.init(&CGM, "objc_assign_weak", IdTy, IdTy, PtrToIdTy);
// id objc_read_weak(id*);
- WeakReadFn.init(&CGM, "objc_read_weak", IdTy, PtrToIdTy, nullptr);
+ WeakReadFn.init(&CGM, "objc_read_weak", IdTy, PtrToIdTy);
// void *objc_memmove_collectable(void*, void *, size_t);
MemMoveFn.init(&CGM, "objc_memmove_collectable", PtrTy, PtrTy, PtrTy,
- SizeTy, nullptr);
+ SizeTy);
}
}
@@ -1317,7 +1312,7 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF,
}
}
// Cast the pointer to a simplified version of the class structure
- llvm::Type *CastTy = llvm::StructType::get(IdTy, IdTy, nullptr);
+ llvm::Type *CastTy = llvm::StructType::get(IdTy, IdTy);
ReceiverClass = Builder.CreateBitCast(ReceiverClass,
llvm::PointerType::getUnqual(CastTy));
// Get the superclass pointer
@@ -1326,8 +1321,8 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF,
ReceiverClass =
Builder.CreateAlignedLoad(ReceiverClass, CGF.getPointerAlign());
// Construct the structure used to look up the IMP
- llvm::StructType *ObjCSuperTy = llvm::StructType::get(
- Receiver->getType(), IdTy, nullptr);
+ llvm::StructType *ObjCSuperTy =
+ llvm::StructType::get(Receiver->getType(), IdTy);
// FIXME: Is this really supposed to be a dynamic alloca?
Address ObjCSuper = Address(Builder.CreateAlloca(ObjCSuperTy),
@@ -1565,11 +1560,8 @@ GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames,
IvarList.addInt(IntTy, (int)IvarNames.size());
// Get the ivar structure type.
- llvm::StructType *ObjCIvarTy = llvm::StructType::get(
- PtrToInt8Ty,
- PtrToInt8Ty,
- IntTy,
- nullptr);
+ llvm::StructType *ObjCIvarTy =
+ llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty, IntTy);
// Array of ivar structures.
auto Ivars = IvarList.beginArray(ObjCIvarTy);
@@ -1611,7 +1603,7 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure(
// anyway; the classes will still work with the GNU runtime, they will just
// be ignored.
llvm::StructType *ClassTy = llvm::StructType::get(
- PtrToInt8Ty, // isa
+ PtrToInt8Ty, // isa
PtrToInt8Ty, // super_class
PtrToInt8Ty, // name
LongTy, // version
@@ -1620,18 +1612,18 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure(
IVars->getType(), // ivars
Methods->getType(), // methods
// These are all filled in by the runtime, so we pretend
- PtrTy, // dtable
- PtrTy, // subclass_list
- PtrTy, // sibling_class
- PtrTy, // protocols
- PtrTy, // gc_object_type
+ PtrTy, // dtable
+ PtrTy, // subclass_list
+ PtrTy, // sibling_class
+ PtrTy, // protocols
+ PtrTy, // gc_object_type
// New ABI:
LongTy, // abi_version
IvarOffsets->getType(), // ivar_offsets
Properties->getType(), // properties
IntPtrTy, // strong_pointers
- IntPtrTy, // weak_pointers
- nullptr);
+ IntPtrTy // weak_pointers
+ );
ConstantInitBuilder Builder(CGM);
auto Elements = Builder.beginStruct(ClassTy);
@@ -2207,7 +2199,7 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
IvarNames.push_back(MakeConstantString(IVD->getNameAsString()));
// Get the type encoding for this ivar
std::string TypeStr;
- Context.getObjCEncodingForType(IVD->getType(), TypeStr);
+ Context.getObjCEncodingForType(IVD->getType(), TypeStr, IVD);
IvarTypes.push_back(MakeConstantString(TypeStr));
// Get the offset
uint64_t BaseOffset = ComputeIvarBaseOffset(CGM, OID, IVD);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp
index 7219592..98435fe 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp
@@ -17,7 +17,7 @@
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
-#include "ConstantBuilder.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclObjC.h"
@@ -64,13 +64,11 @@ private:
// Add the non-lazy-bind attribute, since objc_msgSend is likely to
// be called a lot.
llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy };
- return
- CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy,
- params, true),
- "objc_msgSend",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NonLazyBind));
+ return CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(ObjectPtrTy, params, true), "objc_msgSend",
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NonLazyBind));
}
/// void objc_msgSend_stret (id, SEL, ...)
@@ -107,8 +105,8 @@ private:
llvm::Constant *getMessageSendFp2retFn() const {
llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy };
llvm::Type *longDoubleType = llvm::Type::getX86_FP80Ty(VMContext);
- llvm::Type *resultType =
- llvm::StructType::get(longDoubleType, longDoubleType, nullptr);
+ llvm::Type *resultType =
+ llvm::StructType::get(longDoubleType, longDoubleType);
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(resultType,
params, true),
@@ -310,7 +308,7 @@ public:
SmallVector<CanQualType,5> Params;
Params.push_back(Ctx.VoidPtrTy);
Params.push_back(Ctx.VoidPtrTy);
- Params.push_back(Ctx.LongTy);
+ Params.push_back(Ctx.getSizeType());
Params.push_back(Ctx.BoolTy);
Params.push_back(Ctx.BoolTy);
llvm::FunctionType *FTy =
@@ -589,13 +587,11 @@ public:
llvm::Constant *getSetJmpFn() {
// This is specifically the prototype for x86.
llvm::Type *params[] = { CGM.Int32Ty->getPointerTo() };
- return
- CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.Int32Ty,
- params, false),
- "_setjmp",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NonLazyBind));
+ return CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(CGM.Int32Ty, params, false), "_setjmp",
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NonLazyBind));
}
public:
@@ -890,7 +886,7 @@ protected:
/// Cached reference to the class for constant strings. This value has type
/// int * but is actually an Obj-C class pointer.
- llvm::WeakVH ConstantStringClassRef;
+ llvm::WeakTrackingVH ConstantStringClassRef;
/// \brief The LLVM type corresponding to NSConstantString.
llvm::StructType *NSConstantStringType = nullptr;
@@ -1008,6 +1004,8 @@ protected:
const ObjCInterfaceDecl *ID,
ObjCCommonTypesHelper &ObjCTypes);
+ std::string GetSectionName(StringRef Section, StringRef MachOAttributes);
+
public:
/// CreateMetadataVar - Create a global variable with internal
/// linkage for use by the Objective-C runtime.
@@ -1680,7 +1678,10 @@ struct NullReturnState {
/// Complete the null-return operation. It is valid to call this
/// regardless of whether 'init' has been called.
- RValue complete(CodeGenFunction &CGF, RValue result, QualType resultType,
+ RValue complete(CodeGenFunction &CGF,
+ ReturnValueSlot returnSlot,
+ RValue result,
+ QualType resultType,
const CallArgList &CallArgs,
const ObjCMethodDecl *Method) {
// If we never had to do a null-check, just use the raw result.
@@ -1747,7 +1748,8 @@ struct NullReturnState {
// memory or (2) agg values in registers.
if (result.isAggregate()) {
assert(result.isAggregate() && "null init of non-aggregate result?");
- CGF.EmitNullInitialization(result.getAggregateAddress(), resultType);
+ if (!returnSlot.isUnused())
+ CGF.EmitNullInitialization(result.getAggregateAddress(), resultType);
if (contBB) CGF.EmitBlock(contBB);
return result;
}
@@ -2119,11 +2121,11 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF,
}
}
- NullReturnState nullReturn;
+ bool RequiresNullCheck = false;
llvm::Constant *Fn = nullptr;
if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) {
- if (ReceiverCanBeNull) nullReturn.init(CGF, Arg0);
+ if (ReceiverCanBeNull) RequiresNullCheck = true;
Fn = (ObjCABI == 2) ? ObjCTypes.getSendStretFn2(IsSuper)
: ObjCTypes.getSendStretFn(IsSuper);
} else if (CGM.ReturnTypeUsesFPRet(ResultType)) {
@@ -2136,23 +2138,30 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF,
// arm64 uses objc_msgSend for stret methods and yet null receiver check
// must be made for it.
if (ReceiverCanBeNull && CGM.ReturnTypeUsesSRet(MSI.CallInfo))
- nullReturn.init(CGF, Arg0);
+ RequiresNullCheck = true;
Fn = (ObjCABI == 2) ? ObjCTypes.getSendFn2(IsSuper)
: ObjCTypes.getSendFn(IsSuper);
}
+ // We don't need to emit a null check to zero out an indirect result if the
+ // result is ignored.
+ if (Return.isUnused())
+ RequiresNullCheck = false;
+
// Emit a null-check if there's a consumed argument other than the receiver.
- bool RequiresNullCheck = false;
- if (ReceiverCanBeNull && CGM.getLangOpts().ObjCAutoRefCount && Method) {
+ if (!RequiresNullCheck && CGM.getLangOpts().ObjCAutoRefCount && Method) {
for (const auto *ParamDecl : Method->parameters()) {
if (ParamDecl->hasAttr<NSConsumedAttr>()) {
- if (!nullReturn.NullBB)
- nullReturn.init(CGF, Arg0);
RequiresNullCheck = true;
break;
}
}
}
+
+ NullReturnState nullReturn;
+ if (RequiresNullCheck) {
+ nullReturn.init(CGF, Arg0);
+ }
llvm::Instruction *CallSite;
Fn = llvm::ConstantExpr::getBitCast(Fn, MSI.MessengerType);
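
The reordering above computes whether a receiver null check is needed before any NullReturnState is initialized: a struct-return slot only has to be zeroed when the result is actually used, while an ARC-consumed argument forces the check regardless. A condensed, standalone sketch of that decision (flag names are illustrative, not the clang variables):

  #include <cassert>

  bool needsNullCheck(bool receiverCanBeNull, bool usesStructRet,
                      bool resultUnused, bool arcConsumedArg) {
    bool required = receiverCanBeNull && usesStructRet; // must zero the sret slot
    if (resultUnused)
      required = false;             // nothing to zero if the caller ignores it
    if (!required && arcConsumedArg)
      required = true;              // consumed arguments must still be released
    return required;
  }

  int main() {
    assert(needsNullCheck(true, true, false, false));  // sret, result used
    assert(!needsNullCheck(true, true, true, false));  // sret, result ignored
    assert(needsNullCheck(true, true, true, true));    // ARC-consumed argument
    return 0;
  }
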
@@ -2166,7 +2175,7 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF,
llvm::CallSite(CallSite).setDoesNotReturn();
}
- return nullReturn.complete(CGF, rvalue, ResultType, CallArgs,
+ return nullReturn.complete(CGF, Return, rvalue, ResultType, CallArgs,
RequiresNullCheck ? Method : nullptr);
}
@@ -4790,6 +4799,27 @@ llvm::Value *CGObjCMac::EmitIvarOffset(CodeGen::CodeGenFunction &CGF,
/* *** Private Interface *** */
+std::string CGObjCCommonMac::GetSectionName(StringRef Section,
+ StringRef MachOAttributes) {
+ switch (CGM.getTriple().getObjectFormat()) {
+ default:
+ llvm_unreachable("unexpected object file format");
+ case llvm::Triple::MachO: {
+ if (MachOAttributes.empty())
+ return ("__DATA," + Section).str();
+ return ("__DATA," + Section + "," + MachOAttributes).str();
+ }
+ case llvm::Triple::ELF:
+ assert(Section.substr(0, 2) == "__" &&
+ "expected the name to begin with __");
+ return Section.substr(2).str();
+ case llvm::Triple::COFF:
+ assert(Section.substr(0, 2) == "__" &&
+ "expected the name to begin with __");
+ return ("." + Section.substr(2) + "$B").str();
+ }
+}
+
/// EmitImageInfo - Emit the image info marker used to encode some module
/// level information.
///
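
The GetSectionName helper added above maps the canonical Mach-O section names onto ELF and COFF spellings so the non-Darwin back ends can place the same metadata. A standalone C++ sketch of that mapping (hypothetical helper, not the clang entry point):

  #include <cassert>
  #include <string>

  enum class ObjFormat { MachO, ELF, COFF };

  std::string sectionName(ObjFormat F, const std::string &S,
                          const std::string &Attrs = std::string()) {
    switch (F) {
    case ObjFormat::MachO:
      return Attrs.empty() ? "__DATA," + S : "__DATA," + S + "," + Attrs;
    case ObjFormat::ELF:
      return S.substr(2);               // drop the "__" prefix
    case ObjFormat::COFF:
      return "." + S.substr(2) + "$B";  // e.g. ".objc_msgrefs$B"
    }
    return S;
  }

  int main() {
    assert(sectionName(ObjFormat::MachO, "__objc_imageinfo",
                       "regular,no_dead_strip") ==
           "__DATA,__objc_imageinfo,regular,no_dead_strip");
    assert(sectionName(ObjFormat::ELF, "__objc_classrefs") == "objc_classrefs");
    assert(sectionName(ObjFormat::COFF, "__objc_msgrefs") == ".objc_msgrefs$B");
    return 0;
  }

The later hunks in this file replace the hard-coded "__DATA, ..." strings with calls through this helper.
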
@@ -4813,9 +4843,10 @@ enum ImageInfoFlags {
void CGObjCCommonMac::EmitImageInfo() {
unsigned version = 0; // Version is unused?
- const char *Section = (ObjCABI == 1) ?
- "__OBJC, __image_info,regular" :
- "__DATA, __objc_imageinfo, regular, no_dead_strip";
+ std::string Section =
+ (ObjCABI == 1)
+ ? "__OBJC,__image_info,regular"
+ : GetSectionName("__objc_imageinfo", "regular,no_dead_strip");
// Generate module-level named metadata to convey this information to the
// linker and code-gen.
@@ -4826,7 +4857,7 @@ void CGObjCCommonMac::EmitImageInfo() {
Mod.addModuleFlag(llvm::Module::Error, "Objective-C Image Info Version",
version);
Mod.addModuleFlag(llvm::Module::Error, "Objective-C Image Info Section",
- llvm::MDString::get(VMContext,Section));
+ llvm::MDString::get(VMContext, Section));
if (CGM.getLangOpts().getGC() == LangOptions::NonGC) {
// Non-GC overrides those files which specify GC.
@@ -5510,17 +5541,15 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm)
// char *name;
// char *attributes;
// }
- PropertyTy = llvm::StructType::create("struct._prop_t",
- Int8PtrTy, Int8PtrTy, nullptr);
+ PropertyTy = llvm::StructType::create("struct._prop_t", Int8PtrTy, Int8PtrTy);
// struct _prop_list_t {
// uint32_t entsize; // sizeof(struct _prop_t)
// uint32_t count_of_properties;
// struct _prop_t prop_list[count_of_properties];
// }
- PropertyListTy =
- llvm::StructType::create("struct._prop_list_t", IntTy, IntTy,
- llvm::ArrayType::get(PropertyTy, 0), nullptr);
+ PropertyListTy = llvm::StructType::create(
+ "struct._prop_list_t", IntTy, IntTy, llvm::ArrayType::get(PropertyTy, 0));
// struct _prop_list_t *
PropertyListPtrTy = llvm::PointerType::getUnqual(PropertyListTy);
@@ -5529,9 +5558,8 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm)
// char *method_type;
// char *_imp;
// }
- MethodTy = llvm::StructType::create("struct._objc_method",
- SelectorPtrTy, Int8PtrTy, Int8PtrTy,
- nullptr);
+ MethodTy = llvm::StructType::create("struct._objc_method", SelectorPtrTy,
+ Int8PtrTy, Int8PtrTy);
// struct _objc_cache *
CacheTy = llvm::StructType::create(VMContext, "struct._objc_cache");
@@ -5544,17 +5572,16 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
// SEL name;
// char *types;
// }
- MethodDescriptionTy =
- llvm::StructType::create("struct._objc_method_description",
- SelectorPtrTy, Int8PtrTy, nullptr);
+ MethodDescriptionTy = llvm::StructType::create(
+ "struct._objc_method_description", SelectorPtrTy, Int8PtrTy);
// struct _objc_method_description_list {
// int count;
// struct _objc_method_description[1];
// }
- MethodDescriptionListTy = llvm::StructType::create(
- "struct._objc_method_description_list", IntTy,
- llvm::ArrayType::get(MethodDescriptionTy, 0), nullptr);
+ MethodDescriptionListTy =
+ llvm::StructType::create("struct._objc_method_description_list", IntTy,
+ llvm::ArrayType::get(MethodDescriptionTy, 0));
// struct _objc_method_description_list *
MethodDescriptionListPtrTy =
@@ -5570,11 +5597,10 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
// const char ** extendedMethodTypes;
// struct _objc_property_list *class_properties;
// }
- ProtocolExtensionTy =
- llvm::StructType::create("struct._objc_protocol_extension",
- IntTy, MethodDescriptionListPtrTy,
- MethodDescriptionListPtrTy, PropertyListPtrTy,
- Int8PtrPtrTy, PropertyListPtrTy, nullptr);
+ ProtocolExtensionTy = llvm::StructType::create(
+ "struct._objc_protocol_extension", IntTy, MethodDescriptionListPtrTy,
+ MethodDescriptionListPtrTy, PropertyListPtrTy, Int8PtrPtrTy,
+ PropertyListPtrTy);
// struct _objc_protocol_extension *
ProtocolExtensionPtrTy = llvm::PointerType::getUnqual(ProtocolExtensionTy);
@@ -5586,10 +5612,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
ProtocolListTy =
llvm::StructType::create(VMContext, "struct._objc_protocol_list");
- ProtocolListTy->setBody(llvm::PointerType::getUnqual(ProtocolListTy),
- LongTy,
- llvm::ArrayType::get(ProtocolTy, 0),
- nullptr);
+ ProtocolListTy->setBody(llvm::PointerType::getUnqual(ProtocolListTy), LongTy,
+ llvm::ArrayType::get(ProtocolTy, 0));
// struct _objc_protocol {
// struct _objc_protocol_extension *isa;
@@ -5600,9 +5624,7 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
// }
ProtocolTy->setBody(ProtocolExtensionPtrTy, Int8PtrTy,
llvm::PointerType::getUnqual(ProtocolListTy),
- MethodDescriptionListPtrTy,
- MethodDescriptionListPtrTy,
- nullptr);
+ MethodDescriptionListPtrTy, MethodDescriptionListPtrTy);
// struct _objc_protocol_list *
ProtocolListPtrTy = llvm::PointerType::getUnqual(ProtocolListTy);
@@ -5616,8 +5638,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
// char *ivar_type;
// int ivar_offset;
// }
- IvarTy = llvm::StructType::create("struct._objc_ivar",
- Int8PtrTy, Int8PtrTy, IntTy, nullptr);
+ IvarTy = llvm::StructType::create("struct._objc_ivar", Int8PtrTy, Int8PtrTy,
+ IntTy);
// struct _objc_ivar_list *
IvarListTy =
@@ -5630,9 +5652,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
MethodListPtrTy = llvm::PointerType::getUnqual(MethodListTy);
// struct _objc_class_extension *
- ClassExtensionTy =
- llvm::StructType::create("struct._objc_class_extension",
- IntTy, Int8PtrTy, PropertyListPtrTy, nullptr);
+ ClassExtensionTy = llvm::StructType::create(
+ "struct._objc_class_extension", IntTy, Int8PtrTy, PropertyListPtrTy);
ClassExtensionPtrTy = llvm::PointerType::getUnqual(ClassExtensionTy);
ClassTy = llvm::StructType::create(VMContext, "struct._objc_class");
@@ -5652,18 +5673,9 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
// struct _objc_class_ext *ext;
// };
ClassTy->setBody(llvm::PointerType::getUnqual(ClassTy),
- llvm::PointerType::getUnqual(ClassTy),
- Int8PtrTy,
- LongTy,
- LongTy,
- LongTy,
- IvarListPtrTy,
- MethodListPtrTy,
- CachePtrTy,
- ProtocolListPtrTy,
- Int8PtrTy,
- ClassExtensionPtrTy,
- nullptr);
+ llvm::PointerType::getUnqual(ClassTy), Int8PtrTy, LongTy,
+ LongTy, LongTy, IvarListPtrTy, MethodListPtrTy, CachePtrTy,
+ ProtocolListPtrTy, Int8PtrTy, ClassExtensionPtrTy);
ClassPtrTy = llvm::PointerType::getUnqual(ClassTy);
@@ -5677,12 +5689,10 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
// struct _objc_property_list *instance_properties;// category's @property
// struct _objc_property_list *class_properties;
// }
- CategoryTy =
- llvm::StructType::create("struct._objc_category",
- Int8PtrTy, Int8PtrTy, MethodListPtrTy,
- MethodListPtrTy, ProtocolListPtrTy,
- IntTy, PropertyListPtrTy, PropertyListPtrTy,
- nullptr);
+ CategoryTy = llvm::StructType::create(
+ "struct._objc_category", Int8PtrTy, Int8PtrTy, MethodListPtrTy,
+ MethodListPtrTy, ProtocolListPtrTy, IntTy, PropertyListPtrTy,
+ PropertyListPtrTy);
// Global metadata structures
@@ -5693,10 +5703,9 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
// short cat_def_cnt;
// char *defs[cls_def_cnt + cat_def_cnt];
// }
- SymtabTy =
- llvm::StructType::create("struct._objc_symtab",
- LongTy, SelectorPtrTy, ShortTy, ShortTy,
- llvm::ArrayType::get(Int8PtrTy, 0), nullptr);
+ SymtabTy = llvm::StructType::create("struct._objc_symtab", LongTy,
+ SelectorPtrTy, ShortTy, ShortTy,
+ llvm::ArrayType::get(Int8PtrTy, 0));
SymtabPtrTy = llvm::PointerType::getUnqual(SymtabTy);
// struct _objc_module {
@@ -5705,10 +5714,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
// char *name;
// struct _objc_symtab* symtab;
// }
- ModuleTy =
- llvm::StructType::create("struct._objc_module",
- LongTy, LongTy, Int8PtrTy, SymtabPtrTy, nullptr);
-
+ ModuleTy = llvm::StructType::create("struct._objc_module", LongTy, LongTy,
+ Int8PtrTy, SymtabPtrTy);
// FIXME: This is the size of the setjmp buffer and should be target
// specific. 18 is what's used on 32-bit X86.
@@ -5717,10 +5724,9 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
// Exceptions
llvm::Type *StackPtrTy = llvm::ArrayType::get(CGM.Int8PtrTy, 4);
- ExceptionDataTy =
- llvm::StructType::create("struct._objc_exception_data",
- llvm::ArrayType::get(CGM.Int32Ty,SetJmpBufferSize),
- StackPtrTy, nullptr);
+ ExceptionDataTy = llvm::StructType::create(
+ "struct._objc_exception_data",
+ llvm::ArrayType::get(CGM.Int32Ty, SetJmpBufferSize), StackPtrTy);
}
ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModule &cgm)
@@ -5731,8 +5737,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
// struct _objc_method method_list[method_count];
// }
MethodListnfABITy =
- llvm::StructType::create("struct.__method_list_t", IntTy, IntTy,
- llvm::ArrayType::get(MethodTy, 0), nullptr);
+ llvm::StructType::create("struct.__method_list_t", IntTy, IntTy,
+ llvm::ArrayType::get(MethodTy, 0));
// struct method_list_t *
MethodListnfABIPtrTy = llvm::PointerType::getUnqual(MethodListnfABITy);
@@ -5756,14 +5762,12 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
ProtocolListnfABITy =
llvm::StructType::create(VMContext, "struct._objc_protocol_list");
- ProtocolnfABITy =
- llvm::StructType::create("struct._protocol_t", ObjectPtrTy, Int8PtrTy,
- llvm::PointerType::getUnqual(ProtocolListnfABITy),
- MethodListnfABIPtrTy, MethodListnfABIPtrTy,
- MethodListnfABIPtrTy, MethodListnfABIPtrTy,
- PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy,
- Int8PtrTy, PropertyListPtrTy,
- nullptr);
+ ProtocolnfABITy = llvm::StructType::create(
+ "struct._protocol_t", ObjectPtrTy, Int8PtrTy,
+ llvm::PointerType::getUnqual(ProtocolListnfABITy), MethodListnfABIPtrTy,
+ MethodListnfABIPtrTy, MethodListnfABIPtrTy, MethodListnfABIPtrTy,
+ PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy, Int8PtrTy,
+ PropertyListPtrTy);
// struct _protocol_t*
ProtocolnfABIPtrTy = llvm::PointerType::getUnqual(ProtocolnfABITy);
@@ -5773,8 +5777,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
// struct _protocol_t *[protocol_count];
// }
ProtocolListnfABITy->setBody(LongTy,
- llvm::ArrayType::get(ProtocolnfABIPtrTy, 0),
- nullptr);
+ llvm::ArrayType::get(ProtocolnfABIPtrTy, 0));
// struct _objc_protocol_list*
ProtocolListnfABIPtrTy = llvm::PointerType::getUnqual(ProtocolListnfABITy);
@@ -5788,7 +5791,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
// }
IvarnfABITy = llvm::StructType::create(
"struct._ivar_t", llvm::PointerType::getUnqual(IvarOffsetVarTy),
- Int8PtrTy, Int8PtrTy, IntTy, IntTy, nullptr);
+ Int8PtrTy, Int8PtrTy, IntTy, IntTy);
// struct _ivar_list_t {
// uint32 entsize; // sizeof(struct _ivar_t)
@@ -5796,8 +5799,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
// struct _ivar_t list[count];
// }
IvarListnfABITy =
- llvm::StructType::create("struct._ivar_list_t", IntTy, IntTy,
- llvm::ArrayType::get(IvarnfABITy, 0), nullptr);
+ llvm::StructType::create("struct._ivar_list_t", IntTy, IntTy,
+ llvm::ArrayType::get(IvarnfABITy, 0));
IvarListnfABIPtrTy = llvm::PointerType::getUnqual(IvarListnfABITy);
@@ -5816,13 +5819,10 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
// }
// FIXME. Add 'reserved' field in 64bit abi mode!
- ClassRonfABITy = llvm::StructType::create("struct._class_ro_t",
- IntTy, IntTy, IntTy, Int8PtrTy,
- Int8PtrTy, MethodListnfABIPtrTy,
- ProtocolListnfABIPtrTy,
- IvarListnfABIPtrTy,
- Int8PtrTy, PropertyListPtrTy,
- nullptr);
+ ClassRonfABITy = llvm::StructType::create(
+ "struct._class_ro_t", IntTy, IntTy, IntTy, Int8PtrTy, Int8PtrTy,
+ MethodListnfABIPtrTy, ProtocolListnfABIPtrTy, IvarListnfABIPtrTy,
+ Int8PtrTy, PropertyListPtrTy);
// ImpnfABITy - LLVM for id (*)(id, SEL, ...)
llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy };
@@ -5839,11 +5839,9 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
ClassnfABITy = llvm::StructType::create(VMContext, "struct._class_t");
ClassnfABITy->setBody(llvm::PointerType::getUnqual(ClassnfABITy),
- llvm::PointerType::getUnqual(ClassnfABITy),
- CachePtrTy,
+ llvm::PointerType::getUnqual(ClassnfABITy), CachePtrTy,
llvm::PointerType::getUnqual(ImpnfABITy),
- llvm::PointerType::getUnqual(ClassRonfABITy),
- nullptr);
+ llvm::PointerType::getUnqual(ClassRonfABITy));
// LLVM for struct _class_t *
ClassnfABIPtrTy = llvm::PointerType::getUnqual(ClassnfABITy);
@@ -5858,15 +5856,10 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
// const struct _prop_list_t * const class_properties;
// const uint32_t size;
// }
- CategorynfABITy = llvm::StructType::create("struct._category_t",
- Int8PtrTy, ClassnfABIPtrTy,
- MethodListnfABIPtrTy,
- MethodListnfABIPtrTy,
- ProtocolListnfABIPtrTy,
- PropertyListPtrTy,
- PropertyListPtrTy,
- IntTy,
- nullptr);
+ CategorynfABITy = llvm::StructType::create(
+ "struct._category_t", Int8PtrTy, ClassnfABIPtrTy, MethodListnfABIPtrTy,
+ MethodListnfABIPtrTy, ProtocolListnfABIPtrTy, PropertyListPtrTy,
+ PropertyListPtrTy, IntTy);
// New types for nonfragile abi messaging.
CodeGen::CodeGenTypes &Types = CGM.getTypes();
@@ -5903,9 +5896,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
// SUPER_IMP messenger;
// SEL name;
// };
- SuperMessageRefTy =
- llvm::StructType::create("struct._super_message_ref_t",
- ImpnfABITy, SelectorPtrTy, nullptr);
+ SuperMessageRefTy = llvm::StructType::create("struct._super_message_ref_t",
+ ImpnfABITy, SelectorPtrTy);
// SuperMessageRefPtrTy - LLVM for struct _super_message_ref_t*
SuperMessageRefPtrTy = llvm::PointerType::getUnqual(SuperMessageRefTy);
@@ -5916,10 +5908,9 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
// const char* name; // c++ typeinfo string
// Class cls;
// };
- EHTypeTy =
- llvm::StructType::create("struct._objc_typeinfo",
- llvm::PointerType::getUnqual(Int8PtrTy),
- Int8PtrTy, ClassnfABIPtrTy, nullptr);
+ EHTypeTy = llvm::StructType::create("struct._objc_typeinfo",
+ llvm::PointerType::getUnqual(Int8PtrTy),
+ Int8PtrTy, ClassnfABIPtrTy);
EHTypePtrTy = llvm::PointerType::getUnqual(EHTypeTy);
}
@@ -5974,17 +5965,21 @@ void CGObjCNonFragileABIMac::FinishNonFragileABIModule() {
}
AddModuleClassList(DefinedClasses, "OBJC_LABEL_CLASS_$",
- "__DATA, __objc_classlist, regular, no_dead_strip");
+ GetSectionName("__objc_classlist",
+ "regular,no_dead_strip"));
AddModuleClassList(DefinedNonLazyClasses, "OBJC_LABEL_NONLAZY_CLASS_$",
- "__DATA, __objc_nlclslist, regular, no_dead_strip");
+ GetSectionName("__objc_nlclslist",
+ "regular,no_dead_strip"));
// Build list of all implemented category addresses in array
// L_OBJC_LABEL_CATEGORY_$.
AddModuleClassList(DefinedCategories, "OBJC_LABEL_CATEGORY_$",
- "__DATA, __objc_catlist, regular, no_dead_strip");
+ GetSectionName("__objc_catlist",
+ "regular,no_dead_strip"));
AddModuleClassList(DefinedNonLazyCategories, "OBJC_LABEL_NONLAZY_CATEGORY_$",
- "__DATA, __objc_nlcatlist, regular, no_dead_strip");
+ GetSectionName("__objc_nlcatlist",
+ "regular,no_dead_strip"));
EmitImageInfo();
}
@@ -6397,15 +6392,15 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF,
llvm::GlobalVariable *PTGV = CGM.getModule().getGlobalVariable(ProtocolName);
if (PTGV)
return CGF.Builder.CreateAlignedLoad(PTGV, Align);
- PTGV = new llvm::GlobalVariable(
- CGM.getModule(),
- Init->getType(), false,
- llvm::GlobalValue::WeakAnyLinkage,
- Init,
- ProtocolName);
- PTGV->setSection("__DATA, __objc_protorefs, coalesced, no_dead_strip");
+ PTGV = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false,
+ llvm::GlobalValue::WeakAnyLinkage, Init,
+ ProtocolName);
+ PTGV->setSection(GetSectionName("__objc_protorefs",
+ "coalesced,no_dead_strip"));
PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
PTGV->setAlignment(Align.getQuantity());
+ if (!CGM.getTriple().isOSBinFormatMachO())
+ PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolName));
CGM.addCompilerUsedGlobal(PTGV);
return CGF.Builder.CreateAlignedLoad(PTGV, Align);
}
@@ -6862,8 +6857,8 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol(
PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolRef));
PTGV->setAlignment(
CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABIPtrTy));
- if (CGM.getTriple().isOSBinFormatMachO())
- PTGV->setSection("__DATA, __objc_protolist, coalesced, no_dead_strip");
+ PTGV->setSection(GetSectionName("__objc_protolist",
+ "coalesced,no_dead_strip"));
PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
CGM.addCompilerUsedGlobal(PTGV);
return Entry;
@@ -7059,7 +7054,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF,
/*constant*/ false,
llvm::GlobalValue::WeakAnyLinkage);
messageRef->setVisibility(llvm::GlobalValue::HiddenVisibility);
- messageRef->setSection("__DATA, __objc_msgrefs, coalesced");
+ messageRef->setSection(GetSectionName("__objc_msgrefs", "coalesced"));
}
bool requiresnullCheck = false;
@@ -7089,7 +7084,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF,
CGCallee callee(CGCalleeInfo(), calleePtr);
RValue result = CGF.EmitCall(MSI.CallInfo, callee, returnSlot, args);
- return nullReturn.complete(CGF, result, resultType, formalArgs,
+ return nullReturn.complete(CGF, returnSlot, result, resultType, formalArgs,
requiresnullCheck ? method : nullptr);
}
@@ -7170,7 +7165,8 @@ CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF,
false, llvm::GlobalValue::PrivateLinkage,
ClassGV, "OBJC_CLASSLIST_REFERENCES_$_");
Entry->setAlignment(Align.getQuantity());
- Entry->setSection("__DATA, __objc_classrefs, regular, no_dead_strip");
+ Entry->setSection(GetSectionName("__objc_classrefs",
+ "regular,no_dead_strip"));
CGM.addCompilerUsedGlobal(Entry);
}
return CGF.Builder.CreateAlignedLoad(Entry, Align);
@@ -7204,7 +7200,8 @@ CGObjCNonFragileABIMac::EmitSuperClassRef(CodeGenFunction &CGF,
false, llvm::GlobalValue::PrivateLinkage,
ClassGV, "OBJC_CLASSLIST_SUP_REFS_$_");
Entry->setAlignment(Align.getQuantity());
- Entry->setSection("__DATA, __objc_superrefs, regular, no_dead_strip");
+ Entry->setSection(GetSectionName("__objc_superrefs",
+ "regular,no_dead_strip"));
CGM.addCompilerUsedGlobal(Entry);
}
return CGF.Builder.CreateAlignedLoad(Entry, Align);
@@ -7226,7 +7223,8 @@ llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CodeGenFunction &CGF,
MetaClassGV, "OBJC_CLASSLIST_SUP_REFS_$_");
Entry->setAlignment(Align.getQuantity());
- Entry->setSection("__DATA, __objc_superrefs, regular, no_dead_strip");
+ Entry->setSection(GetSectionName("__objc_superrefs",
+ "regular,no_dead_strip"));
CGM.addCompilerUsedGlobal(Entry);
}
@@ -7322,7 +7320,8 @@ Address CGObjCNonFragileABIMac::EmitSelectorAddr(CodeGenFunction &CGF,
false, llvm::GlobalValue::PrivateLinkage,
Casted, "OBJC_SELECTOR_REFERENCES_");
Entry->setExternallyInitialized(true);
- Entry->setSection("__DATA, __objc_selrefs, literal_pointers, no_dead_strip");
+ Entry->setSection(GetSectionName("__objc_selrefs",
+ "literal_pointers,no_dead_strip"));
Entry->setAlignment(Align.getQuantity());
CGM.addCompilerUsedGlobal(Entry);
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp
index 3da7ed2..4cfddcb 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp
@@ -26,61 +26,27 @@
using namespace clang;
using namespace CodeGen;
-static uint64_t LookupFieldBitOffset(CodeGen::CodeGenModule &CGM,
- const ObjCInterfaceDecl *OID,
- const ObjCImplementationDecl *ID,
- const ObjCIvarDecl *Ivar) {
- const ObjCInterfaceDecl *Container = Ivar->getContainingInterface();
-
- // FIXME: We should eliminate the need to have ObjCImplementationDecl passed
- // in here; it should never be necessary because that should be the lexical
- // decl context for the ivar.
-
- // If we know have an implementation (and the ivar is in it) then
- // look up in the implementation layout.
- const ASTRecordLayout *RL;
- if (ID && declaresSameEntity(ID->getClassInterface(), Container))
- RL = &CGM.getContext().getASTObjCImplementationLayout(ID);
- else
- RL = &CGM.getContext().getASTObjCInterfaceLayout(Container);
-
- // Compute field index.
- //
- // FIXME: The index here is closely tied to how ASTContext::getObjCLayout is
- // implemented. This should be fixed to get the information from the layout
- // directly.
- unsigned Index = 0;
-
- for (const ObjCIvarDecl *IVD = Container->all_declared_ivar_begin();
- IVD; IVD = IVD->getNextIvar()) {
- if (Ivar == IVD)
- break;
- ++Index;
- }
- assert(Index < RL->getFieldCount() && "Ivar is not inside record layout!");
-
- return RL->getFieldOffset(Index);
-}
-
uint64_t CGObjCRuntime::ComputeIvarBaseOffset(CodeGen::CodeGenModule &CGM,
const ObjCInterfaceDecl *OID,
const ObjCIvarDecl *Ivar) {
- return LookupFieldBitOffset(CGM, OID, nullptr, Ivar) /
- CGM.getContext().getCharWidth();
+ return CGM.getContext().lookupFieldBitOffset(OID, nullptr, Ivar) /
+ CGM.getContext().getCharWidth();
}
uint64_t CGObjCRuntime::ComputeIvarBaseOffset(CodeGen::CodeGenModule &CGM,
const ObjCImplementationDecl *OID,
const ObjCIvarDecl *Ivar) {
- return LookupFieldBitOffset(CGM, OID->getClassInterface(), OID, Ivar) /
- CGM.getContext().getCharWidth();
+ return CGM.getContext().lookupFieldBitOffset(OID->getClassInterface(), OID,
+ Ivar) /
+ CGM.getContext().getCharWidth();
}
unsigned CGObjCRuntime::ComputeBitfieldBitOffset(
CodeGen::CodeGenModule &CGM,
const ObjCInterfaceDecl *ID,
const ObjCIvarDecl *Ivar) {
- return LookupFieldBitOffset(CGM, ID, ID->getImplementation(), Ivar);
+ return CGM.getContext().lookupFieldBitOffset(ID, ID->getImplementation(),
+ Ivar);
}
LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF,
@@ -90,7 +56,11 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF,
unsigned CVRQualifiers,
llvm::Value *Offset) {
// Compute (type*) ( (char *) BaseValue + Offset)
- QualType IvarTy = Ivar->getType().withCVRQualifiers(CVRQualifiers);
+ QualType InterfaceTy{OID->getTypeForDecl(), 0};
+ QualType ObjectPtrTy =
+ CGF.CGM.getContext().getObjCObjectPointerType(InterfaceTy);
+ QualType IvarTy =
+ Ivar->getUsageType(ObjectPtrTy).withCVRQualifiers(CVRQualifiers);
llvm::Type *LTy = CGF.CGM.getTypes().ConvertTypeForMem(IvarTy);
llvm::Value *V = CGF.Builder.CreateBitCast(BaseValue, CGF.Int8PtrTy);
V = CGF.Builder.CreateInBoundsGEP(V, Offset, "add.ptr");
@@ -115,7 +85,8 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF,
// Note, there is a subtle invariant here: we can only call this routine on
// non-synthesized ivars but we may be called for synthesized ivars. However,
// a synthesized ivar can never be a bit-field, so this is safe.
- uint64_t FieldBitOffset = LookupFieldBitOffset(CGF.CGM, OID, nullptr, Ivar);
+ uint64_t FieldBitOffset =
+ CGF.CGM.getContext().lookupFieldBitOffset(OID, nullptr, Ivar);
uint64_t BitOffset = FieldBitOffset % CGF.CGM.getContext().getCharWidth();
uint64_t AlignmentBits = CGF.CGM.getTarget().getCharAlign();
uint64_t BitFieldSize = Ivar->getBitWidthValue(CGF.getContext());
@@ -138,7 +109,8 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF,
Addr = CGF.Builder.CreateElementBitCast(Addr,
llvm::Type::getIntNTy(CGF.getLLVMContext(),
Info->StorageSize));
- return LValue::MakeBitfield(Addr, *Info, IvarTy, AlignmentSource::Decl);
+ return LValue::MakeBitfield(Addr, *Info, IvarTy,
+ LValueBaseInfo(AlignmentSource::Decl, false));
}
namespace {
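
ComputeIvarBaseOffset above still converts the bit offset returned by the lookup (now hosted on ASTContext as lookupFieldBitOffset) into a byte offset by dividing by the target's char width. A trivial standalone illustration, assuming an 8-bit char:

  #include <cassert>
  #include <cstdint>

  constexpr uint64_t CharWidthBits = 8;

  constexpr uint64_t ivarByteOffset(uint64_t fieldBitOffset) {
    return fieldBitOffset / CharWidthBits;
  }

  int main() {
    static_assert(ivarByteOffset(64) == 8, "second pointer-sized ivar on LP64");
    assert(ivarByteOffset(12) == 1);  // bit-field offsets truncate to bytes
    return 0;
  }
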
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp
index 9062936..db02c63 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -58,9 +58,6 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) {
case BuiltinType::OCLQueue:
return llvm::PointerType::get(
llvm::StructType::create(Ctx, "opencl.queue_t"), 0);
- case BuiltinType::OCLNDRange:
- return llvm::PointerType::get(
- llvm::StructType::create(Ctx, "opencl.ndrange_t"), 0);
case BuiltinType::OCLReserveID:
return llvm::PointerType::get(
llvm::StructType::create(Ctx, "opencl.reserve_id_t"), 0);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 4025217..9f8aa6c 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -15,7 +15,7 @@
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
-#include "ConstantBuilder.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
@@ -264,6 +264,13 @@ public:
return nullptr;
}
+ /// \brief Get an LValue for the current ThreadID variable.
+ LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
+ if (OuterRegionInfo)
+ return OuterRegionInfo->getThreadIDVariableLValue(CGF);
+ llvm_unreachable("No LValue for inlined OpenMP construct");
+ }
+
/// \brief Get the name of the capture helper.
StringRef getHelperName() const override {
if (auto *OuterRegionInfo = getOldCSI())
@@ -643,6 +650,12 @@ enum OpenMPRTLFunction {
// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
// *vec);
OMPRTL__kmpc_doacross_wait,
+ // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
+ // *data);
+ OMPRTL__kmpc_task_reduction_init,
+ // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
+ // *d);
+ OMPRTL__kmpc_task_reduction_get_th_data,
//
// Offloading related calls
@@ -697,6 +710,419 @@ void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
}
}
+/// Check if the combiner is a call to UDR combiner and if it is so return the
+/// UDR decl used for reduction.
+static const OMPDeclareReductionDecl *
+getReductionInit(const Expr *ReductionOp) {
+ if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
+ if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
+ if (auto *DRE =
+ dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
+ if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
+ return DRD;
+ return nullptr;
+}
+
+static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
+ const OMPDeclareReductionDecl *DRD,
+ const Expr *InitOp,
+ Address Private, Address Original,
+ QualType Ty) {
+ if (DRD->getInitializer()) {
+ std::pair<llvm::Function *, llvm::Function *> Reduction =
+ CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
+ auto *CE = cast<CallExpr>(InitOp);
+ auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
+ const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
+ const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
+ auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
+ auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
+ [=]() -> Address { return Private; });
+ PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
+ [=]() -> Address { return Original; });
+ (void)PrivateScope.Privatize();
+ RValue Func = RValue::get(Reduction.second);
+ CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
+ CGF.EmitIgnoredExpr(InitOp);
+ } else {
+ llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
+ auto *GV = new llvm::GlobalVariable(
+ CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
+ llvm::GlobalValue::PrivateLinkage, Init, ".init");
+ LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
+ RValue InitRVal;
+ switch (CGF.getEvaluationKind(Ty)) {
+ case TEK_Scalar:
+ InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
+ break;
+ case TEK_Complex:
+ InitRVal =
+ RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
+ break;
+ case TEK_Aggregate:
+ InitRVal = RValue::getAggregate(LV.getAddress());
+ break;
+ }
+ OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
+ CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
+ CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
+ /*IsInitializer=*/false);
+ }
+}
+
+/// \brief Emit initialization of arrays of complex types.
+/// \param DestAddr Address of the array.
+/// \param Type Type of array.
+/// \param Init Initial expression of array.
+/// \param SrcAddr Address of the original array.
+static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
+ QualType Type, bool EmitDeclareReductionInit,
+ const Expr *Init,
+ const OMPDeclareReductionDecl *DRD,
+ Address SrcAddr = Address::invalid()) {
+ // Perform element-by-element initialization.
+ QualType ElementTy;
+
+ // Drill down to the base element type on both arrays.
+ auto ArrayTy = Type->getAsArrayTypeUnsafe();
+ auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
+ DestAddr =
+ CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
+ if (DRD)
+ SrcAddr =
+ CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
+
+ llvm::Value *SrcBegin = nullptr;
+ if (DRD)
+ SrcBegin = SrcAddr.getPointer();
+ auto DestBegin = DestAddr.getPointer();
+ // Cast from pointer to array type to pointer to single element.
+ auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
+ // The basic structure here is a while-do loop.
+ auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
+ auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
+ auto IsEmpty =
+ CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
+ CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
+
+ // Enter the loop body, making that address the current address.
+ auto EntryBB = CGF.Builder.GetInsertBlock();
+ CGF.EmitBlock(BodyBB);
+
+ CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
+
+ llvm::PHINode *SrcElementPHI = nullptr;
+ Address SrcElementCurrent = Address::invalid();
+ if (DRD) {
+ SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
+ "omp.arraycpy.srcElementPast");
+ SrcElementPHI->addIncoming(SrcBegin, EntryBB);
+ SrcElementCurrent =
+ Address(SrcElementPHI,
+ SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
+ }
+ llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
+ DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
+ DestElementPHI->addIncoming(DestBegin, EntryBB);
+ Address DestElementCurrent =
+ Address(DestElementPHI,
+ DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
+
+ // Emit copy.
+ {
+ CodeGenFunction::RunCleanupsScope InitScope(CGF);
+ if (EmitDeclareReductionInit) {
+ emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
+ SrcElementCurrent, ElementTy);
+ } else
+ CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
+ /*IsInitializer=*/false);
+ }
+
+ if (DRD) {
+ // Shift the address forward by one element.
+ auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
+ SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
+ SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
+ }
+
+ // Shift the address forward by one element.
+ auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
+ DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
+ // Check whether we've reached the end.
+ auto Done =
+ CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
+ CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
+ DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
+
+ // Done.
+ CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
+}
+
+LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
+ if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
+ return CGF.EmitOMPArraySectionExpr(OASE);
+ if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
+ return CGF.EmitLValue(ASE);
+ auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
+ CGF.CapturedStmtInfo &&
+ CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
+ E->getType(), VK_LValue, E->getExprLoc());
+ // Store the address of the original variable associated with the LHS
+ // implicit variable.
+ return CGF.EmitLValue(&DRE);
+}
+
+LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
+ const Expr *E) {
+ if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
+ return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
+ return LValue();
+}
+
+void ReductionCodeGen::emitAggregateInitialization(
+ CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
+ const OMPDeclareReductionDecl *DRD) {
+ // Emit VarDecl with copy init for arrays.
+ // Get the address of the original variable captured in current
+ // captured region.
+ auto *PrivateVD =
+ cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+ bool EmitDeclareReductionInit =
+ DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
+ EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
+ EmitDeclareReductionInit,
+ EmitDeclareReductionInit ? ClausesData[N].ReductionOp
+ : PrivateVD->getInit(),
+ DRD, SharedLVal.getAddress());
+}
+
+ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
+ ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> ReductionOps) {
+ ClausesData.reserve(Shareds.size());
+ SharedAddresses.reserve(Shareds.size());
+ Sizes.reserve(Shareds.size());
+ BaseDecls.reserve(Shareds.size());
+ auto IPriv = Privates.begin();
+ auto IRed = ReductionOps.begin();
+ for (const auto *Ref : Shareds) {
+ ClausesData.emplace_back(Ref, *IPriv, *IRed);
+ std::advance(IPriv, 1);
+ std::advance(IRed, 1);
+ }
+}
+
+void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
+ assert(SharedAddresses.size() == N &&
+ "Number of generated lvalues must be exactly N.");
+ SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref),
+ emitSharedLValueUB(CGF, ClausesData[N].Ref));
+}
+
+void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
+ auto *PrivateVD =
+ cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+ QualType PrivateType = PrivateVD->getType();
+ bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
+ if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
+ Sizes.emplace_back(
+ CGF.getTypeSize(
+ SharedAddresses[N].first.getType().getNonReferenceType()),
+ nullptr);
+ return;
+ }
+ llvm::Value *Size;
+ llvm::Value *SizeInChars;
+ llvm::Type *ElemType =
+ cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
+ ->getElementType();
+ auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
+ if (AsArraySection) {
+ Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
+ SharedAddresses[N].first.getPointer());
+ Size = CGF.Builder.CreateNUWAdd(
+ Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
+ SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
+ } else {
+ SizeInChars = CGF.getTypeSize(
+ SharedAddresses[N].first.getType().getNonReferenceType());
+ Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
+ }
+ Sizes.emplace_back(SizeInChars, Size);
+ CodeGenFunction::OpaqueValueMapping OpaqueMap(
+ CGF,
+ cast<OpaqueValueExpr>(
+ CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
+ RValue::get(Size));
+ CGF.EmitVariablyModifiedType(PrivateType);
+}
+
+void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
+ llvm::Value *Size) {
+ auto *PrivateVD =
+ cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+ QualType PrivateType = PrivateVD->getType();
+ bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
+ if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
+ assert(!Size && !Sizes[N].second &&
+         "Size should be nullptr for non-variably modified reduction "
+ "items.");
+ return;
+ }
+ CodeGenFunction::OpaqueValueMapping OpaqueMap(
+ CGF,
+ cast<OpaqueValueExpr>(
+ CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
+ RValue::get(Size));
+ CGF.EmitVariablyModifiedType(PrivateType);
+}
+
+void ReductionCodeGen::emitInitialization(
+ CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
+ llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
+ assert(SharedAddresses.size() > N && "No variable was generated");
+ auto *PrivateVD =
+ cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+ auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
+ QualType PrivateType = PrivateVD->getType();
+ PrivateAddr = CGF.Builder.CreateElementBitCast(
+ PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
+ QualType SharedType = SharedAddresses[N].first.getType();
+ SharedLVal = CGF.MakeAddrLValue(
+ CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
+ CGF.ConvertTypeForMem(SharedType)),
+ SharedType, SharedAddresses[N].first.getBaseInfo());
+ if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) ||
+ CGF.getContext().getAsArrayType(PrivateVD->getType())) {
+ emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
+ } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
+ emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
+ PrivateAddr, SharedLVal.getAddress(),
+ SharedLVal.getType());
+ } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
+ !CGF.isTrivialInitializer(PrivateVD->getInit())) {
+ CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
+ PrivateVD->getType().getQualifiers(),
+ /*IsInitializer=*/false);
+ }
+}
+
+bool ReductionCodeGen::needCleanups(unsigned N) {
+ auto *PrivateVD =
+ cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+ QualType PrivateType = PrivateVD->getType();
+ QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
+ return DTorKind != QualType::DK_none;
+}
+
+void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
+ Address PrivateAddr) {
+ auto *PrivateVD =
+ cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+ QualType PrivateType = PrivateVD->getType();
+ QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
+ if (needCleanups(N)) {
+ PrivateAddr = CGF.Builder.CreateElementBitCast(
+ PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
+ CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
+ }
+}
+
+static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
+ LValue BaseLV) {
+ BaseTy = BaseTy.getNonReferenceType();
+ while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
+ !CGF.getContext().hasSameType(BaseTy, ElTy)) {
+ if (auto *PtrTy = BaseTy->getAs<PointerType>())
+ BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
+ else {
+ BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
+ BaseTy->castAs<ReferenceType>());
+ }
+ BaseTy = BaseTy->getPointeeType();
+ }
+ return CGF.MakeAddrLValue(
+ CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
+ CGF.ConvertTypeForMem(ElTy)),
+ BaseLV.getType(), BaseLV.getBaseInfo());
+}
+
+static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
+ llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
+ llvm::Value *Addr) {
+ Address Tmp = Address::invalid();
+ Address TopTmp = Address::invalid();
+ Address MostTopTmp = Address::invalid();
+ BaseTy = BaseTy.getNonReferenceType();
+ while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
+ !CGF.getContext().hasSameType(BaseTy, ElTy)) {
+ Tmp = CGF.CreateMemTemp(BaseTy);
+ if (TopTmp.isValid())
+ CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
+ else
+ MostTopTmp = Tmp;
+ TopTmp = Tmp;
+ BaseTy = BaseTy->getPointeeType();
+ }
+ llvm::Type *Ty = BaseLVType;
+ if (Tmp.isValid())
+ Ty = Tmp.getElementType();
+ Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
+ if (Tmp.isValid()) {
+ CGF.Builder.CreateStore(Addr, Tmp);
+ return MostTopTmp;
+ }
+ return Address(Addr, BaseLVAlignment);
+}
+
+Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
+ Address PrivateAddr) {
+ const DeclRefExpr *DE;
+ const VarDecl *OrigVD = nullptr;
+ if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) {
+ auto *Base = OASE->getBase()->IgnoreParenImpCasts();
+ while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
+ Base = TempOASE->getBase()->IgnoreParenImpCasts();
+ while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+ Base = TempASE->getBase()->IgnoreParenImpCasts();
+ DE = cast<DeclRefExpr>(Base);
+ OrigVD = cast<VarDecl>(DE->getDecl());
+ } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) {
+ auto *Base = ASE->getBase()->IgnoreParenImpCasts();
+ while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+ Base = TempASE->getBase()->IgnoreParenImpCasts();
+ DE = cast<DeclRefExpr>(Base);
+ OrigVD = cast<VarDecl>(DE->getDecl());
+ }
+ if (OrigVD) {
+ BaseDecls.emplace_back(OrigVD);
+ auto OriginalBaseLValue = CGF.EmitLValue(DE);
+ LValue BaseLValue =
+ loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
+ OriginalBaseLValue);
+ llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
+ BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
+ llvm::Value *Ptr =
+ CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment);
+ return castToBase(CGF, OrigVD->getType(),
+ SharedAddresses[N].first.getType(),
+ OriginalBaseLValue.getPointer()->getType(),
+ OriginalBaseLValue.getAlignment(), Ptr);
+ }
+ BaseDecls.emplace_back(
+ cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
+ return PrivateAddr;
+}
+
+bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
+ auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
+ return DRD && DRD->getInitializer();
+}
+
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
return CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(getThreadIDVariable()),
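
Among the additions above, EmitOMPAggregateInit builds a guarded do-while loop in IR: if the destination array is empty it branches straight to the done block, otherwise it walks the destination (and, for declare-reduction initializers, the source) one element at a time until the destination pointer reaches its end. A standalone sketch of the same control flow in plain C++ (names are illustrative):

  #include <cassert>

  void initArray(int *destBegin, int *destEnd, const int *srcBegin) {
    if (destBegin == destEnd)    // omp.arrayinit.isempty
      return;
    int *d = destBegin;
    const int *s = srcBegin;
    do {
      *d = s ? *s : 0;           // initializer from the original vs. default init
      if (s) ++s;
      ++d;                       // advance by one element
    } while (d != destEnd);      // omp.arraycpy.done
  }

  int main() {
    int src[3] = {1, 2, 3}, dst[3] = {0, 0, 0};
    initArray(dst, dst + 3, src);
    assert(dst[0] == 1 && dst[2] == 3);
    return 0;
  }

The ReductionCodeGen helpers introduced in the same hunk decide per clause item whether this element loop, a user-defined reduction initializer, or the private variable's own initializer is emitted.
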
@@ -720,7 +1146,7 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
CodeGenFunction &CGF) {
return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
getThreadIDVariable()->getType(),
- AlignmentSource::Decl);
+ LValueBaseInfo(AlignmentSource::Decl, false));
}
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
@@ -728,7 +1154,7 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
IdentTy = llvm::StructType::create(
"ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
- CGM.Int8PtrTy /* psource */, nullptr);
+ CGM.Int8PtrTy /* psource */);
KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
loadOffloadInfoMetadata();
@@ -747,9 +1173,9 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
QualType PtrTy = C.getPointerType(Ty).withRestrict();
FunctionArgList Args;
ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
- /*Id=*/nullptr, PtrTy);
+ /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
- /*Id=*/nullptr, PtrTy);
+ /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
Args.push_back(&OmpOutParm);
Args.push_back(&OmpInParm);
auto &FnInfo =
@@ -760,6 +1186,7 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
Fn->removeFnAttr(llvm::Attribute::NoInline);
+ Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
CodeGenFunction CGF(CGM);
// Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
@@ -842,12 +1269,12 @@ static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}
-llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
- const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
- OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+static llvm::Value *emitParallelOrTeamsOutlinedFunction(
+ CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
+ const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
+ const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
assert(ThreadIDVar->getType()->isPointerType() &&
"thread id variable must be of type kmp_int32 *");
- const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
CodeGenFunction CGF(CGM, true);
bool HasCancel = false;
if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
@@ -857,11 +1284,27 @@ llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
HasCancel = OPFD->hasCancel();
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
- HasCancel, getOutlinedHelperName());
+ HasCancel, OutlinedHelperName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
+llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
+ return emitParallelOrTeamsOutlinedFunction(
+ CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
+}
+
+llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
+ return emitParallelOrTeamsOutlinedFunction(
+ CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
+}
+
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
const VarDecl *PartIDVar, const VarDecl *TaskTVar,
@@ -1537,6 +1980,26 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
break;
}
+ case OMPRTL__kmpc_task_reduction_init: {
+ // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
+ // *data);
+ llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
+ break;
+ }
+ case OMPRTL__kmpc_task_reduction_get_th_data: {
+ // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
+ // *d);
+ llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
+ break;
+ }
case OMPRTL__tgt_target: {
// Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
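
The two task-reduction entry points added above are declared with the usual pattern of building a FunctionType and registering it as a runtime function. A minimal sketch of those prototypes, assuming the LLVM 5-era module API (the module name is illustrative):

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"

  int main() {
    llvm::LLVMContext Ctx;
    llvm::Module M("example", Ctx);
    llvm::Type *IntTy = llvm::Type::getInt32Ty(Ctx);
    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(Ctx);
    // void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
    llvm::Type *InitParams[] = {IntTy, IntTy, VoidPtrTy};
    auto *InitTy = llvm::FunctionType::get(VoidPtrTy, InitParams, false);
    M.getOrInsertFunction("__kmpc_task_reduction_init", InitTy);
    // void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
    llvm::Type *GetParams[] = {IntTy, VoidPtrTy, VoidPtrTy};
    auto *GetTy = llvm::FunctionType::get(VoidPtrTy, GetParams, false);
    M.getOrInsertFunction("__kmpc_task_reduction_get_th_data", GetTy);
    return 0;
  }
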
@@ -1791,8 +2254,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
// threadprivate copy of the variable VD
CodeGenFunction CtorCGF(CGM);
FunctionArgList Args;
- ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
- /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
+ ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.push_back(&Dst);
auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
@@ -1822,8 +2285,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
// of the variable VD
CodeGenFunction DtorCGF(CGM);
FunctionArgList Args;
- ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
- /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
+ ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.push_back(&Dst);
auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
@@ -1887,6 +2350,27 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
return nullptr;
}
+Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
+ QualType VarType,
+ StringRef Name) {
+ llvm::Twine VarName(Name, ".artificial.");
+ llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
+ llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, SourceLocation()),
+ getThreadID(CGF, SourceLocation()),
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
+ CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
+ /*IsSigned=*/false),
+ getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
+ return Address(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
+ VarLVType->getPointerTo(/*AddrSpace=*/0)),
+ CGM.getPointerAlign());
+}
+
/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
@@ -2174,10 +2658,8 @@ static llvm::Value *emitCopyprivateCopyFunction(
auto &C = CGM.getContext();
// void copy_func(void *LHSArg, void *RHSArg);
FunctionArgList Args;
- ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
- C.VoidPtrTy);
- ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
- C.VoidPtrTy);
+ ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
Args.push_back(&LHSArg);
Args.push_back(&RHSArg);
auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
@@ -2450,16 +2932,14 @@ static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
return Schedule | Modifier;
}
-void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
- SourceLocation Loc,
- const OpenMPScheduleTy &ScheduleKind,
- unsigned IVSize, bool IVSigned,
- bool Ordered, llvm::Value *UB,
- llvm::Value *Chunk) {
+void CGOpenMPRuntime::emitForDispatchInit(
+ CodeGenFunction &CGF, SourceLocation Loc,
+ const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
+ bool Ordered, const DispatchRTInput &DispatchValues) {
if (!CGF.HaveInsertPoint())
return;
- OpenMPSchedType Schedule =
- getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
+ OpenMPSchedType Schedule = getRuntimeSchedule(
+ ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
assert(Ordered ||
(Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
@@ -2470,14 +2950,14 @@ void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
// kmp_int[32|64] stride, kmp_int[32|64] chunk);
// If the Chunk was not specified in the clause - use default value 1.
- if (Chunk == nullptr)
- Chunk = CGF.Builder.getIntN(IVSize, 1);
+ llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
+ : CGF.Builder.getIntN(IVSize, 1);
llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
CGF.Builder.getInt32(addMonoNonMonoModifier(
Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
- CGF.Builder.getIntN(IVSize, 0), // Lower
- UB, // Upper
+ DispatchValues.LB, // Lower
+ DispatchValues.UB, // Upper
CGF.Builder.getIntN(IVSize, 1), // Stride
Chunk // Chunk
};
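
The DispatchRTInput bundle threaded through emitForDispatchInit carries the lower bound, upper bound, and optional chunk; when the schedule clause gave no chunk, the runtime call still receives 1. A minimal stand-in for that defaulting rule (hypothetical type, not the clang one):

  #include <cassert>
  #include <cstdint>

  struct DispatchInput {
    uint64_t LB = 0;
    uint64_t UB = 0;
    uint64_t Chunk = 0;  // 0 stands for "not specified in the clause"
  };

  uint64_t effectiveChunk(const DispatchInput &D) {
    return D.Chunk ? D.Chunk : 1;
  }

  int main() {
    assert(effectiveChunk({0, 100, 0}) == 1);
    assert(effectiveChunk({0, 100, 8}) == 8);
    return 0;
  }
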
@@ -2686,6 +3166,8 @@ enum KmpTaskTFields {
KmpTaskTStride,
/// (Taskloops only) Is last iteration flag.
KmpTaskTLastIter,
+ /// (Taskloops only) Reduction data.
+ KmpTaskTReductions,
};
} // anonymous namespace
@@ -2770,8 +3252,7 @@ createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
const RegionCodeGenTy &Codegen) {
auto &C = CGM.getContext();
FunctionArgList Args;
- ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
- /*Id=*/nullptr, C.VoidPtrTy);
+ ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
Args.push_back(&DummyPtr);
CodeGenFunction CGF(CGM);
@@ -2874,7 +3355,7 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
// descriptor, so we can reuse the logic that emits Ctors and Dtors.
auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
- IdentInfo, C.CharTy);
+ IdentInfo, C.CharTy, ImplicitParamDecl::Other);
auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
CGM, ".omp_offloading.descriptor_unreg",
@@ -2889,6 +3370,19 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
Desc);
CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
});
+ if (CGM.supportsCOMDAT()) {
+    // It is sufficient to call the registration function only once, so create
+    // a COMDAT group for the registration/unregistration functions and
+    // associated data. This reduces startup time and code size. The
+    // registration function serves as the COMDAT group key.
+ auto ComdatKey = M.getOrInsertComdat(RegFn->getName());
+ RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+ RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ RegFn->setComdat(ComdatKey);
+ UnRegFn->setComdat(ComdatKey);
+ DeviceImages->setComdat(ComdatKey);
+ Desc->setComdat(ComdatKey);
+ }
return RegFn;
}
@@ -2958,7 +3452,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Create the offloading info metadata node.
llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
- // Auxiliar methods to create metadata values and strings.
+ // Auxiliary methods to create metadata values and strings.
auto getMDInt = [&](unsigned v) {
return llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
@@ -3225,6 +3719,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
// kmp_uint64 ub;
// kmp_int64 st;
// kmp_int32 liter;
+ // void * reductions;
// };
auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
UD->startDefinition();
@@ -3248,6 +3743,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
addFieldToRecordDecl(C, RD, KmpUInt64Ty);
addFieldToRecordDecl(C, RD, KmpInt64Ty);
addFieldToRecordDecl(C, RD, KmpInt32Ty);
+ addFieldToRecordDecl(C, RD, C.VoidPtrTy);
}
RD->completeDefinition();
return RD;
@@ -3278,7 +3774,7 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
/// For taskloops:
/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
-/// tt->shareds);
+/// tt->reductions, tt->shareds);
/// return 0;
/// }
/// \endcode
@@ -3291,10 +3787,11 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
llvm::Value *TaskPrivatesMap) {
auto &C = CGM.getContext();
FunctionArgList Args;
- ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
- ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr,
- KmpTaskTWithPrivatesPtrQTy.withRestrict());
+ ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ KmpTaskTWithPrivatesPtrQTy.withRestrict(),
+ ImplicitParamDecl::Other);
Args.push_back(&GtidArg);
Args.push_back(&TaskTypeArg);
auto &TaskEntryFnInfo =
@@ -3363,10 +3860,14 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
+ auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
+ auto RLVal = CGF.EmitLValueForField(Base, *RFI);
+ auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal();
CallArgs.push_back(LBParam);
CallArgs.push_back(UBParam);
CallArgs.push_back(StParam);
CallArgs.push_back(LIParam);
+ CallArgs.push_back(RParam);
}
CallArgs.push_back(SharedsParam);
@@ -3385,10 +3886,11 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
QualType KmpTaskTWithPrivatesQTy) {
auto &C = CGM.getContext();
FunctionArgList Args;
- ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
- ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr,
- KmpTaskTWithPrivatesPtrQTy.withRestrict());
+ ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ KmpTaskTWithPrivatesPtrQTy.withRestrict(),
+ ImplicitParamDecl::Other);
Args.push_back(&GtidArg);
Args.push_back(&TaskTypeArg);
FunctionType::ExtInfo Info;
@@ -3444,36 +3946,40 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
FunctionArgList Args;
ImplicitParamDecl TaskPrivatesArg(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.getPointerType(PrivatesQTy).withConst().withRestrict());
+ C.getPointerType(PrivatesQTy).withConst().withRestrict(),
+ ImplicitParamDecl::Other);
Args.push_back(&TaskPrivatesArg);
llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
unsigned Counter = 1;
for (auto *E: PrivateVars) {
Args.push_back(ImplicitParamDecl::Create(
- C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
- .withConst()
- .withRestrict()));
+ C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.getPointerType(C.getPointerType(E->getType()))
+ .withConst()
+ .withRestrict(),
+ ImplicitParamDecl::Other));
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
PrivateVarsPos[VD] = Counter;
++Counter;
}
for (auto *E : FirstprivateVars) {
Args.push_back(ImplicitParamDecl::Create(
- C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
- .withConst()
- .withRestrict()));
+ C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.getPointerType(C.getPointerType(E->getType()))
+ .withConst()
+ .withRestrict(),
+ ImplicitParamDecl::Other));
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
PrivateVarsPos[VD] = Counter;
++Counter;
}
for (auto *E: LastprivateVars) {
Args.push_back(ImplicitParamDecl::Create(
- C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
- .withConst()
- .withRestrict()));
+ C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.getPointerType(C.getPointerType(E->getType()))
+ .withConst()
+ .withRestrict(),
+ ImplicitParamDecl::Other));
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
PrivateVarsPos[VD] = Counter;
++Counter;
@@ -3488,6 +3994,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
TaskPrivatesMapFnInfo);
TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
+ TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
CodeGenFunction CGF(CGM);
CGF.disableDebugInfo();
@@ -3551,7 +4058,9 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
SharedRefLValue = CGF.MakeAddrLValue(
Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
- SharedRefLValue.getType(), AlignmentSource::Decl);
+ SharedRefLValue.getType(),
+ LValueBaseInfo(AlignmentSource::Decl,
+ SharedRefLValue.getBaseInfo().getMayAlias()));
QualType Type = OriginalVD->getType();
if (Type->isArrayType()) {
// Initialize firstprivate array.
@@ -3561,7 +4070,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
SharedRefLValue.getAddress(), Type);
} else {
// Initialize firstprivate array using element-by-element
- // intialization.
+ // initialization.
CGF.EmitOMPAggregateAssign(
PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
[&CGF, Elem, Init, &CapturesInfo](Address DestElement,
@@ -3630,12 +4139,14 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
auto &C = CGM.getContext();
FunctionArgList Args;
- ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
- ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
- ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr, C.IntTy);
+ ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ KmpTaskTWithPrivatesPtrQTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ KmpTaskTWithPrivatesPtrQTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
+ ImplicitParamDecl::Other);
Args.push_back(&DstArg);
Args.push_back(&SrcArg);
Args.push_back(&LastprivArg);
@@ -3745,9 +4256,20 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
// Build type kmp_routine_entry_t (if not built yet).
emitKmpRoutineEntryT(KmpInt32Ty);
// Build type kmp_task_t (if not built yet).
- if (KmpTaskTQTy.isNull()) {
- KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
- CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
+ if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
+ if (SavedKmpTaskloopTQTy.isNull()) {
+ SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
+ CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
+ }
+ KmpTaskTQTy = SavedKmpTaskloopTQTy;
+ } else if (D.getDirectiveKind() == OMPD_task) {
+ assert(D.getDirectiveKind() == OMPD_task &&
+ "Expected taskloop or task directive");
+ if (SavedKmpTaskTQTy.isNull()) {
+ SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
+ CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
+ }
+ KmpTaskTQTy = SavedKmpTaskTQTy;
}
auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
// Build particular struct kmp_task_t for the given task.
@@ -3764,9 +4286,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
// Emit initial values for private copies (if any).
llvm::Value *TaskPrivatesMap = nullptr;
auto *TaskPrivatesMapTy =
- std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
- 3)
- ->getType();
+ std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
if (!Privates.empty()) {
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
TaskPrivatesMap = emitTaskPrivateMappingFunction(
@@ -4006,8 +4526,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
DepTaskArgs[5] = CGF.Builder.getInt32(0);
DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
- auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD,
- NumDependencies, &TaskArgs,
+ auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
+ &TaskArgs,
&DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
if (!Data.Tied) {
auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
@@ -4121,11 +4641,27 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
/*IsInitializer=*/true);
+ // Store reductions address.
+ LValue RedLVal = CGF.EmitLValueForField(
+ Result.TDBase,
+ *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
+ if (Data.Reductions)
+ CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
+ else {
+ CGF.EmitNullInitialization(RedLVal.getAddress(),
+ CGF.getContext().VoidPtrTy);
+ }
enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
llvm::Value *TaskArgs[] = {
- UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(),
- UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
- llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0),
+ UpLoc,
+ ThreadID,
+ Result.NewTask,
+ IfVal,
+ LBLVal.getPointer(),
+ UBLVal.getPointer(),
+ CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
+ llvm::ConstantInt::getNullValue(
+ CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
llvm::ConstantInt::getSigned(
CGF.IntTy, Data.Schedule.getPointer()
? Data.Schedule.getInt() ? NumTasks : Grainsize
@@ -4134,10 +4670,9 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
/*isSigned=*/false)
: llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
- Result.TaskDupFn
- ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn,
- CGF.VoidPtrTy)
- : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+ Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Result.TaskDupFn, CGF.VoidPtrTy)
+ : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
@@ -4241,20 +4776,16 @@ static void emitReductionCombiner(CodeGenFunction &CGF,
CGF.EmitIgnoredExpr(ReductionOp);
}
-static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
- llvm::Type *ArgsType,
- ArrayRef<const Expr *> Privates,
- ArrayRef<const Expr *> LHSExprs,
- ArrayRef<const Expr *> RHSExprs,
- ArrayRef<const Expr *> ReductionOps) {
+llvm::Value *CGOpenMPRuntime::emitReductionFunction(
+ CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps) {
auto &C = CGM.getContext();
// void reduction_func(void *LHSArg, void *RHSArg);
FunctionArgList Args;
- ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
- C.VoidPtrTy);
- ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
- C.VoidPtrTy);
+ ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
Args.push_back(&LHSArg);
Args.push_back(&RHSArg);
auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
@@ -4329,11 +4860,11 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
return Fn;
}
-static void emitSingleReductionCombiner(CodeGenFunction &CGF,
- const Expr *ReductionOp,
- const Expr *PrivateRef,
- const DeclRefExpr *LHS,
- const DeclRefExpr *RHS) {
+void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
+ const Expr *ReductionOp,
+ const Expr *PrivateRef,
+ const DeclRefExpr *LHS,
+ const DeclRefExpr *RHS) {
if (PrivateRef->getType()->isArrayType()) {
// Emit reduction for array section.
auto *LHSVar = cast<VarDecl>(LHS->getDecl());
@@ -4353,9 +4884,13 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
ArrayRef<const Expr *> LHSExprs,
ArrayRef<const Expr *> RHSExprs,
ArrayRef<const Expr *> ReductionOps,
- bool WithNowait, bool SimpleReduction) {
+ ReductionOptionsTy Options) {
if (!CGF.HaveInsertPoint())
return;
+
+ bool WithNowait = Options.WithNowait;
+ bool SimpleReduction = Options.SimpleReduction;
+
// Next code should be emitted for reduction:
//
// static kmp_critical_name lock = { 0 };
@@ -4497,12 +5032,13 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
};
auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
CodeGenFunction &CGF, PrePostActionTy &Action) {
+ auto &RT = CGF.CGM.getOpenMPRuntime();
auto IPriv = Privates.begin();
auto ILHS = LHSExprs.begin();
auto IRHS = RHSExprs.begin();
for (auto *E : ReductionOps) {
- emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
- cast<DeclRefExpr>(*IRHS));
+ RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
+ cast<DeclRefExpr>(*IRHS));
++IPriv;
++ILHS;
++IRHS;
@@ -4562,7 +5098,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
}
if (XExpr) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
- auto &&AtomicRedGen = [BO, VD, IPriv,
+ auto &&AtomicRedGen = [BO, VD,
Loc](CodeGenFunction &CGF, const Expr *XExpr,
const Expr *EExpr, const Expr *UpExpr) {
LValue X = CGF.EmitLValue(XExpr);
@@ -4572,7 +5108,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitOMPAtomicSimpleUpdateExpr(
X, E, BO, /*IsXLHSInRHSPart=*/true,
llvm::AtomicOrdering::Monotonic, Loc,
- [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
+ [&CGF, UpExpr, VD, Loc](RValue XRValue) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
PrivateScope.addPrivate(
VD, [&CGF, VD, XRValue, Loc]() -> Address {
@@ -4640,6 +5176,353 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
+/// Generates a unique name for artificial threadprivate variables.
+/// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N>
+static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc,
+ unsigned N) {
+ SmallString<256> Buffer;
+ llvm::raw_svector_ostream Out(Buffer);
+ Out << Prefix << "." << Loc.getRawEncoding() << "_" << N;
+ return Out.str();
+}
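+// For illustration (assuming a raw source-location encoding of 123456), the
+// size variable of the first reduction item would be named
+// "reduction_size.123456_0".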
+
+/// Emits reduction initializer function:
+/// \code
+/// void @.red_init(void* %arg) {
+/// %0 = bitcast void* %arg to <type>*
+/// store <type> <init>, <type>* %0
+/// ret void
+/// }
+/// \endcode
+static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
+ SourceLocation Loc,
+ ReductionCodeGen &RCG, unsigned N) {
+ auto &C = CGM.getContext();
+ FunctionArgList Args;
+ ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ Args.emplace_back(&Param);
+ auto &FnInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+ ".red_init.", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+ CodeGenFunction CGF(CGM);
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ Address PrivateAddr = CGF.EmitLoadOfPointer(
+ CGF.GetAddrOfLocalVar(&Param),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ llvm::Value *Size = nullptr;
+ // If the size of the reduction item is non-constant, load it from global
+ // threadprivate variable.
+ if (RCG.getSizes(N).second) {
+ Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().getSizeType(),
+ generateUniqueName("reduction_size", Loc, N));
+ Size =
+ CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+ CGM.getContext().getSizeType(), SourceLocation());
+ }
+ RCG.emitAggregateType(CGF, N, Size);
+ LValue SharedLVal;
+  // If the initializer uses the initializer from a declare reduction
+  // construct, emit a pointer to the address of the original reduction item
+  // (required by the reduction initializer).
+ if (RCG.usesReductionInitializer(N)) {
+ Address SharedAddr =
+ CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().VoidPtrTy,
+ generateUniqueName("reduction", Loc, N));
+ SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
+ } else {
+ SharedLVal = CGF.MakeNaturalAlignAddrLValue(
+ llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+ CGM.getContext().VoidPtrTy);
+ }
+ // Emit the initializer:
+ // %0 = bitcast void* %arg to <type>*
+ // store <type> <init>, <type>* %0
+ RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
+ [](CodeGenFunction &) { return false; });
+ CGF.FinishFunction();
+ return Fn;
+}
+
+/// Emits reduction combiner function:
+/// \code
+/// void @.red_comb(void* %arg0, void* %arg1) {
+/// %lhs = bitcast void* %arg0 to <type>*
+/// %rhs = bitcast void* %arg1 to <type>*
+/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
+/// store <type> %2, <type>* %lhs
+/// ret void
+/// }
+/// \endcode
+static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
+ SourceLocation Loc,
+ ReductionCodeGen &RCG, unsigned N,
+ const Expr *ReductionOp,
+ const Expr *LHS, const Expr *RHS,
+ const Expr *PrivateRef) {
+ auto &C = CGM.getContext();
+ auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
+ auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
+ FunctionArgList Args;
+ ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ Args.emplace_back(&ParamInOut);
+ Args.emplace_back(&ParamIn);
+ auto &FnInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+ ".red_comb.", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+ CodeGenFunction CGF(CGM);
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ llvm::Value *Size = nullptr;
+ // If the size of the reduction item is non-constant, load it from global
+ // threadprivate variable.
+ if (RCG.getSizes(N).second) {
+ Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().getSizeType(),
+ generateUniqueName("reduction_size", Loc, N));
+ Size =
+ CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+ CGM.getContext().getSizeType(), SourceLocation());
+ }
+ RCG.emitAggregateType(CGF, N, Size);
+ // Remap lhs and rhs variables to the addresses of the function arguments.
+ // %lhs = bitcast void* %arg0 to <type>*
+ // %rhs = bitcast void* %arg1 to <type>*
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address {
+ // Pull out the pointer to the variable.
+ Address PtrAddr = CGF.EmitLoadOfPointer(
+ CGF.GetAddrOfLocalVar(&ParamInOut),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ return CGF.Builder.CreateElementBitCast(
+ PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
+ });
+ PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address {
+ // Pull out the pointer to the variable.
+ Address PtrAddr = CGF.EmitLoadOfPointer(
+ CGF.GetAddrOfLocalVar(&ParamIn),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ return CGF.Builder.CreateElementBitCast(
+ PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
+ });
+ PrivateScope.Privatize();
+ // Emit the combiner body:
+ // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
+ // store <type> %2, <type>* %lhs
+ CGM.getOpenMPRuntime().emitSingleReductionCombiner(
+ CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
+ cast<DeclRefExpr>(RHS));
+ CGF.FinishFunction();
+ return Fn;
+}
+
+/// Emits reduction finalizer function:
+/// \code
+/// void @.red_fini(void* %arg) {
+/// %0 = bitcast void* %arg to <type>*
+/// <destroy>(<type>* %0)
+/// ret void
+/// }
+/// \endcode
+static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
+ SourceLocation Loc,
+ ReductionCodeGen &RCG, unsigned N) {
+ if (!RCG.needCleanups(N))
+ return nullptr;
+ auto &C = CGM.getContext();
+ FunctionArgList Args;
+ ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ Args.emplace_back(&Param);
+ auto &FnInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+ ".red_fini.", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+ CodeGenFunction CGF(CGM);
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ Address PrivateAddr = CGF.EmitLoadOfPointer(
+ CGF.GetAddrOfLocalVar(&Param),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ llvm::Value *Size = nullptr;
+ // If the size of the reduction item is non-constant, load it from global
+ // threadprivate variable.
+ if (RCG.getSizes(N).second) {
+ Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().getSizeType(),
+ generateUniqueName("reduction_size", Loc, N));
+ Size =
+ CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+ CGM.getContext().getSizeType(), SourceLocation());
+ }
+ RCG.emitAggregateType(CGF, N, Size);
+ // Emit the finalizer body:
+ // <destroy>(<type>* %0)
+ RCG.emitCleanups(CGF, N, PrivateAddr);
+ CGF.FinishFunction();
+ return Fn;
+}
+
+llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
+ CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
+ if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
+ return nullptr;
+
+ // Build typedef struct:
+ // kmp_task_red_input {
+ // void *reduce_shar; // shared reduction item
+ // size_t reduce_size; // size of data item
+ // void *reduce_init; // data initialization routine
+ // void *reduce_fini; // data finalization routine
+ // void *reduce_comb; // data combiner routine
+ // kmp_task_red_flags_t flags; // flags for additional info from compiler
+ // } kmp_task_red_input_t;
+ ASTContext &C = CGM.getContext();
+ auto *RD = C.buildImplicitRecord("kmp_task_red_input_t");
+ RD->startDefinition();
+ const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
+ const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ const FieldDecl *FlagsFD = addFieldToRecordDecl(
+ C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
+ RD->completeDefinition();
+ QualType RDType = C.getRecordType(RD);
+ unsigned Size = Data.ReductionVars.size();
+ llvm::APInt ArraySize(/*numBits=*/64, Size);
+ QualType ArrayRDType = C.getConstantArrayType(
+ RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
+ // kmp_task_red_input_t .rd_input.[Size];
+ Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
+ ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
+ Data.ReductionOps);
+ for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
+ // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
+ llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
+ llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
+ llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
+ TaskRedInput.getPointer(), Idxs,
+ /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
+ ".rd_input.gep.");
+ LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
+ // ElemLVal.reduce_shar = &Shareds[Cnt];
+ LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
+ RCG.emitSharedLValue(CGF, Cnt);
+ llvm::Value *CastedShared =
+ CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
+ CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
+ RCG.emitAggregateType(CGF, Cnt);
+ llvm::Value *SizeValInChars;
+ llvm::Value *SizeVal;
+ std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
+    // We use delayed creation/initialization for VLAs, array sections and
+    // custom reduction initializations. It is required because the runtime
+    // does not provide a way to pass the sizes of VLAs/array sections to the
+    // initializer/combiner/finalizer functions and does not pass the pointer
+    // to the original reduction item to the initializer. Instead,
+    // threadprivate global variables are used to store these values and pass
+    // them to the functions.
+ bool DelayedCreation = !!SizeVal;
+ SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
+ /*isSigned=*/false);
+ LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
+ CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
+ // ElemLVal.reduce_init = init;
+ LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
+ llvm::Value *InitAddr =
+ CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
+ CGF.EmitStoreOfScalar(InitAddr, InitLVal);
+ DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
+ // ElemLVal.reduce_fini = fini;
+ LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
+ llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
+ llvm::Value *FiniAddr = Fini
+ ? CGF.EmitCastToVoidPtr(Fini)
+ : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
+ CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
+ // ElemLVal.reduce_comb = comb;
+ LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
+ llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
+ CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
+ RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
+ CGF.EmitStoreOfScalar(CombAddr, CombLVal);
+ // ElemLVal.flags = 0;
+ LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
+ if (DelayedCreation) {
+ CGF.EmitStoreOfScalar(
+ llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
+ FlagsLVal);
+ } else
+ CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
+ }
+ // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
+ // *data);
+ llvm::Value *Args[] = {
+ CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
+ /*isSigned=*/true),
+ llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
+ CGM.VoidPtrTy)};
+ return CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
+}
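+// Illustrative only: a construct such as
+//   #pragma omp taskloop reduction(+ : sum)
+//   for (int i = 0; i < n; ++i) sum += a[i];
+// reaches this function; one kmp_task_red_input_t entry is filled per
+// reduction item and the array is passed to __kmpc_task_reduction_init, whose
+// result is later stored in the task's 'reductions' field.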
+
+void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ ReductionCodeGen &RCG,
+ unsigned N) {
+ auto Sizes = RCG.getSizes(N);
+  // Emit a threadprivate global variable if the size of the reduction item is
+  // non-constant (Sizes.second != nullptr).
+ if (Sizes.second) {
+ llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
+ /*isSigned=*/false);
+ Address SizeAddr = getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().getSizeType(),
+ generateUniqueName("reduction_size", Loc, N));
+ CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
+ }
+  // Store the address of the original reduction item if a custom initializer
+  // is used.
+ if (RCG.usesReductionInitializer(N)) {
+ Address SharedAddr = getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().VoidPtrTy,
+ generateUniqueName("reduction", Loc, N));
+ CGF.Builder.CreateStore(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
+ SharedAddr, /*IsVolatile=*/false);
+ }
+}
+
+Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ llvm::Value *ReductionsPtr,
+ LValue SharedLVal) {
+ // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
+ // *d);
+ llvm::Value *Args[] = {
+ CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
+ /*isSigned=*/true),
+ ReductionsPtr,
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
+ CGM.VoidPtrTy)};
+ return Address(
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
+ SharedLVal.getAlignment());
+}
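+// Illustrative only: when a task body references a reduction item, codegen
+// loads the 'reductions' pointer stored in kmp_task_t and uses this helper to
+// call __kmpc_task_reduction_get_th_data(gtid, reductions, &original_item),
+// obtaining the address of the task-private copy that replaces the original
+// variable inside the task region.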
+
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
SourceLocation Loc) {
if (!CGF.HaveInsertPoint())
@@ -4874,25 +5757,45 @@ static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
return Body;
}
-/// \brief Emit the num_teams clause of an enclosed teams directive at the
-/// target region scope. If there is no teams directive associated with the
-/// target directive, or if there is no num_teams clause associated with the
-/// enclosed teams directive, return nullptr.
+/// Emit the number of teams for a target directive. Inspect the num_teams
+/// clause associated with a teams construct combined or closely nested
+/// with the target directive.
+///
+/// Emit a team of size one for directives such as 'target parallel' that
+/// have no associated teams construct.
+///
+/// Otherwise, return nullptr.
static llvm::Value *
-emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
- CodeGenFunction &CGF,
- const OMPExecutableDirective &D) {
+emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
+ CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) {
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
"teams directive expected to be "
"emitted only for the host!");
- // FIXME: For the moment we do not support combined directives with target and
- // teams, so we do not expect to get any num_teams clause in the provided
- // directive. Once we support that, this assertion can be replaced by the
- // actual emission of the clause expression.
- assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
- "Not expecting clause in directive.");
+ auto &Bld = CGF.Builder;
+
+ // If the target directive is combined with a teams directive:
+ // Return the value in the num_teams clause, if any.
+ // Otherwise, return 0 to denote the runtime default.
+ if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
+ if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
+ CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
+ auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
+ /*IgnoreResultAssign*/ true);
+ return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
+ /*IsSigned=*/true);
+ }
+
+ // The default value is 0.
+ return Bld.getInt32(0);
+ }
+
+ // If the target directive is combined with a parallel directive but not a
+ // teams directive, start one team.
+ if (isOpenMPParallelDirective(D.getDirectiveKind()))
+ return Bld.getInt32(1);
// If the current target region has a teams region enclosed, we need to get
// the number of teams to pass to the runtime function call. This is done
@@ -4910,38 +5813,92 @@ emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
CGOpenMPInnerExprInfo CGInfo(CGF, CS);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
- return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
- /*IsSigned=*/true);
+ return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
+ /*IsSigned=*/true);
}
// If we have an enclosed teams directive but no num_teams clause we use
// the default value 0.
- return CGF.Builder.getInt32(0);
+ return Bld.getInt32(0);
}
// No teams associated with the directive.
return nullptr;
}
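+// For illustration: '#pragma omp target teams num_teams(4)' returns the
+// emitted value 4, a combined 'target parallel' returns 1 (a single team), and
+// a plain 'target' with no enclosed teams region returns nullptr.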
-/// \brief Emit the thread_limit clause of an enclosed teams directive at the
-/// target region scope. If there is no teams directive associated with the
-/// target directive, or if there is no thread_limit clause associated with the
-/// enclosed teams directive, return nullptr.
+/// Emit the number of threads for a target directive. Inspect the
+/// thread_limit clause associated with a teams construct combined or closely
+/// nested with the target directive.
+///
+/// Emit the num_threads clause for directives such as 'target parallel' that
+/// have no associated teams construct.
+///
+/// Otherwise, return nullptr.
static llvm::Value *
-emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
- CodeGenFunction &CGF,
- const OMPExecutableDirective &D) {
+emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
+ CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) {
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
"teams directive expected to be "
"emitted only for the host!");
- // FIXME: For the moment we do not support combined directives with target and
- // teams, so we do not expect to get any thread_limit clause in the provided
- // directive. Once we support that, this assertion can be replaced by the
- // actual emission of the clause expression.
- assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
- "Not expecting clause in directive.");
+ auto &Bld = CGF.Builder;
+
+ //
+ // If the target directive is combined with a teams directive:
+ // Return the value in the thread_limit clause, if any.
+ //
+ // If the target directive is combined with a parallel directive:
+ // Return the value in the num_threads clause, if any.
+ //
+ // If both clauses are set, select the minimum of the two.
+ //
+  // If neither the teams nor the parallel combined construct sets the number
+  // of threads in a team, return 0 to denote the runtime default.
+  //
+  // If this is not a teams directive, return nullptr.
+
+ if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
+ isOpenMPParallelDirective(D.getDirectiveKind())) {
+ llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
+ llvm::Value *NumThreadsVal = nullptr;
+ llvm::Value *ThreadLimitVal = nullptr;
+
+ if (const auto *ThreadLimitClause =
+ D.getSingleClause<OMPThreadLimitClause>()) {
+ CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
+ auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
+ /*IgnoreResultAssign*/ true);
+ ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
+ /*IsSigned=*/true);
+ }
+
+ if (const auto *NumThreadsClause =
+ D.getSingleClause<OMPNumThreadsClause>()) {
+ CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
+ llvm::Value *NumThreads =
+ CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
+ /*IgnoreResultAssign*/ true);
+ NumThreadsVal =
+ Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
+ }
+
+ // Select the lesser of thread_limit and num_threads.
+ if (NumThreadsVal)
+ ThreadLimitVal = ThreadLimitVal
+ ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
+ ThreadLimitVal),
+ NumThreadsVal, ThreadLimitVal)
+ : NumThreadsVal;
+
+ // Set default value passed to the runtime if either teams or a target
+ // parallel type directive is found but no clause is specified.
+ if (!ThreadLimitVal)
+ ThreadLimitVal = DefaultThreadLimitVal;
+
+ return ThreadLimitVal;
+ }
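+  // For illustration: 'target teams distribute parallel for thread_limit(8)
+  // num_threads(4)' yields min(8, 4) = 4 here, while with neither clause
+  // present the value 0 is returned so the runtime chooses its own default.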
// If the current target region has a teams region enclosed, we need to get
// the thread limit to pass to the runtime function call. This is done
@@ -5494,7 +6451,7 @@ public:
// We have to process the component lists that relate with the same
// declaration in a single chunk so that we can generate the map flags
// correctly. Therefore, we organize all lists in a map.
- llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
+ llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
// Helper function to fill the information map for the different supported
// clauses.
@@ -5818,16 +6775,11 @@ emitOffloadingArrays(CodeGenFunction &CGF,
for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
llvm::Value *BPVal = *BasePointers[i];
- if (BPVal->getType()->isPointerTy())
- BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
- else {
- assert(BPVal->getType()->isIntegerTy() &&
- "If not a pointer, the value type must be an integer.");
- BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
- }
llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
Info.BasePointersArray, 0, i);
+ BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
CGF.Builder.CreateStore(BPVal, BPAddr);
@@ -5836,16 +6788,11 @@ emitOffloadingArrays(CodeGenFunction &CGF,
Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));
llvm::Value *PVal = Pointers[i];
- if (PVal->getType()->isPointerTy())
- PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
- else {
- assert(PVal->getType()->isIntegerTy() &&
- "If not a pointer, the value type must be an integer.");
- PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
- }
llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
Info.PointersArray, 0, i);
+ P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
CGF.Builder.CreateStore(PVal, PAddr);
@@ -5984,8 +6931,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
OffloadError);
// Fill up the pointer arrays and transfer execution to the device.
- auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
- OutlinedFnID, OffloadError, OffloadErrorQType,
+ auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device,
+ OutlinedFnID, OffloadError,
&D](CodeGenFunction &CGF, PrePostActionTy &) {
auto &RT = CGF.CGM.getOpenMPRuntime();
// Emit the offloading arrays.
@@ -6021,24 +6968,50 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
// Return value of the runtime offloading call.
llvm::Value *Return;
- auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
- auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);
+ auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D);
+ auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D);
- // If we have NumTeams defined this means that we have an enclosed teams
- // region. Therefore we also expect to have ThreadLimit defined. These two
- // values should be defined in the presence of a teams directive, regardless
- // of having any clauses associated. If the user is using teams but no
- // clauses, these two values will be the default that should be passed to
- // the runtime library - a 32-bit integer with the value zero.
+ // The target region is an outlined function launched by the runtime
+ // via calls __tgt_target() or __tgt_target_teams().
+ //
+ // __tgt_target() launches a target region with one team and one thread,
+ // executing a serial region. This master thread may in turn launch
+  // more threads within its team upon encountering a parallel region;
+  // however, no additional teams can be launched on the device.
+ //
+ // __tgt_target_teams() launches a target region with one or more teams,
+ // each with one or more threads. This call is required for target
+ // constructs such as:
+ // 'target teams'
+ // 'target' / 'teams'
+ // 'target teams distribute parallel for'
+ // 'target parallel'
+ // and so on.
+ //
+  // Note that on the host and CPU targets, the runtime implementation of
+  // these calls simply calls the outlined function without forking threads.
+ // The outlined functions themselves have runtime calls to
+ // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
+ // the compiler in emitTeamsCall() and emitParallelCall().
+ //
+ // In contrast, on the NVPTX target, the implementation of
+ // __tgt_target_teams() launches a GPU kernel with the requested number
+ // of teams and threads so no additional calls to the runtime are required.
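+  //
+  // For illustration: a plain '#pragma omp target' takes the __tgt_target()
+  // branch below (NumTeams is nullptr), whereas '#pragma omp target parallel'
+  // and the 'target teams' variants take the __tgt_target_teams() branch.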
if (NumTeams) {
- assert(ThreadLimit && "Thread limit expression should be available along "
- "with number of teams.");
+ // If we have NumTeams defined this means that we have an enclosed teams
+ // region. Therefore we also expect to have NumThreads defined. These two
+ // values should be defined in the presence of a teams directive,
+ // regardless of having any clauses associated. If the user is using teams
+ // but no clauses, these two values will be the default that should be
+ // passed to the runtime library - a 32-bit integer with the value zero.
+ assert(NumThreads && "Thread limit expression should be available along "
+ "with number of teams.");
llvm::Value *OffloadingArgs[] = {
DeviceID, OutlinedFnID,
PointerNum, Info.BasePointersArray,
Info.PointersArray, Info.SizesArray,
Info.MapTypesArray, NumTeams,
- ThreadLimit};
+ NumThreads};
Return = CGF.EmitRuntimeCall(
RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
} else {
@@ -6095,17 +7068,18 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
if (!S)
return;
- // If we find a OMP target directive, codegen the outline function and
- // register the result.
- // FIXME: Add other directives with target when they become supported.
- bool isTargetDirective = isa<OMPTargetDirective>(S);
+ // Codegen OMP target directives that offload compute to the device.
+ bool requiresDeviceCodegen =
+ isa<OMPExecutableDirective>(S) &&
+ isOpenMPTargetExecutionDirective(
+ cast<OMPExecutableDirective>(S)->getDirectiveKind());
- if (isTargetDirective) {
- auto *E = cast<OMPExecutableDirective>(S);
+ if (requiresDeviceCodegen) {
+ auto &E = *cast<OMPExecutableDirective>(S);
unsigned DeviceID;
unsigned FileID;
unsigned Line;
- getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
+ getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID,
FileID, Line);
// Is this a target region that should not be emitted as an entry point? If
@@ -6114,13 +7088,22 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
ParentName, Line))
return;
- llvm::Function *Fn;
- llvm::Constant *Addr;
- std::tie(Fn, Addr) =
- CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
- CGM, cast<OMPTargetDirective>(*E), ParentName,
- /*isOffloadEntry=*/true);
- assert(Fn && Addr && "Target region emission failed.");
+ switch (S->getStmtClass()) {
+ case Stmt::OMPTargetDirectiveClass:
+ CodeGenFunction::EmitOMPTargetDeviceFunction(
+ CGM, ParentName, cast<OMPTargetDirective>(*S));
+ break;
+ case Stmt::OMPTargetParallelDirectiveClass:
+ CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
+ CGM, ParentName, cast<OMPTargetParallelDirective>(*S));
+ break;
+ case Stmt::OMPTargetTeamsDirectiveClass:
+ CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
+ CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
+ break;
+ default:
+ llvm_unreachable("Unknown target directive for OpenMP device codegen.");
+ }
return;
}
@@ -6182,7 +7165,7 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
}
}
- // If we are in target mode we do not emit any global (declare target is not
+ // If we are in target mode, we do not emit any global (declare target is not
// implemented yet). Therefore we signal that GD was processed in this case.
return true;
}
@@ -6271,8 +7254,8 @@ void CGOpenMPRuntime::emitTargetDataCalls(
// Generate the code for the opening of the data environment. Capture all the
// arguments of the runtime call by reference because they are used in the
// closing of the region.
- auto &&BeginThenGen = [&D, &CGF, Device, &Info, &CodeGen, &NoPrivAction](
- CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF,
+ PrePostActionTy &) {
// Fill up the arrays with all the mapped variables.
MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
MappableExprsHandler::MapValuesArrayTy Pointers;
@@ -6318,8 +7301,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(
};
// Generate code for the closing of the data region.
- auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) {
assert(Info.isValid() && "Invalid data environment closing arguments.");
llvm::Value *BasePointersArrayArg = nullptr;
@@ -6397,7 +7379,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
"Expecting either target enter, exit data, or update directives.");
// Generate the code for the opening of the data environment.
- auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) {
// Fill up the arrays with all the mapped variables.
MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
MappableExprsHandler::MapValuesArrayTy Pointers;
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h
index 61ddc70..185c01d 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -96,15 +96,106 @@ struct OMPTaskDataTy final {
SmallVector<const Expr *, 4> FirstprivateInits;
SmallVector<const Expr *, 4> LastprivateVars;
SmallVector<const Expr *, 4> LastprivateCopies;
+ SmallVector<const Expr *, 4> ReductionVars;
+ SmallVector<const Expr *, 4> ReductionCopies;
+ SmallVector<const Expr *, 4> ReductionOps;
SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4> Dependences;
llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule;
llvm::PointerIntPair<llvm::Value *, 1, bool> Priority;
+ llvm::Value *Reductions = nullptr;
unsigned NumberOfParts = 0;
bool Tied = true;
bool Nogroup = false;
};
+/// Class intended to support codegen of all kinds of reduction clauses.
+class ReductionCodeGen {
+private:
+ /// Data required for codegen of reduction clauses.
+ struct ReductionData {
+ /// Reference to the original shared item.
+ const Expr *Ref = nullptr;
+ /// Helper expression for generation of private copy.
+ const Expr *Private = nullptr;
+    /// Helper expression for generation of the reduction operation.
+ const Expr *ReductionOp = nullptr;
+ ReductionData(const Expr *Ref, const Expr *Private, const Expr *ReductionOp)
+ : Ref(Ref), Private(Private), ReductionOp(ReductionOp) {}
+ };
+ /// List of reduction-based clauses.
+ SmallVector<ReductionData, 4> ClausesData;
+
+ /// List of addresses of original shared variables/expressions.
+ SmallVector<std::pair<LValue, LValue>, 4> SharedAddresses;
+ /// Sizes of the reduction items in chars.
+ SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4> Sizes;
+ /// Base declarations for the reduction items.
+ SmallVector<const VarDecl *, 4> BaseDecls;
+
+  /// Emits lvalue for shared expression.
+ LValue emitSharedLValue(CodeGenFunction &CGF, const Expr *E);
+ /// Emits upper bound for shared expression (if array section).
+ LValue emitSharedLValueUB(CodeGenFunction &CGF, const Expr *E);
+ /// Performs aggregate initialization.
+ /// \param N Number of reduction item in the common list.
+ /// \param PrivateAddr Address of the corresponding private item.
+ /// \param SharedLVal Address of the original shared variable.
+ /// \param DRD Declare reduction construct used for reduction item.
+ void emitAggregateInitialization(CodeGenFunction &CGF, unsigned N,
+ Address PrivateAddr, LValue SharedLVal,
+ const OMPDeclareReductionDecl *DRD);
+
+public:
+ ReductionCodeGen(ArrayRef<const Expr *> Shareds,
+ ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> ReductionOps);
+ /// Emits lvalue for a reduction item.
+ /// \param N Number of the reduction item.
+ void emitSharedLValue(CodeGenFunction &CGF, unsigned N);
+ /// Emits the code for the variable-modified type, if required.
+ /// \param N Number of the reduction item.
+ void emitAggregateType(CodeGenFunction &CGF, unsigned N);
+ /// Emits the code for the variable-modified type, if required.
+ /// \param N Number of the reduction item.
+ /// \param Size Size of the type in chars.
+ void emitAggregateType(CodeGenFunction &CGF, unsigned N, llvm::Value *Size);
+ /// Performs initialization of the private copy for the reduction item.
+ /// \param N Number of the reduction item.
+ /// \param PrivateAddr Address of the corresponding private item.
+ /// \param DefaultInit Default initialization sequence that should be
+ /// performed if no reduction specific initialization is found.
+ /// \param SharedLVal Address of the original shared variable.
+ void
+ emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr,
+ LValue SharedLVal,
+ llvm::function_ref<bool(CodeGenFunction &)> DefaultInit);
+ /// Returns true if the private copy requires cleanups.
+ bool needCleanups(unsigned N);
+ /// Emits cleanup code for the reduction item.
+ /// \param N Number of the reduction item.
+ /// \param PrivateAddr Address of the corresponding private item.
+ void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr);
+  /// Adjusts \p PrivateAddr to be used instead of the original variable
+  /// address in normal operations.
+ /// \param N Number of the reduction item.
+ /// \param PrivateAddr Address of the corresponding private item.
+ Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
+ Address PrivateAddr);
+ /// Returns LValue for the reduction item.
+ LValue getSharedLValue(unsigned N) const { return SharedAddresses[N].first; }
+  /// Returns the sizes of the reduction item: the size in chars and the total
+  /// number of elements in the item (nullptr if the size is a constant).
+ std::pair<llvm::Value *, llvm::Value *> getSizes(unsigned N) const {
+ return Sizes[N];
+ }
+ /// Returns the base declaration of the reduction item.
+ const VarDecl *getBaseDecl(unsigned N) const { return BaseDecls[N]; }
+  /// Returns true if the initialization of the reduction item uses the
+  /// initializer from a declare reduction construct.
+ bool usesReductionInitializer(unsigned N) const;
+};
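+// Typical usage (see CGOpenMPRuntime::emitTaskReductionInit above): construct
+// the helper from the reduction clause lists, call emitSharedLValue() and
+// emitAggregateType() for each item, and let the generated .red_init./
+// .red_fini. helpers use emitInitialization() and emitCleanups().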
+
class CGOpenMPRuntime {
protected:
CodeGenModule &CGM;
@@ -121,7 +212,7 @@ protected:
/// \param OutlinedFnID Outlined function ID value to be defined by this call.
/// \param IsOffloadEntry True if the outlined function is an offload entry.
/// \param CodeGen Lambda codegen specific to an accelerator device.
- /// An oulined function may not be an entry if, e.g. the if clause always
+ /// An outlined function may not be an entry if, e.g. the if clause always
/// evaluates to false.
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D,
StringRef ParentName,
@@ -222,6 +313,10 @@ private:
/// deconstructors of firstprivate C++ objects */
/// } kmp_task_t;
QualType KmpTaskTQTy;
+ /// Saved kmp_task_t for task directive.
+ QualType SavedKmpTaskTQTy;
+ /// Saved kmp_task_t for taskloop-based directive.
+ QualType SavedKmpTaskloopTQTy;
/// \brief Type typedef struct kmp_depend_info {
/// kmp_intptr_t base_addr;
/// size_t len;
@@ -527,6 +622,7 @@ public:
/// Get combiner/initializer for the specified user-defined reduction, if any.
virtual std::pair<llvm::Function *, llvm::Function *>
getUserDefinedReduction(const OMPDeclareReductionDecl *D);
+
/// \brief Emits outlined function for the specified OpenMP parallel directive
/// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
/// kmp_int32 BoundID, struct context_vars*).
@@ -535,7 +631,19 @@ public:
/// \param InnermostKind Kind of innermost directive (for simple directives it
/// is a directive itself, for combined - its innermost directive).
/// \param CodeGen Code generation sequence for the \a D directive.
- virtual llvm::Value *emitParallelOrTeamsOutlinedFunction(
+ virtual llvm::Value *emitParallelOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);
+
+ /// \brief Emits outlined function for the specified OpenMP teams directive
+ /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
+ /// kmp_int32 BoundID, struct context_vars*).
+ /// \param D OpenMP directive.
+ /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+ /// \param InnermostKind Kind of innermost directive (for simple directives it
+ /// is a directive itself, for combined - its innermost directive).
+ /// \param CodeGen Code generation sequence for the \a D directive.
+ virtual llvm::Value *emitTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);
@@ -659,16 +767,50 @@ public:
///
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const;
+ /// struct with the values to be passed to the dispatch runtime function
+ struct DispatchRTInput {
+ /// Loop lower bound
+ llvm::Value *LB = nullptr;
+ /// Loop upper bound
+ llvm::Value *UB = nullptr;
+ /// Chunk size specified using 'schedule' clause (nullptr if chunk
+ /// was not specified)
+ llvm::Value *Chunk = nullptr;
+ DispatchRTInput() = default;
+ DispatchRTInput(llvm::Value *LB, llvm::Value *UB, llvm::Value *Chunk)
+ : LB(LB), UB(UB), Chunk(Chunk) {}
+ };
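+  // Illustrative use only (LBVal, UBVal and ChunkVal are hypothetical caller
+  // values):
+  //   DispatchRTInput DispatchValues(LBVal, UBVal, ChunkVal);
+  //   RT.emitForDispatchInit(CGF, Loc, ScheduleKind, IVSize, IVSigned,
+  //                          Ordered, DispatchValues);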
+
+ /// Call the appropriate runtime routine to initialize it before start
+ /// of loop.
+  ///
+  /// This is used for non-static scheduled types and when the ordered
+ /// clause is present on the loop construct.
+ /// Depending on the loop schedule, it is necessary to call some runtime
+ /// routine before start of the OpenMP loop to get the loop upper / lower
+ /// bounds \a LB and \a UB and stride \a ST.
+ ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
+ /// \param IVSize Size of the iteration variable in bits.
+ /// \param IVSigned Sign of the iteration variable.
+ /// \param Ordered true if loop is ordered, false otherwise.
+ /// \param DispatchValues struct containing llvm values for lower bound, upper
+ /// bound, and chunk expression.
+ /// For the default (nullptr) value, the chunk 1 will be used.
+ ///
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc,
const OpenMPScheduleTy &ScheduleKind,
unsigned IVSize, bool IVSigned, bool Ordered,
- llvm::Value *UB,
- llvm::Value *Chunk = nullptr);
+ const DispatchRTInput &DispatchValues);
/// \brief Call the appropriate runtime routine to initialize it before start
/// of loop.
///
- /// Depending on the loop schedule, it is nesessary to call some runtime
+  /// This is used only in the case of a static schedule, when the user did
+  /// not specify an ordered clause on the loop construct.
+ /// Depending on the loop schedule, it is necessary to call some runtime
/// routine before start of the OpenMP loop to get the loop upper / lower
/// bounds \a LB and \a UB and stride \a ST.
///
@@ -676,7 +818,7 @@ public:
/// \param Loc Clang source location.
/// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
/// \param IVSize Size of the iteration variable in bits.
- /// \param IVSigned Sign of the interation variable.
+ /// \param IVSigned Sign of the iteration variable.
/// \param Ordered true if loop is ordered, false otherwise.
/// \param IL Address of the output variable in which the flag of the
/// last iteration is returned.
@@ -685,7 +827,7 @@ public:
/// \param UB Address of the output variable in which the upper iteration
/// number is returned.
/// \param ST Address of the output variable in which the stride value is
- /// returned nesessary to generated the static_chunked scheduled loop.
+ /// returned, which is necessary to generate the static_chunked scheduled loop.
/// \param Chunk Value of the chunk for the static_chunked scheduled loop.
/// For the default (nullptr) value, the chunk 1 will be used.
///
@@ -700,7 +842,7 @@ public:
/// \param Loc Clang source location.
/// \param SchedKind Schedule kind, specified by the 'dist_schedule' clause.
/// \param IVSize Size of the iteration variable in bits.
- /// \param IVSigned Sign of the interation variable.
+ /// \param IVSigned Sign of the iteration variable.
/// \param Ordered true if loop is ordered, false otherwise.
/// \param IL Address of the output variable in which the flag of the
/// last iteration is returned.
@@ -709,7 +851,7 @@ public:
/// \param UB Address of the output variable in which the upper iteration
/// number is returned.
/// \param ST Address of the output variable in which the stride value is
- /// returned nesessary to generated the static_chunked scheduled loop.
+ /// returned, which is necessary to generate the static_chunked scheduled loop.
/// \param Chunk Value of the chunk for the static_chunked scheduled loop.
/// For the default (nullptr) value, the chunk 1 will be used.
///
@@ -726,7 +868,7 @@ public:
/// \param CGF Reference to current CodeGenFunction.
/// \param Loc Clang source location.
/// \param IVSize Size of the iteration variable in bits.
- /// \param IVSigned Sign of the interation variable.
+ /// \param IVSigned Sign of the iteration variable.
///
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF,
SourceLocation Loc, unsigned IVSize,
@@ -745,7 +887,7 @@ public:
/// kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
/// kmp_int[32|64] *p_stride);
/// \param IVSize Size of the iteration variable in bits.
- /// \param IVSigned Sign of the interation variable.
+ /// \param IVSigned Sign of the iteration variable.
/// \param IL Address of the output variable in which the flag of the
/// last iteration is returned.
/// \param LB Address of the output variable in which the lower iteration
@@ -797,6 +939,14 @@ public:
SourceLocation Loc, bool PerformInit,
CodeGenFunction *CGF = nullptr);
+ /// Creates an artificial threadprivate variable with name \p Name and type
+ /// \p VarType.
+ /// \param VarType Type of the artificial threadprivate variable.
+ /// \param Name Name of the artificial threadprivate variable.
+ virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
+ QualType VarType,
+ StringRef Name);
+
/// \brief Emit flush of the variables specified in 'omp flush' directive.
/// \param Vars List of variables to flush.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
@@ -880,6 +1030,32 @@ public:
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen,
bool HasCancel = false);
+
+ /// Emits reduction function.
+ /// \param ArgsType Array type containing pointers to reduction variables.
+ /// \param Privates List of private copies for original reduction arguments.
+ /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
+ /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
+ /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
+ /// or 'operator binop(LHS, RHS)'.
+ llvm::Value *emitReductionFunction(CodeGenModule &CGM, llvm::Type *ArgsType,
+ ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps);
+
+ /// Emits a single reduction combiner.
+ void emitSingleReductionCombiner(CodeGenFunction &CGF,
+ const Expr *ReductionOp,
+ const Expr *PrivateRef,
+ const DeclRefExpr *LHS,
+ const DeclRefExpr *RHS);
+
+ struct ReductionOptionsTy {
+ bool WithNowait;
+ bool SimpleReduction;
+ OpenMPDirectiveKind ReductionKind;
+ };
/// \brief Emit a code for reduction clause. Next code should be emitted for
/// reduction:
/// \code
@@ -916,14 +1092,63 @@ public:
/// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
/// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
/// or 'operator binop(LHS, RHS)'.
- /// \param WithNowait true if parent directive has also nowait clause, false
- /// otherwise.
+ /// \param Options List of options for reduction codegen:
+ /// WithNowait true if the parent directive also has a nowait clause, false
+ /// otherwise.
+ /// SimpleReduction Emit reduction operation only. Used for omp simd
+ /// directive on the host.
+ /// ReductionKind The kind of reduction to perform.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
ArrayRef<const Expr *> Privates,
ArrayRef<const Expr *> LHSExprs,
ArrayRef<const Expr *> RHSExprs,
ArrayRef<const Expr *> ReductionOps,
- bool WithNowait, bool SimpleReduction);
+ ReductionOptionsTy Options);
+
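A hedged sketch of the new call shape with ReductionOptionsTy; the expression arrays are assumed to have been gathered from the directive's reduction clauses by the caller, the wrapper is hypothetical, and OMPD_parallel is only an illustrative kind:

    // Illustrative wrapper; all arguments besides Options come from the caller.
    void reductionSketch(CGOpenMPRuntime &RT, CodeGenFunction &CGF,
                         SourceLocation Loc, ArrayRef<const Expr *> Privates,
                         ArrayRef<const Expr *> LHSExprs,
                         ArrayRef<const Expr *> RHSExprs,
                         ArrayRef<const Expr *> ReductionOps) {
      CGOpenMPRuntime::ReductionOptionsTy Options = {
          /*WithNowait=*/false, /*SimpleReduction=*/false,
          /*ReductionKind=*/OMPD_parallel};
      RT.emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, ReductionOps,
                       Options);
    }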
+ /// Emit code for initialization of the task reduction clause. Next code
+ /// should be emitted for reduction:
+ /// \code
+ ///
+ /// _task_red_item_t red_data[n];
+ /// ...
+ /// red_data[i].shar = &origs[i];
+ /// red_data[i].size = sizeof(origs[i]);
+ /// red_data[i].f_init = (void*)RedInit<i>;
+ /// red_data[i].f_fini = (void*)RedDest<i>;
+ /// red_data[i].f_comb = (void*)RedOp<i>;
+ /// red_data[i].flags = <Flag_i>;
+ /// ...
+ /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data);
+ /// \endcode
+ ///
+ /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations.
+ /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations.
+ /// \param Data Additional data for task generation like tiedness, final
+ /// state, list of privates, reductions etc.
+ virtual llvm::Value *emitTaskReductionInit(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs,
+ const OMPTaskDataTy &Data);
+
+ /// Required to resolve existing problems in the runtime. Emits threadprivate
+ /// variables to store the size of the VLAs/array sections for
+ /// initializer/combiner/finalizer functions, and emits a threadprivate
+ /// variable to store the pointer to the original reduction item for the
+ /// custom initializer defined by the declare reduction construct.
+ /// \param RCG Allows reuse of existing data for the reductions.
+ /// \param N Reduction item for which fixups must be emitted.
+ virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc,
+ ReductionCodeGen &RCG, unsigned N);
+
+ /// Get the address, of `void *` type, of the private copy of the reduction
+ /// item specified by the \p SharedLVal.
+ /// \param ReductionsPtr Pointer to the reduction data returned by the
+ /// emitTaskReductionInit function.
+ /// \param SharedLVal Address of the original reduction item.
+ virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc,
+ llvm::Value *ReductionsPtr,
+ LValue SharedLVal);
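A hedged sketch of how the three task-reduction hooks compose: initialize once per taskgroup, then resolve a private copy from the returned handle inside the task body; everything except the hook names themselves is a placeholder:

    // Hypothetical helper; all inputs are assumed to come from the caller.
    void taskReductionSketch(CGOpenMPRuntime &RT, CodeGenFunction &CGF,
                             SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
                             ArrayRef<const Expr *> RHSExprs,
                             const OMPTaskDataTy &Data, LValue SharedLVal) {
      // Wraps the __kmpc_task_reduction_init call shown in the \code block above.
      llvm::Value *ReductionsPtr =
          RT.emitTaskReductionInit(CGF, Loc, LHSExprs, RHSExprs, Data);
      // Later, inside the task body, fetch the private copy of one item.
      Address PrivateAddr =
          RT.getTaskReductionItem(CGF, Loc, ReductionsPtr, SharedLVal);
      (void)PrivateAddr;
    }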
/// \brief Emit code for 'taskwait' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc);
@@ -952,7 +1177,7 @@ public:
/// \param OutlinedFnID Outlined function ID value to be defined by this call.
/// \param IsOffloadEntry True if the outlined function is an offload entry.
/// \param CodeGen Code generation sequence for the \a D directive.
- /// An oulined function may not be an entry if, e.g. the if clause always
+ /// An outlined function may not be an entry if, e.g. the if clause always
/// evaluates to false.
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
StringRef ParentName,
@@ -991,7 +1216,7 @@ public:
virtual bool emitTargetGlobalVariable(GlobalDecl GD);
/// \brief Emit the global \a GD if it is meaningful for the target. Returns
- /// if it was emitted succesfully.
+ /// if it was emitted successfully.
/// \param GD Global to scan.
virtual bool emitTargetGlobal(GlobalDecl GD);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 6a6d832..3ced05d 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -26,6 +26,11 @@ enum OpenMPRTLFunctionNVPTX {
OMPRTL_NVPTX__kmpc_kernel_init,
/// \brief Call to void __kmpc_kernel_deinit();
OMPRTL_NVPTX__kmpc_kernel_deinit,
+ /// \brief Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
+ /// short RequiresOMPRuntime, short RequiresDataSharing);
+ OMPRTL_NVPTX__kmpc_spmd_kernel_init,
+ /// \brief Call to void __kmpc_spmd_kernel_deinit();
+ OMPRTL_NVPTX__kmpc_spmd_kernel_deinit,
/// \brief Call to void __kmpc_kernel_prepare_parallel(void
/// *outlined_function);
OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
@@ -39,6 +44,30 @@ enum OpenMPRTLFunctionNVPTX {
/// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
/// global_tid);
OMPRTL_NVPTX__kmpc_end_serialized_parallel,
+ /// \brief Call to int32_t __kmpc_shuffle_int32(int32_t element,
+ /// int16_t lane_offset, int16_t warp_size);
+ OMPRTL_NVPTX__kmpc_shuffle_int32,
+ /// \brief Call to int64_t __kmpc_shuffle_int64(int64_t element,
+ /// int16_t lane_offset, int16_t warp_size);
+ OMPRTL_NVPTX__kmpc_shuffle_int64,
+ /// \brief Call to __kmpc_nvptx_parallel_reduce_nowait(kmp_int32
+ /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
+ /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+ /// lane_offset, int16_t shortCircuit),
+ /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
+ OMPRTL_NVPTX__kmpc_parallel_reduce_nowait,
+ /// \brief Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid,
+ /// int32_t num_vars, size_t reduce_size, void *reduce_data,
+ /// void (*kmp_ShuffleReductFctPtr)(void *rhs, int16_t lane_id, int16_t
+ /// lane_offset, int16_t shortCircuit),
+ /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num),
+ /// void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad,
+ /// int32_t index, int32_t width),
+ /// void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad, int32_t
+ /// index, int32_t width, int32_t reduce))
+ OMPRTL_NVPTX__kmpc_teams_reduce_nowait,
+ /// \brief Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
+ OMPRTL_NVPTX__kmpc_end_reduce_nowait
};
/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
@@ -76,6 +105,47 @@ public:
CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
}
};
+
+// A class to track the execution mode when generating code for directives
+// within a target region. The appropriate mode (generic/spmd) is set on entry
+// to the target region and consulted by directives nested within it, such as
+// 'parallel', to emit optimized code.
+class ExecutionModeRAII {
+private:
+ CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode;
+ CGOpenMPRuntimeNVPTX::ExecutionMode &Mode;
+
+public:
+ ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode,
+ CGOpenMPRuntimeNVPTX::ExecutionMode NewMode)
+ : Mode(Mode) {
+ SavedMode = Mode;
+ Mode = NewMode;
+ }
+ ~ExecutionModeRAII() { Mode = SavedMode; }
+};
+
+/// GPU Configuration: This information can be derived from CUDA registers;
+/// however, providing compile-time constants helps generate more efficient
+/// code. For all practical purposes this is fine because the configuration
+/// is the same for all known NVPTX architectures.
+enum MachineConfiguration : unsigned {
+ WarpSize = 32,
+ /// Number of bits required to represent a lane identifier, which is
+ /// computed as log_2(WarpSize).
+ LaneIDBits = 5,
+ LaneIDMask = WarpSize - 1,
+
+ /// Global memory alignment for performance.
+ GlobalMemoryAlignment = 256,
+};
+
+enum NamedBarrier : unsigned {
+ /// Synchronize on this barrier #ID using a named barrier primitive.
+ /// Only the subset of active threads in a parallel region arrive at the
+ /// barrier.
+ NB_Parallel = 1,
+};
} // anonymous namespace
/// Get the GPU warp size.
@@ -96,6 +166,23 @@ static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
llvm::None, "nvptx_tid");
}
+/// Get the id of the warp in the block.
+/// We assume that the warp size is 32, which is always the case
+/// on the NVPTX device, to generate more efficient code.
+static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ return Bld.CreateAShr(getNVPTXThreadID(CGF), LaneIDBits, "nvptx_warp_id");
+}
+
+/// Get the id of the current lane in the Warp.
+/// We assume that the warp size is 32, which is always the case
+/// on the NVPTX device, to generate more efficient code.
+static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ return Bld.CreateAnd(getNVPTXThreadID(CGF), Bld.getInt32(LaneIDMask),
+ "nvptx_lane_id");
+}
+
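As a quick check of the shift/mask arithmetic above (LaneIDBits = 5, LaneIDMask = 31, i.e. a warp size of 32), a host-side restatement, for illustration only:

    // Thread 37 sits in warp 1, lane 5, since 37 = 1 * 32 + 5.
    constexpr unsigned warpIdOf(unsigned Tid) { return Tid >> 5; }   // LaneIDBits
    constexpr unsigned laneIdOf(unsigned Tid) { return Tid & 31u; }  // LaneIDMask
    static_assert(warpIdOf(37) == 1 && laneIdOf(37) == 5, "37 = 1 * 32 + 5");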
/// Get the maximum number of threads in a block of the GPU.
static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
@@ -112,16 +199,37 @@ static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
}
+/// Get barrier #ID to synchronize selected (multiple of warp size) threads in
+/// a CTA.
+static void getNVPTXBarrier(CodeGenFunction &CGF, int ID,
+ llvm::Value *NumThreads) {
+ CGBuilderTy &Bld = CGF.Builder;
+ llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads};
+ Bld.CreateCall(llvm::Intrinsic::getDeclaration(&CGF.CGM.getModule(),
+ llvm::Intrinsic::nvvm_barrier),
+ Args);
+}
+
/// Synchronize all GPU threads in a block.
static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }
+/// Synchronize worker threads in a parallel region.
+static void syncParallelThreads(CodeGenFunction &CGF, llvm::Value *NumThreads) {
+ return getNVPTXBarrier(CGF, NB_Parallel, NumThreads);
+}
+
/// Get the value of the thread_limit clause in the teams directive.
-/// The runtime encodes thread_limit in the launch parameter, always starting
-/// thread_limit+warpSize threads per team.
-static llvm::Value *getThreadLimit(CodeGenFunction &CGF) {
+/// For the 'generic' execution mode, the runtime encodes thread_limit in
+/// the launch parameters, always starting thread_limit+warpSize threads per
+/// CTA. The threads in the last warp are reserved for master execution.
+/// For the 'spmd' execution mode, all threads in a CTA are part of the team.
+static llvm::Value *getThreadLimit(CodeGenFunction &CGF,
+ bool IsInSpmdExecutionMode = false) {
CGBuilderTy &Bld = CGF.Builder;
- return Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
- "thread_limit");
+ return IsInSpmdExecutionMode
+ ? getNVPTXNumThreads(CGF)
+ : Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
+ "thread_limit");
}
/// Get the thread id of the OMP master thread.
@@ -159,12 +267,34 @@ void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI);
}
+bool CGOpenMPRuntimeNVPTX::isInSpmdExecutionMode() const {
+ return CurrentExecutionMode == CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd;
+}
+
+static CGOpenMPRuntimeNVPTX::ExecutionMode
+getExecutionModeForDirective(CodeGenModule &CGM,
+ const OMPExecutableDirective &D) {
+ OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
+ switch (DirectiveKind) {
+ case OMPD_target:
+ case OMPD_target_teams:
+ return CGOpenMPRuntimeNVPTX::ExecutionMode::Generic;
+ case OMPD_target_parallel:
+ return CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd;
+ default:
+ llvm_unreachable("Unsupported directive on NVPTX device.");
+ }
+ llvm_unreachable("Unsupported directive on NVPTX device.");
+}
+
void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D,
StringRef ParentName,
llvm::Function *&OutlinedFn,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
+ ExecutionModeRAII ModeRAII(CurrentExecutionMode,
+ CGOpenMPRuntimeNVPTX::ExecutionMode::Generic);
EntryFunctionState EST;
WorkerFunctionState WST(CGM);
Work.clear();
@@ -252,6 +382,94 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF,
EST.ExitBB = nullptr;
}
+void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D,
+ StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen) {
+ ExecutionModeRAII ModeRAII(CurrentExecutionMode,
+ CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd);
+ EntryFunctionState EST;
+
+ // Emit target region as a standalone region.
+ class NVPTXPrePostActionTy : public PrePostActionTy {
+ CGOpenMPRuntimeNVPTX &RT;
+ CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
+ const OMPExecutableDirective &D;
+
+ public:
+ NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT,
+ CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
+ const OMPExecutableDirective &D)
+ : RT(RT), EST(EST), D(D) {}
+ void Enter(CodeGenFunction &CGF) override {
+ RT.emitSpmdEntryHeader(CGF, EST, D);
+ }
+ void Exit(CodeGenFunction &CGF) override {
+ RT.emitSpmdEntryFooter(CGF, EST);
+ }
+ } Action(*this, EST, D);
+ CodeGen.setAction(Action);
+ emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
+ IsOffloadEntry, CodeGen);
+ return;
+}
+
+void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
+ CodeGenFunction &CGF, EntryFunctionState &EST,
+ const OMPExecutableDirective &D) {
+ auto &Bld = CGF.Builder;
+
+ // Setup BBs in entry function.
+ llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute");
+ EST.ExitBB = CGF.createBasicBlock(".exit");
+
+ // Initialize the OMP state in the runtime; called by all active threads.
+ // TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters
+ // based on code analysis of the target region.
+ llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSpmdExecutionMode=*/true),
+ /*RequiresOMPRuntime=*/Bld.getInt16(1),
+ /*RequiresDataSharing=*/Bld.getInt16(1)};
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
+ CGF.EmitBranch(ExecuteBB);
+
+ CGF.EmitBlock(ExecuteBB);
+}
+
+void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
+ EntryFunctionState &EST) {
+ if (!EST.ExitBB)
+ EST.ExitBB = CGF.createBasicBlock(".exit");
+
+ llvm::BasicBlock *OMPDeInitBB = CGF.createBasicBlock(".omp.deinit");
+ CGF.EmitBranch(OMPDeInitBB);
+
+ CGF.EmitBlock(OMPDeInitBB);
+ // DeInitialize the OMP state in the runtime; called by all active threads.
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_deinit), None);
+ CGF.EmitBranch(EST.ExitBB);
+
+ CGF.EmitBlock(EST.ExitBB);
+ EST.ExitBB = nullptr;
+}
+
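A source-level sketch, for orientation only, of the control flow the SPMD entry header and footer emit around a target region body; the function and parameter names are illustrative, and only the two runtime entry points (with the signatures built later in this file) are taken from the source:

    extern "C" void __kmpc_spmd_kernel_init(int thread_limit,
                                            short RequiresOMPRuntime,
                                            short RequiresDataSharing);
    extern "C" void __kmpc_spmd_kernel_deinit();

    // Hypothetical kernel shape; the real code is emitted as IR, not C++.
    void spmd_kernel_sketch(int num_threads_in_cta) {
      // Entry: every active thread initializes the OpenMP runtime state.
      __kmpc_spmd_kernel_init(num_threads_in_cta, /*RequiresOMPRuntime=*/1,
                              /*RequiresDataSharing=*/1);
      // .execute: the target region body runs with the whole CTA as the team.
      // ...
      // .omp.deinit: all threads tear the runtime state down, then exit.
      __kmpc_spmd_kernel_deinit();
    }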
+// Create a unique global variable to indicate the execution mode of this target
+// region. The execution mode is either 'generic' or 'spmd', depending on the
+// target directive. This variable is picked up by the offload library to set up
+// the device appropriately before kernel launch. If the execution mode is
+// 'generic', the runtime reserves one warp for the master, otherwise, all
+// warps participate in parallel work.
+static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
+ CGOpenMPRuntimeNVPTX::ExecutionMode Mode) {
+ (void)new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
+ llvm::GlobalValue::WeakAnyLinkage,
+ llvm::ConstantInt::get(CGM.Int8Ty, Mode), Name + Twine("_exec_mode"));
+}
+
void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
auto &Ctx = CGM.getContext();
@@ -385,6 +603,22 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit");
break;
}
+ case OMPRTL_NVPTX__kmpc_spmd_kernel_init: {
+ // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
+ // short RequiresOMPRuntime, short RequiresDataSharing);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: {
+ // Build void __kmpc_spmd_kernel_deinit();
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit");
+ break;
+ }
case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
/// Build void __kmpc_kernel_prepare_parallel(
/// void *outlined_function);
@@ -428,6 +662,103 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
break;
}
+ case OMPRTL_NVPTX__kmpc_shuffle_int32: {
+ // Build int32_t __kmpc_shuffle_int32(int32_t element,
+ // int16_t lane_offset, int16_t warp_size);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_shuffle_int64: {
+ // Build int64_t __kmpc_shuffle_int64(int64_t element,
+ // int16_t lane_offset, int16_t warp_size);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_parallel_reduce_nowait: {
+ // Build int32_t __kmpc_nvptx_parallel_reduce_nowait(kmp_int32 global_tid,
+ // kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
+ // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+ // lane_offset, int16_t shortCircuit),
+ // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num));
+ llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
+ CGM.Int16Ty, CGM.Int16Ty};
+ auto *ShuffleReduceFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
+ auto *InterWarpCopyFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ CGM.Int32Ty,
+ CGM.SizeTy,
+ CGM.VoidPtrTy,
+ ShuffleReduceFnTy->getPointerTo(),
+ InterWarpCopyFnTy->getPointerTo()};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_teams_reduce_nowait: {
+ // Build int32_t __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid,
+ // int32_t num_vars, size_t reduce_size, void *reduce_data,
+ // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+ // lane_offset, int16_t shortCircuit),
+ // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num),
+ // void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad,
+ // int32_t index, int32_t width),
+ // void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad,
+ // int32_t index, int32_t width, int32_t reduce))
+ llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
+ CGM.Int16Ty, CGM.Int16Ty};
+ auto *ShuffleReduceFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
+ auto *InterWarpCopyFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *CopyToScratchpadTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy,
+ CGM.Int32Ty, CGM.Int32Ty};
+ auto *CopyToScratchpadFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, CopyToScratchpadTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *LoadReduceTypeParams[] = {
+ CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.Int32Ty, CGM.Int32Ty, CGM.Int32Ty};
+ auto *LoadReduceFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, LoadReduceTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ CGM.Int32Ty,
+ CGM.SizeTy,
+ CGM.VoidPtrTy,
+ ShuffleReduceFnTy->getPointerTo(),
+ InterWarpCopyFnTy->getPointerTo(),
+ CopyToScratchpadFnTy->getPointerTo(),
+ LoadReduceFnTy->getPointerTo()};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
+ // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
+ break;
+ }
}
return RTLFn;
}
@@ -463,39 +794,75 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
assert(!ParentName.empty() && "Invalid target region parent name!");
- emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
- CodeGen);
+ CGOpenMPRuntimeNVPTX::ExecutionMode Mode =
+ getExecutionModeForDirective(CGM, D);
+ switch (Mode) {
+ case CGOpenMPRuntimeNVPTX::ExecutionMode::Generic:
+ emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
+ CodeGen);
+ break;
+ case CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd:
+ emitSpmdKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
+ CodeGen);
+ break;
+ case CGOpenMPRuntimeNVPTX::ExecutionMode::Unknown:
+ llvm_unreachable(
+ "Unknown programming model for OpenMP directive on NVPTX target.");
+ }
+
+ setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
}
CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
- : CGOpenMPRuntime(CGM) {
+ : CGOpenMPRuntime(CGM), CurrentExecutionMode(ExecutionMode::Unknown) {
if (!CGM.getLangOpts().OpenMPIsDevice)
llvm_unreachable("OpenMP NVPTX can only handle device code.");
}
+void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
+ OpenMPProcBindClauseKind ProcBind,
+ SourceLocation Loc) {
+ // Do nothing in case of Spmd mode and L0 parallel.
+ // TODO: If in Spmd mode and L1 parallel emit the clause.
+ if (isInSpmdExecutionMode())
+ return;
+
+ CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
+}
+
+void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
+ llvm::Value *NumThreads,
+ SourceLocation Loc) {
+ // Do nothing in case of Spmd mode and L0 parallel.
+ // TODO: If in Spmd mode and L1 parallel emit the clause.
+ if (isInSpmdExecutionMode())
+ return;
+
+ CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
+}
+
void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
const Expr *NumTeams,
const Expr *ThreadLimit,
SourceLocation Loc) {}
-llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOrTeamsOutlinedFunction(
+llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ return CGOpenMPRuntime::emitParallelOutlinedFunction(D, ThreadIDVar,
+ InnermostKind, CodeGen);
+}
- llvm::Function *OutlinedFun = nullptr;
- if (isa<OMPTeamsDirective>(D)) {
- llvm::Value *OutlinedFunVal =
- CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
- D, ThreadIDVar, InnermostKind, CodeGen);
- OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
- OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
- OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
- } else {
- llvm::Value *OutlinedFunVal =
- CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
- D, ThreadIDVar, InnermostKind, CodeGen);
- OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
- }
+llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+
+ llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
+ D, ThreadIDVar, InnermostKind, CodeGen);
+ llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
+ OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
+ OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
+ OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
return OutlinedFun;
}
@@ -525,7 +892,10 @@ void CGOpenMPRuntimeNVPTX::emitParallelCall(
if (!CGF.HaveInsertPoint())
return;
- emitGenericParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
+ if (isInSpmdExecutionMode())
+ emitSpmdParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
+ else
+ emitGenericParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
}
void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
@@ -533,8 +903,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
llvm::Function *Fn = cast<llvm::Function>(OutlinedFn);
- auto &&L0ParallelGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ auto &&L0ParallelGen = [this, Fn](CodeGenFunction &CGF, PrePostActionTy &) {
CGBuilderTy &Bld = CGF.Builder;
// Prepare for parallel region. Indicate the outlined function.
@@ -565,8 +934,8 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
auto &&SeqGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF,
PrePostActionTy &) {
- auto &&CodeGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF,
- PrePostActionTy &Action) {
+ auto &&CodeGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
Action.Enter(CGF);
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
@@ -596,3 +965,1276 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
ThenRCG(CGF);
}
}
+
+void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
+ CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
+ // Just call the outlined function to execute the parallel region.
+ // OutlinedFn(&GTid, &zero, CapturedStruct);
+ //
+ // TODO: Do something with IfCond when support for the 'if' clause
+ // is added on Spmd target directives.
+ llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
+ OutlinedFnArgs.push_back(
+ llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+ OutlinedFnArgs.push_back(
+ llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+ OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+ CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
+}
+
+/// This function creates calls to one of two shuffle functions to copy
+/// variables between lanes in a warp.
+static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
+ QualType ElemTy,
+ llvm::Value *Elem,
+ llvm::Value *Offset) {
+ auto &CGM = CGF.CGM;
+ auto &C = CGM.getContext();
+ auto &Bld = CGF.Builder;
+ CGOpenMPRuntimeNVPTX &RT =
+ *(static_cast<CGOpenMPRuntimeNVPTX *>(&CGM.getOpenMPRuntime()));
+
+ unsigned Size = CGM.getContext().getTypeSizeInChars(ElemTy).getQuantity();
+ assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction.");
+
+ OpenMPRTLFunctionNVPTX ShuffleFn = Size <= 4
+ ? OMPRTL_NVPTX__kmpc_shuffle_int32
+ : OMPRTL_NVPTX__kmpc_shuffle_int64;
+
+ // Cast all types to 32- or 64-bit values before calling shuffle routines.
+ auto CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty;
+ auto *ElemCast = Bld.CreateSExtOrBitCast(Elem, CastTy);
+ auto *WarpSize = CGF.EmitScalarConversion(
+ getNVPTXWarpSize(CGF), C.getIntTypeForBitwidth(32, /* Signed */ true),
+ C.getIntTypeForBitwidth(16, /* Signed */ true), SourceLocation());
+
+ auto *ShuffledVal =
+ CGF.EmitRuntimeCall(RT.createNVPTXRuntimeFunction(ShuffleFn),
+ {ElemCast, Offset, WarpSize});
+
+ return Bld.CreateTruncOrBitCast(ShuffledVal, CGF.ConvertTypeForMem(ElemTy));
+}
+
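For orientation, the warp-level reduction this helper supports looks roughly like the following when restated as plain C++ against the __kmpc_shuffle_int32 entry point declared in this file; the entry point is device-side and only declared here, and the full-warp sum loop is an assumed variant (lane 0 holds the result), not the exact code that gets emitted:

    #include <cstdint>
    extern "C" int32_t __kmpc_shuffle_int32(int32_t element, int16_t lane_offset,
                                            int16_t warp_size);

    int32_t warp_sum_sketch(int32_t my_value) {
      const int16_t warp_size = 32;
      // Halve the lane offset each step: 16, 8, 4, 2, 1.
      for (int16_t offset = warp_size / 2; offset > 0; offset /= 2)
        my_value += __kmpc_shuffle_int32(my_value, offset, warp_size);
      return my_value;
    }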
+namespace {
+enum CopyAction : unsigned {
+ // RemoteLaneToThread: Copy over a Reduce list from a remote lane in
+ // the warp using shuffle instructions.
+ RemoteLaneToThread,
+ // ThreadCopy: Make a copy of a Reduce list on the thread's stack.
+ ThreadCopy,
+ // ThreadToScratchpad: Copy a team-reduced array to the scratchpad.
+ ThreadToScratchpad,
+ // ScratchpadToThread: Copy from a scratchpad array in global memory
+ // containing team-reduced data to a thread's stack.
+ ScratchpadToThread,
+};
+} // namespace
+
+struct CopyOptionsTy {
+ llvm::Value *RemoteLaneOffset;
+ llvm::Value *ScratchpadIndex;
+ llvm::Value *ScratchpadWidth;
+};
+
+/// Emit instructions to copy a Reduce list, which contains partially
+/// aggregated values, in the specified direction.
+static void emitReductionListCopy(
+ CopyAction Action, CodeGenFunction &CGF, QualType ReductionArrayTy,
+ ArrayRef<const Expr *> Privates, Address SrcBase, Address DestBase,
+ CopyOptionsTy CopyOptions = {nullptr, nullptr, nullptr}) {
+
+ auto &CGM = CGF.CGM;
+ auto &C = CGM.getContext();
+ auto &Bld = CGF.Builder;
+
+ auto *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
+ auto *ScratchpadIndex = CopyOptions.ScratchpadIndex;
+ auto *ScratchpadWidth = CopyOptions.ScratchpadWidth;
+
+ // Iterate, element by element, through the source Reduce list and
+ // make a copy.
+ unsigned Idx = 0;
+ unsigned Size = Privates.size();
+ for (auto &Private : Privates) {
+ Address SrcElementAddr = Address::invalid();
+ Address DestElementAddr = Address::invalid();
+ Address DestElementPtrAddr = Address::invalid();
+ // Should we shuffle in an element from a remote lane?
+ bool ShuffleInElement = false;
+ // Set to true to update the pointer in the dest Reduce list to a
+ // newly created element.
+ bool UpdateDestListPtr = false;
+ // Increment the src or dest pointer to the scratchpad, for each
+ // new element.
+ bool IncrScratchpadSrc = false;
+ bool IncrScratchpadDest = false;
+
+ switch (Action) {
+ case RemoteLaneToThread: {
+ // Step 1.1: Get the address for the src element in the Reduce list.
+ Address SrcElementPtrAddr =
+ Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
+ llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
+ SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+ SrcElementAddr =
+ Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+
+ // Step 1.2: Create a temporary to store the element in the destination
+ // Reduce list.
+ DestElementPtrAddr =
+ Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize());
+ DestElementAddr =
+ CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element");
+ ShuffleInElement = true;
+ UpdateDestListPtr = true;
+ break;
+ }
+ case ThreadCopy: {
+ // Step 1.1: Get the address for the src element in the Reduce list.
+ Address SrcElementPtrAddr =
+ Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
+ llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
+ SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+ SrcElementAddr =
+ Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+
+ // Step 1.2: Get the address for dest element. The destination
+ // element has already been created on the thread's stack.
+ DestElementPtrAddr =
+ Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize());
+ llvm::Value *DestElementPtr =
+ CGF.EmitLoadOfScalar(DestElementPtrAddr, /*Volatile=*/false,
+ C.VoidPtrTy, SourceLocation());
+ Address DestElemAddr =
+ Address(DestElementPtr, C.getTypeAlignInChars(Private->getType()));
+ DestElementAddr = Bld.CreateElementBitCast(
+ DestElemAddr, CGF.ConvertTypeForMem(Private->getType()));
+ break;
+ }
+ case ThreadToScratchpad: {
+ // Step 1.1: Get the address for the src element in the Reduce list.
+ Address SrcElementPtrAddr =
+ Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
+ llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
+ SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+ SrcElementAddr =
+ Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+
+ // Step 1.2: Get the address for dest element:
+ // address = base + index * ElementSizeInChars.
+ unsigned ElementSizeInChars =
+ C.getTypeSizeInChars(Private->getType()).getQuantity();
+ auto *CurrentOffset =
+ Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars),
+ ScratchpadIndex);
+ auto *ScratchPadElemAbsolutePtrVal =
+ Bld.CreateAdd(DestBase.getPointer(), CurrentOffset);
+ ScratchPadElemAbsolutePtrVal =
+ Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
+ Address ScratchpadPtr =
+ Address(ScratchPadElemAbsolutePtrVal,
+ C.getTypeAlignInChars(Private->getType()));
+ DestElementAddr = Bld.CreateElementBitCast(
+ ScratchpadPtr, CGF.ConvertTypeForMem(Private->getType()));
+ IncrScratchpadDest = true;
+ break;
+ }
+ case ScratchpadToThread: {
+ // Step 1.1: Get the address for the src element in the scratchpad.
+ // address = base + index * ElementSizeInChars.
+ unsigned ElementSizeInChars =
+ C.getTypeSizeInChars(Private->getType()).getQuantity();
+ auto *CurrentOffset =
+ Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars),
+ ScratchpadIndex);
+ auto *ScratchPadElemAbsolutePtrVal =
+ Bld.CreateAdd(SrcBase.getPointer(), CurrentOffset);
+ ScratchPadElemAbsolutePtrVal =
+ Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
+ SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal,
+ C.getTypeAlignInChars(Private->getType()));
+ IncrScratchpadSrc = true;
+
+ // Step 1.2: Create a temporary to store the element in the destination
+ // Reduce list.
+ DestElementPtrAddr =
+ Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize());
+ DestElementAddr =
+ CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element");
+ UpdateDestListPtr = true;
+ break;
+ }
+ }
+
+ // Regardless of the source and destination of the copy, we emit the load of
+ // the source element, as it is required in all directions.
+ SrcElementAddr = Bld.CreateElementBitCast(
+ SrcElementAddr, CGF.ConvertTypeForMem(Private->getType()));
+ llvm::Value *Elem =
+ CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false,
+ Private->getType(), SourceLocation());
+
+ // Now that all active lanes have read the element in the
+ // Reduce list, shuffle over the value from the remote lane.
+ if (ShuffleInElement) {
+ Elem = createRuntimeShuffleFunction(CGF, Private->getType(), Elem,
+ RemoteLaneOffset);
+ }
+
+ // Store the source element value to the dest element address.
+ CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false,
+ Private->getType());
+
+ // Step 3.1: Modify reference in dest Reduce list as needed.
+ // Modifying the reference in Reduce list to point to the newly
+ // created element. The element is live in the current function
+ // scope and that of functions it invokes (i.e., reduce_function).
+ // RemoteReduceData[i] = (void*)&RemoteElem
+ if (UpdateDestListPtr) {
+ CGF.EmitStoreOfScalar(Bld.CreatePointerBitCastOrAddrSpaceCast(
+ DestElementAddr.getPointer(), CGF.VoidPtrTy),
+ DestElementPtrAddr, /*Volatile=*/false,
+ C.VoidPtrTy);
+ }
+
+ // Step 4.1: Increment SrcBase/DestBase so that it points to the starting
+ // address of the next element in scratchpad memory, unless we're currently
+ // processing the last one. Memory alignment is also taken care of here.
+ if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) {
+ llvm::Value *ScratchpadBasePtr =
+ IncrScratchpadDest ? DestBase.getPointer() : SrcBase.getPointer();
+ unsigned ElementSizeInChars =
+ C.getTypeSizeInChars(Private->getType()).getQuantity();
+ ScratchpadBasePtr = Bld.CreateAdd(
+ ScratchpadBasePtr,
+ Bld.CreateMul(ScratchpadWidth, llvm::ConstantInt::get(
+ CGM.SizeTy, ElementSizeInChars)));
+
+ // Take care of global memory alignment for performance
+ ScratchpadBasePtr = Bld.CreateSub(ScratchpadBasePtr,
+ llvm::ConstantInt::get(CGM.SizeTy, 1));
+ ScratchpadBasePtr = Bld.CreateSDiv(
+ ScratchpadBasePtr,
+ llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
+ ScratchpadBasePtr = Bld.CreateAdd(ScratchpadBasePtr,
+ llvm::ConstantInt::get(CGM.SizeTy, 1));
+ ScratchpadBasePtr = Bld.CreateMul(
+ ScratchpadBasePtr,
+ llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
+
+ if (IncrScratchpadDest)
+ DestBase = Address(ScratchpadBasePtr, CGF.getPointerAlign());
+ else /* IncrScratchpadSrc = true */
+ SrcBase = Address(ScratchpadBasePtr, CGF.getPointerAlign());
+ }
+
+ Idx++;
+ }
+}
+
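The scratchpad pointer bump above rounds the running base address up to the next multiple of GlobalMemoryAlignment (256). A host-side restatement of that integer arithmetic, shown only to make the subtract-1/divide/add-1/multiply sequence concrete (it assumes a non-zero value):

    constexpr unsigned long long roundUpTo(unsigned long long Value,
                                           unsigned long long Alignment) {
      return ((Value - 1) / Alignment + 1) * Alignment;
    }
    static_assert(roundUpTo(300, 256) == 512, "rounds up to the next 256 block");
    static_assert(roundUpTo(512, 256) == 512, "exact multiples are unchanged");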
+/// This function emits a helper that loads data from the scratchpad array
+/// and (optionally) reduces it with the input operand.
+///
+/// load_and_reduce(local, scratchpad, index, width, should_reduce)
+/// reduce_data remote;
+/// for elem in remote:
+/// remote.elem = Scratchpad[elem_id][index]
+/// if (should_reduce)
+/// local = local @ remote
+/// else
+/// local = remote
+static llvm::Value *
+emitReduceScratchpadFunction(CodeGenModule &CGM,
+ ArrayRef<const Expr *> Privates,
+ QualType ReductionArrayTy, llvm::Value *ReduceFn) {
+ auto &C = CGM.getContext();
+ auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true);
+
+ // Destination of the copy.
+ ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ // Base address of the scratchpad array, with each element storing a
+ // Reduce list per team.
+ ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ // A source index into the scratchpad array.
+ ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other);
+ // Row width of an element in the scratchpad array, typically
+ // the number of teams.
+ ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other);
+ // If should_reduce == 1, then it's load AND reduce,
+ // If should_reduce == 0 (or otherwise), then it only loads (+ copy).
+ // The latter case is used for initialization.
+ ImplicitParamDecl ShouldReduceArg(C, Int32Ty, ImplicitParamDecl::Other);
+
+ FunctionArgList Args;
+ Args.push_back(&ReduceListArg);
+ Args.push_back(&ScratchPadArg);
+ Args.push_back(&IndexArg);
+ Args.push_back(&WidthArg);
+ Args.push_back(&ShouldReduceArg);
+
+ auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *Fn = llvm::Function::Create(
+ CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+ "_omp_reduction_load_and_reduce", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI);
+ CodeGenFunction CGF(CGM);
+ // We don't need debug information in this function as nothing here refers to
+ // user code.
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+
+ auto &Bld = CGF.Builder;
+
+ // Get local Reduce list pointer.
+ Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+ Address ReduceListAddr(
+ Bld.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
+ C.VoidPtrTy, SourceLocation()),
+ CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
+ CGF.getPointerAlign());
+
+ Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg);
+ llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar(
+ AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+
+ Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg);
+ llvm::Value *IndexVal =
+ Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false,
+ Int32Ty, SourceLocation()),
+ CGM.SizeTy, /*isSigned=*/true);
+
+ Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg);
+ llvm::Value *WidthVal =
+ Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false,
+ Int32Ty, SourceLocation()),
+ CGM.SizeTy, /*isSigned=*/true);
+
+ Address AddrShouldReduceArg = CGF.GetAddrOfLocalVar(&ShouldReduceArg);
+ llvm::Value *ShouldReduceVal = CGF.EmitLoadOfScalar(
+ AddrShouldReduceArg, /*Volatile=*/false, Int32Ty, SourceLocation());
+
+ // The absolute address of the base of the next element to copy.
+ llvm::Value *CumulativeElemBasePtr =
+ Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy);
+ Address SrcDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign());
+
+ // Create a Remote Reduce list to store the elements read from the
+ // scratchpad array.
+ Address RemoteReduceList =
+ CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_red_list");
+
+ // Assemble remote Reduce list from scratchpad array.
+ emitReductionListCopy(ScratchpadToThread, CGF, ReductionArrayTy, Privates,
+ SrcDataAddr, RemoteReduceList,
+ {/*RemoteLaneOffset=*/nullptr,
+ /*ScratchpadIndex=*/IndexVal,
+ /*ScratchpadWidth=*/WidthVal});
+
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
+ llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
+ llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
+
+ auto CondReduce = Bld.CreateICmpEQ(ShouldReduceVal, Bld.getInt32(1));
+ Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
+
+ CGF.EmitBlock(ThenBB);
+ // We should reduce with the local Reduce list.
+ // reduce_function(LocalReduceList, RemoteReduceList)
+ llvm::Value *LocalDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ ReduceListAddr.getPointer(), CGF.VoidPtrTy);
+ llvm::Value *RemoteDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ RemoteReduceList.getPointer(), CGF.VoidPtrTy);
+ CGF.EmitCallOrInvoke(ReduceFn, {LocalDataPtr, RemoteDataPtr});
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(ElseBB);
+ // No reduction; just copy:
+ // Local Reduce list = Remote Reduce list.
+ emitReductionListCopy(ThreadCopy, CGF, ReductionArrayTy, Privates,
+ RemoteReduceList, ReduceListAddr);
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(MergeBB);
+
+ CGF.FinishFunction();
+ return Fn;
+}
+
+/// This function emits a helper that stores reduced data from the team
+/// master to a scratchpad array in global memory.
+///
+/// for elem in Reduce List:
+/// scratchpad[elem_id][index] = elem
+///
+static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM,
+ ArrayRef<const Expr *> Privates,
+ QualType ReductionArrayTy) {
+
+ auto &C = CGM.getContext();
+ auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true);
+
+ // Source of the copy.
+ ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ // Base address of the scratchpad array, with each element storing a
+ // Reduce list per team.
+ ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ // A destination index into the scratchpad array, typically the team
+ // identifier.
+ ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other);
+ // Row width of an element in the scratchpad array, typically
+ // the number of teams.
+ ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other);
+
+ FunctionArgList Args;
+ Args.push_back(&ReduceListArg);
+ Args.push_back(&ScratchPadArg);
+ Args.push_back(&IndexArg);
+ Args.push_back(&WidthArg);
+
+ auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *Fn = llvm::Function::Create(
+ CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+ "_omp_reduction_copy_to_scratchpad", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI);
+ CodeGenFunction CGF(CGM);
+ // We don't need debug information in this function as nothing here refers to
+ // user code.
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+
+ auto &Bld = CGF.Builder;
+
+ Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+ Address SrcDataAddr(
+ Bld.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
+ C.VoidPtrTy, SourceLocation()),
+ CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
+ CGF.getPointerAlign());
+
+ Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg);
+ llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar(
+ AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+
+ Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg);
+ llvm::Value *IndexVal =
+ Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false,
+ Int32Ty, SourceLocation()),
+ CGF.SizeTy, /*isSigned=*/true);
+
+ Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg);
+ llvm::Value *WidthVal =
+ Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false,
+ Int32Ty, SourceLocation()),
+ CGF.SizeTy, /*isSigned=*/true);
+
+ // The absolute address of the base of the next element to copy.
+ llvm::Value *CumulativeElemBasePtr =
+ Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy);
+ Address DestDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign());
+
+ emitReductionListCopy(ThreadToScratchpad, CGF, ReductionArrayTy, Privates,
+ SrcDataAddr, DestDataAddr,
+ {/*RemoteLaneOffset=*/nullptr,
+ /*ScratchpadIndex=*/IndexVal,
+ /*ScratchpadWidth=*/WidthVal});
+
+ CGF.FinishFunction();
+ return Fn;
+}
+
+/// This function emits a helper that gathers Reduce lists from the first
+/// lane of every active warp to lanes in the first warp.
+///
+/// void inter_warp_copy_func(void* reduce_data, num_warps)
+/// shared smem[warp_size];
+/// For all data entries D in reduce_data:
+/// If (I am the first lane in each warp)
+/// Copy my local D to smem[warp_id]
+/// sync
+/// if (I am the first warp)
+/// Copy smem[thread_id] to my local D
+/// sync
+static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
+ ArrayRef<const Expr *> Privates,
+ QualType ReductionArrayTy) {
+ auto &C = CGM.getContext();
+ auto &M = CGM.getModule();
+
+ // ReduceList: thread local Reduce list.
+ // At the stage of the computation when this function is called, partially
+ // aggregated values reside in the first lane of every active warp.
+ ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ // NumWarps: number of warps active in the parallel region. This could
+ // be smaller than 32 (max warps in a CTA) for partial block reduction.
+ ImplicitParamDecl NumWarpsArg(C,
+ C.getIntTypeForBitwidth(32, /* Signed */ true),
+ ImplicitParamDecl::Other);
+ FunctionArgList Args;
+ Args.push_back(&ReduceListArg);
+ Args.push_back(&NumWarpsArg);
+
+ auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *Fn = llvm::Function::Create(
+ CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+ "_omp_reduction_inter_warp_copy_func", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*DC=*/nullptr, Fn, CGFI);
+ CodeGenFunction CGF(CGM);
+ // We don't need debug information in this function as nothing here refers to
+ // user code.
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+
+ auto &Bld = CGF.Builder;
+
+ // This array is used as a medium to transfer, one reduce element at a time,
+ // the data from the first lane of every warp to lanes in the first warp
+ // in order to perform the final step of a reduction in a parallel region
+ // (reduction across warps). The array is placed in NVPTX __shared__ memory
+ // for reduced latency, as well as to have a distinct copy for concurrently
+ // executing target regions. The array is declared with common linkage so
+ // as to be shared across compilation units.
+ const char *TransferMediumName =
+ "__openmp_nvptx_data_transfer_temporary_storage";
+ llvm::GlobalVariable *TransferMedium =
+ M.getGlobalVariable(TransferMediumName);
+ if (!TransferMedium) {
+ auto *Ty = llvm::ArrayType::get(CGM.Int64Ty, WarpSize);
+ unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
+ TransferMedium = new llvm::GlobalVariable(
+ M, Ty,
+ /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage,
+ llvm::Constant::getNullValue(Ty), TransferMediumName,
+ /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
+ SharedAddressSpace);
+ }
+
+ // Get the CUDA thread id of the current OpenMP thread on the GPU.
+ auto *ThreadID = getNVPTXThreadID(CGF);
+ // nvptx_lane_id = nvptx_id % warpsize
+ auto *LaneID = getNVPTXLaneID(CGF);
+ // nvptx_warp_id = nvptx_id / warpsize
+ auto *WarpID = getNVPTXWarpID(CGF);
+
+ Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+ Address LocalReduceList(
+ Bld.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
+ C.VoidPtrTy, SourceLocation()),
+ CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
+ CGF.getPointerAlign());
+
+ unsigned Idx = 0;
+ for (auto &Private : Privates) {
+ //
+ // Warp master copies reduce element to transfer medium in __shared__
+ // memory.
+ //
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
+ llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
+ llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
+
+ // if (lane_id == 0)
+ auto IsWarpMaster =
+ Bld.CreateICmpEQ(LaneID, Bld.getInt32(0), "warp_master");
+ Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
+ CGF.EmitBlock(ThenBB);
+
+ // Reduce element = LocalReduceList[i]
+ Address ElemPtrPtrAddr =
+ Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize());
+ llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar(
+ ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+ // elemptr = (type[i]*)(elemptrptr)
+ Address ElemPtr =
+ Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType()));
+ ElemPtr = Bld.CreateElementBitCast(
+ ElemPtr, CGF.ConvertTypeForMem(Private->getType()));
+ // elem = *elemptr
+ llvm::Value *Elem = CGF.EmitLoadOfScalar(
+ ElemPtr, /*Volatile=*/false, Private->getType(), SourceLocation());
+
+ // Get pointer to location in transfer medium.
+ // MediumPtr = &medium[warp_id]
+ llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP(
+ TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID});
+ Address MediumPtr(MediumPtrVal, C.getTypeAlignInChars(Private->getType()));
+ // Casting to actual data type.
+ // MediumPtr = (type[i]*)MediumPtrAddr;
+ MediumPtr = Bld.CreateElementBitCast(
+ MediumPtr, CGF.ConvertTypeForMem(Private->getType()));
+
+ //*MediumPtr = elem
+ Bld.CreateStore(Elem, MediumPtr);
+
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(ElseBB);
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(MergeBB);
+
+ Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg);
+ llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar(
+ AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, SourceLocation());
+
+ auto *NumActiveThreads = Bld.CreateNSWMul(
+ NumWarpsVal, getNVPTXWarpSize(CGF), "num_active_threads");
+ // named_barrier_sync(ParallelBarrierID, num_active_threads)
+ syncParallelThreads(CGF, NumActiveThreads);
+
+ //
+ // Warp 0 copies reduce element from transfer medium.
+ //
+ llvm::BasicBlock *W0ThenBB = CGF.createBasicBlock("then");
+ llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else");
+ llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont");
+
+ // Up to 32 threads in warp 0 are active.
+ auto IsActiveThread =
+ Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread");
+ Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
+
+ CGF.EmitBlock(W0ThenBB);
+
+ // SrcMediumPtr = &medium[tid]
+ llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP(
+ TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID});
+ Address SrcMediumPtr(SrcMediumPtrVal,
+ C.getTypeAlignInChars(Private->getType()));
+ // SrcMediumVal = *SrcMediumPtr;
+ SrcMediumPtr = Bld.CreateElementBitCast(
+ SrcMediumPtr, CGF.ConvertTypeForMem(Private->getType()));
+ llvm::Value *SrcMediumValue = CGF.EmitLoadOfScalar(
+ SrcMediumPtr, /*Volatile=*/false, Private->getType(), SourceLocation());
+
+ // TargetElemPtr = (type[i]*)(SrcDataAddr[i])
+ Address TargetElemPtrPtr =
+ Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize());
+ llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar(
+ TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+ Address TargetElemPtr =
+ Address(TargetElemPtrVal, C.getTypeAlignInChars(Private->getType()));
+ TargetElemPtr = Bld.CreateElementBitCast(
+ TargetElemPtr, CGF.ConvertTypeForMem(Private->getType()));
+
+ // *TargetElemPtr = SrcMediumVal;
+ CGF.EmitStoreOfScalar(SrcMediumValue, TargetElemPtr, /*Volatile=*/false,
+ Private->getType());
+ Bld.CreateBr(W0MergeBB);
+
+ CGF.EmitBlock(W0ElseBB);
+ Bld.CreateBr(W0MergeBB);
+
+ CGF.EmitBlock(W0MergeBB);
+
+ // While warp 0 copies values from transfer medium, all other warps must
+ // wait.
+ syncParallelThreads(CGF, NumActiveThreads);
+ Idx++;
+ }
+
+ CGF.FinishFunction();
+ return Fn;
+}
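+
+// For reference, a sketch (not emitted code) of what the inter-warp copy
+// helper above does at run time for each reduce element, assuming a 32-slot
+// shared-memory transfer buffer:
+//
+//   if (lane_id == 0)                          // warp master
+//     transfer_medium[warp_id] = reduce_list[i];
+//   named_barrier_sync(num_warps * WARPSIZE);
+//   if (thread_id < num_warps)                 // threads of warp 0
+//     reduce_list[i] = transfer_medium[thread_id];
+//   named_barrier_sync(num_warps * WARPSIZE);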
+
+/// Emit a helper that reduces data across two OpenMP threads (lanes)
+/// in the same warp. It uses shuffle instructions to copy over data from
+/// a remote lane's stack. The reduction algorithm performed is specified
+/// by the fourth parameter.
+///
+/// Algorithm Versions.
+/// Full Warp Reduce (argument value 0):
+/// This algorithm assumes that all 32 lanes are active and gathers
+/// data from these 32 lanes, producing a single resultant value.
+/// Contiguous Partial Warp Reduce (argument value 1):
+/// This algorithm assumes that only a *contiguous* subset of lanes
+/// are active. This happens for the last warp in a parallel region
+/// when the user specified num_threads is not an integer multiple of
+/// 32. This contiguous subset always starts with the zeroth lane.
+/// Partial Warp Reduce (argument value 2):
+/// This algorithm gathers data from any number of lanes at any position.
+/// All reduced values are stored in the lowest possible lane. The set
+/// of problems each algorithm addresses is a superset of those
+/// addressable by algorithms with a lower version number. Overhead
+/// increases as the algorithm version increases.
+///
+/// Terminology
+/// Reduce element:
+/// Reduce element refers to an individual data field of primitive
+/// data type that is combined and reduced across threads.
+/// Reduce list:
+/// Reduce list refers to a collection of local, thread-private
+/// reduce elements.
+/// Remote Reduce list:
+/// Remote Reduce list refers to a collection of remote (relative to
+/// the current thread) reduce elements.
+///
+/// We distinguish between three states of threads that are important to
+/// the implementation of this function.
+/// Alive threads:
+/// Threads in a warp executing the SIMT instruction, as distinguished from
+/// threads that are inactive due to divergent control flow.
+/// Active threads:
+/// The minimal set of threads that has to be alive upon entry to this
+/// function. The computation is correct iff active threads are alive.
+/// Some threads are alive but they are not active because they do not
+/// contribute to the computation in any useful manner. Turning them off
+/// may introduce control flow overheads without any tangible benefits.
+/// Effective threads:
+/// In order to comply with the argument requirements of the shuffle
+/// function, we must keep all lanes holding data alive. But at most
+/// half of them perform value aggregation; we refer to this half of
+/// threads as effective. The other half simply hands off its data.
+///
+/// Procedure
+/// Value shuffle:
+/// In this step active threads transfer data from higher lane positions
+/// in the warp to lower lane positions, creating Remote Reduce list.
+/// Value aggregation:
+/// In this step, effective threads combine their thread local Reduce list
+/// with Remote Reduce list and store the result in the thread local
+/// Reduce list.
+/// Value copy:
+/// In this step, we deal with the assumption made by algorithm 2
+/// (i.e. contiguity assumption). When we have an odd number of lanes
+/// active, say 2k+1, only k threads will be effective and therefore k
+/// new values will be produced. However, the Reduce list owned by the
+/// (2k+1)th thread is ignored in the value aggregation. Therefore
+/// we copy the Reduce list from the (2k+1)th lane to (k+1)th lane so
+/// that the contiguity assumption still holds.
+static llvm::Value *
+emitShuffleAndReduceFunction(CodeGenModule &CGM,
+ ArrayRef<const Expr *> Privates,
+ QualType ReductionArrayTy, llvm::Value *ReduceFn) {
+ auto &C = CGM.getContext();
+
+ // Thread local Reduce list used to host the values of data to be reduced.
+ ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ // Current lane id; could be logical.
+ ImplicitParamDecl LaneIDArg(C, C.ShortTy, ImplicitParamDecl::Other);
+ // Offset of the remote source lane relative to the current lane.
+ ImplicitParamDecl RemoteLaneOffsetArg(C, C.ShortTy,
+ ImplicitParamDecl::Other);
+ // Algorithm version. This is expected to be known at compile time.
+ ImplicitParamDecl AlgoVerArg(C, C.ShortTy, ImplicitParamDecl::Other);
+ FunctionArgList Args;
+ Args.push_back(&ReduceListArg);
+ Args.push_back(&LaneIDArg);
+ Args.push_back(&RemoteLaneOffsetArg);
+ Args.push_back(&AlgoVerArg);
+
+ auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *Fn = llvm::Function::Create(
+ CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+ "_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
+ CodeGenFunction CGF(CGM);
+ // We don't need debug information in this function as nothing here refers to
+ // user code.
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+
+ auto &Bld = CGF.Builder;
+
+ Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+ Address LocalReduceList(
+ Bld.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
+ C.VoidPtrTy, SourceLocation()),
+ CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
+ CGF.getPointerAlign());
+
+ Address AddrLaneIDArg = CGF.GetAddrOfLocalVar(&LaneIDArg);
+ llvm::Value *LaneIDArgVal = CGF.EmitLoadOfScalar(
+ AddrLaneIDArg, /*Volatile=*/false, C.ShortTy, SourceLocation());
+
+ Address AddrRemoteLaneOffsetArg = CGF.GetAddrOfLocalVar(&RemoteLaneOffsetArg);
+ llvm::Value *RemoteLaneOffsetArgVal = CGF.EmitLoadOfScalar(
+ AddrRemoteLaneOffsetArg, /*Volatile=*/false, C.ShortTy, SourceLocation());
+
+ Address AddrAlgoVerArg = CGF.GetAddrOfLocalVar(&AlgoVerArg);
+ llvm::Value *AlgoVerArgVal = CGF.EmitLoadOfScalar(
+ AddrAlgoVerArg, /*Volatile=*/false, C.ShortTy, SourceLocation());
+
+ // Create a local thread-private variable to host the Reduce list
+ // from a remote lane.
+ Address RemoteReduceList =
+ CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_reduce_list");
+
+ // This loop iterates through the list of reduce elements and copies,
+ // element by element, from a remote lane in the warp to RemoteReduceList,
+ // hosted on the thread's stack.
+ emitReductionListCopy(RemoteLaneToThread, CGF, ReductionArrayTy, Privates,
+ LocalReduceList, RemoteReduceList,
+ {/*RemoteLaneOffset=*/RemoteLaneOffsetArgVal,
+ /*ScratchpadIndex=*/nullptr,
+ /*ScratchpadWidth=*/nullptr});
+
+ // The actions to be performed on the Remote Reduce list depend
+ // on the algorithm version.
+ //
+ // if (AlgoVer==0) || (AlgoVer==1 && (LaneId < Offset)) || (AlgoVer==2 &&
+ // LaneId % 2 == 0 && Offset > 0):
+ // do the reduction value aggregation
+ //
+ // The thread local variable Reduce list is mutated in place to host the
+ // reduced data, which is the aggregated value produced from local and
+ // remote lanes.
+ //
+ // Note that AlgoVer is expected to be a constant integer known at compile
+ // time.
+ // When AlgoVer==0, the first conjunction evaluates to true, making
+ // the entire predicate true at compile time.
+ // When AlgoVer==1, only the second part of the second conjunction needs
+ // to be evaluated at run time; the other conjunctions evaluate to false
+ // at compile time.
+ // When AlgoVer==2, only the second part of the third conjunction needs
+ // to be evaluated at run time; the other conjunctions evaluate to false
+ // at compile time.
+ auto CondAlgo0 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(0));
+
+ auto Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
+ auto CondAlgo1 = Bld.CreateAnd(
+ Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal));
+
+ auto Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2));
+ auto CondAlgo2 = Bld.CreateAnd(
+ Algo2,
+ Bld.CreateICmpEQ(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1)),
+ Bld.getInt16(0)));
+ CondAlgo2 = Bld.CreateAnd(
+ CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0)));
+
+ auto CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1);
+ CondReduce = Bld.CreateOr(CondReduce, CondAlgo2);
+
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
+ llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
+ llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
+ Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
+
+ CGF.EmitBlock(ThenBB);
+ // reduce_function(LocalReduceList, RemoteReduceList)
+ llvm::Value *LocalReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ LocalReduceList.getPointer(), CGF.VoidPtrTy);
+ llvm::Value *RemoteReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ RemoteReduceList.getPointer(), CGF.VoidPtrTy);
+ CGF.EmitCallOrInvoke(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(ElseBB);
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(MergeBB);
+
+ // if (AlgoVer==1 && (LaneId >= Offset)) copy Remote Reduce list to local
+ // Reduce list.
+ Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
+ auto CondCopy = Bld.CreateAnd(
+ Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal));
+
+ llvm::BasicBlock *CpyThenBB = CGF.createBasicBlock("then");
+ llvm::BasicBlock *CpyElseBB = CGF.createBasicBlock("else");
+ llvm::BasicBlock *CpyMergeBB = CGF.createBasicBlock("ifcont");
+ Bld.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
+
+ CGF.EmitBlock(CpyThenBB);
+ emitReductionListCopy(ThreadCopy, CGF, ReductionArrayTy, Privates,
+ RemoteReduceList, LocalReduceList);
+ Bld.CreateBr(CpyMergeBB);
+
+ CGF.EmitBlock(CpyElseBB);
+ Bld.CreateBr(CpyMergeBB);
+
+ CGF.EmitBlock(CpyMergeBB);
+
+ CGF.FinishFunction();
+ return Fn;
+}
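+
+// For reference, a sketch (not emitted code) of what the generated
+// shuffle-and-reduce function does at run time, per reduce element;
+// 'shuffle_down' stands in for the GPU shuffle intrinsic:
+//
+//   remote_elem = shuffle_down(local_elem, remote_lane_offset);
+//   if (algo_ver == 0 ||
+//       (algo_ver == 1 && lane_id < remote_lane_offset) ||
+//       (algo_ver == 2 && lane_id % 2 == 0 && remote_lane_offset > 0))
+//     local_elem = reduce_op(local_elem, remote_elem);  // via reduce_func
+//   if (algo_ver == 1 && lane_id >= remote_lane_offset)
+//     local_elem = remote_elem;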
+
+///
+/// Design of OpenMP reductions on the GPU
+///
+/// Consider a typical OpenMP program with one or more reduction
+/// clauses:
+///
+/// float foo;
+/// double bar;
+/// #pragma omp target teams distribute parallel for \
+/// reduction(+:foo) reduction(*:bar)
+/// for (int i = 0; i < N; i++) {
+/// foo += A[i]; bar *= B[i];
+/// }
+///
+/// where 'foo' and 'bar' are reduced across all OpenMP threads in
+/// all teams. In our OpenMP implementation on the NVPTX device an
+/// OpenMP team is mapped to a CUDA threadblock and OpenMP threads
+/// within a team are mapped to CUDA threads within a threadblock.
+/// Our goal is to efficiently aggregate values across all OpenMP
+/// threads such that:
+///
+/// - the compiler and runtime are logically concise, and
+/// - the reduction is performed efficiently in a hierarchical
+/// manner as follows: within OpenMP threads in the same warp,
+/// across warps in a threadblock, and finally across teams on
+/// the NVPTX device.
+///
+/// Introduction to Decoupling
+///
+/// We would like to decouple the compiler and the runtime so that the
+/// latter is ignorant of the reduction variables (number, data types)
+/// and the reduction operators. This allows a simpler interface
+/// and implementation while still attaining good performance.
+///
+/// Pseudocode for the aforementioned OpenMP program generated by the
+/// compiler is as follows:
+///
+/// 1. Create private copies of reduction variables on each OpenMP
+/// thread: 'foo_private', 'bar_private'
+/// 2. Each OpenMP thread reduces the chunk of 'A' and 'B' assigned
+/// to it and writes the result in 'foo_private' and 'bar_private'
+/// respectively.
+/// 3. Call the OpenMP runtime on the GPU to reduce within a team
+/// and store the result on the team master:
+///
+/// __kmpc_nvptx_parallel_reduce_nowait(...,
+/// reduceData, shuffleReduceFn, interWarpCpyFn)
+///
+/// where:
+/// struct ReduceData {
+/// double *foo;
+/// double *bar;
+/// } reduceData
+/// reduceData.foo = &foo_private
+/// reduceData.bar = &bar_private
+///
+/// 'shuffleReduceFn' and 'interWarpCpyFn' are pointers to two
+/// auxiliary functions generated by the compiler that operate on
+/// variables of type 'ReduceData'. They help the runtime perform
+/// algorithmic steps in a data-agnostic manner.
+///
+/// 'shuffleReduceFn' is a pointer to a function that reduces data
+/// of type 'ReduceData' across two OpenMP threads (lanes) in the
+/// same warp. It takes the following arguments as input:
+///
+/// a. variable of type 'ReduceData' on the calling lane,
+/// b. its lane_id,
+/// c. an offset relative to the current lane_id to generate a
+/// remote_lane_id. The remote lane contains the second
+/// variable of type 'ReduceData' that is to be reduced.
+/// d. an algorithm version parameter determining which reduction
+/// algorithm to use.
+///
+/// 'shuffleReduceFn' retrieves data from the remote lane using
+/// efficient GPU shuffle intrinsics and reduces, using the
+/// algorithm specified by the 4th parameter, the two operands
+/// element-wise. The result is written to the first operand.
+///
+/// Different reduction algorithms are implemented in different
+/// runtime functions, all calling 'shuffleReduceFn' to perform
+/// the essential reduction step. Therefore, based on the 4th
+/// parameter, this function behaves slightly differently to
+/// cooperate with the runtime to ensure correctness under
+/// different circumstances.
+///
+/// 'InterWarpCpyFn' is a pointer to a function that transfers
+/// reduced variables across warps. It tunnels, through CUDA
+/// shared memory, the thread-private data of type 'ReduceData'
+/// from lane 0 of each warp to a lane in the first warp.
+/// 4. Call the OpenMP runtime on the GPU to reduce across teams.
+/// The last team writes the global reduced value to memory.
+///
+/// ret = __kmpc_nvptx_teams_reduce_nowait(...,
+/// reduceData, shuffleReduceFn, interWarpCpyFn,
+/// scratchpadCopyFn, loadAndReduceFn)
+///
+/// 'scratchpadCopyFn' is a helper that stores reduced
+/// data from the team master to a scratchpad array in
+/// global memory.
+///
+/// 'loadAndReduceFn' is a helper that loads data from
+/// the scratchpad array and reduces it with the input
+/// operand.
+///
+/// These compiler generated functions hide address
+/// calculation and alignment information from the runtime.
+/// 5. if ret == 1:
+/// The team master of the last team stores the reduced
+/// result to the globals in memory.
+/// foo += reduceData.foo; bar *= reduceData.bar
+///
+///
+/// Warp Reduction Algorithms
+///
+/// On the warp level, we have three algorithms implemented in the
+/// OpenMP runtime depending on the number of active lanes:
+///
+/// Full Warp Reduction
+///
+/// The reduce algorithm within a warp where all lanes are active
+/// is implemented in the runtime as follows:
+///
+/// full_warp_reduce(void *reduce_data,
+/// kmp_ShuffleReductFctPtr ShuffleReduceFn) {
+/// for (int offset = WARPSIZE/2; offset > 0; offset /= 2)
+/// ShuffleReduceFn(reduce_data, 0, offset, 0);
+/// }
+///
+/// The algorithm completes in log(2, WARPSIZE) steps.
+///
+/// 'ShuffleReduceFn' is used here with lane_id set to 0 because it is
+/// not used; this saves instructions by not retrieving lane_id
+/// from the corresponding special registers. The 4th parameter, which
+/// represents the version of the algorithm being used, is set to 0 to
+/// signify full warp reduction.
+///
+/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
+///
+/// #reduce_elem refers to an element in the local lane's data structure
+/// #remote_elem is retrieved from a remote lane
+/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
+/// reduce_elem = reduce_elem REDUCE_OP remote_elem;
+///
+/// Contiguous Partial Warp Reduction
+///
+/// This reduce algorithm is used within a warp where only the first
+/// 'n' (n <= WARPSIZE) lanes are active. It is typically used when the
+/// number of OpenMP threads in a parallel region is not a multiple of
+/// WARPSIZE. The algorithm is implemented in the runtime as follows:
+///
+/// void
+/// contiguous_partial_reduce(void *reduce_data,
+/// kmp_ShuffleReductFctPtr ShuffleReduceFn,
+/// int size, int lane_id) {
+/// int curr_size;
+/// int offset;
+/// curr_size = size;
+/// offset = curr_size/2;
+/// while (offset > 0) {
+/// ShuffleReduceFn(reduce_data, lane_id, offset, 1);
+/// curr_size = (curr_size+1)/2;
+/// offset = curr_size/2;
+/// }
+/// }
+///
+/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
+///
+/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
+/// if (lane_id < offset)
+/// reduce_elem = reduce_elem REDUCE_OP remote_elem
+/// else
+/// reduce_elem = remote_elem
+///
+/// This algorithm assumes that the data to be reduced are located in a
+/// contiguous subset of lanes starting from the first. When there is
+/// an odd number of active lanes, the data in the last lane is not
+/// aggregated with any other lane's data but is instead copied over.
+///
+/// Dispersed Partial Warp Reduction
+///
+/// This algorithm is used within a warp when any discontiguous subset of
+/// lanes are active. It is used to implement the reduction operation
+/// across lanes in an OpenMP simd region or in a nested parallel region.
+///
+/// void
+/// dispersed_partial_reduce(void *reduce_data,
+/// kmp_ShuffleReductFctPtr ShuffleReduceFn) {
+/// int size, remote_id;
+/// int logical_lane_id = number_of_active_lanes_before_me() * 2;
+/// do {
+/// remote_id = next_active_lane_id_right_after_me();
+/// # the above function returns 0 if no active lane
+/// # is present right after the current lane.
+/// size = number_of_active_lanes_in_this_warp();
+/// logical_lane_id /= 2;
+/// ShuffleReduceFn(reduce_data, logical_lane_id,
+/// remote_id-1-threadIdx.x, 2);
+/// } while (logical_lane_id % 2 == 0 && size > 1);
+/// }
+///
+/// There is no assumption made about the initial state of the reduction.
+/// Any number of lanes (>=1) could be active at any position. The reduction
+/// result is returned in the first active lane.
+///
+/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
+///
+/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
+/// if (lane_id % 2 == 0 && offset > 0)
+/// reduce_elem = reduce_elem REDUCE_OP remote_elem
+/// else
+/// reduce_elem = remote_elem
+///
+///
+/// Intra-Team Reduction
+///
+/// This function, as implemented in the runtime call
+/// '__kmpc_nvptx_parallel_reduce_nowait', aggregates data across OpenMP
+/// threads in a team. It first reduces within a warp using the
+/// aforementioned algorithms. We then proceed to gather all such
+/// reduced values at the first warp.
+///
+/// The runtime makes use of the function 'InterWarpCpyFn', which copies
+/// data from each of the "warp masters" (zeroth lane of each warp, where
+/// warp-reduced data is held) to the zeroth warp. This step reduces (in
+/// a mathematical sense) the problem of reduction across warp masters in
+/// a block to the problem of warp reduction.
+///
+///
+/// Inter-Team Reduction
+///
+/// Once a team has reduced its data to a single value, it is stored in
+/// a global scratchpad array. Since each team has a distinct slot, this
+/// can be done without locking.
+///
+/// The last team to write to the scratchpad array proceeds to reduce the
+/// scratchpad array. One or more workers in the last team use the helper
+/// 'loadAndReduceFn' to load and reduce values from the array, i.e.,
+/// the k'th worker reduces every k'th element.
+///
+/// Finally, a call is made to '__kmpc_nvptx_parallel_reduce_nowait' to
+/// reduce across workers and compute a globally reduced value.
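+///
+/// A rough pseudocode sketch of the two steps above (illustrative only;
+/// the runtime implementation may differ in detail):
+///
+///   // Intra-team: __kmpc_nvptx_parallel_reduce_nowait
+///   warp_reduce(reduce_data);                  // one of the 3 algorithms
+///   InterWarpCpyFn(reduce_data, num_warps);    // warp masters -> warp 0
+///   if (thread_id < num_warps)
+///     warp_reduce(reduce_data);                // reduce across warp masters
+///
+///   // Inter-team: __kmpc_nvptx_teams_reduce_nowait
+///   if (team_master)
+///     scratchpadCopyFn(scratchpad, reduce_data);
+///   if (last_team) {
+///     loadAndReduceFn(reduce_data, scratchpad); // k'th worker, k'th slots
+///     // then reduce across the participating workers as above
+///   }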
+///
+void CGOpenMPRuntimeNVPTX::emitReduction(
+ CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
+ if (!CGF.HaveInsertPoint())
+ return;
+
+ bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind);
+ bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind);
+ // FIXME: Add support for simd reduction.
+ assert((TeamsReduction || ParallelReduction) &&
+ "Invalid reduction selection in emitReduction.");
+
+ auto &C = CGM.getContext();
+
+ // 1. Build a list of reduction variables.
+ // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
+ auto Size = RHSExprs.size();
+ for (auto *E : Privates) {
+ if (E->getType()->isVariablyModifiedType())
+ // Reserve place for array size.
+ ++Size;
+ }
+ llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
+ QualType ReductionArrayTy =
+ C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+ Address ReductionList =
+ CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
+ auto IPriv = Privates.begin();
+ unsigned Idx = 0;
+ for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
+ Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
+ CGF.getPointerSize());
+ CGF.Builder.CreateStore(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
+ Elem);
+ if ((*IPriv)->getType()->isVariablyModifiedType()) {
+ // Store array size.
+ ++Idx;
+ Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
+ CGF.getPointerSize());
+ llvm::Value *Size = CGF.Builder.CreateIntCast(
+ CGF.getVLASize(
+ CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
+ .first,
+ CGF.SizeTy, /*isSigned=*/false);
+ CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
+ Elem);
+ }
+ }
+
+ // 2. Emit reduce_func().
+ auto *ReductionFn = emitReductionFunction(
+ CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
+ LHSExprs, RHSExprs, ReductionOps);
+
+ // 4. Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList),
+ // RedList, shuffle_reduce_func, interwarp_copy_func);
+ auto *ThreadId = getThreadID(CGF, Loc);
+ auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
+ auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ ReductionList.getPointer(), CGF.VoidPtrTy);
+
+ auto *ShuffleAndReduceFn = emitShuffleAndReduceFunction(
+ CGM, Privates, ReductionArrayTy, ReductionFn);
+ auto *InterWarpCopyFn =
+ emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy);
+
+ llvm::Value *Res = nullptr;
+ if (ParallelReduction) {
+ llvm::Value *Args[] = {ThreadId,
+ CGF.Builder.getInt32(RHSExprs.size()),
+ ReductionArrayTySize,
+ RL,
+ ShuffleAndReduceFn,
+ InterWarpCopyFn};
+
+ Res = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_reduce_nowait),
+ Args);
+ }
+
+ if (TeamsReduction) {
+ auto *ScratchPadCopyFn =
+ emitCopyToScratchpad(CGM, Privates, ReductionArrayTy);
+ auto *LoadAndReduceFn = emitReduceScratchpadFunction(
+ CGM, Privates, ReductionArrayTy, ReductionFn);
+
+ llvm::Value *Args[] = {ThreadId,
+ CGF.Builder.getInt32(RHSExprs.size()),
+ ReductionArrayTySize,
+ RL,
+ ShuffleAndReduceFn,
+ InterWarpCopyFn,
+ ScratchPadCopyFn,
+ LoadAndReduceFn};
+ Res = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_teams_reduce_nowait),
+ Args);
+ }
+
+ // 5. Build switch(res)
+ auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
+ auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1);
+
+ // 6. Build case 1: where we have reduced values in the master
+ // thread in each team.
+ // __kmpc_end_reduce{_nowait}(<gtid>);
+ // break;
+ auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
+ SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
+ CGF.EmitBlock(Case1BB);
+
+ // Add emission of __kmpc_end_reduce{_nowait}(<gtid>);
+ llvm::Value *EndArgs[] = {ThreadId};
+ auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps,
+ this](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ auto IPriv = Privates.begin();
+ auto ILHS = LHSExprs.begin();
+ auto IRHS = RHSExprs.begin();
+ for (auto *E : ReductionOps) {
+ emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
+ cast<DeclRefExpr>(*IRHS));
+ ++IPriv;
+ ++ILHS;
+ ++IRHS;
+ }
+ };
+ RegionCodeGenTy RCG(CodeGen);
+ NVPTXActionTy Action(
+ nullptr, llvm::None,
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait),
+ EndArgs);
+ RCG.setAction(Action);
+ RCG(CGF);
+ CGF.EmitBranch(DefaultBB);
+ CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
+}
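+
+// For reference, an illustrative sketch (not the emitted IR) of the code
+// generated by emitReduction above for 'reduction(+:foo)' under a parallel
+// directive:
+//
+//   void *red_list[1] = { &foo_private };
+//   int res = __kmpc_nvptx_parallel_reduce_nowait(
+//       gtid, /*n=*/1, sizeof(red_list), red_list,
+//       shuffle_and_reduce_func, inter_warp_copy_func);
+//   switch (res) {
+//   case 1:                       // this thread holds the reduced values
+//     foo += foo_private;
+//     __kmpc_nvptx_end_reduce_nowait(gtid);
+//     break;
+//   default:
+//     break;
+//   }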
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index 4010b46..ae25e94 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -43,6 +43,8 @@ private:
void createWorkerFunction(CodeGenModule &CGM);
};
+ bool isInSpmdExecutionMode() const;
+
/// \brief Emit the worker function for the current target region.
void emitWorkerFunction(WorkerFunctionState &WST);
@@ -58,11 +60,12 @@ private:
/// function.
void emitGenericEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
- /// \brief Returns specified OpenMP runtime function for the current OpenMP
- /// implementation. Specialized for the NVPTX device.
- /// \param Function OpenMP runtime function.
- /// \return Specified function.
- llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
+ /// \brief Helper for Spmd mode target directive's entry function.
+ void emitSpmdEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
+ const OMPExecutableDirective &D);
+
+ /// \brief Signal termination of Spmd mode execution.
+ void emitSpmdEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
//
// Base class overrides.
@@ -87,6 +90,22 @@ private:
llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen);
+ /// \brief Emit outlined function specialized for the Single Program
+ /// Multiple Data programming model for applicable target directives on the
+ /// NVPTX device.
+ /// \param D Directive to emit.
+ /// \param ParentName Name of the function that encloses the target region.
+ /// \param OutlinedFn Outlined function value to be defined by this call.
+ /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+ /// \param IsOffloadEntry True if the outlined function is an offload entry.
+ /// \param CodeGen Object containing the target statements.
+ /// An outlined function may not be an entry if, e.g. the if clause always
+ /// evaluates to false.
+ void emitSpmdKernel(const OMPExecutableDirective &D, StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen);
+
/// \brief Emit outlined function for 'target' directive on the NVPTX
/// device.
/// \param D Directive to emit.
@@ -118,6 +137,22 @@ private:
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond);
+ /// \brief Emits code for parallel or serial call of the \a OutlinedFn with
+ /// variables captured in a record whose address is stored in \a
+ /// CapturedStruct.
+ /// This call is for a parallel directive within an SPMD target directive.
+ /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
+ /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+ /// \param CapturedVars A pointer to the record with the references to
+ /// variables used in \a OutlinedFn function.
+ /// \param IfCond Condition in the associated 'if' clause, if it was
+ /// specified, nullptr otherwise.
+ ///
+ void emitSpmdParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+ llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars,
+ const Expr *IfCond);
+
protected:
/// \brief Get the function name of an outlined region.
// The name can be customized depending on the target.
@@ -129,6 +164,20 @@ protected:
public:
explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
+ /// \brief Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
+ /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
+ virtual void emitProcBindClause(CodeGenFunction &CGF,
+ OpenMPProcBindClauseKind ProcBind,
+ SourceLocation Loc) override;
+
+ /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
+ /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
+ /// clause.
+ /// \param NumThreads An integer value representing the number of threads.
+ virtual void emitNumThreadsClause(CodeGenFunction &CGF,
+ llvm::Value *NumThreads,
+ SourceLocation Loc) override;
+
/// \brief This function ought to emit, in the general case, a call to
// the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
// as these numbers are obtained through the PTX grid and block configuration.
@@ -138,7 +187,22 @@ public:
const Expr *ThreadLimit, SourceLocation Loc) override;
/// \brief Emits inlined function for the specified OpenMP parallel
- // directive but an inlined function for teams.
+ // directive.
+ /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
+ /// kmp_int32 BoundID, struct context_vars*).
+ /// \param D OpenMP directive.
+ /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+ /// \param InnermostKind Kind of innermost directive (for simple directives it
+ /// is a directive itself, for combined - its innermost directive).
+ /// \param CodeGen Code generation sequence for the \a D directive.
+ llvm::Value *
+ emitParallelOutlinedFunction(const OMPExecutableDirective &D,
+ const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind,
+ const RegionCodeGenTy &CodeGen) override;
+
+ /// \brief Emits inlined function for the specified OpenMP teams
+ // directive.
/// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
/// kmp_int32 BoundID, struct context_vars*).
/// \param D OpenMP directive.
@@ -147,10 +211,10 @@ public:
/// is a directive itself, for combined - its innermost directive).
/// \param CodeGen Code generation sequence for the \a D directive.
llvm::Value *
- emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D,
- const VarDecl *ThreadIDVar,
- OpenMPDirectiveKind InnermostKind,
- const RegionCodeGenTy &CodeGen) override;
+ emitTeamsOutlinedFunction(const OMPExecutableDirective &D,
+ const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind,
+ const RegionCodeGenTy &CodeGen) override;
/// \brief Emits code for teams call of the \a OutlinedFn with
/// variables captured in a record which address is stored in \a
@@ -177,6 +241,50 @@ public:
llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond) override;
+
+ /// Emit code for the reduction clause.
+ ///
+ /// \param Privates List of private copies for original reduction arguments.
+ /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
+ /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
+ /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
+ /// or 'operator binop(LHS, RHS)'.
+ /// \param Options List of options for reduction codegen:
+ /// WithNowait true if the parent directive also has a nowait clause, false
+ /// otherwise.
+ /// SimpleReduction Emit reduction operation only. Used for omp simd
+ /// directive on the host.
+ /// ReductionKind The kind of reduction to perform.
+ virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
+ ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps,
+ ReductionOptionsTy Options) override;
+
+ /// Returns specified OpenMP runtime function for the current OpenMP
+ /// implementation. Specialized for the NVPTX device.
+ /// \param Function OpenMP runtime function.
+ /// \return Specified function.
+ llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
+
+ /// Target codegen is specialized based on two programming models: the
+ /// 'generic' fork-join model of OpenMP, and a more GPU-efficient 'spmd'
+ /// model for constructs like 'target parallel' that support it.
+ enum ExecutionMode {
+ /// Single Program Multiple Data.
+ Spmd,
+ /// Generic codegen to support fork-join model.
+ Generic,
+ Unknown,
+ };
+
+private:
+ // Track the execution mode when codegening directives within a target
+ // region. The appropriate mode (generic/spmd) is set on entry to the
+ // target region and used by containing directives such as 'parallel'
+ // to emit optimized code.
+ ExecutionMode CurrentExecutionMode;
};
} // CodeGen namespace.
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp
index 8370607..a13c386 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp
@@ -145,7 +145,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S) {
EmitCoroutineBody(cast<CoroutineBodyStmt>(*S));
break;
case Stmt::CoreturnStmtClass:
- CGM.ErrorUnsupported(S, "coroutine");
+ EmitCoreturnStmt(cast<CoreturnStmt>(*S));
break;
case Stmt::CapturedStmtClass: {
const CapturedStmt *CS = cast<CapturedStmt>(S);
@@ -1024,6 +1024,18 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) {
/// if the function returns void, or may be missing one if the function returns
/// non-void. Fun stuff :).
void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
+ if (requiresReturnValueCheck()) {
+ llvm::Constant *SLoc = EmitCheckSourceLocation(S.getLocStart());
+ auto *SLocPtr =
+ new llvm::GlobalVariable(CGM.getModule(), SLoc->getType(), false,
+ llvm::GlobalVariable::PrivateLinkage, SLoc);
+ SLocPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ CGM.getSanitizerMetadata()->disableSanitizerForGlobal(SLocPtr);
+ assert(ReturnLocation.isValid() && "No valid return location");
+ Builder.CreateStore(Builder.CreateBitCast(SLocPtr, Int8PtrTy),
+ ReturnLocation);
+ }
+
// Returning from an outlined SEH helper is UB, and we already warn on it.
if (IsOutlinedSEHHelper) {
Builder.CreateUnreachable();
@@ -1166,7 +1178,7 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) {
if (Rem)
Rem--;
SwitchInsn->addCase(Builder.getInt(LHS), CaseDest);
- LHS++;
+ ++LHS;
}
return;
}
@@ -2127,16 +2139,16 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
llvm::InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect,
/* IsAlignStack */ false, AsmDialect);
llvm::CallInst *Result = Builder.CreateCall(IA, Args);
- Result->addAttribute(llvm::AttributeSet::FunctionIndex,
+ Result->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind);
// Attach readnone and readonly attributes.
if (!HasSideEffect) {
if (ReadNone)
- Result->addAttribute(llvm::AttributeSet::FunctionIndex,
+ Result->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::ReadNone);
else if (ReadOnly)
- Result->addAttribute(llvm::AttributeSet::FunctionIndex,
+ Result->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::ReadOnly);
}
@@ -2157,7 +2169,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// Conservatively, mark all inline asm blocks in CUDA as convergent
// (meaning, they may call an intrinsically convergent op, such as bar.sync,
// and so can't have certain optimizations applied around them).
- Result->addAttribute(llvm::AttributeSet::FunctionIndex,
+ Result->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::Convergent);
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 39e1cdf..cf430f8 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -26,7 +26,7 @@ using namespace CodeGen;
namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
-class OMPLexicalScope final : public CodeGenFunction::LexicalScope {
+class OMPLexicalScope : public CodeGenFunction::LexicalScope {
void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
for (const auto *C : S.clauses()) {
if (auto *CPI = OMPClauseWithPreInit::get(C)) {
@@ -54,10 +54,11 @@ class OMPLexicalScope final : public CodeGenFunction::LexicalScope {
public:
OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
- bool AsInlined = false)
+ bool AsInlined = false, bool EmitPreInitStmt = true)
: CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
InlinedShareds(CGF) {
- emitPreInitStmt(CGF, S);
+ if (EmitPreInitStmt)
+ emitPreInitStmt(CGF, S);
if (AsInlined) {
if (S.hasAssociatedStmt()) {
auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
@@ -81,6 +82,39 @@ public:
}
};
+/// Lexical scope for OpenMP parallel construct, that handles correct codegen
+/// for captured expressions.
+class OMPParallelScope final : public OMPLexicalScope {
+ bool EmitPreInitStmt(const OMPExecutableDirective &S) {
+ OpenMPDirectiveKind Kind = S.getDirectiveKind();
+ return !(isOpenMPTargetExecutionDirective(Kind) ||
+ isOpenMPLoopBoundSharingDirective(Kind)) &&
+ isOpenMPParallelDirective(Kind);
+ }
+
+public:
+ OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
+ : OMPLexicalScope(CGF, S,
+ /*AsInlined=*/false,
+ /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
+};
+
+/// Lexical scope for OpenMP teams construct, that handles correct codegen
+/// for captured expressions.
+class OMPTeamsScope final : public OMPLexicalScope {
+ bool EmitPreInitStmt(const OMPExecutableDirective &S) {
+ OpenMPDirectiveKind Kind = S.getDirectiveKind();
+ return !isOpenMPTargetExecutionDirective(Kind) &&
+ isOpenMPTeamsDirective(Kind);
+ }
+
+public:
+ OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
+ : OMPLexicalScope(CGF, S,
+ /*AsInlined=*/false,
+ /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
+};
+
/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
@@ -194,21 +228,58 @@ static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
return TmpAddr;
}
-llvm::Function *
-CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
- assert(
- CapturedStmtInfo &&
- "CapturedStmtInfo should be set when generating the captured function");
- const CapturedDecl *CD = S.getCapturedDecl();
- const RecordDecl *RD = S.getCapturedRecordDecl();
+static QualType getCanonicalParamType(ASTContext &C, QualType T) {
+ if (T->isLValueReferenceType()) {
+ return C.getLValueReferenceType(
+ getCanonicalParamType(C, T.getNonReferenceType()),
+ /*SpelledAsLValue=*/false);
+ }
+ if (T->isPointerType())
+ return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
+ return C.getCanonicalParamType(T);
+}
+
+namespace {
+ /// Contains required data for proper outlined function codegen.
+ struct FunctionOptions {
+ /// Captured statement for which the function is generated.
+ const CapturedStmt *S = nullptr;
+ /// true if cast to/from UIntPtr is required for variables captured by
+ /// value.
+ bool UIntPtrCastRequired = true;
+ /// true if only casted arguments must be registered as local args or VLA
+ /// sizes.
+ bool RegisterCastedArgsOnly = false;
+ /// Name of the generated function.
+ StringRef FunctionName;
+ explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
+ bool RegisterCastedArgsOnly,
+ StringRef FunctionName)
+ : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
+ RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
+ FunctionName(FunctionName) {}
+ };
+}
+
+static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue(
+ CodeGenFunction &CGF, FunctionArgList &Args,
+ llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>>
+ &LocalAddrs,
+ llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
+ &VLASizes,
+ llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
+ const CapturedDecl *CD = FO.S->getCapturedDecl();
+ const RecordDecl *RD = FO.S->getCapturedRecordDecl();
assert(CD->hasBody() && "missing CapturedDecl body");
+ CXXThisValue = nullptr;
// Build the argument list.
+ CodeGenModule &CGM = CGF.CGM;
ASTContext &Ctx = CGM.getContext();
- FunctionArgList Args;
+ bool HasUIntPtrArgs = false;
Args.append(CD->param_begin(),
std::next(CD->param_begin(), CD->getContextParamPosition()));
- auto I = S.captures().begin();
+ auto I = FO.S->captures().begin();
for (auto *FD : RD->fields()) {
QualType ArgType = FD->getType();
IdentifierInfo *II = nullptr;
@@ -220,29 +291,26 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
// deal with pointers. We can pass in the same way the VLA type sizes to the
// outlined function.
if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
- I->capturesVariableArrayType())
- ArgType = Ctx.getUIntPtrType();
+ I->capturesVariableArrayType()) {
+ HasUIntPtrArgs = true;
+ if (FO.UIntPtrCastRequired)
+ ArgType = Ctx.getUIntPtrType();
+ }
if (I->capturesVariable() || I->capturesVariableByCopy()) {
CapVar = I->getCapturedVar();
II = CapVar->getIdentifier();
} else if (I->capturesThis())
- II = &getContext().Idents.get("this");
+ II = &Ctx.Idents.get("this");
else {
assert(I->capturesVariableArrayType());
- II = &getContext().Idents.get("vla");
- }
- if (ArgType->isVariablyModifiedType()) {
- bool IsReference = ArgType->isLValueReferenceType();
- ArgType =
- getContext().getCanonicalParamType(ArgType.getNonReferenceType());
- if (IsReference && !ArgType->isPointerType()) {
- ArgType = getContext().getLValueReferenceType(
- ArgType, /*SpelledAsLValue=*/false);
- }
+ II = &Ctx.Idents.get("vla");
}
- Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
- FD->getLocation(), II, ArgType));
+ if (ArgType->isVariablyModifiedType())
+ ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
+ Args.push_back(ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr,
+ FD->getLocation(), II, ArgType,
+ ImplicitParamDecl::Other));
++I;
}
Args.append(
@@ -255,89 +323,166 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
- llvm::Function *F = llvm::Function::Create(
- FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
- CapturedStmtInfo->getHelperName(), &CGM.getModule());
+ llvm::Function *F =
+ llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
+ FO.FunctionName, &CGM.getModule());
CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
if (CD->isNothrow())
F->addFnAttr(llvm::Attribute::NoUnwind);
// Generate the function.
- StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
- CD->getBody()->getLocStart());
+ CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
+ CD->getBody()->getLocStart());
unsigned Cnt = CD->getContextParamPosition();
- I = S.captures().begin();
+ I = FO.S->captures().begin();
for (auto *FD : RD->fields()) {
// If we are capturing a pointer by copy we don't need to do anything, just
// use the value that we get from the arguments.
if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
const VarDecl *CurVD = I->getCapturedVar();
- Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]);
+ Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
// If the variable is a reference we need to materialize it here.
if (CurVD->getType()->isReferenceType()) {
- Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(),
- ".materialized_ref");
- EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false,
- CurVD->getType());
+ Address RefAddr = CGF.CreateMemTemp(
+ CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
+ CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
+ /*Volatile=*/false, CurVD->getType());
LocalAddr = RefAddr;
}
- setAddrOfLocalVar(CurVD, LocalAddr);
+ if (!FO.RegisterCastedArgsOnly)
+ LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
++Cnt;
++I;
continue;
}
- LValue ArgLVal =
- MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
- AlignmentSource::Decl);
+ LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+ LValue ArgLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(Args[Cnt]),
+ Args[Cnt]->getType(), BaseInfo);
if (FD->hasCapturedVLAType()) {
- LValue CastedArgLVal =
- MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
- Args[Cnt]->getName(), ArgLVal),
- FD->getType(), AlignmentSource::Decl);
+ if (FO.UIntPtrCastRequired) {
+ ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
+ Args[Cnt]->getName(),
+ ArgLVal),
+ FD->getType(), BaseInfo);
+ }
auto *ExprArg =
- EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
+ CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
auto VAT = FD->getCapturedVLAType();
- VLASizeMap[VAT->getSizeExpr()] = ExprArg;
+ VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
} else if (I->capturesVariable()) {
auto *Var = I->getCapturedVar();
QualType VarTy = Var->getType();
Address ArgAddr = ArgLVal.getAddress();
if (!VarTy->isReferenceType()) {
if (ArgLVal.getType()->isLValueReferenceType()) {
- ArgAddr = EmitLoadOfReference(
+ ArgAddr = CGF.EmitLoadOfReference(
ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
} else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
assert(ArgLVal.getType()->isPointerType());
- ArgAddr = EmitLoadOfPointer(
+ ArgAddr = CGF.EmitLoadOfPointer(
ArgAddr, ArgLVal.getType()->castAs<PointerType>());
}
}
- setAddrOfLocalVar(
- Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
+ if (!FO.RegisterCastedArgsOnly) {
+ LocalAddrs.insert(
+ {Args[Cnt],
+ {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
+ }
} else if (I->capturesVariableByCopy()) {
assert(!FD->getType()->isAnyPointerType() &&
"Not expecting a captured pointer.");
auto *Var = I->getCapturedVar();
QualType VarTy = Var->getType();
- setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(),
- Args[Cnt]->getName(), ArgLVal,
- VarTy->isReferenceType()));
+ LocalAddrs.insert(
+ {Args[Cnt],
+ {Var,
+ FO.UIntPtrCastRequired
+ ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
+ ArgLVal, VarTy->isReferenceType())
+ : ArgLVal.getAddress()}});
} else {
// If 'this' is captured, load it into CXXThisValue.
assert(I->capturesThis());
- CXXThisValue =
- EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
+ CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
+ .getScalarVal();
+ LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
}
++Cnt;
++I;
}
+ return {F, HasUIntPtrArgs};
+}
+
+llvm::Function *
+CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
+ assert(
+ CapturedStmtInfo &&
+ "CapturedStmtInfo should be set when generating the captured function");
+ const CapturedDecl *CD = S.getCapturedDecl();
+ // Build the argument list.
+ bool NeedWrapperFunction =
+ getDebugInfo() &&
+ CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
+ FunctionArgList Args;
+ llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
+ llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
+ FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
+ CapturedStmtInfo->getHelperName());
+ llvm::Function *F;
+ bool HasUIntPtrArgs;
+ std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue(
+ *this, Args, LocalAddrs, VLASizes, CXXThisValue, FO);
+ for (const auto &LocalAddrPair : LocalAddrs) {
+ if (LocalAddrPair.second.first) {
+ setAddrOfLocalVar(LocalAddrPair.second.first,
+ LocalAddrPair.second.second);
+ }
+ }
+ for (const auto &VLASizePair : VLASizes)
+ VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
PGO.assignRegionCounters(GlobalDecl(CD), F);
CapturedStmtInfo->EmitBody(*this, CD->getBody());
FinishFunction(CD->getBodyRBrace());
-
- return F;
+ if (!NeedWrapperFunction || !HasUIntPtrArgs)
+ return F;
+
+ FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
+ /*RegisterCastedArgsOnly=*/true,
+ ".nondebug_wrapper.");
+ CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
+ WrapperCGF.disableDebugInfo();
+ Args.clear();
+ LocalAddrs.clear();
+ VLASizes.clear();
+ llvm::Function *WrapperF =
+ emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
+ WrapperCGF.CXXThisValue, WrapperFO).first;
+ LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+ llvm::SmallVector<llvm::Value *, 4> CallArgs;
+ for (const auto *Arg : Args) {
+ llvm::Value *CallArg;
+ auto I = LocalAddrs.find(Arg);
+ if (I != LocalAddrs.end()) {
+ LValue LV =
+ WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo);
+ CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
+ } else {
+ auto EI = VLASizes.find(Arg);
+ if (EI != VLASizes.end())
+ CallArg = EI->second.second;
+ else {
+ LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
+ Arg->getType(), BaseInfo);
+ CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
+ }
+ }
+ CallArgs.emplace_back(CallArg);
+ }
+ WrapperCGF.Builder.CreateCall(F, CallArgs);
+ WrapperCGF.FinishFunction();
+ return WrapperF;
}
//===----------------------------------------------------------------------===//
@@ -404,156 +549,6 @@ void CodeGenFunction::EmitOMPAggregateAssign(
EmitBlock(DoneBB, /*IsFinished=*/true);
}
-/// Check if the combiner is a call to UDR combiner and if it is so return the
-/// UDR decl used for reduction.
-static const OMPDeclareReductionDecl *
-getReductionInit(const Expr *ReductionOp) {
- if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
- if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
- if (auto *DRE =
- dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
- if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
- return DRD;
- return nullptr;
-}
-
-static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
- const OMPDeclareReductionDecl *DRD,
- const Expr *InitOp,
- Address Private, Address Original,
- QualType Ty) {
- if (DRD->getInitializer()) {
- std::pair<llvm::Function *, llvm::Function *> Reduction =
- CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
- auto *CE = cast<CallExpr>(InitOp);
- auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
- const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
- const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
- auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
- auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
- CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
- PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
- [=]() -> Address { return Private; });
- PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
- [=]() -> Address { return Original; });
- (void)PrivateScope.Privatize();
- RValue Func = RValue::get(Reduction.second);
- CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
- CGF.EmitIgnoredExpr(InitOp);
- } else {
- llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
- auto *GV = new llvm::GlobalVariable(
- CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
- llvm::GlobalValue::PrivateLinkage, Init, ".init");
- LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
- RValue InitRVal;
- switch (CGF.getEvaluationKind(Ty)) {
- case TEK_Scalar:
- InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
- break;
- case TEK_Complex:
- InitRVal =
- RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
- break;
- case TEK_Aggregate:
- InitRVal = RValue::getAggregate(LV.getAddress());
- break;
- }
- OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
- CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
- CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
- /*IsInitializer=*/false);
- }
-}
-
-/// \brief Emit initialization of arrays of complex types.
-/// \param DestAddr Address of the array.
-/// \param Type Type of array.
-/// \param Init Initial expression of array.
-/// \param SrcAddr Address of the original array.
-static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
- QualType Type, const Expr *Init,
- Address SrcAddr = Address::invalid()) {
- auto *DRD = getReductionInit(Init);
- // Perform element-by-element initialization.
- QualType ElementTy;
-
- // Drill down to the base element type on both arrays.
- auto ArrayTy = Type->getAsArrayTypeUnsafe();
- auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
- DestAddr =
- CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
- if (DRD)
- SrcAddr =
- CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
-
- llvm::Value *SrcBegin = nullptr;
- if (DRD)
- SrcBegin = SrcAddr.getPointer();
- auto DestBegin = DestAddr.getPointer();
- // Cast from pointer to array type to pointer to single element.
- auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
- // The basic structure here is a while-do loop.
- auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
- auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
- auto IsEmpty =
- CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
- CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
-
- // Enter the loop body, making that address the current address.
- auto EntryBB = CGF.Builder.GetInsertBlock();
- CGF.EmitBlock(BodyBB);
-
- CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
-
- llvm::PHINode *SrcElementPHI = nullptr;
- Address SrcElementCurrent = Address::invalid();
- if (DRD) {
- SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
- "omp.arraycpy.srcElementPast");
- SrcElementPHI->addIncoming(SrcBegin, EntryBB);
- SrcElementCurrent =
- Address(SrcElementPHI,
- SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
- }
- llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
- DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
- DestElementPHI->addIncoming(DestBegin, EntryBB);
- Address DestElementCurrent =
- Address(DestElementPHI,
- DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
-
- // Emit copy.
- {
- CodeGenFunction::RunCleanupsScope InitScope(CGF);
- if (DRD && (DRD->getInitializer() || !Init)) {
- emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
- SrcElementCurrent, ElementTy);
- } else
- CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
- /*IsInitializer=*/false);
- }
-
- if (DRD) {
- // Shift the address forward by one element.
- auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
- SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
- SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
- }
-
- // Shift the address forward by one element.
- auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
- DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
- // Check whether we've reached the end.
- auto Done =
- CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
- CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
- DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
-
- // Done.
- CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
-}
-
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
Address SrcAddr, const VarDecl *DestVD,
const VarDecl *SrcVD, const Expr *Copy) {
@@ -906,259 +901,111 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
EmitBlock(DoneBB, /*IsFinished=*/true);
}
-static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
- LValue BaseLV, llvm::Value *Addr) {
- Address Tmp = Address::invalid();
- Address TopTmp = Address::invalid();
- Address MostTopTmp = Address::invalid();
- BaseTy = BaseTy.getNonReferenceType();
- while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
- !CGF.getContext().hasSameType(BaseTy, ElTy)) {
- Tmp = CGF.CreateMemTemp(BaseTy);
- if (TopTmp.isValid())
- CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
- else
- MostTopTmp = Tmp;
- TopTmp = Tmp;
- BaseTy = BaseTy->getPointeeType();
- }
- llvm::Type *Ty = BaseLV.getPointer()->getType();
- if (Tmp.isValid())
- Ty = Tmp.getElementType();
- Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
- if (Tmp.isValid()) {
- CGF.Builder.CreateStore(Addr, Tmp);
- return MostTopTmp;
- }
- return Address(Addr, BaseLV.getAlignment());
-}
-
-static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
- LValue BaseLV) {
- BaseTy = BaseTy.getNonReferenceType();
- while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
- !CGF.getContext().hasSameType(BaseTy, ElTy)) {
- if (auto *PtrTy = BaseTy->getAs<PointerType>())
- BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
- else {
- BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
- BaseTy->castAs<ReferenceType>());
- }
- BaseTy = BaseTy->getPointeeType();
- }
- return CGF.MakeAddrLValue(
- Address(
- CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
- BaseLV.getAlignment()),
- BaseLV.getType(), BaseLV.getAlignmentSource());
-}
-
void CodeGenFunction::EmitOMPReductionClauseInit(
const OMPExecutableDirective &D,
CodeGenFunction::OMPPrivateScope &PrivateScope) {
if (!HaveInsertPoint())
return;
+ SmallVector<const Expr *, 4> Shareds;
+ SmallVector<const Expr *, 4> Privates;
+ SmallVector<const Expr *, 4> ReductionOps;
+ SmallVector<const Expr *, 4> LHSs;
+ SmallVector<const Expr *, 4> RHSs;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
auto IPriv = C->privates().begin();
auto IRed = C->reduction_ops().begin();
- for (auto IRef : C->varlists()) {
- auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
- auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
- auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
- auto *DRD = getReductionInit(*IRed);
- if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
- auto *Base = OASE->getBase()->IgnoreParenImpCasts();
- while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
- Base = TempOASE->getBase()->IgnoreParenImpCasts();
- while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
- Base = TempASE->getBase()->IgnoreParenImpCasts();
- auto *DE = cast<DeclRefExpr>(Base);
- auto *OrigVD = cast<VarDecl>(DE->getDecl());
- auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
- auto OASELValueUB =
- EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
- auto OriginalBaseLValue = EmitLValue(DE);
- LValue BaseLValue =
- loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
- OriginalBaseLValue);
- // Store the address of the original variable associated with the LHS
- // implicit variable.
- PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
- return OASELValueLB.getAddress();
- });
- // Emit reduction copy.
- bool IsRegistered = PrivateScope.addPrivate(
- OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
- OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
- // Emit VarDecl with copy init for arrays.
- // Get the address of the original variable captured in current
- // captured region.
- auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
- OASELValueLB.getPointer());
- Size = Builder.CreateNUWAdd(
- Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
- CodeGenFunction::OpaqueValueMapping OpaqueMap(
- *this, cast<OpaqueValueExpr>(
- getContext()
- .getAsVariableArrayType(PrivateVD->getType())
- ->getSizeExpr()),
- RValue::get(Size));
- EmitVariablyModifiedType(PrivateVD->getType());
- auto Emission = EmitAutoVarAlloca(*PrivateVD);
- auto Addr = Emission.getAllocatedAddress();
- auto *Init = PrivateVD->getInit();
- EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
- DRD ? *IRed : Init,
- OASELValueLB.getAddress());
- EmitAutoVarCleanups(Emission);
- // Emit private VarDecl with reduction init.
- auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
- OASELValueLB.getPointer());
- auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
- return castToBase(*this, OrigVD->getType(),
- OASELValueLB.getType(), OriginalBaseLValue,
- Ptr);
- });
- assert(IsRegistered && "private var already registered as private");
- // Silence the warning about unused variable.
- (void)IsRegistered;
- PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
- return GetAddrOfLocalVar(PrivateVD);
- });
- } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
- auto *Base = ASE->getBase()->IgnoreParenImpCasts();
- while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
- Base = TempASE->getBase()->IgnoreParenImpCasts();
- auto *DE = cast<DeclRefExpr>(Base);
- auto *OrigVD = cast<VarDecl>(DE->getDecl());
- auto ASELValue = EmitLValue(ASE);
- auto OriginalBaseLValue = EmitLValue(DE);
- LValue BaseLValue = loadToBegin(
- *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
- // Store the address of the original variable associated with the LHS
- // implicit variable.
- PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
- return ASELValue.getAddress();
- });
- // Emit reduction copy.
- bool IsRegistered = PrivateScope.addPrivate(
- OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
- OriginalBaseLValue, DRD, IRed]() -> Address {
- // Emit private VarDecl with reduction init.
- AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
- auto Addr = Emission.getAllocatedAddress();
- if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
- emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
- ASELValue.getAddress(),
- ASELValue.getType());
- } else
- EmitAutoVarInit(Emission);
- EmitAutoVarCleanups(Emission);
- auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
- ASELValue.getPointer());
- auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
- return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
- OriginalBaseLValue, Ptr);
- });
- assert(IsRegistered && "private var already registered as private");
- // Silence the warning about unused variable.
- (void)IsRegistered;
- PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
- return Builder.CreateElementBitCast(
- GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
- "rhs.begin");
- });
- } else {
- auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
- QualType Type = PrivateVD->getType();
- if (getContext().getAsArrayType(Type)) {
- // Store the address of the original variable associated with the LHS
- // implicit variable.
- DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
- CapturedStmtInfo->lookup(OrigVD) != nullptr,
- IRef->getType(), VK_LValue, IRef->getExprLoc());
- Address OriginalAddr = EmitLValue(&DRE).getAddress();
- PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
- LHSVD]() -> Address {
- OriginalAddr = Builder.CreateElementBitCast(
- OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
- return OriginalAddr;
- });
- bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
- if (Type->isVariablyModifiedType()) {
- CodeGenFunction::OpaqueValueMapping OpaqueMap(
- *this, cast<OpaqueValueExpr>(
- getContext()
- .getAsVariableArrayType(PrivateVD->getType())
- ->getSizeExpr()),
- RValue::get(
- getTypeSize(OrigVD->getType().getNonReferenceType())));
- EmitVariablyModifiedType(Type);
- }
- auto Emission = EmitAutoVarAlloca(*PrivateVD);
- auto Addr = Emission.getAllocatedAddress();
- auto *Init = PrivateVD->getInit();
- EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
- DRD ? *IRed : Init, OriginalAddr);
- EmitAutoVarCleanups(Emission);
- return Emission.getAllocatedAddress();
- });
- assert(IsRegistered && "private var already registered as private");
- // Silence the warning about unused variable.
- (void)IsRegistered;
- PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
- return Builder.CreateElementBitCast(
- GetAddrOfLocalVar(PrivateVD),
- ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
- });
- } else {
- // Store the address of the original variable associated with the LHS
- // implicit variable.
- Address OriginalAddr = Address::invalid();
- PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
- &OriginalAddr]() -> Address {
- DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
- CapturedStmtInfo->lookup(OrigVD) != nullptr,
- IRef->getType(), VK_LValue, IRef->getExprLoc());
- OriginalAddr = EmitLValue(&DRE).getAddress();
- return OriginalAddr;
- });
- // Emit reduction copy.
- bool IsRegistered = PrivateScope.addPrivate(
- OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
- // Emit private VarDecl with reduction init.
- AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
- auto Addr = Emission.getAllocatedAddress();
- if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
- emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
- OriginalAddr,
- PrivateVD->getType());
- } else
- EmitAutoVarInit(Emission);
- EmitAutoVarCleanups(Emission);
- return Addr;
- });
- assert(IsRegistered && "private var already registered as private");
- // Silence the warning about unused variable.
- (void)IsRegistered;
- PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
- return GetAddrOfLocalVar(PrivateVD);
- });
- }
+ auto ILHS = C->lhs_exprs().begin();
+ auto IRHS = C->rhs_exprs().begin();
+ for (const auto *Ref : C->varlists()) {
+ Shareds.emplace_back(Ref);
+ Privates.emplace_back(*IPriv);
+ ReductionOps.emplace_back(*IRed);
+ LHSs.emplace_back(*ILHS);
+ RHSs.emplace_back(*IRHS);
+ std::advance(IPriv, 1);
+ std::advance(IRed, 1);
+ std::advance(ILHS, 1);
+ std::advance(IRHS, 1);
+ }
+ }
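+  // ReductionCodeGen drives the per-item emission below: for each reduction
+  // item it computes the shared lvalue and aggregate type, emits the
+  // initialization of the private copy, and adjusts the private address back
+  // to the base declaration.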
+ ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
+ unsigned Count = 0;
+ auto ILHS = LHSs.begin();
+ auto IRHS = RHSs.begin();
+ auto IPriv = Privates.begin();
+ for (const auto *IRef : Shareds) {
+ auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
+ // Emit private VarDecl with reduction init.
+ RedCG.emitSharedLValue(*this, Count);
+ RedCG.emitAggregateType(*this, Count);
+ auto Emission = EmitAutoVarAlloca(*PrivateVD);
+ RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
+ RedCG.getSharedLValue(Count),
+ [&Emission](CodeGenFunction &CGF) {
+ CGF.EmitAutoVarInit(Emission);
+ return true;
+ });
+ EmitAutoVarCleanups(Emission);
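+    // For array sections and subscripts the private copy covers only the
+    // reduced slice, so adjust the address back to the base variable before
+    // registering it in the private scope.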
+ Address BaseAddr = RedCG.adjustPrivateAddress(
+ *this, Count, Emission.getAllocatedAddress());
+ bool IsRegistered = PrivateScope.addPrivate(
+ RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
+ assert(IsRegistered && "private var already registered as private");
+ // Silence the warning about unused variable.
+ (void)IsRegistered;
+
+ auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+ auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+ if (isa<OMPArraySectionExpr>(IRef)) {
+ // Store the address of the original variable associated with the LHS
+ // implicit variable.
+ PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
+ return RedCG.getSharedLValue(Count).getAddress();
+ });
+ PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
+ return GetAddrOfLocalVar(PrivateVD);
+ });
+ } else if (isa<ArraySubscriptExpr>(IRef)) {
+ // Store the address of the original variable associated with the LHS
+ // implicit variable.
+ PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
+ return RedCG.getSharedLValue(Count).getAddress();
+ });
+ PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
+ return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
+ ConvertTypeForMem(RHSVD->getType()),
+ "rhs.begin");
+ });
+ } else {
+ QualType Type = PrivateVD->getType();
+ bool IsArray = getContext().getAsArrayType(Type) != nullptr;
+ Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
+ // Store the address of the original variable associated with the LHS
+ // implicit variable.
+ if (IsArray) {
+ OriginalAddr = Builder.CreateElementBitCast(
+ OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
}
- ++ILHS;
- ++IRHS;
- ++IPriv;
- ++IRed;
+ PrivateScope.addPrivate(
+ LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
+ PrivateScope.addPrivate(
+ RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
+ return IsArray
+ ? Builder.CreateElementBitCast(
+ GetAddrOfLocalVar(PrivateVD),
+ ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
+ : GetAddrOfLocalVar(PrivateVD);
+ });
}
+ ++ILHS;
+ ++IRHS;
+ ++IPriv;
+ ++Count;
}
}
void CodeGenFunction::EmitOMPReductionClauseFinal(
- const OMPExecutableDirective &D) {
+ const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
if (!HaveInsertPoint())
return;
llvm::SmallVector<const Expr *, 8> Privates;
@@ -1174,14 +1021,15 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
}
if (HasAtLeastOneReduction) {
+ bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
+ isOpenMPParallelDirective(D.getDirectiveKind()) ||
+ D.getDirectiveKind() == OMPD_simd;
+ bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
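+    // The nowait/simple-reduction flags and the reduction kind are passed to
+    // emitReduction below as a single options aggregate.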
// Emit nowait reduction if nowait clause is present or directive is a
// parallel directive (it always has implicit barrier).
CGM.getOpenMPRuntime().emitReduction(
*this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
- D.getSingleClause<OMPNowaitClause>() ||
- isOpenMPParallelDirective(D.getDirectiveKind()) ||
- D.getDirectiveKind() == OMPD_simd,
- D.getDirectiveKind() == OMPD_simd);
+ {WithNowait, SimpleReduction, ReductionKind});
}
}
@@ -1210,14 +1058,23 @@ static void emitPostUpdateForReductionClause(
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
-static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
- const OMPExecutableDirective &S,
- OpenMPDirectiveKind InnermostKind,
- const RegionCodeGenTy &CodeGen) {
- auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
- emitParallelOrTeamsOutlinedFunction(S,
- *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
+namespace {
+/// Codegen lambda for appending distribute lower and upper bounds to the
+/// outlined parallel function. This is necessary for combined constructs
+/// such as 'distribute parallel for'.
+typedef llvm::function_ref<void(CodeGenFunction &,
+ const OMPExecutableDirective &,
+ llvm::SmallVectorImpl<llvm::Value *> &)>
+ CodeGenBoundParametersTy;
+} // anonymous namespace
+
+static void emitCommonOMPParallelDirective(
+ CodeGenFunction &CGF, const OMPExecutableDirective &S,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
+ const CodeGenBoundParametersTy &CodeGenBoundParameters) {
+ const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
+ auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
+ S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
@@ -1239,13 +1096,22 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
}
}
- OMPLexicalScope Scope(CGF, S);
+ OMPParallelScope Scope(CGF, S);
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
+  // Combining 'distribute' with 'for' requires sharing each 'distribute'
+  // chunk's lower and upper bounds with the 'for' pragma's chunking
+  // mechanism. The following lambda takes care of appending the lower and
+  // upper bound parameters when necessary.
+ CodeGenBoundParameters(CGF, S, CapturedVars);
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
CapturedVars, IfCond);
}
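+
+/// No-op bound-parameters callback for 'parallel' regions that are not
+/// combined with 'distribute' and therefore pass no extra bounds.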
+static void emitEmptyBoundParameters(CodeGenFunction &,
+ const OMPExecutableDirective &,
+ llvm::SmallVectorImpl<llvm::Value *> &) {}
+
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
// Emit parallel region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
@@ -1264,9 +1130,10 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- CGF.EmitOMPReductionClauseFinal(S);
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
+ emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
+ emitEmptyBoundParameters);
emitPostUpdateForReductionClause(
*this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
@@ -1343,12 +1210,14 @@ void CodeGenFunction::EmitOMPInnerLoop(
EmitBlock(LoopExit.getBlock());
}
-void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
+bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
if (!HaveInsertPoint())
- return;
+ return false;
// Emit inits for the linear variables.
+ bool HasLinears = false;
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
for (auto *Init : C->inits()) {
+ HasLinears = true;
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
@@ -1373,6 +1242,7 @@ void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
EmitIgnoredExpr(CS);
}
}
+ return HasLinears;
}
void CodeGenFunction::EmitOMPLinearClauseFinal(
@@ -1611,6 +1481,13 @@ void CodeGenFunction::EmitOMPSimdFinal(
EmitBlock(DoneBB, /*IsFinished=*/true);
}
+static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
+ const OMPLoopDirective &S,
+ CodeGenFunction::JumpDest LoopExit) {
+ CGF.EmitOMPLoopBody(S, LoopExit);
+ CGF.EmitStopPoint(&S);
+}
+
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
OMPLoopScope PreInitScope(CGF, S);
@@ -1655,7 +1532,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
CGF.EmitOMPSimdInit(S);
emitAlignedClause(CGF, S);
- CGF.EmitOMPLinearClauseInit(S);
+ (void)CGF.EmitOMPLinearClauseInit(S);
{
OMPPrivateScope LoopScope(CGF);
CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
@@ -1677,7 +1554,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
// Emit final copy of the lastprivate variables at the end of loops.
if (HasLastprivateClause)
CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
- CGF.EmitOMPReductionClauseFinal(S);
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
emitPostUpdateForReductionClause(
CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
@@ -1693,9 +1570,12 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
-void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
- const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
- Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
+void CodeGenFunction::EmitOMPOuterLoop(
+ bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
+ CodeGenFunction::OMPPrivateScope &LoopScope,
+ const CodeGenFunction::OMPLoopArguments &LoopArgs,
+ const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
+ const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
auto &RT = CGM.getOpenMPRuntime();
const Expr *IVExpr = S.getIterationVariable();
@@ -1713,15 +1593,18 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
llvm::Value *BoolCondVal = nullptr;
if (!DynamicOrOrdered) {
- // UB = min(UB, GlobalUB)
- EmitIgnoredExpr(S.getEnsureUpperBound());
+ // UB = min(UB, GlobalUB) or
+ // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
+ // 'distribute parallel for')
+ EmitIgnoredExpr(LoopArgs.EUB);
// IV = LB
- EmitIgnoredExpr(S.getInit());
+ EmitIgnoredExpr(LoopArgs.Init);
// IV < UB
- BoolCondVal = EvaluateExprAsBool(S.getCond());
+ BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
} else {
- BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
- LB, UB, ST);
+ BoolCondVal =
+ RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
+ LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
}
// If there are any cleanups between here and the loop-exit scope,
@@ -1741,7 +1624,7 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
// Emit "IV = LB" (in case of static schedule, we have already calculated new
// LB for loop condition and emitted it above).
if (DynamicOrOrdered)
- EmitIgnoredExpr(S.getInit());
+ EmitIgnoredExpr(LoopArgs.Init);
// Create a block for the increment.
auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
@@ -1755,24 +1638,27 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
EmitOMPSimdInit(S, IsMonotonic);
SourceLocation Loc = S.getLocStart();
- EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
- [&S, LoopExit](CodeGenFunction &CGF) {
- CGF.EmitOMPLoopBody(S, LoopExit);
- CGF.EmitStopPoint(&S);
- },
- [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
- if (Ordered) {
- CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
- CGF, Loc, IVSize, IVSigned);
- }
- });
+
+  // When 'distribute' is not combined with a 'for':
+  //   while (idx <= UB) { BODY; ++idx; }
+  // When 'distribute' is combined with a 'for'
+  // (e.g. 'distribute parallel for'):
+  //   while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
+ EmitOMPInnerLoop(
+ S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
+ [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
+ CodeGenLoop(CGF, S, LoopExit);
+ },
+ [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
+ CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
+ });
EmitBlock(Continue.getBlock());
BreakContinueStack.pop_back();
if (!DynamicOrOrdered) {
// Emit "LB = LB + Stride", "UB = UB + Stride".
- EmitIgnoredExpr(S.getNextLowerBound());
- EmitIgnoredExpr(S.getNextUpperBound());
+ EmitIgnoredExpr(LoopArgs.NextLB);
+ EmitIgnoredExpr(LoopArgs.NextUB);
}
EmitBranch(CondBlock);
@@ -1791,7 +1677,8 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
void CodeGenFunction::EmitOMPForOuterLoop(
const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
- Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
+ const OMPLoopArguments &LoopArgs,
+ const CodeGenDispatchBoundsTy &CGDispatchBounds) {
auto &RT = CGM.getOpenMPRuntime();
// Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
@@ -1800,7 +1687,7 @@ void CodeGenFunction::EmitOMPForOuterLoop(
assert((Ordered ||
!RT.isStaticNonchunked(ScheduleKind.Schedule,
- /*Chunked=*/Chunk != nullptr)) &&
+ LoopArgs.Chunk != nullptr)) &&
"static non-chunked schedule does not need outer loop");
// Emit outer loop.
@@ -1858,22 +1745,46 @@ void CodeGenFunction::EmitOMPForOuterLoop(
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
if (DynamicOrOrdered) {
- llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
+ auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
+ llvm::Value *LBVal = DispatchBounds.first;
+ llvm::Value *UBVal = DispatchBounds.second;
+    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
+ LoopArgs.Chunk};
RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
- IVSigned, Ordered, UBVal, Chunk);
+                           IVSigned, Ordered, DispatchRTInputValues);
} else {
RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
- Ordered, IL, LB, UB, ST, Chunk);
+ Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
+ LoopArgs.ST, LoopArgs.Chunk);
}
- EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
- ST, IL, Chunk);
+ auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
+ const unsigned IVSize,
+ const bool IVSigned) {
+ if (Ordered) {
+ CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
+ IVSigned);
+ }
+ };
+
+ OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
+ LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
+ OuterLoopArgs.IncExpr = S.getInc();
+ OuterLoopArgs.Init = S.getInit();
+ OuterLoopArgs.Cond = S.getCond();
+ OuterLoopArgs.NextLB = S.getNextLowerBound();
+ OuterLoopArgs.NextUB = S.getNextUpperBound();
+ EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
+ emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}
+static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
+ const unsigned IVSize, const bool IVSigned) {}
+
void CodeGenFunction::EmitOMPDistributeOuterLoop(
- OpenMPDistScheduleClauseKind ScheduleKind,
- const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
- Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
+ OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
+ OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
+ const CodeGenLoopTy &CodeGenLoopContent) {
auto &RT = CGM.getOpenMPRuntime();
@@ -1886,26 +1797,159 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
- RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
- IVSize, IVSigned, /* Ordered = */ false,
- IL, LB, UB, ST, Chunk);
+ RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize,
+ IVSigned, /* Ordered = */ false, LoopArgs.IL,
+ LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
+ LoopArgs.Chunk);
+
+  // For combined 'distribute' and 'for', the increment expression of
+  // 'distribute' is stored in DistInc. For 'distribute' alone, it is in Inc.
+ Expr *IncExpr;
+ if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
+ IncExpr = S.getDistInc();
+ else
+ IncExpr = S.getInc();
+
+  // This routine is shared by 'omp distribute parallel for' and
+  // 'omp distribute': select the right EUB expression depending on the
+  // directive.
+ OMPLoopArguments OuterLoopArgs;
+ OuterLoopArgs.LB = LoopArgs.LB;
+ OuterLoopArgs.UB = LoopArgs.UB;
+ OuterLoopArgs.ST = LoopArgs.ST;
+ OuterLoopArgs.IL = LoopArgs.IL;
+ OuterLoopArgs.Chunk = LoopArgs.Chunk;
+ OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedEnsureUpperBound()
+ : S.getEnsureUpperBound();
+ OuterLoopArgs.IncExpr = IncExpr;
+ OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedInit()
+ : S.getInit();
+ OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedCond()
+ : S.getCond();
+ OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedNextLowerBound()
+ : S.getNextLowerBound();
+ OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedNextUpperBound()
+ : S.getNextUpperBound();
+
+ EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
+ LoopScope, OuterLoopArgs, CodeGenLoopContent,
+ emitEmptyOrdered);
+}
+
+/// Emit a helper variable and return corresponding lvalue.
+static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
+ const DeclRefExpr *Helper) {
+ auto VDecl = cast<VarDecl>(Helper->getDecl());
+ CGF.EmitVarDecl(*VDecl);
+ return CGF.EmitLValue(Helper);
+}
+
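+/// Emit the lower and upper bound helper variables of a 'for' that is part of
+/// a combined 'distribute parallel for', initializing them from the bounds of
+/// the enclosing 'distribute' chunk.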
+static std::pair<LValue, LValue>
+emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S) {
+ const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ LValue LB =
+ EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
+ LValue UB =
+ EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
+
+  // When composing 'distribute' with 'for' (e.g. as in 'distribute
+  // parallel for') we need to use the 'distribute' chunk's lower and upper
+  // bounds rather than the whole loop iteration space. These are parameters
+  // to the outlined function for 'parallel', and we copy the bounds of the
+  // previous schedule into the current ones.
+ LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
+ LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
+ llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
+ PrevLBVal = CGF.EmitScalarConversion(
+ PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
+ LS.getIterationVariable()->getType(), SourceLocation());
+ llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
+ PrevUBVal = CGF.EmitScalarConversion(
+ PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
+ LS.getIterationVariable()->getType(), SourceLocation());
+
+ CGF.EmitStoreOfScalar(PrevLBVal, LB);
+ CGF.EmitStoreOfScalar(PrevUBVal, UB);
+
+ return {LB, UB};
+}
+
+/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), we need
+/// to use the LB and UB expressions generated by the worksharing code
+/// generation support, whereas in non-combined situations we would just emit
+/// 0 and the LastIteration expression.
+/// This function is necessary because the LB and UB types differ between the
+/// RT emission routines for 'for_static_init' and 'for_dispatch_init'.
+static std::pair<llvm::Value *, llvm::Value *>
+emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S,
+ Address LB, Address UB) {
+ const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ const Expr *IVExpr = LS.getIterationVariable();
+  // When implementing a dynamic schedule for a 'for' combined with a
+  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop is not
+  // normalized, as each team only executes its own assigned distribute
+  // chunk.
+ QualType IteratorTy = IVExpr->getType();
+ llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
+ SourceLocation());
+ llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
+ SourceLocation());
+ return {LBVal, UBVal};
+}
+
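+/// Append the current 'distribute' chunk's combined lower and upper bounds,
+/// widened to size_t, to the variables captured by the outlined 'parallel'
+/// function.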
+static void emitDistributeParallelForDistributeInnerBoundParams(
+ CodeGenFunction &CGF, const OMPExecutableDirective &S,
+ llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
+ const auto &Dir = cast<OMPLoopDirective>(S);
+ LValue LB =
+ CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
+ auto LBCast = CGF.Builder.CreateIntCast(
+ CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
+ CapturedVars.push_back(LBCast);
+ LValue UB =
+ CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
+
+ auto UBCast = CGF.Builder.CreateIntCast(
+ CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
+ CapturedVars.push_back(UBCast);
+}
+
+static void
+emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
+ const OMPLoopDirective &S,
+ CodeGenFunction::JumpDest LoopExit) {
+ auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
+ emitDistributeParallelForInnerBounds,
+ emitDistributeParallelForDispatchBounds);
+ };
- EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
- S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
+ emitCommonOMPParallelDirective(
+ CGF, S, OMPD_for, CGInlinedWorksharingLoop,
+ emitDistributeParallelForDistributeInnerBoundParams);
}
void CodeGenFunction::EmitOMPDistributeParallelForDirective(
const OMPDistributeParallelForDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
+ S.getDistInc());
+ };
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this, OMPD_distribute_parallel_for,
- [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- OMPLoopScope PreInitScope(CGF, S);
- OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for,
- /*HasCancel=*/false);
- CGF.EmitStmt(
- cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- });
+ OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
+ /*HasCancel=*/false);
+ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
+ /*HasCancel=*/false);
}
void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
@@ -2003,15 +2047,6 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
});
}
-void CodeGenFunction::EmitOMPTargetTeamsDirective(
- const OMPTargetTeamsDirective &S) {
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this, OMPD_target_teams, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitStmt(
- cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- });
-}
-
void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
const OMPTargetTeamsDistributeDirective &S) {
CGM.getOpenMPRuntime().emitInlinedDirective(
@@ -2052,14 +2087,6 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
});
}
-/// \brief Emit a helper variable and return corresponding lvalue.
-static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
- const DeclRefExpr *Helper) {
- auto VDecl = cast<VarDecl>(Helper->getDecl());
- CGF.EmitVarDecl(*VDecl);
- return CGF.EmitLValue(Helper);
-}
-
namespace {
struct ScheduleKindModifiersTy {
OpenMPScheduleClauseKind Kind;
@@ -2072,7 +2099,10 @@ namespace {
};
} // namespace
-bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
+bool CodeGenFunction::EmitOMPWorksharingLoop(
+ const OMPLoopDirective &S, Expr *EUB,
+ const CodeGenLoopBoundsTy &CodeGenLoopBounds,
+ const CodeGenDispatchBoundsTy &CGDispatchBounds) {
// Emit the loop iteration variable.
auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
@@ -2120,12 +2150,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
llvm::DenseSet<const Expr *> EmittedFinals;
emitAlignedClause(*this, S);
- EmitOMPLinearClauseInit(S);
+ bool HasLinears = EmitOMPLinearClauseInit(S);
// Emit helper vars inits.
- LValue LB =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
- LValue UB =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
+
+ std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
+ LValue LB = Bounds.first;
+ LValue UB = Bounds.second;
LValue ST =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
LValue IL =
@@ -2134,7 +2164,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
// Emit 'then' code.
{
OMPPrivateScope LoopScope(*this);
- if (EmitOMPFirstprivateClause(S, LoopScope)) {
+ if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
// Emit implicit barrier to synchronize threads and avoid data races on
// initialization of firstprivate variables and post-update of
// lastprivate variables.
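+      // The barrier is also needed when linear clause inits were emitted
+      // (HasLinears), for the same reason.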
@@ -2211,9 +2241,11 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
// Emit the outer loop, which requests its work chunk [LB..UB] from
// runtime and runs the inner loop to process it.
+ const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
+ ST.getAddress(), IL.getAddress(),
+ Chunk, EUB);
EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
- LB.getAddress(), UB.getAddress(), ST.getAddress(),
- IL.getAddress(), Chunk);
+ LoopArguments, CGDispatchBounds);
}
if (isOpenMPSimdDirective(S.getDirectiveKind())) {
EmitOMPSimdFinal(S,
@@ -2222,7 +2254,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
CGF.EmitLoadOfScalar(IL, S.getLocStart()));
});
}
- EmitOMPReductionClauseFinal(S);
+ EmitOMPReductionClauseFinal(
+ S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
+ ? /*Parallel and Simd*/ OMPD_parallel_for_simd
+ : /*Parallel only*/ OMPD_parallel);
// Emit post-update of the reduction variables if IsLastIter != 0.
emitPostUpdateForReductionClause(
*this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
@@ -2248,12 +2283,42 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
return HasLastprivateClause;
}
+/// The following two functions generate expressions for the loop lower and
+/// upper bounds in the case of static and dynamic (dispatch) schedules of the
+/// associated 'for' or 'distribute' loop.
+static std::pair<LValue, LValue>
+emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
+ const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ LValue LB =
+ EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
+ LValue UB =
+ EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
+ return {LB, UB};
+}
+
+/// When dealing with dispatch schedules (e.g. dynamic, guided), we do not
+/// consider the lower and upper bound expressions generated by the
+/// worksharing loop support; instead we use 0 and the iteration space size as
+/// constants.
+static std::pair<llvm::Value *, llvm::Value *>
+emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
+ Address LB, Address UB) {
+ const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ const Expr *IVExpr = LS.getIterationVariable();
+ const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
+ llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
+ llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
+ return {LBVal, UBVal};
+}
+
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
+ HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+ emitForLoopBounds,
+ emitDispatchForLoopBounds);
};
{
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
@@ -2271,7 +2336,9 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
+ HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+ emitForLoopBounds,
+ emitDispatchForLoopBounds);
};
{
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
@@ -2320,8 +2387,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
// Generate condition for loop.
BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
- OK_Ordinary, S.getLocStart(),
- /*fpContractable=*/false);
+ OK_Ordinary, S.getLocStart(), FPOptions());
// Increment for loop counter.
UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
S.getLocStart());
@@ -2397,7 +2463,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
};
CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
- CGF.EmitOMPReductionClauseFinal(S);
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
// Emit post-update of the reduction variables if IsLastIter != 0.
emitPostUpdateForReductionClause(
CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
@@ -2523,9 +2589,11 @@ void CodeGenFunction::EmitOMPParallelForDirective(
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
- CGF.EmitOMPWorksharingLoop(S);
+ CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
+ emitDispatchForLoopBounds);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
+ emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
+ emitEmptyBoundParameters);
}
void CodeGenFunction::EmitOMPParallelForSimdDirective(
@@ -2533,9 +2601,11 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitOMPWorksharingLoop(S);
+ CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
+ emitDispatchForLoopBounds);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
+ emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
+ emitEmptyBoundParameters);
}
void CodeGenFunction::EmitOMPParallelSectionsDirective(
@@ -2545,7 +2615,8 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitSections(S);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
+ emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
+ emitEmptyBoundParameters);
}
void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
@@ -2629,11 +2700,32 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
++ID;
}
}
+ SmallVector<const Expr *, 4> LHSs;
+ SmallVector<const Expr *, 4> RHSs;
+ for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+ auto IPriv = C->privates().begin();
+ auto IRed = C->reduction_ops().begin();
+ auto ILHS = C->lhs_exprs().begin();
+ auto IRHS = C->rhs_exprs().begin();
+ for (const auto *Ref : C->varlists()) {
+ Data.ReductionVars.emplace_back(Ref);
+ Data.ReductionCopies.emplace_back(*IPriv);
+ Data.ReductionOps.emplace_back(*IRed);
+ LHSs.emplace_back(*ILHS);
+ RHSs.emplace_back(*IRHS);
+ std::advance(IPriv, 1);
+ std::advance(IRed, 1);
+ std::advance(ILHS, 1);
+ std::advance(IRHS, 1);
+ }
+ }
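+  // Initialize the runtime's task reduction bookkeeping from the gathered
+  // clauses; the result is stored in Data.Reductions and consumed when the
+  // task body privatizes the reduction items.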
+ Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
+ *this, S.getLocStart(), LHSs, RHSs, Data);
// Build list of dependences.
for (const auto *C : S.getClausesOfKind<OMPDependClause>())
for (auto *IRef : C->varlists())
Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
- auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs](
+ auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
CodeGenFunction &CGF, PrePostActionTy &Action) {
// Set proper addresses for generated private copies.
OMPPrivateScope Scope(CGF);
@@ -2688,6 +2780,34 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
}
}
+ if (Data.Reductions) {
+ OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
+ ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
+ Data.ReductionOps);
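+      // The reductions descriptor is passed to the task entry as the captured
+      // parameter at index 9; load it so each reduction item can be looked up
+      // via getTaskReductionItem below.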
+ llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
+ CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
+ for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
+ RedCG.emitSharedLValue(CGF, Cnt);
+ RedCG.emitAggregateType(CGF, Cnt);
+ Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
+ CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
+ Replacement =
+ Address(CGF.EmitScalarConversion(
+ Replacement.getPointer(), CGF.getContext().VoidPtrTy,
+ CGF.getContext().getPointerType(
+ Data.ReductionCopies[Cnt]->getType()),
+ SourceLocation()),
+ Replacement.getAlignment());
+ Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
+ Scope.addPrivate(RedCG.getBaseDecl(Cnt),
+ [Replacement]() { return Replacement; });
+        // FIXME: This must be removed once the runtime library is fixed.
+        // Emit required threadprivate variables for
+        // initializer/combiner/finalizer.
+ CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
+ RedCG, Cnt);
+ }
+ }
(void)Scope.Privatize();
Action.Enter(CGF);
@@ -2763,7 +2883,9 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
}(), S.getLocStart());
}
-void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
+void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
+ const CodeGenLoopTy &CodeGenLoop,
+ Expr *IncExpr) {
// Emit the loop iteration variable.
auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
@@ -2804,10 +2926,17 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
// Emit 'then' code.
{
// Emit helper vars inits.
- LValue LB =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
- LValue UB =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
+
+ LValue LB = EmitOMPHelperVar(
+ *this, cast<DeclRefExpr>(
+ (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedLowerBoundVariable()
+ : S.getLowerBoundVariable())));
+ LValue UB = EmitOMPHelperVar(
+ *this, cast<DeclRefExpr>(
+ (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedUpperBoundVariable()
+ : S.getUpperBoundVariable())));
LValue ST =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
LValue IL =
@@ -2859,15 +2988,25 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
auto LoopExit =
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
// UB = min(UB, GlobalUB);
- EmitIgnoredExpr(S.getEnsureUpperBound());
+ EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedEnsureUpperBound()
+ : S.getEnsureUpperBound());
// IV = LB;
- EmitIgnoredExpr(S.getInit());
+ EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedInit()
+ : S.getInit());
+
+ Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedCond()
+ : S.getCond();
+
+    // For 'distribute' alone, codegen
// while (idx <= UB) { BODY; ++idx; }
- EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
- S.getInc(),
- [&S, LoopExit](CodeGenFunction &CGF) {
- CGF.EmitOMPLoopBody(S, LoopExit);
- CGF.EmitStopPoint(&S);
+    // When combined with 'for' (e.g. as in 'distribute parallel for'):
+    //   while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
+ EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
+ [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
+ CodeGenLoop(CGF, S, LoopExit);
},
[](CodeGenFunction &) {});
EmitBlock(LoopExit.getBlock());
@@ -2876,9 +3015,11 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
} else {
// Emit the outer loop, which requests its work chunk [LB..UB] from
// runtime and runs the inner loop to process it.
- EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
- LB.getAddress(), UB.getAddress(), ST.getAddress(),
- IL.getAddress(), Chunk);
+ const OMPLoopArguments LoopArguments = {
+ LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
+ Chunk};
+ EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
+ CodeGenLoop);
}
// Emit final copy of the lastprivate variables if IsLastIter != 0.
@@ -2900,7 +3041,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
void CodeGenFunction::EmitOMPDistributeDirective(
const OMPDistributeDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitOMPDistributeLoop(S);
+
+ CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
};
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
@@ -3250,7 +3392,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
NewVValType = XRValExpr->getType();
auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
- IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
+ IsPostfixUpdate](RValue XRValue) -> RValue {
CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
RValue Res = CGF.EmitAnyExpr(UE);
@@ -3277,7 +3419,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
NewVValType = X->getType().getNonReferenceType();
ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
X->getType().getNonReferenceType(), Loc);
- auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
+ auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
NewVVal = XRValue;
return ExprRValue;
};
@@ -3327,6 +3469,7 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
case OMPC_firstprivate:
case OMPC_lastprivate:
case OMPC_reduction:
+ case OMPC_task_reduction:
case OMPC_safelen:
case OMPC_simdlen:
case OMPC_collapse:
@@ -3404,41 +3547,24 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}
-std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/>
-CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
- CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName,
- bool IsOffloadEntry) {
- llvm::Function *OutlinedFn = nullptr;
- llvm::Constant *OutlinedFnID = nullptr;
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
- OMPPrivateScope PrivateScope(CGF);
- (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
- CGF.EmitOMPPrivateClause(S, PrivateScope);
- (void)PrivateScope.Privatize();
-
- Action.Enter(CGF);
- CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- };
- // Emit target region as a standalone region.
- CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
- S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen);
- return std::make_pair(OutlinedFn, OutlinedFnID);
-}
-
-void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
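+/// Common helper for 'target' directives: outline the region, evaluate the
+/// applicable 'if' and 'device' clauses, and emit the target call with the
+/// captured variables.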
+static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S,
+ const RegionCodeGenTy &CodeGen) {
+ assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
+ CodeGenModule &CGM = CGF.CGM;
const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
- llvm::SmallVector<llvm::Value *, 16> CapturedVars;
- GenerateOpenMPCapturedVars(CS, CapturedVars);
-
llvm::Function *Fn = nullptr;
llvm::Constant *FnID = nullptr;
- // Check if we have any if clause associated with the directive.
const Expr *IfCond = nullptr;
-
- if (auto *C = S.getSingleClause<OMPIfClause>()) {
- IfCond = C->getCondition();
+  // Check for an 'if' clause that applies to the target region (no name
+  // modifier, or the 'target' modifier).
+ for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
+ if (C->getNameModifier() == OMPD_unknown ||
+ C->getNameModifier() == OMPD_target) {
+ IfCond = C->getCondition();
+ break;
+ }
}
// Check if we have any device clause associated with the directive.
@@ -3453,43 +3579,76 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
bool IsOffloadEntry = true;
if (IfCond) {
bool Val;
- if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
+ if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
IsOffloadEntry = false;
}
if (CGM.getLangOpts().OMPTargetTriples.empty())
IsOffloadEntry = false;
- assert(CurFuncDecl && "No parent declaration for target region!");
+ assert(CGF.CurFuncDecl && "No parent declaration for target region!");
StringRef ParentName;
// In case we have Ctors/Dtors we use the complete type variant to produce
// the mangling of the device outlined kernel.
- if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl))
+ if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
- else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl))
+ else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
else
ParentName =
- CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));
+ CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
- std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction(
- CGM, S, ParentName, IsOffloadEntry);
- OMPLexicalScope Scope(*this, S);
- CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
+ IsOffloadEntry, CodeGen);
+ OMPLexicalScope Scope(CGF, S);
+ llvm::SmallVector<llvm::Value *, 16> CapturedVars;
+ CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
+ CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
CapturedVars);
}
+static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
+ PrePostActionTy &Action) {
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
+ CGF.EmitOMPPrivateClause(S, PrivateScope);
+ (void)PrivateScope.Privatize();
+
+ Action.Enter(CGF);
+ CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+}
+
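+/// Emit the outlined function for a 'target' region as a standalone offload
+/// entry, identified via the mangled name of its parent function.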
+void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
+ StringRef ParentName,
+ const OMPTargetDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetRegion(CGF, S, Action);
+ };
+ llvm::Function *Fn;
+ llvm::Constant *Addr;
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+ assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetRegion(CGF, S, Action);
+ };
+ emitCommonOMPTargetDirective(*this, S, CodeGen);
+}
+
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &S,
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen) {
- auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
- emitParallelOrTeamsOutlinedFunction(S,
- *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
+ const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
+ auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
+ S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
- const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S);
- const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
- const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
+ const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
+ const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
if (NT || TL) {
Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
@@ -3498,7 +3657,7 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
S.getLocStart());
}
- OMPLexicalScope Scope(CGF, S);
+ OMPTeamsScope Scope(CGF, S);
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
@@ -3511,10 +3670,47 @@ void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
OMPPrivateScope PrivateScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
+ CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};
emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
+ emitPostUpdateForReductionClause(
+ *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
+}
+
+static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
+ const OMPTargetTeamsDirective &S) {
+ auto *CS = S.getCapturedStmt(OMPD_teams);
+ Action.Enter(CGF);
+ auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
+ // TODO: Add support for clauses.
+ CGF.EmitStmt(CS->getCapturedStmt());
+ };
+ emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
+ CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetTeamsDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetTeamsRegion(CGF, Action, S);
+ };
+ llvm::Function *Fn;
+ llvm::Constant *Addr;
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+ assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDirective(
+ const OMPTargetTeamsDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetTeamsRegion(CGF, Action, S);
+ };
+ emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPCancellationPointDirective(
@@ -3740,9 +3936,48 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective(
CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
+static void emitTargetParallelRegion(CodeGenFunction &CGF,
+ const OMPTargetParallelDirective &S,
+ PrePostActionTy &Action) {
+ // Get the captured statement associated with the 'parallel' region.
+ auto *CS = S.getCapturedStmt(OMPD_parallel);
+ Action.Enter(CGF);
+ auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
+ CGF.EmitOMPPrivateClause(S, PrivateScope);
+ CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+ (void)PrivateScope.Privatize();
+ // TODO: Add support for clauses.
+ CGF.EmitStmt(CS->getCapturedStmt());
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
+ };
+ emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
+ emitEmptyBoundParameters);
+ emitPostUpdateForReductionClause(
+ CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
+}
+
+void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
+ CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetParallelDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetParallelRegion(CGF, S, Action);
+ };
+ llvm::Function *Fn;
+ llvm::Constant *Addr;
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+ assert(Fn && Addr && "Target device function emission failed.");
+}
+
void CodeGenFunction::EmitOMPTargetParallelDirective(
const OMPTargetParallelDirective &S) {
- // TODO: codegen for target parallel.
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetParallelRegion(CGF, S, Action);
+ };
+ emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPTargetParallelForDirective(
@@ -3884,7 +4119,18 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
CodeGen);
};
- EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+ if (Data.Nogroup)
+ EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+ else {
+ CGM.getOpenMPRuntime().emitTaskgroupRegion(
+ *this,
+ [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+ },
+ S.getLocStart());
+ }
}
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
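The taskloop hunk above encodes the OpenMP rule that a taskloop without a nogroup clause is enclosed in an implicit taskgroup, so its child tasks finish before execution continues past the loop. A minimal source-level sketch of the two paths (hypothetical user code, not part of this patch):

    // No 'nogroup': codegen wraps the loop in emitTaskgroupRegion().
    #pragma omp taskloop
    for (int i = 0; i < n; ++i)
      work(i);

    // 'nogroup' present: Data.Nogroup is true and EmitOMPTaskBasedDirective()
    // is called directly, with no enclosing taskgroup.
    #pragma omp taskloop nogroup
    for (int i = 0; i < n; ++i)
      work(i);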
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp
index 1a09830..64b6d0d 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp
@@ -14,7 +14,7 @@
#include "CGCXXABI.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
-#include "ConstantBuilder.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/RecordLayout.h"
#include "clang/CodeGen/CGFunctionInfo.h"
@@ -284,6 +284,9 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr,
if (isa<CXXDestructorDecl>(MD))
CGM.getCXXABI().adjustCallArgsForDestructorThunk(*this, CurGD, CallArgs);
+#ifndef NDEBUG
+ unsigned PrefixArgs = CallArgs.size() - 1;
+#endif
// Add the rest of the arguments.
for (const ParmVarDecl *PD : MD->parameters())
EmitDelegateCallArg(CallArgs, PD, SourceLocation());
@@ -292,7 +295,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr,
#ifndef NDEBUG
const CGFunctionInfo &CallFnInfo = CGM.getTypes().arrangeCXXMethodCall(
- CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1, MD));
+ CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1, MD), PrefixArgs);
assert(CallFnInfo.getRegParm() == CurFnInfo->getRegParm() &&
CallFnInfo.isNoReturn() == CurFnInfo->isNoReturn() &&
CallFnInfo.getCallingConvention() == CurFnInfo->getCallingConvention());
@@ -376,12 +379,9 @@ void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD,
// Apply the standard set of call attributes.
unsigned CallingConv;
- CodeGen::AttributeListType AttributeList;
- CGM.ConstructAttributeList(CalleePtr->getName(),
- *CurFnInfo, MD, AttributeList,
+ llvm::AttributeList Attrs;
+ CGM.ConstructAttributeList(CalleePtr->getName(), *CurFnInfo, MD, Attrs,
CallingConv, /*AttrOnCallSite=*/true);
- llvm::AttributeSet Attrs =
- llvm::AttributeSet::get(getLLVMContext(), AttributeList);
Call->setAttributes(Attrs);
Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
@@ -744,9 +744,10 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) {
switch (keyFunction->getTemplateSpecializationKind()) {
case TSK_Undeclared:
case TSK_ExplicitSpecialization:
- assert((def || CodeGenOpts.OptimizationLevel > 0) &&
- "Shouldn't query vtable linkage without key function or "
- "optimizations");
+ assert((def || CodeGenOpts.OptimizationLevel > 0 ||
+ CodeGenOpts.getDebugInfo() != codegenoptions::NoDebugInfo) &&
+ "Shouldn't query vtable linkage without key function, "
+ "optimizations, or debug info");
if (!def && CodeGenOpts.OptimizationLevel > 0)
return llvm::GlobalVariable::AvailableExternallyLinkage;
@@ -900,6 +901,8 @@ void CodeGenModule::EmitDeferredVTables() {
for (const CXXRecordDecl *RD : DeferredVTables)
if (shouldEmitVTableAtEndOfTranslationUnit(*this, RD))
VTables.GenerateClassData(RD);
+ else if (shouldOpportunisticallyEmitVTables())
+ OpportunisticVTables.push_back(RD);
assert(savedSize == DeferredVTables.size() &&
"deferred extra vtables during vtable emission?");
@@ -942,7 +945,7 @@ bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) {
void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable,
const VTableLayout &VTLayout) {
- if (!getCodeGenOpts().PrepareForLTO)
+ if (!getCodeGenOpts().LTOUnit)
return;
CharUnits PointerWidth =
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h b/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h
index 53a376d..b768eb8 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h
@@ -146,6 +146,25 @@ static inline AlignmentSource getFieldAlignmentSource(AlignmentSource Source) {
return AlignmentSource::Decl;
}
+class LValueBaseInfo {
+ AlignmentSource AlignSource;
+ bool MayAlias;
+
+public:
+ explicit LValueBaseInfo(AlignmentSource Source = AlignmentSource::Type,
+ bool Alias = false)
+ : AlignSource(Source), MayAlias(Alias) {}
+ AlignmentSource getAlignmentSource() const { return AlignSource; }
+ void setAlignmentSource(AlignmentSource Source) { AlignSource = Source; }
+ bool getMayAlias() const { return MayAlias; }
+ void setMayAlias(bool Alias) { MayAlias = Alias; }
+
+ void mergeForCast(const LValueBaseInfo &Info) {
+ setAlignmentSource(Info.getAlignmentSource());
+ setMayAlias(getMayAlias() || Info.getMayAlias());
+ }
+};
+
/// LValue - This represents an lvalue reference. Because C/C++ allow
/// bitfields, this is not a simple LLVM pointer, it may be a pointer plus a
/// bitrange.
@@ -200,7 +219,7 @@ class LValue {
// to make the default bitfield pattern all-zeroes.
bool ImpreciseLifetime : 1;
- unsigned AlignSource : 2;
+ LValueBaseInfo BaseInfo;
// This flag shows if a nontemporal load/stores should be used when accessing
// this lvalue.
@@ -218,7 +237,7 @@ class LValue {
private:
void Initialize(QualType Type, Qualifiers Quals,
- CharUnits Alignment, AlignmentSource AlignSource,
+ CharUnits Alignment, LValueBaseInfo BaseInfo,
llvm::MDNode *TBAAInfo = nullptr) {
assert((!Alignment.isZero() || Type->isIncompleteType()) &&
"initializing l-value with zero alignment!");
@@ -227,7 +246,7 @@ private:
this->Alignment = Alignment.getQuantity();
assert(this->Alignment == Alignment.getQuantity() &&
"Alignment exceeds allowed max!");
- this->AlignSource = unsigned(AlignSource);
+ this->BaseInfo = BaseInfo;
// Initialize Objective-C flags.
this->Ivar = this->ObjIsArray = this->NonGC = this->GlobalObjCRef = false;
@@ -316,12 +335,8 @@ public:
CharUnits getAlignment() const { return CharUnits::fromQuantity(Alignment); }
void setAlignment(CharUnits A) { Alignment = A.getQuantity(); }
- AlignmentSource getAlignmentSource() const {
- return AlignmentSource(AlignSource);
- }
- void setAlignmentSource(AlignmentSource Source) {
- AlignSource = unsigned(Source);
- }
+ LValueBaseInfo getBaseInfo() const { return BaseInfo; }
+ void setBaseInfo(LValueBaseInfo Info) { BaseInfo = Info; }
// simple lvalue
llvm::Value *getPointer() const {
@@ -370,7 +385,7 @@ public:
static LValue MakeAddr(Address address, QualType type,
ASTContext &Context,
- AlignmentSource alignSource,
+ LValueBaseInfo BaseInfo,
llvm::MDNode *TBAAInfo = nullptr) {
Qualifiers qs = type.getQualifiers();
qs.setObjCGCAttr(Context.getObjCGCAttrKind(type));
@@ -379,29 +394,29 @@ public:
R.LVType = Simple;
assert(address.getPointer()->getType()->isPointerTy());
R.V = address.getPointer();
- R.Initialize(type, qs, address.getAlignment(), alignSource, TBAAInfo);
+ R.Initialize(type, qs, address.getAlignment(), BaseInfo, TBAAInfo);
return R;
}
static LValue MakeVectorElt(Address vecAddress, llvm::Value *Idx,
- QualType type, AlignmentSource alignSource) {
+ QualType type, LValueBaseInfo BaseInfo) {
LValue R;
R.LVType = VectorElt;
R.V = vecAddress.getPointer();
R.VectorIdx = Idx;
R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(),
- alignSource);
+ BaseInfo);
return R;
}
static LValue MakeExtVectorElt(Address vecAddress, llvm::Constant *Elts,
- QualType type, AlignmentSource alignSource) {
+ QualType type, LValueBaseInfo BaseInfo) {
LValue R;
R.LVType = ExtVectorElt;
R.V = vecAddress.getPointer();
R.VectorElts = Elts;
R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(),
- alignSource);
+ BaseInfo);
return R;
}
@@ -414,12 +429,12 @@ public:
static LValue MakeBitfield(Address Addr,
const CGBitFieldInfo &Info,
QualType type,
- AlignmentSource alignSource) {
+ LValueBaseInfo BaseInfo) {
LValue R;
R.LVType = BitField;
R.V = Addr.getPointer();
R.BitFieldInfo = &Info;
- R.Initialize(type, type.getQualifiers(), Addr.getAlignment(), alignSource);
+ R.Initialize(type, type.getQualifiers(), Addr.getAlignment(), BaseInfo);
return R;
}
@@ -428,7 +443,7 @@ public:
R.LVType = GlobalReg;
R.V = Reg.getPointer();
R.Initialize(type, type.getQualifiers(), Reg.getAlignment(),
- AlignmentSource::Decl);
+ LValueBaseInfo(AlignmentSource::Decl, false));
return R;
}
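The CGValue.h hunks above replace the packed 2-bit AlignSource field with the LValueBaseInfo value class, which carries the alignment source together with a may-alias flag. A rough sketch of a call site after the change (Addr, Ty, Ctx and TBAA are placeholder names, not code from this patch):

    // Previously: LValue::MakeAddr(Addr, Ty, Ctx, AlignmentSource::Decl, TBAA);
    LValueBaseInfo Info(AlignmentSource::Decl, /*Alias=*/false);
    LValue LV = LValue::MakeAddr(Addr, Ty, Ctx, Info, TBAA);

    // Casts merge the operand's info: the alignment source is taken from the
    // operand, and the may-alias flags are OR'ed together.
    Info.mergeForCast(LV.getBaseInfo());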
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp
index 166f44f..0735a9c 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp
@@ -64,3 +64,19 @@ CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM,
returnType, /*IsInstanceMethod=*/false, /*IsChainCall=*/false, argTypes,
info, {}, args);
}
+
+llvm::FunctionType *
+CodeGen::convertFreeFunctionType(CodeGenModule &CGM, const FunctionDecl *FD) {
+ assert(FD != nullptr && "Expected a non-null function declaration!");
+ llvm::Type *T = CGM.getTypes().ConvertFunctionType(FD->getType(), FD);
+
+ if (auto FT = dyn_cast<llvm::FunctionType>(T))
+ return FT;
+
+ return nullptr;
+}
+
+llvm::Type *
+CodeGen::convertTypeForMemory(CodeGenModule &CGM, QualType T) {
+ return CGM.getTypes().ConvertTypeForMem(T);
+}
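The two functions added to CodeGenABITypes.cpp above give external clients a way to ask CodeGen for the LLVM type of a free function declaration and for the in-memory lowering of a QualType. A hypothetical caller, assuming CGM, FD and QT are obtained elsewhere:

    // Returns nullptr when the declared type does not lower to an llvm::FunctionType.
    if (llvm::FunctionType *FnTy = clang::CodeGen::convertFreeFunctionType(CGM, FD))
      llvm::errs() << *FnTy << "\n";

    // Memory (not ABI argument) representation of an arbitrary QualType.
    llvm::Type *MemTy = clang::CodeGen::convertTypeForMemory(CGM, QT);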
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp
index 5f74141..4f03de5 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp
@@ -7,7 +7,10 @@
//
//===----------------------------------------------------------------------===//
+#include "clang/CodeGen/CodeGenAction.h"
+#include "CodeGenModule.h"
#include "CoverageMappingGen.h"
+#include "MacroPPCallbacks.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclCXX.h"
@@ -16,15 +19,16 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/BackendUtil.h"
-#include "clang/CodeGen/CodeGenAction.h"
#include "clang/CodeGen/ModuleBuilder.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
@@ -35,12 +39,16 @@
#include "llvm/Support/Timer.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Transforms/IPO/Internalize.h"
+
#include <memory>
using namespace clang;
using namespace llvm;
namespace clang {
class BackendConsumer : public ASTConsumer {
+ using LinkModule = CodeGenAction::LinkModule;
+
virtual void anchor();
DiagnosticsEngine &Diags;
BackendAction Action;
@@ -61,43 +69,39 @@ namespace clang {
std::unique_ptr<CodeGenerator> Gen;
- SmallVector<std::pair<unsigned, std::unique_ptr<llvm::Module>>, 4>
- LinkModules;
+ SmallVector<LinkModule, 4> LinkModules;
// This is here so that the diagnostic printer knows the module a diagnostic
// refers to.
llvm::Module *CurLinkModule = nullptr;
public:
- BackendConsumer(
- BackendAction Action, DiagnosticsEngine &Diags,
- const HeaderSearchOptions &HeaderSearchOpts,
- const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts,
- const TargetOptions &TargetOpts, const LangOptions &LangOpts,
- bool TimePasses, const std::string &InFile,
- const SmallVectorImpl<std::pair<unsigned, llvm::Module *>> &LinkModules,
- std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
- CoverageSourceInfo *CoverageInfo = nullptr)
+ BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
+ const HeaderSearchOptions &HeaderSearchOpts,
+ const PreprocessorOptions &PPOpts,
+ const CodeGenOptions &CodeGenOpts,
+ const TargetOptions &TargetOpts,
+ const LangOptions &LangOpts, bool TimePasses,
+ const std::string &InFile,
+ SmallVector<LinkModule, 4> LinkModules,
+ std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
+ CoverageSourceInfo *CoverageInfo = nullptr)
: Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
AsmOutStream(std::move(OS)), Context(nullptr),
LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
LLVMIRGenerationRefCount(0),
Gen(CreateLLVMCodeGen(Diags, InFile, HeaderSearchOpts, PPOpts,
- CodeGenOpts, C, CoverageInfo)) {
+ CodeGenOpts, C, CoverageInfo)),
+ LinkModules(std::move(LinkModules)) {
llvm::TimePassesIsEnabled = TimePasses;
- for (auto &I : LinkModules)
- this->LinkModules.push_back(
- std::make_pair(I.first, std::unique_ptr<llvm::Module>(I.second)));
}
llvm::Module *getModule() const { return Gen->GetModule(); }
std::unique_ptr<llvm::Module> takeModule() {
return std::unique_ptr<llvm::Module>(Gen->ReleaseModule());
}
- void releaseLinkModules() {
- for (auto &I : LinkModules)
- I.second.release();
- }
+
+ CodeGenerator *getCodeGenerator() { return Gen.get(); }
void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override {
Gen->HandleCXXStaticMemberVarInstantiation(VD);
@@ -159,6 +163,35 @@ namespace clang {
HandleTopLevelDecl(D);
}
+ // Links each entry in LinkModules into our module. Returns true on error.
+ bool LinkInModules() {
+ for (auto &LM : LinkModules) {
+ if (LM.PropagateAttrs)
+ for (Function &F : *LM.Module)
+ Gen->CGM().AddDefaultFnAttrs(F);
+
+ CurLinkModule = LM.Module.get();
+
+ bool Err;
+ if (LM.Internalize) {
+ Err = Linker::linkModules(
+ *getModule(), std::move(LM.Module), LM.LinkFlags,
+ [](llvm::Module &M, const llvm::StringSet<> &GVS) {
+ internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) {
+ return !GV.hasName() || (GVS.count(GV.getName()) == 0);
+ });
+ });
+ } else {
+ Err = Linker::linkModules(*getModule(), std::move(LM.Module),
+ LM.LinkFlags);
+ }
+
+ if (Err)
+ return true;
+ }
+ return false; // success
+ }
+
void HandleTranslationUnit(ASTContext &C) override {
{
PrettyStackTraceString CrashInfo("Per-file LLVM IR generation");
@@ -195,7 +228,10 @@ namespace clang {
Ctx.getDiagnosticHandler();
void *OldDiagnosticContext = Ctx.getDiagnosticContext();
Ctx.setDiagnosticHandler(DiagnosticHandler, this);
- Ctx.setDiagnosticHotnessRequested(CodeGenOpts.DiagnosticsWithHotness);
+ Ctx.setDiagnosticsHotnessRequested(CodeGenOpts.DiagnosticsWithHotness);
+ if (CodeGenOpts.DiagnosticsHotnessThreshold != 0)
+ Ctx.setDiagnosticsHotnessThreshold(
+ CodeGenOpts.DiagnosticsHotnessThreshold);
std::unique_ptr<llvm::tool_output_file> OptRecordFile;
if (!CodeGenOpts.OptRecordFile.empty()) {
@@ -213,16 +249,12 @@ namespace clang {
llvm::make_unique<yaml::Output>(OptRecordFile->os()));
if (CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone)
- Ctx.setDiagnosticHotnessRequested(true);
+ Ctx.setDiagnosticsHotnessRequested(true);
}
- // Link LinkModule into this module if present, preserving its validity.
- for (auto &I : LinkModules) {
- unsigned LinkFlags = I.first;
- CurLinkModule = I.second.get();
- if (Linker::linkModules(*getModule(), std::move(I.second), LinkFlags))
- return;
- }
+ // Link each LinkModule into our module.
+ if (LinkInModules())
+ return;
EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef());
@@ -275,7 +307,7 @@ namespace clang {
/// Get the best possible source location to represent a diagnostic that
/// may have associated debug info.
const FullSourceLoc
- getBestLocationFromDebugLoc(const llvm::DiagnosticInfoWithDebugLocBase &D,
+ getBestLocationFromDebugLoc(const llvm::DiagnosticInfoWithLocationBase &D,
bool &BadDebugInfo, StringRef &Filename,
unsigned &Line, unsigned &Column) const;
@@ -298,9 +330,8 @@ namespace clang {
/// them.
void EmitOptimizationMessage(const llvm::DiagnosticInfoOptimizationBase &D,
unsigned DiagID);
- void OptimizationRemarkHandler(const llvm::OptimizationRemark &D);
- void OptimizationRemarkHandler(const llvm::OptimizationRemarkMissed &D);
- void OptimizationRemarkHandler(const llvm::OptimizationRemarkAnalysis &D);
+ void
+ OptimizationRemarkHandler(const llvm::DiagnosticInfoOptimizationBase &D);
void OptimizationRemarkHandler(
const llvm::OptimizationRemarkAnalysisFPCommute &D);
void OptimizationRemarkHandler(
@@ -308,7 +339,7 @@ namespace clang {
void OptimizationFailureHandler(
const llvm::DiagnosticInfoOptimizationFailure &D);
};
-
+
void BackendConsumer::anchor() {}
}
@@ -377,7 +408,7 @@ void BackendConsumer::InlineAsmDiagHandler2(const llvm::SMDiagnostic &D,
// code.
if (LocCookie.isValid()) {
Diags.Report(LocCookie, DiagID).AddString(Message);
-
+
if (D.getLoc().isValid()) {
DiagnosticBuilder B = Diags.Report(Loc, diag::note_fe_inline_asm_here);
// Convert the SMDiagnostic ranges into SourceRange and attach them
@@ -390,7 +421,7 @@ void BackendConsumer::InlineAsmDiagHandler2(const llvm::SMDiagnostic &D,
}
return;
}
-
+
// Otherwise, report the backend issue as occurring in the generated .s file.
// If Loc is invalid, we still need to report the issue, it just gets no
// location info.
@@ -477,8 +508,8 @@ BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) {
}
const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc(
- const llvm::DiagnosticInfoWithDebugLocBase &D, bool &BadDebugInfo, StringRef &Filename,
- unsigned &Line, unsigned &Column) const {
+ const llvm::DiagnosticInfoWithLocationBase &D, bool &BadDebugInfo,
+ StringRef &Filename, unsigned &Line, unsigned &Column) const {
SourceManager &SourceMgr = Context->getSourceManager();
FileManager &FileMgr = SourceMgr.getFileManager();
SourceLocation DILoc;
@@ -520,9 +551,9 @@ void BackendConsumer::UnsupportedDiagHandler(
StringRef Filename;
unsigned Line, Column;
- bool BadDebugInfo;
- FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename,
- Line, Column);
+ bool BadDebugInfo = false;
+ FullSourceLoc Loc =
+ getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column);
Diags.Report(Loc, diag::err_fe_backend_unsupported) << D.getMessage().str();
@@ -544,8 +575,8 @@ void BackendConsumer::EmitOptimizationMessage(
StringRef Filename;
unsigned Line, Column;
bool BadDebugInfo = false;
- FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename,
- Line, Column);
+ FullSourceLoc Loc =
+ getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column);
std::string Msg;
raw_string_ostream MsgStream(Msg);
@@ -568,36 +599,34 @@ void BackendConsumer::EmitOptimizationMessage(
}
void BackendConsumer::OptimizationRemarkHandler(
- const llvm::OptimizationRemark &D) {
- // Optimization remarks are active only if the -Rpass flag has a regular
- // expression that matches the name of the pass name in \p D.
- if (CodeGenOpts.OptimizationRemarkPattern &&
- CodeGenOpts.OptimizationRemarkPattern->match(D.getPassName()))
- EmitOptimizationMessage(D, diag::remark_fe_backend_optimization_remark);
-}
-
-void BackendConsumer::OptimizationRemarkHandler(
- const llvm::OptimizationRemarkMissed &D) {
- // Missed optimization remarks are active only if the -Rpass-missed
- // flag has a regular expression that matches the name of the pass
- // name in \p D.
- if (CodeGenOpts.OptimizationRemarkMissedPattern &&
- CodeGenOpts.OptimizationRemarkMissedPattern->match(D.getPassName()))
- EmitOptimizationMessage(D,
- diag::remark_fe_backend_optimization_remark_missed);
-}
-
-void BackendConsumer::OptimizationRemarkHandler(
- const llvm::OptimizationRemarkAnalysis &D) {
- // Optimization analysis remarks are active if the pass name is set to
- // llvm::DiagnosticInfo::AlwasyPrint or if the -Rpass-analysis flag has a
- // regular expression that matches the name of the pass name in \p D.
-
- if (D.shouldAlwaysPrint() ||
- (CodeGenOpts.OptimizationRemarkAnalysisPattern &&
- CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName())))
- EmitOptimizationMessage(
- D, diag::remark_fe_backend_optimization_remark_analysis);
+ const llvm::DiagnosticInfoOptimizationBase &D) {
+ if (D.isPassed()) {
+ // Optimization remarks are active only if the -Rpass flag has a regular
+ // expression that matches the name of the pass name in \p D.
+ if (CodeGenOpts.OptimizationRemarkPattern &&
+ CodeGenOpts.OptimizationRemarkPattern->match(D.getPassName()))
+ EmitOptimizationMessage(D, diag::remark_fe_backend_optimization_remark);
+ } else if (D.isMissed()) {
+ // Missed optimization remarks are active only if the -Rpass-missed
+ // flag has a regular expression that matches the name of the pass
+ // name in \p D.
+ if (CodeGenOpts.OptimizationRemarkMissedPattern &&
+ CodeGenOpts.OptimizationRemarkMissedPattern->match(D.getPassName()))
+ EmitOptimizationMessage(
+ D, diag::remark_fe_backend_optimization_remark_missed);
+ } else {
+ assert(D.isAnalysis() && "Unknown remark type");
+
+ bool ShouldAlwaysPrint = false;
+ if (auto *ORA = dyn_cast<llvm::OptimizationRemarkAnalysis>(&D))
+ ShouldAlwaysPrint = ORA->shouldAlwaysPrint();
+
+ if (ShouldAlwaysPrint ||
+ (CodeGenOpts.OptimizationRemarkAnalysisPattern &&
+ CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName())))
+ EmitOptimizationMessage(
+ D, diag::remark_fe_backend_optimization_remark_analysis);
+ }
}
void BackendConsumer::OptimizationRemarkHandler(
@@ -680,6 +709,21 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) {
// handler. There is no generic way of emitting them.
OptimizationRemarkHandler(cast<OptimizationRemarkAnalysisAliasing>(DI));
return;
+ case llvm::DK_MachineOptimizationRemark:
+ // Optimization remarks are always handled completely by this
+ // handler. There is no generic way of emitting them.
+ OptimizationRemarkHandler(cast<MachineOptimizationRemark>(DI));
+ return;
+ case llvm::DK_MachineOptimizationRemarkMissed:
+ // Optimization remarks are always handled completely by this
+ // handler. There is no generic way of emitting them.
+ OptimizationRemarkHandler(cast<MachineOptimizationRemarkMissed>(DI));
+ return;
+ case llvm::DK_MachineOptimizationRemarkAnalysis:
+ // Optimization remarks are always handled completely by this
+ // handler. There is no generic way of emitting them.
+ OptimizationRemarkHandler(cast<MachineOptimizationRemarkAnalysis>(DI));
+ return;
case llvm::DK_OptimizationFailure:
// Optimization failures are always handled completely by this
// handler.
@@ -729,10 +773,6 @@ void CodeGenAction::EndSourceFileAction() {
if (!getCompilerInstance().hasASTConsumer())
return;
- // Take back ownership of link modules we passed to consumer.
- if (!LinkModules.empty())
- BEConsumer->releaseLinkModules();
-
// Steal the module from the consumer.
TheModule = BEConsumer->takeModule();
}
@@ -775,13 +815,12 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
// Load bitcode modules to link with, if we need to.
if (LinkModules.empty())
- for (auto &I : CI.getCodeGenOpts().LinkBitcodeFiles) {
- const std::string &LinkBCFile = I.second;
-
- auto BCBuf = CI.getFileManager().getBufferForFile(LinkBCFile);
+ for (const CodeGenOptions::BitcodeFileToLink &F :
+ CI.getCodeGenOpts().LinkBitcodeFiles) {
+ auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
if (!BCBuf) {
CI.getDiagnostics().Report(diag::err_cannot_open_file)
- << LinkBCFile << BCBuf.getError().message();
+ << F.Filename << BCBuf.getError().message();
LinkModules.clear();
return nullptr;
}
@@ -791,12 +830,13 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
if (!ModuleOrErr) {
handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
CI.getDiagnostics().Report(diag::err_cannot_open_file)
- << LinkBCFile << EIB.message();
+ << F.Filename << EIB.message();
});
LinkModules.clear();
return nullptr;
}
- addLinkModule(ModuleOrErr.get().release(), I.first);
+ LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
+ F.Internalize, F.LinkFlags});
}
CoverageSourceInfo *CoverageInfo = nullptr;
@@ -810,9 +850,20 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
std::unique_ptr<BackendConsumer> Result(new BackendConsumer(
BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(),
CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(),
- CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, LinkModules,
- std::move(OS), *VMContext, CoverageInfo));
+ CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile,
+ std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo));
BEConsumer = Result.get();
+
+ // Enable generating macro debug info only when debug info is not disabled and
+ // also macro debug info is enabled.
+ if (CI.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo &&
+ CI.getCodeGenOpts().MacroDebugInfo) {
+ std::unique_ptr<PPCallbacks> Callbacks =
+ llvm::make_unique<MacroPPCallbacks>(BEConsumer->getCodeGenerator(),
+ CI.getPreprocessor());
+ CI.getPreprocessor().addPPCallbacks(std::move(Callbacks));
+ }
+
return std::move(Result);
}
@@ -838,9 +889,65 @@ static void BitcodeInlineAsmDiagHandler(const llvm::SMDiagnostic &SM,
Diags->Report(DiagID).AddString("cannot compile inline asm");
}
+std::unique_ptr<llvm::Module> CodeGenAction::loadModule(MemoryBufferRef MBRef) {
+ CompilerInstance &CI = getCompilerInstance();
+ SourceManager &SM = CI.getSourceManager();
+
+ // For ThinLTO backend invocations, ensure that the context
+ // merges types based on ODR identifiers. We also need to read
+ // the correct module out of a multi-module bitcode file.
+ if (!CI.getCodeGenOpts().ThinLTOIndexFile.empty()) {
+ VMContext->enableDebugTypeODRUniquing();
+
+ auto DiagErrors = [&](Error E) -> std::unique_ptr<llvm::Module> {
+ unsigned DiagID =
+ CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0");
+ handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
+ CI.getDiagnostics().Report(DiagID) << EIB.message();
+ });
+ return {};
+ };
+
+ Expected<llvm::BitcodeModule> BMOrErr = FindThinLTOModule(MBRef);
+ if (!BMOrErr)
+ return DiagErrors(BMOrErr.takeError());
+
+ Expected<std::unique_ptr<llvm::Module>> MOrErr =
+ BMOrErr->parseModule(*VMContext);
+ if (!MOrErr)
+ return DiagErrors(MOrErr.takeError());
+ return std::move(*MOrErr);
+ }
+
+ llvm::SMDiagnostic Err;
+ if (std::unique_ptr<llvm::Module> M = parseIR(MBRef, Err, *VMContext))
+ return M;
+
+ // Translate from the diagnostic info to the SourceManager location if
+ // available.
+ // TODO: Unify this with ConvertBackendLocation()
+ SourceLocation Loc;
+ if (Err.getLineNo() > 0) {
+ assert(Err.getColumnNo() >= 0);
+ Loc = SM.translateFileLineCol(SM.getFileEntryForID(SM.getMainFileID()),
+ Err.getLineNo(), Err.getColumnNo() + 1);
+ }
+
+ // Strip off a leading diagnostic code if there is one.
+ StringRef Msg = Err.getMessage();
+ if (Msg.startswith("error: "))
+ Msg = Msg.substr(7);
+
+ unsigned DiagID =
+ CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0");
+
+ CI.getDiagnostics().Report(Loc, DiagID) << Msg;
+ return {};
+}
+
void CodeGenAction::ExecuteAction() {
// If this is an IR file, we have to treat it specially.
- if (getCurrentFileKind() == IK_LLVM_IR) {
+ if (getCurrentFileKind().getLanguage() == InputKind::LLVM_IR) {
BackendAction BA = static_cast<BackendAction>(Act);
CompilerInstance &CI = getCompilerInstance();
std::unique_ptr<raw_pwrite_stream> OS =
@@ -855,35 +962,10 @@ void CodeGenAction::ExecuteAction() {
if (Invalid)
return;
- // For ThinLTO backend invocations, ensure that the context
- // merges types based on ODR identifiers.
- if (!CI.getCodeGenOpts().ThinLTOIndexFile.empty())
- VMContext->enableDebugTypeODRUniquing();
-
- llvm::SMDiagnostic Err;
- TheModule = parseIR(MainFile->getMemBufferRef(), Err, *VMContext);
- if (!TheModule) {
- // Translate from the diagnostic info to the SourceManager location if
- // available.
- // TODO: Unify this with ConvertBackendLocation()
- SourceLocation Loc;
- if (Err.getLineNo() > 0) {
- assert(Err.getColumnNo() >= 0);
- Loc = SM.translateFileLineCol(SM.getFileEntryForID(FID),
- Err.getLineNo(), Err.getColumnNo() + 1);
- }
-
- // Strip off a leading diagnostic code if there is one.
- StringRef Msg = Err.getMessage();
- if (Msg.startswith("error: "))
- Msg = Msg.substr(7);
-
- unsigned DiagID =
- CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0");
-
- CI.getDiagnostics().Report(Loc, DiagID) << Msg;
+ TheModule = loadModule(*MainFile);
+ if (!TheModule)
return;
- }
+
const TargetOptions &TargetOpts = CI.getTargetOpts();
if (TheModule->getTargetTriple() != TargetOpts.Triple) {
CI.getDiagnostics().Report(SourceLocation(),
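In the CodeGenAction.cpp changes above, the three OptimizationRemarkHandler overloads for passed, missed and analysis remarks collapse into a single handler that branches on the remark kind, and the new machine-level remark kinds are routed through the same path. The mapping to the existing -Rpass flag family is unchanged; as a quick reference (comments only, not code from this patch):

    // isPassed()   -> emitted when -Rpass=<regex> matches the pass name
    // isMissed()   -> emitted when -Rpass-missed=<regex> matches the pass name
    // isAnalysis() -> emitted when -Rpass-analysis=<regex> matches, or
    //                 unconditionally when the remark reports shouldAlwaysPrint()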
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
index e142a21..c23b25e 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -22,6 +22,7 @@
#include "CodeGenPGO.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
+#include "clang/AST/ASTLambda.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/StmtCXX.h"
@@ -45,15 +46,15 @@ static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts,
if (CGOpts.DisableLifetimeMarkers)
return false;
- // Asan uses markers for use-after-scope checks.
- if (CGOpts.SanitizeAddressUseAfterScope)
- return true;
-
// Disable lifetime markers in msan builds.
// FIXME: Remove this when msan works with lifetime markers.
if (LangOpts.Sanitize.has(SanitizerKind::Memory))
return false;
+ // Asan uses markers for use-after-scope checks.
+ if (CGOpts.SanitizeAddressUseAfterScope)
+ return true;
+
// For now, only in optimized builds.
return CGOpts.OptimizationLevel != 0;
}
@@ -117,25 +118,27 @@ CodeGenFunction::~CodeGenFunction() {
}
CharUnits CodeGenFunction::getNaturalPointeeTypeAlignment(QualType T,
- AlignmentSource *Source) {
- return getNaturalTypeAlignment(T->getPointeeType(), Source,
+ LValueBaseInfo *BaseInfo) {
+ return getNaturalTypeAlignment(T->getPointeeType(), BaseInfo,
/*forPointee*/ true);
}
CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T,
- AlignmentSource *Source,
+ LValueBaseInfo *BaseInfo,
bool forPointeeType) {
// Honor alignment typedef attributes even on incomplete types.
// We also honor them straight for C++ class types, even as pointees;
// there's an expressivity gap here.
if (auto TT = T->getAs<TypedefType>()) {
if (auto Align = TT->getDecl()->getMaxAlignment()) {
- if (Source) *Source = AlignmentSource::AttributedType;
+ if (BaseInfo)
+ *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType, false);
return getContext().toCharUnitsFromBits(Align);
}
}
- if (Source) *Source = AlignmentSource::Type;
+ if (BaseInfo)
+ *BaseInfo = LValueBaseInfo(AlignmentSource::Type, false);
CharUnits Alignment;
if (T->isIncompleteType()) {
@@ -149,6 +152,8 @@ CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T,
Alignment = CGM.getClassPointerAlignment(RD);
} else {
Alignment = getContext().getTypeAlignInChars(T);
+ if (T.getQualifiers().hasUnaligned())
+ Alignment = CharUnits::One();
}
// Cap to the global maximum type alignment unless the alignment
@@ -163,9 +168,9 @@ CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T,
}
LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) {
- AlignmentSource AlignSource;
- CharUnits Alignment = getNaturalTypeAlignment(T, &AlignSource);
- return LValue::MakeAddr(Address(V, Alignment), T, getContext(), AlignSource,
+ LValueBaseInfo BaseInfo;
+ CharUnits Alignment = getNaturalTypeAlignment(T, &BaseInfo);
+ return LValue::MakeAddr(Address(V, Alignment), T, getContext(), BaseInfo,
CGM.getTBAAInfo(T));
}
@@ -173,9 +178,9 @@ LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) {
/// construct an l-value with the natural pointee alignment of T.
LValue
CodeGenFunction::MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T) {
- AlignmentSource AlignSource;
- CharUnits Align = getNaturalTypeAlignment(T, &AlignSource, /*pointee*/ true);
- return MakeAddrLValue(Address(V, Align), T, AlignSource);
+ LValueBaseInfo BaseInfo;
+ CharUnits Align = getNaturalTypeAlignment(T, &BaseInfo, /*pointee*/ true);
+ return MakeAddrLValue(Address(V, Align), T, BaseInfo);
}
@@ -200,7 +205,8 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) {
llvm_unreachable("non-canonical or dependent type in IR-generation");
case Type::Auto:
- llvm_unreachable("undeduced auto type in IR-generation");
+ case Type::DeducedTemplateSpecialization:
+ llvm_unreachable("undeduced type in IR-generation");
// Various scalar types.
case Type::Builtin:
@@ -343,7 +349,7 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
// Emit debug descriptor for function end.
if (CGDebugInfo *DI = getDebugInfo())
- DI->EmitFunctionEnd(Builder);
+ DI->EmitFunctionEnd(Builder, CurFn);
// Reset the debug location to that of the simple 'return' expression, if any
// rather than that of the end of the function's scope '}'.
@@ -607,11 +613,6 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn,
argBaseTypeNames.push_back(llvm::MDString::get(Context, baseTypeName));
- // Get argument type qualifiers:
- if (ty.isConstQualified())
- typeQuals = "const";
- if (ty.isVolatileQualified())
- typeQuals += typeQuals.empty() ? "volatile" : " volatile";
if (isPipe)
typeQuals = "pipe";
}
@@ -660,34 +661,42 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
GenOpenCLArgMetadata(FD, Fn, CGM, Context, Builder, getContext());
if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) {
- QualType hintQTy = A->getTypeHint();
- const ExtVectorType *hintEltQTy = hintQTy->getAs<ExtVectorType>();
- bool isSignedInteger =
- hintQTy->isSignedIntegerType() ||
- (hintEltQTy && hintEltQTy->getElementType()->isSignedIntegerType());
- llvm::Metadata *attrMDArgs[] = {
+ QualType HintQTy = A->getTypeHint();
+ const ExtVectorType *HintEltQTy = HintQTy->getAs<ExtVectorType>();
+ bool IsSignedInteger =
+ HintQTy->isSignedIntegerType() ||
+ (HintEltQTy && HintEltQTy->getElementType()->isSignedIntegerType());
+ llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(llvm::UndefValue::get(
CGM.getTypes().ConvertType(A->getTypeHint()))),
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
llvm::IntegerType::get(Context, 32),
- llvm::APInt(32, (uint64_t)(isSignedInteger ? 1 : 0))))};
- Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, attrMDArgs));
+ llvm::APInt(32, (uint64_t)(IsSignedInteger ? 1 : 0))))};
+ Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, AttrMDArgs));
}
if (const WorkGroupSizeHintAttr *A = FD->getAttr<WorkGroupSizeHintAttr>()) {
- llvm::Metadata *attrMDArgs[] = {
+ llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
- Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, attrMDArgs));
+ Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, AttrMDArgs));
}
if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) {
- llvm::Metadata *attrMDArgs[] = {
+ llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
- Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, attrMDArgs));
+ Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, AttrMDArgs));
+ }
+
+ if (const OpenCLIntelReqdSubGroupSizeAttr *A =
+ FD->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
+ llvm::Metadata *AttrMDArgs[] = {
+ llvm::ConstantAsMetadata::get(Builder.getInt32(A->getSubGroupSize()))};
+ Fn->setMetadata("intel_reqd_sub_group_size",
+ llvm::MDNode::get(Context, AttrMDArgs));
}
}
@@ -707,6 +716,11 @@ static bool endsWithReturn(const Decl* F) {
return false;
}
+static void markAsIgnoreThreadCheckingAtRuntime(llvm::Function *Fn) {
+ Fn->addFnAttr("sanitize_thread_no_checking_at_run_time");
+ Fn->removeFnAttr(llvm::Attribute::SanitizeThread);
+}
+
void CodeGenFunction::StartFunction(GlobalDecl GD,
QualType RetTy,
llvm::Function *Fn,
@@ -750,16 +764,19 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
Fn->addFnAttr(llvm::Attribute::SafeStack);
  // Ignore TSan memory accesses from within ObjC/ObjC++ dealloc, initialize,
-  // .cxx_destruct and all of their callees at run time.
+  // .cxx_destruct, __destroy_helper_block_ and all of their callees at run time.
if (SanOpts.has(SanitizerKind::Thread)) {
if (const auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(D)) {
IdentifierInfo *II = OMD->getSelector().getIdentifierInfoForSlot(0);
if (OMD->getMethodFamily() == OMF_dealloc ||
OMD->getMethodFamily() == OMF_initialize ||
(OMD->getSelector().isUnarySelector() && II->isStr(".cxx_destruct"))) {
- Fn->addFnAttr("sanitize_thread_no_checking_at_run_time");
- Fn->removeFnAttr(llvm::Attribute::SanitizeThread);
+ markAsIgnoreThreadCheckingAtRuntime(Fn);
}
+ } else if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+ IdentifierInfo *II = FD->getIdentifier();
+ if (II && II->isStr("__destroy_helper_block_"))
+ markAsIgnoreThreadCheckingAtRuntime(Fn);
}
}
@@ -770,10 +787,15 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
Fn->addFnAttr("function-instrument", "xray-always");
if (XRayAttr->neverXRayInstrument())
Fn->addFnAttr("function-instrument", "xray-never");
+ if (const auto *LogArgs = D->getAttr<XRayLogArgsAttr>()) {
+ Fn->addFnAttr("xray-log-args",
+ llvm::utostr(LogArgs->getArgumentCount()));
+ }
} else {
- Fn->addFnAttr(
- "xray-instruction-threshold",
- llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold));
+ if (!CGM.imbueXRayAttrs(Fn, Loc))
+ Fn->addFnAttr(
+ "xray-instruction-threshold",
+ llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold));
}
}
@@ -807,6 +829,18 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
}
}
+ // If we're checking nullability, we need to know whether we can check the
+ // return value. Initialize the flag to 'true' and refine it in EmitParmDecl.
+ if (SanOpts.has(SanitizerKind::NullabilityReturn)) {
+ auto Nullability = FnRetTy->getNullability(getContext());
+ if (Nullability && *Nullability == NullabilityKind::NonNull) {
+ if (!(SanOpts.has(SanitizerKind::ReturnsNonnullAttribute) &&
+ CurCodeDecl && CurCodeDecl->getAttr<ReturnsNonNullAttr>()))
+ RetValNullabilityPrecondition =
+ llvm::ConstantInt::getTrue(getLLVMContext());
+ }
+ }
+
// If we're in C++ mode and the function name is "main", it is guaranteed
// to be norecurse by the standard (3.6.1.3 "The function main shall not be
// used within a program").
@@ -827,6 +861,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
Builder.SetInsertPoint(EntryBB);
+ // If we're checking the return value, allocate space for a pointer to a
+ // precise source location of the checked return statement.
+ if (requiresReturnValueCheck()) {
+ ReturnLocation = CreateDefaultAlignTempAlloca(Int8PtrTy, "return.sloc.ptr");
+ InitTempAlloca(ReturnLocation, llvm::ConstantPointerNull::get(Int8PtrTy));
+ }
+
// Emit subprogram debug descriptor.
if (CGDebugInfo *DI = getDebugInfo()) {
// Reconstruct the type from the argument list so that implicit parameters,
@@ -851,8 +892,14 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
// inlining, we just add an attribute to insert a mcount call in backend.
// The attribute "counting-function" is set to mcount function name which is
// architecture dependent.
- if (CGM.getCodeGenOpts().InstrumentForProfiling)
- Fn->addFnAttr("counting-function", getTarget().getMCountName());
+ if (CGM.getCodeGenOpts().InstrumentForProfiling) {
+ if (CGM.getCodeGenOpts().CallFEntry)
+ Fn->addFnAttr("fentry-call", "true");
+ else {
+ if (!CurFuncDecl || !CurFuncDecl->hasAttr<NoInstrumentFunctionAttr>())
+ Fn->addFnAttr("counting-function", getTarget().getMCountName());
+ }
+ }
if (RetTy->isVoidType()) {
// Void type; nothing to return.
@@ -935,6 +982,27 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
// fast register allocator would be happier...
CXXThisValue = CXXABIThisValue;
}
+
+ // Check the 'this' pointer once per function, if it's available.
+ if (CXXABIThisValue) {
+ SanitizerSet SkippedChecks;
+ SkippedChecks.set(SanitizerKind::ObjectSize, true);
+ QualType ThisTy = MD->getThisType(getContext());
+
+ // If this is the call operator of a lambda with no capture-default, it
+ // may have a static invoker function, which may call this operator with
+ // a null 'this' pointer.
+ if (isLambdaCallOperator(MD) &&
+ cast<CXXRecordDecl>(MD->getParent())->getLambdaCaptureDefault() ==
+ LCD_None)
+ SkippedChecks.set(SanitizerKind::Null, true);
+
+ EmitTypeCheck(isa<CXXConstructorDecl>(MD) ? TCK_ConstructorCall
+ : TCK_MemberCall,
+ Loc, CXXABIThisValue, ThisTy,
+ getContext().getTypeAlignInChars(ThisTy->getPointeeType()),
+ SkippedChecks);
+ }
}
// If any of the arguments have a variably modified type, make sure to
@@ -1036,10 +1104,9 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD,
if (!Param->hasAttr<PassObjectSizeAttr>())
continue;
- IdentifierInfo *NoID = nullptr;
auto *Implicit = ImplicitParamDecl::Create(
- getContext(), Param->getDeclContext(), Param->getLocation(), NoID,
- getContext().getSizeType());
+ getContext(), Param->getDeclContext(), Param->getLocation(),
+ /*Id=*/nullptr, getContext().getSizeType(), ImplicitParamDecl::Other);
SizeArguments[Param] = Implicit;
Args.push_back(Implicit);
}
@@ -1076,8 +1143,13 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
if (FD->hasAttr<NoDebugAttr>())
DebugInfo = nullptr; // disable debug info indefinitely for this function
+ // The function might not have a body if we're generating thunks for a
+ // function declaration.
SourceRange BodyRange;
- if (Stmt *Body = FD->getBody()) BodyRange = Body->getSourceRange();
+ if (Stmt *Body = FD->getBody())
+ BodyRange = Body->getSourceRange();
+ else
+ BodyRange = FD->getLocation();
CurEHLocation = BodyRange.getEnd();
// Use the location of the start of the function to determine where
@@ -1891,6 +1963,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
case Type::Typedef:
case Type::Decltype:
case Type::Auto:
+ case Type::DeducedTemplateSpecialization:
// Stop walking: nothing to do.
return;
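Several of the CodeGenFunction.cpp changes above feed the new nullability return-value check: StartFunction records RetValNullabilityPrecondition when the return type is _Nonnull, and ReturnLocation holds the source location of the return statement being checked. A tiny example of code the check targets when built with -fsanitize=nullability-return (illustrative only):

    int *_Nonnull first_or_null(int *p, int n) {
      if (n == 0)
        return nullptr; // violates the _Nonnull return type; flagged at run
                        // time against this return statement's location
      return p;
    }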
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h
index 5861340..6a1fa48 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h
@@ -115,9 +115,12 @@ enum TypeEvaluationKind {
SANITIZER_CHECK(MissingReturn, missing_return, 0) \
SANITIZER_CHECK(MulOverflow, mul_overflow, 0) \
SANITIZER_CHECK(NegateOverflow, negate_overflow, 0) \
+ SANITIZER_CHECK(NullabilityArg, nullability_arg, 0) \
+ SANITIZER_CHECK(NullabilityReturn, nullability_return, 1) \
SANITIZER_CHECK(NonnullArg, nonnull_arg, 0) \
- SANITIZER_CHECK(NonnullReturn, nonnull_return, 0) \
+ SANITIZER_CHECK(NonnullReturn, nonnull_return, 1) \
SANITIZER_CHECK(OutOfBounds, out_of_bounds, 0) \
+ SANITIZER_CHECK(PointerOverflow, pointer_overflow, 0) \
SANITIZER_CHECK(ShiftOutOfBounds, shift_out_of_bounds, 0) \
SANITIZER_CHECK(SubOverflow, sub_overflow, 0) \
SANITIZER_CHECK(TypeMismatch, type_mismatch, 1) \
@@ -173,6 +176,25 @@ public:
// because of jumps.
VarBypassDetector Bypasses;
+ // CodeGen lambda for loops and support for ordered clause
+ typedef llvm::function_ref<void(CodeGenFunction &, const OMPLoopDirective &,
+ JumpDest)>
+ CodeGenLoopTy;
+ typedef llvm::function_ref<void(CodeGenFunction &, SourceLocation,
+ const unsigned, const bool)>
+ CodeGenOrderedTy;
+
+ // Codegen lambda for loop bounds in worksharing loop constructs
+ typedef llvm::function_ref<std::pair<LValue, LValue>(
+ CodeGenFunction &, const OMPExecutableDirective &S)>
+ CodeGenLoopBoundsTy;
+
+ // Codegen lambda for loop bounds in dispatch-based loop implementation
+ typedef llvm::function_ref<std::pair<llvm::Value *, llvm::Value *>(
+ CodeGenFunction &, const OMPExecutableDirective &S, Address LB,
+ Address UB)>
+ CodeGenDispatchBoundsTy;
+
/// \brief CGBuilder insert helper. This function is called after an
/// instruction is created using Builder.
void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
@@ -212,6 +234,13 @@ public:
/// value. This is invalid iff the function has no return value.
Address ReturnValue;
+ /// Return true if a label was seen in the current scope.
+ bool hasLabelBeenSeenInCurrentScope() const {
+ if (CurLexicalScope)
+ return CurLexicalScope->hasLabels();
+ return !LabelMap.empty();
+ }
+
/// AllocaInsertPoint - This is an instruction in the entry block before which
/// we prefer to insert allocas.
llvm::AssertingVH<llvm::Instruction> AllocaInsertPt;
@@ -298,6 +327,31 @@ public:
~CGCapturedStmtRAII() { CGF.CapturedStmtInfo = PrevCapturedStmtInfo; }
};
+ /// An abstract representation of regular/ObjC call/message targets.
+ class AbstractCallee {
+ /// The function declaration of the callee.
+ const Decl *CalleeDecl;
+
+ public:
+ AbstractCallee() : CalleeDecl(nullptr) {}
+ AbstractCallee(const FunctionDecl *FD) : CalleeDecl(FD) {}
+ AbstractCallee(const ObjCMethodDecl *OMD) : CalleeDecl(OMD) {}
+ bool hasFunctionDecl() const {
+ return dyn_cast_or_null<FunctionDecl>(CalleeDecl);
+ }
+ const Decl *getDecl() const { return CalleeDecl; }
+ unsigned getNumParams() const {
+ if (const auto *FD = dyn_cast<FunctionDecl>(CalleeDecl))
+ return FD->getNumParams();
+ return cast<ObjCMethodDecl>(CalleeDecl)->param_size();
+ }
+ const ParmVarDecl *getParamDecl(unsigned I) const {
+ if (const auto *FD = dyn_cast<FunctionDecl>(CalleeDecl))
+ return FD->getParamDecl(I);
+ return *(cast<ObjCMethodDecl>(CalleeDecl)->param_begin() + I);
+ }
+ };
+
/// \brief Sanitizers enabled for this function.
SanitizerSet SanOpts;
@@ -548,14 +602,10 @@ public:
CGF.DidCallStackSave = false;
}
- /// \brief Exit this cleanup scope, emitting any accumulated
- /// cleanups.
+ /// \brief Exit this cleanup scope, emitting any accumulated cleanups.
~RunCleanupsScope() {
- if (PerformCleanup) {
- CGF.DidCallStackSave = OldDidCallStackSave;
- CGF.PopCleanupBlocks(CleanupStackDepth,
- LifetimeExtendedCleanupStackSize);
- }
+ if (PerformCleanup)
+ ForceCleanup();
}
/// \brief Determine whether this scope requires any cleanups.
@@ -565,11 +615,15 @@ public:
/// \brief Force the emission of cleanups now, instead of waiting
/// until this object is destroyed.
- void ForceCleanup() {
+ /// \param ValuesToReload - A list of values that need to be available at
+ /// the insertion point after cleanup emission. If cleanup emission created
+ /// a shared cleanup block, these value pointers will be rewritten.
+ /// Otherwise, they will not be modified.
+ void ForceCleanup(std::initializer_list<llvm::Value**> ValuesToReload = {}) {
assert(PerformCleanup && "Already forced cleanup");
CGF.DidCallStackSave = OldDidCallStackSave;
- CGF.PopCleanupBlocks(CleanupStackDepth,
- LifetimeExtendedCleanupStackSize);
+ CGF.PopCleanupBlocks(CleanupStackDepth, LifetimeExtendedCleanupStackSize,
+ ValuesToReload);
PerformCleanup = false;
}
};
@@ -620,6 +674,10 @@ public:
rescopeLabels();
}
+ bool hasLabels() const {
+ return !Labels.empty();
+ }
+
void rescopeLabels();
};
@@ -727,13 +785,17 @@ public:
/// \brief Takes the old cleanup stack size and emits the cleanup blocks
/// that have been added.
- void PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize);
+ void
+ PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize,
+ std::initializer_list<llvm::Value **> ValuesToReload = {});
/// \brief Takes the old cleanup stack size and emits the cleanup blocks
/// that have been added, then adds all lifetime-extended cleanups from
/// the given position to the stack.
- void PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize,
- size_t OldLifetimeExtendedStackSize);
+ void
+ PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize,
+ size_t OldLifetimeExtendedStackSize,
+ std::initializer_list<llvm::Value **> ValuesToReload = {});
void ResolveBranchFixups(llvm::BasicBlock *Target);
@@ -1054,7 +1116,7 @@ private:
auto IP = CGF.Builder.saveAndClearIP();
CGF.EmitBlock(Stack.back().ExitBlock.getBlock());
CodeGen(CGF);
- CGF.EmitBranchThroughCleanup(Stack.back().ContBlock);
+ CGF.EmitBranch(Stack.back().ContBlock.getBlock());
CGF.Builder.restoreIP(IP);
Stack.back().HasBeenEmitted = true;
}
@@ -1116,10 +1178,11 @@ private:
uint64_t LoopCount);
public:
- /// Increment the profiler's counter for the given statement.
- void incrementProfileCounter(const Stmt *S) {
+ /// Increment the profiler's counter for the given statement by \p StepV.
+ /// If \p StepV is null, the default increment is 1.
+ void incrementProfileCounter(const Stmt *S, llvm::Value *StepV = nullptr) {
if (CGM.getCodeGenOpts().hasProfileClangInstr())
- PGO.emitCounterIncrement(Builder, S);
+ PGO.emitCounterIncrement(Builder, S, StepV);
PGO.setCurrentStmt(S);
}
@@ -1334,6 +1397,27 @@ private:
/// information about the layout of the variable.
llvm::DenseMap<const ValueDecl *, BlockByrefInfo> BlockByrefInfos;
+ /// Used by -fsanitize=nullability-return to determine whether the return
+ /// value can be checked.
+ llvm::Value *RetValNullabilityPrecondition = nullptr;
+
+ /// Check if -fsanitize=nullability-return instrumentation is required for
+ /// this function.
+ bool requiresReturnValueNullabilityCheck() const {
+ return RetValNullabilityPrecondition;
+ }
+
+ /// Used to store precise source locations for return statements by the
+ /// runtime return value checks.
+ Address ReturnLocation = Address::invalid();
+
+ /// Check if the return value of this function requires sanitization.
+ bool requiresReturnValueCheck() const {
+ return requiresReturnValueNullabilityCheck() ||
+ (SanOpts.has(SanitizerKind::ReturnsNonnullAttribute) &&
+ CurCodeDecl && CurCodeDecl->getAttr<ReturnsNonNullAttr>());
+ }
+
llvm::BasicBlock *TerminateLandingPad;
llvm::BasicBlock *TerminateHandler;
llvm::BasicBlock *TrapBB;
@@ -1341,16 +1425,8 @@ private:
/// True if we need emit the life-time markers.
const bool ShouldEmitLifetimeMarkers;
- /// Add a kernel metadata node to the named metadata node 'opencl.kernels'.
- /// In the kernel metadata node, reference the kernel function and metadata
- /// nodes for its optional attribute qualifiers (OpenCL 1.1 6.7.2):
- /// - A node for the vec_type_hint(<type>) qualifier contains string
- /// "vec_type_hint", an undefined value of the <type> data type,
- /// and a Boolean that is true if the <type> is integer and signed.
- /// - A node for the work_group_size_hint(X,Y,Z) qualifier contains string
- /// "work_group_size_hint", and three 32-bit integers X, Y and Z.
- /// - A node for the reqd_work_group_size(X,Y,Z) qualifier contains string
- /// "reqd_work_group_size", and three 32-bit integers X, Y and Z.
+ /// Add OpenCL kernel arg metadata and the kernel attribute metadata to
+ /// the function metadata.
void EmitOpenCLKernelMetadata(const FunctionDecl *FD,
llvm::Function *Fn);
@@ -1403,6 +1479,9 @@ public:
const TargetInfo &getTarget() const { return Target; }
llvm::LLVMContext &getLLVMContext() { return CGM.getLLVMContext(); }
+ const TargetCodeGenInfo &getTargetHooks() const {
+ return CGM.getTargetCodeGenInfo();
+ }
//===--------------------------------------------------------------------===//
// Cleanups
@@ -1553,6 +1632,8 @@ public:
SourceLocation Loc = SourceLocation(),
SourceLocation StartLoc = SourceLocation());
+ static bool IsConstructorDelegationValid(const CXXConstructorDecl *Ctor);
+
void EmitConstructorBody(FunctionArgList &Args);
void EmitDestructorBody(FunctionArgList &Args);
void emitImplicitAssignmentOperatorBody(FunctionArgList &Args);
@@ -1671,11 +1752,6 @@ public:
llvm::Value *EmitVTableTypeCheckedLoad(const CXXRecordDecl *RD, llvm::Value *VTable,
uint64_t VTableByteOffset);
- /// CanDevirtualizeMemberFunctionCalls - Checks whether virtual calls on given
- /// expr can be devirtualized.
- bool CanDevirtualizeMemberFunctionCall(const Expr *Base,
- const CXXMethodDecl *MD);
-
/// EnterDtorCleanups - Enter the cleanups necessary to complete the
/// given phase of destruction for a destructor. The end result
/// should call destructors on members and base classes in reverse
@@ -1710,6 +1786,9 @@ public:
void EmitFunctionEpilog(const CGFunctionInfo &FI, bool EmitRetDbgLoc,
SourceLocation EndLoc);
+ /// Emit a test that checks if the return value \p RV is nonnull.
+ void EmitReturnValueCheck(llvm::Value *RV);
+
/// EmitStartEHSpec - Emit the start of the exception spec.
void EmitStartEHSpec(const Decl *D);
@@ -1817,40 +1896,65 @@ public:
//===--------------------------------------------------------------------===//
LValue MakeAddrLValue(Address Addr, QualType T,
- AlignmentSource AlignSource = AlignmentSource::Type) {
- return LValue::MakeAddr(Addr, T, getContext(), AlignSource,
+ LValueBaseInfo BaseInfo =
+ LValueBaseInfo(AlignmentSource::Type)) {
+ return LValue::MakeAddr(Addr, T, getContext(), BaseInfo,
CGM.getTBAAInfo(T));
}
LValue MakeAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment,
- AlignmentSource AlignSource = AlignmentSource::Type) {
+ LValueBaseInfo BaseInfo =
+ LValueBaseInfo(AlignmentSource::Type)) {
return LValue::MakeAddr(Address(V, Alignment), T, getContext(),
- AlignSource, CGM.getTBAAInfo(T));
+ BaseInfo, CGM.getTBAAInfo(T));
}
LValue MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T);
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T);
CharUnits getNaturalTypeAlignment(QualType T,
- AlignmentSource *Source = nullptr,
+ LValueBaseInfo *BaseInfo = nullptr,
bool forPointeeType = false);
CharUnits getNaturalPointeeTypeAlignment(QualType T,
- AlignmentSource *Source = nullptr);
+ LValueBaseInfo *BaseInfo = nullptr);
Address EmitLoadOfReference(Address Ref, const ReferenceType *RefTy,
- AlignmentSource *Source = nullptr);
+ LValueBaseInfo *BaseInfo = nullptr);
LValue EmitLoadOfReferenceLValue(Address Ref, const ReferenceType *RefTy);
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy,
- AlignmentSource *Source = nullptr);
+ LValueBaseInfo *BaseInfo = nullptr);
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy);
- /// CreateTempAlloca - This creates a alloca and inserts it into the entry
- /// block. The caller is responsible for setting an appropriate alignment on
+ /// CreateTempAlloca - This creates an alloca and inserts it into the entry
+ /// block if \p ArraySize is nullptr, otherwise inserts it at the current
+ /// insertion point of the builder. The caller is responsible for setting an
+ /// appropriate alignment on
/// the alloca.
- llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty,
- const Twine &Name = "tmp");
+ ///
+ /// \p ArraySize is the number of array elements to be allocated if it
+ /// is not nullptr.
+ ///
+ /// LangAS::Default is the address space of pointers to local variables and
+ /// temporaries, as exposed in the source language. In certain
+ /// configurations, this is not the same as the alloca address space, and a
+ /// cast is needed to lift the pointer from the alloca AS into
+ /// LangAS::Default. This can happen when the target uses a restricted
+ /// address space for the stack but the source language requires
+ /// LangAS::Default to be a generic address space. The latter condition is
+ /// common for most programming languages; OpenCL is an exception in that
+ /// LangAS::Default is the private address space, which naturally maps
+ /// to the stack.
+ ///
+ /// Because the address of a temporary is often exposed to the program in
+ /// various ways, this function will perform the cast by default. The cast
+ /// may be avoided by passing false as \p CastToDefaultAddrSpace; this is
+ /// more efficient if the caller knows that the address will not be exposed.
+ llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp",
+ llvm::Value *ArraySize = nullptr);
Address CreateTempAlloca(llvm::Type *Ty, CharUnits align,
- const Twine &Name = "tmp");
+ const Twine &Name = "tmp",
+ llvm::Value *ArraySize = nullptr,
+ bool CastToDefaultAddrSpace = true);
/// CreateDefaultAlignedTempAlloca - This creates an alloca with the
/// default ABI alignment of the given LLVM type.
@@ -1885,9 +1989,12 @@ public:
Address CreateIRTemp(QualType T, const Twine &Name = "tmp");
/// CreateMemTemp - Create a temporary memory object of the given type, with
- /// appropriate alignment.
- Address CreateMemTemp(QualType T, const Twine &Name = "tmp");
- Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp");
+ /// appropriate alignment. Cast it to the default address space if
+ /// \p CastToDefaultAddrSpace is true.
+ Address CreateMemTemp(QualType T, const Twine &Name = "tmp",
+ bool CastToDefaultAddrSpace = true);
+ Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp",
+ bool CastToDefaultAddrSpace = true);
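The effect of CastToDefaultAddrSpace is easiest to see from a caller's point of view. A minimal sketch, not part of this patch (the helper name emitTwoTemporaries is invented for illustration):

    // Hypothetical caller inside CodeGen, showing only the new parameter.
    void emitTwoTemporaries(CodeGenFunction &CGF, QualType T) {
      // A temporary whose address escapes to user-visible code: keep the
      // default so the pointer is cast into LangAS::Default.
      Address Exposed = CGF.CreateMemTemp(T, "exposed.tmp",
                                          /*CastToDefaultAddrSpace=*/true);
      // Purely internal scratch storage: skip the addrspacecast.
      Address Scratch = CGF.CreateMemTemp(T, "scratch.tmp",
                                          /*CastToDefaultAddrSpace=*/false);
      (void)Exposed;
      (void)Scratch;
    }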
/// CreateAggTemp - Create a temporary memory object for the given
/// aggregate type.
@@ -1928,7 +2035,7 @@ public:
/// pointer to a char.
Address EmitMSVAListRef(const Expr *E);
- /// EmitAnyExprToTemp - Similary to EmitAnyExpr(), however, the result will
+ /// EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will
/// always be accessible even if no aggregate location is provided.
RValue EmitAnyExprToTemp(const Expr *E);
@@ -2019,6 +2126,9 @@ public:
llvm::BlockAddress *GetAddrOfLabel(const LabelDecl *L);
llvm::BasicBlock *GetIndirectGotoBlock();
+ /// Check if \p E is a C++ "this" pointer wrapped in value-preserving casts.
+ static bool IsWrappedCXXThis(const Expr *E);
+
/// EmitNullInitialization - Generate code to set a value of the given type to
/// null. If the type contains data member pointers, they will be initialized
/// to -1 in accordance with the Itanium C++ ABI.
@@ -2230,7 +2340,9 @@ public:
TCK_Upcast,
/// Checking the operand of a cast to a virtual base object. Must be an
/// object within its lifetime.
- TCK_UpcastToVirtualBase
+ TCK_UpcastToVirtualBase,
+ /// Checking the value assigned to a _Nonnull pointer. Must not be null.
+ TCK_NonnullAssign
};
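TCK_NonnullAssign backs the nullability-assign sanitizer. A minimal source-level sketch, not from the patch, of the code it guards (assuming the TU is built with -fsanitize=nullability-assign):

    int *_Nonnull CurrentTask;

    void setTask(int *Candidate) {
      // The store is preceded by the new TCK_NonnullAssign type check, which
      // reports Candidate == nullptr at run time instead of silently storing it.
      CurrentTask = Candidate;
    }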
/// \brief Whether any type-checking sanitizers are enabled. If \c false,
@@ -2241,7 +2353,7 @@ public:
/// appropriate size and alignment for an object of type \p Type.
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *V,
QualType Type, CharUnits Alignment = CharUnits::Zero(),
- bool SkipNullCheck = false);
+ SanitizerSet SkippedChecks = SanitizerSet());
/// \brief Emit a check that \p Base points into an array object, which
/// we can access at index \p Index. \p Accessed should be \c false if we
@@ -2401,6 +2513,12 @@ public:
PeepholeProtection protectFromPeepholes(RValue rvalue);
void unprotectFromPeepholes(PeepholeProtection protection);
+ void EmitAlignmentAssumption(llvm::Value *PtrValue, llvm::Value *Alignment,
+ llvm::Value *OffsetValue = nullptr) {
+ Builder.CreateAlignmentAssumption(CGM.getDataLayout(), PtrValue, Alignment,
+ OffsetValue);
+ }
+
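One construct that typically reaches EmitAlignmentAssumption is __builtin_assume_aligned; a minimal source-level sketch, not from the patch:

    // The builtin lowers to an alignment assumption on p.
    float *assume64(float *p) {
      return static_cast<float *>(__builtin_assume_aligned(p, 64));
    }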
//===--------------------------------------------------------------------===//
// Statement Emission
//===--------------------------------------------------------------------===//
@@ -2463,6 +2581,15 @@ public:
void EmitObjCAutoreleasePoolStmt(const ObjCAutoreleasePoolStmt &S);
void EmitCoroutineBody(const CoroutineBodyStmt &S);
+ void EmitCoreturnStmt(const CoreturnStmt &S);
+ RValue EmitCoawaitExpr(const CoawaitExpr &E,
+ AggValueSlot aggSlot = AggValueSlot::ignored(),
+ bool ignoreResult = false);
+ LValue EmitCoawaitLValue(const CoawaitExpr *E);
+ RValue EmitCoyieldExpr(const CoyieldExpr &E,
+ AggValueSlot aggSlot = AggValueSlot::ignored(),
+ bool ignoreResult = false);
+ LValue EmitCoyieldLValue(const CoyieldExpr *E);
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID);
void EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false);
@@ -2627,12 +2754,16 @@ public:
/// the end of the directive.
///
/// \param D Directive that has at least one 'reduction' directives.
- void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D);
+ /// \param ReductionKind The kind of reduction to perform.
+ void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D,
+ const OpenMPDirectiveKind ReductionKind);
/// \brief Emit initial code for linear variables. Creates private copies
/// and initializes them with the values according to OpenMP standard.
///
/// \param D Directive (possibly) with the 'linear' clause.
- void EmitOMPLinearClauseInit(const OMPLoopDirective &D);
+ /// \return true if at least one linear variable is found that should be
+ /// initialized with the value of the original variable, false otherwise.
+ bool EmitOMPLinearClauseInit(const OMPLoopDirective &D);
typedef const llvm::function_ref<void(CodeGenFunction & /*CGF*/,
llvm::Value * /*OutlinedFn*/,
@@ -2678,7 +2809,6 @@ public:
void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S);
void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S);
void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
- void EmitOMPDistributeLoop(const OMPDistributeDirective &S);
void EmitOMPDistributeParallelForDirective(
const OMPDistributeParallelForDirective &S);
void EmitOMPDistributeParallelForSimdDirective(
@@ -2704,13 +2834,16 @@ public:
void EmitOMPTargetTeamsDistributeSimdDirective(
const OMPTargetTeamsDistributeSimdDirective &S);
- /// Emit outlined function for the target directive.
- static std::pair<llvm::Function * /*OutlinedFn*/,
- llvm::Constant * /*OutlinedFnID*/>
- EmitOMPTargetDirectiveOutlinedFunction(CodeGenModule &CGM,
- const OMPTargetDirective &S,
- StringRef ParentName,
- bool IsOffloadEntry);
+ /// Emit device code for the target directive.
+ static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
+ StringRef ParentName,
+ const OMPTargetDirective &S);
+ static void
+ EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetParallelDirective &S);
+ static void
+ EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetTeamsDirective &S);
/// \brief Emit inner loop of the worksharing/simd construct.
///
/// \param S Directive, for which the inner loop must be emitted.
@@ -2732,32 +2865,78 @@ public:
void EmitOMPPrivateLoopCounters(const OMPLoopDirective &S,
OMPPrivateScope &LoopScope);
+ /// Helper for the OpenMP loop directives.
+ void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit);
+
+ /// \brief Emit code for the worksharing loop-based directive.
+ /// \return true, if this construct has any lastprivate clause, false -
+ /// otherwise.
+ bool EmitOMPWorksharingLoop(const OMPLoopDirective &S, Expr *EUB,
+ const CodeGenLoopBoundsTy &CodeGenLoopBounds,
+ const CodeGenDispatchBoundsTy &CGDispatchBounds);
+
private:
/// Helpers for blocks
llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info);
/// Helpers for the OpenMP loop directives.
- void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit);
void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false);
void EmitOMPSimdFinal(
const OMPLoopDirective &D,
const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen);
- /// \brief Emit code for the worksharing loop-based directive.
- /// \return true, if this construct has any lastprivate clause, false -
- /// otherwise.
- bool EmitOMPWorksharingLoop(const OMPLoopDirective &S);
- void EmitOMPOuterLoop(bool IsMonotonic, bool DynamicOrOrdered,
- const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
- Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk);
+
+ void EmitOMPDistributeLoop(const OMPLoopDirective &S,
+ const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr);
+
+ /// struct with the values to be passed to the OpenMP loop-related functions
+ struct OMPLoopArguments {
+ /// loop lower bound
+ Address LB = Address::invalid();
+ /// loop upper bound
+ Address UB = Address::invalid();
+ /// loop stride
+ Address ST = Address::invalid();
+ /// isLastIteration argument for runtime functions
+ Address IL = Address::invalid();
+ /// Chunk value generated by sema
+ llvm::Value *Chunk = nullptr;
+ /// EnsureUpperBound
+ Expr *EUB = nullptr;
+ /// IncrementExpression
+ Expr *IncExpr = nullptr;
+ /// Loop initialization
+ Expr *Init = nullptr;
+ /// Loop exit condition
+ Expr *Cond = nullptr;
+ /// Update of LB after a whole chunk has been executed
+ Expr *NextLB = nullptr;
+ /// Update of UB after a whole chunk has been executed
+ Expr *NextUB = nullptr;
+ OMPLoopArguments() = default;
+ OMPLoopArguments(Address LB, Address UB, Address ST, Address IL,
+ llvm::Value *Chunk = nullptr, Expr *EUB = nullptr,
+ Expr *IncExpr = nullptr, Expr *Init = nullptr,
+ Expr *Cond = nullptr, Expr *NextLB = nullptr,
+ Expr *NextUB = nullptr)
+ : LB(LB), UB(UB), ST(ST), IL(IL), Chunk(Chunk), EUB(EUB),
+ IncExpr(IncExpr), Init(Init), Cond(Cond), NextLB(NextLB),
+ NextUB(NextUB) {}
+ };
+ void EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
+ const OMPLoopDirective &S, OMPPrivateScope &LoopScope,
+ const OMPLoopArguments &LoopArgs,
+ const CodeGenLoopTy &CodeGenLoop,
+ const CodeGenOrderedTy &CodeGenOrdered);
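A hypothetical call-site sketch, not from the patch, showing how the formerly scattered bound addresses are grouped before reaching EmitOMPOuterLoop; LB/UB/ST/IL and Chunk are whatever the worksharing code already computed, and the Expr accessors are assumed to come from the loop directive:

    OMPLoopArguments LoopArgs(LB, UB, ST, IL, Chunk, S.getEnsureUpperBound(),
                              S.getInc());
    LoopArgs.Init = S.getInit();
    LoopArgs.Cond = S.getCond();
    LoopArgs.NextLB = S.getNextLowerBound();
    LoopArgs.NextUB = S.getNextUpperBound();
    EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, LoopArgs,
                     CodeGenLoop, CodeGenOrdered);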
void EmitOMPForOuterLoop(const OpenMPScheduleTy &ScheduleKind,
bool IsMonotonic, const OMPLoopDirective &S,
- OMPPrivateScope &LoopScope, bool Ordered, Address LB,
- Address UB, Address ST, Address IL,
- llvm::Value *Chunk);
- void EmitOMPDistributeOuterLoop(
- OpenMPDistScheduleClauseKind ScheduleKind,
- const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
- Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk);
+ OMPPrivateScope &LoopScope, bool Ordered,
+ const OMPLoopArguments &LoopArgs,
+ const CodeGenDispatchBoundsTy &CGDispatchBounds);
+ void EmitOMPDistributeOuterLoop(OpenMPDistScheduleClauseKind ScheduleKind,
+ const OMPLoopDirective &S,
+ OMPPrivateScope &LoopScope,
+ const OMPLoopArguments &LoopArgs,
+ const CodeGenLoopTy &CodeGenLoopContent);
/// \brief Emit code for sections directive.
void EmitSections(const OMPExecutableDirective &S);
@@ -2843,13 +3022,20 @@ public:
/// representation to its value representation.
llvm::Value *EmitFromMemory(llvm::Value *Value, QualType Ty);
+ /// Check if the scalar \p Value is within the valid range for the given
+ /// type \p Ty.
+ ///
+ /// Returns true if a check is needed (even if the range is unknown).
+ bool EmitScalarRangeCheck(llvm::Value *Value, QualType Ty,
+ SourceLocation Loc);
+
/// EmitLoadOfScalar - Load a scalar value from an address, taking
/// care to appropriately convert from the memory representation to
/// the LLVM value representation.
llvm::Value *EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty,
SourceLocation Loc,
- AlignmentSource AlignSource =
- AlignmentSource::Type,
+ LValueBaseInfo BaseInfo =
+ LValueBaseInfo(AlignmentSource::Type),
llvm::MDNode *TBAAInfo = nullptr,
QualType TBAABaseTy = QualType(),
uint64_t TBAAOffset = 0,
@@ -2866,7 +3052,8 @@ public:
/// the LLVM value representation.
void EmitStoreOfScalar(llvm::Value *Value, Address Addr,
bool Volatile, QualType Ty,
- AlignmentSource AlignSource = AlignmentSource::Type,
+ LValueBaseInfo BaseInfo =
+ LValueBaseInfo(AlignmentSource::Type),
llvm::MDNode *TBAAInfo = nullptr, bool isInit = false,
QualType TBAABaseTy = QualType(),
uint64_t TBAAOffset = 0, bool isNontemporal = false);
@@ -2883,7 +3070,7 @@ public:
/// rvalue, returning the rvalue.
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc);
RValue EmitLoadOfExtVectorElementLValue(LValue V);
- RValue EmitLoadOfBitfieldLValue(LValue LV);
+ RValue EmitLoadOfBitfieldLValue(LValue LV, SourceLocation Loc);
RValue EmitLoadOfGlobalRegLValue(LValue LV);
/// EmitStoreThroughLValue - Store the specified rvalue into the specified
@@ -2939,7 +3126,7 @@ public:
RValue EmitRValueForField(LValue LV, const FieldDecl *FD, SourceLocation Loc);
Address EmitArrayToPointerDecay(const Expr *Array,
- AlignmentSource *AlignSource = nullptr);
+ LValueBaseInfo *BaseInfo = nullptr);
class ConstantEmission {
llvm::PointerIntPair<llvm::Constant*, 1, bool> ValueAndIsReference;
@@ -3080,7 +3267,7 @@ public:
Address EmitCXXMemberDataPointerAddress(const Expr *E, Address base,
llvm::Value *memberPtr,
const MemberPointerType *memberPtrType,
- AlignmentSource *AlignSource = nullptr);
+ LValueBaseInfo *BaseInfo = nullptr);
RValue EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E,
ReturnValueSlot ReturnValue);
@@ -3092,8 +3279,8 @@ public:
RValue EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E,
ReturnValueSlot ReturnValue);
- RValue EmitCUDADevicePrintfCallExpr(const CallExpr *E,
- ReturnValueSlot ReturnValue);
+ RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
+ ReturnValueSlot ReturnValue);
RValue EmitBuiltinExpr(const FunctionDecl *FD,
unsigned BuiltinID, const CallExpr *E,
@@ -3149,6 +3336,8 @@ private:
public:
llvm::Value *EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E);
+ llvm::Value *EmitBuiltinAvailable(ArrayRef<llvm::Value *> Args);
+
llvm::Value *EmitObjCProtocolExpr(const ObjCProtocolExpr *E);
llvm::Value *EmitObjCStringLiteral(const ObjCStringLiteral *E);
llvm::Value *EmitObjCBoxedExpr(const ObjCBoxedExpr *E);
@@ -3215,6 +3404,7 @@ public:
static Destroyer destroyARCStrongImprecise;
static Destroyer destroyARCStrongPrecise;
static Destroyer destroyARCWeak;
+ static Destroyer emitARCIntrinsicUse;
void EmitObjCAutoreleasePoolPop(llvm::Value *Ptr);
llvm::Value *EmitObjCAutoreleasePoolPush();
@@ -3316,9 +3506,10 @@ public:
/// GenerateCXXGlobalDtorsFunc - Generates code for destroying global
/// variables.
- void GenerateCXXGlobalDtorsFunc(llvm::Function *Fn,
- const std::vector<std::pair<llvm::WeakVH,
- llvm::Constant*> > &DtorsAndObjects);
+ void GenerateCXXGlobalDtorsFunc(
+ llvm::Function *Fn,
+ const std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>>
+ &DtorsAndObjects);
void GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
const VarDecl *D,
@@ -3396,6 +3587,26 @@ public:
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock,
llvm::BasicBlock *FalseBlock, uint64_t TrueCount);
+ /// Given an assignment `*LHS = RHS`, emit a test that checks if \p RHS is
+ /// nonnull, if \p LHS is marked _Nonnull.
+ void EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, SourceLocation Loc);
+
+ /// An enumeration which makes it easier to specify whether or not an
+ /// operation is a subtraction.
+ enum { NotSubtraction = false, IsSubtraction = true };
+
+ /// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to
+ /// detect undefined behavior when the pointer overflow sanitizer is enabled.
+ /// \p SignedIndices indicates whether any of the GEP indices are signed.
+ /// \p IsSubtraction indicates whether the expression used to form the GEP
+ /// is a subtraction.
+ llvm::Value *EmitCheckedInBoundsGEP(llvm::Value *Ptr,
+ ArrayRef<llvm::Value *> IdxList,
+ bool SignedIndices,
+ bool IsSubtraction,
+ SourceLocation Loc,
+ const Twine &Name = "");
+
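A minimal source-level sketch, not from the patch, of the arithmetic EmitCheckedInBoundsGEP instruments when the pointer overflow sanitizer is enabled (assumed flag: -fsanitize=pointer-overflow):

    #include <cstddef>

    char *advance(char *base, std::size_t n) {
      // The addition is emitted through EmitCheckedInBoundsGEP, so a wrapping
      // or overflowing result is diagnosed instead of silently miscomputing.
      return base + n;
    }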
/// \brief Emit a description of a type in a format suitable for passing to
/// a runtime sanitizer handler.
llvm::Constant *EmitCheckTypeDescriptor(QualType T);
@@ -3429,13 +3640,16 @@ public:
/// "trap-func-name" if specified.
llvm::CallInst *EmitTrapCall(llvm::Intrinsic::ID IntrID);
+ /// \brief Emit a stub for the cross-DSO CFI check function.
+ void EmitCfiCheckStub();
+
/// \brief Emit a cross-DSO CFI failure handling function.
void EmitCfiCheckFail();
/// \brief Create a check for a function parameter that may potentially be
/// declared as non-null.
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc,
- const FunctionDecl *FD, unsigned ParmNum);
+ AbstractCallee AC, unsigned ParmNum);
/// EmitCallArg - Emit a single call argument.
void EmitCallArg(CallArgList &args, const Expr *E, QualType ArgType);
@@ -3490,14 +3704,18 @@ private:
/// \brief Attempts to statically evaluate the object size of E. If that
/// fails, emits code to figure the size of E out for us. This is
/// pass_object_size aware.
+ ///
+ /// If EmittedExpr is non-null, this will use that instead of re-emitting E.
llvm::Value *evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
- llvm::IntegerType *ResType);
+ llvm::IntegerType *ResType,
+ llvm::Value *EmittedE);
/// \brief Emits the size of E, as required by __builtin_object_size. This
/// function is aware of pass_object_size parameters, and will act accordingly
/// if E is a parameter with the pass_object_size attribute.
llvm::Value *emitBuiltinObjectSize(const Expr *E, unsigned Type,
- llvm::IntegerType *ResType);
+ llvm::IntegerType *ResType,
+ llvm::Value *EmittedE);
public:
#ifndef NDEBUG
@@ -3533,7 +3751,7 @@ public:
template <typename T>
void EmitCallArgs(CallArgList &Args, const T *CallArgTypeInfo,
llvm::iterator_range<CallExpr::const_arg_iterator> ArgRange,
- const FunctionDecl *CalleeDecl = nullptr,
+ AbstractCallee AC = AbstractCallee(),
unsigned ParamsToSkip = 0,
EvaluationOrder Order = EvaluationOrder::Default) {
SmallVector<QualType, 16> ArgTypes;
@@ -3575,48 +3793,40 @@ public:
for (auto *A : llvm::make_range(Arg, ArgRange.end()))
ArgTypes.push_back(CallArgTypeInfo ? getVarArgType(A) : A->getType());
- EmitCallArgs(Args, ArgTypes, ArgRange, CalleeDecl, ParamsToSkip, Order);
+ EmitCallArgs(Args, ArgTypes, ArgRange, AC, ParamsToSkip, Order);
}
void EmitCallArgs(CallArgList &Args, ArrayRef<QualType> ArgTypes,
llvm::iterator_range<CallExpr::const_arg_iterator> ArgRange,
- const FunctionDecl *CalleeDecl = nullptr,
+ AbstractCallee AC = AbstractCallee(),
unsigned ParamsToSkip = 0,
EvaluationOrder Order = EvaluationOrder::Default);
- /// EmitPointerWithAlignment - Given an expression with a pointer
- /// type, emit the value and compute our best estimate of the
- /// alignment of the pointee.
+ /// EmitPointerWithAlignment - Given an expression with a pointer type,
+ /// emit the value and compute our best estimate of the alignment of the
+ /// pointee.
///
- /// Note that this function will conservatively fall back on the type
- /// when it doesn't recognize the expression.
+ /// \param BaseInfo - If non-null, this will be initialized with
+ /// information about the source of the alignment and the may-alias
+ /// attribute. Note that this function will conservatively fall back on
+ /// the type when it doesn't recognize the expression and may-alias will
+ /// be set to false.
///
- /// \param Source - If non-null, this will be initialized with
- /// information about the source of the alignment. Note that this
- /// function will conservatively fall back on the type when it
- /// doesn't recognize the expression, which means that sometimes the
- /// alignment source will just be a worst-case AlignmentSource::Type.
- ///
- /// One reasonable way to use this information is when there's a
- /// language guarantee that the pointer must be aligned to some
- /// stricter value, and we're simply trying to ensure that
- /// sufficiently obvious uses of under-aligned objects don't get
- /// miscompiled; for example, a placement new into the address of
- /// a local variable. In such a case, it's quite reasonable to
- /// just ignore the returned alignment when it isn't from an
- /// explicit source.
+ /// One reasonable way to use this information is when there's a language
+ /// guarantee that the pointer must be aligned to some stricter value, and
+ /// we're simply trying to ensure that sufficiently obvious uses of under-
+ /// aligned objects don't get miscompiled; for example, a placement new
+ /// into the address of a local variable. In such a case, it's quite
+ /// reasonable to just ignore the returned alignment when it isn't from an
+ /// explicit source.
Address EmitPointerWithAlignment(const Expr *Addr,
- AlignmentSource *Source = nullptr);
+ LValueBaseInfo *BaseInfo = nullptr);
void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK);
private:
QualType getVarArgType(const Expr *Arg);
- const TargetCodeGenInfo &getTargetHooks() const {
- return CGM.getTargetCodeGenInfo();
- }
-
void EmitDeclMetadata();
BlockByrefHelpers *buildByrefHelpers(llvm::StructType &byrefType,
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp
index 3600543..5561d45 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp
@@ -24,7 +24,6 @@
#include "CodeGenFunction.h"
#include "CodeGenPGO.h"
#include "CodeGenTBAA.h"
-#include "ConstantBuilder.h"
#include "CoverageMappingGen.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
@@ -42,9 +41,11 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/Version.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
@@ -111,6 +112,9 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
C.getTargetInfo().getMaxPointerWidth());
Int8PtrTy = Int8Ty->getPointerTo(0);
Int8PtrPtrTy = Int8PtrTy->getPointerTo(0);
+ AllocaInt8PtrTy = Int8Ty->getPointerTo(
+ M.getDataLayout().getAllocaAddrSpace());
+ ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace();
RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC();
BuiltinCC = getTargetCodeGenInfo().getABIInfo().getBuiltinCC();
@@ -367,13 +371,18 @@ void InstrProfStats::reportDiagnostics(DiagnosticsEngine &Diags,
if (MainFile.empty())
MainFile = "<stdin>";
Diags.Report(diag::warn_profile_data_unprofiled) << MainFile;
- } else
- Diags.Report(diag::warn_profile_data_out_of_date) << Visited << Missing
- << Mismatched;
+ } else {
+ if (Mismatched > 0)
+ Diags.Report(diag::warn_profile_data_out_of_date) << Visited << Mismatched;
+
+ if (Missing > 0)
+ Diags.Report(diag::warn_profile_data_missing) << Visited << Missing;
+ }
}
void CodeGenModule::Release() {
EmitDeferred();
+ EmitVTablesOpportunistically();
applyGlobalValReplacements();
applyReplacements();
checkAliases();
@@ -392,8 +401,11 @@ void CodeGenModule::Release() {
}
if (OpenMPRuntime)
if (llvm::Function *OpenMPRegistrationFunction =
- OpenMPRuntime->emitRegistrationFunction())
- AddGlobalCtor(OpenMPRegistrationFunction, 0);
+ OpenMPRuntime->emitRegistrationFunction()) {
+ auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ?
+ OpenMPRegistrationFunction : nullptr;
+ AddGlobalCtor(OpenMPRegistrationFunction, 0, ComdatKey);
+ }
if (PGOReader) {
getModule().setProfileSummary(PGOReader->getSummary().getMD(VMContext));
if (PGOStats.hasDiagnostics())
@@ -406,8 +418,11 @@ void CodeGenModule::Release() {
EmitDeferredUnusedCoverageMappings();
if (CoverageMapping)
CoverageMapping->emit();
- if (CodeGenOpts.SanitizeCfiCrossDso)
+ if (CodeGenOpts.SanitizeCfiCrossDso) {
CodeGenFunction(*this).EmitCfiCheckFail();
+ CodeGenFunction(*this).EmitCfiCheckStub();
+ }
+ emitAtAvailableLinkGuard();
emitLLVMUsed();
if (SanStats)
SanStats->finish();
@@ -416,6 +431,12 @@ void CodeGenModule::Release() {
(Context.getLangOpts().Modules || !LinkerOptionsMetadata.empty())) {
EmitModuleLinkOptions();
}
+
+ // Record mregparm value now so it is visible through rest of codegen.
+ if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86)
+ getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters",
+ CodeGenOpts.NumRegisterParameters);
+
if (CodeGenOpts.DwarfVersion) {
// We actually want the latest version when there are conflicts.
// We can change from Warning to Latest if such mode is supported.
@@ -449,18 +470,24 @@ void CodeGenModule::Release() {
getModule().addModuleFlag(llvm::Module::Warning, "Debug Info Version",
llvm::DEBUG_METADATA_VERSION);
+ // Width of wchar_t in bytes
+ uint64_t WCharWidth =
+ Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity();
+ assert((LangOpts.ShortWChar ||
+ llvm::TargetLibraryInfoImpl::getTargetWCharSize(Target.getTriple()) ==
+ Target.getWCharWidth() / 8) &&
+ "LLVM wchar_t size out of sync");
+
// We need to record the widths of enums and wchar_t, so that we can generate
- // the correct build attributes in the ARM backend.
+ // the correct build attributes in the ARM backend. wchar_size is also used by
+ // TargetLibraryInfo.
+ getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth);
+
llvm::Triple::ArchType Arch = Context.getTargetInfo().getTriple().getArch();
if ( Arch == llvm::Triple::arm
|| Arch == llvm::Triple::armeb
|| Arch == llvm::Triple::thumb
|| Arch == llvm::Triple::thumbeb) {
- // Width of wchar_t in bytes
- uint64_t WCharWidth =
- Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity();
- getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth);
-
// The minimum width of an enum in bytes
uint64_t EnumWidth = Context.getLangOpts().ShortEnums ? 1 : 4;
getModule().addModuleFlag(llvm::Module::Error, "min_enum_size", EnumWidth);
@@ -479,6 +506,26 @@ void CodeGenModule::Release() {
LangOpts.CUDADeviceFlushDenormalsToZero ? 1 : 0);
}
+ // Emit OpenCL specific module metadata: OpenCL/SPIR version.
+ if (LangOpts.OpenCL) {
+ EmitOpenCLMetadata();
+ // Emit SPIR version.
+ if (getTriple().getArch() == llvm::Triple::spir ||
+ getTriple().getArch() == llvm::Triple::spir64) {
+ // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the
+ // opencl.spir.version named metadata.
+ llvm::Metadata *SPIRVerElts[] = {
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+ Int32Ty, LangOpts.OpenCLVersion / 100)),
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+ Int32Ty, (LangOpts.OpenCLVersion / 100 > 1) ? 0 : 2))};
+ llvm::NamedMDNode *SPIRVerMD =
+ TheModule.getOrInsertNamedMetadata("opencl.spir.version");
+ llvm::LLVMContext &Ctx = TheModule.getContext();
+ SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts));
+ }
+ }
+
if (uint32_t PLevel = Context.getLangOpts().PICLevel) {
assert(PLevel < 3 && "Invalid PIC Level");
getModule().setPICLevel(static_cast<llvm::PICLevel::Level>(PLevel));
@@ -502,6 +549,20 @@ void CodeGenModule::Release() {
EmitTargetMetadata();
}
+void CodeGenModule::EmitOpenCLMetadata() {
+ // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the
+ // opencl.ocl.version named metadata node.
+ llvm::Metadata *OCLVerElts[] = {
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+ Int32Ty, LangOpts.OpenCLVersion / 100)),
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+ Int32Ty, (LangOpts.OpenCLVersion % 100) / 10))};
+ llvm::NamedMDNode *OCLVerMD =
+ TheModule.getOrInsertNamedMetadata("opencl.ocl.version");
+ llvm::LLVMContext &Ctx = TheModule.getContext();
+ OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts));
+}
+
void CodeGenModule::UpdateCompletedType(const TagDecl *TD) {
// Make sure that this type is translated.
Types.UpdateCompletedType(TD);
@@ -554,12 +615,8 @@ void CodeGenModule::DecorateInstructionWithTBAA(llvm::Instruction *Inst,
void CodeGenModule::DecorateInstructionWithInvariantGroup(
llvm::Instruction *I, const CXXRecordDecl *RD) {
- llvm::Metadata *MD = CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0));
- auto *MetaDataNode = dyn_cast<llvm::MDNode>(MD);
- // Check if we have to wrap MDString in MDNode.
- if (!MetaDataNode)
- MetaDataNode = llvm::MDNode::get(getLLVMContext(), MD);
- I->setMetadata(llvm::LLVMContext::MD_invariant_group, MetaDataNode);
+ I->setMetadata(llvm::LLVMContext::MD_invariant_group,
+ llvm::MDNode::get(getLLVMContext(), {}));
}
void CodeGenModule::Error(SourceLocation loc, StringRef message) {
@@ -740,7 +797,7 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) {
// Get the type of a ctor entry, { i32, void ()*, i8* }.
llvm::StructType *CtorStructTy = llvm::StructType::get(
- Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy, nullptr);
+ Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy);
// Construct the constructor and destructor arrays.
ConstantInitBuilder builder(*this);
@@ -830,10 +887,9 @@ void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D,
const CGFunctionInfo &Info,
llvm::Function *F) {
unsigned CallingConv;
- AttributeListType AttributeList;
- ConstructAttributeList(F->getName(), Info, D, AttributeList, CallingConv,
- false);
- F->setAttributes(llvm::AttributeSet::get(getLLVMContext(), AttributeList));
+ llvm::AttributeList PAL;
+ ConstructAttributeList(F->getName(), Info, D, PAL, CallingConv, false);
+ F->setAttributes(PAL);
F->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
}
@@ -882,14 +938,20 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
B.addAttribute(llvm::Attribute::NoInline);
- F->addAttributes(llvm::AttributeSet::FunctionIndex,
- llvm::AttributeSet::get(
- F->getContext(),
- llvm::AttributeSet::FunctionIndex, B));
+ F->addAttributes(llvm::AttributeList::FunctionIndex, B);
return;
}
- if (D->hasAttr<OptimizeNoneAttr>()) {
+ // Track whether we need to add the optnone LLVM attribute,
+ // starting with the default for this optimization level.
+ bool ShouldAddOptNone =
+ !CodeGenOpts.DisableO0ImplyOptNone && CodeGenOpts.OptimizationLevel == 0;
+ // We can't add optnone in the following cases, it won't pass the verifier.
+ ShouldAddOptNone &= !D->hasAttr<MinSizeAttr>();
+ ShouldAddOptNone &= !F->hasFnAttribute(llvm::Attribute::AlwaysInline);
+ ShouldAddOptNone &= !D->hasAttr<AlwaysInlineAttr>();
+
+ if (ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) {
B.addAttribute(llvm::Attribute::OptimizeNone);
// OptimizeNone implies noinline; we should not be inlining such functions.
@@ -943,7 +1005,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
// function.
if (!D->hasAttr<OptimizeNoneAttr>()) {
if (D->hasAttr<ColdAttr>()) {
- B.addAttribute(llvm::Attribute::OptimizeForSize);
+ if (!ShouldAddOptNone)
+ B.addAttribute(llvm::Attribute::OptimizeForSize);
B.addAttribute(llvm::Attribute::Cold);
}
@@ -951,9 +1014,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
B.addAttribute(llvm::Attribute::MinSize);
}
- F->addAttributes(llvm::AttributeSet::FunctionIndex,
- llvm::AttributeSet::get(
- F->getContext(), llvm::AttributeSet::FunctionIndex, B));
+ F->addAttributes(llvm::AttributeList::FunctionIndex, B);
unsigned alignment = D->getMaxAlignment() / Context.getCharWidth();
if (alignment)
@@ -999,9 +1060,25 @@ void CodeGenModule::setNonAliasAttributes(const Decl *D,
llvm::GlobalObject *GO) {
SetCommonAttributes(D, GO);
- if (D)
+ if (D) {
+ if (auto *GV = dyn_cast<llvm::GlobalVariable>(GO)) {
+ if (auto *SA = D->getAttr<PragmaClangBSSSectionAttr>())
+ GV->addAttribute("bss-section", SA->getName());
+ if (auto *SA = D->getAttr<PragmaClangDataSectionAttr>())
+ GV->addAttribute("data-section", SA->getName());
+ if (auto *SA = D->getAttr<PragmaClangRodataSectionAttr>())
+ GV->addAttribute("rodata-section", SA->getName());
+ }
+
+ if (auto *F = dyn_cast<llvm::Function>(GO)) {
+ if (auto *SA = D->getAttr<PragmaClangTextSectionAttr>())
+ if (!D->getAttr<SectionAttr>())
+ F->addFnAttr("implicit-section-name", SA->getName());
+ }
+
if (const SectionAttr *SA = D->getAttr<SectionAttr>())
GO->setSection(SA->getName());
+ }
getTargetCodeGenInfo().setTargetAttributes(D, GO, *this);
}
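The PragmaClang*SectionAttr attributes consumed above come from #pragma clang section; a minimal source-level sketch, not from the patch, with made-up section names:

    #pragma clang section bss=".my_bss" data=".my_data" rodata=".my_rodata"
    int zero_init;                   // carries the "bss-section" attribute
    int initialized = 42;            // carries the "data-section" attribute
    const int table[3] = {1, 2, 3};  // carries the "rodata-section" attribute
    #pragma clang section bss="" data="" rodata=""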
@@ -1021,7 +1098,7 @@ static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV,
const NamedDecl *ND) {
// Set linkage and visibility in case we never see a definition.
LinkageInfo LV = ND->getLinkageAndVisibility();
- if (LV.getLinkage() != ExternalLinkage) {
+ if (!isExternallyVisible(LV.getLinkage())) {
// Don't set internal linkage on declarations.
} else {
if (ND->hasAttr<DLLImportAttr>()) {
@@ -1029,7 +1106,6 @@ static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV,
GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
} else if (ND->hasAttr<DLLExportAttr>()) {
GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
- GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
} else if (ND->hasAttr<WeakAttr>() || ND->isWeakImported()) {
// "extern_weak" is overloaded in LLVM; we probably should have
// separate linkage types for this.
@@ -1101,13 +1177,17 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
setLinkageAndVisibilityForGV(F, FD);
+ if (FD->getAttr<PragmaClangTextSectionAttr>()) {
+ F->addFnAttr("implicit-section-name");
+ }
+
if (const SectionAttr *SA = FD->getAttr<SectionAttr>())
F->setSection(SA->getName());
if (FD->isReplaceableGlobalAllocationFunction()) {
// A replaceable global allocation function does not act like a builtin by
// default, only if it is invoked by a new-expression or delete-expression.
- F->addAttribute(llvm::AttributeSet::FunctionIndex,
+ F->addAttribute(llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoBuiltin);
// A sane operator new returns a non-aliasing pointer.
@@ -1116,7 +1196,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
auto Kind = FD->getDeclName().getCXXOverloadedOperator();
if (getCodeGenOpts().AssumeSaneOperatorNew &&
(Kind == OO_New || Kind == OO_Array_New))
- F->addAttribute(llvm::AttributeSet::ReturnIndex,
+ F->addAttribute(llvm::AttributeList::ReturnIndex,
llvm::Attribute::NoAlias);
}
@@ -1145,7 +1225,7 @@ void CodeGenModule::addCompilerUsedGlobal(llvm::GlobalValue *GV) {
}
static void emitUsed(CodeGenModule &CGM, StringRef Name,
- std::vector<llvm::WeakVH> &List) {
+ std::vector<llvm::WeakTrackingVH> &List) {
// Don't create llvm.used if there is no need.
if (List.empty())
return;
@@ -1197,7 +1277,7 @@ void CodeGenModule::AddDependentLib(StringRef Lib) {
/// \brief Add link options implied by the given module, including modules
/// it depends on, using a postorder walk.
static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod,
- SmallVectorImpl<llvm::Metadata *> &Metadata,
+ SmallVectorImpl<llvm::MDNode *> &Metadata,
llvm::SmallPtrSet<Module *, 16> &Visited) {
// Import this module's parent.
if (Mod->Parent && Visited.insert(Mod->Parent).second) {
@@ -1285,7 +1365,7 @@ void CodeGenModule::EmitModuleLinkOptions() {
// Add link options for all of the imported modules in reverse topological
// order. We don't do anything to try to order import link flags with respect
// to linker options inserted by things like #pragma comment().
- SmallVector<llvm::Metadata *, 16> MetadataArgs;
+ SmallVector<llvm::MDNode *, 16> MetadataArgs;
Visited.clear();
for (Module *M : LinkModules)
if (Visited.insert(M).second)
@@ -1294,9 +1374,9 @@ void CodeGenModule::EmitModuleLinkOptions() {
LinkerOptionsMetadata.append(MetadataArgs.begin(), MetadataArgs.end());
// Add the linker options metadata flag.
- getModule().addModuleFlag(llvm::Module::AppendUnique, "Linker Options",
- llvm::MDNode::get(getLLVMContext(),
- LinkerOptionsMetadata));
+ auto *NMD = getModule().getOrInsertNamedMetadata("llvm.linker.options");
+ for (auto *MD : LinkerOptionsMetadata)
+ NMD->addOperand(MD);
}
void CodeGenModule::EmitDeferred() {
@@ -1319,13 +1399,10 @@ void CodeGenModule::EmitDeferred() {
// Grab the list of decls to emit. If EmitGlobalDefinition schedules more
// work, it will not interfere with this.
- std::vector<DeferredGlobal> CurDeclsToEmit;
+ std::vector<GlobalDecl> CurDeclsToEmit;
CurDeclsToEmit.swap(DeferredDeclsToEmit);
- for (DeferredGlobal &G : CurDeclsToEmit) {
- GlobalDecl D = G.GD;
- G.GV = nullptr;
-
+ for (GlobalDecl &D : CurDeclsToEmit) {
// We should call GetAddrOfGlobal with IsForDefinition set to true in order
// to get GlobalValue with exactly the type we need, not something that
// might have been created for another decl with the same mangled name but
@@ -1364,6 +1441,24 @@ void CodeGenModule::EmitDeferred() {
}
}
+void CodeGenModule::EmitVTablesOpportunistically() {
+ // Try to emit external vtables as available_externally if all of their
+ // inlined virtual functions have been emitted. This runs after EmitDeferred()
+ // and therefore is not allowed to create new references to things that need
+ // to be emitted lazily. Note that it also relies on the fact that RTTI is
+ // emitted eagerly.
+
+ assert((OpportunisticVTables.empty() || shouldOpportunisticallyEmitVTables())
+ && "Only emit opportunistic vtables with optimizations");
+
+ for (const CXXRecordDecl *RD : OpportunisticVTables) {
+ assert(getVTables().isVTableExternal(RD) &&
+ "This queue should only contain external vtables");
+ if (getCXXABI().canSpeculativelyEmitVTable(RD))
+ VTables.GenerateClassData(RD);
+ }
+ OpportunisticVTables.clear();
+}
+
void CodeGenModule::EmitGlobalAnnotations() {
if (Annotations.empty())
return;
@@ -1482,6 +1577,34 @@ bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV,
return false;
}
+bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc,
+ StringRef Category) const {
+ if (!LangOpts.XRayInstrument)
+ return false;
+ const auto &XRayFilter = getContext().getXRayFilter();
+ using ImbueAttr = XRayFunctionFilter::ImbueAttribute;
+ auto Attr = XRayFunctionFilter::ImbueAttribute::NONE;
+ if (Loc.isValid())
+ Attr = XRayFilter.shouldImbueLocation(Loc, Category);
+ if (Attr == ImbueAttr::NONE)
+ Attr = XRayFilter.shouldImbueFunction(Fn->getName());
+ switch (Attr) {
+ case ImbueAttr::NONE:
+ return false;
+ case ImbueAttr::ALWAYS:
+ Fn->addFnAttr("function-instrument", "xray-always");
+ break;
+ case ImbueAttr::ALWAYS_ARG1:
+ Fn->addFnAttr("function-instrument", "xray-always");
+ Fn->addFnAttr("xray-log-args", "1");
+ break;
+ case ImbueAttr::NEVER:
+ Fn->addFnAttr("function-instrument", "xray-never");
+ break;
+ }
+ return true;
+}
+
bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) {
// Never defer when EmitAllDecls is specified.
if (LangOpts.EmitAllDecls)
@@ -1678,13 +1801,13 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
}
StringRef MangledName = getMangledName(GD);
- if (llvm::GlobalValue *GV = GetGlobalValue(MangledName)) {
+ if (GetGlobalValue(MangledName) != nullptr) {
// The value has already been used and should therefore be emitted.
- addDeferredDeclToEmit(GV, GD);
+ addDeferredDeclToEmit(GD);
} else if (MustBeEmitted(Global)) {
// The value must be emitted, but cannot be emitted eagerly.
assert(!MayBeEmittedEagerly(Global));
- addDeferredDeclToEmit(/*GV=*/nullptr, GD);
+ addDeferredDeclToEmit(GD);
} else {
// Otherwise, remember that we saw a deferred decl with this name. The
// first use of the mangled name will cause it to move into
@@ -1693,6 +1816,16 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
}
}
+// Check if T is a class type with a destructor that's not dllimport.
+static bool HasNonDllImportDtor(QualType T) {
+ if (const auto *RT = T->getBaseElementTypeUnsafe()->getAs<RecordType>())
+ if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()))
+ if (RD->getDestructor() && !RD->getDestructor()->hasAttr<DLLImportAttr>())
+ return true;
+
+ return false;
+}
+
namespace {
struct FunctionIsDirectlyRecursive :
public RecursiveASTVisitor<FunctionIsDirectlyRecursive> {
@@ -1726,6 +1859,7 @@ namespace {
}
};
+ // Make sure we're not referencing non-imported vars or functions.
struct DLLImportFunctionVisitor
: public RecursiveASTVisitor<DLLImportFunctionVisitor> {
bool SafeToInline = true;
@@ -1733,12 +1867,25 @@ namespace {
bool shouldVisitImplicitCode() const { return true; }
bool VisitVarDecl(VarDecl *VD) {
- // A thread-local variable cannot be imported.
- SafeToInline = !VD->getTLSKind();
+ if (VD->getTLSKind()) {
+ // A thread-local variable cannot be imported.
+ SafeToInline = false;
+ return SafeToInline;
+ }
+
+ // A variable definition might imply a destructor call.
+ if (VD->isThisDeclarationADefinition())
+ SafeToInline = !HasNonDllImportDtor(VD->getType());
+
+ return SafeToInline;
+ }
+
+ bool VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) {
+ if (const auto *D = E->getTemporary()->getDestructor())
+ SafeToInline = D->hasAttr<DLLImportAttr>();
return SafeToInline;
}
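A source-level sketch, not from the patch and assuming a Windows/MS-ABI build, of the case the extended VisitVarDecl rejects: emitting the dllimport inline body locally would also require emitting a destructor that is not itself dllimport:

    struct Guard { ~Guard(); };          // non-dllimport destructor

    struct __declspec(dllimport) Widget {
      void poke() {                      // inline member of a dllimport class
        static Guard g;                  // a definition implies a dtor call,
      }                                  // so the body is not safe to inline
    };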
- // Make sure we're not referencing non-imported vars or functions.
bool VisitDeclRefExpr(DeclRefExpr *E) {
ValueDecl *VD = E->getDecl();
if (isa<FunctionDecl>(VD))
@@ -1747,14 +1894,28 @@ namespace {
SafeToInline = !V->hasGlobalStorage() || V->hasAttr<DLLImportAttr>();
return SafeToInline;
}
+
bool VisitCXXConstructExpr(CXXConstructExpr *E) {
SafeToInline = E->getConstructor()->hasAttr<DLLImportAttr>();
return SafeToInline;
}
+
+ bool VisitCXXMemberCallExpr(CXXMemberCallExpr *E) {
+ CXXMethodDecl *M = E->getMethodDecl();
+ if (!M) {
+ // Call through a pointer to member function. This is safe to inline.
+ SafeToInline = true;
+ } else {
+ SafeToInline = M->hasAttr<DLLImportAttr>();
+ }
+ return SafeToInline;
+ }
+
bool VisitCXXDeleteExpr(CXXDeleteExpr *E) {
SafeToInline = E->getOperatorDelete()->hasAttr<DLLImportAttr>();
return SafeToInline;
}
+
bool VisitCXXNewExpr(CXXNewExpr *E) {
SafeToInline = E->getOperatorNew()->hasAttr<DLLImportAttr>();
return SafeToInline;
@@ -1783,16 +1944,6 @@ CodeGenModule::isTriviallyRecursive(const FunctionDecl *FD) {
return Walker.Result;
}
-// Check if T is a class type with a destructor that's not dllimport.
-static bool HasNonDllImportDtor(QualType T) {
- if (const RecordType *RT = dyn_cast<RecordType>(T))
- if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()))
- if (RD->getDestructor() && !RD->getDestructor()->hasAttr<DLLImportAttr>())
- return true;
-
- return false;
-}
-
bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) {
if (getFunctionLinkage(GD) != llvm::Function::AvailableExternallyLinkage)
return true;
@@ -1828,20 +1979,8 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) {
return !isTriviallyRecursive(F);
}
-/// If the type for the method's class was generated by
-/// CGDebugInfo::createContextChain(), the cache contains only a
-/// limited DIType without any declarations. Since EmitFunctionStart()
-/// needs to find the canonical declaration for each method, we need
-/// to construct the complete type prior to emitting the method.
-void CodeGenModule::CompleteDIClassType(const CXXMethodDecl* D) {
- if (!D->isInstance())
- return;
-
- if (CGDebugInfo *DI = getModuleDebugInfo())
- if (getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) {
- const auto *ThisPtr = cast<PointerType>(D->getThisType(getContext()));
- DI->getOrCreateRecordType(ThisPtr->getPointeeType(), D->getLocation());
- }
+bool CodeGenModule::shouldOpportunisticallyEmitVTables() {
+ return CodeGenOpts.OptimizationLevel > 0;
}
void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
@@ -1858,7 +1997,6 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
return;
if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) {
- CompleteDIClassType(Method);
// Make sure to emit the definition(s) before we emit the thunks.
// This is necessary for the generation of certain thunks.
if (const auto *CD = dyn_cast<CXXConstructorDecl>(Method))
@@ -1893,13 +2031,10 @@ static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old,
///
/// If D is non-null, it specifies a decl that correspond to this. This is used
/// to set the attributes on the function when it is first created.
-llvm::Constant *
-CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName,
- llvm::Type *Ty,
- GlobalDecl GD, bool ForVTable,
- bool DontDefer, bool IsThunk,
- llvm::AttributeSet ExtraAttrs,
- ForDefinition_t IsForDefinition) {
+llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
+ StringRef MangledName, llvm::Type *Ty, GlobalDecl GD, bool ForVTable,
+ bool DontDefer, bool IsThunk, llvm::AttributeList ExtraAttrs,
+ ForDefinition_t IsForDefinition) {
const Decl *D = GD.getDecl();
// Lookup the entry, lazily creating it if necessary.
@@ -1989,12 +2124,9 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName,
assert(F->getName() == MangledName && "name was uniqued!");
if (D)
SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk);
- if (ExtraAttrs.hasAttributes(llvm::AttributeSet::FunctionIndex)) {
- llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeSet::FunctionIndex);
- F->addAttributes(llvm::AttributeSet::FunctionIndex,
- llvm::AttributeSet::get(VMContext,
- llvm::AttributeSet::FunctionIndex,
- B));
+ if (ExtraAttrs.hasAttributes(llvm::AttributeList::FunctionIndex)) {
+ llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeList::FunctionIndex);
+ F->addAttributes(llvm::AttributeList::FunctionIndex, B);
}
if (!DontDefer) {
@@ -2004,7 +2136,7 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName,
if (D && isa<CXXDestructorDecl>(D) &&
getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D),
GD.getDtorType()))
- addDeferredDeclToEmit(F, GD);
+ addDeferredDeclToEmit(GD);
// This is the first use or definition of a mangled name. If there is a
// deferred decl with this name, remember that we need to emit it at the end
@@ -2014,7 +2146,7 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName,
// Move the potentially referenced deferred decl to the
// DeferredDeclsToEmit list, and remove it from DeferredDecls (since we
// don't need it anymore).
- addDeferredDeclToEmit(F, DDI->second);
+ addDeferredDeclToEmit(DDI->second);
DeferredDecls.erase(DDI);
// Otherwise, there are cases we have to worry about where we're
@@ -2034,7 +2166,7 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName,
FD = FD->getPreviousDecl()) {
if (isa<CXXRecordDecl>(FD->getLexicalDeclContext())) {
if (FD->doesThisDeclarationHaveABody()) {
- addDeferredDeclToEmit(F, GD.getWithDecl(FD));
+ addDeferredDeclToEmit(GD.getWithDecl(FD));
break;
}
}
@@ -2069,7 +2201,7 @@ llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD,
StringRef MangledName = getMangledName(GD);
return GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer,
- /*IsThunk=*/false, llvm::AttributeSet(),
+ /*IsThunk=*/false, llvm::AttributeList(),
IsForDefinition);
}
@@ -2115,7 +2247,7 @@ GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) {
/// type and name.
llvm::Constant *
CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name,
- llvm::AttributeSet ExtraAttrs,
+ llvm::AttributeList ExtraAttrs,
bool Local) {
llvm::Constant *C =
GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false,
@@ -2143,9 +2275,8 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name,
/// CreateBuiltinFunction - Create a new builtin function with the specified
/// type and name.
llvm::Constant *
-CodeGenModule::CreateBuiltinFunction(llvm::FunctionType *FTy,
- StringRef Name,
- llvm::AttributeSet ExtraAttrs) {
+CodeGenModule::CreateBuiltinFunction(llvm::FunctionType *FTy, StringRef Name,
+ llvm::AttributeList ExtraAttrs) {
llvm::Constant *C =
GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false,
/*DontDefer=*/false, /*IsThunk=*/false, ExtraAttrs);
@@ -2236,11 +2367,13 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
return llvm::ConstantExpr::getBitCast(Entry, Ty);
}
- unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace());
+ auto AddrSpace = GetGlobalVarAddressSpace(D);
+ auto TargetAddrSpace = getContext().getTargetAddressSpace(AddrSpace);
+
auto *GV = new llvm::GlobalVariable(
getModule(), Ty->getElementType(), false,
llvm::GlobalValue::ExternalLinkage, nullptr, MangledName, nullptr,
- llvm::GlobalVariable::NotThreadLocal, AddrSpace);
+ llvm::GlobalVariable::NotThreadLocal, TargetAddrSpace);
// If we already created a global with the same mangled name (but different
// type) before, take its name and remove it from its parent.
@@ -2263,7 +2396,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
if (DDI != DeferredDecls.end()) {
// Move the potentially referenced deferred decl to the DeferredDeclsToEmit
// list, and remove it from DeferredDecls (since we don't need it anymore).
- addDeferredDeclToEmit(GV, DDI->second);
+ addDeferredDeclToEmit(DDI->second);
DeferredDecls.erase(DDI);
}
@@ -2297,8 +2430,15 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
GV->setSection(".cp.rodata");
}
- if (AddrSpace != Ty->getAddressSpace())
- return llvm::ConstantExpr::getAddrSpaceCast(GV, Ty);
+ auto ExpectedAS =
+ D ? D->getType().getAddressSpace()
+ : static_cast<unsigned>(LangOpts.OpenCL ? LangAS::opencl_global
+ : LangAS::Default);
+ assert(getContext().getTargetAddressSpace(ExpectedAS) ==
+ Ty->getPointerAddressSpace());
+ if (AddrSpace != ExpectedAS)
+ return getTargetCodeGenInfo().performAddrSpaceCast(*this, GV, AddrSpace,
+ ExpectedAS, Ty);
return GV;
}
@@ -2432,18 +2572,28 @@ CharUnits CodeGenModule::GetTargetTypeStoreSize(llvm::Type *Ty) const {
getDataLayout().getTypeStoreSizeInBits(Ty));
}
-unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D,
- unsigned AddrSpace) {
- if (D && LangOpts.CUDA && LangOpts.CUDAIsDevice) {
- if (D->hasAttr<CUDAConstantAttr>())
- AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_constant);
- else if (D->hasAttr<CUDASharedAttr>())
- AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_shared);
+unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {
+ unsigned AddrSpace;
+ if (LangOpts.OpenCL) {
+ AddrSpace = D ? D->getType().getAddressSpace()
+ : static_cast<unsigned>(LangAS::opencl_global);
+ assert(AddrSpace == LangAS::opencl_global ||
+ AddrSpace == LangAS::opencl_constant ||
+ AddrSpace == LangAS::opencl_local ||
+ AddrSpace >= LangAS::FirstTargetAddressSpace);
+ return AddrSpace;
+ }
+
+ if (LangOpts.CUDA && LangOpts.CUDAIsDevice) {
+ if (D && D->hasAttr<CUDAConstantAttr>())
+ return LangAS::cuda_constant;
+ else if (D && D->hasAttr<CUDASharedAttr>())
+ return LangAS::cuda_shared;
else
- AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_device);
+ return LangAS::cuda_device;
}
- return AddrSpace;
+ return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D);
}
template<typename SomeDecl>
@@ -2596,10 +2746,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
// "extern int x[];") and then a definition of a different type (e.g.
// "int x[10];"). This also happens when an initializer has a different type
// from the type of the global (this happens with unions).
- if (!GV ||
- GV->getType()->getElementType() != InitType ||
+ if (!GV || GV->getType()->getElementType() != InitType ||
GV->getType()->getAddressSpace() !=
- GetGlobalVarAddressSpace(D, getContext().getTargetAddressSpace(ASTTy))) {
+ getContext().getTargetAddressSpace(GetGlobalVarAddressSpace(D))) {
// Move the old entry aside so that we'll create a new one.
Entry->setName(StringRef());
@@ -2751,6 +2900,14 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context,
if (D->hasAttr<SectionAttr>())
return true;
+ // A variable cannot be both common and exist in a section.
+ // We don't try to determine which is the right section in the front-end.
+ // If no specialized section name is applicable, it will resort to default.
+ if (D->hasAttr<PragmaClangBSSSectionAttr>() ||
+ D->hasAttr<PragmaClangDataSectionAttr>() ||
+ D->hasAttr<PragmaClangRodataSectionAttr>())
+ return true;
+
// Thread local vars aren't considered common linkage.
if (D->getTLSKind())
return true;
@@ -2803,7 +2960,7 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(
// We are guaranteed to have a strong definition somewhere else,
// so we can use available_externally linkage.
if (Linkage == GVA_AvailableExternally)
- return llvm::Function::AvailableExternallyLinkage;
+ return llvm::GlobalValue::AvailableExternallyLinkage;
// Note that Apple's kernel linker doesn't support symbol
// coalescing, so we need to avoid linkonce and weak linkages there.
@@ -2897,14 +3054,8 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old,
continue;
// Get the call site's attribute list.
- SmallVector<llvm::AttributeSet, 8> newAttrs;
- llvm::AttributeSet oldAttrs = callSite.getAttributes();
-
- // Collect any return attributes from the call.
- if (oldAttrs.hasAttributes(llvm::AttributeSet::ReturnIndex))
- newAttrs.push_back(
- llvm::AttributeSet::get(newFn->getContext(),
- oldAttrs.getRetAttributes()));
+ SmallVector<llvm::AttributeSet, 8> newArgAttrs;
+ llvm::AttributeList oldAttrs = callSite.getAttributes();
// If the function was passed too few arguments, don't transform.
unsigned newNumArgs = newFn->arg_size();
@@ -2914,27 +3065,19 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old,
// If any of the types mismatch, we don't transform.
unsigned argNo = 0;
bool dontTransform = false;
- for (llvm::Function::arg_iterator ai = newFn->arg_begin(),
- ae = newFn->arg_end(); ai != ae; ++ai, ++argNo) {
- if (callSite.getArgument(argNo)->getType() != ai->getType()) {
+ for (llvm::Argument &A : newFn->args()) {
+ if (callSite.getArgument(argNo)->getType() != A.getType()) {
dontTransform = true;
break;
}
// Add any parameter attributes.
- if (oldAttrs.hasAttributes(argNo + 1))
- newAttrs.
- push_back(llvm::
- AttributeSet::get(newFn->getContext(),
- oldAttrs.getParamAttributes(argNo + 1)));
+ newArgAttrs.push_back(oldAttrs.getParamAttributes(argNo));
+ argNo++;
}
if (dontTransform)
continue;
- if (oldAttrs.hasAttributes(llvm::AttributeSet::FunctionIndex))
- newAttrs.push_back(llvm::AttributeSet::get(newFn->getContext(),
- oldAttrs.getFnAttributes()));
-
// Okay, we can transform this. Create the new call instruction and copy
// over the required information.
newArgs.append(callSite.arg_begin(), callSite.arg_begin() + argNo);
@@ -2958,8 +3101,9 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old,
if (!newCall->getType()->isVoidTy())
newCall->takeName(callSite.getInstruction());
- newCall.setAttributes(
- llvm::AttributeSet::get(newFn->getContext(), newAttrs));
+ newCall.setAttributes(llvm::AttributeList::get(
+ newFn->getContext(), oldAttrs.getFnAttributes(),
+ oldAttrs.getRetAttributes(), newArgAttrs));
newCall.setCallingConv(callSite.getCallingConv());
// Finally, remove the old call, replacing any uses with the new one.
@@ -3341,6 +3485,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
llvm_unreachable("unknown file format");
case llvm::Triple::COFF:
case llvm::Triple::ELF:
+ case llvm::Triple::Wasm:
GV->setSection("cfstring");
break;
case llvm::Triple::MachO:
@@ -3612,20 +3757,26 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary(
Linkage = llvm::GlobalVariable::InternalLinkage;
}
}
- unsigned AddrSpace = GetGlobalVarAddressSpace(
- VD, getContext().getTargetAddressSpace(MaterializedType));
+ unsigned AddrSpace =
+ VD ? GetGlobalVarAddressSpace(VD) : MaterializedType.getAddressSpace();
+ auto TargetAS = getContext().getTargetAddressSpace(AddrSpace);
auto *GV = new llvm::GlobalVariable(
getModule(), Type, Constant, Linkage, InitialValue, Name.c_str(),
- /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
- AddrSpace);
+ /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS);
setGlobalVisibility(GV, VD);
GV->setAlignment(Align.getQuantity());
if (supportsCOMDAT() && GV->isWeakForLinker())
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
if (VD->getTLSKind())
setTLSMode(GV, *VD);
- MaterializedGlobalTemporaryMap[E] = GV;
- return ConstantAddress(GV, Align);
+ llvm::Constant *CV = GV;
+ if (AddrSpace != LangAS::Default)
+ CV = getTargetCodeGenInfo().performAddrSpaceCast(
+ *this, GV, AddrSpace, LangAS::Default,
+ Type->getPointerTo(
+ getContext().getTargetAddressSpace(LangAS::Default)));
+ MaterializedGlobalTemporaryMap[E] = CV;
+ return ConstantAddress(CV, Align);
}
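The generic fallback behind performAddrSpaceCast in the hunk above can be pictured as a constant pointer cast back to the language-default address space; a hedged sketch follows (castToDefaultAS, GV and DefaultAS are illustrative, and targets may override the hook with something smarter):

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"

// Sketch only: map a global that lives in a non-default address space back to
// a pointer in the language-default address space via a constant cast.
static llvm::Constant *castToDefaultAS(llvm::GlobalVariable *GV,
                                       unsigned DefaultAS) {
  llvm::PointerType *DestTy = GV->getValueType()->getPointerTo(DefaultAS);
  if (GV->getType() == DestTy)
    return GV;
  return llvm::ConstantExpr::getAddrSpaceCast(GV, DestTy);
}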
/// EmitObjCPropertyImplementations - Emit information for synthesized
@@ -3767,11 +3918,16 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
AddDeferredUnusedCoverageMapping(D);
break;
+ case Decl::CXXDeductionGuide:
+ // Function-like, but does not result in code emission.
+ break;
+
case Decl::Var:
case Decl::Decomposition:
// Skip variable templates
if (cast<VarDecl>(D)->getDescribedVarTemplate())
return;
+ LLVM_FALLTHROUGH;
case Decl::VarTemplateSpecialization:
EmitGlobal(cast<VarDecl>(D));
if (auto *DD = dyn_cast<DecompositionDecl>(D))
@@ -3790,6 +3946,11 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
EmitDeclContext(cast<NamespaceDecl>(D));
break;
case Decl::CXXRecord:
+ if (DebugInfo) {
+ if (auto *ES = D->getASTContext().getExternalSource())
+ if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never)
+ DebugInfo->completeUnusedClass(cast<CXXRecordDecl>(*D));
+ }
// Emit any static data members, they may be definitions.
for (auto *I : cast<CXXRecordDecl>(D)->decls())
if (isa<VarDecl>(I) || isa<CXXRecordDecl>(I))
@@ -4338,18 +4499,19 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
// Make a copy of the features as passed on the command line into the
// beginning of the additional features from the function to override.
- ParsedAttr.first.insert(ParsedAttr.first.begin(),
+ ParsedAttr.Features.insert(ParsedAttr.Features.begin(),
Target.getTargetOpts().FeaturesAsWritten.begin(),
Target.getTargetOpts().FeaturesAsWritten.end());
- if (ParsedAttr.second != "")
- TargetCPU = ParsedAttr.second;
+ if (ParsedAttr.Architecture != "")
+    TargetCPU = ParsedAttr.Architecture;
// Now populate the feature map, first with the TargetCPU which is either
// the default or a new one from the target attribute string. Then we'll use
// the passed in features (FeaturesAsWritten) along with the new ones from
// the attribute.
- Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, ParsedAttr.first);
+ Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU,
+ ParsedAttr.Features);
} else {
Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU,
Target.getTargetOpts().Features);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h
index 36f6785..b162e72 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h
@@ -28,6 +28,7 @@
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/SanitizerBlacklist.h"
+#include "clang/Basic/XRayLists.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -314,14 +315,9 @@ private:
/// This is a list of deferred decls which we have seen that *are* actually
/// referenced. These get code generated when the module is done.
- struct DeferredGlobal {
- DeferredGlobal(llvm::GlobalValue *GV, GlobalDecl GD) : GV(GV), GD(GD) {}
- llvm::TrackingVH<llvm::GlobalValue> GV;
- GlobalDecl GD;
- };
- std::vector<DeferredGlobal> DeferredDeclsToEmit;
- void addDeferredDeclToEmit(llvm::GlobalValue *GV, GlobalDecl GD) {
- DeferredDeclsToEmit.emplace_back(GV, GD);
+ std::vector<GlobalDecl> DeferredDeclsToEmit;
+ void addDeferredDeclToEmit(GlobalDecl GD) {
+ DeferredDeclsToEmit.emplace_back(GD);
}
/// List of alias we have emitted. Used to make sure that what they point to
@@ -345,11 +341,14 @@ private:
/// A queue of (optional) vtables to consider emitting.
std::vector<const CXXRecordDecl*> DeferredVTables;
+ /// A queue of (optional) vtables that may be emitted opportunistically.
+ std::vector<const CXXRecordDecl *> OpportunisticVTables;
+
/// List of global values which are required to be present in the object file;
/// bitcast to i8*. This is used for forcing visibility of symbols which may
/// otherwise be optimized out.
- std::vector<llvm::WeakVH> LLVMUsed;
- std::vector<llvm::WeakVH> LLVMCompilerUsed;
+ std::vector<llvm::WeakTrackingVH> LLVMUsed;
+ std::vector<llvm::WeakTrackingVH> LLVMCompilerUsed;
/// Store the list of global constructors and their respective priorities to
/// be emitted when the translation unit is complete.
@@ -420,7 +419,7 @@ private:
SmallVector<GlobalInitData, 8> PrioritizedCXXGlobalInits;
/// Global destructor functions and arguments that need to run on termination.
- std::vector<std::pair<llvm::WeakVH,llvm::Constant*> > CXXGlobalDtors;
+ std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>> CXXGlobalDtors;
/// \brief The complete set of modules that has been imported.
llvm::SetVector<clang::Module *> ImportedModules;
@@ -430,14 +429,14 @@ private:
llvm::SmallPtrSet<clang::Module *, 16> EmittedModuleInitializers;
/// \brief A vector of metadata strings.
- SmallVector<llvm::Metadata *, 16> LinkerOptionsMetadata;
+ SmallVector<llvm::MDNode *, 16> LinkerOptionsMetadata;
/// @name Cache for Objective-C runtime types
/// @{
/// Cached reference to the class for constant strings. This value has type
/// int * but is actually an Obj-C class pointer.
- llvm::WeakVH CFConstantStringClassRef;
+ llvm::WeakTrackingVH CFConstantStringClassRef;
/// \brief The type used to describe the state of a fast enumeration in
/// Objective-C's for..in loop.
@@ -454,7 +453,7 @@ private:
bool isTriviallyRecursive(const FunctionDecl *F);
bool shouldEmitFunction(GlobalDecl GD);
-
+ bool shouldOpportunisticallyEmitVTables();
/// Map used to be sure we don't emit the same CompoundLiteral twice.
llvm::DenseMap<const CompoundLiteralExpr *, llvm::GlobalVariable *>
EmittedCompoundLiterals;
@@ -546,6 +545,10 @@ public:
return *ObjCData;
}
+ // Version checking function, used to implement ObjC's @available:
+ // i32 @__isOSVersionAtLeast(i32, i32, i32)
+ llvm::Constant *IsOSVersionAtLeastFn = nullptr;
+
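Roughly speaking, the hook declared above lets an @available check be lowered to a runtime version query; a hedged, source-level sketch of the shape of that call (the actual lowering is done by CodeGen, and the version numbers here are illustrative):

// Sketch only: an `if (@available(macOS 10.12, *))` guard behaves roughly
// like a call to the runtime helper declared above.
extern "C" int __isOSVersionAtLeast(int major, int minor, int subminor);

static bool osIsAtLeast10_12() {
  return __isOSVersionAtLeast(10, 12, 0) != 0;
}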
InstrProfStats &getPGOStats() { return PGOStats; }
llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); }
@@ -707,11 +710,15 @@ public:
SourceLocation Loc = SourceLocation(),
bool TLS = false);
- /// Return the address space of the underlying global variable for D, as
+ /// Return the AST address space of the underlying global variable for D, as
/// determined by its declaration. Normally this is the same as the address
/// space of D's type, but in CUDA, address spaces are associated with
- /// declarations, not types.
- unsigned GetGlobalVarAddressSpace(const VarDecl *D, unsigned AddrSpace);
+  /// declarations, not types. If D is nullptr, return the default address
+  /// space for a global variable.
+  ///
+  /// For languages without explicit address spaces, if D has the default
+  /// address space, a target-specific global or constant address space may be
+  /// returned.
+ unsigned GetGlobalVarAddressSpace(const VarDecl *D);
/// Return the llvm::Constant for the address of the given global variable.
/// If Ty is non-null and if the global doesn't exist, then it will be created
@@ -906,14 +913,13 @@ public:
/// Create a new runtime function with the specified type and name.
llvm::Constant *
CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name,
- llvm::AttributeSet ExtraAttrs = llvm::AttributeSet(),
+ llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
bool Local = false);
/// Create a new compiler builtin function with the specified type and name.
- llvm::Constant *CreateBuiltinFunction(llvm::FunctionType *Ty,
- StringRef Name,
- llvm::AttributeSet ExtraAttrs =
- llvm::AttributeSet());
+ llvm::Constant *
+ CreateBuiltinFunction(llvm::FunctionType *Ty, StringRef Name,
+ llvm::AttributeList ExtraAttrs = llvm::AttributeList());
/// Create a new runtime global variable with the specified type and name.
llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty,
StringRef Name);
@@ -1016,11 +1022,31 @@ public:
/// \param CalleeInfo - The callee information these attributes are being
/// constructed for. If valid, the attributes applied to this decl may
/// contribute to the function attributes and calling convention.
- /// \param PAL [out] - On return, the attribute list to use.
+ /// \param Attrs [out] - On return, the attribute list to use.
/// \param CallingConv [out] - On return, the LLVM calling convention to use.
void ConstructAttributeList(StringRef Name, const CGFunctionInfo &Info,
- CGCalleeInfo CalleeInfo, AttributeListType &PAL,
- unsigned &CallingConv, bool AttrOnCallSite);
+ CGCalleeInfo CalleeInfo,
+ llvm::AttributeList &Attrs, unsigned &CallingConv,
+ bool AttrOnCallSite);
+
+ /// Adds attributes to F according to our CodeGenOptions and LangOptions, as
+ /// though we had emitted it ourselves. We remove any attributes on F that
+ /// conflict with the attributes we add here.
+ ///
+ /// This is useful for adding attrs to bitcode modules that you want to link
+ /// with but don't control, such as CUDA's libdevice. When linking with such
+ /// a bitcode library, you might want to set e.g. its functions'
+ /// "unsafe-fp-math" attribute to match the attr of the functions you're
+ /// codegen'ing. Otherwise, LLVM will interpret the bitcode module's lack of
+ /// unsafe-fp-math attrs as tantamount to unsafe-fp-math=false, and then LLVM
+ /// will propagate unsafe-fp-math=false up to every transitive caller of a
+ /// function in the bitcode library!
+ ///
+ /// With the exception of fast-math attrs, this will only make the attributes
+ /// on the function more conservative. But it's unsafe to call this on a
+ /// function which relies on particular fast-math attributes for correctness.
+ /// It's up to you to ensure that this is safe.
+ void AddDefaultFnAttrs(llvm::Function &F);
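As a hedged illustration of the use case described in the comment above, a caller inside CodeGen could re-stamp the functions of a linked bitcode library like this (applyDefaultAttrs and LinkedFns are illustrative; this is not how the compiler drives it):

// Sketch only: after linking a third-party bitcode module (e.g. libdevice),
// give its function bodies the same default attributes as this TU so that
// settings such as "unsafe-fp-math" line up. Assumes the private
// lib/CodeGen/CodeGenModule.h is visible to the caller.
static void applyDefaultAttrs(clang::CodeGen::CodeGenModule &CGM,
                              llvm::ArrayRef<llvm::Function *> LinkedFns) {
  for (llvm::Function *F : LinkedFns)
    if (!F->isDeclaration())
      CGM.AddDefaultFnAttrs(*F);
}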
// Fills in the supplied string map with the set of target features for the
// passed in function.
@@ -1036,13 +1062,14 @@ public:
void RefreshTypeCacheForClass(const CXXRecordDecl *Class);
- /// \brief Appends Opts to the "Linker Options" metadata value.
+ /// \brief Appends Opts to the "llvm.linker.options" metadata value.
void AppendLinkerOptions(StringRef Opts);
/// \brief Appends a detect mismatch command to the linker options.
void AddDetectMismatch(StringRef Name, StringRef Value);
- /// \brief Appends a dependent lib to the "Linker Options" metadata value.
+ /// \brief Appends a dependent lib to the "llvm.linker.options" metadata
+ /// value.
void AddDependentLib(StringRef Lib);
llvm::GlobalVariable::LinkageTypes getFunctionLinkage(GlobalDecl GD);
@@ -1103,6 +1130,12 @@ public:
QualType Ty,
StringRef Category = StringRef()) const;
+ /// Imbue XRay attributes to a function, applying the always/never attribute
+ /// lists in the process. Returns true if we did imbue attributes this way,
+ /// false otherwise.
+ bool imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc,
+ StringRef Category = StringRef()) const;
+
SanitizerMetadata *getSanitizerMetadata() {
return SanitizerMD.get();
}
@@ -1176,7 +1209,7 @@ public:
void AddVTableTypeMetadata(llvm::GlobalVariable *VTable, CharUnits Offset,
const CXXRecordDecl *RD);
- /// \breif Get the declaration of std::terminate for the platform.
+ /// \brief Get the declaration of std::terminate for the platform.
llvm::Constant *getTerminateFn();
llvm::SanitizerStatReport &getSanStats();
@@ -1190,12 +1223,11 @@ public:
llvm::Constant *getNullPointer(llvm::PointerType *T, QualType QT);
private:
- llvm::Constant *
- GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D,
- bool ForVTable, bool DontDefer = false,
- bool IsThunk = false,
- llvm::AttributeSet ExtraAttrs = llvm::AttributeSet(),
- ForDefinition_t IsForDefinition = NotForDefinition);
+ llvm::Constant *GetOrCreateLLVMFunction(
+ StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable,
+ bool DontDefer = false, bool IsThunk = false,
+ llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
+ ForDefinition_t IsForDefinition = NotForDefinition);
llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName,
llvm::PointerType *PTy,
@@ -1222,7 +1254,6 @@ private:
void EmitDeclContext(const DeclContext *DC);
void EmitLinkageSpec(const LinkageSpecDecl *D);
- void CompleteDIClassType(const CXXMethodDecl* D);
/// \brief Emit the function that initializes C++ thread_local variables.
void EmitCXXThreadLocalInitFunc();
@@ -1255,6 +1286,12 @@ private:
/// Emit any needed decls for which code generation was deferred.
void EmitDeferred();
+  /// Try to emit external vtables as available_externally once all of their
+  /// inline virtual functions have been emitted. This runs after
+  /// EmitDeferred() and therefore
+ /// is not allowed to create new references to things that need to be emitted
+ /// lazily.
+ void EmitVTablesOpportunistically();
+
/// Call replaceAllUsesWith on all pairs in Replacements.
void applyReplacements();
@@ -1266,6 +1303,10 @@ private:
/// Emit any vtables which we deferred and still have a use for.
void EmitDeferredVTables();
+  /// Emit a dummy function that references a CoreFoundation symbol when
+ /// @available is used on Darwin.
+ void emitAtAvailableLinkGuard();
+
/// Emit the llvm.used and llvm.compiler.used metadata.
void emitLLVMUsed();
@@ -1284,6 +1325,9 @@ private:
/// Emits target specific Metadata for global declarations.
void EmitTargetMetadata();
+ /// Emits OpenCL specific Metadata e.g. OpenCL version.
+ void EmitOpenCLMetadata();
+
/// Emit the llvm.gcov metadata used to tell LLVM where to emit the .gcno and
/// .gcda files in a way that persists in .bc files.
void EmitCoverageFile();
@@ -1304,6 +1348,12 @@ private:
/// Check whether we can use a "simpler", more core exceptions personality
/// function.
void SimplifyPersonality();
+
+ /// Helper function for ConstructAttributeList and AddDefaultFnAttrs.
+ /// Constructs an AttrList for a function with the given properties.
+ void ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
+ bool AttrOnCallSite,
+ llvm::AttrBuilder &FuncAttrs);
};
} // end namespace CodeGen
} // end namespace clang
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp
index c6c3fa4..c3d66c1 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -612,11 +612,14 @@ uint64_t PGOHash::finalize() {
llvm::MD5::MD5Result Result;
MD5.final(Result);
using namespace llvm::support;
- return endian::read<uint64_t, little, unaligned>(Result);
+ return Result.low();
}
void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
const Decl *D = GD.getDecl();
+ if (!D->hasBody())
+ return;
+
bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
if (!InstrumentRegions && !PGOReader)
@@ -626,12 +629,14 @@ void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
// Constructors and destructors may be represented by several functions in IR.
// If so, instrument only base variant, others are implemented by delegation
// to the base one, it would be counted twice otherwise.
- if (CGM.getTarget().getCXXABI().hasConstructorVariants() &&
- ((isa<CXXConstructorDecl>(GD.getDecl()) &&
- GD.getCtorType() != Ctor_Base) ||
- (isa<CXXDestructorDecl>(GD.getDecl()) &&
- GD.getDtorType() != Dtor_Base))) {
+ if (CGM.getTarget().getCXXABI().hasConstructorVariants()) {
+ if (isa<CXXDestructorDecl>(D) && GD.getDtorType() != Dtor_Base)
return;
+
+ if (const auto *CCD = dyn_cast<CXXConstructorDecl>(D))
+ if (GD.getCtorType() != Ctor_Base &&
+ CodeGenFunction::IsConstructorDelegationValid(CCD))
+ return;
}
CGM.ClearUnusedCoverageMapping(D);
setFuncName(Fn);
@@ -664,7 +669,7 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) {
}
bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
- if (SkipCoverageMapping)
+ if (!D->getBody())
return true;
// Don't map the functions in system headers.
@@ -737,7 +742,8 @@ CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
Fn->setEntryCount(FunctionCount);
}
-void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) {
+void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
+ llvm::Value *StepV) {
if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap)
return;
if (!Builder.GetInsertBlock())
@@ -745,11 +751,18 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) {
unsigned Counter = (*RegionCounterMap)[S];
auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
- Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
- {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
- Builder.getInt64(FunctionHash),
- Builder.getInt32(NumRegionCounters),
- Builder.getInt32(Counter)});
+
+ llvm::Value *Args[] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
+ Builder.getInt64(FunctionHash),
+ Builder.getInt32(NumRegionCounters),
+ Builder.getInt32(Counter), StepV};
+ if (!StepV)
+ Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
+ makeArrayRef(Args, 4));
+ else
+ Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step),
+ makeArrayRef(Args));
}
// This method either inserts a call to the profile run-time during
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h
index 4f229cd..0759e65 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h
@@ -40,14 +40,11 @@ private:
std::unique_ptr<llvm::InstrProfRecord> ProfRecord;
std::vector<uint64_t> RegionCounts;
uint64_t CurrentRegionCount;
- /// \brief A flag that is set to true when this function doesn't need
- /// to have coverage mapping data.
- bool SkipCoverageMapping;
public:
CodeGenPGO(CodeGenModule &CGM)
- : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0),
- FunctionHash(0), CurrentRegionCount(0), SkipCoverageMapping(false) {}
+ : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0), FunctionHash(0),
+ CurrentRegionCount(0) {}
/// Whether or not we have PGO region data for the current function. This is
/// false both when we have no data at all and when our data has been
@@ -105,7 +102,8 @@ private:
void emitCounterRegionMapping(const Decl *D);
public:
- void emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S);
+ void emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
+ llvm::Value *StepV);
/// Return the region count for the counter at the given index.
uint64_t getRegionCount(const Stmt *S) {
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp
index 04224e7..8a75a55 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -139,6 +139,12 @@ CodeGenTBAA::getTBAAInfo(QualType QTy) {
}
}
+ // C++1z [basic.lval]p10: "If a program attempts to access the stored value of
+ // an object through a glvalue of other than one of the following types the
+ // behavior is undefined: [...] a char, unsigned char, or std::byte type."
+ if (Ty->isStdByteType())
+ return MetadataCache[Ty] = getChar();
+
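A small source-level example of why the rule quoted above forces this mapping: the following C++17 access is well-formed, so TBAA must let std::byte loads alias any object, exactly as it already does for char:

#include <cstddef>

// Sketch only: reading an object's representation through std::byte is
// permitted by [basic.lval]p10, so it must not be treated as a
// strict-aliasing violation -- hence std::byte gets the same TBAA node as
// char.
std::byte firstByte(const float &f) {
  return *reinterpret_cast<const std::byte *>(&f);
}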
// Handle pointers.
// TODO: Implement C++'s type "similarity" and consider dis-"similar"
// pointers distinct.
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypeCache.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypeCache.h
index 47e26bc..450eab4 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypeCache.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypeCache.h
@@ -60,6 +60,12 @@ struct CodeGenTypeCache {
llvm::PointerType *Int8PtrPtrTy;
};
+ /// void* in alloca address space
+ union {
+ llvm::PointerType *AllocaVoidPtrTy;
+ llvm::PointerType *AllocaInt8PtrTy;
+ };
+
/// The size and alignment of the builtin C type 'int'. This comes
/// up enough in various ABI lowering tasks to be worth pre-computing.
union {
@@ -88,6 +94,8 @@ struct CodeGenTypeCache {
unsigned char SizeAlignInBytes;
};
+ unsigned ASTAllocaAddressSpace;
+
CharUnits getSizeSize() const {
return CharUnits::fromQuantity(SizeSizeInBytes);
}
@@ -105,6 +113,8 @@ struct CodeGenTypeCache {
llvm::CallingConv::ID getRuntimeCC() const { return RuntimeCC; }
llvm::CallingConv::ID BuiltinCC;
llvm::CallingConv::ID getBuiltinCC() const { return BuiltinCC; }
+
+ unsigned getASTAllocaAddressSpace() const { return ASTAllocaAddressSpace; }
};
} // end namespace CodeGen
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp
index adb40c8..9306c4f 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -44,6 +44,10 @@ CodeGenTypes::~CodeGenTypes() {
delete &*I++;
}
+const CodeGenOptions &CodeGenTypes::getCodeGenOpts() const {
+ return CGM.getCodeGenOpts();
+}
+
void CodeGenTypes::addRecordTypeName(const RecordDecl *RD,
llvm::StructType *Ty,
StringRef suffix) {
@@ -472,7 +476,6 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
- case BuiltinType::OCLNDRange:
case BuiltinType::OCLReserveID:
ResultType = CGM.getOpenCLRuntime().convertOpenCLSpecificType(Ty);
break;
@@ -487,10 +490,11 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
break;
}
case Type::Auto:
- llvm_unreachable("Unexpected undeduced auto type!");
+ case Type::DeducedTemplateSpecialization:
+ llvm_unreachable("Unexpected undeduced type!");
case Type::Complex: {
llvm::Type *EltTy = ConvertType(cast<ComplexType>(Ty)->getElementType());
- ResultType = llvm::StructType::get(EltTy, EltTy, nullptr);
+ ResultType = llvm::StructType::get(EltTy, EltTy);
break;
}
case Type::LValueReference:
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h
index 2ce6591..9d0e3de 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h
@@ -178,6 +178,7 @@ public:
const TargetInfo &getTarget() const { return Target; }
CGCXXABI &getCXXABI() const { return TheCXXABI; }
llvm::LLVMContext &getLLVMContext() { return TheModule.getContext(); }
+ const CodeGenOptions &getCodeGenOpts() const;
/// ConvertType - Convert type T into a llvm::Type.
llvm::Type *ConvertType(QualType T);
@@ -303,11 +304,14 @@ public:
const CGFunctionInfo &arrangeCXXConstructorCall(const CallArgList &Args,
const CXXConstructorDecl *D,
CXXCtorType CtorKind,
- unsigned ExtraArgs);
+ unsigned ExtraPrefixArgs,
+ unsigned ExtraSuffixArgs,
+ bool PassProtoArgs = true);
const CGFunctionInfo &arrangeCXXMethodCall(const CallArgList &args,
const FunctionProtoType *type,
- RequiredArgs required);
+ RequiredArgs required,
+ unsigned numPrefixArgs);
const CGFunctionInfo &arrangeMSMemberPointerThunk(const CXXMethodDecl *MD);
const CGFunctionInfo &arrangeMSCtorClosure(const CXXConstructorDecl *CD,
CXXCtorType CT);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ConstantBuilder.h b/contrib/llvm/tools/clang/lib/CodeGen/ConstantBuilder.h
deleted file mode 100644
index 40b34a9..0000000
--- a/contrib/llvm/tools/clang/lib/CodeGen/ConstantBuilder.h
+++ /dev/null
@@ -1,444 +0,0 @@
-//===----- ConstantBuilder.h - Builder for LLVM IR constants ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class provides a convenient interface for building complex
-// global initializers.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_LIB_CODEGEN_CONSTANTBUILDER_H
-#define LLVM_CLANG_LIB_CODEGEN_CONSTANTBUILDER_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Constants.h"
-
-#include "CodeGenModule.h"
-
-#include <vector>
-
-namespace clang {
-namespace CodeGen {
-
-class ConstantStructBuilder;
-class ConstantArrayBuilder;
-
-/// A convenience builder class for complex constant initializers,
-/// especially for anonymous global structures used by various language
-/// runtimes.
-///
-/// The basic usage pattern is expected to be something like:
-/// ConstantInitBuilder builder(CGM);
-/// auto toplevel = builder.beginStruct();
-/// toplevel.addInt(CGM.SizeTy, widgets.size());
-/// auto widgetArray = builder.beginArray();
-/// for (auto &widget : widgets) {
-/// auto widgetDesc = widgetArray.beginStruct();
-/// widgetDesc.addInt(CGM.SizeTy, widget.getPower());
-/// widgetDesc.add(CGM.GetAddrOfConstantString(widget.getName()));
-/// widgetDesc.add(CGM.GetAddrOfGlobal(widget.getInitializerDecl()));
-/// widgetArray.add(widgetDesc.finish());
-/// }
-/// toplevel.add(widgetArray.finish());
-/// auto global = toplevel.finishAndCreateGlobal("WIDGET_LIST", Align,
-/// /*constant*/ true);
-class ConstantInitBuilder {
- struct SelfReference {
- llvm::GlobalVariable *Dummy;
- llvm::SmallVector<llvm::Constant*, 4> Indices;
-
- SelfReference(llvm::GlobalVariable *dummy) : Dummy(dummy) {}
- };
- CodeGenModule &CGM;
- llvm::SmallVector<llvm::Constant*, 16> Buffer;
- std::vector<SelfReference> SelfReferences;
- bool Frozen = false;
-
-public:
- explicit ConstantInitBuilder(CodeGenModule &CGM) : CGM(CGM) {}
-
- ~ConstantInitBuilder() {
- assert(Buffer.empty() && "didn't claim all values out of buffer");
- }
-
- class AggregateBuilderBase {
- protected:
- ConstantInitBuilder &Builder;
- AggregateBuilderBase *Parent;
- size_t Begin;
- bool Finished = false;
- bool Frozen = false;
-
- llvm::SmallVectorImpl<llvm::Constant*> &getBuffer() {
- return Builder.Buffer;
- }
-
- const llvm::SmallVectorImpl<llvm::Constant*> &getBuffer() const {
- return Builder.Buffer;
- }
-
- AggregateBuilderBase(ConstantInitBuilder &builder,
- AggregateBuilderBase *parent)
- : Builder(builder), Parent(parent), Begin(builder.Buffer.size()) {
- if (parent) {
- assert(!parent->Frozen && "parent already has child builder active");
- parent->Frozen = true;
- } else {
- assert(!builder.Frozen && "builder already has child builder active");
- builder.Frozen = true;
- }
- }
-
- ~AggregateBuilderBase() {
- assert(Finished && "didn't finish aggregate builder");
- }
-
- void markFinished() {
- assert(!Frozen && "child builder still active");
- assert(!Finished && "builder already finished");
- Finished = true;
- if (Parent) {
- assert(Parent->Frozen &&
- "parent not frozen while child builder active");
- Parent->Frozen = false;
- } else {
- assert(Builder.Frozen &&
- "builder not frozen while child builder active");
- Builder.Frozen = false;
- }
- }
-
- public:
- // Not copyable.
- AggregateBuilderBase(const AggregateBuilderBase &) = delete;
- AggregateBuilderBase &operator=(const AggregateBuilderBase &) = delete;
-
- // Movable, mostly to allow returning. But we have to write this out
- // properly to satisfy the assert in the destructor.
- AggregateBuilderBase(AggregateBuilderBase &&other)
- : Builder(other.Builder), Parent(other.Parent), Begin(other.Begin),
- Finished(other.Finished), Frozen(other.Frozen) {
- other.Finished = false;
- }
- AggregateBuilderBase &operator=(AggregateBuilderBase &&other) = delete;
-
- /// Abandon this builder completely.
- void abandon() {
- markFinished();
- auto &buffer = Builder.Buffer;
- buffer.erase(buffer.begin() + Begin, buffer.end());
- }
-
- /// Add a new value to this initializer.
- void add(llvm::Constant *value) {
- assert(value && "adding null value to constant initializer");
- assert(!Finished && "cannot add more values after finishing builder");
- assert(!Frozen && "cannot add values while subbuilder is active");
- Builder.Buffer.push_back(value);
- }
-
- /// Add an integer value of type size_t.
- void addSize(CharUnits size) {
- add(Builder.CGM.getSize(size));
- }
-
- /// Add an integer value of a specific type.
- void addInt(llvm::IntegerType *intTy, uint64_t value,
- bool isSigned = false) {
- add(llvm::ConstantInt::get(intTy, value, isSigned));
- }
-
- /// Add a null pointer of a specific type.
- void addNullPointer(llvm::PointerType *ptrTy) {
- add(llvm::ConstantPointerNull::get(ptrTy));
- }
-
- /// Add a bitcast of a value to a specific type.
- void addBitCast(llvm::Constant *value, llvm::Type *type) {
- add(llvm::ConstantExpr::getBitCast(value, type));
- }
-
- /// Add a bunch of new values to this initializer.
- void addAll(ArrayRef<llvm::Constant *> values) {
- assert(!Finished && "cannot add more values after finishing builder");
- assert(!Frozen && "cannot add values while subbuilder is active");
- Builder.Buffer.append(values.begin(), values.end());
- }
-
- /// An opaque class to hold the abstract position of a placeholder.
- class PlaceholderPosition {
- size_t Index;
- friend class AggregateBuilderBase;
- PlaceholderPosition(size_t index) : Index(index) {}
- };
-
- /// Add a placeholder value to the structure. The returned position
- /// can be used to set the value later; it will not be invalidated by
- /// any intermediate operations except (1) filling the same position or
- /// (2) finishing the entire builder.
- ///
- /// This is useful for emitting certain kinds of structure which
- /// contain some sort of summary field, generaly a count, before any
- /// of the data. By emitting a placeholder first, the structure can
- /// be emitted eagerly.
- PlaceholderPosition addPlaceholder() {
- assert(!Finished && "cannot add more values after finishing builder");
- assert(!Frozen && "cannot add values while subbuilder is active");
- Builder.Buffer.push_back(nullptr);
- return Builder.Buffer.size() - 1;
- }
-
- /// Fill a previously-added placeholder.
- void fillPlaceholderWithInt(PlaceholderPosition position,
- llvm::IntegerType *type, uint64_t value,
- bool isSigned = false) {
- fillPlaceholder(position, llvm::ConstantInt::get(type, value, isSigned));
- }
-
- /// Fill a previously-added placeholder.
- void fillPlaceholder(PlaceholderPosition position, llvm::Constant *value) {
- assert(!Finished && "cannot change values after finishing builder");
- assert(!Frozen && "cannot add values while subbuilder is active");
- llvm::Constant *&slot = Builder.Buffer[position.Index];
- assert(slot == nullptr && "placeholder already filled");
- slot = value;
- }
-
- /// Produce an address which will eventually point to the the next
- /// position to be filled. This is computed with an indexed
- /// getelementptr rather than by computing offsets.
- ///
- /// The returned pointer will have type T*, where T is the given
- /// position.
- llvm::Constant *getAddrOfCurrentPosition(llvm::Type *type) {
- // Make a global variable. We will replace this with a GEP to this
- // position after installing the initializer.
- auto dummy =
- new llvm::GlobalVariable(Builder.CGM.getModule(), type, true,
- llvm::GlobalVariable::PrivateLinkage,
- nullptr, "");
- Builder.SelfReferences.emplace_back(dummy);
- auto &entry = Builder.SelfReferences.back();
- (void) getGEPIndicesToCurrentPosition(entry.Indices);
- return dummy;
- }
-
- ArrayRef<llvm::Constant*> getGEPIndicesToCurrentPosition(
- llvm::SmallVectorImpl<llvm::Constant*> &indices) {
- getGEPIndicesTo(indices, Builder.Buffer.size());
- return indices;
- }
-
- ConstantArrayBuilder beginArray(llvm::Type *eltTy = nullptr);
- ConstantStructBuilder beginStruct(llvm::StructType *structTy = nullptr);
-
- private:
- void getGEPIndicesTo(llvm::SmallVectorImpl<llvm::Constant*> &indices,
- size_t position) const {
- // Recurse on the parent builder if present.
- if (Parent) {
- Parent->getGEPIndicesTo(indices, Begin);
-
- // Otherwise, add an index to drill into the first level of pointer.
- } else {
- assert(indices.empty());
- indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty, 0));
- }
-
- assert(position >= Begin);
- // We have to use i32 here because struct GEPs demand i32 indices.
- // It's rather unlikely to matter in practice.
- indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty,
- position - Begin));
- }
- };
-
- template <class Impl>
- class AggregateBuilder : public AggregateBuilderBase {
- protected:
- AggregateBuilder(ConstantInitBuilder &builder,
- AggregateBuilderBase *parent)
- : AggregateBuilderBase(builder, parent) {}
-
- Impl &asImpl() { return *static_cast<Impl*>(this); }
-
- public:
- /// Given that this builder was created by beginning an array or struct
- /// component on the given parent builder, finish the array/struct
- /// component and add it to the parent.
- ///
- /// It is an intentional choice that the parent is passed in explicitly
- /// despite it being redundant with information already kept in the
- /// builder. This aids in readability by making it easier to find the
- /// places that add components to a builder, as well as "bookending"
- /// the sub-builder more explicitly.
- void finishAndAddTo(AggregateBuilderBase &parent) {
- assert(Parent == &parent && "adding to non-parent builder");
- parent.add(asImpl().finishImpl());
- }
-
- /// Given that this builder was created by beginning an array or struct
- /// directly on a ConstantInitBuilder, finish the array/struct and
- /// create a global variable with it as the initializer.
- template <class... As>
- llvm::GlobalVariable *finishAndCreateGlobal(As &&...args) {
- assert(!Parent && "finishing non-root builder");
- return Builder.createGlobal(asImpl().finishImpl(),
- std::forward<As>(args)...);
- }
-
- /// Given that this builder was created by beginning an array or struct
- /// directly on a ConstantInitBuilder, finish the array/struct and
- /// set it as the initializer of the given global variable.
- void finishAndSetAsInitializer(llvm::GlobalVariable *global) {
- assert(!Parent && "finishing non-root builder");
- return Builder.setGlobalInitializer(global, asImpl().finishImpl());
- }
- };
-
- ConstantArrayBuilder beginArray(llvm::Type *eltTy = nullptr);
-
- ConstantStructBuilder beginStruct(llvm::StructType *structTy = nullptr);
-
-private:
- llvm::GlobalVariable *createGlobal(llvm::Constant *initializer,
- const llvm::Twine &name,
- CharUnits alignment,
- bool constant = false,
- llvm::GlobalValue::LinkageTypes linkage
- = llvm::GlobalValue::InternalLinkage,
- unsigned addressSpace = 0) {
- auto GV = new llvm::GlobalVariable(CGM.getModule(),
- initializer->getType(),
- constant,
- linkage,
- initializer,
- name,
- /*insert before*/ nullptr,
- llvm::GlobalValue::NotThreadLocal,
- addressSpace);
- GV->setAlignment(alignment.getQuantity());
- resolveSelfReferences(GV);
- return GV;
- }
-
- void setGlobalInitializer(llvm::GlobalVariable *GV,
- llvm::Constant *initializer) {
- GV->setInitializer(initializer);
- resolveSelfReferences(GV);
- }
-
- void resolveSelfReferences(llvm::GlobalVariable *GV) {
- for (auto &entry : SelfReferences) {
- llvm::Constant *resolvedReference =
- llvm::ConstantExpr::getInBoundsGetElementPtr(
- GV->getValueType(), GV, entry.Indices);
- entry.Dummy->replaceAllUsesWith(resolvedReference);
- entry.Dummy->eraseFromParent();
- }
- }
-};
-
-/// A helper class of ConstantInitBuilder, used for building constant
-/// array initializers.
-class ConstantArrayBuilder
- : public ConstantInitBuilder::AggregateBuilder<ConstantArrayBuilder> {
- llvm::Type *EltTy;
- friend class ConstantInitBuilder;
- template <class Impl> friend class ConstantInitBuilder::AggregateBuilder;
- ConstantArrayBuilder(ConstantInitBuilder &builder,
- AggregateBuilderBase *parent, llvm::Type *eltTy)
- : AggregateBuilder(builder, parent), EltTy(eltTy) {}
-public:
- size_t size() const {
- assert(!Finished);
- assert(!Frozen);
- assert(Begin <= getBuffer().size());
- return getBuffer().size() - Begin;
- }
-
- bool empty() const {
- return size() == 0;
- }
-
-private:
- /// Form an array constant from the values that have been added to this
- /// builder.
- llvm::Constant *finishImpl() {
- markFinished();
-
- auto &buffer = getBuffer();
- assert((Begin < buffer.size() ||
- (Begin == buffer.size() && EltTy))
- && "didn't add any array elements without element type");
- auto elts = llvm::makeArrayRef(buffer).slice(Begin);
- auto eltTy = EltTy ? EltTy : elts[0]->getType();
- auto type = llvm::ArrayType::get(eltTy, elts.size());
- auto constant = llvm::ConstantArray::get(type, elts);
- buffer.erase(buffer.begin() + Begin, buffer.end());
- return constant;
- }
-};
-
-inline ConstantArrayBuilder
-ConstantInitBuilder::beginArray(llvm::Type *eltTy) {
- return ConstantArrayBuilder(*this, nullptr, eltTy);
-}
-
-inline ConstantArrayBuilder
-ConstantInitBuilder::AggregateBuilderBase::beginArray(llvm::Type *eltTy) {
- return ConstantArrayBuilder(Builder, this, eltTy);
-}
-
-/// A helper class of ConstantInitBuilder, used for building constant
-/// struct initializers.
-class ConstantStructBuilder
- : public ConstantInitBuilder::AggregateBuilder<ConstantStructBuilder> {
- llvm::StructType *Ty;
- friend class ConstantInitBuilder;
- template <class Impl> friend class ConstantInitBuilder::AggregateBuilder;
- ConstantStructBuilder(ConstantInitBuilder &builder,
- AggregateBuilderBase *parent, llvm::StructType *ty)
- : AggregateBuilder(builder, parent), Ty(ty) {}
-
- /// Finish the struct.
- llvm::Constant *finishImpl() {
- markFinished();
-
- auto &buffer = getBuffer();
- assert(Begin < buffer.size() && "didn't add any struct elements?");
- auto elts = llvm::makeArrayRef(buffer).slice(Begin);
-
- llvm::Constant *constant;
- if (Ty) {
- constant = llvm::ConstantStruct::get(Ty, elts);
- } else {
- constant = llvm::ConstantStruct::getAnon(elts, /*packed*/ false);
- }
-
- buffer.erase(buffer.begin() + Begin, buffer.end());
- return constant;
- }
-};
-
-inline ConstantStructBuilder
-ConstantInitBuilder::beginStruct(llvm::StructType *structTy) {
- return ConstantStructBuilder(*this, nullptr, structTy);
-}
-
-inline ConstantStructBuilder
-ConstantInitBuilder::AggregateBuilderBase::beginStruct(
- llvm::StructType *structTy) {
- return ConstantStructBuilder(Builder, this, structTy);
-}
-
-} // end namespace CodeGen
-} // end namespace clang
-
-#endif
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ConstantInitBuilder.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ConstantInitBuilder.cpp
new file mode 100644
index 0000000..7f8d809
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/CodeGen/ConstantInitBuilder.cpp
@@ -0,0 +1,280 @@
+//===--- ConstantInitBuilder.cpp - Global initializer builder -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines out-of-line routines for building initializers for
+// global variables, in particular the kind of globals that are implicitly
+// introduced by various language ABIs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/CodeGen/ConstantInitBuilder.h"
+#include "CodeGenModule.h"
+
+using namespace clang;
+using namespace CodeGen;
+
+llvm::Type *ConstantInitFuture::getType() const {
+ assert(Data && "dereferencing null future");
+ if (Data.is<llvm::Constant*>()) {
+ return Data.get<llvm::Constant*>()->getType();
+ } else {
+ return Data.get<ConstantInitBuilderBase*>()->Buffer[0]->getType();
+ }
+}
+
+void ConstantInitFuture::abandon() {
+ assert(Data && "abandoning null future");
+ if (auto builder = Data.dyn_cast<ConstantInitBuilderBase*>()) {
+ builder->abandon(0);
+ }
+ Data = nullptr;
+}
+
+void ConstantInitFuture::installInGlobal(llvm::GlobalVariable *GV) {
+ assert(Data && "installing null future");
+ if (Data.is<llvm::Constant*>()) {
+ GV->setInitializer(Data.get<llvm::Constant*>());
+ } else {
+ auto &builder = *Data.get<ConstantInitBuilderBase*>();
+ assert(builder.Buffer.size() == 1);
+ builder.setGlobalInitializer(GV, builder.Buffer[0]);
+ builder.Buffer.clear();
+ Data = nullptr;
+ }
+}
+
+ConstantInitFuture
+ConstantInitBuilderBase::createFuture(llvm::Constant *initializer) {
+  assert(Buffer.empty() && "buffer not currently empty");
+ Buffer.push_back(initializer);
+ return ConstantInitFuture(this);
+}
+
+// Only used in this file.
+inline ConstantInitFuture::ConstantInitFuture(ConstantInitBuilderBase *builder)
+ : Data(builder) {
+ assert(!builder->Frozen);
+ assert(builder->Buffer.size() == 1);
+ assert(builder->Buffer[0] != nullptr);
+}
+
+llvm::GlobalVariable *
+ConstantInitBuilderBase::createGlobal(llvm::Constant *initializer,
+ const llvm::Twine &name,
+ CharUnits alignment,
+ bool constant,
+ llvm::GlobalValue::LinkageTypes linkage,
+ unsigned addressSpace) {
+ auto GV = new llvm::GlobalVariable(CGM.getModule(),
+ initializer->getType(),
+ constant,
+ linkage,
+ initializer,
+ name,
+ /*insert before*/ nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ addressSpace);
+ GV->setAlignment(alignment.getQuantity());
+ resolveSelfReferences(GV);
+ return GV;
+}
+
+void ConstantInitBuilderBase::setGlobalInitializer(llvm::GlobalVariable *GV,
+ llvm::Constant *initializer){
+ GV->setInitializer(initializer);
+
+ if (!SelfReferences.empty())
+ resolveSelfReferences(GV);
+}
+
+void ConstantInitBuilderBase::resolveSelfReferences(llvm::GlobalVariable *GV) {
+ for (auto &entry : SelfReferences) {
+ llvm::Constant *resolvedReference =
+ llvm::ConstantExpr::getInBoundsGetElementPtr(
+ GV->getValueType(), GV, entry.Indices);
+ auto dummy = entry.Dummy;
+ dummy->replaceAllUsesWith(resolvedReference);
+ dummy->eraseFromParent();
+ }
+ SelfReferences.clear();
+}
+
+void ConstantInitBuilderBase::abandon(size_t newEnd) {
+ // Remove all the entries we've added.
+ Buffer.erase(Buffer.begin() + newEnd, Buffer.end());
+
+ // If we're abandoning all the way to the beginning, destroy
+ // all the self-references, because we might not get another
+ // opportunity.
+ if (newEnd == 0) {
+ for (auto &entry : SelfReferences) {
+ auto dummy = entry.Dummy;
+ dummy->replaceAllUsesWith(llvm::UndefValue::get(dummy->getType()));
+ dummy->eraseFromParent();
+ }
+ SelfReferences.clear();
+ }
+}
+
+void ConstantAggregateBuilderBase::addSize(CharUnits size) {
+ add(Builder.CGM.getSize(size));
+}
+
+llvm::Constant *
+ConstantAggregateBuilderBase::getRelativeOffset(llvm::IntegerType *offsetType,
+ llvm::Constant *target) {
+ // Compute the address of the relative-address slot.
+ auto base = getAddrOfCurrentPosition(offsetType);
+
+ // Subtract.
+ base = llvm::ConstantExpr::getPtrToInt(base, Builder.CGM.IntPtrTy);
+ target = llvm::ConstantExpr::getPtrToInt(target, Builder.CGM.IntPtrTy);
+ llvm::Constant *offset = llvm::ConstantExpr::getSub(target, base);
+
+ // Truncate to the relative-address type if necessary.
+ if (Builder.CGM.IntPtrTy != offsetType) {
+ offset = llvm::ConstantExpr::getTrunc(offset, offsetType);
+ }
+
+ return offset;
+}
+
+llvm::Constant *
+ConstantAggregateBuilderBase::getAddrOfCurrentPosition(llvm::Type *type) {
+ // Make a global variable. We will replace this with a GEP to this
+ // position after installing the initializer.
+ auto dummy =
+ new llvm::GlobalVariable(Builder.CGM.getModule(), type, true,
+ llvm::GlobalVariable::PrivateLinkage,
+ nullptr, "");
+ Builder.SelfReferences.emplace_back(dummy);
+ auto &entry = Builder.SelfReferences.back();
+ (void) getGEPIndicesToCurrentPosition(entry.Indices);
+ return dummy;
+}
+
+void ConstantAggregateBuilderBase::getGEPIndicesTo(
+ llvm::SmallVectorImpl<llvm::Constant*> &indices,
+ size_t position) const {
+ // Recurse on the parent builder if present.
+ if (Parent) {
+ Parent->getGEPIndicesTo(indices, Begin);
+
+ // Otherwise, add an index to drill into the first level of pointer.
+ } else {
+ assert(indices.empty());
+ indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty, 0));
+ }
+
+ assert(position >= Begin);
+ // We have to use i32 here because struct GEPs demand i32 indices.
+ // It's rather unlikely to matter in practice.
+ indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty,
+ position - Begin));
+}
+
+ConstantAggregateBuilderBase::PlaceholderPosition
+ConstantAggregateBuilderBase::addPlaceholderWithSize(llvm::Type *type) {
+ // Bring the offset up to the last field.
+ CharUnits offset = getNextOffsetFromGlobal();
+
+ // Create the placeholder.
+ auto position = addPlaceholder();
+
+ // Advance the offset past that field.
+ auto &layout = Builder.CGM.getDataLayout();
+ if (!Packed)
+ offset = offset.alignTo(CharUnits::fromQuantity(
+ layout.getABITypeAlignment(type)));
+ offset += CharUnits::fromQuantity(layout.getTypeStoreSize(type));
+
+ CachedOffsetEnd = Builder.Buffer.size();
+ CachedOffsetFromGlobal = offset;
+
+ return position;
+}
+
+CharUnits ConstantAggregateBuilderBase::getOffsetFromGlobalTo(size_t end) const{
+ size_t cacheEnd = CachedOffsetEnd;
+ assert(cacheEnd <= end);
+
+ // Fast path: if the cache is valid, just use it.
+ if (cacheEnd == end) {
+ return CachedOffsetFromGlobal;
+ }
+
+ // If the cached range ends before the index at which the current
+ // aggregate starts, recurse for the parent.
+ CharUnits offset;
+ if (cacheEnd < Begin) {
+ assert(cacheEnd == 0);
+ assert(Parent && "Begin != 0 for root builder");
+ cacheEnd = Begin;
+ offset = Parent->getOffsetFromGlobalTo(Begin);
+ } else {
+ offset = CachedOffsetFromGlobal;
+ }
+
+ // Perform simple layout on the elements in cacheEnd..<end.
+ if (cacheEnd != end) {
+ auto &layout = Builder.CGM.getDataLayout();
+ do {
+ llvm::Constant *element = Builder.Buffer[cacheEnd];
+ assert(element != nullptr &&
+ "cannot compute offset when a placeholder is present");
+ llvm::Type *elementType = element->getType();
+ if (!Packed)
+ offset = offset.alignTo(CharUnits::fromQuantity(
+ layout.getABITypeAlignment(elementType)));
+ offset += CharUnits::fromQuantity(layout.getTypeStoreSize(elementType));
+ } while (++cacheEnd != end);
+ }
+
+ // Cache and return.
+ CachedOffsetEnd = cacheEnd;
+ CachedOffsetFromGlobal = offset;
+ return offset;
+}
+
+llvm::Constant *ConstantAggregateBuilderBase::finishArray(llvm::Type *eltTy) {
+ markFinished();
+
+ auto &buffer = getBuffer();
+ assert((Begin < buffer.size() ||
+ (Begin == buffer.size() && eltTy))
+ && "didn't add any array elements without element type");
+ auto elts = llvm::makeArrayRef(buffer).slice(Begin);
+ if (!eltTy) eltTy = elts[0]->getType();
+ auto type = llvm::ArrayType::get(eltTy, elts.size());
+ auto constant = llvm::ConstantArray::get(type, elts);
+ buffer.erase(buffer.begin() + Begin, buffer.end());
+ return constant;
+}
+
+llvm::Constant *
+ConstantAggregateBuilderBase::finishStruct(llvm::StructType *ty) {
+ markFinished();
+
+ auto &buffer = getBuffer();
+ auto elts = llvm::makeArrayRef(buffer).slice(Begin);
+
+ if (ty == nullptr && elts.empty())
+ ty = llvm::StructType::get(Builder.CGM.getLLVMContext(), {}, Packed);
+
+ llvm::Constant *constant;
+ if (ty) {
+ assert(ty->isPacked() == Packed);
+ constant = llvm::ConstantStruct::get(ty, elts);
+ } else {
+ constant = llvm::ConstantStruct::getAnon(elts, Packed);
+ }
+
+ buffer.erase(buffer.begin() + Begin, buffer.end());
+ return constant;
+}
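The builder interface this file implements (now declared in clang/CodeGen/ConstantInitBuilder.h) keeps the fluent usage pattern of the deleted header; a hedged sketch of typical use from inside CodeGen, including a count placeholder (emitWidgetList, Widgets and the global's name are illustrative):

#include "clang/CodeGen/ConstantInitBuilder.h"

// Sketch only: build { size_t count, i8* widgets[N] } eagerly, filling the
// count placeholder once the array has been emitted.
static llvm::GlobalVariable *
emitWidgetList(clang::CodeGen::CodeGenModule &CGM,
               llvm::ArrayRef<llvm::Constant *> Widgets) {
  clang::CodeGen::ConstantInitBuilder Builder(CGM);
  auto Fields = Builder.beginStruct();

  // Emit the count placeholder first so the aggregate can be built eagerly.
  auto CountPos = Fields.addPlaceholder();

  auto Array = Fields.beginArray(CGM.Int8PtrTy);
  for (llvm::Constant *W : Widgets)
    Array.addBitCast(W, CGM.Int8PtrTy);
  Array.finishAndAddTo(Fields);

  Fields.fillPlaceholderWithInt(CountPos, CGM.SizeTy, Widgets.size());

  return Fields.finishAndCreateGlobal("widget.list",
                                      clang::CharUnits::fromQuantity(8),
                                      /*constant=*/true);
}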
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp
index 5bc9e50..a102347 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -961,12 +961,10 @@ struct CounterCoverageMappingBuilder
}
};
-bool isMachO(const CodeGenModule &CGM) {
- return CGM.getTarget().getTriple().isOSBinFormatMachO();
-}
-
-StringRef getCoverageSection(const CodeGenModule &CGM) {
- return llvm::getInstrProfCoverageSectionName(isMachO(CGM));
+std::string getCoverageSection(const CodeGenModule &CGM) {
+ return llvm::getInstrProfSectionName(
+ llvm::IPSK_covmap,
+ CGM.getContext().getTargetInfo().getTriple().getObjectFormat());
}
std::string normalizeFilename(StringRef Filename) {
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h b/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h
index 2435830..c7bdeac 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h
@@ -202,7 +202,7 @@ public:
template <std::size_t... Is>
T restore(CodeGenFunction &CGF, llvm::index_sequence<Is...>) {
// It's important that the restores are emitted in order. The braced init
- // list guarentees that.
+ // list guarantees that.
return T{DominatingValue<As>::restore(CGF, std::get<Is>(Saved))...};
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp
index f7a8dd6..bd4cb9a 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -24,8 +24,8 @@
#include "CGVTables.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
-#include "ConstantBuilder.h"
#include "TargetInfo.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Mangle.h"
#include "clang/AST/Type.h"
#include "clang/AST/StmtCXX.h"
@@ -62,12 +62,20 @@ public:
bool classifyReturnType(CGFunctionInfo &FI) const override;
+ bool passClassIndirect(const CXXRecordDecl *RD) const {
+ // Clang <= 4 used the pre-C++11 rule, which ignores move operations.
+ // The PS4 platform ABI follows the behavior of Clang 3.2.
+ if (CGM.getCodeGenOpts().getClangABICompat() <=
+ CodeGenOptions::ClangABI::Ver4 ||
+ CGM.getTriple().getOS() == llvm::Triple::PS4)
+ return RD->hasNonTrivialDestructor() ||
+ RD->hasNonTrivialCopyConstructor();
+ return !canCopyArgument(RD);
+ }
+
RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override {
- // Structures with either a non-trivial destructor or a non-trivial
- // copy constructor are always indirect.
- // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared
- // special members.
- if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor())
+ // If C++ prohibits us from making a copy, pass by address.
+ if (passClassIndirect(RD))
return RAA_Indirect;
return RAA_Default;
}
@@ -207,8 +215,9 @@ public:
void EmitCXXConstructors(const CXXConstructorDecl *D) override;
- void buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
- SmallVectorImpl<CanQualType> &ArgTys) override;
+ AddedStructorArgs
+ buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
+ SmallVectorImpl<CanQualType> &ArgTys) override;
bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor,
CXXDtorType DT) const override {
@@ -225,11 +234,10 @@ public:
void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override;
- unsigned addImplicitConstructorArgs(CodeGenFunction &CGF,
- const CXXConstructorDecl *D,
- CXXCtorType Type, bool ForVirtualBase,
- bool Delegating,
- CallArgList &Args) override;
+ AddedStructorArgs
+ addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D,
+ CXXCtorType Type, bool ForVirtualBase,
+ bool Delegating, CallArgList &Args) override;
void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
CXXDtorType Type, bool ForVirtualBase,
@@ -366,20 +374,30 @@ public:
void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override;
private:
- bool hasAnyVirtualInlineFunction(const CXXRecordDecl *RD) const {
- const auto &VtableLayout =
- CGM.getItaniumVTableContext().getVTableLayout(RD);
-
- for (const auto &VtableComponent : VtableLayout.vtable_components()) {
- // Skip empty slot.
- if (!VtableComponent.isUsedFunctionPointerKind())
- continue;
-
- const CXXMethodDecl *Method = VtableComponent.getFunctionDecl();
- if (Method->getCanonicalDecl()->isInlined())
- return true;
- }
- return false;
+ bool hasAnyUnusedVirtualInlineFunction(const CXXRecordDecl *RD) const {
+ const auto &VtableLayout =
+ CGM.getItaniumVTableContext().getVTableLayout(RD);
+
+ for (const auto &VtableComponent : VtableLayout.vtable_components()) {
+ // Skip empty slot.
+ if (!VtableComponent.isUsedFunctionPointerKind())
+ continue;
+
+ const CXXMethodDecl *Method = VtableComponent.getFunctionDecl();
+ if (!Method->getCanonicalDecl()->isInlined())
+ continue;
+
+ StringRef Name = CGM.getMangledName(VtableComponent.getGlobalDecl());
+ auto *Entry = CGM.GetGlobalValue(Name);
+      // This checks whether the virtual inline function has already been
+      // emitted. Note that the function may only be emitted after the vtable
+      // was speculatively emitted; because of this we do an extra pass after
+      // emitting all deferred vtables to find and emit these vtables
+      // opportunistically.
+ if (!Entry || Entry->isDeclaration())
+ return true;
+ }
+ return false;
}
bool isVTableHidden(const CXXRecordDecl *RD) const {
@@ -499,7 +517,7 @@ llvm::Type *
ItaniumCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) {
if (MPT->isMemberDataPointer())
return CGM.PtrDiffTy;
- return llvm::StructType::get(CGM.PtrDiffTy, CGM.PtrDiffTy, nullptr);
+ return llvm::StructType::get(CGM.PtrDiffTy, CGM.PtrDiffTy);
}
/// In the Itanium and ARM ABIs, method pointers have the form:
@@ -988,10 +1006,8 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
if (!RD)
return false;
- // Return indirectly if we have a non-trivial copy ctor or non-trivial dtor.
- // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared
- // special members.
- if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor()) {
+ // If C++ prohibits us from making a copy, return by address.
+ if (passClassIndirect(RD)) {
auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
return true;
@@ -1134,8 +1150,8 @@ static llvm::Constant *getItaniumDynamicCastFn(CodeGenFunction &CGF) {
// Mark the function as nounwind readonly.
llvm::Attribute::AttrKind FuncAttrs[] = { llvm::Attribute::NoUnwind,
llvm::Attribute::ReadOnly };
- llvm::AttributeSet Attrs = llvm::AttributeSet::get(
- CGF.getLLVMContext(), llvm::AttributeSet::FunctionIndex, FuncAttrs);
+ llvm::AttributeList Attrs = llvm::AttributeList::get(
+ CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs);
return CGF.CGM.CreateRuntimeFunction(FTy, "__dynamic_cast", Attrs);
}
@@ -1353,7 +1369,7 @@ void ItaniumCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) {
}
}
-void
+CGCXXABI::AddedStructorArgs
ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
SmallVectorImpl<CanQualType> &ArgTys) {
ASTContext &Context = getContext();
@@ -1362,9 +1378,12 @@ ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
// These are Clang types, so we don't need to worry about sret yet.
// Check if we need to add a VTT parameter (which has type void **).
- if (T == StructorType::Base && MD->getParent()->getNumVBases() != 0)
+ if (T == StructorType::Base && MD->getParent()->getNumVBases() != 0) {
ArgTys.insert(ArgTys.begin() + 1,
Context.getPointerType(Context.VoidPtrTy));
+ return AddedStructorArgs::prefix(1);
+ }
+ return AddedStructorArgs{};
}
void ItaniumCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) {
@@ -1395,9 +1414,9 @@ void ItaniumCXXABI::addImplicitStructorParams(CodeGenFunction &CGF,
// FIXME: avoid the fake decl
QualType T = Context.getPointerType(Context.VoidPtrTy);
- ImplicitParamDecl *VTTDecl
- = ImplicitParamDecl::Create(Context, nullptr, MD->getLocation(),
- &Context.Idents.get("vtt"), T);
+ auto *VTTDecl = ImplicitParamDecl::Create(
+ Context, /*DC=*/nullptr, MD->getLocation(), &Context.Idents.get("vtt"),
+ T, ImplicitParamDecl::CXXVTT);
Params.insert(Params.begin() + 1, VTTDecl);
getStructorImplicitParamDecl(CGF) = VTTDecl;
}
@@ -1429,11 +1448,11 @@ void ItaniumCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) {
CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue);
}
-unsigned ItaniumCXXABI::addImplicitConstructorArgs(
+CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs(
CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type,
bool ForVirtualBase, bool Delegating, CallArgList &Args) {
if (!NeedsVTTParameter(GlobalDecl(D, Type)))
- return 0;
+ return AddedStructorArgs{};
// Insert the implicit 'vtt' argument as the second argument.
llvm::Value *VTT =
@@ -1441,7 +1460,7 @@ unsigned ItaniumCXXABI::addImplicitConstructorArgs(
QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy);
Args.insert(Args.begin() + 1,
CallArg(RValue::get(VTT), VTTTy, /*needscopy=*/false));
- return 1; // Added one arg.
+ return AddedStructorArgs::prefix(1); // Added one arg.
}
void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
@@ -1684,11 +1703,11 @@ bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const {
if (CGM.getLangOpts().AppleKext)
return false;
- // If we don't have any inline virtual functions, and if vtable is not hidden,
- // then we are safe to emit available_externally copy of vtable.
+  // If there are no inline virtual functions left unemitted, and the vtable
+  // is not hidden, then it is safe to emit an available_externally copy of
+  // the vtable.
// FIXME we can still emit a copy of the vtable if we
// can emit definition of the inline functions.
- return !hasAnyVirtualInlineFunction(RD) && !isVTableHidden(RD);
+ return !hasAnyUnusedVirtualInlineFunction(RD) && !isVTableHidden(RD);
}
static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF,
Address InitialPtr,
@@ -1907,10 +1926,11 @@ static llvm::Constant *getGuardAcquireFn(CodeGenModule &CGM,
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.getTypes().ConvertType(CGM.getContext().IntTy),
GuardPtrTy, /*isVarArg=*/false);
- return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_acquire",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NoUnwind));
+ return CGM.CreateRuntimeFunction(
+ FTy, "__cxa_guard_acquire",
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoUnwind));
}
static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM,
@@ -1918,10 +1938,11 @@ static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM,
// void __cxa_guard_release(__guard *guard_object);
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false);
- return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_release",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NoUnwind));
+ return CGM.CreateRuntimeFunction(
+ FTy, "__cxa_guard_release",
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoUnwind));
}
static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM,
@@ -1929,10 +1950,11 @@ static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM,
// void __cxa_guard_abort(__guard *guard_object);
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false);
- return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_abort",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NoUnwind));
+ return CGM.CreateRuntimeFunction(
+ FTy, "__cxa_guard_abort",
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoUnwind));
}
namespace {
@@ -2015,10 +2037,11 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
// The ABI says: "It is suggested that it be emitted in the same COMDAT
// group as the associated data object." In practice, this doesn't work for
- // non-ELF object formats, so only do it for ELF.
+ // non-ELF and non-Wasm object formats, so only do it for ELF and Wasm.
llvm::Comdat *C = var->getComdat();
if (!D.isLocalVarDecl() && C &&
- CGM.getTarget().getTriple().isOSBinFormatELF()) {
+ (CGM.getTarget().getTriple().isOSBinFormatELF() ||
+ CGM.getTarget().getTriple().isOSBinFormatWasm())) {
guard->setComdat(C);
// An inline variable's guard function is run from the per-TU
// initialization function, not via a dedicated global ctor function, so
@@ -2161,7 +2184,9 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF,
// Create a variable that binds the atexit to this shared object.
llvm::Constant *handle =
- CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle");
+ CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle");
+ auto *GV = cast<llvm::GlobalValue>(handle->stripPointerCasts());
+ GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
llvm::Value *args[] = {
llvm::ConstantExpr::getBitCast(dtor, dtorTy),
@@ -2567,6 +2592,9 @@ ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) {
if (!GV) {
// Create a new global variable.
+ // Note for the future: If we ever want to do deferred emission of RTTI,
+ // check whether emitting vtables opportunistically needs any adjustment.
+
GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy,
/*Constant=*/true,
llvm::GlobalValue::ExternalLinkage, nullptr,
@@ -2634,7 +2662,6 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) {
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
- case BuiltinType::OCLNDRange:
case BuiltinType::OCLReserveID:
return false;
@@ -2711,7 +2738,9 @@ static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM,
// function.
bool IsDLLImport = RD->hasAttr<DLLImportAttr>();
if (CGM.getVTables().isVTableExternal(RD))
- return IsDLLImport ? false : true;
+ return IsDLLImport && !CGM.getTriple().isWindowsItaniumEnvironment()
+ ? false
+ : true;
if (IsDLLImport)
return true;
@@ -2814,7 +2843,8 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
llvm_unreachable("References shouldn't get here");
case Type::Auto:
- llvm_unreachable("Undeduced auto type shouldn't get here");
+ case Type::DeducedTemplateSpecialization:
+ llvm_unreachable("Undeduced type shouldn't get here");
case Type::Pipe:
llvm_unreachable("Pipe types shouldn't get here");
@@ -2935,6 +2965,8 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM,
return llvm::GlobalValue::InternalLinkage;
case VisibleNoLinkage:
+ case ModuleInternalLinkage:
+ case ModuleLinkage:
case ExternalLinkage:
// RTTI is not enabled, which means that this type info struct is going
// to be used for exception handling. Give it linkonce_odr linkage.
@@ -2946,7 +2978,8 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM,
if (RD->hasAttr<WeakAttr>())
return llvm::GlobalValue::WeakODRLinkage;
if (CGM.getTriple().isWindowsItaniumEnvironment())
- if (RD->hasAttr<DLLImportAttr>())
+ if (RD->hasAttr<DLLImportAttr>() &&
+ ShouldUseExternalRTTIDescriptor(CGM, Ty))
return llvm::GlobalValue::ExternalLinkage;
if (RD->isDynamicClass()) {
llvm::GlobalValue::LinkageTypes LT = CGM.getVTableLinkage(RD);
@@ -3044,7 +3077,8 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force,
llvm_unreachable("References shouldn't get here");
case Type::Auto:
- llvm_unreachable("Undeduced auto type shouldn't get here");
+ case Type::DeducedTemplateSpecialization:
+ llvm_unreachable("Undeduced type shouldn't get here");
case Type::Pipe:
llvm_unreachable("Pipe type shouldn't get here");
@@ -3158,7 +3192,8 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force,
if (DLLExport || (RD && RD->hasAttr<DLLExportAttr>())) {
TypeName->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
- } else if (CGM.getLangOpts().RTTI && RD && RD->hasAttr<DLLImportAttr>()) {
+ } else if (RD && RD->hasAttr<DLLImportAttr>() &&
+ ShouldUseExternalRTTIDescriptor(CGM, Ty)) {
TypeName->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
@@ -3534,8 +3569,9 @@ static StructorCodegen getCodegenToUse(CodeGenModule &CGM,
return StructorCodegen::RAUW;
if (llvm::GlobalValue::isWeakForLinker(Linkage)) {
- // Only ELF supports COMDATs with arbitrary names (C5/D5).
- if (CGM.getTarget().getTriple().isOSBinFormatELF())
+ // Only ELF and wasm support COMDATs with arbitrary names (C5/D5).
+ if (CGM.getTarget().getTriple().isOSBinFormatELF() ||
+ CGM.getTarget().getTriple().isOSBinFormatWasm())
return StructorCodegen::COMDAT;
return StructorCodegen::Emit;
}
@@ -3919,9 +3955,8 @@ void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF,
static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) {
llvm::FunctionType *fnTy =
llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
- llvm::Constant *fnRef =
- CGM.CreateRuntimeFunction(fnTy, "__clang_call_terminate",
- llvm::AttributeSet(), /*Local=*/true);
+ llvm::Constant *fnRef = CGM.CreateRuntimeFunction(
+ fnTy, "__clang_call_terminate", llvm::AttributeList(), /*Local=*/true);
llvm::Function *fn = dyn_cast<llvm::Function>(fnRef);
if (fn && fn->empty()) {
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.cpp b/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.cpp
new file mode 100644
index 0000000..a6f21d8
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.cpp
@@ -0,0 +1,208 @@
+//===--- MacroPPCallbacks.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the macro preprocessor callbacks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MacroPPCallbacks.h"
+#include "CGDebugInfo.h"
+#include "clang/CodeGen/ModuleBuilder.h"
+#include "clang/Parse/Parser.h"
+
+using namespace clang;
+
+void MacroPPCallbacks::writeMacroDefinition(const IdentifierInfo &II,
+ const MacroInfo &MI,
+ Preprocessor &PP, raw_ostream &Name,
+ raw_ostream &Value) {
+ Name << II.getName();
+
+ if (MI.isFunctionLike()) {
+ Name << '(';
+ if (!MI.param_empty()) {
+ MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end();
+ for (; AI + 1 != E; ++AI) {
+ Name << (*AI)->getName();
+ Name << ',';
+ }
+
+ // Last argument.
+ if ((*AI)->getName() == "__VA_ARGS__")
+ Name << "...";
+ else
+ Name << (*AI)->getName();
+ }
+
+ if (MI.isGNUVarargs())
+ // #define foo(x...)
+ Name << "...";
+
+ Name << ')';
+ }
+
+ SmallString<128> SpellingBuffer;
+ bool First = true;
+ for (const auto &T : MI.tokens()) {
+ if (!First && T.hasLeadingSpace())
+ Value << ' ';
+
+ Value << PP.getSpelling(T, SpellingBuffer);
+ First = false;
+ }
+}
+
+MacroPPCallbacks::MacroPPCallbacks(CodeGenerator *Gen, Preprocessor &PP)
+ : Gen(Gen), PP(PP), Status(NoScope) {}
+
+// This is the expected flow of enter/exit compiler and user files:
+// - Main File Enter
+// - <built-in> file enter
+// {Compiler macro definitions} - (Line=0, no scope)
+// - (Optional) <command line> file enter
+// {Command line macro definitions} - (Line=0, no scope)
+// - (Optional) <command line> file exit
+// {Command line file includes} - (Line=0, Main file scope)
+// {macro definitions and file includes} - (Line!=0, Parent scope)
+// - <built-in> file exit
+// {User code macro definitions and file includes} - (Line!=0, Parent scope)
+
+llvm::DIMacroFile *MacroPPCallbacks::getCurrentScope() {
+ if (Status == MainFileScope || Status == CommandLineIncludeScope)
+ return Scopes.back();
+ return nullptr;
+}
+
+SourceLocation MacroPPCallbacks::getCorrectLocation(SourceLocation Loc) {
+ if (Status == MainFileScope || EnteredCommandLineIncludeFiles)
+ return Loc;
+
+ // While parsing skipped files, the locations of macros are invalid.
+ // An invalid location represents line zero.
+ return SourceLocation();
+}
+
+static bool isBuiltinFile(SourceManager &SM, SourceLocation Loc) {
+ StringRef Filename(SM.getPresumedLoc(Loc).getFilename());
+ return Filename.equals("<built-in>");
+}
+
+static bool isCommandLineFile(SourceManager &SM, SourceLocation Loc) {
+ StringRef Filename(SM.getPresumedLoc(Loc).getFilename());
+ return Filename.equals("<command line>");
+}
+
+void MacroPPCallbacks::updateStatusToNextScope() {
+ switch (Status) {
+ case NoScope:
+ Status = InitializedScope;
+ break;
+ case InitializedScope:
+ Status = BuiltinScope;
+ break;
+ case BuiltinScope:
+ Status = CommandLineIncludeScope;
+ break;
+ case CommandLineIncludeScope:
+ Status = MainFileScope;
+ break;
+ case MainFileScope:
+ llvm_unreachable("There is no next scope, already in the final scope");
+ }
+}
+
+void MacroPPCallbacks::FileEntered(SourceLocation Loc) {
+ SourceLocation LineLoc = getCorrectLocation(LastHashLoc);
+ switch (Status) {
+ case NoScope:
+ updateStatusToNextScope();
+ break;
+ case InitializedScope:
+ updateStatusToNextScope();
+ return;
+ case BuiltinScope:
+ if (isCommandLineFile(PP.getSourceManager(), Loc))
+ return;
+ updateStatusToNextScope();
+ LLVM_FALLTHROUGH;
+ case CommandLineIncludeScope:
+ EnteredCommandLineIncludeFiles++;
+ break;
+ case MainFileScope:
+ break;
+ }
+
+ Scopes.push_back(Gen->getCGDebugInfo()->CreateTempMacroFile(getCurrentScope(),
+ LineLoc, Loc));
+}
+
+void MacroPPCallbacks::FileExited(SourceLocation Loc) {
+ switch (Status) {
+ default:
+ llvm_unreachable("Do not expect to exit a file from current scope");
+ case BuiltinScope:
+ if (!isBuiltinFile(PP.getSourceManager(), Loc))
+ // Skip next scope and change status to MainFileScope.
+ Status = MainFileScope;
+ return;
+ case CommandLineIncludeScope:
+ if (!EnteredCommandLineIncludeFiles) {
+ updateStatusToNextScope();
+ return;
+ }
+ EnteredCommandLineIncludeFiles--;
+ break;
+ case MainFileScope:
+ break;
+ }
+
+ Scopes.pop_back();
+}
+
+void MacroPPCallbacks::FileChanged(SourceLocation Loc, FileChangeReason Reason,
+ SrcMgr::CharacteristicKind FileType,
+ FileID PrevFID) {
+ // Only care about enter file or exit file changes.
+ if (Reason == EnterFile)
+ FileEntered(Loc);
+ else if (Reason == ExitFile)
+ FileExited(Loc);
+}
+
+void MacroPPCallbacks::InclusionDirective(
+ SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
+ bool IsAngled, CharSourceRange FilenameRange, const FileEntry *File,
+ StringRef SearchPath, StringRef RelativePath, const Module *Imported) {
+
+ // Record the line location of the current included file.
+ LastHashLoc = HashLoc;
+}
+
+void MacroPPCallbacks::MacroDefined(const Token &MacroNameTok,
+ const MacroDirective *MD) {
+ IdentifierInfo *Id = MacroNameTok.getIdentifierInfo();
+ SourceLocation location = getCorrectLocation(MacroNameTok.getLocation());
+ std::string NameBuffer, ValueBuffer;
+ llvm::raw_string_ostream Name(NameBuffer);
+ llvm::raw_string_ostream Value(ValueBuffer);
+ writeMacroDefinition(*Id, *MD->getMacroInfo(), PP, Name, Value);
+ Gen->getCGDebugInfo()->CreateMacro(getCurrentScope(),
+ llvm::dwarf::DW_MACINFO_define, location,
+ Name.str(), Value.str());
+}
+
+void MacroPPCallbacks::MacroUndefined(const Token &MacroNameTok,
+ const MacroDefinition &MD,
+ const MacroDirective *Undef) {
+ IdentifierInfo *Id = MacroNameTok.getIdentifierInfo();
+ SourceLocation location = getCorrectLocation(MacroNameTok.getLocation());
+ Gen->getCGDebugInfo()->CreateMacro(getCurrentScope(),
+ llvm::dwarf::DW_MACINFO_undef, location,
+ Id->getName(), "");
+}
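These callbacks only take effect once they are registered with the preprocessor so that each #define/#undef and file enter/exit reaches CGDebugInfo. A minimal registration sketch (assuming a CompilerInstance CI and a CodeGenerator *Gen; the exact wiring lives in CodeGenAction and may differ in detail):

// Attach the macro debug-info callbacks to the preprocessor.
Preprocessor &PP = CI.getPreprocessor();
PP.addPPCallbacks(llvm::make_unique<MacroPPCallbacks>(Gen, PP));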
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.h b/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.h
new file mode 100644
index 0000000..e117f96
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.h
@@ -0,0 +1,117 @@
+//===--- MacroPPCallbacks.h -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the macro preprocessor callbacks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/PPCallbacks.h"
+
+namespace llvm {
+class DIMacroFile;
+class DIMacroNode;
+}
+namespace clang {
+class Preprocessor;
+class MacroInfo;
+class CodeGenerator;
+
+class MacroPPCallbacks : public PPCallbacks {
+ /// A pointer to the code generator, where the debug info generator can be found.
+ CodeGenerator *Gen;
+
+ /// Preprocessor.
+ Preprocessor &PP;
+
+ /// Location of recent included file, used for line number.
+ SourceLocation LastHashLoc;
+
+ /// Counts the current number of command line included files that were
+ /// entered and have not been exited yet.
+ int EnteredCommandLineIncludeFiles = 0;
+
+ enum FileScopeStatus {
+ NoScope = 0, // Scope is not initialized yet.
+ InitializedScope, // Main file scope is initialized but not set yet.
+ BuiltinScope, // <built-in> and <command line> file scopes.
+ CommandLineIncludeScope, // Included file, from <command line> file, scope.
+ MainFileScope // Main file scope.
+ };
+ FileScopeStatus Status;
+
+ /// Contains all entered files that have not been exited yet, in inclusion
+ /// order.
+ llvm::SmallVector<llvm::DIMacroFile *, 4> Scopes;
+
+ /// Get current DIMacroFile scope.
+ /// \return current DIMacroFile scope or nullptr if there is no such scope.
+ llvm::DIMacroFile *getCurrentScope();
+
+ /// Get current line location or invalid location.
+ /// \param Loc current line location.
+ /// \return current line location \p `Loc`, or invalid location if it's in a
+ /// skipped file scope.
+ SourceLocation getCorrectLocation(SourceLocation Loc);
+
+ /// Use the passed preprocessor to write the macro name and value from the
+ /// given macro info and identifier info into the given \p `Name` and \p
+ /// `Value` output streams.
+ ///
+ /// \param II Identifier info, used to get the Macro name.
+ /// \param MI Macro info, used to get the Macro arguments and values.
+ /// \param PP Preprocessor.
+ /// \param [out] Name Placeholder for the returned macro name and arguments.
+ /// \param [out] Value Placeholder for the returned macro value.
+ static void writeMacroDefinition(const IdentifierInfo &II,
+ const MacroInfo &MI, Preprocessor &PP,
+ raw_ostream &Name, raw_ostream &Value);
+
+ /// Update current file scope status to next file scope.
+ void updateStatusToNextScope();
+
+ /// Handle the case when entering a file.
+ ///
+ /// \param Loc Indicates the new location.
+ void FileEntered(SourceLocation Loc);
+
+ /// Handle the case when exiting a file.
+ ///
+ /// \param Loc Indicates the new location.
+ void FileExited(SourceLocation Loc);
+
+public:
+ MacroPPCallbacks(CodeGenerator *Gen, Preprocessor &PP);
+
+ /// Callback invoked whenever a source file is entered or exited.
+ ///
+ /// \param Loc Indicates the new location.
+ /// \param PrevFID the file that was exited if \p Reason is ExitFile.
+ void FileChanged(SourceLocation Loc, FileChangeReason Reason,
+ SrcMgr::CharacteristicKind FileType,
+ FileID PrevFID = FileID()) override;
+
+ /// Callback invoked whenever a directive (#xxx) is processed.
+ void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
+ StringRef FileName, bool IsAngled,
+ CharSourceRange FilenameRange, const FileEntry *File,
+ StringRef SearchPath, StringRef RelativePath,
+ const Module *Imported) override;
+
+ /// Hook called whenever a macro definition is seen.
+ void MacroDefined(const Token &MacroNameTok,
+ const MacroDirective *MD) override;
+
+ /// Hook called whenever a macro \#undef is seen.
+ ///
+ /// MD is released immediately following this callback.
+ void MacroUndefined(const Token &MacroNameTok, const MacroDefinition &MD,
+ const MacroDirective *Undef) override;
+};
+
+} // end namespace clang
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index 38df455..1bd2937 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -19,8 +19,8 @@
#include "CGVTables.h"
#include "CodeGenModule.h"
#include "CodeGenTypes.h"
-#include "ConstantBuilder.h"
#include "TargetInfo.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/StmtCXX.h"
@@ -206,8 +206,9 @@ public:
// lacks a definition for the destructor, non-base destructors must always
// delegate to or alias the base destructor.
- void buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
- SmallVectorImpl<CanQualType> &ArgTys) override;
+ AddedStructorArgs
+ buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
+ SmallVectorImpl<CanQualType> &ArgTys) override;
/// Non-base dtors should be emitted as delegating thunks in this ABI.
bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor,
@@ -248,11 +249,10 @@ public:
void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override;
- unsigned addImplicitConstructorArgs(CodeGenFunction &CGF,
- const CXXConstructorDecl *D,
- CXXCtorType Type, bool ForVirtualBase,
- bool Delegating,
- CallArgList &Args) override;
+ AddedStructorArgs
+ addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D,
+ CXXCtorType Type, bool ForVirtualBase,
+ bool Delegating, CallArgList &Args) override;
void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
CXXDtorType Type, bool ForVirtualBase,
@@ -819,46 +819,44 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const {
return RAA_Default;
case llvm::Triple::x86_64:
- // Win64 passes objects with non-trivial copy ctors indirectly.
- if (RD->hasNonTrivialCopyConstructor())
- return RAA_Indirect;
-
- // If an object has a destructor, we'd really like to pass it indirectly
+ // If a class has a destructor, we'd really like to pass it indirectly
// because it allows us to elide copies. Unfortunately, MSVC makes that
// impossible for small types, which it will pass in a single register or
// stack slot. Most objects with dtors are large-ish, so handle that early.
// We can't call out all large objects as being indirect because there are
// multiple x64 calling conventions and the C++ ABI code shouldn't dictate
// how we pass large POD types.
+ //
+ // Note: This permits small classes with nontrivial destructors to be
+ // passed in registers, which is non-conforming.
if (RD->hasNonTrivialDestructor() &&
getContext().getTypeSize(RD->getTypeForDecl()) > 64)
return RAA_Indirect;
- // If this is true, the implicit copy constructor that Sema would have
- // created would not be deleted. FIXME: We should provide a more direct way
- // for CodeGen to ask whether the constructor was deleted.
- if (!RD->hasUserDeclaredCopyConstructor() &&
- !RD->hasUserDeclaredMoveConstructor() &&
- !RD->needsOverloadResolutionForMoveConstructor() &&
- !RD->hasUserDeclaredMoveAssignment() &&
- !RD->needsOverloadResolutionForMoveAssignment())
- return RAA_Default;
-
- // Otherwise, Sema should have created an implicit copy constructor if
- // needed.
- assert(!RD->needsImplicitCopyConstructor());
-
- // We have to make sure the trivial copy constructor isn't deleted.
- for (const CXXConstructorDecl *CD : RD->ctors()) {
- if (CD->isCopyConstructor()) {
- assert(CD->isTrivial());
- // We had at least one undeleted trivial copy ctor. Return directly.
- if (!CD->isDeleted())
- return RAA_Default;
+ // If a class has at least one non-deleted, trivial copy constructor, it
+ // is passed according to the C ABI. Otherwise, it is passed indirectly.
+ //
+ // Note: This permits classes with non-trivial copy or move ctors to be
+ // passed in registers, so long as they *also* have a trivial copy ctor,
+ // which is non-conforming.
+ if (RD->needsImplicitCopyConstructor()) {
+ // If the copy ctor has not yet been declared, we can read its triviality
+ // off the AST.
+ if (!RD->defaultedCopyConstructorIsDeleted() &&
+ RD->hasTrivialCopyConstructor())
+ return RAA_Default;
+ } else {
+ // Otherwise, we need to find the copy constructor(s) and ask.
+ for (const CXXConstructorDecl *CD : RD->ctors()) {
+ if (CD->isCopyConstructor()) {
+ // We had at least one nondeleted trivial copy ctor. Return directly.
+ if (!CD->isDeleted() && CD->isTrivial())
+ return RAA_Default;
+ }
}
}
- // The trivial copy constructor was deleted. Return indirectly.
+ // We have no trivial, non-deleted copy constructor.
return RAA_Indirect;
}
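As a hypothetical illustration of the note above (not part of the change itself): under the rewritten rule, a record like the following is still passed directly on Win64 because a trivial, non-deleted copy constructor exists, even though it also declares a non-trivial move constructor.

struct S {
  int X;
  S(const S &) = default; // trivial, non-deleted copy ctor -> RAA_Default
  S(S &&);                // non-trivial move ctor; does not force RAA_Indirect
};

A record whose only copy constructor is deleted or non-trivial falls through to RAA_Indirect instead.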
@@ -1261,17 +1259,19 @@ void MicrosoftCXXABI::EmitVBPtrStores(CodeGenFunction &CGF,
}
}
-void
+CGCXXABI::AddedStructorArgs
MicrosoftCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
SmallVectorImpl<CanQualType> &ArgTys) {
+ AddedStructorArgs Added;
// TODO: 'for base' flag
if (T == StructorType::Deleting) {
// The scalar deleting destructor takes an implicit int parameter.
ArgTys.push_back(getContext().IntTy);
+ ++Added.Suffix;
}
auto *CD = dyn_cast<CXXConstructorDecl>(MD);
if (!CD)
- return;
+ return Added;
// All parameters are already in place except is_most_derived, which goes
// after 'this' if it's variadic and last if it's not.
@@ -1279,11 +1279,16 @@ MicrosoftCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
const CXXRecordDecl *Class = CD->getParent();
const FunctionProtoType *FPT = CD->getType()->castAs<FunctionProtoType>();
if (Class->getNumVBases()) {
- if (FPT->isVariadic())
+ if (FPT->isVariadic()) {
ArgTys.insert(ArgTys.begin() + 1, getContext().IntTy);
- else
+ ++Added.Prefix;
+ } else {
ArgTys.push_back(getContext().IntTy);
+ ++Added.Suffix;
+ }
}
+
+ return Added;
}
void MicrosoftCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) {
@@ -1406,11 +1411,10 @@ void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF,
const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl());
assert(isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD));
if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) {
- ImplicitParamDecl *IsMostDerived
- = ImplicitParamDecl::Create(Context, nullptr,
- CGF.CurGD.getDecl()->getLocation(),
- &Context.Idents.get("is_most_derived"),
- Context.IntTy);
+ auto *IsMostDerived = ImplicitParamDecl::Create(
+ Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(),
+ &Context.Idents.get("is_most_derived"), Context.IntTy,
+ ImplicitParamDecl::Other);
// The 'most_derived' parameter goes second if the ctor is variadic and last
// if it's not. Dtors can't be variadic.
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
@@ -1420,11 +1424,10 @@ void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF,
Params.push_back(IsMostDerived);
getStructorImplicitParamDecl(CGF) = IsMostDerived;
} else if (isDeletingDtor(CGF.CurGD)) {
- ImplicitParamDecl *ShouldDelete
- = ImplicitParamDecl::Create(Context, nullptr,
- CGF.CurGD.getDecl()->getLocation(),
- &Context.Idents.get("should_call_delete"),
- Context.IntTy);
+ auto *ShouldDelete = ImplicitParamDecl::Create(
+ Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(),
+ &Context.Idents.get("should_call_delete"), Context.IntTy,
+ ImplicitParamDecl::Other);
Params.push_back(ShouldDelete);
getStructorImplicitParamDecl(CGF) = ShouldDelete;
}
@@ -1493,14 +1496,14 @@ void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) {
}
}
-unsigned MicrosoftCXXABI::addImplicitConstructorArgs(
+CGCXXABI::AddedStructorArgs MicrosoftCXXABI::addImplicitConstructorArgs(
CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type,
bool ForVirtualBase, bool Delegating, CallArgList &Args) {
assert(Type == Ctor_Complete || Type == Ctor_Base);
// Check if we need a 'most_derived' parameter.
if (!D->getParent()->getNumVBases())
- return 0;
+ return AddedStructorArgs{};
// Add the 'most_derived' argument second if we are variadic or last if not.
const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>();
@@ -1511,13 +1514,13 @@ unsigned MicrosoftCXXABI::addImplicitConstructorArgs(
MostDerivedArg = llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete);
}
RValue RV = RValue::get(MostDerivedArg);
- if (FPT->isVariadic())
+ if (FPT->isVariadic()) {
Args.insert(Args.begin() + 1,
CallArg(RV, getContext().IntTy, /*needscopy=*/false));
- else
- Args.add(RV, getContext().IntTy);
-
- return 1; // Added one arg.
+ return AddedStructorArgs::prefix(1);
+ }
+ Args.add(RV, getContext().IntTy);
+ return AddedStructorArgs::suffix(1);
}
void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
@@ -1554,7 +1557,7 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
void MicrosoftCXXABI::emitVTableTypeMetadata(const VPtrInfo &Info,
const CXXRecordDecl *RD,
llvm::GlobalVariable *VTable) {
- if (!CGM.getCodeGenOpts().PrepareForLTO)
+ if (!CGM.getCodeGenOpts().LTOUnit)
return;
// The location of the first virtual function pointer in the virtual table,
@@ -2203,9 +2206,8 @@ static void emitGlobalDtorWithTLRegDtor(CodeGenFunction &CGF, const VarDecl &VD,
llvm::FunctionType *TLRegDtorTy = llvm::FunctionType::get(
CGF.IntTy, DtorStub->getType(), /*IsVarArg=*/false);
- llvm::Constant *TLRegDtor =
- CGF.CGM.CreateRuntimeFunction(TLRegDtorTy, "__tlregdtor",
- llvm::AttributeSet(), /*Local=*/true);
+ llvm::Constant *TLRegDtor = CGF.CGM.CreateRuntimeFunction(
+ TLRegDtorTy, "__tlregdtor", llvm::AttributeList(), /*Local=*/true);
if (llvm::Function *TLRegDtorFn = dyn_cast<llvm::Function>(TLRegDtor))
TLRegDtorFn->setDoesNotThrow();
@@ -2301,9 +2303,9 @@ static llvm::Constant *getInitThreadHeaderFn(CodeGenModule &CGM) {
CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "_Init_thread_header",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NoUnwind),
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoUnwind),
/*Local=*/true);
}
@@ -2313,9 +2315,9 @@ static llvm::Constant *getInitThreadFooterFn(CodeGenModule &CGM) {
CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "_Init_thread_footer",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NoUnwind),
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoUnwind),
/*Local=*/true);
}
@@ -2325,9 +2327,9 @@ static llvm::Constant *getInitThreadAbortFn(CodeGenModule &CGM) {
CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "_Init_thread_abort",
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- llvm::Attribute::NoUnwind),
+ llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoUnwind),
/*Local=*/true);
}
@@ -3421,6 +3423,8 @@ static llvm::GlobalValue::LinkageTypes getLinkageForRTTI(QualType Ty) {
return llvm::GlobalValue::InternalLinkage;
case VisibleNoLinkage:
+ case ModuleInternalLinkage:
+ case ModuleLinkage:
case ExternalLinkage:
return llvm::GlobalValue::LinkOnceODRLinkage;
}
@@ -3713,7 +3717,7 @@ CatchTypeInfo
MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type,
QualType CatchHandlerType) {
// TypeDescriptors for exceptions never have qualified pointer types,
- // qualifiers are stored seperately in order to support qualification
+ // qualifiers are stored separately in order to support qualification
// conversions.
bool IsConst, IsVolatile, IsUnaligned;
Type =
@@ -3750,6 +3754,9 @@ llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) {
if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName))
return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
+ // Note for the future: If we ever want to do deferred emission of RTTI,
+ // check whether emitting vtables opportunistically needs any adjustment.
+
// Compute the fields for the TypeDescriptor.
SmallString<256> TypeInfoString;
{
@@ -3866,18 +3873,21 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
// Following the 'this' pointer is a reference to the source object that we
// are copying from.
ImplicitParamDecl SrcParam(
- getContext(), nullptr, SourceLocation(), &getContext().Idents.get("src"),
+ getContext(), /*DC=*/nullptr, SourceLocation(),
+ &getContext().Idents.get("src"),
getContext().getLValueReferenceType(RecordTy,
- /*SpelledAsLValue=*/true));
+ /*SpelledAsLValue=*/true),
+ ImplicitParamDecl::Other);
if (IsCopy)
FunctionArgs.push_back(&SrcParam);
// Constructors for classes which utilize virtual bases have an additional
// parameter which indicates whether or not it is being delegated to by a more
// derived constructor.
- ImplicitParamDecl IsMostDerived(getContext(), nullptr, SourceLocation(),
+ ImplicitParamDecl IsMostDerived(getContext(), /*DC=*/nullptr,
+ SourceLocation(),
&getContext().Idents.get("is_most_derived"),
- getContext().IntTy);
+ getContext().IntTy, ImplicitParamDecl::Other);
// Only add the parameter to the list if this class has virtual bases.
if (RD->getNumVBases() > 0)
FunctionArgs.push_back(&IsMostDerived);
@@ -3918,16 +3928,16 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
CGF.EmitCallArgs(Args, FPT, llvm::makeArrayRef(ArgVec), CD, IsCopy ? 1 : 0);
// Insert any ABI-specific implicit constructor arguments.
- unsigned ExtraArgs = addImplicitConstructorArgs(CGF, CD, Ctor_Complete,
- /*ForVirtualBase=*/false,
- /*Delegating=*/false, Args);
-
+ AddedStructorArgs ExtraArgs =
+ addImplicitConstructorArgs(CGF, CD, Ctor_Complete,
+ /*ForVirtualBase=*/false,
+ /*Delegating=*/false, Args);
// Call the destructor with our arguments.
llvm::Constant *CalleePtr =
CGM.getAddrOfCXXStructor(CD, StructorType::Complete);
CGCallee Callee = CGCallee::forDirect(CalleePtr, CD);
const CGFunctionInfo &CalleeInfo = CGM.getTypes().arrangeCXXConstructorCall(
- Args, CD, Ctor_Complete, ExtraArgs);
+ Args, CD, Ctor_Complete, ExtraArgs.Prefix, ExtraArgs.Suffix);
CGF.EmitCall(CalleeInfo, Callee, ReturnValueSlot(), Args);
Cleanups.ForceCleanup();
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp
index f925c25..fc64285 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp
@@ -92,6 +92,10 @@ namespace {
return M.get();
}
+ CGDebugInfo *getCGDebugInfo() {
+ return Builder->getModuleDebugInfo();
+ }
+
llvm::Module *ReleaseModule() {
return M.release();
}
@@ -193,7 +197,7 @@ namespace {
// Provide some coverage mapping even for methods that aren't emitted.
// Don't do this for templated classes though, as they may not be
// instantiable.
- if (!MD->getParent()->getDescribedClassTemplate())
+ if (!MD->getParent()->isDependentContext())
Builder->AddDeferredUnusedCoverageMapping(MD);
}
@@ -299,6 +303,10 @@ llvm::Module *CodeGenerator::ReleaseModule() {
return static_cast<CodeGeneratorImpl*>(this)->ReleaseModule();
}
+CGDebugInfo *CodeGenerator::getCGDebugInfo() {
+ return static_cast<CodeGeneratorImpl*>(this)->getCGDebugInfo();
+}
+
const Decl *CodeGenerator::GetDeclForMangledName(llvm::StringRef name) {
return static_cast<CodeGeneratorImpl*>(this)->GetDeclForMangledName(name);
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index 754f996..d0760b9 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -152,6 +152,9 @@ public:
CodeGenOpts.CodeModel = "default";
CodeGenOpts.ThreadModel = "single";
CodeGenOpts.DebugTypeExtRefs = true;
+ // When building a module, MainFileName is the name of the module map file.
+ CodeGenOpts.MainFileName =
+ LangOpts.CurrentModule.empty() ? MainFileName : LangOpts.CurrentModule;
CodeGenOpts.setDebugInfo(codegenoptions::FullDebugInfo);
CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning());
}
@@ -171,7 +174,8 @@ public:
// Prepare CGDebugInfo to emit debug info for a clang module.
auto *DI = Builder->getModuleDebugInfo();
StringRef ModuleName = llvm::sys::path::filename(MainFileName);
- DI->setPCHDescriptor({ModuleName, "", OutputFileName, ~1ULL});
+ DI->setPCHDescriptor({ModuleName, "", OutputFileName,
+ ASTFileSignature{{{~0U, ~0U, ~0U, ~0U, ~1U}}}});
DI->setModuleMap(MMap);
}
@@ -241,7 +245,11 @@ public:
// PCH files don't have a signature field in the control block,
// but LLVM detects DWO CUs by looking for a non-zero DWO id.
- uint64_t Signature = Buffer->Signature ? Buffer->Signature : ~1ULL;
+ // We use the lower 64 bits for debug info.
+ uint64_t Signature =
+ Buffer->Signature
+ ? (uint64_t)Buffer->Signature[1] << 32 | Buffer->Signature[0]
+ : ~1ULL;
Builder->getModuleDebugInfo()->setDwoId(Signature);
// Finalize the Builder.
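The folding above is plain integer packing; as a sketch (assuming Signature[0] and Signature[1] are the two lowest 32-bit words of the AST file signature), the debug-info DWO id is simply:

uint64_t DwoId = (uint64_t)Signature[1] << 32 | Signature[0];
// e.g. Signature[0] == 0x00000001, Signature[1] == 0x00000002
//  =>  DwoId == 0x0000000200000001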
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp b/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp
index 0bfe30a..fc8e36d 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp
@@ -57,6 +57,10 @@ static CharUnits getTypeStoreSize(CodeGenModule &CGM, llvm::Type *type) {
return CharUnits::fromQuantity(CGM.getDataLayout().getTypeStoreSize(type));
}
+static CharUnits getTypeAllocSize(CodeGenModule &CGM, llvm::Type *type) {
+ return CharUnits::fromQuantity(CGM.getDataLayout().getTypeAllocSize(type));
+}
+
void SwiftAggLowering::addTypedData(QualType type, CharUnits begin) {
// Deal with various aggregate types as special cases:
@@ -542,7 +546,9 @@ SwiftAggLowering::getCoerceAndExpandTypes() const {
packed = true;
elts.push_back(entry.Type);
- lastEnd = entry.End;
+
+ lastEnd = entry.Begin + getTypeAllocSize(CGM, entry.Type);
+ assert(entry.End <= lastEnd);
}
// We don't need to adjust 'packed' to deal with possible tail padding
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp
index d2fc388..ece3a40 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp
@@ -183,7 +183,11 @@ const TargetInfo &ABIInfo::getTarget() const {
return CGT.getTarget();
}
-bool ABIInfo:: isAndroid() const { return getTarget().getTriple().isAndroid(); }
+const CodeGenOptions &ABIInfo::getCodeGenOpts() const {
+ return CGT.getCodeGenOpts();
+}
+
+bool ABIInfo::isAndroid() const { return getTarget().getTriple().isAndroid(); }
bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
return false;
@@ -398,7 +402,17 @@ TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib,
}
unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const {
- return llvm::CallingConv::C;
+ // OpenCL kernels are called via an explicit runtime API with arguments
+ // set with clSetKernelArg(), not as normal sub-functions.
+ // Return SPIR_KERNEL by default as the kernel calling convention to
+ // ensure the fingerprint is fixed in such a way that each OpenCL argument
+ // gets one matching argument in the produced kernel function argument
+ // list; this enables a feasible implementation of clSetKernelArg() with
+ // aggregates etc. If we used the default C calling convention here,
+ // clSetKernelArg() might break depending on target-specific conventions;
+ // different targets might split structs passed by value into multiple
+ // function arguments etc.
+ return llvm::CallingConv::SPIR_KERNEL;
}
llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
@@ -406,13 +420,32 @@ llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &
return llvm::ConstantPointerNull::get(T);
}
+unsigned TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
+ const VarDecl *D) const {
+ assert(!CGM.getLangOpts().OpenCL &&
+ !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
+ "Address space agnostic languages only");
+ return D ? D->getType().getAddressSpace()
+ : static_cast<unsigned>(LangAS::Default);
+}
+
llvm::Value *TargetCodeGenInfo::performAddrSpaceCast(
- CodeGen::CodeGenFunction &CGF, llvm::Value *Src, QualType SrcTy,
- QualType DestTy) const {
+ CodeGen::CodeGenFunction &CGF, llvm::Value *Src, unsigned SrcAddr,
+ unsigned DestAddr, llvm::Type *DestTy, bool isNonNull) const {
+ // Since target may map different address spaces in AST to the same address
+ // space, an address space conversion may end up as a bitcast.
+ if (auto *C = dyn_cast<llvm::Constant>(Src))
+ return performAddrSpaceCast(CGF.CGM, C, SrcAddr, DestAddr, DestTy);
+ return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DestTy);
+}
+
+llvm::Constant *
+TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src,
+ unsigned SrcAddr, unsigned DestAddr,
+ llvm::Type *DestTy) const {
// Since target may map different address spaces in AST to the same address
// space, an address space conversion may end up as a bitcast.
- return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src,
- CGF.ConvertType(DestTy));
+ return llvm::ConstantExpr::getPointerCast(Src, DestTy);
}
static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
@@ -942,8 +975,7 @@ class X86_32ABIInfo : public SwiftABIInfo {
Class classify(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
- ABIArgInfo reclassifyHvaArgType(QualType RetTy, CCState &State,
- const ABIArgInfo& current) const;
+
/// \brief Updates the number of available free registers, returns
/// true if any registers were allocated.
bool updateFreeRegs(QualType Ty, CCState &State) const;
@@ -1197,6 +1229,39 @@ static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
return Size == 32 || Size == 64;
}
+static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD,
+ uint64_t &Size) {
+ for (const auto *FD : RD->fields()) {
+ // Scalar arguments on the stack get 4 byte alignment on x86. If the
+ // argument is smaller than 32-bits, expanding the struct will create
+ // alignment padding.
+ if (!is32Or64BitBasicType(FD->getType(), Context))
+ return false;
+
+ // FIXME: Reject bit-fields wholesale; there are two problems, we don't know
+ // how to expand them yet, and the predicate for telling if a bitfield still
+ // counts as "basic" is more complicated than what we were doing previously.
+ if (FD->isBitField())
+ return false;
+
+ Size += Context.getTypeSize(FD->getType());
+ }
+ return true;
+}
+
+static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD,
+ uint64_t &Size) {
+ // Don't do this if there are any non-empty bases.
+ for (const CXXBaseSpecifier &Base : RD->bases()) {
+ if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(),
+ Size))
+ return false;
+ }
+ if (!addFieldSizes(Context, RD, Size))
+ return false;
+ return true;
+}
+
/// Test whether an argument type which is to be passed indirectly (on the
/// stack) would have the equivalent layout if it was expanded into separate
/// arguments. If so, we prefer to do the latter to avoid inhibiting
@@ -1207,8 +1272,9 @@ bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
if (!RT)
return false;
const RecordDecl *RD = RT->getDecl();
+ uint64_t Size = 0;
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
- if (!IsWin32StructABI ) {
+ if (!IsWin32StructABI) {
// On non-Windows, we have to conservatively match our old bitcode
// prototypes in order to be ABI-compatible at the bitcode level.
if (!CXXRD->isCLike())
@@ -1217,30 +1283,12 @@ bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
// Don't do this for dynamic classes.
if (CXXRD->isDynamicClass())
return false;
- // Don't do this if there are any non-empty bases.
- for (const CXXBaseSpecifier &Base : CXXRD->bases()) {
- if (!isEmptyRecord(getContext(), Base.getType(), /*AllowArrays=*/true))
- return false;
- }
}
- }
-
- uint64_t Size = 0;
-
- for (const auto *FD : RD->fields()) {
- // Scalar arguments on the stack get 4 byte alignment on x86. If the
- // argument is smaller than 32-bits, expanding the struct will create
- // alignment padding.
- if (!is32Or64BitBasicType(FD->getType(), getContext()))
+ if (!addBaseAndFieldSizes(getContext(), CXXRD, Size))
return false;
-
- // FIXME: Reject bit-fields wholesale; there are two problems, we don't know
- // how to expand them yet, and the predicate for telling if a bitfield still
- // counts as "basic" is more complicated than what we were doing previously.
- if (FD->isBitField())
+ } else {
+ if (!addFieldSizes(getContext(), RD, Size))
return false;
-
- Size += getContext().getTypeSize(FD->getType());
}
// We can do this if there was no alignment padding.
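A concrete illustration of what canExpandIndirectArgument accepts after this change (hypothetical types, for reading convenience only): every field must be a 32- or 64-bit scalar, and the summed field sizes must equal the record size, i.e. no alignment padding.

struct Expandable    { int A; int B; };   // 32 + 32 bits, no padding: expandable
struct NotExpandable { short S; int A; }; // 16-bit field: rejected up front

The new addBaseAndFieldSizes helper applies the same accounting recursively to C++ base classes.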
@@ -1511,27 +1559,6 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
return true;
}
-ABIArgInfo
-X86_32ABIInfo::reclassifyHvaArgType(QualType Ty, CCState &State,
- const ABIArgInfo &current) const {
- // Assumes vectorCall calling convention.
- const Type *Base = nullptr;
- uint64_t NumElts = 0;
-
- if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
- isHomogeneousAggregate(Ty, Base, NumElts)) {
- if (State.FreeSSERegs >= NumElts) {
- // HVA types get passed directly in registers if there is room.
- State.FreeSSERegs -= NumElts;
- return getDirectX86Hva();
- }
- // If there's no room, the HVA gets passed as normal indirect
- // structure.
- return getIndirectResult(Ty, /*ByVal=*/false, State);
- }
- return current;
-}
-
ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
CCState &State) const {
// FIXME: Set alignment on indirect arguments.
@@ -1550,35 +1577,20 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
}
}
- // vectorcall adds the concept of a homogenous vector aggregate, similar
- // to other targets, regcall uses some of the HVA rules.
+ // Regcall uses the concept of a homogeneous vector aggregate, similar
+ // to other targets.
const Type *Base = nullptr;
uint64_t NumElts = 0;
- if ((State.CC == llvm::CallingConv::X86_VectorCall ||
- State.CC == llvm::CallingConv::X86_RegCall) &&
+ if (State.CC == llvm::CallingConv::X86_RegCall &&
isHomogeneousAggregate(Ty, Base, NumElts)) {
- if (State.CC == llvm::CallingConv::X86_RegCall) {
- if (State.FreeSSERegs >= NumElts) {
- State.FreeSSERegs -= NumElts;
- if (Ty->isBuiltinType() || Ty->isVectorType())
- return ABIArgInfo::getDirect();
- return ABIArgInfo::getExpand();
-
- }
- return getIndirectResult(Ty, /*ByVal=*/false, State);
- } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
- if (State.FreeSSERegs >= NumElts && (Ty->isBuiltinType() || Ty->isVectorType())) {
- // Actual floating-point types get registers first time through if
- // there is registers available
- State.FreeSSERegs -= NumElts;
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ if (Ty->isBuiltinType() || Ty->isVectorType())
return ABIArgInfo::getDirect();
- } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
- // HVA Types only get registers after everything else has been
- // set, so it gets set as indirect for now.
- return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty));
- }
+ return ABIArgInfo::getExpand();
}
+ return getIndirectResult(Ty, /*ByVal=*/false, State);
}
if (isAggregateTypeForABI(Ty)) {
@@ -1659,31 +1671,53 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
bool &UsedInAlloca) const {
- // Vectorcall only allows the first 6 parameters to be passed in registers,
- // and homogeneous vector aggregates are only put into registers as a second
- // priority.
- unsigned Count = 0;
- CCState ZeroState = State;
- ZeroState.FreeRegs = ZeroState.FreeSSERegs = 0;
- // HVAs must be done as a second priority for registers, so the deferred
- // items are dealt with by going through the pattern a second time.
+ // Vectorcall on x86 works subtly differently than on x64, so the scheme is
+ // a bit different from the x64 version. First, all vector types (not HVAs)
+ // are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5 registers.
+ // This differs from the x64 implementation, where the first 6 arguments by
+ // index get registers.
+ // After that, integers AND HVAs are assigned left to right in the same pass.
+ // Integers are passed in ECX/EDX if one is available (in order). HVAs first
+ // take up the remaining YMM/XMM registers. If insufficient vector registers
+ // remain but an integer register (ECX/EDX) is available, the HVA is passed
+ // indirectly through that register; otherwise it goes on the stack.
for (auto &I : FI.arguments()) {
- if (Count < VectorcallMaxParamNumAsReg)
- I.info = classifyArgumentType(I.type, State);
- else
- // Parameters after the 6th cannot be passed in registers,
- // so pretend there are no registers left for them.
- I.info = classifyArgumentType(I.type, ZeroState);
- UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
- ++Count;
+ // First pass do all the vector types.
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ const QualType& Ty = I.type;
+ if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
+ isHomogeneousAggregate(Ty, Base, NumElts)) {
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ I.info = ABIArgInfo::getDirect();
+ } else {
+ I.info = classifyArgumentType(Ty, State);
+ }
+ UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ }
}
- Count = 0;
- // Go through the arguments a second time to get HVAs registers if there
- // are still some available.
+
for (auto &I : FI.arguments()) {
- if (Count < VectorcallMaxParamNumAsReg)
- I.info = reclassifyHvaArgType(I.type, State, I.info);
- ++Count;
+ // Second pass, do the rest!
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ const QualType& Ty = I.type;
+ bool IsHva = isHomogeneousAggregate(Ty, Base, NumElts);
+
+ if (IsHva && !Ty->isVectorType() && !Ty->isBuiltinType()) {
+ // Assign true HVAs (non vector/native FP types).
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ I.info = getDirectX86Hva();
+ } else {
+ I.info = getIndirectResult(Ty, /*ByVal=*/false, State);
+ }
+ } else if (!IsHva) {
+ // Assign all Non-HVAs, so this will exclude Vector/FP args.
+ I.info = classifyArgumentType(Ty, State);
+ UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ }
}
}
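A short trace of the two-pass scheme described above (hypothetical declarations; __vectorcall assumed available as on MSVC-compatible x86 targets):

struct HVA2 { double X, Y; };                 // homogeneous FP aggregate
void __vectorcall f(float A, HVA2 B, int C);

// First pass:  A is a native FP type and takes the first free XMM register.
// Second pass: B is a true HVA and takes the next two free XMM registers,
//              or is passed indirectly if too few remain; C is not an HVA,
//              so it is classified normally (ECX/EDX if free, else stack).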
@@ -1885,10 +1919,7 @@ void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D,
// Now add the 'alignstack' attribute with a value of 16.
llvm::AttrBuilder B;
B.addStackAlignmentAttr(16);
- Fn->addAttributes(llvm::AttributeSet::FunctionIndex,
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- B));
+ Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
}
if (FD->hasAttr<AnyX86InterruptAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
@@ -2068,9 +2099,14 @@ class X86_64ABIInfo : public SwiftABIInfo {
return !getTarget().getTriple().isOSDarwin();
}
- /// GCC classifies <1 x long long> as SSE but compatibility with older clang
- // compilers require us to classify it as INTEGER.
+ /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to
+ /// classify it as INTEGER (for compatibility with older clang compilers).
bool classifyIntegerMMXAsSSE() const {
+ // Clang <= 3.8 did not do this.
+ if (getCodeGenOpts().getClangABICompat() <=
+ CodeGenOptions::ClangABI::Ver3_8)
+ return false;
+
const llvm::Triple &Triple = getTarget().getTriple();
if (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::PS4)
return false;
@@ -3146,8 +3182,7 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi,
}
}
- llvm::StructType *Result = llvm::StructType::get(Lo, Hi, nullptr);
-
+ llvm::StructType *Result = llvm::StructType::get(Lo, Hi);
// Verify that the second element is at an 8-byte offset.
assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 &&
@@ -3222,8 +3257,7 @@ classifyReturnType(QualType RetTy) const {
case ComplexX87:
assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification.");
ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()),
- llvm::Type::getX86_FP80Ty(getVMContext()),
- nullptr);
+ llvm::Type::getX86_FP80Ty(getVMContext()));
break;
}
@@ -3719,7 +3753,7 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
CharUnits::fromQuantity(16));
llvm::Type *DoubleTy = CGF.DoubleTy;
- llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy, nullptr);
+ llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy);
llvm::Value *V;
Address Tmp = CGF.CreateMemTemp(Ty);
Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
@@ -3881,6 +3915,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI,
bool IsRegCall) const {
unsigned Count = 0;
for (auto &I : FI.arguments()) {
+ // Vectorcall on x64 only permits the first 6 arguments to be passed
+ // in XMM/YMM registers.
if (Count < VectorcallMaxParamNumAsReg)
I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall);
else {
@@ -3893,11 +3929,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI,
++Count;
}
- Count = 0;
for (auto &I : FI.arguments()) {
- if (Count < VectorcallMaxParamNumAsReg)
- I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info);
- ++Count;
+ I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info);
}
}
@@ -4624,7 +4657,7 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
llvm::Type *CoerceTy;
if (Bits > GPRBits) {
CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits);
- CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, nullptr);
+ CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy);
} else
CoerceTy =
llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
@@ -4761,7 +4794,8 @@ class AArch64ABIInfo : public SwiftABIInfo {
public:
enum ABIKind {
AAPCS = 0,
- DarwinPCS
+ DarwinPCS,
+ Win64
};
private:
@@ -4799,10 +4833,14 @@ private:
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override {
- return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
- : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
+ return Kind == Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty)
+ : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
+ : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
}
+ Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const override;
+
bool shouldPassIndirectlyForSwift(CharUnits totalSize,
ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
@@ -4811,6 +4849,9 @@ private:
bool isSwiftErrorInRegister() const override {
return true;
}
+
+ bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy,
+ unsigned elts) const override;
};
class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -4819,7 +4860,7 @@ public:
: TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {}
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
- return "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue";
+ return "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue";
}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
@@ -4877,10 +4918,16 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
// Empty records are always ignored on Darwin, but actually passed in C++ mode
// elsewhere for GNU compatibility.
- if (isEmptyRecord(getContext(), Ty, true)) {
+ uint64_t Size = getContext().getTypeSize(Ty);
+ bool IsEmpty = isEmptyRecord(getContext(), Ty, true);
+ if (IsEmpty || Size == 0) {
if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
return ABIArgInfo::getIgnore();
+ // GNU C mode. The only argument that gets ignored is an empty one with size
+ // 0.
+ if (IsEmpty && Size == 0)
+ return ABIArgInfo::getIgnore();
return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
}
@@ -4893,7 +4940,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
}
// Aggregates <= 16 bytes are passed directly in registers or on the stack.
- uint64_t Size = getContext().getTypeSize(Ty);
if (Size <= 128) {
// On RenderScript, coerce Aggregates <= 16 bytes to an integer array of
// same size and alignment.
@@ -4901,7 +4947,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
return coerceToIntArray(Ty, getContext(), getVMContext());
}
unsigned Alignment = getContext().getTypeAlign(Ty);
- Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes
+ Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
// We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
// For aggregates with 16-byte alignment, we use i128.
@@ -4933,7 +4979,8 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
: ABIArgInfo::getDirect());
}
- if (isEmptyRecord(getContext(), RetTy, true))
+ uint64_t Size = getContext().getTypeSize(RetTy);
+ if (isEmptyRecord(getContext(), RetTy, true) || Size == 0)
return ABIArgInfo::getIgnore();
const Type *Base = nullptr;
@@ -4943,7 +4990,6 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
return ABIArgInfo::getDirect();
// Aggregates <= 16 bytes are returned directly in registers or on the stack.
- uint64_t Size = getContext().getTypeSize(RetTy);
if (Size <= 128) {
// On RenderScript, coerce Aggregates <= 16 bytes to an integer array of
// same size and alignment.
@@ -4951,7 +4997,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
return coerceToIntArray(RetTy, getContext(), getVMContext());
}
unsigned Alignment = getContext().getTypeAlign(RetTy);
- Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes
+ Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
// We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
// For aggregates with 16-byte alignment, we use i128.
@@ -4979,6 +5025,17 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
return false;
}
+bool AArch64ABIInfo::isLegalVectorTypeForSwift(CharUnits totalSize,
+ llvm::Type *eltTy,
+ unsigned elts) const {
+ if (!llvm::isPowerOf2_32(elts))
+ return false;
+ if (totalSize.getQuantity() != 8 &&
+ (totalSize.getQuantity() != 16 || elts == 1))
+ return false;
+ return true;
+}
+
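
Restating the new Swift vector rule as a self-contained predicate (sizes in bytes, llvm::isPowerOf2_32 replaced by a local helper; the names are illustrative): a vector is legal when its element count is a power of two and it is exactly 8 bytes, or exactly 16 bytes with more than one element.

    #include <cassert>

    static bool isPowerOf2(unsigned N) { return N && !(N & (N - 1)); }

    static bool legalSwiftVector(unsigned totalSizeInBytes, unsigned elts) {
      if (!isPowerOf2(elts))
        return false;
      if (totalSizeInBytes != 8 && (totalSizeInBytes != 16 || elts == 1))
        return false;
      return true;
    }

    int main() {
      assert(legalSwiftVector(8, 2));    // e.g. <2 x float>
      assert(legalSwiftVector(16, 4));   // e.g. <4 x i32>
      assert(!legalSwiftVector(16, 1));  // single 16-byte element
      assert(!legalSwiftVector(8, 3));   // non-power-of-two count
      assert(!legalSwiftVector(32, 4));  // too large
    }
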
bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
// Homogeneous aggregates for AAPCS64 must have base types of a floating
// point type or a short-vector type. This is the same as the 32-bit ABI,
@@ -5289,6 +5346,14 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
TyInfo, SlotSize, /*AllowHigherAlign*/ true);
}
+Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const {
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
+ CGF.getContext().getTypeInfoInChars(Ty),
+ CharUnits::fromQuantity(8),
+ /*allowHigherAlign*/ false);
+}
+
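
A rough sketch (my names, not CodeGen API) of the pointer arithmetic emitVoidPtrVAArg is asked to produce here: arguments live in 8-byte stack slots, are not passed indirectly, and are never realigned beyond the slot; the cursor is assumed to start 8-byte aligned.

    #include <cassert>
    #include <cstdint>

    struct VaList { uintptr_t Cur; };

    static uintptr_t msVaArgSlot(VaList &VL, uint64_t argSizeInBytes) {
      uintptr_t Arg = VL.Cur;                        // argument starts at the cursor
      uint64_t Slot = (argSizeInBytes + 7) & ~7ull;  // round the size up to 8 bytes
      VL.Cur += Slot;                                // advance to the next slot
      return Arg;
    }

    int main() {
      VaList VL{0x1000};
      assert(msVaArgSlot(VL, 4)  == 0x1000 && VL.Cur == 0x1008); // int: one slot
      assert(msVaArgSlot(VL, 16) == 0x1008 && VL.Cur == 0x1018); // 16-byte aggregate
    }
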
//===----------------------------------------------------------------------===//
// ARM ABI Implementation
//===----------------------------------------------------------------------===//
@@ -5367,6 +5432,8 @@ private:
bool isSwiftErrorInRegister() const override {
return true;
}
+ bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy,
+ unsigned elts) const override;
};
class ARMTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -5433,10 +5500,7 @@ public:
// the backend to perform a realignment as part of the function prologue.
llvm::AttrBuilder B;
B.addStackAlignmentAttr(8);
- Fn->addAttributes(llvm::AttributeSet::FunctionIndex,
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- B));
+ Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
}
};
@@ -5518,17 +5582,14 @@ void ARMABIInfo::setCCs() {
// AAPCS apparently requires runtime support functions to be soft-float, but
// that's almost certainly for historic reasons (Thumb1 not supporting VFP
// most likely). It's more convenient for AAPCS16_VFP to be hard-float.
- switch (getABIKind()) {
- case APCS:
- case AAPCS16_VFP:
- if (abiCC != getLLVMDefaultCC())
+
+ // The Run-time ABI for the ARM Architecture section 4.1.2 requires
+ // AEABI-complying FP helper functions to use the base AAPCS.
+  // These AEABI functions are expanded in the ARM llvm backend; all the builtin
+  // support functions emitted by clang, such as the _Complex helpers, follow
+  // the abiCC.
+ if (abiCC != getLLVMDefaultCC())
BuiltinCC = abiCC;
- break;
- case AAPCS:
- case AAPCS_VFP:
- BuiltinCC = llvm::CallingConv::ARM_AAPCS;
- break;
- }
}
ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
@@ -5882,6 +5943,20 @@ bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
return false;
}
+bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
+ llvm::Type *eltTy,
+ unsigned numElts) const {
+ if (!llvm::isPowerOf2_32(numElts))
+ return false;
+ unsigned size = getDataLayout().getTypeStoreSizeInBits(eltTy);
+ if (size > 64)
+ return false;
+ if (vectorSize.getQuantity() != 8 &&
+ (vectorSize.getQuantity() != 16 || numElts == 1))
+ return false;
+ return true;
+}
+
bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
// Homogeneous aggregates for AAPCS-VFP must have base types of float,
// double, or 64-bit or 128-bit vectors.
@@ -6545,6 +6620,11 @@ public:
Fn->addFnAttr("nomips16");
}
+ if (FD->hasAttr<MicroMipsAttr>())
+ Fn->addFnAttr("micromips");
+ else if (FD->hasAttr<NoMicroMipsAttr>())
+ Fn->addFnAttr("nomicromips");
+
const MipsInterruptAttr *Attr = FD->getAttr<MipsInterruptAttr>();
if (!Attr)
return;
@@ -6884,6 +6964,31 @@ MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
}
//===----------------------------------------------------------------------===//
+// AVR ABI Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class AVRTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+ AVRTargetCodeGenInfo(CodeGenTypes &CGT)
+ : TargetCodeGenInfo(new DefaultABIInfo(CGT)) { }
+
+ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &CGM) const override {
+ const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
+ if (!FD) return;
+ auto *Fn = cast<llvm::Function>(GV);
+
+ if (FD->getAttr<AVRInterruptAttr>())
+ Fn->addFnAttr("interrupt");
+
+ if (FD->getAttr<AVRSignalAttr>())
+ Fn->addFnAttr("signal");
+ }
+};
+}
+
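
For context, a source-level illustration of what these new hooks consume; it is only meaningful for an AVR target (e.g. clang --target=avr), and the handler bodies are placeholders:

    // With the hook above, clang tags timer_isr with the "interrupt" IR function
    // attribute and pin_change_isr with "signal"; the AVR backend uses them to
    // emit the corresponding interrupt prologue/epilogue.
    extern volatile unsigned char ticks;

    __attribute__((interrupt)) void timer_isr(void)      { ticks = ticks + 1; }
    __attribute__((signal))    void pin_change_isr(void) { ticks = ticks + 1; }
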
+//===----------------------------------------------------------------------===//
// TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults.
// Currently subclassed only to implement custom OpenCL C function attribute
// handling.
@@ -6997,13 +7102,13 @@ ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const {
ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
}
+ if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
+ return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
+
// Ignore empty records.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
- if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
- return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
-
uint64_t Size = getContext().getTypeSize(Ty);
if (Size > 64)
return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
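
A hedged reading of why the two checks above trade places: a record that the C++ ABI requires to be passed indirectly (non-trivial copy constructor or destructor) must take that path even when it is otherwise empty, so the RAA query now runs before the empty-record shortcut. Sketch with illustrative names:

    #include <cassert>

    enum class HexArg { Indirect, Ignore, Other };

    // Checks ordered as in the patched code: the record-argument-ABI (RAA)
    // query comes before the empty-record shortcut.
    static HexArg classify(bool mustPassIndirectly, bool isEmptyRecord) {
      if (mustPassIndirectly)
        return HexArg::Indirect;
      if (isEmptyRecord)
        return HexArg::Ignore;
      return HexArg::Other;
    }

    int main() {
      assert(classify(true, true)  == HexArg::Indirect); // empty but non-trivial
      assert(classify(false, true) == HexArg::Ignore);
    }
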
@@ -7246,11 +7351,16 @@ public:
llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
llvm::PointerType *T, QualType QT) const override;
+
+ unsigned getASTAllocaAddressSpace() const override {
+ return LangAS::FirstTargetAddressSpace +
+ getABIInfo().getDataLayout().getAllocaAddrSpace();
+ }
+ unsigned getGlobalVarAddressSpace(CodeGenModule &CGM,
+ const VarDecl *D) const override;
};
}
-static void appendOpenCLVersionMD (CodeGen::CodeGenModule &CGM);
-
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
const Decl *D,
llvm::GlobalValue *GV,
@@ -7261,9 +7371,14 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
llvm::Function *F = cast<llvm::Function>(GV);
- if (const auto *Attr = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
- unsigned Min = Attr->getMin();
- unsigned Max = Attr->getMax();
+ const auto *ReqdWGS = M.getLangOpts().OpenCL ?
+ FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
+ const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
+ if (ReqdWGS || FlatWGS) {
+ unsigned Min = FlatWGS ? FlatWGS->getMin() : 0;
+ unsigned Max = FlatWGS ? FlatWGS->getMax() : 0;
+ if (ReqdWGS && Min == 0 && Max == 0)
+ Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
if (Min != 0) {
assert(Min <= Max && "Min must be less than or equal Max");
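
A self-contained sketch (illustrative names) of how the bounds for the "amdgpu-flat-work-group-size" attribute are now derived: an explicit amdgpu_flat_work_group_size attribute wins; otherwise, under OpenCL, reqd_work_group_size(X, Y, Z) pins both bounds to X*Y*Z.

    #include <cassert>

    struct Bounds { unsigned Min = 0, Max = 0; };

    static Bounds flatWorkGroupBounds(bool hasFlatWGS, unsigned flatMin,
                                      unsigned flatMax, bool hasReqdWGS,
                                      unsigned X, unsigned Y, unsigned Z) {
      Bounds B;
      if (hasFlatWGS) {
        B.Min = flatMin;
        B.Max = flatMax;
      }
      if (hasReqdWGS && B.Min == 0 && B.Max == 0)
        B.Min = B.Max = X * Y * Z;
      return B;
    }

    int main() {
      Bounds B = flatWorkGroupBounds(false, 0, 0, true, 8, 8, 1);
      assert(B.Min == 64 && B.Max == 64);       // reqd_work_group_size alone
      B = flatWorkGroupBounds(true, 32, 256, true, 8, 8, 1);
      assert(B.Min == 32 && B.Max == 256);      // explicit flat bounds win
    }
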
@@ -7302,8 +7417,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
-
- appendOpenCLVersionMD(M);
}
unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
@@ -7328,6 +7441,31 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
llvm::ConstantPointerNull::get(NPT), PT);
}
+unsigned
+AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
+ const VarDecl *D) const {
+ assert(!CGM.getLangOpts().OpenCL &&
+ !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
+ "Address space agnostic languages only");
+ unsigned DefaultGlobalAS =
+ LangAS::FirstTargetAddressSpace +
+ CGM.getContext().getTargetAddressSpace(LangAS::opencl_global);
+ if (!D)
+ return DefaultGlobalAS;
+
+ unsigned AddrSpace = D->getType().getAddressSpace();
+ assert(AddrSpace == LangAS::Default ||
+ AddrSpace >= LangAS::FirstTargetAddressSpace);
+ if (AddrSpace != LangAS::Default)
+ return AddrSpace;
+
+ if (CGM.isTypeConstant(D->getType(), false)) {
+ if (auto ConstAS = CGM.getTarget().getConstantAddressSpace())
+ return ConstAS.getValue();
+ }
+ return DefaultGlobalAS;
+}
+
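
A rough model of the address-space choice encoded above for languages that do not spell address spaces themselves; the enum values are illustrative, not the real AMDGPU numbering: an explicit address space on the declaration wins, constant-qualified globals may be promoted to the constant address space, and everything else lands in the global address space.

    #include <cassert>
    #include <optional>

    enum AddrSpace : unsigned { Default = 0, Global = 1, Constant = 4 };

    static unsigned pickGlobalAS(std::optional<unsigned> declaredAS,
                                 bool isConstant,
                                 std::optional<unsigned> constantAS) {
      if (declaredAS && *declaredAS != Default)
        return *declaredAS;   // explicit address space wins
      if (isConstant && constantAS)
        return *constantAS;   // promote constants if the target has such a space
      return Global;          // default: global address space
    }

    int main() {
      assert(pickGlobalAS(std::nullopt, false, Constant) == Global);
      assert(pickGlobalAS(3u, false, Constant) == 3u);
      assert(pickGlobalAS(std::nullopt, true, Constant) == Constant);
    }
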
//===----------------------------------------------------------------------===//
// SPARC v8 ABI Implementation.
// Based on the SPARC Compliance Definition version 2.4.1.
@@ -7974,45 +8112,18 @@ class SPIRTargetCodeGenInfo : public TargetCodeGenInfo {
public:
SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
- void emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const override;
unsigned getOpenCLKernelCallingConv() const override;
};
+
} // End anonymous namespace.
-/// Emit SPIR specific metadata: OpenCL and SPIR version.
-void SPIRTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const {
- llvm::LLVMContext &Ctx = CGM.getModule().getContext();
- llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
- llvm::Module &M = CGM.getModule();
- // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the
- // opencl.spir.version named metadata.
- llvm::Metadata *SPIRVerElts[] = {
- llvm::ConstantAsMetadata::get(
- llvm::ConstantInt::get(Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)),
- llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
- Int32Ty, (CGM.getLangOpts().OpenCLVersion / 100 > 1) ? 0 : 2))};
- llvm::NamedMDNode *SPIRVerMD =
- M.getOrInsertNamedMetadata("opencl.spir.version");
- SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts));
- appendOpenCLVersionMD(CGM);
-}
-
-static void appendOpenCLVersionMD(CodeGen::CodeGenModule &CGM) {
- llvm::LLVMContext &Ctx = CGM.getModule().getContext();
- llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
- llvm::Module &M = CGM.getModule();
- // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the
- // opencl.ocl.version named metadata node.
- llvm::Metadata *OCLVerElts[] = {
- llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
- Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)),
- llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
- Int32Ty, (CGM.getLangOpts().OpenCLVersion % 100) / 10))};
- llvm::NamedMDNode *OCLVerMD =
- M.getOrInsertNamedMetadata("opencl.ocl.version");
- OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts));
+namespace clang {
+namespace CodeGen {
+void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
+ DefaultABIInfo SPIRABI(CGM.getTypes());
+ SPIRABI.computeInfo(FI);
+}
+}
}
unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
@@ -8386,11 +8497,16 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
case llvm::Triple::mips64el:
return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false));
+ case llvm::Triple::avr:
+ return SetCGInfo(new AVRTargetCodeGenInfo(Types));
+
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be: {
AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS;
if (getTarget().getABI() == "darwinpcs")
Kind = AArch64ABIInfo::DarwinPCS;
+ else if (Triple.isOSWindows())
+ Kind = AArch64ABIInfo::Win64;
return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind));
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h
index 223d6d0..952ef96 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h
@@ -229,13 +229,36 @@ public:
virtual llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
llvm::PointerType *T, QualType QT) const;
+  /// Get the target-favored AST address space of a global variable for
+  /// languages other than OpenCL and CUDA.
+  /// If \p D is nullptr, returns the default target-favored address space
+  /// for global variables.
+ virtual unsigned getGlobalVarAddressSpace(CodeGenModule &CGM,
+ const VarDecl *D) const;
+
+ /// Get the AST address space for alloca.
+ virtual unsigned getASTAllocaAddressSpace() const { return LangAS::Default; }
+
/// Perform address space cast of an expression of pointer type.
/// \param V is the LLVM value to be casted to another address space.
- /// \param SrcTy is the QualType of \p V.
- /// \param DestTy is the destination QualType.
+ /// \param SrcAddr is the language address space of \p V.
+ /// \param DestAddr is the targeted language address space.
+ /// \param DestTy is the destination LLVM pointer type.
+ /// \param IsNonNull is the flag indicating \p V is known to be non null.
virtual llvm::Value *performAddrSpaceCast(CodeGen::CodeGenFunction &CGF,
- llvm::Value *V, QualType SrcTy, QualType DestTy) const;
+ llvm::Value *V, unsigned SrcAddr,
+ unsigned DestAddr,
+ llvm::Type *DestTy,
+ bool IsNonNull = false) const;
+ /// Perform address space cast of a constant expression of pointer type.
+ /// \param V is the LLVM constant to be casted to another address space.
+ /// \param SrcAddr is the language address space of \p V.
+ /// \param DestAddr is the targeted language address space.
+ /// \param DestTy is the destination LLVM pointer type.
+ virtual llvm::Constant *
+ performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *V, unsigned SrcAddr,
+ unsigned DestAddr, llvm::Type *DestTy) const;
};
} // namespace CodeGen