author     dim <dim@FreeBSD.org>    2016-01-06 20:19:13 +0000
committer  dim <dim@FreeBSD.org>    2016-01-06 20:19:13 +0000
commit     e06c171d67ab436f270b15f7e364a8d8f77c01f2 (patch)
tree       b7c03c042b220d85a294b0e2e89936b631d3e6ad
parent     db873d7452584205dd063528dc8addbf28aa396b (diff)
parent     ff2ba393a56d9d99dcb76ceada542233db28af9a (diff)
download   FreeBSD-src-e06c171d67ab436f270b15f7e364a8d8f77c01f2.zip
           FreeBSD-src-e06c171d67ab436f270b15f7e364a8d8f77c01f2.tar.gz
Update llvm to trunk r256945.
-rw-r--r--  contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h | 5
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineInstr.h | 27
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h | 5
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h | 2
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h | 4
-rw-r--r--  contrib/llvm/include/llvm/IR/CallSite.h | 5
-rw-r--r--  contrib/llvm/include/llvm/IR/IRBuilder.h | 60
-rw-r--r--  contrib/llvm/include/llvm/IR/Instructions.h | 21
-rw-r--r--  contrib/llvm/include/llvm/IR/IntrinsicsX86.td | 152
-rw-r--r--  contrib/llvm/include/llvm/IR/Metadata.h | 17
-rw-r--r--  contrib/llvm/include/llvm/IR/Statepoint.h | 56
-rw-r--r--  contrib/llvm/include/llvm/MC/SubtargetFeature.h | 22
-rw-r--r--  contrib/llvm/include/llvm/ProfileData/InstrProf.h | 48
-rw-r--r--  contrib/llvm/include/llvm/ProfileData/InstrProfData.inc | 21
-rw-r--r--  contrib/llvm/include/llvm/Support/ARMTargetParser.def | 1
-rw-r--r--  contrib/llvm/include/llvm/Support/Program.h | 2
-rw-r--r--  contrib/llvm/include/llvm/Support/YAMLParser.h | 23
-rw-r--r--  contrib/llvm/include/llvm/TableGen/Record.h | 35
-rw-r--r--  contrib/llvm/include/llvm/Target/Target.td | 4
-rw-r--r--  contrib/llvm/include/llvm/Target/TargetLowering.h | 6
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h | 10
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h | 3
-rw-r--r--  contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp | 25
-rw-r--r--  contrib/llvm/lib/Analysis/GlobalsModRef.cpp | 17
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryBuiltins.cpp | 7
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 22
-rw-r--r--  contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp | 13
-rw-r--r--  contrib/llvm/lib/Analysis/ValueTracking.cpp | 25
-rw-r--r--  contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 43
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp | 55
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCSE.cpp | 38
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstr.cpp | 42
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterPressure.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 94
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 57
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 41
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetSchedule.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/WinEHPrepare.cpp | 301
-rw-r--r--  contrib/llvm/lib/IR/AsmWriter.cpp | 15
-rw-r--r--  contrib/llvm/lib/IR/Attributes.cpp | 17
-rw-r--r--  contrib/llvm/lib/IR/Instruction.cpp | 13
-rw-r--r--  contrib/llvm/lib/IR/Instructions.cpp | 25
-rw-r--r--  contrib/llvm/lib/IR/Metadata.cpp | 6
-rw-r--r--  contrib/llvm/lib/IR/Statepoint.cpp | 15
-rw-r--r--  contrib/llvm/lib/IR/Verifier.cpp | 28
-rw-r--r--  contrib/llvm/lib/Linker/IRMover.cpp | 23
-rw-r--r--  contrib/llvm/lib/MC/MCDwarf.cpp | 10
-rw-r--r--  contrib/llvm/lib/MC/MCObjectFileInfo.cpp | 2
-rw-r--r--  contrib/llvm/lib/MC/MCSubtargetInfo.cpp | 17
-rw-r--r--  contrib/llvm/lib/MC/SubtargetFeature.cpp | 30
-rw-r--r--  contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp | 15
-rw-r--r--  contrib/llvm/lib/ProfileData/InstrProf.cpp | 99
-rw-r--r--  contrib/llvm/lib/Support/Unix/Program.inc | 4
-rw-r--r--  contrib/llvm/lib/Support/Windows/Program.inc | 9
-rw-r--r--  contrib/llvm/lib/Support/Windows/WindowsSupport.h | 18
-rw-r--r--  contrib/llvm/lib/Support/raw_ostream.cpp | 19
-rw-r--r--  contrib/llvm/lib/TableGen/Record.cpp | 8
-rw-r--r--  contrib/llvm/lib/TableGen/TGParser.cpp | 27
-rw-r--r--  contrib/llvm/lib/TableGen/TGParser.h | 9
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64.td | 10
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp | 8
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 104
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 26
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h | 10
-rw-r--r--  contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h | 12
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPU.td | 5
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 13
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 35
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 1
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/CIInstructions.td | 76
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 86
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td | 28
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIInstructions.td | 4
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 11
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 43
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/VIInstructions.td | 9
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARM.td | 8
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 12
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 21
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSubtarget.h | 2
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/Hexagon.td | 5
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td | 2
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td | 113
-rw-r--r--  contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt | 17
-rw-r--r--  contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp | 1
-rw-r--r--  contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 165
-rw-r--r--  contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h | 16
-rw-r--r--  contrib/llvm/lib/Target/X86/X86FastISel.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/X86/X86FrameLowering.cpp | 34
-rw-r--r--  contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 10
-rw-r--r--  contrib/llvm/lib/Target/X86/X86ISelLowering.cpp | 186
-rw-r--r--  contrib/llvm/lib/Target/X86/X86ISelLowering.h | 4
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrAVX512.td | 1
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrCompiler.td | 26
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrInfo.cpp | 60
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrInfo.h | 4
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrInfo.td | 32
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrMMX.td | 2
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrMPX.td | 6
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrSSE.td | 54
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrSystem.td | 15
-rw-r--r--  contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h | 43
-rw-r--r--  contrib/llvm/lib/Target/X86/X86MCInstLower.cpp | 5
-rw-r--r--  contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp | 190
-rw-r--r--  contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h | 45
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp | 53
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 3
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 16
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h | 2
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 29
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 5
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 10
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LICM.cpp | 115
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 182
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 98
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp | 34
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 52
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp | 100
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/Local.cpp | 47
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 66
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 78
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp | 10
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 12
-rw-r--r--  contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp | 307
-rw-r--r--  contrib/llvm/utils/TableGen/SubtargetEmitter.cpp | 28
-rw-r--r--  contrib/llvm/utils/TableGen/TableGen.cpp | 3
133 files changed, 2995 insertions, 1578 deletions
diff --git a/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h b/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h
index 87fb3ef..493a99a 100644
--- a/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h
+++ b/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h
@@ -59,11 +59,6 @@ bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
-/// \brief Tests if a value is a call or invoke to a library function that
-/// allocates memory and never returns null (such as operator new).
-bool isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
- bool LookThroughBitCast = false);
-
//===----------------------------------------------------------------------===//
// malloc Call Utility Functions.
//
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
index 978864e..05c9a9e0 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -97,7 +97,7 @@ private:
// of memory operands required to be precise exceeds the maximum value of
// NumMemRefs - currently 256 - we remove the operands entirely. Note also
// that this is a non-owning reference to a shared copy on write buffer owned
- // by the MachineFunction and created via MF.allocateMemRefsArray.
+ // by the MachineFunction and created via MF.allocateMemRefsArray.
mmo_iterator MemRefs;
DebugLoc debugLoc; // Source line information.
@@ -354,7 +354,7 @@ public:
mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; }
/// Return true if we don't have any memory operands which described the the
/// memory access done by this instruction. If this is true, calling code
- /// must be conservative.
+ /// must be conservative.
bool memoperands_empty() const { return NumMemRefs == 0; }
iterator_range<mmo_iterator> memoperands() {
@@ -774,7 +774,7 @@ public:
bool isKill() const { return getOpcode() == TargetOpcode::KILL; }
bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; }
bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; }
- bool isMSInlineAsm() const {
+ bool isMSInlineAsm() const {
return getOpcode() == TargetOpcode::INLINEASM && getInlineAsmDialect();
}
bool isStackAligningInlineAsm() const;
@@ -1180,11 +1180,26 @@ public:
/// Assign this MachineInstr's memory reference descriptor list.
/// This does not transfer ownership.
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
- MemRefs = NewMemRefs;
- NumMemRefs = uint8_t(NewMemRefsEnd - NewMemRefs);
- assert(NumMemRefs == NewMemRefsEnd - NewMemRefs && "Too many memrefs");
+ setMemRefs(std::make_pair(NewMemRefs, NewMemRefsEnd-NewMemRefs));
}
+ /// Assign this MachineInstr's memory reference descriptor list. First
+ /// element in the pair is the begin iterator/pointer to the array; the
+ /// second is the number of MemoryOperands. This does not transfer ownership
+ /// of the underlying memory.
+ void setMemRefs(std::pair<mmo_iterator, unsigned> NewMemRefs) {
+ MemRefs = NewMemRefs.first;
+ NumMemRefs = uint8_t(NewMemRefs.second);
+ assert(NumMemRefs == NewMemRefs.second &&
+ "Too many memrefs - must drop memory operands");
+ }
+
+ /// Return a set of memrefs (begin iterator, size) which conservatively
+ /// describe the memory behavior of both MachineInstrs. This is appropriate
+ /// for use when merging two MachineInstrs into one. This routine does not
+ /// modify the memrefs of the this MachineInstr.
+ std::pair<mmo_iterator, unsigned> mergeMemRefsWith(const MachineInstr& Other);
+
/// Clear this MachineInstr's memory reference descriptor list. This resets
/// the memrefs to their most conservative state. This should be used only
/// as a last resort since it greatly pessimizes our knowledge of the memory
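[Editor's note: a minimal usage sketch, not part of the commit. It shows the new pair-based setMemRefs together with mergeMemRefsWith, the combination the MachineCSE changes below rely on. MergedMI, LoadMI, and StoreMI are hypothetical instructions owned by some surrounding pass.]

#include "llvm/CodeGen/MachineInstr.h"

// Give a fused instruction memory operands that conservatively describe
// both source instructions. mergeMemRefsWith() returns a (begin iterator,
// size) pair whose storage is managed by the owning MachineFunction;
// setMemRefs() stores it without taking ownership.
static void combineMemRefs(llvm::MachineInstr *MergedMI,
                           llvm::MachineInstr *LoadMI,
                           llvm::MachineInstr *StoreMI) {
  MergedMI->setMemRefs(LoadMI->mergeMemRefsWith(*StoreMI));
}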
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
index aa5f4b2..8fe9b28 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -162,6 +162,11 @@ public:
return *this;
}
+ const MachineInstrBuilder &setMemRefs(std::pair<MachineInstr::mmo_iterator,
+ unsigned> MemOperandsRef) const {
+ MI->setMemRefs(MemOperandsRef);
+ return *this;
+ }
const MachineInstrBuilder &addOperand(const MachineOperand &MO) const {
MI->addOperand(*MF, MO);
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h b/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h
index 4fbe206..4e88606 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h
@@ -178,7 +178,7 @@ public:
/// register.
bool FullyDefined;
- /// Reg or ont of its aliases is read. The register may only be read
+ /// Reg or one of its aliases is read. The register may only be read
/// partially.
bool Read;
/// Reg or a super-register is read. The full register is read.
diff --git a/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h b/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h
index 70d558f..f6ad7a8 100644
--- a/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h
@@ -83,7 +83,9 @@ enum class ClrHandlerType { Catch, Finally, Fault, Filter };
struct ClrEHUnwindMapEntry {
MBBOrBasicBlock Handler;
uint32_t TypeToken;
- int Parent;
+ int HandlerParentState; ///< Outer handler enclosing this entry's handler
+ int TryParentState; ///< Outer try region enclosing this entry's try region,
+ ///< treating later catches on same try as "outer"
ClrHandlerType HandlerType;
};
diff --git a/contrib/llvm/include/llvm/IR/CallSite.h b/contrib/llvm/include/llvm/IR/CallSite.h
index f4b8a8a..f7bfb47 100644
--- a/contrib/llvm/include/llvm/IR/CallSite.h
+++ b/contrib/llvm/include/llvm/IR/CallSite.h
@@ -310,6 +310,11 @@ public:
CALLSITE_DELEGATE_GETTER(hasFnAttr(A));
}
+ /// \brief Return true if this function has the given attribute.
+ bool hasFnAttr(StringRef A) const {
+ CALLSITE_DELEGATE_GETTER(hasFnAttr(A));
+ }
+
/// \brief Return true if the call or the callee has the given attribute.
bool paramHasAttr(unsigned i, Attribute::AttrKind A) const {
CALLSITE_DELEGATE_GETTER(paramHasAttr(i, A));
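[Editor's note: a short sketch of the new StringRef overload, which lets passes query string (target-dependent) attributes through a CallSite without manually checking both the call and the callee. The attribute name is illustrative.]

#include "llvm/IR/CallSite.h"

// True if either the call instruction or the called function carries the
// given string attribute.
static bool disablesTailCalls(llvm::ImmutableCallSite CS) {
  return CS.hasFnAttr("disable-tail-calls");
}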
diff --git a/contrib/llvm/include/llvm/IR/IRBuilder.h b/contrib/llvm/include/llvm/IR/IRBuilder.h
index 7fe04f2..a305054 100644
--- a/contrib/llvm/include/llvm/IR/IRBuilder.h
+++ b/contrib/llvm/include/llvm/IR/IRBuilder.h
@@ -61,9 +61,13 @@ protected:
MDNode *DefaultFPMathTag;
FastMathFlags FMF;
+ ArrayRef<OperandBundleDef> DefaultOperandBundles;
+
public:
- IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr)
- : Context(context), DefaultFPMathTag(FPMathTag), FMF() {
+ IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr,
+ ArrayRef<OperandBundleDef> OpBundles = None)
+ : Context(context), DefaultFPMathTag(FPMathTag), FMF(),
+ DefaultOperandBundles(OpBundles) {
ClearInsertionPoint();
}
@@ -538,37 +542,44 @@ class IRBuilder : public IRBuilderBase, public Inserter {
public:
IRBuilder(LLVMContext &C, const T &F, Inserter I = Inserter(),
- MDNode *FPMathTag = nullptr)
- : IRBuilderBase(C, FPMathTag), Inserter(std::move(I)), Folder(F) {}
-
- explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = nullptr)
- : IRBuilderBase(C, FPMathTag), Folder() {
- }
-
- explicit IRBuilder(BasicBlock *TheBB, const T &F, MDNode *FPMathTag = nullptr)
- : IRBuilderBase(TheBB->getContext(), FPMathTag), Folder(F) {
+ MDNode *FPMathTag = nullptr,
+ ArrayRef<OperandBundleDef> OpBundles = None)
+ : IRBuilderBase(C, FPMathTag, OpBundles), Inserter(std::move(I)),
+ Folder(F) {}
+
+ explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = nullptr,
+ ArrayRef<OperandBundleDef> OpBundles = None)
+ : IRBuilderBase(C, FPMathTag, OpBundles), Folder() {}
+
+ explicit IRBuilder(BasicBlock *TheBB, const T &F, MDNode *FPMathTag = nullptr,
+ ArrayRef<OperandBundleDef> OpBundles = None)
+ : IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder(F) {
SetInsertPoint(TheBB);
}
- explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = nullptr)
- : IRBuilderBase(TheBB->getContext(), FPMathTag), Folder() {
+ explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = nullptr,
+ ArrayRef<OperandBundleDef> OpBundles = None)
+ : IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder() {
SetInsertPoint(TheBB);
}
- explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = nullptr)
- : IRBuilderBase(IP->getContext(), FPMathTag), Folder() {
+ explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = nullptr,
+ ArrayRef<OperandBundleDef> OpBundles = None)
+ : IRBuilderBase(IP->getContext(), FPMathTag, OpBundles), Folder() {
SetInsertPoint(IP);
}
- IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F,
- MDNode *FPMathTag = nullptr)
- : IRBuilderBase(TheBB->getContext(), FPMathTag), Folder(F) {
+ IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T &F,
+ MDNode *FPMathTag = nullptr,
+ ArrayRef<OperandBundleDef> OpBundles = None)
+ : IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder(F) {
SetInsertPoint(TheBB, IP);
}
IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP,
- MDNode *FPMathTag = nullptr)
- : IRBuilderBase(TheBB->getContext(), FPMathTag), Folder() {
+ MDNode *FPMathTag = nullptr,
+ ArrayRef<OperandBundleDef> OpBundles = None)
+ : IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder() {
SetInsertPoint(TheBB, IP);
}
@@ -1529,8 +1540,11 @@ public:
CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args = None,
ArrayRef<OperandBundleDef> OpBundles = None,
- const Twine &Name = "") {
- return Insert(CallInst::Create(Callee, Args, OpBundles), Name);
+ const Twine &Name = "", MDNode *FPMathTag = nullptr) {
+ CallInst *CI = CallInst::Create(Callee, Args, OpBundles);
+ if (isa<FPMathOperator>(CI))
+ CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF));
+ return Insert(CI, Name);
}
CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
@@ -1543,7 +1557,7 @@ public:
CallInst *CreateCall(llvm::FunctionType *FTy, Value *Callee,
ArrayRef<Value *> Args, const Twine &Name = "",
MDNode *FPMathTag = nullptr) {
- CallInst *CI = CallInst::Create(FTy, Callee, Args);
+ CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles);
if (isa<FPMathOperator>(CI))
CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF));
return Insert(CI, Name);
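[Editor's note: a sketch, not from the commit, of the new default-operand-bundle plumbing. Bundles passed at construction are attached by the CreateCall(FunctionType*, ...) overload; the "funclet" tag is illustrative of what a WinEH-aware pass might attach. Since DefaultOperandBundles is a non-owning ArrayRef, the bundle storage must outlive the builder, as the local SmallVector here does.]

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static CallInst *emitCallInFunclet(BasicBlock *BB, Function *Callee,
                                   ArrayRef<Value *> Args, Value *PadToken) {
  // Bundle storage lives in this frame for as long as the builder does.
  OperandBundleDef OB("funclet", std::vector<Value *>{PadToken});
  SmallVector<OperandBundleDef, 1> Bundles{OB};
  IRBuilder<> Builder(BB, /*FPMathTag=*/nullptr, Bundles);
  // The call picks up Bundles via DefaultOperandBundles.
  return Builder.CreateCall(Callee->getFunctionType(), Callee, Args);
}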
diff --git a/contrib/llvm/include/llvm/IR/Instructions.h b/contrib/llvm/include/llvm/IR/Instructions.h
index d781c7a..aba48ca 100644
--- a/contrib/llvm/include/llvm/IR/Instructions.h
+++ b/contrib/llvm/include/llvm/IR/Instructions.h
@@ -3550,6 +3550,11 @@ public:
return hasFnAttrImpl(A);
}
+ /// \brief Determine whether this call has the given attribute.
+ bool hasFnAttr(StringRef A) const {
+ return hasFnAttrImpl(A);
+ }
+
/// \brief Determine whether the call or the callee has the given attributes.
bool paramHasAttr(unsigned i, Attribute::AttrKind A) const;
@@ -3734,7 +3739,19 @@ private:
unsigned getNumSuccessorsV() const override;
void setSuccessorV(unsigned idx, BasicBlock *B) override;
- bool hasFnAttrImpl(Attribute::AttrKind A) const;
+ template <typename AttrKind> bool hasFnAttrImpl(AttrKind A) const {
+ if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
+ return true;
+
+ // Operand bundles override attributes on the called function, but don't
+ // override attributes directly present on the invoke instruction.
+ if (isFnAttrDisallowedByOpBundle(A))
+ return false;
+
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
+ return false;
+ }
// Shadow Instruction::setInstructionSubclassData with a private forwarding
// method so that subclasses cannot accidentally use it.
@@ -3966,6 +3983,8 @@ public:
/// point to the added handler.
void addHandler(BasicBlock *Dest);
+ void removeHandler(handler_iterator HI);
+
unsigned getNumSuccessors() const { return getNumOperands() - 1; }
BasicBlock *getSuccessor(unsigned Idx) const {
assert(Idx < getNumSuccessors() &&
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
index 18390f8..54bcbd8 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -33,6 +33,19 @@ let TargetPrefix = "x86" in {
}
//===----------------------------------------------------------------------===//
+// FLAGS.
+let TargetPrefix = "x86" in {
+ def int_x86_flags_read_u32 : GCCBuiltin<"__builtin_ia32_readeflags_u32">,
+ Intrinsic<[llvm_i32_ty], [], []>;
+ def int_x86_flags_read_u64 : GCCBuiltin<"__builtin_ia32_readeflags_u64">,
+ Intrinsic<[llvm_i64_ty], [], []>;
+ def int_x86_flags_write_u32 : GCCBuiltin<"__builtin_ia32_writeeflags_u32">,
+ Intrinsic<[], [llvm_i32_ty], []>;
+ def int_x86_flags_write_u64 : GCCBuiltin<"__builtin_ia32_writeeflags_u64">,
+ Intrinsic<[], [llvm_i64_ty], []>;
+}
+
+//===----------------------------------------------------------------------===//
// Read Time Stamp Counter.
let TargetPrefix = "x86" in {
def int_x86_rdtsc : GCCBuiltin<"__builtin_ia32_rdtsc">,
@@ -2211,6 +2224,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_w_128 : GCCBuiltin<"__builtin_ia32_psraw128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_w_256 : GCCBuiltin<"__builtin_ia32_psraw256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_w_512 : GCCBuiltin<"__builtin_ia32_psraw512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
+ llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_wi_128 : GCCBuiltin<"__builtin_ia32_psrawi128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_wi_256 : GCCBuiltin<"__builtin_ia32_psrawi256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_wi_512 : GCCBuiltin<"__builtin_ia32_psrawi512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
+ llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+
def int_x86_avx512_mask_psll_d : GCCBuiltin<"__builtin_ia32_pslld512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
@@ -2229,6 +2261,69 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_psra_q : GCCBuiltin<"__builtin_ia32_psraq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_psra_d_128 : GCCBuiltin<"__builtin_ia32_psrad128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_d_256 : GCCBuiltin<"__builtin_ia32_psrad256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_di_128 : GCCBuiltin<"__builtin_ia32_psradi128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_di_256 : GCCBuiltin<"__builtin_ia32_psradi256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_di_512 : GCCBuiltin<"__builtin_ia32_psradi512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_q_128 : GCCBuiltin<"__builtin_ia32_psraq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_q_256 : GCCBuiltin<"__builtin_ia32_psraq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_qi_128 : GCCBuiltin<"__builtin_ia32_psraqi128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_qi_256 : GCCBuiltin<"__builtin_ia32_psraqi256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_qi_512 : GCCBuiltin<"__builtin_ia32_psraqi512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_psrl_d_128: GCCBuiltin<"__builtin_ia32_psrld128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [ llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty ], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_d_256: GCCBuiltin<"__builtin_ia32_psrld256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [ llvm_v8i32_ty,
+ llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty ], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_di_128: GCCBuiltin<"__builtin_ia32_psrldi128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [ llvm_v4i32_ty,
+ llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty ], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_di_256: GCCBuiltin<"__builtin_ia32_psrldi256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [ llvm_v8i32_ty,
+ llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty ], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_di_512: GCCBuiltin<"__builtin_ia32_psrldi512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [ llvm_v16i32_ty,
+ llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty ], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_psrl_q_128: GCCBuiltin<"__builtin_ia32_psrlq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_q_256: GCCBuiltin<"__builtin_ia32_psrlq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_qi_128: GCCBuiltin<"__builtin_ia32_psrlqi128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_qi_256: GCCBuiltin<"__builtin_ia32_psrlqi256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_qi_512: GCCBuiltin<"__builtin_ia32_psrlqi512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Pack ops.
@@ -2696,6 +2791,59 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_psrl_dq_512 : GCCBuiltin<"__builtin_ia32_psrldq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty],
[IntrNoMem]>;
+
+ def int_x86_avx512_mask_psll_d_128 : GCCBuiltin<"__builtin_ia32_pslld128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_d_256 : GCCBuiltin<"__builtin_ia32_pslld256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_di_128 : GCCBuiltin<"__builtin_ia32_pslldi128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_di_256 : GCCBuiltin<"__builtin_ia32_pslldi256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_di_512 : GCCBuiltin<"__builtin_ia32_pslldi512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_q_128 : GCCBuiltin<"__builtin_ia32_psllq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_q_256 : GCCBuiltin<"__builtin_ia32_psllq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_qi_128 : GCCBuiltin<"__builtin_ia32_psllqi128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_qi_256 : GCCBuiltin<"__builtin_ia32_psllqi256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_qi_512 : GCCBuiltin<"__builtin_ia32_psllqi512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_psrlv16_hi : GCCBuiltin<"__builtin_ia32_psrlv16hi_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrlv2_di : GCCBuiltin<"__builtin_ia32_psrlv2di_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrlv32hi : GCCBuiltin<"__builtin_ia32_psrlv32hi_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
+ llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrlv4_di : GCCBuiltin<"__builtin_ia32_psrlv4di_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrlv4_si : GCCBuiltin<"__builtin_ia32_psrlv4si_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrlv8_hi : GCCBuiltin<"__builtin_ia32_psrlv8hi_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrlv8_si : GCCBuiltin<"__builtin_ia32_psrlv8si_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Gather ops
@@ -3919,9 +4067,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Support protection key
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_rdpkru : GCCBuiltin <"__builtin_ia32_rdpkru">,
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [], []>;
def int_x86_wrpkru : GCCBuiltin<"__builtin_ia32_wrpkru">,
- Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[], [llvm_i32_ty], []>;
}
//===----------------------------------------------------------------------===//
// Half float conversion
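[Editor's note: the new EFLAGS intrinsics map onto the long-standing GCC/clang builtins named in the defs above, so from C++ they are reachable directly. An illustrative x86-64 sketch:]

#include <cstdint>

// Save and later restore the flags register around flag-clobbering code.
static uint64_t saveFlags() { return __builtin_ia32_readeflags_u64(); }
static void restoreFlags(uint64_t F) { __builtin_ia32_writeeflags_u64(F); }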
diff --git a/contrib/llvm/include/llvm/IR/Metadata.h b/contrib/llvm/include/llvm/IR/Metadata.h
index 2ea5913..4a8557d 100644
--- a/contrib/llvm/include/llvm/IR/Metadata.h
+++ b/contrib/llvm/include/llvm/IR/Metadata.h
@@ -283,14 +283,20 @@ private:
LLVMContext &Context;
uint64_t NextIndex;
SmallDenseMap<void *, std::pair<OwnerTy, uint64_t>, 4> UseMap;
+ /// Flag that can be set to false if this metadata should not be
+ /// RAUW'ed, e.g. if it is used as the key of a map.
+ bool CanReplace;
public:
ReplaceableMetadataImpl(LLVMContext &Context)
- : Context(Context), NextIndex(0) {}
+ : Context(Context), NextIndex(0), CanReplace(true) {}
~ReplaceableMetadataImpl() {
assert(UseMap.empty() && "Cannot destroy in-use replaceable metadata");
}
+ /// Set the CanReplace flag to the given value.
+ void setCanReplace(bool Replaceable) { CanReplace = Replaceable; }
+
LLVMContext &getContext() const { return Context; }
/// \brief Replace all uses of this with MD.
@@ -901,14 +907,19 @@ public:
Context.getReplaceableUses()->replaceAllUsesWith(MD);
}
+ /// Set the CanReplace flag to the given value.
+ void setCanReplace(bool Replaceable) {
+ Context.getReplaceableUses()->setCanReplace(Replaceable);
+ }
+
/// \brief Resolve cycles.
///
/// Once all forward declarations have been resolved, force cycles to be
- /// resolved. If \p MDMaterialized is true, then any temporary metadata
+ /// resolved. If \p AllowTemps is true, then any temporary metadata
/// is ignored, otherwise it asserts when encountering temporary metadata.
///
/// \pre No operands (or operands' operands, etc.) have \a isTemporary().
- void resolveCycles(bool MDMaterialized = true);
+ void resolveCycles(bool AllowTemps = false);
/// \brief Replace a temporary node with a permanent one.
///
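[Editor's note: a small sketch of the intended use of the new flag, covering the map-key scenario the comment above mentions. Names are illustrative.]

#include "llvm/IR/Metadata.h"
using namespace llvm;

// Pin a temporary node used as a long-lived map key so that RAUW elsewhere
// cannot rewrite it out from under the map.
static TempMDTuple makeMapKey(LLVMContext &Ctx) {
  TempMDTuple Key = MDTuple::getTemporary(Ctx, None);
  Key->setCanReplace(false); // mark the node non-replaceable
  return Key;
}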
diff --git a/contrib/llvm/include/llvm/IR/Statepoint.h b/contrib/llvm/include/llvm/IR/Statepoint.h
index 7310c56..51a0951 100644
--- a/contrib/llvm/include/llvm/IR/Statepoint.h
+++ b/contrib/llvm/include/llvm/IR/Statepoint.h
@@ -22,6 +22,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
@@ -36,14 +37,13 @@ enum class StatepointFlags {
MaskAll = GCTransition ///< A bitmask that includes all valid flags.
};
-class GCRelocateOperands;
+class GCRelocateInst;
class ImmutableStatepoint;
bool isStatepoint(const ImmutableCallSite &CS);
bool isStatepoint(const Value *V);
bool isStatepoint(const Value &V);
-bool isGCRelocate(const Value *V);
bool isGCRelocate(const ImmutableCallSite &CS);
bool isGCResult(const Value *V);
@@ -247,7 +247,7 @@ public:
/// May contain several relocations for the same base/derived pair.
/// For example this could happen due to relocations on unwinding
/// path of invoke.
- std::vector<GCRelocateOperands> getRelocates() const;
+ std::vector<const GCRelocateInst *> getRelocates() const;
/// Get the experimental_gc_result call tied to this statepoint. Can be
/// nullptr if there isn't a gc_result tied to this statepoint. Guaranteed to
@@ -305,33 +305,27 @@ public:
explicit Statepoint(CallSite CS) : Base(CS) {}
};
-/// Wraps a call to a gc.relocate and provides access to it's operands.
-/// TODO: This should likely be refactored to resememble the wrappers in
-/// InstrinsicInst.h.
-class GCRelocateOperands {
- ImmutableCallSite RelocateCS;
-
+/// This represents the gc.relocate intrinsic.
+class GCRelocateInst : public IntrinsicInst {
public:
- GCRelocateOperands(const User *U) : RelocateCS(U) { assert(isGCRelocate(U)); }
- GCRelocateOperands(const Instruction *inst) : RelocateCS(inst) {
- assert(isGCRelocate(inst));
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::experimental_gc_relocate;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
- GCRelocateOperands(CallSite CS) : RelocateCS(CS) { assert(isGCRelocate(CS)); }
/// Return true if this relocate is tied to the invoke statepoint.
/// This includes relocates which are on the unwinding path.
bool isTiedToInvoke() const {
- const Value *Token = RelocateCS.getArgument(0);
+ const Value *Token = getArgOperand(0);
return isa<LandingPadInst>(Token) || isa<InvokeInst>(Token);
}
- /// Get enclosed relocate intrinsic
- ImmutableCallSite getUnderlyingCallSite() { return RelocateCS; }
-
/// The statepoint with which this gc.relocate is associated.
- const Instruction *getStatepoint() {
- const Value *Token = RelocateCS.getArgument(0);
+ const Instruction *getStatepoint() const {
+ const Value *Token = getArgOperand(0);
// This takes care both of relocates for call statepoints and relocates
// on normal path of invoke statepoint.
@@ -354,22 +348,22 @@ public:
/// The index into the associate statepoint's argument list
/// which contains the base pointer of the pointer whose
/// relocation this gc.relocate describes.
- unsigned getBasePtrIndex() {
- return cast<ConstantInt>(RelocateCS.getArgument(1))->getZExtValue();
+ unsigned getBasePtrIndex() const {
+ return cast<ConstantInt>(getArgOperand(1))->getZExtValue();
}
/// The index into the associate statepoint's argument list which
/// contains the pointer whose relocation this gc.relocate describes.
- unsigned getDerivedPtrIndex() {
- return cast<ConstantInt>(RelocateCS.getArgument(2))->getZExtValue();
+ unsigned getDerivedPtrIndex() const {
+ return cast<ConstantInt>(getArgOperand(2))->getZExtValue();
}
- Value *getBasePtr() {
+ Value *getBasePtr() const {
ImmutableCallSite CS(getStatepoint());
return *(CS.arg_begin() + getBasePtrIndex());
}
- Value *getDerivedPtr() {
+ Value *getDerivedPtr() const {
ImmutableCallSite CS(getStatepoint());
return *(CS.arg_begin() + getDerivedPtrIndex());
}
@@ -377,11 +371,11 @@ public:
template <typename FunTy, typename InstructionTy, typename ValueTy,
typename CallSiteTy>
-std::vector<GCRelocateOperands>
+std::vector<const GCRelocateInst *>
StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
const {
- std::vector<GCRelocateOperands> Result;
+ std::vector<const GCRelocateInst *> Result;
CallSiteTy StatepointCS = getCallSite();
@@ -389,8 +383,8 @@ StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
// gc_relocates ensures that we only get pairs which are actually relocated
// and used after the statepoint.
for (const User *U : getInstruction()->users())
- if (isGCRelocate(U))
- Result.push_back(GCRelocateOperands(U));
+ if (auto *Relocate = dyn_cast<GCRelocateInst>(U))
+ Result.push_back(Relocate);
if (!StatepointCS.isInvoke())
return Result;
@@ -401,8 +395,8 @@ StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
// Search for gc relocates that are attached to this landingpad.
for (const User *LandingPadUser : LandingPad->users()) {
- if (isGCRelocate(LandingPadUser))
- Result.push_back(GCRelocateOperands(LandingPadUser));
+ if (auto *Relocate = dyn_cast<GCRelocateInst>(LandingPadUser))
+ Result.push_back(Relocate);
}
return Result;
}
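[Editor's note: with GCRelocateOperands replaced by a real IntrinsicInst subclass, call sites shrink to the standard dyn_cast idiom used in getRelocates() above. A hypothetical sketch:]

#include "llvm/IR/Statepoint.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Report the argument indices described by each relocate attached
// directly to a statepoint call.
static void dumpRelocates(const Instruction *StatepointCall) {
  for (const User *U : StatepointCall->users())
    if (const auto *Rel = dyn_cast<GCRelocateInst>(U))
      errs() << "relocate: base arg #" << Rel->getBasePtrIndex()
             << ", derived arg #" << Rel->getDerivedPtrIndex() << "\n";
}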
diff --git a/contrib/llvm/include/llvm/MC/SubtargetFeature.h b/contrib/llvm/include/llvm/MC/SubtargetFeature.h
index 0d97b22..75d1e79 100644
--- a/contrib/llvm/include/llvm/MC/SubtargetFeature.h
+++ b/contrib/llvm/include/llvm/MC/SubtargetFeature.h
@@ -39,8 +39,8 @@ public:
FeatureBitset(const bitset<MAX_SUBTARGET_FEATURES>& B) : bitset(B) {}
FeatureBitset(std::initializer_list<unsigned> Init) : bitset() {
- for (auto I = Init.begin() , E = Init.end(); I != E; ++I)
- set(*I);
+ for (auto I : Init)
+ set(I);
}
};
@@ -59,6 +59,11 @@ struct SubtargetFeatureKV {
bool operator<(StringRef S) const {
return StringRef(Key) < S;
}
+
+ // Compare routine for std::is_sorted.
+ bool operator<(const SubtargetFeatureKV &Other) const {
+ return StringRef(Key) < StringRef(Other.Key);
+ }
};
//===----------------------------------------------------------------------===//
@@ -98,14 +103,13 @@ public:
/// Adding Features.
void AddFeature(StringRef String, bool Enable = true);
- /// ToggleFeature - Toggle a feature and returns the newly updated feature
- /// bits.
- FeatureBitset ToggleFeature(FeatureBitset Bits, StringRef String,
- ArrayRef<SubtargetFeatureKV> FeatureTable);
+ /// ToggleFeature - Toggle a feature and update the feature bits.
+ static void ToggleFeature(FeatureBitset &Bits, StringRef String,
+ ArrayRef<SubtargetFeatureKV> FeatureTable);
- /// Apply the feature flag and return the newly updated feature bits.
- FeatureBitset ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
- ArrayRef<SubtargetFeatureKV> FeatureTable);
+ /// Apply the feature flag and update the feature bits.
+ static void ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
+ ArrayRef<SubtargetFeatureKV> FeatureTable);
/// Get feature bits of a CPU.
FeatureBitset getFeatureBits(StringRef CPU,
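[Editor's note: callers adapt to the new static, in-place signatures by passing the bit set by reference instead of reassigning a returned copy. A sketch; FeatureTable stands in for a target's generated ArrayRef<SubtargetFeatureKV>, and the feature names are illustrative.]

#include "llvm/MC/SubtargetFeature.h"
using namespace llvm;

static void adjustFeatures(FeatureBitset &Bits,
                           ArrayRef<SubtargetFeatureKV> FeatureTable) {
  // Previously: Bits = Features.ApplyFeatureFlag(Bits, "+crc", FeatureTable);
  SubtargetFeatures::ApplyFeatureFlag(Bits, "+crc", FeatureTable);
  SubtargetFeatures::ToggleFeature(Bits, "neon", FeatureTable);
}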
diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProf.h b/contrib/llvm/include/llvm/ProfileData/InstrProf.h
index 4688759..49569d8 100644
--- a/contrib/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/contrib/llvm/include/llvm/ProfileData/InstrProf.h
@@ -155,11 +155,36 @@ GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName);
GlobalVariable *createPGOFuncNameVar(Module &M,
GlobalValue::LinkageTypes Linkage,
StringRef FuncName);
+/// Return the initializer in string of the PGO name var \c NameVar.
+StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar);
/// Given a PGO function name, remove the filename prefix and return
/// the original (static) function name.
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName);
+/// Given a vector of strings (function PGO names) \c NameStrs, the
+/// method generates a combined string \c Result thatis ready to be
+/// serialized. The \c Result string is comprised of three fields:
+/// The first field is the legnth of the uncompressed strings, and the
+/// the second field is the length of the zlib-compressed string.
+/// Both fields are encoded in ULEB128. If \c doCompress is false, the
+/// third field is the uncompressed strings; otherwise it is the
+/// compressed string. When the string compression is off, the
+/// second field will have value zero.
+int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
+ bool doCompression, std::string &Result);
+/// Produce \c Result string with the same format described above. The input
+/// is vector of PGO function name variables that are referenced.
+int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
+ std::string &Result);
+class InstrProfSymtab;
+/// \c NameStrings is a string composed of one of more sub-strings encoded in
+/// the
+/// format described above. The substrings are seperated by 0 or more zero
+/// bytes.
+/// This method decodes the string and populates the \c Symtab.
+int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);
+
const std::error_category &instrprof_category();
enum class instrprof_error {
@@ -235,6 +260,11 @@ public:
/// This interface is used by reader of CoverageMapping test
/// format.
inline std::error_code create(StringRef D, uint64_t BaseAddr);
+ /// \c NameStrings is a string composed of one of more sub-strings
+ /// encoded in the format described above. The substrings are
+ /// seperated by 0 or more zero bytes. This method decodes the
+ /// string and populates the \c Symtab.
+ inline std::error_code create(StringRef NameStrings);
/// Create InstrProfSymtab from a set of names iteratable from
/// \p IterRange. This interface is used by IndexedProfReader.
template <typename NameIterRange> void create(const NameIterRange &IterRange);
@@ -255,8 +285,8 @@ public:
AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
}
AddrHashMap &getAddrHashMap() { return AddrToMD5Map; }
- /// Return function's PGO name from the function name's symabol
- /// address in the object file. If an error occurs, Return
+ /// Return function's PGO name from the function name's symbol
+ /// address in the object file. If an error occurs, return
/// an empty string.
StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
/// Return function's PGO name from the name's md5 hash value.
@@ -270,6 +300,12 @@ std::error_code InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) {
return std::error_code();
}
+std::error_code InstrProfSymtab::create(StringRef NameStrings) {
+ if (readPGOFuncNameStrings(NameStrings, *this))
+ return make_error_code(instrprof_error::malformed);
+ return std::error_code();
+}
+
template <typename NameIterRange>
void InstrProfSymtab::create(const NameIterRange &IterRange) {
for (auto Name : IterRange)
@@ -576,8 +612,14 @@ template <class IntPtrT> struct CovMapFunctionRecord {
#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};
-LLVM_PACKED_END
+// Per module coverage mapping data header, i.e. CoverageMapFileHeader
+// documented above.
+struct CovMapHeader {
+#define COVMAP_HEADER(Type, LLVMType, Name, Init) Type Name;
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+LLVM_PACKED_END
}
} // end namespace llvm
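[Editor's note: a round-trip sketch of the new name-string facility; the error mapping on the collect path is illustrative. It packs PGO names into the ULEB128-prefixed, optionally zlib-compressed blob described above, then rebuilds a symtab from it.]

#include "llvm/ProfileData/InstrProf.h"
using namespace llvm;

static std::error_code roundTripNames(const std::vector<std::string> &Names,
                                      InstrProfSymtab &Symtab) {
  std::string Blob;
  // Blob layout: uncompressed length, compressed length (zero when
  // compression is off), then the (possibly compressed) name strings.
  if (collectPGOFuncNameStrings(Names, /*doCompression=*/true, Blob))
    return make_error_code(instrprof_error::malformed);
  return Symtab.create(StringRef(Blob)); // decodes and populates Symtab
}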
diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc b/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc
index 48dae50..3a7c0c5 100644
--- a/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -1,4 +1,4 @@
-/*===-- InstrProfData.inc - instr profiling runtime structures -----------=== *\
+/*===-- InstrProfData.inc - instr profiling runtime structures -*- C++ -*-=== *\
|*
|* The LLVM Compiler Infrastructure
|*
@@ -167,6 +167,25 @@ COVMAP_FUNC_RECORD(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \
#undef COVMAP_FUNC_RECORD
/* COVMAP_FUNC_RECORD end. */
+/* COVMAP_HEADER start */
+/* Definition of member fields of coverage map header.
+ */
+#ifndef COVMAP_HEADER
+#define COVMAP_HEADER(Type, LLVMType, Name, Initializer)
+#else
+#define INSTR_PROF_DATA_DEFINED
+#endif
+COVMAP_HEADER(uint32_t, Int32Ty, NRecords, \
+ llvm::ConstantInt::get(Int32Ty, FunctionRecords.size()))
+COVMAP_HEADER(uint32_t, Int32Ty, FilenamesSize, \
+ llvm::ConstantInt::get(Int32Ty, FilenamesSize))
+COVMAP_HEADER(uint32_t, Int32Ty, CoverageSize, \
+ llvm::ConstantInt::get(Int32Ty, CoverageMappingSize))
+COVMAP_HEADER(uint32_t, Int32Ty, Version, \
+ llvm::ConstantInt::get(Int32Ty, CoverageMappingVersion1))
+#undef COVMAP_HEADER
+/* COVMAP_HEADER end. */
+
#ifdef INSTR_PROF_VALUE_PROF_DATA
#define INSTR_PROF_DATA_DEFINED
diff --git a/contrib/llvm/include/llvm/Support/ARMTargetParser.def b/contrib/llvm/include/llvm/Support/ARMTargetParser.def
index 2f99b07..c895b09 100644
--- a/contrib/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/contrib/llvm/include/llvm/Support/ARMTargetParser.def
@@ -213,6 +213,7 @@ ARM_CPU_NAME("cortex-a53", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true, AEK_CRC)
ARM_CPU_NAME("cortex-a57", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
ARM_CPU_NAME("cortex-a72", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
ARM_CPU_NAME("cyclone", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
+ARM_CPU_NAME("exynos-m1", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
// Non-standard Arch names.
ARM_CPU_NAME("iwmmxt", AK_IWMMXT, FK_NONE, true, AEK_NONE)
ARM_CPU_NAME("xscale", AK_XSCALE, FK_NONE, true, AEK_NONE)
diff --git a/contrib/llvm/include/llvm/Support/Program.h b/contrib/llvm/include/llvm/Support/Program.h
index 4330210..727864d 100644
--- a/contrib/llvm/include/llvm/Support/Program.h
+++ b/contrib/llvm/include/llvm/Support/Program.h
@@ -130,7 +130,7 @@ struct ProcessInfo {
/// Return true if the given arguments fit within system-specific
/// argument length limits.
- bool argumentsFitWithinSystemLimits(ArrayRef<const char*> Args);
+ bool commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<const char*> Args);
/// File encoding options when writing contents that a non-UTF8 tool will
/// read (on Windows systems). For UNIX, we always use UTF-8.
diff --git a/contrib/llvm/include/llvm/Support/YAMLParser.h b/contrib/llvm/include/llvm/Support/YAMLParser.h
index b056ab6..a5addfa 100644
--- a/contrib/llvm/include/llvm/Support/YAMLParser.h
+++ b/contrib/llvm/include/llvm/Support/YAMLParser.h
@@ -305,7 +305,7 @@ private:
/// increment() which must set CurrentEntry to 0 to create an end iterator.
template <class BaseT, class ValueT>
class basic_collection_iterator
- : public std::iterator<std::forward_iterator_tag, ValueT> {
+ : public std::iterator<std::input_iterator_tag, ValueT> {
public:
basic_collection_iterator() : Base(nullptr) {}
basic_collection_iterator(BaseT *B) : Base(B) {}
@@ -326,11 +326,24 @@ public:
return Base->CurrentEntry;
}
+ /// Note on EqualityComparable:
+ ///
+ /// The iterator is not re-entrant,
+ /// it is meant to be used for parsing YAML on-demand
+ /// Once iteration started - it can point only to one entry at a time
+ /// hence Base.CurrentEntry and Other.Base.CurrentEntry are equal
+ /// iff Base and Other.Base are equal.
+ bool operator==(const basic_collection_iterator &Other) const {
+ if (Base && (Base == Other.Base)) {
+ assert((Base->CurrentEntry == Other.Base->CurrentEntry)
+ && "Equal Bases expected to point to equal Entries");
+ }
+
+ return Base == Other.Base;
+ }
+
bool operator!=(const basic_collection_iterator &Other) const {
- if (Base != Other.Base)
- return true;
- return (Base && Other.Base) &&
- Base->CurrentEntry != Other.Base->CurrentEntry;
+ return !(Base == Other.Base);
}
basic_collection_iterator &operator++() {
diff --git a/contrib/llvm/include/llvm/TableGen/Record.h b/contrib/llvm/include/llvm/TableGen/Record.h
index eb1c5c7..4c1ef40 100644
--- a/contrib/llvm/include/llvm/TableGen/Record.h
+++ b/contrib/llvm/include/llvm/TableGen/Record.h
@@ -232,7 +232,7 @@ protected:
/// We could pack these a bit tighter by not having the IK_FirstXXXInit
/// and IK_LastXXXInit be their own values, but that would degrade
/// readability for really no benefit.
- enum InitKind {
+ enum InitKind : uint8_t {
IK_BitInit,
IK_FirstTypedInit,
IK_BitsInit,
@@ -256,6 +256,9 @@ protected:
private:
const InitKind Kind;
+protected:
+ uint8_t Opc; // Used by UnOpInit, BinOpInit, and TernOpInit
+private:
Init(const Init &) = delete;
Init &operator=(const Init &) = delete;
virtual void anchor();
@@ -264,7 +267,7 @@ public:
InitKind getKind() const { return Kind; }
protected:
- explicit Init(InitKind K) : Kind(K) {}
+ explicit Init(InitKind K, uint8_t Opc = 0) : Kind(K), Opc(Opc) {}
public:
virtual ~Init() {}
@@ -365,7 +368,8 @@ class TypedInit : public Init {
TypedInit &operator=(const TypedInit &Other) = delete;
protected:
- explicit TypedInit(InitKind K, RecTy *T) : Init(K), Ty(T) {}
+ explicit TypedInit(InitKind K, RecTy *T, uint8_t Opc = 0)
+ : Init(K, Opc), Ty(T) {}
~TypedInit() override {
// If this is a DefInit we need to delete the RecordRecTy.
if (getKind() == IK_DefInit)
@@ -650,7 +654,8 @@ class OpInit : public TypedInit {
OpInit &operator=(OpInit &Other) = delete;
protected:
- explicit OpInit(InitKind K, RecTy *Type) : TypedInit(K, Type) {}
+ explicit OpInit(InitKind K, RecTy *Type, uint8_t Opc)
+ : TypedInit(K, Type, Opc) {}
public:
static bool classof(const Init *I) {
@@ -677,14 +682,13 @@ public:
///
class UnOpInit : public OpInit {
public:
- enum UnaryOp { CAST, HEAD, TAIL, EMPTY };
+ enum UnaryOp : uint8_t { CAST, HEAD, TAIL, EMPTY };
private:
- UnaryOp Opc;
Init *LHS;
UnOpInit(UnaryOp opc, Init *lhs, RecTy *Type)
- : OpInit(IK_UnOpInit, Type), Opc(opc), LHS(lhs) {}
+ : OpInit(IK_UnOpInit, Type, opc), LHS(lhs) {}
UnOpInit(const UnOpInit &Other) = delete;
UnOpInit &operator=(const UnOpInit &Other) = delete;
@@ -708,7 +712,7 @@ public:
return getOperand();
}
- UnaryOp getOpcode() const { return Opc; }
+ UnaryOp getOpcode() const { return (UnaryOp)Opc; }
Init *getOperand() const { return LHS; }
// Fold - If possible, fold this to a simpler init. Return this if not
@@ -724,14 +728,14 @@ public:
///
class BinOpInit : public OpInit {
public:
- enum BinaryOp { ADD, AND, SHL, SRA, SRL, LISTCONCAT, STRCONCAT, CONCAT, EQ };
+ enum BinaryOp : uint8_t { ADD, AND, SHL, SRA, SRL, LISTCONCAT,
+ STRCONCAT, CONCAT, EQ };
private:
- BinaryOp Opc;
Init *LHS, *RHS;
BinOpInit(BinaryOp opc, Init *lhs, Init *rhs, RecTy *Type) :
- OpInit(IK_BinOpInit, Type), Opc(opc), LHS(lhs), RHS(rhs) {}
+ OpInit(IK_BinOpInit, Type, opc), LHS(lhs), RHS(rhs) {}
BinOpInit(const BinOpInit &Other) = delete;
BinOpInit &operator=(const BinOpInit &Other) = delete;
@@ -759,7 +763,7 @@ public:
}
}
- BinaryOp getOpcode() const { return Opc; }
+ BinaryOp getOpcode() const { return (BinaryOp)Opc; }
Init *getLHS() const { return LHS; }
Init *getRHS() const { return RHS; }
@@ -776,15 +780,14 @@ public:
///
class TernOpInit : public OpInit {
public:
- enum TernaryOp { SUBST, FOREACH, IF };
+ enum TernaryOp : uint8_t { SUBST, FOREACH, IF };
private:
- TernaryOp Opc;
Init *LHS, *MHS, *RHS;
TernOpInit(TernaryOp opc, Init *lhs, Init *mhs, Init *rhs,
RecTy *Type) :
- OpInit(IK_TernOpInit, Type), Opc(opc), LHS(lhs), MHS(mhs), RHS(rhs) {}
+ OpInit(IK_TernOpInit, Type, opc), LHS(lhs), MHS(mhs), RHS(rhs) {}
TernOpInit(const TernOpInit &Other) = delete;
TernOpInit &operator=(const TernOpInit &Other) = delete;
@@ -815,7 +818,7 @@ public:
}
}
- TernaryOp getOpcode() const { return Opc; }
+ TernaryOp getOpcode() const { return (TernaryOp)Opc; }
Init *getLHS() const { return LHS; }
Init *getMHS() const { return MHS; }
Init *getRHS() const { return RHS; }
diff --git a/contrib/llvm/include/llvm/Target/Target.td b/contrib/llvm/include/llvm/Target/Target.td
index 79046b2..c869341 100644
--- a/contrib/llvm/include/llvm/Target/Target.td
+++ b/contrib/llvm/include/llvm/Target/Target.td
@@ -936,6 +936,10 @@ class AsmParser {
// ShouldEmitMatchRegisterName - Set to false if the target needs a hand
// written register name matcher
bit ShouldEmitMatchRegisterName = 1;
+
+ // HasMnemonicFirst - Set to false if target instructions don't always
+ // start with a mnemonic as the first token.
+ bit HasMnemonicFirst = 1;
}
def DefaultAsmParser : AsmParser;
diff --git a/contrib/llvm/include/llvm/Target/TargetLowering.h b/contrib/llvm/include/llvm/Target/TargetLowering.h
index 140c3659..863b7cd 100644
--- a/contrib/llvm/include/llvm/Target/TargetLowering.h
+++ b/contrib/llvm/include/llvm/Target/TargetLowering.h
@@ -2269,6 +2269,12 @@ public:
return false;
}
+ /// Return true if the MachineFunction contains a COPY which would imply
+ /// HasOpaqueSPAdjustment.
+ virtual bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const {
+ return false;
+ }
+
/// Perform necessary initialization to handle a subset of CSRs explicitly
/// via copies. This function is called at the beginning of instruction
/// selection.
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h b/contrib/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h
index 0d081c0..af0d60b 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h
@@ -23,11 +23,13 @@
namespace llvm {
-/// This optimization identifies DIV instructions that can be
+/// This optimization identifies DIV instructions in a BB that can be
/// profitably bypassed and carried out with a shorter, faster divide.
-bool bypassSlowDivision(Function &F,
- Function::iterator &I,
- const DenseMap<unsigned int, unsigned int> &BypassWidth);
+///
+/// This optimization may add basic blocks immediately after BB; for obvious
+/// reasons, you shouldn't pass those blocks to bypassSlowDivision.
+bool bypassSlowDivision(
+ BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidth);
} // End llvm namespace
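[Editor's note: a driver sketch matching the new per-block contract. Capturing the successor before each call skips the blocks the transform inserts immediately after BB, per the comment above; the 64-to-32-bit bypass width is an illustrative choice.]

#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
using namespace llvm;

static bool bypassDivisions(Function &F) {
  DenseMap<unsigned, unsigned> BypassWidths;
  BypassWidths[64] = 32; // try 64-bit div/rem as 32-bit when values fit
  bool Changed = false;
  for (BasicBlock *BB = F.empty() ? nullptr : &F.front(); BB;) {
    BasicBlock *Next = BB->getNextNode(); // capture before blocks are added
    Changed |= bypassSlowDivision(BB, BypassWidths);
    BB = Next;
  }
  return Changed;
}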
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 17aaee0..2cfacb6 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
@@ -39,6 +40,8 @@ struct LICMSafetyInfo {
bool MayThrow; // The current loop contains an instruction which
// may throw.
bool HeaderMayThrow; // Same as previous, but specific to loop header
+ // Used to update funclet bundle operands.
+ DenseMap<BasicBlock *, ColorVector> BlockColors;
LICMSafetyInfo() : MayThrow(false), HeaderMayThrow(false)
{}
};
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 00f346e..85404d8 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -543,7 +543,6 @@ static bool isMemsetPattern16(const Function *MS,
isa<IntegerType>(MemsetType->getParamType(2)))
return true;
}
-
return false;
}
@@ -583,9 +582,6 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
if (F->onlyAccessesArgMemory())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
- if (isMemsetPattern16(F, TLI))
- Min = FMRB_OnlyAccessesArgumentPointees;
-
// Otherwise be conservative.
return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
}
@@ -599,22 +595,21 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
case Intrinsic::memset:
case Intrinsic::memcpy:
case Intrinsic::memmove:
- assert((ArgIdx == 0 || ArgIdx == 1) &&
- "Invalid argument index for memory intrinsic");
- return ArgIdx ? MRI_Ref : MRI_Mod;
+ // We don't currently have a writeonly attribute. All other properties
+ // of these intrinsics are nicely described via attributes in
+ // Intrinsics.td and handled generically below.
+ if (ArgIdx == 0)
+ return MRI_Mod;
}
// We can bound the aliasing properties of memset_pattern16 just as we can
// for memcpy/memset. This is particularly important because the
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
- // whenever possible.
- if (CS.getCalledFunction() &&
- isMemsetPattern16(CS.getCalledFunction(), TLI)) {
- assert((ArgIdx == 0 || ArgIdx == 1) &&
- "Invalid argument index for memset_pattern16");
- return ArgIdx ? MRI_Ref : MRI_Mod;
- }
- // FIXME: Handle memset_pattern4 and memset_pattern8 also.
+ // whenever possible. Note that all but the missing writeonly attribute are
+ // handled via InferFunctionAttr.
+ if (CS.getCalledFunction() && isMemsetPattern16(CS.getCalledFunction(), TLI))
+ if (ArgIdx == 0)
+ return MRI_Mod;
if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly))
return MRI_Ref;
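
A standalone sketch of the decision order this hunk leaves in place (the enum and helper are illustrative, not the LLVM API): the destination of a memset/memcpy/memmove-like call is special-cased as written-to, readonly parameters are covered by the generic attribute check, and anything else stays conservative.

enum ModRefInfo { MRI_NoModRef, MRI_Ref, MRI_Mod, MRI_ModRef };

ModRefInfo argModRef(bool IsMemIntrinsicLike, unsigned ArgIdx,
                     bool ParamIsReadOnly) {
  if (IsMemIntrinsicLike && ArgIdx == 0)
    return MRI_Mod;    // destination operand: written, not read
  if (ParamIsReadOnly)
    return MRI_Ref;    // readonly attribute bounds the access
  return MRI_ModRef;   // no information: assume both
}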
diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
index ab2263a..249f395 100644
--- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -376,15 +376,6 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
} else {
return true; // Argument of an unknown call.
}
- // If the Callee is not ReadNone, it may read the global,
- // and if it is not ReadOnly, it may also write to it.
- Function *CalleeF = CS.getCalledFunction();
- if (!CalleeF->doesNotAccessMemory()) {
- if (Readers)
- Readers->insert(CalleeF);
- if (Writers && !CalleeF->onlyReadsMemory())
- Writers->insert(CalleeF);
- }
}
} else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
@@ -516,7 +507,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
if (F->isDeclaration()) {
// Try to get mod/ref behaviour from function attributes.
- if (F->doesNotAccessMemory() || F->onlyAccessesInaccessibleMemory()) {
+ if (F->doesNotAccessMemory()) {
// Can't do better than that!
} else if (F->onlyReadsMemory()) {
FI.addModRefInfo(MRI_Ref);
@@ -524,12 +515,6 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// This function might call back into the module and read a global -
// consider every global as possibly being read by this function.
FI.setMayReadAnyGlobal();
- } else if (F->onlyAccessesArgMemory() ||
- F->onlyAccessesInaccessibleMemOrArgMem()) {
- // This function may only access (read/write) memory pointed to by its
- // arguments. If this pointer is to a global, this escaping use of the
- // pointer is captured in AnalyzeUsesOfPointer().
- FI.addModRefInfo(MRI_ModRef);
} else {
FI.addModRefInfo(MRI_ModRef);
// Can't say anything useful unless it's an intrinsic - they don't
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
index b19ecad..9e896ae 100644
--- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -187,13 +187,6 @@ bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
return getAllocationData(V, AllocLike, TLI, LookThroughBitCast);
}
-/// \brief Tests if a value is a call or invoke to a library function that
-/// allocates memory and never returns null (such as operator new).
-bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
- bool LookThroughBitCast) {
- return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast);
-}
-
/// extractMallocCall - Returns the corresponding CallInst if the instruction
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
/// ignore InvokeInst here.
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 3e80bfe..6918360 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -477,7 +477,7 @@ MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom(
// being 42. A key property of this program however is that if either
// 1 or 4 were missing, there would be a race between the store of 42
// either the store of 0 or the load (making the whole program racy).
- // The paper mentionned above shows that the same property is respected
+ // The paper mentioned above shows that the same property is respected
// by every program that can detect any optimisation of that kind: either
// it is racy (undefined) or there is a release followed by an acquire
// between the pair of accesses under consideration.
@@ -685,13 +685,13 @@ MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom(
return MemDepResult::getDef(Inst);
if (isInvariantLoad)
continue;
- // Be conservative if the accessed pointer may alias the allocation.
- if (AA->alias(Inst, AccessPtr) != NoAlias)
- return MemDepResult::getClobber(Inst);
- // If the allocation is not aliased and does not read memory (like
- // strdup), it is safe to ignore.
- if (isa<AllocaInst>(Inst) ||
- isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI))
+ // Be conservative if the accessed pointer may alias the allocation -
+ // fall back to the generic handling below.
+ if ((AA->alias(Inst, AccessPtr) == NoAlias) &&
+ // If the allocation is not aliased and does not read memory (like
+ // strdup), it is safe to ignore.
+ (isa<AllocaInst>(Inst) || isMallocLikeFn(Inst, TLI) ||
+ isCallocLikeFn(Inst, TLI)))
continue;
}
@@ -792,10 +792,8 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
int Count = -1) {
if (Count == -1) Count = Cache.size();
- if (Count == 0) return;
-
- for (unsigned i = 1; i != unsigned(Count); ++i)
- assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
+ assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
+ "Cache isn't sorted!");
}
#endif
diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
index e00f4ae..ce38819 100644
--- a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -52,14 +52,13 @@ static bool hasSinCosPiStret(const Triple &T) {
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
- const char *const *StandardNames) {
-#ifndef NDEBUG
+ ArrayRef<const char *> StandardNames) {
// Verify that the StandardNames array is in alphabetical order.
- for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) {
- if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0)
- llvm_unreachable("TargetLibraryInfoImpl function names must be sorted");
- }
-#endif // !NDEBUG
+ assert(std::is_sorted(StandardNames.begin(), StandardNames.end(),
+ [](const char *LHS, const char *RHS) {
+ return strcmp(LHS, RHS) < 0;
+ }) &&
+ "TargetLibraryInfoImpl function names must be sorted");
if (T.getArch() == Triple::r600 ||
T.getArch() == Triple::amdgcn) {
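
The same assert-on-std::is_sorted pattern works for any table of C strings; a self-contained sketch with an illustrative name table:

#include <algorithm>
#include <cassert>
#include <cstring>
#include <iterator>

const char *const Names[] = {"abs", "acos", "asin", "atan"};

void checkNamesSorted() {
  assert(std::is_sorted(std::begin(Names), std::end(Names),
                        [](const char *L, const char *R) {
                          return strcmp(L, R) < 0;
                        }) &&
         "function names must be sorted");
}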
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index 314ec9c..abc57ed 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -1743,9 +1743,10 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
return false;
Value *X = nullptr, *Y = nullptr;
- // A shift of a power of two is a power of two or zero.
+ // A shift left or a logical shift right of a power of two is a power of two
+ // or zero.
if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
- match(V, m_Shr(m_Value(X), m_Value()))))
+ match(V, m_LShr(m_Value(X), m_Value()))))
return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL);
if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
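
The distinction matters because an arithmetic right shift can smear the sign bit; a standalone illustration (assuming the usual arithmetic shift for negative signed values):

#include <cassert>
#include <cstdint>

bool isPow2OrZero(uint8_t V) { return (V & (V - 1)) == 0; }

void shiftExamples() {
  assert(isPow2OrZero(uint8_t(0x08 << 2)));  // shl: 0x20, still a power of two
  assert(isPow2OrZero(uint8_t(0x08 >> 4)));  // lshr: 0x00, the "or zero" case
  int8_t V = INT8_MIN;                       // bit pattern 0x80
  assert(!isPow2OrZero(uint8_t(V >> 1)));    // ashr: 0xC0, two bits set
}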
@@ -2829,7 +2830,12 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
const DataLayout &DL) {
unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType());
APInt ByteOffset(BitWidth, 0);
- while (1) {
+
+ // We walk up the defs but use a visited set to handle unreachable code. In
+ // that case, we stop after accumulating the cycle once (not that it
+ // matters).
+ SmallPtrSet<Value *, 16> Visited;
+ while (Visited.insert(Ptr).second) {
if (Ptr->getType()->isVectorTy())
break;
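
The visited-set guard is the standard way to make a def-walk terminate even when unreachable IR contains a pointer cycle; a minimal sketch of the loop shape:

#include <unordered_set>

struct Node { Node *Next; };

Node *walk(Node *P) {
  std::unordered_set<Node *> Visited;
  // insert() returns {it, false} on a repeat visit, ending the loop after
  // the cycle has been traversed once.
  while (Visited.insert(P).second) {
    if (!P->Next)
      break;
    P = P->Next;
  }
  return P;
}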
@@ -3268,12 +3274,9 @@ static bool isDereferenceableAndAlignedPointer(
}
// For gc.relocate, look through relocations
- if (const IntrinsicInst *I = dyn_cast<IntrinsicInst>(V))
- if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) {
- GCRelocateOperands RelocateInst(I);
- return isDereferenceableAndAlignedPointer(
- RelocateInst.getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited);
- }
+ if (const GCRelocateInst *RelocateInst = dyn_cast<GCRelocateInst>(V))
+ return isDereferenceableAndAlignedPointer(
+ RelocateInst->getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited);
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL,
@@ -3474,10 +3477,6 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
if (CS.isReturnNonNull())
return true;
- // operator new never returns null.
- if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true))
- return true;
-
return false;
}
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 2e670d5..c7606fd 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -3071,7 +3071,12 @@ void BitcodeReader::saveMetadataList(
for (unsigned ID = 0; ID < MetadataList.size(); ++ID) {
Metadata *MD = MetadataList[ID];
auto *N = dyn_cast_or_null<MDNode>(MD);
+ assert((!N || (N->isResolved() || N->isTemporary())) &&
+ "Found non-resolved non-temp MDNode while saving metadata");
// Save all values if !OnlyTempMD, otherwise just the temporary metadata.
+ // Note that in the !OnlyTempMD case we need to save all Metadata, not
+ // just MDNode, as we may have references to other types of module-level
+ // metadata (e.g. ValueAsMetadata) from instructions.
if (!OnlyTempMD || (N && N->isTemporary())) {
// Will call this after materializing each function, in order to
// handle remapping of the function's instructions/metadata.
@@ -3080,6 +3085,11 @@ void BitcodeReader::saveMetadataList(
assert(MetadataToIDs[MD] == ID && "Inconsistent metadata value id");
continue;
}
+ if (N && N->isTemporary())
+ // Ensure that we assert if someone tries to RAUW this temporary
+ // metadata while it is the key of a map. The flag will be set back
+ // to true when the saved metadata list is destroyed.
+ N->setCanReplace(false);
MetadataToIDs[MD] = ID;
}
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index 48b7104..4da5b58 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -976,32 +976,32 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
}
}
-static int getRank(const WinEHFuncInfo &FuncInfo, int State) {
+static int getTryRank(const WinEHFuncInfo &FuncInfo, int State) {
int Rank = 0;
while (State != -1) {
++Rank;
- State = FuncInfo.ClrEHUnwindMap[State].Parent;
+ State = FuncInfo.ClrEHUnwindMap[State].TryParentState;
}
return Rank;
}
-static int getAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) {
- int LeftRank = getRank(FuncInfo, Left);
- int RightRank = getRank(FuncInfo, Right);
+static int getTryAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) {
+ int LeftRank = getTryRank(FuncInfo, Left);
+ int RightRank = getTryRank(FuncInfo, Right);
while (LeftRank < RightRank) {
- Right = FuncInfo.ClrEHUnwindMap[Right].Parent;
+ Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState;
--RightRank;
}
while (RightRank < LeftRank) {
- Left = FuncInfo.ClrEHUnwindMap[Left].Parent;
+ Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState;
--LeftRank;
}
while (Left != Right) {
- Left = FuncInfo.ClrEHUnwindMap[Left].Parent;
- Right = FuncInfo.ClrEHUnwindMap[Right].Parent;
+ Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState;
+ Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState;
}
return Left;
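
getTryAncestor is the classic depth-equalizing lowest-common-ancestor walk over a parent array; the same shape in a self-contained sketch, with -1 as the "no parent" sentinel:

#include <vector>

static int rankOf(const std::vector<int> &Parent, int S) {
  int R = 0;
  for (; S != -1; S = Parent[S])
    ++R;
  return R;
}

static int commonAncestor(const std::vector<int> &Parent, int L, int R) {
  int LRank = rankOf(Parent, L), RRank = rankOf(Parent, R);
  while (LRank < RRank) { R = Parent[R]; --RRank; }  // equalize depths
  while (RRank < LRank) { L = Parent[L]; --LRank; }
  while (L != R) {                                   // walk up in lockstep
    L = Parent[L];
    R = Parent[R];
  }
  return L;
}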
@@ -1035,9 +1035,9 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
FuncInfo.ClrEHUnwindMap[State].Handler.get<MachineBasicBlock *>();
HandlerStates[HandlerBlock] = State;
// Use this loop through all handlers to verify our assumption (used in
- // the MinEnclosingState computation) that ancestors have lower state
- // numbers than their descendants.
- assert(FuncInfo.ClrEHUnwindMap[State].Parent < State &&
+ // the MinEnclosingState computation) that enclosing funclets have lower
+ // state numbers than their enclosed funclets.
+ assert(FuncInfo.ClrEHUnwindMap[State].HandlerParentState < State &&
"ill-formed state numbering");
}
// Map the main function to the NullState.
@@ -1070,7 +1070,6 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
SmallVector<int, 4> MinClauseMap((size_t)NumStates, NumStates);
// Visit the root function and each funclet.
-
for (MachineFunction::const_iterator FuncletStart = MF->begin(),
FuncletEnd = MF->begin(),
End = MF->end();
@@ -1100,17 +1099,18 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
for (const auto &StateChange :
InvokeStateChangeIterator::range(FuncInfo, FuncletStart, FuncletEnd)) {
// Close any try regions we're not still under
- int AncestorState =
- getAncestor(FuncInfo, CurrentState, StateChange.NewState);
- while (CurrentState != AncestorState) {
- assert(CurrentState != NullState && "Failed to find ancestor!");
+ int StillPendingState =
+ getTryAncestor(FuncInfo, CurrentState, StateChange.NewState);
+ while (CurrentState != StillPendingState) {
+ assert(CurrentState != NullState &&
+ "Failed to find still-pending state!");
// Close the pending clause
Clauses.push_back({CurrentStartLabel, StateChange.PreviousEndLabel,
CurrentState, FuncletState});
- // Now the parent handler is current
- CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].Parent;
+ // Now the next-outer try region is current
+ CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].TryParentState;
// Pop the new start label from the handler stack if we've exited all
- // descendants of the corresponding handler.
+ // inner try regions of the corresponding try region.
if (HandlerStack.back().second == CurrentState)
CurrentStartLabel = HandlerStack.pop_back_val().first;
}
@@ -1121,7 +1121,8 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
// it.
for (int EnteredState = StateChange.NewState;
EnteredState != CurrentState;
- EnteredState = FuncInfo.ClrEHUnwindMap[EnteredState].Parent) {
+ EnteredState =
+ FuncInfo.ClrEHUnwindMap[EnteredState].TryParentState) {
int &MinEnclosingState = MinClauseMap[EnteredState];
if (FuncletState < MinEnclosingState)
MinEnclosingState = FuncletState;
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 5844124..6fbdea8 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -225,8 +225,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
const DenseMap<unsigned int, unsigned int> &BypassWidths =
TLI->getBypassSlowDivWidths();
- for (Function::iterator I = F.begin(); I != F.end(); I++)
- EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
+ BasicBlock* BB = &*F.begin();
+ while (BB != nullptr) {
+ // bypassSlowDivision may create new BBs, but we don't want to reapply the
+ // optimization to those blocks.
+ BasicBlock* Next = BB->getNextNode();
+ EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
+ BB = Next;
+ }
}
// Eliminate blocks that contain only PHI nodes and an
@@ -526,19 +532,17 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
// Computes a map of base pointer relocation instructions to corresponding
// derived pointer relocation instructions given a vector of all relocate calls
static void computeBaseDerivedRelocateMap(
- const SmallVectorImpl<User *> &AllRelocateCalls,
- DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> &
- RelocateInstMap) {
+ const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
+ DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
+ &RelocateInstMap) {
// Collect information in two maps: one primarily for locating the base object
// while filling the second map; the second map is the final structure holding
// a mapping between Base and corresponding Derived relocate calls
- DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap;
- for (auto &U : AllRelocateCalls) {
- GCRelocateOperands ThisRelocate(U);
- IntrinsicInst *I = cast<IntrinsicInst>(U);
- auto K = std::make_pair(ThisRelocate.getBasePtrIndex(),
- ThisRelocate.getDerivedPtrIndex());
- RelocateIdxMap.insert(std::make_pair(K, I));
+ DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
+ for (auto *ThisRelocate : AllRelocateCalls) {
+ auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
+ ThisRelocate->getDerivedPtrIndex());
+ RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
}
for (auto &Item : RelocateIdxMap) {
std::pair<unsigned, unsigned> Key = Item.first;
@@ -546,7 +550,7 @@ static void computeBaseDerivedRelocateMap(
// Base relocation: nothing to insert
continue;
- IntrinsicInst *I = Item.second;
+ GCRelocateInst *I = Item.second;
auto BaseKey = std::make_pair(Key.first, Key.first);
// We're iterating over RelocateIdxMap so we cannot modify it.
@@ -579,16 +583,13 @@ static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
// replace, computes a replacement, and affects it.
static bool
-simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
- const SmallVectorImpl<IntrinsicInst *> &Targets) {
+simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
+ const SmallVectorImpl<GCRelocateInst *> &Targets) {
bool MadeChange = false;
- for (auto &ToReplace : Targets) {
- GCRelocateOperands MasterRelocate(RelocatedBase);
- GCRelocateOperands ThisRelocate(ToReplace);
-
- assert(ThisRelocate.getBasePtrIndex() == MasterRelocate.getBasePtrIndex() &&
+ for (GCRelocateInst *ToReplace : Targets) {
+ assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
"Not relocating a derived object of the original base object");
- if (ThisRelocate.getBasePtrIndex() == ThisRelocate.getDerivedPtrIndex()) {
+ if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
// A duplicate relocate call. TODO: coalesce duplicates.
continue;
}
@@ -601,8 +602,8 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
continue;
}
- Value *Base = ThisRelocate.getBasePtr();
- auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr());
+ Value *Base = ToReplace->getBasePtr();
+ auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
if (!Derived || Derived->getPointerOperand() != Base)
continue;
@@ -680,12 +681,12 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
// %val = load %ptr'
bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
bool MadeChange = false;
- SmallVector<User *, 2> AllRelocateCalls;
+ SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
for (auto *U : I.users())
- if (isGCRelocate(dyn_cast<Instruction>(U)))
+ if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
// Collect all the relocate calls associated with a statepoint
- AllRelocateCalls.push_back(U);
+ AllRelocateCalls.push_back(Relocate);
// We need at least one base pointer relocation + one derived pointer
// relocation to mangle
@@ -694,7 +695,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
// RelocateInstMap is a mapping from the base relocate instruction to the
// corresponding derived relocate instructions
- DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap;
+ DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
if (RelocateInstMap.empty())
return false;
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
index 021707b..aad376c 100644
--- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -122,8 +122,7 @@ INITIALIZE_PASS_END(MachineCSE, "machine-cse",
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
MachineBasicBlock *MBB) {
bool Changed = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
unsigned Reg = MO.getReg();
@@ -186,8 +185,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
return true;
bool SeenDef = false;
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = I->getOperand(i);
+ for (const MachineOperand &MO : I->operands()) {
if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
SeenDef = true;
if (!MO.isReg() || !MO.getReg())
@@ -220,8 +218,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
SmallVectorImpl<unsigned> &PhysDefs,
bool &PhysUseDef) const{
// First, add all uses to PhysRefs.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || MO.isDef())
continue;
unsigned Reg = MO.getReg();
@@ -239,8 +236,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
// (which currently contains only uses), set the PhysUseDef flag.
PhysUseDef = false;
MachineBasicBlock::const_iterator I = MI; I = std::next(I);
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
@@ -311,8 +307,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
if (I == E)
return true;
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = I->getOperand(i);
+ for (const MachineOperand &MO : I->operands()) {
// RegMasks go on instructions like calls that clobber lots of physregs.
// Don't attempt to CSE across such an instruction.
if (MO.isRegMask())
@@ -398,8 +393,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// Heuristics #2: If the expression doesn't use a vr and the only uses
// of the redundant computation are copies, do not cse.
bool HasVRegUse = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isReg() && MO.isUse() &&
TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
HasVRegUse = true;
@@ -580,9 +574,9 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Actually perform the elimination.
if (DoCSE) {
- for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
- unsigned OldReg = CSEPairs[i].first;
- unsigned NewReg = CSEPairs[i].second;
+ for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
+ unsigned OldReg = CSEPair.first;
+ unsigned NewReg = CSEPair.second;
// OldReg may have been unused but is used now, clear the Dead flag
MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
assert(Def != nullptr && "CSEd register has no unique definition?");
@@ -594,8 +588,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Go through implicit defs of CSMI and MI, if a def is not dead at MI,
// we should make sure it is not dead at CSMI.
- for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
- CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
+ for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
+ CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
// Go through implicit defs of CSMI and MI, and clear the kill flags on
// their uses in all the instructions between CSMI and MI.
@@ -685,18 +679,14 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
Node = WorkList.pop_back_val();
Scopes.push_back(Node);
const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
- unsigned NumChildren = Children.size();
- OpenChildren[Node] = NumChildren;
- for (unsigned i = 0; i != NumChildren; ++i) {
- MachineDomTreeNode *Child = Children[i];
+ OpenChildren[Node] = Children.size();
+ for (MachineDomTreeNode *Child : Children)
WorkList.push_back(Child);
- }
} while (!WorkList.empty());
// Now perform CSE.
bool Changed = false;
- for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
- MachineDomTreeNode *Node = Scopes[i];
+ for (MachineDomTreeNode *Node : Scopes) {
MachineBasicBlock *MBB = Node->getBlock();
EnterScope(MBB);
Changed |= ProcessBlock(MBB);
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index 1eb2edc..6b8eecc 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -866,6 +866,27 @@ void MachineInstr::addMemOperand(MachineFunction &MF,
setMemRefs(NewMemRefs, NewMemRefs + NewNum);
}
+std::pair<MachineInstr::mmo_iterator, unsigned>
+MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
+ // TODO: If we end up with too many memory operands, return the empty
+ // conservative set rather than failing asserts.
+ // TODO: consider uniquing elements within the operand lists to reduce
+ // space usage and fall back to conservative information less often.
+ size_t CombinedNumMemRefs = (memoperands_end() - memoperands_begin())
+ + (Other.memoperands_end() - Other.memoperands_begin());
+
+ MachineFunction *MF = getParent()->getParent();
+ mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs);
+ mmo_iterator MemEnd = std::copy(memoperands_begin(), memoperands_end(),
+ MemBegin);
+ MemEnd = std::copy(Other.memoperands_begin(), Other.memoperands_end(),
+ MemEnd);
+ assert(MemEnd - MemBegin == (ptrdiff_t)CombinedNumMemRefs &&
+ "missing memrefs");
+
+ return std::make_pair(MemBegin, CombinedNumMemRefs);
+}
+
bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
assert(!isBundledWithPred() && "Must be called on bundle header");
for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) {
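
mergeMemRefsWith is a plain concatenation of the two operand lists into one freshly allocated array; the same shape on standard containers, as a sketch:

#include <algorithm>
#include <cassert>
#include <vector>

std::vector<int> mergeLists(const std::vector<int> &A,
                            const std::vector<int> &B) {
  std::vector<int> Out(A.size() + B.size());
  auto End = std::copy(A.begin(), A.end(), Out.begin());
  End = std::copy(B.begin(), B.end(), End);
  assert(End == Out.end() && "missing elements");
  return Out;
}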
@@ -1738,7 +1759,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
bool HaveSemi = false;
const unsigned PrintableFlags = FrameSetup | FrameDestroy;
if (Flags & PrintableFlags) {
- if (!HaveSemi) OS << ";"; HaveSemi = true;
+ if (!HaveSemi) {
+ OS << ";";
+ HaveSemi = true;
+ }
OS << " flags: ";
if (Flags & FrameSetup)
@@ -1749,7 +1773,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
if (!memoperands_empty()) {
- if (!HaveSemi) OS << ";"; HaveSemi = true;
+ if (!HaveSemi) {
+ OS << ";";
+ HaveSemi = true;
+ }
OS << " mem:";
for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
@@ -1762,7 +1789,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
// Print the regclass of any virtual registers encountered.
if (MRI && !VirtRegs.empty()) {
- if (!HaveSemi) OS << ";"; HaveSemi = true;
+ if (!HaveSemi) {
+ OS << ";";
+ HaveSemi = true;
+ }
for (unsigned i = 0; i != VirtRegs.size(); ++i) {
const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
OS << " " << TRI->getRegClassName(RC)
@@ -1781,7 +1811,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
// Print debug location information.
if (isDebugValue() && getOperand(e - 2).isMetadata()) {
- if (!HaveSemi) OS << ";";
+ if (!HaveSemi)
+ OS << ";";
auto *DV = cast<DILocalVariable>(getOperand(e - 2).getMetadata());
OS << " line no:" << DV->getLine();
if (auto *InlinedAt = debugLoc->getInlinedAt()) {
@@ -1795,7 +1826,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (isIndirectDebugValue())
OS << " indirect";
} else if (debugLoc && MF) {
- if (!HaveSemi) OS << ";";
+ if (!HaveSemi)
+ OS << ";";
OS << " dbg:";
debugLoc.print(OS);
}
diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
index 3eaf4c5..4619daf 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -315,7 +315,7 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
if (!TRI->regsOverlap(MOReg, Reg))
continue;
- bool Covered = TRI->isSuperRegisterEq(MOReg, Reg);
+ bool Covered = TRI->isSuperRegisterEq(Reg, MOReg);
if (MO.readsReg()) {
PRI.Read = true;
if (Covered) {
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index 8382b09..3749b1d 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -97,9 +97,8 @@ void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) {
unsigned Weight = PSetI.getWeight();
for (; PSetI.isValid(); ++PSetI) {
CurrSetPressure[*PSetI] += Weight;
- if (CurrSetPressure[*PSetI] > P.MaxSetPressure[*PSetI]) {
- P.MaxSetPressure[*PSetI] = CurrSetPressure[*PSetI];
- }
+ P.MaxSetPressure[*PSetI] =
+ std::max(P.MaxSetPressure[*PSetI], CurrSetPressure[*PSetI]);
}
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0872d7a..bc2405b9 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6843,9 +6843,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
uint64_t PtrOff = ShAmt / 8;
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
SDLoc DL(LN0);
+ // The original load itself didn't wrap, so an offset within it doesn't.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
PtrType, LN0->getBasePtr(),
- DAG.getConstant(PtrOff, DL, PtrType));
+ DAG.getConstant(PtrOff, DL, PtrType),
+ &Flags);
AddToWorklist(NewPtr.getNode());
SDValue Load;
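
The flag is justified by a simple interval argument: if the original object [Base, Base + Size) didn't wrap the address space, no in-bounds offset from Base can wrap either. A standalone sketch of that reasoning:

#include <cassert>
#include <cstdint>

uint64_t addrOfPart(uint64_t Base, uint64_t Size, uint64_t Off) {
  assert(Base + Size >= Base && "object itself must not wrap");
  assert(Off < Size && "offset stays inside the object");
  return Base + Off;  // cannot wrap, given the two asserts above
}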
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index abbc48e..96bf914 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2843,6 +2843,43 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
return (AZero | BZero).isAllOnesValue();
}
+static SDValue FoldCONCAT_VECTORS(SDLoc DL, EVT VT, ArrayRef<SDValue> Ops,
+ llvm::SelectionDAG &DAG) {
+ if (Ops.size() == 1)
+ return Ops[0];
+
+ // Concat of UNDEFs is UNDEF.
+ if (std::all_of(Ops.begin(), Ops.end(),
+ [](SDValue Op) { return Op.isUndef(); }))
+ return DAG.getUNDEF(VT);
+
+ // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified
+ // to one big BUILD_VECTOR.
+ // FIXME: Add support for UNDEF and SCALAR_TO_VECTOR as well.
+ if (!std::all_of(Ops.begin(), Ops.end(), [](SDValue Op) {
+ return Op.getOpcode() == ISD::BUILD_VECTOR;
+ }))
+ return SDValue();
+
+ EVT SVT = VT.getScalarType();
+ SmallVector<SDValue, 16> Elts;
+ for (SDValue Op : Ops)
+ Elts.append(Op->op_begin(), Op->op_end());
+
+ // BUILD_VECTOR requires all inputs to be of the same type, find the
+ // maximum type and extend them all.
+ for (SDValue Op : Elts)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+
+ if (SVT.bitsGT(VT.getScalarType()))
+ for (SDValue &Op : Elts)
+ Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, DL, SVT)
+ : DAG.getSExtOrTrunc(Op, DL, SVT);
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
+}
+
/// getNode - Gets or creates the specified node.
///
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
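
FoldCONCAT_VECTORS generalizes the old two- and three-operand special cases to any operand count; the core fold, modeled on plain vectors as a sketch (the extend-to-widest-element step is omitted):

#include <vector>

std::vector<int> foldConcat(const std::vector<std::vector<int>> &Ops) {
  std::vector<int> Elts;
  for (const auto &Op : Ops)
    Elts.insert(Elts.end(), Op.begin(), Op.end());
  return Elts;  // one big BUILD_VECTOR worth of elements
}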
@@ -3426,34 +3463,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
if (N2.getOpcode() == ISD::EntryToken) return N1;
if (N1 == N2) return N1;
break;
- case ISD::CONCAT_VECTORS:
- // Concat of UNDEFs is UNDEF.
- if (N1.getOpcode() == ISD::UNDEF &&
- N2.getOpcode() == ISD::UNDEF)
- return getUNDEF(VT);
-
- // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
- // one big BUILD_VECTOR.
- if (N1.getOpcode() == ISD::BUILD_VECTOR &&
- N2.getOpcode() == ISD::BUILD_VECTOR) {
- SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
- N1.getNode()->op_end());
- Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
-
- // BUILD_VECTOR requires all inputs to be of the same type, find the
- // maximum type and extend them all.
- EVT SVT = VT.getScalarType();
- for (SDValue Op : Elts)
- SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
- if (SVT.bitsGT(VT.getScalarType()))
- for (SDValue &Op : Elts)
- Op = TLI->isZExtFree(Op.getValueType(), SVT)
- ? getZExtOrTrunc(Op, DL, SVT)
- : getSExtOrTrunc(Op, DL, SVT);
-
- return getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
- }
+ case ISD::CONCAT_VECTORS: {
+ // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
+ SDValue Ops[] = {N1, N2};
+ if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+ return V;
break;
+ }
case ISD::AND:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
@@ -3911,19 +3927,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
}
break;
}
- case ISD::CONCAT_VECTORS:
- // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
- // one big BUILD_VECTOR.
- if (N1.getOpcode() == ISD::BUILD_VECTOR &&
- N2.getOpcode() == ISD::BUILD_VECTOR &&
- N3.getOpcode() == ISD::BUILD_VECTOR) {
- SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
- N1.getNode()->op_end());
- Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
- Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
- return getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
- }
+ case ISD::CONCAT_VECTORS: {
+ // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
+ SDValue Ops[] = {N1, N2, N3};
+ if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+ return V;
break;
+ }
case ISD::SETCC: {
// Use FoldSetCC to simplify SETCC's.
if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
@@ -5462,6 +5472,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
switch (Opcode) {
default: break;
+ case ISD::CONCAT_VECTORS: {
+ // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
+ if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
case ISD::SELECT_CC: {
assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
assert(Ops[0].getValueType() == Ops[1].getValueType() &&
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d2ea85ab..e446a93 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1329,12 +1329,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
+ // An aggregate return value cannot wrap around the address space, so
+ // offsets to its parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
SmallVector<SDValue, 4> Chains(NumValues);
for (unsigned i = 0; i != NumValues; ++i) {
SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(),
RetPtr.getValueType(), RetPtr,
DAG.getIntPtrConstant(Offsets[i],
- getCurSDLoc()));
+ getCurSDLoc()),
+ &Flags);
Chains[i] =
DAG.getStore(Chain, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + i),
@@ -2994,8 +3000,15 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (Field) {
// N = N + Offset
uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
+
+ // In an inbounds GEP with an offset that is nonnegative even when
+ // interpreted as signed, assume there is no unsigned overflow.
+ SDNodeFlags Flags;
+ if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
+ Flags.setNoUnsignedWrap(true);
+
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
- DAG.getConstant(Offset, dl, N.getValueType()));
+ DAG.getConstant(Offset, dl, N.getValueType()), &Flags);
}
Ty = StTy->getElementType(Field);
@@ -3020,7 +3033,14 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue OffsVal = VectorWidth ?
DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) :
DAG.getConstant(Offs, dl, PtrTy);
- N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal);
+
+ // In an inbounds GEP with an offset that is nonnegative even when
+ // interpreted as signed, assume there is no unsigned overflow.
+ SDNodeFlags Flags;
+ if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
+ Flags.setNoUnsignedWrap(true);
+
+ N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags);
continue;
}
@@ -3092,10 +3112,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
Align = 0;
// Round the size of the allocation up to the stack alignment size
- // by add SA-1 to the size.
+ // by adding SA-1 to the size. This doesn't overflow because we're computing
+ // an address inside an alloca.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
AllocSize = DAG.getNode(ISD::ADD, dl,
AllocSize.getValueType(), AllocSize,
- DAG.getIntPtrConstant(StackAlign - 1, dl));
+ DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags);
// Mask out the low bits for alignment purposes.
AllocSize = DAG.getNode(ISD::AND, dl,
@@ -3168,6 +3191,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (isVolatile)
Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
+ // An aggregate load cannot wrap around the address space, so offsets to its
+ // parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
EVT PtrVT = Ptr.getValueType();
@@ -3188,7 +3216,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
}
SDValue A = DAG.getNode(ISD::ADD, dl,
PtrVT, Ptr,
- DAG.getConstant(Offsets[i], dl, PtrVT));
+ DAG.getConstant(Offsets[i], dl, PtrVT),
+ &Flags);
SDValue L = DAG.getLoad(ValueVTs[i], dl, Root,
A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
isNonTemporal, isInvariant, Alignment, AAInfo,
@@ -3243,6 +3272,11 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
+ // An aggregate store cannot wrap around the address space, so offsets to its
+ // parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// See visitLoad comments.
@@ -3253,7 +3287,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
ChainI = 0;
}
SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
- DAG.getConstant(Offsets[i], dl, PtrVT));
+ DAG.getConstant(Offsets[i], dl, PtrVT), &Flags);
SDValue St = DAG.getStore(Root, dl,
SDValue(Src.getNode(), Src.getResNo() + i),
Add, MachinePointerInfo(PtrV, Offsets[i]),
@@ -5189,7 +5223,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::experimental_gc_relocate: {
- visitGCRelocate(I);
+ visitGCRelocate(cast<GCRelocateInst>(I));
return nullptr;
}
case Intrinsic::instrprof_increment:
@@ -7202,10 +7236,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ReturnValues.resize(NumValues);
SmallVector<SDValue, 4> Chains(NumValues);
+ // An aggregate return value cannot wrap around the address space, so
+ // offsets to its parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
for (unsigned i = 0; i < NumValues; ++i) {
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
CLI.DAG.getConstant(Offsets[i], CLI.DL,
- PtrVT));
+ PtrVT), &Flags);
SDValue L = CLI.DAG.getLoad(
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 49a3872..8fb85ff 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -855,7 +855,7 @@ private:
// These three are implemented in StatepointLowering.cpp
void visitStatepoint(const CallInst &I);
- void visitGCRelocate(const CallInst &I);
+ void visitGCRelocate(const GCRelocateInst &I);
void visitGCResult(const CallInst &I);
void visitUserOp1(const Instruction &I) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 853a21a..9f8759d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -633,6 +633,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MRI.replaceRegWith(From, To);
}
+ if (TLI->hasCopyImplyingStackAdjustment(MF))
+ MFI->setHasOpaqueSPAdjustment(true);
+
// Freeze the set of reserved registers now that MachineFrameInfo has been
// set up. All the information required by getReservedRegs() should be
// available now.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 050ec21..6547a62 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -128,13 +128,11 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
return Optional<int>();
// Spill location is known for gc relocates
- if (isGCRelocate(Val)) {
- GCRelocateOperands RelocOps(cast<Instruction>(Val));
-
+ if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) {
FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
- Builder.FuncInfo.StatepointRelocatedValues[RelocOps.getStatepoint()];
+ Builder.FuncInfo.StatepointRelocatedValues[Relocate->getStatepoint()];
- auto It = SpillMap.find(RelocOps.getDerivedPtr());
+ auto It = SpillMap.find(Relocate->getDerivedPtr());
if (It == SpillMap.end())
return Optional<int>();
@@ -401,10 +399,10 @@ static void getIncomingStatepointGCValues(
SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Ptrs,
SmallVectorImpl<const Value *> &Relocs, ImmutableStatepoint StatepointSite,
SelectionDAGBuilder &Builder) {
- for (GCRelocateOperands relocateOpers : StatepointSite.getRelocates()) {
- Relocs.push_back(relocateOpers.getUnderlyingCallSite().getInstruction());
- Bases.push_back(relocateOpers.getBasePtr());
- Ptrs.push_back(relocateOpers.getDerivedPtr());
+ for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) {
+ Relocs.push_back(Relocate);
+ Bases.push_back(Relocate->getBasePtr());
+ Ptrs.push_back(Relocate->getDerivedPtr());
}
// Remove any redundant llvm::Values which map to the same SDValue as another
@@ -602,8 +600,8 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr];
- for (GCRelocateOperands RelocateOpers : StatepointSite.getRelocates()) {
- const Value *V = RelocateOpers.getDerivedPtr();
+ for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) {
+ const Value *V = Relocate->getDerivedPtr();
SDValue SDV = Builder.getValue(V);
SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
@@ -624,8 +622,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// uses of the corresponding values so that it would automatically
// export them. Relocates of the spilled values do not use the original
// value.
- if (RelocateOpers.getUnderlyingCallSite().getParent() !=
- StatepointInstr->getParent())
+ if (Relocate->getParent() != StatepointInstr->getParent())
Builder.ExportFromCurrentBlock(V);
}
}
@@ -656,7 +653,7 @@ void SelectionDAGBuilder::LowerStatepoint(
// statepoint.
for (const User *U : CS->users()) {
const CallInst *Call = cast<CallInst>(U);
- if (isGCRelocate(Call) && Call->getParent() == CS.getParent())
+ if (isa<GCRelocateInst>(Call) && Call->getParent() == CS.getParent())
StatepointLowering.scheduleRelocCall(*Call);
}
#endif
@@ -859,24 +856,22 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
}
}
-void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
- GCRelocateOperands RelocateOpers(&CI);
-
+void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
#ifndef NDEBUG
// Consistency check
// We skip this check for relocates not in the same basic block as their
// statepoint. It would be too expensive to preserve validation info through
// different basic blocks.
- if (RelocateOpers.getStatepoint()->getParent() == CI.getParent()) {
- StatepointLowering.relocCallVisited(CI);
+ if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) {
+ StatepointLowering.relocCallVisited(Relocate);
}
#endif
- const Value *DerivedPtr = RelocateOpers.getDerivedPtr();
+ const Value *DerivedPtr = Relocate.getDerivedPtr();
SDValue SD = getValue(DerivedPtr);
FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
- FuncInfo.StatepointRelocatedValues[RelocateOpers.getStatepoint()];
+ FuncInfo.StatepointRelocatedValues[Relocate.getStatepoint()];
// We should have recorded location for this pointer
assert(SpillMap.count(DerivedPtr) && "Relocating not lowered gc value");
@@ -885,7 +880,7 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
// We didn't need to spill these special cases (constants and allocas).
// See the handling in spillIncomingValueForStatepoint for detail.
if (!DerivedPtrLocation) {
- setValue(&CI, SD);
+ setValue(&Relocate, SD);
return;
}
@@ -907,5 +902,5 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
DAG.setRoot(SpillLoad.getValue(1));
assert(SpillLoad.getNode());
- setValue(&CI, SpillLoad);
+ setValue(&Relocate, SpillLoad);
}
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
index fc65639..1c4558c 100644
--- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -212,7 +212,7 @@ unsigned TargetSchedModel::computeOperandLatency(
&& !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
&& SchedModel.isComplete()) {
errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
- << *DefMI;
+ << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
llvm_unreachable("incomplete machine model");
}
#endif
diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
index 52fb922..2426c27 100644
--- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -17,11 +17,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -435,11 +438,12 @@ void llvm::calculateWinCXXEHStateNumbers(const Function *Fn,
calculateStateNumbersForInvokes(Fn, FuncInfo);
}
-static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int ParentState,
- ClrHandlerType HandlerType, uint32_t TypeToken,
- const BasicBlock *Handler) {
+static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int HandlerParentState,
+ int TryParentState, ClrHandlerType HandlerType,
+ uint32_t TypeToken, const BasicBlock *Handler) {
ClrEHUnwindMapEntry Entry;
- Entry.Parent = ParentState;
+ Entry.HandlerParentState = HandlerParentState;
+ Entry.TryParentState = TryParentState;
Entry.Handler = Handler;
Entry.HandlerType = HandlerType;
Entry.TypeToken = TypeToken;
@@ -453,82 +457,199 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
if (!FuncInfo.EHPadStateMap.empty())
return;
+ // This numbering assigns one state number to each catchpad and cleanuppad.
+ // It also computes two tree-like relations over states:
+ // 1) Each state has a "HandlerParentState", which is the state of the next
+ // outer handler enclosing this state's handler (same as nearest ancestor
+ // per the ParentPad linkage on EH pads, but skipping over catchswitches).
+ // 2) Each state has a "TryParentState", which:
+ // a) for a catchpad that's not the last handler on its catchswitch, is
+ // the state of the next catchpad on that catchswitch
+ // b) for all other pads, is the state of the pad whose try region is the
+ // next outer try region enclosing this state's try region. The "try
+ // regions" are not present as such in the IR, but will be inferred
+ // based on the placement of invokes and pads which reach each other
+ // by exceptional exits
+ // Catchswitches do not get their own states, but each gets mapped to the
+ // state of its first catchpad.
+
+ // Step one: walk down from outermost to innermost funclets, assigning each
+ // catchpad and cleanuppad a state number. Add an entry to the
+ // ClrEHUnwindMap for each state, recording its HandlerParentState and
+ // handler attributes. Record the TryParentState as well for each catchpad
+ // that's not the last on its catchswitch, but initialize all other entries'
+ // TryParentStates to a sentinel -1 value that the next pass will update.
+
+ // Seed a worklist with pads that have no parent.
SmallVector<std::pair<const Instruction *, int>, 8> Worklist;
-
- // Each pad needs to be able to refer to its parent, so scan the function
- // looking for top-level handlers and seed the worklist with them.
for (const BasicBlock &BB : *Fn) {
- if (!BB.isEHPad())
- continue;
- if (BB.isLandingPad())
- report_fatal_error("CoreCLR EH cannot use landingpads");
const Instruction *FirstNonPHI = BB.getFirstNonPHI();
- if (!isTopLevelPadForMSVC(FirstNonPHI))
+ const Value *ParentPad;
+ if (const auto *CPI = dyn_cast<CleanupPadInst>(FirstNonPHI))
+ ParentPad = CPI->getParentPad();
+ else if (const auto *CSI = dyn_cast<CatchSwitchInst>(FirstNonPHI))
+ ParentPad = CSI->getParentPad();
+ else
continue;
- // queue this with sentinel parent state -1 to mean unwind to caller.
- Worklist.emplace_back(FirstNonPHI, -1);
+ if (isa<ConstantTokenNone>(ParentPad))
+ Worklist.emplace_back(FirstNonPHI, -1);
}
+ // Use the worklist to visit all pads, from outer to inner. Record
+ // HandlerParentState for all pads. Record TryParentState only for catchpads
+ // that aren't the last on their catchswitch (setting all other entries'
+ // TryParentStates to an initial value of -1). This loop is also responsible
+ // for setting the EHPadStateMap entry for all catchpads, cleanuppads, and
+ // catchswitches.
while (!Worklist.empty()) {
const Instruction *Pad;
- int ParentState;
- std::tie(Pad, ParentState) = Worklist.pop_back_val();
-
- Value *ParentPad;
- int PredState;
- if (const CleanupPadInst *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
- // A cleanup can have multiple exits; don't re-process after the first.
- if (FuncInfo.EHPadStateMap.count(Cleanup))
- continue;
- // CoreCLR personality uses arity to distinguish faults from finallies.
- const BasicBlock *PadBlock = Cleanup->getParent();
+ int HandlerParentState;
+ std::tie(Pad, HandlerParentState) = Worklist.pop_back_val();
+
+ if (const auto *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
+ // Create the entry for this cleanup with the appropriate handler
+ // properties. Finally and fault handlers are distinguished by arity.
ClrHandlerType HandlerType =
- (Cleanup->getNumOperands() ? ClrHandlerType::Fault
- : ClrHandlerType::Finally);
- int NewState =
- addClrEHHandler(FuncInfo, ParentState, HandlerType, 0, PadBlock);
- FuncInfo.EHPadStateMap[Cleanup] = NewState;
- // Propagate the new state to all preds of the cleanup
- ParentPad = Cleanup->getParentPad();
- PredState = NewState;
- } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
- SmallVector<const CatchPadInst *, 1> Handlers;
- for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
- const auto *Catch = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI());
- Handlers.push_back(Catch);
- }
- FuncInfo.EHPadStateMap[CatchSwitch] = ParentState;
- int NewState = ParentState;
- for (auto HandlerI = Handlers.rbegin(), HandlerE = Handlers.rend();
- HandlerI != HandlerE; ++HandlerI) {
- const CatchPadInst *Catch = *HandlerI;
- const BasicBlock *PadBlock = Catch->getParent();
+ (Cleanup->getNumArgOperands() ? ClrHandlerType::Fault
+ : ClrHandlerType::Finally);
+ int CleanupState = addClrEHHandler(FuncInfo, HandlerParentState, -1,
+ HandlerType, 0, Pad->getParent());
+ // Queue any child EH pads on the worklist.
+ for (const User *U : Cleanup->users())
+ if (const auto *I = dyn_cast<Instruction>(U))
+ if (I->isEHPad())
+ Worklist.emplace_back(I, CleanupState);
+ // Remember this pad's state.
+ FuncInfo.EHPadStateMap[Cleanup] = CleanupState;
+ } else {
+ // Walk the handlers of this catchswitch in reverse order since all but
+ // the last need to set the following one as its TryParentState.
+ const auto *CatchSwitch = cast<CatchSwitchInst>(Pad);
+ int CatchState = -1, FollowerState = -1;
+ SmallVector<const BasicBlock *, 4> CatchBlocks(CatchSwitch->handlers());
+ for (auto CBI = CatchBlocks.rbegin(), CBE = CatchBlocks.rend();
+ CBI != CBE; ++CBI, FollowerState = CatchState) {
+ const BasicBlock *CatchBlock = *CBI;
+ // Create the entry for this catch with the appropriate handler
+ // properties.
+ const auto *Catch = cast<CatchPadInst>(CatchBlock->getFirstNonPHI());
uint32_t TypeToken = static_cast<uint32_t>(
cast<ConstantInt>(Catch->getArgOperand(0))->getZExtValue());
- NewState = addClrEHHandler(FuncInfo, NewState, ClrHandlerType::Catch,
- TypeToken, PadBlock);
- FuncInfo.EHPadStateMap[Catch] = NewState;
+ CatchState =
+ addClrEHHandler(FuncInfo, HandlerParentState, FollowerState,
+ ClrHandlerType::Catch, TypeToken, CatchBlock);
+ // Queue any child EH pads on the worklist.
+ for (const User *U : Catch->users())
+ if (const auto *I = dyn_cast<Instruction>(U))
+ if (I->isEHPad())
+ Worklist.emplace_back(I, CatchState);
+ // Remember this catch's state.
+ FuncInfo.EHPadStateMap[Catch] = CatchState;
}
- for (const auto *CatchPad : Handlers) {
- for (const User *U : CatchPad->users()) {
- const auto *UserI = cast<Instruction>(U);
- if (UserI->isEHPad())
- Worklist.emplace_back(UserI, ParentState);
+ // Associate the catchswitch with the state of its first catch.
+ assert(CatchSwitch->getNumHandlers());
+ FuncInfo.EHPadStateMap[CatchSwitch] = CatchState;
+ }
+ }
+
+ // Step two: record the TryParentState of each state. For cleanuppads that
+ // don't have cleanuprets, we may need to infer this from their child pads,
+ // so visit pads in descendant-most to ancestor-most order.
+ for (auto Entry = FuncInfo.ClrEHUnwindMap.rbegin(),
+ End = FuncInfo.ClrEHUnwindMap.rend();
+ Entry != End; ++Entry) {
+ const Instruction *Pad =
+ Entry->Handler.get<const BasicBlock *>()->getFirstNonPHI();
+ // For most pads, the TryParentState is the state associated with the
+ // unwind dest of exceptional exits from it.
+ const BasicBlock *UnwindDest;
+ if (const auto *Catch = dyn_cast<CatchPadInst>(Pad)) {
+ // If a catch is not the last in its catchswitch, its TryParentState is
+ // the state associated with the next catch in the switch, even though
+ // that's not the unwind dest of exceptions escaping the catch. Those
+ // cases were already assigned a TryParentState in the first pass, so
+ // skip them.
+ if (Entry->TryParentState != -1)
+ continue;
+ // Otherwise, get the unwind dest from the catchswitch.
+ UnwindDest = Catch->getCatchSwitch()->getUnwindDest();
+ } else {
+ const auto *Cleanup = cast<CleanupPadInst>(Pad);
+ UnwindDest = nullptr;
+ for (const User *U : Cleanup->users()) {
+ if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
+ // Common and unambiguous case -- cleanupret indicates cleanup's
+ // unwind dest.
+ UnwindDest = CleanupRet->getUnwindDest();
+ break;
+ }
+
+ // Get an unwind dest for the user
+ const BasicBlock *UserUnwindDest = nullptr;
+ if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
+ UserUnwindDest = Invoke->getUnwindDest();
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(U)) {
+ UserUnwindDest = CatchSwitch->getUnwindDest();
+ } else if (auto *ChildCleanup = dyn_cast<CleanupPadInst>(U)) {
+ int UserState = FuncInfo.EHPadStateMap[ChildCleanup];
+ int UserUnwindState =
+ FuncInfo.ClrEHUnwindMap[UserState].TryParentState;
+ if (UserUnwindState != -1)
+ UserUnwindDest = FuncInfo.ClrEHUnwindMap[UserUnwindState]
+ .Handler.get<const BasicBlock *>();
}
+
+ // Not having an unwind dest for this user might indicate that it
+ // doesn't unwind, so it can't be taken as proof that the cleanup itself
+ // may unwind to caller (see e.g. SimplifyUnreachable and
+ // RemoveUnwindEdge).
+ if (!UserUnwindDest)
+ continue;
+
+ // Now we have an unwind dest for the user, but we need to see if it
+ // unwinds all the way out of the cleanup or if it stays within it.
+ const Instruction *UserUnwindPad = UserUnwindDest->getFirstNonPHI();
+ const Value *UserUnwindParent;
+ if (auto *CSI = dyn_cast<CatchSwitchInst>(UserUnwindPad))
+ UserUnwindParent = CSI->getParentPad();
+ else
+ UserUnwindParent =
+ cast<CleanupPadInst>(UserUnwindPad)->getParentPad();
+
+ // The unwind stays within the cleanup iff it targets a child of the
+ // cleanup.
+ if (UserUnwindParent == Cleanup)
+ continue;
+
+ // This unwind exits the cleanup, so its dest is the cleanup's dest.
+ UnwindDest = UserUnwindDest;
+ break;
}
- PredState = NewState;
- ParentPad = CatchSwitch->getParentPad();
- } else {
- llvm_unreachable("Unexpected EH pad");
}
- // Queue all predecessors with the given state
- for (const BasicBlock *Pred : predecessors(Pad->getParent())) {
- if ((Pred = getEHPadFromPredecessor(Pred, ParentPad)))
- Worklist.emplace_back(Pred->getFirstNonPHI(), PredState);
+ // Record the state of the unwind dest as the TryParentState.
+ int UnwindDestState;
+
+ // If UnwindDest is null at this point, either the pad in question can
+ // be exited by unwind to caller, or it cannot be exited by unwind. In
+ // either case, reporting the pad as unwinding to caller is correct.
+ // This can lead to EH tables that "look strange" -- if this pad is in
+ // a parent funclet which has other children that do unwind to an enclosing
+ // pad, the try region for this pad will be missing the "duplicate" EH
+ // clause entries that you'd expect to see covering the whole parent. That
+ // should be benign, since the unwind never actually happens. If it were
+ // an issue, we could add a subsequent pass that pushes unwind dests down
+ // from parents that have them to children that appear to unwind to caller.
+ if (!UnwindDest) {
+ UnwindDestState = -1;
+ } else {
+ UnwindDestState = FuncInfo.EHPadStateMap[UnwindDest->getFirstNonPHI()];
}
+
+ Entry->TryParentState = UnwindDestState;
}
+ // Step three: transfer information from pads to invokes.
calculateStateNumbersForInvokes(Fn, FuncInfo);
}
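
The function above is two passes: a worklist assigns a state to every EH pad (queuing children with their parent's state), then the unwind map is walked in reverse so descendants are finalized before the ancestors that may need to infer their exits. A standalone sketch of that ordering argument, using plain standard-library types rather than WinEHFuncInfo (all names here are illustrative, and the fallback rule is simplified):

    #include <cassert>
    #include <vector>

    struct UnwindEntry {
      int HandlerParentState; // enclosing handler's state, -1 at top level
      int TryParentState;     // -1 until inferred
    };

    int main() {
      // Pass one appends entries parent-before-child, as addClrEHHandler does.
      std::vector<UnwindEntry> Map;
      Map.push_back({-1, -1}); // state 0: outermost pad
      Map.push_back({0, -1});  // state 1: pad nested inside state 0
      // Pass two walks in reverse so every descendant is finalized before any
      // ancestor. (The real pass consults unwind dests; this sketch just falls
      // back to the handler parent to show why the ordering is safe.)
      for (auto It = Map.rbegin(), End = Map.rend(); It != End; ++It)
        if (It->TryParentState == -1)
          It->TryParentState = It->HandlerParentState;
      assert(Map[0].TryParentState == -1 && Map[1].TryParentState == 0);
    }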
@@ -597,6 +718,11 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
for (auto &Funclets : FuncletBlocks) {
BasicBlock *FuncletPadBB = Funclets.first;
std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second;
+ Value *FuncletToken;
+ if (FuncletPadBB == &F.getEntryBlock())
+ FuncletToken = ConstantTokenNone::get(F.getContext());
+ else
+ FuncletToken = FuncletPadBB->getFirstNonPHI();
std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone;
ValueToValueMapTy VMap;
@@ -668,15 +794,44 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
RemapInstruction(&I, VMap,
RF_IgnoreMissingEntries | RF_NoModuleLevelChanges);
+ // Catchrets targeting cloned blocks need to be updated separately from
+ // the loop above because they are not in the current funclet.
+ SmallVector<CatchReturnInst *, 2> FixupCatchrets;
+ for (auto &BBMapping : Orig2Clone) {
+ BasicBlock *OldBlock = BBMapping.first;
+ BasicBlock *NewBlock = BBMapping.second;
+
+ FixupCatchrets.clear();
+ for (BasicBlock *Pred : predecessors(OldBlock))
+ if (auto *CatchRet = dyn_cast<CatchReturnInst>(Pred->getTerminator()))
+ if (CatchRet->getParentPad() == FuncletToken)
+ FixupCatchrets.push_back(CatchRet);
+
+ for (CatchReturnInst *CatchRet : FixupCatchrets)
+ CatchRet->setSuccessor(NewBlock);
+ }
+
auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) {
unsigned NumPreds = PN->getNumIncomingValues();
for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd;
++PredIdx) {
BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx);
- ColorVector &IncomingColors = BlockColors[IncomingBlock];
- bool BlockInFunclet = IncomingColors.size() == 1 &&
- IncomingColors.front() == FuncletPadBB;
- if (IsForOldBlock != BlockInFunclet)
+ bool EdgeTargetsFunclet;
+ if (auto *CRI =
+ dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
+ EdgeTargetsFunclet = (CRI->getParentPad() == FuncletToken);
+ } else {
+ ColorVector &IncomingColors = BlockColors[IncomingBlock];
+ assert(!IncomingColors.empty() && "Block not colored!");
+ assert((IncomingColors.size() == 1 ||
+ llvm::all_of(IncomingColors,
+ [&](BasicBlock *Color) {
+ return Color != FuncletPadBB;
+ })) &&
+ "Cloning should leave this funclet's blocks monochromatic");
+ EdgeTargetsFunclet = (IncomingColors.front() == FuncletPadBB);
+ }
+ if (IsForOldBlock != EdgeTargetsFunclet)
continue;
PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false);
// Revisit the next entry.
@@ -864,7 +1019,6 @@ void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
}
void WinEHPrepare::verifyPreparedFunclets(Function &F) {
- // Recolor the CFG to verify that all is well.
for (BasicBlock &BB : F) {
size_t NumColors = BlockColors[&BB].size();
assert(NumColors == 1 && "Expected monochromatic BB!");
@@ -872,12 +1026,8 @@ void WinEHPrepare::verifyPreparedFunclets(Function &F) {
report_fatal_error("Uncolored BB!");
if (NumColors > 1)
report_fatal_error("Multicolor BB!");
- if (!DisableDemotion) {
- bool EHPadHasPHI = BB.isEHPad() && isa<PHINode>(BB.begin());
- assert(!EHPadHasPHI && "EH Pad still has a PHI!");
- if (EHPadHasPHI)
- report_fatal_error("EH Pad still has a PHI!");
- }
+ assert((DisableDemotion || !(BB.isEHPad() && isa<PHINode>(BB.begin()))) &&
+ "EH Pad still has a PHI!");
}
}
@@ -896,12 +1046,17 @@ bool WinEHPrepare::prepareExplicitEH(Function &F) {
demotePHIsOnFunclets(F);
if (!DisableCleanups) {
+ DEBUG(verifyFunction(F));
removeImplausibleInstructions(F);
+ DEBUG(verifyFunction(F));
cleanupPreparedFunclets(F);
}
- verifyPreparedFunclets(F);
+ DEBUG(verifyPreparedFunclets(F));
+ // Recolor the CFG to verify that all is well.
+ DEBUG(colorFunclets(F));
+ DEBUG(verifyPreparedFunclets(F));
BlockColors.clear();
FuncletBlocks.clear();
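
The hunk above moves the re-coloring and verification under DEBUG(...). LLVM's DEBUG macro (llvm/Support/Debug.h) compiles its argument in only for asserts-enabled builds and runs it only when -debug selects the current DEBUG_TYPE, so release builds pay nothing. A simplified standalone sketch of the build-type half of that gating (the macro name here is made up, not LLVM's):

    #include <cstdio>

    #ifndef NDEBUG
    #define CHECK_IN_DEBUG(X) do { X; } while (false)
    #else
    #define CHECK_IN_DEBUG(X) do { } while (false)
    #endif

    static void verifyExpensive() { std::puts("running expensive verification"); }

    int main() {
      CHECK_IN_DEBUG(verifyExpensive()); // compiled away when NDEBUG is set
    }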
diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp
index 185db47..1ebe9b7 100644
--- a/contrib/llvm/lib/IR/AsmWriter.cpp
+++ b/contrib/llvm/lib/IR/AsmWriter.cpp
@@ -2060,7 +2060,7 @@ private:
// printGCRelocateComment - print comment after call to the gc.relocate
// intrinsic indicating base and derived pointer names.
- void printGCRelocateComment(const Value &V);
+ void printGCRelocateComment(const GCRelocateInst &Relocate);
};
} // namespace
@@ -2722,14 +2722,11 @@ void AssemblyWriter::printInstructionLine(const Instruction &I) {
/// printGCRelocateComment - print comment after call to the gc.relocate
/// intrinsic indicating base and derived pointer names.
-void AssemblyWriter::printGCRelocateComment(const Value &V) {
- assert(isGCRelocate(&V));
- GCRelocateOperands GCOps(cast<Instruction>(&V));
-
+void AssemblyWriter::printGCRelocateComment(const GCRelocateInst &Relocate) {
Out << " ; (";
- writeOperand(GCOps.getBasePtr(), false);
+ writeOperand(Relocate.getBasePtr(), false);
Out << ", ";
- writeOperand(GCOps.getDerivedPtr(), false);
+ writeOperand(Relocate.getDerivedPtr(), false);
Out << ")";
}
@@ -2737,8 +2734,8 @@ void AssemblyWriter::printGCRelocateComment(const Value &V) {
/// which slot it occupies.
///
void AssemblyWriter::printInfoComment(const Value &V) {
- if (isGCRelocate(&V))
- printGCRelocateComment(V);
+ if (const auto *Relocate = dyn_cast<GCRelocateInst>(&V))
+ printGCRelocateComment(*Relocate);
if (AnnotationWriter)
AnnotationWriter->printInfoComment(V, Out);
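
This change replaces the isGCRelocate() predicate plus a GCRelocateOperands wrapper with a single dyn_cast to the new GCRelocateInst class, relying on LLVM-style RTTI: a classof() hook that dyn_cast<> consults. A self-contained sketch of that mechanism with made-up types (dyn_cast_sketch stands in for llvm::dyn_cast):

    #include <cassert>

    struct Value {
      enum Kind { VK_Plain, VK_Relocate };
      Kind K;
      explicit Value(Kind K) : K(K) {}
    };

    struct RelocateValue : Value {
      RelocateValue() : Value(VK_Relocate) {}
      static bool classof(const Value *V) { return V->K == VK_Relocate; }
      int basePtrSlot() const { return 0; } // stands in for getBasePtr()
    };

    template <typename To, typename From> static To *dyn_cast_sketch(From *V) {
      return To::classof(V) ? static_cast<To *>(V) : nullptr;
    }

    int main() {
      RelocateValue R;
      Value *V = &R;
      if (auto *Rel = dyn_cast_sketch<RelocateValue>(V)) // typed result, no recast
        assert(Rel->basePtrSlot() == 0);
    }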
diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp
index bcf7dc3..6c01bb6 100644
--- a/contrib/llvm/lib/IR/Attributes.cpp
+++ b/contrib/llvm/lib/IR/Attributes.cpp
@@ -641,14 +641,15 @@ AttributeSet AttributeSet::get(LLVMContext &C,
if (Attrs.empty())
return AttributeSet();
-#ifndef NDEBUG
- for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
- assert((!i || Attrs[i-1].first <= Attrs[i].first) &&
- "Misordered Attributes list!");
- assert(!Attrs[i].second.hasAttribute(Attribute::None) &&
- "Pointless attribute!");
- }
-#endif
+ assert(std::is_sorted(Attrs.begin(), Attrs.end(),
+ [](const std::pair<unsigned, Attribute> &LHS,
+ const std::pair<unsigned, Attribute> &RHS) {
+ return LHS.first < RHS.first;
+ }) && "Misordered Attributes list!");
+ assert(std::none_of(Attrs.begin(), Attrs.end(),
+ [](const std::pair<unsigned, Attribute> &Pair) {
+ return Pair.second.hasAttribute(Attribute::None);
+ }) && "Pointless attribute!");
// Create a vector of (unsigned, AttributeSetNode*) pairs from the attributes
// list.
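
For reference, the rewritten asserts above state the invariants directly with std::is_sorted and std::none_of instead of hand-rolled index loops. The same pattern on a plain pair list (standalone, standard library only; the "pointless" check is a placeholder):

    #include <algorithm>
    #include <cassert>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::pair<unsigned, int>> Attrs = {{0, 1}, {1, 2}, {3, 4}};
      assert(std::is_sorted(Attrs.begin(), Attrs.end(),
                            [](const std::pair<unsigned, int> &L,
                               const std::pair<unsigned, int> &R) {
                              return L.first < R.first;
                            }) &&
             "Misordered list!");
      assert(std::none_of(Attrs.begin(), Attrs.end(),
                          [](const std::pair<unsigned, int> &P) {
                            return P.second == 0; // placeholder "pointless" test
                          }) &&
             "Pointless attribute!");
    }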
diff --git a/contrib/llvm/lib/IR/Instruction.cpp b/contrib/llvm/lib/IR/Instruction.cpp
index a0bd2c9..4b33d2e 100644
--- a/contrib/llvm/lib/IR/Instruction.cpp
+++ b/contrib/llvm/lib/IR/Instruction.cpp
@@ -76,22 +76,21 @@ iplist<Instruction>::iterator Instruction::eraseFromParent() {
return getParent()->getInstList().erase(getIterator());
}
-/// insertBefore - Insert an unlinked instructions into a basic block
-/// immediately before the specified instruction.
+/// Insert an unlinked instruction into a basic block immediately before the
+/// specified instruction.
void Instruction::insertBefore(Instruction *InsertPos) {
InsertPos->getParent()->getInstList().insert(InsertPos->getIterator(), this);
}
-/// insertAfter - Insert an unlinked instructions into a basic block
-/// immediately after the specified instruction.
+/// Insert an unlinked instruction into a basic block immediately after the
+/// specified instruction.
void Instruction::insertAfter(Instruction *InsertPos) {
InsertPos->getParent()->getInstList().insertAfter(InsertPos->getIterator(),
this);
}
-/// moveBefore - Unlink this instruction from its current basic block and
-/// insert it into the basic block that MovePos lives in, right before
-/// MovePos.
+/// Unlink this instruction from its current basic block and insert it into the
+/// basic block that MovePos lives in, right before MovePos.
void Instruction::moveBefore(Instruction *MovePos) {
MovePos->getParent()->getInstList().splice(
MovePos->getIterator(), getParent()->getInstList(), getIterator());
diff --git a/contrib/llvm/lib/IR/Instructions.cpp b/contrib/llvm/lib/IR/Instructions.cpp
index 4ae2fd5..7c64ca7 100644
--- a/contrib/llvm/lib/IR/Instructions.cpp
+++ b/contrib/llvm/lib/IR/Instructions.cpp
@@ -609,20 +609,6 @@ void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
return setSuccessor(idx, B);
}
-bool InvokeInst::hasFnAttrImpl(Attribute::AttrKind A) const {
- if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
- return true;
-
- // Operand bundles override attributes on the called function, but don't
- // override attributes directly present on the invoke instruction.
- if (isFnAttrDisallowedByOpBundle(A))
- return false;
-
- if (const Function *F = getCalledFunction())
- return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
- return false;
-}
-
bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const {
assert(i < (getNumArgOperands() + 1) && "Param index out of bounds!");
@@ -934,6 +920,17 @@ void CatchSwitchInst::addHandler(BasicBlock *Handler) {
getOperandList()[OpNo] = Handler;
}
+void CatchSwitchInst::removeHandler(handler_iterator HI) {
+ // Move all subsequent handlers up one.
+ Use *EndDst = op_end() - 1;
+ for (Use *CurDst = HI.getCurrent(); CurDst != EndDst; ++CurDst)
+ *CurDst = *(CurDst + 1);
+ // Null out the last handler use.
+ *EndDst = nullptr;
+
+ setNumHungOffUseOperands(getNumOperands() - 1);
+}
+
BasicBlock *CatchSwitchInst::getSuccessorV(unsigned idx) const {
return getSuccessor(idx);
}
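
removeHandler above compacts the hung-off use list in place: shift the tail left one slot, clear the vacated last slot, then shrink the operand count. The same motion on a plain vector (illustrative only, not the Use machinery):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> Handlers = {10, 20, 30}; // stand-ins for handler uses
      size_t Doomed = 1;
      std::copy(Handlers.begin() + Doomed + 1, Handlers.end(),
                Handlers.begin() + Doomed); // move subsequent handlers up one
      Handlers.back() = 0;                  // null out the last slot
      Handlers.pop_back();                  // shrink the operand count by one
      assert((Handlers == std::vector<int>{10, 30}));
    }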
diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp
index ab1ba5e..d8eaceb 100644
--- a/contrib/llvm/lib/IR/Metadata.cpp
+++ b/contrib/llvm/lib/IR/Metadata.cpp
@@ -190,6 +190,8 @@ void ReplaceableMetadataImpl::moveRef(void *Ref, void *New,
void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) {
assert(!(MD && isa<MDNode>(MD) && cast<MDNode>(MD)->isTemporary()) &&
"Expected non-temp node");
+ assert(CanReplace &&
+ "Attempted to replace Metadata marked for no replacement");
if (UseMap.empty())
return;
@@ -555,7 +557,7 @@ void MDNode::decrementUnresolvedOperandCount() {
resolve();
}
-void MDNode::resolveCycles(bool MDMaterialized) {
+void MDNode::resolveCycles(bool AllowTemps) {
if (isResolved())
return;
@@ -568,7 +570,7 @@ void MDNode::resolveCycles(bool MDMaterialized) {
if (!N)
continue;
- if (N->isTemporary() && !MDMaterialized)
+ if (N->isTemporary() && AllowTemps)
continue;
assert(!N->isTemporary() &&
"Expected all forward declarations to be resolved");
diff --git a/contrib/llvm/lib/IR/Statepoint.cpp b/contrib/llvm/lib/IR/Statepoint.cpp
index d45c188..27a990e 100644
--- a/contrib/llvm/lib/IR/Statepoint.cpp
+++ b/contrib/llvm/lib/IR/Statepoint.cpp
@@ -40,20 +40,7 @@ bool llvm::isStatepoint(const Value &inst) {
}
bool llvm::isGCRelocate(const ImmutableCallSite &CS) {
- if (!CS.getInstruction()) {
- // This is not a call site
- return false;
- }
-
- return isGCRelocate(CS.getInstruction());
-}
-bool llvm::isGCRelocate(const Value *inst) {
- if (const CallInst *call = dyn_cast<CallInst>(inst)) {
- if (const Function *F = call->getCalledFunction()) {
- return F->getIntrinsicID() == Intrinsic::experimental_gc_relocate;
- }
- }
- return false;
+ return CS.getInstruction() && isa<GCRelocateInst>(CS.getInstruction());
}
bool llvm::isGCResult(const ImmutableCallSite &CS) {
diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp
index 81c87e4..6dfb05d 100644
--- a/contrib/llvm/lib/IR/Verifier.cpp
+++ b/contrib/llvm/lib/IR/Verifier.cpp
@@ -1657,14 +1657,14 @@ void Verifier::VerifyStatepoint(ImmutableCallSite CS) {
const CallInst *Call = dyn_cast<const CallInst>(U);
Assert(Call, "illegal use of statepoint token", &CI, U);
if (!Call) continue;
- Assert(isGCRelocate(Call) || isGCResult(Call),
+ Assert(isa<GCRelocateInst>(Call) || isGCResult(Call),
"gc.result or gc.relocate are the only value uses"
"of a gc.statepoint",
&CI, U);
if (isGCResult(Call)) {
Assert(Call->getArgOperand(0) == &CI,
"gc.result connected to wrong gc.statepoint", &CI, Call);
- } else if (isGCRelocate(Call)) {
+ } else if (isa<GCRelocateInst>(Call)) {
Assert(Call->getArgOperand(0) == &CI,
"gc.relocate connected to wrong gc.statepoint", &CI, Call);
}
@@ -3019,8 +3019,7 @@ void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) {
&CPI);
auto *ParentPad = CPI.getParentPad();
- Assert(isa<CatchSwitchInst>(ParentPad) || isa<ConstantTokenNone>(ParentPad) ||
- isa<CleanupPadInst>(ParentPad) || isa<CatchPadInst>(ParentPad),
+ Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
"CleanupPadInst has an invalid parent.", &CPI);
User *FirstUser = nullptr;
@@ -3077,10 +3076,17 @@ void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) {
}
auto *ParentPad = CatchSwitch.getParentPad();
- Assert(isa<CatchSwitchInst>(ParentPad) || isa<ConstantTokenNone>(ParentPad) ||
- isa<CleanupPadInst>(ParentPad) || isa<CatchPadInst>(ParentPad),
+ Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
"CatchSwitchInst has an invalid parent.", ParentPad);
+ Assert(CatchSwitch.getNumHandlers() != 0,
+ "CatchSwitchInst cannot have empty handler list", &CatchSwitch);
+
+ for (BasicBlock *Handler : CatchSwitch.handlers()) {
+ Assert(isa<CatchPadInst>(Handler->getFirstNonPHI()),
+ "CatchSwitchInst handlers must be catchpads", &CatchSwitch, Handler);
+ }
+
visitTerminatorInst(CatchSwitch);
}
@@ -3675,8 +3681,8 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
// Verify rest of the relocate arguments
- GCRelocateOperands Ops(CS);
- ImmutableCallSite StatepointCS(Ops.getStatepoint());
+ ImmutableCallSite StatepointCS(
+ cast<GCRelocateInst>(*CS.getInstruction()).getStatepoint());
// Both the base and derived must be piped through the safepoint
Value* Base = CS.getArgOperand(1);
@@ -3731,14 +3737,14 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
// Relocated value must be a pointer type, but gc_relocate does not need to return the
// same pointer type as the relocated pointer. It can be cast to the correct type later
// if it's desired. However, they must have the same address space.
- GCRelocateOperands Operands(CS);
- Assert(Operands.getDerivedPtr()->getType()->isPointerTy(),
+ GCRelocateInst &Relocate = cast<GCRelocateInst>(*CS.getInstruction());
+ Assert(Relocate.getDerivedPtr()->getType()->isPointerTy(),
"gc.relocate: relocated value must be a gc pointer", CS);
// gc_relocate return type must be a pointer type, and is verified earlier in
// VerifyIntrinsicType().
Assert(cast<PointerType>(CS.getType())->getAddressSpace() ==
- cast<PointerType>(Operands.getDerivedPtr()->getType())->getAddressSpace(),
+ cast<PointerType>(Relocate.getDerivedPtr()->getType())->getAddressSpace(),
"gc.relocate: relocating a pointer shouldn't change its address space", CS);
break;
}
diff --git a/contrib/llvm/lib/Linker/IRMover.cpp b/contrib/llvm/lib/Linker/IRMover.cpp
index fa6e375..309690f 100644
--- a/contrib/llvm/lib/Linker/IRMover.cpp
+++ b/contrib/llvm/lib/Linker/IRMover.cpp
@@ -524,6 +524,23 @@ public:
ValueMapperFlags = ValueMapperFlags | RF_HaveUnmaterializedMetadata;
}
+ ~IRLinker() {
+ // In the case where we are not linking metadata, we unset the CanReplace
+ // flag on all temporary metadata in the MetadataToIDs map to ensure
+ // none was replaced while being a map key. Now that we are destructing
+ // the map, set the flag back to true, so that it is replaceable during
+ // metadata linking.
+ if (!shouldLinkMetadata()) {
+ for (auto MDI : MetadataToIDs) {
+ Metadata *MD = const_cast<Metadata *>(MDI.first);
+ MDNode *Node = dyn_cast<MDNode>(MD);
+ assert((Node && Node->isTemporary()) &&
+ "Found non-temp metadata in map when not linking metadata");
+ Node->setCanReplace(true);
+ }
+ }
+ }
+
bool run();
Value *materializeDeclFor(Value *V, bool ForAlias);
void materializeInitFor(GlobalValue *New, GlobalValue *Old, bool ForAlias);
@@ -1111,7 +1128,8 @@ bool IRLinker::linkFunctionBody(Function &Dst, Function &Src) {
// a function and before remapping metadata on instructions below
// in RemapInstruction, as the saved mapping is used to handle
// the temporary metadata hanging off instructions.
- SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, true);
+ SrcM.getMaterializer()->saveMetadataList(MetadataToIDs,
+ /* OnlyTempMD = */ true);
// Link in the prefix data.
if (Src.hasPrefixData())
@@ -1514,7 +1532,8 @@ bool IRLinker::run() {
// Ensure metadata materialized
if (SrcM.getMaterializer()->materializeMetadata())
return true;
- SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, false);
+ SrcM.getMaterializer()->saveMetadataList(MetadataToIDs,
+ /* OnlyTempMD = */ false);
}
linkNamedMDNodes();
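
The new ~IRLinker above is effectively the release half of a scoped-flag pattern: CanReplace is cleared while temporary metadata serves as map keys and restored when the map dies. A minimal RAII sketch of that idea (types here are stand-ins, not llvm::Metadata):

    #include <cassert>

    struct Node { bool CanReplace = true; };

    class SuppressReplacement {
      Node &N;
    public:
      explicit SuppressReplacement(Node &N) : N(N) { N.CanReplace = false; }
      ~SuppressReplacement() { N.CanReplace = true; }
    };

    int main() {
      Node MD;
      {
        SuppressReplacement Guard(MD); // safe to key a map on MD here
        assert(!MD.CanReplace);
      }
      assert(MD.CanReplace); // replaceable again once metadata is linked
    }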
diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp
index a99ac4e..dafa768 100644
--- a/contrib/llvm/lib/MC/MCDwarf.cpp
+++ b/contrib/llvm/lib/MC/MCDwarf.cpp
@@ -514,13 +514,13 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
MCOS->EmitULEB128IntValue(1);
MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit);
MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
- EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list,
- context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, context.getDwarfVersion() >= 4
+ ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4);
if (context.getGenDwarfSectionSyms().size() > 1 &&
context.getDwarfVersion() >= 3) {
- EmitAbbrev(MCOS, dwarf::DW_AT_ranges,
- context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ EmitAbbrev(MCOS, dwarf::DW_AT_ranges, context.getDwarfVersion() >= 4
+ ? dwarf::DW_FORM_sec_offset
: dwarf::DW_FORM_data4);
} else {
EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
index 028f2e9..34f49ca 100644
--- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -1,4 +1,4 @@
-//===-- MObjectFileInfo.cpp - Object File Information ---------------------===//
+//===-- MCObjectFileInfo.cpp - Object File Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/MC/MCSubtargetInfo.cpp b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
index dc864d3..1b59250 100644
--- a/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
+++ b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
@@ -63,31 +63,30 @@ FeatureBitset MCSubtargetInfo::ToggleFeature(const FeatureBitset &FB) {
/// ToggleFeature - Toggle a feature and return the re-computed feature
/// bits. This version will also change all implied bits.
FeatureBitset MCSubtargetInfo::ToggleFeature(StringRef FS) {
- SubtargetFeatures Features;
- FeatureBits = Features.ToggleFeature(FeatureBits, FS, ProcFeatures);
+ SubtargetFeatures::ToggleFeature(FeatureBits, FS, ProcFeatures);
return FeatureBits;
}
FeatureBitset MCSubtargetInfo::ApplyFeatureFlag(StringRef FS) {
- SubtargetFeatures Features;
- FeatureBits = Features.ApplyFeatureFlag(FeatureBits, FS, ProcFeatures);
+ SubtargetFeatures::ApplyFeatureFlag(FeatureBits, FS, ProcFeatures);
return FeatureBits;
}
const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
assert(ProcSchedModels && "Processor machine model not available!");
- size_t NumProcs = ProcDesc.size();
- assert(std::is_sorted(ProcSchedModels, ProcSchedModels+NumProcs,
+ ArrayRef<SubtargetInfoKV> SchedModels(ProcSchedModels, ProcDesc.size());
+
+ assert(std::is_sorted(SchedModels.begin(), SchedModels.end(),
[](const SubtargetInfoKV &LHS, const SubtargetInfoKV &RHS) {
return strcmp(LHS.Key, RHS.Key) < 0;
}) &&
"Processor machine model table is not sorted");
// Find entry
- const SubtargetInfoKV *Found =
- std::lower_bound(ProcSchedModels, ProcSchedModels+NumProcs, CPU);
- if (Found == ProcSchedModels+NumProcs || StringRef(Found->Key) != CPU) {
+ auto Found =
+ std::lower_bound(SchedModels.begin(), SchedModels.end(), CPU);
+ if (Found == SchedModels.end() || StringRef(Found->Key) != CPU) {
if (CPU != "help") // Don't error if the user asked for help.
errs() << "'" << CPU
<< "' is not a recognized processor for this target"
diff --git a/contrib/llvm/lib/MC/SubtargetFeature.cpp b/contrib/llvm/lib/MC/SubtargetFeature.cpp
index b642f17..7cce0fe 100644
--- a/contrib/llvm/lib/MC/SubtargetFeature.cpp
+++ b/contrib/llvm/lib/MC/SubtargetFeature.cpp
@@ -160,10 +160,9 @@ void ClearImpliedBits(FeatureBitset &Bits,
}
}
-/// ToggleFeature - Toggle a feature and returns the newly updated feature
-/// bits.
-FeatureBitset
-SubtargetFeatures::ToggleFeature(FeatureBitset Bits, StringRef Feature,
+/// ToggleFeature - Toggle a feature and update the feature bits.
+void
+SubtargetFeatures::ToggleFeature(FeatureBitset &Bits, StringRef Feature,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
// Find feature in table.
@@ -186,12 +185,9 @@ SubtargetFeatures::ToggleFeature(FeatureBitset Bits, StringRef Feature,
<< "' is not a recognized feature for this target"
<< " (ignoring feature)\n";
}
-
- return Bits;
}
-FeatureBitset
-SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
+void SubtargetFeatures::ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
assert(hasFlag(Feature));
@@ -203,7 +199,7 @@ SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
if (FeatureEntry) {
// Enable/disable feature in bits
if (isEnabled(Feature)) {
- Bits |= FeatureEntry->Value;
+ Bits |= FeatureEntry->Value;
// For each feature that this implies, set it.
SetImpliedBits(Bits, FeatureEntry, FeatureTable);
@@ -218,8 +214,6 @@ SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
<< "' is not a recognized feature for this target"
<< " (ignoring feature)\n";
}
-
- return Bits;
}
@@ -234,14 +228,10 @@ SubtargetFeatures::getFeatureBits(StringRef CPU,
return FeatureBitset();
#ifndef NDEBUG
- for (size_t i = 1, e = CPUTable.size(); i != e; ++i) {
- assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 &&
- "CPU table is not sorted");
- }
- for (size_t i = 1, e = FeatureTable.size(); i != e; ++i) {
- assert(strcmp(FeatureTable[i - 1].Key, FeatureTable[i].Key) < 0 &&
- "CPU features table is not sorted");
- }
+ assert(std::is_sorted(std::begin(CPUTable), std::end(CPUTable)) &&
+ "CPU table is not sorted");
+ assert(std::is_sorted(std::begin(FeatureTable), std::end(FeatureTable)) &&
+ "CPU features table is not sorted");
#endif
// Resulting bits
FeatureBitset Bits;
@@ -277,7 +267,7 @@ SubtargetFeatures::getFeatureBits(StringRef CPU,
if (Feature == "+help")
Help(CPUTable, FeatureTable);
- Bits = ApplyFeatureFlag(Bits, Feature, FeatureTable);
+ ApplyFeatureFlag(Bits, Feature, FeatureTable);
}
return Bits;
diff --git a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp
index a0f82a0..32c692d 100644
--- a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp
+++ b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp
@@ -316,12 +316,17 @@ static std::error_code readCoverageMappingData(
// Read the records in the coverage data section.
for (const char *Buf = Data.data(), *End = Buf + Data.size(); Buf < End;) {
- if (Buf + 4 * sizeof(uint32_t) > End)
+ if (Buf + sizeof(CovMapHeader) > End)
return coveragemap_error::malformed;
- uint32_t NRecords = endian::readNext<uint32_t, Endian, unaligned>(Buf);
- uint32_t FilenamesSize = endian::readNext<uint32_t, Endian, unaligned>(Buf);
- uint32_t CoverageSize = endian::readNext<uint32_t, Endian, unaligned>(Buf);
- uint32_t Version = endian::readNext<uint32_t, Endian, unaligned>(Buf);
+ auto CovHeader = reinterpret_cast<const coverage::CovMapHeader *>(Buf);
+ uint32_t NRecords =
+ endian::byte_swap<uint32_t, Endian>(CovHeader->NRecords);
+ uint32_t FilenamesSize =
+ endian::byte_swap<uint32_t, Endian>(CovHeader->FilenamesSize);
+ uint32_t CoverageSize =
+ endian::byte_swap<uint32_t, Endian>(CovHeader->CoverageSize);
+ uint32_t Version = endian::byte_swap<uint32_t, Endian>(CovHeader->Version);
+ Buf = reinterpret_cast<const char *>(++CovHeader);
switch (Version) {
case CoverageMappingVersion1:
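
Reading the four fields through a CovMapHeader view, as above, keeps the cursor arithmetic in one place and makes the on-disk layout explicit. A standalone sketch under the assumption of a matching packed layout; memcpy replaces the reinterpret_cast dereference to stay alignment-safe, and __builtin_bswap32 (a GCC/Clang builtin) stands in for endian::byte_swap:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    struct Header { uint32_t NRecords, FilenamesSize, CoverageSize, Version; };

    static uint32_t swapIfNeeded(uint32_t V, bool SwapBytes) {
      return SwapBytes ? __builtin_bswap32(V) : V; // GCC/Clang builtin
    }

    int main() {
      Header H = {2, 16, 64, 1};
      unsigned char Buf[sizeof(Header)];
      std::memcpy(Buf, &H, sizeof(H)); // pretend Buf arrived from the section
      const char *Cursor = reinterpret_cast<const char *>(Buf);
      Header In;
      std::memcpy(&In, Cursor, sizeof(In)); // alignment-safe field access
      Cursor += sizeof(Header);             // step past the whole header at once
      assert(swapIfNeeded(In.NRecords, /*SwapBytes=*/false) == 2);
      assert(Cursor == reinterpret_cast<const char *>(Buf) + sizeof(Header));
    }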
diff --git a/contrib/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm/lib/ProfileData/InstrProf.cpp
index f5acd23..027f0f7 100644
--- a/contrib/llvm/lib/ProfileData/InstrProf.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProf.cpp
@@ -12,12 +12,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
@@ -162,6 +165,98 @@ GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName) {
return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), FuncName);
}
+int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
+ bool doCompression, std::string &Result) {
+ uint8_t Header[16], *P = Header;
+ std::string UncompressedNameStrings =
+ join(NameStrs.begin(), NameStrs.end(), StringRef(" "));
+
+ unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P);
+ P += EncLen;
+
+ auto WriteStringToResult = [&](size_t CompressedLen,
+ const std::string &InputStr) {
+ EncLen = encodeULEB128(CompressedLen, P);
+ P += EncLen;
+ char *HeaderStr = reinterpret_cast<char *>(&Header[0]);
+ unsigned HeaderLen = P - &Header[0];
+ Result.append(HeaderStr, HeaderLen);
+ Result += InputStr;
+ return 0;
+ };
+
+ if (!doCompression)
+ return WriteStringToResult(0, UncompressedNameStrings);
+
+ SmallVector<char, 128> CompressedNameStrings;
+ zlib::Status Success =
+ zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
+ zlib::BestSizeCompression);
+
+ if (Success != zlib::StatusOK)
+ return 1;
+
+ return WriteStringToResult(
+ CompressedNameStrings.size(),
+ std::string(CompressedNameStrings.data(), CompressedNameStrings.size()));
+}
+
+StringRef getPGOFuncNameInitializer(GlobalVariable *NameVar) {
+ auto *Arr = cast<ConstantDataArray>(NameVar->getInitializer());
+ StringRef NameStr =
+ Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
+ return NameStr;
+}
+
+int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
+ std::string &Result) {
+ std::vector<std::string> NameStrs;
+ for (auto *NameVar : NameVars) {
+ NameStrs.push_back(getPGOFuncNameInitializer(NameVar));
+ }
+ return collectPGOFuncNameStrings(NameStrs, zlib::isAvailable(), Result);
+}
+
+int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
+ const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data());
+ const uint8_t *EndP = reinterpret_cast<const uint8_t *>(NameStrings.data() +
+ NameStrings.size());
+ while (P < EndP) {
+ uint32_t N;
+ uint64_t UncompressedSize = decodeULEB128(P, &N);
+ P += N;
+ uint64_t CompressedSize = decodeULEB128(P, &N);
+ P += N;
+ bool isCompressed = (CompressedSize != 0);
+ SmallString<128> UncompressedNameStrings;
+ StringRef NameStrings;
+ if (isCompressed) {
+ StringRef CompressedNameStrings(reinterpret_cast<const char *>(P),
+ CompressedSize);
+ if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
+ UncompressedSize) != zlib::StatusOK)
+ return 1;
+ P += CompressedSize;
+ NameStrings = StringRef(UncompressedNameStrings.data(),
+ UncompressedNameStrings.size());
+ } else {
+ NameStrings =
+ StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
+ P += UncompressedSize;
+ }
+ // Now parse the name strings.
+ SmallVector<StringRef, 0> Names;
+ NameStrings.split(Names, ' ');
+ for (StringRef &Name : Names)
+ Symtab.addFuncName(Name);
+
+ while (P < EndP && *P == 0)
+ P++;
+ }
+ Symtab.finalizeSymtab();
+ return 0;
+}
+
instrprof_error
InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
uint64_t Weight) {
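
The name-string blob above is framed as two ULEB128 integers -- the uncompressed length, then the compressed length (zero meaning the payload is stored uncompressed) -- followed by the payload. A self-contained sketch of that varint framing; the encode/decode helpers here mirror what llvm/Support/LEB128.h provides:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    static void encodeULEB128(uint64_t V, std::vector<uint8_t> &Out) {
      do {
        uint8_t Byte = V & 0x7f;
        V >>= 7;
        if (V)
          Byte |= 0x80; // more bytes follow
        Out.push_back(Byte);
      } while (V);
    }

    static uint64_t decodeULEB128(const uint8_t *P, unsigned *N) {
      uint64_t V = 0;
      unsigned Shift = 0, I = 0;
      for (;;) {
        uint8_t Byte = P[I++];
        V |= uint64_t(Byte & 0x7f) << Shift;
        if (!(Byte & 0x80))
          break;
        Shift += 7;
      }
      *N = I;
      return V;
    }

    int main() {
      std::vector<uint8_t> Buf;
      encodeULEB128(300, Buf); // uncompressed size
      encodeULEB128(0, Buf);   // compressed size: 0 => stored uncompressed
      unsigned N;
      assert(decodeULEB128(Buf.data(), &N) == 300 && N == 2);
      assert(decodeULEB128(Buf.data() + N, &N) == 0 && N == 1);
    }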
diff --git a/contrib/llvm/lib/Support/Unix/Program.inc b/contrib/llvm/lib/Support/Unix/Program.inc
index a8d1fe3..7d3537e 100644
--- a/contrib/llvm/lib/Support/Unix/Program.inc
+++ b/contrib/llvm/lib/Support/Unix/Program.inc
@@ -446,7 +446,7 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
return EC;
}
-bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
+bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<const char*> Args) {
static long ArgMax = sysconf(_SC_ARG_MAX);
// System says no practical limit.
@@ -456,7 +456,7 @@ bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
// Conservatively account for space required by environment variables.
long HalfArgMax = ArgMax / 2;
- size_t ArgLength = 0;
+ size_t ArgLength = Program.size() + 1;
for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end();
I != E; ++I) {
ArgLength += strlen(*I) + 1;
diff --git a/contrib/llvm/lib/Support/Windows/Program.inc b/contrib/llvm/lib/Support/Windows/Program.inc
index d4e14dd..78fc538 100644
--- a/contrib/llvm/lib/Support/Windows/Program.inc
+++ b/contrib/llvm/lib/Support/Windows/Program.inc
@@ -535,14 +535,15 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
return EC;
}
-bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
+bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<const char*> Args) {
// The documented max length of the command line passed to CreateProcess.
static const size_t MaxCommandStringLength = 32768;
- size_t ArgLength = 0;
+ // Account for the trailing space for the program path and the
+ // trailing NULL of the last argument.
+ size_t ArgLength = ArgLenWithQuotes(Program.str().c_str()) + 2;
for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end();
I != E; ++I) {
- // Account for the trailing space for every arg but the last one and the
- // trailing NULL of the last argument.
+ // Account for the trailing space after every argument.
ArgLength += ArgLenWithQuotes(*I) + 1;
if (ArgLength > MaxCommandStringLength) {
return false;
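
Both platform hunks make the same fix: the running length now starts from the program path rather than zero, since the path is part of the command line being measured against the system cap. A standalone sketch of the accounting (the limit and names are illustrative; quoting overhead is ignored):

    #include <cassert>
    #include <cstring>

    static bool commandLineFits(const char *Program, const char *const *Args,
                                int NumArgs, size_t Limit) {
      size_t Length = std::strlen(Program) + 1; // program path plus separator
      for (int I = 0; I < NumArgs; ++I) {
        Length += std::strlen(Args[I]) + 1; // each argument plus separator/NUL
        if (Length > Limit)
          return false;
      }
      return true;
    }

    int main() {
      const char *Args[] = {"-c", "main.c"};
      assert(commandLineFits("/usr/bin/cc", Args, 2, 32768));
    }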
diff --git a/contrib/llvm/lib/Support/Windows/WindowsSupport.h b/contrib/llvm/lib/Support/Windows/WindowsSupport.h
index 34d961b..c65e314 100644
--- a/contrib/llvm/lib/Support/Windows/WindowsSupport.h
+++ b/contrib/llvm/lib/Support/Windows/WindowsSupport.h
@@ -30,6 +30,9 @@
#define _WIN32_WINNT 0x0601
#define _WIN32_IE 0x0800 // MinGW at it again. FIXME: verify if still needed.
#define WIN32_LEAN_AND_MEAN
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -44,6 +47,21 @@
#include <string>
#include <vector>
+#if !defined(__CYGWIN__) && !defined(__MINGW32__)
+#include <VersionHelpers.h>
+#else
+// Cygwin does not have the IsWindows8OrGreater() API.
+// Some versions of MinGW do not have the API either.
+inline bool IsWindows8OrGreater() {
+ OSVERSIONINFO osvi = {};
+ osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
+ if (!::GetVersionEx(&osvi))
+ return false;
+ return (osvi.dwMajorVersion > 6 ||
+ (osvi.dwMajorVersion == 6 && osvi.dwMinorVersion >= 2));
+}
+#endif // !defined(__CYGWIN__) && !defined(__MINGW32__)
+
inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
if (!ErrMsg)
return true;
diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp
index 57c7ac3..57162dc 100644
--- a/contrib/llvm/lib/Support/raw_ostream.cpp
+++ b/contrib/llvm/lib/Support/raw_ostream.cpp
@@ -57,6 +57,10 @@
#endif
#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/WindowsSupport.h"
+#endif
+
using namespace llvm;
raw_ostream::~raw_ostream() {
@@ -567,8 +571,21 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
assert(FD >= 0 && "File already closed.");
pos += Size;
+#ifndef LLVM_ON_WIN32
+ bool ShouldWriteInChunks = false;
+#else
+ // Writing a large amount of output to a Windows console can fail with
+ // ENOMEM. It appears that, prior to Windows 8, WriteFile() redirects to
+ // WriteConsole(), and the latter has a size limit (66000 bytes or less,
+ // depending on heap usage).
+ bool ShouldWriteInChunks = !!::_isatty(FD) && !IsWindows8OrGreater();
+#endif
+
do {
- ssize_t ret = ::write(FD, Ptr, Size);
+ size_t ChunkSize = Size;
+ if (ChunkSize > 32767 && ShouldWriteInChunks)
+ ChunkSize = 32767;
+
+ ssize_t ret = ::write(FD, Ptr, ChunkSize);
if (ret < 0) {
// If it's a recoverable error, swallow it and retry the write.
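
The chunking above caps each ::write at 32767 bytes when targeting an old Windows console; the surrounding retry loop already advances by however much was written, so short writes compose naturally. A standalone sketch of that loop shape, with writeSome standing in for ::write (it deliberately writes at most 7 bytes per call):

    #include <algorithm>
    #include <cstddef>

    static size_t writeSome(const char *, size_t N) {
      return std::min<size_t>(N, 7); // sink accepts a partial write
    }

    static void writeAll(const char *Ptr, size_t Size, bool WriteInChunks) {
      const size_t MaxChunk = 32767;
      while (Size > 0) {
        size_t Chunk = WriteInChunks ? std::min(Size, MaxChunk) : Size;
        size_t Written = writeSome(Ptr, Chunk); // may write less than asked
        Ptr += Written;
        Size -= Written;
      }
    }

    int main() {
      char Buf[100] = {};
      writeAll(Buf, sizeof(Buf), /*WriteInChunks=*/true);
    }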
diff --git a/contrib/llvm/lib/TableGen/Record.cpp b/contrib/llvm/lib/TableGen/Record.cpp
index 87a3422..11e35b7 100644
--- a/contrib/llvm/lib/TableGen/Record.cpp
+++ b/contrib/llvm/lib/TableGen/Record.cpp
@@ -722,7 +722,7 @@ Init *UnOpInit::resolveReferences(Record &R, const RecordVal *RV) const {
std::string UnOpInit::getAsString() const {
std::string Result;
- switch (Opc) {
+ switch (getOpcode()) {
case CAST: Result = "!cast<" + getType()->getAsString() + ">"; break;
case HEAD: Result = "!head"; break;
case TAIL: Result = "!tail"; break;
@@ -850,7 +850,7 @@ Init *BinOpInit::resolveReferences(Record &R, const RecordVal *RV) const {
std::string BinOpInit::getAsString() const {
std::string Result;
- switch (Opc) {
+ switch (getOpcode()) {
case CONCAT: Result = "!con"; break;
case ADD: Result = "!add"; break;
case AND: Result = "!and"; break;
@@ -1054,7 +1054,7 @@ Init *TernOpInit::resolveReferences(Record &R,
const RecordVal *RV) const {
Init *lhs = LHS->resolveReferences(R, RV);
- if (Opc == IF && lhs != LHS) {
+ if (getOpcode() == IF && lhs != LHS) {
IntInit *Value = dyn_cast<IntInit>(lhs);
if (Init *I = lhs->convertInitializerTo(IntRecTy::get()))
Value = dyn_cast<IntInit>(I);
@@ -1082,7 +1082,7 @@ Init *TernOpInit::resolveReferences(Record &R,
std::string TernOpInit::getAsString() const {
std::string Result;
- switch (Opc) {
+ switch (getOpcode()) {
case SUBST: Result = "!subst"; break;
case FOREACH: Result = "!foreach"; break;
case IF: Result = "!if"; break;
diff --git a/contrib/llvm/lib/TableGen/TGParser.cpp b/contrib/llvm/lib/TableGen/TGParser.cpp
index e5f6f16..1506a71 100644
--- a/contrib/llvm/lib/TableGen/TGParser.cpp
+++ b/contrib/llvm/lib/TableGen/TGParser.cpp
@@ -77,7 +77,8 @@ bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) {
/// SetValue -
/// Return true on error, false on success.
bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
- const std::vector<unsigned> &BitList, Init *V) {
+ ArrayRef<unsigned> BitList, Init *V,
+ bool AllowSelfAssignment) {
if (!V) return false;
if (!CurRec) CurRec = &CurMultiClass->Rec;
@@ -91,8 +92,8 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
// in the resolution machinery.
if (BitList.empty())
if (VarInit *VI = dyn_cast<VarInit>(V))
- if (VI->getNameInit() == ValName)
- return false;
+ if (VI->getNameInit() == ValName && !AllowSelfAssignment)
+ return true;
// If we are assigning to a subset of the bits in the value... then we must be
// assigning to a field of BitsRecTy, which must have a BitsInit
@@ -165,7 +166,7 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
if (i < SubClass.TemplateArgs.size()) {
// If a value is specified for this template arg, set it now.
if (SetValue(CurRec, SubClass.RefRange.Start, TArgs[i],
- std::vector<unsigned>(), SubClass.TemplateArgs[i]))
+ None, SubClass.TemplateArgs[i]))
return true;
// Resolve it next.
@@ -243,8 +244,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
// If a value is specified for this template arg, set it in the
// superclass now.
if (SetValue(CurRec, SubMultiClass.RefRange.Start, SMCTArgs[i],
- std::vector<unsigned>(),
- SubMultiClass.TemplateArgs[i]))
+ None, SubMultiClass.TemplateArgs[i]))
return true;
// Resolve it next.
@@ -258,8 +258,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
for (const auto &Def :
makeArrayRef(CurMC->DefPrototypes).slice(newDefStart)) {
if (SetValue(Def.get(), SubMultiClass.RefRange.Start, SMCTArgs[i],
- std::vector<unsigned>(),
- SubMultiClass.TemplateArgs[i]))
+ None, SubMultiClass.TemplateArgs[i]))
return true;
// Resolve it next.
@@ -332,8 +331,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
IterRec->addValue(RecordVal(IterVar->getName(), IVal->getType(), false));
- if (SetValue(IterRec.get(), Loc, IterVar->getName(),
- std::vector<unsigned>(), IVal))
+ if (SetValue(IterRec.get(), Loc, IterVar->getName(), None, IVal))
return Error(Loc, "when instantiating this def");
// Resolve it next.
@@ -1728,7 +1726,7 @@ Init *TGParser::ParseDeclaration(Record *CurRec,
SMLoc ValLoc = Lex.getLoc();
Init *Val = ParseValue(CurRec, Type);
if (!Val ||
- SetValue(CurRec, ValLoc, DeclName, std::vector<unsigned>(), Val))
+ SetValue(CurRec, ValLoc, DeclName, None, Val))
// Return the name, even if an error is thrown. This is so that we can
// continue to make some progress, even without the value having been
// initialized.
@@ -2358,8 +2356,8 @@ Record *TGParser::InstantiateMulticlassDef(MultiClass &MC, Record *DefProto,
// Set the value for NAME. We don't resolve references to it 'til later,
// though, so that uses in nested multiclass names don't get
// confused.
- if (SetValue(CurRec.get(), Ref.RefRange.Start, "NAME",
- std::vector<unsigned>(), DefmPrefix)) {
+ if (SetValue(CurRec.get(), Ref.RefRange.Start, "NAME", None, DefmPrefix,
+ /*AllowSelfAssignment*/true)) {
Error(DefmPrefixRange.Start, "Could not resolve " +
CurRec->getNameInitAsString() + ":NAME to '" +
DefmPrefix->getAsUnquotedString() + "'");
@@ -2446,8 +2444,7 @@ bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC, Record *CurRec,
// Check if a value is specified for this temp-arg.
if (i < TemplateVals.size()) {
// Set it now.
- if (SetValue(CurRec, DefmPrefixLoc, TArgs[i], std::vector<unsigned>(),
- TemplateVals[i]))
+ if (SetValue(CurRec, DefmPrefixLoc, TArgs[i], None, TemplateVals[i]))
return true;
// Resolve it next.
diff --git a/contrib/llvm/lib/TableGen/TGParser.h b/contrib/llvm/lib/TableGen/TGParser.h
index 8b41134..739d9a9 100644
--- a/contrib/llvm/lib/TableGen/TGParser.h
+++ b/contrib/llvm/lib/TableGen/TGParser.h
@@ -105,10 +105,13 @@ public:
private: // Semantic analysis methods.
bool AddValue(Record *TheRec, SMLoc Loc, const RecordVal &RV);
bool SetValue(Record *TheRec, SMLoc Loc, Init *ValName,
- const std::vector<unsigned> &BitList, Init *V);
+ ArrayRef<unsigned> BitList, Init *V,
+ bool AllowSelfAssignment = false);
bool SetValue(Record *TheRec, SMLoc Loc, const std::string &ValName,
- const std::vector<unsigned> &BitList, Init *V) {
- return SetValue(TheRec, Loc, StringInit::get(ValName), BitList, V);
+ ArrayRef<unsigned> BitList, Init *V,
+ bool AllowSelfAssignment = false) {
+ return SetValue(TheRec, Loc, StringInit::get(ValName), BitList, V,
+ AllowSelfAssignment);
}
bool AddSubClass(Record *Rec, SubClassReference &SubClass);
bool AddSubMultiClass(MultiClass *CurMC,
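
Switching SetValue's BitList parameter from const std::vector<unsigned>& to ArrayRef<unsigned>, as above, lets callers pass None or a temporary without materializing a vector at every call site. A minimal stand-in showing why the empty case is free (SpanRef is illustrative, not llvm::ArrayRef):

    #include <cassert>
    #include <cstddef>

    template <typename T> class SpanRef { // minimal ArrayRef stand-in
      const T *Data = nullptr;
      size_t Length = 0;
    public:
      SpanRef() = default; // plays the role of passing None
      SpanRef(const T *D, size_t N) : Data(D), Length(N) {}
      bool empty() const { return Length == 0; }
    };

    static bool setValue(SpanRef<unsigned> BitList) { return BitList.empty(); }

    int main() {
      assert(setValue({})); // empty span, no std::vector<unsigned>() temporary
      unsigned Bits[] = {0, 1};
      assert(!setValue({Bits, 2}));
    }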
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td
index 0bff9b5..46ef2c1 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.td
@@ -124,6 +124,14 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
FeaturePerfMon,
FeatureZCRegMove, FeatureZCZeroing]>;
+def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
+ "Samsung Exynos-M1 processors",
+ [FeatureFPARMv8,
+ FeatureNEON,
+ FeatureCrypto,
+ FeatureCRC,
+ FeaturePerfMon]>;
+
def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
FeatureNEON,
FeatureCRC,
@@ -136,6 +144,8 @@ def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
// FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA57]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
+// FIXME: Exynos-M1 is currently modelled without a specific SchedModel.
+def : ProcessorModel<"exynos-m1", NoSchedModel, [ProcExynosM1]>;
//===----------------------------------------------------------------------===//
// Assembly parser
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 79a84ad..3d1ab4e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -158,7 +158,7 @@ INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE,
"AArch64 A57 FP Load-Balancing", false, false)
namespace {
-/// A Chain is a sequence of instructions that are linked together by
+/// A Chain is a sequence of instructions that are linked together by
/// an accumulation operand. For example:
///
/// fmul d0<def>, ?
@@ -285,7 +285,7 @@ public:
std::string str() const {
std::string S;
raw_string_ostream OS(S);
-
+
OS << "{";
StartInst->print(OS, /* SkipOpers= */true);
OS << " -> ";
@@ -427,7 +427,7 @@ Chain *AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor,
return Ch;
}
}
-
+
// Bailout case - just return the first item.
Chain *Ch = L.front();
L.erase(L.begin());
@@ -495,7 +495,7 @@ int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
RS.enterBasicBlock(&MBB);
RS.forward(MachineBasicBlock::iterator(G->getStart()));
- // Can we find an appropriate register that is available throughout the life
+ // Can we find an appropriate register that is available throughout the life
// of the chain?
unsigned RegClassID = G->getStart()->getDesc().OpInfo[0].RegClass;
BitVector AvailableRegs = RS.getRegsAvailable(TRI->getRegClass(RegClassID));
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9f5beff..4ecfbe9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2426,7 +2426,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
continue;
}
-
+
if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
@@ -5074,7 +5074,7 @@ static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
// The index of an EXT is the first element if it is not UNDEF.
// Watch out for the beginning UNDEFs. The EXT index should be the expected
- // value of the first element. E.g.
+ // value of the first element. E.g.
// <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
// <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
// ExpectedElt is the last mask index plus 1.
@@ -9491,6 +9491,103 @@ static SDValue performBRCONDCombine(SDNode *N,
return SDValue();
}
+// Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
+// as well as whether the test should be inverted. These cases must be
+// handled here rather than by the standard DAG combines because
+// AArch64ISD::TBZ is matched during legalization.
+static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
+ SelectionDAG &DAG) {
+
+ if (!Op->hasOneUse())
+ return Op;
+
+ // We don't handle undef/constant-fold cases below, as they should have
+ // already been taken care of (e.g. and of 0, test of undefined shifted bits,
+ // etc.)
+
+ // (tbz (trunc x), b) -> (tbz x, b)
+ // This case is just here to enable more of the below cases to be caught.
+ if (Op->getOpcode() == ISD::TRUNCATE &&
+ Bit < Op->getValueType(0).getSizeInBits()) {
+ return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
+ }
+
+ if (Op->getNumOperands() != 2)
+ return Op;
+
+ auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!C)
+ return Op;
+
+ switch (Op->getOpcode()) {
+ default:
+ return Op;
+
+ // (tbz (and x, m), b) -> (tbz x, b)
+ case ISD::AND:
+ if ((C->getZExtValue() >> Bit) & 1)
+ return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
+ return Op;
+
+ // (tbz (shl x, c), b) -> (tbz x, b-c)
+ case ISD::SHL:
+ if (C->getZExtValue() <= Bit &&
+ (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
+ Bit = Bit - C->getZExtValue();
+ return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
+ }
+ return Op;
+
+ // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
+ case ISD::SRA:
+ Bit = Bit + C->getZExtValue();
+ if (Bit >= Op->getValueType(0).getSizeInBits())
+ Bit = Op->getValueType(0).getSizeInBits() - 1;
+ return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
+
+ // (tbz (srl x, c), b) -> (tbz x, b+c)
+ case ISD::SRL:
+ if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
+ Bit = Bit + C->getZExtValue();
+ return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
+ }
+ return Op;
+
+ // (tbz (xor x, -1), b) -> (tbnz x, b)
+ case ISD::XOR:
+ if ((C->getZExtValue() >> Bit) & 1)
+ Invert = !Invert;
+ return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
+ }
+}
+
+// Optimize test single bit zero/non-zero and branch.
+static SDValue performTBZCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ bool Invert = false;
+ SDValue TestSrc = N->getOperand(1);
+ SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
+
+ if (TestSrc == NewTestSrc)
+ return SDValue();
+
+ unsigned NewOpc = N->getOpcode();
+ if (Invert) {
+ if (NewOpc == AArch64ISD::TBZ)
+ NewOpc = AArch64ISD::TBNZ;
+ else {
+ assert(NewOpc == AArch64ISD::TBNZ);
+ NewOpc = AArch64ISD::TBZ;
+ }
+ }
+
+ SDLoc DL(N);
+ return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
+ DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
+}
+
// vselect (v1i1 setcc) ->
// vselect (v1iXX setcc) (XX is the size of the compared operand type)
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
@@ -9642,6 +9739,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performSTORECombine(N, DCI, DAG, Subtarget);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
+ case AArch64ISD::TBNZ:
+ case AArch64ISD::TBZ:
+ return performTBZCombine(N, DCI, DAG);
case AArch64ISD::CSEL:
return performCONDCombine(N, DCI, DAG, 2, 3);
case AArch64ISD::DUP:
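
The rewrites in getTestBitOperand are pure bit arithmetic: shifting left by c moves the tested bit down by c, shifting right moves it up by c, and xor with an all-ones mask flips the sense of the test. A standalone check of those identities (values chosen arbitrarily):

    #include <cassert>
    #include <cstdint>

    static bool testBit(uint64_t V, unsigned Bit) { return (V >> Bit) & 1; }

    int main() {
      uint64_t X = 0xA5;
      unsigned C = 3;
      for (unsigned B = C; B < 32; ++B)
        assert(testBit(X << C, B) == testBit(X, B - C)); // the SHL rewrite
      for (unsigned B = 0; B + C < 32; ++B)
        assert(testBit(X >> C, B) == testBit(X, B + C)); // the SRL rewrite
      assert(testBit(~X, 1) == !testBit(X, 1)); // xor with -1 inverts the test
    }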
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 566aa2c..43664df 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -613,21 +613,6 @@ static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
-// Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI.
-static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
- MachineInstr *Op1) {
- assert(MI->memoperands_empty() && "expected a new machineinstr");
- size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) +
- (Op1->memoperands_end() - Op1->memoperands_begin());
-
- MachineFunction *MF = MI->getParent()->getParent();
- MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
- MachineSDNode::mmo_iterator MemEnd =
- std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
- MemEnd = std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
- MI->setMemRefs(MemBegin, MemEnd);
-}
-
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
@@ -692,10 +677,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
TII->get(NewOpc))
.addOperand(getLdStRegOp(RtNewDest))
.addOperand(BaseRegOp)
- .addImm(OffsetImm);
-
- // Copy MachineMemOperands from the original loads.
- concatenateMemOperands(NewMemMI, I, Paired);
+ .addImm(OffsetImm)
+ .setMemRefs(I->mergeMemRefsWith(*Paired));
DEBUG(
dbgs()
@@ -786,9 +769,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
TII->get(NewOpc))
.addOperand(getLdStRegOp(I))
.addOperand(BaseRegOp)
- .addImm(OffsetImm);
- // Copy MachineMemOperands from the original stores.
- concatenateMemOperands(MIB, I, Paired);
+ .addImm(OffsetImm)
+ .setMemRefs(I->mergeMemRefsWith(*Paired));
} else {
// Handle Unscaled
if (IsUnscaled)
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 1b8b9b2..151133b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -33,7 +33,14 @@ class Triple;
class AArch64Subtarget : public AArch64GenSubtargetInfo {
protected:
- enum ARMProcFamilyEnum {Others, CortexA35, CortexA53, CortexA57, Cyclone};
+ enum ARMProcFamilyEnum {
+ Others,
+ CortexA35,
+ CortexA53,
+ CortexA57,
+ Cyclone,
+ ExynosM1
+ };
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily;
@@ -143,6 +150,7 @@ public:
bool isCyclone() const { return CPUString == "cyclone"; }
bool isCortexA57() const { return CPUString == "cortex-a57"; }
bool isCortexA53() const { return CPUString == "cortex-a53"; }
+ bool isExynosM1() const { return CPUString == "exynos-m1"; }
bool useAA() const override { return isCortexA53(); }
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index 78f5289..cde1c6d 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -834,7 +834,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings
};
uint32_t
-AArch64SysReg::SysRegMapper::fromString(StringRef Name,
+AArch64SysReg::SysRegMapper::fromString(StringRef Name,
const FeatureBitset& FeatureBits, bool &Valid) const {
std::string NameLower = Name.lower();
@@ -878,7 +878,7 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name,
}
std::string
-AArch64SysReg::SysRegMapper::toString(uint32_t Bits,
+AArch64SysReg::SysRegMapper::toString(uint32_t Bits,
const FeatureBitset& FeatureBits) const {
// First search the registers shared by all
for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index f649cb9..e63627e 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -285,17 +285,17 @@ struct AArch64NamedImmMapper {
// Zero value of FeatureBitSet means the mapping is always available
FeatureBitset FeatureBitSet;
- bool isNameEqual(std::string Other,
+ bool isNameEqual(std::string Other,
const FeatureBitset& FeatureBits) const {
- if (FeatureBitSet.any() &&
+ if (FeatureBitSet.any() &&
(FeatureBitSet & FeatureBits).none())
return false;
return Name == Other;
}
- bool isValueEqual(uint32_t Other,
+ bool isValueEqual(uint32_t Other,
const FeatureBitset& FeatureBits) const {
- if (FeatureBitSet.any() &&
+ if (FeatureBitSet.any() &&
(FeatureBitSet & FeatureBits).none())
return false;
return Value == Other;
@@ -310,7 +310,7 @@ struct AArch64NamedImmMapper {
StringRef toString(uint32_t Value, const FeatureBitset& FeatureBits,
bool &Valid) const;
// Maps string to value, depending on availability for FeatureBits given
- uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
+ uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
bool &Valid) const;
/// Many of the instructions allow an alternative assembly form consisting of
@@ -1322,7 +1322,7 @@ namespace AArch64TLBI {
return true;
}
}
-}
+}
namespace AArch64II {
/// Target Operand Flag enum.
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
index d4af8d2..db869cf 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -118,6 +118,11 @@ def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
"true",
"Support flat address space">;
+def FeatureXNACK : SubtargetFeature<"xnack",
+ "EnableXNACK",
+ "true",
+ "Enable XNACK support">;
+
def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
"EnableVGPRSpilling",
"true",
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index ba71dc0..9c37902 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -417,13 +417,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
}
}
- if (VCCUsed || FlatUsed)
+ if (VCCUsed || FlatUsed || STM.isXNACKEnabled()) {
MaxSGPR += 2;
- if (FlatUsed) {
- MaxSGPR += 2;
- // 2 additional for VI+.
- if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (FlatUsed)
+ MaxSGPR += 2;
+
+ if (STM.isXNACKEnabled())
MaxSGPR += 2;
}
@@ -620,6 +620,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
if (MFI->hasDispatchPtr())
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ if (STM.isXNACKEnabled())
+ header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
+
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 11f6139..2a7ce6a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -204,14 +204,6 @@ def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
-def az_extloadi8_flat : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
- return isFlatLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def sextloadi8_flat : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
- return isFlatLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
@@ -243,14 +235,6 @@ def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
-def az_extloadi16_flat : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
- return isFlatLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def sextloadi16_flat : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
- return isFlatLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
@@ -299,16 +283,6 @@ def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;
-def truncstorei8_flat : PatFrag<(ops node:$val, node:$ptr),
- (truncstorei8 node:$val, node:$ptr), [{
- return isFlatStore(dyn_cast<StoreSDNode>(N));
-}]>;
-
-def truncstorei16_flat : PatFrag<(ops node:$val, node:$ptr),
- (truncstorei16 node:$val, node:$ptr), [{
- return isFlatStore(dyn_cast<StoreSDNode>(N));
-}]>;
-
def local_store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return isLocalStore(dyn_cast<StoreSDNode>(N));
@@ -385,15 +359,6 @@ multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;
-def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return isFlatLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def flat_store : PatFrag<(ops node:$val, node:$ptr),
- (store node:$val, node:$ptr), [{
- return isFlatStore(dyn_cast<StoreSDNode>(N));
-}]>;
-
def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 44e0c47..c6af5b9 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -73,6 +73,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false),
EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true),
EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false),
+ EnableXNACK(false),
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 9c7bb88..d371227 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -76,6 +76,7 @@ private:
bool EnableIfCvt;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
+ bool EnableXNACK;
unsigned WavefrontSize;
bool CFALUBug;
int LocalMemorySize;
@@ -290,6 +291,10 @@ public:
}
bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;
+ bool isXNACKEnabled() const {
+ return EnableXNACK;
+ }
+
unsigned getMaxWavesPerCU() const {
if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
return 10;
diff --git a/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td
index 88a090d..c543814 100644
--- a/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td
@@ -264,42 +264,6 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
} // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst
-//===----------------------------------------------------------------------===//
-// Flat Patterns
-//===----------------------------------------------------------------------===//
-
-let Predicates = [HasFlatAddressSpace] in {
-
-class FLATLoad_Pattern <FLAT Instr_ADDR64, ValueType vt,
- PatFrag flat_ld> :
- Pat <(vt (flat_ld i64:$ptr)),
- (Instr_ADDR64 $ptr, 0, 0, 0)
->;
-
-def : FLATLoad_Pattern <FLAT_LOAD_SBYTE, i32, sextloadi8_flat>;
-def : FLATLoad_Pattern <FLAT_LOAD_UBYTE, i32, az_extloadi8_flat>;
-def : FLATLoad_Pattern <FLAT_LOAD_SSHORT, i32, sextloadi16_flat>;
-def : FLATLoad_Pattern <FLAT_LOAD_USHORT, i32, az_extloadi16_flat>;
-def : FLATLoad_Pattern <FLAT_LOAD_DWORD, i32, flat_load>;
-def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, flat_load>;
-def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, az_extloadi32_flat>;
-def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, v2i32, flat_load>;
-def : FLATLoad_Pattern <FLAT_LOAD_DWORDX4, v4i32, flat_load>;
-
-class FLATStore_Pattern <FLAT Instr, ValueType vt, PatFrag st> :
- Pat <(st vt:$value, i64:$ptr),
- (Instr $value, $ptr, 0, 0, 0)
- >;
-
-def : FLATStore_Pattern <FLAT_STORE_BYTE, i32, truncstorei8_flat>;
-def : FLATStore_Pattern <FLAT_STORE_SHORT, i32, truncstorei16_flat>;
-def : FLATStore_Pattern <FLAT_STORE_DWORD, i32, flat_store>;
-def : FLATStore_Pattern <FLAT_STORE_DWORDX2, i64, flat_store>;
-def : FLATStore_Pattern <FLAT_STORE_DWORDX2, v2i32, flat_store>;
-def : FLATStore_Pattern <FLAT_STORE_DWORDX4, v4i32, flat_store>;
-
-} // End HasFlatAddressSpace predicate
-
let Predicates = [isCI] in {
// Convert (x - floor(x)) to fract(x)
@@ -320,20 +284,10 @@ def : Pat <
//===----------------------------------------------------------------------===//
-// Patterns to generate flat for global
+// Flat Patterns
//===----------------------------------------------------------------------===//
-def useFlatForGlobal : Predicate <
- "Subtarget->useFlatForGlobal() || "
- "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">;
-
-let Predicates = [useFlatForGlobal] in {
-
-// 1. Offset as 20bit DWORD immediate
-def : Pat <
- (SIload_constant v4i32:$sbase, IMM20bit:$offset),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
->;
+let Predicates = [isCIVI] in {
// Patterns for global loads with no offset
class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
@@ -341,24 +295,24 @@ class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(inst $addr, 0, 0, 0)
>;
-def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_global, i32>;
-def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_global, i32>;
-def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_global, i32>;
-def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_global, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORDX2, global_load, v2i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORDX4, global_load, v4i32>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
+def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
+def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;
class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(node vt:$data, i64:$addr),
(inst $data, $addr, 0, 0, 0)
>;
-def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_global, i32>;
-def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_global, i32>;
-def : FlatStorePat <FLAT_STORE_DWORD, global_store, i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX2, global_store, v2i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX4, global_store, v4i32>;
+def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
+def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
+def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(vt (node i64:$addr, vt:$data)),
@@ -376,4 +330,4 @@ def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
-} // End Predicates = [useFlatForGlobal]
+} // End Predicates = [isCIVI]
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 6b3c81c..7d20509 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -105,51 +105,53 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
MBB.addLiveIn(PreloadedPrivateBufferReg);
}
- // We reserved the last registers for this. Shift it down to the end of those
- // which were actually used.
- //
- // FIXME: It might be safer to use a pseudoregister before replacement.
-
- // FIXME: We should be able to eliminate unused input registers. We only
- // cannot do this for the resources required for scratch access. For now we
- // skip over user SGPRs and may leave unused holes.
-
- // We find the resource first because it has an alignment requirement.
- if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
- // Skip the last 2 elements because the last one is reserved for VCC, and
- // this is the 2nd to last element already.
- for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
- // Pick the first unallocated one. Make sure we don't clobber the other
- // reserved input we needed.
- if (!MRI.isPhysRegUsed(Reg)) {
- assert(MRI.isAllocatable(Reg));
- MRI.replaceRegWith(ScratchRsrcReg, Reg);
- ScratchRsrcReg = Reg;
- MFI->setScratchRSrcReg(ScratchRsrcReg);
- break;
+ if (!ST.hasSGPRInitBug()) {
+ // We reserved the last registers for this. Shift it down to the end of those
+ // which were actually used.
+ //
+ // FIXME: It might be safer to use a pseudoregister before replacement.
+
+ // FIXME: We should be able to eliminate unused input registers. We only
+ // cannot do this for the resources required for scratch access. For now we
+ // skip over user SGPRs and may leave unused holes.
+
+ // We find the resource first because it has an alignment requirement.
+ if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
+ // Skip the last 2 elements because the last one is reserved for VCC, and
+ // this is the 2nd to last element already.
+ for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
+ // Pick the first unallocated one. Make sure we don't clobber the other
+ // reserved input we needed.
+ if (!MRI.isPhysRegUsed(Reg)) {
+ assert(MRI.isAllocatable(Reg));
+ MRI.replaceRegWith(ScratchRsrcReg, Reg);
+ ScratchRsrcReg = Reg;
+ MFI->setScratchRSrcReg(ScratchRsrcReg);
+ break;
+ }
}
}
- }
- if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- // Skip the last 2 elements because the last one is reserved for VCC, and
- // this is the 2nd to last element already.
- unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
- for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) {
- // Pick the first unallocated SGPR. Be careful not to pick an alias of the
- // scratch descriptor, since we haven't added its uses yet.
- if (!MRI.isPhysRegUsed(Reg)) {
- assert(MRI.isAllocatable(Reg) &&
- !TRI->isSubRegisterEq(ScratchRsrcReg, Reg));
-
- MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
- ScratchWaveOffsetReg = Reg;
- MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
- break;
+ if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Skip the last 2 elements because the last one is reserved for VCC, and
+ // this is the 2nd to last element already.
+ unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
+ for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) {
+ // Pick the first unallocated SGPR. Be careful not to pick an alias of the
+ // scratch descriptor, since we haven't added its uses yet.
+ if (!MRI.isPhysRegUsed(Reg)) {
+ assert(MRI.isAllocatable(Reg) &&
+ !TRI->isSubRegisterEq(ScratchRsrcReg, Reg));
+
+ MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
+ ScratchWaveOffsetReg = Reg;
+ MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
+ break;
+ }
}
}
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 10f2adde..8735277 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -134,6 +134,34 @@ def SIconstdata_ptr : SDNode<
SDTCisVT<0, i64>]>
>;
+//===----------------------------------------------------------------------===//
+// PatFrags for FLAT instructions
+//===----------------------------------------------------------------------===//
+
+class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
+ (ld node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N)) ||
+ isGlobalLoad(dyn_cast<LoadSDNode>(N)) ||
+ isConstantLoad(cast<LoadSDNode>(N), -1);
+}]>;
+
+def flat_load : flat_ld <load>;
+def flat_az_extloadi8 : flat_ld <az_extloadi8>;
+def flat_sextloadi8 : flat_ld <sextloadi8>;
+def flat_az_extloadi16 : flat_ld <az_extloadi16>;
+def flat_sextloadi16 : flat_ld <sextloadi16>;
+
+class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ return isFlatStore(dyn_cast<StoreSDNode>(N)) ||
+ isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def flat_store: flat_st <store>;
+def flat_truncstorei8 : flat_st <truncstorei8>;
+def flat_truncstorei16 : flat_st <truncstorei16>;
+
+
def mubuf_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
return isGlobalLoad(cast<LoadSDNode>(N)) ||
isConstantLoad(cast<LoadSDNode>(N), -1);
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
index 6f653c7..b7df058 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -59,8 +59,6 @@ defm EXP : EXP_m;
// SMRD Instructions
//===----------------------------------------------------------------------===//
-let mayLoad = 1 in {
-
// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SGPR_32 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.
@@ -90,8 +88,6 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512
>;
-} // mayLoad = 1
-
//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>;
defm S_DCACHE_INV : SMRD_Inval <smrd<0x1f, 0x20>, "s_dcache_inv",
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 935aad4..bf15516 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -156,6 +156,17 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
if (!LaneVGPRs.count(LaneVGPRIdx)) {
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
+
+ if (LaneVGPR == AMDGPU::NoRegister) {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("Ran out of VGPRs for spilling SGPR");
+
+ // When compiling from inside Mesa, the compilation continues.
+ // Select an arbitrary register to avoid triggering assertions
+ // during subsequent passes.
+ LaneVGPR = AMDGPU::VGPR0;
+ }
+
LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
// Add this register as live-in to all blocks to avoid machine verifier
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 3cdffef..2afa009 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -37,13 +37,17 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
if (ST.hasSGPRInitBug()) {
unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4;
+ if (ST.isXNACKEnabled())
+ BaseIdx -= 4;
+
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- // 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the
- // next sgpr128 down.
+ // 98/99 need to be reserved for flat_scr, or, when XNACK is enabled,
+ // 96/97 for flat_scr and 98/99 for xnack_mask; 100/101 are reserved for
+ // vcc. Either way, this is the next sgpr128 down.
return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95;
}
@@ -54,13 +58,25 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
const MachineFunction &MF) const {
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
if (ST.hasSGPRInitBug()) {
- unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
+ unsigned Idx;
+
+ if (!ST.isXNACKEnabled())
+ Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
+ else
+ Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1;
+
return AMDGPU::SGPR_32RegClass.getRegister(Idx);
}
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- // Next register before reservations for flat_scr and vcc.
- return AMDGPU::SGPR97;
+ if (!ST.isXNACKEnabled()) {
+ // Next register before reservations for flat_scr and vcc.
+ return AMDGPU::SGPR97;
+ } else {
+ // Next register before reservations for flat_scr, xnack_mask, vcc,
+ // and scratch resource.
+ return AMDGPU::SGPR91;
+ }
}
return AMDGPU::SGPR95;
@@ -86,6 +102,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// for VCC/FLAT_SCR.
reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99);
reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101);
+
+ if (ST.isXNACKEnabled())
+ reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97);
}
// Tonga and Iceland can only allocate a fixed number of SGPRs due
@@ -93,9 +112,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (ST.hasSGPRInitBug()) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
// Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
- // Assume XNACK_MASK is unused.
unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
+ if (ST.isXNACKEnabled())
+ Limit -= 2;
+
for (unsigned i = Limit; i < NumSGPRs; ++i) {
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
@@ -282,11 +303,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
struct SIMachineFunctionInfo::SpilledReg Spill =
MFI->getSpilledReg(MF, Index, i);
- if (Spill.VGPR == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("Ran out of VGPRs for spilling SGPR");
- }
-
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
Spill.VGPR)
@@ -315,11 +331,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
struct SIMachineFunctionInfo::SpilledReg Spill =
MFI->getSpilledReg(MF, Index, i);
- if (Spill.VGPR == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("Ran out of VGPRs for spilling SGPR");
- }
-
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
SubReg)
diff --git a/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td
index 20a026a..1a7801c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td
@@ -101,3 +101,12 @@ def S_DCACHE_WB_VOL : SMEM_Inval <0x23,
} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
+let Predicates = [isVI] in {
+
+// 1. Offset as 20bit DWORD immediate
+def : Pat <
+ (SIload_constant v4i32:$sbase, IMM20bit:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
+>;
+
+} // End Predicates = [isVI]
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index a44dc83..c171656 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -252,6 +252,8 @@ def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
"Swift ARM processors", []>;
+def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
+ "Samsung Exynos-M1 processors", []>;
def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
"Cortex-R4 ARM processors", []>;
@@ -649,6 +651,12 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureCrypto,
FeatureZCZeroing]>;
+def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index e89757c..55c1684 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -340,12 +340,12 @@ namespace {
/// verify - check BBOffsets, BBSizes, alignment of islands
void ARMConstantIslands::verify() {
#ifndef NDEBUG
- for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock *MBB = &*MBBI;
- unsigned MBBId = MBB->getNumber();
- assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
- }
+ assert(std::is_sorted(MF->begin(), MF->end(),
+ [this](const MachineBasicBlock &LHS,
+ const MachineBasicBlock &RHS) {
+ return BBInfo[LHS.getNumber()].postOffset() <
+ BBInfo[RHS.getNumber()].postOffset();
+ }));
DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
CPUser &U = CPUsers[i];
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 725b838..6e7e47b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1986,23 +1986,6 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
return AddedRegPressure.size() <= MemRegs.size() * 2;
}
-
-/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
-static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
- MachineInstr *Op1) {
- assert(MI->memoperands_empty() && "expected a new machineinstr");
- size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
- + (Op1->memoperands_end() - Op1->memoperands_begin());
-
- MachineFunction *MF = MI->getParent()->getParent();
- MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
- MachineSDNode::mmo_iterator MemEnd =
- std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
- MemEnd =
- std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
- MI->setMemRefs(MemBegin, MemEnd);
-}
-
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
DebugLoc &dl, unsigned &NewOpc,
@@ -2196,7 +2179,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
- concatenateMemOperands(MIB, Op0, Op1);
+ MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumLDRDFormed;
} else {
@@ -2210,7 +2193,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
- concatenateMemOperands(MIB, Op0, Op1);
+ MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumSTRDFormed;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index a8b2801..4d54e57 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -44,7 +44,7 @@ protected:
enum ARMProcFamilyEnum {
Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexA35, CortexA53,
- CortexA57, CortexA72, Krait, Swift
+ CortexA57, CortexA72, Krait, Swift, ExynosM1
};
enum ARMProcClassEnum {
None, AClass, RClass, MClass
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
index 1189cfd..5a7eb21 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
@@ -251,6 +251,10 @@ def : Proc<"hexagonv60", HexagonModelV60,
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
+def HexagonAsmParser : AsmParser {
+ bit HasMnemonicFirst = 0;
+}
+
def HexagonAsmParserVariant : AsmParserVariant {
int Variant = 0;
string TokenizingCharacters = "#()=:.<>!+*";
@@ -259,5 +263,6 @@ def HexagonAsmParserVariant : AsmParserVariant {
def Hexagon : Target {
// Pull in Instruction Info:
let InstructionSet = HexagonInstrInfo;
+ let AssemblyParsers = [HexagonAsmParser];
let AssemblyParserVariants = [HexagonAsmParserVariant];
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
index 5cfeba7..421403f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -5807,3 +5807,5 @@ include "HexagonInstrInfoV60.td"
include "HexagonInstrInfoVector.td"
include "HexagonInstrAlias.td"
+include "HexagonSystemInst.td"
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td b/contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td
new file mode 100644
index 0000000..784686a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td
@@ -0,0 +1,113 @@
+//==- HexagonSystemInst.td - System Instructions for Hexagon -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Cache manipulation instructions.
+//===----------------------------------------------------------------------===//
+let mayStore = 1 in
+class ST_MISC_CACHEOP<dag outs, dag ins,
+ string asmstr, list<dag> pattern = [],
+ bits<3> amode, bits<3> type, bits<1> un>
+ : ST0Inst<outs, ins, asmstr, pattern, "", ST_tc_ld_SLOT0> {
+
+ bits<5> Rs;
+ bits<5> Rt;
+ bits<5> Rd;
+ let Inst{31-28} = 0b1010;
+ let Inst{27-25} = amode;
+ let Inst{24-22} = type;
+ let Inst{21} = un;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{4-0} = Rd;
+}
+
+let mayStore = 1 in
+class ST_MISC_CACHEOP_SYS<dag outs, dag ins,
+ string asmstr, list<dag> pattern = [],
+ bits<3> amode, bits<3> type, bits<1> un>
+ : SYSInst<outs, ins, asmstr, pattern, ""> {
+
+ bits<5> Rs;
+ bits<5> Rt;
+ bits<5> Rd;
+ let Inst{31-28} = 0b1010;
+ let Inst{27-25} = amode;
+ let Inst{24-22} = type;
+ let Inst{21} = un;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{4-0} = Rd;
+}
+
+
+let isSolo = 1, Rs = 0, Rt = 0, Rd = 0 in {
+def Y2_syncht: ST_MISC_CACHEOP <(outs), (ins),
+ "syncht" , [], 0b100, 0b001, 0b0>;
+}
+
+let Rt = 0, Rd = 0 in {
+let isSoloAin1 = 1 in {
+ def Y2_dccleana: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs),
+ "dccleana($Rs)", [], 0b000, 0b000, 0b0>;
+ def Y2_dcinva: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs),
+ "dcinva($Rs)", [], 0b000, 0b000, 0b1>;
+ def Y2_dccleaninva: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs),
+ "dccleaninva($Rs)", [], 0b000, 0b001, 0b0>;
+ }
+}
+
+let isSoloAX = 1, hasSideEffects = 1, Rd = 0 in {
+ def Y4_l2fetch: ST_MISC_CACHEOP_SYS<(outs), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "l2fetch($Rs, $Rt)", [], 0b011, 0b000, 0b0>;
+ def Y5_l2fetch: ST_MISC_CACHEOP_SYS<(outs), (ins IntRegs:$Rs, DoubleRegs:$Rt),
+ "l2fetch($Rs, $Rt)", [], 0b011, 0b010, 0b0>;
+}
+
+let hasSideEffects = 0, isSolo = 1 in
+class Y2_INVALIDATE_CACHE<string mnemonic, bit MajOp>
+ : JRInst <
+ (outs), (ins IntRegs:$Rs),
+ #mnemonic#"($Rs)" > {
+ bits<5> Rs;
+
+ let IClass = 0b0101;
+ let Inst{27-21} = 0b0110110;
+ let Inst{20-16} = Rs;
+ let Inst{13-12} = 0b00;
+ let Inst{11} = MajOp;
+ }
+// Instruction cache invalidate
+def Y2_icinva : Y2_INVALIDATE_CACHE<"icinva", 0b0>;
+
+// Zero an aligned 32-byte cacheline.
+let isSoloAin1 = 1 in
+def Y2_dczeroa: ST0Inst <(outs), (ins IntRegs:$Rs),
+ "dczeroa($Rs)"> {
+ bits<5> Rs;
+ let IClass = 0b1010;
+ let Inst{27-21} = 0b0000110;
+ let Inst{13} = 0b0;
+ let Inst{20-16} = Rs;
+ }
+
+// Memory synchronization.
+let hasSideEffects = 0, isSolo = 1 in
+def Y2_isync: JRInst <(outs), (ins),
+ "isync"> {
+ let IClass = 0b0101;
+ let Inst{27-16} = 0b011111000000;
+ let Inst{13} = 0b0;
+ let Inst{9-0} = 0b0000000010;
+ }
+
diff --git a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
index ee9d060..92ecde3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
+++ b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -5,6 +5,23 @@
pr38151.c
va-arg-22.c
+# WebAssemblyRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator, int, unsigned int, llvm::RegScavenger *) const: Assertion `MI.getOperand(1).getImm() == 0 && "Can't eliminate FI yet if offset is already set"'
+20030313-1.c
+20030916-1.c
+20031012-1.c
+20041126-1.c
+20060420-1.c
+20071202-1.c
+20120808-1.c
+pr20527-1.c
+pr27073.c
+pr36339.c
+pr37573.c
+pr43236.c
+pr43835.c
+pr45070.c
+pr51933.c
+
# TargetRegisterInfo.h:315: static unsigned int llvm::TargetRegisterInfo::virtReg2Index(unsigned int): Assertion `isVirtualRegister(Reg) && "Not a virtual register"' failed.
struct-ret-1.c
va-arg-11.c
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 82f0ee5..73f654c 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -32,7 +32,6 @@ static unsigned getVectorRegSize(unsigned RegNo) {
return 64;
llvm_unreachable("Unknown vector reg!");
- return 0;
}
static MVT getRegOperandVectorVT(const MCInst *MI, const MVT &ScalarVT,
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 4fdd527..619f7c8 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "X86ShuffleDecode.h"
-#include "llvm/IR/Constants.h"
#include "llvm/CodeGen/MachineValueType.h"
//===----------------------------------------------------------------------===//
@@ -296,54 +295,6 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
}
}
-void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- // It is not an error for the PSHUFB mask to not be a vector of i8 because the
- // constant pool uniques constants by their bit representation.
- // e.g. the following take up the same space in the constant pool:
- // i128 -170141183420855150465331762880109871104
- //
- // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
- //
- // <4 x i32> <i32 -2147483648, i32 -2147483648,
- // i32 -2147483648, i32 -2147483648>
-
-#ifndef NDEBUG
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512);
-#endif
-
- // This is a straightforward byte vector.
- if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) {
- int NumElements = MaskTy->getVectorNumElements();
- ShuffleMask.reserve(NumElements);
-
- for (int i = 0; i < NumElements; ++i) {
- // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
- // lane of the vector we're inside.
- int Base = i & ~0xf;
- Constant *COp = C->getAggregateElement(i);
- if (!COp) {
- ShuffleMask.clear();
- return;
- } else if (isa<UndefValue>(COp)) {
- ShuffleMask.push_back(SM_SentinelUndef);
- continue;
- }
- uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
- // If the high bit (7) of the byte is set, the element is zeroed.
- if (Element & (1 << 7))
- ShuffleMask.push_back(SM_SentinelZero);
- else {
- // Only the least significant 4 bits of the byte are used.
- int Index = Base + (Element & 0xf);
- ShuffleMask.push_back(Index);
- }
- }
- }
- // TODO: Handle funny-looking vectors too.
-}
-
void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask) {
for (int i = 0, e = RawMask.size(); i < e; ++i) {
@@ -388,68 +339,6 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
}
}
-void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
- SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- // It is not an error for the PSHUFB mask to not be a vector of i8 because the
- // constant pool uniques constants by their bit representation.
- // e.g. the following take up the same space in the constant pool:
- // i128 -170141183420855150465331762880109871104
- //
- // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
- //
- // <4 x i32> <i32 -2147483648, i32 -2147483648,
- // i32 -2147483648, i32 -2147483648>
-
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
-
- if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.
- return;
-
- // Only support vector types.
- if (!MaskTy->isVectorTy())
- return;
-
- // Make sure its an integer type.
- Type *VecEltTy = MaskTy->getVectorElementType();
- if (!VecEltTy->isIntegerTy())
- return;
-
- // Support any element type from byte up to element size.
- // This is necessary primarily because 64-bit elements get split to 32-bit
- // in the constant pool on 32-bit target.
- unsigned EltTySize = VecEltTy->getIntegerBitWidth();
- if (EltTySize < 8 || EltTySize > ElSize)
- return;
-
- unsigned NumElements = MaskTySize / ElSize;
- assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
- "Unexpected number of vector elements.");
- ShuffleMask.reserve(NumElements);
- unsigned NumElementsPerLane = 128 / ElSize;
- unsigned Factor = ElSize / EltTySize;
-
- for (unsigned i = 0; i < NumElements; ++i) {
- Constant *COp = C->getAggregateElement(i * Factor);
- if (!COp) {
- ShuffleMask.clear();
- return;
- } else if (isa<UndefValue>(COp)) {
- ShuffleMask.push_back(SM_SentinelUndef);
- continue;
- }
- int Index = i & ~(NumElementsPerLane - 1);
- uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
- if (ElSize == 64)
- Index += (Element >> 1) & 0x1;
- else
- Index += Element & 0x3;
- ShuffleMask.push_back(Index);
- }
-
- // TODO: Handle funny-looking vectors too.
-}
-
void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
unsigned NumDstElts = DstVT.getVectorNumElements();
unsigned SrcScalarBits = SrcVT.getScalarSizeInBits();
@@ -572,58 +461,4 @@ void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
}
}
-void DecodeVPERMVMask(const Constant *C, MVT VT,
- SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- if (MaskTy->isVectorTy()) {
- unsigned NumElements = MaskTy->getVectorNumElements();
- if (NumElements == VT.getVectorNumElements()) {
- for (unsigned i = 0; i < NumElements; ++i) {
- Constant *COp = C->getAggregateElement(i);
- if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) {
- ShuffleMask.clear();
- return;
- }
- if (isa<UndefValue>(COp))
- ShuffleMask.push_back(SM_SentinelUndef);
- else {
- uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
- Element &= (1 << NumElements) - 1;
- ShuffleMask.push_back(Element);
- }
- }
- }
- return;
- }
- // Scalar value; just broadcast it
- if (!isa<ConstantInt>(C))
- return;
- uint64_t Element = cast<ConstantInt>(C)->getZExtValue();
- int NumElements = VT.getVectorNumElements();
- Element &= (1 << NumElements) - 1;
- for (int i = 0; i < NumElements; ++i)
- ShuffleMask.push_back(Element);
-}
-
-void DecodeVPERMV3Mask(const Constant *C, MVT VT,
- SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- unsigned NumElements = MaskTy->getVectorNumElements();
- if (NumElements == VT.getVectorNumElements()) {
- for (unsigned i = 0; i < NumElements; ++i) {
- Constant *COp = C->getAggregateElement(i);
- if (!COp) {
- ShuffleMask.clear();
- return;
- }
- if (isa<UndefValue>(COp))
- ShuffleMask.push_back(SM_SentinelUndef);
- else {
- uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
- Element &= (1 << NumElements*2) - 1;
- ShuffleMask.push_back(Element);
- }
- }
- }
-}
} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index ab18e64..72db6a8 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -23,7 +23,6 @@
//===----------------------------------------------------------------------===//
namespace llvm {
-class Constant;
class MVT;
enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 };
@@ -72,9 +71,6 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// different datatypes and vector widths.
void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
-/// \brief Decode a PSHUFB mask from an IR-level vector constant.
-void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
-
/// \brief Decode a PSHUFB mask from a raw array of constants such as from
/// BUILD_VECTOR.
void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
@@ -95,10 +91,6 @@ void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm,
/// No VT provided since it only works on 256-bit, 4 element vectors.
void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-/// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
-void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
- SmallVectorImpl<int> &ShuffleMask);
-
/// \brief Decode a zero extension instruction as a shuffle mask.
void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT,
SmallVectorImpl<int> &ShuffleMask);
@@ -118,18 +110,10 @@ void DecodeEXTRQIMask(int Len, int Idx,
void DecodeINSERTQIMask(int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask);
-/// \brief Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
-void DecodeVPERMVMask(const Constant *C, MVT VT,
- SmallVectorImpl<int> &ShuffleMask);
-
/// \brief Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask);
-/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
-void DecodeVPERMV3Mask(const Constant *C, MVT VT,
- SmallVectorImpl<int> &ShuffleMask);
-
/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants.
void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask);
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index de94a13..629d4d3 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -1098,9 +1098,9 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
RetRegs.push_back(VA.getLocReg());
}
- // All x86 ABIs require that for returning structs by value we copy
- // the sret argument into %rax/%eax (depending on ABI) for the return.
- // We saved the argument into a virtual register in the entry block,
+ // All x86 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+ // We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into %rax/%eax.
if (F.hasStructRetAttr()) {
unsigned Reg = X86MFInfo->getSRetReturnReg();
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index 242d0333..8b5fd27 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -78,27 +78,6 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}
-/// usesTheStack - This function checks if any of the users of EFLAGS
-/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
-/// to use the stack, and if we don't adjust the stack we clobber the first
-/// frame index.
-/// See X86InstrInfo::copyPhysReg.
-static bool usesTheStack(const MachineFunction &MF) {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
-
- // Conservatively assume that inline assembly might use the stack.
- if (MF.hasInlineAsm())
- return true;
-
- return any_of(MRI.reg_instructions(X86::EFLAGS),
- [](const MachineInstr &RI) { return RI.isCopy(); });
-}
-
-static bool doesStackUseImplyFP(const MachineFunction &MF) {
- bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
- return IsWin64Prologue && usesTheStack(MF);
-}
-
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
@@ -112,8 +91,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
MMI.callsUnwindInit() || MMI.hasEHFunclets() || MMI.callsEHReturn() ||
- MFI->hasStackMap() || MFI->hasPatchPoint() ||
- doesStackUseImplyFP(MF));
+ MFI->hasStackMap() || MFI->hasPatchPoint());
}
static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
@@ -965,11 +943,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// push and pop from the stack.
if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) &&
!TRI->needsStackRealignment(MF) &&
- !MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->adjustsStack() && // No calls.
- !IsWin64CC && // Win64 has no Red Zone
- !usesTheStack(MF) && // Don't push and pop.
- !MF.shouldSplitStack()) { // Regular stack
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !IsWin64CC && // Win64 has no Red Zone
+ !MFI->hasOpaqueSPAdjustment() && // Don't push and pop.
+ !MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 4414e47..868ae4e 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -157,13 +157,9 @@ namespace {
/// performance.
bool OptForSize;
- /// If true, selector should try to optimize for minimum code size.
- bool OptForMinSize;
-
public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), OptForSize(false),
- OptForMinSize(false) {}
+ : SelectionDAGISel(tm, OptLevel), OptForSize(false) {}
const char *getPassName() const override {
return "X86 DAG->DAG Instruction Selection";
@@ -535,10 +531,8 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
}
void X86DAGToDAGISel::PreprocessISelDAG() {
- // OptFor[Min]Size are used in pattern predicates that isel is matching.
+ // OptForSize is used in pattern predicates that isel is matching.
OptForSize = MF->getFunction()->optForSize();
- OptForMinSize = MF->getFunction()->optForMinSize();
- assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0927c2f..d31aab0 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18,6 +18,7 @@
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
+#include "X86ShuffleDecodeConstantPool.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -4556,6 +4557,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8);
+ Result = DAG.getBitcast(CastVT, Result);
Vec256 = DAG.getBitcast(CastVT, Vec256);
Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
return DAG.getBitcast(ResultVT, Vec256);
@@ -4851,8 +4853,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
DecodePSHUFBMask(C, Mask);
- if (Mask.empty())
- return false;
break;
}
@@ -4870,7 +4870,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
- if (Mask.empty()) return false;
// Mask only contains negative index if an element is zero.
if (std::any_of(Mask.begin(), Mask.end(),
[](int M){ return M == SM_SentinelZero; }))
@@ -4948,8 +4947,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
DecodeVPERMVMask(C, VT, Mask);
- if (Mask.empty())
- return false;
break;
}
return false;
@@ -5000,8 +4997,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
DecodeVPERMV3Mask(C, VT, Mask);
- if (Mask.empty())
- return false;
break;
}
return false;
@@ -5009,6 +5004,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
default: llvm_unreachable("unknown target shuffle node");
}
+ // Empty mask indicates the decode failed.
+ if (Mask.empty())
+ return false;
+
// If we have a fake unary shuffle, the shuffle mask is spread across two
// inputs that are actually the same node. Re-map the mask to always point
// into the first input.
@@ -17372,6 +17371,18 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
if (!IntrData) {
if (IntNo == llvm::Intrinsic::x86_seh_ehregnode)
return MarkEHRegistrationNode(Op, DAG);
+ if (IntNo == llvm::Intrinsic::x86_flags_read_u32 ||
+ IntNo == llvm::Intrinsic::x86_flags_read_u64 ||
+ IntNo == llvm::Intrinsic::x86_flags_write_u32 ||
+ IntNo == llvm::Intrinsic::x86_flags_write_u64) {
+ // We need a frame pointer because this will get lowered to a PUSH/POP
+ // sequence.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setHasOpaqueSPAdjustment(true);
+ // Don't do anything here, we will expand these intrinsics out later
+ // during ExpandISelPseudos in EmitInstrWithCustomInserter.
+ return SDValue();
+ }
return SDValue();
}
@@ -21144,6 +21155,47 @@ static MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
+static MachineBasicBlock *EmitWRPKRU(MachineInstr *MI, MachineBasicBlock *BB,
+ const X86Subtarget *Subtarget) {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+
+ // insert input VAL into EAX
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
+ .addReg(MI->getOperand(0).getReg());
+ // insert zero to ECX
+ BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::ECX)
+ .addReg(X86::ECX)
+ .addReg(X86::ECX);
+ // insert zero to EDX
+ BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::EDX)
+ .addReg(X86::EDX)
+ .addReg(X86::EDX);
+ // insert WRPKRU instruction
+ BuildMI(*BB, MI, dl, TII->get(X86::WRPKRUr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+static MachineBasicBlock *EmitRDPKRU(MachineInstr *MI, MachineBasicBlock *BB,
+ const X86Subtarget *Subtarget) {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+
+ // insert zero to ECX
+ BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::ECX)
+ .addReg(X86::ECX)
+ .addReg(X86::ECX);
+ // insert RDPKRU instruction
+ BuildMI(*BB, MI, dl, TII->get(X86::RDPKRUr));
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
+ .addReg(X86::EAX);
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
static MachineBasicBlock *EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB,
const X86Subtarget *Subtarget) {
DebugLoc dl = MI->getDebugLoc();
@@ -22495,6 +22547,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::CMOV_V64I1:
return EmitLoweredSelect(MI, BB);
+ case X86::RDFLAGS32:
+ case X86::RDFLAGS64: {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ unsigned PushF =
+ MI->getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
+ unsigned Pop =
+ MI->getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
+ BuildMI(*BB, MI, DL, TII->get(PushF));
+ BuildMI(*BB, MI, DL, TII->get(Pop), MI->getOperand(0).getReg());
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+ }
+
+ case X86::WRFLAGS32:
+ case X86::WRFLAGS64: {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ unsigned Push =
+ MI->getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
+ unsigned PopF =
+ MI->getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64;
+ BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI->getOperand(0).getReg());
+ BuildMI(*BB, MI, DL, TII->get(PopF));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+ }
+
case X86::RELEASE_FADD32mr:
case X86::RELEASE_FADD64mr:
return EmitLoweredAtomicFP(MI, BB);
@@ -22611,7 +22693,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Thread synchronization.
case X86::MONITOR:
return EmitMonitor(MI, BB, Subtarget);
-
+ // PKU feature
+ case X86::WRPKRU:
+ return EmitWRPKRU(MI, BB, Subtarget);
+ case X86::RDPKRU:
+ return EmitRDPKRU(MI, BB, Subtarget);
// xbegin
case X86::XBEGIN:
return EmitXBegin(MI, BB, Subtarget->getInstrInfo());
@@ -23480,6 +23566,31 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
}
return SDValue();
}
+ case X86ISD::BLENDI: {
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = N->getOperand(1);
+ assert(VT == V0.getSimpleValueType() && VT == V1.getSimpleValueType() &&
+ "Unexpected input vector types");
+
+ // Canonicalize a v2f64 blend with a mask of 2 by swapping the vector
+ // operands and changing the mask to 1. This saves us a bunch of
+ // pattern-matching possibilities related to scalar math ops in SSE/AVX.
+ // x86InstrInfo knows how to commute this back after instruction selection
+ // if it would help register allocation.
+
+ // TODO: If optimizing for size or a processor that doesn't suffer from
+ // partial register update stalls, this should be transformed into a MOVSD
+ // instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD.
+
+ if (VT == MVT::v2f64)
+ if (auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) {
+ SDValue NewMask = DAG.getConstant(1, DL, MVT::i8);
+ return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask);
+ }
+
+ return SDValue();
+ }
default:
return SDValue();
}
@@ -23573,9 +23684,13 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
/// the operands which explicitly discard the lanes which are unused by this
/// operation to try to flow through the rest of the combiner the fact that
/// they're unused.
-static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
+ if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
+ (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
+ return SDValue();
// We only handle target-independent shuffles.
// FIXME: It would be easy and harmless to use the target shuffle mask
@@ -23617,12 +23732,6 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15})))
return SDValue();
- // Only specific types are legal at this point, assert so we notice if and
- // when these change.
- assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v8f32 ||
- VT == MVT::v4f64) &&
- "Unknown vector type encountered!");
-
return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);
}
@@ -23642,8 +23751,8 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
// If we have legalized the vector types, look for blends of FADD and FSUB
// nodes that we can fuse into an ADDSUB node.
- if (TLI.isTypeLegal(VT) && Subtarget->hasSSE3())
- if (SDValue AddSub = combineShuffleToAddSub(N, DAG))
+ if (TLI.isTypeLegal(VT))
+ if (SDValue AddSub = combineShuffleToAddSub(N, Subtarget, DAG))
return AddSub;
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
@@ -27310,7 +27419,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
// from AH (which we otherwise need to do contortions to access).
if (N0.getOpcode() == ISD::UDIVREM &&
N0.getResNo() == 1 && N0.getValueType() == MVT::i8 &&
- (VT == MVT::i32 || VT == MVT::i64)) {
+ VT == MVT::i32) {
SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
SDValue R = DAG.getNode(X86ISD::UDIVREM8_ZEXT_HREG, dl, NodeTys,
N0.getOperand(0), N0.getOperand(1));
@@ -27382,32 +27491,6 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
- SDValue V0 = N->getOperand(0);
- SDValue V1 = N->getOperand(1);
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
-
- // Canonicalize a v2f64 blend with a mask of 2 by swapping the vector
- // operands and changing the mask to 1. This saves us a bunch of
- // pattern-matching possibilities related to scalar math ops in SSE/AVX.
- // x86InstrInfo knows how to commute this back after instruction selection
- // if it would help register allocation.
-
- // TODO: If optimizing for size or a processor that doesn't suffer from
- // partial register update stalls, this should be transformed into a MOVSD
- // instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD.
-
- if (VT == MVT::v2f64)
- if (auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(2)))
- if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) {
- SDValue NewMask = DAG.getConstant(1, DL, MVT::i8);
- return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask);
- }
-
- return SDValue();
-}
-
static SDValue PerformGatherScatterCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
// Gather and Scatter instructions use k-registers for masks. The type of
@@ -27840,6 +27923,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
+// TODO: refactor the [SU]DIVREM8_[SZ]EXT_HREG code so that it's not duplicated.
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
@@ -27851,6 +27935,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::PALIGNR:
+ case X86ISD::BLENDI:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
@@ -27865,7 +27950,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
- case X86ISD::BLENDI: return PerformBLENDICombine(N, DAG);
case ISD::MGATHER:
case ISD::MSCATTER: return PerformGatherScatterCombine(N, DAG);
}
@@ -27902,6 +27986,18 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
}
}
+/// This function checks if any of the users of EFLAGS copies the EFLAGS. We
+/// know that the code that lowers COPY of EFLAGS has to use the stack, and if
+/// we don't adjust the stack we clobber the first frame index.
+/// See X86InstrInfo::copyPhysReg.
+bool X86TargetLowering::hasCopyImplyingStackAdjustment(
+ MachineFunction *MF) const {
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ return any_of(MRI.reg_instructions(X86::EFLAGS),
+ [](const MachineInstr &RI) { return RI.isCopy(); });
+}
+
/// IsDesirableToPromoteOp - This method query the target whether it is
/// beneficial for dag combiner to promote the specified node. If true, it
/// should return the desired promotion type by reference.
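A minimal sketch of the scan above, assuming only the MachineRegisterInfo/MachineInstr APIs used in this tree; a COPY of EFLAGS lowers to a pushf/pop sequence (see copyPhysReg below), which is why its presence has to force an opaque stack adjustment:

bool hasEFLAGSCopy(const MachineRegisterInfo &MRI) {
  for (const MachineInstr &MI : MRI.reg_instructions(X86::EFLAGS))
    if (MI.isCopy())  // lowered via push/pop, i.e. it touches the stack
      return true;    // same answer as the any_of above
  return false;
}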
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index a29dc9a..8bb0e5f 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -697,6 +697,10 @@ namespace llvm {
/// and some i16 instructions are slow.
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
+ /// Return true if the MachineFunction contains a COPY which would imply
+ /// HasOpaqueSPAdjustment.
+ bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const override;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
index 8bf2925..0a27c33 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2366,6 +2366,7 @@ def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)),
multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
RegisterClass KRCSrc, Predicate prd> {
let Predicates = [prd] in {
+ let hasSideEffects = 0 in
def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
(ins KRC:$src1, KRC:$src2),
"kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
index 5d7283f..96a29ca 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -250,7 +250,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
// Alias instruction mapping movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
- isPseudo = 1, AddedComplexity = 20 in
+ isPseudo = 1 in
def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
@@ -263,7 +263,7 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
}
let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
- AddedComplexity = 15 in {
+ AddedComplexity = 1 in {
// Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
// which only require 3 bytes compared to MOV32ri which requires 5.
let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {
@@ -278,24 +278,12 @@ let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
}
-let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in {
-// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1.
-// FIXME: Add itinerary class and Schedule.
-def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",
- [(set GR32:$dst, i32immSExt8:$src)]>,
- Requires<[OptForMinSize]>;
-def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
- [(set GR64:$dst, i64immSExt8:$src)]>,
- Requires<[OptForMinSize, NotWin64WithoutFP]>;
-}
-
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
// that would make it more difficult to rematerialize.
-let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
- isCodeGenOnly = 1, hasSideEffects = 0 in
-def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src),
- "", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isPseudo = 1, hasSideEffects = 0 in
+def MOV32ri64 : I<0, Pseudo, (outs GR32:$dst), (ins i64i32imm:$src), "", []>;
// This 64-bit pseudo-move can be used for both a 64-bit constant that is
// actually the zero-extension of a 32-bit constant and for labels in the
@@ -566,8 +554,8 @@ let usesCustomInserter = 1, Uses = [EFLAGS] in {
// TODO: Get this to fold the constant into the instruction.
let isCodeGenOnly = 1, Defs = [EFLAGS] in
def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
- "or{l}\t{$zero, $dst|$dst, $zero}",
- [], IIC_ALU_MEM>, Requires<[Not64BitMode]>, LOCK,
+ "or{l}\t{$zero, $dst|$dst, $zero}", [],
+ IIC_ALU_MEM>, Requires<[Not64BitMode]>, OpSize32, LOCK,
Sched<[WriteALULd, WriteRMW]>;
let hasSideEffects = 1 in
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index 63e78de..246804e 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -23,7 +23,6 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"
@@ -4453,7 +4452,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// such as TF/IF/DF, which LLVM doesn't model.
//
// Notice that we have to adjust the stack if we don't want to clobber the
- // first frame index. See X86FrameLowering.cpp - usesTheStack.
+ // first frame index.
+ // See X86ISelLowering.cpp - X86::hasCopyImplyingStackAdjustment.
bool AXDead = (Reg == AX) ||
@@ -4465,6 +4465,10 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// (unnecessarily) saving+restoring a dead register. However the
// MachineVerifier expects operands that read from dead registers
// to be marked with the "undef" flag.
+ // An example of this can be found in
+ // test/CodeGen/X86/peephole-na-phys-copy-folding.ll and
+ // test/CodeGen/X86/cmpxchg-clobber-flags.ll when using
+ // -verify-machineinstrs.
BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true));
}
if (FromEFLAGS) {
@@ -5309,50 +5313,6 @@ static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
return true;
}
-bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const {
- MachineBasicBlock &MBB = *MIB->getParent();
- DebugLoc DL = MIB->getDebugLoc();
- int64_t Imm = MIB->getOperand(1).getImm();
- assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
- MachineBasicBlock::iterator I = MIB.getInstr();
-
- int StackAdjustment;
-
- if (Subtarget.is64Bit()) {
- assert(MIB->getOpcode() == X86::MOV64ImmSExti8 ||
- MIB->getOpcode() == X86::MOV32ImmSExti8);
- // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
- // widen the register if necessary.
- StackAdjustment = 8;
- BuildMI(MBB, I, DL, get(X86::PUSH64i8)).addImm(Imm);
- MIB->setDesc(get(X86::POP64r));
- MIB->getOperand(0)
- .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64));
- } else {
- assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
- StackAdjustment = 4;
- BuildMI(MBB, I, DL, get(X86::PUSH32i8)).addImm(Imm);
- MIB->setDesc(get(X86::POP32r));
- }
-
- // Build CFI if necessary.
- MachineFunction &MF = *MBB.getParent();
- const X86FrameLowering *TFL = Subtarget.getFrameLowering();
- bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
- bool NeedsDwarfCFI =
- !IsWin64Prologue &&
- (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry());
- bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
- if (EmitCFI) {
- TFL->BuildCFI(MBB, I, DL,
- MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
- TFL->BuildCFI(MBB, std::next(I), DL,
- MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
- }
-
- return true;
-}
-
// LoadStackGuard has so far only been implemented for 64-bit MachO. Different
// code sequence is needed for other targets.
static void expandLoadStackGuard(MachineInstrBuilder &MIB,
@@ -5385,9 +5345,6 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
case X86::MOV32r_1:
return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
- case X86::MOV32ImmSExti8:
- case X86::MOV64ImmSExti8:
- return ExpandMOVImmSExti8(MIB);
case X86::SETB_C8r:
return Expand2AddrUndef(MIB, get(X86::SBB8rr));
case X86::SETB_C16r:
@@ -5412,7 +5369,10 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
-
+ case X86::MOV32ri64:
+ MI->setDesc(get(X86::MOV32ri));
+ return true;
+
// KNL does not recognize dependency-breaking idioms for mask registers,
// so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
// Using %k0 as the undef input register is a performance heuristic based
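The MOV32ri64 rewrite above is sound because a 32-bit mov on x86-64 implicitly zeroes bits 63:32 of the destination register; a hedged, compiler-specific demo (GNU inline asm, where the %k modifier selects the 32-bit register name):

#include <cstdint>
uint64_t materializeLow32() {
  uint64_t r;
  asm("movl $0x12345678, %k0" : "=r"(r)); // 5 bytes; upper half cleared
  return r;                               // yields 0x0000000012345678
}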
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
index 9d40334..edd09d6 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -23,7 +23,6 @@
#include "X86GenInstrInfo.inc"
namespace llvm {
- class MachineInstrBuilder;
class X86RegisterInfo;
class X86Subtarget;
@@ -565,9 +564,6 @@ private:
/// operand and follow operands form a reference to the stack frame.
bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
int &FrameIndex) const;
-
- /// Expand the MOVImmSExti8 pseudo-instructions.
- bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index f4ca2b8..ea8e562 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -822,8 +822,6 @@ def In32BitMode : Predicate<"Subtarget->is32Bit()">,
AssemblerPredicate<"Mode32Bit", "32-bit mode">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
-def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||"
- "Subtarget->getFrameLowering()->hasFP(*MF)">;
def IsPS4 : Predicate<"Subtarget->isTargetPS4()">;
def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
@@ -837,7 +835,6 @@ def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
def OptForSize : Predicate<"OptForSize">;
-def OptForMinSize : Predicate<"OptForMinSize">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
@@ -1093,6 +1090,32 @@ def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
}
+let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
+ SchedRW = [WriteRMW], Defs = [ESP] in {
+ let Uses = [ESP, EFLAGS] in
+ def RDFLAGS32 : PseudoI<(outs GR32:$dst), (ins),
+ [(set GR32:$dst, (int_x86_flags_read_u32))]>,
+ Requires<[Not64BitMode]>;
+
+ let Uses = [RSP, EFLAGS] in
+ def RDFLAGS64 : PseudoI<(outs GR64:$dst), (ins),
+ [(set GR64:$dst, (int_x86_flags_read_u64))]>,
+ Requires<[In64BitMode]>;
+}
+
+let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
+ SchedRW = [WriteRMW] in {
+ let Defs = [ESP, EFLAGS], Uses = [ESP] in
+ def WRFLAGS32 : PseudoI<(outs), (ins GR32:$src),
+ [(int_x86_flags_write_u32 GR32:$src)]>,
+ Requires<[Not64BitMode]>;
+
+ let Defs = [RSP, EFLAGS], Uses = [RSP] in
+ def WRFLAGS64 : PseudoI<(outs), (ins GR64:$src),
+ [(int_x86_flags_write_u64 GR64:$src)]>,
+ Requires<[In64BitMode]>;
+}
+
let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0,
SchedRW = [WriteLoad] in {
def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>,
@@ -1133,7 +1156,8 @@ def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [],
let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1,
SchedRW = [WriteStore] in {
def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
- "push{q}\t$imm", [], IIC_PUSH_IMM>, Requires<[In64BitMode]>;
+ "push{q}\t$imm", [], IIC_PUSH_IMM>, OpSize32,
+ Requires<[In64BitMode]>;
def PUSH64i32 : Ii32S<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
"push{q}\t$imm", [], IIC_PUSH_IMM>, OpSize32,
Requires<[In64BitMode]>;
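A hedged usage sketch for the RDFLAGS/WRFLAGS pseudos added above: the __readeflags/__writeeflags builtins select down to the flags read/write intrinsics, and the custom inserter presumably expands the pseudos to pushf/pop pairs (hence the ESP/RSP Uses/Defs and mayLoad/mayStore):

#include <x86intrin.h>
unsigned long long roundTripFlags() {
  unsigned long long f = __readeflags(); // RDFLAGS64: pushfq; popq %reg
  __writeeflags(f);                      // WRFLAGS64: pushq %reg; popfq
  return f;
}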
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
index 11dc1e7..83f9b14 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
@@ -651,7 +651,7 @@ def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
// Misc.
let SchedRW = [WriteShuffle] in {
-let Uses = [EDI], Predicates = [HasSSE1,In32BitMode] in
+let Uses = [EDI], Predicates = [HasSSE1,Not64BitMode] in
def MMX_MASKMOVQ : MMXI32<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)],
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMPX.td b/contrib/llvm/lib/Target/X86/X86InstrMPX.td
index cf5e2e3..31608cd 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrMPX.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrMPX.td
@@ -63,8 +63,8 @@ def BNDMOVMR64mr : RI<0x1B, MRMDestMem, (outs i128mem:$dst), (ins BNDR:$src),
Requires<[HasMPX, In64BitMode]>;
def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
- "bndstx \t{$src, $dst|$dst, $src}", []>, TB,
+ "bndstx \t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
- "bndldx \t{$src, $dst|$dst, $src}", []>, TB,
- Requires<[HasMPX]>;
\ No newline at end of file
+ "bndldx \t{$src, $dst|$dst, $src}", []>, PS,
+ Requires<[HasMPX]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index 7a44212..624b931 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1466,6 +1466,8 @@ def SSE_CVT_SD2SI : OpndItins<
IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
>;
+// FIXME: We probably want to match the rm form only when optimizing for
+// size, to avoid false dependencies (see sse_fp_unop_s for details)
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
string asm, OpndItins itins> {
@@ -1489,6 +1491,8 @@ let hasSideEffects = 0 in {
}
}
+// FIXME: We probably want to match the rm form only when optimizing for
+// size, to avoid false dependencies (see sse_fp_unop_s for details)
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
let hasSideEffects = 0, Predicates = [UseAVX] in {
@@ -1626,6 +1630,8 @@ def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
+// FIXME: We probably want to match the rm form only when optimizing for
+// size, to avoid false dependencies (see sse_fp_unop_s for details)
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
string asm, OpndItins itins> {
@@ -3387,9 +3393,18 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
def : Pat<(Intr (load addr:$src)),
(vt (COPY_TO_REGCLASS(!cast<Instruction>(NAME#Suffix##m)
addr:$src), VR128))>;
- def : Pat<(Intr mem_cpat:$src),
- (!cast<Instruction>(NAME#Suffix##m_Int)
- (vt (IMPLICIT_DEF)), mem_cpat:$src)>;
+ }
+ // We don't want to fold scalar loads into these instructions unless
+ // optimizing for size. This is because the folded instruction will have a
+ // partial register update, while the unfolded sequence will not, e.g.
+ // movss mem, %xmm0
+ // rcpss %xmm0, %xmm0
+ // which has a clobber before the rcp, vs.
+ // rcpss mem, %xmm0
+ let Predicates = [target, OptForSize] in {
+ def : Pat<(Intr mem_cpat:$src),
+ (!cast<Instruction>(NAME#Suffix##m_Int)
+ (vt (IMPLICIT_DEF)), mem_cpat:$src)>;
}
}
@@ -3420,28 +3435,37 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
}
}
+ // We don't want to fold scalar loads into these instructions unless
+ // optimizing for size. This is because the folded instruction will have a
+ // partial register update, while the unfolded sequence will not, e.g.
+ // vmovss mem, %xmm0
+ // vrcpss %xmm0, %xmm0, %xmm0
+ // which has a clobber before the rcp, vs.
+ // vrcpss mem, %xmm0, %xmm0
+ // TODO: In theory, we could fold the load, and avoid the stall caused by
+ // the partial register store, either in ExeDepFix or with smarter RA.
let Predicates = [UseAVX] in {
def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
(ScalarVT (IMPLICIT_DEF)), RC:$src)>;
-
- def : Pat<(vt (OpNode mem_cpat:$src)),
- (!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)),
- mem_cpat:$src)>;
-
}
let Predicates = [HasAVX] in {
def : Pat<(Intr VR128:$src),
(!cast<Instruction>("V"#NAME#Suffix##r_Int) (vt (IMPLICIT_DEF)),
VR128:$src)>;
-
- def : Pat<(Intr mem_cpat:$src),
- (!cast<Instruction>("V"#NAME#Suffix##m_Int)
+ }
+ let Predicates = [HasAVX, OptForSize] in {
+ def : Pat<(Intr mem_cpat:$src),
+ (!cast<Instruction>("V"#NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), mem_cpat:$src)>;
}
- let Predicates = [UseAVX, OptForSize] in
- def : Pat<(ScalarVT (OpNode (load addr:$src))),
- (!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
- addr:$src)>;
+ let Predicates = [UseAVX, OptForSize] in {
+ def : Pat<(ScalarVT (OpNode (load addr:$src))),
+ (!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
+ addr:$src)>;
+ def : Pat<(vt (OpNode mem_cpat:$src)),
+ (!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)),
+ mem_cpat:$src)>;
+ }
}
/// sse1_fp_unop_p - SSE1 unops in packed form.
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
index 85e17f5..a97d1e5 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
@@ -498,10 +498,10 @@ let Predicates = [HasXSAVE] in {
let Predicates = [HasXSAVEOPT] in {
def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaque512mem:$dst),
"xsaveopt\t$dst",
- [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, TB;
+ [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, PS;
def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaque512mem:$dst),
"xsaveopt64\t$dst",
- [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>;
+ [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, PS, Requires<[In64BitMode]>;
}
let Predicates = [HasXSAVEC] in {
def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaque512mem:$dst),
@@ -551,10 +551,17 @@ let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
def MONTMUL : I<0xa6, MRM_C0, (outs), (ins), "montmul", []>, TB;
//==-----------------------------------------------------------------------===//
// PKU - enable protection key
+let usesCustomInserter = 1 in {
+ def WRPKRU : PseudoI<(outs), (ins GR32:$src),
+ [(int_x86_wrpkru GR32:$src)]>;
+ def RDPKRU : PseudoI<(outs GR32:$dst), (ins),
+ [(set GR32:$dst, (int_x86_rdpkru))]>;
+}
+
let Defs = [EAX, EDX], Uses = [ECX] in
- def RDPKRU : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB;
+ def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB;
let Uses = [EAX, ECX, EDX] in
- def WRPKRU : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB;
+ def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB;
//===----------------------------------------------------------------------===//
// FS/GS Base Instructions
diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index dc6d85d..646b556 100644
--- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1208,19 +1208,55 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_di_128, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_di_256, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_di_512, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_qi_128, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_qi_256, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_qi_512, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_pslli_q, VSHIFT_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psllv_d, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psllv_q, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psra_d, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_di_128, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_di_256, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_di_512, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psra_q, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_qi_128, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_qi_256, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_qi_512, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_wi_128, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_wi_256, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_wi_512, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrai_d, VSHIFT_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrai_q, VSHIFT_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrav_d, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_di_128, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_di_256, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_di_512, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_q, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_qi_128, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_qi_256, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_qi_512, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
@@ -1229,6 +1265,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psrl_wi_512, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrli_d, VSHIFT_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrlv16_hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrlv2_di, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrlv32hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrlv4_di, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrlv4_si, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrlv8_hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrlv8_si, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psub_b_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
index e186f70..e1ca558 100644
--- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -14,6 +14,7 @@
#include "X86AsmPrinter.h"
#include "X86RegisterInfo.h"
+#include "X86ShuffleDecodeConstantPool.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "Utils/X86ShuffleDecode.h"
@@ -454,10 +455,6 @@ ReSimplify:
"LEA has segment specified!");
break;
- case X86::MOV32ri64:
- OutMI.setOpcode(X86::MOV32ri);
- break;
-
// Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
// if one of the registers is extended, but other isn't.
case X86::VMOVZPQILo2PQIrr:
diff --git a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
new file mode 100644
index 0000000..ef16c5b
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -0,0 +1,190 @@
+//===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics using
+// constants from the constant pool.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ShuffleDecodeConstantPool.h"
+#include "Utils/X86ShuffleDecode.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/IR/Constants.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ // It is not an error for the PSHUFB mask to not be a vector of i8 because the
+ // constant pool uniques constants by their bit representation.
+ // e.g. the following take up the same space in the constant pool:
+ // i128 -170141183420855150465331762880109871104
+ //
+ // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
+ //
+ // <4 x i32> <i32 -2147483648, i32 -2147483648,
+ // i32 -2147483648, i32 -2147483648>
+
+#ifndef NDEBUG
+ unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+ assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512);
+#endif
+
+ // This is a straightforward byte vector.
+ if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) {
+ int NumElements = MaskTy->getVectorNumElements();
+ ShuffleMask.reserve(NumElements);
+
+ for (int i = 0; i < NumElements; ++i) {
+ // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
+ // lane of the vector we're inside.
+ int Base = i & ~0xf;
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp) {
+ ShuffleMask.clear();
+ return;
+ } else if (isa<UndefValue>(COp)) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ // If the high bit (7) of the byte is set, the element is zeroed.
+ if (Element & (1 << 7))
+ ShuffleMask.push_back(SM_SentinelZero);
+ else {
+ // Only the least significant 4 bits of the byte are used.
+ int Index = Base + (Element & 0xf);
+ ShuffleMask.push_back(Index);
+ }
+ }
+ }
+ // TODO: Handle funny-looking vectors too.
+}
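A standalone sketch (not the LLVM API) of the decode rules implemented above, as a worked example: bit 7 zeroes an element, otherwise the low four bits index within the enclosing 16-byte lane:

#include <cstdint>
#include <vector>
std::vector<int> decodePshufb(const uint8_t *Mask, int NumBytes) {
  std::vector<int> Out;
  for (int i = 0; i < NumBytes; ++i) {
    int Base = i & ~0xf;                     // base of the 16-byte lane
    if (Mask[i] & 0x80)
      Out.push_back(-2);                     // stands in for SM_SentinelZero
    else
      Out.push_back(Base + (Mask[i] & 0xf)); // index within the lane
  }
  return Out;
}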
+
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+ SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ // It is not an error for the PSHUFB mask to not be a vector of i8 because the
+ // constant pool uniques constants by their bit representation.
+ // e.g. the following take up the same space in the constant pool:
+ // i128 -170141183420855150465331762880109871104
+ //
+ // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
+ //
+ // <4 x i32> <i32 -2147483648, i32 -2147483648,
+ // i32 -2147483648, i32 -2147483648>
+
+ unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+
+ if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.
+ return;
+
+ // Only support vector types.
+ if (!MaskTy->isVectorTy())
+ return;
+
+ // Make sure it's an integer type.
+ Type *VecEltTy = MaskTy->getVectorElementType();
+ if (!VecEltTy->isIntegerTy())
+ return;
+
+ // Support any element type from byte up to element size.
+ // This is necessary primarily because 64-bit elements get split to 32-bit
+ // in the constant pool on 32-bit targets.
+ unsigned EltTySize = VecEltTy->getIntegerBitWidth();
+ if (EltTySize < 8 || EltTySize > ElSize)
+ return;
+
+ unsigned NumElements = MaskTySize / ElSize;
+ assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
+ "Unexpected number of vector elements.");
+ ShuffleMask.reserve(NumElements);
+ unsigned NumElementsPerLane = 128 / ElSize;
+ unsigned Factor = ElSize / EltTySize;
+
+ for (unsigned i = 0; i < NumElements; ++i) {
+ Constant *COp = C->getAggregateElement(i * Factor);
+ if (!COp) {
+ ShuffleMask.clear();
+ return;
+ } else if (isa<UndefValue>(COp)) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ int Index = i & ~(NumElementsPerLane - 1);
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ if (ElSize == 64)
+ Index += (Element >> 1) & 0x1;
+ else
+ Index += Element & 0x3;
+ ShuffleMask.push_back(Index);
+ }
+
+ // TODO: Handle funny-looking vectors too.
+}
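A similarly standalone sketch of the per-lane selection above: VPERMILPS controls use bits 1:0 of each element, VPERMILPD controls use bit 1, and every element can only pick within its own 128-bit lane:

#include <cstdint>
#include <vector>
std::vector<int> decodeVpermilp(const uint64_t *Ctl, unsigned NumElts,
                                unsigned ElSize /* 32 or 64 */) {
  std::vector<int> Out;
  unsigned PerLane = 128 / ElSize;                 // elements per 128-bit lane
  for (unsigned i = 0; i < NumElts; ++i) {
    int Index = i & ~(PerLane - 1);                // lane base, as above
    Index += int(ElSize == 64 ? (Ctl[i] >> 1) & 1  // PD: bit 1
                              : Ctl[i] & 3);       // PS: bits 1:0
    Out.push_back(Index);
  }
  return Out;
}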
+
+void DecodeVPERMVMask(const Constant *C, MVT VT,
+ SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ if (MaskTy->isVectorTy()) {
+ unsigned NumElements = MaskTy->getVectorNumElements();
+ if (NumElements == VT.getVectorNumElements()) {
+ for (unsigned i = 0; i < NumElements; ++i) {
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) {
+ ShuffleMask.clear();
+ return;
+ }
+ if (isa<UndefValue>(COp))
+ ShuffleMask.push_back(SM_SentinelUndef);
+ else {
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ Element &= (1 << NumElements) - 1;
+ ShuffleMask.push_back(Element);
+ }
+ }
+ }
+ return;
+ }
+ // Scalar value; just broadcast it
+ if (!isa<ConstantInt>(C))
+ return;
+ uint64_t Element = cast<ConstantInt>(C)->getZExtValue();
+ int NumElements = VT.getVectorNumElements();
+ Element &= (1 << NumElements) - 1;
+ for (int i = 0; i < NumElements; ++i)
+ ShuffleMask.push_back(Element);
+}
+
+void DecodeVPERMV3Mask(const Constant *C, MVT VT,
+ SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ unsigned NumElements = MaskTy->getVectorNumElements();
+ if (NumElements == VT.getVectorNumElements()) {
+ for (unsigned i = 0; i < NumElements; ++i) {
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp) {
+ ShuffleMask.clear();
+ return;
+ }
+ if (isa<UndefValue>(COp))
+ ShuffleMask.push_back(SM_SentinelUndef);
+ else {
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ Element &= (1 << NumElements*2) - 1;
+ ShuffleMask.push_back(Element);
+ }
+ }
+ }
+}
+} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
new file mode 100644
index 0000000..bcf4632
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
@@ -0,0 +1,45 @@
+//===-- X86ShuffleDecodeConstantPool.h - X86 shuffle decode -----*-C++-*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics using
+// constants from the constant pool.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_X86SHUFFLEDECODECONSTANTPOOL_H
+#define LLVM_LIB_TARGET_X86_X86SHUFFLEDECODECONSTANTPOOL_H
+
+#include "llvm/ADT/SmallVector.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+class Constant;
+class MVT;
+
+/// \brief Decode a PSHUFB mask from an IR-level vector constant.
+void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
+void DecodeVPERMVMask(const Constant *C, MVT VT,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
+void DecodeVPERMV3Mask(const Constant *C, MVT VT,
+ SmallVectorImpl<int> &ShuffleMask);
+
+} // llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
index d02c861..4295a75 100644
--- a/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -10,6 +10,7 @@
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
@@ -21,10 +22,12 @@ using namespace llvm;
STATISTIC(NumReadNone, "Number of functions inferred as readnone");
STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
+STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly");
STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
+STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns");
static bool setDoesNotAccessMemory(Function &F) {
if (F.doesNotAccessMemory())
@@ -42,6 +45,15 @@ static bool setOnlyReadsMemory(Function &F) {
return true;
}
+static bool setOnlyAccessesArgMemory(Function &F) {
+ if (F.onlyAccessesArgMemory())
+ return false;
+ F.setOnlyAccessesArgMemory();
+ ++NumArgMemOnly;
+ return true;
+}
+
+
static bool setDoesNotThrow(Function &F) {
if (F.doesNotThrow())
return false;
@@ -74,6 +86,17 @@ static bool setDoesNotAlias(Function &F, unsigned n) {
return true;
}
+static bool setNonNull(Function &F, unsigned n) {
+ assert((n != AttributeSet::ReturnIndex ||
+ F.getReturnType()->isPointerTy()) &&
+ "nonnull applies only to pointers");
+ if (F.getAttributes().hasAttribute(n, Attribute::NonNull))
+ return false;
+ F.addAttribute(n, Attribute::NonNull);
+ ++NumNonNull;
+ return true;
+}
+
/// Analyze the name and prototype of the given function and set any applicable
/// attributes.
///
@@ -89,7 +112,6 @@ static bool inferPrototypeAttributes(Function &F,
return false;
bool Changed = false;
-
switch (TheLibFunc) {
case LibFunc::strlen:
if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
@@ -873,6 +895,35 @@ static bool inferPrototypeAttributes(Function &F,
Changed |= setDoesNotCapture(F, 2);
return Changed;
+ case LibFunc::Znwj: // new(unsigned int)
+ case LibFunc::Znwm: // new(unsigned long)
+ case LibFunc::Znaj: // new[](unsigned int)
+ case LibFunc::Znam: // new[](unsigned long)
+ case LibFunc::msvc_new_int: // new(unsigned int)
+ case LibFunc::msvc_new_longlong: // new(unsigned long long)
+ case LibFunc::msvc_new_array_int: // new[](unsigned int)
+ case LibFunc::msvc_new_array_longlong: // new[](unsigned long long)
+ if (FTy->getNumParams() != 1)
+ return false;
+ // Operator new always returns a nonnull noalias pointer
+ Changed |= setNonNull(F, AttributeSet::ReturnIndex);
+ Changed |= setDoesNotAlias(F, AttributeSet::ReturnIndex);
+ return Changed;
+
+ //TODO: add LibFunc entries for:
+ //case LibFunc::memset_pattern4:
+ //case LibFunc::memset_pattern8:
+ case LibFunc::memset_pattern16:
+ if (FTy->isVarArg() || FTy->getNumParams() != 3 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)) ||
+ !isa<IntegerType>(FTy->getParamType(2)))
+ return false;
+
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+
default:
// FIXME: It'd be really nice to cover all the library functions we're
// aware of here.
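A hedged usage example for the memset_pattern16 case above (a Darwin libc extension with prototype void memset_pattern16(void *b, const void *pattern16, size_t len)); the inferred argmemonly plus readonly-on-the-pattern-argument attributes describe exactly this behavior, just as the operator new overloads gain nonnull/noalias returns:

#include <string.h>
void fill(float *buf, size_t n) {
  const float pat[4] = {1.0f, 2.0f, 3.0f, 4.0f}; // a 16-byte pattern
  memset_pattern16(buf, pat, n * sizeof(float)); // writes buf, only reads pat
}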
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e3634f2..090245d 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1747,8 +1747,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Translate facts known about a pointer before relocating into
// facts about the relocate value, while being careful to
// preserve relocation semantics.
- GCRelocateOperands Operands(II);
- Value *DerivedPtr = Operands.getDerivedPtr();
+ Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr();
auto *GCRelocateType = cast<PointerType>(II->getType());
// Remove the relocation if unused, note that this check is required
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index da835a1..0f01d18 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -591,19 +591,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// zext (x <s 0) to i32 --> x>>u31 true if signbit set.
// zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
- (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())) {
+ (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
if (!DoXform) return ICI;
Value *In = ICI->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
- In->getType()->getScalarSizeInBits()-1);
- In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
+ In->getType()->getScalarSizeInBits() - 1);
+ In = Builder->CreateLShr(In, Sh, In->getName() + ".lobit");
if (In->getType() != CI.getType())
In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/);
if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
Constant *One = ConstantInt::get(In->getType(), 1);
- In = Builder->CreateXor(In, One, In->getName()+".not");
+ In = Builder->CreateXor(In, One, In->getName() + ".not");
}
return ReplaceInstUsesWith(CI, In);
@@ -639,13 +639,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
return ReplaceInstUsesWith(CI, Res);
}
- uint32_t ShiftAmt = KnownZeroMask.logBase2();
+ uint32_t ShAmt = KnownZeroMask.logBase2();
Value *In = ICI->getOperand(0);
- if (ShiftAmt) {
+ if (ShAmt) {
// Perform a logical shr by shiftamt.
// Insert the shift to put the result in the low bit.
- In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt),
- In->getName()+".lobit");
+ In = Builder->CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
+ In->getName() + ".lobit");
}
if ((Op1CV != 0) == isNE) { // Toggle the low bit.
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 534f670..e4e5065 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -414,7 +414,7 @@ public:
/// \brief A combiner-aware RAUW-like routine.
///
/// This method is to be used when an instruction is found to be dead,
- /// replacable with another preexisting expression. Here we add all uses of
+ /// replaceable with another preexisting expression. Here we add all uses of
/// I to the worklist, replace all uses of I with the new value, then return
/// I, so that the inst combiner will know that I was modified.
Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index e25639a..54a9fbd 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -383,15 +383,28 @@ static void replaceExtractElements(InsertElementInst *InsElt,
auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
ConstantVector::get(ExtendMask));
- // Replace all extracts from the original narrow vector with extracts from
- // the new wide vector.
- WideVec->insertBefore(ExtElt);
+ // Insert the new shuffle after the vector operand of the extract is defined
+ // or at the start of the basic block, so any subsequent extracts can use it.
+ bool ReplaceAllExtUsers;
+ if (auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp)) {
+ WideVec->insertAfter(ExtVecOpInst);
+ ReplaceAllExtUsers = true;
+ } else {
+ // TODO: Insert at start of function, so it's always safe to replace all?
+ IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
+ ReplaceAllExtUsers = false;
+ }
+
+ // Replace extracts from the original narrow vector with extracts from the new
+ // wide vector.
for (User *U : ExtVecOp->users()) {
- if (ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U)) {
- auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
- NewExt->insertAfter(WideVec);
- IC.ReplaceInstUsesWith(*OldExt, NewExt);
- }
+ ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
+ if (!OldExt ||
+ (!ReplaceAllExtUsers && OldExt->getParent() != WideVec->getParent()))
+ continue;
+ auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
+ NewExt->insertAfter(WideVec);
+ IC.ReplaceInstUsesWith(*OldExt, NewExt);
}
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 7c46cfd..903a0b5 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3021,7 +3021,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
Instruction *Inst = &*--EndInst->getIterator();
if (!Inst->use_empty() && !Inst->getType()->isTokenTy())
Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
- if (Inst->isEHPad()) {
+ if (Inst->isEHPad() || Inst->getType()->isTokenTy()) {
EndInst = Inst;
continue;
}
@@ -3029,8 +3029,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
++NumDeadInst;
MadeIRChange = true;
}
- if (!Inst->getType()->isTokenTy())
- Inst->eraseFromParent();
+ Inst->eraseFromParent();
}
}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 92e41ee..51ff95d 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -234,16 +234,14 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
}
void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) {
- CoverageData->setSection(getCoverageSection());
- CoverageData->setAlignment(8);
Constant *Init = CoverageData->getInitializer();
- // We're expecting { i32, i32, i32, i32, [n x { i8*, i32, i32 }], [m x i8] }
+ // We're expecting { [4 x i32], [n x { i8*, i32, i32 }], [m x i8] }
// for some C. If not, the frontend's given us something broken.
- assert(Init->getNumOperands() == 6 && "bad number of fields in coverage map");
- assert(isa<ConstantArray>(Init->getAggregateElement(4)) &&
+ assert(Init->getNumOperands() == 3 && "bad number of fields in coverage map");
+ assert(isa<ConstantArray>(Init->getAggregateElement(1)) &&
"invalid function list in coverage map");
- ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(4));
+ ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(1));
for (unsigned I = 0, E = Records->getNumOperands(); I < E; ++I) {
Constant *Record = Records->getOperand(I);
Value *V = const_cast<Value *>(Record->getOperand(0))->stripPointerCasts();
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 6d70cdc..e01e23f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -75,10 +75,12 @@ DisablePromotion("disable-licm-promotion", cl::Hidden,
cl::desc("Disable memory promotion in LICM pass"));
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
-static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop);
+static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
+ const LICMSafetyInfo *SafetyInfo);
static bool hoist(Instruction &I, BasicBlock *Preheader);
static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
- const Loop *CurLoop, AliasSetTracker *CurAST );
+ const Loop *CurLoop, AliasSetTracker *CurAST,
+ const LICMSafetyInfo *SafetyInfo);
static bool isGuaranteedToExecute(const Instruction &Inst,
const DominatorTree *DT,
const Loop *CurLoop,
@@ -92,10 +94,10 @@ static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
const AAMDNodes &AAInfo,
AliasSetTracker *CurAST);
-static Instruction *CloneInstructionInExitBlock(const Instruction &I,
- BasicBlock &ExitBlock,
- PHINode &PN,
- const LoopInfo *LI);
+static Instruction *
+CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
+ const LoopInfo *LI,
+ const LICMSafetyInfo *SafetyInfo);
static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA,
DominatorTree *DT, TargetLibraryInfo *TLI,
Loop *CurLoop, AliasSetTracker *CurAST,
@@ -348,10 +350,10 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// outside of the loop. In this case, it doesn't even matter if the
// operands of the instruction are loop invariant.
//
- if (isNotUsedInLoop(I, CurLoop) &&
+ if (isNotUsedInLoop(I, CurLoop, SafetyInfo) &&
canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo)) {
++II;
- Changed |= sink(I, LI, DT, CurLoop, CurAST);
+ Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo);
}
}
return Changed;
@@ -432,6 +434,14 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
(I != E) && !SafetyInfo->MayThrow; ++I)
SafetyInfo->MayThrow |= I->mayThrow();
+
+ // Compute funclet colors if we might sink/hoist in a function with a funclet
+ // personality routine.
+ Function *Fn = CurLoop->getHeader()->getParent();
+ if (Fn->hasPersonalityFn())
+ if (Constant *PersonalityFn = Fn->getPersonalityFn())
+ if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn)))
+ SafetyInfo->BlockColors = colorEHFunclets(*Fn);
}
/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this
@@ -466,6 +476,10 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
if (isa<DbgInfoIntrinsic>(I))
return false;
+ // Don't sink calls which can throw.
+ if (CI->mayThrow())
+ return false;
+
// Handle simple cases by querying alias analysis.
FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
if (Behavior == FMRB_DoesNotAccessMemory)
@@ -534,10 +548,24 @@ static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) {
/// the loop. If this is true, we can sink the instruction to the exit
/// blocks of the loop.
///
-static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop) {
+static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
+ const LICMSafetyInfo *SafetyInfo) {
+ const auto &BlockColors = SafetyInfo->BlockColors;
for (const User *U : I.users()) {
const Instruction *UI = cast<Instruction>(U);
if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
+ const BasicBlock *BB = PN->getParent();
+ // We cannot sink uses in catchswitches.
+ if (isa<CatchSwitchInst>(BB->getTerminator()))
+ return false;
+
+ // We need to sink a callsite to a unique funclet. Avoid sinking if the
+ // phi use is too muddled.
+ if (isa<CallInst>(I))
+ if (!BlockColors.empty() &&
+ BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
+ return false;
+
// A PHI node where all of the incoming values are this instruction are
// special -- they can just be RAUW'ed with the instruction and thus
// don't require a use in the predecessor. This is a particularly important
@@ -565,11 +593,41 @@ static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop) {
return true;
}
-static Instruction *CloneInstructionInExitBlock(const Instruction &I,
- BasicBlock &ExitBlock,
- PHINode &PN,
- const LoopInfo *LI) {
- Instruction *New = I.clone();
+static Instruction *
+CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
+ const LoopInfo *LI,
+ const LICMSafetyInfo *SafetyInfo) {
+ Instruction *New;
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ const auto &BlockColors = SafetyInfo->BlockColors;
+
+ // Sinking call-sites need to be handled differently from other
+ // instructions. The cloned call-site needs a funclet bundle operand
+ // appropriate for its location in the CFG.
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
+ BundleIdx != BundleEnd; ++BundleIdx) {
+ OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
+ if (Bundle.getTagID() == LLVMContext::OB_funclet)
+ continue;
+
+ OpBundles.emplace_back(Bundle);
+ }
+
+ if (!BlockColors.empty()) {
+ const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
+ assert(CV.size() == 1 && "non-unique color for exit block!");
+ BasicBlock *BBColor = CV.front();
+ Instruction *EHPad = BBColor->getFirstNonPHI();
+ if (EHPad->isEHPad())
+ OpBundles.emplace_back("funclet", EHPad);
+ }
+
+ New = CallInst::Create(CI, OpBundles);
+ } else {
+ New = I.clone();
+ }
+
ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
if (!I.getName().empty()) New->setName(I.getName() + ".le");
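For orientation, a hedged IR-level picture of what the re-bundling above produces when a call is sunk into a funclet (names illustrative): the clone drops any stale "funclet" bundle and attaches one naming the exit block's EH pad before CallInst::Create(CI, OpBundles) builds the new call site:

//   %pad = cleanuppad within none []
//   call void @f() [ "funclet"(token %pad) ] ; bundle names the target funclet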
@@ -601,7 +659,8 @@ static Instruction *CloneInstructionInExitBlock(const Instruction &I,
/// position, and may either delete it or move it to outside of the loop.
///
static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
- const Loop *CurLoop, AliasSetTracker *CurAST ) {
+ const Loop *CurLoop, AliasSetTracker *CurAST,
+ const LICMSafetyInfo *SafetyInfo) {
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
bool Changed = false;
if (isa<LoadInst>(I)) ++NumMovedLoads;
@@ -652,7 +711,7 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
New = It->second;
else
New = SunkCopies[ExitBlock] =
- CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI);
+ CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI, SafetyInfo);
PN->replaceAllUsesWith(New);
PN->eraseFromParent();
@@ -950,6 +1009,21 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
if (!GuaranteedToExecute)
return Changed;
+ // Figure out the loop exits and their insertion points, if this is the
+ // first promotion.
+ if (ExitBlocks.empty()) {
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ InsertPts.clear();
+ InsertPts.reserve(ExitBlocks.size());
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
+ }
+
+ // Can't insert into a catchswitch.
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ if (isa<CatchSwitchInst>(ExitBlock->getTerminator()))
+ return Changed;
+
// Otherwise, this is safe to promote, lets do it!
DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
Changed = true;
@@ -961,15 +1035,6 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
// location is better than none.
DebugLoc DL = LoopUses[0]->getDebugLoc();
- // Figure out the loop exits and their insertion points, if this is the
- // first promotion.
- if (ExitBlocks.empty()) {
- CurLoop->getUniqueExitBlocks(ExitBlocks);
- InsertPts.resize(ExitBlocks.size());
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- InsertPts[i] = &*ExitBlocks[i]->getFirstInsertionPt();
- }
-
// We use the SSAUpdater interface to insert phi nodes as required.
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 2d577de..4521640 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -108,7 +108,11 @@ public:
private:
typedef SmallVector<StoreInst *, 8> StoreList;
- StoreList StoreRefs;
+ StoreList StoreRefsForMemset;
+ StoreList StoreRefsForMemcpy;
+ bool HasMemset;
+ bool HasMemsetPattern;
+ bool HasMemcpy;
/// \name Countable Loop Idiom Handling
/// @{
@@ -118,17 +122,15 @@ private:
SmallVectorImpl<BasicBlock *> &ExitBlocks);
void collectStores(BasicBlock *BB);
- bool isLegalStore(StoreInst *SI);
+ bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemcpy);
bool processLoopStore(StoreInst *SI, const SCEV *BECount);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
- unsigned StoreAlignment, Value *SplatValue,
+ unsigned StoreAlignment, Value *StoredVal,
Instruction *TheStore, const SCEVAddRecExpr *Ev,
const SCEV *BECount, bool NegStride);
- bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
- const SCEVAddRecExpr *StoreEv,
- const SCEV *BECount, bool NegStride);
+ bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
/// @}
/// \name Noncountable Loop Idiom Handling
@@ -207,8 +209,13 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
*CurLoop->getHeader()->getParent());
DL = &CurLoop->getHeader()->getModule()->getDataLayout();
- if (SE->hasLoopInvariantBackedgeTakenCount(L))
- return runOnCountableLoop();
+ HasMemset = TLI->has(LibFunc::memset);
+ HasMemsetPattern = TLI->has(LibFunc::memset_pattern16);
+ HasMemcpy = TLI->has(LibFunc::memcpy);
+
+ if (HasMemset || HasMemsetPattern || HasMemcpy)
+ if (SE->hasLoopInvariantBackedgeTakenCount(L))
+ return runOnCountableLoop();
return runOnNoncountableLoop();
}
@@ -297,7 +304,8 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
}
-bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
+bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
+ bool &ForMemcpy) {
// Don't touch volatile stores.
if (!SI->isSimple())
return false;
@@ -322,22 +330,86 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
return false;
- return true;
+ // See if the store can be turned into a memset.
+
+ // If the stored value is a byte-wise value (like i32 -1), then it may be
+ // turned into a memset of i8 -1, assuming that all the consecutive bytes
+ // are stored. A store of i32 0x01020304 can never be turned into a memset,
+ // but it can be turned into memset_pattern if the target supports it.
+ Value *SplatValue = isBytewiseValue(StoredVal);
+ Constant *PatternValue = nullptr;
+
+ // If we're allowed to form a memset, and the stored value would be
+ // acceptable for memset, use it.
+ if (HasMemset && SplatValue &&
+ // Verify that the stored value is loop invariant. If not, we can't
+ // promote the memset.
+ CurLoop->isLoopInvariant(SplatValue)) {
+ // It looks like we can use SplatValue.
+ ForMemset = true;
+ return true;
+ } else if (HasMemsetPattern &&
+ // Don't create memset_pattern16s with address spaces.
+ StorePtr->getType()->getPointerAddressSpace() == 0 &&
+ (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
+ // It looks like we can use PatternValue!
+ ForMemset = true;
+ return true;
+ }
+
+ // Otherwise, see if the store can be turned into a memcpy.
+ if (HasMemcpy) {
+ // Check to see if the stride matches the size of the store. If so, then we
+ // know that every byte is touched in the loop.
+ unsigned Stride = getStoreStride(StoreEv);
+ unsigned StoreSize = getStoreSizeInBytes(SI, DL);
+ if (StoreSize != Stride && StoreSize != -Stride)
+ return false;
+
+ // The store must be feeding a non-volatile load.
+ LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
+ if (!LI || !LI->isSimple())
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided load. If we have something else, it's a
+ // random load we can't handle.
+ const SCEVAddRecExpr *LoadEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
+ if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
+ return false;
+
+ // The store and load must share the same stride.
+ if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
+ return false;
+
+ // Success. This store can be converted into a memcpy.
+ ForMemcpy = true;
+ return true;
+ }
+ // This store can't be transformed into a memset/memcpy.
+ return false;
}
void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
- StoreRefs.clear();
+ StoreRefsForMemset.clear();
+ StoreRefsForMemcpy.clear();
for (Instruction &I : *BB) {
StoreInst *SI = dyn_cast<StoreInst>(&I);
if (!SI)
continue;
+ bool ForMemset = false;
+ bool ForMemcpy = false;
// Make sure this is a strided store with a constant stride.
- if (!isLegalStore(SI))
+ if (!isLegalStore(SI, ForMemset, ForMemcpy))
continue;
// Save the store locations.
- StoreRefs.push_back(SI);
+ if (ForMemset)
+ StoreRefsForMemset.push_back(SI);
+ else if (ForMemcpy)
+ StoreRefsForMemcpy.push_back(SI);
}
}
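
To make the new classification concrete, these are roughly the two loop shapes isLegalStore now separates (illustration only, not part of the patch):

// A store of a byte-wise splat value: memset candidate (ForMemset).
void zero(int *A, int N) {
  for (int i = 0; i < N; ++i)
    A[i] = 0;
}

// A strided store fed by an equally strided, non-volatile load in the same
// loop: memcpy candidate (ForMemcpy).
void copy(int *A, const int *B, int N) {
  for (int i = 0; i < N; ++i)
    A[i] = B[i];
}
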
@@ -357,9 +429,15 @@ bool LoopIdiomRecognize::runOnLoopBlock(
bool MadeChange = false;
// Look for store instructions, which may be optimized to memset/memcpy.
collectStores(BB);
- for (auto &SI : StoreRefs)
+
+ // Look for a single store which can be optimized into a memset.
+ for (auto &SI : StoreRefsForMemset)
MadeChange |= processLoopStore(SI, BECount);
+ // Optimize the store into a memcpy, if it feeds a similarly strided load.
+ for (auto &SI : StoreRefsForMemcpy)
+ MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
+
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
Instruction *Inst = &*I++;
// Look for memset instructions, which may be optimized to a larger memset.
@@ -380,7 +458,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(
return MadeChange;
}
-/// processLoopStore - See if this store can be promoted to a memset or memcpy.
+/// processLoopStore - See if this store can be promoted to a memset.
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
assert(SI->isSimple() && "Expected only non-volatile stores.");
@@ -398,12 +476,8 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
bool NegStride = StoreSize == -Stride;
// See if we can optimize just this store in isolation.
- if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
- StoredVal, SI, StoreEv, BECount, NegStride))
- return true;
-
- // Optimize the store into a memcpy, if it feeds a similarly strided load.
- return processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, BECount, NegStride);
+ return processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
+ StoredVal, SI, StoreEv, BECount, NegStride);
}
/// processLoopMemSet - See if this memset can be promoted to a large memset.
@@ -440,8 +514,14 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
if (!Stride || MSI->getLength() != Stride->getValue())
return false;
+ // Verify that the memset value is loop invariant. If not, we can't promote
+ // the memset.
+ Value *SplatValue = MSI->getValue();
+ if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
+ return false;
+
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
- MSI->getAlignment(), MSI->getValue(), MSI, Ev,
+ MSI->getAlignment(), SplatValue, MSI, Ev,
BECount, /*NegStride=*/false);
}
@@ -496,37 +576,19 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
const SCEV *BECount, bool NegStride) {
-
- // If the stored value is a byte-wise value (like i32 -1), then it may be
- // turned into a memset of i8 -1, assuming that all the consecutive bytes
- // are stored. A store of i32 0x01020304 can never be turned into a memset,
- // but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = nullptr;
- unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
- // If we're allowed to form a memset, and the stored value would be acceptable
- // for memset, use it.
- if (SplatValue && TLI->has(LibFunc::memset) &&
- // Verify that the stored value is loop invariant. If not, we can't
- // promote the memset.
- CurLoop->isLoopInvariant(SplatValue)) {
- // Keep and use SplatValue.
- PatternValue = nullptr;
- } else if (DestAS == 0 && TLI->has(LibFunc::memset_pattern16) &&
- (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
- // Don't create memset_pattern16s with address spaces.
- // It looks like we can use PatternValue!
- SplatValue = nullptr;
- } else {
- // Otherwise, this isn't an idiom we can transform. For example, we can't
- // do anything with a 3-byte store.
- return false;
- }
+ if (!SplatValue)
+ PatternValue = getMemSetPatternValue(StoredVal, DL);
+
+ assert((SplatValue || PatternValue) &&
+ "Expected either splat value or pattern value.");
// The trip count of the loop and the base pointer of the addrec SCEV is
// guaranteed to be loop invariant, which means that it should dominate the
// header. This allows us to insert code for it in the preheader.
+ unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, *DL, "loop-idiom");
@@ -608,29 +670,25 @@ bool LoopIdiomRecognize::processLoopStridedStore(
/// If the stored value is a strided load in the same loop with the same stride
/// this may be transformable into a memcpy. This kicks in for stuff like
/// for (i) A[i] = B[i];
-bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
- StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv,
- const SCEV *BECount, bool NegStride) {
- // If we're not allowed to form memcpy, we fail.
- if (!TLI->has(LibFunc::memcpy))
- return false;
+bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
+ const SCEV *BECount) {
+ assert(SI->isSimple() && "Expected only non-volatile stores.");
+
+ Value *StorePtr = SI->getPointerOperand();
+ const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ unsigned Stride = getStoreStride(StoreEv);
+ unsigned StoreSize = getStoreSizeInBytes(SI, DL);
+ bool NegStride = StoreSize == -Stride;
// The store must be feeding a non-volatile load.
- LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
- if (!LI || !LI->isSimple())
- return false;
+ LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+ assert(LI->isSimple() && "Expected only non-volatile loads.");
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided load. If we have something else, it's a
// random load we can't handle.
const SCEVAddRecExpr *LoadEv =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
- if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
- return false;
-
- // The store and load must share the same stride.
- if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
- return false;
+ cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
// The trip count of the loop and the base pointer of the addrec SCEV is
// guaranteed to be loop invariant, which means that it should dominate the
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 0333bf2..7354016 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -481,6 +481,17 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
return AMemSet;
}
+static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
+ const LoadInst *LI) {
+ unsigned StoreAlign = SI->getAlignment();
+ if (!StoreAlign)
+ StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
+ unsigned LoadAlign = LI->getAlignment();
+ if (!LoadAlign)
+ LoadAlign = DL.getABITypeAlignment(LI->getType());
+
+ return std::min(StoreAlign, LoadAlign);
+}
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
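
findCommonAlignment centralizes a computation that previously lived inline in the call-slot path: an access with no explicit alignment falls back to the ABI alignment of its type, and the conservative minimum of the two accesses wins. The same behavior in plain C++ terms (hypothetical values, no LLVM API):

#include <algorithm>

unsigned commonAlign(unsigned StoreAlign, unsigned StoreABIAlign,
                     unsigned LoadAlign, unsigned LoadABIAlign) {
  if (!StoreAlign)
    StoreAlign = StoreABIAlign; // 0 means "alignment not specified"
  if (!LoadAlign)
    LoadAlign = LoadABIAlign;
  return std::min(StoreAlign, LoadAlign); // conservative common alignment
}
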
@@ -496,12 +507,84 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
const DataLayout &DL = SI->getModule()->getDataLayout();
- // Detect cases where we're performing call slot forwarding, but
- // happen to be using a load-store pair to implement it, rather than
- // a memcpy.
+ // Load to store forwarding can be interpreted as memcpy.
if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
if (LI->isSimple() && LI->hasOneUse() &&
LI->getParent() == SI->getParent()) {
+
+ auto *T = LI->getType();
+ if (T->isAggregateType()) {
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ MemoryLocation LoadLoc = MemoryLocation::get(LI);
+
+ // We use alias analysis to check if an instruction may store to
+ // the memory we load from in between the load and the store. If
+ // such an instruction is found, we try to promote there instead
+ // of at the store position.
+ Instruction *P = SI;
+ for (BasicBlock::iterator I = ++LI->getIterator(), E = SI->getIterator();
+ I != E; ++I) {
+ if (!(AA.getModRefInfo(&*I, LoadLoc) & MRI_Mod))
+ continue;
+
+ // We found an instruction that may write to the loaded memory.
+ // We can try to promote at this position instead of the store
+ // position if nothing aliases the store memory after this.
+ P = &*I;
+ for (; I != E; ++I) {
+ MemoryLocation StoreLoc = MemoryLocation::get(SI);
+ if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
+ DEBUG(dbgs() << "Alias " << *I << "\n");
+ P = nullptr;
+ break;
+ }
+ }
+
+ break;
+ }
+
+ // If a valid insertion position is found, then we can promote
+ // the load/store pair to a memcpy.
+ if (P) {
+ // If we load from memory that may alias the memory we store to,
+ // memmove must be used to preserve semantics. If not, memcpy can
+ // be used.
+ bool UseMemMove = false;
+ if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc))
+ UseMemMove = true;
+
+ unsigned Align = findCommonAlignment(DL, SI, LI);
+ uint64_t Size = DL.getTypeStoreSize(T);
+
+ IRBuilder<> Builder(P);
+ Instruction *M;
+ if (UseMemMove)
+ M = Builder.CreateMemMove(SI->getPointerOperand(),
+ LI->getPointerOperand(), Size,
+ Align, SI->isVolatile());
+ else
+ M = Builder.CreateMemCpy(SI->getPointerOperand(),
+ LI->getPointerOperand(), Size,
+ Align, SI->isVolatile());
+
+ DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI
+ << " => " << *M << "\n");
+
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
+ MD->removeInstruction(LI);
+ LI->eraseFromParent();
+ ++NumMemCpyInstr;
+
+ // Make sure we do not invalidate the iterator.
+ BBI = M->getIterator();
+ return true;
+ }
+ }
+
+ // Detect cases where we're performing call slot forwarding, but
+ // happen to be using a load-store pair to implement it, rather than
+ // a memcpy.
MemDepResult ldep = MD->getDependency(LI);
CallInst *C = nullptr;
if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
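
The hunk above teaches processStore to treat an aggregate load/store pair as a copy. At the source level the promoted pattern looks roughly like this (illustration only); when alias analysis cannot prove the two locations disjoint, memmove is emitted instead of memcpy:

struct Big { char Bytes[64]; };

void forward(Big *Dst, const Big *Src) {
  Big Tmp = *Src; // aggregate load
  *Dst = Tmp;     // aggregate store; together promotable to memcpy/memmove
}
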
@@ -522,18 +605,11 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
}
if (C) {
- unsigned storeAlign = SI->getAlignment();
- if (!storeAlign)
- storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
- unsigned loadAlign = LI->getAlignment();
- if (!loadAlign)
- loadAlign = DL.getABITypeAlignment(LI->getType());
-
bool changed = performCallSlotOptzn(
LI, SI->getPointerOperand()->stripPointerCasts(),
LI->getPointerOperand()->stripPointerCasts(),
DL.getTypeStoreSize(SI->getOperand(0)->getType()),
- std::min(storeAlign, loadAlign), C);
+ findCommonAlignment(DL, SI, LI), C);
if (changed) {
MD->removeInstruction(SI);
SI->eraseFromParent();
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index fb970c7..401a740 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -183,6 +183,8 @@ namespace {
Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
Value *RemoveFactorFromExpression(Value *V, Value *Factor);
void EraseInst(Instruction *I);
+ void RecursivelyEraseDeadInsts(Instruction *I,
+ SetVector<AssertingVH<Instruction>> &Insts);
void OptimizeInst(Instruction *I);
Instruction *canonicalizeNegConstExpr(Instruction *I);
};
@@ -1926,6 +1928,22 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
return nullptr;
}
+// Remove dead instructions and if any operands are trivially dead add them to
+// Insts so they will be removed as well.
+void Reassociate::RecursivelyEraseDeadInsts(
+ Instruction *I, SetVector<AssertingVH<Instruction>> &Insts) {
+ assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
+ SmallVector<Value *, 4> Ops(I->op_begin(), I->op_end());
+ ValueRankMap.erase(I);
+ Insts.remove(I);
+ RedoInsts.remove(I);
+ I->eraseFromParent();
+ for (auto Op : Ops)
+ if (Instruction *OpInst = dyn_cast<Instruction>(Op))
+ if (OpInst->use_empty())
+ Insts.insert(OpInst);
+}
+
/// Zap the given instruction, adding interesting operands to the work list.
void Reassociate::EraseInst(Instruction *I) {
assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
@@ -2255,7 +2273,21 @@ bool Reassociate::runOnFunction(Function &F) {
++II;
}
- // If this produced extra instructions to optimize, handle them now.
+ // Make a copy of all the instructions to be redone so we can remove dead
+ // instructions.
+ SetVector<AssertingVH<Instruction>> ToRedo(RedoInsts);
+ // Iterate over all instructions to be reevaluated and remove trivially dead
+ // instructions. If any operand of the trivially dead instruction becomes
+ // dead, mark it for deletion as well. Continue this process until all
+ // trivially dead instructions have been removed.
+ while (!ToRedo.empty()) {
+ Instruction *I = ToRedo.pop_back_val();
+ if (isInstructionTriviallyDead(I))
+ RecursivelyEraseDeadInsts(I, ToRedo);
+ }
+
+ // Now that we have removed dead instructions, we can reoptimize the
+ // remaining instructions.
while (!RedoInsts.empty()) {
Instruction *I = RedoInsts.pop_back_val();
if (isInstructionTriviallyDead(I))
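
The pre-pass above drains a copy of RedoInsts, erasing trivially dead instructions and re-queueing operands that become dead in turn (the real code uses a SetVector of AssertingVH to deduplicate entries). The same worklist idiom in standalone C++, with Node as a hypothetical stand-in for Instruction:

#include <vector>

struct Node {
  std::vector<Node *> Ops;
  int Uses = 0;
  bool dead() const { return Uses == 0; }
};

void eraseDeadRec(Node *N, std::vector<Node *> &Work) {
  for (Node *Op : N->Ops) {
    --Op->Uses;           // erasing N drops one use of each operand
    if (Op->dead())
      Work.push_back(Op); // operand just became dead: queue it too
  }
  N->Ops.clear();         // stands in for eraseFromParent()
}

void drain(std::vector<Node *> &Work) {
  while (!Work.empty()) {
    Node *N = Work.back();
    Work.pop_back();
    if (N->dead())
      eraseDeadRec(N, Work);
  }
}
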
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index db127c3..5d253be 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -428,30 +428,15 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// We should have never reached here if this argument isn't a gc value
return BaseDefiningValueResult(I, true);
- if (isa<GlobalVariable>(I))
- // base case
+ if (isa<Constant>(I))
+ // We assume that objects with a constant base (e.g. a global) can't move
+ // and don't need to be reported to the collector because they are always
+ // live. All constants have constant bases. Besides global references, all
+ // kinds of constants (e.g. undef, constant expressions, null pointers) can
+ // be introduced by the inliner or the optimizer, especially on dynamically
+ // dead paths. See e.g. test4 in constants.ll.
return BaseDefiningValueResult(I, true);
- // inlining could possibly introduce phi node that contains
- // undef if callee has multiple returns
- if (isa<UndefValue>(I))
- // utterly meaningless, but useful for dealing with
- // partially optimized code.
- return BaseDefiningValueResult(I, true);
-
- // Due to inheritance, this must be _after_ the global variable and undef
- // checks
- if (isa<Constant>(I)) {
- assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
- "order of checks wrong!");
- // Note: Even for frontends which don't have constant references, we can
- // see constants appearing after optimizations. A simple example is
- // specialization of an address computation on null feeding into a merge
- // point where the actual use of the now-constant input is protected by
- // another null check. (e.g. test4 in constants.ll)
- return BaseDefiningValueResult(I, true);
- }
-
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Value *Def = CI->stripPointerCasts();
// If stripping pointer casts changes the address space there is an
@@ -1642,33 +1627,24 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
DenseSet<Value *> &VisitedLiveValues) {
for (User *U : GCRelocs) {
- if (!isa<IntrinsicInst>(U))
+ GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
+ if (!Relocate)
continue;
- IntrinsicInst *RelocatedValue = cast<IntrinsicInst>(U);
-
- // We only care about relocates
- if (RelocatedValue->getIntrinsicID() !=
- Intrinsic::experimental_gc_relocate) {
- continue;
- }
-
- GCRelocateOperands RelocateOperands(RelocatedValue);
- Value *OriginalValue =
- const_cast<Value *>(RelocateOperands.getDerivedPtr());
+ Value *OriginalValue = const_cast<Value *>(Relocate->getDerivedPtr());
assert(AllocaMap.count(OriginalValue));
Value *Alloca = AllocaMap[OriginalValue];
// Emit store into the related alloca
// All gc_relocates are i8 addrspace(1)* typed, and it must be bitcasted to
// the correct type according to alloca.
- assert(RelocatedValue->getNextNode() &&
+ assert(Relocate->getNextNode() &&
"Should always have one since it's not a terminator");
- IRBuilder<> Builder(RelocatedValue->getNextNode());
+ IRBuilder<> Builder(Relocate->getNextNode());
Value *CastedRelocatedValue =
- Builder.CreateBitCast(RelocatedValue,
+ Builder.CreateBitCast(Relocate,
cast<AllocaInst>(Alloca)->getAllocatedType(),
- suffixed_name_or(RelocatedValue, ".casted", ""));
+ suffixed_name_or(Relocate, ".casted", ""));
StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca);
Store->insertAfter(cast<Instruction>(CastedRelocatedValue));
diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
index 0914699..42287d3 100644
--- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -74,17 +74,13 @@ namespace llvm {
// insertFastDiv - Substitutes the div/rem instruction with code that checks the
// value of the operands and uses a shorter-faster div/rem instruction when
// possible and the longer-slower div/rem instruction otherwise.
-static bool insertFastDiv(Function &F,
- Function::iterator &I,
- BasicBlock::iterator &J,
- IntegerType *BypassType,
- bool UseDivOp,
- bool UseSignedOp,
+static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
+ bool UseDivOp, bool UseSignedOp,
DivCacheTy &PerBBDivCache) {
+ Function *F = I->getParent()->getParent();
// Get instruction operands
- Instruction *Instr = &*J;
- Value *Dividend = Instr->getOperand(0);
- Value *Divisor = Instr->getOperand(1);
+ Value *Dividend = I->getOperand(0);
+ Value *Divisor = I->getOperand(1);
if (isa<ConstantInt>(Divisor) ||
(isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
@@ -94,13 +90,12 @@ static bool insertFastDiv(Function &F,
}
// Basic Block is split before divide
- BasicBlock *MainBB = &*I;
- BasicBlock *SuccessorBB = I->splitBasicBlock(J);
- ++I; //advance iterator I to successorBB
+ BasicBlock *MainBB = &*I->getParent();
+ BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
// Add new basic block for slow divide operation
- BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
- MainBB->getParent(), SuccessorBB);
+ BasicBlock *SlowBB =
+ BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
SlowBB->moveBefore(SuccessorBB);
IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
Value *SlowQuotientV;
@@ -115,8 +110,8 @@ static bool insertFastDiv(Function &F,
SlowBuilder.CreateBr(SuccessorBB);
// Add new basic block for fast divide operation
- BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
- MainBB->getParent(), SuccessorBB);
+ BasicBlock *FastBB =
+ BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
FastBB->moveBefore(SlowBB);
IRBuilder<> FastBuilder(FastBB, FastBB->begin());
Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
@@ -139,19 +134,19 @@ static bool insertFastDiv(Function &F,
// Phi nodes for result of div and rem
IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
- PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+ PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
QuoPhi->addIncoming(SlowQuotientV, SlowBB);
QuoPhi->addIncoming(FastQuotientV, FastBB);
- PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+ PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
RemPhi->addIncoming(SlowRemainderV, SlowBB);
RemPhi->addIncoming(FastRemainderV, FastBB);
- // Replace Instr with appropriate phi node
+ // Replace I with appropriate phi node
if (UseDivOp)
- Instr->replaceAllUsesWith(QuoPhi);
+ I->replaceAllUsesWith(QuoPhi);
else
- Instr->replaceAllUsesWith(RemPhi);
- Instr->eraseFromParent();
+ I->replaceAllUsesWith(RemPhi);
+ I->eraseFromParent();
// Combine operands into a single value with OR for value testing below
MainBB->getInstList().back().eraseFromParent();
@@ -168,9 +163,6 @@ static bool insertFastDiv(Function &F,
Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
- // point iterator J at first instruction of successorBB
- J = I->begin();
-
// Cache phi nodes to be used later in place of other instances
// of div or rem with the same sign, dividend, and divisor
DivOpInfo Key(UseSignedOp, Dividend, Divisor);
@@ -179,57 +171,54 @@ static bool insertFastDiv(Function &F,
return true;
}
-// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
-// operands and operation are identical. Otherwise call insertFastDiv to perform
-// the optimization and cache the resulting dividend and remainder.
-static bool reuseOrInsertFastDiv(Function &F,
- Function::iterator &I,
- BasicBlock::iterator &J,
- IntegerType *BypassType,
- bool UseDivOp,
- bool UseSignedOp,
+// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
+// the current BB if operands and operation are identical. Otherwise calls
+// insertFastDiv to perform the optimization and caches the resulting dividend
+// and remainder.
+static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
+ bool UseDivOp, bool UseSignedOp,
DivCacheTy &PerBBDivCache) {
// Get instruction operands
- Instruction *Instr = &*J;
- DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
+ DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
if (CacheI == PerBBDivCache.end()) {
// If previous instance does not exist, insert fast div
- return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
- PerBBDivCache);
+ return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
}
// Replace operation value with previously generated phi node
DivPhiNodes &Value = CacheI->second;
if (UseDivOp) {
// Replace all uses of div instruction with quotient phi node
- J->replaceAllUsesWith(Value.Quotient);
+ I->replaceAllUsesWith(Value.Quotient);
} else {
// Replace all uses of rem instruction with remainder phi node
- J->replaceAllUsesWith(Value.Remainder);
+ I->replaceAllUsesWith(Value.Remainder);
}
- // Advance to next operation
- ++J;
-
// Remove redundant operation
- Instr->eraseFromParent();
+ I->eraseFromParent();
return true;
}
-// bypassSlowDivision - This optimization identifies DIV instructions that can
-// be profitably bypassed and carried out with a shorter, faster divide.
-bool llvm::bypassSlowDivision(Function &F,
- Function::iterator &I,
- const DenseMap<unsigned int, unsigned int> &BypassWidths) {
+// bypassSlowDivision - This optimization identifies DIV instructions in a BB
+// that can be profitably bypassed and carried out with a shorter, faster
+// divide.
+bool llvm::bypassSlowDivision(
+ BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
DivCacheTy DivCache;
bool MadeChange = false;
- for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
+ Instruction* Next = &*BB->begin();
+ while (Next != nullptr) {
+ // We may add instructions immediately after I, but we want to skip over
+ // them.
+ Instruction* I = Next;
+ Next = Next->getNextNode();
// Get instruction details
- unsigned Opcode = J->getOpcode();
+ unsigned Opcode = I->getOpcode();
bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
bool UseSignedOp = Opcode == Instruction::SDiv ||
@@ -240,11 +229,11 @@ bool llvm::bypassSlowDivision(Function &F,
continue;
// Skip division on vector types, only optimize integer instructions
- if (!J->getType()->isIntegerTy())
+ if (!I->getType()->isIntegerTy())
continue;
// Get bitwidth of div/rem instruction
- IntegerType *T = cast<IntegerType>(J->getType());
+ IntegerType *T = cast<IntegerType>(I->getType());
unsigned int bitwidth = T->getBitWidth();
// Continue if bitwidth is not bypassed
@@ -253,10 +242,9 @@ bool llvm::bypassSlowDivision(Function &F,
continue;
// Get type for div/rem instruction with bypass bitwidth
- IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
+ IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
- MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
- UseSignedOp, DivCache);
+ MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
}
return MadeChange;
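
bypassSlowDivision now walks one basic block directly instead of threading Function and BasicBlock iterators through every helper. The traversal captures the next instruction before visiting the current one, so the visit may erase the current instruction or insert new ones without invalidating the walk. The idiom in isolation (visit is a hypothetical callback):

static void walkAndRewrite(BasicBlock *BB) {
  Instruction *Next = &*BB->begin();
  while (Next != nullptr) {
    Instruction *I = Next;
    Next = Next->getNextNode(); // fetch the successor before I can be erased
    visit(I);                   // hypothetical: may erase I or insert after it
  }
}
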
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index e75163f..0e386ac 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -1305,8 +1305,9 @@ static bool markAliveBlocks(Function &F,
}
}
- // Turn invokes that call 'nounwind' functions into ordinary calls.
- if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ TerminatorInst *Terminator = BB->getTerminator();
+ if (auto *II = dyn_cast<InvokeInst>(Terminator)) {
+ // Turn invokes that call 'nounwind' functions into ordinary calls.
Value *Callee = II->getCalledValue();
if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
changeToUnreachable(II, true);
@@ -1321,6 +1322,44 @@ static bool markAliveBlocks(Function &F,
changeToCall(II);
Changed = true;
}
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) {
+ // Remove catchpads which cannot be reached.
+ struct CatchPadDenseMapInfo {
+ static CatchPadInst *getEmptyKey() {
+ return DenseMapInfo<CatchPadInst *>::getEmptyKey();
+ }
+ static CatchPadInst *getTombstoneKey() {
+ return DenseMapInfo<CatchPadInst *>::getTombstoneKey();
+ }
+ static unsigned getHashValue(CatchPadInst *CatchPad) {
+ return static_cast<unsigned>(hash_combine_range(
+ CatchPad->value_op_begin(), CatchPad->value_op_end()));
+ }
+ static bool isEqual(CatchPadInst *LHS, CatchPadInst *RHS) {
+ if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
+ RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return LHS == RHS;
+ return LHS->isIdenticalTo(RHS);
+ }
+ };
+
+ // Set of unique CatchPads.
+ SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4,
+ CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>>
+ HandlerSet;
+ detail::DenseSetEmpty Empty;
+ for (CatchSwitchInst::handler_iterator I = CatchSwitch->handler_begin(),
+ E = CatchSwitch->handler_end();
+ I != E; ++I) {
+ BasicBlock *HandlerBB = *I;
+ auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI());
+ if (!HandlerSet.insert({CatchPad, Empty}).second) {
+ CatchSwitch->removeHandler(I);
+ --I;
+ --E;
+ Changed = true;
+ }
+ }
}
Changed |= ConstantFoldTerminator(BB, true);
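
The local CatchPadDenseMapInfo above lets structurally identical catchpads hash and compare by their operand lists, while the reserved empty/tombstone keys still compare only by pointer. The same deduplication shape in portable C++, with Pad as a hypothetical stand-in for CatchPadInst:

#include <unordered_set>
#include <vector>

struct Pad { std::vector<int> Args; };

struct PadHash {
  size_t operator()(const Pad *P) const {
    size_t H = 0;
    for (int A : P->Args)
      H = H * 31 + static_cast<size_t>(A); // hash over the operand list
    return H;
  }
};

struct PadEq {
  bool operator()(const Pad *L, const Pad *R) const {
    return L->Args == R->Args; // structural, not pointer, equality
  }
};

// A handler whose insert() reports a duplicate is structurally identical to
// an earlier one and can be dropped, mirroring the HandlerSet logic above.
bool isDuplicate(std::unordered_set<const Pad *, PadHash, PadEq> &Handlers,
                 const Pad *P) {
  return !Handlers.insert(P).second;
}
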
@@ -1514,8 +1553,8 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
return true;
// Check if the function is specifically marked as a gc leaf function.
- //
- // TODO: we should be checking the attributes on the call site as well.
+ if (CS.hasFnAttr("gc-leaf-function"))
+ return true;
if (const Function *F = CS.getCalledFunction())
return F->hasFnAttribute("gc-leaf-function");
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index d0932f83..3bb3fa5 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -3448,18 +3449,26 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
if (BBI->mayHaveSideEffects()) {
- if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (auto *SI = dyn_cast<StoreInst>(BBI)) {
if (SI->isVolatile())
break;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ } else if (auto *LI = dyn_cast<LoadInst>(BBI)) {
if (LI->isVolatile())
break;
- } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(BBI)) {
+ } else if (auto *RMWI = dyn_cast<AtomicRMWInst>(BBI)) {
if (RMWI->isVolatile())
break;
- } else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) {
+ } else if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) {
if (CXI->isVolatile())
break;
+ } else if (isa<CatchPadInst>(BBI)) {
+ // A catchpad may invoke exception object constructors and such, which
+ // in some languages can be arbitrary code, so be conservative by
+ // default.
+ // For CoreCLR, it just involves a type test, so can be removed.
+ if (classifyEHPersonality(BB->getParent()->getPersonalityFn()) !=
+ EHPersonality::CoreCLR)
+ break;
} else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) &&
!isa<LandingPadInst>(BBI)) {
break;
@@ -3485,7 +3494,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
TerminatorInst *TI = Preds[i]->getTerminator();
IRBuilder<> Builder(TI);
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (auto *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isUnconditional()) {
if (BI->getSuccessor(0) == BB) {
new UnreachableInst(TI->getContext(), TI);
@@ -3502,7 +3511,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
Changed = true;
}
}
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i)
if (i.getCaseSuccessor() == BB) {
@@ -3511,18 +3520,49 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
--i; --e;
Changed = true;
}
- } else if ((isa<InvokeInst>(TI) &&
- cast<InvokeInst>(TI)->getUnwindDest() == BB) ||
- isa<CatchSwitchInst>(TI)) {
- removeUnwindEdge(TI->getParent());
- Changed = true;
+ } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
+ if (II->getUnwindDest() == BB) {
+ removeUnwindEdge(TI->getParent());
+ Changed = true;
+ }
+ } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
+ if (CSI->getUnwindDest() == BB) {
+ removeUnwindEdge(TI->getParent());
+ Changed = true;
+ continue;
+ }
+
+ for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
+ E = CSI->handler_end();
+ I != E; ++I) {
+ if (*I == BB) {
+ CSI->removeHandler(I);
+ --I;
+ --E;
+ Changed = true;
+ }
+ }
+ if (CSI->getNumHandlers() == 0) {
+ BasicBlock *CatchSwitchBB = CSI->getParent();
+ if (CSI->hasUnwindDest()) {
+ // Redirect preds to the unwind dest
+ CatchSwitchBB->replaceAllUsesWith(CSI->getUnwindDest());
+ } else {
+ // Rewrite all preds to unwind to caller (or from invoke to call).
+ SmallVector<BasicBlock *, 8> EHPreds(predecessors(CatchSwitchBB));
+ for (BasicBlock *EHPred : EHPreds)
+ removeUnwindEdge(EHPred);
+ }
+ // The catchswitch is no longer reachable.
+ new UnreachableInst(CSI->getContext(), CSI);
+ CSI->eraseFromParent();
+ Changed = true;
+ }
} else if (isa<CleanupReturnInst>(TI)) {
new UnreachableInst(TI->getContext(), TI);
TI->eraseFromParent();
Changed = true;
}
- // TODO: We can remove a catchswitch if all its catchpads end in
- // unreachable.
}
// If this block is now dead, remove it.
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 81dea6d..dc5fee5 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -57,8 +57,7 @@ static bool ignoreCallingConv(LibFunc::Func Func) {
Func == LibFunc::llabs || Func == LibFunc::strlen;
}
-/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
-/// value is equal or not-equal to zero.
+/// Return true if it only matters that the value is equal or not-equal to zero.
static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
for (User *U : V->users()) {
if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
@@ -72,8 +71,7 @@ static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
return true;
}
-/// isOnlyUsedInEqualityComparison - Return true if it is only used in equality
-/// comparisons with With.
+/// Return true if it is only used in equality comparisons with With.
static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
for (User *U : V->users()) {
if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
@@ -249,12 +247,12 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
!FT->getParamType(2)->isIntegerTy())
return nullptr;
- // Extract some information from the instruction
+ // Extract some information from the instruction.
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
uint64_t Len;
- // We don't do anything if length is not constant
+ // We don't do anything if length is not constant.
if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
Len = LengthArg->getZExtValue();
else
@@ -272,12 +270,12 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
if (SrcLen == 0 || Len == 0)
return Dst;
- // We don't optimize this case
+ // We don't optimize this case.
if (Len < SrcLen)
return nullptr;
// strncat(x, s, c) -> strcat(x, s)
- // s is constant so the strcat can be optimized further
+ // s is constant so the strcat can be optimized further.
return emitStrLenMemCpy(Src, Dst, SrcLen, B);
}
@@ -310,7 +308,8 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
- return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI),
+ "strchr");
return nullptr;
}
@@ -490,8 +489,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
Type *PT = Callee->getFunctionType()->getParamType(0);
Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
- Value *DstEnd =
- B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
+ Value *DstEnd = B.CreateGEP(B.getInt8Ty(), Dst,
+ ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
@@ -599,7 +598,8 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
if (I == StringRef::npos) // No match.
return Constant::getNullValue(CI->getType());
- return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+ return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I),
+ "strpbrk");
}
// strpbrk(s, "a") -> strchr(s, 'a')
@@ -878,8 +878,10 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
Type *RHSPtrTy =
IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
- Value *LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
- Value *RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv");
+ Value *LHSV =
+ B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
+ Value *RHSV =
+ B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv");
return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
}
@@ -992,6 +994,10 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
Value *V = valueHasFloatPrecision(CI->getArgOperand(0));
if (V == nullptr)
return nullptr;
+
+ // Propagate fast-math flags from the existing call to the new call.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.SetFastMathFlags(CI->getFastMathFlags());
// floor((double)floatval) -> (double)floorf(floatval)
if (Callee->isIntrinsic()) {
@@ -1027,6 +1033,10 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
if (V2 == nullptr)
return nullptr;
+ // Propagate fast-math flags from the existing call to the new call.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.SetFastMathFlags(CI->getFastMathFlags());
+
// fmin((double)floatval1, (double)floatval2)
// -> (double)fminf(floatval1, floatval2)
// TODO: Handle intrinsics in the same way as in optimizeUnaryDoubleFP().
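
Both double-to-float shrinking paths now wrap the rewrite in a FastMathFlagGuard, so the builder temporarily carries the original call's fast-math flags; without the guard, flags set on B would leak into unrelated instructions created later from the same builder. A guarded region in isolation, assuming B and CI from the surrounding code:

{
  IRBuilder<>::FastMathFlagGuard Guard(B); // restores B's flags at scope exit
  B.SetFastMathFlags(CI->getFastMathFlags());
  // ...create the float-precision replacement call here...
}
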
@@ -1117,7 +1127,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
Callee->getAttributes());
}
- bool unsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent());
+ bool UnsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent());
// pow(exp(x), y) -> exp(x*y)
// pow(exp2(x), y) -> exp2(x * y)
@@ -1126,7 +1136,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
// underflow behavior quite dramatically.
// Example: x = 1000, y = 0.001.
// pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
- if (unsafeFPMath) {
+ if (UnsafeFPMath) {
if (auto *OpC = dyn_cast<CallInst>(Op1)) {
IRBuilder<>::FastMathFlagGuard Guard(B);
FastMathFlags FMF;
@@ -1157,7 +1167,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
LibFunc::fabsl)) {
// In -ffast-math, pow(x, 0.5) -> sqrt(x).
- if (unsafeFPMath)
+ if (UnsafeFPMath)
return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B,
Callee->getAttributes());
@@ -1183,7 +1193,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
// In -ffast-math, generate repeated fmul instead of generating pow(x, n).
- if (unsafeFPMath) {
+ if (UnsafeFPMath) {
APFloat V = abs(Op2C->getValueAPF());
// We limit to a max of 7 fmul(s). Thus max exponent is 32.
// This transformation applies to integer exponents only.
@@ -1291,12 +1301,9 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
// function, do that first.
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
- if ((Name == "fmin" && hasFloatVersion(Name)) ||
- (Name == "fmax" && hasFloatVersion(Name))) {
- Value *Ret = optimizeBinaryDoubleFP(CI, B);
- if (Ret)
+ if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
+ if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
return Ret;
- }
// Make sure this has 2 arguments of FP type which match the result type.
FunctionType *FT = Callee->getFunctionType();
@@ -1307,14 +1314,12 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
IRBuilder<>::FastMathFlagGuard Guard(B);
FastMathFlags FMF;
- Function *F = CI->getParent()->getParent();
- if (canUseUnsafeFPMath(F)) {
+ if (CI->hasUnsafeAlgebra()) {
// Unsafe algebra sets all fast-math-flags to true.
FMF.setUnsafeAlgebra();
} else {
// At a minimum, no-nans-fp-math must be true.
- Attribute Attr = F->getFnAttribute("no-nans-fp-math");
- if (Attr.getValueAsString() != "true")
+ if (!CI->hasNoNaNs())
return nullptr;
// No-signed-zeros is implied by the definitions of fmax/fmin themselves:
// "Ideally, fmax would be sensitive to the sign of zero, for example
@@ -2169,7 +2174,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
LibFunc::Func Func;
Function *Callee = CI->getCalledFunction();
StringRef FuncName = Callee->getName();
- IRBuilder<> Builder(CI);
+
+ SmallVector<OperandBundleDef, 2> OpBundles;
+ CI->getOperandBundlesAsDefs(OpBundles);
+ IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
// Command-line parameter overrides function attribute.
@@ -2419,7 +2427,8 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
return false;
}
-Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) {
+Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
+ IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk))
@@ -2433,7 +2442,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &
return nullptr;
}
-Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) {
+Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
+ IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk))
@@ -2447,7 +2457,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<>
return nullptr;
}
-Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) {
+Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
+ IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk))
@@ -2539,7 +2550,10 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
LibFunc::Func Func;
Function *Callee = CI->getCalledFunction();
StringRef FuncName = Callee->getName();
- IRBuilder<> Builder(CI);
+
+ SmallVector<OperandBundleDef, 2> OpBundles;
+ CI->getOperandBundlesAsDefs(OpBundles);
+ IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
// First, check that this is a known library function.
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 1add78e..2e361d3 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -218,12 +218,12 @@ static Metadata *mapMetadataOp(Metadata *Op,
}
/// Resolve uniquing cycles involving the given metadata.
-static void resolveCycles(Metadata *MD, bool MDMaterialized) {
+static void resolveCycles(Metadata *MD, bool AllowTemps) {
if (auto *N = dyn_cast_or_null<MDNode>(MD)) {
- if (!MDMaterialized && N->isTemporary())
+ if (AllowTemps && N->isTemporary())
return;
if (!N->isResolved())
- N->resolveCycles(MDMaterialized);
+ N->resolveCycles(AllowTemps);
}
}
@@ -253,7 +253,7 @@ static bool remapOperands(MDNode &Node,
// Resolve uniquing cycles underneath distinct nodes on the fly so they
// don't infect later operands.
if (IsDistinct)
- resolveCycles(New, !(Flags & RF_HaveUnmaterializedMetadata));
+ resolveCycles(New, Flags & RF_HaveUnmaterializedMetadata);
}
}
@@ -401,7 +401,7 @@ Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
return NewMD;
// Resolve cycles involving the entry metadata.
- resolveCycles(NewMD, !(Flags & RF_HaveUnmaterializedMetadata));
+ resolveCycles(NewMD, Flags & RF_HaveUnmaterializedMetadata);
// Remap the operands of distinct MDNodes.
while (!DistinctWorklist.empty())
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a627dd6..2c0d317 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4294,12 +4294,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
- if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop,
- Reductions[Phi])) {
- if (Reductions[Phi].hasUnsafeAlgebra())
- Requirements->addUnsafeAlgebraInst(
- Reductions[Phi].getUnsafeAlgebraInst());
- AllowedExit.insert(Reductions[Phi].getLoopExitInstr());
+ RecurrenceDescriptor RedDes;
+ if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes)) {
+ if (RedDes.hasUnsafeAlgebra())
+ Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
+ AllowedExit.insert(RedDes.getLoopExitInstr());
+ Reductions[Phi] = RedDes;
continue;
}
diff --git a/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 4177388..6e9a948 100644
--- a/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -493,7 +493,8 @@ struct MatchableInfo {
void initialize(const AsmMatcherInfo &Info,
SmallPtrSetImpl<Record*> &SingletonRegisters,
- AsmVariantInfo const &Variant);
+ AsmVariantInfo const &Variant,
+ bool HasMnemonicFirst);
/// validate - Return true if this matchable is a valid thing to match against
/// and perform a bunch of validity checking.
@@ -502,20 +503,21 @@ struct MatchableInfo {
/// findAsmOperand - Find the AsmOperand with the specified name and
/// suboperand index.
int findAsmOperand(StringRef N, int SubOpIdx) const {
- for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i)
- if (N == AsmOperands[i].SrcOpName &&
- SubOpIdx == AsmOperands[i].SubOpIdx)
- return i;
- return -1;
+ auto I = std::find_if(AsmOperands.begin(), AsmOperands.end(),
+ [&](const AsmOperand &Op) {
+ return Op.SrcOpName == N && Op.SubOpIdx == SubOpIdx;
+ });
+ return (I != AsmOperands.end()) ? I - AsmOperands.begin() : -1;
}
/// findAsmOperandNamed - Find the first AsmOperand with the specified name.
/// This does not check the suboperand index.
int findAsmOperandNamed(StringRef N) const {
- for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i)
- if (N == AsmOperands[i].SrcOpName)
- return i;
- return -1;
+ auto I = std::find_if(AsmOperands.begin(), AsmOperands.end(),
+ [&](const AsmOperand &Op) {
+ return Op.SrcOpName == N;
+ });
+ return (I != AsmOperands.end()) ? I - AsmOperands.begin() : -1;
}
void buildInstructionResultOperands();
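
Both lookups above replace a hand-written index loop with the standard find_if-then-index idiom; the same shape in plain C++:

#include <algorithm>
#include <vector>

int indexOf(const std::vector<int> &V, int X) {
  auto I = std::find_if(V.begin(), V.end(),
                        [&](int E) { return E == X; });
  return I != V.end() ? static_cast<int>(I - V.begin()) : -1; // -1: not found
}
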
@@ -587,7 +589,7 @@ struct MatchableInfo {
HasGT = true;
}
- return !(HasLT ^ HasGT);
+ return HasLT == HasGT;
}
void dump() const;
@@ -595,8 +597,7 @@ struct MatchableInfo {
private:
void tokenizeAsmString(AsmMatcherInfo const &Info,
AsmVariantInfo const &Variant);
- void addAsmOperand(size_t Start, size_t End,
- std::string const &SeparatorCharacters);
+ void addAsmOperand(StringRef Token, bool IsIsolatedToken = false);
};
/// SubtargetFeatureInfo - Helper class for storing information on a subtarget
@@ -837,7 +838,8 @@ extractSingletonRegisterForAsmOperand(MatchableInfo::AsmOperand &Op,
void MatchableInfo::initialize(const AsmMatcherInfo &Info,
SmallPtrSetImpl<Record*> &SingletonRegisters,
- AsmVariantInfo const &Variant) {
+ AsmVariantInfo const &Variant,
+ bool HasMnemonicFirst) {
AsmVariantID = Variant.AsmVariantNo;
AsmString =
CodeGenInstruction::FlattenAsmStringVariants(AsmString,
@@ -845,6 +847,24 @@ void MatchableInfo::initialize(const AsmMatcherInfo &Info,
tokenizeAsmString(Info, Variant);
+ // The first token of the instruction is the mnemonic, which must be a
+ // simple string, not a $foo variable or a singleton register.
+ if (AsmOperands.empty())
+ PrintFatalError(TheDef->getLoc(),
+ "Instruction '" + TheDef->getName() + "' has no tokens");
+
+ assert(!AsmOperands[0].Token.empty());
+ if (HasMnemonicFirst) {
+ Mnemonic = AsmOperands[0].Token;
+ if (Mnemonic[0] == '$')
+ PrintFatalError(TheDef->getLoc(),
+ "Invalid instruction mnemonic '" + Mnemonic + "'!");
+
+ // Remove the first operand, it is tracked in the mnemonic field.
+ AsmOperands.erase(AsmOperands.begin());
+ } else if (AsmOperands[0].Token[0] != '$')
+ Mnemonic = AsmOperands[0].Token;
+
// Compute the required features.
for (Record *Predicate : TheDef->getValueAsListOfDefs("Predicates"))
if (const SubtargetFeatureInfo *Feature =
@@ -867,16 +887,8 @@ void MatchableInfo::initialize(const AsmMatcherInfo &Info,
}
/// Append an AsmOperand for the given substring of AsmString.
-void MatchableInfo::addAsmOperand(size_t Start, size_t End,
- std::string const &Separators) {
- StringRef String = AsmString;
- // Look for separators before and after to figure out is this token is
- // isolated. Accept '$$' as that's how we escape '$'.
- bool IsIsolatedToken =
- (!Start || Separators.find(String[Start - 1]) != StringRef::npos ||
- String.substr(Start - 1, 2) == "$$") &&
- (End >= String.size() || Separators.find(String[End]) != StringRef::npos);
- AsmOperands.push_back(AsmOperand(IsIsolatedToken, String.slice(Start, End)));
+void MatchableInfo::addAsmOperand(StringRef Token, bool IsIsolatedToken) {
+ AsmOperands.push_back(AsmOperand(IsIsolatedToken, Token));
}
/// tokenizeAsmString - Tokenize a simplified assembly string.
@@ -885,50 +897,58 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info,
StringRef String = AsmString;
size_t Prev = 0;
bool InTok = false;
- std::string Separators = Variant.TokenizingCharacters +
- Variant.SeparatorCharacters;
+ bool IsIsolatedToken = true;
for (size_t i = 0, e = String.size(); i != e; ++i) {
- if(Variant.BreakCharacters.find(String[i]) != std::string::npos) {
- if(InTok) {
- addAsmOperand(Prev, i, Separators);
+ char Char = String[i];
+ if (Variant.BreakCharacters.find(Char) != std::string::npos) {
+ if (InTok) {
+ addAsmOperand(String.slice(Prev, i), false);
Prev = i;
+ IsIsolatedToken = false;
}
InTok = true;
continue;
}
- if(Variant.TokenizingCharacters.find(String[i]) != std::string::npos) {
- if(InTok) {
- addAsmOperand(Prev, i, Separators);
+ if (Variant.TokenizingCharacters.find(Char) != std::string::npos) {
+ if (InTok) {
+ addAsmOperand(String.slice(Prev, i), IsIsolatedToken);
InTok = false;
+ IsIsolatedToken = false;
}
- addAsmOperand(i, i + 1, Separators);
+ addAsmOperand(String.slice(i, i + 1), IsIsolatedToken);
Prev = i + 1;
+ IsIsolatedToken = true;
continue;
}
- if(Variant.SeparatorCharacters.find(String[i]) != std::string::npos) {
- if(InTok) {
- addAsmOperand(Prev, i, Separators);
+ if (Variant.SeparatorCharacters.find(Char) != std::string::npos) {
+ if (InTok) {
+ addAsmOperand(String.slice(Prev, i), IsIsolatedToken);
InTok = false;
}
Prev = i + 1;
+ IsIsolatedToken = true;
continue;
}
- switch (String[i]) {
+
+ switch (Char) {
case '\\':
if (InTok) {
- addAsmOperand(Prev, i, Separators);
+ addAsmOperand(String.slice(Prev, i), false);
InTok = false;
+ IsIsolatedToken = false;
}
++i;
assert(i != String.size() && "Invalid quoted character");
- addAsmOperand(i, i + 1, Separators);
+ addAsmOperand(String.slice(i, i + 1), IsIsolatedToken);
Prev = i + 1;
+ IsIsolatedToken = false;
break;
case '$': {
- if (InTok && Prev != i) {
- addAsmOperand(Prev, i, Separators);
+ if (InTok) {
+ addAsmOperand(String.slice(Prev, i), false);
InTok = false;
+ IsIsolatedToken = false;
}
// If this isn't "${", start new identifier looking like "$xxx"
@@ -940,26 +960,20 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info,
size_t EndPos = String.find('}', i);
assert(EndPos != StringRef::npos &&
"Missing brace in operand reference!");
- addAsmOperand(i, EndPos+1, Separators);
+ addAsmOperand(String.slice(i, EndPos+1), IsIsolatedToken);
Prev = EndPos + 1;
i = EndPos;
+ IsIsolatedToken = false;
break;
}
+
default:
InTok = true;
+ break;
}
}
if (InTok && Prev != String.size())
- addAsmOperand(Prev, StringRef::npos, Separators);
-
- // The first token of the instruction is the mnemonic, which must be a
- // simple string, not a $foo variable or a singleton register.
- if (AsmOperands.empty())
- PrintFatalError(TheDef->getLoc(),
- "Instruction '" + TheDef->getName() + "' has no tokens");
- assert(!AsmOperands[0].Token.empty());
- if (AsmOperands[0].Token[0] != '$')
- Mnemonic = AsmOperands[0].Token;
+ addAsmOperand(String.substr(Prev), IsIsolatedToken);
}
bool MatchableInfo::validate(StringRef CommentDelimiter, bool Hack) const {
@@ -1352,8 +1366,7 @@ void AsmMatcherInfo::buildInfo() {
// Build information about all of the AssemblerPredicates.
std::vector<Record*> AllPredicates =
Records.getAllDerivedDefinitions("Predicate");
- for (unsigned i = 0, e = AllPredicates.size(); i != e; ++i) {
- Record *Pred = AllPredicates[i];
+ for (Record *Pred : AllPredicates) {
// Ignore predicates that are not intended for the assembler.
if (!Pred->getValueAsBit("AssemblerMatcherPredicate"))
continue;
@@ -1367,6 +1380,8 @@ void AsmMatcherInfo::buildInfo() {
assert(SubtargetFeatures.size() <= 64 && "Too many subtarget features!");
}
+ bool HasMnemonicFirst = AsmParser->getValueAsBit("HasMnemonicFirst");
+
// Parse the instructions; we need to do this first so that we can gather the
// singleton register classes.
SmallPtrSet<Record*, 16> SingletonRegisters;
@@ -1398,7 +1413,7 @@ void AsmMatcherInfo::buildInfo() {
auto II = llvm::make_unique<MatchableInfo>(*CGI);
- II->initialize(*this, SingletonRegisters, Variant);
+ II->initialize(*this, SingletonRegisters, Variant, HasMnemonicFirst);
// Ignore instructions which shouldn't be matched and diagnose invalid
// instruction definitions with an error.
@@ -1426,7 +1441,7 @@ void AsmMatcherInfo::buildInfo() {
auto II = llvm::make_unique<MatchableInfo>(std::move(Alias));
- II->initialize(*this, SingletonRegisters, Variant);
+ II->initialize(*this, SingletonRegisters, Variant, HasMnemonicFirst);
// Validate the alias definitions.
II->validate(CommentDelimiter, false);
@@ -1732,7 +1747,7 @@ static unsigned getConverterOperandID(const std::string &Name,
static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
std::vector<std::unique_ptr<MatchableInfo>> &Infos,
- raw_ostream &OS) {
+ bool HasMnemonicFirst, raw_ostream &OS) {
SmallSetVector<std::string, 16> OperandConversionKinds;
SmallSetVector<std::string, 16> InstructionConversionKinds;
std::vector<std::vector<uint8_t> > ConversionTable;
@@ -1866,7 +1881,7 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
// Add the operand entry to the instruction kind conversion row.
ConversionRow.push_back(ID);
- ConversionRow.push_back(OpInfo.AsmOperandNum);
+ ConversionRow.push_back(OpInfo.AsmOperandNum + HasMnemonicFirst);
if (!IsNewConverter)
break;
@@ -1988,8 +2003,8 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
// Output the operand conversion kind enum.
OS << "enum OperatorConversionKind {\n";
- for (unsigned i = 0, e = OperandConversionKinds.size(); i != e; ++i)
- OS << " " << OperandConversionKinds[i] << ",\n";
+ for (const std::string &Converter : OperandConversionKinds)
+ OS << " " << Converter << ",\n";
OS << " CVT_NUM_CONVERTERS\n";
OS << "};\n\n";
@@ -2156,11 +2171,12 @@ static void emitIsSubclass(CodeGenTarget &Target,
OS << " return false;\n";
}
}
- OS << " }\n";
  // If there were case statements emitted into the string stream, write the
  // default.
- if (!EmittedSwitch)
+ if (EmittedSwitch)
+ OS << " }\n";
+ else
OS << " return false;\n";
OS << "}\n\n";
@@ -2247,19 +2263,16 @@ static void emitSubtargetFeatureFlagEnumeration(AsmMatcherInfo &Info,
static void emitOperandDiagnosticTypes(AsmMatcherInfo &Info, raw_ostream &OS) {
// Get the set of diagnostic types from all of the operand classes.
std::set<StringRef> Types;
- for (std::map<Record*, ClassInfo*>::const_iterator
- I = Info.AsmOperandClasses.begin(),
- E = Info.AsmOperandClasses.end(); I != E; ++I) {
- if (!I->second->DiagnosticType.empty())
- Types.insert(I->second->DiagnosticType);
+ for (const auto &OpClassEntry : Info.AsmOperandClasses) {
+ if (!OpClassEntry.second->DiagnosticType.empty())
+ Types.insert(OpClassEntry.second->DiagnosticType);
}
if (Types.empty()) return;
// Now emit the enum entries.
- for (std::set<StringRef>::const_iterator I = Types.begin(), E = Types.end();
- I != E; ++I)
- OS << " Match_" << *I << ",\n";
+ for (StringRef Type : Types)
+ OS << " Match_" << Type << ",\n";
OS << " END_OPERAND_DIAGNOSTIC_TYPES\n";
}
@@ -2367,8 +2380,7 @@ static void emitMnemonicAliasVariant(raw_ostream &OS,const AsmMatcherInfo &Info,
// iteration order of the map is stable.
std::map<std::string, std::vector<Record*> > AliasesFromMnemonic;
- for (unsigned i = 0, e = Aliases.size(); i != e; ++i) {
- Record *R = Aliases[i];
+ for (Record *R : Aliases) {
// FIXME: Allow AssemblerVariantName to be a comma separated list.
std::string AsmVariantName = R->getValueAsString("AsmVariantName");
if (AsmVariantName != AsmParserVariantName)
@@ -2381,10 +2393,8 @@ static void emitMnemonicAliasVariant(raw_ostream &OS,const AsmMatcherInfo &Info,
// Process each alias a "from" mnemonic at a time, building the code executed
// by the string remapper.
std::vector<StringMatcher::StringPair> Cases;
- for (std::map<std::string, std::vector<Record*> >::iterator
- I = AliasesFromMnemonic.begin(), E = AliasesFromMnemonic.end();
- I != E; ++I) {
- const std::vector<Record*> &ToVec = I->second;
+ for (const auto &AliasEntry : AliasesFromMnemonic) {
+ const std::vector<Record*> &ToVec = AliasEntry.second;
// Loop through each alias and emit code that handles each case. If there
// are two instructions without predicates, emit an error. If there is one,
@@ -2409,7 +2419,7 @@ static void emitMnemonicAliasVariant(raw_ostream &OS,const AsmMatcherInfo &Info,
AliasWithNoPredicate = i;
continue;
}
- if (R->getValueAsString("ToMnemonic") == I->first)
+ if (R->getValueAsString("ToMnemonic") == AliasEntry.first)
PrintFatalError(R->getLoc(), "MnemonicAlias to the same string");
if (!MatchCode.empty())
@@ -2427,7 +2437,7 @@ static void emitMnemonicAliasVariant(raw_ostream &OS,const AsmMatcherInfo &Info,
MatchCode += "return;";
- Cases.push_back(std::make_pair(I->first, MatchCode));
+ Cases.push_back(std::make_pair(AliasEntry.first, MatchCode));
}
StringMatcher("Mnemonic", Cases, OS).Emit(Indent);
}
@@ -2470,12 +2480,10 @@ static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info,
static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
const AsmMatcherInfo &Info, StringRef ClassName,
StringToOffsetTable &StringTable,
- unsigned MaxMnemonicIndex) {
+ unsigned MaxMnemonicIndex, bool HasMnemonicFirst) {
unsigned MaxMask = 0;
- for (std::vector<OperandMatchEntry>::const_iterator it =
- Info.OperandMatchInfo.begin(), ie = Info.OperandMatchInfo.end();
- it != ie; ++it) {
- MaxMask |= it->OperandMask;
+ for (const OperandMatchEntry &OMI : Info.OperandMatchInfo) {
+ MaxMask |= OMI.OperandMask;
}
  // Emit the static custom operand parsing table.
@@ -2515,10 +2523,7 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
<< Info.OperandMatchInfo.size() << "] = {\n";
OS << " /* Operand List Mask, Mnemonic, Operand Class, Features */\n";
- for (std::vector<OperandMatchEntry>::const_iterator it =
- Info.OperandMatchInfo.begin(), ie = Info.OperandMatchInfo.end();
- it != ie; ++it) {
- const OperandMatchEntry &OMI = *it;
+ for (const OperandMatchEntry &OMI : Info.OperandMatchInfo) {
const MatchableInfo &II = *OMI.MI;
OS << " { ";
@@ -2589,19 +2594,25 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
OS << " uint64_t AvailableFeatures = getAvailableFeatures();\n\n";
OS << " // Get the next operand index.\n";
- OS << " unsigned NextOpNum = Operands.size();\n";
+ OS << " unsigned NextOpNum = Operands.size()"
+ << (HasMnemonicFirst ? " - 1" : "") << ";\n";
// Emit code to search the table.
OS << " // Search the table.\n";
- OS << " std::pair<const OperandMatchEntry*, const OperandMatchEntry*>";
- OS << " MnemonicRange\n";
- OS << " (OperandMatchTable, OperandMatchTable+";
- OS << Info.OperandMatchInfo.size() << ");\n";
- OS << " if(!Mnemonic.empty())\n";
- OS << " MnemonicRange = std::equal_range(OperandMatchTable,";
- OS << " OperandMatchTable+"
- << Info.OperandMatchInfo.size() << ", Mnemonic,\n"
- << " LessOpcodeOperand());\n\n";
+ if (HasMnemonicFirst) {
+ OS << " auto MnemonicRange =\n";
+ OS << " std::equal_range(std::begin(OperandMatchTable), "
+ "std::end(OperandMatchTable),\n";
+ OS << " Mnemonic, LessOpcodeOperand());\n\n";
+ } else {
+ OS << " auto MnemonicRange = std::make_pair(std::begin(OperandMatchTable),"
+ " std::end(OperandMatchTable));\n";
+ OS << " if (!Mnemonic.empty())\n";
+ OS << " MnemonicRange =\n";
+ OS << " std::equal_range(std::begin(OperandMatchTable), "
+ "std::end(OperandMatchTable),\n";
+ OS << " Mnemonic, LessOpcodeOperand());\n\n";
+ }
OS << " if (MnemonicRange.first == MnemonicRange.second)\n";
OS << " return MatchOperand_NoMatch;\n\n";
@@ -2686,6 +2697,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
// Compute the information on the custom operand parsing.
Info.buildOperandMatchInfo();
+ bool HasMnemonicFirst = AsmParser->getValueAsBit("HasMnemonicFirst");
+
// Write the output.
// Information for the class declaration.
@@ -2700,7 +2713,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
<< "&Operands);\n";
OS << " void convertToMapAndConstraints(unsigned Kind,\n ";
OS << " const OperandVector &Operands) override;\n";
- OS << " bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID);\n";
+ if (HasMnemonicFirst)
+ OS << " bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID);\n";
OS << " unsigned MatchInstructionImpl(const OperandVector &Operands,\n"
<< " MCInst &Inst,\n"
<< " uint64_t &ErrorInfo,"
@@ -2761,7 +2775,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
// Generate the convertToMCInst function to convert operands into an MCInst.
// Also, generate the convertToMapAndConstraints function for MS-style inline
  // assembly. The latter doesn't actually generate an MCInst.
- emitConvertFuncs(Target, ClassName, Info.Matchables, OS);
+ emitConvertFuncs(Target, ClassName, Info.Matchables, HasMnemonicFirst, OS);
// Emit the enumeration for classes which participate in matching.
emitMatchClassEnumeration(Target, Info.Classes, OS);
@@ -2883,24 +2897,26 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
}
// A method to determine if a mnemonic is in the list.
- OS << "bool " << Target.getName() << ClassName << "::\n"
- << "mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) {\n";
- OS << " // Find the appropriate table for this asm variant.\n";
- OS << " const MatchEntry *Start, *End;\n";
- OS << " switch (VariantID) {\n";
- OS << " default: llvm_unreachable(\"invalid variant!\");\n";
- for (unsigned VC = 0; VC != VariantCount; ++VC) {
- Record *AsmVariant = Target.getAsmParserVariant(VC);
- int AsmVariantNo = AsmVariant->getValueAsInt("Variant");
- OS << " case " << AsmVariantNo << ": Start = std::begin(MatchTable" << VC
- << "); End = std::end(MatchTable" << VC << "); break;\n";
+ if (HasMnemonicFirst) {
+ OS << "bool " << Target.getName() << ClassName << "::\n"
+ << "mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) {\n";
+ OS << " // Find the appropriate table for this asm variant.\n";
+ OS << " const MatchEntry *Start, *End;\n";
+ OS << " switch (VariantID) {\n";
+ OS << " default: llvm_unreachable(\"invalid variant!\");\n";
+ for (unsigned VC = 0; VC != VariantCount; ++VC) {
+ Record *AsmVariant = Target.getAsmParserVariant(VC);
+ int AsmVariantNo = AsmVariant->getValueAsInt("Variant");
+ OS << " case " << AsmVariantNo << ": Start = std::begin(MatchTable" << VC
+ << "); End = std::end(MatchTable" << VC << "); break;\n";
+ }
+ OS << " }\n";
+ OS << " // Search the table.\n";
+ OS << " auto MnemonicRange = ";
+ OS << "std::equal_range(Start, End, Mnemonic, LessOpcode());\n";
+ OS << " return MnemonicRange.first != MnemonicRange.second;\n";
+ OS << "}\n\n";
}
- OS << " }\n";
- OS << " // Search the table.\n";
- OS << " std::pair<const MatchEntry*, const MatchEntry*> MnemonicRange =\n";
- OS << " std::equal_range(Start, End, Mnemonic, LessOpcode());\n";
- OS << " return MnemonicRange.first != MnemonicRange.second;\n";
- OS << "}\n\n";
// Finally, build the match function.
OS << "unsigned " << Target.getName() << ClassName << "::\n"
@@ -2909,8 +2925,10 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
<< " bool matchingInlineAsm, unsigned VariantID) {\n";
OS << " // Eliminate obvious mismatches.\n";
- OS << " if (Operands.size() > " << MaxNumOperands << ") {\n";
- OS << " ErrorInfo = " << MaxNumOperands << ";\n";
+ OS << " if (Operands.size() > "
+ << (MaxNumOperands + HasMnemonicFirst) << ") {\n";
+ OS << " ErrorInfo = "
+ << (MaxNumOperands + HasMnemonicFirst) << ";\n";
OS << " return Match_InvalidOperand;\n";
OS << " }\n\n";
@@ -2919,10 +2937,15 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " uint64_t AvailableFeatures = getAvailableFeatures();\n\n";
OS << " // Get the instruction mnemonic, which is the first token.\n";
- OS << " StringRef Mnemonic;\n";
- OS << " if (Operands[0]->isToken())\n";
- OS << " Mnemonic = ((" << Target.getName()
- << "Operand&)*Operands[0]).getToken();\n\n";
+ if (HasMnemonicFirst) {
+ OS << " StringRef Mnemonic = ((" << Target.getName()
+ << "Operand&)*Operands[0]).getToken();\n\n";
+ } else {
+ OS << " StringRef Mnemonic;\n";
+ OS << " if (Operands[0]->isToken())\n";
+ OS << " Mnemonic = ((" << Target.getName()
+ << "Operand&)*Operands[0]).getToken();\n\n";
+ }
if (HasMnemonicAliases) {
OS << " // Process all MnemonicAliases to remap the mnemonic.\n";
@@ -2951,12 +2974,18 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
<< "); End = std::end(MatchTable" << VC << "); break;\n";
}
OS << " }\n";
+
OS << " // Search the table.\n";
- OS << " std::pair<const MatchEntry*, const MatchEntry*> "
- "MnemonicRange(Start, End);\n";
- OS << " unsigned SIndex = Mnemonic.empty() ? 0 : 1;\n";
- OS << " if (!Mnemonic.empty())\n";
- OS << " MnemonicRange = std::equal_range(Start, End, Mnemonic.lower(), LessOpcode());\n\n";
+ if (HasMnemonicFirst) {
+ OS << " auto MnemonicRange = "
+ "std::equal_range(Start, End, Mnemonic, LessOpcode());\n\n";
+ } else {
+ OS << " auto MnemonicRange = std::make_pair(Start, End);\n";
+ OS << " unsigned SIndex = Mnemonic.empty() ? 0 : 1;\n";
+ OS << " if (!Mnemonic.empty())\n";
+ OS << " MnemonicRange = "
+ "std::equal_range(Start, End, Mnemonic.lower(), LessOpcode());\n\n";
+ }
OS << " // Return a more specific error code if no mnemonics match.\n";
OS << " if (MnemonicRange.first == MnemonicRange.second)\n";
@@ -2966,16 +2995,25 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
<< "*ie = MnemonicRange.second;\n";
OS << " it != ie; ++it) {\n";
+ if (HasMnemonicFirst) {
+ OS << " // equal_range guarantees that instruction mnemonic matches.\n";
+ OS << " assert(Mnemonic == it->getMnemonic());\n";
+ }
+
// Emit check that the subclasses match.
OS << " bool OperandsValid = true;\n";
- OS << " for (unsigned i = SIndex; i != " << MaxNumOperands << "; ++i) {\n";
+ OS << " for (unsigned i = " << (HasMnemonicFirst ? "0" : "SIndex")
+ << "; i != " << MaxNumOperands << "; ++i) {\n";
OS << " auto Formal = static_cast<MatchClassKind>(it->Classes[i]);\n";
- OS << " if (i >= Operands.size()) {\n";
+ OS << " if (i" << (HasMnemonicFirst ? "+1" : "")
+ << " >= Operands.size()) {\n";
OS << " OperandsValid = (Formal == " <<"InvalidMatchClass);\n";
- OS << " if (!OperandsValid) ErrorInfo = i;\n";
+ OS << " if (!OperandsValid) ErrorInfo = i"
+ << (HasMnemonicFirst ? "+1" : "") << ";\n";
OS << " break;\n";
OS << " }\n";
- OS << " MCParsedAsmOperand &Actual = *Operands[i];\n";
+ OS << " MCParsedAsmOperand &Actual = *Operands[i"
+ << (HasMnemonicFirst ? "+1" : "") << "];\n";
OS << " unsigned Diag = validateOperandClass(Actual, Formal);\n";
OS << " if (Diag == Match_Success)\n";
OS << " continue;\n";
@@ -2991,8 +3029,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " // If we already had a match that only failed due to a\n";
OS << " // target predicate, that diagnostic is preferred.\n";
OS << " if (!HadMatchOtherThanPredicate &&\n";
- OS << " (it == MnemonicRange.first || ErrorInfo <= i)) {\n";
- OS << " ErrorInfo = i;\n";
+ OS << " (it == MnemonicRange.first || ErrorInfo <= i"
+ << (HasMnemonicFirst ? "+1" : "") << ")) {\n";
+ OS << " ErrorInfo = i" << (HasMnemonicFirst ? "+1" : "") << ";\n";
OS << " // InvalidOperand is the default. Prefer specificity.\n";
OS << " if (Diag != Match_InvalidOperand)\n";
OS << " RetCode = Diag;\n";
@@ -3067,7 +3106,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (!Info.OperandMatchInfo.empty())
emitCustomOperandParsing(OS, Target, Info, ClassName, StringTable,
- MaxMnemonicIndex);
+ MaxMnemonicIndex, HasMnemonicFirst);
OS << "#endif // GET_MATCHER_IMPLEMENTATION\n\n";
}
diff --git a/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp b/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
index 6246d81..d056de0 100644
--- a/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -185,16 +185,12 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
const std::vector<Record*> &ImpliesList =
Feature->getValueAsListOfDefs("Implies");
- if (ImpliesList.empty()) {
- OS << "{ }";
- } else {
- OS << "{ ";
- for (unsigned j = 0, M = ImpliesList.size(); j < M;) {
- OS << Target << "::" << ImpliesList[j]->getName();
- if (++j < M) OS << ", ";
- }
- OS << " }";
+ OS << "{";
+ for (unsigned j = 0, M = ImpliesList.size(); j < M;) {
+ OS << " " << Target << "::" << ImpliesList[j]->getName();
+ if (++j < M) OS << ",";
}
+ OS << " }";
OS << " }";
++NumFeatures;
@@ -240,16 +236,12 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
<< "\"" << Name << "\", "
<< "\"Select the " << Name << " processor\", ";
- if (FeatureList.empty()) {
- OS << "{ }";
- } else {
- OS << "{ ";
- for (unsigned j = 0, M = FeatureList.size(); j < M;) {
- OS << Target << "::" << FeatureList[j]->getName();
- if (++j < M) OS << ", ";
- }
- OS << " }";
+ OS << "{";
+ for (unsigned j = 0, M = FeatureList.size(); j < M;) {
+ OS << " " << Target << "::" << FeatureList[j]->getName();
+ if (++j < M) OS << ",";
}
+ OS << " }";
// The { } is for the "implies" section of this data structure.
OS << ", { } }";
diff --git a/contrib/llvm/utils/TableGen/TableGen.cpp b/contrib/llvm/utils/TableGen/TableGen.cpp
index c16a558..bcc594d 100644
--- a/contrib/llvm/utils/TableGen/TableGen.cpp
+++ b/contrib/llvm/utils/TableGen/TableGen.cpp
@@ -13,6 +13,7 @@
#include "TableGenBackends.h" // Declares all backends.
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
#include "llvm/TableGen/Error.h"
@@ -182,6 +183,8 @@ int main(int argc, char **argv) {
PrettyStackTraceProgram X(argc, argv);
cl::ParseCommandLineOptions(argc, argv);
+ llvm_shutdown_obj Y;
+
return TableGenMain(argv[0], &LLVMTableGenMain);
}
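
The llvm_shutdown_obj added here is the RAII helper declared in llvm/Support/ManagedStatic.h (hence the new include): its destructor calls llvm_shutdown(), which destroys all ManagedStatic instances when main() returns, so the tool tears down cleanly under leak checkers. A minimal usage sketch:

    #include "llvm/Support/ManagedStatic.h"

    int main(int argc, char **argv) {
      llvm::llvm_shutdown_obj Y; // calls llvm_shutdown() at scope exit
      // ... run the tool; any ManagedStatic created along the way is
      // torn down when Y goes out of scope ...
      return 0;
    }
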