summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h9
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td15
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp15
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp138
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h11
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td21
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td7
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td8
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td7
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp3
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h9
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp28
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp3
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h5
17 files changed, 290 insertions, 21 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 419717c..a520770 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -87,9 +87,22 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
}
+ if (STI.isTargetDarwin() && F->getCallingConv() == CallingConv::CXX_FAST_TLS)
+ return MF->getInfo<ARMFunctionInfo>()->isSplitCSR()
+ ? CSR_iOS_CXX_TLS_PE_SaveList
+ : CSR_iOS_CXX_TLS_SaveList;
return RegList;
}
+const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy(
+ const MachineFunction *MF) const {
+ assert(MF && "Invalid MachineFunction pointer.");
+ if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getInfo<ARMFunctionInfo>()->isSplitCSR())
+ return CSR_iOS_CXX_TLS_ViaCopy_SaveList;
+ return nullptr;
+}
+
const uint32_t *
ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
@@ -97,6 +110,8 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (CC == CallingConv::GHC)
// This is academic becase all GHC calls are (supposed to be) tail calls
return CSR_NoRegs_RegMask;
+ if (STI.isTargetDarwin() && CC == CallingConv::CXX_FAST_TLS)
+ return CSR_iOS_CXX_TLS_RegMask;
return STI.isTargetDarwin() ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
}
@@ -106,6 +121,14 @@ ARMBaseRegisterInfo::getNoPreservedMask() const {
}
const uint32_t *
+ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const {
+ assert(MF.getSubtarget<ARMSubtarget>().isTargetDarwin() &&
+ "only know about special TLS call on Darwin");
+ return CSR_iOS_TLSCall_RegMask;
+}
+
+
+const uint32_t *
ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index cea8b80..6a9a45a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -62,6 +62,12 @@ static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
switch (Reg) {
case D15: case D14: case D13: case D12:
case D11: case D10: case D9: case D8:
+ case D7: case D6: case D5: case D4:
+ case D3: case D2: case D1: case D0:
+ case D31: case D30: case D29: case D28:
+ case D27: case D26: case D25: case D24:
+ case D23: case D22: case D21: case D20:
+ case D19: case D18: case D17: case D16:
return true;
default:
return false;
@@ -92,9 +98,12 @@ protected:
public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const MCPhysReg *
+ getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const override;
+ const uint32_t *getTLSCallPreservedMask(const MachineFunction &MF) const;
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i32 first argument if the calling convention
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
index 2335164..847ef87 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -225,6 +225,21 @@ def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
(sub CSR_AAPCS_ThisReturn, R9))>;
+def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP,
+ (sequence "R%u", 12, 1),
+ (sequence "D%u", 31, 0))>;
+
+// C++ TLS access function saves all registers except SP. Try to match
+// the order of CSRs in CSR_iOS.
+def CSR_iOS_CXX_TLS : CalleeSavedRegs<(add CSR_iOS, (sequence "R%u", 12, 1),
+ (sequence "D%u", 31, 0))>;
+
+// CSRs that are handled by prologue, epilogue.
+def CSR_iOS_CXX_TLS_PE : CalleeSavedRegs<(add LR)>;
+
+// CSRs that are handled explicitly via copies.
+def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS, LR)>;
+
// The "interrupt" attribute is used to generate code that is acceptable in
// exception-handlers of various kinds. It makes us use a different return
// instruction (handled elsewhere) and affects which registers we must return to
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 9bdf823c..ff2fcfa 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -578,7 +578,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
// For now 32-bit only.
- if (VT != MVT::i32) return 0;
+ if (VT != MVT::i32 || GV->isThreadLocal()) return 0;
Reloc::Model RelocM = TM.getRelocationModel();
bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
@@ -2083,6 +2083,9 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
+ if (TLI.supportSplitCSR(FuncInfo.MF))
+ return false;
+
// Build a list of return value registers.
SmallVector<unsigned, 4> RetRegs;
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 0242440..dfbb969 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -622,7 +622,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
}
if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
} else
Base = N;
@@ -801,7 +802,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -1067,7 +1069,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
}
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
@@ -1186,7 +1189,8 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
if (N.getOpcode() == ISD::ADD) {
return false; // We want to select register offset instead
} else if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
} else {
Base = N;
@@ -1292,7 +1296,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
}
if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::TargetConstantPool)
return false; // We want to select t2LDRpci instead.
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 9cfb06b..37c0795 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -744,7 +744,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SUBE, MVT::i32, Custom);
}
- if (!Subtarget->isThumb1Only())
+ if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
// ARM does not have ROTL.
@@ -1385,6 +1385,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
else
return CallingConv::ARM_AAPCS;
case CallingConv::Fast:
+ case CallingConv::CXX_FAST_TLS:
if (!Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::Fast;
@@ -2347,6 +2348,19 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *I =
+ TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+ if (I) {
+ for (; *I; ++I) {
+ if (ARM::GPRRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::i32));
+ else if (ARM::DPRRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+ }
+ }
// Update chain and glue.
RetOps[0] = Chain;
@@ -2530,6 +2544,72 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
+/// \brief Convert a TLS address reference into the correct sequence of loads
+/// and calls to compute the variable's address for Darwin, and return an
+/// SDValue containing the final node.
+
+/// Darwin only has one TLS scheme which must be capable of dealing with the
+/// fully general situation, in the worst case. This means:
+/// + "extern __thread" declaration.
+/// + Defined in a possibly unknown dynamic library.
+///
+/// The general system is that each __thread variable has a [3 x i32] descriptor
+/// which contains information used by the runtime to calculate the address. The
+/// only part of this the compiler needs to know about is the first word, which
+/// contains a function pointer that must be called with the address of the
+/// entire descriptor in "r0".
+///
+/// Since this descriptor may be in a different unit, in general access must
+/// proceed along the usual ARM rules. A common sequence to produce is:
+///
+/// movw rT1, :lower16:_var$non_lazy_ptr
+/// movt rT1, :upper16:_var$non_lazy_ptr
+/// ldr r0, [rT1]
+/// ldr rT2, [r0]
+/// blx rT2
+/// [...address now in r0...]
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
+ SDLoc DL(Op);
+
+ // First step is to get the address of the actua global symbol. This is where
+ // the TLS descriptor lives.
+ SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
+
+ // The first entry in the descriptor is a function pointer that we must call
+ // to obtain the address of the variable.
+ SDValue Chain = DAG.getEntryNode();
+ SDValue FuncTLVGet =
+ DAG.getLoad(MVT::i32, DL, Chain, DescAddr,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, true, true, 4);
+ Chain = FuncTLVGet.getValue(1);
+
+ MachineFunction &F = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = F.getFrameInfo();
+ MFI->setAdjustsStack(true);
+
+ // TLS calls preserve all registers except those that absolutely must be
+ // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
+ // silly).
+ auto TRI =
+ getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
+ auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
+ const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
+
+ // Finally, we can make the call. This is just a degenerate version of a
+ // normal AArch64 call node: r0 takes the address of the descriptor, and
+ // returns the address of the variable in this thread.
+ Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
+ Chain =
+ DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
+ Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
+ DAG.getRegisterMask(Mask), Chain.getValue(1));
+ return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
+}
+
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
@@ -2631,9 +2711,11 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->isTargetDarwin())
+ return LowerGlobalTLSAddressDarwin(Op, DAG);
+
// TODO: implement the "local dynamic" model
- assert(Subtarget->isTargetELF() &&
- "TLS not implemented for non-ELF targets");
+ assert(Subtarget->isTargetELF() && "Only ELF implemented here");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().Options.EmulatedTLS)
return LowerToTLSEmulatedModel(GA, DAG);
@@ -11407,7 +11489,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'J':
- if (Subtarget->isThumb()) { // FIXME thumb2
+ if (Subtarget->isThumb1Only()) {
// This must be a constant between -255 and -1, for negated ADD
// immediates. This can be used in GCC with an "n" modifier that
// prints the negated value, for use with SUB instructions. It is
@@ -11476,7 +11558,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'M':
- if (Subtarget->isThumb()) { // FIXME thumb2
+ if (Subtarget->isThumb1Only()) {
// This must be a multiple of 4 between 0 and 1020, for
// ADD sp + immediate.
if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
@@ -12324,3 +12406,49 @@ unsigned ARMTargetLowering::getExceptionSelectorRegister(
// via the personality function.
return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
}
+
+void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+ // Update IsSplitCSR in ARMFunctionInfo.
+ ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
+ AFI->setIsSplitCSR(true);
+}
+
+void ARMTargetLowering::insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+ const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+ if (!IStart)
+ return;
+
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+ for (const MCPhysReg *I = IStart; *I; ++I) {
+ const TargetRegisterClass *RC = nullptr;
+ if (ARM::GPRRegClass.contains(*I))
+ RC = &ARM::GPRRegClass;
+ else if (ARM::DPRRegClass.contains(*I))
+ RC = &ARM::DPRRegClass;
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ // Create copy from CSR to a virtual register.
+ // FIXME: this currently does not emit CFI pseudo-instructions, it works
+ // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+ // nounwind. If we want to generalize this later, we may need to emit
+ // CFI pseudo-instructions.
+ assert(Entry->getParent()->getFunction()->hasFnAttribute(
+ Attribute::NoUnwind) &&
+ "Function should be nounwind in insertCopiesSplitCSR!");
+ Entry->addLiveIn(*I);
+ BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVR)
+ .addReg(*I);
+
+ for (auto *Exit : Exits)
+ BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ *I)
+ .addReg(NewVR);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index b764624..96b56c3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -526,6 +526,8 @@ namespace llvm {
SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
TLSModel::Model model) const;
+ SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -578,6 +580,15 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals,
bool isThisReturn, SDValue ThisVal) const;
+ bool supportSplitCSR(MachineFunction *MF) const override {
+ return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+ }
+ void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+ void insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+
SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index b9de83b..c446ba3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -5398,6 +5398,27 @@ def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
Requires<[IsARM, UseMovt]>;
} // isReMaterializable
+// The many different faces of TLS access.
+def : ARMPat<(ARMWrapper tglobaltlsaddr :$dst),
+ (MOVi32imm tglobaltlsaddr :$dst)>,
+ Requires<[IsARM, UseMovt]>;
+
+def : Pat<(ARMWrapper tglobaltlsaddr:$src),
+ (LDRLIT_ga_abs tglobaltlsaddr:$src)>,
+ Requires<[IsARM, DontUseMovt]>;
+
+def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
+ (MOV_ga_pcrel tglobaltlsaddr:$addr)>, Requires<[IsARM, UseMovt]>;
+
+def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
+ (LDRLIT_ga_pcrel tglobaltlsaddr:$addr)>,
+ Requires<[IsARM, DontUseMovt]>;
+let AddedComplexity = 10 in
+def : Pat<(load (ARMWrapperPIC tglobaltlsaddr:$addr)),
+ (MOV_ga_pcrel_ldr tglobaltlsaddr:$addr)>,
+ Requires<[IsARM, UseMovt]>;
+
+
// ConstantPool, GlobalAddress, and JumpTable
def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 7020ffb..defef4e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -5689,7 +5689,10 @@ def : NEONInstAlias<"vmov${p} $Vd, $Vm",
// VMOV : Vector Move (Immediate)
-let isReMaterializable = 1 in {
+// Although VMOVs are not strictly speaking cheap, they are as expensive
+// as their copies counterpart (VORR), so we should prefer rematerialization
+// over splitting when it applies.
+let isReMaterializable = 1, isAsCheapAsAMove=1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
(ins nImmSplatI8:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$Vd, $SIMM", "",
@@ -5744,7 +5747,7 @@ def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
(ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
"vmov", "f32", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
-} // isReMaterializable
+} // isReMaterializable, isAsCheapAsAMove
// Add support for bytes replication feature, so it could be GAS compatible.
// E.g. instructions below:
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index df6f243..5b1f9a0 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -1366,6 +1366,14 @@ def tLDRLIT_ga_abs : PseudoInst<(outs tGPR:$dst), (ins i32imm:$src),
(ARMWrapper tglobaladdr:$src))]>,
Requires<[IsThumb, DontUseMovt]>;
+// TLS globals
+def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
+ (tLDRLIT_ga_pcrel tglobaltlsaddr:$addr)>,
+ Requires<[IsThumb, DontUseMovt]>;
+def : Pat<(ARMWrapper tglobaltlsaddr:$addr),
+ (tLDRLIT_ga_abs tglobaltlsaddr:$addr)>,
+ Requires<[IsThumb, DontUseMovt]>;
+
// JumpTable
def : T1Pat<(ARMWrapperJT tjumptable:$dst),
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index d460d33..f42f456 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -3875,6 +3875,13 @@ def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
}
+def : T2Pat<(ARMWrapperPIC tglobaltlsaddr :$dst),
+ (t2MOV_ga_pcrel tglobaltlsaddr:$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+def : T2Pat<(ARMWrapper tglobaltlsaddr:$dst),
+ (t2MOVi32imm tglobaltlsaddr:$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+
// ConstantPool, GlobalAddress, and JumpTable
def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index 050cd1a..63e7940 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -930,10 +930,10 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
// and could enable the conversion to float to be removed completely.
def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
(VMOVDRR GPR:$Rl, (BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
- Requires<[IsARM]>;
+ Requires<[IsARM, HasV6T2]>;
def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
(VMOVDRR GPR:$Rl, (t2BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
- Requires<[IsThumb2]>;
+ Requires<[IsThumb2, HasV6T2]>;
def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
(VMOVDRR GPR:$Rl, (EORri GPR:$Rh, (i32 0x80000000)))>,
Requires<[IsARM]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index ac0330f..71ad7a4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -20,4 +20,5 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
RestoreSPFromFP(false), LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {}
+ PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false),
+ IsSplitCSR(false) {}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index d644797..68f9aec 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -118,6 +118,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// coalesced weights.
DenseMap<const MachineBasicBlock*, unsigned> CoalescedWeights;
+ /// True if this function has a subset of CSRs that is handled explicitly via
+ /// copies.
+ bool IsSplitCSR;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -128,7 +132,7 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
NumAlignedDPRCS2Regs(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false) {}
+ VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false) {}
explicit ARMFunctionInfo(MachineFunction &MF);
@@ -199,6 +203,9 @@ public:
bool hasITBlocks() const { return HasITBlocks; }
void setHasITBlocks(bool h) { HasITBlocks = h; }
+ bool isSplitCSR() const { return IsSplitCSR; }
+ void setIsSplitCSR(bool s) { IsSplitCSR = s; }
+
void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
llvm_unreachable("Duplicate entries!");
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 6084f22..57577dc 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -388,6 +388,9 @@ private:
size_t calculateContentSize() const;
+ // Reset state between object emissions
+ void reset() override;
+
public:
ARMTargetELFStreamer(MCStreamer &S)
: ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::FK_INVALID),
@@ -415,7 +418,7 @@ public:
MCCodeEmitter *Emitter, bool IsThumb)
: MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb),
MappingSymbolCounter(0), LastEMS(EMS_None) {
- Reset();
+ EHReset();
}
~ARMELFStreamer() {}
@@ -579,7 +582,10 @@ private:
}
// Helper functions for ARM exception handling directives
- void Reset();
+ void EHReset();
+
+ // Reset state between object emissions
+ void reset() override;
void EmitPersonalityFixup(StringRef Name);
void FlushPendingOffset();
@@ -1040,6 +1046,8 @@ void ARMTargetELFStreamer::emitInst(uint32_t Inst, char Suffix) {
getStreamer().emitInst(Inst, Suffix);
}
+void ARMTargetELFStreamer::reset() { AttributeSection = nullptr; }
+
void ARMELFStreamer::FinishImpl() {
MCTargetStreamer &TS = *getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
@@ -1048,6 +1056,18 @@ void ARMELFStreamer::FinishImpl() {
MCELFStreamer::FinishImpl();
}
+void ARMELFStreamer::reset() {
+ MCTargetStreamer &TS = *getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+ ATS.reset();
+ MappingSymbolCounter = 0;
+ MCELFStreamer::reset();
+ // MCELFStreamer clear's the assembler's e_flags. However, for
+ // arm we manually set the ABI version on streamer creation, so
+ // do the same here
+ getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
+}
+
inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
unsigned Type,
unsigned Flags,
@@ -1094,7 +1114,7 @@ void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) {
Kind));
}
-void ARMELFStreamer::Reset() {
+void ARMELFStreamer::EHReset() {
ExTab = nullptr;
FnStart = nullptr;
Personality = nullptr;
@@ -1164,7 +1184,7 @@ void ARMELFStreamer::emitFnEnd() {
SwitchSection(&FnStart->getSection());
// Clean exception handling frame information
- Reset();
+ EHReset();
}
void ARMELFStreamer::emitCantUnwind() { CantUnwind = true; }
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index dad50f2..c0d10c8 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -38,6 +38,9 @@ void ARMTargetStreamer::emitCurrentConstantPool() {
// finish() - write out any non-empty assembler constant pools.
void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
+// reset() - Reset any state
+void ARMTargetStreamer::reset() {}
+
// The remaining callbacks should be handled separately by each
// streamer.
void ARMTargetStreamer::emitFnStart() {}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
index 812f983..27faac6 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -53,6 +53,11 @@ public:
/// \p MBB will be correctly handled by the target.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
+ /// Disable shrink wrap as tBfar/BL will be used to adjust for long jumps.
+ bool enableShrinkWrapping(const MachineFunction &MF) const override {
+ return false;
+ }
+
private:
/// Check if the frame lowering of \p MF needs a special fixup
/// code sequence for the epilogue.
OpenPOWER on IntegriCloud