Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARM.td                           |  16
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp             |  67
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h               |   4
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp          |   6
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h            |   1
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMCallingConv.h                 |   6
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMCallingConv.td                |  14
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFastISel.cpp                  |  12
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp             |  16
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp              |  88
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp              | 368
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelLowering.h                |  20
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrInfo.td                  |  81
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrNEON.td                  |  41
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td                |  22
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp        | 189
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h         |  10
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp                 |   1
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSubtarget.h                   |   4
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp       |  74
-rw-r--r--  contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp       |  24
-rw-r--r--  contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp |  28
-rw-r--r--  contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp   |  18
-rw-r--r--  contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h     |   7
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp  | 159
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h       |  13
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp  | 198
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h    | 114
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp          |  14
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp              |  84
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp          |   2
31 files changed, 1168 insertions(+), 533 deletions(-)
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index 6838084..2d747091 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -59,6 +59,8 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
"FP compare + branch is slow">;
def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
"Floating point unit supports single precision only">;
+def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
+ "Enable support for TrustZone security extensions">;
// Some processors have FP multiply-accumulate instructions that don't
// play nicely with other VFP / NEON instructions, and it's generally better
@@ -144,29 +146,33 @@ include "ARMSchedule.td"
def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
"Cortex-A5 ARM processors",
[FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVMLxForwarding, FeatureT2XtPk]>;
+ FeatureVMLxForwarding, FeatureT2XtPk,
+ FeatureTrustZone]>;
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
"Cortex-A8 ARM processors",
[FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVMLxForwarding, FeatureT2XtPk]>;
+ FeatureVMLxForwarding, FeatureT2XtPk,
+ FeatureTrustZone]>;
def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
"Cortex-A9 ARM processors",
[FeatureVMLxForwarding,
FeatureT2XtPk, FeatureFP16,
- FeatureAvoidPartialCPSR]>;
+ FeatureAvoidPartialCPSR,
+ FeatureTrustZone]>;
def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
"Swift ARM processors",
[FeatureNEONForFP, FeatureT2XtPk,
FeatureVFP4, FeatureMP, FeatureHWDiv,
FeatureHWDivARM, FeatureAvoidPartialCPSR,
FeatureAvoidMOVsShOp,
- FeatureHasSlowFPVMLx]>;
+ FeatureHasSlowFPVMLx, FeatureTrustZone]>;
// FIXME: It has not been determined if A15 has these features.
def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
"Cortex-A15 ARM processors",
[FeatureT2XtPk, FeatureFP16,
- FeatureAvoidPartialCPSR]>;
+ FeatureAvoidPartialCPSR,
+ FeatureTrustZone]>;
def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
"Cortex-R5 ARM processors",
[FeatureSlowFPBrcc, FeatureHWDivARM,
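The hunk above wires the new FeatureTrustZone flag into the Cortex-A5/A8/A9/A15 and Swift processor definitions. As with any SubtargetFeature, TableGen turns it into a boolean subtarget field plus an accessor that instruction predicates query. A minimal self-contained sketch of that pattern, using an illustrative stand-in class rather than LLVM's real ARMSubtarget:

    #include <cstdio>

    // Stand-in for the generated subtarget: TableGen emits one boolean field
    // per SubtargetFeature; predicates such as HasTrustZone query the accessor.
    struct FakeARMSubtarget {
      bool HasTrustZone = false;              // set when "+trustzone" is enabled
      bool hasTrustZone() const { return HasTrustZone; }
    };

    int main() {
      FakeARMSubtarget CortexA9;
      CortexA9.HasTrustZone = true;           // ProcA9 now implies the feature
      // Selection of TrustZone-only instructions would be guarded like this:
      std::printf("TrustZone ops selectable: %s\n",
                  CortexA9.hasTrustZone() ? "yes" : "no");
    }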
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9e68ff4..6005054 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -283,14 +283,20 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
return false;
--I;
}
- if (!isUnpredicatedTerminator(I))
- return false;
// Get the last instruction in the block.
MachineInstr *LastInst = I;
+ unsigned LastOpc = LastInst->getOpcode();
+
+  // Check whether it's an indirect branch first; this should return
+  // 'unanalyzable' even if it's predicated.
+ if (isIndirectBranchOpcode(LastOpc))
+ return true;
+
+ if (!isUnpredicatedTerminator(I))
+ return false;
// If there is only one terminator instruction, process it.
- unsigned LastOpc = LastInst->getOpcode();
if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
if (isUncondBranchOpcode(LastOpc)) {
TBB = LastInst->getOperand(0).getMBB();
@@ -747,10 +753,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Mov->addRegisterKilled(SrcReg, TRI);
}
-static const
-MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
- unsigned Reg, unsigned SubIdx, unsigned State,
- const TargetRegisterInfo *TRI) {
+const MachineInstrBuilder &
+ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
+ unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) const {
if (!SubIdx)
return MIB.addReg(Reg, State);
@@ -795,12 +801,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
- AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ if (Subtarget.hasV5TEOps()) {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
+
+ AddDefaultPred(MIB);
+ } else {
+      // Fall back to the STM instruction, which has existed since the dawn
+      // of time.
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
+ .addFrameIndex(FI).addMemOperand(MMO));
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ }
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -948,7 +964,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
@@ -975,12 +990,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
- unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA;
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ MachineInstrBuilder MIB;
+
+ if (Subtarget.hasV5TEOps()) {
+ MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
+ AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
+
+ AddDefaultPred(MIB);
+ } else {
+      // Fall back to the LDM instruction, which has existed since the dawn
+      // of time.
+ MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
+ .addFrameIndex(FI).addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ }
+
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
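Both hunks above make the same decision: on subtargets with ARMv5TE operations, a GPR pair is spilled or reloaded with a single doubleword STRD/LDRD access, while older cores fall back to STM/LDM. A compact sketch of that opcode choice; the enumerators are illustrative stand-ins, not LLVM's opcode values:

    #include <cstdio>

    enum Opcode { STRD, LDRD, STMIA, LDMIA };

    // Pick the paired spill/reload opcode the way the patch does for the
    // GPRPair register class.
    Opcode pickPairSpillOpcode(bool hasV5TEOps, bool isStore) {
      if (hasV5TEOps)                       // ARMv5TE+: one doubleword access
        return isStore ? STRD : LDRD;
      return isStore ? STMIA : LDMIA;       // pre-v5TE fallback
    }

    int main() {
      std::printf("%d\n", pickPairSpillOpcode(true, false)); // 1 (LDRD)
    }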
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 7c107bb..2ef659c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -141,6 +141,10 @@ public:
MachineInstr *commuteInstruction(MachineInstr*, bool=false) const;
+ const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
+ unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) const;
+
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index b6b27f8..b0d34a7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -75,6 +75,12 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
}
const uint32_t*
+ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
+ return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+ ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask;
+}
+
+const uint32_t*
ARMBaseRegisterInfo::getNoPreservedMask() const {
return CSR_NoRegs_RegMask;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 725033b..0679919 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -96,6 +96,7 @@ public:
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+ const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
const uint32_t *getNoPreservedMask() const;
BitVector getReservedRegs(const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
index e6e8c3d..4f94ad2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -74,9 +74,15 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 };
+ static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
if (Reg == 0) {
+
+    // If only R3 was left unallocated, we must still waste it now.
+ Reg = State.AllocateReg(GPRArgRegs, 4);
+ assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
+
// For the 2nd half of a v2f64, do not just fail.
if (CanFail)
return false;
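For context on the hunk above: under AAPCS (without VFP argument registers) an f64 occupies an even/odd GPR pair, r0:r1 or r2:r3. When pair allocation fails with only R3 free, R3 must still be consumed so that no later argument lands in it while this value goes to the stack. A hedged sketch of the rule, with plain integers standing in for register numbers:

    #include <cstdio>

    // NextFree: index of the first free argument register (0..3; 4 = none).
    // Returns the first register of the assigned pair, or -1 for "on the stack".
    int assignF64Pair(int &NextFree) {
      if (NextFree <= 0) { NextFree = 2; return 0; }  // r0:r1
      if (NextFree <= 2) { NextFree = 4; return 2; }  // r2:r3
      NextFree = 4;      // only r3 was free: waste it, value goes on the stack
      return -1;
    }

    int main() {
      int NextFree = 3;                     // r0..r2 already hold earlier args
      int Pair = assignF64Pair(NextFree);
      std::printf("pair start: %d, next free: %d\n", Pair, NextFree); // -1, 4
    }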
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
index b378b96..8ff666e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -111,8 +111,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[
// i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
// (and the same is true for f64 if VFP is not enabled)
CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
- CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
- "ArgFlags.getOrigAlign() != 8",
+ CCIfType<[i32], CCIf<"ArgFlags.getOrigAlign() != 8",
CCAssignToReg<[R0, R1, R2, R3]>>>,
CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>,
@@ -195,10 +194,21 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>;
def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
(sequence "D%u", 15, 8))>;
+// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
+// and the pointer return value are both passed in R0 in these cases, this can
+// be partially modelled by treating R0 as a callee-saved register.
+// Only the resulting RegMask is used; the SaveList is ignored.
+def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6,
+ R5, R4, (sequence "D%u", 15, 8),
+ R0)>;
+
// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register.
// Also save R7-R4 first to match the stack frame fixed spill areas.
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
+def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
+ (sub CSR_AAPCS_ThisReturn, R9))>;
+
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
// add is a workaround for not being able to compile empty list:
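To illustrate why the ThisReturn masks are sound: in the ARM C++ ABI a constructor call receives 'this' in R0 and hands the same pointer back in R0, so the caller can keep using the R0 value across the call without a spill. A small illustrative example (placement-new is used only to make the constructor calls explicit):

    #include <new>

    struct Widget {
      int X;
      Widget() : X(0) {}   // ABI: the constructor call returns 'this' in R0
    };

    // With the ThisReturn regmask, each call below effectively preserves R0,
    // so the pointers stay live in registers across the constructor calls.
    Widget *initBoth(void *A, void *B) {
      Widget *W1 = new (A) Widget;
      Widget *W2 = new (B) Widget;
      return W1->X <= W2->X ? W1 : W2;
    }

    int main() {
      alignas(Widget) unsigned char BufA[sizeof(Widget)], BufB[sizeof(Widget)];
      return initBoth(BufA, BufB)->X;
    }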
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 29fcd40..5d45f64 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -144,8 +144,8 @@ class ARMFastISel : public FastISel {
virtual bool TargetSelectInstruction(const Instruction *I);
virtual unsigned TargetMaterializeConstant(const Constant *C);
virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
- virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
- const LoadInst *LI);
+ virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
virtual bool FastLowerArguments();
private:
#include "ARMGenFastISel.inc"
@@ -2605,7 +2605,7 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
unsigned Opc;
bool isBoolZext = false;
- const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+ const TargetRegisterClass *RC;
switch (SrcVT.SimpleTy) {
default: return 0;
case MVT::i16:
@@ -2797,12 +2797,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
return false;
}
-/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// \brief The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
/// successful.
-bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
- const LoadInst *LI) {
+bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(LI->getType(), VT))
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 7a02adf..483802b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -141,7 +141,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
assert(!AFI->isThumb1OnlyFunction() &&
"This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -159,8 +159,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
return;
// Allocate the vararg register save area. This is not counted in NumBytes.
- if (VARegSaveSize)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize,
+ if (ArgRegsSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
MachineInstr::FrameSetup);
if (!AFI->hasStackFrame()) {
@@ -357,7 +357,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
"This emitEpilogue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -471,8 +471,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MBBI = NewMI;
}
- if (VARegSaveSize)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
+ if (ArgRegsSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -1003,7 +1003,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ bool isVarArg = AFI->getArgRegsSaveSize() > 0;
unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
// The emitPopInst calls below do not insert reloads for the aligned DPRCS2
@@ -1174,7 +1174,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (AFI->isThumb1OnlyFunction()) {
// Spill LR if Thumb1 function uses variable length argument lists.
- if (AFI->getVarArgsRegSaveSize() > 0)
+ if (AFI->getArgRegsSaveSize() > 0)
MRI.setPhysRegUsed(ARM::LR);
// Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
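The rename from VARegSaveSize to ArgRegsSaveSize reflects that the save area now covers byval argument registers as well as varargs. The prologue/epilogue bookkeeping stays symmetric; a minimal sketch of that invariant (illustrative, not LLVM's emitSPUpdate):

    #include <cassert>

    // The argument-register save area is allocated before the normal frame in
    // the prologue and released after it in the epilogue.
    struct StackModel {
      int SP = 0;
      void prologue(int ArgRegsSaveSize, int FrameBytes) {
        SP -= ArgRegsSaveSize;   // byval/vararg register save area
        SP -= FrameBytes;        // the frame proper
      }
      void epilogue(int ArgRegsSaveSize, int FrameBytes) {
        SP += FrameBytes;
        SP += ArgRegsSaveSize;
      }
    };

    int main() {
      StackModel M;
      M.prologue(16, 64);
      M.epilogue(16, 64);
      assert(M.SP == 0);         // allocation and release match exactly
    }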
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 2c51de2..9e1782e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1469,14 +1469,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
- MVT::i32, MVT::Other, Ops, 5);
+ MVT::i32, MVT::Other, Ops);
} else {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
- MVT::i32, MVT::Other, Ops, 6);
+ MVT::i32, MVT::Other, Ops);
}
}
@@ -1525,7 +1525,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
SDValue Ops[]= { Base, Offset, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
- MVT::Other, Ops, 5);
+ MVT::Other, Ops);
}
return NULL;
@@ -1539,7 +1539,7 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form a D register from a pair of S registers.
@@ -1550,7 +1550,7 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form a quad register from a pair of D registers.
@@ -1560,7 +1560,7 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive D registers from a pair of Q registers.
@@ -1570,7 +1570,7 @@ SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive S registers.
@@ -1585,7 +1585,7 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
V2, SubReg2, V3, SubReg3 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive D registers.
@@ -1599,7 +1599,7 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
V2, SubReg2, V3, SubReg3 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive Q registers.
@@ -1613,7 +1613,7 @@ SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
V2, SubReg2, V3, SubReg3 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
@@ -1761,7 +1761,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
- VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
} else {
// Otherwise, quad registers are loaded with two separate instructions,
@@ -1774,7 +1774,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
- ResTy, AddrTy, MVT::Other, OpsA, 7);
+ ResTy, AddrTy, MVT::Other, OpsA);
Chain = SDValue(VLdA, 2);
// Load the odd subregs.
@@ -1791,8 +1791,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
- VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
- Ops.data(), Ops.size());
+ VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
}
// Transfer memoperands.
@@ -1913,8 +1912,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
- SDNode *VSt =
- CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Transfer memoperands.
cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
@@ -1939,7 +1937,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
MemAddr.getValueType(),
- MVT::Other, OpsA, 7);
+ MVT::Other, OpsA);
cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
Chain = SDValue(VStA, 1);
@@ -1958,7 +1956,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Reg0);
Ops.push_back(Chain);
SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
- Ops.data(), Ops.size());
+ Ops);
cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
return VStB;
}
@@ -2063,8 +2061,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
QOpcodes[OpcodeIndex]);
- SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
- Ops.data(), Ops.size());
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
if (!IsLoad)
return VLdLn;
@@ -2150,8 +2147,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
if (isUpdating)
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::Other);
- SDNode *VLdDup =
- CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
SuperReg = SDValue(VLdDup, 0);
@@ -2197,7 +2193,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
Ops.push_back(getAL(CurDAG)); // predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
- return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
@@ -2542,7 +2538,7 @@ SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
MVT::i32, MVT::i32, MVT::Other,
- Ops.data() ,Ops.size());
+ Ops);
cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
return ResNode;
}
@@ -2599,7 +2595,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
- Ops, 4);
+ Ops);
} else {
SDValue Ops[] = {
CPIdx,
@@ -2609,7 +2605,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->getEntryNode()
};
ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
- Ops, 5);
+ Ops);
}
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
return NULL;
@@ -2719,7 +2715,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
MVT::i32);
SDValue Ops[] = { N0.getOperand(0), Imm16,
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4);
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
}
break;
@@ -2733,16 +2729,15 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
- CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::UMULL : ARM::UMULLv5,
- dl, MVT::i32, MVT::i32, Ops, 5);
+ dl, MVT::i32, MVT::i32, Ops);
}
}
case ISD::SMUL_LOHI: {
@@ -2751,14 +2746,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::SMULL : ARM::SMULLv5,
- dl, MVT::i32, MVT::i32, Ops, 5);
+ dl, MVT::i32, MVT::i32, Ops);
}
}
case ARMISD::UMLAL:{
@@ -2766,7 +2761,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32)};
- return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+ return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
}else{
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
@@ -2774,7 +2769,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::UMLAL : ARM::UMLALv5,
- dl, MVT::i32, MVT::i32, Ops, 7);
+ dl, MVT::i32, MVT::i32, Ops);
}
}
case ARMISD::SMLAL:{
@@ -2782,7 +2777,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32)};
- return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+ return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
}else{
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
@@ -2790,7 +2785,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::SMLAL : ARM::SMLALv5,
- dl, MVT::i32, MVT::i32, Ops, 7);
+ dl, MVT::i32, MVT::i32, Ops);
}
}
case ISD::LOAD: {
@@ -2833,7 +2828,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
MVT::i32);
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
- MVT::Glue, Ops, 5);
+ MVT::Glue, Ops);
Chain = SDValue(ResNode, 0);
if (N->getNumValues() == 2) {
InFlag = SDValue(ResNode, 1);
@@ -2863,7 +2858,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
- return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
}
case ARMISD::VUZP: {
unsigned Opc = 0;
@@ -2883,7 +2878,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
- return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
}
case ARMISD::VTRN: {
unsigned Opc = 0;
@@ -2902,7 +2897,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
- return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
}
case ARMISD::BUILD_VECTOR: {
EVT VecVT = N->getValueType(0);
@@ -3147,8 +3142,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops.push_back(getAL(CurDAG));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(Chain);
- SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
- Ops.size());
+ SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -3211,8 +3205,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD;
- SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
- Ops.size());
+ SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -3398,7 +3391,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops.push_back(N->getOperand(1));
Ops.push_back(getAL(CurDAG)); // Predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
- return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size());
+ return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
}
case ARMISD::VTBL2: {
DebugLoc dl = N->getDebugLoc();
@@ -3414,8 +3407,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops.push_back(N->getOperand(2));
Ops.push_back(getAL(CurDAG)); // Predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
- return CurDAG->getMachineNode(ARM::VTBL2, dl, VT,
- Ops.data(), Ops.size());
+ return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
}
case ISD::CONCAT_VECTORS:
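Every change in this file is the same mechanical migration: getMachineNode now takes its operands as an ArrayRef<SDValue>, which deduces the length from a C array or SmallVector, so the trailing element counts (Ops, 5 and friends) disappear. A self-contained sketch of how an ArrayRef-style parameter removes the count argument, using a simplified stand-in type rather than LLVM's ArrayRef:

    #include <cstddef>
    #include <cstdio>

    // Minimal ArrayRef-like view: constructible from a C array, so the callee
    // learns the size without a separate count parameter.
    template <typename T> struct ArrayRefLike {
      const T *Data;
      size_t Size;
      template <size_t N>
      ArrayRefLike(const T (&Arr)[N]) : Data(Arr), Size(N) {}
    };

    static void getMachineNodeLike(ArrayRefLike<int> Ops) {
      std::printf("%zu operands\n", Ops.Size);
    }

    int main() {
      const int Ops[] = {10, 20, 30, 40, 50};
      getMachineNodeLike(Ops);   // old API would have required "Ops, 5"
    }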
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index bb26090..e49cfc4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -729,7 +729,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
(Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
// membarrier needs custom lowering; the rest are legal and handled
// normally.
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom lowering for 64-bit ops
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
@@ -747,7 +746,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setInsertFencesForAtomic(true);
} else {
// Set them all for expansion, which will force libcalls.
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
@@ -765,8 +763,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// Unordered/Monotonic case.
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
- // Since the libcalls include locking, fold in the fences
- setShouldFoldAtomicFences(true);
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
@@ -1238,7 +1234,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ SmallVectorImpl<SDValue> &InVals,
+ bool isThisReturn, SDValue ThisVal) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
@@ -1252,6 +1249,15 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
+    // Pass the 'this' value directly from the argument to the return value,
+    // to avoid register unit interference.
+ if (i == 0 && isThisReturn) {
+ assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
+ "unexpected return calling convention register assignment");
+ InVals.push_back(ThisVal);
+ continue;
+ }
+
SDValue Val;
if (VA.needsCustom()) {
// Handle f64 or half of a v2f64.
@@ -1363,21 +1369,22 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
- bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
- bool IsSibCall = false;
+ bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool isThisReturn = false;
+ bool isSibCall = false;
// Disable tail calls if they're not supported.
if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
isTailCall = false;
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
// We don't support GuaranteedTailCallOpt for ARM, only automatically
// detected sibcalls.
if (isTailCall) {
++NumTailCalls;
- IsSibCall = true;
+ isSibCall = true;
}
}
@@ -1393,12 +1400,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned NumBytes = CCInfo.getNextStackOffset();
// For tail calls, memory operands are available in our caller's stack.
- if (IsSibCall)
+ if (isSibCall)
NumBytes = 0;
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- if (!IsSibCall)
+ if (!isSibCall)
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
@@ -1460,6 +1467,13 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
StackPtr, MemOpChains, Flags);
}
} else if (VA.isRegLoc()) {
+ if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
+ assert(VA.getLocVT() == MVT::i32 &&
+ "unexpected calling convention register assignment");
+ assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
+ "unexpected use of 'returned'");
+ isThisReturn = true;
+ }
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else if (isByVal) {
assert(VA.isMemLoc());
@@ -1467,10 +1481,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// True if this byval aggregate will be split between registers
// and memory.
- if (CCInfo.isFirstByValRegValid()) {
+ unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
+ unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
+
+ if (CurByValIdx < ByValArgsCount) {
+
+ unsigned RegBegin, RegEnd;
+ CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
+
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
unsigned int i, j;
- for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
+ for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
SDValue Const = DAG.getConstant(4*i, MVT::i32);
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
@@ -1479,11 +1500,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
- offset = ARM::R4 - CCInfo.getFirstByValReg();
- CCInfo.clearFirstByValReg();
+
+      // If the parameter size exceeds the register area, the "offset" value
+      // helps us compute the stack slot for the remaining part properly.
+ offset = RegEnd - RegBegin;
+
+ CCInfo.nextInRegsParam();
}
- if (Flags.getByValSize() - 4*offset > 0) {
+ if (Flags.getByValSize() > 4*offset) {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
@@ -1499,7 +1524,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
Ops, array_lengthof(Ops)));
}
- } else if (!IsSibCall) {
+ } else if (!isSibCall) {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
@@ -1539,7 +1564,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
- InFlag =SDValue();
+ InFlag = SDValue();
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -1680,8 +1705,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
+ const uint32_t *Mask;
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
+ if (isThisReturn)
+ // For 'this' returns, use the R0-preserving mask
+ Mask = ARI->getThisReturnPreservedMask(CallConv);
+ else
+ Mask = ARI->getCallPreservedMask(CallConv);
+
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1703,8 +1735,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
- dl, DAG, InVals);
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals, isThisReturn,
+ isThisReturn ? OutVals[0] : SDValue());
}
/// HandleByVal - Every parameter *after* a byval parameter is passed
@@ -1718,8 +1751,24 @@ ARMTargetLowering::HandleByVal(
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
- if ((!State->isFirstByValRegValid()) &&
- (ARM::R0 <= reg) && (reg <= ARM::R3)) {
+
+  // For in-prologue parameter handling, we also introduce a stack offset
+  // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal.
+  // This behaviour falls outside the AAPCS rules (5.5 Parameter Passing) for
+  // how the NSAA ("next stacked argument address") should be evaluated.
+  // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs.
+  // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
+ unsigned NSAAOffset = State->getNextStackOffset();
+ if (State->getCallOrPrologue() != Call) {
+ for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) {
+ unsigned RB, RE;
+ State->getInRegsParamInfo(i, RB, RE);
+      assert(NSAAOffset >= (RE-RB)*4 &&
+             "Stack offset for byval regs no longer introduced?");
+ NSAAOffset -= (RE-RB)*4;
+ }
+ }
+ if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
if (Subtarget->isAAPCS_ABI() && Align > 4) {
unsigned AlignInRegs = Align / 4;
unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
@@ -1727,22 +1776,45 @@ ARMTargetLowering::HandleByVal(
reg = State->AllocateReg(GPRArgRegs, 4);
}
if (reg != 0) {
- State->setFirstByValReg(reg);
+ unsigned excess = 4 * (ARM::R4 - reg);
+
+      // Special case when NSAA != SP and the parameter size is greater than
+      // the size of all remaining GPR regs. In that case we can't split the
+      // parameter and must send it to the stack. We also must set the NCRN
+      // to R4, wasting all remaining registers.
+ if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
+ while (State->AllocateReg(GPRArgRegs, 4))
+ ;
+ return;
+ }
+
+      // The first register for the byval parameter is the first register that
+      // wasn't allocated before this method call, i.e. "reg".
+      // If the parameter is small enough to fit in the range [reg, r4), the
+      // end (one past the last) register is reg + param-size-in-regs;
+      // otherwise the parameter is split between registers and the stack,
+      // and the end register is r4.
+ unsigned ByValRegBegin = reg;
+ unsigned ByValRegEnd = (size < excess) ? reg + size/4 : ARM::R4;
+ State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+      // Note: the first register was already allocated at the beginning of
+      // this function; allocate the remaining registers we need.
+ for (unsigned i = reg+1; i != ByValRegEnd; ++i)
+ State->AllocateReg(GPRArgRegs, 4);
// At a call site, a byval parameter that is split between
// registers and memory needs its size truncated here. In a
// function prologue, such byval parameters are reassembled in
// memory, and are not truncated.
if (State->getCallOrPrologue() == Call) {
- unsigned excess = 4 * (ARM::R4 - reg);
- assert(size >= excess && "expected larger existing stack allocation");
- size -= excess;
+        // Make the remaining size zero when the whole structure can be
+        // stored in registers.
+ if (size < excess)
+ size = 0;
+ else
+ size -= excess;
}
}
}
- // Confiscate any remaining parameter registers to preclude their
- // assignment to subsequent parameters.
- while (State->AllocateReg(GPRArgRegs, 4))
- ;
}
/// MatchingStackOffset - Return true if the given stack call argument is
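The reworked HandleByVal above records, per byval argument, the half-open GPR range [ByValRegBegin, ByValRegEnd) that holds its leading bytes, instead of the old single first-register value. A hedged sketch of that range computation, with plain unsigned indices standing in for the register enum:

    #include <cstdio>

    // reg: first still-unallocated arg register index (0..3); R4 == 4.
    // sizeBytes: byval parameter size. Computes the half-open register range
    // [Begin, End) that will hold the leading bytes of the parameter.
    void byvalRegRange(unsigned reg, unsigned sizeBytes,
                       unsigned &Begin, unsigned &End) {
      const unsigned R4 = 4;
      unsigned excess = 4 * (R4 - reg);   // bytes the remaining regs can hold
      Begin = reg;
      End = (sizeBytes < excess) ? reg + sizeBytes / 4 : R4;
    }

    int main() {
      unsigned B, E;
      byvalRegRange(1, 8, B, E);              // 8-byte struct starting at r1
      std::printf("regs [r%u, r%u)\n", B, E); // regs [r1, r3)
    }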
@@ -1874,7 +1946,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// local frame.
const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
getInfo<ARMFunctionInfo>();
- if (AFI_Caller->getVarArgsRegSaveSize())
+ if (AFI_Caller->getArgRegsSaveSize())
return false;
// If the callee takes no arguments then go on to check the results of the
@@ -2461,35 +2533,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
}
}
-static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget) {
- DebugLoc dl = Op.getDebugLoc();
- if (!Subtarget->hasDataBarrier()) {
- // Some ARMv6 cpus can support data barriers with an mcr instruction.
- // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
- // here.
- assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
- "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
- return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(0, MVT::i32));
- }
-
- SDValue Op5 = Op.getOperand(5);
- bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
- unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
- bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
-
- ARM_MB::MemBOpt DMBOpt;
- if (isDeviceBarrier)
- DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
- else
- DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
- return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(DMBOpt, MVT::i32));
-}
-
-
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
// FIXME: handle "fence singlethread" more efficiently.
@@ -2586,12 +2629,16 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
void
ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned &VARegSize, unsigned &VARegSaveSize)
+ unsigned InRegsParamRecordIdx,
+ unsigned &ArgRegsSize,
+ unsigned &ArgRegsSaveSize)
const {
unsigned NumGPRs;
- if (CCInfo.isFirstByValRegValid())
- NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
- else {
+ if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+ unsigned RBegin, REnd;
+ CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+ NumGPRs = REnd - RBegin;
+ } else {
unsigned int firstUnalloced;
firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
sizeof(GPRArgRegs) /
@@ -2600,8 +2647,8 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
}
unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
- VARegSize = NumGPRs * 4;
- VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+ ArgRegsSize = NumGPRs * 4;
+ ArgRegsSaveSize = (ArgRegsSize + Align - 1) & ~(Align - 1);
}
// The remaining GPRs hold either the beginning of variable-argument
@@ -2611,40 +2658,60 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
// If this is a variadic function, the va_list pointer will begin with
// these values; otherwise, this reassembles a (byval) structure that
// was split between registers and memory.
-void
-ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
- DebugLoc dl, SDValue &Chain,
- const Value *OrigArg,
- unsigned OffsetFromOrigArg,
- unsigned ArgOffset,
- bool ForceMutable) const {
+// Return: The frame index registers were stored into.
+int
+ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ const Value *OrigArg,
+ unsigned InRegsParamRecordIdx,
+ unsigned OffsetFromOrigArg,
+ unsigned ArgOffset,
+ bool ForceMutable) const {
+
+  // Currently, two use-cases are possible:
+  // Case #1. Non-vararg function, and we meet the first byval parameter.
+  //          Set up the first unallocated register as the first byval
+  //          register; eat all remaining registers
+  //          (these two actions are performed by the HandleByVal method).
+  //          Then, here, we initialize the stack frame with
+  //          "store-reg" instructions.
+  // Case #2. Vararg function that doesn't contain byval parameters.
+  //          The same: eat all remaining unallocated registers and
+  //          initialize the stack frame.
+
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned firstRegToSaveIndex;
- if (CCInfo.isFirstByValRegValid())
- firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
- else {
+ unsigned firstRegToSaveIndex, lastRegToSaveIndex;
+ unsigned RBegin, REnd;
+ if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+ CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+ firstRegToSaveIndex = RBegin - ARM::R0;
+ lastRegToSaveIndex = REnd - ARM::R0;
+ } else {
firstRegToSaveIndex = CCInfo.getFirstUnallocated
(GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+ lastRegToSaveIndex = 4;
}
- unsigned VARegSize, VARegSaveSize;
- computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
- if (VARegSaveSize) {
- // If this function is vararg, store any remaining integer argument regs
- // to their spots on the stack so that they may be loaded by deferencing
- // the result of va_next.
- AFI->setVarArgsRegSaveSize(VARegSaveSize);
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
- ArgOffset + VARegSaveSize
- - VARegSize,
- false));
- SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
- getPointerTy());
+ unsigned ArgRegsSize, ArgRegsSaveSize;
+ computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgRegsSize, ArgRegsSaveSize);
+
+  // Store any byval regs to their spots on the stack so that they may be
+  // loaded by dereferencing the result of the formal parameter pointer or
+  // va_next.
+  // Note: once the stack area for byval/vararg registers has been
+  // initialized, it can't be initialized again.
+ if (ArgRegsSaveSize) {
+
+ int FrameIndex = MFI->CreateFixedObject(
+ ArgRegsSaveSize,
+ ArgOffset + ArgRegsSaveSize - ArgRegsSize,
+ false);
+ SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
SmallVector<SDValue, 4> MemOps;
- for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
+ for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
+ ++firstRegToSaveIndex, ++i) {
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = &ARM::tGPRRegClass;
@@ -2661,13 +2728,37 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
}
+
+ AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
+
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOps[0], MemOps.size());
+ return FrameIndex;
} else
// This will point to the next argument passed via stack.
- AFI->setVarArgsFrameIndex(
- MFI->CreateFixedObject(4, ArgOffset, !ForceMutable));
+ return MFI->CreateFixedObject(4, ArgOffset, !ForceMutable);
+}
+
+// Set up the stack frame that the va_list pointer will start from.
+void
+ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ unsigned ArgOffset,
+ bool ForceMutable) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+  // Try to store any remaining integer argument regs to their spots on the
+  // stack so that they may be loaded by dereferencing the result of va_next.
+  // If there are no regs to be stored, just point at the address after the
+  // last argument passed via the stack.
+ int FrameIndex =
+ StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
+ 0, ArgOffset, ForceMutable);
+
+ AFI->setVarArgsFrameIndex(FrameIndex);
}
SDValue
@@ -2696,6 +2787,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue ArgValue;
Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
+
+  // Initially ArgRegsSaveSize is zero.
+  // Then we increase this value each time we meet a byval parameter,
+  // and also for vararg functions.
+ AFI->setArgRegsSaveSize(0);
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
@@ -2793,20 +2890,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- if (!AFI->getVarArgsFrameIndex()) {
- VarArgStyleRegisters(CCInfo, DAG,
- dl, Chain, CurOrigArg,
- Ins[VA.getValNo()].PartOffset,
- VA.getLocMemOffset(),
- true /*force mutable frames*/);
- int VAFrameIndex = AFI->getVarArgsFrameIndex();
- InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy()));
- } else {
- int FI = MFI->CreateFixedObject(Flags.getByValSize(),
- VA.getLocMemOffset(), false);
- InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
- }
+ unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
+ int FrameIndex = StoreByValRegs(
+ CCInfo, DAG, dl, Chain, CurOrigArg,
+ CurByValIndex,
+ Ins[VA.getValNo()].PartOffset,
+ VA.getLocMemOffset(),
+ true /*force mutable frames*/);
+ InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
+ CCInfo.nextInRegsParam();
} else {
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
VA.getLocMemOffset(), true);
@@ -2824,7 +2916,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// varargs
if (isVarArg)
- VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0,
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
CCInfo.getNextStackOffset());
return Chain;
@@ -5165,6 +5257,23 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
return false;
}
+static EVT getExtensionTo64Bits(const EVT &OrigVT) {
+ if (OrigVT.getSizeInBits() >= 64)
+ return OrigVT;
+
+ assert(OrigVT.isSimple() && "Expecting a simple value type");
+
+ MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
+ switch (OrigSimpleTy) {
+ default: llvm_unreachable("Unexpected Vector Type");
+ case MVT::v2i8:
+ case MVT::v2i16:
+ return MVT::v2i32;
+ case MVT::v4i8:
+ return MVT::v4i16;
+ }
+}
+
/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
/// We insert the required extension here to get the vector to fill a D register.
@@ -5180,18 +5289,8 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
return N;
// Must extend size to at least 64 bits to be used as an operand for VMULL.
- MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy;
- EVT NewVT;
- switch (OrigSimpleTy) {
- default: llvm_unreachable("Unexpected Orig Vector Type");
- case MVT::v2i8:
- case MVT::v2i16:
- NewVT = MVT::v2i32;
- break;
- case MVT::v4i8:
- NewVT = MVT::v4i16;
- break;
- }
+ EVT NewVT = getExtensionTo64Bits(OrigTy);
+
return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N);
}
@@ -5201,22 +5300,22 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
/// reach a total size of 64 bits. We have to add the extension separately
/// because ARM does not have a sign/zero extending load for vectors.
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
- SDValue NonExtendingLoad =
- DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
+ EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
+
+ // The load already has the right type.
+ if (ExtendedTy == LD->getMemoryVT())
+ return DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
LD->isNonTemporal(), LD->isInvariant(),
LD->getAlignment());
- unsigned ExtOp = 0;
- switch (LD->getExtensionType()) {
- default: llvm_unreachable("Unexpected LoadExtType");
- case ISD::EXTLOAD:
- case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break;
- case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break;
- }
- MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy;
- MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy;
- return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG,
- MemType, ExtType, ExtOp);
+
+ // We need to create a zextload/sextload. We cannot just create a load
+ // followed by a zext/zext node because LowerMUL is also run during normal
+ // operation legalization where we can't create illegal types.
+ return DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), ExtendedTy,
+ LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
+ LD->getMemoryVT(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
}
/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
@@ -5614,7 +5713,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
- case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index 9ee17f0..426010e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -464,7 +464,8 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ bool isThisReturn, SDValue ThisVal) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
@@ -473,16 +474,23 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+ int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ const Value *OrigArg,
+ unsigned InRegsParamRecordIdx,
+ unsigned OffsetFromOrigArg,
+ unsigned ArgOffset,
+ bool ForceMutable) const;
+
void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
DebugLoc dl, SDValue &Chain,
- const Value *OrigArg,
- unsigned OffsetFromOrigArg,
unsigned ArgOffset,
- bool ForceMutable = false)
- const;
+ bool ForceMutable = false) const;
void computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned &VARegSize, unsigned &VARegSaveSize) const;
+ unsigned InRegsParamRecordIdx,
+ unsigned &ArgRegsSize,
+ unsigned &ArgRegsSaveSize) const;
virtual SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index 11550c5..1bd174e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -221,6 +221,9 @@ def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
def HasMP : Predicate<"Subtarget->hasMPExtension()">,
AssemblerPredicate<"FeatureMP",
"mp-extensions">;
+def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">,
+ AssemblerPredicate<"FeatureTrustZone",
+ "TrustZone">;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
def IsThumb : Predicate<"Subtarget->isThumb()">,
@@ -578,6 +581,17 @@ def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
+/// imm0_4 predicate - Immediate in the range [0,4].
+def Imm0_4AsmOperand : ImmAsmOperand {
+ let Name = "Imm0_4";
+ let DiagnosticType = "ImmRange0_4";
+}
+def imm0_4 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 5; }]> {
+ let ParserMatchClass = Imm0_4AsmOperand;
+ let DecoderMethod = "DecodeImm0_4";
+}
+
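The new imm0_4 operand must agree with the ARMOperand::isImm0_4() parser check and the DecodeImm0_4() disassembler hook added further down in this diff. A minimal standalone sketch of the shared range check (plain C++, illustration only; #0..#4 is the architectural nop/yield/wfe/wfi/sev set):

    // One predicate, three layers: the ImmLeaf, the AsmParser and the
    // Disassembler must all accept exactly the same [0,4] range.
    static inline bool isValidHintImm(int64_t Imm) {
      return Imm >= 0 && Imm < 5; // #0 nop, #1 yield, #2 wfe, #3 wfi, #4 sev
    }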
/// imm0_7 predicate - Immediate in the range [0,7].
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
@@ -741,18 +755,26 @@ def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
// addrmode_imm12 := reg +/- imm12
//
def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; }
-def addrmode_imm12 : Operand<i32>,
+class AddrMode_Imm12 : Operand<i32>,
ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
// 12-bit immediate operand. Note that instructions using this encode
// #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
// immediate values are as normal.
let EncoderMethod = "getAddrModeImm12OpValue";
- let PrintMethod = "printAddrModeImm12Operand";
let DecoderMethod = "DecodeAddrModeImm12Operand";
let ParserMatchClass = MemImm12OffsetAsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
+
+def addrmode_imm12 : AddrMode_Imm12 {
+ let PrintMethod = "printAddrModeImm12Operand<false>";
+}
+
+def addrmode_imm12_pre : AddrMode_Imm12 {
+ let PrintMethod = "printAddrModeImm12Operand<true>";
+}
+
// ldst_so_reg := reg +/- reg shop imm
//
def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; }
@@ -852,14 +874,23 @@ def am2offset_imm : Operand<i32>,
//
// FIXME: split into imm vs. reg versions.
def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; }
-def addrmode3 : Operand<i32>,
- ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+class AddrMode3 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode3", []> {
let EncoderMethod = "getAddrMode3OpValue";
- let PrintMethod = "printAddrMode3Operand";
let ParserMatchClass = AddrMode3AsmOperand;
let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
}
+def addrmode3 : AddrMode3 {
+ let PrintMethod = "printAddrMode3Operand<false>";
+}
+
+def addrmode3_pre : AddrMode3 {
+ let PrintMethod = "printAddrMode3Operand<true>";
+}
+
// FIXME: split into imm vs. reg versions.
// FIXME: parser method to handle +/- register.
def AM3OffsetAsmOperand : AsmOperandClass {
@@ -885,15 +916,22 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
// addrmode5 := reg +/- imm8*4
//
def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; }
-def addrmode5 : Operand<i32>,
- ComplexPattern<i32, 2, "SelectAddrMode5", []> {
- let PrintMethod = "printAddrMode5Operand";
+class AddrMode5 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode5", []> {
let EncoderMethod = "getAddrMode5OpValue";
let DecoderMethod = "DecodeAddrMode5Operand";
let ParserMatchClass = AddrMode5AsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm);
}
+def addrmode5 : AddrMode5 {
+ let PrintMethod = "printAddrMode5Operand<false>";
+}
+
+def addrmode5_pre : AddrMode5 {
+ let PrintMethod = "printAddrMode5Operand<true>";
+}
+
// addrmode6 := reg with optional alignment
//
def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; }
@@ -1668,11 +1706,11 @@ def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
NoItinerary, []>;
}
-def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary,
+def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary,
"hint", "\t$imm", []>, Requires<[IsARM, HasV6]> {
- bits<8> imm;
- let Inst{27-8} = 0b00110010000011110000;
- let Inst{7-0} = imm;
+ bits<3> imm;
+ let Inst{27-3} = 0b0011001000001111000000000;
+ let Inst{2-0} = imm;
}
def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
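For illustration, the A1 HINT word can be assembled by hand from the bit layout above (a sketch, not part of the patch; the 0xE nibble assumes the AL condition):

    // cond(31-28) | 0b0011001000001111000000000 (bits 27-3) | imm(2-0)
    static uint32_t encodeARMHint(uint32_t Imm) { // Imm in [0,4]
      return (0xEu << 28) | 0x0320F000u | (Imm & 0x7u);
    }
    // encodeARMHint(0) == 0xE320F000 (nop), encodeARMHint(2) == 0xE320F002 (wfe)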
@@ -2077,7 +2115,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
// Secure Monitor Call is a system instruction.
def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
- []> {
+ []>, Requires<[IsARM, HasTrustZone]> {
bits<4> opt;
let Inst{23-4} = 0b01100000000000000111;
let Inst{3-0} = opt;
@@ -2238,7 +2276,7 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
multiclass AI2_ldridx<bit isByte, string opc,
InstrItinClass iii, InstrItinClass iir> {
def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins addrmode_imm12:$addr), IndexModePre, LdFrm, iii,
+ (ins addrmode_imm12_pre:$addr), IndexModePre, LdFrm, iii,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 0;
@@ -2275,6 +2313,7 @@ multiclass AI2_ldridx<bit isByte, string opc,
let Inst{23} = offset{12};
let Inst{19-16} = addr;
let Inst{11-0} = offset{11-0};
+ let Inst{4} = 0;
let DecoderMethod = "DecodeAddrMode2IdxInstruction";
}
@@ -2307,7 +2346,7 @@ defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>;
multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
def _PRE : AI3ldstidx<op, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins addrmode3:$addr), IndexModePre,
+ (ins addrmode3_pre:$addr), IndexModePre,
LdMiscFrm, itin,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<14> addr;
@@ -2341,7 +2380,7 @@ defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>;
defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>;
let hasExtraDefRegAllocReq = 1 in {
def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
- (ins addrmode3:$addr), IndexModePre,
+ (ins addrmode3_pre:$addr), IndexModePre,
LdMiscFrm, IIC_iLoad_d_ru,
"ldrd", "\t$Rt, $Rt2, $addr!",
"$addr.base = $Rn_wb", []> {
@@ -2497,7 +2536,7 @@ def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
multiclass AI2_stridx<bit isByte, string opc,
InstrItinClass iii, InstrItinClass iir> {
def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre,
+ (ins GPR:$Rt, addrmode_imm12_pre:$addr), IndexModePre,
StFrm, iii,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
@@ -2619,7 +2658,7 @@ def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, addrmode3:$addr), IndexModePre,
+ (ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre,
StMiscFrm, IIC_iStore_bh_ru,
"strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<14> addr;
@@ -2651,7 +2690,7 @@ def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb),
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
+ (ins GPR:$Rt, GPR:$Rt2, addrmode3_pre:$addr),
IndexModePre, StMiscFrm, IIC_iStore_d_ru,
"strd", "\t$Rt, $Rt2, $addr!",
"$addr.base = $Rn_wb", []> {
@@ -4426,7 +4465,7 @@ multiclass LdStCop<bit load, bit Dbit, string asm> {
let Inst{7-0} = addr{7-0};
let DecoderMethod = "DecodeCopMemInstruction";
}
- def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
asm, "\t$cop, $CRd, $addr!", IndexModePre> {
bits<13> addr;
bits<4> cop;
@@ -4497,7 +4536,7 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> {
let Inst{7-0} = addr{7-0};
let DecoderMethod = "DecodeCopMemInstruction";
}
- def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
asm, "\t$cop, $CRd, $addr!", IndexModePre> {
bits<13> addr;
bits<4> cop;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 0411ac4..896fd0f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -4316,6 +4316,24 @@ def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
+ (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
+ (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
+ (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
+ (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
+ (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
+ (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
+ (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
+ (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+
// Vector Bitwise Operations.
def vnotd : PatFrag<(ops node:$in),
@@ -4889,6 +4907,29 @@ def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
"vabs", "f32",
v4f32, v4f32, fabs>;
+def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
+ (v2i32 (bitconvert (v8i8 (add DPR:$src,
+ (NEONvshrs DPR:$src, (i32 7))))))),
+ (VABSv8i8 DPR:$src)>;
+def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))),
+ (v2i32 (bitconvert (v4i16 (add DPR:$src,
+ (NEONvshrs DPR:$src, (i32 15))))))),
+ (VABSv4i16 DPR:$src)>;
+def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))),
+ (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))),
+ (VABSv2i32 DPR:$src)>;
+def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))),
+ (v4i32 (bitconvert (v16i8 (add QPR:$src,
+ (NEONvshrs QPR:$src, (i32 7))))))),
+ (VABSv16i8 QPR:$src)>;
+def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))),
+ (v4i32 (bitconvert (v8i16 (add QPR:$src,
+ (NEONvshrs QPR:$src, (i32 15))))))),
+ (VABSv8i16 QPR:$src)>;
+def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))),
+ (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))),
+ (VABSv4i32 QPR:$src)>;
+
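The six patterns above recognize the classic branchless integer-abs idiom and map it onto a single VABS. The scalar shape of the idiom, for reference (32-bit lane case, plain C++ sketch):

    static int32_t absIdiom(int32_t x) {
      int32_t s = x >> 31;  // arithmetic shift: 0 for x >= 0, -1 otherwise
      return (x + s) ^ s;   // == |x|; INT_MIN wraps, matching vabs semantics
    }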
def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>;
def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index c9d709e..4dacb86 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -150,7 +150,7 @@ def lo5AllOne : PatLeaf<(i32 imm), [{
def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
def t2addrmode_imm12 : Operand<i32>,
ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
- let PrintMethod = "printAddrModeImm12Operand";
+ let PrintMethod = "printAddrModeImm12Operand<false>";
let EncoderMethod = "getAddrModeImm12OpValue";
let DecoderMethod = "DecodeT2AddrModeImm12";
let ParserMatchClass = t2addrmode_imm12_asmoperand;
@@ -3401,12 +3401,7 @@ class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
bits<5> mode;
bit M;
- let Inst{31-27} = 0b11110;
- let Inst{26} = 0;
- let Inst{25-20} = 0b111010;
- let Inst{19-16} = 0b1111;
- let Inst{15-14} = 0b10;
- let Inst{12} = 0;
+ let Inst{31-11} = 0b111100111010111110000;
let Inst{10-9} = imod;
let Inst{8} = M;
let Inst{7-5} = iflags;
@@ -3425,13 +3420,13 @@ let imod = 0, iflags = 0, M = 1 in
// A6.3.4 Branches and miscellaneous control
// Table A6-14 Change Processor State, and hint instructions
-def t2HINT : T2I<(outs), (ins imm0_255:$imm), NoItinerary, "hint", "\t$imm",[]>{
- bits<8> imm;
- let Inst{31-8} = 0b111100111010111110000000;
- let Inst{7-0} = imm;
+def t2HINT : T2I<(outs), (ins imm0_4:$imm), NoItinerary, "hint", "\t$imm",[]> {
+ bits<3> imm;
+ let Inst{31-3} = 0b11110011101011111000000000000;
+ let Inst{2-0} = imm;
}
-def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_255:$imm, pred:$p)>;
+def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_4:$imm, pred:$p)>;
def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>;
def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>;
def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>;
@@ -3449,7 +3444,8 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
// Secure Monitor Call is a system instruction.
// Option = Inst{19-16}
-def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []> {
+def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
+ []>, Requires<[IsThumb2, HasTrustZone]> {
let Inst{31-27} = 0b11110;
let Inst{26-20} = 0b1111111;
let Inst{15-12} = 0b1000;
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index b7ac5d5..c8ed576 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -87,53 +87,6 @@ namespace {
MachineBasicBlock::iterator i)
: Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
};
- class UnitRegsMap {
- public:
- UnitRegsMap(const TargetRegisterInfo* _TRI) : TRI(_TRI) {}
- const SmallVector<unsigned, 4>& operator[](unsigned Reg) {
- DenseMap<unsigned, SmallVector<unsigned, 4> >::iterator found =
- Cache.find(Reg);
- if (found != Cache.end())
- return found->second;
- else
- return Cache.insert(std::make_pair(Reg, this->getUnitRegs(Reg)))
- .first->second;
- }
- private:
- SmallVector<unsigned, 4> getUnitRegs(unsigned Reg) {
- SmallVector<unsigned, 4> Res;
-
- const TargetRegisterClass* TRC = TRI->getMinimalPhysRegClass(Reg);
- if (TRC == &ARM::QPRRegClass) {
- if (Reg > ARM::Q7) {
- Res.push_back(TRI->getSubReg(Reg, ARM::dsub_0));
- Res.push_back(TRI->getSubReg(Reg, ARM::dsub_1));
- return Res;
- }
-
- Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
- Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
- Res.push_back(TRI->getSubReg(Reg, ARM::ssub_2));
- Res.push_back(TRI->getSubReg(Reg, ARM::ssub_3));
-
- return Res;
- }
-
- if (TRC == &ARM::DPRRegClass && Reg < ARM::D15) {
- Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
- Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
-
- return Res;
- }
-
- Res.push_back(Reg);
-
- return Res;
-
- }
- const TargetRegisterInfo* TRI;
- DenseMap<unsigned, SmallVector<unsigned, 4> > Cache;
- };
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
@@ -175,11 +128,6 @@ namespace {
MachineBasicBlock::iterator MBBI,
bool &Advance,
MachineBasicBlock::iterator &I);
- unsigned AddMemOp(MemOpQueue& MemOps,
- const MemOpQueueEntry newEntry,
- UnitRegsMap& UnitRegsInfo,
- SmallSet<unsigned, 4>& UsedUnitRegs,
- unsigned At = -1U);
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
@@ -1265,103 +1213,12 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
return false;
}
-/// AddMemOp - helper for ARMLoadStoreOpt::LoadStoreMultipleOpti.
-/// It adds store mem ops with simple push_back/insert method,
-/// without any additional logic.
-/// For load operation it does the next:
-/// 1. Adds new load operation into MemOp collection at "At" position.
-/// 2. Removes any "load" operations from MemOps, that changes "Reg" register
-/// contents, prior to "At".
-/// UnitRegsInfo - Map of type Map< Register, UnitRegisters-vector >
-/// UsedUnitRegs - set of unit-registers currently in use.
-/// At - position at which it would added, and prior which the clean-up
-/// should be made (for load operation).
-/// FIXME: The clean-up also should be made for store operations,
-/// but the memory address should be analyzed instead of unit registers.
-unsigned ARMLoadStoreOpt::AddMemOp(MemOpQueue& MemOps,
- const MemOpQueueEntry NewEntry,
- UnitRegsMap& UnitRegsInfo,
- SmallSet<unsigned, 4>& UsedUnitRegs,
- unsigned At) {
- unsigned Cleaned = 0;
-
- if (At == -1U) {
- At = MemOps.size();
- MemOps.push_back(NewEntry);
- } else
- MemOps.insert(&MemOps[At], NewEntry);
-
- // FIXME:
- // If operation is not load, leave it as is by now,
- // So 0 overridden ops would cleaned in this case.
- if (!NewEntry.MBBI->mayLoad())
- return 0;
-
- const SmallVector<unsigned, 4>& NewEntryUnitRegs = UnitRegsInfo[NewEntry.Reg];
-
- bool FoundOverriddenLoads = false;
-
- for (unsigned i = 0, e = NewEntryUnitRegs.size(); i != e; ++i)
- if (UsedUnitRegs.count(NewEntryUnitRegs[i])) {
- FoundOverriddenLoads = true;
- break;
- }
-
- // If we detect that this register is used by load operations that are
- // predecessors for the new one, remove them from MemOps then.
- if (FoundOverriddenLoads) {
- MemOpQueue UpdatedMemOps;
-
- // Scan through MemOps entries.
- for (unsigned i = 0; i != At; ++i) {
- MemOpQueueEntry& MemOpEntry = MemOps[i];
-
- // FIXME: Skip non-load operations by now.
- if (!MemOpEntry.MBBI->mayLoad())
- continue;
-
- const SmallVector<unsigned, 4>& MemOpUnitRegs =
- UnitRegsInfo[MemOpEntry.Reg];
-
- // Lookup entry that loads contents into register used by new entry.
- bool ReleaseThisEntry = false;
- for (unsigned m = 0, em = MemOpUnitRegs.size(); m != em; ++m) {
- if (std::find(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end(),
- MemOpUnitRegs[m]) != NewEntryUnitRegs.end()) {
- ReleaseThisEntry = true;
- ++Cleaned;
- break;
- }
- }
-
- if (ReleaseThisEntry) {
- const SmallVector<unsigned, 4>& RelesedRegs = UnitRegsInfo[MemOpEntry.Reg];
- for (unsigned r = 0, er = RelesedRegs.size(); r != er; ++r)
- UsedUnitRegs.erase(RelesedRegs[r]);
- } else
- UpdatedMemOps.push_back(MemOpEntry);
- }
-
- // Keep anything without changes after At position.
- for (unsigned i = At, e = MemOps.size(); i != e; ++i)
- UpdatedMemOps.push_back(MemOps[i]);
-
- MemOps.swap(UpdatedMemOps);
- }
-
- UsedUnitRegs.insert(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end());
-
- return Cleaned;
-}
-
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
unsigned NumMerges = 0;
unsigned NumMemOps = 0;
MemOpQueue MemOps;
- UnitRegsMap UnitRegsInfo(TRI);
- SmallSet<unsigned, 4> UsedRegUnits;
unsigned CurrBase = 0;
int CurrOpc = -1;
unsigned CurrSize = 0;
@@ -1401,6 +1258,22 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// merge the ldr's so far, including this one. But don't try to
// combine the following ldr(s).
Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
+
+ // Watch out for:
+ // r4 := ldr [r0, #8]
+ // r4 := ldr [r0, #4]
+ //
+ // The optimization may reorder the second ldr in front of the first
+ // ldr, which would violate the write-after-write (WAW) dependence. The
+ // same holds for str. Try to merge the inst(s) already in MemOps instead.
+ bool Overlap = false;
+ for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
+ if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
+ Overlap = true;
+ break;
+ }
+ }
+
if (CurrBase == 0 && !Clobber) {
// Start of a new chain.
CurrBase = Base;
@@ -1408,13 +1281,10 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
CurrSize = Size;
CurrPred = Pred;
CurrPredReg = PredReg;
-
MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
++NumMemOps;
- const SmallVector<unsigned, 4>& EntryUnitRegs = UnitRegsInfo[Reg];
- UsedRegUnits.insert(EntryUnitRegs.begin(), EntryUnitRegs.end());
Advance = true;
- } else {
+ } else if (!Overlap) {
if (Clobber) {
TryMerge = true;
Advance = true;
@@ -1424,24 +1294,20 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// No need to match PredReg.
// Continue adding to the queue.
if (Offset > MemOps.back().Offset) {
- unsigned OverridesCleaned =
- AddMemOp(MemOps,
- MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI),
- UnitRegsInfo, UsedRegUnits) != 0;
- NumMemOps += 1 - OverridesCleaned;
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
Advance = true;
} else {
- for (unsigned I = 0; I != NumMemOps; ++I) {
- if (Offset < MemOps[I].Offset) {
- MemOpQueueEntry entry(Offset, Reg, isKill, Position, MBBI);
- unsigned OverridesCleaned =
- AddMemOp(MemOps, entry, UnitRegsInfo,
- UsedRegUnits, I) != 0;
- NumMemOps += 1 - OverridesCleaned;
-
+ for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
+ I != E; ++I) {
+ if (Offset < I->Offset) {
+ MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
Advance = true;
break;
- } else if (Offset == MemOps[I].Offset) {
+ } else if (Offset == I->Offset) {
// Collision! This can't be merged!
break;
}
@@ -1512,7 +1378,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
CurrPredReg = 0;
if (NumMemOps) {
MemOps.clear();
- UsedRegUnits.clear();
NumMemOps = 0;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 88d96c0..f4248fc 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -38,7 +38,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// VarArgsRegSaveSize - Size of the register save area for vararg functions.
///
- unsigned VarArgsRegSaveSize;
+ unsigned ArgRegsSaveSize;
/// HasStackFrame - True if this function has a stack frame. Set by
/// processFunctionBeforeCalleeSavedScan().
@@ -117,7 +117,7 @@ public:
ARMFunctionInfo() :
isThumb(false),
hasThumb2(false),
- VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -129,7 +129,7 @@ public:
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
- VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -141,8 +141,8 @@ public:
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
bool isThumb2Function() const { return isThumb && hasThumb2; }
- unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
- void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
+ unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; }
+ void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 739300e..8653c46 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -91,6 +91,7 @@ void ARMSubtarget::initializeEnvironment() {
HasRAS = false;
HasMPExtension = false;
FPOnlySP = false;
+ HasTrustZone = false;
AllowsUnalignedMem = false;
Thumb2DSP = false;
UseNaClTrap = false;
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index 5b5ee6a..038eb76 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -148,6 +148,9 @@ protected:
/// precision.
bool FPOnlySP;
+ /// HasTrustZone - if true, processor supports TrustZone security extensions
+ bool HasTrustZone;
+
/// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
/// accesses for some types. For details, see
/// ARMTargetLowering::allowsUnalignedMemoryAccesses().
@@ -251,6 +254,7 @@ public:
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
bool isFPOnlySP() const { return FPOnlySP; }
+ bool hasTrustZone() const { return HasTrustZone; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 1019b97..53ece66 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -125,6 +125,10 @@ public:
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const;
unsigned getAddressComputationCost(Type *Val) const;
+
+ unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Op1Info = OK_AnyValue,
+ OperandValueKind Op2Info = OK_AnyValue) const;
/// @}
};
@@ -223,9 +227,9 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
- // Operations that we legalize using load/stores to the stack.
- { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 },
- { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 },
+ // Operations that we legalize using splitting.
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
// Vector float <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
@@ -456,3 +460,67 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
return LT.first * NEONShuffleTbl[Idx].Cost;
}
+
+unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Op1Info,
+ OperandValueKind Op2Info) const {
+ int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+ const unsigned FunctionCallDivCost = 20;
+ const unsigned ReciprocalDivCost = 10;
+ static const CostTblEntry<MVT> CostTbl[] = {
+ // Division.
+ // These costs are somewhat random. Choose a cost of 20 to indicate that
+ // vectorizing division (an added function call) is going to be very expensive.
+ // Double registers types.
+ { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
+ { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
+ { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
+ { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
+ { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
+ // Quad register types.
+ { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
+ // Multiplication.
+ };
+
+ int Idx = -1;
+
+ if (ST->hasNEON())
+ Idx = CostTableLookup<MVT>(CostTbl, array_lengthof(CostTbl), ISDOpcode,
+ LT.second);
+
+ if (Idx != -1)
+ return LT.first * CostTbl[Idx].Cost;
+
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
+ Op2Info);
+}
+
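To make the table concrete, two worked lookups (illustrative arithmetic only; LT.first is 1 when the vector type is already legal):

    // <4 x i16> sdiv hits the ReciprocalDivCost row:  1 * 10       == 10
    // <16 x i8> srem hits 16 * FunctionCallDivCost:   1 * 16 * 20  == 320
    // (one scalarized libcall per lane, which the vectorizer will
    // almost always reject as unprofitable)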
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index ed7b7ec..1dd2953 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -86,11 +86,11 @@ class ARMAsmParser : public MCTargetAsmParser {
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
bool Warning(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ ArrayRef<SMRange> Ranges = None) {
return Parser.Warning(L, Msg, Ranges);
}
bool Error(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ ArrayRef<SMRange> Ranges = None) {
return Parser.Error(L, Msg, Ranges);
}
@@ -610,6 +610,13 @@ public:
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
}
+ bool isImm0_4() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 5;
+ }
bool isImm0_1020s4() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -4745,6 +4752,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" ||
Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" ||
+ Mnemonic == "vaclt" || Mnemonic == "vacle" ||
Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" ||
Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" ||
Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" ||
@@ -5014,8 +5022,8 @@ static bool isDataTypeToken(StringRef Tok) {
static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
}
-
-static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
+static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features,
+ unsigned VariantID);
/// Parse an arm instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc,
@@ -5026,7 +5034,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// MatchInstructionImpl(), but that's too late for aliases that include
// any sort of suffix.
unsigned AvailableFeatures = getAvailableFeatures();
- applyMnemonicAliases(Name, AvailableFeatures);
+ unsigned AssemblerDialect = getParser().getAssemblerDialect();
+ applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect);
// First check for the ARM-specific .req directive.
if (Parser.getTok().is(AsmToken::Identifier) &&
@@ -7613,6 +7622,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "instruction variant requires ARMv6 or later");
case Match_RequiresThumb2:
return Error(IDLoc, "instruction variant requires Thumb2");
+ case Match_ImmRange0_4: {
+ SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ return Error(ErrorLoc, "immediate operand must be in the range [0,4]");
+ }
case Match_ImmRange0_15: {
SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 2e009e5..ac937f3 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -308,6 +308,8 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
@@ -1951,10 +1953,12 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
Inst.addOperand(MCOperand::CreateImm(mode));
if (iflags) S = MCDisassembler::SoftFail;
} else {
- // imod == '00' && M == '0' --> UNPREDICTABLE
- Inst.setOpcode(ARM::t2CPS1p);
- Inst.addOperand(MCOperand::CreateImm(mode));
- S = MCDisassembler::SoftFail;
+ // imod == '00' && M == '0' --> this is a HINT instruction
+ int imm = fieldFromInstruction(Insn, 0, 8);
+ // HINT is defined only for immediates in [0..4]
+ if (imm > 4) return MCDisassembler::Fail;
+ Inst.setOpcode(ARM::t2HINT);
+ Inst.addOperand(MCOperand::CreateImm(imm));
}
return S;
@@ -1996,9 +2000,10 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
if (Inst.getOpcode() == ARM::MOVTi16)
- if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
- if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
@@ -3570,7 +3575,7 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
unsigned pred = fieldFromInstruction(Insn, 28, 4);
- if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
@@ -4496,6 +4501,15 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
return S;
}
+static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Imm = fieldFromInstruction(Insn, 0, 3);
+ if (Imm > 4) return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 2afb20d..3bcd083 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -490,7 +490,8 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
}
void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+ raw_ostream &O,
+ bool AlwaysPrintImm0) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op+1);
const MCOperand &MO3 = MI->getOperand(Op+2);
@@ -509,7 +510,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
- if (ImmOffs || (op == ARM_AM::sub)) {
+ if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) {
O << ", "
<< markup("<imm:")
<< "#"
@@ -520,6 +521,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
O << ']' << markup(">");
}
+template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
@@ -535,7 +537,7 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
printAM3PostIndexOp(MI, Op, O);
return;
}
- printAM3PreOrOffsetIndexOp(MI, Op, O);
+ printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0);
}
void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
@@ -593,6 +595,7 @@ void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
O << ARM_AM::getAMSubModeStr(Mode);
}
+template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
@@ -608,7 +611,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
- if (ImmOffs || Op == ARM_AM::sub) {
+ if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
O << ", "
<< markup("<imm:")
<< "#"
@@ -1022,6 +1025,7 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
}
+template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
@@ -1042,13 +1046,13 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
OffImm = 0;
if (isSub) {
O << ", "
- << markup("<imm:")
+ << markup("<imm:")
<< "#-" << -OffImm
<< markup(">");
}
- else if (OffImm > 0) {
+ else if (AlwaysPrintImm0 || OffImm > 0) {
O << ", "
- << markup("<imm:")
+ << markup("<imm:")
<< "#" << OffImm
<< markup(">");
}
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index edff75d..344104e 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -47,12 +47,13 @@ public:
raw_ostream &O);
void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
-
+ template <bool AlwaysPrintImm0>
void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O);
- void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,raw_ostream &O);
+ void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O,
+ bool AlwaysPrintImm0);
void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -60,6 +61,7 @@ public:
raw_ostream &O);
void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -91,6 +93,7 @@ public:
raw_ostream &O);
void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ template<bool AlwaysPrintImm0>
void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 418971d..6c3d247 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -13,7 +13,9 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMRegisterInfo.h"
#include "ARMUnwindOp.h"
+#include "ARMUnwindOpAsm.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -26,6 +28,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
@@ -33,11 +36,15 @@
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static std::string GetAEABIUnwindPersonalityName(unsigned Index) {
+ assert(Index < NUM_PERSONALITY_INDEX && "Invalid personality index");
+ return (Twine("__aeabi_unwind_cpp_pr") + Twine(Index)).str();
+}
+
namespace {
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
@@ -57,8 +64,9 @@ public:
ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
MCCodeEmitter *Emitter, bool IsThumb)
: MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter),
- IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), ExTab(0),
- FnStart(0), Personality(0), CantUnwind(false) {}
+ IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) {
+ Reset();
+ }
~ARMELFStreamer() {}
@@ -75,14 +83,15 @@ public:
virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool isVector);
- virtual void ChangeSection(const MCSection *Section) {
+ virtual void ChangeSection(const MCSection *Section,
+ const MCExpr *Subsection) {
// We have to keep track of the mapping symbol state of any sections we
// use. Each one should start off as EMS_None, which is provided as the
// default constructor by DenseMap::lookup.
- LastMappingSymbols[getPreviousSection()] = LastEMS;
+ LastMappingSymbols[getPreviousSection().first] = LastEMS;
LastEMS = LastMappingSymbols.lookup(Section);
- MCELFStreamer::ChangeSection(Section);
+ MCELFStreamer::ChangeSection(Section, Subsection);
}
/// This function is the one used to emit instruction data into the ELF
@@ -175,7 +184,7 @@ private:
MCELF::SetType(SD, ELF::STT_NOTYPE);
MCELF::SetBinding(SD, ELF::STB_LOCAL);
SD.setExternal(false);
- Symbol->setSection(*getCurrentSection());
+ Symbol->setSection(*getCurrentSection().first);
const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
Symbol->setVariableValue(Value);
@@ -194,6 +203,7 @@ private:
void Reset();
void EmitPersonalityFixup(StringRef Name);
+ void CollectUnwindOpcodes();
void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags,
SectionKind Kind, const MCSymbol &Fn);
@@ -210,9 +220,16 @@ private:
MCSymbol *ExTab;
MCSymbol *FnStart;
const MCSymbol *Personality;
+ uint32_t VFPRegSave; // Register mask for {d31-d0}
+ uint32_t RegSave; // Register mask for {r15-r0}
+ int64_t SPOffset;
+ uint16_t FPReg;
+ int64_t FPOffset;
+ bool UsedFP;
bool CantUnwind;
+ UnwindOpcodeAssembler UnwindOpAsm;
};
-}
+} // end anonymous namespace
inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
unsigned Type,
@@ -238,7 +255,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
} else {
EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind);
}
- assert(EHSection);
+ assert(EHSection && "Failed to get the required EH section");
// Switch to .ARM.extab or .ARM.exidx section
SwitchSection(EHSection);
@@ -262,10 +279,20 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
}
void ARMELFStreamer::Reset() {
+ const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+
ExTab = NULL;
FnStart = NULL;
Personality = NULL;
+ VFPRegSave = 0;
+ RegSave = 0;
+ FPReg = MRI.getEncodingValue(ARM::SP);
+ FPOffset = 0;
+ SPOffset = 0;
+ UsedFP = false;
CantUnwind = false;
+
+ UnwindOpAsm.Reset();
}
// Add the R_ARM_NONE fixup at the same position
@@ -284,6 +311,18 @@ void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
MCFixup::getKindForSize(4, false)));
}
+void ARMELFStreamer::CollectUnwindOpcodes() {
+ if (UsedFP) {
+ UnwindOpAsm.EmitSetFP(FPReg);
+ UnwindOpAsm.EmitSPOffset(-FPOffset);
+ } else {
+ UnwindOpAsm.EmitSPOffset(SPOffset);
+ }
+ UnwindOpAsm.EmitVFPRegSave(VFPRegSave);
+ UnwindOpAsm.EmitRegSave(RegSave);
+ UnwindOpAsm.Finalize();
+}
+
void ARMELFStreamer::EmitFnStart() {
assert(FnStart == 0);
FnStart = getContext().CreateTempSymbol();
@@ -294,35 +333,29 @@ void ARMELFStreamer::EmitFnEnd() {
assert(FnStart && ".fnstart must precede .fnend");
// Emit unwind opcodes if there is no .handlerdata directive
- int PersonalityIndex = -1;
if (!ExTab && !CantUnwind) {
- // For __aeabi_unwind_cpp_pr1, we have to emit opcodes in .ARM.extab.
- SwitchToExTabSection(*FnStart);
-
- // Create .ARM.extab label for offset in .ARM.exidx
- ExTab = getContext().CreateTempSymbol();
- EmitLabel(ExTab);
-
- PersonalityIndex = 1;
-
- uint32_t Entry = 0;
- uint32_t NumExtraEntryWords = 0;
- Entry |= NumExtraEntryWords << 24;
- Entry |= (EHT_COMPACT | PersonalityIndex) << 16;
-
- // TODO: This should be generated according to .save, .vsave, .setfp
- // directives. Currently, we are simply generating FINISH opcode.
- Entry |= UNWIND_OPCODE_FINISH << 8;
- Entry |= UNWIND_OPCODE_FINISH;
-
- EmitIntValue(Entry, 4, 0);
+ CollectUnwindOpcodes();
+
+ unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex();
+ if (PersonalityIndex == AEABI_UNWIND_CPP_PR1 ||
+ PersonalityIndex == AEABI_UNWIND_CPP_PR2) {
+ // For __aeabi_unwind_cpp_pr1 and __aeabi_unwind_cpp_pr2, we have to
+ // emit the unwind opcodes in the corresponding ".ARM.extab" section, and
+ // then emit a reference to these unwind opcodes in the second word of
+ // the exception index table entry.
+ SwitchToExTabSection(*FnStart);
+ ExTab = getContext().CreateTempSymbol();
+ EmitLabel(ExTab);
+ EmitBytes(UnwindOpAsm.data(), 0);
+ }
}
// Emit the exception index table entry
SwitchToExIdxSection(*FnStart);
- if (PersonalityIndex == 1)
- EmitPersonalityFixup("__aeabi_unwind_cpp_pr1");
+ unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex();
+ if (PersonalityIndex < NUM_PERSONALITY_INDEX)
+ EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex));
const MCSymbolRefExpr *FnStartRef =
MCSymbolRefExpr::Create(FnStart,
@@ -333,12 +366,22 @@ void ARMELFStreamer::EmitFnEnd() {
if (CantUnwind) {
EmitIntValue(EXIDX_CANTUNWIND, 4, 0);
- } else {
+ } else if (ExTab) {
+ // Emit a reference to the unwind opcodes in the ".ARM.extab" section.
const MCSymbolRefExpr *ExTabEntryRef =
MCSymbolRefExpr::Create(ExTab,
MCSymbolRefExpr::VK_ARM_PREL31,
getContext());
EmitValue(ExTabEntryRef, 4, 0);
+ } else {
+ // For __aeabi_unwind_cpp_pr0, we have to emit the unwind opcodes in the
+ // second word of the exception index table entry. The size of the unwind
+ // opcodes should always be 4 bytes.
+ assert(PersonalityIndex == AEABI_UNWIND_CPP_PR0 &&
+ "Compact model must use __aeabi_unwind_cpp_pr0 as personality");
+ assert(UnwindOpAsm.size() == 4u &&
+ "Unwind opcode size for __aeabi_unwind_cpp_pr0 must be equal to 4");
+ EmitBytes(UnwindOpAsm.data(), 0);
}
// Clean exception handling frame information
@@ -368,36 +411,54 @@ void ARMELFStreamer::EmitHandlerData() {
EmitValue(PersonalityRef, 4, 0);
// Emit unwind opcodes
- uint32_t Entry = 0;
- uint32_t NumExtraEntryWords = 0;
-
- // TODO: This should be generated according to .save, .vsave, .setfp
- // directives. Currently, we are simply generating FINISH opcode.
- Entry |= NumExtraEntryWords << 24;
- Entry |= UNWIND_OPCODE_FINISH << 16;
- Entry |= UNWIND_OPCODE_FINISH << 8;
- Entry |= UNWIND_OPCODE_FINISH;
-
- EmitIntValue(Entry, 4, 0);
+ CollectUnwindOpcodes();
+ EmitBytes(UnwindOpAsm.data(), 0);
}
void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) {
Personality = Per;
+ UnwindOpAsm.setPersonality(Per);
}
-void ARMELFStreamer::EmitSetFP(unsigned NewFpReg,
- unsigned NewSpReg,
+void ARMELFStreamer::EmitSetFP(unsigned NewFPReg,
+ unsigned NewSPReg,
int64_t Offset) {
- // TODO: Not implemented
+ assert(SPOffset == 0 &&
+ "Current implementation assumes .setfp precedes .pad");
+
+ const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+
+ uint16_t NewFPRegEncVal = MRI.getEncodingValue(NewFPReg);
+#ifndef NDEBUG
+ uint16_t NewSPRegEncVal = MRI.getEncodingValue(NewSPReg);
+#endif
+
+ assert((NewSPReg == ARM::SP || NewSPRegEncVal == FPReg) &&
+ "the operand of .setfp directive should be either $sp or $fp");
+
+ UsedFP = true;
+ FPReg = NewFPRegEncVal;
+ FPOffset = Offset;
}
void ARMELFStreamer::EmitPad(int64_t Offset) {
- // TODO: Not implemented
+ SPOffset += Offset;
}
void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool IsVector) {
- // TODO: Not implemented
+ const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+
+#ifndef NDEBUG
+ unsigned Max = IsVector ? 32 : 16;
+#endif
+ uint32_t &RegMask = IsVector ? VFPRegSave : RegSave;
+
+ for (size_t i = 0; i < RegList.size(); ++i) {
+ unsigned Reg = MRI.getEncodingValue(RegList[i]);
+ assert(Reg < Max && "Register encoded value out of range");
+ RegMask |= 1u << Reg;
+ }
}
namespace llvm {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
index dad5576..fa4add6 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
@@ -107,6 +107,19 @@ namespace llvm {
UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0
};
+ /// ARM-defined Personality Routine Index
+ enum ARMPersonalityRoutineIndex {
+ // To make the exception handling table more compact, ARM defines several
+ // personality routines in EHABI. ARM EHABI currently provides 3 distinct
+ // personality routines, and the encoding allows for at most 16
+ // pre-defined personality routines.
+ AEABI_UNWIND_CPP_PR0 = 0,
+ AEABI_UNWIND_CPP_PR1 = 1,
+ AEABI_UNWIND_CPP_PR2 = 2,
+
+ NUM_PERSONALITY_INDEX
+ };
+
}
#endif // ARM_UNWIND_OP_H
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
new file mode 100644
index 0000000..191db69
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -0,0 +1,198 @@
+//===-- ARMUnwindOpAsm.cpp - ARM Unwind Opcodes Assembler -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the unwind opcode assembler for the ARM exception
+// handling table.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMUnwindOpAsm.h"
+
+#include "ARMUnwindOp.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
+
+void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
+ if (RegSave == 0u)
+ return;
+
+ // One-byte opcode to save registers r14 and r11-r4
+ if (RegSave & (1u << 4)) {
+ // The one-byte opcode always saves r4, so we can't use it when r4 is
+ // not in the .save directive.
+
+ // Compute the consecutive registers from r4 to r11.
+ uint32_t Range = 0;
+ uint32_t Mask = (1u << 4);
+ for (uint32_t Bit = (1u << 5); Bit < (1u << 12); Bit <<= 1) {
+ if ((RegSave & Bit) == 0u)
+ break;
+ ++Range;
+ Mask |= Bit;
+ }
+
+ // Emit this opcode only when the mask covers all of the registers to be saved.
+ uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask);
+ if (UnmaskedReg == 0u) {
+ // Pop r[4 : (4 + n)]
+ Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range);
+ RegSave &= 0x000fu;
+ } else if (UnmaskedReg == (1u << 14)) {
+ // Pop r[14] + r[4 : (4 + n)]
+ Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range);
+ RegSave &= 0x000fu;
+ }
+ }
+
+ // Two-byte opcode to save registers r15-r4
+ if ((RegSave & 0xfff0u) != 0) {
+ uint32_t Op = UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4);
+ Ops.push_back(static_cast<uint8_t>(Op >> 8));
+ Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ }
+
+ // Opcode to save registers r3-r0
+ if ((RegSave & 0x000fu) != 0) {
+ uint32_t Op = UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu);
+ Ops.push_back(static_cast<uint8_t>(Op >> 8));
+ Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ }
+}
+
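A worked pass through the logic above (a sketch; the 0xA8 base byte is the EHABI "pop r4-r[4+nnn], r14" opcode, assumed here from ARMUnwindOp.h):

    uint32_t RegSave = 0x40F0;              // .save {r4-r7, lr}
    uint32_t Range = 0, Mask = 1u << 4;
    for (uint32_t Bit = 1u << 5; Bit < (1u << 12); Bit <<= 1) {
      if ((RegSave & Bit) == 0u) break;
      ++Range; Mask |= Bit;                 // Range == 3 after r5, r6, r7
    }
    uint32_t Unmasked = RegSave & 0xfff0u & ~Mask; // == 1u << 14, r14 alone
    uint8_t Op = 0xA8 | Range;              // 0xAB: single-byte pop {r4-r7, lr}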
+/// Emit unwind opcodes for .vsave directives
+void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
+ size_t i = 32;
+
+ while (i > 16) {
+ uint32_t Bit = 1u << (i - 1);
+ if ((VFPRegSave & Bit) == 0u) {
+ --i;
+ continue;
+ }
+
+ uint32_t Range = 0;
+
+ --i;
+ Bit >>= 1;
+
+ while (i > 16 && (VFPRegSave & Bit)) {
+ --i;
+ ++Range;
+ Bit >>= 1;
+ }
+
+ uint32_t Op =
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | ((i - 16) << 4) | Range;
+ Ops.push_back(static_cast<uint8_t>(Op >> 8));
+ Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ }
+
+ while (i > 0) {
+ uint32_t Bit = 1u << (i - 1);
+ if ((VFPRegSave & Bit) == 0u) {
+ --i;
+ continue;
+ }
+
+ uint32_t Range = 0;
+
+ --i;
+ Bit >>= 1;
+
+ while (i > 0 && (VFPRegSave & Bit)) {
+ --i;
+ ++Range;
+ Bit >>= 1;
+ }
+
+ uint32_t Op = UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range;
+ Ops.push_back(static_cast<uint8_t>(Op >> 8));
+ Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ }
+}
+
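A worked example for the D0-D15 loop above (a sketch; 0xC900 stands in for UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD, assumed from ARMUnwindOp.h):

    // .vsave {d8-d11} => VFPRegSave == 0x0f00. The scan stops at bit 11
    // (i == 12), extends the range across d10/d9/d8 (Range == 3, i == 8):
    uint32_t Op = 0xC900 | (8u << 4) | 3u;  // 0xC983: pop {d8-d11}, two bytes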
+/// Emit unwind opcodes for .setfp directives
+void UnwindOpcodeAssembler::EmitSetFP(uint16_t FPReg) {
+ Ops.push_back(UNWIND_OPCODE_SET_VSP | FPReg);
+}
+
+/// Emit unwind opcodes to update stack pointer
+void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) {
+ if (Offset > 0x200) {
+ uint8_t Buff[10];
+ size_t Size = encodeULEB128((Offset - 0x204) >> 2, Buff);
+ Ops.push_back(UNWIND_OPCODE_INC_VSP_ULEB128);
+ Ops.append(Buff, Buff + Size);
+ } else if (Offset > 0) {
+ if (Offset > 0x100) {
+ Ops.push_back(UNWIND_OPCODE_INC_VSP | 0x3fu);
+ Offset -= 0x100;
+ }
+ Ops.push_back(UNWIND_OPCODE_INC_VSP |
+ static_cast<uint8_t>((Offset - 4) >> 2));
+ } else if (Offset < 0) {
+ while (Offset < -0x100) {
+ Ops.push_back(UNWIND_OPCODE_DEC_VSP | 0x3fu);
+ Offset += 0x100;
+ }
+ Ops.push_back(UNWIND_OPCODE_DEC_VSP |
+ static_cast<uint8_t>(((-Offset) - 4) >> 2));
+ }
+}
+
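One worked case per branch (a sketch; 0x00 and 0xB2 are the EHABI "vsp = vsp + (x << 2) + 4" and ULEB128 increment opcodes, assumed from ARMUnwindOp.h):

    // .pad #16    -> small branch: 0x00 | ((16 - 4) >> 2)          == 0x03
    // .pad #0x300 -> ULEB branch:  0xB2, ULEB128((0x300 - 0x204) >> 2)
    //                              == bytes 0xB2 0x3F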
+void UnwindOpcodeAssembler::AddOpcodeSizePrefix(size_t Pos) {
+ size_t SizeInWords = (size() + 3) / 4;
+ assert(SizeInWords <= 0x100u &&
+ "Only 256 additional words are allowed for unwind opcodes");
+ Ops[Pos] = static_cast<uint8_t>(SizeInWords - 1);
+}
+
+void UnwindOpcodeAssembler::AddPersonalityIndexPrefix(size_t Pos, unsigned PI) {
+ assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix");
+ Ops[Pos] = EHT_COMPACT | PI;
+}
+
+void UnwindOpcodeAssembler::EmitFinishOpcodes() {
+ for (size_t i = (0x4u - (size() & 0x3u)) & 0x3u; i > 0; --i)
+ Ops.push_back(UNWIND_OPCODE_FINISH);
+}
+
+void UnwindOpcodeAssembler::Finalize() {
+ if (HasPersonality) {
+ // Personality specified by .personality directive
+ Offset = 1;
+ AddOpcodeSizePrefix(1);
+ } else {
+ if (getOpcodeSize() <= 3) {
+ // __aeabi_unwind_cpp_pr0: [ 0x80 , OP1 , OP2 , OP3 ]
+ Offset = 1;
+ PersonalityIndex = AEABI_UNWIND_CPP_PR0;
+ AddPersonalityIndexPrefix(Offset, PersonalityIndex);
+ } else {
+ // __aeabi_unwind_cpp_pr1: [ 0x81 , SIZE , OP1 , OP2 , ... ]
+ Offset = 0;
+ PersonalityIndex = AEABI_UNWIND_CPP_PR1;
+ AddPersonalityIndexPrefix(Offset, PersonalityIndex);
+ AddOpcodeSizePrefix(1);
+ }
+ }
+
+  // Emit padding finish opcodes if size() is not a multiple of 4.
+ EmitFinishOpcodes();
+
+ // Swap the byte order
+ uint8_t *Ptr = Ops.begin() + Offset;
+ assert(size() % 4 == 0 && "Final unwind opcodes should align to 4");
+ for (size_t i = 0, n = size(); i < n; i += 4) {
+ std::swap(Ptr[i], Ptr[i + 3]);
+ std::swap(Ptr[i + 1], Ptr[i + 2]);
+ }
+}
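
Taken together, a short trace of the assembler for a frame that saves only
{r4, lr} (a hypothetical driver; the byte values follow from the EHABI
encodings used above):

    UnwindOpcodeAssembler UOA;
    UOA.EmitRegSave((1u << 4) | (1u << 14)); // appends 0xa8 ("pop r4, r14")
    UOA.Finalize();
    // getOpcodeSize() == 1 <= 3, so the compact __aeabi_unwind_cpp_pr0
    // prefix 0x80 is written at Ops[1], two 0xb0 finish opcodes pad the
    // word, and the byte swap leaves data() == "\xb0\xb0\xa8\x80",
    // i.e. the little-endian word 0x80a8b0b0.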
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
new file mode 100644
index 0000000..f6ecaeb
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -0,0 +1,114 @@
+//===-- ARMUnwindOpAsm.h - ARM Unwind Opcodes Assembler ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the unwind opcode assembler for the ARM exception
+// handling table.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_UNWIND_OP_ASM_H
+#define ARM_UNWIND_OP_ASM_H
+
+#include "ARMUnwindOp.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MCSymbol;
+
+class UnwindOpcodeAssembler {
+private:
+ llvm::SmallVector<uint8_t, 8> Ops;
+
+ unsigned Offset;
+ unsigned PersonalityIndex;
+ bool HasPersonality;
+
+ enum {
+ // The number of bytes to be preserved for the size and personality index
+ // prefix of unwind opcodes.
+ NUM_PRESERVED_PREFIX_BUF = 2
+ };
+
+public:
+ UnwindOpcodeAssembler()
+ : Ops(NUM_PRESERVED_PREFIX_BUF), Offset(NUM_PRESERVED_PREFIX_BUF),
+      PersonalityIndex(NUM_PERSONALITY_INDEX), HasPersonality(false) {
+ }
+
+ /// Reset the unwind opcode assembler.
+ void Reset() {
+ Ops.resize(NUM_PRESERVED_PREFIX_BUF);
+ Offset = NUM_PRESERVED_PREFIX_BUF;
+ PersonalityIndex = NUM_PERSONALITY_INDEX;
+    HasPersonality = false;
+ }
+
+ /// Get the size of the payload (including the size byte)
+ size_t size() const {
+ return Ops.size() - Offset;
+ }
+
+ /// Get the beginning of the payload
+ const uint8_t *begin() const {
+ return Ops.begin() + Offset;
+ }
+
+ /// Get the payload
+ StringRef data() const {
+ return StringRef(reinterpret_cast<const char *>(begin()), size());
+ }
+
+  /// Set the personality routine specified by the .personality directive
+ void setPersonality(const MCSymbol *Per) {
+    HasPersonality = true;
+ }
+
+ /// Get the personality index
+ unsigned getPersonalityIndex() const {
+ return PersonalityIndex;
+ }
+
+ /// Emit unwind opcodes for .save directives
+ void EmitRegSave(uint32_t RegSave);
+
+ /// Emit unwind opcodes for .vsave directives
+ void EmitVFPRegSave(uint32_t VFPRegSave);
+
+ /// Emit unwind opcodes for .setfp directives
+ void EmitSetFP(uint16_t FPReg);
+
+ /// Emit unwind opcodes to update stack pointer
+ void EmitSPOffset(int64_t Offset);
+
+ /// Finalize the unwind opcode sequence for EmitBytes()
+ void Finalize();
+
+private:
+ /// Get the size of the opcodes in bytes.
+ size_t getOpcodeSize() const {
+ return Ops.size() - NUM_PRESERVED_PREFIX_BUF;
+ }
+
+ /// Add the length prefix to the payload
+ void AddOpcodeSizePrefix(size_t Pos);
+
+ /// Add personality index prefix in some compact format
+ void AddPersonalityIndexPrefix(size_t Pos, unsigned PersonalityIndex);
+
+  /// Pad with finish opcodes so that the payload is 4-byte aligned
+ void EmitFinishOpcodes();
+};
+
+} // namespace llvm
+
+#endif // ARM_UNWIND_OP_ASM_H
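
For context, a minimal sketch of how a streamer is expected to drive this
class (the helper and directive values are hypothetical; only the
UnwindOpcodeAssembler calls come from this header):

    #include "ARMUnwindOpAsm.h"
    using namespace llvm;

    // Hypothetical helper: assemble the unwind opcodes for one function.
    static StringRef assembleUnwindOpcodes(UnwindOpcodeAssembler &UOA) {
      UOA.EmitSetFP(11);                       // .setfp r11, sp
      UOA.EmitRegSave((1u << 4) | (1u << 14)); // .save {r4, lr}
      UOA.EmitSPOffset(16);                    // .pad #16
      UOA.Finalize();                          // add prefix, pad, byte-swap
      return UOA.data();                       // 4-byte aligned, for EmitBytes()
    }

Reset() afterwards returns the assembler to its initial state so one instance
can be reused across functions.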
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 2c3388c..1e2a8b0 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -88,7 +88,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -104,8 +104,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
int FramePtrSpillFI = 0;
- if (VARegSaveSize)
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize,
+ if (ArgRegsSaveSize)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
MachineInstr::FrameSetup);
if (!AFI->hasStackFrame()) {
@@ -249,7 +249,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
int NumBytes = (int)MFI->getStackSize();
const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -300,7 +300,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
- if (VARegSaveSize) {
+ if (ArgRegsSaveSize) {
// Unlike T2 and ARM mode, the T1 pop instruction cannot restore
// to LR, and we can't pop the value directly to the PC since
// we need to update the SP after popping the value. Therefore, we
@@ -313,7 +313,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
.addReg(ARM::R3, RegState::Define);
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
@@ -376,7 +376,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ bool isVarArg = AFI->getArgRegsSaveSize() > 0;
DebugLoc DL = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
AddDefaultPred(MIB);
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 67e8ec7..a1b48c2 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
@@ -126,25 +127,41 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
return;
}
+ if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+    // Thumb2 STRD expects its source registers to be in rGPR. This is not a
+    // problem for gsub_0, but gsub_1 needs an extra constraint (it could
+    // otherwise be sp).
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ AddDefaultPred(MIB);
+ return;
+ }
+
ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
}
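
For reference, spilling the physical pair R4_R5 through the new branch builds
a single predicated doubleword store, roughly (an illustrative
machine-instruction trace, not output quoted from the patch):

    // SrcReg = ARM::R4_R5, isKill = true:
    //   t2STRDi8 killed %R4, killed %R5, <fi#FI>, 0, pred:14, pred:%noreg
    // i.e. one "strd r4, r5, [sp, #off]" for the whole pair.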
@@ -153,24 +170,42 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
return;
}
+ if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+    // Thumb2 LDRD expects its destination registers to be in rGPR. This is
+    // not a problem for gsub_0, but gsub_1 needs an extra constraint (it
+    // could otherwise be sp).
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
+ AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ AddDefaultPred(MIB);
+
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ return;
+ }
+
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
}
@@ -514,6 +549,15 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
Offset = -Offset;
isSub = true;
}
+ } else if (AddrMode == ARMII::AddrModeT2_i8s4) {
+ Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
+ NumBits = 8;
+    // The MCInst operand holds the already-scaled value.
+ Scale = 1;
+ if (Offset < 0) {
+ isSub = true;
+ Offset = -Offset;
+ }
} else {
llvm_unreachable("Unsupported addressing mode!");
}
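
The new AddrModeT2_i8s4 case can be sanity-checked against the
t2LDRDi8/t2STRDi8 encoding: an 8-bit immediate scaled by 4 reaches +/-1020
bytes in steps of 4. A standalone helper expressing that range (a sketch, not
code from the patch):

    #include <cstdint>

    // Whether a byte offset fits an 8-bit, scale-4 doubleword immediate.
    static bool isLegalT2DoublewordOffset(int64_t Offset) {
      return (Offset & 3) == 0 && Offset >= -255 * 4 && Offset <= 255 * 4;
    }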
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index d50f5d9..4795aae 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -926,13 +926,11 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
HighLatencyCPSR = false;
// Check predecessors for the latest CPSRDef.
- bool HasBackEdges = false;
for (MachineBasicBlock::pred_iterator
I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
if (!PInfo.Visited) {
// Since blocks are visited in RPO, this must be a back-edge.
- HasBackEdges = true;
continue;
}
if (PInfo.HighLatencyCPSR) {