summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/ARM
diff options
context:
space:
mode:
authordim <dim@FreeBSD.org>2011-02-27 01:32:10 +0000
committerdim <dim@FreeBSD.org>2011-02-27 01:32:10 +0000
commitb951d621be1d00a520871c689c1cd687b6aa3ae6 (patch)
tree5c342f2374324ffec4626f558d9aa49f323f90b4 /contrib/llvm/lib/Target/ARM
parent4004d6a3076e94bd23e681411c43682267a202fe (diff)
parenta0fb00f9837bd0d2e5948f16f6a6b82a7a628f51 (diff)
downloadFreeBSD-src-b951d621be1d00a520871c689c1cd687b6aa3ae6.zip
FreeBSD-src-b951d621be1d00a520871c689c1cd687b6aa3ae6.tar.gz
Update llvm/clang to trunk r126547.
There are several bugfixes in this update, but the most important one is to ensure __start_ and __stop_ symbols for linker sets and kernel module metadata are always emitted in object files: http://llvm.org/bugs/show_bug.cgi?id=9292 Before this fix, if you compiled kernel modules with clang, they would not be properly processed by kldxref, and if they had any dependencies, the kernel would fail to load those. Another problem occurred when attempting to mount a tmpfs filesystem, which would result in 'operation not supported by device'.
Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h3
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp38
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp16
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp17
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp15
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp102
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrFormats.td7
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td31
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td162
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp20
-rw-r--r--contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp9
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp6
14 files changed, 263 insertions, 171 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 1fb8872..7e2183d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -155,10 +155,11 @@ namespace ARMII {
//===------------------------------------------------------------------===//
// Code domain.
DomainShift = 18,
- DomainMask = 3 << DomainShift,
+ DomainMask = 7 << DomainShift,
DomainGeneral = 0 << DomainShift,
DomainVFP = 1 << DomainShift,
DomainNEON = 2 << DomainShift,
+ DomainNEONA8 = 4 << DomainShift,
//===------------------------------------------------------------------===//
// Field shifts - such shifts are used to set field while generating
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 9f29530..26f48b3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -172,6 +172,7 @@ class ARMFastISel : public FastISel {
unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+ unsigned ARMSelectCallOp(const GlobalValue *GV);
// Call handling routines.
private:
@@ -1633,6 +1634,25 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
return true;
}
+unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
+
+ // Depend our opcode for thumb on whether or not we're targeting an
+ // externally callable function. For libcalls we'll just pass a NULL GV
+ // in here.
+ bool isExternal = false;
+ if (!GV || GV->hasExternalLinkage()) isExternal = true;
+
+ // Darwin needs the r9 versions of the opcodes.
+ bool isDarwin = Subtarget->isTargetDarwin();
+ if (isThumb && isExternal) {
+ return isDarwin ? ARM::tBLXi_r9 : ARM::tBLXi;
+ } else if (isThumb) {
+ return isDarwin ? ARM::tBLr9 : ARM::tBL;
+ } else {
+ return isDarwin ? ARM::BLr9 : ARM::BL;
+ }
+}
+
// A quick function that will emit a call for a named libcall in F with the
// vector of passed arguments for the Instruction in I. We can assume that we
// can emit a call for any libcall we can produce. This is an abridged version
@@ -1694,20 +1714,17 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
// TODO: Turn this into the table of arm call ops.
MachineInstrBuilder MIB;
- unsigned CallOpc;
- if(isThumb) {
- CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+ unsigned CallOpc = ARMSelectCallOp(NULL);
+ if(isThumb)
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc)))
.addExternalSymbol(TLI.getLibcallName(Call));
- } else {
- CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+ else
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc))
.addExternalSymbol(TLI.getLibcallName(Call)));
- }
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
@@ -1813,21 +1830,18 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
// Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
// TODO: Turn this into the table of arm call ops.
MachineInstrBuilder MIB;
- unsigned CallOpc;
+ unsigned CallOpc = ARMSelectCallOp(GV);
// Explicitly adding the predicate here.
- if(isThumb) {
- CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+ if(isThumb)
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc)))
.addGlobalAddress(GV, 0, 0);
- } else {
- CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+ else
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc))
.addGlobalAddress(GV, 0, 0));
- }
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index f42c6db..68c33f0 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -215,7 +215,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
// Move past area 3.
- if (DPRCSSize > 0) MBBI++;
+ if (DPRCSSize > 0) {
+ MBBI++;
+ // Since vpush register list cannot have gaps, there may be multiple vpush
+ // instructions in the prologue.
+ while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
+ MBBI++;
+ }
NumBytes = DPRCSOffset;
if (NumBytes) {
@@ -370,7 +376,13 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
// Increment past our save areas.
- if (AFI->getDPRCalleeSavedAreaSize()) MBBI++;
+ if (AFI->getDPRCalleeSavedAreaSize()) {
+ MBBI++;
+ // Since vpop register list cannot have gaps, there may be multiple vpop
+ // instructions in the epilogue.
+ while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
+ MBBI++;
+ }
if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
index 676b01e..e97ce50 100644
--- a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -21,17 +21,14 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
// FIXME: Detect integer instructions properly.
const TargetInstrDesc &TID = MI->getDesc();
unsigned Domain = TID.TSFlags & ARMII::DomainMask;
- if (Domain == ARMII::DomainVFP) {
- unsigned Opcode = MI->getOpcode();
- if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
- Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
- return false;
- } else if (Domain == ARMII::DomainNEON) {
- if (MI->getDesc().mayStore() || MI->getDesc().mayLoad())
- return false;
- } else
+ if (TID.mayStore())
return false;
- return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+ unsigned Opcode = TID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+ return false;
}
ScheduleHazardRecognizer::HazardType
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index a506cff..f0d5a7d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -126,6 +126,7 @@ public:
bool SelectAddrMode5(SDValue N, SDValue &Base,
SDValue &Offset);
bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
+ bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
@@ -886,6 +887,20 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
return true;
}
+bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
+ SDValue &Offset) {
+ LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
+ ISD::MemIndexedMode AM = LdSt->getAddressingMode();
+ if (AM != ISD::POST_INC)
+ return false;
+ Offset = N;
+ if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
+ if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ }
+ return true;
+}
+
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
SDValue &Offset, SDValue &Label) {
if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 1835ec0..ab9f9e1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2236,7 +2236,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
RC = ARM::GPRRegisterClass;
// Transform the arguments stored in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
SDValue ArgValue2;
@@ -2250,7 +2250,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
MachinePointerInfo::getFixedStack(FI),
false, false, 0);
} else {
- Reg = MF.addLiveIn(NextVA.getLocReg(), RC, dl);
+ Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
}
@@ -2331,7 +2331,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
}
@@ -2408,7 +2408,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
else
RC = ARM::GPRRegisterClass;
- unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC, dl);
+ unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -2838,8 +2838,51 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
- bool F2IisFast = Subtarget->isCortexA9() ||
- Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR;
+ bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
+ Tmp0.getOpcode() == ARMISD::VMOVDRR;
+ bool UseNEON = !InGPR && Subtarget->hasNEON();
+
+ if (UseNEON) {
+ // Use VBSL to copy the sign bit.
+ unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
+ SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
+ DAG.getTargetConstant(EncodedVal, MVT::i32));
+ EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
+ if (VT == MVT::f64)
+ Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
+ DAG.getConstant(32, MVT::i32));
+ else /*if (VT == MVT::f32)*/
+ Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
+ if (SrcVT == MVT::f32) {
+ Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
+ if (VT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
+ DAG.getConstant(32, MVT::i32));
+ }
+ Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
+
+ SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
+ MVT::i32);
+ AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
+ SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
+
+ SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
+ if (SrcVT == MVT::f32) {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+ DAG.getConstant(0, MVT::i32));
+ } else {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
+ }
+
+ return Res;
+ }
// Bitcast operand 1 to i32.
if (SrcVT == MVT::f64)
@@ -2847,37 +2890,24 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
&Tmp1, 1).getValue(1);
Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
- // If float to int conversion isn't going to be super expensive, then simply
- // or in the signbit.
- if (F2IisFast) {
- SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
- SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
- Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
- if (VT == MVT::f32) {
- Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
- DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
- DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
- }
-
- // f64: Or the high part with signbit and then combine two parts.
- Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
- &Tmp0, 1);
- SDValue Lo = Tmp0.getValue(0);
- SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
- Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
- return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ // Or in the signbit with integer operations.
+ SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+ SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+ Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+ if (VT == MVT::f32) {
+ Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
}
- // Remove the signbit of operand 0.
- Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
-
- // If operand 1 signbit is one, then negate operand 0.
- SDValue ARMcc;
- SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32),
- ISD::SETLT, ARMcc, DAG, dl);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp);
+ // f64: Or the high part with signbit and then combine two parts.
+ Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp0, 1);
+ SDValue Lo = Tmp0.getValue(0);
+ SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+ Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
@@ -2897,7 +2927,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
}
// Return LR, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32), dl);
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
index 765cba4..359ac45 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -127,13 +127,14 @@ def IndexModePost : IndexMode<2>;
def IndexModeUpd : IndexMode<3>;
// Instruction execution domain.
-class Domain<bits<2> val> {
- bits<2> Value = val;
+class Domain<bits<3> val> {
+ bits<3> Value = val;
}
def GenericDomain : Domain<0>;
def VFPDomain : Domain<1>; // Instructions in VFP domain only
def NeonDomain : Domain<2>; // Instructions in Neon domain only
def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
+def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
//===----------------------------------------------------------------------===//
// ARM special operands.
@@ -249,7 +250,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
let TSFlags{15-10} = Form;
let TSFlags{16} = isUnaryDataProc;
let TSFlags{17} = canXformTo16Bit;
- let TSFlags{19-18} = D.Value;
+ let TSFlags{20-18} = D.Value;
let Constraints = cstr;
let Itinerary = itin;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index c827ce3d..6e3fe2e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -561,7 +561,9 @@ def addrmode6 : Operand<i32>,
let EncoderMethod = "getAddrMode6AddressOpValue";
}
-def am6offset : Operand<i32> {
+def am6offset : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectAddrMode6Offset",
+ [], [SDNPWantRoot]> {
let PrintMethod = "printAddrMode6OffsetOperand";
let MIOperandInfo = (ops GPR);
let EncoderMethod = "getAddrMode6OffsetOpValue";
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 1e2e550..dc3d63e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -1402,31 +1402,42 @@ def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
(VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-
// ...with address register writeback:
-class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
"\\{$Vd[$lane]\\}, $Rn$Rm",
- "$Rn.addr = $wb", []>;
+ "$Rn.addr = $wb",
+ [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
+ addrmode6:$Rn, am6offset:$Rm))]>;
+class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+ : VSTQLNWBPseudo<IIC_VST1lnu> {
+ let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+ addrmode6:$addr, am6offset:$offset))];
+}
-def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8"> {
+def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
+ NEONvgetlaneu> {
let Inst{7-5} = lane{2-0};
}
-def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> {
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
+ NEONvgetlaneu> {
let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{5};
}
-def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> {
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
+ extractelt> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
}
-def VST1LNq8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
-def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
-def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
+def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
+def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index 920c5c9..2990283 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -197,9 +197,9 @@ def VADDS : ASbIn<0b11100, 0b11, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VSUBD : ADbI<0b11100, 0b11, 1, 0,
@@ -211,9 +211,9 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VDIVD : ADbI<0b11101, 0b00, 0, 0,
@@ -235,9 +235,9 @@ def VMULS : ASbIn<0b11100, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VNMULD : ADbI<0b11100, 0b10, 1, 0,
@@ -249,9 +249,9 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
// Match reassociated forms only if not sign dependent rounding.
@@ -271,9 +271,9 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
[(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
// FIXME: Verify encoding after integrated assembler is working.
@@ -286,9 +286,9 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
} // Defs = [FPSCR]
@@ -305,9 +305,9 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
[(set SPR:$Sd, (fabs SPR:$Sm))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
let Defs = [FPSCR] in {
@@ -326,9 +326,9 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
// FIXME: Verify encoding after integrated assembler is working.
@@ -347,9 +347,9 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
} // Defs = [FPSCR]
@@ -423,9 +423,9 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
[(set SPR:$Sd, (fneg SPR:$Sm))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
@@ -598,9 +598,9 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
[(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
let Inst{7} = 1; // s32
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
@@ -616,9 +616,9 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
[(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
let Inst{7} = 0; // u32
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
// FP -> Int:
@@ -671,9 +671,9 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
[(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
let Inst{7} = 1; // Z bit
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -689,9 +689,9 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
[(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
let Inst{7} = 1; // Z bit
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
@@ -743,36 +743,36 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
@@ -801,36 +801,36 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
@@ -874,9 +874,9 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -901,9 +901,9 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -928,9 +928,9 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
@@ -954,9 +954,9 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
@@ -995,9 +995,9 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
[/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
RegConstraint<"$Sn = $Sd"> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
} // neverHasSideEffects
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 0bd740c..1465984 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -171,7 +171,9 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
// Materializable GVs (in JIT lazy compilation mode) do not require an extra
// load from stub.
- bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+ bool isDecl = GV->hasAvailableExternallyLinkage();
+ if (GV->isDeclaration() && !GV->isMaterializable())
+ isDecl = true;
if (!isTargetDarwin()) {
// Extra load is needed for all externally visible.
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
index f9e86eb..9a27e2f 100644
--- a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -132,22 +132,16 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
}
bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
- const TargetInstrDesc &TID = MI->getDesc();
// FIXME: Detect integer instructions properly.
+ const TargetInstrDesc &TID = MI->getDesc();
unsigned Domain = TID.TSFlags & ARMII::DomainMask;
- if (Domain == ARMII::DomainVFP) {
- unsigned Opcode = TID.getOpcode();
- if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
- Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
- return false;
- } else if (Domain == ARMII::DomainNEON) {
- if (TID.mayStore() || TID.mayLoad())
- return false;
- } else {
+ if (TID.mayStore())
return false;
- }
-
- return MI->readsRegister(Reg, TRI);
+ unsigned Opcode = TID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(Reg, TRI);
return false;
}
diff --git a/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp b/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp
index 97e54bf..965665c 100644
--- a/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp
+++ b/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp
@@ -35,6 +35,7 @@ namespace {
private:
const TargetRegisterInfo *TRI;
const ARMBaseInstrInfo *TII;
+ bool isA8;
typedef DenseMap<unsigned, const MachineInstr*> RegMap;
@@ -43,6 +44,11 @@ namespace {
char NEONMoveFixPass::ID = 0;
}
+static bool inNEONDomain(unsigned Domain, bool isA8) {
+ return (Domain & ARMII::DomainNEON) ||
+ (isA8 && (Domain & ARMII::DomainNEONA8));
+}
+
bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
RegMap Defs;
bool Modified = false;
@@ -70,7 +76,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
Domain = ARMII::DomainNEON;
}
- if (Domain & ARMII::DomainNEON) {
+ if (inNEONDomain(Domain, isA8)) {
// Convert VMOVD to VMOVDneon
unsigned DestReg = MI->getOperand(0).getReg();
@@ -123,6 +129,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
TRI = TM.getRegisterInfo();
TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ isA8 = TM.getSubtarget<ARMSubtarget>().isCortexA8();
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 2f67257..9b1073b 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -95,6 +95,12 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
bool
Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const {
+ while (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == MBB.end())
+ return false;
+ }
+
unsigned PredReg = 0;
return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
}
OpenPOWER on IntegriCloud