author    rdivacky <rdivacky@FreeBSD.org>  2010-04-02 08:54:30 +0000
committer rdivacky <rdivacky@FreeBSD.org>  2010-04-02 08:54:30 +0000
commit    20e856b2a58d12231aa42d5d13888b15ac03e5a4 (patch)
tree      cf5763d092b81cecc168fa28032247ee495d06e2 /lib/Target
parent    2f2afc1aae898651e26987a5c71f3febb19bca98 (diff)
Update LLVM to r100181.
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/ARM/ARM.td | 21
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 31
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 96
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 3
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 195
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 2
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 1169
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 8
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 15
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 5
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 11
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp | 4
-rw-r--r--  lib/Target/ARM/NEONPreAllocPass.cpp | 18
-rw-r--r--  lib/Target/ARM/README.txt | 3
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 4
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.cpp | 15
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.td | 27
-rw-r--r--  lib/Target/Blackfin/BlackfinIntrinsics.td | 6
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 6
-rw-r--r--  lib/Target/CellSPU/SPU.h | 67
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 10
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 5
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp | 15
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td | 13
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp | 87
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.h | 19
-rw-r--r--  lib/Target/MBlaze/MBlazeIntrinsics.td | 4
-rw-r--r--  lib/Target/MSIL/MSILWriter.cpp | 2
-rw-r--r--  lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp | 4
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 5
-rw-r--r--  lib/Target/Mangler.cpp | 5
-rw-r--r--  lib/Target/Mips/MipsInstrFPU.td | 14
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 15
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.cpp | 5
-rw-r--r--  lib/Target/PIC16/PIC16Section.cpp | 18
-rw-r--r--  lib/Target/PIC16/PIC16Section.h | 17
-rw-r--r--  lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp | 13
-rw-r--r--  lib/Target/PowerPC/PPCBranchSelector.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 20
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 12
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 13
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td | 17
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 15
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 30
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFP.td | 12
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 4
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.td | 64
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 5
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 12
-rw-r--r--  lib/Target/X86/CMakeLists.txt | 1
-rw-r--r--  lib/Target/X86/SSEDomainFix.cpp | 499
-rw-r--r--  lib/Target/X86/X86.h | 4
-rw-r--r--  lib/Target/X86/X86.td | 15
-rw-r--r--  lib/Target/X86/X86AsmBackend.cpp | 131
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 4
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 214
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 15
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 285
-rw-r--r--  lib/Target/X86/X86InstrFormats.td | 69
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 79
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 14
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 734
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 13
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 128
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 4
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 4
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 10
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 1
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 15
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.td | 2
72 files changed, 2678 insertions, 1734 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index bbb1dbd..6486a60 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -43,6 +43,21 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision floating point">;
+// Some processors have multiply-accumulate instructions that don't
+// play nicely with other VFP instructions, and it's generally better
+// to just not use them.
+// FIXME: Currently, this is only flagged for Cortex-A8. It may be true for
+// others as well. We should do more benchmarking and confirm one way or
+// the other.
+def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true",
+ "Disable VFP MAC instructions">;
+// Some processors benefit from using NEON instructions for scalar
+// single-precision FP operations.
+def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
+ "true",
+ "Use NEON for single precision FP">;
+
+
//===----------------------------------------------------------------------===//
// ARM Processors supported.
//
@@ -92,7 +107,8 @@ def : ProcNoItin<"iwmmxt", [ArchV5TE]>;
// V6 Processors.
def : Processor<"arm1136j-s", ARMV6Itineraries, [ArchV6]>;
-def : Processor<"arm1136jf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+def : Processor<"arm1136jf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2,
+ FeatureHasSlowVMLx]>;
def : Processor<"arm1176jz-s", ARMV6Itineraries, [ArchV6]>;
def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>;
@@ -106,7 +122,8 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries,
// V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
- [ArchV7A, FeatureThumb2, FeatureNEON]>;
+ [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
+ FeatureNEONForFP]>;
def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>;
//===----------------------------------------------------------------------===//
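
A minimal standalone sketch of how the new "vmlx" feature plausibly reaches instruction selection: TableGen sets SlowVMLx on the subtarget, and the UseVMLx predicate added in ARMInstrInfo.td later in this commit queries an accessor along these lines. The accessor body is an assumption, not the LLVM source.

```cpp
// Sketch only; the field and accessor names mirror those used in this
// commit, but the exact ARMSubtarget implementation is assumed.
struct ARMSubtargetSketch {
  bool SlowVMLx = false;      // set by the "vmlx" feature or a CPU entry
  bool useVMLx() const {      // queried by the UseVMLx predicate
    return !SlowVMLx;         // slow VML[AS]? then skip those patterns
  }
};
```
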
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e6ea03a..0a0b0ea 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -204,7 +204,15 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
return false;
// Get the last instruction in the block.
@@ -275,6 +283,11 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
--I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
if (!isUncondBranchOpcode(I->getOpcode()) &&
!isCondBranchOpcode(I->getOpcode()))
return 0;
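
Both hunks above add the same backward scan that ignores DBG_VALUE instructions, so debug info can never change the outcome of branch analysis. A self-contained analogue of the loop, with ints standing in for MachineInstrs and negative values playing the role of debug values:

```cpp
#include <vector>

// Returns the last non-debug entry, scanning backwards as the hunks do;
// returns -1 when the block is empty or holds only debug values.
int lastRealInstr(const std::vector<int> &Block) {
  for (auto I = Block.end(); I != Block.begin();) {
    --I;
    if (*I >= 0)   // not a debug value: this is the terminator to analyze
      return *I;
  }
  return -1;
}
```
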
@@ -738,14 +751,16 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
// FIXME: Neon instructions should support predicates
if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q))
.addFrameIndex(FI).addImm(128)
.addMemOperand(MMO)
.addReg(SrcReg, getKillRegState(isKill)));
} else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRQ)).
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)).
addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addMemOperand(MMO));
}
}
}
@@ -788,12 +803,14 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
RC == ARM::QPR_8RegisterClass) && "Unknown regclass!");
if (Align >= 16
&& (getRegisterInfo().canRealignStack(MF))) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg)
.addFrameIndex(FI).addImm(128)
.addMemOperand(MMO));
} else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg)
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addMemOperand(MMO));
}
}
}
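
The spill/reload hunks above choose between two encodings for a Q-register stack access: the vld1/vst1 forms carry an alignment operand and are used when the slot is 16-byte aligned and the frame can be realigned, while vldm/vstm has no alignment requirement and serves as the fallback. A minimal sketch of that decision (the helper name is illustrative; the condition mirrors the hunks):

```cpp
// True: emit the alignment-checked VLD1q/VST1q form; false: VLDMQ/VSTMQ.
bool useVLD1VST1ForQSpill(unsigned AlignInBytes, bool CanRealignStack) {
  return AlignInBytes >= 16 && CanRealignStack;
}
```
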
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 71207c8..7d48663 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -124,17 +124,17 @@ private:
/// SelectDYN_ALLOC - Select dynamic alloc for Thumb.
SDNode *SelectDYN_ALLOC(SDNode *N);
- /// SelectVLD - Select NEON load intrinsics. NumVecs should
- /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// SelectVLD - Select NEON load intrinsics. NumVecs should be
+ /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// loads of D registers and even subregs and odd subregs of Q registers.
- /// For NumVecs == 2, QOpcodes1 is not used.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
/// SelectVST - Select NEON store intrinsics. NumVecs should
- /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// stores of D registers and even subregs and odd subregs of Q registers.
- /// For NumVecs == 2, QOpcodes1 is not used.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
@@ -1022,7 +1022,7 @@ static EVT GetNEONSubregVT(EVT VT) {
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
- assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
@@ -1047,6 +1047,9 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
+ break;
}
SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
@@ -1060,15 +1063,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
}
EVT RegVT = GetNEONSubregVT(VT);
- if (NumVecs == 2) {
- // Quad registers are directly supported for VLD2,
- // loading 2 pairs of D regs.
+ if (NumVecs <= 2) {
+ // Quad registers are directly supported for VLD1 and VLD2,
+ // loading pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- std::vector<EVT> ResTys(4, VT);
+ std::vector<EVT> ResTys(2 * NumVecs, RegVT);
ResTys.push_back(MVT::Other);
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- Chain = SDValue(VLd, 4);
+ Chain = SDValue(VLd, 2 * NumVecs);
// Combine the even and odd subregs to produce the result.
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
@@ -1109,7 +1112,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
- assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ assert(NumVecs >=1 && NumVecs <= 4 && "VST NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
@@ -1134,6 +1137,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VST1");
+ break;
}
SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
@@ -1154,9 +1160,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
}
EVT RegVT = GetNEONSubregVT(VT);
- if (NumVecs == 2) {
- // Quad registers are directly supported for VST2,
- // storing 2 pairs of D regs.
+ if (NumVecs <= 2) {
+ // Quad registers are directly supported for VST1 and VST2,
+ // storing pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
@@ -1167,7 +1173,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
+ 5 + 2 * NumVecs);
}
// Otherwise, quad registers are stored with two separate instructions,
@@ -1694,6 +1701,35 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ResNode = SelectARMIndexedLoad(N);
if (ResNode)
return ResNode;
+
+ // VLDMQ must be custom-selected for "v2f64 load" to set the AM5Opc value.
+ if (Subtarget->hasVFP2() &&
+ N->getValueType(0).getSimpleVT().SimpleTy == MVT::v2f64) {
+ SDValue Chain = N->getOperand(0);
+ SDValue AM5Opc =
+ CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain };
+ return CurDAG->getMachineNode(ARM::VLDMQ, dl, MVT::v2f64, MVT::Other,
+ Ops, 5);
+ }
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::STORE: {
+ // VSTMQ must be custom-selected for "v2f64 store" to set the AM5Opc value.
+ if (Subtarget->hasVFP2() &&
+ N->getOperand(1).getValueType().getSimpleVT().SimpleTy == MVT::v2f64) {
+ SDValue Chain = N->getOperand(0);
+ SDValue AM5Opc =
+ CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(1), N->getOperand(2),
+ AM5Opc, Pred, PredReg, Chain };
+ return CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6);
+ }
// Other cases are autogenerated.
break;
}
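
These custom-selection cases exist only to materialize the AM5Opc operand that the autogenerated patterns cannot supply. A sketch of what ARM_AM::getAM5Opc(ARM_AM::ia, 4) plausibly packs together; the bit layout and enumerator values are assumptions inferred from the call sites:

```cpp
// Assumed layout: sub-mode in the bits above an 8-bit offset field.
// "ia" is increment-after; the 4 is plausibly the transfer size in
// 32-bit words (two D registers making up one Q register).
enum AMSubModeSketch { ia = 1, ib, da, db };   // values illustrative
unsigned getAM5OpcSketch(AMSubModeSketch SubMode, unsigned char Offset) {
  return (unsigned(SubMode) << 8) | unsigned(Offset);
}
```
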
@@ -1831,16 +1867,24 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
default:
break;
+ case Intrinsic::arm_neon_vld1: {
+ unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
+ ARM::VLD1d32, ARM::VLD1d64 };
+ unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
+ ARM::VLD1q32, ARM::VLD1q64 };
+ return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
+ }
+
case Intrinsic::arm_neon_vld2: {
unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
- ARM::VLD2d32, ARM::VLD2d64 };
+ ARM::VLD2d32, ARM::VLD1q64 };
unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 };
return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld3: {
unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16,
- ARM::VLD3d32, ARM::VLD3d64 };
+ ARM::VLD3d32, ARM::VLD1d64T };
unsigned QOpcodes0[] = { ARM::VLD3q8_UPD,
ARM::VLD3q16_UPD,
ARM::VLD3q32_UPD };
@@ -1852,7 +1896,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vld4: {
unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16,
- ARM::VLD4d32, ARM::VLD4d64 };
+ ARM::VLD4d32, ARM::VLD1d64Q };
unsigned QOpcodes0[] = { ARM::VLD4q8_UPD,
ARM::VLD4q16_UPD,
ARM::VLD4q32_UPD };
@@ -1883,16 +1927,24 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
+ case Intrinsic::arm_neon_vst1: {
+ unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
+ ARM::VST1d32, ARM::VST1d64 };
+ unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
+ ARM::VST1q32, ARM::VST1q64 };
+ return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
+ }
+
case Intrinsic::arm_neon_vst2: {
unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
- ARM::VST2d32, ARM::VST2d64 };
+ ARM::VST2d32, ARM::VST1q64 };
unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16,
- ARM::VST3d32, ARM::VST3d64 };
+ ARM::VST3d32, ARM::VST1d64T };
unsigned QOpcodes0[] = { ARM::VST3q8_UPD,
ARM::VST3q16_UPD,
ARM::VST3q32_UPD };
@@ -1904,7 +1956,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vst4: {
unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
- ARM::VST4d32, ARM::VST4d64 };
+ ARM::VST4d32, ARM::VST1d64Q };
unsigned QOpcodes0[] = { ARM::VST4q8_UPD,
ARM::VST4q16_UPD,
ARM::VST4q32_UPD };
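
The opcode-table changes above (VLD2d64 to VLD1q64, VLD3d64 to VLD1d64T, VLD4d64 to VLD1d64Q, and likewise for the stores) reflect an architectural fact: NEON has no de-interleaving vldN/vstN form for 64-bit elements, so an N-vector operation on i64 lanes is simply vld1/vst1 with N consecutive D registers. A hypothetical helper making the mapping explicit; the real code indexes fixed opcode arrays instead:

```cpp
const char *d64VectorLoadOpcode(unsigned NumVecs) {
  switch (NumVecs) {
  case 1:  return "VLD1d64";    // vld1.64 {d0}
  case 2:  return "VLD1q64";    // vld1.64 {d0,d1}
  case 3:  return "VLD1d64T";   // vld1.64 {d0,d1,d2}
  case 4:  return "VLD1d64Q";   // vld1.64 {d0,d1,d2,d3}
  default: return "invalid";
  }
}
```
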
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 0d0a004..b6c81f6 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -456,6 +456,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// Generic (and overly aggressive) if-conversion limits.
setIfCvtBlockSizeLimit(10);
setIfCvtDupBlockSizeLimit(2);
+ } else if (Subtarget->hasV7Ops()) {
+ setIfCvtBlockSizeLimit(3);
+ setIfCvtDupBlockSizeLimit(1);
} else if (Subtarget->hasV6Ops()) {
setIfCvtBlockSizeLimit(2);
setIfCvtDupBlockSizeLimit(1);
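
For context on the new v7 limits: if-conversion replaces short branches with predicated instructions, and the block-size limit bounds how many instructions may be predicated (3 here) while the dup limit bounds how many may be duplicated (1). A C-level picture of the kind of code it targets:

```cpp
// A guarded select like this is the classic if-conversion candidate: on
// ARM the branch can become a pair of conditionally executed moves when
// the guarded region fits under the block-size limit.
int pick(bool c, int a, int b) {
  if (c)
    return a;   // small enough to predicate rather than branch
  return b;
}
```
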
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 4f6f05d..4427e50 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1,10 +1,10 @@
//===- ARMInstrFormats.td - ARM Instruction Formats --*- tablegen -*---------=//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -59,7 +59,18 @@ def NEONDupFrm : Format<28>;
def MiscFrm : Format<29>;
def ThumbMiscFrm : Format<30>;
-def NLdStFrm : Format<31>;
+def NLdStFrm : Format<31>;
+def N1RegModImmFrm : Format<32>;
+def N2RegFrm : Format<33>;
+def NVCVTFrm : Format<34>;
+def NVDupLnFrm : Format<35>;
+def N2RegVShLFrm : Format<36>;
+def N2RegVShRFrm : Format<37>;
+def N3RegFrm : Format<38>;
+def N3RegVShFrm : Format<39>;
+def NVExtFrm : Format<40>;
+def NVMulSLFrm : Format<41>;
+def NVTBLFrm : Format<42>;
// Misc flags.
@@ -177,13 +188,13 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
// TSFlagsFields
AddrMode AM = am;
bits<4> AddrModeBits = AM.Value;
-
+
SizeFlagVal SZ = sz;
bits<3> SizeFlag = SZ.Value;
IndexMode IM = im;
bits<2> IndexModeBits = IM.Value;
-
+
Format F = f;
bits<6> Form = F.Value;
@@ -195,7 +206,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
//
bit isUnaryDataProc = 0;
bit canXformTo16Bit = 0;
-
+
let Constraints = cstr;
let Itinerary = itin;
}
@@ -214,9 +225,9 @@ class InstThumb<AddrMode am, SizeFlagVal sz, IndexMode im,
Format f, Domain d, string cstr, InstrItinClass itin>
: InstTemplate<am, sz, im, f, d, cstr, itin>;
-class PseudoInst<dag oops, dag iops, InstrItinClass itin,
+class PseudoInst<dag oops, dag iops, InstrItinClass itin,
string asm, list<dag> pattern>
- : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain,
+ : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain,
"", itin> {
let OutOperandList = oops;
let InOperandList = iops;
@@ -226,7 +237,7 @@ class PseudoInst<dag oops, dag iops, InstrItinClass itin,
// Almost all ARM instructions are predicable.
class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
- IndexMode im, Format f, InstrItinClass itin,
+ IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr,
list<dag> pattern>
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
@@ -238,9 +249,9 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
}
// A few are not predicable
class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
- IndexMode im, Format f, InstrItinClass itin,
- string opc, string asm, string cstr,
- list<dag> pattern>
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
@@ -290,9 +301,9 @@ class AXI<dag oops, dag iops, Format f, InstrItinClass itin,
: XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern>;
class AInoP<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+ string opc, string asm, list<dag> pattern>
: InoP<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
- opc, asm, "", pattern>;
+ opc, asm, "", pattern>;
// Ctrl flow instructions
class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
@@ -362,7 +373,7 @@ class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{24-21} = opcod;
let Inst{27-26} = {0,0};
}
-class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin,
+class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrMode1, Size8Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern>;
@@ -387,7 +398,7 @@ class AI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{24} = 1; // P bit
let Inst{27-26} = {0,1};
}
-class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
+class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
: XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
@@ -407,7 +418,7 @@ class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{24} = 1; // P bit
let Inst{27-26} = {0,1};
}
-class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
+class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
: XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
@@ -549,7 +560,7 @@ class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin,
}
// addrmode3 instructions
-class AI3<dag oops, dag iops, Format f, InstrItinClass itin,
+class AI3<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern>;
@@ -853,7 +864,6 @@ class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{27-25} = 0b000;
}
-
// addrmode4 instructions
class AXI4ld<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
@@ -961,20 +971,25 @@ class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
: ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>;
// Two-address instructions
-class TIt<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
- : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "$lhs = $dst", pattern>;
+class TIt<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "$lhs = $dst",
+ pattern>;
// tBL, tBX 32-bit instructions
class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
- dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
- : ThumbI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>, Encoding {
+ dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>,
+ Encoding {
let Inst{31-27} = opcod1;
let Inst{15-14} = opcod2;
let Inst{12} = opcod3;
}
// BR_JT instructions
-class TJTI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
+class TJTI<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
: ThumbI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
// Thumb1 only
@@ -1001,7 +1016,7 @@ class T1JTI<dag oops, dag iops, InstrItinClass itin,
// Two-address instructions
class T1It<dag oops, dag iops, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin,
+ : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin,
asm, cstr, pattern>;
// Thumb1 instruction that can either be predicated or set CPSR.
@@ -1024,7 +1039,7 @@ class T1sI<dag oops, dag iops, InstrItinClass itin,
class T1sIt<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
- "$lhs = $dst", pattern>;
+ "$lhs = $dst", pattern>;
// Thumb1 instruction that can be predicated.
class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
@@ -1046,7 +1061,7 @@ class T1pI<dag oops, dag iops, InstrItinClass itin,
class T1pIt<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
- "$lhs = $dst", pattern>;
+ "$lhs = $dst", pattern>;
class T1pI1<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -1057,7 +1072,7 @@ class T1pI2<dag oops, dag iops, InstrItinClass itin,
class T1pI4<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb1pI<oops, iops, AddrModeT1_4, Size2Bytes, itin, opc, asm, "", pattern>;
-class T1pIs<dag oops, dag iops,
+class T1pIs<dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
: Thumb1pI<oops, iops, AddrModeT1_s, Size2Bytes, itin, opc, asm, "", pattern>;
@@ -1146,8 +1161,8 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
}
class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
- InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
@@ -1161,7 +1176,7 @@ class T2I<dag oops, dag iops, InstrItinClass itin,
: Thumb2I<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>;
class T2Ii12<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, itin, opc, asm, "", pattern>;
+ : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, itin, opc, asm, "",pattern>;
class T2Ii8<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeT2_i8, Size4Bytes, itin, opc, asm, "", pattern>;
@@ -1196,7 +1211,7 @@ class T2JTI<dag oops, dag iops, InstrItinClass itin,
: Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
class T2Ix2<dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+ string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeNone, Size8Bytes, itin, opc, asm, "", pattern>;
// Two-address instructions
@@ -1295,7 +1310,7 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
- VFPLdStFrm, itin, opc, asm, "", pattern> {
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-24} = opcod1;
let Inst{21-20} = opcod2;
@@ -1309,7 +1324,7 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
- VFPLdStFrm, itin, opc, asm, "", pattern> {
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-24} = opcod1;
let Inst{21-20} = opcod2;
@@ -1320,7 +1335,7 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
- VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
let Inst{11-8} = 0b1011;
@@ -1332,7 +1347,7 @@ class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
class AXSI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
- VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
let Inst{11-8} = 0b1010;
@@ -1353,7 +1368,8 @@ class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
// Double precision, binary
class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
- dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
: VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
@@ -1362,6 +1378,20 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
let Inst{4} = op4;
}
+// Double precision, binary, VML[AS] (for additional predicate)
+class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-8} = 0b1011;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+ list<Predicate> Predicates = [HasVFP2, UseVMLx];
+}
+
+
// Single precision, unary
class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
@@ -1399,7 +1429,8 @@ class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
// Single precision binary, if no NEON
// Same as ASbI except not available if NEON is enabled
class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
- dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
: ASbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
}
@@ -1419,8 +1450,8 @@ class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
// VFP conversion between floating-point and fixed-point
class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
- dag oops, dag iops, InstrItinClass itin, string opc, string asm,
- list<dag> pattern>
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
: AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> {
// size (fixed-point number): sx == 0 ? 16 : 32
let Inst{7} = op5; // sx
@@ -1448,7 +1479,7 @@ class AVConv2I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
: AVConvXI<opcod1, opcod2, oops, iops, VFPConv2Frm, itin, opc, asm, pattern>;
-class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
: AVConvXI<opcod1, opcod2, oops, iops, VFPConv3Frm, itin, opc, asm, pattern>;
@@ -1480,9 +1511,10 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
}
// Same as NeonI except it does not have a "data type" specifier.
-class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> {
+class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm));
@@ -1490,18 +1522,6 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin,
list<Predicate> Predicates = [HasNEON];
}
-class NI<dag oops, dag iops, InstrItinClass itin, string opc, string asm,
- list<dag> pattern>
- : NeonXI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, asm, "",
- pattern> {
-}
-
-class NI4<dag oops, dag iops, InstrItinClass itin, string opc,
- string asm, list<dag> pattern>
- : NeonXI<oops, iops, AddrMode4, IndexModeNone, itin, opc, asm, "",
- pattern> {
-}
-
class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
@@ -1514,17 +1534,17 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
let Inst{7-4} = op7_4;
}
-class NDataI<dag oops, dag iops, InstrItinClass itin,
+class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
- : NeonI<oops, iops, AddrModeNone, IndexModeNone, NEONFrm, itin, opc, dt, asm,
- cstr, pattern> {
+ : NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm, cstr,
+ pattern> {
let Inst{31-25} = 0b1111001;
}
-class NDataXI<dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : NeonXI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, asm,
- cstr, pattern> {
+class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NeonXI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, asm,
+ cstr, pattern> {
let Inst{31-25} = 0b1111001;
}
@@ -1532,8 +1552,9 @@ class NDataXI<dag oops, dag iops, InstrItinClass itin,
class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
bit op5, bit op4,
dag oops, dag iops, InstrItinClass itin,
- string opc, string dt, string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> {
let Inst{23} = op23;
let Inst{21-19} = op21_19;
let Inst{11-8} = op11_8;
@@ -1548,7 +1569,7 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
bits<5> op11_7, bit op6, bit op4,
dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
+ : NDataI<oops, iops, N2RegFrm, itin, opc, dt, asm, cstr, pattern> {
let Inst{24-23} = op24_23;
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
@@ -1560,10 +1581,10 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
// Same as N2V except it doesn't have a datatype suffix.
class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
- bits<5> op11_7, bit op6, bit op4,
- dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : NDataXI<oops, iops, itin, opc, asm, cstr, pattern> {
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, N2RegFrm, itin, opc, asm, cstr, pattern> {
let Inst{24-23} = op24_23;
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
@@ -1575,9 +1596,9 @@ class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
// NEON 2 vector register with immediate.
class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
- dag oops, dag iops, InstrItinClass itin,
+ dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
let Inst{24} = op24;
let Inst{23} = op23;
let Inst{11-8} = op11_8;
@@ -1588,9 +1609,9 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
// NEON 3 vector register format.
class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
- dag oops, dag iops, InstrItinClass itin,
+ dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
let Inst{24} = op24;
let Inst{23} = op23;
let Inst{21-20} = op21_20;
@@ -1599,11 +1620,12 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
let Inst{4} = op4;
}
-// Same as N3VX except it doesn't have a data type suffix.
-class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
- dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : NDataXI<oops, iops, itin, opc, asm, cstr, pattern> {
+// Same as N3V except it doesn't have a data type suffix.
+class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> {
let Inst{24} = op24;
let Inst{23} = op23;
let Inst{21-20} = op21_20;
@@ -1617,7 +1639,7 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, list<dag> pattern>
: InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain,
- "", itin> {
+ "", itin> {
let Inst{27-20} = opcod1;
let Inst{11-8} = opcod2;
let Inst{6-5} = opcod3;
@@ -1647,6 +1669,19 @@ class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
: NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, itin,
opc, dt, asm, pattern>;
+// Vector Duplicate Lane (from scalar to all elements)
+class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
+ InstrItinClass itin, string opc, string dt, string asm,
+ list<dag> pattern>
+ : NDataI<oops, iops, NVDupLnFrm, itin, opc, dt, asm, "", pattern> {
+ let Inst{24-23} = 0b11;
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = op19_16;
+ let Inst{11-7} = 0b11000;
+ let Inst{6} = op6;
+ let Inst{4} = 0;
+}
+
// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
// for single-precision FP.
class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
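
The eleven new Format values defined earlier in this file end up in each instruction's TSFlags via the Form field of InstTemplate, so downstream consumers (notably a disassembler or printer) can dispatch on the encoding family instead of re-deriving it from opcode bits. A standalone sketch of that dispatch; the enum values follow the Format defs above, and the function is hypothetical:

```cpp
// Values 31..42 mirror the Format<...> definitions added above.
enum NeonFormatSketch {
  NLdStFrm = 31, N1RegModImmFrm, N2RegFrm, NVCVTFrm, NVDupLnFrm,
  N2RegVShLFrm, N2RegVShRFrm, N3RegFrm, N3RegVShFrm, NVExtFrm,
  NVMulSLFrm, NVTBLFrm
};

const char *neonFamilyName(NeonFormatSketch F) {
  switch (F) {
  case NLdStFrm:       return "load/store";
  case N1RegModImmFrm: return "one register + modified immediate";
  case N3RegFrm:       return "three registers";
  default:             return "other NEON form";
  }
}
```
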
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 26a2806..f2ab06f 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -140,6 +140,8 @@ def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
def UseMovt : Predicate<"Subtarget->useMovt()">;
def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseVMLx : Predicate<"Subtarget->useVMLx()">;
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index c977cc3..ed9d31d 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -115,51 +115,80 @@ def h64imm : Operand<i64> {
// NEON load / store instructions
//===----------------------------------------------------------------------===//
+let mayLoad = 1 in {
// Use vldmia to load a Q register as a D register pair.
-def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm,
- "vldmia", "$addr, ${dst:dregpair}",
- [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
- let Inst{27-25} = 0b110;
- let Inst{24} = 0; // P bit
- let Inst{23} = 1; // U bit
- let Inst{20} = 1;
- let Inst{11-8} = 0b1011;
-}
+// This is equivalent to VLDMD except that it has a Q register operand
+// instead of a pair of D registers.
+def VLDMQ
+ : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p),
+ IndexModeNone, IIC_fpLoadm,
+ "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>;
+def VLDMQ_UPD
+ : AXDI5<(outs QPR:$dst, GPR:$wb), (ins addrmode5:$addr, pred:$p),
+ IndexModeUpd, IIC_fpLoadm,
+ "vldm${addr:submode}${p}\t${addr:base}!, ${dst:dregpair}",
+ "$addr.base = $wb", []>;
+
+// Use vld1 to load a Q register as a D register pair.
+// This alternative to VLDMQ allows an alignment to be specified.
+// This is equivalent to VLD1q64 except that it has a Q register operand.
+def VLD1q
+ : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr),
+ IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
+def VLD1q_UPD
+ : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", "64",
+ "${dst:dregpair}, $addr$offset", "$addr.addr = $wb", []>;
+} // mayLoad = 1
+let mayStore = 1 in {
// Use vstmia to store a Q register as a D register pair.
-def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem,
- "vstmia", "$addr, ${src:dregpair}",
- [(store (v2f64 QPR:$src), addrmode4:$addr)]> {
- let Inst{27-25} = 0b110;
- let Inst{24} = 0; // P bit
- let Inst{23} = 1; // U bit
- let Inst{20} = 0;
- let Inst{11-8} = 0b1011;
-}
+// This is equivalent to VSTMD except that it has a Q register operand
+// instead of a pair of D registers.
+def VSTMQ
+ : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p),
+ IndexModeNone, IIC_fpStorem,
+ "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>;
+def VSTMQ_UPD
+ : AXDI5<(outs GPR:$wb), (ins QPR:$src, addrmode5:$addr, pred:$p),
+ IndexModeUpd, IIC_fpStorem,
+ "vstm${addr:submode}${p}\t${addr:base}!, ${src:dregpair}",
+ "$addr.base = $wb", []>;
+
+// Use vst1 to store a Q register as a D register pair.
+// This alternative to VSTMQ allows an alignment to be specified.
+// This is equivalent to VST1q64 except that it has a Q register operand.
+def VST1q
+ : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src),
+ IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>;
+def VST1q_UPD
+ : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src),
+ IIC_VST, "vst1", "64", "{$src:dregpair}, $addr$offset",
+ "$addr.addr = $wb", []>;
+} // mayStore = 1
-// VLD1 : Vector Load (multiple single elements)
-class VLD1D<bits<4> op7_4, string Dt, ValueType Ty>
- : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
- "vld1", Dt, "\\{$dst\\}, $addr", "",
- [(set DPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>;
-class VLD1Q<bits<4> op7_4, string Dt, ValueType Ty>
- : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
- "vld1", Dt, "${dst:dregpair}, $addr", "",
- [(set QPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>;
-
-def VLD1d8 : VLD1D<0b0000, "8", v8i8>;
-def VLD1d16 : VLD1D<0b0100, "16", v4i16>;
-def VLD1d32 : VLD1D<0b1000, "32", v2i32>;
-def VLD1df : VLD1D<0b1000, "32", v2f32>;
-def VLD1d64 : VLD1D<0b1100, "64", v1i64>;
-
-def VLD1q8 : VLD1Q<0b0000, "8", v16i8>;
-def VLD1q16 : VLD1Q<0b0100, "16", v8i16>;
-def VLD1q32 : VLD1Q<0b1000, "32", v4i32>;
-def VLD1qf : VLD1Q<0b1000, "32", v4f32>;
-def VLD1q64 : VLD1Q<0b1100, "64", v2i64>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
-let mayLoad = 1 in {
+// VLD1 : Vector Load (multiple single elements)
+class VLD1D<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
+ (ins addrmode6:$addr), IIC_VLD1,
+ "vld1", Dt, "\\{$dst\\}, $addr", "", []>;
+class VLD1Q<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2),
+ (ins addrmode6:$addr), IIC_VLD1,
+ "vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
+
+def VLD1d8 : VLD1D<0b0000, "8">;
+def VLD1d16 : VLD1D<0b0100, "16">;
+def VLD1d32 : VLD1D<0b1000, "32">;
+def VLD1d64 : VLD1D<0b1100, "64">;
+
+def VLD1q8 : VLD1Q<0b0000, "8">;
+def VLD1q16 : VLD1Q<0b0100, "16">;
+def VLD1q32 : VLD1Q<0b1000, "32">;
+def VLD1q64 : VLD1Q<0b1100, "64">;
// ...with address register writeback:
class VLD1DWB<bits<4> op7_4, string Dt>
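
Note the shape change in this hunk: VLD1Q no longer produces one QPR value through a selection pattern; it now defines two D registers and carries no pattern, because SelectVLD (earlier in this commit) builds the machine node by hand and stitches the even/odd D halves back into a Q value. A plain-integer analogue of that recombination:

```cpp
#include <cstdint>

// Two 32-bit halves standing in for the DSUBREG_0/DSUBREG_1 pieces of a
// Q register; SelectVLD does the equivalent with subregister inserts.
uint64_t combineDPair(uint32_t Even, uint32_t Odd) {
  return (uint64_t(Odd) << 32) | Even;
}
```
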
@@ -182,54 +211,48 @@ def VLD1q8_UPD : VLD1QWB<0b0000, "8">;
def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
-} // mayLoad = 1
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
-
-// These (dreg triple/quadruple) are for disassembly only.
+// ...with 3 registers (some of these are only for the disassembler):
class VLD1D3<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
- "\\{$dst1, $dst2, $dst3\\}, $addr", "",
- [/* For disassembly only; pattern left blank */]>;
-class VLD1D4<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
- "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "",
- [/* For disassembly only; pattern left blank */]>;
-
-def VLD1d8T : VLD1D3<0b0000, "8">;
-def VLD1d16T : VLD1D3<0b0100, "16">;
-def VLD1d32T : VLD1D3<0b1000, "32">;
-// VLD1d64T : implemented as VLD3d64
-
-def VLD1d8Q : VLD1D4<0b0000, "8">;
-def VLD1d16Q : VLD1D4<0b0100, "16">;
-def VLD1d32Q : VLD1D4<0b1000, "32">;
-// VLD1d64Q : implemented as VLD4d64
-
-// ...with address register writeback:
+ "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
class VLD1D3WB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
- "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb",
- [/* For disassembly only; pattern left blank */]>;
+ "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>;
+
+def VLD1d8T : VLD1D3<0b0000, "8">;
+def VLD1d16T : VLD1D3<0b0100, "16">;
+def VLD1d32T : VLD1D3<0b1000, "32">;
+def VLD1d64T : VLD1D3<0b1100, "64">;
+
+def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">;
+def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
+def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
+def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;
+
+// ...with 4 registers (some of these are only for the disassembler):
+class VLD1D4<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
+ "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
class VLD1D4WB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0010,op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
"\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb",
- [/* For disassembly only; pattern left blank */]>;
+ []>;
-def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">;
-def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
-def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
-// VLD1d64T_UPD : implemented as VLD3d64_UPD
+def VLD1d8Q : VLD1D4<0b0000, "8">;
+def VLD1d16Q : VLD1D4<0b0100, "16">;
+def VLD1d32Q : VLD1D4<0b1000, "32">;
+def VLD1d64Q : VLD1D4<0b1100, "64">;
def VLD1d8Q_UPD : VLD1D4WB<0b0000, "8">;
def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
-// VLD1d64Q_UPD : implemented as VLD4d64_UPD
+def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -245,9 +268,6 @@ class VLD2Q<bits<4> op7_4, string Dt>
def VLD2d8 : VLD2D<0b1000, 0b0000, "8">;
def VLD2d16 : VLD2D<0b1000, 0b0100, "16">;
def VLD2d32 : VLD2D<0b1000, 0b1000, "32">;
-def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2),
- (ins addrmode6:$addr), IIC_VLD1,
- "vld1", "64", "\\{$dst1, $dst2\\}, $addr", "", []>;
def VLD2q8 : VLD2Q<0b0000, "8">;
def VLD2q16 : VLD2Q<0b0100, "16">;
@@ -269,11 +289,6 @@ class VLD2QWB<bits<4> op7_4, string Dt>
def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">;
def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">;
def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">;
-def VLD2d64_UPD : NLdSt<0,0b10,0b1010,0b1100,
- (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
- "vld1", "64", "\\{$dst1, $dst2\\}, $addr$offset",
- "$addr.addr = $wb", []>;
def VLD2q8_UPD : VLD2QWB<0b0000, "8">;
def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
@@ -296,10 +311,6 @@ class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
def VLD3d8 : VLD3D<0b0100, 0b0000, "8">;
def VLD3d16 : VLD3D<0b0100, 0b0100, "16">;
def VLD3d32 : VLD3D<0b0100, 0b1000, "32">;
-def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
- (ins addrmode6:$addr), IIC_VLD1,
- "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -312,11 +323,6 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;
-def VLD3d64_UPD : NLdSt<0,0b10,0b0110,0b1100,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
- "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr$offset",
- "$addr.addr = $wb", []>;
// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD3q8 : VLD3D<0b0101, 0b0000, "8">;
@@ -341,11 +347,6 @@ class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
def VLD4d8 : VLD4D<0b0000, 0b0000, "8">;
def VLD4d16 : VLD4D<0b0000, 0b0100, "16">;
def VLD4d32 : VLD4D<0b0000, 0b1000, "32">;
-def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr), IIC_VLD1,
- "vld1", "64", "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr",
- "", []>;
// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -358,13 +359,6 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;
-def VLD4d64_UPD : NLdSt<0,0b10,0b0010,0b1100,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4,
- GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
- "vld1", "64",
- "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
- "$addr.addr = $wb", []>;
// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD4q8 : VLD4D<0b0001, 0b0000, "8">;
@@ -383,62 +377,62 @@ def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">;
// FIXME: Not yet implemented.
// VLD2LN : Vector Load (single 2-element structure to one lane)
-class VLD2LN<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2),
+class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
"$src1 = $dst1, $src2 = $dst2", []>;
-def VLD2LNd8 : VLD2LN<0b0001, "8">;
-def VLD2LNd16 : VLD2LN<0b0101, "16"> { let Inst{5} = 0; }
-def VLD2LNd32 : VLD2LN<0b1001, "32"> { let Inst{6} = 0; }
+def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8">;
+def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">;
+def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">;
// ...with double-spaced registers:
-def VLD2LNq16 : VLD2LN<0b0101, "16"> { let Inst{5} = 1; }
-def VLD2LNq32 : VLD2LN<0b1001, "32"> { let Inst{6} = 1; }
+def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">;
+def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">;
// ...alternate versions to be allocated odd register numbers:
-def VLD2LNq16odd : VLD2LN<0b0101, "16"> { let Inst{5} = 1; }
-def VLD2LNq32odd : VLD2LN<0b1001, "32"> { let Inst{6} = 1; }
+def VLD2LNq16odd : VLD2LN<0b0101, {?,?,1,?}, "16">;
+def VLD2LNq32odd : VLD2LN<0b1001, {?,1,?,?}, "32">;
// ...with address register writeback:
-class VLD2LNWB<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
+class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt,
"\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset",
"$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>;
-def VLD2LNd8_UPD : VLD2LNWB<0b0001, "8">;
-def VLD2LNd16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 0; }
-def VLD2LNd32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 0; }
+def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8">;
+def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">;
+def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">;
-def VLD2LNq16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 1; }
-def VLD2LNq32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 1; }
+def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">;
+def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">;
// VLD3LN : Vector Load (single 3-element structure to one lane)
-class VLD3LN<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
nohash_imm:$lane), IIC_VLD3, "vld3", Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
-def VLD3LNd8 : VLD3LN<0b0010, "8"> { let Inst{4} = 0; }
-def VLD3LNd16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b00; }
-def VLD3LNd32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b000; }
+def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8">;
+def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">;
+def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">;
// ...with double-spaced registers:
-def VLD3LNq16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
-def VLD3LNq32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">;
+def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">;
// ...alternate versions to be allocated odd register numbers:
-def VLD3LNq16odd : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
-def VLD3LNq32odd : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+def VLD3LNq16odd : VLD3LN<0b0110, {?,?,1,0}, "16">;
+def VLD3LNq32odd : VLD3LN<0b1010, {?,1,0,0}, "32">;
// ...with address register writeback:
-class VLD3LNWB<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b10, op11_8, {?,?,?,?},
+class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, op11_8, op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
@@ -447,37 +441,37 @@ class VLD3LNWB<bits<4> op11_8, string Dt>
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb",
[]>;
-def VLD3LNd8_UPD : VLD3LNWB<0b0010, "8"> { let Inst{4} = 0; }
-def VLD3LNd16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; }
-def VLD3LNd32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; }
+def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8">;
+def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">;
+def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">;
-def VLD3LNq16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; }
-def VLD3LNq32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; }
+def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">;
+def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">;
// VLD4LN : Vector Load (single 4-element structure to one lane)
-class VLD4LN<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b10, op11_8, {?,?,?,?},
+class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, op11_8, op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
nohash_imm:$lane), IIC_VLD4, "vld4", Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
-def VLD4LNd8 : VLD4LN<0b0011, "8">;
-def VLD4LNd16 : VLD4LN<0b0111, "16"> { let Inst{5} = 0; }
-def VLD4LNd32 : VLD4LN<0b1011, "32"> { let Inst{6} = 0; }
+def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8">;
+def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">;
+def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">;
// ...with double-spaced registers:
-def VLD4LNq16 : VLD4LN<0b0111, "16"> { let Inst{5} = 1; }
-def VLD4LNq32 : VLD4LN<0b1011, "32"> { let Inst{6} = 1; }
+def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">;
+def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">;
// ...alternate versions to be allocated odd register numbers:
-def VLD4LNq16odd : VLD4LN<0b0111, "16"> { let Inst{5} = 1; }
-def VLD4LNq32odd : VLD4LN<0b1011, "32"> { let Inst{6} = 1; }
+def VLD4LNq16odd : VLD4LN<0b0111, {?,?,1,?}, "16">;
+def VLD4LNq32odd : VLD4LN<0b1011, {?,1,?,?}, "32">;
// ...with address register writeback:
-class VLD4LNWB<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b10, op11_8, {?,?,?,?},
+class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, op11_8, op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
@@ -486,12 +480,12 @@ class VLD4LNWB<bits<4> op11_8, string Dt>
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb",
[]>;
-def VLD4LNd8_UPD : VLD4LNWB<0b0011, "8">;
-def VLD4LNd16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 0; }
-def VLD4LNd32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 0; }
+def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8">;
+def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">;
+def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">;
-def VLD4LNq16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 1; }
-def VLD4LNq32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 1; }
+def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">;
+def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
// VLD1DUP : Vector Load (single element to all lanes)
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
@@ -500,31 +494,26 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 1; }
// FIXME: Not yet implemented.
} // mayLoad = 1, hasExtraDefRegAllocReq = 1
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+
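// Editorial note: mayStore and hasExtraSrcRegAllocReq now wrap the
// entire VST block in one 'let'; previously only part of the VST1 defs
// carried hasExtraSrcRegAllocReq through a narrower scope (see the
// removed 'let hasExtraSrcRegAllocReq = 1' lines below).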
// VST1 : Vector Store (multiple single elements)
-class VST1D<bits<4> op7_4, string Dt, ValueType Ty>
+class VST1D<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
- "vst1", Dt, "\\{$src\\}, $addr", "",
- [(int_arm_neon_vst1 addrmode6:$addr, (Ty DPR:$src))]>;
-class VST1Q<bits<4> op7_4, string Dt, ValueType Ty>
- : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST,
- "vst1", Dt, "${src:dregpair}, $addr", "",
- [(int_arm_neon_vst1 addrmode6:$addr, (Ty QPR:$src))]>;
-
-let hasExtraSrcRegAllocReq = 1 in {
-def VST1d8 : VST1D<0b0000, "8", v8i8>;
-def VST1d16 : VST1D<0b0100, "16", v4i16>;
-def VST1d32 : VST1D<0b1000, "32", v2i32>;
-def VST1df : VST1D<0b1000, "32", v2f32>;
-def VST1d64 : VST1D<0b1100, "64", v1i64>;
-
-def VST1q8 : VST1Q<0b0000, "8", v16i8>;
-def VST1q16 : VST1Q<0b0100, "16", v8i16>;
-def VST1q32 : VST1Q<0b1000, "32", v4i32>;
-def VST1qf : VST1Q<0b1000, "32", v4f32>;
-def VST1q64 : VST1Q<0b1100, "64", v2i64>;
-} // hasExtraSrcRegAllocReq
-
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+ "vst1", Dt, "\\{$src\\}, $addr", "", []>;
+class VST1Q<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b00,0b1010,op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
+ "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
+
+def VST1d8 : VST1D<0b0000, "8">;
+def VST1d16 : VST1D<0b0100, "16">;
+def VST1d32 : VST1D<0b1000, "32">;
+def VST1d64 : VST1D<0b1100, "64">;
+
+def VST1q8 : VST1Q<0b0000, "8">;
+def VST1q16 : VST1Q<0b0100, "16">;
+def VST1q32 : VST1Q<0b1000, "32">;
+def VST1q64 : VST1Q<0b1100, "64">;
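// Editorial sketch (not part of the patch): the Q-register form now
// lists the two D halves explicitly instead of taking QPR:$src and
// printing it via ${src:dregpair}, and the int_arm_neon_vst1 selection
// patterns are dropped from the .td defs (the pattern lists are now []),
// so matching these stores is presumably handled outside TableGen.
// E.g. VST1q8 prints roughly as
//   vst1.8 {d0, d1}, [r0]
// with d0/d1 standing in for $src1/$src2.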
// ...with address register writeback:
class VST1DWB<bits<4> op7_4, string Dt>
@@ -546,53 +535,50 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">;
def VST1q32_UPD : VST1QWB<0b1000, "32">;
def VST1q64_UPD : VST1QWB<0b1100, "64">;
-// These (dreg triple/quadruple) are for disassembly only.
+// ...with 3 registers (some of these are only for the disassembler):
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
- IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "",
- [/* For disassembly only; pattern left blank */]>;
-class VST1D4<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
- [/* For disassembly only; pattern left blank */]>;
-
-def VST1d8T : VST1D3<0b0000, "8">;
-def VST1d16T : VST1D3<0b0100, "16">;
-def VST1d32T : VST1D3<0b1000, "32">;
-// VST1d64T : implemented as VST3d64
-
-def VST1d8Q : VST1D4<0b0000, "8">;
-def VST1d16Q : VST1D4<0b0100, "16">;
-def VST1d32Q : VST1D4<0b1000, "32">;
-// VST1d64Q : implemented as VST4d64
-
-// ...with address register writeback:
+ IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
class VST1D3WB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3),
IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
- "$addr.addr = $wb",
- [/* For disassembly only; pattern left blank */]>;
+ "$addr.addr = $wb", []>;
+
+def VST1d8T : VST1D3<0b0000, "8">;
+def VST1d16T : VST1D3<0b0100, "16">;
+def VST1d32T : VST1D3<0b1000, "32">;
+def VST1d64T : VST1D3<0b1100, "64">;
+
+def VST1d8T_UPD : VST1D3WB<0b0000, "8">;
+def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
+def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
+def VST1d64T_UPD : VST1D3WB<0b1100, "64">;
+
+// ...with 4 registers (some of these are only for the disassembler):
+class VST1D4<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
+ []>;
class VST1D4WB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
- "$addr.addr = $wb",
- [/* For disassembly only; pattern left blank */]>;
+ "$addr.addr = $wb", []>;
-def VST1d8T_UPD : VST1D3WB<0b0000, "8">;
-def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
-def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
-// VST1d64T_UPD : implemented as VST3d64_UPD
+def VST1d8Q : VST1D4<0b0000, "8">;
+def VST1d16Q : VST1D4<0b0100, "16">;
+def VST1d32Q : VST1D4<0b1000, "32">;
+def VST1d64Q : VST1D4<0b1100, "64">;
def VST1d8Q_UPD : VST1D4WB<0b0000, "8">;
def VST1d16Q_UPD : VST1D4WB<0b0100, "16">;
def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
-// VST1d64Q_UPD : implemented as VST4d64_UPD
+def VST1d64Q_UPD : VST1D4WB<0b1100, "64">;
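// Editorial note: with the two-, three- and four-register VST1 forms
// above now covering the "64" element size (VST1q64, VST1d64T, VST1d64Q
// and their _UPD variants), the ad-hoc VST2d64/VST3d64/VST4d64 defs
// removed below, which were really vst1 encodings with 2/3/4 registers,
// become redundant.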
// VST2 : Vector Store (multiple 2-element structures)
class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -608,9 +594,6 @@ class VST2Q<bits<4> op7_4, string Dt>
def VST2d8 : VST2D<0b1000, 0b0000, "8">;
def VST2d16 : VST2D<0b1000, 0b0100, "16">;
def VST2d32 : VST2D<0b1000, 0b1000, "32">;
-def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
- "vst1", "64", "\\{$src1, $src2\\}, $addr", "", []>;
def VST2q8 : VST2Q<0b0000, "8">;
def VST2q16 : VST2Q<0b0100, "16">;
@@ -632,11 +615,6 @@ class VST2QWB<bits<4> op7_4, string Dt>
def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">;
def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">;
def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">;
-def VST2d64_UPD : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2), IIC_VST,
- "vst1", "64", "\\{$src1, $src2\\}, $addr$offset",
- "$addr.addr = $wb", []>;
def VST2q8_UPD : VST2QWB<0b0000, "8">;
def VST2q16_UPD : VST2QWB<0b0100, "16">;
@@ -659,10 +637,6 @@ class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
def VST3d8 : VST3D<0b0100, 0b0000, "8">;
def VST3d16 : VST3D<0b0100, 0b0100, "16">;
def VST3d32 : VST3D<0b0100, 0b1000, "32">;
-def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
- IIC_VST,
- "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr", "", []>;
// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -675,11 +649,6 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">;
def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;
-def VST3d64_UPD : NLdSt<0,0b00,0b0110,0b1100, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
- "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr$offset",
- "$addr.addr = $wb", []>;
// ...with double-spaced registers (non-updating versions for disassembly only):
def VST3q8 : VST3D<0b0101, 0b0000, "8">;
@@ -704,11 +673,6 @@ class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
def VST4d8 : VST4D<0b0000, 0b0000, "8">;
def VST4d16 : VST4D<0b0000, 0b0100, "16">;
def VST4d32 : VST4D<0b0000, 0b1000, "32">;
-def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- DPR:$src4), IIC_VST,
- "vst1", "64", "\\{$src1, $src2, $src3, $src4\\}, $addr",
- "", []>;
// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -721,12 +685,6 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">;
def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
-def VST4d64_UPD : NLdSt<0,0b00,0b0010,0b1100, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST,
- "vst1", "64",
- "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
- "$addr.addr = $wb", []>;
// ...with double-spaced registers (non-updating versions for disassembly only):
def VST4q8 : VST4D<0b0001, 0b0000, "8">;
@@ -745,109 +703,109 @@ def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">;
// FIXME: Not yet implemented.
// VST2LN : Vector Store (single 2-element structure from one lane)
-class VST2LN<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs),
+class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
"", []>;
-def VST2LNd8 : VST2LN<0b0001, "8">;
-def VST2LNd16 : VST2LN<0b0101, "16"> { let Inst{5} = 0; }
-def VST2LNd32 : VST2LN<0b1001, "32"> { let Inst{6} = 0; }
+def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8">;
+def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">;
+def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">;
// ...with double-spaced registers:
-def VST2LNq16 : VST2LN<0b0101, "16"> { let Inst{5} = 1; }
-def VST2LNq32 : VST2LN<0b1001, "32"> { let Inst{6} = 1; }
+def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">;
+def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">;
// ...alternate versions to be allocated odd register numbers:
-def VST2LNq16odd : VST2LN<0b0101, "16"> { let Inst{5} = 1; }
-def VST2LNq32odd : VST2LN<0b1001, "32"> { let Inst{6} = 1; }
+def VST2LNq16odd : VST2LN<0b0101, {?,?,1,?}, "16">;
+def VST2LNq32odd : VST2LN<0b1001, {?,1,?,?}, "32">;
// ...with address register writeback:
-class VST2LNWB<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb),
+class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt,
"\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
"$addr.addr = $wb", []>;
-def VST2LNd8_UPD : VST2LNWB<0b0001, "8">;
-def VST2LNd16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 0; }
-def VST2LNd32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 0; }
+def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8">;
+def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">;
+def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">;
-def VST2LNq16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 1; }
-def VST2LNq32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 1; }
+def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">;
+def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">;
// VST3LN : Vector Store (single 3-element structure from one lane)
-class VST3LN<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs),
+class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
nohash_imm:$lane), IIC_VST, "vst3", Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>;
-def VST3LNd8 : VST3LN<0b0010, "8"> { let Inst{4} = 0; }
-def VST3LNd16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b00; }
-def VST3LNd32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b000; }
+def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8">;
+def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">;
+def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">;
// ...with double-spaced registers:
-def VST3LNq16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
-def VST3LNq32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">;
+def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">;
// ...alternate versions to be allocated odd register numbers:
-def VST3LNq16odd : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
-def VST3LNq32odd : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+def VST3LNq16odd : VST3LN<0b0110, {?,?,1,0}, "16">;
+def VST3LNq32odd : VST3LN<0b1010, {?,1,0,0}, "32">;
// ...with address register writeback:
-class VST3LNWB<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb),
+class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
IIC_VST, "vst3", Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset",
"$addr.addr = $wb", []>;
-def VST3LNd8_UPD : VST3LNWB<0b0010, "8"> { let Inst{4} = 0; }
-def VST3LNd16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; }
-def VST3LNd32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; }
+def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8">;
+def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">;
+def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">;
-def VST3LNq16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; }
-def VST3LNq32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; }
+def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">;
+def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">;
// VST4LN : Vector Store (single 4-element structure from one lane)
-class VST4LN<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs),
+class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
nohash_imm:$lane), IIC_VST, "vst4", Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr",
"", []>;
-def VST4LNd8 : VST4LN<0b0011, "8">;
-def VST4LNd16 : VST4LN<0b0111, "16"> { let Inst{5} = 0; }
-def VST4LNd32 : VST4LN<0b1011, "32"> { let Inst{6} = 0; }
+def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8">;
+def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">;
+def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">;
// ...with double-spaced registers:
-def VST4LNq16 : VST4LN<0b0111, "16"> { let Inst{5} = 1; }
-def VST4LNq32 : VST4LN<0b1011, "32"> { let Inst{6} = 1; }
+def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">;
+def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">;
// ...alternate versions to be allocated odd register numbers:
-def VST4LNq16odd : VST4LN<0b0111, "16"> { let Inst{5} = 1; }
-def VST4LNq32odd : VST4LN<0b1011, "32"> { let Inst{6} = 1; }
+def VST4LNq16odd : VST4LN<0b0111, {?,?,1,?}, "16">;
+def VST4LNq32odd : VST4LN<0b1011, {?,1,?,?}, "32">;
// ...with address register writeback:
-class VST4LNWB<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb),
+class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
IIC_VST, "vst4", Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset",
"$addr.addr = $wb", []>;
-def VST4LNd8_UPD : VST4LNWB<0b0011, "8">;
-def VST4LNd16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 0; }
-def VST4LNd32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 0; }
+def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8">;
+def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">;
+def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;
-def VST4LNq16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 1; }
-def VST4LNq32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 1; }
+def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
+def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
} // mayStore = 1, hasExtraSrcRegAllocReq = 1
@@ -906,18 +864,18 @@ class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "",
+ (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt,"$dst, $src", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "",
+ (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt,"$dst, $src", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;
// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
@@ -966,8 +924,8 @@ class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
- OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm,
+ IIC_VBIND, OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
let isCommutable = Commutable;
}
@@ -975,7 +933,7 @@ class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
let isCommutable = Commutable;
@@ -986,27 +944,28 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3VX<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
OpcodeStr, "$dst, $src1, $src2", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]>{
let isCommutable = Commutable;
}
+
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
- (Ty (NEONvduplane (Ty DPR_VFP2:$src2), imm:$lane)))))]>{
+ (Ty (NEONvduplane (Ty DPR_VFP2:$src2),imm:$lane)))))]> {
let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- IIC_VMULi16D, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
(Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
@@ -1017,7 +976,7 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
let isCommutable = Commutable;
@@ -1026,7 +985,7 @@ class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr,
ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3VX<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin,
OpcodeStr, "$dst, $src1, $src2", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]>{
let isCommutable = Commutable;
@@ -1036,7 +995,7 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
@@ -1047,7 +1006,7 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- IIC_VMULi16Q, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_8:$src2),
@@ -1057,10 +1016,10 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), f, itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
let isCommutable = Commutable;
@@ -1069,7 +1028,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (IntOp (Ty DPR:$src1),
(Ty (NEONvduplane (Ty DPR_VFP2:$src2),
@@ -1080,19 +1039,18 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (IntOp (Ty DPR:$src1),
- (Ty (NEONvduplane (Ty DPR_8:$src2),
- imm:$lane)))))]> {
+ (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
let isCommutable = 0;
}
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), f, itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
let isCommutable = Commutable;
@@ -1102,7 +1060,7 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
@@ -1114,7 +1072,7 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_8:$src2),
@@ -1128,14 +1086,14 @@ class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
ValueType Ty, SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR_VFP2:$dst),
- (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin,
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set DPR:$dst, (Ty (OpNode DPR:$src1,
(Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
@@ -1144,7 +1102,8 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType Ty, SDNode MulOp, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
+ (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
@@ -1156,7 +1115,8 @@ class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType Ty, SDNode MulOp, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
+ (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
@@ -1168,7 +1128,7 @@ class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst, (Ty (OpNode QPR:$src1,
(Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
@@ -1177,7 +1137,8 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
SDNode MulOp, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
+ (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
@@ -1190,7 +1151,8 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
SDNode MulOp, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
+ (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
@@ -1204,7 +1166,7 @@ class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
(OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
@@ -1212,7 +1174,7 @@ class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
(OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
@@ -1223,7 +1185,7 @@ class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), itin,
+ (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst,
(TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;
@@ -1232,7 +1194,8 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
- (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
+ (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
@@ -1244,7 +1207,8 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
- (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
+ (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
@@ -1257,7 +1221,7 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VBINi4D,
+ (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINi4D,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
let isCommutable = Commutable;
@@ -1268,7 +1232,7 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
let isCommutable = Commutable;
@@ -1277,8 +1241,8 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (OpTy DPR:$src1),
(OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
@@ -1288,7 +1252,7 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (OpTy DPR:$src1),
(OpTy (NEONvduplane (OpTy DPR_8:$src2),
@@ -1299,7 +1263,7 @@ class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), IIC_VSUBiD,
+ (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
let isCommutable = Commutable;
@@ -1344,17 +1308,17 @@ class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
// Shift by immediate,
// both double- and quad-register.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, 0, op4,
- (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), itin,
+ (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), f, itin,
OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, 1, op4,
- (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin,
+ (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), f, itin,
OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;
@@ -1363,8 +1327,8 @@ class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
- (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VSHLiD,
- OpcodeStr, Dt, "$dst, $src, $SIMM", "",
+ (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), N2RegVShLFrm,
+ IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src),
(i32 imm:$SIMM))))]>;
@@ -1373,7 +1337,7 @@ class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
- (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin,
+ (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), N2RegVShRFrm, itin,
OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src),
(i32 imm:$SIMM))))]>;
@@ -1383,14 +1347,14 @@ class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VPALiD,
+ (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set DPR:$dst, (Ty (add DPR:$src1,
(Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VPALiD,
+ (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set QPR:$dst, (Ty (add QPR:$src1,
(Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;
@@ -1398,15 +1362,15 @@ class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// Shift by immediate and insert,
// both double- and quad-register.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+ Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VSHLiD,
+ (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), f, IIC_VSHLiD,
OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+ Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VSHLiQ,
+ (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), f, IIC_VSHLiQ,
OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;
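// Editorial note: the shift-accumulate classes above hard-code
// N2RegVShRFrm, since vsra/vrsra only come in right-shift forms, while
// the shift-insert classes take the Format as a parameter because vsli
// and vsri need N2RegVShLFrm and N2RegVShRFrm respectively (see the
// N2VShIns_QHSD change further down).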
@@ -1416,15 +1380,15 @@ class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4,
- (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VUNAD,
- OpcodeStr, Dt, "$dst, $src, $SIMM", "",
+ (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), NVCVTFrm,
+ IIC_VUNAD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4,
- (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), IIC_VUNAQ,
- OpcodeStr, Dt, "$dst, $src, $SIMM", "",
+ (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), NVCVTFrm,
+ IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;
//===----------------------------------------------------------------------===//
@@ -1568,24 +1532,24 @@ multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
// Neon 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
-multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
Intrinsic IntOp, bit Commutable = 0> {
// 64-bit vector types.
- def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, itinD16,
+ def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
OpcodeStr, !strconcat(Dt, "16"),
v4i16, v4i16, IntOp, Commutable>;
- def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, itinD32,
+ def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
OpcodeStr, !strconcat(Dt, "32"),
v2i32, v2i32, IntOp, Commutable>;
// 128-bit vector types.
- def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, itinQ16,
+ def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
OpcodeStr, !strconcat(Dt, "16"),
v8i16, v8i16, IntOp, Commutable>;
- def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, itinQ32,
+ def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
OpcodeStr, !strconcat(Dt, "32"),
v4i32, v4i32, IntOp, Commutable>;
}
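// Editorial sketch (not part of the patch): callers of these intrinsic
// multiclasses now pass the encoding Format explicitly, e.g.
//   defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D,
//                            IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q,
//                            "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
// (matching the VQDMULH change below), so the Format reaches the N3V
// base class rather than being hard-coded in each instruction class.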
@@ -1605,38 +1569,37 @@ multiclass N3VIntSL_HS<bits<4> op11_8,
}
// ....then also with element size of 8 bits:
-multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
Intrinsic IntOp, bit Commutable = 0>
- : N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
+ : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp, Commutable> {
- def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16,
+ def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
OpcodeStr, !strconcat(Dt, "8"),
v8i8, v8i8, IntOp, Commutable>;
- def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16,
+ def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
OpcodeStr, !strconcat(Dt, "8"),
v16i8, v16i8, IntOp, Commutable>;
}
// ....then also with element size of 64 bits:
-multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
Intrinsic IntOp, bit Commutable = 0>
- : N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
+ : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp, Commutable> {
- def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32,
+ def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
OpcodeStr, !strconcat(Dt, "64"),
v1i64, v1i64, IntOp, Commutable>;
- def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32,
+ def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
OpcodeStr, !strconcat(Dt, "64"),
v2i64, v2i64, IntOp, Commutable>;
}
-
// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
@@ -1866,46 +1829,46 @@ multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
// Neon 2-register vector shift by immediate,
+// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- SDNode OpNode> {
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode, Format f> {
// 64-bit vector types.
- def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
+ def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
- def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
+ def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
- def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
+ def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
- def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, itin,
+ def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, f, itin,
OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
// imm6 = xxxxxx
// 128-bit vector types.
- def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
+ def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
- def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
+ def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
- def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
+ def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
- def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, itin,
+ def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, f, itin,
OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
// imm6 = xxxxxx
}
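// Editorial note: in N2VSh_QHSD the element size is folded into the imm6
// shift-amount field, so each per-size def pins the leading imm6 bits:
// Inst{21-19} = 0b001 for 8-bit, Inst{21-20} = 0b01 for 16-bit,
// Inst{21} = 1 for 32-bit, and op7 = 1 with imm6 left free for 64-bit;
// the remaining bits encode the actual shift amount.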
-
// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
@@ -1947,41 +1910,43 @@ multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
// Neon Shift-Insert vector operations,
+// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, SDNode ShOp> {
+ string OpcodeStr, SDNode ShOp,
+ Format f> {
// 64-bit vector types.
def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4,
- OpcodeStr, "8", v8i8, ShOp> {
+ f, OpcodeStr, "8", v8i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4,
- OpcodeStr, "16", v4i16, ShOp> {
+ f, OpcodeStr, "16", v4i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4,
- OpcodeStr, "32", v2i32, ShOp> {
+ f, OpcodeStr, "32", v2i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4,
- OpcodeStr, "64", v1i64, ShOp>;
+ f, OpcodeStr, "64", v1i64, ShOp>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4,
- OpcodeStr, "8", v16i8, ShOp> {
+ f, OpcodeStr, "8", v16i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4,
- OpcodeStr, "16", v8i16, ShOp> {
+ f, OpcodeStr, "16", v8i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4,
- OpcodeStr, "32", v4i32, ShOp> {
+ f, OpcodeStr, "32", v4i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4,
- OpcodeStr, "64", v2i64, ShOp>;
+ f, OpcodeStr, "64", v2i64, ShOp>;
// imm6 = xxxxxx
}
@@ -2044,20 +2009,26 @@ defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u",
defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>;
defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>;
// VHADD : Vector Halving Add
-defm VHADDs : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vhadd", "s", int_arm_neon_vhadds, 1>;
-defm VHADDu : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vhadd", "u", int_arm_neon_vhaddu, 1>;
+defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "s", int_arm_neon_vhadds, 1>;
+defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
-defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vrhadd", "s", int_arm_neon_vrhadds, 1>;
-defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
+defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "s", int_arm_neon_vrhadds, 1>;
+defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
-defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vqadd", "s", int_arm_neon_vqadds, 1>;
-defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>;
+defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "s", int_arm_neon_vqadds, 1>;
+defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
int_arm_neon_vaddhn, 1>;
@@ -2070,10 +2041,10 @@ defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
-def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul", "p8",
- v8i8, v8i8, int_arm_neon_vmulp, 1>;
-def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul", "p8",
- v16i8, v16i8, int_arm_neon_vmulp, 1>;
+def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
+ "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
+def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
+ "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32",
v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32",
@@ -2103,7 +2074,7 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
-defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
+defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
"vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
@@ -2125,8 +2096,8 @@ def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
-defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
- IIC_VMULi16Q, IIC_VMULi32Q,
+defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
+ IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
"vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
@@ -2285,18 +2256,18 @@ defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl", "u",
defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>;
defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>;
// VHSUB : Vector Halving Subtract
-defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D,
- IIC_VBINi4Q, IIC_VBINi4Q,
+defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vhsub", "s", int_arm_neon_vhsubs, 0>;
-defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D,
- IIC_VBINi4Q, IIC_VBINi4Q,
+defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
-defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D,
- IIC_VBINi4Q, IIC_VBINi4Q,
+defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vqsub", "s", int_arm_neon_vqsubs, 0>;
-defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D,
- IIC_VBINi4Q, IIC_VBINi4Q,
+defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
@@ -2323,8 +2294,8 @@ defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vcge", "u", NEONvcgeu, 0>;
-def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32",
- v2i32, v2f32, NEONvcge, 0>;
+def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
+ NEONvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
// For disassembly only.
@@ -2351,21 +2322,27 @@ defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
"$dst, $src, #0">;
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
-def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32",
- v2i32, v2f32, int_arm_neon_vacged, 0>;
-def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge", "f32",
- v4i32, v4f32, int_arm_neon_vacgeq, 0>;
+def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+ "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
+def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+ "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
-def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt", "f32",
- v2i32, v2f32, int_arm_neon_vacgtd, 0>;
-def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt", "f32",
- v4i32, v4f32, int_arm_neon_vacgtq, 0>;
+def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+ "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
+def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+ "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST : Vector Test Bits
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
// Vector Bitwise Operations.
+def vnot8 : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v8i8 immAllOnesV)))>;
+def vnot16 : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>;
+
+
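// Editorial sketch (not part of the patch): vnot8/vnot16 express a
// vector NOT as an xor against an all-ones vector of matching width,
// replacing the old vnot_conv fragment. They appear in patterns such as
//   [(set DPR:$dst, (v2i32 (and DPR:$src1, (vnot8 DPR:$src2))))]
// for VBICd below, so "a & ~b" selects to a single vbic.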
// VAND : Vector Bitwise AND
def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
v2i32, v2i32, and, 1>;
@@ -2386,74 +2363,80 @@ def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
// VBIC : Vector Bitwise Bit Clear (AND NOT)
def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2), IIC_VBINiD,
- "vbic", "$dst, $src1, $src2", "",
- [(set DPR:$dst, (v2i32 (and DPR:$src1,
- (vnot_conv DPR:$src2))))]>;
+ (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
+ "vbic", "$dst, $src1, $src2", "",
+ [(set DPR:$dst, (v2i32 (and DPR:$src1,
+ (vnot8 DPR:$src2))))]>;
def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2), IIC_VBINiQ,
- "vbic", "$dst, $src1, $src2", "",
- [(set QPR:$dst, (v4i32 (and QPR:$src1,
- (vnot_conv QPR:$src2))))]>;
+ (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
+ "vbic", "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (v4i32 (and QPR:$src1,
+ (vnot16 QPR:$src2))))]>;
// VORN : Vector Bitwise OR NOT
def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2), IIC_VBINiD,
- "vorn", "$dst, $src1, $src2", "",
- [(set DPR:$dst, (v2i32 (or DPR:$src1,
- (vnot_conv DPR:$src2))))]>;
+ (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
+ "vorn", "$dst, $src1, $src2", "",
+ [(set DPR:$dst, (v2i32 (or DPR:$src1,
+ (vnot8 DPR:$src2))))]>;
def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2), IIC_VBINiQ,
- "vorn", "$dst, $src1, $src2", "",
- [(set QPR:$dst, (v4i32 (or QPR:$src1,
- (vnot_conv QPR:$src2))))]>;
+ (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
+ "vorn", "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (v4i32 (or QPR:$src1,
+ (vnot16 QPR:$src2))))]>;
// VMVN : Vector Bitwise NOT
def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
- (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD,
- "vmvn", "$dst, $src", "",
- [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>;
+ (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD,
+ "vmvn", "$dst, $src", "",
+ [(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>;
def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
- (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD,
- "vmvn", "$dst, $src", "",
- [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>;
-def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>;
-def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>;
+ (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD,
+ "vmvn", "$dst, $src", "",
+ [(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>;
+def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>;
+def : Pat<(v4i32 (vnot16 QPR:$src)), (VMVNq QPR:$src)>;
// VBSL : Vector Bitwise Select
def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, DPR:$src3), IIC_VCNTiD,
- "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
- [(set DPR:$dst,
- (v2i32 (or (and DPR:$src2, DPR:$src1),
- (and DPR:$src3, (vnot_conv DPR:$src1)))))]>;
+ (ins DPR:$src1, DPR:$src2, DPR:$src3),
+ N3RegFrm, IIC_VCNTiD,
+ "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
+ [(set DPR:$dst,
+ (v2i32 (or (and DPR:$src2, DPR:$src1),
+ (and DPR:$src3, (vnot8 DPR:$src1)))))]>;
def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, QPR:$src3), IIC_VCNTiQ,
- "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
- [(set QPR:$dst,
- (v4i32 (or (and QPR:$src2, QPR:$src1),
- (and QPR:$src3, (vnot_conv QPR:$src1)))))]>;
+ (ins QPR:$src1, QPR:$src2, QPR:$src3),
+ N3RegFrm, IIC_VCNTiQ,
+ "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
+ [(set QPR:$dst,
+ (v4i32 (or (and QPR:$src2, QPR:$src1),
+ (and QPR:$src3, (vnot16 QPR:$src1)))))]>;
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
- IIC_VBINiD, "vbif", "$dst, $src2, $src3", "$src1 = $dst",
+ N3RegFrm, IIC_VBINiD,
+ "vbif", "$dst, $src2, $src3", "$src1 = $dst",
[/* For disassembly only; pattern left blank */]>;
def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
- IIC_VBINiQ, "vbif", "$dst, $src2, $src3", "$src1 = $dst",
+ N3RegFrm, IIC_VBINiQ,
+ "vbif", "$dst, $src2, $src3", "$src1 = $dst",
[/* For disassembly only; pattern left blank */]>;
// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
- IIC_VBINiD, "vbit", "$dst, $src2, $src3", "$src1 = $dst",
+ N3RegFrm, IIC_VBINiD,
+ "vbit", "$dst, $src2, $src3", "$src1 = $dst",
[/* For disassembly only; pattern left blank */]>;
def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
- IIC_VBINiQ, "vbit", "$dst, $src2, $src3", "$src1 = $dst",
+ N3RegFrm, IIC_VBINiQ,
+ "vbit", "$dst, $src2, $src3", "$src1 = $dst",
[/* For disassembly only; pattern left blank */]>;
// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
@@ -2463,15 +2446,15 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
// Vector Absolute Differences.
// VABD : Vector Absolute Difference
-defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D,
- IIC_VBINi4Q, IIC_VBINi4Q,
+defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vabd", "s", int_arm_neon_vabds, 0>;
-defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D,
- IIC_VBINi4Q, IIC_VBINi4Q,
+defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vabd", "u", int_arm_neon_vabdu, 0>;
-def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND,
+def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
"vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
-def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ,
+def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
"vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
@@ -2491,36 +2474,40 @@ defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>;
// Vector Maximum and Minimum.
// VMAX : Vector Maximum
-defm VMAXs : N3VInt_QHS<0,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>;
-defm VMAXu : N3VInt_QHS<1,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>;
-def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32",
- v2f32, v2f32, int_arm_neon_vmaxs, 1>;
-def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32",
- v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vmax", "s", int_arm_neon_vmaxs, 1>;
+defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vmax", "u", int_arm_neon_vmaxu, 1>;
+def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmax",
+ "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmax",
+ "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>;
// VMIN : Vector Minimum
-defm VMINs : N3VInt_QHS<0,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmin", "s", int_arm_neon_vmins, 1>;
-defm VMINu : N3VInt_QHS<1,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmin", "u", int_arm_neon_vminu, 1>;
-def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32",
- v2f32, v2f32, int_arm_neon_vmins, 1>;
-def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin", "f32",
- v4f32, v4f32, int_arm_neon_vmins, 1>;
+defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vmin", "s", int_arm_neon_vmins, 1>;
+defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vmin", "u", int_arm_neon_vminu, 1>;
+def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmin",
+ "f32", v2f32, v2f32, int_arm_neon_vmins, 1>;
+def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmin",
+ "f32", v4f32, v4f32, int_arm_neon_vmins, 1>;
// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add
-def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd", "i8",
- v8i8, v8i8, int_arm_neon_vpadd, 0>;
-def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd", "i16",
- v4i16, v4i16, int_arm_neon_vpadd, 0>;
-def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd", "i32",
- v2i32, v2i32, int_arm_neon_vpadd, 0>;
-def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd", "f32",
- v2f32, v2f32, int_arm_neon_vpadd, 0>;
+def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd",
+ "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>;
+def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd",
+ "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>;
+def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd",
+ "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>;
+def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VBIND, "vpadd",
+ "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>;
// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
@@ -2535,36 +2522,36 @@ defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
int_arm_neon_vpadalu>;
// VPMAX : Vector Pairwise Maximum
-def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "s8",
- v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
-def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "s16",
- v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
-def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "s32",
- v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
-def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "u8",
- v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
-def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "u16",
- v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
-def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "u32",
- v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
-def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax", "f32",
- v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
+def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+ "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
+def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+ "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
+def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+ "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
+def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+ "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
+def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+ "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
+def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+ "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
+def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+ "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
// VPMIN : Vector Pairwise Minimum
-def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "s8",
- v8i8, v8i8, int_arm_neon_vpmins, 0>;
-def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "s16",
- v4i16, v4i16, int_arm_neon_vpmins, 0>;
-def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "s32",
- v2i32, v2i32, int_arm_neon_vpmins, 0>;
-def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "u8",
- v8i8, v8i8, int_arm_neon_vpminu, 0>;
-def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "u16",
- v4i16, v4i16, int_arm_neon_vpminu, 0>;
-def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "u32",
- v2i32, v2i32, int_arm_neon_vpminu, 0>;
-def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin", "f32",
- v2f32, v2f32, int_arm_neon_vpmins, 0>;
+def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+ "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
+def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+ "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
+def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+ "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
+def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+ "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
+def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+ "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
+def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+ "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
+def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmin",
+ "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
@@ -2583,10 +2570,10 @@ def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
v4f32, v4f32, int_arm_neon_vrecpe>;
// VRECPS : Vector Reciprocal Step
-def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1,
+def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
IIC_VRECSD, "vrecps", "f32",
v2f32, v2f32, int_arm_neon_vrecps, 1>;
-def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1,
+def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
IIC_VRECSQ, "vrecps", "f32",
v4f32, v4f32, int_arm_neon_vrecps, 1>;
@@ -2605,25 +2592,30 @@ def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
v4f32, v4f32, int_arm_neon_vrsqrte>;
// VRSQRTS : Vector Reciprocal Square Root Step
-def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1,
+def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
IIC_VRECSD, "vrsqrts", "f32",
v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
-def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1,
+def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
IIC_VRECSQ, "vrsqrts", "f32",
v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
// Vector Shifts.
// VSHL : Vector Shift
-defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
- IIC_VSHLiQ, "vshl", "s", int_arm_neon_vshifts, 0>;
-defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
- IIC_VSHLiQ, "vshl", "u", int_arm_neon_vshiftu, 0>;
+defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "s", int_arm_neon_vshifts, 0>;
+defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "u", int_arm_neon_vshiftu, 0>;
// VSHL : Vector Shift Left (Immediate)
-defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
+defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl,
+ N2RegVShLFrm>;
// VSHR : Vector Shift Right (Immediate)
-defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs>;
-defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru>;
+defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs,
+ N2RegVShRFrm>;
+defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru,
+ N2RegVShRFrm>;
// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
@@ -2649,28 +2641,37 @@ defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
NEONvshrn>;
// VRSHL : Vector Rounding Shift
-defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts,0>;
-defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu,0>;
+defm VRSHLs : N3VInt_QHSD<0, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "s", int_arm_neon_vrshifts, 0>;
+defm VRSHLu : N3VInt_QHSD<1, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "u", int_arm_neon_vrshiftu, 0>;
// VRSHR : Vector Rounding Shift Right
-defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>;
-defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>;
+defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs,
+ N2RegVShRFrm>;
+defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru,
+ N2RegVShRFrm>;
// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
NEONvrshrn>;
// VQSHL : Vector Saturating Shift
-defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts,0>;
-defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu,0>;
+defm VQSHLs : N3VInt_QHSD<0, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "s", int_arm_neon_vqshifts, 0>;
+defm VQSHLu : N3VInt_QHSD<1, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "u", int_arm_neon_vqshiftu, 0>;
// VQSHL : Vector Saturating Shift Left (Immediate)
-defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>;
-defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>;
+defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls,
+ N2RegVShLFrm>;
+defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu,
+ N2RegVShLFrm>;
// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
-defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D, "vqshlu","s",NEONvqshlsu>;
+defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu,
+ N2RegVShLFrm>;
// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
@@ -2683,12 +2684,12 @@ defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
NEONvqshrnsu>;
// VQRSHL : Vector Saturating Rounding Shift
-defm VQRSHLs : N3VInt_QHSD<0,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqrshl", "s",
- int_arm_neon_vqrshifts, 0>;
-defm VQRSHLu : N3VInt_QHSD<1,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqrshl", "u",
- int_arm_neon_vqrshiftu, 0>;
+defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "s", int_arm_neon_vqrshifts, 0>;
+defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "u", int_arm_neon_vqrshiftu, 0>;
// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
@@ -2708,9 +2709,9 @@ defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
// VSLI : Vector Shift Left and Insert
-defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli>;
+defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>;
// VSRI : Vector Shift Right and Insert
-defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri>;
+defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>;
// Vector Absolute and Saturating Absolute.
@@ -2732,19 +2733,22 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
// Vector Negate.
-def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
-def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>;
+def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
+def vneg8 : PatFrag<(ops node:$in),
+ (sub (bitconvert (v8i8 immAllZerosV)), node:$in)>;
+def vneg16 : PatFrag<(ops node:$in),
+ (sub (bitconvert (v16i8 immAllZerosV)), node:$in)>;
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
- [(set DPR:$dst, (Ty (vneg DPR:$src)))]>;
+ [(set DPR:$dst, (Ty (vneg8 DPR:$src)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
- [(set QPR:$dst, (Ty (vneg QPR:$src)))]>;
+ [(set QPR:$dst, (Ty (vneg16 QPR:$src)))]>;
-// VNEG : Vector Negate
+// VNEG : Vector Negate (integer)
def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
@@ -2762,12 +2766,12 @@ def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
"vneg", "f32", "$dst, $src", "",
[(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
-def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
-def : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>;
-def : Pat<(v2i32 (vneg_conv DPR:$src)), (VNEGs32d DPR:$src)>;
-def : Pat<(v16i8 (vneg_conv QPR:$src)), (VNEGs8q QPR:$src)>;
-def : Pat<(v8i16 (vneg_conv QPR:$src)), (VNEGs16q QPR:$src)>;
-def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>;
+def : Pat<(v8i8 (vneg8 DPR:$src)), (VNEGs8d DPR:$src)>;
+def : Pat<(v4i16 (vneg8 DPR:$src)), (VNEGs16d DPR:$src)>;
+def : Pat<(v2i32 (vneg8 DPR:$src)), (VNEGs32d DPR:$src)>;
+def : Pat<(v16i8 (vneg16 QPR:$src)), (VNEGs8q QPR:$src)>;
+def : Pat<(v8i16 (vneg16 QPR:$src)), (VNEGs16q QPR:$src)>;
+def : Pat<(v4i32 (vneg16 QPR:$src)), (VNEGs32q QPR:$src)>;
// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
@@ -2805,9 +2809,9 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
// VMOV : Vector Move (Register)
def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
- IIC_VMOVD, "vmov", "$dst, $src", "", []>;
+ N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
- IIC_VMOVD, "vmov", "$dst, $src", "", []>;
+ N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
// VMOV : Vector Move (Immediate)
@@ -3048,30 +3052,29 @@ def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
// VDUP : Vector Duplicate Lane (from scalar to all elements)
-class VDUPLND<bits<2> op19_18, bits<2> op17_16,
- string OpcodeStr, string Dt, ValueType Ty>
- : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
- (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
- OpcodeStr, Dt, "$dst, $src[$lane]", "",
- [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;
+class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
+ ValueType Ty>
+ : NVDupLane<op19_16, 0, (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane),
+ IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
+ [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;
-class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, string Dt,
+class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy>
- : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
- (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
- OpcodeStr, Dt, "$dst, $src[$lane]", "",
- [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;
+ : NVDupLane<op19_16, 1, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane),
+ IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
+ [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src),
+ imm:$lane)))]>;
// Inst{19-16} is partially specified depending on the element size.
-def VDUPLN8d : VDUPLND<{?,?}, {?,1}, "vdup", "8", v8i8>;
-def VDUPLN16d : VDUPLND<{?,?}, {1,0}, "vdup", "16", v4i16>;
-def VDUPLN32d : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2i32>;
-def VDUPLNfd : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2f32>;
-def VDUPLN8q : VDUPLNQ<{?,?}, {?,1}, "vdup", "8", v16i8, v8i8>;
-def VDUPLN16q : VDUPLNQ<{?,?}, {1,0}, "vdup", "16", v8i16, v4i16>;
-def VDUPLN32q : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4i32, v2i32>;
-def VDUPLNfq : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4f32, v2f32>;
+def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8>;
+def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16>;
+def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32>;
+def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32>;
+def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8>;
+def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16>;
+def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32>;
+def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32>;
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
(v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
@@ -3233,15 +3236,15 @@ def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
: N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$dst),
- (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD,
- OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
+ (ins DPR:$lhs, DPR:$rhs, i32imm:$index), NVExtFrm,
+ IIC_VEXTD, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
[(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs),
(Ty DPR:$rhs), imm:$index)))]>;
class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
: N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$dst),
- (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ,
- OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
+ (ins QPR:$lhs, QPR:$rhs, i32imm:$index), NVExtFrm,
+ IIC_VEXTQ, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
[(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs),
(Ty QPR:$rhs), imm:$index)))]>;
@@ -3290,25 +3293,26 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
// VTBL : Vector Table Lookup
def VTBL1
: N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst),
- (ins DPR:$tbl1, DPR:$src), IIC_VTB1,
+ (ins DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTB1,
"vtbl", "8", "$dst, \\{$tbl1\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
- (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2,
+ (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2,
"vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2
DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
- (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3,
+ (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3,
"vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3
DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
- (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4,
+ (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
+ NVTBLFrm, IIC_VTB4,
"vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2,
DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
@@ -3317,26 +3321,27 @@ def VTBL4
// VTBX : Vector Table Extension
def VTBX1
: N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst),
- (ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1,
+ (ins DPR:$orig, DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTBX1,
"vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1
DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
- (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2,
+ (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTBX2,
"vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2
DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
- (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3,
+ (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
+ NVTBLFrm, IIC_VTBX3,
"vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1,
DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
def VTBX4
: N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
- DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTBX4,
+ DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTBX4,
"vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src",
"$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1,
@@ -3396,12 +3401,12 @@ def : N3VSPat<fmul, VMULfd_sfp>;
//let neverHasSideEffects = 1 in
//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32",
-// v2f32, fmul, fadd>;
+// v2f32, fmul, fadd>;
//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>;
//let neverHasSideEffects = 1 in
//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32",
-// v2f32, fmul, fsub>;
+// v2f32, fmul, fsub>;
//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>;
// Vector Absolute used for single-precision FP
@@ -3421,14 +3426,14 @@ def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;
// Vector Maximum used for single-precision FP
let neverHasSideEffects = 1 in
def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
- (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
"vmax", "f32", "$dst, $src1, $src2", "", []>;
def : N3VSPat<NEONfmax, VMAXfd_sfp>;
// Vector Minimum used for single-precision FP
let neverHasSideEffects = 1 in
def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
- (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
"vmin", "f32", "$dst, $src1, $src2", "", []>;
def : N3VSPat<NEONfmin, VMINfd_sfp>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index aca8230..0458389 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -545,7 +545,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
// FP FMA Operations.
//
-def VMLAD : ADbI<0b11100, 0b00, 0, 0,
+def VMLAD : ADbI_vmlX<0b11100, 0b00, 0, 0,
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b),
@@ -558,7 +558,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
[(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
+def VNMLSD : ADbI_vmlX<0b11100, 0b01, 0, 0,
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b),
@@ -571,7 +571,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
[(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def VMLSD : ADbI<0b11100, 0b00, 1, 0,
+def VMLSD : ADbI_vmlX<0b11100, 0b00, 1, 0,
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)),
@@ -589,7 +589,7 @@ def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
-def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
+def VNMLAD : ADbI_vmlX<0b11100, 0b01, 1, 0,
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)),
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index bdbec30..cb762a4 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -341,6 +341,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
unsigned PReg = PMO.getReg();
unsigned PRegNum = PMO.isUndef() ? UINT_MAX
: ARMRegisterInfo::getRegisterNumbering(PReg);
+ unsigned Count = 1;
for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
int NewOffset = MemOps[i].Offset;
@@ -350,11 +351,14 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
: ARMRegisterInfo::getRegisterNumbering(Reg);
// AM4 - register numbers in ascending order.
// AM5 - consecutive register numbers in ascending order.
+ // Can only do up to 16 double-word registers per insn.
if (Reg != ARM::SP &&
NewOffset == Offset + (int)Size &&
- ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
+ ((isAM4 && RegNum > PRegNum)
+ || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
Offset += Size;
PRegNum = RegNum;
+ ++Count;
} else {
// Can't merge this in. Try merge the earlier ones first.
MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
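
The condition above reads more easily in isolation: AM4 (LDM/STM) only needs
register numbers in ascending order, while AM5 (VLDM/VSTM) needs consecutive
numbers and, for double-word registers (Size == 8), at most 16 registers per
instruction. A minimal C++ restatement, with illustrative names (canMergeNext
is not an LLVM function):

    // Restatement of the merge test in MergeLDR_STR above.
    // isSP: candidate register is the stack pointer; Size: access size in
    // bytes; Count: registers merged so far; PRegNum/RegNum: previous and
    // current register numbers.
    static bool canMergeNext(bool isAM4, bool isSP, int Offset, int Size,
                             int NewOffset, unsigned PRegNum, unsigned RegNum,
                             unsigned Count) {
      if (isSP || NewOffset != Offset + Size)
        return false;                  // must be the adjacent memory slot
      if (isAM4)
        return RegNum > PRegNum;       // ascending order suffices for LDM/STM
      // AM5: consecutive registers, capped at 16 double-word regs per insn.
      return (Size < 8 || Count < 16) && RegNum == PRegNum + 1;
    }
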
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 2dad7f1..9e55cd8 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -22,10 +22,6 @@ using namespace llvm;
static cl::opt<bool>
ReserveR9("arm-reserve-r9", cl::Hidden,
cl::desc("Reserve R9, making it unavailable as GPR"));
-static cl::opt<bool>
-UseNEONFP("arm-use-neon-fp",
- cl::desc("Use NEON for single-precision FP"),
- cl::init(false), cl::Hidden);
static cl::opt<bool>
UseMOVT("arm-use-movt",
@@ -35,7 +31,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
bool isT)
: ARMArchVersion(V4)
, ARMFPUType(None)
- , UseNEONForSinglePrecisionFP(UseNEONFP)
+ , UseNEONForSinglePrecisionFP(false)
+ , SlowVMLx(false)
, IsThumb(isT)
, ThumbMode(Thumb1)
, PostRAScheduler(false)
@@ -115,14 +112,6 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
if (!isThumb() || hasThumb2())
PostRAScheduler = true;
-
- // Set CPU specific features.
- if (CPUString == "cortex-a8") {
- // On Cortex-a8, it's faster to perform some single-precision FP
- // operations with NEON instructions.
- if (UseNEONFP.getPosition() == 0)
- UseNEONForSinglePrecisionFP = true;
- }
}
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 2dc81a4..fa56a91 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -50,6 +50,10 @@ protected:
/// determine if NEON should actually be used.
bool UseNEONForSinglePrecisionFP;
+ /// SlowVMLx - If the VFP2 instructions are available, indicates whether
+ /// the VML[AS] instructions are slow (if so, don't use them).
+ bool SlowVMLx;
+
/// IsThumb - True if we are in thumb mode, false if in ARM mode.
bool IsThumb;
@@ -119,6 +123,7 @@ protected:
bool hasNEON() const { return ARMFPUType >= NEON; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
+ bool useVMLx() const { return hasVFP2() && !SlowVMLx; }
bool hasFP16() const { return HasFP16; }
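
The new accessor combines two facts: VMLx only exists with VFP2, and a given
CPU may implement it slowly (the VMLAD/VMLSD/VNMLAD/VNMLSD definitions moved
to the ADbI_vmlX format class earlier in this commit are presumably the
instructions being gated). A toy illustration of the intended use; the
SubtargetSketch type is hypothetical, not LLVM code:

    // Callers should emit separate VMUL+VADD when useVMLx() returns false.
    struct SubtargetSketch {
      bool HasVFP2;
      bool SlowVMLx; // would be set from a CPU feature string
      SubtargetSketch() : HasVFP2(true), SlowVMLx(false) {}
      bool useVMLx() const { return HasVFP2 && !SlowVMLx; }
    };
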
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 4a7a1e4..ba736e3 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -841,7 +841,7 @@ GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
<< getFunctionNumber() << '_' << uid << '_' << uid2
<< "_set_" << MBB->getNumber();
- return OutContext.GetOrCreateTemporarySymbol(Name.str());
+ return OutContext.GetOrCreateSymbol(Name.str());
}
MCSymbol *ARMAsmPrinter::
@@ -849,7 +849,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
SmallString<60> Name;
raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI"
<< getFunctionNumber() << '_' << uid << '_' << uid2;
- return OutContext.GetOrCreateTemporarySymbol(Name.str());
+ return OutContext.GetOrCreateSymbol(Name.str());
}
void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) {
@@ -1132,6 +1132,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
else
// Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info pointers
+ // need to be indirect and pc-rel. We accomplish this by using NLPs.
+ // However, sometimes the types are local to the file. So we need to
+ // fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
4/*size*/, 0/*addrspace*/);
@@ -1186,7 +1191,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
// FIXME: MOVE TO SHARED PLACE.
unsigned Id = (unsigned)MI->getOperand(2).getImm();
const char *Prefix = MAI->getPrivateGlobalPrefix();
- MCSymbol *Label =OutContext.GetOrCreateTemporarySymbol(Twine(Prefix)
+ MCSymbol *Label = OutContext.GetOrCreateSymbol(Twine(Prefix)
+ "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id));
OutStreamer.EmitLabel(Label);
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
index 7cb305f..ab2b06b 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
@@ -75,7 +75,7 @@ GetJumpTableSymbol(const MachineOperand &MO) const {
#endif
// Create a symbol for the name.
- return Ctx.GetOrCreateTemporarySymbol(Name.str());
+ return Ctx.GetOrCreateSymbol(Name.str());
}
MCSymbol *ARMMCInstLower::
@@ -91,7 +91,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
#endif
// Create a symbol for the name.
- return Ctx.GetOrCreateTemporarySymbol(Name.str());
+ return Ctx.GetOrCreateSymbol(Name.str());
}
MCOperand ARMMCInstLower::
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index c36fe63..7334259 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -46,10 +46,13 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
default:
break;
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
case ARM::VLD2d8:
case ARM::VLD2d16:
case ARM::VLD2d32:
- case ARM::VLD2d64:
case ARM::VLD2LNd8:
case ARM::VLD2LNd16:
case ARM::VLD2LNd32:
@@ -83,7 +86,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
- case ARM::VLD3d64:
+ case ARM::VLD1d64T:
case ARM::VLD3LNd8:
case ARM::VLD3LNd16:
case ARM::VLD3LNd32:
@@ -128,7 +131,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VLD4d8:
case ARM::VLD4d16:
case ARM::VLD4d32:
- case ARM::VLD4d64:
+ case ARM::VLD1d64Q:
case ARM::VLD4LNd8:
case ARM::VLD4LNd16:
case ARM::VLD4LNd32:
@@ -170,10 +173,13 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
+ case ARM::VST1q8:
+ case ARM::VST1q16:
+ case ARM::VST1q32:
+ case ARM::VST1q64:
case ARM::VST2d8:
case ARM::VST2d16:
case ARM::VST2d32:
- case ARM::VST2d64:
case ARM::VST2LNd8:
case ARM::VST2LNd16:
case ARM::VST2LNd32:
@@ -207,7 +213,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
- case ARM::VST3d64:
+ case ARM::VST1d64T:
case ARM::VST3LNd8:
case ARM::VST3LNd16:
case ARM::VST3LNd32:
@@ -252,7 +258,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4d8:
case ARM::VST4d16:
case ARM::VST4d32:
- case ARM::VST4d64:
+ case ARM::VST1d64Q:
case ARM::VST4LNd8:
case ARM::VST4LNd16:
case ARM::VST4LNd32:
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 57b65cf..85d5ca0 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -12,6 +12,9 @@ Reimplement 'select' in terms of 'SEL'.
A few ARMv6T2 ops should be pattern matched: BFI, SBFX, and UBFX
+Interesting optimization for PIC codegen on arm-linux:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43129
+
//===---------------------------------------------------------------------===//
Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 29ae631..ad98839 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -200,6 +200,8 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// It's illegal to emit pop instruction without operands.
if (NumRegs)
MBB.insert(MI, &*MIB);
+ else
+ MF.DeleteMachineInstr(MIB);
return true;
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index e4abcdb..55163f9 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -69,7 +69,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
- if (RC == ARM::GPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
@@ -93,7 +93,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
- if (RC == ARM::GPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index 39f0749..d539e08 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -301,7 +301,15 @@ bool AlphaInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TB
bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
return false;
// Get the last instruction in the block.
@@ -362,6 +370,11 @@ unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
--I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
if (I->getOpcode() != Alpha::BR &&
I->getOpcode() != Alpha::COND_BRANCH_I &&
I->getOpcode() != Alpha::COND_BRANCH_F)
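
Both Alpha hunks, and the CellSPU ones later in this patch, open-code the same
backward scan that skips DBG_VALUE instructions so debug info cannot perturb
branch analysis. A sketch of the shared pattern as a helper (lastRealInstr is
hypothetical, not part of this commit):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    using namespace llvm;

    // Return the last non-debug instruction in MBB, or MBB.end() if the
    // block contains only DBG_VALUEs (or nothing at all).
    static MachineBasicBlock::iterator lastRealInstr(MachineBasicBlock &MBB) {
      MachineBasicBlock::iterator I = MBB.end();
      if (I == MBB.begin())
        return MBB.end();
      --I;
      while (I->isDebugValue()) {
        if (I == MBB.begin())
          return MBB.end();
        --I;
      }
      return I;
    }
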
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
index e3c3993..2471688 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.td
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -65,23 +65,23 @@ def HI16 : SDNodeXForm<imm, [{
//===----------------------------------------------------------------------===//
def imm3 : PatLeaf<(imm), [{return isInt<3>(N->getSExtValue());}]>;
-def uimm3 : PatLeaf<(imm), [{return isUint<3>(N->getZExtValue());}]>;
-def uimm4 : PatLeaf<(imm), [{return isUint<4>(N->getZExtValue());}]>;
-def uimm5 : PatLeaf<(imm), [{return isUint<5>(N->getZExtValue());}]>;
+def uimm3 : PatLeaf<(imm), [{return isUInt<3>(N->getZExtValue());}]>;
+def uimm4 : PatLeaf<(imm), [{return isUInt<4>(N->getZExtValue());}]>;
+def uimm5 : PatLeaf<(imm), [{return isUInt<5>(N->getZExtValue());}]>;
def uimm5m2 : PatLeaf<(imm), [{
uint64_t value = N->getZExtValue();
- return value % 2 == 0 && isUint<5>(value);
+ return value % 2 == 0 && isUInt<5>(value);
}]>;
def uimm6m4 : PatLeaf<(imm), [{
uint64_t value = N->getZExtValue();
- return value % 4 == 0 && isUint<6>(value);
+ return value % 4 == 0 && isUInt<6>(value);
}]>;
def imm7 : PatLeaf<(imm), [{return isInt<7>(N->getSExtValue());}]>;
def imm16 : PatLeaf<(imm), [{return isInt<16>(N->getSExtValue());}]>;
-def uimm16 : PatLeaf<(imm), [{return isUint<16>(N->getZExtValue());}]>;
+def uimm16 : PatLeaf<(imm), [{return isUInt<16>(N->getZExtValue());}]>;
def ximm16 : PatLeaf<(imm), [{
int64_t value = N->getSExtValue();
@@ -610,8 +610,7 @@ def MOVE_ncccc : F1<(outs NotCC:$cc), (ins JustCC:$sb),
"cc = !cc;", []>;
def MOVECC_zext : F1<(outs D:$dst), (ins JustCC:$cc),
- "$dst = $cc;",
- [/*(set D:$dst, (zext JustCC:$cc))*/]>;
+ "$dst = $cc;", []>;
def MOVENCC_z : F1<(outs D:$dst), (ins NotCC:$cc),
"$dst = cc;", []>;
@@ -859,17 +858,5 @@ def : Pat<(BfinCall (i32 tglobaladdr:$dst)),
(CALLa tglobaladdr:$dst)>;
def : Pat<(BfinCall (i32 texternalsym:$dst)),
(CALLa texternalsym:$dst)>;
-
-//def : Pat<(sext JustCC:$cc),
-// (NEG (MOVECC_zext JustCC:$cc))>;
-//def : Pat<(anyext JustCC:$cc),
-// (MOVECC_zext JustCC:$cc)>;
-def : Pat<(i16 (zext JustCC:$cc)),
- (EXTRACT_SUBREG (MOVECC_zext JustCC:$cc), bfin_subreg_lo16)>;
-def : Pat<(i16 (sext JustCC:$cc)),
- (EXTRACT_SUBREG (NEG (MOVECC_zext JustCC:$cc)), bfin_subreg_lo16)>;
-def : Pat<(i16 (anyext JustCC:$cc)),
- (EXTRACT_SUBREG (MOVECC_zext JustCC:$cc), bfin_subreg_lo16)>;
-
def : Pat<(i16 (trunc D:$src)),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), bfin_subreg_lo16)>;
diff --git a/lib/Target/Blackfin/BlackfinIntrinsics.td b/lib/Target/Blackfin/BlackfinIntrinsics.td
index bf02cfe..ce21b08 100644
--- a/lib/Target/Blackfin/BlackfinIntrinsics.td
+++ b/lib/Target/Blackfin/BlackfinIntrinsics.td
@@ -21,14 +21,14 @@ let TargetPrefix = "bfin", isTarget = 1 in {
// Execute csync instruction with workarounds
def int_bfin_csync : GCCBuiltin<"__builtin_bfin_csync">,
- Intrinsic<[llvm_void_ty]>;
+ Intrinsic<[]>;
// Execute ssync instruction with workarounds
def int_bfin_ssync : GCCBuiltin<"__builtin_bfin_ssync">,
- Intrinsic<[llvm_void_ty]>;
+ Intrinsic<[]>;
// Execute idle instruction with workarounds
def int_bfin_idle : GCCBuiltin<"__builtin_bfin_idle">,
- Intrinsic<[llvm_void_ty]>;
+ Intrinsic<[]>;
}
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index b39a342..84dc9ca 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -164,7 +164,7 @@ void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB,
return;
}
- if (isUint<16>(value)) {
+ if (isUInt<16>(value)) {
BuildMI(MBB, I, DL, TII.get(BF::LOADuimm16), Reg).addImm(value);
return;
}
@@ -255,13 +255,13 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(FIPos==1 && "Bad frame index operand");
MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
MI.getOperand(FIPos+1).setImm(Offset);
- if (isUint<6>(Offset)) {
+ if (isUInt<6>(Offset)) {
MI.setDesc(TII.get(isStore
? BF::STORE32p_uimm6m4
: BF::LOAD32p_uimm6m4));
return 0;
}
- if (BaseReg == BF::FP && isUint<7>(-Offset)) {
+ if (BaseReg == BF::FP && isUInt<7>(-Offset)) {
MI.setDesc(TII.get(isStore
? BF::STORE32fp_nimm7m4
: BF::LOAD32fp_nimm7m4));
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
index c960974..1f21511 100644
--- a/lib/Target/CellSPU/SPU.h
+++ b/lib/Target/CellSPU/SPU.h
@@ -15,7 +15,6 @@
#ifndef LLVM_TARGET_IBMCELLSPU_H
#define LLVM_TARGET_IBMCELLSPU_H
-#include "llvm/System/DataTypes.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -25,73 +24,7 @@ namespace llvm {
FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
- /*--== Utility functions/predicates/etc used all over the place: --==*/
- //! Predicate test for a signed 10-bit value
- /*!
- \param Value The input value to be tested
-
- This predicate tests for a signed 10-bit value, returning the 10-bit value
- as a short if true.
- */
- template<typename T>
- inline bool isS10Constant(T Value);
-
- template<>
- inline bool isS10Constant<short>(short Value) {
- int SExtValue = ((int) Value << (32 - 10)) >> (32 - 10);
- return ((Value > 0 && Value <= (1 << 9) - 1)
- || (Value < 0 && (short) SExtValue == Value));
- }
-
- template<>
- inline bool isS10Constant<int>(int Value) {
- return (Value >= -(1 << 9) && Value <= (1 << 9) - 1);
- }
-
- template<>
- inline bool isS10Constant<uint32_t>(uint32_t Value) {
- return (Value <= ((1 << 9) - 1));
- }
-
- template<>
- inline bool isS10Constant<int64_t>(int64_t Value) {
- return (Value >= -(1 << 9) && Value <= (1 << 9) - 1);
- }
-
- template<>
- inline bool isS10Constant<uint64_t>(uint64_t Value) {
- return (Value <= ((1 << 9) - 1));
- }
-
- //! Predicate test for an unsigned 10-bit value
- /*!
- \param Value The input value to be tested
-
- This predicate tests for an unsigned 10-bit value, returning the 10-bit value
- as a short if true.
- */
- inline bool isU10Constant(short Value) {
- return (Value == (Value & 0x3ff));
- }
-
- inline bool isU10Constant(int Value) {
- return (Value == (Value & 0x3ff));
- }
-
- inline bool isU10Constant(uint32_t Value) {
- return (Value == (Value & 0x3ff));
- }
-
- inline bool isU10Constant(int64_t Value) {
- return (Value == (Value & 0x3ff));
- }
-
- inline bool isU10Constant(uint64_t Value) {
- return (Value == (Value & 0x3ff));
- }
-
extern Target TheCellSPUTarget;
-
}
// Defines symbolic names for the SPU instructions.
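
The deleted isS10Constant/isU10Constant helpers duplicated the generic
isInt<N>/isUInt<N> templates from llvm/Support/MathExtras.h, which the
SPUISelDAGToDAG.cpp and SPURegisterInfo.cpp changes below switch to. Their
semantics, restated as a standalone sketch (fitsSInt/fitsUInt are illustrative
names, not the LLVM API):

    #include <stdint.h>

    // A value fits a signed N-bit field iff it lies in [-2^(N-1), 2^(N-1)-1];
    // it fits an unsigned N-bit field iff it is less than 2^N.
    template <unsigned N> bool fitsSInt(int64_t V) {
      return V >= -(int64_t(1) << (N - 1)) && V <= (int64_t(1) << (N - 1)) - 1;
    }
    template <unsigned N> bool fitsUInt(uint64_t V) {
      return V < (uint64_t(1) << N);
    }
    // e.g. fitsSInt<10>(511) and fitsSInt<10>(-512) hold; fitsSInt<10>(512)
    // does not, matching the old 10-bit predicates.
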
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 396a921..90f8310 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -44,28 +44,28 @@ namespace {
bool
isI64IntS10Immediate(ConstantSDNode *CN)
{
- return isS10Constant(CN->getSExtValue());
+ return isInt<10>(CN->getSExtValue());
}
//! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
bool
isI32IntS10Immediate(ConstantSDNode *CN)
{
- return isS10Constant(CN->getSExtValue());
+ return isInt<10>(CN->getSExtValue());
}
//! ConstantSDNode predicate for i32 unsigned 10-bit immediate values
bool
isI32IntU10Immediate(ConstantSDNode *CN)
{
- return isU10Constant(CN->getSExtValue());
+ return isUInt<10>(CN->getSExtValue());
}
//! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values
bool
isI16IntS10Immediate(ConstantSDNode *CN)
{
- return isS10Constant(CN->getSExtValue());
+ return isInt<10>(CN->getSExtValue());
}
//! SDNode predicate for i16 sign-extended, 10-bit immediate values
@@ -80,7 +80,7 @@ namespace {
bool
isI16IntU10Immediate(ConstantSDNode *CN)
{
- return isU10Constant((short) CN->getZExtValue());
+ return isUInt<10>((short) CN->getZExtValue());
}
//! SDNode predicate for i16 sign-extended, 10-bit immediate values
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index e863ee3..4b0d442 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -1107,7 +1107,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
true, false);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
- SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
+ unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
+ SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
false, false, 0);
Chain = Store.getOperand(0);
@@ -1491,7 +1492,7 @@ SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
return SDValue();
Value = Value >> 32;
}
- if (isS10Constant(Value))
+ if (isInt<10>(Value))
return DAG.getTargetConstant(Value, ValueType);
}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 2306665..86825c8 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -450,7 +450,15 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
return false;
// Get the last instruction in the block.
@@ -513,6 +521,11 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
if (I == MBB.begin())
return 0;
--I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
if (!isCondBranch(I) && !isUncondBranch(I))
return 0;
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 5068f77..6d1f87d 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -1268,7 +1268,12 @@ multiclass BitwiseAnd
defm AND : BitwiseAnd;
-// N.B.: vnot_conv is one of those special target selection pattern fragments,
+
+def vnot_cell_conv : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v4i32 immAllOnesV)))>;
+
+// N.B.: vnot_cell_conv is one of those special target selection pattern
+// fragments,
// in which we expect there to be a bit_convert on the constant. Bear in mind
// that llvm translates "not <reg>" to "xor <reg>, -1" (or in this case, a
// constant -1 vector.)
@@ -1301,7 +1306,7 @@ multiclass AndComplement
def r8: ANDCRegInst<R8C>;
// Sometimes, the xor pattern has a bitcast constant:
- def v16i8_conv: ANDCVecInst<v16i8, vnot_conv>;
+ def v16i8_conv: ANDCVecInst<v16i8, vnot_cell_conv>;
}
defm ANDC : AndComplement;
@@ -1934,7 +1939,7 @@ multiclass SelectBits
def v16i8: SELBVecInst<v16i8>;
def v8i16: SELBVecInst<v8i16>;
def v4i32: SELBVecInst<v4i32>;
- def v2i64: SELBVecInst<v2i64, vnot_conv>;
+ def v2i64: SELBVecInst<v2i64, vnot_cell_conv>;
def r128: SELBRegInst<GPRC>;
def r64: SELBRegInst<R64C>;
@@ -4373,7 +4378,7 @@ def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>;
-def : Pat<(v2f64 (bitconvert (v2f64 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>;
def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))),
(ORi128_vec VECREG:$src)>;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index 8c78bab..f3071f2 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -336,6 +337,7 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ DebugLoc dl = II->getDebugLoc();
while (!MI.getOperand(i).isFI()) {
++i;
@@ -364,11 +366,22 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
// Replace the FrameIndex with base register with $sp (aka $r1)
SPOp.ChangeToRegister(SPU::R1, false);
- if (Offset > SPUFrameInfo::maxFrameOffset()
- || Offset < SPUFrameInfo::minFrameOffset()) {
- errs() << "Large stack adjustment ("
- << Offset
- << ") in SPURegisterInfo::eliminateFrameIndex.";
+
+ // If 'Offset' doesn't fit into the D-form instruction's
+ // immediate, convert the instruction to X-form.
+ // If the instruction is not an AI (which takes an s10 immediate), assume
+ // it is a load/store that can take an s14 immediate.
+ if ((MI.getOpcode() == SPU::AIr32 && !isInt<10>(Offset))
+ || !isInt<14>(Offset)) {
+ int newOpcode = convertDFormToXForm(MI.getOpcode());
+ unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj);
+ BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg)
+ .addImm(Offset);
+ BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg())
+ .addReg(tmpReg, RegState::Kill)
+ .addReg(SPU::R1);
+ // remove the replaced D-form instruction
+ MBB.erase(II);
} else {
MO.ChangeToImmediate(Offset);
}
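
The new path rewrites an out-of-range D-form access (register plus immediate
displacement) into an X-form access (register plus register) through a
scavenged scratch register. The range test itself, restated as a sketch
(needsXForm is an illustrative name):

    // AI carries an s10 immediate; the D-form loads/stores handled here
    // carry an s14 immediate. Any wider offset forces the X-form rewrite.
    static bool needsXForm(bool isAI, int Offset) {
      const int Bits = isAI ? 10 : 14;
      const int Lo = -(1 << (Bits - 1)), Hi = (1 << (Bits - 1)) - 1;
      return Offset < Lo || Offset > Hi;
    }
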
@@ -423,6 +436,14 @@ void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MF.getRegInfo().setPhysRegUnused(SPU::R0);
MF.getRegInfo().setPhysRegUnused(SPU::R1);
MF.getRegInfo().setPhysRegUnused(SPU::R2);
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterClass *RC = &SPU::R32CRegClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
}
void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
@@ -448,7 +469,8 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
assert((FrameSize & 0xf) == 0
&& "SPURegisterInfo::emitPrologue: FrameSize not aligned");
- if (FrameSize > 0 || MFI->hasCalls()) {
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->hasCalls()) {
FrameSize = -(FrameSize + SPUFrameInfo::minStackSize());
if (hasDebugInfo) {
// Mark effective beginning of when frame pointer becomes valid.
@@ -460,14 +482,14 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
// for the ABI
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
.addReg(SPU::R1);
- if (isS10Constant(FrameSize)) {
+ if (isInt<10>(FrameSize)) {
// Spill $sp to adjusted $sp
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
.addReg(SPU::R1);
// Adjust $sp by required amount
BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
.addImm(FrameSize);
- } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+ } else if (isInt<16>(FrameSize)) {
// Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
// $r2 to adjust $sp:
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
@@ -475,7 +497,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
.addReg(SPU::R1);
BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
.addImm(FrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1)
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
.addReg(SPU::R2)
.addReg(SPU::R1);
BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
@@ -549,9 +571,11 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
"Can only insert epilog into returning blocks");
assert((FrameSize & 0xf) == 0
&& "SPURegisterInfo::emitEpilogue: FrameSize not aligned");
- if (FrameSize > 0 || MFI->hasCalls()) {
+
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->hasCalls()) {
FrameSize = FrameSize + SPUFrameInfo::minStackSize();
- if (isS10Constant(FrameSize + LinkSlotOffset)) {
+ if (isInt<10>(FrameSize + LinkSlotOffset)) {
// Reload $lr, adjust $sp by required amount
// Note: We do this to slightly improve dual issue -- not by much, but it
// is an opportunity for dual issue.
@@ -574,7 +598,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
.addReg(SPU::R2);
BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
.addImm(16)
- .addReg(SPU::R2);
+ .addReg(SPU::R1);
BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
addReg(SPU::R2)
.addImm(16);
@@ -618,4 +642,43 @@ SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
}
+int
+SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
+{
+ switch(dFormOpcode)
+ {
+ case SPU::AIr32: return SPU::Ar32;
+ case SPU::LQDr32: return SPU::LQXr32;
+ case SPU::LQDr128: return SPU::LQXr128;
+ case SPU::LQDv16i8: return SPU::LQXv16i8;
+ case SPU::LQDv4f32: return SPU::LQXv4f32;
+ case SPU::STQDr32: return SPU::STQXr32;
+ case SPU::STQDr128: return SPU::STQXr128;
+ case SPU::STQDv16i8: return SPU::STQXv16i8;
+ case SPU::STQDv4i32: return SPU::STQXv4i32;
+ case SPU::STQDv4f32: return SPU::STQXv4f32;
+
+ default: assert( false && "Unhandled D to X-form conversion");
+ }
+ // default will assert, but need to return something to keep the
+ // compiler happy.
+ return dFormOpcode;
+}
+
+// TODO: This is copied verbatim from PPC. Could this convenience function
+// be moved to the RegScavenger class?
+unsigned
+SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) const
+{
+ assert(RS && "Register scavenging must be on");
+ unsigned Reg = RS->FindUnusedReg(RC);
+ if (Reg == 0)
+ Reg = RS->scavengeRegister(RC, II, SPAdj);
+ assert( Reg && "Register scavenger failed");
+ return Reg;
+}
+
#include "SPUGenRegisterInfo.inc"
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 48feb5c..0a70318 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -53,6 +53,10 @@ namespace llvm {
virtual const TargetRegisterClass* const *
getCalleeSavedRegClasses(const MachineFunction *MF) const;
+ //! Allow for scavenging, so we can get scratch registers when needed.
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
+ { return true; }
+
//! Return the reserved registers
BitVector getReservedRegs(const MachineFunction &MF) const;
@@ -97,6 +101,21 @@ namespace llvm {
//! Get DWARF debugging register number
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+ //! Convert D-form load/store to X-form load/store
+ /*!
+     Converts a register displacement load/store into a register-indexed
+     load/store for large stack frames, when the stack frame exceeds the
+     range of an s10 displacement.
+ */
+ int convertDFormToXForm(int dFormOpcode) const;
+
+ //! Acquire an unused register in an emergency.
+ unsigned findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) const;
+
};
} // end namespace llvm
diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td
index 76eb563..82552fa 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsics.td
+++ b/lib/Target/MBlaze/MBlazeIntrinsics.td
@@ -21,11 +21,11 @@ let TargetPrefix = "mblaze", isTarget = 1 in {
[llvm_i32_ty],
[IntrWriteMem]>;
- class MBFSL_Put_Intrinsic : Intrinsic<[llvm_void_ty],
+ class MBFSL_Put_Intrinsic : Intrinsic<[],
[llvm_i32_ty, llvm_i32_ty],
[IntrWriteMem]>;
- class MBFSL_PutT_Intrinsic : Intrinsic<[llvm_void_ty],
+ class MBFSL_PutT_Intrinsic : Intrinsic<[],
[llvm_i32_ty],
[IntrWriteMem]>;
}
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index ac41cc8..15d16ec 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -424,7 +424,7 @@ void MSILWriter::printPtrLoad(uint64_t N) {
case Module::Pointer32:
printSimpleInstruction("ldc.i4",utostr(N).c_str());
// FIXME: Need overflow test?
- if (!isUInt32(N)) {
+ if (!isUInt<32>(N)) {
errs() << "Value = " << utostr(N) << '\n';
llvm_unreachable("32-bit pointer overflowed");
}
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
index 32c6b04..f4d7d8a 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
@@ -59,7 +59,7 @@ GetJumpTableSymbol(const MachineOperand &MO) const {
}
// Create a symbol for the name.
- return Ctx.GetOrCreateTemporarySymbol(Name.str());
+ return Ctx.GetOrCreateSymbol(Name.str());
}
MCSymbol *MSP430MCInstLower::
@@ -75,7 +75,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
}
// Create a symbol for the name.
- return Ctx.GetOrCreateTemporarySymbol(Name.str());
+ return Ctx.GetOrCreateSymbol(Name.str());
}
MCOperand MSP430MCInstLower::
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 6372482..e584770 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -173,6 +173,8 @@ unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
while (I != MBB.begin()) {
--I;
+ if (I->isDebugValue())
+ continue;
if (I->getOpcode() != MSP430::JMP &&
I->getOpcode() != MSP430::JCC)
break;
@@ -241,6 +243,9 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I = MBB.end();
while (I != MBB.begin()) {
--I;
+ if (I->isDebugValue())
+ continue;
+
// Working from the bottom, when we see a non-terminator
// instruction, we're done.
if (!isUnpredicatedTerminator(I))
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index fb93706..1d5c511 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -235,10 +235,7 @@ std::string Mangler::getNameWithPrefix(const GlobalValue *GV,
MCSymbol *Mangler::getSymbol(const GlobalValue *GV) {
SmallString<60> NameStr;
getNameWithPrefix(NameStr, GV, false);
- if (!GV->hasPrivateLinkage())
- return Context.GetOrCreateSymbol(NameStr.str());
-
- return Context.GetOrCreateTemporarySymbol(NameStr.str());
+ return Context.GetOrCreateSymbol(NameStr.str());
}
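
The GetOrCreateTemporarySymbol to GetOrCreateSymbol change above recurs across
MSP430, PowerPC, and X86 below: the temporary-versus-permanent distinction is
no longer made at symbol creation time, so every lookup funnels through one
entry point. An illustrative sketch only (the symbol name is hypothetical):

// Sketch: one call resolves or creates any named symbol in the context.
MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef("_foo$non_lazy_ptr"));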
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index fa4518d..e948917 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -26,8 +26,9 @@
// Floating Point Compare and Branch
def SDT_MipsFPBrcond : SDTypeProfile<0, 3, [SDTCisSameAs<0, 2>, SDTCisInt<0>,
SDTCisVT<1, OtherVT>]>;
-def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<0>,
- SDTCisInt<2>]>;
+def SDT_MipsFPCmp : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
+ SDTCisSameAs<1, 2>, SDTCisFP<1>,
+ SDTCisInt<3>]>;
def SDT_MipsFPSelectCC : SDTypeProfile<1, 4, [SDTCisInt<1>, SDTCisInt<4>,
SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>;
@@ -244,12 +245,13 @@ def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
/// Floating Point Compare
let hasDelaySlot = 1, Defs=[FCR31] in {
def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc),
- "c.$cc.s $fs, $ft", [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc),
- (implicit FCR31)]>;
+ "c.$cc.s $fs, $ft",
+ [(set FCR31, (MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc))]>;
def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc),
- "c.$cc.d $fs, $ft", [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc),
- (implicit FCR31)]>, Requires<[In32BitMode]>;
+ "c.$cc.d $fs, $ft",
+ [(set FCR31, (MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc))]>,
+ Requires<[In32BitMode]>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 1a9bffc..85cf064 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -433,7 +433,15 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
{
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
return false;
// Get the last instruction in the block.
@@ -562,6 +570,11 @@ RemoveBranch(MachineBasicBlock &MBB) const
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
--I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
if (I->getOpcode() != Mips::J &&
GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID)
return 0;
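
The debug-value handling added to AnalyzeBranch and RemoveBranch here is the
same idiom applied to PIC16, PowerPC, SystemZ, and MSP430 elsewhere in this
commit: DBG_VALUE instructions are not terminators and must not end the
backward scan. A condensed sketch of the pattern, assuming the usual
MachineBasicBlock iterator types:

// Sketch: step backwards past debug values before inspecting terminators.
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return false;    // empty block
--I;
while (I->isDebugValue()) {            // DBG_VALUEs are not terminators
  if (I == MBB.begin()) return false;  // nothing but debug info here
  --I;
}
// I now points at the last real instruction in the block.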
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
index 2fb405e..da16e83 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.cpp
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -226,6 +226,11 @@ bool PIC16InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Get the terminator instruction.
--I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return true;
+ --I;
+ }
// Handle unconditional branches. If the unconditional branch's target is
// successor basic block then remove the unconditional branch.
if (I->getOpcode() == PIC16::br_uncond && AllowModify) {
diff --git a/lib/Target/PIC16/PIC16Section.cpp b/lib/Target/PIC16/PIC16Section.cpp
index a96ebb8..2505b11 100644
--- a/lib/Target/PIC16/PIC16Section.cpp
+++ b/lib/Target/PIC16/PIC16Section.cpp
@@ -17,10 +17,9 @@ using namespace llvm;
// This is the only way to create a PIC16Section. Sections created here
// do not need to be explicitly deleted as they are managed by auto_ptrs.
-PIC16Section *PIC16Section::Create(const StringRef &Name,
- PIC16SectionType Ty,
- const std::string &Address,
- int Color, MCContext &Ctx) {
+PIC16Section *PIC16Section::Create(StringRef Name, PIC16SectionType Ty,
+ StringRef Address, int Color,
+ MCContext &Ctx) {
/// Determine the internal SectionKind info.
/// Users of PIC16Section class should not need to know the internal
@@ -59,8 +58,17 @@ PIC16Section *PIC16Section::Create(const StringRef &Name,
}
+  // Copy strings into context-allocated memory so they get freed when the
+  // context is destroyed.
+ char *NameCopy = static_cast<char*>(Ctx.Allocate(Name.size(), 1));
+ memcpy(NameCopy, Name.data(), Name.size());
+ char *AddressCopy = static_cast<char*>(Ctx.Allocate(Address.size(), 1));
+ memcpy(AddressCopy, Address.data(), Address.size());
+
// Create the Section.
- PIC16Section *S = new (Ctx) PIC16Section(Name, K, Address, Color);
+ PIC16Section *S =
+ new (Ctx) PIC16Section(StringRef(NameCopy, Name.size()), K,
+ StringRef(AddressCopy, Address.size()), Color);
S->T = Ty;
return S;
}
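
Because PIC16Sections are placement-new'd into the MCContext allocator, any
string they reference must outlive the caller's buffers; the copy above ties
the storage to the context's lifetime. The same recipe in isolation, as a
sketch assuming an MCContext-style Allocate(Size, Alignment) and <cstring>
for memcpy:

// Sketch: pin a transient StringRef to context-owned storage.
static StringRef CopyIntoContext(MCContext &Ctx, StringRef S) {
  char *Mem = static_cast<char *>(Ctx.Allocate(S.size(), 1));
  memcpy(Mem, S.data(), S.size());   // StringRef carries the length, no NUL
  return StringRef(Mem, S.size());   // released when the context is destroyed
}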
diff --git a/lib/Target/PIC16/PIC16Section.h b/lib/Target/PIC16/PIC16Section.h
index 566f920..9039ca7 100644
--- a/lib/Target/PIC16/PIC16Section.h
+++ b/lib/Target/PIC16/PIC16Section.h
@@ -30,11 +30,11 @@ namespace llvm {
PIC16SectionType T;
/// Name of the section to uniquely identify it.
- std::string Name;
+ StringRef Name;
/// User can specify an address at which a section should be placed.
/// Negative value here means user hasn't specified any.
- std::string Address;
+ StringRef Address;
/// Overlay information - Sections with same color can be overlaid on
/// one another.
@@ -43,17 +43,16 @@ namespace llvm {
/// Total size of all data objects contained here.
unsigned Size;
- PIC16Section(const StringRef &name, SectionKind K, const std::string &addr,
- int color)
+ PIC16Section(StringRef name, SectionKind K, StringRef addr, int color)
: MCSection(K), Name(name), Address(addr), Color(color), Size(0) {
}
public:
/// Return the name of the section.
- const std::string &getName() const { return Name; }
+ StringRef getName() const { return Name; }
/// Return the Address of the section.
- const std::string &getAddress() const { return Address; }
+ StringRef getAddress() const { return Address; }
/// Return the Color of the section.
int getColor() const { return Color; }
@@ -64,6 +63,8 @@ namespace llvm {
void setSize(unsigned size) { Size = size; }
    /// Contained data objects.
+ // FIXME: This vector is leaked because sections are allocated with a
+ // BumpPtrAllocator.
std::vector<const GlobalVariable *>Items;
/// Check section type.
@@ -77,8 +78,8 @@ namespace llvm {
PIC16SectionType getType() const { return T; }
/// This would be the only way to create a section.
- static PIC16Section *Create(const StringRef &Name, PIC16SectionType Ty,
- const std::string &Address, int Color,
+ static PIC16Section *Create(StringRef Name, PIC16SectionType Ty,
+ StringRef Address, int Color,
MCContext &Ctx);
/// Override this as PIC16 has its own way of printing switching
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index ed6fc9d..5adefd3 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -309,8 +309,8 @@ namespace {
const MCSymbol *&TOCEntry = TOC[Sym];
if (TOCEntry == 0)
TOCEntry = OutContext.
- GetOrCreateTemporarySymbol(StringRef(MAI->getPrivateGlobalPrefix()) +
- "C" + Twine(LabelID++));
+ GetOrCreateSymbol(StringRef(MAI->getPrivateGlobalPrefix()) +
+ "C" + Twine(LabelID++));
O << *TOCEntry << "@toc";
}
@@ -674,14 +674,14 @@ static const MCSymbol *GetLazyPtr(const MCSymbol *Sym, MCContext &Ctx) {
// Remove $stub suffix, add $lazy_ptr.
SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5);
TmpStr += "$lazy_ptr";
- return Ctx.GetOrCreateTemporarySymbol(TmpStr.str());
+ return Ctx.GetOrCreateSymbol(TmpStr.str());
}
static const MCSymbol *GetAnonSym(const MCSymbol *Sym, MCContext &Ctx) {
// Add $tmp suffix to $stub, yielding $stub$tmp.
SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end());
TmpStr += "$tmp";
- return Ctx.GetOrCreateTemporarySymbol(TmpStr.str());
+ return Ctx.GetOrCreateSymbol(TmpStr.str());
}
void PPCDarwinAsmPrinter::
@@ -811,6 +811,11 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
else
// Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info pointers
+ // need to be indirect and pc-rel. We accomplish this by using NLPs.
+ // However, sometimes the types are local to the file. So we need to
+ // fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index a752421..52948c8 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -130,7 +130,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
}
// If this branch is in range, ignore it.
- if (isInt16(BranchSize)) {
+ if (isInt<16>(BranchSize)) {
MBBStartOffset += 4;
continue;
}
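
The isInt16/isUInt32-style predicates being retired throughout this commit are
replaced by width-parameterized templates. Their behavior is roughly the
following simplified sketch; the real definitions live in
llvm/Support/MathExtras.h, and this standalone version needs <cstdint>:

// Sketch: range checks parameterized on bit width.
template <unsigned N> bool isInt(int64_t x) {
  return N >= 64 ||
         (-(INT64_C(1) << (N - 1)) <= x && x < (INT64_C(1) << (N - 1)));
}
template <unsigned N> bool isUInt(uint64_t x) {
  return N >= 64 || x < (UINT64_C(1) << N);
}
// e.g. isInt<10>(FrameSize) is the SPU s10 displacement check seen earlier.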
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 9d79c0d..4f88d35d 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -470,11 +470,11 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
if (CC == ISD::SETEQ || CC == ISD::SETNE) {
if (isInt32Immediate(RHS, Imm)) {
// SETEQ/SETNE comparison with 16-bit immediate, fold it.
- if (isUInt16(Imm))
+ if (isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
// If this is a 16-bit signed immediate, fold it.
- if (isInt16((int)Imm))
+ if (isInt<16>((int)Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
@@ -494,7 +494,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
}
Opc = PPC::CMPLW;
} else if (ISD::isUnsignedIntSetCC(CC)) {
- if (isInt32Immediate(RHS, Imm) && isUInt16(Imm))
+ if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
Opc = PPC::CMPLW;
@@ -511,11 +511,11 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
if (CC == ISD::SETEQ || CC == ISD::SETNE) {
if (isInt64Immediate(RHS.getNode(), Imm)) {
// SETEQ/SETNE comparison with 16-bit immediate, fold it.
- if (isUInt16(Imm))
+ if (isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
// If this is a 16-bit signed immediate, fold it.
- if (isInt16(Imm))
+ if (isInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
@@ -528,7 +528,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
// xoris r0,r3,0x1234
// cmpldi cr0,r0,0x5678
// beq cr0,L6
- if (isUInt32(Imm)) {
+ if (isUInt<32>(Imm)) {
SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
getI64Imm(Imm >> 16)), 0);
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
@@ -537,7 +537,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
}
Opc = PPC::CMPLD;
} else if (ISD::isUnsignedIntSetCC(CC)) {
- if (isInt64Immediate(RHS.getNode(), Imm) && isUInt16(Imm))
+ if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
getI64Imm(Imm & 0xFFFF)), 0);
Opc = PPC::CMPLD;
@@ -761,12 +761,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
unsigned Shift = 0;
// If it can't be represented as a 32 bit value.
- if (!isInt32(Imm)) {
+ if (!isInt<32>(Imm)) {
Shift = CountTrailingZeros_64(Imm);
int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
// If the shifted value fits 32 bits.
- if (isInt32(ImmSh)) {
+ if (isInt<32>(ImmSh)) {
// Go with the shifted value.
Imm = ImmSh;
} else {
@@ -785,7 +785,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
unsigned Hi = (Imm >> 16) & 0xFFFF;
// Simple value.
- if (isInt16(Imm)) {
+ if (isInt<16>(Imm)) {
// Just the Lo bits.
Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
} else if (Lo) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 2c072c1..e67666d 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5539,8 +5539,16 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
-EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr,
+/// getOptimalMemOpType - Returns the target-specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove lowering.
+/// If DstAlign is zero, that means the destination alignment can satisfy any
+/// constraint. Similarly, if SrcAlign is zero, there is no need to check it
+/// against an alignment requirement, probably because the source does not
+/// need to be loaded. It returns EVT::Other if
+/// SelectionDAG should be responsible for determining it.
+EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool SafeToUseFP,
SelectionDAG &DAG) const {
if (this->PPCSubTarget.isPPC64()) {
return MVT::i64;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 9c390ac..19fefab 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -347,9 +347,16 @@ namespace llvm {
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
- virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr,
- SelectionDAG &DAG) const;
+    /// getOptimalMemOpType - Returns the target-specific optimal type for load
+    /// and store operations as a result of memset, memcpy, and memmove lowering.
+    /// If DstAlign is zero, that means the destination alignment can satisfy any
+    /// constraint. Similarly, if SrcAlign is zero, there is no need to check it
+    /// against an alignment requirement, probably because the source does not
+    /// need to be loaded. It returns EVT::Other if
+ /// SelectionDAG should be responsible for determining it.
+ virtual EVT getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool SafeToUseFP, SelectionDAG &DAG) const;
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
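
Under the new interface PPC's answer stays simple: pick the widest natural
integer type regardless of the alignment hints. A sketch of the whole
override follows; only the PPC64 branch is visible in the hunk above, and the
32-bit fallback to MVT::i32 is an assumption based on the previous behavior:

// Sketch of the full override (the MVT::i32 fallback is assumed).
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
                                           unsigned SrcAlign, bool SafeToUseFP,
                                           SelectionDAG &DAG) const {
  if (this->PPCSubTarget.isPPC64())
    return MVT::i64;   // 8-byte chunks for memset/memcpy/memmove expansion
  return MVT::i32;
}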
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 3ff8f27..256370f 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -15,6 +15,10 @@
// Altivec transformation functions and pattern fragments.
//
+// Since we canonicalize buildvectors to v16i8, all vnots "-1" operands will be
+// of that type.
+def vnot_ppc : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>;
def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
@@ -321,7 +325,8 @@ def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
[(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>;
def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vandc $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (and (v4i32 VRRC:$vA), (vnot VRRC:$vB)))]>;
+ [(set VRRC:$vD, (and (v4i32 VRRC:$vA),
+ (vnot_ppc VRRC:$vB)))]>;
def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vcfsx $vD, $vB, $UIMM", VecFP,
@@ -435,7 +440,8 @@ def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>;
def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vnor $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vnot (or (v4i32 VRRC:$vA), VRRC:$vB)))]>;
+ [(set VRRC:$vD, (vnot_ppc (or (v4i32 VRRC:$vA),
+ VRRC:$vB)))]>;
def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vor $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>;
@@ -640,12 +646,11 @@ def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
(VMRGHW VRRC:$vA, VRRC:$vA)>;
// Logical Operations
-def : Pat<(v4i32 (vnot VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
-def : Pat<(v4i32 (vnot_conv VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
-def : Pat<(v4i32 (vnot_conv (or VRRC:$A, VRRC:$B))),
+def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))),
(VNOR VRRC:$A, VRRC:$B)>;
-def : Pat<(v4i32 (and VRRC:$A, (vnot_conv VRRC:$B))),
+def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))),
(VANDC VRRC:$A, VRRC:$B)>;
def : Pat<(fmul VRRC:$vA, VRRC:$vB),
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9895bea..82c637e 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -213,7 +213,15 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
return false;
// Get the last instruction in the block.
@@ -281,6 +289,11 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
--I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC)
return 0;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 6e7880e..44c5fe6 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -512,7 +512,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineInstr *MI = I;
DebugLoc dl = MI->getDebugLoc();
- if (isInt16(CalleeAmt)) {
+ if (isInt<16>(CalleeAmt)) {
BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg).addReg(StackReg).
addImm(CalleeAmt);
} else {
@@ -596,7 +596,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
else
Reg = PPC::R0;
- if (MaxAlign < TargetAlign && isInt16(FrameSize)) {
+ if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
.addReg(PPC::R31)
.addImm(FrameSize);
@@ -798,7 +798,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// clear can be encoded. This is extremely uncommon, because normally you
// only "std" to a stack slot that is at least 4-byte aligned, but it can
// happen in invalid code.
- if (isInt16(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
+ if (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
if (isIXAddr)
Offset >>= 2; // The actual encoded value has the low two bits zero.
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
@@ -1375,8 +1375,9 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
if (!isPPC64) {
// PPC32.
if (ALIGN_STACK && MaxAlign > TargetAlign) {
- assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!");
- assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!");
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+ assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
.addReg(PPC::R1)
@@ -1390,7 +1391,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::R1)
.addReg(PPC::R1)
.addReg(PPC::R0);
- } else if (isInt16(NegFrameSize)) {
+ } else if (isInt<16>(NegFrameSize)) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
.addReg(PPC::R1)
.addImm(NegFrameSize)
@@ -1408,8 +1409,9 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
}
} else { // PPC64.
if (ALIGN_STACK && MaxAlign > TargetAlign) {
- assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!");
- assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!");
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+ assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
.addReg(PPC::X1)
@@ -1422,7 +1424,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::X1)
.addReg(PPC::X1)
.addReg(PPC::X0);
- } else if (isInt16(NegFrameSize)) {
+ } else if (isInt<16>(NegFrameSize)) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
.addReg(PPC::X1)
.addImm(NegFrameSize / 4)
@@ -1591,7 +1593,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
// enabled (=> hasFastCall()==true) the fastcc call might contain a tail
// call which invalidates the stack pointer value in SP(0). So we use the
// value of R31 in this case.
- if (FI->hasFastCall() && isInt16(FrameSize)) {
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
      assert(hasFP(MF) && "Expecting a valid frame pointer.");
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
.addReg(PPC::R31).addImm(FrameSize);
@@ -1605,7 +1607,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
.addReg(PPC::R1)
.addReg(PPC::R31)
.addReg(PPC::R0);
- } else if (isInt16(FrameSize) &&
+ } else if (isInt<16>(FrameSize) &&
(!ALIGN_STACK || TargetAlign >= MaxAlign) &&
!MFI->hasVarSizedObjects()) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
@@ -1615,7 +1617,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
.addImm(0).addReg(PPC::R1);
}
} else {
- if (FI->hasFastCall() && isInt16(FrameSize)) {
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
      assert(hasFP(MF) && "Expecting a valid frame pointer.");
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
.addReg(PPC::X31).addImm(FrameSize);
@@ -1629,7 +1631,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
.addReg(PPC::X1)
.addReg(PPC::X31)
.addReg(PPC::X0);
- } else if (isInt16(FrameSize) && TargetAlign >= MaxAlign &&
+ } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign &&
!MFI->hasVarSizedObjects()) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
.addReg(PPC::X1).addImm(FrameSize);
@@ -1678,7 +1680,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;
- if (CallerAllocatedAmt && isInt16(CallerAllocatedAmt)) {
+ if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
.addReg(StackReg).addImm(CallerAllocatedAmt);
} else {
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index f46840c..8c5e905 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -316,19 +316,19 @@ def FBCONVF64 : Pseudo<(outs FP64:$dst), (ins GR64:$src),
let Defs = [PSW] in {
def FCMP32rr : Pseudo<(outs), (ins FP32:$src1, FP32:$src2),
"cebr\t$src1, $src2",
- [(SystemZcmp FP32:$src1, FP32:$src2), (implicit PSW)]>;
+ [(set PSW, (SystemZcmp FP32:$src1, FP32:$src2))]>;
def FCMP64rr : Pseudo<(outs), (ins FP64:$src1, FP64:$src2),
"cdbr\t$src1, $src2",
- [(SystemZcmp FP64:$src1, FP64:$src2), (implicit PSW)]>;
+ [(set PSW, (SystemZcmp FP64:$src1, FP64:$src2))]>;
def FCMP32rm : Pseudo<(outs), (ins FP32:$src1, rriaddr12:$src2),
"ceb\t$src1, $src2",
- [(SystemZcmp FP32:$src1, (load rriaddr12:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp FP32:$src1,
+ (load rriaddr12:$src2)))]>;
def FCMP64rm : Pseudo<(outs), (ins FP64:$src1, rriaddr12:$src2),
"cdb\t$src1, $src2",
- [(SystemZcmp FP64:$src1, (load rriaddr12:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp FP64:$src1,
+ (load rriaddr12:$src2)))]>;
} // Defs = [PSW]
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 5fa7e8c..06f01e7 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -424,6 +424,8 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I = MBB.end();
while (I != MBB.begin()) {
--I;
+ if (I->isDebugValue())
+ continue;
// Working from the bottom, when we see a non-terminator
// instruction, we're done.
if (!isUnpredicatedTerminator(I))
@@ -500,6 +502,8 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
while (I != MBB.begin()) {
--I;
+ if (I->isDebugValue())
+ continue;
if (I->getOpcode() != SystemZ::JMP &&
getCondFromBranchOpc(I->getOpcode()) == SystemZCC::INVALID)
break;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 0d1af23..22bde4e 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -31,7 +31,8 @@ class SDTCisI64<int OpNum> : SDTCisVT<OpNum, i64>;
def SDT_SystemZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
def SDT_SystemZCallSeqStart : SDCallSeqStart<[SDTCisI64<0>]>;
def SDT_SystemZCallSeqEnd : SDCallSeqEnd<[SDTCisI64<0>, SDTCisI64<1>]>;
-def SDT_CmpTest : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_CmpTest : SDTypeProfile<1, 2, [SDTCisI64<0>,
+ SDTCisSameAs<1, 2>]>;
def SDT_BrCond : SDTypeProfile<0, 3,
[SDTCisVT<0, OtherVT>,
SDTCisI8<1>, SDTCisVT<2, i64>]>;
@@ -980,100 +981,89 @@ let Defs = [PSW] in {
def CMP32rr : RRI<0x19,
(outs), (ins GR32:$src1, GR32:$src2),
"cr\t$src1, $src2",
- [(SystemZcmp GR32:$src1, GR32:$src2),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp GR32:$src1, GR32:$src2))]>;
def CMP64rr : RREI<0xB920,
(outs), (ins GR64:$src1, GR64:$src2),
"cgr\t$src1, $src2",
- [(SystemZcmp GR64:$src1, GR64:$src2),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp GR64:$src1, GR64:$src2))]>;
def CMP32ri : RILI<0xC2D,
(outs), (ins GR32:$src1, s32imm:$src2),
"cfi\t$src1, $src2",
- [(SystemZcmp GR32:$src1, imm:$src2),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp GR32:$src1, imm:$src2))]>;
def CMP64ri32 : RILI<0xC2C,
(outs), (ins GR64:$src1, s32imm64:$src2),
"cgfi\t$src1, $src2",
- [(SystemZcmp GR64:$src1, i64immSExt32:$src2),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp GR64:$src1, i64immSExt32:$src2))]>;
def CMP32rm : RXI<0x59,
(outs), (ins GR32:$src1, rriaddr12:$src2),
"c\t$src1, $src2",
- [(SystemZcmp GR32:$src1, (load rriaddr12:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr12:$src2)))]>;
def CMP32rmy : RXYI<0xE359,
(outs), (ins GR32:$src1, rriaddr:$src2),
"cy\t$src1, $src2",
- [(SystemZcmp GR32:$src1, (load rriaddr:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr:$src2)))]>;
def CMP64rm : RXYI<0xE320,
(outs), (ins GR64:$src1, rriaddr:$src2),
"cg\t$src1, $src2",
- [(SystemZcmp GR64:$src1, (load rriaddr:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp GR64:$src1, (load rriaddr:$src2)))]>;
def UCMP32rr : RRI<0x15,
(outs), (ins GR32:$src1, GR32:$src2),
"clr\t$src1, $src2",
- [(SystemZucmp GR32:$src1, GR32:$src2),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR32:$src1, GR32:$src2))]>;
def UCMP64rr : RREI<0xB921,
(outs), (ins GR64:$src1, GR64:$src2),
"clgr\t$src1, $src2",
- [(SystemZucmp GR64:$src1, GR64:$src2),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR64:$src1, GR64:$src2))]>;
def UCMP32ri : RILI<0xC2F,
(outs), (ins GR32:$src1, i32imm:$src2),
"clfi\t$src1, $src2",
- [(SystemZucmp GR32:$src1, imm:$src2),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR32:$src1, imm:$src2))]>;
def UCMP64ri32 : RILI<0xC2E,
(outs), (ins GR64:$src1, i64i32imm:$src2),
"clgfi\t$src1, $src2",
- [(SystemZucmp GR64:$src1, i64immZExt32:$src2),
- (implicit PSW)]>;
+ [(set PSW,(SystemZucmp GR64:$src1, i64immZExt32:$src2))]>;
def UCMP32rm : RXI<0x55,
(outs), (ins GR32:$src1, rriaddr12:$src2),
"cl\t$src1, $src2",
- [(SystemZucmp GR32:$src1, (load rriaddr12:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR32:$src1,
+ (load rriaddr12:$src2)))]>;
def UCMP32rmy : RXYI<0xE355,
(outs), (ins GR32:$src1, rriaddr:$src2),
"cly\t$src1, $src2",
- [(SystemZucmp GR32:$src1, (load rriaddr:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR32:$src1,
+ (load rriaddr:$src2)))]>;
def UCMP64rm : RXYI<0xE351,
(outs), (ins GR64:$src1, rriaddr:$src2),
"clg\t$src1, $src2",
- [(SystemZucmp GR64:$src1, (load rriaddr:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (load rriaddr:$src2)))]>;
def CMPSX64rr32 : RREI<0xB930,
(outs), (ins GR64:$src1, GR32:$src2),
"cgfr\t$src1, $src2",
- [(SystemZucmp GR64:$src1, (sext GR32:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (sext GR32:$src2)))]>;
def UCMPZX64rr32 : RREI<0xB931,
(outs), (ins GR64:$src1, GR32:$src2),
"clgfr\t$src1, $src2",
- [(SystemZucmp GR64:$src1, (zext GR32:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (zext GR32:$src2)))]>;
def CMPSX64rm32 : RXYI<0xE330,
(outs), (ins GR64:$src1, rriaddr:$src2),
"cgf\t$src1, $src2",
- [(SystemZucmp GR64:$src1, (sextloadi64i32 rriaddr:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (sextloadi64i32 rriaddr:$src2)))]>;
def UCMPZX64rm32 : RXYI<0xE331,
(outs), (ins GR64:$src1, rriaddr:$src2),
"clgf\t$src1, $src2",
- [(SystemZucmp GR64:$src1, (zextloadi64i32 rriaddr:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (zextloadi64i32 rriaddr:$src2)))]>;
// FIXME: Add other crazy ucmp forms
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index c3dcf8e..66bb914 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -524,6 +524,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
else
// Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs. However, sometimes the types are local to the file. So
+ // we need to fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
4/*size*/, 0/*addrspace*/);
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 7d29d97..c851ca3 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -83,7 +83,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
Name += "$non_lazy_ptr";
- MCSymbol *Sym = Ctx.GetOrCreateTemporarySymbol(Name.str());
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
MachineModuleInfoImpl::StubValueTy &StubSym =
getMachOMMI().getGVStubEntry(Sym);
@@ -98,7 +98,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
}
case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: {
Name += "$non_lazy_ptr";
- MCSymbol *Sym = Ctx.GetOrCreateTemporarySymbol(Name.str());
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
MachineModuleInfoImpl::StubValueTy &StubSym =
getMachOMMI().getHiddenGVStubEntry(Sym);
if (StubSym.getPointer() == 0) {
@@ -112,7 +112,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
}
case X86II::MO_DARWIN_STUB: {
Name += "$stub";
- MCSymbol *Sym = Ctx.GetOrCreateTemporarySymbol(Name.str());
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
MachineModuleInfoImpl::StubValueTy &StubSym =
getMachOMMI().getFnStubEntry(Sym);
if (StubSym.getPointer())
@@ -127,7 +127,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
Name.erase(Name.end()-5, Name.end());
StubSym =
MachineModuleInfoImpl::
- StubValueTy(Ctx.GetOrCreateTemporarySymbol(Name.str()), false);
+ StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false);
}
return Sym;
}
@@ -287,7 +287,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;
case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::V_SET0: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
+ case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
case X86::MOV16r0:
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 4d3dedf..22285f1 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -15,6 +15,7 @@ tablegen(X86GenCallingConv.inc -gen-callingconv)
tablegen(X86GenSubtarget.inc -gen-subtarget)
set(sources
+ SSEDomainFix.cpp
X86AsmBackend.cpp
X86CodeEmitter.cpp
X86COFFMachineModuleInfo.cpp
diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp
new file mode 100644
index 0000000..395ab57
--- /dev/null
+++ b/lib/Target/X86/SSEDomainFix.cpp
@@ -0,0 +1,499 @@
+//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SSEDomainFix pass.
+//
+// Some SSE instructions like mov, and, or, xor are available in different
+// variants for different operand types. These variant instructions are
+// equivalent, but on Nehalem and newer cpus there is extra latency
+// transferring data between integer and floating point domains.
+//
+// This pass changes the variant instructions to minimize domain crossings.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sse-domain-fix"
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+/// Allocate objects from a pool, allow objects to be recycled, and provide a
+/// way of deleting everything.
+template<typename T, unsigned PageSize = 64>
+class PoolAllocator {
+ std::vector<T*> Pages, Avail;
+public:
+ ~PoolAllocator() { Clear(); }
+
+ T* Alloc() {
+ if (Avail.empty()) {
+ T *p = new T[PageSize];
+ Pages.push_back(p);
+ Avail.reserve(PageSize);
+ for (unsigned n = 0; n != PageSize; ++n)
+ Avail.push_back(p+n);
+ }
+ T *p = Avail.back();
+ Avail.pop_back();
+ return p;
+ }
+
+ // Allow object to be reallocated. It won't be reconstructed.
+ void Recycle(T *p) {
+ p->clear();
+ Avail.push_back(p);
+ }
+
+ // Destroy all objects, make sure there are no external pointers to them.
+ void Clear() {
+ Avail.clear();
+ while (!Pages.empty()) {
+ delete[] Pages.back();
+ Pages.pop_back();
+ }
+ }
+};
+
+/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
+/// of execution domains.
+///
+/// An open DomainValue represents a set of instructions that can still switch
+/// execution domain. Multiple registers may refer to the same open
+/// DomainValue - they will eventually be collapsed to the same execution
+/// domain.
+///
+/// A collapsed DomainValue represents a single register that has been forced
+/// into one or more execution domains. There is a separate collapsed
+/// DomainValue for each register, but it may contain multiple execution
+/// domains. A register value is initially created in a single execution
+/// domain, but if we were forced to pay the penalty of a domain crossing, we
+/// keep track of the fact that the register is now available in multiple
+/// domains.
+struct DomainValue {
+ // Basic reference counting.
+ unsigned Refs;
+
+ // Available domains. For an open DomainValue, it is the still possible
+ // domains for collapsing. For a collapsed DomainValue it is the domains where
+ // the register is available for free.
+ unsigned Mask;
+
+ // Position of the last defining instruction.
+ unsigned Dist;
+
+ // Twiddleable instructions using or defining these registers.
+ SmallVector<MachineInstr*, 8> Instrs;
+
+  // A collapsed DomainValue has no instructions to twiddle; it simply keeps
+ // track of the domains where the registers are already available.
+ bool collapsed() const { return Instrs.empty(); }
+
+ // Is any domain in mask available?
+ bool compat(unsigned mask) const {
+ return Mask & mask;
+ }
+
+ // Mark domain as available.
+ void add(unsigned domain) {
+ Mask |= 1u << domain;
+ }
+
+ // First domain available in mask.
+ unsigned firstDomain() const {
+ return CountTrailingZeros_32(Mask);
+ }
+
+ DomainValue() { clear(); }
+
+ void clear() {
+ Refs = Mask = Dist = 0;
+ Instrs.clear();
+ }
+};
+
+static const unsigned NumRegs = 16;
+
+class SSEDomainFixPass : public MachineFunctionPass {
+ static char ID;
+ PoolAllocator<DomainValue> Pool;
+
+ MachineFunction *MF;
+ const X86InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineBasicBlock *MBB;
+ DomainValue **LiveRegs;
+ typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
+ LiveOutMap LiveOuts;
+ unsigned Distance;
+
+public:
+ SSEDomainFixPass() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "SSE execution domain fixup";
+ }
+
+private:
+ // Register mapping.
+ int RegIndex(unsigned Reg);
+
+ // LiveRegs manipulations.
+ void SetLiveReg(int rx, DomainValue *DV);
+ void Kill(int rx);
+ void Force(int rx, unsigned domain);
+ void Collapse(DomainValue *dv, unsigned domain);
+ bool Merge(DomainValue *A, DomainValue *B);
+
+ void enterBasicBlock();
+ void visitGenericInstr(MachineInstr*);
+ void visitSoftInstr(MachineInstr*, unsigned mask);
+ void visitHardInstr(MachineInstr*, unsigned domain);
+};
+}
+
+char SSEDomainFixPass::ID = 0;
+
+/// Translate TRI register number to an index into our smaller tables of
+/// interesting registers. Return -1 for boring registers.
+int SSEDomainFixPass::RegIndex(unsigned reg) {
+ // Registers are sorted lexicographically.
+  // We just need them to be consecutive; ordering doesn't matter.
+ assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort");
+ reg -= X86::XMM0;
+ return reg < NumRegs ? reg : -1;
+}
+
+/// Set LiveRegs[rx] = dv, updating reference counts.
+void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ if (!LiveRegs)
+ LiveRegs = (DomainValue**)calloc(sizeof(DomainValue*), NumRegs);
+
+ if (LiveRegs[rx] == dv)
+ return;
+ if (LiveRegs[rx]) {
+ assert(LiveRegs[rx]->Refs && "Bad refcount");
+ if (--LiveRegs[rx]->Refs == 0) Pool.Recycle(LiveRegs[rx]);
+ }
+ LiveRegs[rx] = dv;
+ if (dv) ++dv->Refs;
+}
+
+// Kill register rx, recycle or collapse any DomainValue.
+void SSEDomainFixPass::Kill(int rx) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ if (!LiveRegs || !LiveRegs[rx]) return;
+
+ // Before killing the last reference to an open DomainValue, collapse it to
+ // the first available domain.
+ if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->collapsed())
+ Collapse(LiveRegs[rx], LiveRegs[rx]->firstDomain());
+ else
+ SetLiveReg(rx, 0);
+}
+
+/// Force register rx into domain.
+void SSEDomainFixPass::Force(int rx, unsigned domain) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ DomainValue *dv;
+ if (LiveRegs && (dv = LiveRegs[rx])) {
+ if (dv->collapsed())
+ dv->add(domain);
+ else
+ Collapse(dv, domain);
+ } else {
+ // Set up basic collapsed DomainValue.
+ dv = Pool.Alloc();
+ dv->Dist = Distance;
+ dv->add(domain);
+ SetLiveReg(rx, dv);
+ }
+}
+
+/// Collapse open DomainValue into given domain. If there are multiple
+/// registers using dv, they each get a unique collapsed DomainValue.
+void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) {
+ assert(dv->compat(1u << domain) && "Cannot collapse");
+
+ // Collapse all the instructions.
+ while (!dv->Instrs.empty()) {
+ MachineInstr *mi = dv->Instrs.back();
+ TII->SetSSEDomain(mi, domain);
+ dv->Instrs.pop_back();
+ }
+ dv->Mask = 1u << domain;
+
+ // If there are multiple users, give them new, unique DomainValues.
+ if (LiveRegs && dv->Refs > 1) {
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx] == dv) {
+ DomainValue *dv2 = Pool.Alloc();
+ dv2->Dist = Distance;
+ dv2->add(domain);
+ SetLiveReg(rx, dv2);
+ }
+ }
+}
+
+/// Merge - All instructions and registers in B are moved to A, and B is
+/// released.
+bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
+ assert(!A->collapsed() && "Cannot merge into collapsed");
+ assert(!B->collapsed() && "Cannot merge from collapsed");
+ if (A == B)
+ return true;
+ if (!A->compat(B->Mask))
+ return false;
+ A->Mask &= B->Mask;
+ A->Dist = std::max(A->Dist, B->Dist);
+ A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx] == B)
+ SetLiveReg(rx, A);
+ return true;
+}
+
+void SSEDomainFixPass::enterBasicBlock() {
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock::const_livein_iterator i = MBB->livein_begin(),
+ e = MBB->livein_end(); i != e; ++i) {
+ int rx = RegIndex(*i);
+ if (rx < 0) continue;
+ for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
+ pe = MBB->pred_end(); pi != pe; ++pi) {
+ LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
+ if (fi == LiveOuts.end()) continue;
+ DomainValue *pdv = fi->second[rx];
+ if (!pdv) continue;
+ if (!LiveRegs || !LiveRegs[rx]) {
+ SetLiveReg(rx, pdv);
+ continue;
+ }
+
+ // We have a live DomainValue from more than one predecessor.
+ if (LiveRegs[rx]->collapsed()) {
+      // We are already collapsed, but the predecessor is not. Force it.
+ if (!pdv->collapsed())
+ Collapse(pdv, LiveRegs[rx]->firstDomain());
+ continue;
+ }
+
+ // Currently open, merge in predecessor.
+ if (!pdv->collapsed())
+ Merge(LiveRegs[rx], pdv);
+ else
+ Collapse(LiveRegs[rx], pdv->firstDomain());
+ }
+ }
+}
+
+// A hard instruction only works in one domain. All input registers will be
+// forced into that domain.
+void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) {
+ // Collapse all uses.
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Force(rx, domain);
+ }
+
+ // Kill all defs and force them.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Kill(rx);
+ Force(rx, domain);
+ }
+}
+
+// A soft instruction can be changed to work in other domains given by mask.
+void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+ // Scan the explicit use operands for incoming domains.
+ unsigned collmask = mask;
+ SmallVector<int, 4> used;
+ if (LiveRegs)
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (DomainValue *dv = LiveRegs[rx]) {
+ // Is it possible to use this collapsed register for free?
+ if (dv->collapsed()) {
+ if (unsigned m = collmask & dv->Mask)
+ collmask = m;
+ } else if (dv->compat(collmask))
+ used.push_back(rx);
+ else
+ Kill(rx);
+ }
+ }
+
+ // If the collapsed operands force a single domain, propagate the collapse.
+ if (isPowerOf2_32(collmask)) {
+ unsigned domain = CountTrailingZeros_32(collmask);
+ TII->SetSSEDomain(mi, domain);
+ visitHardInstr(mi, domain);
+ return;
+ }
+
+ // Kill off any remaining uses that don't match collmask, and build a list of
+ // incoming DomainValue that we want to merge.
+ SmallVector<DomainValue*,4> doms;
+ for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
+ int rx = *i;
+ DomainValue *dv = LiveRegs[rx];
+ // This useless DomainValue could have been missed above.
+ if (!dv->compat(collmask)) {
+ Kill(*i);
+ continue;
+ }
+ // sorted, uniqued insert.
+ bool inserted = false;
+ for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
+ i != e && !inserted; ++i) {
+ if (dv == *i)
+ inserted = true;
+ else if (dv->Dist < (*i)->Dist) {
+ inserted = true;
+ doms.insert(i, dv);
+ }
+ }
+ if (!inserted)
+ doms.push_back(dv);
+ }
+
+ // doms are now sorted in order of appearance. Try to merge them all, giving
+ // priority to the latest ones.
+ DomainValue *dv = 0;
+ while (!doms.empty()) {
+ if (!dv) {
+ dv = doms.pop_back_val();
+ continue;
+ }
+
+ DomainValue *ThisDV = doms.pop_back_val();
+ if (Merge(dv, ThisDV)) continue;
+
+ for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i)
+ if (LiveRegs[*i] == ThisDV)
+ Kill(*i);
+ }
+
+ // dv is the DomainValue we are going to use for this instruction.
+ if (!dv)
+ dv = Pool.Alloc();
+ dv->Dist = Distance;
+ dv->Mask = collmask;
+ dv->Instrs.push_back(mi);
+
+ // Finally set all defs and non-collapsed uses to dv.
+ for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
+ Kill(rx);
+ SetLiveReg(rx, dv);
+ }
+ }
+}
+
+void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
+ // Process explicit defs, kill any XMM registers redefined.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Kill(rx);
+ }
+}
+
+bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
+ TRI = MF->getTarget().getRegisterInfo();
+ MBB = 0;
+ LiveRegs = 0;
+ Distance = 0;
+ assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass");
+
+ // If no XMM registers are used in the function, we can skip it completely.
+ bool anyregs = false;
+ for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
+ E = X86::VR128RegClass.end(); I != E; ++I)
+ if (MF->getRegInfo().isPhysRegUsed(*I)) {
+ anyregs = true;
+ break;
+ }
+ if (!anyregs) return false;
+
+ MachineBasicBlock *Entry = MF->begin();
+ SmallPtrSet<MachineBasicBlock*, 16> Visited;
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
+ DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
+ DFI != DFE; ++DFI) {
+ MBB = *DFI;
+ enterBasicBlock();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ MachineInstr *mi = I;
+ if (mi->isDebugValue()) continue;
+ ++Distance;
+ std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi);
+ if (domp.first)
+ if (domp.second)
+ visitSoftInstr(mi, domp.second);
+ else
+ visitHardInstr(mi, domp.first);
+ else if (LiveRegs)
+ visitGenericInstr(mi);
+ }
+
+ // Save live registers at end of MBB - used by enterBasicBlock().
+ if (LiveRegs)
+ LiveOuts.insert(std::make_pair(MBB, LiveRegs));
+ LiveRegs = 0;
+ }
+
+ // Clear the LiveOuts vectors. Should we also collapse any remaining
+ // DomainValues?
+ for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
+ i != e; ++i)
+ free(i->second);
+ LiveOuts.clear();
+ Pool.Clear();
+
+ return false;
+}
+
+FunctionPass *llvm::createSSEDomainFixPass() {
+ return new SSEDomainFixPass();
+}
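
The PoolAllocator above is self-contained and worth reading on its own:
Alloc() may hand back a recycled object without re-running its constructor,
which is why DomainValue::clear() doubles as the re-initializer. Minimal
usage, mirroring the pass's own lifecycle:

// Sketch: the DomainValue lifecycle as the pass drives it.
PoolAllocator<DomainValue> Pool;
DomainValue *dv = Pool.Alloc();   // fresh or recycled; clear() has run
dv->add(0);                       // mark the first execution domain available
// ... attach instructions, Merge(), Collapse() ...
Pool.Recycle(dv);                 // cleared and returned to the free list
Pool.Clear();                     // drop every page when the pass finishes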
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index c753cf2..9be38a4 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -41,6 +41,10 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM,
///
FunctionPass *createX86FloatingPointStackifierPass();
+/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain
+/// crossings.
+FunctionPass *createSSEDomainFixPass();
+
/// createX87FPRegKillInserterPass - This function returns a pass which
/// inserts FP_REG_KILL instructions where needed.
///
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 2be51e1..6b62795 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -59,6 +59,9 @@ def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
[FeatureCMOV]>;
def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
"Bit testing of memory is slow">;
+def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
+ "IsUAMemFast", "true",
+ "Fast unaligned memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions">;
@@ -98,8 +101,10 @@ def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem]>;
+def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem]>;
// Sandy Bridge does not have FMA
def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
@@ -160,10 +165,11 @@ def X86InstrInfo : InstrInfo {
"hasAdSizePrefix",
"Prefix",
"hasREX_WPrefix",
- "ImmTypeBits",
- "FPFormBits",
+ "ImmT.Value",
+ "FPForm.Value",
"hasLockPrefix",
"SegOvrBits",
+ "ExeDomain.Value",
"Opcode"];
let TSFlagsShifts = [0,
6,
@@ -174,6 +180,7 @@ def X86InstrInfo : InstrInfo {
16,
19,
20,
+ 22,
24];
}
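
The TSFlagsFields/TSFlagsShifts change packs the new 2-bit ExeDomain.Value at
bit 22, between SegOvrBits and the opcode byte, so the SSEDomainFix pass can
read an instruction's execution domain straight out of TSFlags. Extraction
then looks roughly like this sketch; X86InstrInfo::GetSSEDomain is the real
accessor:

// Sketch: decode the execution domain packed at bits 22-23 of TSFlags.
static unsigned getExeDomain(uint64_t TSFlags) {
  return (TSFlags >> 22) & 0x3;
}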
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index 754a200..8e2928c3 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -10,10 +10,14 @@
#include "llvm/Target/TargetAsmBackend.h"
#include "X86.h"
#include "X86FixupKinds.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MachObjectWriter.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
@@ -48,8 +52,135 @@ public:
for (unsigned i = 0; i != Size; ++i)
DF.getContents()[Fixup.Offset + i] = uint8_t(Value >> (i * 8));
}
+
+ bool MayNeedRelaxation(const MCInst &Inst,
+ const SmallVectorImpl<MCAsmFixup> &Fixups) const;
+
+ void RelaxInstruction(const MCInstFragment *IF, MCInst &Res) const;
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
};
+static unsigned getRelaxedOpcode(unsigned Op) {
+ switch (Op) {
+ default:
+ return Op;
+
+ case X86::JAE_1: return X86::JAE_4;
+ case X86::JA_1: return X86::JA_4;
+ case X86::JBE_1: return X86::JBE_4;
+ case X86::JB_1: return X86::JB_4;
+ case X86::JE_1: return X86::JE_4;
+ case X86::JGE_1: return X86::JGE_4;
+ case X86::JG_1: return X86::JG_4;
+ case X86::JLE_1: return X86::JLE_4;
+ case X86::JL_1: return X86::JL_4;
+ case X86::JMP_1: return X86::JMP_4;
+ case X86::JNE_1: return X86::JNE_4;
+ case X86::JNO_1: return X86::JNO_4;
+ case X86::JNP_1: return X86::JNP_4;
+ case X86::JNS_1: return X86::JNS_4;
+ case X86::JO_1: return X86::JO_4;
+ case X86::JP_1: return X86::JP_4;
+ case X86::JS_1: return X86::JS_4;
+ }
+}
+
+bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst,
+ const SmallVectorImpl<MCAsmFixup> &Fixups) const {
+  // Check for a 1-byte pcrel fixup, and enforce that we would know how to
+  // relax this instruction.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ if (unsigned(Fixups[i].Kind) == X86::reloc_pcrel_1byte) {
+ assert(getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// FIXME: Can tblgen help at all here to verify there aren't other instructions
+// we can relax?
+void X86AsmBackend::RelaxInstruction(const MCInstFragment *IF,
+ MCInst &Res) const {
+  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
+ unsigned RelaxedOp = getRelaxedOpcode(IF->getInst().getOpcode());
+
+ if (RelaxedOp == IF->getInst().getOpcode()) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ IF->getInst().dump_pretty(OS);
+ llvm_report_error("unexpected instruction to relax: " + OS.str());
+ }
+
+ Res = IF->getInst();
+ Res.setOpcode(RelaxedOp);
+}
+
+/// WriteNopData - Write optimal nops to the output file to cover \arg Count
+/// bytes. Returns true on success; a target may return false if it cannot
+/// encode a nop sequence of the requested length.
+///
+/// FIXME: This is X86 32-bit specific and should move to a better place.
+bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ static const uint8_t Nops[16][16] = {
+ // nop
+ {0x90},
+ // xchg %ax,%ax
+ {0x66, 0x90},
+ // nopl (%[re]ax)
+ {0x0f, 0x1f, 0x00},
+ // nopl 0(%[re]ax)
+ {0x0f, 0x1f, 0x40, 0x00},
+ // nopl 0(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw 0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw %cs:0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0(%[re]ax,%[re]ax,1)
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x44, 0x00, 0x00,
+ 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
+ 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+    // nopl 0L(%[re]ax)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ // nopl 0L(%[re]ax)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ // nopl 0L(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
+ 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
+ };
+
+  // Write an optimal sequence covering up to the first 15 bytes.
+ uint64_t OptimalCount = (Count < 16) ? Count : 15;
+ for (uint64_t i = 0, e = OptimalCount; i != e; i++)
+ OW->Write8(Nops[OptimalCount - 1][i]);
+
+ // Finish with single byte nops.
+ for (uint64_t i = OptimalCount, e = Count; i != e; ++i)
+ OW->Write8(0x90);
+
+ return true;
+}
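The table provides multi-byte nops of 1 through 15 bytes, so any longer pad becomes one optimal nop followed by plain 0x90s. A self-contained sketch of that split, leaving out the MCObjectWriter plumbing:

#include <cstdint>
#include <cstdio>

// For a Count-byte pad: one optimal nop of min(Count, 15) bytes, then
// single-byte 0x90 nops for the rest (mirrors the two loops above).
static void describeNopPad(uint64_t Count) {
  uint64_t Optimal = Count < 16 ? Count : 15;
  std::printf("%llu bytes = one %llu-byte nop + %llu x 0x90\n",
              (unsigned long long)Count, (unsigned long long)Optimal,
              (unsigned long long)(Count - Optimal));
}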
+
+/* *** */
+
class ELFX86AsmBackend : public X86AsmBackend {
public:
ELFX86AsmBackend(const Target &T)
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 5d3edbb..c69eeb3 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -383,7 +383,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
// They have to fit in the 32-bit signed displacement field though.
- if (isInt32(Disp)) {
+ if (isInt<32>(Disp)) {
AM.Disp = (uint32_t)Disp;
return X86SelectAddress(U->getOperand(0), AM);
}
@@ -427,7 +427,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
}
}
// Check for displacement overflow.
- if (!isInt32(Disp))
+ if (!isInt<32>(Disp))
break;
// Ok, the GEP indices were covered by constant-offset and scaled-index
// addressing. Update the address state and move on to examining the base.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 704f9c6..b24d5a1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -802,6 +802,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (!VT.is128BitVector()) {
continue;
}
+
setOperationAction(ISD::AND, SVT, Promote);
AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
setOperationAction(ISD::OR, SVT, Promote);
@@ -1008,7 +1009,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: These should be based on subtarget info. Plus, the values should
// be smaller when we are optimizing for size.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
- maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
+ maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
setPrefLoopAlignment(16);
benefitFromCodePlacementOpt = true;
@@ -1066,23 +1067,37 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
}
/// getOptimalMemOpType - Returns the target specific optimal type for load
-/// and store operations as a result of memset, memcpy, and memmove
-/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
-/// determining it.
+/// and store operations as a result of memset, memcpy, and memmove lowering.
+/// If DstAlign is zero, it means it is safe to assume that the destination
+/// alignment can satisfy any constraint. Similarly, if SrcAlign is zero, there
+/// is no need to check it against an alignment requirement, probably because
+/// the source does not need to be loaded. It returns EVT::Other if
+/// SelectionDAG should be responsible for determining it.
EVT
-X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr,
+X86TargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool SafeToUseFP,
SelectionDAG &DAG) const {
// FIXME: This turns off use of xmm stores for memset/memcpy on targets like
// linux. This is because the stack realignment code can't handle certain
// cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = DAG.getMachineFunction().getFunction();
- bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
- if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) {
- if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
- return MVT::v4i32;
- if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
- return MVT::v4f32;
+ if (!F->hasFnAttr(Attribute::NoImplicitFloat)) {
+ if (Size >= 16 &&
+ (Subtarget->isUnalignedMemAccessFast() ||
+ ((DstAlign == 0 || DstAlign >= 16) &&
+ (SrcAlign == 0 || SrcAlign >= 16))) &&
+ Subtarget->getStackAlignment() >= 16) {
+ if (Subtarget->hasSSE2())
+ return MVT::v4i32;
+ if (SafeToUseFP && Subtarget->hasSSE1())
+ return MVT::v4f32;
+ } else if (SafeToUseFP &&
+ Size >= 8 &&
+ !Subtarget->is64Bit() &&
+ Subtarget->getStackAlignment() >= 8 &&
+ Subtarget->hasSSE2())
+ return MVT::f64;
}
if (Subtarget->is64Bit() && Size >= 8)
return MVT::i64;
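The rewritten hook prefers 16-byte vector types when unaligned access is fast or both sides are known (or assumed) 16-byte aligned, and falls back to an f64 store trick on 32-bit SSE2 targets. A hedged sketch of the same decision tree, with the Subtarget queries abstracted into plain parameters (the names here are stand-ins, not the real API):

#include <cstdint>

enum class MemVT { v4i32, v4f32, f64, i64, Other };

// AllowVector folds in the NoImplicitFloat check from the code above.
static MemVT pickMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                           bool SafeToUseFP, bool AllowVector, bool HasSSE1,
                           bool HasSSE2, bool FastUAMem, bool Is64Bit,
                           unsigned StackAlign) {
  if (AllowVector) {
    bool AlignOK = FastUAMem || ((DstAlign == 0 || DstAlign >= 16) &&
                                 (SrcAlign == 0 || SrcAlign >= 16));
    if (Size >= 16 && AlignOK && StackAlign >= 16) {
      if (HasSSE2)                return MemVT::v4i32;
      if (SafeToUseFP && HasSSE1) return MemVT::v4f32;
    } else if (SafeToUseFP && Size >= 8 && !Is64Bit &&
               StackAlign >= 8 && HasSSE2) {
      return MemVT::f64;
    }
  }
  if (Is64Bit && Size >= 8)
    return MemVT::i64;
  return MemVT::Other;  // the remaining cases fall outside this hunk
}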
@@ -1108,8 +1123,8 @@ MCSymbol *
X86TargetLowering::getPICBaseSymbol(const MachineFunction *MF,
MCContext &Ctx) const {
const MCAsmInfo &MAI = *getTargetMachine().getMCAsmInfo();
- return Ctx.GetOrCreateTemporarySymbol(Twine(MAI.getPrivateGlobalPrefix())+
- Twine(MF->getFunctionNumber())+"$pb");
+ return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
+ Twine(MF->getFunctionNumber())+"$pb");
}
@@ -2290,6 +2305,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return false;
// If -tailcallopt is specified, make fastcc functions tail-callable.
+ const MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = DAG.getMachineFunction().getFunction();
if (GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) &&
@@ -2301,8 +2317,14 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Look for obvious safe cases to perform tail call optimization that does not
// require ABI changes. This is what gcc calls sibcall.
- // Do not sibcall optimize vararg calls for now.
- if (isVarArg)
+ // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
+ // emit a special epilogue.
+ if (RegInfo->needsStackRealignment(MF))
+ return false;
+
+  // Do not sibcall optimize vararg calls unless the call site passes no
+  // arguments.
+ if (isVarArg && !Outs.empty())
return false;
// Also avoid sibcall optimization if either caller or callee uses struct
@@ -2417,7 +2439,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement) {
// Offset should fit into 32 bit immediate field.
- if (!isInt32(Offset))
+ if (!isInt<32>(Offset))
return false;
// If we don't have a symbolic displacement - we don't have any extra
@@ -3613,6 +3635,69 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
return SDValue();
}
+/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
+/// vector of type 'VT', see if the elements can be replaced by a single large
+/// load which has the same value as a build_vector whose operands are 'Elts'.
+///
+/// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
+///
+/// FIXME: we'd also like to handle the case where the last elements are zero
+/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
+/// There's even a handy isZeroNode for that purpose.
+static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
+ DebugLoc &dl, SelectionDAG &DAG) {
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = Elts.size();
+
+ LoadSDNode *LDBase = NULL;
+ unsigned LastLoadedElt = -1U;
+
+ // For each element in the initializer, see if we've found a load or an undef.
+ // If we don't find an initial load element, or later load elements are
+ // non-consecutive, bail out.
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue Elt = Elts[i];
+
+ if (!Elt.getNode() ||
+ (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
+ return SDValue();
+ if (!LDBase) {
+ if (Elt.getNode()->getOpcode() == ISD::UNDEF)
+ return SDValue();
+ LDBase = cast<LoadSDNode>(Elt.getNode());
+ LastLoadedElt = i;
+ continue;
+ }
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+
+ LoadSDNode *LD = cast<LoadSDNode>(Elt);
+ if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
+ return SDValue();
+ LastLoadedElt = i;
+ }
+
+ // If we have found an entire vector of loads and undefs, then return a large
+ // load of the entire vector width starting at the base pointer. If we found
+ // consecutive loads for the low half, generate a vzext_load node.
+ if (LastLoadedElt == NumElems - 1) {
+ if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
+ return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+ LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
+ return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+ LDBase->isVolatile(), LDBase->isNonTemporal(),
+ LDBase->getAlignment());
+ } else if (NumElems == 4 && LastLoadedElt == 1) {
+ SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
+ SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
+ }
+ return SDValue();
+}
+
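In address terms, the loop demands that element i, when defined, loads from Base + i * EltSize, tolerating undefs after the first element. A toy model of just that test (hypothetical types; the real code walks SDNodes):

#include <cstddef>
#include <cstdint>

struct Elt { bool IsUndef; const uint8_t *Addr; };

static bool isConsecutive(const Elt *Elts, size_t N, const uint8_t *Base,
                          size_t EltSize) {
  if (N == 0 || Elts[0].IsUndef)
    return false;                        // need a defined first element
  for (size_t i = 0; i != N; ++i) {
    if (Elts[i].IsUndef)
      continue;                          // undefs are transparent
    if (Elts[i].Addr != Base + i * EltSize)
      return false;                      // not consecutive
  }
  return true;
}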
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
@@ -3841,14 +3926,18 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
}
- if (Values.size() > 2) {
- // If we have SSE 4.1, Expand into a number of inserts unless the number of
- // values to be inserted is equal to the number of elements, in which case
- // use the unpack code below in the hopes of matching the consecutive elts
- // load merge pattern for shuffles.
- // FIXME: We could probably just check that here directly.
- if (Values.size() < NumElems && VT.getSizeInBits() == 128 &&
- getSubtarget()->hasSSE41()) {
+ if (Values.size() > 1 && VT.getSizeInBits() == 128) {
+ // Check for a build vector of consecutive loads.
+ for (unsigned i = 0; i < NumElems; ++i)
+ V[i] = Op.getOperand(i);
+
+ // Check for elements which are consecutive loads.
+ SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
+ if (LD.getNode())
+ return LD;
+
+ // For SSE 4.1, use inserts into undef.
+ if (getSubtarget()->hasSSE41()) {
V[0] = DAG.getUNDEF(VT);
for (unsigned i = 0; i < NumElems; ++i)
if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
@@ -3856,7 +3945,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(i), DAG.getIntPtrConstant(i));
return V[0];
}
- // Expand into a number of unpckl*.
+
+ // Otherwise, expand into a number of unpckl*
// e.g. for v4f32
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
@@ -3871,7 +3961,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
}
return V[0];
}
-
return SDValue();
}
@@ -8797,83 +8886,24 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
- EVT EltVT, LoadSDNode *&LDBase,
- unsigned &LastLoadedElt,
- SelectionDAG &DAG, MachineFrameInfo *MFI,
- const TargetLowering &TLI) {
- LDBase = NULL;
- LastLoadedElt = -1U;
- for (unsigned i = 0; i < NumElems; ++i) {
- if (N->getMaskElt(i) < 0) {
- if (!LDBase)
- return false;
- continue;
- }
-
- SDValue Elt = DAG.getShuffleScalarElt(N, i);
- if (!Elt.getNode() ||
- (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
- return false;
- if (!LDBase) {
- if (Elt.getNode()->getOpcode() == ISD::UNDEF)
- return false;
- LDBase = cast<LoadSDNode>(Elt.getNode());
- LastLoadedElt = i;
- continue;
- }
- if (Elt.getOpcode() == ISD::UNDEF)
- continue;
-
- LoadSDNode *LD = cast<LoadSDNode>(Elt);
- if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
- return false;
- LastLoadedElt = i;
- }
- return true;
-}
-
/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
-/// order. In the case of v2i64, it will see if it can rewrite the
-/// shuffle to be an appropriate build vector so it can take advantage of
-// performBuildVectorCombine.
+/// order.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
- EVT EltVT = VT.getVectorElementType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
- unsigned NumElems = VT.getVectorNumElements();
if (VT.getSizeInBits() != 128)
return SDValue();
- // Try to combine a vector_shuffle into a 128-bit load.
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- LoadSDNode *LD = NULL;
- unsigned LastLoadedElt;
- if (!EltsFromConsecutiveLoads(SVN, NumElems, EltVT, LD, LastLoadedElt, DAG,
- MFI, TLI))
- return SDValue();
-
- if (LastLoadedElt == NumElems - 1) {
- if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16)
- return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->isVolatile(), LD->isNonTemporal(), 0);
- return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- } else if (NumElems == 4 && LastLoadedElt == 1) {
- SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
- SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
- SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
- }
- return SDValue();
+ SmallVector<SDValue, 16> Elts;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
+ Elts.push_back(DAG.getShuffleScalarElt(SVN, i));
+
+ return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
}
/// PerformShuffleCombine - Detect vector gather/scatter index generation
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 0f15eba..4549cba 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -417,12 +417,15 @@ namespace llvm {
virtual unsigned getByValTypeAlignment(const Type *Ty) const;
/// getOptimalMemOpType - Returns the target specific optimal type for load
- /// and store operations as a result of memset, memcpy, and memmove
- /// lowering. It returns EVT::iAny if SelectionDAG should be responsible for
- /// determining it.
- virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr,
- SelectionDAG &DAG) const;
+ /// and store operations as a result of memset, memcpy, and memmove lowering.
+  /// If DstAlign is zero, it means it is safe to assume that the destination
+  /// alignment can satisfy any constraint. Similarly, if SrcAlign is zero,
+  /// there is no need to check it against an alignment requirement, probably
+  /// because the source does not need to be loaded. It returns EVT::Other if
+ /// SelectionDAG should be responsible for determining it.
+ virtual EVT getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool SafeToUseFP, SelectionDAG &DAG) const;
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses of the specified type.
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 8cbb756..eef2ca0 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -295,19 +295,17 @@ def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
let Defs = [EFLAGS] in {
def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (X86bsf GR64:$src)), (implicit EFLAGS)]>, TB;
+ [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB;
def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (X86bsf (loadi64 addr:$src))),
- (implicit EFLAGS)]>, TB;
+ [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB;
def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (X86bsr GR64:$src)), (implicit EFLAGS)]>, TB;
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB;
def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (X86bsr (loadi64 addr:$src))),
- (implicit EFLAGS)]>, TB;
+ [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB;
} // Defs = [EFLAGS]
// Repeat string ops
@@ -508,8 +506,8 @@ let isCommutable = 1 in
def ADD64rr : RI<0x01, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, GR64:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86add_flag GR64:$src1, GR64:$src2))]>;
// These are alternate spellings for use by the disassembler; we mark them as
// code gen only to ensure they aren't matched by the assembler.
@@ -523,21 +521,21 @@ let isCodeGenOnly = 1 in {
def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst),
(ins GR64:$src1, i64i8imm:$src2),
"add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86add_flag GR64:$src1, i64immSExt8:$src2))]>;
def ADD64ri32 : RIi32<0x81, MRM0r, (outs GR64:$dst),
(ins GR64:$src1, i64i32imm:$src2),
"add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86add_flag GR64:$src1, i64immSExt32:$src2))]>;
} // isConvertibleToThreeAddress
// Register-Memory Addition
def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86add_flag GR64:$src1, (load addr:$src2)))]>;
} // isTwoAddress
@@ -604,8 +602,8 @@ let isTwoAddress = 1 in {
def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sub GR64:$src1, GR64:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86sub_flag GR64:$src1, GR64:$src2))]>;
def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
@@ -615,20 +613,20 @@ def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst),
def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86sub_flag GR64:$src1, (load addr:$src2)))]>;
// Register-Integer Subtraction
def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst),
(ins GR64:$src1, i64i8imm:$src2),
"sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86sub_flag GR64:$src1, i64immSExt8:$src2))]>;
def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
(ins GR64:$src1, i64i32imm:$src2),
"sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i32imm:$src),
@@ -716,15 +714,15 @@ let isCommutable = 1 in
def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, GR64:$src2)),
- (implicit EFLAGS)]>, TB;
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
// Register-Memory Signed Integer Multiplication
def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>, TB;
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
} // isTwoAddress
// Surprisingly enough, these are not two-address instructions!
@@ -733,27 +731,27 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
// Memory-Integer Signed Integer Multiplication
def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, (mul (load addr:$src1),
- i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt8:$src2))]>;
def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
(outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, (mul (load addr:$src1),
- i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt32:$src2))]>;
} // Defs = [EFLAGS]
// Unsigned division / remainder
@@ -787,16 +785,14 @@ def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst",
- [(set GR64:$dst, (add GR64:$src, 1)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src))]>;
def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
[(store (add (loadi64 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)]>;
let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst",
- [(set GR64:$dst, (add GR64:$src, -1)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src))]>;
def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
[(store (add (loadi64 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)]>;
@@ -806,23 +802,19 @@ let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in {
// Can transform into LEA.
def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src),
"inc{w}\t$dst",
- [(set GR16:$dst, (add GR16:$src, 1)),
- (implicit EFLAGS)]>,
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>,
OpSize, Requires<[In64BitMode]>;
def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src),
"inc{l}\t$dst",
- [(set GR32:$dst, (add GR32:$src, 1)),
- (implicit EFLAGS)]>,
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>,
Requires<[In64BitMode]>;
def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src),
"dec{w}\t$dst",
- [(set GR16:$dst, (add GR16:$src, -1)),
- (implicit EFLAGS)]>,
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>,
OpSize, Requires<[In64BitMode]>;
def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src),
"dec{l}\t$dst",
- [(set GR32:$dst, (add GR32:$src, -1)),
- (implicit EFLAGS)]>,
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
Requires<[In64BitMode]>;
} // isConvertibleToThreeAddress
@@ -1092,26 +1084,26 @@ let isCommutable = 1 in
def AND64rr : RI<0x21, MRMDestReg,
(outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, GR64:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86and_flag GR64:$src1, GR64:$src2))]>;
def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}", []>;
def AND64rm : RI<0x23, MRMSrcMem,
(outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86and_flag GR64:$src1, (load addr:$src2)))]>;
def AND64ri8 : RIi8<0x83, MRM4r,
(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86and_flag GR64:$src1, i64immSExt8:$src2))]>;
def AND64ri32 : RIi32<0x81, MRM4r,
(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86and_flag GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
def AND64mr : RI<0x21, MRMDestMem,
@@ -1135,26 +1127,26 @@ let isCommutable = 1 in
def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, GR64:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86or_flag GR64:$src1, GR64:$src2))]>;
def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}", []>;
def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86or_flag GR64:$src1, (load addr:$src2)))]>;
def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, i64i8imm:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86or_flag GR64:$src1, i64immSExt8:$src2))]>;
def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, i64i32imm:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86or_flag GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
@@ -1178,26 +1170,26 @@ let isCommutable = 1 in
def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (xor GR64:$src1, GR64:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86xor_flag GR64:$src1, GR64:$src2))]>;
def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}", []>;
def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (xor GR64:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86xor_flag GR64:$src1, (load addr:$src2)))]>;
def XOR64ri8 : RIi8<0x83, MRM6r, (outs GR64:$dst),
(ins GR64:$src1, i64i8imm:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (xor GR64:$src1, i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86xor_flag GR64:$src1, i64immSExt8:$src2))]>;
def XOR64ri32 : RIi32<0x81, MRM6r,
(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (xor GR64:$src1, i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86xor_flag GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
@@ -2181,14 +2173,11 @@ def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
let AddedComplexity = 5 in { // Try this before selecting to OR
-def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
(ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2),
(ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(parallel (or_is_add GR64:$src1, GR64:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR64:$src1, GR64:$src2),
(ADD64rr GR64:$src1, GR64:$src2)>;
} // AddedComplexity
@@ -2215,136 +2204,76 @@ def : Pat<(subc GR64:$src1, imm:$src2),
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//
-// Register-Register Addition with EFLAGS result
-def : Pat<(parallel (X86add_flag GR64:$src1, GR64:$src2),
- (implicit EFLAGS)),
+// addition
+def : Pat<(add GR64:$src1, GR64:$src2),
(ADD64rr GR64:$src1, GR64:$src2)>;
-
-// Register-Integer Addition with EFLAGS result
-def : Pat<(parallel (X86add_flag GR64:$src1, i64immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(add GR64:$src1, i64immSExt8:$src2),
(ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (X86add_flag GR64:$src1, i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(add GR64:$src1, i64immSExt32:$src2),
(ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// Register-Memory Addition with EFLAGS result
-def : Pat<(parallel (X86add_flag GR64:$src1, (loadi64 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
(ADD64rm GR64:$src1, addr:$src2)>;
-// Register-Register Subtraction with EFLAGS result
-def : Pat<(parallel (X86sub_flag GR64:$src1, GR64:$src2),
- (implicit EFLAGS)),
+// subtraction
+def : Pat<(sub GR64:$src1, GR64:$src2),
(SUB64rr GR64:$src1, GR64:$src2)>;
-
-// Register-Memory Subtraction with EFLAGS result
-def : Pat<(parallel (X86sub_flag GR64:$src1, (loadi64 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
(SUB64rm GR64:$src1, addr:$src2)>;
-
-// Register-Integer Subtraction with EFLAGS result
-def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
(SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
(SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
-// Register-Register Signed Integer Multiplication with EFLAGS result
-def : Pat<(parallel (X86smul_flag GR64:$src1, GR64:$src2),
- (implicit EFLAGS)),
+// multiply
+def : Pat<(mul GR64:$src1, GR64:$src2),
(IMUL64rr GR64:$src1, GR64:$src2)>;
-
-// Register-Memory Signed Integer Multiplication with EFLAGS result
-def : Pat<(parallel (X86smul_flag GR64:$src1, (loadi64 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
(IMUL64rm GR64:$src1, addr:$src2)>;
-
-// Register-Integer Signed Integer Multiplication with EFLAGS result
-def : Pat<(parallel (X86smul_flag GR64:$src1, i64immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
(IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (X86smul_flag GR64:$src1, i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
(IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// Memory-Integer Signed Integer Multiplication with EFLAGS result
-def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
(IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
-// INC and DEC with EFLAGS result. Note that these do not set CF.
-def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)),
- (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)),
- (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-
-def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)),
- (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)),
- (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-
-def : Pat<(parallel (X86inc_flag GR64:$src), (implicit EFLAGS)),
- (INC64r GR64:$src)>;
-def : Pat<(parallel (X86dec_flag GR64:$src), (implicit EFLAGS)),
- (DEC64r GR64:$src)>;
-
-// Register-Register Logical Or with EFLAGS result
-def : Pat<(parallel (X86or_flag GR64:$src1, GR64:$src2),
- (implicit EFLAGS)),
- (OR64rr GR64:$src1, GR64:$src2)>;
+// inc/dec
+def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
+def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
-// Register-Integer Logical Or with EFLAGS result
-def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt8:$src2),
- (implicit EFLAGS)),
+// or
+def : Pat<(or GR64:$src1, GR64:$src2),
+ (OR64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt8:$src2),
(OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(or GR64:$src1, i64immSExt32:$src2),
(OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// Register-Memory Logical Or with EFLAGS result
-def : Pat<(parallel (X86or_flag GR64:$src1, (loadi64 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
(OR64rm GR64:$src1, addr:$src2)>;
-// Register-Register Logical XOr with EFLAGS result
-def : Pat<(parallel (X86xor_flag GR64:$src1, GR64:$src2),
- (implicit EFLAGS)),
+// xor
+def : Pat<(xor GR64:$src1, GR64:$src2),
(XOR64rr GR64:$src1, GR64:$src2)>;
-
-// Register-Integer Logical XOr with EFLAGS result
-def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
(XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
(XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// Register-Memory Logical XOr with EFLAGS result
-def : Pat<(parallel (X86xor_flag GR64:$src1, (loadi64 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
(XOR64rm GR64:$src1, addr:$src2)>;
-// Register-Register Logical And with EFLAGS result
-def : Pat<(parallel (X86and_flag GR64:$src1, GR64:$src2),
- (implicit EFLAGS)),
+// and
+def : Pat<(and GR64:$src1, GR64:$src2),
(AND64rr GR64:$src1, GR64:$src2)>;
-
-// Register-Integer Logical And with EFLAGS result
-def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(and GR64:$src1, i64immSExt8:$src2),
(AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(and GR64:$src1, i64immSExt32:$src2),
(AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// Register-Memory Logical And with EFLAGS result
-def : Pat<(parallel (X86and_flag GR64:$src1, (loadi64 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
(AND64rm GR64:$src1, addr:$src2)>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index bb81cbf..d25ec26 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -68,6 +68,16 @@ def CompareFP : FPFormat<5>;
def CondMovFP : FPFormat<6>;
def SpecialFP : FPFormat<7>;
+// Class specifying the SSE execution domain, used by the SSEDomainFix pass.
+// Keep in sync with tables in X86InstrInfo.cpp.
+class Domain<bits<2> val> {
+ bits<2> Value = val;
+}
+def GenericDomain : Domain<0>;
+def SSEPackedSingle : Domain<1>;
+def SSEPackedDouble : Domain<2>;
+def SSEPackedInt : Domain<3>;
+
// Prefix byte classes which are used to indicate to the ad-hoc machine code
// emitter that various prefix bytes are required.
class OpSize { bit hasOpSizePrefix = 1; }
@@ -93,7 +103,7 @@ class TA { bits<4> Prefix = 14; }
class TF { bits<4> Prefix = 15; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
- string AsmStr>
+ string AsmStr, Domain d = GenericDomain>
: Instruction {
let Namespace = "X86";
@@ -101,7 +111,6 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
Format Form = f;
bits<6> FormBits = Form.Value;
ImmType ImmT = i;
- bits<3> ImmTypeBits = ImmT.Value;
dag OutOperandList = outs;
dag InOperandList = ins;
@@ -115,20 +124,21 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bits<4> Prefix = 0; // Which prefix byte does this inst have?
bit hasREX_WPrefix = 0; // Does this inst requires the REX.W prefix?
- FPFormat FPForm; // What flavor of FP instruction is this?
- bits<3> FPFormBits = 0;
+ FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
bits<2> SegOvrBits = 0; // Segment override prefix.
+ Domain ExeDomain = d;
}
-class I<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
- : X86Inst<o, f, NoImm, outs, ins, asm> {
+class I<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, NoImm, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm8 , outs, ins, asm> {
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, Imm8, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
@@ -166,7 +176,7 @@ class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
: X86Inst<0, Pseudo, NoImm, outs, ins, ""> {
- let FPForm = fp; let FPFormBits = FPForm.Value;
+ let FPForm = fp;
let Pattern = pattern;
}
@@ -196,14 +206,16 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
-class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
// SSE2 Instruction Templates:
//
@@ -222,10 +234,12 @@ class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
// SSE3 Instruction Templates:
//
@@ -235,12 +249,15 @@ class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE3]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
+ Requires<[HasSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE3]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
+ Requires<[HasSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE3]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE3]>;
// SSSE3 Instruction Templates:
@@ -254,10 +271,12 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSSE3]>;
// SSE4.1 Instruction Templates:
//
@@ -266,17 +285,20 @@ class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE41]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE41]>;
// SSE4.2 Instruction Templates:
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE42]>;
// SS42FI - SSE 4.2 instructions with TF prefix.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
@@ -286,7 +308,8 @@ class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
// SS42AI - SSE 4.2 instructions with TA prefix
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE42]>;
// X86-64 Instruction templates...
//
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 139a905..c0c9d98 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -597,7 +597,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PMULHUWrr, X86::PMULHUWrm, 16 },
{ X86::PMULHWrr, X86::PMULHWrm, 16 },
{ X86::PMULLDrr, X86::PMULLDrm, 16 },
- { X86::PMULLDrr_int, X86::PMULLDrm_int, 16 },
{ X86::PMULLWrr, X86::PMULLWrm, 16 },
{ X86::PMULUDQrr, X86::PMULUDQrm, 16 },
{ X86::PORrr, X86::PORrm, 16 },
@@ -992,8 +991,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
/// a few instructions in each direction it assumes it's not safe.
static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator E = MBB.end();
+
// It's always safe to clobber EFLAGS at the end of a block.
- if (I == MBB.end())
+ if (I == E)
return true;
// For compile time consideration, if we are not able to determine the
@@ -1017,20 +1018,28 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
// This instruction defines EFLAGS, no need to look any further.
return true;
++Iter;
+ // Skip over DBG_VALUE.
+ while (Iter != E && Iter->isDebugValue())
+ ++Iter;
// If we make it to the end of the block, it's safe to clobber EFLAGS.
- if (Iter == MBB.end())
+ if (Iter == E)
return true;
}
+ MachineBasicBlock::iterator B = MBB.begin();
Iter = I;
for (unsigned i = 0; i < 4; ++i) {
// If we make it to the beginning of the block, it's safe to clobber
// EFLAGS iff EFLAGS is not live-in.
- if (Iter == MBB.begin())
+ if (Iter == B)
return !MBB.isLiveIn(X86::EFLAGS);
--Iter;
+ // Skip over DBG_VALUE.
+ while (Iter != B && Iter->isDebugValue())
+ --Iter;
+
bool SawKill = false;
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
MachineOperand &MO = Iter->getOperand(j);
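All three scans now treat DBG_VALUE instructions as transparent, so the presence of debug info cannot change whether EFLAGS is deemed safe to clobber. The idiom in isolation (Iter, B, E are the MachineBasicBlock iterators from the code above):

while (Iter != E && Iter->isDebugValue()) ++Iter;   // forward walk, stop at end
while (Iter != B && Iter->isDebugValue()) --Iter;   // backward walk, stop at begin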
@@ -1677,6 +1686,8 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I = MBB.end();
while (I != MBB.begin()) {
--I;
+ if (I->isDebugValue())
+ continue;
// Working from the bottom, when we see a non-terminator instruction, we're
// done.
@@ -1773,6 +1784,8 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
while (I != MBB.begin()) {
--I;
+ if (I->isDebugValue())
+ continue;
if (I->getOpcode() != X86::JMP_4 &&
GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
break;
@@ -2505,7 +2518,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = (*LoadMI->memoperands_begin())->getAlignment();
else
switch (LoadMI->getOpcode()) {
- case X86::V_SET0:
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
case X86::V_SETALLONES:
Alignment = 16;
break;
@@ -2535,11 +2550,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
SmallVector<MachineOperand,X86AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
- case X86::V_SET0:
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
case X86::V_SETALLONES:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
- // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
+ // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
// Medium and large mode can't fold loads this way.
@@ -3648,3 +3665,51 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
X86FI->setGlobalBaseReg(GlobalBaseReg);
return GlobalBaseReg;
}
+
+// These are the replaceable SSE instructions. Some of these have Int variants
+// that we don't include here. We don't want to replace instructions selected
+// by intrinsics.
+static const unsigned ReplaceableInstrs[][3] = {
+  //PackedSingle     PackedDouble     PackedInt
+ { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
+ { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
+ { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
+ { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
+ { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
+ { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
+ { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
+ { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
+ { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
+ { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
+ { X86::ORPSrm, X86::ORPDrm, X86::PORrm },
+ { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
+ { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI },
+ { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
+ { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
+};
+
+// FIXME: Some shuffle and unpack instructions have equivalents in different
+// domains, but they require a bit more work than just switching opcodes.
+
+static const unsigned *lookup(unsigned opcode, unsigned domain) {
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
+ if (ReplaceableInstrs[i][domain-1] == opcode)
+ return ReplaceableInstrs[i];
+ return 0;
+}
+
+std::pair<uint16_t, uint16_t>
+X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const {
+ uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ return std::make_pair(domain,
+ domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
+}
+
+void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const {
+ assert(Domain>0 && Domain<4 && "Invalid execution domain");
+ uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ assert(dom && "Not an SSE instruction");
+ const unsigned *table = lookup(MI->getOpcode(), dom);
+ assert(table && "Cannot change domain");
+ MI->setDesc(get(table[Domain-1]));
+}
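GetSSEDomain reports the instruction's current domain plus a bitmask of domains it can be moved to (0xe, i.e. domains 1-3, whenever an entry exists in ReplaceableInstrs); SetSSEDomain performs the move by swapping in the equivalent opcode. A hedged sketch of a caller (the real driver is the new SSEDomainFix pass; this helper is hypothetical):

// Try to move MI into execution domain WantedDom (1..3); returns false if
// MI is not an SSE instruction or has no equivalent opcode in that domain.
static bool trySwitchDomain(const X86InstrInfo &TII, MachineInstr *MI,
                            unsigned WantedDom) {
  std::pair<uint16_t, uint16_t> Dom = TII.GetSSEDomain(MI);
  if (!Dom.first || !(Dom.second & (1u << WantedDom)))
    return false;
  TII.SetSSEDomain(MI, WantedDom);
  return true;
}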
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 5111719..f0bdd06 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -398,7 +398,10 @@ namespace X86II {
FS = 1 << SegOvrShift,
GS = 2 << SegOvrShift,
- // Bits 22 -> 23 are unused
+ // Execution domain for SSE instructions in bits 22, 23.
+ // 0 in bits 22-23 means normal, non-SSE instruction.
+ SSEDomainShift = 22,
+
OpcodeShift = 24,
OpcodeMask = 0xFF << OpcodeShift
};
@@ -486,7 +489,7 @@ class X86InstrInfo : public TargetInstrInfoImpl {
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
-
+
public:
explicit X86InstrInfo(X86TargetMachine &tm);
@@ -716,6 +719,13 @@ public:
///
unsigned getGlobalBaseReg(MachineFunction *MF) const;
+ /// GetSSEDomain - Return the SSE execution domain of MI as the first element,
+  /// and a bitmask of possible arguments to SetSSEDomain as the second.
+ std::pair<uint16_t, uint16_t> GetSSEDomain(const MachineInstr *MI) const;
+
+ /// SetSSEDomain - Set the SSEDomain of MI.
+ void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index c80a18d..8fccc8a 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1,4 +1,4 @@
-
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -28,12 +28,13 @@ def SDTX86Cmov : SDTypeProfile<1, 4,
SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
// Unary and binary operator instructions that set EFLAGS as a side-effect.
-def SDTUnaryArithWithFlags : SDTypeProfile<1, 1,
- [SDTCisInt<0>]>;
-def SDTBinaryArithWithFlags : SDTypeProfile<1, 2,
- [SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>,
- SDTCisInt<0>]>;
+def SDTUnaryArithWithFlags : SDTypeProfile<2, 1,
+ [SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
def SDTX86BrCond : SDTypeProfile<0, 3,
[SDTCisVT<0, OtherVT>,
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
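The widened profiles declare a second result: result 0 is the integer value (SDTCisInt<0>) and result 1 is the i32 flags value (SDTCisVT<1, i32>). That is why the instruction patterns below bind both results in a single node, as in:

// value in $dst, flags in EFLAGS; one node replaces node + (implicit EFLAGS)
[(set GR64:$dst, EFLAGS, (X86add_flag GR64:$src1, GR64:$src2))]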
@@ -77,8 +78,8 @@ def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
-def X86bsf : SDNode<"X86ISD::BSF", SDTIntUnaryOp>;
-def X86bsr : SDNode<"X86ISD::BSR", SDTIntUnaryOp>;
+def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
+def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
@@ -167,6 +168,7 @@ def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,
[SDNPCommutative]>;
+
def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
@@ -323,6 +325,7 @@ def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
"TM.getCodeModel() == CodeModel::Kernel">;
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
def OptForSize : Predicate<"OptForSize">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
@@ -473,15 +476,14 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
- else {
- unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
- APInt Mask = APInt::getAllOnesValue(BitWidth);
- APInt KnownZero0, KnownOne0;
- CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
- APInt KnownZero1, KnownOne1;
- CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
- return (~KnownZero0 & ~KnownZero1) == 0;
- }
+
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero0, KnownOne0;
+ CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+ APInt KnownZero1, KnownOne1;
+ CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+ return (~KnownZero0 & ~KnownZero1) == 0;
}]>;
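The fragment is sound because operands with disjoint bits generate no carries, so OR and ADD compute the same result; rewriting to ADD is profitable largely because ADD, unlike OR, can fold into LEA-style addressing. A one-line demonstration:

#include <cassert>

int main() {
  unsigned x = 0xA0, y = 0x05;   // no bit position set in both operands
  assert((x | y) == (x + y));    // carry-free, so OR equals ADD
  return 0;
}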
// 'shld' and 'shrd' instruction patterns. Note that even though these have
@@ -585,7 +587,7 @@ let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
// Return instructions.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
- hasCtrlDep = 1, FPForm = SpecialFP, FPFormBits = SpecialFP.Value in {
+ hasCtrlDep = 1, FPForm = SpecialFP in {
def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret",
[(X86retflag 0)]>;
@@ -806,33 +808,29 @@ let isTwoAddress = 1 in // GR32 = bswap GR32
let Defs = [EFLAGS] in {
def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (X86bsf GR16:$src)), (implicit EFLAGS)]>, TB;
+ [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB;
def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (X86bsf (loadi16 addr:$src))),
- (implicit EFLAGS)]>, TB;
+ [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB;
def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (X86bsf GR32:$src)), (implicit EFLAGS)]>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>, TB;
def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (X86bsf (loadi32 addr:$src))),
- (implicit EFLAGS)]>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, TB;
def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (X86bsr GR16:$src)), (implicit EFLAGS)]>, TB;
+ [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB;
def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (X86bsr (loadi16 addr:$src))),
- (implicit EFLAGS)]>, TB;
+ [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB;
def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (X86bsr GR32:$src)), (implicit EFLAGS)]>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>, TB;
def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (X86bsr (loadi32 addr:$src))),
- (implicit EFLAGS)]>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, TB;
} // Defs = [EFLAGS]
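// Pattern note (illustrative, not part of this commit): SDTUnaryArithWithFlags
// gives X86bsf/X86bsr a second i32 result, which the patterns above bind to
// EFLAGS:
//   [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]
// This multi-result form replaces the older
//   [(set GR32:$dst, (X86bsf GR32:$src)), (implicit EFLAGS)]
// idiom, and the same rewrite is applied to INC/DEC, the logical ops, ADD,
// SUB and IMUL below.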
let neverHasSideEffects = 1 in
@@ -1697,18 +1695,17 @@ let isTwoAddress = 0 in {
let Defs = [EFLAGS] in {
let CodeSize = 2 in
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), "inc{b}\t$dst",
- [(set GR8:$dst, (add GR8:$src, 1)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src))]>;
+
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
"inc{w}\t$dst",
- [(set GR16:$dst, (add GR16:$src, 1)),
- (implicit EFLAGS)]>,
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>,
OpSize, Requires<[In32BitMode]>;
def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
"inc{l}\t$dst",
- [(set GR32:$dst, (add GR32:$src, 1)),
- (implicit EFLAGS)]>, Requires<[In32BitMode]>;
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>,
+ Requires<[In32BitMode]>;
}
let isTwoAddress = 0, CodeSize = 2 in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
@@ -1726,18 +1723,16 @@ let isTwoAddress = 0, CodeSize = 2 in {
let CodeSize = 2 in
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), "dec{b}\t$dst",
- [(set GR8:$dst, (add GR8:$src, -1)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
"dec{w}\t$dst",
- [(set GR16:$dst, (add GR16:$src, -1)),
- (implicit EFLAGS)]>,
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>,
OpSize, Requires<[In32BitMode]>;
def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
"dec{l}\t$dst",
- [(set GR32:$dst, (add GR32:$src, -1)),
- (implicit EFLAGS)]>, Requires<[In32BitMode]>;
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
+ Requires<[In32BitMode]>;
}
let isTwoAddress = 0, CodeSize = 2 in {
@@ -1758,21 +1753,20 @@ let isTwoAddress = 0, CodeSize = 2 in {
// Logical operators...
let Defs = [EFLAGS] in {
let isCommutable = 1 in { // X = AND Y, Z --> X = AND Z, Y
-def AND8rr : I<0x20, MRMDestReg,
- (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
- "and{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (and GR8:$src1, GR8:$src2)),
- (implicit EFLAGS)]>;
-def AND16rr : I<0x21, MRMDestReg,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (and GR16:$src1, GR16:$src2)),
- (implicit EFLAGS)]>, OpSize;
-def AND32rr : I<0x21, MRMDestReg,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, GR32:$src2)),
- (implicit EFLAGS)]>;
+def AND8rr : I<0x20, MRMDestReg,
+ (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
+ "and{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, GR8:$src2))]>;
+def AND16rr : I<0x21, MRMDestReg,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "and{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
+ GR16:$src2))]>, OpSize;
+def AND32rr : I<0x21, MRMDestReg,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "and{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
+ GR32:$src2))]>;
}
// AND instructions with the destination register in REG and the source register
@@ -1789,45 +1783,46 @@ def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst),
def AND8rm : I<0x22, MRMSrcMem,
(outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
"and{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (and GR8:$src1, (loadi8 addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1,
+ (loadi8 addr:$src2)))]>;
def AND16rm : I<0x23, MRMSrcMem,
(outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
"and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (and GR16:$src1, (loadi16 addr:$src2))),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
+ (loadi16 addr:$src2)))]>,
+ OpSize;
def AND32rm : I<0x23, MRMSrcMem,
(outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
"and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, (loadi32 addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
+ (loadi32 addr:$src2)))]>;
def AND8ri : Ii8<0x80, MRM4r,
(outs GR8 :$dst), (ins GR8 :$src1, i8imm :$src2),
"and{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (and GR8:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1,
+ imm:$src2))]>;
def AND16ri : Ii16<0x81, MRM4r,
(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
"and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (and GR16:$src1, imm:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
+ imm:$src2))]>, OpSize;
def AND32ri : Ii32<0x81, MRM4r,
(outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
+ imm:$src2))]>;
def AND16ri8 : Ii8<0x83, MRM4r,
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (and GR16:$src1, i16immSExt8:$src2)),
- (implicit EFLAGS)]>,
+ [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
+ i16immSExt8:$src2))]>,
OpSize;
def AND32ri8 : Ii8<0x83, MRM4r,
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, i32immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
+ i32immSExt8:$src2))]>;
let isTwoAddress = 0 in {
def AND8mr : I<0x20, MRMDestMem,
@@ -1888,18 +1883,16 @@ let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y
def OR8rr : I<0x08, MRMDestReg, (outs GR8 :$dst),
(ins GR8 :$src1, GR8 :$src2),
"or{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (or GR8:$src1, GR8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1, GR8:$src2))]>;
def OR16rr : I<0x09, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (or GR16:$src1, GR16:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,GR16:$src2))]>,
+ OpSize;
def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, GR32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,GR32:$src2))]>;
}
// OR instructions with the destination register in REG and the source register
@@ -1913,48 +1906,48 @@ def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"or{l}\t{$src2, $dst|$dst, $src2}", []>;
-def OR8rm : I<0x0A, MRMSrcMem , (outs GR8 :$dst),
+def OR8rm : I<0x0A, MRMSrcMem, (outs GR8 :$dst),
(ins GR8 :$src1, i8mem :$src2),
"or{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (or GR8:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
-def OR16rm : I<0x0B, MRMSrcMem , (outs GR16:$dst),
+ [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1,
+ (load addr:$src2)))]>;
+def OR16rm : I<0x0B, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (or GR16:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>, OpSize;
-def OR32rm : I<0x0B, MRMSrcMem , (outs GR32:$dst),
+ [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
+ (load addr:$src2)))]>,
+ OpSize;
+def OR32rm : I<0x0B, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
+ (load addr:$src2)))]>;
def OR8ri : Ii8 <0x80, MRM1r, (outs GR8 :$dst),
(ins GR8 :$src1, i8imm:$src2),
"or{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (or GR8:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst,EFLAGS, (X86or_flag GR8:$src1, imm:$src2))]>;
def OR16ri : Ii16<0x81, MRM1r, (outs GR16:$dst),
(ins GR16:$src1, i16imm:$src2),
"or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (or GR16:$src1, imm:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
+ imm:$src2))]>, OpSize;
def OR32ri : Ii32<0x81, MRM1r, (outs GR32:$dst),
(ins GR32:$src1, i32imm:$src2),
"or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
+ imm:$src2))]>;
def OR16ri8 : Ii8<0x83, MRM1r, (outs GR16:$dst),
(ins GR16:$src1, i16i8imm:$src2),
"or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (or GR16:$src1, i16immSExt8:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
+ i16immSExt8:$src2))]>, OpSize;
def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst),
(ins GR32:$src1, i32i8imm:$src2),
"or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, i32immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
+ i32immSExt8:$src2))]>;
let isTwoAddress = 0 in {
def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"or{b}\t{$src, $dst|$dst, $src}",
@@ -2004,18 +1997,18 @@ let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
def XOR8rr : I<0x30, MRMDestReg,
(outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
"xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (xor GR8:$src1, GR8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1,
+ GR8:$src2))]>;
def XOR16rr : I<0x31, MRMDestReg,
(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (xor GR16:$src1, GR16:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
+ GR16:$src2))]>, OpSize;
def XOR32rr : I<0x31, MRMDestReg,
(outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, GR32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
+ GR32:$src2))]>;
} // isCommutable = 1
// XOR instructions with the destination register in REG and the source register
@@ -2029,49 +2022,48 @@ def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}", []>;
-def XOR8rm : I<0x32, MRMSrcMem ,
+def XOR8rm : I<0x32, MRMSrcMem,
(outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2),
"xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
-def XOR16rm : I<0x33, MRMSrcMem ,
+ [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1,
+ (load addr:$src2)))]>;
+def XOR16rm : I<0x33, MRMSrcMem,
(outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
"xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>,
+ [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
+ (load addr:$src2)))]>,
OpSize;
-def XOR32rm : I<0x33, MRMSrcMem ,
+def XOR32rm : I<0x33, MRMSrcMem,
(outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
-
-def XOR8ri : Ii8<0x80, MRM6r,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (xor GR8:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
-def XOR16ri : Ii16<0x81, MRM6r,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (xor GR16:$src1, imm:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
+ (load addr:$src2)))]>;
+
+def XOR8ri : Ii8<0x80, MRM6r,
+ (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "xor{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, imm:$src2))]>;
+def XOR16ri : Ii16<0x81, MRM6r,
+ (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "xor{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
+ imm:$src2))]>, OpSize;
def XOR32ri : Ii32<0x81, MRM6r,
(outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
+ imm:$src2))]>;
def XOR16ri8 : Ii8<0x83, MRM6r,
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (xor GR16:$src1, i16immSExt8:$src2)),
- (implicit EFLAGS)]>,
+ [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
+ i16immSExt8:$src2))]>,
OpSize;
def XOR32ri8 : Ii8<0x83, MRM6r,
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, i32immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
+ i32immSExt8:$src2))]>;
let isTwoAddress = 0 in {
def XOR8mr : I<0x30, MRMDestMem,
@@ -2118,12 +2110,12 @@ let isTwoAddress = 0 in {
[(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst),
(implicit EFLAGS)]>;
- def XOR8i8 : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src),
- "xor{b}\t{$src, %al|%al, $src}", []>;
- def XOR16i16 : Ii16 <0x35, RawFrm, (outs), (ins i16imm:$src),
- "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def XOR32i32 : Ii32 <0x35, RawFrm, (outs), (ins i32imm:$src),
- "xor{l}\t{$src, %eax|%eax, $src}", []>;
+ def XOR8i8 : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src),
+ "xor{b}\t{$src, %al|%al, $src}", []>;
+ def XOR16i16 : Ii16<0x35, RawFrm, (outs), (ins i16imm:$src),
+ "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src),
+ "xor{l}\t{$src, %eax|%eax, $src}", []>;
} // isTwoAddress = 0
} // Defs = [EFLAGS]
@@ -2690,21 +2682,20 @@ let isCommutable = 1 in { // X = ADD Y, Z --> X = ADD Z, Y
def ADD8rr : I<0x00, MRMDestReg, (outs GR8 :$dst),
(ins GR8 :$src1, GR8 :$src2),
"add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (add GR8:$src1, GR8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1, GR8:$src2))]>;
let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
// Register-Register Addition
def ADD16rr : I<0x01, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (add GR16:$src1, GR16:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
+ GR16:$src2))]>, OpSize;
def ADD32rr : I<0x01, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, GR32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
+ GR32:$src2))]>;
} // end isConvertibleToThreeAddress
} // end isCommutable
@@ -2723,47 +2714,47 @@ let isCodeGenOnly = 1 in {
def ADD8rm : I<0x02, MRMSrcMem, (outs GR8 :$dst),
(ins GR8 :$src1, i8mem :$src2),
"add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (add GR8:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1,
+ (load addr:$src2)))]>;
def ADD16rm : I<0x03, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
+ (load addr:$src2)))]>, OpSize;
def ADD32rm : I<0x03, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
+ (load addr:$src2)))]>;
// Register-Integer Addition
def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
"add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (add GR8:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS,
+ (X86add_flag GR8:$src1, imm:$src2))]>;
let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
// Register-Integer Addition
def ADD16ri : Ii16<0x81, MRM0r, (outs GR16:$dst),
(ins GR16:$src1, i16imm:$src2),
"add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (add GR16:$src1, imm:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86add_flag GR16:$src1, imm:$src2))]>, OpSize;
def ADD32ri : Ii32<0x81, MRM0r, (outs GR32:$dst),
(ins GR32:$src1, i32imm:$src2),
"add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86add_flag GR32:$src1, imm:$src2))]>;
def ADD16ri8 : Ii8<0x83, MRM0r, (outs GR16:$dst),
(ins GR16:$src1, i16i8imm:$src2),
"add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (add GR16:$src1, i16immSExt8:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86add_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst),
(ins GR32:$src1, i32i8imm:$src2),
"add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, i32immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86add_flag GR32:$src1, i32immSExt8:$src2))]>;
}
let isTwoAddress = 0 in {
@@ -2911,16 +2902,16 @@ let isTwoAddress = 0 in {
// Register-Register Subtraction
def SUB8rr : I<0x28, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS,
+ (X86sub_flag GR8:$src1, GR8:$src2))]>;
def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86sub_flag GR16:$src1, GR16:$src2))]>, OpSize;
def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, GR32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86sub_flag GR32:$src1, GR32:$src2))]>;
def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}", []>;
@@ -2935,45 +2926,45 @@ def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst),
def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst),
(ins GR8 :$src1, i8mem :$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS,
+ (X86sub_flag GR8:$src1, (load addr:$src2)))]>;
def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86sub_flag GR16:$src1, (load addr:$src2)))]>, OpSize;
def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86sub_flag GR32:$src1, (load addr:$src2)))]>;
// Register-Integer Subtraction
def SUB8ri : Ii8 <0x80, MRM5r, (outs GR8:$dst),
(ins GR8:$src1, i8imm:$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sub GR8:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS,
+ (X86sub_flag GR8:$src1, imm:$src2))]>;
def SUB16ri : Ii16<0x81, MRM5r, (outs GR16:$dst),
(ins GR16:$src1, i16imm:$src2),
"sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sub GR16:$src1, imm:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86sub_flag GR16:$src1, imm:$src2))]>, OpSize;
def SUB32ri : Ii32<0x81, MRM5r, (outs GR32:$dst),
(ins GR32:$src1, i32imm:$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86sub_flag GR32:$src1, imm:$src2))]>;
def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst),
(ins GR16:$src1, i16i8imm:$src2),
"sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86sub_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst),
(ins GR32:$src1, i32i8imm:$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86sub_flag GR32:$src1, i32immSExt8:$src2))]>;
let isTwoAddress = 0 in {
// Memory-Register Subtraction
@@ -3122,25 +3113,26 @@ let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
// Register-Register Signed Integer Multiply
def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (mul GR16:$src1, GR16:$src2)),
- (implicit EFLAGS)]>, TB, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, GR32:$src2)),
- (implicit EFLAGS)]>, TB;
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
}
// Register-Memory Signed Integer Multiply
def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>, TB, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
+ TB, OpSize;
def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>, TB;
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
} // Defs = [EFLAGS]
} // end Two Address instructions
@@ -3150,47 +3142,49 @@ let Defs = [EFLAGS] in {
def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, (mul GR16:$src1, imm:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
(outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, imm:$src2))]>;
def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, i32immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
// Memory-Integer Signed Integer Multiply
def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
(outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, (mul (load addr:$src1), imm:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))]>,
+ OpSize;
def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
(outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (mul (load addr:$src1), imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))]>;
def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
(outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, (mul (load addr:$src1),
- i16immSExt8:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i16immSExt8:$src2))]>, OpSize;
def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
(outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (mul (load addr:$src1),
- i32immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i32immSExt8:$src2))]>;
} // Defs = [EFLAGS]
//===----------------------------------------------------------------------===//
@@ -4345,9 +4339,12 @@ def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
(TCRETURNri GR32_TC:$dst, imm:$off)>,
Requires<[In32BitMode]>;
+// FIXME: This is disabled for 32-bit PIC mode because the global base
+// register which is part of the address mode may be assigned a
+// callee-saved register.
def : Pat<(X86tcret (load addr:$dst), imm:$off),
(TCRETURNmi addr:$dst, imm:$off)>,
- Requires<[In32BitMode]>;
+ Requires<[In32BitMode, IsNotPIC]>;
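// Failure sketch (illustrative, not part of this commit): in 32-bit PIC code
// the folded load's address mode can use the global base register; if that
// register is assigned a callee-saved register, the epilogue restores the
// caller's value before the tail-call jump executes, so TCRETURNmi would load
// its target through a pointer that no longer holds the PIC base. IsNotPIC
// sidesteps this.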
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
(TCRETURNdi texternalsym:$dst, imm:$off)>,
@@ -4722,23 +4719,17 @@ def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
let AddedComplexity = 5 in { // Try this before the selecting to OR
-def : Pat<(parallel (or_is_add GR16:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR16:$src1, imm:$src2),
(ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(parallel (or_is_add GR32:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR32:$src1, imm:$src2),
(ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(parallel (or_is_add GR16:$src1, i16immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
(ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (or_is_add GR32:$src1, i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(parallel (or_is_add GR16:$src1, GR16:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR16:$src1, GR16:$src2),
(ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR32:$src1, GR32:$src2),
(ADD32rr GR32:$src1, GR32:$src2)>;
} // AddedComplexity
@@ -4746,270 +4737,173 @@ def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2),
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//
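// Overview (illustrative note, not part of this commit): the instruction
// definitions above now match the flag-producing X86*_flag nodes directly,
// so this section is inverted relative to the old scheme -- the replacement
// patterns below map the plain ISD nodes (add, sub, mul, or, xor, and) onto
// the same flag-setting instructions for the common case where the flags are
// unused.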
-// Register-Register Addition with EFLAGS result
-def : Pat<(parallel (X86add_flag GR8:$src1, GR8:$src2),
- (implicit EFLAGS)),
- (ADD8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(parallel (X86add_flag GR16:$src1, GR16:$src2),
- (implicit EFLAGS)),
- (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(parallel (X86add_flag GR32:$src1, GR32:$src2),
- (implicit EFLAGS)),
- (ADD32rr GR32:$src1, GR32:$src2)>;
+// add reg, reg
+def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
-// Register-Memory Addition with EFLAGS result
-def : Pat<(parallel (X86add_flag GR8:$src1, (loadi8 addr:$src2)),
- (implicit EFLAGS)),
+// add reg, mem
+def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
(ADD8rm GR8:$src1, addr:$src2)>;
-def : Pat<(parallel (X86add_flag GR16:$src1, (loadi16 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
(ADD16rm GR16:$src1, addr:$src2)>;
-def : Pat<(parallel (X86add_flag GR32:$src1, (loadi32 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
(ADD32rm GR32:$src1, addr:$src2)>;
-// Register-Integer Addition with EFLAGS result
-def : Pat<(parallel (X86add_flag GR8:$src1, imm:$src2),
- (implicit EFLAGS)),
- (ADD8ri GR8:$src1, imm:$src2)>;
-def : Pat<(parallel (X86add_flag GR16:$src1, imm:$src2),
- (implicit EFLAGS)),
- (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(parallel (X86add_flag GR32:$src1, imm:$src2),
- (implicit EFLAGS)),
- (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(parallel (X86add_flag GR16:$src1, i16immSExt8:$src2),
- (implicit EFLAGS)),
+// add reg, imm
+def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
+def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(add GR16:$src1, i16immSExt8:$src2),
(ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (X86add_flag GR32:$src1, i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(add GR32:$src1, i32immSExt8:$src2),
(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-// Register-Register Subtraction with EFLAGS result
-def : Pat<(parallel (X86sub_flag GR8:$src1, GR8:$src2),
- (implicit EFLAGS)),
- (SUB8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(parallel (X86sub_flag GR16:$src1, GR16:$src2),
- (implicit EFLAGS)),
- (SUB16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(parallel (X86sub_flag GR32:$src1, GR32:$src2),
- (implicit EFLAGS)),
- (SUB32rr GR32:$src1, GR32:$src2)>;
+// sub reg, reg
+def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
-// Register-Memory Subtraction with EFLAGS result
-def : Pat<(parallel (X86sub_flag GR8:$src1, (loadi8 addr:$src2)),
- (implicit EFLAGS)),
+// sub reg, mem
+def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
(SUB8rm GR8:$src1, addr:$src2)>;
-def : Pat<(parallel (X86sub_flag GR16:$src1, (loadi16 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
(SUB16rm GR16:$src1, addr:$src2)>;
-def : Pat<(parallel (X86sub_flag GR32:$src1, (loadi32 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
(SUB32rm GR32:$src1, addr:$src2)>;
-// Register-Integer Subtraction with EFLAGS result
-def : Pat<(parallel (X86sub_flag GR8:$src1, imm:$src2),
- (implicit EFLAGS)),
+// sub reg, imm
+def : Pat<(sub GR8:$src1, imm:$src2),
(SUB8ri GR8:$src1, imm:$src2)>;
-def : Pat<(parallel (X86sub_flag GR16:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(sub GR16:$src1, imm:$src2),
(SUB16ri GR16:$src1, imm:$src2)>;
-def : Pat<(parallel (X86sub_flag GR32:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(sub GR32:$src1, imm:$src2),
(SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(parallel (X86sub_flag GR16:$src1, i16immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
(SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (X86sub_flag GR32:$src1, i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
(SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
-// Register-Register Signed Integer Multiply with EFLAGS result
-def : Pat<(parallel (X86smul_flag GR16:$src1, GR16:$src2),
- (implicit EFLAGS)),
+// mul reg, reg
+def : Pat<(mul GR16:$src1, GR16:$src2),
(IMUL16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(parallel (X86smul_flag GR32:$src1, GR32:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul GR32:$src1, GR32:$src2),
(IMUL32rr GR32:$src1, GR32:$src2)>;
-// Register-Memory Signed Integer Multiply with EFLAGS result
-def : Pat<(parallel (X86smul_flag GR16:$src1, (loadi16 addr:$src2)),
- (implicit EFLAGS)),
+// mul reg, mem
+def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
(IMUL16rm GR16:$src1, addr:$src2)>;
-def : Pat<(parallel (X86smul_flag GR32:$src1, (loadi32 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
(IMUL32rm GR32:$src1, addr:$src2)>;
-// Register-Integer Signed Integer Multiply with EFLAGS result
-def : Pat<(parallel (X86smul_flag GR16:$src1, imm:$src2),
- (implicit EFLAGS)),
+// mul reg, imm
+def : Pat<(mul GR16:$src1, imm:$src2),
(IMUL16rri GR16:$src1, imm:$src2)>;
-def : Pat<(parallel (X86smul_flag GR32:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul GR32:$src1, imm:$src2),
(IMUL32rri GR32:$src1, imm:$src2)>;
-def : Pat<(parallel (X86smul_flag GR16:$src1, i16immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
(IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (X86smul_flag GR32:$src1, i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
(IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
-// Memory-Integer Signed Integer Multiply with EFLAGS result
-def : Pat<(parallel (X86smul_flag (loadi16 addr:$src1), imm:$src2),
- (implicit EFLAGS)),
+// reg = mul mem, imm
+def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
(IMUL16rmi addr:$src1, imm:$src2)>;
-def : Pat<(parallel (X86smul_flag (loadi32 addr:$src1), imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
(IMUL32rmi addr:$src1, imm:$src2)>;
-def : Pat<(parallel (X86smul_flag (loadi16 addr:$src1), i16immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
(IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (X86smul_flag (loadi32 addr:$src1), i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
(IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
// Optimize multiply by 2 with EFLAGS result.
let AddedComplexity = 2 in {
-def : Pat<(parallel (X86smul_flag GR16:$src1, 2),
- (implicit EFLAGS)),
- (ADD16rr GR16:$src1, GR16:$src1)>;
-
-def : Pat<(parallel (X86smul_flag GR32:$src1, 2),
- (implicit EFLAGS)),
- (ADD32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
}
-// INC and DEC with EFLAGS result. Note that these do not set CF.
-def : Pat<(parallel (X86inc_flag GR8:$src), (implicit EFLAGS)),
- (INC8r GR8:$src)>;
-def : Pat<(parallel (X86dec_flag GR8:$src), (implicit EFLAGS)),
- (DEC8r GR8:$src)>;
-
-def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)),
- (INC16r GR16:$src)>, Requires<[In32BitMode]>;
-def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)),
- (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
-
-def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)),
- (INC32r GR32:$src)>, Requires<[In32BitMode]>;
-def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)),
- (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
-
-// Register-Register Or with EFLAGS result
-def : Pat<(parallel (X86or_flag GR8:$src1, GR8:$src2),
- (implicit EFLAGS)),
- (OR8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(parallel (X86or_flag GR16:$src1, GR16:$src2),
- (implicit EFLAGS)),
- (OR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(parallel (X86or_flag GR32:$src1, GR32:$src2),
- (implicit EFLAGS)),
- (OR32rr GR32:$src1, GR32:$src2)>;
-
-// Register-Memory Or with EFLAGS result
-def : Pat<(parallel (X86or_flag GR8:$src1, (loadi8 addr:$src2)),
- (implicit EFLAGS)),
+// Patterns for nodes that do not produce flags, for instructions that do.
+
+// Increment reg.
+def : Pat<(add GR8:$src , 1), (INC8r GR8:$src)>;
+def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>;
+
+// Decrement reg.
+def : Pat<(add GR8:$src , -1), (DEC8r GR8:$src)>;
+def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
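+// (Illustrative note, not part of this commit: the In32BitMode requirement
+// on the 16/32-bit forms is not incidental -- the short INC/DEC opcodes
+// 0x40-0x4F are reinterpreted as REX prefixes in 64-bit mode, leaving only
+// the 8-bit form usable everywhere.)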
+
+// or reg/reg.
+def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
+
+// or reg/mem
+def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
(OR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(parallel (X86or_flag GR16:$src1, (loadi16 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
(OR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(parallel (X86or_flag GR32:$src1, (loadi32 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
(OR32rm GR32:$src1, addr:$src2)>;
-// Register-Integer Or with EFLAGS result
-def : Pat<(parallel (X86or_flag GR8:$src1, imm:$src2),
- (implicit EFLAGS)),
- (OR8ri GR8:$src1, imm:$src2)>;
-def : Pat<(parallel (X86or_flag GR16:$src1, imm:$src2),
- (implicit EFLAGS)),
- (OR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(parallel (X86or_flag GR32:$src1, imm:$src2),
- (implicit EFLAGS)),
- (OR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(parallel (X86or_flag GR16:$src1, i16immSExt8:$src2),
- (implicit EFLAGS)),
+// or reg/imm
+def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, i16immSExt8:$src2),
(OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (X86or_flag GR32:$src1, i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(or GR32:$src1, i32immSExt8:$src2),
(OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-// Register-Register XOr with EFLAGS result
-def : Pat<(parallel (X86xor_flag GR8:$src1, GR8:$src2),
- (implicit EFLAGS)),
- (XOR8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(parallel (X86xor_flag GR16:$src1, GR16:$src2),
- (implicit EFLAGS)),
- (XOR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(parallel (X86xor_flag GR32:$src1, GR32:$src2),
- (implicit EFLAGS)),
- (XOR32rr GR32:$src1, GR32:$src2)>;
-
-// Register-Memory XOr with EFLAGS result
-def : Pat<(parallel (X86xor_flag GR8:$src1, (loadi8 addr:$src2)),
- (implicit EFLAGS)),
+// xor reg/reg
+def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
+
+// xor reg/mem
+def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
(XOR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(parallel (X86xor_flag GR16:$src1, (loadi16 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
(XOR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(parallel (X86xor_flag GR32:$src1, (loadi32 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
(XOR32rm GR32:$src1, addr:$src2)>;
-// Register-Integer XOr with EFLAGS result
-def : Pat<(parallel (X86xor_flag GR8:$src1, imm:$src2),
- (implicit EFLAGS)),
+// xor reg/imm
+def : Pat<(xor GR8:$src1, imm:$src2),
(XOR8ri GR8:$src1, imm:$src2)>;
-def : Pat<(parallel (X86xor_flag GR16:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(xor GR16:$src1, imm:$src2),
(XOR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(parallel (X86xor_flag GR32:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(xor GR32:$src1, imm:$src2),
(XOR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(parallel (X86xor_flag GR16:$src1, i16immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
(XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (X86xor_flag GR32:$src1, i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
(XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-// Register-Register And with EFLAGS result
-def : Pat<(parallel (X86and_flag GR8:$src1, GR8:$src2),
- (implicit EFLAGS)),
- (AND8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(parallel (X86and_flag GR16:$src1, GR16:$src2),
- (implicit EFLAGS)),
- (AND16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(parallel (X86and_flag GR32:$src1, GR32:$src2),
- (implicit EFLAGS)),
- (AND32rr GR32:$src1, GR32:$src2)>;
-
-// Register-Memory And with EFLAGS result
-def : Pat<(parallel (X86and_flag GR8:$src1, (loadi8 addr:$src2)),
- (implicit EFLAGS)),
+// and reg/reg
+def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
+
+// and reg/mem
+def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
(AND8rm GR8:$src1, addr:$src2)>;
-def : Pat<(parallel (X86and_flag GR16:$src1, (loadi16 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
(AND16rm GR16:$src1, addr:$src2)>;
-def : Pat<(parallel (X86and_flag GR32:$src1, (loadi32 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
(AND32rm GR32:$src1, addr:$src2)>;
-// Register-Integer And with EFLAGS result
-def : Pat<(parallel (X86and_flag GR8:$src1, imm:$src2),
- (implicit EFLAGS)),
+// and reg/imm
+def : Pat<(and GR8:$src1, imm:$src2),
(AND8ri GR8:$src1, imm:$src2)>;
-def : Pat<(parallel (X86and_flag GR16:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(and GR16:$src1, imm:$src2),
(AND16ri GR16:$src1, imm:$src2)>;
-def : Pat<(parallel (X86and_flag GR32:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(and GR32:$src1, imm:$src2),
(AND32ri GR32:$src1, imm:$src2)>;
-def : Pat<(parallel (X86and_flag GR16:$src1, i16immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(and GR16:$src1, i16immSExt8:$src2),
(AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (X86and_flag GR32:$src1, i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(and GR32:$src1, i32immSExt8:$src2),
(AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
// -disable-16bit support.
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index e1203e2..1c81c5e 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -605,22 +605,9 @@ let AddedComplexity = 10 in {
def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
VR64:$src2)),
(MMX_PANDNrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))),
- VR64:$src2)),
- (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
- VR64:$src2)),
- (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
-
def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
(load addr:$src2))),
(MMX_PANDNrm VR64:$src1, addr:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))),
- (load addr:$src2))),
- (MMX_PANDNrm VR64:$src1, addr:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
- (load addr:$src2))),
- (MMX_PANDNrm VR64:$src1, addr:$src2)>;
// Move MMX to lower 64-bit of XMM
def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v8i8 VR64:$src))))),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 720b663..dadc2a6 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -69,8 +69,9 @@ def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
-def SDTX86CmpPTest : SDTypeProfile<0, 2, [SDTCisVT<0, v4f32>,
- SDTCisVT<1, v4f32>]>;
+def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<1, v4f32>,
+ SDTCisVT<2, v4f32>]>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
//===----------------------------------------------------------------------===//
@@ -1114,15 +1115,19 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
// load of an all-zeros value if folding it would be beneficial.
// FIXME: Change encoding to pseudo!
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1 in
-def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ isCodeGenOnly = 1 in {
+def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>;
+let ExeDomain = SSEPackedInt in
+def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
-def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
-def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
-def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
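+
+// (Illustrative note, not part of this commit: xorps, xorpd and pxor all
+// materialize zero but execute in different SSE domains. Splitting V_SET0
+// into one variant per domain lets the SSEDomainFix pass -- registered as a
+// pre-emit pass later in this patch -- pick the encoding that avoids a
+// domain-crossing stall.)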
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
@@ -1935,6 +1940,7 @@ let Constraints = "$src1 = $dst" in {
//===---------------------------------------------------------------------===//
// SSE integer instructions
+let ExeDomain = SSEPackedInt in {
// Move Instructions
let neverHasSideEffects = 1 in
@@ -2043,6 +2049,7 @@ multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
} // Constraints = "$src1 = $dst"
+} // ExeDomain = SSEPackedInt
// 128-bit Integer Arithmetic
@@ -2105,7 +2112,8 @@ defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
// 128-bit logical shifts.
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1,
+ ExeDomain = SSEPackedInt in {
def PSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pslldq\t{$src2, $dst|$dst, $src2}", []>;
@@ -2139,7 +2147,7 @@ defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", ExeDomain = SSEPackedInt in {
def PANDNrr : PDI<0xDF, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"pandn\t{$src2, $dst|$dst, $src2}",
@@ -2193,6 +2201,8 @@ defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+let ExeDomain = SSEPackedInt in {
+
// Shuffle and unpack instructions
let AddedComplexity = 5 in {
def PSHUFDri : PDIi8<0x70, MRMSrcReg,
@@ -2369,10 +2379,13 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+} // ExeDomain = SSEPackedInt
+
// Non-temporal stores
def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
+let ExeDomain = SSEPackedInt in
def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
@@ -2386,6 +2399,7 @@ def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
+let ExeDomain = SSEPackedInt in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
@@ -2414,7 +2428,7 @@ def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1 in
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
// FIXME: Change encoding to pseudo.
def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
@@ -3016,14 +3030,14 @@ let Predicates = [HasSSE2] in {
let AddedComplexity = 15 in {
// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
- (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+ (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
- (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVSSrr (v4f32 (V_SET0)),
+ (MOVSSrr (v4f32 (V_SET0PS)),
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVSSrr (v4i32 (V_SET0)),
+ (MOVSSrr (v4i32 (V_SET0PI)),
(EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
}
@@ -3181,9 +3195,6 @@ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
(SHUFFLE_get_shuf_imm VR128:$src3))>;
// Set lowest element and zero upper elements.
-let AddedComplexity = 15 in
-def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)),
- (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
@@ -3441,8 +3452,28 @@ let Constraints = "$src1 = $dst" in {
OpSize;
}
}
-defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
- int_x86_sse41_pmulld, 1>;
+
+/// SS48I_binop_rm - Simple SSE41 binary operator.
+let Constraints = "$src1 = $dst" in {
+multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit Commutable = 0> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2))))]>,
+ OpSize;
+}
+}
+
+defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>;
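+
+// (Usage note, illustrative and not part of this commit: a generic IR
+// multiply such as
+//   %p = mul <4 x i32> %a, %b
+// selects PMULLDrr/PMULLDrm through the plain mul pattern above; the
+// intrinsic-based forms from the removed SS41I_binop_patint instantiation
+// are no longer generated.)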
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
let Constraints = "$src1 = $dst" in {
@@ -3772,12 +3803,12 @@ def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"ptest \t{$src2, $src1|$src1, $src2}",
- [(X86ptest VR128:$src1, VR128:$src2),
- (implicit EFLAGS)]>, OpSize;
+ [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
+ OpSize;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
"ptest \t{$src2, $src1|$src1, $src2}",
- [(X86ptest VR128:$src1, (load addr:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
+ OpSize;
}
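// Same multi-result rewrite as the integer ops (illustrative note, not part
// of this commit): SDTX86CmpPTest now declares an i32 result, so the PTEST
// patterns can write (set EFLAGS, (X86ptest ...)) rather than pairing the
// node with (implicit EFLAGS).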
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
@@ -3817,6 +3848,53 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
(PCMPGTQrm VR128:$src1, addr:$src2)>;
+// TODO: These should be AES as a feature set.
+defm AESIMC : SS42I_binop_rm_int<0xDB, "aesimc",
+ int_x86_aesni_aesimc>;
+defm AESENC : SS42I_binop_rm_int<0xDC, "aesenc",
+ int_x86_aesni_aesenc>;
+defm AESENCLAST : SS42I_binop_rm_int<0xDD, "aesenclast",
+ int_x86_aesni_aesenclast>;
+defm AESDEC : SS42I_binop_rm_int<0xDE, "aesdec",
+ int_x86_aesni_aesdec>;
+defm AESDECLAST : SS42I_binop_rm_int<0xDF, "aesdeclast",
+ int_x86_aesni_aesdeclast>;
+
+def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, VR128:$src2)),
+ (AESIMCrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, (memop addr:$src2))),
+ (AESIMCrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
+ (AESENCrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
+ (AESENCrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
+ (AESENCLASTrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
+ (AESENCLASTrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
+ (AESDECrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
+ (AESDECrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
+ (AESDECLASTrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
+ (AESDECLASTrm VR128:$src1, addr:$src2)>;
+
+def AESKEYGENASSIST128rr : SS42AI<0xDF, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
+ OpSize;
+def AESKEYGENASSIST128rm : SS42AI<0xDF, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i32i8imm:$src2),
+ "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
+ imm:$src2))]>,
+ OpSize;
+
// crc intrinsic instruction
// This set of instructions are only rm, the only difference is the size
// of r and m.
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index cd56816..8a0cde4 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -266,6 +266,9 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
unsigned Model = 0;
DetectFamilyModel(EAX, Family, Model);
IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
+ // If it's Nehalem (family 6, model 26), unaligned memory access is fast.
+ if (Family == 6 && Model == 26)
+ IsUAMemFast = true;
GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
HasX86_64 = (EDX >> 29) & 0x1;
@@ -286,6 +289,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, HasFMA3(false)
, HasFMA4(false)
, IsBTMemSlow(false)
+ , IsUAMemFast(false)
, HasVectorUAMem(false)
, DarwinVers(0)
, stackAlignment(8)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 56220db..bf30154 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -78,6 +78,9 @@ protected:
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
+ /// IsUAMemFast - True if unaligned memory access is fast.
+ bool IsUAMemFast;
+
/// HasVectorUAMem - True if SIMD operations can have unaligned memory
/// operands. This may require setting a feature bit in the
/// processor.
@@ -148,6 +151,7 @@ public:
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
bool isBTMemSlow() const { return IsBTMemSlow; }
+ bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool isTargetDarwin() const { return TargetType == isDarwin; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f13e6f3..c608e56 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -17,6 +17,7 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
@@ -169,6 +170,15 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
return true; // -print-machineinstr should print after this.
}
+bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
+ PM.add(createSSEDomainFixPass());
+ return true;
+ }
+ return false;
+}
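+// (Illustrative note, not part of this commit: returning true mirrors the
+// addPostRegAlloc hook above, so -print-machineinstr also prints after the
+// domain-fix pass runs.)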
+
bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 2bb5454..ae7b5b2 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -66,6 +66,7 @@ public:
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE);
};
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index e5f5a6d..54df33c 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -215,7 +215,15 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
return false;
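// (Illustrative note, not part of this commit: DBG_VALUE instructions are
// debug-info markers that may trail the real terminator when compiling with
// -g; skipping them here, and in RemoveBranch below, keeps branch analysis
// identical with and without debug info.)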
// Get the last instruction in the block.
@@ -326,6 +334,11 @@ XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
--I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode()))
return 0;
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 2e9a1e5..dd3cbc1 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -32,7 +32,7 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
SDNPVariadic]>;
-def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTNone,
+def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind,
[SDNPHasChain, SDNPOptInFlag]>;
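// (Illustrative note, not part of this commit: RETSP now carries an
// operand -- presumably its stack adjustment -- so the node needs a profile
// with one operand; SDTBrind's "no results, one pointer-sized operand" shape
// fits where SDTNone no longer does.)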
def SDT_XCoreBR_JT : SDTypeProfile<0, 2,