summaryrefslogtreecommitdiffstats
path: root/lib/Target/ARM
diff options
context:
space:
mode:
authorrdivacky <rdivacky@FreeBSD.org>2010-03-21 10:49:05 +0000
committerrdivacky <rdivacky@FreeBSD.org>2010-03-21 10:49:05 +0000
commit2f2afc1aae898651e26987a5c71f3febb19bca98 (patch)
tree2caca31db4facdc95c23930c0c745c8ef0dee97d /lib/Target/ARM
parent0f448b841684305c051796982f300c9bff959307 (diff)
downloadFreeBSD-src-2f2afc1aae898651e26987a5c71f3febb19bca98.zip
FreeBSD-src-2f2afc1aae898651e26987a5c71f3febb19bca98.tar.gz
Update LLVM to r99115.
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--lib/Target/ARM/ARMAddressingModes.h61
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp75
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h36
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp32
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp144
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp16
-rw-r--r--lib/Target/ARM/ARMISelLowering.h2
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td83
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td26
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td842
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td7
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td4
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td20
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp27
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp2
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h8
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp70
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp464
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMInstPrinter.h41
-rw-r--r--lib/Target/ARM/NEONPreAllocPass.cpp124
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp8
22 files changed, 1368 insertions, 726 deletions
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index ddeb1b9..ea62c33 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -35,6 +35,10 @@ namespace ARM_AM {
add = '+', sub = '-'
};
+ static inline const char *getAddrOpcStr(AddrOpc Op) {
+ return Op == sub ? "-" : "";
+ }
+
static inline const char *getShiftOpcStr(ShiftOpc Op) {
switch (Op) {
default: assert(0 && "Unknown shift opc!");
@@ -78,16 +82,6 @@ namespace ARM_AM {
}
}
- static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) {
- switch (Mode) {
- default: assert(0 && "Unknown addressing sub-mode!");
- case ARM_AM::ia: return isLD ? "fd" : "ea";
- case ARM_AM::ib: return isLD ? "ed" : "fa";
- case ARM_AM::da: return isLD ? "fa" : "ed";
- case ARM_AM::db: return isLD ? "ea" : "fd";
- }
- }
-
/// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
///
static inline unsigned rotr32(unsigned Val, unsigned Amt) {
@@ -473,20 +467,13 @@ namespace ARM_AM {
// IB - Increment before
// DA - Decrement after
// DB - Decrement before
- //
- // If the 4th bit (writeback)is set, then the base register is updated after
- // the memory transfer.
static inline AMSubMode getAM4SubMode(unsigned Mode) {
return (AMSubMode)(Mode & 0x7);
}
- static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) {
- return (int)SubMode | ((int)WB << 3);
- }
-
- static inline bool getAM4WBFlag(unsigned Mode) {
- return (Mode >> 3) & 1;
+ static inline unsigned getAM4ModeImm(AMSubMode SubMode) {
+ return (int)SubMode;
}
//===--------------------------------------------------------------------===//
@@ -501,9 +488,9 @@ namespace ARM_AM {
// operation in bit 8 and the immediate in bits 0-7.
//
// This is also used for FP load/store multiple ops. The second operand
- // encodes the writeback mode in bit 8 and the number of registers (or 2
- // times the number of registers for DPR ops) in bits 0-7. In addition,
- // bits 9-11 encode one of the following two sub-modes:
+ // encodes the number of registers (or 2 times the number of registers
+ // for DPR ops) in bits 0-7. In addition, bits 8-10 encode one of the
+ // following two sub-modes:
//
// IA - Increment after
// DB - Decrement before
@@ -522,17 +509,13 @@ namespace ARM_AM {
/// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and
/// VSTM instructions.
- static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB,
- unsigned char Offset) {
+ static inline unsigned getAM5Opc(AMSubMode SubMode, unsigned char Offset) {
assert((SubMode == ia || SubMode == db) &&
"Illegal addressing mode 5 sub-mode!");
- return ((int)SubMode << 9) | ((int)WB << 8) | Offset;
+ return ((int)SubMode << 8) | Offset;
}
static inline AMSubMode getAM5SubMode(unsigned AM5Opc) {
- return (AMSubMode)((AM5Opc >> 9) & 0x7);
- }
- static inline bool getAM5WBFlag(unsigned AM5Opc) {
- return ((AM5Opc >> 8) & 1);
+ return (AMSubMode)((AM5Opc >> 8) & 0x7);
}
//===--------------------------------------------------------------------===//
@@ -541,23 +524,11 @@ namespace ARM_AM {
//
// This is used for NEON load / store instructions.
//
- // addrmode6 := reg with optional writeback and alignment
+ // addrmode6 := reg with optional alignment
//
- // This is stored in four operands [regaddr, regupdate, opc, align]. The
- // first is the address register. The second register holds the value of
- // a post-access increment for writeback or reg0 if no writeback or if the
- // writeback increment is the size of the memory access. The third
- // operand encodes whether there is writeback to the address register. The
- // fourth operand is the value of the alignment specifier to use or zero if
- // no explicit alignment.
-
- static inline unsigned getAM6Opc(bool WB = false) {
- return (int)WB;
- }
-
- static inline bool getAM6WBFlag(unsigned Mode) {
- return Mode & 1;
- }
+ // This is stored in two operands [regaddr, align]. The first is the
+ // address register. The second operand is the value of the alignment
+ // specifier to use or zero if no explicit alignment.
} // end namespace ARM_AM
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 8e537d8..e6ea03a 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -650,39 +650,49 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
if (SrcRC == ARM::tGPRRegisterClass)
SrcRC = ARM::GPRRegisterClass;
- if (DestRC != SrcRC) {
- if (DestRC->getSize() != SrcRC->getSize())
- return false;
+ // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies.
+ if (DestRC == ARM::DPR_8RegisterClass)
+ DestRC = ARM::DPR_VFP2RegisterClass;
+ if (SrcRC == ARM::DPR_8RegisterClass)
+ SrcRC = ARM::DPR_VFP2RegisterClass;
+
+ // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies.
+ if (DestRC == ARM::QPR_VFP2RegisterClass ||
+ DestRC == ARM::QPR_8RegisterClass)
+ DestRC = ARM::QPRRegisterClass;
+ if (SrcRC == ARM::QPR_VFP2RegisterClass ||
+ SrcRC == ARM::QPR_8RegisterClass)
+ SrcRC = ARM::QPRRegisterClass;
+
+ // Disallow copies of unequal sizes.
+ if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize())
+ return false;
- // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies.
- // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies.
- if (DestRC->getSize() != 8 && DestRC->getSize() != 16)
+ if (DestRC == ARM::GPRRegisterClass) {
+ if (SrcRC == ARM::SPRRegisterClass)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVRS), DestReg)
+ .addReg(SrcReg));
+ else
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr),
+ DestReg).addReg(SrcReg)));
+ } else {
+ unsigned Opc;
+
+ if (DestRC == ARM::SPRRegisterClass)
+ Opc = (SrcRC == ARM::GPRRegisterClass ? ARM::VMOVSR : ARM::VMOVS);
+ else if (DestRC == ARM::DPRRegisterClass)
+ Opc = ARM::VMOVD;
+ else if (DestRC == ARM::DPR_VFP2RegisterClass ||
+ SrcRC == ARM::DPR_VFP2RegisterClass)
+ // Always use neon reg-reg move if source or dest is NEON-only regclass.
+ Opc = ARM::VMOVDneon;
+ else if (DestRC == ARM::QPRRegisterClass)
+ Opc = ARM::VMOVQ;
+ else
return false;
- }
- if (DestRC == ARM::GPRRegisterClass) {
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr),
- DestReg).addReg(SrcReg)));
- } else if (DestRC == ARM::SPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVS), DestReg)
- .addReg(SrcReg));
- } else if (DestRC == ARM::DPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg)
.addReg(SrcReg));
- } else if (DestRC == ARM::DPR_VFP2RegisterClass ||
- DestRC == ARM::DPR_8RegisterClass ||
- SrcRC == ARM::DPR_VFP2RegisterClass ||
- SrcRC == ARM::DPR_8RegisterClass) {
- // Always use neon reg-reg move if source or dest is NEON-only regclass.
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVDneon),
- DestReg).addReg(SrcReg));
- } else if (DestRC == ARM::QPRRegisterClass ||
- DestRC == ARM::QPR_VFP2RegisterClass ||
- DestRC == ARM::QPR_8RegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVQ),
- DestReg).addReg(SrcReg));
- } else {
- return false;
}
return true;
@@ -727,10 +737,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
assert((RC == ARM::QPRRegisterClass ||
RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
// FIXME: Neon instructions should support predicates
- if (Align >= 16
- && (getRegisterInfo().canRealignStack(MF))) {
+ if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
- .addFrameIndex(FI).addImm(0).addImm(0).addImm(128)
+ .addFrameIndex(FI).addImm(128)
.addMemOperand(MMO)
.addReg(SrcReg, getKillRegState(isKill)));
} else {
@@ -780,7 +789,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (Align >= 16
&& (getRegisterInfo().canRealignStack(MF))) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
- .addFrameIndex(FI).addImm(0).addImm(0).addImm(128)
+ .addFrameIndex(FI).addImm(128)
.addMemOperand(MMO));
} else {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 767d5ec..292c498 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -93,34 +93,34 @@ namespace ARMII {
StMiscFrm = 9 << FormShift,
LdStMulFrm = 10 << FormShift,
- LdStExFrm = 28 << FormShift,
+ LdStExFrm = 11 << FormShift,
// Miscellaneous arithmetic instructions
- ArithMiscFrm = 11 << FormShift,
+ ArithMiscFrm = 12 << FormShift,
// Extend instructions
- ExtFrm = 12 << FormShift,
+ ExtFrm = 13 << FormShift,
// VFP formats
- VFPUnaryFrm = 13 << FormShift,
- VFPBinaryFrm = 14 << FormShift,
- VFPConv1Frm = 15 << FormShift,
- VFPConv2Frm = 16 << FormShift,
- VFPConv3Frm = 17 << FormShift,
- VFPConv4Frm = 18 << FormShift,
- VFPConv5Frm = 19 << FormShift,
- VFPLdStFrm = 20 << FormShift,
- VFPLdStMulFrm = 21 << FormShift,
- VFPMiscFrm = 22 << FormShift,
+ VFPUnaryFrm = 14 << FormShift,
+ VFPBinaryFrm = 15 << FormShift,
+ VFPConv1Frm = 16 << FormShift,
+ VFPConv2Frm = 17 << FormShift,
+ VFPConv3Frm = 18 << FormShift,
+ VFPConv4Frm = 19 << FormShift,
+ VFPConv5Frm = 20 << FormShift,
+ VFPLdStFrm = 21 << FormShift,
+ VFPLdStMulFrm = 22 << FormShift,
+ VFPMiscFrm = 23 << FormShift,
// Thumb format
- ThumbFrm = 23 << FormShift,
+ ThumbFrm = 24 << FormShift,
// NEON format
- NEONFrm = 24 << FormShift,
- NEONGetLnFrm = 25 << FormShift,
- NEONSetLnFrm = 26 << FormShift,
- NEONDupFrm = 27 << FormShift,
+ NEONFrm = 25 << FormShift,
+ NEONGetLnFrm = 26 << FormShift,
+ NEONSetLnFrm = 27 << FormShift,
+ NEONDupFrm = 28 << FormShift,
//===------------------------------------------------------------------===//
// Misc flags.
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 11e1c48..b380c95 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -80,7 +80,7 @@ unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
case D23: return 23;
case D24: return 24;
case D25: return 25;
- case D26: return 27;
+ case D26: return 26;
case D27: return 27;
case D28: return 28;
case D29: return 29;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 334c820..e7aa0c8 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -55,12 +55,12 @@ namespace {
const std::vector<MachineConstantPoolEntry> *MCPEs;
const std::vector<MachineJumpTableEntry> *MJTEs;
bool IsPIC;
-
+
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineModuleInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-
+
static char ID;
public:
ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
@@ -68,7 +68,7 @@ namespace {
TD(tm.getTargetData()), TM(tm),
MCE(mce), MCPEs(0), MJTEs(0),
IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
-
+
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
/// machine instructions.
@@ -163,7 +163,7 @@ namespace {
char ARMCodeEmitter::ID = 0;
-/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM
+/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM
/// code to the specified MCE object.
FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
JITCodeEmitter &JCE) {
@@ -617,8 +617,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
}
}
-unsigned ARMCodeEmitter::getMachineSoRegOpValue(
- const MachineInstr &MI,
+unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
const TargetInstrDesc &TID,
const MachineOperand &MO,
unsigned OpIdx) {
@@ -690,7 +689,7 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
}
unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
- const TargetInstrDesc &TID) const {
+ const TargetInstrDesc &TID) const {
for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){
const MachineOperand &MO = MI.getOperand(i-1);
if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
@@ -699,8 +698,7 @@ unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
return 0;
}
-void ARMCodeEmitter::emitDataProcessingInstruction(
- const MachineInstr &MI,
+void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
unsigned ImplicitRd,
unsigned ImplicitRn) {
const TargetInstrDesc &TID = MI.getDesc();
@@ -765,8 +763,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(
emitWordLE(Binary);
}
-void ARMCodeEmitter::emitLoadStoreInstruction(
- const MachineInstr &MI,
+void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
unsigned ImplicitRd,
unsigned ImplicitRn) {
const TargetInstrDesc &TID = MI.getDesc();
@@ -841,7 +838,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction(
}
void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
- unsigned ImplicitRn) {
+ unsigned ImplicitRn) {
const TargetInstrDesc &TID = MI.getDesc();
unsigned Form = TID.TSFlags & ARMII::FormMask;
bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0;
@@ -950,7 +947,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm()));
// Set bit W(21)
- if (ARM_AM::getAM4WBFlag(MO.getImm()))
+ if (IsUpdating)
Binary |= 0x1 << ARMII::W_BitShift;
// Set registers
@@ -1238,8 +1235,7 @@ void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
emitWordLE(Binary);
}
-void ARMCodeEmitter::emitVFPConversionInstruction(
- const MachineInstr &MI) {
+void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) {
const TargetInstrDesc &TID = MI.getDesc();
unsigned Form = TID.TSFlags & ARMII::FormMask;
@@ -1329,8 +1325,8 @@ void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
emitWordLE(Binary);
}
-void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(
- const MachineInstr &MI) {
+void
+ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
const TargetInstrDesc &TID = MI.getDesc();
bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0;
@@ -1353,7 +1349,7 @@ void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(
Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm()));
// Set bit W(21)
- if (ARM_AM::getAM5WBFlag(MO.getImm()))
+ if (IsUpdating)
Binary |= 0x1 << ARMII::W_BitShift;
// First register is encoded in Dd.
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 013e00a..71207c8 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -80,8 +80,7 @@ public:
SDValue &Mode);
bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Offset);
- bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Update,
- SDValue &Opc, SDValue &Align);
+ bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Align);
bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset,
SDValue &Label);
@@ -502,12 +501,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N,
}
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N,
- SDValue &Addr, SDValue &Update,
- SDValue &Opc, SDValue &Align) {
+ SDValue &Addr, SDValue &Align) {
Addr = N;
- // Default to no writeback.
- Update = CurDAG->getRegister(0, MVT::i32);
- Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32);
// Default to no alignment.
Align = CurDAG->getTargetConstant(0, MVT::i32);
return true;
@@ -1030,8 +1025,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1055,14 +1050,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
}
SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align,
- Pred, PredReg, Chain };
+ const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
std::vector<EVT> ResTys(NumVecs, VT);
ResTys.push_back(MVT::Other);
- return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7);
+ return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
}
EVT RegVT = GetNEONSubregVT(VT);
@@ -1070,11 +1064,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
// Quad registers are directly supported for VLD2,
// loading 2 pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align,
- Pred, PredReg, Chain };
+ const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
std::vector<EVT> ResTys(4, VT);
ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
Chain = SDValue(VLd, 4);
// Combine the even and odd subregs to produce the result.
@@ -1086,25 +1079,21 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
// Otherwise, quad registers are loaded with two separate instructions,
// where one loads the even registers and the other loads the odd registers.
- // Enable writeback to the address register.
- MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
-
std::vector<EVT> ResTys(NumVecs, RegVT);
ResTys.push_back(MemAddr.getValueType());
ResTys.push_back(MVT::Other);
// Load the even subregs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align,
- Pred, PredReg, Chain };
- SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 7);
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6);
Chain = SDValue(VLdA, NumVecs+1);
// Load the odd subregs.
Opc = QOpcodes1[OpcodeIndex];
- const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc,
- Align, Pred, PredReg, Chain };
- SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 7);
+ const SDValue OpsB[] = { SDValue(VLdA, NumVecs),
+ Align, Reg0, Pred, Reg0, Chain };
+ SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
Chain = SDValue(VLdB, NumVecs+1);
// Combine the even and odd subregs to produce the result.
@@ -1123,8 +1112,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1148,12 +1137,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
}
SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 8> Ops;
+ SmallVector<SDValue, 10> Ops;
Ops.push_back(MemAddr);
- Ops.push_back(MemUpdate);
- Ops.push_back(MemOpc);
Ops.push_back(Align);
if (is64BitVector) {
@@ -1161,9 +1148,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops.push_back(N->getOperand(Vec+3));
Ops.push_back(Pred);
- Ops.push_back(PredReg);
+ Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
}
EVT RegVT = GetNEONSubregVT(VT);
@@ -1178,40 +1165,37 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
N->getOperand(Vec+3)));
}
Ops.push_back(Pred);
- Ops.push_back(PredReg);
+ Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 11);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9);
}
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
- // Enable writeback to the address register.
- MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
+ Ops.push_back(Reg0); // post-access address offset
// Store the even subregs.
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
N->getOperand(Vec+3)));
Ops.push_back(Pred);
- Ops.push_back(PredReg);
+ Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+7);
+ MVT::Other, Ops.data(), NumVecs+6);
Chain = SDValue(VStA, 1);
// Store the odd subregs.
Ops[0] = SDValue(VStA, 0); // MemAddr
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
N->getOperand(Vec+3));
- Ops[NumVecs+4] = Pred;
- Ops[NumVecs+5] = PredReg;
- Ops[NumVecs+6] = Chain;
+ Ops[NumVecs+5] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+7);
+ MVT::Other, Ops.data(), NumVecs+6);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
@@ -1224,8 +1208,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1259,12 +1243,10 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
}
SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 9> Ops;
+ SmallVector<SDValue, 10> Ops;
Ops.push_back(MemAddr);
- Ops.push_back(MemUpdate);
- Ops.push_back(MemOpc);
Ops.push_back(Align);
unsigned Opc = 0;
@@ -1287,16 +1269,16 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
}
Ops.push_back(getI32Imm(Lane));
Ops.push_back(Pred);
- Ops.push_back(PredReg);
+ Ops.push_back(Reg0);
Ops.push_back(Chain);
if (!IsLoad)
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+8);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6);
std::vector<EVT> ResTys(NumVecs, RegVT);
ResTys.push_back(MVT::Other);
SDNode *VLdLn =
- CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+8);
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6);
// For a 64-bit vector load to D registers, nothing more needs to be done.
if (is64BitVector)
return VLdLn;
@@ -1859,37 +1841,45 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vld3: {
unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16,
ARM::VLD3d32, ARM::VLD3d64 };
- unsigned QOpcodes0[] = { ARM::VLD3q8a, ARM::VLD3q16a, ARM::VLD3q32a };
- unsigned QOpcodes1[] = { ARM::VLD3q8b, ARM::VLD3q16b, ARM::VLD3q32b };
+ unsigned QOpcodes0[] = { ARM::VLD3q8_UPD,
+ ARM::VLD3q16_UPD,
+ ARM::VLD3q32_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD,
+ ARM::VLD3q16odd_UPD,
+ ARM::VLD3q32odd_UPD };
return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4: {
unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16,
ARM::VLD4d32, ARM::VLD4d64 };
- unsigned QOpcodes0[] = { ARM::VLD4q8a, ARM::VLD4q16a, ARM::VLD4q32a };
- unsigned QOpcodes1[] = { ARM::VLD4q8b, ARM::VLD4q16b, ARM::VLD4q32b };
+ unsigned QOpcodes0[] = { ARM::VLD4q8_UPD,
+ ARM::VLD4q16_UPD,
+ ARM::VLD4q32_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD,
+ ARM::VLD4q16odd_UPD,
+ ARM::VLD4q32odd_UPD };
return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld2lane: {
unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a };
- unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b };
+ unsigned QOpcodes0[] = { ARM::VLD2LNq16, ARM::VLD2LNq32 };
+ unsigned QOpcodes1[] = { ARM::VLD2LNq16odd, ARM::VLD2LNq32odd };
return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld3lane: {
unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a };
- unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b };
+ unsigned QOpcodes0[] = { ARM::VLD3LNq16, ARM::VLD3LNq32 };
+ unsigned QOpcodes1[] = { ARM::VLD3LNq16odd, ARM::VLD3LNq32odd };
return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4lane: {
unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a };
- unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b };
+ unsigned QOpcodes0[] = { ARM::VLD4LNq16, ARM::VLD4LNq32 };
+ unsigned QOpcodes1[] = { ARM::VLD4LNq16odd, ARM::VLD4LNq32odd };
return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
@@ -1903,37 +1893,45 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vst3: {
unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16,
ARM::VST3d32, ARM::VST3d64 };
- unsigned QOpcodes0[] = { ARM::VST3q8a, ARM::VST3q16a, ARM::VST3q32a };
- unsigned QOpcodes1[] = { ARM::VST3q8b, ARM::VST3q16b, ARM::VST3q32b };
+ unsigned QOpcodes0[] = { ARM::VST3q8_UPD,
+ ARM::VST3q16_UPD,
+ ARM::VST3q32_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD,
+ ARM::VST3q16odd_UPD,
+ ARM::VST3q32odd_UPD };
return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4: {
unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
ARM::VST4d32, ARM::VST4d64 };
- unsigned QOpcodes0[] = { ARM::VST4q8a, ARM::VST4q16a, ARM::VST4q32a };
- unsigned QOpcodes1[] = { ARM::VST4q8b, ARM::VST4q16b, ARM::VST4q32b };
+ unsigned QOpcodes0[] = { ARM::VST4q8_UPD,
+ ARM::VST4q16_UPD,
+ ARM::VST4q32_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD,
+ ARM::VST4q16odd_UPD,
+ ARM::VST4q32odd_UPD };
return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst2lane: {
unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 };
- unsigned QOpcodes0[] = { ARM::VST2LNq16a, ARM::VST2LNq32a };
- unsigned QOpcodes1[] = { ARM::VST2LNq16b, ARM::VST2LNq32b };
+ unsigned QOpcodes0[] = { ARM::VST2LNq16, ARM::VST2LNq32 };
+ unsigned QOpcodes1[] = { ARM::VST2LNq16odd, ARM::VST2LNq32odd };
return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst3lane: {
unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 };
- unsigned QOpcodes0[] = { ARM::VST3LNq16a, ARM::VST3LNq32a };
- unsigned QOpcodes1[] = { ARM::VST3LNq16b, ARM::VST3LNq32b };
+ unsigned QOpcodes0[] = { ARM::VST3LNq16, ARM::VST3LNq32 };
+ unsigned QOpcodes1[] = { ARM::VST3LNq16odd, ARM::VST3LNq32odd };
return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4lane: {
unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 };
- unsigned QOpcodes0[] = { ARM::VST4LNq16a, ARM::VST4LNq32a };
- unsigned QOpcodes1[] = { ARM::VST4LNq16b, ARM::VST4LNq32b };
+ unsigned QOpcodes0[] = { ARM::VST4LNq16, ARM::VST4LNq32 };
+ unsigned QOpcodes1[] = { ARM::VST4LNq16odd, ARM::VST4LNq32odd };
return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 8f20843..0d0a004 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -436,9 +436,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
}
// Special handling for half-precision FP.
- if (Subtarget->hasVFP3() && Subtarget->hasFP16()) {
- setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Custom);
- setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Custom);
+ if (!Subtarget->hasFP16()) {
+ setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
+ setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
}
}
@@ -499,8 +499,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::FTOUI: return "ARMISD::FTOUI";
case ARMISD::SITOF: return "ARMISD::SITOF";
case ARMISD::UITOF: return "ARMISD::UITOF";
- case ARMISD::F16_TO_F32: return "ARMISD::F16_TO_F32";
- case ARMISD::F32_TO_F16: return "ARMISD::F32_TO_F16";
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
@@ -1987,9 +1985,6 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
default:
assert(0 && "Invalid opcode!");
- case ISD::FP32_TO_FP16:
- Opc = ARMISD::F32_TO_F16;
- break;
case ISD::FP_TO_SINT:
Opc = ARMISD::FTOSI;
break;
@@ -2009,9 +2004,6 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
default:
assert(0 && "Invalid opcode!");
- case ISD::FP16_TO_FP32:
- Opc = ARMISD::F16_TO_F32;
- break;
case ISD::SINT_TO_FP:
Opc = ARMISD::SITOF;
break;
@@ -3078,10 +3070,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
- case ISD::FP16_TO_FP32:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
- case ISD::FP32_TO_FP16:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index d7b2ba3..f8f8adc 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -59,8 +59,6 @@ namespace llvm {
FTOUI, // FP to uint within a FP register.
SITOF, // sint to FP within a FP register.
UITOF, // uint to FP within a FP register.
- F16_TO_F32, // Half FP to single FP within a FP register.
- F32_TO_F16, // Single FP to half FP within a FP register.
SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 258a96b..4f6f05d 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -15,8 +15,8 @@
// Format specifies the encoding used by the instruction. This is part of the
// ad-hoc solution used to emit machine instruction encodings by our machine
// code emitter.
-class Format<bits<5> val> {
- bits<5> Value = val;
+class Format<bits<6> val> {
+ bits<6> Value = val;
}
def Pseudo : Format<0>;
@@ -33,32 +33,34 @@ def LdMiscFrm : Format<8>;
def StMiscFrm : Format<9>;
def LdStMulFrm : Format<10>;
-def LdStExFrm : Format<28>;
+def LdStExFrm : Format<11>;
-def ArithMiscFrm : Format<11>;
-def ExtFrm : Format<12>;
+def ArithMiscFrm : Format<12>;
+def ExtFrm : Format<13>;
-def VFPUnaryFrm : Format<13>;
-def VFPBinaryFrm : Format<14>;
-def VFPConv1Frm : Format<15>;
-def VFPConv2Frm : Format<16>;
-def VFPConv3Frm : Format<17>;
-def VFPConv4Frm : Format<18>;
-def VFPConv5Frm : Format<19>;
-def VFPLdStFrm : Format<20>;
-def VFPLdStMulFrm : Format<21>;
-def VFPMiscFrm : Format<22>;
+def VFPUnaryFrm : Format<14>;
+def VFPBinaryFrm : Format<15>;
+def VFPConv1Frm : Format<16>;
+def VFPConv2Frm : Format<17>;
+def VFPConv3Frm : Format<18>;
+def VFPConv4Frm : Format<19>;
+def VFPConv5Frm : Format<20>;
+def VFPLdStFrm : Format<21>;
+def VFPLdStMulFrm : Format<22>;
+def VFPMiscFrm : Format<23>;
-def ThumbFrm : Format<23>;
+def ThumbFrm : Format<24>;
-def NEONFrm : Format<24>;
-def NEONGetLnFrm : Format<25>;
-def NEONSetLnFrm : Format<26>;
-def NEONDupFrm : Format<27>;
+def NEONFrm : Format<25>;
+def NEONGetLnFrm : Format<26>;
+def NEONSetLnFrm : Format<27>;
+def NEONDupFrm : Format<28>;
def MiscFrm : Format<29>;
def ThumbMiscFrm : Format<30>;
+def NLdStFrm : Format<31>;
+
// Misc flags.
// the instruction has a Rn register operand.
@@ -71,7 +73,7 @@ class UnaryDP { bit isUnaryDataProc = 1; }
class Xform16Bit { bit canXformTo16Bit = 1; }
//===----------------------------------------------------------------------===//
-// ARM Instruction flags. These need to match ARMInstrInfo.h.
+// ARM Instruction flags. These need to match ARMBaseInstrInfo.h.
//
// Addressing mode.
@@ -183,7 +185,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
bits<2> IndexModeBits = IM.Value;
Format F = f;
- bits<5> Form = F.Value;
+ bits<6> Form = F.Value;
Domain D = d;
bits<2> Dom = D.Value;
@@ -229,7 +231,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<dag> pattern>
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
let OutOperandList = oops;
- let InOperandList = !con(iops, (ops pred:$p));
+ let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, !strconcat("${p}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
@@ -257,7 +259,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<dag> pattern>
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
let OutOperandList = oops;
- let InOperandList = !con(iops, (ops pred:$p, cc_out:$s));
+ let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
@@ -1007,8 +1009,8 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
- let OutOperandList = !con(oops, (ops s_cc_out:$s));
- let InOperandList = !con(iops, (ops pred:$p));
+ let OutOperandList = !con(oops, (outs s_cc_out:$s));
+ let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb1Only];
@@ -1030,7 +1032,7 @@ class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
string opc, string asm, string cstr, list<dag> pattern>
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
- let InOperandList = !con(iops, (ops pred:$p));
+ let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, !strconcat("${p}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb1Only];
@@ -1109,7 +1111,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
string opc, string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
- let InOperandList = !con(iops, (ops pred:$p));
+ let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, !strconcat("${p}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
@@ -1125,7 +1127,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
string opc, string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
- let InOperandList = !con(iops, (ops pred:$p, cc_out:$s));
+ let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
@@ -1209,7 +1211,7 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
string opc, string asm, string cstr, list<dag> pattern>
: InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
- let InOperandList = !con(iops, (ops pred:$p));
+ let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, !strconcat("${p}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
@@ -1265,7 +1267,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
string opc, string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
let OutOperandList = oops;
- let InOperandList = !con(iops, (ops pred:$p));
+ let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, !strconcat("${p}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [HasVFP2];
@@ -1464,11 +1466,12 @@ class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
// ARM NEON Instruction templates.
//
-class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin,
- string opc, string dt, string asm, string cstr, list<dag> pattern>
- : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> {
+class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
let OutOperandList = oops;
- let InOperandList = !con(iops, (ops pred:$p));
+ let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(
!strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
!strconcat("\t", asm));
@@ -1481,7 +1484,7 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> {
let OutOperandList = oops;
- let InOperandList = !con(iops, (ops pred:$p));
+ let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm));
let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
@@ -1502,8 +1505,8 @@ class NI4<dag oops, dag iops, InstrItinClass itin, string opc,
class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
- : NeonI<oops, iops, AddrMode6, IndexModeNone, itin, opc, dt, asm, cstr,
- pattern> {
+ : NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm,
+ cstr, pattern> {
let Inst{31-24} = 0b11110100;
let Inst{23} = op23;
let Inst{21-20} = op21_20;
@@ -1513,7 +1516,7 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
class NDataI<dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
- : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, dt, asm,
+ : NeonI<oops, iops, AddrModeNone, IndexModeNone, NEONFrm, itin, opc, dt, asm,
cstr, pattern> {
let Inst{31-25} = 0b1111001;
}
@@ -1621,7 +1624,7 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
let Inst{4} = 1;
let OutOperandList = oops;
- let InOperandList = !con(iops, (ops pred:$p));
+ let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(
!strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
!strconcat("\t", asm));
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 3fc37da..26a2806 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -62,11 +62,14 @@ def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ SDNPVariadic]>;
def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ SDNPVariadic]>;
def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ SDNPVariadic]>;
def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInFlag]>;
@@ -282,7 +285,7 @@ def pclabel : Operand<i32> {
// shifter_operand operands: so_reg and so_imm.
def so_reg : Operand<i32>, // reg reg imm
- ComplexPattern<i32, 3, "SelectShifterOperandReg",
+ ComplexPattern<i32, 3, "SelectShifterOperandReg",
[shl,srl,sra,rotr]> {
let PrintMethod = "printSORegOperand";
let MIOperandInfo = (ops GPR, GPR, i32imm);
@@ -392,9 +395,14 @@ def addrmode5 : Operand<i32>,
// addrmode6 := reg with optional writeback
//
def addrmode6 : Operand<i32>,
- ComplexPattern<i32, 4, "SelectAddrMode6", []> {
+ ComplexPattern<i32, 2, "SelectAddrMode6", []> {
let PrintMethod = "printAddrMode6Operand";
- let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm);
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+}
+
+def am6offset : Operand<i32> {
+ let PrintMethod = "printAddrMode6OffsetOperand";
+ let MIOperandInfo = (ops GPR);
}
// addrmodepc := pc + reg
@@ -909,7 +917,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
IndexModeUpd, LdStMulFrm, IIC_Br,
- "ldm${addr:submode}${p}\t$addr, $dsts",
+ "ldm${addr:submode}${p}\t$addr!, $dsts",
"$addr.addr = $wb", []>;
// On non-Darwin platforms R9 is callee-saved.
@@ -1354,7 +1362,7 @@ def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p,
def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
IndexModeUpd, LdStMulFrm, IIC_iLoadm,
- "ldm${addr:submode}${p}\t$addr, $dsts",
+ "ldm${addr:submode}${p}\t$addr!, $dsts",
"$addr.addr = $wb", []>;
} // mayLoad, hasExtraDefRegAllocReq
@@ -1367,7 +1375,7 @@ def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p,
def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
IndexModeUpd, LdStMulFrm, IIC_iStorem,
- "stm${addr:submode}${p}\t$addr, $srcs",
+ "stm${addr:submode}${p}\t$addr!, $srcs",
"$addr.addr = $wb", []>;
} // mayStore, hasExtraSrcRegAllocReq
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 8fee6fa..c977cc3 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -138,214 +138,360 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem,
}
// VLD1 : Vector Load (multiple single elements)
-class VLD1D<bits<4> op7_4, string OpcodeStr, string Dt,
- ValueType Ty, Intrinsic IntOp>
+class VLD1D<bits<4> op7_4, string Dt, ValueType Ty>
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
- OpcodeStr, Dt, "\\{$dst\\}, $addr", "",
- [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
-class VLD1Q<bits<4> op7_4, string OpcodeStr, string Dt,
- ValueType Ty, Intrinsic IntOp>
+ "vld1", Dt, "\\{$dst\\}, $addr", "",
+ [(set DPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>;
+class VLD1Q<bits<4> op7_4, string Dt, ValueType Ty>
: NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
- OpcodeStr, Dt, "${dst:dregpair}, $addr", "",
- [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
+ "vld1", Dt, "${dst:dregpair}, $addr", "",
+ [(set QPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>;
+
+def VLD1d8 : VLD1D<0b0000, "8", v8i8>;
+def VLD1d16 : VLD1D<0b0100, "16", v4i16>;
+def VLD1d32 : VLD1D<0b1000, "32", v2i32>;
+def VLD1df : VLD1D<0b1000, "32", v2f32>;
+def VLD1d64 : VLD1D<0b1100, "64", v1i64>;
+
+def VLD1q8 : VLD1Q<0b0000, "8", v16i8>;
+def VLD1q16 : VLD1Q<0b0100, "16", v8i16>;
+def VLD1q32 : VLD1Q<0b1000, "32", v4i32>;
+def VLD1qf : VLD1Q<0b1000, "32", v4f32>;
+def VLD1q64 : VLD1Q<0b1100, "64", v2i64>;
+
+let mayLoad = 1 in {
+
+// ...with address register writeback:
+class VLD1DWB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
+ "vld1", Dt, "\\{$dst\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
+class VLD1QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
+ "vld1", Dt, "${dst:dregpair}, $addr$offset",
+ "$addr.addr = $wb", []>;
-def VLD1d8 : VLD1D<0b0000, "vld1", "8", v8i8, int_arm_neon_vld1>;
-def VLD1d16 : VLD1D<0b0100, "vld1", "16", v4i16, int_arm_neon_vld1>;
-def VLD1d32 : VLD1D<0b1000, "vld1", "32", v2i32, int_arm_neon_vld1>;
-def VLD1df : VLD1D<0b1000, "vld1", "32", v2f32, int_arm_neon_vld1>;
-def VLD1d64 : VLD1D<0b1100, "vld1", "64", v1i64, int_arm_neon_vld1>;
+def VLD1d8_UPD : VLD1DWB<0b0000, "8">;
+def VLD1d16_UPD : VLD1DWB<0b0100, "16">;
+def VLD1d32_UPD : VLD1DWB<0b1000, "32">;
+def VLD1d64_UPD : VLD1DWB<0b1100, "64">;
-def VLD1q8 : VLD1Q<0b0000, "vld1", "8", v16i8, int_arm_neon_vld1>;
-def VLD1q16 : VLD1Q<0b0100, "vld1", "16", v8i16, int_arm_neon_vld1>;
-def VLD1q32 : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>;
-def VLD1qf : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>;
-def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>;
+def VLD1q8_UPD : VLD1QWB<0b0000, "8">;
+def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
+def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
+def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
+} // mayLoad = 1
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// These (dreg triple/quadruple) are for disassembly only.
-class VLD1D3<bits<4> op7_4, string OpcodeStr, string Dt>
- : NLdSt<0, 0b10, 0b0110, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
- (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt,
+class VLD1D3<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
"\\{$dst1, $dst2, $dst3\\}, $addr", "",
[/* For disassembly only; pattern left blank */]>;
-class VLD1D4<bits<4> op7_4, string OpcodeStr, string Dt>
+class VLD1D4<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt,
+ (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
"\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "",
[/* For disassembly only; pattern left blank */]>;
-def VLD1d8T : VLD1D3<0b0000, "vld1", "8">;
-def VLD1d16T : VLD1D3<0b0100, "vld1", "16">;
-def VLD1d32T : VLD1D3<0b1000, "vld1", "32">;
-//def VLD1d64T : VLD1D3<0b1100, "vld1", "64">;
-
-def VLD1d8Q : VLD1D4<0b0000, "vld1", "8">;
-def VLD1d16Q : VLD1D4<0b0100, "vld1", "16">;
-def VLD1d32Q : VLD1D4<0b1000, "vld1", "32">;
-//def VLD1d64Q : VLD1D4<0b1100, "vld1", "64">;
+def VLD1d8T : VLD1D3<0b0000, "8">;
+def VLD1d16T : VLD1D3<0b0100, "16">;
+def VLD1d32T : VLD1D3<0b1000, "32">;
+// VLD1d64T : implemented as VLD3d64
+
+def VLD1d8Q : VLD1D4<0b0000, "8">;
+def VLD1d16Q : VLD1D4<0b0100, "16">;
+def VLD1d32Q : VLD1D4<0b1000, "32">;
+// VLD1d64Q : implemented as VLD4d64
+
+// ...with address register writeback:
+class VLD1D3WB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
+ "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb",
+ [/* For disassembly only; pattern left blank */]>;
+class VLD1D4WB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0010,op7_4,
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
+ "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb",
+ [/* For disassembly only; pattern left blank */]>;
+def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">;
+def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
+def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
+// VLD1d64T_UPD : implemented as VLD3d64_UPD
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+def VLD1d8Q_UPD : VLD1D4WB<0b0000, "8">;
+def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
+def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
+// VLD1d64Q_UPD : implemented as VLD4d64_UPD
// VLD2 : Vector Load (multiple 2-element structures)
-class VLD2D<bits<4> op7_4, string OpcodeStr, string Dt>
- : NLdSt<0,0b10,0b1000,op7_4, (outs DPR:$dst1, DPR:$dst2),
+class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
(ins addrmode6:$addr), IIC_VLD2,
- OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
-class VLD2Q<bits<4> op7_4, string OpcodeStr, string Dt>
- : NLdSt<0,0b10,0b0011,op7_4,
+ "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
+class VLD2Q<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, 0b0011, op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$addr), IIC_VLD2,
- OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr",
- "", []>;
+ "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
-def VLD2d8 : VLD2D<0b0000, "vld2", "8">;
-def VLD2d16 : VLD2D<0b0100, "vld2", "16">;
-def VLD2d32 : VLD2D<0b1000, "vld2", "32">;
+def VLD2d8 : VLD2D<0b1000, 0b0000, "8">;
+def VLD2d16 : VLD2D<0b1000, 0b0100, "16">;
+def VLD2d32 : VLD2D<0b1000, 0b1000, "32">;
def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2),
(ins addrmode6:$addr), IIC_VLD1,
"vld1", "64", "\\{$dst1, $dst2\\}, $addr", "", []>;
-def VLD2q8 : VLD2Q<0b0000, "vld2", "8">;
-def VLD2q16 : VLD2Q<0b0100, "vld2", "16">;
-def VLD2q32 : VLD2Q<0b1000, "vld2", "32">;
+def VLD2q8 : VLD2Q<0b0000, "8">;
+def VLD2q16 : VLD2Q<0b0100, "16">;
+def VLD2q32 : VLD2Q<0b1000, "32">;
-// These (double-spaced dreg pair) are for disassembly only.
-class VLD2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt>
- : NLdSt<0,0b10,0b1001,op7_4, (outs DPR:$dst1, DPR:$dst2),
- (ins addrmode6:$addr), IIC_VLD2,
- OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
+// ...with address register writeback:
+class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
+ "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
+class VLD2QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, 0b0011, op7_4,
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
+ "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
-def VLD2d8D : VLD2Ddbl<0b0000, "vld2", "8">;
-def VLD2d16D : VLD2Ddbl<0b0100, "vld2", "16">;
-def VLD2d32D : VLD2Ddbl<0b1000, "vld2", "32">;
+def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">;
+def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">;
+def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">;
+def VLD2d64_UPD : NLdSt<0,0b10,0b1010,0b1100,
+ (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
+ "vld1", "64", "\\{$dst1, $dst2\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
+
+def VLD2q8_UPD : VLD2QWB<0b0000, "8">;
+def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
+def VLD2q32_UPD : VLD2QWB<0b1000, "32">;
+
+// ...with double-spaced registers (for disassembly only):
+def VLD2b8 : VLD2D<0b1001, 0b0000, "8">;
+def VLD2b16 : VLD2D<0b1001, 0b0100, "16">;
+def VLD2b32 : VLD2D<0b1001, 0b1000, "32">;
+def VLD2b8_UPD : VLD2DWB<0b1001, 0b0000, "8">;
+def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">;
+def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">;
// VLD3 : Vector Load (multiple 3-element structures)
-class VLD3D<bits<4> op7_4, string OpcodeStr, string Dt>
- : NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$addr), IIC_VLD3,
- OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
-class VLD3WB<bits<4> op7_4, string OpcodeStr, string Dt>
- : NLdSt<0,0b10,0b0101,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
- (ins addrmode6:$addr), IIC_VLD3,
- OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3\\}, $addr",
- "$addr.addr = $wb", []>;
+ "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
-def VLD3d8 : VLD3D<0b0000, "vld3", "8">;
-def VLD3d16 : VLD3D<0b0100, "vld3", "16">;
-def VLD3d32 : VLD3D<0b1000, "vld3", "32">;
+def VLD3d8 : VLD3D<0b0100, 0b0000, "8">;
+def VLD3d16 : VLD3D<0b0100, 0b0100, "16">;
+def VLD3d32 : VLD3D<0b0100, 0b1000, "32">;
def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$addr), IIC_VLD1,
"vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
-// vld3 to double-spaced even registers.
-def VLD3q8a : VLD3WB<0b0000, "vld3", "8">;
-def VLD3q16a : VLD3WB<0b0100, "vld3", "16">;
-def VLD3q32a : VLD3WB<0b1000, "vld3", "32">;
+// ...with address register writeback:
+class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3,
+ "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
-// vld3 to double-spaced odd registers.
-def VLD3q8b : VLD3WB<0b0000, "vld3", "8">;
-def VLD3q16b : VLD3WB<0b0100, "vld3", "16">;
-def VLD3q32b : VLD3WB<0b1000, "vld3", "32">;
+def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">;
+def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
+def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;
+def VLD3d64_UPD : NLdSt<0,0b10,0b0110,0b1100,
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
+ "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
+
+// ...with double-spaced registers (non-updating versions for disassembly only):
+def VLD3q8 : VLD3D<0b0101, 0b0000, "8">;
+def VLD3q16 : VLD3D<0b0101, 0b0100, "16">;
+def VLD3q32 : VLD3D<0b0101, 0b1000, "32">;
+def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">;
+def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
+def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">;
+def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">;
+def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">;
// VLD4 : Vector Load (multiple 4-element structures)
-class VLD4D<bits<4> op7_4, string OpcodeStr, string Dt>
- : NLdSt<0,0b10,0b0000,op7_4,
+class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$addr), IIC_VLD4,
- OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr",
- "", []>;
-class VLD4WB<bits<4> op7_4, string OpcodeStr, string Dt>
- : NLdSt<0,0b10,0b0001,op7_4,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$addr), IIC_VLD4,
- OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr",
- "$addr.addr = $wb", []>;
+ "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
-def VLD4d8 : VLD4D<0b0000, "vld4", "8">;
-def VLD4d16 : VLD4D<0b0100, "vld4", "16">;
-def VLD4d32 : VLD4D<0b1000, "vld4", "32">;
+def VLD4d8 : VLD4D<0b0000, 0b0000, "8">;
+def VLD4d16 : VLD4D<0b0000, 0b0100, "16">;
+def VLD4d32 : VLD4D<0b0000, 0b1000, "32">;
def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$addr), IIC_VLD1,
"vld1", "64", "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr",
"", []>;
-// vld4 to double-spaced even registers.
-def VLD4q8a : VLD4WB<0b0000, "vld4", "8">;
-def VLD4q16a : VLD4WB<0b0100, "vld4", "16">;
-def VLD4q32a : VLD4WB<0b1000, "vld4", "32">;
+// ...with address register writeback:
+class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4,
+ "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
-// vld4 to double-spaced odd registers.
-def VLD4q8b : VLD4WB<0b0000, "vld4", "8">;
-def VLD4q16b : VLD4WB<0b0100, "vld4", "16">;
-def VLD4q32b : VLD4WB<0b1000, "vld4", "32">;
+def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">;
+def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
+def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;
+def VLD4d64_UPD : NLdSt<0,0b10,0b0010,0b1100,
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4,
+ GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
+ "vld1", "64",
+ "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
+
+// ...with double-spaced registers (non-updating versions for disassembly only):
+def VLD4q8 : VLD4D<0b0001, 0b0000, "8">;
+def VLD4q16 : VLD4D<0b0001, 0b0100, "16">;
+def VLD4q32 : VLD4D<0b0001, 0b1000, "32">;
+def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">;
+def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
+def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD4q8odd_UPD : VLD4DWB<0b0001, 0b0000, "8">;
+def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">;
+def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">;
// VLD1LN : Vector Load (single element to one lane)
// FIXME: Not yet implemented.
// VLD2LN : Vector Load (single 2-element structure to one lane)
-class VLD2LN<bits<4> op11_8, string OpcodeStr, string Dt>
- : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
- IIC_VLD2, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
- "$src1 = $dst1, $src2 = $dst2", []>;
+class VLD2LN<bits<4> op11_8, string Dt>
+ : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
+ "$src1 = $dst1, $src2 = $dst2", []>;
+
+def VLD2LNd8 : VLD2LN<0b0001, "8">;
+def VLD2LNd16 : VLD2LN<0b0101, "16"> { let Inst{5} = 0; }
+def VLD2LNd32 : VLD2LN<0b1001, "32"> { let Inst{6} = 0; }
-// vld2 to single-spaced registers.
-def VLD2LNd8 : VLD2LN<0b0001, "vld2", "8">;
-def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 0; }
-def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 0; }
+// ...with double-spaced registers:
+def VLD2LNq16 : VLD2LN<0b0101, "16"> { let Inst{5} = 1; }
+def VLD2LNq32 : VLD2LN<0b1001, "32"> { let Inst{6} = 1; }
-// vld2 to double-spaced even registers.
-def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; }
-def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; }
+// ...alternate versions to be allocated odd register numbers:
+def VLD2LNq16odd : VLD2LN<0b0101, "16"> { let Inst{5} = 1; }
+def VLD2LNq32odd : VLD2LN<0b1001, "32"> { let Inst{6} = 1; }
-// vld2 to double-spaced odd registers.
-def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; }
-def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; }
+// ...with address register writeback:
+class VLD2LNWB<bits<4> op11_8, string Dt>
+ : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt,
+ "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset",
+ "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>;
+
+def VLD2LNd8_UPD : VLD2LNWB<0b0001, "8">;
+def VLD2LNd16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 0; }
+def VLD2LNd32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 0; }
+
+def VLD2LNq16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 1; }
+def VLD2LNq32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 1; }
// VLD3LN : Vector Load (single 3-element structure to one lane)
-class VLD3LN<bits<4> op11_8, string OpcodeStr, string Dt>
- : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- nohash_imm:$lane), IIC_VLD3, OpcodeStr, Dt,
- "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
- "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
-
-// vld3 to single-spaced registers.
-def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> { let Inst{4} = 0; }
-def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b00; }
-def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b000; }
-
-// vld3 to double-spaced even registers.
-def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; }
-def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; }
-
-// vld3 to double-spaced odd registers.
-def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; }
-def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; }
+class VLD3LN<bits<4> op11_8, string Dt>
+ : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VLD3, "vld3", Dt,
+ "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
+ "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
+
+def VLD3LNd8 : VLD3LN<0b0010, "8"> { let Inst{4} = 0; }
+def VLD3LNd16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b00; }
+def VLD3LNd32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b000; }
+
+// ...with double-spaced registers:
+def VLD3LNq16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
+def VLD3LNq32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD3LNq16odd : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
+def VLD3LNq32odd : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+
+// ...with address register writeback:
+class VLD3LNWB<bits<4> op11_8, string Dt>
+ : NLdSt<1, 0b10, op11_8, {?,?,?,?},
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+ IIC_VLD3, "vld3", Dt,
+ "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset",
+ "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb",
+ []>;
+
+def VLD3LNd8_UPD : VLD3LNWB<0b0010, "8"> { let Inst{4} = 0; }
+def VLD3LNd16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; }
+def VLD3LNd32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; }
+
+def VLD3LNq16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; }
+def VLD3LNq32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; }
// VLD4LN : Vector Load (single 4-element structure to one lane)
-class VLD4LN<bits<4> op11_8, string OpcodeStr, string Dt>
- : NLdSt<1,0b10,op11_8,{?,?,?,?},
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
- nohash_imm:$lane), IIC_VLD4, OpcodeStr, Dt,
+class VLD4LN<bits<4> op11_8, string Dt>
+ : NLdSt<1, 0b10, op11_8, {?,?,?,?},
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VLD4, "vld4", Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
- "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
+ "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
-// vld4 to single-spaced registers.
-def VLD4LNd8 : VLD4LN<0b0011, "vld4", "8">;
-def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 0; }
-def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 0; }
+def VLD4LNd8 : VLD4LN<0b0011, "8">;
+def VLD4LNd16 : VLD4LN<0b0111, "16"> { let Inst{5} = 0; }
+def VLD4LNd32 : VLD4LN<0b1011, "32"> { let Inst{6} = 0; }
-// vld4 to double-spaced even registers.
-def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; }
-def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; }
+// ...with double-spaced registers:
+def VLD4LNq16 : VLD4LN<0b0111, "16"> { let Inst{5} = 1; }
+def VLD4LNq32 : VLD4LN<0b1011, "32"> { let Inst{6} = 1; }
-// vld4 to double-spaced odd registers.
-def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; }
-def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; }
+// ...alternate versions to be allocated odd register numbers:
+def VLD4LNq16odd : VLD4LN<0b0111, "16"> { let Inst{5} = 1; }
+def VLD4LNq32odd : VLD4LN<0b1011, "32"> { let Inst{6} = 1; }
+
+// ...with address register writeback:
+class VLD4LNWB<bits<4> op11_8, string Dt>
+ : NLdSt<1, 0b10, op11_8, {?,?,?,?},
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+ IIC_VLD4, "vld4", Dt,
+"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset",
+"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb",
+ []>;
+
+def VLD4LNd8_UPD : VLD4LNWB<0b0011, "8">;
+def VLD4LNd16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 0; }
+def VLD4LNd32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 0; }
+
+def VLD4LNq16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 1; }
+def VLD4LNq32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 1; }
// VLD1DUP : Vector Load (single element to all lanes)
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
@@ -355,213 +501,353 @@ def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; }
} // mayLoad = 1, hasExtraDefRegAllocReq = 1
// VST1 : Vector Store (multiple single elements)
-class VST1D<bits<4> op7_4, string OpcodeStr, string Dt,
- ValueType Ty, Intrinsic IntOp>
+class VST1D<bits<4> op7_4, string Dt, ValueType Ty>
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
- OpcodeStr, Dt, "\\{$src\\}, $addr", "",
- [(IntOp addrmode6:$addr, (Ty DPR:$src))]>;
-class VST1Q<bits<4> op7_4, string OpcodeStr, string Dt,
- ValueType Ty, Intrinsic IntOp>
+ "vst1", Dt, "\\{$src\\}, $addr", "",
+ [(int_arm_neon_vst1 addrmode6:$addr, (Ty DPR:$src))]>;
+class VST1Q<bits<4> op7_4, string Dt, ValueType Ty>
: NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST,
- OpcodeStr, Dt, "${src:dregpair}, $addr", "",
- [(IntOp addrmode6:$addr, (Ty QPR:$src))]>;
+ "vst1", Dt, "${src:dregpair}, $addr", "",
+ [(int_arm_neon_vst1 addrmode6:$addr, (Ty QPR:$src))]>;
let hasExtraSrcRegAllocReq = 1 in {
-def VST1d8 : VST1D<0b0000, "vst1", "8", v8i8, int_arm_neon_vst1>;
-def VST1d16 : VST1D<0b0100, "vst1", "16", v4i16, int_arm_neon_vst1>;
-def VST1d32 : VST1D<0b1000, "vst1", "32", v2i32, int_arm_neon_vst1>;
-def VST1df : VST1D<0b1000, "vst1", "32", v2f32, int_arm_neon_vst1>;
-def VST1d64 : VST1D<0b1100, "vst1", "64", v1i64, int_arm_neon_vst1>;
-
-def VST1q8 : VST1Q<0b0000, "vst1", "8", v16i8, int_arm_neon_vst1>;
-def VST1q16 : VST1Q<0b0100, "vst1", "16", v8i16, int_arm_neon_vst1>;
-def VST1q32 : VST1Q<0b1000, "vst1", "32", v4i32, int_arm_neon_vst1>;
-def VST1qf : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>;
-def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>;
+def VST1d8 : VST1D<0b0000, "8", v8i8>;
+def VST1d16 : VST1D<0b0100, "16", v4i16>;
+def VST1d32 : VST1D<0b1000, "32", v2i32>;
+def VST1df : VST1D<0b1000, "32", v2f32>;
+def VST1d64 : VST1D<0b1100, "64", v1i64>;
+
+def VST1q8 : VST1Q<0b0000, "8", v16i8>;
+def VST1q16 : VST1Q<0b0100, "16", v8i16>;
+def VST1q32 : VST1Q<0b1000, "32", v4i32>;
+def VST1qf : VST1Q<0b1000, "32", v4f32>;
+def VST1q64 : VST1Q<0b1100, "64", v2i64>;
} // hasExtraSrcRegAllocReq
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// ...with address register writeback:
+class VST1DWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST,
+ "vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>;
+class VST1QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
+ "vst1", Dt, "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>;
+
+def VST1d8_UPD : VST1DWB<0b0000, "8">;
+def VST1d16_UPD : VST1DWB<0b0100, "16">;
+def VST1d32_UPD : VST1DWB<0b1000, "32">;
+def VST1d64_UPD : VST1DWB<0b1100, "64">;
+
+def VST1q8_UPD : VST1QWB<0b0000, "8">;
+def VST1q16_UPD : VST1QWB<0b0100, "16">;
+def VST1q32_UPD : VST1QWB<0b1000, "32">;
+def VST1q64_UPD : VST1QWB<0b1100, "64">;
+
// These (dreg triple/quadruple) are for disassembly only.
-class VST1D3<bits<4> op7_4, string OpcodeStr, string Dt>
+class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
- OpcodeStr, Dt,
- "\\{$src1, $src2, $src3\\}, $addr", "",
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
+ IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "",
[/* For disassembly only; pattern left blank */]>;
-class VST1D4<bits<4> op7_4, string OpcodeStr, string Dt>
+class VST1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, OpcodeStr, Dt,
- "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
+ IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
[/* For disassembly only; pattern left blank */]>;
-def VST1d8T : VST1D3<0b0000, "vst1", "8">;
-def VST1d16T : VST1D3<0b0100, "vst1", "16">;
-def VST1d32T : VST1D3<0b1000, "vst1", "32">;
-//def VST1d64T : VST1D3<0b1100, "vst1", "64">;
-
-def VST1d8Q : VST1D4<0b0000, "vst1", "8">;
-def VST1d16Q : VST1D4<0b0100, "vst1", "16">;
-def VST1d32Q : VST1D4<0b1000, "vst1", "32">;
-//def VST1d64Q : VST1D4<0b1100, "vst1", "64">;
+def VST1d8T : VST1D3<0b0000, "8">;
+def VST1d16T : VST1D3<0b0100, "16">;
+def VST1d32T : VST1D3<0b1000, "32">;
+// VST1d64T : implemented as VST3d64
+
+def VST1d8Q : VST1D4<0b0000, "8">;
+def VST1d16Q : VST1D4<0b0100, "16">;
+def VST1d32Q : VST1D4<0b1000, "32">;
+// VST1d64Q : implemented as VST4d64
+
+// ...with address register writeback:
+class VST1D3WB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, DPR:$src3),
+ IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
+ "$addr.addr = $wb",
+ [/* For disassembly only; pattern left blank */]>;
+class VST1D4WB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
+ "$addr.addr = $wb",
+ [/* For disassembly only; pattern left blank */]>;
+def VST1d8T_UPD : VST1D3WB<0b0000, "8">;
+def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
+def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
+// VST1d64T_UPD : implemented as VST3d64_UPD
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+def VST1d8Q_UPD : VST1D4WB<0b0000, "8">;
+def VST1d16Q_UPD : VST1D4WB<0b0100, "16">;
+def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
+// VST1d64Q_UPD : implemented as VST4d64_UPD
// VST2 : Vector Store (multiple 2-element structures)
-class VST2D<bits<4> op7_4, string OpcodeStr, string Dt>
- : NLdSt<0,0b00,0b1000,op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
- OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>;
-class VST2Q<bits<4> op7_4, string OpcodeStr, string Dt>
- : NLdSt<0,0b00,0b0011,op7_4, (outs),
+class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2),
+ IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
+class VST2Q<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
+ IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
"", []>;
-def VST2d8 : VST2D<0b0000, "vst2", "8">;
-def VST2d16 : VST2D<0b0100, "vst2", "16">;
-def VST2d32 : VST2D<0b1000, "vst2", "32">;
+def VST2d8 : VST2D<0b1000, 0b0000, "8">;
+def VST2d16 : VST2D<0b1000, 0b0100, "16">;
+def VST2d32 : VST2D<0b1000, 0b1000, "32">;
def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
"vst1", "64", "\\{$src1, $src2\\}, $addr", "", []>;
-def VST2q8 : VST2Q<0b0000, "vst2", "8">;
-def VST2q16 : VST2Q<0b0100, "vst2", "16">;
-def VST2q32 : VST2Q<0b1000, "vst2", "32">;
+def VST2q8 : VST2Q<0b0000, "8">;
+def VST2q16 : VST2Q<0b0100, "16">;
+def VST2q32 : VST2Q<0b1000, "32">;
-// These (double-spaced dreg pair) are for disassembly only.
-class VST2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt>
- : NLdSt<0, 0b00, 0b1001, op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
- OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>;
+// ...with address register writeback:
+class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2),
+ IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
+class VST2QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
-def VST2d8D : VST2Ddbl<0b0000, "vst2", "8">;
-def VST2d16D : VST2Ddbl<0b0100, "vst2", "16">;
-def VST2d32D : VST2Ddbl<0b1000, "vst2", "32">;
+def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">;
+def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">;
+def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">;
+def VST2d64_UPD : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2), IIC_VST,
+ "vst1", "64", "\\{$src1, $src2\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
+
+def VST2q8_UPD : VST2QWB<0b0000, "8">;
+def VST2q16_UPD : VST2QWB<0b0100, "16">;
+def VST2q32_UPD : VST2QWB<0b1000, "32">;
+
+// ...with double-spaced registers (for disassembly only):
+def VST2b8 : VST2D<0b1001, 0b0000, "8">;
+def VST2b16 : VST2D<0b1001, 0b0100, "16">;
+def VST2b32 : VST2D<0b1001, 0b1000, "32">;
+def VST2b8_UPD : VST2DWB<0b1001, 0b0000, "8">;
+def VST2b16_UPD : VST2DWB<0b1001, 0b0100, "16">;
+def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">;
// VST3 : Vector Store (multiple 3-element structures)
-class VST3D<bits<4> op7_4, string OpcodeStr, string Dt>
- : NLdSt<0,0b00,0b0100,op7_4, (outs),
+class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
- OpcodeStr, Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
-class VST3WB<bits<4> op7_4, string OpcodeStr, string Dt>
- : NLdSt<0,0b00,0b0101,op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
- OpcodeStr, Dt, "\\{$src1, $src2, $src3\\}, $addr",
- "$addr.addr = $wb", []>;
+ "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
-def VST3d8 : VST3D<0b0000, "vst3", "8">;
-def VST3d16 : VST3D<0b0100, "vst3", "16">;
-def VST3d32 : VST3D<0b1000, "vst3", "32">;
+def VST3d8 : VST3D<0b0100, 0b0000, "8">;
+def VST3d16 : VST3D<0b0100, 0b0100, "16">;
+def VST3d32 : VST3D<0b0100, 0b1000, "32">;
def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
IIC_VST,
"vst1", "64", "\\{$src1, $src2, $src3\\}, $addr", "", []>;
-// vst3 to double-spaced even registers.
-def VST3q8a : VST3WB<0b0000, "vst3", "8">;
-def VST3q16a : VST3WB<0b0100, "vst3", "16">;
-def VST3q32a : VST3WB<0b1000, "vst3", "32">;
+// ...with address register writeback:
+class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
+ "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
-// vst3 to double-spaced odd registers.
-def VST3q8b : VST3WB<0b0000, "vst3", "8">;
-def VST3q16b : VST3WB<0b0100, "vst3", "16">;
-def VST3q32b : VST3WB<0b1000, "vst3", "32">;
+def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">;
+def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
+def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;
+def VST3d64_UPD : NLdSt<0,0b00,0b0110,0b1100, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
+ "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
+
+// ...with double-spaced registers (non-updating versions for disassembly only):
+def VST3q8 : VST3D<0b0101, 0b0000, "8">;
+def VST3q16 : VST3D<0b0101, 0b0100, "16">;
+def VST3q32 : VST3D<0b0101, 0b1000, "32">;
+def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">;
+def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">;
+def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">;
+def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">;
+def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">;
// VST4 : Vector Store (multiple 4-element structures)
-class VST4D<bits<4> op7_4, string OpcodeStr, string Dt>
- : NLdSt<0,0b00,0b0000,op7_4, (outs),
+class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
+ IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
"", []>;
-class VST4WB<bits<4> op7_4, string OpcodeStr, string Dt>
- : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
- "$addr.addr = $wb", []>;
-def VST4d8 : VST4D<0b0000, "vst4", "8">;
-def VST4d16 : VST4D<0b0100, "vst4", "16">;
-def VST4d32 : VST4D<0b1000, "vst4", "32">;
+def VST4d8 : VST4D<0b0000, 0b0000, "8">;
+def VST4d16 : VST4D<0b0000, 0b0100, "16">;
+def VST4d32 : VST4D<0b0000, 0b1000, "32">;
def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
DPR:$src4), IIC_VST,
"vst1", "64", "\\{$src1, $src2, $src3, $src4\\}, $addr",
"", []>;
-// vst4 to double-spaced even registers.
-def VST4q8a : VST4WB<0b0000, "vst4", "8">;
-def VST4q16a : VST4WB<0b0100, "vst4", "16">;
-def VST4q32a : VST4WB<0b1000, "vst4", "32">;
+// ...with address register writeback:
+class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST,
+ "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
-// vst4 to double-spaced odd registers.
-def VST4q8b : VST4WB<0b0000, "vst4", "8">;
-def VST4q16b : VST4WB<0b0100, "vst4", "16">;
-def VST4q32b : VST4WB<0b1000, "vst4", "32">;
+def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">;
+def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
+def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
+def VST4d64_UPD : NLdSt<0,0b00,0b0010,0b1100, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST,
+ "vst1", "64",
+ "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
+
+// ...with double-spaced registers (non-updating versions for disassembly only):
+def VST4q8 : VST4D<0b0001, 0b0000, "8">;
+def VST4q16 : VST4D<0b0001, 0b0100, "16">;
+def VST4q32 : VST4D<0b0001, 0b1000, "32">;
+def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">;
+def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
+def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST4q8odd_UPD : VST4DWB<0b0001, 0b0000, "8">;
+def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">;
+def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">;
// VST1LN : Vector Store (single element from one lane)
// FIXME: Not yet implemented.
// VST2LN : Vector Store (single 2-element structure from one lane)
-class VST2LN<bits<4> op11_8, string OpcodeStr, string Dt>
- : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
+class VST2LN<bits<4> op11_8, string Dt>
+ : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
- IIC_VST, OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
+ IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
"", []>;
-// vst2 to single-spaced registers.
-def VST2LNd8 : VST2LN<0b0001, "vst2", "8">;
-def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 0; }
-def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 0; }
+def VST2LNd8 : VST2LN<0b0001, "8">;
+def VST2LNd16 : VST2LN<0b0101, "16"> { let Inst{5} = 0; }
+def VST2LNd32 : VST2LN<0b1001, "32"> { let Inst{6} = 0; }
+
+// ...with double-spaced registers:
+def VST2LNq16 : VST2LN<0b0101, "16"> { let Inst{5} = 1; }
+def VST2LNq32 : VST2LN<0b1001, "32"> { let Inst{6} = 1; }
-// vst2 to double-spaced even registers.
-def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; }
-def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; }
+// ...alternate versions to be allocated odd register numbers:
+def VST2LNq16odd : VST2LN<0b0101, "16"> { let Inst{5} = 1; }
+def VST2LNq32odd : VST2LN<0b1001, "32"> { let Inst{6} = 1; }
+
+// ...with address register writeback:
+class VST2LNWB<bits<4> op11_8, string Dt>
+ : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt,
+ "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
-// vst2 to double-spaced odd registers.
-def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; }
-def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; }
+def VST2LNd8_UPD : VST2LNWB<0b0001, "8">;
+def VST2LNd16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 0; }
+def VST2LNd32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 0; }
+
+def VST2LNq16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 1; }
+def VST2LNq32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 1; }
// VST3LN : Vector Store (single 3-element structure from one lane)
-class VST3LN<bits<4> op11_8, string OpcodeStr, string Dt>
- : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
+class VST3LN<bits<4> op11_8, string Dt>
+ : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- nohash_imm:$lane), IIC_VST, OpcodeStr, Dt,
+ nohash_imm:$lane), IIC_VST, "vst3", Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>;
-// vst3 to single-spaced registers.
-def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> { let Inst{4} = 0; }
-def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b00; }
-def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b000; }
+def VST3LNd8 : VST3LN<0b0010, "8"> { let Inst{4} = 0; }
+def VST3LNd16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b00; }
+def VST3LNd32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b000; }
+
+// ...with double-spaced registers:
+def VST3LNq16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
+def VST3LNq32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+
+// ...alternate versions to be allocated odd register numbers:
+def VST3LNq16odd : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
+def VST3LNq32odd : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+
+// ...with address register writeback:
+class VST3LNWB<bits<4> op11_8, string Dt>
+ : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+ IIC_VST, "vst3", Dt,
+ "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
-// vst3 to double-spaced even registers.
-def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; }
-def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; }
+def VST3LNd8_UPD : VST3LNWB<0b0010, "8"> { let Inst{4} = 0; }
+def VST3LNd16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; }
+def VST3LNd32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; }
-// vst3 to double-spaced odd registers.
-def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; }
-def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; }
+def VST3LNq16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; }
+def VST3LNq32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; }
// VST4LN : Vector Store (single 4-element structure from one lane)
-class VST4LN<bits<4> op11_8, string OpcodeStr, string Dt>
- : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
+class VST4LN<bits<4> op11_8, string Dt>
+ : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
- nohash_imm:$lane), IIC_VST, OpcodeStr, Dt,
+ nohash_imm:$lane), IIC_VST, "vst4", Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr",
"", []>;
-// vst4 to single-spaced registers.
-def VST4LNd8 : VST4LN<0b0011, "vst4", "8">;
-def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 0; }
-def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 0; }
+def VST4LNd8 : VST4LN<0b0011, "8">;
+def VST4LNd16 : VST4LN<0b0111, "16"> { let Inst{5} = 0; }
+def VST4LNd32 : VST4LN<0b1011, "32"> { let Inst{6} = 0; }
+
+// ...with double-spaced registers:
+def VST4LNq16 : VST4LN<0b0111, "16"> { let Inst{5} = 1; }
+def VST4LNq32 : VST4LN<0b1011, "32"> { let Inst{6} = 1; }
+
+// ...alternate versions to be allocated odd register numbers:
+def VST4LNq16odd : VST4LN<0b0111, "16"> { let Inst{5} = 1; }
+def VST4LNq32odd : VST4LN<0b1011, "32"> { let Inst{6} = 1; }
+
+// ...with address register writeback:
+class VST4LNWB<bits<4> op11_8, string Dt>
+ : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+ IIC_VST, "vst4", Dt,
+ "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
-// vst4 to double-spaced even registers.
-def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; }
-def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; }
+def VST4LNd8_UPD : VST4LNWB<0b0011, "8">;
+def VST4LNd16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 0; }
+def VST4LNd32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 0; }
-// vst4 to double-spaced odd registers.
-def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; }
-def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; }
+def VST4LNq16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 1; }
+def VST4LNq32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 1; }
} // mayStore = 1, hasExtraSrcRegAllocReq = 1
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 37c9fc5..e3ca536 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -16,7 +16,8 @@
//
def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ SDNPVariadic]>;
def imm_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
@@ -549,7 +550,7 @@ def tLDM : T1I<(outs),
def tLDM_UPD : T1It<(outs tGPR:$wb),
(ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
IIC_iLoadm,
- "ldm${addr:submode}${p}\t$addr, $dsts",
+ "ldm${addr:submode}${p}\t$addr!, $dsts",
"$addr.addr = $wb", []>,
T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
} // mayLoad, hasExtraDefRegAllocReq
@@ -558,7 +559,7 @@ let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
def tSTM_UPD : T1It<(outs tGPR:$wb),
(ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops),
IIC_iStorem,
- "stm${addr:submode}${p}\t$addr, $srcs",
+ "stm${addr:submode}${p}\t$addr!, $srcs",
"$addr.addr = $wb", []>,
T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index ab9e926..262aae4 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1218,7 +1218,7 @@ def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops), IIC_iLoadm,
- "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts",
+ "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
"$addr.addr = $wb", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
@@ -1244,7 +1244,7 @@ def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
IIC_iStorem,
- "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs",
+ "stm${addr:submode}${p}${addr:wide}\t$addr!, $srcs",
"$addr.addr = $wb", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 4d1d48a..aca8230 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -25,8 +25,6 @@ def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
-def arm_f16tof32 : SDNode<"ARMISD::F16_TO_F32", SDT_ITOF>;
-def arm_f32tof16 : SDNode<"ARMISD::F32_TO_F16", SDT_FTOI>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>;
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>;
@@ -94,7 +92,7 @@ def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
reglist:$dsts, variable_ops),
IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, $dsts",
+ "vldm${addr:submode}${p}\t${addr:base}!, $dsts",
"$addr.base = $wb", []> {
let Inst{20} = 1;
}
@@ -102,7 +100,7 @@ def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
reglist:$dsts, variable_ops),
IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, $dsts",
+ "vldm${addr:submode}${p}\t${addr:base}!, $dsts",
"$addr.base = $wb", []> {
let Inst{20} = 1;
}
@@ -124,7 +122,7 @@ def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
reglist:$srcs, variable_ops),
IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, $srcs",
+ "vstm${addr:submode}${p}\t${addr:base}!, $srcs",
"$addr.base = $wb", []> {
let Inst{20} = 0;
}
@@ -132,7 +130,7 @@ def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
reglist:$srcs, variable_ops),
IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, $srcs",
+ "vstm${addr:submode}${p}\t${addr:base}!, $srcs",
"$addr.base = $wb", []> {
let Inst{20} = 0;
}
@@ -259,11 +257,17 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a",
- [(set SPR:$dst, (f32 (arm_f32tof16 SPR:$a)))]>;
+ [/* For disassembly only; pattern left blank */]>;
+
+def : ARMPat<(f32_to_f16 SPR:$a),
+ (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a",
- [(set SPR:$dst, (arm_f16tof32 SPR:$a))]>;
+ [/* For disassembly only; pattern left blank */]>;
+
+def : ARMPat<(f16_to_f32 GPR:$a),
+ (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a",
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 8fbcf45..bdbec30 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -253,7 +253,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
.addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
: BuildMI(MBB, MBBI, dl, TII->get(Opcode))
.addReg(Base, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
+ .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
.addImm(Pred).addReg(PredReg);
for (unsigned i = 0; i != NumRegs; ++i)
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
@@ -505,11 +505,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
if (MI->getOperand(i).getReg() == Base)
return false;
}
- assert(!ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()));
Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
} else {
// VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
- assert(!ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()));
Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
}
@@ -573,11 +571,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
.addReg(Base, getKillRegState(BaseKill));
if (isAM4) {
// [t2]LDM_UPD, [t2]STM_UPD
- MIB.addImm(ARM_AM::getAM4ModeImm(Mode, true))
+ MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
.addImm(Pred).addReg(PredReg);
} else {
// VLDM[SD}_UPD, VSTM[SD]_UPD
- MIB.addImm(ARM_AM::getAM5Opc(Mode, true, Offset))
+ MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
.addImm(Pred).addReg(PredReg);
}
// Transfer the rest of operands.
@@ -709,7 +707,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
unsigned Offset = 0;
if (isAM5)
Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
- true, (isDPR ? 2 : 1));
+ (isDPR ? 2 : 1));
else if (isAM2)
Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
else
@@ -1157,19 +1155,24 @@ namespace {
};
}
-/// MergeReturnIntoLDM - If this is a exit BB, try merging the return op
-/// (bx lr) into the preceeding stack restore so it directly restore the value
-/// of LR into pc.
-/// ldmfd sp!, {r7, lr}
+/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
+/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it
+/// directly restore the value of LR into pc.
+/// ldmfd sp!, {..., lr}
/// bx lr
+/// or
+/// ldmfd sp!, {..., lr}
+/// mov pc, lr
/// =>
-/// ldmfd sp!, {r7, pc}
+/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
if (MBB.empty()) return false;
MachineBasicBlock::iterator MBBI = prior(MBB.end());
if (MBBI != MBB.begin() &&
- (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
+ (MBBI->getOpcode() == ARM::BX_RET ||
+ MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::MOVPCLR)) {
MachineInstr *PrevMI = prior(MBBI);
if (PrevMI->getOpcode() == ARM::LDM_UPD ||
PrevMI->getOpcode() == ARM::t2LDM_UPD) {
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 7233f5c..95f57b7 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -21,7 +21,7 @@
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
case Triple::Darwin:
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 88e67e3..c32f16c 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -23,6 +23,7 @@
#include "ARMISelLowering.h"
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/OwningPtr.h"
namespace llvm {
@@ -83,7 +84,8 @@ public:
/// Thumb-1 and Thumb-2.
///
class ThumbTargetMachine : public ARMBaseTargetMachine {
- ARMBaseInstrInfo *InstrInfo; // either Thumb1InstrInfo or Thumb2InstrInfo
+ // Either Thumb1InstrInfo or Thumb2InstrInfo.
+ OwningPtr<ARMBaseInstrInfo> InstrInfo;
const TargetData DataLayout; // Calculates type size & alignment
ARMTargetLowering TLInfo;
public:
@@ -100,7 +102,9 @@ public:
}
/// returns either Thumb1InstrInfo or Thumb2InstrInfo
- virtual const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo; }
+ virtual const ARMBaseInstrInfo *getInstrInfo() const {
+ return InstrInfo.get();
+ }
virtual const TargetData *getTargetData() const { return &DataLayout; }
};
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 4db14a3..4a7a1e4 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -101,6 +101,7 @@ namespace {
void printAddrMode5Operand(const MachineInstr *MI, int OpNum,
const char *Modifier = 0);
void printAddrMode6Operand(const MachineInstr *MI, int OpNum);
+ void printAddrMode6OffsetOperand(const MachineInstr *MI, int OpNum);
void printAddrModePCOperand(const MachineInstr *MI, int OpNum,
const char *Modifier = 0);
void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum);
@@ -431,16 +432,16 @@ void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) {
O << "[" << getRegisterName(MO1.getReg());
if (!MO2.getReg()) {
- if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
+ if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
O << ", #"
- << (char)ARM_AM::getAM2Op(MO3.getImm())
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
<< ARM_AM::getAM2Offset(MO3.getImm());
O << "]";
return;
}
O << ", "
- << (char)ARM_AM::getAM2Op(MO3.getImm())
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
<< getRegisterName(MO2.getReg());
if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
@@ -458,12 +459,12 @@ void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){
unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
assert(ImmOffs && "Malformed indexed load / store!");
O << "#"
- << (char)ARM_AM::getAM2Op(MO2.getImm())
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
<< ImmOffs;
return;
}
- O << (char)ARM_AM::getAM2Op(MO2.getImm())
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
<< getRegisterName(MO1.getReg());
if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
@@ -490,7 +491,7 @@ void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) {
if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
O << ", #"
- << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
<< ImmOffs;
O << "]";
}
@@ -508,35 +509,22 @@ void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){
unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
assert(ImmOffs && "Malformed indexed load / store!");
O << "#"
- << (char)ARM_AM::getAM3Op(MO2.getImm())
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
<< ImmOffs;
}
void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op,
const char *Modifier) {
- const MachineOperand &MO1 = MI->getOperand(Op);
const MachineOperand &MO2 = MI->getOperand(Op+1);
ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
if (Modifier && strcmp(Modifier, "submode") == 0) {
- if (MO1.getReg() == ARM::SP) {
- // FIXME
- bool isLDM = (MI->getOpcode() == ARM::LDM ||
- MI->getOpcode() == ARM::LDM_UPD ||
- MI->getOpcode() == ARM::LDM_RET ||
- MI->getOpcode() == ARM::t2LDM ||
- MI->getOpcode() == ARM::t2LDM_UPD ||
- MI->getOpcode() == ARM::t2LDM_RET);
- O << ARM_AM::getAMSubModeAltStr(Mode, isLDM);
- } else
- O << ARM_AM::getAMSubModeStr(Mode);
+ O << ARM_AM::getAMSubModeStr(Mode);
} else if (Modifier && strcmp(Modifier, "wide") == 0) {
ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
if (Mode == ARM_AM::ia)
O << ".w";
} else {
printOperand(MI, Op);
- if (ARM_AM::getAM4WBFlag(MO2.getImm()))
- O << "!";
}
}
@@ -559,8 +547,6 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
} else if (Modifier && strcmp(Modifier, "base") == 0) {
// Used for FSTM{D|S} and LSTM{D|S} operations.
O << getRegisterName(MO1.getReg());
- if (ARM_AM::getAM5WBFlag(MO2.getImm()))
- O << "!";
return;
}
@@ -568,7 +554,7 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
O << ", #"
- << (char)ARM_AM::getAM5Op(MO2.getImm())
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
<< ImmOffs*4;
}
O << "]";
@@ -577,22 +563,21 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) {
const MachineOperand &MO1 = MI->getOperand(Op);
const MachineOperand &MO2 = MI->getOperand(Op+1);
- const MachineOperand &MO3 = MI->getOperand(Op+2);
- const MachineOperand &MO4 = MI->getOperand(Op+3);
O << "[" << getRegisterName(MO1.getReg());
- if (MO4.getImm()) {
+ if (MO2.getImm()) {
// FIXME: Both darwin as and GNU as violate ARM docs here.
- O << ", :" << MO4.getImm();
+ O << ", :" << MO2.getImm();
}
O << "]";
+}
- if (ARM_AM::getAM6WBFlag(MO3.getImm())) {
- if (MO2.getReg() == 0)
- O << "!";
- else
- O << ", " << getRegisterName(MO2.getReg());
- }
+void ARMAsmPrinter::printAddrMode6OffsetOperand(const MachineInstr *MI, int Op){
+ const MachineOperand &MO = MI->getOperand(Op);
+ if (MO.getReg() == 0)
+ O << "!";
+ else
+ O << ", " << getRegisterName(MO.getReg());
}
void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op,
@@ -604,7 +589,7 @@ void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op,
const MachineOperand &MO1 = MI->getOperand(Op);
assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
- O << "[pc, +" << getRegisterName(MO1.getReg()) << "]";
+ O << "[pc, " << getRegisterName(MO1.getReg()) << "]";
}
void
@@ -627,10 +612,11 @@ void
ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op) {
// (3 - the number of trailing zeros) is the number of then / else.
unsigned Mask = MI->getOperand(Op).getImm();
+ unsigned CondBit0 = Mask >> 4 & 1;
unsigned NumTZ = CountTrailingZeros_32(Mask);
assert(NumTZ <= 3 && "Invalid IT mask!");
for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
- bool T = (Mask & (1 << Pos)) == 0;
+ bool T = ((Mask >> Pos) & 1) == CondBit0;
if (T)
O << 't';
else
@@ -662,7 +648,7 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op,
if (MO3.getReg())
O << ", " << getRegisterName(MO3.getReg());
else if (unsigned ImmOffs = MO2.getImm())
- O << ", #+" << ImmOffs * Scale;
+ O << ", #" << ImmOffs * Scale;
O << "]";
}
@@ -684,7 +670,7 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) {
const MachineOperand &MO2 = MI->getOperand(Op+1);
O << "[" << getRegisterName(MO1.getReg());
if (unsigned ImmOffs = MO2.getImm())
- O << ", #+" << ImmOffs*4;
+ O << ", #" << ImmOffs*4;
O << "]";
}
@@ -720,7 +706,7 @@ void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI,
unsigned OffImm = MO2.getImm();
if (OffImm) // Don't print +0.
- O << ", #+" << OffImm;
+ O << ", #" << OffImm;
O << "]";
}
@@ -736,7 +722,7 @@ void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI,
if (OffImm < 0)
O << ", #-" << -OffImm;
else if (OffImm > 0)
- O << ", #+" << OffImm;
+ O << ", #" << OffImm;
O << "]";
}
@@ -752,7 +738,7 @@ void ARMAsmPrinter::printT2AddrModeImm8s4Operand(const MachineInstr *MI,
if (OffImm < 0)
O << ", #-" << -OffImm * 4;
else if (OffImm > 0)
- O << ", #+" << OffImm * 4;
+ O << ", #" << OffImm * 4;
O << "]";
}
@@ -764,7 +750,7 @@ void ARMAsmPrinter::printT2AddrModeImm8OffsetOperand(const MachineInstr *MI,
if (OffImm < 0)
O << "#-" << -OffImm;
else if (OffImm > 0)
- O << "#+" << OffImm;
+ O << "#" << OffImm;
}
void ARMAsmPrinter::printT2AddrModeSoRegOperand(const MachineInstr *MI,
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index a2084b0..30763a9 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -28,7 +28,159 @@ using namespace llvm;
#undef MachineInstr
#undef ARMAsmPrinter
-void ARMInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); }
+static unsigned NextReg(unsigned Reg) {
+ switch (Reg) {
+ default:
+ assert(0 && "Unexpected register enum");
+
+ case ARM::D0:
+ return ARM::D1;
+ case ARM::D1:
+ return ARM::D2;
+ case ARM::D2:
+ return ARM::D3;
+ case ARM::D3:
+ return ARM::D4;
+ case ARM::D4:
+ return ARM::D5;
+ case ARM::D5:
+ return ARM::D6;
+ case ARM::D6:
+ return ARM::D7;
+ case ARM::D7:
+ return ARM::D8;
+ case ARM::D8:
+ return ARM::D9;
+ case ARM::D9:
+ return ARM::D10;
+ case ARM::D10:
+ return ARM::D11;
+ case ARM::D11:
+ return ARM::D12;
+ case ARM::D12:
+ return ARM::D13;
+ case ARM::D13:
+ return ARM::D14;
+ case ARM::D14:
+ return ARM::D15;
+ case ARM::D15:
+ return ARM::D16;
+ case ARM::D16:
+ return ARM::D17;
+ case ARM::D17:
+ return ARM::D18;
+ case ARM::D18:
+ return ARM::D19;
+ case ARM::D19:
+ return ARM::D20;
+ case ARM::D20:
+ return ARM::D21;
+ case ARM::D21:
+ return ARM::D22;
+ case ARM::D22:
+ return ARM::D23;
+ case ARM::D23:
+ return ARM::D24;
+ case ARM::D24:
+ return ARM::D25;
+ case ARM::D25:
+ return ARM::D26;
+ case ARM::D26:
+ return ARM::D27;
+ case ARM::D27:
+ return ARM::D28;
+ case ARM::D28:
+ return ARM::D29;
+ case ARM::D29:
+ return ARM::D30;
+ case ARM::D30:
+ return ARM::D31;
+ }
+}
+
+void ARMInstPrinter::printInst(const MCInst *MI) {
+ // Check for MOVs and print canonical forms, instead.
+ if (MI->getOpcode() == ARM::MOVs) {
+ const MCOperand &Dst = MI->getOperand(0);
+ const MCOperand &MO1 = MI->getOperand(1);
+ const MCOperand &MO2 = MI->getOperand(2);
+ const MCOperand &MO3 = MI->getOperand(3);
+
+ O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()));
+ printSBitModifierOperand(MI, 6);
+ printPredicateOperand(MI, 4);
+
+ O << '\t' << getRegisterName(Dst.getReg())
+ << ", " << getRegisterName(MO1.getReg());
+
+ if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx)
+ return;
+
+ O << ", ";
+
+ if (MO2.getReg()) {
+ O << getRegisterName(MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+ } else {
+ O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ }
+ return;
+ }
+
+ // A8.6.123 PUSH
+ if ((MI->getOpcode() == ARM::STM_UPD || MI->getOpcode() == ARM::t2STM_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ const MCOperand &MO1 = MI->getOperand(2);
+ if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) {
+ O << '\t' << "push";
+ printPredicateOperand(MI, 3);
+ O << '\t';
+ printRegisterList(MI, 5);
+ return;
+ }
+ }
+
+ // A8.6.122 POP
+ if ((MI->getOpcode() == ARM::LDM_UPD || MI->getOpcode() == ARM::t2LDM_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ const MCOperand &MO1 = MI->getOperand(2);
+ if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) {
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 3);
+ O << '\t';
+ printRegisterList(MI, 5);
+ return;
+ }
+ }
+
+ // A8.6.355 VPUSH
+ if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ const MCOperand &MO1 = MI->getOperand(2);
+ if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::db) {
+ O << '\t' << "vpush";
+ printPredicateOperand(MI, 3);
+ O << '\t';
+ printRegisterList(MI, 5);
+ return;
+ }
+ }
+
+ // A8.6.354 VPOP
+ if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ const MCOperand &MO1 = MI->getOperand(2);
+ if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::ia) {
+ O << '\t' << "vpop";
+ printPredicateOperand(MI, 3);
+ O << '\t';
+ printRegisterList(MI, 5);
+ return;
+ }
+ }
+
+ printInstruction(MI);
+ }
void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const char *Modifier) {
@@ -36,6 +188,9 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg()) {
unsigned Reg = Op.getReg();
if (Modifier && strcmp(Modifier, "dregpair") == 0) {
+ O << '{' << getRegisterName(Reg) << ", "
+ << getRegisterName(NextReg(Reg)) << '}';
+#if 0
// FIXME: Breaks e.g. ARM/vmul.ll.
assert(0);
/*
@@ -44,6 +199,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << '{'
<< getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi)
<< '}';*/
+#endif
} else if (Modifier && strcmp(Modifier, "lane") == 0) {
assert(0);
/*
@@ -56,7 +212,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << getRegisterName(Reg);
}
} else if (Op.isImm()) {
- assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ assert((Modifier && !strcmp(Modifier, "call")) ||
+ ((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"));
O << '#' << Op.getImm();
} else {
assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
@@ -142,17 +299,17 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op) {
O << "[" << getRegisterName(MO1.getReg());
if (!MO2.getReg()) {
- if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
+ if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
O << ", #"
- << (char)ARM_AM::getAM2Op(MO3.getImm())
- << ARM_AM::getAM2Offset(MO3.getImm());
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << ARM_AM::getAM2Offset(MO3.getImm());
O << "]";
return;
}
O << ", "
- << (char)ARM_AM::getAM2Op(MO3.getImm())
- << getRegisterName(MO2.getReg());
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << getRegisterName(MO2.getReg());
if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
O << ", "
@@ -169,11 +326,14 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
if (!MO1.getReg()) {
unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
assert(ImmOffs && "Malformed indexed load / store!");
- O << '#' << (char)ARM_AM::getAM2Op(MO2.getImm()) << ImmOffs;
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+ << ImmOffs;
return;
}
- O << (char)ARM_AM::getAM2Op(MO2.getImm()) << getRegisterName(MO1.getReg());
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+ << getRegisterName(MO1.getReg());
if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
O << ", "
@@ -196,8 +356,8 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum) {
if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
O << ", #"
- << (char)ARM_AM::getAM3Op(MO3.getImm())
- << ImmOffs;
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+ << ImmOffs;
O << ']';
}
@@ -214,35 +374,24 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
assert(ImmOffs && "Malformed indexed load / store!");
- O << "#"
- << (char)ARM_AM::getAM3Op(MO2.getImm())
- << ImmOffs;
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
+ << ImmOffs;
}
void ARMInstPrinter::printAddrMode4Operand(const MCInst *MI, unsigned OpNum,
const char *Modifier) {
- const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
if (Modifier && strcmp(Modifier, "submode") == 0) {
- if (MO1.getReg() == ARM::SP) {
- // FIXME
- bool isLDM = (MI->getOpcode() == ARM::LDM ||
- MI->getOpcode() == ARM::LDM_RET ||
- MI->getOpcode() == ARM::t2LDM ||
- MI->getOpcode() == ARM::t2LDM_RET);
- O << ARM_AM::getAMSubModeAltStr(Mode, isLDM);
- } else
- O << ARM_AM::getAMSubModeStr(Mode);
+ O << ARM_AM::getAMSubModeStr(Mode);
} else if (Modifier && strcmp(Modifier, "wide") == 0) {
ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
if (Mode == ARM_AM::ia)
O << ".w";
} else {
printOperand(MI, OpNum);
- if (ARM_AM::getAM4WBFlag(MO2.getImm()))
- O << "!";
}
}
@@ -263,8 +412,6 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
} else if (Modifier && strcmp(Modifier, "base") == 0) {
// Used for FSTM{D|S} and LSTM{D|S} operations.
O << getRegisterName(MO1.getReg());
- if (ARM_AM::getAM5WBFlag(MO2.getImm()))
- O << "!";
return;
}
@@ -272,7 +419,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
O << ", #"
- << (char)ARM_AM::getAM5Op(MO2.getImm())
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
<< ImmOffs*4;
}
O << "]";
@@ -281,17 +428,22 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum) {
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
- const MCOperand &MO3 = MI->getOperand(OpNum+2);
- // FIXME: No support yet for specifying alignment.
- O << '[' << getRegisterName(MO1.getReg()) << ']';
-
- if (ARM_AM::getAM6WBFlag(MO3.getImm())) {
- if (MO2.getReg() == 0)
- O << '!';
- else
- O << ", " << getRegisterName(MO2.getReg());
+ O << "[" << getRegisterName(MO1.getReg());
+ if (MO2.getImm()) {
+ // FIXME: Both darwin as and GNU as violate ARM docs here.
+ O << ", :" << MO2.getImm();
}
+ O << "]";
+}
+
+void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
+ unsigned OpNum) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.getReg() == 0)
+ O << "!";
+ else
+ O << ", " << getRegisterName(MO.getReg());
}
void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum,
@@ -311,14 +463,56 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI,
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum) {
O << "{";
- // Always skip the first operand, it's the optional (and implicit writeback).
- for (unsigned i = OpNum+1, e = MI->getNumOperands(); i != e; ++i) {
- if (i != OpNum+1) O << ", ";
+ for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
+ if (i != OpNum) O << ", ";
O << getRegisterName(MI->getOperand(i).getReg());
}
O << "}";
}
+void ARMInstPrinter::printCPSOptionOperand(const MCInst *MI, unsigned OpNum) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ unsigned option = Op.getImm();
+ unsigned mode = option & 31;
+ bool changemode = option >> 5 & 1;
+ unsigned AIF = option >> 6 & 7;
+ unsigned imod = option >> 9 & 3;
+ if (imod == 2)
+ O << "ie";
+ else if (imod == 3)
+ O << "id";
+ O << '\t';
+ if (imod > 1) {
+ if (AIF & 4) O << 'a';
+ if (AIF & 2) O << 'i';
+ if (AIF & 1) O << 'f';
+ if (AIF > 0 && changemode) O << ", ";
+ }
+ if (changemode)
+ O << '#' << mode;
+}
+
+void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ unsigned Mask = Op.getImm();
+ if (Mask) {
+ O << '_';
+ if (Mask & 8) O << 'f';
+ if (Mask & 4) O << 's';
+ if (Mask & 2) O << 'x';
+ if (Mask & 1) O << 'c';
+ }
+}
+
+void ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum){
+ const MCOperand &Op = MI->getOperand(OpNum);
+ O << '#';
+ if (Op.getImm() < 0)
+ O << '-' << (-Op.getImm() - 1);
+ else
+ O << Op.getImm();
+}
+
void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum) {
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
if (CC != ARMCC::AL)
@@ -360,3 +554,191 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum) {
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum) {
O << "#" << MI->getOperand(OpNum).getImm() * 4;
}
+
+void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum) {
+ // (3 - the number of trailing zeros) is the number of then / else.
+ unsigned Mask = MI->getOperand(OpNum).getImm();
+ unsigned CondBit0 = Mask >> 4 & 1;
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
+ bool T = ((Mask >> Pos) & 1) == CondBit0;
+ if (T)
+ O << 't';
+ else
+ O << 'e';
+ }
+}
+
+void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op)
+{
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << getRegisterName(MO1.getReg());
+ O << ", " << getRegisterName(MO2.getReg()) << "]";
+}
+
+void ARMInstPrinter::printThumbAddrModeRI5Operand(const MCInst *MI, unsigned Op,
+ unsigned Scale) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (MO3.getReg())
+ O << ", " << getRegisterName(MO3.getReg());
+ else if (unsigned ImmOffs = MO2.getImm())
+ O << ", #" << ImmOffs * Scale;
+ O << "]";
+}
+
+void ARMInstPrinter::printThumbAddrModeS1Operand(const MCInst *MI, unsigned Op)
+{
+ printThumbAddrModeRI5Operand(MI, Op, 1);
+}
+
+void ARMInstPrinter::printThumbAddrModeS2Operand(const MCInst *MI, unsigned Op)
+{
+ printThumbAddrModeRI5Operand(MI, Op, 2);
+}
+
+void ARMInstPrinter::printThumbAddrModeS4Operand(const MCInst *MI, unsigned Op)
+{
+ printThumbAddrModeRI5Operand(MI, Op, 4);
+}
+
+void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI,unsigned Op) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << getRegisterName(MO1.getReg());
+ if (unsigned ImmOffs = MO2.getImm())
+ O << ", #" << ImmOffs*4;
+ O << "]";
+}
+
+void ARMInstPrinter::printTBAddrMode(const MCInst *MI, unsigned OpNum) {
+ O << "[pc, " << getRegisterName(MI->getOperand(OpNum).getReg());
+ if (MI->getOpcode() == ARM::t2TBH)
+ O << ", lsl #1";
+ O << ']';
+}
+
+// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
+// register with shift forms.
+// REG 0 0 - e.g. R5
+// REG IMM, SH_OPC - e.g. R5, LSL #3
+void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ unsigned Reg = MO1.getReg();
+ O << getRegisterName(Reg);
+
+ // Print the shift opc.
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
+ << " ";
+
+ assert(MO2.isImm() && "Not a valid t2_so_reg value!");
+ O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
+}
+
+void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI,
+ unsigned OpNum) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ unsigned OffImm = MO2.getImm();
+ if (OffImm) // Don't print +0.
+ O << ", #" << OffImm;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
+ unsigned OpNum) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ // Don't print +0.
+ if (OffImm < 0)
+ O << ", #-" << -OffImm;
+ else if (OffImm > 0)
+ O << ", #" << OffImm;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
+ unsigned OpNum) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm() / 4;
+ // Don't print +0.
+ if (OffImm < 0)
+ O << ", #-" << -OffImm * 4;
+ else if (OffImm > 0)
+ O << ", #" << OffImm * 4;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
+ unsigned OpNum) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ int32_t OffImm = (int32_t)MO1.getImm();
+ // Don't print +0.
+ if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else if (OffImm > 0)
+ O << "#" << OffImm;
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
+ unsigned OpNum) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ int32_t OffImm = (int32_t)MO1.getImm() / 4;
+ // Don't print +0.
+ if (OffImm < 0)
+ O << "#-" << -OffImm * 4;
+ else if (OffImm > 0)
+ O << "#" << OffImm * 4;
+}
+
+void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
+ unsigned OpNum) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ assert(MO2.getReg() && "Invalid so_reg load / store address!");
+ O << ", " << getRegisterName(MO2.getReg());
+
+ unsigned ShAmt = MO3.getImm();
+ if (ShAmt) {
+ assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
+ O << ", lsl #" << ShAmt;
+ }
+ O << "]";
+}
+
+void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum) {
+ O << '#' << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum) {
+ O << '#' << MI->getOperand(OpNum).getImm();
+}
+
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index 9a3cbc3..d41b5df 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -48,32 +48,33 @@ public:
void printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
const char *Modifier = 0);
void printAddrMode6Operand(const MCInst *MI, unsigned OpNum);
+ void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum);
void printAddrModePCOperand(const MCInst *MI, unsigned OpNum,
const char *Modifier = 0);
void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum);
void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum);
- void printThumbITMask(const MCInst *MI, unsigned OpNum) {}
- void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum) {}
+ void printThumbITMask(const MCInst *MI, unsigned OpNum);
+ void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum);
void printThumbAddrModeRI5Operand(const MCInst *MI, unsigned OpNum,
- unsigned Scale) {}
- void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum) {}
- void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum) {}
- void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum) {}
- void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum) {}
+ unsigned Scale);
+ void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum);
+ void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum);
+ void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum);
+ void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum);
- void printT2SOOperand(const MCInst *MI, unsigned OpNum) {}
- void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum) {}
- void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum) {}
- void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum) {}
- void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum) {}
- void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum) {}
- void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum) {}
+ void printT2SOOperand(const MCInst *MI, unsigned OpNum);
+ void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum);
+ void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum);
+ void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum);
+ void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum);
+ void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum);
+ void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum);
- void printCPSOptionOperand(const MCInst *MI, unsigned OpNum) {}
- void printMSRMaskOperand(const MCInst *MI, unsigned OpNum) {}
- void printNegZeroOperand(const MCInst *MI, unsigned OpNum) {}
+ void printCPSOptionOperand(const MCInst *MI, unsigned OpNum);
+ void printMSRMaskOperand(const MCInst *MI, unsigned OpNum);
+ void printNegZeroOperand(const MCInst *MI, unsigned OpNum);
void printPredicateOperand(const MCInst *MI, unsigned OpNum);
void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum);
void printSBitModifierOperand(const MCInst *MI, unsigned OpNum);
@@ -82,10 +83,10 @@ public:
const char *Modifier);
void printJTBlockOperand(const MCInst *MI, unsigned OpNum) {}
void printJT2BlockOperand(const MCInst *MI, unsigned OpNum) {}
- void printTBAddrMode(const MCInst *MI, unsigned OpNum) {}
+ void printTBAddrMode(const MCInst *MI, unsigned OpNum);
void printNoHashImmediate(const MCInst *MI, unsigned OpNum);
- void printVFPf32ImmOperand(const MCInst *MI, int OpNum) {}
- void printVFPf64ImmOperand(const MCInst *MI, int OpNum) {}
+ void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum);
+ void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum);
void printHex8ImmOperand(const MCInst *MI, int OpNum) {}
void printHex16ImmOperand(const MCInst *MI, int OpNum) {}
void printHex32ImmOperand(const MCInst *MI, int OpNum) {}
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index d9942c8..c36fe63 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -64,16 +64,16 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 4;
return true;
- case ARM::VLD2LNq16a:
- case ARM::VLD2LNq32a:
+ case ARM::VLD2LNq16:
+ case ARM::VLD2LNq32:
FirstOpnd = 0;
NumRegs = 2;
Offset = 0;
Stride = 2;
return true;
- case ARM::VLD2LNq16b:
- case ARM::VLD2LNq32b:
+ case ARM::VLD2LNq16odd:
+ case ARM::VLD2LNq32odd:
FirstOpnd = 0;
NumRegs = 2;
Offset = 1;
@@ -91,34 +91,34 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 3;
return true;
- case ARM::VLD3q8a:
- case ARM::VLD3q16a:
- case ARM::VLD3q32a:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
FirstOpnd = 0;
NumRegs = 3;
Offset = 0;
Stride = 2;
return true;
- case ARM::VLD3q8b:
- case ARM::VLD3q16b:
- case ARM::VLD3q32b:
+ case ARM::VLD3q8odd_UPD:
+ case ARM::VLD3q16odd_UPD:
+ case ARM::VLD3q32odd_UPD:
FirstOpnd = 0;
NumRegs = 3;
Offset = 1;
Stride = 2;
return true;
- case ARM::VLD3LNq16a:
- case ARM::VLD3LNq32a:
+ case ARM::VLD3LNq16:
+ case ARM::VLD3LNq32:
FirstOpnd = 0;
NumRegs = 3;
Offset = 0;
Stride = 2;
return true;
- case ARM::VLD3LNq16b:
- case ARM::VLD3LNq32b:
+ case ARM::VLD3LNq16odd:
+ case ARM::VLD3LNq32odd:
FirstOpnd = 0;
NumRegs = 3;
Offset = 1;
@@ -136,34 +136,34 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 4;
return true;
- case ARM::VLD4q8a:
- case ARM::VLD4q16a:
- case ARM::VLD4q32a:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
FirstOpnd = 0;
NumRegs = 4;
Offset = 0;
Stride = 2;
return true;
- case ARM::VLD4q8b:
- case ARM::VLD4q16b:
- case ARM::VLD4q32b:
+ case ARM::VLD4q8odd_UPD:
+ case ARM::VLD4q16odd_UPD:
+ case ARM::VLD4q32odd_UPD:
FirstOpnd = 0;
NumRegs = 4;
Offset = 1;
Stride = 2;
return true;
- case ARM::VLD4LNq16a:
- case ARM::VLD4LNq32a:
+ case ARM::VLD4LNq16:
+ case ARM::VLD4LNq32:
FirstOpnd = 0;
NumRegs = 4;
Offset = 0;
Stride = 2;
return true;
- case ARM::VLD4LNq16b:
- case ARM::VLD4LNq32b:
+ case ARM::VLD4LNq16odd:
+ case ARM::VLD4LNq32odd:
FirstOpnd = 0;
NumRegs = 4;
Offset = 1;
@@ -177,28 +177,28 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST2LNd8:
case ARM::VST2LNd16:
case ARM::VST2LNd32:
- FirstOpnd = 4;
+ FirstOpnd = 2;
NumRegs = 2;
return true;
case ARM::VST2q8:
case ARM::VST2q16:
case ARM::VST2q32:
- FirstOpnd = 4;
+ FirstOpnd = 2;
NumRegs = 4;
return true;
- case ARM::VST2LNq16a:
- case ARM::VST2LNq32a:
- FirstOpnd = 4;
+ case ARM::VST2LNq16:
+ case ARM::VST2LNq32:
+ FirstOpnd = 2;
NumRegs = 2;
Offset = 0;
Stride = 2;
return true;
- case ARM::VST2LNq16b:
- case ARM::VST2LNq32b:
- FirstOpnd = 4;
+ case ARM::VST2LNq16odd:
+ case ARM::VST2LNq32odd:
+ FirstOpnd = 2;
NumRegs = 2;
Offset = 1;
Stride = 2;
@@ -211,39 +211,39 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3LNd8:
case ARM::VST3LNd16:
case ARM::VST3LNd32:
- FirstOpnd = 4;
+ FirstOpnd = 2;
NumRegs = 3;
return true;
- case ARM::VST3q8a:
- case ARM::VST3q16a:
- case ARM::VST3q32a:
- FirstOpnd = 5;
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ FirstOpnd = 4;
NumRegs = 3;
Offset = 0;
Stride = 2;
return true;
- case ARM::VST3q8b:
- case ARM::VST3q16b:
- case ARM::VST3q32b:
- FirstOpnd = 5;
+ case ARM::VST3q8odd_UPD:
+ case ARM::VST3q16odd_UPD:
+ case ARM::VST3q32odd_UPD:
+ FirstOpnd = 4;
NumRegs = 3;
Offset = 1;
Stride = 2;
return true;
- case ARM::VST3LNq16a:
- case ARM::VST3LNq32a:
- FirstOpnd = 4;
+ case ARM::VST3LNq16:
+ case ARM::VST3LNq32:
+ FirstOpnd = 2;
NumRegs = 3;
Offset = 0;
Stride = 2;
return true;
- case ARM::VST3LNq16b:
- case ARM::VST3LNq32b:
- FirstOpnd = 4;
+ case ARM::VST3LNq16odd:
+ case ARM::VST3LNq32odd:
+ FirstOpnd = 2;
NumRegs = 3;
Offset = 1;
Stride = 2;
@@ -256,39 +256,39 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4LNd8:
case ARM::VST4LNd16:
case ARM::VST4LNd32:
- FirstOpnd = 4;
+ FirstOpnd = 2;
NumRegs = 4;
return true;
- case ARM::VST4q8a:
- case ARM::VST4q16a:
- case ARM::VST4q32a:
- FirstOpnd = 5;
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ FirstOpnd = 4;
NumRegs = 4;
Offset = 0;
Stride = 2;
return true;
- case ARM::VST4q8b:
- case ARM::VST4q16b:
- case ARM::VST4q32b:
- FirstOpnd = 5;
+ case ARM::VST4q8odd_UPD:
+ case ARM::VST4q16odd_UPD:
+ case ARM::VST4q32odd_UPD:
+ FirstOpnd = 4;
NumRegs = 4;
Offset = 1;
Stride = 2;
return true;
- case ARM::VST4LNq16a:
- case ARM::VST4LNq32a:
- FirstOpnd = 4;
+ case ARM::VST4LNq16:
+ case ARM::VST4LNq32:
+ FirstOpnd = 2;
NumRegs = 4;
Offset = 0;
Stride = 2;
return true;
- case ARM::VST4LNq16b:
- case ARM::VST4LNq32b:
- FirstOpnd = 4;
+ case ARM::VST4LNq16odd:
+ case ARM::VST4LNq32odd:
+ FirstOpnd = 2;
NumRegs = 4;
Offset = 1;
Stride = 2;
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index f5ba155..f36d4ef 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -78,14 +78,16 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
DebugLoc ndl = NMI->getDebugLoc();
unsigned NPredReg = 0;
ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg);
- if (NCC == OCC) {
- Mask |= (1 << Pos);
- } else if (NCC != CC)
+ if (NCC == CC || NCC == OCC)
+ Mask |= (NCC & 1) << Pos;
+ else
break;
--Pos;
++MBBI;
}
Mask |= (1 << Pos);
+ // Tag along (firstcond[0] << 4) with the mask.
+ Mask |= (CC & 1) << 4;
MIB.addImm(Mask);
Modified = true;
++NumITs;
OpenPOWER on IntegriCloud