diff options
author | rdivacky <rdivacky@FreeBSD.org> | 2010-03-21 10:49:05 +0000 |
---|---|---|
committer | rdivacky <rdivacky@FreeBSD.org> | 2010-03-21 10:49:05 +0000 |
commit | 2f2afc1aae898651e26987a5c71f3febb19bca98 (patch) | |
tree | 2caca31db4facdc95c23930c0c745c8ef0dee97d /lib/Target/ARM | |
parent | 0f448b841684305c051796982f300c9bff959307 (diff) | |
download | FreeBSD-src-2f2afc1aae898651e26987a5c71f3febb19bca98.zip FreeBSD-src-2f2afc1aae898651e26987a5c71f3febb19bca98.tar.gz |
Update LLVM to r99115.
Diffstat (limited to 'lib/Target/ARM')
22 files changed, 1368 insertions, 726 deletions
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index ddeb1b9..ea62c33 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -35,6 +35,10 @@ namespace ARM_AM { add = '+', sub = '-' }; + static inline const char *getAddrOpcStr(AddrOpc Op) { + return Op == sub ? "-" : ""; + } + static inline const char *getShiftOpcStr(ShiftOpc Op) { switch (Op) { default: assert(0 && "Unknown shift opc!"); @@ -78,16 +82,6 @@ namespace ARM_AM { } } - static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) { - switch (Mode) { - default: assert(0 && "Unknown addressing sub-mode!"); - case ARM_AM::ia: return isLD ? "fd" : "ea"; - case ARM_AM::ib: return isLD ? "ed" : "fa"; - case ARM_AM::da: return isLD ? "fa" : "ed"; - case ARM_AM::db: return isLD ? "ea" : "fd"; - } - } - /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits. /// static inline unsigned rotr32(unsigned Val, unsigned Amt) { @@ -473,20 +467,13 @@ namespace ARM_AM { // IB - Increment before // DA - Decrement after // DB - Decrement before - // - // If the 4th bit (writeback)is set, then the base register is updated after - // the memory transfer. static inline AMSubMode getAM4SubMode(unsigned Mode) { return (AMSubMode)(Mode & 0x7); } - static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) { - return (int)SubMode | ((int)WB << 3); - } - - static inline bool getAM4WBFlag(unsigned Mode) { - return (Mode >> 3) & 1; + static inline unsigned getAM4ModeImm(AMSubMode SubMode) { + return (int)SubMode; } //===--------------------------------------------------------------------===// @@ -501,9 +488,9 @@ namespace ARM_AM { // operation in bit 8 and the immediate in bits 0-7. // // This is also used for FP load/store multiple ops. The second operand - // encodes the writeback mode in bit 8 and the number of registers (or 2 - // times the number of registers for DPR ops) in bits 0-7. In addition, - // bits 9-11 encode one of the following two sub-modes: + // encodes the number of registers (or 2 times the number of registers + // for DPR ops) in bits 0-7. In addition, bits 8-10 encode one of the + // following two sub-modes: // // IA - Increment after // DB - Decrement before @@ -522,17 +509,13 @@ namespace ARM_AM { /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and /// VSTM instructions. - static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB, - unsigned char Offset) { + static inline unsigned getAM5Opc(AMSubMode SubMode, unsigned char Offset) { assert((SubMode == ia || SubMode == db) && "Illegal addressing mode 5 sub-mode!"); - return ((int)SubMode << 9) | ((int)WB << 8) | Offset; + return ((int)SubMode << 8) | Offset; } static inline AMSubMode getAM5SubMode(unsigned AM5Opc) { - return (AMSubMode)((AM5Opc >> 9) & 0x7); - } - static inline bool getAM5WBFlag(unsigned AM5Opc) { - return ((AM5Opc >> 8) & 1); + return (AMSubMode)((AM5Opc >> 8) & 0x7); } //===--------------------------------------------------------------------===// @@ -541,23 +524,11 @@ namespace ARM_AM { // // This is used for NEON load / store instructions. // - // addrmode6 := reg with optional writeback and alignment + // addrmode6 := reg with optional alignment // - // This is stored in four operands [regaddr, regupdate, opc, align]. The - // first is the address register. The second register holds the value of - // a post-access increment for writeback or reg0 if no writeback or if the - // writeback increment is the size of the memory access. The third - // operand encodes whether there is writeback to the address register. The - // fourth operand is the value of the alignment specifier to use or zero if - // no explicit alignment. - - static inline unsigned getAM6Opc(bool WB = false) { - return (int)WB; - } - - static inline bool getAM6WBFlag(unsigned Mode) { - return Mode & 1; - } + // This is stored in two operands [regaddr, align]. The first is the + // address register. The second operand is the value of the alignment + // specifier to use or zero if no explicit alignment. } // end namespace ARM_AM } // end namespace llvm diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 8e537d8..e6ea03a 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -650,39 +650,49 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, if (SrcRC == ARM::tGPRRegisterClass) SrcRC = ARM::GPRRegisterClass; - if (DestRC != SrcRC) { - if (DestRC->getSize() != SrcRC->getSize()) - return false; + // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies. + if (DestRC == ARM::DPR_8RegisterClass) + DestRC = ARM::DPR_VFP2RegisterClass; + if (SrcRC == ARM::DPR_8RegisterClass) + SrcRC = ARM::DPR_VFP2RegisterClass; + + // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies. + if (DestRC == ARM::QPR_VFP2RegisterClass || + DestRC == ARM::QPR_8RegisterClass) + DestRC = ARM::QPRRegisterClass; + if (SrcRC == ARM::QPR_VFP2RegisterClass || + SrcRC == ARM::QPR_8RegisterClass) + SrcRC = ARM::QPRRegisterClass; + + // Disallow copies of unequal sizes. + if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize()) + return false; - // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies. - // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies. - if (DestRC->getSize() != 8 && DestRC->getSize() != 16) + if (DestRC == ARM::GPRRegisterClass) { + if (SrcRC == ARM::SPRRegisterClass) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVRS), DestReg) + .addReg(SrcReg)); + else + AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), + DestReg).addReg(SrcReg))); + } else { + unsigned Opc; + + if (DestRC == ARM::SPRRegisterClass) + Opc = (SrcRC == ARM::GPRRegisterClass ? ARM::VMOVSR : ARM::VMOVS); + else if (DestRC == ARM::DPRRegisterClass) + Opc = ARM::VMOVD; + else if (DestRC == ARM::DPR_VFP2RegisterClass || + SrcRC == ARM::DPR_VFP2RegisterClass) + // Always use neon reg-reg move if source or dest is NEON-only regclass. + Opc = ARM::VMOVDneon; + else if (DestRC == ARM::QPRRegisterClass) + Opc = ARM::VMOVQ; + else return false; - } - if (DestRC == ARM::GPRRegisterClass) { - AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), - DestReg).addReg(SrcReg))); - } else if (DestRC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVS), DestReg) - .addReg(SrcReg)); - } else if (DestRC == ARM::DPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg) .addReg(SrcReg)); - } else if (DestRC == ARM::DPR_VFP2RegisterClass || - DestRC == ARM::DPR_8RegisterClass || - SrcRC == ARM::DPR_VFP2RegisterClass || - SrcRC == ARM::DPR_8RegisterClass) { - // Always use neon reg-reg move if source or dest is NEON-only regclass. - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVDneon), - DestReg).addReg(SrcReg)); - } else if (DestRC == ARM::QPRRegisterClass || - DestRC == ARM::QPR_VFP2RegisterClass || - DestRC == ARM::QPR_8RegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVQ), - DestReg).addReg(SrcReg)); - } else { - return false; } return true; @@ -727,10 +737,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert((RC == ARM::QPRRegisterClass || RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); // FIXME: Neon instructions should support predicates - if (Align >= 16 - && (getRegisterInfo().canRealignStack(MF))) { + if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) - .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) + .addFrameIndex(FI).addImm(128) .addMemOperand(MMO) .addReg(SrcReg, getKillRegState(isKill))); } else { @@ -780,7 +789,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) - .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) + .addFrameIndex(FI).addImm(128) .addMemOperand(MMO)); } else { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 767d5ec..292c498 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -93,34 +93,34 @@ namespace ARMII { StMiscFrm = 9 << FormShift, LdStMulFrm = 10 << FormShift, - LdStExFrm = 28 << FormShift, + LdStExFrm = 11 << FormShift, // Miscellaneous arithmetic instructions - ArithMiscFrm = 11 << FormShift, + ArithMiscFrm = 12 << FormShift, // Extend instructions - ExtFrm = 12 << FormShift, + ExtFrm = 13 << FormShift, // VFP formats - VFPUnaryFrm = 13 << FormShift, - VFPBinaryFrm = 14 << FormShift, - VFPConv1Frm = 15 << FormShift, - VFPConv2Frm = 16 << FormShift, - VFPConv3Frm = 17 << FormShift, - VFPConv4Frm = 18 << FormShift, - VFPConv5Frm = 19 << FormShift, - VFPLdStFrm = 20 << FormShift, - VFPLdStMulFrm = 21 << FormShift, - VFPMiscFrm = 22 << FormShift, + VFPUnaryFrm = 14 << FormShift, + VFPBinaryFrm = 15 << FormShift, + VFPConv1Frm = 16 << FormShift, + VFPConv2Frm = 17 << FormShift, + VFPConv3Frm = 18 << FormShift, + VFPConv4Frm = 19 << FormShift, + VFPConv5Frm = 20 << FormShift, + VFPLdStFrm = 21 << FormShift, + VFPLdStMulFrm = 22 << FormShift, + VFPMiscFrm = 23 << FormShift, // Thumb format - ThumbFrm = 23 << FormShift, + ThumbFrm = 24 << FormShift, // NEON format - NEONFrm = 24 << FormShift, - NEONGetLnFrm = 25 << FormShift, - NEONSetLnFrm = 26 << FormShift, - NEONDupFrm = 27 << FormShift, + NEONFrm = 25 << FormShift, + NEONGetLnFrm = 26 << FormShift, + NEONSetLnFrm = 27 << FormShift, + NEONDupFrm = 28 << FormShift, //===------------------------------------------------------------------===// // Misc flags. diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 11e1c48..b380c95 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -80,7 +80,7 @@ unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, case D23: return 23; case D24: return 24; case D25: return 25; - case D26: return 27; + case D26: return 26; case D27: return 27; case D28: return 28; case D29: return 29; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 334c820..e7aa0c8 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -55,12 +55,12 @@ namespace { const std::vector<MachineConstantPoolEntry> *MCPEs; const std::vector<MachineJumpTableEntry> *MJTEs; bool IsPIC; - + void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineModuleInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } - + static char ID; public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) @@ -68,7 +68,7 @@ namespace { TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} - + /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for /// machine instructions. @@ -163,7 +163,7 @@ namespace { char ARMCodeEmitter::ID = 0; -/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM +/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM /// code to the specified MCE object. FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, JITCodeEmitter &JCE) { @@ -617,8 +617,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { } } -unsigned ARMCodeEmitter::getMachineSoRegOpValue( - const MachineInstr &MI, +unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, const TargetInstrDesc &TID, const MachineOperand &MO, unsigned OpIdx) { @@ -690,7 +689,7 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { } unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, - const TargetInstrDesc &TID) const { + const TargetInstrDesc &TID) const { for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){ const MachineOperand &MO = MI.getOperand(i-1); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) @@ -699,8 +698,7 @@ unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, return 0; } -void ARMCodeEmitter::emitDataProcessingInstruction( - const MachineInstr &MI, +void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); @@ -765,8 +763,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction( emitWordLE(Binary); } -void ARMCodeEmitter::emitLoadStoreInstruction( - const MachineInstr &MI, +void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); @@ -841,7 +838,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction( } void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRn) { + unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); unsigned Form = TID.TSFlags & ARMII::FormMask; bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0; @@ -950,7 +947,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm())); // Set bit W(21) - if (ARM_AM::getAM4WBFlag(MO.getImm())) + if (IsUpdating) Binary |= 0x1 << ARMII::W_BitShift; // Set registers @@ -1238,8 +1235,7 @@ void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -void ARMCodeEmitter::emitVFPConversionInstruction( - const MachineInstr &MI) { +void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); unsigned Form = TID.TSFlags & ARMII::FormMask; @@ -1329,8 +1325,8 @@ void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction( - const MachineInstr &MI) { +void +ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0; @@ -1353,7 +1349,7 @@ void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction( Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm())); // Set bit W(21) - if (ARM_AM::getAM5WBFlag(MO.getImm())) + if (IsUpdating) Binary |= 0x1 << ARMII::W_BitShift; // First register is encoded in Dd. diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 013e00a..71207c8 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -80,8 +80,7 @@ public: SDValue &Mode); bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset); - bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Update, - SDValue &Opc, SDValue &Align); + bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Align); bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Label); @@ -502,12 +501,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N, } bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N, - SDValue &Addr, SDValue &Update, - SDValue &Opc, SDValue &Align) { + SDValue &Addr, SDValue &Align) { Addr = N; - // Default to no writeback. - Update = CurDAG->getRegister(0, MVT::i32); - Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32); // Default to no alignment. Align = CurDAG->getTargetConstant(0, MVT::i32); return true; @@ -1030,8 +1025,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1055,14 +1050,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, - Pred, PredReg, Chain }; + const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; std::vector<EVT> ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); + return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1070,11 +1064,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, // Quad registers are directly supported for VLD2, // loading 2 pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, - Pred, PredReg, Chain }; + const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; std::vector<EVT> ResTys(4, VT); ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); Chain = SDValue(VLd, 4); // Combine the even and odd subregs to produce the result. @@ -1086,25 +1079,21 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, // Otherwise, quad registers are loaded with two separate instructions, // where one loads the even registers and the other loads the odd registers. - // Enable writeback to the address register. - MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); - std::vector<EVT> ResTys(NumVecs, RegVT); ResTys.push_back(MemAddr.getValueType()); ResTys.push_back(MVT::Other); // Load the even subregs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, - Pred, PredReg, Chain }; - SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 7); + const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6); Chain = SDValue(VLdA, NumVecs+1); // Load the odd subregs. Opc = QOpcodes1[OpcodeIndex]; - const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, - Align, Pred, PredReg, Chain }; - SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 7); + const SDValue OpsB[] = { SDValue(VLdA, NumVecs), + Align, Reg0, Pred, Reg0, Chain }; + SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); Chain = SDValue(VLdB, NumVecs+1); // Combine the even and odd subregs to produce the result. @@ -1123,8 +1112,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1148,12 +1137,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector<SDValue, 8> Ops; + SmallVector<SDValue, 10> Ops; Ops.push_back(MemAddr); - Ops.push_back(MemUpdate); - Ops.push_back(MemOpc); Ops.push_back(Align); if (is64BitVector) { @@ -1161,9 +1148,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(N->getOperand(Vec+3)); Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1178,40 +1165,37 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, N->getOperand(Vec+3))); } Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 11); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9); } // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. - // Enable writeback to the address register. - MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + Ops.push_back(Reg0); // post-access address offset // Store the even subregs. for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, N->getOperand(Vec+3))); Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); unsigned Opc = QOpcodes0[OpcodeIndex]; SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+7); + MVT::Other, Ops.data(), NumVecs+6); Chain = SDValue(VStA, 1); // Store the odd subregs. Ops[0] = SDValue(VStA, 0); // MemAddr for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, N->getOperand(Vec+3)); - Ops[NumVecs+4] = Pred; - Ops[NumVecs+5] = PredReg; - Ops[NumVecs+6] = Chain; + Ops[NumVecs+5] = Chain; Opc = QOpcodes1[OpcodeIndex]; SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+7); + MVT::Other, Ops.data(), NumVecs+6); Chain = SDValue(VStB, 1); ReplaceUses(SDValue(N, 0), Chain); return NULL; @@ -1224,8 +1208,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1259,12 +1243,10 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector<SDValue, 9> Ops; + SmallVector<SDValue, 10> Ops; Ops.push_back(MemAddr); - Ops.push_back(MemUpdate); - Ops.push_back(MemOpc); Ops.push_back(Align); unsigned Opc = 0; @@ -1287,16 +1269,16 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, } Ops.push_back(getI32Imm(Lane)); Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); Ops.push_back(Chain); if (!IsLoad) - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+8); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6); std::vector<EVT> ResTys(NumVecs, RegVT); ResTys.push_back(MVT::Other); SDNode *VLdLn = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+8); + CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6); // For a 64-bit vector load to D registers, nothing more needs to be done. if (is64BitVector) return VLdLn; @@ -1859,37 +1841,45 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vld3: { unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16, ARM::VLD3d32, ARM::VLD3d64 }; - unsigned QOpcodes0[] = { ARM::VLD3q8a, ARM::VLD3q16a, ARM::VLD3q32a }; - unsigned QOpcodes1[] = { ARM::VLD3q8b, ARM::VLD3q16b, ARM::VLD3q32b }; + unsigned QOpcodes0[] = { ARM::VLD3q8_UPD, + ARM::VLD3q16_UPD, + ARM::VLD3q32_UPD }; + unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD, + ARM::VLD3q16odd_UPD, + ARM::VLD3q32odd_UPD }; return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4: { unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16, ARM::VLD4d32, ARM::VLD4d64 }; - unsigned QOpcodes0[] = { ARM::VLD4q8a, ARM::VLD4q16a, ARM::VLD4q32a }; - unsigned QOpcodes1[] = { ARM::VLD4q8b, ARM::VLD4q16b, ARM::VLD4q32b }; + unsigned QOpcodes0[] = { ARM::VLD4q8_UPD, + ARM::VLD4q16_UPD, + ARM::VLD4q32_UPD }; + unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD, + ARM::VLD4q16odd_UPD, + ARM::VLD4q32odd_UPD }; return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld2lane: { unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 }; - unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a }; - unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b }; + unsigned QOpcodes0[] = { ARM::VLD2LNq16, ARM::VLD2LNq32 }; + unsigned QOpcodes1[] = { ARM::VLD2LNq16odd, ARM::VLD2LNq32odd }; return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld3lane: { unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 }; - unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a }; - unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b }; + unsigned QOpcodes0[] = { ARM::VLD3LNq16, ARM::VLD3LNq32 }; + unsigned QOpcodes1[] = { ARM::VLD3LNq16odd, ARM::VLD3LNq32odd }; return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4lane: { unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 }; - unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a }; - unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b }; + unsigned QOpcodes0[] = { ARM::VLD4LNq16, ARM::VLD4LNq32 }; + unsigned QOpcodes1[] = { ARM::VLD4LNq16odd, ARM::VLD4LNq32odd }; return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); } @@ -1903,37 +1893,45 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vst3: { unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16, ARM::VST3d32, ARM::VST3d64 }; - unsigned QOpcodes0[] = { ARM::VST3q8a, ARM::VST3q16a, ARM::VST3q32a }; - unsigned QOpcodes1[] = { ARM::VST3q8b, ARM::VST3q16b, ARM::VST3q32b }; + unsigned QOpcodes0[] = { ARM::VST3q8_UPD, + ARM::VST3q16_UPD, + ARM::VST3q32_UPD }; + unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD, + ARM::VST3q16odd_UPD, + ARM::VST3q32odd_UPD }; return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4: { unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16, ARM::VST4d32, ARM::VST4d64 }; - unsigned QOpcodes0[] = { ARM::VST4q8a, ARM::VST4q16a, ARM::VST4q32a }; - unsigned QOpcodes1[] = { ARM::VST4q8b, ARM::VST4q16b, ARM::VST4q32b }; + unsigned QOpcodes0[] = { ARM::VST4q8_UPD, + ARM::VST4q16_UPD, + ARM::VST4q32_UPD }; + unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD, + ARM::VST4q16odd_UPD, + ARM::VST4q32odd_UPD }; return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst2lane: { unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 }; - unsigned QOpcodes0[] = { ARM::VST2LNq16a, ARM::VST2LNq32a }; - unsigned QOpcodes1[] = { ARM::VST2LNq16b, ARM::VST2LNq32b }; + unsigned QOpcodes0[] = { ARM::VST2LNq16, ARM::VST2LNq32 }; + unsigned QOpcodes1[] = { ARM::VST2LNq16odd, ARM::VST2LNq32odd }; return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst3lane: { unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 }; - unsigned QOpcodes0[] = { ARM::VST3LNq16a, ARM::VST3LNq32a }; - unsigned QOpcodes1[] = { ARM::VST3LNq16b, ARM::VST3LNq32b }; + unsigned QOpcodes0[] = { ARM::VST3LNq16, ARM::VST3LNq32 }; + unsigned QOpcodes1[] = { ARM::VST3LNq16odd, ARM::VST3LNq32odd }; return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4lane: { unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 }; - unsigned QOpcodes0[] = { ARM::VST4LNq16a, ARM::VST4LNq32a }; - unsigned QOpcodes1[] = { ARM::VST4LNq16b, ARM::VST4LNq32b }; + unsigned QOpcodes0[] = { ARM::VST4LNq16, ARM::VST4LNq32 }; + unsigned QOpcodes1[] = { ARM::VST4LNq16odd, ARM::VST4LNq32odd }; return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); } } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 8f20843..0d0a004 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -436,9 +436,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } // Special handling for half-precision FP. - if (Subtarget->hasVFP3() && Subtarget->hasFP16()) { - setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Custom); - setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Custom); + if (!Subtarget->hasFP16()) { + setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); + setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); } } @@ -499,8 +499,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::FTOUI: return "ARMISD::FTOUI"; case ARMISD::SITOF: return "ARMISD::SITOF"; case ARMISD::UITOF: return "ARMISD::UITOF"; - case ARMISD::F16_TO_F32: return "ARMISD::F16_TO_F32"; - case ARMISD::F32_TO_F16: return "ARMISD::F32_TO_F16"; case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; @@ -1987,9 +1985,6 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { default: assert(0 && "Invalid opcode!"); - case ISD::FP32_TO_FP16: - Opc = ARMISD::F32_TO_F16; - break; case ISD::FP_TO_SINT: Opc = ARMISD::FTOSI; break; @@ -2009,9 +2004,6 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { default: assert(0 && "Invalid opcode!"); - case ISD::FP16_TO_FP32: - Opc = ARMISD::F16_TO_F32; - break; case ISD::SINT_TO_FP: Opc = ARMISD::SITOF; break; @@ -3078,10 +3070,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); - case ISD::FP16_TO_FP32: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); - case ISD::FP32_TO_FP16: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index d7b2ba3..f8f8adc 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -59,8 +59,6 @@ namespace llvm { FTOUI, // FP to uint within a FP register. SITOF, // sint to FP within a FP register. UITOF, // uint to FP within a FP register. - F16_TO_F32, // Half FP to single FP within a FP register. - F32_TO_F16, // Single FP to half FP within a FP register. SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out. SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 258a96b..4f6f05d 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -15,8 +15,8 @@ // Format specifies the encoding used by the instruction. This is part of the // ad-hoc solution used to emit machine instruction encodings by our machine // code emitter. -class Format<bits<5> val> { - bits<5> Value = val; +class Format<bits<6> val> { + bits<6> Value = val; } def Pseudo : Format<0>; @@ -33,32 +33,34 @@ def LdMiscFrm : Format<8>; def StMiscFrm : Format<9>; def LdStMulFrm : Format<10>; -def LdStExFrm : Format<28>; +def LdStExFrm : Format<11>; -def ArithMiscFrm : Format<11>; -def ExtFrm : Format<12>; +def ArithMiscFrm : Format<12>; +def ExtFrm : Format<13>; -def VFPUnaryFrm : Format<13>; -def VFPBinaryFrm : Format<14>; -def VFPConv1Frm : Format<15>; -def VFPConv2Frm : Format<16>; -def VFPConv3Frm : Format<17>; -def VFPConv4Frm : Format<18>; -def VFPConv5Frm : Format<19>; -def VFPLdStFrm : Format<20>; -def VFPLdStMulFrm : Format<21>; -def VFPMiscFrm : Format<22>; +def VFPUnaryFrm : Format<14>; +def VFPBinaryFrm : Format<15>; +def VFPConv1Frm : Format<16>; +def VFPConv2Frm : Format<17>; +def VFPConv3Frm : Format<18>; +def VFPConv4Frm : Format<19>; +def VFPConv5Frm : Format<20>; +def VFPLdStFrm : Format<21>; +def VFPLdStMulFrm : Format<22>; +def VFPMiscFrm : Format<23>; -def ThumbFrm : Format<23>; +def ThumbFrm : Format<24>; -def NEONFrm : Format<24>; -def NEONGetLnFrm : Format<25>; -def NEONSetLnFrm : Format<26>; -def NEONDupFrm : Format<27>; +def NEONFrm : Format<25>; +def NEONGetLnFrm : Format<26>; +def NEONSetLnFrm : Format<27>; +def NEONDupFrm : Format<28>; def MiscFrm : Format<29>; def ThumbMiscFrm : Format<30>; +def NLdStFrm : Format<31>; + // Misc flags. // the instruction has a Rn register operand. @@ -71,7 +73,7 @@ class UnaryDP { bit isUnaryDataProc = 1; } class Xform16Bit { bit canXformTo16Bit = 1; } //===----------------------------------------------------------------------===// -// ARM Instruction flags. These need to match ARMInstrInfo.h. +// ARM Instruction flags. These need to match ARMBaseInstrInfo.h. // // Addressing mode. @@ -183,7 +185,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im, bits<2> IndexModeBits = IM.Value; Format F = f; - bits<5> Form = F.Value; + bits<6> Form = F.Value; Domain D = d; bits<2> Dom = D.Value; @@ -229,7 +231,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<dag> pattern> : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsARM]; @@ -257,7 +259,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<dag> pattern> : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); + let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsARM]; @@ -1007,8 +1009,8 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { - let OutOperandList = !con(oops, (ops s_cc_out:$s)); - let InOperandList = !con(iops, (ops pred:$p)); + let OutOperandList = !con(oops, (outs s_cc_out:$s)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb1Only]; @@ -1030,7 +1032,7 @@ class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, string opc, string asm, string cstr, list<dag> pattern> : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb1Only]; @@ -1109,7 +1111,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; @@ -1125,7 +1127,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); + let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; @@ -1209,7 +1211,7 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; @@ -1265,7 +1267,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, im, f, VFPDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [HasVFP2]; @@ -1464,11 +1466,12 @@ class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, // ARM NEON Instruction templates. // -class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, - string opc, string dt, string asm, string cstr, list<dag> pattern> - : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> { +class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, + InstrItinClass itin, string opc, string dt, string asm, string cstr, + list<dag> pattern> + : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat( !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)), !strconcat("\t", asm)); @@ -1481,7 +1484,7 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm)); let Pattern = pattern; list<Predicate> Predicates = [HasNEON]; @@ -1502,8 +1505,8 @@ class NI4<dag oops, dag iops, InstrItinClass itin, string opc, class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> - : NeonI<oops, iops, AddrMode6, IndexModeNone, itin, opc, dt, asm, cstr, - pattern> { + : NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm, + cstr, pattern> { let Inst{31-24} = 0b11110100; let Inst{23} = op23; let Inst{21-20} = op21_20; @@ -1513,7 +1516,7 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, class NDataI<dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> - : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, dt, asm, + : NeonI<oops, iops, AddrModeNone, IndexModeNone, NEONFrm, itin, opc, dt, asm, cstr, pattern> { let Inst{31-25} = 0b1111001; } @@ -1621,7 +1624,7 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, let Inst{4} = 1; let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat( !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)), !strconcat("\t", asm)); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 3fc37da..26a2806 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -62,11 +62,14 @@ def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; @@ -282,7 +285,7 @@ def pclabel : Operand<i32> { // shifter_operand operands: so_reg and so_imm. def so_reg : Operand<i32>, // reg reg imm - ComplexPattern<i32, 3, "SelectShifterOperandReg", + ComplexPattern<i32, 3, "SelectShifterOperandReg", [shl,srl,sra,rotr]> { let PrintMethod = "printSORegOperand"; let MIOperandInfo = (ops GPR, GPR, i32imm); @@ -392,9 +395,14 @@ def addrmode5 : Operand<i32>, // addrmode6 := reg with optional writeback // def addrmode6 : Operand<i32>, - ComplexPattern<i32, 4, "SelectAddrMode6", []> { + ComplexPattern<i32, 2, "SelectAddrMode6", []> { let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm); + let MIOperandInfo = (ops GPR:$addr, i32imm); +} + +def am6offset : Operand<i32> { + let PrintMethod = "printAddrMode6OffsetOperand"; + let MIOperandInfo = (ops GPR); } // addrmodepc := pc + reg @@ -909,7 +917,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, LdStMulFrm, IIC_Br, - "ldm${addr:submode}${p}\t$addr, $dsts", + "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>; // On non-Darwin platforms R9 is callee-saved. @@ -1354,7 +1362,7 @@ def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, LdStMulFrm, IIC_iLoadm, - "ldm${addr:submode}${p}\t$addr, $dsts", + "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>; } // mayLoad, hasExtraDefRegAllocReq @@ -1367,7 +1375,7 @@ def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p, def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, LdStMulFrm, IIC_iStorem, - "stm${addr:submode}${p}\t$addr, $srcs", + "stm${addr:submode}${p}\t$addr!, $srcs", "$addr.addr = $wb", []>; } // mayStore, hasExtraSrcRegAllocReq diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 8fee6fa..c977cc3 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -138,214 +138,360 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem, } // VLD1 : Vector Load (multiple single elements) -class VLD1D<bits<4> op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> +class VLD1D<bits<4> op7_4, string Dt, ValueType Ty> : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - OpcodeStr, Dt, "\\{$dst\\}, $addr", "", - [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; -class VLD1Q<bits<4> op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> + "vld1", Dt, "\\{$dst\\}, $addr", "", + [(set DPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>; +class VLD1Q<bits<4> op7_4, string Dt, ValueType Ty> : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - OpcodeStr, Dt, "${dst:dregpair}, $addr", "", - [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; + "vld1", Dt, "${dst:dregpair}, $addr", "", + [(set QPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>; + +def VLD1d8 : VLD1D<0b0000, "8", v8i8>; +def VLD1d16 : VLD1D<0b0100, "16", v4i16>; +def VLD1d32 : VLD1D<0b1000, "32", v2i32>; +def VLD1df : VLD1D<0b1000, "32", v2f32>; +def VLD1d64 : VLD1D<0b1100, "64", v1i64>; + +def VLD1q8 : VLD1Q<0b0000, "8", v16i8>; +def VLD1q16 : VLD1Q<0b0100, "16", v8i16>; +def VLD1q32 : VLD1Q<0b1000, "32", v4i32>; +def VLD1qf : VLD1Q<0b1000, "32", v4f32>; +def VLD1q64 : VLD1Q<0b1100, "64", v2i64>; + +let mayLoad = 1 in { + +// ...with address register writeback: +class VLD1DWB<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", Dt, "\\{$dst\\}, $addr$offset", + "$addr.addr = $wb", []>; +class VLD1QWB<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", Dt, "${dst:dregpair}, $addr$offset", + "$addr.addr = $wb", []>; -def VLD1d8 : VLD1D<0b0000, "vld1", "8", v8i8, int_arm_neon_vld1>; -def VLD1d16 : VLD1D<0b0100, "vld1", "16", v4i16, int_arm_neon_vld1>; -def VLD1d32 : VLD1D<0b1000, "vld1", "32", v2i32, int_arm_neon_vld1>; -def VLD1df : VLD1D<0b1000, "vld1", "32", v2f32, int_arm_neon_vld1>; -def VLD1d64 : VLD1D<0b1100, "vld1", "64", v1i64, int_arm_neon_vld1>; +def VLD1d8_UPD : VLD1DWB<0b0000, "8">; +def VLD1d16_UPD : VLD1DWB<0b0100, "16">; +def VLD1d32_UPD : VLD1DWB<0b1000, "32">; +def VLD1d64_UPD : VLD1DWB<0b1100, "64">; -def VLD1q8 : VLD1Q<0b0000, "vld1", "8", v16i8, int_arm_neon_vld1>; -def VLD1q16 : VLD1Q<0b0100, "vld1", "16", v8i16, int_arm_neon_vld1>; -def VLD1q32 : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>; -def VLD1qf : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>; -def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>; +def VLD1q8_UPD : VLD1QWB<0b0000, "8">; +def VLD1q16_UPD : VLD1QWB<0b0100, "16">; +def VLD1q32_UPD : VLD1QWB<0b1000, "32">; +def VLD1q64_UPD : VLD1QWB<0b1100, "64">; +} // mayLoad = 1 + +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // These (dreg triple/quadruple) are for disassembly only. -class VLD1D3<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0, 0b10, 0b0110, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt, +class VLD1D3<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", [/* For disassembly only; pattern left blank */]>; -class VLD1D4<bits<4> op7_4, string OpcodeStr, string Dt> +class VLD1D4<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt, + (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", [/* For disassembly only; pattern left blank */]>; -def VLD1d8T : VLD1D3<0b0000, "vld1", "8">; -def VLD1d16T : VLD1D3<0b0100, "vld1", "16">; -def VLD1d32T : VLD1D3<0b1000, "vld1", "32">; -//def VLD1d64T : VLD1D3<0b1100, "vld1", "64">; - -def VLD1d8Q : VLD1D4<0b0000, "vld1", "8">; -def VLD1d16Q : VLD1D4<0b0100, "vld1", "16">; -def VLD1d32Q : VLD1D4<0b1000, "vld1", "32">; -//def VLD1d64Q : VLD1D4<0b1100, "vld1", "64">; +def VLD1d8T : VLD1D3<0b0000, "8">; +def VLD1d16T : VLD1D3<0b0100, "16">; +def VLD1d32T : VLD1D3<0b1000, "32">; +// VLD1d64T : implemented as VLD3d64 + +def VLD1d8Q : VLD1D4<0b0000, "8">; +def VLD1d16Q : VLD1D4<0b0100, "16">; +def VLD1d32Q : VLD1D4<0b1000, "32">; +// VLD1d64Q : implemented as VLD4d64 + +// ...with address register writeback: +class VLD1D3WB<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, + "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; +class VLD1D4WB<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b0010,op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; +def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">; +def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">; +def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">; +// VLD1d64T_UPD : implemented as VLD3d64_UPD -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +def VLD1d8Q_UPD : VLD1D4WB<0b0000, "8">; +def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">; +def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">; +// VLD1d64Q_UPD : implemented as VLD4d64_UPD // VLD2 : Vector Load (multiple 2-element structures) -class VLD2D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b1000,op7_4, (outs DPR:$dst1, DPR:$dst2), +class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD2, - OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; -class VLD2Q<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0011,op7_4, + "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; +class VLD2Q<bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, 0b0011, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD2, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", - "", []>; + "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; -def VLD2d8 : VLD2D<0b0000, "vld2", "8">; -def VLD2d16 : VLD2D<0b0100, "vld2", "16">; -def VLD2d32 : VLD2D<0b1000, "vld2", "32">; +def VLD2d8 : VLD2D<0b1000, 0b0000, "8">; +def VLD2d16 : VLD2D<0b1000, 0b0100, "16">; +def VLD2d32 : VLD2D<0b1000, 0b1000, "32">; def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD1, "vld1", "64", "\\{$dst1, $dst2\\}, $addr", "", []>; -def VLD2q8 : VLD2Q<0b0000, "vld2", "8">; -def VLD2q16 : VLD2Q<0b0100, "vld2", "16">; -def VLD2q32 : VLD2Q<0b1000, "vld2", "32">; +def VLD2q8 : VLD2Q<0b0000, "8">; +def VLD2q16 : VLD2Q<0b0100, "16">; +def VLD2q32 : VLD2Q<0b1000, "32">; -// These (double-spaced dreg pair) are for disassembly only. -class VLD2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b1001,op7_4, (outs DPR:$dst1, DPR:$dst2), - (ins addrmode6:$addr), IIC_VLD2, - OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; +// ...with address register writeback: +class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2, + "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset", + "$addr.addr = $wb", []>; +class VLD2QWB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, 0b0011, op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2, + "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", + "$addr.addr = $wb", []>; -def VLD2d8D : VLD2Ddbl<0b0000, "vld2", "8">; -def VLD2d16D : VLD2Ddbl<0b0100, "vld2", "16">; -def VLD2d32D : VLD2Ddbl<0b1000, "vld2", "32">; +def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">; +def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">; +def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">; +def VLD2d64_UPD : NLdSt<0,0b10,0b1010,0b1100, + (outs DPR:$dst1, DPR:$dst2, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", "64", "\\{$dst1, $dst2\\}, $addr$offset", + "$addr.addr = $wb", []>; + +def VLD2q8_UPD : VLD2QWB<0b0000, "8">; +def VLD2q16_UPD : VLD2QWB<0b0100, "16">; +def VLD2q32_UPD : VLD2QWB<0b1000, "32">; + +// ...with double-spaced registers (for disassembly only): +def VLD2b8 : VLD2D<0b1001, 0b0000, "8">; +def VLD2b16 : VLD2D<0b1001, 0b0100, "16">; +def VLD2b32 : VLD2D<0b1001, 0b1000, "32">; +def VLD2b8_UPD : VLD2DWB<0b1001, 0b0000, "8">; +def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">; +def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">; // VLD3 : Vector Load (multiple 3-element structures) -class VLD3D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), +class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), IIC_VLD3, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; -class VLD3WB<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0101,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD3, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", - "$addr.addr = $wb", []>; + "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; -def VLD3d8 : VLD3D<0b0000, "vld3", "8">; -def VLD3d16 : VLD3D<0b0100, "vld3", "16">; -def VLD3d32 : VLD3D<0b1000, "vld3", "32">; +def VLD3d8 : VLD3D<0b0100, 0b0000, "8">; +def VLD3d16 : VLD3D<0b0100, 0b0100, "16">; +def VLD3d32 : VLD3D<0b0100, 0b1000, "32">; def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), IIC_VLD1, "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; -// vld3 to double-spaced even registers. -def VLD3q8a : VLD3WB<0b0000, "vld3", "8">; -def VLD3q16a : VLD3WB<0b0100, "vld3", "16">; -def VLD3q32a : VLD3WB<0b1000, "vld3", "32">; +// ...with address register writeback: +class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3, + "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vld3 to double-spaced odd registers. -def VLD3q8b : VLD3WB<0b0000, "vld3", "8">; -def VLD3q16b : VLD3WB<0b0100, "vld3", "16">; -def VLD3q32b : VLD3WB<0b1000, "vld3", "32">; +def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">; +def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">; +def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">; +def VLD3d64_UPD : NLdSt<0,0b10,0b0110,0b1100, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VLD3q8 : VLD3D<0b0101, 0b0000, "8">; +def VLD3q16 : VLD3D<0b0101, 0b0100, "16">; +def VLD3q32 : VLD3D<0b0101, 0b1000, "32">; +def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">; +def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">; +def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">; +def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">; +def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">; // VLD4 : Vector Load (multiple 4-element structures) -class VLD4D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0000,op7_4, +class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD4, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", - "", []>; -class VLD4WB<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0001,op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD4, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", - "$addr.addr = $wb", []>; + "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; -def VLD4d8 : VLD4D<0b0000, "vld4", "8">; -def VLD4d16 : VLD4D<0b0100, "vld4", "16">; -def VLD4d32 : VLD4D<0b1000, "vld4", "32">; +def VLD4d8 : VLD4D<0b0000, 0b0000, "8">; +def VLD4d16 : VLD4D<0b0000, 0b0100, "16">; +def VLD4d32 : VLD4D<0b0000, 0b1000, "32">; def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD1, "vld1", "64", "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; -// vld4 to double-spaced even registers. -def VLD4q8a : VLD4WB<0b0000, "vld4", "8">; -def VLD4q16a : VLD4WB<0b0100, "vld4", "16">; -def VLD4q32a : VLD4WB<0b1000, "vld4", "32">; +// ...with address register writeback: +class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4, + "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vld4 to double-spaced odd registers. -def VLD4q8b : VLD4WB<0b0000, "vld4", "8">; -def VLD4q16b : VLD4WB<0b0100, "vld4", "16">; -def VLD4q32b : VLD4WB<0b1000, "vld4", "32">; +def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">; +def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">; +def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">; +def VLD4d64_UPD : NLdSt<0,0b10,0b0010,0b1100, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, + GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", "64", + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VLD4q8 : VLD4D<0b0001, 0b0000, "8">; +def VLD4q16 : VLD4D<0b0001, 0b0100, "16">; +def VLD4q32 : VLD4D<0b0001, 0b1000, "32">; +def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">; +def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">; +def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VLD4q8odd_UPD : VLD4DWB<0b0001, 0b0000, "8">; +def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">; +def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">; // VLD1LN : Vector Load (single element to one lane) // FIXME: Not yet implemented. // VLD2LN : Vector Load (single 2-element structure to one lane) -class VLD2LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VLD2, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", - "$src1 = $dst1, $src2 = $dst2", []>; +class VLD2LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), + IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", + "$src1 = $dst1, $src2 = $dst2", []>; + +def VLD2LNd8 : VLD2LN<0b0001, "8">; +def VLD2LNd16 : VLD2LN<0b0101, "16"> { let Inst{5} = 0; } +def VLD2LNd32 : VLD2LN<0b1001, "32"> { let Inst{6} = 0; } -// vld2 to single-spaced registers. -def VLD2LNd8 : VLD2LN<0b0001, "vld2", "8">; -def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 0; } -def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 0; } +// ...with double-spaced registers: +def VLD2LNq16 : VLD2LN<0b0101, "16"> { let Inst{5} = 1; } +def VLD2LNq32 : VLD2LN<0b1001, "32"> { let Inst{6} = 1; } -// vld2 to double-spaced even registers. -def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } -def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } +// ...alternate versions to be allocated odd register numbers: +def VLD2LNq16odd : VLD2LN<0b0101, "16"> { let Inst{5} = 1; } +def VLD2LNq32odd : VLD2LN<0b1001, "32"> { let Inst{6} = 1; } -// vld2 to double-spaced odd registers. -def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } -def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } +// ...with address register writeback: +class VLD2LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt, + "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset", + "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>; + +def VLD2LNd8_UPD : VLD2LNWB<0b0001, "8">; +def VLD2LNd16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 0; } +def VLD2LNd32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 0; } + +def VLD2LNq16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 1; } +def VLD2LNq32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 1; } // VLD3LN : Vector Load (single 3-element structure to one lane) -class VLD3LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VLD3, OpcodeStr, Dt, - "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr", - "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; - -// vld3 to single-spaced registers. -def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> { let Inst{4} = 0; } -def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b00; } -def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b000; } - -// vld3 to double-spaced even registers. -def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } -def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } - -// vld3 to double-spaced odd registers. -def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } -def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } +class VLD3LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VLD3, "vld3", Dt, + "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr", + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; + +def VLD3LNd8 : VLD3LN<0b0010, "8"> { let Inst{4} = 0; } +def VLD3LNd16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VLD3LNd32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b000; } + +// ...with double-spaced registers: +def VLD3LNq16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } + +// ...alternate versions to be allocated odd register numbers: +def VLD3LNq16odd : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32odd : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } + +// ...with address register writeback: +class VLD3LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), + IIC_VLD3, "vld3", Dt, + "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset", + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb", + []>; + +def VLD3LNd8_UPD : VLD3LNWB<0b0010, "8"> { let Inst{4} = 0; } +def VLD3LNd16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VLD3LNd32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; } + +def VLD3LNq16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; } // VLD4LN : Vector Load (single 4-element structure to one lane) -class VLD4LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b10,op11_8,{?,?,?,?}, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VLD4, OpcodeStr, Dt, +class VLD4LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VLD4, "vld4", Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr", - "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; -// vld4 to single-spaced registers. -def VLD4LNd8 : VLD4LN<0b0011, "vld4", "8">; -def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 0; } -def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 0; } +def VLD4LNd8 : VLD4LN<0b0011, "8">; +def VLD4LNd16 : VLD4LN<0b0111, "16"> { let Inst{5} = 0; } +def VLD4LNd32 : VLD4LN<0b1011, "32"> { let Inst{6} = 0; } -// vld4 to double-spaced even registers. -def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } -def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } +// ...with double-spaced registers: +def VLD4LNq16 : VLD4LN<0b0111, "16"> { let Inst{5} = 1; } +def VLD4LNq32 : VLD4LN<0b1011, "32"> { let Inst{6} = 1; } -// vld4 to double-spaced odd registers. -def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } -def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } +// ...alternate versions to be allocated odd register numbers: +def VLD4LNq16odd : VLD4LN<0b0111, "16"> { let Inst{5} = 1; } +def VLD4LNq32odd : VLD4LN<0b1011, "32"> { let Inst{6} = 1; } + +// ...with address register writeback: +class VLD4LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), + IIC_VLD4, "vld4", Dt, +"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset", +"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb", + []>; + +def VLD4LNd8_UPD : VLD4LNWB<0b0011, "8">; +def VLD4LNd16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 0; } +def VLD4LNd32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 0; } + +def VLD4LNq16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 1; } +def VLD4LNq32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 1; } // VLD1DUP : Vector Load (single element to all lanes) // VLD2DUP : Vector Load (single 2-element structure to all lanes) @@ -355,213 +501,353 @@ def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } } // mayLoad = 1, hasExtraDefRegAllocReq = 1 // VST1 : Vector Store (multiple single elements) -class VST1D<bits<4> op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> +class VST1D<bits<4> op7_4, string Dt, ValueType Ty> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, - OpcodeStr, Dt, "\\{$src\\}, $addr", "", - [(IntOp addrmode6:$addr, (Ty DPR:$src))]>; -class VST1Q<bits<4> op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> + "vst1", Dt, "\\{$src\\}, $addr", "", + [(int_arm_neon_vst1 addrmode6:$addr, (Ty DPR:$src))]>; +class VST1Q<bits<4> op7_4, string Dt, ValueType Ty> : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, - OpcodeStr, Dt, "${src:dregpair}, $addr", "", - [(IntOp addrmode6:$addr, (Ty QPR:$src))]>; + "vst1", Dt, "${src:dregpair}, $addr", "", + [(int_arm_neon_vst1 addrmode6:$addr, (Ty QPR:$src))]>; let hasExtraSrcRegAllocReq = 1 in { -def VST1d8 : VST1D<0b0000, "vst1", "8", v8i8, int_arm_neon_vst1>; -def VST1d16 : VST1D<0b0100, "vst1", "16", v4i16, int_arm_neon_vst1>; -def VST1d32 : VST1D<0b1000, "vst1", "32", v2i32, int_arm_neon_vst1>; -def VST1df : VST1D<0b1000, "vst1", "32", v2f32, int_arm_neon_vst1>; -def VST1d64 : VST1D<0b1100, "vst1", "64", v1i64, int_arm_neon_vst1>; - -def VST1q8 : VST1Q<0b0000, "vst1", "8", v16i8, int_arm_neon_vst1>; -def VST1q16 : VST1Q<0b0100, "vst1", "16", v8i16, int_arm_neon_vst1>; -def VST1q32 : VST1Q<0b1000, "vst1", "32", v4i32, int_arm_neon_vst1>; -def VST1qf : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>; -def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>; +def VST1d8 : VST1D<0b0000, "8", v8i8>; +def VST1d16 : VST1D<0b0100, "16", v4i16>; +def VST1d32 : VST1D<0b1000, "32", v2i32>; +def VST1df : VST1D<0b1000, "32", v2f32>; +def VST1d64 : VST1D<0b1100, "64", v1i64>; + +def VST1q8 : VST1Q<0b0000, "8", v16i8>; +def VST1q16 : VST1Q<0b0100, "16", v8i16>; +def VST1q32 : VST1Q<0b1000, "32", v4i32>; +def VST1qf : VST1Q<0b1000, "32", v4f32>; +def VST1q64 : VST1Q<0b1100, "64", v2i64>; } // hasExtraSrcRegAllocReq +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { + +// ...with address register writeback: +class VST1DWB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST, + "vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>; +class VST1QWB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST, + "vst1", Dt, "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>; + +def VST1d8_UPD : VST1DWB<0b0000, "8">; +def VST1d16_UPD : VST1DWB<0b0100, "16">; +def VST1d32_UPD : VST1DWB<0b1000, "32">; +def VST1d64_UPD : VST1DWB<0b1100, "64">; + +def VST1q8_UPD : VST1QWB<0b0000, "8">; +def VST1q16_UPD : VST1QWB<0b0100, "16">; +def VST1q32_UPD : VST1QWB<0b1000, "32">; +def VST1q64_UPD : VST1QWB<0b1100, "64">; + // These (dreg triple/quadruple) are for disassembly only. -class VST1D3<bits<4> op7_4, string OpcodeStr, string Dt> +class VST1D3<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - OpcodeStr, Dt, - "\\{$src1, $src2, $src3\\}, $addr", "", + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", [/* For disassembly only; pattern left blank */]>; -class VST1D4<bits<4> op7_4, string OpcodeStr, string Dt> +class VST1D4<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, - "\\{$src1, $src2, $src3, $src4\\}, $addr", "", + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", [/* For disassembly only; pattern left blank */]>; -def VST1d8T : VST1D3<0b0000, "vst1", "8">; -def VST1d16T : VST1D3<0b0100, "vst1", "16">; -def VST1d32T : VST1D3<0b1000, "vst1", "32">; -//def VST1d64T : VST1D3<0b1100, "vst1", "64">; - -def VST1d8Q : VST1D4<0b0000, "vst1", "8">; -def VST1d16Q : VST1D4<0b0100, "vst1", "16">; -def VST1d32Q : VST1D4<0b1000, "vst1", "32">; -//def VST1d64Q : VST1D4<0b1100, "vst1", "64">; +def VST1d8T : VST1D3<0b0000, "8">; +def VST1d16T : VST1D3<0b0100, "16">; +def VST1d32T : VST1D3<0b1000, "32">; +// VST1d64T : implemented as VST3d64 + +def VST1d8Q : VST1D4<0b0000, "8">; +def VST1d16Q : VST1D4<0b0100, "16">; +def VST1d32Q : VST1D4<0b1000, "32">; +// VST1d64Q : implemented as VST4d64 + +// ...with address register writeback: +class VST1D3WB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3), + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", + "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; +class VST1D4WB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; +def VST1d8T_UPD : VST1D3WB<0b0000, "8">; +def VST1d16T_UPD : VST1D3WB<0b0100, "16">; +def VST1d32T_UPD : VST1D3WB<0b1000, "32">; +// VST1d64T_UPD : implemented as VST3d64_UPD -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +def VST1d8Q_UPD : VST1D4WB<0b0000, "8">; +def VST1d16Q_UPD : VST1D4WB<0b0100, "16">; +def VST1d32Q_UPD : VST1D4WB<0b1000, "32">; +// VST1d64Q_UPD : implemented as VST4d64_UPD // VST2 : Vector Store (multiple 2-element structures) -class VST2D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b1000,op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>; -class VST2Q<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0011,op7_4, (outs), +class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2), + IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>; +class VST2Q<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0011, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; -def VST2d8 : VST2D<0b0000, "vst2", "8">; -def VST2d16 : VST2D<0b0100, "vst2", "16">; -def VST2d32 : VST2D<0b1000, "vst2", "32">; +def VST2d8 : VST2D<0b1000, 0b0000, "8">; +def VST2d16 : VST2D<0b1000, 0b0100, "16">; +def VST2d32 : VST2D<0b1000, 0b1000, "32">; def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, "vst1", "64", "\\{$src1, $src2\\}, $addr", "", []>; -def VST2q8 : VST2Q<0b0000, "vst2", "8">; -def VST2q16 : VST2Q<0b0100, "vst2", "16">; -def VST2q32 : VST2Q<0b1000, "vst2", "32">; +def VST2q8 : VST2Q<0b0000, "8">; +def VST2q16 : VST2Q<0b0100, "16">; +def VST2q32 : VST2Q<0b1000, "32">; -// These (double-spaced dreg pair) are for disassembly only. -class VST2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0, 0b00, 0b1001, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>; +// ...with address register writeback: +class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2), + IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset", + "$addr.addr = $wb", []>; +class VST2QWB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + "$addr.addr = $wb", []>; -def VST2d8D : VST2Ddbl<0b0000, "vst2", "8">; -def VST2d16D : VST2Ddbl<0b0100, "vst2", "16">; -def VST2d32D : VST2Ddbl<0b1000, "vst2", "32">; +def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">; +def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">; +def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">; +def VST2d64_UPD : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2), IIC_VST, + "vst1", "64", "\\{$src1, $src2\\}, $addr$offset", + "$addr.addr = $wb", []>; + +def VST2q8_UPD : VST2QWB<0b0000, "8">; +def VST2q16_UPD : VST2QWB<0b0100, "16">; +def VST2q32_UPD : VST2QWB<0b1000, "32">; + +// ...with double-spaced registers (for disassembly only): +def VST2b8 : VST2D<0b1001, 0b0000, "8">; +def VST2b16 : VST2D<0b1001, 0b0100, "16">; +def VST2b32 : VST2D<0b1001, 0b1000, "32">; +def VST2b8_UPD : VST2DWB<0b1001, 0b0000, "8">; +def VST2b16_UPD : VST2DWB<0b1001, 0b0100, "16">; +def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">; // VST3 : Vector Store (multiple 3-element structures) -class VST3D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0100,op7_4, (outs), +class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; -class VST3WB<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0101,op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3\\}, $addr", - "$addr.addr = $wb", []>; + "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; -def VST3d8 : VST3D<0b0000, "vst3", "8">; -def VST3d16 : VST3D<0b0100, "vst3", "16">; -def VST3d32 : VST3D<0b1000, "vst3", "32">; +def VST3d8 : VST3D<0b0100, 0b0000, "8">; +def VST3d16 : VST3D<0b0100, 0b0100, "16">; +def VST3d32 : VST3D<0b0100, 0b1000, "32">; def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr", "", []>; -// vst3 to double-spaced even registers. -def VST3q8a : VST3WB<0b0000, "vst3", "8">; -def VST3q16a : VST3WB<0b0100, "vst3", "16">; -def VST3q32a : VST3WB<0b1000, "vst3", "32">; +// ...with address register writeback: +class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst3 to double-spaced odd registers. -def VST3q8b : VST3WB<0b0000, "vst3", "8">; -def VST3q16b : VST3WB<0b0100, "vst3", "16">; -def VST3q32b : VST3WB<0b1000, "vst3", "32">; +def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">; +def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">; +def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">; +def VST3d64_UPD : NLdSt<0,0b00,0b0110,0b1100, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VST3q8 : VST3D<0b0101, 0b0000, "8">; +def VST3q16 : VST3D<0b0101, 0b0100, "16">; +def VST3q32 : VST3D<0b0101, 0b1000, "32">; +def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">; +def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">; +def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">; +def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">; +def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">; // VST4 : Vector Store (multiple 4-element structures) -class VST4D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0000,op7_4, (outs), +class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; -class VST4WB<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", - "$addr.addr = $wb", []>; -def VST4d8 : VST4D<0b0000, "vst4", "8">; -def VST4d16 : VST4D<0b0100, "vst4", "16">; -def VST4d32 : VST4D<0b1000, "vst4", "32">; +def VST4d8 : VST4D<0b0000, 0b0000, "8">; +def VST4d16 : VST4D<0b0000, 0b0100, "16">; +def VST4d32 : VST4D<0b0000, 0b1000, "32">; def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, "vst1", "64", "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; -// vst4 to double-spaced even registers. -def VST4q8a : VST4WB<0b0000, "vst4", "8">; -def VST4q16a : VST4WB<0b0100, "vst4", "16">; -def VST4q32a : VST4WB<0b1000, "vst4", "32">; +// ...with address register writeback: +class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, + "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst4 to double-spaced odd registers. -def VST4q8b : VST4WB<0b0000, "vst4", "8">; -def VST4q16b : VST4WB<0b0100, "vst4", "16">; -def VST4q32b : VST4WB<0b1000, "vst4", "32">; +def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">; +def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">; +def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">; +def VST4d64_UPD : NLdSt<0,0b00,0b0010,0b1100, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, + "vst1", "64", + "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VST4q8 : VST4D<0b0001, 0b0000, "8">; +def VST4q16 : VST4D<0b0001, 0b0100, "16">; +def VST4q32 : VST4D<0b0001, 0b1000, "32">; +def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">; +def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">; +def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VST4q8odd_UPD : VST4DWB<0b0001, 0b0000, "8">; +def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">; +def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">; // VST1LN : Vector Store (single element from one lane) // FIXME: Not yet implemented. // VST2LN : Vector Store (single 2-element structure from one lane) -class VST2LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), +class VST2LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VST, OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", + IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", "", []>; -// vst2 to single-spaced registers. -def VST2LNd8 : VST2LN<0b0001, "vst2", "8">; -def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 0; } -def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 0; } +def VST2LNd8 : VST2LN<0b0001, "8">; +def VST2LNd16 : VST2LN<0b0101, "16"> { let Inst{5} = 0; } +def VST2LNd32 : VST2LN<0b1001, "32"> { let Inst{6} = 0; } + +// ...with double-spaced registers: +def VST2LNq16 : VST2LN<0b0101, "16"> { let Inst{5} = 1; } +def VST2LNq32 : VST2LN<0b1001, "32"> { let Inst{6} = 1; } -// vst2 to double-spaced even registers. -def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } -def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } +// ...alternate versions to be allocated odd register numbers: +def VST2LNq16odd : VST2LN<0b0101, "16"> { let Inst{5} = 1; } +def VST2LNq32odd : VST2LN<0b1001, "32"> { let Inst{6} = 1; } + +// ...with address register writeback: +class VST2LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt, + "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst2 to double-spaced odd registers. -def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } -def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } +def VST2LNd8_UPD : VST2LNWB<0b0001, "8">; +def VST2LNd16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 0; } +def VST2LNd32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 0; } + +def VST2LNq16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 1; } +def VST2LNq32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 1; } // VST3LN : Vector Store (single 3-element structure from one lane) -class VST3LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), +class VST3LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VST, OpcodeStr, Dt, + nohash_imm:$lane), IIC_VST, "vst3", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>; -// vst3 to single-spaced registers. -def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> { let Inst{4} = 0; } -def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b00; } -def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b000; } +def VST3LNd8 : VST3LN<0b0010, "8"> { let Inst{4} = 0; } +def VST3LNd16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VST3LNd32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b000; } + +// ...with double-spaced registers: +def VST3LNq16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } + +// ...alternate versions to be allocated odd register numbers: +def VST3LNq16odd : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32odd : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } + +// ...with address register writeback: +class VST3LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), + IIC_VST, "vst3", Dt, + "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst3 to double-spaced even registers. -def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } -def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } +def VST3LNd8_UPD : VST3LNWB<0b0010, "8"> { let Inst{4} = 0; } +def VST3LNd16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VST3LNd32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; } -// vst3 to double-spaced odd registers. -def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } -def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } +def VST3LNq16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; } // VST4LN : Vector Store (single 4-element structure from one lane) -class VST4LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), +class VST4LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VST, OpcodeStr, Dt, + nohash_imm:$lane), IIC_VST, "vst4", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr", "", []>; -// vst4 to single-spaced registers. -def VST4LNd8 : VST4LN<0b0011, "vst4", "8">; -def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 0; } -def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 0; } +def VST4LNd8 : VST4LN<0b0011, "8">; +def VST4LNd16 : VST4LN<0b0111, "16"> { let Inst{5} = 0; } +def VST4LNd32 : VST4LN<0b1011, "32"> { let Inst{6} = 0; } + +// ...with double-spaced registers: +def VST4LNq16 : VST4LN<0b0111, "16"> { let Inst{5} = 1; } +def VST4LNq32 : VST4LN<0b1011, "32"> { let Inst{6} = 1; } + +// ...alternate versions to be allocated odd register numbers: +def VST4LNq16odd : VST4LN<0b0111, "16"> { let Inst{5} = 1; } +def VST4LNq32odd : VST4LN<0b1011, "32"> { let Inst{6} = 1; } + +// ...with address register writeback: +class VST4LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), + IIC_VST, "vst4", Dt, + "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst4 to double-spaced even registers. -def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } -def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } +def VST4LNd8_UPD : VST4LNWB<0b0011, "8">; +def VST4LNd16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 0; } +def VST4LNd32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 0; } -// vst4 to double-spaced odd registers. -def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } -def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } +def VST4LNq16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 1; } +def VST4LNq32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 1; } } // mayStore = 1, hasExtraSrcRegAllocReq = 1 diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 37c9fc5..e3ca536 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -16,7 +16,8 @@ // def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def imm_neg_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32); @@ -549,7 +550,7 @@ def tLDM : T1I<(outs), def tLDM_UPD : T1It<(outs tGPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_iLoadm, - "ldm${addr:submode}${p}\t$addr, $dsts", + "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>, T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53 } // mayLoad, hasExtraDefRegAllocReq @@ -558,7 +559,7 @@ let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def tSTM_UPD : T1It<(outs tGPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_iStorem, - "stm${addr:submode}${p}\t$addr, $srcs", + "stm${addr:submode}${p}\t$addr!, $srcs", "$addr.addr = $wb", []>, T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189 diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index ab9e926..262aae4 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1218,7 +1218,7 @@ def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_iLoadm, - "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", + "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts", "$addr.addr = $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; @@ -1244,7 +1244,7 @@ def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_iStorem, - "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", + "stm${addr:submode}${p}${addr:wide}\t$addr!, $srcs", "$addr.addr = $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 4d1d48a..aca8230 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -25,8 +25,6 @@ def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>; def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>; def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>; def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; -def arm_f16tof32 : SDNode<"ARMISD::F16_TO_F32", SDT_ITOF>; -def arm_f32tof16 : SDNode<"ARMISD::F32_TO_F16", SDT_FTOI>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>; def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>; @@ -94,7 +92,7 @@ def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts, def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $dsts", + "vldm${addr:submode}${p}\t${addr:base}!, $dsts", "$addr.base = $wb", []> { let Inst{20} = 1; } @@ -102,7 +100,7 @@ def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $dsts", + "vldm${addr:submode}${p}\t${addr:base}!, $dsts", "$addr.base = $wb", []> { let Inst{20} = 1; } @@ -124,7 +122,7 @@ def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs, def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, $srcs", + "vstm${addr:submode}${p}\t${addr:base}!, $srcs", "$addr.base = $wb", []> { let Inst{20} = 0; } @@ -132,7 +130,7 @@ def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, $srcs", + "vstm${addr:submode}${p}\t${addr:base}!, $srcs", "$addr.base = $wb", []> { let Inst{20} = 0; } @@ -259,11 +257,17 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a", - [(set SPR:$dst, (f32 (arm_f32tof16 SPR:$a)))]>; + [/* For disassembly only; pattern left blank */]>; + +def : ARMPat<(f32_to_f16 SPR:$a), + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a", - [(set SPR:$dst, (arm_f16tof32 SPR:$a))]>; + [/* For disassembly only; pattern left blank */]>; + +def : ARMPat<(f16_to_f32 GPR:$a), + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a", diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 8fbcf45..bdbec30 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -253,7 +253,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg) : BuildMI(MBB, MBBI, dl, TII->get(Opcode)) .addReg(Base, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs)) + .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs)) .addImm(Pred).addReg(PredReg); for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) @@ -505,11 +505,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, if (MI->getOperand(i).getReg() == Base) return false; } - assert(!ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm())); Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); } else { // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops. - assert(!ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm())); Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); } @@ -573,11 +571,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, .addReg(Base, getKillRegState(BaseKill)); if (isAM4) { // [t2]LDM_UPD, [t2]STM_UPD - MIB.addImm(ARM_AM::getAM4ModeImm(Mode, true)) + MIB.addImm(ARM_AM::getAM4ModeImm(Mode)) .addImm(Pred).addReg(PredReg); } else { // VLDM[SD}_UPD, VSTM[SD]_UPD - MIB.addImm(ARM_AM::getAM5Opc(Mode, true, Offset)) + MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset)) .addImm(Pred).addReg(PredReg); } // Transfer the rest of operands. @@ -709,7 +707,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, unsigned Offset = 0; if (isAM5) Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia, - true, (isDPR ? 2 : 1)); + (isDPR ? 2 : 1)); else if (isAM2) Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); else @@ -1157,19 +1155,24 @@ namespace { }; } -/// MergeReturnIntoLDM - If this is a exit BB, try merging the return op -/// (bx lr) into the preceeding stack restore so it directly restore the value -/// of LR into pc. -/// ldmfd sp!, {r7, lr} +/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops +/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it +/// directly restore the value of LR into pc. +/// ldmfd sp!, {..., lr} /// bx lr +/// or +/// ldmfd sp!, {..., lr} +/// mov pc, lr /// => -/// ldmfd sp!, {r7, pc} +/// ldmfd sp!, {..., pc} bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { if (MBB.empty()) return false; MachineBasicBlock::iterator MBBI = prior(MBB.end()); if (MBBI != MBB.begin() && - (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) { + (MBBI->getOpcode() == ARM::BX_RET || + MBBI->getOpcode() == ARM::tBX_RET || + MBBI->getOpcode() == ARM::MOVPCLR)) { MachineInstr *PrevMI = prior(MBBI); if (PrevMI->getOpcode() == ARM::LDM_UPD || PrevMI->getOpcode() == ARM::t2LDM_UPD) { diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 7233f5c..95f57b7 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -21,7 +21,7 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { +static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { case Triple::Darwin: diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 88e67e3..c32f16c 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -23,6 +23,7 @@ #include "ARMISelLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" +#include "llvm/ADT/OwningPtr.h" namespace llvm { @@ -83,7 +84,8 @@ public: /// Thumb-1 and Thumb-2. /// class ThumbTargetMachine : public ARMBaseTargetMachine { - ARMBaseInstrInfo *InstrInfo; // either Thumb1InstrInfo or Thumb2InstrInfo + // Either Thumb1InstrInfo or Thumb2InstrInfo. + OwningPtr<ARMBaseInstrInfo> InstrInfo; const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; public: @@ -100,7 +102,9 @@ public: } /// returns either Thumb1InstrInfo or Thumb2InstrInfo - virtual const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo; } + virtual const ARMBaseInstrInfo *getInstrInfo() const { + return InstrInfo.get(); + } virtual const TargetData *getTargetData() const { return &DataLayout; } }; diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 4db14a3..4a7a1e4 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -101,6 +101,7 @@ namespace { void printAddrMode5Operand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); void printAddrMode6Operand(const MachineInstr *MI, int OpNum); + void printAddrMode6OffsetOperand(const MachineInstr *MI, int OpNum); void printAddrModePCOperand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum); @@ -431,16 +432,16 @@ void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) { O << "[" << getRegisterName(MO1.getReg()); if (!MO2.getReg()) { - if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. + if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. O << ", #" - << (char)ARM_AM::getAM2Op(MO3.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) << ARM_AM::getAM2Offset(MO3.getImm()); O << "]"; return; } O << ", " - << (char)ARM_AM::getAM2Op(MO3.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) << getRegisterName(MO2.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) @@ -458,12 +459,12 @@ void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){ unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); O << "#" - << (char)ARM_AM::getAM2Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs; return; } - O << (char)ARM_AM::getAM2Op(MO2.getImm()) + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << getRegisterName(MO1.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) @@ -490,7 +491,7 @@ void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) { if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) O << ", #" - << (char)ARM_AM::getAM3Op(MO3.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) << ImmOffs; O << "]"; } @@ -508,35 +509,22 @@ void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){ unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); O << "#" - << (char)ARM_AM::getAM3Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs; } void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op, const char *Modifier) { - const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Modifier && strcmp(Modifier, "submode") == 0) { - if (MO1.getReg() == ARM::SP) { - // FIXME - bool isLDM = (MI->getOpcode() == ARM::LDM || - MI->getOpcode() == ARM::LDM_UPD || - MI->getOpcode() == ARM::LDM_RET || - MI->getOpcode() == ARM::t2LDM || - MI->getOpcode() == ARM::t2LDM_UPD || - MI->getOpcode() == ARM::t2LDM_RET); - O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); - } else - O << ARM_AM::getAMSubModeStr(Mode); + O << ARM_AM::getAMSubModeStr(Mode); } else if (Modifier && strcmp(Modifier, "wide") == 0) { ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Mode == ARM_AM::ia) O << ".w"; } else { printOperand(MI, Op); - if (ARM_AM::getAM4WBFlag(MO2.getImm())) - O << "!"; } } @@ -559,8 +547,6 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, } else if (Modifier && strcmp(Modifier, "base") == 0) { // Used for FSTM{D|S} and LSTM{D|S} operations. O << getRegisterName(MO1.getReg()); - if (ARM_AM::getAM5WBFlag(MO2.getImm())) - O << "!"; return; } @@ -568,7 +554,7 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { O << ", #" - << (char)ARM_AM::getAM5Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) << ImmOffs*4; } O << "]"; @@ -577,22 +563,21 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) { const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); - const MachineOperand &MO3 = MI->getOperand(Op+2); - const MachineOperand &MO4 = MI->getOperand(Op+3); O << "[" << getRegisterName(MO1.getReg()); - if (MO4.getImm()) { + if (MO2.getImm()) { // FIXME: Both darwin as and GNU as violate ARM docs here. - O << ", :" << MO4.getImm(); + O << ", :" << MO2.getImm(); } O << "]"; +} - if (ARM_AM::getAM6WBFlag(MO3.getImm())) { - if (MO2.getReg() == 0) - O << "!"; - else - O << ", " << getRegisterName(MO2.getReg()); - } +void ARMAsmPrinter::printAddrMode6OffsetOperand(const MachineInstr *MI, int Op){ + const MachineOperand &MO = MI->getOperand(Op); + if (MO.getReg() == 0) + O << "!"; + else + O << ", " << getRegisterName(MO.getReg()); } void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, @@ -604,7 +589,7 @@ void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, const MachineOperand &MO1 = MI->getOperand(Op); assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); - O << "[pc, +" << getRegisterName(MO1.getReg()) << "]"; + O << "[pc, " << getRegisterName(MO1.getReg()) << "]"; } void @@ -627,10 +612,11 @@ void ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op) { // (3 - the number of trailing zeros) is the number of then / else. unsigned Mask = MI->getOperand(Op).getImm(); + unsigned CondBit0 = Mask >> 4 & 1; unsigned NumTZ = CountTrailingZeros_32(Mask); assert(NumTZ <= 3 && "Invalid IT mask!"); for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { - bool T = (Mask & (1 << Pos)) == 0; + bool T = ((Mask >> Pos) & 1) == CondBit0; if (T) O << 't'; else @@ -662,7 +648,7 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op, if (MO3.getReg()) O << ", " << getRegisterName(MO3.getReg()); else if (unsigned ImmOffs = MO2.getImm()) - O << ", #+" << ImmOffs * Scale; + O << ", #" << ImmOffs * Scale; O << "]"; } @@ -684,7 +670,7 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) { const MachineOperand &MO2 = MI->getOperand(Op+1); O << "[" << getRegisterName(MO1.getReg()); if (unsigned ImmOffs = MO2.getImm()) - O << ", #+" << ImmOffs*4; + O << ", #" << ImmOffs*4; O << "]"; } @@ -720,7 +706,7 @@ void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI, unsigned OffImm = MO2.getImm(); if (OffImm) // Don't print +0. - O << ", #+" << OffImm; + O << ", #" << OffImm; O << "]"; } @@ -736,7 +722,7 @@ void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI, if (OffImm < 0) O << ", #-" << -OffImm; else if (OffImm > 0) - O << ", #+" << OffImm; + O << ", #" << OffImm; O << "]"; } @@ -752,7 +738,7 @@ void ARMAsmPrinter::printT2AddrModeImm8s4Operand(const MachineInstr *MI, if (OffImm < 0) O << ", #-" << -OffImm * 4; else if (OffImm > 0) - O << ", #+" << OffImm * 4; + O << ", #" << OffImm * 4; O << "]"; } @@ -764,7 +750,7 @@ void ARMAsmPrinter::printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, if (OffImm < 0) O << "#-" << -OffImm; else if (OffImm > 0) - O << "#+" << OffImm; + O << "#" << OffImm; } void ARMAsmPrinter::printT2AddrModeSoRegOperand(const MachineInstr *MI, diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index a2084b0..30763a9 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -28,7 +28,159 @@ using namespace llvm; #undef MachineInstr #undef ARMAsmPrinter -void ARMInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +static unsigned NextReg(unsigned Reg) { + switch (Reg) { + default: + assert(0 && "Unexpected register enum"); + + case ARM::D0: + return ARM::D1; + case ARM::D1: + return ARM::D2; + case ARM::D2: + return ARM::D3; + case ARM::D3: + return ARM::D4; + case ARM::D4: + return ARM::D5; + case ARM::D5: + return ARM::D6; + case ARM::D6: + return ARM::D7; + case ARM::D7: + return ARM::D8; + case ARM::D8: + return ARM::D9; + case ARM::D9: + return ARM::D10; + case ARM::D10: + return ARM::D11; + case ARM::D11: + return ARM::D12; + case ARM::D12: + return ARM::D13; + case ARM::D13: + return ARM::D14; + case ARM::D14: + return ARM::D15; + case ARM::D15: + return ARM::D16; + case ARM::D16: + return ARM::D17; + case ARM::D17: + return ARM::D18; + case ARM::D18: + return ARM::D19; + case ARM::D19: + return ARM::D20; + case ARM::D20: + return ARM::D21; + case ARM::D21: + return ARM::D22; + case ARM::D22: + return ARM::D23; + case ARM::D23: + return ARM::D24; + case ARM::D24: + return ARM::D25; + case ARM::D25: + return ARM::D26; + case ARM::D26: + return ARM::D27; + case ARM::D27: + return ARM::D28; + case ARM::D28: + return ARM::D29; + case ARM::D29: + return ARM::D30; + case ARM::D30: + return ARM::D31; + } +} + +void ARMInstPrinter::printInst(const MCInst *MI) { + // Check for MOVs and print canonical forms, instead. + if (MI->getOpcode() == ARM::MOVs) { + const MCOperand &Dst = MI->getOperand(0); + const MCOperand &MO1 = MI->getOperand(1); + const MCOperand &MO2 = MI->getOperand(2); + const MCOperand &MO3 = MI->getOperand(3); + + O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())); + printSBitModifierOperand(MI, 6); + printPredicateOperand(MI, 4); + + O << '\t' << getRegisterName(Dst.getReg()) + << ", " << getRegisterName(MO1.getReg()); + + if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx) + return; + + O << ", "; + + if (MO2.getReg()) { + O << getRegisterName(MO2.getReg()); + assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); + } else { + O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } + return; + } + + // A8.6.123 PUSH + if ((MI->getOpcode() == ARM::STM_UPD || MI->getOpcode() == ARM::t2STM_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) { + O << '\t' << "push"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + // A8.6.122 POP + if ((MI->getOpcode() == ARM::LDM_UPD || MI->getOpcode() == ARM::t2LDM_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) { + O << '\t' << "pop"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + // A8.6.355 VPUSH + if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::db) { + O << '\t' << "vpush"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + // A8.6.354 VPOP + if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::ia) { + O << '\t' << "vpop"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + printInstruction(MI); + } void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const char *Modifier) { @@ -36,6 +188,9 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (Op.isReg()) { unsigned Reg = Op.getReg(); if (Modifier && strcmp(Modifier, "dregpair") == 0) { + O << '{' << getRegisterName(Reg) << ", " + << getRegisterName(NextReg(Reg)) << '}'; +#if 0 // FIXME: Breaks e.g. ARM/vmul.ll. assert(0); /* @@ -44,6 +199,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << '{' << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) << '}';*/ +#endif } else if (Modifier && strcmp(Modifier, "lane") == 0) { assert(0); /* @@ -56,7 +212,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << getRegisterName(Reg); } } else if (Op.isImm()) { - assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + assert((Modifier && !strcmp(Modifier, "call")) || + ((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported")); O << '#' << Op.getImm(); } else { assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); @@ -142,17 +299,17 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op) { O << "[" << getRegisterName(MO1.getReg()); if (!MO2.getReg()) { - if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. + if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. O << ", #" - << (char)ARM_AM::getAM2Op(MO3.getImm()) - << ARM_AM::getAM2Offset(MO3.getImm()); + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << ARM_AM::getAM2Offset(MO3.getImm()); O << "]"; return; } O << ", " - << (char)ARM_AM::getAM2Op(MO3.getImm()) - << getRegisterName(MO2.getReg()); + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << getRegisterName(MO2.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) O << ", " @@ -169,11 +326,14 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); - O << '#' << (char)ARM_AM::getAM2Op(MO2.getImm()) << ImmOffs; + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) + << ImmOffs; return; } - O << (char)ARM_AM::getAM2Op(MO2.getImm()) << getRegisterName(MO1.getReg()); + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) + << getRegisterName(MO1.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) O << ", " @@ -196,8 +356,8 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum) { if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) O << ", #" - << (char)ARM_AM::getAM3Op(MO3.getImm()) - << ImmOffs; + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) + << ImmOffs; O << ']'; } @@ -214,35 +374,24 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); - O << "#" - << (char)ARM_AM::getAM3Op(MO2.getImm()) - << ImmOffs; + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) + << ImmOffs; } void ARMInstPrinter::printAddrMode4Operand(const MCInst *MI, unsigned OpNum, const char *Modifier) { - const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Modifier && strcmp(Modifier, "submode") == 0) { - if (MO1.getReg() == ARM::SP) { - // FIXME - bool isLDM = (MI->getOpcode() == ARM::LDM || - MI->getOpcode() == ARM::LDM_RET || - MI->getOpcode() == ARM::t2LDM || - MI->getOpcode() == ARM::t2LDM_RET); - O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); - } else - O << ARM_AM::getAMSubModeStr(Mode); + O << ARM_AM::getAMSubModeStr(Mode); } else if (Modifier && strcmp(Modifier, "wide") == 0) { ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Mode == ARM_AM::ia) O << ".w"; } else { printOperand(MI, OpNum); - if (ARM_AM::getAM4WBFlag(MO2.getImm())) - O << "!"; } } @@ -263,8 +412,6 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, } else if (Modifier && strcmp(Modifier, "base") == 0) { // Used for FSTM{D|S} and LSTM{D|S} operations. O << getRegisterName(MO1.getReg()); - if (ARM_AM::getAM5WBFlag(MO2.getImm())) - O << "!"; return; } @@ -272,7 +419,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { O << ", #" - << (char)ARM_AM::getAM5Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) << ImmOffs*4; } O << "]"; @@ -281,17 +428,22 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum) { const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); - // FIXME: No support yet for specifying alignment. - O << '[' << getRegisterName(MO1.getReg()) << ']'; - - if (ARM_AM::getAM6WBFlag(MO3.getImm())) { - if (MO2.getReg() == 0) - O << '!'; - else - O << ", " << getRegisterName(MO2.getReg()); + O << "[" << getRegisterName(MO1.getReg()); + if (MO2.getImm()) { + // FIXME: Both darwin as and GNU as violate ARM docs here. + O << ", :" << MO2.getImm(); } + O << "]"; +} + +void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.getReg() == 0) + O << "!"; + else + O << ", " << getRegisterName(MO.getReg()); } void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum, @@ -311,14 +463,56 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI, void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum) { O << "{"; - // Always skip the first operand, it's the optional (and implicit writeback). - for (unsigned i = OpNum+1, e = MI->getNumOperands(); i != e; ++i) { - if (i != OpNum+1) O << ", "; + for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { + if (i != OpNum) O << ", "; O << getRegisterName(MI->getOperand(i).getReg()); } O << "}"; } +void ARMInstPrinter::printCPSOptionOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &Op = MI->getOperand(OpNum); + unsigned option = Op.getImm(); + unsigned mode = option & 31; + bool changemode = option >> 5 & 1; + unsigned AIF = option >> 6 & 7; + unsigned imod = option >> 9 & 3; + if (imod == 2) + O << "ie"; + else if (imod == 3) + O << "id"; + O << '\t'; + if (imod > 1) { + if (AIF & 4) O << 'a'; + if (AIF & 2) O << 'i'; + if (AIF & 1) O << 'f'; + if (AIF > 0 && changemode) O << ", "; + } + if (changemode) + O << '#' << mode; +} + +void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &Op = MI->getOperand(OpNum); + unsigned Mask = Op.getImm(); + if (Mask) { + O << '_'; + if (Mask & 8) O << 'f'; + if (Mask & 4) O << 's'; + if (Mask & 2) O << 'x'; + if (Mask & 1) O << 'c'; + } +} + +void ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum){ + const MCOperand &Op = MI->getOperand(OpNum); + O << '#'; + if (Op.getImm() < 0) + O << '-' << (-Op.getImm() - 1); + else + O << Op.getImm(); +} + void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); if (CC != ARMCC::AL) @@ -360,3 +554,191 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum) { void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum) { O << "#" << MI->getOperand(OpNum).getImm() * 4; } + +void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum) { + // (3 - the number of trailing zeros) is the number of then / else. + unsigned Mask = MI->getOperand(OpNum).getImm(); + unsigned CondBit0 = Mask >> 4 & 1; + unsigned NumTZ = CountTrailingZeros_32(Mask); + assert(NumTZ <= 3 && "Invalid IT mask!"); + for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { + bool T = ((Mask >> Pos) & 1) == CondBit0; + if (T) + O << 't'; + else + O << 'e'; + } +} + +void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op) +{ + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + O << "[" << getRegisterName(MO1.getReg()); + O << ", " << getRegisterName(MO2.getReg()) << "]"; +} + +void ARMInstPrinter::printThumbAddrModeRI5Operand(const MCInst *MI, unsigned Op, + unsigned Scale) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op); + return; + } + + O << "[" << getRegisterName(MO1.getReg()); + if (MO3.getReg()) + O << ", " << getRegisterName(MO3.getReg()); + else if (unsigned ImmOffs = MO2.getImm()) + O << ", #" << ImmOffs * Scale; + O << "]"; +} + +void ARMInstPrinter::printThumbAddrModeS1Operand(const MCInst *MI, unsigned Op) +{ + printThumbAddrModeRI5Operand(MI, Op, 1); +} + +void ARMInstPrinter::printThumbAddrModeS2Operand(const MCInst *MI, unsigned Op) +{ + printThumbAddrModeRI5Operand(MI, Op, 2); +} + +void ARMInstPrinter::printThumbAddrModeS4Operand(const MCInst *MI, unsigned Op) +{ + printThumbAddrModeRI5Operand(MI, Op, 4); +} + +void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI,unsigned Op) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + O << "[" << getRegisterName(MO1.getReg()); + if (unsigned ImmOffs = MO2.getImm()) + O << ", #" << ImmOffs*4; + O << "]"; +} + +void ARMInstPrinter::printTBAddrMode(const MCInst *MI, unsigned OpNum) { + O << "[pc, " << getRegisterName(MI->getOperand(OpNum).getReg()); + if (MI->getOpcode() == ARM::t2TBH) + O << ", lsl #1"; + O << ']'; +} + +// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2 +// register with shift forms. +// REG 0 0 - e.g. R5 +// REG IMM, SH_OPC - e.g. R5, LSL #3 +void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + unsigned Reg = MO1.getReg(); + O << getRegisterName(Reg); + + // Print the shift opc. + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())) + << " "; + + assert(MO2.isImm() && "Not a valid t2_so_reg value!"); + O << "#" << ARM_AM::getSORegOffset(MO2.getImm()); +} + +void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + + unsigned OffImm = MO2.getImm(); + if (OffImm) // Don't print +0. + O << ", #" << OffImm; + O << "]"; +} + +void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + + int32_t OffImm = (int32_t)MO2.getImm(); + // Don't print +0. + if (OffImm < 0) + O << ", #-" << -OffImm; + else if (OffImm > 0) + O << ", #" << OffImm; + O << "]"; +} + +void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + + int32_t OffImm = (int32_t)MO2.getImm() / 4; + // Don't print +0. + if (OffImm < 0) + O << ", #-" << -OffImm * 4; + else if (OffImm > 0) + O << ", #" << OffImm * 4; + O << "]"; +} + +void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + int32_t OffImm = (int32_t)MO1.getImm(); + // Don't print +0. + if (OffImm < 0) + O << "#-" << -OffImm; + else if (OffImm > 0) + O << "#" << OffImm; +} + +void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + int32_t OffImm = (int32_t)MO1.getImm() / 4; + // Don't print +0. + if (OffImm < 0) + O << "#-" << -OffImm * 4; + else if (OffImm > 0) + O << "#" << OffImm * 4; +} + +void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO3 = MI->getOperand(OpNum+2); + + O << "[" << getRegisterName(MO1.getReg()); + + assert(MO2.getReg() && "Invalid so_reg load / store address!"); + O << ", " << getRegisterName(MO2.getReg()); + + unsigned ShAmt = MO3.getImm(); + if (ShAmt) { + assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); + O << ", lsl #" << ShAmt; + } + O << "]"; +} + +void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum) { + O << '#' << MI->getOperand(OpNum).getImm(); +} + +void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum) { + O << '#' << MI->getOperand(OpNum).getImm(); +} + diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index 9a3cbc3..d41b5df 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -48,32 +48,33 @@ public: void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, const char *Modifier = 0); void printAddrMode6Operand(const MCInst *MI, unsigned OpNum); + void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum); void printAddrModePCOperand(const MCInst *MI, unsigned OpNum, const char *Modifier = 0); void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum); - void printThumbITMask(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum) {} + void printThumbITMask(const MCInst *MI, unsigned OpNum); + void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum); void printThumbAddrModeRI5Operand(const MCInst *MI, unsigned OpNum, - unsigned Scale) {} - void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum) {} + unsigned Scale); + void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum); + void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum); + void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum); + void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum); - void printT2SOOperand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum) {} + void printT2SOOperand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum); - void printCPSOptionOperand(const MCInst *MI, unsigned OpNum) {} - void printMSRMaskOperand(const MCInst *MI, unsigned OpNum) {} - void printNegZeroOperand(const MCInst *MI, unsigned OpNum) {} + void printCPSOptionOperand(const MCInst *MI, unsigned OpNum); + void printMSRMaskOperand(const MCInst *MI, unsigned OpNum); + void printNegZeroOperand(const MCInst *MI, unsigned OpNum); void printPredicateOperand(const MCInst *MI, unsigned OpNum); void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum); void printSBitModifierOperand(const MCInst *MI, unsigned OpNum); @@ -82,10 +83,10 @@ public: const char *Modifier); void printJTBlockOperand(const MCInst *MI, unsigned OpNum) {} void printJT2BlockOperand(const MCInst *MI, unsigned OpNum) {} - void printTBAddrMode(const MCInst *MI, unsigned OpNum) {} + void printTBAddrMode(const MCInst *MI, unsigned OpNum); void printNoHashImmediate(const MCInst *MI, unsigned OpNum); - void printVFPf32ImmOperand(const MCInst *MI, int OpNum) {} - void printVFPf64ImmOperand(const MCInst *MI, int OpNum) {} + void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum); + void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum); void printHex8ImmOperand(const MCInst *MI, int OpNum) {} void printHex16ImmOperand(const MCInst *MI, int OpNum) {} void printHex32ImmOperand(const MCInst *MI, int OpNum) {} diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index d9942c8..c36fe63 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -64,16 +64,16 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 4; return true; - case ARM::VLD2LNq16a: - case ARM::VLD2LNq32a: + case ARM::VLD2LNq16: + case ARM::VLD2LNq32: FirstOpnd = 0; NumRegs = 2; Offset = 0; Stride = 2; return true; - case ARM::VLD2LNq16b: - case ARM::VLD2LNq32b: + case ARM::VLD2LNq16odd: + case ARM::VLD2LNq32odd: FirstOpnd = 0; NumRegs = 2; Offset = 1; @@ -91,34 +91,34 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 3; return true; - case ARM::VLD3q8a: - case ARM::VLD3q16a: - case ARM::VLD3q32a: + case ARM::VLD3q8_UPD: + case ARM::VLD3q16_UPD: + case ARM::VLD3q32_UPD: FirstOpnd = 0; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VLD3q8b: - case ARM::VLD3q16b: - case ARM::VLD3q32b: + case ARM::VLD3q8odd_UPD: + case ARM::VLD3q16odd_UPD: + case ARM::VLD3q32odd_UPD: FirstOpnd = 0; NumRegs = 3; Offset = 1; Stride = 2; return true; - case ARM::VLD3LNq16a: - case ARM::VLD3LNq32a: + case ARM::VLD3LNq16: + case ARM::VLD3LNq32: FirstOpnd = 0; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VLD3LNq16b: - case ARM::VLD3LNq32b: + case ARM::VLD3LNq16odd: + case ARM::VLD3LNq32odd: FirstOpnd = 0; NumRegs = 3; Offset = 1; @@ -136,34 +136,34 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 4; return true; - case ARM::VLD4q8a: - case ARM::VLD4q16a: - case ARM::VLD4q32a: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: FirstOpnd = 0; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VLD4q8b: - case ARM::VLD4q16b: - case ARM::VLD4q32b: + case ARM::VLD4q8odd_UPD: + case ARM::VLD4q16odd_UPD: + case ARM::VLD4q32odd_UPD: FirstOpnd = 0; NumRegs = 4; Offset = 1; Stride = 2; return true; - case ARM::VLD4LNq16a: - case ARM::VLD4LNq32a: + case ARM::VLD4LNq16: + case ARM::VLD4LNq32: FirstOpnd = 0; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VLD4LNq16b: - case ARM::VLD4LNq32b: + case ARM::VLD4LNq16odd: + case ARM::VLD4LNq32odd: FirstOpnd = 0; NumRegs = 4; Offset = 1; @@ -177,28 +177,28 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 2; return true; case ARM::VST2q8: case ARM::VST2q16: case ARM::VST2q32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 4; return true; - case ARM::VST2LNq16a: - case ARM::VST2LNq32a: - FirstOpnd = 4; + case ARM::VST2LNq16: + case ARM::VST2LNq32: + FirstOpnd = 2; NumRegs = 2; Offset = 0; Stride = 2; return true; - case ARM::VST2LNq16b: - case ARM::VST2LNq32b: - FirstOpnd = 4; + case ARM::VST2LNq16odd: + case ARM::VST2LNq32odd: + FirstOpnd = 2; NumRegs = 2; Offset = 1; Stride = 2; @@ -211,39 +211,39 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNd8: case ARM::VST3LNd16: case ARM::VST3LNd32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 3; return true; - case ARM::VST3q8a: - case ARM::VST3q16a: - case ARM::VST3q32a: - FirstOpnd = 5; + case ARM::VST3q8_UPD: + case ARM::VST3q16_UPD: + case ARM::VST3q32_UPD: + FirstOpnd = 4; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VST3q8b: - case ARM::VST3q16b: - case ARM::VST3q32b: - FirstOpnd = 5; + case ARM::VST3q8odd_UPD: + case ARM::VST3q16odd_UPD: + case ARM::VST3q32odd_UPD: + FirstOpnd = 4; NumRegs = 3; Offset = 1; Stride = 2; return true; - case ARM::VST3LNq16a: - case ARM::VST3LNq32a: - FirstOpnd = 4; + case ARM::VST3LNq16: + case ARM::VST3LNq32: + FirstOpnd = 2; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VST3LNq16b: - case ARM::VST3LNq32b: - FirstOpnd = 4; + case ARM::VST3LNq16odd: + case ARM::VST3LNq32odd: + FirstOpnd = 2; NumRegs = 3; Offset = 1; Stride = 2; @@ -256,39 +256,39 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNd8: case ARM::VST4LNd16: case ARM::VST4LNd32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 4; return true; - case ARM::VST4q8a: - case ARM::VST4q16a: - case ARM::VST4q32a: - FirstOpnd = 5; + case ARM::VST4q8_UPD: + case ARM::VST4q16_UPD: + case ARM::VST4q32_UPD: + FirstOpnd = 4; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VST4q8b: - case ARM::VST4q16b: - case ARM::VST4q32b: - FirstOpnd = 5; + case ARM::VST4q8odd_UPD: + case ARM::VST4q16odd_UPD: + case ARM::VST4q32odd_UPD: + FirstOpnd = 4; NumRegs = 4; Offset = 1; Stride = 2; return true; - case ARM::VST4LNq16a: - case ARM::VST4LNq32a: - FirstOpnd = 4; + case ARM::VST4LNq16: + case ARM::VST4LNq32: + FirstOpnd = 2; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VST4LNq16b: - case ARM::VST4LNq32b: - FirstOpnd = 4; + case ARM::VST4LNq16odd: + case ARM::VST4LNq32odd: + FirstOpnd = 2; NumRegs = 4; Offset = 1; Stride = 2; diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index f5ba155..f36d4ef 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -78,14 +78,16 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { DebugLoc ndl = NMI->getDebugLoc(); unsigned NPredReg = 0; ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg); - if (NCC == OCC) { - Mask |= (1 << Pos); - } else if (NCC != CC) + if (NCC == CC || NCC == OCC) + Mask |= (NCC & 1) << Pos; + else break; --Pos; ++MBBI; } Mask |= (1 << Pos); + // Tag along (firstcond[0] << 4) with the mask. + Mask |= (CC & 1) << 4; MIB.addImm(Mask); Modified = true; ++NumITs; |