diff options
author | rdivacky <rdivacky@FreeBSD.org> | 2010-03-21 10:49:05 +0000 |
---|---|---|
committer | rdivacky <rdivacky@FreeBSD.org> | 2010-03-21 10:49:05 +0000 |
commit | 2f2afc1aae898651e26987a5c71f3febb19bca98 (patch) | |
tree | 2caca31db4facdc95c23930c0c745c8ef0dee97d /lib/Target | |
parent | 0f448b841684305c051796982f300c9bff959307 (diff) | |
download | FreeBSD-src-2f2afc1aae898651e26987a5c71f3febb19bca98.zip FreeBSD-src-2f2afc1aae898651e26987a5c71f3febb19bca98.tar.gz |
Update LLVM to r99115.
Diffstat (limited to 'lib/Target')
59 files changed, 1896 insertions, 1341 deletions
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index ddeb1b9..ea62c33 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -35,6 +35,10 @@ namespace ARM_AM { add = '+', sub = '-' }; + static inline const char *getAddrOpcStr(AddrOpc Op) { + return Op == sub ? "-" : ""; + } + static inline const char *getShiftOpcStr(ShiftOpc Op) { switch (Op) { default: assert(0 && "Unknown shift opc!"); @@ -78,16 +82,6 @@ namespace ARM_AM { } } - static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) { - switch (Mode) { - default: assert(0 && "Unknown addressing sub-mode!"); - case ARM_AM::ia: return isLD ? "fd" : "ea"; - case ARM_AM::ib: return isLD ? "ed" : "fa"; - case ARM_AM::da: return isLD ? "fa" : "ed"; - case ARM_AM::db: return isLD ? "ea" : "fd"; - } - } - /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits. /// static inline unsigned rotr32(unsigned Val, unsigned Amt) { @@ -473,20 +467,13 @@ namespace ARM_AM { // IB - Increment before // DA - Decrement after // DB - Decrement before - // - // If the 4th bit (writeback)is set, then the base register is updated after - // the memory transfer. static inline AMSubMode getAM4SubMode(unsigned Mode) { return (AMSubMode)(Mode & 0x7); } - static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) { - return (int)SubMode | ((int)WB << 3); - } - - static inline bool getAM4WBFlag(unsigned Mode) { - return (Mode >> 3) & 1; + static inline unsigned getAM4ModeImm(AMSubMode SubMode) { + return (int)SubMode; } //===--------------------------------------------------------------------===// @@ -501,9 +488,9 @@ namespace ARM_AM { // operation in bit 8 and the immediate in bits 0-7. // // This is also used for FP load/store multiple ops. The second operand - // encodes the writeback mode in bit 8 and the number of registers (or 2 - // times the number of registers for DPR ops) in bits 0-7. In addition, - // bits 9-11 encode one of the following two sub-modes: + // encodes the number of registers (or 2 times the number of registers + // for DPR ops) in bits 0-7. In addition, bits 8-10 encode one of the + // following two sub-modes: // // IA - Increment after // DB - Decrement before @@ -522,17 +509,13 @@ namespace ARM_AM { /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and /// VSTM instructions. - static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB, - unsigned char Offset) { + static inline unsigned getAM5Opc(AMSubMode SubMode, unsigned char Offset) { assert((SubMode == ia || SubMode == db) && "Illegal addressing mode 5 sub-mode!"); - return ((int)SubMode << 9) | ((int)WB << 8) | Offset; + return ((int)SubMode << 8) | Offset; } static inline AMSubMode getAM5SubMode(unsigned AM5Opc) { - return (AMSubMode)((AM5Opc >> 9) & 0x7); - } - static inline bool getAM5WBFlag(unsigned AM5Opc) { - return ((AM5Opc >> 8) & 1); + return (AMSubMode)((AM5Opc >> 8) & 0x7); } //===--------------------------------------------------------------------===// @@ -541,23 +524,11 @@ namespace ARM_AM { // // This is used for NEON load / store instructions. // - // addrmode6 := reg with optional writeback and alignment + // addrmode6 := reg with optional alignment // - // This is stored in four operands [regaddr, regupdate, opc, align]. The - // first is the address register. The second register holds the value of - // a post-access increment for writeback or reg0 if no writeback or if the - // writeback increment is the size of the memory access. The third - // operand encodes whether there is writeback to the address register. The - // fourth operand is the value of the alignment specifier to use or zero if - // no explicit alignment. - - static inline unsigned getAM6Opc(bool WB = false) { - return (int)WB; - } - - static inline bool getAM6WBFlag(unsigned Mode) { - return Mode & 1; - } + // This is stored in two operands [regaddr, align]. The first is the + // address register. The second operand is the value of the alignment + // specifier to use or zero if no explicit alignment. } // end namespace ARM_AM } // end namespace llvm diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 8e537d8..e6ea03a 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -650,39 +650,49 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, if (SrcRC == ARM::tGPRRegisterClass) SrcRC = ARM::GPRRegisterClass; - if (DestRC != SrcRC) { - if (DestRC->getSize() != SrcRC->getSize()) - return false; + // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies. + if (DestRC == ARM::DPR_8RegisterClass) + DestRC = ARM::DPR_VFP2RegisterClass; + if (SrcRC == ARM::DPR_8RegisterClass) + SrcRC = ARM::DPR_VFP2RegisterClass; + + // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies. + if (DestRC == ARM::QPR_VFP2RegisterClass || + DestRC == ARM::QPR_8RegisterClass) + DestRC = ARM::QPRRegisterClass; + if (SrcRC == ARM::QPR_VFP2RegisterClass || + SrcRC == ARM::QPR_8RegisterClass) + SrcRC = ARM::QPRRegisterClass; + + // Disallow copies of unequal sizes. + if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize()) + return false; - // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies. - // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies. - if (DestRC->getSize() != 8 && DestRC->getSize() != 16) + if (DestRC == ARM::GPRRegisterClass) { + if (SrcRC == ARM::SPRRegisterClass) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVRS), DestReg) + .addReg(SrcReg)); + else + AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), + DestReg).addReg(SrcReg))); + } else { + unsigned Opc; + + if (DestRC == ARM::SPRRegisterClass) + Opc = (SrcRC == ARM::GPRRegisterClass ? ARM::VMOVSR : ARM::VMOVS); + else if (DestRC == ARM::DPRRegisterClass) + Opc = ARM::VMOVD; + else if (DestRC == ARM::DPR_VFP2RegisterClass || + SrcRC == ARM::DPR_VFP2RegisterClass) + // Always use neon reg-reg move if source or dest is NEON-only regclass. + Opc = ARM::VMOVDneon; + else if (DestRC == ARM::QPRRegisterClass) + Opc = ARM::VMOVQ; + else return false; - } - if (DestRC == ARM::GPRRegisterClass) { - AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), - DestReg).addReg(SrcReg))); - } else if (DestRC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVS), DestReg) - .addReg(SrcReg)); - } else if (DestRC == ARM::DPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg) .addReg(SrcReg)); - } else if (DestRC == ARM::DPR_VFP2RegisterClass || - DestRC == ARM::DPR_8RegisterClass || - SrcRC == ARM::DPR_VFP2RegisterClass || - SrcRC == ARM::DPR_8RegisterClass) { - // Always use neon reg-reg move if source or dest is NEON-only regclass. - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVDneon), - DestReg).addReg(SrcReg)); - } else if (DestRC == ARM::QPRRegisterClass || - DestRC == ARM::QPR_VFP2RegisterClass || - DestRC == ARM::QPR_8RegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVQ), - DestReg).addReg(SrcReg)); - } else { - return false; } return true; @@ -727,10 +737,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert((RC == ARM::QPRRegisterClass || RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); // FIXME: Neon instructions should support predicates - if (Align >= 16 - && (getRegisterInfo().canRealignStack(MF))) { + if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) - .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) + .addFrameIndex(FI).addImm(128) .addMemOperand(MMO) .addReg(SrcReg, getKillRegState(isKill))); } else { @@ -780,7 +789,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) - .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) + .addFrameIndex(FI).addImm(128) .addMemOperand(MMO)); } else { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 767d5ec..292c498 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -93,34 +93,34 @@ namespace ARMII { StMiscFrm = 9 << FormShift, LdStMulFrm = 10 << FormShift, - LdStExFrm = 28 << FormShift, + LdStExFrm = 11 << FormShift, // Miscellaneous arithmetic instructions - ArithMiscFrm = 11 << FormShift, + ArithMiscFrm = 12 << FormShift, // Extend instructions - ExtFrm = 12 << FormShift, + ExtFrm = 13 << FormShift, // VFP formats - VFPUnaryFrm = 13 << FormShift, - VFPBinaryFrm = 14 << FormShift, - VFPConv1Frm = 15 << FormShift, - VFPConv2Frm = 16 << FormShift, - VFPConv3Frm = 17 << FormShift, - VFPConv4Frm = 18 << FormShift, - VFPConv5Frm = 19 << FormShift, - VFPLdStFrm = 20 << FormShift, - VFPLdStMulFrm = 21 << FormShift, - VFPMiscFrm = 22 << FormShift, + VFPUnaryFrm = 14 << FormShift, + VFPBinaryFrm = 15 << FormShift, + VFPConv1Frm = 16 << FormShift, + VFPConv2Frm = 17 << FormShift, + VFPConv3Frm = 18 << FormShift, + VFPConv4Frm = 19 << FormShift, + VFPConv5Frm = 20 << FormShift, + VFPLdStFrm = 21 << FormShift, + VFPLdStMulFrm = 22 << FormShift, + VFPMiscFrm = 23 << FormShift, // Thumb format - ThumbFrm = 23 << FormShift, + ThumbFrm = 24 << FormShift, // NEON format - NEONFrm = 24 << FormShift, - NEONGetLnFrm = 25 << FormShift, - NEONSetLnFrm = 26 << FormShift, - NEONDupFrm = 27 << FormShift, + NEONFrm = 25 << FormShift, + NEONGetLnFrm = 26 << FormShift, + NEONSetLnFrm = 27 << FormShift, + NEONDupFrm = 28 << FormShift, //===------------------------------------------------------------------===// // Misc flags. diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 11e1c48..b380c95 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -80,7 +80,7 @@ unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, case D23: return 23; case D24: return 24; case D25: return 25; - case D26: return 27; + case D26: return 26; case D27: return 27; case D28: return 28; case D29: return 29; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 334c820..e7aa0c8 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -55,12 +55,12 @@ namespace { const std::vector<MachineConstantPoolEntry> *MCPEs; const std::vector<MachineJumpTableEntry> *MJTEs; bool IsPIC; - + void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineModuleInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } - + static char ID; public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) @@ -68,7 +68,7 @@ namespace { TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} - + /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for /// machine instructions. @@ -163,7 +163,7 @@ namespace { char ARMCodeEmitter::ID = 0; -/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM +/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM /// code to the specified MCE object. FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, JITCodeEmitter &JCE) { @@ -617,8 +617,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { } } -unsigned ARMCodeEmitter::getMachineSoRegOpValue( - const MachineInstr &MI, +unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, const TargetInstrDesc &TID, const MachineOperand &MO, unsigned OpIdx) { @@ -690,7 +689,7 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { } unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, - const TargetInstrDesc &TID) const { + const TargetInstrDesc &TID) const { for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){ const MachineOperand &MO = MI.getOperand(i-1); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) @@ -699,8 +698,7 @@ unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, return 0; } -void ARMCodeEmitter::emitDataProcessingInstruction( - const MachineInstr &MI, +void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); @@ -765,8 +763,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction( emitWordLE(Binary); } -void ARMCodeEmitter::emitLoadStoreInstruction( - const MachineInstr &MI, +void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); @@ -841,7 +838,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction( } void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRn) { + unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); unsigned Form = TID.TSFlags & ARMII::FormMask; bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0; @@ -950,7 +947,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm())); // Set bit W(21) - if (ARM_AM::getAM4WBFlag(MO.getImm())) + if (IsUpdating) Binary |= 0x1 << ARMII::W_BitShift; // Set registers @@ -1238,8 +1235,7 @@ void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -void ARMCodeEmitter::emitVFPConversionInstruction( - const MachineInstr &MI) { +void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); unsigned Form = TID.TSFlags & ARMII::FormMask; @@ -1329,8 +1325,8 @@ void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction( - const MachineInstr &MI) { +void +ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0; @@ -1353,7 +1349,7 @@ void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction( Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm())); // Set bit W(21) - if (ARM_AM::getAM5WBFlag(MO.getImm())) + if (IsUpdating) Binary |= 0x1 << ARMII::W_BitShift; // First register is encoded in Dd. diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 013e00a..71207c8 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -80,8 +80,7 @@ public: SDValue &Mode); bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset); - bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Update, - SDValue &Opc, SDValue &Align); + bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Align); bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Label); @@ -502,12 +501,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N, } bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N, - SDValue &Addr, SDValue &Update, - SDValue &Opc, SDValue &Align) { + SDValue &Addr, SDValue &Align) { Addr = N; - // Default to no writeback. - Update = CurDAG->getRegister(0, MVT::i32); - Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32); // Default to no alignment. Align = CurDAG->getTargetConstant(0, MVT::i32); return true; @@ -1030,8 +1025,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1055,14 +1050,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, - Pred, PredReg, Chain }; + const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; std::vector<EVT> ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); + return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1070,11 +1064,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, // Quad registers are directly supported for VLD2, // loading 2 pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, - Pred, PredReg, Chain }; + const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; std::vector<EVT> ResTys(4, VT); ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); Chain = SDValue(VLd, 4); // Combine the even and odd subregs to produce the result. @@ -1086,25 +1079,21 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, // Otherwise, quad registers are loaded with two separate instructions, // where one loads the even registers and the other loads the odd registers. - // Enable writeback to the address register. - MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); - std::vector<EVT> ResTys(NumVecs, RegVT); ResTys.push_back(MemAddr.getValueType()); ResTys.push_back(MVT::Other); // Load the even subregs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, - Pred, PredReg, Chain }; - SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 7); + const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6); Chain = SDValue(VLdA, NumVecs+1); // Load the odd subregs. Opc = QOpcodes1[OpcodeIndex]; - const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, - Align, Pred, PredReg, Chain }; - SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 7); + const SDValue OpsB[] = { SDValue(VLdA, NumVecs), + Align, Reg0, Pred, Reg0, Chain }; + SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); Chain = SDValue(VLdB, NumVecs+1); // Combine the even and odd subregs to produce the result. @@ -1123,8 +1112,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1148,12 +1137,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector<SDValue, 8> Ops; + SmallVector<SDValue, 10> Ops; Ops.push_back(MemAddr); - Ops.push_back(MemUpdate); - Ops.push_back(MemOpc); Ops.push_back(Align); if (is64BitVector) { @@ -1161,9 +1148,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(N->getOperand(Vec+3)); Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1178,40 +1165,37 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, N->getOperand(Vec+3))); } Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 11); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9); } // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. - // Enable writeback to the address register. - MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + Ops.push_back(Reg0); // post-access address offset // Store the even subregs. for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, N->getOperand(Vec+3))); Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); unsigned Opc = QOpcodes0[OpcodeIndex]; SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+7); + MVT::Other, Ops.data(), NumVecs+6); Chain = SDValue(VStA, 1); // Store the odd subregs. Ops[0] = SDValue(VStA, 0); // MemAddr for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, N->getOperand(Vec+3)); - Ops[NumVecs+4] = Pred; - Ops[NumVecs+5] = PredReg; - Ops[NumVecs+6] = Chain; + Ops[NumVecs+5] = Chain; Opc = QOpcodes1[OpcodeIndex]; SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+7); + MVT::Other, Ops.data(), NumVecs+6); Chain = SDValue(VStB, 1); ReplaceUses(SDValue(N, 0), Chain); return NULL; @@ -1224,8 +1208,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1259,12 +1243,10 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector<SDValue, 9> Ops; + SmallVector<SDValue, 10> Ops; Ops.push_back(MemAddr); - Ops.push_back(MemUpdate); - Ops.push_back(MemOpc); Ops.push_back(Align); unsigned Opc = 0; @@ -1287,16 +1269,16 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, } Ops.push_back(getI32Imm(Lane)); Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); Ops.push_back(Chain); if (!IsLoad) - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+8); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6); std::vector<EVT> ResTys(NumVecs, RegVT); ResTys.push_back(MVT::Other); SDNode *VLdLn = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+8); + CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6); // For a 64-bit vector load to D registers, nothing more needs to be done. if (is64BitVector) return VLdLn; @@ -1859,37 +1841,45 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vld3: { unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16, ARM::VLD3d32, ARM::VLD3d64 }; - unsigned QOpcodes0[] = { ARM::VLD3q8a, ARM::VLD3q16a, ARM::VLD3q32a }; - unsigned QOpcodes1[] = { ARM::VLD3q8b, ARM::VLD3q16b, ARM::VLD3q32b }; + unsigned QOpcodes0[] = { ARM::VLD3q8_UPD, + ARM::VLD3q16_UPD, + ARM::VLD3q32_UPD }; + unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD, + ARM::VLD3q16odd_UPD, + ARM::VLD3q32odd_UPD }; return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4: { unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16, ARM::VLD4d32, ARM::VLD4d64 }; - unsigned QOpcodes0[] = { ARM::VLD4q8a, ARM::VLD4q16a, ARM::VLD4q32a }; - unsigned QOpcodes1[] = { ARM::VLD4q8b, ARM::VLD4q16b, ARM::VLD4q32b }; + unsigned QOpcodes0[] = { ARM::VLD4q8_UPD, + ARM::VLD4q16_UPD, + ARM::VLD4q32_UPD }; + unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD, + ARM::VLD4q16odd_UPD, + ARM::VLD4q32odd_UPD }; return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld2lane: { unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 }; - unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a }; - unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b }; + unsigned QOpcodes0[] = { ARM::VLD2LNq16, ARM::VLD2LNq32 }; + unsigned QOpcodes1[] = { ARM::VLD2LNq16odd, ARM::VLD2LNq32odd }; return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld3lane: { unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 }; - unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a }; - unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b }; + unsigned QOpcodes0[] = { ARM::VLD3LNq16, ARM::VLD3LNq32 }; + unsigned QOpcodes1[] = { ARM::VLD3LNq16odd, ARM::VLD3LNq32odd }; return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4lane: { unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 }; - unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a }; - unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b }; + unsigned QOpcodes0[] = { ARM::VLD4LNq16, ARM::VLD4LNq32 }; + unsigned QOpcodes1[] = { ARM::VLD4LNq16odd, ARM::VLD4LNq32odd }; return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); } @@ -1903,37 +1893,45 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vst3: { unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16, ARM::VST3d32, ARM::VST3d64 }; - unsigned QOpcodes0[] = { ARM::VST3q8a, ARM::VST3q16a, ARM::VST3q32a }; - unsigned QOpcodes1[] = { ARM::VST3q8b, ARM::VST3q16b, ARM::VST3q32b }; + unsigned QOpcodes0[] = { ARM::VST3q8_UPD, + ARM::VST3q16_UPD, + ARM::VST3q32_UPD }; + unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD, + ARM::VST3q16odd_UPD, + ARM::VST3q32odd_UPD }; return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4: { unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16, ARM::VST4d32, ARM::VST4d64 }; - unsigned QOpcodes0[] = { ARM::VST4q8a, ARM::VST4q16a, ARM::VST4q32a }; - unsigned QOpcodes1[] = { ARM::VST4q8b, ARM::VST4q16b, ARM::VST4q32b }; + unsigned QOpcodes0[] = { ARM::VST4q8_UPD, + ARM::VST4q16_UPD, + ARM::VST4q32_UPD }; + unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD, + ARM::VST4q16odd_UPD, + ARM::VST4q32odd_UPD }; return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst2lane: { unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 }; - unsigned QOpcodes0[] = { ARM::VST2LNq16a, ARM::VST2LNq32a }; - unsigned QOpcodes1[] = { ARM::VST2LNq16b, ARM::VST2LNq32b }; + unsigned QOpcodes0[] = { ARM::VST2LNq16, ARM::VST2LNq32 }; + unsigned QOpcodes1[] = { ARM::VST2LNq16odd, ARM::VST2LNq32odd }; return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst3lane: { unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 }; - unsigned QOpcodes0[] = { ARM::VST3LNq16a, ARM::VST3LNq32a }; - unsigned QOpcodes1[] = { ARM::VST3LNq16b, ARM::VST3LNq32b }; + unsigned QOpcodes0[] = { ARM::VST3LNq16, ARM::VST3LNq32 }; + unsigned QOpcodes1[] = { ARM::VST3LNq16odd, ARM::VST3LNq32odd }; return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4lane: { unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 }; - unsigned QOpcodes0[] = { ARM::VST4LNq16a, ARM::VST4LNq32a }; - unsigned QOpcodes1[] = { ARM::VST4LNq16b, ARM::VST4LNq32b }; + unsigned QOpcodes0[] = { ARM::VST4LNq16, ARM::VST4LNq32 }; + unsigned QOpcodes1[] = { ARM::VST4LNq16odd, ARM::VST4LNq32odd }; return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); } } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 8f20843..0d0a004 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -436,9 +436,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } // Special handling for half-precision FP. - if (Subtarget->hasVFP3() && Subtarget->hasFP16()) { - setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Custom); - setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Custom); + if (!Subtarget->hasFP16()) { + setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); + setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); } } @@ -499,8 +499,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::FTOUI: return "ARMISD::FTOUI"; case ARMISD::SITOF: return "ARMISD::SITOF"; case ARMISD::UITOF: return "ARMISD::UITOF"; - case ARMISD::F16_TO_F32: return "ARMISD::F16_TO_F32"; - case ARMISD::F32_TO_F16: return "ARMISD::F32_TO_F16"; case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; @@ -1987,9 +1985,6 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { default: assert(0 && "Invalid opcode!"); - case ISD::FP32_TO_FP16: - Opc = ARMISD::F32_TO_F16; - break; case ISD::FP_TO_SINT: Opc = ARMISD::FTOSI; break; @@ -2009,9 +2004,6 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { default: assert(0 && "Invalid opcode!"); - case ISD::FP16_TO_FP32: - Opc = ARMISD::F16_TO_F32; - break; case ISD::SINT_TO_FP: Opc = ARMISD::SITOF; break; @@ -3078,10 +3070,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); - case ISD::FP16_TO_FP32: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); - case ISD::FP32_TO_FP16: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index d7b2ba3..f8f8adc 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -59,8 +59,6 @@ namespace llvm { FTOUI, // FP to uint within a FP register. SITOF, // sint to FP within a FP register. UITOF, // uint to FP within a FP register. - F16_TO_F32, // Half FP to single FP within a FP register. - F32_TO_F16, // Single FP to half FP within a FP register. SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out. SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 258a96b..4f6f05d 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -15,8 +15,8 @@ // Format specifies the encoding used by the instruction. This is part of the // ad-hoc solution used to emit machine instruction encodings by our machine // code emitter. -class Format<bits<5> val> { - bits<5> Value = val; +class Format<bits<6> val> { + bits<6> Value = val; } def Pseudo : Format<0>; @@ -33,32 +33,34 @@ def LdMiscFrm : Format<8>; def StMiscFrm : Format<9>; def LdStMulFrm : Format<10>; -def LdStExFrm : Format<28>; +def LdStExFrm : Format<11>; -def ArithMiscFrm : Format<11>; -def ExtFrm : Format<12>; +def ArithMiscFrm : Format<12>; +def ExtFrm : Format<13>; -def VFPUnaryFrm : Format<13>; -def VFPBinaryFrm : Format<14>; -def VFPConv1Frm : Format<15>; -def VFPConv2Frm : Format<16>; -def VFPConv3Frm : Format<17>; -def VFPConv4Frm : Format<18>; -def VFPConv5Frm : Format<19>; -def VFPLdStFrm : Format<20>; -def VFPLdStMulFrm : Format<21>; -def VFPMiscFrm : Format<22>; +def VFPUnaryFrm : Format<14>; +def VFPBinaryFrm : Format<15>; +def VFPConv1Frm : Format<16>; +def VFPConv2Frm : Format<17>; +def VFPConv3Frm : Format<18>; +def VFPConv4Frm : Format<19>; +def VFPConv5Frm : Format<20>; +def VFPLdStFrm : Format<21>; +def VFPLdStMulFrm : Format<22>; +def VFPMiscFrm : Format<23>; -def ThumbFrm : Format<23>; +def ThumbFrm : Format<24>; -def NEONFrm : Format<24>; -def NEONGetLnFrm : Format<25>; -def NEONSetLnFrm : Format<26>; -def NEONDupFrm : Format<27>; +def NEONFrm : Format<25>; +def NEONGetLnFrm : Format<26>; +def NEONSetLnFrm : Format<27>; +def NEONDupFrm : Format<28>; def MiscFrm : Format<29>; def ThumbMiscFrm : Format<30>; +def NLdStFrm : Format<31>; + // Misc flags. // the instruction has a Rn register operand. @@ -71,7 +73,7 @@ class UnaryDP { bit isUnaryDataProc = 1; } class Xform16Bit { bit canXformTo16Bit = 1; } //===----------------------------------------------------------------------===// -// ARM Instruction flags. These need to match ARMInstrInfo.h. +// ARM Instruction flags. These need to match ARMBaseInstrInfo.h. // // Addressing mode. @@ -183,7 +185,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im, bits<2> IndexModeBits = IM.Value; Format F = f; - bits<5> Form = F.Value; + bits<6> Form = F.Value; Domain D = d; bits<2> Dom = D.Value; @@ -229,7 +231,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<dag> pattern> : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsARM]; @@ -257,7 +259,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<dag> pattern> : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); + let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsARM]; @@ -1007,8 +1009,8 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { - let OutOperandList = !con(oops, (ops s_cc_out:$s)); - let InOperandList = !con(iops, (ops pred:$p)); + let OutOperandList = !con(oops, (outs s_cc_out:$s)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb1Only]; @@ -1030,7 +1032,7 @@ class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, string opc, string asm, string cstr, list<dag> pattern> : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb1Only]; @@ -1109,7 +1111,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; @@ -1125,7 +1127,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); + let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; @@ -1209,7 +1211,7 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; @@ -1265,7 +1267,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, im, f, VFPDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [HasVFP2]; @@ -1464,11 +1466,12 @@ class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, // ARM NEON Instruction templates. // -class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, - string opc, string dt, string asm, string cstr, list<dag> pattern> - : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> { +class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, + InstrItinClass itin, string opc, string dt, string asm, string cstr, + list<dag> pattern> + : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat( !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)), !strconcat("\t", asm)); @@ -1481,7 +1484,7 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm)); let Pattern = pattern; list<Predicate> Predicates = [HasNEON]; @@ -1502,8 +1505,8 @@ class NI4<dag oops, dag iops, InstrItinClass itin, string opc, class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> - : NeonI<oops, iops, AddrMode6, IndexModeNone, itin, opc, dt, asm, cstr, - pattern> { + : NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm, + cstr, pattern> { let Inst{31-24} = 0b11110100; let Inst{23} = op23; let Inst{21-20} = op21_20; @@ -1513,7 +1516,7 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, class NDataI<dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> - : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, dt, asm, + : NeonI<oops, iops, AddrModeNone, IndexModeNone, NEONFrm, itin, opc, dt, asm, cstr, pattern> { let Inst{31-25} = 0b1111001; } @@ -1621,7 +1624,7 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, let Inst{4} = 1; let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat( !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)), !strconcat("\t", asm)); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 3fc37da..26a2806 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -62,11 +62,14 @@ def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; @@ -282,7 +285,7 @@ def pclabel : Operand<i32> { // shifter_operand operands: so_reg and so_imm. def so_reg : Operand<i32>, // reg reg imm - ComplexPattern<i32, 3, "SelectShifterOperandReg", + ComplexPattern<i32, 3, "SelectShifterOperandReg", [shl,srl,sra,rotr]> { let PrintMethod = "printSORegOperand"; let MIOperandInfo = (ops GPR, GPR, i32imm); @@ -392,9 +395,14 @@ def addrmode5 : Operand<i32>, // addrmode6 := reg with optional writeback // def addrmode6 : Operand<i32>, - ComplexPattern<i32, 4, "SelectAddrMode6", []> { + ComplexPattern<i32, 2, "SelectAddrMode6", []> { let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm); + let MIOperandInfo = (ops GPR:$addr, i32imm); +} + +def am6offset : Operand<i32> { + let PrintMethod = "printAddrMode6OffsetOperand"; + let MIOperandInfo = (ops GPR); } // addrmodepc := pc + reg @@ -909,7 +917,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, LdStMulFrm, IIC_Br, - "ldm${addr:submode}${p}\t$addr, $dsts", + "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>; // On non-Darwin platforms R9 is callee-saved. @@ -1354,7 +1362,7 @@ def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, LdStMulFrm, IIC_iLoadm, - "ldm${addr:submode}${p}\t$addr, $dsts", + "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>; } // mayLoad, hasExtraDefRegAllocReq @@ -1367,7 +1375,7 @@ def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p, def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, LdStMulFrm, IIC_iStorem, - "stm${addr:submode}${p}\t$addr, $srcs", + "stm${addr:submode}${p}\t$addr!, $srcs", "$addr.addr = $wb", []>; } // mayStore, hasExtraSrcRegAllocReq diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 8fee6fa..c977cc3 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -138,214 +138,360 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem, } // VLD1 : Vector Load (multiple single elements) -class VLD1D<bits<4> op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> +class VLD1D<bits<4> op7_4, string Dt, ValueType Ty> : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - OpcodeStr, Dt, "\\{$dst\\}, $addr", "", - [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; -class VLD1Q<bits<4> op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> + "vld1", Dt, "\\{$dst\\}, $addr", "", + [(set DPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>; +class VLD1Q<bits<4> op7_4, string Dt, ValueType Ty> : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - OpcodeStr, Dt, "${dst:dregpair}, $addr", "", - [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; + "vld1", Dt, "${dst:dregpair}, $addr", "", + [(set QPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>; + +def VLD1d8 : VLD1D<0b0000, "8", v8i8>; +def VLD1d16 : VLD1D<0b0100, "16", v4i16>; +def VLD1d32 : VLD1D<0b1000, "32", v2i32>; +def VLD1df : VLD1D<0b1000, "32", v2f32>; +def VLD1d64 : VLD1D<0b1100, "64", v1i64>; + +def VLD1q8 : VLD1Q<0b0000, "8", v16i8>; +def VLD1q16 : VLD1Q<0b0100, "16", v8i16>; +def VLD1q32 : VLD1Q<0b1000, "32", v4i32>; +def VLD1qf : VLD1Q<0b1000, "32", v4f32>; +def VLD1q64 : VLD1Q<0b1100, "64", v2i64>; + +let mayLoad = 1 in { + +// ...with address register writeback: +class VLD1DWB<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", Dt, "\\{$dst\\}, $addr$offset", + "$addr.addr = $wb", []>; +class VLD1QWB<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", Dt, "${dst:dregpair}, $addr$offset", + "$addr.addr = $wb", []>; -def VLD1d8 : VLD1D<0b0000, "vld1", "8", v8i8, int_arm_neon_vld1>; -def VLD1d16 : VLD1D<0b0100, "vld1", "16", v4i16, int_arm_neon_vld1>; -def VLD1d32 : VLD1D<0b1000, "vld1", "32", v2i32, int_arm_neon_vld1>; -def VLD1df : VLD1D<0b1000, "vld1", "32", v2f32, int_arm_neon_vld1>; -def VLD1d64 : VLD1D<0b1100, "vld1", "64", v1i64, int_arm_neon_vld1>; +def VLD1d8_UPD : VLD1DWB<0b0000, "8">; +def VLD1d16_UPD : VLD1DWB<0b0100, "16">; +def VLD1d32_UPD : VLD1DWB<0b1000, "32">; +def VLD1d64_UPD : VLD1DWB<0b1100, "64">; -def VLD1q8 : VLD1Q<0b0000, "vld1", "8", v16i8, int_arm_neon_vld1>; -def VLD1q16 : VLD1Q<0b0100, "vld1", "16", v8i16, int_arm_neon_vld1>; -def VLD1q32 : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>; -def VLD1qf : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>; -def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>; +def VLD1q8_UPD : VLD1QWB<0b0000, "8">; +def VLD1q16_UPD : VLD1QWB<0b0100, "16">; +def VLD1q32_UPD : VLD1QWB<0b1000, "32">; +def VLD1q64_UPD : VLD1QWB<0b1100, "64">; +} // mayLoad = 1 + +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // These (dreg triple/quadruple) are for disassembly only. -class VLD1D3<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0, 0b10, 0b0110, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt, +class VLD1D3<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", [/* For disassembly only; pattern left blank */]>; -class VLD1D4<bits<4> op7_4, string OpcodeStr, string Dt> +class VLD1D4<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt, + (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", [/* For disassembly only; pattern left blank */]>; -def VLD1d8T : VLD1D3<0b0000, "vld1", "8">; -def VLD1d16T : VLD1D3<0b0100, "vld1", "16">; -def VLD1d32T : VLD1D3<0b1000, "vld1", "32">; -//def VLD1d64T : VLD1D3<0b1100, "vld1", "64">; - -def VLD1d8Q : VLD1D4<0b0000, "vld1", "8">; -def VLD1d16Q : VLD1D4<0b0100, "vld1", "16">; -def VLD1d32Q : VLD1D4<0b1000, "vld1", "32">; -//def VLD1d64Q : VLD1D4<0b1100, "vld1", "64">; +def VLD1d8T : VLD1D3<0b0000, "8">; +def VLD1d16T : VLD1D3<0b0100, "16">; +def VLD1d32T : VLD1D3<0b1000, "32">; +// VLD1d64T : implemented as VLD3d64 + +def VLD1d8Q : VLD1D4<0b0000, "8">; +def VLD1d16Q : VLD1D4<0b0100, "16">; +def VLD1d32Q : VLD1D4<0b1000, "32">; +// VLD1d64Q : implemented as VLD4d64 + +// ...with address register writeback: +class VLD1D3WB<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, + "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; +class VLD1D4WB<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b0010,op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; +def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">; +def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">; +def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">; +// VLD1d64T_UPD : implemented as VLD3d64_UPD -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +def VLD1d8Q_UPD : VLD1D4WB<0b0000, "8">; +def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">; +def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">; +// VLD1d64Q_UPD : implemented as VLD4d64_UPD // VLD2 : Vector Load (multiple 2-element structures) -class VLD2D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b1000,op7_4, (outs DPR:$dst1, DPR:$dst2), +class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD2, - OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; -class VLD2Q<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0011,op7_4, + "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; +class VLD2Q<bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, 0b0011, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD2, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", - "", []>; + "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; -def VLD2d8 : VLD2D<0b0000, "vld2", "8">; -def VLD2d16 : VLD2D<0b0100, "vld2", "16">; -def VLD2d32 : VLD2D<0b1000, "vld2", "32">; +def VLD2d8 : VLD2D<0b1000, 0b0000, "8">; +def VLD2d16 : VLD2D<0b1000, 0b0100, "16">; +def VLD2d32 : VLD2D<0b1000, 0b1000, "32">; def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD1, "vld1", "64", "\\{$dst1, $dst2\\}, $addr", "", []>; -def VLD2q8 : VLD2Q<0b0000, "vld2", "8">; -def VLD2q16 : VLD2Q<0b0100, "vld2", "16">; -def VLD2q32 : VLD2Q<0b1000, "vld2", "32">; +def VLD2q8 : VLD2Q<0b0000, "8">; +def VLD2q16 : VLD2Q<0b0100, "16">; +def VLD2q32 : VLD2Q<0b1000, "32">; -// These (double-spaced dreg pair) are for disassembly only. -class VLD2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b1001,op7_4, (outs DPR:$dst1, DPR:$dst2), - (ins addrmode6:$addr), IIC_VLD2, - OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; +// ...with address register writeback: +class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2, + "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset", + "$addr.addr = $wb", []>; +class VLD2QWB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, 0b0011, op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2, + "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", + "$addr.addr = $wb", []>; -def VLD2d8D : VLD2Ddbl<0b0000, "vld2", "8">; -def VLD2d16D : VLD2Ddbl<0b0100, "vld2", "16">; -def VLD2d32D : VLD2Ddbl<0b1000, "vld2", "32">; +def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">; +def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">; +def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">; +def VLD2d64_UPD : NLdSt<0,0b10,0b1010,0b1100, + (outs DPR:$dst1, DPR:$dst2, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", "64", "\\{$dst1, $dst2\\}, $addr$offset", + "$addr.addr = $wb", []>; + +def VLD2q8_UPD : VLD2QWB<0b0000, "8">; +def VLD2q16_UPD : VLD2QWB<0b0100, "16">; +def VLD2q32_UPD : VLD2QWB<0b1000, "32">; + +// ...with double-spaced registers (for disassembly only): +def VLD2b8 : VLD2D<0b1001, 0b0000, "8">; +def VLD2b16 : VLD2D<0b1001, 0b0100, "16">; +def VLD2b32 : VLD2D<0b1001, 0b1000, "32">; +def VLD2b8_UPD : VLD2DWB<0b1001, 0b0000, "8">; +def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">; +def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">; // VLD3 : Vector Load (multiple 3-element structures) -class VLD3D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), +class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), IIC_VLD3, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; -class VLD3WB<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0101,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD3, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", - "$addr.addr = $wb", []>; + "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; -def VLD3d8 : VLD3D<0b0000, "vld3", "8">; -def VLD3d16 : VLD3D<0b0100, "vld3", "16">; -def VLD3d32 : VLD3D<0b1000, "vld3", "32">; +def VLD3d8 : VLD3D<0b0100, 0b0000, "8">; +def VLD3d16 : VLD3D<0b0100, 0b0100, "16">; +def VLD3d32 : VLD3D<0b0100, 0b1000, "32">; def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), IIC_VLD1, "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; -// vld3 to double-spaced even registers. -def VLD3q8a : VLD3WB<0b0000, "vld3", "8">; -def VLD3q16a : VLD3WB<0b0100, "vld3", "16">; -def VLD3q32a : VLD3WB<0b1000, "vld3", "32">; +// ...with address register writeback: +class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3, + "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vld3 to double-spaced odd registers. -def VLD3q8b : VLD3WB<0b0000, "vld3", "8">; -def VLD3q16b : VLD3WB<0b0100, "vld3", "16">; -def VLD3q32b : VLD3WB<0b1000, "vld3", "32">; +def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">; +def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">; +def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">; +def VLD3d64_UPD : NLdSt<0,0b10,0b0110,0b1100, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VLD3q8 : VLD3D<0b0101, 0b0000, "8">; +def VLD3q16 : VLD3D<0b0101, 0b0100, "16">; +def VLD3q32 : VLD3D<0b0101, 0b1000, "32">; +def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">; +def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">; +def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">; +def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">; +def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">; // VLD4 : Vector Load (multiple 4-element structures) -class VLD4D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0000,op7_4, +class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD4, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", - "", []>; -class VLD4WB<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0001,op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD4, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", - "$addr.addr = $wb", []>; + "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; -def VLD4d8 : VLD4D<0b0000, "vld4", "8">; -def VLD4d16 : VLD4D<0b0100, "vld4", "16">; -def VLD4d32 : VLD4D<0b1000, "vld4", "32">; +def VLD4d8 : VLD4D<0b0000, 0b0000, "8">; +def VLD4d16 : VLD4D<0b0000, 0b0100, "16">; +def VLD4d32 : VLD4D<0b0000, 0b1000, "32">; def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD1, "vld1", "64", "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; -// vld4 to double-spaced even registers. -def VLD4q8a : VLD4WB<0b0000, "vld4", "8">; -def VLD4q16a : VLD4WB<0b0100, "vld4", "16">; -def VLD4q32a : VLD4WB<0b1000, "vld4", "32">; +// ...with address register writeback: +class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4, + "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vld4 to double-spaced odd registers. -def VLD4q8b : VLD4WB<0b0000, "vld4", "8">; -def VLD4q16b : VLD4WB<0b0100, "vld4", "16">; -def VLD4q32b : VLD4WB<0b1000, "vld4", "32">; +def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">; +def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">; +def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">; +def VLD4d64_UPD : NLdSt<0,0b10,0b0010,0b1100, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, + GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", "64", + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VLD4q8 : VLD4D<0b0001, 0b0000, "8">; +def VLD4q16 : VLD4D<0b0001, 0b0100, "16">; +def VLD4q32 : VLD4D<0b0001, 0b1000, "32">; +def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">; +def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">; +def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VLD4q8odd_UPD : VLD4DWB<0b0001, 0b0000, "8">; +def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">; +def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">; // VLD1LN : Vector Load (single element to one lane) // FIXME: Not yet implemented. // VLD2LN : Vector Load (single 2-element structure to one lane) -class VLD2LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VLD2, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", - "$src1 = $dst1, $src2 = $dst2", []>; +class VLD2LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), + IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", + "$src1 = $dst1, $src2 = $dst2", []>; + +def VLD2LNd8 : VLD2LN<0b0001, "8">; +def VLD2LNd16 : VLD2LN<0b0101, "16"> { let Inst{5} = 0; } +def VLD2LNd32 : VLD2LN<0b1001, "32"> { let Inst{6} = 0; } -// vld2 to single-spaced registers. -def VLD2LNd8 : VLD2LN<0b0001, "vld2", "8">; -def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 0; } -def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 0; } +// ...with double-spaced registers: +def VLD2LNq16 : VLD2LN<0b0101, "16"> { let Inst{5} = 1; } +def VLD2LNq32 : VLD2LN<0b1001, "32"> { let Inst{6} = 1; } -// vld2 to double-spaced even registers. -def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } -def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } +// ...alternate versions to be allocated odd register numbers: +def VLD2LNq16odd : VLD2LN<0b0101, "16"> { let Inst{5} = 1; } +def VLD2LNq32odd : VLD2LN<0b1001, "32"> { let Inst{6} = 1; } -// vld2 to double-spaced odd registers. -def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } -def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } +// ...with address register writeback: +class VLD2LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt, + "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset", + "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>; + +def VLD2LNd8_UPD : VLD2LNWB<0b0001, "8">; +def VLD2LNd16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 0; } +def VLD2LNd32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 0; } + +def VLD2LNq16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 1; } +def VLD2LNq32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 1; } // VLD3LN : Vector Load (single 3-element structure to one lane) -class VLD3LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VLD3, OpcodeStr, Dt, - "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr", - "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; - -// vld3 to single-spaced registers. -def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> { let Inst{4} = 0; } -def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b00; } -def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b000; } - -// vld3 to double-spaced even registers. -def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } -def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } - -// vld3 to double-spaced odd registers. -def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } -def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } +class VLD3LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VLD3, "vld3", Dt, + "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr", + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; + +def VLD3LNd8 : VLD3LN<0b0010, "8"> { let Inst{4} = 0; } +def VLD3LNd16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VLD3LNd32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b000; } + +// ...with double-spaced registers: +def VLD3LNq16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } + +// ...alternate versions to be allocated odd register numbers: +def VLD3LNq16odd : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32odd : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } + +// ...with address register writeback: +class VLD3LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), + IIC_VLD3, "vld3", Dt, + "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset", + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb", + []>; + +def VLD3LNd8_UPD : VLD3LNWB<0b0010, "8"> { let Inst{4} = 0; } +def VLD3LNd16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VLD3LNd32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; } + +def VLD3LNq16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; } // VLD4LN : Vector Load (single 4-element structure to one lane) -class VLD4LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b10,op11_8,{?,?,?,?}, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VLD4, OpcodeStr, Dt, +class VLD4LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VLD4, "vld4", Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr", - "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; -// vld4 to single-spaced registers. -def VLD4LNd8 : VLD4LN<0b0011, "vld4", "8">; -def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 0; } -def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 0; } +def VLD4LNd8 : VLD4LN<0b0011, "8">; +def VLD4LNd16 : VLD4LN<0b0111, "16"> { let Inst{5} = 0; } +def VLD4LNd32 : VLD4LN<0b1011, "32"> { let Inst{6} = 0; } -// vld4 to double-spaced even registers. -def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } -def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } +// ...with double-spaced registers: +def VLD4LNq16 : VLD4LN<0b0111, "16"> { let Inst{5} = 1; } +def VLD4LNq32 : VLD4LN<0b1011, "32"> { let Inst{6} = 1; } -// vld4 to double-spaced odd registers. -def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } -def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } +// ...alternate versions to be allocated odd register numbers: +def VLD4LNq16odd : VLD4LN<0b0111, "16"> { let Inst{5} = 1; } +def VLD4LNq32odd : VLD4LN<0b1011, "32"> { let Inst{6} = 1; } + +// ...with address register writeback: +class VLD4LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), + IIC_VLD4, "vld4", Dt, +"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset", +"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb", + []>; + +def VLD4LNd8_UPD : VLD4LNWB<0b0011, "8">; +def VLD4LNd16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 0; } +def VLD4LNd32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 0; } + +def VLD4LNq16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 1; } +def VLD4LNq32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 1; } // VLD1DUP : Vector Load (single element to all lanes) // VLD2DUP : Vector Load (single 2-element structure to all lanes) @@ -355,213 +501,353 @@ def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } } // mayLoad = 1, hasExtraDefRegAllocReq = 1 // VST1 : Vector Store (multiple single elements) -class VST1D<bits<4> op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> +class VST1D<bits<4> op7_4, string Dt, ValueType Ty> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, - OpcodeStr, Dt, "\\{$src\\}, $addr", "", - [(IntOp addrmode6:$addr, (Ty DPR:$src))]>; -class VST1Q<bits<4> op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> + "vst1", Dt, "\\{$src\\}, $addr", "", + [(int_arm_neon_vst1 addrmode6:$addr, (Ty DPR:$src))]>; +class VST1Q<bits<4> op7_4, string Dt, ValueType Ty> : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, - OpcodeStr, Dt, "${src:dregpair}, $addr", "", - [(IntOp addrmode6:$addr, (Ty QPR:$src))]>; + "vst1", Dt, "${src:dregpair}, $addr", "", + [(int_arm_neon_vst1 addrmode6:$addr, (Ty QPR:$src))]>; let hasExtraSrcRegAllocReq = 1 in { -def VST1d8 : VST1D<0b0000, "vst1", "8", v8i8, int_arm_neon_vst1>; -def VST1d16 : VST1D<0b0100, "vst1", "16", v4i16, int_arm_neon_vst1>; -def VST1d32 : VST1D<0b1000, "vst1", "32", v2i32, int_arm_neon_vst1>; -def VST1df : VST1D<0b1000, "vst1", "32", v2f32, int_arm_neon_vst1>; -def VST1d64 : VST1D<0b1100, "vst1", "64", v1i64, int_arm_neon_vst1>; - -def VST1q8 : VST1Q<0b0000, "vst1", "8", v16i8, int_arm_neon_vst1>; -def VST1q16 : VST1Q<0b0100, "vst1", "16", v8i16, int_arm_neon_vst1>; -def VST1q32 : VST1Q<0b1000, "vst1", "32", v4i32, int_arm_neon_vst1>; -def VST1qf : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>; -def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>; +def VST1d8 : VST1D<0b0000, "8", v8i8>; +def VST1d16 : VST1D<0b0100, "16", v4i16>; +def VST1d32 : VST1D<0b1000, "32", v2i32>; +def VST1df : VST1D<0b1000, "32", v2f32>; +def VST1d64 : VST1D<0b1100, "64", v1i64>; + +def VST1q8 : VST1Q<0b0000, "8", v16i8>; +def VST1q16 : VST1Q<0b0100, "16", v8i16>; +def VST1q32 : VST1Q<0b1000, "32", v4i32>; +def VST1qf : VST1Q<0b1000, "32", v4f32>; +def VST1q64 : VST1Q<0b1100, "64", v2i64>; } // hasExtraSrcRegAllocReq +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { + +// ...with address register writeback: +class VST1DWB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST, + "vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>; +class VST1QWB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST, + "vst1", Dt, "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>; + +def VST1d8_UPD : VST1DWB<0b0000, "8">; +def VST1d16_UPD : VST1DWB<0b0100, "16">; +def VST1d32_UPD : VST1DWB<0b1000, "32">; +def VST1d64_UPD : VST1DWB<0b1100, "64">; + +def VST1q8_UPD : VST1QWB<0b0000, "8">; +def VST1q16_UPD : VST1QWB<0b0100, "16">; +def VST1q32_UPD : VST1QWB<0b1000, "32">; +def VST1q64_UPD : VST1QWB<0b1100, "64">; + // These (dreg triple/quadruple) are for disassembly only. -class VST1D3<bits<4> op7_4, string OpcodeStr, string Dt> +class VST1D3<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - OpcodeStr, Dt, - "\\{$src1, $src2, $src3\\}, $addr", "", + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", [/* For disassembly only; pattern left blank */]>; -class VST1D4<bits<4> op7_4, string OpcodeStr, string Dt> +class VST1D4<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, - "\\{$src1, $src2, $src3, $src4\\}, $addr", "", + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", [/* For disassembly only; pattern left blank */]>; -def VST1d8T : VST1D3<0b0000, "vst1", "8">; -def VST1d16T : VST1D3<0b0100, "vst1", "16">; -def VST1d32T : VST1D3<0b1000, "vst1", "32">; -//def VST1d64T : VST1D3<0b1100, "vst1", "64">; - -def VST1d8Q : VST1D4<0b0000, "vst1", "8">; -def VST1d16Q : VST1D4<0b0100, "vst1", "16">; -def VST1d32Q : VST1D4<0b1000, "vst1", "32">; -//def VST1d64Q : VST1D4<0b1100, "vst1", "64">; +def VST1d8T : VST1D3<0b0000, "8">; +def VST1d16T : VST1D3<0b0100, "16">; +def VST1d32T : VST1D3<0b1000, "32">; +// VST1d64T : implemented as VST3d64 + +def VST1d8Q : VST1D4<0b0000, "8">; +def VST1d16Q : VST1D4<0b0100, "16">; +def VST1d32Q : VST1D4<0b1000, "32">; +// VST1d64Q : implemented as VST4d64 + +// ...with address register writeback: +class VST1D3WB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3), + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", + "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; +class VST1D4WB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; +def VST1d8T_UPD : VST1D3WB<0b0000, "8">; +def VST1d16T_UPD : VST1D3WB<0b0100, "16">; +def VST1d32T_UPD : VST1D3WB<0b1000, "32">; +// VST1d64T_UPD : implemented as VST3d64_UPD -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +def VST1d8Q_UPD : VST1D4WB<0b0000, "8">; +def VST1d16Q_UPD : VST1D4WB<0b0100, "16">; +def VST1d32Q_UPD : VST1D4WB<0b1000, "32">; +// VST1d64Q_UPD : implemented as VST4d64_UPD // VST2 : Vector Store (multiple 2-element structures) -class VST2D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b1000,op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>; -class VST2Q<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0011,op7_4, (outs), +class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2), + IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>; +class VST2Q<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0011, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; -def VST2d8 : VST2D<0b0000, "vst2", "8">; -def VST2d16 : VST2D<0b0100, "vst2", "16">; -def VST2d32 : VST2D<0b1000, "vst2", "32">; +def VST2d8 : VST2D<0b1000, 0b0000, "8">; +def VST2d16 : VST2D<0b1000, 0b0100, "16">; +def VST2d32 : VST2D<0b1000, 0b1000, "32">; def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, "vst1", "64", "\\{$src1, $src2\\}, $addr", "", []>; -def VST2q8 : VST2Q<0b0000, "vst2", "8">; -def VST2q16 : VST2Q<0b0100, "vst2", "16">; -def VST2q32 : VST2Q<0b1000, "vst2", "32">; +def VST2q8 : VST2Q<0b0000, "8">; +def VST2q16 : VST2Q<0b0100, "16">; +def VST2q32 : VST2Q<0b1000, "32">; -// These (double-spaced dreg pair) are for disassembly only. -class VST2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0, 0b00, 0b1001, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>; +// ...with address register writeback: +class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2), + IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset", + "$addr.addr = $wb", []>; +class VST2QWB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + "$addr.addr = $wb", []>; -def VST2d8D : VST2Ddbl<0b0000, "vst2", "8">; -def VST2d16D : VST2Ddbl<0b0100, "vst2", "16">; -def VST2d32D : VST2Ddbl<0b1000, "vst2", "32">; +def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">; +def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">; +def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">; +def VST2d64_UPD : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2), IIC_VST, + "vst1", "64", "\\{$src1, $src2\\}, $addr$offset", + "$addr.addr = $wb", []>; + +def VST2q8_UPD : VST2QWB<0b0000, "8">; +def VST2q16_UPD : VST2QWB<0b0100, "16">; +def VST2q32_UPD : VST2QWB<0b1000, "32">; + +// ...with double-spaced registers (for disassembly only): +def VST2b8 : VST2D<0b1001, 0b0000, "8">; +def VST2b16 : VST2D<0b1001, 0b0100, "16">; +def VST2b32 : VST2D<0b1001, 0b1000, "32">; +def VST2b8_UPD : VST2DWB<0b1001, 0b0000, "8">; +def VST2b16_UPD : VST2DWB<0b1001, 0b0100, "16">; +def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">; // VST3 : Vector Store (multiple 3-element structures) -class VST3D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0100,op7_4, (outs), +class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; -class VST3WB<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0101,op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3\\}, $addr", - "$addr.addr = $wb", []>; + "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; -def VST3d8 : VST3D<0b0000, "vst3", "8">; -def VST3d16 : VST3D<0b0100, "vst3", "16">; -def VST3d32 : VST3D<0b1000, "vst3", "32">; +def VST3d8 : VST3D<0b0100, 0b0000, "8">; +def VST3d16 : VST3D<0b0100, 0b0100, "16">; +def VST3d32 : VST3D<0b0100, 0b1000, "32">; def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr", "", []>; -// vst3 to double-spaced even registers. -def VST3q8a : VST3WB<0b0000, "vst3", "8">; -def VST3q16a : VST3WB<0b0100, "vst3", "16">; -def VST3q32a : VST3WB<0b1000, "vst3", "32">; +// ...with address register writeback: +class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst3 to double-spaced odd registers. -def VST3q8b : VST3WB<0b0000, "vst3", "8">; -def VST3q16b : VST3WB<0b0100, "vst3", "16">; -def VST3q32b : VST3WB<0b1000, "vst3", "32">; +def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">; +def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">; +def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">; +def VST3d64_UPD : NLdSt<0,0b00,0b0110,0b1100, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VST3q8 : VST3D<0b0101, 0b0000, "8">; +def VST3q16 : VST3D<0b0101, 0b0100, "16">; +def VST3q32 : VST3D<0b0101, 0b1000, "32">; +def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">; +def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">; +def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">; +def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">; +def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">; // VST4 : Vector Store (multiple 4-element structures) -class VST4D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0000,op7_4, (outs), +class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; -class VST4WB<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", - "$addr.addr = $wb", []>; -def VST4d8 : VST4D<0b0000, "vst4", "8">; -def VST4d16 : VST4D<0b0100, "vst4", "16">; -def VST4d32 : VST4D<0b1000, "vst4", "32">; +def VST4d8 : VST4D<0b0000, 0b0000, "8">; +def VST4d16 : VST4D<0b0000, 0b0100, "16">; +def VST4d32 : VST4D<0b0000, 0b1000, "32">; def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, "vst1", "64", "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; -// vst4 to double-spaced even registers. -def VST4q8a : VST4WB<0b0000, "vst4", "8">; -def VST4q16a : VST4WB<0b0100, "vst4", "16">; -def VST4q32a : VST4WB<0b1000, "vst4", "32">; +// ...with address register writeback: +class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, + "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst4 to double-spaced odd registers. -def VST4q8b : VST4WB<0b0000, "vst4", "8">; -def VST4q16b : VST4WB<0b0100, "vst4", "16">; -def VST4q32b : VST4WB<0b1000, "vst4", "32">; +def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">; +def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">; +def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">; +def VST4d64_UPD : NLdSt<0,0b00,0b0010,0b1100, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, + "vst1", "64", + "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VST4q8 : VST4D<0b0001, 0b0000, "8">; +def VST4q16 : VST4D<0b0001, 0b0100, "16">; +def VST4q32 : VST4D<0b0001, 0b1000, "32">; +def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">; +def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">; +def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VST4q8odd_UPD : VST4DWB<0b0001, 0b0000, "8">; +def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">; +def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">; // VST1LN : Vector Store (single element from one lane) // FIXME: Not yet implemented. // VST2LN : Vector Store (single 2-element structure from one lane) -class VST2LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), +class VST2LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VST, OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", + IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", "", []>; -// vst2 to single-spaced registers. -def VST2LNd8 : VST2LN<0b0001, "vst2", "8">; -def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 0; } -def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 0; } +def VST2LNd8 : VST2LN<0b0001, "8">; +def VST2LNd16 : VST2LN<0b0101, "16"> { let Inst{5} = 0; } +def VST2LNd32 : VST2LN<0b1001, "32"> { let Inst{6} = 0; } + +// ...with double-spaced registers: +def VST2LNq16 : VST2LN<0b0101, "16"> { let Inst{5} = 1; } +def VST2LNq32 : VST2LN<0b1001, "32"> { let Inst{6} = 1; } -// vst2 to double-spaced even registers. -def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } -def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } +// ...alternate versions to be allocated odd register numbers: +def VST2LNq16odd : VST2LN<0b0101, "16"> { let Inst{5} = 1; } +def VST2LNq32odd : VST2LN<0b1001, "32"> { let Inst{6} = 1; } + +// ...with address register writeback: +class VST2LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt, + "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst2 to double-spaced odd registers. -def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } -def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } +def VST2LNd8_UPD : VST2LNWB<0b0001, "8">; +def VST2LNd16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 0; } +def VST2LNd32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 0; } + +def VST2LNq16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 1; } +def VST2LNq32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 1; } // VST3LN : Vector Store (single 3-element structure from one lane) -class VST3LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), +class VST3LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VST, OpcodeStr, Dt, + nohash_imm:$lane), IIC_VST, "vst3", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>; -// vst3 to single-spaced registers. -def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> { let Inst{4} = 0; } -def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b00; } -def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b000; } +def VST3LNd8 : VST3LN<0b0010, "8"> { let Inst{4} = 0; } +def VST3LNd16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VST3LNd32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b000; } + +// ...with double-spaced registers: +def VST3LNq16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } + +// ...alternate versions to be allocated odd register numbers: +def VST3LNq16odd : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32odd : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } + +// ...with address register writeback: +class VST3LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), + IIC_VST, "vst3", Dt, + "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst3 to double-spaced even registers. -def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } -def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } +def VST3LNd8_UPD : VST3LNWB<0b0010, "8"> { let Inst{4} = 0; } +def VST3LNd16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VST3LNd32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; } -// vst3 to double-spaced odd registers. -def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } -def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } +def VST3LNq16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; } // VST4LN : Vector Store (single 4-element structure from one lane) -class VST4LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), +class VST4LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VST, OpcodeStr, Dt, + nohash_imm:$lane), IIC_VST, "vst4", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr", "", []>; -// vst4 to single-spaced registers. -def VST4LNd8 : VST4LN<0b0011, "vst4", "8">; -def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 0; } -def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 0; } +def VST4LNd8 : VST4LN<0b0011, "8">; +def VST4LNd16 : VST4LN<0b0111, "16"> { let Inst{5} = 0; } +def VST4LNd32 : VST4LN<0b1011, "32"> { let Inst{6} = 0; } + +// ...with double-spaced registers: +def VST4LNq16 : VST4LN<0b0111, "16"> { let Inst{5} = 1; } +def VST4LNq32 : VST4LN<0b1011, "32"> { let Inst{6} = 1; } + +// ...alternate versions to be allocated odd register numbers: +def VST4LNq16odd : VST4LN<0b0111, "16"> { let Inst{5} = 1; } +def VST4LNq32odd : VST4LN<0b1011, "32"> { let Inst{6} = 1; } + +// ...with address register writeback: +class VST4LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), + IIC_VST, "vst4", Dt, + "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst4 to double-spaced even registers. -def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } -def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } +def VST4LNd8_UPD : VST4LNWB<0b0011, "8">; +def VST4LNd16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 0; } +def VST4LNd32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 0; } -// vst4 to double-spaced odd registers. -def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } -def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } +def VST4LNq16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 1; } +def VST4LNq32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 1; } } // mayStore = 1, hasExtraSrcRegAllocReq = 1 diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 37c9fc5..e3ca536 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -16,7 +16,8 @@ // def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def imm_neg_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32); @@ -549,7 +550,7 @@ def tLDM : T1I<(outs), def tLDM_UPD : T1It<(outs tGPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_iLoadm, - "ldm${addr:submode}${p}\t$addr, $dsts", + "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>, T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53 } // mayLoad, hasExtraDefRegAllocReq @@ -558,7 +559,7 @@ let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def tSTM_UPD : T1It<(outs tGPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_iStorem, - "stm${addr:submode}${p}\t$addr, $srcs", + "stm${addr:submode}${p}\t$addr!, $srcs", "$addr.addr = $wb", []>, T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189 diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index ab9e926..262aae4 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1218,7 +1218,7 @@ def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_iLoadm, - "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", + "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts", "$addr.addr = $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; @@ -1244,7 +1244,7 @@ def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_iStorem, - "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", + "stm${addr:submode}${p}${addr:wide}\t$addr!, $srcs", "$addr.addr = $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 4d1d48a..aca8230 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -25,8 +25,6 @@ def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>; def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>; def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>; def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; -def arm_f16tof32 : SDNode<"ARMISD::F16_TO_F32", SDT_ITOF>; -def arm_f32tof16 : SDNode<"ARMISD::F32_TO_F16", SDT_FTOI>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>; def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>; @@ -94,7 +92,7 @@ def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts, def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $dsts", + "vldm${addr:submode}${p}\t${addr:base}!, $dsts", "$addr.base = $wb", []> { let Inst{20} = 1; } @@ -102,7 +100,7 @@ def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $dsts", + "vldm${addr:submode}${p}\t${addr:base}!, $dsts", "$addr.base = $wb", []> { let Inst{20} = 1; } @@ -124,7 +122,7 @@ def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs, def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, $srcs", + "vstm${addr:submode}${p}\t${addr:base}!, $srcs", "$addr.base = $wb", []> { let Inst{20} = 0; } @@ -132,7 +130,7 @@ def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, $srcs", + "vstm${addr:submode}${p}\t${addr:base}!, $srcs", "$addr.base = $wb", []> { let Inst{20} = 0; } @@ -259,11 +257,17 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a", - [(set SPR:$dst, (f32 (arm_f32tof16 SPR:$a)))]>; + [/* For disassembly only; pattern left blank */]>; + +def : ARMPat<(f32_to_f16 SPR:$a), + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a", - [(set SPR:$dst, (arm_f16tof32 SPR:$a))]>; + [/* For disassembly only; pattern left blank */]>; + +def : ARMPat<(f16_to_f32 GPR:$a), + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a", diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 8fbcf45..bdbec30 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -253,7 +253,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg) : BuildMI(MBB, MBBI, dl, TII->get(Opcode)) .addReg(Base, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs)) + .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs)) .addImm(Pred).addReg(PredReg); for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) @@ -505,11 +505,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, if (MI->getOperand(i).getReg() == Base) return false; } - assert(!ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm())); Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); } else { // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops. - assert(!ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm())); Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); } @@ -573,11 +571,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, .addReg(Base, getKillRegState(BaseKill)); if (isAM4) { // [t2]LDM_UPD, [t2]STM_UPD - MIB.addImm(ARM_AM::getAM4ModeImm(Mode, true)) + MIB.addImm(ARM_AM::getAM4ModeImm(Mode)) .addImm(Pred).addReg(PredReg); } else { // VLDM[SD}_UPD, VSTM[SD]_UPD - MIB.addImm(ARM_AM::getAM5Opc(Mode, true, Offset)) + MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset)) .addImm(Pred).addReg(PredReg); } // Transfer the rest of operands. @@ -709,7 +707,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, unsigned Offset = 0; if (isAM5) Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia, - true, (isDPR ? 2 : 1)); + (isDPR ? 2 : 1)); else if (isAM2) Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); else @@ -1157,19 +1155,24 @@ namespace { }; } -/// MergeReturnIntoLDM - If this is a exit BB, try merging the return op -/// (bx lr) into the preceeding stack restore so it directly restore the value -/// of LR into pc. -/// ldmfd sp!, {r7, lr} +/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops +/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it +/// directly restore the value of LR into pc. +/// ldmfd sp!, {..., lr} /// bx lr +/// or +/// ldmfd sp!, {..., lr} +/// mov pc, lr /// => -/// ldmfd sp!, {r7, pc} +/// ldmfd sp!, {..., pc} bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { if (MBB.empty()) return false; MachineBasicBlock::iterator MBBI = prior(MBB.end()); if (MBBI != MBB.begin() && - (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) { + (MBBI->getOpcode() == ARM::BX_RET || + MBBI->getOpcode() == ARM::tBX_RET || + MBBI->getOpcode() == ARM::MOVPCLR)) { MachineInstr *PrevMI = prior(MBBI); if (PrevMI->getOpcode() == ARM::LDM_UPD || PrevMI->getOpcode() == ARM::t2LDM_UPD) { diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 7233f5c..95f57b7 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -21,7 +21,7 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { +static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { case Triple::Darwin: diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 88e67e3..c32f16c 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -23,6 +23,7 @@ #include "ARMISelLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" +#include "llvm/ADT/OwningPtr.h" namespace llvm { @@ -83,7 +84,8 @@ public: /// Thumb-1 and Thumb-2. /// class ThumbTargetMachine : public ARMBaseTargetMachine { - ARMBaseInstrInfo *InstrInfo; // either Thumb1InstrInfo or Thumb2InstrInfo + // Either Thumb1InstrInfo or Thumb2InstrInfo. + OwningPtr<ARMBaseInstrInfo> InstrInfo; const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; public: @@ -100,7 +102,9 @@ public: } /// returns either Thumb1InstrInfo or Thumb2InstrInfo - virtual const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo; } + virtual const ARMBaseInstrInfo *getInstrInfo() const { + return InstrInfo.get(); + } virtual const TargetData *getTargetData() const { return &DataLayout; } }; diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 4db14a3..4a7a1e4 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -101,6 +101,7 @@ namespace { void printAddrMode5Operand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); void printAddrMode6Operand(const MachineInstr *MI, int OpNum); + void printAddrMode6OffsetOperand(const MachineInstr *MI, int OpNum); void printAddrModePCOperand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum); @@ -431,16 +432,16 @@ void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) { O << "[" << getRegisterName(MO1.getReg()); if (!MO2.getReg()) { - if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. + if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. O << ", #" - << (char)ARM_AM::getAM2Op(MO3.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) << ARM_AM::getAM2Offset(MO3.getImm()); O << "]"; return; } O << ", " - << (char)ARM_AM::getAM2Op(MO3.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) << getRegisterName(MO2.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) @@ -458,12 +459,12 @@ void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){ unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); O << "#" - << (char)ARM_AM::getAM2Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs; return; } - O << (char)ARM_AM::getAM2Op(MO2.getImm()) + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << getRegisterName(MO1.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) @@ -490,7 +491,7 @@ void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) { if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) O << ", #" - << (char)ARM_AM::getAM3Op(MO3.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) << ImmOffs; O << "]"; } @@ -508,35 +509,22 @@ void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){ unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); O << "#" - << (char)ARM_AM::getAM3Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs; } void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op, const char *Modifier) { - const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Modifier && strcmp(Modifier, "submode") == 0) { - if (MO1.getReg() == ARM::SP) { - // FIXME - bool isLDM = (MI->getOpcode() == ARM::LDM || - MI->getOpcode() == ARM::LDM_UPD || - MI->getOpcode() == ARM::LDM_RET || - MI->getOpcode() == ARM::t2LDM || - MI->getOpcode() == ARM::t2LDM_UPD || - MI->getOpcode() == ARM::t2LDM_RET); - O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); - } else - O << ARM_AM::getAMSubModeStr(Mode); + O << ARM_AM::getAMSubModeStr(Mode); } else if (Modifier && strcmp(Modifier, "wide") == 0) { ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Mode == ARM_AM::ia) O << ".w"; } else { printOperand(MI, Op); - if (ARM_AM::getAM4WBFlag(MO2.getImm())) - O << "!"; } } @@ -559,8 +547,6 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, } else if (Modifier && strcmp(Modifier, "base") == 0) { // Used for FSTM{D|S} and LSTM{D|S} operations. O << getRegisterName(MO1.getReg()); - if (ARM_AM::getAM5WBFlag(MO2.getImm())) - O << "!"; return; } @@ -568,7 +554,7 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { O << ", #" - << (char)ARM_AM::getAM5Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) << ImmOffs*4; } O << "]"; @@ -577,22 +563,21 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) { const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); - const MachineOperand &MO3 = MI->getOperand(Op+2); - const MachineOperand &MO4 = MI->getOperand(Op+3); O << "[" << getRegisterName(MO1.getReg()); - if (MO4.getImm()) { + if (MO2.getImm()) { // FIXME: Both darwin as and GNU as violate ARM docs here. - O << ", :" << MO4.getImm(); + O << ", :" << MO2.getImm(); } O << "]"; +} - if (ARM_AM::getAM6WBFlag(MO3.getImm())) { - if (MO2.getReg() == 0) - O << "!"; - else - O << ", " << getRegisterName(MO2.getReg()); - } +void ARMAsmPrinter::printAddrMode6OffsetOperand(const MachineInstr *MI, int Op){ + const MachineOperand &MO = MI->getOperand(Op); + if (MO.getReg() == 0) + O << "!"; + else + O << ", " << getRegisterName(MO.getReg()); } void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, @@ -604,7 +589,7 @@ void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, const MachineOperand &MO1 = MI->getOperand(Op); assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); - O << "[pc, +" << getRegisterName(MO1.getReg()) << "]"; + O << "[pc, " << getRegisterName(MO1.getReg()) << "]"; } void @@ -627,10 +612,11 @@ void ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op) { // (3 - the number of trailing zeros) is the number of then / else. unsigned Mask = MI->getOperand(Op).getImm(); + unsigned CondBit0 = Mask >> 4 & 1; unsigned NumTZ = CountTrailingZeros_32(Mask); assert(NumTZ <= 3 && "Invalid IT mask!"); for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { - bool T = (Mask & (1 << Pos)) == 0; + bool T = ((Mask >> Pos) & 1) == CondBit0; if (T) O << 't'; else @@ -662,7 +648,7 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op, if (MO3.getReg()) O << ", " << getRegisterName(MO3.getReg()); else if (unsigned ImmOffs = MO2.getImm()) - O << ", #+" << ImmOffs * Scale; + O << ", #" << ImmOffs * Scale; O << "]"; } @@ -684,7 +670,7 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) { const MachineOperand &MO2 = MI->getOperand(Op+1); O << "[" << getRegisterName(MO1.getReg()); if (unsigned ImmOffs = MO2.getImm()) - O << ", #+" << ImmOffs*4; + O << ", #" << ImmOffs*4; O << "]"; } @@ -720,7 +706,7 @@ void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI, unsigned OffImm = MO2.getImm(); if (OffImm) // Don't print +0. - O << ", #+" << OffImm; + O << ", #" << OffImm; O << "]"; } @@ -736,7 +722,7 @@ void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI, if (OffImm < 0) O << ", #-" << -OffImm; else if (OffImm > 0) - O << ", #+" << OffImm; + O << ", #" << OffImm; O << "]"; } @@ -752,7 +738,7 @@ void ARMAsmPrinter::printT2AddrModeImm8s4Operand(const MachineInstr *MI, if (OffImm < 0) O << ", #-" << -OffImm * 4; else if (OffImm > 0) - O << ", #+" << OffImm * 4; + O << ", #" << OffImm * 4; O << "]"; } @@ -764,7 +750,7 @@ void ARMAsmPrinter::printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, if (OffImm < 0) O << "#-" << -OffImm; else if (OffImm > 0) - O << "#+" << OffImm; + O << "#" << OffImm; } void ARMAsmPrinter::printT2AddrModeSoRegOperand(const MachineInstr *MI, diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index a2084b0..30763a9 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -28,7 +28,159 @@ using namespace llvm; #undef MachineInstr #undef ARMAsmPrinter -void ARMInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +static unsigned NextReg(unsigned Reg) { + switch (Reg) { + default: + assert(0 && "Unexpected register enum"); + + case ARM::D0: + return ARM::D1; + case ARM::D1: + return ARM::D2; + case ARM::D2: + return ARM::D3; + case ARM::D3: + return ARM::D4; + case ARM::D4: + return ARM::D5; + case ARM::D5: + return ARM::D6; + case ARM::D6: + return ARM::D7; + case ARM::D7: + return ARM::D8; + case ARM::D8: + return ARM::D9; + case ARM::D9: + return ARM::D10; + case ARM::D10: + return ARM::D11; + case ARM::D11: + return ARM::D12; + case ARM::D12: + return ARM::D13; + case ARM::D13: + return ARM::D14; + case ARM::D14: + return ARM::D15; + case ARM::D15: + return ARM::D16; + case ARM::D16: + return ARM::D17; + case ARM::D17: + return ARM::D18; + case ARM::D18: + return ARM::D19; + case ARM::D19: + return ARM::D20; + case ARM::D20: + return ARM::D21; + case ARM::D21: + return ARM::D22; + case ARM::D22: + return ARM::D23; + case ARM::D23: + return ARM::D24; + case ARM::D24: + return ARM::D25; + case ARM::D25: + return ARM::D26; + case ARM::D26: + return ARM::D27; + case ARM::D27: + return ARM::D28; + case ARM::D28: + return ARM::D29; + case ARM::D29: + return ARM::D30; + case ARM::D30: + return ARM::D31; + } +} + +void ARMInstPrinter::printInst(const MCInst *MI) { + // Check for MOVs and print canonical forms, instead. + if (MI->getOpcode() == ARM::MOVs) { + const MCOperand &Dst = MI->getOperand(0); + const MCOperand &MO1 = MI->getOperand(1); + const MCOperand &MO2 = MI->getOperand(2); + const MCOperand &MO3 = MI->getOperand(3); + + O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())); + printSBitModifierOperand(MI, 6); + printPredicateOperand(MI, 4); + + O << '\t' << getRegisterName(Dst.getReg()) + << ", " << getRegisterName(MO1.getReg()); + + if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx) + return; + + O << ", "; + + if (MO2.getReg()) { + O << getRegisterName(MO2.getReg()); + assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); + } else { + O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } + return; + } + + // A8.6.123 PUSH + if ((MI->getOpcode() == ARM::STM_UPD || MI->getOpcode() == ARM::t2STM_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) { + O << '\t' << "push"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + // A8.6.122 POP + if ((MI->getOpcode() == ARM::LDM_UPD || MI->getOpcode() == ARM::t2LDM_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) { + O << '\t' << "pop"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + // A8.6.355 VPUSH + if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::db) { + O << '\t' << "vpush"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + // A8.6.354 VPOP + if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::ia) { + O << '\t' << "vpop"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + printInstruction(MI); + } void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const char *Modifier) { @@ -36,6 +188,9 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (Op.isReg()) { unsigned Reg = Op.getReg(); if (Modifier && strcmp(Modifier, "dregpair") == 0) { + O << '{' << getRegisterName(Reg) << ", " + << getRegisterName(NextReg(Reg)) << '}'; +#if 0 // FIXME: Breaks e.g. ARM/vmul.ll. assert(0); /* @@ -44,6 +199,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << '{' << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) << '}';*/ +#endif } else if (Modifier && strcmp(Modifier, "lane") == 0) { assert(0); /* @@ -56,7 +212,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << getRegisterName(Reg); } } else if (Op.isImm()) { - assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + assert((Modifier && !strcmp(Modifier, "call")) || + ((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported")); O << '#' << Op.getImm(); } else { assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); @@ -142,17 +299,17 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op) { O << "[" << getRegisterName(MO1.getReg()); if (!MO2.getReg()) { - if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. + if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. O << ", #" - << (char)ARM_AM::getAM2Op(MO3.getImm()) - << ARM_AM::getAM2Offset(MO3.getImm()); + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << ARM_AM::getAM2Offset(MO3.getImm()); O << "]"; return; } O << ", " - << (char)ARM_AM::getAM2Op(MO3.getImm()) - << getRegisterName(MO2.getReg()); + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << getRegisterName(MO2.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) O << ", " @@ -169,11 +326,14 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); - O << '#' << (char)ARM_AM::getAM2Op(MO2.getImm()) << ImmOffs; + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) + << ImmOffs; return; } - O << (char)ARM_AM::getAM2Op(MO2.getImm()) << getRegisterName(MO1.getReg()); + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) + << getRegisterName(MO1.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) O << ", " @@ -196,8 +356,8 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum) { if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) O << ", #" - << (char)ARM_AM::getAM3Op(MO3.getImm()) - << ImmOffs; + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) + << ImmOffs; O << ']'; } @@ -214,35 +374,24 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); - O << "#" - << (char)ARM_AM::getAM3Op(MO2.getImm()) - << ImmOffs; + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) + << ImmOffs; } void ARMInstPrinter::printAddrMode4Operand(const MCInst *MI, unsigned OpNum, const char *Modifier) { - const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Modifier && strcmp(Modifier, "submode") == 0) { - if (MO1.getReg() == ARM::SP) { - // FIXME - bool isLDM = (MI->getOpcode() == ARM::LDM || - MI->getOpcode() == ARM::LDM_RET || - MI->getOpcode() == ARM::t2LDM || - MI->getOpcode() == ARM::t2LDM_RET); - O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); - } else - O << ARM_AM::getAMSubModeStr(Mode); + O << ARM_AM::getAMSubModeStr(Mode); } else if (Modifier && strcmp(Modifier, "wide") == 0) { ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Mode == ARM_AM::ia) O << ".w"; } else { printOperand(MI, OpNum); - if (ARM_AM::getAM4WBFlag(MO2.getImm())) - O << "!"; } } @@ -263,8 +412,6 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, } else if (Modifier && strcmp(Modifier, "base") == 0) { // Used for FSTM{D|S} and LSTM{D|S} operations. O << getRegisterName(MO1.getReg()); - if (ARM_AM::getAM5WBFlag(MO2.getImm())) - O << "!"; return; } @@ -272,7 +419,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { O << ", #" - << (char)ARM_AM::getAM5Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) << ImmOffs*4; } O << "]"; @@ -281,17 +428,22 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum) { const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); - // FIXME: No support yet for specifying alignment. - O << '[' << getRegisterName(MO1.getReg()) << ']'; - - if (ARM_AM::getAM6WBFlag(MO3.getImm())) { - if (MO2.getReg() == 0) - O << '!'; - else - O << ", " << getRegisterName(MO2.getReg()); + O << "[" << getRegisterName(MO1.getReg()); + if (MO2.getImm()) { + // FIXME: Both darwin as and GNU as violate ARM docs here. + O << ", :" << MO2.getImm(); } + O << "]"; +} + +void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.getReg() == 0) + O << "!"; + else + O << ", " << getRegisterName(MO.getReg()); } void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum, @@ -311,14 +463,56 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI, void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum) { O << "{"; - // Always skip the first operand, it's the optional (and implicit writeback). - for (unsigned i = OpNum+1, e = MI->getNumOperands(); i != e; ++i) { - if (i != OpNum+1) O << ", "; + for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { + if (i != OpNum) O << ", "; O << getRegisterName(MI->getOperand(i).getReg()); } O << "}"; } +void ARMInstPrinter::printCPSOptionOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &Op = MI->getOperand(OpNum); + unsigned option = Op.getImm(); + unsigned mode = option & 31; + bool changemode = option >> 5 & 1; + unsigned AIF = option >> 6 & 7; + unsigned imod = option >> 9 & 3; + if (imod == 2) + O << "ie"; + else if (imod == 3) + O << "id"; + O << '\t'; + if (imod > 1) { + if (AIF & 4) O << 'a'; + if (AIF & 2) O << 'i'; + if (AIF & 1) O << 'f'; + if (AIF > 0 && changemode) O << ", "; + } + if (changemode) + O << '#' << mode; +} + +void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &Op = MI->getOperand(OpNum); + unsigned Mask = Op.getImm(); + if (Mask) { + O << '_'; + if (Mask & 8) O << 'f'; + if (Mask & 4) O << 's'; + if (Mask & 2) O << 'x'; + if (Mask & 1) O << 'c'; + } +} + +void ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum){ + const MCOperand &Op = MI->getOperand(OpNum); + O << '#'; + if (Op.getImm() < 0) + O << '-' << (-Op.getImm() - 1); + else + O << Op.getImm(); +} + void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); if (CC != ARMCC::AL) @@ -360,3 +554,191 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum) { void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum) { O << "#" << MI->getOperand(OpNum).getImm() * 4; } + +void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum) { + // (3 - the number of trailing zeros) is the number of then / else. + unsigned Mask = MI->getOperand(OpNum).getImm(); + unsigned CondBit0 = Mask >> 4 & 1; + unsigned NumTZ = CountTrailingZeros_32(Mask); + assert(NumTZ <= 3 && "Invalid IT mask!"); + for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { + bool T = ((Mask >> Pos) & 1) == CondBit0; + if (T) + O << 't'; + else + O << 'e'; + } +} + +void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op) +{ + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + O << "[" << getRegisterName(MO1.getReg()); + O << ", " << getRegisterName(MO2.getReg()) << "]"; +} + +void ARMInstPrinter::printThumbAddrModeRI5Operand(const MCInst *MI, unsigned Op, + unsigned Scale) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op); + return; + } + + O << "[" << getRegisterName(MO1.getReg()); + if (MO3.getReg()) + O << ", " << getRegisterName(MO3.getReg()); + else if (unsigned ImmOffs = MO2.getImm()) + O << ", #" << ImmOffs * Scale; + O << "]"; +} + +void ARMInstPrinter::printThumbAddrModeS1Operand(const MCInst *MI, unsigned Op) +{ + printThumbAddrModeRI5Operand(MI, Op, 1); +} + +void ARMInstPrinter::printThumbAddrModeS2Operand(const MCInst *MI, unsigned Op) +{ + printThumbAddrModeRI5Operand(MI, Op, 2); +} + +void ARMInstPrinter::printThumbAddrModeS4Operand(const MCInst *MI, unsigned Op) +{ + printThumbAddrModeRI5Operand(MI, Op, 4); +} + +void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI,unsigned Op) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + O << "[" << getRegisterName(MO1.getReg()); + if (unsigned ImmOffs = MO2.getImm()) + O << ", #" << ImmOffs*4; + O << "]"; +} + +void ARMInstPrinter::printTBAddrMode(const MCInst *MI, unsigned OpNum) { + O << "[pc, " << getRegisterName(MI->getOperand(OpNum).getReg()); + if (MI->getOpcode() == ARM::t2TBH) + O << ", lsl #1"; + O << ']'; +} + +// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2 +// register with shift forms. +// REG 0 0 - e.g. R5 +// REG IMM, SH_OPC - e.g. R5, LSL #3 +void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + unsigned Reg = MO1.getReg(); + O << getRegisterName(Reg); + + // Print the shift opc. + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())) + << " "; + + assert(MO2.isImm() && "Not a valid t2_so_reg value!"); + O << "#" << ARM_AM::getSORegOffset(MO2.getImm()); +} + +void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + + unsigned OffImm = MO2.getImm(); + if (OffImm) // Don't print +0. + O << ", #" << OffImm; + O << "]"; +} + +void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + + int32_t OffImm = (int32_t)MO2.getImm(); + // Don't print +0. + if (OffImm < 0) + O << ", #-" << -OffImm; + else if (OffImm > 0) + O << ", #" << OffImm; + O << "]"; +} + +void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + + int32_t OffImm = (int32_t)MO2.getImm() / 4; + // Don't print +0. + if (OffImm < 0) + O << ", #-" << -OffImm * 4; + else if (OffImm > 0) + O << ", #" << OffImm * 4; + O << "]"; +} + +void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + int32_t OffImm = (int32_t)MO1.getImm(); + // Don't print +0. + if (OffImm < 0) + O << "#-" << -OffImm; + else if (OffImm > 0) + O << "#" << OffImm; +} + +void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + int32_t OffImm = (int32_t)MO1.getImm() / 4; + // Don't print +0. + if (OffImm < 0) + O << "#-" << -OffImm * 4; + else if (OffImm > 0) + O << "#" << OffImm * 4; +} + +void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO3 = MI->getOperand(OpNum+2); + + O << "[" << getRegisterName(MO1.getReg()); + + assert(MO2.getReg() && "Invalid so_reg load / store address!"); + O << ", " << getRegisterName(MO2.getReg()); + + unsigned ShAmt = MO3.getImm(); + if (ShAmt) { + assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); + O << ", lsl #" << ShAmt; + } + O << "]"; +} + +void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum) { + O << '#' << MI->getOperand(OpNum).getImm(); +} + +void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum) { + O << '#' << MI->getOperand(OpNum).getImm(); +} + diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index 9a3cbc3..d41b5df 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -48,32 +48,33 @@ public: void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, const char *Modifier = 0); void printAddrMode6Operand(const MCInst *MI, unsigned OpNum); + void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum); void printAddrModePCOperand(const MCInst *MI, unsigned OpNum, const char *Modifier = 0); void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum); - void printThumbITMask(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum) {} + void printThumbITMask(const MCInst *MI, unsigned OpNum); + void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum); void printThumbAddrModeRI5Operand(const MCInst *MI, unsigned OpNum, - unsigned Scale) {} - void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum) {} + unsigned Scale); + void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum); + void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum); + void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum); + void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum); - void printT2SOOperand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum) {} + void printT2SOOperand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum); - void printCPSOptionOperand(const MCInst *MI, unsigned OpNum) {} - void printMSRMaskOperand(const MCInst *MI, unsigned OpNum) {} - void printNegZeroOperand(const MCInst *MI, unsigned OpNum) {} + void printCPSOptionOperand(const MCInst *MI, unsigned OpNum); + void printMSRMaskOperand(const MCInst *MI, unsigned OpNum); + void printNegZeroOperand(const MCInst *MI, unsigned OpNum); void printPredicateOperand(const MCInst *MI, unsigned OpNum); void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum); void printSBitModifierOperand(const MCInst *MI, unsigned OpNum); @@ -82,10 +83,10 @@ public: const char *Modifier); void printJTBlockOperand(const MCInst *MI, unsigned OpNum) {} void printJT2BlockOperand(const MCInst *MI, unsigned OpNum) {} - void printTBAddrMode(const MCInst *MI, unsigned OpNum) {} + void printTBAddrMode(const MCInst *MI, unsigned OpNum); void printNoHashImmediate(const MCInst *MI, unsigned OpNum); - void printVFPf32ImmOperand(const MCInst *MI, int OpNum) {} - void printVFPf64ImmOperand(const MCInst *MI, int OpNum) {} + void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum); + void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum); void printHex8ImmOperand(const MCInst *MI, int OpNum) {} void printHex16ImmOperand(const MCInst *MI, int OpNum) {} void printHex32ImmOperand(const MCInst *MI, int OpNum) {} diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index d9942c8..c36fe63 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -64,16 +64,16 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 4; return true; - case ARM::VLD2LNq16a: - case ARM::VLD2LNq32a: + case ARM::VLD2LNq16: + case ARM::VLD2LNq32: FirstOpnd = 0; NumRegs = 2; Offset = 0; Stride = 2; return true; - case ARM::VLD2LNq16b: - case ARM::VLD2LNq32b: + case ARM::VLD2LNq16odd: + case ARM::VLD2LNq32odd: FirstOpnd = 0; NumRegs = 2; Offset = 1; @@ -91,34 +91,34 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 3; return true; - case ARM::VLD3q8a: - case ARM::VLD3q16a: - case ARM::VLD3q32a: + case ARM::VLD3q8_UPD: + case ARM::VLD3q16_UPD: + case ARM::VLD3q32_UPD: FirstOpnd = 0; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VLD3q8b: - case ARM::VLD3q16b: - case ARM::VLD3q32b: + case ARM::VLD3q8odd_UPD: + case ARM::VLD3q16odd_UPD: + case ARM::VLD3q32odd_UPD: FirstOpnd = 0; NumRegs = 3; Offset = 1; Stride = 2; return true; - case ARM::VLD3LNq16a: - case ARM::VLD3LNq32a: + case ARM::VLD3LNq16: + case ARM::VLD3LNq32: FirstOpnd = 0; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VLD3LNq16b: - case ARM::VLD3LNq32b: + case ARM::VLD3LNq16odd: + case ARM::VLD3LNq32odd: FirstOpnd = 0; NumRegs = 3; Offset = 1; @@ -136,34 +136,34 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 4; return true; - case ARM::VLD4q8a: - case ARM::VLD4q16a: - case ARM::VLD4q32a: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: FirstOpnd = 0; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VLD4q8b: - case ARM::VLD4q16b: - case ARM::VLD4q32b: + case ARM::VLD4q8odd_UPD: + case ARM::VLD4q16odd_UPD: + case ARM::VLD4q32odd_UPD: FirstOpnd = 0; NumRegs = 4; Offset = 1; Stride = 2; return true; - case ARM::VLD4LNq16a: - case ARM::VLD4LNq32a: + case ARM::VLD4LNq16: + case ARM::VLD4LNq32: FirstOpnd = 0; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VLD4LNq16b: - case ARM::VLD4LNq32b: + case ARM::VLD4LNq16odd: + case ARM::VLD4LNq32odd: FirstOpnd = 0; NumRegs = 4; Offset = 1; @@ -177,28 +177,28 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 2; return true; case ARM::VST2q8: case ARM::VST2q16: case ARM::VST2q32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 4; return true; - case ARM::VST2LNq16a: - case ARM::VST2LNq32a: - FirstOpnd = 4; + case ARM::VST2LNq16: + case ARM::VST2LNq32: + FirstOpnd = 2; NumRegs = 2; Offset = 0; Stride = 2; return true; - case ARM::VST2LNq16b: - case ARM::VST2LNq32b: - FirstOpnd = 4; + case ARM::VST2LNq16odd: + case ARM::VST2LNq32odd: + FirstOpnd = 2; NumRegs = 2; Offset = 1; Stride = 2; @@ -211,39 +211,39 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNd8: case ARM::VST3LNd16: case ARM::VST3LNd32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 3; return true; - case ARM::VST3q8a: - case ARM::VST3q16a: - case ARM::VST3q32a: - FirstOpnd = 5; + case ARM::VST3q8_UPD: + case ARM::VST3q16_UPD: + case ARM::VST3q32_UPD: + FirstOpnd = 4; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VST3q8b: - case ARM::VST3q16b: - case ARM::VST3q32b: - FirstOpnd = 5; + case ARM::VST3q8odd_UPD: + case ARM::VST3q16odd_UPD: + case ARM::VST3q32odd_UPD: + FirstOpnd = 4; NumRegs = 3; Offset = 1; Stride = 2; return true; - case ARM::VST3LNq16a: - case ARM::VST3LNq32a: - FirstOpnd = 4; + case ARM::VST3LNq16: + case ARM::VST3LNq32: + FirstOpnd = 2; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VST3LNq16b: - case ARM::VST3LNq32b: - FirstOpnd = 4; + case ARM::VST3LNq16odd: + case ARM::VST3LNq32odd: + FirstOpnd = 2; NumRegs = 3; Offset = 1; Stride = 2; @@ -256,39 +256,39 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNd8: case ARM::VST4LNd16: case ARM::VST4LNd32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 4; return true; - case ARM::VST4q8a: - case ARM::VST4q16a: - case ARM::VST4q32a: - FirstOpnd = 5; + case ARM::VST4q8_UPD: + case ARM::VST4q16_UPD: + case ARM::VST4q32_UPD: + FirstOpnd = 4; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VST4q8b: - case ARM::VST4q16b: - case ARM::VST4q32b: - FirstOpnd = 5; + case ARM::VST4q8odd_UPD: + case ARM::VST4q16odd_UPD: + case ARM::VST4q32odd_UPD: + FirstOpnd = 4; NumRegs = 4; Offset = 1; Stride = 2; return true; - case ARM::VST4LNq16a: - case ARM::VST4LNq32a: - FirstOpnd = 4; + case ARM::VST4LNq16: + case ARM::VST4LNq32: + FirstOpnd = 2; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VST4LNq16b: - case ARM::VST4LNq32b: - FirstOpnd = 4; + case ARM::VST4LNq16odd: + case ARM::VST4LNq32odd: + FirstOpnd = 2; NumRegs = 4; Offset = 1; Stride = 2; diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index f5ba155..f36d4ef 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -78,14 +78,16 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { DebugLoc ndl = NMI->getDebugLoc(); unsigned NPredReg = 0; ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg); - if (NCC == OCC) { - Mask |= (1 << Pos); - } else if (NCC != CC) + if (NCC == CC || NCC == OCC) + Mask |= (NCC & 1) << Pos; + else break; --Pos; ++MBBI; } Mask |= (1 << Pos); + // Tag along (firstcond[0] << 4) with the mask. + Mask |= (CC & 1) << 4; MIB.addImm(Mask); Modified = true; ++NumITs; diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td index 6d82875..d984556 100644 --- a/lib/Target/Alpha/AlphaInstrFormats.td +++ b/lib/Target/Alpha/AlphaInstrFormats.td @@ -56,16 +56,16 @@ class MfcForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin> : InstAlpha<opcode, asmstr, itin> { bits<5> Ra; - let OutOperandList = (ops GPRC:$RA); - let InOperandList = (ops); + let OutOperandList = (outs GPRC:$RA); + let InOperandList = (ins); let Inst{25-21} = Ra; let Inst{20-16} = 0; let Inst{15-0} = fc; } class MfcPForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin> : InstAlpha<opcode, asmstr, itin> { - let OutOperandList = (ops); - let InOperandList = (ops); + let OutOperandList = (outs); + let InOperandList = (ins); let Inst{25-21} = 0; let Inst{20-16} = 0; let Inst{15-0} = fc; @@ -77,7 +77,7 @@ class MbrForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass bits<5> Rb; bits<14> disp; - let OutOperandList = (ops); + let OutOperandList = (outs); let InOperandList = OL; let Inst{25-21} = Ra; @@ -92,7 +92,7 @@ class MbrpForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, list<dag> patt bits<5> Rb; bits<14> disp; - let OutOperandList = (ops); + let OutOperandList = (outs); let InOperandList = OL; let Inst{25-21} = Ra; @@ -107,7 +107,7 @@ def target : Operand<OtherVT> {} let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { class BFormN<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin> : InstAlpha<opcode, asmstr, itin> { - let OutOperandList = (ops); + let OutOperandList = (outs); let InOperandList = OL; bits<64> Opc; //dummy bits<5> Ra; @@ -122,8 +122,8 @@ let isBranch = 1, isTerminator = 1 in class BFormD<bits<6> opcode, string asmstr, list<dag> pattern, InstrItinClass itin> : InstAlpha<opcode, asmstr, itin> { let Pattern = pattern; - let OutOperandList = (ops); - let InOperandList = (ops target:$DISP); + let OutOperandList = (outs); + let InOperandList = (ins target:$DISP); bits<5> Ra; bits<21> disp; @@ -250,7 +250,7 @@ class FPForm<bits<6> opcode, bits<11> fun, string asmstr, list<dag> pattern, Ins //3.3.5 class PALForm<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin> : InstAlpha<opcode, asmstr, itin> { - let OutOperandList = (ops); + let OutOperandList = (outs); let InOperandList = OL; bits<26> Function; diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index 91e58ce..d5d5e02 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -392,12 +392,12 @@ def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ GPRC:$X, immUExt8:$Y), 0 let isReturn = 1, isTerminator = 1, isBarrier = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in { - def RETDAG : MbrForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", s_jsr>; //Return from subroutine - def RETDAGp : MbrpForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine + def RETDAG : MbrForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", s_jsr>; //Return from subroutine + def RETDAGp : MbrpForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine } let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, Ra = 31, disp = 0 in -def JMP : MbrpForm< 0x1A, 0x00, (ops GPRC:$RS), "jmp $$31,($RS),0", +def JMP : MbrpForm< 0x1A, 0x00, (ins GPRC:$RS), "jmp $$31,($RS),0", [(brind GPRC:$RS)], s_jsr>; //Jump let isCall = 1, Ra = 26, @@ -414,18 +414,18 @@ let isCall = 1, Ra = 26, Rb = 27, disp = 0, F0, F1, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R27, R29] in { - def JSR : MbrForm< 0x1A, 0x01, (ops ), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine + def JSR : MbrForm< 0x1A, 0x01, (ins), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine } let isCall = 1, Ra = 23, Rb = 27, disp = 0, Defs = [R23, R24, R25, R27, R28], Uses = [R24, R25, R27] in - def JSRs : MbrForm< 0x1A, 0x01, (ops ), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem + def JSRs : MbrForm< 0x1A, 0x01, (ins), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem -def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ops GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return +def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ins GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return -let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { def LDQ : MForm<0x29, 1, "ldq $RA,$DISP($RB)", [(set GPRC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_ild>; def LDQr : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!gprellow", @@ -445,7 +445,7 @@ def LDWUr : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)\t\t!gprellow", } -let OutOperandList = (ops), InOperandList = (ops GPRC:$RA, s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs), InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB) in { def STB : MForm<0x0E, 0, "stb $RA,$DISP($RB)", [(truncstorei8 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>; def STBr : MForm<0x0E, 0, "stb $RA,$DISP($RB)\t\t!gprellow", @@ -465,7 +465,7 @@ def STQr : MForm<0x2D, 0, "stq $RA,$DISP($RB)\t\t!gprellow", } //Load address -let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { def LDA : MForm<0x08, 0, "lda $RA,$DISP($RB)", [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_lda>; def LDAr : MForm<0x08, 0, "lda $RA,$DISP($RB)\t\t!gprellow", @@ -476,25 +476,25 @@ def LDAHr : MForm<0x09, 0, "ldah $RA,$DISP($RB)\t\t!gprelhigh", [(set GPRC:$RA, (Alpha_gprelhi tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address high } -let OutOperandList = (ops), InOperandList = (ops F4RC:$RA, s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs), InOperandList = (ins F4RC:$RA, s64imm:$DISP, GPRC:$RB) in { def STS : MForm<0x26, 0, "sts $RA,$DISP($RB)", [(store F4RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>; def STSr : MForm<0x26, 0, "sts $RA,$DISP($RB)\t\t!gprellow", [(store F4RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>; } -let OutOperandList = (ops F4RC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs F4RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { def LDS : MForm<0x22, 1, "lds $RA,$DISP($RB)", [(set F4RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>; def LDSr : MForm<0x22, 1, "lds $RA,$DISP($RB)\t\t!gprellow", [(set F4RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>; } -let OutOperandList = (ops), InOperandList = (ops F8RC:$RA, s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs), InOperandList = (ins F8RC:$RA, s64imm:$DISP, GPRC:$RB) in { def STT : MForm<0x27, 0, "stt $RA,$DISP($RB)", [(store F8RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>; def STTr : MForm<0x27, 0, "stt $RA,$DISP($RB)\t\t!gprellow", [(store F8RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>; } -let OutOperandList = (ops F8RC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs F8RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { def LDT : MForm<0x23, 1, "ldt $RA,$DISP($RB)", [(set F8RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>; def LDTr : MForm<0x23, 1, "ldt $RA,$DISP($RB)\t\t!gprellow", @@ -570,15 +570,15 @@ def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr), //load address, rellocated gpdist form -let OutOperandList = (ops GPRC:$RA), - InOperandList = (ops s16imm:$DISP, GPRC:$RB, s16imm:$NUM), +let OutOperandList = (outs GPRC:$RA), + InOperandList = (ins s16imm:$DISP, GPRC:$RB, s16imm:$NUM), mayLoad = 1 in { def LDAg : MForm<0x08, 1, "lda $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address def LDAHg : MForm<0x09, 1, "ldah $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address } //Load quad, rellocated literal form -let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in +let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal", [(set GPRC:$RA, (Alpha_rellit tglobaladdr:$DISP, GPRC:$RB))], s_ild>; def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB), @@ -591,8 +591,8 @@ let OutOperandList = (outs GPRC:$RR), def STQ_C : MForm<0x2F, 0, "stq_l $RA,$DISP($RB)", [], s_ist>; def STL_C : MForm<0x2E, 0, "stl_l $RA,$DISP($RB)", [], s_ist>; } -let OutOperandList = (ops GPRC:$RA), - InOperandList = (ops s64imm:$DISP, GPRC:$RB), +let OutOperandList = (outs GPRC:$RA), + InOperandList = (ins s64imm:$DISP, GPRC:$RB), mayLoad = 1 in { def LDQ_L : MForm<0x2B, 1, "ldq_l $RA,$DISP($RB)", [], s_ild>; def LDL_L : MForm<0x2A, 1, "ldl_l $RA,$DISP($RB)", [], s_ild>; @@ -611,11 +611,11 @@ def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)), //Floats -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F4RC:$RB), Fa = 31 in +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in def SQRTS : FPForm<0x14, 0x58B, "sqrts/su $RB,$RC", [(set F4RC:$RC, (fsqrt F4RC:$RB))], s_fsqrts>; -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F4RC:$RA, F4RC:$RB) in { +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RA, F4RC:$RB) in { def ADDS : FPForm<0x16, 0x580, "adds/su $RA,$RB,$RC", [(set F4RC:$RC, (fadd F4RC:$RA, F4RC:$RB))], s_fadd>; def SUBS : FPForm<0x16, 0x581, "subs/su $RA,$RB,$RC", @@ -634,11 +634,11 @@ def CPYSNS : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", //Doubles -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in def SQRTT : FPForm<0x14, 0x5AB, "sqrtt/su $RB,$RC", [(set F8RC:$RC, (fsqrt F8RC:$RB))], s_fsqrtt>; -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RA, F8RC:$RB) in { +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RA, F8RC:$RB) in { def ADDT : FPForm<0x16, 0x5A0, "addt/su $RA,$RB,$RC", [(set F8RC:$RC, (fadd F8RC:$RA, F8RC:$RB))], s_fadd>; def SUBT : FPForm<0x16, 0x5A1, "subt/su $RA,$RB,$RC", @@ -665,13 +665,13 @@ def CMPTUN : FPForm<0x16, 0x5A4, "cmptun/su $RA,$RB,$RC", [], s_fadd>; } //More CPYS forms: -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F4RC:$RA, F8RC:$RB) in { +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RA, F8RC:$RB) in { def CPYSTs : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC", [(set F8RC:$RC, (fcopysign F8RC:$RB, F4RC:$RA))], s_fadd>; def CPYSNTs : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F4RC:$RA)))], s_fadd>; } -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RA, F4RC:$RB) in { +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RA, F4RC:$RB) in { def CPYSSt : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC", [(set F4RC:$RC, (fcopysign F4RC:$RB, F8RC:$RA))], s_fadd>; def CPYSESt : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent @@ -680,7 +680,7 @@ def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", } //conditional moves, floats -let OutOperandList = (ops F4RC:$RDEST), InOperandList = (ops F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND), +let OutOperandList = (outs F4RC:$RDEST), InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND), isTwoAddress = 1 in { def FCMOVEQS : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if = zero def FCMOVGES : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if >= zero @@ -690,7 +690,7 @@ def FCMOVLTS : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; def FCMOVNES : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if != zero } //conditional moves, doubles -let OutOperandList = (ops F8RC:$RDEST), InOperandList = (ops F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND), +let OutOperandList = (outs F8RC:$RDEST), InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND), isTwoAddress = 1 in { def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>; def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>; @@ -790,33 +790,33 @@ def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), -let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F4RC:$RA), Fb = 31 in +let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F4RC:$RA), Fb = 31 in def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC", [(set GPRC:$RC, (bitconvert F4RC:$RA))], s_ftoi>; //Floating to integer move, S_floating -let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F8RC:$RA), Fb = 31 in +let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F8RC:$RA), Fb = 31 in def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC", [(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC", [(set F4RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move, S_floating -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC", [(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in def CVTQS : FPForm<0x16, 0x7BC, "cvtqs/sui $RB,$RC", [(set F4RC:$RC, (Alpha_cvtqs F8RC:$RB))], s_fadd>; -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in def CVTQT : FPForm<0x16, 0x7BE, "cvtqt/sui $RB,$RC", [(set F8RC:$RC, (Alpha_cvtqt F8RC:$RB))], s_fadd>; -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in def CVTTQ : FPForm<0x16, 0x52F, "cvttq/svc $RB,$RC", [(set F8RC:$RC, (Alpha_cvttq F8RC:$RB))], s_fadd>; -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F4RC:$RB), Fa = 31 in +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in def CVTST : FPForm<0x16, 0x6AC, "cvtst/s $RB,$RC", [(set F8RC:$RC, (fextend F4RC:$RB))], s_fadd>; -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC", [(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>; @@ -829,20 +829,20 @@ def : Pat<(select GPRC:$RC, F4RC:$st, F4RC:$sf), //Branching ///////////////////////////////////////////////////////// class br_icc<bits<6> opc, string asmstr> - : BFormN<opc, (ops u64imm:$opc, GPRC:$R, target:$dst), + : BFormN<opc, (ins u64imm:$opc, GPRC:$R, target:$dst), !strconcat(asmstr, " $R,$dst"), s_icbr>; class br_fcc<bits<6> opc, string asmstr> - : BFormN<opc, (ops u64imm:$opc, F8RC:$R, target:$dst), + : BFormN<opc, (ins u64imm:$opc, F8RC:$R, target:$dst), !strconcat(asmstr, " $R,$dst"), s_fbr>; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { let Ra = 31 in def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>; -def COND_BRANCH_I : BFormN<0, (ops u64imm:$opc, GPRC:$R, target:$dst), +def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst), "{:comment} COND_BRANCH imm:$opc, GPRC:$R, bb:$dst", s_icbr>; -def COND_BRANCH_F : BFormN<0, (ops u64imm:$opc, F8RC:$R, target:$dst), +def COND_BRANCH_F : BFormN<0, (ins u64imm:$opc, F8RC:$R, target:$dst), "{:comment} COND_BRANCH imm:$opc, F8RC:$R, bb:$dst", s_fbr>; //Branches, int diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td index 88ff85f..e3c3993 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.td +++ b/lib/Target/Blackfin/BlackfinInstrInfo.td @@ -29,7 +29,8 @@ def BfinCallseqEnd : SDNode<"ISD::CALLSEQ_END", SDT_BfinCallSeqEnd, def SDT_BfinCall : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def BfinCall : SDNode<"BFISD::CALL", SDT_BfinCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def BfinRet: SDNode<"BFISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; @@ -610,7 +611,7 @@ def MOVE_ncccc : F1<(outs NotCC:$cc), (ins JustCC:$sb), def MOVECC_zext : F1<(outs D:$dst), (ins JustCC:$cc), "$dst = $cc;", - [(set D:$dst, (zext JustCC:$cc))]>; + [/*(set D:$dst, (zext JustCC:$cc))*/]>; def MOVENCC_z : F1<(outs D:$dst), (ins NotCC:$cc), "$dst = cc;", []>; @@ -859,10 +860,10 @@ def : Pat<(BfinCall (i32 tglobaladdr:$dst)), def : Pat<(BfinCall (i32 texternalsym:$dst)), (CALLa texternalsym:$dst)>; -def : Pat<(sext JustCC:$cc), - (NEG (MOVECC_zext JustCC:$cc))>; -def : Pat<(anyext JustCC:$cc), - (MOVECC_zext JustCC:$cc)>; +//def : Pat<(sext JustCC:$cc), +// (NEG (MOVECC_zext JustCC:$cc))>; +//def : Pat<(anyext JustCC:$cc), +// (MOVECC_zext JustCC:$cc)>; def : Pat<(i16 (zext JustCC:$cc)), (EXTRACT_SUBREG (MOVECC_zext JustCC:$cc), bfin_subreg_lo16)>; def : Pat<(i16 (sext JustCC:$cc)), diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp index ea9480d..34a8d38 100644 --- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp +++ b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp @@ -53,6 +53,10 @@ std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, const Type **Tys, unsigned BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const { + if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l' + || Name[2] != 'v' || Name[3] != 'm') + return 0; // All intrinsics start with 'llvm.' + #define GET_FUNCTION_RECOGNIZER #include "BlackfinGenIntrinsics.inc" #undef GET_FUNCTION_RECOGNIZER diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index b1ba0d2..0c265ad 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -111,7 +111,8 @@ namespace { static char ID; explicit CWriter(formatted_raw_ostream &o) : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0), - TheModule(0), TAsm(0), TD(0), OpaqueCounter(0), NextAnonValueNumber(0) { + TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0), + NextAnonValueNumber(0) { FPCounter = 0; } @@ -147,6 +148,8 @@ namespace { delete IL; delete TD; delete Mang; + delete TCtx; + delete TAsm; FPConstantMap.clear(); TypeNames.clear(); ByValParams.clear(); diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index ad12604..5068f77 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -1133,16 +1133,14 @@ class XSBHInst<dag OOL, dag IOL, list<dag> pattern>: "xsbh\t$rDst, $rSrc", IntegerOp, pattern>; -class XSBHVecInst<ValueType vectype>: - XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), - [(set (v8i16 VECREG:$rDst), (sext (vectype VECREG:$rSrc)))]>; - class XSBHInRegInst<RegisterClass rclass, list<dag> pattern>: XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc), pattern>; multiclass ExtendByteHalfword { - def v16i8: XSBHVecInst<v8i16>; + def v16i8: XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), + [ + /*(set (v8i16 VECREG:$rDst), (sext (v8i16 VECREG:$rSrc)))*/]>; def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc), [(set R16C:$rDst, (sext R8C:$rSrc))]>; def r16: XSBHInRegInst<R16C, @@ -1200,8 +1198,8 @@ class XSWDInst<dag OOL, dag IOL, list<dag> pattern>: class XSWDVecInst<ValueType in_vectype, ValueType out_vectype>: XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), - [(set (out_vectype VECREG:$rDst), - (sext (out_vectype VECREG:$rSrc)))]>; + [/*(set (out_vectype VECREG:$rDst), + (sext (out_vectype VECREG:$rSrc)))*/]>; class XSWDRegInst<RegisterClass in_rclass, RegisterClass out_rclass>: XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc), @@ -4146,7 +4144,7 @@ def CFSif32 : def FESDvec : RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA), "fesd\t$rT, $rA", SPrecFP, - [(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))]>; + [/*(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))*/]>; def FESDf32 : RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA), diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index 8507861..846c7ed 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -28,7 +28,8 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq, def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; // Operand type constraints for vector shuffle/permute operations def SDT_SPUshuffle : SDTypeProfile<1, 3, [ diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp index c8faffc..4931860 100644 --- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp +++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp @@ -57,6 +57,10 @@ std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys, unsigned MBlazeIntrinsicInfo:: lookupName(const char *Name, unsigned Len) const { + if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l' + || Name[2] != 'v' || Name[3] != 'm') + return 0; // All intrinsics start with 'llvm.' + #define GET_FUNCTION_RECOGNIZER #include "MBlazeGenIntrinsics.inc" #undef GET_FUNCTION_RECOGNIZER diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index cef3697..2b9e941 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -29,7 +29,8 @@ def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; // Call def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, + SDNPVariadic]>; // Hi and Lo nodes are used to handle global addresses. Used on // MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp index b6eceb3..1001d29 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp @@ -184,7 +184,7 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { // by any chance, as we do not link in those as .bc lib. So these calls // are always external and it is safe to emit an extern. if (PAN::isMemIntrinsic(Sym->getName())) - LibcallDecls.push_back(createESName(Sym->getName())); + LibcallDecls.insert(Sym->getName()); O << *Sym; break; @@ -199,7 +199,7 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { Printname = PAN::Rename(Sname); } // Record these decls, we need to print them in asm as extern. - LibcallDecls.push_back(createESName(Printname)); + LibcallDecls.insert(Printname); } O << Printname; @@ -221,18 +221,6 @@ void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) { O << PIC16CondCodeToString((PIC16CC::CondCodes)CC); } -// This function is used to sort the decls list. -// should return true if s1 should come before s2. -static bool is_before(const char *s1, const char *s2) { - return strcmp(s1, s2) <= 0; -} - -// This is used by list::unique below. -// unique will filter out duplicates if it knows them. -static bool is_duplicate(const char *s1, const char *s2) { - return !strcmp(s1, s2); -} - /// printLibcallDecls - print the extern declarations for compiler /// intrinsics. /// @@ -241,12 +229,9 @@ void PIC16AsmPrinter::printLibcallDecls() { if (LibcallDecls.empty()) return; O << MAI->getCommentString() << "External decls for libcalls - BEGIN." <<"\n"; - // Remove duplicate entries. - LibcallDecls.sort(is_before); - LibcallDecls.unique(is_duplicate); - for (std::list<const char*>::const_iterator I = LibcallDecls.begin(); - I != LibcallDecls.end(); I++) { + for (std::set<std::string>::const_iterator I = LibcallDecls.begin(), + E = LibcallDecls.end(); I != E; I++) { O << MAI->getExternDirective() << *I << "\n"; } O << MAI->getCommentString() << "External decls for libcalls - END." <<"\n"; diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h index 519be4c..8063fcc 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h @@ -25,6 +25,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" #include <list> +#include <set> #include <string> namespace llvm { @@ -80,7 +81,7 @@ namespace llvm { PIC16TargetLowering *PTLI; PIC16DbgInfo DbgInfo; const PIC16MCAsmInfo *PMAI; - std::list<const char *> LibcallDecls; // List of extern decls. + std::set<std::string> LibcallDecls; // Sorted & uniqued set of extern decls. std::vector<const GlobalVariable *> ExternalVarDecls; std::vector<const GlobalVariable *> ExternalVarDefs; }; diff --git a/lib/Target/PIC16/PIC16Section.h b/lib/Target/PIC16/PIC16Section.h index 3a8bbfb..566f920 100644 --- a/lib/Target/PIC16/PIC16Section.h +++ b/lib/Target/PIC16/PIC16Section.h @@ -45,7 +45,7 @@ namespace llvm { PIC16Section(const StringRef &name, SectionKind K, const std::string &addr, int color) - : MCSection(K), Name(name), Address(addr), Color(color) { + : MCSection(K), Name(name), Address(addr), Color(color), Size(0) { } public: diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 845cd8f..532a3ec 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -111,9 +111,11 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd, def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInFlag, SDNPOutFlag]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; @@ -124,16 +126,18 @@ def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index cac6962..c4a7408 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -20,7 +20,7 @@ #include "llvm/Support/FormattedStream.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { +static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); bool isPPC64 = TheTriple.getArch() == Triple::ppc64; if (TheTriple.getOS() == Triple::Darwin) diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index d88d508..9489580 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -527,11 +527,11 @@ let Uses = [O0, O1, O2, O3, O4, O5], def JMPLrr : F3_1<2, 0b111000, (outs), (ins MEMrr:$ptr), "call $ptr", - [(call ADDRrr:$ptr)]>; + [(call ADDRrr:$ptr)]>; def JMPLri : F3_2<2, 0b111000, (outs), (ins MEMri:$ptr), "call $ptr", - [(call ADDRri:$ptr)]>; + [(call ADDRri:$ptr)]>; } // Section B.28 - Read State Register Instructions diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index a75b85d..0d1af23 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -47,7 +47,7 @@ def SDT_Address : SDTypeProfile<1, 1, def SystemZretflag : SDNode<"SystemZISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; def SystemZcall : SDNode<"SystemZISD::CALL", SDT_SystemZCall, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, SDNPVariadic]>; def SystemZcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SystemZCallSeqStart, [SDNPHasChain, SDNPOutFlag]>; diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 9a16808..643b397 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -460,6 +460,15 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { case Type::StructTyID: // Get the layout annotation... which is lazily created on demand. return getStructLayout(cast<StructType>(Ty))->getSizeInBits(); + case Type::UnionTyID: { + const UnionType *UnTy = cast<UnionType>(Ty); + uint64_t Size = 0; + for (UnionType::element_iterator i = UnTy->element_begin(), + e = UnTy->element_end(); i != e; ++i) { + Size = std::max(Size, getTypeSizeInBits(*i)); + } + return Size; + } case Type::IntegerTyID: return cast<IntegerType>(Ty)->getBitWidth(); case Type::VoidTyID: @@ -516,6 +525,17 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty); return std::max(Align, (unsigned)Layout->getAlignment()); } + case Type::UnionTyID: { + const UnionType *UnTy = cast<UnionType>(Ty); + unsigned Align = 1; + + // Unions need the maximum alignment of all their entries + for (UnionType::element_iterator i = UnTy->element_begin(), + e = UnTy->element_end(); i != e; ++i) { + Align = std::max(Align, (unsigned)getAlignment(*i, abi_or_pref)); + } + return Align; + } case Type::IntegerTyID: case Type::VoidTyID: AlignType = INTEGER_ALIGN; @@ -600,6 +620,11 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, // Update Ty to refer to current element Ty = STy->getElementType(FieldNo); + } else if (const UnionType *UnTy = dyn_cast<UnionType>(*TI)) { + unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue(); + + // Offset into union is canonically 0, but type changes + Ty = UnTy->getElementType(FieldNo); } else { // Update Ty to refer to current element Ty = cast<SequentialType>(Ty)->getElementType(); diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index a093e2d..44722b3 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -317,7 +317,7 @@ getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang, case dwarf::DW_EH_PE_pcrel: { // Emit a label to the streamer for the current position. This gives us // .-foo addressing. - MCSymbol *PCSym = getContext().GetOrCreateTemporarySymbol(); + MCSymbol *PCSym = getContext().CreateTempSymbol(); Streamer.EmitLabel(PCSym); const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); return MCBinaryExpr::CreateSub(Res, PC, getContext()); diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index dde86fb..47873d1 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -29,6 +29,9 @@ struct X86Operand; class X86ATTAsmParser : public TargetAsmParser { MCAsmParser &Parser; +protected: + unsigned Is64Bit : 1; + private: MCAsmParser &getParser() const { return Parser; } @@ -45,6 +48,8 @@ private: bool ParseDirectiveWord(unsigned Size, SMLoc L); + void InstructionCleanup(MCInst &Inst); + /// @name Auto-generated Match Functions /// { @@ -62,7 +67,23 @@ public: virtual bool ParseDirective(AsmToken DirectiveID); }; - + +class X86_32ATTAsmParser : public X86ATTAsmParser { +public: + X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser) + : X86ATTAsmParser(T, _Parser) { + Is64Bit = false; + } +}; + +class X86_64ATTAsmParser : public X86ATTAsmParser { +public: + X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser) + : X86ATTAsmParser(T, _Parser) { + Is64Bit = true; + } +}; + } // end anonymous namespace /// @name Auto-generated Match Functions @@ -548,8 +569,10 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, Operands.size() == 3 && static_cast<X86Operand*>(Operands[1])->isImm() && isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) && - cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) + cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) { + delete Operands[1]; Operands.erase(Operands.begin() + 1); + } return false; } @@ -586,12 +609,30 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { return false; } +// FIXME: Custom X86 cleanup function to implement a temporary hack to handle +// matching INCL/DECL correctly for x86_64. This needs to be replaced by a +// proper mechanism for supporting (ambiguous) feature dependent instructions. +void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) { + if (!Is64Bit) return; + + switch (Inst.getOpcode()) { + case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break; + case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break; + case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break; + case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break; + case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break; + case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break; + case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break; + case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break; + } +} + extern "C" void LLVMInitializeX86AsmLexer(); // Force static initialization. extern "C" void LLVMInitializeX86AsmParser() { - RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target); - RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target); + RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target); + RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target); LLVMInitializeX86AsmLexer(); } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index cbfc57a..7d29d97 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -427,7 +427,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // MYGLOBAL + (. - PICBASE) // However, we can't generate a ".", so just emit a new label here and refer // to it. - MCSymbol *DotSym = OutContext.GetOrCreateTemporarySymbol(); + MCSymbol *DotSym = OutContext.CreateTempSymbol(); OutStreamer.EmitLabel(DotSym); // Now that we have emitted the label, lower the complex operand expression. diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index a316860..7b7b5cb 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -459,11 +459,11 @@ static void translateInstruction(MCInst &mcInst, } } -static const MCDisassembler *createX86_32Disassembler(const Target &T) { +static MCDisassembler *createX86_32Disassembler(const Target &T) { return new X86Disassembler::X86_32Disassembler; } -static const MCDisassembler *createX86_64Disassembler(const Target &T) { +static MCDisassembler *createX86_64Disassembler(const Target &T) { return new X86Disassembler::X86_64Disassembler; } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index a0a04ba..4f02ed4 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1355,8 +1355,8 @@ int decodeInstruction(struct InternalInstruction* insn, insn->length = insn->readerCursor - insn->startLocation; - dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %llu", - startLoc, insn->readerCursor, insn->length); + dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", + startLoc, insn->readerCursor, insn->length); if (insn->length > 15) dbgprintf(insn, "Instruction exceeds 15-byte limit"); diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 6a4bdb5..2be51e1 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -191,6 +191,7 @@ include "X86CallingConv.td" // Currently the X86 assembly parser only supports ATT syntax. def ATTAsmParser : AsmParser { string AsmParserClassName = "ATTAsmParser"; + string AsmParserInstCleanup = "InstructionCleanup"; int Variant = 0; // Discard comments in assembly strings. diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index a44afc6..754a200 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -9,39 +9,100 @@ #include "llvm/Target/TargetAsmBackend.h" #include "X86.h" +#include "X86FixupKinds.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MachObjectWriter.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmBackend.h" using namespace llvm; namespace { +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: assert(0 && "invalid fixup kind!"); + case X86::reloc_pcrel_1byte: + case FK_Data_1: return 0; + case FK_Data_2: return 1; + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + case FK_Data_4: return 2; + case FK_Data_8: return 3; + } +} + class X86AsmBackend : public TargetAsmBackend { public: X86AsmBackend(const Target &T) : TargetAsmBackend(T) {} + + void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF, + uint64_t Value) const { + unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind); + + assert(Fixup.Offset + Size <= DF.getContents().size() && + "Invalid fixup offset!"); + for (unsigned i = 0; i != Size; ++i) + DF.getContents()[Fixup.Offset + i] = uint8_t(Value >> (i * 8)); + } +}; + +class ELFX86AsmBackend : public X86AsmBackend { +public: + ELFX86AsmBackend(const Target &T) + : X86AsmBackend(T) { + HasAbsolutizedSet = true; + HasScatteredSymbols = true; + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return 0; + } + + bool isVirtualSection(const MCSection &Section) const { + const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section); + return SE.getType() == MCSectionELF::SHT_NOBITS;; + } }; class DarwinX86AsmBackend : public X86AsmBackend { public: DarwinX86AsmBackend(const Target &T) - : X86AsmBackend(T) {} - - virtual bool hasAbsolutizedSet() const { return true; } + : X86AsmBackend(T) { + HasAbsolutizedSet = true; + HasScatteredSymbols = true; + } - virtual bool hasScatteredSymbols() const { return true; } + bool isVirtualSection(const MCSection &Section) const { + const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); + return (SMO.getType() == MCSectionMachO::S_ZEROFILL || + SMO.getType() == MCSectionMachO::S_GB_ZEROFILL); + } }; class DarwinX86_32AsmBackend : public DarwinX86AsmBackend { public: DarwinX86_32AsmBackend(const Target &T) : DarwinX86AsmBackend(T) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return new MachObjectWriter(OS, /*Is64Bit=*/false); + } }; class DarwinX86_64AsmBackend : public DarwinX86AsmBackend { public: DarwinX86_64AsmBackend(const Target &T) - : DarwinX86AsmBackend(T) {} + : DarwinX86AsmBackend(T) { + HasReliableSymbolDifference = true; + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return new MachObjectWriter(OS, /*Is64Bit=*/true); + } virtual bool doesSectionRequireSymbols(const MCSection &Section) const { // Temporary labels in the string literals sections require symbols. The @@ -65,7 +126,7 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, case Triple::Darwin: return new DarwinX86_32AsmBackend(T); default: - return new X86AsmBackend(T); + return new ELFX86AsmBackend(T); } } @@ -75,6 +136,6 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, case Triple::Darwin: return new DarwinX86_64AsmBackend(T); default: - return new X86AsmBackend(T); + return new ELFX86AsmBackend(T); } } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 96b652d..5d3edbb 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1166,6 +1166,21 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; + case Intrinsic::stackprotector: { + // Emit code inline code to store the stack guard onto the stack. + EVT PtrTy = TLI.getPointerTy(); + + Value *Op1 = I.getOperand(1); // The guard's value. + AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); + + // Grab the frame index. + X86AddressMode AM; + if (!X86SelectAddress(Slot, AM)) return false; + + if (!X86FastEmitStore(PtrTy, Op1, AM)) return false; + + return true; + } case Intrinsic::objectsize: { ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2)); const Type *Ty = I.getCalledFunction()->getReturnType(); diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h index c8dac3c..a8117d4 100644 --- a/lib/Target/X86/X86FixupKinds.h +++ b/lib/Target/X86/X86FixupKinds.h @@ -17,7 +17,8 @@ namespace X86 { enum Fixups { reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch. reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1 - reloc_riprel_4byte // 32-bit rip-relative + reloc_riprel_4byte, // 32-bit rip-relative + reloc_riprel_4byte_movq_load // 32-bit rip-relative in movq }; } } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 4058885..1c0ed7e 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -140,6 +140,21 @@ namespace { } namespace { + class X86ISelListener : public SelectionDAG::DAGUpdateListener { + SmallSet<SDNode*, 4> Deletes; + public: + explicit X86ISelListener() {} + virtual void NodeDeleted(SDNode *N, SDNode *E) { + Deletes.insert(N); + } + virtual void NodeUpdated(SDNode *N) { + // Ignore updates. + } + bool IsDeleted(SDNode *N) { + return Deletes.count(N); + } + }; + //===--------------------------------------------------------------------===// /// ISel - X86 specific code to select X86 machine instructions for /// SelectionDAG operations. @@ -187,6 +202,7 @@ namespace { bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); bool MatchAddress(SDValue N, X86ISelAddressMode &AM); bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, + X86ISelListener &DeadNodes, unsigned Depth); bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, @@ -651,7 +667,8 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { /// returning true if it cannot be done. This just pattern matches for the /// addressing mode. bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { - if (MatchAddressRecursively(N, AM, 0)) + X86ISelListener DeadNodes; + if (MatchAddressRecursively(N, AM, DeadNodes, 0)) return true; // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has @@ -680,6 +697,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { } bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, + X86ISelListener &DeadNodes, unsigned Depth) { bool is64Bit = Subtarget->is64Bit(); DebugLoc dl = N.getDebugLoc(); @@ -845,7 +863,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // Test if the LHS of the sub can be folded. X86ISelAddressMode Backup = AM; - if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) { + if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, + DeadNodes, Depth+1) || + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + DeadNodes.IsDeleted(N.getNode())) { AM = Backup; break; } @@ -854,6 +876,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM = Backup; break; } + int Cost = 0; SDValue RHS = N.getNode()->getOperand(1); // If the RHS involves a register with multiple uses, this @@ -907,13 +930,33 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::ADD: { X86ISelAddressMode Backup = AM; - if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) && - !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1)) - return false; + if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, + DeadNodes, Depth+1)) { + if (DeadNodes.IsDeleted(N.getNode())) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return true; + if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, + DeadNodes, Depth+1)) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return DeadNodes.IsDeleted(N.getNode()); + } + + // Try again after commuting the operands. AM = Backup; - if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) && - !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) - return false; + if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, + DeadNodes, Depth+1)) { + if (DeadNodes.IsDeleted(N.getNode())) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return true; + if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, + DeadNodes, Depth+1)) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return DeadNodes.IsDeleted(N.getNode()); + } AM = Backup; // If we couldn't fold both operands into the address at the same time, @@ -935,16 +978,19 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { X86ISelAddressMode Backup = AM; uint64_t Offset = CN->getSExtValue(); + + // Check to see if the LHS & C is zero. + if (!CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) + break; + // Start with the LHS as an addr mode. - if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && + if (!MatchAddressRecursively(N.getOperand(0), AM, DeadNodes, Depth+1) && // Address could not have picked a GV address for the displacement. AM.GV == NULL && // On x86-64, the resultant disp must fit in 32-bits. (!is64Bit || X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M, - AM.hasSymbolicDisplacement())) && - // Check to see if the LHS & C is zero. - CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { + AM.hasSymbolicDisplacement()))) { AM.Disp += Offset; return false; } @@ -1015,7 +1061,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, CurDAG->RepositionNode(N.getNode(), Shl.getNode()); Shl.getNode()->setNodeId(N.getNode()->getNodeId()); } - CurDAG->ReplaceAllUsesWith(N, Shl); + CurDAG->ReplaceAllUsesWith(N, Shl, &DeadNodes); AM.IndexReg = And; AM.Scale = (1 << ScaleLog); return false; @@ -1066,7 +1112,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId()); } - CurDAG->ReplaceAllUsesWith(N, NewSHIFT); + CurDAG->ReplaceAllUsesWith(N, NewSHIFT, &DeadNodes); AM.Scale = 1 << ShiftCst; AM.IndexReg = NewAND; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7d2140b..704f9c6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2310,6 +2310,28 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isCalleeStructRet || isCallerStructRet) return false; + // If the call result is in ST0 / ST1, it needs to be popped off the x87 stack. + // Therefore if it's not used by the call it is not safe to optimize this into + // a sibcall. + bool Unused = false; + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + if (!Ins[i].Used) { + Unused = true; + break; + } + } + if (Unused) { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CalleeCC, false, getTargetMachine(), + RVLocs, *DAG.getContext()); + CCInfo.AnalyzeCallResult(Ins, RetCC_X86); + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) + return false; + } + } + // If the callee takes no arguments then go on to check the results of the // call. if (!Outs.empty()) { diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 4262c0ac..8cbb756 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -144,7 +144,7 @@ let isCall = 1 in // NOTE: this pattern doesn't match "X86call imm", because we do not know // that the offset between an arbitrary immediate and the call will fit in // the 32-bit pcrel field that we have. - def CALL64pcrel32 : Ii32<0xE8, RawFrm, + def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), "call{q}\t$dst", []>, Requires<[In64BitMode, NotWin64]>; @@ -511,6 +511,14 @@ def ADD64rr : RI<0x01, MRMDestReg, (outs GR64:$dst), [(set GR64:$dst, (add GR64:$src1, GR64:$src2)), (implicit EFLAGS)]>; +// These are alternate spellings for use by the disassembler, we mark them as +// code gen only to ensure they aren't matched by the assembler. +let isCodeGenOnly = 1 in { + def ADD64rr_alt : RI<0x03, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "add{l}\t{$src2, $dst|$dst, $src2}", []>; +} + // Register-Integer Addition def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), @@ -531,12 +539,6 @@ def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), [(set GR64:$dst, (add GR64:$src1, (load addr:$src2))), (implicit EFLAGS)]>; -// Register-Register Addition - Equivalent to the normal rr form (ADD64rr), but -// differently encoded. -def ADD64mrmrr : RI<0x03, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "add{l}\t{$src2, $dst|$dst, $src2}", []>; - } // isTwoAddress // Memory-Register Addition @@ -1225,59 +1227,59 @@ let Defs = [EFLAGS] in { def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i32imm:$src), "test{q}\t{$src, %rax|%rax, $src}", []>; let isCommutable = 1 in -def TEST64rr : RI<0x85, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), +def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), "test{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR64:$src1, GR64:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and GR64:$src1, GR64:$src2), 0))]>; def TEST64rm : RI<0x85, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2), "test{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR64:$src1, (loadi64 addr:$src2)), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and GR64:$src1, (loadi64 addr:$src2)), + 0))]>; def TEST64ri32 : RIi32<0xF7, MRM0r, (outs), (ins GR64:$src1, i64i32imm:$src2), "test{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR64:$src1, i64immSExt32:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and GR64:$src1, i64immSExt32:$src2), + 0))]>; def TEST64mi32 : RIi32<0xF7, MRM0m, (outs), (ins i64mem:$src1, i64i32imm:$src2), "test{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and (loadi64 addr:$src1), + i64immSExt32:$src2), 0))]>; def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i32imm:$src), "cmp{q}\t{$src, %rax|%rax, $src}", []>; def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR64:$src1, GR64:$src2), - (implicit EFLAGS)]>; -def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", []>; + [(set EFLAGS, (X86cmp GR64:$src1, GR64:$src2))]>; + +// These are alternate spellings for use by the disassembler, we mark them as +// code gen only to ensure they aren't matched by the assembler. +let isCodeGenOnly = 1 in { + def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), + "cmp{q}\t{$src2, $src1|$src1, $src2}", []>; +} + def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi64 addr:$src1), GR64:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi64 addr:$src1), GR64:$src2))]>; def CMP64rm : RI<0x3B, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR64:$src1, (loadi64 addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR64:$src1, (loadi64 addr:$src2)))]>; def CMP64ri8 : RIi8<0x83, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR64:$src1, i64immSExt8:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt8:$src2))]>; def CMP64ri32 : RIi32<0x81, MRM7r, (outs), (ins GR64:$src1, i64i32imm:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR64:$src1, i64immSExt32:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt32:$src2))]>; def CMP64mi8 : RIi8<0x83, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi64 addr:$src1), i64immSExt8:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi64 addr:$src1), + i64immSExt8:$src2))]>; def CMP64mi32 : RIi32<0x81, MRM7m, (outs), (ins i64mem:$src1, i64i32imm:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi64 addr:$src1), i64immSExt32:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi64 addr:$src1), + i64immSExt32:$src2))]>; } // Defs = [EFLAGS] // Bit tests. @@ -1285,8 +1287,7 @@ def CMP64mi32 : RIi32<0x81, MRM7m, (outs), let Defs = [EFLAGS] in { def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR64:$src1, GR64:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB; // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. From ISel's @@ -1300,15 +1301,14 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR64:$src1, i64immSExt8:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB; // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. def BT64mi8 : Ii8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(X86bt (loadi64 addr:$src1), i64immSExt8:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt (loadi64 addr:$src1), + i64immSExt8:$src2))]>, TB; def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; @@ -1938,7 +1938,7 @@ def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off), // Comparisons. // TEST R,R is smaller than CMP R,0 -def : Pat<(parallel (X86cmp GR64:$src1, 0), (implicit EFLAGS)), +def : Pat<(X86cmp GR64:$src1, 0), (TEST64rr GR64:$src1, GR64:$src1)>; // Conditional moves with folded loads with operands swapped and conditions @@ -2233,21 +2233,6 @@ def : Pat<(parallel (X86add_flag GR64:$src1, (loadi64 addr:$src2)), (implicit EFLAGS)), (ADD64rm GR64:$src1, addr:$src2)>; -// Memory-Register Addition with EFLAGS result -def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), GR64:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD64mr addr:$dst, GR64:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), - i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD64mi32 addr:$dst, i64immSExt32:$src2)>; - // Register-Register Subtraction with EFLAGS result def : Pat<(parallel (X86sub_flag GR64:$src1, GR64:$src2), (implicit EFLAGS)), @@ -2266,24 +2251,6 @@ def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt32:$src2), (implicit EFLAGS)), (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; -// Memory-Register Subtraction with EFLAGS result -def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), GR64:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB64mr addr:$dst, GR64:$src2)>; - -// Memory-Integer Subtraction with EFLAGS result -def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), - i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), - i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB64mi32 addr:$dst, i64immSExt32:$src2)>; - // Register-Register Signed Integer Multiplication with EFLAGS result def : Pat<(parallel (X86smul_flag GR64:$src1, GR64:$src2), (implicit EFLAGS)), @@ -2313,36 +2280,18 @@ def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt32:$src2), // INC and DEC with EFLAGS result. Note that these do not set CF. def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>; -def : Pat<(parallel (store (i16 (X86inc_flag (loadi16 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC64_16m addr:$dst)>, Requires<[In64BitMode]>; def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>; -def : Pat<(parallel (store (i16 (X86dec_flag (loadi16 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC64_16m addr:$dst)>, Requires<[In64BitMode]>; def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>; -def : Pat<(parallel (store (i32 (X86inc_flag (loadi32 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC64_32m addr:$dst)>, Requires<[In64BitMode]>; def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>; -def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC64_32m addr:$dst)>, Requires<[In64BitMode]>; def : Pat<(parallel (X86inc_flag GR64:$src), (implicit EFLAGS)), (INC64r GR64:$src)>; -def : Pat<(parallel (store (i64 (X86inc_flag (loadi64 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC64m addr:$dst)>; def : Pat<(parallel (X86dec_flag GR64:$src), (implicit EFLAGS)), (DEC64r GR64:$src)>; -def : Pat<(parallel (store (i64 (X86dec_flag (loadi64 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC64m addr:$dst)>; // Register-Register Logical Or with EFLAGS result def : Pat<(parallel (X86or_flag GR64:$src1, GR64:$src2), @@ -2362,20 +2311,6 @@ def : Pat<(parallel (X86or_flag GR64:$src1, (loadi64 addr:$src2)), (implicit EFLAGS)), (OR64rm GR64:$src1, addr:$src2)>; -// Memory-Register Logical Or with EFLAGS result -def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), GR64:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR64mr addr:$dst, GR64:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR64mi32 addr:$dst, i64immSExt32:$src2)>; - // Register-Register Logical XOr with EFLAGS result def : Pat<(parallel (X86xor_flag GR64:$src1, GR64:$src2), (implicit EFLAGS)), @@ -2394,21 +2329,6 @@ def : Pat<(parallel (X86xor_flag GR64:$src1, (loadi64 addr:$src2)), (implicit EFLAGS)), (XOR64rm GR64:$src1, addr:$src2)>; -// Memory-Register Logical XOr with EFLAGS result -def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), GR64:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR64mr addr:$dst, GR64:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), - i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR64mi32 addr:$dst, i64immSExt32:$src2)>; - // Register-Register Logical And with EFLAGS result def : Pat<(parallel (X86and_flag GR64:$src1, GR64:$src2), (implicit EFLAGS)), @@ -2427,21 +2347,6 @@ def : Pat<(parallel (X86and_flag GR64:$src1, (loadi64 addr:$src2)), (implicit EFLAGS)), (AND64rm GR64:$src1, addr:$src2)>; -// Memory-Register Logical And with EFLAGS result -def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), GR64:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND64mr addr:$dst, GR64:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), - i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND64mi32 addr:$dst, i64immSExt32:$src2)>; - //===----------------------------------------------------------------------===// // X86-64 SSE Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index b730918..e6d1fee 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -562,15 +562,13 @@ def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, []>; // FPSW = cmp ST(0) with ST(i) +// CC = ST(0) cmp ST(i) def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, - [(X86cmp RFP32:$lhs, RFP32:$rhs), - (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i) + [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>; def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, - [(X86cmp RFP64:$lhs, RFP64:$rhs), - (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i) + [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>; def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, - [(X86cmp RFP80:$lhs, RFP80:$rhs), - (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i) + [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>; } let Defs = [EFLAGS], Uses = [ST0] in { diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 1225b68..c80a18d 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -21,8 +21,7 @@ def SDTIntShiftDOp: SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>]>; -// FIXME: Should be modelled as returning i32 -def SDTX86CmpTest : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; +def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>; def SDTX86Cmov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, @@ -120,12 +119,12 @@ def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; def X86vastart_save_xmm_regs : SDNode<"X86ISD::VASTART_SAVE_XMM_REGS", SDT_X86VASTART_SAVE_XMM_REGS, - [SDNPHasChain]>; + [SDNPHasChain, SDNPVariadic]>; def X86callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart, @@ -135,7 +134,8 @@ def X86callseq_end : [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def X86call : SDNode<"X86ISD::CALL", SDT_X86Call, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, + SDNPVariadic]>; def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr, [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>; @@ -158,7 +158,7 @@ def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags, [SDNPCommutative]>; @@ -661,9 +661,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // Loop instructions -def LOOP : I<0xE2, RawFrm, (ins brtarget8:$dst), (outs), "loop\t$dst", []>; -def LOOPE : I<0xE1, RawFrm, (ins brtarget8:$dst), (outs), "loope\t$dst", []>; -def LOOPNE : I<0xE0, RawFrm, (ins brtarget8:$dst), (outs), "loopne\t$dst", []>; +def LOOP : I<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>; +def LOOPE : I<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>; +def LOOPNE : I<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>; //===----------------------------------------------------------------------===// // Call Instructions... @@ -3200,17 +3200,16 @@ let Defs = [EFLAGS] in { let isCommutable = 1 in { // TEST X, Y --> TEST Y, X def TEST8rr : I<0x84, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2), "test{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR8:$src1, GR8:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and_su GR8:$src1, GR8:$src2), 0))]>; def TEST16rr : I<0x85, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2), "test{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR16:$src1, GR16:$src2), 0), - (implicit EFLAGS)]>, + [(set EFLAGS, (X86cmp (and_su GR16:$src1, GR16:$src2), + 0))]>, OpSize; def TEST32rr : I<0x85, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2), "test{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR32:$src1, GR32:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and_su GR32:$src1, GR32:$src2), + 0))]>; } def TEST8i8 : Ii8<0xA8, RawFrm, (outs), (ins i8imm:$src), @@ -3222,48 +3221,46 @@ def TEST32i32 : Ii32<0xA9, RawFrm, (outs), (ins i32imm:$src), def TEST8rm : I<0x84, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2), "test{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR8:$src1, (loadi8 addr:$src2)), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and GR8:$src1, (loadi8 addr:$src2)), + 0))]>; def TEST16rm : I<0x85, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2), "test{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR16:$src1, (loadi16 addr:$src2)), 0), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (and GR16:$src1, + (loadi16 addr:$src2)), 0))]>, OpSize; def TEST32rm : I<0x85, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2), "test{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR32:$src1, (loadi32 addr:$src2)), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and GR32:$src1, + (loadi32 addr:$src2)), 0))]>; def TEST8ri : Ii8 <0xF6, MRM0r, // flags = GR8 & imm8 (outs), (ins GR8:$src1, i8imm:$src2), "test{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR8:$src1, imm:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and_su GR8:$src1, imm:$src2), 0))]>; def TEST16ri : Ii16<0xF7, MRM0r, // flags = GR16 & imm16 (outs), (ins GR16:$src1, i16imm:$src2), "test{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR16:$src1, imm:$src2), 0), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (and_su GR16:$src1, imm:$src2), 0))]>, + OpSize; def TEST32ri : Ii32<0xF7, MRM0r, // flags = GR32 & imm32 (outs), (ins GR32:$src1, i32imm:$src2), "test{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR32:$src1, imm:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and_su GR32:$src1, imm:$src2), 0))]>; def TEST8mi : Ii8 <0xF6, MRM0m, // flags = [mem8] & imm8 (outs), (ins i8mem:$src1, i8imm:$src2), "test{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and (loadi8 addr:$src1), imm:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and (loadi8 addr:$src1), imm:$src2), + 0))]>; def TEST16mi : Ii16<0xF7, MRM0m, // flags = [mem16] & imm16 (outs), (ins i16mem:$src1, i16imm:$src2), "test{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and (loadi16 addr:$src1), imm:$src2), 0), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (and (loadi16 addr:$src1), imm:$src2), + 0))]>, OpSize; def TEST32mi : Ii32<0xF7, MRM0m, // flags = [mem32] & imm32 (outs), (ins i32mem:$src1, i32imm:$src2), "test{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and (loadi32 addr:$src1), imm:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and (loadi32 addr:$src1), imm:$src2), + 0))]>; } // Defs = [EFLAGS] @@ -3477,45 +3474,41 @@ def CMP32i32 : Ii32<0x3D, RawFrm, (outs), (ins i32imm:$src), def CMP8rr : I<0x38, MRMDestReg, (outs), (ins GR8 :$src1, GR8 :$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR8:$src1, GR8:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR8:$src1, GR8:$src2))]>; def CMP16rr : I<0x39, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR16:$src1, GR16:$src2), (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp GR16:$src1, GR16:$src2))]>, OpSize; def CMP32rr : I<0x39, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR32:$src1, GR32:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR32:$src1, GR32:$src2))]>; def CMP8mr : I<0x38, MRMDestMem, (outs), (ins i8mem :$src1, GR8 :$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi8 addr:$src1), GR8:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi8 addr:$src1), GR8:$src2))]>; def CMP16mr : I<0x39, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi16 addr:$src1), GR16:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (loadi16 addr:$src1), GR16:$src2))]>, + OpSize; def CMP32mr : I<0x39, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi32 addr:$src1), GR32:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi32 addr:$src1), GR32:$src2))]>; def CMP8rm : I<0x3A, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR8:$src1, (loadi8 addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR8:$src1, (loadi8 addr:$src2)))]>; def CMP16rm : I<0x3B, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR16:$src1, (loadi16 addr:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp GR16:$src1, (loadi16 addr:$src2)))]>, + OpSize; def CMP32rm : I<0x3B, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR32:$src1, (loadi32 addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR32:$src1, (loadi32 addr:$src2)))]>; // These are alternate spellings for use by the disassembler, we mark them as // code gen only to ensure they aren't matched by the assembler. @@ -3531,51 +3524,47 @@ let isCodeGenOnly = 1 in { def CMP8ri : Ii8<0x80, MRM7r, (outs), (ins GR8:$src1, i8imm:$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR8:$src1, imm:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR8:$src1, imm:$src2))]>; def CMP16ri : Ii16<0x81, MRM7r, (outs), (ins GR16:$src1, i16imm:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR16:$src1, imm:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp GR16:$src1, imm:$src2))]>, OpSize; def CMP32ri : Ii32<0x81, MRM7r, (outs), (ins GR32:$src1, i32imm:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR32:$src1, imm:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR32:$src1, imm:$src2))]>; def CMP8mi : Ii8 <0x80, MRM7m, (outs), (ins i8mem :$src1, i8imm :$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi8 addr:$src1), imm:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi8 addr:$src1), imm:$src2))]>; def CMP16mi : Ii16<0x81, MRM7m, (outs), (ins i16mem:$src1, i16imm:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi16 addr:$src1), imm:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (loadi16 addr:$src1), imm:$src2))]>, + OpSize; def CMP32mi : Ii32<0x81, MRM7m, (outs), (ins i32mem:$src1, i32imm:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi32 addr:$src1), imm:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi32 addr:$src1), imm:$src2))]>; def CMP16ri8 : Ii8<0x83, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR16:$src1, i16immSExt8:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp GR16:$src1, i16immSExt8:$src2))]>, + OpSize; def CMP16mi8 : Ii8<0x83, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi16 addr:$src1), i16immSExt8:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (loadi16 addr:$src1), + i16immSExt8:$src2))]>, OpSize; def CMP32mi8 : Ii8<0x83, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi32 addr:$src1), i32immSExt8:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi32 addr:$src1), + i32immSExt8:$src2))]>; def CMP32ri8 : Ii8<0x83, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR32:$src1, i32immSExt8:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR32:$src1, i32immSExt8:$src2))]>; } // Defs = [EFLAGS] // Bit tests. @@ -3583,12 +3572,10 @@ def CMP32ri8 : Ii8<0x83, MRM7r, let Defs = [EFLAGS] in { def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR16:$src1, GR16:$src2), - (implicit EFLAGS)]>, OpSize, TB; + [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>, OpSize, TB; def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR32:$src1, GR32:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB; // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. From ISel's @@ -3610,23 +3597,22 @@ def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR16:$src1, i16immSExt8:$src2), - (implicit EFLAGS)]>, OpSize, TB; + [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>, + OpSize, TB; def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR32:$src1, i32immSExt8:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB; // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", - [(X86bt (loadi16 addr:$src1), i16immSExt8:$src2), - (implicit EFLAGS)]>, OpSize, TB; + [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2)) + ]>, OpSize, TB; def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", - [(X86bt (loadi32 addr:$src1), i32immSExt8:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2)) + ]>, TB; def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; @@ -4401,11 +4387,11 @@ def : Pat<(subc GR32:$src1, i32immSExt8:$src2), // Comparisons. // TEST R,R is smaller than CMP R,0 -def : Pat<(parallel (X86cmp GR8:$src1, 0), (implicit EFLAGS)), +def : Pat<(X86cmp GR8:$src1, 0), (TEST8rr GR8:$src1, GR8:$src1)>; -def : Pat<(parallel (X86cmp GR16:$src1, 0), (implicit EFLAGS)), +def : Pat<(X86cmp GR16:$src1, 0), (TEST16rr GR16:$src1, GR16:$src1)>; -def : Pat<(parallel (X86cmp GR32:$src1, 0), (implicit EFLAGS)), +def : Pat<(X86cmp GR32:$src1, 0), (TEST32rr GR32:$src1, GR32:$src1)>; // Conditional moves with folded loads with operands swapped and conditions @@ -4799,42 +4785,6 @@ def : Pat<(parallel (X86add_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register Addition with EFLAGS result -def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), GR8:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), GR16:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), GR32:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD32mr addr:$dst, GR32:$src2)>; - -// Memory-Integer Addition with EFLAGS result -def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD32mi8 addr:$dst, i32immSExt8:$src2)>; - // Register-Register Subtraction with EFLAGS result def : Pat<(parallel (X86sub_flag GR8:$src1, GR8:$src2), (implicit EFLAGS)), @@ -4874,43 +4824,6 @@ def : Pat<(parallel (X86sub_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register Subtraction with EFLAGS result -def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), GR8:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), GR16:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), GR32:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB32mr addr:$dst, GR32:$src2)>; - -// Memory-Integer Subtraction with EFLAGS result -def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB32mi8 addr:$dst, i32immSExt8:$src2)>; - - // Register-Register Signed Integer Multiply with EFLAGS result def : Pat<(parallel (X86smul_flag GR16:$src1, GR16:$src2), (implicit EFLAGS)), @@ -4969,36 +4882,18 @@ def : Pat<(parallel (X86smul_flag GR32:$src1, 2), // INC and DEC with EFLAGS result. Note that these do not set CF. def : Pat<(parallel (X86inc_flag GR8:$src), (implicit EFLAGS)), (INC8r GR8:$src)>; -def : Pat<(parallel (store (i8 (X86inc_flag (loadi8 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC8m addr:$dst)>; def : Pat<(parallel (X86dec_flag GR8:$src), (implicit EFLAGS)), (DEC8r GR8:$src)>; -def : Pat<(parallel (store (i8 (X86dec_flag (loadi8 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC8m addr:$dst)>; def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)), (INC16r GR16:$src)>, Requires<[In32BitMode]>; -def : Pat<(parallel (store (i16 (X86inc_flag (loadi16 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC16m addr:$dst)>, Requires<[In32BitMode]>; def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)), (DEC16r GR16:$src)>, Requires<[In32BitMode]>; -def : Pat<(parallel (store (i16 (X86dec_flag (loadi16 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC16m addr:$dst)>, Requires<[In32BitMode]>; def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)), (INC32r GR32:$src)>, Requires<[In32BitMode]>; -def : Pat<(parallel (store (i32 (X86inc_flag (loadi32 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC32m addr:$dst)>, Requires<[In32BitMode]>; def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)), (DEC32r GR32:$src)>, Requires<[In32BitMode]>; -def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC32m addr:$dst)>, Requires<[In32BitMode]>; // Register-Register Or with EFLAGS result def : Pat<(parallel (X86or_flag GR8:$src1, GR8:$src2), @@ -5039,42 +4934,6 @@ def : Pat<(parallel (X86or_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (OR32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register Or with EFLAGS result -def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), GR8:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), GR16:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), GR32:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR32mr addr:$dst, GR32:$src2)>; - -// Memory-Integer Or with EFLAGS result -def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR32mi8 addr:$dst, i32immSExt8:$src2)>; - // Register-Register XOr with EFLAGS result def : Pat<(parallel (X86xor_flag GR8:$src1, GR8:$src2), (implicit EFLAGS)), @@ -5114,42 +4973,6 @@ def : Pat<(parallel (X86xor_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register XOr with EFLAGS result -def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), GR8:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), GR16:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), GR32:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR32mr addr:$dst, GR32:$src2)>; - -// Memory-Integer XOr with EFLAGS result -def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR32mi8 addr:$dst, i32immSExt8:$src2)>; - // Register-Register And with EFLAGS result def : Pat<(parallel (X86and_flag GR8:$src1, GR8:$src2), (implicit EFLAGS)), @@ -5189,42 +5012,6 @@ def : Pat<(parallel (X86and_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register And with EFLAGS result -def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), GR8:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), GR16:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), GR32:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND32mr addr:$dst, GR32:$src2)>; - -// Memory-Integer And with EFLAGS result -def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND32mi8 addr:$dst, i32immSExt8:$src2)>; - // -disable-16bit support. def : Pat<(truncstorei16 (i16 imm:$src), addr:$dst), (MOV16mi addr:$dst, imm:$src)>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 18f9e52..720b663 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -513,11 +513,10 @@ let mayLoad = 1 in let Defs = [EFLAGS] in { def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2), "ucomiss\t{$src2, $src1|$src1, $src2}", - [(X86cmp FR32:$src1, FR32:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp FR32:$src1, FR32:$src2))]>; def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2), "ucomiss\t{$src2, $src1|$src1, $src2}", - [(X86cmp FR32:$src1, (loadf32 addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>; def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "comiss\t{$src2, $src1|$src1, $src2}", []>; @@ -546,21 +545,21 @@ let Constraints = "$src1 = $dst" in { let Defs = [EFLAGS] in { def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ucomiss\t{$src2, $src1|$src1, $src2}", - [(X86ucomi (v4f32 VR128:$src1), VR128:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1), + VR128:$src2))]>; def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), "ucomiss\t{$src2, $src1|$src1, $src2}", - [(X86ucomi (v4f32 VR128:$src1), (load addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1), + (load addr:$src2)))]>; def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "comiss\t{$src2, $src1|$src1, $src2}", - [(X86comi (v4f32 VR128:$src1), VR128:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86comi (v4f32 VR128:$src1), + VR128:$src2))]>; def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "comiss\t{$src2, $src1|$src1, $src2}", - [(X86comi (v4f32 VR128:$src1), (load addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86comi (v4f32 VR128:$src1), + (load addr:$src2)))]>; } // Defs = [EFLAGS] // Aliases of packed SSE1 instructions for scalar use. These all have names @@ -1298,11 +1297,10 @@ let mayLoad = 1 in let Defs = [EFLAGS] in { def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2), "ucomisd\t{$src2, $src1|$src1, $src2}", - [(X86cmp FR64:$src1, FR64:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp FR64:$src1, FR64:$src2))]>; def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2), "ucomisd\t{$src2, $src1|$src1, $src2}", - [(X86cmp FR64:$src1, (loadf64 addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp FR64:$src1, (loadf64 addr:$src2)))]>; } // Defs = [EFLAGS] // Aliases to match intrinsics which expect XMM operand(s). @@ -1324,21 +1322,21 @@ let Constraints = "$src1 = $dst" in { let Defs = [EFLAGS] in { def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ucomisd\t{$src2, $src1|$src1, $src2}", - [(X86ucomi (v2f64 VR128:$src1), (v2f64 VR128:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1), + VR128:$src2))]>; def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), "ucomisd\t{$src2, $src1|$src1, $src2}", - [(X86ucomi (v2f64 VR128:$src1), (load addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1), + (load addr:$src2)))]>; def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "comisd\t{$src2, $src1|$src1, $src2}", - [(X86comi (v2f64 VR128:$src1), (v2f64 VR128:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86comi (v2f64 VR128:$src1), + VR128:$src2))]>; def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "comisd\t{$src2, $src1|$src1, $src2}", - [(X86comi (v2f64 VR128:$src1), (load addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86comi (v2f64 VR128:$src1), + (load addr:$src2)))]>; } // Defs = [EFLAGS] // Aliases of packed SSE2 instructions for scalar use. These all have names @@ -3825,54 +3823,65 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), let Constraints = "$src1 = $dst" in { def CRC32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i8mem:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{b} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_8 GR32:$src1, - (load addr:$src2)))]>, OpSize; + (load addr:$src2)))]>; def CRC32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR8:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{b} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, - (int_x86_sse42_crc32_8 GR32:$src1, GR8:$src2))]>, - OpSize; + (int_x86_sse42_crc32_8 GR32:$src1, GR8:$src2))]>; def CRC32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i16mem:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{w} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_16 GR32:$src1, (load addr:$src2)))]>, OpSize; def CRC32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR16:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{w} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_16 GR32:$src1, GR16:$src2))]>, OpSize; def CRC32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{l} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_32 GR32:$src1, - (load addr:$src2)))]>, OpSize; + (load addr:$src2)))]>; def CRC32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{l} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, - (int_x86_sse42_crc32_32 GR32:$src1, GR32:$src2))]>, - OpSize; - def CRC64m64 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst), + (int_x86_sse42_crc32_32 GR32:$src1, GR32:$src2))]>; + def CRC64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$src1, i8mem:$src2), + "crc32{b} \t{$src2, $src1|$src1, $src2}", + [(set GR64:$dst, + (int_x86_sse42_crc64_8 GR64:$src1, + (load addr:$src2)))]>, + REX_W; + def CRC64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR8:$src2), + "crc32{b} \t{$src2, $src1|$src1, $src2}", + [(set GR64:$dst, + (int_x86_sse42_crc64_8 GR64:$src1, GR8:$src2))]>, + REX_W; + def CRC64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{q} \t{$src2, $src1|$src1, $src2}", [(set GR64:$dst, - (int_x86_sse42_crc32_64 GR64:$src1, + (int_x86_sse42_crc64_64 GR64:$src1, (load addr:$src2)))]>, - OpSize, REX_W; - def CRC64r64 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst), + REX_W; + def CRC64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{q} \t{$src2, $src1|$src1, $src2}", [(set GR64:$dst, - (int_x86_sse42_crc32_64 GR64:$src1, GR64:$src2))]>, - OpSize, REX_W; + (int_x86_sse42_crc64_64 GR64:$src1, GR64:$src2))]>, + REX_W; } // String/text processing instructions. diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 3f18696..a9681e6 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -38,14 +38,15 @@ public: ~X86MCCodeEmitter() {} unsigned getNumFixupKinds() const { - return 3; + return 4; } const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { const static MCFixupKindInfo Infos[] = { - { "reloc_pcrel_4byte", 0, 4 * 8 }, - { "reloc_pcrel_1byte", 0, 1 * 8 }, - { "reloc_riprel_4byte", 0, 4 * 8 } + { "reloc_pcrel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, + { "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel }, + { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, + { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel } }; if (Kind < FirstTargetFixupKind) @@ -165,7 +166,8 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, // If the fixup is pc-relative, we need to bias the value to be relative to // the start of the field, not the end of the field. if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) || - FixupKind == MCFixupKind(X86::reloc_riprel_4byte)) + FixupKind == MCFixupKind(X86::reloc_riprel_4byte) || + FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load)) ImmOffset -= 4; if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte)) ImmOffset -= 1; @@ -197,6 +199,15 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, "Invalid rip-relative address"); EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); + unsigned FixupKind = X86::reloc_riprel_4byte; + + // movq loads are handled with a special relocation form which allows the + // linker to eliminate some loads for GOT references which end up in the + // same linkage unit. + if (MI.getOpcode() == X86::MOV64rm || + MI.getOpcode() == X86::MOV64rm_TC) + FixupKind = X86::reloc_riprel_4byte_movq_load; + // rip-relative addressing is actually relative to the *next* instruction. // Since an immediate can follow the mod/rm byte for an instruction, this // means that we need to bias the immediate field of the instruction with @@ -204,7 +215,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // expression to emit. int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0; - EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_riprel_4byte), + EmitImmediate(Disp, 4, MCFixupKind(FixupKind), CurByte, OS, Fixups, -ImmSize); return; } @@ -269,7 +280,10 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Emit the normal disp32 encoding. EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS); ForceDisp32 = true; - } else if (Disp.getImm() == 0 && BaseReg != X86::EBP) { + } else if (Disp.getImm() == 0 && + // Base reg can't be anything that ends up with '5' as the base + // reg, it is the magic [*] nomenclature that indicates no base. + BaseRegNo != N86::EBP) { // Emit no displacement ModR/M byte EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS); } else if (isDisp8(Disp.getImm())) { diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index f907614..cd56816 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -366,12 +366,3 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, if (StackAlignment) stackAlignment = StackAlignment; } - -bool X86Subtarget::enablePostRAScheduler( - CodeGenOpt::Level OptLevel, - TargetSubtarget::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtarget::ANTIDEP_CRITICAL; - CriticalPathRCs.clear(); - return OptLevel >= CodeGenOpt::Aggressive; -} diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 50338d3..56220db 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -230,12 +230,6 @@ public: /// indicating the number of scheduling cycles of backscheduling that /// should be attempted. unsigned getSpecialAddressLatency() const; - - /// enablePostRAScheduler - X86 target is enabling post-alloc scheduling - /// at 'More' optimization level. - bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtarget::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 56ddaf8..f13e6f3 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -22,7 +22,7 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { +static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { case Triple::Darwin: diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 46805d5..2e9a1e5 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -29,7 +29,8 @@ include "XCoreInstrFormats.td" // Call def SDT_XCoreBranchLink : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; |