| author | rdivacky <rdivacky@FreeBSD.org> | 2009-12-01 11:07:05 +0000 |
|---|---|---|
| committer | rdivacky <rdivacky@FreeBSD.org> | 2009-12-01 11:07:05 +0000 |
| commit | e7908924d847e63b02bc82bfaa1709ab9c774dcd (patch) | |
| tree | ffe0478472eaa0686f11cb02c6df7d257b8719b0 /lib/Target | |
| parent | bf68f1ea49e39c4194f339ddd4421b0c3a31988b (diff) | |
| download | FreeBSD-src-e7908924d847e63b02bc82bfaa1709ab9c774dcd.zip FreeBSD-src-e7908924d847e63b02bc82bfaa1709ab9c774dcd.tar.gz | |
Update LLVM to r90226.
Diffstat (limited to 'lib/Target')
68 files changed, 2239 insertions, 1653 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index b50b609..c95d4c8 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -402,6 +402,21 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, return Found; } +/// isPredicable - Return true if the specified instruction can be predicated. +/// By default, this returns true for every instruction with a +/// PredicateOperand. +bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.isPredicable()) + return false; + + if ((TID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { + ARMFunctionInfo *AFI = + MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); + return AFI->isThumb2Function(); + } + return true; +} /// FIXME: Works around a gcc miscompilation with -fstrict-aliasing static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, @@ -647,11 +662,13 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, SrcRC == ARM::DPR_VFP2RegisterClass || SrcRC == ARM::DPR_8RegisterClass) { // Always use neon reg-reg move if source or dest is NEON-only regclass. - BuildMI(MBB, I, DL, get(ARM::VMOVDneon), DestReg).addReg(SrcReg); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVDneon), + DestReg).addReg(SrcReg)); } else if (DestRC == ARM::QPRRegisterClass || DestRC == ARM::QPR_VFP2RegisterClass || DestRC == ARM::QPR_8RegisterClass) { - BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVQ), + DestReg).addReg(SrcReg)); } else { return false; } @@ -695,13 +712,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // FIXME: Neon instructions should support predicates if (Align >= 16 && (getRegisterInfo().needsStackRealignment(MF))) { - BuildMI(MBB, I, DL, get(ARM::VST1q64)) - .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO) - .addReg(SrcReg, getKillRegState(isKill)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) + .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) + .addMemOperand(MMO) + .addReg(SrcReg, getKillRegState(isKill))); } else { - BuildMI(MBB, I, DL, get(ARM::VSTRQ)). - addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRQ)). + addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } } } @@ -740,11 +758,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // FIXME: Neon instructions should support predicates if (Align >= 16 && (getRegisterInfo().needsStackRealignment(MF))) { - BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) - .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) + .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) + .addMemOperand(MMO)); } else { - BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0). 
- addMemOperand(MMO); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } } } @@ -978,7 +997,10 @@ bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0, const MachineInstr *MI1, const MachineRegisterInfo *MRI) const { int Opcode = MI0->getOpcode(); - if (Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci_pic) { + if (Opcode == ARM::t2LDRpci || + Opcode == ARM::t2LDRpci_pic || + Opcode == ARM::tLDRpci || + Opcode == ARM::tLDRpci_pic) { if (MI1->getOpcode() != Opcode) return false; if (MI0->getNumOperands() != MI1->getNumOperands()) @@ -1005,16 +1027,6 @@ bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0, return TargetInstrInfoImpl::isIdentical(MI0, MI1, MRI); } -unsigned ARMBaseInstrInfo::TailDuplicationLimit(const MachineBasicBlock &MBB, - unsigned DefaultLimit) const { - // If the target processor can predict indirect branches, it is highly - // desirable to duplicate them, since it can often make them predictable. - if (!MBB.empty() && isIndirectBranchOpcode(MBB.back().getOpcode()) && - getSubtarget().hasBranchTargetBuffer()) - return DefaultLimit + 2; - return DefaultLimit; -} - /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 73e854f..282e30c 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -162,6 +162,22 @@ namespace ARMII { I_BitShift = 25, CondShift = 28 }; + + /// Target Operand Flag enum. + enum TOF { + //===------------------------------------------------------------------===// + // ARM Specific MachineOperand flags. + + MO_NO_FLAG, + + /// MO_LO16 - On a symbol operand, this represents a relocation containing + /// lower 16 bit of the address. Used only via movw instruction. + MO_LO16, + + /// MO_HI16 - On a symbol operand, this represents a relocation containing + /// higher 16 bit of the address. Used only via movt instruction. + MO_HI16 + }; } class ARMBaseInstrInfo : public TargetInstrInfoImpl { @@ -220,6 +236,8 @@ public: virtual bool DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const; + virtual bool isPredicable(MachineInstr *MI) const; + /// GetInstSize - Returns the size of the specified MachineInstr. 
/// virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const; @@ -272,9 +290,6 @@ public: virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other, const MachineRegisterInfo *MRI) const; - - virtual unsigned TailDuplicationLimit(const MachineBasicBlock &MBB, - unsigned DefaultLimit) const; }; static inline diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 19762ee..653328d 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -799,6 +799,54 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return ARM::SP; } +int +ARMBaseRegisterInfo::getFrameIndexReference(MachineFunction &MF, int FI, + unsigned &FrameReg) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); + bool isFixed = MFI->isFixedObjectIndex(FI); + + FrameReg = ARM::SP; + if (AFI->isGPRCalleeSavedArea1Frame(FI)) + Offset -= AFI->getGPRCalleeSavedArea1Offset(); + else if (AFI->isGPRCalleeSavedArea2Frame(FI)) + Offset -= AFI->getGPRCalleeSavedArea2Offset(); + else if (AFI->isDPRCalleeSavedAreaFrame(FI)) + Offset -= AFI->getDPRCalleeSavedAreaOffset(); + else if (needsStackRealignment(MF)) { + // When dynamically realigning the stack, use the frame pointer for + // parameters, and the stack pointer for locals. + assert (hasFP(MF) && "dynamic stack realignment without a FP!"); + if (isFixed) { + FrameReg = getFrameRegister(MF); + Offset -= AFI->getFramePtrSpillOffset(); + } + } else if (hasFP(MF) && AFI->hasStackFrame()) { + if (isFixed || MFI->hasVarSizedObjects()) { + // Use frame pointer to reference fixed objects unless this is a + // frameless function. + FrameReg = getFrameRegister(MF); + Offset -= AFI->getFramePtrSpillOffset(); + } else if (AFI->isThumb2Function()) { + // In Thumb2 mode, the negative offset is very limited. + int FPOffset = Offset - AFI->getFramePtrSpillOffset(); + if (FPOffset >= -255 && FPOffset < 0) { + FrameReg = getFrameRegister(MF); + Offset = FPOffset; + } + } + } + return Offset; +} + + +int +ARMBaseRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { + unsigned FrameReg; + return getFrameIndexReference(MF, FI, FrameReg); +} + unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const { llvm_unreachable("What is the exception register"); return 0; @@ -1115,45 +1163,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } - unsigned FrameReg = ARM::SP; int FrameIndex = MI.getOperand(i).getIndex(); int Offset = MFI->getObjectOffset(FrameIndex) + MFI->getStackSize() + SPAdj; - bool isFixed = MFI->isFixedObjectIndex(FrameIndex); + unsigned FrameReg; - // When doing dynamic stack realignment, all of these need to change(?) - if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) - Offset -= AFI->getGPRCalleeSavedArea1Offset(); - else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) - Offset -= AFI->getGPRCalleeSavedArea2Offset(); - else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex)) - Offset -= AFI->getDPRCalleeSavedAreaOffset(); - else if (needsStackRealignment(MF)) { - // When dynamically realigning the stack, use the frame pointer for - // parameters, and the stack pointer for locals. 
- assert (hasFP(MF) && "dynamic stack realignment without a FP!"); - if (isFixed) { - FrameReg = getFrameRegister(MF); - Offset -= AFI->getFramePtrSpillOffset(); - // When referencing from the frame pointer, stack pointer adjustments - // don't matter. - SPAdj = 0; - } - } else if (hasFP(MF) && AFI->hasStackFrame()) { - assert(SPAdj == 0 && "Unexpected stack offset!"); - if (isFixed || MFI->hasVarSizedObjects()) { - // Use frame pointer to reference fixed objects unless this is a - // frameless function. - FrameReg = getFrameRegister(MF); - Offset -= AFI->getFramePtrSpillOffset(); - } else if (AFI->isThumb2Function()) { - // In Thumb2 mode, the negative offset is very limited. - int FPOffset = Offset - AFI->getFramePtrSpillOffset(); - if (FPOffset >= -255 && FPOffset < 0) { - FrameReg = getFrameRegister(MF); - Offset = FPOffset; - } - } - } + Offset = getFrameIndexReference(MF, FrameIndex, FrameReg); + if (FrameReg != ARM::SP) + SPAdj = 0; // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 4b267b0..2788d07 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -106,6 +106,9 @@ public: // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; + int getFrameIndexReference(MachineFunction &MF, int FI, + unsigned &FrameReg) const; + int getFrameIndexOffset(MachineFunction &MF, int FI) const; // Exception handling queries. unsigned getEHExceptionRegister() const; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 766acff..17e7d44 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -613,7 +613,6 @@ void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) { break; case TargetInstrInfo::IMPLICIT_DEF: case TargetInstrInfo::KILL: - case ARM::DWARF_LOC: // Do nothing. break; case ARM::CONSTPOOL_ENTRY: diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index d22c43a..e59a315 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -162,6 +162,9 @@ namespace { /// the branch fix up pass. bool HasFarJump; + /// HasInlineAsm - True if the function contains inline assembly. + bool HasInlineAsm; + const TargetInstrInfo *TII; const ARMSubtarget *STI; ARMFunctionInfo *AFI; @@ -236,10 +239,19 @@ void ARMConstantIslands::verify(MachineFunction &MF) { if (!MBB->empty() && MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { unsigned MBBId = MBB->getNumber(); - assert((BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) || + assert(HasInlineAsm || + (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) || (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0)); } } + for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { + CPUser &U = CPUsers[i]; + unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8); + unsigned CPEOffset = GetOffsetOf(U.CPEMI); + unsigned Disp = UserOffset < CPEOffset ? 
CPEOffset - UserOffset : + UserOffset - CPEOffset; + assert(Disp <= U.MaxDisp || "Constant pool entry out of range!"); + } #endif } @@ -269,6 +281,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { isThumb2 = AFI->isThumb2Function(); HasFarJump = false; + HasInlineAsm = false; // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. @@ -452,6 +465,19 @@ void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) { /// and finding all of the constant pool users. void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, const std::vector<MachineInstr*> &CPEMIs) { + // First thing, see if the function has any inline assembly in it. If so, + // we have to be conservative about alignment assumptions, as we don't + // know for sure the size of any instructions in the inline assembly. + for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock &MBB = *MBBI; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) + if (I->getOpcode() == ARM::INLINEASM) + HasInlineAsm = true; + } + + // Now go back through the instructions and build up our data structures unsigned Offset = 0; for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); MBBI != E; ++MBBI) { @@ -481,7 +507,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, // A Thumb1 table jump may involve padding; for the offsets to // be right, functions containing these must be 4-byte aligned. AFI->setAlign(2U); - if ((Offset+MBBSize)%4 != 0) + if ((Offset+MBBSize)%4 != 0 || HasInlineAsm) // FIXME: Add a pseudo ALIGN instruction instead. MBBSize += 2; // padding continue; // Does not get an entry in ImmBranches @@ -550,7 +576,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, case ARM::LEApcrel: // This takes a SoImm, which is 8 bit immediate rotated. We'll // pretend the maximum offset is 255 * 4. Since each instruction - // 4 byte wide, this is always correct. We'llc heck for other + // 4 byte wide, this is always correct. We'll check for other // displacements that fits in a SoImm as well. Bits = 8; Scale = 4; @@ -609,7 +635,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, if (isThumb && !MBB.empty() && MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY && - (Offset%4) != 0) + ((Offset%4) != 0 || HasInlineAsm)) MBBSize += 2; BBSizes.push_back(MBBSize); @@ -633,7 +659,7 @@ unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const { // alignment padding, and compensate if so. if (isThumb && MI->getOpcode() == ARM::CONSTPOOL_ENTRY && - Offset%4 != 0) + (Offset%4 != 0 || HasInlineAsm)) Offset += 2; // Sum instructions before MI in MBB. @@ -829,7 +855,7 @@ bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset, MachineInstr *CPEMI, unsigned MaxDisp, bool NegOk, bool DoDump) { unsigned CPEOffset = GetOffsetOf(CPEMI); - assert(CPEOffset%4 == 0 && "Misaligned CPE"); + assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE"); if (DoDump) { DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm() @@ -870,7 +896,7 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, if (!isThumb) continue; MachineBasicBlock *MBB = MBBI; - if (!MBB->empty()) { + if (!MBB->empty() && !HasInlineAsm) { // Constant pool entries require padding. 
if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { unsigned OldOffset = BBOffsets[i] - delta; @@ -1226,7 +1252,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()]; // Compensate for .align 2 in thumb mode. - if (isThumb && BBOffsets[NewIsland->getNumber()]%4 != 0) + if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm)) Size += 2; // Increase the size of the island block to account for the new entry. BBSizes[NewIsland->getNumber()] += Size; diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 4d0f899..c929c54 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -75,17 +75,30 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { } case ARM::t2MOVi32imm: { unsigned DstReg = MI.getOperand(0).getReg(); - unsigned Imm = MI.getOperand(1).getImm(); - unsigned Lo16 = Imm & 0xffff; - unsigned Hi16 = (Imm >> 16) & 0xffff; if (!MI.getOperand(0).isDead()) { - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(ARM::t2MOVi16), DstReg) - .addImm(Lo16)); - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(ARM::t2MOVTi16)) - .addReg(DstReg, getDefRegState(true)) - .addReg(DstReg).addImm(Hi16)); + const MachineOperand &MO = MI.getOperand(1); + MachineInstrBuilder LO16, HI16; + + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16), + DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16)) + .addReg(DstReg, getDefRegState(true)).addReg(DstReg); + + if (MO.isImm()) { + unsigned Imm = MO.getImm(); + unsigned Lo16 = Imm & 0xffff; + unsigned Hi16 = (Imm >> 16) & 0xffff; + LO16 = LO16.addImm(Lo16); + HI16 = HI16.addImm(Hi16); + } else { + GlobalValue *GV = MO.getGlobal(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); + HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); + // FIXME: What's about memoperands? + } + AddDefaultPred(LO16); + AddDefaultPred(HI16); } MI.eraseFromParent(); Modified = true; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 696a8e1..d63f3e6 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -149,6 +149,21 @@ private: /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. SDNode *SelectV6T2BitfieldExtractOp(SDValue Op, unsigned Opc); + /// SelectCMOVOp - Select CMOV instructions for ARM. + SDNode *SelectCMOVOp(SDValue Op); + SDNode *SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, + SDValue InFlag); + SDNode *SelectARMCMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, + SDValue InFlag); + SDNode *SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, + SDValue InFlag); + SDNode *SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, + SDValue InFlag); + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. 
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, @@ -246,7 +261,9 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N, if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); - } else if (N.getOpcode() == ARMISD::Wrapper) { + } else if (N.getOpcode() == ARMISD::Wrapper && + !(Subtarget->useMovt() && + N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { Base = N.getOperand(0); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -448,7 +465,9 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); - } else if (N.getOpcode() == ARMISD::Wrapper) { + } else if (N.getOpcode() == ARMISD::Wrapper && + !(Subtarget->useMovt() && + N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { Base = N.getOperand(0); } Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), @@ -543,7 +562,13 @@ ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N, } if (N.getOpcode() != ISD::ADD) { - Base = (N.getOpcode() == ARMISD::Wrapper) ? N.getOperand(0) : N; + if (N.getOpcode() == ARMISD::Wrapper && + !(Subtarget->useMovt() && + N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + Base = N.getOperand(0); + } else + Base = N; + Offset = CurDAG->getRegister(0, MVT::i32); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; @@ -666,7 +691,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N, Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; - } else if (N.getOpcode() == ARMISD::Wrapper) { + } else if (N.getOpcode() == ARMISD::Wrapper && + !(Subtarget->useMovt() && + N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { Base = N.getOperand(0); if (Base.getOpcode() == ISD::TargetConstantPool) return false; // We want to select t2LDRpci instead. @@ -1034,12 +1061,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, case MVT::v4i32: OpcodeIndex = 2; break; } + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain }; + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, + Pred, PredReg, Chain }; std::vector<EVT> ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); + return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); } EVT RegVT = GetNEONSubregVT(VT); @@ -1047,10 +1077,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, // Quad registers are directly supported for VLD2, // loading 2 pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain }; + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, + Pred, PredReg, Chain }; std::vector<EVT> ResTys(4, VT); ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); Chain = SDValue(VLd, 4); // Combine the even and odd subregs to produce the result. @@ -1071,15 +1102,16 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, // Load the even subregs. 
unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, Chain }; - SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 5); + const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, + Pred, PredReg, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 7); Chain = SDValue(VLdA, NumVecs+1); // Load the odd subregs. Opc = QOpcodes1[OpcodeIndex]; const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, - Align, Chain }; - SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 5); + Align, Pred, PredReg, Chain }; + SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 7); Chain = SDValue(VLdB, NumVecs+1); // Combine the even and odd subregs to produce the result. @@ -1123,6 +1155,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, case MVT::v4i32: OpcodeIndex = 2; break; } + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SmallVector<SDValue, 8> Ops; Ops.push_back(MemAddr); Ops.push_back(MemUpdate); @@ -1133,8 +1168,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, unsigned Opc = DOpcodes[OpcodeIndex]; for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(N->getOperand(Vec+3)); + Ops.push_back(Pred); + Ops.push_back(PredReg); Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7); } EVT RegVT = GetNEONSubregVT(VT); @@ -1148,8 +1185,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, N->getOperand(Vec+3))); } + Ops.push_back(Pred); + Ops.push_back(PredReg); Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 11); } // Otherwise, quad registers are stored with two separate instructions, @@ -1162,10 +1201,12 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, N->getOperand(Vec+3))); + Ops.push_back(Pred); + Ops.push_back(PredReg); Ops.push_back(Chain); unsigned Opc = QOpcodes0[OpcodeIndex]; SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+5); + MVT::Other, Ops.data(), NumVecs+7); Chain = SDValue(VStA, 1); // Store the odd subregs. 
@@ -1173,10 +1214,12 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, N->getOperand(Vec+3)); - Ops[NumVecs+4] = Chain; + Ops[NumVecs+4] = Pred; + Ops[NumVecs+5] = PredReg; + Ops[NumVecs+6] = Chain; Opc = QOpcodes1[OpcodeIndex]; SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+5); + MVT::Other, Ops.data(), NumVecs+7); Chain = SDValue(VStB, 1); ReplaceUses(SDValue(N, 0), Chain); return NULL; @@ -1224,6 +1267,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad, case MVT::v4i32: OpcodeIndex = 1; break; } + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SmallVector<SDValue, 9> Ops; Ops.push_back(MemAddr); Ops.push_back(MemUpdate); @@ -1249,15 +1295,17 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad, N->getOperand(Vec+3))); } Ops.push_back(getI32Imm(Lane)); + Ops.push_back(Pred); + Ops.push_back(PredReg); Ops.push_back(Chain); if (!IsLoad) - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7); std::vector<EVT> ResTys(NumVecs, RegVT); ResTys.push_back(MVT::Other); SDNode *VLdLn = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+5); + CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+7); // For a 64-bit vector load to D registers, nothing more needs to be done. if (is64BitVector) return VLdLn; @@ -1282,7 +1330,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op, return NULL; unsigned Shl_imm = 0; - if (isOpcWithIntImmediate(Op.getOperand(0).getNode(), ISD::SHL, Shl_imm)){ + if (isOpcWithIntImmediate(Op.getOperand(0).getNode(), ISD::SHL, Shl_imm)) { assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); unsigned Srl_imm = 0; if (isInt32Immediate(Op.getOperand(1), Srl_imm)) { @@ -1302,6 +1350,173 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op, return NULL; } +SDNode *ARMDAGToDAGISel:: +SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { + SDValue CPTmp0; + SDValue CPTmp1; + if (SelectT2ShifterOperandReg(Op, TrueVal, CPTmp0, CPTmp1)) { + unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue(); + unsigned SOShOp = ARM_AM::getSORegShOp(SOVal); + unsigned Opc = 0; + switch (SOShOp) { + case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break; + case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break; + case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break; + case ARM_AM::ror: Opc = ARM::t2MOVCCror; break; + default: + llvm_unreachable("Unknown so_reg opcode!"); + break; + } + SDValue SOShImm = + CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32); + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag }; + return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32,Ops, 6); + } + return 0; +} + +SDNode *ARMDAGToDAGISel:: +SelectARMCMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (SelectShifterOperandReg(Op, TrueVal, CPTmp0, CPTmp1, CPTmp2)) { + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag }; + return 
CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCs, MVT::i32, Ops, 7); + } + return 0; +} + +SDNode *ARMDAGToDAGISel:: +SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { + ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); + if (!T) + return 0; + + if (Predicate_t2_so_imm(TrueVal.getNode())) { + SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; + return CurDAG->SelectNodeTo(Op.getNode(), + ARM::t2MOVCCi, MVT::i32, Ops, 5); + } + return 0; +} + +SDNode *ARMDAGToDAGISel:: +SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { + ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); + if (!T) + return 0; + + if (Predicate_so_imm(TrueVal.getNode())) { + SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; + return CurDAG->SelectNodeTo(Op.getNode(), + ARM::MOVCCi, MVT::i32, Ops, 5); + } + return 0; +} + +SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) { + EVT VT = Op.getValueType(); + SDValue FalseVal = Op.getOperand(0); + SDValue TrueVal = Op.getOperand(1); + SDValue CC = Op.getOperand(2); + SDValue CCR = Op.getOperand(3); + SDValue InFlag = Op.getOperand(4); + assert(CC.getOpcode() == ISD::Constant); + assert(CCR.getOpcode() == ISD::Register); + ARMCC::CondCodes CCVal = + (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue(); + + if (!Subtarget->isThumb1Only() && VT == MVT::i32) { + // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) + // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) + // Pattern complexity = 18 cost = 1 size = 0 + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (Subtarget->isThumb()) { + SDNode *Res = SelectT2CMOVShiftOp(Op, FalseVal, TrueVal, + CCVal, CCR, InFlag); + if (!Res) + Res = SelectT2CMOVShiftOp(Op, TrueVal, FalseVal, + ARMCC::getOppositeCondition(CCVal), CCR, InFlag); + if (Res) + return Res; + } else { + SDNode *Res = SelectARMCMOVShiftOp(Op, FalseVal, TrueVal, + CCVal, CCR, InFlag); + if (!Res) + Res = SelectARMCMOVShiftOp(Op, TrueVal, FalseVal, + ARMCC::getOppositeCondition(CCVal), CCR, InFlag); + if (Res) + return Res; + } + + // Pattern: (ARMcmov:i32 GPR:i32:$false, + // (imm:i32)<<P:Predicate_so_imm>>:$true, + // (imm:i32):$cc) + // Emits: (MOVCCi:i32 GPR:i32:$false, + // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) + // Pattern complexity = 10 cost = 1 size = 0 + if (Subtarget->isThumb()) { + SDNode *Res = SelectT2CMOVSoImmOp(Op, FalseVal, TrueVal, + CCVal, CCR, InFlag); + if (!Res) + Res = SelectT2CMOVSoImmOp(Op, TrueVal, FalseVal, + ARMCC::getOppositeCondition(CCVal), CCR, InFlag); + if (Res) + return Res; + } else { + SDNode *Res = SelectARMCMOVSoImmOp(Op, FalseVal, TrueVal, + CCVal, CCR, InFlag); + if (!Res) + Res = SelectARMCMOVSoImmOp(Op, TrueVal, FalseVal, + ARMCC::getOppositeCondition(CCVal), CCR, InFlag); + if (Res) + return Res; + } + } + + // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + // + // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Pattern 
complexity = 6 cost = 11 size = 0 + // + // Also FCPYScc and FCPYDcc. + SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag }; + unsigned Opc = 0; + switch (VT.getSimpleVT().SimpleTy) { + default: assert(false && "Illegal conditional move type!"); + break; + case MVT::i32: + Opc = Subtarget->isThumb() + ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo) + : ARM::MOVCCr; + break; + case MVT::f32: + Opc = ARM::VMOVScc; + break; + case MVT::f64: + Opc = ARM::VMOVDcc; + break; + } + return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); +} + SDNode *ARMDAGToDAGISel::Select(SDValue Op) { SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); @@ -1337,7 +1552,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { SDNode *ResNode; if (Subtarget->isThumb1Only()) { - SDValue Pred = CurDAG->getTargetConstant(0xEULL, MVT::i32); + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other, @@ -1549,122 +1764,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { SDValue(Chain.getNode(), Chain.getResNo())); return NULL; } - case ARMISD::CMOV: { - EVT VT = Op.getValueType(); - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - SDValue N2 = Op.getOperand(2); - SDValue N3 = Op.getOperand(3); - SDValue InFlag = Op.getOperand(4); - assert(N2.getOpcode() == ISD::Constant); - assert(N3.getOpcode() == ISD::Register); - - if (!Subtarget->isThumb1Only() && VT == MVT::i32) { - // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) - // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) - // Pattern complexity = 18 cost = 1 size = 0 - SDValue CPTmp0; - SDValue CPTmp1; - SDValue CPTmp2; - if (Subtarget->isThumb()) { - if (SelectT2ShifterOperandReg(Op, N1, CPTmp0, CPTmp1)) { - unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue(); - unsigned SOShOp = ARM_AM::getSORegShOp(SOVal); - unsigned Opc = 0; - switch (SOShOp) { - case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break; - case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break; - case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break; - case ARM_AM::ror: Opc = ARM::t2MOVCCror; break; - default: - llvm_unreachable("Unknown so_reg opcode!"); - break; - } - SDValue SOShImm = - CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32); - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast<ConstantSDNode>(N2)->getZExtValue()), - MVT::i32); - SDValue Ops[] = { N0, CPTmp0, SOShImm, Tmp2, N3, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32,Ops, 6); - } - } else { - if (SelectShifterOperandReg(Op, N1, CPTmp0, CPTmp1, CPTmp2)) { - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast<ConstantSDNode>(N2)->getZExtValue()), - MVT::i32); - SDValue Ops[] = { N0, CPTmp0, CPTmp1, CPTmp2, Tmp2, N3, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), - ARM::MOVCCs, MVT::i32, Ops, 7); - } - } - - // Pattern: (ARMcmov:i32 GPR:i32:$false, - // (imm:i32)<<P:Predicate_so_imm>>:$true, - // (imm:i32):$cc) - // Emits: (MOVCCi:i32 GPR:i32:$false, - // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) - // Pattern complexity = 10 cost = 1 size = 0 - if (N3.getOpcode() == ISD::Constant) { - if (Subtarget->isThumb()) { - if (Predicate_t2_so_imm(N3.getNode())) { - SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned) - cast<ConstantSDNode>(N1)->getZExtValue()), 
- MVT::i32); - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast<ConstantSDNode>(N2)->getZExtValue()), - MVT::i32); - SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), - ARM::t2MOVCCi, MVT::i32, Ops, 5); - } - } else { - if (Predicate_so_imm(N3.getNode())) { - SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned) - cast<ConstantSDNode>(N1)->getZExtValue()), - MVT::i32); - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast<ConstantSDNode>(N2)->getZExtValue()), - MVT::i32); - SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), - ARM::MOVCCi, MVT::i32, Ops, 5); - } - } - } - } - - // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Pattern complexity = 6 cost = 1 size = 0 - // - // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Pattern complexity = 6 cost = 11 size = 0 - // - // Also FCPYScc and FCPYDcc. - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast<ConstantSDNode>(N2)->getZExtValue()), - MVT::i32); - SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag }; - unsigned Opc = 0; - switch (VT.getSimpleVT().SimpleTy) { - default: assert(false && "Illegal conditional move type!"); - break; - case MVT::i32: - Opc = Subtarget->isThumb() - ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo) - : ARM::MOVCCr; - break; - case MVT::f32: - Opc = ARM::VMOVScc; - break; - case MVT::f64: - Opc = ARM::VMOVDcc; - break; - } - return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); - } + case ARMISD::CMOV: + return SelectCMOVOp(Op); case ARMISD::CNEG: { EVT VT = Op.getValueType(); SDValue N0 = Op.getOperand(0); @@ -1707,8 +1808,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VZIPq32; break; } - return CurDAG->getMachineNode(Opc, dl, VT, VT, - N->getOperand(0), N->getOperand(1)); + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); } case ARMISD::VUZP: { unsigned Opc = 0; @@ -1724,8 +1827,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VUZPq32; break; } - return CurDAG->getMachineNode(Opc, dl, VT, VT, - N->getOperand(0), N->getOperand(1)); + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); } case ARMISD::VTRN: { unsigned Opc = 0; @@ -1741,8 +1846,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VTRNq32; break; } - return CurDAG->getMachineNode(Opc, dl, VT, VT, - N->getOperand(0), N->getOperand(1)); + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); } case ISD::INTRINSIC_VOID: diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c3af8e6..c839fc6 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -39,6 +39,7 @@ #include 
"llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <sstream> @@ -355,10 +356,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); - // Support label based line numbers. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); @@ -1360,10 +1357,17 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, PseudoSourceValue::getGOT(), 0); return Result; } else { - SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + // If we have T2 ops, we can materialize the address directly via movt/movw + // pair. This is always cheaper. + if (Subtarget->useMovt()) { + return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, + DAG.getTargetGlobalAddress(GV, PtrVT)); + } else { + SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0); + } } } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 83b5cb4..e76e93c 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1217,27 +1217,45 @@ class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, // class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, - string asm, string cstr, list<dag> pattern> + string opc, string dt, string asm, string cstr, list<dag> pattern> : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = iops; - let AsmString = asm; + let InOperandList = !con(iops, (ops pred:$p)); + let AsmString = !strconcat( + !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)), + !strconcat("\t", asm)); let Pattern = pattern; list<Predicate> Predicates = [HasNEON]; } -class NI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, asm, "", pattern> { +// Same as NeonI except it does not have a "data type" specifier. 
+class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> { + let OutOperandList = oops; + let InOperandList = !con(iops, (ops pred:$p)); + let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm)); + let Pattern = pattern; + list<Predicate> Predicates = [HasNEON]; } -class NI4<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : NeonI<oops, iops, AddrMode4, IndexModeNone, itin, asm, "", pattern> { +class NI<dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : NeonXI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, asm, "", + pattern> { +} + +class NI4<dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list<dag> pattern> + : NeonXI<oops, iops, AddrMode4, IndexModeNone, itin, opc, asm, "", + pattern> { } class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, dag oops, dag iops, InstrItinClass itin, - string asm, string cstr, list<dag> pattern> - : NeonI<oops, iops, AddrMode6, IndexModeNone, itin, asm, cstr, pattern> { + string opc, string dt, string asm, string cstr, list<dag> pattern> + : NeonI<oops, iops, AddrMode6, IndexModeNone, itin, opc, dt, asm, cstr, + pattern> { let Inst{31-24} = 0b11110100; let Inst{23} = op23; let Inst{21-20} = op21_20; @@ -1246,8 +1264,16 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, } class NDataI<dag oops, dag iops, InstrItinClass itin, - string asm, string cstr, list<dag> pattern> - : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, asm, cstr, pattern> { + string opc, string dt, string asm, string cstr, list<dag> pattern> + : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, dt, asm, + cstr, pattern> { + let Inst{31-25} = 0b1111001; +} + +class NDataXI<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : NeonXI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, asm, + cstr, pattern> { let Inst{31-25} = 0b1111001; } @@ -1255,8 +1281,8 @@ class NDataI<dag oops, dag iops, InstrItinClass itin, class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6, bit op5, bit op4, dag oops, dag iops, InstrItinClass itin, - string asm, string cstr, list<dag> pattern> - : NDataI<oops, iops, itin, asm, cstr, pattern> { + string opc, string dt, string asm, string cstr, list<dag> pattern> + : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> { let Inst{23} = op23; let Inst{21-19} = op21_19; let Inst{11-8} = op11_8; @@ -1270,8 +1296,8 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6, class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, - string asm, string cstr, list<dag> pattern> - : NDataI<oops, iops, itin, asm, cstr, pattern> { + string opc, string dt, string asm, string cstr, list<dag> pattern> + : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> { let Inst{24-23} = op24_23; let Inst{21-20} = op21_20; let Inst{19-18} = op19_18; @@ -1281,14 +1307,16 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, let Inst{4} = op4; } -// NEON Vector Duplicate (scalar). -// Inst{19-16} is specified by subclasses. 
-class N2VDup<bits<2> op24_23, bits<2> op21_20, bits<5> op11_7, bit op6, bit op4, - dag oops, dag iops, InstrItinClass itin, - string asm, string cstr, list<dag> pattern> - : NDataI<oops, iops, itin, asm, cstr, pattern> { +// Same as N2V except it doesn't have a datatype suffix. +class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, + bits<5> op11_7, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : NDataXI<oops, iops, itin, opc, asm, cstr, pattern> { let Inst{24-23} = op24_23; let Inst{21-20} = op21_20; + let Inst{19-18} = op19_18; + let Inst{17-16} = op17_16; let Inst{11-7} = op11_7; let Inst{6} = op6; let Inst{4} = op4; @@ -1297,8 +1325,8 @@ class N2VDup<bits<2> op24_23, bits<2> op21_20, bits<5> op11_7, bit op6, bit op4, // NEON 2 vector register with immediate. class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, - string asm, string cstr, list<dag> pattern> - : NDataI<oops, iops, itin, asm, cstr, pattern> { + string opc, string dt, string asm, string cstr, list<dag> pattern> + : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> { let Inst{24} = op24; let Inst{23} = op23; let Inst{11-8} = op11_8; @@ -1310,8 +1338,8 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, // NEON 3 vector register format. class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, - string asm, string cstr, list<dag> pattern> - : NDataI<oops, iops, itin, asm, cstr, pattern> { + string opc, string dt, string asm, string cstr, list<dag> pattern> + : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> { let Inst{24} = op24; let Inst{23} = op23; let Inst{21-20} = op21_20; @@ -1320,16 +1348,15 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, let Inst{4} = op4; } -// NEON 3 vector register with immediate. This is only used for VEXT where -// op11_8 represents the starting byte index of the extracted result in the -// concatenation of the operands and is left unspecified. -class N3VImm<bit op24, bit op23, bits<2> op21_20, bit op6, bit op4, - dag oops, dag iops, InstrItinClass itin, - string asm, string cstr, list<dag> pattern> - : NDataI<oops, iops, itin, asm, cstr, pattern> { +// Same as N3VX except it doesn't have a data type suffix. +class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : NDataXI<oops, iops, itin, opc, asm, cstr, pattern> { let Inst{24} = op24; let Inst{23} = op23; let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; let Inst{6} = op6; let Inst{4} = op4; } @@ -1337,29 +1364,37 @@ class N3VImm<bit op24, bit op23, bits<2> op21_20, bit op6, bit op4, // NEON VMOVs between scalar and core registers. 
class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : AI<oops, iops, f, itin, opc, asm, pattern> { + string opc, string dt, string asm, list<dag> pattern> + : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain, + "", itin> { let Inst{27-20} = opcod1; let Inst{11-8} = opcod2; let Inst{6-5} = opcod3; let Inst{4} = 1; + + let OutOperandList = oops; + let InOperandList = !con(iops, (ops pred:$p)); + let AsmString = !strconcat( + !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)), + !strconcat("\t", asm)); + let Pattern = pattern; list<Predicate> Predicates = [HasNEON]; } class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> + string opc, string dt, string asm, list<dag> pattern> : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONGetLnFrm, itin, - opc, asm, pattern>; + opc, dt, asm, pattern>; class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> + string opc, string dt, string asm, list<dag> pattern> : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONSetLnFrm, itin, - opc, asm, pattern>; + opc, dt, asm, pattern>; class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> + string opc, string dt, string asm, list<dag> pattern> : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, itin, - opc, asm, pattern>; + opc, dt, asm, pattern>; // NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON // for single-precision FP. diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 79bde29..7516d3c 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -116,6 +116,10 @@ def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; def CarryDefIsUnused : Predicate<"!N.getNode()->hasAnyUseOfValue(1)">; def CarryDefIsUsed : Predicate<"N.getNode()->hasAnyUseOfValue(1)">; +// FIXME: Eventually this will be just "hasV6T2Ops". +def UseMovt : Predicate<"Subtarget->useMovt()">; +def DontUseMovt : Predicate<"!Subtarget->useMovt()">; + //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -204,7 +208,7 @@ def hi16 : SDNodeXForm<imm, [{ def lo16AllZero : PatLeaf<(i32 imm), [{ // Returns true if all low 16-bits are 0. return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0; - }], hi16>; +}], hi16>; /// imm0_65535 predicate - True if the 32-bit immediate is in the range /// [0.65535]. @@ -284,6 +288,22 @@ def so_imm2part_2 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(V, MVT::i32); }]>; +def so_neg_imm2part : Operand<i32>, PatLeaf<(imm), [{ + return ARM_AM::isSOImmTwoPartVal(-(int)N->getZExtValue()); + }]> { + let PrintMethod = "printSOImm2PartOperand"; +} + +def so_neg_imm2part_1 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getSOImmTwoPartFirst(-(int)N->getZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + +def so_neg_imm2part_2 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getSOImmTwoPartSecond(-(int)N->getZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + /// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31]. 
def imm0_31 : Operand<i32>, PatLeaf<(imm), [{ return (int32_t)N->getZExtValue() < 32; @@ -568,12 +588,6 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, [(ARMcallseq_start timm:$amt)]>; } -def DWARF_LOC : -PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), NoItinerary, - ".loc $file, $line, $col", - [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>; - - // Address computation and loads and stores in PIC mode. let isNotDuplicable = 1 in { def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), @@ -581,25 +595,24 @@ def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>; let AddedComplexity = 10 in { -let canFoldAsLoad = 1 in def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p\t$dst, $addr", [(set GPR:$dst, (load addrmodepc:$addr))]>; def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}h\t$dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrh${p}\t$dst, $addr", [(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>; def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}b\t$dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrb${p}\t$dst, $addr", [(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>; def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sh\t$dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsh${p}\t$dst, $addr", [(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>; def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sb\t$dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsb${p}\t$dst, $addr", [(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>; } let AddedComplexity = 10 in { @@ -801,13 +814,14 @@ let isBranch = 1, isTerminator = 1 in { // // Load -let canFoldAsLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", "\t$dst, $addr", [(set GPR:$dst, (load addrmode2:$addr))]>; // Special LDR for loads from non-pc-relative constpools. -let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, + mayHaveSideEffects = 1 in def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", "\t$dst, $addr", []>; @@ -992,7 +1006,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src), let Constraints = "$src = $dst" in def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm), DPFrm, IIC_iMOVi, - "movt", "\t$dst, $imm", + "movt", "\t$dst, $imm", [(set GPR:$dst, (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>, UnaryDP, @@ -1593,12 +1607,6 @@ let Defs = // Non-Instruction Patterns // -// ConstantPool, GlobalAddress, and JumpTable -def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>; -def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; -def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), - (LEApcrelJT tjumptable:$dst, imm:$id)>; - // Large immediate handling. // Two piece so_imms. 
@@ -1618,9 +1626,9 @@ def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS), def : ARMPat<(add GPR:$LHS, so_imm2part:$RHS), (ADDri (ADDri GPR:$LHS, (so_imm2part_1 imm:$RHS)), (so_imm2part_2 imm:$RHS))>; -def : ARMPat<(sub GPR:$LHS, so_imm2part:$RHS), - (SUBri (SUBri GPR:$LHS, (so_imm2part_1 imm:$RHS)), - (so_imm2part_2 imm:$RHS))>; +def : ARMPat<(add GPR:$LHS, so_neg_imm2part:$RHS), + (SUBri (SUBri GPR:$LHS, (so_neg_imm2part_1 imm:$RHS)), + (so_neg_imm2part_2 imm:$RHS))>; // 32-bit immediate using movw + movt. // This is a single pseudo instruction, the benefit is that it can be remat'd @@ -1628,10 +1636,19 @@ def : ARMPat<(sub GPR:$LHS, so_imm2part:$RHS), // FIXME: Remove this when we can do generalized remat. let isReMaterializable = 1 in def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi, - "movw", "\t$dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}", + "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", [(set GPR:$dst, (i32 imm:$src))]>, Requires<[IsARM, HasV6T2]>; +// ConstantPool, GlobalAddress, and JumpTable +def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>, + Requires<[IsARM, DontUseMovt]>; +def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; +def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>, + Requires<[IsARM, UseMovt]>; +def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), + (LEApcrelJT tjumptable:$dst, imm:$id)>; + // TODO: add,sub,and, 3-instr forms? diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index e1353b7..3166931 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -124,7 +124,7 @@ let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { def VLDMD : NI<(outs), (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops), IIC_fpLoadm, - "vldm${addr:submode} ${addr:base}, $dst1", + "vldm", "${addr:submode} ${addr:base}, $dst1", []> { let Inst{27-25} = 0b110; let Inst{20} = 1; @@ -134,7 +134,7 @@ def VLDMD : NI<(outs), def VLDMS : NI<(outs), (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops), IIC_fpLoadm, - "vldm${addr:submode} ${addr:base}, $dst1", + "vldm", "${addr:submode} ${addr:base}, $dst1", []> { let Inst{27-25} = 0b110; let Inst{20} = 1; @@ -146,7 +146,7 @@ def VLDMS : NI<(outs), // Use vldmia to load a Q register as a D register pair. def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm, - "vldmia\t$addr, ${dst:dregpair}", + "vldmia", "$addr, ${dst:dregpair}", [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> { let Inst{27-25} = 0b110; let Inst{24} = 0; // P bit @@ -158,7 +158,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), // Use vstmia to store a Q register as a D register pair. 
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index e1353b7..3166931 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -124,7 +124,7 @@ let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
def VLDMD : NI<(outs),
               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
               IIC_fpLoadm,
-               "vldm${addr:submode} ${addr:base}, $dst1",
+               "vldm", "${addr:submode} ${addr:base}, $dst1",
               []> {
  let Inst{27-25} = 0b110;
  let Inst{20}    = 1;
@@ -134,7 +134,7 @@ def VLDMS : NI<(outs),
               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
               IIC_fpLoadm,
-               "vldm${addr:submode} ${addr:base}, $dst1",
+               "vldm", "${addr:submode} ${addr:base}, $dst1",
               []> {
  let Inst{27-25} = 0b110;
  let Inst{20}    = 1;
@@ -146,7 +146,7 @@ def VLDMS : NI<(outs),
// Use vldmia to load a Q register as a D register pair.
def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
               IIC_fpLoadm,
-                "vldmia\t$addr, ${dst:dregpair}",
+                "vldmia", "$addr, ${dst:dregpair}",
               [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
  let Inst{27-25} = 0b110;
  let Inst{24}    = 0; // P bit
@@ -158,7 +158,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
// Use vstmia to store a Q register as a D register pair.
def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
               IIC_fpStorem,
-                "vstmia\t$addr, ${src:dregpair}",
+                "vstmia", "$addr, ${src:dregpair}",
               [(store (v2f64 QPR:$src), addrmode4:$addr)]> {
  let Inst{27-25} = 0b110;
  let Inst{24}    = 0; // P bit
@@ -168,178 +168,221 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
}

// VLD1 : Vector Load (multiple single elements)
-class VLD1D<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+class VLD1D<bits<4> op7_4, string OpcodeStr, string Dt,
+            ValueType Ty, Intrinsic IntOp>
  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr),
          IIC_VLD1,
-          !strconcat(OpcodeStr, "\t\\{$dst\\}, $addr"), "",
+          OpcodeStr, Dt, "\\{$dst\\}, $addr", "",
          [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
-class VLD1Q<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+class VLD1Q<bits<4> op7_4, string OpcodeStr, string Dt,
+            ValueType Ty, Intrinsic IntOp>
  : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr),
          IIC_VLD1,
-          !strconcat(OpcodeStr, "\t${dst:dregpair}, $addr"), "",
+          OpcodeStr, Dt, "${dst:dregpair}, $addr", "",
          [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;

-def VLD1d8  : VLD1D<0b0000, "vld1.8",  v8i8,  int_arm_neon_vld1>;
-def VLD1d16 : VLD1D<0b0100, "vld1.16", v4i16, int_arm_neon_vld1>;
-def VLD1d32 : VLD1D<0b1000, "vld1.32", v2i32, int_arm_neon_vld1>;
-def VLD1df  : VLD1D<0b1000, "vld1.32", v2f32, int_arm_neon_vld1>;
-def VLD1d64 : VLD1D<0b1100, "vld1.64", v1i64, int_arm_neon_vld1>;
+def VLD1d8  : VLD1D<0b0000, "vld1", "8",  v8i8,  int_arm_neon_vld1>;
+def VLD1d16 : VLD1D<0b0100, "vld1", "16", v4i16, int_arm_neon_vld1>;
+def VLD1d32 : VLD1D<0b1000, "vld1", "32", v2i32, int_arm_neon_vld1>;
+def VLD1df  : VLD1D<0b1000, "vld1", "32", v2f32, int_arm_neon_vld1>;
+def VLD1d64 : VLD1D<0b1100, "vld1", "64", v1i64, int_arm_neon_vld1>;

-def VLD1q8  : VLD1Q<0b0000, "vld1.8",  v16i8, int_arm_neon_vld1>;
-def VLD1q16 : VLD1Q<0b0100, "vld1.16", v8i16, int_arm_neon_vld1>;
-def VLD1q32 : VLD1Q<0b1000, "vld1.32", v4i32, int_arm_neon_vld1>;
-def VLD1qf  : VLD1Q<0b1000, "vld1.32", v4f32, int_arm_neon_vld1>;
-def VLD1q64 : VLD1Q<0b1100, "vld1.64", v2i64, int_arm_neon_vld1>;
+def VLD1q8  : VLD1Q<0b0000, "vld1", "8",  v16i8, int_arm_neon_vld1>;
+def VLD1q16 : VLD1Q<0b0100, "vld1", "16", v8i16, int_arm_neon_vld1>;
+def VLD1q32 : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>;
+def VLD1qf  : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>;
+def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>;
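The mechanical change running through this file is that every NEON mnemonic such as "vld1.8" is split into an opcode part ("vld1") and a data-type suffix ("8"). Keeping them separate lets the printer place a condition code between the two, which matters once these instructions can carry predicates. A toy sketch of the final string assembly, purely as an illustration (printNEON and its parameters are invented here, not the in-tree asm printer):

```cpp
#include <string>

// Hypothetical illustration of why OpcodeStr and Dt are threaded separately:
// the condition code goes between the mnemonic and the ".<dt>" suffix.
std::string printNEON(const std::string &opc, const std::string &pred,
                      const std::string &dt, const std::string &operands) {
  std::string s = opc + pred;
  if (!dt.empty())
    s += "." + dt;
  return s + "\t" + operands;
}

// printNEON("vld1", "",   "8", "{d0}, [r0]") -> "vld1.8\t{d0}, [r0]"
// printNEON("vld1", "eq", "8", "{d0}, [r0]") -> "vld1eq.8\t{d0}, [r0]"
```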
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {

// VLD2 : Vector Load (multiple 2-element structures)
-class VLD2D<bits<4> op7_4, string OpcodeStr>
+class VLD2D<bits<4> op7_4, string OpcodeStr, string Dt>
  : NLdSt<0,0b10,0b1000,op7_4, (outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr), IIC_VLD2,
-          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2\\}, $addr"), "", []>;
+          OpcodeStr, Dt, "\\{$dst1,$dst2\\}, $addr", "", []>;
-class VLD2Q<bits<4> op7_4, string OpcodeStr>
+class VLD2Q<bits<4> op7_4, string OpcodeStr, string Dt>
  : NLdSt<0,0b10,0b0011,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr), IIC_VLD2,
-          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
+          OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr",
          "", []>;

-def VLD2d8  : VLD2D<0b0000, "vld2.8">;
-def VLD2d16 : VLD2D<0b0100, "vld2.16">;
-def VLD2d32 : VLD2D<0b1000, "vld2.32">;
+def VLD2d8  : VLD2D<0b0000, "vld2", "8">;
+def VLD2d16 : VLD2D<0b0100, "vld2", "16">;
+def VLD2d32 : VLD2D<0b1000, "vld2", "32">;
def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2),
                    (ins addrmode6:$addr), IIC_VLD1,
-                    "vld1.64\t\\{$dst1,$dst2\\}, $addr", "", []>;
+                    "vld1", "64", "\\{$dst1,$dst2\\}, $addr", "", []>;

-def VLD2q8  : VLD2Q<0b0000, "vld2.8">;
-def VLD2q16 : VLD2Q<0b0100, "vld2.16">;
-def VLD2q32 : VLD2Q<0b1000, "vld2.32">;
+def VLD2q8  : VLD2Q<0b0000, "vld2", "8">;
+def VLD2q16 : VLD2Q<0b0100, "vld2", "16">;
+def VLD2q32 : VLD2Q<0b1000, "vld2", "32">;

// VLD3 : Vector Load (multiple 3-element structures)
-class VLD3D<bits<4> op7_4, string OpcodeStr>
+class VLD3D<bits<4> op7_4, string OpcodeStr, string Dt>
  : NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr), IIC_VLD3,
-          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), "", []>;
+          OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3\\}, $addr", "", []>;
-class VLD3WB<bits<4> op7_4, string OpcodeStr>
+class VLD3WB<bits<4> op7_4, string OpcodeStr, string Dt>
  : NLdSt<0,0b10,0b0101,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$addr), IIC_VLD3,
-          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"),
+          OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3\\}, $addr",
          "$addr.addr = $wb", []>;

-def VLD3d8  : VLD3D<0b0000, "vld3.8">;
-def VLD3d16 : VLD3D<0b0100, "vld3.16">;
-def VLD3d32 : VLD3D<0b1000, "vld3.32">;
+def VLD3d8  : VLD3D<0b0000, "vld3", "8">;
+def VLD3d16 : VLD3D<0b0100, "vld3", "16">;
+def VLD3d32 : VLD3D<0b1000, "vld3", "32">;
def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100,
                    (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
                    (ins addrmode6:$addr), IIC_VLD1,
-                    "vld1.64\t\\{$dst1,$dst2,$dst3\\}, $addr", "", []>;
+                    "vld1", "64", "\\{$dst1,$dst2,$dst3\\}, $addr", "", []>;

// vld3 to double-spaced even registers.
-def VLD3q8a  : VLD3WB<0b0000, "vld3.8">;
-def VLD3q16a : VLD3WB<0b0100, "vld3.16">;
-def VLD3q32a : VLD3WB<0b1000, "vld3.32">;
+def VLD3q8a  : VLD3WB<0b0000, "vld3", "8">;
+def VLD3q16a : VLD3WB<0b0100, "vld3", "16">;
+def VLD3q32a : VLD3WB<0b1000, "vld3", "32">;

// vld3 to double-spaced odd registers.
-def VLD3q8b  : VLD3WB<0b0000, "vld3.8">;
-def VLD3q16b : VLD3WB<0b0100, "vld3.16">;
-def VLD3q32b : VLD3WB<0b1000, "vld3.32">;
+def VLD3q8b  : VLD3WB<0b0000, "vld3", "8">;
+def VLD3q16b : VLD3WB<0b0100, "vld3", "16">;
+def VLD3q32b : VLD3WB<0b1000, "vld3", "32">;

// VLD4 : Vector Load (multiple 4-element structures)
-class VLD4D<bits<4> op7_4, string OpcodeStr>
+class VLD4D<bits<4> op7_4, string OpcodeStr, string Dt>
  : NLdSt<0,0b10,0b0000,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr), IIC_VLD4,
-          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
+          OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr",
          "", []>;
-class VLD4WB<bits<4> op7_4, string OpcodeStr>
+class VLD4WB<bits<4> op7_4, string OpcodeStr, string Dt>
  : NLdSt<0,0b10,0b0001,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$addr), IIC_VLD4,
-          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
+          OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr",
          "$addr.addr = $wb", []>;

-def VLD4d8  : VLD4D<0b0000, "vld4.8">;
-def VLD4d16 : VLD4D<0b0100, "vld4.16">;
-def VLD4d32 : VLD4D<0b1000, "vld4.32">;
+def VLD4d8  : VLD4D<0b0000, "vld4", "8">;
+def VLD4d16 : VLD4D<0b0100, "vld4", "16">;
+def VLD4d32 : VLD4D<0b1000, "vld4", "32">;
def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100,
                    (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
                    (ins addrmode6:$addr), IIC_VLD1,
-                    "vld1.64\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>;
+                    "vld1", "64", "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>;

// vld4 to double-spaced even registers.
-def VLD4q8a  : VLD4WB<0b0000, "vld4.8">;
-def VLD4q16a : VLD4WB<0b0100, "vld4.16">;
-def VLD4q32a : VLD4WB<0b1000, "vld4.32">;
+def VLD4q8a  : VLD4WB<0b0000, "vld4", "8">;
+def VLD4q16a : VLD4WB<0b0100, "vld4", "16">;
+def VLD4q32a : VLD4WB<0b1000, "vld4", "32">;

// vld4 to double-spaced odd registers.
-def VLD4q8b  : VLD4WB<0b0000, "vld4.8">;
-def VLD4q16b : VLD4WB<0b0100, "vld4.16">;
-def VLD4q32b : VLD4WB<0b1000, "vld4.32">;
+def VLD4q8b  : VLD4WB<0b0000, "vld4", "8">;
+def VLD4q16b : VLD4WB<0b0100, "vld4", "16">;
+def VLD4q32b : VLD4WB<0b1000, "vld4", "32">;

// VLD1LN : Vector Load (single element to one lane)
//   FIXME: Not yet implemented.

// VLD2LN : Vector Load (single 2-element structure to one lane)
-class VLD2LN<bits<4> op11_8, string OpcodeStr>
-  : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
-          IIC_VLD2,
-          !strconcat(OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane]\\}, $addr"),
-          "$src1 = $dst1, $src2 = $dst2", []>;
-
-def VLD2LNd8  : VLD2LN<0b0001, "vld2.8">;
-def VLD2LNd16 : VLD2LN<0b0101, "vld2.16">;
-def VLD2LNd32 : VLD2LN<0b1001, "vld2.32">;
+class VLD2LN<bits<4> op11_8, string OpcodeStr, string Dt>
+  : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+          IIC_VLD2,
+          OpcodeStr, Dt, "\\{$dst1[$lane],$dst2[$lane]\\}, $addr",
+          "$src1 = $dst1, $src2 = $dst2", []>;
+
+// vld2 to single-spaced registers.
+def VLD2LNd8  : VLD2LN<0b0001, "vld2", "8">;
+def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> {
+  let Inst{5} = 0;
+}
+def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> {
+  let Inst{6} = 0;
+}

// vld2 to double-spaced even registers.
-def VLD2LNq16a: VLD2LN<0b0101, "vld2.16">;
-def VLD2LNq32a: VLD2LN<0b1001, "vld2.32">;
+def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> {
+  let Inst{5} = 1;
+}
+def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> {
+  let Inst{6} = 1;
+}

// vld2 to double-spaced odd registers.
-def VLD2LNq16b: VLD2LN<0b0101, "vld2.16">;
-def VLD2LNq32b: VLD2LN<0b1001, "vld2.32">;
+def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> {
+  let Inst{5} = 1;
+}
+def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> {
+  let Inst{6} = 1;
+}

// VLD3LN : Vector Load (single 3-element structure to one lane)
-class VLD3LN<bits<4> op11_8, string OpcodeStr>
-  : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
-           nohash_imm:$lane), IIC_VLD3,
-          !strconcat(OpcodeStr,
-                     "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr"),
-          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
-
-def VLD3LNd8  : VLD3LN<0b0010, "vld3.8">;
-def VLD3LNd16 : VLD3LN<0b0110, "vld3.16">;
-def VLD3LNd32 : VLD3LN<0b1010, "vld3.32">;
+class VLD3LN<bits<4> op11_8, string OpcodeStr, string Dt>
+  : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+           nohash_imm:$lane), IIC_VLD3,
+          OpcodeStr, Dt,
+          "\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr",
+          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
+
+// vld3 to single-spaced registers.
+def VLD3LNd8  : VLD3LN<0b0010, "vld3", "8"> {
+  let Inst{4} = 0;
+}
+def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> {
+  let Inst{5-4} = 0b00;
+}
+def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> {
+  let Inst{6-4} = 0b000;
+}

// vld3 to double-spaced even registers.
-def VLD3LNq16a: VLD3LN<0b0110, "vld3.16">;
-def VLD3LNq32a: VLD3LN<0b1010, "vld3.32">;
+def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> {
+  let Inst{5-4} = 0b10;
+}
+def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> {
+  let Inst{6-4} = 0b100;
+}

// vld3 to double-spaced odd registers.
-def VLD3LNq16b: VLD3LN<0b0110, "vld3.16">;
-def VLD3LNq32b: VLD3LN<0b1010, "vld3.32">;
+def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> {
+  let Inst{5-4} = 0b10;
+}
+def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> {
+  let Inst{6-4} = 0b100;
+}

// VLD4LN : Vector Load (single 4-element structure to one lane)
-class VLD4LN<bits<4> op11_8, string OpcodeStr>
-  : NLdSt<1,0b10,op11_8,0b0000,
-          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
-           nohash_imm:$lane), IIC_VLD4,
-          !strconcat(OpcodeStr,
-          "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr"),
-          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
-
-def VLD4LNd8  : VLD4LN<0b0011, "vld4.8">;
-def VLD4LNd16 : VLD4LN<0b0111, "vld4.16">;
-def VLD4LNd32 : VLD4LN<0b1011, "vld4.32">;
+class VLD4LN<bits<4> op11_8, string OpcodeStr, string Dt>
+  : NLdSt<1,0b10,op11_8,{?,?,?,?},
+          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+           nohash_imm:$lane), IIC_VLD4,
+          OpcodeStr, Dt,
+          "\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr",
+          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
+
+// vld4 to single-spaced registers.
+def VLD4LNd8  : VLD4LN<0b0011, "vld4", "8">;
+def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> {
+  let Inst{5} = 0;
+}
+def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> {
+  let Inst{6} = 0;
+}

// vld4 to double-spaced even registers.
-def VLD4LNq16a: VLD4LN<0b0111, "vld4.16">;
-def VLD4LNq32a: VLD4LN<0b1011, "vld4.32">;
+def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> {
+  let Inst{5} = 1;
+}
+def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> {
+  let Inst{6} = 1;
+}

// vld4 to double-spaced odd registers.
-def VLD4LNq16b: VLD4LN<0b0111, "vld4.16">;
-def VLD4LNq32b: VLD4LN<0b1011, "vld4.32">;
+def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> {
+  let Inst{5} = 1;
+}
+def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> {
+  let Inst{6} = 1;
+}

// VLD1DUP : Vector Load (single element to all lanes)
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
@@ -349,178 +392,221 @@ def VLD4LNq32b: VLD4LN<0b1011, "vld4.32">;
} // mayLoad = 1, hasExtraDefRegAllocReq = 1
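The {?,?,?,?} op7_4 fields together with the explicit `let Inst{5}`/`let Inst{6}` assignments above encode, per element size, whether a lane load addresses single- or double-spaced D registers. Reading the rule straight off this patch (a mnemonic for following the diff, not a statement of the full ARM lane encoding): 16-bit elements key the spacing on Inst{5}, 32-bit elements on Inst{6}. A hypothetical helper capturing just that:

```cpp
#include <cstdint>

// Invented mnemonic for the bit assignments visible in this patch: lane ops
// on 16-bit elements carry the register spacing in Inst{5}, 32-bit elements
// in Inst{6}; 8-bit elements only come single-spaced.
uint32_t setLaneSpacing(uint32_t inst, unsigned eltBits, bool doubleSpaced) {
  unsigned bit;
  switch (eltBits) {
  case 16: bit = 5; break;
  case 32: bit = 6; break;
  default: return inst;              // 8-bit: nothing to set
  }
  return doubleSpaced ? (inst | (1u << bit)) : (inst & ~(1u << bit));
}
```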
// VST1 : Vector Store (multiple single elements)
-class VST1D<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+class VST1D<bits<4> op7_4, string OpcodeStr, string Dt,
+            ValueType Ty, Intrinsic IntOp>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src),
          IIC_VST,
-          !strconcat(OpcodeStr, "\t\\{$src\\}, $addr"), "",
+          OpcodeStr, Dt, "\\{$src\\}, $addr", "",
          [(IntOp addrmode6:$addr, (Ty DPR:$src))]>;
-class VST1Q<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+class VST1Q<bits<4> op7_4, string OpcodeStr, string Dt,
+            ValueType Ty, Intrinsic IntOp>
  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src),
          IIC_VST,
-          !strconcat(OpcodeStr, "\t${src:dregpair}, $addr"), "",
+          OpcodeStr, Dt, "${src:dregpair}, $addr", "",
          [(IntOp addrmode6:$addr, (Ty QPR:$src))]>;

let hasExtraSrcRegAllocReq = 1 in {
-def VST1d8  : VST1D<0b0000, "vst1.8",  v8i8,  int_arm_neon_vst1>;
-def VST1d16 : VST1D<0b0100, "vst1.16", v4i16, int_arm_neon_vst1>;
-def VST1d32 : VST1D<0b1000, "vst1.32", v2i32, int_arm_neon_vst1>;
-def VST1df  : VST1D<0b1000, "vst1.32", v2f32, int_arm_neon_vst1>;
-def VST1d64 : VST1D<0b1100, "vst1.64", v1i64, int_arm_neon_vst1>;
-
-def VST1q8  : VST1Q<0b0000, "vst1.8",  v16i8, int_arm_neon_vst1>;
-def VST1q16 : VST1Q<0b0100, "vst1.16", v8i16, int_arm_neon_vst1>;
-def VST1q32 : VST1Q<0b1000, "vst1.32", v4i32, int_arm_neon_vst1>;
-def VST1qf  : VST1Q<0b1000, "vst1.32", v4f32, int_arm_neon_vst1>;
-def VST1q64 : VST1Q<0b1100, "vst1.64", v2i64, int_arm_neon_vst1>;
+def VST1d8  : VST1D<0b0000, "vst1", "8",  v8i8,  int_arm_neon_vst1>;
+def VST1d16 : VST1D<0b0100, "vst1", "16", v4i16, int_arm_neon_vst1>;
+def VST1d32 : VST1D<0b1000, "vst1", "32", v2i32, int_arm_neon_vst1>;
+def VST1df  : VST1D<0b1000, "vst1", "32", v2f32, int_arm_neon_vst1>;
+def VST1d64 : VST1D<0b1100, "vst1", "64", v1i64, int_arm_neon_vst1>;
+
+def VST1q8  : VST1Q<0b0000, "vst1", "8",  v16i8, int_arm_neon_vst1>;
+def VST1q16 : VST1Q<0b0100, "vst1", "16", v8i16, int_arm_neon_vst1>;
+def VST1q32 : VST1Q<0b1000, "vst1", "32", v4i32, int_arm_neon_vst1>;
+def VST1qf  : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>;
+def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>;
} // hasExtraSrcRegAllocReq

let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {

// VST2 : Vector Store (multiple 2-element structures)
-class VST2D<bits<4> op7_4, string OpcodeStr>
+class VST2D<bits<4> op7_4, string OpcodeStr, string Dt>
  : NLdSt<0,0b00,0b1000,op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
-          !strconcat(OpcodeStr, "\t\\{$src1,$src2\\}, $addr"), "", []>;
+          OpcodeStr, Dt, "\\{$src1,$src2\\}, $addr", "", []>;
-class VST2Q<bits<4> op7_4, string OpcodeStr>
+class VST2Q<bits<4> op7_4, string OpcodeStr, string Dt>
  : NLdSt<0,0b00,0b0011,op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST,
-          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
+          OpcodeStr, Dt, "\\{$src1,$src2,$src3,$src4\\}, $addr",
          "", []>;

-def VST2d8  : VST2D<0b0000, "vst2.8">;
-def VST2d16 : VST2D<0b0100, "vst2.16">;
-def VST2d32 : VST2D<0b1000, "vst2.32">;
+def VST2d8  : VST2D<0b0000, "vst2", "8">;
+def VST2d16 : VST2D<0b0100, "vst2", "16">;
+def VST2d32 : VST2D<0b1000, "vst2", "32">;
def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs),
                    (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
-                    "vst1.64\t\\{$src1,$src2\\}, $addr", "", []>;
+                    "vst1", "64", "\\{$src1,$src2\\}, $addr", "", []>;

-def VST2q8  : VST2Q<0b0000, "vst2.8">;
-def VST2q16 : VST2Q<0b0100, "vst2.16">;
-def VST2q32 : VST2Q<0b1000, "vst2.32">;
+def VST2q8  : VST2Q<0b0000, "vst2", "8">;
+def VST2q16 : VST2Q<0b0100, "vst2", "16">;
+def VST2q32 : VST2Q<0b1000, "vst2", "32">;

// VST3 : Vector Store (multiple 3-element structures)
-class VST3D<bits<4> op7_4, string OpcodeStr>
+class VST3D<bits<4> op7_4, string OpcodeStr, string Dt>
  : NLdSt<0,0b00,0b0100,op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
-          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), "", []>;
+          OpcodeStr, Dt, "\\{$src1,$src2,$src3\\}, $addr", "", []>;
-class VST3WB<bits<4> op7_4, string OpcodeStr>
+class VST3WB<bits<4> op7_4, string OpcodeStr, string Dt>
  : NLdSt<0,0b00,0b0101,op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
-          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"),
+          OpcodeStr, Dt, "\\{$src1,$src2,$src3\\}, $addr",
          "$addr.addr = $wb", []>;

-def VST3d8  : VST3D<0b0000, "vst3.8">;
-def VST3d16 : VST3D<0b0100, "vst3.16">;
-def VST3d32 : VST3D<0b1000, "vst3.32">;
+def VST3d8  : VST3D<0b0000, "vst3", "8">;
+def VST3d16 : VST3D<0b0100, "vst3", "16">;
+def VST3d32 : VST3D<0b1000, "vst3", "32">;
def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs),
                    (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
                    IIC_VST,
-                    "vst1.64\t\\{$src1,$src2,$src3\\}, $addr", "", []>;
+                    "vst1", "64", "\\{$src1,$src2,$src3\\}, $addr", "", []>;

// vst3 to double-spaced even registers.
-def VST3q8a  : VST3WB<0b0000, "vst3.8">;
-def VST3q16a : VST3WB<0b0100, "vst3.16">;
-def VST3q32a : VST3WB<0b1000, "vst3.32">;
+def VST3q8a  : VST3WB<0b0000, "vst3", "8">;
+def VST3q16a : VST3WB<0b0100, "vst3", "16">;
+def VST3q32a : VST3WB<0b1000, "vst3", "32">;

// vst3 to double-spaced odd registers.
-def VST3q8b  : VST3WB<0b0000, "vst3.8">;
-def VST3q16b : VST3WB<0b0100, "vst3.16">;
-def VST3q32b : VST3WB<0b1000, "vst3.32">;
+def VST3q8b  : VST3WB<0b0000, "vst3", "8">;
+def VST3q16b : VST3WB<0b0100, "vst3", "16">;
+def VST3q32b : VST3WB<0b1000, "vst3", "32">;

// VST4 : Vector Store (multiple 4-element structures)
-class VST4D<bits<4> op7_4, string OpcodeStr>
+class VST4D<bits<4> op7_4, string OpcodeStr, string Dt>
  : NLdSt<0,0b00,0b0000,op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST,
-          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
+          OpcodeStr, Dt, "\\{$src1,$src2,$src3,$src4\\}, $addr",
          "", []>;
-class VST4WB<bits<4> op7_4, string OpcodeStr>
+class VST4WB<bits<4> op7_4, string OpcodeStr, string Dt>
  : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST,
-          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
+          OpcodeStr, Dt, "\\{$src1,$src2,$src3,$src4\\}, $addr",
          "$addr.addr = $wb", []>;

-def VST4d8  : VST4D<0b0000, "vst4.8">;
-def VST4d16 : VST4D<0b0100, "vst4.16">;
-def VST4d32 : VST4D<0b1000, "vst4.32">;
+def VST4d8  : VST4D<0b0000, "vst4", "8">;
+def VST4d16 : VST4D<0b0100, "vst4", "16">;
+def VST4d32 : VST4D<0b1000, "vst4", "32">;
def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs),
                    (ins addrmode6:$addr,
                     DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST,
-                    "vst1.64\t\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>;
+                    "vst1", "64", "\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>;

// vst4 to double-spaced even registers.
-def VST4q8a  : VST4WB<0b0000, "vst4.8">;
-def VST4q16a : VST4WB<0b0100, "vst4.16">;
-def VST4q32a : VST4WB<0b1000, "vst4.32">;
+def VST4q8a  : VST4WB<0b0000, "vst4", "8">;
+def VST4q16a : VST4WB<0b0100, "vst4", "16">;
+def VST4q32a : VST4WB<0b1000, "vst4", "32">;

// vst4 to double-spaced odd registers.
-def VST4q8b  : VST4WB<0b0000, "vst4.8">;
-def VST4q16b : VST4WB<0b0100, "vst4.16">;
-def VST4q32b : VST4WB<0b1000, "vst4.32">;
+def VST4q8b  : VST4WB<0b0000, "vst4", "8">;
+def VST4q16b : VST4WB<0b0100, "vst4", "16">;
+def VST4q32b : VST4WB<0b1000, "vst4", "32">;

// VST1LN : Vector Store (single element from one lane)
//   FIXME: Not yet implemented.

// VST2LN : Vector Store (single 2-element structure from one lane)
-class VST2LN<bits<4> op11_8, string OpcodeStr>
-  : NLdSt<1,0b00,op11_8,0b0000, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
-          IIC_VST,
-          !strconcat(OpcodeStr, "\t\\{$src1[$lane],$src2[$lane]\\}, $addr"),
-          "", []>;
-
-def VST2LNd8  : VST2LN<0b0001, "vst2.8">;
-def VST2LNd16 : VST2LN<0b0101, "vst2.16">;
-def VST2LNd32 : VST2LN<0b1001, "vst2.32">;
+class VST2LN<bits<4> op11_8, string OpcodeStr, string Dt>
+  : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+          IIC_VST,
+          OpcodeStr, Dt, "\\{$src1[$lane],$src2[$lane]\\}, $addr",
+          "", []>;
+
+// vst2 to single-spaced registers.
+def VST2LNd8  : VST2LN<0b0001, "vst2", "8">;
+def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> {
+  let Inst{5} = 0;
+}
+def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> {
+  let Inst{6} = 0;
+}

// vst2 to double-spaced even registers.
-def VST2LNq16a: VST2LN<0b0101, "vst2.16">;
-def VST2LNq32a: VST2LN<0b1001, "vst2.32">;
+def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> {
+  let Inst{5} = 1;
+}
+def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> {
+  let Inst{6} = 1;
+}
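The a/b pairs (VST2LNq16a/VST2LNq16b and their VLD counterparts) exist because a lane operation on a Q register only touches every other D register: the even-numbered halves sit in d0, d2, ... and the odd-numbered halves in d1, d3, .... A throwaway illustration of which D registers a double-spaced form names (the helper is invented for this note, not part of the backend):

```cpp
#include <vector>

// Invented helper: D-register numbers named by a double-spaced list of
// `count` registers starting at dFirst, for the even ('a') or odd ('b')
// variant of a lane operation.
std::vector<unsigned> doubleSpacedDRegs(unsigned dFirst, unsigned count,
                                        bool odd) {
  std::vector<unsigned> regs;
  for (unsigned i = 0; i < count; ++i)
    regs.push_back(dFirst + (odd ? 1 : 0) + 2 * i);
  return regs;
}
// doubleSpacedDRegs(0, 2, false) -> {0, 2} i.e. {d0, d2}
// doubleSpacedDRegs(0, 2, true)  -> {1, 3} i.e. {d1, d3}
```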
// vst2 to double-spaced odd registers.
-def VST2LNq16b: VST2LN<0b0101, "vst2.16">;
-def VST2LNq32b: VST2LN<0b1001, "vst2.32">;
+def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> {
+  let Inst{5} = 1;
+}
+def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> {
+  let Inst{6} = 1;
+}

// VST3LN : Vector Store (single 3-element structure from one lane)
-class VST3LN<bits<4> op11_8, string OpcodeStr>
-  : NLdSt<1,0b00,op11_8,0b0000, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
-           nohash_imm:$lane), IIC_VST,
-          !strconcat(OpcodeStr,
-                     "\t\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr"), "", []>;
-
-def VST3LNd8  : VST3LN<0b0010, "vst3.8">;
-def VST3LNd16 : VST3LN<0b0110, "vst3.16">;
-def VST3LNd32 : VST3LN<0b1010, "vst3.32">;
+class VST3LN<bits<4> op11_8, string OpcodeStr, string Dt>
+  : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+           nohash_imm:$lane), IIC_VST,
+          OpcodeStr, Dt,
+          "\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr", "", []>;
+
+// vst3 to single-spaced registers.
+def VST3LNd8  : VST3LN<0b0010, "vst3", "8"> {
+  let Inst{4} = 0;
+}
+def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> {
+  let Inst{5-4} = 0b00;
+}
+def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> {
+  let Inst{6-4} = 0b000;
+}

// vst3 to double-spaced even registers.
-def VST3LNq16a: VST3LN<0b0110, "vst3.16">;
-def VST3LNq32a: VST3LN<0b1010, "vst3.32">;
+def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> {
+  let Inst{5-4} = 0b10;
+}
+def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> {
+  let Inst{6-4} = 0b100;
+}

// vst3 to double-spaced odd registers.
-def VST3LNq16b: VST3LN<0b0110, "vst3.16">;
-def VST3LNq32b: VST3LN<0b1010, "vst3.32">;
+def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> {
+  let Inst{5-4} = 0b10;
+}
+def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> {
+  let Inst{6-4} = 0b100;
+}

// VST4LN : Vector Store (single 4-element structure from one lane)
-class VST4LN<bits<4> op11_8, string OpcodeStr>
-  : NLdSt<1,0b00,op11_8,0b0000, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
-           nohash_imm:$lane), IIC_VST,
-          !strconcat(OpcodeStr,
-          "\t\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr"),
-          "", []>;
-
-def VST4LNd8  : VST4LN<0b0011, "vst4.8">;
-def VST4LNd16 : VST4LN<0b0111, "vst4.16">;
-def VST4LNd32 : VST4LN<0b1011, "vst4.32">;
+class VST4LN<bits<4> op11_8, string OpcodeStr, string Dt>
+  : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+           nohash_imm:$lane), IIC_VST,
+          OpcodeStr, Dt,
+          "\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr",
+          "", []>;
+
+// vst4 to single-spaced registers.
+def VST4LNd8  : VST4LN<0b0011, "vst4", "8">;
+def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> {
+  let Inst{5} = 0;
+}
+def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> {
+  let Inst{6} = 0;
+}

// vst4 to double-spaced even registers.
-def VST4LNq16a: VST4LN<0b0111, "vst4.16">;
-def VST4LNq32a: VST4LN<0b1011, "vst4.32">;
+def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> {
+  let Inst{5} = 1;
+}
+def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> {
+  let Inst{6} = 1;
+}

// vst4 to double-spaced odd registers.
-def VST4LNq16b: VST4LN<0b0111, "vst4.16">;
-def VST4LNq32b: VST4LN<0b1011, "vst4.32">;
+def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> {
+  let Inst{5} = 1;
+}
+def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> {
+  let Inst{6} = 1;
+}

} // mayStore = 1, hasExtraSrcRegAllocReq = 1
@@ -570,25 +656,25 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{

// Basic 2-register operations, both double- and quad-register.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
-           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
-        (ins DPR:$src), IIC_VUNAD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
-           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
-        (ins QPR:$src), IIC_VUNAQ, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;

// Basic 2-register operations, scalar single-precision.
class N2VDs<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
-            bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+            bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
            ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
-        IIC_VUNAD, !strconcat(OpcodeStr, "\t$dst, $src"), "", []>;
+        IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>;

class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
  : NEONFPPat<(ResTy (OpNode SPR:$a)),
@@ -599,27 +685,27 @@ class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
-              InstrItinClass itin, string OpcodeStr,
+              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
-        (ins DPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
-              InstrItinClass itin, string OpcodeStr,
+              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
-        (ins QPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Basic 2-register intrinsics, scalar single-precision
class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
               bits<2> op17_16, bits<5> op11_7, bit op4,
-               InstrItinClass itin, string OpcodeStr,
+               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src"), "", []>;
+        OpcodeStr, Dt, "$dst, $src", "", []>;

class N2VDIntsPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
@@ -630,49 +716,62 @@ class N2VDIntsPat<SDNode OpNode, NeonI Inst>

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
-              InstrItinClass itin, string OpcodeStr,
+              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
-        (ins QPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;

// Long 2-register intrinsics (currently only used for VMOVL).
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
-              InstrItinClass itin, string OpcodeStr,
+              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst),
-        (ins DPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
-class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr>
+class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$dst1, DPR:$dst2),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
-        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
+        OpcodeStr, Dt, "$dst1, $dst2",
        "$src1 = $dst1, $src2 = $dst2", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
-                  InstrItinClass itin, string OpcodeStr>
+                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2),
        (ins QPR:$src1, QPR:$src2), itin,
-        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
+        OpcodeStr, Dt, "$dst1, $dst2",
        "$src1 = $dst1, $src2 = $dst2", []>;

// Basic 3-register operations, both double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-           InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
+           InstrItinClass itin, string OpcodeStr, string Dt,
+           ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+        OpcodeStr, Dt, "$dst, $src1, $src2", "",
+        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
+  let isCommutable = Commutable;
+}
+// Same as N3VD but no data type.
+class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+            InstrItinClass itin, string OpcodeStr,
+            ValueType ResTy, ValueType OpTy,
+            SDNode OpNode, bit Commutable>
+  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
+         (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
+         OpcodeStr, "$dst, $src1, $src2", "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
-             InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode ShOp>
+             InstrItinClass itin, string OpcodeStr, string Dt,
+             ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
-        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+        itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
@@ -680,11 +779,11 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
-               string OpcodeStr, ValueType Ty, SDNode ShOp>
+               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        IIC_VMULi16D,
-        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_8:$src2),
@@ -693,20 +792,31 @@ class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
}

class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-           InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
+           InstrItinClass itin, string OpcodeStr, string Dt,
+           ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+        OpcodeStr, Dt, "$dst, $src1, $src2", "",
+        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
+  let isCommutable = Commutable;
+}
+class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+            InstrItinClass itin, string OpcodeStr,
+            ValueType ResTy, ValueType OpTy,
+            SDNode OpNode, bit Commutable>
+  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
+         (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
+         OpcodeStr, "$dst, $src1, $src2", "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
-             InstrItinClass itin, string OpcodeStr,
+             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
-        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+        itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
@@ -714,11 +824,12 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
-               string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode ShOp>
+               string OpcodeStr, string Dt,
+               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        IIC_VMULi16Q,
-        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
$src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_8:$src2), @@ -728,11 +839,11 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, // Basic 3-register operations, scalar single-precision class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", []> { + OpcodeStr, Dt, "$dst, $src1, $src2", "", []> { let isCommutable = Commutable; } class N3VDsPat<SDNode OpNode, NeonI Inst> @@ -744,19 +855,20 @@ class N3VDsPat<SDNode OpNode, NeonI Inst> // Basic 3-register intrinsics, both double- and quad-register. class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", + OpcodeStr, Dt, "$dst, $src1, $src2", "", [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> { let isCommutable = Commutable; } class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType Ty, Intrinsic IntOp> + string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (IntOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_VFP2:$src2), @@ -764,10 +876,10 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, let isCommutable = 0; } class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType Ty, Intrinsic IntOp> + string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (IntOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_8:$src2), @@ -776,19 +888,21 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, } class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 1, op4, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", + OpcodeStr, Dt, "$dst, $src1, $src2", "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> { let isCommutable = Commutable; } class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, 
!strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2), @@ -796,10 +910,11 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, let isCommutable = 0; } class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_8:$src2), @@ -809,30 +924,32 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, // Multiply-Add/Sub operations, both double- and quad-register. class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin, - !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", [(set DPR:$dst, (Ty (OpNode DPR:$src1, (Ty (MulOp DPR:$src2, DPR:$src3)))))]>; class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp> + string OpcodeStr, string Dt, + ValueType Ty, SDNode MulOp, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin, - !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst", + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (MulOp DPR:$src2, (Ty (NEONvduplane (Ty DPR_VFP2:$src3), imm:$lane)))))))]>; class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp> + string OpcodeStr, string Dt, + ValueType Ty, SDNode MulOp, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, - !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst", + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (MulOp DPR:$src2, @@ -840,32 +957,33 @@ class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))))]>; class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType Ty, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V<op24, op23, op21_20, op11_8, 1, op4, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin, - !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", [(set QPR:$dst, (Ty (OpNode QPR:$src1, (Ty (MulOp QPR:$src2, QPR:$src3)))))]>; class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode MulOp, SDNode ShOp> : N3V<1, 1, op21_20, 
@@ -809,30 +924,32 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,

// Multiply-Add/Sub operations, both double- and quad-register.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-                InstrItinClass itin, string OpcodeStr,
+                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
+        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set DPR:$dst, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
-                  string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
+                  string OpcodeStr, string Dt,
+                  ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                             (Ty (NEONvduplane (Ty DPR_VFP2:$src3),
                                  imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
-                    string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
+                    string OpcodeStr, string Dt,
+                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
@@ -840,32 +957,33 @@ class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                             imm:$lane)))))))]>;
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-                InstrItinClass itin, string OpcodeStr, ValueType Ty,
+                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
+        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
-                  string OpcodeStr, ValueType ResTy, ValueType OpTy,
+                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                   (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                        imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
-                    string OpcodeStr, ValueType ResTy, ValueType OpTy,
+                    string OpcodeStr, string Dt,
+                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
@@ -874,12 +992,12 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,

// Multiply-Add/Sub operations, scalar single-precision
class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-                 InstrItinClass itin, string OpcodeStr,
+                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", []>;
+        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;

class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
@@ -892,50 +1010,51 @@ class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst>

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-               InstrItinClass itin, string OpcodeStr,
+               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
+        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
                                      (OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-               InstrItinClass itin, string OpcodeStr,
+               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
+        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
                                      (OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
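The MulOp and Int3 families share the accumulate shape expressed by the "$src1 = $dst" constraint: the destination is tied to the first source and the instruction reads and rewrites it. A rough scalar model of what an instruction like `vmla.i32 d0, d1, d2` computes per element, purely as an illustration:

```cpp
#include <cstdint>

// Scalar model of the tied-accumulator pattern above:
// acc[i] += a[i] * b[i], with `acc` both read and written.
void vmla_i32(int32_t acc[2], const int32_t a[2], const int32_t b[2]) {
  for (int i = 0; i < 2; ++i)
    acc[i] += a[i] * b[i];
}
```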
// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-               InstrItinClass itin, string OpcodeStr,
+               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
+        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
-                 string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+                 string OpcodeStr, string Dt,
+                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
-                   string OpcodeStr, ValueType ResTy, ValueType OpTy,
+                   string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                   Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
@@ -945,40 +1064,41 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass iti

// Narrowing 3-register intrinsics.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-              string OpcodeStr, ValueType TyD, ValueType TyQ,
+              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VBINi4D,
-        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-              InstrItinClass itin, string OpcodeStr, ValueType TyQ, ValueType TyD,
-              Intrinsic IntOp, bit Commutable>
+              InstrItinClass itin, string OpcodeStr, string Dt,
+              ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
-        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
-                string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+                string OpcodeStr, string Dt,
+                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
-        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+        itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
-                  string OpcodeStr, ValueType ResTy, ValueType OpTy,
+                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
-        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+        itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src2),
@@ -986,128 +1106,135 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin

// Wide 3-register intrinsics.
class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-              string OpcodeStr, ValueType TyQ, ValueType TyD,
+              string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), IIC_VSUBiD,
-        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
-                bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+                bits<2> op17_16, bits<5> op11_7, bit op4,
+                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
-        (ins DPR:$src), IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        (ins DPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
-                bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+                bits<2> op17_16, bits<5> op11_7, bit op4,
+                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
-        (ins QPR:$src), IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        (ins QPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
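For orientation: a pairwise-long op halves the element count while doubling the element width. A scalar model of `vpaddl.u8 d0, d1` (8 x u8 in, 4 x u16 out, each output the sum of one adjacent input pair), written only as an illustration of the semantics:

```cpp
#include <cstdint>

// Pairwise long add: out[i] = in[2i] + in[2i+1], widened so the sum
// cannot overflow. Illustration only, not tied to the intrinsics above.
void vpaddl_u8(uint16_t out[4], const uint8_t in[8]) {
  for (int i = 0; i < 4; ++i)
    out[i] = (uint16_t)in[2 * i] + in[2 * i + 1];
}
```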
// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
-                 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+                 bits<2> op17_16, bits<5> op11_7, bit op4,
+                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VPALiD,
-        !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst",
+        OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
        [(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
-                 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+                 bits<2> op17_16, bits<5> op11_7, bit op4,
+                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VPALiQ,
-        !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst",
+        OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
        [(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;

// Shift by immediate,
// both double- and quad-register.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
-             InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode>
+             InstrItinClass itin, string OpcodeStr, string Dt,
+             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), itin,
-           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
-             InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode>
+             InstrItinClass itin, string OpcodeStr, string Dt,
+             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin,
-           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;

// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
-             string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+             string OpcodeStr, string Dt,
+             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VSHLiD,
-           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src),
                                          (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
-             InstrItinClass itin, string OpcodeStr,
+             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin,
-           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src),
                                          (i32 imm:$SIMM))))]>;
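The long/narrow shift classes just above change element width across the D/Q boundary. A scalar model of a narrowing shift such as `vshrn.i16 d0, q1, #imm` (each 16-bit element shifted right, then truncated to 8 bits), given only as an illustration:

```cpp
#include <cstdint>

// Narrowing shift: shift each wide element right by `imm`, then keep
// the low half. Illustration only.
void vshrn_i16(uint8_t out[8], const uint16_t in[8], unsigned imm) {
  for (int i = 0; i < 8; ++i)
    out[i] = (uint8_t)(in[i] >> imm);
}
```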
// Shift right by immediate and accumulate,
// both double- and quad-register.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
-                string OpcodeStr, ValueType Ty, SDNode ShOp>
+                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VPALiD,
-           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
+           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set DPR:$dst, (Ty (add DPR:$src1,
                                (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
-                string OpcodeStr, ValueType Ty, SDNode ShOp>
+                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VPALiD,
-           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
+           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set QPR:$dst, (Ty (add QPR:$src1,
                                (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;

// Shift by immediate and insert,
// both double- and quad-register.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
-                string OpcodeStr, ValueType Ty, SDNode ShOp>
+                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VSHLiD,
-           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
+           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
-                string OpcodeStr, ValueType Ty, SDNode ShOp>
+                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VSHLiQ,
-           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
+           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;

// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
-              string OpcodeStr, ValueType ResTy, ValueType OpTy,
+              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VUNAD,
-           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
-              string OpcodeStr, ValueType ResTy, ValueType OpTy,
+              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), IIC_VUNAQ,
-           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
@@ -1126,41 +1253,55 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
-                   string OpcodeStr, SDNode OpNode, bit Commutable = 0> {
+                   string OpcodeStr, string Dt,
+                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
   def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
-                   !strconcat(OpcodeStr, "8"), v8i8, v8i8, OpNode, Commutable>;
+                   OpcodeStr, !strconcat(Dt, "8"),
+                   v8i8, v8i8, OpNode, Commutable>;
   def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
-                   !strconcat(OpcodeStr, "16"), v4i16, v4i16, OpNode, Commutable>;
+                   OpcodeStr, !strconcat(Dt, "16"),
+                   v4i16, v4i16, OpNode, Commutable>;
   def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
-                   !strconcat(OpcodeStr, "32"), v2i32, v2i32, OpNode, Commutable>;
+                   OpcodeStr, !strconcat(Dt, "32"),
+                   v2i32, v2i32, OpNode, Commutable>;

   // 128-bit vector types.
   def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
-                   !strconcat(OpcodeStr, "8"), v16i8, v16i8, OpNode, Commutable>;
+                   OpcodeStr, !strconcat(Dt, "8"),
+                   v16i8, v16i8, OpNode, Commutable>;
   def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
-                   !strconcat(OpcodeStr, "16"), v8i16, v8i16, OpNode, Commutable>;
+                   OpcodeStr, !strconcat(Dt, "16"),
+                   v8i16, v8i16, OpNode, Commutable>;
   def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
-                   !strconcat(OpcodeStr, "32"), v4i32, v4i32, OpNode, Commutable>;
+                   OpcodeStr, !strconcat(Dt, "32"),
+                   v4i32, v4i32, OpNode, Commutable>;
 }

-multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
-  def v4i16 : N3VDSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v4i16, ShOp>;
-  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, !strconcat(OpcodeStr, "32"), v2i32, ShOp>;
-  def v8i16 : N3VQSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v8i16, v4i16, ShOp>;
-  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, !strconcat(OpcodeStr, "32"), v4i32, v2i32, ShOp>;
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
+  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+                       v4i16, ShOp>;
+  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
+                     v2i32, ShOp>;
+  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+                       v8i16, v4i16, ShOp>;
+  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
+                     v4i32, v2i32, ShOp>;
 }

 // ....then also with element size 64 bits:
 multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                     InstrItinClass itinD, InstrItinClass itinQ,
-                    string OpcodeStr, SDNode OpNode, bit Commutable = 0>
+                    string OpcodeStr, string Dt,
+                    SDNode OpNode, bit Commutable = 0>
   : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
-            OpcodeStr, OpNode, Commutable> {
+            OpcodeStr, Dt, OpNode, Commutable> {
   def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
-                   !strconcat(OpcodeStr, "64"), v1i64, v1i64, OpNode, Commutable>;
+                   OpcodeStr, !strconcat(Dt, "64"),
+                   v1i64, v1i64, OpNode, Commutable>;
   def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
-                   !strconcat(OpcodeStr, "64"), v2i64, v2i64, OpNode, Commutable>;
+                   OpcodeStr, !strconcat(Dt, "64"),
+                   v2i64, v2i64, OpNode, Commutable>;
 }

@@ -1168,27 +1309,30 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
 // source operand element sizes of 16, 32 and 64 bits:
 multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op6, bit op4,
-                       InstrItinClass itin, string OpcodeStr,
+                       InstrItinClass itin, string OpcodeStr, string Dt,
                        Intrinsic IntOp> {
   def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
-                      itin, !strconcat(OpcodeStr, "16"), v8i8, v8i16, IntOp>;
+                      itin, OpcodeStr, !strconcat(Dt, "16"),
+                      v8i8, v8i16, IntOp>;
   def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
-                      itin, !strconcat(OpcodeStr, "32"), v4i16, v4i32, IntOp>;
+                      itin, OpcodeStr, !strconcat(Dt, "32"),
+                      v4i16, v4i32, IntOp>;
   def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
-                      itin, !strconcat(OpcodeStr, "64"), v2i32, v2i64, IntOp>;
+                      itin, OpcodeStr, !strconcat(Dt, "64"),
+                      v2i32, v2i64, IntOp>;
 }

 // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
 // source operand element sizes of 16, 32 and 64 bits:
 multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
-                       string OpcodeStr, Intrinsic IntOp> {
+                       string OpcodeStr, string Dt, Intrinsic IntOp> {
   def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
-                      !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
+                      OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
   def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
-                      !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+                      OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
   def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
-                      !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+                      OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
 }

@@ -1198,66 +1342,85 @@ multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
 multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
-                     string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
+                     string OpcodeStr, string Dt,
+                     Intrinsic IntOp, bit Commutable = 0> {
   // 64-bit vector types.
-  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, itinD16, !strconcat(OpcodeStr,"16"),
+  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, itinD16,
+                      OpcodeStr, !strconcat(Dt, "16"),
                       v4i16, v4i16, IntOp, Commutable>;
-  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, itinD32, !strconcat(OpcodeStr,"32"),
+  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, itinD32,
+                      OpcodeStr, !strconcat(Dt, "32"),
                       v2i32, v2i32, IntOp, Commutable>;

   // 128-bit vector types.
-  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, itinQ16, !strconcat(OpcodeStr,"16"),
+  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, itinQ16,
+                      OpcodeStr, !strconcat(Dt, "16"),
                       v8i16, v8i16, IntOp, Commutable>;
-  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, itinQ32, !strconcat(OpcodeStr,"32"),
+  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, itinQ32,
+                      OpcodeStr, !strconcat(Dt, "32"),
                       v4i32, v4i32, IntOp, Commutable>;
 }

 multiclass N3VIntSL_HS<bits<4> op11_8,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
-                       string OpcodeStr, Intrinsic IntOp> {
-  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, !strconcat(OpcodeStr, "16"), v4i16, IntOp>;
-  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, !strconcat(OpcodeStr, "32"), v2i32, IntOp>;
-  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, !strconcat(OpcodeStr, "16"), v8i16, v4i16, IntOp>;
-  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, !strconcat(OpcodeStr, "32"), v4i32, v2i32, IntOp>;
+                       string OpcodeStr, string Dt, Intrinsic IntOp> {
+  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
+                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
+  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
+                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
+  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
+                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
+  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
+                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
 }

 // ....then also with element size of 8 bits:
 multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
-                      string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
+                      string OpcodeStr, string Dt,
+                      Intrinsic IntOp, bit Commutable = 0>
   : N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
-              OpcodeStr, IntOp, Commutable> {
+              OpcodeStr, Dt, IntOp, Commutable> {
   def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16,
-                      !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp, Commutable>;
+                      OpcodeStr, !strconcat(Dt, "8"),
+                      v8i8, v8i8, IntOp, Commutable>;
   def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16,
-                      !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp, Commutable>;
+                      OpcodeStr, !strconcat(Dt, "8"),
+                      v16i8, v16i8, IntOp, Commutable>;
 }

 // ....then also with element size of 64 bits:
 multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
-                       string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
+                       string OpcodeStr, string Dt,
+                       Intrinsic IntOp, bit Commutable = 0>
   : N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
-               OpcodeStr, IntOp, Commutable> {
+               OpcodeStr, Dt, IntOp, Commutable> {
   def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32,
-                      !strconcat(OpcodeStr,"64"), v1i64, v1i64, IntOp, Commutable>;
+                      OpcodeStr, !strconcat(Dt, "64"),
+                      v1i64, v1i64, IntOp, Commutable>;
   def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32,
-                      !strconcat(OpcodeStr,"64"), v2i64, v2i64, IntOp, Commutable>;
+                      OpcodeStr, !strconcat(Dt, "64"),
+                      v2i64, v2i64, IntOp, Commutable>;
 }

 // Neon Narrowing 3-register vector intrinsics,
 // source operand element sizes of 16, 32 and 64 bits:
 multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
-                       string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
-  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr,"16"),
+                       string OpcodeStr, string Dt,
+                       Intrinsic IntOp, bit Commutable = 0> {
+  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
+                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
-  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"32"),
+  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
+                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
-  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"64"),
+  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
+                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
 }

@@ -1266,41 +1429,50 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
 // First with only element sizes of 16 and 32 bits:
 multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
-                      InstrItinClass itin, string OpcodeStr,
+                      InstrItinClass itin, string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
   def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin,
-                      !strconcat(OpcodeStr,"16"), v4i32, v4i16, IntOp, Commutable>;
+                      OpcodeStr, !strconcat(Dt, "16"),
+                      v4i32, v4i16, IntOp, Commutable>;
   def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin,
-                      !strconcat(OpcodeStr,"32"), v2i64, v2i32, IntOp, Commutable>;
+                      OpcodeStr, !strconcat(Dt, "32"),
+                      v2i64, v2i32, IntOp, Commutable>;
 }

 multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
-                        InstrItinClass itin, string OpcodeStr, Intrinsic IntOp> {
+                        InstrItinClass itin, string OpcodeStr, string Dt,
+                        Intrinsic IntOp> {
   def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
-                          !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
   def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
-                        !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
 }

 // ....then also with element size of 8 bits:
 multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
-                       InstrItinClass itin, string OpcodeStr,
+                       InstrItinClass itin, string OpcodeStr, string Dt,
                        Intrinsic IntOp, bit Commutable = 0>
-  : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, IntOp, Commutable> {
+  : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, Dt,
+               IntOp, Commutable> {
   def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin,
-                      !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp, Commutable>;
+                      OpcodeStr, !strconcat(Dt, "8"),
+                      v8i16, v8i8, IntOp, Commutable>;
 }

 // Neon Wide 3-register vector intrinsics,
 // source operand element sizes of 8, 16 and 32 bits:
 multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
-                       string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
-  def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
+                       string OpcodeStr, string Dt,
+                       Intrinsic IntOp, bit Commutable = 0> {
+  def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4,
+                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
-  def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"),
+  def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4,
+                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
-  def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"),
+  def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4,
+                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
 }

@@ -1310,57 +1482,57 @@ multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
 multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
-                        string OpcodeStr, SDNode OpNode> {
+                        string OpcodeStr, string Dt, SDNode OpNode> {
   // 64-bit vector types.
   def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
-                        !strconcat(OpcodeStr, "8"), v8i8, mul, OpNode>;
+                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
   def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
-                        !strconcat(OpcodeStr, "16"), v4i16, mul, OpNode>;
+                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
   def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
-                        !strconcat(OpcodeStr, "32"), v2i32, mul, OpNode>;
+                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

   // 128-bit vector types.
   def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
-                        !strconcat(OpcodeStr, "8"), v16i8, mul, OpNode>;
+                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
   def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
-                        !strconcat(OpcodeStr, "16"), v8i16, mul, OpNode>;
+                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
   def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
-                        !strconcat(OpcodeStr, "32"), v4i32, mul, OpNode>;
+                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
 }

 multiclass N3VMulOpSL_HS<bits<4> op11_8,
                          InstrItinClass itinD16, InstrItinClass itinD32,
                          InstrItinClass itinQ16, InstrItinClass itinQ32,
-                         string OpcodeStr, SDNode ShOp> {
+                         string OpcodeStr, string Dt, SDNode ShOp> {
   def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
-                            !strconcat(OpcodeStr, "16"), v4i16, mul, ShOp>;
+                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
   def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
-                          !strconcat(OpcodeStr, "32"), v2i32, mul, ShOp>;
+                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
   def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
-                            !strconcat(OpcodeStr, "16"), v8i16, v4i16, mul, ShOp>;
+                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, mul, ShOp>;
   def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
-                          !strconcat(OpcodeStr, "32"), v4i32, v2i32, mul, ShOp>;
+                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, mul, ShOp>;
 }

 // Neon 3-argument intrinsics,
 // element sizes of 8, 16 and 32 bits:
 multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
-                       string OpcodeStr, Intrinsic IntOp> {
+                       string OpcodeStr, string Dt, Intrinsic IntOp> {
   // 64-bit vector types.
   def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
-                       !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
+                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
   def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
-                       !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
+                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
   def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D,
-                       !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;
+                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

   // 128-bit vector types.
   def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q,
-                       !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
+                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
   def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q,
-                       !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
+                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
   def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q,
-                       !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
+                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
 }

@@ -1368,27 +1540,27 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
 // First with only element sizes of 16 and 32 bits:
 multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
-                       string OpcodeStr, Intrinsic IntOp> {
+                       string OpcodeStr, string Dt, Intrinsic IntOp> {
   def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
-                       !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
   def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi16D,
-                       !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
 }

 multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
-                         string OpcodeStr, Intrinsic IntOp> {
+                         string OpcodeStr, string Dt, Intrinsic IntOp> {
   def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
-                           !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
   def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
-                         !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
 }

 // ....then also with element size of 8 bits:
 multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
-                        string OpcodeStr, Intrinsic IntOp>
-  : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp> {
+                        string OpcodeStr, string Dt, Intrinsic IntOp>
+  : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, Dt, IntOp> {
   def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
-                       !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
+                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
 }

@@ -1397,22 +1569,22 @@ multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
 multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
-                      string OpcodeStr, Intrinsic IntOp> {
+                      string OpcodeStr, string Dt, Intrinsic IntOp> {
   // 64-bit vector types.
   def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
-                      itinD, !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
+                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
   def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
-                      itinD, !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
+                      itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
   def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
-                      itinD, !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;
+                      itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

   // 128-bit vector types.
   def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
-                      itinQ, !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
+                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
   def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
-                      itinQ, !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
+                      itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
   def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
-                      itinQ, !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
+                      itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
 }

@@ -1420,22 +1592,22 @@ multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
 // element sizes of 8, 16 and 32 bits:
 multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
-                        string OpcodeStr, Intrinsic IntOp> {
+                        string OpcodeStr, string Dt, Intrinsic IntOp> {
   // 64-bit vector types.
   def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
-                        !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>;
+                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
   def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
-                        !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>;
+                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
   def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
-                        !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>;
+                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

   // 128-bit vector types.
   def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
-                        !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>;
+                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
   def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
-                        !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>;
+                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
   def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
-                        !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>;
+                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
 }

@@ -1443,61 +1615,62 @@ multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
 // element sizes of 8, 16 and 32 bits:
 multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                          bits<5> op11_7, bit op4,
-                         string OpcodeStr, Intrinsic IntOp> {
+                         string OpcodeStr, string Dt, Intrinsic IntOp> {
   // 64-bit vector types.
   def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
-                         !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>;
+                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
   def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
-                         !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>;
+                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
   def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
-                         !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>;
+                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

   // 128-bit vector types.
   def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
-                         !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>;
+                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
   def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
-                         !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>;
+                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
   def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
-                         !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>;
+                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
 }

 // Neon 2-register vector shift by immediate,
 // element sizes of 8, 16, 32 and 64 bits:
 multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
-                      InstrItinClass itin, string OpcodeStr, SDNode OpNode> {
+                      InstrItinClass itin, string OpcodeStr, string Dt,
+                      SDNode OpNode> {
   // 64-bit vector types.
   def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, itin,
-                     !strconcat(OpcodeStr, "8"), v8i8, OpNode> {
+                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
-                     !strconcat(OpcodeStr, "16"), v4i16, OpNode> {
+                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
-                     !strconcat(OpcodeStr, "32"), v2i32, OpNode> {
+                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
   def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, itin,
-                     !strconcat(OpcodeStr, "64"), v1i64, OpNode>;
+                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                              // imm6 = xxxxxx

   // 128-bit vector types.
   def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
-                     !strconcat(OpcodeStr, "8"), v16i8, OpNode> {
+                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
-                     !strconcat(OpcodeStr, "16"), v8i16, OpNode> {
+                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
-                     !strconcat(OpcodeStr, "32"), v4i32, OpNode> {
+                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
   def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, itin,
-                     !strconcat(OpcodeStr, "64"), v2i64, OpNode>;
+                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                              // imm6 = xxxxxx
 }

@@ -1505,39 +1678,39 @@ multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
 // Neon Shift-Accumulate vector operations,
 // element sizes of 8, 16, 32 and 64 bits:
 multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
-                         string OpcodeStr, SDNode ShOp> {
+                         string OpcodeStr, string Dt, SDNode ShOp> {
   // 64-bit vector types.
   def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4,
-                        !strconcat(OpcodeStr, "8"), v8i8, ShOp> {
+                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4,
-                        !strconcat(OpcodeStr, "16"), v4i16, ShOp> {
+                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4,
-                        !strconcat(OpcodeStr, "32"), v2i32, ShOp> {
+                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
   def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4,
-                        !strconcat(OpcodeStr, "64"), v1i64, ShOp>;
+                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                              // imm6 = xxxxxx

   // 128-bit vector types.
   def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4,
-                        !strconcat(OpcodeStr, "8"), v16i8, ShOp> {
+                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4,
-                        !strconcat(OpcodeStr, "16"), v8i16, ShOp> {
+                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4,
-                        !strconcat(OpcodeStr, "32"), v4i32, ShOp> {
+                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
   def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4,
-                        !strconcat(OpcodeStr, "64"), v2i64, ShOp>;
+                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                              // imm6 = xxxxxx
 }

@@ -1548,53 +1721,53 @@ multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr, SDNode ShOp> {
   // 64-bit vector types.
   def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4,
-                        !strconcat(OpcodeStr, "8"), v8i8, ShOp> {
+                        OpcodeStr, "8", v8i8, ShOp> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4,
-                        !strconcat(OpcodeStr, "16"), v4i16, ShOp> {
+                        OpcodeStr, "16", v4i16, ShOp> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4,
-                        !strconcat(OpcodeStr, "32"), v2i32, ShOp> {
+                        OpcodeStr, "32", v2i32, ShOp> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
   def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4,
-                        !strconcat(OpcodeStr, "64"), v1i64, ShOp>;
+                        OpcodeStr, "64", v1i64, ShOp>;
                              // imm6 = xxxxxx

   // 128-bit vector types.
   def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4,
-                        !strconcat(OpcodeStr, "8"), v16i8, ShOp> {
+                        OpcodeStr, "8", v16i8, ShOp> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4,
-                        !strconcat(OpcodeStr, "16"), v8i16, ShOp> {
+                        OpcodeStr, "16", v8i16, ShOp> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4,
-                        !strconcat(OpcodeStr, "32"), v4i32, ShOp> {
+                        OpcodeStr, "32", v4i32, ShOp> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
   def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4,
-                        !strconcat(OpcodeStr, "64"), v2i64, ShOp>;
+                        OpcodeStr, "64", v2i64, ShOp>;
                              // imm6 = xxxxxx
 }

 // Neon Shift Long operations,
 // element sizes of 8, 16, 32 bits:
 multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
-                      bit op4, string OpcodeStr, SDNode OpNode> {
+                      bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
   def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
-                     !strconcat(OpcodeStr, "8"), v8i16, v8i8, OpNode> {
+                     OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
-                     !strconcat(OpcodeStr, "16"), v4i32, v4i16, OpNode> {
+                     OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
-                     !strconcat(OpcodeStr, "32"), v2i64, v2i32, OpNode> {
+                     OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
 }

@@ -1602,18 +1775,18 @@ multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
 // Neon Shift Narrow operations,
 // element sizes of 16, 32, 64 bits:
 multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
-                      bit op4, InstrItinClass itin, string OpcodeStr,
+                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
   def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
-                     !strconcat(OpcodeStr, "16"), v8i8, v8i16, OpNode> {
"16"), v8i8, v8i16, OpNode> { + OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, - !strconcat(OpcodeStr, "32"), v4i16, v4i32, OpNode> { + OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, - !strconcat(OpcodeStr, "64"), v2i32, v2i64, OpNode> { + OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } } @@ -1625,49 +1798,58 @@ multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, // Vector Add Operations. // VADD : Vector Add (integer and floating-point) -defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd.i", add, 1>; -def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd.f32", v2f32, v2f32, fadd, 1>; -def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd.f32", v4f32, v4f32, fadd, 1>; +defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i", + add, 1>; +def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", + v2f32, v2f32, fadd, 1>; +def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", + v4f32, v4f32, fadd, 1>; // VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl.s", int_arm_neon_vaddls, 1>; -defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl.u", int_arm_neon_vaddlu, 1>; +defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl", "s", + int_arm_neon_vaddls, 1>; +defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u", + int_arm_neon_vaddlu, 1>; // VADDW : Vector Add Wide (Q = Q + D) -defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw.s", int_arm_neon_vaddws, 0>; -defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw.u", int_arm_neon_vaddwu, 0>; +defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>; +defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>; // VHADD : Vector Halving Add defm VHADDs : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vhadd.s", int_arm_neon_vhadds, 1>; + IIC_VBINi4Q, "vhadd", "s", int_arm_neon_vhadds, 1>; defm VHADDu : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vhadd.u", int_arm_neon_vhaddu, 1>; + IIC_VBINi4Q, "vhadd", "u", int_arm_neon_vhaddu, 1>; // VRHADD : Vector Rounding Halving Add defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vrhadd.s", int_arm_neon_vrhadds, 1>; + IIC_VBINi4Q, "vrhadd", "s", int_arm_neon_vrhadds, 1>; defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vrhadd.u", int_arm_neon_vrhaddu, 1>; + IIC_VBINi4Q, "vrhadd", "u", int_arm_neon_vrhaddu, 1>; // VQADD : Vector Saturating Add defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vqadd.s", int_arm_neon_vqadds, 1>; + IIC_VBINi4Q, "vqadd", "s", int_arm_neon_vqadds, 1>; defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vqadd.u", int_arm_neon_vqaddu, 1>; + IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) -defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>; +defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", + int_arm_neon_vaddhn, 1>; // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) -defm 
+defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
+                            int_arm_neon_vraddhn, 1>;

 // Vector Multiply Operations.

 // VMUL     : Vector Multiply (integer, polynomial and floating-point)
-defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q,
-                        IIC_VMULi32Q, "vmul.i", mul, 1>;
-def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul.p8", v8i8, v8i8,
-                        int_arm_neon_vmulp, 1>;
-def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul.p8", v16i8, v16i8,
-                        int_arm_neon_vmulp, 1>;
-def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul.f32", v2f32, v2f32, fmul, 1>;
-def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul.f32", v4f32, v4f32, fmul, 1>;
-defm VMULsl   : N3VSL_HS<0b1000, "vmul.i", mul>;
-def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul.f32", v2f32, fmul>;
-def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul.f32", v4f32, v2f32, fmul>;
+defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
+                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
+def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul", "p8",
+                        v8i8, v8i8, int_arm_neon_vmulp, 1>;
+def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul", "p8",
+                        v16i8, v16i8, int_arm_neon_vmulp, 1>;
+def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32",
+                     v2f32, v2f32, fmul, 1>;
+def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32",
+                     v4f32, v4f32, fmul, 1>;
+defm VMULsl   : N3VSL_HS<0b1000, "vmul", "i", mul>;
+def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
+def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, v2f32, fmul>;
 def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
           (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
@@ -1690,66 +1872,80 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1,
 // VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
 defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
                           IIC_VMULi16Q, IIC_VMULi32Q,
-                          "vqdmulh.s", int_arm_neon_vqdmulh, 1>;
+                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
 defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                             IIC_VMULi16Q, IIC_VMULi32Q,
-                            "vqdmulh.s", int_arm_neon_vqdmulh>;
+                            "vqdmulh", "s", int_arm_neon_vqdmulh>;
 def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
-                 (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+                                                            imm:$lane)))),
           (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
-                                        (DSubReg_i16_reg imm:$lane))),
+                                                 (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
 def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
-                 (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
-                                       (DSubReg_i32_reg imm:$lane))),
+                                                 (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;
 // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
 defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
                             IIC_VMULi16Q, IIC_VMULi32Q,
-                            "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>;
+                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
 defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                               IIC_VMULi16Q, IIC_VMULi32Q,
-                              "vqrdmulh.s", int_arm_neon_vqrdmulh>;
+                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
 def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
-                 (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+                                                             imm:$lane)))),
           (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                   (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                           (DSubReg_i16_reg imm:$lane))),
                                   (SubReg_i16_lane imm:$lane)))>;
 def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
-                 (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
-                                        (DSubReg_i32_reg imm:$lane))),
+                                                 (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

 // VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs   : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls, 1>;
-defm VMULLu   : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu, 1>;
-def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull.p8", v8i16, v8i8,
-                        int_arm_neon_vmullp, 1>;
-defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls>;
-defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu>;
+defm VMULLs   : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull", "s",
+                            int_arm_neon_vmulls, 1>;
+defm VMULLu   : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull", "u",
+                            int_arm_neon_vmullu, 1>;
+def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
+                        v8i16, v8i8, int_arm_neon_vmullp, 1>;
+defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s",
+                             int_arm_neon_vmulls>;
+defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u",
+                             int_arm_neon_vmullu>;

 // VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
-defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull, 1>;
-defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull>;
+defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull", "s",
+                           int_arm_neon_vqdmull, 1>;
+defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull", "s",
+                             int_arm_neon_vqdmull>;

 // Vector Multiply-Accumulate and Multiply-Subtract Operations.
 // VMLA     : Vector Multiply Accumulate (integer and floating-point)
 defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
-                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>;
-def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>;
-def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla.f32", v4f32, fmul, fadd>;
+                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
+                          v2f32, fmul, fadd>;
+def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
+                          v4f32, fmul, fadd>;
 defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
-                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>;
-def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>;
-def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla.f32", v4f32, v2f32, fmul, fadd>;
+                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
+                            v2f32, fmul, fadd>;
+def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
+                            v4f32, v2f32, fmul, fadd>;
 def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                   (mul (v8i16 QPR:$src2),
@@ -1766,7 +1962,7 @@ def : Pat<(v4i32 (add (v4i32 QPR:$src1),
           (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                               (v2i32 (EXTRACT_SUBREG QPR:$src3,
-                                     (DSubReg_i32_reg imm:$lane))),
+                                              (DSubReg_i32_reg imm:$lane))),
                               (SubReg_i32_lane imm:$lane)))>;

 def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
@@ -1779,25 +1975,30 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
                              (SubReg_i32_lane imm:$lane)))>;

 // VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
-defm VMLALs   : N3VLInt3_QHS<0,1,0b1000,0, "vmlal.s", int_arm_neon_vmlals>;
-defm VMLALu   : N3VLInt3_QHS<1,1,0b1000,0, "vmlal.u", int_arm_neon_vmlalu>;
+defm VMLALs   : N3VLInt3_QHS<0,1,0b1000,0, "vmlal", "s", int_arm_neon_vmlals>;
+defm VMLALu   : N3VLInt3_QHS<1,1,0b1000,0, "vmlal", "u", int_arm_neon_vmlalu>;

-defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal.s", int_arm_neon_vmlals>;
-defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal.u", int_arm_neon_vmlalu>;
+defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>;
+defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>;

 // VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
-defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal.s", int_arm_neon_vqdmlal>;
-defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal.s", int_arm_neon_vqdmlal>;
+defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal", "s",
+                            int_arm_neon_vqdmlal>;
+defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;

 // VMLS     : Vector Multiply Subtract (integer and floating-point)
 defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
-                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>;
-def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>;
-def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls.f32", v4f32, fmul, fsub>;
+                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
+                          v2f32, fmul, fsub>;
+def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
+                          v4f32, fmul, fsub>;
 defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
-                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>;
-def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>;
-def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls.f32", v4f32, v2f32, fmul, fsub>;
+                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
+                            v2f32, fmul, fsub>;
+def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
+                            v4f32, v2f32, fmul, fsub>;
 def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                   (mul (v8i16 QPR:$src2),
@@ -1810,7 +2011,7 @@ def : Pat<(v8i16 (sub (v8i16 QPR:$src1),

 def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                   (mul (v4i32 QPR:$src2),
-                   (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+                        (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
           (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                               (v2i32 (EXTRACT_SUBREG QPR:$src3,
@@ -1819,7 +2020,7 @@ def : Pat<(v4i32 (sub (v4i32 QPR:$src1),

 def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
                    (fmul (v4f32 QPR:$src2),
-                     (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+                         (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
           (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                            (v2f32 (EXTRACT_SUBREG QPR:$src3,
@@ -1827,146 +2028,170 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
                                   (SubReg_i32_lane imm:$lane)))>;

 // VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
-defm VMLSLs   : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl.s", int_arm_neon_vmlsls>;
-defm VMLSLu   : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>;
+defm VMLSLs   : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl", "s", int_arm_neon_vmlsls>;
+defm VMLSLu   : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl", "u", int_arm_neon_vmlslu>;

-defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl.s", int_arm_neon_vmlsls>;
-defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl.u", int_arm_neon_vmlslu>;
+defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>;
+defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>;

 // VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
-defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
-defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
+defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl", "s",
+                            int_arm_neon_vqdmlsl>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
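[Editorial aside: the change applied throughout these hunks is mechanical. Pre-joined mnemonic strings such as "vmlsl.s8" are split into a base mnemonic (OpcodeStr, e.g. "vmlsl") and a data-type suffix (Dt, e.g. "s"), with !strconcat now applied only to the suffix. The recombination into a printed assembly string happens in the NEON instruction formats, which are outside this diff; the following is only a minimal sketch with made-up class and def names, not the actual format definitions:

    // sketch.td (illustrative only; parseable with llvm-tblgen)
    class NVSketch<string OpcodeStr, string Dt, string AsmOps> {
      // Rebuild e.g. "vmlsl.s8\t$dst, $src1, $src2" from the separated parts.
      string AsmString =
          !strconcat(OpcodeStr,
          !strconcat(".", !strconcat(Dt, !strconcat("\t", AsmOps))));
    }
    // Multiclasses append the element width to Dt, as the diff does:
    def VMLSLs8Sketch : NVSketch<"vmlsl", !strconcat("s", "8"),
                                 "$dst, $src1, $src2">;

Keeping the suffix as a separate operand presumably lets later code treat the mnemonic and its data type independently, instead of re-splitting a fused string.]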

 // Vector Subtract Operations.

 // VSUB     : Vector Subtract (integer and floating-point)
-defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, "vsub.i", sub, 0>;
-def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub.f32", v2f32, v2f32, fsub, 0>;
-def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub.f32", v4f32, v4f32, fsub, 0>;
+defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
+                         "vsub", "i", sub, 0>;
+def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
+                     v2f32, v2f32, fsub, 0>;
+def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
+                     v4f32, v4f32, fsub, 0>;
 // VSUBL    : Vector Subtract Long (Q = D - D)
-defm VSUBLs   : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl.s", int_arm_neon_vsubls, 1>;
-defm VSUBLu   : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl.u", int_arm_neon_vsublu, 1>;
+defm VSUBLs   : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl", "s",
+                            int_arm_neon_vsubls, 1>;
+defm VSUBLu   : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl", "u",
+                            int_arm_neon_vsublu, 1>;
 // VSUBW    : Vector Subtract Wide (Q = Q - D)
-defm VSUBWs   : N3VWInt_QHS<0,1,0b0011,0, "vsubw.s", int_arm_neon_vsubws, 0>;
-defm VSUBWu   : N3VWInt_QHS<1,1,0b0011,0, "vsubw.u", int_arm_neon_vsubwu, 0>;
+defm VSUBWs   : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>;
+defm VSUBWu   : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>;
 // VHSUB    : Vector Halving Subtract
-defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                           IIC_VBINi4Q, "vhsub.s", int_arm_neon_vhsubs, 0>;
-defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                           IIC_VBINi4Q, "vhsub.u", int_arm_neon_vhsubu, 0>;
+defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D,
+                           IIC_VBINi4Q, IIC_VBINi4Q,
+                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
+defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D,
+                           IIC_VBINi4Q, IIC_VBINi4Q,
+                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
 // VQSUB    : Vector Saturing Subtract
-defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                            IIC_VBINi4Q, "vqsub.s", int_arm_neon_vqsubs, 0>;
-defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                            IIC_VBINi4Q, "vqsub.u", int_arm_neon_vqsubu, 0>;
+defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D,
+                            IIC_VBINi4Q, IIC_VBINi4Q,
+                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
+defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D,
+                            IIC_VBINi4Q, IIC_VBINi4Q,
+                            "vqsub", "u", int_arm_neon_vqsubu, 0>;
 // VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
-defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>;
+defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
+                            int_arm_neon_vsubhn, 0>;
 // VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
-defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>;
+defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
+                            int_arm_neon_vrsubhn, 0>;

 // Vector Comparisons.
 // VCEQ     : Vector Compare Equal
 defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                        IIC_VBINi4Q, "vceq.i", NEONvceq, 1>;
-def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq.f32", v2i32, v2f32, NEONvceq, 1>;
-def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq.f32", v4i32, v4f32, NEONvceq, 1>;
+                        IIC_VBINi4Q, "vceq", "i", NEONvceq, 1>;
+def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
+                     NEONvceq, 1>;
+def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
+                     NEONvceq, 1>;
 // VCGE     : Vector Compare Greater Than or Equal
 defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                        IIC_VBINi4Q, "vcge.s", NEONvcge, 0>;
+                        IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>;
 defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                        IIC_VBINi4Q, "vcge.u", NEONvcgeu, 0>;
-def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge.f32", v2i32, v2f32, NEONvcge, 0>;
-def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge.f32", v4i32, v4f32, NEONvcge, 0>;
+                        IIC_VBINi4Q, "vcge", "u", NEONvcgeu, 0>;
+def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32",
+                     v2i32, v2f32, NEONvcge, 0>;
+def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
+                     NEONvcge, 0>;
 // VCGT     : Vector Compare Greater Than
 defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                        IIC_VBINi4Q, "vcgt.s", NEONvcgt, 0>;
+                        IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>;
 defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                        IIC_VBINi4Q, "vcgt.u", NEONvcgtu, 0>;
-def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt.f32", v2i32, v2f32, NEONvcgt, 0>;
-def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt.f32", v4i32, v4f32, NEONvcgt, 0>;
+                        IIC_VBINi4Q, "vcgt", "u", NEONvcgtu, 0>;
+def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
+                     NEONvcgt, 0>;
+def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
+                     NEONvcgt, 0>;
 // VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
-def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge.f32", v2i32, v2f32,
-                        int_arm_neon_vacged, 0>;
-def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge.f32", v4i32, v4f32,
-                        int_arm_neon_vacgeq, 0>;
+def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32",
+                        v2i32, v2f32, int_arm_neon_vacged, 0>;
+def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge", "f32",
+                        v4i32, v4f32, int_arm_neon_vacgeq, 0>;
 // VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
-def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt.f32", v2i32, v2f32,
-                        int_arm_neon_vacgtd, 0>;
-def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt.f32", v4i32, v4f32,
-                        int_arm_neon_vacgtq, 0>;
+def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt", "f32",
+                        v2i32, v2f32, int_arm_neon_vacgtd, 0>;
+def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt", "f32",
+                        v4i32, v4f32, int_arm_neon_vacgtq, 0>;
 // VTST     : Vector Test Bits
 defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                        IIC_VBINi4Q, "vtst.i", NEONvtst, 1>;
+                        IIC_VBINi4Q, "vtst", "i", NEONvtst, 1>;

 // Vector Bitwise Operations.
 // VAND     : Vector Bitwise AND
-def  VANDd    : N3VD<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", v2i32, v2i32, and, 1>;
-def  VANDq    : N3VQ<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", v4i32, v4i32, and, 1>;
+def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
+                      v2i32, v2i32, and, 1>;
+def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
+                      v4i32, v4i32, and, 1>;

 // VEOR     : Vector Bitwise Exclusive OR
-def  VEORd    : N3VD<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", v2i32, v2i32, xor, 1>;
-def  VEORq    : N3VQ<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", v4i32, v4i32, xor, 1>;
+def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
+                      v2i32, v2i32, xor, 1>;
+def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
+                      v4i32, v4i32, xor, 1>;

 // VORR     : Vector Bitwise OR
-def  VORRd    : N3VD<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", v2i32, v2i32, or, 1>;
-def  VORRq    : N3VQ<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", v4i32, v4i32, or, 1>;
+def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
+                      v2i32, v2i32, or, 1>;
+def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
+                      v4i32, v4i32, or, 1>;

 // VBIC     : Vector Bitwise Bit Clear (AND NOT)
-def  VBICd    : N3V<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
+def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
                     (ins DPR:$src1, DPR:$src2), IIC_VBINiD,
-                    "vbic\t$dst, $src1, $src2", "",
+                    "vbic", "$dst, $src1, $src2", "",
                     [(set DPR:$dst, (v2i32 (and DPR:$src1,
                                                 (vnot_conv DPR:$src2))))]>;
-def  VBICq    : N3V<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
+def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
                     (ins QPR:$src1, QPR:$src2), IIC_VBINiQ,
-                    "vbic\t$dst, $src1, $src2", "",
+                    "vbic", "$dst, $src1, $src2", "",
                     [(set QPR:$dst, (v4i32 (and QPR:$src1,
                                                 (vnot_conv QPR:$src2))))]>;

 // VORN     : Vector Bitwise OR NOT
-def  VORNd    : N3V<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
+def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
                     (ins DPR:$src1, DPR:$src2), IIC_VBINiD,
-                    "vorn\t$dst, $src1, $src2", "",
+                    "vorn", "$dst, $src1, $src2", "",
                     [(set DPR:$dst, (v2i32 (or DPR:$src1,
                                                (vnot_conv DPR:$src2))))]>;
-def  VORNq    : N3V<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
+def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
                     (ins QPR:$src1, QPR:$src2), IIC_VBINiQ,
-                    "vorn\t$dst, $src1, $src2", "",
+                    "vorn", "$dst, $src1, $src2", "",
                     [(set QPR:$dst, (v4i32 (or QPR:$src1,
                                                (vnot_conv QPR:$src2))))]>;

 // VMVN     : Vector Bitwise NOT
-def  VMVNd    : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
+def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                     (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD,
-                    "vmvn\t$dst, $src", "",
+                    "vmvn", "$dst, $src", "",
                     [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>;
-def  VMVNq    : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
+def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                     (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD,
-                    "vmvn\t$dst, $src", "",
+                    "vmvn", "$dst, $src", "",
                     [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>;
 def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>;
 def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>;

 // VBSL     : Vector Bitwise Select
-def  VBSLd    : N3V<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
+def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
                     (ins DPR:$src1, DPR:$src2, DPR:$src3), IIC_VCNTiD,
-                    "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
+                    "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
                     [(set DPR:$dst,
                           (v2i32 (or (and DPR:$src2, DPR:$src1),
                                      (and DPR:$src3, (vnot_conv DPR:$src1)))))]>;
-def  VBSLq    : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
+def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
                     (ins QPR:$src1, QPR:$src2, QPR:$src3), IIC_VCNTiQ,
-                    "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
+                    "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
                     [(set QPR:$dst,
                           (v4i32 (or (and QPR:$src2, QPR:$src1),
                                      (and QPR:$src3, (vnot_conv QPR:$src1)))))]>;

 // VBIF     : Vector Bitwise Insert if False
-// like VBSL but with: "vbif\t$dst, $src3, $src1", "$src2 = $dst",
+// like VBSL but with: "vbif", "$dst, $src3, $src1", "$src2 = $dst",
 // VBIT     : Vector Bitwise Insert if True
-// like VBSL but with: "vbit\t$dst, $src2, $src1", "$src3 = $dst",
+// like VBSL but with: "vbit", "$dst, $src2, $src1", "$src3 = $dst",
 // These are not yet implemented.  The TwoAddress pass will not go looking
 // for equivalent operations with different register constraints; it just
 // inserts copies.

@@ -1974,259 +2199,270 @@ def  VBSLq    : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
 // Vector Absolute Differences.

 // VABD     : Vector Absolute Difference
-defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                           IIC_VBINi4Q, "vabd.s", int_arm_neon_vabds, 0>;
-defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                           IIC_VBINi4Q, "vabd.u", int_arm_neon_vabdu, 0>;
-def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND, "vabd.f32", v2f32, v2f32,
-                        int_arm_neon_vabds, 0>;
-def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vabd.f32", v4f32, v4f32,
-                        int_arm_neon_vabds, 0>;
+defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D,
+                           IIC_VBINi4Q, IIC_VBINi4Q,
+                           "vabd", "s", int_arm_neon_vabds, 0>;
+defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D,
+                           IIC_VBINi4Q, IIC_VBINi4Q,
+                           "vabd", "u", int_arm_neon_vabdu, 0>;
+def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND,
+                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
+def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ,
+                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;

 // VABDL    : Vector Absolute Difference Long (Q = | D - D |)
-defm VABDLs   : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, "vabdl.s", int_arm_neon_vabdls, 0>;
-defm VABDLu   : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, "vabdl.u", int_arm_neon_vabdlu, 0>;
+defm VABDLs   : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q,
+                            "vabdl", "s", int_arm_neon_vabdls, 0>;
+defm VABDLu   : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q,
+                            "vabdl", "u", int_arm_neon_vabdlu, 0>;

 // VABA     : Vector Absolute Difference and Accumulate
-defm VABAs    : N3VInt3_QHS<0,0,0b0111,1, "vaba.s", int_arm_neon_vabas>;
-defm VABAu    : N3VInt3_QHS<1,0,0b0111,1, "vaba.u", int_arm_neon_vabau>;
+defm VABAs    : N3VInt3_QHS<0,0,0b0111,1, "vaba", "s", int_arm_neon_vabas>;
+defm VABAu    : N3VInt3_QHS<1,0,0b0111,1, "vaba", "u", int_arm_neon_vabau>;

 // VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
-defm VABALs   : N3VLInt3_QHS<0,1,0b0101,0, "vabal.s", int_arm_neon_vabals>;
-defm VABALu   : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>;
+defm VABALs   : N3VLInt3_QHS<0,1,0b0101,0, "vabal", "s", int_arm_neon_vabals>;
+defm VABALu   : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>;
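[Editorial aside: the bitwise instructions in the hunk above (VAND, VEOR, VORR, VBIC, VORN, VMVN, VBSL) print no data-type suffix, so rather than passing an empty Dt they move to new "X" variants of the formats (N3VDX, N3VQX, N3VX, N2VX). Reusing the illustrative naming from the earlier sketch, a suffix-less counterpart would simply omit the "." and Dt components:

    // Illustrative only: suffix-less counterpart to the NVSketch class above.
    class NVXSketch<string OpcodeStr, string AsmOps> {
      // Yields e.g. "vand\t$dst, $src1, $src2" with no ".i32"-style suffix.
      string AsmString = !strconcat(OpcodeStr, !strconcat("\t", AsmOps));
    }
    def VANDSketch : NVXSketch<"vand", "$dst, $src1, $src2">;

This keeps the suffixed and unsuffixed instructions on the same two-string convention without special-casing an empty Dt.]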

 // Vector Maximum and Minimum.

 // VMAX     : Vector Maximum
 defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                           IIC_VBINi4Q, "vmax.s", int_arm_neon_vmaxs, 1>;
+                           IIC_VBINi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>;
 defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                           IIC_VBINi4Q, "vmax.u", int_arm_neon_vmaxu, 1>;
-def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax.f32", v2f32, v2f32,
-                        int_arm_neon_vmaxs, 1>;
-def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax.f32", v4f32, v4f32,
-                        int_arm_neon_vmaxs, 1>;
+                           IIC_VBINi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>;
+def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32",
+                        v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32",
+                        v4f32, v4f32, int_arm_neon_vmaxs, 1>;

 // VMIN     : Vector Minimum
 defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                           IIC_VBINi4Q, "vmin.s", int_arm_neon_vmins, 1>;
+                           IIC_VBINi4Q, "vmin", "s", int_arm_neon_vmins, 1>;
 defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                           IIC_VBINi4Q, "vmin.u", int_arm_neon_vminu, 1>;
-def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin.f32", v2f32, v2f32,
-                        int_arm_neon_vmins, 1>;
-def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin.f32", v4f32, v4f32,
-                        int_arm_neon_vmins, 1>;
+                           IIC_VBINi4Q, "vmin", "u", int_arm_neon_vminu, 1>;
+def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32",
+                        v2f32, v2f32, int_arm_neon_vmins, 1>;
+def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin", "f32",
+                        v4f32, v4f32, int_arm_neon_vmins, 1>;

 // Vector Pairwise Operations.

 // VPADD    : Vector Pairwise Add
-def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd.i8", v8i8, v8i8,
-                        int_arm_neon_vpadd, 0>;
-def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd.i16", v4i16, v4i16,
-                        int_arm_neon_vpadd, 0>;
-def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd.i32", v2i32, v2i32,
-                        int_arm_neon_vpadd, 0>;
-def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd.f32", v2f32, v2f32,
-                        int_arm_neon_vpadd, 0>;
+def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd", "i8",
+                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
+def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd", "i16",
+                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
+def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd", "i32",
+                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
+def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd", "f32",
+                        v2f32, v2f32, int_arm_neon_vpadd, 0>;

 // VPADDL   : Vector Pairwise Add Long
-defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s",
+defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                              int_arm_neon_vpaddls>;
-defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl.u",
+defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                              int_arm_neon_vpaddlu>;

 // VPADAL   : Vector Pairwise Add and Accumulate Long
-defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal.s",
+defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                               int_arm_neon_vpadals>;
-defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal.u",
+defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                               int_arm_neon_vpadalu>;

 // VPMAX    : Vector Pairwise Maximum
-def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.s8", v8i8, v8i8,
-                        int_arm_neon_vpmaxs, 0>;
-def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.s16", v4i16, v4i16,
0b1010, 0, IIC_VBINi4D, "vpmax.s16", v4i16, v4i16, - int_arm_neon_vpmaxs, 0>; -def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.s32", v2i32, v2i32, - int_arm_neon_vpmaxs, 0>; -def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.u8", v8i8, v8i8, - int_arm_neon_vpmaxu, 0>; -def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.u16", v4i16, v4i16, - int_arm_neon_vpmaxu, 0>; -def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.u32", v2i32, v2i32, - int_arm_neon_vpmaxu, 0>; -def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax.f32", v2f32, v2f32, - int_arm_neon_vpmaxs, 0>; +def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "s8", + v8i8, v8i8, int_arm_neon_vpmaxs, 0>; +def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "s16", + v4i16, v4i16, int_arm_neon_vpmaxs, 0>; +def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "s32", + v2i32, v2i32, int_arm_neon_vpmaxs, 0>; +def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "u8", + v8i8, v8i8, int_arm_neon_vpmaxu, 0>; +def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "u16", + v4i16, v4i16, int_arm_neon_vpmaxu, 0>; +def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "u32", + v2i32, v2i32, int_arm_neon_vpmaxu, 0>; +def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax", "f32", + v2f32, v2f32, int_arm_neon_vpmaxs, 0>; // VPMIN : Vector Pairwise Minimum -def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.s8", v8i8, v8i8, - int_arm_neon_vpmins, 0>; -def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.s16", v4i16, v4i16, - int_arm_neon_vpmins, 0>; -def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.s32", v2i32, v2i32, - int_arm_neon_vpmins, 0>; -def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.u8", v8i8, v8i8, - int_arm_neon_vpminu, 0>; -def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.u16", v4i16, v4i16, - int_arm_neon_vpminu, 0>; -def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.u32", v2i32, v2i32, - int_arm_neon_vpminu, 0>; -def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin.f32", v2f32, v2f32, - int_arm_neon_vpmins, 0>; +def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "s8", + v8i8, v8i8, int_arm_neon_vpmins, 0>; +def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "s16", + v4i16, v4i16, int_arm_neon_vpmins, 0>; +def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "s32", + v2i32, v2i32, int_arm_neon_vpmins, 0>; +def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "u8", + v8i8, v8i8, int_arm_neon_vpminu, 0>; +def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "u16", + v4i16, v4i16, int_arm_neon_vpminu, 0>; +def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "u32", + v2i32, v2i32, int_arm_neon_vpminu, 0>; +def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin", "f32", + v2f32, v2f32, int_arm_neon_vpmins, 0>; // Vector Reciprocal and Reciprocal Square Root Estimate and Step. 
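The estimate/step pairs below exist to support Newton-Raphson refinement: vrecpe gives a coarse 1/x approximation, and each vrecps computes 2 - a*b, which refines an estimate x of 1/a as x * (2 - a*x). A scalar sketch (the step formula follows the architectural definition; the starting estimate and iteration count here are illustrative):

    #include <cstdio>

    // vrecps per-lane result: 2 - a*b (architectural definition).
    float vrecps(float a, float b) { return 2.0f - a * b; }

    // Refine a rough reciprocal estimate; each step roughly doubles the
    // number of accurate bits.
    float reciprocal(float a, float est) {
      for (int i = 0; i < 2; ++i)
        est = est * vrecps(a, est);
      return est;
    }

    int main() {
      std::printf("1/3 ~= %f\n", reciprocal(3.0f, 0.3f)); // -> ~0.333333
    }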
// VRECPE : Vector Reciprocal Estimate def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, - IIC_VUNAD, "vrecpe.u32", + IIC_VUNAD, "vrecpe", "u32", v2i32, v2i32, int_arm_neon_vrecpe>; def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, - IIC_VUNAQ, "vrecpe.u32", + IIC_VUNAQ, "vrecpe", "u32", v4i32, v4i32, int_arm_neon_vrecpe>; def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, - IIC_VUNAD, "vrecpe.f32", + IIC_VUNAD, "vrecpe", "f32", v2f32, v2f32, int_arm_neon_vrecpe>; def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, - IIC_VUNAQ, "vrecpe.f32", + IIC_VUNAQ, "vrecpe", "f32", v4f32, v4f32, int_arm_neon_vrecpe>; // VRECPS : Vector Reciprocal Step -def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSD, "vrecps.f32", v2f32, v2f32, - int_arm_neon_vrecps, 1>; -def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSQ, "vrecps.f32", v4f32, v4f32, - int_arm_neon_vrecps, 1>; +def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, + IIC_VRECSD, "vrecps", "f32", + v2f32, v2f32, int_arm_neon_vrecps, 1>; +def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, + IIC_VRECSQ, "vrecps", "f32", + v4f32, v4f32, int_arm_neon_vrecps, 1>; // VRSQRTE : Vector Reciprocal Square Root Estimate def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, - IIC_VUNAD, "vrsqrte.u32", + IIC_VUNAD, "vrsqrte", "u32", v2i32, v2i32, int_arm_neon_vrsqrte>; def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, - IIC_VUNAQ, "vrsqrte.u32", + IIC_VUNAQ, "vrsqrte", "u32", v4i32, v4i32, int_arm_neon_vrsqrte>; def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, - IIC_VUNAD, "vrsqrte.f32", + IIC_VUNAD, "vrsqrte", "f32", v2f32, v2f32, int_arm_neon_vrsqrte>; def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, - IIC_VUNAQ, "vrsqrte.f32", + IIC_VUNAQ, "vrsqrte", "f32", v4f32, v4f32, int_arm_neon_vrsqrte>; // VRSQRTS : Vector Reciprocal Square Root Step -def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSD, "vrsqrts.f32", v2f32, v2f32, - int_arm_neon_vrsqrts, 1>; -def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSQ, "vrsqrts.f32", v4f32, v4f32, - int_arm_neon_vrsqrts, 1>; +def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, + IIC_VRECSD, "vrsqrts", "f32", + v2f32, v2f32, int_arm_neon_vrsqrts, 1>; +def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, + IIC_VRECSQ, "vrsqrts", "f32", + v4f32, v4f32, int_arm_neon_vrsqrts, 1>; // Vector Shifts. 
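For the shift-and-narrow variants in this section, the difference between vshrn and the saturating vqshrn forms is easiest to see in scalar form (a sketch, not generated code): vshrn truncates the shifted wide lane to the narrow type, while vqshrn clamps it to the narrow range first.

    #include <algorithm>
    #include <cstdint>

    // vshrn.i16 lane: shift right, then keep only the low 8 bits.
    int8_t vshrn_lane(int16_t v, int imm) {
      return static_cast<int8_t>(v >> imm);
    }

    // vqshrn.s16 lane: shift right, then saturate to [-128, 127] instead
    // of discarding the high bits.
    int8_t vqshrn_lane(int16_t v, int imm) {
      int s = v >> imm;
      return static_cast<int8_t>(std::min(127, std::max(-128, s)));
    }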
// VSHL : Vector Shift defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, - IIC_VSHLiQ, "vshl.s", int_arm_neon_vshifts, 0>; + IIC_VSHLiQ, "vshl", "s", int_arm_neon_vshifts, 0>; defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, - IIC_VSHLiQ, "vshl.u", int_arm_neon_vshiftu, 0>; + IIC_VSHLiQ, "vshl", "u", int_arm_neon_vshiftu, 0>; // VSHL : Vector Shift Left (Immediate) -defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl.i", NEONvshl>; +defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>; -defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>; +defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs>; +defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru>; // VSHLL : Vector Shift Left Long -defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll.s", NEONvshlls>; -defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll.u", NEONvshllu>; +defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>; +defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>; // VSHLL : Vector Shift Left Long (with maximum shift count) class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, string OpcodeStr, ValueType ResTy, + bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> - : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, ResTy, OpTy, OpNode> { + : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt, + ResTy, OpTy, OpNode> { let Inst{21-16} = op21_16; } -def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8", +def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8", v8i16, v8i8, NEONvshlli>; -def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16", +def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16", v4i32, v4i16, NEONvshlli>; -def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32", +def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", v2i64, v2i32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow -defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn.i", NEONvshrn>; +defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", NEONvshrn>; // VRSHL : Vector Rounding Shift defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl.s", int_arm_neon_vrshifts, 0>; + IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts, 0>; defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl.u", int_arm_neon_vrshiftu, 0>; + IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu, 0>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.s", NEONvrshrs>; -defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.u", NEONvrshru>; +defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>; +defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow -defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn.i", +defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", NEONvrshrn>; // VQSHL : Vector Saturating Shift defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, 
IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl.s", int_arm_neon_vqshifts, 0>; + IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts, 0>; defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl.u", int_arm_neon_vqshiftu, 0>; + IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu, 0>; // VQSHL : Vector Saturating Shift Left (Immediate) -defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.s", NEONvqshls>; -defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.u", NEONvqshlu>; +defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>; +defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>; // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) -defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu.s", NEONvqshlsu>; +defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu>; // VQSHRN : Vector Saturating Shift Right and Narrow -defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.s", +defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", NEONvqshrns>; -defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.u", +defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u", NEONvqshrnu>; // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned) -defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun.s", +defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s", NEONvqshrnsu>; // VQRSHL : Vector Saturating Rounding Shift defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqrshl.s", int_arm_neon_vqrshifts, 0>; + IIC_VSHLi4Q, "vqrshl", "s", + int_arm_neon_vqrshifts, 0>; defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqrshl.u", int_arm_neon_vqrshiftu, 0>; + IIC_VSHLi4Q, "vqrshl", "u", + int_arm_neon_vqrshiftu, 0>; // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow -defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.s", +defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s", NEONvqrshrns>; -defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.u", +defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u", NEONvqrshrnu>; // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) -defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun.s", +defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s", NEONvqrshrnsu>; // VSRA : Vector Shift Right and Accumulate -defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>; -defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra.u", NEONvshru>; +defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>; +defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>; // VRSRA : Vector Rounding Shift Right and Accumulate -defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra.s", NEONvrshrs>; -defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra.u", NEONvrshru>; +defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>; +defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>; // VSLI : Vector Shift Left and Insert -defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli.", NEONvsli>; +defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli>; // VSRI : Vector Shift Right and Insert -defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, 
"vsri.", NEONvsri>; +defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri>; // Vector Absolute and Saturating Absolute. // VABS : Vector Absolute Value defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, - IIC_VUNAiD, IIC_VUNAiQ, "vabs.s", + IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", int_arm_neon_vabs>; def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAD, "vabs.f32", + IIC_VUNAD, "vabs", "f32", v2f32, v2f32, int_arm_neon_vabs>; def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAQ, "vabs.f32", + IIC_VUNAQ, "vabs", "f32", v4f32, v4f32, int_arm_neon_vabs>; // VQABS : Vector Saturating Absolute Value defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, - IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs.s", + IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", int_arm_neon_vqabs>; // Vector Negate. @@ -2234,31 +2470,31 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>; def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>; -class VNEGD<bits<2> size, string OpcodeStr, ValueType Ty> +class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src), - IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "", + IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (Ty (vneg DPR:$src)))]>; -class VNEGQ<bits<2> size, string OpcodeStr, ValueType Ty> +class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src), - IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "", + IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (Ty (vneg QPR:$src)))]>; // VNEG : Vector Negate -def VNEGs8d : VNEGD<0b00, "vneg.s8", v8i8>; -def VNEGs16d : VNEGD<0b01, "vneg.s16", v4i16>; -def VNEGs32d : VNEGD<0b10, "vneg.s32", v2i32>; -def VNEGs8q : VNEGQ<0b00, "vneg.s8", v16i8>; -def VNEGs16q : VNEGQ<0b01, "vneg.s16", v8i16>; -def VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>; +def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>; +def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>; +def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>; +def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>; +def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>; +def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>; // VNEG : Vector Negate (floating-point) def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD, - "vneg.f32\t$dst, $src", "", + "vneg", "f32", "$dst, $src", "", [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>; def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ, - "vneg.f32\t$dst, $src", "", + "vneg", "f32", "$dst, $src", "", [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>; def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>; @@ -2270,35 +2506,35 @@ def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>; // VQNEG : Vector Saturating Negate defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, - IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg.s", + IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s", int_arm_neon_vqneg>; // Vector Bit Counting Operations. 
// VCLS : Vector Count Leading Sign Bits defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, - IIC_VCNTiD, IIC_VCNTiQ, "vcls.s", + IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s", int_arm_neon_vcls>; // VCLZ : Vector Count Leading Zeros defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, - IIC_VCNTiD, IIC_VCNTiQ, "vclz.i", + IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i", int_arm_neon_vclz>; // VCNT : Vector Count One Bits def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, - IIC_VCNTiD, "vcnt.8", + IIC_VCNTiD, "vcnt", "8", v8i8, v8i8, int_arm_neon_vcnt>; def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, - IIC_VCNTiQ, "vcnt.8", + IIC_VCNTiQ, "vcnt", "8", v16i8, v16i8, int_arm_neon_vcnt>; // Vector Move Operations. // VMOV : Vector Move (Register) -def VMOVDneon: N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), - IIC_VMOVD, "vmov\t$dst, $src", "", []>; -def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), - IIC_VMOVD, "vmov\t$dst, $src", "", []>; +def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), + IIC_VMOVD, "vmov", "$dst, $src", "", []>; +def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), + IIC_VMOVD, "vmov", "$dst, $src", "", []>; // VMOV : Vector Move (Immediate) @@ -2339,65 +2575,65 @@ def vmovImm64 : PatLeaf<(build_vector), [{ def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), (ins h8imm:$SIMM), IIC_VMOVImm, - "vmov.i8\t$dst, $SIMM", "", + "vmov", "i8", "$dst, $SIMM", "", [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>; def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst), (ins h8imm:$SIMM), IIC_VMOVImm, - "vmov.i8\t$dst, $SIMM", "", + "vmov", "i8", "$dst, $SIMM", "", [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>; -def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst), +def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 0, {?}, 1, (outs DPR:$dst), (ins h16imm:$SIMM), IIC_VMOVImm, - "vmov.i16\t$dst, $SIMM", "", + "vmov", "i16", "$dst, $SIMM", "", [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>; -def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst), +def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 1, {?}, 1, (outs QPR:$dst), (ins h16imm:$SIMM), IIC_VMOVImm, - "vmov.i16\t$dst, $SIMM", "", + "vmov", "i16", "$dst, $SIMM", "", [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>; -def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst), +def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, {?}, 1, (outs DPR:$dst), (ins h32imm:$SIMM), IIC_VMOVImm, - "vmov.i32\t$dst, $SIMM", "", + "vmov", "i32", "$dst, $SIMM", "", [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>; -def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst), +def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, {?}, 1, (outs QPR:$dst), (ins h32imm:$SIMM), IIC_VMOVImm, - "vmov.i32\t$dst, $SIMM", "", + "vmov", "i32", "$dst, $SIMM", "", [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>; def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst), (ins h64imm:$SIMM), IIC_VMOVImm, - "vmov.i64\t$dst, $SIMM", "", + "vmov", "i64", "$dst, $SIMM", "", [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>; def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), (ins h64imm:$SIMM), IIC_VMOVImm, - "vmov.i64\t$dst, $SIMM", "", + "vmov", "i64", "$dst, $SIMM", "", [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; // VMOV : Vector Get Lane (move scalar to ARM core register) -def VGETLNs8 : NVGetLane<0b11100101, 0b1011, 0b00, +def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?}, (outs GPR:$dst), 
(ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", ".s8\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", "s8", "$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src), imm:$lane))]>; -def VGETLNs16 : NVGetLane<0b11100001, 0b1011, 0b01, +def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1}, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", ".s16\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", "s16", "$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src), imm:$lane))]>; -def VGETLNu8 : NVGetLane<0b11101101, 0b1011, 0b00, +def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?}, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", ".u8\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", "u8", "$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src), imm:$lane))]>; -def VGETLNu16 : NVGetLane<0b11101001, 0b1011, 0b01, +def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1}, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", ".u16\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", "u16", "$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src), imm:$lane))]>; -def VGETLNi32 : NVGetLane<0b11100001, 0b1011, 0b00, +def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", ".32\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", "32", "$dst, $src[$lane]", [(set GPR:$dst, (extractelt (v2i32 DPR:$src), imm:$lane))]>; // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td @@ -2436,19 +2672,19 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2), // VMOV : Vector Set Lane (move ARM core register to scalar) let Constraints = "$src1 = $dst" in { -def VSETLNi8 : NVSetLane<0b11100100, 0b1011, 0b00, (outs DPR:$dst), +def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), - IIC_VMOVISL, "vmov", ".8\t$dst[$lane], $src2", + IIC_VMOVISL, "vmov", "8", "$dst[$lane], $src2", [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1), GPR:$src2, imm:$lane))]>; -def VSETLNi16 : NVSetLane<0b11100000, 0b1011, 0b01, (outs DPR:$dst), +def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), - IIC_VMOVISL, "vmov", ".16\t$dst[$lane], $src2", + IIC_VMOVISL, "vmov", "16", "$dst[$lane], $src2", [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1), GPR:$src2, imm:$lane))]>; -def VSETLNi32 : NVSetLane<0b11100000, 0b1011, 0b00, (outs DPR:$dst), +def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), - IIC_VMOVISL, "vmov", ".32\t$dst[$lane], $src2", + IIC_VMOVISL, "vmov", "32", "$dst[$lane], $src2", [(set DPR:$dst, (insertelt (v2i32 DPR:$src1), GPR:$src2, imm:$lane))]>; } @@ -2512,55 +2748,57 @@ def : Pat<(v4i32 (scalar_to_vector GPR:$src)), // VDUP : Vector Duplicate (from ARM core register to all elements) -class VDUPD<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty> +class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty> : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "vdup", !strconcat(asmSize, "\t$dst, $src"), + IIC_VMOVIS, "vdup", Dt, "$dst, $src", [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>; -class VDUPQ<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty> +class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty> : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src), - IIC_VMOVIS, 
"vdup", !strconcat(asmSize, "\t$dst, $src"), + IIC_VMOVIS, "vdup", Dt, "$dst, $src", [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>; -def VDUP8d : VDUPD<0b11101100, 0b00, ".8", v8i8>; -def VDUP16d : VDUPD<0b11101000, 0b01, ".16", v4i16>; -def VDUP32d : VDUPD<0b11101000, 0b00, ".32", v2i32>; -def VDUP8q : VDUPQ<0b11101110, 0b00, ".8", v16i8>; -def VDUP16q : VDUPQ<0b11101010, 0b01, ".16", v8i16>; -def VDUP32q : VDUPQ<0b11101010, 0b00, ".32", v4i32>; +def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; +def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; +def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>; +def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; +def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; +def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "vdup", ".32\t$dst, $src", + IIC_VMOVIS, "vdup", "32", "$dst, $src", [(set DPR:$dst, (v2f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>; def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "vdup", ".32\t$dst, $src", + IIC_VMOVIS, "vdup", "32", "$dst, $src", [(set QPR:$dst, (v4f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>; // VDUP : Vector Duplicate Lane (from scalar to all elements) -class VDUPLND<string OpcodeStr, ValueType Ty> - : N2VDup<0b11, 0b11, 0b11000, 0, 0, +class VDUPLND<bits<2> op19_18, bits<2> op17_16, + string OpcodeStr, string Dt, ValueType Ty> + : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0, (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", + OpcodeStr, Dt, "$dst, $src[$lane]", "", [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>; -class VDUPLNQ<string OpcodeStr, ValueType ResTy, ValueType OpTy> - : N2VDup<0b11, 0b11, 0b11000, 1, 0, +class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy> + : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", + OpcodeStr, Dt, "$dst, $src[$lane]", "", [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>; // Inst{19-16} is partially specified depending on the element size. 
-def VDUPLN8d : VDUPLND<"vdup.8", v8i8> { let Inst{16} = 1; } -def VDUPLN16d : VDUPLND<"vdup.16", v4i16> { let Inst{17-16} = 0b10; } -def VDUPLN32d : VDUPLND<"vdup.32", v2i32> { let Inst{18-16} = 0b100; } -def VDUPLNfd : VDUPLND<"vdup.32", v2f32> { let Inst{18-16} = 0b100; } -def VDUPLN8q : VDUPLNQ<"vdup.8", v16i8, v8i8> { let Inst{16} = 1; } -def VDUPLN16q : VDUPLNQ<"vdup.16", v8i16, v4i16> { let Inst{17-16} = 0b10; } -def VDUPLN32q : VDUPLNQ<"vdup.32", v4i32, v2i32> { let Inst{18-16} = 0b100; } -def VDUPLNfq : VDUPLNQ<"vdup.32", v4f32, v2f32> { let Inst{18-16} = 0b100; } +def VDUPLN8d : VDUPLND<{?,?}, {?,1}, "vdup", "8", v8i8>; +def VDUPLN16d : VDUPLND<{?,?}, {1,0}, "vdup", "16", v4i16>; +def VDUPLN32d : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2i32>; +def VDUPLNfd : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2f32>; +def VDUPLN8q : VDUPLNQ<{?,?}, {?,1}, "vdup", "8", v16i8, v8i8>; +def VDUPLN16q : VDUPLNQ<{?,?}, {1,0}, "vdup", "16", v8i16, v4i16>; +def VDUPLN32q : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4i32, v2i32>; +def VDUPLNfq : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4f32, v2f32>; def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, @@ -2579,19 +2817,15 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; -def VDUPfdf : N2VDup<0b11, 0b11, 0b11000, 0, 0, - (outs DPR:$dst), (ins SPR:$src), - IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", - [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]> { - let Inst{18-16} = 0b100; -} +def VDUPfdf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0, + (outs DPR:$dst), (ins SPR:$src), + IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "", + [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>; -def VDUPfqf : N2VDup<0b11, 0b11, 0b11000, 1, 0, - (outs QPR:$dst), (ins SPR:$src), - IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", - [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]> { - let Inst{18-16} = 0b100; -} +def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0, + (outs QPR:$dst), (ins SPR:$src), + IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "", + [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)), (INSERT_SUBREG QPR:$src, @@ -2603,176 +2837,178 @@ def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)), (DSubReg_f64_other_reg imm:$lane))>; // VMOVN : Vector Narrowing Move -defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, "vmovn.i", - int_arm_neon_vmovn>; +defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, + "vmovn", "i", int_arm_neon_vmovn>; // VQMOVN : Vector Saturating Narrowing Move -defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, "vqmovn.s", - int_arm_neon_vqmovns>; -defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, "vqmovn.u", - int_arm_neon_vqmovnu>; -defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, "vqmovun.s", - int_arm_neon_vqmovnsu>; +defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, + "vqmovn", "s", int_arm_neon_vqmovns>; +defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, + "vqmovn", "u", int_arm_neon_vqmovnu>; +defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, + "vqmovun", "s", int_arm_neon_vqmovnsu>; // VMOVL : Vector Lengthening Move -defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl.s", int_arm_neon_vmovls>; -defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl.u", int_arm_neon_vmovlu>; +defm VMOVLs : 
N2VLInt_QHS<0b01,0b10100,0,1, "vmovl", "s", + int_arm_neon_vmovls>; +defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl", "u", + int_arm_neon_vmovlu>; // Vector Conversions. // VCVT : Vector Convert Between Floating-Point and Integers -def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32", +def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", v2i32, v2f32, fp_to_sint>; -def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32", +def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", v2i32, v2f32, fp_to_uint>; -def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32", +def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", v2f32, v2i32, sint_to_fp>; -def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32", +def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", v2f32, v2i32, uint_to_fp>; -def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32", +def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", v4i32, v4f32, fp_to_sint>; -def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32", +def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", v4i32, v4f32, fp_to_uint>; -def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32", +def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", v4f32, v4i32, sint_to_fp>; -def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32", +def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", v4f32, v4i32, uint_to_fp>; // VCVT : Vector Convert Between Floating-Point and Fixed-Point. -def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt.s32.f32", +def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; -def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt.u32.f32", +def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxu>; -def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt.f32.s32", +def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; -def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt.f32.u32", +def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; -def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt.s32.f32", +def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; -def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt.u32.f32", +def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxu>; -def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt.f32.s32", +def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; -def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt.f32.u32", +def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; // Vector Reverse. 
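The vrev family below reverses elements within fixed-size groups rather than across the whole register; a scalar model of vrev64.8 on a Q register (a sketch under that grouping assumption):

    #include <algorithm>
    #include <array>
    #include <cstdint>

    // vrev64.8 qd, qm: reverse the byte order inside each 64-bit
    // doubleword; a Q register is two such 8-byte groups.
    std::array<uint8_t, 16> vrev64_8(std::array<uint8_t, 16> v) {
      std::reverse(v.begin(), v.begin() + 8);
      std::reverse(v.begin() + 8, v.end());
      return v;
    }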
// VREV64 : Vector Reverse elements within 64-bit doublewords -class VREV64D<bits<2> op19_18, string OpcodeStr, ValueType Ty> +class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>; -class VREV64Q<bits<2> op19_18, string OpcodeStr, ValueType Ty> +class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>; -def VREV64d8 : VREV64D<0b00, "vrev64.8", v8i8>; -def VREV64d16 : VREV64D<0b01, "vrev64.16", v4i16>; -def VREV64d32 : VREV64D<0b10, "vrev64.32", v2i32>; -def VREV64df : VREV64D<0b10, "vrev64.32", v2f32>; +def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>; +def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>; +def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>; +def VREV64df : VREV64D<0b10, "vrev64", "32", v2f32>; -def VREV64q8 : VREV64Q<0b00, "vrev64.8", v16i8>; -def VREV64q16 : VREV64Q<0b01, "vrev64.16", v8i16>; -def VREV64q32 : VREV64Q<0b10, "vrev64.32", v4i32>; -def VREV64qf : VREV64Q<0b10, "vrev64.32", v4f32>; +def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>; +def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>; +def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>; +def VREV64qf : VREV64Q<0b10, "vrev64", "32", v4f32>; // VREV32 : Vector Reverse elements within 32-bit words -class VREV32D<bits<2> op19_18, string OpcodeStr, ValueType Ty> +class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>; -class VREV32Q<bits<2> op19_18, string OpcodeStr, ValueType Ty> +class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>; -def VREV32d8 : VREV32D<0b00, "vrev32.8", v8i8>; -def VREV32d16 : VREV32D<0b01, "vrev32.16", v4i16>; +def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>; +def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>; -def VREV32q8 : VREV32Q<0b00, "vrev32.8", v16i8>; -def VREV32q16 : VREV32Q<0b01, "vrev32.16", v8i16>; +def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>; +def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>; // VREV16 : Vector Reverse elements within 16-bit halfwords -class VREV16D<bits<2> op19_18, string OpcodeStr, ValueType Ty> +class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>; -class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty> +class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + OpcodeStr, Dt, "$dst, 
$src", "", [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>; -def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>; -def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>; +def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>; +def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>; // Other Vector Shuffles. // VEXT : Vector Extract -class VEXTd<string OpcodeStr, ValueType Ty> - : N3VImm<0,1,0b11,0,0, (outs DPR:$dst), - (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD, - !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "", - [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs), - (Ty DPR:$rhs), imm:$index)))]>; - -class VEXTq<string OpcodeStr, ValueType Ty> - : N3VImm<0,1,0b11,1,0, (outs QPR:$dst), - (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ, - !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "", - [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs), - (Ty QPR:$rhs), imm:$index)))]>; - -def VEXTd8 : VEXTd<"vext.8", v8i8>; -def VEXTd16 : VEXTd<"vext.16", v4i16>; -def VEXTd32 : VEXTd<"vext.32", v2i32>; -def VEXTdf : VEXTd<"vext.32", v2f32>; - -def VEXTq8 : VEXTq<"vext.8", v16i8>; -def VEXTq16 : VEXTq<"vext.16", v8i16>; -def VEXTq32 : VEXTq<"vext.32", v4i32>; -def VEXTqf : VEXTq<"vext.32", v4f32>; +class VEXTd<string OpcodeStr, string Dt, ValueType Ty> + : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$dst), + (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD, + OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "", + [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs), + (Ty DPR:$rhs), imm:$index)))]>; + +class VEXTq<string OpcodeStr, string Dt, ValueType Ty> + : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$dst), + (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ, + OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "", + [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs), + (Ty QPR:$rhs), imm:$index)))]>; + +def VEXTd8 : VEXTd<"vext", "8", v8i8>; +def VEXTd16 : VEXTd<"vext", "16", v4i16>; +def VEXTd32 : VEXTd<"vext", "32", v2i32>; +def VEXTdf : VEXTd<"vext", "32", v2f32>; + +def VEXTq8 : VEXTq<"vext", "8", v16i8>; +def VEXTq16 : VEXTq<"vext", "16", v8i16>; +def VEXTq32 : VEXTq<"vext", "32", v4i32>; +def VEXTqf : VEXTq<"vext", "32", v4f32>; // VTRN : Vector Transpose -def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn.8">; -def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn.16">; -def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn.32">; +def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">; +def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">; +def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">; -def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn.8">; -def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn.16">; -def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn.32">; +def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">; +def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">; +def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">; // VUZP : Vector Unzip (Deinterleave) -def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp.8">; -def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp.16">; -def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp.32">; +def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">; +def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">; +def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">; -def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp.8">; -def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp.16">; -def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp.32">; +def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">; +def VUZPq16 : N2VQShuffle<0b01, 0b00010, 
IIC_VPERMQ3, "vuzp", "16">; +def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">; // VZIP : Vector Zip (Interleave) -def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip.8">; -def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip.16">; -def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip.32">; +def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">; +def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">; +def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">; -def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip.8">; -def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip.16">; -def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">; +def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">; +def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">; +def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">; // Vector Table Lookup and Table Extension. @@ -2780,25 +3016,25 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">; def VTBL1 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$src), IIC_VTB1, - "vtbl.8\t$dst, \\{$tbl1\\}, $src", "", + "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>; let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2, - "vtbl.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "", + "vtbl", "8", "$dst, \\{$tbl1,$tbl2\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2 DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3, - "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "", + "vtbl", "8", "$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4, - "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "", + "vtbl", "8", "$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; } // hasExtraSrcRegAllocReq = 1 @@ -2807,26 +3043,26 @@ def VTBL4 def VTBX1 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1, - "vtbx.8\t$dst, \\{$tbl1\\}, $src", "$orig = $dst", + "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1 DPR:$orig, DPR:$tbl1, DPR:$src)))]>; let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2, - "vtbx.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst", + "vtbx", "8", "$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3, - "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst", + "vtbx", "8", "$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; def VTBX4 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTBX4, - "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst", + "vtbx", "8", "$dst, 
\\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; } // hasExtraSrcRegAllocReq = 1 @@ -2840,17 +3076,17 @@ def VTBX4 // Vector Add Operations used for single-precision FP let neverHasSideEffects = 1 in -def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd,1>; +def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd", "f32", v2f32, v2f32, fadd,1>; def : N3VDsPat<fadd, VADDfd_sfp>; // Vector Sub Operations used for single-precision FP let neverHasSideEffects = 1 in -def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub,0>; +def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub", "f32", v2f32, v2f32, fsub,0>; def : N3VDsPat<fsub, VSUBfd_sfp>; // Vector Multiply Operations used for single-precision FP let neverHasSideEffects = 1 in -def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul,1>; +def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul", "f32", v2f32, v2f32, fmul,1>; def : N3VDsPat<fmul, VMULfd_sfp>; // Vector Multiply-Accumulate/Subtract used for single-precision FP @@ -2858,17 +3094,17 @@ def : N3VDsPat<fmul, VMULfd_sfp>; // we want to avoid them for now. e.g., alternating vmla/vadd instructions. //let neverHasSideEffects = 1 in -//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>; +//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32,fmul,fadd>; //def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>; //let neverHasSideEffects = 1 in -//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>; +//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32,fmul,fsub>; //def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>; // Vector Absolute used for single-precision FP let neverHasSideEffects = 1 in def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAD, "vabs.f32", + IIC_VUNAD, "vabs", "f32", v2f32, v2f32, int_arm_neon_vabs>; def : N2VDIntsPat<fabs, VABSfd_sfp>; @@ -2876,27 +3112,27 @@ def : N2VDIntsPat<fabs, VABSfd_sfp>; let neverHasSideEffects = 1 in def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, - "vneg.f32\t$dst, $src", "", []>; + "vneg", "f32", "$dst, $src", "", []>; def : N2VDIntsPat<fneg, VNEGf32d_sfp>; // Vector Convert between single-precision FP and integer let neverHasSideEffects = 1 in -def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32", +def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", v2i32, v2f32, fp_to_sint>; def : N2VDsPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>; let neverHasSideEffects = 1 in -def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32", +def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", v2i32, v2f32, fp_to_uint>; def : N2VDsPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>; let neverHasSideEffects = 1 in -def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32", +def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", v2f32, v2i32, sint_to_fp>; def : N2VDsPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>; let neverHasSideEffects = 1 in -def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32", +def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", v2f32, v2i32, uint_to_fp>; def : N2VDsPat<arm_uitof, f32, v2i32, 
VCVTu2fd_sfp>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index d1831d1..b5956a3 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -296,7 +296,7 @@ let isBranch = 1, isTerminator = 1 in { // Load Store Instructions. // -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, "ldr", "\t$dst, $addr", [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>; @@ -332,13 +332,14 @@ def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, // Load tconstpool // FIXME: Use ldr.n to work around a Darwin assembler bug. -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", ".n\t$dst, $addr", [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>; // Special LDR for loads from non-pc-relative constpools. -let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, + mayHaveSideEffects = 1 in def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", "\t$dst, $addr", []>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 1bb9bfd..9489815 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -49,8 +49,8 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{ // 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit // immediate splatted into multiple bytes of the word. t2_so_imm values are // represented in the imm field in the same 12-bit form that they are encoded -// into t2_so_imm instructions: the 8-bit immediate is the least significant bits -// [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11]. +// into t2_so_imm instructions: the 8-bit immediate is the least significant +// bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11]. def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1; @@ -88,6 +88,21 @@ def t2_so_imm2part_2 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(V, MVT::i32); }]>; +def t2_so_neg_imm2part : Operand<i32>, PatLeaf<(imm), [{ + return ARM_AM::isT2SOImmTwoPartVal(-(int)N->getZExtValue()); + }]> { +} + +def t2_so_neg_imm2part_1 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getT2SOImmTwoPartFirst(-(int)N->getZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + +def t2_so_neg_imm2part_2 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getT2SOImmTwoPartSecond(-(int)N->getZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + /// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31]. def imm1_31 : PatLeaf<(i32 imm), [{ return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 32; @@ -252,9 +267,9 @@ multiclass T2I_bin_ii12rs<string opc, PatFrag opnode, bit Commutable = 0> { [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>; } -/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a -/// binary operation that produces a value and use and define the carry bit. -/// It's not predicable. +/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns +/// for a binary operation that produces a value and use and define the carry +/// bit. It's not predicable. 
let Uses = [CPSR] in { multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm @@ -471,7 +486,7 @@ def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), // // Load -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in defm t2LDR : T2I_ld<"ldr", UnOpFrag<(load node:$Src)>>; // Loads with zero extension @@ -615,7 +630,7 @@ def t2STR_POST : T2Iidxldst<(outs GPR:$base_wb), AddrModeT2_i8, IndexModePost, IIC_iStoreiu, "str", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, - (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STRH_PRE : T2Iidxldst<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), @@ -718,9 +733,9 @@ def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), (t2UXTB16r_rot GPR:$Src, 8)>; defm t2UXTAB : T2I_bin_rrot<"uxtab", - BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; + BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; defm t2UXTAH : T2I_bin_rrot<"uxtah", - BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; + BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; } //===----------------------------------------------------------------------===// @@ -1162,15 +1177,9 @@ def : T2Pat<(xor GPR:$LHS, t2_so_imm2part:$RHS), def : T2Pat<(add GPR:$LHS, t2_so_imm2part:$RHS), (t2ADDri (t2ADDri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(sub GPR:$LHS, t2_so_imm2part:$RHS), - (t2SUBri (t2SUBri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), - (t2_so_imm2part_2 imm:$RHS))>; - -// ConstantPool, GlobalAddress, and JumpTable -def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>; -def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; -def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), - (t2LEApcrelJT tjumptable:$dst, imm:$id)>; +def : T2Pat<(add GPR:$LHS, t2_so_neg_imm2part:$RHS), + (t2SUBri (t2SUBri GPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)), + (t2_so_neg_imm2part_2 imm:$RHS))>; // 32-bit immediate using movw + movt. // This is a single pseudo instruction to make it re-materializable. Remove @@ -1180,10 +1189,20 @@ def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", [(set GPR:$dst, (i32 imm:$src))]>; +// ConstantPool, GlobalAddress, and JumpTable +def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>, + Requires<[IsThumb2, DontUseMovt]>; +def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; +def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>, + Requires<[IsThumb2, UseMovt]>; + +def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), + (t2LEApcrelJT tjumptable:$dst, imm:$id)>; + // Pseudo instruction that combines ldr from constpool and add pc. This should // be expanded into two instructions late to allow if-conversion and // scheduling. 
-let isReMaterializable = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index ba341f4..5bfe89d 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -54,7 +54,7 @@ def vfp_f64imm : Operand<f64>, // Load / store Instructions. // -let canFoldAsLoad = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), IIC_fpLoad64, "vldr", ".64\t$dst, $addr", [(set DPR:$dst, (load addrmode5:$addr))]>; @@ -437,7 +437,7 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", let isReMaterializable = 1 in { def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), VFPMiscFrm, IIC_VMOVImm, - "fconstd", "\t$dst, $imm", + "vmov", ".f64\t$dst, $imm", [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; let Inst{21-20} = 0b11; @@ -448,7 +448,7 @@ def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm), VFPMiscFrm, IIC_VMOVImm, - "fconsts", "\t$dst, $imm", + "vmov", ".f32\t$dst, $imm", [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; let Inst{21-20} = 0b11; diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 24990e6..aa50cfd 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -139,7 +139,8 @@ ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) { void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr, JITCodeEmitter &JCE) { - JCE.startGVStub(GV, 4, 4); + MachineCodeEmitter::BufferState BS; + JCE.startGVStub(BS, GV, 4, 4); intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); if (!sys::Memory::setRangeWritable((void*)Addr, 4)) { llvm_unreachable("ERROR: Unable to mark indirect symbol writable"); @@ -148,19 +149,27 @@ void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr, if (!sys::Memory::setRangeExecutable((void*)Addr, 4)) { llvm_unreachable("ERROR: Unable to mark indirect symbol executable"); } - void *PtrAddr = JCE.finishGVStub(GV); + void *PtrAddr = JCE.finishGVStub(BS); addIndirectSymAddr(Ptr, (intptr_t)PtrAddr); return PtrAddr; } +TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() { + // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a + // 4-byte address. See emitFunctionStub for details. + StubLayout Result = {16, 4}; + return Result; +} + void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, JITCodeEmitter &JCE) { + void *Addr; // If this is just a call to an external function, emit a branch instead of a // call. The code is the same except for one bit of the last instruction. if (Fn != (void*)(intptr_t)ARMCompilationCallback) { // Branch to the corresponding function addr. if (IsPIC) { - // The stub is 8-byte size and 4-aligned. + // The stub is 16-byte size and 4-aligned. intptr_t LazyPtr = getIndirectSymAddr(Fn); if (!LazyPtr) { // In PIC mode, the function stub is loading a lazy-ptr. 
@@ -172,30 +181,30 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, errs() << "JIT: Stub emitted at [" << LazyPtr << "] for external function at '" << Fn << "'\n"); } - JCE.startGVStub(F, 16, 4); - intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable((void*)Addr, 16)) { + JCE.emitAlignment(4); + Addr = (void*)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable(Addr, 16)) { llvm_unreachable("ERROR: Unable to mark stub writable"); } - JCE.emitWordLE(0xe59fc004); // ldr pc, [pc, #+4] + JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4] JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip JCE.emitWordLE(0xe59cf000); // ldr pc, [ip] - JCE.emitWordLE(LazyPtr - (Addr+4+8)); // func - (L_func$scv+8) - sys::Memory::InvalidateInstructionCache((void*)Addr, 16); - if (!sys::Memory::setRangeExecutable((void*)Addr, 16)) { + JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8) + sys::Memory::InvalidateInstructionCache(Addr, 16); + if (!sys::Memory::setRangeExecutable(Addr, 16)) { llvm_unreachable("ERROR: Unable to mark stub executable"); } } else { // The stub is 8-byte size and 4-aligned. - JCE.startGVStub(F, 8, 4); - intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable((void*)Addr, 8)) { + JCE.emitAlignment(4); + Addr = (void*)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable(Addr, 8)) { llvm_unreachable("ERROR: Unable to mark stub writable"); } JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] JCE.emitWordLE((intptr_t)Fn); // addr of function - sys::Memory::InvalidateInstructionCache((void*)Addr, 8); - if (!sys::Memory::setRangeExecutable((void*)Addr, 8)) { + sys::Memory::InvalidateInstructionCache(Addr, 8); + if (!sys::Memory::setRangeExecutable(Addr, 8)) { llvm_unreachable("ERROR: Unable to mark stub executable"); } } @@ -207,9 +216,9 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, // // Branch and link to the compilation callback. // The stub is 16-byte size and 4-byte aligned. - JCE.startGVStub(F, 16, 4); - intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable((void*)Addr, 16)) { + JCE.emitAlignment(4); + Addr = (void*)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable(Addr, 16)) { llvm_unreachable("ERROR: Unable to mark stub writable"); } // Save LR so the callback can determine which stub called it. @@ -222,13 +231,13 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] // The address of the compilation callback. JCE.emitWordLE((intptr_t)ARMCompilationCallback); - sys::Memory::InvalidateInstructionCache((void*)Addr, 16); - if (!sys::Memory::setRangeExecutable((void*)Addr, 16)) { + sys::Memory::InvalidateInstructionCache(Addr, 16); + if (!sys::Memory::setRangeExecutable(Addr, 16)) { llvm_unreachable("ERROR: Unable to mark stub executable"); } } - return JCE.finishGVStub(F); + return Addr; } intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const { diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h index 7dfeed8..ff332b7 100644 --- a/lib/Target/ARM/ARMJITInfo.h +++ b/lib/Target/ARM/ARMJITInfo.h @@ -61,6 +61,10 @@ namespace llvm { virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, JITCodeEmitter &JCE); + // getStubLayout - Returns the size and alignment of the largest call stub + // on ARM. 
+ virtual StubLayout getStubLayout(); + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a /// small native function that simply calls the function at the specified /// address. diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td index 427645c..bbbf413 100644 --- a/lib/Target/ARM/ARMScheduleV7.td +++ b/lib/Target/ARM/ARMScheduleV7.td @@ -180,7 +180,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Double-precision FP Unary InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>]>, + InstrStage<4, [FU_NLSPipe]>], [4, 1]>, // // Single-precision FP Compare InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, @@ -189,17 +189,17 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Double-precision FP Compare InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>]>, + InstrStage<4, [FU_NLSPipe]>], [4, 1]>, // // Single to Double FP Convert InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<7, [FU_NPipe], 0>, - InstrStage<7, [FU_NLSPipe]>]>, + InstrStage<7, [FU_NLSPipe]>], [7, 1]>, // // Double to Single FP Convert InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<5, [FU_NPipe], 0>, - InstrStage<5, [FU_NLSPipe]>]>, + InstrStage<5, [FU_NLSPipe]>], [5, 1]>, // // Single-Precision FP to Integer Convert InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, @@ -208,7 +208,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Double-Precision FP to Integer Convert InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>]>, + InstrStage<8, [FU_NLSPipe]>], [8, 1]>, // // Integer to Single-Precision FP Convert InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, @@ -217,7 +217,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Integer to Double-Precision FP Convert InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>]>, + InstrStage<8, [FU_NLSPipe]>], [8, 1]>, // // Single-precision FP ALU InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, @@ -226,7 +226,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Double-precision FP ALU InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<9, [FU_NPipe], 0>, - InstrStage<9, [FU_NLSPipe]>]>, + InstrStage<9, [FU_NLSPipe]>], [9, 1, 1]>, // // Single-precision FP Multiply InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, @@ -235,7 +235,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Double-precision FP Multiply InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<11, [FU_NPipe], 0>, - InstrStage<11, [FU_NLSPipe]>]>, + InstrStage<11, [FU_NLSPipe]>], [11, 1, 1]>, // // Single-precision FP MAC InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, @@ -244,27 +244,27 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Double-precision FP MAC InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>]>, + InstrStage<19, [FU_NLSPipe]>], [19, 2, 1, 1]>, // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<20, [FU_NPipe], 0>, - InstrStage<20, [FU_NLSPipe]>]>, + InstrStage<20, [FU_NLSPipe]>], [20, 1, 1]>, // // 
Double-precision FP DIV InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>]>, + InstrStage<29, [FU_NLSPipe]>], [29, 1, 1]>, // // Single-precision FP SQRT InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>]>, + InstrStage<19, [FU_NLSPipe]>], [19, 1]>, // // Double-precision FP SQRT InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>]>, + InstrStage<29, [FU_NLSPipe]>], [29, 1]>, // // Single-precision FP Load // use FU_Issue to enforce the 1 load/store per cycle limit diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 432ed78..71f3883 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -27,6 +27,10 @@ UseNEONFP("arm-use-neon-fp", cl::desc("Use NEON for single-precision FP"), cl::init(false), cl::Hidden); +static cl::opt<bool> +UseMOVT("arm-use-movt", + cl::init(true), cl::Hidden); + ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, bool isT) : ARMArchVersion(V4T) @@ -36,6 +40,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , ThumbMode(Thumb1) , PostRAScheduler(false) , IsR9Reserved(ReserveR9) + , UseMovt(UseMOVT) , stackAlignment(4) , CPUString("generic") , TargetType(isELF) // Default to ELF unless otherwise specified. @@ -109,8 +114,6 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, if (UseNEONFP.getPosition() == 0) UseNEONForSinglePrecisionFP = true; } - HasBranchTargetBuffer = (CPUString == "cortex-a8" || - CPUString == "cortex-a9"); } /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 3d0e01e..3f06b7b 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -50,9 +50,6 @@ protected: /// determine if NEON should actually be used. bool UseNEONForSinglePrecisionFP; - /// HasBranchTargetBuffer - True if processor can predict indirect branches. - bool HasBranchTargetBuffer; - /// IsThumb - True if we are in thumb mode, false if in ARM mode. bool IsThumb; @@ -65,6 +62,10 @@ protected: /// IsR9Reserved - True if R9 is a not available as general purpose register. bool IsR9Reserved; + /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit + /// imms (including global addresses). + bool UseMovt; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -126,12 +127,12 @@ protected: bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); } bool hasThumb2() const { return ThumbMode >= Thumb2; } - bool hasBranchTargetBuffer() const { return HasBranchTargetBuffer; } - bool isR9Reserved() const { return IsR9Reserved; } + bool useMovt() const { return UseMovt && hasV6T2Ops(); } + const std::string & getCPUString() const { return CPUString; } - + /// enablePostRAScheduler - True at 'More' optimization. 
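The UseMovt plumbing above combines a command-line override with a hardware-feature test: even with -arm-use-movt left at its default of true, useMovt() only returns true when the subtarget has v6T2, where MOVW/MOVT exist. A standalone sketch of the same pattern using LLVM's CommandLine library; the Subtarget struct here is a stand-in, not the real ARMSubtarget:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hidden developer flag, on by default, mirroring -arm-use-movt.
static cl::opt<bool>
UseMOVT("use-movt-example",
        cl::desc("Materialize 32-bit imms with MOVW/MOVT"),
        cl::init(true), cl::Hidden);

struct Subtarget {              // stand-in for the real subtarget class
  bool HasV6T2;
  bool UseMovt;
  explicit Subtarget(bool V6T2) : HasV6T2(V6T2), UseMovt(UseMOVT) {}
  // The flag alone is not enough: the instructions must also exist.
  bool useMovt() const { return UseMovt && HasV6T2; }
};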
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, TargetSubtarget::AntiDepBreakMode& Mode, diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index dd4a240..692bb19 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -330,6 +330,8 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, const char *Modifier) { const MachineOperand &MO = MI->getOperand(OpNum); + unsigned TF = MO.getTargetFlags(); + switch (MO.getType()) { default: assert(0 && "<unknown operand type>"); @@ -356,12 +358,12 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, case MachineOperand::MO_Immediate: { int64_t Imm = MO.getImm(); O << '#'; - if (Modifier) { - if (strcmp(Modifier, "lo16") == 0) - O << ":lower16:"; - else if (strcmp(Modifier, "hi16") == 0) - O << ":upper16:"; - } + if ((Modifier && strcmp(Modifier, "lo16") == 0) || + (TF & ARMII::MO_LO16)) + O << ":lower16:"; + else if ((Modifier && strcmp(Modifier, "hi16") == 0) || + (TF & ARMII::MO_HI16)) + O << ":upper16:"; O << Imm; break; } @@ -371,6 +373,13 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, case MachineOperand::MO_GlobalAddress: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); GlobalValue *GV = MO.getGlobal(); + + if ((Modifier && strcmp(Modifier, "lo16") == 0) || + (TF & ARMII::MO_LO16)) + O << ":lower16:"; + else if ((Modifier && strcmp(Modifier, "hi16") == 0) || + (TF & ARMII::MO_HI16)) + O << ":upper16:"; O << Mang->getMangledName(GV); printOffset(MO.getOffset()); @@ -998,7 +1007,7 @@ void ARMAsmPrinter::printNoHashImmediate(const MachineInstr *MI, int OpNum) { void ARMAsmPrinter::printVFPf32ImmOperand(const MachineInstr *MI, int OpNum) { const ConstantFP *FP = MI->getOperand(OpNum).getFPImm(); - O << '#' << ARM::getVFPf32Imm(FP->getValueAPF()); + O << '#' << FP->getValueAPF().convertToFloat(); if (VerboseAsm) { O.PadToColumn(MAI->getCommentColumn()); O << MAI->getCommentString() << ' '; @@ -1008,7 +1017,7 @@ void ARMAsmPrinter::printVFPf32ImmOperand(const MachineInstr *MI, int OpNum) { void ARMAsmPrinter::printVFPf64ImmOperand(const MachineInstr *MI, int OpNum) { const ConstantFP *FP = MI->getOperand(OpNum).getFPImm(); - O << '#' << ARM::getVFPf64Imm(FP->getValueAPF()); + O << '#' << FP->getValueAPF().convertToDouble(); if (VerboseAsm) { O.PadToColumn(MAI->getCommentColumn()); O << MAI->getCommentString() << ' '; diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index 7d767ec..50abcf4 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -81,8 +81,8 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { // afterwards // - The imp-defs / imp-uses are superregs only, we don't care about // them. 
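The asm-printer change above emits :lower16:/:upper16: either from an explicit "lo16"/"hi16" modifier or from MO_LO16/MO_HI16 target flags on the operand. The two operators simply select the halves that a MOVW/MOVT pair materializes; the split is plain arithmetic, sketched here outside the printer:

#include <cstdint>
#include <cstdio>

// movw dst, #:lower16:imm  writes bits [15:0], zeroing the top half.
// movt dst, #:upper16:imm  writes bits [31:16], keeping the bottom half.
uint16_t lower16(uint32_t v) { return static_cast<uint16_t>(v & 0xFFFF); }
uint16_t upper16(uint32_t v) { return static_cast<uint16_t>(v >> 16); }

int main() {
  uint32_t addr = 0x20001234; // e.g. a global's address, fixed up at link time
  printf("movw r0, #0x%04x\n", (unsigned)lower16(addr)); // movw r0, #0x1234
  printf("movt r0, #0x%04x\n", (unsigned)upper16(addr)); // movt r0, #0x2000
}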
- BuildMI(MBB, *MI, MI->getDebugLoc(), - TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg); + AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(), + TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg)); MBB.erase(MI); MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index ad1739c..b2fd7b3 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -78,7 +78,7 @@ namespace { { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 }, { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 }, { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 }, - { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 }, + { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1 }, // FIXME: Do we need the 16-bit 'S' variant? { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 }, { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 }, @@ -413,6 +413,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, if (MI->getOperand(2).getImm() == 0) return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); break; + case ARM::t2MOVi16: + // Can convert only 'pure' immediate operands, not immediates obtained as + // globals' addresses. + if (MI->getOperand(1).isImm()) + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + break; } return false; } diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 9217522..b5579f4 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -127,10 +127,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) setOperationAction(ISD::BIT_CONVERT, MVT::f32, Promote); - // We don't have line number support yet. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); // Not implemented yet. diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp index d328135..b3b711e 100644 --- a/lib/Target/Alpha/AlphaJITInfo.cpp +++ b/lib/Target/Alpha/AlphaJITInfo.cpp @@ -190,17 +190,27 @@ extern "C" { #endif } +TargetJITInfo::StubLayout AlphaJITInfo::getStubLayout() { + // The stub contains 19 4-byte instructions, aligned at 4 bytes: + // R0 = R27 + // 8 x "R27 <<= 8; R27 |= 8-bits-of-Target" == 16 instructions + // JMP R27 + // Magic number so the compilation callback can recognize the stub. + StubLayout Result = {19 * 4, 4}; + return Result; +} + void *AlphaJITInfo::emitFunctionStub(const Function* F, void *Fn, JITCodeEmitter &JCE) { + MachineCodeEmitter::BufferState BS; //assert(Fn == AlphaCompilationCallback && "Where are you going?\n"); //Do things in a stupid slow way! 
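The new table entry above lets the Thumb2 size-reduction pass narrow t2MOVi16 to tMOVi8, but only via ReduceSpecial: the operand must be a plain immediate (a global address half carried as a :lower16:/:upper16: fixup is not known until relocation), and the value must fit the narrow encoding's 8-bit field. A sketch of that legality test; the helper name and parameters are illustrative, not the pass's real code:

#include <cstdint>

// A 32-bit Thumb2 movw may only shrink to the 16-bit movs encoding when the
// operand is a literal immediate (not a linker-resolved fixup) and the value
// fits the narrow field.
bool canNarrowMovImm(bool operandIsPlainImm, uint32_t imm) {
  if (!operandIsPlainImm)  // global-address halves: value unknown here
    return false;
  return imm <= 0xFF;      // tMOVi8 encodes only 0..255
}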
- JCE.startGVStub(F, 19*4); void* Addr = (void*)(intptr_t)JCE.getCurrentPCValue(); for (int x = 0; x < 19; ++ x) JCE.emitWordLE(0); EmitBranchToAt(Addr, Fn); DEBUG(errs() << "Emitting Stub to " << Fn << " at [" << Addr << "]\n"); - return JCE.finishGVStub(F); + return Addr; } TargetJITInfo::LazyResolverFn diff --git a/lib/Target/Alpha/AlphaJITInfo.h b/lib/Target/Alpha/AlphaJITInfo.h index ecb467f..bd358a4 100644 --- a/lib/Target/Alpha/AlphaJITInfo.h +++ b/lib/Target/Alpha/AlphaJITInfo.h @@ -31,6 +31,7 @@ namespace llvm { explicit AlphaJITInfo(TargetMachine &tm) : TM(tm) { useGOT = true; } + virtual StubLayout getStubLayout(); virtual void *emitFunctionStub(const Function* F, void *Fn, JITCodeEmitter &JCE); virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index c5c96f8..ad2510a 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -114,10 +114,6 @@ BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM) // READCYCLECOUNTER needs special type legalization. setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); - // We don't have line number support yet. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); // Use the default implementation. diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td index 642d10f..d396cc8 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.td +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td @@ -44,7 +44,7 @@ class Ra<bits<3> num, string n, list<Register> subs> : BlackfinReg<n> { let Num = num; } -// Ywo halves of 32-bit register +// Two halves of 32-bit register multiclass Rss<bits<3> group, bits<3> num, string n> { def H : Rs<group, num, 1, !strconcat(n, ".h")>; def L : Rs<group, num, 0, !strconcat(n, ".l")>; diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 4dd82a6..23e192e 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -387,10 +387,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - // Support label based line numbers. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. 
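The Alpha stub layout comment above describes building the 64-bit target address in R27 with eight "shift left 8, or in the next byte" steps. The same arithmetic in ordinary C++ shows why 16 instructions (two per byte), plus the register copy and the jump, account for the 19-word stub:

#include <cstdint>

// Rebuild a 64-bit address one byte at a time, most-significant byte first,
// exactly as the stub's shift-and-or sequence does.
uint64_t materializeByBytes(uint64_t target) {
  uint64_t r27 = 0;
  for (int i = 7; i >= 0; --i) {         // 8 iterations == 16 stub instructions
    r27 <<= 8;                           // R27 <<= 8
    r27 |= (target >> (8 * i)) & 0xFF;   // R27 |= next 8 bits of Target
  }
  return r27;                            // the stub then does "JMP R27"
}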
for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index d3b575a..f24ffd2 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -31,14 +31,6 @@ let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in { } //===----------------------------------------------------------------------===// -// DWARF debugging Pseudo Instructions -//===----------------------------------------------------------------------===// - -def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), - ".loc $file, $line, $col", - [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>; - -//===----------------------------------------------------------------------===// // Loads: // NB: The ordering is actually important, since the instruction selection // will try each of the instructions in sequence, i.e., the D-form first with diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index c0084be..beccb2c 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -312,8 +312,8 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, else if (AM.JT != -1) Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i16, 0/*AM.SymbolFlags*/); else if (AM.BlockAddr) - Disp = CurDAG->getBlockAddress(AM.BlockAddr, DebugLoc()/*MVT::i32*/, - true /*AM.SymbolFlags*/); + Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32, + true, 0/*AM.SymbolFlags*/); else Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i16); diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 5a925f5..29cc370 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -162,7 +162,7 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned MSP430TargetLowering::getFunctionAlignment(const Function *F) const { - return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4; + return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 2; } //===----------------------------------------------------------------------===// @@ -594,9 +594,17 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC, default: llvm_unreachable("Invalid integer condition!"); case ISD::SETEQ: TCC = MSP430CC::COND_E; // aka COND_Z + // Minor optimization: if RHS is a constant, swap operands, then the + // constant can be folded into comparison. + if (RHS.getOpcode() == ISD::Constant) + std::swap(LHS, RHS); break; case ISD::SETNE: TCC = MSP430CC::COND_NE; // aka COND_NZ + // Minor optimization: if RHS is a constant, swap operands, then the + // constant can be folded into comparison. 
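The EmitCMP tweak above relies on equality being symmetric: for SETEQ/SETNE, swapping LHS and RHS leaves the result unchanged, so a constant can always be moved to the side where the CMP encoding can fold it. For ordered comparisons a swap must also flip the condition code, which the surrounding cases (e.g. SETULE) do explicitly. The invariant, as a tiny check:

#include <cassert>

int main() {
  int a = 3, b = 9;
  assert((a == b) == (b == a)); // symmetric: the swap is free
  assert((a <  b) == (b >  a)); // asymmetric: a swap must flip the test
}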
+ if (RHS.getOpcode() == ISD::Constant) + std::swap(LHS, RHS); break; case ISD::SETULE: std::swap(LHS, RHS); // FALLTHROUGH diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index c3bbfe8..7a26f6c 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -823,37 +823,6 @@ def CMP16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2), "cmp.w\t{$src1, $src2}", [(MSP430cmp (load addr:$src1), GR16:$src2), (implicit SRW)]>; -def CMP8mi0 : Pseudo<(outs), (ins memsrc:$src1), - "cmp.b\t{$src1, #0}", - [(MSP430cmp (load addr:$src1), (i8 0)), (implicit SRW)]>; -def CMP16mi0: Pseudo<(outs), (ins memsrc:$src1), - "cmp.w\t{$src1, #0}", - [(MSP430cmp (load addr:$src1), (i16 0)), (implicit SRW)]>; -def CMP8mi1 : Pseudo<(outs), (ins memsrc:$src1), - "cmp.b\t{$src1, #1}", - [(MSP430cmp (load addr:$src1), (i8 1)), (implicit SRW)]>; -def CMP16mi1: Pseudo<(outs), (ins memsrc:$src1), - "cmp.w\t{$src1, #1}", - [(MSP430cmp (load addr:$src1), (i16 1)), (implicit SRW)]>; -def CMP8mi2 : Pseudo<(outs), (ins memsrc:$src1), - "cmp.b\t{$src1, #2}", - [(MSP430cmp (load addr:$src1), (i8 2)), (implicit SRW)]>; -def CMP16mi2: Pseudo<(outs), (ins memsrc:$src1), - "cmp.w\t{$src1, #2}", - [(MSP430cmp (load addr:$src1), (i16 2)), (implicit SRW)]>; -def CMP8mi4 : Pseudo<(outs), (ins memsrc:$src1), - "cmp.b\t{$src1, #4}", - [(MSP430cmp (load addr:$src1), (i8 4)), (implicit SRW)]>; -def CMP16mi4: Pseudo<(outs), (ins memsrc:$src1), - "cmp.w\t{$src1, #4}", - [(MSP430cmp (load addr:$src1), (i16 4)), (implicit SRW)]>; -def CMP8mi8 : Pseudo<(outs), (ins memsrc:$src1), - "cmp.b\t{$src1, #8}", - [(MSP430cmp (load addr:$src1), (i8 8)), (implicit SRW)]>; -def CMP16mi8: Pseudo<(outs), (ins memsrc:$src1), - "cmp.w\t{$src1, #8}", - [(MSP430cmp (load addr:$src1), (i16 8)), (implicit SRW)]>; - } // Defs = [SRW] //===----------------------------------------------------------------------===// diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp index 4e3a8d0..516eacb 100644 --- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp @@ -19,6 +19,7 @@ MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) { WeakRefDirective ="\t.weak\t"; SetDirective = "\t.set\t"; PCSymbol="."; + CommentString = ";"; AlignmentIsInBytes = false; AllowNameToStartWithDigit = true; diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 2990ba9..ede111d 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -144,6 +144,7 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base) // on PIC code Load GA if (TM.getRelocationModel() == Reloc::PIC_) { if ((Addr.getOpcode() == ISD::TargetGlobalAddress) || + (Addr.getOpcode() == ISD::TargetConstantPool) || (Addr.getOpcode() == ISD::TargetJumpTable)){ Base = CurDAG->getRegister(Mips::GP, MVT::i32); Offset = Addr; @@ -174,23 +175,21 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base) } // When loading from constant pools, load the lower address part in - // the instruction itself. Instead of: + // the instruction itself. 
Example, instead of: // lui $2, %hi($CPI1_0) // addiu $2, $2, %lo($CPI1_0) // lwc1 $f0, 0($2) // Generate: // lui $2, %hi($CPI1_0) // lwc1 $f0, %lo($CPI1_0)($2) - if (Addr.getOperand(0).getOpcode() == MipsISD::Hi && + if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi || + Addr.getOperand(0).getOpcode() == ISD::LOAD) && Addr.getOperand(1).getOpcode() == MipsISD::Lo) { SDValue LoVal = Addr.getOperand(1); - if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>( - LoVal.getOperand(0))) { - if (!CP->getOffset()) { - Base = Addr.getOperand(0); - Offset = LoVal.getOperand(0); - return true; - } + if (dyn_cast<ConstantPoolSDNode>(LoVal.getOperand(0))) { + Base = Addr.getOperand(0); + Offset = LoVal.getOperand(0); + return true; } } } @@ -235,6 +234,10 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDValue N) { else return NULL; + // Choose the offsets depending on the endianess + if (TM.getTargetData()->isBigEndian()) + std::swap(Offset0, Offset1); + // Instead of: // ldc $f0, X($3) // Generate: @@ -296,6 +299,10 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDValue N) { else return NULL; + // Choose the offsets depending on the endianess + if (TM.getTargetData()->isBigEndian()) + std::swap(Offset0, Offset1); + // Instead of: // sdc $f0, X($3) // Generate: diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index c9a43b4..ced8b93 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -132,10 +132,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FLOG10, MVT::f32, Expand); setOperationAction(ISD::FEXP, MVT::f32, Expand); - // We don't have line number support yet. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); // Use the default for now @@ -567,8 +563,6 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) SDValue ResNode; ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); Constant *C = N->getConstVal(); - SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), - N->getOffset(), MipsII::MO_ABS_HILO); // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); @@ -581,11 +575,21 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) // SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP); // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode); - //} else { // %hi/%lo relocation + + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MipsII::MO_ABS_HILO); SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CP); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); - //} + } else { + SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MipsII::MO_GOT); + SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), + CP, NULL, 0); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); + ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); + } return ResNode; } diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index af64c9f..6d8e160 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -200,22 +200,33 @@ void MipsInstrInfo:: 
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC) const { - unsigned Opc; - DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); if (RC == Mips::CPURegsRegisterClass) - Opc = Mips::SW; + BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill)) + .addImm(0).addFrameIndex(FI); else if (RC == Mips::FGR32RegisterClass) - Opc = Mips::SWC1; - else { - assert(RC == Mips::AFGR64RegisterClass); - Opc = Mips::SDC1; - } - - BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) + BuildMI(MBB, I, DL, get(Mips::SWC1)).addReg(SrcReg, getKillRegState(isKill)) .addImm(0).addFrameIndex(FI); + else if (RC == Mips::AFGR64RegisterClass) { + if (!TM.getSubtarget<MipsSubtarget>().isMips1()) { + BuildMI(MBB, I, DL, get(Mips::SDC1)) + .addReg(SrcReg, getKillRegState(isKill)) + .addImm(0).addFrameIndex(FI); + } else { + const TargetRegisterInfo *TRI = + MBB.getParent()->getTarget().getRegisterInfo(); + const unsigned *SubSet = TRI->getSubRegisters(SrcReg); + BuildMI(MBB, I, DL, get(Mips::SWC1)) + .addReg(SubSet[0], getKillRegState(isKill)) + .addImm(0).addFrameIndex(FI); + BuildMI(MBB, I, DL, get(Mips::SWC1)) + .addReg(SubSet[1], getKillRegState(isKill)) + .addImm(4).addFrameIndex(FI); + } + } else + llvm_unreachable("Register class not handled!"); } void MipsInstrInfo:: @@ -223,19 +234,27 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC) const { - unsigned Opc; - if (RC == Mips::CPURegsRegisterClass) - Opc = Mips::LW; - else if (RC == Mips::FGR32RegisterClass) - Opc = Mips::LWC1; - else { - assert(RC == Mips::AFGR64RegisterClass); - Opc = Mips::LDC1; - } - DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); - BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0).addFrameIndex(FI); + + if (RC == Mips::CPURegsRegisterClass) + BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addImm(0).addFrameIndex(FI); + else if (RC == Mips::FGR32RegisterClass) + BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addImm(0).addFrameIndex(FI); + else if (RC == Mips::AFGR64RegisterClass) { + if (!TM.getSubtarget<MipsSubtarget>().isMips1()) { + BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addImm(0).addFrameIndex(FI); + } else { + const TargetRegisterInfo *TRI = + MBB.getParent()->getTarget().getRegisterInfo(); + const unsigned *SubSet = TRI->getSubRegisters(DestReg); + BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0]) + .addImm(0).addFrameIndex(FI); + BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[1]) + .addImm(4).addFrameIndex(FI); + } + } else + llvm_unreachable("Register class not handled!"); } MachineInstr *MipsInstrInfo:: @@ -278,11 +297,14 @@ foldMemoryOperandImpl(MachineFunction &MF, const TargetRegisterClass *RC = RI.getRegClass(MI->getOperand(0).getReg()); unsigned StoreOpc, LoadOpc; + bool IsMips1 = TM.getSubtarget<MipsSubtarget>().isMips1(); if (RC == Mips::FGR32RegisterClass) { LoadOpc = Mips::LWC1; StoreOpc = Mips::SWC1; } else { assert(RC == Mips::AFGR64RegisterClass); + // Mips1 doesn't have ldc/sdc instructions. 
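Both Mips changes in this region move an f64 as two 32-bit words: the paired-load/store selection uses two offsets into the original address, and the MIPS1 spill path (since MIPS1 lacks ldc1/sdc1) emits two SWC1/LWC1 at +0 and +4. Which word is the low half at the lower address depends on byte order, hence the std::swap of Offset0/Offset1 on big-endian targets. A sketch that models only the offset choice, under that assumption:

#include <utility>

// Which 4-byte slot holds each half of a 64-bit FP value accessed as two
// 32-bit words: low half at +0 on little-endian, at +4 on big-endian.
// This mirrors the std::swap in SelectLoadFp64/SelectStoreFp64 above.
std::pair<unsigned, unsigned> halfOffsets(bool bigEndian) {
  unsigned offLo = 0, offHi = 4;
  if (bigEndian)
    std::swap(offLo, offHi);
  return {offLo, offHi}; // {offset of low word, offset of high word}
}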
+ if (IsMips1) break; LoadOpc = Mips::LDC1; StoreOpc = Mips::SDC1; } diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index ad326db..cae4181 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -107,8 +107,7 @@ getCalleeSavedRegs(const MachineFunction *MF) const static const unsigned BitMode32CalleeSavedRegs[] = { Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7, - Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30, - Mips::D10, Mips::D11, Mips::D12, Mips::D13, Mips::D14, Mips::D15,0 + Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30, 0 }; if (Subtarget.isSingleFloat()) @@ -136,9 +135,7 @@ MipsRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, - &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, - &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, - &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, 0 + &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0 }; if (Subtarget.isSingleFloat()) diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index 0ed44d2..6e0e3ce 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -306,10 +306,9 @@ void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy, int ElementAux[PIC16Dbg::AuxSize] = { 0 }; std::string TagName = ""; DIDerivedType DITy(Element.getNode()); - const char *ElementName = DITy.getName(); unsigned short ElementSize = DITy.getSizeInBits()/8; // Get mangleddd name for this structure/union element. - std::string MangMemName = ElementName + SuffixNo; + std::string MangMemName = DITy.getName().str() + SuffixNo; PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName); short Class = 0; if( CTy.getTag() == dwarf::DW_TAG_union_type) @@ -337,12 +336,11 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) { continue; if (CTy.getTag() == dwarf::DW_TAG_union_type || CTy.getTag() == dwarf::DW_TAG_structure_type ) { - const char *Name = CTy.getName(); // Get the number after llvm.dbg.composite and make UniqueSuffix from // it. std::string DIVar = CTy.getNode()->getNameStr(); std::string UniqueSuffix = "." + DIVar.substr(18); - std::string MangledCTyName = Name + UniqueSuffix; + std::string MangledCTyName = CTy.getName().str() + UniqueSuffix; unsigned short size = CTy.getSizeInBits()/8; int Aux[PIC16Dbg::AuxSize] = {0}; // 7th and 8th byte represent size of structure/union. diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h index 65f113e..73d30bf 100644 --- a/lib/Target/PowerPC/PPCFrameInfo.h +++ b/lib/Target/PowerPC/PPCFrameInfo.h @@ -42,11 +42,12 @@ public: /// frame pointer. static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) { // For the Darwin ABI: - // Use the TOC save slot in the PowerPC linkage area for saving the frame - // pointer (if needed.) LLVM does not generate code that uses the TOC (R2 - // is treated as a caller saved register.) + // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area + // for saving the frame pointer (if needed.) 
While the published ABI has + // not used this slot since at least MacOSX 10.2, there is older code + // around that does use it, and that needs to continue to work. if (isDarwinABI) - return isPPC64 ? 40 : 20; + return isPPC64 ? -8U : -4U; // SVR4 ABI: First slot in the general register save area. return -4U; @@ -90,6 +91,17 @@ public: // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const { + if (TM.getSubtarget<PPCSubtarget>().isDarwinABI()) { + NumEntries = 1; + if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { + static const SpillSlot darwin64Offsets = {PPC::X31, -8}; + return &darwin64Offsets; + } else { + static const SpillSlot darwinOffsets = {PPC::R31, -4}; + return &darwinOffsets; + } + } + // Early exit if not using the SVR4 ABI. if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) { NumEntries = 0; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index fb9a240..e7334b5 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -86,7 +86,7 @@ namespace { /// isRotateAndMask - Returns true if Mask and Shift can be folded into a /// rotate and mask opcode and mask operation. - static bool isRotateAndMask(SDNode *N, unsigned Mask, bool IsShiftMask, + static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, unsigned &SH, unsigned &MB, unsigned &ME); /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC @@ -358,7 +358,7 @@ bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { } bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, - bool IsShiftMask, unsigned &SH, + bool isShiftMask, unsigned &SH, unsigned &MB, unsigned &ME) { // Don't even go down this path for i64, since different logic will be // necessary for rldicl/rldicr/rldimi. @@ -374,12 +374,12 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, if (Opcode == ISD::SHL) { // apply shift left to mask if it comes first - if (IsShiftMask) Mask = Mask << Shift; + if (isShiftMask) Mask = Mask << Shift; // determine which bits are made indeterminant by shift Indeterminant = ~(0xFFFFFFFFu << Shift); } else if (Opcode == ISD::SRL) { // apply shift right to mask if it comes first - if (IsShiftMask) Mask = Mask >> Shift; + if (isShiftMask) Mask = Mask >> Shift; // determine which bits are made indeterminant by shift Indeterminant = ~(0xFFFFFFFFu >> Shift); // adjust for the left rotate @@ -443,8 +443,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { unsigned MB, ME; if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) { - SDValue Tmp1, Tmp2, Tmp3; - bool DisjointMask = (TargetMask ^ InsertMask) == 0xFFFFFFFF; + SDValue Tmp1, Tmp2; if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) && isInt32Immediate(Op1.getOperand(1), Value)) { @@ -461,10 +460,9 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { Op1 = Op1.getOperand(0); } } - - Tmp3 = (Op0Opc == ISD::AND && DisjointMask) ? 
Op0.getOperand(0) : Op0; + SH &= 31; - SDValue Ops[] = { Tmp3, Op1, getI32Imm(SH), getI32Imm(MB), + SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5); } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 099fcb5..30a7861 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -182,10 +182,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - // Support label based line numbers. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); @@ -1174,7 +1170,7 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { DebugLoc DL = Op.getDebugLoc(); BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - SDValue TgtBA = DAG.getBlockAddress(BA, DL, /*isTarget=*/true); + SDValue TgtBA = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true); SDValue Zero = DAG.getConstant(0, PtrVT); SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, TgtBA, Zero); SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, TgtBA, Zero); @@ -2177,10 +2173,10 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be /// adjusted to accomodate the arguments for the tailcall. -static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall, +static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, unsigned ParamSize) { - if (!IsTailCall) return 0; + if (!isTailCall) return 0; PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); unsigned CallerMinReservedArea = FI->getMinReservedArea(); @@ -3190,8 +3186,8 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Construct the stack pointer operand. - bool IsPPC64 = Subtarget.isPPC64(); - unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1; + bool isPPC64 = Subtarget.isPPC64(); + unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; SDValue StackPtr = DAG.getRegister(SP, PtrVT); // Get the operands for the STACKRESTORE. @@ -3213,7 +3209,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - bool IsPPC64 = PPCSubTarget.isPPC64(); + bool isPPC64 = PPCSubTarget.isPPC64(); bool isDarwinABI = PPCSubTarget.isDarwinABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -3225,9 +3221,9 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { // If the frame pointer save index hasn't been defined yet. if (!RASI) { // Find out what the fix offset of the frame pointer save area. - int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI); + int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. - RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset, + RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true, false); // Save the result. 
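SelectBitfieldInsert above now feeds Op0 straight through and masks SH to 31; the underlying RLWIMI operation rotates the source left and inserts it into the destination under a run-of-ones mask from MB to ME. A reference model of those semantics in plain C++ (not the selector itself; PowerPC numbers bits big-endian, so bit 0 is the MSB):

#include <cstdint>

static uint32_t rotl32(uint32_t v, unsigned sh) {
  sh &= 31;                              // the "SH &= 31" in the patch
  return sh ? (v << sh) | (v >> (32 - sh)) : v;
}

// rlwimi dst, src, SH, MB, ME: rotate src left by SH, then replace the bits
// of dst covered by the mask [MB..ME] with the rotated value's bits.
uint32_t rlwimi(uint32_t dst, uint32_t src, unsigned sh,
                unsigned mb, unsigned me) {
  // Build the run-of-ones mask; MB > ME wraps around, as the hardware allows.
  uint32_t mask = (mb <= me)
      ? ((0xFFFFFFFFu >> mb) & (0xFFFFFFFFu << (31 - me)))
      : ((0xFFFFFFFFu >> mb) | (0xFFFFFFFFu << (31 - me)));
  return (rotl32(src, sh) & mask) | (dst & ~mask);
}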
FI->setReturnAddrSaveIndex(RASI); @@ -3238,7 +3234,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { SDValue PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - bool IsPPC64 = PPCSubTarget.isPPC64(); + bool isPPC64 = PPCSubTarget.isPPC64(); bool isDarwinABI = PPCSubTarget.isDarwinABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -3250,11 +3246,11 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { // If the frame pointer save index hasn't been defined yet. if (!FPSI) { // Find out what the fix offset of the frame pointer save area. - int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, + int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. - FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset, + FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true, false); // Save the result. FI->setFramePointerSaveIndex(FPSI); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index f5c095a..2b3f80d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1358,15 +1358,6 @@ def RLWNM : MForm_2<23, //===----------------------------------------------------------------------===// -// DWARF Pseudo Instructions -// - -def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), - "${:comment} .loc $file, $line, $col", - [(dwarf_loc (i32 imm:$line), (i32 imm:$col), - (i32 imm:$file))]>; - -//===----------------------------------------------------------------------===// // PowerPC Instruction Patterns // diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index ef25d92..c679bcd 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -323,6 +323,15 @@ PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) { return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback; } +TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() { + // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3 + // instructions to save the caller's address if this is a lazy-compilation + // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address + // into a register and jump through it. + StubLayout Result = {10*4, 4}; + return Result; +} + #if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \ defined(__APPLE__) extern "C" void sys_icache_invalidate(const void *Addr, size_t len); @@ -330,12 +339,12 @@ extern "C" void sys_icache_invalidate(const void *Addr, size_t len); void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, JITCodeEmitter &JCE) { + MachineCodeEmitter::BufferState BS; // If this is just a call to an external function, emit a branch instead of a // call. The code is the same except for one bit of the last instruction. 
if (Fn != (void*)(intptr_t)PPC32CompilationCallback && Fn != (void*)(intptr_t)PPC64CompilationCallback) { - JCE.startGVStub(F, 7*4); - intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); + void *Addr = (void*)JCE.getCurrentPCValue(); JCE.emitWordBE(0); JCE.emitWordBE(0); JCE.emitWordBE(0); @@ -343,13 +352,12 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, JCE.emitWordBE(0); JCE.emitWordBE(0); JCE.emitWordBE(0); - EmitBranchToAt(Addr, (intptr_t)Fn, false, is64Bit); - sys::Memory::InvalidateInstructionCache((void*)Addr, 7*4); - return JCE.finishGVStub(F); + EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit); + sys::Memory::InvalidateInstructionCache(Addr, 7*4); + return Addr; } - JCE.startGVStub(F, 10*4); - intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); + void *Addr = (void*)JCE.getCurrentPCValue(); if (is64Bit) { JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1) JCE.emitWordBE(0x7d6802a6); // mflr r11 @@ -372,8 +380,8 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, JCE.emitWordBE(0); JCE.emitWordBE(0); EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit); - sys::Memory::InvalidateInstructionCache((void*)Addr, 10*4); - return JCE.finishGVStub(F); + sys::Memory::InvalidateInstructionCache(Addr, 10*4); + return Addr; } diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h index 2e25b29..47ead59 100644 --- a/lib/Target/PowerPC/PPCJITInfo.h +++ b/lib/Target/PowerPC/PPCJITInfo.h @@ -30,6 +30,7 @@ namespace llvm { is64Bit = tmIs64Bit; } + virtual StubLayout getStubLayout(); virtual void *emitFunctionStub(const Function* F, void *Fn, JITCodeEmitter &JCE); virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index e65e644..0c3c8eb 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -1032,18 +1032,17 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); - bool IsPPC64 = Subtarget.isPPC64(); - bool IsSVR4ABI = Subtarget.isSVR4ABI(); + bool isPPC64 = Subtarget.isPPC64(); bool isDarwinABI = Subtarget.isDarwinABI(); MachineFrameInfo *MFI = MF.getFrameInfo(); // If the frame pointer save index hasn't been defined yet. - if (!FPSI && needsFP(MF) && IsSVR4ABI) { + if (!FPSI && needsFP(MF)) { // Find out what the fix offset of the frame pointer save area. - int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, + int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. - FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset, + FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true, false); // Save the result. FI->setFramePointerSaveIndex(FPSI); @@ -1067,7 +1066,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (needsFP(MF) || spillsCR(MF)) { const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = IsPPC64 ? G8RC : GPRC; + const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); @@ -1297,7 +1296,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { int NegFrameSize = -FrameSize; // Get processor type. 
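With startGVStub gone, the PPC emitter above just records the current PC, emits placeholder zero words, and back-patches a real branch over them with EmitBranchToAt before invalidating the instruction cache. The core of such a back-patch for a plain PPC `b` is small; a hedged sketch, assuming the I-form branch encoding (opcode 18, 26-bit signed byte offset in bits 6..29, AA=LK=0):

#include <cstdint>

// Patch a PowerPC unconditional branch at 'at' targeting 'to'.
bool patchBranch(uint32_t *at, intptr_t to) {
  intptr_t delta = to - reinterpret_cast<intptr_t>(at);
  if (delta < -0x2000000 || delta > 0x1FFFFFC || (delta & 3))
    return false; // out of range: needs the long register-indirect form
  *at = 0x48000000u | (static_cast<uint32_t>(delta) & 0x03FFFFFCu);
  // A real JIT must still flush the instruction cache for this word,
  // as the patch does with InvalidateInstructionCache.
  return true;
}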
- bool IsPPC64 = Subtarget.isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); // Get operating system bool isDarwinABI = Subtarget.isDarwinABI(); // Check if the link register (LR) must be saved. @@ -1306,7 +1305,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { // Do we have a frame pointer for this function? bool HasFP = hasFP(MF) && FrameSize; - int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI); + int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); int FPOffset = 0; if (HasFP) { @@ -1316,11 +1315,11 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { assert(FPIndex && "No Frame Pointer Save Slot!"); FPOffset = FFI->getObjectOffset(FPIndex); } else { - FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI); + FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); } } - if (IsPPC64) { + if (isPPC64) { if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0); @@ -1361,7 +1360,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { // Adjust stack pointer: r1 += NegFrameSize. // If there is a preferred stack alignment, align R1 now - if (!IsPPC64) { + if (!isPPC64) { // PPC32. if (ALIGN_STACK && MaxAlign > TargetAlign) { assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!"); @@ -1444,19 +1443,19 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize); Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); } else { - MachineLocation SP(IsPPC64 ? PPC::X31 : PPC::R31); + MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31); Moves.push_back(MachineMove(FrameLabelId, SP, SP)); } if (HasFP) { MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset); - MachineLocation FPSrc(IsPPC64 ? PPC::X31 : PPC::R31); + MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31); Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc)); } if (MustSaveLR) { MachineLocation LRDst(MachineLocation::VirtualFP, LROffset); - MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR); + MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR); Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc)); } } @@ -1465,7 +1464,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { // If there is a frame pointer, copy R1 into R31 if (HasFP) { - if (!IsPPC64) { + if (!isPPC64) { BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31) .addReg(PPC::R1) .addReg(PPC::R1); @@ -1481,8 +1480,8 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { // Mark effective beginning of when frame pointer is ready. BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId); - MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) : - (IsPPC64 ? PPC::X1 : PPC::R1)); + MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) : + (isPPC64 ? PPC::X1 : PPC::R1)); MachineLocation FPSrc(MachineLocation::VirtualFP); Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); } @@ -1528,7 +1527,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, int FrameSize = MFI->getStackSize(); // Get processor type. - bool IsPPC64 = Subtarget.isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); // Get operating system bool isDarwinABI = Subtarget.isDarwinABI(); // Check if the link register (LR) has been saved. @@ -1537,7 +1536,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, // Do we have a frame pointer for this function? 
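The prologue's "align R1 now" path asserts that MaxAlign is a power of two, which makes rounding the frame size up to it a single mask operation. The same rounding in isolation, as a sketch of the arithmetic behind aligning r1 after "r1 += NegFrameSize":

#include <cassert>
#include <cstdint>

// Round frameSize up to maxAlign, which must be a power of two.
uint32_t alignFrame(uint32_t frameSize, uint32_t maxAlign) {
  assert(maxAlign && (maxAlign & (maxAlign - 1)) == 0 && "Invalid alignment!");
  return (frameSize + maxAlign - 1) & ~(maxAlign - 1);
}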
bool HasFP = hasFP(MF) && FrameSize; - int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI); + int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); int FPOffset = 0; if (HasFP) { @@ -1547,7 +1546,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, assert(FPIndex && "No Frame Pointer Save Slot!"); FPOffset = FFI->getObjectOffset(FPIndex); } else { - FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI); + FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); } } @@ -1575,7 +1574,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, if (FrameSize) { // The loaded (or persistent) stack pointer value is offset by the 'stwu' // on entry to the function. Add this offset back now. - if (!IsPPC64) { + if (!isPPC64) { // If this function contained a fastcc call and PerformTailCallOpt is // enabled (=> hasFastCall()==true) the fastcc call might contain a tail // call which invalidates the stack pointer value in SP(0). So we use the @@ -1629,7 +1628,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, } } - if (IsPPC64) { + if (isPPC64) { if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0) .addImm(LROffset/4).addReg(PPC::X1); @@ -1659,13 +1658,13 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, MF.getFunction()->getCallingConv() == CallingConv::Fast) { PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); unsigned CallerAllocatedAmt = FI->getMinReservedArea(); - unsigned StackReg = IsPPC64 ? PPC::X1 : PPC::R1; - unsigned FPReg = IsPPC64 ? PPC::X31 : PPC::R31; - unsigned TmpReg = IsPPC64 ? PPC::X0 : PPC::R0; - unsigned ADDIInstr = IsPPC64 ? PPC::ADDI8 : PPC::ADDI; - unsigned ADDInstr = IsPPC64 ? PPC::ADD8 : PPC::ADD4; - unsigned LISInstr = IsPPC64 ? PPC::LIS8 : PPC::LIS; - unsigned ORIInstr = IsPPC64 ? PPC::ORI8 : PPC::ORI; + unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1; + unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; + unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0; + unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI; + unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4; + unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS; + unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI; if (CallerAllocatedAmt && isInt16(CallerAllocatedAmt)) { BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg) diff --git a/lib/Target/README.txt b/lib/Target/README.txt index aad621f..2d8a687 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -2,6 +2,29 @@ Target Independent Opportunities: //===---------------------------------------------------------------------===// +Dead argument elimination should be enhanced to handle cases when an argument is +dead to an externally visible function. Though the argument can't be removed +from the externally visible function, the caller doesn't need to pass it in. +For example in this testcase: + + void foo(int X) __attribute__((noinline)); + void foo(int X) { sideeffect(); } + void bar(int A) { foo(A+1); } + +We compile bar to: + +define void @bar(i32 %A) nounwind ssp { + %0 = add nsw i32 %A, 1 ; <i32> [#uses=1] + tail call void @foo(i32 %0) nounwind noinline ssp + ret void +} + +The add is dead, we could pass in 'i32 undef' instead. This occurs for C++ +templates etc, which usually have linkonce_odr/weak_odr linkage, not internal +linkage. 
+ +//===---------------------------------------------------------------------===// + With the recent changes to make the implicit def/use set explicit in machineinstrs, we should change the target descriptions for 'call' instructions so that the .td files don't list all the call-clobbered registers as implicit @@ -220,7 +243,7 @@ so cool to turn it into something like: ... which would only do one 32-bit XOR per loop iteration instead of two. It would also be nice to recognize the reg->size doesn't alias reg->node[i], but -alas. +this requires TBAA. //===---------------------------------------------------------------------===// @@ -280,6 +303,9 @@ unsigned int popcount(unsigned int input) { return count; } +This is a form of idiom recognition for loops, the same thing that could be +useful for recognizing memset/memcpy. + //===---------------------------------------------------------------------===// These should turn into single 16-bit (unaligned?) loads on little/big endian @@ -343,7 +369,7 @@ PHI Slicing could be extended to do this. //===---------------------------------------------------------------------===// -LSR should know what GPR types a target has. This code: +LSR should know what GPR types a target has from TargetData. This code: volatile short X, Y; // globals @@ -369,7 +395,6 @@ LBB1_2: LSR should reuse the "+" IV for the exit test. - //===---------------------------------------------------------------------===// Tail call elim should be more aggressive, checking to see if the call is @@ -441,25 +466,6 @@ entry: //===---------------------------------------------------------------------===// -"basicaa" should know how to look through "or" instructions that act like add -instructions. For example in this code, the x*4+1 is turned into x*4 | 1, and -basicaa can't analyze the array subscript, leading to duplicated loads in the -generated code: - -void test(int X, int Y, int a[]) { -int i; - for (i=2; i<1000; i+=4) { - a[i+0] = a[i-1+0]*a[i-2+0]; - a[i+1] = a[i-1+1]*a[i-2+1]; - a[i+2] = a[i-1+2]*a[i-2+2]; - a[i+3] = a[i-1+3]*a[i-2+3]; - } -} - -BasicAA also doesn't do this for add. It needs to know that &A[i+1] != &A[i]. - -//===---------------------------------------------------------------------===// - We should investigate an instruction sinking pass. Consider this silly example in pic mode: @@ -1110,6 +1116,8 @@ later. //===---------------------------------------------------------------------===// +[STORE SINKING] + Store sinking: This code: void f (int n, int *cond, int *res) { @@ -1165,6 +1173,8 @@ This is GCC PR38204. //===---------------------------------------------------------------------===// +[STORE SINKING] + GCC PR37810 is an interesting case where we should sink load/store reload into the if block and outside the loop, so we don't reload/store it on the non-call path. @@ -1192,7 +1202,7 @@ we don't sink the store. We need partially dead store sinking. //===---------------------------------------------------------------------===// -[PHI TRANSLATE GEPs] +[LOAD PRE CRIT EDGE SPLITTING] GCC PR37166: Sinking of loads prevents SROA'ing the "g" struct on the stack leading to excess stack traffic. This could be handled by GVN with some crazy @@ -1209,99 +1219,59 @@ bb3: ; preds = %bb1, %bb2, %bb %10 = getelementptr %struct.f* %c_addr.0, i32 0, i32 0 %11 = load i32* %10, align 4 -%11 is fully redundant, an in BB2 it should have the value %8. +%11 is partially redundant, an in BB2 it should have the value %8. + +GCC PR33344 and PR35287 are similar cases. 
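The idiom-recognition note above concerns loops whose net effect matches a single well-known operation; a compiler that proves the equivalence can replace the whole loop. For instance, with the classic bit-clearing popcount loop (illustrative here, not necessarily the exact body elided from the testcase):

#include <cstdint>

// Kernighan's loop: each iteration clears the lowest set bit.
unsigned popcountLoop(uint32_t input) {
  unsigned count = 0;
  while (input) {
    input &= input - 1; // drop the lowest set bit
    ++count;
  }
  return count;
}

// Idiom recognition would rewrite the loop into one intrinsic, which
// lowers to a single popcnt instruction on targets that have it:
unsigned popcountDirect(uint32_t input) {
  return __builtin_popcount(input); // GCC/Clang builtin
}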
-GCC PR33344 is a similar case. //===---------------------------------------------------------------------===// -[PHI TRANSLATE INDEXED GEPs] PR5313 +[LOAD PRE] -Load redundancy elimination for simple loop. This loop: +There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the +GCC testsuite, ones we don't get yet are (checked through loadpre25): -void append_text(const char* text,unsigned char * const io) { - while(*text) - *io=*text++; -} +[CRIT EDGE BREAKING] +loadpre3.c predcom-4.c -Compiles to have a fully redundant load in the loop (%2): +[PRE OF READONLY CALL] +loadpre5.c -define void @append_text(i8* nocapture %text, i8* nocapture %io) nounwind { -entry: - %0 = load i8* %text, align 1 ; <i8> [#uses=1] - %1 = icmp eq i8 %0, 0 ; <i1> [#uses=1] - br i1 %1, label %return, label %bb - -bb: ; preds = %bb, %entry - %indvar = phi i32 [ 0, %entry ], [ %tmp, %bb ] ; <i32> [#uses=2] - %text_addr.04 = getelementptr i8* %text, i32 %indvar ; <i8*> [#uses=1] - %2 = load i8* %text_addr.04, align 1 ; <i8> [#uses=1] - store i8 %2, i8* %io, align 1 - %tmp = add i32 %indvar, 1 ; <i32> [#uses=2] - %scevgep = getelementptr i8* %text, i32 %tmp ; <i8*> [#uses=1] - %3 = load i8* %scevgep, align 1 ; <i8> [#uses=1] - %4 = icmp eq i8 %3, 0 ; <i1> [#uses=1] - br i1 %4, label %return, label %bb - -return: ; preds = %bb, %entry - ret void -} +[TURN SELECT INTO BRANCH] +loadpre14.c loadpre15.c -//===---------------------------------------------------------------------===// +actually a conditional increment: loadpre18.c loadpre19.c -There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the -GCC testsuite. There are many pre testcases as ssa-pre-*.c //===---------------------------------------------------------------------===// -There are some interesting cases in testsuite/gcc.dg/tree-ssa/pred-comm* in the -GCC testsuite. For example, predcom-1.c is: - - for (i = 2; i < 1000; i++) - fib[i] = (fib[i-1] + fib[i - 2]) & 0xffff; - -which compiles into: - -bb1: ; preds = %bb1, %bb1.thread - %indvar = phi i32 [ 0, %bb1.thread ], [ %0, %bb1 ] - %i.0.reg2mem.0 = add i32 %indvar, 2 - %0 = add i32 %indvar, 1 ; <i32> [#uses=3] - %1 = getelementptr [1000 x i32]* @fib, i32 0, i32 %0 - %2 = load i32* %1, align 4 ; <i32> [#uses=1] - %3 = getelementptr [1000 x i32]* @fib, i32 0, i32 %indvar - %4 = load i32* %3, align 4 ; <i32> [#uses=1] - %5 = add i32 %4, %2 ; <i32> [#uses=1] - %6 = and i32 %5, 65535 ; <i32> [#uses=1] - %7 = getelementptr [1000 x i32]* @fib, i32 0, i32 %i.0.reg2mem.0 - store i32 %6, i32* %7, align 4 - %exitcond = icmp eq i32 %0, 998 ; <i1> [#uses=1] - br i1 %exitcond, label %return, label %bb1 +[SCALAR PRE] +There are many PRE testcases in testsuite/gcc.dg/tree-ssa/ssa-pre-*.c in the +GCC testsuite. -This is basically: - LOAD fib[i+1] - LOAD fib[i] - STORE fib[i+2] +//===---------------------------------------------------------------------===// -instead of handling this as a loop or other xform, all we'd need to do is teach -load PRE to phi translate the %0 add (i+1) into the predecessor as (i'+1+1) = -(i'+2) (where i' is the previous iteration of i). This would find the store -which feeds it. +There are some interesting cases in testsuite/gcc.dg/tree-ssa/pred-comm* in the +GCC testsuite. For example, we get the first example in predcom-1.c, but +miss the second one: -predcom-2.c is apparently the same as predcom-1.c -predcom-3.c is very similar but needs loads feeding each other instead of -store->load. -predcom-4.c seems the same as the rest. 
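For readers outside GVN, the shape of load PRE referenced throughout this section: a load is available on some paths to a join but not others, and inserting a copy of it on the unavailable edge makes the value fully available, so the later load folds away. A source-level illustration of what the IR transform achieves (hypothetical example, not one of the GCC testcases):

#include <cstdio>

int g;

// Before PRE: the load of g in the return is redundant when cond is true
// (the then-branch already loaded it), but not when cond is false.
int before(bool cond) {
  int t = 0;
  if (cond)
    t = g + 1;  // loads g on this path only
  return t + g; // partially redundant load of g
}

// After PRE: the load is placed on the else-edge too, so both paths reach
// the join with g's value in hand and the second load disappears.
int after(bool cond) {
  int t = 0, gv;
  if (cond) {
    gv = g;
    t = gv + 1;
  } else {
    gv = g;     // inserted load makes the value fully available
  }
  return t + gv;
}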
+unsigned fib[1000]; +unsigned avg[1000]; +__attribute__ ((noinline)) +void count_averages(int n) { + int i; + for (i = 1; i < n; i++) + avg[i] = (((unsigned long) fib[i - 1] + fib[i] + fib[i + 1]) / 3) & 0xffff; +} -//===---------------------------------------------------------------------===// +which compiles into two loads instead of one in the loop. -Other simple load PRE cases: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35287 [LPRE crit edge splitting] +predcom-2.c is the same as predcom-1.c -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34677 (licm does this, LPRE crit edge) - llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | opt -mem2reg -simplifycfg -gvn | llvm-dis +predcom-3.c is very similar but needs loads feeding each other instead of +store->load. -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS] //===---------------------------------------------------------------------===// @@ -1334,7 +1304,7 @@ Interesting missed case because of control flow flattening (should be 2 loads): http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629 With: llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | opt -mem2reg -gvn -instcombine | llvm-dis -we miss it because we need 1) GEP PHI TRAN, 2) CRIT EDGE 3) MULTIPLE DIFFERENT +we miss it because we need 1) CRIT EDGE 2) MULTIPLE DIFFERENT VALS PRODUCED BY ONE BLOCK OVER DIFFERENT PATHS //===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 133f828..1b3ca3e 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -644,10 +644,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - // We don't have line number support yet. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); // VASTART needs to be custom lowered to use the VarArgsFrameIndex. @@ -663,8 +659,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); // No debug info support yet. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); setStackPointerRegisterToSaveRestore(SP::O6); diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 6fdbc92..f887523 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -783,8 +783,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, } // Exception Handling. 
- LSDASection = getMachOSection("__TEXT", "__gcc_except_tab", 0, - SectionKind::getReadOnlyWithRel()); + LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0, + SectionKind::getDataRel()); EHFrameSection = getMachOSection("__TEXT", "__eh_frame", MCSectionMachO::S_COALESCED | diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index be9f4b2..38c0c28 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -43,7 +43,6 @@ MCSymbol *X86MCInstLower::GetPICBaseSymbol() const { Twine(AsmPrinter.getFunctionNumber())+"$pb"); } - /// LowerGlobalAddressOperand - Lower an MO_GlobalAddress operand to an /// MCOperand. MCSymbol *X86MCInstLower:: @@ -231,6 +230,19 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const { return Ctx.GetOrCreateSymbol(Name.str()); } +MCSymbol *X86MCInstLower:: +GetBlockAddressSymbol(const MachineOperand &MO) const { + const char *Suffix = ""; + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on BA operand"); + case X86II::MO_NO_FLAG: break; // No flag. + case X86II::MO_PIC_BASE_OFFSET: break; // Doesn't modify symbol name. + case X86II::MO_GOTOFF: Suffix = "@GOTOFF"; break; + } + + return AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress(), Suffix); +} + MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { // FIXME: We would like an efficient form for this, so we don't have to do a @@ -331,8 +343,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO)); break; case MachineOperand::MO_BlockAddress: - MCOp = LowerSymbolOperand(MO, AsmPrinter.GetBlockAddressSymbol( - MO.getBlockAddress())); + MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO)); break; } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h index fa25b90..94f8bfc 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h @@ -43,6 +43,7 @@ public: MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; + MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; private: diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt new file mode 100644 index 0000000..b329e89 --- /dev/null +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMX86Disassembler + X86Disassembler.cpp + ) +add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen) diff --git a/lib/Target/X86/Disassembler/Makefile b/lib/Target/X86/Disassembler/Makefile new file mode 100644 index 0000000..b289647 --- /dev/null +++ b/lib/Target/X86/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/X86/Disassembler/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. 
+LIBRARYNAME = LLVMX86Disassembler
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
new file mode 100644
index 0000000..2ebbc9b
--- /dev/null
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -0,0 +1,29 @@
+//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "X86.h"
+using namespace llvm;
+
+static const MCDisassembler *createX86_32Disassembler(const Target &T) {
+  return 0;
+}
+
+static const MCDisassembler *createX86_64Disassembler(const Target &T) {
+  return 0;
+}
+
+extern "C" void LLVMInitializeX86Disassembler() {
+  // Register the disassembler.
+  TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
+                                         createX86_32Disassembler);
+  TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
+                                         createX86_64Disassembler);
+}
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
index 220831d..b311a6e 100644
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@@ -18,6 +18,6 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
                X86GenFastISel.inc \
                X86GenCallingConv.inc X86GenSubtarget.inc

-DIRS = AsmPrinter AsmParser TargetInfo
+DIRS = AsmPrinter AsmParser Disassembler TargetInfo

include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 4497931..4892e17 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -595,7 +595,6 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
    break;
  case TargetInstrInfo::IMPLICIT_DEF:
  case TargetInstrInfo::KILL:
- case X86::DWARF_LOC:
  case X86::FP_REG_KILL:
    break;
  case X86::MOVPC32r: {
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 6a3577a..a9a78be 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -252,8 +252,8 @@ namespace {
      else if (AM.JT != -1)
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      else if (AM.BlockAddr)
-       Disp = CurDAG->getBlockAddress(AM.BlockAddr, DebugLoc()/*MVT::i32*/,
-                                      true /*AM.SymbolFlags*/);
+       Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
+                                      true, AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
@@ -777,7 +777,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
      AM.SymbolFlags = J->getTargetFlags();
    } else {
      AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
-     //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+     AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
    }
    if (N.getOpcode() == X86ISD::WrapperRIP)
@@ -808,7 +808,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
      AM.SymbolFlags = J->getTargetFlags();
    } else {
      AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
-     //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+     AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
    }
    return false;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6018cf5..d80b8ec 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -373,13 +373,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
  }

-  // Use the default ISD::DBG_STOPPOINT.
-  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing()) {
-    setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
    setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
  }

@@ -978,6 +975,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
  computeRegisterProperties();

+  // Divide and remainder operations have no vector equivalent and can
+  // trap. Do a custom widening for these operations in which we never
+  // generate more divide/remainder operations than the original vector width.
+  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+    if (!isTypeLegal((MVT::SimpleValueType)VT)) {
+      setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom);
+      setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom);
+      setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom);
+      setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom);
+    }
+  }
+
  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
@@ -4722,18 +4732,27 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
SDValue
X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
-  unsigned WrapperKind = X86ISD::Wrapper;
+  // Create the TargetBlockAddress node.
+  unsigned char OpFlags =
+    Subtarget->ClassifyBlockAddressReference();
  CodeModel::Model M = getTargetMachine().getCodeModel();
+  BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue Result = DAG.getBlockAddress(BA, getPointerTy(),
+                                       /*isTarget=*/true, OpFlags);
+
  if (Subtarget->isPICStyleRIPRel() &&
      (M == CodeModel::Small || M == CodeModel::Kernel))
-    WrapperKind = X86ISD::WrapperRIP;
-
-  DebugLoc DL = Op.getDebugLoc();
-
-  BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
-  SDValue Result = DAG.getBlockAddress(BA, DL, /*isTarget=*/true);
+    Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+  else
+    Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);

-  Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+  // With PIC, the address is actually $g + Offset.
+  if (isGlobalRelativeToPICBase(OpFlags)) {
+    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                         DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
+                         Result);
+  }

  return Result;
}
@@ -7164,6 +7183,14 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
    Results.push_back(edx.getValue(1));
    return;
  }
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::SREM:
+  case ISD::UREM: {
+    EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+    Results.push_back(DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()));
+    return;
+  }
  case ISD::ATOMIC_CMP_SWAP: {
    EVT T = N->getValueType(0);
    assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index a01534b..b5fa862 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1663,7 +1663,7 @@ def : Pat<(X86tcret GR64:$dst, imm:$off),
          (TCRETURNri64 GR64:$dst, imm:$off)>;

def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
-          (TCRETURNdi64 texternalsym:$dst, imm:$off)>;
+          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>;
def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
          (TCRETURNdi64 texternalsym:$dst, imm:$off)>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 1ddceb1..a37013d 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3133,7 +3133,6 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
    break;
  case TargetInstrInfo::IMPLICIT_DEF:
  case TargetInstrInfo::KILL:
- case X86::DWARF_LOC:
  case X86::FP_REG_KILL:
    break;
  case X86::MOVPC32r: {
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index a79f262..90ef1f4 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -718,7 +718,6 @@ def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset, variable_o
                 []>;

let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-
  def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst), "jmp\t$dst  # TAILCALL",
                 []>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
@@ -3506,16 +3505,6 @@ def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                   [(set GR32:$dst, (fsload addr:$src))]>, SegFS;

//===----------------------------------------------------------------------===//
-// DWARF Pseudo Instructions
-//
-
-def DWARF_LOC   : I<0, Pseudo, (outs),
-                    (ins i32imm:$line, i32imm:$col, i32imm:$file),
-                    ".loc\t$file $line $col",
-                    [(dwarf_loc (i32 imm:$line), (i32 imm:$col),
-                      (i32 imm:$file))]>;
-
-//===----------------------------------------------------------------------===//
// EH Pseudo Instructions
//
let isTerminator = 1, isReturn = 1, isBarrier = 1,
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index ee63d56..dfdd4ce 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2820,40 +2820,40 @@ defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd",

let Constraints = "$src1 = $dst" in {
  def PALIGNR64rr  : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
-                          (ins VR64:$src1, VR64:$src2, i16imm:$src3),
+                          (ins VR64:$src1, VR64:$src2, i8imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           []>;
  def PALIGNR64rm  : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
-                          (ins VR64:$src1, i64mem:$src2, i16imm:$src3),
+                          (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           []>;

  def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
-                          (ins VR128:$src1, VR128:$src2, i32imm:$src3),
+                          (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           []>, OpSize;
  def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
-                          (ins VR128:$src1, i128mem:$src2, i32imm:$src3),
+                          (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           []>, OpSize;
}

// palignr patterns.
-def : Pat<(int_x86_ssse3_palign_r VR64:$src1, VR64:$src2, (i16 imm:$src3)),
+def : Pat<(int_x86_ssse3_palign_r VR64:$src1, VR64:$src2, (i8 imm:$src3)),
          (PALIGNR64rr VR64:$src1, VR64:$src2, (BYTE_imm imm:$src3))>,
          Requires<[HasSSSE3]>;
def : Pat<(int_x86_ssse3_palign_r VR64:$src1, (memop64 addr:$src2),
-                                  (i16 imm:$src3)),
+                                  (i8 imm:$src3)),
          (PALIGNR64rm VR64:$src1, addr:$src2, (BYTE_imm imm:$src3))>,
          Requires<[HasSSSE3]>;

-def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, VR128:$src2, (i32 imm:$src3)),
+def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, VR128:$src2, (i8 imm:$src3)),
          (PALIGNR128rr VR128:$src1, VR128:$src2, (BYTE_imm imm:$src3))>,
          Requires<[HasSSSE3]>;
def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, (memopv2i64 addr:$src2),
-                                      (i32 imm:$src3)),
+                                      (i8 imm:$src3)),
          (PALIGNR128rm VR128:$src1, addr:$src2, (BYTE_imm imm:$src3))>,
          Requires<[HasSSSE3]>;
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 0792bdd..ce06f0f 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -426,83 +426,77 @@ X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) {
void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
                                             JITCodeEmitter &JCE) {
+  MachineCodeEmitter::BufferState BS;
#if defined (X86_64_JIT)
-  JCE.startGVStub(GV, 8, 8);
+  JCE.startGVStub(BS, GV, 8, 8);
  JCE.emitWordLE((unsigned)(intptr_t)ptr);
  JCE.emitWordLE((unsigned)(((intptr_t)ptr) >> 32));
#else
-  JCE.startGVStub(GV, 4, 4);
+  JCE.startGVStub(BS, GV, 4, 4);
  JCE.emitWordLE((intptr_t)ptr);
#endif
-  return JCE.finishGVStub(GV);
+  return JCE.finishGVStub(BS);
}

-void *X86JITInfo::emitFunctionStub(const Function* F, void *Fn,
+TargetJITInfo::StubLayout X86JITInfo::getStubLayout() {
+  // The 64-bit stub contains:
+  //   movabs r10 <- 8-byte-target-address  # 10 bytes
+  //   call|jmp *r10  # 3 bytes
+  // The 32-bit stub contains a 5-byte call|jmp.
+  // If the stub is a call to the compilation callback, an extra byte is added
+  // to mark it as a stub.
+  StubLayout Result = {14, 4};
+  return Result;
+}
+
+void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
                                   JITCodeEmitter &JCE) {
+  MachineCodeEmitter::BufferState BS;
  // Note, we cast to intptr_t here to silence a -pedantic warning that
  // complains about casting a function pointer to a normal pointer.
#if defined (X86_32_JIT) && !defined (_MSC_VER) - bool NotCC = (Fn != (void*)(intptr_t)X86CompilationCallback && - Fn != (void*)(intptr_t)X86CompilationCallback_SSE); + bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback && + Target != (void*)(intptr_t)X86CompilationCallback_SSE); #else - bool NotCC = Fn != (void*)(intptr_t)X86CompilationCallback; + bool NotCC = Target != (void*)(intptr_t)X86CompilationCallback; #endif + JCE.emitAlignment(4); + void *Result = (void*)JCE.getCurrentPCValue(); if (NotCC) { #if defined (X86_64_JIT) - JCE.startGVStub(F, 13, 4); JCE.emitByte(0x49); // REX prefix JCE.emitByte(0xB8+2); // movabsq r10 - JCE.emitWordLE((unsigned)(intptr_t)Fn); - JCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32)); + JCE.emitWordLE((unsigned)(intptr_t)Target); + JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32)); JCE.emitByte(0x41); // REX prefix JCE.emitByte(0xFF); // jmpq *r10 JCE.emitByte(2 | (4 << 3) | (3 << 6)); #else - JCE.startGVStub(F, 5, 4); JCE.emitByte(0xE9); - JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4); + JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4); #endif - return JCE.finishGVStub(F); + return Result; } #if defined (X86_64_JIT) - JCE.startGVStub(F, 14, 4); JCE.emitByte(0x49); // REX prefix JCE.emitByte(0xB8+2); // movabsq r10 - JCE.emitWordLE((unsigned)(intptr_t)Fn); - JCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32)); + JCE.emitWordLE((unsigned)(intptr_t)Target); + JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32)); JCE.emitByte(0x41); // REX prefix JCE.emitByte(0xFF); // callq *r10 JCE.emitByte(2 | (2 << 3) | (3 << 6)); #else - JCE.startGVStub(F, 6, 4); JCE.emitByte(0xE8); // Call with 32 bit pc-rel destination... - JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4); + JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4); #endif // This used to use 0xCD, but that value is used by JITMemoryManager to // initialize the buffer with garbage, which means it may follow a // noreturn function call, confusing X86CompilationCallback2. PR 4929. JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub! - return JCE.finishGVStub(F); -} - -void X86JITInfo::emitFunctionStubAtAddr(const Function* F, void *Fn, void *Stub, - JITCodeEmitter &JCE) { - // Note, we cast to intptr_t here to silence a -pedantic warning that - // complains about casting a function pointer to a normal pointer. - JCE.startGVStub(F, Stub, 5); - JCE.emitByte(0xE9); -#if defined (X86_64_JIT) && !defined (NDEBUG) - // Yes, we need both of these casts, or some broken versions of GCC (4.2.4) - // get the signed-ness of the expression wrong. Go figure. - intptr_t Displacement = (intptr_t)Fn - (intptr_t)JCE.getCurrentPCValue() - 5; - assert(((Displacement << 32) >> 32) == Displacement - && "PIC displacement does not fit in displacement field!"); -#endif - JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4); - JCE.finishGVStub(F); + return Result; } /// getPICJumpTableEntry - Returns the value of the jumptable entry for the diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h index c381433..238420c 100644 --- a/lib/Target/X86/X86JITInfo.h +++ b/lib/Target/X86/X86JITInfo.h @@ -43,18 +43,16 @@ namespace llvm { virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, JITCodeEmitter &JCE); + // getStubLayout - Returns the size and alignment of the largest call stub + // on X86. 
+ virtual StubLayout getStubLayout(); + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a /// small native function that simply calls the function at the specified /// address. - virtual void *emitFunctionStub(const Function* F, void *Fn, + virtual void *emitFunctionStub(const Function* F, void *Target, JITCodeEmitter &JCE); - /// emitFunctionStubAtAddr - Use the specified JITCodeEmitter object to - /// emit a small native function that simply calls Fn. Emit the stub into - /// the supplied buffer. - virtual void emitFunctionStubAtAddr(const Function* F, void *Fn, - void *Buffer, JITCodeEmitter &JCE); - /// getPICJumpTableEntry - Returns the value of the jumptable entry for the /// specific basic block. virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase); diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index f577fcf..33852bd 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -1262,7 +1262,7 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, else if (RetOpcode== X86::TCRETURNri64) BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg()); else - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg()); + BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg()); // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index b901c14..661f560 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -28,6 +28,21 @@ using namespace llvm; #include <intrin.h> #endif +/// ClassifyBlockAddressReference - Classify a blockaddress reference for the +/// current subtarget according to how we should reference it in a non-pcrel +/// context. +unsigned char X86Subtarget:: +ClassifyBlockAddressReference() const { + if (isPICStyleGOT()) // 32-bit ELF targets. + return X86II::MO_GOTOFF; + + if (isPICStyleStubPIC()) // Darwin/32 in PIC mode. + return X86II::MO_PIC_BASE_OFFSET; + + // Direct static reference to label. + return X86II::MO_NO_FLAG; +} + /// ClassifyGlobalReference - Classify a global variable reference for the /// current subtarget according to how we should reference it in a non-pcrel /// context. diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 23f2841..fb457dd 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -199,6 +199,11 @@ public: unsigned char ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM)const; + /// ClassifyBlockAddressReference - Classify a blockaddress reference for the + /// current subtarget according to how we should reference it in a non-pcrel + /// context. + unsigned char ClassifyBlockAddressReference() const; + /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls /// to immediate address. 
bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 00dcce6..f310456 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -142,10 +142,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

-  // Debug
-  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
-  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
-
  maxStoresPerMemset = 4;
  maxStoresPerMemmove = maxStoresPerMemcpy = 2;
@@ -295,7 +291,7 @@ LowerBlockAddress(SDValue Op, SelectionDAG &DAG)
  DebugLoc DL = Op.getDebugLoc();

  BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
-  SDValue Result = DAG.getBlockAddress(BA, DL, /*isTarget=*/true);
+  SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true);

  return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result);
}
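For context on the blockaddress plumbing reworked above (X86's LowerBlockAddress,
GetBlockAddressSymbol, and ClassifyBlockAddressReference, plus XCore's
LowerBlockAddress): blockaddress constants and indirectbr instructions normally
originate from GNU C's labels-as-values extension. The sketch below is
illustrative only and is not code from this commit; the function and label names
are invented. It shows the kind of source that exercises these lowering paths;
on 32-bit ELF in PIC mode the label addresses it takes would now be referenced
@GOTOFF, which is what ClassifyBlockAddressReference returns for that case.

/* Illustrative only; not from this commit. Each &&label becomes an LLVM
   'blockaddress' constant and the computed goto becomes an 'indirectbr';
   the backend paths changed above decide how those basic-block addresses
   are materialized (e.g. @GOTOFF under 32-bit ELF PIC, $pb-relative on
   Darwin/32, direct references otherwise). */
#include <stdio.h>

static int dispatch(int op) {
  /* GCC labels-as-values: a static table of basic-block addresses. */
  static void *const table[] = { &&add_one, &&double_it, &&done };
  int x = 1;
  if (op < 0 || op > 2)
    op = 2;
  goto *table[op];          /* lowered to an indirectbr */
add_one:
  x += 1;
  goto *table[2];
double_it:
  x *= 2;
  goto *table[2];
done:
  return x;
}

int main(void) {
  printf("%d %d %d\n", dispatch(0), dispatch(1), dispatch(2));
  return 0;
}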